UNIXworkcode

1 /* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 * 4 * Copyright 2019 Olaf Wintermann. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 33 #include <cx/string.h> 34 #include <cx/map.h> 35 #include <cx/hash_map.h> 36 #include <cx/buffer.h> 37 #include <cx/linked_list.h> 38 39 #include "../util/util.h" 40 #include "../util/pool.h" 41 42 #include "xml.h" 43 44 /***************************************************************************** 45 * Utility functions 46 *****************************************************************************/ 47 48 /* 49 * generates a string key for an xml namespace 50 * format: prefix '\0' href 51 */ 52 static CxHashKey xml_namespace_key(CxAllocator *a, WSNamespace *ns) { 53 cxmutstr key_data = cx_strcat_a(a, 3, 54 ns->prefix ? cx_str((char*)ns->prefix) : cx_strn("\0", 1), 55 cx_strn("\0", 1), 56 cx_str((char*)ns->href)); 57 return cx_hash_key_bytes((unsigned char*)key_data.ptr, key_data.length); 58 } 59 60 61 /***************************************************************************** 62 * Public functions 63 *****************************************************************************/ 64 65 /* ------------------------ wsxml_iterator ------------------------ */ 66 67 typedef struct StackElm { 68 WSXmlNode *node; // list of nodes 69 //WSXmlNode *parent; // if not NULL, call endcb after node->next is NULL 70 int endonly; 71 struct StackElm *next; 72 } StackElm; 73 74 #define STACK_PUSH(stack, elm) if(stack) { elm->next = stack; } stack = elm; 75 76 int wsxml_iterator( 77 pool_handle_t *pool, 78 WSXmlNode *node, 79 wsxml_func begincb, 80 wsxml_func endcb, 81 void *udata) 82 { 83 if(!node) { 84 return 0; 85 } 86 87 StackElm *stack = pool_malloc(pool, sizeof(StackElm)); 88 if(!stack) { 89 return 1; // OOM 90 } 91 stack->next = NULL; 92 stack->node = node; 93 stack->endonly = 0; 94 //stack->parent = NULL; 95 96 int ret = 0; 97 int br = 0; 98 while(stack) { 99 StackElm *cur = stack; 100 WSXmlNode *xmlnode = cur->node; // get top stack element 101 stack = cur->next; // and remove it 102 cur->next = NULL; 103 104 while(xmlnode && !cur->endonly) { 105 // element begin callback 106 if(begincb(xmlnode, udata)) { 107 br = 1; 108 break; // I don't like break with labels - is this wrong? 109 } 110 111 if(xmlnode->children) { 112 // put the children on the stack 113 // the next stack iteration will process the children 114 StackElm *newelm = pool_malloc(pool, sizeof(StackElm)); 115 if(!newelm) { 116 ret = 1; 117 br = 1; 118 break; 119 } 120 newelm->next = NULL; 121 newelm->node = xmlnode->children; 122 // setting the parent will make sure endcb will be called 123 // for the current xmlnode after all children are processed 124 //newelm->parent = xmlnode; 125 newelm->endonly = 0; 126 127 // if xmlnode->next is not NULL, there are still nodes at 128 // this level, therefore we have to put these also on the 129 // stack 130 // this way, the remaining nodes are processed after all 131 // children and the end tag are processed 132 if(xmlnode->next) { 133 StackElm *nextelm = pool_malloc(pool, sizeof(StackElm)); 134 if(!nextelm) { 135 ret = 1; 136 br = 1; 137 break; 138 } 139 nextelm->node = xmlnode->next; 140 nextelm->next = NULL; 141 nextelm->endonly = 0; 142 STACK_PUSH(stack, nextelm); 143 } 144 145 // we have to put the end tag of the current element 146 // on the stack to ensure endcb is called for the current 147 // element, after all children are processed 148 // reuse cur 149 cur->node = xmlnode; 150 cur->endonly = 1; 151 STACK_PUSH(stack, cur); 152 153 cur = NULL; 154 155 // now we can put the children on the stack 156 STACK_PUSH(stack, newelm); 157 // break, because we don't want to process xmlnode->next now 158 break; 159 } else { 160 // no children means, the end callback can be called directly 161 // after the begin callback (no intermediate nodes) 162 cur->node = NULL; 163 if(endcb(xmlnode, udata)) { 164 br = 1; 165 break; 166 } 167 } 168 169 // continue with next node at this level 170 xmlnode = xmlnode->next; 171 } 172 if(br) { 173 break; // break because of an error 174 } 175 176 if(cur && cur->node) { 177 //xmlNode *endNode = cur->parent ? cur->parent : cur->node; 178 xmlNode *endNode = cur->node; 179 if(endcb(endNode, udata)) { 180 break; 181 } 182 pool_free(pool, cur); 183 } 184 } 185 186 // free all remaining elements 187 StackElm *elm = stack; 188 while(elm) { 189 StackElm *next = elm->next; 190 pool_free(pool, elm); 191 elm = next; 192 } 193 194 return ret; 195 } 196 197 /* ------------------- wsxml_get_required_namespaces ------------------- */ 198 199 typedef struct WSNsCollector { 200 CxAllocator *a; 201 CxMap *nsmap; 202 WebdavNSList *def; 203 int error; 204 } WSNsCollector; 205 206 static int nslist_node_begin(xmlNode *node, void *userdata) { 207 WSNsCollector *col = userdata; 208 // namespace required for all elements 209 if(node->type == XML_ELEMENT_NODE && node->ns) { 210 // we create a list of unique prefix-href namespaces by putting 211 // all namespaces in a map 212 CxHashKey nskey = xml_namespace_key(col->a, node->ns); 213 if(!nskey.data) { 214 col->error = 1; 215 return 1; 216 } 217 if(cxMapPut(col->nsmap, nskey, node->ns)) { 218 col->error = 1; 219 return 1; 220 } 221 222 // collect all namespace definitions for removing these namespaces 223 // from col->nsmap later 224 WSNamespace *def = node->nsDef; 225 while(def) { 226 WebdavNSList *newdef = cxMalloc(col->a, sizeof(WebdavNSList)); 227 if(!newdef) { 228 col->error = 1; 229 return 1; 230 } 231 newdef->namespace = def; 232 newdef->prev = NULL; 233 newdef->next = NULL; 234 // prepend newdef to the list 235 if(col->def) { 236 newdef->next = col->def; 237 col->def->prev = newdef; 238 } 239 col->def = newdef; 240 241 // continue with next namespace definition 242 def = def->next; 243 } 244 } 245 return 0; 246 } 247 248 static int nslist_node_end(xmlNode *node, void *userdata) { 249 return 0; 250 } 251 252 WebdavNSList* wsxml_get_required_namespaces( 253 pool_handle_t *pool, 254 WSXmlNode *node, 255 int *error) 256 { 257 if(error) *error = 0; 258 259 CxAllocator *a = pool_allocator(pool); 260 CxMap *nsmap = cxHashMapCreate(a, CX_STORE_POINTERS, 16); 261 if(!nsmap) { 262 if(error) *error = 1; 263 return NULL; 264 } 265 266 WSNsCollector col; 267 col.a = a; 268 col.nsmap = nsmap; 269 col.def = NULL; 270 271 // iterate over all xml elements 272 // this will fill the hashmap with all namespaces 273 // all namespace definitions are added to col.def 274 WebdavNSList *list = NULL; 275 WebdavNSList *end = NULL; 276 if(wsxml_iterator(pool, node, nslist_node_begin, nslist_node_end, &col)) { 277 if(error) *error = 1; 278 } else { 279 // remove all namespace definitions from the map 280 // what we get is a map that contains all missing namespace definitions 281 WebdavNSList *def = col.def; 282 while(def) { 283 CxHashKey nskey = xml_namespace_key(a, def->namespace); 284 if(!nskey.data) { 285 if(error) *error = 1; 286 break; 287 } 288 cxMapRemove(nsmap, nskey); 289 def = def->next; 290 } 291 292 // convert nsmap to a list 293 CxIterator i = cxMapIteratorValues(nsmap); 294 WSNamespace *ns; 295 cx_foreach(WSNamespace *, ns, i) { 296 WebdavNSList *newelm = pool_malloc(pool, sizeof(WebdavNSList)); 297 if(!newelm) { 298 if(error) *error = 1; 299 list = NULL; 300 break; 301 } 302 newelm->namespace = ns; 303 newelm->next = NULL; 304 newelm->prev = NULL; 305 cx_linked_list_add((void**)&list, (void**)&end, offsetof(WebdavNSList, prev), offsetof(WebdavNSList, next), newelm); 306 } 307 } 308 309 cxMapDestroy(nsmap); 310 return list; 311 } 312 313 314 static ssize_t buf_writefunc(void *buf, const char *s, size_t len) { 315 int w = cxBufferWrite(s, 1, len, buf); 316 return w == 0 ? IO_ERROR : w; 317 } 318 319 WSXmlData* wsxml_node2data( 320 pool_handle_t *pool, 321 WSXmlNode *node) 322 { 323 CxBuffer buf; 324 if(cxBufferInit(&buf, NULL, 1024, pool_allocator(pool), CX_BUFFER_AUTO_EXTEND|CX_BUFFER_FREE_CONTENTS)) { 325 return NULL; 326 } 327 328 int error = 0; 329 WebdavNSList *nslist = wsxml_get_required_namespaces(pool, node, &error); 330 if(error) { 331 return NULL; 332 } 333 334 Writer writer; 335 char buffer[512]; 336 writer_init_with_stream(&writer, &buf, buf_writefunc, buffer, 512); 337 338 WSXmlData *data = NULL; 339 if(!wsxml_write_nodes(pool, &writer, NULL, node) && !writer_flush(&writer)) { 340 data = pool_malloc(pool, sizeof(WSXmlData)); 341 if(data) { 342 data->data = pool_malloc(pool, buf.size + 1); 343 if(data->data) { 344 memcpy(data->data, buf.space, buf.size); 345 data->data[buf.size] = '\0'; 346 data->length = buf.size; 347 data->namespaces = nslist; 348 } 349 } 350 } 351 352 cxBufferDestroy(&buf); 353 354 return data; 355 } 356 357 char* wsxml_nslist2string(pool_handle_t *pool, WebdavNSList *nslist) { 358 if(!nslist) return NULL; 359 360 // get required string length 361 size_t len = 0; 362 WebdavNSList *elm = nslist; 363 while(elm) { 364 WSNamespace *ns = elm->namespace; 365 if(ns) { 366 if(ns->prefix) len += strlen((const char*)ns->prefix); 367 if(ns->href) len += strlen((const char*)ns->href); 368 len += 2; // 1 char for ':', 1 char for \n or \0 369 } 370 elm = elm->next; 371 } 372 373 // alloc string 374 char *str = pool_malloc(pool, len); 375 if(!str) { 376 return NULL; 377 } 378 char *pos = str; 379 380 // copy namespace definitions to the string 381 elm = nslist; 382 while(elm) { 383 WSNamespace *ns = elm->namespace; 384 if(ns) { 385 if(ns->prefix) { 386 size_t prefixlen = strlen((const char*)ns->prefix); 387 memcpy(pos, ns->prefix, prefixlen); 388 pos[prefixlen] = ':'; 389 pos += prefixlen + 1; 390 } else { 391 pos[0] = ':'; 392 pos++; 393 } 394 if(ns->href) { 395 size_t hreflen = strlen((const char*)ns->href); 396 memcpy(pos, ns->href, hreflen); 397 pos[hreflen] = elm->next ? '\n' : '\0'; 398 pos += hreflen + 1; 399 } else { 400 pos[0] = elm->next ? '\n' : '\0'; 401 pos++; 402 } 403 } 404 elm = elm->next; 405 } 406 407 return str; 408 } 409 410 WebdavNSList* wsxml_string2nslist(pool_handle_t *pool, char *nsliststr) { 411 if(!nsliststr) return NULL; 412 size_t len = strlen(nsliststr); 413 WebdavNSList *list_start = NULL; 414 WebdavNSList *list_current = NULL; 415 416 char *prefix = nsliststr; 417 size_t prefix_start = 0; 418 size_t prefix_len = 0; 419 char *href = NULL; 420 size_t href_start = len; 421 size_t i; 422 for(i=0;i<=len;i++) { 423 char c = nsliststr[i]; 424 if(c == '\n' || c == '\0') { 425 if(i > href_start) { 426 WebdavNSList *elm = pool_malloc(pool, sizeof(WebdavNSList)); 427 if(!elm) { 428 break; 429 } 430 elm->prev = list_current; 431 elm->next = NULL; 432 WSNamespace *ns = pool_malloc(pool, sizeof(WSNamespace)); 433 elm->namespace = ns; 434 if(!ns) { 435 break; 436 } 437 memset(ns, 0, sizeof(WSNamespace)); 438 ns->prefix = prefix_len > 0 ? (xmlChar*)cx_strdup_pool(pool, cx_mutstrn(prefix, prefix_len)).ptr : NULL; 439 ns->href = (xmlChar*)cx_strdup_pool(pool, cx_mutstrn(href, i-href_start)).ptr; 440 if(list_current) { 441 list_current->next = elm; 442 } else { 443 list_start = elm; 444 } 445 list_current = elm; 446 } 447 prefix_start = i + 1; 448 prefix = nsliststr + prefix_start; 449 prefix_len = 0; 450 href_start = len; 451 href = NULL; 452 } else if(!href && c == ':') { 453 prefix_len = i - prefix_start; 454 href_start = i + 1; 455 href = nsliststr + href_start; 456 } 457 } 458 459 if(i < len) { 460 // error, cleanup 461 while(list_start) { 462 if(list_start->namespace) { 463 WSNamespace *ns = list_start->namespace; 464 if(ns->prefix) { 465 pool_free(pool, (char*)ns->prefix); 466 } 467 if(ns->href) { 468 pool_free(pool, (char*)ns->href); 469 } 470 pool_free(pool, ns); 471 } 472 WebdavNSList *next = list_start->next; 473 pool_free(pool, list_start); 474 list_start = next; 475 } 476 list_start = NULL; 477 } 478 479 return list_start; 480 } 481 482 /***************************************************************************** 483 * Non public functions 484 *****************************************************************************/ 485 486 typedef struct XmlWriter { 487 /* 488 * Memory pool for temp memory allocations 489 */ 490 pool_handle_t *pool; 491 492 /* 493 * Buffered output stream 494 */ 495 Writer *out; 496 497 /* 498 * Map for all previously defined namespaces 499 * key: (char*) namespace prefix 500 * value: WSNamespace* 501 */ 502 CxMap *namespaces; 503 504 /* 505 * Should namespace definitions be created 506 */ 507 WSBool define_namespaces; 508 } XmlWriter; 509 510 /* 511 * Serialize an XML text node 512 * This replaces some special characters with entity refs 513 * type: 0 = element text, 1 = attribute text 514 */ 515 static void xml_ser_text(Writer *out, int type, const char *text) { 516 size_t start = 0; 517 size_t i; 518 cxstring entityref = { NULL, 0 }; 519 for(i=0;text[i]!='\0';i++) { 520 char c = text[i]; 521 if(c == '&') { 522 entityref = (cxstring)CX_STR("&amp;"); 523 } else if(type == 0) { 524 if(c == '<') { 525 entityref = (cxstring)CX_STR("&lt;"); 526 } else if(c == '>') { 527 entityref = (cxstring)CX_STR("&gt;"); 528 } 529 } else { 530 if(c == '\"') { 531 entityref = (cxstring)CX_STR("&quot;"); 532 } else if(c == '\'') { 533 entityref = (cxstring)CX_STR("&apos;"); 534 } 535 } 536 537 if(entityref.ptr) { 538 size_t len = i-start; 539 if(len > 0) { 540 writer_put(out, text+start, len); 541 } 542 writer_puts(out, entityref); 543 entityref.ptr = NULL; 544 entityref.length = 0; 545 start = i+1; 546 } 547 } 548 size_t len = i-start; 549 if(len > 0) { 550 writer_put(out, text+start, len); 551 } 552 } 553 554 /* 555 * Serialize an XML element node 556 */ 557 static void xml_ser_element(XmlWriter *xw, xmlNode *node) { 558 Writer *out = xw->out; 559 writer_putc(out, '<'); 560 561 // write prefix and ':' 562 if(node->ns && node->ns->prefix) { 563 writer_puts(out, cx_str((char*)node->ns->prefix)); 564 writer_putc(out, ':'); 565 } 566 567 // node name 568 writer_puts(out, cx_str((char*)node->name)); 569 570 // namespace definitions 571 if(xw->define_namespaces) { 572 xmlNs *nsdef = node->nsDef; 573 while(nsdef) { 574 // we define only namespaces without prefix or namespaces 575 // with prefix, that are not already defined 576 // xw->namespaces contains all namespace, that were defined 577 // before xml serialization 578 if(!nsdef->prefix) { 579 writer_put_lit(out, " xmlns=\""); 580 writer_put_str(out, (char*)nsdef->href); 581 writer_putc (out, '""'); 582 } else { 583 WSNamespace *n = xw->namespaces ? 584 cxMapGet(xw->namespaces, cx_hash_key_str((const char*)nsdef->prefix)) : 585 NULL; 586 if(!n) { 587 writer_put_lit(out, " xmlns:"); 588 writer_put_str(out, (const char*)nsdef->prefix); 589 writer_put_lit(out, "=\""); 590 writer_put_str(out, (const char*)nsdef->href); 591 writer_putc (out, '""'); 592 } 593 } 594 595 nsdef = nsdef->next; 596 } 597 } 598 599 // attributes 600 xmlAttr *attr = node->properties; 601 while(attr) { 602 // format: ' [<prefix>:]<name>="<value>"' 603 writer_putc(out, ' '); 604 // optional namespace 605 if(attr->ns && attr->ns->prefix) { 606 writer_puts(out, cx_str((char*)attr->ns->prefix)); 607 writer_putc(out, ':'); 608 } 609 // <name>=" 610 writer_put_str(out, (char*)attr->name); 611 writer_put_lit(out, "=\""); 612 // value 613 xmlNode *value = attr->children; 614 while(value) { 615 if(value->content) { 616 xml_ser_text(out, 1, (const char*)value->content); 617 } 618 value = value->next; 619 } 620 // trailing quote 621 writer_putc(out, '""'); 622 623 attr = attr->next; 624 } 625 626 if(node->children) { 627 writer_putc(out, '>'); 628 } else { 629 writer_put_lit(out, "/>"); 630 } 631 } 632 633 static int xml_ser_node_begin(xmlNode *node, void *userdata) { 634 XmlWriter *xw = userdata; 635 switch(node->type) { 636 case XML_ELEMENT_NODE: xml_ser_element(xw, node); break; 637 case XML_ATTRIBUTE_NODE: break; 638 case XML_TEXT_NODE: { 639 xml_ser_text(xw->out, 0, (const char*)node->content); 640 break; 641 } 642 case XML_CDATA_SECTION_NODE: { 643 break; 644 } 645 case XML_ENTITY_REF_NODE: break; 646 case XML_ENTITY_NODE: break; 647 case XML_PI_NODE: break; 648 case XML_COMMENT_NODE: break; 649 case XML_DOCUMENT_NODE: break; 650 case XML_DOCUMENT_TYPE_NODE: break; 651 case XML_DOCUMENT_FRAG_NODE: break; 652 case XML_NOTATION_NODE: break; 653 case XML_HTML_DOCUMENT_NODE: break; 654 case XML_DTD_NODE: break; 655 case XML_ELEMENT_DECL: break; 656 case XML_ATTRIBUTE_DECL: break; 657 case XML_ENTITY_DECL: break; 658 case XML_NAMESPACE_DECL: break; 659 case XML_XINCLUDE_START: break; 660 case XML_XINCLUDE_END: break; 661 default: break; 662 } 663 return 0; 664 } 665 666 static int xml_ser_node_end(xmlNode *node, void *userdata) { 667 XmlWriter *xw = userdata; 668 Writer *out = xw->out; 669 if(node->type == XML_ELEMENT_NODE) { 670 if(node->children) { 671 writer_put_lit(xw->out, "</"); 672 // write prefix and ':' 673 if(node->ns && node->ns->prefix) { 674 writer_puts(out, cx_str((char*)node->ns->prefix)); 675 writer_putc(out, ':'); 676 } 677 // name and close tag 678 writer_puts(out, cx_str((char*)node->name)); 679 writer_putc(out, '>'); 680 681 } // element was already closed in xml_ser_node_begin 682 } 683 return 0; 684 } 685 686 687 static int xml_write_nodes( 688 pool_handle_t *pool, 689 Writer *out, 690 CxMap *nsdefs, 691 WSBool createdefs, 692 xmlNode *node) 693 { 694 XmlWriter xmlwriter; 695 xmlwriter.pool = pool; 696 xmlwriter.out = out; 697 xmlwriter.namespaces = nsdefs; 698 xmlwriter.define_namespaces = createdefs; 699 700 // iterate over xml nodes 701 // this includes node->children and node->next 702 int err = wsxml_iterator( 703 pool, 704 node, 705 xml_ser_node_begin, 706 xml_ser_node_end, 707 &xmlwriter); 708 if(err) { 709 return -1; 710 } 711 712 return out->error; 713 } 714 715 int wsxml_write_nodes( 716 pool_handle_t *pool, 717 Writer *out, 718 CxMap *nsdefs, 719 xmlNode *node) 720 { 721 return xml_write_nodes(pool, out, nsdefs, TRUE, node); 722 } 723 724 int wsxml_write_nodes_without_nsdef( 725 pool_handle_t *pool, 726 Writer *out, 727 xmlNode *node) 728 { 729 return xml_write_nodes(pool, out, NULL, FALSE, node); 730 } 731