Wed, 27 Nov 2024 23:00:07 +0100
add TODO to use a future ucx feature
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2019 Olaf Wintermann. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <cx/string.h> #include <cx/map.h> #include <cx/hash_map.h> #include <cx/buffer.h> #include <cx/linked_list.h> #include "../util/util.h" #include "../util/pool.h" #include "xml.h" /***************************************************************************** * Utility functions *****************************************************************************/ /* * generates a string key for an xml namespace * format: prefix '\0' href */ static CxHashKey xml_namespace_key(CxAllocator *a, WSNamespace *ns) { cxmutstr key_data = cx_strcat_a(a, 3, ns->prefix ? cx_str((char*)ns->prefix) : cx_strn("\0", 1), cx_strn("\0", 1), cx_str((char*)ns->href)); return cx_hash_key_bytes((unsigned char*)key_data.ptr, key_data.length); } /***************************************************************************** * Public functions *****************************************************************************/ /* ------------------------ wsxml_iterator ------------------------ */ typedef struct StackElm { WSXmlNode *node; // list of nodes //WSXmlNode *parent; // if not NULL, call endcb after node->next is NULL int endonly; struct StackElm *next; } StackElm; #define STACK_PUSH(stack, elm) if(stack) { elm->next = stack; } stack = elm; int wsxml_iterator( pool_handle_t *pool, WSXmlNode *node, wsxml_func begincb, wsxml_func endcb, void *udata) { if(!node) { return 0; } StackElm *stack = pool_malloc(pool, sizeof(StackElm)); if(!stack) { return 1; // OOM } stack->next = NULL; stack->node = node; stack->endonly = 0; //stack->parent = NULL; int ret = 0; int br = 0; while(stack) { StackElm *cur = stack; WSXmlNode *xmlnode = cur->node; // get top stack element stack = cur->next; // and remove it cur->next = NULL; while(xmlnode && !cur->endonly) { // element begin callback if(begincb(xmlnode, udata)) { br = 1; break; // I don't like break with labels - is this wrong? } if(xmlnode->children) { // put the children on the stack // the next stack iteration will process the children StackElm *newelm = pool_malloc(pool, sizeof(StackElm)); if(!newelm) { ret = 1; br = 1; break; } newelm->next = NULL; newelm->node = xmlnode->children; // setting the parent will make sure endcb will be called // for the current xmlnode after all children are processed //newelm->parent = xmlnode; newelm->endonly = 0; // if xmlnode->next is not NULL, there are still nodes at // this level, therefore we have to put these also on the // stack // this way, the remaining nodes are processed after all // children and the end tag are processed if(xmlnode->next) { StackElm *nextelm = pool_malloc(pool, sizeof(StackElm)); if(!nextelm) { ret = 1; br = 1; break; } nextelm->node = xmlnode->next; nextelm->next = NULL; nextelm->endonly = 0; STACK_PUSH(stack, nextelm); } // we have to put the end tag of the current element // on the stack to ensure endcb is called for the current // element, after all children are processed // reuse cur cur->node = xmlnode; cur->endonly = 1; STACK_PUSH(stack, cur); cur = NULL; // now we can put the children on the stack STACK_PUSH(stack, newelm); // break, because we don't want to process xmlnode->next now break; } else { // no children means, the end callback can be called directly // after the begin callback (no intermediate nodes) cur->node = NULL; if(endcb(xmlnode, udata)) { br = 1; break; } } // continue with next node at this level xmlnode = xmlnode->next; } if(br) { break; // break because of an error } if(cur && cur->node) { //xmlNode *endNode = cur->parent ? cur->parent : cur->node; xmlNode *endNode = cur->node; if(endcb(endNode, udata)) { break; } pool_free(pool, cur); } } // free all remaining elements StackElm *elm = stack; while(elm) { StackElm *next = elm->next; pool_free(pool, elm); elm = next; } return ret; } /* ------------------- wsxml_get_required_namespaces ------------------- */ typedef struct WSNsCollector { CxAllocator *a; CxMap *nsmap; WebdavNSList *def; int error; } WSNsCollector; static int nslist_node_begin(xmlNode *node, void *userdata) { WSNsCollector *col = userdata; // namespace required for all elements if(node->type == XML_ELEMENT_NODE && node->ns) { // we create a list of unique prefix-href namespaces by putting // all namespaces in a map CxHashKey nskey = xml_namespace_key(col->a, node->ns); if(!nskey.data) { col->error = 1; return 1; } if(cxMapPut(col->nsmap, nskey, node->ns)) { col->error = 1; return 1; } // collect all namespace definitions for removing these namespaces // from col->nsmap later WSNamespace *def = node->nsDef; while(def) { WebdavNSList *newdef = cxMalloc(col->a, sizeof(WebdavNSList)); if(!newdef) { col->error = 1; return 1; } newdef->namespace = def; newdef->prev = NULL; newdef->next = NULL; // prepend newdef to the list if(col->def) { newdef->next = col->def; col->def->prev = newdef; } col->def = newdef; // continue with next namespace definition def = def->next; } } return 0; } static int nslist_node_end(xmlNode *node, void *userdata) { return 0; } WebdavNSList* wsxml_get_required_namespaces( pool_handle_t *pool, WSXmlNode *node, int *error) { if(error) *error = 0; CxAllocator *a = pool_allocator(pool); CxMap *nsmap = cxHashMapCreate(a, CX_STORE_POINTERS, 16); if(!nsmap) { if(error) *error = 1; return NULL; } WSNsCollector col; col.a = a; col.nsmap = nsmap; col.def = NULL; // iterate over all xml elements // this will fill the hashmap with all namespaces // all namespace definitions are added to col.def WebdavNSList *list = NULL; WebdavNSList *end = NULL; if(wsxml_iterator(pool, node, nslist_node_begin, nslist_node_end, &col)) { if(error) *error = 1; } else { // remove all namespace definitions from the map // what we get is a map that contains all missing namespace definitions WebdavNSList *def = col.def; while(def) { CxHashKey nskey = xml_namespace_key(a, def->namespace); if(!nskey.data) { if(error) *error = 1; break; } cxMapRemove(nsmap, nskey); def = def->next; } // convert nsmap to a list CxIterator i = cxMapIteratorValues(nsmap); WSNamespace *ns; cx_foreach(WSNamespace *, ns, i) { WebdavNSList *newelm = pool_malloc(pool, sizeof(WebdavNSList)); if(!newelm) { if(error) *error = 1; list = NULL; break; } newelm->namespace = ns; newelm->next = NULL; newelm->prev = NULL; cx_linked_list_add((void**)&list, (void**)&end, offsetof(WebdavNSList, prev), offsetof(WebdavNSList, next), newelm); } } cxMapDestroy(nsmap); return list; } static ssize_t buf_writefunc(void *buf, const char *s, size_t len) { int w = cxBufferWrite(s, 1, len, buf); return w == 0 ? IO_ERROR : w; } WSXmlData* wsxml_node2data( pool_handle_t *pool, WSXmlNode *node) { CxBuffer buf; if(cxBufferInit(&buf, NULL, 1024, pool_allocator(pool), CX_BUFFER_AUTO_EXTEND|CX_BUFFER_FREE_CONTENTS)) { return NULL; } int error = 0; WebdavNSList *nslist = wsxml_get_required_namespaces(pool, node, &error); if(error) { return NULL; } Writer writer; char buffer[512]; writer_init_with_stream(&writer, &buf, buf_writefunc, buffer, 512); WSXmlData *data = NULL; if(!wsxml_write_nodes(pool, &writer, NULL, node) && !writer_flush(&writer)) { data = pool_malloc(pool, sizeof(WSXmlData)); if(data) { data->data = pool_malloc(pool, buf.size + 1); if(data->data) { memcpy(data->data, buf.space, buf.size); data->data[buf.size] = '\0'; data->length = buf.size; data->namespaces = nslist; } } } cxBufferDestroy(&buf); return data; } char* wsxml_nslist2string(pool_handle_t *pool, WebdavNSList *nslist) { if(!nslist) return NULL; // get required string length size_t len = 0; WebdavNSList *elm = nslist; while(elm) { WSNamespace *ns = elm->namespace; if(ns) { if(ns->prefix) len += strlen((const char*)ns->prefix); if(ns->href) len += strlen((const char*)ns->href); len += 2; // 1 char for ':', 1 char for \n or \0 } elm = elm->next; } // alloc string char *str = pool_malloc(pool, len); if(!str) { return NULL; } char *pos = str; // copy namespace definitions to the string elm = nslist; while(elm) { WSNamespace *ns = elm->namespace; if(ns) { if(ns->prefix) { size_t prefixlen = strlen((const char*)ns->prefix); memcpy(pos, ns->prefix, prefixlen); pos[prefixlen] = ':'; pos += prefixlen + 1; } else { pos[0] = ':'; pos++; } if(ns->href) { size_t hreflen = strlen((const char*)ns->href); memcpy(pos, ns->href, hreflen); pos[hreflen] = elm->next ? '\n' : '\0'; pos += hreflen + 1; } else { pos[0] = elm->next ? '\n' : '\0'; pos++; } } elm = elm->next; } return str; } WebdavNSList* wsxml_string2nslist(pool_handle_t *pool, char *nsliststr) { if(!nsliststr) return NULL; size_t len = strlen(nsliststr); WebdavNSList *list_start = NULL; WebdavNSList *list_current = NULL; char *prefix = nsliststr; size_t prefix_start = 0; size_t prefix_len = 0; char *href = NULL; size_t href_start = len; size_t i; for(i=0;i<=len;i++) { char c = nsliststr[i]; if(c == '\n' || c == '\0') { if(i > href_start) { WebdavNSList *elm = pool_malloc(pool, sizeof(WebdavNSList)); if(!elm) { break; } elm->prev = list_current; elm->next = NULL; WSNamespace *ns = pool_malloc(pool, sizeof(WSNamespace)); elm->namespace = ns; if(!ns) { break; } memset(ns, 0, sizeof(WSNamespace)); ns->prefix = prefix_len > 0 ? (xmlChar*)cx_strdup_pool(pool, cx_mutstrn(prefix, prefix_len)).ptr : NULL; ns->href = (xmlChar*)cx_strdup_pool(pool, cx_mutstrn(href, i-href_start)).ptr; if(list_current) { list_current->next = elm; } else { list_start = elm; } list_current = elm; } prefix_start = i + 1; prefix = nsliststr + prefix_start; prefix_len = 0; href_start = len; href = NULL; } else if(!href && c == ':') { prefix_len = i - prefix_start; href_start = i + 1; href = nsliststr + href_start; } } if(i < len) { // error, cleanup while(list_start) { if(list_start->namespace) { WSNamespace *ns = list_start->namespace; if(ns->prefix) { pool_free(pool, (char*)ns->prefix); } if(ns->href) { pool_free(pool, (char*)ns->href); } pool_free(pool, ns); } WebdavNSList *next = list_start->next; pool_free(pool, list_start); list_start = next; } list_start = NULL; } return list_start; } /***************************************************************************** * Non public functions *****************************************************************************/ typedef struct XmlWriter { /* * Memory pool for temp memory allocations */ pool_handle_t *pool; /* * Buffered output stream */ Writer *out; /* * Map for all previously defined namespaces * key: (char*) namespace prefix * value: WSNamespace* */ CxMap *namespaces; /* * Should namespace definitions be created */ WSBool define_namespaces; } XmlWriter; /* * Serialize an XML text node * This replaces some special characters with entity refs * type: 0 = element text, 1 = attribute text */ static void xml_ser_text(Writer *out, int type, const char *text) { size_t start = 0; size_t i; cxstring entityref = { NULL, 0 }; for(i=0;text[i]!='\0';i++) { char c = text[i]; if(c == '&') { entityref = (cxstring)CX_STR("&"); } else if(type == 0) { if(c == '<') { entityref = (cxstring)CX_STR("<"); } else if(c == '>') { entityref = (cxstring)CX_STR(">"); } } else { if(c == '\"') { entityref = (cxstring)CX_STR("""); } else if(c == '\'') { entityref = (cxstring)CX_STR("'"); } } if(entityref.ptr) { size_t len = i-start; if(len > 0) { writer_put(out, text+start, len); } writer_puts(out, entityref); entityref.ptr = NULL; entityref.length = 0; start = i+1; } } size_t len = i-start; if(len > 0) { writer_put(out, text+start, len); } } /* * Serialize an XML element node */ static void xml_ser_element(XmlWriter *xw, xmlNode *node) { Writer *out = xw->out; writer_putc(out, '<'); // write prefix and ':' if(node->ns && node->ns->prefix) { writer_puts(out, cx_str((char*)node->ns->prefix)); writer_putc(out, ':'); } // node name writer_puts(out, cx_str((char*)node->name)); // namespace definitions if(xw->define_namespaces) { xmlNs *nsdef = node->nsDef; while(nsdef) { // we define only namespaces without prefix or namespaces // with prefix, that are not already defined // xw->namespaces contains all namespace, that were defined // before xml serialization if(!nsdef->prefix) { writer_put_lit(out, " xmlns=\""); writer_put_str(out, (char*)nsdef->href); writer_putc (out, '"'); } else { WSNamespace *n = xw->namespaces ? cxMapGet(xw->namespaces, cx_hash_key_str((const char*)nsdef->prefix)) : NULL; if(!n) { writer_put_lit(out, " xmlns:"); writer_put_str(out, (const char*)nsdef->prefix); writer_put_lit(out, "=\""); writer_put_str(out, (const char*)nsdef->href); writer_putc (out, '"'); } } nsdef = nsdef->next; } } // attributes xmlAttr *attr = node->properties; while(attr) { // format: ' [<prefix>:]<name>="<value>"' writer_putc(out, ' '); // optional namespace if(attr->ns && attr->ns->prefix) { writer_puts(out, cx_str((char*)attr->ns->prefix)); writer_putc(out, ':'); } // <name>=" writer_put_str(out, (char*)attr->name); writer_put_lit(out, "=\""); // value xmlNode *value = attr->children; while(value) { if(value->content) { xml_ser_text(out, 1, (const char*)value->content); } value = value->next; } // trailing quote writer_putc(out, '"'); attr = attr->next; } if(node->children) { writer_putc(out, '>'); } else { writer_put_lit(out, "/>"); } } static int xml_ser_node_begin(xmlNode *node, void *userdata) { XmlWriter *xw = userdata; switch(node->type) { case XML_ELEMENT_NODE: xml_ser_element(xw, node); break; case XML_ATTRIBUTE_NODE: break; case XML_TEXT_NODE: { xml_ser_text(xw->out, 0, (const char*)node->content); break; } case XML_CDATA_SECTION_NODE: { break; } case XML_ENTITY_REF_NODE: break; case XML_ENTITY_NODE: break; case XML_PI_NODE: break; case XML_COMMENT_NODE: break; case XML_DOCUMENT_NODE: break; case XML_DOCUMENT_TYPE_NODE: break; case XML_DOCUMENT_FRAG_NODE: break; case XML_NOTATION_NODE: break; case XML_HTML_DOCUMENT_NODE: break; case XML_DTD_NODE: break; case XML_ELEMENT_DECL: break; case XML_ATTRIBUTE_DECL: break; case XML_ENTITY_DECL: break; case XML_NAMESPACE_DECL: break; case XML_XINCLUDE_START: break; case XML_XINCLUDE_END: break; default: break; } return 0; } static int xml_ser_node_end(xmlNode *node, void *userdata) { XmlWriter *xw = userdata; Writer *out = xw->out; if(node->type == XML_ELEMENT_NODE) { if(node->children) { writer_put_lit(xw->out, "</"); // write prefix and ':' if(node->ns && node->ns->prefix) { writer_puts(out, cx_str((char*)node->ns->prefix)); writer_putc(out, ':'); } // name and close tag writer_puts(out, cx_str((char*)node->name)); writer_putc(out, '>'); } // element was already closed in xml_ser_node_begin } return 0; } static int xml_write_nodes( pool_handle_t *pool, Writer *out, CxMap *nsdefs, WSBool createdefs, xmlNode *node) { XmlWriter xmlwriter; xmlwriter.pool = pool; xmlwriter.out = out; xmlwriter.namespaces = nsdefs; xmlwriter.define_namespaces = createdefs; // iterate over xml nodes // this includes node->children and node->next int err = wsxml_iterator( pool, node, xml_ser_node_begin, xml_ser_node_end, &xmlwriter); if(err) { return -1; } return out->error; } int wsxml_write_nodes( pool_handle_t *pool, Writer *out, CxMap *nsdefs, xmlNode *node) { return xml_write_nodes(pool, out, nsdefs, TRUE, node); } int wsxml_write_nodes_without_nsdef( pool_handle_t *pool, Writer *out, xmlNode *node) { return xml_write_nodes(pool, out, NULL, FALSE, node); }