--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/server/webdav/xml.c Sat Sep 24 16:26:10 2022 +0200 @@ -0,0 +1,734 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2019 Olaf Wintermann. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <ucx/string.h> +#include <ucx/map.h> +#include <ucx/buffer.h> + +#include "../util/util.h" +#include "../util/pool.h" + +#include "xml.h" + +/***************************************************************************** + * Utility functions + *****************************************************************************/ + +/* + * generates a string key for an xml namespace + * format: prefix '\0' href + */ +static sstr_t xml_namespace_key(UcxAllocator *a, WSNamespace *ns) { + sstr_t key = sstrcat_a(a, 3, + ns->prefix ? sstr((char*)ns->prefix) : S("\0"), + S("\0"), + sstr((char*)ns->href)); + return key; +} + + +/***************************************************************************** + * Public functions + *****************************************************************************/ + +/* ------------------------ wsxml_iterator ------------------------ */ + +typedef struct StackElm { + WSXmlNode *node; // list of nodes + //WSXmlNode *parent; // if not NULL, call endcb after node->next is NULL + int endonly; + struct StackElm *next; +} StackElm; + +#define STACK_PUSH(stack, elm) if(stack) { elm->next = stack; } stack = elm; + +int wsxml_iterator( + pool_handle_t *pool, + WSXmlNode *node, + wsxml_func begincb, + wsxml_func endcb, + void *udata) +{ + if(!node) { + return 0; + } + + StackElm *stack = pool_malloc(pool, sizeof(StackElm)); + if(!stack) { + return 1; // OOM + } + stack->next = NULL; + stack->node = node; + stack->endonly = 0; + //stack->parent = NULL; + + int ret = 0; + int br = 0; + while(stack) { + StackElm *cur = stack; + WSXmlNode *xmlnode = cur->node; // get top stack element + stack = cur->next; // and remove it + cur->next = NULL; + + while(xmlnode && !cur->endonly) { + // element begin callback + if(begincb(xmlnode, udata)) { + br = 1; + break; // I don't like break with labels - is this wrong? + } + + if(xmlnode->children) { + // put the children on the stack + // the next stack iteration will process the children + StackElm *newelm = pool_malloc(pool, sizeof(StackElm)); + if(!newelm) { + ret = 1; + br = 1; + break; + } + newelm->next = NULL; + newelm->node = xmlnode->children; + // setting the parent will make sure endcb will be called + // for the current xmlnode after all children are processed + //newelm->parent = xmlnode; + newelm->endonly = 0; + + // if xmlnode->next is not NULL, there are still nodes at + // this level, therefore we have to put these also on the + // stack + // this way, the remaining nodes are processed after all + // children and the end tag are processed + if(xmlnode->next) { + StackElm *nextelm = pool_malloc(pool, sizeof(StackElm)); + if(!nextelm) { + ret = 1; + br = 1; + break; + } + nextelm->node = xmlnode->next; + nextelm->next = NULL; + nextelm->endonly = 0; + STACK_PUSH(stack, nextelm); + } + + // we have to put the end tag of the current element + // on the stack to ensure endcb is called for the current + // element, after all children are processed + // reuse cur + cur->node = xmlnode; + cur->endonly = 1; + STACK_PUSH(stack, cur); + + cur = NULL; + + // now we can put the children on the stack + STACK_PUSH(stack, newelm); + // break, because we don't want to process xmlnode->next now + break; + } else { + // no children means, the end callback can be called directly + // after the begin callback (no intermediate nodes) + cur->node = NULL; + if(endcb(xmlnode, udata)) { + br = 1; + break; + } + } + + // continue with next node at this level + xmlnode = xmlnode->next; + } + if(br) { + break; // break because of an error + } + + if(cur && cur->node) { + //xmlNode *endNode = cur->parent ? cur->parent : cur->node; + xmlNode *endNode = cur->node; + if(endcb(endNode, udata)) { + break; + } + pool_free(pool, cur); + } + } + + // free all remaining elements + StackElm *elm = stack; + while(elm) { + StackElm *next = elm->next; + pool_free(pool, elm); + elm = next; + } + + return ret; +} + +/* ------------------- wsxml_get_required_namespaces ------------------- */ + +typedef struct WSNsCollector { + UcxAllocator *a; + UcxMap *nsmap; + WebdavNSList *def; + int error; +} WSNsCollector; + +static int nslist_node_begin(xmlNode *node, void *userdata) { + WSNsCollector *col = userdata; + // namespace required for all elements + if(node->type == XML_ELEMENT_NODE && node->ns) { + // we create a list of unique prefix-href namespaces by putting + // all namespaces in a map + sstr_t nskey = xml_namespace_key(col->a, node->ns); + if(!nskey.ptr) { + col->error = 1; + return 1; + } + if(ucx_map_sstr_put(col->nsmap, nskey, node->ns)) { + col->error = 1; + return 1; + } + + // collect all namespace definitions for removing these namespaces + // from col->nsmap later + WSNamespace *def = node->nsDef; + while(def) { + WebdavNSList *newdef = col->a->malloc( + col->a->pool, sizeof(WebdavNSList)); + if(!newdef) { + col->error = 1; + return 1; + } + newdef->namespace = def; + newdef->prev = NULL; + newdef->next = NULL; + // prepend newdef to the list + if(col->def) { + newdef->next = col->def; + col->def->prev = newdef; + } + col->def = newdef; + + // continue with next namespace definition + def = def->next; + } + } + return 0; +} + +static int nslist_node_end(xmlNode *node, void *userdata) { + return 0; +} + +WebdavNSList* wsxml_get_required_namespaces( + pool_handle_t *pool, + WSXmlNode *node, + int *error) +{ + if(error) *error = 0; + + UcxAllocator a = util_pool_allocator(pool); + UcxMap *nsmap = ucx_map_new_a(&a, 16); + if(!nsmap) { + if(error) *error = 1; + return NULL; + } + + WSNsCollector col; + col.a = &a; + col.nsmap = nsmap; + col.def = NULL; + + // iterate over all xml elements + // this will fill the hashmap with all namespaces + // all namespace definitions are added to col.def + WebdavNSList *list = NULL; + WebdavNSList *end = NULL; + if(wsxml_iterator(pool, node, nslist_node_begin, nslist_node_end, &col)) { + if(error) *error = 1; + } else { + // remove all namespace definitions from the map + // what we get is a map that contains all missing namespace definitions + WebdavNSList *def = col.def; + while(def) { + sstr_t nskey = xml_namespace_key(&a, def->namespace); + if(!nskey.ptr) { + if(error) *error = 1; + break; + } + ucx_map_sstr_remove(nsmap, nskey); + def = def->next; + } + + // convert nsmap to a list + UcxMapIterator i = ucx_map_iterator(nsmap); + WSNamespace *ns; + UCX_MAP_FOREACH(key, ns, i) { + WebdavNSList *newelm = pool_malloc(pool, sizeof(WebdavNSList)); + if(!newelm) { + if(error) *error = 1; + list = NULL; + break; + } + newelm->namespace = ns; + newelm->next = NULL; + newelm->prev = end; // NULL or the end of list + if(end) { + end->next = newelm; // append new element + } else { + list = newelm; // start new list + } + end = newelm; + } + } + + ucx_map_free(nsmap); + return list; +} + + +static ssize_t buf_writefunc(void *buf, const char *s, size_t len) { + int w = ucx_buffer_write(s, 1, len, buf); + return w == 0 ? IO_ERROR : w; +} + +WSXmlData* wsxml_node2data( + pool_handle_t *pool, + WSXmlNode *node) +{ + UcxBuffer *buf = ucx_buffer_new(NULL, 1024, UCX_BUFFER_AUTOEXTEND); + if(!buf) { + return NULL; + } + + int error = 0; + WebdavNSList *nslist = wsxml_get_required_namespaces(pool, node, &error); + if(error) { + return NULL; + } + + Writer writer; + char buffer[512]; + writer_init_with_stream(&writer, buf, buf_writefunc, buffer, 512); + + WSXmlData *data = NULL; + if(!wsxml_write_nodes(pool, &writer, NULL, node) && !writer_flush(&writer)) { + data = pool_malloc(pool, sizeof(WSXmlData)); + if(data) { + data->data = pool_malloc(pool, buf->size + 1); + if(data->data) { + memcpy(data->data, buf->space, buf->size); + data->data[buf->size] = '\0'; + data->length = buf->size; + data->namespaces = nslist; + } + } + } + + ucx_buffer_free(buf); + + return data; +} + +char* wsxml_nslist2string(pool_handle_t *pool, WebdavNSList *nslist) { + if(!nslist) return NULL; + + // get required string length + size_t len = 0; + WebdavNSList *elm = nslist; + while(elm) { + WSNamespace *ns = elm->namespace; + if(ns) { + if(ns->prefix) len += strlen((const char*)ns->prefix); + if(ns->href) len += strlen((const char*)ns->href); + len += 2; // 1 char for ':', 1 char for \n or \0 + } + elm = elm->next; + } + + // alloc string + char *str = pool_malloc(pool, len); + if(!str) { + return NULL; + } + char *pos = str; + + // copy namespace definitions to the string + elm = nslist; + while(elm) { + WSNamespace *ns = elm->namespace; + if(ns) { + if(ns->prefix) { + size_t prefixlen = strlen((const char*)ns->prefix); + memcpy(pos, ns->prefix, prefixlen); + pos[prefixlen] = ':'; + pos += prefixlen + 1; + } else { + pos[0] = ':'; + pos++; + } + if(ns->href) { + size_t hreflen = strlen((const char*)ns->href); + memcpy(pos, ns->href, hreflen); + pos[hreflen] = elm->next ? '\n' : '\0'; + pos += hreflen + 1; + } else { + pos[0] = elm->next ? '\n' : '\0'; + pos++; + } + } + elm = elm->next; + } + + return str; +} + +WebdavNSList* wsxml_string2nslist(pool_handle_t *pool, char *nsliststr) { + if(!nsliststr) return NULL; + size_t len = strlen(nsliststr); + WebdavNSList *list_start = NULL; + WebdavNSList *list_current = NULL; + + char *prefix = nsliststr; + size_t prefix_start = 0; + size_t prefix_len = 0; + char *href = NULL; + size_t href_start = len; + size_t i; + for(i=0;i<=len;i++) { + char c = nsliststr[i]; + if(c == '\n' || c == '\0') { + if(i > href_start) { + WebdavNSList *elm = pool_malloc(pool, sizeof(WebdavNSList)); + if(!elm) { + break; + } + elm->prev = list_current; + elm->next = NULL; + WSNamespace *ns = pool_malloc(pool, sizeof(WSNamespace)); + elm->namespace = ns; + if(!ns) { + break; + } + memset(ns, 0, sizeof(WSNamespace)); + ns->prefix = prefix_len > 0 ? (xmlChar*)sstrdup_pool(pool, sstrn(prefix, prefix_len)).ptr : NULL; + ns->href = (xmlChar*)sstrdup_pool(pool, sstrn(href, i-href_start)).ptr; + if(list_current) { + list_current->next = elm; + } else { + list_start = elm; + } + list_current = elm; + } + prefix_start = i + 1; + prefix = nsliststr + prefix_start; + prefix_len = 0; + href_start = len; + href = NULL; + } else if(!href && c == ':') { + prefix_len = i - prefix_start; + href_start = i + 1; + href = nsliststr + href_start; + } + } + + if(i < len) { + // error, cleanup + while(list_start) { + if(list_start->namespace) { + WSNamespace *ns = list_start->namespace; + if(ns->prefix) { + pool_free(pool, (char*)ns->prefix); + } + if(ns->href) { + pool_free(pool, (char*)ns->href); + } + pool_free(pool, ns); + } + WebdavNSList *next = list_start->next; + pool_free(pool, list_start); + list_start = next; + } + list_start = NULL; + } + + return list_start; +} + +/***************************************************************************** + * Non public functions + *****************************************************************************/ + +typedef struct XmlWriter { + /* + * Memory pool for temp memory allocations + */ + pool_handle_t *pool; + + /* + * Buffered output stream + */ + Writer *out; + + /* + * Map for all previously defined namespaces + * key: (char*) namespace prefix + * value: WSNamespace* + */ + UcxMap *namespaces; + + /* + * Should namespace definitions be created + */ + WSBool define_namespaces; +} XmlWriter; + +/* + * Serialize an XML text node + * This replaces some special characters with entity refs + * type: 0 = element text, 1 = attribute text + */ +static void xml_ser_text(Writer *out, int type, const char *text) { + size_t start = 0; + size_t i; + sstr_t entityref = { NULL, 0 }; + for(i=0;text[i]!='\0';i++) { + char c = text[i]; + if(c == '&') { + entityref = S("&"); + } else if(type == 0) { + if(c == '<') { + entityref = S("<"); + } else if(c == '>') { + entityref = S(">"); + } + } else { + if(c == '\"') { + entityref = S("""); + } else if(c == '\'') { + entityref = S("'"); + } + } + + if(entityref.ptr) { + size_t len = i-start; + if(len > 0) { + writer_put(out, text+start, len); + } + writer_puts(out, entityref); + entityref.ptr = NULL; + entityref.length = 0; + start = i+1; + } + } + size_t len = i-start; + if(len > 0) { + writer_put(out, text+start, len); + } +} + +/* + * Serialize an XML element node + */ +static void xml_ser_element(XmlWriter *xw, xmlNode *node) { + Writer *out = xw->out; + writer_putc(out, '<'); + + // write prefix and ':' + if(node->ns && node->ns->prefix) { + writer_puts(out, sstr((char*)node->ns->prefix)); + writer_putc(out, ':'); + } + + // node name + writer_puts(out, sstr((char*)node->name)); + + // namespace definitions + if(xw->define_namespaces) { + xmlNs *nsdef = node->nsDef; + while(nsdef) { + // we define only namespaces without prefix or namespaces + // with prefix, that are not already defined + // xw->namespaces contains all namespace, that were defined + // before xml serialization + if(!nsdef->prefix) { + writer_puts(out, S(" xmlns=\"")); + writer_puts(out, sstr((char*)nsdef->href)); + writer_putc(out, '"'); + } else { + WSNamespace *n = xw->namespaces ? + ucx_map_cstr_get(xw->namespaces, (char*)nsdef->prefix) : + NULL; + if(!n) { + writer_puts(out, S(" xmlns:")); + writer_puts(out, sstr((char*)nsdef->prefix)); + writer_puts(out, S("=\"")); + writer_puts(out, sstr((char*)nsdef->href)); + writer_putc(out, '"'); + } + } + + nsdef = nsdef->next; + } + } + + // attributes + xmlAttr *attr = node->properties; + while(attr) { + // format: ' [<prefix>:]<name>="<value>"' + writer_putc(out, ' '); + // optional namespace + if(attr->ns && attr->ns->prefix) { + writer_puts(out, sstr((char*)attr->ns->prefix)); + writer_putc(out, ':'); + } + // <name>=" + writer_puts(out, sstr((char*)attr->name)); + writer_puts(out, S("=\"")); + // value + xmlNode *value = attr->children; + while(value) { + if(value->content) { + xml_ser_text(out, 1, (const char*)value->content); + } + value = value->next; + } + // trailing quote + writer_putc(out, '"'); + + attr = attr->next; + } + + if(node->children) { + writer_putc(out, '>'); + } else { + writer_puts(out, S("/>")); + } +} + +static int xml_ser_node_begin(xmlNode *node, void *userdata) { + XmlWriter *xw = userdata; + switch(node->type) { + case XML_ELEMENT_NODE: xml_ser_element(xw, node); break; + case XML_ATTRIBUTE_NODE: break; + case XML_TEXT_NODE: { + xml_ser_text(xw->out, 0, (const char*)node->content); + break; + } + case XML_CDATA_SECTION_NODE: { + break; + } + case XML_ENTITY_REF_NODE: break; + case XML_ENTITY_NODE: break; + case XML_PI_NODE: break; + case XML_COMMENT_NODE: break; + case XML_DOCUMENT_NODE: break; + case XML_DOCUMENT_TYPE_NODE: break; + case XML_DOCUMENT_FRAG_NODE: break; + case XML_NOTATION_NODE: break; + case XML_HTML_DOCUMENT_NODE: break; + case XML_DTD_NODE: break; + case XML_ELEMENT_DECL: break; + case XML_ATTRIBUTE_DECL: break; + case XML_ENTITY_DECL: break; + case XML_NAMESPACE_DECL: break; + case XML_XINCLUDE_START: break; + case XML_XINCLUDE_END: break; + default: break; + } + return 0; +} + +static int xml_ser_node_end(xmlNode *node, void *userdata) { + XmlWriter *xw = userdata; + Writer *out = xw->out; + if(node->type == XML_ELEMENT_NODE) { + if(node->children) { + writer_puts(xw->out, S("</")); + // write prefix and ':' + if(node->ns && node->ns->prefix) { + writer_puts(out, sstr((char*)node->ns->prefix)); + writer_putc(out, ':'); + } + // name and close tag + writer_puts(out, sstr((char*)node->name)); + writer_putc(out, '>'); + + } // element was already closed in xml_ser_node_begin + } + return 0; +} + + +static int xml_write_nodes( + pool_handle_t *pool, + Writer *out, + UcxMap *nsdefs, + WSBool createdefs, + xmlNode *node) +{ + XmlWriter xmlwriter; + xmlwriter.pool = pool; + xmlwriter.out = out; + xmlwriter.namespaces = nsdefs; + xmlwriter.define_namespaces = createdefs; + + // iterate over xml nodes + // this includes node->children and node->next + int err = wsxml_iterator( + pool, + node, + xml_ser_node_begin, + xml_ser_node_end, + &xmlwriter); + if(err) { + return -1; + } + + return out->error; +} + +int wsxml_write_nodes( + pool_handle_t *pool, + Writer *out, + UcxMap *nsdefs, + xmlNode *node) +{ + return xml_write_nodes(pool, out, nsdefs, TRUE, node); +} + +int wsxml_write_nodes_without_nsdef( + pool_handle_t *pool, + Writer *out, + xmlNode *node) +{ + return xml_write_nodes(pool, out, NULL, FALSE, node); +}