src/server/webdav/xml.c

changeset 385
a1f4cb076d2f
parent 324
44cf877b3d9f
child 415
d938228c382e
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/server/webdav/xml.c	Sat Sep 24 16:26:10 2022 +0200
@@ -0,0 +1,734 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2019 Olaf Wintermann. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <ucx/string.h>
+#include <ucx/map.h>
+#include <ucx/buffer.h>
+
+#include "../util/util.h"
+#include "../util/pool.h"
+
+#include "xml.h"
+
+/*****************************************************************************
+ *    Utility functions
+ *****************************************************************************/
+
+/*
+ * Builds the map key that identifies a namespace by its prefix and href.
+ *
+ * Key layout: <prefix> '\0' <href>
+ * A namespace without prefix uses a single '\0' byte in place of the prefix,
+ * a namespace without href contributes an empty href part.
+ *
+ * The key is allocated with the allocator a. Callers treat a NULL ptr in the
+ * returned string as an allocation failure.
+ */
+static sstr_t xml_namespace_key(UcxAllocator *a, WSNamespace *ns) {
+    sstr_t prefix = ns->prefix ? sstr((char*)ns->prefix) : S("\0");
+    // href can be NULL (other functions in this file check for it),
+    // so avoid passing a NULL pointer to sstr()
+    sstr_t href = ns->href ? sstr((char*)ns->href) : S("");
+    return sstrcat_a(a, 3, prefix, S("\0"), href);
+}
+
+
+/*****************************************************************************
+ *    Public functions
+ *****************************************************************************/
+
+/* ------------------------ wsxml_iterator ------------------------ */
+
+typedef struct StackElm {
+    WSXmlNode *node; // list of nodes
+    //WSXmlNode *parent; // if not NULL, call endcb after node->next is NULL
+    int endonly;
+    struct StackElm *next;
+} StackElm;
+
+#define STACK_PUSH(stack, elm) if(stack) { elm->next = stack; } stack = elm;
+
+/*
+ * Iterates over node, its following siblings and all of their descendants
+ * in document order. The traversal uses an explicit stack instead of
+ * recursion.
+ *
+ * begincb is called for every node before its children are visited, endcb
+ * is called after all children of the node were visited. A callback that
+ * returns a non-zero value stops the iteration.
+ *
+ * pool is used for the temporary stack elements. All stack elements are
+ * released before the function returns.
+ *
+ * Returns 0 if the iteration was completed or stopped by a callback and
+ * 1 if a memory allocation failed. A callback that has to report an error
+ * to the caller must do this via udata.
+ */
+int wsxml_iterator(
+        pool_handle_t *pool,
+        WSXmlNode *node,
+        wsxml_func begincb,
+        wsxml_func endcb,
+        void *udata)
+{
+    if(!node) {
+        return 0;
+    }
+
+    StackElm *stack = pool_malloc(pool, sizeof(StackElm));
+    if(!stack) {
+        return 1; // OOM
+    }
+    stack->next = NULL;
+    stack->node = node;
+    stack->endonly = 0;
+
+    int ret = 0;
+    int br = 0; // set when the iteration has to be stopped
+    while(stack) {
+        StackElm *cur = stack;
+        WSXmlNode *xmlnode = cur->node; // get top stack element
+        stack = cur->next;              // and remove it
+        cur->next = NULL;
+
+        while(xmlnode && !cur->endonly) {
+            // element begin callback
+            if(begincb(xmlnode, udata)) {
+                br = 1;
+                break;
+            }
+
+            if(!xmlnode->children) {
+                // no children means, the end callback can be called directly
+                // after the begin callback (no intermediate nodes)
+                cur->node = NULL;
+                if(endcb(xmlnode, udata)) {
+                    br = 1;
+                    break;
+                }
+                // continue with next node at this level
+                xmlnode = xmlnode->next;
+                continue;
+            }
+
+            // put the children on the stack
+            // the next stack iteration will process the children
+            StackElm *newelm = pool_malloc(pool, sizeof(StackElm));
+            if(!newelm) {
+                ret = 1;
+                br = 1;
+                break;
+            }
+            newelm->next = NULL;
+            newelm->node = xmlnode->children;
+            newelm->endonly = 0;
+
+            // if xmlnode->next is not NULL, there are still nodes at
+            // this level, therefore we have to put these also on the
+            // stack
+            // this way, the remaining nodes are processed after all
+            // children and the end tag are processed
+            if(xmlnode->next) {
+                StackElm *nextelm = pool_malloc(pool, sizeof(StackElm));
+                if(!nextelm) {
+                    pool_free(pool, newelm); // not on the stack yet
+                    ret = 1;
+                    br = 1;
+                    break;
+                }
+                nextelm->node = xmlnode->next;
+                nextelm->next = NULL;
+                nextelm->endonly = 0;
+                STACK_PUSH(stack, nextelm);
+            }
+
+            // we have to put the end tag of the current element
+            // on the stack to ensure endcb is called for the current
+            // element, after all children are processed
+            // cur is reused for this and is owned by the stack again
+            cur->node = xmlnode;
+            cur->endonly = 1;
+            STACK_PUSH(stack, cur);
+            cur = NULL;
+
+            // now we can put the children on the stack
+            STACK_PUSH(stack, newelm);
+            // break, because we don't want to process xmlnode->next now
+            break;
+        }
+
+        // cur->node is only set here, if the end callback for this node
+        // is still pending
+        if(!br && cur && cur->node) {
+            if(endcb(cur->node, udata)) {
+                br = 1;
+            }
+        }
+
+        // cur was removed from the stack, therefore it must be released
+        // here on every path, unless it was pushed again (cur == NULL)
+        if(cur) {
+            pool_free(pool, cur);
+        }
+
+        if(br) {
+            break; // stopped by a callback or because of an error
+        }
+    }
+
+    // free all remaining elements
+    while(stack) {
+        StackElm *next = stack->next;
+        pool_free(pool, stack);
+        stack = next;
+    }
+
+    return ret;
+}
+
+/* ------------------- wsxml_get_required_namespaces ------------------- */
+
+typedef struct WSNsCollector { // state passed as userdata to the nslist_node_* callbacks
+    UcxAllocator *a; // allocator for map keys and elements of the def list
+    UcxMap *nsmap; // namespaces used by elements, key: xml_namespace_key()
+    WebdavNSList *def; // namespace definitions found so far, newest first
+    int error; // set to 1 by nslist_node_begin if an allocation or map insert fails
+} WSNsCollector;
+
+/*
+ * wsxml_iterator begin callback for wsxml_get_required_namespaces
+ *
+ * userdata must be a WSNsCollector. For every element node, the namespace
+ * of the element is added to col->nsmap and all namespace definitions of
+ * the element are prepended to col->def.
+ *
+ * Returns 0 to continue the iteration. If a memory allocation or the map
+ * insert fails, col->error is set to 1 and 1 is returned.
+ */
+static int nslist_node_begin(xmlNode *node, void *userdata) {
+    WSNsCollector *col = userdata;
+    if(node->type != XML_ELEMENT_NODE) {
+        return 0;
+    }
+
+    if(node->ns) {
+        // we create a list of unique prefix-href namespaces by putting
+        // all namespaces in a map
+        sstr_t nskey = xml_namespace_key(col->a, node->ns);
+        if(!nskey.ptr) {
+            col->error = 1;
+            return 1;
+        }
+        if(ucx_map_sstr_put(col->nsmap, nskey, node->ns)) {
+            col->error = 1;
+            return 1;
+        }
+    }
+
+    // collect all namespace definitions for removing these namespaces
+    // from col->nsmap later
+    // this is done for every element, because an element without a
+    // namespace can still define namespaces that are used by its children
+    for(WSNamespace *def=node->nsDef;def;def=def->next) {
+        WebdavNSList *newdef = col->a->malloc(
+                col->a->pool, sizeof(WebdavNSList));
+        if(!newdef) {
+            col->error = 1;
+            return 1;
+        }
+        newdef->namespace = def;
+        newdef->prev = NULL;
+        newdef->next = col->def;
+        // prepend newdef to the list
+        if(col->def) {
+            col->def->prev = newdef;
+        }
+        col->def = newdef;
+    }
+
+    return 0;
+}
+
+/*
+ * wsxml_iterator end callback for wsxml_get_required_namespaces
+ * Nothing has to be done when a node ends, the iteration always continues.
+ */
+static int nslist_node_end(xmlNode *node, void *userdata) {
+    (void)node;
+    (void)userdata;
+    return 0;
+}
+
+/*
+ * Returns a list of all namespaces, that are used by elements in the
+ * node tree (node, its siblings and all descendants), but are not defined
+ * by a namespace definition inside of the tree.
+ *
+ * The list elements are allocated from pool. The namespace pointers in the
+ * list refer to the namespace objects of the xml tree, they are not copied.
+ *
+ * error is optional. If it is not NULL, it is set to 0 on success and to 1
+ * if an error occurred. In case of an error NULL is returned. NULL is also
+ * returned without an error, if no namespace is required.
+ */
+WebdavNSList* wsxml_get_required_namespaces(
+        pool_handle_t *pool,
+        WSXmlNode *node,
+        int *error)
+{
+    if(error) *error = 0;
+
+    UcxAllocator a = util_pool_allocator(pool);
+    UcxMap *nsmap = ucx_map_new_a(&a, 16);
+    if(!nsmap) {
+        if(error) *error = 1;
+        return NULL;
+    }
+
+    WSNsCollector col;
+    col.a = &a;
+    col.nsmap = nsmap;
+    col.def = NULL;
+    col.error = 0;
+
+    WebdavNSList *list = NULL;
+    WebdavNSList *end = NULL;
+    int failed = 0;
+
+    // iterate over all xml elements
+    // this will fill the hashmap with all namespaces
+    // all namespace definitions are added to col.def
+    // wsxml_iterator returns 0 when a callback stops the iteration,
+    // therefore col.error has to be checked too
+    if(wsxml_iterator(pool, node, nslist_node_begin, nslist_node_end, &col)
+            || col.error)
+    {
+        failed = 1;
+    }
+
+    if(!failed) {
+        // remove all namespace definitions from the map
+        // what we get is a map that contains all missing namespace definitions
+        for(WebdavNSList *def=col.def;def;def=def->next) {
+            sstr_t nskey = xml_namespace_key(&a, def->namespace);
+            if(!nskey.ptr) {
+                failed = 1;
+                break;
+            }
+            ucx_map_sstr_remove(nsmap, nskey);
+            a.free(a.pool, nskey.ptr); // temporary lookup key
+        }
+    }
+
+    if(!failed) {
+        // convert nsmap to a list
+        UcxMapIterator i = ucx_map_iterator(nsmap);
+        WSNamespace *ns;
+        UCX_MAP_FOREACH(key, ns, i) {
+            WebdavNSList *newelm = pool_malloc(pool, sizeof(WebdavNSList));
+            if(!newelm) {
+                failed = 1;
+                break;
+            }
+            newelm->namespace = ns;
+            newelm->next = NULL;
+            newelm->prev = end; // NULL or the end of list
+            if(end) {
+                end->next = newelm; // append new element
+            } else {
+                list = newelm; // start new list
+            }
+            end = newelm;
+        }
+    }
+
+    if(failed) {
+        // never return an incomplete list
+        while(list) {
+            WebdavNSList *next = list->next;
+            pool_free(pool, list);
+            list = next;
+        }
+        if(error) *error = 1;
+    }
+
+    // the collected namespace definitions are only needed temporarily
+    while(col.def) {
+        WebdavNSList *next = col.def->next;
+        a.free(a.pool, col.def);
+        col.def = next;
+    }
+
+    ucx_map_free(nsmap);
+    return list;
+}
+
+
+/*
+ * Writer stream function, that appends len bytes from s to the UcxBuffer buf
+ *
+ * Returns the number of bytes written or IO_ERROR if nothing was written.
+ */
+static ssize_t buf_writefunc(void *buf, const char *s, size_t len) {
+    // keep the result as size_t, to avoid truncating large byte counts
+    size_t written = ucx_buffer_write(s, 1, len, buf);
+    if(written == 0) {
+        return IO_ERROR;
+    }
+    return (ssize_t)written;
+}
+
+/*
+ * Serializes node (including siblings and descendants) to a string and
+ * collects the namespaces, that are required but not defined by the nodes.
+ *
+ * The returned WSXmlData object, its data string (null-terminated, length
+ * without terminator in the length member) and the namespace list are
+ * allocated from pool.
+ *
+ * Returns NULL if an error occurred.
+ */
+WSXmlData* wsxml_node2data(
+        pool_handle_t *pool,
+        WSXmlNode *node)
+{
+    // temporary output buffer, not allocated from pool
+    UcxBuffer *buf = ucx_buffer_new(NULL, 1024, UCX_BUFFER_AUTOEXTEND);
+    if(!buf) {
+        return NULL;
+    }
+
+    int error = 0;
+    WebdavNSList *nslist = wsxml_get_required_namespaces(pool, node, &error);
+    if(error) {
+        ucx_buffer_free(buf);
+        return NULL;
+    }
+
+    Writer writer;
+    char buffer[512];
+    writer_init_with_stream(&writer, buf, buf_writefunc, buffer, 512);
+
+    WSXmlData *data = NULL;
+    if(!wsxml_write_nodes(pool, &writer, NULL, node) && !writer_flush(&writer)) {
+        data = pool_malloc(pool, sizeof(WSXmlData));
+        if(data) {
+            data->data = pool_malloc(pool, buf->size + 1);
+            if(data->data) {
+                memcpy(data->data, buf->space, buf->size);
+                data->data[buf->size] = '\0';
+                data->length = buf->size;
+                data->namespaces = nslist;
+            } else {
+                // don't return a partially initialized object
+                pool_free(pool, data);
+                data = NULL;
+            }
+        }
+    }
+
+    if(!data) {
+        // the namespace list has no owner in case of an error
+        while(nslist) {
+            WebdavNSList *next = nslist->next;
+            pool_free(pool, nslist);
+            nslist = next;
+        }
+    }
+
+    ucx_buffer_free(buf);
+
+    return data;
+}
+
+/*
+ * Converts a namespace list to a string
+ *
+ * Format: one "<prefix>:<href>" entry per namespace, entries are separated
+ * by '\n'. A missing prefix or href results in an empty part. List elements
+ * without a namespace are skipped.
+ *
+ * The returned string is null-terminated and allocated from pool.
+ * Returns NULL if nslist is NULL or the allocation fails.
+ */
+char* wsxml_nslist2string(pool_handle_t *pool, WebdavNSList *nslist) {
+    if(!nslist) return NULL;
+
+    // get required string length
+    size_t len = 1; // terminator, also needed if no element has a namespace
+    for(WebdavNSList *elm=nslist;elm;elm=elm->next) {
+        WSNamespace *ns = elm->namespace;
+        if(!ns) {
+            continue;
+        }
+        if(ns->prefix) len += strlen((const char*)ns->prefix);
+        if(ns->href) len += strlen((const char*)ns->href);
+        len += 2; // 1 char for ':', 1 char for the '\n' separator
+    }
+
+    // alloc string
+    char *str = pool_malloc(pool, len);
+    if(!str) {
+        return NULL;
+    }
+    char *pos = str;
+
+    // copy namespace definitions to the string
+    // the separator is written before every entry except the first one,
+    // this way the result doesn't depend on skipped list elements
+    int first = 1;
+    for(WebdavNSList *elm=nslist;elm;elm=elm->next) {
+        WSNamespace *ns = elm->namespace;
+        if(!ns) {
+            continue;
+        }
+        if(!first) {
+            *pos++ = '\n';
+        }
+        first = 0;
+
+        if(ns->prefix) {
+            size_t prefixlen = strlen((const char*)ns->prefix);
+            memcpy(pos, ns->prefix, prefixlen);
+            pos += prefixlen;
+        }
+        *pos++ = ':';
+        if(ns->href) {
+            size_t hreflen = strlen((const char*)ns->href);
+            memcpy(pos, ns->href, hreflen);
+            pos += hreflen;
+        }
+    }
+    *pos = '\0';
+
+    return str;
+}
+
+/*
+ * Parses a namespace list string, as created by wsxml_nslist2string
+ *
+ * Format: "<prefix>:<href>" entries separated by '\n'. The first ':' of an
+ * entry separates the prefix from the href. An empty prefix results in a
+ * namespace with a NULL prefix. Entries without ':' or with an empty href
+ * are skipped.
+ *
+ * The list, the namespaces and their strings are allocated from pool.
+ * Returns NULL if nsliststr is NULL, contains no valid entry or if a memory
+ * allocation fails (a partial list is released in that case).
+ */
+WebdavNSList* wsxml_string2nslist(pool_handle_t *pool, char *nsliststr) {
+    if(!nsliststr) return NULL;
+    size_t len = strlen(nsliststr);
+    WebdavNSList *list_start = NULL;
+    WebdavNSList *list_current = NULL;
+
+    char *prefix = nsliststr;
+    size_t prefix_start = 0;
+    size_t prefix_len = 0;
+    char *href = NULL;
+    size_t href_start = len;
+    int failed = 0;
+    // i == len is included, to handle the last entry at the terminator
+    for(size_t i=0;i<=len;i++) {
+        char c = nsliststr[i];
+        if(c == '\n' || c == '\0') {
+            if(i > href_start) {
+                WebdavNSList *elm = pool_malloc(pool, sizeof(WebdavNSList));
+                if(!elm) {
+                    failed = 1;
+                    break;
+                }
+                WSNamespace *ns = pool_malloc(pool, sizeof(WSNamespace));
+                if(!ns) {
+                    pool_free(pool, elm); // not part of the list yet
+                    failed = 1;
+                    break;
+                }
+                memset(ns, 0, sizeof(WSNamespace));
+                elm->namespace = ns;
+                elm->prev = list_current;
+                elm->next = NULL;
+                // add the element to the list before the strings are
+                // copied, the cleanup code can handle incomplete namespaces
+                if(list_current) {
+                    list_current->next = elm;
+                } else {
+                    list_start = elm;
+                }
+                list_current = elm;
+
+                if(prefix_len > 0) {
+                    ns->prefix = (xmlChar*)sstrdup_pool(
+                            pool, sstrn(prefix, prefix_len)).ptr;
+                    if(!ns->prefix) {
+                        failed = 1;
+                        break;
+                    }
+                }
+                ns->href = (xmlChar*)sstrdup_pool(
+                        pool, sstrn(href, i-href_start)).ptr;
+                if(!ns->href) {
+                    failed = 1;
+                    break;
+                }
+            }
+            // start of the next entry
+            prefix_start = i + 1;
+            prefix = nsliststr + prefix_start;
+            prefix_len = 0;
+            href_start = len;
+            href = NULL;
+        } else if(!href && c == ':') {
+            prefix_len = i - prefix_start;
+            href_start = i + 1;
+            href = nsliststr + href_start;
+        }
+    }
+
+    if(failed) {
+        // error, cleanup
+        while(list_start) {
+            if(list_start->namespace) {
+                WSNamespace *ns = list_start->namespace;
+                if(ns->prefix) {
+                    pool_free(pool, (char*)ns->prefix);
+                }
+                if(ns->href) {
+                    pool_free(pool, (char*)ns->href);
+                }
+                pool_free(pool, ns);
+            }
+            WebdavNSList *next = list_start->next;
+            pool_free(pool, list_start);
+            list_start = next;
+        }
+        list_start = NULL;
+    }
+
+    return list_start;
+}
+
+/*****************************************************************************
+ *    Non public functions
+ *****************************************************************************/
+
+typedef struct XmlWriter { // serializer state, passed as userdata to the xml_ser_node_* callbacks
+    /*
+     * Memory pool for temp memory allocations
+     * Set by xml_write_nodes; not read by the serializer callbacks
+     * visible in this file
+     */
+    pool_handle_t *pool;
+    
+    /*
+     * Buffered output stream
+     * All serialized xml text is written to this writer
+     */
+    Writer *out;
+    
+    /*
+     * Map for all previously defined namespaces
+     * key: (char*) namespace prefix
+     * value: WSNamespace*
+     *
+     * May be NULL. xml_ser_element writes no definition for a namespace
+     * with a prefix that is found in this map.
+     */
+    UcxMap *namespaces;
+    
+    /*
+     * Should namespace definitions be created
+     * If FALSE, xml_ser_element writes no xmlns attributes at all
+     */
+    WSBool define_namespaces;
+} XmlWriter;
+
+/*
+ * Serialize an XML text node
+ * This replaces some special characters with entity refs
+ * type: 0 = element text, 1 = attribute text
+ *
+ * '&' and '<' are replaced in both modes, because both characters are not
+ * allowed in element content and attribute values. '>' is only replaced in
+ * element text, '"' and '\'' are only replaced in attribute text.
+ *
+ * A NULL text is ignored.
+ */
+static void xml_ser_text(Writer *out, int type, const char *text) {
+    if(!text) {
+        return;
+    }
+
+    size_t start = 0; // start of the text, that was not written yet
+    size_t i;
+    for(i=0;text[i]!='\0';i++) {
+        sstr_t entityref = { NULL, 0 };
+        switch(text[i]) {
+            case '&': entityref = S("&amp;"); break;
+            case '<': entityref = S("&lt;"); break;
+            case '>': {
+                if(type == 0) entityref = S("&gt;");
+                break;
+            }
+            case '\"': {
+                if(type != 0) entityref = S("&quot;");
+                break;
+            }
+            case '\'': {
+                if(type != 0) entityref = S("&apos;");
+                break;
+            }
+            default: break;
+        }
+
+        if(!entityref.ptr) {
+            continue;
+        }
+
+        // write the text before the special char, then the entity ref
+        if(i > start) {
+            writer_put(out, text+start, i-start);
+        }
+        writer_puts(out, entityref);
+        start = i+1;
+    }
+
+    // remaining text after the last special char
+    if(i > start) {
+        writer_put(out, text+start, i-start);
+    }
+}
+
+/*
+ * Serialize the start tag of an XML element node
+ *
+ * Writes '<', the qualified element name, the namespace definitions (only
+ * if xw->define_namespaces is set) and all attributes. An element without
+ * children is written as an empty-element tag ("/>"), otherwise the tag is
+ * closed with '>' and the end tag is written later by xml_ser_node_end.
+ *
+ * Namespace hrefs and attribute values are escaped with xml_ser_text.
+ */
+static void xml_ser_element(XmlWriter *xw, xmlNode *node) {
+    Writer *out = xw->out;
+    writer_putc(out, '<');
+
+    // write prefix and ':'
+    if(node->ns && node->ns->prefix) {
+        writer_puts(out, sstr((char*)node->ns->prefix));
+        writer_putc(out, ':');
+    }
+
+    // node name
+    writer_puts(out, sstr((char*)node->name));
+
+    // namespace definitions
+    if(xw->define_namespaces) {
+        for(xmlNs *nsdef=node->nsDef;nsdef;nsdef=nsdef->next) {
+            // we define only namespaces without prefix or namespaces
+            // with a prefix, that is not already defined
+            // xw->namespaces contains all namespaces, that were defined
+            // before xml serialization
+            if(!nsdef->prefix) {
+                writer_puts(out, S(" xmlns=\""));
+            } else {
+                WSNamespace *n = xw->namespaces ?
+                    ucx_map_cstr_get(xw->namespaces, (char*)nsdef->prefix) :
+                    NULL;
+                if(n) {
+                    continue; // already defined
+                }
+                writer_puts(out, S(" xmlns:"));
+                writer_puts(out, sstr((char*)nsdef->prefix));
+                writer_puts(out, S("=\""));
+            }
+            // the href is an attribute value and must be escaped
+            // a missing href results in an empty value
+            if(nsdef->href) {
+                xml_ser_text(out, 1, (const char*)nsdef->href);
+            }
+            writer_putc(out, '"');
+        }
+    }
+
+    // attributes
+    for(xmlAttr *attr=node->properties;attr;attr=attr->next) {
+        // format: ' [<prefix>:]<name>="<value>"'
+        writer_putc(out, ' ');
+        // optional namespace
+        if(attr->ns && attr->ns->prefix) {
+            writer_puts(out, sstr((char*)attr->ns->prefix));
+            writer_putc(out, ':');
+        }
+        // <name>="
+        writer_puts(out, sstr((char*)attr->name));
+        writer_puts(out, S("=\""));
+        // value: concatenated content of all child nodes
+        for(xmlNode *value=attr->children;value;value=value->next) {
+            if(value->content) {
+                xml_ser_text(out, 1, (const char*)value->content);
+            }
+        }
+        // trailing quote
+        writer_putc(out, '"');
+    }
+
+    if(node->children) {
+        writer_putc(out, '>');
+    } else {
+        writer_puts(out, S("/>"));
+    }
+}
+
+/*
+ * wsxml_iterator begin callback of the xml serializer
+ *
+ * userdata must be an XmlWriter. Element nodes are serialized with
+ * xml_ser_element, text nodes are written as escaped element text.
+ * All other node types (CDATA sections, comments, processing instructions,
+ * entity references, ...) are currently not serialized.
+ *
+ * Always returns 0. Write errors are stored in the Writer.
+ */
+static int xml_ser_node_begin(xmlNode *node, void *userdata) {
+    XmlWriter *xw = userdata;
+    switch(node->type) {
+        case XML_ELEMENT_NODE: {
+            xml_ser_element(xw, node);
+            break;
+        }
+        case XML_TEXT_NODE: {
+            // a text node without content has nothing to serialize
+            if(node->content) {
+                xml_ser_text(xw->out, 0, (const char*)node->content);
+            }
+            break;
+        }
+        default: break; // node type not supported by the serializer
+    }
+    return 0;
+}
+
+/*
+ * wsxml_iterator end callback of the xml serializer
+ *
+ * Writes the end tag "</[prefix:]name>" for element nodes with children.
+ * Elements without children were already closed by the begin callback and
+ * all other node types have no end tag.
+ *
+ * Always returns 0.
+ */
+static int xml_ser_node_end(xmlNode *node, void *userdata) {
+    XmlWriter *xw = userdata;
+    Writer *w = xw->out;
+
+    if(node->type != XML_ELEMENT_NODE || !node->children) {
+        return 0;
+    }
+
+    writer_puts(w, S("</"));
+    // optional namespace prefix followed by ':'
+    if(node->ns && node->ns->prefix) {
+        writer_puts(w, sstr((char*)node->ns->prefix));
+        writer_putc(w, ':');
+    }
+    // element name and end of the tag
+    writer_puts(w, sstr((char*)node->name));
+    writer_putc(w, '>');
+
+    return 0;
+}
+
+
+/*
+ * Serializes node, its siblings and all descendants to the writer out
+ *
+ * nsdefs: optional map (key: prefix) of namespaces, that are already defined
+ * createdefs: if FALSE, no namespace definitions are written
+ *
+ * Returns -1 if the node iteration fails, otherwise the error state
+ * of the writer (out->error).
+ */
+static int xml_write_nodes(
+        pool_handle_t *pool,
+        Writer *out,
+        UcxMap *nsdefs,
+        WSBool createdefs,
+        xmlNode *node)
+{
+    // serializer state for the iterator callbacks
+    XmlWriter xw;
+    xw.define_namespaces = createdefs;
+    xw.namespaces = nsdefs;
+    xw.out = out;
+    xw.pool = pool;
+
+    // the iterator visits node->children and node->next too
+    if(wsxml_iterator(pool, node, xml_ser_node_begin, xml_ser_node_end, &xw)) {
+        return -1;
+    }
+
+    return out->error;
+}
+
+/*
+ * Serializes node, its siblings and all descendants to out, including
+ * namespace definitions. Namespaces with a prefix, that is a key in nsdefs
+ * (may be NULL), are not defined again.
+ *
+ * Returns the result of xml_write_nodes.
+ */
+int wsxml_write_nodes(
+        pool_handle_t *pool,
+        Writer *out,
+        UcxMap *nsdefs,
+        xmlNode *node)
+{
+    int result = xml_write_nodes(pool, out, nsdefs, TRUE, node);
+    return result;
+}
+
+/*
+ * Serializes node, its siblings and all descendants to out, without
+ * writing any namespace definitions.
+ *
+ * Returns the result of xml_write_nodes.
+ */
+int wsxml_write_nodes_without_nsdef(
+        pool_handle_t *pool,
+        Writer *out,
+        xmlNode *node)
+{
+    int result = xml_write_nodes(pool, out, NULL, FALSE, node);
+    return result;
+}

mercurial