src/server/webdav/xml.c

Wed, 27 Nov 2024 23:00:07 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Wed, 27 Nov 2024 23:00:07 +0100
changeset 563
6ca97c99173e
parent 490
d218607f5a7e
permissions
-rw-r--r--

add TODO to use a future ucx feature

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2019 Olaf Wintermann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cx/string.h>
#include <cx/map.h>
#include <cx/hash_map.h>
#include <cx/buffer.h>
#include <cx/linked_list.h>

#include "../util/util.h"
#include "../util/pool.h"

#include "xml.h"

/*****************************************************************************
 *    Utility functions
 *****************************************************************************/

/*
 * Builds the hash key that identifies a namespace by its prefix and href.
 *
 * Key layout: <prefix> '\0' <href>
 * A namespace without prefix contributes a single '\0' byte in place of
 * the prefix.
 *
 * The key bytes are allocated with the allocator a. Callers compare the
 * data pointer of the returned key with NULL to detect a failed allocation.
 */
static CxHashKey xml_namespace_key(CxAllocator *a, WSNamespace *ns) {
    cxstring separator = cx_strn("\0", 1);
    cxstring prefix = ns->prefix ? cx_str((char*)ns->prefix) : cx_strn("\0", 1);
    cxstring href = cx_str((char*)ns->href);
    
    cxmutstr bytes = cx_strcat_a(a, 3, prefix, separator, href);
    return cx_hash_key_bytes((unsigned char*)bytes.ptr, bytes.length);
}


/*****************************************************************************
 *    Public functions
 *****************************************************************************/

/* ------------------------ wsxml_iterator ------------------------ */

/*
 * Element of the explicit work stack used by wsxml_iterator
 */
typedef struct StackElm {
    WSXmlNode *node; // list of nodes; if endonly is set: the node that still needs its end callback
    //WSXmlNode *parent; // if not NULL, call endcb after node->next is NULL
    int endonly; // if not 0, only the end callback is called for node
    struct StackElm *next; // next (lower) element on the stack
} StackElm;

/*
 * Pushes elm on top of the singly linked stack.
 *
 * If the stack is empty, elm->next is not modified; wsxml_iterator sets
 * elm->next to NULL before pushing an element.
 *
 * The body is wrapped in do/while(0), that way the macro expands to exactly
 * one statement and is safe inside if/else without braces.
 */
#define STACK_PUSH(stack, elm) \
    do { \
        if(stack) { (elm)->next = (stack); } \
        (stack) = (elm); \
    } while(0)

/*
 * Iterates over node, all its following siblings (node->next) and all
 * descendants (node->children) without recursion.
 *
 * For every visited node begincb is called first. For nodes with children,
 * endcb is called after all children were processed; for nodes without
 * children, endcb is called directly after begincb.
 *
 * If a callback returns a non-zero value, the iteration is stopped.
 *
 * pool:    memory pool for the temporary stack elements
 * node:    first node of the node list, NULL is allowed
 * begincb: element begin callback
 * endcb:   element end callback
 * udata:   passed unmodified to both callbacks
 *
 * Returns 1 if a stack element could not be allocated, otherwise 0
 * (also when the iteration was stopped by a callback).
 */
int wsxml_iterator(
        pool_handle_t *pool,
        WSXmlNode *node,
        wsxml_func begincb,
        wsxml_func endcb,
        void *udata)
{
    if(!node) {
        return 0;
    }
    
    StackElm *stack = pool_malloc(pool, sizeof(StackElm));
    if(!stack) {
        return 1; // OOM
    }
    stack->next = NULL;
    stack->node = node;
    stack->endonly = 0;
    
    int ret = 0;
    int br = 0;
    while(stack) {
        StackElm *cur = stack;
        WSXmlNode *xmlnode = cur->node; // get top stack element
        stack = cur->next;              // and remove it
        cur->next = NULL;
        
        while(xmlnode && !cur->endonly) {
            // element begin callback
            if(begincb(xmlnode, udata)) {
                br = 1;
                break;
            }
            
            if(xmlnode->children) {
                // put the children on the stack
                // the next stack iteration will process the children
                StackElm *newelm = pool_malloc(pool, sizeof(StackElm));
                if(!newelm) {
                    ret = 1;
                    br = 1;
                    break;
                }
                newelm->next = NULL;
                newelm->node = xmlnode->children;
                newelm->endonly = 0;
                
                // if xmlnode->next is not NULL, there are still nodes at
                // this level, therefore we have to put these also on the
                // stack
                // this way, the remaining nodes are processed after all
                // children and the end tag are processed
                if(xmlnode->next) {
                    StackElm *nextelm = pool_malloc(pool, sizeof(StackElm));
                    if(!nextelm) {
                        // newelm is not on the stack yet
                        pool_free(pool, newelm);
                        ret = 1;
                        br = 1;
                        break;
                    }
                    nextelm->node = xmlnode->next;
                    nextelm->next = NULL;
                    nextelm->endonly = 0;
                    STACK_PUSH(stack, nextelm);
                }
                
                // we have to put the end tag of the current element
                // on the stack to ensure endcb is called for the current
                // element, after all children are processed
                // reuse cur
                cur->node = xmlnode;
                cur->endonly = 1;
                STACK_PUSH(stack, cur);
                
                // cur is owned by the stack again
                cur = NULL;
                
                // now we can put the children on the stack
                STACK_PUSH(stack, newelm);
                // break, because we don't want to process xmlnode->next now
                break;
            } else {
                // no children means, the end callback can be called directly
                // after the begin callback (no intermediate nodes)
                cur->node = NULL;
                if(endcb(xmlnode, udata)) {
                    br = 1;
                    break;
                }
            }
            
            // continue with next node at this level
            xmlnode = xmlnode->next;
        }
        if(br) {
            // break because of an error
            // cur was removed from the stack and must be released here
            if(cur) {
                pool_free(pool, cur);
            }
            break;
        }
        
        if(cur) {
            // cur->node is only set here for endonly elements
            int stop = 0;
            if(cur->node) {
                stop = endcb(cur->node, udata);
            }
            // cur is not on the stack anymore, release it in any case
            pool_free(pool, cur);
            if(stop) {
                break;
            }
        }
    }
    
    // free all remaining elements
    StackElm *elm = stack;
    while(elm) {
        StackElm *next = elm->next;
        pool_free(pool, elm);
        elm = next;
    }

    return ret;
}

/* ------------------- wsxml_get_required_namespaces ------------------- */

/*
 * State shared between wsxml_get_required_namespaces and the
 * iterator callback nslist_node_begin
 */
typedef struct WSNsCollector {
    CxAllocator *a; // allocator for map keys and elements of the def list
    CxMap *nsmap; // namespaces used by elements, key: see xml_namespace_key
    WebdavNSList *def; // namespace definitions (nsDef) of all visited elements
    int error; // set to 1 by nslist_node_begin if an allocation or map insert fails
} WSNsCollector;

/*
 * Begin callback for wsxml_get_required_namespaces
 *
 * Adds the namespace of an element to the collector's namespace map and
 * prepends all namespace definitions of the element to the collector's
 * definition list.
 *
 * userdata must be a WSNsCollector*
 *
 * Returns 0 on success. On failure, the collector's error flag is set
 * and 1 is returned.
 */
static int nslist_node_begin(xmlNode *node, void *userdata) {
    WSNsCollector *col = userdata;
    
    // only elements that have a namespace are relevant
    if(node->type != XML_ELEMENT_NODE || !node->ns) {
        return 0;
    }
    
    // the map is used to get a set of unique prefix-href namespaces
    CxHashKey nskey = xml_namespace_key(col->a, node->ns);
    if(!nskey.data || cxMapPut(col->nsmap, nskey, node->ns)) {
        col->error = 1;
        return 1;
    }
    
    // remember every namespace definition of this element, these
    // namespaces are removed from the map later
    for(WSNamespace *def = node->nsDef; def; def = def->next) {
        WebdavNSList *item = cxMalloc(col->a, sizeof(WebdavNSList));
        if(!item) {
            col->error = 1;
            return 1;
        }
        
        // the new item becomes the head of the list
        WebdavNSList *head = col->def;
        item->namespace = def;
        item->prev = NULL;
        item->next = head;
        if(head) {
            head->prev = item;
        }
        col->def = item;
    }
    
    return 0;
}

/*
 * End callback for wsxml_get_required_namespaces
 * Nothing to do at the end of a node, always returns 0
 */
static int nslist_node_end(xmlNode *node, void *userdata) {
    (void)node;
    (void)userdata;
    return 0;
}

/*
 * Returns a list of all namespaces that are used by elements of the
 * node list, but are not defined by any element of the node list.
 *
 * pool:  memory pool for the returned list and temporary data
 * node:  first node of the node list
 * error: optional, set to 0 on success and to 1 on failure
 *
 * Returns the list of missing namespace definitions. NULL is returned if
 * no namespace definition is missing or if an error occurred; use the
 * error parameter to distinguish both cases.
 * The namespaces in the list are the WSNamespace objects of the xml tree,
 * they are not copied.
 */
WebdavNSList* wsxml_get_required_namespaces(
        pool_handle_t *pool,
        WSXmlNode *node,
        int *error)
{
    if(error) *error = 0;
    
    CxAllocator *a = pool_allocator(pool);
    CxMap *nsmap = cxHashMapCreate(a, CX_STORE_POINTERS, 16);
    if(!nsmap) {
        if(error) *error = 1;
        return NULL;
    }
    
    WSNsCollector col;
    col.a = a;
    col.nsmap = nsmap;
    col.def = NULL;
    col.error = 0;
    
    // iterate over all xml elements
    // this will fill the hashmap with all namespaces
    // all namespace definitions are added to col.def
    int failed = 0;
    if(wsxml_iterator(pool, node, nslist_node_begin, nslist_node_end, &col)) {
        failed = 1;
    }
    // wsxml_iterator returns 0 if a callback stopped the iteration,
    // therefore a callback error is only visible in col.error
    if(col.error) {
        failed = 1;
    }
    
    // remove all namespace definitions from the map
    // what we get is a map that contains all missing namespace definitions
    // the def list is not needed afterwards and is released in any case
    WebdavNSList *def = col.def;
    while(def) {
        WebdavNSList *next = def->next;
        if(!failed) {
            CxHashKey nskey = xml_namespace_key(a, def->namespace);
            if(nskey.data) {
                cxMapRemove(nsmap, nskey);
            } else {
                failed = 1;
            }
        }
        cxFree(a, def);
        def = next;
    }
    
    // convert nsmap to a list
    WebdavNSList *list = NULL;
    WebdavNSList *end = NULL;
    if(!failed) {
        CxIterator i = cxMapIteratorValues(nsmap);
        WSNamespace *ns;
        cx_foreach(WSNamespace *, ns, i) {
            WebdavNSList *newelm = pool_malloc(pool, sizeof(WebdavNSList));
            if(!newelm) {
                failed = 1;
                break;
            }
            newelm->namespace = ns;
            newelm->next = NULL;
            newelm->prev = NULL;
            cx_linked_list_add((void**)&list, (void**)&end, offsetof(WebdavNSList, prev), offsetof(WebdavNSList, next), newelm);
        }
    }
    
    cxMapDestroy(nsmap);
    
    if(failed) {
        // don't return an incomplete list
        while(list) {
            WebdavNSList *next = list->next;
            pool_free(pool, list);
            list = next;
        }
        if(error) *error = 1;
        return NULL;
    }
    
    return list;
}


/*
 * Writer stream function, that appends len bytes from s to the
 * CxBuffer buf.
 *
 * Returns the number of written bytes or IO_ERROR if nothing was written.
 */
static ssize_t buf_writefunc(void *buf, const char *s, size_t len) {
    // cxBufferWrite returns a size_t, storing it in an int would
    // truncate the result
    size_t w = cxBufferWrite(s, 1, len, buf);
    return w == 0 ? IO_ERROR : (ssize_t)w;
}

/*
 * Serializes a node list to a string and collects all namespaces, that
 * are required by the nodes but not defined by them.
 *
 * pool: memory pool for the result and temporary data
 * node: first node of the node list
 *
 * Returns a new WSXmlData object with the null-terminated xml text, its
 * length and the list of required namespaces, or NULL on error.
 */
WSXmlData* wsxml_node2data(
        pool_handle_t *pool,
        WSXmlNode *node)
{
    CxBuffer buf;
    if(cxBufferInit(&buf, NULL, 1024, pool_allocator(pool), CX_BUFFER_AUTO_EXTEND|CX_BUFFER_FREE_CONTENTS)) {
        return NULL;
    }
    
    int error = 0;
    WebdavNSList *nslist = wsxml_get_required_namespaces(pool, node, &error);
    if(error) {
        // the buffer is already initialized and must be released
        cxBufferDestroy(&buf);
        return NULL;
    }
    
    Writer writer;
    char buffer[512];
    writer_init_with_stream(&writer, &buf, buf_writefunc, buffer, sizeof(buffer));
    
    WSXmlData *data = NULL;
    if(!wsxml_write_nodes(pool, &writer, NULL, node) && !writer_flush(&writer)) {
        data = pool_malloc(pool, sizeof(WSXmlData));
        if(data) {
            data->data = pool_malloc(pool, buf.size + 1);
            if(data->data) {
                memcpy(data->data, buf.space, buf.size);
                data->data[buf.size] = '\0';
                data->length = buf.size;
                data->namespaces = nslist;
            } else {
                // never return a partially initialized object
                pool_free(pool, data);
                data = NULL;
            }
        }
    }
    
    cxBufferDestroy(&buf);
    
    return data;
}

/*
 * Converts a namespace list to a string
 *
 * Format: one line per namespace, lines are separated by '\n'
 *   <prefix>:<href>
 * A missing prefix or href results in an empty string at this position.
 * List elements without a namespace are skipped.
 *
 * Returns a null-terminated string allocated from the pool, or NULL if
 * nslist is NULL or the allocation fails.
 */
char* wsxml_nslist2string(pool_handle_t *pool, WebdavNSList *nslist) {
    if(!nslist) return NULL;
    
    // get required string length
    size_t len = 0;
    for(WebdavNSList *elm = nslist; elm; elm = elm->next) {
        WSNamespace *ns = elm->namespace;
        if(ns) {
            if(ns->prefix) len += strlen((const char*)ns->prefix);
            if(ns->href) len += strlen((const char*)ns->href);
            len += 2; // 1 char for ':', 1 char for \n or \0
        }
    }
    
    // alloc string
    // if no element has a namespace, we still need 1 byte for the terminator
    char *str = pool_malloc(pool, len > 0 ? len : 1);
    if(!str) {
        return NULL;
    }
    char *pos = str;
    
    // copy namespace definitions to the string
    // every entry is terminated with '\n' first
    for(WebdavNSList *elm = nslist; elm; elm = elm->next) {
        WSNamespace *ns = elm->namespace;
        if(!ns) {
            continue;
        }
        
        if(ns->prefix) {
            size_t prefixlen = strlen((const char*)ns->prefix);
            memcpy(pos, ns->prefix, prefixlen);
            pos += prefixlen;
        }
        *pos++ = ':';
        
        if(ns->href) {
            size_t hreflen = strlen((const char*)ns->href);
            memcpy(pos, ns->href, hreflen);
            pos += hreflen;
        }
        *pos++ = '\n';
    }
    
    // replace the separator of the last entry with the string terminator
    // deciding this by elm->next would be wrong, if the remaining
    // elements have no namespace
    if(pos > str) {
        pos[-1] = '\0';
    } else {
        pos[0] = '\0';
    }
    
    return str;
}

/*
 * Parses a namespace list string, as created by wsxml_nslist2string
 *
 * Format: one namespace per line
 *   <prefix>:<href>
 * The first ':' of a line separates the prefix from the href. An empty
 * prefix results in a namespace with a NULL prefix. Lines without ':' or
 * with an empty href are ignored.
 *
 * The list elements, the namespaces and their strings are allocated from
 * the pool.
 *
 * Returns the namespace list, or NULL if nsliststr is NULL, contains no
 * namespace or if an allocation fails.
 */
WebdavNSList* wsxml_string2nslist(pool_handle_t *pool, char *nsliststr) {
    if(!nsliststr) return NULL;
    size_t len = strlen(nsliststr);
    WebdavNSList *list_start = NULL;
    WebdavNSList *list_current = NULL;
    int error = 0;
    
    char *prefix = nsliststr;
    size_t prefix_start = 0;
    size_t prefix_len = 0;
    char *href = NULL;
    size_t href_start = len;
    size_t i;
    for(i=0;i<=len;i++) {
        char c = nsliststr[i];
        if(c == '\n' || c == '\0') {
            if(i > href_start) {
                WebdavNSList *elm = pool_malloc(pool, sizeof(WebdavNSList));
                if(!elm) {
                    error = 1;
                    break;
                }
                WSNamespace *ns = pool_malloc(pool, sizeof(WSNamespace));
                if(!ns) {
                    // elm is not part of the list yet
                    pool_free(pool, elm);
                    error = 1;
                    break;
                }
                memset(ns, 0, sizeof(WSNamespace));
                elm->namespace = ns;
                elm->prev = list_current;
                elm->next = NULL;
                
                // add elm to the list before the strings are copied
                // this way, the cleanup code also handles this element
                if(list_current) {
                    list_current->next = elm;
                } else {
                    list_start = elm; 
                }
                list_current = elm;
                
                if(prefix_len > 0) {
                    ns->prefix = (xmlChar*)cx_strdup_pool(pool, cx_mutstrn(prefix, prefix_len)).ptr;
                    if(!ns->prefix) {
                        error = 1;
                        break;
                    }
                }
                ns->href = (xmlChar*)cx_strdup_pool(pool, cx_mutstrn(href, i-href_start)).ptr;
                if(!ns->href) {
                    error = 1;
                    break;
                }
            }
            prefix_start = i + 1;
            prefix = nsliststr + prefix_start;
            prefix_len = 0;
            href_start = len;
            href = NULL;
        } else if(!href && c == ':') {
            prefix_len = i - prefix_start;
            href_start = i + 1;
            href = nsliststr + href_start;
        }
    }
    
    // the loop index cannot be used for error detection, because an
    // allocation can also fail while the last entry (i == len) is processed
    if(error) {
        // error, cleanup
        while(list_start) {
            if(list_start->namespace) {
                WSNamespace *ns = list_start->namespace;
                if(ns->prefix) {
                    pool_free(pool, (char*)ns->prefix);
                }
                if(ns->href) {
                    pool_free(pool, (char*)ns->href);
                }
                pool_free(pool, ns);
            }
            WebdavNSList *next = list_start->next;
            pool_free(pool, list_start);
            list_start = next;
        }
        list_start = NULL;
    }
    
    return list_start;
}

/*****************************************************************************
 *    Non public functions
 *****************************************************************************/

/*
 * Serialization context, that is passed as userdata to the
 * xml_ser_node_begin and xml_ser_node_end callbacks
 */
typedef struct XmlWriter {
    /*
     * Memory pool for temp memory allocations
     */
    pool_handle_t *pool;
    
    /*
     * Buffered output stream
     */
    Writer *out;
    
    /*
     * Map of all namespaces, that were defined before the serialization
     * key: (char*) namespace prefix
     * value: WSNamespace*
     *
     * Can be NULL. Namespaces with a prefix, that is found in this map,
     * are not defined again by xml_ser_element.
     */
    CxMap *namespaces;
    
    /*
     * Should namespace definitions be created
     */
    WSBool define_namespaces;
} XmlWriter;

/*
 * Serialize XML text
 * This replaces some special characters with entity refs
 *
 * type: 0 = element text, 1 = attribute text
 *
 * Replaced characters:
 *   '&' and '<' in both modes ('<' is not allowed in attribute values)
 *   '>' in element text
 *   '"' and '\'' in attribute text
 *
 * If text is NULL, nothing is written.
 */
static void xml_ser_text(Writer *out, int type, const char *text) {
    if(!text) {
        return;
    }
    
    size_t start = 0; // start of the text, that is not written yet
    size_t i;
    cxstring entityref = { NULL, 0 };
    for(i=0;text[i]!='\0';i++) {
        char c = text[i];
        if(c == '&') {
            entityref = (cxstring)CX_STR("&amp;");
        } else if(c == '<') {
            entityref = (cxstring)CX_STR("&lt;");
        } else if(type == 0) {
            if(c == '>') {
                entityref = (cxstring)CX_STR("&gt;");
            }
        } else {
            if(c == '\"') {
                entityref = (cxstring)CX_STR("&quot;");
            } else if(c == '\'') {
                entityref = (cxstring)CX_STR("&apos;");
            }
        }
        
        if(entityref.ptr) {
            // write the text before the special character
            // and the entity ref instead of the character
            size_t len = i-start;
            if(len > 0) {
                writer_put(out, text+start, len);
            }
            writer_puts(out, entityref);
            entityref.ptr = NULL;
            entityref.length = 0;
            start = i+1;
        }
    }
    // write remaining text
    size_t len = i-start;
    if(len > 0) {
        writer_put(out, text+start, len);
    }
}

/*
 * Serialize the start tag of an XML element node
 *
 * This writes the element name, namespace definitions (only if enabled in
 * the XmlWriter) and all attributes. Elements without children are written
 * as self-closing tag.
 */
static void xml_ser_element(XmlWriter *xw, xmlNode *node) {
    Writer *out = xw->out;
    writer_putc(out, '<');
    
    // write prefix and ':'
    if(node->ns && node->ns->prefix) {
        writer_puts(out, cx_str((char*)node->ns->prefix));
        writer_putc(out, ':');
    }
    
    // node name
    writer_puts(out, cx_str((char*)node->name));
    
    // namespace definitions
    if(xw->define_namespaces) {
        // we define only namespaces without prefix or namespaces
        // with prefix, that are not already defined
        // xw->namespaces contains all namespaces, that were defined
        // before xml serialization
        for(xmlNs *nsdef = node->nsDef; nsdef; nsdef = nsdef->next) {
            if(nsdef->prefix) {
                WSNamespace *n = xw->namespaces ?
                    cxMapGet(xw->namespaces, cx_hash_key_str((const char*)nsdef->prefix)) :
                    NULL;
                if(n) {
                    continue; // already defined
                }
                writer_put_lit(out, " xmlns:");
                writer_put_str(out, (const char*)nsdef->prefix);
                writer_put_lit(out, "=\"");
            } else {
                writer_put_lit(out, " xmlns=\"");
            }
            // the href is an attribute value, special characters like
            // '&' or '"' must be written as entity refs
            if(nsdef->href) {
                xml_ser_text(out, 1, (const char*)nsdef->href);
            }
            writer_putc(out, '"');
        }
    }
    
    // attributes
    xmlAttr *attr = node->properties;
    while(attr) {
        // format: ' [<prefix>:]<name>="<value>"'
        writer_putc(out, ' ');
        // optional namespace
        if(attr->ns && attr->ns->prefix) {
            writer_puts(out, cx_str((char*)attr->ns->prefix));
            writer_putc(out, ':');
        }
        // <name>="
        writer_put_str(out, (char*)attr->name);
        writer_put_lit(out, "=\"");
        // value
        xmlNode *value = attr->children;
        while(value) {
            if(value->content) {
                xml_ser_text(out, 1, (const char*)value->content);
            }
            value = value->next;
        }
        // trailing quote
        writer_putc(out, '"');
        
        attr = attr->next;
    }
    
    // elements without children don't get a separate end tag
    if(node->children) {
        writer_putc(out, '>');
    } else {
        writer_put_lit(out, "/>");
    }
}

/*
 * Begin callback for the xml serialization
 *
 * Element nodes are written with xml_ser_element, text nodes with
 * xml_ser_text. All other node types don't generate any output.
 * NOTE(review): this includes CDATA sections, their content is not part of
 * the output - confirm that this is intended
 *
 * userdata must be a XmlWriter*
 *
 * Always returns 0
 */
static int xml_ser_node_begin(xmlNode *node, void *userdata) {
    XmlWriter *xw = userdata;
    if(node->type == XML_ELEMENT_NODE) {
        xml_ser_element(xw, node);
    } else if(node->type == XML_TEXT_NODE) {
        xml_ser_text(xw->out, 0, (const char*)node->content);
    }
    return 0;
}

/*
 * End callback for the xml serialization
 *
 * Writes the end tag '</[<prefix>:]<name>>' for element nodes that have
 * children. Elements without children were already closed by
 * xml_ser_element.
 *
 * userdata must be a XmlWriter*
 *
 * Always returns 0
 */
static int xml_ser_node_end(xmlNode *node, void *userdata) {
    XmlWriter *xw = userdata;
    
    // only elements with children need an end tag
    if(node->type != XML_ELEMENT_NODE || !node->children) {
        return 0;
    }
    
    Writer *out = xw->out;
    writer_put_lit(out, "</");
    // optional prefix followed by ':'
    xmlNs *ns = node->ns;
    if(ns && ns->prefix) {
        writer_puts(out, cx_str((char*)ns->prefix));
        writer_putc(out, ':');
    }
    // name and close tag
    writer_puts(out, cx_str((char*)node->name));
    writer_putc(out, '>');
    
    return 0;
}


/*
 * Serializes a node list (node, node->next, ... including all children)
 * to the Writer out.
 *
 * nsdefs:     map of already defined namespaces (key: prefix), can be NULL
 * createdefs: if true, namespace definitions are written
 *
 * Returns -1 if the node iteration fails, otherwise the error status
 * of the writer.
 */
static int xml_write_nodes(
        pool_handle_t *pool,
        Writer *out,
        CxMap *nsdefs,
        WSBool createdefs,
        xmlNode *node)
{
    XmlWriter xw = {
        .pool = pool,
        .out = out,
        .namespaces = nsdefs,
        .define_namespaces = createdefs
    };
    
    // the iterator visits node->children and node->next
    if(wsxml_iterator(pool, node, xml_ser_node_begin, xml_ser_node_end, &xw)) {
        return -1;
    }
    
    return out->error;
}

/*
 * Serializes a node list to the Writer out, including namespace
 * definitions.
 *
 * nsdefs: map of already defined namespaces (key: prefix), can be NULL;
 *         namespaces with a prefix found in this map are not defined again
 *
 * Returns the result of xml_write_nodes.
 */
int wsxml_write_nodes(
        pool_handle_t *pool,
        Writer *out,
        CxMap *nsdefs,
        xmlNode *node)
{
    const WSBool create_nsdefs = TRUE;
    int result = xml_write_nodes(pool, out, nsdefs, create_nsdefs, node);
    return result;
}

/*
 * Serializes a node list to the Writer out, without writing any
 * namespace definitions.
 *
 * Returns the result of xml_write_nodes.
 */
int wsxml_write_nodes_without_nsdef(
        pool_handle_t *pool,
        Writer *out,
        xmlNode *node)
{
    CxMap *no_nsdefs = NULL;
    const WSBool create_nsdefs = FALSE;
    int result = xml_write_nodes(pool, out, no_nsdefs, create_nsdefs, node);
    return result;
}

mercurial