ucx/string.c

Thu, 21 Dec 2017 19:48:27 +0100

author
Mike Becker <universe@uap-core.de>
date
Thu, 21 Dec 2017 19:48:27 +0100
changeset 359
bacb54502b24
parent 335
c1bc13faadaa
child 505
481802342fdf
permissions
-rw-r--r--

davql: allow ANYWHERE keyword in SELECT statements

This may seem pointless, but users might want to be explicit about this and the grammar is more consistent.

This commit also adds some no-ops to the function body of the SET parser, because some day the grammar might allow more clauses after the WHERE clause.

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ucx/string.h"

#include "ucx/allocator.h"

#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <ctype.h>

/*
 * Wraps a NUL-terminated C string in an sstr_t without copying it.
 * The length is measured with strlen(), so cstring must not be NULL.
 */
sstr_t sstr(char *cstring) {
    sstr_t wrapped;
    wrapped.length = strlen(cstring);
    wrapped.ptr = cstring;
    return wrapped;
}

/*
 * Wraps a character buffer of known size in an sstr_t without copying it.
 * Neither the pointer nor the length is inspected or validated here.
 */
sstr_t sstrn(char *cstring, size_t length) {
    sstr_t wrapped;
    wrapped.length = length;
    wrapped.ptr = cstring;
    return wrapped;
}

/*
 * Sums the lengths of n strings.
 *
 * n counts the named string s plus every variadic sstr_t argument, so
 * exactly n-1 values are read from the argument list. The length of s is
 * always included, even when n is zero.
 */
size_t sstrnlen(size_t n, sstr_t s, ...) {
    size_t total = s.length;
    size_t remaining = n > 1 ? n - 1 : 0;

    va_list args;
    va_start(args, s);
    while (remaining > 0) {
        sstr_t next = va_arg(args, sstr_t);
        total += next.length;
        remaining--;
    }
    va_end(args);

    return total;
}

/*
 * Concatenates count strings into one newly allocated, NUL-terminated string.
 *
 * a      allocator used for the result buffer
 * count  total number of strings: s1, s2 and count-2 sstr_t values in ap
 * s1,s2  the first two strings
 * ap     argument list holding the remaining strings; the caller is
 *        responsible for va_start/va_end
 *
 * Returns the concatenation, or a string with ptr == NULL and length == 0
 * when count is smaller than 2 or when an allocation fails.
 */
static sstr_t sstrvcat_a(
        UcxAllocator *a,
        size_t count,
        sstr_t s1,
        sstr_t s2,
        va_list ap) {
    sstr_t str;
    str.ptr = NULL;
    str.length = 0;
    if (count < 2) {
        return str;
    }

    /* temporary array: the argument list can only be walked once */
    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    if (!strings) {
        return str;
    }

    /* collect all arguments and compute the overall length */
    strings[0] = s1;
    strings[1] = s2;
    size_t total = s1.length + s2.length;
    for (size_t i = 2 ; i < count ; i++) {
        strings[i] = va_arg(ap, sstr_t);
        total += strings[i].length;
    }

    /* create the new string; on failure str stays {NULL, 0} */
    char *buffer = (char*) almalloc(a, total + 1);
    if (buffer) {
        size_t pos = 0;
        for (size_t i = 0 ; i < count ; i++) {
            /* skip empty parts: their pointer may be NULL, which must
             * never be handed to memcpy, not even with a size of zero */
            if (strings[i].length > 0) {
                memcpy(buffer + pos, strings[i].ptr, strings[i].length);
                pos += strings[i].length;
            }
        }
        buffer[total] = '\0';

        str.ptr = buffer;
        str.length = total;
    }

    free(strings);

    return str;
}

/*
 * Concatenates count strings (s1, s2 and count-2 further sstr_t arguments)
 * using the default UCX allocator. The work is done by sstrvcat_a().
 */
sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
    sstr_t joined;
    va_list rest;

    va_start(rest, s2);
    joined = sstrvcat_a(ucx_default_allocator(), count, s1, s2, rest);
    va_end(rest);

    return joined;
}

/*
 * Concatenates count strings (s1, s2 and count-2 further sstr_t arguments)
 * using the allocator a. The work is done by sstrvcat_a().
 */
sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
    sstr_t joined;
    va_list rest;

    va_start(rest, s2);
    joined = sstrvcat_a(a, count, s1, s2, rest);
    va_end(rest);

    return joined;
}

/*
 * Returns the part of s that begins at index start and runs to the end.
 * The range check is left to sstrsubsl(), which yields an empty string
 * when start is not smaller than the length of s.
 */
sstr_t sstrsubs(sstr_t s, size_t start) {
    size_t tail_length = s.length - start;
    return sstrsubsl(s, start, tail_length);
}

/*
 * Returns a view on at most length bytes of s, beginning at index start.
 *
 * No memory is copied: the result points into the buffer of s. When start
 * lies at or beyond the end of s, the result has ptr == NULL and
 * length == 0. A length that exceeds the remaining bytes is clamped.
 */
sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
    sstr_t sub;

    if (start >= s.length) {
        sub.ptr = NULL;
        sub.length = 0;
        return sub;
    }

    size_t available = s.length - start;
    sub.ptr = s.ptr + start;
    sub.length = length < available ? length : available;
    return sub;
}

/*
 * Finds the first occurrence of the character c in s.
 *
 * Returns the substring of s starting at that occurrence, or a string
 * with ptr == NULL and length == 0 when c does not occur.
 */
sstr_t sstrchr(sstr_t s, int c) {
    size_t pos = 0;
    while (pos < s.length && s.ptr[pos] != c) {
        pos++;
    }

    if (pos < s.length) {
        return sstrsubs(s, pos);
    }

    sstr_t none;
    none.ptr = NULL;
    none.length = 0;
    return none;
}

/*
 * Finds the last occurrence of the character c in s.
 *
 * Returns the substring of s starting at that occurrence, or a string
 * with ptr == NULL and length == 0 when c does not occur.
 */
sstr_t sstrrchr(sstr_t s, int c) {
    /* walk backwards; pos is always one past the index being examined */
    size_t pos = s.length;
    while (pos > 0) {
        pos--;
        if (s.ptr[pos] == c) {
            return sstrsubs(s, pos);
        }
    }

    sstr_t none;
    none.ptr = NULL;
    none.length = 0;
    return none;
}

/*
 * Accessors for the prefix table used by sstrstr().
 * The table holds uint8_t entries when it lives on the stack and size_t
 * entries when it lives on the heap; useheap selects the element type.
 */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)

/*
 * Finds the first occurrence of match in string.
 *
 * Returns the substring of string that starts at the first occurrence and
 * runs to the end of string. An empty match yields string itself. When
 * match does not occur, the result has ptr == NULL and length == 0.
 *
 * The search uses a prefix table. Patterns of up to 255 bytes use a table
 * on the stack, longer patterns a table on the heap. If that heap
 * allocation fails, a plain byte-wise comparison is used instead, so the
 * result is correct either way.
 */
sstr_t sstrstr(sstr_t string, sstr_t match) {
    if (match.length == 0) {
        return string;
    }
    
    /* prepare default return value in case of no match */
    sstr_t result = sstrn(NULL, 0);
    
    /* a pattern longer than the text cannot occur in it */
    if (match.length > string.length) {
        return result;
    }
    
    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */
    
    /* small prefix table; automatic storage, so calls do not share state */
    uint8_t s_prefix_table[256];
    
    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the small prefix table, allocate on the heap */
    int useheap = match.length > 255;
    void *ptable = useheap ?
        calloc(match.length+1, sizeof(size_t)) : (void*) s_prefix_table;
    
    if (!ptable) {
        /* no memory for the table: fall back to a naive search */
        size_t last = string.length - match.length;
        for (size_t k = 0 ; k <= last ; k++) {
            if (memcmp(string.ptr + k, match.ptr, match.length) == 0) {
                result.ptr = string.ptr + k;
                result.length = string.length - k;
                break;
            }
        }
        return result;
    }
    
    size_t i, j;
    
    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < match.length) {
        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < string.length) {
        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == match.length) {
            /* the whole pattern matched, ending just before index i */
            size_t start = i - match.length;
            result.ptr = string.ptr + start;
            result.length = string.length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }
    
    return result;
}

#undef ptable_r
#undef ptable_w

/*
 * Splits s at every occurrence of the delimiter d using the default UCX
 * allocator. See sstrsplit_a() for the meaning of n and the return value.
 */
sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
    UcxAllocator *default_allocator = ucx_default_allocator();
    return sstrsplit_a(default_allocator, s, d, n);
}

/*
 * Splits s at every occurrence of the delimiter d.
 *
 * allocator  used for the result array and for every string in it
 * s          the string to split
 * d          the delimiter
 * n          in:  maximum number of items to produce, 0 for no limit;
 *                 once the limit is reached the last item receives the
 *                 complete remaining string
 *            out: number of items in the returned array, or
 *                   0  if s is equal to d
 *                  -1  if s or d is empty
 *                  -2  if a memory allocation failed
 *
 * Returns an array of *n independently allocated strings, or NULL when
 * *n is not positive. On allocation failure everything that was allocated
 * up to that point is released again.
 */
sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
    if (s.length == 0 || d.length == 0) {
        *n = -1;
        return NULL;
    }
    
    /* special cases: delimiter is at least as large as the string */
    if (d.length >= s.length) {
        /* exact match */
        if (sstrcmp(s, d) == 0) {
            *n = 0;
            return NULL;
        }
        
        /* no match possible: the result is a copy of the whole string */
        sstr_t *single = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
        if (single) {
            *single = sstrdup_a(allocator, s);
            if (single->ptr) {
                *n = 1;
                return single;
            }
            alfree(allocator, single);
        }
        *n = -2;
        return NULL;
    }
    
    ssize_t nmax = *n;
    size_t arrlen = 16;
    sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
    if (!result) {
        *n = -2;
        return NULL;
    }

    sstr_t curpos = s;
    ssize_t j = 1; /* one-based index of the item that is filled next */
    for (;;) {
        sstr_t match;
        /* optimize for one byte delimiters */
        if (d.length == 1) {
            match = curpos;
            for (size_t i = 0 ; i < curpos.length ; i++) {
                if (curpos.ptr[i] == *(d.ptr)) {
                    match.ptr = curpos.ptr + i;
                    break;
                }
                match.length--;
            }
        } else {
            match = sstrstr(curpos, d);
        }
        
        /* no more matches or nmax reached: copy the full remaining string */
        if (match.length == 0 || !(nmax == 0 || j < nmax)) {
            result[j-1] = sstrdup_a(allocator, curpos);
            if (!result[j-1].ptr) {
                goto fail;
            }
            break;
        }
        
        /* copy the part in front of the delimiter to the array */
        sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
        result[j-1] = sstrdup_a(allocator, item);
        if (!result[j-1].ptr) {
            goto fail;
        }
        size_t processed = item.length + d.length;
        curpos.ptr += processed;
        curpos.length -= processed;

        /* make sure there is room for the next string */
        j++;
        if ((size_t) j > arrlen) {
            arrlen *= 2;
            sstr_t* reallocated = (sstr_t*) alrealloc(
                    allocator, result, arrlen*sizeof(sstr_t));
            if (!reallocated) {
                goto fail;
            }
            result = reallocated;
        }
    }
    
    *n = j;
    return result;

fail:
    /* items 0 .. j-2 have been stored successfully, release them */
    for (ssize_t i = 0 ; i < j-1 ; i++) {
        alfree(allocator, result[i].ptr);
    }
    alfree(allocator, result);
    *n = -2;
    return NULL;
}

/*
 * Compares two strings.
 *
 * Strings of different length are ordered by length alone: the longer one
 * is the greater one (1 or -1 is returned). Strings of equal length are
 * compared byte-wise with memcmp() and its result is returned.
 */
int sstrcmp(sstr_t s1, sstr_t s2) {
    if (s1.length != s2.length) {
        return s1.length > s2.length ? 1 : -1;
    }
    return memcmp(s1.ptr, s2.ptr, s1.length);
}

/*
 * Compares two strings, ignoring case.
 *
 * Strings of different length are ordered by length alone: the longer one
 * is the greater one (1 or -1 is returned). Strings of equal length are
 * handed to the case-insensitive comparison function of the platform.
 */
int sstrcasecmp(sstr_t s1, sstr_t s2) {
    if (s1.length != s2.length) {
        return s1.length > s2.length ? 1 : -1;
    }
#ifdef _WIN32
    return _strnicmp(s1.ptr, s2.ptr, s1.length);
#else
    return strncasecmp(s1.ptr, s2.ptr, s1.length);
#endif
}

/*
 * Creates a NUL-terminated copy of s using the default UCX allocator.
 * See sstrdup_a() for the behavior on allocation failure.
 */
sstr_t sstrdup(sstr_t s) {
    UcxAllocator *default_allocator = ucx_default_allocator();
    return sstrdup_a(default_allocator, s);
}

/*
 * Creates a copy of s in memory obtained from allocator.
 *
 * The copy owns s.length + 1 bytes and is always NUL-terminated. When the
 * allocation fails, the result has ptr == NULL and length == 0.
 */
sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
    sstr_t copy;

    copy.ptr = (char*) almalloc(allocator, s.length + 1);
    if (!copy.ptr) {
        copy.length = 0;
        return copy;
    }

    memcpy(copy.ptr, s.ptr, s.length);
    copy.ptr[s.length] = 0;
    copy.length = s.length;
    return copy;
}

/*
 * Strips leading and trailing white space, as classified by isspace().
 *
 * Nothing is copied or modified: the result is a view into the buffer of
 * the given string with an adjusted start and length.
 */
sstr_t sstrtrim(sstr_t string) {
    sstr_t newstr = string;
    
    /* the <ctype.h> functions require a value in the range of unsigned
     * char (or EOF); a plain char may be negative, hence the casts */
    while (newstr.length > 0 && isspace((unsigned char) *newstr.ptr)) {
        newstr.ptr++;
        newstr.length--;
    }
    while (newstr.length > 0
            && isspace((unsigned char) newstr.ptr[newstr.length-1])) {
        newstr.length--;
    }
    
    return newstr;
}

/*
 * Checks whether string starts with prefix.
 *
 * Returns 1 if it does and 0 otherwise. An empty prefix is a prefix of
 * every string, including the empty string.
 */
int sstrprefix(sstr_t string, sstr_t prefix) {
    if (prefix.length == 0) {
        return 1;
    }
    /* this also rejects every non-empty prefix of an empty string */
    if (prefix.length > string.length) {
        return 0;
    }
    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

/*
 * Checks whether string ends with suffix.
 *
 * Returns 1 if it does and 0 otherwise. An empty suffix is a suffix of
 * every string, including the empty string.
 */
int sstrsuffix(sstr_t string, sstr_t suffix) {
    if (suffix.length == 0) {
        return 1;
    }
    /* this also rejects every non-empty suffix of an empty string */
    if (suffix.length > string.length) {
        return 0;
    }
    const char *tail = string.ptr + string.length - suffix.length;
    return memcmp(tail, suffix.ptr, suffix.length) == 0;
}

/*
 * Returns a lower case copy of string, created with sstrdup().
 *
 * The given string is not modified. If the duplication fails, the result
 * has length 0 and no character is converted.
 */
sstr_t sstrlower(sstr_t string) {
    sstr_t ret = sstrdup(string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* tolower() requires a value in the range of unsigned char;
         * a plain char may be negative, hence the cast */
        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/*
 * Returns a lower case copy of string, created with sstrdup_a() and the
 * given allocator.
 *
 * The given string is not modified. If the duplication fails, the result
 * has length 0 and no character is converted.
 */
sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
    sstr_t ret = sstrdup_a(allocator, string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* tolower() requires a value in the range of unsigned char;
         * a plain char may be negative, hence the cast */
        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/*
 * Returns an upper case copy of string, created with sstrdup().
 *
 * The given string is not modified. If the duplication fails, the result
 * has length 0 and no character is converted.
 */
sstr_t sstrupper(sstr_t string) {
    sstr_t ret = sstrdup(string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* toupper() requires a value in the range of unsigned char;
         * a plain char may be negative, hence the cast */
        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/*
 * Returns an upper case copy of string, created with sstrdup_a() and the
 * given allocator.
 *
 * The given string is not modified. If the duplication fails, the result
 * has length 0 and no character is converted.
 */
sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
    sstr_t ret = sstrdup_a(allocator, string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* toupper() requires a value in the range of unsigned char;
         * a plain char may be negative, hence the cast */
        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
    }
    return ret;
}

mercurial