ucx/string.c

Sun, 24 Jun 2018 11:07:34 +0200

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Sun, 24 Jun 2018 11:07:34 +0200
changeset 426
9cec06cfeade
parent 335
c1bc13faadaa
child 505
481802342fdf
permissions
-rw-r--r--

renames <tags> element to <tagconfig>

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ucx/string.h"

#include "ucx/allocator.h"

#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <ctype.h>

/*
 * Wraps a NUL-terminated C string in an sstr_t without copying it.
 *
 * cstring: the C string to wrap; may be NULL
 * returns: an sstr_t whose pointer is cstring and whose length is
 *          strlen(cstring); for a NULL argument the pointer is NULL and the
 *          length is zero
 */
sstr_t sstr(char *cstring) {
    sstr_t string;
    string.ptr = cstring;
    /* strlen(NULL) is undefined behavior, so map NULL to the empty string */
    string.length = cstring ? strlen(cstring) : 0;
    return string;
}

/*
 * Builds an sstr_t from a pointer and an explicit length.
 * Nothing is copied and the length is stored exactly as given.
 */
sstr_t sstrn(char *cstring, size_t length) {
    sstr_t wrapped;
    wrapped.length = length;
    wrapped.ptr = cstring;
    return wrapped;
}

/*
 * Returns the sum of the lengths of n strings.
 *
 * n:   total number of strings, including s
 * s:   the first string
 * ...: the remaining n-1 strings, each passed as sstr_t
 */
size_t sstrnlen(size_t n, sstr_t s, ...) {
    size_t total = s.length;
    va_list args;
    va_start(args, s);

    /* s already accounts for one of the n strings */
    size_t remaining = n;
    while (remaining > 1) {
        total += va_arg(args, sstr_t).length;
        remaining--;
    }

    va_end(args);
    return total;
}

/*
 * Concatenates count strings into one newly allocated, NUL-terminated string.
 *
 * a:     allocator used for the result buffer
 * count: total number of strings (s1, s2 and count-2 further sstr_t values
 *        read from ap)
 * s1:    first string
 * s2:    second string
 * ap:    argument list holding the remaining strings
 *
 * returns: the concatenated string; an sstr_t with NULL pointer and length
 *          zero if count is less than two, if an allocation fails, or if the
 *          combined length does not fit into size_t
 */
static sstr_t sstrvcat_a(
        UcxAllocator *a,
        size_t count,
        sstr_t s1,
        sstr_t s2,
        va_list ap) {
    sstr_t str;
    str.ptr = NULL;
    str.length = 0;
    if(count < 2) {
        return str;
    }
    
    /* temporary array, because the va_list can only be traversed once */
    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    if(!strings) {
        return str;
    }
    
    // get all args
    strings[0] = s1;
    strings[1] = s2;
    for (size_t i=2;i<count;i++) {
        strings[i] = va_arg (ap, sstr_t);
    }
    
    // overall length, keeping one byte in reserve for the terminator
    size_t total = 0;
    for (size_t i=0;i<count;i++) {
        if (strings[i].length >= SIZE_MAX - total) {
            free(strings);
            return str;
        }
        total += strings[i].length;
    }
    
    // create new string
    str.ptr = (char*) almalloc(a, total + 1);
    if(!str.ptr) {
        free(strings);
        return str;
    }
    str.length = total;
    
    // concatenate strings
    size_t pos = 0;
    for (size_t i=0;i<count;i++) {
        sstr_t s = strings[i];
        /* empty strings may carry a NULL pointer, which memcpy must not see */
        if (s.length > 0) {
            memcpy(str.ptr + pos, s.ptr, s.length);
            pos += s.length;
        }
    }
    
    str.ptr[str.length] = '\0';
    
    free(strings);
    
    return str;
}

/*
 * Concatenates count strings (s1, s2 and count-2 variadic sstr_t arguments)
 * using the allocator returned by ucx_default_allocator().
 * The actual work is done by sstrvcat_a().
 */
sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
    sstr_t joined;
    va_list rest;

    va_start(rest, s2);
    joined = sstrvcat_a(ucx_default_allocator(), count, s1, s2, rest);
    va_end(rest);

    return joined;
}

/*
 * Concatenates count strings (s1, s2 and count-2 variadic sstr_t arguments)
 * using the supplied allocator a.
 * The actual work is done by sstrvcat_a().
 */
sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
    sstr_t joined;
    va_list rest;

    va_start(rest, s2);
    joined = sstrvcat_a(a, count, s1, s2, rest);
    va_end(rest);

    return joined;
}

/*
 * Returns the part of s that begins at index start and runs to the end.
 * No data is copied; see sstrsubsl() for the handling of out-of-range starts.
 */
sstr_t sstrsubs(sstr_t s, size_t start) {
    /*
     * For start > s.length this subtraction wraps around, which is harmless:
     * sstrsubsl() rejects such a start before it looks at the length.
     */
    size_t tail_length = s.length - start;
    return sstrsubsl(s, start, tail_length);
}

/*
 * Returns a view of at most length bytes of s, beginning at index start.
 *
 * If start is not inside s, the result has a NULL pointer and length zero.
 * A length that reaches past the end of s is clamped to the bytes available.
 * No data is copied; the result points into the buffer of s.
 */
sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
    sstr_t sub;

    if (start >= s.length) {
        sub.ptr = NULL;
        sub.length = 0;
        return sub;
    }

    size_t available = s.length - start;
    sub.ptr = s.ptr + start;
    sub.length = length < available ? length : available;
    return sub;
}

/*
 * Finds the first occurrence of the byte c in s.
 *
 * As with strchr(), c is interpreted as a byte value, so both the plain char
 * value and its unsigned char value locate bytes >= 0x80 regardless of the
 * signedness of char.
 *
 * returns: the part of s starting at the first occurrence, or an sstr_t with
 *          NULL pointer and length zero if c does not occur in s
 */
sstr_t sstrchr(sstr_t s, int c) {
    sstr_t found;
    found.ptr = NULL;
    found.length = 0;

    /* skip memchr for empty strings, their pointer may be NULL */
    if (s.length > 0) {
        char *hit = (char*) memchr(s.ptr, c, s.length);
        if (hit) {
            found.ptr = hit;
            found.length = s.length - (size_t) (hit - s.ptr);
        }
    }
    return found;
}

/*
 * Finds the last occurrence of the byte c in s.
 *
 * As with strrchr(), c is interpreted as a byte value, so both the plain char
 * value and its unsigned char value locate bytes >= 0x80 regardless of the
 * signedness of char.
 *
 * returns: the part of s starting at the last occurrence, or an sstr_t with
 *          NULL pointer and length zero if c does not occur in s
 */
sstr_t sstrrchr(sstr_t s, int c) {
    const unsigned char wanted = (unsigned char) c;

    /* i is one past the inspected index, so the unsigned counter never wraps */
    for(size_t i=s.length;i>0;i--) {
        if((unsigned char) s.ptr[i-1] == wanted) {
            return sstrsubs(s, i-1);
        }
    }

    sstr_t n;
    n.ptr = NULL;
    n.length = 0;
    return n;
}

/*
 * Reads entry index of the prefix table used by sstrstr().
 * Heap tables store size_t entries, the small table stores uint8_t entries.
 */
static size_t sstrstr_ptable_get(int useheap, const void *ptable,
        size_t index) {
    return useheap ? ((const size_t*)ptable)[index]
                   : (size_t) ((const uint8_t*)ptable)[index];
}

/*
 * Writes value to entry index of the prefix table used by sstrstr().
 */
static void sstrstr_ptable_set(int useheap, void *ptable,
        size_t index, size_t value) {
    if (useheap) {
        ((size_t*)ptable)[index] = value;
    } else {
        ((uint8_t*)ptable)[index] = (uint8_t) value;
    }
}

/*
 * Searches string for the first occurrence of match.
 *
 * returns: string itself if match is empty; otherwise the part of string
 *          starting at the first occurrence of match, or an sstr_t with NULL
 *          pointer and length zero if match does not occur
 *
 * The search uses a prefix table. Patterns of up to 255 bytes use a table on
 * the stack, longer patterns allocate the table with calloc(). If that
 * allocation fails, a plain position-by-position comparison is used instead,
 * so the result is still correct.
 */
sstr_t sstrstr(sstr_t string, sstr_t match) {
    if (match.length == 0) {
        return string;
    }
    
    /* prepare default return value in case of no match */
    sstr_t result = sstrn(NULL, 0);
    
    /* a pattern longer than the text can never match */
    if (match.length > string.length) {
        return result;
    }
    
    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */
    
    /*
     * small prefix table; lives on the stack so that concurrent and
     * reentrant calls do not share any state
     * every entry is written before it is read, no initialization needed
     */
    uint8_t small_table[256];
    
    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the small prefix table, allocate on the heap */
    int useheap = match.length > 255;
    void *ptable = useheap ?
        calloc(match.length+1, sizeof(size_t)) : (void*) small_table;
    
    if (!ptable) {
        /* no memory for the table: fall back to a naive search */
        size_t last = string.length - match.length;
        for (size_t k = 0 ; k <= last ; k++) {
            if (memcmp(string.ptr + k, match.ptr, match.length) == 0) {
                result.ptr = string.ptr + k;
                result.length = string.length - k;
                break;
            }
        }
        return result;
    }
    
    size_t i, j;
    
    /* fill prefix table */
    i = 0; j = 0;
    sstrstr_ptable_set(useheap, ptable, i, j);
    while (i < match.length) {
        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
            j = sstrstr_ptable_get(useheap, ptable, j-1);
        }
        i++; j++;
        sstrstr_ptable_set(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < string.length) {
        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
            j = sstrstr_ptable_get(useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == match.length) {
            size_t start = i - match.length;
            result.ptr = string.ptr + start;
            result.length = string.length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }
    
    return result;
}

/*
 * Splits s at every occurrence of the delimiter d, using the allocator
 * returned by ucx_default_allocator(). See sstrsplit_a() for the meaning of
 * n and of the return value.
 */
sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
    sstr_t *parts = sstrsplit_a(ucx_default_allocator(), s, d, n);
    return parts;
}

/*
 * Splits s at every occurrence of the delimiter d.
 *
 * allocator: allocator for the result array and for the copied items
 * s:         the string to split
 * d:         the delimiter
 * n:         in:  maximum number of items, zero for no limit; once the limit
 *                 is reached, the last item receives the whole remainder
 *            out: number of items in the returned array, or
 *                 0  if s equals d,
 *                 -1 if s or d is empty,
 *                 -2 if a memory allocation failed
 *
 * returns: an array of *n newly allocated strings, or NULL whenever *n is
 *          not positive; on allocation failure everything allocated so far
 *          is released again
 */
sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
    if (s.length == 0 || d.length == 0) {
        *n = -1;
        return NULL;
    }
    
    /* special cases: delimiter is at least as large as the string */
    if (d.length >= s.length) {
        /* exact match */
        if (sstrcmp(s, d) == 0) {
            *n = 0;
            return NULL;
        }
        /* no match possible: the only item is a copy of s */
        sstr_t *single = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
        if (single) {
            *single = sstrdup_a(allocator, s);
            if (single->ptr) {
                *n = 1;
                return single;
            }
            alfree(allocator, single);
        }
        *n = -2;
        return NULL;
    }
    
    ssize_t nmax = *n;
    size_t arrlen = 16;
    sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
    if (!result) {
        *n = -2;
        return NULL;
    }

    sstr_t curpos = s;
    ssize_t j = 1; /* number of items, including the one stored next */
    int failed = 0;
    while (1) {
        sstr_t match;
        /* optimize for one byte delimiters */
        if (d.length == 1) {
            match = curpos;
            for (size_t i = 0 ; i < curpos.length ; i++) {
                if (curpos.ptr[i] == *(d.ptr)) {
                    match.ptr = curpos.ptr + i;
                    break;
                }
                match.length--;
            }
        } else {
            match = sstrstr(curpos, d);
        }

        /*
         * split only if a delimiter was found and the item limit permits it;
         * otherwise the _full_ remaining string becomes the last item
         */
        int split = match.length > 0 && (nmax == 0 || j < nmax);
        sstr_t item = split ?
            sstrn(curpos.ptr, match.ptr - curpos.ptr) : curpos;

        /* copy the current string to the array */
        result[j-1] = sstrdup_a(allocator, item);
        if (!result[j-1].ptr) {
            failed = 1;
            break;
        }
        if (!split) {
            break;
        }

        size_t processed = item.length + d.length;
        curpos.ptr += processed;
        curpos.length -= processed;

        /* allocate memory for the next string */
        j++;
        if ((size_t) j > arrlen) {
            arrlen *= 2;
            sstr_t* reallocated = (sstr_t*) alrealloc(
                    allocator, result, arrlen*sizeof(sstr_t));
            if (!reallocated) {
                failed = 1;
                break;
            }
            result = reallocated;
        }
    }

    if (failed) {
        /* in both failure cases exactly the items 0 .. j-2 are valid */
        for (ssize_t i = 0 ; i < j-1 ; i++) {
            alfree(allocator, result[i].ptr);
        }
        alfree(allocator, result);
        *n = -2;
        return NULL;
    }

    *n = j;
    return result;
}

/*
 * Compares two strings.
 *
 * Strings of different length are ordered by length alone: the longer one
 * compares greater (1), the shorter one smaller (-1). Only strings of equal
 * length are compared by content, and then the memcmp() result is returned.
 */
int sstrcmp(sstr_t s1, sstr_t s2) {
    if (s1.length > s2.length) {
        return 1;
    }
    if (s1.length < s2.length) {
        return -1;
    }
    return memcmp(s1.ptr, s2.ptr, s1.length);
}

/*
 * Compares two strings, ignoring case.
 *
 * Strings of different length are ordered by length alone: the longer one
 * compares greater (1), the shorter one smaller (-1). Strings of equal
 * length are compared byte by byte after conversion with tolower().
 *
 * The comparison covers the full length of the strings, including anything
 * after an embedded NUL byte, which matches the behavior of sstrcmp(). It
 * needs neither <strings.h> nor a platform specific function.
 *
 * returns: zero if the strings are equal ignoring case, otherwise a negative
 *          or positive value according to the first differing byte
 */
int sstrcasecmp(sstr_t s1, sstr_t s2) {
    if (s1.length != s2.length) {
        return s1.length > s2.length ? 1 : -1;
    }

    for (size_t i = 0 ; i < s1.length ; i++) {
        /* ctype functions require values in the range of unsigned char */
        int c1 = tolower((unsigned char) s1.ptr[i]);
        int c2 = tolower((unsigned char) s2.ptr[i]);
        if (c1 != c2) {
            return c1 - c2;
        }
    }
    return 0;
}

/*
 * Duplicates s using the allocator returned by ucx_default_allocator().
 * See sstrdup_a() for details.
 */
sstr_t sstrdup(sstr_t s) {
    sstr_t copy = sstrdup_a(ucx_default_allocator(), s);
    return copy;
}

/*
 * Creates a NUL-terminated copy of s in memory obtained from allocator.
 *
 * returns: the copy, or an sstr_t with NULL pointer and length zero if the
 *          allocation failed
 */
sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
    sstr_t newstring;
    newstring.length = 0;
    newstring.ptr = (char*)almalloc(allocator, s.length + 1);
    if (newstring.ptr) {
        /* empty strings may carry a NULL pointer, which memcpy must not see */
        if (s.length > 0) {
            memcpy(newstring.ptr, s.ptr, s.length);
        }
        newstring.ptr[s.length] = 0;
        newstring.length = s.length;
    }
    
    return newstring;
}

/*
 * Returns a view of string without leading and trailing white space, as
 * classified by isspace(). No data is copied or modified; the result points
 * into the buffer of string.
 */
sstr_t sstrtrim(sstr_t string) {
    sstr_t newstr = string;
    
    /*
     * isspace() is only defined for values in the range of unsigned char
     * (and EOF), so bytes >= 0x80 must not be passed as negative char values
     */
    while (newstr.length > 0 && isspace((unsigned char) *newstr.ptr)) {
        newstr.ptr++;
        newstr.length--;
    }
    while (newstr.length > 0
            && isspace((unsigned char) newstr.ptr[newstr.length-1])) {
        newstr.length--;
    }
    
    return newstr;
}

/*
 * Checks whether string starts with prefix.
 *
 * returns: 1 if prefix is empty or the first prefix.length bytes of string
 *          equal prefix, 0 otherwise
 */
int sstrprefix(sstr_t string, sstr_t prefix) {
    /* the empty prefix matches every string, including the empty one */
    if (prefix.length == 0) {
        return 1;
    }
    /* a prefix cannot be longer than the string itself */
    if (prefix.length > string.length) {
        return 0;
    }
    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}

/*
 * Checks whether string ends with suffix.
 *
 * returns: 1 if suffix is empty or the last suffix.length bytes of string
 *          equal suffix, 0 otherwise
 */
int sstrsuffix(sstr_t string, sstr_t suffix) {
    /* the empty suffix matches every string, including the empty one */
    if (suffix.length == 0) {
        return 1;
    }
    /* a suffix cannot be longer than the string itself */
    if (suffix.length > string.length) {
        return 0;
    }
    const char *tail = string.ptr + string.length - suffix.length;
    return memcmp(tail, suffix.ptr, suffix.length) == 0;
}

/*
 * Returns a copy of string, created with sstrdup(), in which every byte has
 * been converted with tolower(). If the duplication fails, the copy has
 * length zero and is returned unchanged.
 */
sstr_t sstrlower(sstr_t string) {
    sstr_t ret = sstrdup(string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* tolower() is only defined for unsigned char values and EOF */
        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/*
 * Returns a copy of string, created with sstrdup_a() and the given
 * allocator, in which every byte has been converted with tolower(). If the
 * duplication fails, the copy has length zero and is returned unchanged.
 */
sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
    sstr_t ret = sstrdup_a(allocator, string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* tolower() is only defined for unsigned char values and EOF */
        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/*
 * Returns a copy of string, created with sstrdup(), in which every byte has
 * been converted with toupper(). If the duplication fails, the copy has
 * length zero and is returned unchanged.
 */
sstr_t sstrupper(sstr_t string) {
    sstr_t ret = sstrdup(string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* toupper() is only defined for unsigned char values and EOF */
        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/*
 * Returns a copy of string, created with sstrdup_a() and the given
 * allocator, in which every byte has been converted with toupper(). If the
 * duplication fails, the copy has length zero and is returned unchanged.
 */
sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
    sstr_t ret = sstrdup_a(allocator, string);
    for (size_t i = 0; i < ret.length ; i++) {
        /* toupper() is only defined for unsigned char values and EOF */
        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
    }
    return ret;
}

mercurial