Wed, 30 May 2018 18:38:42 +0200
fixes whitespace handling and error detection for basic tag filters
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "ucx/string.h"

#include "ucx/allocator.h"

#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdint.h>
#include <ctype.h>

/**
 * Wraps a NUL-terminated C string in an sstr_t.
 *
 * The characters are not copied; the length is determined with strlen().
 *
 * @param cstring the NUL-terminated string to wrap
 * @return an sstr_t pointing at cstring
 */
sstr_t sstr(char *cstring) {
    sstr_t string;
    string.ptr = cstring;
    string.length = strlen(cstring);
    return string;
}

/**
 * Wraps a character buffer of known length in an sstr_t.
 *
 * The characters are not copied and no terminator is required.
 *
 * @param cstring the buffer to wrap
 * @param length  the number of characters to expose
 * @return an sstr_t pointing at cstring with the given length
 */
sstr_t sstrn(char *cstring, size_t length) {
    sstr_t string;
    string.ptr = cstring;
    string.length = length;
    return string;
}

/**
 * Sums the lengths of n strings.
 *
 * @param n   the total number of strings passed (s plus the variadic ones)
 * @param s   the first string
 * @param ... n-1 further sstr_t values
 * @return the sum of all lengths
 */
size_t sstrnlen(size_t n, sstr_t s, ...) {
    va_list ap;
    size_t size = s.length;
    va_start(ap, s);

    /* the first string is a named parameter, so start counting at 1 */
    for (size_t i = 1 ; i < n ; i++) {
        sstr_t str = va_arg(ap, sstr_t);
        size += str.length;
    }
    va_end(ap);

    return size;
}

/**
 * Shared implementation of sstrcat() and sstrcat_a().
 *
 * @param a     allocator used for the resulting string
 * @param count total number of strings (s1, s2 and the ones in ap)
 * @param s1    first string
 * @param s2    second string
 * @param ap    the remaining count-2 sstr_t values
 * @return the NUL-terminated concatenation, or a string with NULL pointer
 *         and zero length if count < 2 or an allocation fails
 */
static sstr_t sstrvcat_a(
        UcxAllocator *a,
        size_t count,
        sstr_t s1,
        sstr_t s2,
        va_list ap) {
    sstr_t str;
    str.ptr = NULL;
    str.length = 0;
    if (count < 2) {
        return str;
    }

    /* temporary array, because the va_list can only be walked once */
    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    if (!strings) {
        return str;
    }

    /* get all args and overall length */
    strings[0] = s1;
    strings[1] = s2;
    size_t total = s1.length + s2.length;
    for (size_t i = 2 ; i < count ; i++) {
        sstr_t s = va_arg(ap, sstr_t);
        strings[i] = s;
        total += s.length;
    }

    /* create new string */
    str.ptr = (char*) almalloc(a, total + 1);
    str.length = total;
    if (!str.ptr) {
        free(strings);
        str.length = 0;
        return str;
    }

    /* concatenate strings */
    size_t pos = 0;
    for (size_t i = 0 ; i < count ; i++) {
        sstr_t s = strings[i];
        memcpy(str.ptr + pos, s.ptr, s.length);
        pos += s.length;
    }

    str.ptr[str.length] = '\0';

    free(strings);

    return str;
}

/**
 * Concatenates count strings using the default allocator.
 *
 * @param count total number of strings (at least 2)
 * @param s1    first string
 * @param s2    second string
 * @param ...   count-2 further sstr_t values
 * @return see sstrvcat_a()
 */
sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
    va_list ap;
    va_start(ap, s2);
    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
    va_end(ap);
    return s;
}

/**
 * Concatenates count strings using the specified allocator.
 *
 * @param a     allocator used for the resulting string
 * @param count total number of strings (at least 2)
 * @param s1    first string
 * @param s2    second string
 * @param ...   count-2 further sstr_t values
 * @return see sstrvcat_a()
 */
sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
    va_list ap;
    va_start(ap, s2);
    sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
    va_end(ap);
    return s;
}

/**
 * Returns the tail of a string beginning at start.
 *
 * No characters are copied. An out-of-range start is handled by
 * sstrsubsl(), which checks start before the length is used.
 *
 * @param s     the string
 * @param start index of the first character of the substring
 * @return the substring, or NULL pointer / zero length if start is not
 *         inside s
 */
sstr_t sstrsubs(sstr_t s, size_t start) {
    return sstrsubsl(s, start, s.length - start);
}

/**
 * Returns a substring with a maximum length.
 *
 * No characters are copied. The length is clamped to what is available
 * after start.
 *
 * @param s      the string
 * @param start  index of the first character of the substring
 * @param length maximum length of the substring
 * @return the substring, or NULL pointer / zero length if start is not
 *         inside s
 */
sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
    sstr_t new_sstr;
    if (start >= s.length) {
        new_sstr.ptr = NULL;
        new_sstr.length = 0;
    } else {
        if (length > s.length - start) {
            length = s.length - start;
        }
        new_sstr.ptr = &s.ptr[start];
        new_sstr.length = length;
    }
    return new_sstr;
}

/**
 * Finds the first occurrence of a character.
 *
 * @param s the string
 * @param c the character to look for
 * @return the substring starting at the first occurrence, or NULL pointer /
 *         zero length if c does not occur
 */
sstr_t sstrchr(sstr_t s, int c) {
    for (size_t i = 0 ; i < s.length ; i++) {
        if (s.ptr[i] == c) {
            return sstrsubs(s, i);
        }
    }
    sstr_t n;
    n.ptr = NULL;
    n.length = 0;
    return n;
}

/**
 * Finds the last occurrence of a character.
 *
 * @param s the string
 * @param c the character to look for
 * @return the substring starting at the last occurrence, or NULL pointer /
 *         zero length if c does not occur
 */
sstr_t sstrrchr(sstr_t s, int c) {
    /* i runs one ahead of the inspected index so the unsigned counter
     * never has to go below zero */
    for (size_t i = s.length ; i > 0 ; i--) {
        if (s.ptr[i-1] == c) {
            return sstrsubs(s, i-1);
        }
    }
    sstr_t n;
    n.ptr = NULL;
    n.length = 0;
    return n;
}

/* Read entry `index` of the prefix table into `dest`. The table holds
 * size_t entries when heap allocated and uint8_t entries otherwise. */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

/* Write `src` to entry `index` of the prefix table. */
#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)

/**
 * Finds the first occurrence of match in string.
 *
 * The search uses a prefix table. Patterns of up to 255 characters use a
 * small table on the stack; longer patterns use a heap allocated table.
 * If that allocation fails, a table-free scan is used instead, so the
 * result does not depend on the availability of memory.
 *
 * @param string the string to search in
 * @param match  the string to search for
 * @return string itself if match is empty; otherwise the substring of
 *         string starting at the first occurrence of match, or NULL
 *         pointer / zero length if there is none
 */
sstr_t sstrstr(sstr_t string, sstr_t match) {
    if (match.length == 0) {
        return string;
    }

    /* prepare default return value in case of no match */
    sstr_t result = sstrn(NULL, 0);

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* small prefix table; automatic storage keeps the function reentrant.
     * Every entry is written before it is read, so no initialisation
     * is required. */
    uint8_t small_prefix_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the small prefix table, allocate on the heap */
    int useheap = match.length > 255;
    void *ptable = useheap ?
        calloc(match.length + 1, sizeof(size_t)) : small_prefix_table;

    if (!ptable) {
        /* out of memory: fall back to a scan that needs no table */
        if (match.length <= string.length) {
            size_t last = string.length - match.length;
            for (size_t k = 0 ; k <= last ; k++) {
                if (memcmp(string.ptr + k, match.ptr, match.length) == 0) {
                    result.ptr = string.ptr + k;
                    result.length = string.length - k;
                    break;
                }
            }
        }
        return result;
    }

    size_t i, j;

    /* fill prefix table */
    i = 0;
    j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < match.length) {
        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++;
        j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0;
    j = 1;
    while (i < string.length) {
        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++;
        j++;
        if (j-1 == match.length) {
            size_t start = i - match.length;
            result.ptr = string.ptr + start;
            result.length = string.length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (ptable != small_prefix_table) {
        free(ptable);
    }

    return result;
}

#undef ptable_r
#undef ptable_w

/**
 * Splits a string at each occurrence of a delimiter using the default
 * allocator.
 *
 * @param s the string to split
 * @param d the delimiter
 * @param n see sstrsplit_a()
 * @return see sstrsplit_a()
 */
sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
    return sstrsplit_a(ucx_default_allocator(), s, d, n);
}

/**
 * Splits a string at each occurrence of a delimiter.
 *
 * Each item of the returned array is a copy created with sstrdup_a(); the
 * array and the items are allocated through the given allocator.
 *
 * @param allocator the allocator for the array and its items
 * @param s         the string to split
 * @param d         the delimiter
 * @param n         on input, the maximum number of items (0 for no limit);
 *                  on output, the number of items in the returned array,
 *                  0 if s equals d, -1 if s or d is empty, or
 *                  -2 if an array allocation failed
 * @return the array of items, or NULL if *n is not positive on output
 */
sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
    if (s.length == 0 || d.length == 0) {
        *n = -1;
        return NULL;
    }

    /* special cases: delimiter is at least as large as the string */
    if (d.length >= s.length) {
        /* exact match */
        if (sstrcmp(s, d) == 0) {
            *n = 0;
            return NULL;
        } else /* no match possible */ {
            sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
            if (result) {
                *result = sstrdup_a(allocator, s);
                *n = 1;
            } else {
                *n = -2;
            }
            return result;
        }
    }

    ssize_t nmax = *n;
    size_t arrlen = 16;
    sstr_t *result = (sstr_t*) almalloc(allocator, arrlen * sizeof(sstr_t));

    if (result) {
        sstr_t curpos = s;
        ssize_t j = 1;
        while (1) {
            sstr_t match;
            /* optimize for one byte delimiters */
            if (d.length == 1) {
                /* shrink match from the front until the delimiter is hit;
                 * a zero length afterwards means "not found" */
                match = curpos;
                for (size_t i = 0 ; i < curpos.length ; i++) {
                    if (curpos.ptr[i] == *(d.ptr)) {
                        match.ptr = curpos.ptr + i;
                        break;
                    }
                    match.length--;
                }
            } else {
                match = sstrstr(curpos, d);
            }
            if (match.length > 0) {
                /* is this our last try? */
                if (nmax == 0 || j < nmax) {
                    /* copy the current string to the array */
                    sstr_t item = sstrn(curpos.ptr,
                            (size_t) (match.ptr - curpos.ptr));
                    result[j-1] = sstrdup_a(allocator, item);
                    size_t processed = item.length + d.length;
                    curpos.ptr += processed;
                    curpos.length -= processed;

                    /* allocate memory for the next string */
                    j++;
                    if ((size_t) j > arrlen) {
                        arrlen *= 2;
                        sstr_t *reallocated = (sstr_t*) alrealloc(
                                allocator, result, arrlen * sizeof(sstr_t));
                        if (reallocated) {
                            result = reallocated;
                        } else {
                            /* release the j-1 items stored so far */
                            for (ssize_t i = 0 ; i < j-1 ; i++) {
                                alfree(allocator, result[i].ptr);
                            }
                            alfree(allocator, result);
                            *n = -2;
                            return NULL;
                        }
                    }
                } else {
                    /* nmax reached, copy the _full_ remaining string */
                    result[j-1] = sstrdup_a(allocator, curpos);
                    break;
                }
            } else {
                /* no more matches, copy last string */
                result[j-1] = sstrdup_a(allocator, curpos);
                break;
            }
        }
        *n = j;
    } else {
        *n = -2;
    }

    return result;
}

/**
 * Compares two strings.
 *
 * Strings of different length are ordered by length alone; strings of
 * equal length are compared with memcmp().
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return -1 if s1 is shorter, 1 if s1 is longer, otherwise the memcmp()
 *         result
 */
int sstrcmp(sstr_t s1, sstr_t s2) {
    if (s1.length == s2.length) {
        return memcmp(s1.ptr, s2.ptr, s1.length);
    } else if (s1.length > s2.length) {
        return 1;
    } else {
        return -1;
    }
}

/**
 * Compares two strings ignoring case.
 *
 * Strings of different length are ordered by length alone; strings of
 * equal length are compared with the platform's case-insensitive
 * comparison function.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return -1 if s1 is shorter, 1 if s1 is longer, otherwise the result of
 *         the case-insensitive comparison
 */
int sstrcasecmp(sstr_t s1, sstr_t s2) {
    if (s1.length == s2.length) {
#ifdef _WIN32
        return _strnicmp(s1.ptr, s2.ptr, s1.length);
#else
        return strncasecmp(s1.ptr, s2.ptr, s1.length);
#endif
    } else if (s1.length > s2.length) {
        return 1;
    } else {
        return -1;
    }
}

/**
 * Creates a NUL-terminated copy of a string using the default allocator.
 *
 * @param s the string to duplicate
 * @return see sstrdup_a()
 */
sstr_t sstrdup(sstr_t s) {
    return sstrdup_a(ucx_default_allocator(), s);
}

/**
 * Creates a NUL-terminated copy of a string.
 *
 * @param allocator the allocator for the copy
 * @param s         the string to duplicate
 * @return the copy, or a string with NULL pointer and zero length if the
 *         allocation fails
 */
sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
    sstr_t newstring;
    newstring.ptr = (char*) almalloc(allocator, s.length + 1);
    if (newstring.ptr) {
        newstring.length = s.length;
        newstring.ptr[newstring.length] = 0;

        memcpy(newstring.ptr, s.ptr, s.length);
    } else {
        newstring.length = 0;
    }

    return newstring;
}

/**
 * Strips leading and trailing whitespace, as classified by isspace().
 *
 * No characters are copied; the result refers to the memory of string.
 *
 * @param string the string to trim
 * @return the trimmed view
 */
sstr_t sstrtrim(sstr_t string) {
    sstr_t newstr = string;

    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour */
    while (newstr.length > 0 && isspace((unsigned char) *newstr.ptr)) {
        newstr.ptr++;
        newstr.length--;
    }
    while (newstr.length > 0
            && isspace((unsigned char) newstr.ptr[newstr.length-1])) {
        newstr.length--;
    }

    return newstr;
}

/**
 * Checks whether string starts with prefix.
 *
 * @param string the string to check
 * @param prefix the expected prefix
 * @return 1 if string starts with prefix (an empty prefix always matches),
 *         0 otherwise
 */
int sstrprefix(sstr_t string, sstr_t prefix) {
    if (string.length == 0) {
        return prefix.length == 0;
    }
    if (prefix.length == 0) {
        return 1;
    }

    if (prefix.length > string.length) {
        return 0;
    } else {
        return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
    }
}

/**
 * Checks whether string ends with suffix.
 *
 * @param string the string to check
 * @param suffix the expected suffix
 * @return 1 if string ends with suffix (an empty suffix always matches),
 *         0 otherwise
 */
int sstrsuffix(sstr_t string, sstr_t suffix) {
    if (string.length == 0) {
        return suffix.length == 0;
    }
    if (suffix.length == 0) {
        return 1;
    }

    if (suffix.length > string.length) {
        return 0;
    } else {
        return memcmp(string.ptr + string.length - suffix.length,
            suffix.ptr, suffix.length) == 0;
    }
}

/**
 * Returns a lower case copy of a string using the default allocator.
 *
 * @param string the string to convert
 * @return see sstrlower_a()
 */
sstr_t sstrlower(sstr_t string) {
    return sstrlower_a(ucx_default_allocator(), string);
}

/**
 * Returns a lower case copy of a string.
 *
 * @param allocator the allocator for the copy
 * @param string    the string to convert
 * @return the copy with every character passed through tolower(); if the
 *         duplication fails, the result has zero length
 */
sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
    sstr_t ret = sstrdup_a(allocator, string);
    for (size_t i = 0 ; i < ret.length ; i++) {
        /* cast first: tolower() is undefined for negative char values */
        ret.ptr[i] = (char) tolower((unsigned char) ret.ptr[i]);
    }
    return ret;
}

/**
 * Returns an upper case copy of a string using the default allocator.
 *
 * @param string the string to convert
 * @return see sstrupper_a()
 */
sstr_t sstrupper(sstr_t string) {
    return sstrupper_a(ucx_default_allocator(), string);
}

/**
 * Returns an upper case copy of a string.
 *
 * @param allocator the allocator for the copy
 * @param string    the string to convert
 * @return the copy with every character passed through toupper(); if the
 *         duplication fails, the result has zero length
 */
sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
    sstr_t ret = sstrdup_a(allocator, string);
    for (size_t i = 0 ; i < ret.length ; i++) {
        /* cast first: toupper() is undefined for negative char values */
        ret.ptr[i] = (char) toupper((unsigned char) ret.ptr[i]);
    }
    return ret;
}