diff -r ddfead6ea863 -r 4784c14aa639 src/ucx/string.c --- a/src/ucx/string.c Sun Aug 23 22:02:01 2020 +0200 +++ b/src/ucx/string.c Sun Aug 23 23:04:17 2020 +0200 @@ -1,7 +1,7 @@ /* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * - * Copyright 2016 Olaf Wintermann. All rights reserved. + * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,13 +26,19 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "ucx/string.h" + +#include "ucx/allocator.h" + #include #include #include +#include #include -#include "string.h" -#include "allocator.h" +#ifndef _WIN32 +#include /* for strncasecmp() */ +#endif /* _WIN32 */ sstr_t sstr(char *cstring) { sstr_t string; @@ -48,13 +54,35 @@ return string; } -size_t sstrnlen(size_t n, sstr_t s, ...) { +scstr_t scstr(const char *cstring) { + scstr_t string; + string.ptr = cstring; + string.length = strlen(cstring); + return string; +} + +scstr_t scstrn(const char *cstring, size_t length) { + scstr_t string; + string.ptr = cstring; + string.length = length; + return string; +} + + +size_t scstrnlen(size_t n, ...) { + if (n == 0) return 0; + va_list ap; - size_t size = s.length; - va_start(ap, s); + va_start(ap, n); + + size_t size = 0; - for (size_t i = 1 ; i < n ; i++) { - sstr_t str = va_arg(ap, sstr_t); + for (size_t i = 0 ; i < n ; i++) { + scstr_t str = va_arg(ap, scstr_t); + if(SIZE_MAX - str.length < size) { + size = SIZE_MAX; + break; + } size += str.length; } va_end(ap); @@ -65,8 +93,7 @@ static sstr_t sstrvcat_a( UcxAllocator *a, size_t count, - sstr_t s1, - sstr_t s2, + scstr_t s1, va_list ap) { sstr_t str; str.ptr = NULL; @@ -75,7 +102,13 @@ return str; } - sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t)); + scstr_t s2 = va_arg (ap, scstr_t); + + if(((size_t)-1) - s1.length < s2.length) { + return str; + } + + scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); if(!strings) { return str; } @@ -83,16 +116,25 @@ // get all args and overall length strings[0] = s1; strings[1] = s2; - size_t strlen = s1.length + s2.length; + size_t slen = s1.length + s2.length; + int error = 0; for (size_t i=2;i str_length) { + return 0; + } + + if(length > str_length - start) { + length = str_length - start; + } + *newlen = length; + *newpos = start; + return 1; } sstr_t sstrsubs(sstr_t s, size_t start) { @@ -135,132 +199,301 @@ } sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { - sstr_t new_sstr; - if (start >= s.length) { - new_sstr.ptr = NULL; - new_sstr.length = 0; - } else { - if (length > s.length-start) { - length = s.length-start; + size_t pos; + sstr_t ret = { NULL, 0 }; + if(ucx_substring(s.length, start, length, &ret.length, &pos)) { + ret.ptr = s.ptr + pos; + } + return ret; +} + +scstr_t scstrsubs(scstr_t string, size_t start) { + return scstrsubsl(string, start, string.length-start); +} + +scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { + size_t pos; + scstr_t ret = { NULL, 0 }; + if(ucx_substring(s.length, start, length, &ret.length, &pos)) { + ret.ptr = s.ptr + pos; + } + return ret; +} + + +static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { + for(size_t i=0;i 0) { + for(size_t i=length ; i>0 ; i--) { + if(str[i-1] == chr) { + *pos = i-1; + return 1; + } + } + } + return 0; } sstr_t sstrchr(sstr_t s, int c) { - for(size_t i=0;i 0) { - for(size_t i=s.length;i>0;i--) { - if(s.ptr[i-1] == c) { - return sstrsubs(s, i-1); - } - } + size_t pos = 0; + if(ucx_strrchr(s.ptr, s.length, c, &pos)) { + return sstrsubs(s, pos); } - sstr_t n; - n.ptr = NULL; - n.length = 0; - return n; + return sstrn(NULL, 0); +} + +scstr_t scstrchr(scstr_t s, int c) { + size_t pos = 0; + if(ucx_strchr(s.ptr, s.length, c, &pos)) { + return scstrsubs(s, pos); + } + return scstrn(NULL, 0); } -sstr_t sstrstr(sstr_t string, sstr_t match) { - if (match.length == 0) { - return string; +scstr_t scstrrchr(scstr_t s, int c) { + size_t pos = 0; + if(ucx_strrchr(s.ptr, s.length, c, &pos)) { + return scstrsubs(s, pos); + } + return scstrn(NULL, 0); +} + +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) + +#define ptable_w(useheap, ptable, index, src) do {\ + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ + else ((size_t*)ptable)[index] = src;\ + } while (0); + + +static const char* ucx_strstr( + const char *str, + size_t length, + const char *match, + size_t matchlen, + size_t *newlen) +{ + *newlen = length; + if (matchlen == 0) { + return str; } - for (size_t i = 0 ; i < string.length ; i++) { - sstr_t substr = sstrsubs(string, i); - if (sstrprefix(substr, match)) { - return substr; + const char *result = NULL; + size_t resultlen = 0; + + /* + * IMPORTANT: + * our prefix table contains the prefix length PLUS ONE + * this is our decision, because we want to use the full range of size_t + * the original algorithm needs a (-1) at one single place + * and we want to avoid that + */ + + /* static prefix table */ + static uint8_t s_prefix_table[256]; + + /* check pattern length and use appropriate prefix table */ + /* if the pattern exceeds static prefix table, allocate on the heap */ + register int useheap = matchlen > 255; + register void* ptable = useheap ? + calloc(matchlen+1, sizeof(size_t)): s_prefix_table; + + /* keep counter in registers */ + register size_t i, j; + + /* fill prefix table */ + i = 0; j = 0; + ptable_w(useheap, ptable, i, j); + while (i < matchlen) { + while (j >= 1 && match[j-1] != match[i]) { + ptable_r(j, useheap, ptable, j-1); } + i++; j++; + ptable_w(useheap, ptable, i, j); + } + + /* search */ + i = 0; j = 1; + while (i < length) { + while (j >= 1 && str[i] != match[j-1]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + if (j-1 == matchlen) { + size_t start = i - matchlen; + result = str + start; + resultlen = length - start; + break; + } + } + + /* if prefix table was allocated on the heap, free it */ + if (ptable != s_prefix_table) { + free(ptable); } - sstr_t emptystr; - emptystr.length = 0; - emptystr.ptr = NULL; - return emptystr; + *newlen = resultlen; + return result; +} + +sstr_t scstrsstr(sstr_t string, scstr_t match) { + sstr_t result; + + size_t reslen; + const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); + if(!resstr) { + result.ptr = NULL; + result.length = 0; + return result; + } + + size_t pos = resstr - string.ptr; + result.ptr = string.ptr + pos; + result.length = reslen; + + return result; } -sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { - return sstrsplit_a(ucx_default_allocator(), s, d, n); +scstr_t scstrscstr(scstr_t string, scstr_t match) { + scstr_t result; + + size_t reslen; + const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); + if(!resstr) { + result.ptr = NULL; + result.length = 0; + return result; + } + + size_t pos = resstr - string.ptr; + result.ptr = string.ptr + pos; + result.length = reslen; + + return result; } -sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { +#undef ptable_r +#undef ptable_w + +sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { + return scstrsplit_a(ucx_default_allocator(), s, d, n); +} + +sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { if (s.length == 0 || d.length == 0) { *n = -1; return NULL; } - - sstr_t* result; - ssize_t nmax = *n; - *n = 1; - - /* special case: exact match - no processing needed */ - if (sstrcmp(s, d) == 0) { - *n = 0; - return NULL; + + /* special cases: delimiter is at least as large as the string */ + if (d.length >= s.length) { + /* exact match */ + if (sstrcmp(s, d) == 0) { + *n = 0; + return NULL; + } else /* no match possible */ { + *n = 1; + sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); + if(result) { + *result = sstrdup_a(allocator, s); + } else { + *n = -2; + } + return result; + } } - sstr_t sv = sstrdup(s); - if (sv.length == 0) { - *n = -2; - return NULL; - } - - for (size_t i = 0 ; i < s.length ; i++) { - sstr_t substr = sstrsubs(sv, i); - if (sstrprefix(substr, d)) { - (*n)++; - for (size_t j = 0 ; j < d.length ; j++) { - sv.ptr[i+j] = 0; - } - i += d.length - 1; // -1, because the loop will do a i++ - } - if ((*n) == nmax) break; - } - result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); + + ssize_t nmax = *n; + size_t arrlen = 16; + sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); if (result) { - char *pptr = sv.ptr; - for (ssize_t i = 0 ; i < *n ; i++) { - size_t l = strlen(pptr); - char* ptr = (char*) almalloc(allocator, l + 1); - if (ptr) { - memcpy(ptr, pptr, l); - ptr[l] = 0; + scstr_t curpos = s; + ssize_t j = 1; + while (1) { + scstr_t match; + /* optimize for one byte delimiters */ + if (d.length == 1) { + match = curpos; + for (size_t i = 0 ; i < curpos.length ; i++) { + if (curpos.ptr[i] == *(d.ptr)) { + match.ptr = curpos.ptr + i; + break; + } + match.length--; + } + } else { + match = scstrscstr(curpos, d); + } + if (match.length > 0) { + /* is this our last try? */ + if (nmax == 0 || j < nmax) { + /* copy the current string to the array */ + scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); + result[j-1] = sstrdup_a(allocator, item); + size_t processed = item.length + d.length; + curpos.ptr += processed; + curpos.length -= processed; - result[i] = sstrn(ptr, l); - pptr += l + d.length; + /* allocate memory for the next string */ + j++; + if (j > arrlen) { + arrlen *= 2; + size_t reallocsz; + sstr_t* reallocated = NULL; + if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { + reallocated = (sstr_t*) alrealloc( + allocator, result, reallocsz); + } + if (reallocated) { + result = reallocated; + } else { + for (ssize_t i = 0 ; i < j-1 ; i++) { + alfree(allocator, result[i].ptr); + } + alfree(allocator, result); + *n = -2; + return NULL; + } + } + } else { + /* nmax reached, copy the _full_ remaining string */ + result[j-1] = sstrdup_a(allocator, curpos); + break; + } } else { - for (ssize_t j = i-1 ; j >= 0 ; j--) { - alfree(allocator, result[j].ptr); - } - alfree(allocator, result); - *n = -2; + /* no more matches, copy last string */ + result[j-1] = sstrdup_a(allocator, curpos); break; } } + *n = j; } else { *n = -2; } - - free(sv.ptr); return result; } -int sstrcmp(sstr_t s1, sstr_t s2) { +int scstrcmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { @@ -270,7 +503,7 @@ } } -int sstrcasecmp(sstr_t s1, sstr_t s2) { +int scstrcasecmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); @@ -284,11 +517,11 @@ } } -sstr_t sstrdup(sstr_t s) { +sstr_t scstrdup(scstr_t s) { return sstrdup_a(ucx_default_allocator(), s); } -sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) { +sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { sstr_t newstring; newstring.ptr = (char*)almalloc(allocator, s.length + 1); if (newstring.ptr) { @@ -303,21 +536,38 @@ return newstring; } -sstr_t sstrtrim(sstr_t string) { - sstr_t newstr = string; + +static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { + const char *newptr = s; + size_t length = len; - while (newstr.length > 0 && isspace(*newstr.ptr)) { - newstr.ptr++; - newstr.length--; + while(length > 0 && isspace(*newptr)) { + newptr++; + length--; } - while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) { - newstr.length--; + while(length > 0 && isspace(newptr[length-1])) { + length--; } + *newlen = length; + return newptr - s; +} + +sstr_t sstrtrim(sstr_t string) { + sstr_t newstr; + newstr.ptr = string.ptr + + ucx_strtrim(string.ptr, string.length, &newstr.length); return newstr; } -int sstrprefix(sstr_t string, sstr_t prefix) { +scstr_t scstrtrim(scstr_t string) { + scstr_t newstr; + newstr.ptr = string.ptr + + ucx_strtrim(string.ptr, string.length, &newstr.length); + return newstr; +} + +int scstrprefix(scstr_t string, scstr_t prefix) { if (string.length == 0) { return prefix.length == 0; } @@ -332,7 +582,7 @@ } } -int sstrsuffix(sstr_t string, sstr_t suffix) { +int scstrsuffix(scstr_t string, scstr_t suffix) { if (string.length == 0) { return suffix.length == 0; } @@ -348,7 +598,39 @@ } } -sstr_t sstrlower(sstr_t string) { +int scstrcaseprefix(scstr_t string, scstr_t prefix) { + if (string.length == 0) { + return prefix.length == 0; + } + if (prefix.length == 0) { + return 1; + } + + if (prefix.length > string.length) { + return 0; + } else { + scstr_t subs = scstrsubsl(string, 0, prefix.length); + return scstrcasecmp(subs, prefix) == 0; + } +} + +int scstrcasesuffix(scstr_t string, scstr_t suffix) { + if (string.length == 0) { + return suffix.length == 0; + } + if (suffix.length == 0) { + return 1; + } + + if (suffix.length > string.length) { + return 0; + } else { + scstr_t subs = scstrsubs(string, string.length-suffix.length); + return scstrcasecmp(subs, suffix) == 0; + } +} + +sstr_t scstrlower(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); @@ -356,7 +638,7 @@ return ret; } -sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { +sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); @@ -364,7 +646,7 @@ return ret; } -sstr_t sstrupper(sstr_t string) { +sstr_t scstrupper(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); @@ -372,10 +654,24 @@ return ret; } -sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { +sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); } return ret; } + +// type adjustment functions +scstr_t ucx_sc2sc(scstr_t str) { + return str; +} +scstr_t ucx_ss2sc(sstr_t str) { + scstr_t cs; + cs.ptr = str.ptr; + cs.length = str.length; + return cs; +} +scstr_t ucx_ss2c_s(scstr_t c) { + return c; +}