--- a/ucx/string.c Sun May 23 09:44:43 2021 +0200 +++ b/ucx/string.c Sat Jan 04 16:38:48 2025 +0100 @@ -1,7 +1,7 @@ /* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * - * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,63 +26,71 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "ucx/string.h" +#include "cx/string.h" +#include "cx/utils.h" -#include "ucx/allocator.h" - -#include <stdlib.h> #include <string.h> #include <stdarg.h> -#include <stdint.h> #include <ctype.h> #ifndef _WIN32 -#include <strings.h> /* for strncasecmp() */ -#endif /* _WIN32 */ + +#include <strings.h> // for strncasecmp() -sstr_t sstr(char *cstring) { - sstr_t string; - string.ptr = cstring; - string.length = strlen(cstring); - return string; +#endif // _WIN32 + +cxmutstr cx_mutstr(char *cstring) { + return (cxmutstr) {cstring, strlen(cstring)}; } -sstr_t sstrn(char *cstring, size_t length) { - sstr_t string; - string.ptr = cstring; - string.length = length; - return string; +cxmutstr cx_mutstrn( + char *cstring, + size_t length +) { + return (cxmutstr) {cstring, length}; +} + +cxstring cx_str(const char *cstring) { + return (cxstring) {cstring, strlen(cstring)}; +} + +cxstring cx_strn( + const char *cstring, + size_t length +) { + return (cxstring) {cstring, length}; } -scstr_t scstr(const char *cstring) { - scstr_t string; - string.ptr = cstring; - string.length = strlen(cstring); - return string; +cxstring cx_strcast(cxmutstr str) { + return (cxstring) {str.ptr, str.length}; } -scstr_t scstrn(const char *cstring, size_t length) { - scstr_t string; - string.ptr = cstring; - string.length = length; - return string; +void cx_strfree(cxmutstr *str) { + free(str->ptr); + str->ptr = NULL; + str->length = 0; } +void cx_strfree_a( + const CxAllocator *alloc, + cxmutstr *str +) { + cxFree(alloc, str->ptr); + str->ptr = NULL; + str->length = 0; +} -size_t scstrnlen(size_t n, ...) { - if (n == 0) return 0; - +size_t cx_strlen( + size_t count, + ... +) { + if (count == 0) return 0; + va_list ap; - va_start(ap, n); - + va_start(ap, count); size_t size = 0; - - for (size_t i = 0 ; i < n ; i++) { - scstr_t str = va_arg(ap, scstr_t); - if(SIZE_MAX - str.length < size) { - size = SIZE_MAX; - break; - } + cx_for_n(i, count) { + cxstring str = va_arg(ap, cxstring); size += str.length; } va_end(ap); @@ -90,410 +98,341 @@ return size; } -static sstr_t sstrvcat_a( - UcxAllocator *a, +cxmutstr cx_strcat_ma( + const CxAllocator *alloc, + cxmutstr str, size_t count, - scstr_t s1, - va_list ap) { - sstr_t str; - str.ptr = NULL; - str.length = 0; - if(count < 2) { - return str; - } - - scstr_t s2 = va_arg (ap, scstr_t); - - if(((size_t)-1) - s1.length < s2.length) { - return str; - } - - scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); - if(!strings) { - return str; - } - + ... +) { + if (count == 0) return str; + + cxstring *strings = calloc(count, sizeof(cxstring)); + if (!strings) abort(); + + va_list ap; + va_start(ap, count); + // get all args and overall length - strings[0] = s1; - strings[1] = s2; - size_t slen = s1.length + s2.length; - int error = 0; - for (size_t i=2;i<count;i++) { - scstr_t s = va_arg (ap, scstr_t); + size_t slen = str.length; + cx_for_n(i, count) { + cxstring s = va_arg (ap, cxstring); strings[i] = s; - if(((size_t)-1) - s.length < slen) { - error = 1; - break; - } slen += s.length; } - if(error) { - free(strings); - return str; + va_end(ap); + + // reallocate or create new string + if (str.ptr == NULL) { + str.ptr = cxMalloc(alloc, slen + 1); + } else { + str.ptr = cxRealloc(alloc, str.ptr, slen + 1); } - - // create new string - str.ptr = (char*) almalloc(a, slen + 1); + if (str.ptr == NULL) abort(); + + // concatenate strings + size_t pos = str.length; str.length = slen; - if(!str.ptr) { - free(strings); - str.length = 0; - return str; - } - - // concatenate strings - size_t pos = 0; - for (size_t i=0;i<count;i++) { - scstr_t s = strings[i]; + cx_for_n(i, count) { + cxstring s = strings[i]; memcpy(str.ptr + pos, s.ptr, s.length); pos += s.length; } - + + // terminate string str.ptr[str.length] = '\0'; - + + // free temporary array free(strings); - + return str; } -sstr_t scstrcat(size_t count, scstr_t s1, ...) { - va_list ap; - va_start(ap, s1); - sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); - va_end(ap); - return s; -} - -sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { - va_list ap; - va_start(ap, s1); - sstr_t s = sstrvcat_a(a, count, s1, ap); - va_end(ap); - return s; +cxstring cx_strsubs( + cxstring string, + size_t start +) { + return cx_strsubsl(string, start, string.length - start); } -static int ucx_substring( - size_t str_length, - size_t start, - size_t length, - size_t *newlen, - size_t *newpos) -{ - *newlen = 0; - *newpos = 0; - - if(start > str_length) { - return 0; - } - - if(length > str_length - start) { - length = str_length - start; - } - *newlen = length; - *newpos = start; - return 1; -} - -sstr_t sstrsubs(sstr_t s, size_t start) { - return sstrsubsl (s, start, s.length-start); +cxmutstr cx_strsubs_m( + cxmutstr string, + size_t start +) { + return cx_strsubsl_m(string, start, string.length - start); } -sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { - size_t pos; - sstr_t ret = { NULL, 0 }; - if(ucx_substring(s.length, start, length, &ret.length, &pos)) { - ret.ptr = s.ptr + pos; +cxstring cx_strsubsl( + cxstring string, + size_t start, + size_t length +) { + if (start > string.length) { + return (cxstring) {NULL, 0}; } - return ret; -} -scstr_t scstrsubs(scstr_t string, size_t start) { - return scstrsubsl(string, start, string.length-start); -} + size_t rem_len = string.length - start; + if (length > rem_len) { + length = rem_len; + } -scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { - size_t pos; - scstr_t ret = { NULL, 0 }; - if(ucx_substring(s.length, start, length, &ret.length, &pos)) { - ret.ptr = s.ptr + pos; - } - return ret; + return (cxstring) {string.ptr + start, length}; } - -static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { - for(size_t i=0;i<length;i++) { - if(str[i] == chr) { - *pos = i; - return 1; - } - } - return 0; +cxmutstr cx_strsubsl_m( + cxmutstr string, + size_t start, + size_t length +) { + cxstring result = cx_strsubsl(cx_strcast(string), start, length); + return (cxmutstr) {(char *) result.ptr, result.length}; } -static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { - if(length > 0) { - for(size_t i=length ; i>0 ; i--) { - if(str[i-1] == chr) { - *pos = i-1; - return 1; - } +cxstring cx_strchr( + cxstring string, + int chr +) { + chr = 0xFF & chr; + // TODO: improve by comparing multiple bytes at once + cx_for_n(i, string.length) { + if (string.ptr[i] == chr) { + return cx_strsubs(string, i); } } - return 0; + return (cxstring) {NULL, 0}; +} + +cxmutstr cx_strchr_m( + cxmutstr string, + int chr +) { + cxstring result = cx_strchr(cx_strcast(string), chr); + return (cxmutstr) {(char *) result.ptr, result.length}; } -sstr_t sstrchr(sstr_t s, int c) { - size_t pos = 0; - if(ucx_strchr(s.ptr, s.length, c, &pos)) { - return sstrsubs(s, pos); +cxstring cx_strrchr( + cxstring string, + int chr +) { + chr = 0xFF & chr; + size_t i = string.length; + while (i > 0) { + i--; + // TODO: improve by comparing multiple bytes at once + if (string.ptr[i] == chr) { + return cx_strsubs(string, i); + } } - return sstrn(NULL, 0); + return (cxstring) {NULL, 0}; } -sstr_t sstrrchr(sstr_t s, int c) { - size_t pos = 0; - if(ucx_strrchr(s.ptr, s.length, c, &pos)) { - return sstrsubs(s, pos); - } - return sstrn(NULL, 0); -} - -scstr_t scstrchr(scstr_t s, int c) { - size_t pos = 0; - if(ucx_strchr(s.ptr, s.length, c, &pos)) { - return scstrsubs(s, pos); - } - return scstrn(NULL, 0); -} - -scstr_t scstrrchr(scstr_t s, int c) { - size_t pos = 0; - if(ucx_strrchr(s.ptr, s.length, c, &pos)) { - return scstrsubs(s, pos); - } - return scstrn(NULL, 0); +cxmutstr cx_strrchr_m( + cxmutstr string, + int chr +) { + cxstring result = cx_strrchr(cx_strcast(string), chr); + return (cxmutstr) {(char *) result.ptr, result.length}; } -#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ - ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) - -#define ptable_w(useheap, ptable, index, src) do {\ - if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ - else ((size_t*)ptable)[index] = src;\ - } while (0); - +#ifndef CX_STRSTR_SBO_SIZE +#define CX_STRSTR_SBO_SIZE 512 +#endif +unsigned const cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE; -static const char* ucx_strstr( - const char *str, - size_t length, - const char *match, - size_t matchlen, - size_t *newlen) -{ - *newlen = length; - if (matchlen == 0) { - return str; +cxstring cx_strstr( + cxstring haystack, + cxstring needle +) { + if (needle.length == 0) { + return haystack; } - - const char *result = NULL; - size_t resultlen = 0; - + + // optimize for single-char needles + if (needle.length == 1) { + return cx_strchr(haystack, *needle.ptr); + } + /* * IMPORTANT: - * our prefix table contains the prefix length PLUS ONE - * this is our decision, because we want to use the full range of size_t - * the original algorithm needs a (-1) at one single place - * and we want to avoid that + * Our prefix table contains the prefix length PLUS ONE + * this is our decision, because we want to use the full range of size_t. + * The original algorithm needs a (-1) at one single place, + * and we want to avoid that. */ - - /* static prefix table */ - static uint8_t s_prefix_table[256]; - - /* check pattern length and use appropriate prefix table */ - /* if the pattern exceeds static prefix table, allocate on the heap */ - register int useheap = matchlen > 255; - register void* ptable = useheap ? - calloc(matchlen+1, sizeof(size_t)): s_prefix_table; - - /* keep counter in registers */ + + // local prefix table + size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; + + // check needle length and use appropriate prefix table + // if the pattern exceeds static prefix table, allocate on the heap + bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; + register size_t *ptable = useheap ? calloc(needle.length + 1, + sizeof(size_t)) : s_prefix_table; + + // keep counter in registers register size_t i, j; - - /* fill prefix table */ - i = 0; j = 0; - ptable_w(useheap, ptable, i, j); - while (i < matchlen) { - while (j >= 1 && match[j-1] != match[i]) { - ptable_r(j, useheap, ptable, j-1); + + // fill prefix table + i = 0; + j = 0; + ptable[i] = j; + while (i < needle.length) { + while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { + j = ptable[j - 1]; } - i++; j++; - ptable_w(useheap, ptable, i, j); + i++; + j++; + ptable[i] = j; } - /* search */ - i = 0; j = 1; - while (i < length) { - while (j >= 1 && str[i] != match[j-1]) { - ptable_r(j, useheap, ptable, j-1); + // search + cxstring result = {NULL, 0}; + i = 0; + j = 1; + while (i < haystack.length) { + while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { + j = ptable[j - 1]; } - i++; j++; - if (j-1 == matchlen) { - size_t start = i - matchlen; - result = str + start; - resultlen = length - start; + i++; + j++; + if (j - 1 == needle.length) { + size_t start = i - needle.length; + result.ptr = haystack.ptr + start; + result.length = haystack.length - start; break; } } - /* if prefix table was allocated on the heap, free it */ + // if prefix table was allocated on the heap, free it if (ptable != s_prefix_table) { free(ptable); } - - *newlen = resultlen; - return result; -} - -sstr_t scstrsstr(sstr_t string, scstr_t match) { - sstr_t result; - - size_t reslen; - const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); - if(!resstr) { - result.ptr = NULL; - result.length = 0; - return result; - } - - size_t pos = resstr - string.ptr; - result.ptr = string.ptr + pos; - result.length = reslen; - - return result; -} - -scstr_t scstrscstr(scstr_t string, scstr_t match) { - scstr_t result; - - size_t reslen; - const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); - if(!resstr) { - result.ptr = NULL; - result.length = 0; - return result; - } - - size_t pos = resstr - string.ptr; - result.ptr = string.ptr + pos; - result.length = reslen; - - return result; -} - -#undef ptable_r -#undef ptable_w - -sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { - return scstrsplit_a(ucx_default_allocator(), s, d, n); -} - -sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { - if (s.length == 0 || d.length == 0) { - *n = -1; - return NULL; - } - - /* special cases: delimiter is at least as large as the string */ - if (d.length >= s.length) { - /* exact match */ - if (sstrcmp(s, d) == 0) { - *n = 0; - return NULL; - } else /* no match possible */ { - *n = 1; - sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); - if(result) { - *result = sstrdup_a(allocator, s); - } else { - *n = -2; - } - return result; - } - } - - ssize_t nmax = *n; - size_t arrlen = 16; - sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); - - if (result) { - scstr_t curpos = s; - ssize_t j = 1; - while (1) { - scstr_t match; - /* optimize for one byte delimiters */ - if (d.length == 1) { - match = curpos; - for (size_t i = 0 ; i < curpos.length ; i++) { - if (curpos.ptr[i] == *(d.ptr)) { - match.ptr = curpos.ptr + i; - break; - } - match.length--; - } - } else { - match = scstrscstr(curpos, d); - } - if (match.length > 0) { - /* is this our last try? */ - if (nmax == 0 || j < nmax) { - /* copy the current string to the array */ - scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); - result[j-1] = sstrdup_a(allocator, item); - size_t processed = item.length + d.length; - curpos.ptr += processed; - curpos.length -= processed; - - /* allocate memory for the next string */ - j++; - if (j > arrlen) { - arrlen *= 2; - size_t reallocsz; - sstr_t* reallocated = NULL; - if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { - reallocated = (sstr_t*) alrealloc( - allocator, result, reallocsz); - } - if (reallocated) { - result = reallocated; - } else { - for (ssize_t i = 0 ; i < j-1 ; i++) { - alfree(allocator, result[i].ptr); - } - alfree(allocator, result); - *n = -2; - return NULL; - } - } - } else { - /* nmax reached, copy the _full_ remaining string */ - result[j-1] = sstrdup_a(allocator, curpos); - break; - } - } else { - /* no more matches, copy last string */ - result[j-1] = sstrdup_a(allocator, curpos); - break; - } - } - *n = j; - } else { - *n = -2; - } return result; } -int scstrcmp(scstr_t s1, scstr_t s2) { +cxmutstr cx_strstr_m( + cxmutstr haystack, + cxstring needle +) { + cxstring result = cx_strstr(cx_strcast(haystack), needle); + return (cxmutstr) {(char *) result.ptr, result.length}; +} + +size_t cx_strsplit( + cxstring string, + cxstring delim, + size_t limit, + cxstring *output +) { + // special case: output limit is zero + if (limit == 0) return 0; + + // special case: delimiter is empty + if (delim.length == 0) { + output[0] = string; + return 1; + } + + // special cases: delimiter is at least as large as the string + if (delim.length >= string.length) { + // exact match + if (cx_strcmp(string, delim) == 0) { + output[0] = cx_strn(string.ptr, 0); + output[1] = cx_strn(string.ptr + string.length, 0); + return 2; + } else { + // no match possible + output[0] = string; + return 1; + } + } + + size_t n = 0; + cxstring curpos = string; + while (1) { + ++n; + cxstring match = cx_strstr(curpos, delim); + if (match.length > 0) { + // is the limit reached? + if (n < limit) { + // copy the current string to the array + cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); + output[n - 1] = item; + size_t processed = item.length + delim.length; + curpos.ptr += processed; + curpos.length -= processed; + } else { + // limit reached, copy the _full_ remaining string + output[n - 1] = curpos; + break; + } + } else { + // no more matches, copy last string + output[n - 1] = curpos; + break; + } + } + + return n; +} + +size_t cx_strsplit_a( + const CxAllocator *allocator, + cxstring string, + cxstring delim, + size_t limit, + cxstring **output +) { + // find out how many splits we're going to make and allocate memory + size_t n = 0; + cxstring curpos = string; + while (1) { + ++n; + cxstring match = cx_strstr(curpos, delim); + if (match.length > 0) { + // is the limit reached? + if (n < limit) { + size_t processed = match.ptr - curpos.ptr + delim.length; + curpos.ptr += processed; + curpos.length -= processed; + } else { + // limit reached + break; + } + } else { + // no more matches + break; + } + } + *output = cxCalloc(allocator, n, sizeof(cxstring)); + return cx_strsplit(string, delim, n, *output); +} + +size_t cx_strsplit_m( + cxmutstr string, + cxstring delim, + size_t limit, + cxmutstr *output +) { + return cx_strsplit(cx_strcast(string), + delim, limit, (cxstring *) output); +} + +size_t cx_strsplit_ma( + const CxAllocator *allocator, + cxmutstr string, + cxstring delim, + size_t limit, + cxmutstr **output +) { + return cx_strsplit_a(allocator, cx_strcast(string), + delim, limit, (cxstring **) output); +} + +int cx_strcmp( + cxstring s1, + cxstring s2 +) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { @@ -503,7 +442,10 @@ } } -int scstrcasecmp(scstr_t s1, scstr_t s2) { +int cx_strcasecmp( + cxstring s1, + cxstring s2 +) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); @@ -517,216 +459,186 @@ } } -sstr_t scstrdup(scstr_t s) { - return sstrdup_a(ucx_default_allocator(), s); +int cx_strcmp_p( + const void *s1, + const void *s2 +) { + const cxstring *left = s1; + const cxstring *right = s2; + return cx_strcmp(*left, *right); +} + +int cx_strcasecmp_p( + const void *s1, + const void *s2 +) { + const cxstring *left = s1; + const cxstring *right = s2; + return cx_strcasecmp(*left, *right); } -sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { - sstr_t newstring; - newstring.ptr = (char*)almalloc(allocator, s.length + 1); - if (newstring.ptr) { - newstring.length = s.length; - newstring.ptr[newstring.length] = 0; - - memcpy(newstring.ptr, s.ptr, s.length); - } else { - newstring.length = 0; +cxmutstr cx_strdup_a( + const CxAllocator *allocator, + cxstring string +) { + cxmutstr result = { + cxMalloc(allocator, string.length + 1), + string.length + }; + if (result.ptr == NULL) { + result.length = 0; + return result; } - - return newstring; + memcpy(result.ptr, string.ptr, string.length); + result.ptr[string.length] = '\0'; + return result; +} + +cxstring cx_strtrim(cxstring string) { + cxstring result = string; + // TODO: optimize by comparing multiple bytes at once + while (result.length > 0 && isspace(*result.ptr)) { + result.ptr++; + result.length--; + } + while (result.length > 0 && isspace(result.ptr[result.length - 1])) { + result.length--; + } + return result; } +cxmutstr cx_strtrim_m(cxmutstr string) { + cxstring result = cx_strtrim(cx_strcast(string)); + return (cxmutstr) {(char *) result.ptr, result.length}; +} -static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { - const char *newptr = s; - size_t length = len; - - while(length > 0 && isspace(*newptr)) { - newptr++; - length--; - } - while(length > 0 && isspace(newptr[length-1])) { - length--; - } - - *newlen = length; - return newptr - s; +bool cx_strprefix( + cxstring string, + cxstring prefix +) { + if (string.length < prefix.length) return false; + return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; +} + +bool cx_strsuffix( + cxstring string, + cxstring suffix +) { + if (string.length < suffix.length) return false; + return memcmp(string.ptr + string.length - suffix.length, + suffix.ptr, suffix.length) == 0; } -sstr_t sstrtrim(sstr_t string) { - sstr_t newstr; - newstr.ptr = string.ptr - + ucx_strtrim(string.ptr, string.length, &newstr.length); - return newstr; +bool cx_strcaseprefix( + cxstring string, + cxstring prefix +) { + if (string.length < prefix.length) return false; +#ifdef _WIN32 + return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; +#else + return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; +#endif } -scstr_t scstrtrim(scstr_t string) { - scstr_t newstr; - newstr.ptr = string.ptr - + ucx_strtrim(string.ptr, string.length, &newstr.length); - return newstr; +bool cx_strcasesuffix( + cxstring string, + cxstring suffix +) { + if (string.length < suffix.length) return false; +#ifdef _WIN32 + return _strnicmp(string.ptr+string.length-suffix.length, + suffix.ptr, suffix.length) == 0; +#else + return strncasecmp(string.ptr + string.length - suffix.length, + suffix.ptr, suffix.length) == 0; +#endif } -int scstrprefix(scstr_t string, scstr_t prefix) { - if (string.length == 0) { - return prefix.length == 0; - } - if (prefix.length == 0) { - return 1; - } - - if (prefix.length > string.length) { - return 0; - } else { - return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; +void cx_strlower(cxmutstr string) { + cx_for_n(i, string.length) { + string.ptr[i] = (char) tolower(string.ptr[i]); } } -int scstrsuffix(scstr_t string, scstr_t suffix) { - if (string.length == 0) { - return suffix.length == 0; - } - if (suffix.length == 0) { - return 1; - } - - if (suffix.length > string.length) { - return 0; - } else { - return memcmp(string.ptr+string.length-suffix.length, - suffix.ptr, suffix.length) == 0; - } -} - -int scstrcaseprefix(scstr_t string, scstr_t prefix) { - if (string.length == 0) { - return prefix.length == 0; - } - if (prefix.length == 0) { - return 1; - } - - if (prefix.length > string.length) { - return 0; - } else { - scstr_t subs = scstrsubsl(string, 0, prefix.length); - return scstrcasecmp(subs, prefix) == 0; +void cx_strupper(cxmutstr string) { + cx_for_n(i, string.length) { + string.ptr[i] = (char) toupper(string.ptr[i]); } } -int scstrcasesuffix(scstr_t string, scstr_t suffix) { - if (string.length == 0) { - return suffix.length == 0; - } - if (suffix.length == 0) { - return 1; - } - - if (suffix.length > string.length) { - return 0; - } else { - scstr_t subs = scstrsubs(string, string.length-suffix.length); - return scstrcasecmp(subs, suffix) == 0; - } -} - -sstr_t scstrlower(scstr_t string) { - sstr_t ret = sstrdup(string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = tolower(ret.ptr[i]); - } - return ret; -} +#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE +#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 +#endif -sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { - sstr_t ret = sstrdup_a(allocator, string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = tolower(ret.ptr[i]); - } - return ret; -} - -sstr_t scstrupper(scstr_t string) { - sstr_t ret = sstrdup(string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = toupper(ret.ptr[i]); - } - return ret; -} - -sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { - sstr_t ret = sstrdup_a(allocator, string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = toupper(ret.ptr[i]); - } - return ret; -} - -#define REPLACE_INDEX_BUFFER_MAX 100 - -struct scstrreplace_ibuf { - size_t* buf; - unsigned int len; /* small indices */ - struct scstrreplace_ibuf* next; +struct cx_strreplace_ibuf { + size_t *buf; + struct cx_strreplace_ibuf *next; + unsigned int len; }; -static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { +static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { while (buf) { - struct scstrreplace_ibuf *next = buf->next; + struct cx_strreplace_ibuf *next = buf->next; free(buf->buf); free(buf); buf = next; } } -sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, - scstr_t pattern, scstr_t replacement, size_t replmax) { +cxmutstr cx_strreplacen_a( + const CxAllocator *allocator, + cxstring str, + cxstring pattern, + cxstring replacement, + size_t replmax +) { if (pattern.length == 0 || pattern.length > str.length || replmax == 0) - return sstrdup(str); + return cx_strdup_a(allocator, str); - /* Compute expected buffer length */ + // Compute expected buffer length size_t ibufmax = str.length / pattern.length; size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; - if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { - ibuflen = REPLACE_INDEX_BUFFER_MAX; + if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { + ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; } - /* Allocate first index buffer */ - struct scstrreplace_ibuf *firstbuf, *curbuf; - firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); - if (!firstbuf) return sstrn(NULL, 0); + // Allocate first index buffer + struct cx_strreplace_ibuf *firstbuf, *curbuf; + firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); + if (!firstbuf) return cx_mutstrn(NULL, 0); firstbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!firstbuf->buf) { free(firstbuf); - return sstrn(NULL, 0); + return cx_mutstrn(NULL, 0); } - /* Search occurrences */ - scstr_t searchstr = str; + // Search occurrences + cxstring searchstr = str; size_t found = 0; do { - scstr_t match = scstrscstr(searchstr, pattern); + cxstring match = cx_strstr(searchstr, pattern); if (match.length > 0) { - /* Allocate next buffer in chain, if required */ + // Allocate next buffer in chain, if required if (curbuf->len == ibuflen) { - struct scstrreplace_ibuf *nextbuf = - calloc(1, sizeof(struct scstrreplace_ibuf)); + struct cx_strreplace_ibuf *nextbuf = + calloc(1, sizeof(struct cx_strreplace_ibuf)); if (!nextbuf) { - scstrrepl_free_ibuf(firstbuf); - return sstrn(NULL, 0); + cx_strrepl_free_ibuf(firstbuf); + return cx_mutstrn(NULL, 0); } nextbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!nextbuf->buf) { free(nextbuf); - scstrrepl_free_ibuf(firstbuf); - return sstrn(NULL, 0); + cx_strrepl_free_ibuf(firstbuf); + return cx_mutstrn(NULL, 0); } curbuf->next = nextbuf; curbuf = nextbuf; } - /* Record match index */ + // Record match index found++; size_t idx = match.ptr - str.ptr; curbuf->buf[curbuf->len++] = idx; @@ -737,8 +649,8 @@ } } while (searchstr.length > 0 && found < replmax); - /* Allocate result string */ - sstr_t result; + // Allocate result string + cxmutstr result; { ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; size_t rcount = 0; @@ -748,60 +660,127 @@ curbuf = curbuf->next; } while (curbuf); result.length = str.length + rcount * adjlen; - result.ptr = almalloc(allocator, result.length); + result.ptr = cxMalloc(allocator, result.length + 1); if (!result.ptr) { - scstrrepl_free_ibuf(firstbuf); - return sstrn(NULL, 0); + cx_strrepl_free_ibuf(firstbuf); + return cx_mutstrn(NULL, 0); } } - /* Build result string */ + // Build result string curbuf = firstbuf; size_t srcidx = 0; - char* destptr = result.ptr; + char *destptr = result.ptr; do { for (size_t i = 0; i < curbuf->len; i++) { - /* Copy source part up to next match*/ + // Copy source part up to next match size_t idx = curbuf->buf[i]; size_t srclen = idx - srcidx; if (srclen > 0) { - memcpy(destptr, str.ptr+srcidx, srclen); + memcpy(destptr, str.ptr + srcidx, srclen); destptr += srclen; srcidx += srclen; } - /* Copy the replacement and skip the source pattern */ + // Copy the replacement and skip the source pattern srcidx += pattern.length; memcpy(destptr, replacement.ptr, replacement.length); destptr += replacement.length; } curbuf = curbuf->next; } while (curbuf); - memcpy(destptr, str.ptr+srcidx, str.length-srcidx); + memcpy(destptr, str.ptr + srcidx, str.length - srcidx); - /* Free index buffer */ - scstrrepl_free_ibuf(firstbuf); + // Result is guaranteed to be zero-terminated + result.ptr[result.length] = '\0'; + + // Free index buffer + cx_strrepl_free_ibuf(firstbuf); return result; } -sstr_t scstrreplacen(scstr_t str, scstr_t pattern, - scstr_t replacement, size_t replmax) { - return scstrreplacen_a(ucx_default_allocator(), - str, pattern, replacement, replmax); +CxStrtokCtx cx_strtok( + cxstring str, + cxstring delim, + size_t limit +) { + CxStrtokCtx ctx; + ctx.str = str; + ctx.delim = delim; + ctx.limit = limit; + ctx.pos = 0; + ctx.next_pos = 0; + ctx.delim_pos = 0; + ctx.found = 0; + ctx.delim_more = NULL; + ctx.delim_more_count = 0; + return ctx; +} + +CxStrtokCtx cx_strtok_m( + cxmutstr str, + cxstring delim, + size_t limit +) { + return cx_strtok(cx_strcast(str), delim, limit); } +bool cx_strtok_next( + CxStrtokCtx *ctx, + cxstring *token +) { + // abortion criteria + if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { + return false; + } -// type adjustment functions -scstr_t ucx_sc2sc(scstr_t str) { - return str; + // determine the search start + cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos); + + // search the next delimiter + cxstring delim = cx_strstr(haystack, ctx->delim); + + // if found, make delim capture exactly the delimiter + if (delim.length > 0) { + delim.length = ctx->delim.length; + } + + // if more delimiters are specified, check them now + if (ctx->delim_more_count > 0) { + cx_for_n(i, ctx->delim_more_count) { + cxstring d = cx_strstr(haystack, ctx->delim_more[i]); + if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { + delim.ptr = d.ptr; + delim.length = ctx->delim_more[i].length; + } + } + } + + // store the token information and adjust the context + ctx->found++; + ctx->pos = ctx->next_pos; + token->ptr = &ctx->str.ptr[ctx->pos]; + ctx->delim_pos = delim.length == 0 ? + ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); + token->length = ctx->delim_pos - ctx->pos; + ctx->next_pos = ctx->delim_pos + delim.length; + + return true; } -scstr_t ucx_ss2sc(sstr_t str) { - scstr_t cs; - cs.ptr = str.ptr; - cs.length = str.length; - return cs; + +bool cx_strtok_next_m( + CxStrtokCtx *ctx, + cxmutstr *token +) { + return cx_strtok_next(ctx, (cxstring *) token); } -scstr_t ucx_ss2c_s(scstr_t c) { - return c; + +void cx_strtok_delim( + CxStrtokCtx *ctx, + const cxstring *delim, + size_t count +) { + ctx->delim_more = delim; + ctx->delim_more_count = count; }