toolkit: comparison ucx/string.c

-:fe49cff3c571
+:bb7da585debc
 /*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
-* Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
+* Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
-#include "ucx/string.h"
+#include "cx/string.h"
+#include "cx/utils.h"
-#include "ucx/allocator.h"
-#include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
-#include <stdint.h>
 #include <ctype.h>
 #ifndef _WIN32
-#include <strings.h> /* for strncasecmp() */
-#endif /* _WIN32 */
+#include <strings.h> // for strncasecmp()
-sstr_t sstr(char *cstring) {
+#endif // _WIN32
-sstr_t string;
-string.ptr = cstring;
+cxmutstr cx_mutstr(char *cstring) {
-string.length = strlen(cstring);
+return (cxmutstr) {cstring, strlen(cstring)};
-return string;
+}
-}
+cxmutstr cx_mutstrn(
-sstr_t sstrn(char *cstring, size_t length) {
+char *cstring,
-sstr_t string;
+size_t length
-string.ptr = cstring;
+) {
-string.length = length;
+return (cxmutstr) {cstring, length};
-return string;
+}
-}
+cxstring cx_str(const char *cstring) {
-scstr_t scstr(const char *cstring) {
+return (cxstring) {cstring, strlen(cstring)};
-scstr_t string;
+}
-string.ptr = cstring;
-string.length = strlen(cstring);
+cxstring cx_strn(
-return string;
+const char *cstring,
-}
+size_t length
+) {
-scstr_t scstrn(const char *cstring, size_t length) {
+return (cxstring) {cstring, length};
-scstr_t string;
+}
-string.ptr = cstring;
-string.length = length;
+cxstring cx_strcast(cxmutstr str) {
-return string;
+return (cxstring) {str.ptr, str.length};
 }
+void cx_strfree(cxmutstr *str) {
-size_t scstrnlen(size_t n, ...) {
+free(str->ptr);
-if (n == 0) return 0;
+str->ptr = NULL;
+str->length = 0;
+}
+void cx_strfree_a(
+const CxAllocator *alloc,
+cxmutstr *str
+) {
+cxFree(alloc, str->ptr);
+str->ptr = NULL;
+str->length = 0;
+}
+size_t cx_strlen(
+size_t count,
+...
+) {
+if (count == 0) return 0;
 va_list ap;
-va_start(ap, n);
+va_start(ap, count);
 size_t size = 0;
+cx_for_n(i, count) {
-for (size_t i = 0 ; i < n ; i++) {
+cxstring str = va_arg(ap, cxstring);
-scstr_t str = va_arg(ap, scstr_t);
-if(SIZE_MAX - str.length < size) {
-size = SIZE_MAX;
-break;
-}
 size += str.length;
 }
 va_end(ap);
 return size;
 }
-static sstr_t sstrvcat_a(
+cxmutstr cx_strcat_ma(
-UcxAllocator *a,
+const CxAllocator *alloc,
+cxmutstr str,
 size_t count,
-scstr_t s1,
+...
-va_list ap) {
+) {
-sstr_t str;
+if (count == 0) return str;
-str.ptr = NULL;
-str.length = 0;
+cxstring *strings = calloc(count, sizeof(cxstring));
-if(count < 2) {
+if (!strings) abort();
-return str;
-}
+va_list ap;
+va_start(ap, count);
-scstr_t s2 = va_arg (ap, scstr_t);
-if(((size_t)-1) - s1.length < s2.length) {
-return str;
-}
-scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
-if(!strings) {
-return str;
-}
 // get all args and overall length
-strings[0] = s1;
+size_t slen = str.length;
-strings[1] = s2;
+cx_for_n(i, count) {
-size_t slen = s1.length + s2.length;
+cxstring s = va_arg (ap, cxstring);
-int error = 0;
-for (size_t i=2;i<count;i++) {
-scstr_t s = va_arg (ap, scstr_t);
 strings[i] = s;
-if(((size_t)-1) - s.length < slen) {
-error = 1;
-break;
-}
 slen += s.length;
 }
-if(error) {
+va_end(ap);
-free(strings);
-return str;
+// reallocate or create new string
-}
+if (str.ptr == NULL) {
+str.ptr = cxMalloc(alloc, slen + 1);
-// create new string
+} else {
-str.ptr = (char*) almalloc(a, slen + 1);
+str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
+}
+if (str.ptr == NULL) abort();
+// concatenate strings
+size_t pos = str.length;
 str.length = slen;
-if(!str.ptr) {
+cx_for_n(i, count) {
-free(strings);
+cxstring s = strings[i];
-str.length = 0;
-return str;
-}
-// concatenate strings
-size_t pos = 0;
-for (size_t i=0;i<count;i++) {
-scstr_t s = strings[i];
 memcpy(str.ptr + pos, s.ptr, s.length);
 pos += s.length;
 }
+// terminate string
 str.ptr[str.length] = '\0';
+// free temporary array
 free(strings);
 return str;
 }
-sstr_t scstrcat(size_t count, scstr_t s1, ...) {
+cxstring cx_strsubs(
-va_list ap;
+cxstring string,
-va_start(ap, s1);
+size_t start
-sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
+) {
-va_end(ap);
+return cx_strsubsl(string, start, string.length - start);
-return s;
+}
-}
+cxmutstr cx_strsubs_m(
-sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
+cxmutstr string,
-va_list ap;
+size_t start
-va_start(ap, s1);
+) {
-sstr_t s = sstrvcat_a(a, count, s1, ap);
+return cx_strsubsl_m(string, start, string.length - start);
-va_end(ap);
+}
-return s;
-}
+cxstring cx_strsubsl(
+cxstring string,
-static int ucx_substring(
-size_t str_length,
 size_t start,
-size_t length,
+size_t length
-size_t *newlen,
+) {
-size_t *newpos)
+if (start > string.length) {
-{
+return (cxstring) {NULL, 0};
-*newlen = 0;
+}
-*newpos = 0;
+size_t rem_len = string.length - start;
-if(start > str_length) {
+if (length > rem_len) {
-return 0;
+length = rem_len;
 }
-if(length > str_length - start) {
+return (cxstring) {string.ptr + start, length};
-length = str_length - start;
+}
-}
-*newlen = length;
+cxmutstr cx_strsubsl_m(
-*newpos = start;
+cxmutstr string,
-return 1;
+size_t start,
-}
+size_t length
+) {
-sstr_t sstrsubs(sstr_t s, size_t start) {
+cxstring result = cx_strsubsl(cx_strcast(string), start, length);
-return sstrsubsl (s, start, s.length-start);
+return (cxmutstr) {(char *) result.ptr, result.length};
 }
-sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
+cxstring cx_strchr(
-size_t pos;
+cxstring string,
-sstr_t ret = { NULL, 0 };
+int chr
-if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
+) {
-ret.ptr = s.ptr + pos;
+chr = 0xFF & chr;
-}
+// TODO: improve by comparing multiple bytes at once
-return ret;
+cx_for_n(i, string.length) {
-}
+if (string.ptr[i] == chr) {
+return cx_strsubs(string, i);
-scstr_t scstrsubs(scstr_t string, size_t start) {
+}
-return scstrsubsl(string, start, string.length-start);
+}
-}
+return (cxstring) {NULL, 0};
+}
-scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
-size_t pos;
+cxmutstr cx_strchr_m(
-scstr_t ret = { NULL, 0 };
+cxmutstr string,
-if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
+int chr
-ret.ptr = s.ptr + pos;
+) {
-}
+cxstring result = cx_strchr(cx_strcast(string), chr);
-return ret;
+return (cxmutstr) {(char *) result.ptr, result.length};
 }
+cxstring cx_strrchr(
-static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
+cxstring string,
-for(size_t i=0;i<length;i++) {
+int chr
-if(str[i] == chr) {
+) {
-*pos = i;
+chr = 0xFF & chr;
-return 1;
+size_t i = string.length;
-}
+while (i > 0) {
-}
+i--;
-return 0;
+// TODO: improve by comparing multiple bytes at once
-}
+if (string.ptr[i] == chr) {
+return cx_strsubs(string, i);
-static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
+}
-if(length > 0) {
+}
-for(size_t i=length ; i>0 ; i--) {
+return (cxstring) {NULL, 0};
-if(str[i-1] == chr) {
+}
-*pos = i-1;
-return 1;
+cxmutstr cx_strrchr_m(
-}
+cxmutstr string,
-}
+int chr
-}
+) {
-return 0;
+cxstring result = cx_strrchr(cx_strcast(string), chr);
-}
+return (cxmutstr) {(char *) result.ptr, result.length};
+}
-sstr_t sstrchr(sstr_t s, int c) {
-size_t pos = 0;
+#ifndef CX_STRSTR_SBO_SIZE
-if(ucx_strchr(s.ptr, s.length, c, &pos)) {
+#define CX_STRSTR_SBO_SIZE 512
-return sstrsubs(s, pos);
+#endif
-}
+unsigned const cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
-return sstrn(NULL, 0);
-}
+cxstring cx_strstr(
+cxstring haystack,
-sstr_t sstrrchr(sstr_t s, int c) {
+cxstring needle
-size_t pos = 0;
+) {
-if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
+if (needle.length == 0) {
-return sstrsubs(s, pos);
+return haystack;
 }
-return sstrn(NULL, 0);
-}
+// optimize for single-char needles
+if (needle.length == 1) {
-scstr_t scstrchr(scstr_t s, int c) {
+return cx_strchr(haystack, *needle.ptr);
-size_t pos = 0;
+}
-if(ucx_strchr(s.ptr, s.length, c, &pos)) {
-return scstrsubs(s, pos);
-}
-return scstrn(NULL, 0);
-}
-scstr_t scstrrchr(scstr_t s, int c) {
-size_t pos = 0;
-if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
-return scstrsubs(s, pos);
-}
-return scstrn(NULL, 0);
-}
-#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
-((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
-#define ptable_w(useheap, ptable, index, src) do {\
-if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
-else ((size_t*)ptable)[index] = src;\
-} while (0);
-static const char* ucx_strstr(
-const char *str,
-size_t length,
-const char *match,
-size_t matchlen,
-size_t *newlen)
-{
-*newlen = length;
-if (matchlen == 0) {
-return str;
-}
-const char *result = NULL;
-size_t resultlen = 0;
 /*
 * IMPORTANT:
-* our prefix table contains the prefix length PLUS ONE
+* Our prefix table contains the prefix length PLUS ONE
-* this is our decision, because we want to use the full range of size_t
+* this is our decision, because we want to use the full range of size_t.
-* the original algorithm needs a (-1) at one single place
+* The original algorithm needs a (-1) at one single place,
-* and we want to avoid that
+* and we want to avoid that.
 */
-/* static prefix table */
+// local prefix table
-static uint8_t s_prefix_table[256];
+size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
-/* check pattern length and use appropriate prefix table */
+// check needle length and use appropriate prefix table
-/* if the pattern exceeds static prefix table, allocate on the heap */
+// if the pattern exceeds static prefix table, allocate on the heap
-register int useheap = matchlen > 255;
+bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
-register void* ptable = useheap ?
+register size_t *ptable = useheap ? calloc(needle.length + 1,
-calloc(matchlen+1, sizeof(size_t)): s_prefix_table;
+sizeof(size_t)) : s_prefix_table;
-/* keep counter in registers */
+// keep counter in registers
 register size_t i, j;
-/* fill prefix table */
+// fill prefix table
-i = 0; j = 0;
+i = 0;
-ptable_w(useheap, ptable, i, j);
+j = 0;
-while (i < matchlen) {
+ptable[i] = j;
-while (j >= 1 && match[j-1] != match[i]) {
+while (i < needle.length) {
-ptable_r(j, useheap, ptable, j-1);
+while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
-}
+j = ptable[j - 1];
-i++; j++;
+}
-ptable_w(useheap, ptable, i, j);
+i++;
-}
+j++;
+ptable[i] = j;
-/* search */
+}
-i = 0; j = 1;
-while (i < length) {
+// search
-while (j >= 1 && str[i] != match[j-1]) {
+cxstring result = {NULL, 0};
-ptable_r(j, useheap, ptable, j-1);
+i = 0;
-}
+j = 1;
-i++; j++;
+while (i < haystack.length) {
-if (j-1 == matchlen) {
+while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
-size_t start = i - matchlen;
+j = ptable[j - 1];
-result = str + start;
+}
-resultlen = length - start;
+i++;
+j++;
+if (j - 1 == needle.length) {
+size_t start = i - needle.length;
+result.ptr = haystack.ptr + start;
+result.length = haystack.length - start;
 break;
 }
 }
-/* if prefix table was allocated on the heap, free it */
+// if prefix table was allocated on the heap, free it
 if (ptable != s_prefix_table) {
 free(ptable);
 }
-*newlen = resultlen;
 return result;
 }
-sstr_t scstrsstr(sstr_t string, scstr_t match) {
+cxmutstr cx_strstr_m(
-sstr_t result;
+cxmutstr haystack,
+cxstring needle
-size_t reslen;
+) {
-const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
+cxstring result = cx_strstr(cx_strcast(haystack), needle);
-if(!resstr) {
+return (cxmutstr) {(char *) result.ptr, result.length};
-result.ptr = NULL;
+}
-result.length = 0;
-return result;
+size_t cx_strsplit(
-}
+cxstring string,
+cxstring delim,
-size_t pos = resstr - string.ptr;
+size_t limit,
-result.ptr = string.ptr + pos;
+cxstring *output
-result.length = reslen;
+) {
+// special case: output limit is zero
-return result;
+if (limit == 0) return 0;
-}
+// special case: delimiter is empty
-scstr_t scstrscstr(scstr_t string, scstr_t match) {
+if (delim.length == 0) {
-scstr_t result;
+output[0] = string;
+return 1;
-size_t reslen;
+}
-const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
-if(!resstr) {
+// special cases: delimiter is at least as large as the string
-result.ptr = NULL;
+if (delim.length >= string.length) {
-result.length = 0;
+// exact match
-return result;
+if (cx_strcmp(string, delim) == 0) {
-}
+output[0] = cx_strn(string.ptr, 0);
+output[1] = cx_strn(string.ptr + string.length, 0);
-size_t pos = resstr - string.ptr;
+return 2;
-result.ptr = string.ptr + pos;
+} else {
-result.length = reslen;
+// no match possible
+output[0] = string;
-return result;
+return 1;
 }
+}
-#undef ptable_r
-#undef ptable_w
+size_t n = 0;
+cxstring curpos = string;
-sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
+while (1) {
-return scstrsplit_a(ucx_default_allocator(), s, d, n);
+++n;
-}
+cxstring match = cx_strstr(curpos, delim);
+if (match.length > 0) {
-sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
+// is the limit reached?
-if (s.length == 0 || d.length == 0) {
+if (n < limit) {
-*n = -1;
+// copy the current string to the array
-return NULL;
+cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
-}
+output[n - 1] = item;
+size_t processed = item.length + delim.length;
-/* special cases: delimiter is at least as large as the string */
+curpos.ptr += processed;
-if (d.length >= s.length) {
+curpos.length -= processed;
-/* exact match */
-if (sstrcmp(s, d) == 0) {
-*n = 0;
-return NULL;
-} else /* no match possible */ {
-*n = 1;
-sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
-if(result) {
-*result = sstrdup_a(allocator, s);
 } else {
-*n = -2;
+// limit reached, copy the _full_ remaining string
-}
+output[n - 1] = curpos;
-return result;
-}
-}
-ssize_t nmax = *n;
-size_t arrlen = 16;
-sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
-if (result) {
-scstr_t curpos = s;
-ssize_t j = 1;
-while (1) {
-scstr_t match;
-/* optimize for one byte delimiters */
-if (d.length == 1) {
-match = curpos;
-for (size_t i = 0 ; i < curpos.length ; i++) {
-if (curpos.ptr[i] == *(d.ptr)) {
-match.ptr = curpos.ptr + i;
-break;
-}
-match.length--;
-}
-} else {
-match = scstrscstr(curpos, d);
-}
-if (match.length > 0) {
-/* is this our last try? */
-if (nmax == 0 || j < nmax) {
-/* copy the current string to the array */
-scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
-result[j-1] = sstrdup_a(allocator, item);
-size_t processed = item.length + d.length;
-curpos.ptr += processed;
-curpos.length -= processed;
-/* allocate memory for the next string */
-j++;
-if (j > arrlen) {
-arrlen *= 2;
-size_t reallocsz;
-sstr_t* reallocated = NULL;
-if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
-reallocated = (sstr_t*) alrealloc(
-allocator, result, reallocsz);
-}
-if (reallocated) {
-result = reallocated;
-} else {
-for (ssize_t i = 0 ; i < j-1 ; i++) {
-alfree(allocator, result[i].ptr);
-}
-alfree(allocator, result);
-*n = -2;
-return NULL;
-}
-}
-} else {
-/* nmax reached, copy the _full_ remaining string */
-result[j-1] = sstrdup_a(allocator, curpos);
-break;
-}
-} else {
-/* no more matches, copy last string */
-result[j-1] = sstrdup_a(allocator, curpos);
 break;
 }
-}
+} else {
-*n = j;
+// no more matches, copy last string
-} else {
+output[n - 1] = curpos;
-*n = -2;
+break;
 }
+}
-return result;
-}
+return n;
+}
-int scstrcmp(scstr_t s1, scstr_t s2) {
+size_t cx_strsplit_a(
+const CxAllocator *allocator,
+cxstring string,
+cxstring delim,
+size_t limit,
+cxstring **output
+) {
+// find out how many splits we're going to make and allocate memory
+size_t n = 0;
+cxstring curpos = string;
+while (1) {
+++n;
+cxstring match = cx_strstr(curpos, delim);
+if (match.length > 0) {
+// is the limit reached?
+if (n < limit) {
+size_t processed = match.ptr - curpos.ptr + delim.length;
+curpos.ptr += processed;
+curpos.length -= processed;
+} else {
+// limit reached
+break;
+}
+} else {
+// no more matches
+break;
+}
+}
+*output = cxCalloc(allocator, n, sizeof(cxstring));
+return cx_strsplit(string, delim, n, *output);
+}
+size_t cx_strsplit_m(
+cxmutstr string,
+cxstring delim,
+size_t limit,
+cxmutstr *output
+) {
+return cx_strsplit(cx_strcast(string),
+delim, limit, (cxstring *) output);
+}
+size_t cx_strsplit_ma(
+const CxAllocator *allocator,
+cxmutstr string,
+cxstring delim,
+size_t limit,
+cxmutstr **output
+) {
+return cx_strsplit_a(allocator, cx_strcast(string),
+delim, limit, (cxstring **) output);
+}
+int cx_strcmp(
+cxstring s1,
+cxstring s2
+) {
 if (s1.length == s2.length) {
 return memcmp(s1.ptr, s2.ptr, s1.length);
 } else if (s1.length > s2.length) {
 return 1;
 } else {
 return -1;
 }
 }
-int scstrcasecmp(scstr_t s1, scstr_t s2) {
+int cx_strcasecmp(
+cxstring s1,
+cxstring s2
+) {
 if (s1.length == s2.length) {
 #ifdef _WIN32
 return _strnicmp(s1.ptr, s2.ptr, s1.length);
 #else
 return strncasecmp(s1.ptr, s2.ptr, s1.length);
 } else {
 return -1;
 }
 }
-sstr_t scstrdup(scstr_t s) {
+int cx_strcmp_p(
-return sstrdup_a(ucx_default_allocator(), s);
+const void *s1,
-}
+const void *s2
+) {
-sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
+const cxstring *left = s1;
-sstr_t newstring;
+const cxstring *right = s2;
-newstring.ptr = (char*)almalloc(allocator, s.length + 1);
+return cx_strcmp(*left, *right);
-if (newstring.ptr) {
+}
-newstring.length = s.length;
-newstring.ptr[newstring.length] = 0;
+int cx_strcasecmp_p(
+const void *s1,
-memcpy(newstring.ptr, s.ptr, s.length);
+const void *s2
-} else {
+) {
-newstring.length = 0;
+const cxstring *left = s1;
-}
+const cxstring *right = s2;
+return cx_strcasecmp(*left, *right);
-return newstring;
+}
-}
+cxmutstr cx_strdup_a(
+const CxAllocator *allocator,
-static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
+cxstring string
-const char *newptr = s;
+) {
-size_t length = len;
+cxmutstr result = {
+cxMalloc(allocator, string.length + 1),
-while(length > 0 && isspace(*newptr)) {
+string.length
-newptr++;
+};
-length--;
+if (result.ptr == NULL) {
-}
+result.length = 0;
-while(length > 0 && isspace(newptr[length-1])) {
+return result;
-length--;
+}
-}
+memcpy(result.ptr, string.ptr, string.length);
+result.ptr[string.length] = '\0';
-*newlen = length;
+return result;
-return newptr - s;
+}
-}
+cxstring cx_strtrim(cxstring string) {
-sstr_t sstrtrim(sstr_t string) {
+cxstring result = string;
-sstr_t newstr;
+// TODO: optimize by comparing multiple bytes at once
-newstr.ptr = string.ptr
+while (result.length > 0 && isspace(*result.ptr)) {
-+ ucx_strtrim(string.ptr, string.length, &newstr.length);
+result.ptr++;
-return newstr;
+result.length--;
 }
+while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
-scstr_t scstrtrim(scstr_t string) {
+result.length--;
-scstr_t newstr;
+}
-newstr.ptr = string.ptr
+return result;
-+ ucx_strtrim(string.ptr, string.length, &newstr.length);
+}
-return newstr;
-}
+cxmutstr cx_strtrim_m(cxmutstr string) {
+cxstring result = cx_strtrim(cx_strcast(string));
-int scstrprefix(scstr_t string, scstr_t prefix) {
+return (cxmutstr) {(char *) result.ptr, result.length};
-if (string.length == 0) {
+}
-return prefix.length == 0;
-}
+bool cx_strprefix(
-if (prefix.length == 0) {
+cxstring string,
-return 1;
+cxstring prefix
-}
+) {
+if (string.length < prefix.length) return false;
-if (prefix.length > string.length) {
+return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
-return 0;
+}
-} else {
-return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
+bool cx_strsuffix(
-}
+cxstring string,
-}
+cxstring suffix
+) {
-int scstrsuffix(scstr_t string, scstr_t suffix) {
+if (string.length < suffix.length) return false;
-if (string.length == 0) {
+return memcmp(string.ptr + string.length - suffix.length,
-return suffix.length == 0;
+suffix.ptr, suffix.length) == 0;
 }
-if (suffix.length == 0) {
-return 1;
+bool cx_strcaseprefix(
-}
+cxstring string,
+cxstring prefix
-if (suffix.length > string.length) {
+) {
-return 0;
+if (string.length < prefix.length) return false;
-} else {
+#ifdef _WIN32
-return memcmp(string.ptr+string.length-suffix.length,
+return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
-suffix.ptr, suffix.length) == 0;
+#else
-}
+return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
-}
+#endif
+}
-int scstrcaseprefix(scstr_t string, scstr_t prefix) {
-if (string.length == 0) {
+bool cx_strcasesuffix(
-return prefix.length == 0;
+cxstring string,
-}
+cxstring suffix
-if (prefix.length == 0) {
+) {
-return 1;
+if (string.length < suffix.length) return false;
-}
+#ifdef _WIN32
+return _strnicmp(string.ptr+string.length-suffix.length,
-if (prefix.length > string.length) {
+suffix.ptr, suffix.length) == 0;
-return 0;
+#else
-} else {
+return strncasecmp(string.ptr + string.length - suffix.length,
-scstr_t subs = scstrsubsl(string, 0, prefix.length);
+suffix.ptr, suffix.length) == 0;
-return scstrcasecmp(subs, prefix) == 0;
+#endif
 }
-}
+void cx_strlower(cxmutstr string) {
-int scstrcasesuffix(scstr_t string, scstr_t suffix) {
+cx_for_n(i, string.length) {
-if (string.length == 0) {
+string.ptr[i] = (char) tolower(string.ptr[i]);
-return suffix.length == 0;
+}
 }
-if (suffix.length == 0) {
-return 1;
+void cx_strupper(cxmutstr string) {
-}
+cx_for_n(i, string.length) {
+string.ptr[i] = (char) toupper(string.ptr[i]);
-if (suffix.length > string.length) {
+}
-return 0;
+}
-} else {
-scstr_t subs = scstrsubs(string, string.length-suffix.length);
+#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
-return scstrcasecmp(subs, suffix) == 0;
+#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
-}
+#endif
-}
+struct cx_strreplace_ibuf {
-sstr_t scstrlower(scstr_t string) {
+size_t *buf;
-sstr_t ret = sstrdup(string);
+struct cx_strreplace_ibuf *next;
-for (size_t i = 0; i < ret.length ; i++) {
+unsigned int len;
-ret.ptr[i] = tolower(ret.ptr[i]);
-}
-return ret;
-}
-sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
-sstr_t ret = sstrdup_a(allocator, string);
-for (size_t i = 0; i < ret.length ; i++) {
-ret.ptr[i] = tolower(ret.ptr[i]);
-}
-return ret;
-}
-sstr_t scstrupper(scstr_t string) {
-sstr_t ret = sstrdup(string);
-for (size_t i = 0; i < ret.length ; i++) {
-ret.ptr[i] = toupper(ret.ptr[i]);
-}
-return ret;
-}
-sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
-sstr_t ret = sstrdup_a(allocator, string);
-for (size_t i = 0; i < ret.length ; i++) {
-ret.ptr[i] = toupper(ret.ptr[i]);
-}
-return ret;
-}
-#define REPLACE_INDEX_BUFFER_MAX 100
-struct scstrreplace_ibuf {
-size_t* buf;
-unsigned int len; /* small indices */
-struct scstrreplace_ibuf* next;
 };
-static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
 while (buf) {
-struct scstrreplace_ibuf *next = buf->next;
+struct cx_strreplace_ibuf *next = buf->next;
 free(buf->buf);
 free(buf);
 buf = next;
 }
 }
-sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
+cxmutstr cx_strreplacen_a(
-scstr_t pattern, scstr_t replacement, size_t replmax) {
+const CxAllocator *allocator,
+cxstring str,
+cxstring pattern,
+cxstring replacement,
+size_t replmax
+) {
 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
-return sstrdup(str);
+return cx_strdup_a(allocator, str);
-/* Compute expected buffer length */
+// Compute expected buffer length
 size_t ibufmax = str.length / pattern.length;
 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
-if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
-ibuflen = REPLACE_INDEX_BUFFER_MAX;
+ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
 }
-/* Allocate first index buffer */
+// Allocate first index buffer
-struct scstrreplace_ibuf *firstbuf, *curbuf;
+struct cx_strreplace_ibuf *firstbuf, *curbuf;
-firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
+firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
-if (!firstbuf) return sstrn(NULL, 0);
+if (!firstbuf) return cx_mutstrn(NULL, 0);
 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
 if (!firstbuf->buf) {
 free(firstbuf);
-return sstrn(NULL, 0);
+return cx_mutstrn(NULL, 0);
 }
-/* Search occurrences */
+// Search occurrences
-scstr_t searchstr = str;
+cxstring searchstr = str;
 size_t found = 0;
 do {
-scstr_t match = scstrscstr(searchstr, pattern);
+cxstring match = cx_strstr(searchstr, pattern);
 if (match.length > 0) {
-/* Allocate next buffer in chain, if required */
+// Allocate next buffer in chain, if required
 if (curbuf->len == ibuflen) {
-struct scstrreplace_ibuf *nextbuf =
+struct cx_strreplace_ibuf *nextbuf =
-calloc(1, sizeof(struct scstrreplace_ibuf));
+calloc(1, sizeof(struct cx_strreplace_ibuf));
 if (!nextbuf) {
-scstrrepl_free_ibuf(firstbuf);
+cx_strrepl_free_ibuf(firstbuf);
-return sstrn(NULL, 0);
+return cx_mutstrn(NULL, 0);
 }
 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
 if (!nextbuf->buf) {
 free(nextbuf);
-scstrrepl_free_ibuf(firstbuf);
+cx_strrepl_free_ibuf(firstbuf);
-return sstrn(NULL, 0);
+return cx_mutstrn(NULL, 0);
 }
 curbuf->next = nextbuf;
 curbuf = nextbuf;
 }
-/* Record match index */
+// Record match index
 found++;
 size_t idx = match.ptr - str.ptr;
 curbuf->buf[curbuf->len++] = idx;
 searchstr.ptr = match.ptr + pattern.length;
 searchstr.length = str.length - idx - pattern.length;
 } else {
 break;
 }
 } while (searchstr.length > 0 && found < replmax);
-/* Allocate result string */
+// Allocate result string
-sstr_t result;
+cxmutstr result;
 {
 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
 size_t rcount = 0;
 curbuf = firstbuf;
 do {
 rcount += curbuf->len;
 curbuf = curbuf->next;
 } while (curbuf);
 result.length = str.length + rcount * adjlen;
-result.ptr = almalloc(allocator, result.length);
+result.ptr = cxMalloc(allocator, result.length + 1);
 if (!result.ptr) {
-scstrrepl_free_ibuf(firstbuf);
+cx_strrepl_free_ibuf(firstbuf);
-return sstrn(NULL, 0);
+return cx_mutstrn(NULL, 0);
 }
 }
-/* Build result string */
+// Build result string
 curbuf = firstbuf;
 size_t srcidx = 0;
-char* destptr = result.ptr;
+char *destptr = result.ptr;
 do {
 for (size_t i = 0; i < curbuf->len; i++) {
-/* Copy source part up to next match*/
+// Copy source part up to next match
 size_t idx = curbuf->buf[i];
 size_t srclen = idx - srcidx;
 if (srclen > 0) {
-memcpy(destptr, str.ptr+srcidx, srclen);
+memcpy(destptr, str.ptr + srcidx, srclen);
 destptr += srclen;
 srcidx += srclen;
 }
-/* Copy the replacement and skip the source pattern */
+// Copy the replacement and skip the source pattern
 srcidx += pattern.length;
 memcpy(destptr, replacement.ptr, replacement.length);
 destptr += replacement.length;
 }
 curbuf = curbuf->next;
 } while (curbuf);
-memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
+memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
-/* Free index buffer */
+// Result is guaranteed to be zero-terminated
-scstrrepl_free_ibuf(firstbuf);
+result.ptr[result.length] = '\0';
+// Free index buffer
+cx_strrepl_free_ibuf(firstbuf);
 return result;
 }
-sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
+CxStrtokCtx cx_strtok(
-scstr_t replacement, size_t replmax) {
+cxstring str,
-return scstrreplacen_a(ucx_default_allocator(),
+cxstring delim,
-str, pattern, replacement, replmax);
+size_t limit
-}
+) {
+CxStrtokCtx ctx;
+ctx.str = str;
-// type adjustment functions
+ctx.delim = delim;
-scstr_t ucx_sc2sc(scstr_t str) {
+ctx.limit = limit;
-return str;
+ctx.pos = 0;
-}
+ctx.next_pos = 0;
-scstr_t ucx_ss2sc(sstr_t str) {
+ctx.delim_pos = 0;
-scstr_t cs;
+ctx.found = 0;
-cs.ptr = str.ptr;
+ctx.delim_more = NULL;
-cs.length = str.length;
+ctx.delim_more_count = 0;
-return cs;
+return ctx;
 }
-scstr_t ucx_ss2c_s(scstr_t c) {
-return c;
+CxStrtokCtx cx_strtok_m(
-}
+cxmutstr str,
+cxstring delim,
+size_t limit
+) {
+return cx_strtok(cx_strcast(str), delim, limit);
+}
+bool cx_strtok_next(
+CxStrtokCtx *ctx,
+cxstring *token
+) {
+// abortion criteria
+if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
+return false;
+}
+// determine the search start
+cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
+// search the next delimiter
+cxstring delim = cx_strstr(haystack, ctx->delim);
+// if found, make delim capture exactly the delimiter
+if (delim.length > 0) {
+delim.length = ctx->delim.length;
+}
+// if more delimiters are specified, check them now
+if (ctx->delim_more_count > 0) {
+cx_for_n(i, ctx->delim_more_count) {
+cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
+if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
+delim.ptr = d.ptr;
+delim.length = ctx->delim_more[i].length;
+}
+}
+}
+// store the token information and adjust the context
+ctx->found++;
+ctx->pos = ctx->next_pos;
+token->ptr = &ctx->str.ptr[ctx->pos];
+ctx->delim_pos = delim.length == 0 ?
+ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
+token->length = ctx->delim_pos - ctx->pos;
+ctx->next_pos = ctx->delim_pos + delim.length;
+return true;
+}
+bool cx_strtok_next_m(
+CxStrtokCtx *ctx,
+cxmutstr *token
+) {
+return cx_strtok_next(ctx, (cxstring *) token);
+}
+void cx_strtok_delim(
+CxStrtokCtx *ctx,
+const cxstring *delim,
+size_t count
+) {
+ctx->delim_more = delim;
+ctx->delim_more_count = count;
+}

Mercurial > hg > toolkit / file comparison

comparison: ucx/string.c

ucx/string.c