ucx/string.c

changeset 431
bb7da585debc
parent 324
ce13a778654a
child 440
7c4b9cba09ca
--- a/ucx/string.c	Sun May 23 09:44:43 2021 +0200
+++ b/ucx/string.c	Sat Jan 04 16:38:48 2025 +0100
@@ -1,7 +1,7 @@
 /*
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  *
- * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -26,63 +26,71 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "ucx/string.h"
+#include "cx/string.h"
+#include "cx/utils.h"
 
-#include "ucx/allocator.h"
-
-#include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
-#include <stdint.h>
 #include <ctype.h>
 
 #ifndef _WIN32
-#include <strings.h> /* for strncasecmp() */
-#endif /* _WIN32 */
+
+#include <strings.h> // for strncasecmp()
 
-sstr_t sstr(char *cstring) {
-    sstr_t string;
-    string.ptr = cstring;
-    string.length = strlen(cstring);
-    return string;
+#endif // _WIN32
+
+cxmutstr cx_mutstr(char *cstring) {
+    return (cxmutstr) {cstring, strlen(cstring)};
 }
 
-sstr_t sstrn(char *cstring, size_t length) {
-    sstr_t string;
-    string.ptr = cstring;
-    string.length = length;
-    return string;
+cxmutstr cx_mutstrn(
+        char *cstring,
+        size_t length
+) {
+    return (cxmutstr) {cstring, length};
+}
+
+cxstring cx_str(const char *cstring) {
+    return (cxstring) {cstring, strlen(cstring)};
+}
+
+cxstring cx_strn(
+        const char *cstring,
+        size_t length
+) {
+    return (cxstring) {cstring, length};
 }
 
-scstr_t scstr(const char *cstring) {
-    scstr_t string;
-    string.ptr = cstring;
-    string.length = strlen(cstring);
-    return string;
+cxstring cx_strcast(cxmutstr str) {
+    return (cxstring) {str.ptr, str.length};
 }
 
-scstr_t scstrn(const char *cstring, size_t length) {
-    scstr_t string;
-    string.ptr = cstring;
-    string.length = length;
-    return string;
+void cx_strfree(cxmutstr *str) {
+    free(str->ptr);
+    str->ptr = NULL;
+    str->length = 0;
 }
 
+void cx_strfree_a(
+        const CxAllocator *alloc,
+        cxmutstr *str
+) {
+    cxFree(alloc, str->ptr);
+    str->ptr = NULL;
+    str->length = 0;
+}
 
-size_t scstrnlen(size_t n, ...) {
-    if (n == 0) return 0;
-    
+size_t cx_strlen(
+        size_t count,
+        ...
+) {
+    if (count == 0) return 0;
+
     va_list ap;
-    va_start(ap, n);
-    
+    va_start(ap, count);
     size_t size = 0;
-
-    for (size_t i = 0 ; i < n ; i++) {
-        scstr_t str = va_arg(ap, scstr_t);
-        if(SIZE_MAX - str.length < size) {
-            size = SIZE_MAX;
-            break;
-        }
+    cx_for_n(i, count) {
+        cxstring str = va_arg(ap, cxstring);
         size += str.length;
     }
     va_end(ap);
@@ -90,410 +98,341 @@
     return size;
 }
 
-static sstr_t sstrvcat_a(
-        UcxAllocator *a,
+cxmutstr cx_strcat_ma(
+        const CxAllocator *alloc,
+        cxmutstr str,
         size_t count,
-        scstr_t s1,
-        va_list ap) {
-    sstr_t str;
-    str.ptr = NULL;
-    str.length = 0;
-    if(count < 2) {
-        return str;
-    }
-    
-    scstr_t s2 = va_arg (ap, scstr_t);
-    
-    if(((size_t)-1) - s1.length < s2.length) {
-        return str;
-    }
-    
-    scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
-    if(!strings) {
-        return str;
-    }
-    
+        ...
+) {
+    if (count == 0) return str;
+
+    cxstring *strings = calloc(count, sizeof(cxstring));
+    if (!strings) abort();
+
+    va_list ap;
+    va_start(ap, count);
+
     // get all args and overall length
-    strings[0] = s1;
-    strings[1] = s2;
-    size_t slen = s1.length + s2.length;
-    int error = 0;
-    for (size_t i=2;i<count;i++) {
-        scstr_t s = va_arg (ap, scstr_t);
+    size_t slen = str.length;
+    cx_for_n(i, count) {
+        cxstring s = va_arg (ap, cxstring);
         strings[i] = s;
-        if(((size_t)-1) - s.length < slen) {
-            error = 1;
-            break;
-        }
         slen += s.length;
     }
-    if(error) {
-        free(strings);
-        return str;
+    va_end(ap);
+
+    // reallocate or create new string
+    if (str.ptr == NULL) {
+        str.ptr = cxMalloc(alloc, slen + 1);
+    } else {
+        str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
     }
-    
-    // create new string
-    str.ptr = (char*) almalloc(a, slen + 1);
+    if (str.ptr == NULL) abort();
+
+    // concatenate strings
+    size_t pos = str.length;
     str.length = slen;
-    if(!str.ptr) {
-        free(strings);
-        str.length = 0;
-        return str;
-    }
-    
-    // concatenate strings
-    size_t pos = 0;
-    for (size_t i=0;i<count;i++) {
-        scstr_t s = strings[i];
+    cx_for_n(i, count) {
+        cxstring s = strings[i];
         memcpy(str.ptr + pos, s.ptr, s.length);
         pos += s.length;
     }
-    
+
+    // terminate string
     str.ptr[str.length] = '\0';
-    
+
+    // free temporary array
     free(strings);
-    
+
     return str;
 }
 
-sstr_t scstrcat(size_t count, scstr_t s1, ...) {
-    va_list ap;
-    va_start(ap, s1);
-    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
-    va_end(ap);
-    return s;
-}
-
-sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
-    va_list ap;
-    va_start(ap, s1);
-    sstr_t s = sstrvcat_a(a, count, s1, ap);
-    va_end(ap);
-    return s;
+cxstring cx_strsubs(
+        cxstring string,
+        size_t start
+) {
+    return cx_strsubsl(string, start, string.length - start);
 }
 
-static int ucx_substring(
-        size_t str_length,
-        size_t start,
-        size_t length,
-        size_t *newlen,
-        size_t *newpos)
-{
-    *newlen = 0;
-    *newpos = 0;
-    
-    if(start > str_length) {
-        return 0;
-    }
-    
-    if(length > str_length - start) {
-        length = str_length - start;
-    }
-    *newlen = length;
-    *newpos = start;
-    return 1;
-}
-
-sstr_t sstrsubs(sstr_t s, size_t start) {
-    return sstrsubsl (s, start, s.length-start);
+cxmutstr cx_strsubs_m(
+        cxmutstr string,
+        size_t start
+) {
+    return cx_strsubsl_m(string, start, string.length - start);
 }
 
-sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
-    size_t pos;
-    sstr_t ret = { NULL, 0 };
-    if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
-        ret.ptr = s.ptr + pos;
+cxstring cx_strsubsl(
+        cxstring string,
+        size_t start,
+        size_t length
+) {
+    if (start > string.length) {
+        return (cxstring) {NULL, 0};
     }
-    return ret;
-}
 
-scstr_t scstrsubs(scstr_t string, size_t start) {
-    return scstrsubsl(string, start, string.length-start);
-}
+    size_t rem_len = string.length - start;
+    if (length > rem_len) {
+        length = rem_len;
+    }
 
-scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
-    size_t pos;
-    scstr_t ret = { NULL, 0 };
-    if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
-        ret.ptr = s.ptr + pos;
-    }
-    return ret;
+    return (cxstring) {string.ptr + start, length};
 }
 
-
-static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
-    for(size_t i=0;i<length;i++) {
-        if(str[i] == chr) {
-            *pos = i;
-            return 1;
-        }
-    }
-    return 0;
+cxmutstr cx_strsubsl_m(
+        cxmutstr string,
+        size_t start,
+        size_t length
+) {
+    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
+    return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
-    if(length > 0) {
-        for(size_t i=length ; i>0 ; i--) {
-            if(str[i-1] == chr) {
-                *pos = i-1;
-                return 1;
-            }
+cxstring cx_strchr(
+        cxstring string,
+        int chr
+) {
+    chr = 0xFF & chr; // reduce to a byte value in the range 0..255
+    // TODO: improve by comparing multiple bytes at once
+    cx_for_n(i, string.length) {
+        if ((unsigned char) string.ptr[i] == chr) { // char may be signed
+            return cx_strsubs(string, i);
         }
     }
-    return 0;
+    return (cxstring) {NULL, 0};
+}
+
+cxmutstr cx_strchr_m(
+        cxmutstr string,
+        int chr
+) {
+    cxstring result = cx_strchr(cx_strcast(string), chr);
+    return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-sstr_t sstrchr(sstr_t s, int c) {
-    size_t pos = 0;
-    if(ucx_strchr(s.ptr, s.length, c, &pos)) {
-        return sstrsubs(s, pos);
+cxstring cx_strrchr(
+        cxstring string,
+        int chr
+) {
+    chr = 0xFF & chr; // reduce to a byte value in the range 0..255
+    size_t i = string.length;
+    while (i > 0) {
+        i--;
+        // TODO: improve by comparing multiple bytes at once
+        if ((unsigned char) string.ptr[i] == chr) { // char may be signed
+            return cx_strsubs(string, i);
+        }
     }
-    return sstrn(NULL, 0);
+    return (cxstring) {NULL, 0};
 }
 
-sstr_t sstrrchr(sstr_t s, int c) {
-    size_t pos = 0;
-    if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
-        return sstrsubs(s, pos);
-    }
-    return sstrn(NULL, 0);
-}
-
-scstr_t scstrchr(scstr_t s, int c) {
-    size_t pos = 0;
-    if(ucx_strchr(s.ptr, s.length, c, &pos)) {
-        return scstrsubs(s, pos);
-    }
-    return scstrn(NULL, 0);
-}
-
-scstr_t scstrrchr(scstr_t s, int c) {
-    size_t pos = 0;
-    if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
-        return scstrsubs(s, pos);
-    }
-    return scstrn(NULL, 0);
+cxmutstr cx_strrchr_m(
+        cxmutstr string,
+        int chr
+) {
+    cxstring result = cx_strrchr(cx_strcast(string), chr);
+    return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
-    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
-
-#define ptable_w(useheap, ptable, index, src) do {\
-    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
-    else ((size_t*)ptable)[index] = src;\
-    } while (0);
-
+#ifndef CX_STRSTR_SBO_SIZE
+#define CX_STRSTR_SBO_SIZE 512
+#endif
+unsigned const cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
 
-static const char* ucx_strstr(
-        const char *str,
-        size_t length,
-        const char *match,
-        size_t matchlen,
-        size_t *newlen)
-{
-    *newlen = length;
-    if (matchlen == 0) {
-        return str;
+cxstring cx_strstr(
+        cxstring haystack,
+        cxstring needle
+) {
+    if (needle.length == 0) {
+        return haystack;
     }
-    
-    const char *result = NULL;
-    size_t resultlen = 0;
-    
+
+    // optimize for single-char needles
+    if (needle.length == 1) {
+        return cx_strchr(haystack, *needle.ptr);
+    }
+
     /*
      * IMPORTANT:
-     * our prefix table contains the prefix length PLUS ONE
-     * this is our decision, because we want to use the full range of size_t
-     * the original algorithm needs a (-1) at one single place
-     * and we want to avoid that
+     * Our prefix table contains the prefix length PLUS ONE
+     * this is our decision, because we want to use the full range of size_t.
+     * The original algorithm needs a (-1) at one single place,
+     * and we want to avoid that.
      */
-    
-    /* static prefix table */
-    static uint8_t s_prefix_table[256];
-    
-    /* check pattern length and use appropriate prefix table */
-    /* if the pattern exceeds static prefix table, allocate on the heap */
-    register int useheap = matchlen > 255;
-    register void* ptable = useheap ?
-        calloc(matchlen+1, sizeof(size_t)): s_prefix_table;
-    
-    /* keep counter in registers */
+
+    // local prefix table
+    size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
+
+    // check needle length and use appropriate prefix table
+    // if the needle exceeds the local prefix table, allocate on the heap
+    bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
+    register size_t *ptable = useheap ? calloc(needle.length + 1,
+                                               sizeof(size_t)) : s_prefix_table;
+
+    // keep counter in registers
     register size_t i, j;
-    
-    /* fill prefix table */
-    i = 0; j = 0;
-    ptable_w(useheap, ptable, i, j);
-    while (i < matchlen) {
-        while (j >= 1 && match[j-1] != match[i]) {
-            ptable_r(j, useheap, ptable, j-1);
+
+    // fill prefix table
+    i = 0;
+    j = 0;
+    ptable[i] = j;
+    while (i < needle.length) {
+        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
+            j = ptable[j - 1];
         }
-        i++; j++;
-        ptable_w(useheap, ptable, i, j);
+        i++;
+        j++;
+        ptable[i] = j;
     }
 
-    /* search */
-    i = 0; j = 1;
-    while (i < length) {
-        while (j >= 1 && str[i] != match[j-1]) {
-            ptable_r(j, useheap, ptable, j-1);
+    // search
+    cxstring result = {NULL, 0};
+    i = 0;
+    j = 1;
+    while (i < haystack.length) {
+        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
+            j = ptable[j - 1];
         }
-        i++; j++;
-        if (j-1 == matchlen) {
-            size_t start = i - matchlen;
-            result = str + start;
-            resultlen = length - start;
+        i++;
+        j++;
+        if (j - 1 == needle.length) {
+            size_t start = i - needle.length;
+            result.ptr = haystack.ptr + start;
+            result.length = haystack.length - start;
             break;
         }
     }
 
-    /* if prefix table was allocated on the heap, free it */
+    // if prefix table was allocated on the heap, free it
     if (ptable != s_prefix_table) {
         free(ptable);
     }
-    
-    *newlen = resultlen;
-    return result;
-}
-
-sstr_t scstrsstr(sstr_t string, scstr_t match) {
-    sstr_t result;
-    
-    size_t reslen;
-    const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
-    if(!resstr) {
-        result.ptr = NULL;
-        result.length = 0;
-        return result;
-    }
-    
-    size_t pos = resstr - string.ptr;
-    result.ptr = string.ptr + pos;
-    result.length = reslen;
-    
-    return result;
-}
-
-scstr_t scstrscstr(scstr_t string, scstr_t match) {
-    scstr_t result;
-    
-    size_t reslen;
-    const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
-    if(!resstr) {
-        result.ptr = NULL;
-        result.length = 0;
-        return result;
-    }
-    
-    size_t pos = resstr - string.ptr;
-    result.ptr = string.ptr + pos;
-    result.length = reslen;
-    
-    return result;
-}
-
-#undef ptable_r
-#undef ptable_w
-
-sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
-    return scstrsplit_a(ucx_default_allocator(), s, d, n);
-}
-
-sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
-    if (s.length == 0 || d.length == 0) {
-        *n = -1;
-        return NULL;
-    }
-    
-    /* special cases: delimiter is at least as large as the string */
-    if (d.length >= s.length) {
-        /* exact match */
-        if (sstrcmp(s, d) == 0) {
-            *n = 0;
-            return NULL;
-        } else /* no match possible */ {
-            *n = 1;
-            sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
-            if(result) {
-                *result = sstrdup_a(allocator, s);
-            } else {
-                *n = -2;
-            }
-            return result;
-        }
-    }
-    
-    ssize_t nmax = *n;
-    size_t arrlen = 16;
-    sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
-
-    if (result) {
-        scstr_t curpos = s;
-        ssize_t j = 1;
-        while (1) {
-            scstr_t match;
-            /* optimize for one byte delimiters */
-            if (d.length == 1) {
-                match = curpos;
-                for (size_t i = 0 ; i < curpos.length ; i++) {
-                    if (curpos.ptr[i] == *(d.ptr)) {
-                        match.ptr = curpos.ptr + i;
-                        break;
-                    }
-                    match.length--;
-                }
-            } else {
-                match = scstrscstr(curpos, d);
-            }
-            if (match.length > 0) {
-                /* is this our last try? */
-                if (nmax == 0 || j < nmax) {
-                    /* copy the current string to the array */
-                    scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
-                    result[j-1] = sstrdup_a(allocator, item);
-                    size_t processed = item.length + d.length;
-                    curpos.ptr += processed;
-                    curpos.length -= processed;
-
-                    /* allocate memory for the next string */
-                    j++;
-                    if (j > arrlen) {
-                        arrlen *= 2;
-                        size_t reallocsz;
-                        sstr_t* reallocated = NULL;
-                        if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
-                            reallocated = (sstr_t*) alrealloc(
-                                    allocator, result, reallocsz);
-                        }
-                        if (reallocated) {
-                            result = reallocated;
-                        } else {
-                            for (ssize_t i = 0 ; i < j-1 ; i++) {
-                                alfree(allocator, result[i].ptr);
-                            }
-                            alfree(allocator, result);
-                            *n = -2;
-                            return NULL;
-                        }
-                    }
-                } else {
-                    /* nmax reached, copy the _full_ remaining string */
-                    result[j-1] = sstrdup_a(allocator, curpos);
-                    break;
-                }
-            } else {
-                /* no more matches, copy last string */
-                result[j-1] = sstrdup_a(allocator, curpos);
-                break;
-            }
-        }
-        *n = j;
-    } else {
-        *n = -2;
-    }
 
     return result;
 }
 
-int scstrcmp(scstr_t s1, scstr_t s2) {
+cxmutstr cx_strstr_m(
+        cxmutstr haystack,
+        cxstring needle
+) {
+    cxstring result = cx_strstr(cx_strcast(haystack), needle);
+    return (cxmutstr) {(char *) result.ptr, result.length};
+}
+
+size_t cx_strsplit(
+        cxstring string,
+        cxstring delim,
+        size_t limit,
+        cxstring *output
+) {
+    // special case: output limit is zero
+    if (limit == 0) return 0;
+
+    // special case: delimiter is empty
+    if (delim.length == 0) {
+        output[0] = string;
+        return 1;
+    }
+
+    // special cases: delimiter is at least as large as the string
+    if (delim.length >= string.length) {
+        // exact match yields two empty tokens - only if output has room for both
+        if (limit > 1 && cx_strcmp(string, delim) == 0) {
+            output[0] = cx_strn(string.ptr, 0);
+            output[1] = cx_strn(string.ptr + string.length, 0);
+            return 2;
+        } else {
+            // no match possible, or limit of one: the whole string is the token
+            output[0] = string;
+            return 1;
+        }
+    }
+
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            // is the limit reached?
+            if (n < limit) {
+                // copy the current string to the array
+                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
+                output[n - 1] = item;
+                size_t processed = item.length + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                // limit reached, copy the _full_ remaining string
+                output[n - 1] = curpos;
+                break;
+            }
+        } else {
+            // no more matches, copy last string
+            output[n - 1] = curpos;
+            break;
+        }
+    }
+
+    return n;
+}
+
+size_t cx_strsplit_a(
+        const CxAllocator *allocator,
+        cxstring string,
+        cxstring delim,
+        size_t limit,
+        cxstring **output
+) {
+    // find out how many splits we're going to make and allocate memory
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (delim.length > 0 && match.length > 0) {
+            // is the limit reached?
+            if (n < limit) {
+                size_t processed = match.ptr - curpos.ptr + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                // limit reached
+                break;
+            }
+        } else {
+            // no more matches (an empty delimiter never splits the string)
+            break;
+        }
+    }
+    *output = cxCalloc(allocator, n, sizeof(cxstring)); // checked below
+    return *output == NULL ? 0 : cx_strsplit(string, delim, n, *output);
+}
+
+size_t cx_strsplit_m(
+        cxmutstr string,
+        cxstring delim,
+        size_t limit,
+        cxmutstr *output
+) {
+    return cx_strsplit(cx_strcast(string),
+                       delim, limit, (cxstring *) output);
+}
+
+size_t cx_strsplit_ma(
+        const CxAllocator *allocator,
+        cxmutstr string,
+        cxstring delim,
+        size_t limit,
+        cxmutstr **output
+) {
+    return cx_strsplit_a(allocator, cx_strcast(string),
+                         delim, limit, (cxstring **) output);
+}
+
+int cx_strcmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
@@ -503,7 +442,10 @@
     }
 }
 
-int scstrcasecmp(scstr_t s1, scstr_t s2) {
+int cx_strcasecmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
@@ -517,216 +459,186 @@
     }
 }
 
-sstr_t scstrdup(scstr_t s) {
-    return sstrdup_a(ucx_default_allocator(), s);
+int cx_strcmp_p(
+        const void *s1,
+        const void *s2
+) {
+    const cxstring *left = s1;
+    const cxstring *right = s2;
+    return cx_strcmp(*left, *right);
+}
+
+int cx_strcasecmp_p(
+        const void *s1,
+        const void *s2
+) {
+    const cxstring *left = s1;
+    const cxstring *right = s2;
+    return cx_strcasecmp(*left, *right);
 }
 
-sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
-    sstr_t newstring;
-    newstring.ptr = (char*)almalloc(allocator, s.length + 1);
-    if (newstring.ptr) {
-        newstring.length = s.length;
-        newstring.ptr[newstring.length] = 0;
-        
-        memcpy(newstring.ptr, s.ptr, s.length);
-    } else {
-        newstring.length = 0;
+cxmutstr cx_strdup_a(
+        const CxAllocator *allocator,
+        cxstring string
+) {
+    cxmutstr result = {
+            cxMalloc(allocator, string.length + 1),
+            string.length
+    };
+    if (result.ptr == NULL) {
+        result.length = 0;
+        return result;
     }
-    
-    return newstring;
+    memcpy(result.ptr, string.ptr, string.length);
+    result.ptr[string.length] = '\0';
+    return result;
+}
+
+cxstring cx_strtrim(cxstring string) {
+    cxstring result = string; // ctype needs unsigned char values: see casts
+    // TODO: optimize by comparing multiple bytes at once
+    while (result.length > 0 && isspace((unsigned char) *result.ptr)) {
+        result.ptr++;
+        result.length--;
+    }
+    while (result.length > 0 && isspace((unsigned char) result.ptr[result.length - 1])) {
+        result.length--;
+    }
+    return result;
 }
 
+cxmutstr cx_strtrim_m(cxmutstr string) {
+    cxstring result = cx_strtrim(cx_strcast(string));
+    return (cxmutstr) {(char *) result.ptr, result.length};
+}
 
-static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
-    const char *newptr = s;
-    size_t length = len;
-    
-    while(length > 0 && isspace(*newptr)) {
-        newptr++;
-        length--;
-    }
-    while(length > 0 && isspace(newptr[length-1])) {
-        length--;
-    }
-    
-    *newlen = length;
-    return newptr - s;
+bool cx_strprefix(
+        cxstring string,
+        cxstring prefix
+) {
+    if (string.length < prefix.length) return false;
+    return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
+}
+
+bool cx_strsuffix(
+        cxstring string,
+        cxstring suffix
+) {
+    if (string.length < suffix.length) return false;
+    return memcmp(string.ptr + string.length - suffix.length,
+                  suffix.ptr, suffix.length) == 0;
 }
 
-sstr_t sstrtrim(sstr_t string) {
-    sstr_t newstr;
-    newstr.ptr = string.ptr
-                 + ucx_strtrim(string.ptr, string.length, &newstr.length);
-    return newstr;
+bool cx_strcaseprefix(
+        cxstring string,
+        cxstring prefix
+) {
+    if (string.length < prefix.length) return false;
+#ifdef _WIN32
+    return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
+#else
+    return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
+#endif
 }
 
-scstr_t scstrtrim(scstr_t string) {
-    scstr_t newstr;
-    newstr.ptr = string.ptr
-                 + ucx_strtrim(string.ptr, string.length, &newstr.length);
-    return newstr;
+bool cx_strcasesuffix(
+        cxstring string,
+        cxstring suffix
+) {
+    if (string.length < suffix.length) return false;
+#ifdef _WIN32
+    return _strnicmp(string.ptr+string.length-suffix.length,
+                  suffix.ptr, suffix.length) == 0;
+#else
+    return strncasecmp(string.ptr + string.length - suffix.length,
+                       suffix.ptr, suffix.length) == 0;
+#endif
 }
 
-int scstrprefix(scstr_t string, scstr_t prefix) {
-    if (string.length == 0) {
-        return prefix.length == 0;
-    }
-    if (prefix.length == 0) {
-        return 1;
-    }
-    
-    if (prefix.length > string.length) {
-        return 0;
-    } else {
-        return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
+void cx_strlower(cxmutstr string) {
+    cx_for_n(i, string.length) { // cast: ctype requires unsigned char values
+        string.ptr[i] = (char) tolower((unsigned char) string.ptr[i]);
     }
 }
 
-int scstrsuffix(scstr_t string, scstr_t suffix) {
-    if (string.length == 0) {
-        return suffix.length == 0;
-    }
-    if (suffix.length == 0) {
-        return 1;
-    }
-    
-    if (suffix.length > string.length) {
-        return 0;
-    } else {
-        return memcmp(string.ptr+string.length-suffix.length,
-            suffix.ptr, suffix.length) == 0;
-    }
-}
-
-int scstrcaseprefix(scstr_t string, scstr_t prefix) {
-    if (string.length == 0) {
-        return prefix.length == 0;
-    }
-    if (prefix.length == 0) {
-        return 1;
-    }
-    
-    if (prefix.length > string.length) {
-        return 0;
-    } else {
-        scstr_t subs = scstrsubsl(string, 0, prefix.length);
-        return scstrcasecmp(subs, prefix) == 0;
+void cx_strupper(cxmutstr string) {
+    cx_for_n(i, string.length) { // cast: ctype requires unsigned char values
+        string.ptr[i] = (char) toupper((unsigned char) string.ptr[i]);
     }
 }
 
-int scstrcasesuffix(scstr_t string, scstr_t suffix) {
-    if (string.length == 0) {
-        return suffix.length == 0;
-    }
-    if (suffix.length == 0) {
-        return 1;
-    }
-    
-    if (suffix.length > string.length) {
-        return 0;
-    } else {
-        scstr_t subs = scstrsubs(string, string.length-suffix.length);
-        return scstrcasecmp(subs, suffix) == 0;
-    }
-}
-
-sstr_t scstrlower(scstr_t string) {
-    sstr_t ret = sstrdup(string);
-    for (size_t i = 0; i < ret.length ; i++) {
-        ret.ptr[i] = tolower(ret.ptr[i]);
-    }
-    return ret;
-}
+#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
+#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
+#endif
 
-sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
-    sstr_t ret = sstrdup_a(allocator, string);
-    for (size_t i = 0; i < ret.length ; i++) {
-        ret.ptr[i] = tolower(ret.ptr[i]);
-    }
-    return ret;
-}
-
-sstr_t scstrupper(scstr_t string) {
-    sstr_t ret = sstrdup(string);
-    for (size_t i = 0; i < ret.length ; i++) {
-        ret.ptr[i] = toupper(ret.ptr[i]);
-    }
-    return ret;
-}
-
-sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
-    sstr_t ret = sstrdup_a(allocator, string);
-    for (size_t i = 0; i < ret.length ; i++) {
-        ret.ptr[i] = toupper(ret.ptr[i]);
-    }
-    return ret;
-}
-
-#define REPLACE_INDEX_BUFFER_MAX 100
-
-struct scstrreplace_ibuf {
-    size_t* buf;
-    unsigned int len; /* small indices */
-    struct scstrreplace_ibuf* next;
+struct cx_strreplace_ibuf {
+    size_t *buf;
+    struct cx_strreplace_ibuf *next;
+    unsigned int len;
 };
 
-static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
     while (buf) {
-        struct scstrreplace_ibuf *next = buf->next;
+        struct cx_strreplace_ibuf *next = buf->next;
         free(buf->buf);
         free(buf);
         buf = next;
     }
 }
 
-sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
-                     scstr_t pattern, scstr_t replacement, size_t replmax) {
+cxmutstr cx_strreplacen_a(
+        const CxAllocator *allocator,
+        cxstring str,
+        cxstring pattern,
+        cxstring replacement,
+        size_t replmax
+) {
 
     if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
-        return sstrdup(str);
+        return cx_strdup_a(allocator, str);
 
-    /* Compute expected buffer length */
+    // Compute expected buffer length
     size_t ibufmax = str.length / pattern.length;
     size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
-    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
-        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
+        ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
     }
 
-    /* Allocate first index buffer */
-    struct scstrreplace_ibuf *firstbuf, *curbuf;
-    firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
-    if (!firstbuf) return sstrn(NULL, 0);
+    // Allocate first index buffer
+    struct cx_strreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
+    if (!firstbuf) return cx_mutstrn(NULL, 0);
     firstbuf->buf = calloc(ibuflen, sizeof(size_t));
     if (!firstbuf->buf) {
         free(firstbuf);
-        return sstrn(NULL, 0);
+        return cx_mutstrn(NULL, 0);
     }
 
-    /* Search occurrences */
-    scstr_t searchstr = str;
+    // Search occurrences
+    cxstring searchstr = str;
     size_t found = 0;
     do {
-        scstr_t match = scstrscstr(searchstr, pattern);
+        cxstring match = cx_strstr(searchstr, pattern);
         if (match.length > 0) {
-            /* Allocate next buffer in chain, if required */
+            // Allocate next buffer in chain, if required
             if (curbuf->len == ibuflen) {
-                struct scstrreplace_ibuf *nextbuf =
-                        calloc(1, sizeof(struct scstrreplace_ibuf));
+                struct cx_strreplace_ibuf *nextbuf =
+                        calloc(1, sizeof(struct cx_strreplace_ibuf));
                 if (!nextbuf) {
-                    scstrrepl_free_ibuf(firstbuf);
-                    return sstrn(NULL, 0);
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
                 }
                 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
                 if (!nextbuf->buf) {
                     free(nextbuf);
-                    scstrrepl_free_ibuf(firstbuf);
-                    return sstrn(NULL, 0);
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
                 }
                 curbuf->next = nextbuf;
                 curbuf = nextbuf;
             }
 
-            /* Record match index */
+            // Record match index
             found++;
             size_t idx = match.ptr - str.ptr;
             curbuf->buf[curbuf->len++] = idx;
@@ -737,8 +649,8 @@
         }
     } while (searchstr.length > 0 && found < replmax);
 
-    /* Allocate result string */
-    sstr_t result;
+    // Allocate result string
+    cxmutstr result;
     {
         ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
         size_t rcount = 0;
@@ -748,60 +660,127 @@
             curbuf = curbuf->next;
         } while (curbuf);
         result.length = str.length + rcount * adjlen;
-        result.ptr = almalloc(allocator, result.length);
+        result.ptr = cxMalloc(allocator, result.length + 1);
         if (!result.ptr) {
-            scstrrepl_free_ibuf(firstbuf);
-            return sstrn(NULL, 0);
+            cx_strrepl_free_ibuf(firstbuf);
+            return cx_mutstrn(NULL, 0);
         }
     }
 
-    /* Build result string */
+    // Build result string
     curbuf = firstbuf;
     size_t srcidx = 0;
-    char* destptr = result.ptr;
+    char *destptr = result.ptr;
     do {
         for (size_t i = 0; i < curbuf->len; i++) {
-            /* Copy source part up to next match*/
+            // Copy source part up to next match
             size_t idx = curbuf->buf[i];
             size_t srclen = idx - srcidx;
             if (srclen > 0) {
-                memcpy(destptr, str.ptr+srcidx, srclen);
+                memcpy(destptr, str.ptr + srcidx, srclen);
                 destptr += srclen;
                 srcidx += srclen;
             }
 
-            /* Copy the replacement and skip the source pattern */
+            // Copy the replacement and skip the source pattern
             srcidx += pattern.length;
             memcpy(destptr, replacement.ptr, replacement.length);
             destptr += replacement.length;
         }
         curbuf = curbuf->next;
     } while (curbuf);
-    memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
+    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
 
-    /* Free index buffer */
-    scstrrepl_free_ibuf(firstbuf);
+    // Result is guaranteed to be zero-terminated
+    result.ptr[result.length] = '\0';
+
+    // Free index buffer
+    cx_strrepl_free_ibuf(firstbuf);
 
     return result;
 }
 
-sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
-        scstr_t replacement, size_t replmax) {
-    return scstrreplacen_a(ucx_default_allocator(),
-            str, pattern, replacement, replmax);
+// Creates a tokenization context for str with delim as the delimiter.
+// Nothing is scanned here: tokens are produced by subsequent calls to
+// cx_strtok_next(), which yields at most limit tokens for this context.
+CxStrtokCtx cx_strtok(
+        cxstring str,
+        cxstring delim,
+        size_t limit
+) {
+    return (CxStrtokCtx) {
+            // what to tokenize and how many tokens to hand out at most
+            .str = str, .delim = delim, .limit = limit,
+            // every cursor starts at offset zero, no token found yet
+            .pos = 0, .next_pos = 0, .delim_pos = 0, .found = 0,
+            // additional delimiters are opt-in via cx_strtok_delim()
+            .delim_more = NULL, .delim_more_count = 0
+    };
+}
+
+// Convenience variant of cx_strtok() for a mutable source string, which
+// is converted with cx_strcast() before the context is created.
+CxStrtokCtx cx_strtok_m(cxmutstr str, cxstring delim, size_t limit) {
+    cxstring source = cx_strcast(str);
+    CxStrtokCtx ctx = cx_strtok(source, delim, limit);
+    return ctx;
 }
 
+// Stores the next token of ctx->str in *token and advances the context.
+// Returns false, without touching *token, if no further token is produced.
+bool cx_strtok_next(CxStrtokCtx *ctx, cxstring *token) {
+    // done when the token limit is hit or the string is used up
+    if (ctx->found >= ctx->limit) return false;
+    if (ctx->delim_pos >= ctx->str.length) return false;
 
-// type adjustment functions
-scstr_t ucx_sc2sc(scstr_t str) {
-    return str;
+    // the part of the string behind the previously consumed delimiter
+    cxstring rest = cx_strsubs(ctx->str, ctx->next_pos);
+    // nearest delimiter seen so far, a zero length means "none found"
+    cxstring nearest = cx_strstr(rest, ctx->delim);
+    if (nearest.length > 0) {
+        nearest.length = ctx->delim.length; // keep only the delimiter
+    }
+
+    // an additional delimiter wins if it occurs earlier in the string
+    if (ctx->delim_more_count > 0) {
+        cx_for_n(i, ctx->delim_more_count) {
+            cxstring hit = cx_strstr(rest, ctx->delim_more[i]);
+            if (hit.length > 0) {
+                if (nearest.length == 0 || hit.ptr < nearest.ptr) {
+                    nearest = (cxstring) {hit.ptr, ctx->delim_more[i].length};
+                }
+            }
+        }
+    }
+
+    // without any delimiter the token extends to the end of the string
+    size_t start = ctx->next_pos;
+    size_t end = ctx->str.length;
+    if (nearest.length > 0) {
+        end = (size_t) (nearest.ptr - ctx->str.ptr);
+    }
+    // publish the token and advance the context behind the delimiter
+    token->ptr = ctx->str.ptr + start;
+    token->length = end - start;
+    ctx->pos = start;
+    ctx->delim_pos = end;
+    ctx->next_pos = end + nearest.length;
+    ctx->found++;
+    return true;
 }
-scstr_t ucx_ss2sc(sstr_t str) {
-    scstr_t cs;
-    cs.ptr = str.ptr;
-    cs.length = str.length;
-    return cs;
+
+// Mutable variant of cx_strtok_next(). NOTE(review): *token is written
+// through a cxstring pointer; presumably both structs share one layout.
+bool cx_strtok_next_m(CxStrtokCtx *ctx, cxmutstr *token) {
+    cxstring *view = (cxstring *) token;
+    return cx_strtok_next(ctx, view);
 }
-scstr_t ucx_ss2c_s(scstr_t c) {
-    return c;
+
+// Registers count additional delimiters that cx_strtok_next() considers
+// besides the primary one; a count of zero disables them again.
+// Only the pointer is stored (no copy), so the array has to stay valid
+// for as long as the context is used for tokenization.
+void cx_strtok_delim(CxStrtokCtx *ctx, const cxstring *delim, size_t count) {
+    ctx->delim_more_count = count;
+    ctx->delim_more = delim;
 }

mercurial