diff -r b60487c3ec36 -r af685cc9d623 ucx/string.c
--- a/ucx/string.c	Sun Aug 31 14:39:13 2025 +0200
+++ b/ucx/string.c	Sat Nov 08 23:06:11 2025 +0100
@@ -25,6 +25,10 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
+#ifdef MEMRCHR_NEED_GNU
+#define _GNU_SOURCE
+#endif
+
 #include "cx/string.h"
 
 #include <string.h>
@@ -33,6 +37,7 @@
 #include <errno.h>
 #include <limits.h>
 #include <float.h>
+#include <ctype.h>
 
 #ifdef _WIN32
 #define cx_strcasecmp_impl _strnicmp
@@ -42,7 +47,7 @@
 #endif
 
 cxmutstr cx_mutstr(char *cstring) {
-    return (cxmutstr) {cstring, strlen(cstring)};
+    return (cxmutstr) {cstring, cstring ? strlen(cstring) : 0}; // NULL: length 0
 }
 
 cxmutstr cx_mutstrn(
@@ -53,7 +58,7 @@
 }
 
 cxstring cx_str(const char *cstring) {
-    return (cxstring) {cstring, strlen(cstring)};
+    return (cxstring) {cstring, cstring ? strlen(cstring) : 0}; // NULL: length 0
 }
 
 cxstring cx_strn(
@@ -65,7 +70,7 @@
 
 void cx_strfree(cxmutstr *str) {
     if (str == NULL) return;
-    free(str->ptr);
+    cxFreeDefault(str->ptr);
     str->ptr = NULL;
     str->length = 0;
 }
@@ -80,6 +85,22 @@
     str->length = 0;
 }
 
+int cx_strcpy_a(
+        const CxAllocator *alloc,
+        cxmutstr *dest,
+        cxstring src
+) {
+    // make room for the copied bytes plus the terminator
+    if (cxReallocate(alloc, &dest->ptr, src.length + 1)) {
+        return 1;
+    }
+    // an empty source may carry a NULL pointer, which memcpy must not see
+    if (src.length > 0) memcpy(dest->ptr, src.ptr, src.length);
+    dest->length = src.length;
+    dest->ptr[dest->length] = '\0';
+    return 0;
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -106,27 +127,16 @@
         ...
 ) {
     if (count == 0) return str;
-
-    cxstring strings_stack[8];
-    cxstring *strings;
-    if (count > 8) {
-        strings = calloc(count, sizeof(cxstring));
-        if (strings == NULL) {
-            return (cxmutstr) {NULL, 0};
-        }
-    } else {
-        strings = strings_stack;
-    }
-
     va_list ap;
     va_start(ap, count);
+    va_list ap2;
+    va_copy(ap2, ap);
 
-    // get all args and overall length
+    // compute overall length
     bool overflow = false;
     size_t slen = str.length;
     for (size_t i = 0; i < count; i++) {
-        cxstring s = va_arg (ap, cxstring);
-        strings[i] = s;
+        cxstring s = va_arg(ap, cxstring);
-        if (slen > SIZE_MAX - str.length) overflow = true;
+        if (slen > SIZE_MAX - s.length) overflow = true; // would the sum wrap?
         slen += s.length;
     }
@@ -134,10 +144,8 @@
 
     // abort in case of overflow
     if (overflow) {
+        va_end(ap2);
         errno = EOVERFLOW;
-        if (strings != strings_stack) {
-            free(strings);
-        }
         return (cxmutstr) { NULL, 0 };
     }
 
@@ -149,9 +157,7 @@
         newstr = cxRealloc(alloc, str.ptr, slen + 1);
     }
     if (newstr == NULL) {
-        if (strings != strings_stack) {
-            free(strings);
-        }
+        va_end(ap2);
         return (cxmutstr) {NULL, 0};
     }
     str.ptr = newstr;
@@ -160,19 +166,15 @@
     size_t pos = str.length;
     str.length = slen;
     for (size_t i = 0; i < count; i++) {
-        cxstring s = strings[i];
+        cxstring s = va_arg(ap2, cxstring);
         memcpy(str.ptr + pos, s.ptr, s.length);
         pos += s.length;
     }
+    va_end(ap2);
 
     // terminate string
     str.ptr[str.length] = '\0';
 
-    // free temporary array
-    if (strings != strings_stack) {
-        free(strings);
-    }
-
     return str;
 }
 
@@ -234,19 +236,24 @@
 }
 
 cxstring cx_strrchr(
-        cxstring string,
-        int chr
+    cxstring string,
+    int chr
 ) {
+#ifdef WITH_MEMRCHR
+    const char *ret = memrchr(string.ptr, 0xFF & chr, string.length);
+    if (ret == NULL) return (cxstring) {NULL, 0};
+    return (cxstring) {ret, string.length - (ret - string.ptr)};
+#else
     chr = 0xFF & chr;
     size_t i = string.length;
     while (i > 0) {
         i--;
-        // TODO: improve by comparing multiple bytes at once
-        if (string.ptr[i] == chr) {
+        if ((unsigned char) string.ptr[i] == chr) { // chr is in 0..255
             return cx_strsubs(string, i);
         }
     }
     return (cxstring) {NULL, 0};
+#endif
 }
 
 cxmutstr cx_strrchr_m(
@@ -289,8 +296,9 @@
     // check needle length and use appropriate prefix table
     // if the pattern exceeds static prefix table, allocate on the heap
     const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
-    register size_t *ptable = useheap ? calloc(needle.length + 1,
-                                               sizeof(size_t)) : s_prefix_table;
+    register size_t *ptable = useheap
+        ? cxCallocDefault(needle.length + 1, sizeof(size_t))
+        : s_prefix_table;
 
     // keep counter in registers
     register size_t i, j;
@@ -328,7 +336,7 @@
 
     // if prefix table was allocated on the heap, free it
     if (useheap) {
-        free(ptable);
+        cxFreeDefault(ptable);
     }
 
     return result;
@@ -453,7 +461,7 @@
                          delim, limit, (cxstring **) output);
 }
 
-int cx_strcmp(
+int cx_strcmp_(
         cxstring s1,
         cxstring s2
 ) {
@@ -470,7 +478,7 @@
     }
 }
 
-int cx_strcasecmp(
+int cx_strcasecmp_(
         cxstring s1,
         cxstring s2
 ) {
@@ -522,19 +530,13 @@
     return result;
 }
 
-static bool str_isspace(char c) {
-    // TODO: remove once UCX has public API for this
-    return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f';
-}
-
 cxstring cx_strtrim(cxstring string) {
     cxstring result = string;
-    // TODO: optimize by comparing multiple bytes at once
-    while (result.length > 0 && str_isspace(*result.ptr)) {
+    while (result.length > 0 && isspace((unsigned char) *result.ptr)) {
         result.ptr++;
         result.length--;
     }
-    while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) {
+    while (result.length > 0 && isspace((unsigned char) result.ptr[result.length - 1])) {
         result.length--;
     }
     return result;
 }
@@ -545,7 +547,7 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-bool cx_strprefix(
+bool cx_strprefix_(
         cxstring string,
         cxstring prefix
 ) {
@@ -553,7 +555,7 @@
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
 }
 
-bool cx_strsuffix(
+bool cx_strsuffix_(
         cxstring string,
         cxstring suffix
 ) {
@@ -562,7 +564,7 @@
                   suffix.ptr, suffix.length) == 0;
 }
 
-bool cx_strcaseprefix(
+bool cx_strcaseprefix_(
         cxstring string,
         cxstring prefix
 ) {
@@ -574,7 +576,7 @@
 #endif
 }
 
-bool cx_strcasesuffix(
+bool cx_strcasesuffix_(
         cxstring string,
         cxstring suffix
 ) {
@@ -588,27 +590,6 @@
 #endif
 }
 
-#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
-#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
-#endif
-
-struct cx_strreplace_ibuf {
-    size_t *buf;
-    struct cx_strreplace_ibuf *next;
-    unsigned int len;
-};
-
-static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
-    // remember, the first data is on the stack!
-    buf = buf->next;
-    while (buf) {
-        struct cx_strreplace_ibuf *next = buf->next;
-        free(buf->buf);
-        free(buf);
-        buf = next;
-    }
-}
-
 cxmutstr cx_strreplacen_a(
         const CxAllocator *allocator,
         cxstring str,
@@ -616,108 +597,60 @@
         cxstring replacement,
         size_t replmax
 ) {
+    // special cases
+    if (search.length == 0 || search.length > str.length || replmax == 0) {
+        return cx_strdup_a(allocator, str);
+    }
 
-    if (search.length == 0 || search.length > str.length || replmax == 0)
-        return cx_strdup_a(allocator, str);
+    size_t in_len = str.length;
+    size_t search_len = search.length;
+    size_t repl_len = replacement.length;
 
-    // Compute expected buffer length
-    size_t ibufmax = str.length / search.length;
-    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
-    if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
-        ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
+    // first run, count the occurrences
+    // and remember where the first is
+    size_t occurrences = 1;
+    cxstring first = cx_strstr(str, search);
+    if (first.length == 0) {
+        // special case, no replacements
+        return cx_strdup_a(allocator, str);
+    }
+    cxstring tmp = cx_strsubs(first, search_len);
+    while (occurrences < replmax &&
+            (tmp = cx_strstr(tmp, search)).length > 0) {
+        occurrences++;
+        tmp = cx_strsubs(tmp, search_len);
     }
 
-    // First index buffer can be on the stack
-    struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf;
-    size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE];
-    ibuf.buf = ibuf_sbo;
-    ibuf.next = NULL;
-    ibuf.len = 0;
+    size_t out_len = in_len - occurrences * search_len; // cannot underflow
+    if (repl_len > (SIZE_MAX - 1 - out_len) / occurrences) {
+        errno = EOVERFLOW; // result plus terminator exceeds SIZE_MAX
+        return (cxmutstr) {NULL, 0};
+    }
+    out_len += occurrences * repl_len;
+    cxmutstr out = {cxMalloc(allocator, out_len + 1), out_len};
+    if (out.ptr == NULL) return out;
 
-    // Search occurrences
-    cxstring searchstr = str;
-    size_t found = 0;
-    do {
-        cxstring match = cx_strstr(searchstr, search);
-        if (match.length > 0) {
-            // Allocate next buffer in chain, if required
-            if (curbuf->len == ibuflen) {
-                struct cx_strreplace_ibuf *nextbuf =
-                        calloc(1, sizeof(struct cx_strreplace_ibuf));
-                if (!nextbuf) {
-                    cx_strrepl_free_ibuf(&ibuf);
-                    return cx_mutstrn(NULL, 0);
-                }
-                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
-                if (!nextbuf->buf) {
-                    free(nextbuf);
-                    cx_strrepl_free_ibuf(&ibuf);
-                    return cx_mutstrn(NULL, 0);
-                }
-                curbuf->next = nextbuf;
-                curbuf = nextbuf;
-            }
-
-            // Record match index
-            found++;
-            size_t idx = match.ptr - str.ptr;
-            curbuf->buf[curbuf->len++] = idx;
-            searchstr.ptr = match.ptr + search.length;
-            searchstr.length = str.length - idx - search.length;
-        } else {
-            break;
-        }
-    } while (searchstr.length > 0 && found < replmax);
-
-    // Allocate result string
-    cxmutstr result;
-    {
-        long long adjlen = (long long) replacement.length - (long long) search.length;
-        size_t rcount = 0;
-        curbuf = &ibuf;
-        do {
-            rcount += curbuf->len;
-            curbuf = curbuf->next;
-        } while (curbuf);
-        result.length = str.length + rcount * adjlen;
-        result.ptr = cxMalloc(allocator, result.length + 1);
-        if (!result.ptr) {
-            cx_strrepl_free_ibuf(&ibuf);
-            return cx_mutstrn(NULL, 0);
-        }
+    // second run: perform the replacements
+    // but start where we found the first occurrence
+    const char *inp = str.ptr;
+    tmp = first;
+    char *outp = out.ptr;
+    while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) {
+        size_t copylen = tmp.ptr - inp;
+        memcpy(outp, inp, copylen);
+        outp += copylen;
+        memcpy(outp, replacement.ptr, repl_len);
+        outp += repl_len;
+        inp += copylen + search_len;
+        tmp = cx_strsubs(tmp, search_len);
     }
 
-    // Build result string
-    curbuf = &ibuf;
-    size_t srcidx = 0;
-    char *destptr = result.ptr;
-    do {
-        for (size_t i = 0; i < curbuf->len; i++) {
-            // Copy source part up to next match
-            size_t idx = curbuf->buf[i];
-            size_t srclen = idx - srcidx;
-            if (srclen > 0) {
-                memcpy(destptr, str.ptr + srcidx, srclen);
-                destptr += srclen;
-                srcidx += srclen;
-            }
+    // add the remaining string
+    size_t copylen = in_len - (inp - str.ptr);
+    memcpy(outp, inp, copylen);
+    out.ptr[out_len] = '\0';
 
-            // Copy the replacement and skip the source pattern
-            srcidx += search.length;
-            memcpy(destptr, replacement.ptr, replacement.length);
-            destptr += replacement.length;
-        }
-        curbuf = curbuf->next;
-    } while (curbuf);
-    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
-
-    // Result is guaranteed to be zero-terminated
-    result.ptr[result.length] = '\0';
-
-    // Free index buffer
-    cx_strrepl_free_ibuf(&ibuf);
-
-    return result;
+    return out;
 }
 
 CxStrtokCtx cx_strtok_(
@@ -1028,11 +961,6 @@
     return 0;
 }
 
-static bool str_isdigit(char c) {
-    // TODO: remove once UCX has public API for this
-    return c >= '0' && c <= '9';
-}
-
 int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) {
     // TODO: overflow check
     // TODO: increase precision
@@ -1065,7 +993,7 @@
     // parse all digits until we find the decsep
     size_t pos = 0;
     do {
-        if (str_isdigit(str.ptr[pos])) {
+        if (isdigit((unsigned char)str.ptr[pos])) {
             result = result * 10 + (str.ptr[pos] - '0');
         } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
             break;
@@ -1094,7 +1022,7 @@
         // parse everything until exponent or end
         double factor = 1.;
         do {
-            if (str_isdigit(str.ptr[pos])) {
+            if (isdigit((unsigned char)str.ptr[pos])) {
                 factor *= 0.1;
                 result = result + factor * (str.ptr[pos] - '0');
             } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
@@ -1135,7 +1063,7 @@
     // parse the exponent
     unsigned int exp = 0;
     do {
-        if (str_isdigit(str.ptr[pos])) {
+        if (isdigit((unsigned char)str.ptr[pos])) {
             exp = 10 * exp + (str.ptr[pos] - '0');
         } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
             errno = EINVAL;