src/ucx/string.c

changeset 582
82b60a8dd55c
parent 579
e10457d74fe1
child 613
b649d66c433e
--- a/src/ucx/string.c	Mon May 26 21:02:30 2025 +0200
+++ b/src/ucx/string.c	Mon May 26 21:06:17 2025 +0200
@@ -65,7 +65,7 @@
 
 void cx_strfree(cxmutstr *str) {
     if (str == NULL) return;
-    free(str->ptr);
+    cxFreeDefault(str->ptr); // release the buffer; the fields are reset below so no stale pointer remains
     str->ptr = NULL;
     str->length = 0;
 }
@@ -80,6 +80,22 @@
     str->length = 0;
 }
 
+int cx_strcpy_a( // copies src into dest, resizing dest's buffer; returns 0 on success, 1 if cxReallocate reports non-zero
+        const CxAllocator *alloc, // handed to cxReallocate to resize dest->ptr
+        cxmutstr *dest, // ptr and length are overwritten; presumably ptr must be NULL or owned by alloc - TODO confirm
+        cxstring src // src.length bytes are read from src.ptr; no terminator is read from it
+) {
+    if (cxReallocate(alloc, &dest->ptr, src.length + 1)) { // room for the payload plus one terminator byte
+        return 1; // dest->length has not been touched on this path
+    }
+
+    memcpy(dest->ptr, src.ptr, src.length); // NOTE(review): src presumably must not point into dest's previous buffer - confirm callers
+    dest->length = src.length; // the length excludes the terminator
+    dest->ptr[dest->length] = '\0'; // memcpy copied no terminator, so append one
+
+    return 0;
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -106,27 +122,16 @@
         ...
 ) {
     if (count == 0) return str;
-
-    cxstring strings_stack[8];
-    cxstring *strings;
-    if (count > 8) {
-        strings = calloc(count, sizeof(cxstring));
-        if (strings == NULL) {
-            return (cxmutstr) {NULL, 0};
-        }
-    } else {
-        strings = strings_stack;
-    }
-
     va_list ap;
     va_start(ap, count);
+    va_list ap2;
+    va_copy(ap2, ap);
 
-    // get all args and overall length
+    // compute overall length
     bool overflow = false;
     size_t slen = str.length;
     for (size_t i = 0; i < count; i++) {
-        cxstring s = va_arg (ap, cxstring);
-        strings[i] = s;
+        cxstring s = va_arg(ap, cxstring);
         if (slen > SIZE_MAX - str.length) overflow = true;
         slen += s.length;
     }
@@ -134,10 +139,8 @@
 
     // abort in case of overflow
     if (overflow) {
+        va_end(ap2);
         errno = EOVERFLOW;
-        if (strings != strings_stack) {
-            free(strings);
-        }
         return (cxmutstr) { NULL, 0 };
     }
 
@@ -149,9 +152,7 @@
         newstr = cxRealloc(alloc, str.ptr, slen + 1);
     }
     if (newstr == NULL) {
-        if (strings != strings_stack) {
-            free(strings);
-        }
+        va_end(ap2);
         return (cxmutstr) {NULL, 0};
     }
     str.ptr = newstr;
@@ -160,19 +161,15 @@
     size_t pos = str.length;
     str.length = slen;
     for (size_t i = 0; i < count; i++) {
-        cxstring s = strings[i];
+        cxstring s = va_arg(ap2, cxstring);
         memcpy(str.ptr + pos, s.ptr, s.length);
         pos += s.length;
     }
+    va_end(ap2);
 
     // terminate string
     str.ptr[str.length] = '\0';
 
-    // free temporary array
-    if (strings != strings_stack) {
-        free(strings);
-    }
-
     return str;
 }
 
@@ -289,8 +286,9 @@
     // check needle length and use appropriate prefix table
     // if the pattern exceeds static prefix table, allocate on the heap
     const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
-    register size_t *ptable = useheap ? calloc(needle.length + 1,
-                                               sizeof(size_t)) : s_prefix_table;
+    register size_t *ptable = useheap
+        ? cxCallocDefault(needle.length + 1, sizeof(size_t))
+        : s_prefix_table;
 
     // keep counter in registers
     register size_t i, j;
@@ -328,7 +326,7 @@
 
     // if prefix table was allocated on the heap, free it
     if (useheap) {
-        free(ptable);
+        cxFreeDefault(ptable);
     }
 
     return result;
@@ -588,27 +586,6 @@
 #endif
 }
 
-#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
-#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
-#endif
-
-struct cx_strreplace_ibuf {
-    size_t *buf;
-    struct cx_strreplace_ibuf *next;
-    unsigned int len;
-};
-
-static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
-    // remember, the first data is on the stack!
-    buf = buf->next;
-    while (buf) {
-        struct cx_strreplace_ibuf *next = buf->next;
-        free(buf->buf);
-        free(buf);
-        buf = next;
-    }
-}
-
 cxmutstr cx_strreplacen_a(
         const CxAllocator *allocator,
         cxstring str,
@@ -616,108 +593,60 @@
         cxstring replacement,
         size_t replmax
 ) {
+    // special cases
+    if (search.length == 0 || search.length > str.length || replmax == 0) {
+        return cx_strdup_a(allocator, str);
+    }
 
-    if (search.length == 0 || search.length > str.length || replmax == 0)
-        return cx_strdup_a(allocator, str);
+    size_t in_len = str.length;
+    size_t search_len = search.length;
+    size_t repl_len = replacement.length;
 
-    // Compute expected buffer length
-    size_t ibufmax = str.length / search.length;
-    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
-    if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
-        ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
+    // first run, count the occurrences
+    // and remember where the first is
+    size_t occurrences = 1;
+    cxstring first = cx_strstr(str, search);
+    if (first.length == 0) {
+        // special case, no replacements
+        return cx_strdup_a(allocator, str);
+    }
+    cxstring tmp = cx_strsubs(first, search_len);
+    while (occurrences < replmax &&
+            (tmp = cx_strstr(tmp, search)).length > 0) {
+        occurrences++;
+        tmp = cx_strsubs(tmp, search_len);
     }
 
-    // First index buffer can be on the stack
-    struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf;
-    size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE];
-    ibuf.buf = ibuf_sbo;
-    ibuf.next = NULL;
-    ibuf.len = 0;
+    // calculate necessary memory
+    signed long long diff_len = (signed long long) repl_len - search_len;
+    size_t out_len = in_len + diff_len * occurrences;
+    cxmutstr out = {
+        cxMalloc(allocator, out_len + 1),
+        out_len
+    };
+    if (out.ptr == NULL) return out;
 
-    // Search occurrences
-    cxstring searchstr = str;
-    size_t found = 0;
-    do {
-        cxstring match = cx_strstr(searchstr, search);
-        if (match.length > 0) {
-            // Allocate next buffer in chain, if required
-            if (curbuf->len == ibuflen) {
-                struct cx_strreplace_ibuf *nextbuf =
-                        calloc(1, sizeof(struct cx_strreplace_ibuf));
-                if (!nextbuf) {
-                    cx_strrepl_free_ibuf(&ibuf);
-                    return cx_mutstrn(NULL, 0);
-                }
-                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
-                if (!nextbuf->buf) {
-                    free(nextbuf);
-                    cx_strrepl_free_ibuf(&ibuf);
-                    return cx_mutstrn(NULL, 0);
-                }
-                curbuf->next = nextbuf;
-                curbuf = nextbuf;
-            }
-
-            // Record match index
-            found++;
-            size_t idx = match.ptr - str.ptr;
-            curbuf->buf[curbuf->len++] = idx;
-            searchstr.ptr = match.ptr + search.length;
-            searchstr.length = str.length - idx - search.length;
-        } else {
-            break;
-        }
-    } while (searchstr.length > 0 && found < replmax);
-
-    // Allocate result string
-    cxmutstr result;
-    {
-        long long adjlen = (long long) replacement.length - (long long) search.length;
-        size_t rcount = 0;
-        curbuf = &ibuf;
-        do {
-            rcount += curbuf->len;
-            curbuf = curbuf->next;
-        } while (curbuf);
-        result.length = str.length + rcount * adjlen;
-        result.ptr = cxMalloc(allocator, result.length + 1);
-        if (!result.ptr) {
-            cx_strrepl_free_ibuf(&ibuf);
-            return cx_mutstrn(NULL, 0);
-        }
+    // second run: perform the replacements
+    // but start where we found the first occurrence
+    const char *inp = str.ptr;
+    tmp = first;
+    char *outp = out.ptr;
+    while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) {
+        size_t copylen = tmp.ptr - inp;
+        memcpy(outp, inp, copylen);
+        outp += copylen;
+        memcpy(outp, replacement.ptr, repl_len);
+        outp += repl_len;
+        inp += copylen + search_len;
+        tmp = cx_strsubs(tmp, search_len);
     }
 
-    // Build result string
-    curbuf = &ibuf;
-    size_t srcidx = 0;
-    char *destptr = result.ptr;
-    do {
-        for (size_t i = 0; i < curbuf->len; i++) {
-            // Copy source part up to next match
-            size_t idx = curbuf->buf[i];
-            size_t srclen = idx - srcidx;
-            if (srclen > 0) {
-                memcpy(destptr, str.ptr + srcidx, srclen);
-                destptr += srclen;
-                srcidx += srclen;
-            }
+    // add the remaining string
+    size_t copylen = in_len - (inp - str.ptr);
+    memcpy(outp, inp, copylen);
+    out.ptr[out_len] = '\0';
 
-            // Copy the replacement and skip the source pattern
-            srcidx += search.length;
-            memcpy(destptr, replacement.ptr, replacement.length);
-            destptr += replacement.length;
-        }
-        curbuf = curbuf->next;
-    } while (curbuf);
-    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
-
-    // Result is guaranteed to be zero-terminated
-    result.ptr[result.length] = '\0';
-
-    // Free index buffer
-    cx_strrepl_free_ibuf(&ibuf);
-
-    return result;
+    return out;
 }
 
 CxStrtokCtx cx_strtok_(

mercurial