src/ucx/string.c

changeset 490
d218607f5a7e
parent 438
22eca559aded
--- a/src/ucx/string.c	Sat Mar 25 17:18:51 2023 +0100
+++ b/src/ucx/string.c	Fri May 05 18:02:11 2023 +0200
@@ -72,7 +72,7 @@
 }
 
 void cx_strfree_a(
-        CxAllocator *alloc,
+        CxAllocator const *alloc,
         cxmutstr *str
 ) {
     cxFree(alloc, str->ptr);
@@ -98,11 +98,14 @@
     return size;
 }
 
-cxmutstr cx_strcat_a(
-        CxAllocator *alloc,
+cxmutstr cx_strcat_ma(
+        CxAllocator const *alloc,
+        cxmutstr str,
         size_t count,
         ...
 ) {
+    if (count == 0) return str;
+
     cxstring *strings = calloc(count, sizeof(cxstring));
     if (!strings) abort();
 
@@ -110,34 +113,38 @@
     va_start(ap, count);
 
     // get all args and overall length
-    size_t slen = 0;
+    size_t slen = str.length;
     cx_for_n(i, count) {
         cxstring s = va_arg (ap, cxstring);
         strings[i] = s;
         slen += s.length;
     }
+    va_end(ap);
 
-    // create new string
-    cxmutstr result;
-    result.ptr = cxMalloc(alloc, slen + 1);
-    result.length = slen;
-    if (result.ptr == NULL) abort();
+    // reallocate or create new string
+    if (str.ptr == NULL) {
+        str.ptr = cxMalloc(alloc, slen + 1);
+    } else {
+        str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
+    }
+    if (str.ptr == NULL) abort();
 
     // concatenate strings
-    size_t pos = 0;
+    size_t pos = str.length;
+    str.length = slen;
     cx_for_n(i, count) {
         cxstring s = strings[i];
-        memcpy(result.ptr + pos, s.ptr, s.length);
+        memcpy(str.ptr + pos, s.ptr, s.length);
         pos += s.length;
     }
 
     // terminate string
-    result.ptr[result.length] = '\0';
+    str.ptr[str.length] = '\0';
 
     // free temporary array
     free(strings);
 
-    return result;
+    return str;
 }
 
 cxstring cx_strsubs(
@@ -226,7 +233,9 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-#define STRSTR_SBO_BUFLEN 512
+#ifndef CX_STRSTR_SBO_SIZE // can be defined before this point to override the default below
+#define CX_STRSTR_SBO_SIZE 512 // entry count of the local prefix table in cx_strstr(); needles of this length or longer use a heap table
+#endif
 
 cxstring cx_strstr(
         cxstring haystack,
@@ -250,11 +259,11 @@
      */
 
     // local prefix table
-    size_t s_prefix_table[STRSTR_SBO_BUFLEN];
+    size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
 
     // check needle length and use appropriate prefix table
     // if the pattern exceeds static prefix table, allocate on the heap
-    bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
+    bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
     register size_t *ptable = useheap ? calloc(needle.length + 1,
                                                sizeof(size_t)) : s_prefix_table;
 
@@ -367,7 +376,7 @@
 }
 
 size_t cx_strsplit_a(
-        CxAllocator *allocator,
+        CxAllocator const *allocator,
         cxstring string,
         cxstring delim,
         size_t limit,
@@ -409,7 +418,7 @@
 }
 
 size_t cx_strsplit_ma(
-        CxAllocator *allocator,
+        CxAllocator const *allocator,
         cxmutstr string,
         cxstring delim,
         size_t limit,
@@ -449,8 +458,26 @@
     }
 }
 
+int cx_strcmp_p( // pointer-taking wrapper around cx_strcmp(); the signature fits a qsort()-style comparison callback
+        void const *s1, // interpreted as a pointer to the left-hand cxstring
+        void const *s2 // interpreted as a pointer to the right-hand cxstring
+) {
+    cxstring const *left = s1;
+    cxstring const *right = s2;
+    return cx_strcmp(*left, *right); // the result of cx_strcmp() is passed through unchanged
+}
+
+int cx_strcasecmp_p( // pointer-taking wrapper around cx_strcasecmp(); the signature fits a qsort()-style comparison callback
+        void const *s1, // interpreted as a pointer to the left-hand cxstring
+        void const *s2 // interpreted as a pointer to the right-hand cxstring
+) {
+    cxstring const *left = s1;
+    cxstring const *right = s2;
+    return cx_strcasecmp(*left, *right); // the result of cx_strcasecmp() is passed through unchanged
+}
+
 cxmutstr cx_strdup_a(
-        CxAllocator *allocator,
+        CxAllocator const *allocator,
         cxstring string
 ) {
     cxmutstr result = {
@@ -539,7 +566,9 @@
     }
 }
 
-#define REPLACE_INDEX_BUFFER_MAX 100
+#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE // can be defined before this point to override the default below
+#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 // upper bound applied to ibuflen in cx_strreplacen_a()
+#endif
 
 struct cx_strreplace_ibuf {
     size_t *buf;
@@ -557,7 +586,7 @@
 }
 
 cxmutstr cx_strreplacen_a(
-        CxAllocator *allocator,
+        CxAllocator const *allocator,
         cxstring str,
         cxstring pattern,
         cxstring replacement,
@@ -570,8 +599,8 @@
     // Compute expected buffer length
     size_t ibufmax = str.length / pattern.length;
     size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
-    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
-        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
+        ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
     }
 
     // Allocate first index buffer
@@ -670,4 +699,87 @@
     return result;
 }
 
+CxStrtokCtx cx_strtok( // builds and returns a fresh tokenization context; no searching happens here
+        cxstring str, // the string to be tokenized
+        cxstring delim, // the primary delimiter
+        size_t limit // cx_strtok_next() stops once this many tokens have been returned
+) {
+    CxStrtokCtx ctx;
+    ctx.str = str;
+    ctx.delim = delim;
+    ctx.limit = limit;
+    ctx.pos = 0; // offset of the current token within str
+    ctx.next_pos = 0; // offset at which the next delimiter search starts
+    ctx.delim_pos = 0; // offset of the most recently located delimiter
+    ctx.found = 0; // number of tokens returned so far
+    ctx.delim_more = NULL; // no additional delimiters until cx_strtok_delim() sets them
+    ctx.delim_more_count = 0;
+    return ctx;
+}
 
+CxStrtokCtx cx_strtok_m( // variant of cx_strtok() that accepts a cxmutstr
+        cxmutstr str, // the string to be tokenized
+        cxstring delim, // the primary delimiter, forwarded unchanged
+        size_t limit // the token limit, forwarded unchanged
+) {
+    return cx_strtok(cx_strcast(str), delim, limit); // cx_strcast() supplies the cxstring argument that cx_strtok() takes
+}
+
+bool cx_strtok_next( // writes the next token to *token; returns true if a token was produced, false otherwise
+        CxStrtokCtx *ctx, // iteration state, updated in place
+        cxstring *token // receives a pointer into ctx->str and the token length
+) {
+    // stop conditions: token limit reached, or delim_pos is at/after the end of the string (also true for an empty string on the first call)
+    if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
+        return false;
+    }
+
+    // determine the search start
+    cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
+
+    // search for the next occurrence of the primary delimiter
+    cxstring delim = cx_strstr(haystack, ctx->delim);
+
+    // if found, make delim capture exactly the delimiter (a zero length is treated as "not found" below)
+    if (delim.length > 0) {
+        delim.length = ctx->delim.length;
+    }
+
+    // if more delimiters are specified, check them now and keep whichever match starts first
+    if (ctx->delim_more_count > 0) {
+        cx_for_n(i, ctx->delim_more_count) {
+            cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
+            if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { // strict <: on a tie the earlier candidate is kept
+                delim.ptr = d.ptr;
+                delim.length = ctx->delim_more[i].length;
+            }
+        }
+    }
+
+    // store the token information and adjust the context
+    ctx->found++;
+    ctx->pos = ctx->next_pos;
+    token->ptr = &ctx->str.ptr[ctx->pos];
+    ctx->delim_pos = delim.length == 0 ? // no delimiter: the token runs to the end of the string
+                     ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
+    token->length = ctx->delim_pos - ctx->pos;
+    ctx->next_pos = ctx->delim_pos + delim.length; // the next search resumes right after the delimiter
+
+    return true;
+}
+
+bool cx_strtok_next_m( // variant of cx_strtok_next() that stores the token in a cxmutstr
+        CxStrtokCtx *ctx, // iteration state, forwarded unchanged
+        cxmutstr *token // receives the token through the cast below
+) {
+    return cx_strtok_next(ctx, (cxstring *) token); // NOTE(review): the cast assumes cxmutstr and cxstring have the same layout - confirm
+}
+
+void cx_strtok_delim( // sets the additional delimiters that cx_strtok_next() checks besides the primary one
+        CxStrtokCtx *ctx, // the context to modify
+        cxstring const *delim, // array of delimiters; only the pointer is stored, the array is not copied
+        size_t count // number of entries in delim
+) {
+    ctx->delim_more = delim;
+    ctx->delim_more_count = count;
+}

mercurial