ucx/string.c

changeset 162
18892c0a9adc
parent 157
0b33b9396851
--- a/ucx/string.c	Sat Dec 05 10:34:10 2020 +0100
+++ b/ucx/string.c	Sat Dec 05 11:54:58 2020 +0100
@@ -36,6 +36,10 @@
 #include <stdint.h>
 #include <ctype.h>
 
+#ifndef _WIN32
+#include <strings.h> /* for strncasecmp() */
+#endif /* _WIN32 */
+
 sstr_t sstr(char *cstring) {
     sstr_t string;
     string.ptr = cstring;
@@ -66,6 +70,8 @@
 
 
 size_t scstrnlen(size_t n, ...) {
+    if (n == 0) return 0;
+    
     va_list ap;
     va_start(ap, n);
     
@@ -592,6 +598,38 @@
     }
 }
 
+int scstrcaseprefix(scstr_t string, scstr_t prefix) {
+    if (string.length == 0) {
+        return prefix.length == 0;
+    }
+    if (prefix.length == 0) {
+        return 1;
+    }
+    
+    if (prefix.length > string.length) {
+        return 0;
+    } else {
+        scstr_t subs = scstrsubsl(string, 0, prefix.length);
+        return scstrcasecmp(subs, prefix) == 0;
+    }
+}
+
+int scstrcasesuffix(scstr_t string, scstr_t suffix) {
+    if (string.length == 0) {
+        return suffix.length == 0;
+    }
+    if (suffix.length == 0) {
+        return 1;
+    }
+    
+    if (suffix.length > string.length) {
+        return 0;
+    } else {
+        scstr_t subs = scstrsubs(string, string.length-suffix.length);
+        return scstrcasecmp(subs, suffix) == 0;
+    }
+}
+
 sstr_t scstrlower(scstr_t string) {
     sstr_t ret = sstrdup(string);
     for (size_t i = 0; i < ret.length ; i++) {
@@ -624,6 +662,136 @@
     return ret;
 }
 
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+struct scstrreplace_ibuf {
+    size_t* buf;
+    unsigned int len; /* small indices */
+    struct scstrreplace_ibuf* next;
+};
+
+static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
+    while (buf) {
+        struct scstrreplace_ibuf *next = buf->next;
+        free(buf->buf);
+        free(buf);
+        buf = next;
+    }
+}
+
+sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
+                     scstr_t pattern, scstr_t replacement, size_t replmax) {
+
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return sstrdup(str);
+
+    /* Compute expected buffer length */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct scstrreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
+    if (!firstbuf) return sstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return sstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    scstr_t searchstr = str;
+    size_t found = 0;
+    do {
+        scstr_t match = scstrscstr(searchstr, pattern);
+        if (match.length > 0) {
+            /* Allocate next buffer in chain, if required */
+            if (curbuf->len == ibuflen) {
+                struct scstrreplace_ibuf *nextbuf =
+                        calloc(1, sizeof(struct scstrreplace_ibuf));
+                if (!nextbuf) {
+                    scstrrepl_free_ibuf(firstbuf);
+                    return sstrn(NULL, 0);
+                }
+                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
+                if (!nextbuf->buf) {
+                    free(nextbuf);
+                    scstrrepl_free_ibuf(firstbuf);
+                    return sstrn(NULL, 0);
+                }
+                curbuf->next = nextbuf;
+                curbuf = nextbuf;
+            }
+
+            /* Record match index */
+            found++;
+            size_t idx = match.ptr - str.ptr;
+            curbuf->buf[curbuf->len++] = idx;
+            searchstr.ptr = match.ptr + pattern.length;
+            searchstr.length = str.length - idx - pattern.length;
+        } else {
+            break;
+        }
+    } while (searchstr.length > 0 && found < replmax);
+
+    /* Allocate result string */
+    sstr_t result;
+    {
+        ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
+        size_t rcount = 0;
+        curbuf = firstbuf;
+        do {
+            rcount += curbuf->len;
+            curbuf = curbuf->next;
+        } while (curbuf);
+        result.length = str.length + rcount * adjlen;
+        result.ptr = almalloc(allocator, result.length);
+        if (!result.ptr) {
+            scstrrepl_free_ibuf(firstbuf);
+            return sstrn(NULL, 0);
+        }
+    }
+
+    /* Build result string */
+    curbuf = firstbuf;
+    size_t srcidx = 0;
+    char* destptr = result.ptr;
+    do {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match*/
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr+srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Copy the replacement and skip the source pattern */
+            srcidx += pattern.length;
+            memcpy(destptr, replacement.ptr, replacement.length);
+            destptr += replacement.length;
+        }
+        curbuf = curbuf->next;
+    } while (curbuf);
+    memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
+
+    /* Free index buffer */
+    scstrrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
+        scstr_t replacement, size_t replmax) {
+    return scstrreplacen_a(ucx_default_allocator(),
+            str, pattern, replacement, replmax);
+}
+
+
 // type adjustment functions
 scstr_t ucx_sc2sc(scstr_t str) {
     return str;

mercurial