--- a/src/ucx/string.c Tue Aug 25 12:07:56 2020 +0200 +++ b/src/ucx/string.c Sat Oct 24 17:34:32 2020 +0200 @@ -662,6 +662,136 @@ return ret; } +#define REPLACE_INDEX_BUFFER_MAX 100 + +struct scstrreplace_ibuf { + size_t* buf; + unsigned int len; /* small indices */ + struct scstrreplace_ibuf* next; +}; + +static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { + while (buf) { + struct scstrreplace_ibuf *next = buf->next; + free(buf->buf); + free(buf); + buf = next; + } +} + +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, + scstr_t pattern, scstr_t replacement, size_t replmax) { + + if (pattern.length == 0 || pattern.length > str.length || replmax == 0) + return sstrdup(str); + + /* Compute expected buffer length */ + size_t ibufmax = str.length / pattern.length; + size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; + if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { + ibuflen = REPLACE_INDEX_BUFFER_MAX; + } + + /* Allocate first index buffer */ + struct scstrreplace_ibuf *firstbuf, *curbuf; + firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); + if (!firstbuf) return sstrn(NULL, 0); + firstbuf->buf = calloc(ibuflen, sizeof(size_t)); + if (!firstbuf->buf) { + free(firstbuf); + return sstrn(NULL, 0); + } + + /* Search occurrences */ + scstr_t searchstr = str; + size_t found = 0; + do { + scstr_t match = scstrscstr(searchstr, pattern); + if (match.length > 0) { + /* Allocate next buffer in chain, if required */ + if (curbuf->len == ibuflen) { + struct scstrreplace_ibuf *nextbuf = + calloc(1, sizeof(struct scstrreplace_ibuf)); + if (!nextbuf) { + scstrrepl_free_ibuf(firstbuf); + return sstrn(NULL, 0); + } + nextbuf->buf = calloc(ibuflen, sizeof(size_t)); + if (!nextbuf->buf) { + free(nextbuf); + scstrrepl_free_ibuf(firstbuf); + return sstrn(NULL, 0); + } + curbuf->next = nextbuf; + curbuf = nextbuf; + } + + /* Record match index */ + found++; + size_t idx = match.ptr - str.ptr; + curbuf->buf[curbuf->len++] = idx; + searchstr.ptr = match.ptr + pattern.length; + searchstr.length = str.length - idx - pattern.length; + } else { + break; + } + } while (searchstr.length > 0 && found < replmax); + + /* Allocate result string */ + sstr_t result; + { + ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; + size_t rcount = 0; + curbuf = firstbuf; + do { + rcount += curbuf->len; + curbuf = curbuf->next; + } while (curbuf); + result.length = str.length + rcount * adjlen; + result.ptr = almalloc(allocator, result.length); + if (!result.ptr) { + scstrrepl_free_ibuf(firstbuf); + return sstrn(NULL, 0); + } + } + + /* Build result string */ + curbuf = firstbuf; + size_t srcidx = 0; + char* destptr = result.ptr; + do { + for (size_t i = 0; i < curbuf->len; i++) { + /* Copy source part up to next match*/ + size_t idx = curbuf->buf[i]; + size_t srclen = idx - srcidx; + if (srclen > 0) { + memcpy(destptr, str.ptr+srcidx, srclen); + destptr += srclen; + srcidx += srclen; + } + + /* Copy the replacement and skip the source pattern */ + srcidx += pattern.length; + memcpy(destptr, replacement.ptr, replacement.length); + destptr += replacement.length; + } + curbuf = curbuf->next; + } while (curbuf); + memcpy(destptr, str.ptr+srcidx, str.length-srcidx); + + /* Free index buffer */ + scstrrepl_free_ibuf(firstbuf); + + return result; +} + +sstr_t scstrreplacen(scstr_t str, scstr_t pattern, + scstr_t replacement, size_t replmax) { + return scstrreplacen_a(ucx_default_allocator(), + str, pattern, replacement, replmax); +} + + // type adjustment functions scstr_t ucx_sc2sc(scstr_t str) { return str;