diff -r b60487c3ec36 -r af685cc9d623 ucx/string.c --- a/ucx/string.c Sun Aug 31 14:39:13 2025 +0200 +++ b/ucx/string.c Sat Nov 08 23:06:11 2025 +0100 @@ -25,6 +25,10 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ +#ifdef MEMRCHR_NEED_GNU +#define _GNU_SOURCE +#endif + #include "cx/string.h" #include @@ -33,6 +37,7 @@ #include #include #include +#include #ifdef _WIN32 #define cx_strcasecmp_impl _strnicmp @@ -42,7 +47,7 @@ #endif cxmutstr cx_mutstr(char *cstring) { - return (cxmutstr) {cstring, strlen(cstring)}; + return (cxmutstr) {cstring, cstring == NULL ? 0 : strlen(cstring)}; } cxmutstr cx_mutstrn( @@ -53,7 +58,7 @@ } cxstring cx_str(const char *cstring) { - return (cxstring) {cstring, strlen(cstring)}; + return (cxstring) {cstring, cstring == NULL ? 0 : strlen(cstring)}; } cxstring cx_strn( @@ -65,7 +70,7 @@ void cx_strfree(cxmutstr *str) { if (str == NULL) return; - free(str->ptr); + cxFreeDefault(str->ptr); str->ptr = NULL; str->length = 0; } @@ -80,6 +85,22 @@ str->length = 0; } +int cx_strcpy_a( + const CxAllocator *alloc, + cxmutstr *dest, + cxstring src +) { + if (cxReallocate(alloc, &dest->ptr, src.length + 1)) { + return 1; + } + + memcpy(dest->ptr, src.ptr, src.length); + dest->length = src.length; + dest->ptr[dest->length] = '\0'; + + return 0; +} + size_t cx_strlen( size_t count, ... @@ -106,27 +127,16 @@ ... ) { if (count == 0) return str; - - cxstring strings_stack[8]; - cxstring *strings; - if (count > 8) { - strings = calloc(count, sizeof(cxstring)); - if (strings == NULL) { - return (cxmutstr) {NULL, 0}; - } - } else { - strings = strings_stack; - } - va_list ap; va_start(ap, count); + va_list ap2; + va_copy(ap2, ap); - // get all args and overall length + // compute overall length bool overflow = false; size_t slen = str.length; for (size_t i = 0; i < count; i++) { - cxstring s = va_arg (ap, cxstring); - strings[i] = s; + cxstring s = va_arg(ap, cxstring); if (slen > SIZE_MAX - str.length) overflow = true; slen += s.length; } @@ -134,10 +144,8 @@ // abort in case of overflow if (overflow) { + va_end(ap2); errno = EOVERFLOW; - if (strings != strings_stack) { - free(strings); - } return (cxmutstr) { NULL, 0 }; } @@ -149,9 +157,7 @@ newstr = cxRealloc(alloc, str.ptr, slen + 1); } if (newstr == NULL) { - if (strings != strings_stack) { - free(strings); - } + va_end(ap2); return (cxmutstr) {NULL, 0}; } str.ptr = newstr; @@ -160,19 +166,15 @@ size_t pos = str.length; str.length = slen; for (size_t i = 0; i < count; i++) { - cxstring s = strings[i]; + cxstring s = va_arg(ap2, cxstring); memcpy(str.ptr + pos, s.ptr, s.length); pos += s.length; } + va_end(ap2); // terminate string str.ptr[str.length] = '\0'; - // free temporary array - if (strings != strings_stack) { - free(strings); - } - return str; } @@ -234,19 +236,24 @@ } cxstring cx_strrchr( - cxstring string, - int chr + cxstring string, + int chr ) { +#ifdef WITH_MEMRCHR + char *ret = memrchr(string.ptr, 0xFF & chr, string.length); + if (ret == NULL) return (cxstring) {NULL, 0}; + return (cxstring) {ret, string.length - (ret - string.ptr)}; +#else chr = 0xFF & chr; size_t i = string.length; while (i > 0) { i--; - // TODO: improve by comparing multiple bytes at once if (string.ptr[i] == chr) { return cx_strsubs(string, i); } } return (cxstring) {NULL, 0}; +#endif } cxmutstr cx_strrchr_m( @@ -289,8 +296,9 @@ // check needle length and use appropriate prefix table // if the pattern exceeds static prefix table, allocate on the heap const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; - register size_t *ptable = useheap ? calloc(needle.length + 1, - sizeof(size_t)) : s_prefix_table; + register size_t *ptable = useheap + ? cxCallocDefault(needle.length + 1, sizeof(size_t)) + : s_prefix_table; // keep counter in registers register size_t i, j; @@ -328,7 +336,7 @@ // if prefix table was allocated on the heap, free it if (useheap) { - free(ptable); + cxFreeDefault(ptable); } return result; @@ -453,7 +461,7 @@ delim, limit, (cxstring **) output); } -int cx_strcmp( +int cx_strcmp_( cxstring s1, cxstring s2 ) { @@ -470,7 +478,7 @@ } } -int cx_strcasecmp( +int cx_strcasecmp_( cxstring s1, cxstring s2 ) { @@ -522,19 +530,13 @@ return result; } -static bool str_isspace(char c) { - // TODO: remove once UCX has public API for this - return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f'; -} - cxstring cx_strtrim(cxstring string) { cxstring result = string; - // TODO: optimize by comparing multiple bytes at once - while (result.length > 0 && str_isspace(*result.ptr)) { + while (result.length > 0 && isspace((unsigned char)(result.ptr[0]))) { result.ptr++; result.length--; } - while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) { + while (result.length > 0 && isspace((unsigned char)result.ptr[result.length - 1])) { result.length--; } return result; @@ -545,7 +547,7 @@ return (cxmutstr) {(char *) result.ptr, result.length}; } -bool cx_strprefix( +bool cx_strprefix_( cxstring string, cxstring prefix ) { @@ -553,7 +555,7 @@ return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; } -bool cx_strsuffix( +bool cx_strsuffix_( cxstring string, cxstring suffix ) { @@ -562,7 +564,7 @@ suffix.ptr, suffix.length) == 0; } -bool cx_strcaseprefix( +bool cx_strcaseprefix_( cxstring string, cxstring prefix ) { @@ -574,7 +576,7 @@ #endif } -bool cx_strcasesuffix( +bool cx_strcasesuffix_( cxstring string, cxstring suffix ) { @@ -588,27 +590,6 @@ #endif } -#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE -#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 -#endif - -struct cx_strreplace_ibuf { - size_t *buf; - struct cx_strreplace_ibuf *next; - unsigned int len; -}; - -static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { - // remember, the first data is on the stack! - buf = buf->next; - while (buf) { - struct cx_strreplace_ibuf *next = buf->next; - free(buf->buf); - free(buf); - buf = next; - } -} - cxmutstr cx_strreplacen_a( const CxAllocator *allocator, cxstring str, @@ -616,108 +597,60 @@ cxstring replacement, size_t replmax ) { + // special cases + if (search.length == 0 || search.length > str.length || replmax == 0) { + return cx_strdup_a(allocator, str); + } - if (search.length == 0 || search.length > str.length || replmax == 0) - return cx_strdup_a(allocator, str); + size_t in_len = str.length; + size_t search_len = search.length; + size_t repl_len = replacement.length; - // Compute expected buffer length - size_t ibufmax = str.length / search.length; - size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; - if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { - ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; + // first run, count the occurrences + // and remember where the first is + size_t occurrences = 1; + cxstring first = cx_strstr(str, search); + if (first.length == 0) { + // special case, no replacements + return cx_strdup_a(allocator, str); + } + cxstring tmp = cx_strsubs(first, search_len); + while (occurrences < replmax && + (tmp = cx_strstr(tmp, search)).length > 0) { + occurrences++; + tmp = cx_strsubs(tmp, search_len); } - // First index buffer can be on the stack - struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf; - size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE]; - ibuf.buf = ibuf_sbo; - ibuf.next = NULL; - ibuf.len = 0; + // calculate necessary memory + signed long long diff_len = (signed long long) repl_len - search_len; + size_t out_len = in_len + diff_len * occurrences; + cxmutstr out = { + cxMalloc(allocator, out_len + 1), + out_len + }; + if (out.ptr == NULL) return out; - // Search occurrences - cxstring searchstr = str; - size_t found = 0; - do { - cxstring match = cx_strstr(searchstr, search); - if (match.length > 0) { - // Allocate next buffer in chain, if required - if (curbuf->len == ibuflen) { - struct cx_strreplace_ibuf *nextbuf = - calloc(1, sizeof(struct cx_strreplace_ibuf)); - if (!nextbuf) { - cx_strrepl_free_ibuf(&ibuf); - return cx_mutstrn(NULL, 0); - } - nextbuf->buf = calloc(ibuflen, sizeof(size_t)); - if (!nextbuf->buf) { - free(nextbuf); - cx_strrepl_free_ibuf(&ibuf); - return cx_mutstrn(NULL, 0); - } - curbuf->next = nextbuf; - curbuf = nextbuf; - } - - // Record match index - found++; - size_t idx = match.ptr - str.ptr; - curbuf->buf[curbuf->len++] = idx; - searchstr.ptr = match.ptr + search.length; - searchstr.length = str.length - idx - search.length; - } else { - break; - } - } while (searchstr.length > 0 && found < replmax); - - // Allocate result string - cxmutstr result; - { - long long adjlen = (long long) replacement.length - (long long) search.length; - size_t rcount = 0; - curbuf = &ibuf; - do { - rcount += curbuf->len; - curbuf = curbuf->next; - } while (curbuf); - result.length = str.length + rcount * adjlen; - result.ptr = cxMalloc(allocator, result.length + 1); - if (!result.ptr) { - cx_strrepl_free_ibuf(&ibuf); - return cx_mutstrn(NULL, 0); - } + // second run: perform the replacements + // but start where we found the first occurrence + const char *inp = str.ptr; + tmp = first; + char *outp = out.ptr; + while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) { + size_t copylen = tmp.ptr - inp; + memcpy(outp, inp, copylen); + outp += copylen; + memcpy(outp, replacement.ptr, repl_len); + outp += repl_len; + inp += copylen + search_len; + tmp = cx_strsubs(tmp, search_len); } - // Build result string - curbuf = &ibuf; - size_t srcidx = 0; - char *destptr = result.ptr; - do { - for (size_t i = 0; i < curbuf->len; i++) { - // Copy source part up to next match - size_t idx = curbuf->buf[i]; - size_t srclen = idx - srcidx; - if (srclen > 0) { - memcpy(destptr, str.ptr + srcidx, srclen); - destptr += srclen; - srcidx += srclen; - } + // add the remaining string + size_t copylen = in_len - (inp - str.ptr); + memcpy(outp, inp, copylen); + out.ptr[out_len] = '\0'; - // Copy the replacement and skip the source pattern - srcidx += search.length; - memcpy(destptr, replacement.ptr, replacement.length); - destptr += replacement.length; - } - curbuf = curbuf->next; - } while (curbuf); - memcpy(destptr, str.ptr + srcidx, str.length - srcidx); - - // Result is guaranteed to be zero-terminated - result.ptr[result.length] = '\0'; - - // Free index buffer - cx_strrepl_free_ibuf(&ibuf); - - return result; + return out; } CxStrtokCtx cx_strtok_( @@ -1028,11 +961,6 @@ return 0; } -static bool str_isdigit(char c) { - // TODO: remove once UCX has public API for this - return c >= '0' && c <= '9'; -} - int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) { // TODO: overflow check // TODO: increase precision @@ -1065,7 +993,7 @@ // parse all digits until we find the decsep size_t pos = 0; do { - if (str_isdigit(str.ptr[pos])) { + if (isdigit((unsigned char)str.ptr[pos])) { result = result * 10 + (str.ptr[pos] - '0'); } else if (strchr(groupsep, str.ptr[pos]) == NULL) { break; @@ -1094,7 +1022,7 @@ // parse everything until exponent or end double factor = 1.; do { - if (str_isdigit(str.ptr[pos])) { + if (isdigit((unsigned char)str.ptr[pos])) { factor *= 0.1; result = result + factor * (str.ptr[pos] - '0'); } else if (strchr(groupsep, str.ptr[pos]) == NULL) { @@ -1135,7 +1063,7 @@ // parse the exponent unsigned int exp = 0; do { - if (str_isdigit(str.ptr[pos])) { + if (isdigit((unsigned char)str.ptr[pos])) { exp = 10 * exp + (str.ptr[pos] - '0'); } else if (strchr(groupsep, str.ptr[pos]) == NULL) { errno = EINVAL;