diff -r 118e2386d5b4 -r 063a9f29098c ucx/string.c --- a/ucx/string.c Sat Feb 22 18:10:36 2025 +0100 +++ b/ucx/string.c Sun Feb 23 14:28:47 2025 +0100 @@ -25,12 +25,10 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#define CX_STR_IMPLEMENTATION #include "cx/string.h" #include <string.h> #include <stdarg.h> -#include <ctype.h> #include <assert.h> #include <errno.h> #include <limits.h> @@ -222,14 +220,9 @@ cxstring string, int chr ) { - chr = 0xFF & chr; - // TODO: improve by comparing multiple bytes at once - for (size_t i = 0; i < string.length; i++) { - if (string.ptr[i] == chr) { - return cx_strsubs(string, i); - } - } - return (cxstring) {NULL, 0}; + char *ret = memchr(string.ptr, 0xFF & chr, string.length); + if (ret == NULL) return (cxstring) {NULL, 0}; + return (cxstring) {ret, string.length - (ret - string.ptr)}; } cxmutstr cx_strchr_m( @@ -265,7 +258,7 @@ } #ifndef CX_STRSTR_SBO_SIZE -#define CX_STRSTR_SBO_SIZE 512 +#define CX_STRSTR_SBO_SIZE 128 #endif const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE; @@ -295,7 +288,7 @@ // check needle length and use appropriate prefix table // if the pattern exceeds static prefix table, allocate on the heap - bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; + const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; register size_t *ptable = useheap ? calloc(needle.length + 1, sizeof(size_t)) : s_prefix_table; @@ -334,7 +327,7 @@ } // if prefix table was allocated on the heap, free it - if (ptable != s_prefix_table) { + if (useheap) { free(ptable); } @@ -512,7 +505,7 @@ return cx_strcasecmp(*left, *right); } -cxmutstr cx_strdup_a( +cxmutstr cx_strdup_a_( const CxAllocator *allocator, cxstring string ) { @@ -529,14 +522,19 @@ return result; } +static bool str_isspace(char c) { + // TODO: remove once UCX has public API for this + return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f'; +} + cxstring cx_strtrim(cxstring string) { cxstring result = string; // TODO: optimize by comparing multiple bytes at once - while (result.length > 0 && isspace(*result.ptr)) { + while (result.length > 0 && str_isspace(*result.ptr)) { result.ptr++; result.length--; } - while (result.length > 0 && isspace(result.ptr[result.length - 1])) { + while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) { result.length--; } return result; @@ -590,18 +588,6 @@ #endif } -void cx_strlower(cxmutstr string) { - for (size_t i = 0; i < string.length; i++) { - string.ptr[i] = (char) tolower(string.ptr[i]); - } -} - -void cx_strupper(cxmutstr string) { - for (size_t i = 0; i < string.length; i++) { - string.ptr[i] = (char) toupper(string.ptr[i]); - } -} - #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 #endif @@ -613,6 +599,8 @@ }; static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { + // remember, the first data is on the stack! + buf = buf->next; while (buf) { struct cx_strreplace_ibuf *next = buf->next; free(buf->buf); @@ -624,49 +612,46 @@ cxmutstr cx_strreplacen_a( const CxAllocator *allocator, cxstring str, - cxstring pattern, + cxstring search, cxstring replacement, size_t replmax ) { - if (pattern.length == 0 || pattern.length > str.length || replmax == 0) + if (search.length == 0 || search.length > str.length || replmax == 0) return cx_strdup_a(allocator, str); // Compute expected buffer length - size_t ibufmax = str.length / pattern.length; + size_t ibufmax = str.length / search.length; size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; } - // Allocate first index buffer - struct cx_strreplace_ibuf *firstbuf, *curbuf; - firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); - if (!firstbuf) return cx_mutstrn(NULL, 0); - firstbuf->buf = calloc(ibuflen, sizeof(size_t)); - if (!firstbuf->buf) { - free(firstbuf); - return cx_mutstrn(NULL, 0); - } + // First index buffer can be on the stack + struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf; + size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE]; + ibuf.buf = ibuf_sbo; + ibuf.next = NULL; + ibuf.len = 0; // Search occurrences cxstring searchstr = str; size_t found = 0; do { - cxstring match = cx_strstr(searchstr, pattern); + cxstring match = cx_strstr(searchstr, search); if (match.length > 0) { // Allocate next buffer in chain, if required if (curbuf->len == ibuflen) { struct cx_strreplace_ibuf *nextbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); if (!nextbuf) { - cx_strrepl_free_ibuf(firstbuf); + cx_strrepl_free_ibuf(&ibuf); return cx_mutstrn(NULL, 0); } nextbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!nextbuf->buf) { free(nextbuf); - cx_strrepl_free_ibuf(firstbuf); + cx_strrepl_free_ibuf(&ibuf); return cx_mutstrn(NULL, 0); } curbuf->next = nextbuf; @@ -677,8 +662,8 @@ found++; size_t idx = match.ptr - str.ptr; curbuf->buf[curbuf->len++] = idx; - searchstr.ptr = match.ptr + pattern.length; - searchstr.length = str.length - idx - pattern.length; + searchstr.ptr = match.ptr + search.length; + searchstr.length = str.length - idx - search.length; } else { break; } @@ -687,9 +672,9 @@ // Allocate result string cxmutstr result; { - ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; + long long adjlen = (long long) replacement.length - (long long) search.length; size_t rcount = 0; - curbuf = firstbuf; + curbuf = &ibuf; do { rcount += curbuf->len; curbuf = curbuf->next; @@ -697,13 +682,13 @@ result.length = str.length + rcount * adjlen; result.ptr = cxMalloc(allocator, result.length + 1); if (!result.ptr) { - cx_strrepl_free_ibuf(firstbuf); + cx_strrepl_free_ibuf(&ibuf); return cx_mutstrn(NULL, 0); } } // Build result string - curbuf = firstbuf; + curbuf = &ibuf; size_t srcidx = 0; char *destptr = result.ptr; do { @@ -718,7 +703,7 @@ } // Copy the replacement and skip the source pattern - srcidx += pattern.length; + srcidx += search.length; memcpy(destptr, replacement.ptr, replacement.length); destptr += replacement.length; } @@ -730,12 +715,12 @@ result.ptr[result.length] = '\0'; // Free index buffer - cx_strrepl_free_ibuf(firstbuf); + cx_strrepl_free_ibuf(&ibuf); return result; } -CxStrtokCtx cx_strtok( +CxStrtokCtx cx_strtok_( cxstring str, cxstring delim, size_t limit @@ -753,14 +738,6 @@ return ctx; } -CxStrtokCtx cx_strtok_m( - cxmutstr str, - cxstring delim, - size_t limit -) { - return cx_strtok(cx_strcast(str), delim, limit); -} - bool cx_strtok_next( CxStrtokCtx *ctx, cxstring *token @@ -832,25 +809,24 @@ *output = (rtype) result; \ return 0 -int cx_strtos_lc(cxstring str, short *output, int base, const char *groupsep) { +int cx_strtos_lc_(cxstring str, short *output, int base, const char *groupsep) { cx_strtoX_signed_impl(short, SHRT_MIN, SHRT_MAX); } -int cx_strtoi_lc(cxstring str, int *output, int base, const char *groupsep) { +int cx_strtoi_lc_(cxstring str, int *output, int base, const char *groupsep) { cx_strtoX_signed_impl(int, INT_MIN, INT_MAX); } -int cx_strtol_lc(cxstring str, long *output, int base, const char *groupsep) { +int cx_strtol_lc_(cxstring str, long *output, int base, const char *groupsep) { cx_strtoX_signed_impl(long, LONG_MIN, LONG_MAX); } -int cx_strtoll_lc(cxstring str, long long *output, int base, const char *groupsep) { +int cx_strtoll_lc_(cxstring str, long long *output, int base, const char *groupsep) { // strategy: parse as unsigned, check range, negate if required bool neg = false; size_t start_unsigned = 0; - // trim already, to search for a sign character - str = cx_strtrim(str); + // emptiness check if (str.length == 0) { errno = EINVAL; return -1; @@ -890,33 +866,23 @@ } } -int cx_strtoi8_lc(cxstring str, int8_t *output, int base, const char *groupsep) { +int cx_strtoi8_lc_(cxstring str, int8_t *output, int base, const char *groupsep) { cx_strtoX_signed_impl(int8_t, INT8_MIN, INT8_MAX); } -int cx_strtoi16_lc(cxstring str, int16_t *output, int base, const char *groupsep) { +int cx_strtoi16_lc_(cxstring str, int16_t *output, int base, const char *groupsep) { cx_strtoX_signed_impl(int16_t, INT16_MIN, INT16_MAX); } -int cx_strtoi32_lc(cxstring str, int32_t *output, int base, const char *groupsep) { +int cx_strtoi32_lc_(cxstring str, int32_t *output, int base, const char *groupsep) { cx_strtoX_signed_impl(int32_t, INT32_MIN, INT32_MAX); } -int cx_strtoi64_lc(cxstring str, int64_t *output, int base, const char *groupsep) { +int cx_strtoi64_lc_(cxstring str, int64_t *output, int base, const char *groupsep) { assert(sizeof(long long) == sizeof(int64_t)); // should be true on all platforms return cx_strtoll_lc(str, (long long*) output, base, groupsep); } -int cx_strtoz_lc(cxstring str, ssize_t *output, int base, const char *groupsep) { -#if SSIZE_MAX == INT32_MAX - return cx_strtoi32_lc(str, (int32_t*) output, base, groupsep); -#elif SSIZE_MAX == INT64_MAX - return cx_strtoll_lc(str, (long long*) output, base, groupsep); -#else -#error "unsupported ssize_t size" -#endif -} - #define cx_strtoX_unsigned_impl(rtype, rmax) \ uint64_t result; \ if (cx_strtou64_lc(str, &result, base, groupsep)) { \ @@ -929,21 +895,20 @@ *output = (rtype) result; \ return 0 -int cx_strtous_lc(cxstring str, unsigned short *output, int base, const char *groupsep) { +int cx_strtous_lc_(cxstring str, unsigned short *output, int base, const char *groupsep) { cx_strtoX_unsigned_impl(unsigned short, USHRT_MAX); } -int cx_strtou_lc(cxstring str, unsigned int *output, int base, const char *groupsep) { +int cx_strtou_lc_(cxstring str, unsigned int *output, int base, const char *groupsep) { cx_strtoX_unsigned_impl(unsigned int, UINT_MAX); } -int cx_strtoul_lc(cxstring str, unsigned long *output, int base, const char *groupsep) { +int cx_strtoul_lc_(cxstring str, unsigned long *output, int base, const char *groupsep) { cx_strtoX_unsigned_impl(unsigned long, ULONG_MAX); } -int cx_strtoull_lc(cxstring str, unsigned long long *output, int base, const char *groupsep) { +int cx_strtoull_lc_(cxstring str, unsigned long long *output, int base, const char *groupsep) { // some sanity checks - str = cx_strtrim(str); if (str.length == 0) { errno = EINVAL; return -1; @@ -1021,37 +986,37 @@ return 0; } -int cx_strtou8_lc(cxstring str, uint8_t *output, int base, const char *groupsep) { +int cx_strtou8_lc_(cxstring str, uint8_t *output, int base, const char *groupsep) { cx_strtoX_unsigned_impl(uint8_t, UINT8_MAX); } -int cx_strtou16_lc(cxstring str, uint16_t *output, int base, const char *groupsep) { +int cx_strtou16_lc_(cxstring str, uint16_t *output, int base, const char *groupsep) { cx_strtoX_unsigned_impl(uint16_t, UINT16_MAX); } -int cx_strtou32_lc(cxstring str, uint32_t *output, int base, const char *groupsep) { +int cx_strtou32_lc_(cxstring str, uint32_t *output, int base, const char *groupsep) { cx_strtoX_unsigned_impl(uint32_t, UINT32_MAX); } -int cx_strtou64_lc(cxstring str, uint64_t *output, int base, const char *groupsep) { +int cx_strtou64_lc_(cxstring str, uint64_t *output, int base, const char *groupsep) { assert(sizeof(unsigned long long) == sizeof(uint64_t)); // should be true on all platforms return cx_strtoull_lc(str, (unsigned long long*) output, base, groupsep); } -int cx_strtouz_lc(cxstring str, size_t *output, int base, const char *groupsep) { +int cx_strtoz_lc_(cxstring str, size_t *output, int base, const char *groupsep) { #if SIZE_MAX == UINT32_MAX - return cx_strtou32_lc(str, (uint32_t*) output, base, groupsep); + return cx_strtou32_lc_(str, (uint32_t*) output, base, groupsep); #elif SIZE_MAX == UINT64_MAX - return cx_strtoull_lc(str, (unsigned long long *) output, base, groupsep); + return cx_strtoull_lc_(str, (unsigned long long *) output, base, groupsep); #else #error "unsupported size_t size" #endif } -int cx_strtof_lc(cxstring str, float *output, char decsep, const char *groupsep) { +int cx_strtof_lc_(cxstring str, float *output, char decsep, const char *groupsep) { // use string to double and add a range check double d; - int ret = cx_strtod_lc(str, &d, decsep, groupsep); + int ret = cx_strtod_lc_(str, &d, decsep, groupsep); if (ret != 0) return ret; // note: FLT_MIN is the smallest POSITIVE number that can be represented double test = d < 0 ? -d : d; @@ -1063,12 +1028,16 @@ return 0; } -int cx_strtod_lc(cxstring str, double *output, char decsep, const char *groupsep) { +static bool str_isdigit(char c) { + // TODO: remove once UCX has public API for this + return c >= '0' && c <= '9'; +} + +int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) { // TODO: overflow check // TODO: increase precision - // trim and check - str = cx_strtrim(str); + // emptiness check if (str.length == 0) { errno = EINVAL; return -1; @@ -1096,7 +1065,7 @@ // parse all digits until we find the decsep size_t pos = 0; do { - if (isdigit(str.ptr[pos])) { + if (str_isdigit(str.ptr[pos])) { result = result * 10 + (str.ptr[pos] - '0'); } else if (strchr(groupsep, str.ptr[pos]) == NULL) { break; @@ -1125,7 +1094,7 @@ // parse everything until exponent or end double factor = 1.; do { - if (isdigit(str.ptr[pos])) { + if (str_isdigit(str.ptr[pos])) { factor *= 0.1; result = result + factor * (str.ptr[pos] - '0'); } else if (strchr(groupsep, str.ptr[pos]) == NULL) { @@ -1166,7 +1135,7 @@ // parse the exponent unsigned int exp = 0; do { - if (isdigit(str.ptr[pos])) { + if (str_isdigit(str.ptr[pos])) { exp = 10 * exp + (str.ptr[pos] - '0'); } else if (strchr(groupsep, str.ptr[pos]) == NULL) { errno = EINVAL;