Sun, 17 Dec 2023 15:33:50 +0100
fix faulty string to int conversion utilities
Probably it was expected that errno is set to EINVAL when illegal characters are encountered. But this is not standard and does not happen on every system, allowing illegal strings to be parsed as valid integers.
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "cx/string.h" #include "cx/utils.h" #include <string.h> #include <stdarg.h> #include <ctype.h> #ifndef _WIN32 #include <strings.h> // for strncasecmp() #endif // _WIN32 cxmutstr cx_mutstr(char *cstring) { return (cxmutstr) {cstring, strlen(cstring)}; } cxmutstr cx_mutstrn( char *cstring, size_t length ) { return (cxmutstr) {cstring, length}; } cxstring cx_str(const char *cstring) { return (cxstring) {cstring, strlen(cstring)}; } cxstring cx_strn( const char *cstring, size_t length ) { return (cxstring) {cstring, length}; } cxstring cx_strcast(cxmutstr str) { return (cxstring) {str.ptr, str.length}; } void cx_strfree(cxmutstr *str) { free(str->ptr); str->ptr = NULL; str->length = 0; } void cx_strfree_a( CxAllocator const *alloc, cxmutstr *str ) { cxFree(alloc, str->ptr); str->ptr = NULL; str->length = 0; } size_t cx_strlen( size_t count, ... ) { if (count == 0) return 0; va_list ap; va_start(ap, count); size_t size = 0; cx_for_n(i, count) { cxstring str = va_arg(ap, cxstring); size += str.length; } va_end(ap); return size; } cxmutstr cx_strcat_ma( CxAllocator const *alloc, cxmutstr str, size_t count, ... ) { if (count == 0) return str; cxstring *strings = calloc(count, sizeof(cxstring)); if (!strings) abort(); va_list ap; va_start(ap, count); // get all args and overall length size_t slen = str.length; cx_for_n(i, count) { cxstring s = va_arg (ap, cxstring); strings[i] = s; slen += s.length; } va_end(ap); // reallocate or create new string if (str.ptr == NULL) { str.ptr = cxMalloc(alloc, slen + 1); } else { str.ptr = cxRealloc(alloc, str.ptr, slen + 1); } if (str.ptr == NULL) abort(); // concatenate strings size_t pos = str.length; str.length = slen; cx_for_n(i, count) { cxstring s = strings[i]; memcpy(str.ptr + pos, s.ptr, s.length); pos += s.length; } // terminate string str.ptr[str.length] = '\0'; // free temporary array free(strings); return str; } cxstring cx_strsubs( cxstring string, size_t start ) { return cx_strsubsl(string, start, string.length - start); } cxmutstr cx_strsubs_m( cxmutstr string, size_t start ) { return cx_strsubsl_m(string, start, string.length - start); } cxstring cx_strsubsl( cxstring string, size_t start, size_t length ) { if (start > string.length) { return (cxstring) {NULL, 0}; } size_t rem_len = string.length - start; if (length > rem_len) { length = rem_len; } return (cxstring) {string.ptr + start, length}; } cxmutstr cx_strsubsl_m( cxmutstr string, size_t start, size_t length ) { cxstring result = cx_strsubsl(cx_strcast(string), start, length); return (cxmutstr) {(char *) result.ptr, result.length}; } cxstring cx_strchr( cxstring string, int chr ) { chr = 0xFF & chr; // TODO: improve by comparing multiple bytes at once cx_for_n(i, string.length) { if (string.ptr[i] == chr) { return cx_strsubs(string, i); } } return (cxstring) {NULL, 0}; } cxmutstr cx_strchr_m( cxmutstr string, int chr ) { cxstring result = cx_strchr(cx_strcast(string), chr); return (cxmutstr) {(char *) result.ptr, result.length}; } cxstring cx_strrchr( cxstring string, int chr ) { chr = 0xFF & chr; size_t i = string.length; while (i > 0) { i--; // TODO: improve by comparing multiple bytes at once if (string.ptr[i] == chr) { return cx_strsubs(string, i); } } return (cxstring) {NULL, 0}; } cxmutstr cx_strrchr_m( cxmutstr string, int chr ) { cxstring result = cx_strrchr(cx_strcast(string), chr); return (cxmutstr) {(char *) result.ptr, result.length}; } #ifndef CX_STRSTR_SBO_SIZE #define CX_STRSTR_SBO_SIZE 512 #endif cxstring cx_strstr( cxstring haystack, cxstring needle ) { if (needle.length == 0) { return haystack; } // optimize for single-char needles if (needle.length == 1) { return cx_strchr(haystack, *needle.ptr); } /* * IMPORTANT: * Our prefix table contains the prefix length PLUS ONE * this is our decision, because we want to use the full range of size_t. * The original algorithm needs a (-1) at one single place, * and we want to avoid that. */ // local prefix table size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; // check needle length and use appropriate prefix table // if the pattern exceeds static prefix table, allocate on the heap bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; register size_t *ptable = useheap ? calloc(needle.length + 1, sizeof(size_t)) : s_prefix_table; // keep counter in registers register size_t i, j; // fill prefix table i = 0; j = 0; ptable[i] = j; while (i < needle.length) { while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { j = ptable[j - 1]; } i++; j++; ptable[i] = j; } // search cxstring result = {NULL, 0}; i = 0; j = 1; while (i < haystack.length) { while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { j = ptable[j - 1]; } i++; j++; if (j - 1 == needle.length) { size_t start = i - needle.length; result.ptr = haystack.ptr + start; result.length = haystack.length - start; break; } } // if prefix table was allocated on the heap, free it if (ptable != s_prefix_table) { free(ptable); } return result; } cxmutstr cx_strstr_m( cxmutstr haystack, cxstring needle ) { cxstring result = cx_strstr(cx_strcast(haystack), needle); return (cxmutstr) {(char *) result.ptr, result.length}; } size_t cx_strsplit( cxstring string, cxstring delim, size_t limit, cxstring *output ) { // special case: output limit is zero if (limit == 0) return 0; // special case: delimiter is empty if (delim.length == 0) { output[0] = string; return 1; } // special cases: delimiter is at least as large as the string if (delim.length >= string.length) { // exact match if (cx_strcmp(string, delim) == 0) { output[0] = cx_strn(string.ptr, 0); output[1] = cx_strn(string.ptr + string.length, 0); return 2; } else { // no match possible output[0] = string; return 1; } } size_t n = 0; cxstring curpos = string; while (1) { ++n; cxstring match = cx_strstr(curpos, delim); if (match.length > 0) { // is the limit reached? if (n < limit) { // copy the current string to the array cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); output[n - 1] = item; size_t processed = item.length + delim.length; curpos.ptr += processed; curpos.length -= processed; } else { // limit reached, copy the _full_ remaining string output[n - 1] = curpos; break; } } else { // no more matches, copy last string output[n - 1] = curpos; break; } } return n; } size_t cx_strsplit_a( CxAllocator const *allocator, cxstring string, cxstring delim, size_t limit, cxstring **output ) { // find out how many splits we're going to make and allocate memory size_t n = 0; cxstring curpos = string; while (1) { ++n; cxstring match = cx_strstr(curpos, delim); if (match.length > 0) { // is the limit reached? if (n < limit) { size_t processed = match.ptr - curpos.ptr + delim.length; curpos.ptr += processed; curpos.length -= processed; } else { // limit reached break; } } else { // no more matches break; } } *output = cxCalloc(allocator, n, sizeof(cxstring)); return cx_strsplit(string, delim, n, *output); } size_t cx_strsplit_m( cxmutstr string, cxstring delim, size_t limit, cxmutstr *output ) { return cx_strsplit(cx_strcast(string), delim, limit, (cxstring *) output); } size_t cx_strsplit_ma( CxAllocator const *allocator, cxmutstr string, cxstring delim, size_t limit, cxmutstr **output ) { return cx_strsplit_a(allocator, cx_strcast(string), delim, limit, (cxstring **) output); } int cx_strcmp( cxstring s1, cxstring s2 ) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { return 1; } else { return -1; } } int cx_strcasecmp( cxstring s1, cxstring s2 ) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); #else return strncasecmp(s1.ptr, s2.ptr, s1.length); #endif } else if (s1.length > s2.length) { return 1; } else { return -1; } } int cx_strcmp_p( void const *s1, void const *s2 ) { cxstring const *left = s1; cxstring const *right = s2; return cx_strcmp(*left, *right); } int cx_strcasecmp_p( void const *s1, void const *s2 ) { cxstring const *left = s1; cxstring const *right = s2; return cx_strcasecmp(*left, *right); } cxmutstr cx_strdup_a( CxAllocator const *allocator, cxstring string ) { cxmutstr result = { cxMalloc(allocator, string.length + 1), string.length }; if (result.ptr == NULL) { result.length = 0; return result; } memcpy(result.ptr, string.ptr, string.length); result.ptr[string.length] = '\0'; return result; } cxstring cx_strtrim(cxstring string) { cxstring result = string; // TODO: optimize by comparing multiple bytes at once while (result.length > 0 && isspace(*result.ptr)) { result.ptr++; result.length--; } while (result.length > 0 && isspace(result.ptr[result.length - 1])) { result.length--; } return result; } cxmutstr cx_strtrim_m(cxmutstr string) { cxstring result = cx_strtrim(cx_strcast(string)); return (cxmutstr) {(char *) result.ptr, result.length}; } bool cx_strprefix( cxstring string, cxstring prefix ) { if (string.length < prefix.length) return false; return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; } bool cx_strsuffix( cxstring string, cxstring suffix ) { if (string.length < suffix.length) return false; return memcmp(string.ptr + string.length - suffix.length, suffix.ptr, suffix.length) == 0; } bool cx_strcaseprefix( cxstring string, cxstring prefix ) { if (string.length < prefix.length) return false; #ifdef _WIN32 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; #else return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; #endif } bool cx_strcasesuffix( cxstring string, cxstring suffix ) { if (string.length < suffix.length) return false; #ifdef _WIN32 return _strnicmp(string.ptr+string.length-suffix.length, suffix.ptr, suffix.length) == 0; #else return strncasecmp(string.ptr + string.length - suffix.length, suffix.ptr, suffix.length) == 0; #endif } void cx_strlower(cxmutstr string) { cx_for_n(i, string.length) { string.ptr[i] = (char) tolower(string.ptr[i]); } } void cx_strupper(cxmutstr string) { cx_for_n(i, string.length) { string.ptr[i] = (char) toupper(string.ptr[i]); } } #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 #endif struct cx_strreplace_ibuf { size_t *buf; struct cx_strreplace_ibuf *next; unsigned int len; }; static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { while (buf) { struct cx_strreplace_ibuf *next = buf->next; free(buf->buf); free(buf); buf = next; } } cxmutstr cx_strreplacen_a( CxAllocator const *allocator, cxstring str, cxstring pattern, cxstring replacement, size_t replmax ) { if (pattern.length == 0 || pattern.length > str.length || replmax == 0) return cx_strdup_a(allocator, str); // Compute expected buffer length size_t ibufmax = str.length / pattern.length; size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; } // Allocate first index buffer struct cx_strreplace_ibuf *firstbuf, *curbuf; firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); if (!firstbuf) return cx_mutstrn(NULL, 0); firstbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!firstbuf->buf) { free(firstbuf); return cx_mutstrn(NULL, 0); } // Search occurrences cxstring searchstr = str; size_t found = 0; do { cxstring match = cx_strstr(searchstr, pattern); if (match.length > 0) { // Allocate next buffer in chain, if required if (curbuf->len == ibuflen) { struct cx_strreplace_ibuf *nextbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); if (!nextbuf) { cx_strrepl_free_ibuf(firstbuf); return cx_mutstrn(NULL, 0); } nextbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!nextbuf->buf) { free(nextbuf); cx_strrepl_free_ibuf(firstbuf); return cx_mutstrn(NULL, 0); } curbuf->next = nextbuf; curbuf = nextbuf; } // Record match index found++; size_t idx = match.ptr - str.ptr; curbuf->buf[curbuf->len++] = idx; searchstr.ptr = match.ptr + pattern.length; searchstr.length = str.length - idx - pattern.length; } else { break; } } while (searchstr.length > 0 && found < replmax); // Allocate result string cxmutstr result; { ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; size_t rcount = 0; curbuf = firstbuf; do { rcount += curbuf->len; curbuf = curbuf->next; } while (curbuf); result.length = str.length + rcount * adjlen; result.ptr = cxMalloc(allocator, result.length + 1); if (!result.ptr) { cx_strrepl_free_ibuf(firstbuf); return cx_mutstrn(NULL, 0); } } // Build result string curbuf = firstbuf; size_t srcidx = 0; char *destptr = result.ptr; do { for (size_t i = 0; i < curbuf->len; i++) { // Copy source part up to next match size_t idx = curbuf->buf[i]; size_t srclen = idx - srcidx; if (srclen > 0) { memcpy(destptr, str.ptr + srcidx, srclen); destptr += srclen; srcidx += srclen; } // Copy the replacement and skip the source pattern srcidx += pattern.length; memcpy(destptr, replacement.ptr, replacement.length); destptr += replacement.length; } curbuf = curbuf->next; } while (curbuf); memcpy(destptr, str.ptr + srcidx, str.length - srcidx); // Result is guaranteed to be zero-terminated result.ptr[result.length] = '\0'; // Free index buffer cx_strrepl_free_ibuf(firstbuf); return result; } CxStrtokCtx cx_strtok( cxstring str, cxstring delim, size_t limit ) { CxStrtokCtx ctx; ctx.str = str; ctx.delim = delim; ctx.limit = limit; ctx.pos = 0; ctx.next_pos = 0; ctx.delim_pos = 0; ctx.found = 0; ctx.delim_more = NULL; ctx.delim_more_count = 0; return ctx; } CxStrtokCtx cx_strtok_m( cxmutstr str, cxstring delim, size_t limit ) { return cx_strtok(cx_strcast(str), delim, limit); } bool cx_strtok_next( CxStrtokCtx *ctx, cxstring *token ) { // abortion criteria if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { return false; } // determine the search start cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos); // search the next delimiter cxstring delim = cx_strstr(haystack, ctx->delim); // if found, make delim capture exactly the delimiter if (delim.length > 0) { delim.length = ctx->delim.length; } // if more delimiters are specified, check them now if (ctx->delim_more_count > 0) { cx_for_n(i, ctx->delim_more_count) { cxstring d = cx_strstr(haystack, ctx->delim_more[i]); if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { delim.ptr = d.ptr; delim.length = ctx->delim_more[i].length; } } } // store the token information and adjust the context ctx->found++; ctx->pos = ctx->next_pos; token->ptr = &ctx->str.ptr[ctx->pos]; ctx->delim_pos = delim.length == 0 ? ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); token->length = ctx->delim_pos - ctx->pos; ctx->next_pos = ctx->delim_pos + delim.length; return true; } bool cx_strtok_next_m( CxStrtokCtx *ctx, cxmutstr *token ) { return cx_strtok_next(ctx, (cxstring *) token); } void cx_strtok_delim( CxStrtokCtx *ctx, cxstring const *delim, size_t count ) { ctx->delim_more = delim; ctx->delim_more_count = count; }