Sat, 12 Nov 2022 11:52:47 +0100
allow '-' in tokens, add support for negative integers
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "cx/string.h" #include "cx/utils.h" #include <string.h> #include <stdarg.h> #include <ctype.h> #ifndef _WIN32 #include <strings.h> /* for strncasecmp() */ #endif /* _WIN32 */ cxmutstr cx_mutstr(char *cstring) { return (cxmutstr) {cstring, strlen(cstring)}; } cxmutstr cx_mutstrn( char *cstring, size_t length ) { return (cxmutstr) {cstring, length}; } cxstring cx_str(const char *cstring) { return (cxstring) {cstring, strlen(cstring)}; } cxstring cx_strn( const char *cstring, size_t length ) { return (cxstring) {cstring, length}; } cxstring cx_strcast(cxmutstr str) { return (cxstring) {str.ptr, str.length}; } void cx_strfree(cxmutstr *str) { free(str->ptr); str->ptr = NULL; str->length = 0; } void cx_strfree_a( CxAllocator *alloc, cxmutstr *str ) { cxFree(alloc, str->ptr); str->ptr = NULL; str->length = 0; } size_t cx_strlen( size_t count, ... ) { if (count == 0) return 0; va_list ap; va_start(ap, count); size_t size = 0; cx_for_n(i, count) { cxstring str = va_arg(ap, cxstring); size += str.length; } va_end(ap); return size; } cxmutstr cx_strcat_a( CxAllocator *alloc, size_t count, ... ) { cxstring *strings = calloc(count, sizeof(cxstring)); if (!strings) abort(); va_list ap; va_start(ap, count); // get all args and overall length size_t slen = 0; cx_for_n(i, count) { cxstring s = va_arg (ap, cxstring); strings[i] = s; slen += s.length; } // create new string cxmutstr result; result.ptr = cxMalloc(alloc, slen + 1); result.length = slen; if (result.ptr == NULL) abort(); // concatenate strings size_t pos = 0; cx_for_n(i, count) { cxstring s = strings[i]; memcpy(result.ptr + pos, s.ptr, s.length); pos += s.length; } // terminate string result.ptr[result.length] = '\0'; // free temporary array free(strings); return result; } cxstring cx_strsubs( cxstring string, size_t start ) { return cx_strsubsl(string, start, string.length - start); } cxmutstr cx_strsubs_m( cxmutstr string, size_t start ) { return cx_strsubsl_m(string, start, string.length - start); } cxstring cx_strsubsl( cxstring string, size_t start, size_t length ) { if (start > string.length) { return (cxstring) {NULL, 0}; } size_t rem_len = string.length - start; if (length > rem_len) { length = rem_len; } return (cxstring) {string.ptr + start, length}; } cxmutstr cx_strsubsl_m( cxmutstr string, size_t start, size_t length ) { cxstring result = cx_strsubsl(cx_strcast(string), start, length); return (cxmutstr) {(char *) result.ptr, result.length}; } cxstring cx_strchr( cxstring string, int chr ) { chr = 0xFF & chr; // TODO: improve by comparing multiple bytes at once cx_for_n(i, string.length) { if (string.ptr[i] == chr) { return cx_strsubs(string, i); } } return (cxstring) {NULL, 0}; } cxmutstr cx_strchr_m( cxmutstr string, int chr ) { cxstring result = cx_strchr(cx_strcast(string), chr); return (cxmutstr) {(char *) result.ptr, result.length}; } cxstring cx_strrchr( cxstring string, int chr ) { chr = 0xFF & chr; size_t i = string.length; while (i > 0) { i--; // TODO: improve by comparing multiple bytes at once if (string.ptr[i] == chr) { return cx_strsubs(string, i); } } return (cxstring) {NULL, 0}; } cxmutstr cx_strrchr_m( cxmutstr string, int chr ) { cxstring result = cx_strrchr(cx_strcast(string), chr); return (cxmutstr) {(char *) result.ptr, result.length}; } #define STRSTR_SBO_BUFLEN 512 cxstring cx_strstr( cxstring haystack, cxstring needle ) { if (needle.length == 0) { return haystack; } /* optimize for single-char needles */ if (needle.length == 1) { return cx_strchr(haystack, *needle.ptr); } /* * IMPORTANT: * Our prefix table contains the prefix length PLUS ONE * this is our decision, because we want to use the full range of size_t. * The original algorithm needs a (-1) at one single place, * and we want to avoid that. */ /* local prefix table */ size_t s_prefix_table[STRSTR_SBO_BUFLEN]; /* check needle length and use appropriate prefix table */ /* if the pattern exceeds static prefix table, allocate on the heap */ bool useheap = needle.length >= STRSTR_SBO_BUFLEN; register size_t *ptable = useheap ? calloc(needle.length + 1, sizeof(size_t)) : s_prefix_table; /* keep counter in registers */ register size_t i, j; /* fill prefix table */ i = 0; j = 0; ptable[i] = j; while (i < needle.length) { while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { j = ptable[j - 1]; } i++; j++; ptable[i] = j; } /* search */ cxstring result = {NULL, 0}; i = 0; j = 1; while (i < haystack.length) { while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { j = ptable[j - 1]; } i++; j++; if (j - 1 == needle.length) { size_t start = i - needle.length; result.ptr = haystack.ptr + start; result.length = haystack.length - start; break; } } /* if prefix table was allocated on the heap, free it */ if (ptable != s_prefix_table) { free(ptable); } return result; } cxmutstr cx_strstr_m( cxmutstr haystack, cxstring needle ) { cxstring result = cx_strstr(cx_strcast(haystack), needle); return (cxmutstr) {(char *) result.ptr, result.length}; } size_t cx_strsplit( cxstring string, cxstring delim, size_t limit, cxstring *output ) { /* special case: output limit is zero */ if (limit == 0) return 0; /* special case: delimiter is empty */ if (delim.length == 0) { output[0] = string; return 1; } /* special cases: delimiter is at least as large as the string */ if (delim.length >= string.length) { /* exact match */ if (cx_strcmp(string, delim) == 0) { output[0] = cx_strn(string.ptr, 0); output[1] = cx_strn(string.ptr + string.length, 0); return 2; } else /* no match possible */ { output[0] = string; return 1; } } size_t n = 0; cxstring curpos = string; while (1) { ++n; cxstring match = cx_strstr(curpos, delim); if (match.length > 0) { /* is the limit reached? */ if (n < limit) { /* copy the current string to the array */ cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); output[n - 1] = item; size_t processed = item.length + delim.length; curpos.ptr += processed; curpos.length -= processed; } else { /* limit reached, copy the _full_ remaining string */ output[n - 1] = curpos; break; } } else { /* no more matches, copy last string */ output[n - 1] = curpos; break; } } return n; } size_t cx_strsplit_a( CxAllocator *allocator, cxstring string, cxstring delim, size_t limit, cxstring **output ) { /* find out how many splits we're going to make and allocate memory */ size_t n = 0; cxstring curpos = string; while (1) { ++n; cxstring match = cx_strstr(curpos, delim); if (match.length > 0) { /* is the limit reached? */ if (n < limit) { size_t processed = match.ptr - curpos.ptr + delim.length; curpos.ptr += processed; curpos.length -= processed; } else { /* limit reached */ break; } } else { /* no more matches */ break; } } *output = cxCalloc(allocator, n, sizeof(cxstring)); return cx_strsplit(string, delim, n, *output); } size_t cx_strsplit_m( cxmutstr string, cxstring delim, size_t limit, cxmutstr *output ) { return cx_strsplit(cx_strcast(string), delim, limit, (cxstring *) output); } size_t cx_strsplit_ma( CxAllocator *allocator, cxmutstr string, cxstring delim, size_t limit, cxmutstr **output ) { return cx_strsplit_a(allocator, cx_strcast(string), delim, limit, (cxstring **) output); } int cx_strcmp( cxstring s1, cxstring s2 ) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { return 1; } else { return -1; } } int cx_strcasecmp( cxstring s1, cxstring s2 ) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); #else return strncasecmp(s1.ptr, s2.ptr, s1.length); #endif } else if (s1.length > s2.length) { return 1; } else { return -1; } } cxmutstr cx_strdup_a( CxAllocator *allocator, cxstring string ) { cxmutstr result = { cxMalloc(allocator, string.length + 1), string.length }; if (result.ptr == NULL) { result.length = 0; return result; } memcpy(result.ptr, string.ptr, string.length); result.ptr[string.length] = '\0'; return result; } cxstring cx_strtrim(cxstring string) { cxstring result = string; // TODO: optimize by comparing multiple bytes at once while (result.length > 0 && isspace(*result.ptr)) { result.ptr++; result.length--; } while (result.length > 0 && isspace(result.ptr[result.length - 1])) { result.length--; } return result; } cxmutstr cx_strtrim_m(cxmutstr string) { cxstring result = cx_strtrim(cx_strcast(string)); return (cxmutstr) {(char *) result.ptr, result.length}; } bool cx_strprefix( cxstring string, cxstring prefix ) { if (string.length < prefix.length) return false; return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; } bool cx_strsuffix( cxstring string, cxstring suffix ) { if (string.length < suffix.length) return false; return memcmp(string.ptr + string.length - suffix.length, suffix.ptr, suffix.length) == 0; } bool cx_strcaseprefix( cxstring string, cxstring prefix ) { if (string.length < prefix.length) return false; #ifdef _WIN32 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; #else return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; #endif } bool cx_strcasesuffix( cxstring string, cxstring suffix ) { if (string.length < suffix.length) return false; #ifdef _WIN32 return _strnicmp(string.ptr+string.length-suffix.length, suffix.ptr, suffix.length) == 0; #else return strncasecmp(string.ptr + string.length - suffix.length, suffix.ptr, suffix.length) == 0; #endif } void cx_strlower(cxmutstr string) { cx_for_n(i, string.length) { string.ptr[i] = (char) tolower(string.ptr[i]); } } void cx_strupper(cxmutstr string) { cx_for_n(i, string.length) { string.ptr[i] = (char) toupper(string.ptr[i]); } } #define REPLACE_INDEX_BUFFER_MAX 100 struct cx_strreplace_ibuf { size_t *buf; struct cx_strreplace_ibuf *next; unsigned int len; }; static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { while (buf) { struct cx_strreplace_ibuf *next = buf->next; free(buf->buf); free(buf); buf = next; } } cxmutstr cx_strreplacen_a( CxAllocator *allocator, cxstring str, cxstring pattern, cxstring replacement, size_t replmax ) { if (pattern.length == 0 || pattern.length > str.length || replmax == 0) return cx_strdup_a(allocator, str); /* Compute expected buffer length */ size_t ibufmax = str.length / pattern.length; size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { ibuflen = REPLACE_INDEX_BUFFER_MAX; } /* Allocate first index buffer */ struct cx_strreplace_ibuf *firstbuf, *curbuf; firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); if (!firstbuf) return cx_mutstrn(NULL, 0); firstbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!firstbuf->buf) { free(firstbuf); return cx_mutstrn(NULL, 0); } /* Search occurrences */ cxstring searchstr = str; size_t found = 0; do { cxstring match = cx_strstr(searchstr, pattern); if (match.length > 0) { /* Allocate next buffer in chain, if required */ if (curbuf->len == ibuflen) { struct cx_strreplace_ibuf *nextbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); if (!nextbuf) { cx_strrepl_free_ibuf(firstbuf); return cx_mutstrn(NULL, 0); } nextbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!nextbuf->buf) { free(nextbuf); cx_strrepl_free_ibuf(firstbuf); return cx_mutstrn(NULL, 0); } curbuf->next = nextbuf; curbuf = nextbuf; } /* Record match index */ found++; size_t idx = match.ptr - str.ptr; curbuf->buf[curbuf->len++] = idx; searchstr.ptr = match.ptr + pattern.length; searchstr.length = str.length - idx - pattern.length; } else { break; } } while (searchstr.length > 0 && found < replmax); /* Allocate result string */ cxmutstr result; { ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; size_t rcount = 0; curbuf = firstbuf; do { rcount += curbuf->len; curbuf = curbuf->next; } while (curbuf); result.length = str.length + rcount * adjlen; result.ptr = cxMalloc(allocator, result.length + 1); if (!result.ptr) { cx_strrepl_free_ibuf(firstbuf); return cx_mutstrn(NULL, 0); } } /* Build result string */ curbuf = firstbuf; size_t srcidx = 0; char *destptr = result.ptr; do { for (size_t i = 0; i < curbuf->len; i++) { /* Copy source part up to next match*/ size_t idx = curbuf->buf[i]; size_t srclen = idx - srcidx; if (srclen > 0) { memcpy(destptr, str.ptr + srcidx, srclen); destptr += srclen; srcidx += srclen; } /* Copy the replacement and skip the source pattern */ srcidx += pattern.length; memcpy(destptr, replacement.ptr, replacement.length); destptr += replacement.length; } curbuf = curbuf->next; } while (curbuf); memcpy(destptr, str.ptr + srcidx, str.length - srcidx); /* Result is guaranteed to be zero-terminated */ result.ptr[result.length] = '\0'; /* Free index buffer */ cx_strrepl_free_ibuf(firstbuf); return result; }