--- a/ucx/string.c Mon Feb 04 14:46:11 2019 +0100 +++ b/ucx/string.c Mon Feb 04 17:49:50 2019 +0100 @@ -1,7 +1,7 @@ /* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * - * Copyright 2016 Olaf Wintermann. All rights reserved. + * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,15 +26,16 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "ucx/string.h" + +#include "ucx/allocator.h" + #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <stdint.h> #include <ctype.h> -#include "string.h" -#include "allocator.h" - sstr_t sstr(char *cstring) { sstr_t string; string.ptr = cstring; @@ -49,13 +50,33 @@ return string; } -size_t sstrnlen(size_t n, sstr_t s, ...) { +scstr_t scstr(const char *cstring) { + scstr_t string; + string.ptr = cstring; + string.length = strlen(cstring); + return string; +} + +scstr_t scstrn(const char *cstring, size_t length) { + scstr_t string; + string.ptr = cstring; + string.length = length; + return string; +} + + +size_t scstrnlen(size_t n, ...) { va_list ap; - size_t size = s.length; - va_start(ap, s); + va_start(ap, n); + + size_t size = 0; - for (size_t i = 1 ; i < n ; i++) { - sstr_t str = va_arg(ap, sstr_t); + for (size_t i = 0 ; i < n ; i++) { + scstr_t str = va_arg(ap, scstr_t); + if(SIZE_MAX - str.length < size) { + size = SIZE_MAX; + break; + } size += str.length; } va_end(ap); @@ -66,8 +87,7 @@ static sstr_t sstrvcat_a( UcxAllocator *a, size_t count, - sstr_t s1, - sstr_t s2, + scstr_t s1, va_list ap) { sstr_t str; str.ptr = NULL; @@ -76,7 +96,13 @@ return str; } - sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t)); + scstr_t s2 = va_arg (ap, scstr_t); + + if(((size_t)-1) - s1.length < s2.length) { + return str; + } + + scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); if(!strings) { return str; } @@ -84,16 +110,25 @@ // get all args and overall length strings[0] = s1; strings[1] = s2; - size_t strlen = s1.length + s2.length; + size_t slen = s1.length + s2.length; + int error = 0; for (size_t i=2;i<count;i++) { - sstr_t s = va_arg (ap, sstr_t); + scstr_t s = va_arg (ap, scstr_t); strings[i] = s; - strlen += s.length; + if(((size_t)-1) - s.length < slen) { + error = 1; + break; + } + slen += s.length; + } + if(error) { + free(strings); + return str; } // create new string - str.ptr = (char*) almalloc(a, strlen + 1); - str.length = strlen; + str.ptr = (char*) almalloc(a, slen + 1); + str.length = slen; if(!str.ptr) { free(strings); str.length = 0; @@ -103,7 +138,7 @@ // concatenate strings size_t pos = 0; for (size_t i=0;i<count;i++) { - sstr_t s = strings[i]; + scstr_t s = strings[i]; memcpy(str.ptr + pos, s.ptr, s.length); pos += s.length; } @@ -115,20 +150,42 @@ return str; } -sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) { +sstr_t scstrcat(size_t count, scstr_t s1, ...) { va_list ap; - va_start(ap, s2); - sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap); + va_start(ap, s1); + sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); + va_end(ap); + return s; +} + +sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { + va_list ap; + va_start(ap, s1); + sstr_t s = sstrvcat_a(a, count, s1, ap); va_end(ap); return s; } -sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) { - va_list ap; - va_start(ap, s2); - sstr_t s = sstrvcat_a(a, count, s1, s2, ap); - va_end(ap); - return s; +static int ucx_substring( + size_t str_length, + size_t start, + size_t length, + size_t *newlen, + size_t *newpos) +{ + *newlen = 0; + *newpos = 0; + + if(start > str_length) { + return 0; + } + + if(length > str_length - start) { + length = str_length - start; + } + *newlen = length; + *newpos = start; + return 1; } sstr_t sstrsubs(sstr_t s, size_t start) { @@ -136,44 +193,80 @@ } sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { - sstr_t new_sstr; - if (start >= s.length) { - new_sstr.ptr = NULL; - new_sstr.length = 0; - } else { - if (length > s.length-start) { - length = s.length-start; + size_t pos; + sstr_t ret = { NULL, 0 }; + if(ucx_substring(s.length, start, length, &ret.length, &pos)) { + ret.ptr = s.ptr + pos; + } + return ret; +} + +scstr_t scstrsubs(scstr_t string, size_t start) { + return scstrsubsl(string, start, string.length-start); +} + +scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { + size_t pos; + scstr_t ret = { NULL, 0 }; + if(ucx_substring(s.length, start, length, &ret.length, &pos)) { + ret.ptr = s.ptr + pos; + } + return ret; +} + + +static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { + for(size_t i=0;i<length;i++) { + if(str[i] == chr) { + *pos = i; + return 1; } - new_sstr.ptr = &s.ptr[start]; - new_sstr.length = length; } - return new_sstr; + return 0; +} + +static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { + if(length > 0) { + for(size_t i=length ; i>0 ; i--) { + if(str[i-1] == chr) { + *pos = i-1; + return 1; + } + } + } + return 0; } sstr_t sstrchr(sstr_t s, int c) { - for(size_t i=0;i<s.length;i++) { - if(s.ptr[i] == c) { - return sstrsubs(s, i); - } + size_t pos = 0; + if(ucx_strchr(s.ptr, s.length, c, &pos)) { + return sstrsubs(s, pos); } - sstr_t n; - n.ptr = NULL; - n.length = 0; - return n; + return sstrn(NULL, 0); } sstr_t sstrrchr(sstr_t s, int c) { - if (s.length > 0) { - for(size_t i=s.length;i>0;i--) { - if(s.ptr[i-1] == c) { - return sstrsubs(s, i-1); - } - } + size_t pos = 0; + if(ucx_strrchr(s.ptr, s.length, c, &pos)) { + return sstrsubs(s, pos); } - sstr_t n; - n.ptr = NULL; - n.length = 0; - return n; + return sstrn(NULL, 0); +} + +scstr_t scstrchr(scstr_t s, int c) { + size_t pos = 0; + if(ucx_strchr(s.ptr, s.length, c, &pos)) { + return scstrsubs(s, pos); + } + return scstrn(NULL, 0); +} + +scstr_t scstrrchr(scstr_t s, int c) { + size_t pos = 0; + if(ucx_strrchr(s.ptr, s.length, c, &pos)) { + return scstrsubs(s, pos); + } + return scstrn(NULL, 0); } #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ @@ -184,13 +277,21 @@ else ((size_t*)ptable)[index] = src;\ } while (0); -sstr_t sstrstr(sstr_t string, sstr_t match) { - if (match.length == 0) { - return string; + +static const char* ucx_strstr( + const char *str, + size_t length, + const char *match, + size_t matchlen, + size_t *newlen) +{ + *newlen = length; + if (matchlen == 0) { + return str; } - /* prepare default return value in case of no match */ - sstr_t result = sstrn(NULL, 0); + const char *result = NULL; + size_t resultlen = 0; /* * IMPORTANT: @@ -205,9 +306,9 @@ /* check pattern length and use appropriate prefix table */ /* if the pattern exceeds static prefix table, allocate on the heap */ - register int useheap = match.length > 255; + register int useheap = matchlen > 255; register void* ptable = useheap ? - calloc(match.length+1, sizeof(size_t)): s_prefix_table; + calloc(matchlen+1, sizeof(size_t)): s_prefix_table; /* keep counter in registers */ register size_t i, j; @@ -215,8 +316,8 @@ /* fill prefix table */ i = 0; j = 0; ptable_w(useheap, ptable, i, j); - while (i < match.length) { - while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { + while (i < matchlen) { + while (j >= 1 && match[j-1] != match[i]) { ptable_r(j, useheap, ptable, j-1); } i++; j++; @@ -225,15 +326,15 @@ /* search */ i = 0; j = 1; - while (i < string.length) { - while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { + while (i < length) { + while (j >= 1 && str[i] != match[j-1]) { ptable_r(j, useheap, ptable, j-1); } i++; j++; - if (j-1 == match.length) { - size_t start = i - match.length; - result.ptr = string.ptr + start; - result.length = string.length - start; + if (j-1 == matchlen) { + size_t start = i - matchlen; + result = str + start; + resultlen = length - start; break; } } @@ -243,17 +344,54 @@ free(ptable); } + *newlen = resultlen; + return result; +} + +sstr_t scstrsstr(sstr_t string, scstr_t match) { + sstr_t result; + + size_t reslen; + const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); + if(!resstr) { + result.ptr = NULL; + result.length = 0; + return result; + } + + size_t pos = resstr - string.ptr; + result.ptr = string.ptr + pos; + result.length = reslen; + + return result; +} + +scstr_t scstrscstr(scstr_t string, scstr_t match) { + scstr_t result; + + size_t reslen; + const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); + if(!resstr) { + result.ptr = NULL; + result.length = 0; + return result; + } + + size_t pos = resstr - string.ptr; + result.ptr = string.ptr + pos; + result.length = reslen; + return result; } #undef ptable_r #undef ptable_w -sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { - return sstrsplit_a(ucx_default_allocator(), s, d, n); +sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { + return scstrsplit_a(ucx_default_allocator(), s, d, n); } -sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { +sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { if (s.length == 0 || d.length == 0) { *n = -1; return NULL; @@ -268,20 +406,24 @@ } else /* no match possible */ { *n = 1; sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); - *result = sstrdup_a(allocator, s); + if(result) { + *result = sstrdup_a(allocator, s); + } else { + *n = -2; + } return result; } } ssize_t nmax = *n; size_t arrlen = 16; - sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); + sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); if (result) { - sstr_t curpos = s; + scstr_t curpos = s; ssize_t j = 1; while (1) { - sstr_t match; + scstr_t match; /* optimize for one byte delimiters */ if (d.length == 1) { match = curpos; @@ -293,13 +435,13 @@ match.length--; } } else { - match = sstrstr(curpos, d); + match = scstrscstr(curpos, d); } if (match.length > 0) { /* is this our last try? */ if (nmax == 0 || j < nmax) { /* copy the current string to the array */ - sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); + scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); result[j-1] = sstrdup_a(allocator, item); size_t processed = item.length + d.length; curpos.ptr += processed; @@ -309,8 +451,12 @@ j++; if (j > arrlen) { arrlen *= 2; - sstr_t* reallocated = (sstr_t*) alrealloc( - allocator, result, arrlen*sizeof(sstr_t)); + size_t reallocsz; + sstr_t* reallocated = NULL; + if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { + reallocated = (sstr_t*) alrealloc( + allocator, result, reallocsz); + } if (reallocated) { result = reallocated; } else { @@ -341,7 +487,7 @@ return result; } -int sstrcmp(sstr_t s1, sstr_t s2) { +int scstrcmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { @@ -351,7 +497,7 @@ } } -int sstrcasecmp(sstr_t s1, sstr_t s2) { +int scstrcasecmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); @@ -365,11 +511,11 @@ } } -sstr_t sstrdup(sstr_t s) { +sstr_t scstrdup(scstr_t s) { return sstrdup_a(ucx_default_allocator(), s); } -sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) { +sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { sstr_t newstring; newstring.ptr = (char*)almalloc(allocator, s.length + 1); if (newstring.ptr) { @@ -384,21 +530,38 @@ return newstring; } -sstr_t sstrtrim(sstr_t string) { - sstr_t newstr = string; + +static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { + const char *newptr = s; + size_t length = len; - while (newstr.length > 0 && isspace(*newstr.ptr)) { - newstr.ptr++; - newstr.length--; + while(length > 0 && isspace(*newptr)) { + newptr++; + length--; } - while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) { - newstr.length--; + while(length > 0 && isspace(newptr[length-1])) { + length--; } + *newlen = length; + return newptr - s; +} + +sstr_t sstrtrim(sstr_t string) { + sstr_t newstr; + newstr.ptr = string.ptr + + ucx_strtrim(string.ptr, string.length, &newstr.length); return newstr; } -int sstrprefix(sstr_t string, sstr_t prefix) { +scstr_t scstrtrim(scstr_t string) { + scstr_t newstr; + newstr.ptr = string.ptr + + ucx_strtrim(string.ptr, string.length, &newstr.length); + return newstr; +} + +int scstrprefix(scstr_t string, scstr_t prefix) { if (string.length == 0) { return prefix.length == 0; } @@ -413,7 +576,7 @@ } } -int sstrsuffix(sstr_t string, sstr_t suffix) { +int scstrsuffix(scstr_t string, scstr_t suffix) { if (string.length == 0) { return suffix.length == 0; } @@ -429,7 +592,7 @@ } } -sstr_t sstrlower(sstr_t string) { +sstr_t scstrlower(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); @@ -437,7 +600,7 @@ return ret; } -sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { +sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); @@ -445,7 +608,7 @@ return ret; } -sstr_t sstrupper(sstr_t string) { +sstr_t scstrupper(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); @@ -453,10 +616,24 @@ return ret; } -sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { +sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); } return ret; } + +// type adjustment functions +scstr_t ucx_sc2sc(scstr_t str) { + return str; +} +scstr_t ucx_ss2sc(sstr_t str) { + scstr_t cs; + cs.ptr = str.ptr; + cs.length = str.length; + return cs; +} +scstr_t ucx_ss2c_s(scstr_t c) { + return c; +}