diff -r 11f3bb408051 -r 62921b370c60 ucx/string.c --- a/ucx/string.c Wed Nov 22 12:59:13 2017 +0100 +++ b/ucx/string.c Sun Jan 21 12:13:09 2018 +0100 @@ -1,7 +1,7 @@ /* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * - * Copyright 2015 Olaf Wintermann. All rights reserved. + * Copyright 2016 Olaf Wintermann. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "string.h" @@ -175,6 +176,79 @@ return n; } +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) + +#define ptable_w(useheap, ptable, index, src) do {\ + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ + else ((size_t*)ptable)[index] = src;\ + } while (0); + +sstr_t sstrstr(sstr_t string, sstr_t match) { + if (match.length == 0) { + return string; + } + + /* prepare default return value in case of no match */ + sstr_t result = sstrn(NULL, 0); + + /* + * IMPORTANT: + * our prefix table contains the prefix length PLUS ONE + * this is our decision, because we want to use the full range of size_t + * the original algorithm needs a (-1) at one single place + * and we want to avoid that + */ + + /* static prefix table */ + static uint8_t s_prefix_table[256]; + + /* check pattern length and use appropriate prefix table */ + /* if the pattern exceeds static prefix table, allocate on the heap */ + register int useheap = match.length > 255; + register void* ptable = useheap ? + calloc(match.length+1, sizeof(size_t)): s_prefix_table; + + /* keep counter in registers */ + register size_t i, j; + + /* fill prefix table */ + i = 0; j = 0; + ptable_w(useheap, ptable, i, j); + while (i < match.length) { + while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + ptable_w(useheap, ptable, i, j); + } + + /* search */ + i = 0; j = 1; + while (i < string.length) { + while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + if (j-1 == match.length) { + size_t start = i - match.length; + result.ptr = string.ptr + start; + result.length = string.length - start; + break; + } + } + + /* if prefix table was allocated on the heap, free it */ + if (ptable != s_prefix_table) { + free(ptable); + } + + return result; +} + +#undef ptable_r +#undef ptable_w + sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { return sstrsplit_a(ucx_default_allocator(), s, d, n); } @@ -184,70 +258,85 @@ *n = -1; return NULL; } - - sstr_t* result; - ssize_t nmax = *n; - *n = 1; - - /* special case: exact match - no processing needed */ - if (sstrcmp(s, d) == 0) { - *n = 0; - return NULL; + + /* special cases: delimiter is at least as large as the string */ + if (d.length >= s.length) { + /* exact match */ + if (sstrcmp(s, d) == 0) { + *n = 0; + return NULL; + } else /* no match possible */ { + *n = 1; + sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); + *result = sstrdup_a(allocator, s); + return result; + } } - sstr_t sv = sstrdup(s); - if (sv.length == 0) { - *n = -2; - return NULL; - } + + ssize_t nmax = *n; + size_t arrlen = 16; + sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); - for (size_t i = 0 ; i < s.length ; i++) { - if (sv.ptr[i] == d.ptr[0]) { - _Bool match = 1; - for (size_t j = 1 ; j < d.length ; j++) { - if (j+i < s.length) { - match &= (sv.ptr[i+j] == d.ptr[j]); + if (result) { + sstr_t curpos = s; + ssize_t j = 1; + while (1) { + sstr_t match; + /* optimize for one byte delimiters */ + if (d.length == 1) { + match = curpos; + for (size_t i = 0 ; i < curpos.length ; i++) { + if (curpos.ptr[i] == *(d.ptr)) { + match.ptr = curpos.ptr + i; + break; + } + match.length--; + } + } else { + match = sstrstr(curpos, d); + } + if (match.length > 0) { + /* is this our last try? */ + if (nmax == 0 || j < nmax) { + /* copy the current string to the array */ + sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); + result[j-1] = sstrdup_a(allocator, item); + size_t processed = item.length + d.length; + curpos.ptr += processed; + curpos.length -= processed; + + /* allocate memory for the next string */ + j++; + if (j > arrlen) { + arrlen *= 2; + sstr_t* reallocated = (sstr_t*) alrealloc( + allocator, result, arrlen*sizeof(sstr_t)); + if (reallocated) { + result = reallocated; + } else { + for (ssize_t i = 0 ; i < j-1 ; i++) { + alfree(allocator, result[i].ptr); + } + alfree(allocator, result); + *n = -2; + return NULL; + } + } } else { - match = 0; + /* nmax reached, copy the _full_ remaining string */ + result[j-1] = sstrdup_a(allocator, curpos); break; } - } - if (match) { - (*n)++; - for (size_t j = 0 ; j < d.length ; j++) { - sv.ptr[i+j] = 0; - } - i += d.length; - } - } - if ((*n) == nmax) break; - } - result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); - - if (result) { - char *pptr = sv.ptr; - for (ssize_t i = 0 ; i < *n ; i++) { - size_t l = strlen(pptr); - char* ptr = (char*) almalloc(allocator, l + 1); - if (ptr) { - memcpy(ptr, pptr, l); - ptr[l] = 0; - - result[i] = sstrn(ptr, l); - pptr += l + d.length; } else { - for (ssize_t j = i-1 ; j >= 0 ; j--) { - alfree(allocator, result[j].ptr); - } - alfree(allocator, result); - *n = -2; + /* no more matches, copy last string */ + result[j-1] = sstrdup_a(allocator, curpos); break; } } + *n = j; } else { *n = -2; } - - free(sv.ptr); return result; }