ucx/string.c

changeset 431
bb7da585debc
parent 324
ce13a778654a
child 440
7c4b9cba09ca
equal deleted inserted replaced
169:fe49cff3c571 431:bb7da585debc
1 /* 1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * 3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met: 7 * modification, are permitted provided that the following conditions are met:
8 * 8 *
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29 #include "ucx/string.h" 29 #include "cx/string.h"
30 30 #include "cx/utils.h"
31 #include "ucx/allocator.h" 31
32
33 #include <stdlib.h>
34 #include <string.h> 32 #include <string.h>
35 #include <stdarg.h> 33 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h> 34 #include <ctype.h>
38 35
39 #ifndef _WIN32 36 #ifndef _WIN32
40 #include <strings.h> /* for strncasecmp() */ 37
41 #endif /* _WIN32 */ 38 #include <strings.h> // for strncasecmp()
42 39
43 sstr_t sstr(char *cstring) { 40 #endif // _WIN32
44 sstr_t string; 41
45 string.ptr = cstring; 42 cxmutstr cx_mutstr(char *cstring) {
46 string.length = strlen(cstring); 43 return (cxmutstr) {cstring, strlen(cstring)};
47 return string; 44 }
48 } 45
49 46 cxmutstr cx_mutstrn(
50 sstr_t sstrn(char *cstring, size_t length) { 47 char *cstring,
51 sstr_t string; 48 size_t length
52 string.ptr = cstring; 49 ) {
53 string.length = length; 50 return (cxmutstr) {cstring, length};
54 return string; 51 }
55 } 52
56 53 cxstring cx_str(const char *cstring) {
57 scstr_t scstr(const char *cstring) { 54 return (cxstring) {cstring, strlen(cstring)};
58 scstr_t string; 55 }
59 string.ptr = cstring; 56
60 string.length = strlen(cstring); 57 cxstring cx_strn(
61 return string; 58 const char *cstring,
62 } 59 size_t length
63 60 ) {
64 scstr_t scstrn(const char *cstring, size_t length) { 61 return (cxstring) {cstring, length};
65 scstr_t string; 62 }
66 string.ptr = cstring; 63
67 string.length = length; 64 cxstring cx_strcast(cxmutstr str) {
68 return string; 65 return (cxstring) {str.ptr, str.length};
69 } 66 }
70 67
71 68 void cx_strfree(cxmutstr *str) {
72 size_t scstrnlen(size_t n, ...) { 69 free(str->ptr);
73 if (n == 0) return 0; 70 str->ptr = NULL;
74 71 str->length = 0;
72 }
73
74 void cx_strfree_a(
75 const CxAllocator *alloc,
76 cxmutstr *str
77 ) {
78 cxFree(alloc, str->ptr);
79 str->ptr = NULL;
80 str->length = 0;
81 }
82
83 size_t cx_strlen(
84 size_t count,
85 ...
86 ) {
87 if (count == 0) return 0;
88
75 va_list ap; 89 va_list ap;
76 va_start(ap, n); 90 va_start(ap, count);
77
78 size_t size = 0; 91 size_t size = 0;
79 92 cx_for_n(i, count) {
80 for (size_t i = 0 ; i < n ; i++) { 93 cxstring str = va_arg(ap, cxstring);
81 scstr_t str = va_arg(ap, scstr_t);
82 if(SIZE_MAX - str.length < size) {
83 size = SIZE_MAX;
84 break;
85 }
86 size += str.length; 94 size += str.length;
87 } 95 }
88 va_end(ap); 96 va_end(ap);
89 97
90 return size; 98 return size;
91 } 99 }
92 100
93 static sstr_t sstrvcat_a( 101 cxmutstr cx_strcat_ma(
94 UcxAllocator *a, 102 const CxAllocator *alloc,
103 cxmutstr str,
95 size_t count, 104 size_t count,
96 scstr_t s1, 105 ...
97 va_list ap) { 106 ) {
98 sstr_t str; 107 if (count == 0) return str;
99 str.ptr = NULL; 108
100 str.length = 0; 109 cxstring *strings = calloc(count, sizeof(cxstring));
101 if(count < 2) { 110 if (!strings) abort();
102 return str; 111
103 } 112 va_list ap;
104 113 va_start(ap, count);
105 scstr_t s2 = va_arg (ap, scstr_t); 114
106
107 if(((size_t)-1) - s1.length < s2.length) {
108 return str;
109 }
110
111 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
112 if(!strings) {
113 return str;
114 }
115
116 // get all args and overall length 115 // get all args and overall length
117 strings[0] = s1; 116 size_t slen = str.length;
118 strings[1] = s2; 117 cx_for_n(i, count) {
119 size_t slen = s1.length + s2.length; 118 cxstring s = va_arg (ap, cxstring);
120 int error = 0;
121 for (size_t i=2;i<count;i++) {
122 scstr_t s = va_arg (ap, scstr_t);
123 strings[i] = s; 119 strings[i] = s;
124 if(((size_t)-1) - s.length < slen) {
125 error = 1;
126 break;
127 }
128 slen += s.length; 120 slen += s.length;
129 } 121 }
130 if(error) { 122 va_end(ap);
131 free(strings); 123
132 return str; 124 // reallocate or create new string
133 } 125 if (str.ptr == NULL) {
134 126 str.ptr = cxMalloc(alloc, slen + 1);
135 // create new string 127 } else {
136 str.ptr = (char*) almalloc(a, slen + 1); 128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
129 }
130 if (str.ptr == NULL) abort();
131
132 // concatenate strings
133 size_t pos = str.length;
137 str.length = slen; 134 str.length = slen;
138 if(!str.ptr) { 135 cx_for_n(i, count) {
139 free(strings); 136 cxstring s = strings[i];
140 str.length = 0;
141 return str;
142 }
143
144 // concatenate strings
145 size_t pos = 0;
146 for (size_t i=0;i<count;i++) {
147 scstr_t s = strings[i];
148 memcpy(str.ptr + pos, s.ptr, s.length); 137 memcpy(str.ptr + pos, s.ptr, s.length);
149 pos += s.length; 138 pos += s.length;
150 } 139 }
151 140
141 // terminate string
152 str.ptr[str.length] = '\0'; 142 str.ptr[str.length] = '\0';
153 143
144 // free temporary array
154 free(strings); 145 free(strings);
155 146
156 return str; 147 return str;
157 } 148 }
158 149
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) { 150 cxstring cx_strsubs(
160 va_list ap; 151 cxstring string,
161 va_start(ap, s1); 152 size_t start
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); 153 ) {
163 va_end(ap); 154 return cx_strsubsl(string, start, string.length - start);
164 return s; 155 }
165 } 156
166 157 cxmutstr cx_strsubs_m(
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { 158 cxmutstr string,
168 va_list ap; 159 size_t start
169 va_start(ap, s1); 160 ) {
170 sstr_t s = sstrvcat_a(a, count, s1, ap); 161 return cx_strsubsl_m(string, start, string.length - start);
171 va_end(ap); 162 }
172 return s; 163
173 } 164 cxstring cx_strsubsl(
174 165 cxstring string,
175 static int ucx_substring(
176 size_t str_length,
177 size_t start, 166 size_t start,
178 size_t length, 167 size_t length
179 size_t *newlen, 168 ) {
180 size_t *newpos) 169 if (start > string.length) {
181 { 170 return (cxstring) {NULL, 0};
182 *newlen = 0; 171 }
183 *newpos = 0; 172
184 173 size_t rem_len = string.length - start;
185 if(start > str_length) { 174 if (length > rem_len) {
186 return 0; 175 length = rem_len;
187 } 176 }
188 177
189 if(length > str_length - start) { 178 return (cxstring) {string.ptr + start, length};
190 length = str_length - start; 179 }
191 } 180
192 *newlen = length; 181 cxmutstr cx_strsubsl_m(
193 *newpos = start; 182 cxmutstr string,
194 return 1; 183 size_t start,
195 } 184 size_t length
196 185 ) {
197 sstr_t sstrsubs(sstr_t s, size_t start) { 186 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
198 return sstrsubsl (s, start, s.length-start); 187 return (cxmutstr) {(char *) result.ptr, result.length};
199 } 188 }
200 189
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { 190 cxstring cx_strchr(
202 size_t pos; 191 cxstring string,
203 sstr_t ret = { NULL, 0 }; 192 int chr
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { 193 ) {
205 ret.ptr = s.ptr + pos; 194 chr = 0xFF & chr;
206 } 195 // TODO: improve by comparing multiple bytes at once
207 return ret; 196 cx_for_n(i, string.length) {
208 } 197 if (string.ptr[i] == chr) {
209 198 return cx_strsubs(string, i);
210 scstr_t scstrsubs(scstr_t string, size_t start) { 199 }
211 return scstrsubsl(string, start, string.length-start); 200 }
212 } 201 return (cxstring) {NULL, 0};
213 202 }
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { 203
215 size_t pos; 204 cxmutstr cx_strchr_m(
216 scstr_t ret = { NULL, 0 }; 205 cxmutstr string,
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { 206 int chr
218 ret.ptr = s.ptr + pos; 207 ) {
219 } 208 cxstring result = cx_strchr(cx_strcast(string), chr);
220 return ret; 209 return (cxmutstr) {(char *) result.ptr, result.length};
221 } 210 }
222 211
223 212 cxstring cx_strrchr(
224 static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { 213 cxstring string,
225 for(size_t i=0;i<length;i++) { 214 int chr
226 if(str[i] == chr) { 215 ) {
227 *pos = i; 216 chr = 0xFF & chr;
228 return 1; 217 size_t i = string.length;
229 } 218 while (i > 0) {
230 } 219 i--;
231 return 0; 220 // TODO: improve by comparing multiple bytes at once
232 } 221 if (string.ptr[i] == chr) {
233 222 return cx_strsubs(string, i);
234 static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { 223 }
235 if(length > 0) { 224 }
236 for(size_t i=length ; i>0 ; i--) { 225 return (cxstring) {NULL, 0};
237 if(str[i-1] == chr) { 226 }
238 *pos = i-1; 227
239 return 1; 228 cxmutstr cx_strrchr_m(
240 } 229 cxmutstr string,
241 } 230 int chr
242 } 231 ) {
243 return 0; 232 cxstring result = cx_strrchr(cx_strcast(string), chr);
244 } 233 return (cxmutstr) {(char *) result.ptr, result.length};
245 234 }
246 sstr_t sstrchr(sstr_t s, int c) { 235
247 size_t pos = 0; 236 #ifndef CX_STRSTR_SBO_SIZE
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) { 237 #define CX_STRSTR_SBO_SIZE 512
249 return sstrsubs(s, pos); 238 #endif
250 } 239 unsigned const cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
251 return sstrn(NULL, 0); 240
252 } 241 cxstring cx_strstr(
253 242 cxstring haystack,
254 sstr_t sstrrchr(sstr_t s, int c) { 243 cxstring needle
255 size_t pos = 0; 244 ) {
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { 245 if (needle.length == 0) {
257 return sstrsubs(s, pos); 246 return haystack;
258 } 247 }
259 return sstrn(NULL, 0); 248
260 } 249 // optimize for single-char needles
261 250 if (needle.length == 1) {
262 scstr_t scstrchr(scstr_t s, int c) { 251 return cx_strchr(haystack, *needle.ptr);
263 size_t pos = 0; 252 }
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) { 253
265 return scstrsubs(s, pos);
266 }
267 return scstrn(NULL, 0);
268 }
269
270 scstr_t scstrrchr(scstr_t s, int c) {
271 size_t pos = 0;
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
273 return scstrsubs(s, pos);
274 }
275 return scstrn(NULL, 0);
276 }
277
278 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
279 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
280
281 #define ptable_w(useheap, ptable, index, src) do {\
282 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
283 else ((size_t*)ptable)[index] = src;\
284 } while (0);
285
286
287 static const char* ucx_strstr(
288 const char *str,
289 size_t length,
290 const char *match,
291 size_t matchlen,
292 size_t *newlen)
293 {
294 *newlen = length;
295 if (matchlen == 0) {
296 return str;
297 }
298
299 const char *result = NULL;
300 size_t resultlen = 0;
301
302 /* 254 /*
303 * IMPORTANT: 255 * IMPORTANT:
304 * our prefix table contains the prefix length PLUS ONE 256 * Our prefix table contains the prefix length PLUS ONE
305 * this is our decision, because we want to use the full range of size_t 257 * this is our decision, because we want to use the full range of size_t.
306 * the original algorithm needs a (-1) at one single place 258 * The original algorithm needs a (-1) at one single place,
307 * and we want to avoid that 259 * and we want to avoid that.
308 */ 260 */
309 261
310 /* static prefix table */ 262 // local prefix table
311 static uint8_t s_prefix_table[256]; 263 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
312 264
313 /* check pattern length and use appropriate prefix table */ 265 // check needle length and use appropriate prefix table
314 /* if the pattern exceeds static prefix table, allocate on the heap */ 266 // if the pattern exceeds static prefix table, allocate on the heap
315 register int useheap = matchlen > 255; 267 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
316 register void* ptable = useheap ? 268 register size_t *ptable = useheap ? calloc(needle.length + 1,
317 calloc(matchlen+1, sizeof(size_t)): s_prefix_table; 269 sizeof(size_t)) : s_prefix_table;
318 270
319 /* keep counter in registers */ 271 // keep counter in registers
320 register size_t i, j; 272 register size_t i, j;
321 273
322 /* fill prefix table */ 274 // fill prefix table
323 i = 0; j = 0; 275 i = 0;
324 ptable_w(useheap, ptable, i, j); 276 j = 0;
325 while (i < matchlen) { 277 ptable[i] = j;
326 while (j >= 1 && match[j-1] != match[i]) { 278 while (i < needle.length) {
327 ptable_r(j, useheap, ptable, j-1); 279 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
328 } 280 j = ptable[j - 1];
329 i++; j++; 281 }
330 ptable_w(useheap, ptable, i, j); 282 i++;
331 } 283 j++;
332 284 ptable[i] = j;
333 /* search */ 285 }
334 i = 0; j = 1; 286
335 while (i < length) { 287 // search
336 while (j >= 1 && str[i] != match[j-1]) { 288 cxstring result = {NULL, 0};
337 ptable_r(j, useheap, ptable, j-1); 289 i = 0;
338 } 290 j = 1;
339 i++; j++; 291 while (i < haystack.length) {
340 if (j-1 == matchlen) { 292 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
341 size_t start = i - matchlen; 293 j = ptable[j - 1];
342 result = str + start; 294 }
343 resultlen = length - start; 295 i++;
296 j++;
297 if (j - 1 == needle.length) {
298 size_t start = i - needle.length;
299 result.ptr = haystack.ptr + start;
300 result.length = haystack.length - start;
344 break; 301 break;
345 } 302 }
346 } 303 }
347 304
348 /* if prefix table was allocated on the heap, free it */ 305 // if prefix table was allocated on the heap, free it
349 if (ptable != s_prefix_table) { 306 if (ptable != s_prefix_table) {
350 free(ptable); 307 free(ptable);
351 } 308 }
352 309
353 *newlen = resultlen;
354 return result; 310 return result;
355 } 311 }
356 312
357 sstr_t scstrsstr(sstr_t string, scstr_t match) { 313 cxmutstr cx_strstr_m(
358 sstr_t result; 314 cxmutstr haystack,
359 315 cxstring needle
360 size_t reslen; 316 ) {
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); 317 cxstring result = cx_strstr(cx_strcast(haystack), needle);
362 if(!resstr) { 318 return (cxmutstr) {(char *) result.ptr, result.length};
363 result.ptr = NULL; 319 }
364 result.length = 0; 320
365 return result; 321 size_t cx_strsplit(
366 } 322 cxstring string,
367 323 cxstring delim,
368 size_t pos = resstr - string.ptr; 324 size_t limit,
369 result.ptr = string.ptr + pos; 325 cxstring *output
370 result.length = reslen; 326 ) {
371 327 // special case: output limit is zero
372 return result; 328 if (limit == 0) return 0;
373 } 329
374 330 // special case: delimiter is empty
375 scstr_t scstrscstr(scstr_t string, scstr_t match) { 331 if (delim.length == 0) {
376 scstr_t result; 332 output[0] = string;
377 333 return 1;
378 size_t reslen; 334 }
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); 335
380 if(!resstr) { 336 // special cases: delimiter is at least as large as the string
381 result.ptr = NULL; 337 if (delim.length >= string.length) {
382 result.length = 0; 338 // exact match
383 return result; 339 if (cx_strcmp(string, delim) == 0) {
384 } 340 output[0] = cx_strn(string.ptr, 0);
385 341 output[1] = cx_strn(string.ptr + string.length, 0);
386 size_t pos = resstr - string.ptr; 342 return 2;
387 result.ptr = string.ptr + pos; 343 } else {
388 result.length = reslen; 344 // no match possible
389 345 output[0] = string;
390 return result; 346 return 1;
391 } 347 }
392 348 }
393 #undef ptable_r 349
394 #undef ptable_w 350 size_t n = 0;
395 351 cxstring curpos = string;
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { 352 while (1) {
397 return scstrsplit_a(ucx_default_allocator(), s, d, n); 353 ++n;
398 } 354 cxstring match = cx_strstr(curpos, delim);
399 355 if (match.length > 0) {
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { 356 // is the limit reached?
401 if (s.length == 0 || d.length == 0) { 357 if (n < limit) {
402 *n = -1; 358 // copy the current string to the array
403 return NULL; 359 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
404 } 360 output[n - 1] = item;
405 361 size_t processed = item.length + delim.length;
406 /* special cases: delimiter is at least as large as the string */ 362 curpos.ptr += processed;
407 if (d.length >= s.length) { 363 curpos.length -= processed;
408 /* exact match */
409 if (sstrcmp(s, d) == 0) {
410 *n = 0;
411 return NULL;
412 } else /* no match possible */ {
413 *n = 1;
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
415 if(result) {
416 *result = sstrdup_a(allocator, s);
417 } else { 364 } else {
418 *n = -2; 365 // limit reached, copy the _full_ remaining string
419 } 366 output[n - 1] = curpos;
420 return result;
421 }
422 }
423
424 ssize_t nmax = *n;
425 size_t arrlen = 16;
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
427
428 if (result) {
429 scstr_t curpos = s;
430 ssize_t j = 1;
431 while (1) {
432 scstr_t match;
433 /* optimize for one byte delimiters */
434 if (d.length == 1) {
435 match = curpos;
436 for (size_t i = 0 ; i < curpos.length ; i++) {
437 if (curpos.ptr[i] == *(d.ptr)) {
438 match.ptr = curpos.ptr + i;
439 break;
440 }
441 match.length--;
442 }
443 } else {
444 match = scstrscstr(curpos, d);
445 }
446 if (match.length > 0) {
447 /* is this our last try? */
448 if (nmax == 0 || j < nmax) {
449 /* copy the current string to the array */
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
451 result[j-1] = sstrdup_a(allocator, item);
452 size_t processed = item.length + d.length;
453 curpos.ptr += processed;
454 curpos.length -= processed;
455
456 /* allocate memory for the next string */
457 j++;
458 if (j > arrlen) {
459 arrlen *= 2;
460 size_t reallocsz;
461 sstr_t* reallocated = NULL;
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
463 reallocated = (sstr_t*) alrealloc(
464 allocator, result, reallocsz);
465 }
466 if (reallocated) {
467 result = reallocated;
468 } else {
469 for (ssize_t i = 0 ; i < j-1 ; i++) {
470 alfree(allocator, result[i].ptr);
471 }
472 alfree(allocator, result);
473 *n = -2;
474 return NULL;
475 }
476 }
477 } else {
478 /* nmax reached, copy the _full_ remaining string */
479 result[j-1] = sstrdup_a(allocator, curpos);
480 break;
481 }
482 } else {
483 /* no more matches, copy last string */
484 result[j-1] = sstrdup_a(allocator, curpos);
485 break; 367 break;
486 } 368 }
487 } 369 } else {
488 *n = j; 370 // no more matches, copy last string
489 } else { 371 output[n - 1] = curpos;
490 *n = -2; 372 break;
491 } 373 }
492 374 }
493 return result; 375
494 } 376 return n;
495 377 }
496 int scstrcmp(scstr_t s1, scstr_t s2) { 378
379 size_t cx_strsplit_a(
380 const CxAllocator *allocator,
381 cxstring string,
382 cxstring delim,
383 size_t limit,
384 cxstring **output
385 ) {
386 // find out how many splits we're going to make and allocate memory
387 size_t n = 0;
388 cxstring curpos = string;
389 while (1) {
390 ++n;
391 cxstring match = cx_strstr(curpos, delim);
392 if (match.length > 0) {
393 // is the limit reached?
394 if (n < limit) {
395 size_t processed = match.ptr - curpos.ptr + delim.length;
396 curpos.ptr += processed;
397 curpos.length -= processed;
398 } else {
399 // limit reached
400 break;
401 }
402 } else {
403 // no more matches
404 break;
405 }
406 }
407 *output = cxCalloc(allocator, n, sizeof(cxstring));
408 return cx_strsplit(string, delim, n, *output);
409 }
410
411 size_t cx_strsplit_m(
412 cxmutstr string,
413 cxstring delim,
414 size_t limit,
415 cxmutstr *output
416 ) {
417 return cx_strsplit(cx_strcast(string),
418 delim, limit, (cxstring *) output);
419 }
420
421 size_t cx_strsplit_ma(
422 const CxAllocator *allocator,
423 cxmutstr string,
424 cxstring delim,
425 size_t limit,
426 cxmutstr **output
427 ) {
428 return cx_strsplit_a(allocator, cx_strcast(string),
429 delim, limit, (cxstring **) output);
430 }
431
432 int cx_strcmp(
433 cxstring s1,
434 cxstring s2
435 ) {
497 if (s1.length == s2.length) { 436 if (s1.length == s2.length) {
498 return memcmp(s1.ptr, s2.ptr, s1.length); 437 return memcmp(s1.ptr, s2.ptr, s1.length);
499 } else if (s1.length > s2.length) { 438 } else if (s1.length > s2.length) {
500 return 1; 439 return 1;
501 } else { 440 } else {
502 return -1; 441 return -1;
503 } 442 }
504 } 443 }
505 444
506 int scstrcasecmp(scstr_t s1, scstr_t s2) { 445 int cx_strcasecmp(
446 cxstring s1,
447 cxstring s2
448 ) {
507 if (s1.length == s2.length) { 449 if (s1.length == s2.length) {
508 #ifdef _WIN32 450 #ifdef _WIN32
509 return _strnicmp(s1.ptr, s2.ptr, s1.length); 451 return _strnicmp(s1.ptr, s2.ptr, s1.length);
510 #else 452 #else
511 return strncasecmp(s1.ptr, s2.ptr, s1.length); 453 return strncasecmp(s1.ptr, s2.ptr, s1.length);
515 } else { 457 } else {
516 return -1; 458 return -1;
517 } 459 }
518 } 460 }
519 461
520 sstr_t scstrdup(scstr_t s) { 462 int cx_strcmp_p(
521 return sstrdup_a(ucx_default_allocator(), s); 463 const void *s1,
522 } 464 const void *s2
523 465 ) {
524 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { 466 const cxstring *left = s1;
525 sstr_t newstring; 467 const cxstring *right = s2;
526 newstring.ptr = (char*)almalloc(allocator, s.length + 1); 468 return cx_strcmp(*left, *right);
527 if (newstring.ptr) { 469 }
528 newstring.length = s.length; 470
529 newstring.ptr[newstring.length] = 0; 471 int cx_strcasecmp_p(
530 472 const void *s1,
531 memcpy(newstring.ptr, s.ptr, s.length); 473 const void *s2
532 } else { 474 ) {
533 newstring.length = 0; 475 const cxstring *left = s1;
534 } 476 const cxstring *right = s2;
535 477 return cx_strcasecmp(*left, *right);
536 return newstring; 478 }
537 } 479
538 480 cxmutstr cx_strdup_a(
539 481 const CxAllocator *allocator,
540 static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { 482 cxstring string
541 const char *newptr = s; 483 ) {
542 size_t length = len; 484 cxmutstr result = {
543 485 cxMalloc(allocator, string.length + 1),
544 while(length > 0 && isspace(*newptr)) { 486 string.length
545 newptr++; 487 };
546 length--; 488 if (result.ptr == NULL) {
547 } 489 result.length = 0;
548 while(length > 0 && isspace(newptr[length-1])) { 490 return result;
549 length--; 491 }
550 } 492 memcpy(result.ptr, string.ptr, string.length);
551 493 result.ptr[string.length] = '\0';
552 *newlen = length; 494 return result;
553 return newptr - s; 495 }
554 } 496
555 497 cxstring cx_strtrim(cxstring string) {
556 sstr_t sstrtrim(sstr_t string) { 498 cxstring result = string;
557 sstr_t newstr; 499 // TODO: optimize by comparing multiple bytes at once
558 newstr.ptr = string.ptr 500 while (result.length > 0 && isspace(*result.ptr)) {
559 + ucx_strtrim(string.ptr, string.length, &newstr.length); 501 result.ptr++;
560 return newstr; 502 result.length--;
561 } 503 }
562 504 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
563 scstr_t scstrtrim(scstr_t string) { 505 result.length--;
564 scstr_t newstr; 506 }
565 newstr.ptr = string.ptr 507 return result;
566 + ucx_strtrim(string.ptr, string.length, &newstr.length); 508 }
567 return newstr; 509
568 } 510 cxmutstr cx_strtrim_m(cxmutstr string) {
569 511 cxstring result = cx_strtrim(cx_strcast(string));
570 int scstrprefix(scstr_t string, scstr_t prefix) { 512 return (cxmutstr) {(char *) result.ptr, result.length};
571 if (string.length == 0) { 513 }
572 return prefix.length == 0; 514
573 } 515 bool cx_strprefix(
574 if (prefix.length == 0) { 516 cxstring string,
575 return 1; 517 cxstring prefix
576 } 518 ) {
577 519 if (string.length < prefix.length) return false;
578 if (prefix.length > string.length) { 520 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
579 return 0; 521 }
580 } else { 522
581 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; 523 bool cx_strsuffix(
582 } 524 cxstring string,
583 } 525 cxstring suffix
584 526 ) {
585 int scstrsuffix(scstr_t string, scstr_t suffix) { 527 if (string.length < suffix.length) return false;
586 if (string.length == 0) { 528 return memcmp(string.ptr + string.length - suffix.length,
587 return suffix.length == 0; 529 suffix.ptr, suffix.length) == 0;
588 } 530 }
589 if (suffix.length == 0) { 531
590 return 1; 532 bool cx_strcaseprefix(
591 } 533 cxstring string,
592 534 cxstring prefix
593 if (suffix.length > string.length) { 535 ) {
594 return 0; 536 if (string.length < prefix.length) return false;
595 } else { 537 #ifdef _WIN32
596 return memcmp(string.ptr+string.length-suffix.length, 538 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
597 suffix.ptr, suffix.length) == 0; 539 #else
598 } 540 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
599 } 541 #endif
600 542 }
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) { 543
602 if (string.length == 0) { 544 bool cx_strcasesuffix(
603 return prefix.length == 0; 545 cxstring string,
604 } 546 cxstring suffix
605 if (prefix.length == 0) { 547 ) {
606 return 1; 548 if (string.length < suffix.length) return false;
607 } 549 #ifdef _WIN32
608 550 return _strnicmp(string.ptr+string.length-suffix.length,
609 if (prefix.length > string.length) { 551 suffix.ptr, suffix.length) == 0;
610 return 0; 552 #else
611 } else { 553 return strncasecmp(string.ptr + string.length - suffix.length,
612 scstr_t subs = scstrsubsl(string, 0, prefix.length); 554 suffix.ptr, suffix.length) == 0;
613 return scstrcasecmp(subs, prefix) == 0; 555 #endif
614 } 556 }
615 } 557
616 558 void cx_strlower(cxmutstr string) {
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) { 559 cx_for_n(i, string.length) {
618 if (string.length == 0) { 560 string.ptr[i] = (char) tolower(string.ptr[i]);
619 return suffix.length == 0; 561 }
620 } 562 }
621 if (suffix.length == 0) { 563
622 return 1; 564 void cx_strupper(cxmutstr string) {
623 } 565 cx_for_n(i, string.length) {
624 566 string.ptr[i] = (char) toupper(string.ptr[i]);
625 if (suffix.length > string.length) { 567 }
626 return 0; 568 }
627 } else { 569
628 scstr_t subs = scstrsubs(string, string.length-suffix.length); 570 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
629 return scstrcasecmp(subs, suffix) == 0; 571 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
630 } 572 #endif
631 } 573
632 574 struct cx_strreplace_ibuf {
633 sstr_t scstrlower(scstr_t string) { 575 size_t *buf;
634 sstr_t ret = sstrdup(string); 576 struct cx_strreplace_ibuf *next;
635 for (size_t i = 0; i < ret.length ; i++) { 577 unsigned int len;
636 ret.ptr[i] = tolower(ret.ptr[i]);
637 }
638 return ret;
639 }
640
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
642 sstr_t ret = sstrdup_a(allocator, string);
643 for (size_t i = 0; i < ret.length ; i++) {
644 ret.ptr[i] = tolower(ret.ptr[i]);
645 }
646 return ret;
647 }
648
649 sstr_t scstrupper(scstr_t string) {
650 sstr_t ret = sstrdup(string);
651 for (size_t i = 0; i < ret.length ; i++) {
652 ret.ptr[i] = toupper(ret.ptr[i]);
653 }
654 return ret;
655 }
656
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
658 sstr_t ret = sstrdup_a(allocator, string);
659 for (size_t i = 0; i < ret.length ; i++) {
660 ret.ptr[i] = toupper(ret.ptr[i]);
661 }
662 return ret;
663 }
664
665 #define REPLACE_INDEX_BUFFER_MAX 100
666
667 struct scstrreplace_ibuf {
668 size_t* buf;
669 unsigned int len; /* small indices */
670 struct scstrreplace_ibuf* next;
671 }; 578 };
672 579
673 static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { 580 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
674 while (buf) { 581 while (buf) {
675 struct scstrreplace_ibuf *next = buf->next; 582 struct cx_strreplace_ibuf *next = buf->next;
676 free(buf->buf); 583 free(buf->buf);
677 free(buf); 584 free(buf);
678 buf = next; 585 buf = next;
679 } 586 }
680 } 587 }
681 588
682 sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, 589 cxmutstr cx_strreplacen_a(
683 scstr_t pattern, scstr_t replacement, size_t replmax) { 590 const CxAllocator *allocator,
591 cxstring str,
592 cxstring pattern,
593 cxstring replacement,
594 size_t replmax
595 ) {
684 596
685 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) 597 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
686 return sstrdup(str); 598 return cx_strdup_a(allocator, str);
687 599
688 /* Compute expected buffer length */ 600 // Compute expected buffer length
689 size_t ibufmax = str.length / pattern.length; 601 size_t ibufmax = str.length / pattern.length;
690 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 602 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
691 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { 603 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
692 ibuflen = REPLACE_INDEX_BUFFER_MAX; 604 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
693 } 605 }
694 606
695 /* Allocate first index buffer */ 607 // Allocate first index buffer
696 struct scstrreplace_ibuf *firstbuf, *curbuf; 608 struct cx_strreplace_ibuf *firstbuf, *curbuf;
697 firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); 609 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
698 if (!firstbuf) return sstrn(NULL, 0); 610 if (!firstbuf) return cx_mutstrn(NULL, 0);
699 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); 611 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
700 if (!firstbuf->buf) { 612 if (!firstbuf->buf) {
701 free(firstbuf); 613 free(firstbuf);
702 return sstrn(NULL, 0); 614 return cx_mutstrn(NULL, 0);
703 } 615 }
704 616
705 /* Search occurrences */ 617 // Search occurrences
706 scstr_t searchstr = str; 618 cxstring searchstr = str;
707 size_t found = 0; 619 size_t found = 0;
708 do { 620 do {
709 scstr_t match = scstrscstr(searchstr, pattern); 621 cxstring match = cx_strstr(searchstr, pattern);
710 if (match.length > 0) { 622 if (match.length > 0) {
711 /* Allocate next buffer in chain, if required */ 623 // Allocate next buffer in chain, if required
712 if (curbuf->len == ibuflen) { 624 if (curbuf->len == ibuflen) {
713 struct scstrreplace_ibuf *nextbuf = 625 struct cx_strreplace_ibuf *nextbuf =
714 calloc(1, sizeof(struct scstrreplace_ibuf)); 626 calloc(1, sizeof(struct cx_strreplace_ibuf));
715 if (!nextbuf) { 627 if (!nextbuf) {
716 scstrrepl_free_ibuf(firstbuf); 628 cx_strrepl_free_ibuf(firstbuf);
717 return sstrn(NULL, 0); 629 return cx_mutstrn(NULL, 0);
718 } 630 }
719 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); 631 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
720 if (!nextbuf->buf) { 632 if (!nextbuf->buf) {
721 free(nextbuf); 633 free(nextbuf);
722 scstrrepl_free_ibuf(firstbuf); 634 cx_strrepl_free_ibuf(firstbuf);
723 return sstrn(NULL, 0); 635 return cx_mutstrn(NULL, 0);
724 } 636 }
725 curbuf->next = nextbuf; 637 curbuf->next = nextbuf;
726 curbuf = nextbuf; 638 curbuf = nextbuf;
727 } 639 }
728 640
729 /* Record match index */ 641 // Record match index
730 found++; 642 found++;
731 size_t idx = match.ptr - str.ptr; 643 size_t idx = match.ptr - str.ptr;
732 curbuf->buf[curbuf->len++] = idx; 644 curbuf->buf[curbuf->len++] = idx;
733 searchstr.ptr = match.ptr + pattern.length; 645 searchstr.ptr = match.ptr + pattern.length;
734 searchstr.length = str.length - idx - pattern.length; 646 searchstr.length = str.length - idx - pattern.length;
735 } else { 647 } else {
736 break; 648 break;
737 } 649 }
738 } while (searchstr.length > 0 && found < replmax); 650 } while (searchstr.length > 0 && found < replmax);
739 651
740 /* Allocate result string */ 652 // Allocate result string
741 sstr_t result; 653 cxmutstr result;
742 { 654 {
743 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; 655 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
744 size_t rcount = 0; 656 size_t rcount = 0;
745 curbuf = firstbuf; 657 curbuf = firstbuf;
746 do { 658 do {
747 rcount += curbuf->len; 659 rcount += curbuf->len;
748 curbuf = curbuf->next; 660 curbuf = curbuf->next;
749 } while (curbuf); 661 } while (curbuf);
750 result.length = str.length + rcount * adjlen; 662 result.length = str.length + rcount * adjlen;
751 result.ptr = almalloc(allocator, result.length); 663 result.ptr = cxMalloc(allocator, result.length + 1);
752 if (!result.ptr) { 664 if (!result.ptr) {
753 scstrrepl_free_ibuf(firstbuf); 665 cx_strrepl_free_ibuf(firstbuf);
754 return sstrn(NULL, 0); 666 return cx_mutstrn(NULL, 0);
755 } 667 }
756 } 668 }
757 669
758 /* Build result string */ 670 // Build result string
759 curbuf = firstbuf; 671 curbuf = firstbuf;
760 size_t srcidx = 0; 672 size_t srcidx = 0;
761 char* destptr = result.ptr; 673 char *destptr = result.ptr;
762 do { 674 do {
763 for (size_t i = 0; i < curbuf->len; i++) { 675 for (size_t i = 0; i < curbuf->len; i++) {
764 /* Copy source part up to next match*/ 676 // Copy source part up to next match
765 size_t idx = curbuf->buf[i]; 677 size_t idx = curbuf->buf[i];
766 size_t srclen = idx - srcidx; 678 size_t srclen = idx - srcidx;
767 if (srclen > 0) { 679 if (srclen > 0) {
768 memcpy(destptr, str.ptr+srcidx, srclen); 680 memcpy(destptr, str.ptr + srcidx, srclen);
769 destptr += srclen; 681 destptr += srclen;
770 srcidx += srclen; 682 srcidx += srclen;
771 } 683 }
772 684
773 /* Copy the replacement and skip the source pattern */ 685 // Copy the replacement and skip the source pattern
774 srcidx += pattern.length; 686 srcidx += pattern.length;
775 memcpy(destptr, replacement.ptr, replacement.length); 687 memcpy(destptr, replacement.ptr, replacement.length);
776 destptr += replacement.length; 688 destptr += replacement.length;
777 } 689 }
778 curbuf = curbuf->next; 690 curbuf = curbuf->next;
779 } while (curbuf); 691 } while (curbuf);
780 memcpy(destptr, str.ptr+srcidx, str.length-srcidx); 692 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
781 693
782 /* Free index buffer */ 694 // Result is guaranteed to be zero-terminated
783 scstrrepl_free_ibuf(firstbuf); 695 result.ptr[result.length] = '\0';
696
697 // Free index buffer
698 cx_strrepl_free_ibuf(firstbuf);
784 699
785 return result; 700 return result;
786 } 701 }
787 702
788 sstr_t scstrreplacen(scstr_t str, scstr_t pattern, 703 CxStrtokCtx cx_strtok(
789 scstr_t replacement, size_t replmax) { 704 cxstring str,
790 return scstrreplacen_a(ucx_default_allocator(), 705 cxstring delim,
791 str, pattern, replacement, replmax); 706 size_t limit
792 } 707 ) {
793 708 CxStrtokCtx ctx;
794 709 ctx.str = str;
795 // type adjustment functions 710 ctx.delim = delim;
796 scstr_t ucx_sc2sc(scstr_t str) { 711 ctx.limit = limit;
797 return str; 712 ctx.pos = 0;
798 } 713 ctx.next_pos = 0;
799 scstr_t ucx_ss2sc(sstr_t str) { 714 ctx.delim_pos = 0;
800 scstr_t cs; 715 ctx.found = 0;
801 cs.ptr = str.ptr; 716 ctx.delim_more = NULL;
802 cs.length = str.length; 717 ctx.delim_more_count = 0;
803 return cs; 718 return ctx;
804 } 719 }
805 scstr_t ucx_ss2c_s(scstr_t c) { 720
806 return c; 721 CxStrtokCtx cx_strtok_m(
807 } 722 cxmutstr str,
723 cxstring delim,
724 size_t limit
725 ) {
726 return cx_strtok(cx_strcast(str), delim, limit);
727 }
728
729 bool cx_strtok_next(
730 CxStrtokCtx *ctx,
731 cxstring *token
732 ) {
733 // abortion criteria
734 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
735 return false;
736 }
737
738 // determine the search start
739 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
740
741 // search the next delimiter
742 cxstring delim = cx_strstr(haystack, ctx->delim);
743
744 // if found, make delim capture exactly the delimiter
745 if (delim.length > 0) {
746 delim.length = ctx->delim.length;
747 }
748
749 // if more delimiters are specified, check them now
750 if (ctx->delim_more_count > 0) {
751 cx_for_n(i, ctx->delim_more_count) {
752 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
753 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
754 delim.ptr = d.ptr;
755 delim.length = ctx->delim_more[i].length;
756 }
757 }
758 }
759
760 // store the token information and adjust the context
761 ctx->found++;
762 ctx->pos = ctx->next_pos;
763 token->ptr = &ctx->str.ptr[ctx->pos];
764 ctx->delim_pos = delim.length == 0 ?
765 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
766 token->length = ctx->delim_pos - ctx->pos;
767 ctx->next_pos = ctx->delim_pos + delim.length;
768
769 return true;
770 }
771
772 bool cx_strtok_next_m(
773 CxStrtokCtx *ctx,
774 cxmutstr *token
775 ) {
776 return cx_strtok_next(ctx, (cxstring *) token);
777 }
778
779 void cx_strtok_delim(
780 CxStrtokCtx *ctx,
781 const cxstring *delim,
782 size_t count
783 ) {
784 ctx->delim_more = delim;
785 ctx->delim_more_count = count;
786 }

mercurial