ucx/string.c

changeset 747
efbd59642577
parent 505
481802342fdf
child 748
49a284f61e8c
equal deleted inserted replaced
746:a569148841ff 747:efbd59642577
1 /* 1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * 3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met: 7 * modification, are permitted provided that the following conditions are met:
8 * 8 *
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29 #include "ucx/string.h" 29 #include "cx/string.h"
30 30 #include "cx/utils.h"
31 #include "ucx/allocator.h" 31
32
33 #include <stdlib.h>
34 #include <string.h> 32 #include <string.h>
35 #include <stdarg.h> 33 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h> 34 #include <ctype.h>
38 35
39 sstr_t sstr(char *cstring) { 36 #ifndef _WIN32
40 sstr_t string; 37
41 string.ptr = cstring; 38 #include <strings.h> // for strncasecmp()
42 string.length = strlen(cstring); 39
43 return string; 40 #endif // _WIN32
44 } 41
45 42 cxmutstr cx_mutstr(char *cstring) {
46 sstr_t sstrn(char *cstring, size_t length) { 43 return (cxmutstr) {cstring, strlen(cstring)};
47 sstr_t string; 44 }
48 string.ptr = cstring; 45
49 string.length = length; 46 cxmutstr cx_mutstrn(
50 return string; 47 char *cstring,
51 } 48 size_t length
52 49 ) {
53 scstr_t scstr(const char *cstring) { 50 return (cxmutstr) {cstring, length};
54 scstr_t string; 51 }
55 string.ptr = cstring; 52
56 string.length = strlen(cstring); 53 cxstring cx_str(const char *cstring) {
57 return string; 54 return (cxstring) {cstring, strlen(cstring)};
58 } 55 }
59 56
60 scstr_t scstrn(const char *cstring, size_t length) { 57 cxstring cx_strn(
61 scstr_t string; 58 const char *cstring,
62 string.ptr = cstring; 59 size_t length
63 string.length = length; 60 ) {
64 return string; 61 return (cxstring) {cstring, length};
65 } 62 }
66 63
67 64 cxstring cx_strcast(cxmutstr str) {
68 size_t scstrnlen(size_t n, ...) { 65 return (cxstring) {str.ptr, str.length};
66 }
67
68 void cx_strfree(cxmutstr *str) {
69 free(str->ptr);
70 str->ptr = NULL;
71 str->length = 0;
72 }
73
74 void cx_strfree_a(
75 CxAllocator const *alloc,
76 cxmutstr *str
77 ) {
78 cxFree(alloc, str->ptr);
79 str->ptr = NULL;
80 str->length = 0;
81 }
82
83 size_t cx_strlen(
84 size_t count,
85 ...
86 ) {
87 if (count == 0) return 0;
88
69 va_list ap; 89 va_list ap;
70 va_start(ap, n); 90 va_start(ap, count);
71
72 size_t size = 0; 91 size_t size = 0;
73 92 cx_for_n(i, count) {
74 for (size_t i = 0 ; i < n ; i++) { 93 cxstring str = va_arg(ap, cxstring);
75 scstr_t str = va_arg(ap, scstr_t);
76 if(SIZE_MAX - str.length < size) {
77 size = SIZE_MAX;
78 break;
79 }
80 size += str.length; 94 size += str.length;
81 } 95 }
82 va_end(ap); 96 va_end(ap);
83 97
84 return size; 98 return size;
85 } 99 }
86 100
87 static sstr_t sstrvcat_a( 101 cxmutstr cx_strcat_a(
88 UcxAllocator *a, 102 CxAllocator const *alloc,
89 size_t count, 103 size_t count,
90 scstr_t s1, 104 ...
91 va_list ap) { 105 ) {
92 sstr_t str; 106 cxstring *strings = calloc(count, sizeof(cxstring));
93 str.ptr = NULL; 107 if (!strings) abort();
94 str.length = 0; 108
95 if(count < 2) { 109 va_list ap;
96 return str; 110 va_start(ap, count);
97 } 111
98
99 scstr_t s2 = va_arg (ap, scstr_t);
100
101 if(((size_t)-1) - s1.length < s2.length) {
102 return str;
103 }
104
105 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
106 if(!strings) {
107 return str;
108 }
109
110 // get all args and overall length 112 // get all args and overall length
111 strings[0] = s1; 113 size_t slen = 0;
112 strings[1] = s2; 114 cx_for_n(i, count) {
113 size_t slen = s1.length + s2.length; 115 cxstring s = va_arg (ap, cxstring);
114 int error = 0;
115 for (size_t i=2;i<count;i++) {
116 scstr_t s = va_arg (ap, scstr_t);
117 strings[i] = s; 116 strings[i] = s;
118 if(((size_t)-1) - s.length < slen) {
119 error = 1;
120 break;
121 }
122 slen += s.length; 117 slen += s.length;
123 } 118 }
124 if(error) { 119
125 free(strings);
126 return str;
127 }
128
129 // create new string 120 // create new string
130 str.ptr = (char*) almalloc(a, slen + 1); 121 cxmutstr result;
131 str.length = slen; 122 result.ptr = cxMalloc(alloc, slen + 1);
132 if(!str.ptr) { 123 result.length = slen;
133 free(strings); 124 if (result.ptr == NULL) abort();
134 str.length = 0; 125
135 return str;
136 }
137
138 // concatenate strings 126 // concatenate strings
139 size_t pos = 0; 127 size_t pos = 0;
140 for (size_t i=0;i<count;i++) { 128 cx_for_n(i, count) {
141 scstr_t s = strings[i]; 129 cxstring s = strings[i];
142 memcpy(str.ptr + pos, s.ptr, s.length); 130 memcpy(result.ptr + pos, s.ptr, s.length);
143 pos += s.length; 131 pos += s.length;
144 } 132 }
145 133
146 str.ptr[str.length] = '\0'; 134 // terminate string
147 135 result.ptr[result.length] = '\0';
136
137 // free temporary array
148 free(strings); 138 free(strings);
149 139
150 return str; 140 return result;
151 } 141 }
152 142
153 sstr_t scstrcat(size_t count, scstr_t s1, ...) { 143 cxstring cx_strsubs(
154 va_list ap; 144 cxstring string,
155 va_start(ap, s1); 145 size_t start
156 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); 146 ) {
157 va_end(ap); 147 return cx_strsubsl(string, start, string.length - start);
158 return s; 148 }
159 } 149
160 150 cxmutstr cx_strsubs_m(
161 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { 151 cxmutstr string,
162 va_list ap; 152 size_t start
163 va_start(ap, s1); 153 ) {
164 sstr_t s = sstrvcat_a(a, count, s1, ap); 154 return cx_strsubsl_m(string, start, string.length - start);
165 va_end(ap); 155 }
166 return s; 156
167 } 157 cxstring cx_strsubsl(
168 158 cxstring string,
169 static int ucx_substring(
170 size_t str_length,
171 size_t start, 159 size_t start,
172 size_t length, 160 size_t length
173 size_t *newlen, 161 ) {
174 size_t *newpos) 162 if (start > string.length) {
175 { 163 return (cxstring) {NULL, 0};
176 *newlen = 0; 164 }
177 *newpos = 0; 165
178 166 size_t rem_len = string.length - start;
179 if(start > str_length) { 167 if (length > rem_len) {
180 return 0; 168 length = rem_len;
181 } 169 }
182 170
183 if(length > str_length - start) { 171 return (cxstring) {string.ptr + start, length};
184 length = str_length - start; 172 }
185 } 173
186 *newlen = length; 174 cxmutstr cx_strsubsl_m(
187 *newpos = start; 175 cxmutstr string,
188 return 1; 176 size_t start,
189 } 177 size_t length
190 178 ) {
191 sstr_t sstrsubs(sstr_t s, size_t start) { 179 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
192 return sstrsubsl (s, start, s.length-start); 180 return (cxmutstr) {(char *) result.ptr, result.length};
193 } 181 }
194 182
195 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { 183 cxstring cx_strchr(
196 size_t pos; 184 cxstring string,
197 sstr_t ret = { NULL, 0 }; 185 int chr
198 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { 186 ) {
199 ret.ptr = s.ptr + pos; 187 chr = 0xFF & chr;
200 } 188 // TODO: improve by comparing multiple bytes at once
201 return ret; 189 cx_for_n(i, string.length) {
202 } 190 if (string.ptr[i] == chr) {
203 191 return cx_strsubs(string, i);
204 scstr_t scstrsubs(scstr_t string, size_t start) { 192 }
205 return scstrsubsl(string, start, string.length-start); 193 }
206 } 194 return (cxstring) {NULL, 0};
207 195 }
208 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { 196
209 size_t pos; 197 cxmutstr cx_strchr_m(
210 scstr_t ret = { NULL, 0 }; 198 cxmutstr string,
211 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { 199 int chr
212 ret.ptr = s.ptr + pos; 200 ) {
213 } 201 cxstring result = cx_strchr(cx_strcast(string), chr);
214 return ret; 202 return (cxmutstr) {(char *) result.ptr, result.length};
215 } 203 }
216 204
217 205 cxstring cx_strrchr(
218 static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { 206 cxstring string,
219 for(size_t i=0;i<length;i++) { 207 int chr
220 if(str[i] == chr) { 208 ) {
221 *pos = i; 209 chr = 0xFF & chr;
222 return 1; 210 size_t i = string.length;
223 } 211 while (i > 0) {
224 } 212 i--;
225 return 0; 213 // TODO: improve by comparing multiple bytes at once
226 } 214 if (string.ptr[i] == chr) {
227 215 return cx_strsubs(string, i);
228 static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { 216 }
229 if(length > 0) { 217 }
230 for(size_t i=length ; i>0 ; i--) { 218 return (cxstring) {NULL, 0};
231 if(str[i-1] == chr) { 219 }
232 *pos = i-1; 220
233 return 1; 221 cxmutstr cx_strrchr_m(
234 } 222 cxmutstr string,
235 } 223 int chr
236 } 224 ) {
237 return 0; 225 cxstring result = cx_strrchr(cx_strcast(string), chr);
238 } 226 return (cxmutstr) {(char *) result.ptr, result.length};
239 227 }
240 sstr_t sstrchr(sstr_t s, int c) { 228
241 size_t pos = 0; 229 #ifndef CX_STRSTR_SBO_SIZE
242 if(ucx_strchr(s.ptr, s.length, c, &pos)) { 230 #define CX_STRSTR_SBO_SIZE 512
243 return sstrsubs(s, pos); 231 #endif
244 } 232
245 return sstrn(NULL, 0); 233 cxstring cx_strstr(
246 } 234 cxstring haystack,
247 235 cxstring needle
248 sstr_t sstrrchr(sstr_t s, int c) { 236 ) {
249 size_t pos = 0; 237 if (needle.length == 0) {
250 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { 238 return haystack;
251 return sstrsubs(s, pos); 239 }
252 } 240
253 return sstrn(NULL, 0); 241 // optimize for single-char needles
254 } 242 if (needle.length == 1) {
255 243 return cx_strchr(haystack, *needle.ptr);
256 scstr_t scstrchr(scstr_t s, int c) { 244 }
257 size_t pos = 0; 245
258 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
259 return scstrsubs(s, pos);
260 }
261 return scstrn(NULL, 0);
262 }
263
264 scstr_t scstrrchr(scstr_t s, int c) {
265 size_t pos = 0;
266 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
267 return scstrsubs(s, pos);
268 }
269 return scstrn(NULL, 0);
270 }
271
272 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
273 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
274
275 #define ptable_w(useheap, ptable, index, src) do {\
276 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
277 else ((size_t*)ptable)[index] = src;\
278 } while (0);
279
280
281 static const char* ucx_strstr(
282 const char *str,
283 size_t length,
284 const char *match,
285 size_t matchlen,
286 size_t *newlen)
287 {
288 *newlen = length;
289 if (matchlen == 0) {
290 return str;
291 }
292
293 const char *result = NULL;
294 size_t resultlen = 0;
295
296 /* 246 /*
297 * IMPORTANT: 247 * IMPORTANT:
298 * our prefix table contains the prefix length PLUS ONE 248 * Our prefix table contains the prefix length PLUS ONE
299 * this is our decision, because we want to use the full range of size_t 249 * this is our decision, because we want to use the full range of size_t.
300 * the original algorithm needs a (-1) at one single place 250 * The original algorithm needs a (-1) at one single place,
301 * and we want to avoid that 251 * and we want to avoid that.
302 */ 252 */
303 253
304 /* static prefix table */ 254 // local prefix table
305 static uint8_t s_prefix_table[256]; 255 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
306 256
307 /* check pattern length and use appropriate prefix table */ 257 // check needle length and use appropriate prefix table
308 /* if the pattern exceeds static prefix table, allocate on the heap */ 258 // if the pattern exceeds static prefix table, allocate on the heap
309 register int useheap = matchlen > 255; 259 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
310 register void* ptable = useheap ? 260 register size_t *ptable = useheap ? calloc(needle.length + 1,
311 calloc(matchlen+1, sizeof(size_t)): s_prefix_table; 261 sizeof(size_t)) : s_prefix_table;
312 262
313 /* keep counter in registers */ 263 // keep counter in registers
314 register size_t i, j; 264 register size_t i, j;
315 265
316 /* fill prefix table */ 266 // fill prefix table
317 i = 0; j = 0; 267 i = 0;
318 ptable_w(useheap, ptable, i, j); 268 j = 0;
319 while (i < matchlen) { 269 ptable[i] = j;
320 while (j >= 1 && match[j-1] != match[i]) { 270 while (i < needle.length) {
321 ptable_r(j, useheap, ptable, j-1); 271 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
322 } 272 j = ptable[j - 1];
323 i++; j++; 273 }
324 ptable_w(useheap, ptable, i, j); 274 i++;
325 } 275 j++;
326 276 ptable[i] = j;
327 /* search */ 277 }
328 i = 0; j = 1; 278
329 while (i < length) { 279 // search
330 while (j >= 1 && str[i] != match[j-1]) { 280 cxstring result = {NULL, 0};
331 ptable_r(j, useheap, ptable, j-1); 281 i = 0;
332 } 282 j = 1;
333 i++; j++; 283 while (i < haystack.length) {
334 if (j-1 == matchlen) { 284 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
335 size_t start = i - matchlen; 285 j = ptable[j - 1];
336 result = str + start; 286 }
337 resultlen = length - start; 287 i++;
288 j++;
289 if (j - 1 == needle.length) {
290 size_t start = i - needle.length;
291 result.ptr = haystack.ptr + start;
292 result.length = haystack.length - start;
338 break; 293 break;
339 } 294 }
340 } 295 }
341 296
342 /* if prefix table was allocated on the heap, free it */ 297 // if prefix table was allocated on the heap, free it
343 if (ptable != s_prefix_table) { 298 if (ptable != s_prefix_table) {
344 free(ptable); 299 free(ptable);
345 } 300 }
346 301
347 *newlen = resultlen;
348 return result; 302 return result;
349 } 303 }
350 304
351 sstr_t scstrsstr(sstr_t string, scstr_t match) { 305 cxmutstr cx_strstr_m(
352 sstr_t result; 306 cxmutstr haystack,
353 307 cxstring needle
354 size_t reslen; 308 ) {
355 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); 309 cxstring result = cx_strstr(cx_strcast(haystack), needle);
356 if(!resstr) { 310 return (cxmutstr) {(char *) result.ptr, result.length};
357 result.ptr = NULL; 311 }
358 result.length = 0; 312
359 return result; 313 size_t cx_strsplit(
360 } 314 cxstring string,
361 315 cxstring delim,
362 size_t pos = resstr - string.ptr; 316 size_t limit,
363 result.ptr = string.ptr + pos; 317 cxstring *output
364 result.length = reslen; 318 ) {
365 319 // special case: output limit is zero
366 return result; 320 if (limit == 0) return 0;
367 } 321
368 322 // special case: delimiter is empty
369 scstr_t scstrscstr(scstr_t string, scstr_t match) { 323 if (delim.length == 0) {
370 scstr_t result; 324 output[0] = string;
371 325 return 1;
372 size_t reslen; 326 }
373 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); 327
374 if(!resstr) { 328 // special cases: delimiter is at least as large as the string
375 result.ptr = NULL; 329 if (delim.length >= string.length) {
376 result.length = 0; 330 // exact match
377 return result; 331 if (cx_strcmp(string, delim) == 0) {
378 } 332 output[0] = cx_strn(string.ptr, 0);
379 333 output[1] = cx_strn(string.ptr + string.length, 0);
380 size_t pos = resstr - string.ptr; 334 return 2;
381 result.ptr = string.ptr + pos; 335 } else {
382 result.length = reslen; 336 // no match possible
383 337 output[0] = string;
384 return result; 338 return 1;
385 } 339 }
386 340 }
387 #undef ptable_r 341
388 #undef ptable_w 342 size_t n = 0;
389 343 cxstring curpos = string;
390 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { 344 while (1) {
391 return scstrsplit_a(ucx_default_allocator(), s, d, n); 345 ++n;
392 } 346 cxstring match = cx_strstr(curpos, delim);
393 347 if (match.length > 0) {
394 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { 348 // is the limit reached?
395 if (s.length == 0 || d.length == 0) { 349 if (n < limit) {
396 *n = -1; 350 // copy the current string to the array
397 return NULL; 351 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
398 } 352 output[n - 1] = item;
399 353 size_t processed = item.length + delim.length;
400 /* special cases: delimiter is at least as large as the string */ 354 curpos.ptr += processed;
401 if (d.length >= s.length) { 355 curpos.length -= processed;
402 /* exact match */
403 if (sstrcmp(s, d) == 0) {
404 *n = 0;
405 return NULL;
406 } else /* no match possible */ {
407 *n = 1;
408 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
409 if(result) {
410 *result = sstrdup_a(allocator, s);
411 } else { 356 } else {
412 *n = -2; 357 // limit reached, copy the _full_ remaining string
413 } 358 output[n - 1] = curpos;
414 return result;
415 }
416 }
417
418 ssize_t nmax = *n;
419 size_t arrlen = 16;
420 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
421
422 if (result) {
423 scstr_t curpos = s;
424 ssize_t j = 1;
425 while (1) {
426 scstr_t match;
427 /* optimize for one byte delimiters */
428 if (d.length == 1) {
429 match = curpos;
430 for (size_t i = 0 ; i < curpos.length ; i++) {
431 if (curpos.ptr[i] == *(d.ptr)) {
432 match.ptr = curpos.ptr + i;
433 break;
434 }
435 match.length--;
436 }
437 } else {
438 match = scstrscstr(curpos, d);
439 }
440 if (match.length > 0) {
441 /* is this our last try? */
442 if (nmax == 0 || j < nmax) {
443 /* copy the current string to the array */
444 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
445 result[j-1] = sstrdup_a(allocator, item);
446 size_t processed = item.length + d.length;
447 curpos.ptr += processed;
448 curpos.length -= processed;
449
450 /* allocate memory for the next string */
451 j++;
452 if (j > arrlen) {
453 arrlen *= 2;
454 size_t reallocsz;
455 sstr_t* reallocated = NULL;
456 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
457 reallocated = (sstr_t*) alrealloc(
458 allocator, result, reallocsz);
459 }
460 if (reallocated) {
461 result = reallocated;
462 } else {
463 for (ssize_t i = 0 ; i < j-1 ; i++) {
464 alfree(allocator, result[i].ptr);
465 }
466 alfree(allocator, result);
467 *n = -2;
468 return NULL;
469 }
470 }
471 } else {
472 /* nmax reached, copy the _full_ remaining string */
473 result[j-1] = sstrdup_a(allocator, curpos);
474 break;
475 }
476 } else {
477 /* no more matches, copy last string */
478 result[j-1] = sstrdup_a(allocator, curpos);
479 break; 359 break;
480 } 360 }
481 } 361 } else {
482 *n = j; 362 // no more matches, copy last string
483 } else { 363 output[n - 1] = curpos;
484 *n = -2; 364 break;
485 } 365 }
486 366 }
487 return result; 367
488 } 368 return n;
489 369 }
490 int scstrcmp(scstr_t s1, scstr_t s2) { 370
371 size_t cx_strsplit_a(
372 CxAllocator const *allocator,
373 cxstring string,
374 cxstring delim,
375 size_t limit,
376 cxstring **output
377 ) {
378 // find out how many splits we're going to make and allocate memory
379 size_t n = 0;
380 cxstring curpos = string;
381 while (1) {
382 ++n;
383 cxstring match = cx_strstr(curpos, delim);
384 if (match.length > 0) {
385 // is the limit reached?
386 if (n < limit) {
387 size_t processed = match.ptr - curpos.ptr + delim.length;
388 curpos.ptr += processed;
389 curpos.length -= processed;
390 } else {
391 // limit reached
392 break;
393 }
394 } else {
395 // no more matches
396 break;
397 }
398 }
399 *output = cxCalloc(allocator, n, sizeof(cxstring));
400 return cx_strsplit(string, delim, n, *output);
401 }
402
403 size_t cx_strsplit_m(
404 cxmutstr string,
405 cxstring delim,
406 size_t limit,
407 cxmutstr *output
408 ) {
409 return cx_strsplit(cx_strcast(string),
410 delim, limit, (cxstring *) output);
411 }
412
413 size_t cx_strsplit_ma(
414 CxAllocator const *allocator,
415 cxmutstr string,
416 cxstring delim,
417 size_t limit,
418 cxmutstr **output
419 ) {
420 return cx_strsplit_a(allocator, cx_strcast(string),
421 delim, limit, (cxstring **) output);
422 }
423
424 int cx_strcmp(
425 cxstring s1,
426 cxstring s2
427 ) {
491 if (s1.length == s2.length) { 428 if (s1.length == s2.length) {
492 return memcmp(s1.ptr, s2.ptr, s1.length); 429 return memcmp(s1.ptr, s2.ptr, s1.length);
493 } else if (s1.length > s2.length) { 430 } else if (s1.length > s2.length) {
494 return 1; 431 return 1;
495 } else { 432 } else {
496 return -1; 433 return -1;
497 } 434 }
498 } 435 }
499 436
500 int scstrcasecmp(scstr_t s1, scstr_t s2) { 437 int cx_strcasecmp(
438 cxstring s1,
439 cxstring s2
440 ) {
501 if (s1.length == s2.length) { 441 if (s1.length == s2.length) {
502 #ifdef _WIN32 442 #ifdef _WIN32
503 return _strnicmp(s1.ptr, s2.ptr, s1.length); 443 return _strnicmp(s1.ptr, s2.ptr, s1.length);
504 #else 444 #else
505 return strncasecmp(s1.ptr, s2.ptr, s1.length); 445 return strncasecmp(s1.ptr, s2.ptr, s1.length);
509 } else { 449 } else {
510 return -1; 450 return -1;
511 } 451 }
512 } 452 }
513 453
514 sstr_t scstrdup(scstr_t s) { 454 int cx_strcmp_p(
515 return sstrdup_a(ucx_default_allocator(), s); 455 void const *s1,
516 } 456 void const *s2
517 457 ) {
518 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { 458 cxstring const *left = s1;
519 sstr_t newstring; 459 cxstring const *right = s2;
520 newstring.ptr = (char*)almalloc(allocator, s.length + 1); 460 return cx_strcmp(*left, *right);
521 if (newstring.ptr) { 461 }
522 newstring.length = s.length; 462
523 newstring.ptr[newstring.length] = 0; 463 int cx_strcasecmp_p(
524 464 void const *s1,
525 memcpy(newstring.ptr, s.ptr, s.length); 465 void const *s2
526 } else { 466 ) {
527 newstring.length = 0; 467 cxstring const *left = s1;
528 } 468 cxstring const *right = s2;
529 469 return cx_strcasecmp(*left, *right);
530 return newstring; 470 }
531 } 471
532 472 cxmutstr cx_strdup_a(
533 473 CxAllocator const *allocator,
534 static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { 474 cxstring string
535 const char *newptr = s; 475 ) {
536 size_t length = len; 476 cxmutstr result = {
537 477 cxMalloc(allocator, string.length + 1),
538 while(length > 0 && isspace(*newptr)) { 478 string.length
539 newptr++; 479 };
540 length--; 480 if (result.ptr == NULL) {
541 } 481 result.length = 0;
542 while(length > 0 && isspace(newptr[length-1])) { 482 return result;
543 length--; 483 }
544 } 484 memcpy(result.ptr, string.ptr, string.length);
545 485 result.ptr[string.length] = '\0';
546 *newlen = length; 486 return result;
547 return newptr - s; 487 }
548 } 488
549 489 cxstring cx_strtrim(cxstring string) {
550 sstr_t sstrtrim(sstr_t string) { 490 cxstring result = string;
551 sstr_t newstr; 491 // TODO: optimize by comparing multiple bytes at once
552 newstr.ptr = string.ptr 492 while (result.length > 0 && isspace(*result.ptr)) {
553 + ucx_strtrim(string.ptr, string.length, &newstr.length); 493 result.ptr++;
554 return newstr; 494 result.length--;
555 } 495 }
556 496 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
557 scstr_t scstrtrim(scstr_t string) { 497 result.length--;
558 scstr_t newstr; 498 }
559 newstr.ptr = string.ptr 499 return result;
560 + ucx_strtrim(string.ptr, string.length, &newstr.length); 500 }
561 return newstr; 501
562 } 502 cxmutstr cx_strtrim_m(cxmutstr string) {
563 503 cxstring result = cx_strtrim(cx_strcast(string));
564 int scstrprefix(scstr_t string, scstr_t prefix) { 504 return (cxmutstr) {(char *) result.ptr, result.length};
565 if (string.length == 0) { 505 }
566 return prefix.length == 0; 506
567 } 507 bool cx_strprefix(
568 if (prefix.length == 0) { 508 cxstring string,
569 return 1; 509 cxstring prefix
570 } 510 ) {
571 511 if (string.length < prefix.length) return false;
572 if (prefix.length > string.length) { 512 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
573 return 0; 513 }
574 } else { 514
575 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; 515 bool cx_strsuffix(
576 } 516 cxstring string,
577 } 517 cxstring suffix
578 518 ) {
579 int scstrsuffix(scstr_t string, scstr_t suffix) { 519 if (string.length < suffix.length) return false;
580 if (string.length == 0) { 520 return memcmp(string.ptr + string.length - suffix.length,
581 return suffix.length == 0; 521 suffix.ptr, suffix.length) == 0;
582 } 522 }
583 if (suffix.length == 0) { 523
584 return 1; 524 bool cx_strcaseprefix(
585 } 525 cxstring string,
586 526 cxstring prefix
587 if (suffix.length > string.length) { 527 ) {
588 return 0; 528 if (string.length < prefix.length) return false;
589 } else { 529 #ifdef _WIN32
590 return memcmp(string.ptr+string.length-suffix.length, 530 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
591 suffix.ptr, suffix.length) == 0; 531 #else
592 } 532 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
593 } 533 #endif
594 534 }
595 sstr_t scstrlower(scstr_t string) { 535
596 sstr_t ret = sstrdup(string); 536 bool cx_strcasesuffix(
597 for (size_t i = 0; i < ret.length ; i++) { 537 cxstring string,
598 ret.ptr[i] = tolower(ret.ptr[i]); 538 cxstring suffix
599 } 539 ) {
600 return ret; 540 if (string.length < suffix.length) return false;
601 } 541 #ifdef _WIN32
602 542 return _strnicmp(string.ptr+string.length-suffix.length,
603 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { 543 suffix.ptr, suffix.length) == 0;
604 sstr_t ret = sstrdup_a(allocator, string); 544 #else
605 for (size_t i = 0; i < ret.length ; i++) { 545 return strncasecmp(string.ptr + string.length - suffix.length,
606 ret.ptr[i] = tolower(ret.ptr[i]); 546 suffix.ptr, suffix.length) == 0;
607 } 547 #endif
608 return ret; 548 }
609 } 549
610 550 void cx_strlower(cxmutstr string) {
611 sstr_t scstrupper(scstr_t string) { 551 cx_for_n(i, string.length) {
612 sstr_t ret = sstrdup(string); 552 string.ptr[i] = (char) tolower(string.ptr[i]);
613 for (size_t i = 0; i < ret.length ; i++) { 553 }
614 ret.ptr[i] = toupper(ret.ptr[i]); 554 }
615 } 555
616 return ret; 556 void cx_strupper(cxmutstr string) {
617 } 557 cx_for_n(i, string.length) {
618 558 string.ptr[i] = (char) toupper(string.ptr[i]);
619 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { 559 }
620 sstr_t ret = sstrdup_a(allocator, string); 560 }
621 for (size_t i = 0; i < ret.length ; i++) { 561
622 ret.ptr[i] = toupper(ret.ptr[i]); 562 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
623 } 563 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
624 return ret; 564 #endif
625 } 565
626 566 struct cx_strreplace_ibuf {
627 // type adjustment functions 567 size_t *buf;
628 scstr_t ucx_sc2sc(scstr_t str) { 568 struct cx_strreplace_ibuf *next;
629 return str; 569 unsigned int len;
630 } 570 };
631 scstr_t ucx_ss2sc(sstr_t str) { 571
632 scstr_t cs; 572 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
633 cs.ptr = str.ptr; 573 while (buf) {
634 cs.length = str.length; 574 struct cx_strreplace_ibuf *next = buf->next;
635 return cs; 575 free(buf->buf);
636 } 576 free(buf);
637 scstr_t ucx_ss2c_s(scstr_t c) { 577 buf = next;
638 return c; 578 }
639 } 579 }
580
581 cxmutstr cx_strreplacen_a(
582 CxAllocator const *allocator,
583 cxstring str,
584 cxstring pattern,
585 cxstring replacement,
586 size_t replmax
587 ) {
588
589 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
590 return cx_strdup_a(allocator, str);
591
592 // Compute expected buffer length
593 size_t ibufmax = str.length / pattern.length;
594 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
595 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
596 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
597 }
598
599 // Allocate first index buffer
600 struct cx_strreplace_ibuf *firstbuf, *curbuf;
601 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
602 if (!firstbuf) return cx_mutstrn(NULL, 0);
603 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
604 if (!firstbuf->buf) {
605 free(firstbuf);
606 return cx_mutstrn(NULL, 0);
607 }
608
609 // Search occurrences
610 cxstring searchstr = str;
611 size_t found = 0;
612 do {
613 cxstring match = cx_strstr(searchstr, pattern);
614 if (match.length > 0) {
615 // Allocate next buffer in chain, if required
616 if (curbuf->len == ibuflen) {
617 struct cx_strreplace_ibuf *nextbuf =
618 calloc(1, sizeof(struct cx_strreplace_ibuf));
619 if (!nextbuf) {
620 cx_strrepl_free_ibuf(firstbuf);
621 return cx_mutstrn(NULL, 0);
622 }
623 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
624 if (!nextbuf->buf) {
625 free(nextbuf);
626 cx_strrepl_free_ibuf(firstbuf);
627 return cx_mutstrn(NULL, 0);
628 }
629 curbuf->next = nextbuf;
630 curbuf = nextbuf;
631 }
632
633 // Record match index
634 found++;
635 size_t idx = match.ptr - str.ptr;
636 curbuf->buf[curbuf->len++] = idx;
637 searchstr.ptr = match.ptr + pattern.length;
638 searchstr.length = str.length - idx - pattern.length;
639 } else {
640 break;
641 }
642 } while (searchstr.length > 0 && found < replmax);
643
644 // Allocate result string
645 cxmutstr result;
646 {
647 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
648 size_t rcount = 0;
649 curbuf = firstbuf;
650 do {
651 rcount += curbuf->len;
652 curbuf = curbuf->next;
653 } while (curbuf);
654 result.length = str.length + rcount * adjlen;
655 result.ptr = cxMalloc(allocator, result.length + 1);
656 if (!result.ptr) {
657 cx_strrepl_free_ibuf(firstbuf);
658 return cx_mutstrn(NULL, 0);
659 }
660 }
661
662 // Build result string
663 curbuf = firstbuf;
664 size_t srcidx = 0;
665 char *destptr = result.ptr;
666 do {
667 for (size_t i = 0; i < curbuf->len; i++) {
668 // Copy source part up to next match
669 size_t idx = curbuf->buf[i];
670 size_t srclen = idx - srcidx;
671 if (srclen > 0) {
672 memcpy(destptr, str.ptr + srcidx, srclen);
673 destptr += srclen;
674 srcidx += srclen;
675 }
676
677 // Copy the replacement and skip the source pattern
678 srcidx += pattern.length;
679 memcpy(destptr, replacement.ptr, replacement.length);
680 destptr += replacement.length;
681 }
682 curbuf = curbuf->next;
683 } while (curbuf);
684 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
685
686 // Result is guaranteed to be zero-terminated
687 result.ptr[result.length] = '\0';
688
689 // Free index buffer
690 cx_strrepl_free_ibuf(firstbuf);
691
692 return result;
693 }
694
695 CxStrtokCtx cx_strtok(
696 cxstring str,
697 cxstring delim,
698 size_t limit
699 ) {
700 CxStrtokCtx ctx;
701 ctx.str = str;
702 ctx.delim = delim;
703 ctx.limit = limit;
704 ctx.pos = 0;
705 ctx.next_pos = 0;
706 ctx.delim_pos = 0;
707 ctx.found = 0;
708 ctx.delim_more = NULL;
709 ctx.delim_more_count = 0;
710 return ctx;
711 }
712
713 CxStrtokCtx cx_strtok_m(
714 cxmutstr str,
715 cxstring delim,
716 size_t limit
717 ) {
718 return cx_strtok(cx_strcast(str), delim, limit);
719 }
720
721 bool cx_strtok_next(
722 CxStrtokCtx *ctx,
723 cxstring *token
724 ) {
725 // abortion criteria
726 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
727 return false;
728 }
729
730 // determine the search start
731 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
732
733 // search the next delimiter
734 cxstring delim = cx_strstr(haystack, ctx->delim);
735
736 // if found, make delim capture exactly the delimiter
737 if (delim.length > 0) {
738 delim.length = ctx->delim.length;
739 }
740
741 // if more delimiters are specified, check them now
742 if (ctx->delim_more_count > 0) {
743 cx_for_n(i, ctx->delim_more_count) {
744 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
745 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
746 delim.ptr = d.ptr;
747 delim.length = ctx->delim_more[i].length;
748 }
749 }
750 }
751
752 // store the token information and adjust the context
753 ctx->found++;
754 ctx->pos = ctx->next_pos;
755 token->ptr = &ctx->str.ptr[ctx->pos];
756 ctx->delim_pos = delim.length == 0 ?
757 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
758 token->length = ctx->delim_pos - ctx->pos;
759 ctx->next_pos = ctx->delim_pos + delim.length;
760
761 return true;
762 }
763
764 bool cx_strtok_next_m(
765 CxStrtokCtx *ctx,
766 cxmutstr *token
767 ) {
768 return cx_strtok_next(ctx, (cxstring *) token);
769 }
770
771 void cx_strtok_delim(
772 CxStrtokCtx *ctx,
773 cxstring const *delim,
774 size_t count
775 ) {
776 ctx->delim_more = delim;
777 ctx->delim_more_count = count;
778 }

mercurial