|
1 /* |
|
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
|
3 * |
|
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions are met: |
|
8 * |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
|
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
26 * POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include "cx/string.h" |
|
30 #include "cx/utils.h" |
|
31 |
|
32 #include <string.h> |
|
33 #include <stdarg.h> |
|
34 #include <ctype.h> |
|
35 |
|
36 #ifndef _WIN32 |
|
37 |
|
38 #include <strings.h> // for strncasecmp() |
|
39 |
|
40 #endif // _WIN32 |
|
41 |
|
42 cxmutstr cx_mutstr(char *cstring) { |
|
43 return (cxmutstr) {cstring, strlen(cstring)}; |
|
44 } |
|
45 |
|
46 cxmutstr cx_mutstrn( |
|
47 char *cstring, |
|
48 size_t length |
|
49 ) { |
|
50 return (cxmutstr) {cstring, length}; |
|
51 } |
|
52 |
|
53 cxstring cx_str(const char *cstring) { |
|
54 return (cxstring) {cstring, strlen(cstring)}; |
|
55 } |
|
56 |
|
57 cxstring cx_strn( |
|
58 const char *cstring, |
|
59 size_t length |
|
60 ) { |
|
61 return (cxstring) {cstring, length}; |
|
62 } |
|
63 |
|
64 cxstring cx_strcast(cxmutstr str) { |
|
65 return (cxstring) {str.ptr, str.length}; |
|
66 } |
|
67 |
|
68 void cx_strfree(cxmutstr *str) { |
|
69 free(str->ptr); |
|
70 str->ptr = NULL; |
|
71 str->length = 0; |
|
72 } |
|
73 |
|
74 void cx_strfree_a( |
|
75 CxAllocator const *alloc, |
|
76 cxmutstr *str |
|
77 ) { |
|
78 cxFree(alloc, str->ptr); |
|
79 str->ptr = NULL; |
|
80 str->length = 0; |
|
81 } |
|
82 |
|
83 size_t cx_strlen( |
|
84 size_t count, |
|
85 ... |
|
86 ) { |
|
87 if (count == 0) return 0; |
|
88 |
|
89 va_list ap; |
|
90 va_start(ap, count); |
|
91 size_t size = 0; |
|
92 cx_for_n(i, count) { |
|
93 cxstring str = va_arg(ap, cxstring); |
|
94 size += str.length; |
|
95 } |
|
96 va_end(ap); |
|
97 |
|
98 return size; |
|
99 } |
|
100 |
|
101 cxmutstr cx_strcat_ma( |
|
102 CxAllocator const *alloc, |
|
103 cxmutstr str, |
|
104 size_t count, |
|
105 ... |
|
106 ) { |
|
107 if (count == 0) return str; |
|
108 |
|
109 cxstring *strings = calloc(count, sizeof(cxstring)); |
|
110 if (!strings) abort(); |
|
111 |
|
112 va_list ap; |
|
113 va_start(ap, count); |
|
114 |
|
115 // get all args and overall length |
|
116 size_t slen = str.length; |
|
117 cx_for_n(i, count) { |
|
118 cxstring s = va_arg (ap, cxstring); |
|
119 strings[i] = s; |
|
120 slen += s.length; |
|
121 } |
|
122 va_end(ap); |
|
123 |
|
124 // reallocate or create new string |
|
125 if (str.ptr == NULL) { |
|
126 str.ptr = cxMalloc(alloc, slen + 1); |
|
127 } else { |
|
128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1); |
|
129 } |
|
130 if (str.ptr == NULL) abort(); |
|
131 |
|
132 // concatenate strings |
|
133 size_t pos = str.length; |
|
134 str.length = slen; |
|
135 cx_for_n(i, count) { |
|
136 cxstring s = strings[i]; |
|
137 memcpy(str.ptr + pos, s.ptr, s.length); |
|
138 pos += s.length; |
|
139 } |
|
140 |
|
141 // terminate string |
|
142 str.ptr[str.length] = '\0'; |
|
143 |
|
144 // free temporary array |
|
145 free(strings); |
|
146 |
|
147 return str; |
|
148 } |
|
149 |
|
150 cxstring cx_strsubs( |
|
151 cxstring string, |
|
152 size_t start |
|
153 ) { |
|
154 return cx_strsubsl(string, start, string.length - start); |
|
155 } |
|
156 |
|
157 cxmutstr cx_strsubs_m( |
|
158 cxmutstr string, |
|
159 size_t start |
|
160 ) { |
|
161 return cx_strsubsl_m(string, start, string.length - start); |
|
162 } |
|
163 |
|
164 cxstring cx_strsubsl( |
|
165 cxstring string, |
|
166 size_t start, |
|
167 size_t length |
|
168 ) { |
|
169 if (start > string.length) { |
|
170 return (cxstring) {NULL, 0}; |
|
171 } |
|
172 |
|
173 size_t rem_len = string.length - start; |
|
174 if (length > rem_len) { |
|
175 length = rem_len; |
|
176 } |
|
177 |
|
178 return (cxstring) {string.ptr + start, length}; |
|
179 } |
|
180 |
|
181 cxmutstr cx_strsubsl_m( |
|
182 cxmutstr string, |
|
183 size_t start, |
|
184 size_t length |
|
185 ) { |
|
186 cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
|
187 return (cxmutstr) {(char *) result.ptr, result.length}; |
|
188 } |
|
189 |
|
190 cxstring cx_strchr( |
|
191 cxstring string, |
|
192 int chr |
|
193 ) { |
|
194 chr = 0xFF & chr; |
|
195 // TODO: improve by comparing multiple bytes at once |
|
196 cx_for_n(i, string.length) { |
|
197 if (string.ptr[i] == chr) { |
|
198 return cx_strsubs(string, i); |
|
199 } |
|
200 } |
|
201 return (cxstring) {NULL, 0}; |
|
202 } |
|
203 |
|
204 cxmutstr cx_strchr_m( |
|
205 cxmutstr string, |
|
206 int chr |
|
207 ) { |
|
208 cxstring result = cx_strchr(cx_strcast(string), chr); |
|
209 return (cxmutstr) {(char *) result.ptr, result.length}; |
|
210 } |
|
211 |
|
212 cxstring cx_strrchr( |
|
213 cxstring string, |
|
214 int chr |
|
215 ) { |
|
216 chr = 0xFF & chr; |
|
217 size_t i = string.length; |
|
218 while (i > 0) { |
|
219 i--; |
|
220 // TODO: improve by comparing multiple bytes at once |
|
221 if (string.ptr[i] == chr) { |
|
222 return cx_strsubs(string, i); |
|
223 } |
|
224 } |
|
225 return (cxstring) {NULL, 0}; |
|
226 } |
|
227 |
|
228 cxmutstr cx_strrchr_m( |
|
229 cxmutstr string, |
|
230 int chr |
|
231 ) { |
|
232 cxstring result = cx_strrchr(cx_strcast(string), chr); |
|
233 return (cxmutstr) {(char *) result.ptr, result.length}; |
|
234 } |
|
235 |
|
236 #ifndef CX_STRSTR_SBO_SIZE |
|
237 #define CX_STRSTR_SBO_SIZE 512 |
|
238 #endif |
|
239 |
|
240 cxstring cx_strstr( |
|
241 cxstring haystack, |
|
242 cxstring needle |
|
243 ) { |
|
244 if (needle.length == 0) { |
|
245 return haystack; |
|
246 } |
|
247 |
|
248 // optimize for single-char needles |
|
249 if (needle.length == 1) { |
|
250 return cx_strchr(haystack, *needle.ptr); |
|
251 } |
|
252 |
|
253 /* |
|
254 * IMPORTANT: |
|
255 * Our prefix table contains the prefix length PLUS ONE |
|
256 * this is our decision, because we want to use the full range of size_t. |
|
257 * The original algorithm needs a (-1) at one single place, |
|
258 * and we want to avoid that. |
|
259 */ |
|
260 |
|
261 // local prefix table |
|
262 size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; |
|
263 |
|
264 // check needle length and use appropriate prefix table |
|
265 // if the pattern exceeds static prefix table, allocate on the heap |
|
266 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; |
|
267 register size_t *ptable = useheap ? calloc(needle.length + 1, |
|
268 sizeof(size_t)) : s_prefix_table; |
|
269 |
|
270 // keep counter in registers |
|
271 register size_t i, j; |
|
272 |
|
273 // fill prefix table |
|
274 i = 0; |
|
275 j = 0; |
|
276 ptable[i] = j; |
|
277 while (i < needle.length) { |
|
278 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
|
279 j = ptable[j - 1]; |
|
280 } |
|
281 i++; |
|
282 j++; |
|
283 ptable[i] = j; |
|
284 } |
|
285 |
|
286 // search |
|
287 cxstring result = {NULL, 0}; |
|
288 i = 0; |
|
289 j = 1; |
|
290 while (i < haystack.length) { |
|
291 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
|
292 j = ptable[j - 1]; |
|
293 } |
|
294 i++; |
|
295 j++; |
|
296 if (j - 1 == needle.length) { |
|
297 size_t start = i - needle.length; |
|
298 result.ptr = haystack.ptr + start; |
|
299 result.length = haystack.length - start; |
|
300 break; |
|
301 } |
|
302 } |
|
303 |
|
304 // if prefix table was allocated on the heap, free it |
|
305 if (ptable != s_prefix_table) { |
|
306 free(ptable); |
|
307 } |
|
308 |
|
309 return result; |
|
310 } |
|
311 |
|
312 cxmutstr cx_strstr_m( |
|
313 cxmutstr haystack, |
|
314 cxstring needle |
|
315 ) { |
|
316 cxstring result = cx_strstr(cx_strcast(haystack), needle); |
|
317 return (cxmutstr) {(char *) result.ptr, result.length}; |
|
318 } |
|
319 |
|
320 size_t cx_strsplit( |
|
321 cxstring string, |
|
322 cxstring delim, |
|
323 size_t limit, |
|
324 cxstring *output |
|
325 ) { |
|
326 // special case: output limit is zero |
|
327 if (limit == 0) return 0; |
|
328 |
|
329 // special case: delimiter is empty |
|
330 if (delim.length == 0) { |
|
331 output[0] = string; |
|
332 return 1; |
|
333 } |
|
334 |
|
335 // special cases: delimiter is at least as large as the string |
|
336 if (delim.length >= string.length) { |
|
337 // exact match |
|
338 if (cx_strcmp(string, delim) == 0) { |
|
339 output[0] = cx_strn(string.ptr, 0); |
|
340 output[1] = cx_strn(string.ptr + string.length, 0); |
|
341 return 2; |
|
342 } else { |
|
343 // no match possible |
|
344 output[0] = string; |
|
345 return 1; |
|
346 } |
|
347 } |
|
348 |
|
349 size_t n = 0; |
|
350 cxstring curpos = string; |
|
351 while (1) { |
|
352 ++n; |
|
353 cxstring match = cx_strstr(curpos, delim); |
|
354 if (match.length > 0) { |
|
355 // is the limit reached? |
|
356 if (n < limit) { |
|
357 // copy the current string to the array |
|
358 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
|
359 output[n - 1] = item; |
|
360 size_t processed = item.length + delim.length; |
|
361 curpos.ptr += processed; |
|
362 curpos.length -= processed; |
|
363 } else { |
|
364 // limit reached, copy the _full_ remaining string |
|
365 output[n - 1] = curpos; |
|
366 break; |
|
367 } |
|
368 } else { |
|
369 // no more matches, copy last string |
|
370 output[n - 1] = curpos; |
|
371 break; |
|
372 } |
|
373 } |
|
374 |
|
375 return n; |
|
376 } |
|
377 |
|
378 size_t cx_strsplit_a( |
|
379 CxAllocator const *allocator, |
|
380 cxstring string, |
|
381 cxstring delim, |
|
382 size_t limit, |
|
383 cxstring **output |
|
384 ) { |
|
385 // find out how many splits we're going to make and allocate memory |
|
386 size_t n = 0; |
|
387 cxstring curpos = string; |
|
388 while (1) { |
|
389 ++n; |
|
390 cxstring match = cx_strstr(curpos, delim); |
|
391 if (match.length > 0) { |
|
392 // is the limit reached? |
|
393 if (n < limit) { |
|
394 size_t processed = match.ptr - curpos.ptr + delim.length; |
|
395 curpos.ptr += processed; |
|
396 curpos.length -= processed; |
|
397 } else { |
|
398 // limit reached |
|
399 break; |
|
400 } |
|
401 } else { |
|
402 // no more matches |
|
403 break; |
|
404 } |
|
405 } |
|
406 *output = cxCalloc(allocator, n, sizeof(cxstring)); |
|
407 return cx_strsplit(string, delim, n, *output); |
|
408 } |
|
409 |
|
410 size_t cx_strsplit_m( |
|
411 cxmutstr string, |
|
412 cxstring delim, |
|
413 size_t limit, |
|
414 cxmutstr *output |
|
415 ) { |
|
416 return cx_strsplit(cx_strcast(string), |
|
417 delim, limit, (cxstring *) output); |
|
418 } |
|
419 |
|
420 size_t cx_strsplit_ma( |
|
421 CxAllocator const *allocator, |
|
422 cxmutstr string, |
|
423 cxstring delim, |
|
424 size_t limit, |
|
425 cxmutstr **output |
|
426 ) { |
|
427 return cx_strsplit_a(allocator, cx_strcast(string), |
|
428 delim, limit, (cxstring **) output); |
|
429 } |
|
430 |
|
431 int cx_strcmp( |
|
432 cxstring s1, |
|
433 cxstring s2 |
|
434 ) { |
|
435 if (s1.length == s2.length) { |
|
436 return memcmp(s1.ptr, s2.ptr, s1.length); |
|
437 } else if (s1.length > s2.length) { |
|
438 return 1; |
|
439 } else { |
|
440 return -1; |
|
441 } |
|
442 } |
|
443 |
|
444 int cx_strcasecmp( |
|
445 cxstring s1, |
|
446 cxstring s2 |
|
447 ) { |
|
448 if (s1.length == s2.length) { |
|
449 #ifdef _WIN32 |
|
450 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
|
451 #else |
|
452 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
|
453 #endif |
|
454 } else if (s1.length > s2.length) { |
|
455 return 1; |
|
456 } else { |
|
457 return -1; |
|
458 } |
|
459 } |
|
460 |
|
461 int cx_strcmp_p( |
|
462 void const *s1, |
|
463 void const *s2 |
|
464 ) { |
|
465 cxstring const *left = s1; |
|
466 cxstring const *right = s2; |
|
467 return cx_strcmp(*left, *right); |
|
468 } |
|
469 |
|
470 int cx_strcasecmp_p( |
|
471 void const *s1, |
|
472 void const *s2 |
|
473 ) { |
|
474 cxstring const *left = s1; |
|
475 cxstring const *right = s2; |
|
476 return cx_strcasecmp(*left, *right); |
|
477 } |
|
478 |
|
479 cxmutstr cx_strdup_a( |
|
480 CxAllocator const *allocator, |
|
481 cxstring string |
|
482 ) { |
|
483 cxmutstr result = { |
|
484 cxMalloc(allocator, string.length + 1), |
|
485 string.length |
|
486 }; |
|
487 if (result.ptr == NULL) { |
|
488 result.length = 0; |
|
489 return result; |
|
490 } |
|
491 memcpy(result.ptr, string.ptr, string.length); |
|
492 result.ptr[string.length] = '\0'; |
|
493 return result; |
|
494 } |
|
495 |
|
496 cxstring cx_strtrim(cxstring string) { |
|
497 cxstring result = string; |
|
498 // TODO: optimize by comparing multiple bytes at once |
|
499 while (result.length > 0 && isspace(*result.ptr)) { |
|
500 result.ptr++; |
|
501 result.length--; |
|
502 } |
|
503 while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
|
504 result.length--; |
|
505 } |
|
506 return result; |
|
507 } |
|
508 |
|
509 cxmutstr cx_strtrim_m(cxmutstr string) { |
|
510 cxstring result = cx_strtrim(cx_strcast(string)); |
|
511 return (cxmutstr) {(char *) result.ptr, result.length}; |
|
512 } |
|
513 |
|
514 bool cx_strprefix( |
|
515 cxstring string, |
|
516 cxstring prefix |
|
517 ) { |
|
518 if (string.length < prefix.length) return false; |
|
519 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
|
520 } |
|
521 |
|
522 bool cx_strsuffix( |
|
523 cxstring string, |
|
524 cxstring suffix |
|
525 ) { |
|
526 if (string.length < suffix.length) return false; |
|
527 return memcmp(string.ptr + string.length - suffix.length, |
|
528 suffix.ptr, suffix.length) == 0; |
|
529 } |
|
530 |
|
531 bool cx_strcaseprefix( |
|
532 cxstring string, |
|
533 cxstring prefix |
|
534 ) { |
|
535 if (string.length < prefix.length) return false; |
|
536 #ifdef _WIN32 |
|
537 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
|
538 #else |
|
539 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
|
540 #endif |
|
541 } |
|
542 |
|
543 bool cx_strcasesuffix( |
|
544 cxstring string, |
|
545 cxstring suffix |
|
546 ) { |
|
547 if (string.length < suffix.length) return false; |
|
548 #ifdef _WIN32 |
|
549 return _strnicmp(string.ptr+string.length-suffix.length, |
|
550 suffix.ptr, suffix.length) == 0; |
|
551 #else |
|
552 return strncasecmp(string.ptr + string.length - suffix.length, |
|
553 suffix.ptr, suffix.length) == 0; |
|
554 #endif |
|
555 } |
|
556 |
|
557 void cx_strlower(cxmutstr string) { |
|
558 cx_for_n(i, string.length) { |
|
559 string.ptr[i] = (char) tolower(string.ptr[i]); |
|
560 } |
|
561 } |
|
562 |
|
563 void cx_strupper(cxmutstr string) { |
|
564 cx_for_n(i, string.length) { |
|
565 string.ptr[i] = (char) toupper(string.ptr[i]); |
|
566 } |
|
567 } |
|
568 |
|
569 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE |
|
570 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 |
|
571 #endif |
|
572 |
|
573 struct cx_strreplace_ibuf { |
|
574 size_t *buf; |
|
575 struct cx_strreplace_ibuf *next; |
|
576 unsigned int len; |
|
577 }; |
|
578 |
|
579 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { |
|
580 while (buf) { |
|
581 struct cx_strreplace_ibuf *next = buf->next; |
|
582 free(buf->buf); |
|
583 free(buf); |
|
584 buf = next; |
|
585 } |
|
586 } |
|
587 |
|
588 cxmutstr cx_strreplacen_a( |
|
589 CxAllocator const *allocator, |
|
590 cxstring str, |
|
591 cxstring pattern, |
|
592 cxstring replacement, |
|
593 size_t replmax |
|
594 ) { |
|
595 |
|
596 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
|
597 return cx_strdup_a(allocator, str); |
|
598 |
|
599 // Compute expected buffer length |
|
600 size_t ibufmax = str.length / pattern.length; |
|
601 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
|
602 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { |
|
603 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; |
|
604 } |
|
605 |
|
606 // Allocate first index buffer |
|
607 struct cx_strreplace_ibuf *firstbuf, *curbuf; |
|
608 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
|
609 if (!firstbuf) return cx_mutstrn(NULL, 0); |
|
610 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
|
611 if (!firstbuf->buf) { |
|
612 free(firstbuf); |
|
613 return cx_mutstrn(NULL, 0); |
|
614 } |
|
615 |
|
616 // Search occurrences |
|
617 cxstring searchstr = str; |
|
618 size_t found = 0; |
|
619 do { |
|
620 cxstring match = cx_strstr(searchstr, pattern); |
|
621 if (match.length > 0) { |
|
622 // Allocate next buffer in chain, if required |
|
623 if (curbuf->len == ibuflen) { |
|
624 struct cx_strreplace_ibuf *nextbuf = |
|
625 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
|
626 if (!nextbuf) { |
|
627 cx_strrepl_free_ibuf(firstbuf); |
|
628 return cx_mutstrn(NULL, 0); |
|
629 } |
|
630 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
|
631 if (!nextbuf->buf) { |
|
632 free(nextbuf); |
|
633 cx_strrepl_free_ibuf(firstbuf); |
|
634 return cx_mutstrn(NULL, 0); |
|
635 } |
|
636 curbuf->next = nextbuf; |
|
637 curbuf = nextbuf; |
|
638 } |
|
639 |
|
640 // Record match index |
|
641 found++; |
|
642 size_t idx = match.ptr - str.ptr; |
|
643 curbuf->buf[curbuf->len++] = idx; |
|
644 searchstr.ptr = match.ptr + pattern.length; |
|
645 searchstr.length = str.length - idx - pattern.length; |
|
646 } else { |
|
647 break; |
|
648 } |
|
649 } while (searchstr.length > 0 && found < replmax); |
|
650 |
|
651 // Allocate result string |
|
652 cxmutstr result; |
|
653 { |
|
654 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
|
655 size_t rcount = 0; |
|
656 curbuf = firstbuf; |
|
657 do { |
|
658 rcount += curbuf->len; |
|
659 curbuf = curbuf->next; |
|
660 } while (curbuf); |
|
661 result.length = str.length + rcount * adjlen; |
|
662 result.ptr = cxMalloc(allocator, result.length + 1); |
|
663 if (!result.ptr) { |
|
664 cx_strrepl_free_ibuf(firstbuf); |
|
665 return cx_mutstrn(NULL, 0); |
|
666 } |
|
667 } |
|
668 |
|
669 // Build result string |
|
670 curbuf = firstbuf; |
|
671 size_t srcidx = 0; |
|
672 char *destptr = result.ptr; |
|
673 do { |
|
674 for (size_t i = 0; i < curbuf->len; i++) { |
|
675 // Copy source part up to next match |
|
676 size_t idx = curbuf->buf[i]; |
|
677 size_t srclen = idx - srcidx; |
|
678 if (srclen > 0) { |
|
679 memcpy(destptr, str.ptr + srcidx, srclen); |
|
680 destptr += srclen; |
|
681 srcidx += srclen; |
|
682 } |
|
683 |
|
684 // Copy the replacement and skip the source pattern |
|
685 srcidx += pattern.length; |
|
686 memcpy(destptr, replacement.ptr, replacement.length); |
|
687 destptr += replacement.length; |
|
688 } |
|
689 curbuf = curbuf->next; |
|
690 } while (curbuf); |
|
691 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
|
692 |
|
693 // Result is guaranteed to be zero-terminated |
|
694 result.ptr[result.length] = '\0'; |
|
695 |
|
696 // Free index buffer |
|
697 cx_strrepl_free_ibuf(firstbuf); |
|
698 |
|
699 return result; |
|
700 } |
|
701 |
|
702 CxStrtokCtx cx_strtok( |
|
703 cxstring str, |
|
704 cxstring delim, |
|
705 size_t limit |
|
706 ) { |
|
707 CxStrtokCtx ctx; |
|
708 ctx.str = str; |
|
709 ctx.delim = delim; |
|
710 ctx.limit = limit; |
|
711 ctx.pos = 0; |
|
712 ctx.next_pos = 0; |
|
713 ctx.delim_pos = 0; |
|
714 ctx.found = 0; |
|
715 ctx.delim_more = NULL; |
|
716 ctx.delim_more_count = 0; |
|
717 return ctx; |
|
718 } |
|
719 |
|
720 CxStrtokCtx cx_strtok_m( |
|
721 cxmutstr str, |
|
722 cxstring delim, |
|
723 size_t limit |
|
724 ) { |
|
725 return cx_strtok(cx_strcast(str), delim, limit); |
|
726 } |
|
727 |
|
728 bool cx_strtok_next( |
|
729 CxStrtokCtx *ctx, |
|
730 cxstring *token |
|
731 ) { |
|
732 // abortion criteria |
|
733 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { |
|
734 return false; |
|
735 } |
|
736 |
|
737 // determine the search start |
|
738 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos); |
|
739 |
|
740 // search the next delimiter |
|
741 cxstring delim = cx_strstr(haystack, ctx->delim); |
|
742 |
|
743 // if found, make delim capture exactly the delimiter |
|
744 if (delim.length > 0) { |
|
745 delim.length = ctx->delim.length; |
|
746 } |
|
747 |
|
748 // if more delimiters are specified, check them now |
|
749 if (ctx->delim_more_count > 0) { |
|
750 cx_for_n(i, ctx->delim_more_count) { |
|
751 cxstring d = cx_strstr(haystack, ctx->delim_more[i]); |
|
752 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { |
|
753 delim.ptr = d.ptr; |
|
754 delim.length = ctx->delim_more[i].length; |
|
755 } |
|
756 } |
|
757 } |
|
758 |
|
759 // store the token information and adjust the context |
|
760 ctx->found++; |
|
761 ctx->pos = ctx->next_pos; |
|
762 token->ptr = &ctx->str.ptr[ctx->pos]; |
|
763 ctx->delim_pos = delim.length == 0 ? |
|
764 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); |
|
765 token->length = ctx->delim_pos - ctx->pos; |
|
766 ctx->next_pos = ctx->delim_pos + delim.length; |
|
767 |
|
768 return true; |
|
769 } |
|
770 |
|
771 bool cx_strtok_next_m( |
|
772 CxStrtokCtx *ctx, |
|
773 cxmutstr *token |
|
774 ) { |
|
775 return cx_strtok_next(ctx, (cxstring *) token); |
|
776 } |
|
777 |
|
778 void cx_strtok_delim( |
|
779 CxStrtokCtx *ctx, |
|
780 cxstring const *delim, |
|
781 size_t count |
|
782 ) { |
|
783 ctx->delim_more = delim; |
|
784 ctx->delim_more_count = count; |
|
785 } |