| |
1 /* |
| |
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
| |
3 * |
| |
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. |
| |
5 * |
| |
6 * Redistribution and use in source and binary forms, with or without |
| |
7 * modification, are permitted provided that the following conditions are met: |
| |
8 * |
| |
9 * 1. Redistributions of source code must retain the above copyright |
| |
10 * notice, this list of conditions and the following disclaimer. |
| |
11 * |
| |
12 * 2. Redistributions in binary form must reproduce the above copyright |
| |
13 * notice, this list of conditions and the following disclaimer in the |
| |
14 * documentation and/or other materials provided with the distribution. |
| |
15 * |
| |
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| |
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| |
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| |
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| |
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| |
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| |
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| |
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| |
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| |
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| |
26 * POSSIBILITY OF SUCH DAMAGE. |
| |
27 */ |
| |
28 |
| |
29 #include "cx/string.h" |
| |
30 #include "cx/utils.h" |
| |
31 |
| |
32 #include <string.h> |
| |
33 #include <stdarg.h> |
| |
34 #include <ctype.h> |
| |
35 |
| |
36 #ifndef _WIN32 |
| |
37 |
| |
38 #include <strings.h> // for strncasecmp() |
| |
39 |
| |
40 #endif // _WIN32 |
| |
41 |
| |
42 cxmutstr cx_mutstr(char *cstring) { |
| |
43 return (cxmutstr) {cstring, strlen(cstring)}; |
| |
44 } |
| |
45 |
| |
46 cxmutstr cx_mutstrn( |
| |
47 char *cstring, |
| |
48 size_t length |
| |
49 ) { |
| |
50 return (cxmutstr) {cstring, length}; |
| |
51 } |
| |
52 |
| |
53 cxstring cx_str(const char *cstring) { |
| |
54 return (cxstring) {cstring, strlen(cstring)}; |
| |
55 } |
| |
56 |
| |
57 cxstring cx_strn( |
| |
58 const char *cstring, |
| |
59 size_t length |
| |
60 ) { |
| |
61 return (cxstring) {cstring, length}; |
| |
62 } |
| |
63 |
| |
64 cxstring cx_strcast(cxmutstr str) { |
| |
65 return (cxstring) {str.ptr, str.length}; |
| |
66 } |
| |
67 |
| |
68 void cx_strfree(cxmutstr *str) { |
| |
69 free(str->ptr); |
| |
70 str->ptr = NULL; |
| |
71 str->length = 0; |
| |
72 } |
| |
73 |
| |
74 void cx_strfree_a( |
| |
75 CxAllocator const *alloc, |
| |
76 cxmutstr *str |
| |
77 ) { |
| |
78 cxFree(alloc, str->ptr); |
| |
79 str->ptr = NULL; |
| |
80 str->length = 0; |
| |
81 } |
| |
82 |
| |
83 size_t cx_strlen( |
| |
84 size_t count, |
| |
85 ... |
| |
86 ) { |
| |
87 if (count == 0) return 0; |
| |
88 |
| |
89 va_list ap; |
| |
90 va_start(ap, count); |
| |
91 size_t size = 0; |
| |
92 cx_for_n(i, count) { |
| |
93 cxstring str = va_arg(ap, cxstring); |
| |
94 size += str.length; |
| |
95 } |
| |
96 va_end(ap); |
| |
97 |
| |
98 return size; |
| |
99 } |
| |
100 |
| |
101 cxmutstr cx_strcat_ma( |
| |
102 CxAllocator const *alloc, |
| |
103 cxmutstr str, |
| |
104 size_t count, |
| |
105 ... |
| |
106 ) { |
| |
107 if (count == 0) return str; |
| |
108 |
| |
109 cxstring *strings = calloc(count, sizeof(cxstring)); |
| |
110 if (!strings) abort(); |
| |
111 |
| |
112 va_list ap; |
| |
113 va_start(ap, count); |
| |
114 |
| |
115 // get all args and overall length |
| |
116 size_t slen = str.length; |
| |
117 cx_for_n(i, count) { |
| |
118 cxstring s = va_arg (ap, cxstring); |
| |
119 strings[i] = s; |
| |
120 slen += s.length; |
| |
121 } |
| |
122 va_end(ap); |
| |
123 |
| |
124 // reallocate or create new string |
| |
125 if (str.ptr == NULL) { |
| |
126 str.ptr = cxMalloc(alloc, slen + 1); |
| |
127 } else { |
| |
128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1); |
| |
129 } |
| |
130 if (str.ptr == NULL) abort(); |
| |
131 |
| |
132 // concatenate strings |
| |
133 size_t pos = str.length; |
| |
134 str.length = slen; |
| |
135 cx_for_n(i, count) { |
| |
136 cxstring s = strings[i]; |
| |
137 memcpy(str.ptr + pos, s.ptr, s.length); |
| |
138 pos += s.length; |
| |
139 } |
| |
140 |
| |
141 // terminate string |
| |
142 str.ptr[str.length] = '\0'; |
| |
143 |
| |
144 // free temporary array |
| |
145 free(strings); |
| |
146 |
| |
147 return str; |
| |
148 } |
| |
149 |
| |
150 cxstring cx_strsubs( |
| |
151 cxstring string, |
| |
152 size_t start |
| |
153 ) { |
| |
154 return cx_strsubsl(string, start, string.length - start); |
| |
155 } |
| |
156 |
| |
157 cxmutstr cx_strsubs_m( |
| |
158 cxmutstr string, |
| |
159 size_t start |
| |
160 ) { |
| |
161 return cx_strsubsl_m(string, start, string.length - start); |
| |
162 } |
| |
163 |
| |
164 cxstring cx_strsubsl( |
| |
165 cxstring string, |
| |
166 size_t start, |
| |
167 size_t length |
| |
168 ) { |
| |
169 if (start > string.length) { |
| |
170 return (cxstring) {NULL, 0}; |
| |
171 } |
| |
172 |
| |
173 size_t rem_len = string.length - start; |
| |
174 if (length > rem_len) { |
| |
175 length = rem_len; |
| |
176 } |
| |
177 |
| |
178 return (cxstring) {string.ptr + start, length}; |
| |
179 } |
| |
180 |
| |
181 cxmutstr cx_strsubsl_m( |
| |
182 cxmutstr string, |
| |
183 size_t start, |
| |
184 size_t length |
| |
185 ) { |
| |
186 cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
| |
187 return (cxmutstr) {(char *) result.ptr, result.length}; |
| |
188 } |
| |
189 |
| |
190 cxstring cx_strchr( |
| |
191 cxstring string, |
| |
192 int chr |
| |
193 ) { |
| |
194 chr = 0xFF & chr; |
| |
195 // TODO: improve by comparing multiple bytes at once |
| |
196 cx_for_n(i, string.length) { |
| |
197 if (string.ptr[i] == chr) { |
| |
198 return cx_strsubs(string, i); |
| |
199 } |
| |
200 } |
| |
201 return (cxstring) {NULL, 0}; |
| |
202 } |
| |
203 |
| |
204 cxmutstr cx_strchr_m( |
| |
205 cxmutstr string, |
| |
206 int chr |
| |
207 ) { |
| |
208 cxstring result = cx_strchr(cx_strcast(string), chr); |
| |
209 return (cxmutstr) {(char *) result.ptr, result.length}; |
| |
210 } |
| |
211 |
| |
212 cxstring cx_strrchr( |
| |
213 cxstring string, |
| |
214 int chr |
| |
215 ) { |
| |
216 chr = 0xFF & chr; |
| |
217 size_t i = string.length; |
| |
218 while (i > 0) { |
| |
219 i--; |
| |
220 // TODO: improve by comparing multiple bytes at once |
| |
221 if (string.ptr[i] == chr) { |
| |
222 return cx_strsubs(string, i); |
| |
223 } |
| |
224 } |
| |
225 return (cxstring) {NULL, 0}; |
| |
226 } |
| |
227 |
| |
228 cxmutstr cx_strrchr_m( |
| |
229 cxmutstr string, |
| |
230 int chr |
| |
231 ) { |
| |
232 cxstring result = cx_strrchr(cx_strcast(string), chr); |
| |
233 return (cxmutstr) {(char *) result.ptr, result.length}; |
| |
234 } |
| |
235 |
| |
236 #ifndef CX_STRSTR_SBO_SIZE |
| |
237 #define CX_STRSTR_SBO_SIZE 512 |
| |
238 #endif |
| |
239 unsigned const cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE; |
| |
240 |
| |
241 cxstring cx_strstr( |
| |
242 cxstring haystack, |
| |
243 cxstring needle |
| |
244 ) { |
| |
245 if (needle.length == 0) { |
| |
246 return haystack; |
| |
247 } |
| |
248 |
| |
249 // optimize for single-char needles |
| |
250 if (needle.length == 1) { |
| |
251 return cx_strchr(haystack, *needle.ptr); |
| |
252 } |
| |
253 |
| |
254 /* |
| |
255 * IMPORTANT: |
| |
256 * Our prefix table contains the prefix length PLUS ONE |
| |
257 * this is our decision, because we want to use the full range of size_t. |
| |
258 * The original algorithm needs a (-1) at one single place, |
| |
259 * and we want to avoid that. |
| |
260 */ |
| |
261 |
| |
262 // local prefix table |
| |
263 size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; |
| |
264 |
| |
265 // check needle length and use appropriate prefix table |
| |
266 // if the pattern exceeds static prefix table, allocate on the heap |
| |
267 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; |
| |
268 register size_t *ptable = useheap ? calloc(needle.length + 1, |
| |
269 sizeof(size_t)) : s_prefix_table; |
| |
270 |
| |
271 // keep counter in registers |
| |
272 register size_t i, j; |
| |
273 |
| |
274 // fill prefix table |
| |
275 i = 0; |
| |
276 j = 0; |
| |
277 ptable[i] = j; |
| |
278 while (i < needle.length) { |
| |
279 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
| |
280 j = ptable[j - 1]; |
| |
281 } |
| |
282 i++; |
| |
283 j++; |
| |
284 ptable[i] = j; |
| |
285 } |
| |
286 |
| |
287 // search |
| |
288 cxstring result = {NULL, 0}; |
| |
289 i = 0; |
| |
290 j = 1; |
| |
291 while (i < haystack.length) { |
| |
292 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
| |
293 j = ptable[j - 1]; |
| |
294 } |
| |
295 i++; |
| |
296 j++; |
| |
297 if (j - 1 == needle.length) { |
| |
298 size_t start = i - needle.length; |
| |
299 result.ptr = haystack.ptr + start; |
| |
300 result.length = haystack.length - start; |
| |
301 break; |
| |
302 } |
| |
303 } |
| |
304 |
| |
305 // if prefix table was allocated on the heap, free it |
| |
306 if (ptable != s_prefix_table) { |
| |
307 free(ptable); |
| |
308 } |
| |
309 |
| |
310 return result; |
| |
311 } |
| |
312 |
| |
313 cxmutstr cx_strstr_m( |
| |
314 cxmutstr haystack, |
| |
315 cxstring needle |
| |
316 ) { |
| |
317 cxstring result = cx_strstr(cx_strcast(haystack), needle); |
| |
318 return (cxmutstr) {(char *) result.ptr, result.length}; |
| |
319 } |
| |
320 |
| |
321 size_t cx_strsplit( |
| |
322 cxstring string, |
| |
323 cxstring delim, |
| |
324 size_t limit, |
| |
325 cxstring *output |
| |
326 ) { |
| |
327 // special case: output limit is zero |
| |
328 if (limit == 0) return 0; |
| |
329 |
| |
330 // special case: delimiter is empty |
| |
331 if (delim.length == 0) { |
| |
332 output[0] = string; |
| |
333 return 1; |
| |
334 } |
| |
335 |
| |
336 // special cases: delimiter is at least as large as the string |
| |
337 if (delim.length >= string.length) { |
| |
338 // exact match |
| |
339 if (cx_strcmp(string, delim) == 0) { |
| |
340 output[0] = cx_strn(string.ptr, 0); |
| |
341 output[1] = cx_strn(string.ptr + string.length, 0); |
| |
342 return 2; |
| |
343 } else { |
| |
344 // no match possible |
| |
345 output[0] = string; |
| |
346 return 1; |
| |
347 } |
| |
348 } |
| |
349 |
| |
350 size_t n = 0; |
| |
351 cxstring curpos = string; |
| |
352 while (1) { |
| |
353 ++n; |
| |
354 cxstring match = cx_strstr(curpos, delim); |
| |
355 if (match.length > 0) { |
| |
356 // is the limit reached? |
| |
357 if (n < limit) { |
| |
358 // copy the current string to the array |
| |
359 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
| |
360 output[n - 1] = item; |
| |
361 size_t processed = item.length + delim.length; |
| |
362 curpos.ptr += processed; |
| |
363 curpos.length -= processed; |
| |
364 } else { |
| |
365 // limit reached, copy the _full_ remaining string |
| |
366 output[n - 1] = curpos; |
| |
367 break; |
| |
368 } |
| |
369 } else { |
| |
370 // no more matches, copy last string |
| |
371 output[n - 1] = curpos; |
| |
372 break; |
| |
373 } |
| |
374 } |
| |
375 |
| |
376 return n; |
| |
377 } |
| |
378 |
| |
379 size_t cx_strsplit_a( |
| |
380 CxAllocator const *allocator, |
| |
381 cxstring string, |
| |
382 cxstring delim, |
| |
383 size_t limit, |
| |
384 cxstring **output |
| |
385 ) { |
| |
386 // find out how many splits we're going to make and allocate memory |
| |
387 size_t n = 0; |
| |
388 cxstring curpos = string; |
| |
389 while (1) { |
| |
390 ++n; |
| |
391 cxstring match = cx_strstr(curpos, delim); |
| |
392 if (match.length > 0) { |
| |
393 // is the limit reached? |
| |
394 if (n < limit) { |
| |
395 size_t processed = match.ptr - curpos.ptr + delim.length; |
| |
396 curpos.ptr += processed; |
| |
397 curpos.length -= processed; |
| |
398 } else { |
| |
399 // limit reached |
| |
400 break; |
| |
401 } |
| |
402 } else { |
| |
403 // no more matches |
| |
404 break; |
| |
405 } |
| |
406 } |
| |
407 *output = cxCalloc(allocator, n, sizeof(cxstring)); |
| |
408 return cx_strsplit(string, delim, n, *output); |
| |
409 } |
| |
410 |
| |
411 size_t cx_strsplit_m( |
| |
412 cxmutstr string, |
| |
413 cxstring delim, |
| |
414 size_t limit, |
| |
415 cxmutstr *output |
| |
416 ) { |
| |
417 return cx_strsplit(cx_strcast(string), |
| |
418 delim, limit, (cxstring *) output); |
| |
419 } |
| |
420 |
| |
421 size_t cx_strsplit_ma( |
| |
422 CxAllocator const *allocator, |
| |
423 cxmutstr string, |
| |
424 cxstring delim, |
| |
425 size_t limit, |
| |
426 cxmutstr **output |
| |
427 ) { |
| |
428 return cx_strsplit_a(allocator, cx_strcast(string), |
| |
429 delim, limit, (cxstring **) output); |
| |
430 } |
| |
431 |
| |
432 int cx_strcmp( |
| |
433 cxstring s1, |
| |
434 cxstring s2 |
| |
435 ) { |
| |
436 if (s1.length == s2.length) { |
| |
437 return memcmp(s1.ptr, s2.ptr, s1.length); |
| |
438 } else if (s1.length > s2.length) { |
| |
439 return 1; |
| |
440 } else { |
| |
441 return -1; |
| |
442 } |
| |
443 } |
| |
444 |
| |
445 int cx_strcasecmp( |
| |
446 cxstring s1, |
| |
447 cxstring s2 |
| |
448 ) { |
| |
449 if (s1.length == s2.length) { |
| |
450 #ifdef _WIN32 |
| |
451 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
| |
452 #else |
| |
453 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
| |
454 #endif |
| |
455 } else if (s1.length > s2.length) { |
| |
456 return 1; |
| |
457 } else { |
| |
458 return -1; |
| |
459 } |
| |
460 } |
| |
461 |
| |
462 int cx_strcmp_p( |
| |
463 void const *s1, |
| |
464 void const *s2 |
| |
465 ) { |
| |
466 cxstring const *left = s1; |
| |
467 cxstring const *right = s2; |
| |
468 return cx_strcmp(*left, *right); |
| |
469 } |
| |
470 |
| |
471 int cx_strcasecmp_p( |
| |
472 void const *s1, |
| |
473 void const *s2 |
| |
474 ) { |
| |
475 cxstring const *left = s1; |
| |
476 cxstring const *right = s2; |
| |
477 return cx_strcasecmp(*left, *right); |
| |
478 } |
| |
479 |
| |
480 cxmutstr cx_strdup_a( |
| |
481 CxAllocator const *allocator, |
| |
482 cxstring string |
| |
483 ) { |
| |
484 cxmutstr result = { |
| |
485 cxMalloc(allocator, string.length + 1), |
| |
486 string.length |
| |
487 }; |
| |
488 if (result.ptr == NULL) { |
| |
489 result.length = 0; |
| |
490 return result; |
| |
491 } |
| |
492 memcpy(result.ptr, string.ptr, string.length); |
| |
493 result.ptr[string.length] = '\0'; |
| |
494 return result; |
| |
495 } |
| |
496 |
| |
497 cxstring cx_strtrim(cxstring string) { |
| |
498 cxstring result = string; |
| |
499 // TODO: optimize by comparing multiple bytes at once |
| |
500 while (result.length > 0 && isspace(*result.ptr)) { |
| |
501 result.ptr++; |
| |
502 result.length--; |
| |
503 } |
| |
504 while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
| |
505 result.length--; |
| |
506 } |
| |
507 return result; |
| |
508 } |
| |
509 |
| |
510 cxmutstr cx_strtrim_m(cxmutstr string) { |
| |
511 cxstring result = cx_strtrim(cx_strcast(string)); |
| |
512 return (cxmutstr) {(char *) result.ptr, result.length}; |
| |
513 } |
| |
514 |
| |
515 bool cx_strprefix( |
| |
516 cxstring string, |
| |
517 cxstring prefix |
| |
518 ) { |
| |
519 if (string.length < prefix.length) return false; |
| |
520 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
| |
521 } |
| |
522 |
| |
523 bool cx_strsuffix( |
| |
524 cxstring string, |
| |
525 cxstring suffix |
| |
526 ) { |
| |
527 if (string.length < suffix.length) return false; |
| |
528 return memcmp(string.ptr + string.length - suffix.length, |
| |
529 suffix.ptr, suffix.length) == 0; |
| |
530 } |
| |
531 |
| |
532 bool cx_strcaseprefix( |
| |
533 cxstring string, |
| |
534 cxstring prefix |
| |
535 ) { |
| |
536 if (string.length < prefix.length) return false; |
| |
537 #ifdef _WIN32 |
| |
538 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
| |
539 #else |
| |
540 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
| |
541 #endif |
| |
542 } |
| |
543 |
| |
544 bool cx_strcasesuffix( |
| |
545 cxstring string, |
| |
546 cxstring suffix |
| |
547 ) { |
| |
548 if (string.length < suffix.length) return false; |
| |
549 #ifdef _WIN32 |
| |
550 return _strnicmp(string.ptr+string.length-suffix.length, |
| |
551 suffix.ptr, suffix.length) == 0; |
| |
552 #else |
| |
553 return strncasecmp(string.ptr + string.length - suffix.length, |
| |
554 suffix.ptr, suffix.length) == 0; |
| |
555 #endif |
| |
556 } |
| |
557 |
| |
558 void cx_strlower(cxmutstr string) { |
| |
559 cx_for_n(i, string.length) { |
| |
560 string.ptr[i] = (char) tolower(string.ptr[i]); |
| |
561 } |
| |
562 } |
| |
563 |
| |
564 void cx_strupper(cxmutstr string) { |
| |
565 cx_for_n(i, string.length) { |
| |
566 string.ptr[i] = (char) toupper(string.ptr[i]); |
| |
567 } |
| |
568 } |
| |
569 |
| |
570 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE |
| |
571 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 |
| |
572 #endif |
| |
573 |
| |
574 struct cx_strreplace_ibuf { |
| |
575 size_t *buf; |
| |
576 struct cx_strreplace_ibuf *next; |
| |
577 unsigned int len; |
| |
578 }; |
| |
579 |
| |
580 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { |
| |
581 while (buf) { |
| |
582 struct cx_strreplace_ibuf *next = buf->next; |
| |
583 free(buf->buf); |
| |
584 free(buf); |
| |
585 buf = next; |
| |
586 } |
| |
587 } |
| |
588 |
| |
589 cxmutstr cx_strreplacen_a( |
| |
590 CxAllocator const *allocator, |
| |
591 cxstring str, |
| |
592 cxstring pattern, |
| |
593 cxstring replacement, |
| |
594 size_t replmax |
| |
595 ) { |
| |
596 |
| |
597 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
| |
598 return cx_strdup_a(allocator, str); |
| |
599 |
| |
600 // Compute expected buffer length |
| |
601 size_t ibufmax = str.length / pattern.length; |
| |
602 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
| |
603 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { |
| |
604 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; |
| |
605 } |
| |
606 |
| |
607 // Allocate first index buffer |
| |
608 struct cx_strreplace_ibuf *firstbuf, *curbuf; |
| |
609 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
| |
610 if (!firstbuf) return cx_mutstrn(NULL, 0); |
| |
611 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
| |
612 if (!firstbuf->buf) { |
| |
613 free(firstbuf); |
| |
614 return cx_mutstrn(NULL, 0); |
| |
615 } |
| |
616 |
| |
617 // Search occurrences |
| |
618 cxstring searchstr = str; |
| |
619 size_t found = 0; |
| |
620 do { |
| |
621 cxstring match = cx_strstr(searchstr, pattern); |
| |
622 if (match.length > 0) { |
| |
623 // Allocate next buffer in chain, if required |
| |
624 if (curbuf->len == ibuflen) { |
| |
625 struct cx_strreplace_ibuf *nextbuf = |
| |
626 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
| |
627 if (!nextbuf) { |
| |
628 cx_strrepl_free_ibuf(firstbuf); |
| |
629 return cx_mutstrn(NULL, 0); |
| |
630 } |
| |
631 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
| |
632 if (!nextbuf->buf) { |
| |
633 free(nextbuf); |
| |
634 cx_strrepl_free_ibuf(firstbuf); |
| |
635 return cx_mutstrn(NULL, 0); |
| |
636 } |
| |
637 curbuf->next = nextbuf; |
| |
638 curbuf = nextbuf; |
| |
639 } |
| |
640 |
| |
641 // Record match index |
| |
642 found++; |
| |
643 size_t idx = match.ptr - str.ptr; |
| |
644 curbuf->buf[curbuf->len++] = idx; |
| |
645 searchstr.ptr = match.ptr + pattern.length; |
| |
646 searchstr.length = str.length - idx - pattern.length; |
| |
647 } else { |
| |
648 break; |
| |
649 } |
| |
650 } while (searchstr.length > 0 && found < replmax); |
| |
651 |
| |
652 // Allocate result string |
| |
653 cxmutstr result; |
| |
654 { |
| |
655 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
| |
656 size_t rcount = 0; |
| |
657 curbuf = firstbuf; |
| |
658 do { |
| |
659 rcount += curbuf->len; |
| |
660 curbuf = curbuf->next; |
| |
661 } while (curbuf); |
| |
662 result.length = str.length + rcount * adjlen; |
| |
663 result.ptr = cxMalloc(allocator, result.length + 1); |
| |
664 if (!result.ptr) { |
| |
665 cx_strrepl_free_ibuf(firstbuf); |
| |
666 return cx_mutstrn(NULL, 0); |
| |
667 } |
| |
668 } |
| |
669 |
| |
670 // Build result string |
| |
671 curbuf = firstbuf; |
| |
672 size_t srcidx = 0; |
| |
673 char *destptr = result.ptr; |
| |
674 do { |
| |
675 for (size_t i = 0; i < curbuf->len; i++) { |
| |
676 // Copy source part up to next match |
| |
677 size_t idx = curbuf->buf[i]; |
| |
678 size_t srclen = idx - srcidx; |
| |
679 if (srclen > 0) { |
| |
680 memcpy(destptr, str.ptr + srcidx, srclen); |
| |
681 destptr += srclen; |
| |
682 srcidx += srclen; |
| |
683 } |
| |
684 |
| |
685 // Copy the replacement and skip the source pattern |
| |
686 srcidx += pattern.length; |
| |
687 memcpy(destptr, replacement.ptr, replacement.length); |
| |
688 destptr += replacement.length; |
| |
689 } |
| |
690 curbuf = curbuf->next; |
| |
691 } while (curbuf); |
| |
692 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
| |
693 |
| |
694 // Result is guaranteed to be zero-terminated |
| |
695 result.ptr[result.length] = '\0'; |
| |
696 |
| |
697 // Free index buffer |
| |
698 cx_strrepl_free_ibuf(firstbuf); |
| |
699 |
| |
700 return result; |
| |
701 } |
| |
702 |
| |
703 CxStrtokCtx cx_strtok( |
| |
704 cxstring str, |
| |
705 cxstring delim, |
| |
706 size_t limit |
| |
707 ) { |
| |
708 CxStrtokCtx ctx; |
| |
709 ctx.str = str; |
| |
710 ctx.delim = delim; |
| |
711 ctx.limit = limit; |
| |
712 ctx.pos = 0; |
| |
713 ctx.next_pos = 0; |
| |
714 ctx.delim_pos = 0; |
| |
715 ctx.found = 0; |
| |
716 ctx.delim_more = NULL; |
| |
717 ctx.delim_more_count = 0; |
| |
718 return ctx; |
| |
719 } |
| |
720 |
| |
721 CxStrtokCtx cx_strtok_m( |
| |
722 cxmutstr str, |
| |
723 cxstring delim, |
| |
724 size_t limit |
| |
725 ) { |
| |
726 return cx_strtok(cx_strcast(str), delim, limit); |
| |
727 } |
| |
728 |
| |
729 bool cx_strtok_next( |
| |
730 CxStrtokCtx *ctx, |
| |
731 cxstring *token |
| |
732 ) { |
| |
733 // abortion criteria |
| |
734 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { |
| |
735 return false; |
| |
736 } |
| |
737 |
| |
738 // determine the search start |
| |
739 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos); |
| |
740 |
| |
741 // search the next delimiter |
| |
742 cxstring delim = cx_strstr(haystack, ctx->delim); |
| |
743 |
| |
744 // if found, make delim capture exactly the delimiter |
| |
745 if (delim.length > 0) { |
| |
746 delim.length = ctx->delim.length; |
| |
747 } |
| |
748 |
| |
749 // if more delimiters are specified, check them now |
| |
750 if (ctx->delim_more_count > 0) { |
| |
751 cx_for_n(i, ctx->delim_more_count) { |
| |
752 cxstring d = cx_strstr(haystack, ctx->delim_more[i]); |
| |
753 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { |
| |
754 delim.ptr = d.ptr; |
| |
755 delim.length = ctx->delim_more[i].length; |
| |
756 } |
| |
757 } |
| |
758 } |
| |
759 |
| |
760 // store the token information and adjust the context |
| |
761 ctx->found++; |
| |
762 ctx->pos = ctx->next_pos; |
| |
763 token->ptr = &ctx->str.ptr[ctx->pos]; |
| |
764 ctx->delim_pos = delim.length == 0 ? |
| |
765 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); |
| |
766 token->length = ctx->delim_pos - ctx->pos; |
| |
767 ctx->next_pos = ctx->delim_pos + delim.length; |
| |
768 |
| |
769 return true; |
| |
770 } |
| |
771 |
| |
772 bool cx_strtok_next_m( |
| |
773 CxStrtokCtx *ctx, |
| |
774 cxmutstr *token |
| |
775 ) { |
| |
776 return cx_strtok_next(ctx, (cxstring *) token); |
| |
777 } |
| |
778 |
| |
779 void cx_strtok_delim( |
| |
780 CxStrtokCtx *ctx, |
| |
781 cxstring const *delim, |
| |
782 size_t count |
| |
783 ) { |
| |
784 ctx->delim_more = delim; |
| |
785 ctx->delim_more_count = count; |
| |
786 } |