|
1 /* |
|
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
|
3 * |
|
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions are met: |
|
8 * |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
|
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
26 * POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include "ucx/string.h" |
|
30 |
|
31 #include "ucx/allocator.h" |
|
32 |
|
33 #include <stdlib.h> |
|
34 #include <string.h> |
|
35 #include <stdarg.h> |
|
36 #include <stdint.h> |
|
37 #include <ctype.h> |
|
38 |
|
39 #ifndef _WIN32 |
|
40 #include <strings.h> /* for strncasecmp() */ |
|
41 #endif /* _WIN32 */ |
|
42 |
|
43 sstr_t sstr(char *cstring) { |
|
44 sstr_t string; |
|
45 string.ptr = cstring; |
|
46 string.length = strlen(cstring); |
|
47 return string; |
|
48 } |
|
49 |
|
50 sstr_t sstrn(char *cstring, size_t length) { |
|
51 sstr_t string; |
|
52 string.ptr = cstring; |
|
53 string.length = length; |
|
54 return string; |
|
55 } |
|
56 |
|
57 scstr_t scstr(const char *cstring) { |
|
58 scstr_t string; |
|
59 string.ptr = cstring; |
|
60 string.length = strlen(cstring); |
|
61 return string; |
|
62 } |
|
63 |
|
64 scstr_t scstrn(const char *cstring, size_t length) { |
|
65 scstr_t string; |
|
66 string.ptr = cstring; |
|
67 string.length = length; |
|
68 return string; |
|
69 } |
|
70 |
|
71 |
|
72 size_t scstrnlen(size_t n, ...) { |
|
73 if (n == 0) return 0; |
|
74 |
|
75 va_list ap; |
|
76 va_start(ap, n); |
|
77 |
|
78 size_t size = 0; |
|
79 |
|
80 for (size_t i = 0 ; i < n ; i++) { |
|
81 scstr_t str = va_arg(ap, scstr_t); |
|
82 if(SIZE_MAX - str.length < size) { |
|
83 size = SIZE_MAX; |
|
84 break; |
|
85 } |
|
86 size += str.length; |
|
87 } |
|
88 va_end(ap); |
|
89 |
|
90 return size; |
|
91 } |
|
92 |
|
93 static sstr_t sstrvcat_a( |
|
94 UcxAllocator *a, |
|
95 size_t count, |
|
96 scstr_t s1, |
|
97 va_list ap) { |
|
98 sstr_t str; |
|
99 str.ptr = NULL; |
|
100 str.length = 0; |
|
101 if(count < 2) { |
|
102 return str; |
|
103 } |
|
104 |
|
105 scstr_t s2 = va_arg (ap, scstr_t); |
|
106 |
|
107 if(((size_t)-1) - s1.length < s2.length) { |
|
108 return str; |
|
109 } |
|
110 |
|
111 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); |
|
112 if(!strings) { |
|
113 return str; |
|
114 } |
|
115 |
|
116 // get all args and overall length |
|
117 strings[0] = s1; |
|
118 strings[1] = s2; |
|
119 size_t slen = s1.length + s2.length; |
|
120 int error = 0; |
|
121 for (size_t i=2;i<count;i++) { |
|
122 scstr_t s = va_arg (ap, scstr_t); |
|
123 strings[i] = s; |
|
124 if(((size_t)-1) - s.length < slen) { |
|
125 error = 1; |
|
126 break; |
|
127 } |
|
128 slen += s.length; |
|
129 } |
|
130 if(error) { |
|
131 free(strings); |
|
132 return str; |
|
133 } |
|
134 |
|
135 // create new string |
|
136 str.ptr = (char*) almalloc(a, slen + 1); |
|
137 str.length = slen; |
|
138 if(!str.ptr) { |
|
139 free(strings); |
|
140 str.length = 0; |
|
141 return str; |
|
142 } |
|
143 |
|
144 // concatenate strings |
|
145 size_t pos = 0; |
|
146 for (size_t i=0;i<count;i++) { |
|
147 scstr_t s = strings[i]; |
|
148 memcpy(str.ptr + pos, s.ptr, s.length); |
|
149 pos += s.length; |
|
150 } |
|
151 |
|
152 str.ptr[str.length] = '\0'; |
|
153 |
|
154 free(strings); |
|
155 |
|
156 return str; |
|
157 } |
|
158 |
|
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) { |
|
160 va_list ap; |
|
161 va_start(ap, s1); |
|
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); |
|
163 va_end(ap); |
|
164 return s; |
|
165 } |
|
166 |
|
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { |
|
168 va_list ap; |
|
169 va_start(ap, s1); |
|
170 sstr_t s = sstrvcat_a(a, count, s1, ap); |
|
171 va_end(ap); |
|
172 return s; |
|
173 } |
|
174 |
|
/*
 * Computes position and length of a substring, clamped to the source string.
 *
 * str_length  length of the source string
 * start       requested start index
 * length      requested substring length
 * newlen      receives the clamped length (0 on failure)
 * newpos      receives the start index (0 on failure)
 *
 * Returns 1 on success and 0 when start lies beyond the end of the string.
 * A start equal to str_length is valid and yields an empty substring.
 */
static int ucx_substring(
        size_t str_length,
        size_t start,
        size_t length,
        size_t *newlen,
        size_t *newpos)
{
    if (start > str_length) {
        *newlen = 0;
        *newpos = 0;
        return 0;
    }

    /* never hand out more than what is left after start */
    size_t available = str_length - start;
    *newlen = length < available ? length : available;
    *newpos = start;
    return 1;
}
|
196 |
|
197 sstr_t sstrsubs(sstr_t s, size_t start) { |
|
198 return sstrsubsl (s, start, s.length-start); |
|
199 } |
|
200 |
|
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
|
202 size_t pos; |
|
203 sstr_t ret = { NULL, 0 }; |
|
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
|
205 ret.ptr = s.ptr + pos; |
|
206 } |
|
207 return ret; |
|
208 } |
|
209 |
|
210 scstr_t scstrsubs(scstr_t string, size_t start) { |
|
211 return scstrsubsl(string, start, string.length-start); |
|
212 } |
|
213 |
|
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { |
|
215 size_t pos; |
|
216 scstr_t ret = { NULL, 0 }; |
|
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
|
218 ret.ptr = s.ptr + pos; |
|
219 } |
|
220 return ret; |
|
221 } |
|
222 |
|
223 |
|
/*
 * Finds the first occurrence of a byte in a length-delimited buffer.
 *
 * str     buffer to search (not required to be NUL-terminated)
 * length  number of bytes in str
 * chr     byte to look for; converted to unsigned char like strchr() does,
 *         so both 0xE4 and (char)0xE4 find the same byte regardless of the
 *         signedness of plain char
 * pos     receives the index of the match; left untouched when not found
 *
 * Returns 1 when the byte was found and 0 otherwise.
 */
static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
    const unsigned char wanted = (unsigned char) chr;
    for(size_t i=0;i<length;i++) {
        if((unsigned char) str[i] == wanted) {
            *pos = i;
            return 1;
        }
    }
    return 0;
}
|
233 |
|
/*
 * Finds the last occurrence of a byte in a length-delimited buffer.
 *
 * str     buffer to search (not required to be NUL-terminated)
 * length  number of bytes in str
 * chr     byte to look for; converted to unsigned char like strrchr() does,
 *         so both 0xE4 and (char)0xE4 find the same byte regardless of the
 *         signedness of plain char
 * pos     receives the index of the match; left untouched when not found
 *
 * Returns 1 when the byte was found and 0 otherwise.
 */
static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
    const unsigned char wanted = (unsigned char) chr;
    /* i runs one ahead of the inspected index, so it never drops below 0 */
    for(size_t i=length ; i>0 ; i--) {
        if((unsigned char) str[i-1] == wanted) {
            *pos = i-1;
            return 1;
        }
    }
    return 0;
}
|
245 |
|
246 sstr_t sstrchr(sstr_t s, int c) { |
|
247 size_t pos = 0; |
|
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
|
249 return sstrsubs(s, pos); |
|
250 } |
|
251 return sstrn(NULL, 0); |
|
252 } |
|
253 |
|
254 sstr_t sstrrchr(sstr_t s, int c) { |
|
255 size_t pos = 0; |
|
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
|
257 return sstrsubs(s, pos); |
|
258 } |
|
259 return sstrn(NULL, 0); |
|
260 } |
|
261 |
|
262 scstr_t scstrchr(scstr_t s, int c) { |
|
263 size_t pos = 0; |
|
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
|
265 return scstrsubs(s, pos); |
|
266 } |
|
267 return scstrn(NULL, 0); |
|
268 } |
|
269 |
|
270 scstr_t scstrrchr(scstr_t s, int c) { |
|
271 size_t pos = 0; |
|
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
|
273 return scstrsubs(s, pos); |
|
274 } |
|
275 return scstrn(NULL, 0); |
|
276 } |
|
277 |
|
/*
 * Accessors for the prefix table of ucx_strstr().
 * The table is either an array of uint8_t (useheap == 0) or an array of
 * size_t (useheap != 0); both macros pick the matching element type.
 */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[(index)] : (size_t) ((uint8_t*)(ptable))[(index)])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[(index)] = (uint8_t) (src);\
    else ((size_t*)(ptable))[(index)] = (src);\
    } while (0)


/*
 * Searches for the first occurrence of match within str using a
 * prefix-table based (Knuth-Morris-Pratt style) search.
 *
 * str       buffer to search (not required to be NUL-terminated)
 * length    number of bytes in str
 * match     pattern to look for
 * matchlen  number of bytes in match
 * newlen    receives the number of bytes from the match to the end of str,
 *           length when matchlen is 0, and 0 when nothing was found
 *
 * Returns a pointer to the first occurrence, str itself when matchlen is 0,
 * or NULL when the pattern does not occur.
 *
 * Patterns of up to 255 bytes use a prefix table on the stack, so the
 * function keeps no state between calls. Longer patterns need a heap
 * allocated table; should that allocation fail, a plain byte-wise comparison
 * at every position is used instead, so the result stays correct.
 */
static const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    *newlen = length;
    if (matchlen == 0) {
        return str;
    }

    *newlen = 0;
    if (matchlen > length) {
        /* the pattern cannot fit, do not bother building a table */
        return NULL;
    }

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* small prefix table, sufficient for patterns of up to 255 bytes */
    uint8_t small_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the small prefix table, allocate on the heap */
    const int useheap = matchlen > 255;
    void *ptable = small_table;
    if (useheap) {
        ptable = calloc(matchlen+1, sizeof(size_t));
        if (!ptable) {
            /* out of memory: fall back to a search that needs no table */
            const size_t last = length - matchlen;
            for (size_t pos = 0 ; pos <= last ; pos++) {
                if (memcmp(str + pos, match, matchlen) == 0) {
                    *newlen = length - pos;
                    return str + pos;
                }
            }
            return NULL;
        }
    }

    const char *result = NULL;
    size_t resultlen = 0;
    size_t i, j;

    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j-1] != match[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == matchlen) {
            /* whole pattern consumed: the match ends right before index i */
            size_t start = i - matchlen;
            result = str + start;
            resultlen = length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    *newlen = resultlen;
    return result;
}
|
356 |
|
357 sstr_t scstrsstr(sstr_t string, scstr_t match) { |
|
358 sstr_t result; |
|
359 |
|
360 size_t reslen; |
|
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
|
362 if(!resstr) { |
|
363 result.ptr = NULL; |
|
364 result.length = 0; |
|
365 return result; |
|
366 } |
|
367 |
|
368 size_t pos = resstr - string.ptr; |
|
369 result.ptr = string.ptr + pos; |
|
370 result.length = reslen; |
|
371 |
|
372 return result; |
|
373 } |
|
374 |
|
375 scstr_t scstrscstr(scstr_t string, scstr_t match) { |
|
376 scstr_t result; |
|
377 |
|
378 size_t reslen; |
|
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
|
380 if(!resstr) { |
|
381 result.ptr = NULL; |
|
382 result.length = 0; |
|
383 return result; |
|
384 } |
|
385 |
|
386 size_t pos = resstr - string.ptr; |
|
387 result.ptr = string.ptr + pos; |
|
388 result.length = reslen; |
|
389 |
|
390 return result; |
|
391 } |
|
392 |
|
393 #undef ptable_r |
|
394 #undef ptable_w |
|
395 |
|
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { |
|
397 return scstrsplit_a(ucx_default_allocator(), s, d, n); |
|
398 } |
|
399 |
|
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { |
|
401 if (s.length == 0 || d.length == 0) { |
|
402 *n = -1; |
|
403 return NULL; |
|
404 } |
|
405 |
|
406 /* special cases: delimiter is at least as large as the string */ |
|
407 if (d.length >= s.length) { |
|
408 /* exact match */ |
|
409 if (sstrcmp(s, d) == 0) { |
|
410 *n = 0; |
|
411 return NULL; |
|
412 } else /* no match possible */ { |
|
413 *n = 1; |
|
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
|
415 if(result) { |
|
416 *result = sstrdup_a(allocator, s); |
|
417 } else { |
|
418 *n = -2; |
|
419 } |
|
420 return result; |
|
421 } |
|
422 } |
|
423 |
|
424 ssize_t nmax = *n; |
|
425 size_t arrlen = 16; |
|
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); |
|
427 |
|
428 if (result) { |
|
429 scstr_t curpos = s; |
|
430 ssize_t j = 1; |
|
431 while (1) { |
|
432 scstr_t match; |
|
433 /* optimize for one byte delimiters */ |
|
434 if (d.length == 1) { |
|
435 match = curpos; |
|
436 for (size_t i = 0 ; i < curpos.length ; i++) { |
|
437 if (curpos.ptr[i] == *(d.ptr)) { |
|
438 match.ptr = curpos.ptr + i; |
|
439 break; |
|
440 } |
|
441 match.length--; |
|
442 } |
|
443 } else { |
|
444 match = scstrscstr(curpos, d); |
|
445 } |
|
446 if (match.length > 0) { |
|
447 /* is this our last try? */ |
|
448 if (nmax == 0 || j < nmax) { |
|
449 /* copy the current string to the array */ |
|
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); |
|
451 result[j-1] = sstrdup_a(allocator, item); |
|
452 size_t processed = item.length + d.length; |
|
453 curpos.ptr += processed; |
|
454 curpos.length -= processed; |
|
455 |
|
456 /* allocate memory for the next string */ |
|
457 j++; |
|
458 if (j > arrlen) { |
|
459 arrlen *= 2; |
|
460 size_t reallocsz; |
|
461 sstr_t* reallocated = NULL; |
|
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { |
|
463 reallocated = (sstr_t*) alrealloc( |
|
464 allocator, result, reallocsz); |
|
465 } |
|
466 if (reallocated) { |
|
467 result = reallocated; |
|
468 } else { |
|
469 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
470 alfree(allocator, result[i].ptr); |
|
471 } |
|
472 alfree(allocator, result); |
|
473 *n = -2; |
|
474 return NULL; |
|
475 } |
|
476 } |
|
477 } else { |
|
478 /* nmax reached, copy the _full_ remaining string */ |
|
479 result[j-1] = sstrdup_a(allocator, curpos); |
|
480 break; |
|
481 } |
|
482 } else { |
|
483 /* no more matches, copy last string */ |
|
484 result[j-1] = sstrdup_a(allocator, curpos); |
|
485 break; |
|
486 } |
|
487 } |
|
488 *n = j; |
|
489 } else { |
|
490 *n = -2; |
|
491 } |
|
492 |
|
493 return result; |
|
494 } |
|
495 |
|
496 int scstrcmp(scstr_t s1, scstr_t s2) { |
|
497 if (s1.length == s2.length) { |
|
498 return memcmp(s1.ptr, s2.ptr, s1.length); |
|
499 } else if (s1.length > s2.length) { |
|
500 return 1; |
|
501 } else { |
|
502 return -1; |
|
503 } |
|
504 } |
|
505 |
|
506 int scstrcasecmp(scstr_t s1, scstr_t s2) { |
|
507 if (s1.length == s2.length) { |
|
508 #ifdef _WIN32 |
|
509 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
|
510 #else |
|
511 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
|
512 #endif |
|
513 } else if (s1.length > s2.length) { |
|
514 return 1; |
|
515 } else { |
|
516 return -1; |
|
517 } |
|
518 } |
|
519 |
|
520 sstr_t scstrdup(scstr_t s) { |
|
521 return sstrdup_a(ucx_default_allocator(), s); |
|
522 } |
|
523 |
|
524 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { |
|
525 sstr_t newstring; |
|
526 newstring.ptr = (char*)almalloc(allocator, s.length + 1); |
|
527 if (newstring.ptr) { |
|
528 newstring.length = s.length; |
|
529 newstring.ptr[newstring.length] = 0; |
|
530 |
|
531 memcpy(newstring.ptr, s.ptr, s.length); |
|
532 } else { |
|
533 newstring.length = 0; |
|
534 } |
|
535 |
|
536 return newstring; |
|
537 } |
|
538 |
|
539 |
|
/*
 * Determines the part of a buffer that remains after stripping leading and
 * trailing whitespace (as classified by isspace()).
 *
 * s       buffer to inspect (not required to be NUL-terminated)
 * len     number of bytes in s
 * newlen  receives the length of the trimmed part
 *
 * Returns the offset of the first byte of the trimmed part. For a buffer
 * that consists of whitespace only, the offset equals len and *newlen is 0.
 */
static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    const char *newptr = s;
    size_t length = len;

    /*
     * isspace() is only defined for EOF and values representable as
     * unsigned char, so bytes >= 0x80 must not be passed as negative char
     */
    while(length > 0 && isspace((unsigned char) *newptr)) {
        newptr++;
        length--;
    }
    while(length > 0 && isspace((unsigned char) newptr[length-1])) {
        length--;
    }

    *newlen = length;
    return newptr - s;
}
|
555 |
|
556 sstr_t sstrtrim(sstr_t string) { |
|
557 sstr_t newstr; |
|
558 newstr.ptr = string.ptr |
|
559 + ucx_strtrim(string.ptr, string.length, &newstr.length); |
|
560 return newstr; |
|
561 } |
|
562 |
|
563 scstr_t scstrtrim(scstr_t string) { |
|
564 scstr_t newstr; |
|
565 newstr.ptr = string.ptr |
|
566 + ucx_strtrim(string.ptr, string.length, &newstr.length); |
|
567 return newstr; |
|
568 } |
|
569 |
|
570 int scstrprefix(scstr_t string, scstr_t prefix) { |
|
571 if (string.length == 0) { |
|
572 return prefix.length == 0; |
|
573 } |
|
574 if (prefix.length == 0) { |
|
575 return 1; |
|
576 } |
|
577 |
|
578 if (prefix.length > string.length) { |
|
579 return 0; |
|
580 } else { |
|
581 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
|
582 } |
|
583 } |
|
584 |
|
585 int scstrsuffix(scstr_t string, scstr_t suffix) { |
|
586 if (string.length == 0) { |
|
587 return suffix.length == 0; |
|
588 } |
|
589 if (suffix.length == 0) { |
|
590 return 1; |
|
591 } |
|
592 |
|
593 if (suffix.length > string.length) { |
|
594 return 0; |
|
595 } else { |
|
596 return memcmp(string.ptr+string.length-suffix.length, |
|
597 suffix.ptr, suffix.length) == 0; |
|
598 } |
|
599 } |
|
600 |
|
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) { |
|
602 if (string.length == 0) { |
|
603 return prefix.length == 0; |
|
604 } |
|
605 if (prefix.length == 0) { |
|
606 return 1; |
|
607 } |
|
608 |
|
609 if (prefix.length > string.length) { |
|
610 return 0; |
|
611 } else { |
|
612 scstr_t subs = scstrsubsl(string, 0, prefix.length); |
|
613 return scstrcasecmp(subs, prefix) == 0; |
|
614 } |
|
615 } |
|
616 |
|
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) { |
|
618 if (string.length == 0) { |
|
619 return suffix.length == 0; |
|
620 } |
|
621 if (suffix.length == 0) { |
|
622 return 1; |
|
623 } |
|
624 |
|
625 if (suffix.length > string.length) { |
|
626 return 0; |
|
627 } else { |
|
628 scstr_t subs = scstrsubs(string, string.length-suffix.length); |
|
629 return scstrcasecmp(subs, suffix) == 0; |
|
630 } |
|
631 } |
|
632 |
|
633 sstr_t scstrlower(scstr_t string) { |
|
634 sstr_t ret = sstrdup(string); |
|
635 for (size_t i = 0; i < ret.length ; i++) { |
|
636 ret.ptr[i] = tolower(ret.ptr[i]); |
|
637 } |
|
638 return ret; |
|
639 } |
|
640 |
|
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { |
|
642 sstr_t ret = sstrdup_a(allocator, string); |
|
643 for (size_t i = 0; i < ret.length ; i++) { |
|
644 ret.ptr[i] = tolower(ret.ptr[i]); |
|
645 } |
|
646 return ret; |
|
647 } |
|
648 |
|
649 sstr_t scstrupper(scstr_t string) { |
|
650 sstr_t ret = sstrdup(string); |
|
651 for (size_t i = 0; i < ret.length ; i++) { |
|
652 ret.ptr[i] = toupper(ret.ptr[i]); |
|
653 } |
|
654 return ret; |
|
655 } |
|
656 |
|
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { |
|
658 sstr_t ret = sstrdup_a(allocator, string); |
|
659 for (size_t i = 0; i < ret.length ; i++) { |
|
660 ret.ptr[i] = toupper(ret.ptr[i]); |
|
661 } |
|
662 return ret; |
|
663 } |
|
664 |
|
/* Upper limit for the number of match indices held by one index buffer. */
#define REPLACE_INDEX_BUFFER_MAX 100

/*
 * One node in a singly linked chain of index buffers, used by
 * scstrreplacen_a() to remember where the pattern was found.
 */
struct scstrreplace_ibuf {
    size_t *buf;                    /* heap allocated array of indices */
    unsigned int len;               /* used entries in buf; small indices */
    struct scstrreplace_ibuf *next; /* next node, NULL at the chain's end */
};
|
672 |
|
673 static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { |
|
674 while (buf) { |
|
675 struct scstrreplace_ibuf *next = buf->next; |
|
676 free(buf->buf); |
|
677 free(buf); |
|
678 buf = next; |
|
679 } |
|
680 } |
|
681 |
|
682 sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, |
|
683 scstr_t pattern, scstr_t replacement, size_t replmax) { |
|
684 |
|
685 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
|
686 return sstrdup(str); |
|
687 |
|
688 /* Compute expected buffer length */ |
|
689 size_t ibufmax = str.length / pattern.length; |
|
690 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
|
691 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { |
|
692 ibuflen = REPLACE_INDEX_BUFFER_MAX; |
|
693 } |
|
694 |
|
695 /* Allocate first index buffer */ |
|
696 struct scstrreplace_ibuf *firstbuf, *curbuf; |
|
697 firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); |
|
698 if (!firstbuf) return sstrn(NULL, 0); |
|
699 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
|
700 if (!firstbuf->buf) { |
|
701 free(firstbuf); |
|
702 return sstrn(NULL, 0); |
|
703 } |
|
704 |
|
705 /* Search occurrences */ |
|
706 scstr_t searchstr = str; |
|
707 size_t found = 0; |
|
708 do { |
|
709 scstr_t match = scstrscstr(searchstr, pattern); |
|
710 if (match.length > 0) { |
|
711 /* Allocate next buffer in chain, if required */ |
|
712 if (curbuf->len == ibuflen) { |
|
713 struct scstrreplace_ibuf *nextbuf = |
|
714 calloc(1, sizeof(struct scstrreplace_ibuf)); |
|
715 if (!nextbuf) { |
|
716 scstrrepl_free_ibuf(firstbuf); |
|
717 return sstrn(NULL, 0); |
|
718 } |
|
719 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
|
720 if (!nextbuf->buf) { |
|
721 free(nextbuf); |
|
722 scstrrepl_free_ibuf(firstbuf); |
|
723 return sstrn(NULL, 0); |
|
724 } |
|
725 curbuf->next = nextbuf; |
|
726 curbuf = nextbuf; |
|
727 } |
|
728 |
|
729 /* Record match index */ |
|
730 found++; |
|
731 size_t idx = match.ptr - str.ptr; |
|
732 curbuf->buf[curbuf->len++] = idx; |
|
733 searchstr.ptr = match.ptr + pattern.length; |
|
734 searchstr.length = str.length - idx - pattern.length; |
|
735 } else { |
|
736 break; |
|
737 } |
|
738 } while (searchstr.length > 0 && found < replmax); |
|
739 |
|
740 /* Allocate result string */ |
|
741 sstr_t result; |
|
742 { |
|
743 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
|
744 size_t rcount = 0; |
|
745 curbuf = firstbuf; |
|
746 do { |
|
747 rcount += curbuf->len; |
|
748 curbuf = curbuf->next; |
|
749 } while (curbuf); |
|
750 result.length = str.length + rcount * adjlen; |
|
751 result.ptr = almalloc(allocator, result.length); |
|
752 if (!result.ptr) { |
|
753 scstrrepl_free_ibuf(firstbuf); |
|
754 return sstrn(NULL, 0); |
|
755 } |
|
756 } |
|
757 |
|
758 /* Build result string */ |
|
759 curbuf = firstbuf; |
|
760 size_t srcidx = 0; |
|
761 char* destptr = result.ptr; |
|
762 do { |
|
763 for (size_t i = 0; i < curbuf->len; i++) { |
|
764 /* Copy source part up to next match*/ |
|
765 size_t idx = curbuf->buf[i]; |
|
766 size_t srclen = idx - srcidx; |
|
767 if (srclen > 0) { |
|
768 memcpy(destptr, str.ptr+srcidx, srclen); |
|
769 destptr += srclen; |
|
770 srcidx += srclen; |
|
771 } |
|
772 |
|
773 /* Copy the replacement and skip the source pattern */ |
|
774 srcidx += pattern.length; |
|
775 memcpy(destptr, replacement.ptr, replacement.length); |
|
776 destptr += replacement.length; |
|
777 } |
|
778 curbuf = curbuf->next; |
|
779 } while (curbuf); |
|
780 memcpy(destptr, str.ptr+srcidx, str.length-srcidx); |
|
781 |
|
782 /* Free index buffer */ |
|
783 scstrrepl_free_ibuf(firstbuf); |
|
784 |
|
785 return result; |
|
786 } |
|
787 |
|
788 sstr_t scstrreplacen(scstr_t str, scstr_t pattern, |
|
789 scstr_t replacement, size_t replmax) { |
|
790 return scstrreplacen_a(ucx_default_allocator(), |
|
791 str, pattern, replacement, replmax); |
|
792 } |
|
793 |
|
794 |
|
795 // type adjustment functions |
|
796 scstr_t ucx_sc2sc(scstr_t str) { |
|
797 return str; |
|
798 } |
|
799 scstr_t ucx_ss2sc(sstr_t str) { |
|
800 scstr_t cs; |
|
801 cs.ptr = str.ptr; |
|
802 cs.length = str.length; |
|
803 return cs; |
|
804 } |
|
805 scstr_t ucx_ss2c_s(scstr_t c) { |
|
806 return c; |
|
807 } |