| 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 * POSSIBILITY OF SUCH DAMAGE. |
26 * POSSIBILITY OF SUCH DAMAGE. |
| 27 */ |
27 */ |
| |
28 #ifdef MEMRCHR_NEED_GNU |
| |
29 #define _GNU_SOURCE |
| |
30 #endif |
| |
31 |
| 28 #include "cx/string.h" |
32 #include "cx/string.h" |
| 29 |
33 |
| 30 #include <string.h> |
34 #include <string.h> |
| 31 #include <stdarg.h> |
35 #include <stdarg.h> |
| 32 #include <assert.h> |
36 #include <assert.h> |
| 33 #include <errno.h> |
37 #include <errno.h> |
| 34 #include <limits.h> |
38 #include <limits.h> |
| 35 #include <float.h> |
39 #include <float.h> |
| |
40 #include <ctype.h> |
| 36 |
41 |
| 37 #ifdef _WIN32 |
42 #ifdef _WIN32 |
| 38 #define cx_strcasecmp_impl _strnicmp |
43 #define cx_strcasecmp_impl _strnicmp |
| 39 #else |
44 #else |
| 40 #include <strings.h> |
45 #include <strings.h> |
| 41 #define cx_strcasecmp_impl strncasecmp |
46 #define cx_strcasecmp_impl strncasecmp |
| 42 #endif |
47 #endif |
| 43 |
48 |
| 44 cxmutstr cx_mutstr(char *cstring) { |
49 cxmutstr cx_mutstr(char *cstring) { |
| 45 return (cxmutstr) {cstring, strlen(cstring)}; |
50 return (cxmutstr) {cstring, cstring == NULL ? 0 : strlen(cstring)}; |
| 46 } |
51 } |
| 47 |
52 |
| 48 cxmutstr cx_mutstrn( |
53 cxmutstr cx_mutstrn( |
| 49 char *cstring, |
54 char *cstring, |
| 50 size_t length |
55 size_t length |
| 51 ) { |
56 ) { |
| 52 return (cxmutstr) {cstring, length}; |
57 return (cxmutstr) {cstring, length}; |
| 53 } |
58 } |
| 54 |
59 |
| 55 cxstring cx_str(const char *cstring) { |
60 cxstring cx_str(const char *cstring) { |
| 56 return (cxstring) {cstring, strlen(cstring)}; |
61 return (cxstring) {cstring, cstring == NULL ? 0 : strlen(cstring)}; |
| 57 } |
62 } |
| 58 |
63 |
| 59 cxstring cx_strn( |
64 cxstring cx_strn( |
| 60 const char *cstring, |
65 const char *cstring, |
| 61 size_t length |
66 size_t length |
| 104 cxmutstr str, |
125 cxmutstr str, |
| 105 size_t count, |
126 size_t count, |
| 106 ... |
127 ... |
| 107 ) { |
128 ) { |
| 108 if (count == 0) return str; |
129 if (count == 0) return str; |
| 109 |
|
| 110 cxstring strings_stack[8]; |
|
| 111 cxstring *strings; |
|
| 112 if (count > 8) { |
|
| 113 strings = calloc(count, sizeof(cxstring)); |
|
| 114 if (strings == NULL) { |
|
| 115 return (cxmutstr) {NULL, 0}; |
|
| 116 } |
|
| 117 } else { |
|
| 118 strings = strings_stack; |
|
| 119 } |
|
| 120 |
|
| 121 va_list ap; |
130 va_list ap; |
| 122 va_start(ap, count); |
131 va_start(ap, count); |
| 123 |
132 va_list ap2; |
| 124 // get all args and overall length |
133 va_copy(ap2, ap); |
| |
134 |
| |
135 // compute overall length |
| 125 bool overflow = false; |
136 bool overflow = false; |
| 126 size_t slen = str.length; |
137 size_t slen = str.length; |
| 127 for (size_t i = 0; i < count; i++) { |
138 for (size_t i = 0; i < count; i++) { |
| 128 cxstring s = va_arg (ap, cxstring); |
139 cxstring s = va_arg(ap, cxstring); |
| 129 strings[i] = s; |
|
| 130 if (slen > SIZE_MAX - str.length) overflow = true; |
140 if (slen > SIZE_MAX - str.length) overflow = true; |
| 131 slen += s.length; |
141 slen += s.length; |
| 132 } |
142 } |
| 133 va_end(ap); |
143 va_end(ap); |
| 134 |
144 |
| 135 // abort in case of overflow |
145 // abort in case of overflow |
| 136 if (overflow) { |
146 if (overflow) { |
| |
147 va_end(ap2); |
| 137 errno = EOVERFLOW; |
148 errno = EOVERFLOW; |
| 138 if (strings != strings_stack) { |
|
| 139 free(strings); |
|
| 140 } |
|
| 141 return (cxmutstr) { NULL, 0 }; |
149 return (cxmutstr) { NULL, 0 }; |
| 142 } |
150 } |
| 143 |
151 |
| 144 // reallocate or create new string |
152 // reallocate or create new string |
| 145 char *newstr; |
153 char *newstr; |
| 147 newstr = cxMalloc(alloc, slen + 1); |
155 newstr = cxMalloc(alloc, slen + 1); |
| 148 } else { |
156 } else { |
| 149 newstr = cxRealloc(alloc, str.ptr, slen + 1); |
157 newstr = cxRealloc(alloc, str.ptr, slen + 1); |
| 150 } |
158 } |
| 151 if (newstr == NULL) { |
159 if (newstr == NULL) { |
| 152 if (strings != strings_stack) { |
160 va_end(ap2); |
| 153 free(strings); |
|
| 154 } |
|
| 155 return (cxmutstr) {NULL, 0}; |
161 return (cxmutstr) {NULL, 0}; |
| 156 } |
162 } |
| 157 str.ptr = newstr; |
163 str.ptr = newstr; |
| 158 |
164 |
| 159 // concatenate strings |
165 // concatenate strings |
| 160 size_t pos = str.length; |
166 size_t pos = str.length; |
| 161 str.length = slen; |
167 str.length = slen; |
| 162 for (size_t i = 0; i < count; i++) { |
168 for (size_t i = 0; i < count; i++) { |
| 163 cxstring s = strings[i]; |
169 cxstring s = va_arg(ap2, cxstring); |
| 164 memcpy(str.ptr + pos, s.ptr, s.length); |
170 memcpy(str.ptr + pos, s.ptr, s.length); |
| 165 pos += s.length; |
171 pos += s.length; |
| 166 } |
172 } |
| |
173 va_end(ap2); |
| 167 |
174 |
| 168 // terminate string |
175 // terminate string |
| 169 str.ptr[str.length] = '\0'; |
176 str.ptr[str.length] = '\0'; |
| 170 |
|
| 171 // free temporary array |
|
| 172 if (strings != strings_stack) { |
|
| 173 free(strings); |
|
| 174 } |
|
| 175 |
177 |
| 176 return str; |
178 return str; |
| 177 } |
179 } |
| 178 |
180 |
| 179 cxstring cx_strsubs( |
181 cxstring cx_strsubs( |
| 520 memcpy(result.ptr, string.ptr, string.length); |
528 memcpy(result.ptr, string.ptr, string.length); |
| 521 result.ptr[string.length] = '\0'; |
529 result.ptr[string.length] = '\0'; |
| 522 return result; |
530 return result; |
| 523 } |
531 } |
| 524 |
532 |
| 525 static bool str_isspace(char c) { |
|
| 526 // TODO: remove once UCX has public API for this |
|
| 527 return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\v' || c == '\f'; |
|
| 528 } |
|
| 529 |
|
| 530 cxstring cx_strtrim(cxstring string) { |
533 cxstring cx_strtrim(cxstring string) { |
| 531 cxstring result = string; |
534 cxstring result = string; |
| 532 // TODO: optimize by comparing multiple bytes at once |
535 while (result.length > 0 && isspace((unsigned char)(result.ptr[0]))) { |
| 533 while (result.length > 0 && str_isspace(*result.ptr)) { |
|
| 534 result.ptr++; |
536 result.ptr++; |
| 535 result.length--; |
537 result.length--; |
| 536 } |
538 } |
| 537 while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) { |
539 while (result.length > 0 && isspace((unsigned char)result.ptr[result.length - 1])) { |
| 538 result.length--; |
540 result.length--; |
| 539 } |
541 } |
| 540 return result; |
542 return result; |
| 541 } |
543 } |
| 542 |
544 |
| 543 cxmutstr cx_strtrim_m(cxmutstr string) { |
545 cxmutstr cx_strtrim_m(cxmutstr string) { |
| 544 cxstring result = cx_strtrim(cx_strcast(string)); |
546 cxstring result = cx_strtrim(cx_strcast(string)); |
| 545 return (cxmutstr) {(char *) result.ptr, result.length}; |
547 return (cxmutstr) {(char *) result.ptr, result.length}; |
| 546 } |
548 } |
| 547 |
549 |
| 548 bool cx_strprefix( |
550 bool cx_strprefix_( |
| 549 cxstring string, |
551 cxstring string, |
| 550 cxstring prefix |
552 cxstring prefix |
| 551 ) { |
553 ) { |
| 552 if (string.length < prefix.length) return false; |
554 if (string.length < prefix.length) return false; |
| 553 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
555 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
| 554 } |
556 } |
| 555 |
557 |
| 556 bool cx_strsuffix( |
558 bool cx_strsuffix_( |
| 557 cxstring string, |
559 cxstring string, |
| 558 cxstring suffix |
560 cxstring suffix |
| 559 ) { |
561 ) { |
| 560 if (string.length < suffix.length) return false; |
562 if (string.length < suffix.length) return false; |
| 561 return memcmp(string.ptr + string.length - suffix.length, |
563 return memcmp(string.ptr + string.length - suffix.length, |
| 562 suffix.ptr, suffix.length) == 0; |
564 suffix.ptr, suffix.length) == 0; |
| 563 } |
565 } |
| 564 |
566 |
| 565 bool cx_strcaseprefix( |
567 bool cx_strcaseprefix_( |
| 566 cxstring string, |
568 cxstring string, |
| 567 cxstring prefix |
569 cxstring prefix |
| 568 ) { |
570 ) { |
| 569 if (string.length < prefix.length) return false; |
571 if (string.length < prefix.length) return false; |
| 570 #ifdef _WIN32 |
572 #ifdef _WIN32 |
| 586 return strncasecmp(string.ptr + string.length - suffix.length, |
588 return strncasecmp(string.ptr + string.length - suffix.length, |
| 587 suffix.ptr, suffix.length) == 0; |
589 suffix.ptr, suffix.length) == 0; |
| 588 #endif |
590 #endif |
| 589 } |
591 } |
| 590 |
592 |
| 591 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE |
|
| 592 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 |
|
| 593 #endif |
|
| 594 |
|
| 595 struct cx_strreplace_ibuf { |
|
| 596 size_t *buf; |
|
| 597 struct cx_strreplace_ibuf *next; |
|
| 598 unsigned int len; |
|
| 599 }; |
|
| 600 |
|
| 601 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { |
|
| 602 // remember, the first data is on the stack! |
|
| 603 buf = buf->next; |
|
| 604 while (buf) { |
|
| 605 struct cx_strreplace_ibuf *next = buf->next; |
|
| 606 free(buf->buf); |
|
| 607 free(buf); |
|
| 608 buf = next; |
|
| 609 } |
|
| 610 } |
|
| 611 |
|
| 612 cxmutstr cx_strreplacen_a( |
593 cxmutstr cx_strreplacen_a( |
| 613 const CxAllocator *allocator, |
594 const CxAllocator *allocator, |
| 614 cxstring str, |
595 cxstring str, |
| 615 cxstring search, |
596 cxstring search, |
| 616 cxstring replacement, |
597 cxstring replacement, |
| 617 size_t replmax |
598 size_t replmax |
| 618 ) { |
599 ) { |
| 619 |
600 // special cases |
| 620 if (search.length == 0 || search.length > str.length || replmax == 0) |
601 if (search.length == 0 || search.length > str.length || replmax == 0) { |
| 621 return cx_strdup_a(allocator, str); |
602 return cx_strdup_a(allocator, str); |
| 622 |
603 } |
| 623 // Compute expected buffer length |
604 |
| 624 size_t ibufmax = str.length / search.length; |
605 size_t in_len = str.length; |
| 625 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
606 size_t search_len = search.length; |
| 626 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { |
607 size_t repl_len = replacement.length; |
| 627 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; |
608 |
| 628 } |
609 // first run, count the occurrences |
| 629 |
610 // and remember where the first is |
| 630 // First index buffer can be on the stack |
611 size_t occurrences = 1; |
| 631 struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf; |
612 cxstring first = cx_strstr(str, search); |
| 632 size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE]; |
613 if (first.length == 0) { |
| 633 ibuf.buf = ibuf_sbo; |
614 // special case, no replacements |
| 634 ibuf.next = NULL; |
615 return cx_strdup_a(allocator, str); |
| 635 ibuf.len = 0; |
616 } |
| 636 |
617 cxstring tmp = cx_strsubs(first, search_len); |
| 637 // Search occurrences |
618 while (occurrences < replmax && |
| 638 cxstring searchstr = str; |
619 (tmp = cx_strstr(tmp, search)).length > 0) { |
| 639 size_t found = 0; |
620 occurrences++; |
| 640 do { |
621 tmp = cx_strsubs(tmp, search_len); |
| 641 cxstring match = cx_strstr(searchstr, search); |
622 } |
| 642 if (match.length > 0) { |
623 |
| 643 // Allocate next buffer in chain, if required |
624 // calculate necessary memory |
| 644 if (curbuf->len == ibuflen) { |
625 signed long long diff_len = (signed long long) repl_len - search_len; |
| 645 struct cx_strreplace_ibuf *nextbuf = |
626 size_t out_len = in_len + diff_len * occurrences; |
| 646 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
627 cxmutstr out = { |
| 647 if (!nextbuf) { |
628 cxMalloc(allocator, out_len + 1), |
| 648 cx_strrepl_free_ibuf(&ibuf); |
629 out_len |
| 649 return cx_mutstrn(NULL, 0); |
630 }; |
| 650 } |
631 if (out.ptr == NULL) return out; |
| 651 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
632 |
| 652 if (!nextbuf->buf) { |
633 // second run: perform the replacements |
| 653 free(nextbuf); |
634 // but start where we found the first occurrence |
| 654 cx_strrepl_free_ibuf(&ibuf); |
635 const char *inp = str.ptr; |
| 655 return cx_mutstrn(NULL, 0); |
636 tmp = first; |
| 656 } |
637 char *outp = out.ptr; |
| 657 curbuf->next = nextbuf; |
638 while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) { |
| 658 curbuf = nextbuf; |
639 size_t copylen = tmp.ptr - inp; |
| 659 } |
640 memcpy(outp, inp, copylen); |
| 660 |
641 outp += copylen; |
| 661 // Record match index |
642 memcpy(outp, replacement.ptr, repl_len); |
| 662 found++; |
643 outp += repl_len; |
| 663 size_t idx = match.ptr - str.ptr; |
644 inp += copylen + search_len; |
| 664 curbuf->buf[curbuf->len++] = idx; |
645 tmp = cx_strsubs(tmp, search_len); |
| 665 searchstr.ptr = match.ptr + search.length; |
646 } |
| 666 searchstr.length = str.length - idx - search.length; |
647 |
| 667 } else { |
648 // add the remaining string |
| 668 break; |
649 size_t copylen = in_len - (inp - str.ptr); |
| 669 } |
650 memcpy(outp, inp, copylen); |
| 670 } while (searchstr.length > 0 && found < replmax); |
651 out.ptr[out_len] = '\0'; |
| 671 |
652 |
| 672 // Allocate result string |
653 return out; |
| 673 cxmutstr result; |
|
| 674 { |
|
| 675 long long adjlen = (long long) replacement.length - (long long) search.length; |
|
| 676 size_t rcount = 0; |
|
| 677 curbuf = &ibuf; |
|
| 678 do { |
|
| 679 rcount += curbuf->len; |
|
| 680 curbuf = curbuf->next; |
|
| 681 } while (curbuf); |
|
| 682 result.length = str.length + rcount * adjlen; |
|
| 683 result.ptr = cxMalloc(allocator, result.length + 1); |
|
| 684 if (!result.ptr) { |
|
| 685 cx_strrepl_free_ibuf(&ibuf); |
|
| 686 return cx_mutstrn(NULL, 0); |
|
| 687 } |
|
| 688 } |
|
| 689 |
|
| 690 // Build result string |
|
| 691 curbuf = &ibuf; |
|
| 692 size_t srcidx = 0; |
|
| 693 char *destptr = result.ptr; |
|
| 694 do { |
|
| 695 for (size_t i = 0; i < curbuf->len; i++) { |
|
| 696 // Copy source part up to next match |
|
| 697 size_t idx = curbuf->buf[i]; |
|
| 698 size_t srclen = idx - srcidx; |
|
| 699 if (srclen > 0) { |
|
| 700 memcpy(destptr, str.ptr + srcidx, srclen); |
|
| 701 destptr += srclen; |
|
| 702 srcidx += srclen; |
|
| 703 } |
|
| 704 |
|
| 705 // Copy the replacement and skip the source pattern |
|
| 706 srcidx += search.length; |
|
| 707 memcpy(destptr, replacement.ptr, replacement.length); |
|
| 708 destptr += replacement.length; |
|
| 709 } |
|
| 710 curbuf = curbuf->next; |
|
| 711 } while (curbuf); |
|
| 712 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
|
| 713 |
|
| 714 // Result is guaranteed to be zero-terminated |
|
| 715 result.ptr[result.length] = '\0'; |
|
| 716 |
|
| 717 // Free index buffer |
|
| 718 cx_strrepl_free_ibuf(&ibuf); |
|
| 719 |
|
| 720 return result; |
|
| 721 } |
654 } |
| 722 |
655 |
| 723 CxStrtokCtx cx_strtok_( |
656 CxStrtokCtx cx_strtok_( |
| 724 cxstring str, |
657 cxstring str, |
| 725 cxstring delim, |
658 cxstring delim, |