ucx/string.c

branch
dav-2
changeset 886
da79af4baec8
parent 854
1c8401ece69e
child 889
42cdbf9bbd49
equal deleted inserted replaced
885:591377a27fa3 886:da79af4baec8
40 #include <strings.h> 40 #include <strings.h>
41 #define cx_strcasecmp_impl strncasecmp 41 #define cx_strcasecmp_impl strncasecmp
42 #endif 42 #endif
43 43
44 cxmutstr cx_mutstr(char *cstring) { 44 cxmutstr cx_mutstr(char *cstring) {
45 return (cxmutstr) {cstring, strlen(cstring)}; 45 return (cxmutstr) {cstring, cstring == NULL ? 0 : strlen(cstring)};
46 } 46 }
47 47
48 cxmutstr cx_mutstrn( 48 cxmutstr cx_mutstrn(
49 char *cstring, 49 char *cstring,
50 size_t length 50 size_t length
51 ) { 51 ) {
52 return (cxmutstr) {cstring, length}; 52 return (cxmutstr) {cstring, length};
53 } 53 }
54 54
55 cxstring cx_str(const char *cstring) { 55 cxstring cx_str(const char *cstring) {
56 return (cxstring) {cstring, strlen(cstring)}; 56 return (cxstring) {cstring, cstring == NULL ? 0 : strlen(cstring)};
57 } 57 }
58 58
59 cxstring cx_strn( 59 cxstring cx_strn(
60 const char *cstring, 60 const char *cstring,
61 size_t length 61 size_t length
63 return (cxstring) {cstring, length}; 63 return (cxstring) {cstring, length};
64 } 64 }
65 65
66 void cx_strfree(cxmutstr *str) { 66 void cx_strfree(cxmutstr *str) {
67 if (str == NULL) return; 67 if (str == NULL) return;
68 free(str->ptr); 68 cxFreeDefault(str->ptr);
69 str->ptr = NULL; 69 str->ptr = NULL;
70 str->length = 0; 70 str->length = 0;
71 } 71 }
72 72
73 void cx_strfree_a( 73 void cx_strfree_a(
76 ) { 76 ) {
77 if (str == NULL) return; 77 if (str == NULL) return;
78 cxFree(alloc, str->ptr); 78 cxFree(alloc, str->ptr);
79 str->ptr = NULL; 79 str->ptr = NULL;
80 str->length = 0; 80 str->length = 0;
81 }
82
83 int cx_strcpy_a(
84 const CxAllocator *alloc,
85 cxmutstr *dest,
86 cxstring src
87 ) {
88 if (cxReallocate(alloc, &dest->ptr, src.length + 1)) {
89 return 1;
90 }
91
92 memcpy(dest->ptr, src.ptr, src.length);
93 dest->length = src.length;
94 dest->ptr[dest->length] = '\0';
95
96 return 0;
81 } 97 }
82 98
83 size_t cx_strlen( 99 size_t cx_strlen(
84 size_t count, 100 size_t count,
85 ... 101 ...
104 cxmutstr str, 120 cxmutstr str,
105 size_t count, 121 size_t count,
106 ... 122 ...
107 ) { 123 ) {
108 if (count == 0) return str; 124 if (count == 0) return str;
109
110 cxstring strings_stack[8];
111 cxstring *strings;
112 if (count > 8) {
113 strings = calloc(count, sizeof(cxstring));
114 if (strings == NULL) {
115 return (cxmutstr) {NULL, 0};
116 }
117 } else {
118 strings = strings_stack;
119 }
120
121 va_list ap; 125 va_list ap;
122 va_start(ap, count); 126 va_start(ap, count);
123 127 va_list ap2;
124 // get all args and overall length 128 va_copy(ap2, ap);
129
130 // compute overall length
125 bool overflow = false; 131 bool overflow = false;
126 size_t slen = str.length; 132 size_t slen = str.length;
127 for (size_t i = 0; i < count; i++) { 133 for (size_t i = 0; i < count; i++) {
128 cxstring s = va_arg (ap, cxstring); 134 cxstring s = va_arg(ap, cxstring);
129 strings[i] = s;
130 if (slen > SIZE_MAX - str.length) overflow = true; 135 if (slen > SIZE_MAX - str.length) overflow = true;
131 slen += s.length; 136 slen += s.length;
132 } 137 }
133 va_end(ap); 138 va_end(ap);
134 139
135 // abort in case of overflow 140 // abort in case of overflow
136 if (overflow) { 141 if (overflow) {
142 va_end(ap2);
137 errno = EOVERFLOW; 143 errno = EOVERFLOW;
138 if (strings != strings_stack) {
139 free(strings);
140 }
141 return (cxmutstr) { NULL, 0 }; 144 return (cxmutstr) { NULL, 0 };
142 } 145 }
143 146
144 // reallocate or create new string 147 // reallocate or create new string
145 char *newstr; 148 char *newstr;
147 newstr = cxMalloc(alloc, slen + 1); 150 newstr = cxMalloc(alloc, slen + 1);
148 } else { 151 } else {
149 newstr = cxRealloc(alloc, str.ptr, slen + 1); 152 newstr = cxRealloc(alloc, str.ptr, slen + 1);
150 } 153 }
151 if (newstr == NULL) { 154 if (newstr == NULL) {
152 if (strings != strings_stack) { 155 va_end(ap2);
153 free(strings);
154 }
155 return (cxmutstr) {NULL, 0}; 156 return (cxmutstr) {NULL, 0};
156 } 157 }
157 str.ptr = newstr; 158 str.ptr = newstr;
158 159
159 // concatenate strings 160 // concatenate strings
160 size_t pos = str.length; 161 size_t pos = str.length;
161 str.length = slen; 162 str.length = slen;
162 for (size_t i = 0; i < count; i++) { 163 for (size_t i = 0; i < count; i++) {
163 cxstring s = strings[i]; 164 cxstring s = va_arg(ap2, cxstring);
164 memcpy(str.ptr + pos, s.ptr, s.length); 165 memcpy(str.ptr + pos, s.ptr, s.length);
165 pos += s.length; 166 pos += s.length;
166 } 167 }
168 va_end(ap2);
167 169
168 // terminate string 170 // terminate string
169 str.ptr[str.length] = '\0'; 171 str.ptr[str.length] = '\0';
170
171 // free temporary array
172 if (strings != strings_stack) {
173 free(strings);
174 }
175 172
176 return str; 173 return str;
177 } 174 }
178 175
179 cxstring cx_strsubs( 176 cxstring cx_strsubs(
287 size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; 284 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
288 285
289 // check needle length and use appropriate prefix table 286 // check needle length and use appropriate prefix table
290 // if the pattern exceeds static prefix table, allocate on the heap 287 // if the pattern exceeds static prefix table, allocate on the heap
291 const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; 288 const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
292 register size_t *ptable = useheap ? calloc(needle.length + 1, 289 register size_t *ptable = useheap
293 sizeof(size_t)) : s_prefix_table; 290 ? cxCallocDefault(needle.length + 1, sizeof(size_t))
291 : s_prefix_table;
294 292
295 // keep counter in registers 293 // keep counter in registers
296 register size_t i, j; 294 register size_t i, j;
297 295
298 // fill prefix table 296 // fill prefix table
326 } 324 }
327 } 325 }
328 326
329 // if prefix table was allocated on the heap, free it 327 // if prefix table was allocated on the heap, free it
330 if (useheap) { 328 if (useheap) {
331 free(ptable); 329 cxFreeDefault(ptable);
332 } 330 }
333 331
334 return result; 332 return result;
335 } 333 }
336 334
586 return strncasecmp(string.ptr + string.length - suffix.length, 584 return strncasecmp(string.ptr + string.length - suffix.length,
587 suffix.ptr, suffix.length) == 0; 585 suffix.ptr, suffix.length) == 0;
588 #endif 586 #endif
589 } 587 }
590 588
591 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
592 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
593 #endif
594
595 struct cx_strreplace_ibuf {
596 size_t *buf;
597 struct cx_strreplace_ibuf *next;
598 unsigned int len;
599 };
600
601 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
602 // remember, the first data is on the stack!
603 buf = buf->next;
604 while (buf) {
605 struct cx_strreplace_ibuf *next = buf->next;
606 free(buf->buf);
607 free(buf);
608 buf = next;
609 }
610 }
611
612 cxmutstr cx_strreplacen_a( 589 cxmutstr cx_strreplacen_a(
613 const CxAllocator *allocator, 590 const CxAllocator *allocator,
614 cxstring str, 591 cxstring str,
615 cxstring search, 592 cxstring search,
616 cxstring replacement, 593 cxstring replacement,
617 size_t replmax 594 size_t replmax
618 ) { 595 ) {
619 596 // special cases
620 if (search.length == 0 || search.length > str.length || replmax == 0) 597 if (search.length == 0 || search.length > str.length || replmax == 0) {
621 return cx_strdup_a(allocator, str); 598 return cx_strdup_a(allocator, str);
622 599 }
623 // Compute expected buffer length 600
624 size_t ibufmax = str.length / search.length; 601 size_t in_len = str.length;
625 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 602 size_t search_len = search.length;
626 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { 603 size_t repl_len = replacement.length;
627 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; 604
628 } 605 // first run, count the occurrences
629 606 // and remember where the first is
630 // First index buffer can be on the stack 607 size_t occurrences = 1;
631 struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf; 608 cxstring first = cx_strstr(str, search);
632 size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE]; 609 if (first.length == 0) {
633 ibuf.buf = ibuf_sbo; 610 // special case, no replacements
634 ibuf.next = NULL; 611 return cx_strdup_a(allocator, str);
635 ibuf.len = 0; 612 }
636 613 cxstring tmp = cx_strsubs(first, search_len);
637 // Search occurrences 614 while (occurrences < replmax &&
638 cxstring searchstr = str; 615 (tmp = cx_strstr(tmp, search)).length > 0) {
639 size_t found = 0; 616 occurrences++;
640 do { 617 tmp = cx_strsubs(tmp, search_len);
641 cxstring match = cx_strstr(searchstr, search); 618 }
642 if (match.length > 0) { 619
643 // Allocate next buffer in chain, if required 620 // calculate necessary memory
644 if (curbuf->len == ibuflen) { 621 signed long long diff_len = (signed long long) repl_len - search_len;
645 struct cx_strreplace_ibuf *nextbuf = 622 size_t out_len = in_len + diff_len * occurrences;
646 calloc(1, sizeof(struct cx_strreplace_ibuf)); 623 cxmutstr out = {
647 if (!nextbuf) { 624 cxMalloc(allocator, out_len + 1),
648 cx_strrepl_free_ibuf(&ibuf); 625 out_len
649 return cx_mutstrn(NULL, 0); 626 };
650 } 627 if (out.ptr == NULL) return out;
651 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); 628
652 if (!nextbuf->buf) { 629 // second run: perform the replacements
653 free(nextbuf); 630 // but start where we found the first occurrence
654 cx_strrepl_free_ibuf(&ibuf); 631 const char *inp = str.ptr;
655 return cx_mutstrn(NULL, 0); 632 tmp = first;
656 } 633 char *outp = out.ptr;
657 curbuf->next = nextbuf; 634 while (occurrences-- > 0 && (tmp = cx_strstr(tmp, search)).length > 0) {
658 curbuf = nextbuf; 635 size_t copylen = tmp.ptr - inp;
659 } 636 memcpy(outp, inp, copylen);
660 637 outp += copylen;
661 // Record match index 638 memcpy(outp, replacement.ptr, repl_len);
662 found++; 639 outp += repl_len;
663 size_t idx = match.ptr - str.ptr; 640 inp += copylen + search_len;
664 curbuf->buf[curbuf->len++] = idx; 641 tmp = cx_strsubs(tmp, search_len);
665 searchstr.ptr = match.ptr + search.length; 642 }
666 searchstr.length = str.length - idx - search.length; 643
667 } else { 644 // add the remaining string
668 break; 645 size_t copylen = in_len - (inp - str.ptr);
669 } 646 memcpy(outp, inp, copylen);
670 } while (searchstr.length > 0 && found < replmax); 647 out.ptr[out_len] = '\0';
671 648
672 // Allocate result string 649 return out;
673 cxmutstr result;
674 {
675 long long adjlen = (long long) replacement.length - (long long) search.length;
676 size_t rcount = 0;
677 curbuf = &ibuf;
678 do {
679 rcount += curbuf->len;
680 curbuf = curbuf->next;
681 } while (curbuf);
682 result.length = str.length + rcount * adjlen;
683 result.ptr = cxMalloc(allocator, result.length + 1);
684 if (!result.ptr) {
685 cx_strrepl_free_ibuf(&ibuf);
686 return cx_mutstrn(NULL, 0);
687 }
688 }
689
690 // Build result string
691 curbuf = &ibuf;
692 size_t srcidx = 0;
693 char *destptr = result.ptr;
694 do {
695 for (size_t i = 0; i < curbuf->len; i++) {
696 // Copy source part up to next match
697 size_t idx = curbuf->buf[i];
698 size_t srclen = idx - srcidx;
699 if (srclen > 0) {
700 memcpy(destptr, str.ptr + srcidx, srclen);
701 destptr += srclen;
702 srcidx += srclen;
703 }
704
705 // Copy the replacement and skip the source pattern
706 srcidx += search.length;
707 memcpy(destptr, replacement.ptr, replacement.length);
708 destptr += replacement.length;
709 }
710 curbuf = curbuf->next;
711 } while (curbuf);
712 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
713
714 // Result is guaranteed to be zero-terminated
715 result.ptr[result.length] = '\0';
716
717 // Free index buffer
718 cx_strrepl_free_ibuf(&ibuf);
719
720 return result;
721 } 650 }
722 651
723 CxStrtokCtx cx_strtok_( 652 CxStrtokCtx cx_strtok_(
724 cxstring str, 653 cxstring str,
725 cxstring delim, 654 cxstring delim,

mercurial