src/ucx/string.c

changeset 490
d218607f5a7e
parent 438
22eca559aded
equal deleted inserted replaced
489:921f83a8943f 490:d218607f5a7e
70 str->ptr = NULL; 70 str->ptr = NULL;
71 str->length = 0; 71 str->length = 0;
72 } 72 }
73 73
74 void cx_strfree_a( 74 void cx_strfree_a(
75 CxAllocator *alloc, 75 CxAllocator const *alloc,
76 cxmutstr *str 76 cxmutstr *str
77 ) { 77 ) {
78 cxFree(alloc, str->ptr); 78 cxFree(alloc, str->ptr);
79 str->ptr = NULL; 79 str->ptr = NULL;
80 str->length = 0; 80 str->length = 0;
96 va_end(ap); 96 va_end(ap);
97 97
98 return size; 98 return size;
99 } 99 }
100 100
101 cxmutstr cx_strcat_a( 101 cxmutstr cx_strcat_ma(
102 CxAllocator *alloc, 102 CxAllocator const *alloc,
103 cxmutstr str,
103 size_t count, 104 size_t count,
104 ... 105 ...
105 ) { 106 ) {
107 if (count == 0) return str;
108
106 cxstring *strings = calloc(count, sizeof(cxstring)); 109 cxstring *strings = calloc(count, sizeof(cxstring));
107 if (!strings) abort(); 110 if (!strings) abort();
108 111
109 va_list ap; 112 va_list ap;
110 va_start(ap, count); 113 va_start(ap, count);
111 114
112 // get all args and overall length 115 // get all args and overall length
113 size_t slen = 0; 116 size_t slen = str.length;
114 cx_for_n(i, count) { 117 cx_for_n(i, count) {
115 cxstring s = va_arg (ap, cxstring); 118 cxstring s = va_arg (ap, cxstring);
116 strings[i] = s; 119 strings[i] = s;
117 slen += s.length; 120 slen += s.length;
118 } 121 }
119 122 va_end(ap);
120 // create new string 123
121 cxmutstr result; 124 // reallocate or create new string
122 result.ptr = cxMalloc(alloc, slen + 1); 125 if (str.ptr == NULL) {
123 result.length = slen; 126 str.ptr = cxMalloc(alloc, slen + 1);
124 if (result.ptr == NULL) abort(); 127 } else {
128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
129 }
130 if (str.ptr == NULL) abort();
125 131
126 // concatenate strings 132 // concatenate strings
127 size_t pos = 0; 133 size_t pos = str.length;
134 str.length = slen;
128 cx_for_n(i, count) { 135 cx_for_n(i, count) {
129 cxstring s = strings[i]; 136 cxstring s = strings[i];
130 memcpy(result.ptr + pos, s.ptr, s.length); 137 memcpy(str.ptr + pos, s.ptr, s.length);
131 pos += s.length; 138 pos += s.length;
132 } 139 }
133 140
134 // terminate string 141 // terminate string
135 result.ptr[result.length] = '\0'; 142 str.ptr[str.length] = '\0';
136 143
137 // free temporary array 144 // free temporary array
138 free(strings); 145 free(strings);
139 146
140 return result; 147 return str;
141 } 148 }
142 149
143 cxstring cx_strsubs( 150 cxstring cx_strsubs(
144 cxstring string, 151 cxstring string,
145 size_t start 152 size_t start
224 ) { 231 ) {
225 cxstring result = cx_strrchr(cx_strcast(string), chr); 232 cxstring result = cx_strrchr(cx_strcast(string), chr);
226 return (cxmutstr) {(char *) result.ptr, result.length}; 233 return (cxmutstr) {(char *) result.ptr, result.length};
227 } 234 }
228 235
229 #define STRSTR_SBO_BUFLEN 512 236 #ifndef CX_STRSTR_SBO_SIZE
237 #define CX_STRSTR_SBO_SIZE 512
238 #endif
230 239
231 cxstring cx_strstr( 240 cxstring cx_strstr(
232 cxstring haystack, 241 cxstring haystack,
233 cxstring needle 242 cxstring needle
234 ) { 243 ) {
248 * The original algorithm needs a (-1) at one single place, 257 * The original algorithm needs a (-1) at one single place,
249 * and we want to avoid that. 258 * and we want to avoid that.
250 */ 259 */
251 260
252 // local prefix table 261 // local prefix table
253 size_t s_prefix_table[STRSTR_SBO_BUFLEN]; 262 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
254 263
255 // check needle length and use appropriate prefix table 264 // check needle length and use appropriate prefix table
256 // if the pattern exceeds static prefix table, allocate on the heap 265 // if the pattern exceeds static prefix table, allocate on the heap
257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN; 266 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
258 register size_t *ptable = useheap ? calloc(needle.length + 1, 267 register size_t *ptable = useheap ? calloc(needle.length + 1,
259 sizeof(size_t)) : s_prefix_table; 268 sizeof(size_t)) : s_prefix_table;
260 269
261 // keep counter in registers 270 // keep counter in registers
262 register size_t i, j; 271 register size_t i, j;
365 374
366 return n; 375 return n;
367 } 376 }
368 377
369 size_t cx_strsplit_a( 378 size_t cx_strsplit_a(
370 CxAllocator *allocator, 379 CxAllocator const *allocator,
371 cxstring string, 380 cxstring string,
372 cxstring delim, 381 cxstring delim,
373 size_t limit, 382 size_t limit,
374 cxstring **output 383 cxstring **output
375 ) { 384 ) {
407 return cx_strsplit(cx_strcast(string), 416 return cx_strsplit(cx_strcast(string),
408 delim, limit, (cxstring *) output); 417 delim, limit, (cxstring *) output);
409 } 418 }
410 419
411 size_t cx_strsplit_ma( 420 size_t cx_strsplit_ma(
412 CxAllocator *allocator, 421 CxAllocator const *allocator,
413 cxmutstr string, 422 cxmutstr string,
414 cxstring delim, 423 cxstring delim,
415 size_t limit, 424 size_t limit,
416 cxmutstr **output 425 cxmutstr **output
417 ) { 426 ) {
447 } else { 456 } else {
448 return -1; 457 return -1;
449 } 458 }
450 } 459 }
451 460
461 int cx_strcmp_p(
462 void const *s1,
463 void const *s2
464 ) {
465 cxstring const *left = s1;
466 cxstring const *right = s2;
467 return cx_strcmp(*left, *right);
468 }
469
470 int cx_strcasecmp_p(
471 void const *s1,
472 void const *s2
473 ) {
474 cxstring const *left = s1;
475 cxstring const *right = s2;
476 return cx_strcasecmp(*left, *right);
477 }
478
452 cxmutstr cx_strdup_a( 479 cxmutstr cx_strdup_a(
453 CxAllocator *allocator, 480 CxAllocator const *allocator,
454 cxstring string 481 cxstring string
455 ) { 482 ) {
456 cxmutstr result = { 483 cxmutstr result = {
457 cxMalloc(allocator, string.length + 1), 484 cxMalloc(allocator, string.length + 1),
458 string.length 485 string.length
537 cx_for_n(i, string.length) { 564 cx_for_n(i, string.length) {
538 string.ptr[i] = (char) toupper(string.ptr[i]); 565 string.ptr[i] = (char) toupper(string.ptr[i]);
539 } 566 }
540 } 567 }
541 568
542 #define REPLACE_INDEX_BUFFER_MAX 100 569 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
570 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
571 #endif
543 572
544 struct cx_strreplace_ibuf { 573 struct cx_strreplace_ibuf {
545 size_t *buf; 574 size_t *buf;
546 struct cx_strreplace_ibuf *next; 575 struct cx_strreplace_ibuf *next;
547 unsigned int len; 576 unsigned int len;
555 buf = next; 584 buf = next;
556 } 585 }
557 } 586 }
558 587
559 cxmutstr cx_strreplacen_a( 588 cxmutstr cx_strreplacen_a(
560 CxAllocator *allocator, 589 CxAllocator const *allocator,
561 cxstring str, 590 cxstring str,
562 cxstring pattern, 591 cxstring pattern,
563 cxstring replacement, 592 cxstring replacement,
564 size_t replmax 593 size_t replmax
565 ) { 594 ) {
568 return cx_strdup_a(allocator, str); 597 return cx_strdup_a(allocator, str);
569 598
570 // Compute expected buffer length 599 // Compute expected buffer length
571 size_t ibufmax = str.length / pattern.length; 600 size_t ibufmax = str.length / pattern.length;
572 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 601 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
573 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { 602 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
574 ibuflen = REPLACE_INDEX_BUFFER_MAX; 603 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
575 } 604 }
576 605
577 // Allocate first index buffer 606 // Allocate first index buffer
578 struct cx_strreplace_ibuf *firstbuf, *curbuf; 607 struct cx_strreplace_ibuf *firstbuf, *curbuf;
579 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); 608 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
668 cx_strrepl_free_ibuf(firstbuf); 697 cx_strrepl_free_ibuf(firstbuf);
669 698
670 return result; 699 return result;
671 } 700 }
672 701
673 702 CxStrtokCtx cx_strtok(
703 cxstring str,
704 cxstring delim,
705 size_t limit
706 ) {
707 CxStrtokCtx ctx;
708 ctx.str = str;
709 ctx.delim = delim;
710 ctx.limit = limit;
711 ctx.pos = 0;
712 ctx.next_pos = 0;
713 ctx.delim_pos = 0;
714 ctx.found = 0;
715 ctx.delim_more = NULL;
716 ctx.delim_more_count = 0;
717 return ctx;
718 }
719
720 CxStrtokCtx cx_strtok_m(
721 cxmutstr str,
722 cxstring delim,
723 size_t limit
724 ) {
725 return cx_strtok(cx_strcast(str), delim, limit);
726 }
727
728 bool cx_strtok_next(
729 CxStrtokCtx *ctx,
730 cxstring *token
731 ) {
732 // abortion criteria
733 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
734 return false;
735 }
736
737 // determine the search start
738 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
739
740 // search the next delimiter
741 cxstring delim = cx_strstr(haystack, ctx->delim);
742
743 // if found, make delim capture exactly the delimiter
744 if (delim.length > 0) {
745 delim.length = ctx->delim.length;
746 }
747
748 // if more delimiters are specified, check them now
749 if (ctx->delim_more_count > 0) {
750 cx_for_n(i, ctx->delim_more_count) {
751 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
752 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
753 delim.ptr = d.ptr;
754 delim.length = ctx->delim_more[i].length;
755 }
756 }
757 }
758
759 // store the token information and adjust the context
760 ctx->found++;
761 ctx->pos = ctx->next_pos;
762 token->ptr = &ctx->str.ptr[ctx->pos];
763 ctx->delim_pos = delim.length == 0 ?
764 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
765 token->length = ctx->delim_pos - ctx->pos;
766 ctx->next_pos = ctx->delim_pos + delim.length;
767
768 return true;
769 }
770
771 bool cx_strtok_next_m(
772 CxStrtokCtx *ctx,
773 cxmutstr *token
774 ) {
775 return cx_strtok_next(ctx, (cxstring *) token);
776 }
777
778 void cx_strtok_delim(
779 CxStrtokCtx *ctx,
780 cxstring const *delim,
781 size_t count
782 ) {
783 ctx->delim_more = delim;
784 ctx->delim_more_count = count;
785 }

mercurial