src/ucx/string.c

changeset 579
e10457d74fe1
parent 490
d218607f5a7e
equal deleted inserted replaced
578:eb48f716b31c 579:e10457d74fe1
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28
29 #include "cx/string.h" 28 #include "cx/string.h"
30 #include "cx/utils.h"
31 29
32 #include <string.h> 30 #include <string.h>
33 #include <stdarg.h> 31 #include <stdarg.h>
34 #include <ctype.h> 32 #include <assert.h>
35 33 #include <errno.h>
36 #ifndef _WIN32 34 #include <limits.h>
37 35 #include <float.h>
38 #include <strings.h> // for strncasecmp() 36
39 37 #ifdef _WIN32
40 #endif // _WIN32 38 #define cx_strcasecmp_impl _strnicmp
39 #else
40 #include <strings.h>
41 #define cx_strcasecmp_impl strncasecmp
42 #endif
41 43
42 cxmutstr cx_mutstr(char *cstring) { 44 cxmutstr cx_mutstr(char *cstring) {
43 return (cxmutstr) {cstring, strlen(cstring)}; 45 return (cxmutstr) {cstring, strlen(cstring)};
44 } 46 }
45 47
59 size_t length 61 size_t length
60 ) { 62 ) {
61 return (cxstring) {cstring, length}; 63 return (cxstring) {cstring, length};
62 } 64 }
63 65
64 cxstring cx_strcast(cxmutstr str) {
65 return (cxstring) {str.ptr, str.length};
66 }
67
68 void cx_strfree(cxmutstr *str) { 66 void cx_strfree(cxmutstr *str) {
67 if (str == NULL) return;
69 free(str->ptr); 68 free(str->ptr);
70 str->ptr = NULL; 69 str->ptr = NULL;
71 str->length = 0; 70 str->length = 0;
72 } 71 }
73 72
74 void cx_strfree_a( 73 void cx_strfree_a(
75 CxAllocator const *alloc, 74 const CxAllocator *alloc,
76 cxmutstr *str 75 cxmutstr *str
77 ) { 76 ) {
77 if (str == NULL) return;
78 cxFree(alloc, str->ptr); 78 cxFree(alloc, str->ptr);
79 str->ptr = NULL; 79 str->ptr = NULL;
80 str->length = 0; 80 str->length = 0;
81 } 81 }
82 82
87 if (count == 0) return 0; 87 if (count == 0) return 0;
88 88
89 va_list ap; 89 va_list ap;
90 va_start(ap, count); 90 va_start(ap, count);
91 size_t size = 0; 91 size_t size = 0;
92 cx_for_n(i, count) { 92 for (size_t i = 0; i < count; i++) {
93 cxstring str = va_arg(ap, cxstring); 93 cxstring str = va_arg(ap, cxstring);
94 if (size > SIZE_MAX - str.length) errno = EOVERFLOW;
94 size += str.length; 95 size += str.length;
95 } 96 }
96 va_end(ap); 97 va_end(ap);
97 98
98 return size; 99 return size;
99 } 100 }
100 101
101 cxmutstr cx_strcat_ma( 102 cxmutstr cx_strcat_ma(
102 CxAllocator const *alloc, 103 const CxAllocator *alloc,
103 cxmutstr str, 104 cxmutstr str,
104 size_t count, 105 size_t count,
105 ... 106 ...
106 ) { 107 ) {
107 if (count == 0) return str; 108 if (count == 0) return str;
108 109
109 cxstring *strings = calloc(count, sizeof(cxstring)); 110 cxstring strings_stack[8];
110 if (!strings) abort(); 111 cxstring *strings;
112 if (count > 8) {
113 strings = calloc(count, sizeof(cxstring));
114 if (strings == NULL) {
115 return (cxmutstr) {NULL, 0};
116 }
117 } else {
118 strings = strings_stack;
119 }
111 120
112 va_list ap; 121 va_list ap;
113 va_start(ap, count); 122 va_start(ap, count);
114 123
115 // get all args and overall length 124 // get all args and overall length
125 bool overflow = false;
116 size_t slen = str.length; 126 size_t slen = str.length;
117 cx_for_n(i, count) { 127 for (size_t i = 0; i < count; i++) {
118 cxstring s = va_arg (ap, cxstring); 128 cxstring s = va_arg (ap, cxstring);
119 strings[i] = s; 129 strings[i] = s;
130 if (slen > SIZE_MAX - str.length) overflow = true;
120 slen += s.length; 131 slen += s.length;
121 } 132 }
122 va_end(ap); 133 va_end(ap);
123 134
135 // abort in case of overflow
136 if (overflow) {
137 errno = EOVERFLOW;
138 if (strings != strings_stack) {
139 free(strings);
140 }
141 return (cxmutstr) { NULL, 0 };
142 }
143
124 // reallocate or create new string 144 // reallocate or create new string
145 char *newstr;
125 if (str.ptr == NULL) { 146 if (str.ptr == NULL) {
126 str.ptr = cxMalloc(alloc, slen + 1); 147 newstr = cxMalloc(alloc, slen + 1);
127 } else { 148 } else {
128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1); 149 newstr = cxRealloc(alloc, str.ptr, slen + 1);
129 } 150 }
130 if (str.ptr == NULL) abort(); 151 if (newstr == NULL) {
152 if (strings != strings_stack) {
153 free(strings);
154 }
155 return (cxmutstr) {NULL, 0};
156 }
157 str.ptr = newstr;
131 158
132 // concatenate strings 159 // concatenate strings
133 size_t pos = str.length; 160 size_t pos = str.length;
134 str.length = slen; 161 str.length = slen;
135 cx_for_n(i, count) { 162 for (size_t i = 0; i < count; i++) {
136 cxstring s = strings[i]; 163 cxstring s = strings[i];
137 memcpy(str.ptr + pos, s.ptr, s.length); 164 memcpy(str.ptr + pos, s.ptr, s.length);
138 pos += s.length; 165 pos += s.length;
139 } 166 }
140 167
141 // terminate string 168 // terminate string
142 str.ptr[str.length] = '\0'; 169 str.ptr[str.length] = '\0';
143 170
144 // free temporary array 171 // free temporary array
145 free(strings); 172 if (strings != strings_stack) {
173 free(strings);
174 }
146 175
147 return str; 176 return str;
148 } 177 }
149 178
150 cxstring cx_strsubs( 179 cxstring cx_strsubs(
189 218
190 cxstring cx_strchr( 219 cxstring cx_strchr(
191 cxstring string, 220 cxstring string,
192 int chr 221 int chr
193 ) { 222 ) {
194 chr = 0xFF & chr; 223 char *ret = memchr(string.ptr, 0xFF & chr, string.length);
195 // TODO: improve by comparing multiple bytes at once 224 if (ret == NULL) return (cxstring) {NULL, 0};
196 cx_for_n(i, string.length) { 225 return (cxstring) {ret, string.length - (ret - string.ptr)};
197 if (string.ptr[i] == chr) {
198 return cx_strsubs(string, i);
199 }
200 }
201 return (cxstring) {NULL, 0};
202 } 226 }
203 227
204 cxmutstr cx_strchr_m( 228 cxmutstr cx_strchr_m(
205 cxmutstr string, 229 cxmutstr string,
206 int chr 230 int chr
232 cxstring result = cx_strrchr(cx_strcast(string), chr); 256 cxstring result = cx_strrchr(cx_strcast(string), chr);
233 return (cxmutstr) {(char *) result.ptr, result.length}; 257 return (cxmutstr) {(char *) result.ptr, result.length};
234 } 258 }
235 259
236 #ifndef CX_STRSTR_SBO_SIZE 260 #ifndef CX_STRSTR_SBO_SIZE
237 #define CX_STRSTR_SBO_SIZE 512 261 #define CX_STRSTR_SBO_SIZE 128
238 #endif 262 #endif
263 const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
239 264
240 cxstring cx_strstr( 265 cxstring cx_strstr(
241 cxstring haystack, 266 cxstring haystack,
242 cxstring needle 267 cxstring needle
243 ) { 268 ) {
261 // local prefix table 286 // local prefix table
262 size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; 287 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
263 288
264 // check needle length and use appropriate prefix table 289 // check needle length and use appropriate prefix table
265 // if the pattern exceeds static prefix table, allocate on the heap 290 // if the pattern exceeds static prefix table, allocate on the heap
266 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; 291 const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
267 register size_t *ptable = useheap ? calloc(needle.length + 1, 292 register size_t *ptable = useheap ? calloc(needle.length + 1,
268 sizeof(size_t)) : s_prefix_table; 293 sizeof(size_t)) : s_prefix_table;
269 294
270 // keep counter in registers 295 // keep counter in registers
271 register size_t i, j; 296 register size_t i, j;
300 break; 325 break;
301 } 326 }
302 } 327 }
303 328
304 // if prefix table was allocated on the heap, free it 329 // if prefix table was allocated on the heap, free it
305 if (ptable != s_prefix_table) { 330 if (useheap) {
306 free(ptable); 331 free(ptable);
307 } 332 }
308 333
309 return result; 334 return result;
310 } 335 }
374 399
375 return n; 400 return n;
376 } 401 }
377 402
378 size_t cx_strsplit_a( 403 size_t cx_strsplit_a(
379 CxAllocator const *allocator, 404 const CxAllocator *allocator,
380 cxstring string, 405 cxstring string,
381 cxstring delim, 406 cxstring delim,
382 size_t limit, 407 size_t limit,
383 cxstring **output 408 cxstring **output
384 ) { 409 ) {
416 return cx_strsplit(cx_strcast(string), 441 return cx_strsplit(cx_strcast(string),
417 delim, limit, (cxstring *) output); 442 delim, limit, (cxstring *) output);
418 } 443 }
419 444
420 size_t cx_strsplit_ma( 445 size_t cx_strsplit_ma(
421 CxAllocator const *allocator, 446 const CxAllocator *allocator,
422 cxmutstr string, 447 cxmutstr string,
423 cxstring delim, 448 cxstring delim,
424 size_t limit, 449 size_t limit,
425 cxmutstr **output 450 cxmutstr **output
426 ) { 451 ) {
431 int cx_strcmp( 456 int cx_strcmp(
432 cxstring s1, 457 cxstring s1,
433 cxstring s2 458 cxstring s2
434 ) { 459 ) {
435 if (s1.length == s2.length) { 460 if (s1.length == s2.length) {
436 return memcmp(s1.ptr, s2.ptr, s1.length); 461 return strncmp(s1.ptr, s2.ptr, s1.length);
437 } else if (s1.length > s2.length) { 462 } else if (s1.length > s2.length) {
463 int r = strncmp(s1.ptr, s2.ptr, s2.length);
464 if (r != 0) return r;
438 return 1; 465 return 1;
439 } else { 466 } else {
467 int r = strncmp(s1.ptr, s2.ptr, s1.length);
468 if (r != 0) return r;
440 return -1; 469 return -1;
441 } 470 }
442 } 471 }
443 472
444 int cx_strcasecmp( 473 int cx_strcasecmp(
445 cxstring s1, 474 cxstring s1,
446 cxstring s2 475 cxstring s2
447 ) { 476 ) {
448 if (s1.length == s2.length) { 477 if (s1.length == s2.length) {
449 #ifdef _WIN32 478 return cx_strcasecmp_impl(s1.ptr, s2.ptr, s1.length);
450 return _strnicmp(s1.ptr, s2.ptr, s1.length);
451 #else
452 return strncasecmp(s1.ptr, s2.ptr, s1.length);
453 #endif
454 } else if (s1.length > s2.length) { 479 } else if (s1.length > s2.length) {
480 int r = cx_strcasecmp_impl(s1.ptr, s2.ptr, s2.length);
481 if (r != 0) return r;
455 return 1; 482 return 1;
456 } else { 483 } else {
484 int r = cx_strcasecmp_impl(s1.ptr, s2.ptr, s1.length);
485 if (r != 0) return r;
457 return -1; 486 return -1;
458 } 487 }
459 } 488 }
460 489
461 int cx_strcmp_p( 490 int cx_strcmp_p(
462 void const *s1, 491 const void *s1,
463 void const *s2 492 const void *s2
464 ) { 493 ) {
465 cxstring const *left = s1; 494 const cxstring *left = s1;
466 cxstring const *right = s2; 495 const cxstring *right = s2;
467 return cx_strcmp(*left, *right); 496 return cx_strcmp(*left, *right);
468 } 497 }
469 498
470 int cx_strcasecmp_p( 499 int cx_strcasecmp_p(
471 void const *s1, 500 const void *s1,
472 void const *s2 501 const void *s2
473 ) { 502 ) {
474 cxstring const *left = s1; 503 const cxstring *left = s1;
475 cxstring const *right = s2; 504 const cxstring *right = s2;
476 return cx_strcasecmp(*left, *right); 505 return cx_strcasecmp(*left, *right);
477 } 506 }
478 507
479 cxmutstr cx_strdup_a( 508 cxmutstr cx_strdup_a_(
480 CxAllocator const *allocator, 509 const CxAllocator *allocator,
481 cxstring string 510 cxstring string
482 ) { 511 ) {
483 cxmutstr result = { 512 cxmutstr result = {
484 cxMalloc(allocator, string.length + 1), 513 cxMalloc(allocator, string.length + 1),
485 string.length 514 string.length
491 memcpy(result.ptr, string.ptr, string.length); 520 memcpy(result.ptr, string.ptr, string.length);
492 result.ptr[string.length] = '\0'; 521 result.ptr[string.length] = '\0';
493 return result; 522 return result;
494 } 523 }
495 524
/* Locale-independent whitespace test: space, tab, CR, LF, VT and FF. */
static bool str_isspace(char c) {
    // TODO: remove once UCX has public API for this
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\v':
        case '\f':
            return true;
        default:
            return false;
    }
}
529
496 cxstring cx_strtrim(cxstring string) { 530 cxstring cx_strtrim(cxstring string) {
497 cxstring result = string; 531 cxstring result = string;
498 // TODO: optimize by comparing multiple bytes at once 532 // TODO: optimize by comparing multiple bytes at once
499 while (result.length > 0 && isspace(*result.ptr)) { 533 while (result.length > 0 && str_isspace(*result.ptr)) {
500 result.ptr++; 534 result.ptr++;
501 result.length--; 535 result.length--;
502 } 536 }
503 while (result.length > 0 && isspace(result.ptr[result.length - 1])) { 537 while (result.length > 0 && str_isspace(result.ptr[result.length - 1])) {
504 result.length--; 538 result.length--;
505 } 539 }
506 return result; 540 return result;
507 } 541 }
508 542
552 return strncasecmp(string.ptr + string.length - suffix.length, 586 return strncasecmp(string.ptr + string.length - suffix.length,
553 suffix.ptr, suffix.length) == 0; 587 suffix.ptr, suffix.length) == 0;
554 #endif 588 #endif
555 } 589 }
556 590
557 void cx_strlower(cxmutstr string) {
558 cx_for_n(i, string.length) {
559 string.ptr[i] = (char) tolower(string.ptr[i]);
560 }
561 }
562
563 void cx_strupper(cxmutstr string) {
564 cx_for_n(i, string.length) {
565 string.ptr[i] = (char) toupper(string.ptr[i]);
566 }
567 }
568
569 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE 591 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
570 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 592 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
571 #endif 593 #endif
572 594
573 struct cx_strreplace_ibuf { 595 struct cx_strreplace_ibuf {
575 struct cx_strreplace_ibuf *next; 597 struct cx_strreplace_ibuf *next;
576 unsigned int len; 598 unsigned int len;
577 }; 599 };
578 600
579 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { 601 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
602 // remember, the first data is on the stack!
603 buf = buf->next;
580 while (buf) { 604 while (buf) {
581 struct cx_strreplace_ibuf *next = buf->next; 605 struct cx_strreplace_ibuf *next = buf->next;
582 free(buf->buf); 606 free(buf->buf);
583 free(buf); 607 free(buf);
584 buf = next; 608 buf = next;
585 } 609 }
586 } 610 }
587 611
588 cxmutstr cx_strreplacen_a( 612 cxmutstr cx_strreplacen_a(
589 CxAllocator const *allocator, 613 const CxAllocator *allocator,
590 cxstring str, 614 cxstring str,
591 cxstring pattern, 615 cxstring search,
592 cxstring replacement, 616 cxstring replacement,
593 size_t replmax 617 size_t replmax
594 ) { 618 ) {
595 619
596 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) 620 if (search.length == 0 || search.length > str.length || replmax == 0)
597 return cx_strdup_a(allocator, str); 621 return cx_strdup_a(allocator, str);
598 622
599 // Compute expected buffer length 623 // Compute expected buffer length
600 size_t ibufmax = str.length / pattern.length; 624 size_t ibufmax = str.length / search.length;
601 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 625 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
602 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { 626 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
603 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; 627 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
604 } 628 }
605 629
606 // Allocate first index buffer 630 // First index buffer can be on the stack
607 struct cx_strreplace_ibuf *firstbuf, *curbuf; 631 struct cx_strreplace_ibuf ibuf, *curbuf = &ibuf;
608 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); 632 size_t ibuf_sbo[CX_STRREPLACE_INDEX_BUFFER_SIZE];
609 if (!firstbuf) return cx_mutstrn(NULL, 0); 633 ibuf.buf = ibuf_sbo;
610 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); 634 ibuf.next = NULL;
611 if (!firstbuf->buf) { 635 ibuf.len = 0;
612 free(firstbuf);
613 return cx_mutstrn(NULL, 0);
614 }
615 636
616 // Search occurrences 637 // Search occurrences
617 cxstring searchstr = str; 638 cxstring searchstr = str;
618 size_t found = 0; 639 size_t found = 0;
619 do { 640 do {
620 cxstring match = cx_strstr(searchstr, pattern); 641 cxstring match = cx_strstr(searchstr, search);
621 if (match.length > 0) { 642 if (match.length > 0) {
622 // Allocate next buffer in chain, if required 643 // Allocate next buffer in chain, if required
623 if (curbuf->len == ibuflen) { 644 if (curbuf->len == ibuflen) {
624 struct cx_strreplace_ibuf *nextbuf = 645 struct cx_strreplace_ibuf *nextbuf =
625 calloc(1, sizeof(struct cx_strreplace_ibuf)); 646 calloc(1, sizeof(struct cx_strreplace_ibuf));
626 if (!nextbuf) { 647 if (!nextbuf) {
627 cx_strrepl_free_ibuf(firstbuf); 648 cx_strrepl_free_ibuf(&ibuf);
628 return cx_mutstrn(NULL, 0); 649 return cx_mutstrn(NULL, 0);
629 } 650 }
630 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); 651 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
631 if (!nextbuf->buf) { 652 if (!nextbuf->buf) {
632 free(nextbuf); 653 free(nextbuf);
633 cx_strrepl_free_ibuf(firstbuf); 654 cx_strrepl_free_ibuf(&ibuf);
634 return cx_mutstrn(NULL, 0); 655 return cx_mutstrn(NULL, 0);
635 } 656 }
636 curbuf->next = nextbuf; 657 curbuf->next = nextbuf;
637 curbuf = nextbuf; 658 curbuf = nextbuf;
638 } 659 }
639 660
640 // Record match index 661 // Record match index
641 found++; 662 found++;
642 size_t idx = match.ptr - str.ptr; 663 size_t idx = match.ptr - str.ptr;
643 curbuf->buf[curbuf->len++] = idx; 664 curbuf->buf[curbuf->len++] = idx;
644 searchstr.ptr = match.ptr + pattern.length; 665 searchstr.ptr = match.ptr + search.length;
645 searchstr.length = str.length - idx - pattern.length; 666 searchstr.length = str.length - idx - search.length;
646 } else { 667 } else {
647 break; 668 break;
648 } 669 }
649 } while (searchstr.length > 0 && found < replmax); 670 } while (searchstr.length > 0 && found < replmax);
650 671
651 // Allocate result string 672 // Allocate result string
652 cxmutstr result; 673 cxmutstr result;
653 { 674 {
654 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; 675 long long adjlen = (long long) replacement.length - (long long) search.length;
655 size_t rcount = 0; 676 size_t rcount = 0;
656 curbuf = firstbuf; 677 curbuf = &ibuf;
657 do { 678 do {
658 rcount += curbuf->len; 679 rcount += curbuf->len;
659 curbuf = curbuf->next; 680 curbuf = curbuf->next;
660 } while (curbuf); 681 } while (curbuf);
661 result.length = str.length + rcount * adjlen; 682 result.length = str.length + rcount * adjlen;
662 result.ptr = cxMalloc(allocator, result.length + 1); 683 result.ptr = cxMalloc(allocator, result.length + 1);
663 if (!result.ptr) { 684 if (!result.ptr) {
664 cx_strrepl_free_ibuf(firstbuf); 685 cx_strrepl_free_ibuf(&ibuf);
665 return cx_mutstrn(NULL, 0); 686 return cx_mutstrn(NULL, 0);
666 } 687 }
667 } 688 }
668 689
669 // Build result string 690 // Build result string
670 curbuf = firstbuf; 691 curbuf = &ibuf;
671 size_t srcidx = 0; 692 size_t srcidx = 0;
672 char *destptr = result.ptr; 693 char *destptr = result.ptr;
673 do { 694 do {
674 for (size_t i = 0; i < curbuf->len; i++) { 695 for (size_t i = 0; i < curbuf->len; i++) {
675 // Copy source part up to next match 696 // Copy source part up to next match
680 destptr += srclen; 701 destptr += srclen;
681 srcidx += srclen; 702 srcidx += srclen;
682 } 703 }
683 704
684 // Copy the replacement and skip the source pattern 705 // Copy the replacement and skip the source pattern
685 srcidx += pattern.length; 706 srcidx += search.length;
686 memcpy(destptr, replacement.ptr, replacement.length); 707 memcpy(destptr, replacement.ptr, replacement.length);
687 destptr += replacement.length; 708 destptr += replacement.length;
688 } 709 }
689 curbuf = curbuf->next; 710 curbuf = curbuf->next;
690 } while (curbuf); 711 } while (curbuf);
692 713
693 // Result is guaranteed to be zero-terminated 714 // Result is guaranteed to be zero-terminated
694 result.ptr[result.length] = '\0'; 715 result.ptr[result.length] = '\0';
695 716
696 // Free index buffer 717 // Free index buffer
697 cx_strrepl_free_ibuf(firstbuf); 718 cx_strrepl_free_ibuf(&ibuf);
698 719
699 return result; 720 return result;
700 } 721 }
701 722
702 CxStrtokCtx cx_strtok( 723 CxStrtokCtx cx_strtok_(
703 cxstring str, 724 cxstring str,
704 cxstring delim, 725 cxstring delim,
705 size_t limit 726 size_t limit
706 ) { 727 ) {
707 CxStrtokCtx ctx; 728 CxStrtokCtx ctx;
715 ctx.delim_more = NULL; 736 ctx.delim_more = NULL;
716 ctx.delim_more_count = 0; 737 ctx.delim_more_count = 0;
717 return ctx; 738 return ctx;
718 } 739 }
719 740
720 CxStrtokCtx cx_strtok_m(
721 cxmutstr str,
722 cxstring delim,
723 size_t limit
724 ) {
725 return cx_strtok(cx_strcast(str), delim, limit);
726 }
727
728 bool cx_strtok_next( 741 bool cx_strtok_next(
729 CxStrtokCtx *ctx, 742 CxStrtokCtx *ctx,
730 cxstring *token 743 cxstring *token
731 ) { 744 ) {
732 // abortion criteria 745 // abortion criteria
745 delim.length = ctx->delim.length; 758 delim.length = ctx->delim.length;
746 } 759 }
747 760
748 // if more delimiters are specified, check them now 761 // if more delimiters are specified, check them now
749 if (ctx->delim_more_count > 0) { 762 if (ctx->delim_more_count > 0) {
750 cx_for_n(i, ctx->delim_more_count) { 763 for (size_t i = 0; i < ctx->delim_more_count; i++) {
751 cxstring d = cx_strstr(haystack, ctx->delim_more[i]); 764 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
752 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { 765 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
753 delim.ptr = d.ptr; 766 delim.ptr = d.ptr;
754 delim.length = ctx->delim_more[i].length; 767 delim.length = ctx->delim_more[i].length;
755 } 768 }
775 return cx_strtok_next(ctx, (cxstring *) token); 788 return cx_strtok_next(ctx, (cxstring *) token);
776 } 789 }
777 790
778 void cx_strtok_delim( 791 void cx_strtok_delim(
779 CxStrtokCtx *ctx, 792 CxStrtokCtx *ctx,
780 cxstring const *delim, 793 const cxstring *delim,
781 size_t count 794 size_t count
782 ) { 795 ) {
783 ctx->delim_more = delim; 796 ctx->delim_more = delim;
784 ctx->delim_more_count = count; 797 ctx->delim_more_count = count;
785 } 798 }
799
// Shared function body for the narrow signed parsers.
// Expects `str`, `output`, `base` and `groupsep` to be in scope at the
// expansion site. Parses into a long long via cx_strtoll_lc; a parse failure
// returns -1. A value outside [rmin, rmax] sets errno to ERANGE and returns
// -1. Otherwise the value is cast to rtype, stored in *output, and 0 is
// returned. The expansion ends in `return 0` without a semicolon, so the
// user supplies the terminating `;`.
800 #define cx_strtoX_signed_impl(rtype, rmin, rmax) \
801 long long result; \
802 if (cx_strtoll_lc(str, &result, base, groupsep)) { \
803 return -1; \
804 } \
805 if (result < rmin || result > rmax) { \
806 errno = ERANGE; \
807 return -1; \
808 } \
809 *output = (rtype) result; \
810 return 0
811
// Parses str into a short using the shared signed body; values outside
// [SHRT_MIN, SHRT_MAX] yield -1 with errno set to ERANGE.
812 int cx_strtos_lc_(cxstring str, short *output, int base, const char *groupsep) {
813 cx_strtoX_signed_impl(short, SHRT_MIN, SHRT_MAX);
814 }
815
// Parses str into an int using the shared signed body; values outside
// [INT_MIN, INT_MAX] yield -1 with errno set to ERANGE.
816 int cx_strtoi_lc_(cxstring str, int *output, int base, const char *groupsep) {
817 cx_strtoX_signed_impl(int, INT_MIN, INT_MAX);
818 }
819
// Parses str into a long using the shared signed body; values outside
// [LONG_MIN, LONG_MAX] yield -1 with errno set to ERANGE.
820 int cx_strtol_lc_(cxstring str, long *output, int base, const char *groupsep) {
821 cx_strtoX_signed_impl(long, LONG_MIN, LONG_MAX);
822 }
823
824 int cx_strtoll_lc_(cxstring str, long long *output, int base, const char *groupsep) {
825 // strategy: parse as unsigned, check range, negate if required
826 bool neg = false;
827 size_t start_unsigned = 0;
828
829 // emptiness check
830 if (str.length == 0) {
831 errno = EINVAL;
832 return -1;
833 }
834
835 // test if we have a negative sign character
836 if (str.ptr[start_unsigned] == '-') {
837 neg = true;
838 start_unsigned++;
839 // must not be followed by positive sign character
840 if (str.length == 1 || str.ptr[start_unsigned] == '+') {
841 errno = EINVAL;
842 return -1;
843 }
844 }
845
846 // now parse the number with strtoull
847 unsigned long long v;
848 cxstring ustr = start_unsigned == 0 ? str
849 : cx_strn(str.ptr + start_unsigned, str.length - start_unsigned);
850 int ret = cx_strtoull_lc(ustr, &v, base, groupsep);
851 if (ret != 0) return ret;
852 if (neg) {
853 if (v - 1 > LLONG_MAX) {
854 errno = ERANGE;
855 return -1;
856 }
857 *output = -(long long) v;
858 return 0;
859 } else {
860 if (v > LLONG_MAX) {
861 errno = ERANGE;
862 return -1;
863 }
864 *output = (long long) v;
865 return 0;
866 }
867 }
868
// Parses str into an int8_t using the shared signed body; values outside
// [INT8_MIN, INT8_MAX] yield -1 with errno set to ERANGE.
869 int cx_strtoi8_lc_(cxstring str, int8_t *output, int base, const char *groupsep) {
870 cx_strtoX_signed_impl(int8_t, INT8_MIN, INT8_MAX);
871 }
872
// Parses str into an int16_t using the shared signed body; values outside
// [INT16_MIN, INT16_MAX] yield -1 with errno set to ERANGE.
873 int cx_strtoi16_lc_(cxstring str, int16_t *output, int base, const char *groupsep) {
874 cx_strtoX_signed_impl(int16_t, INT16_MIN, INT16_MAX);
875 }
876
// Parses str into an int32_t using the shared signed body; values outside
// [INT32_MIN, INT32_MAX] yield -1 with errno set to ERANGE.
877 int cx_strtoi32_lc_(cxstring str, int32_t *output, int base, const char *groupsep) {
878 cx_strtoX_signed_impl(int32_t, INT32_MIN, INT32_MAX);
879 }
880
881 int cx_strtoi64_lc_(cxstring str, int64_t *output, int base, const char *groupsep) {
882 assert(sizeof(long long) == sizeof(int64_t)); // should be true on all platforms
883 return cx_strtoll_lc(str, (long long*) output, base, groupsep);
884 }
885
// Shared function body for the narrow unsigned parsers.
// Expects `str`, `output`, `base` and `groupsep` to be in scope at the
// expansion site. Parses into a uint64_t via cx_strtou64_lc; a parse failure
// returns -1. A value above rmax sets errno to ERANGE and returns -1.
// Otherwise the value is cast to rtype, stored in *output, and 0 is returned.
// The expansion ends in `return 0` without a semicolon, so the user supplies
// the terminating `;`.
886 #define cx_strtoX_unsigned_impl(rtype, rmax) \
887 uint64_t result; \
888 if (cx_strtou64_lc(str, &result, base, groupsep)) { \
889 return -1; \
890 } \
891 if (result > rmax) { \
892 errno = ERANGE; \
893 return -1; \
894 } \
895 *output = (rtype) result; \
896 return 0
897
// Parses str into an unsigned short using the shared unsigned body; values
// above USHRT_MAX yield -1 with errno set to ERANGE.
898 int cx_strtous_lc_(cxstring str, unsigned short *output, int base, const char *groupsep) {
899 cx_strtoX_unsigned_impl(unsigned short, USHRT_MAX);
900 }
901
// Parses str into an unsigned int using the shared unsigned body; values
// above UINT_MAX yield -1 with errno set to ERANGE.
902 int cx_strtou_lc_(cxstring str, unsigned int *output, int base, const char *groupsep) {
903 cx_strtoX_unsigned_impl(unsigned int, UINT_MAX);
904 }
905
// Parses str into an unsigned long using the shared unsigned body; values
// above ULONG_MAX yield -1 with errno set to ERANGE.
906 int cx_strtoul_lc_(cxstring str, unsigned long *output, int base, const char *groupsep) {
907 cx_strtoX_unsigned_impl(unsigned long, ULONG_MAX);
908 }
909
910 int cx_strtoull_lc_(cxstring str, unsigned long long *output, int base, const char *groupsep) {
911 // some sanity checks
912 if (str.length == 0) {
913 errno = EINVAL;
914 return -1;
915 }
916 if (!(base == 2 || base == 8 || base == 10 || base == 16)) {
917 errno = EINVAL;
918 return -1;
919 }
920 if (groupsep == NULL) groupsep = "";
921
922 // find the actual start of the number
923 if (str.ptr[0] == '+') {
924 str.ptr++;
925 str.length--;
926 if (str.length == 0) {
927 errno = EINVAL;
928 return -1;
929 }
930 }
931 size_t start = 0;
932
933 // if base is 2 or 16, some leading stuff may appear
934 if (base == 2) {
935 if ((str.ptr[0] | 32) == 'b') {
936 start = 1;
937 } else if (str.ptr[0] == '0' && str.length > 1) {
938 if ((str.ptr[1] | 32) == 'b') {
939 start = 2;
940 }
941 }
942 } else if (base == 16) {
943 if ((str.ptr[0] | 32) == 'x' || str.ptr[0] == '#') {
944 start = 1;
945 } else if (str.ptr[0] == '0' && str.length > 1) {
946 if ((str.ptr[1] | 32) == 'x') {
947 start = 2;
948 }
949 }
950 }
951
952 // check if there are digits left
953 if (start >= str.length) {
954 errno = EINVAL;
955 return -1;
956 }
957
958 // now parse the number
959 unsigned long long result = 0;
960 for (size_t i = start; i < str.length; i++) {
961 // ignore group separators
962 if (strchr(groupsep, str.ptr[i])) continue;
963
964 // determine the digit value of the character
965 unsigned char c = str.ptr[i];
966 if (c >= 'a') c = 10 + (c - 'a');
967 else if (c >= 'A') c = 10 + (c - 'A');
968 else if (c >= '0') c = c - '0';
969 else c = 255;
970 if (c >= base) {
971 errno = EINVAL;
972 return -1;
973 }
974
975 // now combine the digit with what we already have
976 unsigned long right = (result & 0xff) * base + c;
977 unsigned long long left = (result >> 8) * base + (right >> 8);
978 if (left > (ULLONG_MAX >> 8)) {
979 errno = ERANGE;
980 return -1;
981 }
982 result = (left << 8) + (right & 0xff);
983 }
984
985 *output = result;
986 return 0;
987 }
988
// Parses str into a uint8_t using the shared unsigned body; values above
// UINT8_MAX yield -1 with errno set to ERANGE.
989 int cx_strtou8_lc_(cxstring str, uint8_t *output, int base, const char *groupsep) {
990 cx_strtoX_unsigned_impl(uint8_t, UINT8_MAX);
991 }
992
// Parses str into a uint16_t using the shared unsigned body; values above
// UINT16_MAX yield -1 with errno set to ERANGE.
993 int cx_strtou16_lc_(cxstring str, uint16_t *output, int base, const char *groupsep) {
994 cx_strtoX_unsigned_impl(uint16_t, UINT16_MAX);
995 }
996
// Parses str into a uint32_t using the shared unsigned body; values above
// UINT32_MAX yield -1 with errno set to ERANGE.
997 int cx_strtou32_lc_(cxstring str, uint32_t *output, int base, const char *groupsep) {
998 cx_strtoX_unsigned_impl(uint32_t, UINT32_MAX);
999 }
1000
1001 int cx_strtou64_lc_(cxstring str, uint64_t *output, int base, const char *groupsep) {
1002 assert(sizeof(unsigned long long) == sizeof(uint64_t)); // should be true on all platforms
1003 return cx_strtoull_lc(str, (unsigned long long*) output, base, groupsep);
1004 }
1005
1006 int cx_strtoz_lc_(cxstring str, size_t *output, int base, const char *groupsep) {
1007 #if SIZE_MAX == UINT32_MAX
1008 return cx_strtou32_lc_(str, (uint32_t*) output, base, groupsep);
1009 #elif SIZE_MAX == UINT64_MAX
1010 return cx_strtoull_lc_(str, (unsigned long long *) output, base, groupsep);
1011 #else
1012 #error "unsupported size_t size"
1013 #endif
1014 }
1015
1016 int cx_strtof_lc_(cxstring str, float *output, char decsep, const char *groupsep) {
1017 // use string to double and add a range check
1018 double d;
1019 int ret = cx_strtod_lc_(str, &d, decsep, groupsep);
1020 if (ret != 0) return ret;
1021 // note: FLT_MIN is the smallest POSITIVE number that can be represented
1022 double test = d < 0 ? -d : d;
1023 if (test < FLT_MIN || test > FLT_MAX) {
1024 errno = ERANGE;
1025 return -1;
1026 }
1027 *output = (float) d;
1028 return 0;
1029 }
1030
/* Locale-independent test for the decimal digits '0' through '9'. */
static bool str_isdigit(char c) {
    // TODO: remove once UCX has public API for this
    if (c < '0') {
        return false;
    }
    if (c > '9') {
        return false;
    }
    return true;
}
1035
1036 int cx_strtod_lc_(cxstring str, double *output, char decsep, const char *groupsep) {
1037 // TODO: overflow check
1038 // TODO: increase precision
1039
1040 // emptiness check
1041 if (str.length == 0) {
1042 errno = EINVAL;
1043 return -1;
1044 }
1045
1046 double result = 0.;
1047 int sign = 1;
1048
1049 // check if there is a sign
1050 if (str.ptr[0] == '-') {
1051 sign = -1;
1052 str.ptr++;
1053 str.length--;
1054 } else if (str.ptr[0] == '+') {
1055 str.ptr++;
1056 str.length--;
1057 }
1058
1059 // there must be at least one char to parse
1060 if (str.length == 0) {
1061 errno = EINVAL;
1062 return -1;
1063 }
1064
1065 // parse all digits until we find the decsep
1066 size_t pos = 0;
1067 do {
1068 if (str_isdigit(str.ptr[pos])) {
1069 result = result * 10 + (str.ptr[pos] - '0');
1070 } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
1071 break;
1072 }
1073 } while (++pos < str.length);
1074
1075 // already done?
1076 if (pos == str.length) {
1077 *output = result * sign;
1078 return 0;
1079 }
1080
1081 // is the next char the decsep?
1082 if (str.ptr[pos] == decsep) {
1083 pos++;
1084 // it may end with the decsep, if it did not start with it
1085 if (pos == str.length) {
1086 if (str.length == 1) {
1087 errno = EINVAL;
1088 return -1;
1089 } else {
1090 *output = result * sign;
1091 return 0;
1092 }
1093 }
1094 // parse everything until exponent or end
1095 double factor = 1.;
1096 do {
1097 if (str_isdigit(str.ptr[pos])) {
1098 factor *= 0.1;
1099 result = result + factor * (str.ptr[pos] - '0');
1100 } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
1101 break;
1102 }
1103 } while (++pos < str.length);
1104 }
1105
1106 // no exponent?
1107 if (pos == str.length) {
1108 *output = result * sign;
1109 return 0;
1110 }
1111
1112 // now the next separator MUST be the exponent separator
1113 // and at least one char must follow
1114 if ((str.ptr[pos] | 32) != 'e' || str.length <= pos + 1) {
1115 errno = EINVAL;
1116 return -1;
1117 }
1118 pos++;
1119
1120 // check if we have a sign for the exponent
1121 double factor = 10.;
1122 if (str.ptr[pos] == '-') {
1123 factor = .1;
1124 pos++;
1125 } else if (str.ptr[pos] == '+') {
1126 pos++;
1127 }
1128
1129 // at least one digit must follow
1130 if (pos == str.length) {
1131 errno = EINVAL;
1132 return -1;
1133 }
1134
1135 // parse the exponent
1136 unsigned int exp = 0;
1137 do {
1138 if (str_isdigit(str.ptr[pos])) {
1139 exp = 10 * exp + (str.ptr[pos] - '0');
1140 } else if (strchr(groupsep, str.ptr[pos]) == NULL) {
1141 errno = EINVAL;
1142 return -1;
1143 }
1144 } while (++pos < str.length);
1145
1146 // apply the exponent by fast exponentiation
1147 do {
1148 if (exp & 1) {
1149 result *= factor;
1150 }
1151 factor *= factor;
1152 } while ((exp >>= 1) > 0);
1153
1154 // store the result and exit
1155 *output = result * sign;
1156 return 0;
1157 }

mercurial