ucx/string.c

changeset 162
18892c0a9adc
parent 157
0b33b9396851
equal deleted inserted replaced
161:b1eac0878ce7 162:18892c0a9adc
34 #include <string.h> 34 #include <string.h>
35 #include <stdarg.h> 35 #include <stdarg.h>
36 #include <stdint.h> 36 #include <stdint.h>
37 #include <ctype.h> 37 #include <ctype.h>
38 38
39 #ifndef _WIN32
40 #include <strings.h> /* for strncasecmp() */
41 #endif /* _WIN32 */
42
39 sstr_t sstr(char *cstring) { 43 sstr_t sstr(char *cstring) {
40 sstr_t string; 44 sstr_t string;
41 string.ptr = cstring; 45 string.ptr = cstring;
42 string.length = strlen(cstring); 46 string.length = strlen(cstring);
43 return string; 47 return string;
64 return string; 68 return string;
65 } 69 }
66 70
67 71
68 size_t scstrnlen(size_t n, ...) { 72 size_t scstrnlen(size_t n, ...) {
73 if (n == 0) return 0;
74
69 va_list ap; 75 va_list ap;
70 va_start(ap, n); 76 va_start(ap, n);
71 77
72 size_t size = 0; 78 size_t size = 0;
73 79
590 return memcmp(string.ptr+string.length-suffix.length, 596 return memcmp(string.ptr+string.length-suffix.length,
591 suffix.ptr, suffix.length) == 0; 597 suffix.ptr, suffix.length) == 0;
592 } 598 }
593 } 599 }
594 600
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) {
602 if (string.length == 0) {
603 return prefix.length == 0;
604 }
605 if (prefix.length == 0) {
606 return 1;
607 }
608
609 if (prefix.length > string.length) {
610 return 0;
611 } else {
612 scstr_t subs = scstrsubsl(string, 0, prefix.length);
613 return scstrcasecmp(subs, prefix) == 0;
614 }
615 }
616
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) {
618 if (string.length == 0) {
619 return suffix.length == 0;
620 }
621 if (suffix.length == 0) {
622 return 1;
623 }
624
625 if (suffix.length > string.length) {
626 return 0;
627 } else {
628 scstr_t subs = scstrsubs(string, string.length-suffix.length);
629 return scstrcasecmp(subs, suffix) == 0;
630 }
631 }
632
595 sstr_t scstrlower(scstr_t string) { 633 sstr_t scstrlower(scstr_t string) {
596 sstr_t ret = sstrdup(string); 634 sstr_t ret = sstrdup(string);
597 for (size_t i = 0; i < ret.length ; i++) { 635 for (size_t i = 0; i < ret.length ; i++) {
598 ret.ptr[i] = tolower(ret.ptr[i]); 636 ret.ptr[i] = tolower(ret.ptr[i]);
599 } 637 }
621 for (size_t i = 0; i < ret.length ; i++) { 659 for (size_t i = 0; i < ret.length ; i++) {
622 ret.ptr[i] = toupper(ret.ptr[i]); 660 ret.ptr[i] = toupper(ret.ptr[i]);
623 } 661 }
624 return ret; 662 return ret;
625 } 663 }
664
/* Upper bound for the number of match indices stored in one index buffer. */
#define REPLACE_INDEX_BUFFER_MAX 100

/*
 * One node of a singly linked chain of index buffers. The replace function
 * records the offsets of the pattern occurrences in such a chain.
 */
struct scstrreplace_ibuf {
    size_t *buf;                    /* array of recorded match offsets */
    unsigned int len;               /* used entries in buf (small indices) */
    struct scstrreplace_ibuf *next; /* following node, NULL at chain end */
};
672
673 static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
674 while (buf) {
675 struct scstrreplace_ibuf *next = buf->next;
676 free(buf->buf);
677 free(buf);
678 buf = next;
679 }
680 }
681
682 sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
683 scstr_t pattern, scstr_t replacement, size_t replmax) {
684
685 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
686 return sstrdup(str);
687
688 /* Compute expected buffer length */
689 size_t ibufmax = str.length / pattern.length;
690 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
691 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
692 ibuflen = REPLACE_INDEX_BUFFER_MAX;
693 }
694
695 /* Allocate first index buffer */
696 struct scstrreplace_ibuf *firstbuf, *curbuf;
697 firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
698 if (!firstbuf) return sstrn(NULL, 0);
699 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
700 if (!firstbuf->buf) {
701 free(firstbuf);
702 return sstrn(NULL, 0);
703 }
704
705 /* Search occurrences */
706 scstr_t searchstr = str;
707 size_t found = 0;
708 do {
709 scstr_t match = scstrscstr(searchstr, pattern);
710 if (match.length > 0) {
711 /* Allocate next buffer in chain, if required */
712 if (curbuf->len == ibuflen) {
713 struct scstrreplace_ibuf *nextbuf =
714 calloc(1, sizeof(struct scstrreplace_ibuf));
715 if (!nextbuf) {
716 scstrrepl_free_ibuf(firstbuf);
717 return sstrn(NULL, 0);
718 }
719 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
720 if (!nextbuf->buf) {
721 free(nextbuf);
722 scstrrepl_free_ibuf(firstbuf);
723 return sstrn(NULL, 0);
724 }
725 curbuf->next = nextbuf;
726 curbuf = nextbuf;
727 }
728
729 /* Record match index */
730 found++;
731 size_t idx = match.ptr - str.ptr;
732 curbuf->buf[curbuf->len++] = idx;
733 searchstr.ptr = match.ptr + pattern.length;
734 searchstr.length = str.length - idx - pattern.length;
735 } else {
736 break;
737 }
738 } while (searchstr.length > 0 && found < replmax);
739
740 /* Allocate result string */
741 sstr_t result;
742 {
743 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
744 size_t rcount = 0;
745 curbuf = firstbuf;
746 do {
747 rcount += curbuf->len;
748 curbuf = curbuf->next;
749 } while (curbuf);
750 result.length = str.length + rcount * adjlen;
751 result.ptr = almalloc(allocator, result.length);
752 if (!result.ptr) {
753 scstrrepl_free_ibuf(firstbuf);
754 return sstrn(NULL, 0);
755 }
756 }
757
758 /* Build result string */
759 curbuf = firstbuf;
760 size_t srcidx = 0;
761 char* destptr = result.ptr;
762 do {
763 for (size_t i = 0; i < curbuf->len; i++) {
764 /* Copy source part up to next match*/
765 size_t idx = curbuf->buf[i];
766 size_t srclen = idx - srcidx;
767 if (srclen > 0) {
768 memcpy(destptr, str.ptr+srcidx, srclen);
769 destptr += srclen;
770 srcidx += srclen;
771 }
772
773 /* Copy the replacement and skip the source pattern */
774 srcidx += pattern.length;
775 memcpy(destptr, replacement.ptr, replacement.length);
776 destptr += replacement.length;
777 }
778 curbuf = curbuf->next;
779 } while (curbuf);
780 memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
781
782 /* Free index buffer */
783 scstrrepl_free_ibuf(firstbuf);
784
785 return result;
786 }
787
788 sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
789 scstr_t replacement, size_t replmax) {
790 return scstrreplacen_a(ucx_default_allocator(),
791 str, pattern, replacement, replmax);
792 }
793
626 794
627 // type adjustment functions 795 // type adjustment functions
628 scstr_t ucx_sc2sc(scstr_t str) { 796 scstr_t ucx_sc2sc(scstr_t str) {
629 return str; 797 return str;
630 } 798 }

mercurial