ucx/string.c

changeset 1040
473d8cb58a6c
parent 1016
ccde46662db7
equal deleted inserted replaced
1039:6691e007cef7 1040:473d8cb58a6c
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29 // for memrchr in glibc 29 #ifdef WITH_MEMRCHR
30 #define _GNU_SOURCE 30 #define _GNU_SOURCE
31 #endif
31 32
32 #include "cx/string.h" 33 #include "cx/string.h"
33 34
34 #include <string.h> 35 #include <string.h>
35 #include <stdarg.h> 36 #include <stdarg.h>
38 #include <limits.h> 39 #include <limits.h>
39 #include <float.h> 40 #include <float.h>
40 #include <ctype.h> 41 #include <ctype.h>
41 42
42 #ifdef _WIN32 43 #ifdef _WIN32
43 #define cx_strcasecmp_impl _strnicmp 44 static int cx_fixed_strnicmp(const char* s1, const char* s2, size_t count) {
45 // Microsoft's implementation crashes when count == 0 and either string is NULL
46 if (count == 0) return 0;
47 return _strnicmp(s1, s2, count);
48 }
49 #define cx_strcasecmp_impl cx_fixed_strnicmp
44 #else 50 #else
45 #include <strings.h> 51 #include <strings.h>
46 #define cx_strcasecmp_impl strncasecmp 52 #define cx_strcasecmp_impl strncasecmp
47 #endif 53 #endif
48 54
61 cxFree(alloc, str->ptr); 67 cxFree(alloc, str->ptr);
62 str->ptr = NULL; 68 str->ptr = NULL;
63 str->length = 0; 69 str->length = 0;
64 } 70 }
65 71
66 int cx_strcpy_a( 72 int cx_strcpy_a_(
67 const CxAllocator *alloc, 73 const CxAllocator *alloc,
68 cxmutstr *dest, 74 cxmutstr *dest,
69 cxstring src 75 cxstring src
70 ) { 76 ) {
71 if (cxReallocate(alloc, &dest->ptr, src.length + 1)) { 77 if (cxReallocate(alloc, &dest->ptr, src.length + 1)) {
96 va_end(ap); 102 va_end(ap);
97 103
98 return size; 104 return size;
99 } 105 }
100 106
101 cxmutstr cx_strcat_ma( 107 cxmutstr cx_strcat_a(
102 const CxAllocator *alloc, 108 const CxAllocator *alloc,
103 cxmutstr str, 109 cxmutstr str,
104 size_t count, 110 size_t count,
105 ... 111 ...
106 ) { 112 ) {
107 if (count == 0) return str; 113 if (count == 0) {
114 if (cxReallocate(alloc, &str.ptr, str.length + 1)) {
115 return CX_NULLSTR; // LCOV_EXCL_LINE
116 }
117 str.ptr[str.length] = '\0';
118 return str;
119 }
108 va_list ap; 120 va_list ap;
109 va_start(ap, count); 121 va_start(ap, count);
110 va_list ap2; 122 va_list ap2;
111 va_copy(ap2, ap); 123 va_copy(ap2, ap);
112 124
122 134
123 // abort in case of overflow 135 // abort in case of overflow
124 if (overflow) { 136 if (overflow) {
125 va_end(ap2); 137 va_end(ap2);
126 errno = EOVERFLOW; 138 errno = EOVERFLOW;
127 return (cxmutstr) { NULL, 0 }; 139 return CX_NULLSTR;
128 } 140 }
129 141
130 // reallocate or create new string 142 // reallocate or create a new string
131 char *newstr; 143 if (cxReallocate(alloc, &str.ptr, slen + 1)) {
132 if (str.ptr == NULL) { 144 // LCOV_EXCL_START
133 newstr = cxMalloc(alloc, slen + 1);
134 } else {
135 newstr = cxRealloc(alloc, str.ptr, slen + 1);
136 }
137 if (newstr == NULL) { // LCOV_EXCL_START
138 va_end(ap2); 145 va_end(ap2);
139 return (cxmutstr) {NULL, 0}; 146 return CX_NULLSTR;
140 } // LCOV_EXCL_STOP 147 // LCOV_EXCL_STOP
141 str.ptr = newstr; 148 }
142 149
143 // concatenate strings 150 // concatenate strings
144 size_t pos = str.length; 151 size_t pos = str.length;
145 str.length = slen; 152 str.length = slen;
146 for (size_t i = 0; i < count; i++) { 153 for (size_t i = 0; i < count; i++) {
154 str.ptr[str.length] = '\0'; 161 str.ptr[str.length] = '\0';
155 162
156 return str; 163 return str;
157 } 164 }
158 165
159 cxstring cx_strsubs( 166 cxstring cx_strsubs_(
160 cxstring string, 167 cxstring string,
161 size_t start 168 size_t start
162 ) { 169 ) {
163 return cx_strsubsl(string, start, string.length - start); 170 return cx_strsubsl_(string, start, string.length);
164 } 171 }
165 172
166 cxmutstr cx_strsubs_m( 173 cxstring cx_strsubsl_(
167 cxmutstr string,
168 size_t start
169 ) {
170 return cx_strsubsl_m(string, start, string.length - start);
171 }
172
173 cxstring cx_strsubsl(
174 cxstring string, 174 cxstring string,
175 size_t start, 175 size_t start,
176 size_t length 176 size_t length
177 ) { 177 ) {
178 if (start > string.length) { 178 if (start > string.length) {
185 } 185 }
186 186
187 return (cxstring) {string.ptr + start, length}; 187 return (cxstring) {string.ptr + start, length};
188 } 188 }
189 189
190 cxmutstr cx_strsubsl_m( 190 cxstring cx_strchr_(
191 cxmutstr string,
192 size_t start,
193 size_t length
194 ) {
195 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
196 return (cxmutstr) {(char *) result.ptr, result.length};
197 }
198
199 cxstring cx_strchr(
200 cxstring string, 191 cxstring string,
201 int chr 192 int chr
202 ) { 193 ) {
203 char *ret = memchr(string.ptr, 0xFF & chr, string.length); 194 char *ret = memchr(string.ptr, 0xFF & chr, string.length);
204 if (ret == NULL) return (cxstring) {NULL, 0}; 195 if (ret == NULL) return (cxstring) {NULL, 0};
205 return (cxstring) {ret, string.length - (ret - string.ptr)}; 196 return (cxstring) {ret, string.length - (ret - string.ptr)};
206 } 197 }
207 198
208 cxmutstr cx_strchr_m( 199 cxstring cx_strrchr_(
209 cxmutstr string,
210 int chr
211 ) {
212 cxstring result = cx_strchr(cx_strcast(string), chr);
213 return (cxmutstr) {(char *) result.ptr, result.length};
214 }
215
216 cxstring cx_strrchr(
217 cxstring string, 200 cxstring string,
218 int chr 201 int chr
219 ) { 202 ) {
220 #ifdef WITH_MEMRCHR 203 #ifdef WITH_MEMRCHR
221 char *ret = memrchr(string.ptr, 0xFF & chr, string.length); 204 char *ret = memrchr(string.ptr, 0xFF & chr, string.length);
232 } 215 }
233 return (cxstring) {NULL, 0}; 216 return (cxstring) {NULL, 0};
234 #endif 217 #endif
235 } 218 }
236 219
237 cxmutstr cx_strrchr_m( 220 #ifndef CX_STRSTR_SBO_SIZE
238 cxmutstr string, 221 #define CX_STRSTR_SBO_SIZE 128
239 int chr
240 ) {
241 cxstring result = cx_strrchr(cx_strcast(string), chr);
242 return (cxmutstr) {(char *) result.ptr, result.length};
243 }
244
245 #ifndef cx_strSTR_SBO_SIZE
246 #define cx_strSTR_SBO_SIZE 128
247 #endif 222 #endif
248 const unsigned cx_strstr_sbo_size = cx_strSTR_SBO_SIZE; 223 const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
249 224
250 cxstring cx_strstr( 225 cxstring cx_strstr_(cxstring haystack, cxstring needle) {
251 cxstring haystack,
252 cxstring needle
253 ) {
254 if (needle.length == 0) { 226 if (needle.length == 0) {
255 return haystack; 227 return haystack;
256 } 228 }
257 229
258 // optimize for single-char needles 230 // optimize for single-char needles
267 * The original algorithm needs a (-1) at one single place, 239 * The original algorithm needs a (-1) at one single place,
268 * and we want to avoid that. 240 * and we want to avoid that.
269 */ 241 */
270 242
271 // local prefix table 243 // local prefix table
272 size_t s_prefix_table[cx_strSTR_SBO_SIZE]; 244 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
273 245
274 // check needle length and use appropriate prefix table 246 // check needle length and use appropriate prefix table
275 // if the pattern exceeds static prefix table, allocate on the heap 247 // if the pattern exceeds static prefix table, allocate on the heap
276 const bool useheap = needle.length >= cx_strSTR_SBO_SIZE; 248 const bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
277 register size_t *ptable = useheap 249 register size_t *ptable = useheap
278 ? cxCallocDefault(needle.length + 1, sizeof(size_t)) 250 ? cxCallocDefault(needle.length + 1, sizeof(size_t))
279 : s_prefix_table; 251 : s_prefix_table;
280 252
281 // keep counter in registers 253 // keep counter in registers
318 } 290 }
319 291
320 return result; 292 return result;
321 } 293 }
322 294
323 cxmutstr cx_strstr_m( 295 size_t cx_strsplit_(
324 cxmutstr haystack,
325 cxstring needle
326 ) {
327 cxstring result = cx_strstr(cx_strcast(haystack), needle);
328 return (cxmutstr) {(char *) result.ptr, result.length};
329 }
330
331 size_t cx_strsplit(
332 cxstring string, 296 cxstring string,
333 cxstring delim, 297 cxstring delim,
334 size_t limit, 298 size_t limit,
335 cxstring *output 299 cxstring *output
336 ) { 300 ) {
384 } 348 }
385 349
386 return n; 350 return n;
387 } 351 }
388 352
389 size_t cx_strsplit_a( 353 size_t cx_strsplit_a_(
390 const CxAllocator *allocator, 354 const CxAllocator *allocator,
391 cxstring string, 355 cxstring string,
392 cxstring delim, 356 cxstring delim,
393 size_t limit, 357 size_t limit,
394 cxstring **output 358 cxstring **output
413 // no more matches 377 // no more matches
414 break; 378 break;
415 } 379 }
416 } 380 }
417 *output = cxCalloc(allocator, n, sizeof(cxstring)); 381 *output = cxCalloc(allocator, n, sizeof(cxstring));
418 return cx_strsplit(string, delim, n, *output); 382 return cx_strsplit_(string, delim, n, *output);
419 } 383 }
420 384
421 size_t cx_strsplit_m( 385 size_t cx_strsplit_m_(
422 cxmutstr string, 386 cxmutstr string,
423 cxstring delim, 387 cxstring delim,
424 size_t limit, 388 size_t limit,
425 cxmutstr *output 389 cxmutstr *output
426 ) { 390 ) {
427 return cx_strsplit(cx_strcast(string), 391 return cx_strsplit_(cx_strcast(string),
428 delim, limit, (cxstring *) output); 392 delim, limit, (cxstring *) output);
429 } 393 }
430 394
431 size_t cx_strsplit_ma( 395 size_t cx_strsplit_ma_(
432 const CxAllocator *allocator, 396 const CxAllocator *allocator,
433 cxmutstr string, 397 cxmutstr string,
434 cxstring delim, 398 cxstring delim,
435 size_t limit, 399 size_t limit,
436 cxmutstr **output 400 cxmutstr **output
437 ) { 401 ) {
438 return cx_strsplit_a(allocator, cx_strcast(string), 402 return cx_strsplit_a_(allocator, cx_strcast(string),
439 delim, limit, (cxstring **) output); 403 delim, limit, (cxstring **) output);
440 } 404 }
441 405
442 int cx_strcmp_( 406 int cx_strcmp_(
443 cxstring s1, 407 cxstring s1,
508 memcpy(result.ptr, string.ptr, string.length); 472 memcpy(result.ptr, string.ptr, string.length);
509 result.ptr[string.length] = '\0'; 473 result.ptr[string.length] = '\0';
510 return result; 474 return result;
511 } 475 }
512 476
513 cxstring cx_strtrim(cxstring string) { 477 cxstring cx_strtrim_(cxstring string) {
514 cxstring result = string; 478 cxstring result = string;
515 while (result.length > 0 && isspace((unsigned char)(result.ptr[0]))) { 479 while (isspace((unsigned char)cx_strat(result, 0))) {
516 result.ptr++; 480 result.ptr++;
517 result.length--; 481 result.length--;
518 } 482 }
519 while (result.length > 0 && isspace((unsigned char)result.ptr[result.length - 1])) { 483 while (isspace((unsigned char)cx_strat(result, -1))) {
520 result.length--; 484 result.length--;
521 } 485 }
522 return result; 486 return result;
523 }
524
525 cxmutstr cx_strtrim_m(cxmutstr string) {
526 cxstring result = cx_strtrim(cx_strcast(string));
527 return (cxmutstr) {(char *) result.ptr, result.length};
528 } 487 }
529 488
530 bool cx_strprefix_( 489 bool cx_strprefix_(
531 cxstring string, 490 cxstring string,
532 cxstring prefix 491 cxstring prefix
568 return strncasecmp(string.ptr + string.length - suffix.length, 527 return strncasecmp(string.ptr + string.length - suffix.length,
569 suffix.ptr, suffix.length) == 0; 528 suffix.ptr, suffix.length) == 0;
570 #endif 529 #endif
571 } 530 }
572 531
573 cxmutstr cx_strreplacen_a( 532 cxmutstr cx_strreplace_(
574 const CxAllocator *allocator, 533 const CxAllocator *allocator,
575 cxstring str, 534 cxstring str,
576 cxstring search, 535 cxstring search,
577 cxstring replacement, 536 cxstring replacement,
578 size_t replmax 537 size_t replmax
649 ctx.delim_more = NULL; 608 ctx.delim_more = NULL;
650 ctx.delim_more_count = 0; 609 ctx.delim_more_count = 0;
651 return ctx; 610 return ctx;
652 } 611 }
653 612
654 bool cx_strtok_next( 613 bool cx_strtok_next_(
655 CxStrtokCtx *ctx, 614 CxStrtokCtx *ctx,
656 cxstring *token 615 cxstring *token
657 ) { 616 ) {
658 // abortion criteria 617 // abortion criteria
659 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { 618 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
690 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); 649 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
691 token->length = ctx->delim_pos - ctx->pos; 650 token->length = ctx->delim_pos - ctx->pos;
692 ctx->next_pos = ctx->delim_pos + delim.length; 651 ctx->next_pos = ctx->delim_pos + delim.length;
693 652
694 return true; 653 return true;
695 }
696
697 bool cx_strtok_next_m(
698 CxStrtokCtx *ctx,
699 cxmutstr *token
700 ) {
701 return cx_strtok_next(ctx, (cxstring *) token);
702 } 654 }
703 655
704 void cx_strtok_delim( 656 void cx_strtok_delim(
705 CxStrtokCtx *ctx, 657 CxStrtokCtx *ctx,
706 const cxstring *delim, 658 const cxstring *delim,

mercurial