src/ucx/string.c

changeset 415
d938228c382e
parent 260
4779a6fb4fbe
child 438
22eca559aded
equal deleted inserted replaced
414:99a34860c105 415:d938228c382e
1 /* 1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * 3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met: 7 * modification, are permitted provided that the following conditions are met:
8 * 8 *
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 */ 27 */
28 28
29 #include "ucx/string.h" 29 #include "cx/string.h"
30 30 #include "cx/utils.h"
31 #include "ucx/allocator.h" 31
32
33 #include <stdlib.h>
34 #include <string.h> 32 #include <string.h>
35 #include <stdarg.h> 33 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h> 34 #include <ctype.h>
38 35
39 #ifndef _WIN32 36 #ifndef _WIN32
37
40 #include <strings.h> /* for strncasecmp() */ 38 #include <strings.h> /* for strncasecmp() */
39
41 #endif /* _WIN32 */ 40 #endif /* _WIN32 */
42 41
43 sstr_t sstr(char *cstring) { 42 cxmutstr cx_mutstr(char *cstring) {
44 sstr_t string; 43 return (cxmutstr) {cstring, strlen(cstring)};
45 string.ptr = cstring; 44 }
46 string.length = strlen(cstring); 45
47 return string; 46 cxmutstr cx_mutstrn(
48 } 47 char *cstring,
49 48 size_t length
50 sstr_t sstrn(char *cstring, size_t length) { 49 ) {
51 sstr_t string; 50 return (cxmutstr) {cstring, length};
52 string.ptr = cstring; 51 }
53 string.length = length; 52
54 return string; 53 cxstring cx_str(const char *cstring) {
55 } 54 return (cxstring) {cstring, strlen(cstring)};
56 55 }
57 scstr_t scstr(const char *cstring) { 56
58 scstr_t string; 57 cxstring cx_strn(
59 string.ptr = cstring; 58 const char *cstring,
60 string.length = strlen(cstring); 59 size_t length
61 return string; 60 ) {
62 } 61 return (cxstring) {cstring, length};
63 62 }
64 scstr_t scstrn(const char *cstring, size_t length) { 63
65 scstr_t string; 64 cxstring cx_strcast(cxmutstr str) {
66 string.ptr = cstring; 65 return (cxstring) {str.ptr, str.length};
67 string.length = length; 66 }
68 return string; 67
69 } 68 void cx_strfree(cxmutstr *str) {
70 69 free(str->ptr);
71 70 str->ptr = NULL;
72 size_t scstrnlen(size_t n, ...) { 71 str->length = 0;
73 if (n == 0) return 0; 72 }
74 73
74 void cx_strfree_a(
75 CxAllocator *alloc,
76 cxmutstr *str
77 ) {
78 cxFree(alloc, str->ptr);
79 str->ptr = NULL;
80 str->length = 0;
81 }
82
83 size_t cx_strlen(
84 size_t count,
85 ...
86 ) {
87 if (count == 0) return 0;
88
75 va_list ap; 89 va_list ap;
76 va_start(ap, n); 90 va_start(ap, count);
77
78 size_t size = 0; 91 size_t size = 0;
79 92 cx_for_n(i, count) {
80 for (size_t i = 0 ; i < n ; i++) { 93 cxstring str = va_arg(ap, cxstring);
81 scstr_t str = va_arg(ap, scstr_t);
82 if(SIZE_MAX - str.length < size) {
83 size = SIZE_MAX;
84 break;
85 }
86 size += str.length; 94 size += str.length;
87 } 95 }
88 va_end(ap); 96 va_end(ap);
89 97
90 return size; 98 return size;
91 } 99 }
92 100
93 static sstr_t sstrvcat_a( 101 cxmutstr cx_strcat_a(
94 UcxAllocator *a, 102 CxAllocator *alloc,
95 size_t count, 103 size_t count,
96 scstr_t s1, 104 ...
97 va_list ap) { 105 ) {
98 sstr_t str; 106 cxstring *strings = calloc(count, sizeof(cxstring));
99 str.ptr = NULL; 107 if (!strings) abort();
100 str.length = 0; 108
101 if(count < 2) { 109 va_list ap;
102 return str; 110 va_start(ap, count);
103 } 111
104
105 scstr_t s2 = va_arg (ap, scstr_t);
106
107 if(((size_t)-1) - s1.length < s2.length) {
108 return str;
109 }
110
111 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
112 if(!strings) {
113 return str;
114 }
115
116 // get all args and overall length 112 // get all args and overall length
117 strings[0] = s1; 113 size_t slen = 0;
118 strings[1] = s2; 114 cx_for_n(i, count) {
119 size_t slen = s1.length + s2.length; 115 cxstring s = va_arg (ap, cxstring);
120 int error = 0;
121 for (size_t i=2;i<count;i++) {
122 scstr_t s = va_arg (ap, scstr_t);
123 strings[i] = s; 116 strings[i] = s;
124 if(((size_t)-1) - s.length < slen) {
125 error = 1;
126 break;
127 }
128 slen += s.length; 117 slen += s.length;
129 } 118 }
130 if(error) { 119
131 free(strings);
132 return str;
133 }
134
135 // create new string 120 // create new string
136 str.ptr = (char*) almalloc(a, slen + 1); 121 cxmutstr result;
137 str.length = slen; 122 result.ptr = cxMalloc(alloc, slen + 1);
138 if(!str.ptr) { 123 result.length = slen;
139 free(strings); 124 if (result.ptr == NULL) abort();
140 str.length = 0; 125
141 return str;
142 }
143
144 // concatenate strings 126 // concatenate strings
145 size_t pos = 0; 127 size_t pos = 0;
146 for (size_t i=0;i<count;i++) { 128 cx_for_n(i, count) {
147 scstr_t s = strings[i]; 129 cxstring s = strings[i];
148 memcpy(str.ptr + pos, s.ptr, s.length); 130 memcpy(result.ptr + pos, s.ptr, s.length);
149 pos += s.length; 131 pos += s.length;
150 } 132 }
151 133
152 str.ptr[str.length] = '\0'; 134 // terminate string
153 135 result.ptr[result.length] = '\0';
136
137 // free temporary array
154 free(strings); 138 free(strings);
155 139
156 return str; 140 return result;
157 } 141 }
158 142
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) { 143 cxstring cx_strsubs(
160 va_list ap; 144 cxstring string,
161 va_start(ap, s1); 145 size_t start
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); 146 ) {
163 va_end(ap); 147 return cx_strsubsl(string, start, string.length - start);
164 return s; 148 }
165 } 149
166 150 cxmutstr cx_strsubs_m(
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { 151 cxmutstr string,
168 va_list ap; 152 size_t start
169 va_start(ap, s1); 153 ) {
170 sstr_t s = sstrvcat_a(a, count, s1, ap); 154 return cx_strsubsl_m(string, start, string.length - start);
171 va_end(ap); 155 }
172 return s; 156
173 } 157 cxstring cx_strsubsl(
174 158 cxstring string,
175 static int ucx_substring(
176 size_t str_length,
177 size_t start, 159 size_t start,
178 size_t length, 160 size_t length
179 size_t *newlen, 161 ) {
180 size_t *newpos) 162 if (start > string.length) {
181 { 163 return (cxstring) {NULL, 0};
182 *newlen = 0; 164 }
183 *newpos = 0; 165
184 166 size_t rem_len = string.length - start;
185 if(start > str_length) { 167 if (length > rem_len) {
186 return 0; 168 length = rem_len;
187 } 169 }
188 170
189 if(length > str_length - start) { 171 return (cxstring) {string.ptr + start, length};
190 length = str_length - start; 172 }
191 } 173
192 *newlen = length; 174 cxmutstr cx_strsubsl_m(
193 *newpos = start; 175 cxmutstr string,
194 return 1; 176 size_t start,
195 } 177 size_t length
196 178 ) {
197 sstr_t sstrsubs(sstr_t s, size_t start) { 179 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
198 return sstrsubsl (s, start, s.length-start); 180 return (cxmutstr) {(char *) result.ptr, result.length};
199 } 181 }
200 182
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { 183 cxstring cx_strchr(
202 size_t pos; 184 cxstring string,
203 sstr_t ret = { NULL, 0 }; 185 int chr
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { 186 ) {
205 ret.ptr = s.ptr + pos; 187 chr = 0xFF & chr;
206 } 188 // TODO: improve by comparing multiple bytes at once
207 return ret; 189 cx_for_n(i, string.length) {
208 } 190 if (string.ptr[i] == chr) {
209 191 return cx_strsubs(string, i);
210 scstr_t scstrsubs(scstr_t string, size_t start) { 192 }
211 return scstrsubsl(string, start, string.length-start); 193 }
212 } 194 return (cxstring) {NULL, 0};
213 195 }
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { 196
215 size_t pos; 197 cxmutstr cx_strchr_m(
216 scstr_t ret = { NULL, 0 }; 198 cxmutstr string,
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { 199 int chr
218 ret.ptr = s.ptr + pos; 200 ) {
219 } 201 cxstring result = cx_strchr(cx_strcast(string), chr);
220 return ret; 202 return (cxmutstr) {(char *) result.ptr, result.length};
221 } 203 }
222 204
223 205 cxstring cx_strrchr(
224 static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { 206 cxstring string,
225 for(size_t i=0;i<length;i++) { 207 int chr
226 if(str[i] == chr) { 208 ) {
227 *pos = i; 209 chr = 0xFF & chr;
228 return 1; 210 size_t i = string.length;
229 } 211 while (i > 0) {
230 } 212 i--;
231 return 0; 213 // TODO: improve by comparing multiple bytes at once
232 } 214 if (string.ptr[i] == chr) {
233 215 return cx_strsubs(string, i);
234 static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { 216 }
235 if(length > 0) { 217 }
236 for(size_t i=length ; i>0 ; i--) { 218 return (cxstring) {NULL, 0};
237 if(str[i-1] == chr) { 219 }
238 *pos = i-1; 220
239 return 1; 221 cxmutstr cx_strrchr_m(
240 } 222 cxmutstr string,
241 } 223 int chr
242 } 224 ) {
243 return 0; 225 cxstring result = cx_strrchr(cx_strcast(string), chr);
244 } 226 return (cxmutstr) {(char *) result.ptr, result.length};
245 227 }
246 sstr_t sstrchr(sstr_t s, int c) { 228
247 size_t pos = 0; 229 #define STRSTR_SBO_BUFLEN 512
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) { 230
249 return sstrsubs(s, pos); 231 cxstring cx_strstr(
250 } 232 cxstring haystack,
251 return sstrn(NULL, 0); 233 cxstring needle
252 } 234 ) {
253 235 if (needle.length == 0) {
254 sstr_t sstrrchr(sstr_t s, int c) { 236 return haystack;
255 size_t pos = 0; 237 }
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { 238
257 return sstrsubs(s, pos); 239 /* optimize for single-char needles */
258 } 240 if (needle.length == 1) {
259 return sstrn(NULL, 0); 241 return cx_strchr(haystack, *needle.ptr);
260 } 242 }
261 243
262 scstr_t scstrchr(scstr_t s, int c) {
263 size_t pos = 0;
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
265 return scstrsubs(s, pos);
266 }
267 return scstrn(NULL, 0);
268 }
269
270 scstr_t scstrrchr(scstr_t s, int c) {
271 size_t pos = 0;
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
273 return scstrsubs(s, pos);
274 }
275 return scstrn(NULL, 0);
276 }
277
278 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
279 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
280
281 #define ptable_w(useheap, ptable, index, src) do {\
282 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
283 else ((size_t*)ptable)[index] = src;\
284 } while (0);
285
286
287 static const char* ucx_strstr(
288 const char *str,
289 size_t length,
290 const char *match,
291 size_t matchlen,
292 size_t *newlen)
293 {
294 *newlen = length;
295 if (matchlen == 0) {
296 return str;
297 }
298
299 const char *result = NULL;
300 size_t resultlen = 0;
301
302 /* 244 /*
303 * IMPORTANT: 245 * IMPORTANT:
304 * our prefix table contains the prefix length PLUS ONE 246 * Our prefix table contains the prefix length PLUS ONE
305 * this is our decision, because we want to use the full range of size_t 247 * this is our decision, because we want to use the full range of size_t.
306 * the original algorithm needs a (-1) at one single place 248 * The original algorithm needs a (-1) at one single place,
307 * and we want to avoid that 249 * and we want to avoid that.
308 */ 250 */
309 251
310 /* static prefix table */ 252 /* local prefix table */
311 static uint8_t s_prefix_table[256]; 253 size_t s_prefix_table[STRSTR_SBO_BUFLEN];
312 254
313 /* check pattern length and use appropriate prefix table */ 255 /* check needle length and use appropriate prefix table */
314 /* if the pattern exceeds static prefix table, allocate on the heap */ 256 /* if the pattern exceeds static prefix table, allocate on the heap */
315 register int useheap = matchlen > 255; 257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
316 register void* ptable = useheap ? 258 register size_t *ptable = useheap ? calloc(needle.length + 1,
317 calloc(matchlen+1, sizeof(size_t)): s_prefix_table; 259 sizeof(size_t)) : s_prefix_table;
318 260
319 /* keep counter in registers */ 261 /* keep counter in registers */
320 register size_t i, j; 262 register size_t i, j;
321 263
322 /* fill prefix table */ 264 /* fill prefix table */
323 i = 0; j = 0; 265 i = 0;
324 ptable_w(useheap, ptable, i, j); 266 j = 0;
325 while (i < matchlen) { 267 ptable[i] = j;
326 while (j >= 1 && match[j-1] != match[i]) { 268 while (i < needle.length) {
327 ptable_r(j, useheap, ptable, j-1); 269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
328 } 270 j = ptable[j - 1];
329 i++; j++; 271 }
330 ptable_w(useheap, ptable, i, j); 272 i++;
273 j++;
274 ptable[i] = j;
331 } 275 }
332 276
333 /* search */ 277 /* search */
334 i = 0; j = 1; 278 cxstring result = {NULL, 0};
335 while (i < length) { 279 i = 0;
336 while (j >= 1 && str[i] != match[j-1]) { 280 j = 1;
337 ptable_r(j, useheap, ptable, j-1); 281 while (i < haystack.length) {
338 } 282 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
339 i++; j++; 283 j = ptable[j - 1];
340 if (j-1 == matchlen) { 284 }
341 size_t start = i - matchlen; 285 i++;
342 result = str + start; 286 j++;
343 resultlen = length - start; 287 if (j - 1 == needle.length) {
288 size_t start = i - needle.length;
289 result.ptr = haystack.ptr + start;
290 result.length = haystack.length - start;
344 break; 291 break;
345 } 292 }
346 } 293 }
347 294
348 /* if prefix table was allocated on the heap, free it */ 295 /* if prefix table was allocated on the heap, free it */
349 if (ptable != s_prefix_table) { 296 if (ptable != s_prefix_table) {
350 free(ptable); 297 free(ptable);
351 } 298 }
352 299
353 *newlen = resultlen;
354 return result; 300 return result;
355 } 301 }
356 302
357 sstr_t scstrsstr(sstr_t string, scstr_t match) { 303 cxmutstr cx_strstr_m(
358 sstr_t result; 304 cxmutstr haystack,
359 305 cxstring needle
360 size_t reslen; 306 ) {
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); 307 cxstring result = cx_strstr(cx_strcast(haystack), needle);
362 if(!resstr) { 308 return (cxmutstr) {(char *) result.ptr, result.length};
363 result.ptr = NULL; 309 }
364 result.length = 0; 310
365 return result; 311 size_t cx_strsplit(
366 } 312 cxstring string,
367 313 cxstring delim,
368 size_t pos = resstr - string.ptr; 314 size_t limit,
369 result.ptr = string.ptr + pos; 315 cxstring *output
370 result.length = reslen; 316 ) {
371 317 /* special case: output limit is zero */
372 return result; 318 if (limit == 0) return 0;
373 } 319
374 320 /* special case: delimiter is empty */
375 scstr_t scstrscstr(scstr_t string, scstr_t match) { 321 if (delim.length == 0) {
376 scstr_t result; 322 output[0] = string;
377 323 return 1;
378 size_t reslen; 324 }
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); 325
380 if(!resstr) {
381 result.ptr = NULL;
382 result.length = 0;
383 return result;
384 }
385
386 size_t pos = resstr - string.ptr;
387 result.ptr = string.ptr + pos;
388 result.length = reslen;
389
390 return result;
391 }
392
393 #undef ptable_r
394 #undef ptable_w
395
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
397 return scstrsplit_a(ucx_default_allocator(), s, d, n);
398 }
399
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
401 if (s.length == 0 || d.length == 0) {
402 *n = -1;
403 return NULL;
404 }
405
406 /* special cases: delimiter is at least as large as the string */ 326 /* special cases: delimiter is at least as large as the string */
407 if (d.length >= s.length) { 327 if (delim.length >= string.length) {
408 /* exact match */ 328 /* exact match */
409 if (sstrcmp(s, d) == 0) { 329 if (cx_strcmp(string, delim) == 0) {
410 *n = 0; 330 output[0] = cx_strn(string.ptr, 0);
411 return NULL; 331 output[1] = cx_strn(string.ptr + string.length, 0);
332 return 2;
412 } else /* no match possible */ { 333 } else /* no match possible */ {
413 *n = 1; 334 output[0] = string;
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); 335 return 1;
415 if(result) { 336 }
416 *result = sstrdup_a(allocator, s); 337 }
338
339 size_t n = 0;
340 cxstring curpos = string;
341 while (1) {
342 ++n;
343 cxstring match = cx_strstr(curpos, delim);
344 if (match.length > 0) {
345 /* is the limit reached? */
346 if (n < limit) {
347 /* copy the current string to the array */
348 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
349 output[n - 1] = item;
350 size_t processed = item.length + delim.length;
351 curpos.ptr += processed;
352 curpos.length -= processed;
417 } else { 353 } else {
418 *n = -2; 354 /* limit reached, copy the _full_ remaining string */
419 } 355 output[n - 1] = curpos;
420 return result;
421 }
422 }
423
424 ssize_t nmax = *n;
425 size_t arrlen = 16;
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
427
428 if (result) {
429 scstr_t curpos = s;
430 ssize_t j = 1;
431 while (1) {
432 scstr_t match;
433 /* optimize for one byte delimiters */
434 if (d.length == 1) {
435 match = curpos;
436 for (size_t i = 0 ; i < curpos.length ; i++) {
437 if (curpos.ptr[i] == *(d.ptr)) {
438 match.ptr = curpos.ptr + i;
439 break;
440 }
441 match.length--;
442 }
443 } else {
444 match = scstrscstr(curpos, d);
445 }
446 if (match.length > 0) {
447 /* is this our last try? */
448 if (nmax == 0 || j < nmax) {
449 /* copy the current string to the array */
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
451 result[j-1] = sstrdup_a(allocator, item);
452 size_t processed = item.length + d.length;
453 curpos.ptr += processed;
454 curpos.length -= processed;
455
456 /* allocate memory for the next string */
457 j++;
458 if (j > arrlen) {
459 arrlen *= 2;
460 size_t reallocsz;
461 sstr_t* reallocated = NULL;
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
463 reallocated = (sstr_t*) alrealloc(
464 allocator, result, reallocsz);
465 }
466 if (reallocated) {
467 result = reallocated;
468 } else {
469 for (ssize_t i = 0 ; i < j-1 ; i++) {
470 alfree(allocator, result[i].ptr);
471 }
472 alfree(allocator, result);
473 *n = -2;
474 return NULL;
475 }
476 }
477 } else {
478 /* nmax reached, copy the _full_ remaining string */
479 result[j-1] = sstrdup_a(allocator, curpos);
480 break;
481 }
482 } else {
483 /* no more matches, copy last string */
484 result[j-1] = sstrdup_a(allocator, curpos);
485 break; 356 break;
486 } 357 }
487 } 358 } else {
488 *n = j; 359 /* no more matches, copy last string */
489 } else { 360 output[n - 1] = curpos;
490 *n = -2; 361 break;
491 } 362 }
492 363 }
493 return result; 364
494 } 365 return n;
495 366 }
496 int scstrcmp(scstr_t s1, scstr_t s2) { 367
368 size_t cx_strsplit_a(
369 CxAllocator *allocator,
370 cxstring string,
371 cxstring delim,
372 size_t limit,
373 cxstring **output
374 ) {
375 /* find out how many splits we're going to make and allocate memory */
376 size_t n = 0;
377 cxstring curpos = string;
378 while (1) {
379 ++n;
380 cxstring match = cx_strstr(curpos, delim);
381 if (match.length > 0) {
382 /* is the limit reached? */
383 if (n < limit) {
384 size_t processed = match.ptr - curpos.ptr + delim.length;
385 curpos.ptr += processed;
386 curpos.length -= processed;
387 } else {
388 /* limit reached */
389 break;
390 }
391 } else {
392 /* no more matches */
393 break;
394 }
395 }
396 *output = cxCalloc(allocator, n, sizeof(cxstring));
397 return cx_strsplit(string, delim, n, *output);
398 }
399
400 size_t cx_strsplit_m(
401 cxmutstr string,
402 cxstring delim,
403 size_t limit,
404 cxmutstr *output
405 ) {
406 return cx_strsplit(cx_strcast(string),
407 delim, limit, (cxstring *) output);
408 }
409
410 size_t cx_strsplit_ma(
411 CxAllocator *allocator,
412 cxmutstr string,
413 cxstring delim,
414 size_t limit,
415 cxmutstr **output
416 ) {
417 return cx_strsplit_a(allocator, cx_strcast(string),
418 delim, limit, (cxstring **) output);
419 }
420
421 int cx_strcmp(
422 cxstring s1,
423 cxstring s2
424 ) {
497 if (s1.length == s2.length) { 425 if (s1.length == s2.length) {
498 return memcmp(s1.ptr, s2.ptr, s1.length); 426 return memcmp(s1.ptr, s2.ptr, s1.length);
499 } else if (s1.length > s2.length) { 427 } else if (s1.length > s2.length) {
500 return 1; 428 return 1;
501 } else { 429 } else {
502 return -1; 430 return -1;
503 } 431 }
504 } 432 }
505 433
506 int scstrcasecmp(scstr_t s1, scstr_t s2) { 434 int cx_strcasecmp(
435 cxstring s1,
436 cxstring s2
437 ) {
507 if (s1.length == s2.length) { 438 if (s1.length == s2.length) {
508 #ifdef _WIN32 439 #ifdef _WIN32
509 return _strnicmp(s1.ptr, s2.ptr, s1.length); 440 return _strnicmp(s1.ptr, s2.ptr, s1.length);
510 #else 441 #else
511 return strncasecmp(s1.ptr, s2.ptr, s1.length); 442 return strncasecmp(s1.ptr, s2.ptr, s1.length);
515 } else { 446 } else {
516 return -1; 447 return -1;
517 } 448 }
518 } 449 }
519 450
520 sstr_t scstrdup(scstr_t s) { 451 cxmutstr cx_strdup_a(
521 return sstrdup_a(ucx_default_allocator(), s); 452 CxAllocator *allocator,
522 } 453 cxstring string
523 454 ) {
524 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { 455 cxmutstr result = {
525 sstr_t newstring; 456 cxMalloc(allocator, string.length + 1),
526 newstring.ptr = (char*)almalloc(allocator, s.length + 1); 457 string.length
527 if (newstring.ptr) { 458 };
528 newstring.length = s.length; 459 if (result.ptr == NULL) {
529 newstring.ptr[newstring.length] = 0; 460 result.length = 0;
530 461 return result;
531 memcpy(newstring.ptr, s.ptr, s.length); 462 }
532 } else { 463 memcpy(result.ptr, string.ptr, string.length);
533 newstring.length = 0; 464 result.ptr[string.length] = '\0';
534 } 465 return result;
535 466 }
536 return newstring; 467
537 } 468 cxstring cx_strtrim(cxstring string) {
538 469 cxstring result = string;
539 470 // TODO: optimize by comparing multiple bytes at once
540 static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { 471 while (result.length > 0 && isspace(*result.ptr)) {
541 const char *newptr = s; 472 result.ptr++;
542 size_t length = len; 473 result.length--;
543 474 }
544 while(length > 0 && isspace(*newptr)) { 475 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
545 newptr++; 476 result.length--;
546 length--; 477 }
547 } 478 return result;
548 while(length > 0 && isspace(newptr[length-1])) { 479 }
549 length--; 480
550 } 481 cxmutstr cx_strtrim_m(cxmutstr string) {
551 482 cxstring result = cx_strtrim(cx_strcast(string));
552 *newlen = length; 483 return (cxmutstr) {(char *) result.ptr, result.length};
553 return newptr - s; 484 }
554 } 485
555 486 bool cx_strprefix(
556 sstr_t sstrtrim(sstr_t string) { 487 cxstring string,
557 sstr_t newstr; 488 cxstring prefix
558 newstr.ptr = string.ptr 489 ) {
559 + ucx_strtrim(string.ptr, string.length, &newstr.length); 490 if (string.length < prefix.length) return false;
560 return newstr; 491 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
561 } 492 }
562 493
563 scstr_t scstrtrim(scstr_t string) { 494 bool cx_strsuffix(
564 scstr_t newstr; 495 cxstring string,
565 newstr.ptr = string.ptr 496 cxstring suffix
566 + ucx_strtrim(string.ptr, string.length, &newstr.length); 497 ) {
567 return newstr; 498 if (string.length < suffix.length) return false;
568 } 499 return memcmp(string.ptr + string.length - suffix.length,
569 500 suffix.ptr, suffix.length) == 0;
570 int scstrprefix(scstr_t string, scstr_t prefix) { 501 }
571 if (string.length == 0) { 502
572 return prefix.length == 0; 503 bool cx_strcaseprefix(
573 } 504 cxstring string,
574 if (prefix.length == 0) { 505 cxstring prefix
575 return 1; 506 ) {
576 } 507 if (string.length < prefix.length) return false;
577 508 #ifdef _WIN32
578 if (prefix.length > string.length) { 509 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
579 return 0; 510 #else
580 } else { 511 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
581 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; 512 #endif
582 } 513 }
583 } 514
584 515 bool cx_strcasesuffix(
585 int scstrsuffix(scstr_t string, scstr_t suffix) { 516 cxstring string,
586 if (string.length == 0) { 517 cxstring suffix
587 return suffix.length == 0; 518 ) {
588 } 519 if (string.length < suffix.length) return false;
589 if (suffix.length == 0) { 520 #ifdef _WIN32
590 return 1; 521 return _strnicmp(string.ptr+string.length-suffix.length,
591 } 522 suffix.ptr, suffix.length) == 0;
592 523 #else
593 if (suffix.length > string.length) { 524 return strncasecmp(string.ptr + string.length - suffix.length,
594 return 0; 525 suffix.ptr, suffix.length) == 0;
595 } else { 526 #endif
596 return memcmp(string.ptr+string.length-suffix.length, 527 }
597 suffix.ptr, suffix.length) == 0; 528
598 } 529 void cx_strlower(cxmutstr string) {
599 } 530 cx_for_n(i, string.length) {
600 531 string.ptr[i] = (char) tolower(string.ptr[i]);
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) { 532 }
602 if (string.length == 0) { 533 }
603 return prefix.length == 0; 534
604 } 535 void cx_strupper(cxmutstr string) {
605 if (prefix.length == 0) { 536 cx_for_n(i, string.length) {
606 return 1; 537 string.ptr[i] = (char) toupper(string.ptr[i]);
607 } 538 }
608
609 if (prefix.length > string.length) {
610 return 0;
611 } else {
612 scstr_t subs = scstrsubsl(string, 0, prefix.length);
613 return scstrcasecmp(subs, prefix) == 0;
614 }
615 }
616
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) {
618 if (string.length == 0) {
619 return suffix.length == 0;
620 }
621 if (suffix.length == 0) {
622 return 1;
623 }
624
625 if (suffix.length > string.length) {
626 return 0;
627 } else {
628 scstr_t subs = scstrsubs(string, string.length-suffix.length);
629 return scstrcasecmp(subs, suffix) == 0;
630 }
631 }
632
633 sstr_t scstrlower(scstr_t string) {
634 sstr_t ret = sstrdup(string);
635 for (size_t i = 0; i < ret.length ; i++) {
636 ret.ptr[i] = tolower(ret.ptr[i]);
637 }
638 return ret;
639 }
640
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
642 sstr_t ret = sstrdup_a(allocator, string);
643 for (size_t i = 0; i < ret.length ; i++) {
644 ret.ptr[i] = tolower(ret.ptr[i]);
645 }
646 return ret;
647 }
648
649 sstr_t scstrupper(scstr_t string) {
650 sstr_t ret = sstrdup(string);
651 for (size_t i = 0; i < ret.length ; i++) {
652 ret.ptr[i] = toupper(ret.ptr[i]);
653 }
654 return ret;
655 }
656
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
658 sstr_t ret = sstrdup_a(allocator, string);
659 for (size_t i = 0; i < ret.length ; i++) {
660 ret.ptr[i] = toupper(ret.ptr[i]);
661 }
662 return ret;
663 } 539 }
664 540
665 #define REPLACE_INDEX_BUFFER_MAX 100 541 #define REPLACE_INDEX_BUFFER_MAX 100
666 542
667 struct scstrreplace_ibuf { 543 struct cx_strreplace_ibuf {
668 size_t* buf; 544 size_t *buf;
669 unsigned int len; /* small indices */ 545 struct cx_strreplace_ibuf *next;
670 struct scstrreplace_ibuf* next; 546 unsigned int len;
671 }; 547 };
672 548
673 static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { 549 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
674 while (buf) { 550 while (buf) {
675 struct scstrreplace_ibuf *next = buf->next; 551 struct cx_strreplace_ibuf *next = buf->next;
676 free(buf->buf); 552 free(buf->buf);
677 free(buf); 553 free(buf);
678 buf = next; 554 buf = next;
679 } 555 }
680 } 556 }
681 557
682 sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, 558 cxmutstr cx_strreplacen_a(
683 scstr_t pattern, scstr_t replacement, size_t replmax) { 559 CxAllocator *allocator,
560 cxstring str,
561 cxstring pattern,
562 cxstring replacement,
563 size_t replmax
564 ) {
684 565
685 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) 566 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
686 return sstrdup(str); 567 return cx_strdup_a(allocator, str);
687 568
688 /* Compute expected buffer length */ 569 /* Compute expected buffer length */
689 size_t ibufmax = str.length / pattern.length; 570 size_t ibufmax = str.length / pattern.length;
690 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 571 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
691 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { 572 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
692 ibuflen = REPLACE_INDEX_BUFFER_MAX; 573 ibuflen = REPLACE_INDEX_BUFFER_MAX;
693 } 574 }
694 575
695 /* Allocate first index buffer */ 576 /* Allocate first index buffer */
696 struct scstrreplace_ibuf *firstbuf, *curbuf; 577 struct cx_strreplace_ibuf *firstbuf, *curbuf;
697 firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); 578 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
698 if (!firstbuf) return sstrn(NULL, 0); 579 if (!firstbuf) return cx_mutstrn(NULL, 0);
699 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); 580 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
700 if (!firstbuf->buf) { 581 if (!firstbuf->buf) {
701 free(firstbuf); 582 free(firstbuf);
702 return sstrn(NULL, 0); 583 return cx_mutstrn(NULL, 0);
703 } 584 }
704 585
705 /* Search occurrences */ 586 /* Search occurrences */
706 scstr_t searchstr = str; 587 cxstring searchstr = str;
707 size_t found = 0; 588 size_t found = 0;
708 do { 589 do {
709 scstr_t match = scstrscstr(searchstr, pattern); 590 cxstring match = cx_strstr(searchstr, pattern);
710 if (match.length > 0) { 591 if (match.length > 0) {
711 /* Allocate next buffer in chain, if required */ 592 /* Allocate next buffer in chain, if required */
712 if (curbuf->len == ibuflen) { 593 if (curbuf->len == ibuflen) {
713 struct scstrreplace_ibuf *nextbuf = 594 struct cx_strreplace_ibuf *nextbuf =
714 calloc(1, sizeof(struct scstrreplace_ibuf)); 595 calloc(1, sizeof(struct cx_strreplace_ibuf));
715 if (!nextbuf) { 596 if (!nextbuf) {
716 scstrrepl_free_ibuf(firstbuf); 597 cx_strrepl_free_ibuf(firstbuf);
717 return sstrn(NULL, 0); 598 return cx_mutstrn(NULL, 0);
718 } 599 }
719 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); 600 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
720 if (!nextbuf->buf) { 601 if (!nextbuf->buf) {
721 free(nextbuf); 602 free(nextbuf);
722 scstrrepl_free_ibuf(firstbuf); 603 cx_strrepl_free_ibuf(firstbuf);
723 return sstrn(NULL, 0); 604 return cx_mutstrn(NULL, 0);
724 } 605 }
725 curbuf->next = nextbuf; 606 curbuf->next = nextbuf;
726 curbuf = nextbuf; 607 curbuf = nextbuf;
727 } 608 }
728 609
736 break; 617 break;
737 } 618 }
738 } while (searchstr.length > 0 && found < replmax); 619 } while (searchstr.length > 0 && found < replmax);
739 620
740 /* Allocate result string */ 621 /* Allocate result string */
741 sstr_t result; 622 cxmutstr result;
742 { 623 {
743 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; 624 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
744 size_t rcount = 0; 625 size_t rcount = 0;
745 curbuf = firstbuf; 626 curbuf = firstbuf;
746 do { 627 do {
747 rcount += curbuf->len; 628 rcount += curbuf->len;
748 curbuf = curbuf->next; 629 curbuf = curbuf->next;
749 } while (curbuf); 630 } while (curbuf);
750 result.length = str.length + rcount * adjlen; 631 result.length = str.length + rcount * adjlen;
751 result.ptr = almalloc(allocator, result.length); 632 result.ptr = cxMalloc(allocator, result.length + 1);
752 if (!result.ptr) { 633 if (!result.ptr) {
753 scstrrepl_free_ibuf(firstbuf); 634 cx_strrepl_free_ibuf(firstbuf);
754 return sstrn(NULL, 0); 635 return cx_mutstrn(NULL, 0);
755 } 636 }
756 } 637 }
757 638
758 /* Build result string */ 639 /* Build result string */
759 curbuf = firstbuf; 640 curbuf = firstbuf;
760 size_t srcidx = 0; 641 size_t srcidx = 0;
761 char* destptr = result.ptr; 642 char *destptr = result.ptr;
762 do { 643 do {
763 for (size_t i = 0; i < curbuf->len; i++) { 644 for (size_t i = 0; i < curbuf->len; i++) {
764 /* Copy source part up to next match*/ 645 /* Copy source part up to next match*/
765 size_t idx = curbuf->buf[i]; 646 size_t idx = curbuf->buf[i];
766 size_t srclen = idx - srcidx; 647 size_t srclen = idx - srcidx;
767 if (srclen > 0) { 648 if (srclen > 0) {
768 memcpy(destptr, str.ptr+srcidx, srclen); 649 memcpy(destptr, str.ptr + srcidx, srclen);
769 destptr += srclen; 650 destptr += srclen;
770 srcidx += srclen; 651 srcidx += srclen;
771 } 652 }
772 653
773 /* Copy the replacement and skip the source pattern */ 654 /* Copy the replacement and skip the source pattern */
775 memcpy(destptr, replacement.ptr, replacement.length); 656 memcpy(destptr, replacement.ptr, replacement.length);
776 destptr += replacement.length; 657 destptr += replacement.length;
777 } 658 }
778 curbuf = curbuf->next; 659 curbuf = curbuf->next;
779 } while (curbuf); 660 } while (curbuf);
780 memcpy(destptr, str.ptr+srcidx, str.length-srcidx); 661 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
662
663 /* Result is guaranteed to be zero-terminated */
664 result.ptr[result.length] = '\0';
781 665
782 /* Free index buffer */ 666 /* Free index buffer */
783 scstrrepl_free_ibuf(firstbuf); 667 cx_strrepl_free_ibuf(firstbuf);
784 668
785 return result; 669 return result;
786 } 670 }
787 671
788 sstr_t scstrreplacen(scstr_t str, scstr_t pattern, 672
789 scstr_t replacement, size_t replmax) {
790 return scstrreplacen_a(ucx_default_allocator(),
791 str, pattern, replacement, replmax);
792 }
793
794
795 // type adjustment functions
796 scstr_t ucx_sc2sc(scstr_t str) {
797 return str;
798 }
799 scstr_t ucx_ss2sc(sstr_t str) {
800 scstr_t cs;
801 cs.ptr = str.ptr;
802 cs.length = str.length;
803 return cs;
804 }
805 scstr_t ucx_ss2c_s(scstr_t c) {
806 return c;
807 }

mercurial