112 free(strings); |
154 free(strings); |
113 |
155 |
114 return str; |
156 return str; |
115 } |
157 } |
116 |
158 |
117 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) { |
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) { |
118 va_list ap; |
160 va_list ap; |
119 va_start(ap, s2); |
161 va_start(ap, s1); |
120 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap); |
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); |
121 va_end(ap); |
163 va_end(ap); |
122 return s; |
164 return s; |
123 } |
165 } |
124 |
166 |
125 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) { |
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { |
126 va_list ap; |
168 va_list ap; |
127 va_start(ap, s2); |
169 va_start(ap, s1); |
128 sstr_t s = sstrvcat_a(a, count, s1, s2, ap); |
170 sstr_t s = sstrvcat_a(a, count, s1, ap); |
129 va_end(ap); |
171 va_end(ap); |
130 return s; |
172 return s; |
131 } |
173 } |
132 |
174 |
|
/**
 * Clamps a requested substring range to the bounds of a string.
 *
 * @param str_length length of the string the range refers to
 * @param start      requested start offset
 * @param length     requested number of bytes
 * @param newlen     receives the clamped length (0 when the range is invalid)
 * @param newpos     receives the start offset (0 when the range is invalid)
 * @return 1 if start lies within the string (start == str_length is accepted
 *         and yields an empty range), 0 if start is past the end
 */
static int ucx_substring(
        size_t str_length,
        size_t start,
        size_t length,
        size_t *newlen,
        size_t *newpos)
{
    /* outputs are always defined, even on the failure path */
    *newlen = 0;
    *newpos = 0;

    if (start > str_length) {
        return 0;
    }

    /* start <= str_length holds here, so the subtraction cannot wrap */
    const size_t available = str_length - start;
    *newlen = length > available ? available : length;
    *newpos = start;
    return 1;
}
|
196 |
133 sstr_t sstrsubs(sstr_t s, size_t start) { |
197 sstr_t sstrsubs(sstr_t s, size_t start) { |
134 return sstrsubsl (s, start, s.length-start); |
198 return sstrsubsl (s, start, s.length-start); |
135 } |
199 } |
136 |
200 |
137 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
138 sstr_t new_sstr; |
202 size_t pos; |
139 if (start >= s.length) { |
203 sstr_t ret = { NULL, 0 }; |
140 new_sstr.ptr = NULL; |
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
141 new_sstr.length = 0; |
205 ret.ptr = s.ptr + pos; |
142 } else { |
206 } |
143 if (length > s.length-start) { |
207 return ret; |
144 length = s.length-start; |
208 } |
145 } |
209 |
146 new_sstr.ptr = &s.ptr[start]; |
210 scstr_t scstrsubs(scstr_t string, size_t start) { |
147 new_sstr.length = length; |
211 return scstrsubsl(string, start, string.length-start); |
148 } |
212 } |
149 return new_sstr; |
213 |
|
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { |
|
215 size_t pos; |
|
216 scstr_t ret = { NULL, 0 }; |
|
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
|
218 ret.ptr = s.ptr + pos; |
|
219 } |
|
220 return ret; |
|
221 } |
|
222 |
|
223 |
|
/**
 * Finds the first occurrence of a byte in a length-delimited buffer.
 *
 * @param str    buffer to scan (does not need to be NUL-terminated)
 * @param length number of bytes to scan
 * @param chr    byte to look for; converted to char before comparing, as
 *               the standard strchr() does, so values such as 0xE9 also
 *               match on platforms where plain char is signed
 * @param pos    receives the index of the first match; untouched if none
 * @return 1 if the byte was found, 0 otherwise
 */
static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
    const char needle = (char) chr;
    for (size_t i = 0; i < length; i++) {
        if (str[i] == needle) {
            *pos = i;
            return 1;
        }
    }
    return 0;
}
|
233 |
|
/**
 * Finds the last occurrence of a byte in a length-delimited buffer.
 *
 * @param str    buffer to scan (does not need to be NUL-terminated)
 * @param length number of bytes to scan
 * @param chr    byte to look for; converted to char before comparing, as
 *               the standard strrchr() does, so values such as 0xE9 also
 *               match on platforms where plain char is signed
 * @param pos    receives the index of the last match; untouched if none
 * @return 1 if the byte was found, 0 otherwise
 */
static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
    const char needle = (char) chr;
    /* count down with i one past the candidate so the unsigned index
     * never has to go below zero */
    for (size_t i = length; i > 0; i--) {
        if (str[i - 1] == needle) {
            *pos = i - 1;
            return 1;
        }
    }
    return 0;
}
151 |
245 |
152 sstr_t sstrchr(sstr_t s, int c) { |
246 sstr_t sstrchr(sstr_t s, int c) { |
153 for(size_t i=0;i<s.length;i++) { |
247 size_t pos = 0; |
154 if(s.ptr[i] == c) { |
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
155 return sstrsubs(s, i); |
249 return sstrsubs(s, pos); |
156 } |
250 } |
157 } |
251 return sstrn(NULL, 0); |
158 sstr_t n; |
|
159 n.ptr = NULL; |
|
160 n.length = 0; |
|
161 return n; |
|
162 } |
252 } |
163 |
253 |
164 sstr_t sstrrchr(sstr_t s, int c) { |
254 sstr_t sstrrchr(sstr_t s, int c) { |
165 if (s.length > 0) { |
255 size_t pos = 0; |
166 for(size_t i=s.length;i>0;i--) { |
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
167 if(s.ptr[i-1] == c) { |
257 return sstrsubs(s, pos); |
168 return sstrsubs(s, i-1); |
258 } |
169 } |
259 return sstrn(NULL, 0); |
170 } |
260 } |
171 } |
261 |
172 sstr_t n; |
262 scstr_t scstrchr(scstr_t s, int c) { |
173 n.ptr = NULL; |
263 size_t pos = 0; |
174 n.length = 0; |
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
175 return n; |
265 return scstrsubs(s, pos); |
176 } |
266 } |
177 |
267 return scstrn(NULL, 0); |
178 sstr_t sstrstr(sstr_t string, sstr_t match) { |
268 } |
179 if (match.length == 0) { |
269 |
180 return string; |
270 scstr_t scstrrchr(scstr_t s, int c) { |
181 } |
271 size_t pos = 0; |
182 |
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
183 for (size_t i = 0 ; i < string.length ; i++) { |
273 return scstrsubs(s, pos); |
184 sstr_t substr = sstrsubs(string, i); |
274 } |
185 if (sstrprefix(substr, match)) { |
275 return scstrn(NULL, 0); |
186 return substr; |
276 } |
187 } |
277 |
188 } |
278 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ |
189 |
279 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) |
190 sstr_t emptystr; |
280 |
191 emptystr.length = 0; |
281 #define ptable_w(useheap, ptable, index, src) do {\ |
192 emptystr.ptr = NULL; |
282 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ |
193 return emptystr; |
283 else ((size_t*)ptable)[index] = src;\ |
194 } |
284 } while (0); |
195 |
285 |
196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
286 |
197 return sstrsplit_a(ucx_default_allocator(), s, d, n); |
/* Reads one prefix table entry; heap tables hold size_t, the small one uint8_t. */
static size_t ucx_ptable_get(const void *ptable, int useheap, size_t index) {
    return useheap
            ? ((const size_t*) ptable)[index]
            : (size_t) ((const uint8_t*) ptable)[index];
}

/* Writes one prefix table entry; heap tables hold size_t, the small one uint8_t. */
static void ucx_ptable_set(void *ptable, int useheap, size_t index,
        size_t value) {
    if (useheap) {
        ((size_t*) ptable)[index] = value;
    } else {
        ((uint8_t*) ptable)[index] = (uint8_t) value;
    }
}

/**
 * Searches a length-delimited buffer for the first occurrence of a pattern
 * using a prefix-table (Knuth-Morris-Pratt style) scan.
 *
 * @param str      buffer to search
 * @param length   number of bytes in str
 * @param match    pattern to look for
 * @param matchlen number of bytes in match
 * @param newlen   receives the number of bytes from the match position to
 *                 the end of str; length when matchlen is 0; 0 when there
 *                 is no match
 * @return pointer to the first match inside str, str itself when matchlen
 *         is 0, or NULL when the pattern does not occur
 */
static const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    *newlen = length;
    if (matchlen == 0) {
        return str;
    }

    /* a pattern longer than the text can never match */
    if (matchlen > length) {
        *newlen = 0;
        return NULL;
    }

    /*
     * IMPORTANT:
     * the prefix table contains the prefix length PLUS ONE.
     * This allows the full range of size_t to be used and avoids the (-1)
     * the original algorithm needs at one single place.
     */

    /* Table for patterns of up to 255 bytes. It lives on the stack rather
     * than in static storage, so concurrent calls do not share it. */
    uint8_t small_table[256];
    const int useheap = matchlen > 255;
    void *ptable = small_table;

    if (useheap) {
        ptable = calloc(matchlen + 1, sizeof(size_t));
        if (ptable == NULL) {
            /* no memory for the table: fall back to a plain scan so the
             * result is still correct instead of dereferencing NULL */
            for (size_t k = 0; k <= length - matchlen; k++) {
                if (memcmp(str + k, match, matchlen) == 0) {
                    *newlen = length - k;
                    return str + k;
                }
            }
            *newlen = 0;
            return NULL;
        }
    }

    const char *result = NULL;
    size_t resultlen = 0;
    size_t i, j;

    /* fill prefix table */
    i = 0;
    j = 0;
    ucx_ptable_set(ptable, useheap, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j - 1] != match[i]) {
            j = ucx_ptable_get(ptable, useheap, j - 1);
        }
        i++;
        j++;
        ucx_ptable_set(ptable, useheap, i, j);
    }

    /* search */
    i = 0;
    j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j - 1]) {
            j = ucx_ptable_get(ptable, useheap, j - 1);
        }
        i++;
        j++;
        if (j - 1 == matchlen) {
            /* i is one past the last matched byte */
            const size_t start = i - matchlen;
            result = str + start;
            resultlen = length - start;
            break;
        }
    }

    /* if the prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    *newlen = resultlen;
    return result;
}
|
356 |
|
357 sstr_t scstrsstr(sstr_t string, scstr_t match) { |
|
358 sstr_t result; |
|
359 |
|
360 size_t reslen; |
|
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
|
362 if(!resstr) { |
|
363 result.ptr = NULL; |
|
364 result.length = 0; |
|
365 return result; |
|
366 } |
|
367 |
|
368 size_t pos = resstr - string.ptr; |
|
369 result.ptr = string.ptr + pos; |
|
370 result.length = reslen; |
|
371 |
|
372 return result; |
|
373 } |
|
374 |
|
375 scstr_t scstrscstr(scstr_t string, scstr_t match) { |
|
376 scstr_t result; |
|
377 |
|
378 size_t reslen; |
|
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
|
380 if(!resstr) { |
|
381 result.ptr = NULL; |
|
382 result.length = 0; |
|
383 return result; |
|
384 } |
|
385 |
|
386 size_t pos = resstr - string.ptr; |
|
387 result.ptr = string.ptr + pos; |
|
388 result.length = reslen; |
|
389 |
|
390 return result; |
|
391 } |
|
392 |
|
393 #undef ptable_r |
|
394 #undef ptable_w |
|
395 |
|
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { |
|
397 return scstrsplit_a(ucx_default_allocator(), s, d, n); |
|
398 } |
|
399 |
|
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { |
201 if (s.length == 0 || d.length == 0) { |
401 if (s.length == 0 || d.length == 0) { |
202 *n = -1; |
402 *n = -1; |
203 return NULL; |
403 return NULL; |
204 } |
404 } |
205 |
405 |
206 sstr_t* result; |
406 /* special cases: delimiter is at least as large as the string */ |
|
407 if (d.length >= s.length) { |
|
408 /* exact match */ |
|
409 if (sstrcmp(s, d) == 0) { |
|
410 *n = 0; |
|
411 return NULL; |
|
412 } else /* no match possible */ { |
|
413 *n = 1; |
|
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
|
415 if(result) { |
|
416 *result = sstrdup_a(allocator, s); |
|
417 } else { |
|
418 *n = -2; |
|
419 } |
|
420 return result; |
|
421 } |
|
422 } |
|
423 |
207 ssize_t nmax = *n; |
424 ssize_t nmax = *n; |
208 *n = 1; |
425 size_t arrlen = 16; |
209 |
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); |
210 /* special case: exact match - no processing needed */ |
427 |
211 if (sstrcmp(s, d) == 0) { |
428 if (result) { |
212 *n = 0; |
429 scstr_t curpos = s; |
213 return NULL; |
430 ssize_t j = 1; |
214 } |
431 while (1) { |
215 sstr_t sv = sstrdup(s); |
432 scstr_t match; |
216 if (sv.length == 0) { |
433 /* optimize for one byte delimiters */ |
217 *n = -2; |
434 if (d.length == 1) { |
218 return NULL; |
435 match = curpos; |
219 } |
436 for (size_t i = 0 ; i < curpos.length ; i++) { |
220 |
437 if (curpos.ptr[i] == *(d.ptr)) { |
221 for (size_t i = 0 ; i < s.length ; i++) { |
438 match.ptr = curpos.ptr + i; |
222 sstr_t substr = sstrsubs(sv, i); |
439 break; |
223 if (sstrprefix(substr, d)) { |
440 } |
224 (*n)++; |
441 match.length--; |
225 for (size_t j = 0 ; j < d.length ; j++) { |
442 } |
226 sv.ptr[i+j] = 0; |
443 } else { |
|
444 match = scstrscstr(curpos, d); |
227 } |
445 } |
228 i += d.length - 1; // -1, because the loop will do a i++ |
446 if (match.length > 0) { |
229 } |
447 /* is this our last try? */ |
230 if ((*n) == nmax) break; |
448 if (nmax == 0 || j < nmax) { |
231 } |
449 /* copy the current string to the array */ |
232 result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); |
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); |
233 |
451 result[j-1] = sstrdup_a(allocator, item); |
234 if (result) { |
452 size_t processed = item.length + d.length; |
235 char *pptr = sv.ptr; |
453 curpos.ptr += processed; |
236 for (ssize_t i = 0 ; i < *n ; i++) { |
454 curpos.length -= processed; |
237 size_t l = strlen(pptr); |
455 |
238 char* ptr = (char*) almalloc(allocator, l + 1); |
456 /* allocate memory for the next string */ |
239 if (ptr) { |
457 j++; |
240 memcpy(ptr, pptr, l); |
458 if (j > arrlen) { |
241 ptr[l] = 0; |
459 arrlen *= 2; |
242 |
460 size_t reallocsz; |
243 result[i] = sstrn(ptr, l); |
461 sstr_t* reallocated = NULL; |
244 pptr += l + d.length; |
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { |
|
463 reallocated = (sstr_t*) alrealloc( |
|
464 allocator, result, reallocsz); |
|
465 } |
|
466 if (reallocated) { |
|
467 result = reallocated; |
|
468 } else { |
|
469 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
470 alfree(allocator, result[i].ptr); |
|
471 } |
|
472 alfree(allocator, result); |
|
473 *n = -2; |
|
474 return NULL; |
|
475 } |
|
476 } |
|
477 } else { |
|
478 /* nmax reached, copy the _full_ remaining string */ |
|
479 result[j-1] = sstrdup_a(allocator, curpos); |
|
480 break; |
|
481 } |
245 } else { |
482 } else { |
246 for (ssize_t j = i-1 ; j >= 0 ; j--) { |
483 /* no more matches, copy last string */ |
247 alfree(allocator, result[j].ptr); |
484 result[j-1] = sstrdup_a(allocator, curpos); |
248 } |
|
249 alfree(allocator, result); |
|
250 *n = -2; |
|
251 break; |
485 break; |
252 } |
486 } |
253 } |
487 } |
|
488 *n = j; |
254 } else { |
489 } else { |
255 *n = -2; |
490 *n = -2; |
256 } |
491 } |
257 |
|
258 free(sv.ptr); |
|
259 |
492 |
260 return result; |
493 return result; |
261 } |
494 } |
262 |
495 |
263 int sstrcmp(sstr_t s1, sstr_t s2) { |
496 int scstrcmp(scstr_t s1, scstr_t s2) { |
264 if (s1.length == s2.length) { |
497 if (s1.length == s2.length) { |
265 return memcmp(s1.ptr, s2.ptr, s1.length); |
498 return memcmp(s1.ptr, s2.ptr, s1.length); |
266 } else if (s1.length > s2.length) { |
499 } else if (s1.length > s2.length) { |
267 return 1; |
500 return 1; |
268 } else { |
501 } else { |
269 return -1; |
502 return -1; |
270 } |
503 } |
271 } |
504 } |
272 |
505 |
273 int sstrcasecmp(sstr_t s1, sstr_t s2) { |
506 int scstrcasecmp(scstr_t s1, scstr_t s2) { |
274 if (s1.length == s2.length) { |
507 if (s1.length == s2.length) { |
275 #ifdef _WIN32 |
508 #ifdef _WIN32 |
276 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
509 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
277 #else |
510 #else |
278 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
511 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
346 return memcmp(string.ptr+string.length-suffix.length, |
596 return memcmp(string.ptr+string.length-suffix.length, |
347 suffix.ptr, suffix.length) == 0; |
597 suffix.ptr, suffix.length) == 0; |
348 } |
598 } |
349 } |
599 } |
350 |
600 |
351 sstr_t sstrlower(sstr_t string) { |
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) { |
|
602 if (string.length == 0) { |
|
603 return prefix.length == 0; |
|
604 } |
|
605 if (prefix.length == 0) { |
|
606 return 1; |
|
607 } |
|
608 |
|
609 if (prefix.length > string.length) { |
|
610 return 0; |
|
611 } else { |
|
612 scstr_t subs = scstrsubsl(string, 0, prefix.length); |
|
613 return scstrcasecmp(subs, prefix) == 0; |
|
614 } |
|
615 } |
|
616 |
|
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) { |
|
618 if (string.length == 0) { |
|
619 return suffix.length == 0; |
|
620 } |
|
621 if (suffix.length == 0) { |
|
622 return 1; |
|
623 } |
|
624 |
|
625 if (suffix.length > string.length) { |
|
626 return 0; |
|
627 } else { |
|
628 scstr_t subs = scstrsubs(string, string.length-suffix.length); |
|
629 return scstrcasecmp(subs, suffix) == 0; |
|
630 } |
|
631 } |
|
632 |
|
633 sstr_t scstrlower(scstr_t string) { |
352 sstr_t ret = sstrdup(string); |
634 sstr_t ret = sstrdup(string); |
353 for (size_t i = 0; i < ret.length ; i++) { |
635 for (size_t i = 0; i < ret.length ; i++) { |
354 ret.ptr[i] = tolower(ret.ptr[i]); |
636 ret.ptr[i] = tolower(ret.ptr[i]); |
355 } |
637 } |
356 return ret; |
638 return ret; |
357 } |
639 } |
358 |
640 |
359 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { |
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { |
360 sstr_t ret = sstrdup_a(allocator, string); |
642 sstr_t ret = sstrdup_a(allocator, string); |
361 for (size_t i = 0; i < ret.length ; i++) { |
643 for (size_t i = 0; i < ret.length ; i++) { |
362 ret.ptr[i] = tolower(ret.ptr[i]); |
644 ret.ptr[i] = tolower(ret.ptr[i]); |
363 } |
645 } |
364 return ret; |
646 return ret; |
365 } |
647 } |
366 |
648 |
367 sstr_t sstrupper(sstr_t string) { |
649 sstr_t scstrupper(scstr_t string) { |
368 sstr_t ret = sstrdup(string); |
650 sstr_t ret = sstrdup(string); |
369 for (size_t i = 0; i < ret.length ; i++) { |
651 for (size_t i = 0; i < ret.length ; i++) { |
370 ret.ptr[i] = toupper(ret.ptr[i]); |
652 ret.ptr[i] = toupper(ret.ptr[i]); |
371 } |
653 } |
372 return ret; |
654 return ret; |
373 } |
655 } |
374 |
656 |
375 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { |
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { |
376 sstr_t ret = sstrdup_a(allocator, string); |
658 sstr_t ret = sstrdup_a(allocator, string); |
377 for (size_t i = 0; i < ret.length ; i++) { |
659 for (size_t i = 0; i < ret.length ; i++) { |
378 ret.ptr[i] = toupper(ret.ptr[i]); |
660 ret.ptr[i] = toupper(ret.ptr[i]); |
379 } |
661 } |
380 return ret; |
662 return ret; |
381 } |
663 } |
|
664 |
|
665 // type adjustment functions |
|
666 scstr_t ucx_sc2sc(scstr_t str) { |
|
667 return str; |
|
668 } |
|
669 scstr_t ucx_ss2sc(sstr_t str) { |
|
670 scstr_t cs; |
|
671 cs.ptr = str.ptr; |
|
672 cs.length = str.length; |
|
673 return cs; |
|
674 } |
|
675 scstr_t ucx_ss2c_s(scstr_t c) { |
|
676 return c; |
|
677 } |