24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 * POSSIBILITY OF SUCH DAMAGE. |
26 * POSSIBILITY OF SUCH DAMAGE. |
27 */ |
27 */ |
28 |
28 |
29 #include "ucx/string.h" |
29 #include "cx/string.h" |
30 |
30 #include "cx/utils.h" |
31 #include "ucx/allocator.h" |
31 |
32 |
|
33 #include <stdlib.h> |
|
34 #include <string.h> |
32 #include <string.h> |
35 #include <stdarg.h> |
33 #include <stdarg.h> |
36 #include <stdint.h> |
|
37 #include <ctype.h> |
34 #include <ctype.h> |
38 |
35 |
39 sstr_t sstr(char *cstring) { |
36 #ifndef _WIN32 |
40 sstr_t string; |
37 |
41 string.ptr = cstring; |
38 #include <strings.h> // for strncasecmp() |
42 string.length = strlen(cstring); |
39 |
43 return string; |
40 #endif // _WIN32 |
44 } |
41 |
45 |
42 cxmutstr cx_mutstr(char *cstring) { |
46 sstr_t sstrn(char *cstring, size_t length) { |
43 return (cxmutstr) {cstring, strlen(cstring)}; |
47 sstr_t string; |
44 } |
48 string.ptr = cstring; |
45 |
49 string.length = length; |
46 cxmutstr cx_mutstrn( |
50 return string; |
47 char *cstring, |
51 } |
48 size_t length |
52 |
49 ) { |
53 scstr_t scstr(const char *cstring) { |
50 return (cxmutstr) {cstring, length}; |
54 scstr_t string; |
51 } |
55 string.ptr = cstring; |
52 |
56 string.length = strlen(cstring); |
53 cxstring cx_str(const char *cstring) { |
57 return string; |
54 return (cxstring) {cstring, strlen(cstring)}; |
58 } |
55 } |
59 |
56 |
60 scstr_t scstrn(const char *cstring, size_t length) { |
57 cxstring cx_strn( |
61 scstr_t string; |
58 const char *cstring, |
62 string.ptr = cstring; |
59 size_t length |
63 string.length = length; |
60 ) { |
64 return string; |
61 return (cxstring) {cstring, length}; |
65 } |
62 } |
66 |
63 |
67 |
64 cxstring cx_strcast(cxmutstr str) { |
68 size_t scstrnlen(size_t n, ...) { |
65 return (cxstring) {str.ptr, str.length}; |
|
66 } |
|
67 |
|
68 void cx_strfree(cxmutstr *str) { |
|
69 free(str->ptr); |
|
70 str->ptr = NULL; |
|
71 str->length = 0; |
|
72 } |
|
73 |
|
74 void cx_strfree_a( |
|
75 CxAllocator const *alloc, |
|
76 cxmutstr *str |
|
77 ) { |
|
78 cxFree(alloc, str->ptr); |
|
79 str->ptr = NULL; |
|
80 str->length = 0; |
|
81 } |
|
82 |
|
83 size_t cx_strlen( |
|
84 size_t count, |
|
85 ... |
|
86 ) { |
|
87 if (count == 0) return 0; |
|
88 |
69 va_list ap; |
89 va_list ap; |
70 va_start(ap, n); |
90 va_start(ap, count); |
71 |
|
72 size_t size = 0; |
91 size_t size = 0; |
73 |
92 cx_for_n(i, count) { |
74 for (size_t i = 0 ; i < n ; i++) { |
93 cxstring str = va_arg(ap, cxstring); |
75 scstr_t str = va_arg(ap, scstr_t); |
|
76 if(SIZE_MAX - str.length < size) { |
|
77 size = SIZE_MAX; |
|
78 break; |
|
79 } |
|
80 size += str.length; |
94 size += str.length; |
81 } |
95 } |
82 va_end(ap); |
96 va_end(ap); |
83 |
97 |
84 return size; |
98 return size; |
85 } |
99 } |
86 |
100 |
87 static sstr_t sstrvcat_a( |
101 cxmutstr cx_strcat_a( |
88 UcxAllocator *a, |
102 CxAllocator const *alloc, |
89 size_t count, |
103 size_t count, |
90 scstr_t s1, |
104 ... |
91 va_list ap) { |
105 ) { |
92 sstr_t str; |
106 cxstring *strings = calloc(count, sizeof(cxstring)); |
93 str.ptr = NULL; |
107 if (!strings) abort(); |
94 str.length = 0; |
108 |
95 if(count < 2) { |
109 va_list ap; |
96 return str; |
110 va_start(ap, count); |
97 } |
111 |
98 |
|
99 scstr_t s2 = va_arg (ap, scstr_t); |
|
100 |
|
101 if(((size_t)-1) - s1.length < s2.length) { |
|
102 return str; |
|
103 } |
|
104 |
|
105 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); |
|
106 if(!strings) { |
|
107 return str; |
|
108 } |
|
109 |
|
110 // get all args and overall length |
112 // get all args and overall length |
111 strings[0] = s1; |
113 size_t slen = 0; |
112 strings[1] = s2; |
114 cx_for_n(i, count) { |
113 size_t slen = s1.length + s2.length; |
115 cxstring s = va_arg (ap, cxstring); |
114 int error = 0; |
|
115 for (size_t i=2;i<count;i++) { |
|
116 scstr_t s = va_arg (ap, scstr_t); |
|
117 strings[i] = s; |
116 strings[i] = s; |
118 if(((size_t)-1) - s.length < slen) { |
|
119 error = 1; |
|
120 break; |
|
121 } |
|
122 slen += s.length; |
117 slen += s.length; |
123 } |
118 } |
124 if(error) { |
119 |
125 free(strings); |
|
126 return str; |
|
127 } |
|
128 |
|
129 // create new string |
120 // create new string |
130 str.ptr = (char*) almalloc(a, slen + 1); |
121 cxmutstr result; |
131 str.length = slen; |
122 result.ptr = cxMalloc(alloc, slen + 1); |
132 if(!str.ptr) { |
123 result.length = slen; |
133 free(strings); |
124 if (result.ptr == NULL) abort(); |
134 str.length = 0; |
125 |
135 return str; |
|
136 } |
|
137 |
|
138 // concatenate strings |
126 // concatenate strings |
139 size_t pos = 0; |
127 size_t pos = 0; |
140 for (size_t i=0;i<count;i++) { |
128 cx_for_n(i, count) { |
141 scstr_t s = strings[i]; |
129 cxstring s = strings[i]; |
142 memcpy(str.ptr + pos, s.ptr, s.length); |
130 memcpy(result.ptr + pos, s.ptr, s.length); |
143 pos += s.length; |
131 pos += s.length; |
144 } |
132 } |
145 |
133 |
146 str.ptr[str.length] = '\0'; |
134 // terminate string |
147 |
135 result.ptr[result.length] = '\0'; |
|
136 |
|
137 // free temporary array |
148 free(strings); |
138 free(strings); |
149 |
139 |
150 return str; |
140 return result; |
151 } |
141 } |
152 |
142 |
153 sstr_t scstrcat(size_t count, scstr_t s1, ...) { |
143 cxstring cx_strsubs( |
154 va_list ap; |
144 cxstring string, |
155 va_start(ap, s1); |
145 size_t start |
156 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); |
146 ) { |
157 va_end(ap); |
147 return cx_strsubsl(string, start, string.length - start); |
158 return s; |
148 } |
159 } |
149 |
160 |
150 cxmutstr cx_strsubs_m( |
161 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { |
151 cxmutstr string, |
162 va_list ap; |
152 size_t start |
163 va_start(ap, s1); |
153 ) { |
164 sstr_t s = sstrvcat_a(a, count, s1, ap); |
154 return cx_strsubsl_m(string, start, string.length - start); |
165 va_end(ap); |
155 } |
166 return s; |
156 |
167 } |
157 cxstring cx_strsubsl( |
168 |
158 cxstring string, |
169 static int ucx_substring( |
|
170 size_t str_length, |
|
171 size_t start, |
159 size_t start, |
172 size_t length, |
160 size_t length |
173 size_t *newlen, |
161 ) { |
174 size_t *newpos) |
162 if (start > string.length) { |
175 { |
163 return (cxstring) {NULL, 0}; |
176 *newlen = 0; |
164 } |
177 *newpos = 0; |
165 |
178 |
166 size_t rem_len = string.length - start; |
179 if(start > str_length) { |
167 if (length > rem_len) { |
180 return 0; |
168 length = rem_len; |
181 } |
169 } |
182 |
170 |
183 if(length > str_length - start) { |
171 return (cxstring) {string.ptr + start, length}; |
184 length = str_length - start; |
172 } |
185 } |
173 |
186 *newlen = length; |
174 cxmutstr cx_strsubsl_m( |
187 *newpos = start; |
175 cxmutstr string, |
188 return 1; |
176 size_t start, |
189 } |
177 size_t length |
190 |
178 ) { |
191 sstr_t sstrsubs(sstr_t s, size_t start) { |
179 cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
192 return sstrsubsl (s, start, s.length-start); |
180 return (cxmutstr) {(char *) result.ptr, result.length}; |
193 } |
181 } |
194 |
182 |
195 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
183 cxstring cx_strchr( |
196 size_t pos; |
184 cxstring string, |
197 sstr_t ret = { NULL, 0 }; |
185 int chr |
198 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
186 ) { |
199 ret.ptr = s.ptr + pos; |
187 chr = 0xFF & chr; |
200 } |
188 // TODO: improve by comparing multiple bytes at once |
201 return ret; |
189 cx_for_n(i, string.length) { |
202 } |
190 if (string.ptr[i] == chr) { |
203 |
191 return cx_strsubs(string, i); |
204 scstr_t scstrsubs(scstr_t string, size_t start) { |
192 } |
205 return scstrsubsl(string, start, string.length-start); |
193 } |
206 } |
194 return (cxstring) {NULL, 0}; |
207 |
195 } |
208 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { |
196 |
209 size_t pos; |
197 cxmutstr cx_strchr_m( |
210 scstr_t ret = { NULL, 0 }; |
198 cxmutstr string, |
211 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
199 int chr |
212 ret.ptr = s.ptr + pos; |
200 ) { |
213 } |
201 cxstring result = cx_strchr(cx_strcast(string), chr); |
214 return ret; |
202 return (cxmutstr) {(char *) result.ptr, result.length}; |
215 } |
203 } |
216 |
204 |
217 |
205 cxstring cx_strrchr( |
218 static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { |
206 cxstring string, |
219 for(size_t i=0;i<length;i++) { |
207 int chr |
220 if(str[i] == chr) { |
208 ) { |
221 *pos = i; |
209 chr = 0xFF & chr; |
222 return 1; |
210 size_t i = string.length; |
223 } |
211 while (i > 0) { |
224 } |
212 i--; |
225 return 0; |
213 // TODO: improve by comparing multiple bytes at once |
226 } |
214 if (string.ptr[i] == chr) { |
227 |
215 return cx_strsubs(string, i); |
228 static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { |
216 } |
229 if(length > 0) { |
217 } |
230 for(size_t i=length ; i>0 ; i--) { |
218 return (cxstring) {NULL, 0}; |
231 if(str[i-1] == chr) { |
219 } |
232 *pos = i-1; |
220 |
233 return 1; |
221 cxmutstr cx_strrchr_m( |
234 } |
222 cxmutstr string, |
235 } |
223 int chr |
236 } |
224 ) { |
237 return 0; |
225 cxstring result = cx_strrchr(cx_strcast(string), chr); |
238 } |
226 return (cxmutstr) {(char *) result.ptr, result.length}; |
239 |
227 } |
240 sstr_t sstrchr(sstr_t s, int c) { |
228 |
241 size_t pos = 0; |
229 #ifndef CX_STRSTR_SBO_SIZE |
242 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
230 #define CX_STRSTR_SBO_SIZE 512 |
243 return sstrsubs(s, pos); |
231 #endif |
244 } |
232 |
245 return sstrn(NULL, 0); |
233 cxstring cx_strstr( |
246 } |
234 cxstring haystack, |
247 |
235 cxstring needle |
248 sstr_t sstrrchr(sstr_t s, int c) { |
236 ) { |
249 size_t pos = 0; |
237 if (needle.length == 0) { |
250 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
238 return haystack; |
251 return sstrsubs(s, pos); |
239 } |
252 } |
240 |
253 return sstrn(NULL, 0); |
241 // optimize for single-char needles |
254 } |
242 if (needle.length == 1) { |
255 |
243 return cx_strchr(haystack, *needle.ptr); |
256 scstr_t scstrchr(scstr_t s, int c) { |
244 } |
257 size_t pos = 0; |
245 |
258 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
|
259 return scstrsubs(s, pos); |
|
260 } |
|
261 return scstrn(NULL, 0); |
|
262 } |
|
263 |
|
264 scstr_t scstrrchr(scstr_t s, int c) { |
|
265 size_t pos = 0; |
|
266 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
|
267 return scstrsubs(s, pos); |
|
268 } |
|
269 return scstrn(NULL, 0); |
|
270 } |
|
271 |
|
272 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ |
|
273 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) |
|
274 |
|
275 #define ptable_w(useheap, ptable, index, src) do {\ |
|
276 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ |
|
277 else ((size_t*)ptable)[index] = src;\ |
|
278 } while (0); |
|
279 |
|
280 |
|
281 static const char* ucx_strstr( |
|
282 const char *str, |
|
283 size_t length, |
|
284 const char *match, |
|
285 size_t matchlen, |
|
286 size_t *newlen) |
|
287 { |
|
288 *newlen = length; |
|
289 if (matchlen == 0) { |
|
290 return str; |
|
291 } |
|
292 |
|
293 const char *result = NULL; |
|
294 size_t resultlen = 0; |
|
295 |
|
296 /* |
246 /* |
297 * IMPORTANT: |
247 * IMPORTANT: |
298 * our prefix table contains the prefix length PLUS ONE |
248 * Our prefix table contains the prefix length PLUS ONE |
299 * this is our decision, because we want to use the full range of size_t |
249 * this is our decision, because we want to use the full range of size_t. |
300 * the original algorithm needs a (-1) at one single place |
250 * The original algorithm needs a (-1) at one single place, |
301 * and we want to avoid that |
251 * and we want to avoid that. |
302 */ |
252 */ |
303 |
253 |
304 /* static prefix table */ |
254 // local prefix table |
305 static uint8_t s_prefix_table[256]; |
255 size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; |
306 |
256 |
307 /* check pattern length and use appropriate prefix table */ |
257 // check needle length and use appropriate prefix table |
308 /* if the pattern exceeds static prefix table, allocate on the heap */ |
258 // if the pattern exceeds static prefix table, allocate on the heap |
309 register int useheap = matchlen > 255; |
259 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; |
310 register void* ptable = useheap ? |
260 register size_t *ptable = useheap ? calloc(needle.length + 1, |
311 calloc(matchlen+1, sizeof(size_t)): s_prefix_table; |
261 sizeof(size_t)) : s_prefix_table; |
312 |
262 |
313 /* keep counter in registers */ |
263 // keep counter in registers |
314 register size_t i, j; |
264 register size_t i, j; |
315 |
265 |
316 /* fill prefix table */ |
266 // fill prefix table |
317 i = 0; j = 0; |
267 i = 0; |
318 ptable_w(useheap, ptable, i, j); |
268 j = 0; |
319 while (i < matchlen) { |
269 ptable[i] = j; |
320 while (j >= 1 && match[j-1] != match[i]) { |
270 while (i < needle.length) { |
321 ptable_r(j, useheap, ptable, j-1); |
271 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
322 } |
272 j = ptable[j - 1]; |
323 i++; j++; |
273 } |
324 ptable_w(useheap, ptable, i, j); |
274 i++; |
325 } |
275 j++; |
326 |
276 ptable[i] = j; |
327 /* search */ |
277 } |
328 i = 0; j = 1; |
278 |
329 while (i < length) { |
279 // search |
330 while (j >= 1 && str[i] != match[j-1]) { |
280 cxstring result = {NULL, 0}; |
331 ptable_r(j, useheap, ptable, j-1); |
281 i = 0; |
332 } |
282 j = 1; |
333 i++; j++; |
283 while (i < haystack.length) { |
334 if (j-1 == matchlen) { |
284 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
335 size_t start = i - matchlen; |
285 j = ptable[j - 1]; |
336 result = str + start; |
286 } |
337 resultlen = length - start; |
287 i++; |
|
288 j++; |
|
289 if (j - 1 == needle.length) { |
|
290 size_t start = i - needle.length; |
|
291 result.ptr = haystack.ptr + start; |
|
292 result.length = haystack.length - start; |
338 break; |
293 break; |
339 } |
294 } |
340 } |
295 } |
341 |
296 |
342 /* if prefix table was allocated on the heap, free it */ |
297 // if prefix table was allocated on the heap, free it |
343 if (ptable != s_prefix_table) { |
298 if (ptable != s_prefix_table) { |
344 free(ptable); |
299 free(ptable); |
345 } |
300 } |
346 |
301 |
347 *newlen = resultlen; |
|
348 return result; |
302 return result; |
349 } |
303 } |
350 |
304 |
351 sstr_t scstrsstr(sstr_t string, scstr_t match) { |
305 cxmutstr cx_strstr_m( |
352 sstr_t result; |
306 cxmutstr haystack, |
353 |
307 cxstring needle |
354 size_t reslen; |
308 ) { |
355 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
309 cxstring result = cx_strstr(cx_strcast(haystack), needle); |
356 if(!resstr) { |
310 return (cxmutstr) {(char *) result.ptr, result.length}; |
357 result.ptr = NULL; |
311 } |
358 result.length = 0; |
312 |
359 return result; |
313 size_t cx_strsplit( |
360 } |
314 cxstring string, |
361 |
315 cxstring delim, |
362 size_t pos = resstr - string.ptr; |
316 size_t limit, |
363 result.ptr = string.ptr + pos; |
317 cxstring *output |
364 result.length = reslen; |
318 ) { |
365 |
319 // special case: output limit is zero |
366 return result; |
320 if (limit == 0) return 0; |
367 } |
321 |
368 |
322 // special case: delimiter is empty |
369 scstr_t scstrscstr(scstr_t string, scstr_t match) { |
323 if (delim.length == 0) { |
370 scstr_t result; |
324 output[0] = string; |
371 |
325 return 1; |
372 size_t reslen; |
326 } |
373 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
327 |
374 if(!resstr) { |
328 // special cases: delimiter is at least as large as the string |
375 result.ptr = NULL; |
329 if (delim.length >= string.length) { |
376 result.length = 0; |
330 // exact match |
377 return result; |
331 if (cx_strcmp(string, delim) == 0) { |
378 } |
332 output[0] = cx_strn(string.ptr, 0); |
379 |
333 output[1] = cx_strn(string.ptr + string.length, 0); |
380 size_t pos = resstr - string.ptr; |
334 return 2; |
381 result.ptr = string.ptr + pos; |
335 } else { |
382 result.length = reslen; |
336 // no match possible |
383 |
337 output[0] = string; |
384 return result; |
338 return 1; |
385 } |
339 } |
386 |
340 } |
387 #undef ptable_r |
341 |
388 #undef ptable_w |
342 size_t n = 0; |
389 |
343 cxstring curpos = string; |
390 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { |
344 while (1) { |
391 return scstrsplit_a(ucx_default_allocator(), s, d, n); |
345 ++n; |
392 } |
346 cxstring match = cx_strstr(curpos, delim); |
393 |
347 if (match.length > 0) { |
394 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { |
348 // is the limit reached? |
395 if (s.length == 0 || d.length == 0) { |
349 if (n < limit) { |
396 *n = -1; |
350 // copy the current string to the array |
397 return NULL; |
351 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
398 } |
352 output[n - 1] = item; |
399 |
353 size_t processed = item.length + delim.length; |
400 /* special cases: delimiter is at least as large as the string */ |
354 curpos.ptr += processed; |
401 if (d.length >= s.length) { |
355 curpos.length -= processed; |
402 /* exact match */ |
|
403 if (sstrcmp(s, d) == 0) { |
|
404 *n = 0; |
|
405 return NULL; |
|
406 } else /* no match possible */ { |
|
407 *n = 1; |
|
408 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
|
409 if(result) { |
|
410 *result = sstrdup_a(allocator, s); |
|
411 } else { |
356 } else { |
412 *n = -2; |
357 // limit reached, copy the _full_ remaining string |
413 } |
358 output[n - 1] = curpos; |
414 return result; |
|
415 } |
|
416 } |
|
417 |
|
418 ssize_t nmax = *n; |
|
419 size_t arrlen = 16; |
|
420 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); |
|
421 |
|
422 if (result) { |
|
423 scstr_t curpos = s; |
|
424 ssize_t j = 1; |
|
425 while (1) { |
|
426 scstr_t match; |
|
427 /* optimize for one byte delimiters */ |
|
428 if (d.length == 1) { |
|
429 match = curpos; |
|
430 for (size_t i = 0 ; i < curpos.length ; i++) { |
|
431 if (curpos.ptr[i] == *(d.ptr)) { |
|
432 match.ptr = curpos.ptr + i; |
|
433 break; |
|
434 } |
|
435 match.length--; |
|
436 } |
|
437 } else { |
|
438 match = scstrscstr(curpos, d); |
|
439 } |
|
440 if (match.length > 0) { |
|
441 /* is this our last try? */ |
|
442 if (nmax == 0 || j < nmax) { |
|
443 /* copy the current string to the array */ |
|
444 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); |
|
445 result[j-1] = sstrdup_a(allocator, item); |
|
446 size_t processed = item.length + d.length; |
|
447 curpos.ptr += processed; |
|
448 curpos.length -= processed; |
|
449 |
|
450 /* allocate memory for the next string */ |
|
451 j++; |
|
452 if (j > arrlen) { |
|
453 arrlen *= 2; |
|
454 size_t reallocsz; |
|
455 sstr_t* reallocated = NULL; |
|
456 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { |
|
457 reallocated = (sstr_t*) alrealloc( |
|
458 allocator, result, reallocsz); |
|
459 } |
|
460 if (reallocated) { |
|
461 result = reallocated; |
|
462 } else { |
|
463 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
464 alfree(allocator, result[i].ptr); |
|
465 } |
|
466 alfree(allocator, result); |
|
467 *n = -2; |
|
468 return NULL; |
|
469 } |
|
470 } |
|
471 } else { |
|
472 /* nmax reached, copy the _full_ remaining string */ |
|
473 result[j-1] = sstrdup_a(allocator, curpos); |
|
474 break; |
|
475 } |
|
476 } else { |
|
477 /* no more matches, copy last string */ |
|
478 result[j-1] = sstrdup_a(allocator, curpos); |
|
479 break; |
359 break; |
480 } |
360 } |
481 } |
361 } else { |
482 *n = j; |
362 // no more matches, copy last string |
483 } else { |
363 output[n - 1] = curpos; |
484 *n = -2; |
364 break; |
485 } |
365 } |
486 |
366 } |
487 return result; |
367 |
488 } |
368 return n; |
489 |
369 } |
490 int scstrcmp(scstr_t s1, scstr_t s2) { |
370 |
|
371 size_t cx_strsplit_a( |
|
372 CxAllocator const *allocator, |
|
373 cxstring string, |
|
374 cxstring delim, |
|
375 size_t limit, |
|
376 cxstring **output |
|
377 ) { |
|
378 // find out how many splits we're going to make and allocate memory |
|
379 size_t n = 0; |
|
380 cxstring curpos = string; |
|
381 while (1) { |
|
382 ++n; |
|
383 cxstring match = cx_strstr(curpos, delim); |
|
384 if (match.length > 0) { |
|
385 // is the limit reached? |
|
386 if (n < limit) { |
|
387 size_t processed = match.ptr - curpos.ptr + delim.length; |
|
388 curpos.ptr += processed; |
|
389 curpos.length -= processed; |
|
390 } else { |
|
391 // limit reached |
|
392 break; |
|
393 } |
|
394 } else { |
|
395 // no more matches |
|
396 break; |
|
397 } |
|
398 } |
|
399 *output = cxCalloc(allocator, n, sizeof(cxstring)); |
|
400 return cx_strsplit(string, delim, n, *output); |
|
401 } |
|
402 |
|
403 size_t cx_strsplit_m( |
|
404 cxmutstr string, |
|
405 cxstring delim, |
|
406 size_t limit, |
|
407 cxmutstr *output |
|
408 ) { |
|
409 return cx_strsplit(cx_strcast(string), |
|
410 delim, limit, (cxstring *) output); |
|
411 } |
|
412 |
|
413 size_t cx_strsplit_ma( |
|
414 CxAllocator const *allocator, |
|
415 cxmutstr string, |
|
416 cxstring delim, |
|
417 size_t limit, |
|
418 cxmutstr **output |
|
419 ) { |
|
420 return cx_strsplit_a(allocator, cx_strcast(string), |
|
421 delim, limit, (cxstring **) output); |
|
422 } |
|
423 |
|
424 int cx_strcmp( |
|
425 cxstring s1, |
|
426 cxstring s2 |
|
427 ) { |
491 if (s1.length == s2.length) { |
428 if (s1.length == s2.length) { |
492 return memcmp(s1.ptr, s2.ptr, s1.length); |
429 return memcmp(s1.ptr, s2.ptr, s1.length); |
493 } else if (s1.length > s2.length) { |
430 } else if (s1.length > s2.length) { |
494 return 1; |
431 return 1; |
495 } else { |
432 } else { |
496 return -1; |
433 return -1; |
497 } |
434 } |
498 } |
435 } |
499 |
436 |
500 int scstrcasecmp(scstr_t s1, scstr_t s2) { |
437 int cx_strcasecmp( |
|
438 cxstring s1, |
|
439 cxstring s2 |
|
440 ) { |
501 if (s1.length == s2.length) { |
441 if (s1.length == s2.length) { |
502 #ifdef _WIN32 |
442 #ifdef _WIN32 |
503 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
443 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
504 #else |
444 #else |
505 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
445 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
509 } else { |
449 } else { |
510 return -1; |
450 return -1; |
511 } |
451 } |
512 } |
452 } |
513 |
453 |
514 sstr_t scstrdup(scstr_t s) { |
454 int cx_strcmp_p( |
515 return sstrdup_a(ucx_default_allocator(), s); |
455 void const *s1, |
516 } |
456 void const *s2 |
517 |
457 ) { |
518 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { |
458 cxstring const *left = s1; |
519 sstr_t newstring; |
459 cxstring const *right = s2; |
520 newstring.ptr = (char*)almalloc(allocator, s.length + 1); |
460 return cx_strcmp(*left, *right); |
521 if (newstring.ptr) { |
461 } |
522 newstring.length = s.length; |
462 |
523 newstring.ptr[newstring.length] = 0; |
463 int cx_strcasecmp_p( |
524 |
464 void const *s1, |
525 memcpy(newstring.ptr, s.ptr, s.length); |
465 void const *s2 |
526 } else { |
466 ) { |
527 newstring.length = 0; |
467 cxstring const *left = s1; |
528 } |
468 cxstring const *right = s2; |
529 |
469 return cx_strcasecmp(*left, *right); |
530 return newstring; |
470 } |
531 } |
471 |
532 |
472 cxmutstr cx_strdup_a( |
533 |
473 CxAllocator const *allocator, |
534 static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { |
474 cxstring string |
535 const char *newptr = s; |
475 ) { |
536 size_t length = len; |
476 cxmutstr result = { |
537 |
477 cxMalloc(allocator, string.length + 1), |
538 while(length > 0 && isspace(*newptr)) { |
478 string.length |
539 newptr++; |
479 }; |
540 length--; |
480 if (result.ptr == NULL) { |
541 } |
481 result.length = 0; |
542 while(length > 0 && isspace(newptr[length-1])) { |
482 return result; |
543 length--; |
483 } |
544 } |
484 memcpy(result.ptr, string.ptr, string.length); |
545 |
485 result.ptr[string.length] = '\0'; |
546 *newlen = length; |
486 return result; |
547 return newptr - s; |
487 } |
548 } |
488 |
549 |
489 cxstring cx_strtrim(cxstring string) { |
550 sstr_t sstrtrim(sstr_t string) { |
490 cxstring result = string; |
551 sstr_t newstr; |
491 // TODO: optimize by comparing multiple bytes at once |
552 newstr.ptr = string.ptr |
492 while (result.length > 0 && isspace(*result.ptr)) { |
553 + ucx_strtrim(string.ptr, string.length, &newstr.length); |
493 result.ptr++; |
554 return newstr; |
494 result.length--; |
555 } |
495 } |
556 |
496 while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
557 scstr_t scstrtrim(scstr_t string) { |
497 result.length--; |
558 scstr_t newstr; |
498 } |
559 newstr.ptr = string.ptr |
499 return result; |
560 + ucx_strtrim(string.ptr, string.length, &newstr.length); |
500 } |
561 return newstr; |
501 |
562 } |
502 cxmutstr cx_strtrim_m(cxmutstr string) { |
563 |
503 cxstring result = cx_strtrim(cx_strcast(string)); |
564 int scstrprefix(scstr_t string, scstr_t prefix) { |
504 return (cxmutstr) {(char *) result.ptr, result.length}; |
565 if (string.length == 0) { |
505 } |
566 return prefix.length == 0; |
506 |
567 } |
507 bool cx_strprefix( |
568 if (prefix.length == 0) { |
508 cxstring string, |
569 return 1; |
509 cxstring prefix |
570 } |
510 ) { |
571 |
511 if (string.length < prefix.length) return false; |
572 if (prefix.length > string.length) { |
512 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
573 return 0; |
513 } |
574 } else { |
514 |
575 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
515 bool cx_strsuffix( |
576 } |
516 cxstring string, |
577 } |
517 cxstring suffix |
578 |
518 ) { |
579 int scstrsuffix(scstr_t string, scstr_t suffix) { |
519 if (string.length < suffix.length) return false; |
580 if (string.length == 0) { |
520 return memcmp(string.ptr + string.length - suffix.length, |
581 return suffix.length == 0; |
521 suffix.ptr, suffix.length) == 0; |
582 } |
522 } |
583 if (suffix.length == 0) { |
523 |
584 return 1; |
524 bool cx_strcaseprefix( |
585 } |
525 cxstring string, |
586 |
526 cxstring prefix |
587 if (suffix.length > string.length) { |
527 ) { |
588 return 0; |
528 if (string.length < prefix.length) return false; |
589 } else { |
529 #ifdef _WIN32 |
590 return memcmp(string.ptr+string.length-suffix.length, |
530 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
591 suffix.ptr, suffix.length) == 0; |
531 #else |
592 } |
532 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
593 } |
533 #endif |
594 |
534 } |
595 sstr_t scstrlower(scstr_t string) { |
535 |
596 sstr_t ret = sstrdup(string); |
536 bool cx_strcasesuffix( |
597 for (size_t i = 0; i < ret.length ; i++) { |
537 cxstring string, |
598 ret.ptr[i] = tolower(ret.ptr[i]); |
538 cxstring suffix |
599 } |
539 ) { |
600 return ret; |
540 if (string.length < suffix.length) return false; |
601 } |
541 #ifdef _WIN32 |
602 |
542 return _strnicmp(string.ptr+string.length-suffix.length, |
603 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { |
543 suffix.ptr, suffix.length) == 0; |
604 sstr_t ret = sstrdup_a(allocator, string); |
544 #else |
605 for (size_t i = 0; i < ret.length ; i++) { |
545 return strncasecmp(string.ptr + string.length - suffix.length, |
606 ret.ptr[i] = tolower(ret.ptr[i]); |
546 suffix.ptr, suffix.length) == 0; |
607 } |
547 #endif |
608 return ret; |
548 } |
609 } |
549 |
610 |
550 void cx_strlower(cxmutstr string) { |
611 sstr_t scstrupper(scstr_t string) { |
551 cx_for_n(i, string.length) { |
612 sstr_t ret = sstrdup(string); |
552 string.ptr[i] = (char) tolower(string.ptr[i]); |
613 for (size_t i = 0; i < ret.length ; i++) { |
553 } |
614 ret.ptr[i] = toupper(ret.ptr[i]); |
554 } |
615 } |
555 |
616 return ret; |
556 void cx_strupper(cxmutstr string) { |
617 } |
557 cx_for_n(i, string.length) { |
618 |
558 string.ptr[i] = (char) toupper(string.ptr[i]); |
619 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { |
559 } |
620 sstr_t ret = sstrdup_a(allocator, string); |
560 } |
621 for (size_t i = 0; i < ret.length ; i++) { |
561 |
622 ret.ptr[i] = toupper(ret.ptr[i]); |
562 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE |
623 } |
563 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 |
624 return ret; |
564 #endif |
625 } |
565 |
626 |
566 struct cx_strreplace_ibuf { |
627 // type adjustment functions |
567 size_t *buf; |
628 scstr_t ucx_sc2sc(scstr_t str) { |
568 struct cx_strreplace_ibuf *next; |
629 return str; |
569 unsigned int len; |
630 } |
570 }; |
631 scstr_t ucx_ss2sc(sstr_t str) { |
571 |
632 scstr_t cs; |
572 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { |
633 cs.ptr = str.ptr; |
573 while (buf) { |
634 cs.length = str.length; |
574 struct cx_strreplace_ibuf *next = buf->next; |
635 return cs; |
575 free(buf->buf); |
636 } |
576 free(buf); |
637 scstr_t ucx_ss2c_s(scstr_t c) { |
577 buf = next; |
638 return c; |
578 } |
639 } |
579 } |
|
580 |
|
581 cxmutstr cx_strreplacen_a( |
|
582 CxAllocator const *allocator, |
|
583 cxstring str, |
|
584 cxstring pattern, |
|
585 cxstring replacement, |
|
586 size_t replmax |
|
587 ) { |
|
588 |
|
589 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
|
590 return cx_strdup_a(allocator, str); |
|
591 |
|
592 // Compute expected buffer length |
|
593 size_t ibufmax = str.length / pattern.length; |
|
594 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
|
595 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { |
|
596 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; |
|
597 } |
|
598 |
|
599 // Allocate first index buffer |
|
600 struct cx_strreplace_ibuf *firstbuf, *curbuf; |
|
601 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
|
602 if (!firstbuf) return cx_mutstrn(NULL, 0); |
|
603 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
|
604 if (!firstbuf->buf) { |
|
605 free(firstbuf); |
|
606 return cx_mutstrn(NULL, 0); |
|
607 } |
|
608 |
|
609 // Search occurrences |
|
610 cxstring searchstr = str; |
|
611 size_t found = 0; |
|
612 do { |
|
613 cxstring match = cx_strstr(searchstr, pattern); |
|
614 if (match.length > 0) { |
|
615 // Allocate next buffer in chain, if required |
|
616 if (curbuf->len == ibuflen) { |
|
617 struct cx_strreplace_ibuf *nextbuf = |
|
618 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
|
619 if (!nextbuf) { |
|
620 cx_strrepl_free_ibuf(firstbuf); |
|
621 return cx_mutstrn(NULL, 0); |
|
622 } |
|
623 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
|
624 if (!nextbuf->buf) { |
|
625 free(nextbuf); |
|
626 cx_strrepl_free_ibuf(firstbuf); |
|
627 return cx_mutstrn(NULL, 0); |
|
628 } |
|
629 curbuf->next = nextbuf; |
|
630 curbuf = nextbuf; |
|
631 } |
|
632 |
|
633 // Record match index |
|
634 found++; |
|
635 size_t idx = match.ptr - str.ptr; |
|
636 curbuf->buf[curbuf->len++] = idx; |
|
637 searchstr.ptr = match.ptr + pattern.length; |
|
638 searchstr.length = str.length - idx - pattern.length; |
|
639 } else { |
|
640 break; |
|
641 } |
|
642 } while (searchstr.length > 0 && found < replmax); |
|
643 |
|
644 // Allocate result string |
|
645 cxmutstr result; |
|
646 { |
|
647 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
|
648 size_t rcount = 0; |
|
649 curbuf = firstbuf; |
|
650 do { |
|
651 rcount += curbuf->len; |
|
652 curbuf = curbuf->next; |
|
653 } while (curbuf); |
|
654 result.length = str.length + rcount * adjlen; |
|
655 result.ptr = cxMalloc(allocator, result.length + 1); |
|
656 if (!result.ptr) { |
|
657 cx_strrepl_free_ibuf(firstbuf); |
|
658 return cx_mutstrn(NULL, 0); |
|
659 } |
|
660 } |
|
661 |
|
662 // Build result string |
|
663 curbuf = firstbuf; |
|
664 size_t srcidx = 0; |
|
665 char *destptr = result.ptr; |
|
666 do { |
|
667 for (size_t i = 0; i < curbuf->len; i++) { |
|
668 // Copy source part up to next match |
|
669 size_t idx = curbuf->buf[i]; |
|
670 size_t srclen = idx - srcidx; |
|
671 if (srclen > 0) { |
|
672 memcpy(destptr, str.ptr + srcidx, srclen); |
|
673 destptr += srclen; |
|
674 srcidx += srclen; |
|
675 } |
|
676 |
|
677 // Copy the replacement and skip the source pattern |
|
678 srcidx += pattern.length; |
|
679 memcpy(destptr, replacement.ptr, replacement.length); |
|
680 destptr += replacement.length; |
|
681 } |
|
682 curbuf = curbuf->next; |
|
683 } while (curbuf); |
|
684 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
|
685 |
|
686 // Result is guaranteed to be zero-terminated |
|
687 result.ptr[result.length] = '\0'; |
|
688 |
|
689 // Free index buffer |
|
690 cx_strrepl_free_ibuf(firstbuf); |
|
691 |
|
692 return result; |
|
693 } |
|
694 |
|
695 CxStrtokCtx cx_strtok( |
|
696 cxstring str, |
|
697 cxstring delim, |
|
698 size_t limit |
|
699 ) { |
|
700 CxStrtokCtx ctx; |
|
701 ctx.str = str; |
|
702 ctx.delim = delim; |
|
703 ctx.limit = limit; |
|
704 ctx.pos = 0; |
|
705 ctx.next_pos = 0; |
|
706 ctx.delim_pos = 0; |
|
707 ctx.found = 0; |
|
708 ctx.delim_more = NULL; |
|
709 ctx.delim_more_count = 0; |
|
710 return ctx; |
|
711 } |
|
712 |
|
713 CxStrtokCtx cx_strtok_m( |
|
714 cxmutstr str, |
|
715 cxstring delim, |
|
716 size_t limit |
|
717 ) { |
|
718 return cx_strtok(cx_strcast(str), delim, limit); |
|
719 } |
|
720 |
|
721 bool cx_strtok_next( |
|
722 CxStrtokCtx *ctx, |
|
723 cxstring *token |
|
724 ) { |
|
725 // abortion criteria |
|
726 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { |
|
727 return false; |
|
728 } |
|
729 |
|
730 // determine the search start |
|
731 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos); |
|
732 |
|
733 // search the next delimiter |
|
734 cxstring delim = cx_strstr(haystack, ctx->delim); |
|
735 |
|
736 // if found, make delim capture exactly the delimiter |
|
737 if (delim.length > 0) { |
|
738 delim.length = ctx->delim.length; |
|
739 } |
|
740 |
|
741 // if more delimiters are specified, check them now |
|
742 if (ctx->delim_more_count > 0) { |
|
743 cx_for_n(i, ctx->delim_more_count) { |
|
744 cxstring d = cx_strstr(haystack, ctx->delim_more[i]); |
|
745 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { |
|
746 delim.ptr = d.ptr; |
|
747 delim.length = ctx->delim_more[i].length; |
|
748 } |
|
749 } |
|
750 } |
|
751 |
|
752 // store the token information and adjust the context |
|
753 ctx->found++; |
|
754 ctx->pos = ctx->next_pos; |
|
755 token->ptr = &ctx->str.ptr[ctx->pos]; |
|
756 ctx->delim_pos = delim.length == 0 ? |
|
757 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); |
|
758 token->length = ctx->delim_pos - ctx->pos; |
|
759 ctx->next_pos = ctx->delim_pos + delim.length; |
|
760 |
|
761 return true; |
|
762 } |
|
763 |
|
764 bool cx_strtok_next_m( |
|
765 CxStrtokCtx *ctx, |
|
766 cxmutstr *token |
|
767 ) { |
|
768 return cx_strtok_next(ctx, (cxstring *) token); |
|
769 } |
|
770 |
|
771 void cx_strtok_delim( |
|
772 CxStrtokCtx *ctx, |
|
773 cxstring const *delim, |
|
774 size_t count |
|
775 ) { |
|
776 ctx->delim_more = delim; |
|
777 ctx->delim_more_count = count; |
|
778 } |