24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 * POSSIBILITY OF SUCH DAMAGE. |
26 * POSSIBILITY OF SUCH DAMAGE. |
27 */ |
27 */ |
28 |
28 |
29 #include "ucx/string.h" |
29 #include "cx/string.h" |
30 |
30 #include "cx/utils.h" |
31 #include "ucx/allocator.h" |
31 |
32 |
|
33 #include <stdlib.h> |
|
34 #include <string.h> |
32 #include <string.h> |
35 #include <stdarg.h> |
33 #include <stdarg.h> |
36 #include <stdint.h> |
|
37 #include <ctype.h> |
34 #include <ctype.h> |
38 |
35 |
39 #ifndef _WIN32 |
36 #ifndef _WIN32 |
|
37 |
40 #include <strings.h> /* for strncasecmp() */ |
38 #include <strings.h> /* for strncasecmp() */ |
|
39 |
41 #endif /* _WIN32 */ |
40 #endif /* _WIN32 */ |
42 |
41 |
43 sstr_t sstr(char *cstring) { |
42 cxmutstr cx_mutstr(char *cstring) { |
44 sstr_t string; |
43 return (cxmutstr) {cstring, strlen(cstring)}; |
45 string.ptr = cstring; |
44 } |
46 string.length = strlen(cstring); |
45 |
47 return string; |
46 cxmutstr cx_mutstrn( |
48 } |
47 char *cstring, |
49 |
48 size_t length |
50 sstr_t sstrn(char *cstring, size_t length) { |
49 ) { |
51 sstr_t string; |
50 return (cxmutstr) {cstring, length}; |
52 string.ptr = cstring; |
51 } |
53 string.length = length; |
52 |
54 return string; |
53 cxstring cx_str(const char *cstring) { |
55 } |
54 return (cxstring) {cstring, strlen(cstring)}; |
56 |
55 } |
57 scstr_t scstr(const char *cstring) { |
56 |
58 scstr_t string; |
57 cxstring cx_strn( |
59 string.ptr = cstring; |
58 const char *cstring, |
60 string.length = strlen(cstring); |
59 size_t length |
61 return string; |
60 ) { |
62 } |
61 return (cxstring) {cstring, length}; |
63 |
62 } |
64 scstr_t scstrn(const char *cstring, size_t length) { |
63 |
65 scstr_t string; |
64 cxstring cx_strcast(cxmutstr str) { |
66 string.ptr = cstring; |
65 return (cxstring) {str.ptr, str.length}; |
67 string.length = length; |
66 } |
68 return string; |
67 |
69 } |
68 void cx_strfree(cxmutstr *str) { |
70 |
69 free(str->ptr); |
71 |
70 str->ptr = NULL; |
72 size_t scstrnlen(size_t n, ...) { |
71 str->length = 0; |
73 if (n == 0) return 0; |
72 } |
74 |
73 |
|
74 void cx_strfree_a( |
|
75 CxAllocator *alloc, |
|
76 cxmutstr *str |
|
77 ) { |
|
78 cxFree(alloc, str->ptr); |
|
79 str->ptr = NULL; |
|
80 str->length = 0; |
|
81 } |
|
82 |
|
83 size_t cx_strlen( |
|
84 size_t count, |
|
85 ... |
|
86 ) { |
|
87 if (count == 0) return 0; |
|
88 |
75 va_list ap; |
89 va_list ap; |
76 va_start(ap, n); |
90 va_start(ap, count); |
77 |
|
78 size_t size = 0; |
91 size_t size = 0; |
79 |
92 cx_for_n(i, count) { |
80 for (size_t i = 0 ; i < n ; i++) { |
93 cxstring str = va_arg(ap, cxstring); |
81 scstr_t str = va_arg(ap, scstr_t); |
|
82 if(SIZE_MAX - str.length < size) { |
|
83 size = SIZE_MAX; |
|
84 break; |
|
85 } |
|
86 size += str.length; |
94 size += str.length; |
87 } |
95 } |
88 va_end(ap); |
96 va_end(ap); |
89 |
97 |
90 return size; |
98 return size; |
91 } |
99 } |
92 |
100 |
93 static sstr_t sstrvcat_a( |
101 cxmutstr cx_strcat_a( |
94 UcxAllocator *a, |
102 CxAllocator *alloc, |
95 size_t count, |
103 size_t count, |
96 scstr_t s1, |
104 ... |
97 va_list ap) { |
105 ) { |
98 sstr_t str; |
106 cxstring *strings = calloc(count, sizeof(cxstring)); |
99 str.ptr = NULL; |
107 if (!strings) abort(); |
100 str.length = 0; |
108 |
101 if(count < 2) { |
109 va_list ap; |
102 return str; |
110 va_start(ap, count); |
103 } |
111 |
104 |
|
105 scstr_t s2 = va_arg (ap, scstr_t); |
|
106 |
|
107 if(((size_t)-1) - s1.length < s2.length) { |
|
108 return str; |
|
109 } |
|
110 |
|
111 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); |
|
112 if(!strings) { |
|
113 return str; |
|
114 } |
|
115 |
|
116 // get all args and overall length |
112 // get all args and overall length |
117 strings[0] = s1; |
113 size_t slen = 0; |
118 strings[1] = s2; |
114 cx_for_n(i, count) { |
119 size_t slen = s1.length + s2.length; |
115 cxstring s = va_arg (ap, cxstring); |
120 int error = 0; |
|
121 for (size_t i=2;i<count;i++) { |
|
122 scstr_t s = va_arg (ap, scstr_t); |
|
123 strings[i] = s; |
116 strings[i] = s; |
124 if(((size_t)-1) - s.length < slen) { |
|
125 error = 1; |
|
126 break; |
|
127 } |
|
128 slen += s.length; |
117 slen += s.length; |
129 } |
118 } |
130 if(error) { |
119 |
131 free(strings); |
|
132 return str; |
|
133 } |
|
134 |
|
135 // create new string |
120 // create new string |
136 str.ptr = (char*) almalloc(a, slen + 1); |
121 cxmutstr result; |
137 str.length = slen; |
122 result.ptr = cxMalloc(alloc, slen + 1); |
138 if(!str.ptr) { |
123 result.length = slen; |
139 free(strings); |
124 if (result.ptr == NULL) abort(); |
140 str.length = 0; |
125 |
141 return str; |
|
142 } |
|
143 |
|
144 // concatenate strings |
126 // concatenate strings |
145 size_t pos = 0; |
127 size_t pos = 0; |
146 for (size_t i=0;i<count;i++) { |
128 cx_for_n(i, count) { |
147 scstr_t s = strings[i]; |
129 cxstring s = strings[i]; |
148 memcpy(str.ptr + pos, s.ptr, s.length); |
130 memcpy(result.ptr + pos, s.ptr, s.length); |
149 pos += s.length; |
131 pos += s.length; |
150 } |
132 } |
151 |
133 |
152 str.ptr[str.length] = '\0'; |
134 // terminate string |
153 |
135 result.ptr[result.length] = '\0'; |
|
136 |
|
137 // free temporary array |
154 free(strings); |
138 free(strings); |
155 |
139 |
156 return str; |
140 return result; |
157 } |
141 } |
158 |
142 |
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) { |
143 cxstring cx_strsubs( |
160 va_list ap; |
144 cxstring string, |
161 va_start(ap, s1); |
145 size_t start |
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); |
146 ) { |
163 va_end(ap); |
147 return cx_strsubsl(string, start, string.length - start); |
164 return s; |
148 } |
165 } |
149 |
166 |
150 cxmutstr cx_strsubs_m( |
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { |
151 cxmutstr string, |
168 va_list ap; |
152 size_t start |
169 va_start(ap, s1); |
153 ) { |
170 sstr_t s = sstrvcat_a(a, count, s1, ap); |
154 return cx_strsubsl_m(string, start, string.length - start); |
171 va_end(ap); |
155 } |
172 return s; |
156 |
173 } |
157 cxstring cx_strsubsl( |
174 |
158 cxstring string, |
175 static int ucx_substring( |
|
176 size_t str_length, |
|
177 size_t start, |
159 size_t start, |
178 size_t length, |
160 size_t length |
179 size_t *newlen, |
161 ) { |
180 size_t *newpos) |
162 if (start > string.length) { |
181 { |
163 return (cxstring) {NULL, 0}; |
182 *newlen = 0; |
164 } |
183 *newpos = 0; |
165 |
184 |
166 size_t rem_len = string.length - start; |
185 if(start > str_length) { |
167 if (length > rem_len) { |
186 return 0; |
168 length = rem_len; |
187 } |
169 } |
188 |
170 |
189 if(length > str_length - start) { |
171 return (cxstring) {string.ptr + start, length}; |
190 length = str_length - start; |
172 } |
191 } |
173 |
192 *newlen = length; |
174 cxmutstr cx_strsubsl_m( |
193 *newpos = start; |
175 cxmutstr string, |
194 return 1; |
176 size_t start, |
195 } |
177 size_t length |
196 |
178 ) { |
197 sstr_t sstrsubs(sstr_t s, size_t start) { |
179 cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
198 return sstrsubsl (s, start, s.length-start); |
180 return (cxmutstr) {(char *) result.ptr, result.length}; |
199 } |
181 } |
200 |
182 |
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
183 cxstring cx_strchr( |
202 size_t pos; |
184 cxstring string, |
203 sstr_t ret = { NULL, 0 }; |
185 int chr |
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
186 ) { |
205 ret.ptr = s.ptr + pos; |
187 chr = 0xFF & chr; |
206 } |
188 // TODO: improve by comparing multiple bytes at once |
207 return ret; |
189 cx_for_n(i, string.length) { |
208 } |
190 if (string.ptr[i] == chr) { |
209 |
191 return cx_strsubs(string, i); |
210 scstr_t scstrsubs(scstr_t string, size_t start) { |
192 } |
211 return scstrsubsl(string, start, string.length-start); |
193 } |
212 } |
194 return (cxstring) {NULL, 0}; |
213 |
195 } |
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { |
196 |
215 size_t pos; |
197 cxmutstr cx_strchr_m( |
216 scstr_t ret = { NULL, 0 }; |
198 cxmutstr string, |
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) { |
199 int chr |
218 ret.ptr = s.ptr + pos; |
200 ) { |
219 } |
201 cxstring result = cx_strchr(cx_strcast(string), chr); |
220 return ret; |
202 return (cxmutstr) {(char *) result.ptr, result.length}; |
221 } |
203 } |
222 |
204 |
223 |
205 cxstring cx_strrchr( |
224 static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { |
206 cxstring string, |
225 for(size_t i=0;i<length;i++) { |
207 int chr |
226 if(str[i] == chr) { |
208 ) { |
227 *pos = i; |
209 chr = 0xFF & chr; |
228 return 1; |
210 size_t i = string.length; |
229 } |
211 while (i > 0) { |
230 } |
212 i--; |
231 return 0; |
213 // TODO: improve by comparing multiple bytes at once |
232 } |
214 if (string.ptr[i] == chr) { |
233 |
215 return cx_strsubs(string, i); |
234 static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { |
216 } |
235 if(length > 0) { |
217 } |
236 for(size_t i=length ; i>0 ; i--) { |
218 return (cxstring) {NULL, 0}; |
237 if(str[i-1] == chr) { |
219 } |
238 *pos = i-1; |
220 |
239 return 1; |
221 cxmutstr cx_strrchr_m( |
240 } |
222 cxmutstr string, |
241 } |
223 int chr |
242 } |
224 ) { |
243 return 0; |
225 cxstring result = cx_strrchr(cx_strcast(string), chr); |
244 } |
226 return (cxmutstr) {(char *) result.ptr, result.length}; |
245 |
227 } |
246 sstr_t sstrchr(sstr_t s, int c) { |
228 |
247 size_t pos = 0; |
229 #define STRSTR_SBO_BUFLEN 512 |
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
230 |
249 return sstrsubs(s, pos); |
231 cxstring cx_strstr( |
250 } |
232 cxstring haystack, |
251 return sstrn(NULL, 0); |
233 cxstring needle |
252 } |
234 ) { |
253 |
235 if (needle.length == 0) { |
254 sstr_t sstrrchr(sstr_t s, int c) { |
236 return haystack; |
255 size_t pos = 0; |
237 } |
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
238 |
257 return sstrsubs(s, pos); |
239 /* optimize for single-char needles */ |
258 } |
240 if (needle.length == 1) { |
259 return sstrn(NULL, 0); |
241 return cx_strchr(haystack, *needle.ptr); |
260 } |
242 } |
261 |
243 |
262 scstr_t scstrchr(scstr_t s, int c) { |
|
263 size_t pos = 0; |
|
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) { |
|
265 return scstrsubs(s, pos); |
|
266 } |
|
267 return scstrn(NULL, 0); |
|
268 } |
|
269 |
|
270 scstr_t scstrrchr(scstr_t s, int c) { |
|
271 size_t pos = 0; |
|
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) { |
|
273 return scstrsubs(s, pos); |
|
274 } |
|
275 return scstrn(NULL, 0); |
|
276 } |
|
277 |
|
278 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ |
|
279 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) |
|
280 |
|
281 #define ptable_w(useheap, ptable, index, src) do {\ |
|
282 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ |
|
283 else ((size_t*)ptable)[index] = src;\ |
|
284 } while (0); |
|
285 |
|
286 |
|
287 static const char* ucx_strstr( |
|
288 const char *str, |
|
289 size_t length, |
|
290 const char *match, |
|
291 size_t matchlen, |
|
292 size_t *newlen) |
|
293 { |
|
294 *newlen = length; |
|
295 if (matchlen == 0) { |
|
296 return str; |
|
297 } |
|
298 |
|
299 const char *result = NULL; |
|
300 size_t resultlen = 0; |
|
301 |
|
302 /* |
244 /* |
303 * IMPORTANT: |
245 * IMPORTANT: |
304 * our prefix table contains the prefix length PLUS ONE |
246 * Our prefix table contains the prefix length PLUS ONE |
305 * this is our decision, because we want to use the full range of size_t |
247 * this is our decision, because we want to use the full range of size_t. |
306 * the original algorithm needs a (-1) at one single place |
248 * The original algorithm needs a (-1) at one single place, |
307 * and we want to avoid that |
249 * and we want to avoid that. |
308 */ |
250 */ |
309 |
251 |
310 /* static prefix table */ |
252 /* local prefix table */ |
311 static uint8_t s_prefix_table[256]; |
253 size_t s_prefix_table[STRSTR_SBO_BUFLEN]; |
312 |
254 |
313 /* check pattern length and use appropriate prefix table */ |
255 /* check needle length and use appropriate prefix table */ |
314 /* if the pattern exceeds static prefix table, allocate on the heap */ |
256 /* if the pattern exceeds static prefix table, allocate on the heap */ |
315 register int useheap = matchlen > 255; |
257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN; |
316 register void* ptable = useheap ? |
258 register size_t *ptable = useheap ? calloc(needle.length + 1, |
317 calloc(matchlen+1, sizeof(size_t)): s_prefix_table; |
259 sizeof(size_t)) : s_prefix_table; |
318 |
260 |
319 /* keep counter in registers */ |
261 /* keep counter in registers */ |
320 register size_t i, j; |
262 register size_t i, j; |
321 |
263 |
322 /* fill prefix table */ |
264 /* fill prefix table */ |
323 i = 0; j = 0; |
265 i = 0; |
324 ptable_w(useheap, ptable, i, j); |
266 j = 0; |
325 while (i < matchlen) { |
267 ptable[i] = j; |
326 while (j >= 1 && match[j-1] != match[i]) { |
268 while (i < needle.length) { |
327 ptable_r(j, useheap, ptable, j-1); |
269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
328 } |
270 j = ptable[j - 1]; |
329 i++; j++; |
271 } |
330 ptable_w(useheap, ptable, i, j); |
272 i++; |
|
273 j++; |
|
274 ptable[i] = j; |
331 } |
275 } |
332 |
276 |
333 /* search */ |
277 /* search */ |
334 i = 0; j = 1; |
278 cxstring result = {NULL, 0}; |
335 while (i < length) { |
279 i = 0; |
336 while (j >= 1 && str[i] != match[j-1]) { |
280 j = 1; |
337 ptable_r(j, useheap, ptable, j-1); |
281 while (i < haystack.length) { |
338 } |
282 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
339 i++; j++; |
283 j = ptable[j - 1]; |
340 if (j-1 == matchlen) { |
284 } |
341 size_t start = i - matchlen; |
285 i++; |
342 result = str + start; |
286 j++; |
343 resultlen = length - start; |
287 if (j - 1 == needle.length) { |
|
288 size_t start = i - needle.length; |
|
289 result.ptr = haystack.ptr + start; |
|
290 result.length = haystack.length - start; |
344 break; |
291 break; |
345 } |
292 } |
346 } |
293 } |
347 |
294 |
348 /* if prefix table was allocated on the heap, free it */ |
295 /* if prefix table was allocated on the heap, free it */ |
349 if (ptable != s_prefix_table) { |
296 if (ptable != s_prefix_table) { |
350 free(ptable); |
297 free(ptable); |
351 } |
298 } |
352 |
299 |
353 *newlen = resultlen; |
|
354 return result; |
300 return result; |
355 } |
301 } |
356 |
302 |
357 sstr_t scstrsstr(sstr_t string, scstr_t match) { |
303 cxmutstr cx_strstr_m( |
358 sstr_t result; |
304 cxmutstr haystack, |
359 |
305 cxstring needle |
360 size_t reslen; |
306 ) { |
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
307 cxstring result = cx_strstr(cx_strcast(haystack), needle); |
362 if(!resstr) { |
308 return (cxmutstr) {(char *) result.ptr, result.length}; |
363 result.ptr = NULL; |
309 } |
364 result.length = 0; |
310 |
365 return result; |
311 size_t cx_strsplit( |
366 } |
312 cxstring string, |
367 |
313 cxstring delim, |
368 size_t pos = resstr - string.ptr; |
314 size_t limit, |
369 result.ptr = string.ptr + pos; |
315 cxstring *output |
370 result.length = reslen; |
316 ) { |
371 |
317 /* special case: output limit is zero */ |
372 return result; |
318 if (limit == 0) return 0; |
373 } |
319 |
374 |
320 /* special case: delimiter is empty */ |
375 scstr_t scstrscstr(scstr_t string, scstr_t match) { |
321 if (delim.length == 0) { |
376 scstr_t result; |
322 output[0] = string; |
377 |
323 return 1; |
378 size_t reslen; |
324 } |
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); |
325 |
380 if(!resstr) { |
|
381 result.ptr = NULL; |
|
382 result.length = 0; |
|
383 return result; |
|
384 } |
|
385 |
|
386 size_t pos = resstr - string.ptr; |
|
387 result.ptr = string.ptr + pos; |
|
388 result.length = reslen; |
|
389 |
|
390 return result; |
|
391 } |
|
392 |
|
393 #undef ptable_r |
|
394 #undef ptable_w |
|
395 |
|
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { |
|
397 return scstrsplit_a(ucx_default_allocator(), s, d, n); |
|
398 } |
|
399 |
|
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { |
|
401 if (s.length == 0 || d.length == 0) { |
|
402 *n = -1; |
|
403 return NULL; |
|
404 } |
|
405 |
|
406 /* special cases: delimiter is at least as large as the string */ |
326 /* special cases: delimiter is at least as large as the string */ |
407 if (d.length >= s.length) { |
327 if (delim.length >= string.length) { |
408 /* exact match */ |
328 /* exact match */ |
409 if (sstrcmp(s, d) == 0) { |
329 if (cx_strcmp(string, delim) == 0) { |
410 *n = 0; |
330 output[0] = cx_strn(string.ptr, 0); |
411 return NULL; |
331 output[1] = cx_strn(string.ptr + string.length, 0); |
|
332 return 2; |
412 } else /* no match possible */ { |
333 } else /* no match possible */ { |
413 *n = 1; |
334 output[0] = string; |
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
335 return 1; |
415 if(result) { |
336 } |
416 *result = sstrdup_a(allocator, s); |
337 } |
|
338 |
|
339 size_t n = 0; |
|
340 cxstring curpos = string; |
|
341 while (1) { |
|
342 ++n; |
|
343 cxstring match = cx_strstr(curpos, delim); |
|
344 if (match.length > 0) { |
|
345 /* is the limit reached? */ |
|
346 if (n < limit) { |
|
347 /* copy the current string to the array */ |
|
348 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
|
349 output[n - 1] = item; |
|
350 size_t processed = item.length + delim.length; |
|
351 curpos.ptr += processed; |
|
352 curpos.length -= processed; |
417 } else { |
353 } else { |
418 *n = -2; |
354 /* limit reached, copy the _full_ remaining string */ |
419 } |
355 output[n - 1] = curpos; |
420 return result; |
|
421 } |
|
422 } |
|
423 |
|
424 ssize_t nmax = *n; |
|
425 size_t arrlen = 16; |
|
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); |
|
427 |
|
428 if (result) { |
|
429 scstr_t curpos = s; |
|
430 ssize_t j = 1; |
|
431 while (1) { |
|
432 scstr_t match; |
|
433 /* optimize for one byte delimiters */ |
|
434 if (d.length == 1) { |
|
435 match = curpos; |
|
436 for (size_t i = 0 ; i < curpos.length ; i++) { |
|
437 if (curpos.ptr[i] == *(d.ptr)) { |
|
438 match.ptr = curpos.ptr + i; |
|
439 break; |
|
440 } |
|
441 match.length--; |
|
442 } |
|
443 } else { |
|
444 match = scstrscstr(curpos, d); |
|
445 } |
|
446 if (match.length > 0) { |
|
447 /* is this our last try? */ |
|
448 if (nmax == 0 || j < nmax) { |
|
449 /* copy the current string to the array */ |
|
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); |
|
451 result[j-1] = sstrdup_a(allocator, item); |
|
452 size_t processed = item.length + d.length; |
|
453 curpos.ptr += processed; |
|
454 curpos.length -= processed; |
|
455 |
|
456 /* allocate memory for the next string */ |
|
457 j++; |
|
458 if (j > arrlen) { |
|
459 arrlen *= 2; |
|
460 size_t reallocsz; |
|
461 sstr_t* reallocated = NULL; |
|
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { |
|
463 reallocated = (sstr_t*) alrealloc( |
|
464 allocator, result, reallocsz); |
|
465 } |
|
466 if (reallocated) { |
|
467 result = reallocated; |
|
468 } else { |
|
469 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
470 alfree(allocator, result[i].ptr); |
|
471 } |
|
472 alfree(allocator, result); |
|
473 *n = -2; |
|
474 return NULL; |
|
475 } |
|
476 } |
|
477 } else { |
|
478 /* nmax reached, copy the _full_ remaining string */ |
|
479 result[j-1] = sstrdup_a(allocator, curpos); |
|
480 break; |
|
481 } |
|
482 } else { |
|
483 /* no more matches, copy last string */ |
|
484 result[j-1] = sstrdup_a(allocator, curpos); |
|
485 break; |
356 break; |
486 } |
357 } |
487 } |
358 } else { |
488 *n = j; |
359 /* no more matches, copy last string */ |
489 } else { |
360 output[n - 1] = curpos; |
490 *n = -2; |
361 break; |
491 } |
362 } |
492 |
363 } |
493 return result; |
364 |
494 } |
365 return n; |
495 |
366 } |
496 int scstrcmp(scstr_t s1, scstr_t s2) { |
367 |
|
368 size_t cx_strsplit_a( |
|
369 CxAllocator *allocator, |
|
370 cxstring string, |
|
371 cxstring delim, |
|
372 size_t limit, |
|
373 cxstring **output |
|
374 ) { |
|
375 /* find out how many splits we're going to make and allocate memory */ |
|
376 size_t n = 0; |
|
377 cxstring curpos = string; |
|
378 while (1) { |
|
379 ++n; |
|
380 cxstring match = cx_strstr(curpos, delim); |
|
381 if (match.length > 0) { |
|
382 /* is the limit reached? */ |
|
383 if (n < limit) { |
|
384 size_t processed = match.ptr - curpos.ptr + delim.length; |
|
385 curpos.ptr += processed; |
|
386 curpos.length -= processed; |
|
387 } else { |
|
388 /* limit reached */ |
|
389 break; |
|
390 } |
|
391 } else { |
|
392 /* no more matches */ |
|
393 break; |
|
394 } |
|
395 } |
|
396 *output = cxCalloc(allocator, n, sizeof(cxstring)); |
|
397 return cx_strsplit(string, delim, n, *output); |
|
398 } |
|
399 |
|
400 size_t cx_strsplit_m( |
|
401 cxmutstr string, |
|
402 cxstring delim, |
|
403 size_t limit, |
|
404 cxmutstr *output |
|
405 ) { |
|
406 return cx_strsplit(cx_strcast(string), |
|
407 delim, limit, (cxstring *) output); |
|
408 } |
|
409 |
|
410 size_t cx_strsplit_ma( |
|
411 CxAllocator *allocator, |
|
412 cxmutstr string, |
|
413 cxstring delim, |
|
414 size_t limit, |
|
415 cxmutstr **output |
|
416 ) { |
|
417 return cx_strsplit_a(allocator, cx_strcast(string), |
|
418 delim, limit, (cxstring **) output); |
|
419 } |
|
420 |
|
421 int cx_strcmp( |
|
422 cxstring s1, |
|
423 cxstring s2 |
|
424 ) { |
497 if (s1.length == s2.length) { |
425 if (s1.length == s2.length) { |
498 return memcmp(s1.ptr, s2.ptr, s1.length); |
426 return memcmp(s1.ptr, s2.ptr, s1.length); |
499 } else if (s1.length > s2.length) { |
427 } else if (s1.length > s2.length) { |
500 return 1; |
428 return 1; |
501 } else { |
429 } else { |
502 return -1; |
430 return -1; |
503 } |
431 } |
504 } |
432 } |
505 |
433 |
506 int scstrcasecmp(scstr_t s1, scstr_t s2) { |
434 int cx_strcasecmp( |
|
435 cxstring s1, |
|
436 cxstring s2 |
|
437 ) { |
507 if (s1.length == s2.length) { |
438 if (s1.length == s2.length) { |
508 #ifdef _WIN32 |
439 #ifdef _WIN32 |
509 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
440 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
510 #else |
441 #else |
511 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
442 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
515 } else { |
446 } else { |
516 return -1; |
447 return -1; |
517 } |
448 } |
518 } |
449 } |
519 |
450 |
520 sstr_t scstrdup(scstr_t s) { |
451 cxmutstr cx_strdup_a( |
521 return sstrdup_a(ucx_default_allocator(), s); |
452 CxAllocator *allocator, |
522 } |
453 cxstring string |
523 |
454 ) { |
524 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { |
455 cxmutstr result = { |
525 sstr_t newstring; |
456 cxMalloc(allocator, string.length + 1), |
526 newstring.ptr = (char*)almalloc(allocator, s.length + 1); |
457 string.length |
527 if (newstring.ptr) { |
458 }; |
528 newstring.length = s.length; |
459 if (result.ptr == NULL) { |
529 newstring.ptr[newstring.length] = 0; |
460 result.length = 0; |
530 |
461 return result; |
531 memcpy(newstring.ptr, s.ptr, s.length); |
462 } |
532 } else { |
463 memcpy(result.ptr, string.ptr, string.length); |
533 newstring.length = 0; |
464 result.ptr[string.length] = '\0'; |
534 } |
465 return result; |
535 |
466 } |
536 return newstring; |
467 |
537 } |
468 cxstring cx_strtrim(cxstring string) { |
538 |
469 cxstring result = string; |
539 |
470 // TODO: optimize by comparing multiple bytes at once |
540 static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { |
471 while (result.length > 0 && isspace(*result.ptr)) { |
541 const char *newptr = s; |
472 result.ptr++; |
542 size_t length = len; |
473 result.length--; |
543 |
474 } |
544 while(length > 0 && isspace(*newptr)) { |
475 while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
545 newptr++; |
476 result.length--; |
546 length--; |
477 } |
547 } |
478 return result; |
548 while(length > 0 && isspace(newptr[length-1])) { |
479 } |
549 length--; |
480 |
550 } |
481 cxmutstr cx_strtrim_m(cxmutstr string) { |
551 |
482 cxstring result = cx_strtrim(cx_strcast(string)); |
552 *newlen = length; |
483 return (cxmutstr) {(char *) result.ptr, result.length}; |
553 return newptr - s; |
484 } |
554 } |
485 |
555 |
486 bool cx_strprefix( |
556 sstr_t sstrtrim(sstr_t string) { |
487 cxstring string, |
557 sstr_t newstr; |
488 cxstring prefix |
558 newstr.ptr = string.ptr |
489 ) { |
559 + ucx_strtrim(string.ptr, string.length, &newstr.length); |
490 if (string.length < prefix.length) return false; |
560 return newstr; |
491 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
561 } |
492 } |
562 |
493 |
563 scstr_t scstrtrim(scstr_t string) { |
494 bool cx_strsuffix( |
564 scstr_t newstr; |
495 cxstring string, |
565 newstr.ptr = string.ptr |
496 cxstring suffix |
566 + ucx_strtrim(string.ptr, string.length, &newstr.length); |
497 ) { |
567 return newstr; |
498 if (string.length < suffix.length) return false; |
568 } |
499 return memcmp(string.ptr + string.length - suffix.length, |
569 |
500 suffix.ptr, suffix.length) == 0; |
570 int scstrprefix(scstr_t string, scstr_t prefix) { |
501 } |
571 if (string.length == 0) { |
502 |
572 return prefix.length == 0; |
503 bool cx_strcaseprefix( |
573 } |
504 cxstring string, |
574 if (prefix.length == 0) { |
505 cxstring prefix |
575 return 1; |
506 ) { |
576 } |
507 if (string.length < prefix.length) return false; |
577 |
508 #ifdef _WIN32 |
578 if (prefix.length > string.length) { |
509 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
579 return 0; |
510 #else |
580 } else { |
511 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
581 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
512 #endif |
582 } |
513 } |
583 } |
514 |
584 |
515 bool cx_strcasesuffix( |
585 int scstrsuffix(scstr_t string, scstr_t suffix) { |
516 cxstring string, |
586 if (string.length == 0) { |
517 cxstring suffix |
587 return suffix.length == 0; |
518 ) { |
588 } |
519 if (string.length < suffix.length) return false; |
589 if (suffix.length == 0) { |
520 #ifdef _WIN32 |
590 return 1; |
521 return _strnicmp(string.ptr+string.length-suffix.length, |
591 } |
522 suffix.ptr, suffix.length) == 0; |
592 |
523 #else |
593 if (suffix.length > string.length) { |
524 return strncasecmp(string.ptr + string.length - suffix.length, |
594 return 0; |
525 suffix.ptr, suffix.length) == 0; |
595 } else { |
526 #endif |
596 return memcmp(string.ptr+string.length-suffix.length, |
527 } |
597 suffix.ptr, suffix.length) == 0; |
528 |
598 } |
529 void cx_strlower(cxmutstr string) { |
599 } |
530 cx_for_n(i, string.length) { |
600 |
531 string.ptr[i] = (char) tolower(string.ptr[i]); |
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) { |
532 } |
602 if (string.length == 0) { |
533 } |
603 return prefix.length == 0; |
534 |
604 } |
535 void cx_strupper(cxmutstr string) { |
605 if (prefix.length == 0) { |
536 cx_for_n(i, string.length) { |
606 return 1; |
537 string.ptr[i] = (char) toupper(string.ptr[i]); |
607 } |
538 } |
608 |
|
609 if (prefix.length > string.length) { |
|
610 return 0; |
|
611 } else { |
|
612 scstr_t subs = scstrsubsl(string, 0, prefix.length); |
|
613 return scstrcasecmp(subs, prefix) == 0; |
|
614 } |
|
615 } |
|
616 |
|
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) { |
|
618 if (string.length == 0) { |
|
619 return suffix.length == 0; |
|
620 } |
|
621 if (suffix.length == 0) { |
|
622 return 1; |
|
623 } |
|
624 |
|
625 if (suffix.length > string.length) { |
|
626 return 0; |
|
627 } else { |
|
628 scstr_t subs = scstrsubs(string, string.length-suffix.length); |
|
629 return scstrcasecmp(subs, suffix) == 0; |
|
630 } |
|
631 } |
|
632 |
|
633 sstr_t scstrlower(scstr_t string) { |
|
634 sstr_t ret = sstrdup(string); |
|
635 for (size_t i = 0; i < ret.length ; i++) { |
|
636 ret.ptr[i] = tolower(ret.ptr[i]); |
|
637 } |
|
638 return ret; |
|
639 } |
|
640 |
|
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { |
|
642 sstr_t ret = sstrdup_a(allocator, string); |
|
643 for (size_t i = 0; i < ret.length ; i++) { |
|
644 ret.ptr[i] = tolower(ret.ptr[i]); |
|
645 } |
|
646 return ret; |
|
647 } |
|
648 |
|
649 sstr_t scstrupper(scstr_t string) { |
|
650 sstr_t ret = sstrdup(string); |
|
651 for (size_t i = 0; i < ret.length ; i++) { |
|
652 ret.ptr[i] = toupper(ret.ptr[i]); |
|
653 } |
|
654 return ret; |
|
655 } |
|
656 |
|
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { |
|
658 sstr_t ret = sstrdup_a(allocator, string); |
|
659 for (size_t i = 0; i < ret.length ; i++) { |
|
660 ret.ptr[i] = toupper(ret.ptr[i]); |
|
661 } |
|
662 return ret; |
|
663 } |
539 } |
664 |
540 |
665 #define REPLACE_INDEX_BUFFER_MAX 100 |
541 #define REPLACE_INDEX_BUFFER_MAX 100 |
666 |
542 |
667 struct scstrreplace_ibuf { |
543 struct cx_strreplace_ibuf { |
668 size_t* buf; |
544 size_t *buf; |
669 unsigned int len; /* small indices */ |
545 struct cx_strreplace_ibuf *next; |
670 struct scstrreplace_ibuf* next; |
546 unsigned int len; |
671 }; |
547 }; |
672 |
548 |
673 static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { |
549 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { |
674 while (buf) { |
550 while (buf) { |
675 struct scstrreplace_ibuf *next = buf->next; |
551 struct cx_strreplace_ibuf *next = buf->next; |
676 free(buf->buf); |
552 free(buf->buf); |
677 free(buf); |
553 free(buf); |
678 buf = next; |
554 buf = next; |
679 } |
555 } |
680 } |
556 } |
681 |
557 |
682 sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, |
558 cxmutstr cx_strreplacen_a( |
683 scstr_t pattern, scstr_t replacement, size_t replmax) { |
559 CxAllocator *allocator, |
|
560 cxstring str, |
|
561 cxstring pattern, |
|
562 cxstring replacement, |
|
563 size_t replmax |
|
564 ) { |
684 |
565 |
685 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
566 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
686 return sstrdup(str); |
567 return cx_strdup_a(allocator, str); |
687 |
568 |
688 /* Compute expected buffer length */ |
569 /* Compute expected buffer length */ |
689 size_t ibufmax = str.length / pattern.length; |
570 size_t ibufmax = str.length / pattern.length; |
690 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
571 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
691 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { |
572 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { |
692 ibuflen = REPLACE_INDEX_BUFFER_MAX; |
573 ibuflen = REPLACE_INDEX_BUFFER_MAX; |
693 } |
574 } |
694 |
575 |
695 /* Allocate first index buffer */ |
576 /* Allocate first index buffer */ |
696 struct scstrreplace_ibuf *firstbuf, *curbuf; |
577 struct cx_strreplace_ibuf *firstbuf, *curbuf; |
697 firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); |
578 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
698 if (!firstbuf) return sstrn(NULL, 0); |
579 if (!firstbuf) return cx_mutstrn(NULL, 0); |
699 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
580 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
700 if (!firstbuf->buf) { |
581 if (!firstbuf->buf) { |
701 free(firstbuf); |
582 free(firstbuf); |
702 return sstrn(NULL, 0); |
583 return cx_mutstrn(NULL, 0); |
703 } |
584 } |
704 |
585 |
705 /* Search occurrences */ |
586 /* Search occurrences */ |
706 scstr_t searchstr = str; |
587 cxstring searchstr = str; |
707 size_t found = 0; |
588 size_t found = 0; |
708 do { |
589 do { |
709 scstr_t match = scstrscstr(searchstr, pattern); |
590 cxstring match = cx_strstr(searchstr, pattern); |
710 if (match.length > 0) { |
591 if (match.length > 0) { |
711 /* Allocate next buffer in chain, if required */ |
592 /* Allocate next buffer in chain, if required */ |
712 if (curbuf->len == ibuflen) { |
593 if (curbuf->len == ibuflen) { |
713 struct scstrreplace_ibuf *nextbuf = |
594 struct cx_strreplace_ibuf *nextbuf = |
714 calloc(1, sizeof(struct scstrreplace_ibuf)); |
595 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
715 if (!nextbuf) { |
596 if (!nextbuf) { |
716 scstrrepl_free_ibuf(firstbuf); |
597 cx_strrepl_free_ibuf(firstbuf); |
717 return sstrn(NULL, 0); |
598 return cx_mutstrn(NULL, 0); |
718 } |
599 } |
719 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
600 nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
720 if (!nextbuf->buf) { |
601 if (!nextbuf->buf) { |
721 free(nextbuf); |
602 free(nextbuf); |
722 scstrrepl_free_ibuf(firstbuf); |
603 cx_strrepl_free_ibuf(firstbuf); |
723 return sstrn(NULL, 0); |
604 return cx_mutstrn(NULL, 0); |
724 } |
605 } |
725 curbuf->next = nextbuf; |
606 curbuf->next = nextbuf; |
726 curbuf = nextbuf; |
607 curbuf = nextbuf; |
727 } |
608 } |
728 |
609 |