ucx/string.c

changeset 152
62921b370c60
parent 124
80609f9675f1
child 157
0b33b9396851
equal deleted inserted replaced
151:11f3bb408051 152:62921b370c60
1 /* 1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * 3 *
4 * Copyright 2015 Olaf Wintermann. All rights reserved. 4 * Copyright 2016 Olaf Wintermann. All rights reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met: 7 * modification, are permitted provided that the following conditions are met:
8 * 8 *
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
27 */ 27 */
28 28
29 #include <stdlib.h> 29 #include <stdlib.h>
30 #include <string.h> 30 #include <string.h>
31 #include <stdarg.h> 31 #include <stdarg.h>
32 #include <stdint.h>
32 #include <ctype.h> 33 #include <ctype.h>
33 34
34 #include "string.h" 35 #include "string.h"
35 #include "allocator.h" 36 #include "allocator.h"
36 37
173 n.ptr = NULL; 174 n.ptr = NULL;
174 n.length = 0; 175 n.length = 0;
175 return n; 176 return n;
176 } 177 }
177 178
179 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
180 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
181
182 #define ptable_w(useheap, ptable, index, src) do {\
183 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
184 else ((size_t*)ptable)[index] = src;\
185 } while (0);
186
187 sstr_t sstrstr(sstr_t string, sstr_t match) {
188 if (match.length == 0) {
189 return string;
190 }
191
192 /* prepare default return value in case of no match */
193 sstr_t result = sstrn(NULL, 0);
194
195 /*
196 * IMPORTANT:
197 * our prefix table contains the prefix length PLUS ONE
198 * this is our decision, because we want to use the full range of size_t
199 * the original algorithm needs a (-1) at one single place
200 * and we want to avoid that
201 */
202
203 /* static prefix table */
204 static uint8_t s_prefix_table[256];
205
206 /* check pattern length and use appropriate prefix table */
207 /* if the pattern exceeds static prefix table, allocate on the heap */
208 register int useheap = match.length > 255;
209 register void* ptable = useheap ?
210 calloc(match.length+1, sizeof(size_t)): s_prefix_table;
211
212 /* keep counter in registers */
213 register size_t i, j;
214
215 /* fill prefix table */
216 i = 0; j = 0;
217 ptable_w(useheap, ptable, i, j);
218 while (i < match.length) {
219 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
220 ptable_r(j, useheap, ptable, j-1);
221 }
222 i++; j++;
223 ptable_w(useheap, ptable, i, j);
224 }
225
226 /* search */
227 i = 0; j = 1;
228 while (i < string.length) {
229 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
230 ptable_r(j, useheap, ptable, j-1);
231 }
232 i++; j++;
233 if (j-1 == match.length) {
234 size_t start = i - match.length;
235 result.ptr = string.ptr + start;
236 result.length = string.length - start;
237 break;
238 }
239 }
240
241 /* if prefix table was allocated on the heap, free it */
242 if (ptable != s_prefix_table) {
243 free(ptable);
244 }
245
246 return result;
247 }
248
249 #undef ptable_r
250 #undef ptable_w
251
178 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { 252 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
179 return sstrsplit_a(ucx_default_allocator(), s, d, n); 253 return sstrsplit_a(ucx_default_allocator(), s, d, n);
180 } 254 }
181 255
182 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { 256 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
183 if (s.length == 0 || d.length == 0) { 257 if (s.length == 0 || d.length == 0) {
184 *n = -1; 258 *n = -1;
185 return NULL; 259 return NULL;
186 } 260 }
187 261
188 sstr_t* result; 262 /* special cases: delimiter is at least as large as the string */
263 if (d.length >= s.length) {
264 /* exact match */
265 if (sstrcmp(s, d) == 0) {
266 *n = 0;
267 return NULL;
268 } else /* no match possible */ {
269 *n = 1;
270 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
271 *result = sstrdup_a(allocator, s);
272 return result;
273 }
274 }
275
189 ssize_t nmax = *n; 276 ssize_t nmax = *n;
190 *n = 1; 277 size_t arrlen = 16;
191 278 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
192 /* special case: exact match - no processing needed */ 279
193 if (sstrcmp(s, d) == 0) { 280 if (result) {
194 *n = 0; 281 sstr_t curpos = s;
195 return NULL; 282 ssize_t j = 1;
196 } 283 while (1) {
197 sstr_t sv = sstrdup(s); 284 sstr_t match;
198 if (sv.length == 0) { 285 /* optimize for one byte delimiters */
199 *n = -2; 286 if (d.length == 1) {
200 return NULL; 287 match = curpos;
201 } 288 for (size_t i = 0 ; i < curpos.length ; i++) {
202 289 if (curpos.ptr[i] == *(d.ptr)) {
203 for (size_t i = 0 ; i < s.length ; i++) { 290 match.ptr = curpos.ptr + i;
204 if (sv.ptr[i] == d.ptr[0]) { 291 break;
205 _Bool match = 1; 292 }
206 for (size_t j = 1 ; j < d.length ; j++) { 293 match.length--;
207 if (j+i < s.length) { 294 }
208 match &= (sv.ptr[i+j] == d.ptr[j]); 295 } else {
296 match = sstrstr(curpos, d);
297 }
298 if (match.length > 0) {
299 /* is this our last try? */
300 if (nmax == 0 || j < nmax) {
301 /* copy the current string to the array */
302 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
303 result[j-1] = sstrdup_a(allocator, item);
304 size_t processed = item.length + d.length;
305 curpos.ptr += processed;
306 curpos.length -= processed;
307
308 /* allocate memory for the next string */
309 j++;
310 if (j > arrlen) {
311 arrlen *= 2;
312 sstr_t* reallocated = (sstr_t*) alrealloc(
313 allocator, result, arrlen*sizeof(sstr_t));
314 if (reallocated) {
315 result = reallocated;
316 } else {
317 for (ssize_t i = 0 ; i < j-1 ; i++) {
318 alfree(allocator, result[i].ptr);
319 }
320 alfree(allocator, result);
321 *n = -2;
322 return NULL;
323 }
324 }
209 } else { 325 } else {
210 match = 0; 326 /* nmax reached, copy the _full_ remaining string */
327 result[j-1] = sstrdup_a(allocator, curpos);
211 break; 328 break;
212 } 329 }
213 }
214 if (match) {
215 (*n)++;
216 for (size_t j = 0 ; j < d.length ; j++) {
217 sv.ptr[i+j] = 0;
218 }
219 i += d.length;
220 }
221 }
222 if ((*n) == nmax) break;
223 }
224 result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n));
225
226 if (result) {
227 char *pptr = sv.ptr;
228 for (ssize_t i = 0 ; i < *n ; i++) {
229 size_t l = strlen(pptr);
230 char* ptr = (char*) almalloc(allocator, l + 1);
231 if (ptr) {
232 memcpy(ptr, pptr, l);
233 ptr[l] = 0;
234
235 result[i] = sstrn(ptr, l);
236 pptr += l + d.length;
237 } else { 330 } else {
238 for (ssize_t j = i-1 ; j >= 0 ; j--) { 331 /* no more matches, copy last string */
239 alfree(allocator, result[j].ptr); 332 result[j-1] = sstrdup_a(allocator, curpos);
240 }
241 alfree(allocator, result);
242 *n = -2;
243 break; 333 break;
244 } 334 }
245 } 335 }
336 *n = j;
246 } else { 337 } else {
247 *n = -2; 338 *n = -2;
248 } 339 }
249
250 free(sv.ptr);
251 340
252 return result; 341 return result;
253 } 342 }
254 343
255 int sstrcmp(sstr_t s1, sstr_t s2) { 344 int sstrcmp(sstr_t s1, sstr_t s2) {

mercurial