ucx/string.c

changeset 314
8722a668fb2a
parent 255
bf19378aed58
child 335
c1bc13faadaa
equal deleted inserted replaced
313:d721250984d0 314:8722a668fb2a
27 */ 27 */
28 28
29 #include <stdlib.h> 29 #include <stdlib.h>
30 #include <string.h> 30 #include <string.h>
31 #include <stdarg.h> 31 #include <stdarg.h>
32 #include <stdint.h>
32 #include <ctype.h> 33 #include <ctype.h>
33 34
34 #include "string.h" 35 #include "string.h"
35 #include "allocator.h" 36 #include "allocator.h"
36 37
173 n.ptr = NULL; 174 n.ptr = NULL;
174 n.length = 0; 175 n.length = 0;
175 return n; 176 return n;
176 } 177 }
177 178
179 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
180 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
181
182 #define ptable_w(useheap, ptable, index, src) do {\
183 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
184 else ((size_t*)ptable)[index] = src;\
185 } while (0);
186
178 sstr_t sstrstr(sstr_t string, sstr_t match) { 187 sstr_t sstrstr(sstr_t string, sstr_t match) {
179 if (match.length == 0) { 188 if (match.length == 0) {
180 return string; 189 return string;
181 } 190 }
182 191
183 for (size_t i = 0 ; i < string.length ; i++) { 192 /* prepare default return value in case of no match */
184 sstr_t substr = sstrsubs(string, i); 193 sstr_t result = sstrn(NULL, 0);
185 if (sstrprefix(substr, match)) { 194
186 return substr; 195 /*
187 } 196 * IMPORTANT:
188 } 197 * our prefix table contains the prefix length PLUS ONE
189 198 * this is our decision, because we want to use the full range of size_t
190 sstr_t emptystr; 199 * the original algorithm needs a (-1) at one single place
191 emptystr.length = 0; 200 * and we want to avoid that
192 emptystr.ptr = NULL; 201 */
193 return emptystr; 202
194 } 203 /* static prefix table */
204 static uint8_t s_prefix_table[256];
205
206 /* check pattern length and use appropriate prefix table */
207 /* if the pattern exceeds static prefix table, allocate on the heap */
208 register int useheap = match.length > 255;
209 register void* ptable = useheap ?
210 calloc(match.length+1, sizeof(size_t)): s_prefix_table;
211
212 /* keep counter in registers */
213 register size_t i, j;
214
215 /* fill prefix table */
216 i = 0; j = 0;
217 ptable_w(useheap, ptable, i, j);
218 while (i < match.length) {
219 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
220 ptable_r(j, useheap, ptable, j-1);
221 }
222 i++; j++;
223 ptable_w(useheap, ptable, i, j);
224 }
225
226 /* search */
227 i = 0; j = 1;
228 while (i < string.length) {
229 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
230 ptable_r(j, useheap, ptable, j-1);
231 }
232 i++; j++;
233 if (j-1 == match.length) {
234 size_t start = i - match.length;
235 result.ptr = string.ptr + start;
236 result.length = string.length - start;
237 break;
238 }
239 }
240
241 /* if prefix table was allocated on the heap, free it */
242 if (ptable != s_prefix_table) {
243 free(ptable);
244 }
245
246 return result;
247 }
248
249 #undef ptable_r
250 #undef ptable_w
195 251
196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { 252 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
197 return sstrsplit_a(ucx_default_allocator(), s, d, n); 253 return sstrsplit_a(ucx_default_allocator(), s, d, n);
198 } 254 }
199 255
200 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { 256 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
201 if (s.length == 0 || d.length == 0) { 257 if (s.length == 0 || d.length == 0) {
202 *n = -1; 258 *n = -1;
203 return NULL; 259 return NULL;
204 } 260 }
205 261
206 sstr_t* result; 262 /* special cases: delimiter is at least as large as the string */
263 if (d.length >= s.length) {
264 /* exact match */
265 if (sstrcmp(s, d) == 0) {
266 *n = 0;
267 return NULL;
268 } else /* no match possible */ {
269 *n = 1;
270 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
271 *result = sstrdup_a(allocator, s);
272 return result;
273 }
274 }
275
207 ssize_t nmax = *n; 276 ssize_t nmax = *n;
208 *n = 1; 277 size_t arrlen = 16;
209 278 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
210 /* special case: exact match - no processing needed */ 279
211 if (sstrcmp(s, d) == 0) { 280 if (result) {
212 *n = 0; 281 sstr_t curpos = s;
213 return NULL; 282 ssize_t j = 1;
214 } 283 while (1) {
215 sstr_t sv = sstrdup(s); 284 sstr_t match;
216 if (sv.length == 0) { 285 /* optimize for one byte delimiters */
217 *n = -2; 286 if (d.length == 1) {
218 return NULL; 287 match = curpos;
219 } 288 for (size_t i = 0 ; i < curpos.length ; i++) {
220 289 if (curpos.ptr[i] == *(d.ptr)) {
221 for (size_t i = 0 ; i < s.length ; i++) { 290 match.ptr = curpos.ptr + i;
222 sstr_t substr = sstrsubs(sv, i); 291 break;
223 if (sstrprefix(substr, d)) { 292 }
224 (*n)++; 293 match.length--;
225 for (size_t j = 0 ; j < d.length ; j++) { 294 }
226 sv.ptr[i+j] = 0; 295 } else {
296 match = sstrstr(curpos, d);
227 } 297 }
228 i += d.length - 1; // -1, because the loop will do a i++ 298 if (match.length > 0) {
229 } 299 /* is this our last try? */
230 if ((*n) == nmax) break; 300 if (nmax == 0 || j < nmax) {
231 } 301 /* copy the current string to the array */
232 result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); 302 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
233 303 result[j-1] = sstrdup_a(allocator, item);
234 if (result) { 304 size_t processed = item.length + d.length;
235 char *pptr = sv.ptr; 305 curpos.ptr += processed;
236 for (ssize_t i = 0 ; i < *n ; i++) { 306 curpos.length -= processed;
237 size_t l = strlen(pptr); 307
238 char* ptr = (char*) almalloc(allocator, l + 1); 308 /* allocate memory for the next string */
239 if (ptr) { 309 j++;
240 memcpy(ptr, pptr, l); 310 if (j > arrlen) {
241 ptr[l] = 0; 311 arrlen *= 2;
242 312 sstr_t* reallocated = (sstr_t*) alrealloc(
243 result[i] = sstrn(ptr, l); 313 allocator, result, arrlen*sizeof(sstr_t));
244 pptr += l + d.length; 314 if (reallocated) {
315 result = reallocated;
316 } else {
317 for (ssize_t i = 0 ; i < j-1 ; i++) {
318 alfree(allocator, result[i].ptr);
319 }
320 alfree(allocator, result);
321 *n = -2;
322 return NULL;
323 }
324 }
325 } else {
326 /* nmax reached, copy the _full_ remaining string */
327 result[j-1] = sstrdup_a(allocator, curpos);
328 break;
329 }
245 } else { 330 } else {
246 for (ssize_t j = i-1 ; j >= 0 ; j--) { 331 /* no more matches, copy last string */
247 alfree(allocator, result[j].ptr); 332 result[j-1] = sstrdup_a(allocator, curpos);
248 }
249 alfree(allocator, result);
250 *n = -2;
251 break; 333 break;
252 } 334 }
253 } 335 }
336 *n = j;
254 } else { 337 } else {
255 *n = -2; 338 *n = -2;
256 } 339 }
257
258 free(sv.ptr);
259 340
260 return result; 341 return result;
261 } 342 }
262 343
263 int sstrcmp(sstr_t s1, sstr_t s2) { 344 int sstrcmp(sstr_t s1, sstr_t s2) {

mercurial