173 n.ptr = NULL; |
174 n.ptr = NULL; |
174 n.length = 0; |
175 n.length = 0; |
175 return n; |
176 return n; |
176 } |
177 } |
177 |
178 |
|
179 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ |
|
180 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) |
|
181 |
|
182 #define ptable_w(useheap, ptable, index, src) do {\ |
|
183 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ |
|
184 else ((size_t*)ptable)[index] = src;\ |
|
185 } while (0); |
|
186 |
178 sstr_t sstrstr(sstr_t string, sstr_t match) { |
187 sstr_t sstrstr(sstr_t string, sstr_t match) { |
179 if (match.length == 0) { |
188 if (match.length == 0) { |
180 return string; |
189 return string; |
181 } |
190 } |
182 |
191 |
183 for (size_t i = 0 ; i < string.length ; i++) { |
192 /* prepare default return value in case of no match */ |
184 sstr_t substr = sstrsubs(string, i); |
193 sstr_t result = sstrn(NULL, 0); |
185 if (sstrprefix(substr, match)) { |
194 |
186 return substr; |
195 /* |
187 } |
196 * IMPORTANT: |
188 } |
197 * our prefix table contains the prefix length PLUS ONE |
189 |
198 * this is our decision, because we want to use the full range of size_t |
190 sstr_t emptystr; |
199 * the original algorithm needs a (-1) at one single place |
191 emptystr.length = 0; |
200 * and we want to avoid that |
192 emptystr.ptr = NULL; |
201 */ |
193 return emptystr; |
202 |
194 } |
203 /* static prefix table */ |
|
204 static uint8_t s_prefix_table[256]; |
|
205 |
|
206 /* check pattern length and use appropriate prefix table */ |
|
207 /* if the pattern exceeds static prefix table, allocate on the heap */ |
|
208 register int useheap = match.length > 255; |
|
209 register void* ptable = useheap ? |
|
210 calloc(match.length+1, sizeof(size_t)): s_prefix_table; |
|
211 |
|
212 /* keep counter in registers */ |
|
213 register size_t i, j; |
|
214 |
|
215 /* fill prefix table */ |
|
216 i = 0; j = 0; |
|
217 ptable_w(useheap, ptable, i, j); |
|
218 while (i < match.length) { |
|
219 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { |
|
220 ptable_r(j, useheap, ptable, j-1); |
|
221 } |
|
222 i++; j++; |
|
223 ptable_w(useheap, ptable, i, j); |
|
224 } |
|
225 |
|
226 /* search */ |
|
227 i = 0; j = 1; |
|
228 while (i < string.length) { |
|
229 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { |
|
230 ptable_r(j, useheap, ptable, j-1); |
|
231 } |
|
232 i++; j++; |
|
233 if (j-1 == match.length) { |
|
234 size_t start = i - match.length; |
|
235 result.ptr = string.ptr + start; |
|
236 result.length = string.length - start; |
|
237 break; |
|
238 } |
|
239 } |
|
240 |
|
241 /* if prefix table was allocated on the heap, free it */ |
|
242 if (ptable != s_prefix_table) { |
|
243 free(ptable); |
|
244 } |
|
245 |
|
246 return result; |
|
247 } |
|
248 |
|
249 #undef ptable_r |
|
250 #undef ptable_w |
195 |
251 |
196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
252 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
197 return sstrsplit_a(ucx_default_allocator(), s, d, n); |
253 return sstrsplit_a(ucx_default_allocator(), s, d, n); |
198 } |
254 } |
199 |
255 |
200 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { |
256 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { |
201 if (s.length == 0 || d.length == 0) { |
257 if (s.length == 0 || d.length == 0) { |
202 *n = -1; |
258 *n = -1; |
203 return NULL; |
259 return NULL; |
204 } |
260 } |
205 |
261 |
206 sstr_t* result; |
262 /* special cases: delimiter is at least as large as the string */ |
|
263 if (d.length >= s.length) { |
|
264 /* exact match */ |
|
265 if (sstrcmp(s, d) == 0) { |
|
266 *n = 0; |
|
267 return NULL; |
|
268 } else /* no match possible */ { |
|
269 *n = 1; |
|
270 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
|
271 *result = sstrdup_a(allocator, s); |
|
272 return result; |
|
273 } |
|
274 } |
|
275 |
207 ssize_t nmax = *n; |
276 ssize_t nmax = *n; |
208 *n = 1; |
277 size_t arrlen = 16; |
209 |
278 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); |
210 /* special case: exact match - no processing needed */ |
279 |
211 if (sstrcmp(s, d) == 0) { |
280 if (result) { |
212 *n = 0; |
281 sstr_t curpos = s; |
213 return NULL; |
282 ssize_t j = 1; |
214 } |
283 while (1) { |
215 sstr_t sv = sstrdup(s); |
284 sstr_t match; |
216 if (sv.length == 0) { |
285 /* optimize for one byte delimiters */ |
217 *n = -2; |
286 if (d.length == 1) { |
218 return NULL; |
287 match = curpos; |
219 } |
288 for (size_t i = 0 ; i < curpos.length ; i++) { |
220 |
289 if (curpos.ptr[i] == *(d.ptr)) { |
221 for (size_t i = 0 ; i < s.length ; i++) { |
290 match.ptr = curpos.ptr + i; |
222 sstr_t substr = sstrsubs(sv, i); |
291 break; |
223 if (sstrprefix(substr, d)) { |
292 } |
224 (*n)++; |
293 match.length--; |
225 for (size_t j = 0 ; j < d.length ; j++) { |
294 } |
226 sv.ptr[i+j] = 0; |
295 } else { |
|
296 match = sstrstr(curpos, d); |
227 } |
297 } |
228 i += d.length - 1; // -1, because the loop will do a i++ |
298 if (match.length > 0) { |
229 } |
299 /* is this our last try? */ |
230 if ((*n) == nmax) break; |
300 if (nmax == 0 || j < nmax) { |
231 } |
301 /* copy the current string to the array */ |
232 result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); |
302 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); |
233 |
303 result[j-1] = sstrdup_a(allocator, item); |
234 if (result) { |
304 size_t processed = item.length + d.length; |
235 char *pptr = sv.ptr; |
305 curpos.ptr += processed; |
236 for (ssize_t i = 0 ; i < *n ; i++) { |
306 curpos.length -= processed; |
237 size_t l = strlen(pptr); |
307 |
238 char* ptr = (char*) almalloc(allocator, l + 1); |
308 /* allocate memory for the next string */ |
239 if (ptr) { |
309 j++; |
240 memcpy(ptr, pptr, l); |
310 if (j > arrlen) { |
241 ptr[l] = 0; |
311 arrlen *= 2; |
242 |
312 sstr_t* reallocated = (sstr_t*) alrealloc( |
243 result[i] = sstrn(ptr, l); |
313 allocator, result, arrlen*sizeof(sstr_t)); |
244 pptr += l + d.length; |
314 if (reallocated) { |
|
315 result = reallocated; |
|
316 } else { |
|
317 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
318 alfree(allocator, result[i].ptr); |
|
319 } |
|
320 alfree(allocator, result); |
|
321 *n = -2; |
|
322 return NULL; |
|
323 } |
|
324 } |
|
325 } else { |
|
326 /* nmax reached, copy the _full_ remaining string */ |
|
327 result[j-1] = sstrdup_a(allocator, curpos); |
|
328 break; |
|
329 } |
245 } else { |
330 } else { |
246 for (ssize_t j = i-1 ; j >= 0 ; j--) { |
331 /* no more matches, copy last string */ |
247 alfree(allocator, result[j].ptr); |
332 result[j-1] = sstrdup_a(allocator, curpos); |
248 } |
|
249 alfree(allocator, result); |
|
250 *n = -2; |
|
251 break; |
333 break; |
252 } |
334 } |
253 } |
335 } |
|
336 *n = j; |
254 } else { |
337 } else { |
255 *n = -2; |
338 *n = -2; |
256 } |
339 } |
257 |
|
258 free(sv.ptr); |
|
259 |
340 |
260 return result; |
341 return result; |
261 } |
342 } |
262 |
343 |
263 int sstrcmp(sstr_t s1, sstr_t s2) { |
344 int sstrcmp(sstr_t s1, sstr_t s2) { |