173 n.ptr = NULL; |
174 n.ptr = NULL; |
174 n.length = 0; |
175 n.length = 0; |
175 return n; |
176 return n; |
176 } |
177 } |
177 |
178 |
|
179 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ |
|
180 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) |
|
181 |
|
182 #define ptable_w(useheap, ptable, index, src) do {\ |
|
183 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ |
|
184 else ((size_t*)ptable)[index] = src;\ |
|
185 } while (0); |
|
186 |
|
187 sstr_t sstrstr(sstr_t string, sstr_t match) { |
|
188 if (match.length == 0) { |
|
189 return string; |
|
190 } |
|
191 |
|
192 /* prepare default return value in case of no match */ |
|
193 sstr_t result = sstrn(NULL, 0); |
|
194 |
|
195 /* |
|
196 * IMPORTANT: |
|
197 * our prefix table contains the prefix length PLUS ONE |
|
198 * this is our decision, because we want to use the full range of size_t |
|
199 * the original algorithm needs a (-1) at one single place |
|
200 * and we want to avoid that |
|
201 */ |
|
202 |
|
203 /* static prefix table */ |
|
204 static uint8_t s_prefix_table[256]; |
|
205 |
|
206 /* check pattern length and use appropriate prefix table */ |
|
207 /* if the pattern exceeds static prefix table, allocate on the heap */ |
|
208 register int useheap = match.length > 255; |
|
209 register void* ptable = useheap ? |
|
210 calloc(match.length+1, sizeof(size_t)): s_prefix_table; |
|
211 |
|
212 /* keep counter in registers */ |
|
213 register size_t i, j; |
|
214 |
|
215 /* fill prefix table */ |
|
216 i = 0; j = 0; |
|
217 ptable_w(useheap, ptable, i, j); |
|
218 while (i < match.length) { |
|
219 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { |
|
220 ptable_r(j, useheap, ptable, j-1); |
|
221 } |
|
222 i++; j++; |
|
223 ptable_w(useheap, ptable, i, j); |
|
224 } |
|
225 |
|
226 /* search */ |
|
227 i = 0; j = 1; |
|
228 while (i < string.length) { |
|
229 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { |
|
230 ptable_r(j, useheap, ptable, j-1); |
|
231 } |
|
232 i++; j++; |
|
233 if (j-1 == match.length) { |
|
234 size_t start = i - match.length; |
|
235 result.ptr = string.ptr + start; |
|
236 result.length = string.length - start; |
|
237 break; |
|
238 } |
|
239 } |
|
240 |
|
241 /* if prefix table was allocated on the heap, free it */ |
|
242 if (ptable != s_prefix_table) { |
|
243 free(ptable); |
|
244 } |
|
245 |
|
246 return result; |
|
247 } |
|
248 |
|
249 #undef ptable_r |
|
250 #undef ptable_w |
|
251 |
178 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
252 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
179 return sstrsplit_a(ucx_default_allocator(), s, d, n); |
253 return sstrsplit_a(ucx_default_allocator(), s, d, n); |
180 } |
254 } |
181 |
255 |
182 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { |
256 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { |
183 if (s.length == 0 || d.length == 0) { |
257 if (s.length == 0 || d.length == 0) { |
184 *n = -1; |
258 *n = -1; |
185 return NULL; |
259 return NULL; |
186 } |
260 } |
187 |
261 |
188 sstr_t* result; |
262 /* special cases: delimiter is at least as large as the string */ |
|
263 if (d.length >= s.length) { |
|
264 /* exact match */ |
|
265 if (sstrcmp(s, d) == 0) { |
|
266 *n = 0; |
|
267 return NULL; |
|
268 } else /* no match possible */ { |
|
269 *n = 1; |
|
270 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
|
271 *result = sstrdup_a(allocator, s); |
|
272 return result; |
|
273 } |
|
274 } |
|
275 |
189 ssize_t nmax = *n; |
276 ssize_t nmax = *n; |
190 *n = 1; |
277 size_t arrlen = 16; |
191 |
278 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); |
192 /* special case: exact match - no processing needed */ |
279 |
193 if (sstrcmp(s, d) == 0) { |
280 if (result) { |
194 *n = 0; |
281 sstr_t curpos = s; |
195 return NULL; |
282 ssize_t j = 1; |
196 } |
283 while (1) { |
197 sstr_t sv = sstrdup(s); |
284 sstr_t match; |
198 if (sv.length == 0) { |
285 /* optimize for one byte delimiters */ |
199 *n = -2; |
286 if (d.length == 1) { |
200 return NULL; |
287 match = curpos; |
201 } |
288 for (size_t i = 0 ; i < curpos.length ; i++) { |
202 |
289 if (curpos.ptr[i] == *(d.ptr)) { |
203 for (size_t i = 0 ; i < s.length ; i++) { |
290 match.ptr = curpos.ptr + i; |
204 if (sv.ptr[i] == d.ptr[0]) { |
291 break; |
205 _Bool match = 1; |
292 } |
206 for (size_t j = 1 ; j < d.length ; j++) { |
293 match.length--; |
207 if (j+i < s.length) { |
294 } |
208 match &= (sv.ptr[i+j] == d.ptr[j]); |
295 } else { |
|
296 match = sstrstr(curpos, d); |
|
297 } |
|
298 if (match.length > 0) { |
|
299 /* is this our last try? */ |
|
300 if (nmax == 0 || j < nmax) { |
|
301 /* copy the current string to the array */ |
|
302 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); |
|
303 result[j-1] = sstrdup_a(allocator, item); |
|
304 size_t processed = item.length + d.length; |
|
305 curpos.ptr += processed; |
|
306 curpos.length -= processed; |
|
307 |
|
308 /* allocate memory for the next string */ |
|
309 j++; |
|
310 if (j > arrlen) { |
|
311 arrlen *= 2; |
|
312 sstr_t* reallocated = (sstr_t*) alrealloc( |
|
313 allocator, result, arrlen*sizeof(sstr_t)); |
|
314 if (reallocated) { |
|
315 result = reallocated; |
|
316 } else { |
|
317 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
318 alfree(allocator, result[i].ptr); |
|
319 } |
|
320 alfree(allocator, result); |
|
321 *n = -2; |
|
322 return NULL; |
|
323 } |
|
324 } |
209 } else { |
325 } else { |
210 match = 0; |
326 /* nmax reached, copy the _full_ remaining string */ |
|
327 result[j-1] = sstrdup_a(allocator, curpos); |
211 break; |
328 break; |
212 } |
329 } |
213 } |
|
214 if (match) { |
|
215 (*n)++; |
|
216 for (size_t j = 0 ; j < d.length ; j++) { |
|
217 sv.ptr[i+j] = 0; |
|
218 } |
|
219 i += d.length; |
|
220 } |
|
221 } |
|
222 if ((*n) == nmax) break; |
|
223 } |
|
224 result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); |
|
225 |
|
226 if (result) { |
|
227 char *pptr = sv.ptr; |
|
228 for (ssize_t i = 0 ; i < *n ; i++) { |
|
229 size_t l = strlen(pptr); |
|
230 char* ptr = (char*) almalloc(allocator, l + 1); |
|
231 if (ptr) { |
|
232 memcpy(ptr, pptr, l); |
|
233 ptr[l] = 0; |
|
234 |
|
235 result[i] = sstrn(ptr, l); |
|
236 pptr += l + d.length; |
|
237 } else { |
330 } else { |
238 for (ssize_t j = i-1 ; j >= 0 ; j--) { |
331 /* no more matches, copy last string */ |
239 alfree(allocator, result[j].ptr); |
332 result[j-1] = sstrdup_a(allocator, curpos); |
240 } |
|
241 alfree(allocator, result); |
|
242 *n = -2; |
|
243 break; |
333 break; |
244 } |
334 } |
245 } |
335 } |
|
336 *n = j; |
246 } else { |
337 } else { |
247 *n = -2; |
338 *n = -2; |
248 } |
339 } |
249 |
|
250 free(sv.ptr); |
|
251 |
340 |
252 return result; |
341 return result; |
253 } |
342 } |
254 |
343 |
255 int sstrcmp(sstr_t s1, sstr_t s2) { |
344 int sstrcmp(sstr_t s1, sstr_t s2) { |