# HG changeset patch # User Mike Becker # Date 1506105753 -7200 # Node ID 8722a668fb2a35a78558a0f538bd77be2bba9e55 # Parent d721250984d07f9d7ab98ce55df43aed57ab5317 updates ucx version (most importantly this adds the new sstrstr implementation) diff -r d721250984d0 -r 8722a668fb2a ucx/avl.c --- a/ucx/avl.c Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/avl.c Fri Sep 22 20:42:33 2017 +0200 @@ -26,6 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include + #include "avl.h" #define ptrcast(ptr) ((void*)(ptr)) @@ -149,6 +151,46 @@ return n ? n->value : NULL; } +UcxAVLNode *ucx_avl_find_node(UcxAVLTree *tree, intptr_t key, + distance_func dfnc, int mode) { + UcxAVLNode *n = tree->root; + UcxAVLNode *closest = NULL; + + intmax_t cmpresult; + intmax_t closest_dist; + closest_dist = mode == UCX_AVL_FIND_LOWER_BOUNDED ? INTMAX_MIN : INTMAX_MAX; + + while (n && (cmpresult = dfnc( + ptrcast(key), ptrcast(n->key), tree->userdata))) { + if (mode == UCX_AVL_FIND_CLOSEST) { + intmax_t dist = cmpresult; + if (dist < 0) dist *= -1; + if (dist < closest_dist) { + closest_dist = dist; + closest = n; + } + } else if (mode == UCX_AVL_FIND_LOWER_BOUNDED && cmpresult <= 0) { + if (cmpresult > closest_dist) { + closest_dist = cmpresult; + closest = n; + } + } else if (mode == UCX_AVL_FIND_UPPER_BOUNDED && cmpresult >= 0) { + if (cmpresult < closest_dist) { + closest_dist = cmpresult; + closest = n; + } + } + n = cmpresult > 0 ? n->right : n->left; + } + return n ? n : closest; +} + +void *ucx_avl_find(UcxAVLTree *tree, intptr_t key, + distance_func dfnc, int mode) { + UcxAVLNode *n = ucx_avl_find_node(tree, key, dfnc, mode); + return n ? n->value : NULL; +} + int ucx_avl_put(UcxAVLTree *tree, intptr_t key, void *value) { return ucx_avl_put_s(tree, key, value, NULL); } @@ -272,3 +314,43 @@ size_t ucx_avl_count(UcxAVLTree *tree) { return ucx_avl_countn(tree->root); } + +UcxAVLNode* ucx_avl_pred(UcxAVLNode* node) { + if (node->left) { + UcxAVLNode* n = node->left; + while (n->right) { + n = n->right; + } + return n; + } else { + UcxAVLNode* n = node; + while (n->parent) { + if (n->parent->right == n) { + return n->parent; + } else { + n = n->parent; + } + } + return NULL; + } +} + +UcxAVLNode* ucx_avl_succ(UcxAVLNode* node) { + if (node->right) { + UcxAVLNode* n = node->right; + while (n->left) { + n = n->left; + } + return n; + } else { + UcxAVLNode* n = node; + while (n->parent) { + if (n->parent->left == n) { + return n->parent; + } else { + n = n->parent; + } + } + return NULL; + } +} diff -r d721250984d0 -r 8722a668fb2a ucx/avl.h --- a/ucx/avl.h Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/avl.h Fri Sep 22 20:42:33 2017 +0200 @@ -164,6 +164,68 @@ void *ucx_avl_get(UcxAVLTree *tree, intptr_t key); /** + * A mode for #ucx_avl_find_node() with the same behavior as + * #ucx_avl_get_node(). + */ +#define UCX_AVL_FIND_EXACT 0 +/** + * A mode for #ucx_avl_find_node() finding the node whose key is at least + * as large as the specified key. + */ +#define UCX_AVL_FIND_LOWER_BOUNDED 1 +/** + * A mode for #ucx_avl_find_node() finding the node whose key is at most + * as large as the specified key. + */ +#define UCX_AVL_FIND_UPPER_BOUNDED 2 +/** + * A mode for #ucx_avl_find_node() finding the node with a key that is as close + * to the specified key as possible. If the key is present, the behavior is + * like #ucx_avl_get_node(). This mode only returns NULL on + * empty trees. + */ +#define UCX_AVL_FIND_CLOSEST 3 + +/** + * Finds a node within the tree. The following modes are supported: + * + * + * The distance function provided MUST agree with the compare function of + * the AVL tree. + * + * @param tree the UcxAVLTree + * @param key the key + * @param dfnc the distance function + * @param mode the find mode + * @return the node (or NULL, if no node can be found) + */ +UcxAVLNode *ucx_avl_find_node(UcxAVLTree *tree, intptr_t key, + distance_func dfnc, int mode); + +/** + * Finds a value within the tree. + * See #ucx_avl_find_node() for details. + * + * @param tree the UcxAVLTree + * @param key the key + * @param dfnc the distance function + * @param mode the find mode + * @return the value (or NULL, if no value can be found) + */ +void *ucx_avl_find(UcxAVLTree *tree, intptr_t key, + distance_func dfnc, int mode); + +/** * Puts a key/value pair into the tree. * * Attention: use this function only, if a possible old value does not need @@ -196,7 +258,7 @@ * * Note: the specified node is logically removed. The tree implementation * decides which memory area is freed. In most cases the here provided node - * is freed, so it's further use is generally undefined. + * is freed, so its further use is generally undefined. * * @param tree the UcxAVLTree * @param node the node to remove @@ -241,6 +303,22 @@ */ size_t ucx_avl_count(UcxAVLTree *tree); +/** + * Finds the in-order predecessor of the given node. + * @param node an AVL node + * @return the in-order predecessor of the given node, or NULL if + * the given node is the in-order minimum + */ +UcxAVLNode* ucx_avl_pred(UcxAVLNode* node); + +/** + * Finds the in-order successor of the given node. + * @param node an AVL node + * @return the in-order successor of the given node, or NULL if + * the given node is the in-order maximum + */ +UcxAVLNode* ucx_avl_succ(UcxAVLNode* node); + #ifdef __cplusplus } #endif diff -r d721250984d0 -r 8722a668fb2a ucx/mempool.c --- a/ucx/mempool.c Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/mempool.c Fri Sep 22 20:42:33 2017 +0200 @@ -93,25 +93,30 @@ } int ucx_mempool_chcap(UcxMempool *pool, size_t newcap) { + if (newcap < pool->ndata) { + return 1; + } + void **data = (void**) realloc(pool->data, newcap*sizeof(void*)); if (data) { pool->data = data; pool->size = newcap; - return EXIT_SUCCESS; + return 0; } else { - return EXIT_FAILURE; + return 1; } } void *ucx_mempool_malloc(UcxMempool *pool, size_t n) { if (pool->ndata >= pool->size) { - // The hard coded 16 is documented for this function and ucx_mempool_new - if (ucx_mempool_chcap(pool, pool->size + 16) == EXIT_FAILURE) { + size_t newcap = pool->size*2; + if (newcap < pool->size || ucx_mempool_chcap(pool, newcap)) { return NULL; } } - ucx_memchunk *mem = (ucx_memchunk*)malloc(sizeof(ucx_destructor) + n); + void *p = malloc(sizeof(ucx_destructor) + n); + ucx_memchunk *mem = (ucx_memchunk*)p; if (!mem) { return NULL; } @@ -147,7 +152,7 @@ } fprintf(stderr, "FATAL: 0x%08" PRIxPTR" not in mpool 0x%08" PRIxPTR"\n", (intptr_t)ptr, (intptr_t)pool); - exit(EXIT_FAILURE); + abort(); } else { return newm + sizeof(ucx_destructor); } @@ -172,7 +177,7 @@ } fprintf(stderr, "FATAL: 0x%08" PRIxPTR" not in mpool 0x%08" PRIxPTR"\n", (intptr_t)ptr, (intptr_t)pool); - exit(EXIT_FAILURE); + abort(); } void ucx_mempool_destroy(UcxMempool *pool) { diff -r d721250984d0 -r 8722a668fb2a ucx/mempool.h --- a/ucx/mempool.h Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/mempool.h Fri Sep 22 20:42:33 2017 +0200 @@ -70,39 +70,29 @@ /** * Creates a memory pool with the specified initial size. * - * As the created memory pool automatically grows in size by 16 elements, when + * As the created memory pool automatically grows in size by factor two when * trying to allocate memory on a full pool, it is recommended that you use - * a multiple of 16 for the initial size. + * a power of two for the initial size. * - * @param n initial pool size (should be a multiple of 16) + * @param n initial pool size (should be a power of two, e.g. 16) * @return a pointer to the new memory pool + * @see ucx_mempool_new_default() */ UcxMempool *ucx_mempool_new(size_t n); /** * Resizes a memory pool. * + * This function will fail if the new capacity is not sufficient for the + * present data. + * * @param pool the pool to resize * @param newcap the new capacity - * @return EXIT_SUCCESS on success or - * EXIT_FAILURE on failure + * @return zero on success or non-zero on failure */ int ucx_mempool_chcap(UcxMempool *pool, size_t newcap); /** - * Changes the pool size to the next smallest multiple of 16. - * - * You may use this macro, to reduce the pool size after freeing - * many pooled memory items. - * - * @param pool the pool to clamp - * @return EXIT_SUCCESS on success or - * EXIT_FAILURE on failure - */ -#define ucx_mempool_clamp(pool) ucx_mempool_chcap(pool, \ - (pool->ndata & ~0xF)+0x10) - -/** * Allocates pooled memory. * * @param pool the memory pool @@ -145,7 +135,7 @@ * is called. * * If you specify memory, that is not pooled by the specified memory pool, the - * behavior is undefined. + * program will terminate with a call to abort(). * * @param pool the memory pool * @param ptr a pointer to the memory that shall be freed diff -r d721250984d0 -r 8722a668fb2a ucx/string.c --- a/ucx/string.c Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/string.c Fri Sep 22 20:42:33 2017 +0200 @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "string.h" @@ -175,24 +176,79 @@ return n; } +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) + +#define ptable_w(useheap, ptable, index, src) do {\ + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ + else ((size_t*)ptable)[index] = src;\ + } while (0); + sstr_t sstrstr(sstr_t string, sstr_t match) { if (match.length == 0) { return string; } - for (size_t i = 0 ; i < string.length ; i++) { - sstr_t substr = sstrsubs(string, i); - if (sstrprefix(substr, match)) { - return substr; + /* prepare default return value in case of no match */ + sstr_t result = sstrn(NULL, 0); + + /* + * IMPORTANT: + * our prefix table contains the prefix length PLUS ONE + * this is our decision, because we want to use the full range of size_t + * the original algorithm needs a (-1) at one single place + * and we want to avoid that + */ + + /* static prefix table */ + static uint8_t s_prefix_table[256]; + + /* check pattern length and use appropriate prefix table */ + /* if the pattern exceeds static prefix table, allocate on the heap */ + register int useheap = match.length > 255; + register void* ptable = useheap ? + calloc(match.length+1, sizeof(size_t)): s_prefix_table; + + /* keep counter in registers */ + register size_t i, j; + + /* fill prefix table */ + i = 0; j = 0; + ptable_w(useheap, ptable, i, j); + while (i < match.length) { + while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + ptable_w(useheap, ptable, i, j); + } + + /* search */ + i = 0; j = 1; + while (i < string.length) { + while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { + ptable_r(j, useheap, ptable, j-1); + } + i++; j++; + if (j-1 == match.length) { + size_t start = i - match.length; + result.ptr = string.ptr + start; + result.length = string.length - start; + break; } } + + /* if prefix table was allocated on the heap, free it */ + if (ptable != s_prefix_table) { + free(ptable); + } - sstr_t emptystr; - emptystr.length = 0; - emptystr.ptr = NULL; - return emptystr; + return result; } +#undef ptable_r +#undef ptable_w + sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { return sstrsplit_a(ucx_default_allocator(), s, d, n); } @@ -202,60 +258,85 @@ *n = -1; return NULL; } - - sstr_t* result; - ssize_t nmax = *n; - *n = 1; - - /* special case: exact match - no processing needed */ - if (sstrcmp(s, d) == 0) { - *n = 0; - return NULL; + + /* special cases: delimiter is at least as large as the string */ + if (d.length >= s.length) { + /* exact match */ + if (sstrcmp(s, d) == 0) { + *n = 0; + return NULL; + } else /* no match possible */ { + *n = 1; + sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); + *result = sstrdup_a(allocator, s); + return result; + } } - sstr_t sv = sstrdup(s); - if (sv.length == 0) { - *n = -2; - return NULL; - } - - for (size_t i = 0 ; i < s.length ; i++) { - sstr_t substr = sstrsubs(sv, i); - if (sstrprefix(substr, d)) { - (*n)++; - for (size_t j = 0 ; j < d.length ; j++) { - sv.ptr[i+j] = 0; - } - i += d.length - 1; // -1, because the loop will do a i++ - } - if ((*n) == nmax) break; - } - result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n)); + + ssize_t nmax = *n; + size_t arrlen = 16; + sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); if (result) { - char *pptr = sv.ptr; - for (ssize_t i = 0 ; i < *n ; i++) { - size_t l = strlen(pptr); - char* ptr = (char*) almalloc(allocator, l + 1); - if (ptr) { - memcpy(ptr, pptr, l); - ptr[l] = 0; + sstr_t curpos = s; + ssize_t j = 1; + while (1) { + sstr_t match; + /* optimize for one byte delimiters */ + if (d.length == 1) { + match = curpos; + for (size_t i = 0 ; i < curpos.length ; i++) { + if (curpos.ptr[i] == *(d.ptr)) { + match.ptr = curpos.ptr + i; + break; + } + match.length--; + } + } else { + match = sstrstr(curpos, d); + } + if (match.length > 0) { + /* is this our last try? */ + if (nmax == 0 || j < nmax) { + /* copy the current string to the array */ + sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); + result[j-1] = sstrdup_a(allocator, item); + size_t processed = item.length + d.length; + curpos.ptr += processed; + curpos.length -= processed; - result[i] = sstrn(ptr, l); - pptr += l + d.length; + /* allocate memory for the next string */ + j++; + if (j > arrlen) { + arrlen *= 2; + sstr_t* reallocated = (sstr_t*) alrealloc( + allocator, result, arrlen*sizeof(sstr_t)); + if (reallocated) { + result = reallocated; + } else { + for (ssize_t i = 0 ; i < j-1 ; i++) { + alfree(allocator, result[i].ptr); + } + alfree(allocator, result); + *n = -2; + return NULL; + } + } + } else { + /* nmax reached, copy the _full_ remaining string */ + result[j-1] = sstrdup_a(allocator, curpos); + break; + } } else { - for (ssize_t j = i-1 ; j >= 0 ; j--) { - alfree(allocator, result[j].ptr); - } - alfree(allocator, result); - *n = -2; + /* no more matches, copy last string */ + result[j-1] = sstrdup_a(allocator, curpos); break; } } + *n = j; } else { *n = -2; } - - free(sv.ptr); return result; } diff -r d721250984d0 -r 8722a668fb2a ucx/string.h --- a/ucx/string.h Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/string.h Fri Sep 22 20:42:33 2017 +0200 @@ -260,6 +260,9 @@ * * If the string ends with the delimiter and the maximum list size is not * exceeded, the last array item will be an empty string. + * In case the list size would be exceeded, the last array item will be the + * remaining string after the last split, including the terminating + * delimiter. * * Attention: The array pointer AND all sstr_t.ptr of the array * items must be manually passed to free(). Use sstrsplit_a() with diff -r d721250984d0 -r 8722a668fb2a ucx/test.c --- a/ucx/test.c Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/test.c Fri Sep 22 20:42:33 2017 +0200 @@ -86,6 +86,6 @@ elem->test(suite, output); } fwrite("\nAll test completed.\n", 1, 21, output); - fprintf(output, " Total: %d\n Success: %d\n Failure: %d\n", + fprintf(output, " Total: %u\n Success: %u\n Failure: %u\n", suite->success+suite->failure, suite->success, suite->failure); } diff -r d721250984d0 -r 8722a668fb2a ucx/ucx.h --- a/ucx/ucx.h Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/ucx.h Fri Sep 22 20:42:33 2017 +0200 @@ -40,9 +40,10 @@ #define UCX_VERSION_MAJOR 0 /** Minor UCX version as integer constant. */ -#define UCX_VERSION_MINOR 11 +#define UCX_VERSION_MINOR 12 #include +#include #ifdef _WIN32 #if !(defined __ssize_t_defined || defined _SSIZE_T_) @@ -86,7 +87,16 @@ * and 0 if both arguments are equal. If the third argument is * NULL, it shall be ignored. */ -typedef int(*cmp_func)(void*,void*,void*); +typedef int(*cmp_func)(const void*,const void*,void*); + +/** + * Function pointer to a distance function. + * + * The distance function shall take three arguments: the two values for which + * the distance shall be computed and optional additional data. + * The function shall then return the signed distance as integer value. + */ +typedef intmax_t(*distance_func)(const void*,const void*,void*); /** * Function pointer to a copy function. @@ -99,7 +109,7 @@ * passed to free() depends on the implementation of the * respective copy_func. */ -typedef void*(*copy_func)(void*,void*); +typedef void*(*copy_func)(const void*,void*); /** * Function pointer to a write function. diff -r d721250984d0 -r 8722a668fb2a ucx/utils.c --- a/ucx/utils.c Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/utils.c Fri Sep 22 20:42:33 2017 +0200 @@ -33,15 +33,15 @@ #include /* COPY FUCNTIONS */ -void* ucx_strcpy(void* s, void* data) { - char *str = (char*) s; +void* ucx_strcpy(const void* s, void* data) { + const char *str = (const char*) s; size_t n = 1+strlen(str); char *cpy = (char*) malloc(n); memcpy(cpy, str, n); return cpy; } -void* ucx_memcpy(void* m, void* n) { +void* ucx_memcpy(const void* m, void* n) { size_t k = *((size_t*)n); void *cpy = malloc(k); memcpy(cpy, m, k); @@ -87,17 +87,17 @@ /* COMPARE FUNCTIONS */ -int ucx_strcmp(void *s1, void *s2, void *data) { - return strcmp((char*)s1, (char*)s2); +int ucx_strcmp(const void *s1, const void *s2, void *data) { + return strcmp((const char*)s1, (const char*)s2); } -int ucx_strncmp(void *s1, void *s2, void *n) { - return strncmp((char*)s1, (char*)s2, *((size_t*) n)); +int ucx_strncmp(const void *s1, const void *s2, void *n) { + return strncmp((const char*)s1, (const char*)s2, *((size_t*) n)); } -int ucx_intcmp(void *i1, void *i2, void *data) { - int a = *((int*) i1); - int b = *((int*) i2); +int ucx_intcmp(const void *i1, const void *i2, void *data) { + int a = *((const int*) i1); + int b = *((const int*) i2); if (a == b) { return 0; } else { @@ -105,9 +105,9 @@ } } -int ucx_floatcmp(void *f1, void *f2, void *epsilon) { - float a = *((float*) f1); - float b = *((float*) f2); +int ucx_floatcmp(const void *f1, const void *f2, void *epsilon) { + float a = *((const float*) f1); + float b = *((const float*) f2); float e = !epsilon ? 1e-6f : *((float*)epsilon); if (fabsf(a - b) < e) { return 0; @@ -116,9 +116,9 @@ } } -int ucx_doublecmp(void *d1, void *d2, void *epsilon) { - double a = *((float*) d1); - double b = *((float*) d2); +int ucx_doublecmp(const void *d1, const void *d2, void *epsilon) { + double a = *((const double*) d1); + double b = *((const double*) d2); double e = !epsilon ? 1e-14 : *((double*)epsilon); if (fabs(a - b) < e) { return 0; @@ -127,9 +127,9 @@ } } -int ucx_ptrcmp(void *ptr1, void *ptr2, void *data) { - intptr_t p1 = (intptr_t) ptr1; - intptr_t p2 = (intptr_t) ptr2; +int ucx_ptrcmp(const void *ptr1, const void *ptr2, void *data) { + const intptr_t p1 = (const intptr_t) ptr1; + const intptr_t p2 = (const intptr_t) ptr2; if (p1 == p2) { return 0; } else { @@ -137,7 +137,7 @@ } } -int ucx_memcmp(void *ptr1, void *ptr2, void *n) { +int ucx_memcmp(const void *ptr1, const void *ptr2, void *n) { return memcmp(ptr1, ptr2, *((size_t*)n)); } diff -r d721250984d0 -r 8722a668fb2a ucx/utils.h --- a/ucx/utils.h Fri Sep 22 20:19:00 2017 +0200 +++ b/ucx/utils.h Fri Sep 22 20:42:33 2017 +0200 @@ -60,7 +60,7 @@ * @param data omitted * @return a pointer to a copy of s1 that can be passed to free(void*) */ -void *ucx_strcpy(void *s, void *data); +void *ucx_strcpy(const void *s, void *data); /** * Copies a memory area. @@ -69,7 +69,7 @@ * @return a pointer to a copy of the specified memory area that can * be passed to free(void*) */ -void *ucx_memcpy(void *m, void *n); +void *ucx_memcpy(const void *m, void *n); /** @@ -144,7 +144,7 @@ * @param data omitted * @return the result of strcmp(s1, s2) */ -int ucx_strcmp(void *s1, void *s2, void *data); +int ucx_strcmp(const void *s1, const void *s2, void *data); /** * Wraps the strncmp function. @@ -153,7 +153,7 @@ * @param n a pointer to the size_t containing the third strncmp parameter * @return the result of strncmp(s1, s2, *n) */ -int ucx_strncmp(void *s1, void *s2, void *n); +int ucx_strncmp(const void *s1, const void *s2, void *n); /** * Compares two integers of type int. @@ -163,7 +163,7 @@ * @return -1, if *i1 is less than *i2, 0 if both are equal, * 1 if *i1 is greater than *i2 */ -int ucx_intcmp(void *i1, void *i2, void *data); +int ucx_intcmp(const void *i1, const void *i2, void *data); /** * Compares two real numbers of type float. @@ -174,7 +174,7 @@ * 1 if *f1 is greater than *f2 */ -int ucx_floatcmp(void *f1, void *f2, void *data); +int ucx_floatcmp(const void *f1, const void *f2, void *data); /** * Compares two real numbers of type double. @@ -184,7 +184,7 @@ * @return -1, if *d1 is less than *d2, 0 if both are equal, * 1 if *d1 is greater than *d2 */ -int ucx_doublecmp(void *d1, void *d2, void *data); +int ucx_doublecmp(const void *d1, const void *d2, void *data); /** * Compares two pointers. @@ -194,7 +194,7 @@ * @return -1 if ptr1 is less than ptr2, 0 if both are equal, * 1 if ptr1 is greater than ptr2 */ -int ucx_ptrcmp(void *ptr1, void *ptr2, void *data); +int ucx_ptrcmp(const void *ptr1, const void *ptr2, void *data); /** * Compares two memory areas. @@ -203,7 +203,7 @@ * @param n a pointer to the size_t containing the third parameter for memcmp * @return the result of memcmp(ptr1, ptr2, *n) */ -int ucx_memcmp(void *ptr1, void *ptr2, void *n); +int ucx_memcmp(const void *ptr1, const void *ptr2, void *n); /** * A printf() like function which writes the output to a stream by