--- a/ucx/tree.c Thu Oct 03 18:54:19 2024 +0200 +++ b/ucx/tree.c Sun Oct 06 12:00:31 2024 +0200 @@ -28,37 +28,72 @@ #include "cx/tree.h" +#include "cx/array_list.h" + #include <assert.h> #define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off))) -#define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off))) #define tree_parent(node) CX_TREE_PTR(node, loc_parent) #define tree_children(node) CX_TREE_PTR(node, loc_children) +#define tree_last_child(node) CX_TREE_PTR(node, loc_last_child) #define tree_prev(node) CX_TREE_PTR(node, loc_prev) #define tree_next(node) CX_TREE_PTR(node, loc_next) +#define cx_tree_ptr_locations \ + loc_parent, loc_children, loc_last_child, loc_prev, loc_next + +static void cx_tree_zero_pointers( + void *node, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + tree_parent(node) = NULL; + tree_prev(node) = NULL; + tree_next(node) = NULL; + tree_children(node) = NULL; + if (loc_last_child >= 0) { + tree_last_child(node) = NULL; + } +} + void cx_tree_link( void *restrict parent, void *restrict node, ptrdiff_t loc_parent, ptrdiff_t loc_children, + ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ) { void *current_parent = tree_parent(node); if (current_parent == parent) return; if (current_parent != NULL) { - cx_tree_unlink(node, loc_parent, loc_children, - loc_prev, loc_next); + cx_tree_unlink(node, cx_tree_ptr_locations); } if (tree_children(parent) == NULL) { tree_children(parent) = node; + if (loc_last_child >= 0) { + tree_last_child(parent) = node; + } } else { - void *children = tree_children(parent); - tree_prev(children) = node; - tree_next(node) = children; - tree_children(parent) = node; + if (loc_last_child >= 0) { + void *child = tree_last_child(parent); + tree_prev(node) = child; + tree_next(child) = node; + tree_last_child(parent) = node; + } else { + void *child = tree_children(parent); + void *next; + while ((next = tree_next(child)) != NULL) { + child = next; + } + tree_prev(node) = child; + tree_next(child) = node; + } } tree_parent(node) = parent; } @@ -67,6 +102,7 @@ void *node, ptrdiff_t loc_parent, ptrdiff_t loc_children, + ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ) { @@ -74,14 +110,849 @@ void *left = tree_prev(node); void *right = tree_next(node); - assert(left == NULL || tree_children(tree_parent(node)) != node); + void *parent = tree_parent(node); + assert(left == NULL || tree_children(parent) != node); + assert(right == NULL || loc_last_child < 0 || + tree_last_child(parent) != node); + if (left == NULL) { - tree_children(tree_parent(node)) = right; + tree_children(parent) = right; } else { tree_next(left) = right; } - if (right != NULL) tree_prev(right) = left; + if (right == NULL) { + if (loc_last_child >= 0) { + tree_last_child(parent) = left; + } + } else { + tree_prev(right) = left; + } + tree_parent(node) = NULL; tree_prev(node) = NULL; tree_next(node) = NULL; } + +int cx_tree_search( + const void *root, + const void *node, + cx_tree_search_func sfunc, + void **result, + ptrdiff_t loc_children, + ptrdiff_t loc_next +) { + int ret; + *result = NULL; + + // shortcut: compare root before doing anything else + ret = sfunc(root, node); + if (ret < 0) { + return ret; + } else if (ret == 0 || tree_children(root) == NULL) { + *result = (void*)root; + return ret; + } + + // create a working stack + CX_ARRAY_DECLARE(const void *, work); + cx_array_initialize(work, 32); + + // add the children of root to the working stack + { + void *c = tree_children(root); + while (c != NULL) { + cx_array_simple_add(work, c); + c = tree_next(c); + } + } + + // remember a candidate for adding the data + // also remember the exact return code from sfunc + void *candidate = (void *) root; + int ret_candidate = ret; + + // process the working stack + while (work_size > 0) { + // pop element + const void *elem = work[--work_size]; + + // apply the search function + ret = sfunc(elem, node); + + if (ret == 0) { + // if found, exit the search + *result = (void *) elem; + work_size = 0; + break; + } else if (ret > 0) { + // if children might contain the data, add them to the stack + void *c = tree_children(elem); + while (c != NULL) { + cx_array_simple_add(work, c); + c = tree_next(c); + } + + // remember this node in case no child is suitable + if (ret < ret_candidate) { + candidate = (void *) elem; + ret_candidate = ret; + } + } + } + + // not found, but was there a candidate? + if (ret != 0 && candidate != NULL) { + ret = ret_candidate; + *result = candidate; + } + + // free the working queue and return + free(work); + return ret; +} + +int cx_tree_search_data( + const void *root, + const void *data, + cx_tree_search_data_func sfunc, + void **result, + ptrdiff_t loc_children, + ptrdiff_t loc_next +) { + // it is basically the same implementation + return cx_tree_search( + root, data, + (cx_tree_search_func) sfunc, + result, + loc_children, loc_next); +} + +static bool cx_tree_iter_valid(const void *it) { + const struct cx_tree_iterator_s *iter = it; + return iter->node != NULL; +} + +static void *cx_tree_iter_current(const void *it) { + const struct cx_tree_iterator_s *iter = it; + return iter->node; +} + +static void cx_tree_iter_next(void *it) { + struct cx_tree_iterator_s *iter = it; + ptrdiff_t const loc_next = iter->loc_next; + ptrdiff_t const loc_children = iter->loc_children; + // protect us from misuse + if (!iter->base.valid(iter)) return; + + void *children; + + // check if we are currently exiting or entering nodes + if (iter->exiting) { + children = NULL; + // skipping on exit is pointless, just clear the flag + iter->skip = false; + } else { + if (iter->skip) { + // skip flag is set, pretend that there are no children + iter->skip = false; + children = NULL; + } else { + // try to enter the children (if any) + children = tree_children(iter->node); + } + } + + if (children == NULL) { + // search for the next node + void *next; + cx_tree_iter_search_next: + // check if there is a sibling + if (iter->exiting) { + next = iter->node_next; + } else { + next = tree_next(iter->node); + iter->node_next = next; + } + if (next == NULL) { + // no sibling, we are done with this node and exit + if (iter->visit_on_exit && !iter->exiting) { + // iter is supposed to visit the node again + iter->exiting = true; + } else { + iter->exiting = false; + if (iter->depth == 1) { + // there is no parent - we have iterated the entire tree + // invalidate the iterator and free the node stack + iter->node = iter->node_next = NULL; + iter->stack_capacity = iter->depth = 0; + free(iter->stack); + iter->stack = NULL; + } else { + // the parent node can be obtained from the top of stack + // this way we can avoid the loc_parent in the iterator + iter->depth--; + iter->node = iter->stack[iter->depth - 1]; + // retry with the parent node to find a sibling + goto cx_tree_iter_search_next; + } + } + } else { + if (iter->visit_on_exit && !iter->exiting) { + // iter is supposed to visit the node again + iter->exiting = true; + } else { + iter->exiting = false; + // move to the sibling + iter->counter++; + iter->node = next; + // new top of stack is the sibling + iter->stack[iter->depth - 1] = next; + } + } + } else { + // node has children, push the first child onto the stack and enter it + cx_array_simple_add(iter->stack, children); + iter->node = children; + iter->counter++; + } +} + +CxTreeIterator cx_tree_iterator( + void *root, + bool visit_on_exit, + ptrdiff_t loc_children, + ptrdiff_t loc_next +) { + CxTreeIterator iter; + iter.loc_children = loc_children; + iter.loc_next = loc_next; + iter.visit_on_exit = visit_on_exit; + + // initialize members + iter.node_next = NULL; + iter.exiting = false; + iter.skip = false; + + // assign base iterator functions + iter.base.mutating = false; + iter.base.remove = false; + iter.base.current_impl = NULL; + iter.base.valid = cx_tree_iter_valid; + iter.base.next = cx_tree_iter_next; + iter.base.current = cx_tree_iter_current; + + // visit the root node + iter.node = root; + if (root != NULL) { + iter.stack_capacity = 16; + iter.stack = malloc(sizeof(void *) * 16); + iter.stack[0] = root; + iter.counter = 1; + iter.depth = 1; + } else { + iter.stack_capacity = 0; + iter.stack = NULL; + iter.counter = 0; + iter.depth = 0; + } + + return iter; +} + +static bool cx_tree_visitor_valid(const void *it) { + const struct cx_tree_visitor_s *iter = it; + return iter->node != NULL; +} + +static void *cx_tree_visitor_current(const void *it) { + const struct cx_tree_visitor_s *iter = it; + return iter->node; +} + +__attribute__((__nonnull__)) +static void cx_tree_visitor_enqueue_siblings( + struct cx_tree_visitor_s *iter, void *node, ptrdiff_t loc_next) { + node = tree_next(node); + while (node != NULL) { + struct cx_tree_visitor_queue_s *q; + q = malloc(sizeof(struct cx_tree_visitor_queue_s)); + q->depth = iter->queue_last->depth; + q->node = node; + iter->queue_last->next = q; + iter->queue_last = q; + node = tree_next(node); + } + iter->queue_last->next = NULL; +} + +static void cx_tree_visitor_next(void *it) { + struct cx_tree_visitor_s *iter = it; + // protect us from misuse + if (!iter->base.valid(iter)) return; + + ptrdiff_t const loc_next = iter->loc_next; + ptrdiff_t const loc_children = iter->loc_children; + + // add the children of the current node to the queue + // unless the skip flag is set + void *children; + if (iter->skip) { + iter->skip = false; + children = NULL; + } else { + children = tree_children(iter->node); + } + if (children != NULL) { + struct cx_tree_visitor_queue_s *q; + q = malloc(sizeof(struct cx_tree_visitor_queue_s)); + q->depth = iter->depth + 1; + q->node = children; + if (iter->queue_last == NULL) { + assert(iter->queue_next == NULL); + iter->queue_next = q; + } else { + iter->queue_last->next = q; + } + iter->queue_last = q; + cx_tree_visitor_enqueue_siblings(iter, children, loc_next); + } + + // check if there is a next node + if (iter->queue_next == NULL) { + iter->node = NULL; + return; + } + + // dequeue the next node + iter->node = iter->queue_next->node; + iter->depth = iter->queue_next->depth; + { + struct cx_tree_visitor_queue_s *q = iter->queue_next; + iter->queue_next = q->next; + if (iter->queue_next == NULL) { + assert(iter->queue_last == q); + iter->queue_last = NULL; + } + free(q); + } + + // increment the node counter + iter->counter++; +} + +CxTreeVisitor cx_tree_visitor( + void *root, + ptrdiff_t loc_children, + ptrdiff_t loc_next +) { + CxTreeVisitor iter; + iter.loc_children = loc_children; + iter.loc_next = loc_next; + + // initialize members + iter.skip = false; + iter.queue_next = NULL; + iter.queue_last = NULL; + + // assign base iterator functions + iter.base.mutating = false; + iter.base.remove = false; + iter.base.current_impl = NULL; + iter.base.valid = cx_tree_visitor_valid; + iter.base.next = cx_tree_visitor_next; + iter.base.current = cx_tree_visitor_current; + + // visit the root node + iter.node = root; + if (root != NULL) { + iter.counter = 1; + iter.depth = 1; + } else { + iter.counter = 0; + iter.depth = 0; + } + + return iter; +} + +static void cx_tree_add_link_duplicate( + void *original, void *duplicate, + ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, ptrdiff_t loc_next +) { + void *shared_parent = tree_parent(original); + if (shared_parent == NULL) { + cx_tree_link(original, duplicate, cx_tree_ptr_locations); + } else { + cx_tree_link(shared_parent, duplicate, cx_tree_ptr_locations); + } +} + +static void cx_tree_add_link_new( + void *parent, void *node, cx_tree_search_func sfunc, + ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, ptrdiff_t loc_next +) { + // check the current children one by one, + // if they could be children of the new node + void *child = tree_children(parent); + while (child != NULL) { + void *next = tree_next(child); + + if (sfunc(node, child) > 0) { + // the sibling could be a child -> re-link + cx_tree_link(node, child, cx_tree_ptr_locations); + } + + child = next; + } + + // add new node as new child + cx_tree_link(parent, node, cx_tree_ptr_locations); +} + +int cx_tree_add( + const void *src, + cx_tree_search_func sfunc, + cx_tree_node_create_func cfunc, + void *cdata, + void **cnode, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + *cnode = cfunc(src, cdata); + if (*cnode == NULL) return 1; + cx_tree_zero_pointers(*cnode, cx_tree_ptr_locations); + + void *match = NULL; + int result = cx_tree_search( + root, + *cnode, + sfunc, + &match, + loc_children, + loc_next + ); + + if (result < 0) { + // node does not fit into the tree - return non-zero value + return 1; + } else if (result == 0) { + // data already found in the tree, link duplicate + cx_tree_add_link_duplicate(match, *cnode, cx_tree_ptr_locations); + } else { + // closest match found, add new node + cx_tree_add_link_new(match, *cnode, sfunc, cx_tree_ptr_locations); + } + + return 0; +} + +unsigned int cx_tree_add_look_around_depth = 3; + +size_t cx_tree_add_iter( + struct cx_iterator_base_s *iter, + size_t num, + cx_tree_search_func sfunc, + cx_tree_node_create_func cfunc, + void *cdata, + void **failed, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + // erase the failed pointer + *failed = NULL; + + // iter not valid? cancel... + if (!iter->valid(iter)) return 0; + + size_t processed = 0; + void *current_node = root; + const void *elem; + + for (void **eptr; processed < num && + iter->valid(iter) && (eptr = iter->current(iter)) != NULL; + iter->next(iter)) { + elem = *eptr; + + // create the new node + void *new_node = cfunc(elem, cdata); + if (new_node == NULL) return processed; + cx_tree_zero_pointers(new_node, cx_tree_ptr_locations); + + // start searching from current node + void *match; + int result; + unsigned int look_around_retries = cx_tree_add_look_around_depth; + cx_tree_add_look_around_retry: + result = cx_tree_search( + current_node, + new_node, + sfunc, + &match, + loc_children, + loc_next + ); + + if (result < 0) { + // traverse upwards and try to find better parents + void *parent = tree_parent(current_node); + if (parent != NULL) { + if (look_around_retries > 0) { + look_around_retries--; + current_node = parent; + } else { + // look around retries exhausted, start from the root + current_node = root; + } + goto cx_tree_add_look_around_retry; + } else { + // no parents. so we failed + *failed = new_node; + return processed; + } + } else if (result == 0) { + // data already found in the tree, link duplicate + cx_tree_add_link_duplicate(match, new_node, cx_tree_ptr_locations); + // but stick with the original match, in case we needed a new root + current_node = match; + } else { + // closest match found, add new node as child + cx_tree_add_link_new(match, new_node, sfunc, + cx_tree_ptr_locations); + current_node = match; + } + + processed++; + } + return processed; +} + +size_t cx_tree_add_array( + const void *src, + size_t num, + size_t elem_size, + cx_tree_search_func sfunc, + cx_tree_node_create_func cfunc, + void *cdata, + void **failed, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + // erase failed pointer + *failed = NULL; + + // super special case: zero elements + if (num == 0) { + return 0; + } + + // special case: one element does not need an iterator + if (num == 1) { + void *node; + if (0 == cx_tree_add( + src, sfunc, cfunc, cdata, &node, root, + loc_parent, loc_children, loc_last_child, + loc_prev, loc_next)) { + return 1; + } else { + *failed = node; + return 0; + } + } + + // otherwise, create iterator and hand over to other function + CxIterator iter = cxIterator(src, elem_size, num); + return cx_tree_add_iter(cxIteratorRef(iter), num, sfunc, + cfunc, cdata, failed, root, + loc_parent, loc_children, loc_last_child, + loc_prev, loc_next); +} + +static void cx_tree_default_destructor(CxTree *tree) { + if (tree->simple_destructor != NULL || tree->advanced_destructor != NULL) { + CxTreeIterator iter = tree->cl->iterator(tree, true); + cx_foreach(void *, node, iter) { + if (iter.exiting) { + if (tree->simple_destructor) { + tree->simple_destructor(node); + } + if (tree->advanced_destructor) { + tree->advanced_destructor(tree->destructor_data, node); + } + } + } + } + cxFree(tree->allocator, tree); +} + +static CxTreeIterator cx_tree_default_iterator( + CxTree *tree, + bool visit_on_exit +) { + return cx_tree_iterator( + tree->root, visit_on_exit, + tree->loc_children, tree->loc_next + ); +} + +static CxTreeVisitor cx_tree_default_visitor(CxTree *tree) { + return cx_tree_visitor(tree->root, tree->loc_children, tree->loc_next); +} + +static int cx_tree_default_insert_element( + CxTree *tree, + const void *data +) { + void *node; + if (tree->root == NULL) { + node = tree->node_create(data, tree); + if (node == NULL) return 1; + cx_tree_zero_pointers(node, cx_tree_node_layout(tree)); + tree->root = node; + tree->size = 1; + return 0; + } + int result = cx_tree_add(data, tree->search, tree->node_create, + tree, &node, tree->root, cx_tree_node_layout(tree)); + if (0 == result) { + tree->size++; + } else { + cxFree(tree->allocator, node); + } + return result; +} + +static size_t cx_tree_default_insert_many( + CxTree *tree, + struct cx_iterator_base_s *iter, + size_t n +) { + size_t ins = 0; + if (!iter->valid(iter)) return 0; + if (tree->root == NULL) { + // use the first element from the iter to create the root node + void **eptr = iter->current(iter); + void *node = tree->node_create(*eptr, tree); + if (node == NULL) return 0; + cx_tree_zero_pointers(node, cx_tree_node_layout(tree)); + tree->root = node; + ins = 1; + iter->next(iter); + } + void *failed; + ins += cx_tree_add_iter(iter, n, tree->search, tree->node_create, + tree, &failed, tree->root, cx_tree_node_layout(tree)); + tree->size += ins; + if (ins < n) { + cxFree(tree->allocator, failed); + } + return ins; +} + +static void *cx_tree_default_find( + CxTree *tree, + const void *subtree, + const void *data +) { + if (tree->root == NULL) return NULL; + + void *found; + if (0 == cx_tree_search_data( + subtree, + data, + tree->search_data, + &found, + tree->loc_children, + tree->loc_next + )) { + return found; + } else { + return NULL; + } +} + +static cx_tree_class cx_tree_default_class = { + cx_tree_default_destructor, + cx_tree_default_insert_element, + cx_tree_default_insert_many, + cx_tree_default_find, + cx_tree_default_iterator, + cx_tree_default_visitor +}; + +CxTree *cxTreeCreate( + const CxAllocator *allocator, + cx_tree_node_create_func create_func, + cx_tree_search_func search_func, + cx_tree_search_data_func search_data_func, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + CxTree *tree = cxMalloc(allocator, sizeof(CxTree)); + if (tree == NULL) return NULL; + + tree->cl = &cx_tree_default_class; + tree->allocator = allocator; + tree->node_create = create_func; + tree->search = search_func; + tree->search_data = search_data_func; + tree->advanced_destructor = (cx_destructor_func2) cxFree; + tree->destructor_data = (void *) allocator; + tree->loc_parent = loc_parent; + tree->loc_children = loc_children; + tree->loc_last_child = loc_last_child; + tree->loc_prev = loc_prev; + tree->loc_next = loc_next; + tree->root = NULL; + tree->size = 0; + + return tree; +} + +CxTree *cxTreeCreateWrapped( + const CxAllocator *allocator, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + CxTree *tree = cxMalloc(allocator, sizeof(CxTree)); + if (tree == NULL) return NULL; + + tree->cl = &cx_tree_default_class; + // set the allocator anyway, just in case... + tree->allocator = allocator; + tree->node_create = NULL; + tree->search = NULL; + tree->search_data = NULL; + tree->simple_destructor = NULL; + tree->advanced_destructor = NULL; + tree->destructor_data = NULL; + tree->loc_parent = loc_parent; + tree->loc_children = loc_children; + tree->loc_last_child = loc_last_child; + tree->loc_prev = loc_prev; + tree->loc_next = loc_next; + tree->root = root; + tree->size = cxTreeSubtreeSize(tree, root); + return tree; +} + +int cxTreeAddChild( + CxTree *tree, + void *parent, + const void *data) { + void *node = tree->node_create(data, tree); + if (node == NULL) return 1; + cx_tree_zero_pointers(node, cx_tree_node_layout(tree)); + cx_tree_link(parent, node, cx_tree_node_layout(tree)); + tree->size++; + return 0; +} + +size_t cxTreeSubtreeSize(CxTree *tree, void *subtree_root) { + CxTreeVisitor visitor = cx_tree_visitor( + subtree_root, + tree->loc_children, + tree->loc_next + ); + while (cxIteratorValid(visitor)) { + cxIteratorNext(visitor); + } + return visitor.counter; +} + +size_t cxTreeSubtreeDepth(CxTree *tree, void *subtree_root) { + CxTreeVisitor visitor = cx_tree_visitor( + subtree_root, + tree->loc_children, + tree->loc_next + ); + while (cxIteratorValid(visitor)) { + cxIteratorNext(visitor); + } + return visitor.depth; +} + +size_t cxTreeDepth(CxTree *tree) { + CxTreeVisitor visitor = tree->cl->visitor(tree); + while (cxIteratorValid(visitor)) { + cxIteratorNext(visitor); + } + return visitor.depth; +} + +int cxTreeRemove( + CxTree *tree, + void *node, + cx_tree_relink_func relink_func +) { + if (node == tree->root) return 1; + + // determine the new parent + ptrdiff_t loc_parent = tree->loc_parent; + void *new_parent = tree_parent(node); + + // first, unlink from the parent + cx_tree_unlink(node, cx_tree_node_layout(tree)); + + // then relink each child + ptrdiff_t loc_children = tree->loc_children; + ptrdiff_t loc_next = tree->loc_next; + void *child = tree_children(node); + while (child != NULL) { + // forcibly set the parent to NULL - we do not use the unlink function + // because that would unnecessarily modify the children linked list + tree_parent(child) = NULL; + + // update contents, if required + if (relink_func != NULL) { + relink_func(child, node, new_parent); + } + + // link to new parent + cx_tree_link(new_parent, child, cx_tree_node_layout(tree)); + + // proceed to next child + child = tree_next(child); + } + + // clear the linked list of the removed node + tree_children(node) = NULL; + ptrdiff_t loc_last_child = tree->loc_last_child; + if (loc_last_child >= 0) tree_last_child(node) = NULL; + + // the tree now has one member less + tree->size--; + + return 0; +} + +void cxTreeRemoveSubtree(CxTree *tree, void *node) { + if (node == tree->root) { + tree->root = NULL; + tree->size = 0; + return; + } + size_t subtree_size = cxTreeSubtreeSize(tree, node); + cx_tree_unlink(node, cx_tree_node_layout(tree)); + tree->size -= subtree_size; +}