ucx/array_list.c

changeset 22
112b85020dc9
parent 21
5ea41679e15d
child 23
b26390e77237
--- a/ucx/array_list.c	Thu May 29 15:59:27 2025 +0200
+++ b/ucx/array_list.c	Wed Nov 12 18:37:58 2025 +0100
@@ -50,7 +50,7 @@
 }
 
 CxArrayReallocator cx_array_default_reallocator_impl = {
-        cx_array_default_realloc, NULL, NULL, 0, 0
+        cx_array_default_realloc, NULL, NULL
 };
 
 CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl;
@@ -72,11 +72,11 @@
     }
 
     // retrieve the pointer to the actual allocator
-    const CxAllocator *al = alloc->ptr1;
+    const CxAllocator *al = alloc->allocator;
 
     // check if the array is still located on the stack
     void *newmem;
-    if (array == alloc->ptr2) {
+    if (array == alloc->stack_ptr) {
         newmem = cxMalloc(al, n);
         if (newmem != NULL && array != NULL) {
             memcpy(newmem, array, old_capacity*elem_size);
@@ -89,27 +89,45 @@
 
 struct cx_array_reallocator_s cx_array_reallocator(
         const struct cx_allocator_s *allocator,
-        const void *stackmem
+        const void *stack_ptr
 ) {
     if (allocator == NULL) {
         allocator = cxDefaultAllocator;
     }
     return (struct cx_array_reallocator_s) {
             cx_array_advanced_realloc,
-            (void*) allocator, (void*) stackmem,
-            0, 0
+            allocator, stack_ptr,
     };
 }
 
 // LOW LEVEL ARRAY LIST FUNCTIONS
 
-static size_t cx_array_align_capacity(
-        size_t cap,
-        size_t alignment,
-        size_t max
+/**
+ * Intelligently calculates a new capacity, reserving some more
+ * elements than required to prevent too many allocations.
+ *
+ * @param current_capacity the current capacity of the array
+ * @param needed_capacity the required capacity of the array
+ * @param maximum_capacity the maximum capacity (given by the data type)
+ * @return the new capacity
+ */
+static size_t cx_array_grow_capacity(
+    size_t current_capacity,
+    size_t needed_capacity,
+    size_t maximum_capacity
 ) {
-    if (cap > max - alignment) {
-        return cap;
+    if (current_capacity >= needed_capacity) {
+        return current_capacity;
+    }
+    size_t cap = needed_capacity;
+    size_t alignment;
+    if (cap < 128) alignment = 16;
+    else if (cap < 1024) alignment = 64;
+    else if (cap < 8192) alignment = 512;
+    else alignment = 1024;
+
+    if (cap - 1 > maximum_capacity - alignment) {
+        return maximum_capacity;
     } else {
         return cap - (cap % alignment) + alignment;
     }
@@ -177,10 +195,6 @@
 
     // reallocate if possible
     if (newcap > oldcap) {
-        // calculate new capacity (next number divisible by 16)
-        newcap = cx_array_align_capacity(newcap, 16, max_size);
-
-        // perform reallocation
         void *newmem = reallocator->realloc(
                 *array, oldcap, newcap, elem_size, reallocator
         );
@@ -270,28 +284,24 @@
     }
 
     // check if resize is required
-    size_t minsize = index + elem_count;
-    size_t newsize = oldsize < minsize ? minsize : oldsize;
+    const size_t minsize = index + elem_count;
+    const size_t newsize = oldsize < minsize ? minsize : oldsize;
 
-    // reallocate if possible
-    size_t newcap = oldcap;
-    if (newsize > oldcap) {
-        // check, if we need to repair the src pointer
+    // reallocate if necessary
+    const size_t newcap = cx_array_grow_capacity(oldcap, newsize, max_size);
+    if (newcap > oldcap) {
+        // check if we need to repair the src pointer
         uintptr_t targetaddr = (uintptr_t) *target;
         uintptr_t srcaddr = (uintptr_t) src;
         bool repairsrc = targetaddr <= srcaddr
                          && srcaddr < targetaddr + oldcap * elem_size;
 
-        // calculate new capacity (next number divisible by 16)
-        newcap = cx_array_align_capacity(newsize, 16, max_size);
-        assert(newcap > newsize);
-
         // perform reallocation
         void *newmem = reallocator->realloc(
                 *target, oldcap, newcap, elem_size, reallocator
         );
         if (newmem == NULL) {
-            return 1;
+            return 1; // LCOV_EXCL_LINE
         }
 
         // repair src pointer, if necessary
@@ -335,7 +345,7 @@
     return 0;
 }
 
-int cx_array_insert_sorted(
+static int cx_array_insert_sorted_impl(
         void **target,
         size_t *size,
         size_t *capacity,
@@ -343,7 +353,8 @@
         const void *sorted_data,
         size_t elem_size,
         size_t elem_count,
-        CxArrayReallocator *reallocator
+        CxArrayReallocator *reallocator,
+        bool allow_duplicates
 ) {
     // assert pointers
     assert(target != NULL);
@@ -367,15 +378,16 @@
     }
 
     // store some counts
-    size_t old_size = *size;
-    size_t old_capacity = *capacity;
-    size_t needed_capacity = old_size + elem_count;
+    const size_t old_size = *size;
+    const size_t old_capacity = *capacity;
+    // the necessary capacity is the worst case assumption, including duplicates
+    const size_t needed_capacity = cx_array_grow_capacity(old_capacity,
+        old_size + elem_count, SIZE_MAX);
 
     // if we need more than we have, try a reallocation
     if (needed_capacity > old_capacity) {
-        size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX);
         void *new_mem = reallocator->realloc(
-                *target, old_capacity, new_capacity, elem_size, reallocator
+                *target, old_capacity, needed_capacity, elem_size, reallocator
         );
         if (new_mem == NULL) {
             // give it up right away, there is no contract
@@ -383,7 +395,7 @@
             return 1;  // LCOV_EXCL_LINE
         }
         *target = new_mem;
-        *capacity = new_capacity;
+        *capacity = needed_capacity;
     }
 
     // now we have guaranteed that we can insert everything
@@ -418,13 +430,60 @@
                 bptr,
                 cmp_func
         );
+        // binary search gives us the smallest index;
+        // we also want to include equal elements here
+        while (si + copy_len < elem_count
+                && cmp_func(bptr, src+copy_len*elem_size) == 0) {
+            copy_len++;
+        }
 
         // copy the source elements
-        bytes_copied = copy_len * elem_size;
-        memcpy(dest, src, bytes_copied);
-        dest += bytes_copied;
-        src += bytes_copied;
-        si += copy_len;
+        if (copy_len > 0) {
+            if (allow_duplicates) {
+                // we can copy the entire chunk
+                bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied;
+                si += copy_len;
+                di += copy_len;
+            } else {
+                // first, check the end of the source chunk
+                // for being a duplicate of the bptr
+                const char *end_of_src = src + (copy_len - 1) * elem_size;
+                size_t skip_len = 0;
+                while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) {
+                    end_of_src -= elem_size;
+                    skip_len++;
+                    copy_len--;
+                }
+                char *last = dest == *target ? NULL : dest - elem_size;
+                // then iterate through the source chunk
+                // and skip all duplicates with the last element in the array
+                size_t more_skipped = 0;
+                for (unsigned j = 0; j < copy_len; j++) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        // duplicate - skip
+                        src += elem_size;
+                        si++;
+                        more_skipped++;
+                    } else {
+                        memcpy(dest, src, elem_size);
+                        src += elem_size;
+                        last = dest;
+                        dest += elem_size;
+                        si++;
+                        di++;
+                    }
+                }
+                // skip the previously identified elements as well
+                src += skip_len * elem_size;
+                si += skip_len;
+                skip_len += more_skipped;
+                // reduce the actual size by the number of skipped elements
+                *size -= skip_len;
+            }
+        }
 
         // when all source elements are in place, we are done
         if (si >= elem_count) break;
@@ -443,20 +502,103 @@
         memmove(dest, bptr, bytes_copied);
         dest += bytes_copied;
         bptr += bytes_copied;
+        di += copy_len;
         bi += copy_len;
     }
 
-    // still source elements left? simply append them
+    // still source elements left?
     if (si < elem_count) {
-        memcpy(dest, src, elem_size * (elem_count - si));
+        if (allow_duplicates) {
+            // duplicates allowed or nothing inserted yet: simply copy everything
+            memcpy(dest, src, elem_size * (elem_count - si));
+        } else {
+            if (dest != *target) {
+                // skip all source elements that equal the last element
+                char *last = dest - elem_size;
+                while (si < elem_count) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        src += elem_size;
+                        si++;
+                        (*size)--;
+                    } else {
+                        break;
+                    }
+                }
+            }
+            // we must check the elements in the chunk one by one
+            while (si < elem_count) {
+                // find a chain of elements that can be copied
+                size_t copy_len = 1, skip_len = 0;
+                {
+                    const char *left_src = src;
+                    while (si + copy_len < elem_count) {
+                        const char *right_src = left_src + elem_size;
+                        int d = cmp_func(left_src,  right_src);
+                        if (d < 0) {
+                            if (skip_len > 0) {
+                                // new larger element found;
+                                // handle it in the next cycle
+                                break;
+                            }
+                            left_src += elem_size;
+                            copy_len++;
+                        } else if (d == 0) {
+                            left_src += elem_size;
+                            skip_len++;
+                        } else {
+                            break;
+                        }
+                    }
+                }
+                size_t bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied + skip_len * elem_size;
+                si += copy_len + skip_len;
+                di += copy_len;
+                *size -= skip_len;
+            }
+        }
     }
 
-    // still buffer elements left?
-    // don't worry, we already moved them to the correct place
+    // buffered elements need to be moved when we skipped duplicates
+    size_t total_skipped = new_size - *size;
+    if (bi < new_size && total_skipped > 0) {
+        // move the remaining buffer to the end of the array
+        memmove(dest, bptr, elem_size * (new_size - bi));
+    }
 
     return 0;
 }
 
+int cx_array_insert_sorted(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, true);
+}
+
+int cx_array_insert_unique(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
+}
+
 size_t cx_array_binary_search_inf(
         const void *arr,
         size_t size,
@@ -502,6 +644,13 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
+            // check previous elements;
+            // when they are equal, report the smallest index
+            arr_elem -= elem_size;
+            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
+                pivot_index--;
+                arr_elem -= elem_size;
+            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -643,8 +792,8 @@
 
     // guarantee enough capacity
     if (arl->capacity < list->collection.size + n) {
-        size_t new_capacity = list->collection.size + n;
-        new_capacity = new_capacity - (new_capacity % 16) + 16;
+        const size_t new_capacity = cx_array_grow_capacity(arl->capacity,
+            list->collection.size + n, SIZE_MAX);
         if (cxReallocateArray(
                 list->collection.allocator,
                 &arl->data, new_capacity,
@@ -700,6 +849,31 @@
     }
 }
 
+static size_t cx_arl_insert_unique(
+        struct cx_list_s *list,
+        const void *sorted_data,
+        size_t n
+) {
+    // get a correctly typed pointer to the list
+    cx_array_list *arl = (cx_array_list *) list;
+
+    if (cx_array_insert_unique(
+            &arl->data,
+            &list->collection.size,
+            &arl->capacity,
+            list->collection.cmpfunc,
+            sorted_data,
+            list->collection.elem_size,
+            n,
+            &arl->reallocator
+    )) {
+        // array list implementation is "all or nothing"
+        return 0;
+    } else {
+        return n;
+    }
+}
+
 static void *cx_arl_insert_element(
         struct cx_list_s *list,
         size_t index,
@@ -717,7 +891,7 @@
         const void *elem,
         int prepend
 ) {
-    struct cx_list_s *list = iter->src_handle.m;
+    struct cx_list_s *list = iter->src_handle;
     if (iter->index < list->collection.size) {
         if (cx_arl_insert_element(list,
                 iter->index + 1 - prepend, elem) == NULL) {
@@ -928,7 +1102,7 @@
 
 static bool cx_arl_iter_valid(const void *it) {
     const struct cx_iterator_s *iter = it;
-    const struct cx_list_s *list = iter->src_handle.c;
+    const struct cx_list_s *list = iter->src_handle;
     return iter->index < list->collection.size;
 }
 
@@ -941,29 +1115,39 @@
     struct cx_iterator_s *iter = it;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        cx_arl_remove(iter->src_handle, iter->index, 1, NULL);
+        iter->elem_count--;
     } else {
         iter->index++;
         iter->elem_handle =
                 ((char *) iter->elem_handle)
-                + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size;
+                + ((const struct cx_list_s *) iter->src_handle)->collection.elem_size;
     }
 }
 
 static void cx_arl_iter_prev(void *it) {
     struct cx_iterator_s *iter = it;
-    const cx_array_list *list = iter->src_handle.c;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        cx_arl_remove(iter->src_handle, iter->index, 1, NULL);
+        iter->elem_count--;
     }
     iter->index--;
+    cx_array_list *list = iter->src_handle;
     if (iter->index < list->base.collection.size) {
         iter->elem_handle = ((char *) list->data)
                             + iter->index * list->base.collection.elem_size;
     }
 }
 
+static int cx_arl_change_capacity(
+        struct cx_list_s *list,
+        size_t new_capacity
+) {
+    cx_array_list *arl = (cx_array_list *)list;
+    return cxReallocateArray(list->collection.allocator,
+        &arl->data, new_capacity, list->collection.elem_size);
+}
 
 static struct cx_iterator_s cx_arl_iterator(
         const struct cx_list_s *list,
@@ -973,7 +1157,7 @@
     struct cx_iterator_s iter;
 
     iter.index = index;
-    iter.src_handle.c = list;
+    iter.src_handle = (void*)list;
     iter.elem_handle = cx_arl_at(list, index);
     iter.elem_size = list->collection.elem_size;
     iter.elem_count = list->collection.size;
@@ -981,7 +1165,7 @@
     iter.base.current = cx_arl_iter_current;
     iter.base.next = backwards ? cx_arl_iter_prev : cx_arl_iter_next;
     iter.base.remove = false;
-    iter.base.mutating = false;
+    iter.base.allow_remove = true;
 
     return iter;
 }
@@ -991,6 +1175,7 @@
         cx_arl_insert_element,
         cx_arl_insert_array,
         cx_arl_insert_sorted,
+        cx_arl_insert_unique,
         cx_arl_insert_iter,
         cx_arl_remove,
         cx_arl_clear,
@@ -1000,6 +1185,7 @@
         cx_arl_sort,
         cx_arl_compare,
         cx_arl_reverse,
+        cx_arl_change_capacity,
         cx_arl_iterator,
 };
 

mercurial