src/ucx/array_list.c

changeset 621
956c03c25edd
parent 582
82b60a8dd55c
child 645
0c85c4cd0dd8
--- a/src/ucx/array_list.c	Fri Nov 07 11:06:58 2025 +0100
+++ b/src/ucx/array_list.c	Sun Nov 09 23:51:03 2025 +0100
@@ -50,7 +50,7 @@
 }
 
 CxArrayReallocator cx_array_default_reallocator_impl = {
-        cx_array_default_realloc, NULL, NULL, 0, 0
+        cx_array_default_realloc, NULL, NULL
 };
 
 CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl;
@@ -72,11 +72,11 @@
     }
 
     // retrieve the pointer to the actual allocator
-    const CxAllocator *al = alloc->ptr1;
+    const CxAllocator *al = alloc->allocator;
 
     // check if the array is still located on the stack
     void *newmem;
-    if (array == alloc->ptr2) {
+    if (array == alloc->stack_ptr) {
         newmem = cxMalloc(al, n);
         if (newmem != NULL && array != NULL) {
             memcpy(newmem, array, old_capacity*elem_size);
@@ -89,20 +89,27 @@
 
 struct cx_array_reallocator_s cx_array_reallocator(
         const struct cx_allocator_s *allocator,
-        const void *stackmem
+        const void *stack_ptr
 ) {
     if (allocator == NULL) {
         allocator = cxDefaultAllocator;
     }
     return (struct cx_array_reallocator_s) {
             cx_array_advanced_realloc,
-            (void*) allocator, (void*) stackmem,
-            0, 0
+            allocator, stack_ptr,
     };
 }
 
 // LOW LEVEL ARRAY LIST FUNCTIONS
 
+/**
+ * Increases the capacity until it is a multiple of a some alignment or reaches the maximum.
+ *
+ * @param cap the required capacity (must not be larger than @p max)
+ * @param alignment the alignment
+ * @param max the maximum capacity
+ * @return the aligned capacity
+ */
 static size_t cx_array_align_capacity(
         size_t cap,
         size_t alignment,
@@ -291,7 +298,7 @@
                 *target, oldcap, newcap, elem_size, reallocator
         );
         if (newmem == NULL) {
-            return 1;
+            return 1; // LCOV_EXCL_LINE
         }
 
         // repair src pointer, if necessary
@@ -335,7 +342,7 @@
     return 0;
 }
 
-int cx_array_insert_sorted(
+static int cx_array_insert_sorted_impl(
         void **target,
         size_t *size,
         size_t *capacity,
@@ -343,7 +350,8 @@
         const void *sorted_data,
         size_t elem_size,
         size_t elem_count,
-        CxArrayReallocator *reallocator
+        CxArrayReallocator *reallocator,
+        bool allow_duplicates
 ) {
     // assert pointers
     assert(target != NULL);
@@ -369,6 +377,7 @@
     // store some counts
     size_t old_size = *size;
     size_t old_capacity = *capacity;
+    // the necessary capacity is the worst case assumption, including duplicates
     size_t needed_capacity = old_size + elem_count;
 
     // if we need more than we have, try a reallocation
@@ -418,13 +427,60 @@
                 bptr,
                 cmp_func
         );
+        // binary search gives us the smallest index;
+        // we also want to include equal elements here
+        while (si + copy_len < elem_count
+                && cmp_func(bptr, src+copy_len*elem_size) == 0) {
+            copy_len++;
+        }
 
         // copy the source elements
-        bytes_copied = copy_len * elem_size;
-        memcpy(dest, src, bytes_copied);
-        dest += bytes_copied;
-        src += bytes_copied;
-        si += copy_len;
+        if (copy_len > 0) {
+            if (allow_duplicates) {
+                // we can copy the entire chunk
+                bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied;
+                si += copy_len;
+                di += copy_len;
+            } else {
+                // first, check the end of the source chunk
+                // for being a duplicate of the bptr
+                const char *end_of_src = src + (copy_len - 1) * elem_size;
+                size_t skip_len = 0;
+                while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) {
+                    end_of_src -= elem_size;
+                    skip_len++;
+                    copy_len--;
+                }
+                char *last = dest == *target ? NULL : dest - elem_size;
+                // then iterate through the source chunk
+                // and skip all duplicates with the last element in the array
+                size_t more_skipped = 0;
+                for (unsigned j = 0; j < copy_len; j++) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        // duplicate - skip
+                        src += elem_size;
+                        si++;
+                        more_skipped++;
+                    } else {
+                        memcpy(dest, src, elem_size);
+                        src += elem_size;
+                        last = dest;
+                        dest += elem_size;
+                        si++;
+                        di++;
+                    }
+                }
+                // skip the previously identified elements as well
+                src += skip_len * elem_size;
+                si += skip_len;
+                skip_len += more_skipped;
+                // reduce the actual size by the number of skipped elements
+                *size -= skip_len;
+            }
+        }
 
         // when all source elements are in place, we are done
         if (si >= elem_count) break;
@@ -443,20 +499,103 @@
         memmove(dest, bptr, bytes_copied);
         dest += bytes_copied;
         bptr += bytes_copied;
+        di += copy_len;
         bi += copy_len;
     }
 
-    // still source elements left? simply append them
+    // still source elements left?
     if (si < elem_count) {
-        memcpy(dest, src, elem_size * (elem_count - si));
+        if (allow_duplicates) {
+            // duplicates allowed or nothing inserted yet: simply copy everything
+            memcpy(dest, src, elem_size * (elem_count - si));
+        } else {
+            if (dest != *target) {
+                // skip all source elements that equal the last element
+                char *last = dest - elem_size;
+                while (si < elem_count) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        src += elem_size;
+                        si++;
+                        (*size)--;
+                    } else {
+                        break;
+                    }
+                }
+            }
+            // we must check the elements in the chunk one by one
+            while (si < elem_count) {
+                // find a chain of elements that can be copied
+                size_t copy_len = 1, skip_len = 0;
+                {
+                    const char *left_src = src;
+                    while (si + copy_len < elem_count) {
+                        const char *right_src = left_src + elem_size;
+                        int d = cmp_func(left_src,  right_src);
+                        if (d < 0) {
+                            if (skip_len > 0) {
+                                // new larger element found;
+                                // handle it in the next cycle
+                                break;
+                            }
+                            left_src += elem_size;
+                            copy_len++;
+                        } else if (d == 0) {
+                            left_src += elem_size;
+                            skip_len++;
+                        } else {
+                            break;
+                        }
+                    }
+                }
+                size_t bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied + skip_len * elem_size;
+                si += copy_len + skip_len;
+                di += copy_len;
+                *size -= skip_len;
+            }
+        }
     }
 
-    // still buffer elements left?
-    // don't worry, we already moved them to the correct place
+    // buffered elements need to be moved when we skipped duplicates
+    size_t total_skipped = new_size - *size;
+    if (bi < new_size && total_skipped > 0) {
+        // move the remaining buffer to the end of the array
+        memmove(dest, bptr, elem_size * (new_size - bi));
+    }
 
     return 0;
 }
 
+int cx_array_insert_sorted(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, true);
+}
+
+int cx_array_insert_unique(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
+}
+
 size_t cx_array_binary_search_inf(
         const void *arr,
         size_t size,
@@ -502,6 +641,13 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
+            // check previous elements;
+            // when they are equal, report the smallest index
+            arr_elem -= elem_size;
+            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
+                pivot_index--;
+                arr_elem -= elem_size;
+            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -644,7 +790,7 @@
     // guarantee enough capacity
     if (arl->capacity < list->collection.size + n) {
         size_t new_capacity = list->collection.size + n;
-        new_capacity = new_capacity - (new_capacity % 16) + 16;
+        new_capacity = cx_array_align_capacity(new_capacity, 16, SIZE_MAX);
         if (cxReallocateArray(
                 list->collection.allocator,
                 &arl->data, new_capacity,
@@ -700,6 +846,31 @@
     }
 }
 
+static size_t cx_arl_insert_unique(
+        struct cx_list_s *list,
+        const void *sorted_data,
+        size_t n
+) {
+    // get a correctly typed pointer to the list
+    cx_array_list *arl = (cx_array_list *) list;
+
+    if (cx_array_insert_unique(
+            &arl->data,
+            &list->collection.size,
+            &arl->capacity,
+            list->collection.cmpfunc,
+            sorted_data,
+            list->collection.elem_size,
+            n,
+            &arl->reallocator
+    )) {
+        // array list implementation is "all or nothing"
+        return 0;
+    } else {
+        return n;
+    }
+}
+
 static void *cx_arl_insert_element(
         struct cx_list_s *list,
         size_t index,
@@ -717,7 +888,7 @@
         const void *elem,
         int prepend
 ) {
-    struct cx_list_s *list = iter->src_handle.m;
+    struct cx_list_s *list = iter->src_handle;
     if (iter->index < list->collection.size) {
         if (cx_arl_insert_element(list,
                 iter->index + 1 - prepend, elem) == NULL) {
@@ -928,7 +1099,7 @@
 
 static bool cx_arl_iter_valid(const void *it) {
     const struct cx_iterator_s *iter = it;
-    const struct cx_list_s *list = iter->src_handle.c;
+    const struct cx_list_s *list = iter->src_handle;
     return iter->index < list->collection.size;
 }
 
@@ -941,23 +1112,25 @@
     struct cx_iterator_s *iter = it;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        cx_arl_remove(iter->src_handle, iter->index, 1, NULL);
+        iter->elem_count--;
     } else {
         iter->index++;
         iter->elem_handle =
                 ((char *) iter->elem_handle)
-                + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size;
+                + ((const struct cx_list_s *) iter->src_handle)->collection.elem_size;
     }
 }
 
 static void cx_arl_iter_prev(void *it) {
     struct cx_iterator_s *iter = it;
-    const cx_array_list *list = iter->src_handle.c;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        cx_arl_remove(iter->src_handle, iter->index, 1, NULL);
+        iter->elem_count--;
     }
     iter->index--;
+    cx_array_list *list = iter->src_handle;
     if (iter->index < list->base.collection.size) {
         iter->elem_handle = ((char *) list->data)
                             + iter->index * list->base.collection.elem_size;
@@ -973,7 +1146,7 @@
     struct cx_iterator_s iter;
 
     iter.index = index;
-    iter.src_handle.c = list;
+    iter.src_handle = (void*)list;
     iter.elem_handle = cx_arl_at(list, index);
     iter.elem_size = list->collection.elem_size;
     iter.elem_count = list->collection.size;
@@ -981,7 +1154,7 @@
     iter.base.current = cx_arl_iter_current;
     iter.base.next = backwards ? cx_arl_iter_prev : cx_arl_iter_next;
     iter.base.remove = false;
-    iter.base.mutating = false;
+    iter.base.allow_remove = true;
 
     return iter;
 }
@@ -991,6 +1164,7 @@
         cx_arl_insert_element,
         cx_arl_insert_array,
         cx_arl_insert_sorted,
+        cx_arl_insert_unique,
         cx_arl_insert_iter,
         cx_arl_remove,
         cx_arl_clear,

mercurial