ucx/array_list.c

changeset 888
af685cc9d623
parent 854
1c8401ece69e
--- a/ucx/array_list.c	Sun Aug 31 14:39:13 2025 +0200
+++ b/ucx/array_list.c	Sat Nov 08 23:06:11 2025 +0100
@@ -36,20 +36,21 @@
 
 static void *cx_array_default_realloc(
         void *array,
-        size_t capacity,
+        cx_attr_unused size_t old_capacity,
+        size_t new_capacity,
         size_t elem_size,
         cx_attr_unused CxArrayReallocator *alloc
 ) {
     size_t n;
-    if (cx_szmul(capacity, elem_size, &n)) {
+    if (cx_szmul(new_capacity, elem_size, &n)) {
         errno = EOVERFLOW;
         return NULL;
     }
-    return realloc(array, n);
+    return cxReallocDefault(array, n);
 }
 
 CxArrayReallocator cx_array_default_reallocator_impl = {
-        cx_array_default_realloc, NULL, NULL, 0, 0
+        cx_array_default_realloc, NULL, NULL
 };
 
 CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl;
@@ -58,26 +59,27 @@
 
 static void *cx_array_advanced_realloc(
         void *array,
-        size_t capacity,
+        size_t old_capacity,
+        size_t new_capacity,
         size_t elem_size,
         cx_attr_unused CxArrayReallocator *alloc
 ) {
     // check for overflow
     size_t n;
-    if (cx_szmul(capacity, elem_size, &n)) {
+    if (cx_szmul(new_capacity, elem_size, &n)) {
         errno = EOVERFLOW;
         return NULL;
     }
 
     // retrieve the pointer to the actual allocator
-    const CxAllocator *al = alloc->ptr1;
+    const CxAllocator *al = alloc->allocator;
 
     // check if the array is still located on the stack
     void *newmem;
-    if (array == alloc->ptr2) {
+    if (array == alloc->stack_ptr) {
         newmem = cxMalloc(al, n);
         if (newmem != NULL && array != NULL) {
-            memcpy(newmem, array, n);
+            memcpy(newmem, array, old_capacity*elem_size);
         }
     } else {
         newmem = cxRealloc(al, array, n);
@@ -87,20 +89,27 @@
 
 struct cx_array_reallocator_s cx_array_reallocator(
         const struct cx_allocator_s *allocator,
-        const void *stackmem
+        const void *stack_ptr
 ) {
     if (allocator == NULL) {
         allocator = cxDefaultAllocator;
     }
     return (struct cx_array_reallocator_s) {
             cx_array_advanced_realloc,
-            (void*) allocator, (void*) stackmem,
-            0, 0
+            allocator, stack_ptr,
     };
 }
 
 // LOW LEVEL ARRAY LIST FUNCTIONS
 
+/**
+ * Increases the capacity until it is a multiple of a some alignment or reaches the maximum.
+ *
+ * @param cap the required capacity (must not be larger than @p max)
+ * @param alignment the alignment
+ * @param max the maximum capacity
+ * @return the aligned capacity
+ */
 static size_t cx_array_align_capacity(
         size_t cap,
         size_t alignment,
@@ -180,7 +189,7 @@
 
         // perform reallocation
         void *newmem = reallocator->realloc(
-                *array, newcap, elem_size, reallocator
+                *array, oldcap, newcap, elem_size, reallocator
         );
         if (newmem == NULL) {
             return 1; // LCOV_EXCL_LINE
@@ -286,10 +295,10 @@
 
         // perform reallocation
         void *newmem = reallocator->realloc(
-                *target, newcap, elem_size, reallocator
+                *target, oldcap, newcap, elem_size, reallocator
         );
         if (newmem == NULL) {
-            return 1;
+            return 1; // LCOV_EXCL_LINE
         }
 
         // repair src pointer, if necessary
@@ -333,7 +342,7 @@
     return 0;
 }
 
-int cx_array_insert_sorted(
+static int cx_array_insert_sorted_impl(
         void **target,
         size_t *size,
         size_t *capacity,
@@ -341,7 +350,8 @@
         const void *sorted_data,
         size_t elem_size,
         size_t elem_count,
-        CxArrayReallocator *reallocator
+        CxArrayReallocator *reallocator,
+        bool allow_duplicates
 ) {
     // assert pointers
     assert(target != NULL);
@@ -366,13 +376,15 @@
 
     // store some counts
     size_t old_size = *size;
+    size_t old_capacity = *capacity;
+    // the necessary capacity is the worst case assumption, including duplicates
     size_t needed_capacity = old_size + elem_count;
 
     // if we need more than we have, try a reallocation
-    if (needed_capacity > *capacity) {
+    if (needed_capacity > old_capacity) {
         size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX);
         void *new_mem = reallocator->realloc(
-                *target, new_capacity, elem_size, reallocator
+                *target, old_capacity, new_capacity, elem_size, reallocator
         );
         if (new_mem == NULL) {
             // give it up right away, there is no contract
@@ -415,13 +427,60 @@
                 bptr,
                 cmp_func
         );
+        // binary search gives us the smallest index;
+        // we also want to include equal elements here
+        while (si + copy_len < elem_count
+                && cmp_func(bptr, src+copy_len*elem_size) == 0) {
+            copy_len++;
+        }
 
         // copy the source elements
-        bytes_copied = copy_len * elem_size;
-        memcpy(dest, src, bytes_copied);
-        dest += bytes_copied;
-        src += bytes_copied;
-        si += copy_len;
+        if (copy_len > 0) {
+            if (allow_duplicates) {
+                // we can copy the entire chunk
+                bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied;
+                si += copy_len;
+                di += copy_len;
+            } else {
+                // first, check the end of the source chunk
+                // for being a duplicate of the bptr
+                const char *end_of_src = src + (copy_len - 1) * elem_size;
+                size_t skip_len = 0;
+                while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) {
+                    end_of_src -= elem_size;
+                    skip_len++;
+                    copy_len--;
+                }
+                char *last = dest == *target ? NULL : dest - elem_size;
+                // then iterate through the source chunk
+                // and skip all duplicates with the last element in the array
+                size_t more_skipped = 0;
+                for (unsigned j = 0; j < copy_len; j++) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        // duplicate - skip
+                        src += elem_size;
+                        si++;
+                        more_skipped++;
+                    } else {
+                        memcpy(dest, src, elem_size);
+                        src += elem_size;
+                        last = dest;
+                        dest += elem_size;
+                        si++;
+                        di++;
+                    }
+                }
+                // skip the previously identified elements as well
+                src += skip_len * elem_size;
+                si += skip_len;
+                skip_len += more_skipped;
+                // reduce the actual size by the number of skipped elements
+                *size -= skip_len;
+            }
+        }
 
         // when all source elements are in place, we are done
         if (si >= elem_count) break;
@@ -440,20 +499,103 @@
         memmove(dest, bptr, bytes_copied);
         dest += bytes_copied;
         bptr += bytes_copied;
+        di += copy_len;
         bi += copy_len;
     }
 
-    // still source elements left? simply append them
+    // still source elements left?
     if (si < elem_count) {
-        memcpy(dest, src, elem_size * (elem_count - si));
+        if (allow_duplicates) {
+            // duplicates allowed or nothing inserted yet: simply copy everything
+            memcpy(dest, src, elem_size * (elem_count - si));
+        } else {
+            if (dest != *target) {
+                // skip all source elements that equal the last element
+                char *last = dest - elem_size;
+                while (si < elem_count) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        src += elem_size;
+                        si++;
+                        (*size)--;
+                    } else {
+                        break;
+                    }
+                }
+            }
+            // we must check the elements in the chunk one by one
+            while (si < elem_count) {
+                // find a chain of elements that can be copied
+                size_t copy_len = 1, skip_len = 0;
+                {
+                    const char *left_src = src;
+                    while (si + copy_len < elem_count) {
+                        const char *right_src = left_src + elem_size;
+                        int d = cmp_func(left_src,  right_src);
+                        if (d < 0) {
+                            if (skip_len > 0) {
+                                // new larger element found;
+                                // handle it in the next cycle
+                                break;
+                            }
+                            left_src += elem_size;
+                            copy_len++;
+                        } else if (d == 0) {
+                            left_src += elem_size;
+                            skip_len++;
+                        } else {
+                            break;
+                        }
+                    }
+                }
+                size_t bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied + skip_len * elem_size;
+                si += copy_len + skip_len;
+                di += copy_len;
+                *size -= skip_len;
+            }
+        }
     }
 
-    // still buffer elements left?
-    // don't worry, we already moved them to the correct place
+    // buffered elements need to be moved when we skipped duplicates
+    size_t total_skipped = new_size - *size;
+    if (bi < new_size && total_skipped > 0) {
+        // move the remaining buffer to the end of the array
+        memmove(dest, bptr, elem_size * (new_size - bi));
+    }
 
     return 0;
 }
 
+int cx_array_insert_sorted(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, true);
+}
+
+int cx_array_insert_unique(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
+}
+
 size_t cx_array_binary_search_inf(
         const void *arr,
         size_t size,
@@ -499,6 +641,13 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
+            // check previous elements;
+            // when they are equal, report the smallest index
+            arr_elem -= elem_size;
+            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
+                pivot_index--;
+                arr_elem -= elem_size;
+            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -572,7 +721,7 @@
 
     // decide if we can use the local buffer
     if (elem_size > CX_ARRAY_SWAP_SBO_SIZE) {
-        tmp = malloc(elem_size);
+        tmp = cxMallocDefault(elem_size);
         // we don't want to enforce error handling
         if (tmp == NULL) abort();
     } else {
@@ -591,7 +740,7 @@
 
     // free dynamic memory, if it was needed
     if (tmp != sbo_mem) {
-        free(tmp);
+        cxFreeDefault(tmp);
     }
 }
 
@@ -638,50 +787,38 @@
     // get a correctly typed pointer to the list
     cx_array_list *arl = (cx_array_list *) list;
 
-    // do we need to move some elements?
-    if (index < list->collection.size) {
-        const char *first_to_move = (const char *) arl->data;
-        first_to_move += index * list->collection.elem_size;
-        size_t elems_to_move = list->collection.size - index;
-        size_t start_of_moved = index + n;
-
-        if (cx_array_copy(
-                &arl->data,
-                &list->collection.size,
-                &arl->capacity,
-                0,
-                start_of_moved,
-                first_to_move,
-                list->collection.elem_size,
-                elems_to_move,
-                &arl->reallocator
-        )) {
-            // if moving existing elems is unsuccessful, abort
+    // guarantee enough capacity
+    if (arl->capacity < list->collection.size + n) {
+        size_t new_capacity = list->collection.size + n;
+        new_capacity = cx_array_align_capacity(new_capacity, 16, SIZE_MAX);
+        if (cxReallocateArray(
+                list->collection.allocator,
+                &arl->data, new_capacity,
+                list->collection.elem_size)
+        ) {
             return 0;
         }
+        arl->capacity = new_capacity;
     }
 
-    // note that if we had to move the elements, the following operation
-    // is guaranteed to succeed, because we have the memory already allocated
-    // therefore, it is impossible to leave this function with an invalid array
+    // determine insert position
+    char *arl_data = arl->data;
+    char *insert_pos = arl_data + index * list->collection.elem_size;
 
-    // place the new elements
-    if (cx_array_copy(
-            &arl->data,
-            &list->collection.size,
-            &arl->capacity,
-            0,
-            index,
-            array,
-            list->collection.elem_size,
-            n,
-            &arl->reallocator
-    )) {
-        // array list implementation is "all or nothing"
-        return 0;
-    } else {
-        return n;
+    // do we need to move some elements?
+    if (index < list->collection.size) {
+        size_t elems_to_move = list->collection.size - index;
+        char *target = insert_pos + n * list->collection.elem_size;
+        memmove(target, insert_pos, elems_to_move * list->collection.elem_size);
     }
+
+    // place the new elements, if any
+    if (array != NULL) {
+        memcpy(insert_pos, array, n * list->collection.elem_size);
+    }
+    list->collection.size += n;
+
+    return n;
 }
 
 static size_t cx_arl_insert_sorted(
@@ -709,12 +846,41 @@
     }
 }
 
-static int cx_arl_insert_element(
+static size_t cx_arl_insert_unique(
+        struct cx_list_s *list,
+        const void *sorted_data,
+        size_t n
+) {
+    // get a correctly typed pointer to the list
+    cx_array_list *arl = (cx_array_list *) list;
+
+    if (cx_array_insert_unique(
+            &arl->data,
+            &list->collection.size,
+            &arl->capacity,
+            list->collection.cmpfunc,
+            sorted_data,
+            list->collection.elem_size,
+            n,
+            &arl->reallocator
+    )) {
+        // array list implementation is "all or nothing"
+        return 0;
+    } else {
+        return n;
+    }
+}
+
+static void *cx_arl_insert_element(
         struct cx_list_s *list,
         size_t index,
         const void *element
 ) {
-    return 1 != cx_arl_insert_array(list, index, element, 1);
+    if (cx_arl_insert_array(list, index, element, 1) == 1) {
+        return ((char*)((cx_array_list *) list)->data) + index * list->collection.elem_size;
+    } else {
+        return NULL;
+    }
 }
 
 static int cx_arl_insert_iter(
@@ -722,28 +888,25 @@
         const void *elem,
         int prepend
 ) {
-    struct cx_list_s *list = iter->src_handle.m;
+    struct cx_list_s *list = iter->src_handle;
     if (iter->index < list->collection.size) {
-        int result = cx_arl_insert_element(
-                list,
-                iter->index + 1 - prepend,
-                elem
-        );
-        if (result == 0) {
-            iter->elem_count++;
-            if (prepend != 0) {
-                iter->index++;
-                iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size;
-            }
+        if (cx_arl_insert_element(list,
+                iter->index + 1 - prepend, elem) == NULL) {
+            return 1;
+        }
+        iter->elem_count++;
+        if (prepend != 0) {
+            iter->index++;
+            iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size;
         }
-        return result;
+        return 0;
     } else {
-        int result = cx_arl_insert_element(list, list->collection.size, elem);
-        if (result == 0) {
-            iter->elem_count++;
-            iter->index = list->collection.size;
+        if (cx_arl_insert_element(list, list->collection.size, elem) == NULL) {
+            return 1;
         }
-        return result;
+        iter->elem_count++;
+        iter->index = list->collection.size;
+        return 0;
     }
 }
 
@@ -936,7 +1099,7 @@
 
 static bool cx_arl_iter_valid(const void *it) {
     const struct cx_iterator_s *iter = it;
-    const struct cx_list_s *list = iter->src_handle.c;
+    const struct cx_list_s *list = iter->src_handle;
     return iter->index < list->collection.size;
 }
 
@@ -949,23 +1112,25 @@
     struct cx_iterator_s *iter = it;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        cx_arl_remove(iter->src_handle, iter->index, 1, NULL);
+        iter->elem_count--;
     } else {
         iter->index++;
         iter->elem_handle =
                 ((char *) iter->elem_handle)
-                + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size;
+                + ((const struct cx_list_s *) iter->src_handle)->collection.elem_size;
     }
 }
 
 static void cx_arl_iter_prev(void *it) {
     struct cx_iterator_s *iter = it;
-    const cx_array_list *list = iter->src_handle.c;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        cx_arl_remove(iter->src_handle, iter->index, 1, NULL);
+        iter->elem_count--;
     }
     iter->index--;
+    cx_array_list *list = iter->src_handle;
     if (iter->index < list->base.collection.size) {
         iter->elem_handle = ((char *) list->data)
                             + iter->index * list->base.collection.elem_size;
@@ -981,7 +1146,7 @@
     struct cx_iterator_s iter;
 
     iter.index = index;
-    iter.src_handle.c = list;
+    iter.src_handle = (void*)list;
     iter.elem_handle = cx_arl_at(list, index);
     iter.elem_size = list->collection.elem_size;
     iter.elem_count = list->collection.size;
@@ -989,7 +1154,7 @@
     iter.base.current = cx_arl_iter_current;
     iter.base.next = backwards ? cx_arl_iter_prev : cx_arl_iter_next;
     iter.base.remove = false;
-    iter.base.mutating = false;
+    iter.base.allow_remove = true;
 
     return iter;
 }
@@ -999,6 +1164,7 @@
         cx_arl_insert_element,
         cx_arl_insert_array,
         cx_arl_insert_sorted,
+        cx_arl_insert_unique,
         cx_arl_insert_iter,
         cx_arl_remove,
         cx_arl_clear,

mercurial