ucx/array_list.c

changeset 112
c3f2f16fa4b8
parent 108
77254bd6dccb
child 113
dde28a806552
--- a/ucx/array_list.c	Sat Oct 04 14:54:25 2025 +0200
+++ b/ucx/array_list.c	Sun Oct 19 21:20:08 2025 +0200
@@ -291,7 +291,7 @@
                 *target, oldcap, newcap, elem_size, reallocator
         );
         if (newmem == NULL) {
-            return 1;
+            return 1; // LCOV_EXCL_LINE
         }
 
         // repair src pointer, if necessary
@@ -335,7 +335,7 @@
     return 0;
 }
 
-int cx_array_insert_sorted(
+static int cx_array_insert_sorted_impl(
         void **target,
         size_t *size,
         size_t *capacity,
@@ -343,7 +343,8 @@
         const void *sorted_data,
         size_t elem_size,
         size_t elem_count,
-        CxArrayReallocator *reallocator
+        CxArrayReallocator *reallocator,
+        bool allow_duplicates
 ) {
     // assert pointers
     assert(target != NULL);
@@ -369,6 +370,7 @@
     // store some counts
     size_t old_size = *size;
     size_t old_capacity = *capacity;
+    // the necessary capacity is the worst case assumption, including duplicates
     size_t needed_capacity = old_size + elem_count;
 
     // if we need more than we have, try a reallocation
@@ -418,13 +420,60 @@
                 bptr,
                 cmp_func
         );
+        // binary search gives us the smallest index;
+        // we also want to include equal elements here
+        while (si + copy_len < elem_count
+                && cmp_func(bptr, src+copy_len*elem_size) == 0) {
+            copy_len++;
+        }
 
         // copy the source elements
-        bytes_copied = copy_len * elem_size;
-        memcpy(dest, src, bytes_copied);
-        dest += bytes_copied;
-        src += bytes_copied;
-        si += copy_len;
+        if (copy_len > 0) {
+            if (allow_duplicates) {
+                // we can copy the entire chunk
+                bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied;
+                si += copy_len;
+                di += copy_len;
+            } else {
+                // first, check the end of the source chunk
+                // for being a duplicate of the bptr
+                const char *end_of_src = src + (copy_len - 1) * elem_size;
+                size_t skip_len = 0;
+                while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) {
+                    end_of_src -= elem_size;
+                    skip_len++;
+                    copy_len--;
+                }
+                char *last = dest == *target ? NULL : dest - elem_size;
+                // then iterate through the source chunk
+                // and skip all duplicates with the last element in the array
+                size_t more_skipped = 0;
+                for (unsigned j = 0; j < copy_len; j++) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        // duplicate - skip
+                        src += elem_size;
+                        si++;
+                        more_skipped++;
+                    } else {
+                        memcpy(dest, src, elem_size);
+                        src += elem_size;
+                        last = dest;
+                        dest += elem_size;
+                        si++;
+                        di++;
+                    }
+                }
+                // skip the previously identified elements as well
+                src += skip_len * elem_size;
+                si += skip_len;
+                skip_len += more_skipped;
+                // reduce the actual size by the number of skipped elements
+                *size -= skip_len;
+            }
+        }
 
         // when all source elements are in place, we are done
         if (si >= elem_count) break;
@@ -443,20 +492,103 @@
         memmove(dest, bptr, bytes_copied);
         dest += bytes_copied;
         bptr += bytes_copied;
+        di += copy_len;
         bi += copy_len;
     }
 
-    // still source elements left? simply append them
+    // still source elements left?
     if (si < elem_count) {
-        memcpy(dest, src, elem_size * (elem_count - si));
+        if (allow_duplicates) {
+            // duplicates allowed or nothing inserted yet: simply copy everything
+            memcpy(dest, src, elem_size * (elem_count - si));
+        } else {
+            if (dest != *target) {
+                // skip all source elements that equal the last element
+                char *last = dest - elem_size;
+                while (si < elem_count) {
+                    if (last != NULL && cmp_func(last, src) == 0) {
+                        src += elem_size;
+                        si++;
+                        (*size)--;
+                    } else {
+                        break;
+                    }
+                }
+            }
+            // we must check the elements in the chunk one by one
+            while (si < elem_count) {
+                // find a chain of elements that can be copied
+                size_t copy_len = 1, skip_len = 0;
+                {
+                    const char *left_src = src;
+                    while (si + copy_len < elem_count) {
+                        const char *right_src = left_src + elem_size;
+                        int d = cmp_func(left_src,  right_src);
+                        if (d < 0) {
+                            if (skip_len > 0) {
+                                // new larger element found;
+                                // handle it in the next cycle
+                                break;
+                            }
+                            left_src += elem_size;
+                            copy_len++;
+                        } else if (d == 0) {
+                            left_src += elem_size;
+                            skip_len++;
+                        } else {
+                            break;
+                        }
+                    }
+                }
+                size_t bytes_copied = copy_len * elem_size;
+                memcpy(dest, src, bytes_copied);
+                dest += bytes_copied;
+                src += bytes_copied + skip_len * elem_size;
+                si += copy_len + skip_len;
+                di += copy_len;
+                *size -= skip_len;
+            }
+        }
     }
 
-    // still buffer elements left?
-    // don't worry, we already moved them to the correct place
+    // buffered elements need to be moved when we skipped duplicates
+    size_t total_skipped = new_size - *size;
+    if (bi < new_size && total_skipped > 0) {
+        // move the remaining buffer to the end of the array
+        memmove(dest, bptr, elem_size * (new_size - bi));
+    }
 
     return 0;
 }
 
+int cx_array_insert_sorted(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, true);
+}
+
+int cx_array_insert_unique(
+        void **target,
+        size_t *size,
+        size_t *capacity,
+        cx_compare_func cmp_func,
+        const void *sorted_data,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    return cx_array_insert_sorted_impl(target, size, capacity,
+        cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
+}
+
 size_t cx_array_binary_search_inf(
         const void *arr,
         size_t size,
@@ -502,6 +634,13 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
+            // check previous elements;
+            // when they are equal, report the smallest index
+            arr_elem -= elem_size;
+            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
+                pivot_index--;
+                arr_elem -= elem_size;
+            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -700,6 +839,31 @@
     }
 }
 
+static size_t cx_arl_insert_unique(
+        struct cx_list_s *list,
+        const void *sorted_data,
+        size_t n
+) {
+    // get a correctly typed pointer to the list
+    cx_array_list *arl = (cx_array_list *) list;
+
+    if (cx_array_insert_unique(
+            &arl->data,
+            &list->collection.size,
+            &arl->capacity,
+            list->collection.cmpfunc,
+            sorted_data,
+            list->collection.elem_size,
+            n,
+            &arl->reallocator
+    )) {
+        // array list implementation is "all or nothing"
+        return 0;
+    } else {
+        return n;
+    }
+}
+
 static void *cx_arl_insert_element(
         struct cx_list_s *list,
         size_t index,
@@ -942,6 +1106,7 @@
     if (iter->base.remove) {
         iter->base.remove = false;
         cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        iter->elem_count--;
     } else {
         iter->index++;
         iter->elem_handle =
@@ -956,6 +1121,7 @@
     if (iter->base.remove) {
         iter->base.remove = false;
         cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
+        iter->elem_count--;
     }
     iter->index--;
     if (iter->index < list->base.collection.size) {
@@ -991,6 +1157,7 @@
         cx_arl_insert_element,
         cx_arl_insert_array,
         cx_arl_insert_sorted,
+        cx_arl_insert_unique,
         cx_arl_insert_iter,
         cx_arl_remove,
         cx_arl_clear,

mercurial