src/ucx/array_list.c

changeset 645
0c85c4cd0dd8
parent 621
956c03c25edd
--- a/src/ucx/array_list.c	Sat Nov 29 19:03:52 2025 +0100
+++ b/src/ucx/array_list.c	Sun Nov 30 18:25:55 2025 +0100
@@ -42,10 +42,11 @@
         cx_attr_unused CxArrayReallocator *alloc
 ) {
     size_t n;
+    // LCOV_EXCL_START
     if (cx_szmul(new_capacity, elem_size, &n)) {
         errno = EOVERFLOW;
         return NULL;
-    }
+    } // LCOV_EXCL_STOP
     return cxReallocDefault(array, n);
 }
 
@@ -66,10 +67,11 @@
 ) {
     // check for overflow
     size_t n;
+    // LCOV_EXCL_START
     if (cx_szmul(new_capacity, elem_size, &n)) {
         errno = EOVERFLOW;
         return NULL;
-    }
+    } // LCOV_EXCL_STOP
 
     // retrieve the pointer to the actual allocator
     const CxAllocator *al = alloc->allocator;
@@ -103,23 +105,27 @@
 // LOW LEVEL ARRAY LIST FUNCTIONS
 
 /**
- * Increases the capacity until it is a multiple of a some alignment or reaches the maximum.
+ * Intelligently calculates a new capacity, reserving some more
+ * elements than required to prevent too many allocations.
  *
- * @param cap the required capacity (must not be larger than @p max)
- * @param alignment the alignment
- * @param max the maximum capacity
- * @return the aligned capacity
+ * @param current_capacity the current capacity of the array
+ * @param needed_capacity the required capacity of the array
+ * @return the new capacity
  */
-static size_t cx_array_align_capacity(
-        size_t cap,
-        size_t alignment,
-        size_t max
+static size_t cx_array_grow_capacity(
+    size_t current_capacity,
+    size_t needed_capacity
 ) {
-    if (cap > max - alignment) {
-        return cap;
-    } else {
-        return cap - (cap % alignment) + alignment;
+    if (current_capacity >= needed_capacity) {
+        return current_capacity;
     }
+    size_t cap = needed_capacity;
+    size_t alignment;
+    if (cap < 128) alignment = 16;
+    else if (cap < 1024) alignment = 64;
+    else if (cap < 8192) alignment = 512;
+    else alignment = 1024;
+    return cap - (cap % alignment) + alignment;
 }
 
 int cx_array_reserve(
@@ -184,10 +190,6 @@
 
     // reallocate if possible
     if (newcap > oldcap) {
-        // calculate new capacity (next number divisible by 16)
-        newcap = cx_array_align_capacity(newcap, 16, max_size);
-
-        // perform reallocation
         void *newmem = reallocator->realloc(
                 *array, oldcap, newcap, elem_size, reallocator
         );
@@ -277,22 +279,18 @@
     }
 
     // check if resize is required
-    size_t minsize = index + elem_count;
-    size_t newsize = oldsize < minsize ? minsize : oldsize;
+    const size_t minsize = index + elem_count;
+    const size_t newsize = oldsize < minsize ? minsize : oldsize;
 
-    // reallocate if possible
-    size_t newcap = oldcap;
-    if (newsize > oldcap) {
-        // check, if we need to repair the src pointer
+    // reallocate if necessary
+    const size_t newcap = cx_array_grow_capacity(oldcap, newsize);
+    if (newcap > oldcap) {
+        // check if we need to repair the src pointer
         uintptr_t targetaddr = (uintptr_t) *target;
         uintptr_t srcaddr = (uintptr_t) src;
         bool repairsrc = targetaddr <= srcaddr
                          && srcaddr < targetaddr + oldcap * elem_size;
 
-        // calculate new capacity (next number divisible by 16)
-        newcap = cx_array_align_capacity(newsize, 16, max_size);
-        assert(newcap > newsize);
-
         // perform reallocation
         void *newmem = reallocator->realloc(
                 *target, oldcap, newcap, elem_size, reallocator
@@ -369,22 +367,23 @@
     if (elem_count == 0) return 0;
 
     // overflow check
+    // LCOV_EXCL_START
     if (elem_count > SIZE_MAX - *size) {
         errno = EOVERFLOW;
         return 1;
     }
+    // LCOV_EXCL_STOP
 
     // store some counts
-    size_t old_size = *size;
-    size_t old_capacity = *capacity;
+    const size_t old_size = *size;
+    const size_t old_capacity = *capacity;
     // the necessary capacity is the worst case assumption, including duplicates
-    size_t needed_capacity = old_size + elem_count;
+    const size_t needed_capacity = cx_array_grow_capacity(old_capacity, old_size + elem_count);
 
     // if we need more than we have, try a reallocation
     if (needed_capacity > old_capacity) {
-        size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX);
         void *new_mem = reallocator->realloc(
-                *target, old_capacity, new_capacity, elem_size, reallocator
+                *target, old_capacity, needed_capacity, elem_size, reallocator
         );
         if (new_mem == NULL) {
             // give it up right away, there is no contract
@@ -392,7 +391,7 @@
             return 1;  // LCOV_EXCL_LINE
         }
         *target = new_mem;
-        *capacity = new_capacity;
+        *capacity = needed_capacity;
     }
 
     // now we have guaranteed that we can insert everything
@@ -418,21 +417,23 @@
     // while there are both source and buffered elements left,
     // copy them interleaving
     while (si < elem_count && bi < new_size) {
-        // determine how many source elements can be inserted
+        // determine how many source elements can be inserted.
+        // the first element that shall not be inserted is the smallest element
+        // that is strictly larger than the first buffered element
+        // (located at the index of the infimum plus one).
+        // the infimum is guaranteed to exist:
+        // - if all src elements are larger,
+        //   there is no buffer, and this loop is skipped
+        // - if any src element is smaller or equal, the infimum exists
+        // - when all src elements that are smaller are copied, the second part
+        //   of this loop body will copy the remaining buffer (emptying it)
+        // Therefore, the buffer can never contain an element that is smaller
+        // than any element in the source and the infimum exists.
         size_t copy_len, bytes_copied;
-        copy_len = cx_array_binary_search_sup(
-                src,
-                elem_count - si,
-                elem_size,
-                bptr,
-                cmp_func
+        copy_len = cx_array_binary_search_inf(
+            src, elem_count - si, elem_size, bptr, cmp_func
         );
-        // binary search gives us the smallest index;
-        // we also want to include equal elements here
-        while (si + copy_len < elem_count
-                && cmp_func(bptr, src+copy_len*elem_size) == 0) {
-            copy_len++;
-        }
+        copy_len++;
 
         // copy the source elements
         if (copy_len > 0) {
@@ -509,26 +510,17 @@
             // duplicates allowed or nothing inserted yet: simply copy everything
             memcpy(dest, src, elem_size * (elem_count - si));
         } else {
-            if (dest != *target) {
-                // skip all source elements that equal the last element
-                char *last = dest - elem_size;
-                while (si < elem_count) {
-                    if (last != NULL && cmp_func(last, src) == 0) {
-                        src += elem_size;
-                        si++;
-                        (*size)--;
-                    } else {
-                        break;
-                    }
-                }
-            }
-            // we must check the elements in the chunk one by one
+            // we must check the remaining source elements one by one
+            // to skip the duplicates.
+            // Note that no source element can equal the last element in the
+            // destination, because that would have created an insertion point
+            // and a buffer, s.t. the above loop already handled the duplicates
             while (si < elem_count) {
                 // find a chain of elements that can be copied
                 size_t copy_len = 1, skip_len = 0;
                 {
                     const char *left_src = src;
-                    while (si + copy_len < elem_count) {
+                    while (si + copy_len + skip_len < elem_count) {
                         const char *right_src = left_src + elem_size;
                         int d = cmp_func(left_src,  right_src);
                         if (d < 0) {
@@ -596,7 +588,8 @@
         cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
 }
 
-size_t cx_array_binary_search_inf(
+// implementation that finds ANY index
+static size_t cx_array_binary_search_inf_impl(
         const void *arr,
         size_t size,
         size_t elem_size,
@@ -641,13 +634,6 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
-            // check previous elements;
-            // when they are equal, report the smallest index
-            arr_elem -= elem_size;
-            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
-                pivot_index--;
-                arr_elem -= elem_size;
-            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -662,6 +648,24 @@
     return result < 0 ? (pivot_index - 1) : pivot_index;
 }
 
+size_t cx_array_binary_search_inf(
+        const void *arr,
+        size_t size,
+        size_t elem_size,
+        const void *elem,
+        cx_compare_func cmp_func
+) {
+    size_t index = cx_array_binary_search_inf_impl(
+        arr, size, elem_size, elem, cmp_func);
+    // in case of equality, report the largest index
+    const char *e = ((const char *) arr) + (index + 1) * elem_size;
+    while (index + 1 < size && cmp_func(e, elem) == 0) {
+        e += elem_size;
+        index++;
+    }
+    return index;
+}
+
 size_t cx_array_binary_search(
         const void *arr,
         size_t size,
@@ -687,16 +691,25 @@
         const void *elem,
         cx_compare_func cmp_func
 ) {
-    size_t inf = cx_array_binary_search_inf(
+    size_t index = cx_array_binary_search_inf_impl(
             arr, size, elem_size, elem, cmp_func
     );
-    if (inf == size) {
-        // no infimum means, first element is supremum
+    const char *e = ((const char *) arr) + index * elem_size;
+    if (index == size) {
+        // no infimum means the first element is supremum
         return 0;
-    } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) {
-        return inf;
+    } else if (cmp_func(e, elem) == 0) {
+        // found an equal element, search the smallest index
+        e -= elem_size; // e now contains the element at index-1
+        while (index > 0 && cmp_func(e, elem) == 0) {
+            e -= elem_size;
+            index--;
+        }
+        return index;
     } else {
-        return inf + 1;
+        // we already have the largest index of the infimum (by design)
+        // the next element is the supremum (or there is no supremum)
+        return index + 1;
     }
 }
 
@@ -789,14 +802,13 @@
 
     // guarantee enough capacity
     if (arl->capacity < list->collection.size + n) {
-        size_t new_capacity = list->collection.size + n;
-        new_capacity = cx_array_align_capacity(new_capacity, 16, SIZE_MAX);
+        const size_t new_capacity = cx_array_grow_capacity(arl->capacity,list->collection.size + n);
         if (cxReallocateArray(
                 list->collection.allocator,
                 &arl->data, new_capacity,
                 list->collection.elem_size)
         ) {
-            return 0;
+            return 0; // LCOV_EXCL_LINE
         }
         arl->capacity = new_capacity;
     }
@@ -840,7 +852,7 @@
             &arl->reallocator
     )) {
         // array list implementation is "all or nothing"
-        return 0;
+        return 0;  // LCOV_EXCL_LINE
     } else {
         return n;
     }
@@ -865,7 +877,7 @@
             &arl->reallocator
     )) {
         // array list implementation is "all or nothing"
-        return 0;
+        return 0;  // LCOV_EXCL_LINE
     } else {
         return n;
     }
@@ -892,7 +904,7 @@
     if (iter->index < list->collection.size) {
         if (cx_arl_insert_element(list,
                 iter->index + 1 - prepend, elem) == NULL) {
-            return 1;
+            return 1; // LCOV_EXCL_LINE
         }
         iter->elem_count++;
         if (prepend != 0) {
@@ -902,7 +914,7 @@
         return 0;
     } else {
         if (cx_arl_insert_element(list, list->collection.size, elem) == NULL) {
-            return 1;
+            return 1;  // LCOV_EXCL_LINE
         }
         iter->elem_count++;
         iter->index = list->collection.size;
@@ -1137,6 +1149,14 @@
     }
 }
 
+static int cx_arl_change_capacity(
+        struct cx_list_s *list,
+        size_t new_capacity
+) {
+    cx_array_list *arl = (cx_array_list *)list;
+    return cxReallocateArray(list->collection.allocator,
+        &arl->data, new_capacity, list->collection.elem_size);
+}
 
 static struct cx_iterator_s cx_arl_iterator(
         const struct cx_list_s *list,
@@ -1174,6 +1194,7 @@
         cx_arl_sort,
         cx_arl_compare,
         cx_arl_reverse,
+        cx_arl_change_capacity,
         cx_arl_iterator,
 };
 

mercurial