ucx/array_list.c

changeset 23
b26390e77237
parent 22
112b85020dc9
--- a/ucx/array_list.c	Wed Nov 12 18:37:58 2025 +0100
+++ b/ucx/array_list.c	Tue Dec 09 12:13:43 2025 +0100
@@ -42,10 +42,11 @@
         cx_attr_unused CxArrayReallocator *alloc
 ) {
     size_t n;
+    // LCOV_EXCL_START
     if (cx_szmul(new_capacity, elem_size, &n)) {
         errno = EOVERFLOW;
         return NULL;
-    }
+    } // LCOV_EXCL_STOP
     return cxReallocDefault(array, n);
 }
 
@@ -66,10 +67,11 @@
 ) {
     // check for overflow
     size_t n;
+    // LCOV_EXCL_START
     if (cx_szmul(new_capacity, elem_size, &n)) {
         errno = EOVERFLOW;
         return NULL;
-    }
+    } // LCOV_EXCL_STOP
 
     // retrieve the pointer to the actual allocator
     const CxAllocator *al = alloc->allocator;
@@ -108,13 +110,11 @@
  *
  * @param current_capacity the current capacity of the array
  * @param needed_capacity the required capacity of the array
- * @param maximum_capacity the maximum capacity (given by the data type)
  * @return the new capacity
  */
 static size_t cx_array_grow_capacity(
     size_t current_capacity,
-    size_t needed_capacity,
-    size_t maximum_capacity
+    size_t needed_capacity
 ) {
     if (current_capacity >= needed_capacity) {
         return current_capacity;
@@ -125,12 +125,7 @@
     else if (cap < 1024) alignment = 64;
     else if (cap < 8192) alignment = 512;
     else alignment = 1024;
-
-    if (cap - 1 > maximum_capacity - alignment) {
-        return maximum_capacity;
-    } else {
-        return cap - (cap % alignment) + alignment;
-    }
+    return cap - (cap % alignment) + alignment;
 }
 
 int cx_array_reserve(
@@ -288,7 +283,7 @@
     const size_t newsize = oldsize < minsize ? minsize : oldsize;
 
     // reallocate if necessary
-    const size_t newcap = cx_array_grow_capacity(oldcap, newsize, max_size);
+    const size_t newcap = cx_array_grow_capacity(oldcap, newsize);
     if (newcap > oldcap) {
         // check if we need to repair the src pointer
         uintptr_t targetaddr = (uintptr_t) *target;
@@ -372,17 +367,18 @@
     if (elem_count == 0) return 0;
 
     // overflow check
+    // LCOV_EXCL_START
     if (elem_count > SIZE_MAX - *size) {
         errno = EOVERFLOW;
         return 1;
     }
+    // LCOV_EXCL_STOP
 
     // store some counts
     const size_t old_size = *size;
     const size_t old_capacity = *capacity;
     // the necessary capacity is the worst case assumption, including duplicates
-    const size_t needed_capacity = cx_array_grow_capacity(old_capacity,
-        old_size + elem_count, SIZE_MAX);
+    const size_t needed_capacity = cx_array_grow_capacity(old_capacity, old_size + elem_count);
 
     // if we need more than we have, try a reallocation
     if (needed_capacity > old_capacity) {
@@ -421,21 +417,23 @@
     // while there are both source and buffered elements left,
     // copy them interleaving
     while (si < elem_count && bi < new_size) {
-        // determine how many source elements can be inserted
+        // determine how many source elements can be inserted.
+        // the first element that shall not be inserted is the smallest element
+        // that is strictly larger than the first buffered element
+        // (located at the index of the infimum plus one).
+        // the infimum is guaranteed to exist:
+        // - if all src elements are larger,
+        //   there is no buffer, and this loop is skipped
+        // - if any src element is smaller or equal, the infimum exists
+        // - when all src elements that are smaller are copied, the second part
+        //   of this loop body will copy the remaining buffer (emptying it)
+        // Therefore, the buffer can never contain an element that is smaller
+        // than any element in the source and the infimum exists.
         size_t copy_len, bytes_copied;
-        copy_len = cx_array_binary_search_sup(
-                src,
-                elem_count - si,
-                elem_size,
-                bptr,
-                cmp_func
+        copy_len = cx_array_binary_search_inf(
+            src, elem_count - si, elem_size, bptr, cmp_func
         );
-        // binary search gives us the smallest index;
-        // we also want to include equal elements here
-        while (si + copy_len < elem_count
-                && cmp_func(bptr, src+copy_len*elem_size) == 0) {
-            copy_len++;
-        }
+        copy_len++;
 
         // copy the source elements
         if (copy_len > 0) {
@@ -512,26 +510,17 @@
             // duplicates allowed or nothing inserted yet: simply copy everything
             memcpy(dest, src, elem_size * (elem_count - si));
         } else {
-            if (dest != *target) {
-                // skip all source elements that equal the last element
-                char *last = dest - elem_size;
-                while (si < elem_count) {
-                    if (last != NULL && cmp_func(last, src) == 0) {
-                        src += elem_size;
-                        si++;
-                        (*size)--;
-                    } else {
-                        break;
-                    }
-                }
-            }
-            // we must check the elements in the chunk one by one
+            // we must check the remaining source elements one by one
+            // to skip the duplicates.
+            // Note that no source element can equal the last element in the
+            // destination, because that would have created an insertion point
+            // and a buffer, s.t. the above loop already handled the duplicates
             while (si < elem_count) {
                 // find a chain of elements that can be copied
                 size_t copy_len = 1, skip_len = 0;
                 {
                     const char *left_src = src;
-                    while (si + copy_len < elem_count) {
+                    while (si + copy_len + skip_len < elem_count) {
                         const char *right_src = left_src + elem_size;
                         int d = cmp_func(left_src,  right_src);
                         if (d < 0) {
@@ -599,7 +588,8 @@
         cmp_func, sorted_data, elem_size, elem_count, reallocator, false);
 }
 
-size_t cx_array_binary_search_inf(
+// implementation that finds ANY index
+static size_t cx_array_binary_search_inf_impl(
         const void *arr,
         size_t size,
         size_t elem_size,
@@ -644,13 +634,6 @@
         result = cmp_func(elem, arr_elem);
         if (result == 0) {
             // found it!
-            // check previous elements;
-            // when they are equal, report the smallest index
-            arr_elem -= elem_size;
-            while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) {
-                pivot_index--;
-                arr_elem -= elem_size;
-            }
             return pivot_index;
         } else if (result < 0) {
             // element is smaller than pivot, continue search left
@@ -665,6 +648,24 @@
     return result < 0 ? (pivot_index - 1) : pivot_index;
 }
 
+size_t cx_array_binary_search_inf(
+        const void *arr,
+        size_t size,
+        size_t elem_size,
+        const void *elem,
+        cx_compare_func cmp_func
+) {
+    size_t index = cx_array_binary_search_inf_impl(
+        arr, size, elem_size, elem, cmp_func);
+    // in case of equality, report the largest index
+    const char *e = ((const char *) arr) + (index + 1) * elem_size;
+    while (index + 1 < size && cmp_func(e, elem) == 0) {
+        e += elem_size;
+        index++;
+    }
+    return index;
+}
+
 size_t cx_array_binary_search(
         const void *arr,
         size_t size,
@@ -690,16 +691,25 @@
         const void *elem,
         cx_compare_func cmp_func
 ) {
-    size_t inf = cx_array_binary_search_inf(
+    size_t index = cx_array_binary_search_inf_impl(
             arr, size, elem_size, elem, cmp_func
     );
-    if (inf == size) {
-        // no infimum means, first element is supremum
+    const char *e = ((const char *) arr) + index * elem_size;
+    if (index == size) {
+        // no infimum means the first element is supremum
         return 0;
-    } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) {
-        return inf;
+    } else if (cmp_func(e, elem) == 0) {
+        // found an equal element, search the smallest index
+        e -= elem_size; // e now contains the element at index-1
+        while (index > 0 && cmp_func(e, elem) == 0) {
+            e -= elem_size;
+            index--;
+        }
+        return index;
     } else {
-        return inf + 1;
+        // we already have the largest index of the infimum (by design)
+        // the next element is the supremum (or there is no supremum)
+        return index + 1;
     }
 }
 
@@ -792,14 +802,13 @@
 
     // guarantee enough capacity
     if (arl->capacity < list->collection.size + n) {
-        const size_t new_capacity = cx_array_grow_capacity(arl->capacity,
-            list->collection.size + n, SIZE_MAX);
+        const size_t new_capacity = cx_array_grow_capacity(arl->capacity,list->collection.size + n);
         if (cxReallocateArray(
                 list->collection.allocator,
                 &arl->data, new_capacity,
                 list->collection.elem_size)
         ) {
-            return 0;
+            return 0; // LCOV_EXCL_LINE
         }
         arl->capacity = new_capacity;
     }
@@ -843,7 +852,7 @@
             &arl->reallocator
     )) {
         // array list implementation is "all or nothing"
-        return 0;
+        return 0;  // LCOV_EXCL_LINE
     } else {
         return n;
     }
@@ -868,7 +877,7 @@
             &arl->reallocator
     )) {
         // array list implementation is "all or nothing"
-        return 0;
+        return 0;  // LCOV_EXCL_LINE
     } else {
         return n;
     }
@@ -895,7 +904,7 @@
     if (iter->index < list->collection.size) {
         if (cx_arl_insert_element(list,
                 iter->index + 1 - prepend, elem) == NULL) {
-            return 1;
+            return 1; // LCOV_EXCL_LINE
         }
         iter->elem_count++;
         if (prepend != 0) {
@@ -905,7 +914,7 @@
         return 0;
     } else {
         if (cx_arl_insert_element(list, list->collection.size, elem) == NULL) {
-            return 1;
+            return 1;  // LCOV_EXCL_LINE
         }
         iter->elem_count++;
         iter->index = list->collection.size;

mercurial