--- a/src/ucx/array_list.c Sat Nov 29 19:03:52 2025 +0100 +++ b/src/ucx/array_list.c Sun Nov 30 18:25:55 2025 +0100 @@ -42,10 +42,11 @@ cx_attr_unused CxArrayReallocator *alloc ) { size_t n; + // LCOV_EXCL_START if (cx_szmul(new_capacity, elem_size, &n)) { errno = EOVERFLOW; return NULL; - } + } // LCOV_EXCL_STOP return cxReallocDefault(array, n); } @@ -66,10 +67,11 @@ ) { // check for overflow size_t n; + // LCOV_EXCL_START if (cx_szmul(new_capacity, elem_size, &n)) { errno = EOVERFLOW; return NULL; - } + } // LCOV_EXCL_STOP // retrieve the pointer to the actual allocator const CxAllocator *al = alloc->allocator; @@ -103,23 +105,27 @@ // LOW LEVEL ARRAY LIST FUNCTIONS /** - * Increases the capacity until it is a multiple of a some alignment or reaches the maximum. + * Intelligently calculates a new capacity, reserving some more + * elements than required to prevent too many allocations. * - * @param cap the required capacity (must not be larger than @p max) - * @param alignment the alignment - * @param max the maximum capacity - * @return the aligned capacity + * @param current_capacity the current capacity of the array + * @param needed_capacity the required capacity of the array + * @return the new capacity */ -static size_t cx_array_align_capacity( - size_t cap, - size_t alignment, - size_t max +static size_t cx_array_grow_capacity( + size_t current_capacity, + size_t needed_capacity ) { - if (cap > max - alignment) { - return cap; - } else { - return cap - (cap % alignment) + alignment; + if (current_capacity >= needed_capacity) { + return current_capacity; } + size_t cap = needed_capacity; + size_t alignment; + if (cap < 128) alignment = 16; + else if (cap < 1024) alignment = 64; + else if (cap < 8192) alignment = 512; + else alignment = 1024; + return cap - (cap % alignment) + alignment; } int cx_array_reserve( @@ -184,10 +190,6 @@ // reallocate if possible if (newcap > oldcap) { - // calculate new capacity (next number divisible by 16) - newcap = cx_array_align_capacity(newcap, 16, max_size); - - // perform reallocation void *newmem = reallocator->realloc( *array, oldcap, newcap, elem_size, reallocator ); @@ -277,22 +279,18 @@ } // check if resize is required - size_t minsize = index + elem_count; - size_t newsize = oldsize < minsize ? minsize : oldsize; + const size_t minsize = index + elem_count; + const size_t newsize = oldsize < minsize ? minsize : oldsize; - // reallocate if possible - size_t newcap = oldcap; - if (newsize > oldcap) { - // check, if we need to repair the src pointer + // reallocate if necessary + const size_t newcap = cx_array_grow_capacity(oldcap, newsize); + if (newcap > oldcap) { + // check if we need to repair the src pointer uintptr_t targetaddr = (uintptr_t) *target; uintptr_t srcaddr = (uintptr_t) src; bool repairsrc = targetaddr <= srcaddr && srcaddr < targetaddr + oldcap * elem_size; - // calculate new capacity (next number divisible by 16) - newcap = cx_array_align_capacity(newsize, 16, max_size); - assert(newcap > newsize); - // perform reallocation void *newmem = reallocator->realloc( *target, oldcap, newcap, elem_size, reallocator @@ -369,22 +367,23 @@ if (elem_count == 0) return 0; // overflow check + // LCOV_EXCL_START if (elem_count > SIZE_MAX - *size) { errno = EOVERFLOW; return 1; } + // LCOV_EXCL_STOP // store some counts - size_t old_size = *size; - size_t old_capacity = *capacity; + const size_t old_size = *size; + const size_t old_capacity = *capacity; // the necessary capacity is the worst case assumption, including duplicates - size_t needed_capacity = old_size + elem_count; + const size_t needed_capacity = cx_array_grow_capacity(old_capacity, old_size + elem_count); // if we need more than we have, try a reallocation if (needed_capacity > old_capacity) { - size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX); void *new_mem = reallocator->realloc( - *target, old_capacity, new_capacity, elem_size, reallocator + *target, old_capacity, needed_capacity, elem_size, reallocator ); if (new_mem == NULL) { // give it up right away, there is no contract @@ -392,7 +391,7 @@ return 1; // LCOV_EXCL_LINE } *target = new_mem; - *capacity = new_capacity; + *capacity = needed_capacity; } // now we have guaranteed that we can insert everything @@ -418,21 +417,23 @@ // while there are both source and buffered elements left, // copy them interleaving while (si < elem_count && bi < new_size) { - // determine how many source elements can be inserted + // determine how many source elements can be inserted. + // the first element that shall not be inserted is the smallest element + // that is strictly larger than the first buffered element + // (located at the index of the infimum plus one). + // the infimum is guaranteed to exist: + // - if all src elements are larger, + // there is no buffer, and this loop is skipped + // - if any src element is smaller or equal, the infimum exists + // - when all src elements that are smaller are copied, the second part + // of this loop body will copy the remaining buffer (emptying it) + // Therefore, the buffer can never contain an element that is smaller + // than any element in the source and the infimum exists. size_t copy_len, bytes_copied; - copy_len = cx_array_binary_search_sup( - src, - elem_count - si, - elem_size, - bptr, - cmp_func + copy_len = cx_array_binary_search_inf( + src, elem_count - si, elem_size, bptr, cmp_func ); - // binary search gives us the smallest index; - // we also want to include equal elements here - while (si + copy_len < elem_count - && cmp_func(bptr, src+copy_len*elem_size) == 0) { - copy_len++; - } + copy_len++; // copy the source elements if (copy_len > 0) { @@ -509,26 +510,17 @@ // duplicates allowed or nothing inserted yet: simply copy everything memcpy(dest, src, elem_size * (elem_count - si)); } else { - if (dest != *target) { - // skip all source elements that equal the last element - char *last = dest - elem_size; - while (si < elem_count) { - if (last != NULL && cmp_func(last, src) == 0) { - src += elem_size; - si++; - (*size)--; - } else { - break; - } - } - } - // we must check the elements in the chunk one by one + // we must check the remaining source elements one by one + // to skip the duplicates. + // Note that no source element can equal the last element in the + // destination, because that would have created an insertion point + // and a buffer, s.t. the above loop already handled the duplicates while (si < elem_count) { // find a chain of elements that can be copied size_t copy_len = 1, skip_len = 0; { const char *left_src = src; - while (si + copy_len < elem_count) { + while (si + copy_len + skip_len < elem_count) { const char *right_src = left_src + elem_size; int d = cmp_func(left_src, right_src); if (d < 0) { @@ -596,7 +588,8 @@ cmp_func, sorted_data, elem_size, elem_count, reallocator, false); } -size_t cx_array_binary_search_inf( +// implementation that finds ANY index +static size_t cx_array_binary_search_inf_impl( const void *arr, size_t size, size_t elem_size, @@ -641,13 +634,6 @@ result = cmp_func(elem, arr_elem); if (result == 0) { // found it! - // check previous elements; - // when they are equal, report the smallest index - arr_elem -= elem_size; - while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) { - pivot_index--; - arr_elem -= elem_size; - } return pivot_index; } else if (result < 0) { // element is smaller than pivot, continue search left @@ -662,6 +648,24 @@ return result < 0 ? (pivot_index - 1) : pivot_index; } +size_t cx_array_binary_search_inf( + const void *arr, + size_t size, + size_t elem_size, + const void *elem, + cx_compare_func cmp_func +) { + size_t index = cx_array_binary_search_inf_impl( + arr, size, elem_size, elem, cmp_func); + // in case of equality, report the largest index + const char *e = ((const char *) arr) + (index + 1) * elem_size; + while (index + 1 < size && cmp_func(e, elem) == 0) { + e += elem_size; + index++; + } + return index; +} + size_t cx_array_binary_search( const void *arr, size_t size, @@ -687,16 +691,25 @@ const void *elem, cx_compare_func cmp_func ) { - size_t inf = cx_array_binary_search_inf( + size_t index = cx_array_binary_search_inf_impl( arr, size, elem_size, elem, cmp_func ); - if (inf == size) { - // no infimum means, first element is supremum + const char *e = ((const char *) arr) + index * elem_size; + if (index == size) { + // no infimum means the first element is supremum return 0; - } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) { - return inf; + } else if (cmp_func(e, elem) == 0) { + // found an equal element, search the smallest index + e -= elem_size; // e now contains the element at index-1 + while (index > 0 && cmp_func(e, elem) == 0) { + e -= elem_size; + index--; + } + return index; } else { - return inf + 1; + // we already have the largest index of the infimum (by design) + // the next element is the supremum (or there is no supremum) + return index + 1; } } @@ -789,14 +802,13 @@ // guarantee enough capacity if (arl->capacity < list->collection.size + n) { - size_t new_capacity = list->collection.size + n; - new_capacity = cx_array_align_capacity(new_capacity, 16, SIZE_MAX); + const size_t new_capacity = cx_array_grow_capacity(arl->capacity,list->collection.size + n); if (cxReallocateArray( list->collection.allocator, &arl->data, new_capacity, list->collection.elem_size) ) { - return 0; + return 0; // LCOV_EXCL_LINE } arl->capacity = new_capacity; } @@ -840,7 +852,7 @@ &arl->reallocator )) { // array list implementation is "all or nothing" - return 0; + return 0; // LCOV_EXCL_LINE } else { return n; } @@ -865,7 +877,7 @@ &arl->reallocator )) { // array list implementation is "all or nothing" - return 0; + return 0; // LCOV_EXCL_LINE } else { return n; } @@ -892,7 +904,7 @@ if (iter->index < list->collection.size) { if (cx_arl_insert_element(list, iter->index + 1 - prepend, elem) == NULL) { - return 1; + return 1; // LCOV_EXCL_LINE } iter->elem_count++; if (prepend != 0) { @@ -902,7 +914,7 @@ return 0; } else { if (cx_arl_insert_element(list, list->collection.size, elem) == NULL) { - return 1; + return 1; // LCOV_EXCL_LINE } iter->elem_count++; iter->index = list->collection.size; @@ -1137,6 +1149,14 @@ } } +static int cx_arl_change_capacity( + struct cx_list_s *list, + size_t new_capacity +) { + cx_array_list *arl = (cx_array_list *)list; + return cxReallocateArray(list->collection.allocator, + &arl->data, new_capacity, list->collection.elem_size); +} static struct cx_iterator_s cx_arl_iterator( const struct cx_list_s *list, @@ -1174,6 +1194,7 @@ cx_arl_sort, cx_arl_compare, cx_arl_reverse, + cx_arl_change_capacity, cx_arl_iterator, };