--- a/ucx/array_list.c Thu May 29 15:59:27 2025 +0200 +++ b/ucx/array_list.c Wed Nov 12 18:37:58 2025 +0100 @@ -50,7 +50,7 @@ } CxArrayReallocator cx_array_default_reallocator_impl = { - cx_array_default_realloc, NULL, NULL, 0, 0 + cx_array_default_realloc, NULL, NULL }; CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl; @@ -72,11 +72,11 @@ } // retrieve the pointer to the actual allocator - const CxAllocator *al = alloc->ptr1; + const CxAllocator *al = alloc->allocator; // check if the array is still located on the stack void *newmem; - if (array == alloc->ptr2) { + if (array == alloc->stack_ptr) { newmem = cxMalloc(al, n); if (newmem != NULL && array != NULL) { memcpy(newmem, array, old_capacity*elem_size); @@ -89,27 +89,45 @@ struct cx_array_reallocator_s cx_array_reallocator( const struct cx_allocator_s *allocator, - const void *stackmem + const void *stack_ptr ) { if (allocator == NULL) { allocator = cxDefaultAllocator; } return (struct cx_array_reallocator_s) { cx_array_advanced_realloc, - (void*) allocator, (void*) stackmem, - 0, 0 + allocator, stack_ptr, }; } // LOW LEVEL ARRAY LIST FUNCTIONS -static size_t cx_array_align_capacity( - size_t cap, - size_t alignment, - size_t max +/** + * Intelligently calculates a new capacity, reserving some more + * elements than required to prevent too many allocations. + * + * @param current_capacity the current capacity of the array + * @param needed_capacity the required capacity of the array + * @param maximum_capacity the maximum capacity (given by the data type) + * @return the new capacity + */ +static size_t cx_array_grow_capacity( + size_t current_capacity, + size_t needed_capacity, + size_t maximum_capacity ) { - if (cap > max - alignment) { - return cap; + if (current_capacity >= needed_capacity) { + return current_capacity; + } + size_t cap = needed_capacity; + size_t alignment; + if (cap < 128) alignment = 16; + else if (cap < 1024) alignment = 64; + else if (cap < 8192) alignment = 512; + else alignment = 1024; + + if (cap - 1 > maximum_capacity - alignment) { + return maximum_capacity; } else { return cap - (cap % alignment) + alignment; } @@ -177,10 +195,6 @@ // reallocate if possible if (newcap > oldcap) { - // calculate new capacity (next number divisible by 16) - newcap = cx_array_align_capacity(newcap, 16, max_size); - - // perform reallocation void *newmem = reallocator->realloc( *array, oldcap, newcap, elem_size, reallocator ); @@ -270,28 +284,24 @@ } // check if resize is required - size_t minsize = index + elem_count; - size_t newsize = oldsize < minsize ? minsize : oldsize; + const size_t minsize = index + elem_count; + const size_t newsize = oldsize < minsize ? minsize : oldsize; - // reallocate if possible - size_t newcap = oldcap; - if (newsize > oldcap) { - // check, if we need to repair the src pointer + // reallocate if necessary + const size_t newcap = cx_array_grow_capacity(oldcap, newsize, max_size); + if (newcap > oldcap) { + // check if we need to repair the src pointer uintptr_t targetaddr = (uintptr_t) *target; uintptr_t srcaddr = (uintptr_t) src; bool repairsrc = targetaddr <= srcaddr && srcaddr < targetaddr + oldcap * elem_size; - // calculate new capacity (next number divisible by 16) - newcap = cx_array_align_capacity(newsize, 16, max_size); - assert(newcap > newsize); - // perform reallocation void *newmem = reallocator->realloc( *target, oldcap, newcap, elem_size, reallocator ); if (newmem == NULL) { - return 1; + return 1; // LCOV_EXCL_LINE } // repair src pointer, if necessary @@ -335,7 +345,7 @@ return 0; } -int cx_array_insert_sorted( +static int cx_array_insert_sorted_impl( void **target, size_t *size, size_t *capacity, @@ -343,7 +353,8 @@ const void *sorted_data, size_t elem_size, size_t elem_count, - CxArrayReallocator *reallocator + CxArrayReallocator *reallocator, + bool allow_duplicates ) { // assert pointers assert(target != NULL); @@ -367,15 +378,16 @@ } // store some counts - size_t old_size = *size; - size_t old_capacity = *capacity; - size_t needed_capacity = old_size + elem_count; + const size_t old_size = *size; + const size_t old_capacity = *capacity; + // the necessary capacity is the worst case assumption, including duplicates + const size_t needed_capacity = cx_array_grow_capacity(old_capacity, + old_size + elem_count, SIZE_MAX); // if we need more than we have, try a reallocation if (needed_capacity > old_capacity) { - size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX); void *new_mem = reallocator->realloc( - *target, old_capacity, new_capacity, elem_size, reallocator + *target, old_capacity, needed_capacity, elem_size, reallocator ); if (new_mem == NULL) { // give it up right away, there is no contract @@ -383,7 +395,7 @@ return 1; // LCOV_EXCL_LINE } *target = new_mem; - *capacity = new_capacity; + *capacity = needed_capacity; } // now we have guaranteed that we can insert everything @@ -418,13 +430,60 @@ bptr, cmp_func ); + // binary search gives us the smallest index; + // we also want to include equal elements here + while (si + copy_len < elem_count + && cmp_func(bptr, src+copy_len*elem_size) == 0) { + copy_len++; + } // copy the source elements - bytes_copied = copy_len * elem_size; - memcpy(dest, src, bytes_copied); - dest += bytes_copied; - src += bytes_copied; - si += copy_len; + if (copy_len > 0) { + if (allow_duplicates) { + // we can copy the entire chunk + bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied; + si += copy_len; + di += copy_len; + } else { + // first, check the end of the source chunk + // for being a duplicate of the bptr + const char *end_of_src = src + (copy_len - 1) * elem_size; + size_t skip_len = 0; + while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) { + end_of_src -= elem_size; + skip_len++; + copy_len--; + } + char *last = dest == *target ? NULL : dest - elem_size; + // then iterate through the source chunk + // and skip all duplicates with the last element in the array + size_t more_skipped = 0; + for (unsigned j = 0; j < copy_len; j++) { + if (last != NULL && cmp_func(last, src) == 0) { + // duplicate - skip + src += elem_size; + si++; + more_skipped++; + } else { + memcpy(dest, src, elem_size); + src += elem_size; + last = dest; + dest += elem_size; + si++; + di++; + } + } + // skip the previously identified elements as well + src += skip_len * elem_size; + si += skip_len; + skip_len += more_skipped; + // reduce the actual size by the number of skipped elements + *size -= skip_len; + } + } // when all source elements are in place, we are done if (si >= elem_count) break; @@ -443,20 +502,103 @@ memmove(dest, bptr, bytes_copied); dest += bytes_copied; bptr += bytes_copied; + di += copy_len; bi += copy_len; } - // still source elements left? simply append them + // still source elements left? if (si < elem_count) { - memcpy(dest, src, elem_size * (elem_count - si)); + if (allow_duplicates) { + // duplicates allowed or nothing inserted yet: simply copy everything + memcpy(dest, src, elem_size * (elem_count - si)); + } else { + if (dest != *target) { + // skip all source elements that equal the last element + char *last = dest - elem_size; + while (si < elem_count) { + if (last != NULL && cmp_func(last, src) == 0) { + src += elem_size; + si++; + (*size)--; + } else { + break; + } + } + } + // we must check the elements in the chunk one by one + while (si < elem_count) { + // find a chain of elements that can be copied + size_t copy_len = 1, skip_len = 0; + { + const char *left_src = src; + while (si + copy_len < elem_count) { + const char *right_src = left_src + elem_size; + int d = cmp_func(left_src, right_src); + if (d < 0) { + if (skip_len > 0) { + // new larger element found; + // handle it in the next cycle + break; + } + left_src += elem_size; + copy_len++; + } else if (d == 0) { + left_src += elem_size; + skip_len++; + } else { + break; + } + } + } + size_t bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied + skip_len * elem_size; + si += copy_len + skip_len; + di += copy_len; + *size -= skip_len; + } + } } - // still buffer elements left? - // don't worry, we already moved them to the correct place + // buffered elements need to be moved when we skipped duplicates + size_t total_skipped = new_size - *size; + if (bi < new_size && total_skipped > 0) { + // move the remaining buffer to the end of the array + memmove(dest, bptr, elem_size * (new_size - bi)); + } return 0; } +int cx_array_insert_sorted( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + return cx_array_insert_sorted_impl(target, size, capacity, + cmp_func, sorted_data, elem_size, elem_count, reallocator, true); +} + +int cx_array_insert_unique( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + return cx_array_insert_sorted_impl(target, size, capacity, + cmp_func, sorted_data, elem_size, elem_count, reallocator, false); +} + size_t cx_array_binary_search_inf( const void *arr, size_t size, @@ -502,6 +644,13 @@ result = cmp_func(elem, arr_elem); if (result == 0) { // found it! + // check previous elements; + // when they are equal, report the smallest index + arr_elem -= elem_size; + while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) { + pivot_index--; + arr_elem -= elem_size; + } return pivot_index; } else if (result < 0) { // element is smaller than pivot, continue search left @@ -643,8 +792,8 @@ // guarantee enough capacity if (arl->capacity < list->collection.size + n) { - size_t new_capacity = list->collection.size + n; - new_capacity = new_capacity - (new_capacity % 16) + 16; + const size_t new_capacity = cx_array_grow_capacity(arl->capacity, + list->collection.size + n, SIZE_MAX); if (cxReallocateArray( list->collection.allocator, &arl->data, new_capacity, @@ -700,6 +849,31 @@ } } +static size_t cx_arl_insert_unique( + struct cx_list_s *list, + const void *sorted_data, + size_t n +) { + // get a correctly typed pointer to the list + cx_array_list *arl = (cx_array_list *) list; + + if (cx_array_insert_unique( + &arl->data, + &list->collection.size, + &arl->capacity, + list->collection.cmpfunc, + sorted_data, + list->collection.elem_size, + n, + &arl->reallocator + )) { + // array list implementation is "all or nothing" + return 0; + } else { + return n; + } +} + static void *cx_arl_insert_element( struct cx_list_s *list, size_t index, @@ -717,7 +891,7 @@ const void *elem, int prepend ) { - struct cx_list_s *list = iter->src_handle.m; + struct cx_list_s *list = iter->src_handle; if (iter->index < list->collection.size) { if (cx_arl_insert_element(list, iter->index + 1 - prepend, elem) == NULL) { @@ -928,7 +1102,7 @@ static bool cx_arl_iter_valid(const void *it) { const struct cx_iterator_s *iter = it; - const struct cx_list_s *list = iter->src_handle.c; + const struct cx_list_s *list = iter->src_handle; return iter->index < list->collection.size; } @@ -941,29 +1115,39 @@ struct cx_iterator_s *iter = it; if (iter->base.remove) { iter->base.remove = false; - cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); + cx_arl_remove(iter->src_handle, iter->index, 1, NULL); + iter->elem_count--; } else { iter->index++; iter->elem_handle = ((char *) iter->elem_handle) - + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size; + + ((const struct cx_list_s *) iter->src_handle)->collection.elem_size; } } static void cx_arl_iter_prev(void *it) { struct cx_iterator_s *iter = it; - const cx_array_list *list = iter->src_handle.c; if (iter->base.remove) { iter->base.remove = false; - cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); + cx_arl_remove(iter->src_handle, iter->index, 1, NULL); + iter->elem_count--; } iter->index--; + cx_array_list *list = iter->src_handle; if (iter->index < list->base.collection.size) { iter->elem_handle = ((char *) list->data) + iter->index * list->base.collection.elem_size; } } +static int cx_arl_change_capacity( + struct cx_list_s *list, + size_t new_capacity +) { + cx_array_list *arl = (cx_array_list *)list; + return cxReallocateArray(list->collection.allocator, + &arl->data, new_capacity, list->collection.elem_size); +} static struct cx_iterator_s cx_arl_iterator( const struct cx_list_s *list, @@ -973,7 +1157,7 @@ struct cx_iterator_s iter; iter.index = index; - iter.src_handle.c = list; + iter.src_handle = (void*)list; iter.elem_handle = cx_arl_at(list, index); iter.elem_size = list->collection.elem_size; iter.elem_count = list->collection.size; @@ -981,7 +1165,7 @@ iter.base.current = cx_arl_iter_current; iter.base.next = backwards ? cx_arl_iter_prev : cx_arl_iter_next; iter.base.remove = false; - iter.base.mutating = false; + iter.base.allow_remove = true; return iter; } @@ -991,6 +1175,7 @@ cx_arl_insert_element, cx_arl_insert_array, cx_arl_insert_sorted, + cx_arl_insert_unique, cx_arl_insert_iter, cx_arl_remove, cx_arl_clear, @@ -1000,6 +1185,7 @@ cx_arl_sort, cx_arl_compare, cx_arl_reverse, + cx_arl_change_capacity, cx_arl_iterator, };