--- a/ucx/array_list.c Sun Aug 31 14:39:13 2025 +0200 +++ b/ucx/array_list.c Sat Nov 08 23:06:11 2025 +0100 @@ -36,20 +36,21 @@ static void *cx_array_default_realloc( void *array, - size_t capacity, + cx_attr_unused size_t old_capacity, + size_t new_capacity, size_t elem_size, cx_attr_unused CxArrayReallocator *alloc ) { size_t n; - if (cx_szmul(capacity, elem_size, &n)) { + if (cx_szmul(new_capacity, elem_size, &n)) { errno = EOVERFLOW; return NULL; } - return realloc(array, n); + return cxReallocDefault(array, n); } CxArrayReallocator cx_array_default_reallocator_impl = { - cx_array_default_realloc, NULL, NULL, 0, 0 + cx_array_default_realloc, NULL, NULL }; CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl; @@ -58,26 +59,27 @@ static void *cx_array_advanced_realloc( void *array, - size_t capacity, + size_t old_capacity, + size_t new_capacity, size_t elem_size, cx_attr_unused CxArrayReallocator *alloc ) { // check for overflow size_t n; - if (cx_szmul(capacity, elem_size, &n)) { + if (cx_szmul(new_capacity, elem_size, &n)) { errno = EOVERFLOW; return NULL; } // retrieve the pointer to the actual allocator - const CxAllocator *al = alloc->ptr1; + const CxAllocator *al = alloc->allocator; // check if the array is still located on the stack void *newmem; - if (array == alloc->ptr2) { + if (array == alloc->stack_ptr) { newmem = cxMalloc(al, n); if (newmem != NULL && array != NULL) { - memcpy(newmem, array, n); + memcpy(newmem, array, old_capacity*elem_size); } } else { newmem = cxRealloc(al, array, n); @@ -87,20 +89,27 @@ struct cx_array_reallocator_s cx_array_reallocator( const struct cx_allocator_s *allocator, - const void *stackmem + const void *stack_ptr ) { if (allocator == NULL) { allocator = cxDefaultAllocator; } return (struct cx_array_reallocator_s) { cx_array_advanced_realloc, - (void*) allocator, (void*) stackmem, - 0, 0 + allocator, stack_ptr, }; } // LOW LEVEL ARRAY LIST FUNCTIONS +/** + * Increases the capacity until it is a multiple of a some alignment or reaches the maximum. + * + * @param cap the required capacity (must not be larger than @p max) + * @param alignment the alignment + * @param max the maximum capacity + * @return the aligned capacity + */ static size_t cx_array_align_capacity( size_t cap, size_t alignment, @@ -180,7 +189,7 @@ // perform reallocation void *newmem = reallocator->realloc( - *array, newcap, elem_size, reallocator + *array, oldcap, newcap, elem_size, reallocator ); if (newmem == NULL) { return 1; // LCOV_EXCL_LINE @@ -286,10 +295,10 @@ // perform reallocation void *newmem = reallocator->realloc( - *target, newcap, elem_size, reallocator + *target, oldcap, newcap, elem_size, reallocator ); if (newmem == NULL) { - return 1; + return 1; // LCOV_EXCL_LINE } // repair src pointer, if necessary @@ -333,7 +342,7 @@ return 0; } -int cx_array_insert_sorted( +static int cx_array_insert_sorted_impl( void **target, size_t *size, size_t *capacity, @@ -341,7 +350,8 @@ const void *sorted_data, size_t elem_size, size_t elem_count, - CxArrayReallocator *reallocator + CxArrayReallocator *reallocator, + bool allow_duplicates ) { // assert pointers assert(target != NULL); @@ -366,13 +376,15 @@ // store some counts size_t old_size = *size; + size_t old_capacity = *capacity; + // the necessary capacity is the worst case assumption, including duplicates size_t needed_capacity = old_size + elem_count; // if we need more than we have, try a reallocation - if (needed_capacity > *capacity) { + if (needed_capacity > old_capacity) { size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX); void *new_mem = reallocator->realloc( - *target, new_capacity, elem_size, reallocator + *target, old_capacity, new_capacity, elem_size, reallocator ); if (new_mem == NULL) { // give it up right away, there is no contract @@ -415,13 +427,60 @@ bptr, cmp_func ); + // binary search gives us the smallest index; + // we also want to include equal elements here + while (si + copy_len < elem_count + && cmp_func(bptr, src+copy_len*elem_size) == 0) { + copy_len++; + } // copy the source elements - bytes_copied = copy_len * elem_size; - memcpy(dest, src, bytes_copied); - dest += bytes_copied; - src += bytes_copied; - si += copy_len; + if (copy_len > 0) { + if (allow_duplicates) { + // we can copy the entire chunk + bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied; + si += copy_len; + di += copy_len; + } else { + // first, check the end of the source chunk + // for being a duplicate of the bptr + const char *end_of_src = src + (copy_len - 1) * elem_size; + size_t skip_len = 0; + while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) { + end_of_src -= elem_size; + skip_len++; + copy_len--; + } + char *last = dest == *target ? NULL : dest - elem_size; + // then iterate through the source chunk + // and skip all duplicates with the last element in the array + size_t more_skipped = 0; + for (unsigned j = 0; j < copy_len; j++) { + if (last != NULL && cmp_func(last, src) == 0) { + // duplicate - skip + src += elem_size; + si++; + more_skipped++; + } else { + memcpy(dest, src, elem_size); + src += elem_size; + last = dest; + dest += elem_size; + si++; + di++; + } + } + // skip the previously identified elements as well + src += skip_len * elem_size; + si += skip_len; + skip_len += more_skipped; + // reduce the actual size by the number of skipped elements + *size -= skip_len; + } + } // when all source elements are in place, we are done if (si >= elem_count) break; @@ -440,20 +499,103 @@ memmove(dest, bptr, bytes_copied); dest += bytes_copied; bptr += bytes_copied; + di += copy_len; bi += copy_len; } - // still source elements left? simply append them + // still source elements left? if (si < elem_count) { - memcpy(dest, src, elem_size * (elem_count - si)); + if (allow_duplicates) { + // duplicates allowed or nothing inserted yet: simply copy everything + memcpy(dest, src, elem_size * (elem_count - si)); + } else { + if (dest != *target) { + // skip all source elements that equal the last element + char *last = dest - elem_size; + while (si < elem_count) { + if (last != NULL && cmp_func(last, src) == 0) { + src += elem_size; + si++; + (*size)--; + } else { + break; + } + } + } + // we must check the elements in the chunk one by one + while (si < elem_count) { + // find a chain of elements that can be copied + size_t copy_len = 1, skip_len = 0; + { + const char *left_src = src; + while (si + copy_len < elem_count) { + const char *right_src = left_src + elem_size; + int d = cmp_func(left_src, right_src); + if (d < 0) { + if (skip_len > 0) { + // new larger element found; + // handle it in the next cycle + break; + } + left_src += elem_size; + copy_len++; + } else if (d == 0) { + left_src += elem_size; + skip_len++; + } else { + break; + } + } + } + size_t bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied + skip_len * elem_size; + si += copy_len + skip_len; + di += copy_len; + *size -= skip_len; + } + } } - // still buffer elements left? - // don't worry, we already moved them to the correct place + // buffered elements need to be moved when we skipped duplicates + size_t total_skipped = new_size - *size; + if (bi < new_size && total_skipped > 0) { + // move the remaining buffer to the end of the array + memmove(dest, bptr, elem_size * (new_size - bi)); + } return 0; } +int cx_array_insert_sorted( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + return cx_array_insert_sorted_impl(target, size, capacity, + cmp_func, sorted_data, elem_size, elem_count, reallocator, true); +} + +int cx_array_insert_unique( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + return cx_array_insert_sorted_impl(target, size, capacity, + cmp_func, sorted_data, elem_size, elem_count, reallocator, false); +} + size_t cx_array_binary_search_inf( const void *arr, size_t size, @@ -499,6 +641,13 @@ result = cmp_func(elem, arr_elem); if (result == 0) { // found it! + // check previous elements; + // when they are equal, report the smallest index + arr_elem -= elem_size; + while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) { + pivot_index--; + arr_elem -= elem_size; + } return pivot_index; } else if (result < 0) { // element is smaller than pivot, continue search left @@ -572,7 +721,7 @@ // decide if we can use the local buffer if (elem_size > CX_ARRAY_SWAP_SBO_SIZE) { - tmp = malloc(elem_size); + tmp = cxMallocDefault(elem_size); // we don't want to enforce error handling if (tmp == NULL) abort(); } else { @@ -591,7 +740,7 @@ // free dynamic memory, if it was needed if (tmp != sbo_mem) { - free(tmp); + cxFreeDefault(tmp); } } @@ -638,50 +787,38 @@ // get a correctly typed pointer to the list cx_array_list *arl = (cx_array_list *) list; - // do we need to move some elements? - if (index < list->collection.size) { - const char *first_to_move = (const char *) arl->data; - first_to_move += index * list->collection.elem_size; - size_t elems_to_move = list->collection.size - index; - size_t start_of_moved = index + n; - - if (cx_array_copy( - &arl->data, - &list->collection.size, - &arl->capacity, - 0, - start_of_moved, - first_to_move, - list->collection.elem_size, - elems_to_move, - &arl->reallocator - )) { - // if moving existing elems is unsuccessful, abort + // guarantee enough capacity + if (arl->capacity < list->collection.size + n) { + size_t new_capacity = list->collection.size + n; + new_capacity = cx_array_align_capacity(new_capacity, 16, SIZE_MAX); + if (cxReallocateArray( + list->collection.allocator, + &arl->data, new_capacity, + list->collection.elem_size) + ) { return 0; } + arl->capacity = new_capacity; } - // note that if we had to move the elements, the following operation - // is guaranteed to succeed, because we have the memory already allocated - // therefore, it is impossible to leave this function with an invalid array + // determine insert position + char *arl_data = arl->data; + char *insert_pos = arl_data + index * list->collection.elem_size; - // place the new elements - if (cx_array_copy( - &arl->data, - &list->collection.size, - &arl->capacity, - 0, - index, - array, - list->collection.elem_size, - n, - &arl->reallocator - )) { - // array list implementation is "all or nothing" - return 0; - } else { - return n; + // do we need to move some elements? + if (index < list->collection.size) { + size_t elems_to_move = list->collection.size - index; + char *target = insert_pos + n * list->collection.elem_size; + memmove(target, insert_pos, elems_to_move * list->collection.elem_size); } + + // place the new elements, if any + if (array != NULL) { + memcpy(insert_pos, array, n * list->collection.elem_size); + } + list->collection.size += n; + + return n; } static size_t cx_arl_insert_sorted( @@ -709,12 +846,41 @@ } } -static int cx_arl_insert_element( +static size_t cx_arl_insert_unique( + struct cx_list_s *list, + const void *sorted_data, + size_t n +) { + // get a correctly typed pointer to the list + cx_array_list *arl = (cx_array_list *) list; + + if (cx_array_insert_unique( + &arl->data, + &list->collection.size, + &arl->capacity, + list->collection.cmpfunc, + sorted_data, + list->collection.elem_size, + n, + &arl->reallocator + )) { + // array list implementation is "all or nothing" + return 0; + } else { + return n; + } +} + +static void *cx_arl_insert_element( struct cx_list_s *list, size_t index, const void *element ) { - return 1 != cx_arl_insert_array(list, index, element, 1); + if (cx_arl_insert_array(list, index, element, 1) == 1) { + return ((char*)((cx_array_list *) list)->data) + index * list->collection.elem_size; + } else { + return NULL; + } } static int cx_arl_insert_iter( @@ -722,28 +888,25 @@ const void *elem, int prepend ) { - struct cx_list_s *list = iter->src_handle.m; + struct cx_list_s *list = iter->src_handle; if (iter->index < list->collection.size) { - int result = cx_arl_insert_element( - list, - iter->index + 1 - prepend, - elem - ); - if (result == 0) { - iter->elem_count++; - if (prepend != 0) { - iter->index++; - iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size; - } + if (cx_arl_insert_element(list, + iter->index + 1 - prepend, elem) == NULL) { + return 1; + } + iter->elem_count++; + if (prepend != 0) { + iter->index++; + iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size; } - return result; + return 0; } else { - int result = cx_arl_insert_element(list, list->collection.size, elem); - if (result == 0) { - iter->elem_count++; - iter->index = list->collection.size; + if (cx_arl_insert_element(list, list->collection.size, elem) == NULL) { + return 1; } - return result; + iter->elem_count++; + iter->index = list->collection.size; + return 0; } } @@ -936,7 +1099,7 @@ static bool cx_arl_iter_valid(const void *it) { const struct cx_iterator_s *iter = it; - const struct cx_list_s *list = iter->src_handle.c; + const struct cx_list_s *list = iter->src_handle; return iter->index < list->collection.size; } @@ -949,23 +1112,25 @@ struct cx_iterator_s *iter = it; if (iter->base.remove) { iter->base.remove = false; - cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); + cx_arl_remove(iter->src_handle, iter->index, 1, NULL); + iter->elem_count--; } else { iter->index++; iter->elem_handle = ((char *) iter->elem_handle) - + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size; + + ((const struct cx_list_s *) iter->src_handle)->collection.elem_size; } } static void cx_arl_iter_prev(void *it) { struct cx_iterator_s *iter = it; - const cx_array_list *list = iter->src_handle.c; if (iter->base.remove) { iter->base.remove = false; - cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); + cx_arl_remove(iter->src_handle, iter->index, 1, NULL); + iter->elem_count--; } iter->index--; + cx_array_list *list = iter->src_handle; if (iter->index < list->base.collection.size) { iter->elem_handle = ((char *) list->data) + iter->index * list->base.collection.elem_size; @@ -981,7 +1146,7 @@ struct cx_iterator_s iter; iter.index = index; - iter.src_handle.c = list; + iter.src_handle = (void*)list; iter.elem_handle = cx_arl_at(list, index); iter.elem_size = list->collection.elem_size; iter.elem_count = list->collection.size; @@ -989,7 +1154,7 @@ iter.base.current = cx_arl_iter_current; iter.base.next = backwards ? cx_arl_iter_prev : cx_arl_iter_next; iter.base.remove = false; - iter.base.mutating = false; + iter.base.allow_remove = true; return iter; } @@ -999,6 +1164,7 @@ cx_arl_insert_element, cx_arl_insert_array, cx_arl_insert_sorted, + cx_arl_insert_unique, cx_arl_insert_iter, cx_arl_remove, cx_arl_clear,