--- a/ucx/array_list.c Sat Oct 04 14:54:25 2025 +0200 +++ b/ucx/array_list.c Sun Oct 19 21:20:08 2025 +0200 @@ -291,7 +291,7 @@ *target, oldcap, newcap, elem_size, reallocator ); if (newmem == NULL) { - return 1; + return 1; // LCOV_EXCL_LINE } // repair src pointer, if necessary @@ -335,7 +335,7 @@ return 0; } -int cx_array_insert_sorted( +static int cx_array_insert_sorted_impl( void **target, size_t *size, size_t *capacity, @@ -343,7 +343,8 @@ const void *sorted_data, size_t elem_size, size_t elem_count, - CxArrayReallocator *reallocator + CxArrayReallocator *reallocator, + bool allow_duplicates ) { // assert pointers assert(target != NULL); @@ -369,6 +370,7 @@ // store some counts size_t old_size = *size; size_t old_capacity = *capacity; + // the necessary capacity is the worst case assumption, including duplicates size_t needed_capacity = old_size + elem_count; // if we need more than we have, try a reallocation @@ -418,13 +420,60 @@ bptr, cmp_func ); + // binary search gives us the smallest index; + // we also want to include equal elements here + while (si + copy_len < elem_count + && cmp_func(bptr, src+copy_len*elem_size) == 0) { + copy_len++; + } // copy the source elements - bytes_copied = copy_len * elem_size; - memcpy(dest, src, bytes_copied); - dest += bytes_copied; - src += bytes_copied; - si += copy_len; + if (copy_len > 0) { + if (allow_duplicates) { + // we can copy the entire chunk + bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied; + si += copy_len; + di += copy_len; + } else { + // first, check the end of the source chunk + // for being a duplicate of the bptr + const char *end_of_src = src + (copy_len - 1) * elem_size; + size_t skip_len = 0; + while (copy_len > 0 && cmp_func(bptr, end_of_src) == 0) { + end_of_src -= elem_size; + skip_len++; + copy_len--; + } + char *last = dest == *target ? NULL : dest - elem_size; + // then iterate through the source chunk + // and skip all duplicates with the last element in the array + size_t more_skipped = 0; + for (unsigned j = 0; j < copy_len; j++) { + if (last != NULL && cmp_func(last, src) == 0) { + // duplicate - skip + src += elem_size; + si++; + more_skipped++; + } else { + memcpy(dest, src, elem_size); + src += elem_size; + last = dest; + dest += elem_size; + si++; + di++; + } + } + // skip the previously identified elements as well + src += skip_len * elem_size; + si += skip_len; + skip_len += more_skipped; + // reduce the actual size by the number of skipped elements + *size -= skip_len; + } + } // when all source elements are in place, we are done if (si >= elem_count) break; @@ -443,20 +492,103 @@ memmove(dest, bptr, bytes_copied); dest += bytes_copied; bptr += bytes_copied; + di += copy_len; bi += copy_len; } - // still source elements left? simply append them + // still source elements left? if (si < elem_count) { - memcpy(dest, src, elem_size * (elem_count - si)); + if (allow_duplicates) { + // duplicates allowed or nothing inserted yet: simply copy everything + memcpy(dest, src, elem_size * (elem_count - si)); + } else { + if (dest != *target) { + // skip all source elements that equal the last element + char *last = dest - elem_size; + while (si < elem_count) { + if (last != NULL && cmp_func(last, src) == 0) { + src += elem_size; + si++; + (*size)--; + } else { + break; + } + } + } + // we must check the elements in the chunk one by one + while (si < elem_count) { + // find a chain of elements that can be copied + size_t copy_len = 1, skip_len = 0; + { + const char *left_src = src; + while (si + copy_len < elem_count) { + const char *right_src = left_src + elem_size; + int d = cmp_func(left_src, right_src); + if (d < 0) { + if (skip_len > 0) { + // new larger element found; + // handle it in the next cycle + break; + } + left_src += elem_size; + copy_len++; + } else if (d == 0) { + left_src += elem_size; + skip_len++; + } else { + break; + } + } + } + size_t bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied + skip_len * elem_size; + si += copy_len + skip_len; + di += copy_len; + *size -= skip_len; + } + } } - // still buffer elements left? - // don't worry, we already moved them to the correct place + // buffered elements need to be moved when we skipped duplicates + size_t total_skipped = new_size - *size; + if (bi < new_size && total_skipped > 0) { + // move the remaining buffer to the end of the array + memmove(dest, bptr, elem_size * (new_size - bi)); + } return 0; } +int cx_array_insert_sorted( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + return cx_array_insert_sorted_impl(target, size, capacity, + cmp_func, sorted_data, elem_size, elem_count, reallocator, true); +} + +int cx_array_insert_unique( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + return cx_array_insert_sorted_impl(target, size, capacity, + cmp_func, sorted_data, elem_size, elem_count, reallocator, false); +} + size_t cx_array_binary_search_inf( const void *arr, size_t size, @@ -502,6 +634,13 @@ result = cmp_func(elem, arr_elem); if (result == 0) { // found it! + // check previous elements; + // when they are equal, report the smallest index + arr_elem -= elem_size; + while (pivot_index > 0 && cmp_func(elem, arr_elem) == 0) { + pivot_index--; + arr_elem -= elem_size; + } return pivot_index; } else if (result < 0) { // element is smaller than pivot, continue search left @@ -700,6 +839,31 @@ } } +static size_t cx_arl_insert_unique( + struct cx_list_s *list, + const void *sorted_data, + size_t n +) { + // get a correctly typed pointer to the list + cx_array_list *arl = (cx_array_list *) list; + + if (cx_array_insert_unique( + &arl->data, + &list->collection.size, + &arl->capacity, + list->collection.cmpfunc, + sorted_data, + list->collection.elem_size, + n, + &arl->reallocator + )) { + // array list implementation is "all or nothing" + return 0; + } else { + return n; + } +} + static void *cx_arl_insert_element( struct cx_list_s *list, size_t index, @@ -942,6 +1106,7 @@ if (iter->base.remove) { iter->base.remove = false; cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); + iter->elem_count--; } else { iter->index++; iter->elem_handle = @@ -956,6 +1121,7 @@ if (iter->base.remove) { iter->base.remove = false; cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); + iter->elem_count--; } iter->index--; if (iter->index < list->base.collection.size) { @@ -991,6 +1157,7 @@ cx_arl_insert_element, cx_arl_insert_array, cx_arl_insert_sorted, + cx_arl_insert_unique, cx_arl_insert_iter, cx_arl_remove, cx_arl_clear,