--- a/ucx/array_list.c Thu Nov 28 17:53:13 2024 +0100 +++ b/ucx/array_list.c Mon Jan 06 21:18:36 2025 +0100 @@ -30,6 +30,7 @@ #include "cx/compare.h" #include <assert.h> #include <string.h> +#include <errno.h> // Default array reallocator @@ -37,65 +38,258 @@ void *array, size_t capacity, size_t elem_size, - __attribute__((__unused__)) struct cx_array_reallocator_s *alloc + cx_attr_unused CxArrayReallocator *alloc ) { - return realloc(array, capacity * elem_size); + size_t n; + if (cx_szmul(capacity, elem_size, &n)) { + errno = EOVERFLOW; + return NULL; + } + return realloc(array, n); } -struct cx_array_reallocator_s cx_array_default_reallocator_impl = { +CxArrayReallocator cx_array_default_reallocator_impl = { cx_array_default_realloc, NULL, NULL, 0, 0 }; -struct cx_array_reallocator_s *cx_array_default_reallocator = &cx_array_default_reallocator_impl; +CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl; + +// Stack-aware array reallocator + +static void *cx_array_advanced_realloc( + void *array, + size_t capacity, + size_t elem_size, + cx_attr_unused CxArrayReallocator *alloc +) { + // check for overflow + size_t n; + if (cx_szmul(capacity, elem_size, &n)) { + errno = EOVERFLOW; + return NULL; + } + + // retrieve the pointer to the actual allocator + const CxAllocator *al = alloc->ptr1; + + // check if the array is still located on the stack + void *newmem; + if (array == alloc->ptr2) { + newmem = cxMalloc(al, n); + if (newmem != NULL && array != NULL) { + memcpy(newmem, array, n); + } + } else { + newmem = cxRealloc(al, array, n); + } + return newmem; +} + +struct cx_array_reallocator_s cx_array_reallocator( + const struct cx_allocator_s *allocator, + const void *stackmem +) { + if (allocator == NULL) { + allocator = cxDefaultAllocator; + } + return (struct cx_array_reallocator_s) { + cx_array_advanced_realloc, + (void*) allocator, (void*) stackmem, + 0, 0 + }; +} // LOW LEVEL ARRAY LIST FUNCTIONS -enum cx_array_result cx_array_copy( - void **target, - size_t *size, - size_t *capacity, - size_t index, - void const *src, +static size_t cx_array_align_capacity( + size_t cap, + size_t alignment, + size_t max +) { + if (cap > max - alignment) { + return cap; + } else { + return cap - (cap % alignment) + alignment; + } +} + +int cx_array_reserve( + void **array, + void *size, + void *capacity, + unsigned width, size_t elem_size, size_t elem_count, - struct cx_array_reallocator_s *reallocator + CxArrayReallocator *reallocator +) { + // assert pointers + assert(array != NULL); + assert(size != NULL); + assert(capacity != NULL); + + // default reallocator + if (reallocator == NULL) { + reallocator = cx_array_default_reallocator; + } + + // determine size and capacity + size_t oldcap; + size_t oldsize; + size_t max_size; + if (width == 0 || width == sizeof(size_t)) { + oldcap = *(size_t*) capacity; + oldsize = *(size_t*) size; + max_size = SIZE_MAX; + } else if (width == sizeof(uint16_t)) { + oldcap = *(uint16_t*) capacity; + oldsize = *(uint16_t*) size; + max_size = UINT16_MAX; + } else if (width == sizeof(uint8_t)) { + oldcap = *(uint8_t*) capacity; + oldsize = *(uint8_t*) size; + max_size = UINT8_MAX; + } +#if CX_WORDSIZE == 64 + else if (width == sizeof(uint32_t)) { + oldcap = *(uint32_t*) capacity; + oldsize = *(uint32_t*) size; + max_size = UINT32_MAX; + } +#endif + else { + errno = EINVAL; + return 1; + } + + // assert that the array is allocated when it has capacity + assert(*array != NULL || oldcap == 0); + + // check for overflow + if (elem_count > max_size - oldsize) { + errno = EOVERFLOW; + return 1; + } + + // determine new capacity + size_t newcap = oldsize + elem_count; + + // reallocate if possible + if (newcap > oldcap) { + // calculate new capacity (next number divisible by 16) + newcap = cx_array_align_capacity(newcap, 16, max_size); + + // perform reallocation + void *newmem = reallocator->realloc( + *array, newcap, elem_size, reallocator + ); + if (newmem == NULL) { + return 1; // LCOV_EXCL_LINE + } + + // store new pointer + *array = newmem; + + // store new capacity + if (width == 0 || width == sizeof(size_t)) { + *(size_t*) capacity = newcap; + } else if (width == sizeof(uint16_t)) { + *(uint16_t*) capacity = (uint16_t) newcap; + } else if (width == sizeof(uint8_t)) { + *(uint8_t*) capacity = (uint8_t) newcap; + } +#if CX_WORDSIZE == 64 + else if (width == sizeof(uint32_t)) { + *(uint32_t*) capacity = (uint32_t) newcap; + } +#endif + } + + return 0; +} + +int cx_array_copy( + void **target, + void *size, + void *capacity, + unsigned width, + size_t index, + const void *src, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator ) { // assert pointers assert(target != NULL); assert(size != NULL); + assert(capacity != NULL); assert(src != NULL); - // determine capacity - size_t cap = capacity == NULL ? *size : *capacity; + // default reallocator + if (reallocator == NULL) { + reallocator = cx_array_default_reallocator; + } + + // determine size and capacity + size_t oldcap; + size_t oldsize; + size_t max_size; + if (width == 0 || width == sizeof(size_t)) { + oldcap = *(size_t*) capacity; + oldsize = *(size_t*) size; + max_size = SIZE_MAX; + } else if (width == sizeof(uint16_t)) { + oldcap = *(uint16_t*) capacity; + oldsize = *(uint16_t*) size; + max_size = UINT16_MAX; + } else if (width == sizeof(uint8_t)) { + oldcap = *(uint8_t*) capacity; + oldsize = *(uint8_t*) size; + max_size = UINT8_MAX; + } +#if CX_WORDSIZE == 64 + else if (width == sizeof(uint32_t)) { + oldcap = *(uint32_t*) capacity; + oldsize = *(uint32_t*) size; + max_size = UINT32_MAX; + } +#endif + else { + errno = EINVAL; + return 1; + } + + // assert that the array is allocated when it has capacity + assert(*target != NULL || oldcap == 0); + + // check for overflow + if (index > max_size || elem_count > max_size - index) { + errno = EOVERFLOW; + return 1; + } // check if resize is required size_t minsize = index + elem_count; - size_t newsize = *size < minsize ? minsize : *size; - bool needrealloc = newsize > cap; + size_t newsize = oldsize < minsize ? minsize : oldsize; // reallocate if possible - if (needrealloc) { - // a reallocator and a capacity variable must be available - if (reallocator == NULL || capacity == NULL) { - return CX_ARRAY_REALLOC_NOT_SUPPORTED; - } - + size_t newcap = oldcap; + if (newsize > oldcap) { // check, if we need to repair the src pointer uintptr_t targetaddr = (uintptr_t) *target; uintptr_t srcaddr = (uintptr_t) src; bool repairsrc = targetaddr <= srcaddr - && srcaddr < targetaddr + cap * elem_size; + && srcaddr < targetaddr + oldcap * elem_size; // calculate new capacity (next number divisible by 16) - cap = newsize - (newsize % 16) + 16; - assert(cap > newsize); + newcap = cx_array_align_capacity(newsize, 16, max_size); + assert(newcap > newsize); // perform reallocation void *newmem = reallocator->realloc( - *target, cap, elem_size, reallocator + *target, newcap, elem_size, reallocator ); if (newmem == NULL) { - return CX_ARRAY_REALLOC_FAILED; + return 1; } // repair src pointer, if necessary @@ -103,9 +297,8 @@ src = ((char *) newmem) + (srcaddr - targetaddr); } - // store new pointer and capacity + // store new pointer *target = newmem; - *capacity = cap; } // determine target pointer @@ -113,17 +306,255 @@ start += index * elem_size; // copy elements and set new size + // note: no overflow check here, b/c we cannot get here w/o allocation memmove(start, src, elem_count * elem_size); - *size = newsize; + + // if any of size or capacity changed, store them back + if (newsize != oldsize || newcap != oldcap) { + if (width == 0 || width == sizeof(size_t)) { + *(size_t*) capacity = newcap; + *(size_t*) size = newsize; + } else if (width == sizeof(uint16_t)) { + *(uint16_t*) capacity = (uint16_t) newcap; + *(uint16_t*) size = (uint16_t) newsize; + } else if (width == sizeof(uint8_t)) { + *(uint8_t*) capacity = (uint8_t) newcap; + *(uint8_t*) size = (uint8_t) newsize; + } +#if CX_WORDSIZE == 64 + else if (width == sizeof(uint32_t)) { + *(uint32_t*) capacity = (uint32_t) newcap; + *(uint32_t*) size = (uint32_t) newsize; + } +#endif + } // return successfully - return CX_ARRAY_SUCCESS; + return 0; +} + +int cx_array_insert_sorted( + void **target, + size_t *size, + size_t *capacity, + cx_compare_func cmp_func, + const void *sorted_data, + size_t elem_size, + size_t elem_count, + CxArrayReallocator *reallocator +) { + // assert pointers + assert(target != NULL); + assert(size != NULL); + assert(capacity != NULL); + assert(cmp_func != NULL); + assert(sorted_data != NULL); + + // default reallocator + if (reallocator == NULL) { + reallocator = cx_array_default_reallocator; + } + + // corner case + if (elem_count == 0) return 0; + + // overflow check + if (elem_count > SIZE_MAX - *size) { + errno = EOVERFLOW; + return 1; + } + + // store some counts + size_t old_size = *size; + size_t needed_capacity = old_size + elem_count; + + // if we need more than we have, try a reallocation + if (needed_capacity > *capacity) { + size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX); + void *new_mem = reallocator->realloc( + *target, new_capacity, elem_size, reallocator + ); + if (new_mem == NULL) { + // give it up right away, there is no contract + // that requires us to insert as much as we can + return 1; // LCOV_EXCL_LINE + } + *target = new_mem; + *capacity = new_capacity; + } + + // now we have guaranteed that we can insert everything + size_t new_size = old_size + elem_count; + *size = new_size; + + // declare the source and destination indices/pointers + size_t si = 0, di = 0; + const char *src = sorted_data; + char *dest = *target; + + // find the first insertion point + di = cx_array_binary_search_sup(dest, old_size, elem_size, src, cmp_func); + dest += di * elem_size; + + // move the remaining elements in the array completely to the right + // we will call it the "buffer" for parked elements + size_t buf_size = old_size - di; + size_t bi = new_size - buf_size; + char *bptr = ((char *) *target) + bi * elem_size; + memmove(bptr, dest, buf_size * elem_size); + + // while there are both source and buffered elements left, + // copy them interleaving + while (si < elem_count && bi < new_size) { + // determine how many source elements can be inserted + size_t copy_len, bytes_copied; + copy_len = cx_array_binary_search_sup( + src, + elem_count - si, + elem_size, + bptr, + cmp_func + ); + + // copy the source elements + bytes_copied = copy_len * elem_size; + memcpy(dest, src, bytes_copied); + dest += bytes_copied; + src += bytes_copied; + si += copy_len; + + // when all source elements are in place, we are done + if (si >= elem_count) break; + + // determine how many buffered elements need to be restored + copy_len = cx_array_binary_search_sup( + bptr, + new_size - bi, + elem_size, + src, + cmp_func + ); + + // restore the buffered elements + bytes_copied = copy_len * elem_size; + memmove(dest, bptr, bytes_copied); + dest += bytes_copied; + bptr += bytes_copied; + bi += copy_len; + } + + // still source elements left? simply append them + if (si < elem_count) { + memcpy(dest, src, elem_size * (elem_count - si)); + } + + // still buffer elements left? + // don't worry, we already moved them to the correct place + + return 0; +} + +size_t cx_array_binary_search_inf( + const void *arr, + size_t size, + size_t elem_size, + const void *elem, + cx_compare_func cmp_func +) { + // special case: empty array + if (size == 0) return 0; + + // declare a variable that will contain the compare results + int result; + + // cast the array pointer to something we can use offsets with + const char *array = arr; + + // check the first array element + result = cmp_func(elem, array); + if (result < 0) { + return size; + } else if (result == 0) { + return 0; + } + + // special case: there is only one element and that is smaller + if (size == 1) return 0; + + // check the last array element + result = cmp_func(elem, array + elem_size * (size - 1)); + if (result >= 0) { + return size - 1; + } + + // the element is now guaranteed to be somewhere in the list + // so start the binary search + size_t left_index = 1; + size_t right_index = size - 1; + size_t pivot_index; + + while (left_index <= right_index) { + pivot_index = left_index + (right_index - left_index) / 2; + const char *arr_elem = array + pivot_index * elem_size; + result = cmp_func(elem, arr_elem); + if (result == 0) { + // found it! + return pivot_index; + } else if (result < 0) { + // element is smaller than pivot, continue search left + right_index = pivot_index - 1; + } else { + // element is larger than pivot, continue search right + left_index = pivot_index + 1; + } + } + + // report the largest upper bound + return result < 0 ? (pivot_index - 1) : pivot_index; +} + +size_t cx_array_binary_search( + const void *arr, + size_t size, + size_t elem_size, + const void *elem, + cx_compare_func cmp_func +) { + size_t index = cx_array_binary_search_inf( + arr, size, elem_size, elem, cmp_func + ); + if (index < size && + cmp_func(((const char *) arr) + index * elem_size, elem) == 0) { + return index; + } else { + return size; + } +} + +size_t cx_array_binary_search_sup( + const void *arr, + size_t size, + size_t elem_size, + const void *elem, + cx_compare_func cmp_func +) { + size_t inf = cx_array_binary_search_inf( + arr, size, elem_size, elem, cmp_func + ); + if (inf == size) { + // no infimum means, first element is supremum + return 0; + } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) { + return inf; + } else { + return inf + 1; + } } #ifndef CX_ARRAY_SWAP_SBO_SIZE #define CX_ARRAY_SWAP_SBO_SIZE 128 #endif -unsigned cx_array_swap_sbo_size = CX_ARRAY_SWAP_SBO_SIZE; +const unsigned cx_array_swap_sbo_size = CX_ARRAY_SWAP_SBO_SIZE; void cx_array_swap( void *arr, @@ -170,22 +601,9 @@ struct cx_list_s base; void *data; size_t capacity; - struct cx_array_reallocator_s reallocator; + CxArrayReallocator reallocator; } cx_array_list; -static void *cx_arl_realloc( - void *array, - size_t capacity, - size_t elem_size, - struct cx_array_reallocator_s *alloc -) { - // retrieve the pointer to the list allocator - CxAllocator const *al = alloc->ptr1; - - // use the list allocator to reallocate the memory - return cxRealloc(al, array, capacity * elem_size); -} - static void cx_arl_destructor(struct cx_list_s *list) { cx_array_list *arl = (cx_array_list *) list; @@ -211,7 +629,7 @@ static size_t cx_arl_insert_array( struct cx_list_s *list, size_t index, - void const *array, + const void *array, size_t n ) { // out of bounds and special case check @@ -222,15 +640,16 @@ // do we need to move some elements? if (index < list->collection.size) { - char const *first_to_move = (char const *) arl->data; + const char *first_to_move = (const char *) arl->data; first_to_move += index * list->collection.elem_size; size_t elems_to_move = list->collection.size - index; size_t start_of_moved = index + n; - if (CX_ARRAY_SUCCESS != cx_array_copy( + if (cx_array_copy( &arl->data, &list->collection.size, &arl->capacity, + 0, start_of_moved, first_to_move, list->collection.elem_size, @@ -247,34 +666,60 @@ // therefore, it is impossible to leave this function with an invalid array // place the new elements - if (CX_ARRAY_SUCCESS == cx_array_copy( + if (cx_array_copy( &arl->data, &list->collection.size, &arl->capacity, + 0, index, array, list->collection.elem_size, n, &arl->reallocator )) { - return n; - } else { // array list implementation is "all or nothing" return 0; + } else { + return n; + } +} + +static size_t cx_arl_insert_sorted( + struct cx_list_s *list, + const void *sorted_data, + size_t n +) { + // get a correctly typed pointer to the list + cx_array_list *arl = (cx_array_list *) list; + + if (cx_array_insert_sorted( + &arl->data, + &list->collection.size, + &arl->capacity, + list->collection.cmpfunc, + sorted_data, + list->collection.elem_size, + n, + &arl->reallocator + )) { + // array list implementation is "all or nothing" + return 0; + } else { + return n; } } static int cx_arl_insert_element( struct cx_list_s *list, size_t index, - void const *element + const void *element ) { return 1 != cx_arl_insert_array(list, index, element, 1); } static int cx_arl_insert_iter( struct cx_iterator_s *iter, - void const *elem, + const void *elem, int prepend ) { struct cx_list_s *list = iter->src_handle.m; @@ -284,57 +729,84 @@ iter->index + 1 - prepend, elem ); - if (result == 0 && prepend != 0) { - iter->index++; - iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size; + if (result == 0) { + iter->elem_count++; + if (prepend != 0) { + iter->index++; + iter->elem_handle = ((char *) iter->elem_handle) + list->collection.elem_size; + } } return result; } else { int result = cx_arl_insert_element(list, list->collection.size, elem); - iter->index = list->collection.size; + if (result == 0) { + iter->elem_count++; + iter->index = list->collection.size; + } return result; } } -static int cx_arl_remove( +static size_t cx_arl_remove( struct cx_list_s *list, - size_t index + size_t index, + size_t num, + void *targetbuf ) { cx_array_list *arl = (cx_array_list *) list; // out-of-bounds check + size_t remove; if (index >= list->collection.size) { - return 1; + remove = 0; + } else if (index + num > list->collection.size) { + remove = list->collection.size - index; + } else { + remove = num; } - // content destruction - cx_invoke_destructor(list, ((char *) arl->data) + index * list->collection.elem_size); + // easy exit + if (remove == 0) return 0; - // short-circuit removal of last element - if (index == list->collection.size - 1) { - list->collection.size--; - return 0; + // destroy or copy contents + if (targetbuf == NULL) { + for (size_t idx = index; idx < index + remove; idx++) { + cx_invoke_destructor( + list, + ((char *) arl->data) + idx * list->collection.elem_size + ); + } + } else { + memcpy( + targetbuf, + ((char *) arl->data) + index * list->collection.elem_size, + remove * list->collection.elem_size + ); } - // just move the elements starting at index to the left - int result = cx_array_copy( + // short-circuit removal of last elements + if (index + remove == list->collection.size) { + list->collection.size -= remove; + return remove; + } + + // just move the elements to the left + cx_array_copy( &arl->data, &list->collection.size, &arl->capacity, + 0, index, - ((char *) arl->data) + (index + 1) * list->collection.elem_size, + ((char *) arl->data) + (index + remove) * list->collection.elem_size, list->collection.elem_size, - list->collection.size - index - 1, + list->collection.size - index - remove, &arl->reallocator ); - // cx_array_copy cannot fail, array cannot grow - assert(result == 0); + // decrease the size + list->collection.size -= remove; - // decrease the size - list->collection.size--; - - return 0; + return remove; } static void cx_arl_clear(struct cx_list_s *list) { @@ -372,11 +844,11 @@ } static void *cx_arl_at( - struct cx_list_s const *list, + const struct cx_list_s *list, size_t index ) { if (index < list->collection.size) { - cx_array_list const *arl = (cx_array_list const *) list; + const cx_array_list *arl = (const cx_array_list *) list; char *space = arl->data; return space + index * list->collection.elem_size; } else { @@ -386,20 +858,21 @@ static ssize_t cx_arl_find_remove( struct cx_list_s *list, - void const *elem, + const void *elem, bool remove ) { assert(list->collection.cmpfunc != NULL); assert(list->collection.size < SIZE_MAX / 2); - char *cur = ((cx_array_list const *) list)->data; + char *cur = ((const cx_array_list *) list)->data; for (ssize_t i = 0; i < (ssize_t) list->collection.size; i++) { if (0 == list->collection.cmpfunc(elem, cur)) { if (remove) { - if (0 == cx_arl_remove(list, i)) { + if (1 == cx_arl_remove(list, i, 1, NULL)) { return i; } else { - return -1; + // should be unreachable + return -1; // LCOV_EXCL_LINE } } else { return i; @@ -421,13 +894,13 @@ } static int cx_arl_compare( - struct cx_list_s const *list, - struct cx_list_s const *other + const struct cx_list_s *list, + const struct cx_list_s *other ) { assert(list->collection.cmpfunc != NULL); if (list->collection.size == other->collection.size) { - char const *left = ((cx_array_list const *) list)->data; - char const *right = ((cx_array_list const *) other)->data; + const char *left = ((const cx_array_list *) list)->data; + const char *right = ((const cx_array_list *) other)->data; for (size_t i = 0; i < list->collection.size; i++) { int d = list->collection.cmpfunc(left, right); if (d != 0) { @@ -444,21 +917,21 @@ static void cx_arl_reverse(struct cx_list_s *list) { if (list->collection.size < 2) return; - void *data = ((cx_array_list const *) list)->data; + void *data = ((const cx_array_list *) list)->data; size_t half = list->collection.size / 2; for (size_t i = 0; i < half; i++) { cx_array_swap(data, list->collection.elem_size, i, list->collection.size - 1 - i); } } -static bool cx_arl_iter_valid(void const *it) { - struct cx_iterator_s const *iter = it; - struct cx_list_s const *list = iter->src_handle.c; +static bool cx_arl_iter_valid(const void *it) { + const struct cx_iterator_s *iter = it; + const struct cx_list_s *list = iter->src_handle.c; return iter->index < list->collection.size; } -static void *cx_arl_iter_current(void const *it) { - struct cx_iterator_s const *iter = it; +static void *cx_arl_iter_current(const void *it) { + const struct cx_iterator_s *iter = it; return iter->elem_handle; } @@ -466,21 +939,21 @@ struct cx_iterator_s *iter = it; if (iter->base.remove) { iter->base.remove = false; - cx_arl_remove(iter->src_handle.m, iter->index); + cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); } else { iter->index++; iter->elem_handle = ((char *) iter->elem_handle) - + ((struct cx_list_s const *) iter->src_handle.c)->collection.elem_size; + + ((const struct cx_list_s *) iter->src_handle.c)->collection.elem_size; } } static void cx_arl_iter_prev(void *it) { struct cx_iterator_s *iter = it; - cx_array_list const* list = iter->src_handle.c; + const cx_array_list *list = iter->src_handle.c; if (iter->base.remove) { iter->base.remove = false; - cx_arl_remove(iter->src_handle.m, iter->index); + cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL); } iter->index--; if (iter->index < list->base.collection.size) { @@ -491,7 +964,7 @@ static struct cx_iterator_s cx_arl_iterator( - struct cx_list_s const *list, + const struct cx_list_s *list, size_t index, bool backwards ) { @@ -515,6 +988,7 @@ cx_arl_destructor, cx_arl_insert_element, cx_arl_insert_array, + cx_arl_insert_sorted, cx_arl_insert_iter, cx_arl_remove, cx_arl_clear, @@ -528,7 +1002,7 @@ }; CxList *cxArrayListCreate( - CxAllocator const *allocator, + const CxAllocator *allocator, cx_compare_func comparator, size_t elem_size, size_t initial_capacity @@ -555,14 +1029,13 @@ // allocate the array after the real elem_size is known list->data = cxCalloc(allocator, initial_capacity, elem_size); - if (list->data == NULL) { + if (list->data == NULL) { // LCOV_EXCL_START cxFree(allocator, list); return NULL; - } + } // LCOV_EXCL_STOP // configure the reallocator - list->reallocator.realloc = cx_arl_realloc; - list->reallocator.ptr1 = (void *) allocator; + list->reallocator = cx_array_reallocator(allocator, NULL); return (CxList *) list; }