ucx/array_list.c

changeset 101
7b3a3130be44
parent 49
2f71f4ee247a
--- a/ucx/array_list.c	Thu Dec 12 20:01:43 2024 +0100
+++ b/ucx/array_list.c	Mon Jan 06 22:22:55 2025 +0100
@@ -30,6 +30,7 @@
 #include "cx/compare.h"
 #include <assert.h>
 #include <string.h>
+#include <errno.h>
 
 // Default array reallocator
 
@@ -37,65 +38,258 @@
         void *array,
         size_t capacity,
         size_t elem_size,
-        __attribute__((__unused__)) struct cx_array_reallocator_s *alloc
+        cx_attr_unused CxArrayReallocator *alloc
 ) {
-    return realloc(array, capacity * elem_size);
+    size_t n;
+    if (cx_szmul(capacity, elem_size, &n)) {
+        errno = EOVERFLOW;
+        return NULL;
+    }
+    return realloc(array, n);
 }
 
-struct cx_array_reallocator_s cx_array_default_reallocator_impl = {
+CxArrayReallocator cx_array_default_reallocator_impl = {
         cx_array_default_realloc, NULL, NULL, 0, 0
 };
 
-struct cx_array_reallocator_s *cx_array_default_reallocator = &cx_array_default_reallocator_impl;
+CxArrayReallocator *cx_array_default_reallocator = &cx_array_default_reallocator_impl;
+
+// Stack-aware array reallocator
+
+static void *cx_array_advanced_realloc(
+        void *array,
+        size_t capacity,
+        size_t elem_size,
+        cx_attr_unused CxArrayReallocator *alloc
+) {
+    // check for overflow
+    size_t n;
+    if (cx_szmul(capacity, elem_size, &n)) {
+        errno = EOVERFLOW;
+        return NULL;
+    }
+
+    // retrieve the pointer to the actual allocator
+    const CxAllocator *al = alloc->ptr1;
+
+    // check if the array is still located on the stack
+    void *newmem;
+    if (array == alloc->ptr2) {
+        newmem = cxMalloc(al, n);
+        if (newmem != NULL && array != NULL) {
+            memcpy(newmem, array, n);
+        }
+    } else {
+        newmem = cxRealloc(al, array, n);
+    }
+    return newmem;
+}
+
+struct cx_array_reallocator_s cx_array_reallocator(
+        const struct cx_allocator_s *allocator,
+        const void *stackmem
+) {
+    if (allocator == NULL) {
+        allocator = cxDefaultAllocator;
+    }
+    return (struct cx_array_reallocator_s) {
+            cx_array_advanced_realloc,
+            (void*) allocator, (void*) stackmem,
+            0, 0
+    };
+}
 
 // LOW LEVEL ARRAY LIST FUNCTIONS
 
-enum cx_array_result cx_array_copy(
+static size_t cx_array_align_capacity(
+        size_t cap,
+        size_t alignment,
+        size_t max
+) {
+    if (cap > max - alignment) {
+        return cap;
+    } else {
+        return cap - (cap % alignment) + alignment;
+    }
+}
+
+int cx_array_reserve(
+        void **array,
+        void *size,
+        void *capacity,
+        unsigned width,
+        size_t elem_size,
+        size_t elem_count,
+        CxArrayReallocator *reallocator
+) {
+    // assert pointers
+    assert(array != NULL);
+    assert(size != NULL);
+    assert(capacity != NULL);
+
+    // default reallocator
+    if (reallocator == NULL) {
+        reallocator = cx_array_default_reallocator;
+    }
+
+    // determine size and capacity
+    size_t oldcap;
+    size_t oldsize;
+    size_t max_size;
+    if (width == 0 || width == sizeof(size_t)) {
+        oldcap = *(size_t*) capacity;
+        oldsize = *(size_t*) size;
+        max_size = SIZE_MAX;
+    } else if (width == sizeof(uint16_t)) {
+        oldcap = *(uint16_t*) capacity;
+        oldsize = *(uint16_t*) size;
+        max_size = UINT16_MAX;
+    } else if (width == sizeof(uint8_t)) {
+        oldcap = *(uint8_t*) capacity;
+        oldsize = *(uint8_t*) size;
+        max_size = UINT8_MAX;
+    }
+#if CX_WORDSIZE == 64
+    else if (width == sizeof(uint32_t)) {
+        oldcap = *(uint32_t*) capacity;
+        oldsize = *(uint32_t*) size;
+        max_size = UINT32_MAX;
+    }
+#endif
+    else {
+        errno = EINVAL;
+        return 1;
+    }
+
+    // assert that the array is allocated when it has capacity
+    assert(*array != NULL || oldcap == 0);
+
+    // check for overflow
+    if (elem_count > max_size - oldsize) {
+        errno = EOVERFLOW;
+        return 1;
+    }
+
+    // determine new capacity
+    size_t newcap = oldsize + elem_count;
+
+    // reallocate if possible
+    if (newcap > oldcap) {
+        // calculate new capacity (next number divisible by 16)
+        newcap = cx_array_align_capacity(newcap, 16, max_size);
+
+        // perform reallocation
+        void *newmem = reallocator->realloc(
+                *array, newcap, elem_size, reallocator
+        );
+        if (newmem == NULL) {
+            return 1; // LCOV_EXCL_LINE
+        }
+
+        // store new pointer
+        *array = newmem;
+
+        // store new capacity
+        if (width == 0 || width == sizeof(size_t)) {
+            *(size_t*) capacity = newcap;
+        } else if (width == sizeof(uint16_t)) {
+            *(uint16_t*) capacity = (uint16_t) newcap;
+        } else if (width == sizeof(uint8_t)) {
+            *(uint8_t*) capacity = (uint8_t) newcap;
+        }
+#if CX_WORDSIZE == 64
+        else if (width == sizeof(uint32_t)) {
+            *(uint32_t*) capacity = (uint32_t) newcap;
+        }
+#endif
+    }
+
+    return 0;
+}
+
+int cx_array_copy(
         void **target,
-        size_t *size,
-        size_t *capacity,
+        void *size,
+        void *capacity,
+        unsigned width,
         size_t index,
         const void *src,
         size_t elem_size,
         size_t elem_count,
-        struct cx_array_reallocator_s *reallocator
+        CxArrayReallocator *reallocator
 ) {
     // assert pointers
     assert(target != NULL);
     assert(size != NULL);
+    assert(capacity != NULL);
     assert(src != NULL);
 
-    // determine capacity
-    size_t cap = capacity == NULL ? *size : *capacity;
+    // default reallocator
+    if (reallocator == NULL) {
+        reallocator = cx_array_default_reallocator;
+    }
+
+    // determine size and capacity
+    size_t oldcap;
+    size_t oldsize;
+    size_t max_size;
+    if (width == 0 || width == sizeof(size_t)) {
+        oldcap = *(size_t*) capacity;
+        oldsize = *(size_t*) size;
+        max_size = SIZE_MAX;
+    } else if (width == sizeof(uint16_t)) {
+        oldcap = *(uint16_t*) capacity;
+        oldsize = *(uint16_t*) size;
+        max_size = UINT16_MAX;
+    } else if (width == sizeof(uint8_t)) {
+        oldcap = *(uint8_t*) capacity;
+        oldsize = *(uint8_t*) size;
+        max_size = UINT8_MAX;
+    }
+#if CX_WORDSIZE == 64
+    else if (width == sizeof(uint32_t)) {
+        oldcap = *(uint32_t*) capacity;
+        oldsize = *(uint32_t*) size;
+        max_size = UINT32_MAX;
+    }
+#endif
+    else {
+        errno = EINVAL;
+        return 1;
+    }
+
+    // assert that the array is allocated when it has capacity
+    assert(*target != NULL || oldcap == 0);
+
+    // check for overflow
+    if (index > max_size || elem_count > max_size - index) {
+        errno = EOVERFLOW;
+        return 1;
+    }
 
     // check if resize is required
     size_t minsize = index + elem_count;
-    size_t newsize = *size < minsize ? minsize : *size;
-    bool needrealloc = newsize > cap;
+    size_t newsize = oldsize < minsize ? minsize : oldsize;
 
     // reallocate if possible
-    if (needrealloc) {
-        // a reallocator and a capacity variable must be available
-        if (reallocator == NULL || capacity == NULL) {
-            return CX_ARRAY_REALLOC_NOT_SUPPORTED;
-        }
-
+    size_t newcap = oldcap;
+    if (newsize > oldcap) {
         // check, if we need to repair the src pointer
         uintptr_t targetaddr = (uintptr_t) *target;
         uintptr_t srcaddr = (uintptr_t) src;
         bool repairsrc = targetaddr <= srcaddr
-                         && srcaddr < targetaddr + cap * elem_size;
+                         && srcaddr < targetaddr + oldcap * elem_size;
 
         // calculate new capacity (next number divisible by 16)
-        cap = newsize - (newsize % 16) + 16;
-        assert(cap > newsize);
+        newcap = cx_array_align_capacity(newsize, 16, max_size);
+        assert(newcap > newsize);
 
         // perform reallocation
         void *newmem = reallocator->realloc(
-                *target, cap, elem_size, reallocator
+                *target, newcap, elem_size, reallocator
         );
         if (newmem == NULL) {
-            return CX_ARRAY_REALLOC_FAILED;
+            return 1;
         }
 
         // repair src pointer, if necessary
@@ -103,9 +297,8 @@
             src = ((char *) newmem) + (srcaddr - targetaddr);
         }
 
-        // store new pointer and capacity
+        // store new pointer
         *target = newmem;
-        *capacity = cap;
     }
 
     // determine target pointer
@@ -113,14 +306,34 @@
     start += index * elem_size;
 
     // copy elements and set new size
+    // note: no overflow check here, b/c we cannot get here w/o allocation
     memmove(start, src, elem_count * elem_size);
-    *size = newsize;
+
+    // if any of size or capacity changed, store them back
+    if (newsize != oldsize || newcap != oldcap) {
+        if (width == 0 || width == sizeof(size_t)) {
+            *(size_t*) capacity = newcap;
+            *(size_t*) size = newsize;
+        } else if (width == sizeof(uint16_t)) {
+            *(uint16_t*) capacity = (uint16_t) newcap;
+            *(uint16_t*) size = (uint16_t) newsize;
+        } else if (width == sizeof(uint8_t)) {
+            *(uint8_t*) capacity = (uint8_t) newcap;
+            *(uint8_t*) size = (uint8_t) newsize;
+        }
+#if CX_WORDSIZE == 64
+        else if (width == sizeof(uint32_t)) {
+            *(uint32_t*) capacity = (uint32_t) newcap;
+            *(uint32_t*) size = (uint32_t) newsize;
+        }
+#endif
+    }
 
     // return successfully
-    return CX_ARRAY_SUCCESS;
+    return 0;
 }
 
-enum cx_array_result cx_array_insert_sorted(
+int cx_array_insert_sorted(
         void **target,
         size_t *size,
         size_t *capacity,
@@ -128,7 +341,7 @@
         const void *sorted_data,
         size_t elem_size,
         size_t elem_count,
-        struct cx_array_reallocator_s *reallocator
+        CxArrayReallocator *reallocator
 ) {
     // assert pointers
     assert(target != NULL);
@@ -136,25 +349,35 @@
     assert(capacity != NULL);
     assert(cmp_func != NULL);
     assert(sorted_data != NULL);
-    assert(reallocator != NULL);
+
+    // default reallocator
+    if (reallocator == NULL) {
+        reallocator = cx_array_default_reallocator;
+    }
 
     // corner case
     if (elem_count == 0) return 0;
 
+    // overflow check
+    if (elem_count > SIZE_MAX - *size) {
+        errno = EOVERFLOW;
+        return 1;
+    }
+
     // store some counts
     size_t old_size = *size;
     size_t needed_capacity = old_size + elem_count;
 
     // if we need more than we have, try a reallocation
     if (needed_capacity > *capacity) {
-        size_t new_capacity = needed_capacity - (needed_capacity % 16) + 16;
+        size_t new_capacity = cx_array_align_capacity(needed_capacity, 16, SIZE_MAX);
         void *new_mem = reallocator->realloc(
                 *target, new_capacity, elem_size, reallocator
         );
         if (new_mem == NULL) {
             // give it up right away, there is no contract
             // that requires us to insert as much as we can
-            return CX_ARRAY_REALLOC_FAILED;
+            return 1;  // LCOV_EXCL_LINE
         }
         *target = new_mem;
         *capacity = new_capacity;
@@ -228,7 +451,7 @@
     // still buffer elements left?
     // don't worry, we already moved them to the correct place
 
-    return CX_ARRAY_SUCCESS;
+    return 0;
 }
 
 size_t cx_array_binary_search_inf(
@@ -255,6 +478,9 @@
         return 0;
     }
 
+    // special case: there is only one element and that is smaller
+    if (size == 1) return 0;
+
     // check the last array element
     result = cmp_func(elem, array + elem_size * (size - 1));
     if (result >= 0) {
@@ -287,10 +513,48 @@
     return result < 0 ? (pivot_index - 1) : pivot_index;
 }
 
+size_t cx_array_binary_search(
+        const void *arr,
+        size_t size,
+        size_t elem_size,
+        const void *elem,
+        cx_compare_func cmp_func
+) {
+    size_t index = cx_array_binary_search_inf(
+            arr, size, elem_size, elem, cmp_func
+    );
+    if (index < size &&
+            cmp_func(((const char *) arr) + index * elem_size, elem) == 0) {
+        return index;
+    } else {
+        return size;
+    }
+}
+
+size_t cx_array_binary_search_sup(
+        const void *arr,
+        size_t size,
+        size_t elem_size,
+        const void *elem,
+        cx_compare_func cmp_func
+) {
+    size_t inf = cx_array_binary_search_inf(
+            arr, size, elem_size, elem, cmp_func
+    );
+    if (inf == size) {
+        // no infimum means, first element is supremum
+        return 0;
+    } else if (cmp_func(((const char *) arr) + inf * elem_size, elem) == 0) {
+        return inf;
+    } else {
+        return inf + 1;
+    }
+}
+
 #ifndef CX_ARRAY_SWAP_SBO_SIZE
 #define CX_ARRAY_SWAP_SBO_SIZE 128
 #endif
-unsigned cx_array_swap_sbo_size = CX_ARRAY_SWAP_SBO_SIZE;
+const unsigned cx_array_swap_sbo_size = CX_ARRAY_SWAP_SBO_SIZE;
 
 void cx_array_swap(
         void *arr,
@@ -337,22 +601,9 @@
     struct cx_list_s base;
     void *data;
     size_t capacity;
-    struct cx_array_reallocator_s reallocator;
+    CxArrayReallocator reallocator;
 } cx_array_list;
 
-static void *cx_arl_realloc(
-        void *array,
-        size_t capacity,
-        size_t elem_size,
-        struct cx_array_reallocator_s *alloc
-) {
-    // retrieve the pointer to the list allocator
-    const CxAllocator *al = alloc->ptr1;
-
-    // use the list allocator to reallocate the memory
-    return cxRealloc(al, array, capacity * elem_size);
-}
-
 static void cx_arl_destructor(struct cx_list_s *list) {
     cx_array_list *arl = (cx_array_list *) list;
 
@@ -394,10 +645,11 @@
         size_t elems_to_move = list->collection.size - index;
         size_t start_of_moved = index + n;
 
-        if (CX_ARRAY_SUCCESS != cx_array_copy(
+        if (cx_array_copy(
                 &arl->data,
                 &list->collection.size,
                 &arl->capacity,
+                0,
                 start_of_moved,
                 first_to_move,
                 list->collection.elem_size,
@@ -414,20 +666,21 @@
     // therefore, it is impossible to leave this function with an invalid array
 
     // place the new elements
-    if (CX_ARRAY_SUCCESS == cx_array_copy(
+    if (cx_array_copy(
             &arl->data,
             &list->collection.size,
             &arl->capacity,
+            0,
             index,
             array,
             list->collection.elem_size,
             n,
             &arl->reallocator
     )) {
-        return n;
-    } else {
         // array list implementation is "all or nothing"
         return 0;
+    } else {
+        return n;
     }
 }
 
@@ -439,7 +692,7 @@
     // get a correctly typed pointer to the list
     cx_array_list *arl = (cx_array_list *) list;
 
-    if (CX_ARRAY_SUCCESS == cx_array_insert_sorted(
+    if (cx_array_insert_sorted(
             &arl->data,
             &list->collection.size,
             &arl->capacity,
@@ -449,10 +702,10 @@
             n,
             &arl->reallocator
     )) {
-        return n;
-    } else {
         // array list implementation is "all or nothing"
         return 0;
+    } else {
+        return n;
     }
 }
 
@@ -494,45 +747,66 @@
     }
 }
 
-static int cx_arl_remove(
+static size_t cx_arl_remove(
         struct cx_list_s *list,
-        size_t index
+        size_t index,
+        size_t num,
+        void *targetbuf
 ) {
     cx_array_list *arl = (cx_array_list *) list;
 
     // out-of-bounds check
+    size_t remove;
     if (index >= list->collection.size) {
-        return 1;
+        remove = 0;
+    } else if (index + num > list->collection.size) {
+        remove = list->collection.size - index;
+    } else {
+        remove = num;
     }
 
-    // content destruction
-    cx_invoke_destructor(list, ((char *) arl->data) + index * list->collection.elem_size);
+    // easy exit
+    if (remove == 0) return 0;
 
-    // short-circuit removal of last element
-    if (index == list->collection.size - 1) {
-        list->collection.size--;
-        return 0;
+    // destroy or copy contents
+    if (targetbuf == NULL) {
+        for (size_t idx = index; idx < index + remove; idx++) {
+            cx_invoke_destructor(
+                    list,
+                    ((char *) arl->data) + idx * list->collection.elem_size
+            );
+        }
+    } else {
+        memcpy(
+                targetbuf,
+                ((char *) arl->data) + index * list->collection.elem_size,
+                remove * list->collection.elem_size
+        );
     }
 
-    // just move the elements starting at index to the left
-    int result = cx_array_copy(
+    // short-circuit removal of last elements
+    if (index + remove == list->collection.size) {
+        list->collection.size -= remove;
+        return remove;
+    }
+
+    // just move the elements to the left
+    cx_array_copy(
             &arl->data,
             &list->collection.size,
             &arl->capacity,
+            0,
             index,
-            ((char *) arl->data) + (index + 1) * list->collection.elem_size,
+            ((char *) arl->data) + (index + remove) * list->collection.elem_size,
             list->collection.elem_size,
-            list->collection.size - index - 1,
+            list->collection.size - index - remove,
             &arl->reallocator
     );
 
-    // cx_array_copy cannot fail, array cannot grow
-    assert(result == 0);
+    // decrease the size
+    list->collection.size -= remove;
 
-    // decrease the size
-    list->collection.size--;
-
-    return 0;
+    return remove;
 }
 
 static void cx_arl_clear(struct cx_list_s *list) {
@@ -594,10 +868,11 @@
     for (ssize_t i = 0; i < (ssize_t) list->collection.size; i++) {
         if (0 == list->collection.cmpfunc(elem, cur)) {
             if (remove) {
-                if (0 == cx_arl_remove(list, i)) {
+                if (1 == cx_arl_remove(list, i, 1, NULL)) {
                     return i;
                 } else {
-                    return -1;
+                    // should be unreachable
+                    return -1;  // LCOV_EXCL_LINE
                 }
             } else {
                 return i;
@@ -664,7 +939,7 @@
     struct cx_iterator_s *iter = it;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index);
+        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
     } else {
         iter->index++;
         iter->elem_handle =
@@ -678,7 +953,7 @@
     const cx_array_list *list = iter->src_handle.c;
     if (iter->base.remove) {
         iter->base.remove = false;
-        cx_arl_remove(iter->src_handle.m, iter->index);
+        cx_arl_remove(iter->src_handle.m, iter->index, 1, NULL);
     }
     iter->index--;
     if (iter->index < list->base.collection.size) {
@@ -754,14 +1029,13 @@
 
     // allocate the array after the real elem_size is known
     list->data = cxCalloc(allocator, initial_capacity, elem_size);
-    if (list->data == NULL) {
+    if (list->data == NULL) { // LCOV_EXCL_START
         cxFree(allocator, list);
         return NULL;
-    }
+    } // LCOV_EXCL_STOP
 
     // configure the reallocator
-    list->reallocator.realloc = cx_arl_realloc;
-    list->reallocator.ptr1 = (void *) allocator;
+    list->reallocator = cx_array_reallocator(allocator, NULL);
 
     return (CxList *) list;
 }

mercurial