ucx/hash_map.c

changeset 747
efbd59642577
child 750
4d7a2238c5ac
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucx/hash_map.c	Fri Apr 21 21:25:32 2023 +0200
@@ -0,0 +1,509 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include "cx/hash_map.h"
+#include "cx/utils.h"
+
+/**
+ * A single entry of a bucket's singly linked list.
+ *
+ * Elements are allocated in one chunk together with their payload
+ * (struct size plus the map's item size), the payload living in the
+ * trailing flexible array member.
+ */
+struct cx_hash_map_element_s {
+    /** A pointer to the next element in the current bucket (NULL at the end of the chain). */
+    struct cx_hash_map_element_s *next;
+
+    /**
+     * The corresponding key.
+     * The key data is a copy owned by the map; it is freed together with the element.
+     */
+    CxHashKey key;
+
+    /**
+     * The value data.
+     * Holds either a copy of the item or, for maps that store pointers,
+     * the bytes of the stored pointer itself.
+     */
+    char data[];
+};
+
+/**
+ * Removes all elements from the map.
+ *
+ * For every element the destructor hook is invoked on the value data,
+ * then the copied key and the element node are returned to the map's
+ * allocator. The bucket array itself is kept and can be reused.
+ *
+ * @param map the map to clear
+ */
+static void cx_hash_map_clear(struct cx_map_s *map) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+
+    cx_for_n(i, hash_map->bucket_count) {
+        struct cx_hash_map_element_s *node = hash_map->buckets[i];
+        while (node != NULL) {
+            // remember the successor before the node goes away
+            struct cx_hash_map_element_s *following = node->next;
+
+            cx_invoke_destructor(map, node->data);
+            cxFree(map->allocator, (void *) node->key.data);
+            cxFree(map->allocator, node);
+
+            node = following;
+        }
+
+        // the whole chain is gone, so the bucket must not point to it anymore
+        hash_map->buckets[i] = NULL;
+    }
+
+    map->size = 0;
+}
+
+/**
+ * Destroys the map: all elements, the bucket array and finally the
+ * map structure itself are released through the map's allocator.
+ *
+ * @param map the map to destroy; the pointer is invalid afterwards
+ */
+static void cx_hash_map_destructor(struct cx_map_s *map) {
+    // drop every element first, this leaves only the empty bucket array
+    cx_hash_map_clear(map);
+
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+    CxAllocator const *allocator = map->allocator;
+
+    cxFree(allocator, hash_map->buckets);
+    cxFree(allocator, map);
+}
+
+/**
+ * Stores a value under the given key.
+ *
+ * If the key already exists, its value is overwritten in place (the
+ * destructor is not invoked for the old value). Otherwise a new element
+ * is allocated and linked into the bucket, which is kept sorted by hash
+ * in ascending order.
+ *
+ * @param map the map
+ * @param key the key; a hash of zero means "not yet computed" and causes
+ *            the hash to be calculated here
+ * @param value for pointer-storing maps the pointer to store, otherwise
+ *              a pointer to the item that shall be copied into the map
+ * @return zero on success, -1 when memory allocation fails (the map is
+ *         left unchanged in that case)
+ */
+static int cx_hash_map_put(
+        CxMap *map,
+        CxHashKey key,
+        void *value
+) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+    CxAllocator const *allocator = map->allocator;
+
+    unsigned hash = key.hash;
+    if (hash == 0) {
+        cx_hash_murmur(&key);
+        hash = key.hash;
+    }
+
+    size_t slot = hash % hash_map->bucket_count;
+    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
+    struct cx_hash_map_element_s *prev = NULL;
+
+    // Walk the sorted chain. Different keys may share the same hash, so
+    // every element of the equal-hash run has to be compared - checking
+    // only the first one would insert duplicates of an existing key.
+    bool found = false;
+    while (elm != NULL && elm->key.hash <= hash) {
+        if (elm->key.hash == hash && elm->key.len == key.len &&
+            memcmp(elm->key.data, key.data, key.len) == 0) {
+            found = true;
+            break;
+        }
+        prev = elm;
+        elm = elm->next;
+    }
+
+    if (found) {
+        // overwrite existing element
+        if (map->store_pointer) {
+            memcpy(elm->data, &value, sizeof(void *));
+        } else {
+            memcpy(elm->data, value, map->item_size);
+        }
+    } else {
+        // allocate new element
+        struct cx_hash_map_element_s *e = cxMalloc(
+                allocator,
+                sizeof(struct cx_hash_map_element_s) + map->item_size
+        );
+        if (e == NULL) {
+            return -1;
+        }
+
+        // write the value
+        if (map->store_pointer) {
+            memcpy(e->data, &value, sizeof(void *));
+        } else {
+            memcpy(e->data, value, map->item_size);
+        }
+
+        // copy the key
+        void *kd = cxMalloc(allocator, key.len);
+        if (kd == NULL) {
+            // do not leak the element that was allocated above
+            cxFree(allocator, e);
+            return -1;
+        }
+        memcpy(kd, key.data, key.len);
+        e->key.data = kd;
+        e->key.len = key.len;
+        e->key.hash = hash;
+
+        // insert the element into the linked list, right before the first
+        // element with a greater hash (or at the end of the chain)
+        if (prev == NULL) {
+            hash_map->buckets[slot] = e;
+        } else {
+            prev->next = e;
+        }
+        e->next = elm;
+
+        // increase the size
+        map->size++;
+    }
+
+    return 0;
+}
+
+/**
+ * Takes an element out of its bucket chain and frees it.
+ *
+ * The copied key and the element node are released and the map size is
+ * decreased. The destructor for the value is NOT invoked here; callers
+ * that want it have to invoke it before unlinking.
+ *
+ * @param hash_map the hash map
+ * @param slot the index of the bucket that contains the element
+ * @param prev the predecessor of the element within the bucket,
+ *             or \c NULL if the element is the first of the bucket
+ * @param elm the element to unlink and free
+ */
+static void cx_hash_map_unlink(
+        struct cx_hash_map_s *hash_map,
+        size_t slot,
+        struct cx_hash_map_element_s *prev,
+        struct cx_hash_map_element_s *elm
+) {
+    // select the link that currently refers to the element ...
+    struct cx_hash_map_element_s **link =
+            prev == NULL ? &hash_map->buckets[slot] : &prev->next;
+    // ... and let it skip the element
+    *link = elm->next;
+
+    CxAllocator const *allocator = hash_map->base.allocator;
+    cxFree(allocator, (void *) elm->key.data);
+    cxFree(allocator, elm);
+
+    hash_map->base.size--;
+}
+
+/**
+ * Shared implementation of lookup and removal.
+ *
+ * Searches the bucket belonging to the key. The search stops early as
+ * soon as an element with a greater hash is encountered.
+ *
+ * NOTE(review): when \p remove is set, \p destroy is not set and the map
+ * does not store pointers, the returned pointer refers to the element's
+ * data which has already been freed by the removal. Callers must not
+ * dereference it in that combination.
+ *
+ * @param map the map
+ * @param key the key to look up; a hash of zero causes the hash to be computed
+ * @param remove flag indicating whether the looked up entry shall be removed
+ * @param destroy flag indicating whether the destructor shall be invoked
+ * @return a pointer to the value corresponding to the key, or \c NULL when
+ * the key is not found or when the destructor has been invoked
+ */
+static void *cx_hash_map_get_remove(
+        CxMap *map,
+        CxHashKey key,
+        bool remove,
+        bool destroy
+) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+
+    if (key.hash == 0) {
+        cx_hash_murmur(&key);
+    }
+    unsigned const wanted = key.hash;
+    size_t const slot = wanted % hash_map->bucket_count;
+
+    struct cx_hash_map_element_s *before = NULL;
+    for (struct cx_hash_map_element_s *cur = hash_map->buckets[slot];
+         cur != NULL && cur->key.hash <= wanted;
+         before = cur, cur = cur->next) {
+
+        // cheap checks first, compare the key bytes only when they pass
+        if (cur->key.hash != wanted || cur->key.len != key.len) {
+            continue;
+        }
+        if (memcmp(cur->key.data, key.data, key.len) != 0) {
+            continue;
+        }
+
+        void *result = NULL;
+        if (destroy) {
+            cx_invoke_destructor(map, cur->data);
+        } else if (map->store_pointer) {
+            result = *(void **) cur->data;
+        } else {
+            result = cur->data;
+        }
+
+        if (remove) {
+            cx_hash_map_unlink(hash_map, slot, before, cur);
+        }
+        return result;
+    }
+
+    return NULL;
+}
+
+/**
+ * Looks up the value for a key without modifying the map.
+ *
+ * @param map the map
+ * @param key the key to look up
+ * @return the value, or \c NULL if the key is not present
+ */
+static void *cx_hash_map_get(
+        CxMap const *map,
+        CxHashKey key
+) {
+    // dropping const is fine here: with both flags cleared the helper
+    // neither removes nor destroys anything
+    CxMap *mutable_view = (CxMap *) map;
+    return cx_hash_map_get_remove(mutable_view, key, false, false);
+}
+
+/**
+ * Removes the entry for a key from the map.
+ *
+ * @param map the map
+ * @param key the key of the entry to remove
+ * @param destroy flag indicating whether the destructor shall be invoked
+ * for the removed value
+ * @return whatever the shared lookup helper yields for the removed entry;
+ * \c NULL if the key is not present or \p destroy is set
+ */
+static void *cx_hash_map_remove(
+        CxMap *map,
+        CxHashKey key,
+        bool destroy
+) {
+    bool const unlink_entry = true;
+    return cx_hash_map_get_remove(map, key, unlink_entry, destroy);
+}
+
+/**
+ * Yields the current key/value pair of an entry iterator.
+ *
+ * @param it the iterator
+ * @return a pointer to the key/value data cached inside the iterator
+ */
+static void *cx_hash_map_iter_current_entry(void const *it) {
+    struct cx_iterator_s const *iter = it;
+    // the cached kv_data is handed out as a map entry, which requires
+    // both structs to have a compatible layout
+    struct cx_map_entry_s *entry = (struct cx_map_entry_s *) &(iter->kv_data);
+    return entry;
+}
+
+/**
+ * Yields the key of the element a key iterator currently points to.
+ *
+ * @param it the iterator
+ * @return a pointer to the key stored in the current element
+ */
+static void *cx_hash_map_iter_current_key(void const *it) {
+    struct cx_hash_map_element_s *current =
+            ((struct cx_iterator_s const *) it)->elem_handle;
+    return &current->key;
+}
+
+/**
+ * Yields the value of the element a value iterator currently points to.
+ *
+ * @param it the iterator
+ * @return the stored pointer for pointer-storing maps, otherwise a
+ * pointer to the item data embedded in the element
+ */
+static void *cx_hash_map_iter_current_value(void const *it) {
+    struct cx_iterator_s const *iter = it;
+    struct cx_hash_map_s const *owner = iter->src_handle;
+    struct cx_hash_map_element_s *current = iter->elem_handle;
+
+    return owner->base.store_pointer
+           ? *(void **) current->data
+           : (void *) current->data;
+}
+
+/**
+ * Checks whether the iterator points to an element.
+ *
+ * @param it the iterator
+ * @return true as long as there is a current element, false otherwise
+ */
+static bool cx_hash_map_iter_valid(void const *it) {
+    return ((struct cx_iterator_s const *) it)->elem_handle != NULL;
+}
+
+/**
+ * Advances the iterator to the next element of the map.
+ *
+ * When the removal flag is set, the current element is destroyed and
+ * unlinked from the map before advancing; in that case the index is
+ * left untouched, otherwise it is incremented. Once the end of the
+ * current bucket is reached, the following buckets are searched for the
+ * next element. When no element is left, the element handle and the
+ * cached key/value data are set to NULL.
+ *
+ * The current element is dereferenced without a check, so this function
+ * must only be called while the iterator is valid.
+ *
+ * @param it the iterator
+ */
+static void cx_hash_map_iter_next(void *it) {
+    struct cx_iterator_s *iter = it;
+    struct cx_hash_map_element_s *elm = iter->elem_handle;
+
+    // remove current element, if asked
+    if (iter->base.remove) {
+        // obtain mutable pointer to the map
+        // (the same iterator memory is reinterpreted as mutating iterator)
+        struct cx_mut_iterator_s *miter = it;
+        struct cx_hash_map_s *map = miter->src_handle;
+
+        // clear the flag
+        iter->base.remove = false;
+
+        // determine the next element
+        // (must happen before the unlink, because that frees the element)
+        struct cx_hash_map_element_s *next = elm->next;
+
+        // search the previous element
+        // (stays NULL when the element is the head of its bucket)
+        struct cx_hash_map_element_s *prev = NULL;
+        if (map->buckets[iter->slot] != elm) {
+            prev = map->buckets[iter->slot];
+            while (prev->next != elm) {
+                prev = prev->next;
+            }
+        }
+
+        // destroy
+        cx_invoke_destructor((struct cx_map_s *) map, elm->data);
+
+        // unlink
+        cx_hash_map_unlink(map, iter->slot, prev, elm);
+
+        // advance
+        elm = next;
+    } else {
+        // just advance
+        elm = elm->next;
+        iter->index++;
+    }
+
+    // search the next bucket, if required
+    struct cx_hash_map_s const *map = iter->src_handle;
+    while (elm == NULL && ++iter->slot < map->bucket_count) {
+        elm = map->buckets[iter->slot];
+    }
+
+    // fill the struct with the next element
+    iter->elem_handle = elm;
+    if (elm == NULL) {
+        // end of the map reached
+        iter->kv_data.key = NULL;
+        iter->kv_data.value = NULL;
+    } else {
+        iter->kv_data.key = &elm->key;
+        if (map->base.store_pointer) {
+            iter->kv_data.value = *(void **) elm->data;
+        } else {
+            iter->kv_data.value = elm->data;
+        }
+    }
+}
+
+/**
+ * Flags the current element for removal on the next advance.
+ *
+ * The request is only honored by mutating iterators.
+ *
+ * @param it the iterator (accessed through its base structure)
+ * @return true if the flag has been set, false if the iterator is not mutating
+ */
+static bool cx_hash_map_iter_flag_rm(void *it) {
+    struct cx_iterator_base_s *base = it;
+    if (!base->mutating) {
+        return false;
+    }
+    base->remove = true;
+    return true;
+}
+
+/**
+ * Creates a non-mutating iterator over the entries of the map.
+ *
+ * The iterator starts at the first element of the first non-empty
+ * bucket. For an empty map the iterator is created in the invalid state.
+ *
+ * @param map the map to iterate over
+ * @return the new iterator
+ */
+static CxIterator cx_hash_map_iterator(CxMap const *map) {
+    CxIterator iter;
+
+    iter.src_handle = map;
+
+    iter.base.valid = cx_hash_map_iter_valid;
+    iter.base.next = cx_hash_map_iter_next;
+    iter.base.current = cx_hash_map_iter_current_entry;
+    iter.base.flag_removal = cx_hash_map_iter_flag_rm;
+    iter.base.remove = false;
+    iter.base.mutating = false;
+
+    iter.slot = 0;
+    iter.index = 0;
+
+    // start out as "no element", this is the final state for empty maps
+    iter.elem_handle = NULL;
+    iter.kv_data.key = NULL;
+    iter.kv_data.value = NULL;
+
+    if (map->size > 0) {
+        struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+
+        // a non-zero size means that some bucket is not empty,
+        // hence this search does not need a bounds check
+        struct cx_hash_map_element_s *first = hash_map->buckets[iter.slot];
+        while (first == NULL) {
+            iter.slot++;
+            first = hash_map->buckets[iter.slot];
+        }
+
+        iter.elem_handle = first;
+        iter.kv_data.key = &first->key;
+        iter.kv_data.value = map->store_pointer
+                             ? *(void **) first->data
+                             : (void *) first->data;
+    }
+
+    return iter;
+}
+
+/**
+ * Creates a non-mutating iterator that yields the keys of the map.
+ *
+ * @param map the map to iterate over
+ * @return the new iterator
+ */
+static CxIterator cx_hash_map_iterator_keys(CxMap const *map) {
+    // an entry iterator that merely hands out something different
+    CxIterator key_iter = cx_hash_map_iterator(map);
+    key_iter.base.current = cx_hash_map_iter_current_key;
+    return key_iter;
+}
+
+/**
+ * Creates a non-mutating iterator that yields the values of the map.
+ *
+ * @param map the map to iterate over
+ * @return the new iterator
+ */
+static CxIterator cx_hash_map_iterator_values(CxMap const *map) {
+    // an entry iterator that merely hands out something different
+    CxIterator value_iter = cx_hash_map_iterator(map);
+    value_iter.base.current = cx_hash_map_iter_current_value;
+    return value_iter;
+}
+
+/**
+ * Creates a mutating iterator over the entries of the map.
+ *
+ * The iterator is built as a regular iterator with the mutating flag
+ * set and then copied byte-wise into the mutating iterator type.
+ *
+ * @param map the map to iterate over
+ * @return the new iterator
+ */
+static CxMutIterator cx_hash_map_mut_iterator(CxMap *map) {
+    CxIterator readonly = cx_hash_map_iterator(map);
+    readonly.base.mutating = true;
+
+    // both iterator types are expected to share the same memory layout,
+    // which is what makes the plain byte copy legitimate
+    CxMutIterator iter;
+    memcpy(&iter, &readonly, sizeof iter);
+    return iter;
+}
+
+/**
+ * Creates a mutating iterator that yields the keys of the map.
+ *
+ * @param map the map to iterate over
+ * @return the new iterator
+ */
+static CxMutIterator cx_hash_map_mut_iterator_keys(CxMap *map) {
+    CxMutIterator key_iter = cx_hash_map_mut_iterator(map);
+    key_iter.base.current = cx_hash_map_iter_current_key;
+    return key_iter;
+}
+
+/**
+ * Creates a mutating iterator that yields the values of the map.
+ *
+ * @param map the map to iterate over
+ * @return the new iterator
+ */
+static CxMutIterator cx_hash_map_mut_iterator_values(CxMap *map) {
+    CxMutIterator value_iter = cx_hash_map_mut_iterator(map);
+    value_iter.base.current = cx_hash_map_iter_current_value;
+    return value_iter;
+}
+
+/**
+ * The function table of the hash map implementation.
+ *
+ * A pointer to this table is installed as class of every map created by
+ * cxHashMapCreate(). The initializer is positional, so the order of the
+ * functions has to match the member order of cx_map_class (declared
+ * elsewhere - verify against the header when changing either side).
+ */
+static cx_map_class cx_hash_map_class = {
+        cx_hash_map_destructor,
+        cx_hash_map_clear,
+        cx_hash_map_put,
+        cx_hash_map_get,
+        cx_hash_map_remove,
+        cx_hash_map_iterator,
+        cx_hash_map_iterator_keys,
+        cx_hash_map_iterator_values,
+        cx_hash_map_mut_iterator,
+        cx_hash_map_mut_iterator_keys,
+        cx_hash_map_mut_iterator_values,
+};
+
+/**
+ * Creates a new hash map.
+ *
+ * @param allocator the allocator used for the map structure, the bucket
+ *                  array and later for all elements
+ * @param itemsize the size of one item; zero creates a map that stores
+ *                 pointers instead of copies
+ * @param buckets the number of buckets; zero selects the implementation
+ *                defined default of 16
+ * @return the new map, or \c NULL if memory allocation fails
+ */
+CxMap *cxHashMapCreate(
+        CxAllocator const *allocator,
+        size_t itemsize,
+        size_t buckets
+) {
+    // implementation defined default when the caller does not care
+    size_t const bucket_count = buckets == 0 ? 16 : buckets;
+
+    struct cx_hash_map_s *map = cxCalloc(allocator, 1,
+                                         sizeof(struct cx_hash_map_s));
+    if (map == NULL) {
+        return NULL;
+    }
+
+    // hash map specific members
+    map->buckets = cxCalloc(allocator, bucket_count,
+                            sizeof(struct cx_hash_map_element_s *));
+    if (map->buckets == NULL) {
+        // without buckets the map is useless, give the structure back
+        cxFree(allocator, map);
+        return NULL;
+    }
+    map->bucket_count = bucket_count;
+
+    // members of the base structure
+    map->base.cl = &cx_hash_map_class;
+    map->base.allocator = allocator;
+    map->base.store_pointer = itemsize == 0;
+    map->base.item_size = itemsize > 0 ? itemsize : sizeof(void *);
+
+    return (CxMap *) map;
+}
+
+/**
+ * Enlarges the bucket array when the map has become too crowded.
+ *
+ * Nothing happens while the number of elements does not exceed three
+ * quarters of the bucket count. Otherwise a new bucket array with 2.5
+ * times as many buckets as there are elements is allocated and all
+ * existing elements are moved over (no element is reallocated). Within
+ * each new bucket the elements are kept sorted by hash in ascending order.
+ *
+ * @param map the map to rehash
+ * @return zero on success (including the case that nothing had to be
+ * done), 1 if the new bucket array could not be allocated
+ */
+int cxMapRehash(CxMap *map) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+
+    // still sparse enough?
+    if (map->size <= ((hash_map->bucket_count * 3) >> 2)) {
+        return 0;
+    }
+
+    size_t new_bucket_count = (map->size * 5) >> 1;
+    struct cx_hash_map_element_s **new_buckets = cxCalloc(
+            map->allocator,
+            new_bucket_count, sizeof(struct cx_hash_map_element_s *)
+    );
+    if (new_buckets == NULL) {
+        return 1;
+    }
+
+    // move every element of every old bucket to its new slot
+    cx_for_n(slot, hash_map->bucket_count) {
+        struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
+        while (elm != NULL) {
+            // the next pointer is about to be overwritten, save it
+            struct cx_hash_map_element_s *next = elm->next;
+
+            // walk the links of the target bucket until the link is found
+            // that refers to the first element with a hash not smaller
+            // than ours (or to the end of the chain)
+            struct cx_hash_map_element_s **link =
+                    &new_buckets[elm->key.hash % new_bucket_count];
+            while (*link != NULL && (*link)->key.hash < elm->key.hash) {
+                link = &(*link)->next;
+            }
+
+            // splice the element in at that position
+            elm->next = *link;
+            *link = elm;
+
+            elm = next;
+        }
+    }
+
+    // replace the old bucket array by the new one
+    cxFree(map->allocator, hash_map->buckets);
+    hash_map->buckets = new_buckets;
+    hash_map->bucket_count = new_bucket_count;
+
+    return 0;
+}

mercurial