diff -r 809581724cc7 -r 0358f1d9c506 ucx/hash_map.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ucx/hash_map.c Mon May 22 16:17:26 2023 +0200 @@ -0,0 +1,489 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cx/hash_map.h" +#include "cx/utils.h" + +#include +#include + +struct cx_hash_map_element_s { + /** A pointer to the next element in the current bucket. */ + struct cx_hash_map_element_s *next; + + /** The corresponding key. */ + CxHashKey key; + + /** The value data. */ + char data[]; +}; + +static void cx_hash_map_clear(struct cx_map_s *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + cx_for_n(i, hash_map->bucket_count) { + struct cx_hash_map_element_s *elem = hash_map->buckets[i]; + if (elem != NULL) { + do { + struct cx_hash_map_element_s *next = elem->next; + // invoke the destructor + cx_invoke_destructor(map, elem->data); + // free the key data + cxFree(map->allocator, (void *) elem->key.data); + // free the node + cxFree(map->allocator, elem); + // proceed + elem = next; + } while (elem != NULL); + + // do not leave a dangling pointer + hash_map->buckets[i] = NULL; + } + } + map->size = 0; +} + +static void cx_hash_map_destructor(struct cx_map_s *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + + // free the buckets + cx_hash_map_clear(map); + cxFree(map->allocator, hash_map->buckets); + + // free the map structure + cxFree(map->allocator, map); +} + +static int cx_hash_map_put( + CxMap *map, + CxHashKey key, + void *value +) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + CxAllocator const *allocator = map->allocator; + + unsigned hash = key.hash; + if (hash == 0) { + cx_hash_murmur(&key); + hash = key.hash; + } + + size_t slot = hash % hash_map->bucket_count; + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + struct cx_hash_map_element_s *prev = NULL; + + while (elm != NULL && elm->key.hash < hash) { + prev = elm; + elm = elm->next; + } + + if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len && + memcmp(elm->key.data, key.data, key.len) == 0) { + // overwrite existing element + if (map->store_pointer) { + memcpy(elm->data, &value, sizeof(void *)); + } else { + memcpy(elm->data, value, map->item_size); + } + } else { + // allocate new element + struct cx_hash_map_element_s *e = cxMalloc( + allocator, + sizeof(struct cx_hash_map_element_s) + map->item_size + ); + if (e == NULL) { + return -1; + } + + // write the value + if (map->store_pointer) { + memcpy(e->data, &value, sizeof(void *)); + } else { + memcpy(e->data, value, map->item_size); + } + + // copy the key + void *kd = cxMalloc(allocator, key.len); + if (kd == NULL) { + return -1; + } + memcpy(kd, key.data, key.len); + e->key.data = kd; + e->key.len = key.len; + e->key.hash = hash; + + // insert the element into the linked list + if (prev == NULL) { + hash_map->buckets[slot] = e; + } else { + prev->next = e; + } + e->next = elm; + + // increase the size + map->size++; + } + + return 0; +} + +static void cx_hash_map_unlink( + struct cx_hash_map_s *hash_map, + size_t slot, + struct cx_hash_map_element_s *prev, + struct cx_hash_map_element_s *elm +) { + // unlink + if (prev == NULL) { + hash_map->buckets[slot] = elm->next; + } else { + prev->next = elm->next; + } + // free element + cxFree(hash_map->base.allocator, (void *) elm->key.data); + cxFree(hash_map->base.allocator, elm); + // decrease size + hash_map->base.size--; +} + +/** + * Helper function to avoid code duplication. + * + * @param map the map + * @param key the key to look up + * @param remove flag indicating whether the looked up entry shall be removed + * @param destroy flag indicating whether the destructor shall be invoked + * @return a pointer to the value corresponding to the key or \c NULL + */ +static void *cx_hash_map_get_remove( + CxMap *map, + CxHashKey key, + bool remove, + bool destroy +) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + + unsigned hash = key.hash; + if (hash == 0) { + cx_hash_murmur(&key); + hash = key.hash; + } + + size_t slot = hash % hash_map->bucket_count; + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + struct cx_hash_map_element_s *prev = NULL; + while (elm && elm->key.hash <= hash) { + if (elm->key.hash == hash && elm->key.len == key.len) { + if (memcmp(elm->key.data, key.data, key.len) == 0) { + void *data = NULL; + if (destroy) { + cx_invoke_destructor(map, elm->data); + } else { + if (map->store_pointer) { + data = *(void **) elm->data; + } else { + data = elm->data; + } + } + if (remove) { + cx_hash_map_unlink(hash_map, slot, prev, elm); + } + return data; + } + } + prev = elm; + elm = prev->next; + } + + return NULL; +} + +static void *cx_hash_map_get( + CxMap const *map, + CxHashKey key +) { + // we can safely cast, because we know the map stays untouched + return cx_hash_map_get_remove((CxMap *) map, key, false, false); +} + +static void *cx_hash_map_remove( + CxMap *map, + CxHashKey key, + bool destroy +) { + return cx_hash_map_get_remove(map, key, true, destroy); +} + +static void *cx_hash_map_iter_current_entry(void const *it) { + struct cx_iterator_s const *iter = it; + // struct has to have a compatible signature + return (struct cx_map_entry_s *) &(iter->kv_data); +} + +static void *cx_hash_map_iter_current_key(void const *it) { + struct cx_iterator_s const *iter = it; + struct cx_hash_map_element_s *elm = iter->elem_handle; + return &elm->key; +} + +static void *cx_hash_map_iter_current_value(void const *it) { + struct cx_iterator_s const *iter = it; + struct cx_hash_map_s const *map = iter->src_handle; + struct cx_hash_map_element_s *elm = iter->elem_handle; + if (map->base.store_pointer) { + return *(void **) elm->data; + } else { + return elm->data; + } +} + +static bool cx_hash_map_iter_valid(void const *it) { + struct cx_iterator_s const *iter = it; + return iter->elem_handle != NULL; +} + +static void cx_hash_map_iter_next(void *it) { + struct cx_iterator_s *iter = it; + struct cx_hash_map_element_s *elm = iter->elem_handle; + + // remove current element, if asked + if (iter->base.remove) { + // obtain mutable pointer to the map + struct cx_mut_iterator_s *miter = it; + struct cx_hash_map_s *map = miter->src_handle; + + // clear the flag + iter->base.remove = false; + + // determine the next element + struct cx_hash_map_element_s *next = elm->next; + + // search the previous element + struct cx_hash_map_element_s *prev = NULL; + if (map->buckets[iter->slot] != elm) { + prev = map->buckets[iter->slot]; + while (prev->next != elm) { + prev = prev->next; + } + } + + // destroy + cx_invoke_destructor((struct cx_map_s *) map, elm->data); + + // unlink + cx_hash_map_unlink(map, iter->slot, prev, elm); + + // advance + elm = next; + } else { + // just advance + elm = elm->next; + iter->index++; + } + + // search the next bucket, if required + struct cx_hash_map_s const *map = iter->src_handle; + while (elm == NULL && ++iter->slot < map->bucket_count) { + elm = map->buckets[iter->slot]; + } + + // fill the struct with the next element + iter->elem_handle = elm; + if (elm == NULL) { + iter->kv_data.key = NULL; + iter->kv_data.value = NULL; + } else { + iter->kv_data.key = &elm->key; + if (map->base.store_pointer) { + iter->kv_data.value = *(void **) elm->data; + } else { + iter->kv_data.value = elm->data; + } + } +} + +static bool cx_hash_map_iter_flag_rm(void *it) { + struct cx_iterator_base_s *iter = it; + if (iter->mutating) { + iter->remove = true; + return true; + } else { + return false; + } +} + +static CxIterator cx_hash_map_iterator( + CxMap const *map, + enum cx_map_iterator_type type +) { + CxIterator iter; + + iter.src_handle = map; + iter.base.valid = cx_hash_map_iter_valid; + iter.base.next = cx_hash_map_iter_next; + + switch (type) { + case CX_MAP_ITERATOR_PAIRS: + iter.base.current = cx_hash_map_iter_current_entry; + break; + case CX_MAP_ITERATOR_KEYS: + iter.base.current = cx_hash_map_iter_current_key; + break; + case CX_MAP_ITERATOR_VALUES: + iter.base.current = cx_hash_map_iter_current_value; + break; + default: + assert(false); + } + + iter.base.flag_removal = cx_hash_map_iter_flag_rm; + iter.base.remove = false; + iter.base.mutating = false; + + iter.slot = 0; + iter.index = 0; + + if (map->size > 0) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + struct cx_hash_map_element_s *elm = hash_map->buckets[0]; + while (elm == NULL) { + elm = hash_map->buckets[++iter.slot]; + } + iter.elem_handle = elm; + iter.kv_data.key = &elm->key; + if (map->store_pointer) { + iter.kv_data.value = *(void **) elm->data; + } else { + iter.kv_data.value = elm->data; + } + } else { + iter.elem_handle = NULL; + iter.kv_data.key = NULL; + iter.kv_data.value = NULL; + } + + return iter; +} + +static cx_map_class cx_hash_map_class = { + cx_hash_map_destructor, + cx_hash_map_clear, + cx_hash_map_put, + cx_hash_map_get, + cx_hash_map_remove, + cx_hash_map_iterator, +}; + +CxMap *cxHashMapCreate( + CxAllocator const *allocator, + size_t itemsize, + size_t buckets +) { + if (buckets == 0) { + // implementation defined default + buckets = 16; + } + + struct cx_hash_map_s *map = cxCalloc(allocator, 1, + sizeof(struct cx_hash_map_s)); + if (map == NULL) return NULL; + + // initialize hash map members + map->bucket_count = buckets; + map->buckets = cxCalloc(allocator, buckets, + sizeof(struct cx_hash_map_element_s *)); + if (map->buckets == NULL) { + cxFree(allocator, map); + return NULL; + } + + // initialize base members + map->base.cl = &cx_hash_map_class; + map->base.allocator = allocator; + + if (itemsize > 0) { + map->base.store_pointer = false; + map->base.item_size = itemsize; + } else { + map->base.store_pointer = true; + map->base.item_size = sizeof(void *); + } + + return (CxMap *) map; +} + +int cxMapRehash(CxMap *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + if (map->size > ((hash_map->bucket_count * 3) >> 2)) { + + size_t new_bucket_count = (map->size * 5) >> 1; + struct cx_hash_map_element_s **new_buckets = cxCalloc( + map->allocator, + new_bucket_count, sizeof(struct cx_hash_map_element_s *) + ); + + if (new_buckets == NULL) { + return 1; + } + + // iterate through the elements and assign them to their new slots + cx_for_n(slot, hash_map->bucket_count) { + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + while (elm != NULL) { + struct cx_hash_map_element_s *next = elm->next; + size_t new_slot = elm->key.hash % new_bucket_count; + + // find position where to insert + struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot]; + struct cx_hash_map_element_s *bucket_prev = NULL; + while (bucket_next != NULL && + bucket_next->key.hash < elm->key.hash) { + bucket_prev = bucket_next; + bucket_next = bucket_next->next; + } + + // insert + if (bucket_prev == NULL) { + elm->next = new_buckets[new_slot]; + new_buckets[new_slot] = elm; + } else { + bucket_prev->next = elm; + elm->next = bucket_next; + } + + // advance + elm = next; + } + } + + // assign result to the map + hash_map->bucket_count = new_bucket_count; + cxFree(map->allocator, hash_map->buckets); + hash_map->buckets = new_buckets; + } + return 0; +}