--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ucx/hash_map.c Sun Nov 06 15:53:32 2022 +0100 @@ -0,0 +1,402 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <string.h> +#include "cx/hash_map.h" +#include "cx/utils.h" + +static void cx_hash_map_clear(struct cx_map_s *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + cx_for_n(i, hash_map->bucket_count) { + struct cx_hash_map_element_s *elem = hash_map->buckets[i]; + if (elem != NULL) { + do { + struct cx_hash_map_element_s *next = elem->next; + // free the key data + cxFree(map->allocator, elem->key.data.obj); + // free the node + cxFree(map->allocator, elem); + // proceed + elem = next; + } while (elem != NULL); + + // do not leave a dangling pointer + hash_map->buckets[i] = NULL; + } + } + map->size = 0; +} + +static void cx_hash_map_destructor(struct cx_map_s *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + + // free the buckets + cx_hash_map_clear(map); + cxFree(map->allocator, hash_map->buckets); + + // free the map structure + cxFree(map->allocator, map); +} + +static int cx_hash_map_put( + CxMap *map, + CxHashKey key, + void *value +) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + CxAllocator *allocator = map->allocator; + + unsigned hash = key.hash; + if (hash == 0) { + cx_hash_murmur(&key); + hash = key.hash; + } + + size_t slot = hash % hash_map->bucket_count; + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + struct cx_hash_map_element_s *prev = NULL; + + while (elm != NULL && elm->key.hash < hash) { + prev = elm; + elm = elm->next; + } + + if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len && + memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) { + // overwrite existing element + elm->data = value; + } else { + // allocate new element + struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s)); + if (e == NULL) { + return -1; + } + + // write the value + // TODO: depending on future map features, we may want to copy here + e->data = value; + + // copy the key + void *kd = cxMalloc(allocator, key.len); + if (kd == NULL) { + return -1; + } + memcpy(kd, key.data.obj, key.len); + e->key.data.obj = kd; + e->key.len = key.len; + e->key.hash = hash; + + // insert the element into the linked list + if (prev == NULL) { + hash_map->buckets[slot] = e; + } else { + prev->next = e; + } + e->next = elm; + + // increase the size + map->size++; + } + + return 0; +} + +static void cx_hash_map_unlink( + struct cx_hash_map_s *hash_map, + size_t slot, + struct cx_hash_map_element_s *prev, + struct cx_hash_map_element_s *elm +) { + // unlink + if (prev == NULL) { + hash_map->buckets[slot] = elm->next; + } else { + prev->next = elm->next; + } + // free element + cxFree(hash_map->base.allocator, elm->key.data.obj); + cxFree(hash_map->base.allocator, elm); + // decrease size + hash_map->base.size--; +} + +/** + * Helper function to avoid code duplication. + * + * @param map the map + * @param key the key to look up + * @param remove flag indicating whether the looked up entry shall be removed + * @return the value corresponding to the key or \c NULL + */ +static void *cx_hash_map_get_remove( + CxMap *map, + CxHashKey key, + bool remove +) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + + unsigned hash = key.hash; + if (hash == 0) { + cx_hash_murmur(&key); + hash = key.hash; + } + + size_t slot = hash % hash_map->bucket_count; + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + struct cx_hash_map_element_s *prev = NULL; + while (elm && elm->key.hash <= hash) { + if (elm->key.hash == hash && elm->key.len == key.len) { + if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) { + void *data = elm->data; + if (remove) { + cx_hash_map_unlink(hash_map, slot, prev, elm); + } + return data; + } + } + prev = elm; + elm = prev->next; + } + + return NULL; +} + +static void *cx_hash_map_get( + CxMap const *map, + CxHashKey key +) { + // we can safely cast, because we know when remove=false, the map stays untouched + return cx_hash_map_get_remove((CxMap *) map, key, false); +} + +static void *cx_hash_map_remove( + CxMap *map, + CxHashKey key +) { + return cx_hash_map_get_remove(map, key, true); +} + +static void *cx_hash_map_iter_current_entry(CxIterator const *iter) { + // struct has to have a compatible signature + return (struct cx_map_entry_s *) &(iter->kv_data); +} + +static void *cx_hash_map_iter_current_key(CxIterator const *iter) { + struct cx_hash_map_element_s *elm = iter->elem_handle; + return &elm->key; +} + +static void *cx_hash_map_iter_current_value(CxIterator const *iter) { + struct cx_hash_map_element_s *elm = iter->elem_handle; + // TODO: return a pointer to data if this map is storing copies + return elm->data; +} + +static bool cx_hash_map_iter_valid(CxIterator const *iter) { + return iter->elem_handle != NULL; +} + +static void cx_hash_map_iter_next(CxIterator *iter) { + struct cx_hash_map_s *map = iter->src_handle; + struct cx_hash_map_element_s *elm = iter->elem_handle; + + // remove current element, if asked + if (iter->remove) { + // clear the flag + iter->remove = false; + + // determine the next element + struct cx_hash_map_element_s *next = elm->next; + + // search the previous element + struct cx_hash_map_element_s *prev = NULL; + if (map->buckets[iter->slot] != elm) { + prev = map->buckets[iter->slot]; + while (prev->next != elm) { + prev = prev->next; + } + } + + // unlink + cx_hash_map_unlink(map, iter->slot, prev, elm); + + // advance + elm = next; + } else { + // just advance + elm = elm->next; + iter->index++; + } + + // search the next bucket, if required + while (elm == NULL && ++iter->slot < map->bucket_count) { + elm = map->buckets[iter->slot]; + } + + // fill the struct with the next element + iter->elem_handle = elm; + if (elm == NULL) { + iter->kv_data.key = NULL; + iter->kv_data.value = NULL; + } else { + iter->kv_data.key = &elm->key; + // TODO: pointer to data if this map is storing copies + iter->kv_data.value = elm->data; + } +} + +static CxIterator cx_hash_map_iterator(CxMap *map) { + CxIterator iter; + + iter.src_handle = map; + iter.valid = cx_hash_map_iter_valid; + iter.next = cx_hash_map_iter_next; + iter.current = cx_hash_map_iter_current_entry; + + iter.slot = 0; + iter.index = 0; + iter.remove = false; + + if (map->size > 0) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + struct cx_hash_map_element_s *elm = hash_map->buckets[0]; + for (; elm == NULL; iter.slot++) { + elm = hash_map->buckets[iter.slot]; + } + iter.elem_handle = elm; + iter.kv_data.key = &elm->key; + // TODO: pointer to data if this map is storing copies + iter.kv_data.value = elm->data; + } else { + iter.elem_handle = NULL; + iter.kv_data.key = NULL; + iter.kv_data.value = NULL; + } + + return iter; +} + +static CxIterator cx_hash_map_iterator_keys(CxMap *map) { + CxIterator iter = cx_hash_map_iterator(map); + iter.current = cx_hash_map_iter_current_key; + return iter; +} + +static CxIterator cx_hash_map_iterator_values(CxMap *map) { + CxIterator iter = cx_hash_map_iterator(map); + iter.current = cx_hash_map_iter_current_value; + return iter; +} + +static cx_map_class cx_hash_map_class = { + cx_hash_map_destructor, + cx_hash_map_clear, + cx_hash_map_put, + cx_hash_map_get, + cx_hash_map_remove, + cx_hash_map_iterator, + cx_hash_map_iterator_keys, + cx_hash_map_iterator_values, +}; + +CxMap *cxHashMapCreate( + CxAllocator *allocator, + size_t buckets +) { + if (buckets == 0) { + // implementation defined default + buckets = 16; + } + + struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s)); + if (map == NULL) return NULL; + + // initialize hash map members + map->bucket_count = buckets; + map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *)); + if (map->buckets == NULL) { + cxFree(allocator, map); + return NULL; + } + + // initialize base members + map->base.cl = &cx_hash_map_class; + map->base.allocator = allocator; + map->base.size = 0; + + return (CxMap *) map; +} + +int cxMapRehash(CxMap *map) { + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; + if (map->size > ((hash_map->bucket_count * 3) >> 2)) { + + size_t new_bucket_count = (map->size * 5) >> 1; + struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator, + new_bucket_count, sizeof(struct cx_hash_map_element_s *)); + + if (new_buckets == NULL) { + return 1; + } + + // iterate through the elements and assign them to their new slots + cx_for_n(slot, hash_map->bucket_count) { + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; + while (elm != NULL) { + struct cx_hash_map_element_s *next = elm->next; + size_t new_slot = elm->key.hash % new_bucket_count; + + // find position where to insert + struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot]; + struct cx_hash_map_element_s *bucket_prev = NULL; + while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) { + bucket_prev = bucket_next; + bucket_next = bucket_next->next; + } + + // insert + if (bucket_prev == NULL) { + elm->next = new_buckets[new_slot]; + new_buckets[new_slot] = elm; + } else { + bucket_prev->next = elm; + elm->next = bucket_next; + } + + // advance + elm = next; + } + } + + // assign result to the map + hash_map->bucket_count = new_bucket_count; + cxFree(map->allocator, hash_map->buckets); + hash_map->buckets = new_buckets; + } + return 0; +}