ucx/hash_map.c

Sun, 17 Dec 2023 15:33:50 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 17 Dec 2023 15:33:50 +0100
changeset 800
30d484806c2b
parent 750
4d7a2238c5ac
child 816
839fefbdedc7
permissions
-rw-r--r--

fix faulty string to int conversion utilities

Probably it was expected that errno is set to EINVAL when illegal characters are encountered. But this is not standard and does not happen on every system, allowing illegal strings to be parsed as valid integers.

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/hash_map.h"
#include "cx/utils.h"

#include <string.h>
#include <assert.h>

struct cx_hash_map_element_s {
    /** A pointer to the next element in the current bucket. */
    struct cx_hash_map_element_s *next;

    /** The corresponding key. */
    CxHashKey key;

    /** The value data. */
    char data[];
};

static void cx_hash_map_clear(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    cx_for_n(i, hash_map->bucket_count) {
        struct cx_hash_map_element_s *elem = hash_map->buckets[i];
        if (elem != NULL) {
            do {
                struct cx_hash_map_element_s *next = elem->next;
                // invoke the destructor
                cx_invoke_destructor(map, elem->data);
                // free the key data
                cxFree(map->allocator, (void *) elem->key.data);
                // free the node
                cxFree(map->allocator, elem);
                // proceed
                elem = next;
            } while (elem != NULL);

            // do not leave a dangling pointer
            hash_map->buckets[i] = NULL;
        }
    }
    map->size = 0;
}

static void cx_hash_map_destructor(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    // free the buckets
    cx_hash_map_clear(map);
    cxFree(map->allocator, hash_map->buckets);

    // free the map structure
    cxFree(map->allocator, map);
}

static int cx_hash_map_put(
        CxMap *map,
        CxHashKey key,
        void *value
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    CxAllocator const *allocator = map->allocator;

    unsigned hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;

    while (elm != NULL && elm->key.hash < hash) {
        prev = elm;
        elm = elm->next;
    }

    if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
        memcmp(elm->key.data, key.data, key.len) == 0) {
        // overwrite existing element
        if (map->store_pointer) {
            memcpy(elm->data, &value, sizeof(void *));
        } else {
            memcpy(elm->data, value, map->item_size);
        }
    } else {
        // allocate new element
        struct cx_hash_map_element_s *e = cxMalloc(
                allocator,
                sizeof(struct cx_hash_map_element_s) + map->item_size
        );
        if (e == NULL) {
            return -1;
        }

        // write the value
        if (map->store_pointer) {
            memcpy(e->data, &value, sizeof(void *));
        } else {
            memcpy(e->data, value, map->item_size);
        }

        // copy the key
        void *kd = cxMalloc(allocator, key.len);
        if (kd == NULL) {
            return -1;
        }
        memcpy(kd, key.data, key.len);
        e->key.data = kd;
        e->key.len = key.len;
        e->key.hash = hash;

        // insert the element into the linked list
        if (prev == NULL) {
            hash_map->buckets[slot] = e;
        } else {
            prev->next = e;
        }
        e->next = elm;

        // increase the size
        map->size++;
    }

    return 0;
}

static void cx_hash_map_unlink(
        struct cx_hash_map_s *hash_map,
        size_t slot,
        struct cx_hash_map_element_s *prev,
        struct cx_hash_map_element_s *elm
) {
    // unlink
    if (prev == NULL) {
        hash_map->buckets[slot] = elm->next;
    } else {
        prev->next = elm->next;
    }
    // free element
    cxFree(hash_map->base.allocator, (void *) elm->key.data);
    cxFree(hash_map->base.allocator, elm);
    // decrease size
    hash_map->base.size--;
}

/**
 * Helper function to avoid code duplication.
 *
 * @param map the map
 * @param key the key to look up
 * @param remove flag indicating whether the looked up entry shall be removed
 * @param destroy flag indicating whether the destructor shall be invoked
 * @return a pointer to the value corresponding to the key or \c NULL
 */
static void *cx_hash_map_get_remove(
        CxMap *map,
        CxHashKey key,
        bool remove,
        bool destroy
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    unsigned hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;
    while (elm && elm->key.hash <= hash) {
        if (elm->key.hash == hash && elm->key.len == key.len) {
            if (memcmp(elm->key.data, key.data, key.len) == 0) {
                void *data = NULL;
                if (destroy) {
                    cx_invoke_destructor(map, elm->data);
                } else {
                    if (map->store_pointer) {
                        data = *(void **) elm->data;
                    } else {
                        data = elm->data;
                    }
                }
                if (remove) {
                    cx_hash_map_unlink(hash_map, slot, prev, elm);
                }
                return data;
            }
        }
        prev = elm;
        elm = prev->next;
    }

    return NULL;
}

static void *cx_hash_map_get(
        CxMap const *map,
        CxHashKey key
) {
    // we can safely cast, because we know the map stays untouched
    return cx_hash_map_get_remove((CxMap *) map, key, false, false);
}

static void *cx_hash_map_remove(
        CxMap *map,
        CxHashKey key,
        bool destroy
) {
    return cx_hash_map_get_remove(map, key, true, destroy);
}

static void *cx_hash_map_iter_current_entry(void const *it) {
    struct cx_iterator_s const *iter = it;
    // struct has to have a compatible signature
    return (struct cx_map_entry_s *) &(iter->kv_data);
}

static void *cx_hash_map_iter_current_key(void const *it) {
    struct cx_iterator_s const *iter = it;
    struct cx_hash_map_element_s *elm = iter->elem_handle;
    return &elm->key;
}

static void *cx_hash_map_iter_current_value(void const *it) {
    struct cx_iterator_s const *iter = it;
    struct cx_hash_map_s const *map = iter->src_handle;
    struct cx_hash_map_element_s *elm = iter->elem_handle;
    if (map->base.store_pointer) {
        return *(void **) elm->data;
    } else {
        return elm->data;
    }
}

static bool cx_hash_map_iter_valid(void const *it) {
    struct cx_iterator_s const *iter = it;
    return iter->elem_handle != NULL;
}

static void cx_hash_map_iter_next(void *it) {
    struct cx_iterator_s *iter = it;
    struct cx_hash_map_element_s *elm = iter->elem_handle;

    // remove current element, if asked
    if (iter->base.remove) {
        // obtain mutable pointer to the map
        struct cx_mut_iterator_s *miter = it;
        struct cx_hash_map_s *map = miter->src_handle;

        // clear the flag
        iter->base.remove = false;

        // determine the next element
        struct cx_hash_map_element_s *next = elm->next;

        // search the previous element
        struct cx_hash_map_element_s *prev = NULL;
        if (map->buckets[iter->slot] != elm) {
            prev = map->buckets[iter->slot];
            while (prev->next != elm) {
                prev = prev->next;
            }
        }

        // destroy
        cx_invoke_destructor((struct cx_map_s *) map, elm->data);

        // unlink
        cx_hash_map_unlink(map, iter->slot, prev, elm);

        // advance
        elm = next;
    } else {
        // just advance
        elm = elm->next;
        iter->index++;
    }

    // search the next bucket, if required
    struct cx_hash_map_s const *map = iter->src_handle;
    while (elm == NULL && ++iter->slot < map->bucket_count) {
        elm = map->buckets[iter->slot];
    }

    // fill the struct with the next element
    iter->elem_handle = elm;
    if (elm == NULL) {
        iter->kv_data.key = NULL;
        iter->kv_data.value = NULL;
    } else {
        iter->kv_data.key = &elm->key;
        if (map->base.store_pointer) {
            iter->kv_data.value = *(void **) elm->data;
        } else {
            iter->kv_data.value = elm->data;
        }
    }
}

static bool cx_hash_map_iter_flag_rm(void *it) {
    struct cx_iterator_base_s *iter = it;
    if (iter->mutating) {
        iter->remove = true;
        return true;
    } else {
        return false;
    }
}

static CxIterator cx_hash_map_iterator(
        CxMap const *map,
        enum cx_map_iterator_type type
) {
    CxIterator iter;

    iter.src_handle = map;
    iter.base.valid = cx_hash_map_iter_valid;
    iter.base.next = cx_hash_map_iter_next;

    switch (type) {
        case CX_MAP_ITERATOR_PAIRS:
            iter.base.current = cx_hash_map_iter_current_entry;
            break;
        case CX_MAP_ITERATOR_KEYS:
            iter.base.current = cx_hash_map_iter_current_key;
            break;
        case CX_MAP_ITERATOR_VALUES:
            iter.base.current = cx_hash_map_iter_current_value;
            break;
        default:
            assert(false);
    }

    iter.base.flag_removal = cx_hash_map_iter_flag_rm;
    iter.base.remove = false;
    iter.base.mutating = false;

    iter.slot = 0;
    iter.index = 0;

    if (map->size > 0) {
        struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
        struct cx_hash_map_element_s *elm = hash_map->buckets[0];
        while (elm == NULL) {
            elm = hash_map->buckets[++iter.slot];
        }
        iter.elem_handle = elm;
        iter.kv_data.key = &elm->key;
        if (map->store_pointer) {
            iter.kv_data.value = *(void **) elm->data;
        } else {
            iter.kv_data.value = elm->data;
        }
    } else {
        iter.elem_handle = NULL;
        iter.kv_data.key = NULL;
        iter.kv_data.value = NULL;
    }

    return iter;
}

static cx_map_class cx_hash_map_class = {
        cx_hash_map_destructor,
        cx_hash_map_clear,
        cx_hash_map_put,
        cx_hash_map_get,
        cx_hash_map_remove,
        cx_hash_map_iterator,
};

CxMap *cxHashMapCreate(
        CxAllocator const *allocator,
        size_t itemsize,
        size_t buckets
) {
    if (buckets == 0) {
        // implementation defined default
        buckets = 16;
    }

    struct cx_hash_map_s *map = cxCalloc(allocator, 1,
                                         sizeof(struct cx_hash_map_s));
    if (map == NULL) return NULL;

    // initialize hash map members
    map->bucket_count = buckets;
    map->buckets = cxCalloc(allocator, buckets,
                            sizeof(struct cx_hash_map_element_s *));
    if (map->buckets == NULL) {
        cxFree(allocator, map);
        return NULL;
    }

    // initialize base members
    map->base.cl = &cx_hash_map_class;
    map->base.allocator = allocator;

    if (itemsize > 0) {
        map->base.store_pointer = false;
        map->base.item_size = itemsize;
    } else {
        map->base.store_pointer = true;
        map->base.item_size = sizeof(void *);
    }

    return (CxMap *) map;
}

int cxMapRehash(CxMap *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    if (map->size > ((hash_map->bucket_count * 3) >> 2)) {

        size_t new_bucket_count = (map->size * 5) >> 1;
        struct cx_hash_map_element_s **new_buckets = cxCalloc(
                map->allocator,
                new_bucket_count, sizeof(struct cx_hash_map_element_s *)
        );

        if (new_buckets == NULL) {
            return 1;
        }

        // iterate through the elements and assign them to their new slots
        cx_for_n(slot, hash_map->bucket_count) {
            struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
            while (elm != NULL) {
                struct cx_hash_map_element_s *next = elm->next;
                size_t new_slot = elm->key.hash % new_bucket_count;

                // find position where to insert
                struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
                struct cx_hash_map_element_s *bucket_prev = NULL;
                while (bucket_next != NULL &&
                       bucket_next->key.hash < elm->key.hash) {
                    bucket_prev = bucket_next;
                    bucket_next = bucket_next->next;
                }

                // insert
                if (bucket_prev == NULL) {
                    elm->next = new_buckets[new_slot];
                    new_buckets[new_slot] = elm;
                } else {
                    bucket_prev->next = elm;
                    elm->next = bucket_next;
                }

                // advance
                elm = next;
            }
        }

        // assign result to the map
        hash_map->bucket_count = new_bucket_count;
        cxFree(map->allocator, hash_map->buckets);
        hash_map->buckets = new_buckets;
    }
    return 0;
}

mercurial