src/ucx/hash_map.c

changeset 415
d938228c382e
child 438
22eca559aded
equal deleted inserted replaced
414:99a34860c105 415:d938228c382e
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <string.h>
30 #include "cx/hash_map.h"
31 #include "cx/utils.h"
32
33 static void cx_hash_map_clear(struct cx_map_s *map) {
34 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
35 cx_for_n(i, hash_map->bucket_count) {
36 struct cx_hash_map_element_s *elem = hash_map->buckets[i];
37 if (elem != NULL) {
38 do {
39 struct cx_hash_map_element_s *next = elem->next;
40 // free the key data
41 cxFree(map->allocator, elem->key.data.obj);
42 // free the node
43 cxFree(map->allocator, elem);
44 // proceed
45 elem = next;
46 } while (elem != NULL);
47
48 // do not leave a dangling pointer
49 hash_map->buckets[i] = NULL;
50 }
51 }
52 map->size = 0;
53 }
54
55 static void cx_hash_map_destructor(struct cx_map_s *map) {
56 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
57
58 // free the buckets
59 cx_hash_map_clear(map);
60 cxFree(map->allocator, hash_map->buckets);
61
62 // free the map structure
63 cxFree(map->allocator, map);
64 }
65
66 static int cx_hash_map_put(
67 CxMap *map,
68 CxHashKey key,
69 void *value
70 ) {
71 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
72 CxAllocator *allocator = map->allocator;
73
74 unsigned hash = key.hash;
75 if (hash == 0) {
76 cx_hash_murmur(&key);
77 hash = key.hash;
78 }
79
80 size_t slot = hash % hash_map->bucket_count;
81 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
82 struct cx_hash_map_element_s *prev = NULL;
83
84 while (elm != NULL && elm->key.hash < hash) {
85 prev = elm;
86 elm = elm->next;
87 }
88
89 if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
90 memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
91 // overwrite existing element
92 elm->data = value;
93 } else {
94 // allocate new element
95 struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
96 if (e == NULL) {
97 return -1;
98 }
99
100 // write the value
101 // TODO: depending on future map features, we may want to copy here
102 e->data = value;
103
104 // copy the key
105 void *kd = cxMalloc(allocator, key.len);
106 if (kd == NULL) {
107 return -1;
108 }
109 memcpy(kd, key.data.obj, key.len);
110 e->key.data.obj = kd;
111 e->key.len = key.len;
112 e->key.hash = hash;
113
114 // insert the element into the linked list
115 if (prev == NULL) {
116 hash_map->buckets[slot] = e;
117 } else {
118 prev->next = e;
119 }
120 e->next = elm;
121
122 // increase the size
123 map->size++;
124 }
125
126 return 0;
127 }
128
129 static void cx_hash_map_unlink(
130 struct cx_hash_map_s *hash_map,
131 size_t slot,
132 struct cx_hash_map_element_s *prev,
133 struct cx_hash_map_element_s *elm
134 ) {
135 // unlink
136 if (prev == NULL) {
137 hash_map->buckets[slot] = elm->next;
138 } else {
139 prev->next = elm->next;
140 }
141 // free element
142 cxFree(hash_map->base.allocator, elm->key.data.obj);
143 cxFree(hash_map->base.allocator, elm);
144 // decrease size
145 hash_map->base.size--;
146 }
147
148 /**
149 * Helper function to avoid code duplication.
150 *
151 * @param map the map
152 * @param key the key to look up
153 * @param remove flag indicating whether the looked up entry shall be removed
154 * @return the value corresponding to the key or \c NULL
155 */
156 static void *cx_hash_map_get_remove(
157 CxMap *map,
158 CxHashKey key,
159 bool remove
160 ) {
161 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
162
163 unsigned hash = key.hash;
164 if (hash == 0) {
165 cx_hash_murmur(&key);
166 hash = key.hash;
167 }
168
169 size_t slot = hash % hash_map->bucket_count;
170 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
171 struct cx_hash_map_element_s *prev = NULL;
172 while (elm && elm->key.hash <= hash) {
173 if (elm->key.hash == hash && elm->key.len == key.len) {
174 if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
175 void *data = elm->data;
176 if (remove) {
177 cx_hash_map_unlink(hash_map, slot, prev, elm);
178 }
179 return data;
180 }
181 }
182 prev = elm;
183 elm = prev->next;
184 }
185
186 return NULL;
187 }
188
189 static void *cx_hash_map_get(
190 CxMap const *map,
191 CxHashKey key
192 ) {
193 // we can safely cast, because we know when remove=false, the map stays untouched
194 return cx_hash_map_get_remove((CxMap *) map, key, false);
195 }
196
197 static void *cx_hash_map_remove(
198 CxMap *map,
199 CxHashKey key
200 ) {
201 return cx_hash_map_get_remove(map, key, true);
202 }
203
204 static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
205 // struct has to have a compatible signature
206 return (struct cx_map_entry_s *) &(iter->kv_data);
207 }
208
209 static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
210 struct cx_hash_map_element_s *elm = iter->elem_handle;
211 return &elm->key;
212 }
213
214 static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
215 struct cx_hash_map_element_s *elm = iter->elem_handle;
216 // TODO: return a pointer to data if this map is storing copies
217 return elm->data;
218 }
219
220 static bool cx_hash_map_iter_valid(CxIterator const *iter) {
221 return iter->elem_handle != NULL;
222 }
223
224 static void cx_hash_map_iter_next(CxIterator *iter) {
225 struct cx_hash_map_s *map = iter->src_handle;
226 struct cx_hash_map_element_s *elm = iter->elem_handle;
227
228 // remove current element, if asked
229 if (iter->remove) {
230 // clear the flag
231 iter->remove = false;
232
233 // determine the next element
234 struct cx_hash_map_element_s *next = elm->next;
235
236 // search the previous element
237 struct cx_hash_map_element_s *prev = NULL;
238 if (map->buckets[iter->slot] != elm) {
239 prev = map->buckets[iter->slot];
240 while (prev->next != elm) {
241 prev = prev->next;
242 }
243 }
244
245 // unlink
246 cx_hash_map_unlink(map, iter->slot, prev, elm);
247
248 // advance
249 elm = next;
250 } else {
251 // just advance
252 elm = elm->next;
253 iter->index++;
254 }
255
256 // search the next bucket, if required
257 while (elm == NULL && ++iter->slot < map->bucket_count) {
258 elm = map->buckets[iter->slot];
259 }
260
261 // fill the struct with the next element
262 iter->elem_handle = elm;
263 if (elm == NULL) {
264 iter->kv_data.key = NULL;
265 iter->kv_data.value = NULL;
266 } else {
267 iter->kv_data.key = &elm->key;
268 // TODO: pointer to data if this map is storing copies
269 iter->kv_data.value = elm->data;
270 }
271 }
272
273 static CxIterator cx_hash_map_iterator(CxMap *map) {
274 CxIterator iter;
275
276 iter.src_handle = map;
277 iter.valid = cx_hash_map_iter_valid;
278 iter.next = cx_hash_map_iter_next;
279 iter.current = cx_hash_map_iter_current_entry;
280
281 iter.slot = 0;
282 iter.index = 0;
283 iter.remove = false;
284
285 if (map->size > 0) {
286 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
287 struct cx_hash_map_element_s *elm = hash_map->buckets[0];
288 for (; elm == NULL; iter.slot++) {
289 elm = hash_map->buckets[iter.slot];
290 }
291 iter.elem_handle = elm;
292 iter.kv_data.key = &elm->key;
293 // TODO: pointer to data if this map is storing copies
294 iter.kv_data.value = elm->data;
295 } else {
296 iter.elem_handle = NULL;
297 iter.kv_data.key = NULL;
298 iter.kv_data.value = NULL;
299 }
300
301 return iter;
302 }
303
304 static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
305 CxIterator iter = cx_hash_map_iterator(map);
306 iter.current = cx_hash_map_iter_current_key;
307 return iter;
308 }
309
310 static CxIterator cx_hash_map_iterator_values(CxMap *map) {
311 CxIterator iter = cx_hash_map_iterator(map);
312 iter.current = cx_hash_map_iter_current_value;
313 return iter;
314 }
315
316 static cx_map_class cx_hash_map_class = {
317 cx_hash_map_destructor,
318 cx_hash_map_clear,
319 cx_hash_map_put,
320 cx_hash_map_get,
321 cx_hash_map_remove,
322 cx_hash_map_iterator,
323 cx_hash_map_iterator_keys,
324 cx_hash_map_iterator_values,
325 };
326
327 CxMap *cxHashMapCreate(
328 CxAllocator *allocator,
329 size_t buckets
330 ) {
331 if (buckets == 0) {
332 // implementation defined default
333 buckets = 16;
334 }
335
336 struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
337 if (map == NULL) return NULL;
338
339 // initialize hash map members
340 map->bucket_count = buckets;
341 map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
342 if (map->buckets == NULL) {
343 cxFree(allocator, map);
344 return NULL;
345 }
346
347 // initialize base members
348 map->base.cl = &cx_hash_map_class;
349 map->base.allocator = allocator;
350 map->base.size = 0;
351
352 return (CxMap *) map;
353 }
354
355 int cxMapRehash(CxMap *map) {
356 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
357 if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
358
359 size_t new_bucket_count = (map->size * 5) >> 1;
360 struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
361 new_bucket_count, sizeof(struct cx_hash_map_element_s *));
362
363 if (new_buckets == NULL) {
364 return 1;
365 }
366
367 // iterate through the elements and assign them to their new slots
368 cx_for_n(slot, hash_map->bucket_count) {
369 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
370 while (elm != NULL) {
371 struct cx_hash_map_element_s *next = elm->next;
372 size_t new_slot = elm->key.hash % new_bucket_count;
373
374 // find position where to insert
375 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
376 struct cx_hash_map_element_s *bucket_prev = NULL;
377 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
378 bucket_prev = bucket_next;
379 bucket_next = bucket_next->next;
380 }
381
382 // insert
383 if (bucket_prev == NULL) {
384 elm->next = new_buckets[new_slot];
385 new_buckets[new_slot] = elm;
386 } else {
387 bucket_prev->next = elm;
388 elm->next = bucket_next;
389 }
390
391 // advance
392 elm = next;
393 }
394 }
395
396 // assign result to the map
397 hash_map->bucket_count = new_bucket_count;
398 cxFree(map->allocator, hash_map->buckets);
399 hash_map->buckets = new_buckets;
400 }
401 return 0;
402 }

mercurial