ucx/hash_map.c

branch
newapi
changeset 174
0358f1d9c506
equal deleted inserted replaced
173:809581724cc7 174:0358f1d9c506
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "cx/hash_map.h"
30 #include "cx/utils.h"
31
32 #include <string.h>
33 #include <assert.h>
34
35 struct cx_hash_map_element_s {
36 /** A pointer to the next element in the current bucket. */
37 struct cx_hash_map_element_s *next;
38
39 /** The corresponding key. */
40 CxHashKey key;
41
42 /** The value data. */
43 char data[];
44 };
45
46 static void cx_hash_map_clear(struct cx_map_s *map) {
47 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
48 cx_for_n(i, hash_map->bucket_count) {
49 struct cx_hash_map_element_s *elem = hash_map->buckets[i];
50 if (elem != NULL) {
51 do {
52 struct cx_hash_map_element_s *next = elem->next;
53 // invoke the destructor
54 cx_invoke_destructor(map, elem->data);
55 // free the key data
56 cxFree(map->allocator, (void *) elem->key.data);
57 // free the node
58 cxFree(map->allocator, elem);
59 // proceed
60 elem = next;
61 } while (elem != NULL);
62
63 // do not leave a dangling pointer
64 hash_map->buckets[i] = NULL;
65 }
66 }
67 map->size = 0;
68 }
69
70 static void cx_hash_map_destructor(struct cx_map_s *map) {
71 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
72
73 // free the buckets
74 cx_hash_map_clear(map);
75 cxFree(map->allocator, hash_map->buckets);
76
77 // free the map structure
78 cxFree(map->allocator, map);
79 }
80
81 static int cx_hash_map_put(
82 CxMap *map,
83 CxHashKey key,
84 void *value
85 ) {
86 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
87 CxAllocator const *allocator = map->allocator;
88
89 unsigned hash = key.hash;
90 if (hash == 0) {
91 cx_hash_murmur(&key);
92 hash = key.hash;
93 }
94
95 size_t slot = hash % hash_map->bucket_count;
96 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
97 struct cx_hash_map_element_s *prev = NULL;
98
99 while (elm != NULL && elm->key.hash < hash) {
100 prev = elm;
101 elm = elm->next;
102 }
103
104 if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
105 memcmp(elm->key.data, key.data, key.len) == 0) {
106 // overwrite existing element
107 if (map->store_pointer) {
108 memcpy(elm->data, &value, sizeof(void *));
109 } else {
110 memcpy(elm->data, value, map->item_size);
111 }
112 } else {
113 // allocate new element
114 struct cx_hash_map_element_s *e = cxMalloc(
115 allocator,
116 sizeof(struct cx_hash_map_element_s) + map->item_size
117 );
118 if (e == NULL) {
119 return -1;
120 }
121
122 // write the value
123 if (map->store_pointer) {
124 memcpy(e->data, &value, sizeof(void *));
125 } else {
126 memcpy(e->data, value, map->item_size);
127 }
128
129 // copy the key
130 void *kd = cxMalloc(allocator, key.len);
131 if (kd == NULL) {
132 return -1;
133 }
134 memcpy(kd, key.data, key.len);
135 e->key.data = kd;
136 e->key.len = key.len;
137 e->key.hash = hash;
138
139 // insert the element into the linked list
140 if (prev == NULL) {
141 hash_map->buckets[slot] = e;
142 } else {
143 prev->next = e;
144 }
145 e->next = elm;
146
147 // increase the size
148 map->size++;
149 }
150
151 return 0;
152 }
153
154 static void cx_hash_map_unlink(
155 struct cx_hash_map_s *hash_map,
156 size_t slot,
157 struct cx_hash_map_element_s *prev,
158 struct cx_hash_map_element_s *elm
159 ) {
160 // unlink
161 if (prev == NULL) {
162 hash_map->buckets[slot] = elm->next;
163 } else {
164 prev->next = elm->next;
165 }
166 // free element
167 cxFree(hash_map->base.allocator, (void *) elm->key.data);
168 cxFree(hash_map->base.allocator, elm);
169 // decrease size
170 hash_map->base.size--;
171 }
172
173 /**
174 * Helper function to avoid code duplication.
175 *
176 * @param map the map
177 * @param key the key to look up
178 * @param remove flag indicating whether the looked up entry shall be removed
179 * @param destroy flag indicating whether the destructor shall be invoked
180 * @return a pointer to the value corresponding to the key or \c NULL
181 */
182 static void *cx_hash_map_get_remove(
183 CxMap *map,
184 CxHashKey key,
185 bool remove,
186 bool destroy
187 ) {
188 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
189
190 unsigned hash = key.hash;
191 if (hash == 0) {
192 cx_hash_murmur(&key);
193 hash = key.hash;
194 }
195
196 size_t slot = hash % hash_map->bucket_count;
197 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
198 struct cx_hash_map_element_s *prev = NULL;
199 while (elm && elm->key.hash <= hash) {
200 if (elm->key.hash == hash && elm->key.len == key.len) {
201 if (memcmp(elm->key.data, key.data, key.len) == 0) {
202 void *data = NULL;
203 if (destroy) {
204 cx_invoke_destructor(map, elm->data);
205 } else {
206 if (map->store_pointer) {
207 data = *(void **) elm->data;
208 } else {
209 data = elm->data;
210 }
211 }
212 if (remove) {
213 cx_hash_map_unlink(hash_map, slot, prev, elm);
214 }
215 return data;
216 }
217 }
218 prev = elm;
219 elm = prev->next;
220 }
221
222 return NULL;
223 }
224
225 static void *cx_hash_map_get(
226 CxMap const *map,
227 CxHashKey key
228 ) {
229 // we can safely cast, because we know the map stays untouched
230 return cx_hash_map_get_remove((CxMap *) map, key, false, false);
231 }
232
233 static void *cx_hash_map_remove(
234 CxMap *map,
235 CxHashKey key,
236 bool destroy
237 ) {
238 return cx_hash_map_get_remove(map, key, true, destroy);
239 }
240
241 static void *cx_hash_map_iter_current_entry(void const *it) {
242 struct cx_iterator_s const *iter = it;
243 // struct has to have a compatible signature
244 return (struct cx_map_entry_s *) &(iter->kv_data);
245 }
246
247 static void *cx_hash_map_iter_current_key(void const *it) {
248 struct cx_iterator_s const *iter = it;
249 struct cx_hash_map_element_s *elm = iter->elem_handle;
250 return &elm->key;
251 }
252
253 static void *cx_hash_map_iter_current_value(void const *it) {
254 struct cx_iterator_s const *iter = it;
255 struct cx_hash_map_s const *map = iter->src_handle;
256 struct cx_hash_map_element_s *elm = iter->elem_handle;
257 if (map->base.store_pointer) {
258 return *(void **) elm->data;
259 } else {
260 return elm->data;
261 }
262 }
263
264 static bool cx_hash_map_iter_valid(void const *it) {
265 struct cx_iterator_s const *iter = it;
266 return iter->elem_handle != NULL;
267 }
268
269 static void cx_hash_map_iter_next(void *it) {
270 struct cx_iterator_s *iter = it;
271 struct cx_hash_map_element_s *elm = iter->elem_handle;
272
273 // remove current element, if asked
274 if (iter->base.remove) {
275 // obtain mutable pointer to the map
276 struct cx_mut_iterator_s *miter = it;
277 struct cx_hash_map_s *map = miter->src_handle;
278
279 // clear the flag
280 iter->base.remove = false;
281
282 // determine the next element
283 struct cx_hash_map_element_s *next = elm->next;
284
285 // search the previous element
286 struct cx_hash_map_element_s *prev = NULL;
287 if (map->buckets[iter->slot] != elm) {
288 prev = map->buckets[iter->slot];
289 while (prev->next != elm) {
290 prev = prev->next;
291 }
292 }
293
294 // destroy
295 cx_invoke_destructor((struct cx_map_s *) map, elm->data);
296
297 // unlink
298 cx_hash_map_unlink(map, iter->slot, prev, elm);
299
300 // advance
301 elm = next;
302 } else {
303 // just advance
304 elm = elm->next;
305 iter->index++;
306 }
307
308 // search the next bucket, if required
309 struct cx_hash_map_s const *map = iter->src_handle;
310 while (elm == NULL && ++iter->slot < map->bucket_count) {
311 elm = map->buckets[iter->slot];
312 }
313
314 // fill the struct with the next element
315 iter->elem_handle = elm;
316 if (elm == NULL) {
317 iter->kv_data.key = NULL;
318 iter->kv_data.value = NULL;
319 } else {
320 iter->kv_data.key = &elm->key;
321 if (map->base.store_pointer) {
322 iter->kv_data.value = *(void **) elm->data;
323 } else {
324 iter->kv_data.value = elm->data;
325 }
326 }
327 }
328
329 static bool cx_hash_map_iter_flag_rm(void *it) {
330 struct cx_iterator_base_s *iter = it;
331 if (iter->mutating) {
332 iter->remove = true;
333 return true;
334 } else {
335 return false;
336 }
337 }
338
339 static CxIterator cx_hash_map_iterator(
340 CxMap const *map,
341 enum cx_map_iterator_type type
342 ) {
343 CxIterator iter;
344
345 iter.src_handle = map;
346 iter.base.valid = cx_hash_map_iter_valid;
347 iter.base.next = cx_hash_map_iter_next;
348
349 switch (type) {
350 case CX_MAP_ITERATOR_PAIRS:
351 iter.base.current = cx_hash_map_iter_current_entry;
352 break;
353 case CX_MAP_ITERATOR_KEYS:
354 iter.base.current = cx_hash_map_iter_current_key;
355 break;
356 case CX_MAP_ITERATOR_VALUES:
357 iter.base.current = cx_hash_map_iter_current_value;
358 break;
359 default:
360 assert(false);
361 }
362
363 iter.base.flag_removal = cx_hash_map_iter_flag_rm;
364 iter.base.remove = false;
365 iter.base.mutating = false;
366
367 iter.slot = 0;
368 iter.index = 0;
369
370 if (map->size > 0) {
371 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
372 struct cx_hash_map_element_s *elm = hash_map->buckets[0];
373 while (elm == NULL) {
374 elm = hash_map->buckets[++iter.slot];
375 }
376 iter.elem_handle = elm;
377 iter.kv_data.key = &elm->key;
378 if (map->store_pointer) {
379 iter.kv_data.value = *(void **) elm->data;
380 } else {
381 iter.kv_data.value = elm->data;
382 }
383 } else {
384 iter.elem_handle = NULL;
385 iter.kv_data.key = NULL;
386 iter.kv_data.value = NULL;
387 }
388
389 return iter;
390 }
391
392 static cx_map_class cx_hash_map_class = {
393 cx_hash_map_destructor,
394 cx_hash_map_clear,
395 cx_hash_map_put,
396 cx_hash_map_get,
397 cx_hash_map_remove,
398 cx_hash_map_iterator,
399 };
400
401 CxMap *cxHashMapCreate(
402 CxAllocator const *allocator,
403 size_t itemsize,
404 size_t buckets
405 ) {
406 if (buckets == 0) {
407 // implementation defined default
408 buckets = 16;
409 }
410
411 struct cx_hash_map_s *map = cxCalloc(allocator, 1,
412 sizeof(struct cx_hash_map_s));
413 if (map == NULL) return NULL;
414
415 // initialize hash map members
416 map->bucket_count = buckets;
417 map->buckets = cxCalloc(allocator, buckets,
418 sizeof(struct cx_hash_map_element_s *));
419 if (map->buckets == NULL) {
420 cxFree(allocator, map);
421 return NULL;
422 }
423
424 // initialize base members
425 map->base.cl = &cx_hash_map_class;
426 map->base.allocator = allocator;
427
428 if (itemsize > 0) {
429 map->base.store_pointer = false;
430 map->base.item_size = itemsize;
431 } else {
432 map->base.store_pointer = true;
433 map->base.item_size = sizeof(void *);
434 }
435
436 return (CxMap *) map;
437 }
438
439 int cxMapRehash(CxMap *map) {
440 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
441 if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
442
443 size_t new_bucket_count = (map->size * 5) >> 1;
444 struct cx_hash_map_element_s **new_buckets = cxCalloc(
445 map->allocator,
446 new_bucket_count, sizeof(struct cx_hash_map_element_s *)
447 );
448
449 if (new_buckets == NULL) {
450 return 1;
451 }
452
453 // iterate through the elements and assign them to their new slots
454 cx_for_n(slot, hash_map->bucket_count) {
455 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
456 while (elm != NULL) {
457 struct cx_hash_map_element_s *next = elm->next;
458 size_t new_slot = elm->key.hash % new_bucket_count;
459
460 // find position where to insert
461 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
462 struct cx_hash_map_element_s *bucket_prev = NULL;
463 while (bucket_next != NULL &&
464 bucket_next->key.hash < elm->key.hash) {
465 bucket_prev = bucket_next;
466 bucket_next = bucket_next->next;
467 }
468
469 // insert
470 if (bucket_prev == NULL) {
471 elm->next = new_buckets[new_slot];
472 new_buckets[new_slot] = elm;
473 } else {
474 bucket_prev->next = elm;
475 elm->next = bucket_next;
476 }
477
478 // advance
479 elm = next;
480 }
481 }
482
483 // assign result to the map
484 hash_map->bucket_count = new_bucket_count;
485 cxFree(map->allocator, hash_map->buckets);
486 hash_map->buckets = new_buckets;
487 }
488 return 0;
489 }

mercurial