ucx/hash_map.c

changeset 431
bb7da585debc
parent 324
ce13a778654a
child 440
7c4b9cba09ca
equal deleted inserted replaced
169:fe49cff3c571 431:bb7da585debc
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "cx/hash_map.h"
30 #include "cx/utils.h"
31
32 #include <string.h>
33 #include <assert.h>
34
35 struct cx_hash_map_element_s {
36 /** A pointer to the next element in the current bucket. */
37 struct cx_hash_map_element_s *next;
38
39 /** The corresponding key. */
40 CxHashKey key;
41
42 /** The value data. */
43 char data[];
44 };
45
46 static void cx_hash_map_clear(struct cx_map_s *map) {
47 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
48 cx_for_n(i, hash_map->bucket_count) {
49 struct cx_hash_map_element_s *elem = hash_map->buckets[i];
50 if (elem != NULL) {
51 do {
52 struct cx_hash_map_element_s *next = elem->next;
53 // invoke the destructor
54 cx_invoke_destructor(map, elem->data);
55 // free the key data
56 cxFree(map->collection.allocator, (void *) elem->key.data);
57 // free the node
58 cxFree(map->collection.allocator, elem);
59 // proceed
60 elem = next;
61 } while (elem != NULL);
62
63 // do not leave a dangling pointer
64 hash_map->buckets[i] = NULL;
65 }
66 }
67 map->collection.size = 0;
68 }
69
70 static void cx_hash_map_destructor(struct cx_map_s *map) {
71 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
72
73 // free the buckets
74 cx_hash_map_clear(map);
75 cxFree(map->collection.allocator, hash_map->buckets);
76
77 // free the map structure
78 cxFree(map->collection.allocator, map);
79 }
80
81 static int cx_hash_map_put(
82 CxMap *map,
83 CxHashKey key,
84 void *value
85 ) {
86 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
87 const CxAllocator *allocator = map->collection.allocator;
88
89 unsigned hash = key.hash;
90 if (hash == 0) {
91 cx_hash_murmur(&key);
92 hash = key.hash;
93 }
94
95 size_t slot = hash % hash_map->bucket_count;
96 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
97 struct cx_hash_map_element_s *prev = NULL;
98
99 while (elm != NULL && elm->key.hash < hash) {
100 prev = elm;
101 elm = elm->next;
102 }
103
104 if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
105 memcmp(elm->key.data, key.data, key.len) == 0) {
106 // overwrite existing element
107 if (map->collection.store_pointer) {
108 memcpy(elm->data, &value, sizeof(void *));
109 } else {
110 memcpy(elm->data, value, map->collection.elem_size);
111 }
112 } else {
113 // allocate new element
114 struct cx_hash_map_element_s *e = cxMalloc(
115 allocator,
116 sizeof(struct cx_hash_map_element_s) + map->collection.elem_size
117 );
118 if (e == NULL) {
119 return -1;
120 }
121
122 // write the value
123 if (map->collection.store_pointer) {
124 memcpy(e->data, &value, sizeof(void *));
125 } else {
126 memcpy(e->data, value, map->collection.elem_size);
127 }
128
129 // copy the key
130 void *kd = cxMalloc(allocator, key.len);
131 if (kd == NULL) {
132 return -1;
133 }
134 memcpy(kd, key.data, key.len);
135 e->key.data = kd;
136 e->key.len = key.len;
137 e->key.hash = hash;
138
139 // insert the element into the linked list
140 if (prev == NULL) {
141 hash_map->buckets[slot] = e;
142 } else {
143 prev->next = e;
144 }
145 e->next = elm;
146
147 // increase the size
148 map->collection.size++;
149 }
150
151 return 0;
152 }
153
154 static void cx_hash_map_unlink(
155 struct cx_hash_map_s *hash_map,
156 size_t slot,
157 struct cx_hash_map_element_s *prev,
158 struct cx_hash_map_element_s *elm
159 ) {
160 // unlink
161 if (prev == NULL) {
162 hash_map->buckets[slot] = elm->next;
163 } else {
164 prev->next = elm->next;
165 }
166 // free element
167 cxFree(hash_map->base.collection.allocator, (void *) elm->key.data);
168 cxFree(hash_map->base.collection.allocator, elm);
169 // decrease size
170 hash_map->base.collection.size--;
171 }
172
173 /**
174 * Helper function to avoid code duplication.
175 *
176 * @param map the map
177 * @param key the key to look up
178 * @param remove flag indicating whether the looked up entry shall be removed
179 * @param destroy flag indicating whether the destructor shall be invoked
180 * @return a pointer to the value corresponding to the key or \c NULL
181 */
182 static void *cx_hash_map_get_remove(
183 CxMap *map,
184 CxHashKey key,
185 bool remove,
186 bool destroy
187 ) {
188 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
189
190 unsigned hash = key.hash;
191 if (hash == 0) {
192 cx_hash_murmur(&key);
193 hash = key.hash;
194 }
195
196 size_t slot = hash % hash_map->bucket_count;
197 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
198 struct cx_hash_map_element_s *prev = NULL;
199 while (elm && elm->key.hash <= hash) {
200 if (elm->key.hash == hash && elm->key.len == key.len) {
201 if (memcmp(elm->key.data, key.data, key.len) == 0) {
202 void *data = NULL;
203 if (destroy) {
204 cx_invoke_destructor(map, elm->data);
205 } else {
206 if (map->collection.store_pointer) {
207 data = *(void **) elm->data;
208 } else {
209 data = elm->data;
210 }
211 }
212 if (remove) {
213 cx_hash_map_unlink(hash_map, slot, prev, elm);
214 }
215 return data;
216 }
217 }
218 prev = elm;
219 elm = prev->next;
220 }
221
222 return NULL;
223 }
224
225 static void *cx_hash_map_get(
226 const CxMap *map,
227 CxHashKey key
228 ) {
229 // we can safely cast, because we know the map stays untouched
230 return cx_hash_map_get_remove((CxMap *) map, key, false, false);
231 }
232
233 static void *cx_hash_map_remove(
234 CxMap *map,
235 CxHashKey key,
236 bool destroy
237 ) {
238 return cx_hash_map_get_remove(map, key, true, destroy);
239 }
240
241 static void *cx_hash_map_iter_current_entry(const void *it) {
242 const struct cx_iterator_s *iter = it;
243 // struct has to have a compatible signature
244 return (struct cx_map_entry_s *) &(iter->kv_data);
245 }
246
247 static void *cx_hash_map_iter_current_key(const void *it) {
248 const struct cx_iterator_s *iter = it;
249 struct cx_hash_map_element_s *elm = iter->elem_handle;
250 return &elm->key;
251 }
252
253 static void *cx_hash_map_iter_current_value(const void *it) {
254 const struct cx_iterator_s *iter = it;
255 const struct cx_hash_map_s *map = iter->src_handle.c;
256 struct cx_hash_map_element_s *elm = iter->elem_handle;
257 if (map->base.collection.store_pointer) {
258 return *(void **) elm->data;
259 } else {
260 return elm->data;
261 }
262 }
263
264 static bool cx_hash_map_iter_valid(const void *it) {
265 const struct cx_iterator_s *iter = it;
266 return iter->elem_handle != NULL;
267 }
268
269 static void cx_hash_map_iter_next(void *it) {
270 struct cx_iterator_s *iter = it;
271 struct cx_hash_map_element_s *elm = iter->elem_handle;
272 struct cx_hash_map_s *map = iter->src_handle.m;
273
274 // remove current element, if asked
275 if (iter->base.remove) {
276
277 // clear the flag
278 iter->base.remove = false;
279
280 // determine the next element
281 struct cx_hash_map_element_s *next = elm->next;
282
283 // search the previous element
284 struct cx_hash_map_element_s *prev = NULL;
285 if (map->buckets[iter->slot] != elm) {
286 prev = map->buckets[iter->slot];
287 while (prev->next != elm) {
288 prev = prev->next;
289 }
290 }
291
292 // destroy
293 cx_invoke_destructor((struct cx_map_s *) map, elm->data);
294
295 // unlink
296 cx_hash_map_unlink(map, iter->slot, prev, elm);
297
298 // advance
299 elm = next;
300 } else {
301 // just advance
302 elm = elm->next;
303 iter->index++;
304 }
305
306 // search the next bucket, if required
307 while (elm == NULL && ++iter->slot < map->bucket_count) {
308 elm = map->buckets[iter->slot];
309 }
310
311 // fill the struct with the next element
312 iter->elem_handle = elm;
313 if (elm == NULL) {
314 iter->kv_data.key = NULL;
315 iter->kv_data.value = NULL;
316 } else {
317 iter->kv_data.key = &elm->key;
318 if (map->base.collection.store_pointer) {
319 iter->kv_data.value = *(void **) elm->data;
320 } else {
321 iter->kv_data.value = elm->data;
322 }
323 }
324 }
325
326 static CxIterator cx_hash_map_iterator(
327 const CxMap *map,
328 enum cx_map_iterator_type type
329 ) {
330 CxIterator iter;
331
332 iter.src_handle.c = map;
333 iter.elem_count = map->collection.size;
334
335 switch (type) {
336 case CX_MAP_ITERATOR_PAIRS:
337 iter.elem_size = sizeof(CxMapEntry);
338 iter.base.current = cx_hash_map_iter_current_entry;
339 break;
340 case CX_MAP_ITERATOR_KEYS:
341 iter.elem_size = sizeof(CxHashKey);
342 iter.base.current = cx_hash_map_iter_current_key;
343 break;
344 case CX_MAP_ITERATOR_VALUES:
345 iter.elem_size = map->collection.elem_size;
346 iter.base.current = cx_hash_map_iter_current_value;
347 break;
348 default:
349 assert(false);
350 }
351
352 iter.base.valid = cx_hash_map_iter_valid;
353 iter.base.next = cx_hash_map_iter_next;
354 iter.base.remove = false;
355 iter.base.mutating = false;
356
357 iter.slot = 0;
358 iter.index = 0;
359
360 if (map->collection.size > 0) {
361 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
362 struct cx_hash_map_element_s *elm = hash_map->buckets[0];
363 while (elm == NULL) {
364 elm = hash_map->buckets[++iter.slot];
365 }
366 iter.elem_handle = elm;
367 iter.kv_data.key = &elm->key;
368 if (map->collection.store_pointer) {
369 iter.kv_data.value = *(void **) elm->data;
370 } else {
371 iter.kv_data.value = elm->data;
372 }
373 } else {
374 iter.elem_handle = NULL;
375 iter.kv_data.key = NULL;
376 iter.kv_data.value = NULL;
377 }
378
379 return iter;
380 }
381
382 static cx_map_class cx_hash_map_class = {
383 cx_hash_map_destructor,
384 cx_hash_map_clear,
385 cx_hash_map_put,
386 cx_hash_map_get,
387 cx_hash_map_remove,
388 cx_hash_map_iterator,
389 };
390
391 CxMap *cxHashMapCreate(
392 const CxAllocator *allocator,
393 size_t itemsize,
394 size_t buckets
395 ) {
396 if (buckets == 0) {
397 // implementation defined default
398 buckets = 16;
399 }
400
401 struct cx_hash_map_s *map = cxCalloc(allocator, 1,
402 sizeof(struct cx_hash_map_s));
403 if (map == NULL) return NULL;
404
405 // initialize hash map members
406 map->bucket_count = buckets;
407 map->buckets = cxCalloc(allocator, buckets,
408 sizeof(struct cx_hash_map_element_s *));
409 if (map->buckets == NULL) {
410 cxFree(allocator, map);
411 return NULL;
412 }
413
414 // initialize base members
415 map->base.cl = &cx_hash_map_class;
416 map->base.collection.allocator = allocator;
417
418 if (itemsize > 0) {
419 map->base.collection.store_pointer = false;
420 map->base.collection.elem_size = itemsize;
421 } else {
422 map->base.collection.store_pointer = true;
423 map->base.collection.elem_size = sizeof(void *);
424 }
425
426 return (CxMap *) map;
427 }
428
429 int cxMapRehash(CxMap *map) {
430 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
431 if (map->collection.size > ((hash_map->bucket_count * 3) >> 2)) {
432
433 size_t new_bucket_count = (map->collection.size * 5) >> 1;
434 struct cx_hash_map_element_s **new_buckets = cxCalloc(
435 map->collection.allocator,
436 new_bucket_count, sizeof(struct cx_hash_map_element_s *)
437 );
438
439 if (new_buckets == NULL) {
440 return 1;
441 }
442
443 // iterate through the elements and assign them to their new slots
444 cx_for_n(slot, hash_map->bucket_count) {
445 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
446 while (elm != NULL) {
447 struct cx_hash_map_element_s *next = elm->next;
448 size_t new_slot = elm->key.hash % new_bucket_count;
449
450 // find position where to insert
451 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
452 struct cx_hash_map_element_s *bucket_prev = NULL;
453 while (bucket_next != NULL &&
454 bucket_next->key.hash < elm->key.hash) {
455 bucket_prev = bucket_next;
456 bucket_next = bucket_next->next;
457 }
458
459 // insert
460 if (bucket_prev == NULL) {
461 elm->next = new_buckets[new_slot];
462 new_buckets[new_slot] = elm;
463 } else {
464 bucket_prev->next = elm;
465 elm->next = bucket_next;
466 }
467
468 // advance
469 elm = next;
470 }
471 }
472
473 // assign result to the map
474 hash_map->bucket_count = new_bucket_count;
475 cxFree(map->collection.allocator, hash_map->buckets);
476 hash_map->buckets = new_buckets;
477 }
478 return 0;
479 }

mercurial