/*
 * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <skywalk/os_skywalk_private.h>

#include "cuckoo_hashtable.h"

#define CUCKOO_TAG "com.apple.skywalk.libcuckoo"
SKMEM_TAG_DEFINE(cuckoo_tag, CUCKOO_TAG);


SYSCTL_NODE(_kern_skywalk, OID_AUTO, libcuckoo, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk Cuckoo Hashtable Library");

uint32_t cuckoo_verbose = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk_libcuckoo, OID_AUTO, verbose,
    CTLFLAG_RW | CTLFLAG_LOCKED, &cuckoo_verbose, 0, "");
#endif /* DEVELOPMENT || DEBUG */

typedef enum cht_verb {
	CHTV_ERR = 0,
	CHTV_WARN = 1,
	CHTV_INFO = 2,
	CHTV_DEBUG = 3,
} cht_verb_t;

static LCK_GRP_DECLARE(cht_lock_group, "CHT_LOCK");
static LCK_ATTR_DECLARE(cht_lock_attr, 0, 0);

#if SK_LOG
#define cht_log(level, _fmt, ...)                                       \
	do {                                                            \
		if (level <= cuckoo_verbose) {                          \
			kprintf("Cuckoo: thread %p %-30s " _fmt "\n",   \
			    current_thread(), __FUNCTION__, ##__VA_ARGS__); \
		}                                                       \
	} while (0)
#else /* !SK_LOG */
#define cht_log(_flag, _fmt, ...) do { ((void)0); } while (0)
#endif /* !SK_LOG */

#define cht_err(_fmt, ...)      cht_log(CHTV_ERR, _fmt, ##__VA_ARGS__)
#define cht_warn(_fmt, ...)     cht_log(CHTV_WARN, _fmt, ##__VA_ARGS__)
#define cht_info(_fmt, ...)     cht_log(CHTV_INFO, _fmt, ##__VA_ARGS__)
#define cht_debug(_fmt, ...)    cht_log(CHTV_DEBUG, _fmt, ##__VA_ARGS__)

static inline int
cuckoo_node_chain(struct cuckoo_node *node,
    struct cuckoo_node *new_node)
{
	struct cuckoo_node *prev_node = node;

	/* new node must be zero initialized */
	ASSERT(new_node->next == NULL);

	/* tail-insert so we can also check for duplicates along the list */
	while (__improbable(node != NULL)) {
		if (node == new_node) {
			return EEXIST;
		}
		prev_node = node;
		node = node->next;
	}

	prev_node->next = new_node;

	return 0;
}

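/*
 * Unlink del_node from the singly-linked collision chain rooted at *pnode;
 * returns true if the node was found and removed.
 */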
static inline bool
cuckoo_node_del(struct cuckoo_node **pnode,
    struct cuckoo_node *del_node)
{
	ASSERT(pnode != NULL);

	struct cuckoo_node *node = *pnode;
	while (node != NULL && node != del_node) {
		pnode = &node->next;
		node = node->next;
	}
	if (__probable(node != NULL)) {
		*pnode = node->next;
		node->next = NULL;
		return true;
	}

	return false;
}

static inline void
cuckoo_node_set_next(struct cuckoo_node *node, struct cuckoo_node *next_node)
{
	node->next = next_node;
}

/* We are unlikely to add RCU soon, so use a plain pointer reference for now */
static inline struct cuckoo_node *
cuckoo_node_next(struct cuckoo_node *node)
{
	return node->next;
}

#define _CHT_MAX_LOAD_SHRINK    40      /* shrink only when load drops below 40% */
#define _CHT_MIN_LOAD_EXPAND    85      /* cuckoo hashing can sustain ~85% load before expanding */

enum cuckoo_resize_ops {
	_CHT_RESIZE_EXPAND = 0,
	_CHT_RESIZE_SHRINK = 1,
};

/*
 * Following the classic Cuckoo hash table design, cuckoo_hashtable uses k
 * hash functions to derive multiple candidate hash table bucket indexes.
 * Here cuckoo_hashtable uses k=2.
 *     prim_bkt_idx = bkt_idx[1] = hash[1](key) % N_BUCKETS
 *     alt_bkt_idx  = bkt_idx[2] = hash[2](key) % N_BUCKETS
 *
 * Currently, we let the caller pass in the actual key's hash value, because
 * in most use cases the caller has probably already calculated the hash
 * value of the actual key (e.g. using hardware offloading or copy+hash).
 * This also saves us from storing the key in the table (or in any side data
 * structure). So
 *
 *     hash[1] = hash                // hash value passed in from the caller
 *     hash[2] = __alt_hash(hash[1])
 *
 * __alt_hash derives h2 from h1's high bits, since calculating the primary
 * bucket index uses its low bits. So alt_hash is still a uniformly
 * distributed random variable (not independent of h1, but that is fine for
 * hashtable use).
 *
 * There is the option of storing h2 in the table bucket as well, but
 * cuckoo_hashtable does not do this: it trades a few more CPU cycles during
 * add/del operations for lower memory usage. Assuming that the hashtable is
 * read-heavy rather than write-heavy, this is reasonable.
 *
 * In the rare case of a full hash value collision, where
 *     hash[1] == hash[1]'
 * there is no way for the hash table to differentiate the two objects, so we
 * need to chain the fully collided objects under the same bucket slot.
 * The caller needs to walk the chain and explicitly compare the full-length
 * key to find the correct object.
 *
 * Reference Counting
 * The hashtable assumes all objects are reference counted. It takes function
 * pointers that retain and release the object.
 * Adding to the table calls the object's retain function.
 * Deleting from the table calls its release function.
 */

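/*
 * Illustrative usage sketch (not compiled as part of this file). The
 * cuckoo_hashtable_* calls and the cuckoo_hashtable_params fields are the
 * API implemented below; the my_obj_* callbacks, the embedded "node" field
 * and the hash computation are hypothetical and only show how a caller is
 * expected to wire things up:
 *
 *	struct cuckoo_hashtable_params p = {
 *		.cht_capacity = 1024,
 *		.cht_obj_cmp = my_obj_cmp,          // compare node vs. full key
 *		.cht_obj_retain = my_obj_retain,    // take a reference
 *		.cht_obj_release = my_obj_release,  // drop a reference
 *	};
 *	struct cuckoo_hashtable *h = cuckoo_hashtable_create(&p);
 *
 *	uint32_t hash = compute_hash(key);   // caller supplies the hash
 *	error = cuckoo_hashtable_add_with_hash(h, &obj->node, hash);
 *	found = cuckoo_hashtable_find_with_hash(h, key, hash); // retained
 *	error = cuckoo_hashtable_del(h, &obj->node, hash);
 *	cuckoo_hashtable_free(h);            // table must be empty by now
 */
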
/* hash might be zero, so always use _node == NULL to test for an empty slot */
struct _slot {
	uint32_t _hash;
	struct cuckoo_node *_node;
};

/*
 * Cuckoo hashtable cache line awareness:
 * - The ARM platform has a 128B CPU cache line.
 * - The Intel platform has a 64B CPU cache line; however, the hardware
 *   prefetcher treats cache lines as 128B chunks and prefetches the
 *   adjacent 64B line.
 *
 * Thus cuckoo_hashtable uses 128B as the bucket size to make the best use
 * of the CPU cache.
 */
#define _CHT_CACHELINE_CHUNK 128
#define _CHT_SLOT_INVAL UINT8_MAX
static const uint8_t _CHT_BUCKET_SLOTS =
    ((_CHT_CACHELINE_CHUNK - sizeof(lck_mtx_t) - sizeof(uint8_t)) /
    sizeof(struct _slot));

struct _bucket {
	struct _slot _slots[_CHT_BUCKET_SLOTS];
	decl_lck_mtx_data(, _lock);
	uint8_t _inuse;
} __attribute__((aligned(_CHT_CACHELINE_CHUNK)));

struct cuckoo_hashtable {
	uint32_t _bitmask;              /* all-ones mask for fast modulo */
	uint32_t _n_buckets;            /* number of buckets */

	volatile uint32_t _n_entries;   /* number of entries in table */
	uint32_t _capacity;             /* max number of entries */
	uint32_t _rcapacity;            /* requested capacity */

	bool _busy;
	uint32_t _resize_waiters;
	decl_lck_rw_data(, _resize_lock);
	decl_lck_mtx_data(, _lock);

	struct _bucket *_buckets;

	int (*_obj_cmp)(struct cuckoo_node *node, void *key);
	void (*_obj_retain)(struct cuckoo_node *);
	void (*_obj_release)(struct cuckoo_node *);
} __attribute__((aligned(_CHT_CACHELINE_CHUNK)));

static_assert(sizeof(struct _bucket) <= _CHT_CACHELINE_CHUNK);

static inline void
__slot_set(struct _slot *slt, uint32_t hash, struct cuckoo_node *node)
{
	slt->_hash = hash;
	slt->_node = node;
}

static inline void
__slot_reset(struct _slot *slt)
{
	slt->_hash = 0;
	slt->_node = NULL;
}

static inline uint32_t
__alt_hash(uint32_t hash)
{
#define _CHT_ALT_HASH_MIX 0x5bd1e995    /* Murmur hash mix */
	uint32_t tag = hash >> 16;
	uint32_t alt_hash = hash ^ ((tag + 1) * _CHT_ALT_HASH_MIX);
	return alt_hash;
}

static inline struct _bucket *
__get_bucket(struct cuckoo_hashtable *h, uint32_t b_i)
{
	return &h->_buckets[b_i];
}

static inline struct _bucket *
__prim_bucket(struct cuckoo_hashtable *h, uint32_t hash)
{
	return __get_bucket(h, hash & h->_bitmask);
}

static inline struct _bucket *
__alt_bucket(struct cuckoo_hashtable *h, uint32_t hash)
{
	return __get_bucket(h, __alt_hash(hash) & h->_bitmask);
}

#if SK_LOG
static inline size_t
__bucket_idx(struct cuckoo_hashtable *h, struct _bucket *b)
{
	return ((uintptr_t)b - (uintptr_t)&h->_buckets[0]) / sizeof(struct _bucket);
}
#endif /* SK_LOG */

static inline struct _slot *
__bucket_slot(struct _bucket *b, uint32_t slot_idx)
{
	return &b->_slots[slot_idx];
}

static inline bool
__slot_empty(struct _slot *s)
{
	return s->_node == NULL;
}

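/*
 * Round v up to the next power of two (e.g. 24 -> 32). The bucket count
 * must be a power of two so that (hash & _bitmask) can replace a modulo.
 */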
static inline uint32_t
__align32pow2(uint32_t v)
{
	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v++;

	return v;
}

uint32_t
cuckoo_hashtable_load_factor(struct cuckoo_hashtable *h)
{
	return (100 * h->_n_entries) / (h->_n_buckets * _CHT_BUCKET_SLOTS);
}

/*
 * Cuckoo hashtable uses regular mutexes. Most operations (find/add) should
 * finish faster than a context switch. It avoids using spin locks, since
 * they might cause issues on certain platforms (e.g. x86_64) where the trap
 * handler for dealing with FP/SIMD use would be invoked to perform thread-
 * specific allocations; the use of FP/SIMD here is related to the memory
 * compare-with-mask routines. Even if another thread holding a bucket lock
 * has gone to sleep, the cuckoo path search will try to find another path
 * that avoids the blocker.
 *
 * The only exception is table expansion, which could take a long time; in
 * that case we use a read/write lock to protect the whole table against any
 * read or write.
 */

/* find/add only acquire the table rlock, and serialize with bucket locks */
#define __lock_bucket(b) lck_mtx_lock(&b->_lock)
#define __unlock_bucket(b) lck_mtx_unlock(&b->_lock)

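/*
 * Bounded try-lock for bucket locks. Cuckoo path moves may take bucket
 * locks in an arbitrary order, so a plain lck_mtx_lock() could deadlock
 * when two writers walk overlapping paths; instead we retry the try-lock a
 * bounded number of times and let the caller abandon the path on failure.
 */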
#define _CHT_DEADLOCK_THRESHOLD 20
static inline bool
__lock_bucket_with_backoff(struct _bucket *b)
{
	uint32_t try_counter = 0;
	while (!lck_mtx_try_lock(&b->_lock)) {
		if (try_counter++ > _CHT_DEADLOCK_THRESHOLD) {
			return false;
		}
	}
	return true;
}

#define __rlock_table(h) lck_rw_lock_shared(&h->_resize_lock)
#define __unrlock_table(h) lck_rw_unlock_shared(&h->_resize_lock)
#define __r2wlock_table(h) lck_rw_lock_shared_to_exclusive(&h->_resize_lock)
#define __wlock_table(h) lck_rw_lock_exclusive(&h->_resize_lock)
#define __unwlock_table(h) lck_rw_unlock_exclusive(&h->_resize_lock)

static inline int
__resize_begin(struct cuckoo_hashtable *h)
{
	// takes care of concurrent resize
	lck_mtx_lock(&h->_lock);
	while (h->_busy) {
		if (++(h->_resize_waiters) == 0) {      /* wraparound */
			h->_resize_waiters++;
		}
		int error = msleep(&h->_resize_waiters, &h->_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		if (error == EINTR) {
			cht_warn("resize waiter was interrupted");
			ASSERT(h->_resize_waiters > 0);
			h->_resize_waiters--;
			lck_mtx_unlock(&h->_lock);
			return EINTR;
		}
		// resizer finished
		lck_mtx_unlock(&h->_lock);
		return EAGAIN;
	}

	h->_busy = true;
	lck_mtx_unlock(&h->_lock);

	// takes other readers offline
	__wlock_table(h);
	return 0;
}

static inline void
__resize_end(struct cuckoo_hashtable *h)
{
	__unwlock_table(h);
	lck_mtx_lock(&h->_lock);
	h->_busy = false;
	if (__improbable(h->_resize_waiters > 0)) {
		h->_resize_waiters = 0;
		wakeup(&h->_resize_waiters);
	}
	lck_mtx_unlock(&h->_lock);
}

struct cuckoo_hashtable *
cuckoo_hashtable_create(struct cuckoo_hashtable_params *p)
{
	struct cuckoo_hashtable *h = NULL;
	uint32_t n = 0;
	uint32_t n_buckets = 0;
	struct _bucket *buckets = NULL;
	uint32_t i;

	if (p->cht_capacity > CUCKOO_HASHTABLE_ENTRIES_MAX ||
	    p->cht_capacity < _CHT_BUCKET_SLOTS) {
		return NULL;
	}

	ASSERT(p->cht_capacity < UINT32_MAX);
	n = (uint32_t)p->cht_capacity;
	h = sk_alloc_type(struct cuckoo_hashtable, Z_WAITOK | Z_NOFAIL, cuckoo_tag);

	n_buckets = __align32pow2(n / _CHT_BUCKET_SLOTS);
	buckets = sk_alloc_type_array(struct _bucket, n_buckets, Z_WAITOK, cuckoo_tag);
	if (buckets == NULL) {
		sk_free_type(struct cuckoo_hashtable, h);
		return NULL;
	}

	for (i = 0; i < n_buckets; i++) {
		lck_mtx_init(&buckets[i]._lock, &cht_lock_group, &cht_lock_attr);
	}

	lck_mtx_init(&h->_lock, &cht_lock_group, &cht_lock_attr);

	h->_n_entries = 0;
	h->_n_buckets = n_buckets;
	h->_capacity = h->_rcapacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	h->_bitmask = n_buckets - 1;
	h->_buckets = buckets;
	lck_rw_init(&h->_resize_lock, &cht_lock_group, &cht_lock_attr);
	h->_busy = false;
	h->_resize_waiters = 0;

	ASSERT(p->cht_obj_retain != NULL);
	ASSERT(p->cht_obj_release != NULL);
	ASSERT(p->cht_obj_cmp != NULL);
	h->_obj_cmp = p->cht_obj_cmp;
	h->_obj_retain = p->cht_obj_retain;
	h->_obj_release = p->cht_obj_release;

	return h;
}

void
cuckoo_hashtable_free(struct cuckoo_hashtable *h)
{
	uint32_t i;

	if (h == NULL) {
		return;
	}

	ASSERT(h->_n_entries == 0);

	if (h->_buckets != NULL) {
		for (i = 0; i < h->_n_buckets; i++) {
			lck_mtx_destroy(&h->_buckets[i]._lock, &cht_lock_group);
		}
		sk_free_type_array(struct _bucket, h->_n_buckets, h->_buckets);
	}
	sk_free_type(struct cuckoo_hashtable, h);
}

size_t
cuckoo_hashtable_entries(struct cuckoo_hashtable *h)
{
	return h->_n_entries;
}

size_t
cuckoo_hashtable_capacity(struct cuckoo_hashtable *h)
{
	return h->_n_buckets * _CHT_BUCKET_SLOTS;
}

size_t
cuckoo_hashtable_memory_footprint(struct cuckoo_hashtable *h)
{
	size_t total_meminuse = sizeof(struct cuckoo_hashtable) +
	    (h->_n_buckets * sizeof(struct _bucket));
	return total_meminuse;
}

static inline struct cuckoo_node *
__find_in_bucket(struct cuckoo_hashtable *h, struct _bucket *b, void *key,
    uint32_t hash)
{
	uint32_t i;
	struct cuckoo_node *node = NULL;

	__lock_bucket(b);
	if (b->_inuse == 0) {
		goto done;
	}
	for (i = 0; i < _CHT_BUCKET_SLOTS; i++) {
		if (b->_slots[i]._hash == hash) {
			node = b->_slots[i]._node;
			while (node != NULL) {
				if (h->_obj_cmp(node, key) == 0) {
					h->_obj_retain(node);
					goto done;
				}
				node = cuckoo_node_next(node);
			}
		}
	}

done:
	__unlock_bucket(b);
	return node;
}

/* the returned node is retained; the caller is responsible for releasing it */
struct cuckoo_node *
cuckoo_hashtable_find_with_hash(struct cuckoo_hashtable *h, void *key,
    uint32_t hash)
{
	struct _bucket *b1, *b2;
	struct cuckoo_node *node = NULL;

	__rlock_table(h);

	b1 = __prim_bucket(h, hash);
	if ((node = __find_in_bucket(h, b1, key, hash)) != NULL) {
		goto done;
	}

	b2 = __alt_bucket(h, hash);
	if ((node = __find_in_bucket(h, b2, key, hash)) != NULL) {
		goto done;
	}

done:
	__unrlock_table(h);
	return node;
}

/*
 * To add a key into cuckoo_hashtable:
 * 1. First it searches the key's two candidate buckets b1, b2
 * 2. If there are slots available in b1 or b2, place the key there
 * 3. Otherwise cuckoo_hashtable has to probe and make space
 *
 * To move keys around (it is an open-addressing hash table), cuckoo_hashtable
 * first needs to find an available slot via a cuckoo search. Here it uses
 * breadth-first search to find the shortest path towards an empty bucket
 * slot.
 */
static inline int
__add_to_bucket(struct cuckoo_hashtable *h, struct _bucket *b,
    struct cuckoo_node *node, uint32_t hash)
{
	int ret = -1;
	uint8_t avail_i = _CHT_SLOT_INVAL;

	__lock_bucket(b);
	if (b->_inuse == _CHT_BUCKET_SLOTS) {
		goto done;
	}
	for (uint8_t i = 0; i < _CHT_BUCKET_SLOTS; i++) {
		struct _slot *s = __bucket_slot(b, i);
		if (__slot_empty(s)) {
			if (avail_i == _CHT_SLOT_INVAL) {
				avail_i = i;
			}
		} else {
			/* chain to an existing slot with the same hash */
			if (__improbable(s->_hash == hash)) {
				ASSERT(s->_node != NULL);
				ret = cuckoo_node_chain(s->_node, node);
				if (ret != 0) {
					goto done;
				}
				cht_debug("hash %x node %p inserted [%zu][%d]",
				    hash, node, __bucket_idx(h, b), i);
				OSAddAtomic(1, &h->_n_entries);
				h->_obj_retain(node);
				goto done;
			}
		}
	}
	if (avail_i != _CHT_SLOT_INVAL) {
		h->_obj_retain(node);
		b->_slots[avail_i]._hash = hash;
		b->_slots[avail_i]._node = node;
		b->_inuse++;
		cht_debug("hash %x node %p inserted [%zu][%d]", hash, node,
		    __bucket_idx(h, b), avail_i);
		OSAddAtomic(1, &h->_n_entries);
		ret = 0;
	}
done:
	__unlock_bucket(b);
	return ret;
}

#define _CHT_BFS_QUEUE_LEN UINT8_MAX
#define _CHT_BFS_QUEUE_END (_CHT_BFS_QUEUE_LEN - _CHT_BUCKET_SLOTS)

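/*
 * BFS queue entry for the cuckoo path search: bkt_idx is the bucket to
 * examine; prev_node_idx/prev_slot_idx record the slot we were displaced
 * from, so cuckoo_move() can replay the path backwards. _CHT_BFS_QUEUE_END
 * leaves headroom so that every slot of the bucket at the queue head can
 * still be enqueued without overflowing the queue.
 */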
struct _bfs_node {
	uint32_t bkt_idx;
	uint8_t prev_node_idx;
	uint8_t prev_slot_idx;
};

/*
 * Move slots backwards along the cuckoo path.
 *
 * cuckoo_move holds at most 2 locks at any time, moving from the end of the
 * cuckoo path toward the bucket where the new key should be stored. There is
 * a chance of deadlock when multiple writers have overlapping cuckoo paths.
 * We could arrange the order of locking to avoid that, but then we would
 * have to take all the locks upfront, which is not friendly to concurrent
 * readers. So instead, we take the locks one at a time (still holding at
 * most 2 locks at any time), backing off when a lock cannot be acquired.
 */
static int
cuckoo_move(struct cuckoo_hashtable *h, struct cuckoo_node *node,
    uint32_t hash, struct _bfs_node *queue, uint8_t leaf_node_idx,
    uint8_t leaf_slot)
{
	struct _bfs_node *prev_node, *curr_node;
	struct _bucket *from_bkt, *to_bkt, *alt_bkt;
	uint8_t from_slot, to_slot;

	curr_node = &queue[leaf_node_idx];
	to_bkt = __get_bucket(h, curr_node->bkt_idx);
	to_slot = leaf_slot;

	__lock_bucket(to_bkt);

	while (__probable(curr_node->prev_node_idx != _CHT_BFS_QUEUE_LEN)) {
		prev_node = &queue[curr_node->prev_node_idx];
		from_bkt = __get_bucket(h, prev_node->bkt_idx);
		from_slot = curr_node->prev_slot_idx;

		if (!__lock_bucket_with_backoff(from_bkt)) {
			/* a deadlock, or a sleeping thread holding the lock */
			__unlock_bucket(to_bkt);
			cht_warn("cuckoo move deadlock detected");
			return EINVAL;
		}

		/*
		 * Verify the cuckoo path by checking:
		 * 1. from_bkt[from_slot]'s alternative bucket is still to_bkt
		 * 2. to_bkt[to_slot] is still vacant
		 */
		alt_bkt = __alt_bucket(h, from_bkt->_slots[from_slot]._hash);
		if (alt_bkt != to_bkt ||
		    !__slot_empty(__bucket_slot(to_bkt, to_slot))) {
			__unlock_bucket(from_bkt);
			__unlock_bucket(to_bkt);
			cht_warn("cuckoo move path invalid: %s %s",
			    alt_bkt != to_bkt ? "alt_bkt != to_bkt" : "",
			    !__slot_empty(__bucket_slot(to_bkt, to_slot)) ?
			    "!slot_empty(to_bkt, to_slot)" : "");
			return EINVAL;
		}

		cht_log(CHTV_DEBUG, "Move [0x%lx][%d] to [0x%lx][%d]",
		    from_bkt - h->_buckets, from_slot, to_bkt - h->_buckets,
		    to_slot);

		ASSERT(to_bkt->_slots[to_slot]._node == NULL);
		ASSERT(to_bkt->_slots[to_slot]._hash == 0);

		/* move entry backward */
		to_bkt->_slots[to_slot] = from_bkt->_slots[from_slot];
		to_bkt->_inuse++;
		__slot_reset(&from_bkt->_slots[from_slot]);
		from_bkt->_inuse--;

		__unlock_bucket(to_bkt);

		curr_node = prev_node;
		to_bkt = from_bkt;
		to_slot = from_slot;
	}

	ASSERT(curr_node->prev_node_idx == _CHT_BFS_QUEUE_LEN);
	ASSERT(curr_node->prev_slot_idx == _CHT_SLOT_INVAL);

	/* bail out if the root slot is no longer vacant */
	if (to_bkt->_slots[to_slot]._node != NULL) {
		__unlock_bucket(to_bkt);
		return EINVAL;
	}

	to_bkt->_inuse++;
	__slot_set(&to_bkt->_slots[to_slot], hash, node);
	h->_obj_retain(node);
	__unlock_bucket(to_bkt);

	OSAddAtomic(1, &h->_n_entries);

	cht_debug("hash %x node %p inserted at [%zu][%d]", hash, node,
	    __bucket_idx(h, to_bkt), to_slot);

	return 0;
}

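/*
 * Breadth-first search for the shortest displacement path from the key's
 * primary bucket to an empty slot; on success the path is replayed
 * backwards by cuckoo_move() to make room for the new node.
 */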
static int
cuckoo_probe(struct cuckoo_hashtable *h, struct cuckoo_node *node,
    uint32_t hash)
{
	struct _bfs_node queue[_CHT_BFS_QUEUE_LEN];
	uint8_t head, tail;
	struct _bucket *b;
	uint8_t avail_i;
	int ret = ENOMEM;

	/* the probe starts from the key's primary bucket */
	queue[0].bkt_idx = hash & h->_bitmask;
	queue[0].prev_node_idx = _CHT_BFS_QUEUE_LEN;
	queue[0].prev_slot_idx = _CHT_SLOT_INVAL;

	head = 0;
	tail = 1;

	while (__probable(tail != head && tail < _CHT_BFS_QUEUE_END)) {
		b = __get_bucket(h, queue[head].bkt_idx);
		avail_i = _CHT_SLOT_INVAL;
		for (uint8_t i = 0; i < _CHT_BUCKET_SLOTS; i++) {
			struct _slot *s = __bucket_slot(b, i);
			if (__slot_empty(s)) {
				if (avail_i == _CHT_SLOT_INVAL) {
					avail_i = i;
				}
				continue;
			}

			/*
			 * Another node with the same hash could have been
			 * probed into this bucket; chain to it.
			 */
			if (__improbable(s->_hash == hash)) {
				ASSERT(s->_node != NULL);
				ret = cuckoo_node_chain(s->_node, node);
				if (ret != 0) {
					goto done;
				}
				cht_debug("hash %x node %p inserted [%zu][%d]",
				    hash, node, __bucket_idx(h, b), i);
				OSAddAtomic(1, &h->_n_entries);
				h->_obj_retain(node);
				goto done;
			}

			queue[tail].bkt_idx = __alt_hash(s->_hash) & h->_bitmask;
			queue[tail].prev_node_idx = head;
			queue[tail].prev_slot_idx = i;
			tail++;
		}

		if (avail_i != _CHT_SLOT_INVAL) {
			ret = cuckoo_move(h, node, hash, queue, head, avail_i);
			if (ret == 0) {
				goto done;
			} else if (ret == EINVAL) {
				cht_warn("cuckoo path invalidated");
				goto skip;
			} else {
				cht_err("failed: unknown error %d", ret);
				goto done;
			}
		}
skip:
		head++;
	}

	if (tail == head || tail >= _CHT_BFS_QUEUE_END) {
		cht_warn("failed: cuckoo probe out of search space "
		    "head %d tail %d (%d/%d, load factor %d%%)", head, tail,
		    h->_n_entries, h->_capacity,
		    cuckoo_hashtable_load_factor(h));
		ret = ENOSPC;
	} else {
		cht_warn("failed: cuckoo probe path invalidated "
		    "(%d/%d, load factor %d%%)", h->_n_entries, h->_capacity,
		    cuckoo_hashtable_load_factor(h));
		ret = EAGAIN;
	}
done:
	return ret;
}

static inline void
__foreach_node(struct cuckoo_hashtable *h, bool wlocked,
    void (^node_handler)(struct cuckoo_node *, uint32_t hash))
{
	if (!wlocked) {
		__rlock_table(h);
	}
	for (uint32_t i = 0; i < h->_n_buckets; i++) {
		struct _bucket *b = &h->_buckets[i];
		if (b->_inuse == 0) {
			continue;
		}
		if (!wlocked) {
			__lock_bucket(b);
		}
		for (uint32_t j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			struct _slot *s = __bucket_slot(b, j);
			struct cuckoo_node *node = NULL, *next_node = NULL;
			node = s->_node;
			while (node != NULL) {
				next_node = cuckoo_node_next(node);
				node_handler(node, s->_hash);
				node = next_node;
			}
		}
		if (!wlocked) {
			__unlock_bucket(b);
		}
	}
	if (!wlocked) {
		__unrlock_table(h);
	}
}

void
cuckoo_hashtable_foreach(struct cuckoo_hashtable *h,
    void (^node_handler)(struct cuckoo_node *, uint32_t hash))
{
	__foreach_node(h, false, node_handler);
}

static void
cuckoo_dummy_retain(struct cuckoo_node *node)
{
#pragma unused(node)
}

static void
cuckoo_dummy_release(struct cuckoo_node *node)
{
#pragma unused(node)
}

static int
cuckoo_resize(struct cuckoo_hashtable *h, enum cuckoo_resize_ops option)
{
	int ret = 0;

	/* back off from a concurrent resize */
	do {
		ret = __resize_begin(h);
		if (ret == EAGAIN) {
			cht_info("resize done by peer");
			return EAGAIN;
		}
	} while (ret == EINTR);

	uint32_t curr_capacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	uint32_t curr_load = (100 * h->_n_entries) / curr_capacity;
	uint32_t curr_buckets = h->_n_buckets;
	uint32_t new_capacity;
	__block size_t add_called = 0;

	/* check the load factor to verify this resize is still warranted */
	if (option == _CHT_RESIZE_EXPAND) {
		if (curr_load < _CHT_MIN_LOAD_EXPAND) {
			cht_warn("Warning: early expand at %d load", curr_load);
		}
		new_capacity = curr_capacity * 2;
	} else {
		if (curr_load > _CHT_MAX_LOAD_SHRINK ||
		    curr_capacity == h->_rcapacity) {
			goto done;
		}
		new_capacity = curr_capacity / 2;
	}

	cht_info("resize %d/(%d -> %d)", h->_n_entries,
	    curr_capacity, new_capacity);

	struct cuckoo_hashtable_params new_p = {
		.cht_capacity = new_capacity,
		.cht_obj_cmp = h->_obj_cmp,
		.cht_obj_retain = cuckoo_dummy_retain,
		.cht_obj_release = cuckoo_dummy_release,
	};
	struct cuckoo_hashtable *tmp_h;
	tmp_h = cuckoo_hashtable_create(&new_p);
	if (tmp_h == NULL) {
		ret = ENOMEM;
		goto done;
	}

	__foreach_node(h, true, ^(struct cuckoo_node *node, uint32_t hash) {
		int error = 0;
		cuckoo_node_set_next(node, NULL);
		error = cuckoo_hashtable_add_with_hash(tmp_h, node, hash);
		ASSERT(error == 0);
		add_called++;
	});

	if (__improbable(cuckoo_hashtable_entries(h) !=
	    cuckoo_hashtable_entries(tmp_h))) {
		panic("h %zu add_called %zu tmp_h %zu",
		    cuckoo_hashtable_entries(h), add_called,
		    cuckoo_hashtable_entries(tmp_h));
	}

	for (uint32_t i = 0; i < h->_n_buckets; i++) {
		lck_mtx_destroy(&h->_buckets[i]._lock, &cht_lock_group);
	}
	h->_n_buckets = tmp_h->_n_buckets;
	h->_capacity = h->_n_buckets * _CHT_BUCKET_SLOTS;
	h->_bitmask = tmp_h->_bitmask;
	sk_free_type_array(struct _bucket, curr_buckets, h->_buckets);

	h->_buckets = tmp_h->_buckets;
	lck_rw_destroy(&tmp_h->_resize_lock, &cht_lock_group);
	lck_mtx_destroy(&tmp_h->_lock, &cht_lock_group);
	sk_free_type(struct cuckoo_hashtable, tmp_h);

done:
	__resize_end(h);

	return ret;
}

static inline int
cuckoo_add_no_expand(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	struct _bucket *b1, *b2;
	int ret = -1;

	__rlock_table(h);

	b1 = __prim_bucket(h, hash);
	if ((ret = __add_to_bucket(h, b1, node, hash)) == 0) {
		goto done;
	}

	b2 = __alt_bucket(h, hash);
	if ((ret = __add_to_bucket(h, b2, node, hash)) == 0) {
		goto done;
	}

	ret = cuckoo_probe(h, node, hash);
done:
	__unrlock_table(h);
	return ret;
}

int
cuckoo_hashtable_add_with_hash(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	int ret;

	/* node must not already be chained; a stale next pointer would
	 * create a non-terminating tail */
	ASSERT(cuckoo_node_next(node) == NULL);

	ret = cuckoo_add_no_expand(h, node, hash);
	if (ret == ENOSPC) {
		do {
			ret = cuckoo_resize(h, _CHT_RESIZE_EXPAND);
			if (ret != 0 && ret != EAGAIN) {
				break;
			}
			// this could still fail, if other threads added
			// enough objects that another resize is needed
			ret = cuckoo_add_no_expand(h, node, hash);
		} while (ret == ENOSPC);
	}

	return ret;
}

static inline int
__del_from_bucket(struct cuckoo_hashtable *h, struct _bucket *b,
    struct cuckoo_node *node, uint32_t hash)
{
	uint32_t i;

	__lock_bucket(b);
	for (i = 0; i < _CHT_BUCKET_SLOTS; i++) {
		if (b->_slots[i]._hash == hash) {
			if (cuckoo_node_del(&b->_slots[i]._node, node)) {
				h->_obj_release(node);
				OSAddAtomic(-1, &h->_n_entries);
				if (__slot_empty(__bucket_slot(b, i))) {
					b->_slots[i]._hash = 0;
					b->_inuse--;
				}
				__unlock_bucket(b);
				return 0;
			}
		}
	}
	__unlock_bucket(b);
	return ENOENT;
}

int
cuckoo_hashtable_del(struct cuckoo_hashtable *h,
    struct cuckoo_node *node, uint32_t hash)
{
	struct _bucket *b1, *b2;
	int ret = -1;

	__rlock_table(h);

	b1 = __prim_bucket(h, hash);
	if ((ret = __del_from_bucket(h, b1, node, hash)) == 0) {
		goto done;
	}

	b2 = __alt_bucket(h, hash);
	if ((ret = __del_from_bucket(h, b2, node, hash)) == 0) {
		goto done;
	}

done:
	if (ret == 0) {
		cuckoo_node_set_next(node, NULL);
	}
	__unrlock_table(h);
	return ret;
}

void
cuckoo_hashtable_try_shrink(struct cuckoo_hashtable *h)
{
	cuckoo_resize(h, _CHT_RESIZE_SHRINK);
}

#if (DEVELOPMENT || DEBUG)

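/*
 * Detect a cycle in a collision chain using two pointers advancing at
 * different speeds (Floyd-style); used only for health checking.
 */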
static inline bool
cuckoo_node_looped(struct cuckoo_node *node)
{
	struct cuckoo_node *runner = node;

	if (node == NULL) {
		return false;
	}

	while (runner->next && runner->next->next) {
		runner = runner->next->next;
		node = node->next;

		if (runner == node) {
			return true;
		}
	}
	return false;
}

int
cuckoo_hashtable_health_check(struct cuckoo_hashtable *h)
{
	uint32_t hash;
	uint32_t i, j;
	struct _bucket *b;
	struct cuckoo_node *node;
	bool healthy = true;
	uint32_t seen = 0;

	__wlock_table(h);

	for (i = 0; i < h->_n_buckets; i++) {
		b = &h->_buckets[i];
		uint8_t inuse = 0;
		for (j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			hash = b->_slots[j]._hash;
			node = b->_slots[j]._node;
			if (node != NULL) {
				inuse++;
			}
			while (node != NULL) {
				seen++;
				if ((__prim_bucket(h, hash) != b) &&
				    (__alt_bucket(h, hash) != b)) {
					panic("[%d][%d] stray hash %x node %p",
					    i, j, hash, node);
					healthy = false;
				}

				if (cuckoo_node_looped(node)) {
					panic("[%d][%d] looped hash %x node %p",
					    i, j, hash, node);
					healthy = false;
				}
				node = cuckoo_node_next(node);
			}
		}
		ASSERT(inuse == b->_inuse);
	}

	if (seen != h->_n_entries) {
		panic("seen %d != n_entries %d", seen, h->_n_entries);
	}

	__unwlock_table(h);

	if (!healthy) {
		cht_err("table unhealthy");
		return -1;
	} else {
		return 0;
	}
}

void
cuckoo_hashtable_dump(struct cuckoo_hashtable *h)
{
	uint32_t hash;
	struct cuckoo_node *node;
	uint32_t i, j;
	struct _bucket *b;

	cuckoo_hashtable_health_check(h);

	for (i = 0; i < h->_n_buckets; i++) {
		printf("%d\t", i);
		b = &h->_buckets[i];
		for (j = 0; j < _CHT_BUCKET_SLOTS; j++) {
			hash = b->_slots[j]._hash;
			node = b->_slots[j]._node;
			printf("0x%08x(%p) ", hash, node);
		}
		printf("\n");
	}
}
#endif /* DEVELOPMENT || DEBUG */
