1/*
2 * Copyright (c) 2021-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#ifndef _KERN_SMR_H_
30#define _KERN_SMR_H_
31
32#include <sys/cdefs.h>
33#include <stdbool.h>
34#include <stdint.h>
35#include <kern/assert.h>
36#include <kern/debug.h>
37#include <kern/smr_types.h>
38#include <kern/startup.h>
39#include <os/atomic_private.h>
40
41__BEGIN_DECLS
42
43#pragma mark SMR pointers
44
45/*
46 * SMR Accessors are meant to provide safe access to SMR protected
47 * pointers and prevent misuse and accidental access.
48 *
49 * Accessors are grouped by type:
50 * entered - Use while in a read section (between smr_enter/smr_leave())
51 * serialized - Use while holding a lock that serializes writers.
52 * Updates are synchronized with readers via included barriers.
53 * unserialized - Use after the memory is out of scope and not visible to
54 * readers.
55 *
 * All accesses include a parameter for an assert to verify the required
57 * synchronization.
58 */
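
/*
 * For illustration, a minimal read-side sketch.  This is a sketch only:
 * the SMR_POINTER()-style declaration is assumed to come from
 * <kern/smr_types.h>, and the `foo_smr` domain and `struct foo` type
 * are hypothetical.
 *
 *     SMR_POINTER(struct foo *) foo_ptr;
 *
 *     uint64_t
 *     foo_read_id(void)
 *     {
 *         struct foo *f;
 *         uint64_t    id = 0;
 *
 *         smr_enter(&foo_smr);
 *         f = smr_entered_load(&foo_ptr);
 *         if (f) {
 *             id = f->foo_id;     // only dereference while entered
 *         }
 *         smr_leave(&foo_smr);
 *         return id;
 *     }
 */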
59
60
61/*!
62 * @macro smr_unsafe_load()
63 *
64 * @brief
65 * Read from an SMR protected pointer without any synchronization.
66 *
67 * @discussion
 * This returns an integer on purpose, as dereferencing the result is generally unsafe.
69 */
70#define smr_unsafe_load(ptr) \
71 ({ (uintptr_t)((ptr)->__smr_ptr); })
72
73/*!
74 * @macro smr_entered_load()
75 *
76 * @brief
77 * Read from an SMR protected pointer while in a read section.
78 */
79#define smr_entered_load(ptr) \
80 ({ (ptr)->__smr_ptr; })
81
82/*!
83 * @macro smr_entered_load_assert()
84 *
85 * @brief
 * Read from an SMR protected pointer while in a read section,
 * asserting that @c smr is entered.
87 */
88#define smr_entered_load_assert(ptr, smr) ({ \
89 assert(smr_entered(smr)); \
90 (ptr)->__smr_ptr; \
91})
92
93/*!
94 * @macro smr_entered_load_acquire()
95 *
96 * @brief
97 * Read from an SMR protected pointer while in a read section (with acquire
98 * fence).
99 */
100#define smr_entered_load_acquire(ptr) \
101 os_atomic_load(&(ptr)->__smr_ptr, acquire)
102
103/*!
104 * @macro smr_entered_load_acquire_assert()
105 *
106 * @brief
 * Read from an SMR protected pointer while in a read section (with acquire
 * fence), asserting that @c smr is entered.
108 */
109#define smr_entered_load_acquire_assert(ptr, smr) ({ \
110 assert(smr_entered(smr)); \
111 os_atomic_load(&(ptr)->__smr_ptr, acquire); \
112})
113
114/*!
115 * @macro smr_serialized_load_assert()
116 *
117 * @brief
118 * Read from an SMR protected pointer while serialized by an
119 * external mechanism.
120 */
121#define smr_serialized_load_assert(ptr, held_cond) ({ \
122 assertf(held_cond, "smr_serialized_load: lock not held"); \
123 (ptr)->__smr_ptr; \
124})
125
126/*!
127 * @macro smr_serialized_load()
128 *
129 * @brief
130 * Read from an SMR protected pointer while serialized by an
131 * external mechanism.
132 */
133#define smr_serialized_load(ptr) \
134 smr_serialized_load_assert(ptr, true)
135
136/*!
137 * @macro smr_init_store()
138 *
139 * @brief
140 * Store @c value to an SMR protected pointer during initialization.
141 */
142#define smr_init_store(ptr, value) \
143 ({ (ptr)->__smr_ptr = value; })
144
145/*!
146 * @macro smr_clear_store()
147 *
148 * @brief
 * Clear (set to 0) an SMR protected pointer (this is always allowed).
150 */
151#define smr_clear_store(ptr) \
152 smr_init_store(ptr, 0)
153
154/*!
155 * @macro smr_serialized_store_assert()
156 *
157 * @brief
158 * Store @c value to an SMR protected pointer while serialized by an
159 * external mechanism.
160 *
161 * @discussion
162 * Writers that are serialized with mutual exclusion or on a single
163 * thread should use smr_serialized_store() rather than swap.
164 */
165#define smr_serialized_store_assert(ptr, value, held_cond) ({ \
166 assertf(held_cond, "smr_serialized_store: lock not held"); \
167 os_atomic_thread_fence(release); \
168 (ptr)->__smr_ptr = value; \
169})
170
171/*!
172 * @macro smr_serialized_store()
173 *
174 * @brief
175 * Store @c value to an SMR protected pointer while serialized by an
176 * external mechanism.
177 *
178 * @discussion
179 * Writers that are serialized with mutual exclusion or on a single
180 * thread should use smr_serialized_store() rather than swap.
181 */
182#define smr_serialized_store(ptr, value) \
183 smr_serialized_store_assert(ptr, value, true)
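
/*
 * A minimal publication sketch.  The `foo_lock` mutex and the
 * `foo_lock_held()` predicate are hypothetical; `foo_ptr` is the SMR
 * pointer from the read-side sketch above.
 *
 *     void
 *     foo_publish(struct foo *new_f)
 *     {
 *         lck_mtx_lock(&foo_lock);
 *         // the release barrier guarantees that readers observing `new_f`
 *         // also observe its fully initialized contents
 *         smr_serialized_store_assert(&foo_ptr, new_f, foo_lock_held());
 *         lck_mtx_unlock(&foo_lock);
 *     }
 */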
184
185/*!
186 * @macro smr_serialized_store_relaxed_assert()
187 *
188 * @brief
189 * Store @c value to an SMR protected pointer while serialized by an
190 * external mechanism.
191 *
192 * @discussion
193 * This function can be used when storing a value that was already
194 * previously stored with smr_serialized_store() (for example during
195 * a linked list removal).
196 */
197#define smr_serialized_store_relaxed_assert(ptr, value, held_cond) ({ \
198 assertf(held_cond, "smr_serialized_store_relaxed: lock not held"); \
199 (ptr)->__smr_ptr = value; \
200})
201
202/*!
203 * @macro smr_serialized_store_relaxed()
204 *
205 * @brief
206 * Store @c value to an SMR protected pointer while serialized by an
207 * external mechanism.
208 *
209 * @discussion
210 * This function can be used when storing a value that was already
211 * previously stored with smr_serialized_store() (for example during
212 * a linked list removal).
213 */
214#define smr_serialized_store_relaxed(ptr, value) \
215 smr_serialized_store_relaxed_assert(ptr, value, true)
216
217/*!
218 * @macro smr_serialized_swap_assert()
219 *
220 * @brief
221 * Swap @c value with an SMR protected pointer and return the old value
222 * while serialized by an external mechanism.
223 *
224 * @discussion
225 * Swap permits multiple writers to update a pointer concurrently.
226 */
227#define smr_serialized_swap_assert(ptr, value, held_cond) ({ \
	assertf(held_cond, "smr_serialized_swap: lock not held");              \
229 os_atomic_xchg(&(ptr)->__smr_ptr, value, release); \
230})
231
232/*!
233 * @macro smr_serialized_swap()
234 *
235 * @brief
236 * Swap @c value with an SMR protected pointer and return the old value
237 * while serialized by an external mechanism.
238 *
239 * @discussion
240 * Swap permits multiple writers to update a pointer concurrently.
241 */
242#define smr_serialized_swap(ptr, value) \
243 smr_serialized_swap_assert(ptr, value, true)
244
245/*!
246 * @macro smr_unserialized_load()
247 *
 * @brief
249 * Read from an SMR protected pointer when no serialization is required
250 * such as in the destructor callback or when the caller guarantees other
251 * synchronization.
252 */
253#define smr_unserialized_load(ptr) \
254 ({ (ptr)->__smr_ptr; })
255
256/*!
257 * @macro smr_unserialized_store()
258 *
 * @brief
 * Store to an SMR protected pointer when no serialization is required
261 * such as in the destructor callback or when the caller guarantees other
262 * synchronization.
263 */
264#define smr_unserialized_store(ptr, value) \
265 ({ (ptr)->__smr_ptr = value; })
266
267
268#pragma mark SMR queues
269
270/*
271 * SMR queues are queues that are meant to be read under SMR critical sections
272 * concurrently with possible updates to the queue.
273 *
274 * /!\ Such read operations CAN ONLY BE PERFORMED IN FORWARD DIRECTION. /!\
275 *
276 * Queues can be either:
277 * - lists where the head is a single pointer,
278 * and insertions can only be at the head;
279 * - tail queues where the head is two pointers,
280 * and insertions can be either at the head or the tail.
281 *
282 * Queue linkages can either be single forward pointer linkages or double
283 * forward/backward linkages. The latter supports O(1) deletion.
284 *
285 *
286 * The entire API surface uses type inference for the implementations,
 * which makes it relatively easy to switch between the 4 types of queues
288 * with very minimal API changes (mostly the types of list heads and fields).
289 */
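
/*
 * For illustration, a minimal sketch of a singly linked SMR list.
 * The `struct elem` type, the `bucket` head and the `elem_smr` domain
 * are hypothetical.
 *
 *     struct elem {
 *         uint64_t            key;
 *         struct smrq_slink   link;
 *     };
 *
 *     struct smrq_slist_head  bucket;     // smrq_init(&bucket) at setup
 *
 *     // writer, serialized by an external lock, inserting a new element `e`
 *     smrq_serialized_insert_head(&bucket, &e->link);
 *
 *     // reader (forward iteration only), looking up `key`
 *     struct elem *it;
 *
 *     smr_enter(&elem_smr);
 *     smrq_entered_foreach(it, &bucket, link) {
 *         if (it->key == key) {
 *             break;          // `it` may be used until smr_leave()
 *         }
 *     }
 *     smr_leave(&elem_smr);
 */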
290
291
292/*!
293 * @macro smrq_init
294 *
295 * @brief
296 * Initializes an SMR queue head.
297 */
298#define smrq_init(head) ({ \
299 __auto_type __head = (head); \
300 \
301 smr_init_store(&__head->first, NULL); \
302 if (__smrq_lastp(__head)) { \
303 *__smrq_lastp(__head) = &__head->first; \
304 } \
305})
306
307
308/*!
309 * @macro smrq_empty
310 *
311 * @brief
 * Returns whether an SMR queue is empty; can be called from any context.
313 */
314#define smrq_empty(head) \
315 (smr_unsafe_load(&(head)->first) == 0)
316
317
318/*!
319 * @macro smrq_entered_first
320 *
321 * @brief
322 * Returns the first element of an SMR queue, while in a read section.
323 */
324#define smrq_entered_first(head, type_t, field) \
325 __container_of_safe(smr_entered_load(&(head)->first), type_t, field)
326
327
328/*!
329 * @macro smrq_entered_next
330 *
331 * @brief
332 * Returns the next element of an SMR queue element, while in a read section.
333 */
334#define smrq_entered_next(elem, field) \
335 __container_of_safe(smr_entered_load(&(elem)->field.next), \
336 typeof(*(elem)), field)
337
338
339/*!
340 * @macro smrq_entered_foreach
341 *
342 * @brief
343 * Enumerates an SMR queue, while in a read section.
344 */
345#define smrq_entered_foreach(it, head, field) \
346 for (__auto_type __it = smr_entered_load(&(head)->first); \
347 ((it) = __container_of_safe(__it, typeof(*(it)), field)); \
348 __it = smr_entered_load(&__it->next))
349
350
351/*!
352 * @macro smrq_serialized_first
353 *
354 * @brief
355 * Returns the first element of an SMR queue, while being serialized
356 * by an external mechanism.
357 */
358#define smrq_serialized_first(head, type_t, link) \
359 __container_of_safe(smr_serialized_load(&(head)->first), type_t, link)
360
361/*!
362 * @macro smrq_serialized_next
363 *
364 * @brief
365 * Returns the next element of an SMR queue element, while being serialized
366 * by an external mechanism.
367 */
368#define smrq_serialized_next(elem, field) \
369 __container_of_safe(smr_serialized_load(&(elem)->field.next), \
370 typeof(*(elem)), field)
371
372/*!
373 * @macro smrq_serialized_foreach
374 *
375 * @brief
376 * Enumerates an SMR queue, while being serialized
377 * by an external mechanism.
378 */
379#define smrq_serialized_foreach(it, head, field) \
380 for (__auto_type __it = smr_serialized_load(&(head)->first); \
381 ((it) = __container_of_safe(__it, typeof(*(it)), field)); \
382 __it = smr_serialized_load(&__it->next))
383
384/*!
385 * @macro smrq_serialized_foreach_safe
386 *
387 * @brief
388 * Enumerates an SMR queue, while being serialized
389 * by an external mechanism.
390 *
391 * @discussion
392 * This variant supports removing the current element from the queue.
393 */
394#define smrq_serialized_foreach_safe(it, head, field) \
395 for (__auto_type __it = smr_serialized_load(&(head)->first), \
396 __next_it = __it; \
397 ((it) = __container_of_safe(__it, typeof(*(it)), field)) && \
398 ((__next_it = smr_serialized_load(&__it->next)), 1); \
399 __it = __next_it)
400
401
402/*!
403 * @macro smrq_serialized_insert_head
404 *
405 * @brief
406 * Inserts an element at the head of an SMR queue, while being serialized
407 * by an external mechanism.
408 */
409#define smrq_serialized_insert_head(head, elem) ({ \
410 __auto_type __head = (head); \
411 \
412 __smrq_serialized_insert(&__head->first, (elem), \
413 smr_serialized_load(&__head->first), __smrq_lastp(__head)); \
414})
415
416
417/*!
418 * @macro smrq_serialized_insert_tail
419 *
420 * @brief
421 * Inserts an element at the tail of an SMR queue, while being serialized
422 * by an external mechanism.
423 */
424#define smrq_serialized_insert_tail(head, elem) ({ \
425 __auto_type __head = (head); \
426 \
427 __smrq_serialized_insert(__head->last, (elem), \
428 NULL, &__head->last); \
429})
430
431
432/*!
433 * @macro smrq_serialized_insert_head_relaxed
434 *
435 * @brief
436 * Inserts an element at the head of an SMR queue, while being serialized
437 * by an external mechanism, without any barrier.
438 */
439#define smrq_serialized_insert_head_relaxed(head, elem) ({ \
440 __auto_type __head = (head); \
441 \
442 __smrq_serialized_insert_relaxed(&__head->first, (elem), \
443 smr_serialized_load(&__head->first), __smrq_lastp(__head)); \
444})
445
446
447/*!
448 * @macro smrq_serialized_insert_tail_relaxed
449 *
450 * @brief
451 * Inserts an element at the tail of an SMR queue, while being serialized
452 * by an external mechanism, without any barrier.
453 */
454#define smrq_serialized_insert_tail_relaxed(head, elem) ({ \
455 __auto_type __head = (head); \
456 \
457 __smrq_serialized_insert_relaxed(__head->last, (elem), \
458 NULL, &__head->last); \
459})
460
461
462/*!
463 * @macro smrq_serialized_remove
464 *
465 * @brief
466 * Removes an element from an SMR queue, while being serialized
467 * by an external mechanism.
468 *
469 * @discussion
470 * The @c head argument is actually unused for the @c smrq_list queue type.
 * It is still advised to pass it; the compiler should be able to optimize
 * the code away, as computing a list head ought to have no side effects.
473 */
474#define smrq_serialized_remove(head, elem) ({ \
475 __auto_type __head = (head); \
476 \
477 __smrq_serialized_remove(&__head->first, (elem), __smrq_lastp(__head)); \
478})
479
480
481/*!
482 * @macro smrq_serialized_replace
483 *
484 * @brief
485 * Replaces an element on an SMR queue with another at the same spot,
486 * while being serialized by an external mechanism.
487 */
488#define smrq_serialized_replace(head, old_elem, new_elem) ({ \
489 __auto_type __head = (head); \
490 \
491 __smrq_serialized_replace(&__head->first, \
492 (old_elem), (new_elem), __smrq_lastp(__head)); \
493})
494
495
496/*!
497 * @macro smrq_serialized_iter
498 *
499 * @brief
500 * Enumerates an SMR singly linked queue, while being serialized
501 * by an external mechanism.
502 *
503 * @discussion
 * This is for manual loops that typically perform erasures
 * (a short sketch follows smrq_serialized_iter_erase() below).
505 *
 * The body of the loop must move the cursor exactly once, using:
 * - smrq_serialized_iter_next() to go to the next element,
508 * - smrq_serialized_iter_erase() to erase the current element.
509 *
510 * The iterator variable will _not_ be updated until the next
511 * loop iteration.
512 *
513 * This form is preferred to smrq_serialized_foreach_safe()
514 * for singly linked lists as smrq_serialized_iter_erase()
515 * is O(1) as opposed to smrq_serialized_remove().
516 */
517#define smrq_serialized_iter(it, head, field) \
518 for (__smrq_slink_t *__prev_##it = &(head)->first, \
519 *__chk_##it = __prev_##it; \
520 ((it) = __container_of_safe(smr_serialized_load(__prev_##it), \
521 typeof(*(it)), field)); \
522 assert(__chk_##it), __chk_##it = __prev_##it)
523
524/*!
525 * @macro smrq_serialized_iter_next
526 *
527 * @brief
 * Goes to the next element inside an smrq_serialized_iter() loop.
529 */
530#define smrq_serialized_iter_next(it, field) ({ \
531 assert(__chk_##it == __prev_##it); \
532 __chk_##it = NULL; \
533 __prev_##it = &(it)->field.next; \
534})
535
536/*!
537 * @macro smrq_serialized_iter_erase
538 *
539 * @brief
540 * Erases the element pointed at by the cursor.
541 */
542#define smrq_serialized_iter_erase(it, field) ({ \
543 assert(__chk_##it == __prev_##it); \
544 __chk_##it = NULL; \
545 __smrq_serialized_remove_one(__prev_##it, &(it)->field, NULL); \
546})
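
/*
 * A short sketch of a manual erasure loop over the hypothetical `bucket`
 * list from the earlier queue sketch, serialized by an external lock
 * (elem_is_stale() is hypothetical):
 *
 *     struct elem *it;
 *
 *     smrq_serialized_iter(it, &bucket, link) {
 *         if (elem_is_stale(it)) {
 *             smrq_serialized_iter_erase(it, link);
 *             // reclamation of `it` must still be deferred,
 *             // e.g. with smr_call() or after smr_synchronize()
 *         } else {
 *             smrq_serialized_iter_next(it, link);
 *         }
 *     }
 */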
547
548
549/*!
550 * @macro smrq_serialized_append
551 *
552 * @brief
553 * Appends a given list at the end of the previous one.
554 *
555 * @discussion
556 * /!\ WARNING /!\: this doesn't "move" the "source" queue like *_CONCAT
557 * for <sys/queue.h>, as it is useful to merge/split hash queues concurrently
558 * with readers while allowing readers to still read via the "source" queue.
559 *
560 * However, the "source" queue needs to be reset to a valid state
561 * if it is to be used again.
562 */
563#define smrq_serialized_append(dst, src) ({ \
564 __auto_type __src = (src); \
565 __auto_type __dst = (dst); \
566 \
567 __smrq_serialized_append(&__dst->first, __smrq_lastp(__dst), \
568 &__src->first, __smrq_lastp(__src)); \
569})
570
571
572#pragma mark SMR domains
573
574/*!
575 * @enum smr_flags_t
576 *
577 * @brief
578 * Options to pass to smr_domain_create()
579 *
580 * @const SMR_NONE
581 * Default values for the flags.
582 #if XNU_KERNEL_PRIVATE
583 *
584 * @const SMR_SLEEPABLE
585 * Create a sleepable SMR domain.
586 #endif
587 */
588__options_closed_decl(smr_flags_t, unsigned long, {
589 SMR_NONE = 0x00000000,
590#if XNU_KERNEL_PRIVATE
591 SMR_SLEEPABLE = 0x00000001,
592#endif
593});
594
595/*!
596 * @function smr_domain_create()
597 *
598 * @brief
599 * Create an SMR domain.
600 *
601 * @discussion
602 * Be mindful when creating SMR domains, and consider carefully
 * whether to add a new one or consolidate with an existing one.
604 *
605 *
606 * Memory usage
607 * ~~~~~~~~~~~~
608 *
609 * SMR domains are fairly large structures that scale with the number
 * of cores of the machine. They are meant to be used in a coarse-grained
611 * manner.
612 *
613 * In addition to that, when @c smr_call() is used with the domain,
614 * the queues of callbacks are drained based on memory pressure within
615 * the domain. The more domains, the more dormant memory might exist.
616 *
 * In general, memory considerations drive toward fewer domains.
618 *
619 *
620 * Scalability
621 * ~~~~~~~~~~~
622 *
623 * An SMR domain is built on top of an atomic state that is used
624 * to perform grace period detection. The more "write" activity
625 * there is on the domain (@c smr_call(), @c smr_advance(), etc...),
 * the more contended this atomic might become. In particular,
627 * certain usage patterns might scale extremely well independently,
628 * but cause higher contention when sharing a domain.
629 *
630 * Another thing to consider is that when @c smr_call() is being used,
631 * if the callbacks act on vastly different data structures, then as
632 * the callbacks are being drained, cache misses will be higher.
633 *
 * However, the more domains are in use, the more likely it is
 * that touching any given one will cause a cache miss.
636 *
637 * Generally, scalability considerations drive toward balanced
638 * coarse-grained domains.
639 *
640 *
641 * Invariants
642 * ~~~~~~~~~~
643 *
 * The last aspect in the decision of creating versus reusing
 * an SMR domain is the invariants that these domains protect.
646 *
647 * Object graphs that are protected with SMR and are used together
 * in many workloads will likely need to share an SMR domain
 * in order to provide the required guarantees. If @c smr_call()
 * callbacks in a given domain regularly cause downstream @c smr_call()s
 * into another domain, that is probably a sign that these
 * domains should be shared.
653 *
654 * Another aspect to consider is that using @c smr_synchronize()
655 * or @c smr_barrier() can lead to two classes of problems:
656 *
657 * - these operations are extremely heavy, and if some subsystem needs to
658 * perform them on several domains, performance will be disappointing.
659 *
660 * - these operations are akin to taking a "write lock" on the domain,
661 * and as such can cause deadlocks when used improperly.
662 * Using a coarser grained unique domain is a good way to simplify
663 * reasoning about the locking dependencies between SMR domains
664 * and other regular locks.
665 *
666 *
667 * Guidance
668 * ~~~~~~~~
669 *
670 * In general, the entire kernel should have relatively few SMR domains,
671 * at the scale of the big subsystems of the kernel (think: Mach IPC, Mach VM,
672 * VFS, Networking, ...).
673 *
 * When write operations (@c smr_call(), @c smr_synchronize(), ...) are used
675 * rarely, consider using the system wide default domains.
676 */
677extern smr_t smr_domain_create(smr_flags_t flags, const char *name);
678
679/*!
680 * @function smr_domain_free()
681 *
682 * @brief
 * Destroys an SMR domain previously created with @c smr_domain_create().
684 */
685extern void smr_domain_free(smr_t smr);
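
/*
 * A minimal sketch of a subsystem-private domain; the `widget` names
 * are illustrative.
 *
 *     static smr_t widget_smr;
 *
 *     void
 *     widget_subsystem_init(void)
 *     {
 *         widget_smr = smr_domain_create(SMR_NONE, "widget");
 *     }
 *
 *     void
 *     widget_subsystem_fini(void)
 *     {
 *         smr_domain_free(widget_smr);
 *     }
 */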
686
687
688/*!
689 * @function smr_entered()
690 *
691 * @brief
692 * Returns whether an SMR critical section is entered.
693 */
694extern bool smr_entered(smr_t smr) __result_use_check;
695
696/*!
697 * @function smr_enter()
698 *
699 * @brief
700 * Enter a non preemptible SMR critical section.
701 *
702 * @discussion
 * Entering an SMR critical section is not reentrant
 * (entering it recursively is undefined and will panic on development kernels).
705 *
706 * @c smr_leave() must be called to end this section.
707 *
 * This function cannot be used from interrupt context.
709 */
710extern void smr_enter(smr_t smr);
711
712/*!
713 * @function smr_leave()
714 *
715 * @brief
716 * Leave a non preemptible SMR critical section.
717 */
718extern void smr_leave(smr_t smr);
719
720
721/*!
722 * @function smr_call()
723 *
724 * @brief
725 * Defer making a call until it is safe to assume all readers
726 * will observe any update prior to this call.
727 *
728 * @discussion
729 * The target SMR domain must NOT be entered when making this call.
730 *
 * The passed @c size doesn't have to be precise; it should be a rough
 * estimate of the memory that will be reclaimed when the call is made.
733 *
734 * This function gives no guarantee of forward progress,
735 * unless the magic SMR_CALL_EXPEDITE size is passed to @c smr_call().
736 *
 * This function cannot be used from interrupt context.
738 */
739extern void smr_call(smr_t smr, smr_node_t node, vm_size_t size, smr_cb_t cb);
740
741#define SMR_CALL_EXPEDITE ((vm_size_t)~0)
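
/*
 * A minimal deferred-free sketch.  The `struct widget` type, its embedded
 * `w_node` field and the `widget_smr` domain are hypothetical, and the
 * callback is assumed to receive the node it was registered with.
 *
 *     struct widget {
 *         struct smr_node  w_node;
 *         uint64_t         w_id;
 *     };
 *
 *     static void
 *     widget_free_cb(smr_node_t node)
 *     {
 *         struct widget *w = __container_of(node, struct widget, w_node);
 *
 *         kfree_type(struct widget, w);
 *     }
 *
 *     // after unpublishing `w` from every SMR-visible structure:
 *     smr_call(widget_smr, &w->w_node, sizeof(*w), widget_free_cb);
 */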
742
743/*!
744 * @function smr_synchronize()
745 *
746 * @brief
747 * Wait until all readers have observed any updates made prior to this call.
748 *
749 * @discussion
750 * The target SMR domain must NOT be entered when making this call.
751 *
752 * This function is quite expensive, and asynchronous deferred processing
753 * using @c smr_call() should be used instead when possible.
754 *
755 * Reserve using this call for events that are extremely rare (like system
756 * configuration events such as configuring networking interfaces, changing
757 * system wide security policies, or loading/unloading a kernel extension).
758 *
759 * This function should typically be called with preemption enabled,
760 * and no locks held.
761 */
762extern void smr_synchronize(smr_t smr);
763
764/*!
765 * @function smr_barrier()
766 *
767 * @brief
768 * Wait until all readers have observed any updates made prior to this call,
769 * and all @c smr_call() callbacks dispatched prior to this call on any core
770 * have completed.
771 *
772 * @discussion
773 * The target SMR domain must NOT be entered when making this call.
774 *
775 * This function is typically used when some data structure is being
776 * accessed by @c smr_call() callbacks and that data structure needs
777 * to be retired.
778 *
779 * Reserve using this call for events that are extremely rare (like system
780 * configuration events such as configuring networking interfaces, changing
781 * system wide security policies, or loading/unloading a kernel extension).
782 *
783 * This function should typically be called with preemption enabled,
784 * and no locks held.
785 */
786extern void smr_barrier(smr_t smr);
787
788
789#ifdef XNU_KERNEL_PRIVATE
790#pragma GCC visibility push(hidden)
791#pragma mark - XNU only
792#pragma mark XNU only: SMR domains advanced
793
794#define SMR_SEQ_INVALID ((smr_seq_t)0)
795#define SMR_SEQ_SLEEPABLE ((smr_seq_t)1) /* only on smr_pcpu::rd_seq */
796#define SMR_SEQ_INIT ((smr_seq_t)2)
797#define SMR_SEQ_INC ((smr_seq_t)4)
798
799typedef long smr_delta_t;
800
801#define SMR_SEQ_DELTA(a, b) ((smr_delta_t)((a) - (b)))
802#define SMR_SEQ_CMP(a, op, b) (SMR_SEQ_DELTA(a, b) op 0)
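
/*
 * For illustration, sequence numbers are compared through a signed delta,
 * so comparisons stay meaningful across counter wraparound (as long as the
 * two sequences are within half the counter range of each other):
 *
 *     SMR_SEQ_CMP(a, >=, b)    // true when `a` is at or past `b`,
 *                              // even if the clock wrapped around
 */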
803
804/*!
805 * @typedef smr_clock_t
806 *
807 * @brief
808 * Represents an SMR domain clock, internal type not manipulated by clients.
809 */
810typedef struct {
811 smr_seq_t s_rd_seq;
812 smr_seq_t s_wr_seq;
813} smr_clock_t;
814
815#define SMR_NAME_MAX 24
816
817/*!
818 * @typedef smr_t
819 *
820 * @brief
821 * Declares an SMR domain of synchronization.
822 */
823struct smr {
824 smr_clock_t smr_clock;
825 struct smr_pcpu *smr_pcpu;
826 unsigned long smr_flags;
827 unsigned long smr_early;
828 char smr_name[SMR_NAME_MAX];
829} __attribute__((aligned(64)));
830
831/*!
832 * @macro SMR_DEFINE_FLAGS
833 *
834 * @brief
835 * Define an SMR domain with specific create flags.
836 */
837#define SMR_DEFINE_FLAGS(var, name, flags) \
838 struct smr var = { \
839 .smr_clock.s_rd_seq = SMR_SEQ_INIT, \
840 .smr_clock.s_wr_seq = SMR_SEQ_INIT, \
841 .smr_flags = (flags), \
842 .smr_name = "" name, \
843 }; \
844 STARTUP_ARG(TUNABLES, STARTUP_RANK_LAST, __smr_domain_init, &(var)); \
845 STARTUP_ARG(ZALLOC, STARTUP_RANK_LAST, __smr_domain_init, &(var))
846
847/*!
848 * @macro SMR_DEFINE
849 *
850 * @brief
851 * Define an SMR domain.
852 */
853#define SMR_DEFINE(var, name) \
854 SMR_DEFINE_FLAGS(var, name, SMR_NONE)
855
856
857/*!
858 * @macro SMR_DEFINE_SLEEPABLE
859 *
860 * @brief
861 * Define a sleepable SMR domain.
862 */
863#define SMR_DEFINE_SLEEPABLE(var, name) \
864 SMR_DEFINE_FLAGS(var, name, SMR_SLEEPABLE)
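
/*
 * A minimal sketch of a statically defined domain (the `widget` name is
 * illustrative):
 *
 *     SMR_DEFINE(widget_smr, "widget");
 *
 *     // readers and writers then use &widget_smr
 *     smr_enter(&widget_smr);
 *     ...
 *     smr_leave(&widget_smr);
 */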
865
866
867/*!
868 * @function smr_advance()
869 *
870 * @brief
871 * Advance the write sequence and return the value
872 * for use as a wait goal.
873 *
874 * @discussion
875 * This guarantees that any changes made by the calling thread
876 * prior to this call will be visible to all threads after
877 * the read sequence meets or exceeds the return value.
878 */
879extern smr_seq_t smr_advance(smr_t smr) __result_use_check;
880
881/*!
882 * @function smr_deferred_advance()
883 *
884 * @brief
885 * Pretend-advance the write sequence and return the value
886 * for use as a wait goal.
887 *
888 * @discussion
889 * This guarantees that any changes made by the calling thread
890 * prior to this call will be visible to all threads after
891 * the read sequence meets or exceeds the return value.
892 *
893 * Unlike smr_advance(), the global clock isn't really advanced,
894 * it only sets a goal in the future. This can be used to control
895 * the pace of updating the global clock and avoid global atomics.
896 *
897 * In order for the clock to advance, clients of this API must call
898 * @c smr_deferred_advance_commit() with the goal returned by this call.
899 *
900 * Note that calls to @c smr_advance() or @c smr_wait() when passed
901 * the goal returned by this function would also allow the clock
902 * to make progress and are legal (yet less efficient) calls to make.
903 */
904extern smr_seq_t smr_deferred_advance(smr_t smr) __result_use_check;
905
906/*!
907 * @function smr_deferred_advance_commit()
908 *
909 * @brief
910 * Actually advance the write sequence to the goal returned by a previous
911 * call to @c smr_deferred_advance().
912 */
913extern void smr_deferred_advance_commit(smr_t smr, smr_seq_t seq);
914
915
916/*!
917 * @function smr_poll
918 *
919 * @brief
920 * Poll to determine whether all readers have observed the @c goal
921 * write sequence number.
922 *
923 * @discussion
 * This function is safe to call from preemption-disabled context,
 * and its worst-case complexity is O(ncpu).
926 *
927 * @returns true if the goal is met and false if not.
928 */
929extern bool smr_poll(smr_t smr, smr_seq_t goal) __result_use_check;
930
931/*!
932 * @function smr_wait
933 *
934 * @brief
935 * Wait until all readers have observed
936 * the @c goal write sequence number.
937 *
938 * @discussion
 * This function is safe to call from preemption-disabled context
 * as it never explicitly blocks; however, this is not recommended.
941 */
942extern void smr_wait(smr_t smr, smr_seq_t goal);
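
/*
 * A typical retire sketch for some domain `smr`, using a goal-based wait
 * rather than smr_synchronize(); the unpublish step is subsystem specific.
 *
 *     // unpublish `obj` from every SMR-visible structure, then:
 *     smr_seq_t goal = smr_advance(smr);
 *
 *     // ... optionally batch other work here ...
 *
 *     if (!smr_poll(smr, goal)) {
 *         smr_wait(smr, goal);
 *     }
 *     // all readers have observed the removal, `obj` can be freed
 */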
943
944
945#pragma mark XNU only: major sleepable SMR domains
946/*
947 * Note: this is private for now because sleepable sections that do "bad" things
 * (such as making an upcall to userspace, or performing VM allocations)
 * run the risk of stalling the reclamation worker threads,
950 * which are a singleton resource.
951 *
952 * Until this can be mitigated or designed better, this stays private.
953 */
954
955/*!
956 * @typedef smr_tracker_t
957 *
958 * @brief
959 * Structure used to track active sleepable SMR sections.
960 *
961 * @field smrt_domain the entered SMR domain
962 * @field smrt_seq the SMR sequence at the time of smr_enter_sleepable().
963 * @field smrt_link linkage used to track stalled sections.
964 * @field smrt_stack linkage used to track entered sections.
965 * @field smrt_ctid (if stalled) the ctid of the thread in this section.
966 * @field smrt_cpu (if stalled) the cpu the thread was on when stalled.
967 */
968typedef struct smr_tracker {
969 smr_t smrt_domain;
970 smr_seq_t smrt_seq;
971 struct smrq_link smrt_link;
972 struct smrq_slink smrt_stack;
973 uint32_t smrt_ctid;
974 int smrt_cpu;
975} *smr_tracker_t;
976
977/*!
978 * @function smr_enter_sleepable()
979 *
980 * @brief
981 * Enter a sleepable SMR critical section.
982 *
983 * @discussion
 * Entering an SMR critical section is not reentrant
 * (entering it recursively is undefined and will panic on development kernels).
986 *
987 * @c smr_leave_sleepable() must be called to end this section,
988 * passing the same tracker pointer.
989 *
990 * The SMR domain must have been created with the @c SMR_SLEEPABLE flag.
991 *
 * It is permitted to perform operations that might block within such a section,
993 * such as acquiring a lock, or freeing memory.
994 *
995 * It is forbidden to perform operations that wait for an unbounded amount of
996 * time such as waiting for networking packets or even a hardware driver event,
997 * as these could cause grace periods (and memory reclamation) to stall for
998 * a very long time.
999 */
1000extern void smr_enter_sleepable(smr_t smr, smr_tracker_t tracker);
1001
1002/*!
1003 * @function smr_leave_sleepable()
1004 *
1005 * @brief
1006 * Leave a sleepable SMR critical section entered with @c smr_enter_sleepable().
1007 */
1008extern void smr_leave_sleepable(smr_t smr, smr_tracker_t tracker);
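
/*
 * A minimal sketch of a sleepable section, using the system-wide
 * sleepable domain declared below:
 *
 *     struct smr_tracker t;
 *
 *     smr_enter_sleepable(&smr_system_sleepable, &t);
 *     // bounded blocking operations (locks, allocations, ...) are allowed
 *     smr_leave_sleepable(&smr_system_sleepable, &t);
 */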
1009
1010
1011#pragma mark XNU only: major subsystems SMR domains
1012
1013/*!
1014 * @brief
1015 * A global system wide non preemptible domain.
1016 *
1017 * @discussion
1018 * This is provided as a fallback for when a specific SMR domain
1019 * would be overkill.
1020 *
 * Try not to use the @c smr_system name directly; instead, define
 * a subsystem domain that happens to be defined to it, so that
 * the invariants being provided are easier to understand.
1024 */
1025extern struct smr smr_system;
1026
1027/*!
1028 * @brief
1029 * A global system wide sleepable domain.
1030 *
1031 * @discussion
1032 * This is provided as a fallback for when a specific SMR domain
1033 * would be overkill.
1034 *
 * Try not to use the @c smr_system_sleepable name directly;
 * instead, define a subsystem domain that happens to be defined to it,
 * so that the invariants being provided are easier to understand.
1038 */
1039extern struct smr smr_system_sleepable;
1040
1041
1042/*!
1043 * @macro smr_ipc
1044 *
1045 * @brief
1046 * The SMR domain for the Mach IPC subsystem.
1047 */
1048#define smr_ipc smr_system
1049#define smr_ipc_entered() smr_entered(&smr_ipc)
1050#define smr_ipc_enter() smr_enter(&smr_ipc)
1051#define smr_ipc_leave() smr_leave(&smr_ipc)
1052
1053#define smr_ipc_call(n, sz, cb) smr_call(&smr_ipc, n, sz, cb)
1054#define smr_ipc_synchronize() smr_synchronize(&smr_ipc)
1055#define smr_ipc_barrier() smr_barrier(&smr_ipc)
1056
1057
1058/*!
1059 * @macro smr_proc_task
1060 *
1061 * @brief
1062 * The SMR domain for the proc/task and adjacent objects.
1063 */
1064#define smr_proc_task smr_system
1065#define smr_proc_task_entered() smr_entered(&smr_proc_task)
1066#define smr_proc_task_enter() smr_enter(&smr_proc_task)
1067#define smr_proc_task_leave() smr_leave(&smr_proc_task)
1068
1069#define smr_proc_task_call(n, sz, cb) smr_call(&smr_proc_task, n, sz, cb)
1070#define smr_proc_task_synchronize() smr_synchronize(&smr_proc_task)
1071#define smr_proc_task_barrier() smr_barrier(&smr_proc_task)
1072
1073
1074/*!
1075 * @macro smr_iokit
1076 *
1077 * @brief
1078 * The SMR domain for IOKit
1079 */
1080#define smr_iokit smr_system
1081#define smr_iokit_entered() smr_entered(&smr_iokit)
1082#define smr_iokit_enter() smr_enter(&smr_iokit)
1083#define smr_iokit_leave() smr_leave(&smr_iokit)
1084
1085#define smr_iokit_call(n, sz, cb) smr_call(&smr_iokit, n, sz, cb)
1086#define smr_iokit_synchronize() smr_synchronize(&smr_iokit)
1087#define smr_iokit_barrier() smr_barrier(&smr_iokit)
1088
1089
1090/*!
1091 * @macro smr_oslog
1092 *
1093 * @brief
1094 * The SMR domain for kernel OSLog handles.
1095 */
1096#define smr_oslog smr_system
1097#define smr_oslog_entered() smr_entered(&smr_oslog)
1098#define smr_oslog_enter() smr_enter(&smr_oslog)
1099#define smr_oslog_leave() smr_leave(&smr_oslog)
1100
1101#define smr_oslog_call(n, sz, cb) smr_call(&smr_oslog, n, sz, cb)
1102#define smr_oslog_synchronize() smr_synchronize(&smr_oslog)
1103#define smr_oslog_barrier() smr_barrier(&smr_oslog)
1104
1105
1106#pragma mark XNU only: implementation details
1107
1108extern void __smr_domain_init(smr_t);
1109
1110#ifdef MACH_KERNEL_PRIVATE
1111struct processor;
1112
1113extern bool smr_entered_cpu_noblock(smr_t smr, int cpu) __result_use_check;
1114
1115extern void smr_ack_ipi(void);
1116
1117extern void smr_mark_active_trackers_stalled(struct thread *self);
1118
1119__options_closed_decl(smr_cpu_reason_t, uint8_t, {
1120 SMR_CPU_REASON_NONE = 0x00,
1121 SMR_CPU_REASON_OFFLINE = 0x01,
1122 SMR_CPU_REASON_IGNORED = 0x02,
1123 SMR_CPU_REASON_ALL = 0x03,
1124});
1125
1126extern void smr_cpu_init(struct processor *);
1127extern void smr_cpu_up(struct processor *, smr_cpu_reason_t);
1128extern void smr_cpu_down(struct processor *, smr_cpu_reason_t);
1129
1130extern void smr_cpu_join(struct processor *, uint64_t ctime);
1131extern void smr_cpu_tick(uint64_t ctime, bool safe_point);
1132extern void smr_cpu_leave(struct processor *, uint64_t ctime);
1133
1134extern void smr_maintenance(uint64_t ctime);
1135
1136#if CONFIG_QUIESCE_COUNTER
1137extern void cpu_quiescent_set_storage(uint64_t _Atomic *ptr);
1138#endif
1139#endif /* MACH_KERNEL_PRIVATE */
1140
1141extern uint32_t smr_cpu_checkin_get_min_interval_us(void);
1142
1145extern void smr_cpu_checkin_set_min_interval_us(uint32_t new_value);
1146
1147#pragma GCC visibility pop
1148#endif /* XNU_KERNEL_PRIVATE */
1149#pragma mark - implementation details
1150#pragma mark implementation details: SMR queues
1151
1152extern void __smr_linkage_invalid(__smrq_link_t *link) __abortlike;
1153extern void __smr_stail_invalid(__smrq_slink_t *link, __smrq_slink_t *last) __abortlike;
1154extern void __smr_tail_invalid(__smrq_link_t *link, __smrq_link_t *last) __abortlike;
1155
1156__attribute__((always_inline, overloadable))
1157static inline __smrq_slink_t **
1158__smrq_lastp(struct smrq_slist_head *head __unused)
1159{
1160 return NULL;
1161}
1162
1163__attribute__((always_inline, overloadable))
1164static inline __smrq_link_t **
1165__smrq_lastp(struct smrq_list_head *head __unused)
1166{
1167 return NULL;
1168}
1169
1170__attribute__((always_inline, overloadable))
1171static inline __smrq_slink_t **
1172__smrq_lastp(struct smrq_stailq_head *head)
1173{
1174 __smrq_slink_t **last = &head->last;
1175
1176 __builtin_assume(last != NULL);
1177 return last;
1178}
1179
1180__attribute__((always_inline, overloadable))
1181static inline __smrq_link_t **
1182__smrq_lastp(struct smrq_tailq_head *head)
1183{
1184 __smrq_link_t **last = &head->last;
1185
1186 __builtin_assume(last != NULL);
1187 return last;
1188}
1189
1190
1191__attribute__((always_inline, overloadable))
1192static inline void
1193__smrq_serialized_insert(
1194 __smrq_slink_t *prev,
1195 struct smrq_slink *elem,
1196 struct smrq_slink *next,
1197 __smrq_slink_t **lastp)
1198{
1199 if (next == NULL && lastp) {
1200 if (*lastp != prev || smr_serialized_load(prev)) {
			__smr_stail_invalid(prev, *lastp);
1202 }
1203 }
1204
1205 smr_serialized_store_relaxed(&elem->next, next);
1206 smr_serialized_store(prev, elem);
1207 if (next == NULL && lastp) {
1208 *lastp = &elem->next;
1209 }
1210}
1211
1212__attribute__((always_inline, overloadable))
1213static inline void
1214__smrq_serialized_insert(
1215 __smrq_link_t *prev,
1216 struct smrq_link *elem,
1217 struct smrq_link *next,
1218 __smrq_link_t **lastp)
1219{
1220 if (next != NULL && next->prev != prev) {
		__smr_linkage_invalid(prev);
1222 }
1223 if (next == NULL && lastp) {
1224 if (*lastp != prev || smr_serialized_load(prev)) {
			__smr_tail_invalid(prev, *lastp);
1226 }
1227 }
1228
1229 smr_serialized_store_relaxed(&elem->next, next);
1230 elem->prev = prev;
1231 smr_serialized_store(prev, elem);
1232
1233 if (next != NULL) {
1234 next->prev = &elem->next;
1235 } else if (lastp) {
1236 *lastp = &elem->next;
1237 }
1238}
1239
1240
1241__attribute__((always_inline, overloadable))
1242static inline void
1243__smrq_serialized_insert_relaxed(
1244 __smrq_slink_t *prev,
1245 struct smrq_slink *elem,
1246 struct smrq_slink *next,
1247 __smrq_slink_t **lastp)
1248{
1249 if (next == NULL && lastp) {
1250 if (*lastp != prev || smr_serialized_load(prev)) {
			__smr_stail_invalid(prev, *lastp);
1252 }
1253 }
1254
1255 smr_serialized_store_relaxed(&elem->next, next);
1256 smr_serialized_store_relaxed(prev, elem);
1257 if (next == NULL && lastp) {
1258 *lastp = &elem->next;
1259 }
1260}
1261
1262__attribute__((always_inline, overloadable))
1263static inline void
1264__smrq_serialized_insert_relaxed(
1265 __smrq_link_t *prev,
1266 struct smrq_link *elem,
1267 struct smrq_link *next,
1268 __smrq_link_t **lastp)
1269{
1270 if (next != NULL && next->prev != prev) {
		__smr_linkage_invalid(prev);
1272 }
1273 if (next == NULL && lastp) {
1274 if (*lastp != prev || smr_serialized_load(prev)) {
			__smr_tail_invalid(prev, *lastp);
1276 }
1277 }
1278
1279 smr_serialized_store_relaxed(&elem->next, next);
1280 elem->prev = prev;
1281 smr_serialized_store_relaxed(prev, elem);
1282
1283 if (next != NULL) {
1284 next->prev = &elem->next;
1285 } else if (lastp) {
1286 *lastp = &elem->next;
1287 }
1288}
1289
1290
1291__attribute__((always_inline, overloadable))
1292static inline void
1293__smrq_serialized_remove_one(
1294 __smrq_slink_t *prev,
1295 struct smrq_slink *elem,
1296 __smrq_slink_t **lastp)
1297{
1298 struct smrq_slink *next;
1299
1300 /*
1301 * Removal "skips" a link this way:
1302 *
1303 * e1 ---> e2 ---> e3 becomes e1 -----------> e3
1304 *
1305 * When e3 was inserted, a release barrier was issued
1306 * by smr_serialized_store(). We do not need to issue
1307 * a release barrier upon removal because `next` carries
1308 * a dependency on that smr_serialized_store()d value.
1309 */
1310 next = smr_serialized_load(&elem->next);
1311 smr_serialized_store_relaxed(prev, next);
1312 if (next == NULL && lastp) {
1313 *lastp = prev;
1314 }
1315}
1316
1317__attribute__((always_inline, overloadable))
1318static inline void
1319__smrq_serialized_remove_one(
1320 __smrq_link_t *prev,
1321 struct smrq_link *elem,
1322 __smrq_link_t **lastp)
1323{
1324 struct smrq_link *next;
1325
1326 next = smr_serialized_load(&elem->next);
1327
1328 if (smr_serialized_load(prev) != elem) {
		__smr_linkage_invalid(prev);
1330 }
1331 if (next && next->prev != &elem->next) {
		__smr_linkage_invalid(&elem->next);
1333 }
1334
1335 /*
1336 * Removal "skips" a link this way:
1337 *
1338 * e1 ---> e2 ---> e3 becomes e1 -----------> e3
1339 *
1340 * When e3 was inserted, a release barrier was issued
1341 * by smr_serialized_store(). We do not need to issue
1342 * a release barrier upon removal because `next` carries
1343 * a dependency on that smr_serialized_store()d value.
1344 */
1345 smr_serialized_store_relaxed(prev, next);
1346
1347 if (next != NULL) {
1348 next->prev = prev;
1349 } else if (lastp) {
1350 *lastp = prev;
1351 }
1352 elem->prev = NULL;
1353}
1354
1355
1356__attribute__((always_inline, overloadable))
1357static inline void
1358__smrq_serialized_remove(
1359 __smrq_slink_t *first,
1360 struct smrq_slink *elem,
1361 __smrq_slink_t **lastp)
1362{
1363 __smrq_slink_t *prev = first;
1364 struct smrq_slink *cur;
1365
1366 while ((cur = smr_serialized_load(prev)) != elem) {
1367 prev = &cur->next;
1368 }
1369
1370 __smrq_serialized_remove_one(prev, elem, lastp);
1371}
1372
1373__attribute__((always_inline, overloadable))
1374static inline void
1375__smrq_serialized_remove(
1376 __smrq_link_t *first __unused,
1377 struct smrq_link *elem,
1378 __smrq_link_t **lastp)
1379{
	__smrq_serialized_remove_one(elem->prev, elem, lastp);
1381}
1382
1383
1384__attribute__((always_inline, overloadable))
1385static inline void
1386__smrq_serialized_replace(
1387 __smrq_slink_t *first,
1388 struct smrq_slink *old_elem,
1389 struct smrq_slink *new_elem,
1390 __smrq_slink_t **lastp)
1391{
1392 __smrq_slink_t *prev = first;
1393 struct smrq_slink *cur;
1394 struct smrq_slink *next;
1395
1396 while ((cur = smr_serialized_load(prev)) != old_elem) {
1397 prev = &cur->next;
1398 }
1399
1400 next = smr_serialized_load(&old_elem->next);
1401 smr_serialized_store_relaxed(&new_elem->next, next);
1402 smr_serialized_store(prev, new_elem);
1403
1404 if (next == NULL && lastp) {
1405 *lastp = &new_elem->next;
1406 }
1407}
1408
1409__attribute__((always_inline, overloadable))
1410static inline void
1411__smrq_serialized_replace(
1412 __smrq_link_t *first __unused,
1413 struct smrq_link *old_elem,
1414 struct smrq_link *new_elem,
1415 __smrq_link_t **lastp)
1416{
1417 __smrq_link_t *prev;
1418 struct smrq_link *next;
1419
1420 prev = old_elem->prev;
1421 next = smr_serialized_load(&old_elem->next);
1422
1423 if (smr_serialized_load(prev) != old_elem) {
		__smr_linkage_invalid(prev);
1425 }
1426 if (next && next->prev != &old_elem->next) {
		__smr_linkage_invalid(&old_elem->next);
1428 }
1429
1430 smr_serialized_store_relaxed(&new_elem->next, next);
1431 new_elem->prev = prev;
1432 smr_serialized_store(prev, new_elem);
1433
1434 if (next != NULL) {
1435 next->prev = &new_elem->next;
1436 } else if (lastp) {
1437 *lastp = &new_elem->next;
1438 }
1439 old_elem->prev = NULL;
1440}
1441
1442__attribute__((always_inline, overloadable))
1443static inline void
1444__smrq_serialized_append(
1445 __smrq_slink_t *dst_first,
1446 __smrq_slink_t **dst_lastp,
1447 __smrq_slink_t *src_first,
1448 __smrq_slink_t **src_lastp)
1449{
1450 struct smrq_slink *src = smr_serialized_load(src_first);
1451 struct smrq_slink *dst;
1452
1453 if (dst_lastp) {
1454 if (src) {
1455 smr_serialized_store_relaxed(*dst_lastp, src);
1456 *dst_lastp = *src_lastp;
1457 }
1458 } else {
1459 while ((dst = smr_serialized_load(dst_first))) {
1460 dst_first = &dst->next;
1461 }
1462 smr_serialized_store_relaxed(dst_first, src);
1463 }
1464}
1465
1466__attribute__((always_inline, overloadable))
1467static inline void
1468__smrq_serialized_append(
1469 __smrq_link_t *dst_first,
1470 __smrq_link_t **dst_lastp,
1471 __smrq_link_t *src_first,
1472 __smrq_link_t **src_lastp)
1473{
1474 struct smrq_link *src = smr_serialized_load(src_first);
1475 struct smrq_link *dst;
1476
1477 if (dst_lastp) {
1478 if (src) {
1479 smr_serialized_store_relaxed(*dst_lastp, src);
1480 src->prev = *dst_lastp;
1481 *dst_lastp = *src_lastp;
1482 }
1483 } else {
1484 while ((dst = smr_serialized_load(dst_first))) {
1485 dst_first = &dst->next;
1486 }
1487 smr_serialized_store_relaxed(dst_first, src);
		src->prev = dst_first;
1489 }
1490}
1491
1492__END_DECLS
1493
1494#endif /* _KERN_SMR_H_ */
1495