1/*
2 * Copyright (c) 2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#ifndef _KERN_MPSC_QUEUE_H_
30#define _KERN_MPSC_QUEUE_H_
31
32#ifdef XNU_KERNEL_PRIVATE
33
34#include <machine/atomic.h>
35#include <kern/macro_help.h>
36#include <kern/thread_call.h>
37
38#endif // XNU_KERNEL_PRIVATE
39
40#include <sys/cdefs.h>
41
42__BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN
43
44/*!
45 * @typedef struct mpsc_queue_chain
46 *
47 * @brief
48 * Type for the intrusive linkage used by MPSC queues.
49 */
50typedef struct mpsc_queue_chain {
51#if __has_ptrcheck // work around 78354145
52 struct mpsc_queue_chain *volatile mpqc_next;
53#else
54 struct mpsc_queue_chain *_Atomic mpqc_next;
55#endif
56} *mpsc_queue_chain_t;
57
58/*!
59 * @typedef struct mpsc_queue_head
60 *
61 * @brief
62 * The type for a multi-producer single-consumer queue.
63 *
64 * @discussion
65 * MPSC queues allow for producers to not be affected by other producers or the
66 * consumer. Which means in turn that having producers in interrupt context
67 * does not require that other producers disable interrupts like a traditional
68 * spinlock based approach would require.
69 *
70 * These queues shine when data is produced from the entire system and is
71 * consumed from a single serial context (logging, tracing, ...).
72 * mpsc_daemon_queue_t is provided as a fully ready/easy-to-use pre-packaged
73 * solution for these common use cases.
74 *
75 * - mpsc_queue_append() can be used to append a single item
76 * - mpsc_queue_append_list() can be used to append a batch of items at once.
77 *
78 * Functions for the consumer side assume proper serialization that is not
79 * provided by the MPSC queue itself. Dequeuing doesn't require preemption
80 * to be disabled.
81 *
82 * <h2>Algorithm</h2>
83 *
84 * The base of the enqueue algorithm is a single atomic exchange (first half,
85 * called __mpsc_queue_append_update_tail) and a list fixup (2nd half, called
86 * __mpsc_queue_append_update_prev).
87 *
88 * Graphically, enqueuing `X` looks like this, with each step being done
89 * atomically (for the empty queue case, `tail` points to `head`):
90 *
91 * | orig state | update_tail | update_prev |
92 * +---------------------+---------------------+---------------------+
93 * | | | |
94 * | head -> e1 -> e2 -. | head -> e1 -> e2 -. | head -> e1 -> e2 -. |
95 * | | | | | | |
96 * | ,- ... <--' | ,- ... <--' | ,- ... <--' |
97 * | | | | | | |
98 * | v | v | v |
99 * | tail -> eN -> NULL | tail eN -> NULL | tail eN |
100 * | | | | | | |
101 * | | | | | v |
102 * | X -> NULL | `---> X -> NULL | '---> X -> NULL |
103 * | | | |
104 * +---------------------+---------------------+---------------------+
105 *
106 *
107 * There is a small 1-instruction gap of inconsistency which makes the chosen
108 * algorithm non linearizable, and requires enqueuers to disable preemption
109 * during the enqueue so as not to starve the consumer forever.
110 *
111 * As far as memory visibility is concerned, enqueuing uses a release fence in
112 * update_tail which pairs with memory fences in mpsc_queue_dequeue_batch().
113 *
114 * Note: as far as the data structure in memory, its layout is equivalent to
115 * a BSD <sys/queue.h> STAILQ. However because of this inconsistency
116 * window and memory ordering concerns, it is incorrect to use STAILQ
117 * macros on an MPSC queue.
118 */
119typedef struct mpsc_queue_head {
120 struct mpsc_queue_chain mpqh_head;
121#if __has_ptrcheck // work around 78354145
122 struct mpsc_queue_chain *volatile mpqh_tail;
123#else
124 struct mpsc_queue_chain *_Atomic mpqh_tail;
125#endif
126} *mpsc_queue_head_t;
127
128/*!
129 * @macro MPSC_QUEUE_INITIALIZER
130 *
131 * @brief
132 * Macro to use in static initializers for mpsc queues.
133 *
134 * @param head
135 * The name of the variable to initialize.
136 */
137#define MPSC_QUEUE_INITIALIZER(head) { .mpqh_tail = &(head).mpqh_head }
138
139#ifdef XNU_KERNEL_PRIVATE
140
141/*!
142 * @function mpsc_queue_init
143 *
144 * @brief
145 * Dynamically initialize an mpsc queue.
146 *
147 * @discussion
148 * This initialization assumes that the object holding the queue head
149 * is initialized before it can be made visible to other threads/cores.
150 *
151 * @param q
152 * The queue to initialize.
153 */
154static inline void
155mpsc_queue_init(mpsc_queue_head_t q)
156{
157 os_atomic_init(&q->mpqh_head.mpqc_next, NULL);
158 os_atomic_init(&q->mpqh_tail, &q->mpqh_head);
159}
160
161/*!
162 * @typedef enum mpsc_queue_options
163 */
164typedef enum mpsc_queue_options {
165 MPSC_QUEUE_NONE = 0,
166 MPSC_QUEUE_DISABLE_PREEMPTION = 1 << 0,
167} mpsc_queue_options_t;
168
169/*!
170 * @const MPSC_QUEUE_NOTQUEUED_MARKER
171 *
172 * @brief
173 * Magical marker that implementations can use to poison the chain pointer of
174 * elements not on any MPSC queue.
175 */
176#define MPSC_QUEUE_NOTQUEUED_MARKER ((mpsc_queue_chain_t)~0ul)
177
178/*!
179 * @macro mpsc_queue_element
180 *
181 * @brief
182 * Macro to find the pointer of an element back from its MPSC chain linkage.
183 */
184#define mpsc_queue_element(ptr, type, field) __container_of(ptr, type, field)
185
186
187#pragma mark Advanced Multi Producer calls
188
189/**
190 * @function __mpsc_queue_append_update_tail
191 *
192 * @brief
193 * First half of the enqueue operation onto a multi-producer single-consumer
194 * queue.
195 *
196 * @discussion
197 * This function is available for algorithms that need to do things (such as
198 * taking a refcount) before calling __mpsc_queue_append_update_prev().
199 *
200 * Preemption should be disabled before calling
201 * __mpsc_queue_append_update_tail(), and until
202 * __mpsc_queue_append_update_prev() has returned.
203 *
204 * @param q
205 * The queue to update.
206 *
207 * @param elm
208 * The element to append to `q`.
209 *
210 * @returns
211 * A token to later pass to __mpsc_queue_append_update_prev()
212 * to complete the enqueue.
213 */
214static inline mpsc_queue_chain_t
215__mpsc_queue_append_update_tail(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
216{
217 os_atomic_store(&elm->mpqc_next, (struct mpsc_queue_chain *__single)NULL, relaxed);
218 return os_atomic_xchg(&q->mpqh_tail, elm, release);
219}
220
221/**
222 * @function __mpsc_queue_append_was_empty
223 *
224 * @brief
225 * Tests whether the queue was empty at the time
226 * __mpsc_queue_append_update_tail() was called.
227 *
228 * @param q
229 * The queue to test emptiness for.
230 *
231 * @param prev
232 * The token returned by __mpsc_queue_append_update_tail().
233 *
234 * @returns
235 * Whether the queue was empty (true) or not (false).
236 */
237static inline bool
238__mpsc_queue_append_was_empty(mpsc_queue_head_t q, mpsc_queue_chain_t prev)
239{
240 return &q->mpqh_head == prev;
241}
242
243/**
244 * @function __mpsc_queue_append_update_prev
245 *
246 * @brief
247 * Second half of the enqueue operation onto a multi-producer single-consumer
248 * queue.
249 *
250 * @discussion
251 * This function is available for algorithms that need to do things (such as
252 * taking a refcount) before calling __mpsc_queue_append_update_prev().
253 *
254 * Preemption should be disabled before calling
255 * __mpsc_queue_append_update_tail(), and until
256 * __mpsc_queue_append_update_prev() has returned.
257 *
258 * @param prev
259 * The token returned by __mpsc_queue_append_update_tail().
260 *
261 * @param elm
262 * The element to append to the queue.
263 */
264static inline void
265__mpsc_queue_append_update_prev(mpsc_queue_chain_t prev, mpsc_queue_chain_t elm)
266{
267 os_atomic_store(&prev->mpqc_next, elm, relaxed);
268}
269
270
271#pragma mark Multi Producer calls
272
273/**
274 * @function mpsc_queue_append_list
275 *
276 * @brief
277 * Enqueues a list of elements onto a queue.
278 *
279 * @discussion
280 * This enqueues a list that has to be fully formed from `first` to `last`
281 * at the end of `q`.
282 *
283 * Preemption should be disabled when calling mpsc_queue_append_list().
284 *
285 * @param q
286 * The queue to update.
287 *
288 * @param first
289 * The first of the list elements being appended.
290 *
291 * @param last
292 * The last of the list elements being appended.
293 */
294static inline bool
295mpsc_queue_append_list(mpsc_queue_head_t q, mpsc_queue_chain_t first,
296 mpsc_queue_chain_t last)
297{
298 mpsc_queue_chain_t prev = __mpsc_queue_append_update_tail(q, elm: last);
299 __mpsc_queue_append_update_prev(prev, elm: first);
300 return __mpsc_queue_append_was_empty(q, prev);
301}
302
303/**
304 * @function mpsc_queue_append
305 *
306 * @brief
307 * Enqueues an element onto a queue.
308 *
309 * @discussion
310 * Preemption should be disabled when calling mpsc_queue_append().
311 *
312 * @param q the queue to update
313 * @param elm the element to append
314 */
315static inline bool
316mpsc_queue_append(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
317{
318 return mpsc_queue_append_list(q, first: elm, last: elm);
319}
320
321
322#pragma mark Single Consumer calls
323
324/**
325 * @function mpsc_queue_dequeue_batch()
326 *
327 * @brief
328 * Atomically empty a queue at once and return the batch head and tail.
329 *
330 * @discussion
331 * Consumer function, must be called in a serialized way with respect to any
332 * other consumer function.
333 *
334 * @param q
335 * The queue
336 *
337 * @param tail
338 * An out pointer filled with the last element captured.
339 *
340 * @param dependency
341 * A dependency token (to rely on consume / hardware dependencies)
342 * When not trying to take advantage of hardware dependencies, just pass NULL.
343 *
344 * @returns
345 * The first element of the batch if any, or NULL the queue was empty.
346 */
347mpsc_queue_chain_t
348mpsc_queue_dequeue_batch(mpsc_queue_head_t q, mpsc_queue_chain_t *tail,
349 os_atomic_dependency_t dependency);
350
351/**
352 * @function mpsc_queue_batch_next()
353 *
354 * @brief
355 * Function used to consume an element from a batch dequeued with
356 * mpsc_queue_dequeue_batch().
357 *
358 * @discussion
359 * Once a batch has been dequeued, there is no need to hold the consumer lock
360 * anymore to consume it.
361 *
362 * mpsc_queue_batch_foreach_safe() is the preferred interface to consume
363 * the whole batch.
364 *
365 * @param cur
366 * The current inspected element of the batch (must be the batch head or
367 * a value returned by mpsc_queue_batch_next()).
368 *
369 * @param tail
370 * The last element of the batch.
371 *
372 * @returns
373 * The next element if any.
374 */
375mpsc_queue_chain_t
376mpsc_queue_batch_next(mpsc_queue_chain_t cur, mpsc_queue_chain_t tail);
377
378/**
379 * @macro mpsc_queue_batch_foreach_safe
380 *
381 * @brief
382 * Macro used to enumerate a batch dequeued with mpsc_queue_dequeue_batch().
383 *
384 * @param item
385 * The item being currently visited.
386 *
387 * @param head
388 * The first element of the batch.
389 *
390 * @param tail
391 * The last element of the batch.
392 */
393#define mpsc_queue_batch_foreach_safe(item, head, tail) \
394 for (mpsc_queue_chain_t __tmp, __item = (head), __tail = (tail); \
395 __tmp = mpsc_queue_batch_next(__item, __tail), (item) = __item; \
396 __item = __tmp)
397
398/**
399 * @function mpsc_queue_restore_batch()
400 *
401 * @brief
402 * "Restore"s a batch at the head of the queue.
403 *
404 * @discussion
405 * Consumer function, must be called in a serialized way with respect to any
406 * other consumer function.
407 *
408 * @param q
409 * The queue
410 *
411 * @param first
412 * The first element to put back.
413 *
414 * @param last
415 * The last element to put back.
416 * It is the responsibility of the caller to ensure the linkages from first to
417 * last are properly set up before calling this function.
418 */
419void
420mpsc_queue_restore_batch(mpsc_queue_head_t q, mpsc_queue_chain_t first,
421 mpsc_queue_chain_t last);
422
423
424#pragma mark "GCD"-like facilities
425
426/*!
427 * @typedef enum mpsc_daemon_init_options
428 *
429 * @const MPSC_DAEMON_INIT_NONE
430 * Default options (no specific behavior)
431 *
432 * @const MPSC_DAEMON_INIT_INACTIVE
433 * Create the queue inactive, which requires an explicit call
434 * to @c mpsc_daemon_queue_activate() to start draining.
435 *
436 */
437__options_decl(mpsc_daemon_init_options_t, uint32_t, {
438 MPSC_DAEMON_INIT_NONE = 0,
439 MPSC_DAEMON_INIT_INACTIVE = 1 << 0,
440});
441
442/*!
443 * @typedef struct mpsc_daemon_queue
444 *
445 * @brief
446 * Daemon queues are a ready-to use packaging of the low level MPSC queue
447 * primitive.
448 *
449 * @discussion
450 * mpsc_queue_t requires handling of state transitions of the queue and
451 * dequeuing yourself, which is a non trivial task.
452 *
453 * Daemon queues are a simple packaged solution that allows for mpsc_queue_t to
454 * form hierarchies (mostly for layering purposes), and be serviced at the
455 * bottom of such a hierarchy by a thread or a thread call.
456 *
457 * Daemon queues assume homogenous items, and are setup with an `invoke`
458 * callback that is called in the dequeuer on every item as they are dequeued.
459 */
460typedef struct mpsc_daemon_queue *mpsc_daemon_queue_t;
461
462#define MPSC_QUEUE_BATCH_END ((mpsc_queue_chain_t)~0ul)
463
464/*!
465 * @typedef struct mpsc_daemon_queue
466 *
467 * @brief
468 * The type for MPSC Daemon Queues invoke callbacks.
469 */
470typedef void (*mpsc_daemon_invoke_fn_t)(mpsc_queue_chain_t elm,
471 mpsc_daemon_queue_t dq);
472
473/*!
474 * @enum mpsc_daemon_queue_kind
475 *
476 * @brief
477 * Internal type, not to be used by clients.
478 */
479__enum_decl(mpsc_daemon_queue_kind_t, uint16_t, {
480 MPSC_QUEUE_KIND_UNKNOWN,
481 MPSC_QUEUE_KIND_NESTED,
482 MPSC_QUEUE_KIND_THREAD,
483 MPSC_QUEUE_KIND_THREAD_CRITICAL,
484 MPSC_QUEUE_KIND_THREAD_CALL,
485});
486
487/*!
488 * @enum mpsc_daemon_queue_options
489 *
490 * @brief
491 * Options clients can set on their queue before first use.
492 *
493 * @const MPSC_QUEUE_OPTION_BATCH
494 * Call the `invoke` callback at the end of a batch
495 * with the magic @c MPSC_QUEUE_BATCH_END marker.
496 */
497__options_decl(mpsc_daemon_queue_options_t, uint16_t, {
498 MPSC_QUEUE_OPTION_BATCH = 0x0001,
499});
500
501/*!
502 * @enum mpsc_daemon_queue_state
503 *
504 * @brief
505 * Internal type, not to be used by clients.
506 */
507__options_decl(mpsc_daemon_queue_state_t, uint32_t, {
508 MPSC_QUEUE_STATE_DRAINING = 0x0001,
509 MPSC_QUEUE_STATE_WAKEUP = 0x0002,
510 MPSC_QUEUE_STATE_CANCELED = 0x0004,
511 MPSC_QUEUE_STATE_INACTIVE = 0x0008,
512});
513
514struct mpsc_daemon_queue {
515 mpsc_daemon_queue_kind_t mpd_kind;
516 mpsc_daemon_queue_options_t mpd_options;
517 mpsc_daemon_queue_state_t _Atomic mpd_state;
518 mpsc_daemon_invoke_fn_t mpd_invoke;
519 union {
520 mpsc_daemon_queue_t mpd_target;
521 struct thread *mpd_thread;
522 struct thread_call *mpd_call;
523 };
524 struct mpsc_queue_head mpd_queue;
525 struct mpsc_queue_chain mpd_chain;
526};
527
528/*!
529 * @function mpsc_daemon_queue_init_with_thread
530 *
531 * @brief
532 * Sets up a daemon queue to be a base queue drained by a kernel thread.
533 *
534 * @discussion
535 * The function will allocate the thread and start it in assert_wait.
536 *
537 * @param dq
538 * The queue to initialize
539 *
540 * @param invoke
541 * The invoke function called on individual items on the queue during drain.
542 *
543 * @param pri
544 * The scheduler priority for the created thread.
545 *
546 * @param name
547 * The name to give to the created thread.
548 *
549 * @param flags
550 * See mpsc_daemon_init_options_t.
551 *
552 * @returns
553 * Whether creating the thread was successful.
554 */
555kern_return_t
556mpsc_daemon_queue_init_with_thread(mpsc_daemon_queue_t dq,
557 mpsc_daemon_invoke_fn_t invoke, int pri, const char *name,
558 mpsc_daemon_init_options_t flags);
559
560
561/*!
562 * @function mpsc_daemon_queue_init_with_thread_call
563 *
564 * @brief
565 * Sets up a daemon queue to be a base queue drained by a thread call.
566 *
567 * @param dq
568 * The queue to initialize
569 *
570 * @param invoke
571 * The invoke function called on individual items on the queue during drain.
572 *
573 * @param pri
574 * The priority the thread call will run at.
575 *
576 * @param flags
577 * See mpsc_daemon_init_options_t.
578 */
579void
580mpsc_daemon_queue_init_with_thread_call(mpsc_daemon_queue_t dq,
581 mpsc_daemon_invoke_fn_t invoke, thread_call_priority_t pri,
582 mpsc_daemon_init_options_t flags);
583
584/*!
585 * @function mpsc_daemon_queue_init_with_target
586 *
587 * @brief
588 * Sets up a daemon queue to target another daemon queue.
589 *
590 * @discussion
591 * The targetting relationship is useful for subsystem layering purposes only.
592 * Because draining a given queue is atomic with respect to its target, target
593 * queue hierarchies are prone to starvation.
594 *
595 * @param dq
596 * The queue to initialize
597 *
598 * @param invoke
599 * The invoke function called on individual items on the queue during drain.
600 *
601 * @param target
602 * The target queue of the initialized queue, which has to be initialized with
603 * the mpsc_daemon_queue_nested_invoke invoke handler.
604 *
605 * @param flags
606 * See mpsc_daemon_init_options_t.
607 */
608void
609mpsc_daemon_queue_init_with_target(mpsc_daemon_queue_t dq,
610 mpsc_daemon_invoke_fn_t invoke, mpsc_daemon_queue_t target,
611 mpsc_daemon_init_options_t flags);
612
613/*!
614 * @function mpsc_daemon_queue_nested_invoke
615 *
616 * @brief
617 * The invoke function to pass to mpsc_daemon_queue_init_* when a queue is meant
618 * to be targeted by other queues.
619 */
620void
621mpsc_daemon_queue_nested_invoke(mpsc_queue_chain_t elm,
622 mpsc_daemon_queue_t dq);
623
624/*!
625 * @function mpsc_daemon_queue_activate
626 *
627 * @brief
628 * Activate a queue that was created with the @c MPSC_DAEMON_INIT_INACTIVE flag.
629 *
630 * @param dq
631 * The queue to activate.
632 */
633void
634mpsc_daemon_queue_activate(mpsc_daemon_queue_t dq);
635
636/*!
637 * @function mpsc_daemon_queue_cancel_and_wait
638 *
639 * @brief
640 * Cancels the queue so that the object owning it can be destroyed.
641 *
642 * @discussion
643 * This interface will cancel the queue and wait synchronously for the
644 * cancelation to have taken effect, possibly waiting on elements currently
645 * draining.
646 *
647 * Sending objects to the daemon queue after cancelation is undefined.
648 *
649 * Calling this function multiple times is undefined.
650 *
651 * Tearing down daemon queue hierarchies is the responsibility of the adopter.
652 */
653void
654mpsc_daemon_queue_cancel_and_wait(mpsc_daemon_queue_t dq);
655
656/*!
657 * @function mpsc_daemon_enqueue
658 *
659 * @brief
660 * Send ("async") an item to a given daemon on a given queue.
661 *
662 * @discussion
663 * It is the responsibility of the caller to ensure preemption is disabled when
664 * this call is made.
665 *
666 * @param dq
667 * The daemon queue to enqueue the element onto.
668 *
669 * @param elm
670 * The item to enqueue.
671 *
672 * @param options
673 * Options applicable to the enqueue. In particupar passing
674 * MPSC_QUEUE_DISABLE_PREEMPTION makes sure preemption is properly disabled
675 * during the enqueue.
676 */
677void
678mpsc_daemon_enqueue(mpsc_daemon_queue_t dq, mpsc_queue_chain_t elm,
679 mpsc_queue_options_t options);
680
681
682#pragma mark Deferred deallocation daemon
683
684/*!
685 * @function thread_deallocate_daemon_init
686 *
687 * @brief
688 * Initializes the deferred deallocation daemon, called by thread_daemon_init().
689 *
690 * @discussion
691 * The deferred deallocation daemon is a kernel thread based daemon queue that
692 * is targeted by nested daemon queues.
693 *
694 * It is used to perform deferred deallocation for objects that can't safely be
695 * deallocated from the context where the deallocation should normally occur.
696 *
697 * Subsystems using it are for example: turnstiles, workqueues, threads.
698 *
699 * @warning
700 * New queues should be added to this daemon with great care,
701 * as abusing it can lead to unbounded amount of kernel work.
702 */
703void
704thread_deallocate_daemon_init(void);
705
706/*!
707 * @function thread_deallocate_daemon_register_queue
708 *
709 * @brief
710 * Dynamically register a queue for deferred deletion with the deferred
711 * deallocation daemon.
712 *
713 * @param dq
714 * The daemon queue to register with the deferred deallocation daemon.
715 *
716 * @param invoke
717 * The callback called on every element of this queue by the deallocation
718 * daemon.
719 */
720void
721thread_deallocate_daemon_register_queue(mpsc_daemon_queue_t dq,
722 mpsc_daemon_invoke_fn_t invoke);
723
724
725#pragma mark tests
726#if DEBUG || DEVELOPMENT
727
728int
729mpsc_test_pingpong(uint64_t count, uint64_t *out);
730
731#endif /* DEBUG || DEVELOPMENT */
732
733#endif /* XNU_KERNEL_PRIVATE */
734
735__ASSUME_PTR_ABI_SINGLE_END __END_DECLS
736
737#endif /* _KERN_MPSC_QUEUE_H_ */
738