/*
 * Copyright (c) 1993-1995, 1999-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/thread_act.h>

#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/clock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>

#include <vm/vm_pageout.h>

#include <kern/thread_call.h>
#include <kern/timer_call.h>

#include <libkern/OSAtomic.h>
#include <kern/timer_queue.h>

#include <sys/kdebug.h>
#if CONFIG_DTRACE
#include <mach/sdt.h>
#endif
#include <machine/machine_routines.h>

static KALLOC_TYPE_DEFINE(thread_call_zone, thread_call_data_t,
    KT_PRIV_ACCT | KT_NOSHARED);

typedef enum {
    TCF_ABSOLUTE = 0,
    TCF_CONTINUOUS = 1,
    TCF_COUNT = 2,
} thread_call_flavor_t;

__options_decl(thread_call_group_flags_t, uint32_t, {
    TCG_NONE = 0x0,
    TCG_PARALLEL = 0x1,
    TCG_DEALLOC_ACTIVE = 0x2,
});

static struct thread_call_group {
    __attribute__((aligned(128))) lck_ticket_t tcg_lock;

    const char * tcg_name;

    queue_head_t pending_queue;
    uint32_t pending_count;

    queue_head_t delayed_queues[TCF_COUNT];
    struct priority_queue_deadline_min delayed_pqueues[TCF_COUNT];
    timer_call_data_t delayed_timers[TCF_COUNT];

    timer_call_data_t dealloc_timer;

    struct waitq idle_waitq;
    uint64_t idle_timestamp;
    uint32_t idle_count, active_count, blocked_count;

    uint32_t tcg_thread_pri;
    uint32_t target_thread_count;

    thread_call_group_flags_t tcg_flags;

    struct waitq waiters_waitq;
} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
    [THREAD_CALL_INDEX_INVALID] = {
        .tcg_name = "invalid",
    },
    [THREAD_CALL_INDEX_HIGH] = {
        .tcg_name = "high",
        .tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
        .target_thread_count = 4,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_KERNEL] = {
        .tcg_name = "kernel",
        .tcg_thread_pri = BASEPRI_KERNEL,
        .target_thread_count = 1,
        .tcg_flags = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_USER] = {
        .tcg_name = "user",
        .tcg_thread_pri = BASEPRI_DEFAULT,
        .target_thread_count = 1,
        .tcg_flags = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_LOW] = {
        .tcg_name = "low",
        .tcg_thread_pri = MAXPRI_THROTTLE,
        .target_thread_count = 1,
        .tcg_flags = TCG_PARALLEL,
    },
    [THREAD_CALL_INDEX_KERNEL_HIGH] = {
        .tcg_name = "kernel-high",
        .tcg_thread_pri = BASEPRI_PREEMPT,
        .target_thread_count = 2,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_UI] = {
        .tcg_name = "qos-ui",
        .tcg_thread_pri = BASEPRI_FOREGROUND,
        .target_thread_count = 1,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_IN] = {
        .tcg_name = "qos-in",
        .tcg_thread_pri = BASEPRI_USER_INITIATED,
        .target_thread_count = 1,
        .tcg_flags = TCG_NONE,
    },
    [THREAD_CALL_INDEX_QOS_UT] = {
        .tcg_name = "qos-ut",
        .tcg_thread_pri = BASEPRI_UTILITY,
        .target_thread_count = 1,
        .tcg_flags = TCG_NONE,
    },
};

typedef struct thread_call_group *thread_call_group_t;

#define INTERNAL_CALL_COUNT 768
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
#define THREAD_CALL_ADD_RATIO 4
#define THREAD_CALL_MACH_FACTOR_CAP 3
#define THREAD_CALL_GROUP_MAX_THREADS 500

struct thread_call_thread_state {
    struct thread_call_group * thc_group;
    struct thread_call * thc_call; /* debug only, may be deallocated */
    uint64_t thc_call_start;
    uint64_t thc_call_soft_deadline;
    uint64_t thc_call_hard_deadline;
    uint64_t thc_call_pending_timestamp;
    uint64_t thc_IOTES_invocation_timestamp;
    thread_call_func_t thc_func;
    thread_call_param_t thc_param0;
    thread_call_param_t thc_param1;
};

static bool thread_call_daemon_awake = true;
/*
 * This special waitq exists because the daemon thread
 * might need to be woken while already holding a global waitq locked.
 */
static struct waitq daemon_waitq;

static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
static queue_head_t thread_call_internal_queue;
int thread_call_internal_queue_count = 0;
static uint64_t thread_call_dealloc_interval_abs;

static void _internal_call_init(void);

static thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static bool _is_internal_call(thread_call_t call);
static void _internal_call_release(thread_call_t call);
static bool _pending_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t now);
static bool _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
    uint64_t deadline, thread_call_flavor_t flavor);
static bool _call_dequeue(thread_call_t call, thread_call_group_t group);
static void thread_call_wake(thread_call_group_t group);
static void thread_call_daemon(void *arg, wait_result_t w);
static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void thread_call_group_setup(thread_call_group_t group);
static void sched_call_thread(int type, thread_t thread);
static void thread_call_start_deallocate_timer(thread_call_group_t group);
static void thread_call_wait_locked(thread_call_t call, spl_t s);
static bool thread_call_wait_once_locked(thread_call_t call, spl_t s);

static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
    thread_call_func_t alt_func, thread_call_param_t alt_param0,
    thread_call_param_t param1, uint64_t deadline,
    uint64_t leeway, unsigned int flags);

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

LCK_GRP_DECLARE(thread_call_lck_grp, "thread_call");


static void
thread_call_lock_spin(thread_call_group_t group)
{
    lck_ticket_lock(&group->tcg_lock, &thread_call_lck_grp);
}

static void
thread_call_unlock(thread_call_group_t group)
{
    lck_ticket_unlock(&group->tcg_lock);
}

static void __assert_only
thread_call_assert_locked(thread_call_group_t group)
{
    lck_ticket_assert_owned(&group->tcg_lock);
}


static spl_t
disable_ints_and_lock(thread_call_group_t group)
{
    spl_t s = splsched();
    thread_call_lock_spin(group);

    return s;
}

static void
enable_ints_and_unlock(thread_call_group_t group, spl_t s)
{
    thread_call_unlock(group);
    splx(s);
}

static thread_call_group_t
thread_call_get_group(thread_call_t call)
{
    thread_call_index_t index = call->tc_index;
    thread_call_flags_t flags = call->tc_flags;
    thread_call_func_t func = call->tc_func;

    if (index == THREAD_CALL_INDEX_INVALID || index >= THREAD_CALL_INDEX_MAX) {
        panic("(%p %p) invalid thread call index: %d", call, func, index);
    }

    if (func == NULL || !(flags & THREAD_CALL_INITIALIZED)) {
        panic("(%p %p) uninitialized thread call", call, func);
    }

    if (flags & THREAD_CALL_ALLOC) {
        kalloc_type_require(thread_call_data_t, call);
    }

    return &thread_call_groups[index];
}

/* Lock held */
static thread_call_flavor_t
thread_call_get_flavor(thread_call_t call)
{
    return (call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
}

/* Lock held */
static thread_call_flavor_t
thread_call_set_flavor(thread_call_t call, thread_call_flavor_t flavor)
{
    assert(flavor == TCF_CONTINUOUS || flavor == TCF_ABSOLUTE);
    thread_call_flavor_t old_flavor = thread_call_get_flavor(call);

    if (old_flavor != flavor) {
        if (flavor == TCF_CONTINUOUS) {
            call->tc_flags |= THREAD_CALL_FLAG_CONTINUOUS;
        } else {
            call->tc_flags &= ~THREAD_CALL_FLAG_CONTINUOUS;
        }
    }

    return old_flavor;
}

/* returns true if it was on a queue */
static bool
thread_call_enqueue_tail(
    thread_call_t call,
    queue_t new_queue)
{
    queue_t old_queue = call->tc_queue;

    thread_call_group_t group = thread_call_get_group(call);
    thread_call_flavor_t flavor = thread_call_get_flavor(call);

    if (old_queue != NULL &&
        old_queue != &group->delayed_queues[flavor]) {
        panic("thread call (%p %p) on bad queue (old_queue: %p)",
            call, call->tc_func, old_queue);
    }

    if (old_queue == &group->delayed_queues[flavor]) {
        priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue == NULL) {
        enqueue_tail(new_queue, &call->tc_qlink);
    } else {
        re_queue_tail(new_queue, &call->tc_qlink);
    }

    call->tc_queue = new_queue;

    return old_queue != NULL;
}

static queue_head_t *
thread_call_dequeue(
    thread_call_t call)
{
    queue_t old_queue = call->tc_queue;

    thread_call_group_t group = thread_call_get_group(call);
    thread_call_flavor_t flavor = thread_call_get_flavor(call);

    if (old_queue != NULL &&
        old_queue != &group->pending_queue &&
        old_queue != &group->delayed_queues[flavor]) {
        panic("thread call (%p %p) on bad queue (old_queue: %p)",
            call, call->tc_func, old_queue);
    }

    if (old_queue == &group->delayed_queues[flavor]) {
        priority_queue_remove(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue != NULL) {
        remqueue(&call->tc_qlink);

        call->tc_queue = NULL;
    }
    return old_queue;
}

static queue_head_t *
thread_call_enqueue_deadline(
    thread_call_t call,
    thread_call_group_t group,
    thread_call_flavor_t flavor,
    uint64_t deadline)
{
    queue_t old_queue = call->tc_queue;
    queue_t new_queue = &group->delayed_queues[flavor];

    thread_call_flavor_t old_flavor = thread_call_set_flavor(call, flavor);

    if (old_queue != NULL &&
        old_queue != &group->pending_queue &&
        old_queue != &group->delayed_queues[old_flavor]) {
        panic("thread call (%p %p) on bad queue (old_queue: %p)",
            call, call->tc_func, old_queue);
    }

    if (old_queue == new_queue) {
        /* optimize the same-queue case to avoid a full re-insert */
        uint64_t old_deadline = call->tc_pqlink.deadline;
        call->tc_pqlink.deadline = deadline;

        if (old_deadline < deadline) {
            priority_queue_entry_increased(&group->delayed_pqueues[flavor],
                &call->tc_pqlink);
        } else {
            priority_queue_entry_decreased(&group->delayed_pqueues[flavor],
                &call->tc_pqlink);
        }
    } else {
        if (old_queue == &group->delayed_queues[old_flavor]) {
            priority_queue_remove(&group->delayed_pqueues[old_flavor],
                &call->tc_pqlink);
        }

        call->tc_pqlink.deadline = deadline;

        priority_queue_insert(&group->delayed_pqueues[flavor], &call->tc_pqlink);
    }

    if (old_queue == NULL) {
        enqueue_tail(new_queue, &call->tc_qlink);
    } else if (old_queue != new_queue) {
        re_queue_tail(new_queue, &call->tc_qlink);
    }

    call->tc_queue = new_queue;

    return old_queue;
}

uint64_t
thread_call_get_armed_deadline(thread_call_t call)
{
    return call->tc_pqlink.deadline;
}


static bool
group_isparallel(thread_call_group_t group)
{
    return (group->tcg_flags & TCG_PARALLEL) != 0;
}

static bool
thread_call_group_should_add_thread(thread_call_group_t group)
{
    if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
        panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
            group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
            group->active_count, group->blocked_count, group->idle_count);
    }

    if (group_isparallel(group) == false) {
        if (group->pending_count > 0 && group->active_count == 0) {
            return true;
        }

        return false;
    }

    if (group->pending_count > 0) {
        if (group->idle_count > 0) {
            return false;
        }

        uint32_t thread_count = group->active_count;

        /*
         * Add a thread if either there are no threads,
         * the group has fewer than its target number of
         * threads, or the amount of work is large relative
         * to the number of threads. In the last case, pay attention
         * to the total load on the system, and back off if
         * it's high.
         */
        if ((thread_count == 0) ||
            (thread_count < group->target_thread_count) ||
            ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
            (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
            return true;
        }
    }

    return false;
}
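
/*
 * Illustrative example of the heuristic above (not from the original source):
 * for a parallel group with target_thread_count = 1, no idle threads, and
 * THREAD_CALL_ADD_RATIO = 4, a group with 3 active threads and 13 pending
 * calls exceeds the ratio (13 > 4 * 3), so a new thread is requested only
 * while sched_mach_factor stays below THREAD_CALL_MACH_FACTOR_CAP; with 11
 * pending calls the group keeps its 3 threads.
 */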

static void
thread_call_group_setup(thread_call_group_t group)
{
    lck_ticket_init(&group->tcg_lock, &thread_call_lck_grp);

    queue_init(&group->pending_queue);

    for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
        queue_init(&group->delayed_queues[flavor]);
        priority_queue_init(&group->delayed_pqueues[flavor]);
        timer_call_setup(&group->delayed_timers[flavor], thread_call_delayed_timer, group);
    }

    timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);

    waitq_init(&group->waiters_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);

    /* Reverse the wait order so we re-use the most recently parked thread from the pool */
    waitq_init(&group->idle_waitq, WQT_QUEUE, SYNC_POLICY_REVERSED);
}

/*
 * Simple wrapper for creating threads bound to
 * thread call groups.
 */
static void
thread_call_thread_create(
    thread_call_group_t group)
{
    thread_t thread;
    kern_return_t result;

    int thread_pri = group->tcg_thread_pri;

    result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
        group, thread_pri, &thread);
    if (result != KERN_SUCCESS) {
        panic("cannot create new thread call thread %d", result);
    }

    if (thread_pri <= BASEPRI_KERNEL) {
        /*
         * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
         * in kernel if there are higher priority threads available.
         */
        thread_set_eager_preempt(thread);
    }

    char name[MAXTHREADNAMESIZE] = "";

    int group_thread_count = group->idle_count + group->active_count + group->blocked_count;

    snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
    thread_set_thread_name(thread, name);

    thread_deallocate(thread);
}

/*
 * thread_call_initialize:
 *
 * Initialize this module, called
 * early during system initialization.
 */
__startup_func
static void
thread_call_initialize(void)
{
    nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
    waitq_init(&daemon_waitq, WQT_QUEUE, SYNC_POLICY_FIFO);

    for (uint32_t i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
        thread_call_group_setup(&thread_call_groups[i]);
    }

    _internal_call_init();

    thread_t thread;
    kern_return_t result;

    result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
        NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
    if (result != KERN_SUCCESS) {
        panic("thread_call_initialize failed (%d)", result);
    }

    thread_deallocate(thread);
}
STARTUP(THREAD_CALL, STARTUP_RANK_FIRST, thread_call_initialize);

void
thread_call_setup_with_options(
    thread_call_t call,
    thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_priority_t pri,
    thread_call_options_t options)
{
    if (func == NULL) {
        panic("initializing thread call with NULL func");
    }

    bzero(call, sizeof(*call));

    *call = (struct thread_call) {
        .tc_func = func,
        .tc_param0 = param0,
        .tc_flags = THREAD_CALL_INITIALIZED,
    };

    switch (pri) {
    case THREAD_CALL_PRIORITY_HIGH:
        call->tc_index = THREAD_CALL_INDEX_HIGH;
        break;
    case THREAD_CALL_PRIORITY_KERNEL:
        call->tc_index = THREAD_CALL_INDEX_KERNEL;
        break;
    case THREAD_CALL_PRIORITY_USER:
        call->tc_index = THREAD_CALL_INDEX_USER;
        break;
    case THREAD_CALL_PRIORITY_LOW:
        call->tc_index = THREAD_CALL_INDEX_LOW;
        break;
    case THREAD_CALL_PRIORITY_KERNEL_HIGH:
        call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
        break;
    default:
        panic("Invalid thread call pri value: %d", pri);
        break;
    }

    if (options & THREAD_CALL_OPTIONS_ONCE) {
        call->tc_flags |= THREAD_CALL_ONCE;
    }
    if (options & THREAD_CALL_OPTIONS_SIGNAL) {
        call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
    }
}
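
/*
 * Illustrative sketch (not part of the original source): a client that embeds
 * a thread_call_data_t in its own structure would initialize and submit it
 * roughly like this, assuming a hypothetical handler my_work_fn and context
 * pointer my_ctx:
 *
 *     static thread_call_data_t my_tcall;
 *
 *     thread_call_setup_with_options(&my_tcall, my_work_fn, my_ctx,
 *         THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
 *     thread_call_enter(&my_tcall);
 *
 * Because the storage is caller-owned (THREAD_CALL_ALLOC is not set), the
 * client is responsible for cancelling the call before the backing memory
 * goes away.
 */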

void
thread_call_setup(
    thread_call_t call,
    thread_call_func_t func,
    thread_call_param_t param0)
{
    thread_call_setup_with_options(call, func, param0,
        THREAD_CALL_PRIORITY_HIGH, 0);
}

static void
_internal_call_init(void)
{
    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    queue_init(&thread_call_internal_queue);

    for (unsigned i = 0; i < INTERNAL_CALL_COUNT; i++) {
        enqueue_tail(&thread_call_internal_queue, &internal_call_storage[i].tc_qlink);
        thread_call_internal_queue_count++;
    }

    enable_ints_and_unlock(group, s);
}

/*
 * _internal_call_allocate:
 *
 * Allocate an internal callout entry.
 *
 * Called with thread_call_lock held.
 */
static thread_call_t
_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
{
    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    thread_call_t call = qe_dequeue_head(&thread_call_internal_queue,
        struct thread_call, tc_qlink);

    if (call == NULL) {
        panic("_internal_call_allocate: thread_call_internal_queue empty");
    }

    thread_call_internal_queue_count--;

    thread_call_setup(call, func, param0);
    /* THREAD_CALL_ALLOC not set, do not free back to zone */
    assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
    enable_ints_and_unlock(group, s);

    return call;
}

/* Check if a call is internal and needs to be returned to the internal pool. */
static bool
_is_internal_call(thread_call_t call)
{
    if (call >= internal_call_storage &&
        call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
        assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
        return true;
    }
    return false;
}

/*
 * _internal_call_release:
 *
 * Release an internal callout entry which
 * is no longer pending (or delayed).
 *
 * Called with thread_call_lock held.
 */
static void
_internal_call_release(thread_call_t call)
{
    assert(_is_internal_call(call));

    thread_call_group_t group = thread_call_get_group(call);

    assert(group == &thread_call_groups[THREAD_CALL_INDEX_HIGH]);
    thread_call_assert_locked(group);

    call->tc_flags &= ~THREAD_CALL_INITIALIZED;

    enqueue_head(&thread_call_internal_queue, &call->tc_qlink);
    thread_call_internal_queue_count++;
}

/*
 * _pending_call_enqueue:
 *
 * Place an entry at the end of the
 * pending queue, to be executed soon.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_pending_call_enqueue(thread_call_t call,
    thread_call_group_t group,
    uint64_t now)
{
    if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
        == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
        call->tc_pqlink.deadline = 0;

        thread_call_flags_t flags = call->tc_flags;
        call->tc_flags |= THREAD_CALL_RESCHEDULE;

        assert(call->tc_queue == NULL);

        return flags & THREAD_CALL_RESCHEDULE;
    }

    call->tc_pending_timestamp = now;

    bool was_on_queue = thread_call_enqueue_tail(call, &group->pending_queue);

    if (!was_on_queue) {
        call->tc_submit_count++;
    }

    group->pending_count++;

    thread_call_wake(group);

    return was_on_queue;
}

/*
 * _delayed_call_enqueue:
 *
 * Place an entry on the delayed queue,
 * after existing entries with an earlier
 * (or identical) deadline.
 *
 * Returns TRUE if the entry was already
 * on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_delayed_call_enqueue(
    thread_call_t call,
    thread_call_group_t group,
    uint64_t deadline,
    thread_call_flavor_t flavor)
{
    if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
        == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
        call->tc_pqlink.deadline = deadline;

        thread_call_flags_t flags = call->tc_flags;
        call->tc_flags |= THREAD_CALL_RESCHEDULE;

        assert(call->tc_queue == NULL);
        thread_call_set_flavor(call, flavor);

        return flags & THREAD_CALL_RESCHEDULE;
    }

    queue_head_t *old_queue = thread_call_enqueue_deadline(call, group, flavor, deadline);

    if (old_queue == &group->pending_queue) {
        group->pending_count--;
    } else if (old_queue == NULL) {
        call->tc_submit_count++;
    }

    return old_queue != NULL;
}

/*
 * _call_dequeue:
 *
 * Remove an entry from a queue.
 *
 * Returns TRUE if the entry was on a queue.
 *
 * Called with thread_call_lock held.
 */
static bool
_call_dequeue(
    thread_call_t call,
    thread_call_group_t group)
{
    queue_head_t *old_queue = thread_call_dequeue(call);

    if (old_queue == NULL) {
        return false;
    }

    call->tc_finish_count++;

    if (old_queue == &group->pending_queue) {
        group->pending_count--;
    }

    return true;
}

/*
 * _arm_delayed_call_timer:
 *
 * Check if the timer needs to be armed for this flavor,
 * and if so, arm it.
 *
 * If call is non-NULL, only re-arm the timer if the specified call
 * is the first in the queue.
 *
 * Returns true if the timer was armed/re-armed, false if it was left unset
 * Caller should cancel the timer if need be.
 *
 * Called with thread_call_lock held.
 */
static bool
_arm_delayed_call_timer(thread_call_t new_call,
    thread_call_group_t group,
    thread_call_flavor_t flavor)
{
    /* No calls implies no timer needed */
    if (queue_empty(&group->delayed_queues[flavor])) {
        return false;
    }

    thread_call_t call = priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink);

    /* We only need to change the hard timer if this new call is the first in the list */
    if (new_call != NULL && new_call != call) {
        return false;
    }

    assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_pqlink.deadline)));

    uint64_t fire_at = call->tc_soft_deadline;

    if (flavor == TCF_CONTINUOUS) {
        assert(call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS);
        fire_at = continuoustime_to_absolutetime(fire_at);
    } else {
        assert((call->tc_flags & THREAD_CALL_FLAG_CONTINUOUS) == 0);
    }

    /*
     * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
     * which does not take into account later-deadline timers with a larger leeway.
     * This is a valid coalescing behavior, but masks a possible window to
     * fire a timer instead of going idle.
     */
    uint64_t leeway = call->tc_pqlink.deadline - call->tc_soft_deadline;

    timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
        fire_at, leeway,
        TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LEEWAY,
        ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));

    return true;
}
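
/*
 * Worked example (illustrative, not from the original source): if the soonest
 * delayed call was submitted with tc_soft_deadline = T and 2 ms of coalescing
 * slop was folded into tc_pqlink.deadline = T + 2ms, the hard timer above is
 * armed to fire at T with 2 ms of leeway, so the platform may take the
 * interrupt anywhere in [T, T + 2ms] to batch it with other wakeups.
 */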

/*
 * _cancel_func_from_queue:
 *
 * Remove the first (or all) matching
 * entries from the specified queue.
 *
 * Returns TRUE if any matching entries
 * were found.
 *
 * Called with thread_call_lock held.
 */
static boolean_t
_cancel_func_from_queue(thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_group_t group,
    boolean_t remove_all,
    queue_head_t *queue)
{
    boolean_t call_removed = FALSE;
    thread_call_t call;

    qe_foreach_element_safe(call, queue, tc_qlink) {
        if (call->tc_func != func ||
            call->tc_param0 != param0) {
            continue;
        }

        _call_dequeue(call, group);

        if (_is_internal_call(call)) {
            _internal_call_release(call);
        }

        call_removed = TRUE;
        if (!remove_all) {
            break;
        }
    }

    return call_removed;
}

/*
 * thread_call_func_delayed:
 *
 * Enqueue a function callout to
 * occur at the stated time.
 */
void
thread_call_func_delayed(
    thread_call_func_t func,
    thread_call_param_t param,
    uint64_t deadline)
{
    (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
}

/*
 * thread_call_func_delayed_with_leeway:
 *
 * Same as thread_call_func_delayed(), but with
 * leeway/flags threaded through.
 */

void
thread_call_func_delayed_with_leeway(
    thread_call_func_t func,
    thread_call_param_t param,
    uint64_t deadline,
    uint64_t leeway,
    uint32_t flags)
{
    (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
}

/*
 * thread_call_func_cancel:
 *
 * Dequeue a function callout.
 *
 * Removes one (or all) { function, argument }
 * instance(s) from either (or both)
 * the pending and the delayed queue,
 * in that order.
 *
 * Returns TRUE if any calls were cancelled.
 *
 * This iterates all of the pending or delayed thread calls in the group,
 * which is really inefficient. Switch to an allocated thread call instead.
 *
 * TODO: Give 'func' thread calls their own group, so this silliness doesn't
 * affect the main 'high' group.
 */
boolean_t
thread_call_func_cancel(
    thread_call_func_t func,
    thread_call_param_t param,
    boolean_t cancel_all)
{
    boolean_t result;

    if (func == NULL) {
        panic("trying to cancel NULL func");
    }

    /* Function-only thread calls are only kept in the default HIGH group */
    thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];

    spl_t s = disable_ints_and_lock(group);

    if (cancel_all) {
        /* exhaustively search every queue, and return true if any search found something */
        result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
    } else {
        /* early-exit as soon as we find something, don't search other queues */
        result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
            _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
    }

    enable_ints_and_unlock(group, s);

    return result;
}

/*
 * Allocate a thread call with a given priority. Importances other than
 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
 * threads which are not in the normal "urgent" bands).
 */
thread_call_t
thread_call_allocate_with_priority(
    thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_priority_t pri)
{
    return thread_call_allocate_with_options(func, param0, pri, 0);
}

thread_call_t
thread_call_allocate_with_options(
    thread_call_func_t func,
    thread_call_param_t param0,
    thread_call_priority_t pri,
    thread_call_options_t options)
{
    thread_call_t call = zalloc(thread_call_zone);

    thread_call_setup_with_options(call, func, param0, pri, options);
    call->tc_refs = 1;
    call->tc_flags |= THREAD_CALL_ALLOC;

    return call;
}

thread_call_t
thread_call_allocate_with_qos(thread_call_func_t func,
    thread_call_param_t param0,
    int qos_tier,
    thread_call_options_t options)
{
    thread_call_t call = thread_call_allocate(func, param0);

    switch (qos_tier) {
    case THREAD_QOS_UNSPECIFIED:
        call->tc_index = THREAD_CALL_INDEX_HIGH;
        break;
    case THREAD_QOS_LEGACY:
        call->tc_index = THREAD_CALL_INDEX_USER;
        break;
    case THREAD_QOS_MAINTENANCE:
    case THREAD_QOS_BACKGROUND:
        call->tc_index = THREAD_CALL_INDEX_LOW;
        break;
    case THREAD_QOS_UTILITY:
        call->tc_index = THREAD_CALL_INDEX_QOS_UT;
        break;
    case THREAD_QOS_USER_INITIATED:
        call->tc_index = THREAD_CALL_INDEX_QOS_IN;
        break;
    case THREAD_QOS_USER_INTERACTIVE:
        call->tc_index = THREAD_CALL_INDEX_QOS_UI;
        break;
    default:
        panic("Invalid thread call qos value: %d", qos_tier);
        break;
    }

    if (options & THREAD_CALL_OPTIONS_ONCE) {
        call->tc_flags |= THREAD_CALL_ONCE;
    }

    /* does not support THREAD_CALL_OPTIONS_SIGNAL */

    return call;
}


/*
 * thread_call_allocate:
 *
 * Allocate a callout entry.
 */
thread_call_t
thread_call_allocate(
    thread_call_func_t func,
    thread_call_param_t param0)
{
    return thread_call_allocate_with_options(func, param0,
        THREAD_CALL_PRIORITY_HIGH, 0);
}
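
/*
 * Illustrative usage sketch (not part of the original source), assuming a
 * hypothetical handler my_timeout_fn and context my_ctx: a typical client
 * allocates a call once, enters it as needed, and pairs cancellation with
 * free at teardown:
 *
 *     thread_call_t tc = thread_call_allocate_with_options(my_timeout_fn,
 *         my_ctx, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
 *
 *     thread_call_enter1(tc, (thread_call_param_t)request);
 *     ...
 *     thread_call_cancel_wait(tc);
 *     thread_call_free(tc);
 *
 * The zone-backed storage (THREAD_CALL_ALLOC) is what allows
 * thread_call_free() to defer the actual free until any in-flight
 * invocation has finished.
 */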

/*
 * thread_call_free:
 *
 * Release a callout. If the callout is currently
 * executing, it will be freed when all invocations
 * finish.
 *
 * If the callout is currently armed to fire again, then
 * freeing is not allowed and returns FALSE. The
 * client must have canceled the pending invocation before freeing.
 */
boolean_t
thread_call_free(
    thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    if (call->tc_queue != NULL ||
        ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
        thread_call_unlock(group);
        splx(s);

        return FALSE;
    }

    int32_t refs = --call->tc_refs;
    if (refs < 0) {
        panic("(%p %p) Refcount negative: %d", call, call->tc_func, refs);
    }

    if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
        == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
        thread_call_wait_once_locked(call, s);
        /* thread call lock has been unlocked */
    } else {
        enable_ints_and_unlock(group, s);
    }

    if (refs == 0) {
        if (!(call->tc_flags & THREAD_CALL_INITIALIZED)) {
            panic("(%p %p) freeing an uninitialized call", call, call->tc_func);
        }

        if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
            panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
                call, call->tc_func);
        }

        if (call->tc_flags & THREAD_CALL_RUNNING) {
            panic("(%p %p) freeing a running once call", call, call->tc_func);
        }

        if (call->tc_finish_count != call->tc_submit_count) {
            panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
                call, call->tc_func,
                call->tc_submit_count, call->tc_finish_count);
        }

        call->tc_flags &= ~THREAD_CALL_INITIALIZED;

        zfree(thread_call_zone, call);
    }

    return TRUE;
}

/*
 * thread_call_enter:
 *
 * Enqueue a callout entry to occur "soon".
 *
 * Returns TRUE if the call was
 * already on a queue.
 */
boolean_t
thread_call_enter(
    thread_call_t call)
{
    return thread_call_enter1(call, 0);
}

boolean_t
thread_call_enter1(
    thread_call_t call,
    thread_call_param_t param1)
{
    if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
        panic("(%p %p) uninitialized thread call", call, call->tc_func);
    }

    assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);

    thread_call_group_t group = thread_call_get_group(call);
    bool result = true;

    spl_t s = disable_ints_and_lock(group);

    if (call->tc_queue != &group->pending_queue) {
        result = _pending_call_enqueue(call, group, mach_absolute_time());
    }

    call->tc_param1 = param1;

    enable_ints_and_unlock(group, s);

    return result;
}

/*
 * thread_call_enter_delayed:
 *
 * Enqueue a callout entry to occur
 * at the stated time.
 *
 * Returns TRUE if the call was
 * already on a queue.
 */
boolean_t
thread_call_enter_delayed(
    thread_call_t call,
    uint64_t deadline)
{
    if (call == NULL) {
        panic("NULL call in %s", __FUNCTION__);
    }
    return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
}

boolean_t
thread_call_enter1_delayed(
    thread_call_t call,
    thread_call_param_t param1,
    uint64_t deadline)
{
    if (call == NULL) {
        panic("NULL call in %s", __FUNCTION__);
    }

    return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
}

boolean_t
thread_call_enter_delayed_with_leeway(
    thread_call_t call,
    thread_call_param_t param1,
    uint64_t deadline,
    uint64_t leeway,
    unsigned int flags)
{
    if (call == NULL) {
        panic("NULL call in %s", __FUNCTION__);
    }

    return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
}
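
/*
 * Illustrative sketch (not part of the original source): arming a delayed
 * call 100 ms out while allowing 10 ms of timer coalescing slack, for a
 * previously allocated call tc:
 *
 *     uint64_t deadline, leeway;
 *
 *     clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
 *     nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &leeway);
 *     thread_call_enter_delayed_with_leeway(tc, NULL, deadline, leeway,
 *         THREAD_CALL_DELAY_LEEWAY);
 *
 * Passing THREAD_CALL_CONTINUOUS instead would interpret the deadline on the
 * mach_continuous_time() timebase, which keeps counting across sleep.
 */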


/*
 * thread_call_enter_delayed_internal:
 * enqueue a callout entry to occur at the stated time
 *
 * Returns True if the call was already on a queue
 * params:
 * call - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
 * deadline - time deadline in nanoseconds
 * leeway - timer slack represented as delta of deadline.
 * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 *         THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 *         THREAD_CALL_CONTINUOUS : thread call will be called according to mach_continuous_time
 *         rather than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
    thread_call_t call,
    thread_call_func_t alt_func,
    thread_call_param_t alt_param0,
    thread_call_param_t param1,
    uint64_t deadline,
    uint64_t leeway,
    unsigned int flags)
{
    uint64_t now, sdeadline;

    thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

    /* direct mapping between thread_call, timer_call, and timeout_urgency values */
    uint32_t urgency = (flags & TIMEOUT_URGENCY_MASK);

    if (call == NULL) {
        /* allocate a structure out of internal storage, as a convenience for BSD callers */
        call = _internal_call_allocate(alt_func, alt_param0);
    }

    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    /*
     * kevent and IOTES let you change flavor for an existing timer, so we have to
     * support flipping flavors for enqueued thread calls.
     */
    if (flavor == TCF_CONTINUOUS) {
        now = mach_continuous_time();
    } else {
        now = mach_absolute_time();
    }

    call->tc_flags |= THREAD_CALL_DELAYED;

    call->tc_soft_deadline = sdeadline = deadline;

    boolean_t ratelimited = FALSE;
    uint64_t slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

    if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) {
        slop = leeway;
    }

    if (UINT64_MAX - deadline <= slop) {
        deadline = UINT64_MAX;
    } else {
        deadline += slop;
    }

    if (ratelimited) {
        call->tc_flags |= THREAD_CALL_RATELIMITED;
    } else {
        call->tc_flags &= ~THREAD_CALL_RATELIMITED;
    }

    call->tc_param1 = param1;

    call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

    bool result = _delayed_call_enqueue(call, group, deadline, flavor);

    _arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
    DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_func,
        uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
        (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

    enable_ints_and_unlock(group, s);

    return result;
}

/*
 * Remove a callout entry from the queue
 * Called with thread_call_lock held
 */
static bool
thread_call_cancel_locked(thread_call_t call)
{
    bool canceled;

    if (call->tc_flags & THREAD_CALL_RESCHEDULE) {
        call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
        canceled = true;

        /* if reschedule was set, it must not have been queued */
        assert(call->tc_queue == NULL);
    } else {
        bool queue_head_changed = false;

        thread_call_flavor_t flavor = thread_call_get_flavor(call);
        thread_call_group_t group = thread_call_get_group(call);

        if (call->tc_pqlink.deadline != 0 &&
            call == priority_queue_min(&group->delayed_pqueues[flavor], struct thread_call, tc_pqlink)) {
            assert(call->tc_queue == &group->delayed_queues[flavor]);
            queue_head_changed = true;
        }

        canceled = _call_dequeue(call, group);

        if (queue_head_changed) {
            if (_arm_delayed_call_timer(NULL, group, flavor) == false) {
                timer_call_cancel(&group->delayed_timers[flavor]);
            }
        }
    }

#if CONFIG_DTRACE
    DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_func,
        0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
#endif

    return canceled;
}

/*
 * thread_call_cancel:
 *
 * Dequeue a callout entry.
 *
 * Returns TRUE if the call was
 * on a queue.
 */
boolean_t
thread_call_cancel(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    spl_t s = disable_ints_and_lock(group);

    boolean_t result = thread_call_cancel_locked(call);

    enable_ints_and_unlock(group, s);

    return result;
}

/*
 * Cancel a thread call. If it cannot be cancelled (i.e.
 * is already in flight), waits for the most recent invocation
 * to finish. Note that if clients re-submit this thread call,
 * it may still be pending or in flight when thread_call_cancel_wait
 * returns, but all requests to execute this work item prior
 * to the call to thread_call_cancel_wait will have finished.
 */
boolean_t
thread_call_cancel_wait(thread_call_t call)
{
    thread_call_group_t group = thread_call_get_group(call);

    if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
        panic("(%p %p) thread_call_cancel_wait: can't wait on thread call whose storage I don't own",
            call, call->tc_func);
    }

    if (!ml_get_interrupts_enabled()) {
        panic("(%p %p) unsafe thread_call_cancel_wait",
            call, call->tc_func);
    }

    thread_t self = current_thread();

    if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
        self->thc_state && self->thc_state->thc_call == call) {
        panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
            call, call->tc_func);
    }

    spl_t s = disable_ints_and_lock(group);

    boolean_t canceled = thread_call_cancel_locked(call);

    if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
        /*
         * A cancel-wait on a 'once' call will both cancel
         * the pending call and wait for the in-flight call
         */

        thread_call_wait_once_locked(call, s);
        /* thread call lock unlocked */
    } else {
        /*
         * A cancel-wait on a normal call will only wait for the in-flight calls
         * if it did not cancel the pending call.
         *
         * TODO: This seems less than useful - shouldn't it do the wait as well?
         */

        if (canceled == FALSE) {
            thread_call_wait_locked(call, s);
            /* thread call lock unlocked */
        } else {
            enable_ints_and_unlock(group, s);
        }
    }

    return canceled;
}
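
/*
 * Illustrative teardown pattern (not part of the original source): a client
 * retiring a zone-allocated call typically stops re-arming it, waits out any
 * in-flight invocation, and only then releases the storage:
 *
 *     (void)thread_call_cancel_wait(tc);
 *     if (!thread_call_free(tc)) {
 *         // still armed: the handler re-submitted the call after the
 *         // cancel; cancel again before retrying the free
 *     }
 *
 * This ordering avoids the THREAD_CALL_WAIT/free panics enforced in
 * thread_call_free() and thread_call_finish().
 */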
1465
1466
1467/*
1468 * thread_call_wake:
1469 *
1470 * Wake a call thread to service
1471 * pending call entries. May wake
1472 * the daemon thread in order to
1473 * create additional call threads.
1474 *
1475 * Called with thread_call_lock held.
1476 *
1477 * For high-priority group, only does wakeup/creation if there are no threads
1478 * running.
1479 */
1480static void
1481thread_call_wake(
1482 thread_call_group_t group)
1483{
1484 /*
1485 * New behavior: use threads if you've got 'em.
1486 * Traditional behavior: wake only if no threads running.
1487 */
1488 if (group_isparallel(group) || group->active_count == 0) {
1489 if (group->idle_count) {
1490 __assert_only kern_return_t kr;
1491
1492 kr = waitq_wakeup64_one(waitq: &group->idle_waitq, CAST_EVENT64_T(group),
1493 THREAD_AWAKENED, flags: WAITQ_WAKEUP_DEFAULT);
1494 assert(kr == KERN_SUCCESS);
1495
1496 group->idle_count--;
1497 group->active_count++;
1498
1499 if (group->idle_count == 0 && (group->tcg_flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
1500 if (timer_call_cancel(call: &group->dealloc_timer) == TRUE) {
1501 group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;
1502 }
1503 }
1504 } else {
1505 if (thread_call_group_should_add_thread(group) &&
1506 os_atomic_cmpxchg(&thread_call_daemon_awake,
1507 false, true, relaxed)) {
1508 waitq_wakeup64_all(waitq: &daemon_waitq,
1509 CAST_EVENT64_T(&thread_call_daemon_awake),
1510 THREAD_AWAKENED, flags: WAITQ_WAKEUP_DEFAULT);
1511 }
1512 }
1513 }
1514}
1515
1516/*
1517 * sched_call_thread:
1518 *
1519 * Call out invoked by the scheduler.
1520 */
1521static void
1522sched_call_thread(
1523 int type,
1524 thread_t thread)
1525{
1526 thread_call_group_t group;
1527
1528 assert(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT);
1529 assert(thread->thc_state != NULL);
1530
1531 group = thread->thc_state->thc_group;
1532 assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
1533 assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);
1534
1535 thread_call_lock_spin(group);
1536
1537 switch (type) {
1538 case SCHED_CALL_BLOCK:
1539 assert(group->active_count);
1540 --group->active_count;
1541 group->blocked_count++;
1542 if (group->pending_count > 0) {
1543 thread_call_wake(group);
1544 }
1545 break;
1546
1547 case SCHED_CALL_UNBLOCK:
1548 assert(group->blocked_count);
1549 --group->blocked_count;
1550 group->active_count++;
1551 break;
1552 }
1553
1554 thread_call_unlock(group);
1555}
1556
1557/*
1558 * Interrupts disabled, lock held; returns the same way.
1559 * Only called on thread calls whose storage we own. Wakes up
1560 * anyone who might be waiting on this work item and frees it
1561 * if the client has so requested.
1562 */
1563static bool
1564thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1565{
1566 thread_call_group_t call_group = thread_call_get_group(call);
1567 if (group != call_group) {
1568 panic("(%p %p) call finishing from wrong group: %p",
1569 call, call->tc_func, call_group);
1570 }
1571
1572 bool repend = false;
1573 bool signal = call->tc_flags & THREAD_CALL_SIGNAL;
1574 bool alloc = call->tc_flags & THREAD_CALL_ALLOC;
1575
1576 call->tc_finish_count++;
1577
1578 if (!signal && alloc) {
1579 /* The thread call thread owns a ref until the call is finished */
1580 if (call->tc_refs <= 0) {
1581 panic("(%p %p) thread_call_finish: detected over-released thread call",
1582 call, call->tc_func);
1583 }
1584 call->tc_refs--;
1585 }
1586
1587 thread_call_flags_t old_flags = call->tc_flags;
1588 call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
1589
1590 if ((!alloc || call->tc_refs != 0) &&
1591 (old_flags & THREAD_CALL_RESCHEDULE) != 0) {
1592 assert(old_flags & THREAD_CALL_ONCE);
1593 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1594
1595 if (old_flags & THREAD_CALL_DELAYED) {
1596 uint64_t now = mach_absolute_time();
1597 if (flavor == TCF_CONTINUOUS) {
1598 now = absolutetime_to_continuoustime(abstime: now);
1599 }
1600 if (call->tc_soft_deadline <= now) {
1601 /* The deadline has already expired, go straight to pending */
1602 call->tc_flags &= ~(THREAD_CALL_DELAYED | THREAD_CALL_RATELIMITED);
1603 call->tc_pqlink.deadline = 0;
1604 }
1605 }
1606
1607 if (call->tc_pqlink.deadline) {
1608 _delayed_call_enqueue(call, group, deadline: call->tc_pqlink.deadline, flavor);
1609 if (!signal) {
1610 _arm_delayed_call_timer(new_call: call, group, flavor);
1611 }
1612 } else if (signal) {
1613 call->tc_submit_count++;
1614 repend = true;
1615 } else {
1616 _pending_call_enqueue(call, group, now: mach_absolute_time());
1617 }
1618 }
1619
1620 if (!signal && alloc && call->tc_refs == 0) {
1621 if ((old_flags & THREAD_CALL_WAIT) != 0) {
1622 panic("(%p %p) Someone waiting on a thread call that is scheduled for free",
1623 call, call->tc_func);
1624 }
1625
1626 if (call->tc_finish_count != call->tc_submit_count) {
1627 panic("(%p %p) thread call submit/finish imbalance: %lld %lld",
1628 call, call->tc_func,
1629 call->tc_submit_count, call->tc_finish_count);
1630 }
1631
1632 if (call->tc_func == NULL || !(call->tc_flags & THREAD_CALL_INITIALIZED)) {
1633 panic("(%p %p) uninitialized thread call", call, call->tc_func);
1634 }
1635
1636 call->tc_flags &= ~THREAD_CALL_INITIALIZED;
1637
1638 enable_ints_and_unlock(group, s: *s);
1639
1640 zfree(thread_call_zone, call);
1641
1642 *s = disable_ints_and_lock(group);
1643 }
1644
1645 if ((old_flags & THREAD_CALL_WAIT) != 0) {
1646 /*
1647 * This may wake up a thread with a registered sched_call.
1648 * That call might need the group lock, so we drop the lock
1649 * to avoid deadlocking.
1650 *
1651 * We also must use a separate waitq from the idle waitq, as
1652 * this path goes waitq lock->thread lock->group lock, but
1653 * the idle wait goes group lock->waitq_lock->thread_lock.
1654 */
1655 thread_call_unlock(group);
1656
1657 waitq_wakeup64_all(waitq: &group->waiters_waitq, CAST_EVENT64_T(call),
1658 THREAD_AWAKENED, flags: WAITQ_WAKEUP_DEFAULT);
1659
1660 thread_call_lock_spin(group);
1661 /* THREAD_CALL_SIGNAL call may have been freed */
1662 }
1663
1664 return repend;
1665}
1666
1667/*
1668 * thread_call_invoke
1669 *
1670 * Invoke the function provided for this thread call
1671 *
1672 * Note that the thread call object can be deallocated by the function if we do not control its storage.
1673 */
1674static void __attribute__((noinline))
1675thread_call_invoke(thread_call_func_t func,
1676 thread_call_param_t param0,
1677 thread_call_param_t param1,
1678 __unused thread_call_t call)
1679{
1680#if DEVELOPMENT || DEBUG
1681 KERNEL_DEBUG_CONSTANT(
1682 MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_START,
1683 VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
1684#endif /* DEVELOPMENT || DEBUG */
1685
1686#if CONFIG_DTRACE
1687 uint64_t tc_ttd = call->tc_ttd;
1688 boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1689 DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1690 (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1691#endif
1692
1693 (*func)(param0, param1);
1694
1695#if CONFIG_DTRACE
1696 DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1697 (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1698#endif
1699
1700#if DEVELOPMENT || DEBUG
1701 KERNEL_DEBUG_CONSTANT(
1702 MACHDBG_CODE(DBG_MACH_SCHED, MACH_CALLOUT) | DBG_FUNC_END,
1703 VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
1704#endif /* DEVELOPMENT || DEBUG */
1705}
1706
1707/*
1708 * thread_call_thread:
1709 */
1710static void
1711thread_call_thread(
1712 thread_call_group_t group,
1713 wait_result_t wres)
1714{
1715 thread_t self = current_thread();
1716
1717 if ((thread_get_tag_internal(thread: self) & THREAD_TAG_CALLOUT) == 0) {
1718 (void)thread_set_tag_internal(thread: self, tag: THREAD_TAG_CALLOUT);
1719 }
1720
1721 /*
1722 * A wakeup with THREAD_INTERRUPTED indicates that
1723 * we should terminate.
1724 */
1725 if (wres == THREAD_INTERRUPTED) {
1726 thread_terminate(target_act: self);
1727
1728 /* NOTREACHED */
1729 panic("thread_terminate() returned?");
1730 }
1731
1732 spl_t s = disable_ints_and_lock(group);
1733
1734 struct thread_call_thread_state thc_state = { .thc_group = group };
1735 self->thc_state = &thc_state;
1736
1737 thread_sched_call(thread: self, call: sched_call_thread);
1738
1739 while (group->pending_count > 0) {
1740 thread_call_t call = qe_dequeue_head(&group->pending_queue,
1741 struct thread_call, tc_qlink);
1742 assert(call != NULL);
1743
1744 /*
1745 * This thread_call_get_group is also here to validate
1746 * sanity of the thing popped off the queue
1747 */
1748 thread_call_group_t call_group = thread_call_get_group(call);
1749 if (group != call_group) {
1750 panic("(%p %p) call on pending_queue from wrong group %p",
1751 call, call->tc_func, call_group);
1752 }
1753
1754 group->pending_count--;
1755 if (group->pending_count == 0) {
1756 assert(queue_empty(&group->pending_queue));
1757 }
1758
1759 thread_call_func_t func = call->tc_func;
1760 thread_call_param_t param0 = call->tc_param0;
1761 thread_call_param_t param1 = call->tc_param1;
1762
1763 if (func == NULL) {
1764 panic("pending call with NULL func: %p", call);
1765 }
1766
1767 call->tc_queue = NULL;
1768
1769 if (_is_internal_call(call)) {
1770 _internal_call_release(call);
1771 }
1772
1773 /*
1774 * Can only do wakeups for thread calls whose storage
1775 * we control.
1776 */
1777 bool needs_finish = false;
1778 if (call->tc_flags & THREAD_CALL_ALLOC) {
1779 call->tc_refs++; /* Delay free until we're done */
1780 }
1781 if (call->tc_flags & (THREAD_CALL_ALLOC | THREAD_CALL_ONCE)) {
1782 /*
1783 * If THREAD_CALL_ONCE is used, and the timer wasn't
1784 * THREAD_CALL_ALLOC, then clients swear they will use
1785 * thread_call_cancel_wait() before destroying
1786 * the thread call.
1787 *
1788 * Else, the storage for the thread call might have
1789 * disappeared when thread_call_invoke() ran.
1790 */
1791 needs_finish = true;
1792 call->tc_flags |= THREAD_CALL_RUNNING;
1793 }
1794
1795 thc_state.thc_call = call;
1796 thc_state.thc_call_pending_timestamp = call->tc_pending_timestamp;
1797 thc_state.thc_call_soft_deadline = call->tc_soft_deadline;
1798 thc_state.thc_call_hard_deadline = call->tc_pqlink.deadline;
1799 thc_state.thc_func = func;
1800 thc_state.thc_param0 = param0;
1801 thc_state.thc_param1 = param1;
1802 thc_state.thc_IOTES_invocation_timestamp = 0;
1803
1804 enable_ints_and_unlock(group, s);
1805
1806 thc_state.thc_call_start = mach_absolute_time();
1807
1808 thread_call_invoke(func, param0, param1, call);
1809
1810 thc_state.thc_call = NULL;
1811
1812 if (get_preemption_level() != 0) {
1813 int pl = get_preemption_level();
1814 panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1815 pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1816 }
1817
1818 s = disable_ints_and_lock(group);
1819
1820 if (needs_finish) {
1821 /* Release refcount, may free, may temporarily drop lock */
1822 thread_call_finish(call, group, s: &s);
1823 }
1824 }
1825
1826 thread_sched_call(thread: self, NULL);
1827 group->active_count--;
1828
1829 if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1830 ledger_credit(ledger: self->t_ledger, entry: task_ledgers.interrupt_wakeups, amount: 1);
1831 if (self->callout_woken_from_platform_idle) {
1832 ledger_credit(ledger: self->t_ledger, entry: task_ledgers.platform_idle_wakeups, amount: 1);
1833 }
1834 }
1835
1836 self->callout_woken_from_icontext = FALSE;
1837 self->callout_woken_from_platform_idle = FALSE;
1838 self->callout_woke_thread = FALSE;
1839
1840 self->thc_state = NULL;
1841
1842 if (group_isparallel(group)) {
1843 /*
1844 * For new style of thread group, thread always blocks.
1845 * If we have more than the target number of threads,
1846 * and this is the first to block, and it isn't active
1847 * already, set a timer for deallocating a thread if we
1848 * continue to have a surplus.
1849 */
1850 group->idle_count++;
1851
1852 if (group->idle_count == 1) {
1853 group->idle_timestamp = mach_absolute_time();
1854 }
1855
1856 if (((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0) &&
1857 ((group->active_count + group->idle_count) > group->target_thread_count)) {
1858 thread_call_start_deallocate_timer(group);
1859 }
1860
1861 /* Wait for more work (or termination) */
1862 wres = waitq_assert_wait64(waitq: &group->idle_waitq, CAST_EVENT64_T(group), THREAD_INTERRUPTIBLE, deadline: 0);
1863 if (wres != THREAD_WAITING) {
1864 panic("kcall worker unable to assert wait %d", wres);
1865 }
1866
1867 enable_ints_and_unlock(group, s);
1868
1869 thread_block_parameter(continuation: (thread_continue_t)thread_call_thread, parameter: group);
1870 } else {
1871 if (group->idle_count < group->target_thread_count) {
1872 group->idle_count++;
1873
1874 waitq_assert_wait64(waitq: &group->idle_waitq, CAST_EVENT64_T(group), THREAD_UNINT, deadline: 0); /* Interrupted means to exit */
1875
1876 enable_ints_and_unlock(group, s);
1877
1878 thread_block_parameter(continuation: (thread_continue_t)thread_call_thread, parameter: group);
1879 /* NOTREACHED */
1880 }
1881 }
1882
1883 enable_ints_and_unlock(group, s);
1884
1885 thread_terminate(target_act: self);
1886 /* NOTREACHED */
1887}
1888
void
thread_call_start_iotes_invocation(__assert_only thread_call_t call)
{
	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) {
		/* not a thread call thread, might be a workloop IOTES */
		return;
	}

	assert(self->thc_state);
	assert(self->thc_state->thc_call == call);

	self->thc_state->thc_IOTES_invocation_timestamp = mach_absolute_time();
}


/*
 * thread_call_daemon: walk list of groups, allocating
 * threads if appropriate (as determined by
 * thread_call_group_should_add_thread()).
 */
static void
thread_call_daemon_continue(__unused void *arg,
    __unused wait_result_t w)
{
	do {
		os_atomic_store(&thread_call_daemon_awake, false, relaxed);

		for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
			thread_call_group_t group = &thread_call_groups[i];

			spl_t s = disable_ints_and_lock(group);

			while (thread_call_group_should_add_thread(group)) {
				group->active_count++;

				enable_ints_and_unlock(group, s);

				thread_call_thread_create(group);

				s = disable_ints_and_lock(group);
			}

			enable_ints_and_unlock(group, s);
		}
	} while (os_atomic_load(&thread_call_daemon_awake, relaxed));

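	/*
	 * Re-check thread_call_daemon_awake after asserting the wait: if a
	 * wakeup raced in after the scan above, clearing the wait lets the
	 * thread_block below restart this continuation rather than sleep
	 * through the wakeup.
	 */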
	waitq_assert_wait64(&daemon_waitq, CAST_EVENT64_T(&thread_call_daemon_awake), THREAD_UNINT, 0);

	if (os_atomic_load(&thread_call_daemon_awake, relaxed)) {
		clear_wait(current_thread(), THREAD_AWAKENED);
	}

	thread_block_parameter(thread_call_daemon_continue, NULL);
	/* NOTREACHED */
}

static void
thread_call_daemon(
	__unused void          *arg,
	__unused wait_result_t  w)
{
	thread_t self = current_thread();

	self->options |= TH_OPT_VMPRIV;
	vm_page_free_reserve(2); /* XXX */

	thread_set_thread_name(self, "thread_call_daemon");

	thread_call_daemon_continue(NULL, 0);
	/* NOTREACHED */
}

/*
 * Schedule a timer to deallocate a worker thread if the group has a
 * surplus of threads (more than its target) and at least one thread
 * remains idle for the entire interval.
 */
static void
thread_call_start_deallocate_timer(thread_call_group_t group)
{
	__assert_only bool already_enqueued;

	assert(group->idle_count > 0);
	assert((group->tcg_flags & TCG_DEALLOC_ACTIVE) == 0);

	group->tcg_flags |= TCG_DEALLOC_ACTIVE;

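	/*
	 * The deadline is anchored at idle_timestamp (when the group first
	 * gained an idle thread), not at the current time, so a thread that
	 * has already been idle for a while is reclaimed sooner.
	 */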
	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;

	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);

	assert(already_enqueued == false);
}

/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
void
thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
{
	thread_call_group_t group = (thread_call_group_t) p0;
	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;

	assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
	assert((group - &thread_call_groups[0]) > THREAD_CALL_INDEX_INVALID);

	thread_call_t call;
	uint64_t now;

	thread_call_lock_spin(group);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else if (flavor == TCF_ABSOLUTE) {
		now = mach_absolute_time();
	} else {
		panic("invalid timer flavor: %d", flavor);
	}

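	/*
	 * Each delayed call carries two deadlines: tc_soft_deadline is the
	 * requested fire time, and tc_pqlink.deadline is that time plus any
	 * coalescing leeway granted at enqueue (so it is never earlier than
	 * the soft deadline).  The loop below expires calls whose soft
	 * deadline has passed, except that rate-limited calls are held until
	 * their hard deadline unless a forced timer evaluation is underway.
	 */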
	while ((call = priority_queue_min(&group->delayed_pqueues[flavor],
	    struct thread_call, tc_pqlink)) != NULL) {
		assert(thread_call_get_group(call) == group);
		assert(thread_call_get_flavor(call) == flavor);

		/*
		 * If we hit a call that isn't yet ready to expire,
		 * then we're done for now.
		 * TODO: The next timer in the list could have a larger leeway
		 * and therefore be ready to expire.
		 */
		if (call->tc_soft_deadline > now) {
			break;
		}

		/*
		 * If we hit a rate-limited timer, don't eagerly wake it up.
		 * Wait until it reaches the end of the leeway window.
		 *
		 * TODO: What if the next timer is not rate-limited?
		 * Have a separate rate-limited queue to avoid this.
		 */
		if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
		    (call->tc_pqlink.deadline > now) &&
		    (ml_timer_forced_evaluation() == FALSE)) {
			break;
		}

		if (THREAD_CALL_SIGNAL & call->tc_flags) {
			__assert_only queue_head_t *old_queue;
			old_queue = thread_call_dequeue(call);
			assert(old_queue == &group->delayed_queues[flavor]);

			do {
				thread_call_func_t func = call->tc_func;
				thread_call_param_t param0 = call->tc_param0;
				thread_call_param_t param1 = call->tc_param1;

				call->tc_flags |= THREAD_CALL_RUNNING;

				thread_call_unlock(group);
				thread_call_invoke(func, param0, param1, call);
				thread_call_lock_spin(group);

				/* finish may detect that the call has been re-pended */
			} while (thread_call_finish(call, group, NULL));
			/* call may have been freed by the finish */
		} else {
			_pending_call_enqueue(call, group, now);
		}
	}

	_arm_delayed_call_timer(call, group, flavor);

	thread_call_unlock(group);
}

static void
thread_call_delayed_timer_rescan(thread_call_group_t group,
    thread_call_flavor_t flavor)
{
	thread_call_t call;
	uint64_t now;

	spl_t s = disable_ints_and_lock(group);

	assert(ml_timer_forced_evaluation() == TRUE);

	if (flavor == TCF_CONTINUOUS) {
		now = mach_continuous_time();
	} else {
		now = mach_absolute_time();
	}

	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_qlink) {
		if (call->tc_soft_deadline <= now) {
			_pending_call_enqueue(call, group, now);
		} else {
			uint64_t skew = call->tc_pqlink.deadline - call->tc_soft_deadline;
			assert(call->tc_pqlink.deadline >= call->tc_soft_deadline);
			/*
			 * On a latency quality-of-service level change,
			 * re-sort potentially rate-limited callouts.  The platform
			 * layer determines which timers require this.
			 *
			 * This trick works by updating the deadline value to
			 * equal the soft deadline, effectively crushing away
			 * timer coalescing slop values for any armed
			 * timer in the queue.
			 *
			 * TODO: Keep a hint on the timer to tell whether its inputs changed, so we
			 * only have to crush coalescing for timers that need it.
			 *
			 * TODO: Keep a separate queue of timers above the re-sort
			 * threshold, so we only have to look at those.
			 */
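			/*
			 * Example: a call requested for time T that was granted
			 * 5ms of leeway has tc_soft_deadline == T and
			 * tc_pqlink.deadline == T + 5ms, so skew == 5ms; if that
			 * exceeds the resort threshold, the re-enqueue below
			 * collapses the hard deadline back down to T.
			 */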
			if (timer_resort_threshold(skew)) {
				_call_dequeue(call, group);
				_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
			}
		}
	}

	_arm_delayed_call_timer(NULL, group, flavor);

	enable_ints_and_unlock(group, s);
}

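/*
 * Re-evaluate the delayed queues of every group, for both flavors.
 * Expected to run only from the platform's forced timer re-evaluation
 * path (e.g. after a latency QoS change); the per-group rescan above
 * asserts ml_timer_forced_evaluation().
 */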
void
thread_call_delayed_timer_rescan_all(void)
{
	for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
		for (thread_call_flavor_t flavor = 0; flavor < TCF_COUNT; flavor++) {
			thread_call_delayed_timer_rescan(&thread_call_groups[i], flavor);
		}
	}
}

/*
 * Timer callback to tell a thread to terminate if
 * we have an excess of threads and at least one has been
 * idle for a long time.
 */
static void
thread_call_dealloc_timer(
	timer_call_param_t              p0,
	__unused timer_call_param_t     p1)
{
	thread_call_group_t group = (thread_call_group_t)p0;
	uint64_t now;
	kern_return_t res;
	bool terminated = false;

	thread_call_lock_spin(group);

	assert(group->tcg_flags & TCG_DEALLOC_ACTIVE);

	now = mach_absolute_time();

	if (group->idle_count > 0) {
		if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
			terminated = true;
			group->idle_count--;
			res = waitq_wakeup64_one(&group->idle_waitq, CAST_EVENT64_T(group),
			    THREAD_INTERRUPTED, WAITQ_WAKEUP_DEFAULT);
			if (res != KERN_SUCCESS) {
				panic("Unable to wake up idle thread for termination (%d)", res);
			}
		}
	}

	group->tcg_flags &= ~TCG_DEALLOC_ACTIVE;

	/*
	 * If we still have an excess of threads, schedule another
	 * invocation of this function.
	 */
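	/*
	 * Only one idle thread is woken for termination per firing, so a
	 * large surplus drains at a rate of one thread per dealloc interval.
	 */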
	if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
		/*
		 * If we just terminated a thread, push out the
		 * next deadline.
		 */
		if (terminated) {
			group->idle_timestamp = now;
		}

		thread_call_start_deallocate_timer(group);
	}

	thread_call_unlock(group);
}

/*
 * Wait for the invocation of the thread call to complete.
 * We know there's only one in flight because of the 'once' flag.
 *
 * If a subsequent invocation comes in before we wake up, that's OK.
 *
 * TODO: Here is where we will add priority inheritance to the thread executing
 * the thread call in case it's lower priority than the current thread
 * <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
 *
 * Called with the thread call lock held; returns with it unlocked.
 * This lets us avoid a spurious take/drop after waking up from thread_block.
 *
 * This thread could be a thread call thread itself, blocking and therefore making a
 * sched_call upcall into the thread call subsystem, needing the group lock.
 * However, we're saved from deadlock because the 'block' upcall is made in
 * thread_block, not in assert_wait.
 */
static bool
thread_call_wait_once_locked(thread_call_t call, spl_t s)
{
	assert(call->tc_flags & THREAD_CALL_ALLOC);
	assert(call->tc_flags & THREAD_CALL_ONCE);

	thread_call_group_t group = thread_call_get_group(call);

	if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
		enable_ints_and_unlock(group, s);
		return false;
	}

	/* call is running, so we have to wait for it */
	call->tc_flags |= THREAD_CALL_WAIT;
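	/*
	 * thread_call_finish() is the wake side of this handshake: when the
	 * in-flight invocation completes, it notices THREAD_CALL_WAIT and
	 * wakes waiters_waitq on CAST_EVENT64_T(call), which pairs with the
	 * assert_wait below.
	 */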

	wait_result_t res = waitq_assert_wait64(&group->waiters_waitq, CAST_EVENT64_T(call), THREAD_UNINT, 0);
	if (res != THREAD_WAITING) {
		panic("Unable to assert wait: %d", res);
	}

	enable_ints_and_unlock(group, s);

	res = thread_block(THREAD_CONTINUE_NULL);
	if (res != THREAD_AWAKENED) {
		panic("Awoken with %d?", res);
	}

	/* returns unlocked */
	return true;
}

/*
 * Wait for an in-flight invocation to complete.
 * Does NOT try to cancel, so the client doesn't need to hold their
 * lock while calling this function.
 *
 * Returns whether or not it had to wait.
 *
 * Only works for THREAD_CALL_ONCE calls.
 */
boolean_t
thread_call_wait_once(thread_call_t call)
{
	if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
		panic("(%p %p) thread_call_wait_once: can't wait on thread call whose storage I don't own",
		    call, call->tc_func);
	}

	if ((call->tc_flags & THREAD_CALL_ONCE) == 0) {
		panic("(%p %p) thread_call_wait_once: can't wait_once on a non-once call",
		    call, call->tc_func);
	}

	if (!ml_get_interrupts_enabled()) {
		panic("(%p %p) unsafe thread_call_wait_once",
		    call, call->tc_func);
	}

	thread_t self = current_thread();

	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) &&
	    self->thc_state && self->thc_state->thc_call == call) {
		panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
		    call, call->tc_func);
	}

	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);

	bool waited = thread_call_wait_once_locked(call, s);
	/* thread call lock unlocked */

	return waited;
}
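/*
 * Illustrative sketch only (hypothetical caller-owned call `tc`,
 * allocated with the ONCE option): a client tearing down such a call
 * typically cancels it and then waits out any in-flight invocation:
 *
 *	if (!thread_call_cancel(tc)) {
 *		(void) thread_call_wait_once(tc);
 *	}
 *	thread_call_free(tc);
 */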


/*
 * Wait for all requested invocations of a thread call prior to now
 * to finish.  Can only be invoked on thread calls whose storage we manage.
 * Just waits for the finish count to catch up to the submit count we find
 * at the beginning of our wait.
 *
 * Called with thread_call_lock held.  Returns with lock released.
 */
static void
thread_call_wait_locked(thread_call_t call, spl_t s)
{
	thread_call_group_t group = thread_call_get_group(call);

	assert(call->tc_flags & THREAD_CALL_ALLOC);

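	/*
	 * Snapshot the submit count: for example, if tc_submit_count is 5
	 * and tc_finish_count is 3 when we start, we wait until the finish
	 * count reaches 5, even if later submissions push the submit count
	 * higher while we sleep.
	 */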
	uint64_t submit_count = call->tc_submit_count;

	while (call->tc_finish_count < submit_count) {
		call->tc_flags |= THREAD_CALL_WAIT;

		wait_result_t res = waitq_assert_wait64(&group->waiters_waitq,
		    CAST_EVENT64_T(call), THREAD_UNINT, 0);

		if (res != THREAD_WAITING) {
			panic("Unable to assert wait: %d", res);
		}

		enable_ints_and_unlock(group, s);

		res = thread_block(THREAD_CONTINUE_NULL);
		if (res != THREAD_AWAKENED) {
			panic("Awoken with %d?", res);
		}

		s = disable_ints_and_lock(group);
	}

	enable_ints_and_unlock(group, s);
}

/*
 * Determine whether a thread call is either on a queue or
 * currently being executed.
 */
boolean_t
thread_call_isactive(thread_call_t call)
{
	thread_call_group_t group = thread_call_get_group(call);

	spl_t s = disable_ints_and_lock(group);
	boolean_t active = (call->tc_submit_count > call->tc_finish_count);
	enable_ints_and_unlock(group, s);

	return active;
}

/*
 * adjust_cont_time_thread_calls
 * On wake from sleep, re-arm the delayed call timer for the
 * continuous-time flavor in each group.
 */
void
adjust_cont_time_thread_calls(void)
{
	for (int i = THREAD_CALL_INDEX_HIGH; i < THREAD_CALL_INDEX_MAX; i++) {
		thread_call_group_t group = &thread_call_groups[i];
		spl_t s = disable_ints_and_lock(group);

		/*
		 * Only the continuous timers need to be re-armed:
		 * mach_continuous_time includes time spent asleep, so the
		 * point at which a continuous deadline expires shifts
		 * across a sleep/wake cycle.
		 */
		_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
		enable_ints_and_unlock(group, s);
	}
}