1/*
2 * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach/mach_types.h>
30#include <mach/thread_act.h>
31
32#include <kern/kern_types.h>
33#include <kern/zalloc.h>
34#include <kern/sched_prim.h>
35#include <kern/clock.h>
36#include <kern/task.h>
37#include <kern/thread.h>
38#include <kern/waitq.h>
39#include <kern/ledger.h>
40#include <kern/policy_internal.h>
41
42#include <vm/vm_pageout.h>
43
44#include <kern/thread_call.h>
45#include <kern/call_entry.h>
46#include <kern/timer_call.h>
47
48#include <libkern/OSAtomic.h>
49#include <kern/timer_queue.h>
50
51#include <sys/kdebug.h>
52#if CONFIG_DTRACE
53#include <mach/sdt.h>
54#endif
55#include <machine/machine_routines.h>
56
/* Zone backing dynamically allocated thread calls (see thread_call_allocate / thread_call_free). */
static zone_t thread_call_zone;
/* Wait queue set up in thread_call_initialize; presumably where the daemon thread parks -- its use is outside this view, confirm. */
static struct waitq daemon_waitq;
59
/*
 * Time base a delayed thread call is scheduled against. The value is also
 * used as the index into a group's delayed_queues[] and delayed_timers[].
 */
typedef enum {
 TCF_ABSOLUTE = 0, /* "now" is sampled with mach_absolute_time() */
 TCF_CONTINUOUS = 1, /* "now" is sampled with mach_continuous_time() */
 TCF_COUNT = 2, /* number of flavors; sizes the per-group arrays */
} thread_call_flavor_t;
65
/* Per-group behavior flags, stored in thread_call_group.flags. */
typedef enum {
 TCG_NONE = 0x0, /* no flags set */
 TCG_PARALLEL = 0x1, /* tested by group_isparallel(); selects the multi-thread growth policy */
 TCG_DEALLOC_ACTIVE = 0x2, /* not referenced in this part of the file; presumably tracks the dealloc timer -- confirm */
} thread_call_group_flags_t;
71
/*
 * State for one pool of thread call worker threads. There is one group per
 * thread_call_index_t value; a call selects its group through tc_index
 * (see thread_call_get_group).
 */
static struct thread_call_group {
 const char * tcg_name; /* short name; used in panic text and worker thread names */

 queue_head_t pending_queue; /* calls ready to run */
 uint32_t pending_count; /* number of entries on pending_queue */

 queue_head_t delayed_queues[TCF_COUNT]; /* calls waiting for a deadline, one queue per flavor */
 timer_call_data_t delayed_timers[TCF_COUNT]; /* timers for the delayed queues; callback is thread_call_delayed_timer */

 timer_call_data_t dealloc_timer; /* timer whose callback is thread_call_dealloc_timer */

 struct waitq idle_waitq; /* initialized with SYNC_POLICY_REVERSED in thread_call_group_setup */
 uint32_t idle_count, active_count, blocked_count; /* thread counts; their sum is checked against THREAD_CALL_GROUP_MAX_THREADS */

 uint32_t tcg_thread_pri; /* priority handed to kernel_thread_start_priority for new workers */
 uint32_t target_thread_count; /* parallel groups add threads while active_count is below this */
 uint64_t idle_timestamp; /* not referenced in this part of the file */

 thread_call_group_flags_t flags; /* TCG_* flags */

} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
 [THREAD_CALL_INDEX_HIGH] = {
 .tcg_name = "high",
 .tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
 .target_thread_count = 4,
 .flags = TCG_NONE,
 },
 [THREAD_CALL_INDEX_KERNEL] = {
 .tcg_name = "kernel",
 .tcg_thread_pri = BASEPRI_KERNEL,
 .target_thread_count = 1,
 .flags = TCG_PARALLEL,
 },
 [THREAD_CALL_INDEX_USER] = {
 .tcg_name = "user",
 .tcg_thread_pri = BASEPRI_DEFAULT,
 .target_thread_count = 1,
 .flags = TCG_PARALLEL,
 },
 [THREAD_CALL_INDEX_LOW] = {
 .tcg_name = "low",
 .tcg_thread_pri = MAXPRI_THROTTLE,
 .target_thread_count = 1,
 .flags = TCG_PARALLEL,
 },
 [THREAD_CALL_INDEX_KERNEL_HIGH] = {
 .tcg_name = "kernel-high",
 .tcg_thread_pri = BASEPRI_PREEMPT,
 .target_thread_count = 2,
 .flags = TCG_NONE,
 },
 [THREAD_CALL_INDEX_QOS_UI] = {
 .tcg_name = "qos-ui",
 .tcg_thread_pri = BASEPRI_FOREGROUND,
 .target_thread_count = 1,
 .flags = TCG_NONE,
 },
 [THREAD_CALL_INDEX_QOS_IN] = {
 .tcg_name = "qos-in",
 .tcg_thread_pri = BASEPRI_USER_INITIATED,
 .target_thread_count = 1,
 .flags = TCG_NONE,
 },
 [THREAD_CALL_INDEX_QOS_UT] = {
 .tcg_name = "qos-ut",
 .tcg_thread_pri = BASEPRI_UTILITY,
 .target_thread_count = 1,
 .flags = TCG_NONE,
 },
};

/* Pointer alias used throughout this file for a group. */
typedef struct thread_call_group *thread_call_group_t;
144
/* Number of entries in internal_call_storage, the static pool used when a caller passes no thread_call_t. */
#define INTERNAL_CALL_COUNT 768
/* Converted to absolute-time units in thread_call_initialize (thread_call_dealloc_interval_abs). */
#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
/* A parallel group may add a thread once pending_count exceeds this multiple of active_count. */
#define THREAD_CALL_ADD_RATIO 4
/* The ratio-based growth above only applies while sched_mach_factor is below this value. */
#define THREAD_CALL_MACH_FACTOR_CAP 3
/* thread_call_group_should_add_thread panics once a group holds this many threads. */
#define THREAD_CALL_GROUP_MAX_THREADS 500

/* Set TRUE under the lock in thread_call_initialize; other uses are outside this view. */
static boolean_t thread_call_daemon_awake;
/* Static pool of call entries handed out by _internal_call_allocate. */
static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
/* Free list of the entries in internal_call_storage. */
static queue_head_t thread_call_internal_queue;
/* Number of entries currently on thread_call_internal_queue. */
int thread_call_internal_queue_count = 0;
/* THREAD_CALL_DEALLOC_INTERVAL_NS expressed in absolute-time units. */
static uint64_t thread_call_dealloc_interval_abs;
156
/* Forward declarations for the file-local helpers defined later in this file. */
static __inline__ thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
static __inline__ void _internal_call_release(thread_call_t call);
static __inline__ boolean_t _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
static boolean_t _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
 uint64_t deadline, thread_call_flavor_t flavor);
static __inline__ boolean_t _call_dequeue(thread_call_t call, thread_call_group_t group);
static __inline__ void thread_call_wake(thread_call_group_t group);
static void thread_call_daemon(void *arg);
static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
static void thread_call_group_setup(thread_call_group_t group);
static void sched_call_thread(int type, thread_t thread);
static void thread_call_start_deallocate_timer(thread_call_group_t group);
static void thread_call_wait_locked(thread_call_t call, spl_t s);
static boolean_t thread_call_wait_once_locked(thread_call_t call, spl_t s);

/* Common back end for every delayed-enqueue entry point in this file. */
static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
 thread_call_func_t alt_func, thread_call_param_t alt_param0,
 thread_call_param_t param1, uint64_t deadline,
 uint64_t leeway, unsigned int flags);
177
/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);

/* Lock group and the single mutex guarding thread call state; both are initialized in thread_call_initialize. */
lck_grp_t thread_call_lck_grp;
lck_mtx_t thread_call_lock_data;

/* Acquire the thread call lock in spin mode. Callers in this file raise spl first (see disable_ints_and_lock). */
#define thread_call_lock_spin() \
 lck_mtx_lock_spin_always(&thread_call_lock_data)

/* Release the thread call lock. */
#define thread_call_unlock() \
 lck_mtx_unlock_always(&thread_call_lock_data)

/* Shorthand for the deadline stored in a thread call's embedded call entry. */
#define tc_deadline tc_call.deadline

/* Defined elsewhere; not referenced in this part of the file. */
extern boolean_t mach_timer_coalescing_enabled;
193
194static inline spl_t
195disable_ints_and_lock(void)
196{
197 spl_t s = splsched();
198 thread_call_lock_spin();
199
200 return s;
201}
202
203static inline void
204enable_ints_and_unlock(spl_t s)
205{
206 thread_call_unlock();
207 splx(s);
208}
209
210static inline boolean_t
211group_isparallel(thread_call_group_t group)
212{
213 return ((group->flags & TCG_PARALLEL) != 0);
214}
215
216static boolean_t
217thread_call_group_should_add_thread(thread_call_group_t group)
218{
219 if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
220 panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
221 group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
222 group->active_count, group->blocked_count, group->idle_count);
223 }
224
225 if (group_isparallel(group) == FALSE) {
226 if (group->pending_count > 0 && group->active_count == 0) {
227 return TRUE;
228 }
229
230 return FALSE;
231 }
232
233 if (group->pending_count > 0) {
234 if (group->idle_count > 0) {
235 return FALSE;
236 }
237
238 uint32_t thread_count = group->active_count;
239
240 /*
241 * Add a thread if either there are no threads,
242 * the group has fewer than its target number of
243 * threads, or the amount of work is large relative
244 * to the number of threads. In the last case, pay attention
245 * to the total load on the system, and back off if
246 * it's high.
247 */
248 if ((thread_count == 0) ||
249 (thread_count < group->target_thread_count) ||
250 ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
251 (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
252 return TRUE;
253 }
254 }
255
256 return FALSE;
257}
258
259/* Lock held */
260static inline thread_call_group_t
261thread_call_get_group(thread_call_t call)
262{
263 thread_call_index_t index = call->tc_index;
264
265 assert(index >= 0 && index < THREAD_CALL_INDEX_MAX);
266
267 return &thread_call_groups[index];
268}
269
270/* Lock held */
271static inline thread_call_flavor_t
272thread_call_get_flavor(thread_call_t call)
273{
274 return (call->tc_flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
275}
276
277static void
278thread_call_group_setup(thread_call_group_t group)
279{
280 queue_init(&group->pending_queue);
281 queue_init(&group->delayed_queues[TCF_ABSOLUTE]);
282 queue_init(&group->delayed_queues[TCF_CONTINUOUS]);
283
284 /* TODO: Consolidate to one hard timer for each group */
285 timer_call_setup(&group->delayed_timers[TCF_ABSOLUTE], thread_call_delayed_timer, group);
286 timer_call_setup(&group->delayed_timers[TCF_CONTINUOUS], thread_call_delayed_timer, group);
287 timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
288
289 /* Reverse the wait order so we re-use the most recently parked thread from the pool */
290 waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED|SYNC_POLICY_DISABLE_IRQ);
291}
292
293/*
294 * Simple wrapper for creating threads bound to
295 * thread call groups.
296 */
297static kern_return_t
298thread_call_thread_create(
299 thread_call_group_t group)
300{
301 thread_t thread;
302 kern_return_t result;
303
304 int thread_pri = group->tcg_thread_pri;
305
306 result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
307 group, thread_pri, &thread);
308 if (result != KERN_SUCCESS) {
309 return result;
310 }
311
312 if (thread_pri <= BASEPRI_KERNEL) {
313 /*
314 * THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
315 * in kernel if there are higher priority threads available.
316 */
317 thread_set_eager_preempt(thread);
318 }
319
320 char name[MAXTHREADNAMESIZE] = "";
321
322 int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
323
324 snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
325 thread_set_thread_name(thread, name);
326
327 thread_deallocate(thread);
328 return KERN_SUCCESS;
329}
330
331/*
332 * thread_call_initialize:
333 *
334 * Initialize this module, called
335 * early during system initialization.
336 */
337void
338thread_call_initialize(void)
339{
340 int tc_size = sizeof (thread_call_data_t);
341 thread_call_zone = zinit(tc_size, 4096 * tc_size, 16 * tc_size, "thread_call");
342 zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
343 zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
344
345 lck_grp_init(&thread_call_lck_grp, "thread_call", LCK_GRP_ATTR_NULL);
346 lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, LCK_ATTR_NULL);
347
348 nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
349 waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ | SYNC_POLICY_FIFO);
350
351 for (uint32_t i = 0; i < THREAD_CALL_INDEX_MAX; i++)
352 thread_call_group_setup(&thread_call_groups[i]);
353
354 spl_t s = disable_ints_and_lock();
355
356 queue_init(&thread_call_internal_queue);
357 for (
358 thread_call_t call = internal_call_storage;
359 call < &internal_call_storage[INTERNAL_CALL_COUNT];
360 call++) {
361
362 enqueue_tail(&thread_call_internal_queue, &call->tc_call.q_link);
363 thread_call_internal_queue_count++;
364 }
365
366 thread_call_daemon_awake = TRUE;
367
368 enable_ints_and_unlock(s);
369
370 thread_t thread;
371 kern_return_t result;
372
373 result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
374 NULL, BASEPRI_PREEMPT_HIGH + 1, &thread);
375 if (result != KERN_SUCCESS)
376 panic("thread_call_initialize");
377
378 thread_deallocate(thread);
379}
380
381void
382thread_call_setup(
383 thread_call_t call,
384 thread_call_func_t func,
385 thread_call_param_t param0)
386{
387 bzero(call, sizeof(*call));
388 call_entry_setup((call_entry_t)call, func, param0);
389
390 /* Thread calls default to the HIGH group unless otherwise specified */
391 call->tc_index = THREAD_CALL_INDEX_HIGH;
392
393 /* THREAD_CALL_ALLOC not set, memory owned by caller */
394}
395
396/*
397 * _internal_call_allocate:
398 *
399 * Allocate an internal callout entry.
400 *
401 * Called with thread_call_lock held.
402 */
403static __inline__ thread_call_t
404_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
405{
406 thread_call_t call;
407
408 if (queue_empty(&thread_call_internal_queue))
409 panic("_internal_call_allocate");
410
411 call = qe_dequeue_head(&thread_call_internal_queue, struct thread_call, tc_call.q_link);
412
413 thread_call_internal_queue_count--;
414
415 thread_call_setup(call, func, param0);
416 call->tc_refs = 0;
417 call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */
418
419 return (call);
420}
421
422/*
423 * _internal_call_release:
424 *
425 * Release an internal callout entry which
426 * is no longer pending (or delayed). This is
427 * safe to call on a non-internal entry, in which
428 * case nothing happens.
429 *
430 * Called with thread_call_lock held.
431 */
432static __inline__ void
433_internal_call_release(thread_call_t call)
434{
435 if (call >= internal_call_storage &&
436 call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
437 assert((call->tc_flags & THREAD_CALL_ALLOC) == 0);
438 enqueue_head(&thread_call_internal_queue, &call->tc_call.q_link);
439 thread_call_internal_queue_count++;
440 }
441}
442
443/*
444 * _pending_call_enqueue:
445 *
446 * Place an entry at the end of the
447 * pending queue, to be executed soon.
448 *
449 * Returns TRUE if the entry was already
450 * on a queue.
451 *
452 * Called with thread_call_lock held.
453 */
454static __inline__ boolean_t
455_pending_call_enqueue(thread_call_t call,
456 thread_call_group_t group)
457{
458 if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
459 == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
460 call->tc_deadline = 0;
461
462 uint32_t flags = call->tc_flags;
463 call->tc_flags |= THREAD_CALL_RESCHEDULE;
464
465 if ((flags & THREAD_CALL_RESCHEDULE) != 0)
466 return (TRUE);
467 else
468 return (FALSE);
469 }
470
471 queue_head_t *old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
472
473 if (old_queue == NULL) {
474 call->tc_submit_count++;
475 } else if (old_queue != &group->pending_queue &&
476 old_queue != &group->delayed_queues[TCF_ABSOLUTE] &&
477 old_queue != &group->delayed_queues[TCF_CONTINUOUS]) {
478 panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
479 }
480
481 group->pending_count++;
482
483 thread_call_wake(group);
484
485 return (old_queue != NULL);
486}
487
488/*
489 * _delayed_call_enqueue:
490 *
491 * Place an entry on the delayed queue,
492 * after existing entries with an earlier
493 * (or identical) deadline.
494 *
495 * Returns TRUE if the entry was already
496 * on a queue.
497 *
498 * Called with thread_call_lock held.
499 */
500static boolean_t
501_delayed_call_enqueue(
502 thread_call_t call,
503 thread_call_group_t group,
504 uint64_t deadline,
505 thread_call_flavor_t flavor)
506{
507 if ((THREAD_CALL_ONCE | THREAD_CALL_RUNNING)
508 == (call->tc_flags & (THREAD_CALL_ONCE | THREAD_CALL_RUNNING))) {
509 call->tc_deadline = deadline;
510
511 uint32_t flags = call->tc_flags;
512 call->tc_flags |= THREAD_CALL_RESCHEDULE;
513
514 if ((flags & THREAD_CALL_RESCHEDULE) != 0)
515 return (TRUE);
516 else
517 return (FALSE);
518 }
519
520 queue_head_t *old_queue = call_entry_enqueue_deadline(CE(call),
521 &group->delayed_queues[flavor],
522 deadline);
523
524 if (old_queue == &group->pending_queue) {
525 group->pending_count--;
526 } else if (old_queue == NULL) {
527 call->tc_submit_count++;
528 } else if (old_queue == &group->delayed_queues[TCF_ABSOLUTE] ||
529 old_queue == &group->delayed_queues[TCF_CONTINUOUS]) {
530 /* TODO: if it's in the other delayed queue, that might not be OK */
531 // we did nothing, and that's fine
532 } else {
533 panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
534 }
535
536 return (old_queue != NULL);
537}
538
539/*
540 * _call_dequeue:
541 *
542 * Remove an entry from a queue.
543 *
544 * Returns TRUE if the entry was on a queue.
545 *
546 * Called with thread_call_lock held.
547 */
548static __inline__ boolean_t
549_call_dequeue(
550 thread_call_t call,
551 thread_call_group_t group)
552{
553 queue_head_t *old_queue;
554
555 old_queue = call_entry_dequeue(CE(call));
556
557 if (old_queue != NULL) {
558 assert(old_queue == &group->pending_queue ||
559 old_queue == &group->delayed_queues[TCF_ABSOLUTE] ||
560 old_queue == &group->delayed_queues[TCF_CONTINUOUS]);
561
562 call->tc_finish_count++;
563 if (old_queue == &group->pending_queue)
564 group->pending_count--;
565 }
566
567 return (old_queue != NULL);
568}
569
570/*
571 * _arm_delayed_call_timer:
572 *
573 * Check if the timer needs to be armed for this flavor,
574 * and if so, arm it.
575 *
576 * If call is non-NULL, only re-arm the timer if the specified call
577 * is the first in the queue.
578 *
579 * Returns true if the timer was armed/re-armed, false if it was left unset
580 * Caller should cancel the timer if need be.
581 *
582 * Called with thread_call_lock held.
583 */
584static bool
585_arm_delayed_call_timer(thread_call_t new_call,
586 thread_call_group_t group,
587 thread_call_flavor_t flavor)
588{
589 /* No calls implies no timer needed */
590 if (queue_empty(&group->delayed_queues[flavor]))
591 return false;
592
593 thread_call_t call = qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link);
594
595 /* We only need to change the hard timer if this new call is the first in the list */
596 if (new_call != NULL && new_call != call)
597 return false;
598
599 assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_call.deadline)));
600
601 uint64_t fire_at = call->tc_soft_deadline;
602
603 if (flavor == TCF_CONTINUOUS) {
604 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
605 fire_at = continuoustime_to_absolutetime(fire_at);
606 } else {
607 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == 0);
608 }
609
610 /*
611 * Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
612 * which does not take into account later-deadline timers with a larger leeway.
613 * This is a valid coalescing behavior, but masks a possible window to
614 * fire a timer instead of going idle.
615 */
616 uint64_t leeway = call->tc_call.deadline - call->tc_soft_deadline;
617
618 timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
619 fire_at, leeway,
620 TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY,
621 ((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
622
623 return true;
624}
625
626/*
627 * _cancel_func_from_queue:
628 *
629 * Remove the first (or all) matching
630 * entries from the specified queue.
631 *
632 * Returns TRUE if any matching entries
633 * were found.
634 *
635 * Called with thread_call_lock held.
636 */
637static boolean_t
638_cancel_func_from_queue(thread_call_func_t func,
639 thread_call_param_t param0,
640 thread_call_group_t group,
641 boolean_t remove_all,
642 queue_head_t *queue)
643{
644 boolean_t call_removed = FALSE;
645 thread_call_t call;
646
647 qe_foreach_element_safe(call, queue, tc_call.q_link) {
648 if (call->tc_call.func != func ||
649 call->tc_call.param0 != param0) {
650 continue;
651 }
652
653 _call_dequeue(call, group);
654
655 _internal_call_release(call);
656
657 call_removed = TRUE;
658 if (!remove_all)
659 break;
660 }
661
662 return (call_removed);
663}
664
665/*
666 * thread_call_func_delayed:
667 *
668 * Enqueue a function callout to
669 * occur at the stated time.
670 */
671void
672thread_call_func_delayed(
673 thread_call_func_t func,
674 thread_call_param_t param,
675 uint64_t deadline)
676{
677 (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0);
678}
679
680/*
681 * thread_call_func_delayed_with_leeway:
682 *
683 * Same as thread_call_func_delayed(), but with
684 * leeway/flags threaded through.
685 */
686
687void
688thread_call_func_delayed_with_leeway(
689 thread_call_func_t func,
690 thread_call_param_t param,
691 uint64_t deadline,
692 uint64_t leeway,
693 uint32_t flags)
694{
695 (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags);
696}
697
698/*
699 * thread_call_func_cancel:
700 *
701 * Dequeue a function callout.
702 *
703 * Removes one (or all) { function, argument }
704 * instance(s) from either (or both)
705 * the pending and the delayed queue,
706 * in that order.
707 *
708 * Returns TRUE if any calls were cancelled.
709 *
710 * This iterates all of the pending or delayed thread calls in the group,
711 * which is really inefficient. Switch to an allocated thread call instead.
712 */
713boolean_t
714thread_call_func_cancel(
715 thread_call_func_t func,
716 thread_call_param_t param,
717 boolean_t cancel_all)
718{
719 boolean_t result;
720
721 assert(func != NULL);
722
723 spl_t s = disable_ints_and_lock();
724
725 /* Function-only thread calls are only kept in the default HIGH group */
726 thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
727
728 if (cancel_all) {
729 /* exhaustively search every queue, and return true if any search found something */
730 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) |
731 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) |
732 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
733 } else {
734 /* early-exit as soon as we find something, don't search other queues */
735 result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) ||
736 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) ||
737 _cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
738 }
739
740 enable_ints_and_unlock(s);
741
742 return (result);
743}
744
745/*
746 * Allocate a thread call with a given priority. Importances other than
747 * THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
748 * with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
749 * threads which are not in the normal "urgent" bands).
750 */
751thread_call_t
752thread_call_allocate_with_priority(
753 thread_call_func_t func,
754 thread_call_param_t param0,
755 thread_call_priority_t pri)
756{
757 return thread_call_allocate_with_options(func, param0, pri, 0);
758}
759
760thread_call_t
761thread_call_allocate_with_options(
762 thread_call_func_t func,
763 thread_call_param_t param0,
764 thread_call_priority_t pri,
765 thread_call_options_t options)
766{
767 thread_call_t call = thread_call_allocate(func, param0);
768
769 switch (pri) {
770 case THREAD_CALL_PRIORITY_HIGH:
771 call->tc_index = THREAD_CALL_INDEX_HIGH;
772 break;
773 case THREAD_CALL_PRIORITY_KERNEL:
774 call->tc_index = THREAD_CALL_INDEX_KERNEL;
775 break;
776 case THREAD_CALL_PRIORITY_USER:
777 call->tc_index = THREAD_CALL_INDEX_USER;
778 break;
779 case THREAD_CALL_PRIORITY_LOW:
780 call->tc_index = THREAD_CALL_INDEX_LOW;
781 break;
782 case THREAD_CALL_PRIORITY_KERNEL_HIGH:
783 call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
784 break;
785 default:
786 panic("Invalid thread call pri value: %d", pri);
787 break;
788 }
789
790 if (options & THREAD_CALL_OPTIONS_ONCE) {
791 call->tc_flags |= THREAD_CALL_ONCE;
792 }
793 if (options & THREAD_CALL_OPTIONS_SIGNAL) {
794 call->tc_flags |= THREAD_CALL_SIGNAL | THREAD_CALL_ONCE;
795 }
796
797 return call;
798}
799
800thread_call_t
801thread_call_allocate_with_qos(thread_call_func_t func,
802 thread_call_param_t param0,
803 int qos_tier,
804 thread_call_options_t options)
805{
806 thread_call_t call = thread_call_allocate(func, param0);
807
808 switch (qos_tier) {
809 case THREAD_QOS_UNSPECIFIED:
810 call->tc_index = THREAD_CALL_INDEX_HIGH;
811 break;
812 case THREAD_QOS_LEGACY:
813 call->tc_index = THREAD_CALL_INDEX_USER;
814 break;
815 case THREAD_QOS_MAINTENANCE:
816 case THREAD_QOS_BACKGROUND:
817 call->tc_index = THREAD_CALL_INDEX_LOW;
818 break;
819 case THREAD_QOS_UTILITY:
820 call->tc_index = THREAD_CALL_INDEX_QOS_UT;
821 break;
822 case THREAD_QOS_USER_INITIATED:
823 call->tc_index = THREAD_CALL_INDEX_QOS_IN;
824 break;
825 case THREAD_QOS_USER_INTERACTIVE:
826 call->tc_index = THREAD_CALL_INDEX_QOS_UI;
827 break;
828 default:
829 panic("Invalid thread call qos value: %d", qos_tier);
830 break;
831 }
832
833 if (options & THREAD_CALL_OPTIONS_ONCE)
834 call->tc_flags |= THREAD_CALL_ONCE;
835
836 /* does not support THREAD_CALL_OPTIONS_SIGNAL */
837
838 return call;
839}
840
841
842/*
843 * thread_call_allocate:
844 *
845 * Allocate a callout entry.
846 */
847thread_call_t
848thread_call_allocate(
849 thread_call_func_t func,
850 thread_call_param_t param0)
851{
852 thread_call_t call = zalloc(thread_call_zone);
853
854 thread_call_setup(call, func, param0);
855 call->tc_refs = 1;
856 call->tc_flags = THREAD_CALL_ALLOC;
857
858 return (call);
859}
860
861/*
862 * thread_call_free:
863 *
864 * Release a callout. If the callout is currently
865 * executing, it will be freed when all invocations
866 * finish.
867 *
868 * If the callout is currently armed to fire again, then
869 * freeing is not allowed and returns FALSE. The
870 * client must have canceled the pending invocation before freeing.
871 */
872boolean_t
873thread_call_free(
874 thread_call_t call)
875{
876 spl_t s = disable_ints_and_lock();
877
878 if (call->tc_call.queue != NULL ||
879 ((call->tc_flags & THREAD_CALL_RESCHEDULE) != 0)) {
880 thread_call_unlock();
881 splx(s);
882
883 return (FALSE);
884 }
885
886 int32_t refs = --call->tc_refs;
887 if (refs < 0) {
888 panic("Refcount negative: %d\n", refs);
889 }
890
891 if ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING)
892 == ((THREAD_CALL_SIGNAL | THREAD_CALL_RUNNING) & call->tc_flags)) {
893 thread_call_wait_once_locked(call, s);
894 /* thread call lock has been unlocked */
895 } else {
896 enable_ints_and_unlock(s);
897 }
898
899 if (refs == 0) {
900 assert(call->tc_finish_count == call->tc_submit_count);
901 zfree(thread_call_zone, call);
902 }
903
904 return (TRUE);
905}
906
907/*
908 * thread_call_enter:
909 *
910 * Enqueue a callout entry to occur "soon".
911 *
912 * Returns TRUE if the call was
913 * already on a queue.
914 */
915boolean_t
916thread_call_enter(
917 thread_call_t call)
918{
919 return thread_call_enter1(call, 0);
920}
921
922boolean_t
923thread_call_enter1(
924 thread_call_t call,
925 thread_call_param_t param1)
926{
927 boolean_t result = TRUE;
928 thread_call_group_t group;
929
930 assert(call->tc_call.func != NULL);
931
932 assert((call->tc_flags & THREAD_CALL_SIGNAL) == 0);
933
934 group = thread_call_get_group(call);
935
936 spl_t s = disable_ints_and_lock();
937
938 if (call->tc_call.queue != &group->pending_queue) {
939 result = _pending_call_enqueue(call, group);
940 }
941
942 call->tc_call.param1 = param1;
943
944 enable_ints_and_unlock(s);
945
946 return (result);
947}
948
949/*
950 * thread_call_enter_delayed:
951 *
952 * Enqueue a callout entry to occur
953 * at the stated time.
954 *
955 * Returns TRUE if the call was
956 * already on a queue.
957 */
958boolean_t
959thread_call_enter_delayed(
960 thread_call_t call,
961 uint64_t deadline)
962{
963 assert(call != NULL);
964 return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0);
965}
966
967boolean_t
968thread_call_enter1_delayed(
969 thread_call_t call,
970 thread_call_param_t param1,
971 uint64_t deadline)
972{
973 assert(call != NULL);
974 return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0);
975}
976
977boolean_t
978thread_call_enter_delayed_with_leeway(
979 thread_call_t call,
980 thread_call_param_t param1,
981 uint64_t deadline,
982 uint64_t leeway,
983 unsigned int flags)
984{
985 assert(call != NULL);
986 return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags);
987}
988
989
/*
 * thread_call_enter_delayed_internal:
 * Enqueue a callout entry to occur at the stated time. Common back end for
 * all of the delayed-enqueue entry points in this file.
 *
 * Returns TRUE if the call was already on a queue (the result of
 * _delayed_call_enqueue).
 *
 * params:
 * call - structure encapsulating state of the callout
 * alt_func/alt_param0 - if call is NULL, allocate temporary storage from the
 * internal pool using these parameters
 * param1 - stored as the callout's second argument
 * deadline - requested deadline, in the same units as the clock it is
 * compared against: mach_absolute_time(), or
 * mach_continuous_time() when THREAD_CALL_CONTINUOUS is set
 * leeway - timer slack represented as delta of deadline.
 * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
 * THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
 * THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
 * than mach_absolute_time
 */
boolean_t
thread_call_enter_delayed_internal(
 thread_call_t call,
 thread_call_func_t alt_func,
 thread_call_param_t alt_param0,
 thread_call_param_t param1,
 uint64_t deadline,
 uint64_t leeway,
 unsigned int flags)
{
 boolean_t result = TRUE;
 thread_call_group_t group;
 uint64_t now, sdeadline, slop;
 uint32_t urgency;

 thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;

 /* direct mapping between thread_call, timer_call, and timeout_urgency values */
 urgency = (flags & TIMEOUT_URGENCY_MASK);

 spl_t s = disable_ints_and_lock();

 if (call == NULL) {
 /* allocate a structure out of internal storage, as a convenience for BSD callers */
 call = _internal_call_allocate(alt_func, alt_param0);
 }

 assert(call->tc_call.func != NULL);
 group = thread_call_get_group(call);

 /* Sample "now" from the clock matching the flavor, and record the flavor on the call. */
 /* TODO: assert that call is not enqueued before flipping the flag */
 if (flavor == TCF_CONTINUOUS) {
 now = mach_continuous_time();
 call->tc_flags |= THREAD_CALL_CONTINUOUS;
 } else {
 now = mach_absolute_time();
 call->tc_flags &= ~THREAD_CALL_CONTINUOUS;
 }

 call->tc_flags |= THREAD_CALL_DELAYED;

 /* The soft deadline is the caller's requested time; "deadline" below becomes soft deadline + slop. */
 call->tc_soft_deadline = sdeadline = deadline;

 boolean_t ratelimited = FALSE;
 slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);

 /* A caller-supplied leeway only ever widens the computed slop. */
 if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop)
 slop = leeway;

 /* Saturate at UINT64_MAX instead of wrapping when adding the slop. */
 if (UINT64_MAX - deadline <= slop)
 deadline = UINT64_MAX;
 else
 deadline += slop;

 /*
 * NOTE(review): this sets/clears TIMER_CALL_RATELIMITED in tc_flags, while
 * _arm_delayed_call_timer tests THREAD_CALL_RATELIMITED. Presumably both
 * names have the same bit value -- confirm against the headers.
 */
 if (ratelimited) {
 call->tc_flags |= TIMER_CALL_RATELIMITED;
 } else {
 call->tc_flags &= ~TIMER_CALL_RATELIMITED;
 }

 call->tc_call.param1 = param1;

 /* Time remaining until the soft deadline; 0 if it has already passed. */
 call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : 0;

 result = _delayed_call_enqueue(call, group, deadline, flavor);

 /* Re-arms the hard timer only if this call is now first in the delayed queue. */
 _arm_delayed_call_timer(call, group, flavor);

#if CONFIG_DTRACE
 DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func,
 uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> 32),
 (unsigned) (call->tc_ttd & 0xFFFFFFFF), call);
#endif

 enable_ints_and_unlock(s);

 return (result);
}
1083
1084/*
1085 * Remove a callout entry from the queue
1086 * Called with thread_call_lock held
1087 */
1088static boolean_t
1089thread_call_cancel_locked(thread_call_t call)
1090{
1091 boolean_t canceled = (0 != (THREAD_CALL_RESCHEDULE & call->tc_flags));
1092 call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
1093
1094 if (canceled) {
1095 /* if reschedule was set, it must not have been queued */
1096 assert(call->tc_call.queue == NULL);
1097 } else {
1098 boolean_t do_cancel_callout = FALSE;
1099
1100 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1101 thread_call_group_t group = thread_call_get_group(call);
1102
1103 if ((call->tc_call.deadline != 0) &&
1104 (call == qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link))) {
1105 assert(call->tc_call.queue == &group->delayed_queues[flavor]);
1106 do_cancel_callout = TRUE;
1107 }
1108
1109 canceled = _call_dequeue(call, group);
1110
1111 if (do_cancel_callout) {
1112 if (_arm_delayed_call_timer(NULL, group, flavor) == false)
1113 timer_call_cancel(&group->delayed_timers[flavor]);
1114 }
1115 }
1116
1117#if CONFIG_DTRACE
1118 DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func,
1119 0, (call->tc_ttd >> 32), (unsigned) (call->tc_ttd & 0xFFFFFFFF));
1120#endif
1121
1122 return canceled;
1123}
1124
1125/*
1126 * thread_call_cancel:
1127 *
1128 * Dequeue a callout entry.
1129 *
1130 * Returns TRUE if the call was
1131 * on a queue.
1132 */
1133boolean_t
1134thread_call_cancel(thread_call_t call)
1135{
1136 spl_t s = disable_ints_and_lock();
1137
1138 boolean_t result = thread_call_cancel_locked(call);
1139
1140 enable_ints_and_unlock(s);
1141
1142 return result;
1143}
1144
1145/*
1146 * Cancel a thread call. If it cannot be cancelled (i.e.
1147 * is already in flight), waits for the most recent invocation
1148 * to finish. Note that if clients re-submit this thread call,
1149 * it may still be pending or in flight when thread_call_cancel_wait
1150 * returns, but all requests to execute this work item prior
1151 * to the call to thread_call_cancel_wait will have finished.
1152 */
1153boolean_t
1154thread_call_cancel_wait(thread_call_t call)
1155{
1156 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0)
1157 panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
1158
1159 if (!ml_get_interrupts_enabled())
1160 panic("unsafe thread_call_cancel_wait");
1161
1162 if (current_thread()->thc_state.thc_call == call)
1163 panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
1164 call, call->tc_call.func);
1165
1166 spl_t s = disable_ints_and_lock();
1167
1168 boolean_t canceled = thread_call_cancel_locked(call);
1169
1170 if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
1171 /*
1172 * A cancel-wait on a 'once' call will both cancel
1173 * the pending call and wait for the in-flight call
1174 */
1175
1176 thread_call_wait_once_locked(call, s);
1177 /* thread call lock unlocked */
1178 } else {
1179 /*
1180 * A cancel-wait on a normal call will only wait for the in-flight calls
1181 * if it did not cancel the pending call.
1182 *
1183 * TODO: This seems less than useful - shouldn't it do the wait as well?
1184 */
1185
1186 if (canceled == FALSE) {
1187 thread_call_wait_locked(call, s);
1188 /* thread call lock unlocked */
1189 } else {
1190 enable_ints_and_unlock(s);
1191 }
1192 }
1193
1194 return canceled;
1195}
1196
1197
1198/*
1199 * thread_call_wake:
1200 *
1201 * Wake a call thread to service
1202 * pending call entries. May wake
1203 * the daemon thread in order to
1204 * create additional call threads.
1205 *
1206 * Called with thread_call_lock held.
1207 *
1208 * For high-priority group, only does wakeup/creation if there are no threads
1209 * running.
1210 */
1211static __inline__ void
1212thread_call_wake(
1213 thread_call_group_t group)
1214{
1215 /*
1216 * New behavior: use threads if you've got 'em.
1217 * Traditional behavior: wake only if no threads running.
1218 */
1219 if (group_isparallel(group) || group->active_count == 0) {
1220 if (waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1221 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) {
1222 group->idle_count--; group->active_count++;
1223
1224 if (group->idle_count == 0 && (group->flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
1225 if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
1226 group->flags &= ~TCG_DEALLOC_ACTIVE;
1227 }
1228 }
1229 } else {
1230 if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
1231 thread_call_daemon_awake = TRUE;
1232 waitq_wakeup64_one(&daemon_waitq, NO_EVENT64,
1233 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1234 }
1235 }
1236 }
1237}
1238
1239/*
1240 * sched_call_thread:
1241 *
1242 * Call out invoked by the scheduler.
1243 */
1244static void
1245sched_call_thread(
1246 int type,
1247 thread_t thread)
1248{
1249 thread_call_group_t group;
1250
1251 group = thread->thc_state.thc_group;
1252 assert((group - &thread_call_groups[0]) < THREAD_CALL_INDEX_MAX);
1253
1254 thread_call_lock_spin();
1255
1256 switch (type) {
1257
1258 case SCHED_CALL_BLOCK:
1259 assert(group->active_count);
1260 --group->active_count;
1261 group->blocked_count++;
1262 if (group->pending_count > 0)
1263 thread_call_wake(group);
1264 break;
1265
1266 case SCHED_CALL_UNBLOCK:
1267 assert(group->blocked_count);
1268 --group->blocked_count;
1269 group->active_count++;
1270 break;
1271 }
1272
1273 thread_call_unlock();
1274}
1275
1276/*
1277 * Interrupts disabled, lock held; returns the same way.
1278 * Only called on thread calls whose storage we own. Wakes up
1279 * anyone who might be waiting on this work item and frees it
1280 * if the client has so requested.
1281 */
1282static boolean_t
1283thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1284{
1285 uint64_t time;
1286 uint32_t flags;
1287 boolean_t signal;
1288 boolean_t repend = FALSE;
1289
1290 call->tc_finish_count++;
1291 flags = call->tc_flags;
1292 signal = ((THREAD_CALL_SIGNAL & flags) != 0);
1293
1294 if (!signal) {
1295 /* The thread call thread owns a ref until the call is finished */
1296 if (call->tc_refs <= 0)
1297 panic("thread_call_finish: detected over-released thread call: %p", call);
1298 call->tc_refs--;
1299 }
1300
1301 call->tc_flags &= ~(THREAD_CALL_RESCHEDULE | THREAD_CALL_RUNNING | THREAD_CALL_WAIT);
1302
1303 if ((call->tc_refs != 0) && ((flags & THREAD_CALL_RESCHEDULE) != 0)) {
1304 assert(flags & THREAD_CALL_ONCE);
1305 thread_call_flavor_t flavor = thread_call_get_flavor(call);
1306
1307 if (THREAD_CALL_DELAYED & flags) {
1308 time = mach_absolute_time();
1309 if (flavor == TCF_CONTINUOUS) {
1310 time = absolutetime_to_continuoustime(time);
1311 }
1312 if (call->tc_soft_deadline <= time) {
1313 call->tc_flags &= ~(THREAD_CALL_DELAYED | TIMER_CALL_RATELIMITED);
1314 call->tc_deadline = 0;
1315 }
1316 }
1317 if (call->tc_deadline) {
1318 _delayed_call_enqueue(call, group, call->tc_deadline, flavor);
1319 if (!signal) {
1320 _arm_delayed_call_timer(call, group, flavor);
1321 }
1322 } else if (signal) {
1323 call->tc_submit_count++;
1324 repend = TRUE;
1325 } else {
1326 _pending_call_enqueue(call, group);
1327 }
1328 }
1329
1330 if (!signal && (call->tc_refs == 0)) {
1331 if ((flags & THREAD_CALL_WAIT) != 0) {
1332 panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
1333 }
1334
1335 assert(call->tc_finish_count == call->tc_submit_count);
1336
1337 enable_ints_and_unlock(*s);
1338
1339 zfree(thread_call_zone, call);
1340
1341 *s = disable_ints_and_lock();
1342 }
1343
1344 if ((flags & THREAD_CALL_WAIT) != 0) {
1345 /*
1346 * Dropping lock here because the sched call for the
1347 * high-pri group can take the big lock from under
1348 * a thread lock.
1349 */
1350 thread_call_unlock();
1351 thread_wakeup((event_t)call);
1352 thread_call_lock_spin();
1353 /* THREAD_CALL_SIGNAL call may have been freed */
1354 }
1355
1356 return (repend);
1357}
1358
1359/*
1360 * thread_call_invoke
1361 *
1362 * Invoke the function provided for this thread call
1363 *
1364 * Note that the thread call object can be deallocated by the function if we do not control its storage.
1365 */
1366static void __attribute__((noinline))
1367thread_call_invoke(thread_call_func_t func, thread_call_param_t param0, thread_call_param_t param1, thread_call_t call)
1368{
1369 current_thread()->thc_state.thc_call = call;
1370
1371#if DEVELOPMENT || DEBUG
1372 KERNEL_DEBUG_CONSTANT(
1373 MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_START,
1374 VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), 0, 0);
1375#endif /* DEVELOPMENT || DEBUG */
1376
1377#if CONFIG_DTRACE
1378 uint64_t tc_ttd = call->tc_ttd;
1379 boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1380 DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1381 (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1382#endif
1383
1384 (*func)(param0, param1);
1385
1386#if CONFIG_DTRACE
1387 DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (tc_ttd >> 32),
1388 (unsigned) (tc_ttd & 0xFFFFFFFF), is_delayed, call);
1389#endif
1390
1391#if DEVELOPMENT || DEBUG
1392 KERNEL_DEBUG_CONSTANT(
1393 MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_END,
1394 VM_KERNEL_UNSLIDE(func), 0, 0, 0, 0);
1395#endif /* DEVELOPMENT || DEBUG */
1396
1397 current_thread()->thc_state.thc_call = NULL;
1398}
1399
1400/*
1401 * thread_call_thread:
1402 */
1403static void
1404thread_call_thread(
1405 thread_call_group_t group,
1406 wait_result_t wres)
1407{
1408 thread_t self = current_thread();
1409 boolean_t canwait;
1410
1411 if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0)
1412 (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1413
1414 /*
1415 * A wakeup with THREAD_INTERRUPTED indicates that
1416 * we should terminate.
1417 */
1418 if (wres == THREAD_INTERRUPTED) {
1419 thread_terminate(self);
1420
1421 /* NOTREACHED */
1422 panic("thread_terminate() returned?");
1423 }
1424
1425 spl_t s = disable_ints_and_lock();
1426
1427 self->thc_state.thc_group = group;
1428 thread_sched_call(self, sched_call_thread);
1429
1430 while (group->pending_count > 0) {
1431 thread_call_t call;
1432 thread_call_func_t func;
1433 thread_call_param_t param0, param1;
1434
1435 call = qe_dequeue_head(&group->pending_queue, struct thread_call, tc_call.q_link);
1436 assert(call != NULL);
1437 group->pending_count--;
1438
1439 func = call->tc_call.func;
1440 param0 = call->tc_call.param0;
1441 param1 = call->tc_call.param1;
1442
1443 call->tc_call.queue = NULL;
1444
1445 _internal_call_release(call);
1446
1447 /*
1448 * Can only do wakeups for thread calls whose storage
1449 * we control.
1450 */
1451 if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
1452 canwait = TRUE;
1453 call->tc_flags |= THREAD_CALL_RUNNING;
1454 call->tc_refs++; /* Delay free until we're done */
1455 } else
1456 canwait = FALSE;
1457
1458 enable_ints_and_unlock(s);
1459
1460 thread_call_invoke(func, param0, param1, call);
1461
1462 if (get_preemption_level() != 0) {
1463 int pl = get_preemption_level();
1464 panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1465 pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1466 }
1467
1468 s = disable_ints_and_lock();
1469
1470 if (canwait) {
1471 /* Frees if so desired */
1472 thread_call_finish(call, group, &s);
1473 }
1474 }
1475
1476 thread_sched_call(self, NULL);
1477 group->active_count--;
1478
1479 if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1480 ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1);
1481 if (self->callout_woken_from_platform_idle)
1482 ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1);
1483 }
1484
1485 self->callout_woken_from_icontext = FALSE;
1486 self->callout_woken_from_platform_idle = FALSE;
1487 self->callout_woke_thread = FALSE;
1488
1489 if (group_isparallel(group)) {
1490 /*
1491 * For new style of thread group, thread always blocks.
1492 * If we have more than the target number of threads,
1493 * and this is the first to block, and it isn't active
1494 * already, set a timer for deallocating a thread if we
1495 * continue to have a surplus.
1496 */
1497 group->idle_count++;
1498
1499 if (group->idle_count == 1) {
1500 group->idle_timestamp = mach_absolute_time();
1501 }
1502
1503 if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
1504 ((group->active_count + group->idle_count) > group->target_thread_count)) {
1505 thread_call_start_deallocate_timer(group);
1506 }
1507
1508 /* Wait for more work (or termination) */
1509 wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0);
1510 if (wres != THREAD_WAITING) {
1511 panic("kcall worker unable to assert wait?");
1512 }
1513
1514 enable_ints_and_unlock(s);
1515
1516 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1517 } else {
1518 if (group->idle_count < group->target_thread_count) {
1519 group->idle_count++;
1520
1521 waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, 0); /* Interrupted means to exit */
1522
1523 enable_ints_and_unlock(s);
1524
1525 thread_block_parameter((thread_continue_t)thread_call_thread, group);
1526 /* NOTREACHED */
1527 }
1528 }
1529
1530 enable_ints_and_unlock(s);
1531
1532 thread_terminate(self);
1533 /* NOTREACHED */
1534}
1535
1536/*
1537 * thread_call_daemon: walk list of groups, allocating
1538 * threads if appropriate (as determined by
1539 * thread_call_group_should_add_thread()).
1540 */
1541static void
1542thread_call_daemon_continue(__unused void *arg)
1543{
1544 spl_t s = disable_ints_and_lock();
1545
1546 /* Starting at zero happens to be high-priority first. */
1547 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1548 thread_call_group_t group = &thread_call_groups[i];
1549 while (thread_call_group_should_add_thread(group)) {
1550 group->active_count++;
1551
1552 enable_ints_and_unlock(s);
1553
1554 kern_return_t kr = thread_call_thread_create(group);
1555 if (kr != KERN_SUCCESS) {
1556 /*
1557 * On failure, just pause for a moment and give up.
1558 * We can try again later.
1559 */
1560 delay(10000); /* 10 ms */
1561 s = disable_ints_and_lock();
1562 goto out;
1563 }
1564
1565 s = disable_ints_and_lock();
1566 }
1567 }
1568
1569out:
1570 thread_call_daemon_awake = FALSE;
1571 waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, 0);
1572
1573 enable_ints_and_unlock(s);
1574
1575 thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1576 /* NOTREACHED */
1577}
1578
1579static void
1580thread_call_daemon(
1581 __unused void *arg)
1582{
1583 thread_t self = current_thread();
1584
1585 self->options |= TH_OPT_VMPRIV;
1586 vm_page_free_reserve(2); /* XXX */
1587
1588 thread_set_thread_name(self, "thread_call_daemon");
1589
1590 thread_call_daemon_continue(NULL);
1591 /* NOTREACHED */
1592}
1593
1594/*
1595 * Schedule timer to deallocate a worker thread if we have a surplus
1596 * of threads (in excess of the group's target) and at least one thread
1597 * is idle the whole time.
1598 */
1599static void
1600thread_call_start_deallocate_timer(thread_call_group_t group)
1601{
1602 __assert_only boolean_t already_enqueued;
1603
1604 assert(group->idle_count > 0);
1605 assert((group->flags & TCG_DEALLOC_ACTIVE) == 0);
1606
1607 group->flags |= TCG_DEALLOC_ACTIVE;
1608
1609 uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1610
1611 already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0);
1612
1613 assert(already_enqueued == FALSE);
1614}
1615
1616/* non-static so dtrace can find it rdar://problem/31156135&31379348 */
1617void
1618thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
1619{
1620 thread_call_group_t group = (thread_call_group_t) p0;
1621 thread_call_flavor_t flavor = (thread_call_flavor_t) p1;
1622
1623 thread_call_t call;
1624 uint64_t now;
1625 boolean_t restart;
1626 boolean_t repend;
1627
1628 thread_call_lock_spin();
1629
1630 if (flavor == TCF_CONTINUOUS)
1631 now = mach_continuous_time();
1632 else if (flavor == TCF_ABSOLUTE)
1633 now = mach_absolute_time();
1634 else
1635 panic("invalid timer flavor: %d", flavor);
1636
1637 do {
1638 restart = FALSE;
1639 qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1640 if (flavor == TCF_CONTINUOUS)
1641 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
1642 else
1643 assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == 0);
1644
1645 /*
1646 * if we hit a call that isn't yet ready to expire,
1647 * then we're done for now
1648 * TODO: The next timer in the list could have a larger leeway
1649 * and therefore be ready to expire.
1650 * Sort by deadline then by soft deadline to avoid this
1651 */
1652 if (call->tc_soft_deadline > now)
1653 break;
1654
1655 /*
1656 * If we hit a rate-limited timer, don't eagerly wake it up.
1657 * Wait until it reaches the end of the leeway window.
1658 *
1659 * TODO: What if the next timer is not rate-limited?
1660 * Have a separate rate-limited queue to avoid this
1661 */
1662 if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
1663 (call->tc_call.deadline > now) &&
1664 (ml_timer_forced_evaluation() == FALSE)) {
1665 break;
1666 }
1667
1668 if (THREAD_CALL_SIGNAL & call->tc_flags) {
1669 __assert_only queue_head_t *old_queue;
1670 old_queue = call_entry_dequeue(&call->tc_call);
1671 assert(old_queue == &group->delayed_queues[flavor]);
1672
1673 do {
1674 thread_call_func_t func = call->tc_call.func;
1675 thread_call_param_t param0 = call->tc_call.param0;
1676 thread_call_param_t param1 = call->tc_call.param1;
1677
1678 call->tc_flags |= THREAD_CALL_RUNNING;
1679 thread_call_unlock();
1680 thread_call_invoke(func, param0, param1, call);
1681 thread_call_lock_spin();
1682
1683 repend = thread_call_finish(call, group, NULL);
1684 } while (repend);
1685
1686 /* call may have been freed */
1687 restart = TRUE;
1688 break;
1689 } else {
1690 _pending_call_enqueue(call, group);
1691 }
1692 }
1693 } while (restart);
1694
1695 _arm_delayed_call_timer(call, group, flavor);
1696
1697 thread_call_unlock();
1698}
1699
1700static void
1701thread_call_delayed_timer_rescan(thread_call_group_t group,
1702 thread_call_flavor_t flavor)
1703{
1704 thread_call_t call;
1705 uint64_t now;
1706
1707 spl_t s = disable_ints_and_lock();
1708
1709 assert(ml_timer_forced_evaluation() == TRUE);
1710
1711 if (flavor == TCF_CONTINUOUS) {
1712 now = mach_continuous_time();
1713 } else {
1714 now = mach_absolute_time();
1715 }
1716
1717 qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1718 if (call->tc_soft_deadline <= now) {
1719 _pending_call_enqueue(call, group);
1720 } else {
1721 uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline;
1722 assert (call->tc_call.deadline >= call->tc_soft_deadline);
1723 /*
1724 * On a latency quality-of-service level change,
1725 * re-sort potentially rate-limited callout. The platform
1726 * layer determines which timers require this.
1727 */
1728 if (timer_resort_threshold(skew)) {
1729 _call_dequeue(call, group);
1730 _delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
1731 }
1732 }
1733 }
1734
1735 _arm_delayed_call_timer(NULL, group, flavor);
1736
1737 enable_ints_and_unlock(s);
1738}
1739
1740void
1741thread_call_delayed_timer_rescan_all(void) {
1742 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1743 thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_ABSOLUTE);
1744 thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_CONTINUOUS);
1745 }
1746}
1747
1748/*
1749 * Timer callback to tell a thread to terminate if
1750 * we have an excess of threads and at least one has been
1751 * idle for a long time.
1752 */
1753static void
1754thread_call_dealloc_timer(
1755 timer_call_param_t p0,
1756 __unused timer_call_param_t p1)
1757{
1758 thread_call_group_t group = (thread_call_group_t)p0;
1759 uint64_t now;
1760 kern_return_t res;
1761 boolean_t terminated = FALSE;
1762
1763 thread_call_lock_spin();
1764
1765 assert((group->flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE);
1766
1767 now = mach_absolute_time();
1768
1769 if (group->idle_count > 0) {
1770 if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
1771 terminated = TRUE;
1772 group->idle_count--;
1773 res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1774 THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
1775 if (res != KERN_SUCCESS) {
1776 panic("Unable to wake up idle thread for termination?");
1777 }
1778 }
1779 }
1780
1781 group->flags &= ~TCG_DEALLOC_ACTIVE;
1782
1783 /*
1784 * If we still have an excess of threads, schedule another
1785 * invocation of this function.
1786 */
1787 if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
1788 /*
1789 * If we killed someone just now, push out the
1790 * next deadline.
1791 */
1792 if (terminated) {
1793 group->idle_timestamp = now;
1794 }
1795
1796 thread_call_start_deallocate_timer(group);
1797 }
1798
1799 thread_call_unlock();
1800}
1801
1802/*
1803 * Wait for the invocation of the thread call to complete
1804 * We know there's only one in flight because of the 'once' flag.
1805 *
1806 * If a subsequent invocation comes in before we wake up, that's OK
1807 *
1808 * TODO: Here is where we will add priority inheritance to the thread executing
1809 * the thread call in case it's lower priority than the current thread
1810 * <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
1811 *
1812 * Takes the thread call lock locked, returns unlocked
1813 * This lets us avoid a spurious take/drop after waking up from thread_block
1814 */
1815static boolean_t
1816thread_call_wait_once_locked(thread_call_t call, spl_t s)
1817{
1818 assert(call->tc_flags & THREAD_CALL_ALLOC);
1819 assert(call->tc_flags & THREAD_CALL_ONCE);
1820
1821 if ((call->tc_flags & THREAD_CALL_RUNNING) == 0) {
1822 enable_ints_and_unlock(s);
1823 return FALSE;
1824 }
1825
1826 /* call is running, so we have to wait for it */
1827 call->tc_flags |= THREAD_CALL_WAIT;
1828
1829 wait_result_t res = assert_wait(call, THREAD_UNINT);
1830 if (res != THREAD_WAITING)
1831 panic("Unable to assert wait: %d", res);
1832
1833 enable_ints_and_unlock(s);
1834
1835 res = thread_block(THREAD_CONTINUE_NULL);
1836 if (res != THREAD_AWAKENED)
1837 panic("Awoken with %d?", res);
1838
1839 /* returns unlocked */
1840 return TRUE;
1841}
1842
1843/*
1844 * Wait for an in-flight invocation to complete
1845 * Does NOT try to cancel, so the client doesn't need to hold their
1846 * lock while calling this function.
1847 *
1848 * Returns whether or not it had to wait.
1849 *
1850 * Only works for THREAD_CALL_ONCE calls.
1851 */
1852boolean_t
1853thread_call_wait_once(thread_call_t call)
1854{
1855 if ((call->tc_flags & THREAD_CALL_ALLOC) == 0)
1856 panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
1857
1858 if ((call->tc_flags & THREAD_CALL_ONCE) == 0)
1859 panic("thread_call_wait_once: can't wait_once on a non-once call");
1860
1861 if (!ml_get_interrupts_enabled())
1862 panic("unsafe thread_call_wait_once");
1863
1864 if (current_thread()->thc_state.thc_call == call)
1865 panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
1866 call, call->tc_call.func);
1867
1868 spl_t s = disable_ints_and_lock();
1869
1870 boolean_t waited = thread_call_wait_once_locked(call, s);
1871 /* thread call lock unlocked */
1872
1873 return waited;
1874}
1875
1876
1877/*
1878 * Wait for all requested invocations of a thread call prior to now
1879 * to finish. Can only be invoked on thread calls whose storage we manage.
1880 * Just waits for the finish count to catch up to the submit count we find
1881 * at the beginning of our wait.
1882 *
1883 * Called with thread_call_lock held. Returns with lock released.
1884 */
1885static void
1886thread_call_wait_locked(thread_call_t call, spl_t s)
1887{
1888 uint64_t submit_count;
1889 wait_result_t res;
1890
1891 assert(call->tc_flags & THREAD_CALL_ALLOC);
1892
1893 submit_count = call->tc_submit_count;
1894
1895 while (call->tc_finish_count < submit_count) {
1896 call->tc_flags |= THREAD_CALL_WAIT;
1897
1898 res = assert_wait(call, THREAD_UNINT);
1899 if (res != THREAD_WAITING)
1900 panic("Unable to assert wait: %d", res);
1901
1902 enable_ints_and_unlock(s);
1903
1904 res = thread_block(THREAD_CONTINUE_NULL);
1905 if (res != THREAD_AWAKENED)
1906 panic("Awoken with %d?", res);
1907
1908 s = disable_ints_and_lock();
1909 }
1910
1911 enable_ints_and_unlock(s);
1912}
1913
1914/*
1915 * Determine whether a thread call is either on a queue or
1916 * currently being executed.
1917 */
1918boolean_t
1919thread_call_isactive(thread_call_t call)
1920{
1921 boolean_t active;
1922
1923 spl_t s = disable_ints_and_lock();
1924 active = (call->tc_submit_count > call->tc_finish_count);
1925 enable_ints_and_unlock(s);
1926
1927 return active;
1928}
1929
1930/*
1931 * adjust_cont_time_thread_calls
1932 * on wake, reenqueue delayed call timer for continuous time thread call groups
1933 */
1934void
1935adjust_cont_time_thread_calls(void)
1936{
1937 spl_t s = disable_ints_and_lock();
1938
1939 for (int i = 0; i < THREAD_CALL_INDEX_MAX; i++) {
1940 thread_call_group_t group = &thread_call_groups[i];
1941
1942 /* only the continuous timers need to be re-armed */
1943
1944 _arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
1945 }
1946
1947 enable_ints_and_unlock(s);
1948}
1949
1950