thread_call.c source code [xnu/osfmk/kern/thread_call.c]

1	/*
2	* Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#include <mach/mach_types.h>
30	#include <mach/thread_act.h>
31
32	#include <kern/kern_types.h>
33	#include <kern/zalloc.h>
34	#include <kern/sched_prim.h>
35	#include <kern/clock.h>
36	#include <kern/task.h>
37	#include <kern/thread.h>
38	#include <kern/waitq.h>
39	#include <kern/ledger.h>
40	#include <kern/policy_internal.h>
41
42	#include <vm/vm_pageout.h>
43
44	#include <kern/thread_call.h>
45	#include <kern/call_entry.h>
46	#include <kern/timer_call.h>
47
48	#include <libkern/OSAtomic.h>
49	#include <kern/timer_queue.h>
50
51	#include <sys/kdebug.h>
52	#if CONFIG_DTRACE
53	#include <mach/sdt.h>
54	#endif
55	#include <machine/machine_routines.h>
56
57	static zone_t thread_call_zone;
58	static struct waitq daemon_waitq;
59
/*
 * Time base of a delayed thread call.  The value doubles as the index
 * into a group's delayed_queues[] and delayed_timers[] arrays.
 */
typedef enum {
	TCF_ABSOLUTE	= 0,	/* deadline is on the mach_absolute_time() clock */
	TCF_CONTINUOUS	= 1,	/* deadline is on the mach_continuous_time() clock */
	TCF_COUNT	= 2,	/* number of flavors; sizes the per-flavor arrays */
} thread_call_flavor_t;
65
/*
 * Bit flags stored in thread_call_group.flags.
 */
typedef enum {
	TCG_NONE		= 0x0,	/* no flags set */
	TCG_PARALLEL		= 0x1,	/* tested by group_isparallel(); group may run several threads at once */
	TCG_DEALLOC_ACTIVE	= 0x2,	/* NOTE(review): presumably set while dealloc_timer is armed - confirm */
} thread_call_group_flags_t;
71
72	static struct thread_call_group {
73	const char * tcg_name;
74
75	queue_head_t pending_queue;
76	uint32_t pending_count;
77
78	queue_head_t delayed_queues[TCF_COUNT];
79	timer_call_data_t delayed_timers[TCF_COUNT];
80
81	timer_call_data_t dealloc_timer;
82
83	struct waitq idle_waitq;
84	uint32_t idle_count, active_count, blocked_count;
85
86	uint32_t tcg_thread_pri;
87	uint32_t target_thread_count;
88	uint64_t idle_timestamp;
89
90	thread_call_group_flags_t flags;
91
92	} thread_call_groups[THREAD_CALL_INDEX_MAX] = {
93	[THREAD_CALL_INDEX_HIGH] = {
94	.tcg_name = "high",
95	.tcg_thread_pri = BASEPRI_PREEMPT_HIGH,
96	.target_thread_count = `4`,
97	.flags = TCG_NONE,
98	},
99	[THREAD_CALL_INDEX_KERNEL] = {
100	.tcg_name = "kernel",
101	.tcg_thread_pri = BASEPRI_KERNEL,
102	.target_thread_count = `1`,
103	.flags = TCG_PARALLEL,
104	},
105	[THREAD_CALL_INDEX_USER] = {
106	.tcg_name = "user",
107	.tcg_thread_pri = BASEPRI_DEFAULT,
108	.target_thread_count = `1`,
109	.flags = TCG_PARALLEL,
110	},
111	[THREAD_CALL_INDEX_LOW] = {
112	.tcg_name = "low",
113	.tcg_thread_pri = MAXPRI_THROTTLE,
114	.target_thread_count = `1`,
115	.flags = TCG_PARALLEL,
116	},
117	[THREAD_CALL_INDEX_KERNEL_HIGH] = {
118	.tcg_name = "kernel-high",
119	.tcg_thread_pri = BASEPRI_PREEMPT,
120	.target_thread_count = `2`,
121	.flags = TCG_NONE,
122	},
123	[THREAD_CALL_INDEX_QOS_UI] = {
124	.tcg_name = "qos-ui",
125	.tcg_thread_pri = BASEPRI_FOREGROUND,
126	.target_thread_count = `1`,
127	.flags = TCG_NONE,
128	},
129	[THREAD_CALL_INDEX_QOS_IN] = {
130	.tcg_name = "qos-in",
131	.tcg_thread_pri = BASEPRI_USER_INITIATED,
132	.target_thread_count = `1`,
133	.flags = TCG_NONE,
134	},
135	[THREAD_CALL_INDEX_QOS_UT] = {
136	.tcg_name = "qos-ut",
137	.tcg_thread_pri = BASEPRI_UTILITY,
138	.target_thread_count = `1`,
139	.flags = TCG_NONE,
140	},
141	};
142
143	typedef struct thread_call_group *thread_call_group_t;
144
145	#define INTERNAL_CALL_COUNT 768
146	#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * NSEC_PER_MSEC) /* 5 ms */
147	#define THREAD_CALL_ADD_RATIO 4
148	#define THREAD_CALL_MACH_FACTOR_CAP 3
149	#define THREAD_CALL_GROUP_MAX_THREADS 500
150
151	static boolean_t thread_call_daemon_awake;
152	static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT];
153	static queue_head_t thread_call_internal_queue;
154	int thread_call_internal_queue_count = `0`;
155	static uint64_t thread_call_dealloc_interval_abs;
156
157	static __inline__ thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0);
158	static __inline__ void _internal_call_release(thread_call_t call);
159	static __inline__ boolean_t _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
160	static boolean_t _delayed_call_enqueue(thread_call_t call, thread_call_group_t group,
161	uint64_t deadline, thread_call_flavor_t flavor);
162	static __inline__ boolean_t _call_dequeue(thread_call_t call, thread_call_group_t group);
163	static __inline__ void thread_call_wake(thread_call_group_t group);
164	static void thread_call_daemon(void *arg);
165	static void thread_call_thread(thread_call_group_t group, wait_result_t wres);
166	static void thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
167	static void thread_call_group_setup(thread_call_group_t group);
168	static void sched_call_thread(int type, thread_t thread);
169	static void thread_call_start_deallocate_timer(thread_call_group_t group);
170	static void thread_call_wait_locked(thread_call_t call, spl_t s);
171	static boolean_t thread_call_wait_once_locked(thread_call_t call, spl_t s);
172
173	static boolean_t thread_call_enter_delayed_internal(thread_call_t call,
174	thread_call_func_t alt_func, thread_call_param_t alt_param0,
175	thread_call_param_t param1, uint64_t deadline,
176	uint64_t leeway, unsigned int flags);
177
178	/ non-static so dtrace can find it rdar://problem/31156135&31379348 /
179	extern void thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
180
181	lck_grp_t thread_call_lck_grp;
182	lck_mtx_t thread_call_lock_data;
183
184	#define thread_call_lock_spin() \
185	lck_mtx_lock_spin_always(&thread_call_lock_data)
186
187	#define thread_call_unlock() \
188	lck_mtx_unlock_always(&thread_call_lock_data)
189
190	#define tc_deadline tc_call.deadline
191
192	extern boolean_t mach_timer_coalescing_enabled;
193
194	static inline spl_t
195	disable_ints_and_lock(void)
196	{
197	spl_t s = splsched();
198	thread_call_lock_spin();
199
200	return s;
201	}
202
203	static inline void
204	enable_ints_and_unlock(spl_t s)
205	{
206	thread_call_unlock();
207	splx(s);
208	}
209
210	static inline boolean_t
211	group_isparallel(thread_call_group_t group)
212	{
213	return ((group->flags & TCG_PARALLEL) != `0`);
214	}
215
216	static boolean_t
217	thread_call_group_should_add_thread(thread_call_group_t group)
218	{
219	if ((group->active_count + group->blocked_count + group->idle_count) >= THREAD_CALL_GROUP_MAX_THREADS) {
220	panic("thread_call group '%s' reached max thread cap (%d): active: %d, blocked: %d, idle: %d",
221	group->tcg_name, THREAD_CALL_GROUP_MAX_THREADS,
222	group->active_count, group->blocked_count, group->idle_count);
223	}
224
225	if (group_isparallel(group) == FALSE) {
226	if (group->pending_count > `0` && group->active_count == `0`) {
227	return TRUE;
228	}
229
230	return FALSE;
231	}
232
233	if (group->pending_count > `0`) {
234	if (group->idle_count > `0`) {
235	return FALSE;
236	}
237
238	uint32_t thread_count = group->active_count;
239
240	/*
241	* Add a thread if either there are no threads,
242	* the group has fewer than its target number of
243	* threads, or the amount of work is large relative
244	* to the number of threads. In the last case, pay attention
245	* to the total load on the system, and back off if
246	* it's high.
247	*/
248	if ((thread_count == `0`) \|\|
249	(thread_count < group->target_thread_count) \|\|
250	((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) &&
251	(sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
252	return TRUE;
253	}
254	}
255
256	return FALSE;
257	}
258
259	/ Lock held /
260	static inline thread_call_group_t
261	thread_call_get_group(thread_call_t call)
262	{
263	thread_call_index_t index = call->tc_index;
264
265	assert(index >= `0` && index < THREAD_CALL_INDEX_MAX);
266
267	return &thread_call_groups[index];
268	}
269
270	/ Lock held /
271	static inline thread_call_flavor_t
272	thread_call_get_flavor(thread_call_t call)
273	{
274	return (call->tc_flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
275	}
276
277	static void
278	thread_call_group_setup(thread_call_group_t group)
279	{
280	queue_init(&group->pending_queue);
281	queue_init(&group->delayed_queues[TCF_ABSOLUTE]);
282	queue_init(&group->delayed_queues[TCF_CONTINUOUS]);
283
284	/ TODO: Consolidate to one hard timer for each group /
285	timer_call_setup(&group->delayed_timers[TCF_ABSOLUTE], thread_call_delayed_timer, group);
286	timer_call_setup(&group->delayed_timers[TCF_CONTINUOUS], thread_call_delayed_timer, group);
287	timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
288
289	/ Reverse the wait order so we re-use the most recently parked thread from the pool /
290	waitq_init(&group->idle_waitq, SYNC_POLICY_REVERSED\|SYNC_POLICY_DISABLE_IRQ);
291	}
292
293	/*
294	* Simple wrapper for creating threads bound to
295	* thread call groups.
296	*/
297	static kern_return_t
298	thread_call_thread_create(
299	thread_call_group_t group)
300	{
301	thread_t thread;
302	kern_return_t result;
303
304	int thread_pri = group->tcg_thread_pri;
305
306	result = kernel_thread_start_priority((thread_continue_t)thread_call_thread,
307	group, thread_pri, &thread);
308	if (result != KERN_SUCCESS) {
309	return result;
310	}
311
312	if (thread_pri <= BASEPRI_KERNEL) {
313	/*
314	* THREAD_CALL_PRIORITY_KERNEL and lower don't get to run to completion
315	* in kernel if there are higher priority threads available.
316	*/
317	thread_set_eager_preempt(thread);
318	}
319
320	char name[MAXTHREADNAMESIZE] = "";
321
322	int group_thread_count = group->idle_count + group->active_count + group->blocked_count;
323
324	snprintf(name, sizeof(name), "thread call %s #%d", group->tcg_name, group_thread_count);
325	thread_set_thread_name(thread, name);
326
327	thread_deallocate(thread);
328	return KERN_SUCCESS;
329	}
330
331	/*
332	* thread_call_initialize:
333	*
334	* Initialize this module, called
335	* early during system initialization.
336	*/
337	void
338	thread_call_initialize(void)
339	{
340	int tc_size = sizeof (thread_call_data_t);
341	thread_call_zone = zinit(tc_size, `4096` * tc_size, `16` * tc_size, "thread_call");
342	zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
343	zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
344
345	lck_grp_init(&thread_call_lck_grp, "thread_call", LCK_GRP_ATTR_NULL);
346	lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, LCK_ATTR_NULL);
347
348	nanotime_to_absolutetime(`0`, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
349	waitq_init(&daemon_waitq, SYNC_POLICY_DISABLE_IRQ \| SYNC_POLICY_FIFO);
350
351	for (uint32_t i = `0`; i < THREAD_CALL_INDEX_MAX; i++)
352	thread_call_group_setup(&thread_call_groups[i]);
353
354	spl_t s = disable_ints_and_lock();
355
356	queue_init(&thread_call_internal_queue);
357	for (
358	thread_call_t call = internal_call_storage;
359	call < &internal_call_storage[INTERNAL_CALL_COUNT];
360	call++) {
361
362	enqueue_tail(&thread_call_internal_queue, &call->tc_call.q_link);
363	thread_call_internal_queue_count++;
364	}
365
366	thread_call_daemon_awake = TRUE;
367
368	enable_ints_and_unlock(s);
369
370	thread_t thread;
371	kern_return_t result;
372
373	result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon,
374	NULL, BASEPRI_PREEMPT_HIGH + `1`, &thread);
375	if (result != KERN_SUCCESS)
376	panic("thread_call_initialize");
377
378	thread_deallocate(thread);
379	}
380
381	void
382	thread_call_setup(
383	thread_call_t call,
384	thread_call_func_t func,
385	thread_call_param_t param0)
386	{
387	bzero(call, sizeof(*call));
388	call_entry_setup((call_entry_t)call, func, param0);
389
390	/ Thread calls default to the HIGH group unless otherwise specified /
391	call->tc_index = THREAD_CALL_INDEX_HIGH;
392
393	/ THREAD_CALL_ALLOC not set, memory owned by caller /
394	}
395
396	/*
397	* _internal_call_allocate:
398	*
399	* Allocate an internal callout entry.
400	*
401	* Called with thread_call_lock held.
402	*/
403	static __inline__ thread_call_t
404	_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0)
405	{
406	thread_call_t call;
407
408	if (queue_empty(&thread_call_internal_queue))
409	panic("_internal_call_allocate");
410
411	call = qe_dequeue_head(&thread_call_internal_queue, struct thread_call, tc_call.q_link);
412
413	thread_call_internal_queue_count--;
414
415	thread_call_setup(call, func, param0);
416	call->tc_refs = `0`;
417	call->tc_flags = `0`; / THREAD_CALL_ALLOC not set, do not free back to zone /
418
419	return (call);
420	}
421
422	/*
423	* _internal_call_release:
424	*
425	* Release an internal callout entry which
426	* is no longer pending (or delayed). This is
427	* safe to call on a non-internal entry, in which
428	* case nothing happens.
429	*
430	* Called with thread_call_lock held.
431	*/
432	static __inline__ void
433	_internal_call_release(thread_call_t call)
434	{
435	if (call >= internal_call_storage &&
436	call < &internal_call_storage[INTERNAL_CALL_COUNT]) {
437	assert((call->tc_flags & THREAD_CALL_ALLOC) == `0`);
438	enqueue_head(&thread_call_internal_queue, &call->tc_call.q_link);
439	thread_call_internal_queue_count++;
440	}
441	}
442
443	/*
444	* _pending_call_enqueue:
445	*
446	* Place an entry at the end of the
447	* pending queue, to be executed soon.
448	*
449	* Returns TRUE if the entry was already
450	* on a queue.
451	*
452	* Called with thread_call_lock held.
453	*/
454	static __inline__ boolean_t
455	_pending_call_enqueue(thread_call_t call,
456	thread_call_group_t group)
457	{
458	if ((THREAD_CALL_ONCE \| THREAD_CALL_RUNNING)
459	== (call->tc_flags & (THREAD_CALL_ONCE \| THREAD_CALL_RUNNING))) {
460	call->tc_deadline = `0`;
461
462	uint32_t flags = call->tc_flags;
463	call->tc_flags \|= THREAD_CALL_RESCHEDULE;
464
465	if ((flags & THREAD_CALL_RESCHEDULE) != `0`)
466	return (TRUE);
467	else
468	return (FALSE);
469	}
470
471	queue_head_t *old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
472
473	if (old_queue == NULL) {
474	call->tc_submit_count++;
475	} else if (old_queue != &group->pending_queue &&
476	old_queue != &group->delayed_queues[TCF_ABSOLUTE] &&
477	old_queue != &group->delayed_queues[TCF_CONTINUOUS]) {
478	panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
479	}
480
481	group->pending_count++;
482
483	thread_call_wake(group);
484
485	return (old_queue != NULL);
486	}
487
488	/*
489	* _delayed_call_enqueue:
490	*
491	* Place an entry on the delayed queue,
492	* after existing entries with an earlier
493	* (or identical) deadline.
494	*
495	* Returns TRUE if the entry was already
496	* on a queue.
497	*
498	* Called with thread_call_lock held.
499	*/
500	static boolean_t
501	_delayed_call_enqueue(
502	thread_call_t call,
503	thread_call_group_t group,
504	uint64_t deadline,
505	thread_call_flavor_t flavor)
506	{
507	if ((THREAD_CALL_ONCE \| THREAD_CALL_RUNNING)
508	== (call->tc_flags & (THREAD_CALL_ONCE \| THREAD_CALL_RUNNING))) {
509	call->tc_deadline = deadline;
510
511	uint32_t flags = call->tc_flags;
512	call->tc_flags \|= THREAD_CALL_RESCHEDULE;
513
514	if ((flags & THREAD_CALL_RESCHEDULE) != `0`)
515	return (TRUE);
516	else
517	return (FALSE);
518	}
519
520	queue_head_t *old_queue = call_entry_enqueue_deadline(CE(call),
521	&group->delayed_queues[flavor],
522	deadline);
523
524	if (old_queue == &group->pending_queue) {
525	group->pending_count--;
526	} else if (old_queue == NULL) {
527	call->tc_submit_count++;
528	} else if (old_queue == &group->delayed_queues[TCF_ABSOLUTE] \|\|
529	old_queue == &group->delayed_queues[TCF_CONTINUOUS]) {
530	/ TODO: if it's in the other delayed queue, that might not be OK /
531	// we did nothing, and that's fine
532	} else {
533	panic("tried to move a thread call (%p) between groups (old_queue: %p)", call, old_queue);
534	}
535
536	return (old_queue != NULL);
537	}
538
539	/*
540	* _call_dequeue:
541	*
542	* Remove an entry from a queue.
543	*
544	* Returns TRUE if the entry was on a queue.
545	*
546	* Called with thread_call_lock held.
547	*/
548	static __inline__ boolean_t
549	_call_dequeue(
550	thread_call_t call,
551	thread_call_group_t group)
552	{
553	queue_head_t *old_queue;
554
555	old_queue = call_entry_dequeue(CE(call));
556
557	if (old_queue != NULL) {
558	assert(old_queue == &group->pending_queue \|\|
559	old_queue == &group->delayed_queues[TCF_ABSOLUTE] \|\|
560	old_queue == &group->delayed_queues[TCF_CONTINUOUS]);
561
562	call->tc_finish_count++;
563	if (old_queue == &group->pending_queue)
564	group->pending_count--;
565	}
566
567	return (old_queue != NULL);
568	}
569
570	/*
571	* _arm_delayed_call_timer:
572	*
573	* Check if the timer needs to be armed for this flavor,
574	* and if so, arm it.
575	*
576	* If call is non-NULL, only re-arm the timer if the specified call
577	* is the first in the queue.
578	*
579	* Returns true if the timer was armed/re-armed, false if it was left unset
580	* Caller should cancel the timer if need be.
581	*
582	* Called with thread_call_lock held.
583	*/
584	static bool
585	_arm_delayed_call_timer(thread_call_t new_call,
586	thread_call_group_t group,
587	thread_call_flavor_t flavor)
588	{
589	/ No calls implies no timer needed /
590	if (queue_empty(&group->delayed_queues[flavor]))
591	return false;
592
593	thread_call_t call = qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link);
594
595	/ We only need to change the hard timer if this new call is the first in the list /
596	if (new_call != NULL && new_call != call)
597	return false;
598
599	assert((call->tc_soft_deadline != `0`) && ((call->tc_soft_deadline <= call->tc_call.deadline)));
600
601	uint64_t fire_at = call->tc_soft_deadline;
602
603	if (flavor == TCF_CONTINUOUS) {
604	assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
605	fire_at = continuoustime_to_absolutetime(fire_at);
606	} else {
607	assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == `0`);
608	}
609
610	/*
611	* Note: This picks the soonest-deadline call's leeway as the hard timer's leeway,
612	* which does not take into account later-deadline timers with a larger leeway.
613	* This is a valid coalescing behavior, but masks a possible window to
614	* fire a timer instead of going idle.
615	*/
616	uint64_t leeway = call->tc_call.deadline - call->tc_soft_deadline;
617
618	timer_call_enter_with_leeway(&group->delayed_timers[flavor], (timer_call_param_t)flavor,
619	fire_at, leeway,
620	TIMER_CALL_SYS_CRITICAL\|TIMER_CALL_LEEWAY,
621	((call->tc_flags & THREAD_CALL_RATELIMITED) == THREAD_CALL_RATELIMITED));
622
623	return true;
624	}
625
626	/*
627	* _cancel_func_from_queue:
628	*
629	* Remove the first (or all) matching
630	* entries from the specified queue.
631	*
632	* Returns TRUE if any matching entries
633	* were found.
634	*
635	* Called with thread_call_lock held.
636	*/
637	static boolean_t
638	_cancel_func_from_queue(thread_call_func_t func,
639	thread_call_param_t param0,
640	thread_call_group_t group,
641	boolean_t remove_all,
642	queue_head_t *queue)
643	{
644	boolean_t call_removed = FALSE;
645	thread_call_t call;
646
647	qe_foreach_element_safe(call, queue, tc_call.q_link) {
648	if (call->tc_call.func != func \|\|
649	call->tc_call.param0 != param0) {
650	continue;
651	}
652
653	_call_dequeue(call, group);
654
655	_internal_call_release(call);
656
657	call_removed = TRUE;
658	if (!remove_all)
659	break;
660	}
661
662	return (call_removed);
663	}
664
665	/*
666	* thread_call_func_delayed:
667	*
668	* Enqueue a function callout to
669	* occur at the stated time.
670	*/
671	void
672	thread_call_func_delayed(
673	thread_call_func_t func,
674	thread_call_param_t param,
675	uint64_t deadline)
676	{
677	(void)thread_call_enter_delayed_internal(NULL, func, param, `0`, deadline, `0`, `0`);
678	}
679
680	/*
681	* thread_call_func_delayed_with_leeway:
682	*
683	* Same as thread_call_func_delayed(), but with
684	* leeway/flags threaded through.
685	*/
686
687	void
688	thread_call_func_delayed_with_leeway(
689	thread_call_func_t func,
690	thread_call_param_t param,
691	uint64_t deadline,
692	uint64_t leeway,
693	uint32_t flags)
694	{
695	(void)thread_call_enter_delayed_internal(NULL, func, param, `0`, deadline, leeway, flags);
696	}
697
698	/*
699	* thread_call_func_cancel:
700	*
701	* Dequeue a function callout.
702	*
703	* Removes one (or all) { function, argument }
704	* instance(s) from either (or both)
705	* the pending and the delayed queue,
706	* in that order.
707	*
708	* Returns TRUE if any calls were cancelled.
709	*
710	* This iterates all of the pending or delayed thread calls in the group,
711	* which is really inefficient. Switch to an allocated thread call instead.
712	*/
713	boolean_t
714	thread_call_func_cancel(
715	thread_call_func_t func,
716	thread_call_param_t param,
717	boolean_t cancel_all)
718	{
719	boolean_t result;
720
721	assert(func != NULL);
722
723	spl_t s = disable_ints_and_lock();
724
725	/ Function-only thread calls are only kept in the default HIGH group /
726	thread_call_group_t group = &thread_call_groups[THREAD_CALL_INDEX_HIGH];
727
728	if (cancel_all) {
729	/ exhaustively search every queue, and return true if any search found something /
730	result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) \|
731	_cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) \|
732	_cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
733	} else {
734	/ early-exit as soon as we find something, don't search other queues /
735	result = _cancel_func_from_queue(func, param, group, cancel_all, &group->pending_queue) \|\|
736	_cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_ABSOLUTE]) \|\|
737	_cancel_func_from_queue(func, param, group, cancel_all, &group->delayed_queues[TCF_CONTINUOUS]);
738	}
739
740	enable_ints_and_unlock(s);
741
742	return (result);
743	}
744
745	/*
746	* Allocate a thread call with a given priority. Importances other than
747	* THREAD_CALL_PRIORITY_HIGH or THREAD_CALL_PRIORITY_KERNEL_HIGH will be run in threads
748	* with eager preemption enabled (i.e. may be aggressively preempted by higher-priority
749	* threads which are not in the normal "urgent" bands).
750	*/
751	thread_call_t
752	thread_call_allocate_with_priority(
753	thread_call_func_t func,
754	thread_call_param_t param0,
755	thread_call_priority_t pri)
756	{
757	return thread_call_allocate_with_options(func, param0, pri, `0`);
758	}
759
760	thread_call_t
761	thread_call_allocate_with_options(
762	thread_call_func_t func,
763	thread_call_param_t param0,
764	thread_call_priority_t pri,
765	thread_call_options_t options)
766	{
767	thread_call_t call = thread_call_allocate(func, param0);
768
769	switch (pri) {
770	case THREAD_CALL_PRIORITY_HIGH:
771	call->tc_index = THREAD_CALL_INDEX_HIGH;
772	break;
773	case THREAD_CALL_PRIORITY_KERNEL:
774	call->tc_index = THREAD_CALL_INDEX_KERNEL;
775	break;
776	case THREAD_CALL_PRIORITY_USER:
777	call->tc_index = THREAD_CALL_INDEX_USER;
778	break;
779	case THREAD_CALL_PRIORITY_LOW:
780	call->tc_index = THREAD_CALL_INDEX_LOW;
781	break;
782	case THREAD_CALL_PRIORITY_KERNEL_HIGH:
783	call->tc_index = THREAD_CALL_INDEX_KERNEL_HIGH;
784	break;
785	default:
786	panic("Invalid thread call pri value: %d", pri);
787	break;
788	}
789
790	if (options & THREAD_CALL_OPTIONS_ONCE) {
791	call->tc_flags \|= THREAD_CALL_ONCE;
792	}
793	if (options & THREAD_CALL_OPTIONS_SIGNAL) {
794	call->tc_flags \|= THREAD_CALL_SIGNAL \| THREAD_CALL_ONCE;
795	}
796
797	return call;
798	}
799
800	thread_call_t
801	thread_call_allocate_with_qos(thread_call_func_t func,
802	thread_call_param_t param0,
803	int qos_tier,
804	thread_call_options_t options)
805	{
806	thread_call_t call = thread_call_allocate(func, param0);
807
808	switch (qos_tier) {
809	case THREAD_QOS_UNSPECIFIED:
810	call->tc_index = THREAD_CALL_INDEX_HIGH;
811	break;
812	case THREAD_QOS_LEGACY:
813	call->tc_index = THREAD_CALL_INDEX_USER;
814	break;
815	case THREAD_QOS_MAINTENANCE:
816	case THREAD_QOS_BACKGROUND:
817	call->tc_index = THREAD_CALL_INDEX_LOW;
818	break;
819	case THREAD_QOS_UTILITY:
820	call->tc_index = THREAD_CALL_INDEX_QOS_UT;
821	break;
822	case THREAD_QOS_USER_INITIATED:
823	call->tc_index = THREAD_CALL_INDEX_QOS_IN;
824	break;
825	case THREAD_QOS_USER_INTERACTIVE:
826	call->tc_index = THREAD_CALL_INDEX_QOS_UI;
827	break;
828	default:
829	panic("Invalid thread call qos value: %d", qos_tier);
830	break;
831	}
832
833	if (options & THREAD_CALL_OPTIONS_ONCE)
834	call->tc_flags \|= THREAD_CALL_ONCE;
835
836	/ does not support THREAD_CALL_OPTIONS_SIGNAL /
837
838	return call;
839	}
840
841
842	/*
843	* thread_call_allocate:
844	*
845	* Allocate a callout entry.
846	*/
847	thread_call_t
848	thread_call_allocate(
849	thread_call_func_t func,
850	thread_call_param_t param0)
851	{
852	thread_call_t call = zalloc(thread_call_zone);
853
854	thread_call_setup(call, func, param0);
855	call->tc_refs = `1`;
856	call->tc_flags = THREAD_CALL_ALLOC;
857
858	return (call);
859	}
860
861	/*
862	* thread_call_free:
863	*
864	* Release a callout. If the callout is currently
865	* executing, it will be freed when all invocations
866	* finish.
867	*
868	* If the callout is currently armed to fire again, then
869	* freeing is not allowed and returns FALSE. The
870	* client must have canceled the pending invocation before freeing.
871	*/
872	boolean_t
873	thread_call_free(
874	thread_call_t call)
875	{
876	spl_t s = disable_ints_and_lock();
877
878	if (call->tc_call.queue != NULL \|\|
879	((call->tc_flags & THREAD_CALL_RESCHEDULE) != `0`)) {
880	thread_call_unlock();
881	splx(s);
882
883	return (FALSE);
884	}
885
886	int32_t refs = --call->tc_refs;
887	if (refs < `0`) {
888	panic("Refcount negative: %d\n", refs);
889	}
890
891	if ((THREAD_CALL_SIGNAL \| THREAD_CALL_RUNNING)
892	== ((THREAD_CALL_SIGNAL \| THREAD_CALL_RUNNING) & call->tc_flags)) {
893	thread_call_wait_once_locked(call, s);
894	/ thread call lock has been unlocked /
895	} else {
896	enable_ints_and_unlock(s);
897	}
898
899	if (refs == `0`) {
900	assert(call->tc_finish_count == call->tc_submit_count);
901	zfree(thread_call_zone, call);
902	}
903
904	return (TRUE);
905	}
906
907	/*
908	* thread_call_enter:
909	*
910	* Enqueue a callout entry to occur "soon".
911	*
912	* Returns TRUE if the call was
913	* already on a queue.
914	*/
915	boolean_t
916	thread_call_enter(
917	thread_call_t call)
918	{
919	return thread_call_enter1(call, `0`);
920	}
921
922	boolean_t
923	thread_call_enter1(
924	thread_call_t call,
925	thread_call_param_t param1)
926	{
927	boolean_t result = TRUE;
928	thread_call_group_t group;
929
930	assert(call->tc_call.func != NULL);
931
932	assert((call->tc_flags & THREAD_CALL_SIGNAL) == `0`);
933
934	group = thread_call_get_group(call);
935
936	spl_t s = disable_ints_and_lock();
937
938	if (call->tc_call.queue != &group->pending_queue) {
939	result = _pending_call_enqueue(call, group);
940	}
941
942	call->tc_call.param1 = param1;
943
944	enable_ints_and_unlock(s);
945
946	return (result);
947	}
948
949	/*
950	* thread_call_enter_delayed:
951	*
952	* Enqueue a callout entry to occur
953	* at the stated time.
954	*
955	* Returns TRUE if the call was
956	* already on a queue.
957	*/
958	boolean_t
959	thread_call_enter_delayed(
960	thread_call_t call,
961	uint64_t deadline)
962	{
963	assert(call != NULL);
964	return thread_call_enter_delayed_internal(call, NULL, `0`, `0`, deadline, `0`, `0`);
965	}
966
967	boolean_t
968	thread_call_enter1_delayed(
969	thread_call_t call,
970	thread_call_param_t param1,
971	uint64_t deadline)
972	{
973	assert(call != NULL);
974	return thread_call_enter_delayed_internal(call, NULL, `0`, param1, deadline, `0`, `0`);
975	}
976
977	boolean_t
978	thread_call_enter_delayed_with_leeway(
979	thread_call_t call,
980	thread_call_param_t param1,
981	uint64_t deadline,
982	uint64_t leeway,
983	unsigned int flags)
984	{
985	assert(call != NULL);
986	return thread_call_enter_delayed_internal(call, NULL, `0`, param1, deadline, leeway, flags);
987	}
988
989
990	/*
991	* thread_call_enter_delayed_internal:
992	* enqueue a callout entry to occur at the stated time
993	*
994	* Returns True if the call was already on a queue
995	* params:
996	* call - structure encapsulating state of the callout
997	* alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters
998	* deadline - time deadline in nanoseconds
999	* leeway - timer slack represented as delta of deadline.
1000	* flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing.
1001	* THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing.
1002	* THREAD_CALL_CONTINUOUS: thread call will be called according to mach_continuous_time rather
1003	* than mach_absolute_time
1004	*/
1005	boolean_t
1006	thread_call_enter_delayed_internal(
1007	thread_call_t call,
1008	thread_call_func_t alt_func,
1009	thread_call_param_t alt_param0,
1010	thread_call_param_t param1,
1011	uint64_t deadline,
1012	uint64_t leeway,
1013	unsigned int flags)
1014	{
1015	boolean_t result = TRUE;
1016	thread_call_group_t group;
1017	uint64_t now, sdeadline, slop;
1018	uint32_t urgency;
1019
1020	thread_call_flavor_t flavor = (flags & THREAD_CALL_CONTINUOUS) ? TCF_CONTINUOUS : TCF_ABSOLUTE;
1021
1022	/ direct mapping between thread_call, timer_call, and timeout_urgency values /
1023	urgency = (flags & TIMEOUT_URGENCY_MASK);
1024
1025	spl_t s = disable_ints_and_lock();
1026
1027	if (call == NULL) {
1028	/ allocate a structure out of internal storage, as a convenience for BSD callers /
1029	call = _internal_call_allocate(alt_func, alt_param0);
1030	}
1031
1032	assert(call->tc_call.func != NULL);
1033	group = thread_call_get_group(call);
1034
1035	/ TODO: assert that call is not enqueued before flipping the flag /
1036	if (flavor == TCF_CONTINUOUS) {
1037	now = mach_continuous_time();
1038	call->tc_flags \|= THREAD_CALL_CONTINUOUS;
1039	} else {
1040	now = mach_absolute_time();
1041	call->tc_flags &= ~THREAD_CALL_CONTINUOUS;
1042	}
1043
1044	call->tc_flags \|= THREAD_CALL_DELAYED;
1045
1046	call->tc_soft_deadline = sdeadline = deadline;
1047
1048	boolean_t ratelimited = FALSE;
1049	slop = timer_call_slop(deadline, now, urgency, current_thread(), &ratelimited);
1050
1051	if ((flags & THREAD_CALL_DELAY_LEEWAY) != `0` && leeway > slop)
1052	slop = leeway;
1053
1054	if (UINT64_MAX - deadline <= slop)
1055	deadline = UINT64_MAX;
1056	else
1057	deadline += slop;
1058
1059	if (ratelimited) {
1060	call->tc_flags \|= TIMER_CALL_RATELIMITED;
1061	} else {
1062	call->tc_flags &= ~TIMER_CALL_RATELIMITED;
1063	}
1064
1065	call->tc_call.param1 = param1;
1066
1067	call->tc_ttd = (sdeadline > now) ? (sdeadline - now) : `0`;
1068
1069	result = _delayed_call_enqueue(call, group, deadline, flavor);
1070
1071	_arm_delayed_call_timer(call, group, flavor);
1072
1073	#if CONFIG_DTRACE
1074	DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func,
1075	uint64_t, (deadline - sdeadline), uint64_t, (call->tc_ttd >> `32`),
1076	(unsigned) (call->tc_ttd & `0xFFFFFFFF`), call);
1077	#endif
1078
1079	enable_ints_and_unlock(s);
1080
1081	return (result);
1082	}
1083
1084	/*
1085	* Remove a callout entry from the queue
1086	* Called with thread_call_lock held
1087	*/
1088	static boolean_t
1089	thread_call_cancel_locked(thread_call_t call)
1090	{
1091	boolean_t canceled = (`0` != (THREAD_CALL_RESCHEDULE & call->tc_flags));
1092	call->tc_flags &= ~THREAD_CALL_RESCHEDULE;
1093
1094	if (canceled) {
1095	/ if reschedule was set, it must not have been queued /
1096	assert(call->tc_call.queue == NULL);
1097	} else {
1098	boolean_t do_cancel_callout = FALSE;
1099
1100	thread_call_flavor_t flavor = thread_call_get_flavor(call);
1101	thread_call_group_t group = thread_call_get_group(call);
1102
1103	if ((call->tc_call.deadline != `0`) &&
1104	(call == qe_queue_first(&group->delayed_queues[flavor], struct thread_call, tc_call.q_link))) {
1105	assert(call->tc_call.queue == &group->delayed_queues[flavor]);
1106	do_cancel_callout = TRUE;
1107	}
1108
1109	canceled = _call_dequeue(call, group);
1110
1111	if (do_cancel_callout) {
1112	if (_arm_delayed_call_timer(NULL, group, flavor) == false)
1113	timer_call_cancel(&group->delayed_timers[flavor]);
1114	}
1115	}
1116
1117	#if CONFIG_DTRACE
1118	DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func,
1119	`0`, (call->tc_ttd >> `32`), (unsigned) (call->tc_ttd & `0xFFFFFFFF`));
1120	#endif
1121
1122	return canceled;
1123	}
1124
1125	/*
1126	* thread_call_cancel:
1127	*
1128	* Dequeue a callout entry.
1129	*
1130	* Returns TRUE if the call was
1131	* on a queue.
1132	*/
1133	boolean_t
1134	thread_call_cancel(thread_call_t call)
1135	{
1136	spl_t s = disable_ints_and_lock();
1137
1138	boolean_t result = thread_call_cancel_locked(call);
1139
1140	enable_ints_and_unlock(s);
1141
1142	return result;
1143	}
1144
1145	/*
1146	* Cancel a thread call. If it cannot be cancelled (i.e.
1147	* is already in flight), waits for the most recent invocation
1148	* to finish. Note that if clients re-submit this thread call,
1149	* it may still be pending or in flight when thread_call_cancel_wait
1150	* returns, but all requests to execute this work item prior
1151	* to the call to thread_call_cancel_wait will have finished.
1152	*/
1153	boolean_t
1154	thread_call_cancel_wait(thread_call_t call)
1155	{
1156	if ((call->tc_flags & THREAD_CALL_ALLOC) == `0`)
1157	panic("thread_call_cancel_wait: can't wait on thread call whose storage I don't own");
1158
1159	if (!ml_get_interrupts_enabled())
1160	panic("unsafe thread_call_cancel_wait");
1161
1162	if (current_thread()->thc_state.thc_call == call)
1163	panic("thread_call_cancel_wait: deadlock waiting on self from inside call: %p to function %p",
1164	call, call->tc_call.func);
1165
1166	spl_t s = disable_ints_and_lock();
1167
1168	boolean_t canceled = thread_call_cancel_locked(call);
1169
1170	if ((call->tc_flags & THREAD_CALL_ONCE) == THREAD_CALL_ONCE) {
1171	/*
1172	* A cancel-wait on a 'once' call will both cancel
1173	* the pending call and wait for the in-flight call
1174	*/
1175
1176	thread_call_wait_once_locked(call, s);
1177	/ thread call lock unlocked /
1178	} else {
1179	/*
1180	* A cancel-wait on a normal call will only wait for the in-flight calls
1181	* if it did not cancel the pending call.
1182	*
1183	* TODO: This seems less than useful - shouldn't it do the wait as well?
1184	*/
1185
1186	if (canceled == FALSE) {
1187	thread_call_wait_locked(call, s);
1188	/ thread call lock unlocked /
1189	} else {
1190	enable_ints_and_unlock(s);
1191	}
1192	}
1193
1194	return canceled;
1195	}
1196
1197
1198	/*
1199	* thread_call_wake:
1200	*
1201	* Wake a call thread to service
1202	* pending call entries. May wake
1203	* the daemon thread in order to
1204	* create additional call threads.
1205	*
1206	* Called with thread_call_lock held.
1207	*
1208	* For high-priority group, only does wakeup/creation if there are no threads
1209	* running.
1210	*/
1211	static __inline__ void
1212	thread_call_wake(
1213	thread_call_group_t group)
1214	{
1215	/*
1216	* New behavior: use threads if you've got 'em.
1217	* Traditional behavior: wake only if no threads running.
1218	*/
1219	if (group_isparallel(group) \|\| group->active_count == `0`) {
1220	if (waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1221	THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) {
1222	group->idle_count--; group->active_count++;
1223
1224	if (group->idle_count == `0` && (group->flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) {
1225	if (timer_call_cancel(&group->dealloc_timer) == TRUE) {
1226	group->flags &= ~TCG_DEALLOC_ACTIVE;
1227	}
1228	}
1229	} else {
1230	if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
1231	thread_call_daemon_awake = TRUE;
1232	waitq_wakeup64_one(&daemon_waitq, NO_EVENT64,
1233	THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
1234	}
1235	}
1236	}
1237	}
1238
1239	/*
1240	* sched_call_thread:
1241	*
1242	* Call out invoked by the scheduler.
1243	*/
1244	static void
1245	sched_call_thread(
1246	int type,
1247	thread_t thread)
1248	{
1249	thread_call_group_t group;
1250
1251	group = thread->thc_state.thc_group;
1252	assert((group - &thread_call_groups[`0`]) < THREAD_CALL_INDEX_MAX);
1253
1254	thread_call_lock_spin();
1255
1256	switch (type) {
1257
1258	case SCHED_CALL_BLOCK:
1259	assert(group->active_count);
1260	--group->active_count;
1261	group->blocked_count++;
1262	if (group->pending_count > `0`)
1263	thread_call_wake(group);
1264	break;
1265
1266	case SCHED_CALL_UNBLOCK:
1267	assert(group->blocked_count);
1268	--group->blocked_count;
1269	group->active_count++;
1270	break;
1271	}
1272
1273	thread_call_unlock();
1274	}
1275
1276	/*
1277	* Interrupts disabled, lock held; returns the same way.
1278	* Only called on thread calls whose storage we own. Wakes up
1279	* anyone who might be waiting on this work item and frees it
1280	* if the client has so requested.
1281	*/
1282	static boolean_t
1283	thread_call_finish(thread_call_t call, thread_call_group_t group, spl_t *s)
1284	{
1285	uint64_t time;
1286	uint32_t flags;
1287	boolean_t signal;
1288	boolean_t repend = FALSE;
1289
1290	call->tc_finish_count++;
1291	flags = call->tc_flags;
1292	signal = ((THREAD_CALL_SIGNAL & flags) != `0`);
1293
1294	if (!signal) {
1295	/ The thread call thread owns a ref until the call is finished /
1296	if (call->tc_refs <= `0`)
1297	panic("thread_call_finish: detected over-released thread call: %p", call);
1298	call->tc_refs--;
1299	}
1300
1301	call->tc_flags &= ~(THREAD_CALL_RESCHEDULE \| THREAD_CALL_RUNNING \| THREAD_CALL_WAIT);
1302
1303	if ((call->tc_refs != `0`) && ((flags & THREAD_CALL_RESCHEDULE) != `0`)) {
1304	assert(flags & THREAD_CALL_ONCE);
1305	thread_call_flavor_t flavor = thread_call_get_flavor(call);
1306
1307	if (THREAD_CALL_DELAYED & flags) {
1308	time = mach_absolute_time();
1309	if (flavor == TCF_CONTINUOUS) {
1310	time = absolutetime_to_continuoustime(time);
1311	}
1312	if (call->tc_soft_deadline <= time) {
1313	call->tc_flags &= ~(THREAD_CALL_DELAYED \| TIMER_CALL_RATELIMITED);
1314	call->tc_deadline = `0`;
1315	}
1316	}
1317	if (call->tc_deadline) {
1318	_delayed_call_enqueue(call, group, call->tc_deadline, flavor);
1319	if (!signal) {
1320	_arm_delayed_call_timer(call, group, flavor);
1321	}
1322	} else if (signal) {
1323	call->tc_submit_count++;
1324	repend = TRUE;
1325	} else {
1326	_pending_call_enqueue(call, group);
1327	}
1328	}
1329
1330	if (!signal && (call->tc_refs == `0`)) {
1331	if ((flags & THREAD_CALL_WAIT) != `0`) {
1332	panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
1333	}
1334
1335	assert(call->tc_finish_count == call->tc_submit_count);
1336
1337	enable_ints_and_unlock(*s);
1338
1339	zfree(thread_call_zone, call);
1340
1341	*s = disable_ints_and_lock();
1342	}
1343
1344	if ((flags & THREAD_CALL_WAIT) != `0`) {
1345	/*
1346	* Dropping lock here because the sched call for the
1347	* high-pri group can take the big lock from under
1348	* a thread lock.
1349	*/
1350	thread_call_unlock();
1351	thread_wakeup((event_t)call);
1352	thread_call_lock_spin();
1353	/ THREAD_CALL_SIGNAL call may have been freed /
1354	}
1355
1356	return (repend);
1357	}
1358
1359	/*
1360	* thread_call_invoke
1361	*
1362	* Invoke the function provided for this thread call
1363	*
1364	* Note that the thread call object can be deallocated by the function if we do not control its storage.
1365	*/
1366	static void __attribute__((noinline))
1367	thread_call_invoke(thread_call_func_t func, thread_call_param_t param0, thread_call_param_t param1, thread_call_t call)
1368	{
1369	current_thread()->thc_state.thc_call = call;
1370
1371	#if DEVELOPMENT \|\| DEBUG
1372	KERNEL_DEBUG_CONSTANT(
1373	MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) \| DBG_FUNC_START,
1374	VM_KERNEL_UNSLIDE(func), VM_KERNEL_ADDRHIDE(param0), VM_KERNEL_ADDRHIDE(param1), `0`, `0`);
1375	#endif /* DEVELOPMENT \|\| DEBUG */
1376
1377	#if CONFIG_DTRACE
1378	uint64_t tc_ttd = call->tc_ttd;
1379	boolean_t is_delayed = call->tc_flags & THREAD_CALL_DELAYED;
1380	DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, `0`, int, (tc_ttd >> `32`),
1381	(unsigned) (tc_ttd & `0xFFFFFFFF`), is_delayed, call);
1382	#endif
1383
1384	(*func)(param0, param1);
1385
1386	#if CONFIG_DTRACE
1387	DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, `0`, int, (tc_ttd >> `32`),
1388	(unsigned) (tc_ttd & `0xFFFFFFFF`), is_delayed, call);
1389	#endif
1390
1391	#if DEVELOPMENT \|\| DEBUG
1392	KERNEL_DEBUG_CONSTANT(
1393	MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) \| DBG_FUNC_END,
1394	VM_KERNEL_UNSLIDE(func), `0`, `0`, `0`, `0`);
1395	#endif /* DEVELOPMENT \|\| DEBUG */
1396
1397	current_thread()->thc_state.thc_call = NULL;
1398	}
1399
1400	/*
1401	* thread_call_thread:
1402	*/
1403	static void
1404	thread_call_thread(
1405	thread_call_group_t group,
1406	wait_result_t wres)
1407	{
1408	thread_t self = current_thread();
1409	boolean_t canwait;
1410
1411	if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == `0`)
1412	(void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT);
1413
1414	/*
1415	* A wakeup with THREAD_INTERRUPTED indicates that
1416	* we should terminate.
1417	*/
1418	if (wres == THREAD_INTERRUPTED) {
1419	thread_terminate(self);
1420
1421	/ NOTREACHED /
1422	panic("thread_terminate() returned?");
1423	}
1424
1425	spl_t s = disable_ints_and_lock();
1426
1427	self->thc_state.thc_group = group;
1428	thread_sched_call(self, sched_call_thread);
1429
1430	while (group->pending_count > `0`) {
1431	thread_call_t call;
1432	thread_call_func_t func;
1433	thread_call_param_t param0, param1;
1434
1435	call = qe_dequeue_head(&group->pending_queue, struct thread_call, tc_call.q_link);
1436	assert(call != NULL);
1437	group->pending_count--;
1438
1439	func = call->tc_call.func;
1440	param0 = call->tc_call.param0;
1441	param1 = call->tc_call.param1;
1442
1443	call->tc_call.queue = NULL;
1444
1445	_internal_call_release(call);
1446
1447	/*
1448	* Can only do wakeups for thread calls whose storage
1449	* we control.
1450	*/
1451	if ((call->tc_flags & THREAD_CALL_ALLOC) != `0`) {
1452	canwait = TRUE;
1453	call->tc_flags \|= THREAD_CALL_RUNNING;
1454	call->tc_refs++; / Delay free until we're done /
1455	} else
1456	canwait = FALSE;
1457
1458	enable_ints_and_unlock(s);
1459
1460	thread_call_invoke(func, param0, param1, call);
1461
1462	if (get_preemption_level() != `0`) {
1463	int pl = get_preemption_level();
1464	panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
1465	pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
1466	}
1467
1468	s = disable_ints_and_lock();
1469
1470	if (canwait) {
1471	/ Frees if so desired /
1472	thread_call_finish(call, group, &s);
1473	}
1474	}
1475
1476	thread_sched_call(self, NULL);
1477	group->active_count--;
1478
1479	if (self->callout_woken_from_icontext && !self->callout_woke_thread) {
1480	ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, `1`);
1481	if (self->callout_woken_from_platform_idle)
1482	ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, `1`);
1483	}
1484
1485	self->callout_woken_from_icontext = FALSE;
1486	self->callout_woken_from_platform_idle = FALSE;
1487	self->callout_woke_thread = FALSE;
1488
1489	if (group_isparallel(group)) {
1490	/*
1491	* For new style of thread group, thread always blocks.
1492	* If we have more than the target number of threads,
1493	* and this is the first to block, and it isn't active
1494	* already, set a timer for deallocating a thread if we
1495	* continue to have a surplus.
1496	*/
1497	group->idle_count++;
1498
1499	if (group->idle_count == `1`) {
1500	group->idle_timestamp = mach_absolute_time();
1501	}
1502
1503	if (((group->flags & TCG_DEALLOC_ACTIVE) == `0`) &&
1504	((group->active_count + group->idle_count) > group->target_thread_count)) {
1505	thread_call_start_deallocate_timer(group);
1506	}
1507
1508	/ Wait for more work (or termination) /
1509	wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, `0`);
1510	if (wres != THREAD_WAITING) {
1511	panic("kcall worker unable to assert wait?");
1512	}
1513
1514	enable_ints_and_unlock(s);
1515
1516	thread_block_parameter((thread_continue_t)thread_call_thread, group);
1517	} else {
1518	if (group->idle_count < group->target_thread_count) {
1519	group->idle_count++;
1520
1521	waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_UNINT, `0`); / Interrupted means to exit /
1522
1523	enable_ints_and_unlock(s);
1524
1525	thread_block_parameter((thread_continue_t)thread_call_thread, group);
1526	/ NOTREACHED /
1527	}
1528	}
1529
1530	enable_ints_and_unlock(s);
1531
1532	thread_terminate(self);
1533	/ NOTREACHED /
1534	}
1535
1536	/*
1537	* thread_call_daemon: walk list of groups, allocating
1538	* threads if appropriate (as determined by
1539	* thread_call_group_should_add_thread()).
1540	*/
1541	static void
1542	thread_call_daemon_continue(__unused void *arg)
1543	{
1544	spl_t s = disable_ints_and_lock();
1545
1546	/ Starting at zero happens to be high-priority first. /
1547	for (int i = `0`; i < THREAD_CALL_INDEX_MAX; i++) {
1548	thread_call_group_t group = &thread_call_groups[i];
1549	while (thread_call_group_should_add_thread(group)) {
1550	group->active_count++;
1551
1552	enable_ints_and_unlock(s);
1553
1554	kern_return_t kr = thread_call_thread_create(group);
1555	if (kr != KERN_SUCCESS) {
1556	/*
1557	* On failure, just pause for a moment and give up.
1558	* We can try again later.
1559	*/
1560	delay(`10000`); / 10 ms /
1561	s = disable_ints_and_lock();
1562	goto out;
1563	}
1564
1565	s = disable_ints_and_lock();
1566	}
1567	}
1568
1569	out:
1570	thread_call_daemon_awake = FALSE;
1571	waitq_assert_wait64(&daemon_waitq, NO_EVENT64, THREAD_UNINT, `0`);
1572
1573	enable_ints_and_unlock(s);
1574
1575	thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
1576	/ NOTREACHED /
1577	}
1578
1579	static void
1580	thread_call_daemon(
1581	__unused void *arg)
1582	{
1583	thread_t self = current_thread();
1584
1585	self->options \|= TH_OPT_VMPRIV;
1586	vm_page_free_reserve(`2`); / XXX /
1587
1588	thread_set_thread_name(self, "thread_call_daemon");
1589
1590	thread_call_daemon_continue(NULL);
1591	/ NOTREACHED /
1592	}
1593
1594	/*
1595	* Schedule timer to deallocate a worker thread if we have a surplus
1596	* of threads (in excess of the group's target) and at least one thread
1597	* is idle the whole time.
1598	*/
1599	static void
1600	thread_call_start_deallocate_timer(thread_call_group_t group)
1601	{
1602	__assert_only boolean_t already_enqueued;
1603
1604	assert(group->idle_count > `0`);
1605	assert((group->flags & TCG_DEALLOC_ACTIVE) == `0`);
1606
1607	group->flags \|= TCG_DEALLOC_ACTIVE;
1608
1609	uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
1610
1611	already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, `0`);
1612
1613	assert(already_enqueued == FALSE);
1614	}
1615
1616	/ non-static so dtrace can find it rdar://problem/31156135&31379348 /
1617	void
1618	thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1)
1619	{
1620	thread_call_group_t group = (thread_call_group_t) p0;
1621	thread_call_flavor_t flavor = (thread_call_flavor_t) p1;
1622
1623	thread_call_t call;
1624	uint64_t now;
1625	boolean_t restart;
1626	boolean_t repend;
1627
1628	thread_call_lock_spin();
1629
1630	if (flavor == TCF_CONTINUOUS)
1631	now = mach_continuous_time();
1632	else if (flavor == TCF_ABSOLUTE)
1633	now = mach_absolute_time();
1634	else
1635	panic("invalid timer flavor: %d", flavor);
1636
1637	do {
1638	restart = FALSE;
1639	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1640	if (flavor == TCF_CONTINUOUS)
1641	assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == THREAD_CALL_CONTINUOUS);
1642	else
1643	assert((call->tc_flags & THREAD_CALL_CONTINUOUS) == `0`);
1644
1645	/*
1646	* if we hit a call that isn't yet ready to expire,
1647	* then we're done for now
1648	* TODO: The next timer in the list could have a larger leeway
1649	* and therefore be ready to expire.
1650	* Sort by deadline then by soft deadline to avoid this
1651	*/
1652	if (call->tc_soft_deadline > now)
1653	break;
1654
1655	/*
1656	* If we hit a rate-limited timer, don't eagerly wake it up.
1657	* Wait until it reaches the end of the leeway window.
1658	*
1659	* TODO: What if the next timer is not rate-limited?
1660	* Have a separate rate-limited queue to avoid this
1661	*/
1662	if ((call->tc_flags & THREAD_CALL_RATELIMITED) &&
1663	(call->tc_call.deadline > now) &&
1664	(ml_timer_forced_evaluation() == FALSE)) {
1665	break;
1666	}
1667
1668	if (THREAD_CALL_SIGNAL & call->tc_flags) {
1669	__assert_only queue_head_t *old_queue;
1670	old_queue = call_entry_dequeue(&call->tc_call);
1671	assert(old_queue == &group->delayed_queues[flavor]);
1672
1673	do {
1674	thread_call_func_t func = call->tc_call.func;
1675	thread_call_param_t param0 = call->tc_call.param0;
1676	thread_call_param_t param1 = call->tc_call.param1;
1677
1678	call->tc_flags \|= THREAD_CALL_RUNNING;
1679	thread_call_unlock();
1680	thread_call_invoke(func, param0, param1, call);
1681	thread_call_lock_spin();
1682
1683	repend = thread_call_finish(call, group, NULL);
1684	} while (repend);
1685
1686	/ call may have been freed /
1687	restart = TRUE;
1688	break;
1689	} else {
1690	_pending_call_enqueue(call, group);
1691	}
1692	}
1693	} while (restart);
1694
1695	_arm_delayed_call_timer(call, group, flavor);
1696
1697	thread_call_unlock();
1698	}
1699
1700	static void
1701	thread_call_delayed_timer_rescan(thread_call_group_t group,
1702	thread_call_flavor_t flavor)
1703	{
1704	thread_call_t call;
1705	uint64_t now;
1706
1707	spl_t s = disable_ints_and_lock();
1708
1709	assert(ml_timer_forced_evaluation() == TRUE);
1710
1711	if (flavor == TCF_CONTINUOUS) {
1712	now = mach_continuous_time();
1713	} else {
1714	now = mach_absolute_time();
1715	}
1716
1717	qe_foreach_element_safe(call, &group->delayed_queues[flavor], tc_call.q_link) {
1718	if (call->tc_soft_deadline <= now) {
1719	_pending_call_enqueue(call, group);
1720	} else {
1721	uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline;
1722	assert (call->tc_call.deadline >= call->tc_soft_deadline);
1723	/*
1724	* On a latency quality-of-service level change,
1725	* re-sort potentially rate-limited callout. The platform
1726	* layer determines which timers require this.
1727	*/
1728	if (timer_resort_threshold(skew)) {
1729	_call_dequeue(call, group);
1730	_delayed_call_enqueue(call, group, call->tc_soft_deadline, flavor);
1731	}
1732	}
1733	}
1734
1735	_arm_delayed_call_timer(NULL, group, flavor);
1736
1737	enable_ints_and_unlock(s);
1738	}
1739
1740	void
1741	thread_call_delayed_timer_rescan_all(void) {
1742	for (int i = `0`; i < THREAD_CALL_INDEX_MAX; i++) {
1743	thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_ABSOLUTE);
1744	thread_call_delayed_timer_rescan(&thread_call_groups[i], TCF_CONTINUOUS);
1745	}
1746	}
1747
1748	/*
1749	* Timer callback to tell a thread to terminate if
1750	* we have an excess of threads and at least one has been
1751	* idle for a long time.
1752	*/
1753	static void
1754	thread_call_dealloc_timer(
1755	timer_call_param_t p0,
1756	__unused timer_call_param_t p1)
1757	{
1758	thread_call_group_t group = (thread_call_group_t)p0;
1759	uint64_t now;
1760	kern_return_t res;
1761	boolean_t terminated = FALSE;
1762
1763	thread_call_lock_spin();
1764
1765	assert((group->flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE);
1766
1767	now = mach_absolute_time();
1768
1769	if (group->idle_count > `0`) {
1770	if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
1771	terminated = TRUE;
1772	group->idle_count--;
1773	res = waitq_wakeup64_one(&group->idle_waitq, NO_EVENT64,
1774	THREAD_INTERRUPTED, WAITQ_ALL_PRIORITIES);
1775	if (res != KERN_SUCCESS) {
1776	panic("Unable to wake up idle thread for termination?");
1777	}
1778	}
1779	}
1780
1781	group->flags &= ~TCG_DEALLOC_ACTIVE;
1782
1783	/*
1784	* If we still have an excess of threads, schedule another
1785	* invocation of this function.
1786	*/
1787	if (group->idle_count > `0` && (group->idle_count + group->active_count > group->target_thread_count)) {
1788	/*
1789	* If we killed someone just now, push out the
1790	* next deadline.
1791	*/
1792	if (terminated) {
1793	group->idle_timestamp = now;
1794	}
1795
1796	thread_call_start_deallocate_timer(group);
1797	}
1798
1799	thread_call_unlock();
1800	}
1801
1802	/*
1803	* Wait for the invocation of the thread call to complete
1804	* We know there's only one in flight because of the 'once' flag.
1805	*
1806	* If a subsequent invocation comes in before we wake up, that's OK
1807	*
1808	* TODO: Here is where we will add priority inheritance to the thread executing
1809	* the thread call in case it's lower priority than the current thread
1810	* <rdar://problem/30321792> Priority inheritance for thread_call_wait_once
1811	*
1812	* Takes the thread call lock locked, returns unlocked
1813	* This lets us avoid a spurious take/drop after waking up from thread_block
1814	*/
1815	static boolean_t
1816	thread_call_wait_once_locked(thread_call_t call, spl_t s)
1817	{
1818	assert(call->tc_flags & THREAD_CALL_ALLOC);
1819	assert(call->tc_flags & THREAD_CALL_ONCE);
1820
1821	if ((call->tc_flags & THREAD_CALL_RUNNING) == `0`) {
1822	enable_ints_and_unlock(s);
1823	return FALSE;
1824	}
1825
1826	/ call is running, so we have to wait for it /
1827	call->tc_flags \|= THREAD_CALL_WAIT;
1828
1829	wait_result_t res = assert_wait(call, THREAD_UNINT);
1830	if (res != THREAD_WAITING)
1831	panic("Unable to assert wait: %d", res);
1832
1833	enable_ints_and_unlock(s);
1834
1835	res = thread_block(THREAD_CONTINUE_NULL);
1836	if (res != THREAD_AWAKENED)
1837	panic("Awoken with %d?", res);
1838
1839	/ returns unlocked /
1840	return TRUE;
1841	}
1842
1843	/*
1844	* Wait for an in-flight invocation to complete
1845	* Does NOT try to cancel, so the client doesn't need to hold their
1846	* lock while calling this function.
1847	*
1848	* Returns whether or not it had to wait.
1849	*
1850	* Only works for THREAD_CALL_ONCE calls.
1851	*/
1852	boolean_t
1853	thread_call_wait_once(thread_call_t call)
1854	{
1855	if ((call->tc_flags & THREAD_CALL_ALLOC) == `0`)
1856	panic("thread_call_wait_once: can't wait on thread call whose storage I don't own");
1857
1858	if ((call->tc_flags & THREAD_CALL_ONCE) == `0`)
1859	panic("thread_call_wait_once: can't wait_once on a non-once call");
1860
1861	if (!ml_get_interrupts_enabled())
1862	panic("unsafe thread_call_wait_once");
1863
1864	if (current_thread()->thc_state.thc_call == call)
1865	panic("thread_call_wait_once: deadlock waiting on self from inside call: %p to function %p",
1866	call, call->tc_call.func);
1867
1868	spl_t s = disable_ints_and_lock();
1869
1870	boolean_t waited = thread_call_wait_once_locked(call, s);
1871	/ thread call lock unlocked /
1872
1873	return waited;
1874	}
1875
1876
1877	/*
1878	* Wait for all requested invocations of a thread call prior to now
1879	* to finish. Can only be invoked on thread calls whose storage we manage.
1880	* Just waits for the finish count to catch up to the submit count we find
1881	* at the beginning of our wait.
1882	*
1883	* Called with thread_call_lock held. Returns with lock released.
1884	*/
1885	static void
1886	thread_call_wait_locked(thread_call_t call, spl_t s)
1887	{
1888	uint64_t submit_count;
1889	wait_result_t res;
1890
1891	assert(call->tc_flags & THREAD_CALL_ALLOC);
1892
1893	submit_count = call->tc_submit_count;
1894
1895	while (call->tc_finish_count < submit_count) {
1896	call->tc_flags \|= THREAD_CALL_WAIT;
1897
1898	res = assert_wait(call, THREAD_UNINT);
1899	if (res != THREAD_WAITING)
1900	panic("Unable to assert wait: %d", res);
1901
1902	enable_ints_and_unlock(s);
1903
1904	res = thread_block(THREAD_CONTINUE_NULL);
1905	if (res != THREAD_AWAKENED)
1906	panic("Awoken with %d?", res);
1907
1908	s = disable_ints_and_lock();
1909	}
1910
1911	enable_ints_and_unlock(s);
1912	}
1913
1914	/*
1915	* Determine whether a thread call is either on a queue or
1916	* currently being executed.
1917	*/
1918	boolean_t
1919	thread_call_isactive(thread_call_t call)
1920	{
1921	boolean_t active;
1922
1923	spl_t s = disable_ints_and_lock();
1924	active = (call->tc_submit_count > call->tc_finish_count);
1925	enable_ints_and_unlock(s);
1926
1927	return active;
1928	}
1929
1930	/*
1931	* adjust_cont_time_thread_calls
1932	* on wake, reenqueue delayed call timer for continuous time thread call groups
1933	*/
1934	void
1935	adjust_cont_time_thread_calls(void)
1936	{
1937	spl_t s = disable_ints_and_lock();
1938
1939	for (int i = `0`; i < THREAD_CALL_INDEX_MAX; i++) {
1940	thread_call_group_t group = &thread_call_groups[i];
1941
1942	/ only the continuous timers need to be re-armed /
1943
1944	_arm_delayed_call_timer(NULL, group, TCF_CONTINUOUS);
1945	}
1946
1947	enable_ints_and_unlock(s);
1948	}
1949
1950

Browse the source code of xnu/osfmk/kern/thread_call.c