thread_policy.c source code [xnu/osfmk/kern/thread_policy.c]

1	/*
2	* Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#include <mach/mach_types.h>
30	#include <mach/thread_act_server.h>
31
32	#include <kern/kern_types.h>
33	#include <kern/processor.h>
34	#include <kern/thread.h>
35	#include <kern/affinity.h>
36	#include <mach/task_policy.h>
37	#include <kern/sfi.h>
38	#include <kern/policy_internal.h>
39	#include <sys/errno.h>
40	#include <sys/ulock.h>
41
42	#include <mach/machine/sdt.h>
43
44	#ifdef MACH_BSD
45	extern int proc_selfpid(void);
46	extern char * proc_name_address(void *p);
47	extern void rethrottle_thread(void * uthread);
48	#endif /* MACH_BSD */
49
50	#define QOS_EXTRACT(q) ((q) & 0xff)
51
52	uint32_t qos_override_mode;
53	#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54	#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55	#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56	#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58	extern zone_t thread_qos_override_zone;
59
60	static void
61	proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63	/*
64	* THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65	* to threads that don't have a QoS class set.
66	*/
67	const qos_policy_params_t thread_qos_policy_params = {
68	/*
69	* This table defines the starting base priority of the thread,
70	* which will be modified by the thread importance and the task max priority
71	* before being applied.
72	*/
73	.qos_pri[THREAD_QOS_UNSPECIFIED] = `0`, / not consulted /
74	.qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, / i.e. 46 /
75	.qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76	.qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77	.qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78	.qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79	.qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81	/*
82	* This table defines the highest IO priority that a thread marked with this
83	* QoS class can have.
84	*/
85	.qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86	.qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87	.qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88	.qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89	.qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90	.qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, / possibly overridden by bg_iotier /
91	.qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93	/*
94	* This table defines the highest QoS level that
95	* a thread marked with this QoS class can have.
96	*/
97
98	.qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99	.qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100	.qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101	.qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102	.qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103	.qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104	.qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106	.qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107	.qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108	.qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109	.qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110	.qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111	.qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112	.qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113	};
114
115	static void
116	thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118	static int
119	thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121	static void
122	proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124	static void
125	proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127	static void
128	proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130	static void
131	thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
132
133	static int
134	thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136	static int
137	proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139	static void
140	thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
141
142	static void
143	thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
144
145	void
146	thread_policy_init(void) {
147	if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
148	printf("QOS override mode: 0x%08x\n", qos_override_mode);
149	} else {
150	qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
151	}
152	}
153
154	boolean_t
155	thread_has_qos_policy(thread_t thread) {
156	return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
157	}
158
159
160	static void
161	thread_remove_qos_policy_locked(thread_t thread,
162	task_pend_token_t pend_token)
163	{
164
165	__unused int prev_qos = thread->requested_policy.thrp_qos;
166
167	DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
168
169	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
170	THREAD_QOS_UNSPECIFIED, `0`, pend_token);
171	}
172
173	kern_return_t
174	thread_remove_qos_policy(thread_t thread)
175	{
176	struct task_pend_token pend_token = {};
177
178	thread_mtx_lock(thread);
179	if (!thread->active) {
180	thread_mtx_unlock(thread);
181	return KERN_TERMINATED;
182	}
183
184	thread_remove_qos_policy_locked(thread, &pend_token);
185
186	thread_mtx_unlock(thread);
187
188	thread_policy_update_complete_unlocked(thread, &pend_token);
189
190	return KERN_SUCCESS;
191	}
192
193
194	boolean_t
195	thread_is_static_param(thread_t thread)
196	{
197	if (thread->static_param) {
198	DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
199	return TRUE;
200	}
201	return FALSE;
202	}
203
204	/*
205	* Relative priorities can range between 0REL and -15REL. These
206	* map to QoS-specific ranges, to create non-overlapping priority
207	* ranges.
208	*/
209	static int
210	thread_qos_scaled_relative_priority(int qos, int qos_relprio)
211	{
212	int next_lower_qos;
213
214	/ Fast path, since no validation or scaling is needed /
215	if (qos_relprio == `0`) return `0`;
216
217	switch (qos) {
218	case THREAD_QOS_USER_INTERACTIVE:
219	next_lower_qos = THREAD_QOS_USER_INITIATED;
220	break;
221	case THREAD_QOS_USER_INITIATED:
222	next_lower_qos = THREAD_QOS_LEGACY;
223	break;
224	case THREAD_QOS_LEGACY:
225	next_lower_qos = THREAD_QOS_UTILITY;
226	break;
227	case THREAD_QOS_UTILITY:
228	next_lower_qos = THREAD_QOS_BACKGROUND;
229	break;
230	case THREAD_QOS_MAINTENANCE:
231	case THREAD_QOS_BACKGROUND:
232	next_lower_qos = `0`;
233	break;
234	default:
235	panic("Unrecognized QoS %d", qos);
236	return `0`;
237	}
238
239	int prio_range_max = thread_qos_policy_params.qos_pri[qos];
240	int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : `0`;
241
242	/*
243	* We now have the valid range that the scaled relative priority can map to. Note
244	* that the lower bound is exclusive, but the upper bound is inclusive. If the
245	* range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
246	* fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
247	* remainder.
248	*/
249	int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> `4`);
250
251	return scaled_relprio;
252	}
253
254	/*
255	* flag set by -qos-policy-allow boot-arg to allow
256	* testing thread qos policy from userspace
257	*/
258	boolean_t allow_qos_policy_set = FALSE;
259
260	kern_return_t
261	thread_policy_set(
262	thread_t thread,
263	thread_policy_flavor_t flavor,
264	thread_policy_t policy_info,
265	mach_msg_type_number_t count)
266	{
267	thread_qos_policy_data_t req_qos;
268	kern_return_t kr;
269
270	req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
271
272	if (thread == THREAD_NULL)
273	return (KERN_INVALID_ARGUMENT);
274
275	if (allow_qos_policy_set == FALSE) {
276	if (thread_is_static_param(thread))
277	return (KERN_POLICY_STATIC);
278
279	if (flavor == THREAD_QOS_POLICY)
280	return (KERN_INVALID_ARGUMENT);
281	}
282
283	/ Threads without static_param set reset their QoS when other policies are applied. /
284	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
285	/ Store the existing tier, if we fail this call it is used to reset back. /
286	req_qos.qos_tier = thread->requested_policy.thrp_qos;
287	req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
288
289	kr = thread_remove_qos_policy(thread);
290	if (kr != KERN_SUCCESS) {
291	return kr;
292	}
293	}
294
295	kr = thread_policy_set_internal(thread, flavor, policy_info, count);
296
297	/ Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. /
298	if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
299	if (kr != KERN_SUCCESS) {
300	/ Reset back to our original tier as the set failed. /
301	(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
302	}
303	}
304
305	return kr;
306	}
307
308	kern_return_t
309	thread_policy_set_internal(
310	thread_t thread,
311	thread_policy_flavor_t flavor,
312	thread_policy_t policy_info,
313	mach_msg_type_number_t count)
314	{
315	kern_return_t result = KERN_SUCCESS;
316	struct task_pend_token pend_token = {};
317
318	thread_mtx_lock(thread);
319	if (!thread->active) {
320	thread_mtx_unlock(thread);
321
322	return (KERN_TERMINATED);
323	}
324
325	switch (flavor) {
326
327	case THREAD_EXTENDED_POLICY:
328	{
329	boolean_t timeshare = TRUE;
330
331	if (count >= THREAD_EXTENDED_POLICY_COUNT) {
332	thread_extended_policy_t info;
333
334	info = (thread_extended_policy_t)policy_info;
335	timeshare = info->timeshare;
336	}
337
338	sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
339
340	spl_t s = splsched();
341	thread_lock(thread);
342
343	thread_set_user_sched_mode_and_recompute_pri(thread, mode);
344
345	thread_unlock(thread);
346	splx(s);
347
348	pend_token.tpt_update_thread_sfi = `1`;
349
350	break;
351	}
352
353	case THREAD_TIME_CONSTRAINT_POLICY:
354	{
355	thread_time_constraint_policy_t info;
356
357	if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
358	result = KERN_INVALID_ARGUMENT;
359	break;
360	}
361
362	info = (thread_time_constraint_policy_t)policy_info;
363	if (info->constraint < info->computation \|\|
364	info->computation > max_rt_quantum \|\|
365	info->computation < min_rt_quantum ) {
366	result = KERN_INVALID_ARGUMENT;
367	break;
368	}
369
370	spl_t s = splsched();
371	thread_lock(thread);
372
373	thread->realtime.period = info->period;
374	thread->realtime.computation = info->computation;
375	thread->realtime.constraint = info->constraint;
376	thread->realtime.preemptible = info->preemptible;
377
378	thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
379
380	thread_unlock(thread);
381	splx(s);
382
383	pend_token.tpt_update_thread_sfi = `1`;
384
385	break;
386	}
387
388	case THREAD_PRECEDENCE_POLICY:
389	{
390	thread_precedence_policy_t info;
391
392	if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
393	result = KERN_INVALID_ARGUMENT;
394	break;
395	}
396	info = (thread_precedence_policy_t)policy_info;
397
398	spl_t s = splsched();
399	thread_lock(thread);
400
401	thread->importance = info->importance;
402
403	thread_recompute_priority(thread);
404
405	thread_unlock(thread);
406	splx(s);
407
408	break;
409	}
410
411	case THREAD_AFFINITY_POLICY:
412	{
413	thread_affinity_policy_t info;
414
415	if (!thread_affinity_is_supported()) {
416	result = KERN_NOT_SUPPORTED;
417	break;
418	}
419	if (count < THREAD_AFFINITY_POLICY_COUNT) {
420	result = KERN_INVALID_ARGUMENT;
421	break;
422	}
423
424	info = (thread_affinity_policy_t) policy_info;
425	/*
426	* Unlock the thread mutex here and
427	* return directly after calling thread_affinity_set().
428	* This is necessary for correct lock ordering because
429	* thread_affinity_set() takes the task lock.
430	*/
431	thread_mtx_unlock(thread);
432	return thread_affinity_set(thread, info->affinity_tag);
433	}
434
435	#if CONFIG_EMBEDDED
436	case THREAD_BACKGROUND_POLICY:
437	{
438	thread_background_policy_t info;
439
440	if (count < THREAD_BACKGROUND_POLICY_COUNT) {
441	result = KERN_INVALID_ARGUMENT;
442	break;
443	}
444
445	if (thread->task != current_task()) {
446	result = KERN_PROTECTION_FAILURE;
447	break;
448	}
449
450	info = (thread_background_policy_t) policy_info;
451
452	int enable;
453
454	if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG)
455	enable = TASK_POLICY_ENABLE;
456	else
457	enable = TASK_POLICY_DISABLE;
458
459	int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
460
461	proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, `0`, &pend_token);
462
463	break;
464	}
465	#endif /* CONFIG_EMBEDDED */
466
467	case THREAD_THROUGHPUT_QOS_POLICY:
468	{
469	thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
470	thread_throughput_qos_t tqos;
471
472	if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
473	result = KERN_INVALID_ARGUMENT;
474	break;
475	}
476
477	if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
478	break;
479
480	tqos = qos_extract(info->thread_throughput_qos_tier);
481
482	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
483	TASK_POLICY_THROUGH_QOS, tqos, `0`, &pend_token);
484
485	break;
486	}
487
488	case THREAD_LATENCY_QOS_POLICY:
489	{
490	thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
491	thread_latency_qos_t lqos;
492
493	if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
494	result = KERN_INVALID_ARGUMENT;
495	break;
496	}
497
498	if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
499	break;
500
501	lqos = qos_extract(info->thread_latency_qos_tier);
502
503	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
504	TASK_POLICY_LATENCY_QOS, lqos, `0`, &pend_token);
505
506	break;
507	}
508
509	case THREAD_QOS_POLICY:
510	{
511	thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
512
513	if (count < THREAD_QOS_POLICY_COUNT) {
514	result = KERN_INVALID_ARGUMENT;
515	break;
516	}
517
518	if (info->qos_tier < `0` \|\| info->qos_tier >= THREAD_QOS_LAST) {
519	result = KERN_INVALID_ARGUMENT;
520	break;
521	}
522
523	if (info->tier_importance > `0` \|\| info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
524	result = KERN_INVALID_ARGUMENT;
525	break;
526	}
527
528	if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != `0`) {
529	result = KERN_INVALID_ARGUMENT;
530	break;
531	}
532
533	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
534	info->qos_tier, -info->tier_importance, &pend_token);
535
536	break;
537	}
538
539	default:
540	result = KERN_INVALID_ARGUMENT;
541	break;
542	}
543
544	thread_mtx_unlock(thread);
545
546	thread_policy_update_complete_unlocked(thread, &pend_token);
547
548	return (result);
549	}
550
551	/*
552	* Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
553	* Both result in FIXED mode scheduling.
554	*/
555	static sched_mode_t
556	convert_policy_to_sched_mode(integer_t policy) {
557	switch (policy) {
558	case POLICY_TIMESHARE:
559	return TH_MODE_TIMESHARE;
560	case POLICY_RR:
561	case POLICY_FIFO:
562	return TH_MODE_FIXED;
563	default:
564	panic("unexpected sched policy: %d", policy);
565	return TH_MODE_NONE;
566	}
567	}
568
569	/*
570	* Called either with the thread mutex locked
571	* or from the pthread kext in a 'safe place'.
572	*/
573	static kern_return_t
574	thread_set_mode_and_absolute_pri_internal(thread_t thread,
575	sched_mode_t mode,
576	integer_t priority,
577	task_pend_token_t pend_token)
578	{
579	kern_return_t kr = KERN_SUCCESS;
580
581	spl_t s = splsched();
582	thread_lock(thread);
583
584	/ This path isn't allowed to change a thread out of realtime. /
585	if ((thread->sched_mode == TH_MODE_REALTIME) \|\|
586	(thread->saved_mode == TH_MODE_REALTIME)) {
587	kr = KERN_FAILURE;
588	goto unlock;
589	}
590
591	if (thread->policy_reset) {
592	kr = KERN_SUCCESS;
593	goto unlock;
594	}
595
596	sched_mode_t old_mode = thread->sched_mode;
597
598	/*
599	* Reverse engineer and apply the correct importance value
600	* from the requested absolute priority value.
601	*
602	* TODO: Store the absolute priority value instead
603	*/
604
605	if (priority >= thread->max_priority)
606	priority = thread->max_priority - thread->task_priority;
607	else if (priority >= MINPRI_KERNEL)
608	priority -= MINPRI_KERNEL;
609	else if (priority >= MINPRI_RESERVED)
610	priority -= MINPRI_RESERVED;
611	else
612	priority -= BASEPRI_DEFAULT;
613
614	priority += thread->task_priority;
615
616	if (priority > thread->max_priority)
617	priority = thread->max_priority;
618	else if (priority < MINPRI)
619	priority = MINPRI;
620
621	thread->importance = priority - thread->task_priority;
622
623	thread_set_user_sched_mode_and_recompute_pri(thread, mode);
624
625	if (mode != old_mode)
626	pend_token->tpt_update_thread_sfi = `1`;
627
628	unlock:
629	thread_unlock(thread);
630	splx(s);
631
632	return kr;
633	}
634
635	uint8_t
636	thread_workq_pri_for_qos(thread_qos_t qos)
637	{
638	assert(qos < THREAD_QOS_LAST);
639	return (uint8_t)thread_qos_policy_params.qos_pri[qos];
640	}
641
642	thread_qos_t
643	thread_workq_qos_for_pri(int priority)
644	{
645	int qos;
646	if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
647	// indicate that workq should map >UI threads to workq's
648	// internal notation for above-UI work.
649	return THREAD_QOS_UNSPECIFIED;
650	}
651	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
652	// map a given priority up to the next nearest qos band.
653	if (thread_qos_policy_params.qos_pri[qos - `1`] < priority) {
654	return qos;
655	}
656	}
657	return THREAD_QOS_MAINTENANCE;
658	}
659
660	/*
661	* private interface for pthread workqueues
662	*
663	* Set scheduling policy & absolute priority for thread
664	* May be called with spinlocks held
665	* Thread mutex lock is not held
666	*/
667	void
668	thread_reset_workq_qos(thread_t thread, uint32_t qos)
669	{
670	struct task_pend_token pend_token = {};
671
672	assert(qos < THREAD_QOS_LAST);
673
674	spl_t s = splsched();
675	thread_lock(thread);
676
677	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
678	TASK_POLICY_QOS_AND_RELPRIO, qos, `0`, &pend_token);
679	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
680	TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, `0`,
681	&pend_token);
682
683	assert(pend_token.tpt_update_sockets == `0`);
684
685	thread_unlock(thread);
686	splx(s);
687
688	thread_policy_update_complete_unlocked(thread, &pend_token);
689	}
690
691	/*
692	* private interface for pthread workqueues
693	*
694	* Set scheduling policy & absolute priority for thread
695	* May be called with spinlocks held
696	* Thread mutex lock is held
697	*/
698	void
699	thread_set_workq_override(thread_t thread, uint32_t qos)
700	{
701	struct task_pend_token pend_token = {};
702
703	assert(qos < THREAD_QOS_LAST);
704
705	spl_t s = splsched();
706	thread_lock(thread);
707
708	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
709	TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, `0`, &pend_token);
710
711	assert(pend_token.tpt_update_sockets == `0`);
712
713	thread_unlock(thread);
714	splx(s);
715
716	thread_policy_update_complete_unlocked(thread, &pend_token);
717	}
718
719	/*
720	* private interface for pthread workqueues
721	*
722	* Set scheduling policy & absolute priority for thread
723	* May be called with spinlocks held
724	* Thread mutex lock is not held
725	*/
726	void
727	thread_set_workq_pri(thread_t thread,
728	thread_qos_t qos,
729	integer_t priority,
730	integer_t policy)
731	{
732	struct task_pend_token pend_token = {};
733	sched_mode_t mode = convert_policy_to_sched_mode(policy);
734
735	assert(qos < THREAD_QOS_LAST);
736	assert(thread->static_param);
737
738	if (!thread->static_param \|\| !thread->active)
739	return;
740
741	spl_t s = splsched();
742	thread_lock(thread);
743
744	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
745	TASK_POLICY_QOS_AND_RELPRIO, qos, `0`, &pend_token);
746	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
747	TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
748	`0`, &pend_token);
749
750	thread_unlock(thread);
751	splx(s);
752
753	/ Concern: this doesn't hold the mutex... /
754
755	__assert_only kern_return_t kr;
756	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
757	&pend_token);
758	assert(kr == KERN_SUCCESS);
759
760	if (pend_token.tpt_update_thread_sfi)
761	sfi_reevaluate(thread);
762	}
763
764	/*
765	* thread_set_mode_and_absolute_pri:
766	*
767	* Set scheduling policy & absolute priority for thread, for deprecated
768	* thread_set_policy and thread_policy interfaces.
769	*
770	* Called with nothing locked.
771	*/
772	kern_return_t
773	thread_set_mode_and_absolute_pri(thread_t thread,
774	integer_t policy,
775	integer_t priority)
776	{
777	kern_return_t kr = KERN_SUCCESS;
778	struct task_pend_token pend_token = {};
779
780	sched_mode_t mode = convert_policy_to_sched_mode(policy);
781
782	thread_mtx_lock(thread);
783
784	if (!thread->active) {
785	kr = KERN_TERMINATED;
786	goto unlock;
787	}
788
789	if (thread_is_static_param(thread)) {
790	kr = KERN_POLICY_STATIC;
791	goto unlock;
792	}
793
794	/ Setting legacy policies on threads kills the current QoS /
795	if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
796	thread_remove_qos_policy_locked(thread, &pend_token);
797
798	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
799
800	unlock:
801	thread_mtx_unlock(thread);
802
803	thread_policy_update_complete_unlocked(thread, &pend_token);
804
805	return (kr);
806	}
807
808	/*
809	* Set the thread's requested mode and recompute priority
810	* Called with thread mutex and thread locked
811	*
812	* TODO: Mitigate potential problems caused by moving thread to end of runq
813	* whenever its priority is recomputed
814	* Only remove when it actually changes? Attempt to re-insert at appropriate location?
815	*/
816	static void
817	thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
818	{
819	if (thread->policy_reset)
820	return;
821
822	boolean_t removed = thread_run_queue_remove(thread);
823
824	/*
825	* TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
826	* That way there's zero confusion over which the user wants
827	* and which the kernel wants.
828	*/
829	if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
830	thread->saved_mode = mode;
831	else
832	sched_set_thread_mode(thread, mode);
833
834	thread_recompute_priority(thread);
835
836	if (removed)
837	thread_run_queue_reinsert(thread, SCHED_TAILQ);
838	}
839
840	/ called at splsched with thread lock locked /
841	static void
842	thread_update_qos_cpu_time_locked(thread_t thread)
843	{
844	task_t task = thread->task;
845	uint64_t timer_sum, timer_delta;
846
847	/*
848	* This is only as accurate as the distance between
849	* last context switch (embedded) or last user/kernel boundary transition (desktop)
850	* because user_timer and system_timer are only updated then.
851	*
852	* TODO: Consider running a timer_update operation here to update it first.
853	* Maybe doable with interrupts disabled from current thread.
854	* If the thread is on a different core, may not be easy to get right.
855	*
856	* TODO: There should be a function for this in timer.c
857	*/
858
859	timer_sum = timer_grab(&thread->user_timer);
860	timer_sum += timer_grab(&thread->system_timer);
861	timer_delta = timer_sum - thread->vtimer_qos_save;
862
863	thread->vtimer_qos_save = timer_sum;
864
865	uint64_t* task_counter = NULL;
866
867	/ Update the task-level effective and requested qos stats atomically, because we don't have the task lock. /
868	switch (thread->effective_policy.thep_qos) {
869	case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
870	case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
871	case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
872	case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
873	case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
874	case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
875	case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
876	default:
877	panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
878	}
879
880	OSAddAtomic64(timer_delta, task_counter);
881
882	/ Update the task-level qos stats atomically, because we don't have the task lock. /
883	switch (thread->requested_policy.thrp_qos) {
884	case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
885	case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
886	case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
887	case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
888	case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
889	case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
890	case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
891	default:
892	panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
893	}
894
895	OSAddAtomic64(timer_delta, task_counter);
896	}
897
898	/*
899	* called with no thread locks held
900	* may hold task lock
901	*/
902	void
903	thread_update_qos_cpu_time(thread_t thread)
904	{
905	thread_mtx_lock(thread);
906
907	spl_t s = splsched();
908	thread_lock(thread);
909
910	thread_update_qos_cpu_time_locked(thread);
911
912	thread_unlock(thread);
913	splx(s);
914
915	thread_mtx_unlock(thread);
916	}
917
918	/*
919	* Calculate base priority from thread attributes, and set it on the thread
920	*
921	* Called with thread_lock and thread mutex held.
922	*/
923	void
924	thread_recompute_priority(
925	thread_t thread)
926	{
927	integer_t priority;
928
929	if (thread->policy_reset)
930	return;
931
932	if (thread->sched_mode == TH_MODE_REALTIME) {
933	sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
934	return;
935	} else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
936	int qos = thread->effective_policy.thep_qos;
937	int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
938	int qos_relprio = -(thread->effective_policy.thep_qos_relprio); / stored in task policy inverted /
939	int qos_scaled_relprio;
940
941	assert(qos >= `0` && qos < THREAD_QOS_LAST);
942	assert(qos_relprio <= `0` && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
943
944	priority = thread_qos_policy_params.qos_pri[qos];
945	qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
946
947	if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == `1`) {
948	/ Bump priority 46 to 47 when in a frontmost app /
949	qos_scaled_relprio += `1`;
950	}
951
952	/ TODO: factor in renice priority here? /
953
954	priority += qos_scaled_relprio;
955	} else {
956	if (thread->importance > MAXPRI)
957	priority = MAXPRI;
958	else if (thread->importance < -MAXPRI)
959	priority = -MAXPRI;
960	else
961	priority = thread->importance;
962
963	priority += thread->task_priority;
964	}
965
966	priority = MAX(priority, thread->user_promotion_basepri);
967
968	/*
969	* Clamp priority back into the allowed range for this task.
970	* The initial priority value could be out of this range due to:
971	* Task clamped to BG or Utility (max-pri is 4, or 20)
972	* Task is user task (max-pri is 63)
973	* Task is kernel task (max-pri is 95)
974	* Note that thread->importance is user-settable to any integer
975	* via THREAD_PRECEDENCE_POLICY.
976	*/
977	if (priority > thread->max_priority)
978	priority = thread->max_priority;
979	else if (priority < MINPRI)
980	priority = MINPRI;
981
982	if (thread->saved_mode == TH_MODE_REALTIME &&
983	thread->sched_flags & TH_SFLAG_FAILSAFE)
984	priority = DEPRESSPRI;
985
986	if (thread->effective_policy.thep_terminated == TRUE) {
987	/*
988	* We temporarily want to override the expected priority to
989	* ensure that the thread exits in a timely manner.
990	* Note that this is allowed to exceed thread->max_priority
991	* so that the thread is no longer clamped to background
992	* during the final exit phase.
993	*/
994	if (priority < thread->task_priority)
995	priority = thread->task_priority;
996	if (priority < BASEPRI_DEFAULT)
997	priority = BASEPRI_DEFAULT;
998	}
999
1000	#if CONFIG_EMBEDDED
1001	/ No one can have a base priority less than MAXPRI_THROTTLE /
1002	if (priority < MAXPRI_THROTTLE)
1003	priority = MAXPRI_THROTTLE;
1004	#endif /* CONFIG_EMBEDDED */
1005
1006	sched_set_thread_base_priority(thread, priority);
1007	}
1008
1009	/ Called with the task lock held, but not the thread mutex or spinlock /
1010	void
1011	thread_policy_update_tasklocked(
1012	thread_t thread,
1013	integer_t priority,
1014	integer_t max_priority,
1015	task_pend_token_t pend_token)
1016	{
1017	thread_mtx_lock(thread);
1018
1019	if (!thread->active \|\| thread->policy_reset) {
1020	thread_mtx_unlock(thread);
1021	return;
1022	}
1023
1024	spl_t s = splsched();
1025	thread_lock(thread);
1026
1027	__unused
1028	integer_t old_max_priority = thread->max_priority;
1029
1030	thread->task_priority = priority;
1031	thread->max_priority = max_priority;
1032
1033	#if CONFIG_EMBEDDED
1034	/*
1035	* When backgrounding a thread, iOS has the semantic that
1036	* realtime and fixed priority threads should be demoted
1037	* to timeshare background threads.
1038	*
1039	* On OSX, realtime and fixed priority threads don't lose their mode.
1040	*
1041	* TODO: Do this inside the thread policy update routine in order to avoid double
1042	* remove/reinsert for a runnable thread
1043	*/
1044	if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1045	sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1046	} else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1047	sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1048	}
1049	#endif /* CONFIG_EMBEDDED */
1050
1051	thread_policy_update_spinlocked(thread, TRUE, pend_token);
1052
1053	thread_unlock(thread);
1054	splx(s);
1055
1056	thread_mtx_unlock(thread);
1057	}
1058
1059	/*
1060	* Reset thread to default state in preparation for termination
1061	* Called with thread mutex locked
1062	*
1063	* Always called on current thread, so we don't need a run queue remove
1064	*/
1065	void
1066	thread_policy_reset(
1067	thread_t thread)
1068	{
1069	spl_t s;
1070
1071	assert(thread == current_thread());
1072
1073	s = splsched();
1074	thread_lock(thread);
1075
1076	if (thread->sched_flags & TH_SFLAG_FAILSAFE)
1077	sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1078
1079	if (thread->sched_flags & TH_SFLAG_THROTTLED)
1080	sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1081
1082	/ At this point, the various demotions should be inactive /
1083	assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1084	assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1085	assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1086
1087	/ Reset thread back to task-default basepri and mode /
1088	sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1089
1090	sched_set_thread_mode(thread, newmode);
1091
1092	thread->importance = `0`;
1093
1094	/ Prevent further changes to thread base priority or mode /
1095	thread->policy_reset = `1`;
1096
1097	sched_set_thread_base_priority(thread, thread->task_priority);
1098
1099	thread_unlock(thread);
1100	splx(s);
1101	}
1102
1103	kern_return_t
1104	thread_policy_get(
1105	thread_t thread,
1106	thread_policy_flavor_t flavor,
1107	thread_policy_t policy_info,
1108	mach_msg_type_number_t *count,
1109	boolean_t *get_default)
1110	{
1111	kern_return_t result = KERN_SUCCESS;
1112
1113	if (thread == THREAD_NULL)
1114	return (KERN_INVALID_ARGUMENT);
1115
1116	thread_mtx_lock(thread);
1117	if (!thread->active) {
1118	thread_mtx_unlock(thread);
1119
1120	return (KERN_TERMINATED);
1121	}
1122
1123	switch (flavor) {
1124
1125	case THREAD_EXTENDED_POLICY:
1126	{
1127	boolean_t timeshare = TRUE;
1128
1129	if (!(*get_default)) {
1130	spl_t s = splsched();
1131	thread_lock(thread);
1132
1133	if ( (thread->sched_mode != TH_MODE_REALTIME) &&
1134	(thread->saved_mode != TH_MODE_REALTIME) ) {
1135	if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
1136	timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != `0`;
1137	else
1138	timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != `0`;
1139	}
1140	else
1141	*get_default = TRUE;
1142
1143	thread_unlock(thread);
1144	splx(s);
1145	}
1146
1147	if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1148	thread_extended_policy_t info;
1149
1150	info = (thread_extended_policy_t)policy_info;
1151	info->timeshare = timeshare;
1152	}
1153
1154	break;
1155	}
1156
1157	case THREAD_TIME_CONSTRAINT_POLICY:
1158	{
1159	thread_time_constraint_policy_t info;
1160
1161	if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1162	result = KERN_INVALID_ARGUMENT;
1163	break;
1164	}
1165
1166	info = (thread_time_constraint_policy_t)policy_info;
1167
1168	if (!(*get_default)) {
1169	spl_t s = splsched();
1170	thread_lock(thread);
1171
1172	if ( (thread->sched_mode == TH_MODE_REALTIME) \|\|
1173	(thread->saved_mode == TH_MODE_REALTIME) ) {
1174	info->period = thread->realtime.period;
1175	info->computation = thread->realtime.computation;
1176	info->constraint = thread->realtime.constraint;
1177	info->preemptible = thread->realtime.preemptible;
1178	}
1179	else
1180	*get_default = TRUE;
1181
1182	thread_unlock(thread);
1183	splx(s);
1184	}
1185
1186	if (*get_default) {
1187	info->period = `0`;
1188	info->computation = default_timeshare_computation;
1189	info->constraint = default_timeshare_constraint;
1190	info->preemptible = TRUE;
1191	}
1192
1193	break;
1194	}
1195
1196	case THREAD_PRECEDENCE_POLICY:
1197	{
1198	thread_precedence_policy_t info;
1199
1200	if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1201	result = KERN_INVALID_ARGUMENT;
1202	break;
1203	}
1204
1205	info = (thread_precedence_policy_t)policy_info;
1206
1207	if (!(*get_default)) {
1208	spl_t s = splsched();
1209	thread_lock(thread);
1210
1211	info->importance = thread->importance;
1212
1213	thread_unlock(thread);
1214	splx(s);
1215	}
1216	else
1217	info->importance = `0`;
1218
1219	break;
1220	}
1221
1222	case THREAD_AFFINITY_POLICY:
1223	{
1224	thread_affinity_policy_t info;
1225
1226	if (!thread_affinity_is_supported()) {
1227	result = KERN_NOT_SUPPORTED;
1228	break;
1229	}
1230	if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1231	result = KERN_INVALID_ARGUMENT;
1232	break;
1233	}
1234
1235	info = (thread_affinity_policy_t)policy_info;
1236
1237	if (!(*get_default))
1238	info->affinity_tag = thread_affinity_get(thread);
1239	else
1240	info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1241
1242	break;
1243	}
1244
1245	case THREAD_POLICY_STATE:
1246	{
1247	thread_policy_state_t info;
1248
1249	if (*count < THREAD_POLICY_STATE_COUNT) {
1250	result = KERN_INVALID_ARGUMENT;
1251	break;
1252	}
1253
1254	/ Only root can get this info /
1255	if (current_task()->sec_token.val[`0`] != `0`) {
1256	result = KERN_PROTECTION_FAILURE;
1257	break;
1258	}
1259
1260	info = (thread_policy_state_t)(void*)policy_info;
1261
1262	if (!(*get_default)) {
1263	info->flags = `0`;
1264
1265	spl_t s = splsched();
1266	thread_lock(thread);
1267
1268	info->flags \|= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : `0`);
1269
1270	info->thps_requested_policy = (uint64_t)(void*)(&thread->requested_policy);
1271	info->thps_effective_policy = (uint64_t)(void*)(&thread->effective_policy);
1272
1273	info->thps_user_promotions = `0`;
1274	info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1275	info->thps_ipc_overrides = thread->ipc_overrides;
1276
1277	proc_get_thread_policy_bitfield(thread, info);
1278
1279	thread_unlock(thread);
1280	splx(s);
1281	} else {
1282	info->requested = `0`;
1283	info->effective = `0`;
1284	info->pending = `0`;
1285	}
1286
1287	break;
1288	}
1289
1290	case THREAD_LATENCY_QOS_POLICY:
1291	{
1292	thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1293	thread_latency_qos_t plqos;
1294
1295	if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1296	result = KERN_INVALID_ARGUMENT;
1297	break;
1298	}
1299
1300	if (*get_default) {
1301	plqos = `0`;
1302	} else {
1303	plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1304	}
1305
1306	info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1307	}
1308	break;
1309
1310	case THREAD_THROUGHPUT_QOS_POLICY:
1311	{
1312	thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1313	thread_throughput_qos_t ptqos;
1314
1315	if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1316	result = KERN_INVALID_ARGUMENT;
1317	break;
1318	}
1319
1320	if (*get_default) {
1321	ptqos = `0`;
1322	} else {
1323	ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1324	}
1325
1326	info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1327	}
1328	break;
1329
1330	case THREAD_QOS_POLICY:
1331	{
1332	thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1333
1334	if (*count < THREAD_QOS_POLICY_COUNT) {
1335	result = KERN_INVALID_ARGUMENT;
1336	break;
1337	}
1338
1339	if (!(*get_default)) {
1340	int relprio_value = `0`;
1341	info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1342	TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1343
1344	info->tier_importance = -relprio_value;
1345	} else {
1346	info->qos_tier = THREAD_QOS_UNSPECIFIED;
1347	info->tier_importance = `0`;
1348	}
1349
1350	break;
1351	}
1352
1353	default:
1354	result = KERN_INVALID_ARGUMENT;
1355	break;
1356	}
1357
1358	thread_mtx_unlock(thread);
1359
1360	return (result);
1361	}
1362
1363	void
1364	thread_policy_create(thread_t thread)
1365	{
1366	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1367	(IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE \| TASK_POLICY_THREAD))) \| DBG_FUNC_START,
1368	thread_tid(thread), theffective_0(thread),
1369	theffective_1(thread), thread->base_pri, `0`);
1370
1371	/ We pass a pend token but ignore it /
1372	struct task_pend_token pend_token = {};
1373
1374	thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1375
1376	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1377	(IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE \| TASK_POLICY_THREAD))) \| DBG_FUNC_END,
1378	thread_tid(thread), theffective_0(thread),
1379	theffective_1(thread), thread->base_pri, `0`);
1380	}
1381
1382	static void
1383	thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1384	{
1385	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1386	(IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) \| DBG_FUNC_START),
1387	thread_tid(thread), theffective_0(thread),
1388	theffective_1(thread), thread->base_pri, `0`);
1389
1390	thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1391
1392	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1393	(IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) \| DBG_FUNC_END,
1394	thread_tid(thread), theffective_0(thread),
1395	theffective_1(thread), thread->base_pri, `0`);
1396	}
1397
1398
1399
1400	/*
1401	* One thread state update function TO RULE THEM ALL
1402	*
1403	* This function updates the thread effective policy fields
1404	* and pushes the results to the relevant subsystems.
1405	*
1406	* Returns TRUE if a pended action needs to be run.
1407	*
1408	* Called with thread spinlock locked, task may be locked, thread mutex may be locked
1409	*/
1410	static void
1411	thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1412	task_pend_token_t pend_token)
1413	{
1414	/*
1415	* Step 1:
1416	* Gather requested policy and effective task state
1417	*/
1418
1419	struct thread_requested_policy requested = thread->requested_policy;
1420	struct task_effective_policy task_effective = thread->task->effective_policy;
1421
1422	/*
1423	* Step 2:
1424	* Calculate new effective policies from requested policy, task and thread state
1425	* Rules:
1426	* Don't change requested, it won't take effect
1427	*/
1428
1429	struct thread_effective_policy next = {};
1430
1431	next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1432
1433	uint32_t next_qos = requested.thrp_qos;
1434
1435	if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1436	next_qos = MAX(requested.thrp_qos_override, next_qos);
1437	next_qos = MAX(requested.thrp_qos_promote, next_qos);
1438	next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
1439	next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1440	}
1441
1442	next.thep_qos = next_qos;
1443
1444	/ A task clamp will result in an effective QoS even when requested is UNSPECIFIED /
1445	if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1446	if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
1447	next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1448	else
1449	next.thep_qos = task_effective.tep_qos_clamp;
1450	}
1451
1452	/*
1453	* Extract outbound-promotion QoS before applying task ceiling or BG clamp
1454	* This allows QoS promotions to work properly even after the process is unclamped.
1455	*/
1456	next.thep_qos_promote = next.thep_qos;
1457
1458	/ The ceiling only applies to threads that are in the QoS world /
1459	if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1460	next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1461	next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1462	}
1463
1464	/ Apply the sync ipc qos override /
1465	assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1466
1467	/*
1468	* The QoS relative priority is only applicable when the original programmer's
1469	* intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1470	* USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1471	* since otherwise it would be lower than unclamped threads. Similarly, in the
1472	* presence of boosting, the programmer doesn't know what other actors
1473	* are boosting the thread.
1474	*/
1475	if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1476	(requested.thrp_qos == next.thep_qos) &&
1477	(requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1478	next.thep_qos_relprio = requested.thrp_qos_relprio;
1479	} else {
1480	next.thep_qos_relprio = `0`;
1481	}
1482
1483	/ Calculate DARWIN_BG /
1484	boolean_t wants_darwinbg = FALSE;
1485	boolean_t wants_all_sockets_bg = FALSE; / Do I want my existing sockets to be bg /
1486
1487	/*
1488	* If DARWIN_BG has been requested at either level, it's engaged.
1489	* darwinbg threads always create bg sockets,
1490	* but only some types of darwinbg change the sockets
1491	* after they're created
1492	*/
1493	if (requested.thrp_int_darwinbg \|\| requested.thrp_ext_darwinbg)
1494	wants_all_sockets_bg = wants_darwinbg = TRUE;
1495
1496	if (requested.thrp_pidbind_bg)
1497	wants_all_sockets_bg = wants_darwinbg = TRUE;
1498
1499	if (task_effective.tep_darwinbg)
1500	wants_darwinbg = TRUE;
1501
1502	if (next.thep_qos == THREAD_QOS_BACKGROUND \|\|
1503	next.thep_qos == THREAD_QOS_MAINTENANCE)
1504	wants_darwinbg = TRUE;
1505
1506	/ Calculate side effects of DARWIN_BG /
1507
1508	if (wants_darwinbg)
1509	next.thep_darwinbg = `1`;
1510
1511	if (next.thep_darwinbg \|\| task_effective.tep_new_sockets_bg)
1512	next.thep_new_sockets_bg = `1`;
1513
1514	/ Don't use task_effective.tep_all_sockets_bg here /
1515	if (wants_all_sockets_bg)
1516	next.thep_all_sockets_bg = `1`;
1517
1518	/ darwinbg implies background QOS (or lower) /
1519	if (next.thep_darwinbg &&
1520	(next.thep_qos > THREAD_QOS_BACKGROUND \|\| next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1521	next.thep_qos = THREAD_QOS_BACKGROUND;
1522	next.thep_qos_relprio = `0`;
1523	}
1524
1525	/ Calculate IO policy /
1526
1527	int iopol = THROTTLE_LEVEL_TIER0;
1528
1529	/ Factor in the task's IO policy /
1530	if (next.thep_darwinbg)
1531	iopol = MAX(iopol, task_effective.tep_bg_iotier);
1532
1533	iopol = MAX(iopol, task_effective.tep_io_tier);
1534
1535	/ Look up the associated IO tier value for the QoS class /
1536	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1537
1538	iopol = MAX(iopol, requested.thrp_int_iotier);
1539	iopol = MAX(iopol, requested.thrp_ext_iotier);
1540
1541	next.thep_io_tier = iopol;
1542
1543	/*
1544	* If a QoS override is causing IO to go into a lower tier, we also set
1545	* the passive bit so that a thread doesn't end up stuck in its own throttle
1546	* window when the override goes away.
1547	*/
1548	boolean_t qos_io_override_active = FALSE;
1549	if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1550	thread_qos_policy_params.qos_iotier[requested.thrp_qos])
1551	qos_io_override_active = TRUE;
1552
1553	/ Calculate Passive IO policy /
1554	if (requested.thrp_ext_iopassive \|\|
1555	requested.thrp_int_iopassive \|\|
1556	qos_io_override_active \|\|
1557	task_effective.tep_io_passive )
1558	next.thep_io_passive = `1`;
1559
1560	/ Calculate timer QOS /
1561	uint32_t latency_qos = requested.thrp_latency_qos;
1562
1563	latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1564	latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1565
1566	next.thep_latency_qos = latency_qos;
1567
1568	/ Calculate throughput QOS /
1569	uint32_t through_qos = requested.thrp_through_qos;
1570
1571	through_qos = MAX(through_qos, task_effective.tep_through_qos);
1572	through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1573
1574	next.thep_through_qos = through_qos;
1575
1576	if (task_effective.tep_terminated \|\| requested.thrp_terminated) {
1577	/ Shoot down the throttles that slow down exit or response to SIGTERM /
1578	next.thep_terminated = `1`;
1579	next.thep_darwinbg = `0`;
1580	next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1581	next.thep_qos = THREAD_QOS_UNSPECIFIED;
1582	next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1583	next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1584	}
1585
1586	/*
1587	* Step 3:
1588	* Swap out old policy for new policy
1589	*/
1590
1591	struct thread_effective_policy prev = thread->effective_policy;
1592
1593	thread_update_qos_cpu_time_locked(thread);
1594
1595	/ This is the point where the new values become visible to other threads /
1596	thread->effective_policy = next;
1597
1598	/*
1599	* Step 4:
1600	* Pend updates that can't be done while holding the thread lock
1601	*/
1602
1603	if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg)
1604	pend_token->tpt_update_sockets = `1`;
1605
1606	/ TODO: Doesn't this only need to be done if the throttle went up? /
1607	if (prev.thep_io_tier != next.thep_io_tier)
1608	pend_token->tpt_update_throttle = `1`;
1609
1610	/*
1611	* Check for the attributes that sfi_thread_classify() consults,
1612	* and trigger SFI re-evaluation.
1613	*/
1614	if (prev.thep_qos != next.thep_qos \|\|
1615	prev.thep_darwinbg != next.thep_darwinbg )
1616	pend_token->tpt_update_thread_sfi = `1`;
1617
1618	/*
1619	* Step 5:
1620	* Update other subsystems as necessary if something has changed
1621	*/
1622
1623	/ Check for the attributes that thread_recompute_priority() consults /
1624	if (prev.thep_qos != next.thep_qos \|\|
1625	prev.thep_qos_relprio != next.thep_qos_relprio \|\|
1626	prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent \|\|
1627	prev.thep_terminated != next.thep_terminated \|\|
1628	pend_token->tpt_force_recompute_pri == `1` \|\|
1629	recompute_priority) {
1630	thread_recompute_priority(thread);
1631	}
1632	}
1633
1634
1635	/*
1636	* Initiate a thread policy state transition on a thread with its TID
1637	* Useful if you cannot guarantee the thread won't get terminated
1638	* Precondition: No locks are held
1639	* Will take task lock - using the non-tid variant is faster
1640	* if you already have a thread ref.
1641	*/
1642	void
1643	proc_set_thread_policy_with_tid(task_t task,
1644	uint64_t tid,
1645	int category,
1646	int flavor,
1647	int value)
1648	{
1649	/ takes task lock, returns ref'ed thread or NULL /
1650	thread_t thread = task_findtid(task, tid);
1651
1652	if (thread == THREAD_NULL)
1653	return;
1654
1655	proc_set_thread_policy(thread, category, flavor, value);
1656
1657	thread_deallocate(thread);
1658	}
1659
1660	/*
1661	* Initiate a thread policy transition on a thread
1662	* This path supports networking transitions (i.e. darwinbg transitions)
1663	* Precondition: No locks are held
1664	*/
1665	void
1666	proc_set_thread_policy(thread_t thread,
1667	int category,
1668	int flavor,
1669	int value)
1670	{
1671	struct task_pend_token pend_token = {};
1672
1673	thread_mtx_lock(thread);
1674
1675	proc_set_thread_policy_locked(thread, category, flavor, value, `0`, &pend_token);
1676
1677	thread_mtx_unlock(thread);
1678
1679	thread_policy_update_complete_unlocked(thread, &pend_token);
1680	}
1681
1682	/*
1683	* Do the things that can't be done while holding a thread mutex.
1684	* These are set up to call back into thread policy to get the latest value,
1685	* so they don't have to be synchronized with the update.
1686	* The only required semantic is 'call this sometime after updating effective policy'
1687	*
1688	* Precondition: Thread mutex is not held
1689	*
1690	* This may be called with the task lock held, but in that case it won't be
1691	* called with tpt_update_sockets set.
1692	*/
1693	void
1694	thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1695	{
1696	#ifdef MACH_BSD
1697	if (pend_token->tpt_update_sockets)
1698	proc_apply_task_networkbg(thread->task->bsd_info, thread);
1699	#endif /* MACH_BSD */
1700
1701	if (pend_token->tpt_update_throttle)
1702	rethrottle_thread(thread->uthread);
1703
1704	if (pend_token->tpt_update_thread_sfi)
1705	sfi_reevaluate(thread);
1706	}
1707
1708	/*
1709	* Set and update thread policy
1710	* Thread mutex might be held
1711	*/
1712	static void
1713	proc_set_thread_policy_locked(thread_t thread,
1714	int category,
1715	int flavor,
1716	int value,
1717	int value2,
1718	task_pend_token_t pend_token)
1719	{
1720	spl_t s = splsched();
1721	thread_lock(thread);
1722
1723	proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1724
1725	thread_unlock(thread);
1726	splx(s);
1727	}
1728
1729	/*
1730	* Set and update thread policy
1731	* Thread spinlock is held
1732	*/
1733	static void
1734	proc_set_thread_policy_spinlocked(thread_t thread,
1735	int category,
1736	int flavor,
1737	int value,
1738	int value2,
1739	task_pend_token_t pend_token)
1740	{
1741	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1742	(IMPORTANCE_CODE(flavor, (category \| TASK_POLICY_THREAD))) \| DBG_FUNC_START,
1743	thread_tid(thread), threquested_0(thread),
1744	threquested_1(thread), value, `0`);
1745
1746	thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
1747
1748	thread_policy_update_spinlocked(thread, FALSE, pend_token);
1749
1750	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1751	(IMPORTANCE_CODE(flavor, (category \| TASK_POLICY_THREAD))) \| DBG_FUNC_END,
1752	thread_tid(thread), threquested_0(thread),
1753	threquested_1(thread), tpending(pend_token), `0`);
1754	}
1755
1756	/*
1757	* Set the requested state for a specific flavor to a specific value.
1758	*/
1759	static void
1760	thread_set_requested_policy_spinlocked(thread_t thread,
1761	int category,
1762	int flavor,
1763	int value,
1764	int value2)
1765	{
1766	int tier, passive;
1767
1768	struct thread_requested_policy requested = thread->requested_policy;
1769
1770	switch (flavor) {
1771
1772	/ Category: EXTERNAL and INTERNAL, thread and task /
1773
1774	case TASK_POLICY_DARWIN_BG:
1775	if (category == TASK_POLICY_EXTERNAL)
1776	requested.thrp_ext_darwinbg = value;
1777	else
1778	requested.thrp_int_darwinbg = value;
1779	break;
1780
1781	case TASK_POLICY_IOPOL:
1782	proc_iopol_to_tier(value, &tier, &passive);
1783	if (category == TASK_POLICY_EXTERNAL) {
1784	requested.thrp_ext_iotier = tier;
1785	requested.thrp_ext_iopassive = passive;
1786	} else {
1787	requested.thrp_int_iotier = tier;
1788	requested.thrp_int_iopassive = passive;
1789	}
1790	break;
1791
1792	case TASK_POLICY_IO:
1793	if (category == TASK_POLICY_EXTERNAL)
1794	requested.thrp_ext_iotier = value;
1795	else
1796	requested.thrp_int_iotier = value;
1797	break;
1798
1799	case TASK_POLICY_PASSIVE_IO:
1800	if (category == TASK_POLICY_EXTERNAL)
1801	requested.thrp_ext_iopassive = value;
1802	else
1803	requested.thrp_int_iopassive = value;
1804	break;
1805
1806	/ Category: ATTRIBUTE, thread only /
1807
1808	case TASK_POLICY_PIDBIND_BG:
1809	assert(category == TASK_POLICY_ATTRIBUTE);
1810	requested.thrp_pidbind_bg = value;
1811	break;
1812
1813	case TASK_POLICY_LATENCY_QOS:
1814	assert(category == TASK_POLICY_ATTRIBUTE);
1815	requested.thrp_latency_qos = value;
1816	break;
1817
1818	case TASK_POLICY_THROUGH_QOS:
1819	assert(category == TASK_POLICY_ATTRIBUTE);
1820	requested.thrp_through_qos = value;
1821	break;
1822
1823	case TASK_POLICY_QOS:
1824	assert(category == TASK_POLICY_ATTRIBUTE);
1825	requested.thrp_qos = value;
1826	break;
1827
1828	case TASK_POLICY_QOS_OVERRIDE:
1829	assert(category == TASK_POLICY_ATTRIBUTE);
1830	requested.thrp_qos_override = value;
1831	break;
1832
1833	case TASK_POLICY_QOS_AND_RELPRIO:
1834	assert(category == TASK_POLICY_ATTRIBUTE);
1835	requested.thrp_qos = value;
1836	requested.thrp_qos_relprio = value2;
1837	DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1838	break;
1839
1840	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1841	assert(category == TASK_POLICY_ATTRIBUTE);
1842	requested.thrp_qos_workq_override = value;
1843	break;
1844
1845	case TASK_POLICY_QOS_PROMOTE:
1846	assert(category == TASK_POLICY_ATTRIBUTE);
1847	requested.thrp_qos_promote = value;
1848	break;
1849
1850	case TASK_POLICY_QOS_IPC_OVERRIDE:
1851	assert(category == TASK_POLICY_ATTRIBUTE);
1852	requested.thrp_qos_ipc_override = value;
1853	break;
1854
1855	case TASK_POLICY_TERMINATED:
1856	assert(category == TASK_POLICY_ATTRIBUTE);
1857	requested.thrp_terminated = value;
1858	break;
1859
1860	default:
1861	panic("unknown task policy: %d %d %d", category, flavor, value);
1862	break;
1863	}
1864
1865	thread->requested_policy = requested;
1866	}
1867
1868	/*
1869	* Gets what you set. Effective values may be different.
1870	* Precondition: No locks are held
1871	*/
1872	int
1873	proc_get_thread_policy(thread_t thread,
1874	int category,
1875	int flavor)
1876	{
1877	int value = `0`;
1878	thread_mtx_lock(thread);
1879	value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
1880	thread_mtx_unlock(thread);
1881	return value;
1882	}
1883
1884	static int
1885	proc_get_thread_policy_locked(thread_t thread,
1886	int category,
1887	int flavor,
1888	int* value2)
1889	{
1890	int value = `0`;
1891
1892	spl_t s = splsched();
1893	thread_lock(thread);
1894
1895	value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
1896
1897	thread_unlock(thread);
1898	splx(s);
1899
1900	return value;
1901	}
1902
1903	/*
1904	* Gets what you set. Effective values may be different.
1905	*/
1906	static int
1907	thread_get_requested_policy_spinlocked(thread_t thread,
1908	int category,
1909	int flavor,
1910	int* value2)
1911	{
1912	int value = `0`;
1913
1914	struct thread_requested_policy requested = thread->requested_policy;
1915
1916	switch (flavor) {
1917	case TASK_POLICY_DARWIN_BG:
1918	if (category == TASK_POLICY_EXTERNAL)
1919	value = requested.thrp_ext_darwinbg;
1920	else
1921	value = requested.thrp_int_darwinbg;
1922	break;
1923	case TASK_POLICY_IOPOL:
1924	if (category == TASK_POLICY_EXTERNAL)
1925	value = proc_tier_to_iopol(requested.thrp_ext_iotier,
1926	requested.thrp_ext_iopassive);
1927	else
1928	value = proc_tier_to_iopol(requested.thrp_int_iotier,
1929	requested.thrp_int_iopassive);
1930	break;
1931	case TASK_POLICY_IO:
1932	if (category == TASK_POLICY_EXTERNAL)
1933	value = requested.thrp_ext_iotier;
1934	else
1935	value = requested.thrp_int_iotier;
1936	break;
1937	case TASK_POLICY_PASSIVE_IO:
1938	if (category == TASK_POLICY_EXTERNAL)
1939	value = requested.thrp_ext_iopassive;
1940	else
1941	value = requested.thrp_int_iopassive;
1942	break;
1943	case TASK_POLICY_QOS:
1944	assert(category == TASK_POLICY_ATTRIBUTE);
1945	value = requested.thrp_qos;
1946	break;
1947	case TASK_POLICY_QOS_OVERRIDE:
1948	assert(category == TASK_POLICY_ATTRIBUTE);
1949	value = requested.thrp_qos_override;
1950	break;
1951	case TASK_POLICY_LATENCY_QOS:
1952	assert(category == TASK_POLICY_ATTRIBUTE);
1953	value = requested.thrp_latency_qos;
1954	break;
1955	case TASK_POLICY_THROUGH_QOS:
1956	assert(category == TASK_POLICY_ATTRIBUTE);
1957	value = requested.thrp_through_qos;
1958	break;
1959	case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1960	assert(category == TASK_POLICY_ATTRIBUTE);
1961	value = requested.thrp_qos_workq_override;
1962	break;
1963	case TASK_POLICY_QOS_AND_RELPRIO:
1964	assert(category == TASK_POLICY_ATTRIBUTE);
1965	assert(value2 != NULL);
1966	value = requested.thrp_qos;
1967	*value2 = requested.thrp_qos_relprio;
1968	break;
1969	case TASK_POLICY_QOS_PROMOTE:
1970	assert(category == TASK_POLICY_ATTRIBUTE);
1971	value = requested.thrp_qos_promote;
1972	break;
1973	case TASK_POLICY_QOS_IPC_OVERRIDE:
1974	assert(category == TASK_POLICY_ATTRIBUTE);
1975	value = requested.thrp_qos_ipc_override;
1976	break;
1977	case TASK_POLICY_TERMINATED:
1978	assert(category == TASK_POLICY_ATTRIBUTE);
1979	value = requested.thrp_terminated;
1980	break;
1981
1982	default:
1983	panic("unknown policy_flavor %d", flavor);
1984	break;
1985	}
1986
1987	return value;
1988	}
1989
1990	/*
1991	* Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1992	*
1993	* NOTE: This accessor does not take the task or thread lock.
1994	* Notifications of state updates need to be externally synchronized with state queries.
1995	* This routine MUST remain interrupt safe, as it is potentially invoked
1996	* within the context of a timer interrupt.
1997	*
1998	* TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
1999	* Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2000	* I don't think that cost is worth not having the right answer.
2001	*/
2002	int
2003	proc_get_effective_thread_policy(thread_t thread,
2004	int flavor)
2005	{
2006	int value = `0`;
2007
2008	switch (flavor) {
2009	case TASK_POLICY_DARWIN_BG:
2010	/*
2011	* This call is used within the timer layer, as well as
2012	* prioritizing requests to the graphics system.
2013	* It also informs SFI and originator-bg-state.
2014	* Returns 1 for background mode, 0 for normal mode
2015	*/
2016
2017	value = thread->effective_policy.thep_darwinbg ? `1` : `0`;
2018	break;
2019	case TASK_POLICY_IO:
2020	/*
2021	* The I/O system calls here to find out what throttling tier to apply to an operation.
2022	* Returns THROTTLE_LEVEL_* values
2023	*/
2024	value = thread->effective_policy.thep_io_tier;
2025	if (thread->iotier_override != THROTTLE_LEVEL_NONE)
2026	value = MIN(value, thread->iotier_override);
2027	break;
2028	case TASK_POLICY_PASSIVE_IO:
2029	/*
2030	* The I/O system calls here to find out whether an operation should be passive.
2031	* (i.e. not cause operations with lower throttle tiers to be throttled)
2032	* Returns 1 for passive mode, 0 for normal mode
2033	*
2034	* If an override is causing IO to go into a lower tier, we also set
2035	* the passive bit so that a thread doesn't end up stuck in its own throttle
2036	* window when the override goes away.
2037	*/
2038	value = thread->effective_policy.thep_io_passive ? `1` : `0`;
2039	if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2040	thread->iotier_override < thread->effective_policy.thep_io_tier)
2041	value = `1`;
2042	break;
2043	case TASK_POLICY_ALL_SOCKETS_BG:
2044	/*
2045	* do_background_socket() calls this to determine whether
2046	* it should change the thread's sockets
2047	* Returns 1 for background mode, 0 for normal mode
2048	* This consults both thread and task so un-DBGing a thread while the task is BG
2049	* doesn't get you out of the network throttle.
2050	*/
2051	value = (thread->effective_policy.thep_all_sockets_bg \|\|
2052	thread->task->effective_policy.tep_all_sockets_bg) ? `1` : `0`;
2053	break;
2054	case TASK_POLICY_NEW_SOCKETS_BG:
2055	/*
2056	* socreate() calls this to determine if it should mark a new socket as background
2057	* Returns 1 for background mode, 0 for normal mode
2058	*/
2059	value = thread->effective_policy.thep_new_sockets_bg ? `1` : `0`;
2060	break;
2061	case TASK_POLICY_LATENCY_QOS:
2062	/*
2063	* timer arming calls into here to find out the timer coalescing level
2064	* Returns a latency QoS tier (0-6)
2065	*/
2066	value = thread->effective_policy.thep_latency_qos;
2067	break;
2068	case TASK_POLICY_THROUGH_QOS:
2069	/*
2070	* This value is passed into the urgency callout from the scheduler
2071	* to the performance management subsystem.
2072	*
2073	* Returns a throughput QoS tier (0-6)
2074	*/
2075	value = thread->effective_policy.thep_through_qos;
2076	break;
2077	case TASK_POLICY_QOS:
2078	/*
2079	* This is communicated to the performance management layer and SFI.
2080	*
2081	* Returns a QoS policy tier
2082	*/
2083	value = thread->effective_policy.thep_qos;
2084	break;
2085	default:
2086	panic("unknown thread policy flavor %d", flavor);
2087	break;
2088	}
2089
2090	return value;
2091	}
2092
2093
2094	/*
2095	* (integer_t) casts limit the number of bits we can fit here
2096	* this interface is deprecated and replaced by the _EXT struct ?
2097	*/
2098	static void
2099	proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2100	{
2101	uint64_t bits = `0`;
2102	struct thread_requested_policy requested = thread->requested_policy;
2103
2104	bits \|= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : `0`);
2105	bits \|= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : `0`);
2106	bits \|= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : `0`);
2107	bits \|= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : `0`);
2108	bits \|= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : `0`);
2109	bits \|= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : `0`);
2110
2111	bits \|= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : `0`);
2112	bits \|= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : `0`);
2113
2114	bits \|= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : `0`);
2115
2116	bits \|= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : `0`);
2117	bits \|= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : `0`);
2118
2119	info->requested = (integer_t) bits;
2120	bits = `0`;
2121
2122	struct thread_effective_policy effective = thread->effective_policy;
2123
2124	bits \|= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : `0`);
2125
2126	bits \|= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : `0`);
2127	bits \|= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : `0`);
2128	bits \|= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : `0`);
2129	bits \|= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : `0`);
2130
2131	bits \|= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : `0`);
2132
2133	bits \|= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : `0`);
2134	bits \|= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : `0`);
2135
2136	info->effective = (integer_t)bits;
2137	bits = `0`;
2138
2139	info->pending = `0`;
2140	}
2141
2142	/*
2143	* Sneakily trace either the task and thread requested
2144	* or just the thread requested, depending on if we have enough room.
2145	* We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2146	*
 *                          LP32            LP64
 * threquested_0(thread)    thread[0]       thread[0]
 * threquested_1(thread)    thread[1]       task[0]
2150	*
2151	*/
2152
2153	uintptr_t
2154	threquested_0(thread_t thread)
2155	{
2156	static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2157
2158	uintptr_t* raw = (uintptr_t)(void**)&thread->requested_policy;
2159
2160	return raw[`0`];
2161	}
2162
2163	uintptr_t
2164	threquested_1(thread_t thread)
2165	{
2166	#if defined __LP64__
2167	return (uintptr_t)&thread->task->requested_policy;
2168	#else
2169	uintptr_t* raw = (uintptr_t)(void**)&thread->requested_policy;
2170	return raw[`1`];
2171	#endif
2172	}
2173
2174	uintptr_t
2175	theffective_0(thread_t thread)
2176	{
2177	static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2178
2179	uintptr_t* raw = (uintptr_t)(void**)&thread->effective_policy;
2180	return raw[`0`];
2181	}
2182
2183	uintptr_t
2184	theffective_1(thread_t thread)
2185	{
2186	#if defined __LP64__
2187	return (uintptr_t)&thread->task->effective_policy;
2188	#else
2189	uintptr_t* raw = (uintptr_t)(void**)&thread->effective_policy;
2190	return raw[`1`];
2191	#endif
2192	}
2193
2194
2195	/*
2196	* Set an override on the thread which is consulted with a
2197	* higher priority than the task/thread policy. This should
2198	* only be set for temporary grants until the thread
2199	* returns to the userspace boundary
2200	*
2201	* We use atomic operations to swap in the override, with
2202	* the assumption that the thread itself can
2203	* read the override and clear it on return to userspace.
2204	*
2205	* No locking is performed, since it is acceptable to see
2206	* a stale override for one loop through throttle_lowpri_io().
2207	* However a thread reference must be held on the thread.
2208	*/
2209
2210	void set_thread_iotier_override(thread_t thread, int policy)
2211	{
2212	int current_override;
2213
2214	/ Let most aggressive I/O policy win until user boundary /
2215	do {
2216	current_override = thread->iotier_override;
2217
2218	if (current_override != THROTTLE_LEVEL_NONE)
2219	policy = MIN(current_override, policy);
2220
2221	if (current_override == policy) {
2222	/ no effective change /
2223	return;
2224	}
2225	} while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2226
2227	/*
2228	* Since the thread may be currently throttled,
2229	* re-evaluate tiers and potentially break out
2230	* of an msleep
2231	*/
2232	rethrottle_thread(thread->uthread);
2233	}
2234
2235	/*
2236	* Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2237	* semaphores, dispatch_sync) may result in priority inversions where a higher priority
2238	* (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2239	* priority thread. In these cases, we attempt to propagate the priority token, as long
2240	* as the subsystem informs us of the relationships between the threads. The userspace
2241	* synchronization subsystem should maintain the information of owner->resource and
2242	* resource->waiters itself.
2243	*/
2244
2245	/*
2246	* This helper canonicalizes the resource/resource_type given the current qos_override_mode
2247	* in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2248	* to be handled specially in the future, but for now it's fine to slam
2249	* *resource to USER_ADDR_NULL even if it was previously a wildcard.
2250	*/
2251	static void canonicalize_resource_and_type(user_addr_t resource, int* *resource_type) {
2252	if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK \|\| qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2253	/ Map all input resource/type to a single one /
2254	*resource = USER_ADDR_NULL;
2255	*resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2256	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2257	/ no transform /
2258	} else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2259	/ Map all mutex overrides to a single one, to avoid memory overhead /
2260	if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2261	*resource = USER_ADDR_NULL;
2262	}
2263	}
2264	}
2265
2266	/ This helper routine finds an existing override if known. Locking should be done by caller /
2267	static struct thread_qos_override *
2268	find_qos_override(thread_t thread,
2269	user_addr_t resource,
2270	int resource_type)
2271	{
2272	struct thread_qos_override *override;
2273
2274	override = thread->overrides;
2275	while (override) {
2276	if (override->override_resource == resource &&
2277	override->override_resource_type == resource_type) {
2278	return override;
2279	}
2280
2281	override = override->override_next;
2282	}
2283
2284	return NULL;
2285	}
2286
2287	static void
2288	find_and_decrement_qos_override(thread_t thread,
2289	user_addr_t resource,
2290	int resource_type,
2291	boolean_t reset,
2292	struct thread_qos_override **free_override_list)
2293	{
2294	struct thread_qos_override override, override_prev;
2295
2296	override_prev = NULL;
2297	override = thread->overrides;
2298	while (override) {
2299	struct thread_qos_override *override_next = override->override_next;
2300
2301	if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource \|\| override->override_resource == resource) &&
2302	(THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type \|\| override->override_resource_type == resource_type)) {
2303
2304	if (reset) {
2305	override->override_contended_resource_count = `0`;
2306	} else {
2307	override->override_contended_resource_count--;
2308	}
2309
2310	if (override->override_contended_resource_count == `0`) {
2311	if (override_prev == NULL) {
2312	thread->overrides = override_next;
2313	} else {
2314	override_prev->override_next = override_next;
2315	}
2316
2317	/ Add to out-param for later zfree /
2318	override->override_next = *free_override_list;
2319	*free_override_list = override;
2320	} else {
2321	override_prev = override;
2322	}
2323
2324	if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2325	return;
2326	}
2327	} else {
2328	override_prev = override;
2329	}
2330
2331	override = override_next;
2332	}
2333	}
2334
2335	/ This helper recalculates the current requested override using the policy selected at boot /
2336	static int
2337	calculate_requested_qos_override(thread_t thread)
2338	{
2339	if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2340	return THREAD_QOS_UNSPECIFIED;
2341	}
2342
2343	/ iterate over all overrides and calculate MAX /
2344	struct thread_qos_override *override;
2345	int qos_override = THREAD_QOS_UNSPECIFIED;
2346
2347	override = thread->overrides;
2348	while (override) {
2349	qos_override = MAX(qos_override, override->override_qos);
2350	override = override->override_next;
2351	}
2352
2353	return qos_override;
2354	}
2355
2356	/*
2357	* Returns:
2358	* - 0 on success
2359	* - EINVAL if some invalid input was passed
2360	*/
2361	static int
2362	proc_thread_qos_add_override_internal(thread_t thread,
2363	int override_qos,
2364	boolean_t first_override_for_resource,
2365	user_addr_t resource,
2366	int resource_type)
2367	{
2368	struct task_pend_token pend_token = {};
2369	int rc = `0`;
2370
2371	thread_mtx_lock(thread);
2372
2373	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) \| DBG_FUNC_START,
2374	thread_tid(thread), override_qos, first_override_for_resource ? `1` : `0`, `0`, `0`);
2375
2376	DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2377	uint64_t, thread->requested_policy.thrp_qos,
2378	uint64_t, thread->effective_policy.thep_qos,
2379	int, override_qos, boolean_t, first_override_for_resource);
2380
2381	struct thread_qos_override *override;
2382	struct thread_qos_override *override_new = NULL;
2383	int new_qos_override, prev_qos_override;
2384	int new_effective_qos;
2385
2386	canonicalize_resource_and_type(&resource, &resource_type);
2387
2388	override = find_qos_override(thread, resource, resource_type);
2389	if (first_override_for_resource && !override) {
2390	/ We need to allocate a new object. Drop the thread lock and*
2391	* recheck afterwards in case someone else added the override
2392	*/
2393	thread_mtx_unlock(thread);
2394	override_new = zalloc(thread_qos_override_zone);
2395	thread_mtx_lock(thread);
2396	override = find_qos_override(thread, resource, resource_type);
2397	}
2398	if (first_override_for_resource && override) {
2399	/ Someone else already allocated while the thread lock was dropped /
2400	override->override_contended_resource_count++;
2401	} else if (!override && override_new) {
2402	override = override_new;
2403	override_new = NULL;
2404	override->override_next = thread->overrides;
2405	/ since first_override_for_resource was TRUE /
2406	override->override_contended_resource_count = `1`;
2407	override->override_resource = resource;
2408	override->override_resource_type = resource_type;
2409	override->override_qos = THREAD_QOS_UNSPECIFIED;
2410	thread->overrides = override;
2411	}
2412
2413	if (override) {
2414	if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2415	override->override_qos = override_qos;
2416	else
2417	override->override_qos = MAX(override->override_qos, override_qos);
2418	}
2419
2420	/ Determine how to combine the various overrides into a single current*
2421	* requested override
2422	*/
2423	new_qos_override = calculate_requested_qos_override(thread);
2424
2425	prev_qos_override = proc_get_thread_policy_locked(thread,
2426	TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2427
2428	if (new_qos_override != prev_qos_override) {
2429	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2430	TASK_POLICY_QOS_OVERRIDE,
2431	new_qos_override, `0`, &pend_token);
2432	}
2433
2434	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2435
2436	thread_mtx_unlock(thread);
2437
2438	thread_policy_update_complete_unlocked(thread, &pend_token);
2439
2440	if (override_new) {
2441	zfree(thread_qos_override_zone, override_new);
2442	}
2443
2444	DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2445	int, new_qos_override, int, new_effective_qos, int, rc);
2446
2447	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) \| DBG_FUNC_END,
2448	new_qos_override, resource, resource_type, `0`, `0`);
2449
2450	return rc;
2451	}
2452
2453	int
2454	proc_thread_qos_add_override(task_t task,
2455	thread_t thread,
2456	uint64_t tid,
2457	int override_qos,
2458	boolean_t first_override_for_resource,
2459	user_addr_t resource,
2460	int resource_type)
2461	{
2462	boolean_t has_thread_reference = FALSE;
2463	int rc = `0`;
2464
2465	if (thread == THREAD_NULL) {
2466	thread = task_findtid(task, tid);
2467	/ returns referenced thread /
2468
2469	if (thread == THREAD_NULL) {
2470	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) \| DBG_FUNC_NONE,
2471	tid, `0`, `0xdead`, `0`, `0`);
2472	return ESRCH;
2473	}
2474	has_thread_reference = TRUE;
2475	} else {
2476	assert(thread->task == task);
2477	}
2478	rc = proc_thread_qos_add_override_internal(thread, override_qos,
2479	first_override_for_resource, resource, resource_type);
2480	if (has_thread_reference) {
2481	thread_deallocate(thread);
2482	}
2483
2484	return rc;
2485	}
2486
2487	static void
2488	proc_thread_qos_remove_override_internal(thread_t thread,
2489	user_addr_t resource,
2490	int resource_type,
2491	boolean_t reset)
2492	{
2493	struct task_pend_token pend_token = {};
2494
2495	struct thread_qos_override *deferred_free_override_list = NULL;
2496	int new_qos_override, prev_qos_override, new_effective_qos;
2497
2498	thread_mtx_lock(thread);
2499
2500	canonicalize_resource_and_type(&resource, &resource_type);
2501
2502	find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2503
2504	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) \| DBG_FUNC_START,
2505	thread_tid(thread), resource, reset, `0`, `0`);
2506
2507	DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2508	uint64_t, thread->requested_policy.thrp_qos,
2509	uint64_t, thread->effective_policy.thep_qos);
2510
2511	/ Determine how to combine the various overrides into a single current requested override /
2512	new_qos_override = calculate_requested_qos_override(thread);
2513
2514	spl_t s = splsched();
2515	thread_lock(thread);
2516
2517	/*
2518	* The override chain and therefore the value of the current override is locked with thread mutex,
2519	* so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2520	* This means you can't change the current override from a spinlock-only setter.
2521	*/
2522	prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2523
2524	if (new_qos_override != prev_qos_override)
2525	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, `0`, &pend_token);
2526
2527	new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2528
2529	thread_unlock(thread);
2530	splx(s);
2531
2532	thread_mtx_unlock(thread);
2533
2534	thread_policy_update_complete_unlocked(thread, &pend_token);
2535
2536	while (deferred_free_override_list) {
2537	struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2538
2539	zfree(thread_qos_override_zone, deferred_free_override_list);
2540	deferred_free_override_list = override_next;
2541	}
2542
2543	DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2544	int, new_qos_override, int, new_effective_qos);
2545
2546	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) \| DBG_FUNC_END,
2547	thread_tid(thread), `0`, `0`, `0`, `0`);
2548	}
2549
2550	int
2551	proc_thread_qos_remove_override(task_t task,
2552	thread_t thread,
2553	uint64_t tid,
2554	user_addr_t resource,
2555	int resource_type)
2556	{
2557	boolean_t has_thread_reference = FALSE;
2558
2559	if (thread == THREAD_NULL) {
2560	thread = task_findtid(task, tid);
2561	/ returns referenced thread /
2562
2563	if (thread == THREAD_NULL) {
2564	KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) \| DBG_FUNC_NONE,
2565	tid, `0`, `0xdead`, `0`, `0`);
2566	return ESRCH;
2567	}
2568	has_thread_reference = TRUE;
2569	} else {
2570	assert(task == thread->task);
2571	}
2572
2573	proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2574
2575	if (has_thread_reference)
2576	thread_deallocate(thread);
2577
2578	return `0`;
2579	}
2580
2581	/ Deallocate before thread termination /
2582	void proc_thread_qos_deallocate(thread_t thread)
2583	{
2584	/ This thread must have no more IPC overrides. /
2585	assert(thread->ipc_overrides == `0`);
2586	assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
2587	assert(thread->sync_ipc_overrides == `0`);
2588	assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
2589
2590	/*
2591	* Clear out any lingering override objects.
2592	*/
2593	struct thread_qos_override *override;
2594
2595	thread_mtx_lock(thread);
2596	override = thread->overrides;
2597	thread->overrides = NULL;
2598	thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2599	/ We don't need to re-evaluate thread policy here because the thread has already exited /
2600	thread_mtx_unlock(thread);
2601
2602	while (override) {
2603	struct thread_qos_override *override_next = override->override_next;
2604
2605	zfree(thread_qos_override_zone, override);
2606	override = override_next;
2607	}
2608	}
2609
2610	/*
2611	* Set up the primordial thread's QoS
2612	*/
2613	void
2614	task_set_main_thread_qos(task_t task, thread_t thread) {
2615	struct task_pend_token pend_token = {};
2616
2617	assert(thread->task == task);
2618
2619	thread_mtx_lock(thread);
2620
2621	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2622	(IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, `0`)) \| DBG_FUNC_START,
2623	thread_tid(thread), threquested_0(thread), threquested_1(thread),
2624	thread->requested_policy.thrp_qos, `0`);
2625
2626	int primordial_qos = task_compute_main_thread_qos(task);
2627
2628	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
2629	primordial_qos, `0`, &pend_token);
2630
2631	thread_mtx_unlock(thread);
2632
2633	thread_policy_update_complete_unlocked(thread, &pend_token);
2634
2635	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2636	(IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, `0`)) \| DBG_FUNC_END,
2637	thread_tid(thread), threquested_0(thread), threquested_1(thread),
2638	primordial_qos, `0`);
2639	}
2640
2641	/*
2642	* KPI for pthread kext
2643	*
2644	* Return a good guess at what the initial manager QoS will be
2645	* Dispatch can override this in userspace if it so chooses
2646	*/
2647	int
2648	task_get_default_manager_qos(task_t task)
2649	{
2650	int primordial_qos = task_compute_main_thread_qos(task);
2651
2652	if (primordial_qos == THREAD_QOS_LEGACY)
2653	primordial_qos = THREAD_QOS_USER_INITIATED;
2654
2655	return primordial_qos;
2656	}
2657
2658	/*
2659	* Check if the user promotion on thread has changed
2660	* and apply it.
2661	*
2662	* thread locked on entry, might drop the thread lock
2663	* and reacquire it.
2664	*/
2665	boolean_t
2666	thread_recompute_user_promotion_locked(thread_t thread)
2667	{
2668	boolean_t needs_update = FALSE;
2669	struct task_pend_token pend_token = {};
2670	int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_priority(thread), MAXPRI_USER);
2671	int old_base_pri = thread->base_pri;
2672	thread_qos_t qos_promotion;
2673
2674	/ Check if user promotion has changed /
2675	if (thread->user_promotion_basepri == user_promotion_basepri) {
2676	return needs_update;
2677	} else {
2678	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2679	(TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) \| DBG_FUNC_NONE,
2680	thread_tid(thread),
2681	user_promotion_basepri,
2682	thread->user_promotion_basepri,
2683	`0`, `0`);
2684	}
2685
2686	/ Update the user promotion base pri /
2687	thread->user_promotion_basepri = user_promotion_basepri;
2688	pend_token.tpt_force_recompute_pri = `1`;
2689
2690	if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2691	qos_promotion = THREAD_QOS_UNSPECIFIED;
2692	} else {
2693	qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2694	}
2695
2696	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2697	TASK_POLICY_QOS_PROMOTE, qos_promotion, `0`, &pend_token);
2698
2699	if (thread_get_waiting_turnstile(thread) &&
2700	thread->base_pri != old_base_pri) {
2701	needs_update = TRUE;
2702	}
2703
2704	thread_unlock(thread);
2705
2706	thread_policy_update_complete_unlocked(thread, &pend_token);
2707
2708	thread_lock(thread);
2709
2710	return needs_update;
2711	}
2712
2713	/*
2714	* Convert the thread user promotion base pri to qos for threads in qos world.
2715	* For priority above UI qos, the qos would be set to UI.
2716	*/
2717	thread_qos_t
2718	thread_user_promotion_qos_for_pri(int priority)
2719	{
2720	int qos;
2721	for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2722	if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2723	return qos;
2724	}
2725	}
2726	return THREAD_QOS_MAINTENANCE;
2727	}
2728
2729	/*
2730	* Set the thread's QoS IPC override
2731	* Owned by the IPC subsystem
2732	*
2733	* May be called with spinlocks held, but not spinlocks
2734	* that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2735	*
2736	* One 'add' must be balanced by one 'drop'.
2737	* Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
2738	* Before the thread is deallocated, there must be 0 remaining overrides.
2739	*/
2740	static void
2741	thread_ipc_override(thread_t thread,
2742	uint32_t qos_override,
2743	boolean_t is_new_override)
2744	{
2745	struct task_pend_token pend_token = {};
2746	boolean_t needs_update;
2747
2748	spl_t s = splsched();
2749	thread_lock(thread);
2750
2751	uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
2752
2753	assert(qos_override > THREAD_QOS_UNSPECIFIED);
2754	assert(qos_override < THREAD_QOS_LAST);
2755
2756	if (is_new_override) {
2757	if (thread->ipc_overrides++ == `0`) {
2758	/ This add is the first override for this thread /
2759	assert(old_override == THREAD_QOS_UNSPECIFIED);
2760	} else {
2761	/ There are already other overrides in effect for this thread /
2762	assert(old_override > THREAD_QOS_UNSPECIFIED);
2763	}
2764	} else {
2765	/ There must be at least one override (the previous add call) in effect /
2766	assert(thread->ipc_overrides > `0`);
2767	assert(old_override > THREAD_QOS_UNSPECIFIED);
2768	}
2769
2770	/*
2771	* We can't allow lowering if there are several IPC overrides because
2772	* the caller can't possibly know the whole truth
2773	*/
2774	if (thread->ipc_overrides == `1`) {
2775	needs_update = qos_override != old_override;
2776	} else {
2777	needs_update = qos_override > old_override;
2778	}
2779
2780	if (needs_update) {
2781	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2782	TASK_POLICY_QOS_IPC_OVERRIDE,
2783	qos_override, `0`, &pend_token);
2784	assert(pend_token.tpt_update_sockets == `0`);
2785	}
2786
2787	thread_unlock(thread);
2788	splx(s);
2789
2790	thread_policy_update_complete_unlocked(thread, &pend_token);
2791	}
2792
2793	void
2794	thread_add_ipc_override(thread_t thread,
2795	uint32_t qos_override)
2796	{
2797	thread_ipc_override(thread, qos_override, TRUE);
2798	}
2799
2800	void
2801	thread_update_ipc_override(thread_t thread,
2802	uint32_t qos_override)
2803	{
2804	thread_ipc_override(thread, qos_override, FALSE);
2805	}
2806
2807	void
2808	thread_drop_ipc_override(thread_t thread)
2809	{
2810	struct task_pend_token pend_token = {};
2811
2812	spl_t s = splsched();
2813	thread_lock(thread);
2814
2815	assert(thread->ipc_overrides > `0`);
2816
2817	if (--thread->ipc_overrides == `0`) {
2818	/*
2819	* There are no more overrides for this thread, so we should
2820	* clear out the saturated override value
2821	*/
2822
2823	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2824	TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
2825	`0`, &pend_token);
2826	}
2827
2828	thread_unlock(thread);
2829	splx(s);
2830
2831	thread_policy_update_complete_unlocked(thread, &pend_token);
2832	}
2833
2834	/ Get current requested qos / relpri, may be called from spinlock context /
2835	thread_qos_t
2836	thread_get_requested_qos(thread_t thread, int *relpri)
2837	{
2838	int relprio_value = `0`;
2839	thread_qos_t qos;
2840
2841	qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2842	TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
2843	if (relpri) *relpri = -relprio_value;
2844	return qos;
2845	}
2846
2847	/*
2848	* This function will promote the thread priority
2849	* since exec could block other threads calling
2850	* proc_find on the proc. This boost must be removed
2851	* via call to thread_clear_exec_promotion.
2852	*
2853	* This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
2854	*/
2855	void
2856	thread_set_exec_promotion(thread_t thread)
2857	{
2858	spl_t s = splsched();
2859	thread_lock(thread);
2860
2861	sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, `0`);
2862
2863	thread_unlock(thread);
2864	splx(s);
2865	}
2866
2867	/*
2868	* This function will clear the exec thread
2869	* promotion set on the thread by thread_set_exec_promotion.
2870	*/
2871	void
2872	thread_clear_exec_promotion(thread_t thread)
2873	{
2874	spl_t s = splsched();
2875	thread_lock(thread);
2876
2877	sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, `0`);
2878
2879	thread_unlock(thread);
2880	splx(s);
2881	}
2882
2883

Browse the source code of xnu/osfmk/kern/thread_policy.c