task_policy.c source code [xnu/osfmk/kern/task_policy.c]

1	/*
2	* Copyright (c) 2000-2020 Apple Computer, Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#include <kern/policy_internal.h>
30	#include <mach/task_policy.h>
31	#include <mach/task.h>
32	#include <mach/mach_types.h>
33	#include <mach/task_server.h>
34	#include <kern/host.h> /* host_priv_self() */
35	#include <mach/host_priv.h> /* host_get_special_port() */
36	#include <mach/host_special_ports.h> /* RESOURCE_NOTIFY_PORT */
37	#include <kern/sched.h>
38	#include <kern/task.h>
39	#include <mach/thread_policy.h>
40	#include <sys/errno.h>
41	#include <sys/resource.h>
42	#include <machine/limits.h>
43	#include <kern/ledger.h>
44	#include <kern/thread_call.h>
45	#include <kern/sfi.h>
46	#include <kern/coalition.h>
47	#if CONFIG_TELEMETRY
48	#include <kern/telemetry.h>
49	#endif
50	#if !defined(XNU_TARGET_OS_OSX)
51	#include <kern/kalloc.h>
52	#include <sys/errno.h>
53	#endif /* !defined(XNU_TARGET_OS_OSX) */
54
55	#if IMPORTANCE_INHERITANCE
56	#include <ipc/ipc_importance.h>
57	#if IMPORTANCE_TRACE
58	#include <mach/machine/sdt.h>
59	#endif /* IMPORTANCE_TRACE */
60	#endif /* IMPORTANCE_INHERITANCE */
61
62	#include <sys/kdebug.h>
63
64	/*
65	* Task Policy
66	*
67	* This subsystem manages task and thread IO priority and backgrounding,
68	* as well as importance inheritance, process suppression, task QoS, and apptype.
69	* These properties have a surprising number of complex interactions, so they are
70	* centralized here in one state machine to simplify the implementation of those interactions.
71	*
72	* Architecture:
73	* Threads and tasks have two policy fields: requested, effective.
74	* Requested represents the wishes of each interface that influences task policy.
75	* Effective represents the distillation of that policy into a set of behaviors.
76	*
77	* Each thread making a modification in the policy system passes a 'pending' struct,
78	* which tracks updates that will be applied after dropping the policy engine lock.
79	*
80	* Each interface that has an input into the task policy state machine controls a field in requested.
81	* If the interface has a getter, it returns what is in the field in requested, but that is
82	* not necessarily what is actually in effect.
83	*
84	* All kernel subsystems that behave differently based on task policy call into
85	* the proc_get_effective_(task|thread)_policy functions, which return the decision of the task policy state machine
86	* for that subsystem by querying only the 'effective' field.
87	*
88	* Policy change operations:
89	* Here are the steps to change a policy on a task or thread:
90	* 1) Lock task
91	* 2) Change requested field for the relevant policy
92	* 3) Run a task policy update, which recalculates effective based on requested,
93	* then takes a diff between the old and new versions of requested and calls the relevant
94	* other subsystems to apply these changes, and updates the pending field.
95	* 4) Unlock task
96	* 5) Run task policy update complete, which looks at the pending field to update
97	* subsystems which cannot be touched while holding the task lock.
98	*
99	* To add a new requested policy, add the field in the requested struct, the flavor in task.h,
100	* the setter and getter in proc_(set|get)_task_policy*,
101	* then set up the effects of that behavior in task_policy_update*. If the policy manifests
102	* itself as a distinct effective policy, add it to the effective struct and add it to the
103	* proc_get_effective_task_policy accessor.
104	*
105	* Most policies are set via proc_set_task_policy, but policies that don't fit that interface
106	* roll their own lock/set/update/unlock/complete code inside this file.
107	*
108	*
109	* Suppression policy
110	*
111	* These are a set of behaviors that can be requested for a task. They currently have specific
112	* implied actions when they're enabled, but they may be made customizable in the future.
113	*
114	* When the affected task is boosted, we temporarily disable the suppression behaviors
115	* so that the affected process has a chance to run so it can call the API to permanently
116	* disable the suppression behaviors.
117	*
118	* Locking
119	*
120	* Changing task policy on a task takes the task lock.
121	* Changing task policy on a thread takes the thread mutex.
122	* Task policy changes that affect threads will take each thread's mutex to update it if necessary.
123	*
124	* Querying the effective policy does not take a lock, because callers
125	* may run in interrupt context or other place where locks are not OK.
126	*
127	* This means that any notification of state change needs to be externally synchronized.
128	* We do this by idempotent callouts after the state has changed to ask
129	* other subsystems to update their view of the world.
130	*
131	* TODO: Move all cpu/wakes/io monitor code into a separate file
132	* TODO: Move all importance code over to importance subsystem
133	* TODO: Move all taskwatch code into a separate file
134	* TODO: Move all VM importance code into a separate file
135	*/
136
137	/ Task policy related helper functions /
138	static void proc_set_task_policy_locked(task_t task, int category, int flavor, int value, int value2);
139
140	static void task_policy_update_locked(task_t task, task_pend_token_t pend_token);
141	static void task_policy_update_internal_locked(task_t task, bool in_create, task_pend_token_t pend_token);
142
143	/ For attributes that have two scalars as input/output /
144	static void proc_set_task_policy2(task_t task, int category, int flavor, int value1, int value2);
145	static void proc_get_task_policy2(task_t task, int category, int flavor, int value1, int* *value2);
146
147	static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role, task_pend_token_t pend_token);
148
149	static uint64_t task_requested_bitfield(task_t task);
150	static uint64_t task_effective_bitfield(task_t task);
151
152	/ Convenience functions for munging a policy bitfield into a tracepoint /
153	static uintptr_t trequested_0(task_t task);
154	static uintptr_t trequested_1(task_t task);
155	static uintptr_t teffective_0(task_t task);
156	static uintptr_t teffective_1(task_t task);
157
158	/ CPU limits helper functions /
159	static int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
160	static int task_get_cpuusage(task_t task, uint8_t percentagep, uint64_t intervalp, uint64_t deadlinep, int* *scope);
161	static int task_enable_cpumon_locked(task_t task);
162	static int task_disable_cpumon(task_t task);
163	static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
164	static int task_apply_resource_actions(task_t task, int type);
165	static void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
166
167	#ifdef MACH_BSD
168	typedef struct proc * proc_t;
169	int proc_pid(struct proc *proc);
170	extern int proc_selfpid(void);
171	extern char * proc_name_address(void *p);
172	extern char * proc_best_name(proc_t proc);
173
174	extern int proc_pidpathinfo_internal(proc_t p, uint64_t arg,
175	char *buffer, uint32_t buffersize,
176	int32_t *retval);
177	#endif /* MACH_BSD */
178
179
#if CONFIG_TASKWATCH
/* Taskwatch related helper functions */
static void set_thread_appbg(thread_t thread, int setbg, int importance);
static void add_taskwatch_locked(task_t task, task_watch_t * twp);
static void remove_taskwatch_locked(task_t task, task_watch_t * twp);
static void task_watch_lock(void);
static void task_watch_unlock(void);
static void apply_appstate_watchers(task_t task);

/* One watcher record: links a watching thread to the task it watches. */
typedef struct task_watcher {
	queue_chain_t   tw_links;       /* queueing of threads */
	task_t          tw_task;        /* task that is being watched */
	thread_t        tw_thread;      /* thread that is watching the watch_task */
	int             tw_state;       /* the current app state of the thread */
	int             tw_importance;  /* importance prior to backgrounding */
} task_watch_t;

/* Scratch entry describing one thread pending a taskwatch action. */
typedef struct thread_watchlist {
	thread_t        thread;         /* thread being worked on for taskwatch action */
	int             importance;     /* importance to be restored if thread is being made active */
} thread_watchlist_t;

#endif /* CONFIG_TASKWATCH */
203
204	extern int memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap);
205
206	/ Importance Inheritance related helper functions /
207
208	#if IMPORTANCE_INHERITANCE
209
210	static void task_importance_mark_live_donor(task_t task, boolean_t donating);
211	static void task_importance_mark_receiver(task_t task, boolean_t receiving);
212	static void task_importance_mark_denap_receiver(task_t task, boolean_t denap);
213
214	static boolean_t task_is_marked_live_importance_donor(task_t task);
215	static boolean_t task_is_importance_receiver(task_t task);
216	static boolean_t task_is_importance_denap_receiver(task_t task);
217
218	static int task_importance_hold_internal_assertion(task_t target_task, uint32_t count);
219
220	static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
221	static void task_importance_update_live_donor(task_t target_task);
222
223	static void task_set_boost_locked(task_t task, boolean_t boost_active);
224
225	#endif /* IMPORTANCE_INHERITANCE */
226
227	#if IMPORTANCE_TRACE
228	#define __imptrace_only
229	#else /* IMPORTANCE_TRACE */
230	#define __imptrace_only __unused
231	#endif /* !IMPORTANCE_TRACE */
232
233	#if IMPORTANCE_INHERITANCE
234	#define __imp_only
235	#else
236	#define __imp_only __unused
237	#endif
238
239	/*
240	* Default parameters for certain policies
241	*/
242
243	int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
244	int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
245
246	int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & `0xFF`);
247
248	const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;
249
250	/ Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation /
251	const struct task_requested_policy default_task_requested_policy = {
252	.trp_bg_iotier = proc_default_bg_iotier
253	};
254	const struct task_effective_policy default_task_effective_policy = {};
255
256	/*
257	* Default parameters for CPU usage monitor.
258	*
259	* Default setting is 50% over 3 minutes.
260	*/
261	#define DEFAULT_CPUMON_PERCENTAGE 50
262	#define DEFAULT_CPUMON_INTERVAL (3 * 60)
263
264	uint8_t proc_max_cpumon_percentage;
265	uint64_t proc_max_cpumon_interval;
266
267	kern_return_t
268	qos_latency_policy_validate(task_latency_qos_t ltier)
269	{
270	if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
271	((ltier > LATENCY_QOS_TIER_5) \|\| (ltier < LATENCY_QOS_TIER_0))) {
272	return KERN_INVALID_ARGUMENT;
273	}
274
275	return KERN_SUCCESS;
276	}
277
278	kern_return_t
279	qos_throughput_policy_validate(task_throughput_qos_t ttier)
280	{
281	if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
282	((ttier > THROUGHPUT_QOS_TIER_5) \|\| (ttier < THROUGHPUT_QOS_TIER_0))) {
283	return KERN_INVALID_ARGUMENT;
284	}
285
286	return KERN_SUCCESS;
287	}
288
289	static kern_return_t
290	task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count)
291	{
292	if (count < TASK_QOS_POLICY_COUNT) {
293	return KERN_INVALID_ARGUMENT;
294	}
295
296	task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
297	task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
298
299	kern_return_t kr = qos_latency_policy_validate(ltier);
300
301	if (kr != KERN_SUCCESS) {
302	return kr;
303	}
304
305	kr = qos_throughput_policy_validate(ttier);
306
307	return kr;
308	}
309
/*
 * Strip a packaged QoS value down to its tier index.
 *
 * Returns the low 8 bits of qv; all higher bits are discarded.
 */
uint32_t
qos_extract(uint32_t qv)
{
	return qv & 0xFF;
}
315
316	uint32_t
317	qos_latency_policy_package(uint32_t qv)
318	{
319	return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((`0xFF` << `16`) \| qv);
320	}
321
322	uint32_t
323	qos_throughput_policy_package(uint32_t qv)
324	{
325	return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((`0xFE` << `16`) \| qv);
326	}
327
328	#define TASK_POLICY_SUPPRESSION_DISABLE 0x1
329	#define TASK_POLICY_SUPPRESSION_IOTIER2 0x2
330	#define TASK_POLICY_SUPPRESSION_NONDONOR 0x4
331	/ TEMPORARY boot-arg controlling task_policy suppression (App Nap) /
332	static boolean_t task_policy_suppression_flags = TASK_POLICY_SUPPRESSION_IOTIER2 \|
333	TASK_POLICY_SUPPRESSION_NONDONOR;
334
335	static void
336	task_set_requested_apptype(task_t task, uint64_t apptype, __unused boolean_t update_tg_flag)
337	{
338	task->requested_policy.trp_apptype = apptype;
339	#if CONFIG_THREAD_GROUPS
340	if (update_tg_flag && task_is_app(task)) {
341	task_coalition_thread_group_application_set(task);
342	}
343	#endif /* CONFIG_THREAD_GROUPS */
344	}
345
346	kern_return_t
347	task_policy_set(
348	task_t task,
349	task_policy_flavor_t flavor,
350	task_policy_t policy_info,
351	mach_msg_type_number_t count)
352	{
353	kern_return_t result = KERN_SUCCESS;
354
355	if (task == TASK_NULL \|\| task == kernel_task) {
356	return KERN_INVALID_ARGUMENT;
357	}
358
359	switch (flavor) {
360	case TASK_CATEGORY_POLICY: {
361	task_category_policy_t info = (task_category_policy_t)policy_info;
362
363	if (count < TASK_CATEGORY_POLICY_COUNT) {
364	return KERN_INVALID_ARGUMENT;
365	}
366
367	#if !defined(XNU_TARGET_OS_OSX)
368	/ On embedded, you can't modify your own role. /
369	if (current_task() == task) {
370	return KERN_INVALID_ARGUMENT;
371	}
372	#endif
373
374	switch (info->role) {
375	case TASK_FOREGROUND_APPLICATION:
376	case TASK_BACKGROUND_APPLICATION:
377	case TASK_DEFAULT_APPLICATION:
378	proc_set_task_policy(task,
379	TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
380	value: info->role);
381	break;
382
383	case TASK_CONTROL_APPLICATION:
384	if (task != current_task() \|\| !task_is_privileged(task)) {
385	result = KERN_INVALID_ARGUMENT;
386	} else {
387	proc_set_task_policy(task,
388	TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
389	value: info->role);
390	}
391	break;
392
393	case TASK_GRAPHICS_SERVER:
394	/ TODO: Restrict this role to FCFS <rdar://problem/12552788> /
395	if (task != current_task() \|\| !task_is_privileged(task)) {
396	result = KERN_INVALID_ARGUMENT;
397	} else {
398	proc_set_task_policy(task,
399	TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
400	value: info->role);
401	}
402	break;
403	default:
404	result = KERN_INVALID_ARGUMENT;
405	break;
406	} / switch (info->role) /
407
408	break;
409	}
410
411	/ Desired energy-efficiency/performance "quality-of-service" /
412	case TASK_BASE_QOS_POLICY:
413	case TASK_OVERRIDE_QOS_POLICY:
414	{
415	task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
416	kern_return_t kr = task_qos_policy_validate(qosinfo, count);
417
418	if (kr != KERN_SUCCESS) {
419	return kr;
420	}
421
422
423	uint32_t lqos = qos_extract(qv: qosinfo->task_latency_qos_tier);
424	uint32_t tqos = qos_extract(qv: qosinfo->task_throughput_qos_tier);
425
426	proc_set_task_policy2(task, TASK_POLICY_ATTRIBUTE,
427	flavor: flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
428	value1: lqos, value2: tqos);
429	}
430	break;
431
432	case TASK_BASE_LATENCY_QOS_POLICY:
433	{
434	task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
435	kern_return_t kr = task_qos_policy_validate(qosinfo, count);
436
437	if (kr != KERN_SUCCESS) {
438	return kr;
439	}
440
441	uint32_t lqos = qos_extract(qv: qosinfo->task_latency_qos_tier);
442
443	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, value: lqos);
444	}
445	break;
446
447	case TASK_BASE_THROUGHPUT_QOS_POLICY:
448	{
449	task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
450	kern_return_t kr = task_qos_policy_validate(qosinfo, count);
451
452	if (kr != KERN_SUCCESS) {
453	return kr;
454	}
455
456	uint32_t tqos = qos_extract(qv: qosinfo->task_throughput_qos_tier);
457
458	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, value: tqos);
459	}
460	break;
461
462	case TASK_SUPPRESSION_POLICY:
463	{
464	#if !defined(XNU_TARGET_OS_OSX)
465	/*
466	* Suppression policy is not enabled for embedded
467	* because apps aren't marked as denap receivers
468	*/
469	result = KERN_INVALID_ARGUMENT;
470	break;
471	#else /* !defined(XNU_TARGET_OS_OSX) */
472
473	task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
474
475	if (count < TASK_SUPPRESSION_POLICY_COUNT) {
476	return KERN_INVALID_ARGUMENT;
477	}
478
479	struct task_qos_policy qosinfo;
480
481	qosinfo.task_latency_qos_tier = info->timer_throttle;
482	qosinfo.task_throughput_qos_tier = info->throughput_qos;
483
484	kern_return_t kr = task_qos_policy_validate(qosinfo: &qosinfo, TASK_QOS_POLICY_COUNT);
485
486	if (kr != KERN_SUCCESS) {
487	return kr;
488	}
489
490	/ TEMPORARY disablement of task suppression /
491	if (info->active &&
492	(task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_DISABLE)) {
493	return KERN_SUCCESS;
494	}
495
496	struct task_pend_token pend_token = {};
497
498	task_lock(task);
499
500	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
501	(IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) \| DBG_FUNC_START,
502	proc_selfpid(), task_pid(task), trequested_0(task),
503	trequested_1(task), `0`);
504
505	task->requested_policy.trp_sup_active = (info->active) ? `1` : `0`;
506	task->requested_policy.trp_sup_lowpri_cpu = (info->lowpri_cpu) ? `1` : `0`;
507	task->requested_policy.trp_sup_timer = qos_extract(qv: info->timer_throttle);
508	task->requested_policy.trp_sup_disk = (info->disk_throttle) ? `1` : `0`;
509	task->requested_policy.trp_sup_throughput = qos_extract(qv: info->throughput_qos);
510	task->requested_policy.trp_sup_cpu = (info->suppressed_cpu) ? `1` : `0`;
511	task->requested_policy.trp_sup_bg_sockets = (info->background_sockets) ? `1` : `0`;
512
513	task_policy_update_locked(task, pend_token: &pend_token);
514
515	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
516	(IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) \| DBG_FUNC_END,
517	proc_selfpid(), task_pid(task), trequested_0(task),
518	trequested_1(task), `0`);
519
520	task_unlock(task);
521
522	task_policy_update_complete_unlocked(task, pend_token: &pend_token);
523
524	break;
525
526	#endif /* !defined(XNU_TARGET_OS_OSX) */
527	}
528
529	default:
530	result = KERN_INVALID_ARGUMENT;
531	break;
532	}
533
534	return result;
535	}
536
537	/ Sets BSD 'nice' value on the task /
538	kern_return_t
539	task_importance(
540	task_t task,
541	integer_t importance)
542	{
543	if (task == TASK_NULL \|\| task == kernel_task) {
544	return KERN_INVALID_ARGUMENT;
545	}
546
547	task_lock(task);
548
549	if (!task->active) {
550	task_unlock(task);
551
552	return KERN_TERMINATED;
553	}
554
555	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
556	task_unlock(task);
557
558	return KERN_INVALID_ARGUMENT;
559	}
560
561	task->importance = importance;
562
563	struct task_pend_token pend_token = {};
564
565	task_policy_update_locked(task, pend_token: &pend_token);
566
567	task_unlock(task);
568
569	task_policy_update_complete_unlocked(task, pend_token: &pend_token);
570
571	return KERN_SUCCESS;
572	}
573
574	kern_return_t
575	task_policy_get(
576	task_t task,
577	task_policy_flavor_t flavor,
578	task_policy_t policy_info,
579	mach_msg_type_number_t *count,
580	boolean_t *get_default)
581	{
582	if (task == TASK_NULL \|\| task == kernel_task) {
583	return KERN_INVALID_ARGUMENT;
584	}
585
586	switch (flavor) {
587	case TASK_CATEGORY_POLICY:
588	{
589	task_category_policy_t info = (task_category_policy_t)policy_info;
590
591	if (*count < TASK_CATEGORY_POLICY_COUNT) {
592	return KERN_INVALID_ARGUMENT;
593	}
594
595	if (*get_default) {
596	info->role = TASK_UNSPECIFIED;
597	} else {
598	info->role = proc_get_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
599	}
600	break;
601	}
602
603	case TASK_BASE_QOS_POLICY: / FALLTHRU /
604	case TASK_OVERRIDE_QOS_POLICY:
605	{
606	task_qos_policy_t info = (task_qos_policy_t)policy_info;
607
608	if (*count < TASK_QOS_POLICY_COUNT) {
609	return KERN_INVALID_ARGUMENT;
610	}
611
612	if (*get_default) {
613	info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
614	info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
615	} else if (flavor == TASK_BASE_QOS_POLICY) {
616	int value1, value2;
617
618	proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, value1: &value1, value2: &value2);
619
620	info->task_latency_qos_tier = qos_latency_policy_package(qv: value1);
621	info->task_throughput_qos_tier = qos_throughput_policy_package(qv: value2);
622	} else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
623	int value1, value2;
624
625	proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, value1: &value1, value2: &value2);
626
627	info->task_latency_qos_tier = qos_latency_policy_package(qv: value1);
628	info->task_throughput_qos_tier = qos_throughput_policy_package(qv: value2);
629	}
630
631	break;
632	}
633
634	case TASK_POLICY_STATE:
635	{
636	task_policy_state_t info = (task_policy_state_t)policy_info;
637
638	if (*count < TASK_POLICY_STATE_COUNT) {
639	return KERN_INVALID_ARGUMENT;
640	}
641
642	/ Only root can get this info /
643	if (!task_is_privileged(task: current_task())) {
644	return KERN_PROTECTION_FAILURE;
645	}
646
647	if (*get_default) {
648	info->requested = `0`;
649	info->effective = `0`;
650	info->pending = `0`;
651	info->imp_assertcnt = `0`;
652	info->imp_externcnt = `0`;
653	info->flags = `0`;
654	info->imp_transitions = `0`;
655	} else {
656	task_lock(task);
657
658	info->requested = task_requested_bitfield(task);
659	info->effective = task_effective_bitfield(task);
660	info->pending = `0`;
661
662	info->tps_requested_policy = (uint64_t)(&task->requested_policy);
663	info->tps_effective_policy = (uint64_t)(&task->effective_policy);
664
665	info->flags = `0`;
666	if (task->task_imp_base != NULL) {
667	info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
668	info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
669	info->flags \|= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : `0`);
670	info->flags \|= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : `0`);
671	info->flags \|= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : `0`);
672	info->flags \|= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : `0`);
673	info->flags \|= (get_task_pidsuspended(task) ? TASK_IS_PIDSUSPENDED : `0`);
674	info->imp_transitions = task->task_imp_base->iit_transitions;
675	} else {
676	info->imp_assertcnt = `0`;
677	info->imp_externcnt = `0`;
678	info->imp_transitions = `0`;
679	}
680	task_unlock(task);
681	}
682
683	break;
684	}
685
686	case TASK_SUPPRESSION_POLICY:
687	{
688	task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
689
690	if (*count < TASK_SUPPRESSION_POLICY_COUNT) {
691	return KERN_INVALID_ARGUMENT;
692	}
693
694	task_lock(task);
695
696	if (*get_default) {
697	info->active = `0`;
698	info->lowpri_cpu = `0`;
699	info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED;
700	info->disk_throttle = `0`;
701	info->cpu_limit = `0`;
702	info->suspend = `0`;
703	info->throughput_qos = `0`;
704	info->suppressed_cpu = `0`;
705	} else {
706	info->active = task->requested_policy.trp_sup_active;
707	info->lowpri_cpu = task->requested_policy.trp_sup_lowpri_cpu;
708	info->timer_throttle = qos_latency_policy_package(qv: task->requested_policy.trp_sup_timer);
709	info->disk_throttle = task->requested_policy.trp_sup_disk;
710	info->cpu_limit = `0`;
711	info->suspend = `0`;
712	info->throughput_qos = qos_throughput_policy_package(qv: task->requested_policy.trp_sup_throughput);
713	info->suppressed_cpu = task->requested_policy.trp_sup_cpu;
714	info->background_sockets = task->requested_policy.trp_sup_bg_sockets;
715	}
716
717	task_unlock(task);
718	break;
719	}
720
721	default:
722	return KERN_INVALID_ARGUMENT;
723	}
724
725	return KERN_SUCCESS;
726	}
727
728	/*
729	* Called at task creation
730	* We calculate the correct effective but don't apply it to anything yet.
731	* The threads, etc will inherit from the task as they get created.
732	*/
733	void
734	task_policy_create(task_t task, task_t parent_task)
735	{
736	task_set_requested_apptype(task, apptype: parent_task->requested_policy.trp_apptype, true);
737
738	task->requested_policy.trp_int_darwinbg = parent_task->requested_policy.trp_int_darwinbg;
739	task->requested_policy.trp_ext_darwinbg = parent_task->requested_policy.trp_ext_darwinbg;
740	task->requested_policy.trp_int_iotier = parent_task->requested_policy.trp_int_iotier;
741	task->requested_policy.trp_ext_iotier = parent_task->requested_policy.trp_ext_iotier;
742	task->requested_policy.trp_int_iopassive = parent_task->requested_policy.trp_int_iopassive;
743	task->requested_policy.trp_ext_iopassive = parent_task->requested_policy.trp_ext_iopassive;
744	task->requested_policy.trp_bg_iotier = parent_task->requested_policy.trp_bg_iotier;
745	task->requested_policy.trp_terminated = parent_task->requested_policy.trp_terminated;
746	task->requested_policy.trp_qos_clamp = parent_task->requested_policy.trp_qos_clamp;
747
748	if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && !task_is_exec_copy(task)) {
749	/ Do not update the apptype for exec copy task /
750	if (parent_task->requested_policy.trp_boosted) {
751	task_set_requested_apptype(task, TASK_APPTYPE_DAEMON_INTERACTIVE, true);
752	task_importance_mark_donor(task, TRUE);
753	} else {
754	task_set_requested_apptype(task, TASK_APPTYPE_DAEMON_BACKGROUND, true);
755	task_importance_mark_receiver(task, FALSE);
756	}
757	}
758
759	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
760	(IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE \| TASK_POLICY_TASK))) \| DBG_FUNC_START,
761	task_pid(task), teffective_0(task),
762	teffective_1(task), task->priority, `0`);
763
764	task_policy_update_internal_locked(task, true, NULL);
765
766	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
767	(IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE \| TASK_POLICY_TASK))) \| DBG_FUNC_END,
768	task_pid(task), teffective_0(task),
769	teffective_1(task), task->priority, `0`);
770
771	task_importance_update_live_donor(target_task: task);
772	}
773
774
775	static void
776	task_policy_update_locked(task_t task, task_pend_token_t pend_token)
777	{
778	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
779	(IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK) \| DBG_FUNC_START),
780	task_pid(task), teffective_0(task),
781	teffective_1(task), task->priority, `0`);
782
783	task_policy_update_internal_locked(task, false, pend_token);
784
785	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
786	(IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK)) \| DBG_FUNC_END,
787	task_pid(task), teffective_0(task),
788	teffective_1(task), task->priority, `0`);
789	}
790
791	/*
792	* One state update function TO RULE THEM ALL
793	*
794	* This function updates the task or thread effective policy fields
795	* and pushes the results to the relevant subsystems.
796	*
797	* Must call update_complete after unlocking the task,
798	* as some subsystems cannot be updated while holding the task lock.
799	*
800	* Called with task locked, not thread
801	*/
802
803	static void
804	task_policy_update_internal_locked(task_t task, bool in_create, task_pend_token_t pend_token)
805	{
806	/*
807	* Step 1:
808	* Gather requested policy
809	*/
810
811	struct task_requested_policy requested = task->requested_policy;
812
813	/*
814	* Step 2:
815	* Calculate new effective policies from requested policy and task state
816	* Rules:
817	* Don't change requested, it won't take effect
818	*/
819
820	struct task_effective_policy next = {};
821
822	/ Update task role /
823	next.tep_role = requested.trp_role;
824
825	/ Set task qos clamp and ceiling /
826
827	thread_qos_t role_clamp = THREAD_QOS_UNSPECIFIED;
828
829	if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT) {
830	switch (next.tep_role) {
831	case TASK_FOREGROUND_APPLICATION:
832	/ Foreground apps get urgent scheduler priority /
833	next.tep_qos_ui_is_urgent = `1`;
834	next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
835	break;
836
837	case TASK_BACKGROUND_APPLICATION:
838	/ This is really 'non-focal but on-screen' /
839	next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
840	break;
841
842	case TASK_DEFAULT_APPLICATION:
843	/ This is 'may render UI but we don't know if it's focal/nonfocal' /
844	next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
845	break;
846
847	case TASK_NONUI_APPLICATION:
848	/ i.e. 'off-screen' /
849	next.tep_qos_ceiling = THREAD_QOS_LEGACY;
850	break;
851
852	case TASK_CONTROL_APPLICATION:
853	case TASK_GRAPHICS_SERVER:
854	next.tep_qos_ui_is_urgent = `1`;
855	next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED;
856	break;
857
858	case TASK_THROTTLE_APPLICATION:
859	/ i.e. 'TAL launch' /
860	next.tep_qos_ceiling = THREAD_QOS_UTILITY;
861	role_clamp = THREAD_QOS_UTILITY;
862	break;
863
864	case TASK_DARWINBG_APPLICATION:
865	/ i.e. 'DARWIN_BG throttled background application' /
866	next.tep_qos_ceiling = THREAD_QOS_BACKGROUND;
867	break;
868
869	case TASK_UNSPECIFIED:
870	default:
871	/ Apps that don't have an application role get*
872	* USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
873	next.tep_qos_ceiling = THREAD_QOS_LEGACY;
874	break;
875	}
876	} else {
877	/ Daemons and dext get USER_INTERACTIVE squashed to USER_INITIATED /
878	next.tep_qos_ceiling = THREAD_QOS_USER_INITIATED;
879	}
880
881	if (role_clamp != THREAD_QOS_UNSPECIFIED) {
882	if (requested.trp_qos_clamp != THREAD_QOS_UNSPECIFIED) {
883	next.tep_qos_clamp = MIN(role_clamp, requested.trp_qos_clamp);
884	} else {
885	next.tep_qos_clamp = role_clamp;
886	}
887	} else {
888	next.tep_qos_clamp = requested.trp_qos_clamp;
889	}
890
891	/ Calculate DARWIN_BG /
892	bool wants_darwinbg = false;
893	bool wants_all_sockets_bg = false; / Do I want my existing sockets to be bg /
894	bool wants_watchersbg = false; / Do I want my pidbound threads to be bg /
895	bool adaptive_bg_only = false; / This task is BG only because it's adaptive unboosted /
896
897	/ Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. /
898	if (requested.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
899	requested.trp_boosted == `0`) {
900	wants_darwinbg = true;
901	adaptive_bg_only = true;
902	}
903
904	/*
905	* If DARWIN_BG has been requested at either level, it's engaged.
906	* Only true DARWIN_BG changes cause watchers to transition.
907	*
908	* Backgrounding due to apptype does.
909	*/
910	if (requested.trp_int_darwinbg \|\| requested.trp_ext_darwinbg \|\|
911	next.tep_role == TASK_DARWINBG_APPLICATION) {
912	wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = true;
913	adaptive_bg_only = false;
914	}
915
916	/ Application launching in special Transparent App Lifecycle throttle mode /
917	if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT &&
918	requested.trp_role == TASK_THROTTLE_APPLICATION) {
919	next.tep_tal_engaged = `1`;
920	}
921
922	/ Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. /
923	if (requested.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
924	wants_darwinbg = true;
925	adaptive_bg_only = false;
926	}
927
928	if (next.tep_qos_clamp == THREAD_QOS_BACKGROUND \|\|
929	next.tep_qos_clamp == THREAD_QOS_MAINTENANCE) {
930	wants_darwinbg = true;
931	adaptive_bg_only = false;
932	}
933
934	/ Calculate side effects of DARWIN_BG /
935
936	if (wants_darwinbg) {
937	next.tep_darwinbg = `1`;
938	/ darwinbg tasks always create bg sockets, but we don't always loop over all sockets /
939	next.tep_new_sockets_bg = `1`;
940	next.tep_lowpri_cpu = `1`;
941	}
942
943	if (wants_all_sockets_bg) {
944	next.tep_all_sockets_bg = `1`;
945	}
946
947	if (wants_watchersbg) {
948	next.tep_watchers_bg = `1`;
949	}
950
951	next.tep_adaptive_bg = adaptive_bg_only;
952
953	/ Calculate low CPU priority /
954
955	boolean_t wants_lowpri_cpu = false;
956
957	if (wants_darwinbg) {
958	wants_lowpri_cpu = true;
959	}
960
961	if (requested.trp_sup_lowpri_cpu && requested.trp_boosted == `0`) {
962	wants_lowpri_cpu = true;
963	}
964
965	if (wants_lowpri_cpu) {
966	next.tep_lowpri_cpu = `1`;
967	}
968
969	/ Calculate IO policy /
970
971	/ Update BG IO policy (so we can see if it has changed) /
972	next.tep_bg_iotier = requested.trp_bg_iotier;
973
974	int iopol = THROTTLE_LEVEL_TIER0;
975
976	if (wants_darwinbg) {
977	iopol = MAX(iopol, requested.trp_bg_iotier);
978	}
979
980	if (requested.trp_apptype == TASK_APPTYPE_DAEMON_STANDARD) {
981	iopol = MAX(iopol, proc_standard_daemon_tier);
982	}
983
984	if (requested.trp_sup_disk && requested.trp_boosted == `0`) {
985	iopol = MAX(iopol, proc_suppressed_disk_tier);
986	}
987
988	if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
989	iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.tep_qos_clamp]);
990	}
991
992	iopol = MAX(iopol, requested.trp_int_iotier);
993	iopol = MAX(iopol, requested.trp_ext_iotier);
994
995	next.tep_io_tier = iopol;
996
997	/ Calculate Passive IO policy /
998
999	if (requested.trp_ext_iopassive \|\| requested.trp_int_iopassive) {
1000	next.tep_io_passive = `1`;
1001	}
1002
1003	/ Calculate suppression-active flag /
1004	boolean_t appnap_transition = false;
1005
1006	if (requested.trp_sup_active && requested.trp_boosted == `0`) {
1007	next.tep_sup_active = `1`;
1008	}
1009
1010	if (task->effective_policy.tep_sup_active != next.tep_sup_active) {
1011	appnap_transition = true;
1012	}
1013
1014	/ Calculate timer QOS /
1015	int latency_qos = requested.trp_base_latency_qos;
1016
1017	if (requested.trp_sup_timer && requested.trp_boosted == `0`) {
1018	latency_qos = requested.trp_sup_timer;
1019	}
1020
1021	if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1022	latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.tep_qos_clamp]);
1023	}
1024
1025	if (requested.trp_over_latency_qos != `0`) {
1026	latency_qos = requested.trp_over_latency_qos;
1027	}
1028
1029	/ Treat the windowserver special /
1030	if (requested.trp_role == TASK_GRAPHICS_SERVER) {
1031	latency_qos = proc_graphics_timer_qos;
1032	}
1033
1034	next.tep_latency_qos = latency_qos;
1035
1036	/ Calculate throughput QOS /
1037	int through_qos = requested.trp_base_through_qos;
1038
1039	if (requested.trp_sup_throughput && requested.trp_boosted == `0`) {
1040	through_qos = requested.trp_sup_throughput;
1041	}
1042
1043	if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1044	through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.tep_qos_clamp]);
1045	}
1046
1047	if (requested.trp_over_through_qos != `0`) {
1048	through_qos = requested.trp_over_through_qos;
1049	}
1050
1051	next.tep_through_qos = through_qos;
1052
1053	/ Calculate suppressed CPU priority /
1054	if (requested.trp_sup_cpu && requested.trp_boosted == `0`) {
1055	next.tep_suppressed_cpu = `1`;
1056	}
1057
1058	/*
1059	* Calculate background sockets
1060	* Don't take into account boosting to limit transition frequency.
1061	*/
1062	if (requested.trp_sup_bg_sockets) {
1063	next.tep_all_sockets_bg = `1`;
1064	next.tep_new_sockets_bg = `1`;
1065	}
1066
1067	/ Apply SFI Managed class bit /
1068	next.tep_sfi_managed = requested.trp_sfi_managed;
1069
1070	/ Calculate 'live donor' status for live importance /
1071	switch (requested.trp_apptype) {
1072	case TASK_APPTYPE_APP_TAL:
1073	case TASK_APPTYPE_APP_DEFAULT:
1074	if (requested.trp_ext_darwinbg == `1` \|\|
1075	(next.tep_sup_active == `1` &&
1076	(task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_NONDONOR)) \|\|
1077	next.tep_role == TASK_DARWINBG_APPLICATION) {
1078	next.tep_live_donor = `0`;
1079	} else {
1080	next.tep_live_donor = `1`;
1081	}
1082	break;
1083
1084	case TASK_APPTYPE_DAEMON_INTERACTIVE:
1085	case TASK_APPTYPE_DAEMON_STANDARD:
1086	case TASK_APPTYPE_DAEMON_ADAPTIVE:
1087	case TASK_APPTYPE_DAEMON_BACKGROUND:
1088	case TASK_APPTYPE_DRIVER:
1089	default:
1090	next.tep_live_donor = `0`;
1091	break;
1092	}
1093
1094	if (requested.trp_terminated) {
1095	/*
1096	* Shoot down the throttles that slow down exit or response to SIGTERM
1097	* We don't need to shoot down:
1098	* passive (don't want to cause others to throttle)
1099	* all_sockets_bg (don't need to iterate FDs on every exit)
1100	* new_sockets_bg (doesn't matter for exiting process)
1101	* pidsuspend (jetsam-ed BG process shouldn't run again)
1102	* watchers_bg (watcher threads don't need to be unthrottled)
1103	* latency_qos (affects userspace timers only)
1104	*/
1105
1106	next.tep_terminated = `1`;
1107	next.tep_darwinbg = `0`;
1108	next.tep_lowpri_cpu = `0`;
1109	next.tep_io_tier = THROTTLE_LEVEL_TIER0;
1110	next.tep_tal_engaged = `0`;
1111	next.tep_role = TASK_UNSPECIFIED;
1112	next.tep_suppressed_cpu = `0`;
1113	}
1114
1115	/*
1116	* Step 3:
1117	* Swap out old policy for new policy
1118	*/
1119
1120	struct task_effective_policy prev = task->effective_policy;
1121
1122	/ This is the point where the new values become visible to other threads /
1123	task->effective_policy = next;
1124
1125	/ Don't do anything further to a half-formed task /
1126	if (in_create) {
1127	return;
1128	}
1129
1130	if (task == kernel_task) {
1131	panic("Attempting to set task policy on kernel_task");
1132	}
1133
1134	/*
1135	* Step 4:
1136	* Pend updates that can't be done while holding the task lock
1137	*/
1138
1139	if (prev.tep_all_sockets_bg != next.tep_all_sockets_bg) {
1140	pend_token->tpt_update_sockets = `1`;
1141	}
1142
1143	/ Only re-scan the timer list if the qos level is getting less strong /
1144	if (prev.tep_latency_qos > next.tep_latency_qos) {
1145	pend_token->tpt_update_timers = `1`;
1146	}
1147
1148	#if CONFIG_TASKWATCH
1149	if (prev.tep_watchers_bg != next.tep_watchers_bg) {
1150	pend_token->tpt_update_watchers = `1`;
1151	}
1152	#endif /* CONFIG_TASKWATCH */
1153
1154	if (prev.tep_live_donor != next.tep_live_donor) {
1155	pend_token->tpt_update_live_donor = `1`;
1156	}
1157
1158	/*
1159	* Step 5:
1160	* Update other subsystems as necessary if something has changed
1161	*/
1162
1163	bool update_threads = false, update_sfi = false, update_termination = false;
1164
1165	/*
1166	* Check for the attributes that thread_policy_update_internal_locked() consults,
1167	* and trigger thread policy re-evaluation.
1168	*/
1169	if (prev.tep_io_tier != next.tep_io_tier \|\|
1170	prev.tep_bg_iotier != next.tep_bg_iotier \|\|
1171	prev.tep_io_passive != next.tep_io_passive \|\|
1172	prev.tep_darwinbg != next.tep_darwinbg \|\|
1173	prev.tep_qos_clamp != next.tep_qos_clamp \|\|
1174	prev.tep_qos_ceiling != next.tep_qos_ceiling \|\|
1175	prev.tep_qos_ui_is_urgent != next.tep_qos_ui_is_urgent \|\|
1176	prev.tep_latency_qos != next.tep_latency_qos \|\|
1177	prev.tep_through_qos != next.tep_through_qos \|\|
1178	prev.tep_lowpri_cpu != next.tep_lowpri_cpu \|\|
1179	prev.tep_new_sockets_bg != next.tep_new_sockets_bg \|\|
1180	prev.tep_terminated != next.tep_terminated \|\|
1181	prev.tep_adaptive_bg != next.tep_adaptive_bg) {
1182	update_threads = true;
1183	}
1184
1185	/*
1186	* Check for the attributes that sfi_thread_classify() consults,
1187	* and trigger SFI re-evaluation.
1188	*/
1189	if (prev.tep_latency_qos != next.tep_latency_qos \|\|
1190	prev.tep_role != next.tep_role \|\|
1191	prev.tep_sfi_managed != next.tep_sfi_managed) {
1192	update_sfi = true;
1193	}
1194
1195	/ Reflect task role transitions into the coalition role counters /
1196	if (prev.tep_role != next.tep_role) {
1197	if (task_policy_update_coalition_focal_tasks(task, prev_role: prev.tep_role, next_role: next.tep_role, pend_token)) {
1198	update_sfi = true;
1199	}
1200	}
1201
1202	if (prev.tep_terminated != next.tep_terminated) {
1203	update_termination = true;
1204	}
1205
1206	bool update_priority = false;
1207
1208	int16_t priority = BASEPRI_DEFAULT;
1209	int16_t max_priority = MAXPRI_USER;
1210
1211	if (next.tep_lowpri_cpu) {
1212	priority = MAXPRI_THROTTLE;
1213	max_priority = MAXPRI_THROTTLE;
1214	} else if (next.tep_suppressed_cpu) {
1215	priority = MAXPRI_SUPPRESSED;
1216	max_priority = MAXPRI_SUPPRESSED;
1217	} else {
1218	switch (next.tep_role) {
1219	case TASK_CONTROL_APPLICATION:
1220	priority = BASEPRI_CONTROL;
1221	break;
1222	case TASK_GRAPHICS_SERVER:
1223	priority = BASEPRI_GRAPHICS;
1224	max_priority = MAXPRI_RESERVED;
1225	break;
1226	default:
1227	break;
1228	}
1229
1230	/ factor in 'nice' value /
1231	priority += task->importance;
1232
1233	if (task->effective_policy.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1234	int16_t qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.tep_qos_clamp];
1235
1236	priority = MIN(priority, qos_clamp_priority);
1237	max_priority = MIN(max_priority, qos_clamp_priority);
1238	}
1239
1240	if (priority > max_priority) {
1241	priority = max_priority;
1242	} else if (priority < MINPRI) {
1243	priority = MINPRI;
1244	}
1245	}
1246
1247	assert(priority <= max_priority);
1248
1249	/ avoid extra work if priority isn't changing /
1250	if (priority != task->priority \|\|
1251	max_priority != task->max_priority) {
1252	/ update the scheduling priority for the task /
1253	task->max_priority = max_priority;
1254	task->priority = priority;
1255	update_priority = true;
1256	}
1257
1258	/ Loop over the threads in the task:*
1259	* only once
1260	* only if necessary
1261	* with one thread mutex hold per thread
1262	*/
1263	if (update_threads \|\| update_priority \|\| update_sfi) {
1264	thread_t thread;
1265
1266	queue_iterate(&task->threads, thread, thread_t, task_threads) {
1267	struct task_pend_token thread_pend_token = {};
1268
1269	if (update_sfi) {
1270	thread_pend_token.tpt_update_thread_sfi = `1`;
1271	}
1272
1273	if (update_priority \|\| update_threads) {
1274	/ Check if we need to reevaluate turnstile push /
1275	if (pend_token->tpt_update_turnstile) {
1276	thread_pend_token.tpt_update_turnstile = `1`;
1277	}
1278	thread_policy_update_tasklocked(thread,
1279	priority: task->priority, max_priority: task->max_priority,
1280	pend_token: &thread_pend_token);
1281	}
1282
1283	assert(!thread_pend_token.tpt_update_sockets);
1284
1285	// Slightly risky, as we still hold the task lock...
1286	thread_policy_update_complete_unlocked(task: thread, pend_token: &thread_pend_token);
1287	}
1288	}
1289
1290	/*
1291	* Use the app-nap transitions to influence the
1292	* transition of the process within the jetsam band
1293	* [and optionally its live-donor status]
1294	* On macOS only.
1295	*/
1296	if (appnap_transition) {
1297	if (task->effective_policy.tep_sup_active == `1`) {
1298	memorystatus_update_priority_for_appnap(p: ((proc_t) get_bsdtask_info(task)), TRUE);
1299	} else {
1300	memorystatus_update_priority_for_appnap(p: ((proc_t) get_bsdtask_info(task)), FALSE);
1301	}
1302	}
1303
1304	if (update_termination) {
1305	/*
1306	* This update is done after the terminated bit is set,
1307	* and all updates other than this one will check that bit,
1308	* so we know that it will be the last update. (This path
1309	* skips the check for the terminated bit.)
1310	*/
1311	if (task_set_game_mode_locked(task, false)) {
1312	pend_token->tpt_update_game_mode = `1`;
1313	}
1314	}
1315	}
1316
1317
1318	/*
1319	* Yet another layering violation. We reach out and bang on the coalition directly.
1320	*/
1321	static boolean_t
1322	task_policy_update_coalition_focal_tasks(task_t task,
1323	int prev_role,
1324	int next_role,
1325	task_pend_token_t pend_token)
1326	{
1327	boolean_t sfi_transition = FALSE;
1328	uint32_t new_count = `0`;
1329
1330	/ task moving into/out-of the foreground /
1331	if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
1332	if (task_coalition_adjust_focal_count(task, count: `1`, new_count: &new_count) && (new_count == `1`)) {
1333	sfi_transition = TRUE;
1334	pend_token->tpt_update_tg_ui_flag = TRUE;
1335	}
1336	} else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
1337	if (task_coalition_adjust_focal_count(task, count: -`1`, new_count: &new_count) && (new_count == `0`)) {
1338	sfi_transition = TRUE;
1339	pend_token->tpt_update_tg_ui_flag = TRUE;
1340	}
1341	}
1342
1343	/ task moving into/out-of background /
1344	if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
1345	if (task_coalition_adjust_nonfocal_count(task, count: `1`, new_count: &new_count) && (new_count == `1`)) {
1346	sfi_transition = TRUE;
1347	}
1348	} else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
1349	if (task_coalition_adjust_nonfocal_count(task, count: -`1`, new_count: &new_count) && (new_count == `0`)) {
1350	sfi_transition = TRUE;
1351	}
1352	}
1353
1354	if (sfi_transition) {
1355	pend_token->tpt_update_coal_sfi = `1`;
1356	}
1357	return sfi_transition;
1358	}
1359
#if CONFIG_SCHED_SFI

/*
 * Per-task callback handed to coalition_for_each_task(): runs
 * sfi_reevaluate() on every thread of 'task' while holding that task's lock.
 *
 * 'ctx' carries the task on whose behalf the re-evaluation is being done;
 * that task is skipped because it has already been updated.
 *
 * coalition object is locked
 */
static void
task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
{
	thread_t thread;

	/* unused for now */
	(void)coal;

	/* skip the task we're re-evaluating on behalf of: it's already updated */
	if (task == (task_t)ctx) {
		return;
	}

	task_lock(task);

	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		sfi_reevaluate(thread);
	}

	task_unlock(task);
}
#endif /* CONFIG_SCHED_SFI */
1385
1386	/*
1387	* Called with task unlocked to do things that can't be done while holding the task lock
1388	*/
1389	void
1390	task_policy_update_complete_unlocked(task_t task, task_pend_token_t pend_token)
1391	{
1392	#ifdef MACH_BSD
1393	if (pend_token->tpt_update_sockets) {
1394	proc_apply_task_networkbg(pid: task_pid(task), THREAD_NULL);
1395	}
1396	#endif /* MACH_BSD */
1397
1398	/ The timer throttle has been removed or reduced, we need to look for expired timers and fire them /
1399	if (pend_token->tpt_update_timers) {
1400	ml_timer_evaluate();
1401	}
1402
1403	#if CONFIG_TASKWATCH
1404	if (pend_token->tpt_update_watchers) {
1405	apply_appstate_watchers(task);
1406	}
1407	#endif /* CONFIG_TASKWATCH */
1408
1409	if (pend_token->tpt_update_live_donor) {
1410	task_importance_update_live_donor(target_task: task);
1411	}
1412
1413	#if CONFIG_SCHED_SFI
1414	/ use the resource coalition for SFI re-evaluation /
1415	if (pend_token->tpt_update_coal_sfi) {
1416	coalition_for_each_task(coal: task->coalition[COALITION_TYPE_RESOURCE],
1417	ctx: (void *)task, callback: task_sfi_reevaluate_cb);
1418	}
1419	#endif /* CONFIG_SCHED_SFI */
1420
1421	#if CONFIG_THREAD_GROUPS
1422	if (pend_token->tpt_update_tg_ui_flag) {
1423	task_coalition_thread_group_focal_update(task);
1424	}
1425	if (pend_token->tpt_update_tg_app_flag) {
1426	task_coalition_thread_group_application_set(task);
1427	}
1428	if (pend_token->tpt_update_game_mode) {
1429	task_coalition_thread_group_game_mode_update(task);
1430	}
1431	#endif /* CONFIG_THREAD_GROUPS */
1432	}
1433
1434	/*
1435	* Initiate a task policy state transition
1436	*
1437	* Everything that modifies requested except functions that need to hold the task lock
1438	* should use this function
1439	*
1440	* Argument validation should be performed before reaching this point.
1441	*
1442	* TODO: Do we need to check task->active?
1443	*/
1444	void
1445	proc_set_task_policy(task_t task,
1446	int category,
1447	int flavor,
1448	int value)
1449	{
1450	struct task_pend_token pend_token = {};
1451
1452	task_lock(task);
1453
1454	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1455	(IMPORTANCE_CODE(flavor, (category \| TASK_POLICY_TASK))) \| DBG_FUNC_START,
1456	task_pid(task), trequested_0(task),
1457	trequested_1(task), value, `0`);
1458
1459	proc_set_task_policy_locked(task, category, flavor, value, value2: `0`);
1460
1461	task_policy_update_locked(task, pend_token: &pend_token);
1462
1463
1464	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1465	(IMPORTANCE_CODE(flavor, (category \| TASK_POLICY_TASK))) \| DBG_FUNC_END,
1466	task_pid(task), trequested_0(task),
1467	trequested_1(task), tpending(&pend_token), `0`);
1468
1469	task_unlock(task);
1470
1471	task_policy_update_complete_unlocked(task, pend_token: &pend_token);
1472	}
1473
1474	/*
1475	* Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1476	* Same locking rules apply.
1477	*/
1478	void
1479	proc_set_task_policy2(task_t task,
1480	int category,
1481	int flavor,
1482	int value,
1483	int value2)
1484	{
1485	struct task_pend_token pend_token = {};
1486
1487	task_lock(task);
1488
1489	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1490	(IMPORTANCE_CODE(flavor, (category \| TASK_POLICY_TASK))) \| DBG_FUNC_START,
1491	task_pid(task), trequested_0(task),
1492	trequested_1(task), value, `0`);
1493
1494	proc_set_task_policy_locked(task, category, flavor, value, value2);
1495
1496	task_policy_update_locked(task, pend_token: &pend_token);
1497
1498	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1499	(IMPORTANCE_CODE(flavor, (category \| TASK_POLICY_TASK))) \| DBG_FUNC_END,
1500	task_pid(task), trequested_0(task),
1501	trequested_1(task), tpending(&pend_token), `0`);
1502
1503	task_unlock(task);
1504
1505	task_policy_update_complete_unlocked(task, pend_token: &pend_token);
1506	}
1507
1508	/*
1509	* Set the requested state for a specific flavor to a specific value.
1510	*
1511	* TODO:
1512	* Verify that arguments to non iopol things are 1 or 0
1513	*/
1514	static void
1515	proc_set_task_policy_locked(task_t task,
1516	int category,
1517	int flavor,
1518	int value,
1519	int value2)
1520	{
1521	int tier, passive;
1522
1523	struct task_requested_policy requested = task->requested_policy;
1524
1525	switch (flavor) {
1526	/ Category: EXTERNAL and INTERNAL /
1527
1528	case TASK_POLICY_DARWIN_BG:
1529	if (category == TASK_POLICY_EXTERNAL) {
1530	requested.trp_ext_darwinbg = value;
1531	} else {
1532	requested.trp_int_darwinbg = value;
1533	}
1534	break;
1535
1536	case TASK_POLICY_IOPOL:
1537	proc_iopol_to_tier(iopolicy: value, tier: &tier, passive: &passive);
1538	if (category == TASK_POLICY_EXTERNAL) {
1539	requested.trp_ext_iotier = tier;
1540	requested.trp_ext_iopassive = passive;
1541	} else {
1542	requested.trp_int_iotier = tier;
1543	requested.trp_int_iopassive = passive;
1544	}
1545	break;
1546
1547	case TASK_POLICY_IO:
1548	if (category == TASK_POLICY_EXTERNAL) {
1549	requested.trp_ext_iotier = value;
1550	} else {
1551	requested.trp_int_iotier = value;
1552	}
1553	break;
1554
1555	case TASK_POLICY_PASSIVE_IO:
1556	if (category == TASK_POLICY_EXTERNAL) {
1557	requested.trp_ext_iopassive = value;
1558	} else {
1559	requested.trp_int_iopassive = value;
1560	}
1561	break;
1562
1563	/ Category: INTERNAL /
1564
1565	case TASK_POLICY_DARWIN_BG_IOPOL:
1566	assert(category == TASK_POLICY_INTERNAL);
1567	proc_iopol_to_tier(iopolicy: value, tier: &tier, passive: &passive);
1568	requested.trp_bg_iotier = tier;
1569	break;
1570
1571	/ Category: ATTRIBUTE /
1572
1573	case TASK_POLICY_BOOST:
1574	assert(category == TASK_POLICY_ATTRIBUTE);
1575	requested.trp_boosted = value;
1576	break;
1577
1578	case TASK_POLICY_ROLE:
1579	assert(category == TASK_POLICY_ATTRIBUTE);
1580	requested.trp_role = value;
1581	break;
1582
1583	case TASK_POLICY_TERMINATED:
1584	assert(category == TASK_POLICY_ATTRIBUTE);
1585	requested.trp_terminated = value;
1586	break;
1587
1588	case TASK_BASE_LATENCY_QOS_POLICY:
1589	assert(category == TASK_POLICY_ATTRIBUTE);
1590	requested.trp_base_latency_qos = value;
1591	break;
1592
1593	case TASK_BASE_THROUGHPUT_QOS_POLICY:
1594	assert(category == TASK_POLICY_ATTRIBUTE);
1595	requested.trp_base_through_qos = value;
1596	break;
1597
1598	case TASK_POLICY_SFI_MANAGED:
1599	assert(category == TASK_POLICY_ATTRIBUTE);
1600	requested.trp_sfi_managed = value;
1601	break;
1602
1603	case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1604	assert(category == TASK_POLICY_ATTRIBUTE);
1605	requested.trp_base_latency_qos = value;
1606	requested.trp_base_through_qos = value2;
1607	break;
1608
1609	case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1610	assert(category == TASK_POLICY_ATTRIBUTE);
1611	requested.trp_over_latency_qos = value;
1612	requested.trp_over_through_qos = value2;
1613	break;
1614
1615	default:
1616	panic("unknown task policy: %d %d %d %d", category, flavor, value, value2);
1617	break;
1618	}
1619
1620	task->requested_policy = requested;
1621	}
1622
1623	/*
1624	* Gets what you set. Effective values may be different.
1625	*/
1626	int
1627	proc_get_task_policy(task_t task,
1628	int category,
1629	int flavor)
1630	{
1631	int value = `0`;
1632
1633	task_lock(task);
1634
1635	struct task_requested_policy requested = task->requested_policy;
1636
1637	switch (flavor) {
1638	case TASK_POLICY_DARWIN_BG:
1639	if (category == TASK_POLICY_EXTERNAL) {
1640	value = requested.trp_ext_darwinbg;
1641	} else {
1642	value = requested.trp_int_darwinbg;
1643	}
1644	break;
1645	case TASK_POLICY_IOPOL:
1646	if (category == TASK_POLICY_EXTERNAL) {
1647	value = proc_tier_to_iopol(tier: requested.trp_ext_iotier,
1648	passive: requested.trp_ext_iopassive);
1649	} else {
1650	value = proc_tier_to_iopol(tier: requested.trp_int_iotier,
1651	passive: requested.trp_int_iopassive);
1652	}
1653	break;
1654	case TASK_POLICY_IO:
1655	if (category == TASK_POLICY_EXTERNAL) {
1656	value = requested.trp_ext_iotier;
1657	} else {
1658	value = requested.trp_int_iotier;
1659	}
1660	break;
1661	case TASK_POLICY_PASSIVE_IO:
1662	if (category == TASK_POLICY_EXTERNAL) {
1663	value = requested.trp_ext_iopassive;
1664	} else {
1665	value = requested.trp_int_iopassive;
1666	}
1667	break;
1668	case TASK_POLICY_DARWIN_BG_IOPOL:
1669	assert(category == TASK_POLICY_INTERNAL);
1670	value = proc_tier_to_iopol(tier: requested.trp_bg_iotier, passive: `0`);
1671	break;
1672	case TASK_POLICY_ROLE:
1673	assert(category == TASK_POLICY_ATTRIBUTE);
1674	value = requested.trp_role;
1675	break;
1676	case TASK_POLICY_SFI_MANAGED:
1677	assert(category == TASK_POLICY_ATTRIBUTE);
1678	value = requested.trp_sfi_managed;
1679	break;
1680	default:
1681	panic("unknown policy_flavor %d", flavor);
1682	break;
1683	}
1684
1685	task_unlock(task);
1686
1687	return value;
1688	}
1689
1690	/*
1691	* Variant of proc_get_task_policy() that returns two scalar outputs.
1692	*/
1693	void
1694	proc_get_task_policy2(task_t task,
1695	__assert_only int category,
1696	int flavor,
1697	int *value1,
1698	int *value2)
1699	{
1700	task_lock(task);
1701
1702	struct task_requested_policy requested = task->requested_policy;
1703
1704	switch (flavor) {
1705	case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1706	assert(category == TASK_POLICY_ATTRIBUTE);
1707	*value1 = requested.trp_base_latency_qos;
1708	*value2 = requested.trp_base_through_qos;
1709	break;
1710
1711	case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1712	assert(category == TASK_POLICY_ATTRIBUTE);
1713	*value1 = requested.trp_over_latency_qos;
1714	*value2 = requested.trp_over_through_qos;
1715	break;
1716
1717	default:
1718	panic("unknown policy_flavor %d", flavor);
1719	break;
1720	}
1721
1722	task_unlock(task);
1723	}
1724
1725	/*
1726	* Function for querying effective state for relevant subsystems
1727	* Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1728	*
1729	* ONLY the relevant subsystem should query this.
1730	* NEVER take a value from the 'effective' function and stuff it into a setter.
1731	*
1732	* NOTE: This accessor does not take the task lock.
1733	* Notifications of state updates need to be externally synchronized with state queries.
1734	* This routine MUST remain interrupt safe, as it is potentially invoked
1735	* within the context of a timer interrupt. It is also called in KDP context for stackshot.
1736	*/
1737	int
1738	proc_get_effective_task_policy(task_t task,
1739	int flavor)
1740	{
1741	int value = `0`;
1742
1743	switch (flavor) {
1744	case TASK_POLICY_DARWIN_BG:
1745	/*
1746	* This backs the KPI call proc_pidbackgrounded to find
1747	* out if a pid is backgrounded.
1748	* It is used to communicate state to the VM system, as well as
1749	* prioritizing requests to the graphics system.
1750	* Returns 1 for background mode, 0 for normal mode
1751	*/
1752	value = task->effective_policy.tep_darwinbg;
1753	break;
1754	case TASK_POLICY_ALL_SOCKETS_BG:
1755	/*
1756	* do_background_socket() calls this to determine what it should do to the proc's sockets
1757	* Returns 1 for background mode, 0 for normal mode
1758	*
1759	* This consults both thread and task so un-DBGing a thread while the task is BG
1760	* doesn't get you out of the network throttle.
1761	*/
1762	value = task->effective_policy.tep_all_sockets_bg;
1763	break;
1764	case TASK_POLICY_SUP_ACTIVE:
1765	/*
1766	* Is the task in AppNap? This is used to determine the urgency
1767	* that's passed to the performance management subsystem for threads
1768	* that are running at a priority <= MAXPRI_THROTTLE.
1769	*/
1770	value = task->effective_policy.tep_sup_active;
1771	break;
1772	case TASK_POLICY_LATENCY_QOS:
1773	/*
1774	* timer arming calls into here to find out the timer coalescing level
1775	* Returns a QoS tier (0-6)
1776	*/
1777	value = task->effective_policy.tep_latency_qos;
1778	break;
1779	case TASK_POLICY_THROUGH_QOS:
1780	/*
1781	* This value is passed into the urgency callout from the scheduler
1782	* to the performance management subsystem.
1783	* Returns a QoS tier (0-6)
1784	*/
1785	value = task->effective_policy.tep_through_qos;
1786	break;
1787	case TASK_POLICY_ROLE:
1788	/*
1789	* This controls various things that ask whether a process is foreground,
1790	* like SFI, VM, access to GPU, etc
1791	*/
1792	value = task->effective_policy.tep_role;
1793	break;
1794	case TASK_POLICY_WATCHERS_BG:
1795	/*
1796	* This controls whether or not a thread watching this process should be BG.
1797	*/
1798	value = task->effective_policy.tep_watchers_bg;
1799	break;
1800	case TASK_POLICY_SFI_MANAGED:
1801	/*
1802	* This controls whether or not a process is targeted for specific control by thermald.
1803	*/
1804	value = task->effective_policy.tep_sfi_managed;
1805	break;
1806	case TASK_POLICY_TERMINATED:
1807	/*
1808	* This controls whether or not a process has its throttling properties shot down for termination.
1809	*/
1810	value = task->effective_policy.tep_terminated;
1811	break;
1812	default:
1813	panic("unknown policy_flavor %d", flavor);
1814	break;
1815	}
1816
1817	return value;
1818	}
1819
1820	/*
1821	* Convert from IOPOL_* values to throttle tiers.
1822	*
1823	* TODO: Can this be made more compact, like an array lookup
1824	* Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
1825	*/
1826
1827	void
1828	proc_iopol_to_tier(int iopolicy, int tier, int* *passive)
1829	{
1830	*passive = `0`;
1831	*tier = `0`;
1832	switch (iopolicy) {
1833	case IOPOL_IMPORTANT:
1834	*tier = THROTTLE_LEVEL_TIER0;
1835	break;
1836	case IOPOL_PASSIVE:
1837	*tier = THROTTLE_LEVEL_TIER0;
1838	*passive = `1`;
1839	break;
1840	case IOPOL_STANDARD:
1841	*tier = THROTTLE_LEVEL_TIER1;
1842	break;
1843	case IOPOL_UTILITY:
1844	*tier = THROTTLE_LEVEL_TIER2;
1845	break;
1846	case IOPOL_THROTTLE:
1847	*tier = THROTTLE_LEVEL_TIER3;
1848	break;
1849	default:
1850	panic("unknown I/O policy %d", iopolicy);
1851	break;
1852	}
1853	}
1854
1855	int
1856	proc_tier_to_iopol(int tier, int passive)
1857	{
1858	if (passive == `1`) {
1859	switch (tier) {
1860	case THROTTLE_LEVEL_TIER0:
1861	return IOPOL_PASSIVE;
1862	default:
1863	panic("unknown passive tier %d", tier);
1864	return IOPOL_DEFAULT;
1865	}
1866	} else {
1867	switch (tier) {
1868	case THROTTLE_LEVEL_NONE:
1869	case THROTTLE_LEVEL_TIER0:
1870	return IOPOL_DEFAULT;
1871	case THROTTLE_LEVEL_TIER1:
1872	return IOPOL_STANDARD;
1873	case THROTTLE_LEVEL_TIER2:
1874	return IOPOL_UTILITY;
1875	case THROTTLE_LEVEL_TIER3:
1876	return IOPOL_THROTTLE;
1877	default:
1878	panic("unknown tier %d", tier);
1879	return IOPOL_DEFAULT;
1880	}
1881	}
1882	}
1883
1884	int
1885	proc_darwin_role_to_task_role(int darwin_role, task_role_t* task_role)
1886	{
1887	integer_t role = TASK_UNSPECIFIED;
1888
1889	switch (darwin_role) {
1890	case PRIO_DARWIN_ROLE_DEFAULT:
1891	role = TASK_UNSPECIFIED;
1892	break;
1893	case PRIO_DARWIN_ROLE_UI_FOCAL:
1894	role = TASK_FOREGROUND_APPLICATION;
1895	break;
1896	case PRIO_DARWIN_ROLE_UI:
1897	role = TASK_DEFAULT_APPLICATION;
1898	break;
1899	case PRIO_DARWIN_ROLE_NON_UI:
1900	role = TASK_NONUI_APPLICATION;
1901	break;
1902	case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
1903	role = TASK_BACKGROUND_APPLICATION;
1904	break;
1905	case PRIO_DARWIN_ROLE_TAL_LAUNCH:
1906	role = TASK_THROTTLE_APPLICATION;
1907	break;
1908	case PRIO_DARWIN_ROLE_DARWIN_BG:
1909	role = TASK_DARWINBG_APPLICATION;
1910	break;
1911	default:
1912	return EINVAL;
1913	}
1914
1915	*task_role = role;
1916
1917	return `0`;
1918	}
1919
1920	int
1921	proc_task_role_to_darwin_role(task_role_t task_role)
1922	{
1923	switch (task_role) {
1924	case TASK_FOREGROUND_APPLICATION:
1925	return PRIO_DARWIN_ROLE_UI_FOCAL;
1926	case TASK_BACKGROUND_APPLICATION:
1927	return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
1928	case TASK_NONUI_APPLICATION:
1929	return PRIO_DARWIN_ROLE_NON_UI;
1930	case TASK_DEFAULT_APPLICATION:
1931	return PRIO_DARWIN_ROLE_UI;
1932	case TASK_THROTTLE_APPLICATION:
1933	return PRIO_DARWIN_ROLE_TAL_LAUNCH;
1934	case TASK_DARWINBG_APPLICATION:
1935	return PRIO_DARWIN_ROLE_DARWIN_BG;
1936	case TASK_UNSPECIFIED:
1937	default:
1938	return PRIO_DARWIN_ROLE_DEFAULT;
1939	}
1940	}
1941
1942
1943	/ TODO: remove this variable when interactive daemon audit period is over /
1944	static TUNABLE(bool, ipc_importance_interactive_receiver,
1945	"imp_interactive_receiver", false);
1946
1947	/*
1948	* Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
1949	*
1950	* TODO: Make this function more table-driven instead of ad-hoc
1951	*/
1952	void
1953	proc_set_task_spawnpolicy(task_t task, thread_t thread, int apptype, int qos_clamp, task_role_t role,
1954	ipc_port_t * portwatch_ports, uint32_t portwatch_count)
1955	{
1956	struct task_pend_token pend_token = {};
1957
1958	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1959	(IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) \| DBG_FUNC_START,
1960	task_pid(task), trequested_0(task), trequested_1(task),
1961	apptype, `0`);
1962
1963	if (apptype != TASK_APPTYPE_NONE) {
1964	/*
1965	* Reset the receiver and denap state inherited from the
1966	* task's parent, but only if we are going to reset it via the
1967	* provided apptype.
1968	*/
1969	if (task_is_importance_receiver(task)) {
1970	task_importance_mark_receiver(task, FALSE);
1971	}
1972	if (task_is_importance_denap_receiver(task)) {
1973	task_importance_mark_denap_receiver(task, FALSE);
1974	}
1975	}
1976
1977	switch (apptype) {
1978	case TASK_APPTYPE_APP_DEFAULT:
1979	/ Apps become donors via the 'live-donor' flag instead of the static donor flag /
1980	task_importance_mark_donor(task, FALSE);
1981	task_importance_mark_live_donor(task, TRUE);
1982	// importance_receiver == FALSE
1983	#if defined(XNU_TARGET_OS_OSX)
1984	/ Apps are de-nap recievers on macOS for suppression behaviors /
1985	task_importance_mark_denap_receiver(task, TRUE);
1986	#endif /* !defined(XNU_TARGET_OS_OSX) */
1987	break;
1988
1989	case TASK_APPTYPE_DAEMON_INTERACTIVE:
1990	task_importance_mark_donor(task, TRUE);
1991	task_importance_mark_live_donor(task, FALSE);
1992	// importance_denap_receiver == FALSE
1993
1994	/*
1995	* A boot arg controls whether interactive daemons are importance receivers.
1996	* Normally, they are not. But for testing their behavior as an adaptive
1997	* daemon, the boot-arg can be set.
1998	*
1999	* TODO: remove this when the interactive daemon audit period is over.
2000	*/
2001	task_importance_mark_receiver(task, / FALSE / receiving: ipc_importance_interactive_receiver);
2002	break;
2003
2004	case TASK_APPTYPE_DAEMON_STANDARD:
2005	task_importance_mark_donor(task, TRUE);
2006	task_importance_mark_live_donor(task, FALSE);
2007	// importance_denap_receiver == FALSE
2008	// importance_receiver == FALSE
2009	break;
2010
2011	case TASK_APPTYPE_DAEMON_ADAPTIVE:
2012	task_importance_mark_donor(task, FALSE);
2013	task_importance_mark_live_donor(task, FALSE);
2014	task_importance_mark_receiver(task, TRUE);
2015	// importance_denap_receiver == FALSE
2016	break;
2017
2018	case TASK_APPTYPE_DAEMON_BACKGROUND:
2019	task_importance_mark_donor(task, FALSE);
2020	task_importance_mark_live_donor(task, FALSE);
2021	// importance_denap_receiver == FALSE
2022	// importance_receiver == FALSE
2023	break;
2024
2025	case TASK_APPTYPE_DRIVER:
2026	task_importance_mark_donor(task, FALSE);
2027	task_importance_mark_live_donor(task, FALSE);
2028	// importance_denap_receiver == FALSE
2029	// importance_receiver == FALSE
2030	break;
2031
2032	case TASK_APPTYPE_NONE:
2033	break;
2034	}
2035
2036	if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2037	int portwatch_boosts = `0`;
2038
2039	for (uint32_t i = `0`; i < portwatch_count; i++) {
2040	ipc_port_t port = NULL;
2041
2042	if (IP_VALID(port = portwatch_ports[i])) {
2043	int boost = `0`;
2044	task_add_importance_watchport(task, port, boostp: &boost);
2045	portwatch_boosts += boost;
2046	}
2047	}
2048
2049	if (portwatch_boosts > `0`) {
2050	task_importance_hold_internal_assertion(target_task: task, count: portwatch_boosts);
2051	}
2052	}
2053
2054	/ Redirect the turnstile push of watchports to task /
2055	if (portwatch_count && portwatch_ports != NULL) {
2056	task_add_turnstile_watchports(task, thread, portwatch_ports, portwatch_count);
2057	}
2058
2059	task_lock(task);
2060
2061	if (apptype != TASK_APPTYPE_NONE) {
2062	task_set_requested_apptype(task, apptype, false);
2063	if (task_is_app(task)) {
2064	pend_token.tpt_update_tg_app_flag = `1`;
2065	}
2066	}
2067
2068	#if !defined(XNU_TARGET_OS_OSX)
2069	/ Remove this after launchd starts setting it properly /
2070	if (apptype == TASK_APPTYPE_APP_DEFAULT && role == TASK_UNSPECIFIED) {
2071	task->requested_policy.trp_role = TASK_FOREGROUND_APPLICATION;
2072	} else
2073	#endif
2074	if (role != TASK_UNSPECIFIED) {
2075	task->requested_policy.trp_role = (uint32_t)role;
2076	}
2077
2078	if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2079	task->requested_policy.trp_qos_clamp = qos_clamp;
2080	}
2081
2082	task_policy_update_locked(task, pend_token: &pend_token);
2083
2084	task_unlock(task);
2085
2086	/ Ensure the donor bit is updated to be in sync with the new live donor status /
2087	pend_token.tpt_update_live_donor = `1`;
2088
2089	task_policy_update_complete_unlocked(task, pend_token: &pend_token);
2090
2091	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2092	(IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) \| DBG_FUNC_END,
2093	task_pid(task), trequested_0(task), trequested_1(task),
2094	task_is_importance_receiver(task), `0`);
2095	}
2096
2097	/*
2098	* Inherit task role across exec
2099	*/
2100	void
2101	proc_inherit_task_role(task_t new_task,
2102	task_t old_task)
2103	{
2104	int role;
2105
2106	/ inherit the role from old task to new task /
2107	role = proc_get_task_policy(task: old_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
2108	proc_set_task_policy(task: new_task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, value: role);
2109	}
2110
/* Compared against get_bsdtask_info(task) below to recognize the PID 1 task. */
extern void * XNU_PTRAUTH_SIGNED_PTR("initproc") initproc;
2112
2113	/*
2114	* Compute the default main thread qos for a task
2115	*/
2116	thread_qos_t
2117	task_compute_main_thread_qos(task_t task)
2118	{
2119	thread_qos_t primordial_qos = THREAD_QOS_UNSPECIFIED;
2120
2121	thread_qos_t qos_clamp = task->requested_policy.trp_qos_clamp;
2122
2123	switch (task->requested_policy.trp_apptype) {
2124	case TASK_APPTYPE_APP_TAL:
2125	case TASK_APPTYPE_APP_DEFAULT:
2126	primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2127	break;
2128
2129	case TASK_APPTYPE_DAEMON_INTERACTIVE:
2130	case TASK_APPTYPE_DAEMON_STANDARD:
2131	case TASK_APPTYPE_DAEMON_ADAPTIVE:
2132	case TASK_APPTYPE_DRIVER:
2133	primordial_qos = THREAD_QOS_LEGACY;
2134	break;
2135
2136	case TASK_APPTYPE_DAEMON_BACKGROUND:
2137	primordial_qos = THREAD_QOS_BACKGROUND;
2138	break;
2139	}
2140
2141	if (get_bsdtask_info(task) == initproc) {
2142	/ PID 1 gets a special case /
2143	primordial_qos = MAX(primordial_qos, THREAD_QOS_USER_INITIATED);
2144	}
2145
2146	if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2147	if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2148	primordial_qos = MIN(qos_clamp, primordial_qos);
2149	} else {
2150	primordial_qos = qos_clamp;
2151	}
2152	}
2153
2154	return primordial_qos;
2155	}
2156
2157
/* for process_policy to check before attempting to set */
2159	boolean_t
2160	proc_task_is_tal(task_t task)
2161	{
2162	return (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2163	}
2164
2165	int
2166	task_get_apptype(task_t task)
2167	{
2168	return task->requested_policy.trp_apptype;
2169	}
2170
2171	boolean_t
2172	task_is_daemon(task_t task)
2173	{
2174	switch (task->requested_policy.trp_apptype) {
2175	case TASK_APPTYPE_DAEMON_INTERACTIVE:
2176	case TASK_APPTYPE_DAEMON_STANDARD:
2177	case TASK_APPTYPE_DAEMON_ADAPTIVE:
2178	case TASK_APPTYPE_DAEMON_BACKGROUND:
2179	return TRUE;
2180	default:
2181	return FALSE;
2182	}
2183	}
2184
2185	bool
2186	task_is_driver(task_t task)
2187	{
2188	if (!task) {
2189	return FALSE;
2190	}
2191	return task->requested_policy.trp_apptype == TASK_APPTYPE_DRIVER;
2192	}
2193
2194	boolean_t
2195	task_is_app(task_t task)
2196	{
2197	switch (task->requested_policy.trp_apptype) {
2198	case TASK_APPTYPE_APP_DEFAULT:
2199	case TASK_APPTYPE_APP_TAL:
2200	return TRUE;
2201	default:
2202	return FALSE;
2203	}
2204	}
2205
2206
2207	/ for telemetry /
2208	integer_t
2209	task_grab_latency_qos(task_t task)
2210	{
2211	return qos_latency_policy_package(qv: proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2212	}
2213
2214	/ update the darwin background action state in the flags field for libproc /
2215	int
2216	proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2217	{
2218	if (task->requested_policy.trp_ext_darwinbg) {
2219	*flagsp \|= PROC_FLAG_EXT_DARWINBG;
2220	}
2221
2222	if (task->requested_policy.trp_int_darwinbg) {
2223	*flagsp \|= PROC_FLAG_DARWINBG;
2224	}
2225
2226	#if !defined(XNU_TARGET_OS_OSX)
2227	if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) {
2228	*flagsp \|= PROC_FLAG_IOS_APPLEDAEMON;
2229	}
2230
2231	if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2232	*flagsp \|= PROC_FLAG_IOS_IMPPROMOTION;
2233	}
2234	#endif /* !defined(XNU_TARGET_OS_OSX) */
2235
2236	if (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_DEFAULT \|\|
2237	task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) {
2238	*flagsp \|= PROC_FLAG_APPLICATION;
2239	}
2240
2241	if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2242	*flagsp \|= PROC_FLAG_ADAPTIVE;
2243	}
2244
2245	if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
2246	task->requested_policy.trp_boosted == `1`) {
2247	*flagsp \|= PROC_FLAG_ADAPTIVE_IMPORTANT;
2248	}
2249
2250	if (task_is_importance_donor(task)) {
2251	*flagsp \|= PROC_FLAG_IMPORTANCE_DONOR;
2252	}
2253
2254	if (task->effective_policy.tep_sup_active) {
2255	*flagsp \|= PROC_FLAG_SUPPRESSED;
2256	}
2257
2258	return `0`;
2259	}
2260
2261	/*
2262	* Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2263	* The current scheme packs as much data into a single tracepoint as it can.
2264	*
2265	* Each task/thread requested/effective structure is 64 bits in size. Any
2266	* given tracepoint will emit either requested or effective data, but not both.
2267	*
2268	* A tracepoint may emit any of task, thread, or task & thread data.
2269	*
2270	* The type of data emitted varies with pointer size. Where possible, both
2271	* task and thread data are emitted. In LP32 systems, the first and second
2272	* halves of either the task or thread data is emitted.
2273	*
2274	* The code uses uintptr_t array indexes instead of high/low to avoid
2275	* confusion WRT big vs little endian.
2276	*
2277	* The truth table for the tracepoint data functions is below, and has the
2278	* following invariants:
2279	*
2280	* 1) task and thread are uintptr_t*
2281	* 2) task may never be NULL
2282	*
2283	*
 *                                 LP32            LP64
 * trequested_0(task, NULL)        task[0]         task[0]
 * trequested_1(task, NULL)        task[1]         NULL
 * trequested_0(task, thread)      thread[0]       task[0]
 * trequested_1(task, thread)      thread[1]       thread[0]
2289	*
2290	* Basically, you get a full task or thread on LP32, and both on LP64.
2291	*
2292	* The uintptr_t munging here is squicky enough to deserve a comment.
2293	*
2294	* The variables we are accessing are laid out in memory like this:
2295	*
 *  [            LP64 uintptr_t  0          ]
 *  [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
 *
 *      1   2   3   4     5   6   7   8
2300	*
2301	*/
2302
2303	static uintptr_t
2304	trequested_0(task_t task)
2305	{
2306	static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2307
2308	uintptr_t* raw = (uintptr_t*)&task->requested_policy;
2309
2310	return raw[`0`];
2311	}
2312
2313	static uintptr_t
2314	trequested_1(task_t task)
2315	{
2316	#if defined __LP64__
2317	(void)task;
2318	return `0`;
2319	#else
2320	uintptr_t* raw = (uintptr_t*)(&task->requested_policy);
2321	return raw[`1`];
2322	#endif
2323	}
2324
2325	static uintptr_t
2326	teffective_0(task_t task)
2327	{
2328	uintptr_t* raw = (uintptr_t*)&task->effective_policy;
2329
2330	return raw[`0`];
2331	}
2332
2333	static uintptr_t
2334	teffective_1(task_t task)
2335	{
2336	#if defined __LP64__
2337	(void)task;
2338	return `0`;
2339	#else
2340	uintptr_t* raw = (uintptr_t*)(&task->effective_policy);
2341	return raw[`1`];
2342	#endif
2343	}
2344
2345	/ dump pending for tracepoint /
2346	uint32_t
2347	tpending(task_pend_token_t pend_token)
2348	{
2349	return (uint32_t)(void*)(pend_token);
2350	}
2351
2352	uint64_t
2353	task_requested_bitfield(task_t task)
2354	{
2355	uint64_t bits = `0`;
2356	struct task_requested_policy requested = task->requested_policy;
2357
2358	bits \|= (requested.trp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : `0`);
2359	bits \|= (requested.trp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : `0`);
2360	bits \|= (requested.trp_int_iotier ? (((uint64_t)requested.trp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : `0`);
2361	bits \|= (requested.trp_ext_iotier ? (((uint64_t)requested.trp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : `0`);
2362	bits \|= (requested.trp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : `0`);
2363	bits \|= (requested.trp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : `0`);
2364	bits \|= (requested.trp_bg_iotier ? (((uint64_t)requested.trp_bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : `0`);
2365	bits \|= (requested.trp_terminated ? POLICY_REQ_TERMINATED : `0`);
2366
2367	bits \|= (requested.trp_boosted ? POLICY_REQ_BOOSTED : `0`);
2368	bits \|= (requested.trp_tal_enabled ? POLICY_REQ_TAL_ENABLED : `0`);
2369	bits \|= (requested.trp_apptype ? (((uint64_t)requested.trp_apptype) << POLICY_REQ_APPTYPE_SHIFT) : `0`);
2370	bits \|= (requested.trp_role ? (((uint64_t)requested.trp_role) << POLICY_REQ_ROLE_SHIFT) : `0`);
2371
2372	bits \|= (requested.trp_sup_active ? POLICY_REQ_SUP_ACTIVE : `0`);
2373	bits \|= (requested.trp_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : `0`);
2374	bits \|= (requested.trp_sup_cpu ? POLICY_REQ_SUP_CPU : `0`);
2375	bits \|= (requested.trp_sup_timer ? (((uint64_t)requested.trp_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : `0`);
2376	bits \|= (requested.trp_sup_throughput ? (((uint64_t)requested.trp_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : `0`);
2377	bits \|= (requested.trp_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : `0`);
2378	bits \|= (requested.trp_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : `0`);
2379
2380	bits \|= (requested.trp_base_latency_qos ? (((uint64_t)requested.trp_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : `0`);
2381	bits \|= (requested.trp_over_latency_qos ? (((uint64_t)requested.trp_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : `0`);
2382	bits \|= (requested.trp_base_through_qos ? (((uint64_t)requested.trp_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : `0`);
2383	bits \|= (requested.trp_over_through_qos ? (((uint64_t)requested.trp_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : `0`);
2384	bits \|= (requested.trp_sfi_managed ? POLICY_REQ_SFI_MANAGED : `0`);
2385	bits \|= (requested.trp_qos_clamp ? (((uint64_t)requested.trp_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : `0`);
2386
2387	return bits;
2388	}
2389
2390	uint64_t
2391	task_effective_bitfield(task_t task)
2392	{
2393	uint64_t bits = `0`;
2394	struct task_effective_policy effective = task->effective_policy;
2395
2396	bits \|= (effective.tep_io_tier ? (((uint64_t)effective.tep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : `0`);
2397	bits \|= (effective.tep_io_passive ? POLICY_EFF_IO_PASSIVE : `0`);
2398	bits \|= (effective.tep_darwinbg ? POLICY_EFF_DARWIN_BG : `0`);
2399	bits \|= (effective.tep_lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : `0`);
2400	bits \|= (effective.tep_terminated ? POLICY_EFF_TERMINATED : `0`);
2401	bits \|= (effective.tep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : `0`);
2402	bits \|= (effective.tep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : `0`);
2403	bits \|= (effective.tep_bg_iotier ? (((uint64_t)effective.tep_bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : `0`);
2404	bits \|= (effective.tep_qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : `0`);
2405
2406	bits \|= (effective.tep_tal_engaged ? POLICY_EFF_TAL_ENGAGED : `0`);
2407	bits \|= (effective.tep_watchers_bg ? POLICY_EFF_WATCHERS_BG : `0`);
2408	bits \|= (effective.tep_sup_active ? POLICY_EFF_SUP_ACTIVE : `0`);
2409	bits \|= (effective.tep_suppressed_cpu ? POLICY_EFF_SUP_CPU : `0`);
2410	bits \|= (effective.tep_role ? (((uint64_t)effective.tep_role) << POLICY_EFF_ROLE_SHIFT) : `0`);
2411	bits \|= (effective.tep_latency_qos ? (((uint64_t)effective.tep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : `0`);
2412	bits \|= (effective.tep_through_qos ? (((uint64_t)effective.tep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : `0`);
2413	bits \|= (effective.tep_sfi_managed ? POLICY_EFF_SFI_MANAGED : `0`);
2414	bits \|= (effective.tep_qos_ceiling ? (((uint64_t)effective.tep_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : `0`);
2415
2416	return bits;
2417	}
2418
2419
2420	/*
2421	* Resource usage and CPU related routines
2422	*/
2423
2424	int
2425	proc_get_task_ruse_cpu(task_t task, uint32_t policyp, uint8_t percentagep, uint64_t intervalp, uint64_t deadlinep)
2426	{
2427	int error = `0`;
2428	int scope;
2429
2430	task_lock(task);
2431
2432
2433	error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, scope: &scope);
2434	task_unlock(task);
2435
2436	/*
2437	* Reverse-map from CPU resource limit scopes back to policies (see comment below).
2438	*/
2439	if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2440	*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
2441	} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2442	*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
2443	} else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
2444	*policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2445	}
2446
2447	return error;
2448	}
2449
2450	/*
2451	* Configure the default CPU usage monitor parameters.
2452	*
2453	* For tasks which have this mechanism activated: if any thread in the
2454	* process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
2455	*/
2456	void
2457	proc_init_cpumon_params(void)
2458	{
2459	/*
2460	* The max CPU percentage can be configured via the boot-args and
2461	* a key in the device tree. The boot-args are honored first, then the
2462	* device tree.
2463	*/
2464	if (!PE_parse_boot_argn(arg_string: "max_cpumon_percentage", arg_ptr: &proc_max_cpumon_percentage,
2465	max_arg: sizeof(proc_max_cpumon_percentage))) {
2466	uint64_t max_percentage = `0ULL`;
2467
2468	if (!PE_get_default(property_name: "kern.max_cpumon_percentage", property_ptr: &max_percentage,
2469	max_property: sizeof(max_percentage))) {
2470	max_percentage = DEFAULT_CPUMON_PERCENTAGE;
2471	}
2472
2473	assert(max_percentage <= UINT8_MAX);
2474	proc_max_cpumon_percentage = (uint8_t) max_percentage;
2475	}
2476
2477	if (proc_max_cpumon_percentage > `100`) {
2478	proc_max_cpumon_percentage = `100`;
2479	}
2480
2481	/*
2482	* The interval should be specified in seconds.
2483	*
2484	* Like the max CPU percentage, the max CPU interval can be configured
2485	* via boot-args and the device tree.
2486	*/
2487	if (!PE_parse_boot_argn(arg_string: "max_cpumon_interval", arg_ptr: &proc_max_cpumon_interval,
2488	max_arg: sizeof(proc_max_cpumon_interval))) {
2489	if (!PE_get_default(property_name: "kern.max_cpumon_interval", property_ptr: &proc_max_cpumon_interval,
2490	max_property: sizeof(proc_max_cpumon_interval))) {
2491	proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
2492	}
2493	}
2494
2495	proc_max_cpumon_interval *= NSEC_PER_SEC;
2496
2497	/ TEMPORARY boot arg to control App suppression /
2498	PE_parse_boot_argn(arg_string: "task_policy_suppression_flags",
2499	arg_ptr: &task_policy_suppression_flags,
2500	max_arg: sizeof(task_policy_suppression_flags));
2501
2502	/ adjust suppression disk policy if called for in boot arg /
2503	if (task_policy_suppression_flags & TASK_POLICY_SUPPRESSION_IOTIER2) {
2504	proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER2;
2505	}
2506	}
2507
2508	/*
2509	* Currently supported configurations for CPU limits.
2510	*
2511	* Policy \| Deadline-based CPU limit \| Percentage-based CPU limit
2512	* -------------------------------------+--------------------------+------------------------------
2513	* PROC_POLICY_RSRCACT_THROTTLE \| ENOTSUP \| Task-wide scope only
2514	* PROC_POLICY_RSRCACT_SUSPEND \| Task-wide scope only \| ENOTSUP
2515	* PROC_POLICY_RSRCACT_TERMINATE \| Task-wide scope only \| ENOTSUP
2516	* PROC_POLICY_RSRCACT_NOTIFY_KQ \| Task-wide scope only \| ENOTSUP
2517	* PROC_POLICY_RSRCACT_NOTIFY_EXC \| ENOTSUP \| Per-thread scope only
2518	*
2519	* A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
2520	* after the specified amount of wallclock time has elapsed.
2521	*
2522	* A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
2523	* has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
2524	* aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
2525	* in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
2526	*
2527	* We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
2528	* share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
2529	* after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
2530	* but the potential consumer of the API at the time was insisting on wallclock time instead.
2531	*
2532	* Currently, requesting notification via an exception is the only way to get per-thread scope for a
2533	* CPU limit. All other types of notifications force task-wide scope for the limit.
2534	*/
2535	int
2536	proc_set_task_ruse_cpu(task_t task, uint16_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
2537	int cpumon_entitled)
2538	{
2539	int error = `0`;
2540	int scope;
2541
2542	/*
2543	* Enforce the matrix of supported configurations for policy, percentage, and deadline.
2544	*/
2545	switch (policy) {
2546	// If no policy is explicitly given, the default is to throttle.
2547	case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
2548	case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
2549	if (deadline != `0`) {
2550	return ENOTSUP;
2551	}
2552	scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2553	break;
2554	case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
2555	case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
2556	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
2557	if (percentage != `0`) {
2558	return ENOTSUP;
2559	}
2560	scope = TASK_RUSECPU_FLAGS_DEADLINE;
2561	break;
2562	case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
2563	if (deadline != `0`) {
2564	return ENOTSUP;
2565	}
2566	scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2567	#ifdef CONFIG_NOMONITORS
2568	return error;
2569	#endif /* CONFIG_NOMONITORS */
2570	break;
2571	default:
2572	return EINVAL;
2573	}
2574
2575	task_lock(task);
2576	if (task != current_task()) {
2577	task->policy_ru_cpu_ext = policy;
2578	} else {
2579	task->policy_ru_cpu = policy;
2580	}
2581	error = task_set_cpuusage(task, percentage, interval, deadline, scope, entitled: cpumon_entitled);
2582	task_unlock(task);
2583	return error;
2584	}
2585
/*
 * Resource-usage type selectors used by the resource action helpers in
 * this file; only the CPU type has an action path here.
 *
 * TODO: get rid of these
 */
#define TASK_POLICY_CPU_RESOURCE_USAGE          0
#define TASK_POLICY_WIREDMEM_RESOURCE_USAGE     1
#define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE   2
#define TASK_POLICY_DISK_RESOURCE_USAGE         3
#define TASK_POLICY_NETWORK_RESOURCE_USAGE      4
#define TASK_POLICY_POWER_RESOURCE_USAGE        5

#define TASK_POLICY_RESOURCE_USAGE_COUNT        6
2595
2596	int
2597	proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
2598	{
2599	int error = `0`;
2600	int action;
2601	void * bsdinfo = NULL;
2602
2603	task_lock(task);
2604	if (task != current_task()) {
2605	task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2606	} else {
2607	task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
2608	}
2609
2610	error = task_clear_cpuusage_locked(task, cpumon_entitled);
2611	if (error != `0`) {
2612	goto out;
2613	}
2614
2615	action = task->applied_ru_cpu;
2616	if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2617	/ reset action /
2618	task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2619	}
2620	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2621	bsdinfo = get_bsdtask_info(task);
2622	task_unlock(task);
2623	proc_restore_resource_actions(p: bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2624	goto out1;
2625	}
2626
2627	out:
2628	task_unlock(task);
2629	out1:
2630	return error;
2631	}
2632
2633	/ used to apply resource limit related actions /
2634	static int
2635	task_apply_resource_actions(task_t task, int type)
2636	{
2637	int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
2638	void * bsdinfo = NULL;
2639
2640	switch (type) {
2641	case TASK_POLICY_CPU_RESOURCE_USAGE:
2642	break;
2643	case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
2644	case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
2645	case TASK_POLICY_DISK_RESOURCE_USAGE:
2646	case TASK_POLICY_NETWORK_RESOURCE_USAGE:
2647	case TASK_POLICY_POWER_RESOURCE_USAGE:
2648	return `0`;
2649
2650	default:
2651	return `1`;
2652	}
2653	;
2654
2655	/ only cpu actions for now /
2656	task_lock(task);
2657
2658	if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2659	/ apply action /
2660	task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
2661	action = task->applied_ru_cpu_ext;
2662	} else {
2663	action = task->applied_ru_cpu_ext;
2664	}
2665
2666	if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
2667	bsdinfo = get_bsdtask_info(task);
2668	task_unlock(task);
2669	proc_apply_resource_actions(p: bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
2670	} else {
2671	task_unlock(task);
2672	}
2673
2674	return `0`;
2675	}
2676
2677	/*
2678	* XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
2679	* only allows for one at a time. This means that if there is a per-thread limit active, the other
2680	* "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
2681	* to the caller, and prefer that, but there's no need for that at the moment.
2682	*/
2683	static int
2684	task_get_cpuusage(task_t task, uint8_t percentagep, uint64_t intervalp, uint64_t deadlinep, int* *scope)
2685	{
2686	*percentagep = `0`;
2687	*intervalp = `0`;
2688	*deadlinep = `0`;
2689
2690	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != `0`) {
2691	*scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2692	*percentagep = task->rusage_cpu_perthr_percentage;
2693	*intervalp = task->rusage_cpu_perthr_interval;
2694	} else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != `0`) {
2695	*scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
2696	*percentagep = task->rusage_cpu_percentage;
2697	*intervalp = task->rusage_cpu_interval;
2698	} else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != `0`) {
2699	*scope = TASK_RUSECPU_FLAGS_DEADLINE;
2700	*deadlinep = task->rusage_cpu_deadline;
2701	} else {
2702	*scope = `0`;
2703	}
2704
2705	return `0`;
2706	}
2707
2708	/*
2709	* Suspend the CPU usage monitor for the task. Return value indicates
2710	* if the mechanism was actually enabled.
2711	*/
2712	int
2713	task_suspend_cpumon(task_t task)
2714	{
2715	thread_t thread;
2716
2717	task_lock_assert_owned(task);
2718
2719	if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == `0`) {
2720	return KERN_INVALID_ARGUMENT;
2721	}
2722
2723	#if CONFIG_TELEMETRY
2724	/*
2725	* Disable task-wide telemetry if it was ever enabled by the CPU usage
2726	* monitor's warning zone.
2727	*/
2728	telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, enable_disable: `0`);
2729	#endif
2730
2731	/*
2732	* Suspend monitoring for the task, and propagate that change to each thread.
2733	*/
2734	task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT \| TASK_RUSECPU_FLAGS_FATAL_CPUMON);
2735	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2736	act_set_astledger(thread);
2737	}
2738
2739	return KERN_SUCCESS;
2740	}
2741
2742	/*
2743	* Remove all traces of the CPU monitor.
2744	*/
2745	int
2746	task_disable_cpumon(task_t task)
2747	{
2748	int kret;
2749
2750	task_lock_assert_owned(task);
2751
2752	kret = task_suspend_cpumon(task);
2753	if (kret) {
2754	return kret;
2755	}
2756
2757	/ Once we clear these values, the monitor can't be resumed /
2758	task->rusage_cpu_perthr_percentage = `0`;
2759	task->rusage_cpu_perthr_interval = `0`;
2760
2761	return KERN_SUCCESS;
2762	}
2763
2764
2765	static int
2766	task_enable_cpumon_locked(task_t task)
2767	{
2768	thread_t thread;
2769	task_lock_assert_owned(task);
2770
2771	if (task->rusage_cpu_perthr_percentage == `0` \|\|
2772	task->rusage_cpu_perthr_interval == `0`) {
2773	return KERN_INVALID_ARGUMENT;
2774	}
2775
2776	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
2777	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2778	act_set_astledger(thread);
2779	}
2780
2781	return KERN_SUCCESS;
2782	}
2783
2784	int
2785	task_resume_cpumon(task_t task)
2786	{
2787	kern_return_t kret;
2788
2789	if (!task) {
2790	return EINVAL;
2791	}
2792
2793	task_lock(task);
2794	kret = task_enable_cpumon_locked(task);
2795	task_unlock(task);
2796
2797	return kret;
2798	}
2799
2800
/*
 * duplicate values from bsd/sys/process_policy.h
 *
 * Special "percentage" values understood by task_set_cpuusage() for a
 * per-thread limit: DISABLE asks for the monitor to be turned off,
 * DEFAULTS asks for the system default percentage and interval.
 */
#define PROC_POLICY_CPUMON_DISABLE      0xFF
#define PROC_POLICY_CPUMON_DEFAULTS     0xFE
2804
2805	static int
2806	task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
2807	{
2808	uint64_t abstime = `0`;
2809	uint64_t limittime = `0`;
2810
2811	lck_mtx_assert(lck: &task->lock, LCK_MTX_ASSERT_OWNED);
2812
2813	/ By default, refill once per second /
2814	if (interval == `0`) {
2815	interval = NSEC_PER_SEC;
2816	}
2817
2818	if (percentage != `0`) {
2819	if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2820	boolean_t warn = FALSE;
2821
2822	/*
2823	* A per-thread CPU limit on a task generates an exception
2824	* (LEDGER_ACTION_EXCEPTION) if any one thread in the task
2825	* exceeds the limit.
2826	*/
2827
2828	if (percentage == PROC_POLICY_CPUMON_DISABLE) {
2829	if (cpumon_entitled) {
2830	/ 25095698 - task_disable_cpumon() should be reliable /
2831	task_disable_cpumon(task);
2832	return `0`;
2833	}
2834
2835	/*
2836	* This task wishes to disable the CPU usage monitor, but it's
2837	* missing the required entitlement:
2838	* com.apple.private.kernel.override-cpumon
2839	*
2840	* Instead, treat this as a request to reset its params
2841	* back to the defaults.
2842	*/
2843	warn = TRUE;
2844	percentage = PROC_POLICY_CPUMON_DEFAULTS;
2845	}
2846
2847	if (percentage == PROC_POLICY_CPUMON_DEFAULTS) {
2848	percentage = proc_max_cpumon_percentage;
2849	interval = proc_max_cpumon_interval;
2850	}
2851
2852	if (percentage > `100`) {
2853	percentage = `100`;
2854	}
2855
2856	/*
2857	* Passing in an interval of -1 means either:
2858	* - Leave the interval as-is, if there's already a per-thread
2859	* limit configured
2860	* - Use the system default.
2861	*/
2862	if (interval == -`1ULL`) {
2863	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
2864	interval = task->rusage_cpu_perthr_interval;
2865	} else {
2866	interval = proc_max_cpumon_interval;
2867	}
2868	}
2869
2870	/*
2871	* Enforce global caps on CPU usage monitor here if the process is not
2872	* entitled to escape the global caps.
2873	*/
2874	if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == `0`)) {
2875	warn = TRUE;
2876	percentage = proc_max_cpumon_percentage;
2877	}
2878
2879	if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == `0`)) {
2880	warn = TRUE;
2881	interval = proc_max_cpumon_interval;
2882	}
2883
2884	if (warn) {
2885	int pid = `0`;
2886	const char *procname = "unknown";
2887
2888	#ifdef MACH_BSD
2889	pid = proc_selfpid();
2890	void *cur_bsd_info = get_bsdtask_info(current_task());
2891	if (cur_bsd_info != NULL) {
2892	procname = proc_name_address(p: cur_bsd_info);
2893	}
2894	#endif
2895
2896	printf(format: "process %s[%d] denied attempt to escape CPU monitor"
2897	" (missing required entitlement).\n", procname, pid);
2898	}
2899
2900	/ configure the limit values /
2901	task->rusage_cpu_perthr_percentage = percentage;
2902	task->rusage_cpu_perthr_interval = interval;
2903
2904	/ and enable the CPU monitor /
2905	(void)task_enable_cpumon_locked(task);
2906	} else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2907	/*
2908	* Currently, a proc-wide CPU limit always blocks if the limit is
2909	* exceeded (LEDGER_ACTION_BLOCK).
2910	*/
2911	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_PROC_LIMIT;
2912	task->rusage_cpu_percentage = percentage;
2913	task->rusage_cpu_interval = interval;
2914
2915	limittime = (interval * percentage) / `100`;
2916	nanoseconds_to_absolutetime(nanoseconds: limittime, result: &abstime);
2917
2918	ledger_set_limit(ledger: task->ledger, entry: task_ledgers.cpu_time, limit: abstime, warn_level_percentage: `0`);
2919	ledger_set_period(ledger: task->ledger, entry: task_ledgers.cpu_time, period: interval);
2920	ledger_set_action(ledger: task->ledger, entry: task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2921	}
2922	}
2923
2924	if (deadline != `0`) {
2925	assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
2926
2927	/ if already in use, cancel and wait for it to cleanout /
2928	if (task->rusage_cpu_callt != NULL) {
2929	task_unlock(task);
2930	thread_call_cancel_wait(call: task->rusage_cpu_callt);
2931	task_lock(task);
2932	}
2933	if (task->rusage_cpu_callt == NULL) {
2934	task->rusage_cpu_callt = thread_call_allocate_with_priority(func: task_action_cpuusage, param0: (thread_call_param_t)task, pri: THREAD_CALL_PRIORITY_KERNEL);
2935	}
2936	/ setup callout /
2937	if (task->rusage_cpu_callt != `0`) {
2938	uint64_t save_abstime = `0`;
2939
2940	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_DEADLINE;
2941	task->rusage_cpu_deadline = deadline;
2942
2943	nanoseconds_to_absolutetime(nanoseconds: deadline, result: &abstime);
2944	save_abstime = abstime;
2945	clock_absolutetime_interval_to_deadline(abstime: save_abstime, result: &abstime);
2946	thread_call_enter_delayed(call: task->rusage_cpu_callt, deadline: abstime);
2947	}
2948	}
2949
2950	return `0`;
2951	}
2952
2953	int
2954	task_clear_cpuusage(task_t task, int cpumon_entitled)
2955	{
2956	int retval = `0`;
2957
2958	task_lock(task);
2959	retval = task_clear_cpuusage_locked(task, cpumon_entitled);
2960	task_unlock(task);
2961
2962	return retval;
2963	}
2964
2965	static int
2966	task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
2967	{
2968	thread_call_t savecallt;
2969
2970	/ cancel percentage handling if set /
2971	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
2972	task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
2973	ledger_set_limit(ledger: task->ledger, entry: task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, warn_level_percentage: `0`);
2974	task->rusage_cpu_percentage = `0`;
2975	task->rusage_cpu_interval = `0`;
2976	}
2977
2978	/*
2979	* Disable the CPU usage monitor.
2980	*/
2981	if (cpumon_entitled) {
2982	task_disable_cpumon(task);
2983	}
2984
2985	/ cancel deadline handling if set /
2986	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
2987	task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
2988	if (task->rusage_cpu_callt != `0`) {
2989	savecallt = task->rusage_cpu_callt;
2990	task->rusage_cpu_callt = NULL;
2991	task->rusage_cpu_deadline = `0`;
2992	task_unlock(task);
2993	thread_call_cancel_wait(call: savecallt);
2994	thread_call_free(call: savecallt);
2995	task_lock(task);
2996	}
2997	}
2998	return `0`;
2999	}
3000
3001	/ called by ledger unit to enforce action due to resource usage criteria being met /
3002	static void
3003	task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
3004	{
3005	task_t task = (task_t)param0;
3006	(void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
3007	return;
3008	}
3009
3010
3011	/*
3012	* Routines for taskwatch and pidbind
3013	*/
3014
3015	#if CONFIG_TASKWATCH
3016
/*
 * Mutex acquired by task_watch_lock() and released by task_watch_unlock().
 * apply_appstate_watchers() also sleeps on it while another application of
 * watcher state is in progress.
 */
LCK_MTX_DECLARE_ATTR(task_watch_mtx, &task_lck_grp, &task_lck_attr);
3018
3019	static void
3020	task_watch_lock(void)
3021	{
3022	lck_mtx_lock(lck: &task_watch_mtx);
3023	}
3024
3025	static void
3026	task_watch_unlock(void)
3027	{
3028	lck_mtx_unlock(lck: &task_watch_mtx);
3029	}
3030
3031	static void
3032	add_taskwatch_locked(task_t task, task_watch_t * twp)
3033	{
3034	queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links);
3035	task->num_taskwatchers++;
3036	}
3037
3038	static void
3039	remove_taskwatch_locked(task_t task, task_watch_t * twp)
3040	{
3041	queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links);
3042	task->num_taskwatchers--;
3043	}
3044
3045
3046	int
3047	proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind)
3048	{
3049	thread_t target_thread = NULL;
3050	int ret = `0`, setbg = `0`;
3051	task_watch_t *twp = NULL;
3052	task_t task = TASK_NULL;
3053
3054	target_thread = task_findtid(task: curtask, tid);
3055	if (target_thread == NULL) {
3056	return ESRCH;
3057	}
3058	/ holds thread reference /
3059
3060	if (bind != `0`) {
3061	/ task is still active ? /
3062	task_lock(target_task);
3063	if (target_task->active == `0`) {
3064	task_unlock(target_task);
3065	ret = ESRCH;
3066	goto out;
3067	}
3068	task_unlock(target_task);
3069
3070	twp = kalloc_type(task_watch_t, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
3071
3072	task_watch_lock();
3073
3074	if (target_thread->taskwatch != NULL) {
3075	/ already bound to another task /
3076	task_watch_unlock();
3077
3078	kfree_type(task_watch_t, twp);
3079	ret = EBUSY;
3080	goto out;
3081	}
3082
3083	task_reference(target_task);
3084
3085	setbg = proc_get_effective_task_policy(task: target_task, TASK_POLICY_WATCHERS_BG);
3086
3087	twp->tw_task = target_task; / holds the task reference /
3088	twp->tw_thread = target_thread; / holds the thread reference /
3089	twp->tw_state = setbg;
3090	twp->tw_importance = target_thread->importance;
3091
3092	add_taskwatch_locked(task: target_task, twp);
3093
3094	target_thread->taskwatch = twp;
3095
3096	task_watch_unlock();
3097
3098	if (setbg) {
3099	set_thread_appbg(thread: target_thread, setbg, INT_MIN);
3100	}
3101
3102	/ retain the thread reference as it is in twp /
3103	target_thread = NULL;
3104	} else {
3105	/ unbind /
3106	task_watch_lock();
3107	if ((twp = target_thread->taskwatch) != NULL) {
3108	task = twp->tw_task;
3109	target_thread->taskwatch = NULL;
3110	remove_taskwatch_locked(task, twp);
3111
3112	task_watch_unlock();
3113
3114	task_deallocate(task); / drop task ref in twp /
3115	set_thread_appbg(thread: target_thread, setbg: `0`, importance: twp->tw_importance);
3116	thread_deallocate(thread: target_thread); / drop thread ref in twp /
3117	kfree_type(task_watch_t, twp);
3118	} else {
3119	task_watch_unlock();
3120	ret = `0`; / return success if it not alredy bound /
3121	goto out;
3122	}
3123	}
3124	out:
3125	thread_deallocate(thread: target_thread); / drop thread ref acquired in this routine /
3126	return ret;
3127	}
3128
3129	static void
3130	set_thread_appbg(thread_t thread, int setbg, __unused int importance)
3131	{
3132	int enable = (setbg ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE);
3133
3134	proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_PIDBIND_BG, value: enable);
3135	}
3136
3137	static void
3138	apply_appstate_watchers(task_t task)
3139	{
3140	int numwatchers = `0`, i, j, setbg;
3141	thread_watchlist_t * threadlist;
3142	task_watch_t * twp;
3143
3144	retry:
3145	/ if no watchers on the list return /
3146	if ((numwatchers = task->num_taskwatchers) == `0`) {
3147	return;
3148	}
3149
3150	threadlist = kalloc_type(thread_watchlist_t, numwatchers, Z_WAITOK \| Z_ZERO);
3151	if (threadlist == NULL) {
3152	return;
3153	}
3154
3155	task_watch_lock();
3156	/serialize application of app state changes /
3157
3158	if (task->watchapplying != `0`) {
3159	lck_mtx_sleep(lck: &task_watch_mtx, lck_sleep_action: LCK_SLEEP_DEFAULT, event: &task->watchapplying, THREAD_UNINT);
3160	task_watch_unlock();
3161	kfree_type(thread_watchlist_t, numwatchers, threadlist);
3162	goto retry;
3163	}
3164
3165	if (numwatchers != task->num_taskwatchers) {
3166	task_watch_unlock();
3167	kfree_type(thread_watchlist_t, numwatchers, threadlist);
3168	goto retry;
3169	}
3170
3171	setbg = proc_get_effective_task_policy(task, TASK_POLICY_WATCHERS_BG);
3172
3173	task->watchapplying = `1`;
3174	i = `0`;
3175	queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) {
3176	threadlist[i].thread = twp->tw_thread;
3177	thread_reference(thread: threadlist[i].thread);
3178	if (setbg != `0`) {
3179	twp->tw_importance = twp->tw_thread->importance;
3180	threadlist[i].importance = INT_MIN;
3181	} else {
3182	threadlist[i].importance = twp->tw_importance;
3183	}
3184	i++;
3185	if (i > numwatchers) {
3186	break;
3187	}
3188	}
3189
3190	task_watch_unlock();
3191
3192	for (j = `0`; j < i; j++) {
3193	set_thread_appbg(thread: threadlist[j].thread, setbg, importance: threadlist[j].importance);
3194	thread_deallocate(thread: threadlist[j].thread);
3195	}
3196	kfree_type(thread_watchlist_t, numwatchers, threadlist);
3197
3198
3199	task_watch_lock();
3200	task->watchapplying = `0`;
3201	thread_wakeup_one(&task->watchapplying);
3202	task_watch_unlock();
3203	}
3204
3205	void
3206	thead_remove_taskwatch(thread_t thread)
3207	{
3208	task_watch_t * twp;
3209	int importance = `0`;
3210
3211	task_watch_lock();
3212	if ((twp = thread->taskwatch) != NULL) {
3213	thread->taskwatch = NULL;
3214	remove_taskwatch_locked(task: twp->tw_task, twp);
3215	}
3216	task_watch_unlock();
3217	if (twp != NULL) {
3218	thread_deallocate(thread: twp->tw_thread);
3219	task_deallocate(twp->tw_task);
3220	importance = twp->tw_importance;
3221	kfree_type(task_watch_t, twp);
3222	/ remove the thread and networkbg /
3223	set_thread_appbg(thread, setbg: `0`, importance);
3224	}
3225	}
3226
3227	void
3228	task_removewatchers(task_t task)
3229	{
3230	queue_head_t queue;
3231	task_watch_t *twp;
3232
3233	task_watch_lock();
3234	queue_new_head(&task->task_watchers, &queue, task_watch_t *, tw_links);
3235	queue_init(&task->task_watchers);
3236
3237	queue_iterate(&queue, twp, task_watch_t *, tw_links) {
3238	/*
3239	* Since the linkage is removed and thead state cleanup is already set up,
3240	* remove the refernce from the thread.
3241	*/
3242	twp->tw_thread->taskwatch = NULL; / removed linkage, clear thread holding ref /
3243	}
3244
3245	task->num_taskwatchers = `0`;
3246	task_watch_unlock();
3247
3248	while (!queue_empty(&queue)) {
3249	queue_remove_first(&queue, twp, task_watch_t *, tw_links);
3250	/ remove thread and network bg /
3251	set_thread_appbg(thread: twp->tw_thread, setbg: `0`, importance: twp->tw_importance);
3252	thread_deallocate(thread: twp->tw_thread);
3253	task_deallocate(twp->tw_task);
3254	kfree_type(task_watch_t, twp);
3255	}
3256	}
3257	#endif /* CONFIG_TASKWATCH */
3258
3259	/*
3260	* Routines for importance donation/inheritance/boosting
3261	*/
3262
3263	static void
3264	task_importance_update_live_donor(task_t target_task)
3265	{
3266	#if IMPORTANCE_INHERITANCE
3267
3268	ipc_importance_task_t task_imp;
3269
3270	task_imp = ipc_importance_for_task(task: target_task, FALSE);
3271	if (IIT_NULL != task_imp) {
3272	ipc_importance_task_update_live_donor(task_imp);
3273	ipc_importance_task_release(task_imp);
3274	}
3275	#endif /* IMPORTANCE_INHERITANCE */
3276	}
3277
3278	void
3279	task_importance_mark_donor(task_t task, boolean_t donating)
3280	{
3281	#if IMPORTANCE_INHERITANCE
3282	ipc_importance_task_t task_imp;
3283
3284	task_imp = ipc_importance_for_task(task, FALSE);
3285	if (IIT_NULL != task_imp) {
3286	ipc_importance_task_mark_donor(task_imp, donating);
3287	ipc_importance_task_release(task_imp);
3288	}
3289	#endif /* IMPORTANCE_INHERITANCE */
3290	}
3291
3292	void
3293	task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3294	{
3295	#if IMPORTANCE_INHERITANCE
3296	ipc_importance_task_t task_imp;
3297
3298	task_imp = ipc_importance_for_task(task, FALSE);
3299	if (IIT_NULL != task_imp) {
3300	ipc_importance_task_mark_live_donor(task_imp, live_donating);
3301	ipc_importance_task_release(task_imp);
3302	}
3303	#endif /* IMPORTANCE_INHERITANCE */
3304	}
3305
3306	void
3307	task_importance_mark_receiver(task_t task, boolean_t receiving)
3308	{
3309	#if IMPORTANCE_INHERITANCE
3310	ipc_importance_task_t task_imp;
3311
3312	task_imp = ipc_importance_for_task(task, FALSE);
3313	if (IIT_NULL != task_imp) {
3314	ipc_importance_task_mark_receiver(task_imp, receiving);
3315	ipc_importance_task_release(task_imp);
3316	}
3317	#endif /* IMPORTANCE_INHERITANCE */
3318	}
3319
3320	void
3321	task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3322	{
3323	#if IMPORTANCE_INHERITANCE
3324	ipc_importance_task_t task_imp;
3325
3326	task_imp = ipc_importance_for_task(task, FALSE);
3327	if (IIT_NULL != task_imp) {
3328	ipc_importance_task_mark_denap_receiver(task_imp, receiving: denap);
3329	ipc_importance_task_release(task_imp);
3330	}
3331	#endif /* IMPORTANCE_INHERITANCE */
3332	}
3333
3334	void
3335	task_importance_reset(__imp_only task_t task)
3336	{
3337	#if IMPORTANCE_INHERITANCE
3338	ipc_importance_task_t task_imp;
3339
3340	/ TODO: Lower importance downstream before disconnect /
3341	task_imp = task->task_imp_base;
3342	ipc_importance_reset(task_imp, FALSE);
3343	task_importance_update_live_donor(target_task: task);
3344	#endif /* IMPORTANCE_INHERITANCE */
3345	}
3346
3347	void
3348	task_importance_init_from_parent(__imp_only task_t new_task, __imp_only task_t parent_task)
3349	{
3350	#if IMPORTANCE_INHERITANCE
3351	ipc_importance_task_t new_task_imp = IIT_NULL;
3352
3353	new_task->task_imp_base = NULL;
3354	if (!parent_task) {
3355	return;
3356	}
3357
3358	if (task_is_marked_importance_donor(task: parent_task)) {
3359	new_task_imp = ipc_importance_for_task(task: new_task, FALSE);
3360	assert(IIT_NULL != new_task_imp);
3361	ipc_importance_task_mark_donor(task_imp: new_task_imp, TRUE);
3362	}
3363	if (task_is_marked_live_importance_donor(task: parent_task)) {
3364	if (IIT_NULL == new_task_imp) {
3365	new_task_imp = ipc_importance_for_task(task: new_task, FALSE);
3366	}
3367	assert(IIT_NULL != new_task_imp);
3368	ipc_importance_task_mark_live_donor(task_imp: new_task_imp, TRUE);
3369	}
3370	/ Do not inherit 'receiver' on fork, vfexec or true spawn /
3371	if (task_is_exec_copy(new_task) &&
3372	task_is_marked_importance_receiver(task: parent_task)) {
3373	if (IIT_NULL == new_task_imp) {
3374	new_task_imp = ipc_importance_for_task(task: new_task, FALSE);
3375	}
3376	assert(IIT_NULL != new_task_imp);
3377	ipc_importance_task_mark_receiver(task_imp: new_task_imp, TRUE);
3378	}
3379	if (task_is_marked_importance_denap_receiver(task: parent_task)) {
3380	if (IIT_NULL == new_task_imp) {
3381	new_task_imp = ipc_importance_for_task(task: new_task, FALSE);
3382	}
3383	assert(IIT_NULL != new_task_imp);
3384	ipc_importance_task_mark_denap_receiver(task_imp: new_task_imp, TRUE);
3385	}
3386	if (IIT_NULL != new_task_imp) {
3387	assert(new_task->task_imp_base == new_task_imp);
3388	ipc_importance_task_release(task_imp: new_task_imp);
3389	}
3390	#endif /* IMPORTANCE_INHERITANCE */
3391	}
3392
3393	#if IMPORTANCE_INHERITANCE
3394	/*
3395	* Sets the task boost bit to the provided value. Does NOT run the update function.
3396	*
3397	* Task lock must be held.
3398	*/
3399	static void
3400	task_set_boost_locked(task_t task, boolean_t boost_active)
3401	{
3402	#if IMPORTANCE_TRACE
3403	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) \| DBG_FUNC_START),
3404	proc_selfpid(), task_pid(task), trequested_0(task), trequested_1(task), `0`);
3405	#endif /* IMPORTANCE_TRACE */
3406
3407	task->requested_policy.trp_boosted = boost_active;
3408
3409	#if IMPORTANCE_TRACE
3410	if (boost_active == TRUE) {
3411	DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
3412	} else {
3413	DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
3414	}
3415	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) \| DBG_FUNC_END),
3416	proc_selfpid(), task_pid(task),
3417	trequested_0(task), trequested_1(task), `0`);
3418	#endif /* IMPORTANCE_TRACE */
3419	}
3420
3421	/*
3422	* Sets the task boost bit to the provided value and applies the update.
3423	*
3424	* Task lock must be held. Must call update complete after unlocking the task.
3425	*/
3426	void
3427	task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3428	{
3429	task_set_boost_locked(task, boost_active);
3430
3431	task_policy_update_locked(task, pend_token);
3432	}
3433
3434	/*
3435	* Check if this task should donate importance.
3436	*
3437	* May be called without taking the task lock. In that case, donor status can change
3438	* so you must check only once for each donation event.
3439	*/
3440	boolean_t
3441	task_is_importance_donor(task_t task)
3442	{
3443	if (task->task_imp_base == IIT_NULL) {
3444	return FALSE;
3445	}
3446	return ipc_importance_task_is_donor(task_imp: task->task_imp_base);
3447	}
3448
3449	/*
3450	* Query the status of the task's donor mark.
3451	*/
3452	boolean_t
3453	task_is_marked_importance_donor(task_t task)
3454	{
3455	if (task->task_imp_base == IIT_NULL) {
3456	return FALSE;
3457	}
3458	return ipc_importance_task_is_marked_donor(task_imp: task->task_imp_base);
3459	}
3460
3461	/*
3462	* Query the status of the task's live donor and donor mark.
3463	*/
3464	boolean_t
3465	task_is_marked_live_importance_donor(task_t task)
3466	{
3467	if (task->task_imp_base == IIT_NULL) {
3468	return FALSE;
3469	}
3470	return ipc_importance_task_is_marked_live_donor(task_imp: task->task_imp_base);
3471	}
3472
3473
3474	/*
3475	* This routine may be called without holding task lock
3476	* since the value of imp_receiver can never be unset.
3477	*/
3478	boolean_t
3479	task_is_importance_receiver(task_t task)
3480	{
3481	if (task->task_imp_base == IIT_NULL) {
3482	return FALSE;
3483	}
3484	return ipc_importance_task_is_marked_receiver(task_imp: task->task_imp_base);
3485	}
3486
3487	/*
3488	* Query the task's receiver mark.
3489	*/
3490	boolean_t
3491	task_is_marked_importance_receiver(task_t task)
3492	{
3493	if (task->task_imp_base == IIT_NULL) {
3494	return FALSE;
3495	}
3496	return ipc_importance_task_is_marked_receiver(task_imp: task->task_imp_base);
3497	}
3498
3499	/*
3500	* This routine may be called without holding task lock
3501	* since the value of de-nap receiver can never be unset.
3502	*/
3503	boolean_t
3504	task_is_importance_denap_receiver(task_t task)
3505	{
3506	if (task->task_imp_base == IIT_NULL) {
3507	return FALSE;
3508	}
3509	return ipc_importance_task_is_denap_receiver(task_imp: task->task_imp_base);
3510	}
3511
3512	/*
3513	* Query the task's de-nap receiver mark.
3514	*/
3515	boolean_t
3516	task_is_marked_importance_denap_receiver(task_t task)
3517	{
3518	if (task->task_imp_base == IIT_NULL) {
3519	return FALSE;
3520	}
3521	return ipc_importance_task_is_marked_denap_receiver(task_imp: task->task_imp_base);
3522	}
3523
3524	/*
3525	* This routine may be called without holding task lock
3526	* since the value of imp_receiver can never be unset.
3527	*/
3528	boolean_t
3529	task_is_importance_receiver_type(task_t task)
3530	{
3531	if (task->task_imp_base == IIT_NULL) {
3532	return FALSE;
3533	}
3534	return task_is_importance_receiver(task) \|\|
3535	task_is_importance_denap_receiver(task);
3536	}
3537
3538	/*
3539	* External importance assertions are managed by the process in userspace
3540	* Internal importance assertions are the responsibility of the kernel
3541	* Assertions are changed from internal to external via task_importance_externalize_assertion
3542	*/
3543
3544	int
3545	task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3546	{
3547	ipc_importance_task_t task_imp;
3548	kern_return_t ret;
3549
3550	/ may be first time, so allow for possible importance setup /
3551	task_imp = ipc_importance_for_task(task: target_task, FALSE);
3552	if (IIT_NULL == task_imp) {
3553	return EOVERFLOW;
3554	}
3555	ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3556	ipc_importance_task_release(task_imp);
3557
3558	return (KERN_SUCCESS != ret) ? ENOTSUP : `0`;
3559	}
3560
3561	int
3562	task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3563	{
3564	ipc_importance_task_t task_imp;
3565	kern_return_t ret;
3566
3567	/ may be first time, so allow for possible importance setup /
3568	task_imp = ipc_importance_for_task(task: target_task, FALSE);
3569	if (IIT_NULL == task_imp) {
3570	return EOVERFLOW;
3571	}
3572	ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3573	ipc_importance_task_release(task_imp);
3574
3575	return (KERN_SUCCESS != ret) ? ENOTSUP : `0`;
3576	}
3577
3578	int
3579	task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3580	{
3581	ipc_importance_task_t task_imp;
3582	kern_return_t ret;
3583
3584	/ must already have set up an importance /
3585	task_imp = target_task->task_imp_base;
3586	if (IIT_NULL == task_imp) {
3587	return EOVERFLOW;
3588	}
3589	ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3590	return (KERN_SUCCESS != ret) ? ENOTSUP : `0`;
3591	}
3592
3593	int
3594	task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3595	{
3596	ipc_importance_task_t task_imp;
3597	kern_return_t ret;
3598
3599	/ must already have set up an importance /
3600	task_imp = target_task->task_imp_base;
3601	if (IIT_NULL == task_imp) {
3602	return EOVERFLOW;
3603	}
3604	ret = ipc_importance_task_drop_file_lock_assertion(task_imp: target_task->task_imp_base, count);
3605	return (KERN_SUCCESS != ret) ? EOVERFLOW : `0`;
3606	}
3607
3608	int
3609	task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3610	{
3611	ipc_importance_task_t task_imp;
3612	kern_return_t ret;
3613
3614	/ must already have set up an importance /
3615	task_imp = target_task->task_imp_base;
3616	if (IIT_NULL == task_imp) {
3617	return EOVERFLOW;
3618	}
3619	ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3620	return (KERN_SUCCESS != ret) ? EOVERFLOW : `0`;
3621	}
3622
3623	static void
3624	task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
3625	{
3626	int boost = `0`;
3627
3628	__imptrace_only int released_pid = `0`;
3629	__imptrace_only int pid = task_pid(task);
3630
3631	ipc_importance_task_t release_imp_task = IIT_NULL;
3632
3633	if (IP_VALID(port) != `0`) {
3634	ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3635
3636	ip_mq_lock(port);
3637
3638	/*
3639	* The port must have been marked tempowner already.
3640	* This also filters out ports whose receive rights
3641	* are already enqueued in a message, as you can't
3642	* change the right's destination once it's already
3643	* on its way.
3644	*/
3645	if (port->ip_tempowner != `0`) {
3646	assert(port->ip_impdonation != `0`);
3647
3648	boost = port->ip_impcount;
3649	if (IIT_NULL != ip_get_imp_task(port)) {
3650	/*
3651	* if this port is already bound to a task,
3652	* release the task reference and drop any
3653	* watchport-forwarded boosts
3654	*/
3655	release_imp_task = ip_get_imp_task(port);
3656	port->ip_imp_task = IIT_NULL;
3657	}
3658
3659	/ mark the port is watching another task (reference held in port->ip_imp_task) /
3660	if (ipc_importance_task_is_marked_receiver(task_imp: new_imp_task)) {
3661	port->ip_imp_task = new_imp_task;
3662	new_imp_task = IIT_NULL;
3663	}
3664	}
3665	ip_mq_unlock(port);
3666
3667	if (IIT_NULL != new_imp_task) {
3668	ipc_importance_task_release(task_imp: new_imp_task);
3669	}
3670
3671	if (IIT_NULL != release_imp_task) {
3672	if (boost > `0`) {
3673	ipc_importance_task_drop_internal_assertion(task_imp: release_imp_task, count: boost);
3674	}
3675
3676	// released_pid = task_pid(release_imp_task); / TODO: Need ref-safe way to get pid /
3677	ipc_importance_task_release(task_imp: release_imp_task);
3678	}
3679	#if IMPORTANCE_TRACE
3680	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, `0`)) \| DBG_FUNC_NONE,
3681	proc_selfpid(), pid, boost, released_pid, `0`);
3682	#endif /* IMPORTANCE_TRACE */
3683	}
3684
3685	*boostp = boost;
3686	return;
3687	}
3688
3689	#endif /* IMPORTANCE_INHERITANCE */
3690
3691	/*
3692	* Routines for VM to query task importance
3693	*/
3694
3695
/*
 * Order to be considered while estimating importance
 * for low memory notification and purging purgeable memory.
 *
 * task_importance_estimate() sums these weights: FOREGROUND is added when
 * the task's effective role is TASK_FOREGROUND_APPLICATION, NOTDARWINBG when
 * the task is not effectively Darwin-BG.
 */
#define TASK_IMPORTANCE_FOREGROUND 4
#define TASK_IMPORTANCE_NOTDARWINBG 1
3702
3703
3704	/*
3705	* (Un)Mark the task as a privileged listener for memory notifications.
3706	* if marked, this task will be among the first to be notified amongst
3707	* the bulk of all other tasks when the system enters a pressure level
3708	* of interest to this task.
3709	*/
3710	int
3711	task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3712	{
3713	if (old_value != NULL) {
3714	*old_value = (boolean_t)task->low_mem_privileged_listener;
3715	} else {
3716	task_lock(task);
3717	task->low_mem_privileged_listener = (uint32_t)new_value;
3718	task_unlock(task);
3719	}
3720
3721	return `0`;
3722	}
3723
3724	/*
3725	* Checks if the task is already notified.
3726	*
3727	* Condition: task lock should be held while calling this function.
3728	*/
3729	boolean_t
3730	task_has_been_notified(task_t task, int pressurelevel)
3731	{
3732	if (task == NULL) {
3733	return FALSE;
3734	}
3735
3736	if (pressurelevel == kVMPressureWarning) {
3737	return task->low_mem_notified_warn ? TRUE : FALSE;
3738	} else if (pressurelevel == kVMPressureCritical) {
3739	return task->low_mem_notified_critical ? TRUE : FALSE;
3740	} else {
3741	return TRUE;
3742	}
3743	}
3744
3745
3746	/*
3747	* Checks if the task is used for purging.
3748	*
3749	* Condition: task lock should be held while calling this function.
3750	*/
3751	boolean_t
3752	task_used_for_purging(task_t task, int pressurelevel)
3753	{
3754	if (task == NULL) {
3755	return FALSE;
3756	}
3757
3758	if (pressurelevel == kVMPressureWarning) {
3759	return task->purged_memory_warn ? TRUE : FALSE;
3760	} else if (pressurelevel == kVMPressureCritical) {
3761	return task->purged_memory_critical ? TRUE : FALSE;
3762	} else {
3763	return TRUE;
3764	}
3765	}
3766
3767
3768	/*
3769	* Mark the task as notified with memory notification.
3770	*
3771	* Condition: task lock should be held while calling this function.
3772	*/
3773	void
3774	task_mark_has_been_notified(task_t task, int pressurelevel)
3775	{
3776	if (task == NULL) {
3777	return;
3778	}
3779
3780	if (pressurelevel == kVMPressureWarning) {
3781	task->low_mem_notified_warn = `1`;
3782	} else if (pressurelevel == kVMPressureCritical) {
3783	task->low_mem_notified_critical = `1`;
3784	}
3785	}
3786
3787
3788	/*
3789	* Mark the task as purged.
3790	*
3791	* Condition: task lock should be held while calling this function.
3792	*/
3793	void
3794	task_mark_used_for_purging(task_t task, int pressurelevel)
3795	{
3796	if (task == NULL) {
3797	return;
3798	}
3799
3800	if (pressurelevel == kVMPressureWarning) {
3801	task->purged_memory_warn = `1`;
3802	} else if (pressurelevel == kVMPressureCritical) {
3803	task->purged_memory_critical = `1`;
3804	}
3805	}
3806
3807
3808	/*
3809	* Mark the task eligible for low memory notification.
3810	*
3811	* Condition: task lock should be held while calling this function.
3812	*/
3813	void
3814	task_clear_has_been_notified(task_t task, int pressurelevel)
3815	{
3816	if (task == NULL) {
3817	return;
3818	}
3819
3820	if (pressurelevel == kVMPressureWarning) {
3821	task->low_mem_notified_warn = `0`;
3822	} else if (pressurelevel == kVMPressureCritical) {
3823	task->low_mem_notified_critical = `0`;
3824	}
3825	}
3826
3827
3828	/*
3829	* Mark the task eligible for purging its purgeable memory.
3830	*
3831	* Condition: task lock should be held while calling this function.
3832	*/
3833	void
3834	task_clear_used_for_purging(task_t task)
3835	{
3836	if (task == NULL) {
3837	return;
3838	}
3839
3840	task->purged_memory_warn = `0`;
3841	task->purged_memory_critical = `0`;
3842	}
3843
3844
3845	/*
3846	* Estimate task importance for purging its purgeable memory
3847	* and low memory notification.
3848	*
3849	* Importance is calculated in the following order of criteria:
3850	* -Task role : Background vs Foreground
3851	* -Boost status: Not boosted vs Boosted
3852	* -Darwin BG status.
3853	*
3854	* Returns: Estimated task importance. Less important task will have lower
3855	* estimated importance.
3856	*/
3857	int
3858	task_importance_estimate(task_t task)
3859	{
3860	int task_importance = `0`;
3861
3862	if (task == NULL) {
3863	return `0`;
3864	}
3865
3866	if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) {
3867	task_importance += TASK_IMPORTANCE_FOREGROUND;
3868	}
3869
3870	if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == `0`) {
3871	task_importance += TASK_IMPORTANCE_NOTDARWINBG;
3872	}
3873
3874	return task_importance;
3875	}
3876
3877	boolean_t
3878	task_has_assertions(task_t task)
3879	{
3880	return task->task_imp_base->iit_assertcnt? TRUE : FALSE;
3881	}
3882
3883
3884	kern_return_t
3885	send_resource_violation(typeof(send_cpu_usage_violation) sendfunc,
3886	task_t violator,
3887	struct ledger_entry_info *linfo,
3888	resource_notify_flags_t flags)
3889	{
3890	#ifndef MACH_BSD
3891	return KERN_NOT_SUPPORTED;
3892	#else
3893	kern_return_t kr = KERN_SUCCESS;
3894	proc_t proc = NULL;
3895	posix_path_t proc_path = "";
3896	proc_name_t procname = "<unknown>";
3897	int pid = -`1`;
3898	clock_sec_t secs;
3899	clock_nsec_t nsecs;
3900	mach_timespec_t timestamp;
3901	thread_t curthread = current_thread();
3902	ipc_port_t dstport = MACH_PORT_NULL;
3903
3904	if (!violator) {
3905	kr = KERN_INVALID_ARGUMENT; goto finish;
3906	}
3907
3908	/ extract violator information /
3909	task_lock(violator);
3910	if (!(proc = get_bsdtask_info(violator))) {
3911	task_unlock(violator);
3912	kr = KERN_INVALID_ARGUMENT; goto finish;
3913	}
3914	(void)mig_strncpy(dest: procname, src: proc_best_name(proc), len: sizeof(procname));
3915	pid = task_pid(task: violator);
3916	if (flags & kRNFatalLimitFlag) {
3917	kr = proc_pidpathinfo_internal(p: proc, arg: `0`, buffer: proc_path,
3918	buffersize: sizeof(proc_path), NULL);
3919	}
3920	task_unlock(violator);
3921	if (kr) {
3922	goto finish;
3923	}
3924
3925	/ violation time ~ now /
3926	clock_get_calendar_nanotime(secs: &secs, nanosecs: &nsecs);
3927	timestamp.tv_sec = (int32_t)secs;
3928	timestamp.tv_nsec = (int32_t)nsecs;
3929	/ 25567702 tracks widening mach_timespec_t /
3930
3931	/ send message /
3932	kr = host_get_special_port(host_priv: host_priv_self(), HOST_LOCAL_NODE,
3933	HOST_RESOURCE_NOTIFY_PORT, port: &dstport);
3934	if (kr) {
3935	goto finish;
3936	}
3937
3938	thread_set_honor_qlimit(thread: curthread);
3939	kr = sendfunc(dstport,
3940	procname, pid, proc_path, timestamp,
3941	linfo->lei_balance, linfo->lei_last_refill,
3942	linfo->lei_limit, linfo->lei_refill_period,
3943	flags);
3944	thread_clear_honor_qlimit(thread: curthread);
3945
3946	ipc_port_release_send(port: dstport);
3947
3948	finish:
3949	return kr;
3950	#endif /* MACH_BSD */
3951	}
3952
3953	kern_return_t
3954	send_resource_violation_with_fatal_port(typeof(send_port_space_violation) sendfunc,
3955	task_t violator,
3956	int64_t current_size,
3957	int64_t limit,
3958	mach_port_t fatal_port,
3959	resource_notify_flags_t flags)
3960	{
3961	#ifndef MACH_BSD
3962	kr = KERN_NOT_SUPPORTED; goto finish;
3963	#else
3964	kern_return_t kr = KERN_SUCCESS;
3965	proc_t proc = NULL;
3966	proc_name_t procname = "<unknown>";
3967	int pid = -`1`;
3968	clock_sec_t secs;
3969	clock_nsec_t nsecs;
3970	mach_timespec_t timestamp;
3971	thread_t curthread = current_thread();
3972	ipc_port_t dstport = MACH_PORT_NULL;
3973
3974	if (!violator) {
3975	kr = KERN_INVALID_ARGUMENT; goto finish;
3976	}
3977
3978	/ extract violator information; no need to acquire task lock /
3979	assert(violator == current_task());
3980	if (!(proc = get_bsdtask_info(violator))) {
3981	kr = KERN_INVALID_ARGUMENT; goto finish;
3982	}
3983	(void)mig_strncpy(dest: procname, src: proc_best_name(proc), len: sizeof(procname));
3984	pid = task_pid(task: violator);
3985
3986	/ violation time ~ now /
3987	clock_get_calendar_nanotime(secs: &secs, nanosecs: &nsecs);
3988	timestamp.tv_sec = (int32_t)secs;
3989	timestamp.tv_nsec = (int32_t)nsecs;
3990	/ 25567702 tracks widening mach_timespec_t /
3991
3992	/ send message /
3993	kr = task_get_special_port(task: current_task(), TASK_RESOURCE_NOTIFY_PORT, special_port: &dstport);
3994	if (dstport == MACH_PORT_NULL) {
3995	kr = host_get_special_port(host_priv: host_priv_self(), HOST_LOCAL_NODE,
3996	HOST_RESOURCE_NOTIFY_PORT, port: &dstport);
3997	if (kr) {
3998	goto finish;
3999	}
4000	}
4001
4002	thread_set_honor_qlimit(thread: curthread);
4003	kr = sendfunc(dstport,
4004	procname, pid, timestamp,
4005	current_size, limit, fatal_port,
4006	flags);
4007	thread_clear_honor_qlimit(thread: curthread);
4008
4009	ipc_port_release_send(port: dstport);
4010
4011	#endif /* MACH_BSD */
4012	finish:
4013	return kr;
4014	}
4015
/*
 * Resource violations trace four 64-bit integers.  For K32, two additional
 * codes are allocated, the first with the low nibble doubled.  So if the K64
 * code is 0x042, the K32 codes would be 0x044 and 0x045.
 */
4021	#ifdef __LP64__
4022	void
4023	trace_resource_violation(uint16_t code,
4024	struct ledger_entry_info *linfo)
4025	{
4026	KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, code),
4027	linfo->lei_balance, linfo->lei_last_refill,
4028	linfo->lei_limit, linfo->lei_refill_period);
4029	}
4030	#else /* K32 */
4031	/ TODO: create/find a trace_two_LLs() for K32 systems /
4032	#define MASK32 0xffffffff
4033	void
4034	trace_resource_violation(uint16_t code,
4035	struct ledger_entry_info *linfo)
4036	{
4037	int8_t lownibble = (code & `0x3`) * `2`;
4038	int16_t codeA = (code & `0xffc`) \| lownibble;
4039	int16_t codeB = codeA + `1`;
4040
4041	int32_t balance_high = (linfo->lei_balance >> `32`) & MASK32;
4042	int32_t balance_low = linfo->lei_balance & MASK32;
4043	int32_t last_refill_high = (linfo->lei_last_refill >> `32`) & MASK32;
4044	int32_t last_refill_low = linfo->lei_last_refill & MASK32;
4045
4046	int32_t limit_high = (linfo->lei_limit >> `32`) & MASK32;
4047	int32_t limit_low = linfo->lei_limit & MASK32;
4048	int32_t refill_period_high = (linfo->lei_refill_period >> `32`) & MASK32;
4049	int32_t refill_period_low = linfo->lei_refill_period & MASK32;
4050
4051	KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeA),
4052	balance_high, balance_low,
4053	last_refill_high, last_refill_low);
4054	KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeB),
4055	limit_high, limit_low,
4056	refill_period_high, refill_period_low);
4057	}
4058	#endif /* K64/K32 */
4059

Browse the source code of xnu/osfmk/kern/task_policy.c