task.c source code [xnu/osfmk/kern/task.c]

1	/*
2	* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28	/*
29	* @OSF_FREE_COPYRIGHT@
30	*/
31	/*
32	* Mach Operating System
33	* Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34	* All Rights Reserved.
35	*
36	* Permission to use, copy, modify and distribute this software and its
37	* documentation is hereby granted, provided that both the copyright
38	* notice and this permission notice appear in all copies of the
39	* software, derivative works or modified versions, and any portions
40	* thereof, and that both notices appear in supporting documentation.
41	*
42	* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43	* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44	* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45	*
46	* Carnegie Mellon requests users of this software to return to
47	*
48	* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49	* School of Computer Science
50	* Carnegie Mellon University
51	* Pittsburgh PA 15213-3890
52	*
53	* any improvements or extensions that they make and grant Carnegie Mellon
54	* the rights to redistribute these changes.
55	*/
56	/*
57	* File: kern/task.c
58	* Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59	* David Black
60	*
61	* Task management primitives implementation.
62	*/
63	/*
64	* Copyright (c) 1993 The University of Utah and
65	* the Computer Systems Laboratory (CSL). All rights reserved.
66	*
67	* Permission to use, copy, modify and distribute this software and its
68	* documentation is hereby granted, provided that both the copyright
69	* notice and this permission notice appear in all copies of the
70	* software, derivative works or modified versions, and any portions
71	* thereof, and that both notices appear in supporting documentation.
72	*
73	* THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74	* IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75	* ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76	*
77	* CSL requests users of this software to return to csl-dist@cs.utah.edu any
78	* improvements that they make and grant CSL redistribution rights.
79	*
80	*/
81	/*
82	* NOTICE: This file was modified by McAfee Research in 2004 to introduce
83	* support for mandatory and extensible security protections. This notice
84	* is included in support of clause 2.2 (b) of the Apple Public License,
85	* Version 2.0.
86	* Copyright (c) 2005 SPARTA, Inc.
87	*/
88
89	#include <mach/mach_types.h>
90	#include <mach/boolean.h>
91	#include <mach/host_priv.h>
92	#include <mach/machine/vm_types.h>
93	#include <mach/vm_param.h>
94	#include <mach/mach_vm.h>
95	#include <mach/semaphore.h>
96	#include <mach/task_info.h>
97	#include <mach/task_inspect.h>
98	#include <mach/task_special_ports.h>
99	#include <mach/sdt.h>
100	#include <mach/mach_test_upcall.h>
101
102	#include <ipc/ipc_importance.h>
103	#include <ipc/ipc_types.h>
104	#include <ipc/ipc_space.h>
105	#include <ipc/ipc_entry.h>
106	#include <ipc/ipc_hash.h>
107	#include <ipc/ipc_init.h>
108
109	#include <kern/kern_types.h>
110	#include <kern/mach_param.h>
111	#include <kern/misc_protos.h>
112	#include <kern/task.h>
113	#include <kern/thread.h>
114	#include <kern/coalition.h>
115	#include <kern/zalloc.h>
116	#include <kern/kalloc.h>
117	#include <kern/kern_cdata.h>
118	#include <kern/processor.h>
119	#include <kern/recount.h>
120	#include <kern/sched_prim.h> /* for thread_wakeup */
121	#include <kern/ipc_tt.h>
122	#include <kern/host.h>
123	#include <kern/clock.h>
124	#include <kern/timer.h>
125	#include <kern/assert.h>
126	#include <kern/affinity.h>
127	#include <kern/exc_resource.h>
128	#include <kern/machine.h>
129	#include <kern/policy_internal.h>
130	#include <kern/restartable.h>
131	#include <kern/ipc_kobject.h>
132
133	#include <corpses/task_corpse.h>
134	#if CONFIG_TELEMETRY
135	#include <kern/telemetry.h>
136	#endif
137
138	#if CONFIG_PERVASIVE_CPI
139	#include <kern/monotonic.h>
140	#include <machine/monotonic.h>
141	#endif /* CONFIG_PERVASIVE_CPI */
142
143	#if CONFIG_EXCLAVES
144	#include "exclaves_boot.h"
145	#include "exclaves_resource.h"
146	#include "exclaves_boot.h"
147	#include "kern/exclaves.tightbeam.h"
148	#endif /* CONFIG_EXCLAVES */
149
150	#include <os/log.h>
151
152	#include <vm/pmap.h>
153	#include <vm/vm_map.h>
154	#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
155	#include <vm/vm_pageout.h>
156	#include <vm/vm_protos.h>
157	#include <vm/vm_purgeable_internal.h>
158	#include <vm/vm_compressor_pager.h>
159	#include <vm/vm_reclaim_internal.h>
160
161	#include <sys/proc_ro.h>
162	#include <sys/resource.h>
163	#include <sys/signalvar.h> /* for coredump */
164	#include <sys/bsdtask_info.h>
165	#include <sys/kdebug_triage.h>
166	#include <sys/code_signing.h> /* for address_space_debugged */
167	/*
168	* Exported interfaces
169	*/
170
171	#include <mach/task_server.h>
172	#include <mach/mach_host_server.h>
173	#include <mach/mach_port_server.h>
174
175	#include <vm/vm_shared_region.h>
176
177	#include <libkern/OSDebug.h>
178	#include <libkern/OSAtomic.h>
179	#include <libkern/section_keywords.h>
180
181	#include <mach-o/loader.h>
182	#include <kdp/kdp_dyld.h>
183
184	#include <kern/sfi.h> /* picks up ledger.h */
185
186	#if CONFIG_MACF
187	#include <security/mac_mach_internal.h>
188	#endif
189
190	#include <IOKit/IOBSD.h>
191	#include <kdp/processor_core.h>
192
193	#include <string.h>
194
195	#if KPERF
196	extern int kpc_force_all_ctrs(task_t, int);
197	#endif
198
199	SECURITY_READ_ONLY_LATE(task_t) kernel_task;
200
201	int64_t next_taskuniqueid = `0`;
202	const size_t task_alignment = _Alignof(struct task);
203	extern const size_t proc_alignment;
204	extern size_t proc_struct_size;
205	extern size_t proc_and_task_size;
206	size_t task_struct_size;
207
208	extern uint32_t ipc_control_port_options;
209
210	extern int large_corpse_count;
211
212	extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
213	extern void task_disown_frozen_csegs(task_t owner_task);
214
215	static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
216	static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
217	static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
218	static inline void task_zone_init(void);
219
220	#if CONFIG_EXCLAVES
221	static bool task_should_panic_on_exit_due_to_conclave_taint(task_t task);
222	static bool task_is_conclave_tainted(task_t task);
223	static void task_set_conclave_taint(task_t task);
224	kern_return_t task_crash_info_conclave_upcall(task_t task,
225	const xnuupcalls_conclavesharedbuffer_s *shared_buf, uint32_t length);
226	kern_return_t
227	stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *_Nonnull result, void *kcdata_ptr);
228	#endif /* CONFIG_EXCLAVES */
229
230	IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
231	IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
232	.iko_op_no_senders = task_port_no_senders);
233	IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
234	.iko_op_no_senders = task_port_with_flavor_no_senders);
235	IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
236	.iko_op_no_senders = task_port_with_flavor_no_senders);
237	IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
238	.iko_op_no_senders = task_suspension_no_senders);
239
240	#if CONFIG_PROC_RESOURCE_LIMITS
241	static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
242	static mach_port_t task_allocate_fatal_port(void);
243
244	IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
245	.iko_op_stable = true,
246	.iko_op_no_senders = task_fatal_port_no_senders);
247
248	extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
249	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
250
251	/ Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. /
252	int audio_active = `0`;
253
254	/*
255	* structure for tracking zone usage
256	* Used either one per task/thread for all zones or <per-task,per-zone>.
257	*/
258	typedef struct zinfo_usage_store_t {
259	/ These fields may be updated atomically, and so must be 8 byte aligned /
260	uint64_t alloc __attribute__((aligned(`8`))); / allocation counter /
261	uint64_t free __attribute__((aligned(`8`))); / free counter /
262	} zinfo_usage_store_t;
263
264	/**
265	* Return codes related to diag threshold and memory limit
266	*/
267	__options_decl(diagthreshold_check_return, int, {
268	THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED = `0`,
269	THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED = `1`,
270	THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED = `2`,
271	THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED = `3`,
272	});
273
274	/**
275	* Return codes related to diag threshold and memory limit
276	*/
277	__options_decl(current_, int, {
278	THRESHOLD_IS_SAME_AS_LIMIT = `0`,
279	THRESHOLD_IS_NOT_SAME_AS_LIMIT = `1`
280	});
281
282	zinfo_usage_store_t tasks_tkm_private;
283	zinfo_usage_store_t tasks_tkm_shared;
284
285	/ A container to accumulate statistics for expired tasks /
286	expired_task_statistics_t dead_task_statistics;
287	LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
288
289	ledger_template_t task_ledger_template = NULL;
290
291	/ global lock for task_dyld_process_info_notify_{register, deregister, get_trap} /
292	LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
293	LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
294
295	SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
296	{.cpu_time = -`1`,
297	.tkm_private = -`1`,
298	.tkm_shared = -`1`,
299	.phys_mem = -`1`,
300	.wired_mem = -`1`,
301	.internal = -`1`,
302	.iokit_mapped = -`1`,
303	.external = -`1`,
304	.reusable = -`1`,
305	.alternate_accounting = -`1`,
306	.alternate_accounting_compressed = -`1`,
307	.page_table = -`1`,
308	.phys_footprint = -`1`,
309	.internal_compressed = -`1`,
310	.purgeable_volatile = -`1`,
311	.purgeable_nonvolatile = -`1`,
312	.purgeable_volatile_compressed = -`1`,
313	.purgeable_nonvolatile_compressed = -`1`,
314	.tagged_nofootprint = -`1`,
315	.tagged_footprint = -`1`,
316	.tagged_nofootprint_compressed = -`1`,
317	.tagged_footprint_compressed = -`1`,
318	.network_volatile = -`1`,
319	.network_nonvolatile = -`1`,
320	.network_volatile_compressed = -`1`,
321	.network_nonvolatile_compressed = -`1`,
322	.media_nofootprint = -`1`,
323	.media_footprint = -`1`,
324	.media_nofootprint_compressed = -`1`,
325	.media_footprint_compressed = -`1`,
326	.graphics_nofootprint = -`1`,
327	.graphics_footprint = -`1`,
328	.graphics_nofootprint_compressed = -`1`,
329	.graphics_footprint_compressed = -`1`,
330	.neural_nofootprint = -`1`,
331	.neural_footprint = -`1`,
332	.neural_nofootprint_compressed = -`1`,
333	.neural_footprint_compressed = -`1`,
334	.platform_idle_wakeups = -`1`,
335	.interrupt_wakeups = -`1`,
336	#if CONFIG_SCHED_SFI
337	.sfi_wait_times = { `0` / initialized at runtime /},
338	#endif /* CONFIG_SCHED_SFI */
339	.cpu_time_billed_to_me = -`1`,
340	.cpu_time_billed_to_others = -`1`,
341	.physical_writes = -`1`,
342	.logical_writes = -`1`,
343	.logical_writes_to_external = -`1`,
344	#if DEBUG \|\| DEVELOPMENT
345	.pages_grabbed = -`1`,
346	.pages_grabbed_kern = -`1`,
347	.pages_grabbed_iopl = -`1`,
348	.pages_grabbed_upl = -`1`,
349	#endif
350	#if CONFIG_FREEZE
351	.frozen_to_swap = -`1`,
352	#endif /* CONFIG_FREEZE */
353	.energy_billed_to_me = -`1`,
354	.energy_billed_to_others = -`1`,
355	#if CONFIG_PHYS_WRITE_ACCT
356	.fs_metadata_writes = -`1`,
357	#endif /* CONFIG_PHYS_WRITE_ACCT */
358	#if CONFIG_MEMORYSTATUS
359	.memorystatus_dirty_time = -`1`,
360	#endif /* CONFIG_MEMORYSTATUS */
361	.swapins = -`1`,
362	.conclave_mem = -`1`, };
363
364	/ System sleep state /
365	boolean_t tasks_suspend_state;
366
367	__options_decl(send_exec_resource_is_fatal, bool, {
368	IS_NOT_FATAL = false,
369	IS_FATAL = true
370	});
371
372	__options_decl(send_exec_resource_is_diagnostics, bool, {
373	IS_NOT_DIAGNOSTICS = false,
374	IS_DIAGNOSTICS = true
375	});
376
377	__options_decl(send_exec_resource_is_warning, bool, {
378	IS_NOT_WARNING = false,
379	IS_WARNING = true
380	});
381
382	__options_decl(send_exec_resource_options_t, uint8_t, {
383	EXEC_RESOURCE_FATAL = `0x01`,
384	EXEC_RESOURCE_DIAGNOSTIC = `0x02`,
385	EXEC_RESOURCE_WARNING = `0x04`,
386	});
387
388	/**
389	* Actions to take when a process has reached the memory limit or the diagnostics threshold limits
390	*/
391	static inline void task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning);
392	#if DEBUG \|\| DEVELOPMENT
393	static inline void task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size);
394	#endif
395	void init_task_ledgers(void);
396	void task_footprint_exceeded(int warning, __unused const void param0, __unused const* void *param1);
397	void task_wakeups_rate_exceeded(int warning, __unused const void param0, __unused const* void *param1);
398	void task_io_rate_exceeded(int warning, const void param0, __unused const* void *param1);
399	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
400	void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options);
401	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
402	#if CONFIG_PROC_RESOURCE_LIMITS
403	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
404	mach_port_name_t current_task_get_fatal_port_name(void);
405	void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit);
406	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
407
408	kern_return_t task_suspend_internal_locked(task_t);
409	kern_return_t task_suspend_internal(task_t);
410	kern_return_t task_resume_internal_locked(task_t);
411	kern_return_t task_resume_internal(task_t);
412	static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
413
414	extern kern_return_t iokit_task_terminate(task_t task, int phase);
415	extern void iokit_task_app_suspended_changed(task_t task);
416
417	extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action , lck_mtx_t );
418	extern void bsd_copythreadname(void dst_uth, void* *src_uth);
419	extern kern_return_t thread_resume(thread_t thread);
420
421	extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
422
423	// Condition to include diag footprints
424	#define RESETTABLE_DIAG_FOOTPRINT_LIMITS ((DEBUG \|\| DEVELOPMENT) && CONFIG_MEMORYSTATUS)
425
426	// Warn tasks when they hit 80% of their memory limit.
427	#define PHYS_FOOTPRINT_WARNING_LEVEL 80
428
429	#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
430	#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
431
432	/*
433	* Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
434	*
435	* (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
436	* stacktraces, aka micro-stackshots)
437	*/
438	#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
439
440	int task_wakeups_monitor_interval; / In seconds. Time period over which wakeups rate is observed /
441	int task_wakeups_monitor_rate; / In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent /
442
443	unsigned int task_wakeups_monitor_ustackshots_trigger_pct; / Percentage. Level at which we start gathering telemetry. /
444
445	TUNABLE(bool, disable_exc_resource, "disable_exc_resource", false); / Global override to suppress EXC_RESOURCE for resource monitor violations. /
446	TUNABLE(bool, disable_exc_resource_during_audio, "disable_exc_resource_during_audio", true); / Global override to suppress EXC_RESOURCE while audio is active /
447
448	ledger_amount_t max_task_footprint = `0`; / Per-task limit on physical memory consumption in bytes /
449	unsigned int max_task_footprint_warning_level = `0`; / Per-task limit warning percentage /
450
451	/*
452	* Configure per-task memory limit.
453	* The boot-arg is interpreted as Megabytes,
454	* and takes precedence over the device tree.
455	* Setting the boot-arg to 0 disables task limits.
456	*/
457	TUNABLE_DT_WRITEABLE(int, max_task_footprint_mb, "/defaults", "kern.max_task_pmem", "max_task_pmem", `0`, TUNABLE_DT_NONE);
458
459	/ I/O Monitor Limits /
460	#define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
461	#define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
462
463	uint64_t task_iomon_limit_mb; / Per-task I/O monitor limit in MBs /
464	uint64_t task_iomon_interval_secs; / Per-task I/O monitor interval in secs /
465
466	#define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
467	int64_t io_telemetry_limit; / Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) /
468	int64_t global_logical_writes_count = `0`; / Global count for logical writes /
469	int64_t global_logical_writes_to_external_count = `0`; / Global count for logical writes to external storage/
470	static boolean_t global_update_logical_writes(int64_t, int64_t*);
471
472	#if DEBUG \|\| DEVELOPMENT
473	static diagthreshold_check_return task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value);
474	#endif
475	#define TASK_MAX_THREAD_LIMIT 256
476
477	#if MACH_ASSERT
478	int pmap_ledgers_panic = `1`;
479	int pmap_ledgers_panic_leeway = `3`;
480	#endif /* MACH_ASSERT */
481
482	int task_max = CONFIG_TASK_MAX; / Max number of tasks /
483
484	#if CONFIG_COREDUMP
485	int hwm_user_cores = `0`; / high watermark violations generate user core files /
486	#endif
487
488	#ifdef MACH_BSD
489	extern uint32_t proc_platform(const struct proc *);
490	extern uint32_t proc_sdk(struct proc *);
491	extern void proc_getexecutableuuid(void , unsigned* char , unsigned* long);
492	extern int proc_pid(struct proc *p);
493	extern int proc_selfpid(void);
494	extern struct proc current_proc(void*);
495	extern char proc_name_address(struct* proc *p);
496	extern uint64_t get_dispatchqueue_offset_from_proc(void *);
497	extern int kevent_proc_copy_uptrs(void proc, uint64_t buf, uint32_t bufsize);
498	extern void workq_proc_suspended(struct proc *p);
499	extern void workq_proc_resumed(struct proc *p);
500	extern struct proc *kernproc;
501
502	#if CONFIG_MEMORYSTATUS
503	extern void proc_memstat_skip(struct proc* p, boolean_t set);
504	extern void memorystatus_on_ledger_footprint_exceeded(int warning, bool memlimit_is_active, bool memlimit_is_fatal);
505	extern void memorystatus_log_exception(const int max_footprint_mb, bool memlimit_is_active, bool memlimit_is_fatal);
506	extern void memorystatus_log_diag_threshold_exception(const int diag_threshold_value);
507	extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
508	extern uint64_t memorystatus_available_memory_internal(struct proc *p);
509
510	#if DEVELOPMENT \|\| DEBUG
511	extern void memorystatus_abort_vm_map_fork(task_t);
512	#endif
513
514	#endif /* CONFIG_MEMORYSTATUS */
515
516	#endif /* MACH_BSD */
517
518	/ Boot-arg that turns on fatal pac exception delivery for all first-party apps /
519	static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
520
521	/*
522	* Defaults for controllable EXC_GUARD behaviors
523	*
524	* Internal builds are fatal by default (except BRIDGE).
525	* Create an alternate set of defaults for special processes by name.
526	*/
527	struct task_exc_guard_named_default {
528	char *name;
529	uint32_t behavior;
530	};
531	#define _TASK_EXC_GUARD_MP_CORPSE (TASK_EXC_GUARD_MP_DELIVER \| TASK_EXC_GUARD_MP_CORPSE)
532	#define _TASK_EXC_GUARD_MP_ONCE (_TASK_EXC_GUARD_MP_CORPSE \| TASK_EXC_GUARD_MP_ONCE)
533	#define _TASK_EXC_GUARD_MP_FATAL (TASK_EXC_GUARD_MP_DELIVER \| TASK_EXC_GUARD_MP_FATAL)
534
535	#define _TASK_EXC_GUARD_VM_CORPSE (TASK_EXC_GUARD_VM_DELIVER \| TASK_EXC_GUARD_VM_ONCE)
536	#define _TASK_EXC_GUARD_VM_ONCE (_TASK_EXC_GUARD_VM_CORPSE \| TASK_EXC_GUARD_VM_ONCE)
537	#define _TASK_EXC_GUARD_VM_FATAL (TASK_EXC_GUARD_VM_DELIVER \| TASK_EXC_GUARD_VM_FATAL)
538
539	#define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE \| _TASK_EXC_GUARD_VM_CORPSE)
540	#define _TASK_EXC_GUARD_ALL_ONCE (_TASK_EXC_GUARD_MP_ONCE \| _TASK_EXC_GUARD_VM_ONCE)
541	#define _TASK_EXC_GUARD_ALL_FATAL (_TASK_EXC_GUARD_MP_FATAL \| _TASK_EXC_GUARD_VM_FATAL)
542
543	/ cannot turn off FATAL and DELIVER bit if set /
544	uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL \| TASK_EXC_GUARD_VM_FATAL \|
545	TASK_EXC_GUARD_MP_DELIVER \| TASK_EXC_GUARD_VM_DELIVER;
546	/ cannot turn on ONCE bit if unset /
547	uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE \| TASK_EXC_GUARD_VM_ONCE;
548
549	#if !defined(XNU_TARGET_OS_BRIDGE)
550
551	uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
552	uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL \| TASK_EXC_GUARD_VM_ALL;
553	/*
554	* These "by-process-name" default overrides are intended to be a short-term fix to
555	* quickly get over races between changes introducing new EXC_GUARD raising behaviors
556	* in some process and a change in default behavior for same. We should ship with
557	* these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
558	* exception behavior via task_set_exc_guard_behavior()).
559	*
560	* XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
561	* task_exc_guard_default when transitioning this list between empty and
562	* non-empty.
563	*/
564	static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
565
566	#else /* !defined(XNU_TARGET_OS_BRIDGE) */
567
568	uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
569	uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL \| TASK_EXC_GUARD_VM_ALL;
570	static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
571
572	#endif /* !defined(XNU_TARGET_OS_BRIDGE) */
573
574	/ Forwards /
575
576	static void task_hold_locked(task_t task);
577	static void task_wait_locked(task_t task, boolean_t until_not_runnable);
578	static void task_release_locked(task_t task);
579	extern task_t proc_get_task_raw(void *proc);
580	extern void task_ref_hold_proc_task_struct(task_t task);
581	extern void task_release_proc_task_struct(task_t task);
582
583	static void task_synchronizer_destroy_all(task_t task);
584	static os_ref_count_t
585	task_add_turnstile_watchports_locked(
586	task_t task,
587	struct task_watchports *watchports,
588	struct task_watchport_elem **previous_elem_array,
589	ipc_port_t *portwatch_ports,
590	uint32_t portwatch_count);
591
592	static os_ref_count_t
593	task_remove_turnstile_watchports_locked(
594	task_t task,
595	struct task_watchports *watchports,
596	ipc_port_t *port_freelist);
597
598	static struct task_watchports *
599	task_watchports_alloc_init(
600	task_t task,
601	thread_t thread,
602	uint32_t count);
603
604	static void
605	task_watchports_deallocate(
606	struct task_watchports *watchports);
607
608	__attribute__((always_inline)) inline void
609	task_lock(task_t task)
610	{
611	lck_mtx_lock(lck: &(task)->lock);
612	}
613
614	__attribute__((always_inline)) inline void
615	task_unlock(task_t task)
616	{
617	lck_mtx_unlock(lck: &(task)->lock);
618	}
619
620	void
621	task_set_64bit(
622	task_t task,
623	boolean_t is_64bit,
624	boolean_t is_64bit_data)
625	{
626	#if defined(__i386__) \|\| defined(__x86_64__) \|\| defined(__arm64__)
627	thread_t thread;
628	#endif /* defined(__i386__) \|\| defined(__x86_64__) \|\| defined(__arm64__) */
629
630	task_lock(task);
631
632	/*
633	* Switching to/from 64-bit address spaces
634	*/
635	if (is_64bit) {
636	if (!task_has_64Bit_addr(task)) {
637	task_set_64Bit_addr(task);
638	}
639	} else {
640	if (task_has_64Bit_addr(task)) {
641	task_clear_64Bit_addr(task);
642	}
643	}
644
645	/*
646	* Switching to/from 64-bit register state.
647	*/
648	if (is_64bit_data) {
649	if (task_has_64Bit_data(task)) {
650	goto out;
651	}
652
653	task_set_64Bit_data(task);
654	} else {
655	if (!task_has_64Bit_data(task)) {
656	goto out;
657	}
658
659	task_clear_64Bit_data(task);
660	}
661
662	/ FIXME: On x86, the thread save state flavor can diverge from the*
663	* task's 64-bit feature flag due to the 32-bit/64-bit register save
664	* state dichotomy. Since we can be pre-empted in this interval,
665	* certain routines may observe the thread as being in an inconsistent
666	* state with respect to its task's 64-bitness.
667	*/
668
669	#if defined(__x86_64__) \|\| defined(__arm64__)
670	queue_iterate(&task->threads, thread, thread_t, task_threads) {
671	thread_mtx_lock(thread);
672	machine_thread_switch_addrmode(thread);
673	thread_mtx_unlock(thread);
674	}
675	#endif /* defined(__x86_64__) \|\| defined(__arm64__) */
676
677	out:
678	task_unlock(task);
679	}
680
681	bool
682	task_get_64bit_addr(task_t task)
683	{
684	return task_has_64Bit_addr(task);
685	}
686
687	bool
688	task_get_64bit_data(task_t task)
689	{
690	return task_has_64Bit_data(task);
691	}
692
693	void
694	task_set_platform_binary(
695	task_t task,
696	boolean_t is_platform)
697	{
698	if (is_platform) {
699	task_ro_flags_set(task, TFRO_PLATFORM);
700	} else {
701	task_ro_flags_clear(task, TFRO_PLATFORM);
702	}
703	}
704
705	#if XNU_TARGET_OS_OSX
706	#if DEVELOPMENT \|\| DEBUG
707	SECURITY_READ_ONLY_LATE(bool) AMFI_bootarg_disable_mach_hardening = false;
708	#endif /* DEVELOPMENT \|\| DEBUG */
709
710	void
711	task_disable_mach_hardening(task_t task)
712	{
713	task_ro_flags_set(task, TFRO_MACH_HARDENING_OPT_OUT);
714	}
715
716	bool
717	task_opted_out_mach_hardening(task_t task)
718	{
719	return task_ro_flags_get(task) & TFRO_MACH_HARDENING_OPT_OUT;
720	}
721	#endif /* XNU_TARGET_OS_OSX */
722
723	/*
724	* Use the `task_is_hardened_binary` macro below
725	* when applying new security policies.
726	*
727	* Kernel security policies now generally apply to
728	* "hardened binaries" - which are platform binaries, and
729	* third party binaries who adopt hardened runtime on ios.
730	*/
731	boolean_t
732	task_get_platform_binary(task_t task)
733	{
734	return (task_ro_flags_get(task) & TFRO_PLATFORM) != `0`;
735	}
736
737	static boolean_t
738	task_get_hardened_runtime(task_t task)
739	{
740	return (task_ro_flags_get(task) & TFRO_HARDENED) != `0`;
741	}
742
743	boolean_t
744	task_is_hardened_binary(task_t task)
745	{
746	return task_get_platform_binary(task) \|\|
747	task_get_hardened_runtime(task);
748	}
749
750	void
751	task_set_hardened_runtime(
752	task_t task,
753	bool is_hardened)
754	{
755	if (is_hardened) {
756	task_ro_flags_set(task, TFRO_HARDENED);
757	} else {
758	task_ro_flags_clear(task, TFRO_HARDENED);
759	}
760	}
761
762	boolean_t
763	task_is_a_corpse(task_t task)
764	{
765	return (task_ro_flags_get(task) & TFRO_CORPSE) != `0`;
766	}
767
768	boolean_t
769	task_is_ipc_active(task_t task)
770	{
771	return task->ipc_active;
772	}
773
774	void
775	task_set_corpse(task_t task)
776	{
777	return task_ro_flags_set(task, TFRO_CORPSE);
778	}
779
780	void
781	task_set_immovable_pinned(task_t task)
782	{
783	ipc_task_set_immovable_pinned(task);
784	}
785
786	/*
787	* Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
788	* Returns "false" if flag is already set, and "true" in other cases.
789	*/
790	bool
791	task_set_ca_client_wi(
792	task_t task,
793	boolean_t set_or_clear)
794	{
795	bool ret = true;
796	task_lock(task);
797	if (set_or_clear) {
798	/ Tasks can have only one CA_CLIENT work interval /
799	if (task->t_flags & TF_CA_CLIENT_WI) {
800	ret = false;
801	} else {
802	task->t_flags \|= TF_CA_CLIENT_WI;
803	}
804	} else {
805	task->t_flags &= ~TF_CA_CLIENT_WI;
806	}
807	task_unlock(task);
808	return ret;
809	}
810
811	/*
812	* task_set_dyld_info() is called at most three times.
813	* 1) at task struct creation to set addr/size to zero.
814	* 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
815	* 3) is from dyld itself to update location of all_image_info
816	* For security any calls after that are ignored. The TF_DYLD_ALL_IMAGE_SET bit is used to determine state.
817	*/
818	kern_return_t
819	task_set_dyld_info(
820	task_t task,
821	mach_vm_address_t addr,
822	mach_vm_size_t size)
823	{
824	mach_vm_address_t end;
825	if (os_add_overflow(addr, size, &end)) {
826	return KERN_FAILURE;
827	}
828
829	task_lock(task);
830	/ don't accept updates if all_image_info_addr is final /
831	if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == `0`) {
832	bool inputNonZero = ((addr != `0`) \|\| (size != `0`));
833	bool currentNonZero = ((task->all_image_info_addr != `0`) \|\| (task->all_image_info_size != `0`));
834	task->all_image_info_addr = addr;
835	task->all_image_info_size = size;
836	/ can only change from a non-zero value to another non-zero once /
837	if (inputNonZero && currentNonZero) {
838	task->t_flags \|= TF_DYLD_ALL_IMAGE_FINAL;
839	}
840	task_unlock(task);
841	return KERN_SUCCESS;
842	} else {
843	task_unlock(task);
844	return KERN_FAILURE;
845	}
846	}
847
848	bool
849	task_donates_own_pages(
850	task_t task)
851	{
852	return task->donates_own_pages;
853	}
854
855	void
856	task_set_mach_header_address(
857	task_t task,
858	mach_vm_address_t addr)
859	{
860	task_lock(task);
861	task->mach_header_vm_address = addr;
862	task_unlock(task);
863	}
864
865	void
866	task_bank_reset(__unused task_t task)
867	{
868	if (task->bank_context != NULL) {
869	bank_task_destroy(task);
870	}
871	}
872
873	/*
874	* NOTE: This should only be called when the P_LINTRANSIT
875	* flag is set (the proc_trans lock is held) on the
876	* proc associated with the task.
877	*/
878	void
879	task_bank_init(__unused task_t task)
880	{
881	if (task->bank_context != NULL) {
882	panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
883	}
884	bank_task_initialize(task);
885	}
886
887	void
888	task_set_did_exec_flag(task_t task)
889	{
890	task->t_procflags \|= TPF_DID_EXEC;
891	}
892
893	void
894	task_clear_exec_copy_flag(task_t task)
895	{
896	task->t_procflags &= ~TPF_EXEC_COPY;
897	}
898
899	event_t
900	task_get_return_wait_event(task_t task)
901	{
902	return (event_t)&task->returnwait_inheritor;
903	}
904
905	void
906	task_clear_return_wait(task_t task, uint32_t flags)
907	{
908	if (flags & TCRW_CLEAR_INITIAL_WAIT) {
909	thread_wakeup(task_get_return_wait_event(task));
910	}
911
912	if (flags & TCRW_CLEAR_FINAL_WAIT) {
913	is_write_lock(task->itk_space);
914
915	task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
916	task->returnwait_inheritor = NULL;
917
918	if (flags & TCRW_CLEAR_EXEC_COMPLETE) {
919	task->t_returnwaitflags &= ~TRW_LEXEC_COMPLETE;
920	}
921
922	if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
923	struct turnstile *turnstile = turnstile_prepare_hash(proprietor: (uintptr_t) task_get_return_wait_event(task),
924	type: TURNSTILE_ULOCK);
925
926	waitq_wakeup64_all(waitq: &turnstile->ts_waitq,
927	CAST_EVENT64_T(task_get_return_wait_event(task)),
928	THREAD_AWAKENED, flags: WAITQ_UPDATE_INHERITOR);
929
930	turnstile_update_inheritor_complete(turnstile, flags: TURNSTILE_INTERLOCK_HELD);
931
932	turnstile_complete_hash(proprietor: (uintptr_t) task_get_return_wait_event(task), type: TURNSTILE_ULOCK);
933	turnstile_cleanup();
934	task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
935	}
936	is_write_unlock(task->itk_space);
937	}
938	}
939
940	void __attribute__((noreturn))
941	task_wait_to_return(void)
942	{
943	task_t task = current_task();
944	uint8_t returnwaitflags;
945
946	is_write_lock(task->itk_space);
947
948	if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
949	struct turnstile *turnstile = turnstile_prepare_hash(proprietor: (uintptr_t) task_get_return_wait_event(task),
950	type: TURNSTILE_ULOCK);
951
952	do {
953	task->t_returnwaitflags \|= TRW_LRETURNWAITER;
954	turnstile_update_inheritor(turnstile, new_inheritor: task->returnwait_inheritor,
955	flags: (TURNSTILE_DELAYED_UPDATE \| TURNSTILE_INHERITOR_THREAD));
956
957	waitq_assert_wait64(waitq: &turnstile->ts_waitq,
958	CAST_EVENT64_T(task_get_return_wait_event(task)),
959	THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
960
961	is_write_unlock(task->itk_space);
962
963	turnstile_update_inheritor_complete(turnstile, flags: TURNSTILE_INTERLOCK_NOT_HELD);
964
965	thread_block(THREAD_CONTINUE_NULL);
966
967	is_write_lock(task->itk_space);
968	} while (task->t_returnwaitflags & TRW_LRETURNWAIT);
969
970	turnstile_complete_hash(proprietor: (uintptr_t) task_get_return_wait_event(task), type: TURNSTILE_ULOCK);
971	}
972
973	returnwaitflags = task->t_returnwaitflags;
974	is_write_unlock(task->itk_space);
975	turnstile_cleanup();
976
977
978	#if CONFIG_MACF
979	/*
980	* Before jumping to userspace and allowing this process
981	* to execute any code, make sure its credentials are cached,
982	* and notify any interested parties.
983	*/
984	extern void current_cached_proc_cred_update(void);
985
986	current_cached_proc_cred_update();
987	if (returnwaitflags & TRW_LEXEC_COMPLETE) {
988	mac_proc_notify_exec_complete(proc: current_proc());
989	}
990	#endif
991
992	thread_bootstrap_return();
993	}
994
995	boolean_t
996	task_is_exec_copy(task_t task)
997	{
998	return task_is_exec_copy_internal(task);
999	}
1000
1001	boolean_t
1002	task_did_exec(task_t task)
1003	{
1004	return task_did_exec_internal(task);
1005	}
1006
1007	boolean_t
1008	task_is_active(task_t task)
1009	{
1010	return task->active;
1011	}
1012
1013	boolean_t
1014	task_is_halting(task_t task)
1015	{
1016	return task->halting;
1017	}
1018
1019	void
1020	task_init(void)
1021	{
1022	if (max_task_footprint_mb != `0`) {
1023	#if CONFIG_MEMORYSTATUS
1024	if (max_task_footprint_mb < `50`) {
1025	printf(format: "Warning: max_task_pmem %d below minimum.\n",
1026	max_task_footprint_mb);
1027	max_task_footprint_mb = `50`;
1028	}
1029	printf(format: "Limiting task physical memory footprint to %d MB\n",
1030	max_task_footprint_mb);
1031
1032	max_task_footprint = (ledger_amount_t)max_task_footprint_mb * `1024` * `1024`; // Convert MB to bytes
1033
1034	/*
1035	* Configure the per-task memory limit warning level.
1036	* This is computed as a percentage.
1037	*/
1038	max_task_footprint_warning_level = `0`;
1039
1040	if (max_mem < `0x40000000`) {
1041	/*
1042	* On devices with < 1GB of memory:
1043	* -- set warnings to 50MB below the per-task limit.
1044	*/
1045	if (max_task_footprint_mb > `50`) {
1046	max_task_footprint_warning_level = ((max_task_footprint_mb - `50`) * `100`) / max_task_footprint_mb;
1047	}
1048	} else {
1049	/*
1050	* On devices with >= 1GB of memory:
1051	* -- set warnings to 100MB below the per-task limit.
1052	*/
1053	if (max_task_footprint_mb > `100`) {
1054	max_task_footprint_warning_level = ((max_task_footprint_mb - `100`) * `100`) / max_task_footprint_mb;
1055	}
1056	}
1057
1058	/*
1059	* Never allow warning level to land below the default.
1060	*/
1061	if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
1062	max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
1063	}
1064
1065	printf(format: "Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
1066
1067	#else
1068	printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
1069	#endif /* CONFIG_MEMORYSTATUS */
1070	}
1071
1072	#if DEVELOPMENT \|\| DEBUG
1073	PE_parse_boot_argn("task_exc_guard_default",
1074	&task_exc_guard_default,
1075	sizeof(task_exc_guard_default));
1076	#endif /* DEVELOPMENT \|\| DEBUG */
1077
1078	#if CONFIG_COREDUMP
1079	if (!PE_parse_boot_argn(arg_string: "hwm_user_cores", arg_ptr: &hwm_user_cores,
1080	max_arg: sizeof(hwm_user_cores))) {
1081	hwm_user_cores = `0`;
1082	}
1083	#endif
1084
1085	proc_init_cpumon_params();
1086
1087	if (!PE_parse_boot_argn(arg_string: "task_wakeups_monitor_rate", arg_ptr: &task_wakeups_monitor_rate, max_arg: sizeof(task_wakeups_monitor_rate))) {
1088	task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
1089	}
1090
1091	if (!PE_parse_boot_argn(arg_string: "task_wakeups_monitor_interval", arg_ptr: &task_wakeups_monitor_interval, max_arg: sizeof(task_wakeups_monitor_interval))) {
1092	task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
1093	}
1094
1095	if (!PE_parse_boot_argn(arg_string: "task_wakeups_monitor_ustackshots_trigger_pct", arg_ptr: &task_wakeups_monitor_ustackshots_trigger_pct,
1096	max_arg: sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
1097	task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
1098	}
1099
1100	if (!PE_parse_boot_argn(arg_string: "task_iomon_limit_mb", arg_ptr: &task_iomon_limit_mb, max_arg: sizeof(task_iomon_limit_mb))) {
1101	task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
1102	}
1103
1104	if (!PE_parse_boot_argn(arg_string: "task_iomon_interval_secs", arg_ptr: &task_iomon_interval_secs, max_arg: sizeof(task_iomon_interval_secs))) {
1105	task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
1106	}
1107
1108	if (!PE_parse_boot_argn(arg_string: "io_telemetry_limit", arg_ptr: &io_telemetry_limit, max_arg: sizeof(io_telemetry_limit))) {
1109	io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
1110	}
1111
1112	/*
1113	* If we have coalitions, coalition_init() will call init_task_ledgers() as it
1114	* sets up the ledgers for the default coalition. If we don't have coalitions,
1115	* then we have to call it now.
1116	*/
1117	#if CONFIG_COALITIONS
1118	assert(task_ledger_template);
1119	#else /* CONFIG_COALITIONS */
1120	init_task_ledgers();
1121	#endif /* CONFIG_COALITIONS */
1122
1123	task_ref_init();
1124	task_zone_init();
1125
1126	#ifdef __LP64__
1127	boolean_t is_64bit = TRUE;
1128	#else
1129	boolean_t is_64bit = FALSE;
1130	#endif
1131
1132	kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK \| Z_ZERO);
1133	kernel_task = proc_get_task_raw(proc: kernproc);
1134
1135	/*
1136	* Create the kernel task as the first task.
1137	*/
1138	if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, is_64bit,
1139	is_64bit_data: is_64bit, TF_NONE, TF_NONE, TPF_NONE, TWF_NONE, child_task: kernel_task) != KERN_SUCCESS) {
1140	panic("task_init");
1141	}
1142
1143	ipc_task_enable(task: kernel_task);
1144
1145	#if defined(HAS_APPLE_PAC)
1146	kernel_task->rop_pid = ml_default_rop_pid();
1147	kernel_task->jop_pid = ml_default_jop_pid();
1148	// kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
1149	// disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
1150	ml_task_set_disable_user_jop(task: kernel_task, FALSE);
1151	#endif
1152
1153	vm_map_deallocate(map: kernel_task->map);
1154	kernel_task->map = kernel_map;
1155	}
1156
1157	static inline void
1158	task_zone_init(void)
1159	{
1160	proc_struct_size = roundup(proc_struct_size, task_alignment);
1161	task_struct_size = roundup(sizeof(struct task), proc_alignment);
1162	proc_and_task_size = proc_struct_size + task_struct_size;
1163
1164	proc_task_zone = zone_create_ext(name: "proc_task", size: proc_and_task_size,
1165	flags: ZC_ZFREE_CLEARMEM \| ZC_SEQUESTER, desired_zid: ZONE_ID_PROC_TASK, NULL); / sequester is needed for proc_rele() /
1166	}
1167
1168	/*
1169	* Task ledgers
1170	* ------------
1171	*
1172	* phys_footprint
1173	* Physical footprint: This is the sum of:
1174	* + (internal - alternate_accounting)
1175	* + (internal_compressed - alternate_accounting_compressed)
1176	* + iokit_mapped
1177	* + purgeable_nonvolatile
1178	* + purgeable_nonvolatile_compressed
1179	* + page_table
1180	*
1181	* internal
1182	* The task's anonymous memory, which on iOS is always resident.
1183	*
1184	* internal_compressed
1185	* Amount of this task's internal memory which is held by the compressor.
1186	* Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1187	* and could be either decompressed back into memory, or paged out to storage, depending
1188	* on our implementation.
1189	*
1190	* iokit_mapped
1191	* IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1192	* clean/dirty or internal/external state].
1193	*
1194	* alternate_accounting
1195	* The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1196	* are counted in both internal and iokit_mapped, so we must subtract them from the total to avoid
1197	* double counting.
1198	*
1199	* pages_grabbed
1200	* pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1201	* which track UPL, IOPL and Kernel page grabs.
1202	*/
1203	void
1204	init_task_ledgers(void)
1205	{
1206	ledger_template_t t;
1207
1208	assert(task_ledger_template == NULL);
1209	assert(kernel_task == TASK_NULL);
1210
1211	#if MACH_ASSERT
1212	PE_parse_boot_argn("pmap_ledgers_panic",
1213	&pmap_ledgers_panic,
1214	sizeof(pmap_ledgers_panic));
1215	PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1216	&pmap_ledgers_panic_leeway,
1217	sizeof(pmap_ledgers_panic_leeway));
1218	#endif /* MACH_ASSERT */
1219
1220	if ((t = ledger_template_create(name: "Per-task ledger")) == NULL) {
1221	panic("couldn't create task ledger template");
1222	}
1223
1224	task_ledgers.cpu_time = ledger_entry_add(template: t, key: "cpu_time", group: "sched", units: "ns");
1225	task_ledgers.tkm_private = ledger_entry_add(template: t, key: "tkm_private",
1226	group: "physmem", units: "bytes");
1227	task_ledgers.tkm_shared = ledger_entry_add(template: t, key: "tkm_shared", group: "physmem",
1228	units: "bytes");
1229	task_ledgers.phys_mem = ledger_entry_add(template: t, key: "phys_mem", group: "physmem",
1230	units: "bytes");
1231	task_ledgers.wired_mem = ledger_entry_add(template: t, key: "wired_mem", group: "physmem",
1232	units: "bytes");
1233	task_ledgers.conclave_mem = ledger_entry_add_with_flags(template: t, key: "conclave_mem", group: "physmem", units: "count",
1234	flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE \| LEDGER_ENTRY_ALLOW_DEBIT);
1235	task_ledgers.internal = ledger_entry_add(template: t, key: "internal", group: "physmem",
1236	units: "bytes");
1237	task_ledgers.iokit_mapped = ledger_entry_add_with_flags(template: t, key: "iokit_mapped", group: "mappings",
1238	units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1239	task_ledgers.alternate_accounting = ledger_entry_add_with_flags(template: t, key: "alternate_accounting", group: "physmem",
1240	units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1241	task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(template: t, key: "alternate_accounting_compressed", group: "physmem",
1242	units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1243	task_ledgers.page_table = ledger_entry_add_with_flags(template: t, key: "page_table", group: "physmem",
1244	units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1245	task_ledgers.phys_footprint = ledger_entry_add(template: t, key: "phys_footprint", group: "physmem",
1246	units: "bytes");
1247	task_ledgers.internal_compressed = ledger_entry_add(template: t, key: "internal_compressed", group: "physmem",
1248	units: "bytes");
1249	task_ledgers.reusable = ledger_entry_add(template: t, key: "reusable", group: "physmem", units: "bytes");
1250	task_ledgers.external = ledger_entry_add(template: t, key: "external", group: "physmem", units: "bytes");
1251	task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(template: t, key: "purgeable_volatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1252	task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(template: t, key: "purgeable_nonvolatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1253	task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(template: t, key: "purgeable_volatile_compress", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1254	task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(template: t, key: "purgeable_nonvolatile_compress", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1255	#if DEBUG \|\| DEVELOPMENT
1256	task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1257	task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1258	task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1259	task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1260	#endif
1261	task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(template: t, key: "tagged_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1262	task_ledgers.tagged_footprint = ledger_entry_add_with_flags(template: t, key: "tagged_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1263	task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "tagged_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1264	task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "tagged_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1265	task_ledgers.network_volatile = ledger_entry_add_with_flags(template: t, key: "network_volatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1266	task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(template: t, key: "network_nonvolatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1267	task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(template: t, key: "network_volatile_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1268	task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(template: t, key: "network_nonvolatile_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1269	task_ledgers.media_nofootprint = ledger_entry_add_with_flags(template: t, key: "media_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1270	task_ledgers.media_footprint = ledger_entry_add_with_flags(template: t, key: "media_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1271	task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "media_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1272	task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "media_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1273	task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(template: t, key: "graphics_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1274	task_ledgers.graphics_footprint = ledger_entry_add_with_flags(template: t, key: "graphics_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1275	task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "graphics_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1276	task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "graphics_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1277	task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(template: t, key: "neural_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1278	task_ledgers.neural_footprint = ledger_entry_add_with_flags(template: t, key: "neural_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1279	task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "neural_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1280	task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "neural_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1281
1282	#if CONFIG_FREEZE
1283	task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
1284	#endif /* CONFIG_FREEZE */
1285
1286	task_ledgers.platform_idle_wakeups = ledger_entry_add(template: t, key: "platform_idle_wakeups", group: "power",
1287	units: "count");
1288	task_ledgers.interrupt_wakeups = ledger_entry_add(template: t, key: "interrupt_wakeups", group: "power",
1289	units: "count");
1290
1291	#if CONFIG_SCHED_SFI
1292	sfi_class_id_t class_id, ledger_alias;
1293	for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1294	task_ledgers.sfi_wait_times[class_id] = -`1`;
1295	}
1296
1297	/ don't account for UNSPECIFIED /
1298	for (class_id = SFI_CLASS_UNSPECIFIED + `1`; class_id < MAX_SFI_CLASS_ID; class_id++) {
1299	ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1300	if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1301	/ Check to see if alias has been registered yet /
1302	if (task_ledgers.sfi_wait_times[ledger_alias] != -`1`) {
1303	task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1304	} else {
1305	/ Otherwise, initialize it first /
1306	task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(template: t, class_id: ledger_alias);
1307	}
1308	} else {
1309	task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(template: t, class_id);
1310	}
1311
1312	if (task_ledgers.sfi_wait_times[class_id] < `0`) {
1313	panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1314	}
1315	}
1316
1317	assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - `1`] != -`1`);
1318	#endif /* CONFIG_SCHED_SFI */
1319
1320	task_ledgers.cpu_time_billed_to_me = ledger_entry_add(template: t, key: "cpu_time_billed_to_me", group: "sched", units: "ns");
1321	task_ledgers.cpu_time_billed_to_others = ledger_entry_add(template: t, key: "cpu_time_billed_to_others", group: "sched", units: "ns");
1322	task_ledgers.physical_writes = ledger_entry_add(template: t, key: "physical_writes", group: "res", units: "bytes");
1323	task_ledgers.logical_writes = ledger_entry_add(template: t, key: "logical_writes", group: "res", units: "bytes");
1324	task_ledgers.logical_writes_to_external = ledger_entry_add(template: t, key: "logical_writes_to_external", group: "res", units: "bytes");
1325	#if CONFIG_PHYS_WRITE_ACCT
1326	task_ledgers.fs_metadata_writes = ledger_entry_add(template: t, key: "fs_metadata_writes", group: "res", units: "bytes");
1327	#endif /* CONFIG_PHYS_WRITE_ACCT */
1328	task_ledgers.energy_billed_to_me = ledger_entry_add(template: t, key: "energy_billed_to_me", group: "power", units: "nj");
1329	task_ledgers.energy_billed_to_others = ledger_entry_add(template: t, key: "energy_billed_to_others", group: "power", units: "nj");
1330
1331	#if CONFIG_MEMORYSTATUS
1332	task_ledgers.memorystatus_dirty_time = ledger_entry_add(template: t, key: "memorystatus_dirty_time", group: "physmem", units: "ns");
1333	#endif /* CONFIG_MEMORYSTATUS */
1334
1335	task_ledgers.swapins = ledger_entry_add_with_flags(template: t, key: "swapins", group: "physmem", units: "bytes",
1336	flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1337
1338	if ((task_ledgers.cpu_time < `0`) \|\|
1339	(task_ledgers.tkm_private < `0`) \|\|
1340	(task_ledgers.tkm_shared < `0`) \|\|
1341	(task_ledgers.phys_mem < `0`) \|\|
1342	(task_ledgers.wired_mem < `0`) \|\|
1343	(task_ledgers.conclave_mem < `0`) \|\|
1344	(task_ledgers.internal < `0`) \|\|
1345	(task_ledgers.external < `0`) \|\|
1346	(task_ledgers.reusable < `0`) \|\|
1347	(task_ledgers.iokit_mapped < `0`) \|\|
1348	(task_ledgers.alternate_accounting < `0`) \|\|
1349	(task_ledgers.alternate_accounting_compressed < `0`) \|\|
1350	(task_ledgers.page_table < `0`) \|\|
1351	(task_ledgers.phys_footprint < `0`) \|\|
1352	(task_ledgers.internal_compressed < `0`) \|\|
1353	(task_ledgers.purgeable_volatile < `0`) \|\|
1354	(task_ledgers.purgeable_nonvolatile < `0`) \|\|
1355	(task_ledgers.purgeable_volatile_compressed < `0`) \|\|
1356	(task_ledgers.purgeable_nonvolatile_compressed < `0`) \|\|
1357	(task_ledgers.tagged_nofootprint < `0`) \|\|
1358	(task_ledgers.tagged_footprint < `0`) \|\|
1359	(task_ledgers.tagged_nofootprint_compressed < `0`) \|\|
1360	(task_ledgers.tagged_footprint_compressed < `0`) \|\|
1361	#if CONFIG_FREEZE
1362	(task_ledgers.frozen_to_swap < `0`) \|\|
1363	#endif /* CONFIG_FREEZE */
1364	(task_ledgers.network_volatile < `0`) \|\|
1365	(task_ledgers.network_nonvolatile < `0`) \|\|
1366	(task_ledgers.network_volatile_compressed < `0`) \|\|
1367	(task_ledgers.network_nonvolatile_compressed < `0`) \|\|
1368	(task_ledgers.media_nofootprint < `0`) \|\|
1369	(task_ledgers.media_footprint < `0`) \|\|
1370	(task_ledgers.media_nofootprint_compressed < `0`) \|\|
1371	(task_ledgers.media_footprint_compressed < `0`) \|\|
1372	(task_ledgers.graphics_nofootprint < `0`) \|\|
1373	(task_ledgers.graphics_footprint < `0`) \|\|
1374	(task_ledgers.graphics_nofootprint_compressed < `0`) \|\|
1375	(task_ledgers.graphics_footprint_compressed < `0`) \|\|
1376	(task_ledgers.neural_nofootprint < `0`) \|\|
1377	(task_ledgers.neural_footprint < `0`) \|\|
1378	(task_ledgers.neural_nofootprint_compressed < `0`) \|\|
1379	(task_ledgers.neural_footprint_compressed < `0`) \|\|
1380	(task_ledgers.platform_idle_wakeups < `0`) \|\|
1381	(task_ledgers.interrupt_wakeups < `0`) \|\|
1382	(task_ledgers.cpu_time_billed_to_me < `0`) \|\| (task_ledgers.cpu_time_billed_to_others < `0`) \|\|
1383	(task_ledgers.physical_writes < `0`) \|\|
1384	(task_ledgers.logical_writes < `0`) \|\|
1385	(task_ledgers.logical_writes_to_external < `0`) \|\|
1386	#if CONFIG_PHYS_WRITE_ACCT
1387	(task_ledgers.fs_metadata_writes < `0`) \|\|
1388	#endif /* CONFIG_PHYS_WRITE_ACCT */
1389	#if CONFIG_MEMORYSTATUS
1390	(task_ledgers.memorystatus_dirty_time < `0`) \|\|
1391	#endif /* CONFIG_MEMORYSTATUS */
1392	(task_ledgers.energy_billed_to_me < `0`) \|\|
1393	(task_ledgers.energy_billed_to_others < `0`) \|\|
1394	(task_ledgers.swapins < `0`)
1395	) {
1396	panic("couldn't create entries for task ledger template");
1397	}
1398
1399	ledger_track_credit_only(template: t, entry: task_ledgers.phys_footprint);
1400	ledger_track_credit_only(template: t, entry: task_ledgers.internal);
1401	ledger_track_credit_only(template: t, entry: task_ledgers.external);
1402	ledger_track_credit_only(template: t, entry: task_ledgers.reusable);
1403
1404	ledger_track_maximum(template: t, entry: task_ledgers.phys_footprint, period_in_secs: `60`);
1405	ledger_track_maximum(template: t, entry: task_ledgers.phys_mem, period_in_secs: `60`);
1406	ledger_track_maximum(template: t, entry: task_ledgers.internal, period_in_secs: `60`);
1407	ledger_track_maximum(template: t, entry: task_ledgers.internal_compressed, period_in_secs: `60`);
1408	ledger_track_maximum(template: t, entry: task_ledgers.reusable, period_in_secs: `60`);
1409	ledger_track_maximum(template: t, entry: task_ledgers.external, period_in_secs: `60`);
1410	#if MACH_ASSERT
1411	if (pmap_ledgers_panic) {
1412	ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1413	ledger_panic_on_negative(t, task_ledgers.conclave_mem);
1414	ledger_panic_on_negative(t, task_ledgers.page_table);
1415	ledger_panic_on_negative(t, task_ledgers.internal);
1416	ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1417	ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1418	ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1419	ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1420	ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1421	ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1422	ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1423	#if CONFIG_PHYS_WRITE_ACCT
1424	ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
1425	#endif /* CONFIG_PHYS_WRITE_ACCT */
1426
1427	ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1428	ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1429	ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1430	ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1431	ledger_panic_on_negative(t, task_ledgers.network_volatile);
1432	ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1433	ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1434	ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1435	ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1436	ledger_panic_on_negative(t, task_ledgers.media_footprint);
1437	ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1438	ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1439	ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1440	ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1441	ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1442	ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1443	ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1444	ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1445	ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1446	ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1447	}
1448	#endif /* MACH_ASSERT */
1449
1450	#if CONFIG_MEMORYSTATUS
1451	ledger_set_callback(template: t, entry: task_ledgers.phys_footprint, callback: task_footprint_exceeded, NULL, NULL);
1452	#endif /* CONFIG_MEMORYSTATUS */
1453
1454	ledger_set_callback(template: t, entry: task_ledgers.interrupt_wakeups,
1455	callback: task_wakeups_rate_exceeded, NULL, NULL);
1456	ledger_set_callback(template: t, entry: task_ledgers.physical_writes, callback: task_io_rate_exceeded, param0: (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1457
1458	#if CONFIG_SPTM \|\| !XNU_MONITOR
1459	ledger_template_complete(template: t);
1460	#else /* CONFIG_SPTM \|\| !XNU_MONITOR */
1461	ledger_template_complete_secure_alloc(t);
1462	#endif /* XNU_MONITOR */
1463	task_ledger_template = t;
1464	}
1465
1466	/ Create a task, but leave the task ports disabled /
1467	kern_return_t
1468	task_create_internal(
1469	task_t parent_task, / Null-able /
1470	proc_ro_t proc_ro,
1471	coalition_t *parent_coalitions __unused,
1472	boolean_t inherit_memory,
1473	boolean_t is_64bit,
1474	boolean_t is_64bit_data,
1475	uint32_t t_flags,
1476	uint32_t t_flags_ro,
1477	uint32_t t_procflags,
1478	uint8_t t_returnwaitflags,
1479	task_t child_task)
1480	{
1481	task_t new_task;
1482	vm_shared_region_t shared_region;
1483	ledger_t ledger = NULL;
1484	struct task_ro_data task_ro_data = {};
1485	uint32_t parent_t_flags_ro = `0`;
1486
1487	new_task = child_task;
1488
1489	if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1490	return KERN_RESOURCE_SHORTAGE;
1491	}
1492
1493	/ allocate with active entries /
1494	assert(task_ledger_template != NULL);
1495	ledger = ledger_instantiate(template: task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1496	if (ledger == NULL) {
1497	task_ref_count_fini(new_task);
1498	return KERN_RESOURCE_SHORTAGE;
1499	}
1500
1501	counter_alloc(&(new_task->faults));
1502
1503	#if defined(HAS_APPLE_PAC)
1504	const uint8_t disable_user_jop = inherit_memory ? parent_task->disable_user_jop : FALSE;
1505	ml_task_set_rop_pid(task: new_task, parent_task, inherit: inherit_memory);
1506	ml_task_set_jop_pid(task: new_task, parent_task, inherit: inherit_memory, disable_user_jop);
1507	ml_task_set_disable_user_jop(task: new_task, disable_user_jop);
1508	#endif
1509
1510
1511	new_task->ledger = ledger;
1512
1513	/ if inherit_memory is true, parent_task MUST not be NULL /
1514	if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1515	#if CONFIG_DEFERRED_RECLAIM
1516	if (parent_task->deferred_reclamation_metadata) {
1517	/*
1518	* Prevent concurrent reclaims while we're forking the parent_task's map,
1519	* so that the child's map is in sync with the forked reclamation
1520	* metadata.
1521	*/
1522	vm_deferred_reclamation_buffer_lock(
1523	metadata: parent_task->deferred_reclamation_metadata);
1524	}
1525	#endif /* CONFIG_DEFERRED_RECLAIM */
1526	new_task->map = vm_map_fork(ledger, old_map: parent_task->map, options: `0`);
1527	#if CONFIG_DEFERRED_RECLAIM
1528	if (new_task->map != NULL &&
1529	parent_task->deferred_reclamation_metadata) {
1530	new_task->deferred_reclamation_metadata =
1531	vm_deferred_reclamation_buffer_fork(task: new_task,
1532	parent: parent_task->deferred_reclamation_metadata);
1533	}
1534	#endif /* CONFIG_DEFERRED_RECLAIM */
1535	} else {
1536	unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : `0`;
1537	pmap_t pmap = pmap_create_options(ledger, size: `0`, flags: pmap_flags);
1538	vm_map_t new_map;
1539
1540	if (pmap == NULL) {
1541	counter_free(&new_task->faults);
1542	ledger_dereference(ledger);
1543	task_ref_count_fini(new_task);
1544	return KERN_RESOURCE_SHORTAGE;
1545	}
1546	new_map = vm_map_create_options(pmap,
1547	min_off: (vm_map_offset_t)(VM_MIN_ADDRESS),
1548	max_off: (vm_map_offset_t)(VM_MAX_ADDRESS),
1549	options: VM_MAP_CREATE_PAGEABLE);
1550	if (parent_task) {
1551	vm_map_inherit_limits(new_map, old_map: parent_task->map);
1552	}
1553	new_task->map = new_map;
1554	}
1555
1556	if (new_task->map == NULL) {
1557	counter_free(&new_task->faults);
1558	ledger_dereference(ledger);
1559	task_ref_count_fini(new_task);
1560	return KERN_RESOURCE_SHORTAGE;
1561	}
1562
1563	#if defined(CONFIG_SCHED_MULTIQ)
1564	new_task->sched_group = sched_group_create();
1565	#endif
1566
1567	lck_mtx_init(lck: &new_task->lock, grp: &task_lck_grp, attr: &task_lck_attr);
1568	queue_init(&new_task->threads);
1569	new_task->suspend_count = `0`;
1570	new_task->thread_count = `0`;
1571	new_task->active_thread_count = `0`;
1572	new_task->user_stop_count = `0`;
1573	new_task->legacy_stop_count = `0`;
1574	new_task->active = TRUE;
1575	new_task->halting = FALSE;
1576	new_task->priv_flags = `0`;
1577	new_task->t_flags = t_flags;
1578	task_ro_data.t_flags_ro = t_flags_ro;
1579	new_task->t_procflags = t_procflags;
1580	new_task->t_returnwaitflags = t_returnwaitflags;
1581	new_task->returnwait_inheritor = current_thread();
1582	new_task->importance = `0`;
1583	new_task->crashed_thread_id = `0`;
1584	new_task->watchports = NULL;
1585	new_task->t_rr_ranges = NULL;
1586
1587	new_task->bank_context = NULL;
1588
1589	if (parent_task) {
1590	parent_t_flags_ro = task_ro_flags_get(task: parent_task);
1591	}
1592
1593	if (parent_task && inherit_memory) {
1594	#if __has_feature(ptrauth_calls)
1595	/ Inherit the pac exception flags from parent if in fork /
1596	task_ro_data.t_flags_ro \|= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE \|
1597	TFRO_PAC_EXC_FATAL));
1598	#endif /* __has_feature(ptrauth_calls) */
1599	/ Inherit the hardened binary flags from parent if in fork /
1600	task_ro_data.t_flags_ro \|= parent_t_flags_ro & (TFRO_HARDENED \| TFRO_PLATFORM \| TFRO_JIT_EXC_FATAL);
1601	#if XNU_TARGET_OS_OSX
1602	task_ro_data.t_flags_ro \|= parent_t_flags_ro & TFRO_MACH_HARDENING_OPT_OUT;
1603	#endif /* XNU_TARGET_OS_OSX */
1604	}
1605
1606	#ifdef MACH_BSD
1607	new_task->corpse_info = NULL;
1608	#endif /* MACH_BSD */
1609
1610	/ kern_task not created by this function has unique id 0, start with 1 here. /
1611	task_set_uniqueid(task: new_task);
1612
1613	#if CONFIG_MACF
1614	set_task_crash_label(task: new_task, NULL);
1615
1616	task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1617	task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1618	#endif
1619
1620	#if CONFIG_MEMORYSTATUS
1621	if (max_task_footprint != `0`) {
1622	ledger_set_limit(ledger, entry: task_ledgers.phys_footprint, limit: max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1623	}
1624	#endif /* CONFIG_MEMORYSTATUS */
1625
1626	if (task_wakeups_monitor_rate != `0`) {
1627	uint32_t flags = WAKEMON_ENABLE \| WAKEMON_SET_DEFAULTS;
1628	int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1629	task_wakeups_monitor_ctl(task: new_task, rate_hz: &flags, flags: &rate);
1630	}
1631
1632	#if CONFIG_IO_ACCOUNTING
1633	uint32_t flags = IOMON_ENABLE;
1634	task_io_monitor_ctl(new_task, &flags);
1635	#endif /* CONFIG_IO_ACCOUNTING */
1636
1637	machine_task_init(new_task, parent_task, memory_inherit: inherit_memory);
1638
1639	new_task->task_debug = NULL;
1640
1641	#if DEVELOPMENT \|\| DEBUG
1642	new_task->task_unnested = FALSE;
1643	new_task->task_disconnected_count = `0`;
1644	#endif
1645	queue_init(&new_task->semaphore_list);
1646	new_task->semaphores_owned = `0`;
1647
1648	new_task->vtimers = `0`;
1649
1650	new_task->shared_region = NULL;
1651
1652	new_task->affinity_space = NULL;
1653
1654	#if CONFIG_CPU_COUNTERS
1655	new_task->t_kpc = `0`;
1656	#endif /* CONFIG_CPU_COUNTERS */
1657
1658	new_task->pidsuspended = FALSE;
1659	new_task->frozen = FALSE;
1660	new_task->changing_freeze_state = FALSE;
1661	new_task->rusage_cpu_flags = `0`;
1662	new_task->rusage_cpu_percentage = `0`;
1663	new_task->rusage_cpu_interval = `0`;
1664	new_task->rusage_cpu_deadline = `0`;
1665	new_task->rusage_cpu_callt = NULL;
1666	#if MACH_ASSERT
1667	new_task->suspends_outstanding = `0`;
1668	#endif
1669	recount_task_init(tk: &new_task->tk_recount);
1670
1671	#if HYPERVISOR
1672	new_task->hv_task_target = NULL;
1673	#endif /* HYPERVISOR */
1674
1675	#if CONFIG_TASKWATCH
1676	queue_init(&new_task->task_watchers);
1677	new_task->num_taskwatchers = `0`;
1678	new_task->watchapplying = `0`;
1679	#endif /* CONFIG_TASKWATCH */
1680
1681	new_task->mem_notify_reserved = `0`;
1682	new_task->memlimit_attrs_reserved = `0`;
1683
1684	new_task->requested_policy = default_task_requested_policy;
1685	new_task->effective_policy = default_task_effective_policy;
1686
1687	new_task->task_shared_region_slide = -`1`;
1688
1689	if (parent_task != NULL) {
1690	task_ro_data.task_tokens.sec_token = *task_get_sec_token(task: parent_task);
1691	task_ro_data.task_tokens.audit_token = *task_get_audit_token(task: parent_task);
1692
1693	/ only inherit the option bits, no effect until task_set_immovable_pinned() /
1694	task_ro_data.task_control_port_options = task_get_control_port_options(task: parent_task);
1695
1696	task_ro_data.t_flags_ro \|= parent_t_flags_ro & TFRO_FILTER_MSG;
1697	#if CONFIG_MACF
1698	if (!(t_flags & TF_CORPSE_FORK)) {
1699	task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(task: parent_task);
1700	task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(task: parent_task);
1701	}
1702	#endif
1703	} else {
1704	task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1705	task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1706
1707	task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1708	}
1709
1710	/ must set before task_importance_init_from_parent: /
1711	if (proc_ro != NULL) {
1712	new_task->bsd_info_ro = proc_ro_ref_task(pr: proc_ro, t: new_task, t_data: &task_ro_data);
1713	} else {
1714	new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, t: new_task, t_data: &task_ro_data);
1715	}
1716
1717	ipc_task_init(task: new_task, parent: parent_task);
1718
1719	task_importance_init_from_parent(new_task, parent_task);
1720
1721	new_task->corpse_vmobject_list = NULL;
1722
1723	if (parent_task != TASK_NULL) {
1724	/ inherit the parent's shared region /
1725	shared_region = vm_shared_region_get(task: parent_task);
1726	if (shared_region != NULL) {
1727	vm_shared_region_set(task: new_task, new_shared_region: shared_region);
1728	}
1729
1730	#if __has_feature(ptrauth_calls)
1731	/ use parent's shared_region_id /
1732	char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1733	if (shared_region_id != NULL) {
1734	shared_region_key_alloc(shared_region_id, FALSE, `0`); / get a reference /
1735	}
1736	task_set_shared_region_id(new_task, shared_region_id);
1737	#endif /* __has_feature(ptrauth_calls) */
1738
1739	if (task_has_64Bit_addr(parent_task)) {
1740	task_set_64Bit_addr(new_task);
1741	}
1742
1743	if (task_has_64Bit_data(parent_task)) {
1744	task_set_64Bit_data(new_task);
1745	}
1746
1747	new_task->all_image_info_addr = parent_task->all_image_info_addr;
1748	new_task->all_image_info_size = parent_task->all_image_info_size;
1749	new_task->mach_header_vm_address = `0`;
1750
1751	if (inherit_memory && parent_task->affinity_space) {
1752	task_affinity_create(parent_task, new_task);
1753	}
1754
1755	new_task->pset_hint = parent_task->pset_hint = task_choose_pset(task: parent_task);
1756
1757	new_task->task_exc_guard = parent_task->task_exc_guard;
1758	if (parent_task->t_flags & TF_NO_SMT) {
1759	new_task->t_flags \|= TF_NO_SMT;
1760	}
1761
1762	if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1763	new_task->t_flags \|= TF_USE_PSET_HINT_CLUSTER_TYPE;
1764	}
1765
1766	if (parent_task->t_flags & TF_TECS) {
1767	new_task->t_flags \|= TF_TECS;
1768	}
1769
1770	#if defined(__x86_64__)
1771	if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1772	new_task->t_flags \|= TF_INSN_COPY_OPTOUT;
1773	}
1774	#endif
1775
1776	new_task->priority = BASEPRI_DEFAULT;
1777	new_task->max_priority = MAXPRI_USER;
1778
1779	task_policy_create(task: new_task, parent_task);
1780	} else {
1781	#ifdef __LP64__
1782	if (is_64bit) {
1783	task_set_64Bit_addr(new_task);
1784	}
1785	#endif
1786
1787	if (is_64bit_data) {
1788	task_set_64Bit_data(new_task);
1789	}
1790
1791	new_task->all_image_info_addr = (mach_vm_address_t)`0`;
1792	new_task->all_image_info_size = (mach_vm_size_t)`0`;
1793
1794	new_task->pset_hint = PROCESSOR_SET_NULL;
1795
1796	new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1797
1798	if (new_task == kernel_task) {
1799	new_task->priority = BASEPRI_KERNEL;
1800	new_task->max_priority = MAXPRI_KERNEL;
1801	} else {
1802	new_task->priority = BASEPRI_DEFAULT;
1803	new_task->max_priority = MAXPRI_USER;
1804	}
1805	}
1806
1807	bzero(s: new_task->coalition, n: sizeof(new_task->coalition));
1808	for (int i = `0`; i < COALITION_NUM_TYPES; i++) {
1809	queue_chain_init(new_task->task_coalition[i]);
1810	}
1811
1812	/ Allocate I/O Statistics /
1813	new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1814	Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
1815
1816	bzero(s: &(new_task->cpu_time_eqos_stats), n: sizeof(new_task->cpu_time_eqos_stats));
1817	bzero(s: &(new_task->cpu_time_rqos_stats), n: sizeof(new_task->cpu_time_rqos_stats));
1818
1819	bzero(s: &new_task->extmod_statistics, n: sizeof(new_task->extmod_statistics));
1820
1821	counter_alloc(&(new_task->pageins));
1822	counter_alloc(&(new_task->cow_faults));
1823	counter_alloc(&(new_task->messages_sent));
1824	counter_alloc(&(new_task->messages_received));
1825
1826	/ Copy resource acc. info from Parent for Corpe Forked task. /
1827	if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1828	task_rollup_accounting_info(new_task, parent_task);
1829	task_store_owned_vmobject_info(to_task: new_task, from_task: parent_task);
1830	} else {
1831	/ Initialize to zero for standard fork/spawn case /
1832	new_task->total_runnable_time = `0`;
1833	new_task->syscalls_mach = `0`;
1834	new_task->syscalls_unix = `0`;
1835	new_task->c_switch = `0`;
1836	new_task->p_switch = `0`;
1837	new_task->ps_switch = `0`;
1838	new_task->decompressions = `0`;
1839	new_task->low_mem_notified_warn = `0`;
1840	new_task->low_mem_notified_critical = `0`;
1841	new_task->purged_memory_warn = `0`;
1842	new_task->purged_memory_critical = `0`;
1843	new_task->low_mem_privileged_listener = `0`;
1844	new_task->memlimit_is_active = `0`;
1845	new_task->memlimit_is_fatal = `0`;
1846	new_task->memlimit_active_exc_resource = `0`;
1847	new_task->memlimit_inactive_exc_resource = `0`;
1848	new_task->task_timer_wakeups_bin_1 = `0`;
1849	new_task->task_timer_wakeups_bin_2 = `0`;
1850	new_task->task_gpu_ns = `0`;
1851	new_task->task_writes_counters_internal.task_immediate_writes = `0`;
1852	new_task->task_writes_counters_internal.task_deferred_writes = `0`;
1853	new_task->task_writes_counters_internal.task_invalidated_writes = `0`;
1854	new_task->task_writes_counters_internal.task_metadata_writes = `0`;
1855	new_task->task_writes_counters_external.task_immediate_writes = `0`;
1856	new_task->task_writes_counters_external.task_deferred_writes = `0`;
1857	new_task->task_writes_counters_external.task_invalidated_writes = `0`;
1858	new_task->task_writes_counters_external.task_metadata_writes = `0`;
1859	#if CONFIG_PHYS_WRITE_ACCT
1860	new_task->task_fs_metadata_writes = `0`;
1861	#endif /* CONFIG_PHYS_WRITE_ACCT */
1862	}
1863
1864
1865	new_task->donates_own_pages = FALSE;
1866	#if CONFIG_COALITIONS
1867	if (!(t_flags & TF_CORPSE_FORK)) {
1868	/ TODO: there is no graceful failure path here... /
1869	if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1870	coalitions_adopt_task(coaltions: parent_coalitions, task: new_task);
1871	if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1872	new_task->donates_own_pages = coalition_is_swappable(coal: parent_coalitions[COALITION_TYPE_JETSAM]);
1873	}
1874	} else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1875	/*
1876	* all tasks at least have a resource coalition, so
1877	* if the parent has one then inherit all coalitions
1878	* the parent is a part of
1879	*/
1880	coalitions_adopt_task(coaltions: parent_task->coalition, task: new_task);
1881	if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1882	new_task->donates_own_pages = coalition_is_swappable(coal: parent_task->coalition[COALITION_TYPE_JETSAM]);
1883	}
1884	} else {
1885	/ TODO: assert that new_task will be PID 1 (launchd) /
1886	coalitions_adopt_init_task(task: new_task);
1887	}
1888	/*
1889	* on exec, we need to transfer the coalition roles from the
1890	* parent task to the exec copy task.
1891	*/
1892	if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1893	int coal_roles[COALITION_NUM_TYPES];
1894	task_coalition_roles(task: parent_task, roles: coal_roles);
1895	(void)coalitions_set_roles(coalitions: new_task->coalition, task: new_task, roles: coal_roles);
1896	}
1897	} else {
1898	coalitions_adopt_corpse_task(task: new_task);
1899	}
1900
1901	if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1902	panic("created task is not a member of a resource coalition");
1903	}
1904	task_set_coalition_member(new_task);
1905	#endif /* CONFIG_COALITIONS */
1906
1907	new_task->dispatchqueue_offset = `0`;
1908	if (parent_task != NULL) {
1909	new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1910	}
1911
1912	new_task->task_can_transfer_memory_ownership = FALSE;
1913	new_task->task_volatile_objects = `0`;
1914	new_task->task_nonvolatile_objects = `0`;
1915	new_task->task_objects_disowning = FALSE;
1916	new_task->task_objects_disowned = FALSE;
1917	new_task->task_owned_objects = `0`;
1918	queue_init(&new_task->task_objq);
1919
1920	#if CONFIG_FREEZE
1921	queue_init(&new_task->task_frozen_cseg_q);
1922	#endif /* CONFIG_FREEZE */
1923
1924	task_objq_lock_init(new_task);
1925
1926	#if __arm64__
1927	new_task->task_legacy_footprint = FALSE;
1928	new_task->task_extra_footprint_limit = FALSE;
1929	new_task->task_ios13extended_footprint_limit = FALSE;
1930	#endif /* __arm64__ */
1931	new_task->task_region_footprint = FALSE;
1932	new_task->task_has_crossed_thread_limit = FALSE;
1933	new_task->task_thread_limit = `0`;
1934	#if CONFIG_SECLUDED_MEMORY
1935	new_task->task_can_use_secluded_mem = FALSE;
1936	new_task->task_could_use_secluded_mem = FALSE;
1937	new_task->task_could_also_use_secluded_mem = FALSE;
1938	new_task->task_suppressed_secluded = FALSE;
1939	#endif /* CONFIG_SECLUDED_MEMORY */
1940
1941	/*
1942	* t_flags is set up above. But since we don't
1943	* support darkwake mode being set that way
1944	* currently, we clear it out here explicitly.
1945	*/
1946	new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1947
1948	queue_init(&new_task->io_user_clients);
1949	new_task->loadTag = `0`;
1950
1951	lck_mtx_lock(lck: &tasks_threads_lock);
1952	queue_enter(&tasks, new_task, task_t, tasks);
1953	tasks_count++;
1954	if (tasks_suspend_state) {
1955	task_suspend_internal(new_task);
1956	}
1957	lck_mtx_unlock(lck: &tasks_threads_lock);
1958	task_ref_hold_proc_task_struct(task: new_task);
1959
1960	return KERN_SUCCESS;
1961	}
1962
1963	/*
1964	* task_rollup_accounting_info
1965	*
1966	* Roll up accounting stats. Used to rollup stats
1967	* for exec copy task and corpse fork.
1968	*/
1969	void
1970	task_rollup_accounting_info(task_t to_task, task_t from_task)
1971	{
1972	assert(from_task != to_task);
1973
1974	recount_task_copy(dst: &to_task->tk_recount, src: &from_task->tk_recount);
1975	to_task->total_runnable_time = from_task->total_runnable_time;
1976	counter_add(&to_task->faults, amount: counter_load(&from_task->faults));
1977	counter_add(&to_task->pageins, amount: counter_load(&from_task->pageins));
1978	counter_add(&to_task->cow_faults, amount: counter_load(&from_task->cow_faults));
1979	counter_add(&to_task->messages_sent, amount: counter_load(&from_task->messages_sent));
1980	counter_add(&to_task->messages_received, amount: counter_load(&from_task->messages_received));
1981	to_task->decompressions = from_task->decompressions;
1982	to_task->syscalls_mach = from_task->syscalls_mach;
1983	to_task->syscalls_unix = from_task->syscalls_unix;
1984	to_task->c_switch = from_task->c_switch;
1985	to_task->p_switch = from_task->p_switch;
1986	to_task->ps_switch = from_task->ps_switch;
1987	to_task->extmod_statistics = from_task->extmod_statistics;
1988	to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1989	to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1990	to_task->purged_memory_warn = from_task->purged_memory_warn;
1991	to_task->purged_memory_critical = from_task->purged_memory_critical;
1992	to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1993	to_task->task_io_stats = from_task->task_io_stats;
1994	to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1995	to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1996	to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1997	to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1998	to_task->task_gpu_ns = from_task->task_gpu_ns;
1999	to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
2000	to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
2001	to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
2002	to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
2003	to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
2004	to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
2005	to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
2006	to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
2007	#if CONFIG_PHYS_WRITE_ACCT
2008	to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
2009	#endif /* CONFIG_PHYS_WRITE_ACCT */
2010
2011	#if CONFIG_MEMORYSTATUS
2012	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.memorystatus_dirty_time);
2013	#endif /* CONFIG_MEMORYSTATUS */
2014
2015	/ Skip ledger roll up for memory accounting entries /
2016	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.cpu_time);
2017	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.platform_idle_wakeups);
2018	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.interrupt_wakeups);
2019	#if CONFIG_SCHED_SFI
2020	for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
2021	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.sfi_wait_times[class_id]);
2022	}
2023	#endif
2024	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.cpu_time_billed_to_me);
2025	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.cpu_time_billed_to_others);
2026	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.physical_writes);
2027	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.logical_writes);
2028	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.energy_billed_to_me);
2029	ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.energy_billed_to_others);
2030	}
2031
2032	/*
2033	* task_deallocate_internal:
2034	*
2035	* Drop a reference on a task.
2036	* Don't call this directly.
2037	*/
2038	extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
2039	void
2040	task_deallocate_internal(
2041	task_t task,
2042	os_ref_count_t refs)
2043	{
2044	ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
2045
2046	if (task == TASK_NULL) {
2047	return;
2048	}
2049
2050	#if IMPORTANCE_INHERITANCE
2051	if (refs == `1`) {
2052	/*
2053	* If last ref potentially comes from the task's importance,
2054	* disconnect it. But more task refs may be added before
2055	* that completes, so wait for the reference to go to zero
2056	* naturally (it may happen on a recursive task_deallocate()
2057	* from the ipc_importance_disconnect_task() call).
2058	*/
2059	if (IIT_NULL != task->task_imp_base) {
2060	ipc_importance_disconnect_task(task);
2061	}
2062	return;
2063	}
2064	#endif /* IMPORTANCE_INHERITANCE */
2065
2066	if (refs > `0`) {
2067	return;
2068	}
2069
2070	/*
2071	* The task should be dead at this point. Ensure other resources
2072	* like threads, are gone before we trash the world.
2073	*/
2074	assert(queue_empty(&task->threads));
2075	assert(get_bsdtask_info(task) == NULL);
2076	assert(!is_active(task->itk_space));
2077	assert(!task->active);
2078	assert(task->active_thread_count == `0`);
2079	assert(!task_get_game_mode(task));
2080
2081	lck_mtx_lock(lck: &tasks_threads_lock);
2082	assert(terminated_tasks_count > `0`);
2083	queue_remove(&terminated_tasks, task, task_t, tasks);
2084	terminated_tasks_count--;
2085	lck_mtx_unlock(lck: &tasks_threads_lock);
2086
2087	/*
2088	* remove the reference on bank context
2089	*/
2090	task_bank_reset(task);
2091
2092	kfree_data(task->task_io_stats, sizeof(struct io_stat_info));
2093
2094	/*
2095	* Give the machine dependent code a chance
2096	* to perform cleanup before ripping apart
2097	* the task.
2098	*/
2099	machine_task_terminate(task);
2100
2101	ipc_task_terminate(task);
2102
2103	/ let iokit know 2 /
2104	iokit_task_terminate(task, phase: `2`);
2105
2106	/ Unregister task from userspace coredumps on panic /
2107	kern_unregister_userspace_coredump(task);
2108
2109	if (task->affinity_space) {
2110	task_affinity_deallocate(task);
2111	}
2112
2113	#if MACH_ASSERT
2114	if (task->ledger != NULL &&
2115	task->map != NULL &&
2116	task->map->pmap != NULL &&
2117	task->map->pmap->ledger != NULL) {
2118	assert(task->ledger == task->map->pmap->ledger);
2119	}
2120	#endif /* MACH_ASSERT */
2121
2122	vm_owned_objects_disown(task);
2123	assert(task->task_objects_disowned);
2124	if (task->task_owned_objects != `0`) {
2125	panic("task_deallocate(%p): "
2126	"volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
2127	task,
2128	task->task_volatile_objects,
2129	task->task_nonvolatile_objects,
2130	task->task_owned_objects);
2131	}
2132
2133	#if CONFIG_DEFERRED_RECLAIM
2134	if (task->deferred_reclamation_metadata != NULL) {
2135	vm_deferred_reclamation_buffer_deallocate(metadata: task->deferred_reclamation_metadata);
2136	task->deferred_reclamation_metadata = NULL;
2137	}
2138	#endif /* CONFIG_DEFERRED_RECLAIM */
2139
2140	vm_map_deallocate(map: task->map);
2141	if (task->is_large_corpse) {
2142	assert(large_corpse_count > `0`);
2143	OSDecrementAtomic(&large_corpse_count);
2144	task->is_large_corpse = false;
2145	}
2146	is_release(task->itk_space);
2147
2148	if (task->t_rr_ranges) {
2149	restartable_ranges_release(ranges: task->t_rr_ranges);
2150	}
2151
2152	ledger_get_entries(ledger: task->ledger, entry: task_ledgers.interrupt_wakeups,
2153	credit: &interrupt_wakeups, debit: &debit);
2154	ledger_get_entries(ledger: task->ledger, entry: task_ledgers.platform_idle_wakeups,
2155	credit: &platform_idle_wakeups, debit: &debit);
2156
2157	#if defined(CONFIG_SCHED_MULTIQ)
2158	sched_group_destroy(sched_group: task->sched_group);
2159	#endif
2160
2161	struct recount_times_mach sum = { `0` };
2162	struct recount_times_mach p_only = { `0` };
2163	recount_task_times_perf_only(task, sum: &sum, sum_perf_only: &p_only);
2164	#if CONFIG_PERVASIVE_ENERGY
2165	uint64_t energy = recount_task_energy_nj(task);
2166	#endif /* CONFIG_PERVASIVE_ENERGY */
2167	recount_task_deinit(tk: &task->tk_recount);
2168
2169	/ Accumulate statistics for dead tasks /
2170	lck_spin_lock(lck: &dead_task_statistics_lock);
2171	dead_task_statistics.total_user_time += sum.rtm_user;
2172	dead_task_statistics.total_system_time += sum.rtm_system;
2173
2174	dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
2175	dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
2176
2177	dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
2178	dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
2179	dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
2180	dead_task_statistics.total_pset_switches += task->ps_switch;
2181	dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
2182	#if CONFIG_PERVASIVE_ENERGY
2183	dead_task_statistics.task_energy += energy;
2184	#endif /* CONFIG_PERVASIVE_ENERGY */
2185
2186	lck_spin_unlock(lck: &dead_task_statistics_lock);
2187	lck_mtx_destroy(lck: &task->lock, grp: &task_lck_grp);
2188
2189	if (!ledger_get_entries(ledger: task->ledger, entry: task_ledgers.tkm_private, credit: &credit,
2190	debit: &debit)) {
2191	OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
2192	OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
2193	}
2194	if (!ledger_get_entries(ledger: task->ledger, entry: task_ledgers.tkm_shared, credit: &credit,
2195	debit: &debit)) {
2196	OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
2197	OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
2198	}
2199	ledger_dereference(ledger: task->ledger);
2200
2201	counter_free(&task->faults);
2202	counter_free(&task->pageins);
2203	counter_free(&task->cow_faults);
2204	counter_free(&task->messages_sent);
2205	counter_free(&task->messages_received);
2206
2207	#if CONFIG_COALITIONS
2208	task_release_coalitions(task);
2209	#endif /* CONFIG_COALITIONS */
2210
2211	bzero(s: task->coalition, n: sizeof(task->coalition));
2212
2213	#if MACH_BSD
2214	/ clean up collected information since last reference to task is gone /
2215	if (task->corpse_info) {
2216	void *corpse_info_kernel = kcdata_memory_get_begin_addr(data: task->corpse_info);
2217	task_crashinfo_destroy(data: task->corpse_info);
2218	task->corpse_info = NULL;
2219	kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
2220	}
2221	#endif
2222
2223	#if CONFIG_MACF
2224	if (get_task_crash_label(task)) {
2225	mac_exc_free_label(label: get_task_crash_label(task));
2226	set_task_crash_label(task, NULL);
2227	}
2228	#endif
2229
2230	assert(queue_empty(&task->task_objq));
2231	task_objq_lock_destroy(task);
2232
2233	if (task->corpse_vmobject_list) {
2234	kfree_data(task->corpse_vmobject_list,
2235	(vm_size_t)task->corpse_vmobject_list_size);
2236	}
2237
2238	task_ref_count_fini(task);
2239	proc_ro_erase_task(pr: task->bsd_info_ro);
2240	task_release_proc_task_struct(task);
2241	}
2242
2243	/*
2244	* task_name_deallocate_mig:
2245	*
2246	* Drop a reference on a task name.
2247	*/
2248	void
2249	task_name_deallocate_mig(
2250	task_name_t task_name)
2251	{
2252	return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2253	}
2254
2255	/*
2256	* task_policy_set_deallocate_mig:
2257	*
2258	* Drop a reference on a task type.
2259	*/
2260	void
2261	task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2262	{
2263	return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2264	}
2265
2266	/*
2267	* task_policy_get_deallocate_mig:
2268	*
2269	* Drop a reference on a task type.
2270	*/
2271	void
2272	task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2273	{
2274	return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2275	}
2276
2277	/*
2278	* task_inspect_deallocate_mig:
2279	*
2280	* Drop a task inspection reference.
2281	*/
2282	void
2283	task_inspect_deallocate_mig(
2284	task_inspect_t task_inspect)
2285	{
2286	return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2287	}
2288
2289	/*
2290	* task_read_deallocate_mig:
2291	*
2292	* Drop a reference on task read port.
2293	*/
2294	void
2295	task_read_deallocate_mig(
2296	task_read_t task_read)
2297	{
2298	return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2299	}
2300
2301	/*
2302	* task_suspension_token_deallocate:
2303	*
2304	* Drop a reference on a task suspension token.
2305	*/
2306	void
2307	task_suspension_token_deallocate(
2308	task_suspension_token_t token)
2309	{
2310	return task_deallocate((task_t)token);
2311	}
2312
2313	void
2314	task_suspension_token_deallocate_grp(
2315	task_suspension_token_t token,
2316	task_grp_t grp)
2317	{
2318	return task_deallocate_grp((task_t)token, grp);
2319	}
2320
2321	/*
2322	* task_collect_crash_info:
2323	*
2324	* collect crash info from bsd and mach based data
2325	*/
2326	kern_return_t
2327	task_collect_crash_info(
2328	task_t task,
2329	#ifdef CONFIG_MACF
2330	struct label *crash_label,
2331	#endif
2332	int is_corpse_fork)
2333	{
2334	kern_return_t kr = KERN_SUCCESS;
2335
2336	kcdata_descriptor_t crash_data = NULL;
2337	kcdata_descriptor_t crash_data_release = NULL;
2338	mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2339	mach_vm_offset_t crash_data_ptr = `0`;
2340	void *crash_data_kernel = NULL;
2341	void *crash_data_kernel_release = NULL;
2342	#if CONFIG_MACF
2343	struct label label, free_label;
2344	#endif
2345
2346	if (!corpses_enabled()) {
2347	return KERN_NOT_SUPPORTED;
2348	}
2349
2350	#if CONFIG_MACF
2351	free_label = label = mac_exc_create_label(NULL);
2352	#endif
2353
2354	task_lock(task);
2355
2356	assert(is_corpse_fork \|\| get_bsdtask_info(task) != NULL);
2357	if (task->corpse_info == NULL && (is_corpse_fork \|\| get_bsdtask_info(task) != NULL)) {
2358	#if CONFIG_MACF
2359	/ Set the crash label, used by the exception delivery mac hook /
2360	free_label = get_task_crash_label(task); // Most likely NULL.
2361	set_task_crash_label(task, label);
2362	mac_exc_update_task_crash_label(task, newlabel: crash_label);
2363	#endif
2364	task_unlock(task);
2365
2366	crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2367	Z_WAITOK \| Z_ZERO);
2368	if (crash_data_kernel == NULL) {
2369	kr = KERN_RESOURCE_SHORTAGE;
2370	goto out_no_lock;
2371	}
2372	crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2373
2374	/ Do not get a corpse ref for corpse fork /
2375	crash_data = task_crashinfo_alloc_init(crash_data_p: (mach_vm_address_t)crash_data_ptr, size,
2376	kc_u_flags: is_corpse_fork ? `0` : CORPSE_CRASHINFO_HAS_REF,
2377	KCFLAG_USE_MEMCOPY);
2378	if (crash_data) {
2379	task_lock(task);
2380	crash_data_release = task->corpse_info;
2381	crash_data_kernel_release = kcdata_memory_get_begin_addr(data: crash_data_release);
2382	task->corpse_info = crash_data;
2383
2384	task_unlock(task);
2385	kr = KERN_SUCCESS;
2386	} else {
2387	kfree_data(crash_data_kernel,
2388	CORPSEINFO_ALLOCATION_SIZE);
2389	kr = KERN_FAILURE;
2390	}
2391
2392	if (crash_data_release != NULL) {
2393	task_crashinfo_destroy(data: crash_data_release);
2394	}
2395	kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2396	} else {
2397	task_unlock(task);
2398	}
2399
2400	out_no_lock:
2401	#if CONFIG_MACF
2402	if (free_label != NULL) {
2403	mac_exc_free_label(label: free_label);
2404	}
2405	#endif
2406	return kr;
2407	}
2408
2409	/*
2410	* task_deliver_crash_notification:
2411	*
2412	* Makes outcall to registered host port for a corpse.
2413	*/
2414	kern_return_t
2415	task_deliver_crash_notification(
2416	task_t corpse, / corpse or corpse fork /
2417	thread_t thread,
2418	exception_type_t etype,
2419	mach_exception_subcode_t subcode)
2420	{
2421	kcdata_descriptor_t crash_info = corpse->corpse_info;
2422	thread_t th_iter = NULL;
2423	kern_return_t kr = KERN_SUCCESS;
2424	wait_interrupt_t wsave;
2425	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2426	ipc_port_t corpse_port;
2427
2428	if (crash_info == NULL) {
2429	return KERN_FAILURE;
2430	}
2431
2432	assert(task_is_a_corpse(corpse));
2433
2434	task_lock(task: corpse);
2435
2436	/*
2437	* Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
2438	* Crash reporters should derive whether it's fatal from corpse blob.
2439	*/
2440	code[`0`] = etype;
2441	code[`1`] = subcode;
2442
2443	queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
2444	{
2445	if (th_iter->corpse_dup == FALSE) {
2446	ipc_thread_reset(thread: th_iter);
2447	}
2448	}
2449	task_unlock(task: corpse);
2450
2451	/ Arm the no-sender notification for taskport /
2452	task_reference(corpse);
2453	corpse_port = convert_corpse_to_port_and_nsrequest(task: corpse);
2454
2455	wsave = thread_interrupt_level(THREAD_UNINT);
2456	kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2457	if (kr != KERN_SUCCESS) {
2458	printf(format: "Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task: corpse));
2459	}
2460
2461	(void)thread_interrupt_level(interruptible: wsave);
2462
2463	/*
2464	* Drop the send right on corpse port, will fire the
2465	* no-sender notification if exception deliver failed.
2466	*/
2467	ipc_port_release_send(port: corpse_port);
2468	return kr;
2469	}
2470
2471	/*
2472	* task_terminate:
2473	*
2474	* Terminate the specified task. See comments on thread_terminate
2475	* (kern/thread.c) about problems with terminating the "current task."
2476	*/
2477
2478	kern_return_t
2479	task_terminate(
2480	task_t task)
2481	{
2482	if (task == TASK_NULL) {
2483	return KERN_INVALID_ARGUMENT;
2484	}
2485
2486	if (get_bsdtask_info(task)) {
2487	return KERN_FAILURE;
2488	}
2489
2490	return task_terminate_internal(task);
2491	}
2492
2493	#if MACH_ASSERT
2494	extern int proc_pid(struct proc *);
2495	extern void proc_name_kdp(struct proc p, char* buf, int* size);
2496	#endif /* MACH_ASSERT */
2497
2498	#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2499	static void
2500	__unused task_partial_reap(task_t task, __unused int pid)
2501	{
2502	unsigned int reclaimed_resident = `0`;
2503	unsigned int reclaimed_compressed = `0`;
2504	uint64_t task_page_count;
2505
2506	task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2507
2508	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) \| DBG_FUNC_START),
2509	pid, task_page_count, `0`, `0`, `0`);
2510
2511	vm_map_partial_reap(map: task->map, reclaimed_resident: &reclaimed_resident, reclaimed_compressed: &reclaimed_compressed);
2512
2513	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) \| DBG_FUNC_END),
2514	pid, reclaimed_resident, reclaimed_compressed, `0`, `0`);
2515	}
2516
2517	/*
2518	* task_mark_corpse:
2519	*
2520	* Mark the task as a corpse. Called by crashing thread.
2521	*/
2522	kern_return_t
2523	task_mark_corpse(task_t task)
2524	{
2525	kern_return_t kr = KERN_SUCCESS;
2526	thread_t self_thread;
2527	(void) self_thread;
2528	wait_interrupt_t wsave;
2529	#if CONFIG_MACF
2530	struct label *crash_label = NULL;
2531	#endif
2532
2533	assert(task != kernel_task);
2534	assert(task == current_task());
2535	assert(!task_is_a_corpse(task));
2536
2537	#if CONFIG_MACF
2538	crash_label = mac_exc_create_label_for_proc(proc: (struct proc*)get_bsdtask_info(task));
2539	#endif
2540
2541	kr = task_collect_crash_info(task,
2542	#if CONFIG_MACF
2543	crash_label,
2544	#endif
2545	FALSE);
2546	if (kr != KERN_SUCCESS) {
2547	goto out;
2548	}
2549
2550	self_thread = current_thread();
2551
2552	wsave = thread_interrupt_level(THREAD_UNINT);
2553	task_lock(task);
2554
2555	/*
2556	* Check if any other thread called task_terminate_internal
2557	* and made the task inactive before we could mark it for
2558	* corpse pending report. Bail out if the task is inactive.
2559	*/
2560	if (!task->active) {
2561	kcdata_descriptor_t crash_data_release = task->corpse_info;;
2562	void *crash_data_kernel_release = kcdata_memory_get_begin_addr(data: crash_data_release);;
2563
2564	task->corpse_info = NULL;
2565	task_unlock(task);
2566
2567	if (crash_data_release != NULL) {
2568	task_crashinfo_destroy(data: crash_data_release);
2569	}
2570	kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2571	return KERN_TERMINATED;
2572	}
2573
2574	task_set_corpse_pending_report(task);
2575	task_set_corpse(task);
2576	task->crashed_thread_id = thread_tid(thread: self_thread);
2577
2578	kr = task_start_halt_locked(task, TRUE);
2579	assert(kr == KERN_SUCCESS);
2580
2581	task_set_uniqueid(task);
2582
2583	task_unlock(task);
2584
2585	/*
2586	* ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2587	* disable old ports here instead.
2588	*
2589	* The vm_map and ipc_space must exist until this function returns,
2590	* convert_port_to_{map,space}_with_flavor relies on this behavior.
2591	*/
2592	ipc_task_disable(task);
2593
2594	/ let iokit know 1 /
2595	iokit_task_terminate(task, phase: `1`);
2596
2597	/ terminate the ipc space /
2598	ipc_space_terminate(space: task->itk_space);
2599
2600	/ Add it to global corpse task list /
2601	task_add_to_corpse_task_list(corpse_task: task);
2602
2603	thread_terminate_internal(thread: self_thread);
2604
2605	(void) thread_interrupt_level(interruptible: wsave);
2606	assert(task->halting == TRUE);
2607
2608	out:
2609	#if CONFIG_MACF
2610	mac_exc_free_label(label: crash_label);
2611	#endif
2612	return kr;
2613	}
2614
2615	/*
2616	* task_set_uniqueid
2617	*
2618	* Set task uniqueid to systemwide unique 64 bit value
2619	*/
2620	void
2621	task_set_uniqueid(task_t task)
2622	{
2623	task->task_uniqueid = OSIncrementAtomic64(address: &next_taskuniqueid);
2624	}
2625
2626	/*
2627	* task_clear_corpse
2628	*
2629	* Clears the corpse pending bit on task.
2630	* Removes inspection bit on the threads.
2631	*/
2632	void
2633	task_clear_corpse(task_t task)
2634	{
2635	thread_t th_iter = NULL;
2636
2637	task_lock(task);
2638	queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2639	{
2640	thread_mtx_lock(thread: th_iter);
2641	th_iter->inspection = FALSE;
2642	ipc_thread_disable(thread: th_iter);
2643	thread_mtx_unlock(thread: th_iter);
2644	}
2645
2646	thread_terminate_crashed_threads();
2647	/ remove the pending corpse report flag /
2648	task_clear_corpse_pending_report(task);
2649
2650	task_unlock(task);
2651	}
2652
2653	/*
2654	* task_port_no_senders
2655	*
2656	* Called whenever the Mach port system detects no-senders on
2657	* the task port of a corpse.
2658	* Each notification that comes in should terminate the task (corpse).
2659	*/
2660	static void
2661	task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
2662	{
2663	task_t task = ipc_kobject_get_locked(port, type: IKOT_TASK_CONTROL);
2664
2665	assert(task != TASK_NULL);
2666	assert(task_is_a_corpse(task));
2667
2668	/ Remove the task from global corpse task list /
2669	task_remove_from_corpse_task_list(corpse_task: task);
2670
2671	task_clear_corpse(task);
2672	vm_map_unset_corpse_source(map: task->map);
2673	task_terminate_internal(task);
2674	}
2675
2676	/*
2677	* task_port_with_flavor_no_senders
2678	*
2679	* Called whenever the Mach port system detects no-senders on
2680	* the task inspect or read port. These ports are allocated lazily and
2681	* should be deallocated here when there are no senders remaining.
2682	*/
2683	static void
2684	task_port_with_flavor_no_senders(
2685	ipc_port_t port,
2686	mach_port_mscount_t mscount __unused)
2687	{
2688	task_t task;
2689	mach_task_flavor_t flavor;
2690	ipc_kobject_type_t kotype;
2691
2692	ip_mq_lock(port);
2693	if (port->ip_srights > `0`) {
2694	ip_mq_unlock(port);
2695	return;
2696	}
2697	kotype = ip_kotype(port);
2698	assert((IKOT_TASK_READ == kotype) \|\| (IKOT_TASK_INSPECT == kotype));
2699	task = ipc_kobject_get_locked(port, type: kotype);
2700	if (task != TASK_NULL) {
2701	task_reference(task);
2702	}
2703	ip_mq_unlock(port);
2704
2705	if (task == TASK_NULL) {
2706	/ The task is exiting or disabled; it will eventually deallocate the port /
2707	return;
2708	}
2709
2710	if (kotype == IKOT_TASK_READ) {
2711	flavor = TASK_FLAVOR_READ;
2712	} else {
2713	flavor = TASK_FLAVOR_INSPECT;
2714	}
2715
2716	itk_lock(task);
2717	ip_mq_lock(port);
2718
2719	/*
2720	* If the port is no longer active, then ipc_task_terminate() ran
2721	* and destroyed the kobject already. Just deallocate the task
2722	* ref we took and go away.
2723	*
2724	* It is also possible that several nsrequests are in flight,
2725	* only one shall NULL-out the port entry, and this is the one
2726	* that gets to dealloc the port.
2727	*
2728	* Check for a stale no-senders notification. A call to any function
2729	* that vends out send rights to this port could resurrect it between
2730	* this notification being generated and actually being handled here.
2731	*/
2732	if (!ip_active(port) \|\|
2733	task->itk_task_ports[flavor] != port \|\|
2734	port->ip_srights > `0`) {
2735	ip_mq_unlock(port);
2736	itk_unlock(task);
2737	task_deallocate(task);
2738	return;
2739	}
2740
2741	assert(task->itk_task_ports[flavor] == port);
2742	task->itk_task_ports[flavor] = IP_NULL;
2743	itk_unlock(task);
2744
2745	ipc_kobject_dealloc_port_and_unlock(port, mscount: `0`, type: kotype);
2746
2747	task_deallocate(task);
2748	}
2749
2750	/*
2751	* task_wait_till_threads_terminate_locked
2752	*
2753	* Wait till all the threads in the task are terminated.
2754	* Might release the task lock and re-acquire it.
2755	*/
2756	void
2757	task_wait_till_threads_terminate_locked(task_t task)
2758	{
2759	/ wait for all the threads in the task to terminate /
2760	while (task->active_thread_count != `0`) {
2761	assert_wait(event: (event_t)&task->active_thread_count, THREAD_UNINT);
2762	task_unlock(task);
2763	thread_block(THREAD_CONTINUE_NULL);
2764
2765	task_lock(task);
2766	}
2767	}
2768
2769	/*
2770	* task_duplicate_map_and_threads
2771	*
2772	* Copy vmmap of source task.
2773	* Copy active threads from source task to destination task.
2774	* Source task would be suspended during the copy.
2775	*/
2776	kern_return_t
2777	task_duplicate_map_and_threads(
2778	task_t task,
2779	void *p,
2780	task_t new_task,
2781	thread_t *thread_ret,
2782	uint64_t **udata_buffer,
2783	int *size,
2784	int *num_udata,
2785	bool for_exception)
2786	{
2787	kern_return_t kr = KERN_SUCCESS;
2788	int active;
2789	thread_t thread, self, thread_return = THREAD_NULL;
2790	thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2791	thread_t *thread_array;
2792	uint32_t active_thread_count = `0`, array_count = `0`, i;
2793	vm_map_t oldmap;
2794	uint64_t *buffer = NULL;
2795	int buf_size = `0`;
2796	int est_knotes = `0`, num_knotes = `0`;
2797
2798	self = current_thread();
2799
2800	/*
2801	* Suspend the task to copy thread state, use the internal
2802	* variant so that no user-space process can resume
2803	* the task from under us
2804	*/
2805	kr = task_suspend_internal(task);
2806	if (kr != KERN_SUCCESS) {
2807	return kr;
2808	}
2809
2810	if (task->map->disable_vmentry_reuse == TRUE) {
2811	/*
2812	* Quite likely GuardMalloc (or some debugging tool)
2813	* is being used on this task. And it has gone through
2814	* its limit. Making a corpse will likely encounter
2815	* a lot of VM entries that will need COW.
2816	*
2817	* Skip it.
2818	*/
2819	#if DEVELOPMENT \|\| DEBUG
2820	memorystatus_abort_vm_map_fork(task);
2821	#endif
2822	ktriage_record(thread_id: thread_tid(thread: self), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_CORPSE, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_CORPSE_FAIL_LIBGMALLOC), arg: `0` / arg /);
2823	task_resume_internal(task);
2824	return KERN_FAILURE;
2825	}
2826
2827	/ Check with VM if vm_map_fork is allowed for this task /
2828	bool is_large = false;
2829	if (memorystatus_allowed_vm_map_fork(task, is_large: &is_large)) {
2830	/ Setup new task's vmmap, switch from parent task's map to it COW map /
2831	oldmap = new_task->map;
2832	new_task->map = vm_map_fork(ledger: new_task->ledger,
2833	old_map: task->map,
2834	options: (VM_MAP_FORK_SHARE_IF_INHERIT_NONE \|
2835	VM_MAP_FORK_PRESERVE_PURGEABLE \|
2836	VM_MAP_FORK_CORPSE_FOOTPRINT));
2837	if (new_task->map) {
2838	new_task->is_large_corpse = is_large;
2839	vm_map_deallocate(map: oldmap);
2840
2841	/ copy ledgers that impact the memory footprint /
2842	vm_map_copy_footprint_ledgers(old_task: task, new_task);
2843
2844	/ Get all the udata pointers from kqueue /
2845	est_knotes = kevent_proc_copy_uptrs(proc: p, NULL, bufsize: `0`);
2846	if (est_knotes > `0`) {
2847	buf_size = (est_knotes + `32`) * sizeof(uint64_t);
2848	buffer = kalloc_data(buf_size, Z_WAITOK);
2849	num_knotes = kevent_proc_copy_uptrs(proc: p, buf: buffer, bufsize: buf_size);
2850	if (num_knotes > est_knotes + `32`) {
2851	num_knotes = est_knotes + `32`;
2852	}
2853	}
2854	} else {
2855	if (is_large) {
2856	assert(large_corpse_count > `0`);
2857	OSDecrementAtomic(&large_corpse_count);
2858	}
2859	new_task->map = oldmap;
2860	#if DEVELOPMENT \|\| DEBUG
2861	memorystatus_abort_vm_map_fork(task);
2862	#endif
2863	task_resume_internal(task);
2864	return KERN_NO_SPACE;
2865	}
2866	} else if (!for_exception) {
2867	#if DEVELOPMENT \|\| DEBUG
2868	memorystatus_abort_vm_map_fork(task);
2869	#endif
2870	task_resume_internal(task);
2871	return KERN_NO_SPACE;
2872	}
2873
2874	active_thread_count = task->active_thread_count;
2875	if (active_thread_count == `0`) {
2876	kfree_data(buffer, buf_size);
2877	task_resume_internal(task);
2878	return KERN_FAILURE;
2879	}
2880
2881	thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);
2882
2883	/ Iterate all the threads and drop the task lock before calling thread_create_with_continuation /
2884	task_lock(task);
2885	queue_iterate(&task->threads, thread, thread_t, task_threads) {
2886	/ Skip inactive threads /
2887	active = thread->active;
2888	if (!active) {
2889	continue;
2890	}
2891
2892	if (array_count >= active_thread_count) {
2893	break;
2894	}
2895
2896	thread_array[array_count++] = thread;
2897	thread_reference(thread);
2898	}
2899	task_unlock(task);
2900
2901	for (i = `0`; i < array_count; i++) {
2902	kr = thread_create_with_continuation(task: new_task, new_thread: &new_thread, continuation: (thread_continue_t)thread_corpse_continue);
2903	if (kr != KERN_SUCCESS) {
2904	break;
2905	}
2906
2907	/ Equivalent of current thread in corpse /
2908	if (thread_array[i] == self) {
2909	thread_return = new_thread;
2910	new_task->crashed_thread_id = thread_tid(thread: new_thread);
2911	} else if (first_thread == NULL) {
2912	first_thread = new_thread;
2913	} else {
2914	/ drop the extra ref returned by thread_create_with_continuation /
2915	thread_deallocate(thread: new_thread);
2916	}
2917
2918	kr = thread_dup2(thread_array[i], new_thread);
2919	if (kr != KERN_SUCCESS) {
2920	thread_mtx_lock(thread: new_thread);
2921	new_thread->corpse_dup = TRUE;
2922	thread_mtx_unlock(thread: new_thread);
2923	continue;
2924	}
2925
2926	/ Copy thread name /
2927	bsd_copythreadname(dst_uth: get_bsdthread_info(new_thread),
2928	src_uth: get_bsdthread_info(thread_array[i]));
2929	new_thread->thread_tag = thread_array[i]->thread_tag &
2930	~THREAD_TAG_USER_JOIN;
2931	thread_copy_resource_info(dst_thread: new_thread, src_thread: thread_array[i]);
2932	}
2933
2934	/ return the first thread if we couldn't find the equivalent of current /
2935	if (thread_return == THREAD_NULL) {
2936	thread_return = first_thread;
2937	} else if (first_thread != THREAD_NULL) {
2938	/ drop the extra ref returned by thread_create_with_continuation /
2939	thread_deallocate(thread: first_thread);
2940	}
2941
2942	task_resume_internal(task);
2943
2944	for (i = `0`; i < array_count; i++) {
2945	thread_deallocate(thread: thread_array[i]);
2946	}
2947	kfree_type(thread_t, active_thread_count, thread_array);
2948
2949	if (kr == KERN_SUCCESS) {
2950	*thread_ret = thread_return;
2951	*udata_buffer = buffer;
2952	*size = buf_size;
2953	*num_udata = num_knotes;
2954	} else {
2955	if (thread_return != THREAD_NULL) {
2956	thread_deallocate(thread: thread_return);
2957	}
2958	kfree_data(buffer, buf_size);
2959	}
2960
2961	return kr;
2962	}
2963
2964	#if CONFIG_SECLUDED_MEMORY
2965	extern void task_set_can_use_secluded_mem_locked(
2966	task_t task,
2967	boolean_t can_use_secluded_mem);
2968	#endif /* CONFIG_SECLUDED_MEMORY */
2969
2970	#if MACH_ASSERT
2971	int debug4k_panic_on_terminate = `0`;
2972	#endif /* MACH_ASSERT */
2973	kern_return_t
2974	task_terminate_internal(
2975	task_t task)
2976	{
2977	thread_t thread, self;
2978	task_t self_task;
2979	boolean_t interrupt_save;
2980	int pid = `0`;
2981
2982	assert(task != kernel_task);
2983
2984	self = current_thread();
2985	self_task = current_task();
2986
2987	/*
2988	* Get the task locked and make sure that we are not racing
2989	* with someone else trying to terminate us.
2990	*/
2991	if (task == self_task) {
2992	task_lock(task);
2993	} else if (task < self_task) {
2994	task_lock(task);
2995	task_lock(task: self_task);
2996	} else {
2997	task_lock(task: self_task);
2998	task_lock(task);
2999	}
3000
3001	#if CONFIG_SECLUDED_MEMORY
3002	if (task->task_can_use_secluded_mem) {
3003	task_set_can_use_secluded_mem_locked(task, FALSE);
3004	}
3005	task->task_could_use_secluded_mem = FALSE;
3006	task->task_could_also_use_secluded_mem = FALSE;
3007
3008	if (task->task_suppressed_secluded) {
3009	stop_secluded_suppression(task);
3010	}
3011	#endif /* CONFIG_SECLUDED_MEMORY */
3012
3013	if (!task->active) {
3014	/*
3015	* Task is already being terminated.
3016	* Just return an error. If we are dying, this will
3017	* just get us to our AST special handler and that
3018	* will get us to finalize the termination of ourselves.
3019	*/
3020	task_unlock(task);
3021	if (self_task != task) {
3022	task_unlock(task: self_task);
3023	}
3024
3025	return KERN_FAILURE;
3026	}
3027
3028	if (task_corpse_pending_report(task)) {
3029	/*
3030	* Task is marked for reporting as corpse.
3031	* Just return an error. This will
3032	* just get us to our AST special handler and that
3033	* will get us to finish the path to death
3034	*/
3035	task_unlock(task);
3036	if (self_task != task) {
3037	task_unlock(task: self_task);
3038	}
3039
3040	return KERN_FAILURE;
3041	}
3042
3043	if (self_task != task) {
3044	task_unlock(task: self_task);
3045	}
3046
3047	/*
3048	* Make sure the current thread does not get aborted out of
3049	* the waits inside these operations.
3050	*/
3051	interrupt_save = thread_interrupt_level(THREAD_UNINT);
3052
3053	/*
3054	* Indicate that we want all the threads to stop executing
3055	* at user space by holding the task (we would have held
3056	* each thread independently in thread_terminate_internal -
3057	* but this way we may be more likely to already find it
3058	* held there). Mark the task inactive, and prevent
3059	* further task operations via the task port.
3060	*
3061	* The vm_map and ipc_space must exist until this function returns,
3062	* convert_port_to_{map,space}_with_flavor relies on this behavior.
3063	*/
3064	task_hold_locked(task);
3065	task->active = FALSE;
3066	ipc_task_disable(task);
3067
3068	#if CONFIG_EXCLAVES
3069	task_stop_conclave(task, false);
3070	#endif /* CONFIG_EXCLAVES */
3071
3072	#if CONFIG_TELEMETRY
3073	/*
3074	* Notify telemetry that this task is going away.
3075	*/
3076	telemetry_task_ctl_locked(task, TF_TELEMETRY, enable_disable: `0`);
3077	#endif
3078
3079	/*
3080	* Terminate each thread in the task.
3081	*/
3082	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3083	thread_terminate_internal(thread);
3084	}
3085
3086	#ifdef MACH_BSD
3087	void *bsd_info = get_bsdtask_info(task);
3088	if (bsd_info != NULL) {
3089	pid = proc_pid(p: bsd_info);
3090	}
3091	#endif /* MACH_BSD */
3092
3093	task_unlock(task);
3094
3095	proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
3096	TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
3097
3098	/ Early object reap phase /
3099
3100	// PR-17045188: Revisit implementation
3101	// task_partial_reap(task, pid);
3102
3103	#if CONFIG_TASKWATCH
3104	/*
3105	* remove all task watchers
3106	*/
3107	task_removewatchers(task);
3108
3109	#endif /* CONFIG_TASKWATCH */
3110
3111	/*
3112	* Destroy all synchronizers owned by the task.
3113	*/
3114	task_synchronizer_destroy_all(task);
3115
3116	/*
3117	* Clear the watchport boost on the task.
3118	*/
3119	task_remove_turnstile_watchports(task);
3120
3121	/ let iokit know 1 /
3122	iokit_task_terminate(task, phase: `1`);
3123
3124	/*
3125	* Destroy the IPC space, leaving just a reference for it.
3126	*/
3127	ipc_space_terminate(space: task->itk_space);
3128
3129	#if 00
3130	/ if some ledgers go negative on tear-down again... /
3131	ledger_disable_panic_on_negative(task->map->pmap->ledger,
3132	task_ledgers.phys_footprint);
3133	ledger_disable_panic_on_negative(task->map->pmap->ledger,
3134	task_ledgers.internal);
3135	ledger_disable_panic_on_negative(task->map->pmap->ledger,
3136	task_ledgers.iokit_mapped);
3137	ledger_disable_panic_on_negative(task->map->pmap->ledger,
3138	task_ledgers.alternate_accounting);
3139	ledger_disable_panic_on_negative(task->map->pmap->ledger,
3140	task_ledgers.alternate_accounting_compressed);
3141	#endif
3142
3143	#if CONFIG_DEFERRED_RECLAIM
3144	/*
3145	* Remove this tasks reclaim buffer from global queues.
3146	*/
3147	if (task->deferred_reclamation_metadata != NULL) {
3148	vm_deferred_reclamation_buffer_uninstall(metadata: task->deferred_reclamation_metadata);
3149	}
3150	#endif /* CONFIG_DEFERRED_RECLAIM */
3151
3152	/*
3153	* If the current thread is a member of the task
3154	* being terminated, then the last reference to
3155	* the task will not be dropped until the thread
3156	* is finally reaped. To avoid incurring the
3157	* expense of removing the address space regions
3158	* at reap time, we do it explictly here.
3159	*/
3160
3161	#if MACH_ASSERT
3162	/*
3163	* Identify the pmap's process, in case the pmap ledgers drift
3164	* and we have to report it.
3165	*/
3166	char procname[`17`];
3167	void *proc = get_bsdtask_info(task);
3168	if (proc) {
3169	pid = proc_pid(proc);
3170	proc_name_kdp(proc, procname, sizeof(procname));
3171	} else {
3172	pid = `0`;
3173	strlcpy(procname, "<unknown>", sizeof(procname));
3174	}
3175	pmap_set_process(task->map->pmap, pid, procname);
3176	if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
3177	DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
3178	if (debug4k_panic_on_terminate) {
3179	panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
3180	}
3181	}
3182	#endif /* MACH_ASSERT */
3183
3184	vm_map_terminate(map: task->map);
3185
3186	/ release our shared region /
3187	vm_shared_region_set(task, NULL);
3188
3189	#if __has_feature(ptrauth_calls)
3190	task_set_shared_region_id(task, NULL);
3191	#endif /* __has_feature(ptrauth_calls) */
3192
3193	lck_mtx_lock(lck: &tasks_threads_lock);
3194	queue_remove(&tasks, task, task_t, tasks);
3195	queue_enter(&terminated_tasks, task, task_t, tasks);
3196	tasks_count--;
3197	terminated_tasks_count++;
3198	lck_mtx_unlock(lck: &tasks_threads_lock);
3199
3200	/*
3201	* We no longer need to guard against being aborted, so restore
3202	* the previous interruptible state.
3203	*/
3204	thread_interrupt_level(interruptible: interrupt_save);
3205
3206	#if CONFIG_CPU_COUNTERS
3207	/ force the task to release all ctrs /
3208	if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
3209	kpc_force_all_ctrs(task, `0`);
3210	}
3211	#endif /* CONFIG_CPU_COUNTERS */
3212
3213	#if CONFIG_COALITIONS
3214	/*
3215	* Leave the coalition for corpse task or task that
3216	* never had any active threads (e.g. fork, exec failure).
3217	* For task with active threads, the task will be removed
3218	* from coalition by last terminating thread.
3219	*/
3220	if (task->active_thread_count == `0`) {
3221	coalitions_remove_task(task);
3222	}
3223	#endif
3224
3225	#if CONFIG_FREEZE
3226	extern int vm_compressor_available;
3227	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
3228	task_disown_frozen_csegs(task);
3229	assert(queue_empty(&task->task_frozen_cseg_q));
3230	}
3231	#endif /* CONFIG_FREEZE */
3232
3233
3234	/*
3235	* Get rid of the task active reference on itself.
3236	*/
3237	task_deallocate_grp(task, TASK_GRP_INTERNAL);
3238
3239	return KERN_SUCCESS;
3240	}
3241
3242	void
3243	tasks_system_suspend(boolean_t suspend)
3244	{
3245	task_t task;
3246
3247	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) \|
3248	(suspend ? DBG_FUNC_START : DBG_FUNC_END));
3249
3250	lck_mtx_lock(lck: &tasks_threads_lock);
3251	assert(tasks_suspend_state != suspend);
3252	tasks_suspend_state = suspend;
3253	queue_iterate(&tasks, task, task_t, tasks) {
3254	if (task == kernel_task) {
3255	continue;
3256	}
3257	suspend ? task_suspend_internal(task) : task_resume_internal(task);
3258	}
3259	lck_mtx_unlock(lck: &tasks_threads_lock);
3260	}
3261
3262	/*
3263	* task_start_halt:
3264	*
3265	* Shut the current task down (except for the current thread) in
3266	* preparation for dramatic changes to the task (probably exec).
3267	* We hold the task and mark all other threads in the task for
3268	* termination.
3269	*/
3270	kern_return_t
3271	task_start_halt(task_t task)
3272	{
3273	kern_return_t kr = KERN_SUCCESS;
3274	task_lock(task);
3275	kr = task_start_halt_locked(task, FALSE);
3276	task_unlock(task);
3277	return kr;
3278	}
3279
3280	static kern_return_t
3281	task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
3282	{
3283	thread_t thread, self;
3284	uint64_t dispatchqueue_offset;
3285
3286	assert(task != kernel_task);
3287
3288	self = current_thread();
3289
3290	if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
3291	return KERN_INVALID_ARGUMENT;
3292	}
3293
3294	if (!should_mark_corpse &&
3295	(task->halting \|\| !task->active \|\| !self->active)) {
3296	/*
3297	* Task or current thread is already being terminated.
3298	* Hurry up and return out of the current kernel context
3299	* so that we run our AST special handler to terminate
3300	* ourselves. If should_mark_corpse is set, the corpse
3301	* creation might have raced with exec, let the corpse
3302	* creation continue, once the current thread reaches AST
3303	* thread in exec will be woken up from task_complete_halt.
3304	* Exec will fail cause the proc was marked for exit.
3305	* Once the thread in exec reaches AST, it will call proc_exit
3306	* and deliver the EXC_CORPSE_NOTIFY.
3307	*/
3308	return KERN_FAILURE;
3309	}
3310
3311	/ Thread creation will fail after this point of no return. /
3312	task->halting = TRUE;
3313
3314	/*
3315	* Mark all the threads to keep them from starting any more
3316	* user-level execution. The thread_terminate_internal code
3317	* would do this on a thread by thread basis anyway, but this
3318	* gives us a better chance of not having to wait there.
3319	*/
3320	task_hold_locked(task);
3321
3322	#if CONFIG_EXCLAVES
3323	if (should_mark_corpse) {
3324	void *crash_info_ptr = task_get_corpseinfo(task);
3325	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3326	if (crash_info_ptr != NULL && thread->th_exclaves_ipc_buffer != NULL) {
3327	struct thread_crash_exclaves_info info = { `0` };
3328
3329	info.tcei_flags = kExclaveRPCActive;
3330	info.tcei_scid = thread->th_exclaves_scheduling_context_id;
3331	info.tcei_thread_id = thread->thread_id;
3332
3333	kcdata_push_data(crash_info_ptr,
3334	STACKSHOT_KCTYPE_KERN_EXCLAVES_CRASH_THREADINFO,
3335	sizeof(struct thread_crash_exclaves_info), &info);
3336	}
3337	}
3338
3339	task_unlock(task);
3340	task_stop_conclave(task, true);
3341	task_lock(task);
3342	}
3343	#endif /* CONFIG_EXCLAVES */
3344
3345	dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));
3346	/*
3347	* Terminate all the other threads in the task.
3348	*/
3349	queue_iterate(&task->threads, thread, thread_t, task_threads)
3350	{
3351	/*
3352	* Remove priority throttles for threads to terminate timely. This has
3353	* to be done after task_hold_locked() traps all threads to AST, but before
3354	* threads are marked inactive in thread_terminate_internal(). Takes thread
3355	* mutex lock.
3356	*
3357	* We need task_is_a_corpse() check so that we don't accidently update policy
3358	* for tasks that are doing posix_spawn().
3359	*
3360	* See: thread_policy_update_tasklocked().
3361	*/
3362	if (task_is_a_corpse(task)) {
3363	proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
3364	TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
3365	}
3366
3367	if (should_mark_corpse) {
3368	thread_mtx_lock(thread);
3369	thread->inspection = TRUE;
3370	thread_mtx_unlock(thread);
3371	}
3372	if (thread != self) {
3373	thread_terminate_internal(thread);
3374	}
3375	}
3376	task->dispatchqueue_offset = dispatchqueue_offset;
3377
3378	task_release_locked(task);
3379
3380	return KERN_SUCCESS;
3381	}
3382
3383
3384	/*
3385	* task_complete_halt:
3386	*
3387	* Complete task halt by waiting for threads to terminate, then clean
3388	* up task resources (VM, port namespace, etc...) and then let the
3389	* current thread go in the (practically empty) task context.
3390	*
3391	* Note: task->halting flag is not cleared in order to avoid creation
3392	* of new thread in old exec'ed task.
3393	*/
3394	void
3395	task_complete_halt(task_t task)
3396	{
3397	task_lock(task);
3398	assert(task->halting);
3399	assert(task == current_task());
3400
3401	/*
3402	* Wait for the other threads to get shut down.
3403	* When the last other thread is reaped, we'll be
3404	* woken up.
3405	*/
3406	if (task->thread_count > `1`) {
3407	assert_wait(event: (event_t)&task->halting, THREAD_UNINT);
3408	task_unlock(task);
3409	thread_block(THREAD_CONTINUE_NULL);
3410	} else {
3411	task_unlock(task);
3412	}
3413
3414	#if CONFIG_DEFERRED_RECLAIM
3415	if (task->deferred_reclamation_metadata) {
3416	vm_deferred_reclamation_buffer_uninstall(
3417	metadata: task->deferred_reclamation_metadata);
3418	vm_deferred_reclamation_buffer_deallocate(
3419	metadata: task->deferred_reclamation_metadata);
3420	task->deferred_reclamation_metadata = NULL;
3421	}
3422	#endif /* CONFIG_DEFERRED_RECLAIM */
3423
3424	/*
3425	* Give the machine dependent code a chance
3426	* to perform cleanup of task-level resources
3427	* associated with the current thread before
3428	* ripping apart the task.
3429	*/
3430	machine_task_terminate(task);
3431
3432	/*
3433	* Destroy all synchronizers owned by the task.
3434	*/
3435	task_synchronizer_destroy_all(task);
3436
3437	/ let iokit know 1 /
3438	iokit_task_terminate(task, phase: `1`);
3439
3440	/*
3441	* Terminate the IPC space. A long time ago,
3442	* this used to be ipc_space_clean() which would
3443	* keep the space active but hollow it.
3444	*
3445	* We really do not need this semantics given
3446	* tasks die with exec now.
3447	*/
3448	ipc_space_terminate(space: task->itk_space);
3449
3450	/*
3451	* Clean out the address space, as we are going to be
3452	* getting a new one.
3453	*/
3454	vm_map_terminate(map: task->map);
3455
3456	/*
3457	* Kick out any IOKitUser handles to the task. At best they're stale,
3458	* at worst someone is racing a SUID exec.
3459	*/
3460	/ let iokit know 2 /
3461	iokit_task_terminate(task, phase: `2`);
3462	}
3463
3464	#ifdef CONFIG_TASK_SUSPEND_STATS
3465
3466	static void
3467	_task_mark_suspend_source(task_t task)
3468	{
3469	int idx;
3470	task_suspend_stats_t stats;
3471	task_suspend_source_t source;
3472	task_lock_assert_owned(task);
3473	stats = &task->t_suspend_stats;
3474
3475	idx = stats->tss_count % TASK_SUSPEND_SOURCES_MAX;
3476	source = &task->t_suspend_sources[idx];
3477	bzero(source, sizeof(*source));
3478
3479	source->tss_time = mach_absolute_time();
3480	source->tss_tid = current_thread()->thread_id;
3481	source->tss_pid = task_pid(current_task());
3482	task_best_name(current_task(), source->tss_procname, sizeof(source->tss_procname));
3483
3484	stats->tss_count++;
3485	}
3486
3487	static inline void
3488	_task_mark_suspend_start(task_t task)
3489	{
3490	task_lock_assert_owned(task);
3491	task->t_suspend_stats.tss_last_start = mach_absolute_time();
3492	}
3493
3494	static inline void
3495	_task_mark_suspend_end(task_t task)
3496	{
3497	task_lock_assert_owned(task);
3498	task->t_suspend_stats.tss_last_end = mach_absolute_time();
3499	task->t_suspend_stats.tss_duration += (task->t_suspend_stats.tss_last_end -
3500	task->t_suspend_stats.tss_last_start);
3501	}
3502
3503	static kern_return_t
3504	_task_get_suspend_stats_locked(task_t task, task_suspend_stats_t stats)
3505	{
3506	if (task == TASK_NULL \|\| stats == NULL) {
3507	return KERN_INVALID_ARGUMENT;
3508	}
3509	task_lock_assert_owned(task);
3510	memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3511	return KERN_SUCCESS;
3512	}
3513
3514	static kern_return_t
3515	_task_get_suspend_sources_locked(task_t task, task_suspend_source_t sources)
3516	{
3517	if (task == TASK_NULL \|\| sources == NULL) {
3518	return KERN_INVALID_ARGUMENT;
3519	}
3520	task_lock_assert_owned(task);
3521	memcpy(sources, task->t_suspend_sources,
3522	sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3523	return KERN_SUCCESS;
3524	}
3525
3526	#endif /* CONFIG_TASK_SUSPEND_STATS */
3527
3528	kern_return_t
3529	task_get_suspend_stats(task_t task, task_suspend_stats_t stats)
3530	{
3531	#ifdef CONFIG_TASK_SUSPEND_STATS
3532	kern_return_t kr;
3533	if (task == TASK_NULL \|\| stats == NULL) {
3534	return KERN_INVALID_ARGUMENT;
3535	}
3536	task_lock(task);
3537	kr = _task_get_suspend_stats_locked(task, stats);
3538	task_unlock(task);
3539	return kr;
3540	#else /* CONFIG_TASK_SUSPEND_STATS */
3541	(void)task;
3542	(void)stats;
3543	return KERN_NOT_SUPPORTED;
3544	#endif
3545	}
3546
3547	kern_return_t
3548	task_get_suspend_stats_kdp(task_t task, task_suspend_stats_t stats)
3549	{
3550	#ifdef CONFIG_TASK_SUSPEND_STATS
3551	if (task == TASK_NULL \|\| stats == NULL) {
3552	return KERN_INVALID_ARGUMENT;
3553	}
3554	memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3555	return KERN_SUCCESS;
3556	#else /* CONFIG_TASK_SUSPEND_STATS */
3557	#pragma unused(task, stats)
3558	return KERN_NOT_SUPPORTED;
3559	#endif /* CONFIG_TASK_SUSPEND_STATS */
3560	}
3561
3562	kern_return_t
3563	task_get_suspend_sources(task_t task, task_suspend_source_array_t sources)
3564	{
3565	#ifdef CONFIG_TASK_SUSPEND_STATS
3566	kern_return_t kr;
3567	if (task == TASK_NULL \|\| sources == NULL) {
3568	return KERN_INVALID_ARGUMENT;
3569	}
3570	task_lock(task);
3571	kr = _task_get_suspend_sources_locked(task, sources);
3572	task_unlock(task);
3573	return kr;
3574	#else /* CONFIG_TASK_SUSPEND_STATS */
3575	(void)task;
3576	(void)sources;
3577	return KERN_NOT_SUPPORTED;
3578	#endif
3579	}
3580
3581	kern_return_t
3582	task_get_suspend_sources_kdp(task_t task, task_suspend_source_array_t sources)
3583	{
3584	#ifdef CONFIG_TASK_SUSPEND_STATS
3585	if (task == TASK_NULL \|\| sources == NULL) {
3586	return KERN_INVALID_ARGUMENT;
3587	}
3588	memcpy(sources, task->t_suspend_sources,
3589	sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3590	return KERN_SUCCESS;
3591	#else /* CONFIG_TASK_SUSPEND_STATS */
3592	#pragma unused(task, sources)
3593	return KERN_NOT_SUPPORTED;
3594	#endif
3595	}
3596
3597	/*
3598	* task_hold_locked:
3599	*
3600	* Suspend execution of the specified task.
3601	* This is a recursive-style suspension of the task, a count of
3602	* suspends is maintained.
3603	*
3604	* CONDITIONS: the task is locked and active.
3605	*/
3606	void
3607	task_hold_locked(
3608	task_t task)
3609	{
3610	thread_t thread;
3611	void *bsd_info = get_bsdtask_info(task);
3612
3613	assert(task->active);
3614
3615	if (task->suspend_count++ > `0`) {
3616	return;
3617	}
3618
3619	if (bsd_info) {
3620	workq_proc_suspended(p: bsd_info);
3621	}
3622
3623	/*
3624	* Iterate through all the threads and hold them.
3625	*/
3626	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3627	thread_mtx_lock(thread);
3628	thread_hold(thread);
3629	thread_mtx_unlock(thread);
3630	}
3631
3632	#ifdef CONFIG_TASK_SUSPEND_STATS
3633	_task_mark_suspend_start(task);
3634	#endif
3635	}
3636
3637	/*
3638	* task_hold_and_wait
3639	*
3640	* Same as the internal routine above, except that is must lock
3641	* and verify that the task is active. This differs from task_suspend
3642	* in that it places a kernel hold on the task rather than just a
3643	* user-level hold. This keeps users from over resuming and setting
3644	* it running out from under the kernel.
3645	*
3646	* CONDITIONS: the caller holds a reference on the task
3647	*/
3648	kern_return_t
3649	task_hold_and_wait(
3650	task_t task)
3651	{
3652	if (task == TASK_NULL) {
3653	return KERN_INVALID_ARGUMENT;
3654	}
3655
3656	task_lock(task);
3657	if (!task->active) {
3658	task_unlock(task);
3659	return KERN_FAILURE;
3660	}
3661
3662	#ifdef CONFIG_TASK_SUSPEND_STATS
3663	_task_mark_suspend_source(task);
3664	#endif /* CONFIG_TASK_SUSPEND_STATS */
3665
3666	task_hold_locked(task);
3667	task_wait_locked(task, FALSE);
3668	task_unlock(task);
3669
3670	return KERN_SUCCESS;
3671	}
3672
3673	/*
3674	* task_wait_locked:
3675	*
3676	* Wait for all threads in task to stop.
3677	*
3678	* Conditions:
3679	* Called with task locked, active, and held.
3680	*/
3681	void
3682	task_wait_locked(
3683	task_t task,
3684	boolean_t until_not_runnable)
3685	{
3686	thread_t thread, self;
3687
3688	assert(task->active);
3689	assert(task->suspend_count > `0`);
3690
3691	self = current_thread();
3692
3693	/*
3694	* Iterate through all the threads and wait for them to
3695	* stop. Do not wait for the current thread if it is within
3696	* the task.
3697	*/
3698	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3699	if (thread != self) {
3700	thread_wait(thread, until_not_runnable);
3701	}
3702	}
3703	}
3704
3705	boolean_t
3706	task_is_app_suspended(task_t task)
3707	{
3708	return task->pidsuspended;
3709	}
3710
3711	/*
3712	* task_release_locked:
3713	*
3714	* Release a kernel hold on a task.
3715	*
3716	* CONDITIONS: the task is locked and active
3717	*/
3718	void
3719	task_release_locked(
3720	task_t task)
3721	{
3722	thread_t thread;
3723	void *bsd_info = get_bsdtask_info(task);
3724
3725	assert(task->active);
3726	assert(task->suspend_count > `0`);
3727
3728	if (--task->suspend_count > `0`) {
3729	return;
3730	}
3731
3732	if (bsd_info) {
3733	workq_proc_resumed(p: bsd_info);
3734	}
3735
3736	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3737	thread_mtx_lock(thread);
3738	thread_release(thread);
3739	thread_mtx_unlock(thread);
3740	}
3741
3742	#if CONFIG_TASK_SUSPEND_STATS
3743	_task_mark_suspend_end(task);
3744	#endif
3745	}
3746
3747	/*
3748	* task_release:
3749	*
3750	* Same as the internal routine above, except that it must lock
3751	* and verify that the task is active.
3752	*
3753	* CONDITIONS: The caller holds a reference to the task
3754	*/
3755	kern_return_t
3756	task_release(
3757	task_t task)
3758	{
3759	if (task == TASK_NULL) {
3760	return KERN_INVALID_ARGUMENT;
3761	}
3762
3763	task_lock(task);
3764
3765	if (!task->active) {
3766	task_unlock(task);
3767
3768	return KERN_FAILURE;
3769	}
3770
3771	task_release_locked(task);
3772	task_unlock(task);
3773
3774	return KERN_SUCCESS;
3775	}
3776
3777	static kern_return_t
3778	task_threads_internal(
3779	task_t task,
3780	thread_act_array_t *threads_out,
3781	mach_msg_type_number_t *countp,
3782	mach_thread_flavor_t flavor)
3783	{
3784	mach_msg_type_number_t actual, count, count_needed;
3785	thread_t *thread_list;
3786	thread_t thread;
3787	unsigned int i;
3788
3789	count = `0`;
3790	thread_list = NULL;
3791
3792	if (task == TASK_NULL) {
3793	return KERN_INVALID_ARGUMENT;
3794	}
3795
3796	assert(flavor <= THREAD_FLAVOR_INSPECT);
3797
3798	for (;;) {
3799	task_lock(task);
3800	if (!task->active) {
3801	task_unlock(task);
3802
3803	kfree_type(thread_t, count, thread_list);
3804	return KERN_FAILURE;
3805	}
3806
3807	count_needed = actual = task->thread_count;
3808	if (count_needed <= count) {
3809	break;
3810	}
3811
3812	/ unlock the task and allocate more memory /
3813	task_unlock(task);
3814
3815	kfree_type(thread_t, count, thread_list);
3816	count = count_needed;
3817	thread_list = kalloc_type(thread_t, count, Z_WAITOK);
3818
3819	if (thread_list == NULL) {
3820	return KERN_RESOURCE_SHORTAGE;
3821	}
3822	}
3823
3824	i = `0`;
3825	queue_iterate(&task->threads, thread, thread_t, task_threads) {
3826	assert(i < actual);
3827	thread_reference(thread);
3828	thread_list[i++] = thread;
3829	}
3830
3831	count_needed = actual;
3832
3833	/ can unlock task now that we've got the thread refs /
3834	task_unlock(task);
3835
3836	if (actual == `0`) {
3837	/ no threads, so return null pointer and deallocate memory /
3838
3839	*threads_out = NULL;
3840	*countp = `0`;
3841	kfree_type(thread_t, count, thread_list);
3842	} else {
3843	/ if we allocated too much, must copy /
3844	if (count_needed < count) {
3845	void *newaddr;
3846
3847	newaddr = kalloc_type(thread_t, count_needed, Z_WAITOK);
3848	if (newaddr == NULL) {
3849	for (i = `0`; i < actual; ++i) {
3850	thread_deallocate(thread: thread_list[i]);
3851	}
3852	kfree_type(thread_t, count, thread_list);
3853	return KERN_RESOURCE_SHORTAGE;
3854	}
3855
3856	bcopy(src: thread_list, dst: newaddr, n: count_needed * sizeof(thread_t));
3857	kfree_type(thread_t, count, thread_list);
3858	thread_list = (thread_t *)newaddr;
3859	}
3860
3861	*threads_out = thread_list;
3862	*countp = actual;
3863
3864	/ do the conversion that Mig should handle /
3865
3866	switch (flavor) {
3867	case THREAD_FLAVOR_CONTROL:
3868	if (task == current_task()) {
3869	for (i = `0`; i < actual; ++i) {
3870	((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
3871	}
3872	} else {
3873	for (i = `0`; i < actual; ++i) {
3874	((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3875	}
3876	}
3877	break;
3878	case THREAD_FLAVOR_READ:
3879	for (i = `0`; i < actual; ++i) {
3880	((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
3881	}
3882	break;
3883	case THREAD_FLAVOR_INSPECT:
3884	for (i = `0`; i < actual; ++i) {
3885	((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
3886	}
3887	break;
3888	}
3889	}
3890
3891	return KERN_SUCCESS;
3892	}
3893
3894	kern_return_t
3895	task_threads(
3896	task_t task,
3897	thread_act_array_t *threads_out,
3898	mach_msg_type_number_t *count)
3899	{
3900	return task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_CONTROL);
3901	}
3902
3903
3904	kern_return_t
3905	task_threads_from_user(
3906	mach_port_t port,
3907	thread_act_array_t *threads_out,
3908	mach_msg_type_number_t *count)
3909	{
3910	ipc_kobject_type_t kotype;
3911	kern_return_t kr;
3912
3913	task_t task = convert_port_to_task_inspect_no_eval(port);
3914
3915	if (task == TASK_NULL) {
3916	return KERN_INVALID_ARGUMENT;
3917	}
3918
3919	kotype = ip_kotype(port);
3920
3921	switch (kotype) {
3922	case IKOT_TASK_CONTROL:
3923	kr = task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_CONTROL);
3924	break;
3925	case IKOT_TASK_READ:
3926	kr = task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_READ);
3927	break;
3928	case IKOT_TASK_INSPECT:
3929	kr = task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_INSPECT);
3930	break;
3931	default:
3932	panic("strange kobject type");
3933	break;
3934	}
3935
3936	task_deallocate(task);
3937	return kr;
3938	}
3939
3940	#define TASK_HOLD_NORMAL 0
3941	#define TASK_HOLD_PIDSUSPEND 1
3942	#define TASK_HOLD_LEGACY 2
3943	#define TASK_HOLD_LEGACY_ALL 3
3944
3945	static kern_return_t
3946	place_task_hold(
3947	task_t task,
3948	int mode)
3949	{
3950	if (!task->active && !task_is_a_corpse(task)) {
3951	return KERN_FAILURE;
3952	}
3953
3954	/ Return success for corpse task /
3955	if (task_is_a_corpse(task)) {
3956	return KERN_SUCCESS;
3957	}
3958
3959	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
3960	task_pid(task),
3961	task->thread_count > `0` ?((thread_t)queue_first(&task->threads))->thread_id : `0`,
3962	task->user_stop_count, task->user_stop_count + `1`);
3963
3964	#if MACH_ASSERT
3965	current_task()->suspends_outstanding++;
3966	#endif
3967
3968	if (mode == TASK_HOLD_LEGACY) {
3969	task->legacy_stop_count++;
3970	}
3971
3972	#ifdef CONFIG_TASK_SUSPEND_STATS
3973	_task_mark_suspend_source(task);
3974	#endif /* CONFIG_TASK_SUSPEND_STATS */
3975
3976	if (task->user_stop_count++ > `0`) {
3977	/*
3978	* If the stop count was positive, the task is
3979	* already stopped and we can exit.
3980	*/
3981	return KERN_SUCCESS;
3982	}
3983
3984	/*
3985	* Put a kernel-level hold on the threads in the task (all
3986	* user-level task suspensions added together represent a
3987	* single kernel-level hold). We then wait for the threads
3988	* to stop executing user code.
3989	*/
3990	task_hold_locked(task);
3991	task_wait_locked(task, FALSE);
3992
3993	return KERN_SUCCESS;
3994	}
3995
3996	static kern_return_t
3997	release_task_hold(
3998	task_t task,
3999	int mode)
4000	{
4001	boolean_t release = FALSE;
4002
4003	if (!task->active && !task_is_a_corpse(task)) {
4004	return KERN_FAILURE;
4005	}
4006
4007	/ Return success for corpse task /
4008	if (task_is_a_corpse(task)) {
4009	return KERN_SUCCESS;
4010	}
4011
4012	if (mode == TASK_HOLD_PIDSUSPEND) {
4013	if (task->pidsuspended == FALSE) {
4014	return KERN_FAILURE;
4015	}
4016	task->pidsuspended = FALSE;
4017	}
4018
4019	if (task->user_stop_count > (task->pidsuspended ? `1` : `0`)) {
4020	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4021	MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) \| DBG_FUNC_NONE,
4022	task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
4023	task->user_stop_count, mode, task->legacy_stop_count);
4024
4025	#if MACH_ASSERT
4026	/*
4027	* This is obviously not robust; if we suspend one task and then resume a different one,
4028	* we'll fly under the radar. This is only meant to catch the common case of a crashed
4029	* or buggy suspender.
4030	*/
4031	current_task()->suspends_outstanding--;
4032	#endif
4033
4034	if (mode == TASK_HOLD_LEGACY_ALL) {
4035	if (task->legacy_stop_count >= task->user_stop_count) {
4036	task->user_stop_count = `0`;
4037	release = TRUE;
4038	} else {
4039	task->user_stop_count -= task->legacy_stop_count;
4040	}
4041	task->legacy_stop_count = `0`;
4042	} else {
4043	if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > `0`) {
4044	task->legacy_stop_count--;
4045	}
4046	if (--task->user_stop_count == `0`) {
4047	release = TRUE;
4048	}
4049	}
4050	} else {
4051	return KERN_FAILURE;
4052	}
4053
4054	/*
4055	* Release the task if necessary.
4056	*/
4057	if (release) {
4058	task_release_locked(task);
4059	}
4060
4061	return KERN_SUCCESS;
4062	}
4063
4064	boolean_t
4065	get_task_suspended(task_t task)
4066	{
4067	return `0` != task->user_stop_count;
4068	}
4069
4070	/*
4071	* task_suspend:
4072	*
4073	* Implement an (old-fashioned) user-level suspension on a task.
4074	*
4075	* Because the user isn't expecting to have to manage a suspension
4076	* token, we'll track it for him in the kernel in the form of a naked
4077	* send right to the task's resume port. All such send rights
4078	* account for a single suspension against the task (unlike task_suspend2()
4079	* where each caller gets a unique suspension count represented by a
4080	* unique send-once right).
4081	*
4082	* Conditions:
4083	* The caller holds a reference to the task
4084	*/
4085	kern_return_t
4086	task_suspend(
4087	task_t task)
4088	{
4089	kern_return_t kr;
4090	mach_port_t port;
4091	mach_port_name_t name;
4092
4093	if (task == TASK_NULL \|\| task == kernel_task) {
4094	return KERN_INVALID_ARGUMENT;
4095	}
4096
4097	/*
4098	* place a legacy hold on the task.
4099	*/
4100	task_lock(task);
4101	kr = place_task_hold(task, TASK_HOLD_LEGACY);
4102	task_unlock(task);
4103
4104	if (kr != KERN_SUCCESS) {
4105	return kr;
4106	}
4107
4108	/*
4109	* Claim a send right on the task resume port, and request a no-senders
4110	* notification on that port (if none outstanding).
4111	*/
4112	itk_lock(task);
4113	port = task->itk_resume;
4114	if (port == IP_NULL) {
4115	port = ipc_kobject_alloc_port(kobject: task, type: IKOT_TASK_RESUME,
4116	options: IPC_KOBJECT_ALLOC_NSREQUEST \| IPC_KOBJECT_ALLOC_MAKE_SEND);
4117	task->itk_resume = port;
4118	} else {
4119	(void)ipc_kobject_make_send_nsrequest(port, kobject: task, kotype: IKOT_TASK_RESUME);
4120	}
4121	itk_unlock(task);
4122
4123	/*
4124	* Copyout the send right into the calling task's IPC space. It won't know it is there,
4125	* but we'll look it up when calling a traditional resume. Any IPC operations that
4126	* deallocate the send right will auto-release the suspension.
4127	*/
4128	if (IP_VALID(port)) {
4129	kr = ipc_object_copyout(current_space(), ip_to_object(port),
4130	MACH_MSG_TYPE_MOVE_SEND, flags: IPC_OBJECT_COPYOUT_FLAGS_NONE,
4131	NULL, NULL, namep: &name);
4132	} else {
4133	kr = KERN_SUCCESS;
4134	}
4135	if (kr != KERN_SUCCESS) {
4136	printf(format: "warning: %s(%d) failed to copyout suspension "
4137	"token for pid %d with error: %d\n",
4138	proc_name_address(p: get_bsdtask_info(current_task())),
4139	proc_pid(p: get_bsdtask_info(current_task())),
4140	task_pid(task), kr);
4141	}
4142
4143	return kr;
4144	}
4145
4146	/*
4147	* task_resume:
4148	* Release a user hold on a task.
4149	*
4150	* Conditions:
4151	* The caller holds a reference to the task
4152	*/
4153	kern_return_t
4154	task_resume(
4155	task_t task)
4156	{
4157	kern_return_t kr;
4158	mach_port_name_t resume_port_name;
4159	ipc_entry_t resume_port_entry;
4160	ipc_space_t space = current_task()->itk_space;
4161
4162	if (task == TASK_NULL \|\| task == kernel_task) {
4163	return KERN_INVALID_ARGUMENT;
4164	}
4165
4166	/ release a legacy task hold /
4167	task_lock(task);
4168	kr = release_task_hold(task, TASK_HOLD_LEGACY);
4169	task_unlock(task);
4170
4171	itk_lock(task); / for itk_resume /
4172	is_write_lock(space); / spin lock /
4173	if (is_active(space) && IP_VALID(task->itk_resume) &&
4174	ipc_hash_lookup(space, ip_to_object(task->itk_resume), namep: &resume_port_name, entryp: &resume_port_entry) == TRUE) {
4175	/*
4176	* We found a suspension token in the caller's IPC space. Release a send right to indicate that
4177	* we are holding one less legacy hold on the task from this caller. If the release failed,
4178	* go ahead and drop all the rights, as someone either already released our holds or the task
4179	* is gone.
4180	*/
4181	itk_unlock(task);
4182	if (kr == KERN_SUCCESS) {
4183	ipc_right_dealloc(space, name: resume_port_name, entry: resume_port_entry);
4184	} else {
4185	ipc_right_destroy(space, name: resume_port_name, entry: resume_port_entry, FALSE, guard: `0`);
4186	}
4187	/ space unlocked /
4188	} else {
4189	itk_unlock(task);
4190	is_write_unlock(space);
4191	if (kr == KERN_SUCCESS) {
4192	printf(format: "warning: %s(%d) performed out-of-band resume on pid %d\n",
4193	proc_name_address(p: get_bsdtask_info(current_task())), proc_pid(p: get_bsdtask_info(current_task())),
4194	task_pid(task));
4195	}
4196	}
4197
4198	return kr;
4199	}
4200
4201	/*
4202	* Suspend a task that is already protected by a held lock.
4203	* Making/holding a token/reference/port is the caller's responsibility.
4204	*/
4205	kern_return_t
4206	task_suspend_internal_locked(task_t task)
4207	{
4208	if (task == TASK_NULL \|\| task == kernel_task) {
4209	return KERN_INVALID_ARGUMENT;
4210	}
4211
4212	return place_task_hold(task, TASK_HOLD_NORMAL);
4213	}
4214
4215	/*
4216	* Suspend a task.
4217	* Making/holding a token/reference/port is the caller's responsibility.
4218	*/
4219	kern_return_t
4220	task_suspend_internal(task_t task)
4221	{
4222	kern_return_t kr;
4223
4224	if (task == TASK_NULL \|\| task == kernel_task) {
4225	return KERN_INVALID_ARGUMENT;
4226	}
4227
4228	task_lock(task);
4229	kr = task_suspend_internal_locked(task);
4230	task_unlock(task);
4231	return kr;
4232	}
4233
4234	/*
4235	* Suspend the target task, and return a suspension token. The token
4236	* represents a reference on the suspended task.
4237	*/
4238	static kern_return_t
4239	task_suspend2_grp(
4240	task_t task,
4241	task_suspension_token_t *suspend_token,
4242	task_grp_t grp)
4243	{
4244	kern_return_t kr;
4245
4246	kr = task_suspend_internal(task);
4247	if (kr != KERN_SUCCESS) {
4248	*suspend_token = TASK_NULL;
4249	return kr;
4250	}
4251
4252	/*
4253	* Take a reference on the target task and return that to the caller
4254	* as a "suspension token," which can be converted into an SO right to
4255	* the now-suspended task's resume port.
4256	*/
4257	task_reference_grp(task, grp);
4258	*suspend_token = task;
4259
4260	return KERN_SUCCESS;
4261	}
4262
4263	kern_return_t
4264	task_suspend2_mig(
4265	task_t task,
4266	task_suspension_token_t *suspend_token)
4267	{
4268	return task_suspend2_grp(task, suspend_token, grp: TASK_GRP_MIG);
4269	}
4270
4271	kern_return_t
4272	task_suspend2_external(
4273	task_t task,
4274	task_suspension_token_t *suspend_token)
4275	{
4276	return task_suspend2_grp(task, suspend_token, grp: TASK_GRP_EXTERNAL);
4277	}
4278
4279	/*
4280	* Resume a task that is already protected by a held lock.
4281	* (reference/token/port management is caller's responsibility).
4282	*/
4283	kern_return_t
4284	task_resume_internal_locked(
4285	task_suspension_token_t task)
4286	{
4287	if (task == TASK_NULL \|\| task == kernel_task) {
4288	return KERN_INVALID_ARGUMENT;
4289	}
4290
4291	return release_task_hold(task, TASK_HOLD_NORMAL);
4292	}
4293
4294	/*
4295	* Resume a task.
4296	* (reference/token/port management is caller's responsibility).
4297	*/
4298	kern_return_t
4299	task_resume_internal(
4300	task_suspension_token_t task)
4301	{
4302	kern_return_t kr;
4303
4304	if (task == TASK_NULL \|\| task == kernel_task) {
4305	return KERN_INVALID_ARGUMENT;
4306	}
4307
4308	task_lock(task);
4309	kr = task_resume_internal_locked(task);
4310	task_unlock(task);
4311	return kr;
4312	}
4313
4314	/*
4315	* Resume the task using a suspension token. Consumes the token's ref.
4316	*/
4317	static kern_return_t
4318	task_resume2_grp(
4319	task_suspension_token_t task,
4320	task_grp_t grp)
4321	{
4322	kern_return_t kr;
4323
4324	kr = task_resume_internal(task);
4325	task_suspension_token_deallocate_grp(token: task, grp);
4326
4327	return kr;
4328	}
4329
4330	kern_return_t
4331	task_resume2_mig(
4332	task_suspension_token_t task)
4333	{
4334	return task_resume2_grp(task, grp: TASK_GRP_MIG);
4335	}
4336
4337	kern_return_t
4338	task_resume2_external(
4339	task_suspension_token_t task)
4340	{
4341	return task_resume2_grp(task, grp: TASK_GRP_EXTERNAL);
4342	}
4343
4344	static void
4345	task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
4346	{
4347	task_t task = convert_port_to_task_suspension_token(port);
4348	kern_return_t kr;
4349
4350	if (task == TASK_NULL) {
4351	return;
4352	}
4353
4354	if (task == kernel_task) {
4355	task_suspension_token_deallocate(token: task);
4356	return;
4357	}
4358
4359	task_lock(task);
4360
4361	kr = ipc_kobject_nsrequest(port, sync: mscount, NULL);
4362	if (kr == KERN_FAILURE) {
4363	/ release all the [remaining] outstanding legacy holds /
4364	release_task_hold(task, TASK_HOLD_LEGACY_ALL);
4365	}
4366
4367	task_unlock(task);
4368
4369	task_suspension_token_deallocate(token: task); / drop token reference /
4370	}
4371
4372	/*
4373	* Fires when a send once made
4374	* by convert_task_suspension_token_to_port() dies.
4375	*/
4376	void
4377	task_suspension_send_once(ipc_port_t port)
4378	{
4379	task_t task = convert_port_to_task_suspension_token(port);
4380
4381	if (task == TASK_NULL \|\| task == kernel_task) {
4382	return; / nothing to do /
4383	}
4384
4385	/ release the hold held by this specific send-once right /
4386	task_lock(task);
4387	release_task_hold(task, TASK_HOLD_NORMAL);
4388	task_unlock(task);
4389
4390	task_suspension_token_deallocate(token: task); / drop token reference /
4391	}
4392
4393	static kern_return_t
4394	task_pidsuspend_locked(task_t task)
4395	{
4396	kern_return_t kr;
4397
4398	if (task->pidsuspended) {
4399	kr = KERN_FAILURE;
4400	goto out;
4401	}
4402
4403	task->pidsuspended = TRUE;
4404
4405	kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4406	if (kr != KERN_SUCCESS) {
4407	task->pidsuspended = FALSE;
4408	}
4409	out:
4410	return kr;
4411	}
4412
4413
4414	/*
4415	* task_pidsuspend:
4416	*
4417	* Suspends a task by placing a hold on its threads.
4418	*
4419	* Conditions:
4420	* The caller holds a reference to the task
4421	*/
4422	kern_return_t
4423	task_pidsuspend(
4424	task_t task)
4425	{
4426	kern_return_t kr;
4427
4428	if (task == TASK_NULL \|\| task == kernel_task) {
4429	return KERN_INVALID_ARGUMENT;
4430	}
4431
4432	task_lock(task);
4433
4434	kr = task_pidsuspend_locked(task);
4435
4436	task_unlock(task);
4437
4438	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
4439	iokit_task_app_suspended_changed(task);
4440	}
4441
4442	return kr;
4443	}
4444
4445	/*
4446	* task_pidresume:
4447	* Resumes a previously suspended task.
4448	*
4449	* Conditions:
4450	* The caller holds a reference to the task
4451	*/
4452	kern_return_t
4453	task_pidresume(
4454	task_t task)
4455	{
4456	kern_return_t kr;
4457
4458	if (task == TASK_NULL \|\| task == kernel_task) {
4459	return KERN_INVALID_ARGUMENT;
4460	}
4461
4462	task_lock(task);
4463
4464	#if CONFIG_FREEZE
4465
4466	while (task->changing_freeze_state) {
4467	assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4468	task_unlock(task);
4469	thread_block(THREAD_CONTINUE_NULL);
4470
4471	task_lock(task);
4472	}
4473	task->changing_freeze_state = TRUE;
4474	#endif
4475
4476	kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
4477
4478	task_unlock(task);
4479
4480	if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
4481	iokit_task_app_suspended_changed(task);
4482	}
4483
4484	#if CONFIG_FREEZE
4485
4486	task_lock(task);
4487
4488	if (kr == KERN_SUCCESS) {
4489	task->frozen = FALSE;
4490	}
4491	task->changing_freeze_state = FALSE;
4492	thread_wakeup(&task->changing_freeze_state);
4493
4494	task_unlock(task);
4495	#endif
4496
4497	return kr;
4498	}
4499
4500	os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4501
4502	/*
4503	* task_add_turnstile_watchports:
4504	* Setup watchports to boost the main thread of the task.
4505	*
4506	* Arguments:
4507	* task: task being spawned
4508	* thread: main thread of task
4509	* portwatch_ports: array of watchports
4510	* portwatch_count: number of watchports
4511	*
4512	* Conditions:
4513	* Nothing locked.
4514	*/
4515	void
4516	task_add_turnstile_watchports(
4517	task_t task,
4518	thread_t thread,
4519	ipc_port_t *portwatch_ports,
4520	uint32_t portwatch_count)
4521	{
4522	struct task_watchports *watchports = NULL;
4523	struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
4524	os_ref_count_t refs;
4525
4526	/ Check if the task has terminated /
4527	if (!task->active) {
4528	return;
4529	}
4530
4531	assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
4532
4533	watchports = task_watchports_alloc_init(task, thread, count: portwatch_count);
4534
4535	/ Lock the ipc space /
4536	is_write_lock(task->itk_space);
4537
4538	/ Setup watchports to boost the main thread /
4539	refs = task_add_turnstile_watchports_locked(task,
4540	watchports, previous_elem_array, portwatch_ports,
4541	portwatch_count);
4542
4543	/ Drop the space lock /
4544	is_write_unlock(task->itk_space);
4545
4546	if (refs == `0`) {
4547	task_watchports_deallocate(watchports);
4548	}
4549
4550	/ Drop the ref on previous_elem_array /
4551	for (uint32_t i = `0`; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
4552	task_watchport_elem_deallocate(watchport_elem: previous_elem_array[i]);
4553	}
4554	}
4555
4556	/*
4557	* task_remove_turnstile_watchports:
4558	* Clear all turnstile boost on the task from watchports.
4559	*
4560	* Arguments:
4561	* task: task being terminated
4562	*
4563	* Conditions:
4564	* Nothing locked.
4565	*/
4566	void
4567	task_remove_turnstile_watchports(
4568	task_t task)
4569	{
4570	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4571	struct task_watchports *watchports = NULL;
4572	ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
4573	uint32_t portwatch_count;
4574
4575	/ Lock the ipc space /
4576	is_write_lock(task->itk_space);
4577
4578	/ Check if watchport boost exist /
4579	if (task->watchports == NULL) {
4580	is_write_unlock(task->itk_space);
4581	return;
4582	}
4583	watchports = task->watchports;
4584	portwatch_count = watchports->tw_elem_array_count;
4585
4586	refs = task_remove_turnstile_watchports_locked(task, watchports,
4587	port_freelist);
4588
4589	is_write_unlock(task->itk_space);
4590
4591	/ Drop all the port references /
4592	for (uint32_t i = `0`; i < portwatch_count && port_freelist[i] != NULL; i++) {
4593	ip_release(port_freelist[i]);
4594	}
4595
4596	/ Clear the task and thread references for task_watchport /
4597	if (refs == `0`) {
4598	task_watchports_deallocate(watchports);
4599	}
4600	}
4601
4602	/*
4603	* task_transfer_turnstile_watchports:
4604	* Transfer all watchport turnstile boost from old task to new task.
4605	*
4606	* Arguments:
4607	* old_task: task calling exec
4608	* new_task: new exec'ed task
4609	* thread: main thread of new task
4610	*
4611	* Conditions:
4612	* Nothing locked.
4613	*/
4614	void
4615	task_transfer_turnstile_watchports(
4616	task_t old_task,
4617	task_t new_task,
4618	thread_t new_thread)
4619	{
4620	struct task_watchports *old_watchports = NULL;
4621	struct task_watchports *new_watchports = NULL;
4622	os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
4623	os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
4624	uint32_t portwatch_count;
4625
4626	if (old_task->watchports == NULL \|\| !new_task->active) {
4627	return;
4628	}
4629
4630	/ Get the watch port count from the old task /
4631	is_write_lock(old_task->itk_space);
4632	if (old_task->watchports == NULL) {
4633	is_write_unlock(old_task->itk_space);
4634	return;
4635	}
4636
4637	portwatch_count = old_task->watchports->tw_elem_array_count;
4638	is_write_unlock(old_task->itk_space);
4639
4640	new_watchports = task_watchports_alloc_init(task: new_task, thread: new_thread, count: portwatch_count);
4641
4642	/ Lock the ipc space for old task /
4643	is_write_lock(old_task->itk_space);
4644
4645	/ Lock the ipc space for new task /
4646	is_write_lock(new_task->itk_space);
4647
4648	/ Check if watchport boost exist /
4649	if (old_task->watchports == NULL \|\| !new_task->active) {
4650	is_write_unlock(new_task->itk_space);
4651	is_write_unlock(old_task->itk_space);
4652	(void)task_watchports_release(new_watchports);
4653	task_watchports_deallocate(watchports: new_watchports);
4654	return;
4655	}
4656
4657	old_watchports = old_task->watchports;
4658	assert(portwatch_count == old_task->watchports->tw_elem_array_count);
4659
4660	/ Setup new task watchports /
4661	new_task->watchports = new_watchports;
4662
4663	for (uint32_t i = `0`; i < portwatch_count; i++) {
4664	ipc_port_t port = old_watchports->tw_elem[i].twe_port;
4665
4666	if (port == NULL) {
4667	task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4668	continue;
4669	}
4670
4671	/ Lock the port and check if it has the entry /
4672	ip_mq_lock(port);
4673
4674	task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
4675
4676	if (ipc_port_replace_watchport_elem_conditional_locked(port,
4677	old_watchport_elem: &old_watchports->tw_elem[i], new_watchport_elem: &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
4678	task_watchport_elem_clear(&old_watchports->tw_elem[i]);
4679
4680	task_watchports_retain(new_watchports);
4681	old_refs = task_watchports_release(old_watchports);
4682
4683	/ Check if all ports are cleaned /
4684	if (old_refs == `0`) {
4685	old_task->watchports = NULL;
4686	}
4687	} else {
4688	task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4689	}
4690	/ port unlocked by ipc_port_replace_watchport_elem_conditional_locked /
4691	}
4692
4693	/ Drop the reference on new task_watchports struct returned by task_watchports_alloc_init /
4694	new_refs = task_watchports_release(new_watchports);
4695	if (new_refs == `0`) {
4696	new_task->watchports = NULL;
4697	}
4698
4699	is_write_unlock(new_task->itk_space);
4700	is_write_unlock(old_task->itk_space);
4701
4702	/ Clear the task and thread references for old_watchport /
4703	if (old_refs == `0`) {
4704	task_watchports_deallocate(watchports: old_watchports);
4705	}
4706
4707	/ Clear the task and thread references for new_watchport /
4708	if (new_refs == `0`) {
4709	task_watchports_deallocate(watchports: new_watchports);
4710	}
4711	}
4712
4713	/*
4714	* task_add_turnstile_watchports_locked:
4715	* Setup watchports to boost the main thread of the task.
4716	*
4717	* Arguments:
4718	* task: task to boost
4719	* watchports: watchport structure to be attached to the task
4720	* previous_elem_array: an array of old watchport_elem to be returned to caller
4721	* portwatch_ports: array of watchports
4722	* portwatch_count: number of watchports
4723	*
4724	* Conditions:
4725	* ipc space of the task locked.
4726	* returns array of old watchport_elem in previous_elem_array
4727	*/
4728	static os_ref_count_t
4729	task_add_turnstile_watchports_locked(
4730	task_t task,
4731	struct task_watchports *watchports,
4732	struct task_watchport_elem **previous_elem_array,
4733	ipc_port_t *portwatch_ports,
4734	uint32_t portwatch_count)
4735	{
4736	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4737
4738	/ Check if the task is still active /
4739	if (!task->active) {
4740	refs = task_watchports_release(watchports);
4741	return refs;
4742	}
4743
4744	assert(task->watchports == NULL);
4745	task->watchports = watchports;
4746
4747	for (uint32_t i = `0`, j = `0`; i < portwatch_count; i++) {
4748	ipc_port_t port = portwatch_ports[i];
4749
4750	task_watchport_elem_init(&watchports->tw_elem[i], task, port);
4751	if (port == NULL) {
4752	task_watchport_elem_clear(&watchports->tw_elem[i]);
4753	continue;
4754	}
4755
4756	ip_mq_lock(port);
4757
4758	/ Check if port is in valid state to be setup as watchport /
4759	if (ipc_port_add_watchport_elem_locked(port, watchport_elem: &watchports->tw_elem[i],
4760	old_elem: &previous_elem_array[j]) != KERN_SUCCESS) {
4761	task_watchport_elem_clear(&watchports->tw_elem[i]);
4762	continue;
4763	}
4764	/ port unlocked on return /
4765
4766	ip_reference(port);
4767	task_watchports_retain(watchports);
4768	if (previous_elem_array[j] != NULL) {
4769	j++;
4770	}
4771	}
4772
4773	/ Drop the reference on task_watchport struct returned by os_ref_init /
4774	refs = task_watchports_release(watchports);
4775	if (refs == `0`) {
4776	task->watchports = NULL;
4777	}
4778
4779	return refs;
4780	}
4781
4782	/*
4783	* task_remove_turnstile_watchports_locked:
4784	* Clear all turnstile boost on the task from watchports.
4785	*
4786	* Arguments:
4787	* task: task to remove watchports from
4788	* watchports: watchports structure for the task
4789	* port_freelist: array of ports returned with ref to caller
4790	*
4791	*
4792	* Conditions:
4793	* ipc space of the task locked.
4794	* array of ports with refs are returned in port_freelist
4795	*/
4796	static os_ref_count_t
4797	task_remove_turnstile_watchports_locked(
4798	task_t task,
4799	struct task_watchports *watchports,
4800	ipc_port_t *port_freelist)
4801	{
4802	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4803
4804	for (uint32_t i = `0`, j = `0`; i < watchports->tw_elem_array_count; i++) {
4805	ipc_port_t port = watchports->tw_elem[i].twe_port;
4806	if (port == NULL) {
4807	continue;
4808	}
4809
4810	/ Lock the port and check if it has the entry /
4811	ip_mq_lock(port);
4812	if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4813	watchport_elem: &watchports->tw_elem[i]) == KERN_SUCCESS) {
4814	task_watchport_elem_clear(&watchports->tw_elem[i]);
4815	port_freelist[j++] = port;
4816	refs = task_watchports_release(watchports);
4817
4818	/ Check if all ports are cleaned /
4819	if (refs == `0`) {
4820	task->watchports = NULL;
4821	break;
4822	}
4823	}
4824	/ mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked /
4825	}
4826	return refs;
4827	}
4828
4829	/*
4830	* task_watchports_alloc_init:
4831	* Allocate and initialize task watchport struct.
4832	*
4833	* Conditions:
4834	* Nothing locked.
4835	*/
4836	static struct task_watchports *
4837	task_watchports_alloc_init(
4838	task_t task,
4839	thread_t thread,
4840	uint32_t count)
4841	{
4842	struct task_watchports watchports = kalloc_type(struct* task_watchports,
4843	struct task_watchport_elem, count, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
4844
4845	task_reference(task);
4846	thread_reference(thread);
4847	watchports->tw_task = task;
4848	watchports->tw_thread = thread;
4849	watchports->tw_elem_array_count = count;
4850	os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4851
4852	return watchports;
4853	}
4854
4855	/*
4856	* task_watchports_deallocate:
4857	* Deallocate task watchport struct.
4858	*
4859	* Conditions:
4860	* Nothing locked.
4861	*/
4862	static void
4863	task_watchports_deallocate(
4864	struct task_watchports *watchports)
4865	{
4866	uint32_t portwatch_count = watchports->tw_elem_array_count;
4867
4868	task_deallocate(watchports->tw_task);
4869	thread_deallocate(thread: watchports->tw_thread);
4870	kfree_type(struct task_watchports, struct task_watchport_elem,
4871	portwatch_count, watchports);
4872	}
4873
4874	/*
4875	* task_watchport_elem_deallocate:
4876	* Deallocate task watchport element and release its ref on task_watchport.
4877	*
4878	* Conditions:
4879	* Nothing locked.
4880	*/
4881	void
4882	task_watchport_elem_deallocate(
4883	struct task_watchport_elem *watchport_elem)
4884	{
4885	os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4886	task_t task = watchport_elem->twe_task;
4887	struct task_watchports *watchports = NULL;
4888	ipc_port_t port = NULL;
4889
4890	assert(task != NULL);
4891
4892	/ Take the space lock to modify the elememt /
4893	is_write_lock(task->itk_space);
4894
4895	watchports = task->watchports;
4896	assert(watchports != NULL);
4897
4898	port = watchport_elem->twe_port;
4899	assert(port != NULL);
4900
4901	task_watchport_elem_clear(watchport_elem);
4902	refs = task_watchports_release(watchports);
4903
4904	if (refs == `0`) {
4905	task->watchports = NULL;
4906	}
4907
4908	is_write_unlock(task->itk_space);
4909
4910	ip_release(port);
4911	if (refs == `0`) {
4912	task_watchports_deallocate(watchports);
4913	}
4914	}
4915
4916	/*
4917	* task_has_watchports:
4918	* Return TRUE if task has watchport boosts.
4919	*
4920	* Conditions:
4921	* Nothing locked.
4922	*/
4923	boolean_t
4924	task_has_watchports(task_t task)
4925	{
4926	return task->watchports != NULL;
4927	}
4928
4929	#if DEVELOPMENT \|\| DEBUG
4930
4931	extern void IOSleep(int);
4932
4933	kern_return_t
4934	task_disconnect_page_mappings(task_t task)
4935	{
4936	int n;
4937
4938	if (task == TASK_NULL \|\| task == kernel_task) {
4939	return KERN_INVALID_ARGUMENT;
4940	}
4941
4942	/*
4943	* this function is used to strip all of the mappings from
4944	* the pmap for the specified task to force the task to
4945	* re-fault all of the pages it is actively using... this
4946	* allows us to approximate the true working set of the
4947	* specified task. We only engage if at least 1 of the
4948	* threads in the task is runnable, but we want to continuously
4949	* sweep (at least for a while - I've arbitrarily set the limit at
4950	* 100 sweeps to be re-looked at as we gain experience) to get a better
4951	* view into what areas within a page are being visited (as opposed to only
4952	* seeing the first fault of a page after the task becomes
4953	* runnable)... in the future I may
4954	* try to block until awakened by a thread in this task
4955	* being made runnable, but for now we'll periodically poll from the
4956	* user level debug tool driving the sysctl
4957	*/
4958	for (n = `0`; n < `100`; n++) {
4959	thread_t thread;
4960	boolean_t runnable;
4961	boolean_t do_unnest;
4962	int page_count;
4963
4964	runnable = FALSE;
4965	do_unnest = FALSE;
4966
4967	task_lock(task);
4968
4969	queue_iterate(&task->threads, thread, thread_t, task_threads) {
4970	if (thread->state & TH_RUN) {
4971	runnable = TRUE;
4972	break;
4973	}
4974	}
4975	if (n == `0`) {
4976	task->task_disconnected_count++;
4977	}
4978
4979	if (task->task_unnested == FALSE) {
4980	if (runnable == TRUE) {
4981	task->task_unnested = TRUE;
4982	do_unnest = TRUE;
4983	}
4984	}
4985	task_unlock(task);
4986
4987	if (runnable == FALSE) {
4988	break;
4989	}
4990
4991	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) \| DBG_FUNC_START,
4992	task, do_unnest, task->task_disconnected_count, `0`, `0`);
4993
4994	page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4995
4996	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) \| DBG_FUNC_END,
4997	task, page_count, `0`, `0`, `0`);
4998
4999	if ((n % `5`) == `4`) {
5000	IOSleep(`1`);
5001	}
5002	}
5003	return KERN_SUCCESS;
5004	}
5005
5006	#endif
5007
5008
5009	#if CONFIG_FREEZE
5010
5011	/*
5012	* task_freeze:
5013	*
5014	* Freeze a task.
5015	*
5016	* Conditions:
5017	* The caller holds a reference to the task
5018	*/
5019	extern void vm_wake_compactor_swapper(void);
5020	extern struct freezer_context freezer_context_global;
5021
5022	kern_return_t
5023	task_freeze(
5024	task_t task,
5025	uint32_t *purgeable_count,
5026	uint32_t *wired_count,
5027	uint32_t *clean_count,
5028	uint32_t *dirty_count,
5029	uint32_t dirty_budget,
5030	uint32_t *shared_count,
5031	int *freezer_error_code,
5032	boolean_t eval_only)
5033	{
5034	kern_return_t kr = KERN_SUCCESS;
5035
5036	if (task == TASK_NULL \|\| task == kernel_task) {
5037	return KERN_INVALID_ARGUMENT;
5038	}
5039
5040	task_lock(task);
5041
5042	while (task->changing_freeze_state) {
5043	assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
5044	task_unlock(task);
5045	thread_block(THREAD_CONTINUE_NULL);
5046
5047	task_lock(task);
5048	}
5049	if (task->frozen) {
5050	task_unlock(task);
5051	return KERN_FAILURE;
5052	}
5053	task->changing_freeze_state = TRUE;
5054
5055	freezer_context_global.freezer_ctx_task = task;
5056
5057	task_unlock(task);
5058
5059	kr = vm_map_freeze(task,
5060	purgeable_count,
5061	wired_count,
5062	clean_count,
5063	dirty_count,
5064	dirty_budget,
5065	shared_count,
5066	freezer_error_code,
5067	eval_only);
5068
5069	task_lock(task);
5070
5071	if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
5072	task->frozen = TRUE;
5073
5074	freezer_context_global.freezer_ctx_task = NULL;
5075	freezer_context_global.freezer_ctx_uncompressed_pages = `0`;
5076
5077	if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
5078	/*
5079	* reset the counter tracking the # of swapped compressed pages
5080	* because we are now done with this freeze session and task.
5081	*/
5082
5083	dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64); /used to track pageouts/*
5084	}
5085
5086	freezer_context_global.freezer_ctx_swapped_bytes = `0`;
5087	}
5088
5089	task->changing_freeze_state = FALSE;
5090	thread_wakeup(&task->changing_freeze_state);
5091
5092	task_unlock(task);
5093
5094	if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
5095	(kr == KERN_SUCCESS) &&
5096	(eval_only == FALSE)) {
5097	vm_wake_compactor_swapper();
5098	/*
5099	* We do an explicit wakeup of the swapout thread here
5100	* because the compact_and_swap routines don't have
5101	* knowledge about these kind of "per-task packed c_segs"
5102	* and so will not be evaluating whether we need to do
5103	* a wakeup there.
5104	*/
5105	thread_wakeup((event_t)&vm_swapout_thread);
5106	}
5107
5108	return kr;
5109	}
5110
5111	/*
5112	* task_thaw:
5113	*
5114	* Thaw a currently frozen task.
5115	*
5116	* Conditions:
5117	* The caller holds a reference to the task
5118	*/
5119	kern_return_t
5120	task_thaw(
5121	task_t task)
5122	{
5123	if (task == TASK_NULL \|\| task == kernel_task) {
5124	return KERN_INVALID_ARGUMENT;
5125	}
5126
5127	task_lock(task);
5128
5129	while (task->changing_freeze_state) {
5130	assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
5131	task_unlock(task);
5132	thread_block(THREAD_CONTINUE_NULL);
5133
5134	task_lock(task);
5135	}
5136	if (!task->frozen) {
5137	task_unlock(task);
5138	return KERN_FAILURE;
5139	}
5140	task->frozen = FALSE;
5141
5142	task_unlock(task);
5143
5144	return KERN_SUCCESS;
5145	}
5146
5147	void
5148	task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
5149	{
5150	/*
5151	* We don't assert that the task lock is held because we call this
5152	* routine from the decompression path and we won't be holding the
5153	* task lock. However, since we are in the context of the task we are
5154	* safe.
5155	* In the case of the task_freeze path, we call it from behind the task
5156	* lock but we don't need to because we have a reference on the proc
5157	* being frozen.
5158	*/
5159
5160	assert(task);
5161	if (amount == `0`) {
5162	return;
5163	}
5164
5165	if (op == CREDIT_TO_SWAP) {
5166	ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5167	} else if (op == DEBIT_FROM_SWAP) {
5168	ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5169	} else {
5170	panic("task_update_frozen_to_swap_acct: Invalid ledger op");
5171	}
5172	}
5173	#endif /* CONFIG_FREEZE */
5174
5175	kern_return_t
5176	task_set_security_tokens(
5177	task_t task,
5178	security_token_t sec_token,
5179	audit_token_t audit_token,
5180	host_priv_t host_priv)
5181	{
5182	ipc_port_t host_port = IP_NULL;
5183	kern_return_t kr;
5184
5185	if (task == TASK_NULL) {
5186	return KERN_INVALID_ARGUMENT;
5187	}
5188
5189	task_lock(task);
5190	task_set_tokens(task, sec_token: &sec_token, audit_token: &audit_token);
5191	task_unlock(task);
5192
5193	if (host_priv != HOST_PRIV_NULL) {
5194	kr = host_get_host_priv_port(host_priv, &host_port);
5195	} else {
5196	kr = host_get_host_port(host_priv_self(), &host_port);
5197	}
5198	assert(kr == KERN_SUCCESS);
5199
5200	kr = task_set_special_port_internal(task, TASK_HOST_PORT, port: host_port);
5201	return kr;
5202	}
5203
5204	kern_return_t
5205	task_send_trace_memory(
5206	__unused task_t target_task,
5207	__unused uint32_t pid,
5208	__unused uint64_t uniqueid)
5209	{
5210	return KERN_INVALID_ARGUMENT;
5211	}
5212
5213	/*
5214	* This routine was added, pretty much exclusively, for registering the
5215	* RPC glue vector for in-kernel short circuited tasks. Rather than
5216	* removing it completely, I have only disabled that feature (which was
5217	* the only feature at the time). It just appears that we are going to
5218	* want to add some user data to tasks in the future (i.e. bsd info,
5219	* task names, etc...), so I left it in the formal task interface.
5220	*/
5221	kern_return_t
5222	task_set_info(
5223	task_t task,
5224	task_flavor_t flavor,
5225	__unused task_info_t task_info_in, / pointer to IN array /
5226	__unused mach_msg_type_number_t task_info_count)
5227	{
5228	if (task == TASK_NULL) {
5229	return KERN_INVALID_ARGUMENT;
5230	}
5231	switch (flavor) {
5232	#if CONFIG_ATM
5233	case TASK_TRACE_MEMORY_INFO:
5234	return KERN_NOT_SUPPORTED;
5235	#endif // CONFIG_ATM
5236	default:
5237	return KERN_INVALID_ARGUMENT;
5238	}
5239	}
5240
5241	static void
5242	_task_fill_times(task_t task, time_value_t user_time, time_value_t sys_time)
5243	{
5244	clock_sec_t sec;
5245	clock_usec_t usec;
5246
5247	struct recount_times_mach times = recount_task_terminated_times(task);
5248	absolutetime_to_microtime(abstime: times.rtm_user, secs: &sec, microsecs: &usec);
5249	user_time->seconds = (typeof(user_time->seconds))sec;
5250	user_time->microseconds = usec;
5251	absolutetime_to_microtime(abstime: times.rtm_system, secs: &sec, microsecs: &usec);
5252	sys_time->seconds = (typeof(sys_time->seconds))sec;
5253	sys_time->microseconds = usec;
5254	}
5255
5256	int radar_20146450 = `1`;
5257	kern_return_t
5258	task_info(
5259	task_t task,
5260	task_flavor_t flavor,
5261	task_info_t task_info_out,
5262	mach_msg_type_number_t *task_info_count)
5263	{
5264	kern_return_t error = KERN_SUCCESS;
5265	mach_msg_type_number_t original_task_info_count;
5266	bool is_kernel_task = (task == kernel_task);
5267
5268	if (task == TASK_NULL) {
5269	return KERN_INVALID_ARGUMENT;
5270	}
5271
5272	original_task_info_count = *task_info_count;
5273	task_lock(task);
5274
5275	if (task != current_task() && !task->active) {
5276	task_unlock(task);
5277	return KERN_INVALID_ARGUMENT;
5278	}
5279
5280
5281	switch (flavor) {
5282	case TASK_BASIC_INFO_32:
5283	case TASK_BASIC2_INFO_32:
5284	#if defined(__arm64__)
5285	case TASK_BASIC_INFO_64:
5286	#endif
5287	{
5288	task_basic_info_32_t basic_info;
5289	ledger_amount_t tmp;
5290
5291	if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
5292	error = KERN_INVALID_ARGUMENT;
5293	break;
5294	}
5295
5296	basic_info = (task_basic_info_32_t)task_info_out;
5297
5298	basic_info->virtual_size = (typeof(basic_info->virtual_size))
5299	vm_map_adjusted_size(map: is_kernel_task ? kernel_map : task->map);
5300	if (flavor == TASK_BASIC2_INFO_32) {
5301	/*
5302	* The "BASIC2" flavor gets the maximum resident
5303	* size instead of the current resident size...
5304	*/
5305	ledger_get_lifetime_max(ledger: task->ledger, entry: task_ledgers.phys_mem, max_lifetime_balance: &tmp);
5306	} else {
5307	ledger_get_balance(ledger: task->ledger, entry: task_ledgers.phys_mem, balance: &tmp);
5308	}
5309	basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
5310
5311	_task_fill_times(task, user_time: &basic_info->user_time,
5312	sys_time: &basic_info->system_time);
5313
5314	basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5315	basic_info->suspend_count = task->user_stop_count;
5316
5317	*task_info_count = TASK_BASIC_INFO_32_COUNT;
5318	break;
5319	}
5320
5321	#if defined(__arm64__)
5322	case TASK_BASIC_INFO_64_2:
5323	{
5324	task_basic_info_64_2_t basic_info;
5325
5326	if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
5327	error = KERN_INVALID_ARGUMENT;
5328	break;
5329	}
5330
5331	basic_info = (task_basic_info_64_2_t)task_info_out;
5332
5333	basic_info->virtual_size = vm_map_adjusted_size(map: is_kernel_task ?
5334	kernel_map : task->map);
5335	ledger_get_balance(ledger: task->ledger, entry: task_ledgers.phys_mem,
5336	balance: (ledger_amount_t *)&basic_info->resident_size);
5337	basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5338	basic_info->suspend_count = task->user_stop_count;
5339	_task_fill_times(task, user_time: &basic_info->user_time,
5340	sys_time: &basic_info->system_time);
5341
5342	*task_info_count = TASK_BASIC_INFO_64_2_COUNT;
5343	break;
5344	}
5345
5346	#else /* defined(__arm64__) */
5347	case TASK_BASIC_INFO_64:
5348	{
5349	task_basic_info_64_t basic_info;
5350
5351	if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
5352	error = KERN_INVALID_ARGUMENT;
5353	break;
5354	}
5355
5356	basic_info = (task_basic_info_64_t)task_info_out;
5357
5358	basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5359	kernel_map : task->map);
5360	ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
5361	basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5362	basic_info->suspend_count = task->user_stop_count;
5363	_task_fill_times(task, &basic_info->user_time,
5364	&basic_info->system_time);
5365
5366	*task_info_count = TASK_BASIC_INFO_64_COUNT;
5367	break;
5368	}
5369	#endif /* defined(__arm64__) */
5370
5371	case MACH_TASK_BASIC_INFO:
5372	{
5373	mach_task_basic_info_t basic_info;
5374
5375	if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5376	error = KERN_INVALID_ARGUMENT;
5377	break;
5378	}
5379
5380	basic_info = (mach_task_basic_info_t)task_info_out;
5381
5382	basic_info->virtual_size = vm_map_adjusted_size(map: is_kernel_task ?
5383	kernel_map : task->map);
5384	ledger_get_balance(ledger: task->ledger, entry: task_ledgers.phys_mem, balance: (ledger_amount_t *) &basic_info->resident_size);
5385	ledger_get_lifetime_max(ledger: task->ledger, entry: task_ledgers.phys_mem, max_lifetime_balance: (ledger_amount_t *) &basic_info->resident_size_max);
5386	basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5387	basic_info->suspend_count = task->user_stop_count;
5388	_task_fill_times(task, user_time: &basic_info->user_time,
5389	sys_time: &basic_info->system_time);
5390
5391	*task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5392	break;
5393	}
5394
5395	case TASK_THREAD_TIMES_INFO:
5396	{
5397	task_thread_times_info_t times_info;
5398	thread_t thread;
5399
5400	if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5401	error = KERN_INVALID_ARGUMENT;
5402	break;
5403	}
5404
5405	times_info = (task_thread_times_info_t)task_info_out;
5406	times_info->user_time = (time_value_t){ `0` };
5407	times_info->system_time = (time_value_t){ `0` };
5408
5409	queue_iterate(&task->threads, thread, thread_t, task_threads) {
5410	if ((thread->options & TH_OPT_IDLE_THREAD) == `0`) {
5411	time_value_t user_time, system_time;
5412
5413	thread_read_times(thread, &user_time, &system_time, NULL);
5414	time_value_add(&times_info->user_time, &user_time);
5415	time_value_add(&times_info->system_time, &system_time);
5416	}
5417	}
5418
5419	*task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5420	break;
5421	}
5422
5423	case TASK_ABSOLUTETIME_INFO:
5424	{
5425	task_absolutetime_info_t info;
5426
5427	if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5428	error = KERN_INVALID_ARGUMENT;
5429	break;
5430	}
5431
5432	info = (task_absolutetime_info_t)task_info_out;
5433
5434	struct recount_times_mach term_times =
5435	recount_task_terminated_times(task);
5436	struct recount_times_mach total_times = recount_task_times(task);
5437
5438	info->total_user = total_times.rtm_user;
5439	info->total_system = total_times.rtm_system;
5440	info->threads_user = total_times.rtm_user - term_times.rtm_user;
5441	info->threads_system += total_times.rtm_system - term_times.rtm_system;
5442
5443	*task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5444	break;
5445	}
5446
5447	case TASK_DYLD_INFO:
5448	{
5449	task_dyld_info_t info;
5450
5451	/*
5452	* We added the format field to TASK_DYLD_INFO output. For
5453	* temporary backward compatibility, accept the fact that
5454	* clients may ask for the old version - distinquished by the
5455	* size of the expected result structure.
5456	*/
5457	#define TASK_LEGACY_DYLD_INFO_COUNT \
5458	offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5459
5460	if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5461	error = KERN_INVALID_ARGUMENT;
5462	break;
5463	}
5464
5465	info = (task_dyld_info_t)task_info_out;
5466	info->all_image_info_addr = task->all_image_info_addr;
5467	info->all_image_info_size = task->all_image_info_size;
5468
5469	/ only set format on output for those expecting it /
5470	if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5471	info->all_image_info_format = task_has_64Bit_addr(task) ?
5472	TASK_DYLD_ALL_IMAGE_INFO_64 :
5473	TASK_DYLD_ALL_IMAGE_INFO_32;
5474	*task_info_count = TASK_DYLD_INFO_COUNT;
5475	} else {
5476	*task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5477	}
5478	break;
5479	}
5480
5481	case TASK_EXTMOD_INFO:
5482	{
5483	task_extmod_info_t info;
5484	void *p;
5485
5486	if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5487	error = KERN_INVALID_ARGUMENT;
5488	break;
5489	}
5490
5491	info = (task_extmod_info_t)task_info_out;
5492
5493	p = get_bsdtask_info(task);
5494	if (p) {
5495	proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5496	} else {
5497	bzero(info->task_uuid, sizeof(info->task_uuid));
5498	}
5499	info->extmod_statistics = task->extmod_statistics;
5500	*task_info_count = TASK_EXTMOD_INFO_COUNT;
5501
5502	break;
5503	}
5504
5505	case TASK_KERNELMEMORY_INFO:
5506	{
5507	task_kernelmemory_info_t tkm_info;
5508	ledger_amount_t credit, debit;
5509
5510	if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5511	error = KERN_INVALID_ARGUMENT;
5512	break;
5513	}
5514
5515	tkm_info = (task_kernelmemory_info_t) task_info_out;
5516	tkm_info->total_palloc = `0`;
5517	tkm_info->total_pfree = `0`;
5518	tkm_info->total_salloc = `0`;
5519	tkm_info->total_sfree = `0`;
5520
5521	if (task == kernel_task) {
5522	/*
5523	* All shared allocs/frees from other tasks count against
5524	* the kernel private memory usage. If we are looking up
5525	* info for the kernel task, gather from everywhere.
5526	*/
5527	task_unlock(task);
5528
5529	/ start by accounting for all the terminated tasks against the kernel /
5530	tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5531	tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5532
5533	/ count all other task/thread shared alloc/free against the kernel /
5534	lck_mtx_lock(&tasks_threads_lock);
5535
5536	/ XXX this really shouldn't be using the function parameter 'task' as a local var! /
5537	queue_iterate(&tasks, task, task_t, tasks) {
5538	if (task == kernel_task) {
5539	if (ledger_get_entries(task->ledger,
5540	task_ledgers.tkm_private, &credit,
5541	&debit) == KERN_SUCCESS) {
5542	tkm_info->total_palloc += credit;
5543	tkm_info->total_pfree += debit;
5544	}
5545	}
5546	if (!ledger_get_entries(task->ledger,
5547	task_ledgers.tkm_shared, &credit, &debit)) {
5548	tkm_info->total_palloc += credit;
5549	tkm_info->total_pfree += debit;
5550	}
5551	}
5552	lck_mtx_unlock(&tasks_threads_lock);
5553	} else {
5554	if (!ledger_get_entries(task->ledger,
5555	task_ledgers.tkm_private, &credit, &debit)) {
5556	tkm_info->total_palloc = credit;
5557	tkm_info->total_pfree = debit;
5558	}
5559	if (!ledger_get_entries(task->ledger,
5560	task_ledgers.tkm_shared, &credit, &debit)) {
5561	tkm_info->total_salloc = credit;
5562	tkm_info->total_sfree = debit;
5563	}
5564	task_unlock(task);
5565	}
5566
5567	*task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5568	return KERN_SUCCESS;
5569	}
5570
5571	/ OBSOLETE /
5572	case TASK_SCHED_FIFO_INFO:
5573	{
5574	if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5575	error = KERN_INVALID_ARGUMENT;
5576	break;
5577	}
5578
5579	error = KERN_INVALID_POLICY;
5580	break;
5581	}
5582
5583	/ OBSOLETE /
5584	case TASK_SCHED_RR_INFO:
5585	{
5586	policy_rr_base_t rr_base;
5587	uint32_t quantum_time;
5588	uint64_t quantum_ns;
5589
5590	if (*task_info_count < POLICY_RR_BASE_COUNT) {
5591	error = KERN_INVALID_ARGUMENT;
5592	break;
5593	}
5594
5595	rr_base = (policy_rr_base_t) task_info_out;
5596
5597	if (task != kernel_task) {
5598	error = KERN_INVALID_POLICY;
5599	break;
5600	}
5601
5602	rr_base->base_priority = task->priority;
5603
5604	quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5605	absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5606
5607	rr_base->quantum = (uint32_t)(quantum_ns / `1000` / `1000`);
5608
5609	*task_info_count = POLICY_RR_BASE_COUNT;
5610	break;
5611	}
5612
5613	/ OBSOLETE /
5614	case TASK_SCHED_TIMESHARE_INFO:
5615	{
5616	policy_timeshare_base_t ts_base;
5617
5618	if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5619	error = KERN_INVALID_ARGUMENT;
5620	break;
5621	}
5622
5623	ts_base = (policy_timeshare_base_t) task_info_out;
5624
5625	if (task == kernel_task) {
5626	error = KERN_INVALID_POLICY;
5627	break;
5628	}
5629
5630	ts_base->base_priority = task->priority;
5631
5632	*task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5633	break;
5634	}
5635
5636	case TASK_SECURITY_TOKEN:
5637	{
5638	security_token_t *sec_token_p;
5639
5640	if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5641	error = KERN_INVALID_ARGUMENT;
5642	break;
5643	}
5644
5645	sec_token_p = (security_token_t *) task_info_out;
5646
5647	sec_token_p = task_get_sec_token(task);
5648
5649	*task_info_count = TASK_SECURITY_TOKEN_COUNT;
5650	break;
5651	}
5652
5653	case TASK_AUDIT_TOKEN:
5654	{
5655	audit_token_t *audit_token_p;
5656
5657	if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5658	error = KERN_INVALID_ARGUMENT;
5659	break;
5660	}
5661
5662	audit_token_p = (audit_token_t *) task_info_out;
5663
5664	audit_token_p = task_get_audit_token(task);
5665
5666	*task_info_count = TASK_AUDIT_TOKEN_COUNT;
5667	break;
5668	}
5669
5670	case TASK_SCHED_INFO:
5671	error = KERN_INVALID_ARGUMENT;
5672	break;
5673
5674	case TASK_EVENTS_INFO:
5675	{
5676	task_events_info_t events_info;
5677	thread_t thread;
5678	uint64_t n_syscalls_mach, n_syscalls_unix, n_csw;
5679
5680	if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5681	error = KERN_INVALID_ARGUMENT;
5682	break;
5683	}
5684
5685	events_info = (task_events_info_t) task_info_out;
5686
5687
5688	events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5689	events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5690	events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5691	events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5692	events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5693
5694	n_syscalls_mach = task->syscalls_mach;
5695	n_syscalls_unix = task->syscalls_unix;
5696	n_csw = task->c_switch;
5697
5698	queue_iterate(&task->threads, thread, thread_t, task_threads) {
5699	n_csw += thread->c_switch;
5700	n_syscalls_mach += thread->syscalls_mach;
5701	n_syscalls_unix += thread->syscalls_unix;
5702	}
5703
5704	events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5705	events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5706	events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5707
5708	*task_info_count = TASK_EVENTS_INFO_COUNT;
5709	break;
5710	}
5711	case TASK_AFFINITY_TAG_INFO:
5712	{
5713	if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5714	error = KERN_INVALID_ARGUMENT;
5715	break;
5716	}
5717
5718	error = task_affinity_info(task, task_info_out, task_info_count);
5719	break;
5720	}
5721	case TASK_POWER_INFO:
5722	{
5723	if (*task_info_count < TASK_POWER_INFO_COUNT) {
5724	error = KERN_INVALID_ARGUMENT;
5725	break;
5726	}
5727
5728	task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5729	break;
5730	}
5731
5732	case TASK_POWER_INFO_V2:
5733	{
5734	if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5735	error = KERN_INVALID_ARGUMENT;
5736	break;
5737	}
5738	task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5739	task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5740	break;
5741	}
5742
5743	case TASK_VM_INFO:
5744	case TASK_VM_INFO_PURGEABLE:
5745	{
5746	task_vm_info_t vm_info;
5747	vm_map_t map;
5748	ledger_amount_t tmp_amount;
5749
5750	struct proc *p;
5751	uint32_t platform, sdk;
5752	p = current_proc();
5753	platform = proc_platform(p);
5754	sdk = proc_sdk(p);
5755	if (original_task_info_count > TASK_VM_INFO_COUNT) {
5756	/*
5757	* Some iOS apps pass an incorrect value for
5758	* task_info_count, expressed in number of bytes
5759	* instead of number of "natural_t" elements, which
5760	* can lead to binary compatibility issues (including
5761	* stack corruption) when the data structure is
5762	* expanded in the future.
5763	* Let's make this potential issue visible by
5764	* logging about it...
5765	*/
5766	printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5767	"task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5768	"%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5769	__FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5770	flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5771	platform, (sdk >> `16`), ((sdk >> `8`) & `0xff`), (sdk & `0xff`));
5772	DTRACE_VM4(suspicious_task_vm_info_count,
5773	mach_msg_type_number_t, original_task_info_count,
5774	mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5775	uint32_t, platform,
5776	uint32_t, sdk);
5777	}
5778	#if __arm64__
5779	if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5780	platform == PLATFORM_IOS &&
5781	sdk != `0` &&
5782	(sdk >> `16`) <= `12`) {
5783	/*
5784	* Some iOS apps pass an incorrect value for
5785	* task_info_count, expressed in number of bytes
5786	* instead of number of "natural_t" elements.
5787	* For the sake of backwards binary compatibility
5788	* for apps built with an iOS12 or older SDK and using
5789	* the "rev2" data structure, let's fix task_info_count
5790	* for them, to avoid stomping past the actual end
5791	* of their buffer.
5792	*/
5793	#if DEVELOPMENT \|\| DEBUG
5794	printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5795	"platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5796	proc_name_address(p), original_task_info_count,
5797	TASK_VM_INFO_REV2_COUNT, platform, (sdk >> `16`),
5798	((sdk >> `8`) & `0xff`), (sdk & `0xff`));
5799	#endif /* DEVELOPMENT \|\| DEBUG */
5800	DTRACE_VM4(workaround_task_vm_info_count,
5801	mach_msg_type_number_t, original_task_info_count,
5802	mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5803	uint32_t, platform,
5804	uint32_t, sdk);
5805	original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5806	*task_info_count = original_task_info_count;
5807	}
5808	if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5809	platform == PLATFORM_IOS &&
5810	sdk != `0` &&
5811	(sdk >> `16`) <= `15`) {
5812	/*
5813	* Some iOS apps pass an incorrect value for
5814	* task_info_count, expressed in number of bytes
5815	* instead of number of "natural_t" elements.
5816	*/
5817	printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5818	"platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5819	proc_name_address(p), original_task_info_count,
5820	TASK_VM_INFO_REV5_COUNT, platform, (sdk >> `16`),
5821	((sdk >> `8`) & `0xff`), (sdk & `0xff`));
5822	DTRACE_VM4(workaround_task_vm_info_count,
5823	mach_msg_type_number_t, original_task_info_count,
5824	mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5825	uint32_t, platform,
5826	uint32_t, sdk);
5827	#if DEVELOPMENT \|\| DEBUG
5828	/*
5829	* For the sake of internal builds livability,
5830	* work around this user-space bug by capping the
5831	* buffer's size to what it was with the iOS15 SDK.
5832	*/
5833	original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5834	*task_info_count = original_task_info_count;
5835	#endif /* DEVELOPMENT \|\| DEBUG */
5836	}
5837	#endif /* __arm64__ */
5838
5839	if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5840	error = KERN_INVALID_ARGUMENT;
5841	break;
5842	}
5843
5844	vm_info = (task_vm_info_t)task_info_out;
5845
5846	/*
5847	* Do not hold both the task and map locks,
5848	* so convert the task lock into a map reference,
5849	* drop the task lock, then lock the map.
5850	*/
5851	if (is_kernel_task) {
5852	map = kernel_map;
5853	task_unlock(task);
5854	/ no lock, no reference /
5855	} else {
5856	map = task->map;
5857	vm_map_reference(map);
5858	task_unlock(task);
5859	vm_map_lock_read(map);
5860	}
5861
5862	vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5863	vm_info->region_count = map->hdr.nentries;
5864	vm_info->page_size = vm_map_page_size(map);
5865
5866	ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5867	ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5868
5869	vm_info->device = `0`;
5870	vm_info->device_peak = `0`;
5871	ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5872	ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5873	ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5874	ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5875	ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5876	ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5877	ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5878	ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5879	ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5880
5881	vm_info->purgeable_volatile_pmap = `0`;
5882	vm_info->purgeable_volatile_resident = `0`;
5883	vm_info->purgeable_volatile_virtual = `0`;
5884	if (is_kernel_task) {
5885	/*
5886	* We do not maintain the detailed stats for the
5887	* kernel_pmap, so just count everything as
5888	* "internal"...
5889	*/
5890	vm_info->internal = vm_info->resident_size;
5891	/*
5892	* ... but since the memory held by the VM compressor
5893	* in the kernel address space ought to be attributed
5894	* to user-space tasks, we subtract it from "internal"
5895	* to give memory reporting tools a more accurate idea
5896	* of what the kernel itself is actually using, instead
5897	* of making it look like the kernel is leaking memory
5898	* when the system is under memory pressure.
5899	*/
5900	vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5901	PAGE_SIZE);
5902	} else {
5903	mach_vm_size_t volatile_virtual_size;
5904	mach_vm_size_t volatile_resident_size;
5905	mach_vm_size_t volatile_compressed_size;
5906	mach_vm_size_t volatile_pmap_size;
5907	mach_vm_size_t volatile_compressed_pmap_size;
5908	kern_return_t kr;
5909
5910	if (flavor == TASK_VM_INFO_PURGEABLE) {
5911	kr = vm_map_query_volatile(
5912	map,
5913	&volatile_virtual_size,
5914	&volatile_resident_size,
5915	&volatile_compressed_size,
5916	&volatile_pmap_size,
5917	&volatile_compressed_pmap_size);
5918	if (kr == KERN_SUCCESS) {
5919	vm_info->purgeable_volatile_pmap =
5920	volatile_pmap_size;
5921	if (radar_20146450) {
5922	vm_info->compressed -=
5923	volatile_compressed_pmap_size;
5924	}
5925	vm_info->purgeable_volatile_resident =
5926	volatile_resident_size;
5927	vm_info->purgeable_volatile_virtual =
5928	volatile_virtual_size;
5929	}
5930	}
5931	}
5932	*task_info_count = TASK_VM_INFO_REV0_COUNT;
5933
5934	if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5935	/ must be captured while we still have the map lock /
5936	vm_info->min_address = map->min_offset;
5937	vm_info->max_address = map->max_offset;
5938	}
5939
5940	/*
5941	* Done with vm map things, can drop the map lock and reference,
5942	* and take the task lock back.
5943	*
5944	* Re-validate that the task didn't die on us.
5945	*/
5946	if (!is_kernel_task) {
5947	vm_map_unlock_read(map);
5948	vm_map_deallocate(map);
5949	}
5950	map = VM_MAP_NULL;
5951
5952	task_lock(task);
5953
5954	if ((task != current_task()) && (!task->active)) {
5955	error = KERN_INVALID_ARGUMENT;
5956	break;
5957	}
5958
5959	if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5960	vm_info->phys_footprint =
5961	(mach_vm_size_t) get_task_phys_footprint(task);
5962	*task_info_count = TASK_VM_INFO_REV1_COUNT;
5963	}
5964	if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5965	/ data was captured above /
5966	*task_info_count = TASK_VM_INFO_REV2_COUNT;
5967	}
5968
5969	if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5970	ledger_get_lifetime_max(task->ledger,
5971	task_ledgers.phys_footprint,
5972	&vm_info->ledger_phys_footprint_peak);
5973	ledger_get_balance(task->ledger,
5974	task_ledgers.purgeable_nonvolatile,
5975	&vm_info->ledger_purgeable_nonvolatile);
5976	ledger_get_balance(task->ledger,
5977	task_ledgers.purgeable_nonvolatile_compressed,
5978	&vm_info->ledger_purgeable_novolatile_compressed);
5979	ledger_get_balance(task->ledger,
5980	task_ledgers.purgeable_volatile,
5981	&vm_info->ledger_purgeable_volatile);
5982	ledger_get_balance(task->ledger,
5983	task_ledgers.purgeable_volatile_compressed,
5984	&vm_info->ledger_purgeable_volatile_compressed);
5985	ledger_get_balance(task->ledger,
5986	task_ledgers.network_nonvolatile,
5987	&vm_info->ledger_tag_network_nonvolatile);
5988	ledger_get_balance(task->ledger,
5989	task_ledgers.network_nonvolatile_compressed,
5990	&vm_info->ledger_tag_network_nonvolatile_compressed);
5991	ledger_get_balance(task->ledger,
5992	task_ledgers.network_volatile,
5993	&vm_info->ledger_tag_network_volatile);
5994	ledger_get_balance(task->ledger,
5995	task_ledgers.network_volatile_compressed,
5996	&vm_info->ledger_tag_network_volatile_compressed);
5997	ledger_get_balance(task->ledger,
5998	task_ledgers.media_footprint,
5999	&vm_info->ledger_tag_media_footprint);
6000	ledger_get_balance(task->ledger,
6001	task_ledgers.media_footprint_compressed,
6002	&vm_info->ledger_tag_media_footprint_compressed);
6003	ledger_get_balance(task->ledger,
6004	task_ledgers.media_nofootprint,
6005	&vm_info->ledger_tag_media_nofootprint);
6006	ledger_get_balance(task->ledger,
6007	task_ledgers.media_nofootprint_compressed,
6008	&vm_info->ledger_tag_media_nofootprint_compressed);
6009	ledger_get_balance(task->ledger,
6010	task_ledgers.graphics_footprint,
6011	&vm_info->ledger_tag_graphics_footprint);
6012	ledger_get_balance(task->ledger,
6013	task_ledgers.graphics_footprint_compressed,
6014	&vm_info->ledger_tag_graphics_footprint_compressed);
6015	ledger_get_balance(task->ledger,
6016	task_ledgers.graphics_nofootprint,
6017	&vm_info->ledger_tag_graphics_nofootprint);
6018	ledger_get_balance(task->ledger,
6019	task_ledgers.graphics_nofootprint_compressed,
6020	&vm_info->ledger_tag_graphics_nofootprint_compressed);
6021	ledger_get_balance(task->ledger,
6022	task_ledgers.neural_footprint,
6023	&vm_info->ledger_tag_neural_footprint);
6024	ledger_get_balance(task->ledger,
6025	task_ledgers.neural_footprint_compressed,
6026	&vm_info->ledger_tag_neural_footprint_compressed);
6027	ledger_get_balance(task->ledger,
6028	task_ledgers.neural_nofootprint,
6029	&vm_info->ledger_tag_neural_nofootprint);
6030	ledger_get_balance(task->ledger,
6031	task_ledgers.neural_nofootprint_compressed,
6032	&vm_info->ledger_tag_neural_nofootprint_compressed);
6033	*task_info_count = TASK_VM_INFO_REV3_COUNT;
6034	}
6035	if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
6036	if (get_bsdtask_info(task)) {
6037	vm_info->limit_bytes_remaining =
6038	memorystatus_available_memory_internal(get_bsdtask_info(task));
6039	} else {
6040	vm_info->limit_bytes_remaining = `0`;
6041	}
6042	*task_info_count = TASK_VM_INFO_REV4_COUNT;
6043	}
6044	if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
6045	thread_t thread;
6046	uint64_t total = task->decompressions;
6047	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6048	total += thread->decompressions;
6049	}
6050	vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
6051	*task_info_count = TASK_VM_INFO_REV5_COUNT;
6052	}
6053	if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
6054	ledger_get_balance(task->ledger, task_ledgers.swapins,
6055	&vm_info->ledger_swapins);
6056	*task_info_count = TASK_VM_INFO_REV6_COUNT;
6057	}
6058
6059	break;
6060	}
6061
6062	case TASK_WAIT_STATE_INFO:
6063	{
6064	/*
6065	* Deprecated flavor. Currently allowing some results until all users
6066	* stop calling it. The results may not be accurate.
6067	*/
6068	task_wait_state_info_t wait_state_info;
6069	uint64_t total_sfi_ledger_val = `0`;
6070
6071	if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
6072	error = KERN_INVALID_ARGUMENT;
6073	break;
6074	}
6075
6076	wait_state_info = (task_wait_state_info_t) task_info_out;
6077
6078	wait_state_info->total_wait_state_time = `0`;
6079	bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
6080
6081	#if CONFIG_SCHED_SFI
6082	int i, prev_lentry = -`1`;
6083	int64_t val_credit, val_debit;
6084
6085	for (i = `0`; i < MAX_SFI_CLASS_ID; i++) {
6086	val_credit = `0`;
6087	/*
6088	* checking with prev_lentry != entry ensures adjacent classes
6089	* which share the same ledger do not add wait times twice.
6090	* Note: Use ledger() call to get data for each individual sfi class.
6091	*/
6092	if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
6093	KERN_SUCCESS == ledger_get_entries(task->ledger,
6094	task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
6095	total_sfi_ledger_val += val_credit;
6096	}
6097	prev_lentry = task_ledgers.sfi_wait_times[i];
6098	}
6099
6100	#endif /* CONFIG_SCHED_SFI */
6101	wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
6102	*task_info_count = TASK_WAIT_STATE_INFO_COUNT;
6103
6104	break;
6105	}
6106	case TASK_VM_INFO_PURGEABLE_ACCOUNT:
6107	{
6108	#if DEVELOPMENT \|\| DEBUG
6109	pvm_account_info_t acnt_info;
6110
6111	if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
6112	error = KERN_INVALID_ARGUMENT;
6113	break;
6114	}
6115
6116	if (task_info_out == NULL) {
6117	error = KERN_INVALID_ARGUMENT;
6118	break;
6119	}
6120
6121	acnt_info = (pvm_account_info_t) task_info_out;
6122
6123	error = vm_purgeable_account(task, acnt_info);
6124
6125	*task_info_count = PVM_ACCOUNT_INFO_COUNT;
6126
6127	break;
6128	#else /* DEVELOPMENT \|\| DEBUG */
6129	error = KERN_NOT_SUPPORTED;
6130	break;
6131	#endif /* DEVELOPMENT \|\| DEBUG */
6132	}
6133	case TASK_FLAGS_INFO:
6134	{
6135	task_flags_info_t flags_info;
6136
6137	if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
6138	error = KERN_INVALID_ARGUMENT;
6139	break;
6140	}
6141
6142	flags_info = (task_flags_info_t)task_info_out;
6143
6144	/ only publish the 64-bit flag of the task /
6145	flags_info->flags = task->t_flags & (TF_64B_ADDR \| TF_64B_DATA);
6146
6147	*task_info_count = TASK_FLAGS_INFO_COUNT;
6148	break;
6149	}
6150
6151	case TASK_DEBUG_INFO_INTERNAL:
6152	{
6153	#if DEVELOPMENT \|\| DEBUG
6154	task_debug_info_internal_t dbg_info;
6155	ipc_space_t space = task->itk_space;
6156	if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
6157	error = KERN_NOT_SUPPORTED;
6158	break;
6159	}
6160
6161	if (task_info_out == NULL) {
6162	error = KERN_INVALID_ARGUMENT;
6163	break;
6164	}
6165	dbg_info = (task_debug_info_internal_t) task_info_out;
6166	dbg_info->ipc_space_size = `0`;
6167
6168	if (space) {
6169	smr_ipc_enter();
6170	ipc_entry_table_t table = smr_entered_load(&space->is_table);
6171	if (table) {
6172	dbg_info->ipc_space_size =
6173	ipc_entry_table_count(table);
6174	}
6175	smr_ipc_leave();
6176	}
6177
6178	dbg_info->suspend_count = task->suspend_count;
6179
6180	error = KERN_SUCCESS;
6181	*task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
6182	break;
6183	#else /* DEVELOPMENT \|\| DEBUG */
6184	error = KERN_NOT_SUPPORTED;
6185	break;
6186	#endif /* DEVELOPMENT \|\| DEBUG */
6187	}
6188	case TASK_SUSPEND_STATS_INFO:
6189	{
6190	#if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT \|\| DEBUG)
6191	if (*task_info_count < TASK_SUSPEND_STATS_INFO_COUNT \|\| task_info_out == NULL) {
6192	error = KERN_INVALID_ARGUMENT;
6193	break;
6194	}
6195	error = _task_get_suspend_stats_locked(task, (task_suspend_stats_t)task_info_out);
6196	*task_info_count = TASK_SUSPEND_STATS_INFO_COUNT;
6197	break;
6198	#else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT \|\| DEBUG) */
6199	error = KERN_NOT_SUPPORTED;
6200	break;
6201	#endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT \|\| DEBUG) */
6202	}
6203	case TASK_SUSPEND_SOURCES_INFO:
6204	{
6205	#if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT \|\| DEBUG)
6206	if (*task_info_count < TASK_SUSPEND_SOURCES_INFO_COUNT \|\| task_info_out == NULL) {
6207	error = KERN_INVALID_ARGUMENT;
6208	break;
6209	}
6210	error = _task_get_suspend_sources_locked(task, (task_suspend_source_t)task_info_out);
6211	*task_info_count = TASK_SUSPEND_SOURCES_INFO_COUNT;
6212	break;
6213	#else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT \|\| DEBUG) */
6214	error = KERN_NOT_SUPPORTED;
6215	break;
6216	#endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT \|\| DEBUG) */
6217	}
6218	default:
6219	error = KERN_INVALID_ARGUMENT;
6220	}
6221
6222	task_unlock(task);
6223	return error;
6224	}
6225
6226	/*
6227	* task_info_from_user
6228	*
6229	* When calling task_info from user space,
6230	* this function will be executed as mig server side
6231	* instead of calling directly into task_info.
6232	* This gives the possibility to perform more security
6233	* checks on task_port.
6234	*
6235	* In the case of TASK_DYLD_INFO, we require the more
6236	* privileged task_read_port not the less-privileged task_name_port.
6237	*
6238	*/
6239	kern_return_t
6240	task_info_from_user(
6241	mach_port_t task_port,
6242	task_flavor_t flavor,
6243	task_info_t task_info_out,
6244	mach_msg_type_number_t *task_info_count)
6245	{
6246	task_t task;
6247	kern_return_t ret;
6248
6249	if (flavor == TASK_DYLD_INFO) {
6250	task = convert_port_to_task_read(port: task_port);
6251	} else {
6252	task = convert_port_to_task_name(port: task_port);
6253	}
6254
6255	ret = task_info(task, flavor, task_info_out, task_info_count);
6256
6257	task_deallocate(task);
6258
6259	return ret;
6260	}
6261
6262	/*
6263	* Routine: task_dyld_process_info_update_helper
6264	*
6265	* Release send rights in release_ports.
6266	*
6267	* If no active ports found in task's dyld notifier array, unset the magic value
6268	* in user space to indicate so.
6269	*
6270	* Condition:
6271	* task's itk_lock is locked, and is unlocked upon return.
6272	* Global g_dyldinfo_mtx is locked, and is unlocked upon return.
6273	*/
6274	void
6275	task_dyld_process_info_update_helper(
6276	task_t task,
6277	size_t active_count,
6278	vm_map_address_t magic_addr, / a userspace address /
6279	ipc_port_t *release_ports,
6280	size_t release_count)
6281	{
6282	void *notifiers_ptr = NULL;
6283
6284	assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
6285
6286	if (active_count == `0`) {
6287	assert(task->itk_dyld_notify != NULL);
6288	notifiers_ptr = task->itk_dyld_notify;
6289	task->itk_dyld_notify = NULL;
6290	itk_unlock(task);
6291
6292	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
6293	(void)copyoutmap_atomic32(map: task->map, MACH_PORT_NULL, toaddr: magic_addr); / unset magic /
6294	} else {
6295	itk_unlock(task);
6296	(void)copyoutmap_atomic32(map: task->map, value: (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
6297	toaddr: magic_addr); / reset magic /
6298	}
6299
6300	lck_mtx_unlock(lck: &g_dyldinfo_mtx);
6301
6302	for (size_t i = `0`; i < release_count; i++) {
6303	ipc_port_release_send(port: release_ports[i]);
6304	}
6305	}
6306
6307	/*
6308	* Routine: task_dyld_process_info_notify_register
6309	*
6310	* Insert a send right to target task's itk_dyld_notify array. Allocate kernel
6311	* memory for the array if it's the first port to be registered. Also cleanup
6312	* any dead rights found in the array.
6313	*
6314	* Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
6315	*
6316	* Args:
6317	* task: Target task for the registration.
6318	* sright: A send right.
6319	*
6320	* Returns:
6321	* KERN_SUCCESS: Registration succeeded.
6322	* KERN_INVALID_TASK: task is invalid.
6323	* KERN_INVALID_RIGHT: sright is invalid.
6324	* KERN_DENIED: Security policy denied this call.
6325	* KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
6326	* KERN_NO_SPACE: No available notifier port slot left for this task.
6327	* KERN_RIGHT_EXISTS: The notifier port is already registered and active.
6328	*
6329	* Other error code see task_info().
6330	*
6331	* See Also:
6332	* task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6333	*/
6334	kern_return_t
6335	task_dyld_process_info_notify_register(
6336	task_t task,
6337	ipc_port_t sright)
6338	{
6339	struct task_dyld_info dyld_info;
6340	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
6341	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
6342	uint32_t release_count = `0`, active_count = `0`;
6343	mach_vm_address_t ports_addr; / a user space address /
6344	kern_return_t kr;
6345	boolean_t right_exists = false;
6346	ipc_port_t *notifiers_ptr = NULL;
6347	ipc_port_t *portp;
6348
6349	if (task == TASK_NULL \|\| task == kernel_task) {
6350	return KERN_INVALID_TASK;
6351	}
6352
6353	if (!IP_VALID(sright)) {
6354	return KERN_INVALID_RIGHT;
6355	}
6356
6357	#if CONFIG_MACF
6358	if (mac_task_check_dyld_process_info_notify_register()) {
6359	return KERN_DENIED;
6360	}
6361	#endif
6362
6363	kr = task_info(task, TASK_DYLD_INFO, task_info_out: (task_info_t)&dyld_info, task_info_count: &info_count);
6364	if (kr) {
6365	return kr;
6366	}
6367
6368	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
6369	ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6370	offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
6371	} else {
6372	ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6373	offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
6374	}
6375
6376	if (task->itk_dyld_notify == NULL) {
6377	notifiers_ptr = kalloc_type(ipc_port_t,
6378	DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
6379	Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
6380	}
6381
6382	lck_mtx_lock(lck: &g_dyldinfo_mtx);
6383	itk_lock(task);
6384
6385	if (task->itk_dyld_notify == NULL) {
6386	task->itk_dyld_notify = notifiers_ptr;
6387	notifiers_ptr = NULL;
6388	}
6389
6390	assert(task->itk_dyld_notify != NULL);
6391	/ First pass: clear dead names and check for duplicate registration /
6392	for (int slot = `0`; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
6393	portp = &task->itk_dyld_notify[slot];
6394	if (portp != IPC_PORT_NULL && !ip_active(portp)) {
6395	release_ports[release_count++] = *portp;
6396	*portp = IPC_PORT_NULL;
6397	} else if (*portp == sright) {
6398	/ the port is already registered and is active /
6399	right_exists = true;
6400	}
6401
6402	if (*portp != IPC_PORT_NULL) {
6403	active_count++;
6404	}
6405	}
6406
6407	if (right_exists) {
6408	/ skip second pass /
6409	kr = KERN_RIGHT_EXISTS;
6410	goto out;
6411	}
6412
6413	/ Second pass: register the port /
6414	kr = KERN_NO_SPACE;
6415	for (int slot = `0`; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
6416	portp = &task->itk_dyld_notify[slot];
6417	if (*portp == IPC_PORT_NULL) {
6418	*portp = sright;
6419	active_count++;
6420	kr = KERN_SUCCESS;
6421	break;
6422	}
6423	}
6424
6425	out:
6426	assert(active_count > `0`);
6427
6428	task_dyld_process_info_update_helper(task, active_count,
6429	magic_addr: (vm_map_address_t)ports_addr, release_ports, release_count);
6430	/ itk_lock, g_dyldinfo_mtx are unlocked upon return /
6431
6432	kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
6433
6434	return kr;
6435	}
6436
6437	/*
6438	* Routine: task_dyld_process_info_notify_deregister
6439	*
6440	* Remove a send right in target task's itk_dyld_notify array matching the receive
6441	* right name passed in. Deallocate kernel memory for the array if it's the last port to
6442	* be deregistered, or all ports have died. Also cleanup any dead rights found in the array.
6443	*
6444	* Does not consume any reference.
6445	*
6446	* Args:
6447	* task: Target task for the deregistration.
6448	* rcv_name: The name denoting the receive right in caller's space.
6449	*
6450	* Returns:
6451	* KERN_SUCCESS: A matching entry found and degistration succeeded.
6452	* KERN_INVALID_TASK: task is invalid.
6453	* KERN_INVALID_NAME: name is invalid.
6454	* KERN_DENIED: Security policy denied this call.
6455	* KERN_FAILURE: A matching entry is not found.
6456	* KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6457	*
6458	* Other error code see task_info().
6459	*
6460	* See Also:
6461	* task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6462	*/
6463	kern_return_t
6464	task_dyld_process_info_notify_deregister(
6465	task_t task,
6466	mach_port_name_t rcv_name)
6467	{
6468	struct task_dyld_info dyld_info;
6469	mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
6470	ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
6471	uint32_t release_count = `0`, active_count = `0`;
6472	boolean_t port_found = false;
6473	mach_vm_address_t ports_addr; / a user space address /
6474	ipc_port_t sright;
6475	kern_return_t kr;
6476	ipc_port_t *portp;
6477
6478	if (task == TASK_NULL \|\| task == kernel_task) {
6479	return KERN_INVALID_TASK;
6480	}
6481
6482	if (!MACH_PORT_VALID(rcv_name)) {
6483	return KERN_INVALID_NAME;
6484	}
6485
6486	#if CONFIG_MACF
6487	if (mac_task_check_dyld_process_info_notify_register()) {
6488	return KERN_DENIED;
6489	}
6490	#endif
6491
6492	kr = task_info(task, TASK_DYLD_INFO, task_info_out: (task_info_t)&dyld_info, task_info_count: &info_count);
6493	if (kr) {
6494	return kr;
6495	}
6496
6497	if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
6498	ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6499	offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
6500	} else {
6501	ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6502	offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
6503	}
6504
6505	kr = ipc_port_translate_receive(current_space(), name: rcv_name, portp: &sright); / does not produce port ref /
6506	if (kr) {
6507	return KERN_INVALID_RIGHT;
6508	}
6509
6510	ip_reference(sright);
6511	ip_mq_unlock(sright);
6512
6513	assert(sright != IPC_PORT_NULL);
6514
6515	lck_mtx_lock(lck: &g_dyldinfo_mtx);
6516	itk_lock(task);
6517
6518	if (task->itk_dyld_notify == NULL) {
6519	itk_unlock(task);
6520	lck_mtx_unlock(lck: &g_dyldinfo_mtx);
6521	ip_release(sright);
6522	return KERN_FAILURE;
6523	}
6524
6525	for (int slot = `0`; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
6526	portp = &task->itk_dyld_notify[slot];
6527	if (*portp == sright) {
6528	release_ports[release_count++] = *portp;
6529	*portp = IPC_PORT_NULL;
6530	port_found = true;
6531	} else if ((portp != IPC_PORT_NULL && !ip_active(portp))) {
6532	release_ports[release_count++] = *portp;
6533	*portp = IPC_PORT_NULL;
6534	}
6535
6536	if (*portp != IPC_PORT_NULL) {
6537	active_count++;
6538	}
6539	}
6540
6541	task_dyld_process_info_update_helper(task, active_count,
6542	magic_addr: (vm_map_address_t)ports_addr, release_ports, release_count);
6543	/ itk_lock, g_dyldinfo_mtx are unlocked upon return /
6544
6545	ip_release(sright);
6546
6547	return port_found ? KERN_SUCCESS : KERN_FAILURE;
6548	}
6549
6550	/*
6551	* task_power_info
6552	*
6553	* Returns power stats for the task.
6554	* Note: Called with task locked.
6555	*/
6556	void
6557	task_power_info_locked(
6558	task_t task,
6559	task_power_info_t info,
6560	gpu_energy_data_t ginfo,
6561	task_power_info_v2_t infov2,
6562	struct task_power_info_extra *extra_info)
6563	{
6564	thread_t thread;
6565	ledger_amount_t tmp;
6566
6567	uint64_t runnable_time_sum = `0`;
6568
6569	task_lock_assert_owned(task);
6570
6571	ledger_get_entries(ledger: task->ledger, entry: task_ledgers.interrupt_wakeups,
6572	credit: (ledger_amount_t *)&info->task_interrupt_wakeups, debit: &tmp);
6573	ledger_get_entries(ledger: task->ledger, entry: task_ledgers.platform_idle_wakeups,
6574	credit: (ledger_amount_t *)&info->task_platform_idle_wakeups, debit: &tmp);
6575
6576	info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
6577	info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
6578
6579	struct recount_usage usage = { `0` };
6580	struct recount_usage usage_perf = { `0` };
6581	recount_task_usage_perf_only(task, sum: &usage, sum_perf_only: &usage_perf);
6582
6583	info->total_user = usage.ru_metrics[RCT_LVL_USER].rm_time_mach;
6584	info->total_system = recount_usage_system_time_mach(usage: &usage);
6585	runnable_time_sum = task->total_runnable_time;
6586
6587	if (ginfo) {
6588	ginfo->task_gpu_utilisation = task->task_gpu_ns;
6589	}
6590
6591	if (infov2) {
6592	infov2->task_ptime = recount_usage_time_mach(usage: &usage_perf);
6593	infov2->task_pset_switches = task->ps_switch;
6594	#if CONFIG_PERVASIVE_ENERGY
6595	infov2->task_energy = usage.ru_energy_nj;
6596	#endif /* CONFIG_PERVASIVE_ENERGY */
6597	}
6598
6599	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6600	spl_t x;
6601
6602	if (thread->options & TH_OPT_IDLE_THREAD) {
6603	continue;
6604	}
6605
6606	x = splsched();
6607	thread_lock(thread);
6608
6609	info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
6610	info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
6611
6612	if (infov2) {
6613	infov2->task_pset_switches += thread->ps_switch;
6614	}
6615
6616	runnable_time_sum += timer_grab(timer: &thread->runnable_timer);
6617
6618	if (ginfo) {
6619	ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
6620	}
6621	thread_unlock(thread);
6622	splx(x);
6623	}
6624
6625	if (extra_info) {
6626	extra_info->runnable_time = runnable_time_sum;
6627	#if CONFIG_PERVASIVE_CPI
6628	extra_info->cycles = recount_usage_cycles(&usage);
6629	extra_info->instructions = recount_usage_instructions(&usage);
6630	extra_info->pcycles = recount_usage_cycles(&usage_perf);
6631	extra_info->pinstructions = recount_usage_instructions(&usage_perf);
6632	extra_info->user_ptime = usage_perf.ru_metrics[RCT_LVL_USER].rm_time_mach;
6633	extra_info->system_ptime = recount_usage_system_time_mach(&usage_perf);
6634	#endif // CONFIG_PERVASIVE_CPI
6635	#if CONFIG_PERVASIVE_ENERGY
6636	extra_info->energy = usage.ru_energy_nj;
6637	extra_info->penergy = usage_perf.ru_energy_nj;
6638	#endif // CONFIG_PERVASIVE_ENERGY
6639	#if RECOUNT_SECURE_METRICS
6640	if (PE_i_can_has_debugger(NULL)) {
6641	extra_info->secure_time = usage.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
6642	extra_info->secure_ptime = usage_perf.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
6643	}
6644	#endif // RECOUNT_SECURE_METRICS
6645	}
6646	}
6647
6648	/*
6649	* task_gpu_utilisation
6650	*
6651	* Returns the total gpu time used by the all the threads of the task
6652	* (both dead and alive)
6653	*/
6654	uint64_t
6655	task_gpu_utilisation(
6656	task_t task)
6657	{
6658	uint64_t gpu_time = `0`;
6659	#if defined(__x86_64__)
6660	thread_t thread;
6661
6662	task_lock(task);
6663	gpu_time += task->task_gpu_ns;
6664
6665	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6666	spl_t x;
6667	x = splsched();
6668	thread_lock(thread);
6669	gpu_time += ml_gpu_stat(thread);
6670	thread_unlock(thread);
6671	splx(x);
6672	}
6673
6674	task_unlock(task);
6675	#else /* defined(__x86_64__) */
6676	/ silence compiler warning /
6677	(void)task;
6678	#endif /* defined(__x86_64__) */
6679	return gpu_time;
6680	}
6681
6682	/ This function updates the cpu time in the arrays for each*
6683	* effective and requested QoS class
6684	*/
6685	void
6686	task_update_cpu_time_qos_stats(
6687	task_t task,
6688	uint64_t *eqos_stats,
6689	uint64_t *rqos_stats)
6690	{
6691	if (!eqos_stats && !rqos_stats) {
6692	return;
6693	}
6694
6695	task_lock(task);
6696	thread_t thread;
6697	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6698	if (thread->options & TH_OPT_IDLE_THREAD) {
6699	continue;
6700	}
6701
6702	thread_update_qos_cpu_time(thread);
6703	}
6704
6705	if (eqos_stats) {
6706	eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
6707	eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
6708	eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
6709	eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
6710	eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
6711	eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
6712	eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
6713	}
6714
6715	if (rqos_stats) {
6716	rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
6717	rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
6718	rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
6719	rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
6720	rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
6721	rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
6722	rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
6723	}
6724
6725	task_unlock(task);
6726	}
6727
6728	kern_return_t
6729	task_purgable_info(
6730	task_t task,
6731	task_purgable_info_t *stats)
6732	{
6733	if (task == TASK_NULL \|\| stats == NULL) {
6734	return KERN_INVALID_ARGUMENT;
6735	}
6736	/ Take task reference /
6737	task_reference(task);
6738	vm_purgeable_stats(info: (vm_purgeable_info_t)stats, target_task: task);
6739	/ Drop task reference /
6740	task_deallocate(task);
6741	return KERN_SUCCESS;
6742	}
6743
6744	void
6745	task_vtimer_set(
6746	task_t task,
6747	integer_t which)
6748	{
6749	thread_t thread;
6750	spl_t x;
6751
6752	task_lock(task);
6753
6754	task->vtimers \|= which;
6755
6756	switch (which) {
6757	case TASK_VTIMER_USER:
6758	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6759	x = splsched();
6760	thread_lock(thread);
6761	struct recount_times_mach times = recount_thread_times(thread);
6762	thread->vtimer_user_save = times.rtm_user;
6763	thread_unlock(thread);
6764	splx(x);
6765	}
6766	break;
6767
6768	case TASK_VTIMER_PROF:
6769	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6770	x = splsched();
6771	thread_lock(thread);
6772	thread->vtimer_prof_save = recount_thread_time_mach(thread);
6773	thread_unlock(thread);
6774	splx(x);
6775	}
6776	break;
6777
6778	case TASK_VTIMER_RLIM:
6779	queue_iterate(&task->threads, thread, thread_t, task_threads) {
6780	x = splsched();
6781	thread_lock(thread);
6782	thread->vtimer_rlim_save = recount_thread_time_mach(thread);
6783	thread_unlock(thread);
6784	splx(x);
6785	}
6786	break;
6787	}
6788
6789	task_unlock(task);
6790	}
6791
6792	void
6793	task_vtimer_clear(
6794	task_t task,
6795	integer_t which)
6796	{
6797	task_lock(task);
6798
6799	task->vtimers &= ~which;
6800
6801	task_unlock(task);
6802	}
6803
6804	void
6805	task_vtimer_update(
6806	__unused
6807	task_t task,
6808	integer_t which,
6809	uint32_t *microsecs)
6810	{
6811	thread_t thread = current_thread();
6812	uint32_t tdelt = `0`;
6813	clock_sec_t secs = `0`;
6814	uint64_t tsum;
6815
6816	assert(task == current_task());
6817
6818	spl_t s = splsched();
6819	thread_lock(thread);
6820
6821	if ((task->vtimers & which) != (uint32_t)which) {
6822	thread_unlock(thread);
6823	splx(s);
6824	return;
6825	}
6826
6827	switch (which) {
6828	case TASK_VTIMER_USER:;
6829	struct recount_times_mach times = recount_thread_times(thread);
6830	tsum = times.rtm_user;
6831	tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
6832	thread->vtimer_user_save = tsum;
6833	absolutetime_to_microtime(abstime: tdelt, secs: &secs, microsecs);
6834	break;
6835
6836	case TASK_VTIMER_PROF:
6837	tsum = recount_current_thread_time_mach();
6838	tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
6839	absolutetime_to_microtime(abstime: tdelt, secs: &secs, microsecs);
6840	/ if the time delta is smaller than a usec, ignore /
6841	if (*microsecs != `0`) {
6842	thread->vtimer_prof_save = tsum;
6843	}
6844	break;
6845
6846	case TASK_VTIMER_RLIM:
6847	tsum = recount_current_thread_time_mach();
6848	tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
6849	thread->vtimer_rlim_save = tsum;
6850	absolutetime_to_microtime(abstime: tdelt, secs: &secs, microsecs);
6851	break;
6852	}
6853
6854	thread_unlock(thread);
6855	splx(s);
6856	}
6857
6858	uint64_t
6859	get_task_dispatchqueue_offset(
6860	task_t task)
6861	{
6862	return task->dispatchqueue_offset;
6863	}
6864
6865	void
6866	task_synchronizer_destroy_all(task_t task)
6867	{
6868	/*
6869	* Destroy owned semaphores
6870	*/
6871	semaphore_destroy_all(task);
6872	}
6873
6874	/*
6875	* Install default (machine-dependent) initial thread state
6876	* on the task. Subsequent thread creation will have this initial
6877	* state set on the thread by machine_thread_inherit_taskwide().
6878	* Flavors and structures are exactly the same as those to thread_set_state()
6879	*/
6880	kern_return_t
6881	task_set_state(
6882	task_t task,
6883	int flavor,
6884	thread_state_t state,
6885	mach_msg_type_number_t state_count)
6886	{
6887	kern_return_t ret;
6888
6889	if (task == TASK_NULL) {
6890	return KERN_INVALID_ARGUMENT;
6891	}
6892
6893	task_lock(task);
6894
6895	if (!task->active) {
6896	task_unlock(task);
6897	return KERN_FAILURE;
6898	}
6899
6900	ret = machine_task_set_state(task, flavor, state, state_count);
6901
6902	task_unlock(task);
6903	return ret;
6904	}
6905
6906	/*
6907	* Examine the default (machine-dependent) initial thread state
6908	* on the task, as set by task_set_state(). Flavors and structures
6909	* are exactly the same as those passed to thread_get_state().
6910	*/
6911	kern_return_t
6912	task_get_state(
6913	task_t task,
6914	int flavor,
6915	thread_state_t state,
6916	mach_msg_type_number_t *state_count)
6917	{
6918	kern_return_t ret;
6919
6920	if (task == TASK_NULL) {
6921	return KERN_INVALID_ARGUMENT;
6922	}
6923
6924	task_lock(task);
6925
6926	if (!task->active) {
6927	task_unlock(task);
6928	return KERN_FAILURE;
6929	}
6930
6931	ret = machine_task_get_state(task, flavor, state, state_count);
6932
6933	task_unlock(task);
6934	return ret;
6935	}
6936
6937
6938	static kern_return_t __attribute__((noinline, not_tail_called))
6939	PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
6940	mach_exception_code_t code,
6941	mach_exception_subcode_t subcode,
6942	void *reason,
6943	boolean_t backtrace_only)
6944	{
6945	#ifdef MACH_BSD
6946	if (`1` == proc_selfpid()) {
6947	return KERN_NOT_SUPPORTED; // initproc is immune
6948	}
6949	#endif
6950	mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
6951	[`0`] = code,
6952	[`1`] = subcode,
6953	};
6954	task_t task = current_task();
6955	kern_return_t kr;
6956	void *bsd_info = get_bsdtask_info(task);
6957
6958	/ (See jetsam-related comments below) /
6959
6960	proc_memstat_skip(p: bsd_info, TRUE);
6961	kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, code: codes, codeCnt: `2`, reason, lightweight: backtrace_only);
6962	proc_memstat_skip(p: bsd_info, FALSE);
6963	return kr;
6964	}
6965
6966	kern_return_t
6967	task_violated_guard(
6968	mach_exception_code_t code,
6969	mach_exception_subcode_t subcode,
6970	void *reason,
6971	bool backtrace_only)
6972	{
6973	return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
6974	}
6975
6976
6977	#if CONFIG_MEMORYSTATUS
6978
6979	boolean_t
6980	task_get_memlimit_is_active(task_t task)
6981	{
6982	assert(task != NULL);
6983
6984	if (task->memlimit_is_active == `1`) {
6985	return TRUE;
6986	} else {
6987	return FALSE;
6988	}
6989	}
6990
6991	void
6992	task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6993	{
6994	assert(task != NULL);
6995
6996	if (memlimit_is_active) {
6997	task->memlimit_is_active = `1`;
6998	} else {
6999	task->memlimit_is_active = `0`;
7000	}
7001	}
7002
7003	boolean_t
7004	task_get_memlimit_is_fatal(task_t task)
7005	{
7006	assert(task != NULL);
7007
7008	if (task->memlimit_is_fatal == `1`) {
7009	return TRUE;
7010	} else {
7011	return FALSE;
7012	}
7013	}
7014
7015	void
7016	task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
7017	{
7018	assert(task != NULL);
7019
7020	if (memlimit_is_fatal) {
7021	task->memlimit_is_fatal = `1`;
7022	} else {
7023	task->memlimit_is_fatal = `0`;
7024	}
7025	}
7026
7027	uint64_t
7028	task_get_dirty_start(task_t task)
7029	{
7030	return task->memstat_dirty_start;
7031	}
7032
7033	void
7034	task_set_dirty_start(task_t task, uint64_t start)
7035	{
7036	task_lock(task);
7037	task->memstat_dirty_start = start;
7038	task_unlock(task);
7039	}
7040
7041	boolean_t
7042	task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7043	{
7044	boolean_t triggered = FALSE;
7045
7046	assert(task == current_task());
7047
7048	/*
7049	* Returns true, if task has already triggered an exc_resource exception.
7050	*/
7051
7052	if (memlimit_is_active) {
7053	triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
7054	} else {
7055	triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
7056	}
7057
7058	return triggered;
7059	}
7060
7061	void
7062	task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7063	{
7064	assert(task == current_task());
7065
7066	/*
7067	* We allow one exc_resource per process per active/inactive limit.
7068	* The limit's fatal attribute does not come into play.
7069	*/
7070
7071	if (memlimit_is_active) {
7072	task->memlimit_active_exc_resource = `1`;
7073	} else {
7074	task->memlimit_inactive_exc_resource = `1`;
7075	}
7076	}
7077
7078	#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required after core file creation
7079
7080	void __attribute__((noinline))
7081	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options)
7082	{
7083	task_t task = current_task();
7084	int pid = `0`;
7085	const char *procname = "unknown";
7086	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
7087	boolean_t send_sync_exc_resource = FALSE;
7088	void *cur_bsd_info = get_bsdtask_info(current_task());
7089
7090	#ifdef MACH_BSD
7091	pid = proc_selfpid();
7092
7093	if (pid == `1`) {
7094	/*
7095	* Cannot have ReportCrash analyzing
7096	* a suspended initproc.
7097	*/
7098	return;
7099	}
7100
7101	if (cur_bsd_info != NULL) {
7102	procname = proc_name_address(p: cur_bsd_info);
7103	send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(p: cur_bsd_info);
7104	}
7105	#endif
7106	#if CONFIG_COREDUMP
7107	if (hwm_user_cores) {
7108	int error;
7109	uint64_t starttime, end;
7110	clock_sec_t secs = `0`;
7111	uint32_t microsecs = `0`;
7112
7113	starttime = mach_absolute_time();
7114	/*
7115	* Trigger a coredump of this process. Don't proceed unless we know we won't
7116	* be filling up the disk; and ignore the core size resource limit for this
7117	* core file.
7118	*/
7119	if ((error = coredump(p: cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != `0`) {
7120	printf(format: "couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
7121	}
7122	/*
7123	* coredump() leaves the task suspended.
7124	*/
7125	task_resume_internal(task: current_task());
7126
7127	end = mach_absolute_time();
7128	absolutetime_to_microtime(abstime: end - starttime, secs: &secs, microsecs: &microsecs);
7129	printf(format: "coredump of %s[%d] taken in %d secs %d microsecs\n",
7130	proc_name_address(p: cur_bsd_info), pid, (int)secs, microsecs);
7131	}
7132	#endif /* CONFIG_COREDUMP */
7133
7134	if (disable_exc_resource) {
7135	printf(format: "process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
7136	"suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
7137	return;
7138	}
7139	printf(format: "process %s [%d] crossed memory %s (%d MB); EXC_RESOURCE "
7140	"\n", procname, pid, (!(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? "high watermark" : "diagnostics limit"), max_footprint_mb);
7141
7142	/*
7143	* A task that has triggered an EXC_RESOURCE, should not be
7144	* jetsammed when the device is under memory pressure. Here
7145	* we set the P_MEMSTAT_SKIP flag so that the process
7146	* will be skipped if the memorystatus_thread wakes up.
7147	*
7148	* This is a debugging aid to ensure we can get a corpse before
7149	* the jetsam thread kills the process.
7150	* Note that proc_memstat_skip is a no-op on release kernels.
7151	*/
7152	proc_memstat_skip(p: cur_bsd_info, TRUE);
7153
7154	code[`0`] = code[`1`] = `0`;
7155	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_MEMORY);
7156	/*
7157	* Regardless if there was a diag memlimit violation, fatal exceptions shall be notified always
7158	* as high level watermaks. In another words, if there was a diag limit and a watermark, and the
7159	* violation if for limit watermark, a watermark shall be reported.
7160	*/
7161	if (!(exception_options & EXEC_RESOURCE_FATAL)) {
7162	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], !(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? FLAVOR_HIGH_WATERMARK : FLAVOR_DIAG_MEMLIMIT);
7163	} else {
7164	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], FLAVOR_HIGH_WATERMARK );
7165	}
7166	EXC_RESOURCE_HWM_ENCODE_LIMIT(code[`0`], max_footprint_mb);
7167	/*
7168	* Do not generate a corpse fork if the violation is a fatal one
7169	* or the process wants synchronous EXC_RESOURCE exceptions.
7170	*/
7171	if ((exception_options & EXEC_RESOURCE_FATAL) \|\| send_sync_exc_resource \|\| !exc_via_corpse_forking) {
7172	if (exception_options & EXEC_RESOURCE_FATAL) {
7173	vm_map_set_corpse_source(map: task->map);
7174	}
7175
7176	/ Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set /
7177	if (send_sync_exc_resource \|\| !corpse_for_fatal_memkill) {
7178	/*
7179	* Use the _internal_ variant so that no user-space
7180	* process can resume our task from under us.
7181	*/
7182	task_suspend_internal(task);
7183	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7184	task_resume_internal(task);
7185	}
7186	} else {
7187	if (disable_exc_resource_during_audio && audio_active) {
7188	printf(format: "process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
7189	"suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
7190	} else {
7191	task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
7192	code, EXCEPTION_CODE_MAX, NULL, FALSE);
7193	}
7194	}
7195
7196	/*
7197	* After the EXC_RESOURCE has been handled, we must clear the
7198	* P_MEMSTAT_SKIP flag so that the process can again be
7199	* considered for jetsam if the memorystatus_thread wakes up.
7200	*/
7201	proc_memstat_skip(p: cur_bsd_info, FALSE); / clear the flag /
7202	}
7203	/*
7204	* Callback invoked when a task exceeds its physical footprint limit.
7205	*/
7206	void
7207	task_footprint_exceeded(int warning, __unused const void param0, __unused const* void *param1)
7208	{
7209	ledger_amount_t max_footprint = `0`;
7210	ledger_amount_t max_footprint_mb = `0`;
7211	#if DEBUG \|\| DEVELOPMENT
7212	ledger_amount_t diag_threshold_limit_mb = `0`;
7213	ledger_amount_t diag_threshold_limit = `0`;
7214	#endif
7215	#if CONFIG_DEFERRED_RECLAIM
7216	ledger_amount_t current_footprint;
7217	#endif /* CONFIG_DEFERRED_RECLAIM */
7218	task_t task;
7219	send_exec_resource_is_warning is_warning = IS_NOT_WARNING;
7220	boolean_t memlimit_is_active;
7221	send_exec_resource_is_fatal memlimit_is_fatal;
7222	send_exec_resource_is_diagnostics is_diag_mem_threshold = IS_NOT_DIAGNOSTICS;
7223	if (warning == LEDGER_WARNING_DIAG_MEM_THRESHOLD) {
7224	is_diag_mem_threshold = IS_DIAGNOSTICS;
7225	is_warning = IS_WARNING;
7226	} else if (warning == LEDGER_WARNING_DIPPED_BELOW) {
7227	/*
7228	* Task memory limits only provide a warning on the way up.
7229	*/
7230	return;
7231	} else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7232	/*
7233	* This task is in danger of violating a memory limit,
7234	* It has exceeded a percentage level of the limit.
7235	*/
7236	is_warning = IS_WARNING;
7237	} else {
7238	/*
7239	* The task has exceeded the physical footprint limit.
7240	* This is not a warning but a true limit violation.
7241	*/
7242	is_warning = IS_NOT_WARNING;
7243	}
7244
7245	task = current_task();
7246
7247	ledger_get_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint, limit: &max_footprint);
7248	#if DEBUG \|\| DEVELOPMENT
7249	ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &diag_threshold_limit);
7250	#endif
7251	#if CONFIG_DEFERRED_RECLAIM
7252	if (task->deferred_reclamation_metadata != NULL) {
7253	/*
7254	* Task is enrolled in deferred reclamation.
7255	* Do a reclaim to ensure it's really over its limit.
7256	*/
7257	vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
7258	ledger_get_balance(ledger: task->ledger, entry: task_ledgers.phys_footprint, balance: &current_footprint);
7259	if (current_footprint < max_footprint) {
7260	return;
7261	}
7262	}
7263	#endif /* CONFIG_DEFERRED_RECLAIM */
7264	max_footprint_mb = max_footprint >> `20`;
7265	#if DEBUG \|\| DEVELOPMENT
7266	diag_threshold_limit_mb = diag_threshold_limit >> `20`;
7267	#endif
7268	memlimit_is_active = task_get_memlimit_is_active(task);
7269	memlimit_is_fatal = task_get_memlimit_is_fatal(task) == FALSE ? IS_NOT_FATAL : IS_FATAL;
7270	#if DEBUG \|\| DEVELOPMENT
7271	if (is_diag_mem_threshold == IS_NOT_DIAGNOSTICS) {
7272	task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
7273	} else {
7274	task_process_crossed_limit_diag(diag_threshold_limit_mb);
7275	}
7276	#else
7277	task_process_crossed_limit_no_diag(task, ledger_limit_size: max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
7278	#endif
7279	}
7280
7281	/*
7282	* Actions to perfrom when a process has crossed watermark or is a fatal consumption */
7283	static inline void
7284	task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning)
7285	{
7286	send_exec_resource_options_t exception_options = `0`;
7287	if (memlimit_is_fatal) {
7288	exception_options \|= EXEC_RESOURCE_FATAL;
7289	}
7290	/*
7291	* If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
7292	* We only generate the exception once per process per memlimit (active/inactive limit).
7293	* To enforce this, we monitor state based on the memlimit's active/inactive attribute
7294	* and we disable it by marking that memlimit as exception triggered.
7295	*/
7296	if (is_warning == IS_NOT_WARNING && !task_has_triggered_exc_resource(task, memlimit_is_active)) {
7297	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(max_footprint_mb: (int)ledger_limit_size, exception_options);
7298	// If it was not a diag threshold (if was a memory limit), then we do not want more signalling,
7299	// however, if was a diag limit, the user may reload a different limit and signal again the violation
7300	memorystatus_log_exception(max_footprint_mb: (int)ledger_limit_size, memlimit_is_active, memlimit_is_fatal);
7301	task_mark_has_triggered_exc_resource(task, memlimit_is_active);
7302	}
7303	memorystatus_on_ledger_footprint_exceeded(warning: is_warning == IS_NOT_WARNING ? FALSE : TRUE, memlimit_is_active, memlimit_is_fatal);
7304	}
7305
7306	#if DEBUG \|\| DEVELOPMENT
7307	/**
7308	* Actions to take when a process has crossed the diagnostics limit
7309	*/
7310	static inline void
7311	task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size)
7312	{
7313	/*
7314	* If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
7315	* In the case of the diagnostics thresholds, the exception will be signaled only once, but the
7316	* inhibit / rearm mechanism if performed at ledger level.
7317	*/
7318	send_exec_resource_options_t exception_options = EXEC_RESOURCE_DIAGNOSTIC;
7319	PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
7320	memorystatus_log_diag_threshold_exception((int)ledger_limit_size);
7321	}
7322	#endif
7323
7324	extern int proc_check_footprint_priv(void);
7325
7326	kern_return_t
7327	task_set_phys_footprint_limit(
7328	task_t task,
7329	int new_limit_mb,
7330	int *old_limit_mb)
7331	{
7332	kern_return_t error;
7333
7334	boolean_t memlimit_is_active;
7335	boolean_t memlimit_is_fatal;
7336
7337	if ((error = proc_check_footprint_priv())) {
7338	return KERN_NO_ACCESS;
7339	}
7340
7341	/*
7342	* This call should probably be obsoleted.
7343	* But for now, we default to current state.
7344	*/
7345	memlimit_is_active = task_get_memlimit_is_active(task);
7346	memlimit_is_fatal = task_get_memlimit_is_fatal(task);
7347
7348	return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
7349	}
7350
7351	/*
7352	* Set the limit of diagnostics memory consumption for a concrete task
7353	*/
7354	#if CONFIG_MEMORYSTATUS
7355	#if DEVELOPMENT \|\| DEBUG
7356	kern_return_t
7357	task_set_diag_footprint_limit(
7358	task_t task,
7359	uint64_t new_limit_mb,
7360	uint64_t *old_limit_mb)
7361	{
7362	kern_return_t error;
7363
7364	if ((error = proc_check_footprint_priv())) {
7365	return KERN_NO_ACCESS;
7366	}
7367
7368	return task_set_diag_footprint_limit_internal(task, new_limit_mb, old_limit_mb);
7369	}
7370
7371	#endif // DEVELOPMENT \|\| DEBUG
7372	#endif // CONFIG_MEMORYSTATUS
7373
7374	kern_return_t
7375	task_convert_phys_footprint_limit(
7376	int limit_mb,
7377	int *converted_limit_mb)
7378	{
7379	if (limit_mb == -`1`) {
7380	/*
7381	* No limit
7382	*/
7383	if (max_task_footprint != `0`) {
7384	converted_limit_mb = (int)(max_task_footprint / `1024` / `1024`); /* bytes to MB /
7385	} else {
7386	converted_limit_mb = (int*)(LEDGER_LIMIT_INFINITY >> `20`);
7387	}
7388	} else {
7389	/ nothing to convert /
7390	*converted_limit_mb = limit_mb;
7391	}
7392	return KERN_SUCCESS;
7393	}
7394
7395	kern_return_t
7396	task_set_phys_footprint_limit_internal(
7397	task_t task,
7398	int new_limit_mb,
7399	int *old_limit_mb,
7400	boolean_t memlimit_is_active,
7401	boolean_t memlimit_is_fatal)
7402	{
7403	ledger_amount_t old;
7404	kern_return_t ret;
7405	#if DEVELOPMENT \|\| DEBUG
7406	diagthreshold_check_return diag_threshold_validity;
7407	#endif
7408	ret = ledger_get_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint, limit: &old);
7409
7410	if (ret != KERN_SUCCESS) {
7411	return ret;
7412	}
7413	/**
7414	* Maybe we will need to re-enable the diag threshold, lets get the value
7415	* and the current status
7416	*/
7417	#if DEVELOPMENT \|\| DEBUG
7418	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_mb, false);
7419	/**
7420	* If the footprint and diagnostics threshold are going to be same, lets disable the threshold
7421	*/
7422	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7423	ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7424	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7425	ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7426	}
7427	#endif
7428
7429	/*
7430	* Check that limit >> 20 will not give an "unexpected" 32-bit
7431	* result. There are, however, implicit assumptions that -1 mb limit
7432	* equates to LEDGER_LIMIT_INFINITY.
7433	*/
7434	assert(((old & `0xFFF0000000000000LL`) == `0`) \|\| (old == LEDGER_LIMIT_INFINITY));
7435
7436	if (old_limit_mb) {
7437	old_limit_mb = (int*)(old >> `20`);
7438	}
7439
7440	if (new_limit_mb == -`1`) {
7441	/*
7442	* Caller wishes to remove the limit.
7443	*/
7444	ledger_set_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint,
7445	limit: max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
7446	warn_level_percentage: max_task_footprint ? (uint8_t)max_task_footprint_warning_level : `0`);
7447
7448	task_lock(task);
7449	task_set_memlimit_is_active(task, memlimit_is_active);
7450	task_set_memlimit_is_fatal(task, memlimit_is_fatal);
7451	task_unlock(task);
7452	/**
7453	* If the diagnostics were disabled, and now we have a new limit, we have to re-enable it.
7454	*/
7455	#if DEVELOPMENT \|\| DEBUG
7456	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7457	ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7458	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7459	ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7460	}
7461	#endif
7462	return KERN_SUCCESS;
7463	}
7464
7465	#ifdef CONFIG_NOMONITORS
7466	return KERN_SUCCESS;
7467	#endif /* CONFIG_NOMONITORS */
7468
7469	task_lock(task);
7470
7471	if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
7472	(memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
7473	(((ledger_amount_t)new_limit_mb << `20`) == old)) {
7474	/*
7475	* memlimit state is not changing
7476	*/
7477	task_unlock(task);
7478	return KERN_SUCCESS;
7479	}
7480
7481	task_set_memlimit_is_active(task, memlimit_is_active);
7482	task_set_memlimit_is_fatal(task, memlimit_is_fatal);
7483
7484	ledger_set_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint,
7485	limit: (ledger_amount_t)new_limit_mb << `20`, PHYS_FOOTPRINT_WARNING_LEVEL);
7486
7487	if (task == current_task()) {
7488	ledger_check_new_balance(thread: current_thread(), ledger: task->ledger,
7489	entry: task_ledgers.phys_footprint);
7490	}
7491
7492	task_unlock(task);
7493	#if DEVELOPMENT \|\| DEBUG
7494	if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7495	ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7496	}
7497	#endif
7498
7499	return KERN_SUCCESS;
7500	}
7501
7502	#if RESETTABLE_DIAG_FOOTPRINT_LIMITS
7503	kern_return_t
7504	task_set_diag_footprint_limit_internal(
7505	task_t task,
7506	uint64_t new_limit_bytes,
7507	uint64_t *old_limit_bytes)
7508	{
7509	ledger_amount_t old = `0`;
7510	kern_return_t ret = KERN_SUCCESS;
7511	diagthreshold_check_return diag_threshold_validity;
7512	ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &old);
7513
7514	if (ret != KERN_SUCCESS) {
7515	return ret;
7516	}
7517	/**
7518	* Maybe we will need to re-enable the diag threshold, lets get the value
7519	* and the current status
7520	*/
7521	diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_bytes >> `20`, true);
7522	/**
7523	* If the footprint and diagnostics threshold are going to be same, lets disable the threshold
7524	*/
7525	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7526	ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7527	}
7528
7529	/*
7530	* Check that limit >> 20 will not give an "unexpected" 32-bit
7531	* result. There are, however, implicit assumptions that -1 mb limit
7532	* equates to LEDGER_LIMIT_INFINITY.
7533	*/
7534	if (old_limit_bytes) {
7535	*old_limit_bytes = old;
7536	}
7537
7538	if (new_limit_bytes == -`1`) {
7539	/*
7540	* Caller wishes to remove the limit.
7541	*/
7542	ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
7543	LEDGER_LIMIT_INFINITY);
7544	/*
7545	* If the memory diagnostics flag was disabled, lets enable it again
7546	*/
7547	ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7548	return KERN_SUCCESS;
7549	}
7550
7551	#ifdef CONFIG_NOMONITORS
7552	return KERN_SUCCESS;
7553	#else
7554
7555	task_lock(task);
7556	ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
7557	(ledger_amount_t)new_limit_bytes );
7558	if (task == current_task()) {
7559	ledger_check_new_balance(current_thread(), task->ledger,
7560	task_ledgers.phys_footprint);
7561	}
7562
7563	task_unlock(task);
7564	if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7565	ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7566	} else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7567	ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7568	}
7569
7570	return KERN_SUCCESS;
7571	#endif /* CONFIG_NOMONITORS */
7572	}
7573
7574	kern_return_t
7575	task_get_diag_footprint_limit_internal(
7576	task_t task,
7577	uint64_t *new_limit_bytes,
7578	bool *threshold_disabled)
7579	{
7580	ledger_amount_t ledger_limit;
7581	kern_return_t ret = KERN_SUCCESS;
7582	if (new_limit_bytes == NULL \|\| threshold_disabled == NULL) {
7583	return KERN_INVALID_ARGUMENT;
7584	}
7585	ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &ledger_limit);
7586	if (ledger_limit == LEDGER_LIMIT_INFINITY) {
7587	ledger_limit = -`1`;
7588	}
7589	if (ret == KERN_SUCCESS) {
7590	*new_limit_bytes = ledger_limit;
7591	ret = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, threshold_disabled);
7592	}
7593	return ret;
7594	}
7595	#endif /* RESETTABLE_DIAG_FOOTPRINT_LIMITS */
7596
7597
7598	kern_return_t
7599	task_get_phys_footprint_limit(
7600	task_t task,
7601	int *limit_mb)
7602	{
7603	ledger_amount_t limit;
7604	kern_return_t ret;
7605
7606	ret = ledger_get_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint, limit: &limit);
7607	if (ret != KERN_SUCCESS) {
7608	return ret;
7609	}
7610
7611	/*
7612	* Check that limit >> 20 will not give an "unexpected" signed, 32-bit
7613	* result. There are, however, implicit assumptions that -1 mb limit
7614	* equates to LEDGER_LIMIT_INFINITY.
7615	*/
7616	assert(((limit & `0xFFF0000000000000LL`) == `0`) \|\| (limit == LEDGER_LIMIT_INFINITY));
7617	limit_mb = (int*)(limit >> `20`);
7618
7619	return KERN_SUCCESS;
7620	}
7621	#else /* CONFIG_MEMORYSTATUS */
7622	kern_return_t
7623	task_set_phys_footprint_limit(
7624	__unused task_t task,
7625	__unused int new_limit_mb,
7626	__unused int *old_limit_mb)
7627	{
7628	return KERN_FAILURE;
7629	}
7630
7631	kern_return_t
7632	task_get_phys_footprint_limit(
7633	__unused task_t task,
7634	__unused int *limit_mb)
7635	{
7636	return KERN_FAILURE;
7637	}
7638	#endif /* CONFIG_MEMORYSTATUS */
7639
7640	security_token_t *
7641	task_get_sec_token(task_t task)
7642	{
7643	return &task_get_ro(t: task)->task_tokens.sec_token;
7644	}
7645
7646	void
7647	task_set_sec_token(task_t task, security_token_t *token)
7648	{
7649	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7650	task_tokens.sec_token, token);
7651	}
7652
7653	audit_token_t *
7654	task_get_audit_token(task_t task)
7655	{
7656	return &task_get_ro(t: task)->task_tokens.audit_token;
7657	}
7658
7659	void
7660	task_set_audit_token(task_t task, audit_token_t *token)
7661	{
7662	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7663	task_tokens.audit_token, token);
7664	}
7665
7666	void
7667	task_set_tokens(task_t task, security_token_t sec_token, audit_token_t audit_token)
7668	{
7669	struct task_token_ro_data tokens;
7670
7671	tokens = task_get_ro(t: task)->task_tokens;
7672	tokens.sec_token = *sec_token;
7673	tokens.audit_token = *audit_token;
7674
7675	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
7676	&tokens);
7677	}
7678
7679	boolean_t
7680	task_is_privileged(task_t task)
7681	{
7682	return task_get_sec_token(task)->val[`0`] == `0`;
7683	}
7684
7685	#ifdef CONFIG_MACF
7686	uint8_t *
7687	task_get_mach_trap_filter_mask(task_t task)
7688	{
7689	return task_get_ro(t: task)->task_filters.mach_trap_filter_mask;
7690	}
7691
7692	void
7693	task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
7694	{
7695	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7696	task_filters.mach_trap_filter_mask, &mask);
7697	}
7698
7699	uint8_t *
7700	task_get_mach_kobj_filter_mask(task_t task)
7701	{
7702	return task_get_ro(t: task)->task_filters.mach_kobj_filter_mask;
7703	}
7704
7705	mach_vm_address_t
7706	task_get_all_image_info_addr(task_t task)
7707	{
7708	return task->all_image_info_addr;
7709	}
7710
7711	void
7712	task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
7713	{
7714	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7715	task_filters.mach_kobj_filter_mask, &mask);
7716	}
7717
7718	#endif /* CONFIG_MACF */
7719
7720	void
7721	task_set_thread_limit(task_t task, uint16_t thread_limit)
7722	{
7723	assert(task != kernel_task);
7724	if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7725	task_lock(task);
7726	task->task_thread_limit = thread_limit;
7727	task_unlock(task);
7728	}
7729	}
7730
7731	#if CONFIG_PROC_RESOURCE_LIMITS
7732	kern_return_t
7733	task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
7734	{
7735	return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
7736	}
7737	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
7738
7739	#if XNU_TARGET_OS_OSX
7740	boolean_t
7741	task_has_system_version_compat_enabled(task_t task)
7742	{
7743	boolean_t enabled = FALSE;
7744
7745	task_lock(task);
7746	enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
7747	task_unlock(task);
7748
7749	return enabled;
7750	}
7751
7752	void
7753	task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
7754	{
7755	assert(task == current_task());
7756	assert(task != kernel_task);
7757
7758	task_lock(task);
7759	if (enable_system_version_compat) {
7760	task->t_flags \|= TF_SYS_VERSION_COMPAT;
7761	} else {
7762	task->t_flags &= ~TF_SYS_VERSION_COMPAT;
7763	}
7764	task_unlock(task);
7765	}
7766	#endif /* XNU_TARGET_OS_OSX */
7767
7768	/*
7769	* We need to export some functions to other components that
7770	* are currently implemented in macros within the osfmk
7771	* component. Just export them as functions of the same name.
7772	*/
7773	boolean_t
7774	is_kerneltask(task_t t)
7775	{
7776	if (t == kernel_task) {
7777	return TRUE;
7778	}
7779
7780	return FALSE;
7781	}
7782
7783	boolean_t
7784	is_corpsefork(task_t t)
7785	{
7786	return task_is_a_corpse_fork(t);
7787	}
7788
7789	task_t
7790	current_task_early(void)
7791	{
7792	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
7793	if (current_thread()->t_tro == NULL) {
7794	return TASK_NULL;
7795	}
7796	}
7797	return get_threadtask(current_thread());
7798	}
7799
7800	task_t
7801	current_task(void)
7802	{
7803	return get_threadtask(current_thread());
7804	}
7805
7806	/ defined in bsd/kern/kern_prot.c /
7807	extern int get_audit_token_pid(audit_token_t *audit_token);
7808
7809	int
7810	task_pid(task_t task)
7811	{
7812	if (task) {
7813	return get_audit_token_pid(audit_token: task_get_audit_token(task));
7814	}
7815	return -`1`;
7816	}
7817
7818	#if __has_feature(ptrauth_calls)
7819	/*
7820	* Get the shared region id and jop signing key for the task.
7821	* The function will allocate a kalloc buffer and return
7822	* it to caller, the caller needs to free it. This is used
7823	* for getting the information via task port.
7824	*/
7825	char *
7826	task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
7827	{
7828	size_t len;
7829	char *shared_region_id = NULL;
7830
7831	task_lock(task);
7832	if (task->shared_region_id == NULL) {
7833	task_unlock(task);
7834	return NULL;
7835	}
7836	len = strlen(task->shared_region_id) + `1`;
7837
7838	/ don't hold task lock while allocating /
7839	task_unlock(task);
7840	shared_region_id = kalloc_data(len, Z_WAITOK);
7841	task_lock(task);
7842
7843	if (task->shared_region_id == NULL) {
7844	task_unlock(task);
7845	kfree_data(shared_region_id, len);
7846	return NULL;
7847	}
7848	assert(len == strlen(task->shared_region_id) + `1`); / should never change /
7849	strlcpy(shared_region_id, task->shared_region_id, len);
7850	task_unlock(task);
7851
7852	/ find key from its auth pager /
7853	if (jop_pid != NULL) {
7854	*jop_pid = shared_region_find_key(shared_region_id);
7855	}
7856
7857	return shared_region_id;
7858	}
7859
7860	/*
7861	* set the shared region id for a task
7862	*/
7863	void
7864	task_set_shared_region_id(task_t task, char *id)
7865	{
7866	char *old_id;
7867
7868	task_lock(task);
7869	old_id = task->shared_region_id;
7870	task->shared_region_id = id;
7871	task->shared_region_auth_remapped = FALSE;
7872	task_unlock(task);
7873
7874	/ free any pre-existing shared region id /
7875	if (old_id != NULL) {
7876	shared_region_key_dealloc(old_id);
7877	kfree_data(old_id, strlen(old_id) + `1`);
7878	}
7879	}
7880	#endif /* __has_feature(ptrauth_calls) */
7881
7882	/*
7883	* This routine finds a thread in a task by its unique id
7884	* Returns a referenced thread or THREAD_NULL if the thread was not found
7885	*
7886	* TODO: This is super inefficient - it's an O(threads in task) list walk!
7887	* We should make a tid hash, or transition all tid clients to thread ports
7888	*
7889	* Precondition: No locks held (will take task lock)
7890	*/
7891	thread_t
7892	task_findtid(task_t task, uint64_t tid)
7893	{
7894	thread_t self = current_thread();
7895	thread_t found_thread = THREAD_NULL;
7896	thread_t iter_thread = THREAD_NULL;
7897
7898	/ Short-circuit the lookup if we're looking up ourselves /
7899	if (tid == self->thread_id \|\| tid == TID_NULL) {
7900	assert(get_threadtask(self) == task);
7901
7902	thread_reference(thread: self);
7903
7904	return self;
7905	}
7906
7907	task_lock(task);
7908
7909	queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
7910	if (iter_thread->thread_id == tid) {
7911	found_thread = iter_thread;
7912	thread_reference(thread: found_thread);
7913	break;
7914	}
7915	}
7916
7917	task_unlock(task);
7918
7919	return found_thread;
7920	}
7921
7922	int
7923	pid_from_task(task_t task)
7924	{
7925	int pid = -`1`;
7926	void *bsd_info = get_bsdtask_info(task);
7927
7928	if (bsd_info) {
7929	pid = proc_pid(p: bsd_info);
7930	} else {
7931	pid = task_pid(task);
7932	}
7933
7934	return pid;
7935	}
7936
7937	/*
7938	* Control the CPU usage monitor for a task.
7939	*/
7940	kern_return_t
7941	task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7942	{
7943	int error = KERN_SUCCESS;
7944
7945	if (*flags & CPUMON_MAKE_FATAL) {
7946	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7947	} else {
7948	error = KERN_INVALID_ARGUMENT;
7949	}
7950
7951	return error;
7952	}
7953
7954	/*
7955	* Control the wakeups monitor for a task.
7956	*/
7957	kern_return_t
7958	task_wakeups_monitor_ctl(task_t task, uint32_t flags, int32_t rate_hz)
7959	{
7960	ledger_t ledger = task->ledger;
7961
7962	task_lock(task);
7963	if (*flags & WAKEMON_GET_PARAMS) {
7964	ledger_amount_t limit;
7965	uint64_t period;
7966
7967	ledger_get_limit(ledger, entry: task_ledgers.interrupt_wakeups, limit: &limit);
7968	ledger_get_period(ledger, entry: task_ledgers.interrupt_wakeups, period: &period);
7969
7970	if (limit != LEDGER_LIMIT_INFINITY) {
7971	/*
7972	* An active limit means the wakeups monitor is enabled.
7973	*/
7974	*rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
7975	*flags = WAKEMON_ENABLE;
7976	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
7977	*flags \|= WAKEMON_MAKE_FATAL;
7978	}
7979	} else {
7980	*flags = WAKEMON_DISABLE;
7981	*rate_hz = -`1`;
7982	}
7983
7984	/*
7985	* If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
7986	*/
7987	task_unlock(task);
7988	return KERN_SUCCESS;
7989	}
7990
7991	if (*flags & WAKEMON_ENABLE) {
7992	if (*flags & WAKEMON_SET_DEFAULTS) {
7993	*rate_hz = task_wakeups_monitor_rate;
7994	}
7995
7996	#ifndef CONFIG_NOMONITORS
7997	if (*flags & WAKEMON_MAKE_FATAL) {
7998	task->rusage_cpu_flags \|= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7999	}
8000	#endif /* CONFIG_NOMONITORS */
8001
8002	if (*rate_hz <= `0`) {
8003	task_unlock(task);
8004	return KERN_INVALID_ARGUMENT;
8005	}
8006
8007	#ifndef CONFIG_NOMONITORS
8008	ledger_set_limit(ledger, entry: task_ledgers.interrupt_wakeups, limit: rate_hz task_wakeups_monitor_interval,
8009	warn_level_percentage: (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
8010	ledger_set_period(ledger, entry: task_ledgers.interrupt_wakeups, period: task_wakeups_monitor_interval * NSEC_PER_SEC);
8011	ledger_enable_callback(ledger, entry: task_ledgers.interrupt_wakeups);
8012	#endif /* CONFIG_NOMONITORS */
8013	} else if (*flags & WAKEMON_DISABLE) {
8014	/*
8015	* Caller wishes to disable wakeups monitor on the task.
8016	*
8017	* Disable telemetry if it was triggered by the wakeups monitor, and
8018	* remove the limit & callback on the wakeups ledger entry.
8019	*/
8020	#if CONFIG_TELEMETRY
8021	telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, enable_disable: `0`);
8022	#endif
8023	ledger_disable_refill(l: ledger, entry: task_ledgers.interrupt_wakeups);
8024	ledger_disable_callback(ledger, entry: task_ledgers.interrupt_wakeups);
8025	}
8026
8027	task_unlock(task);
8028	return KERN_SUCCESS;
8029	}
8030
8031	void
8032	task_wakeups_rate_exceeded(int warning, __unused const void param0, __unused const* void *param1)
8033	{
8034	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
8035	#if CONFIG_TELEMETRY
8036	/*
8037	* This task is in danger of violating the wakeups monitor. Enable telemetry on this task
8038	* so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
8039	*/
8040	telemetry_task_ctl(task: current_task(), TF_WAKEMON_WARNING, enable_disable: `1`);
8041	#endif
8042	return;
8043	}
8044
8045	#if CONFIG_TELEMETRY
8046	/*
8047	* If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
8048	* exceeded the limit, turn telemetry off for the task.
8049	*/
8050	telemetry_task_ctl(task: current_task(), TF_WAKEMON_WARNING, enable_disable: `0`);
8051	#endif
8052
8053	if (warning == `0`) {
8054	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
8055	}
8056	}
8057
8058	TUNABLE(bool, enable_wakeup_reports, "enable_wakeup_reports", false); / Enable wakeup reports. /
8059
8060	void __attribute__((noinline))
8061	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
8062	{
8063	task_t task = current_task();
8064	int pid = `0`;
8065	const char *procname = "unknown";
8066	boolean_t fatal;
8067	kern_return_t kr;
8068	#ifdef EXC_RESOURCE_MONITORS
8069	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8070	#endif /* EXC_RESOURCE_MONITORS */
8071	struct ledger_entry_info lei;
8072
8073	#ifdef MACH_BSD
8074	pid = proc_selfpid();
8075	if (get_bsdtask_info(task) != NULL) {
8076	procname = proc_name_address(p: get_bsdtask_info(current_task()));
8077	}
8078	#endif
8079
8080	ledger_get_entry_info(ledger: task->ledger, entry: task_ledgers.interrupt_wakeups, lei: &lei);
8081
8082	/*
8083	* Disable the exception notification so we don't overwhelm
8084	* the listener with an endless stream of redundant exceptions.
8085	* TODO: detect whether another thread is already reporting the violation.
8086	*/
8087	uint32_t flags = WAKEMON_DISABLE;
8088	task_wakeups_monitor_ctl(task, flags: &flags, NULL);
8089
8090	fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
8091	trace_resource_violation(RMON_CPUWAKES_VIOLATED, ledger_info: &lei);
8092	os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
8093	"over ~%llu seconds, averaging %llu wakes / second and "
8094	"violating a %slimit of %llu wakes over %llu seconds.\n",
8095	procname, pid,
8096	lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
8097	lei.lei_last_refill == `0` ? `0` :
8098	(NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
8099	fatal ? "FATAL " : "",
8100	lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
8101
8102	if (enable_wakeup_reports) {
8103	kr = send_resource_violation(send_cpu_wakes_violation, violator: task, ledger_info: &lei,
8104	flags: fatal ? kRNFatalLimitFlag : `0`);
8105	if (kr) {
8106	printf(format: "send_resource_violation(CPU wakes, ...): error %#x\n", kr);
8107	}
8108	}
8109
8110	#ifdef EXC_RESOURCE_MONITORS
8111	if (disable_exc_resource) {
8112	printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8113	"suppressed by a boot-arg\n", procname, pid);
8114	return;
8115	}
8116	if (disable_exc_resource_during_audio && audio_active) {
8117	os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8118	"suppressed due to audio playback\n", procname, pid);
8119	return;
8120	}
8121	if (lei.lei_last_refill == `0`) {
8122	os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8123	"suppressed due to lei.lei_last_refill = 0 \n", procname, pid);
8124	}
8125
8126	code[`0`] = code[`1`] = `0`;
8127	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_WAKEUPS);
8128	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], FLAVOR_WAKEUPS_MONITOR);
8129	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[`0`],
8130	NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
8131	EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[`0`],
8132	lei.lei_last_refill);
8133	EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[`1`],
8134	NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
8135	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8136	#endif /* EXC_RESOURCE_MONITORS */
8137
8138	if (fatal) {
8139	task_terminate_internal(task);
8140	}
8141	}
8142
8143	static boolean_t
8144	global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
8145	{
8146	int64_t old_count, new_count;
8147	boolean_t needs_telemetry;
8148
8149	do {
8150	new_count = old_count = *global_write_count;
8151	new_count += io_delta;
8152	if (new_count >= io_telemetry_limit) {
8153	new_count = `0`;
8154	needs_telemetry = TRUE;
8155	} else {
8156	needs_telemetry = FALSE;
8157	}
8158	} while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
8159	return needs_telemetry;
8160	}
8161
8162	void
8163	task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
8164	{
8165	#if CONFIG_PHYS_WRITE_ACCT
8166	if (!io_size) {
8167	return;
8168	}
8169
8170	/*
8171	* task == NULL means that we have to update kernel_task ledgers
8172	*/
8173	if (!task) {
8174	task = kernel_task;
8175	}
8176
8177	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) \| DBG_FUNC_NONE,
8178	task_pid(task), flavor, io_size, flags, `0`);
8179	DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);
8180
8181	if (flags & TASK_BALANCE_CREDIT) {
8182	if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
8183	OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
8184	ledger_credit_nocheck(ledger: task->ledger, entry: task_ledgers.fs_metadata_writes, amount: io_size);
8185	}
8186	} else if (flags & TASK_BALANCE_DEBIT) {
8187	if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
8188	OSAddAtomic64(-`1` * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
8189	ledger_debit_nocheck(ledger: task->ledger, entry: task_ledgers.fs_metadata_writes, amount: io_size);
8190	}
8191	}
8192	#endif /* CONFIG_PHYS_WRITE_ACCT */
8193	}
8194
8195	void
8196	task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
8197	{
8198	int64_t io_delta = `0`;
8199	int64_t * global_counter_to_update;
8200	boolean_t needs_telemetry = FALSE;
8201	boolean_t is_external_device = FALSE;
8202	int ledger_to_update = `0`;
8203	struct task_writes_counters * writes_counters_to_update;
8204
8205	if ((!task) \|\| (!io_size) \|\| (!vp)) {
8206	return;
8207	}
8208
8209	KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) \| DBG_FUNC_NONE,
8210	task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), `0`);
8211	DTRACE_IO4(logical_writes, struct task , task, uint32_t, io_size, int, flags, vnode , vp);
8212
8213	// Is the drive backing this vnode internal or external to the system?
8214	if (vnode_isonexternalstorage(vp) == false) {
8215	global_counter_to_update = &global_logical_writes_count;
8216	ledger_to_update = task_ledgers.logical_writes;
8217	writes_counters_to_update = &task->task_writes_counters_internal;
8218	is_external_device = FALSE;
8219	} else {
8220	global_counter_to_update = &global_logical_writes_to_external_count;
8221	ledger_to_update = task_ledgers.logical_writes_to_external;
8222	writes_counters_to_update = &task->task_writes_counters_external;
8223	is_external_device = TRUE;
8224	}
8225
8226	switch (flags) {
8227	case TASK_WRITE_IMMEDIATE:
8228	OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
8229	ledger_credit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8230	if (!is_external_device) {
8231	coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8232	}
8233	break;
8234	case TASK_WRITE_DEFERRED:
8235	OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
8236	ledger_credit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8237	if (!is_external_device) {
8238	coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8239	}
8240	break;
8241	case TASK_WRITE_INVALIDATED:
8242	OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
8243	ledger_debit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8244	if (!is_external_device) {
8245	coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
8246	}
8247	break;
8248	case TASK_WRITE_METADATA:
8249	OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
8250	ledger_credit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8251	if (!is_external_device) {
8252	coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8253	}
8254	break;
8255	}
8256
8257	io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -`1ll`) : ((int64_t)io_size);
8258	if (io_telemetry_limit != `0`) {
8259	/ If io_telemetry_limit is 0, disable global updates and I/O telemetry /
8260	needs_telemetry = global_update_logical_writes(io_delta, global_write_count: global_counter_to_update);
8261	if (needs_telemetry && !is_external_device) {
8262	act_set_io_telemetry_ast(current_thread());
8263	}
8264	}
8265	}
8266
8267	/*
8268	* Control the I/O monitor for a task.
8269	*/
8270	kern_return_t
8271	task_io_monitor_ctl(task_t task, uint32_t *flags)
8272	{
8273	ledger_t ledger = task->ledger;
8274
8275	task_lock(task);
8276	if (*flags & IOMON_ENABLE) {
8277	/ Configure the physical I/O ledger /
8278	ledger_set_limit(ledger, entry: task_ledgers.physical_writes, limit: (task_iomon_limit_mb * `1024` * `1024`), warn_level_percentage: `0`);
8279	ledger_set_period(ledger, entry: task_ledgers.physical_writes, period: (task_iomon_interval_secs * NSEC_PER_SEC));
8280	} else if (*flags & IOMON_DISABLE) {
8281	/*
8282	* Caller wishes to disable I/O monitor on the task.
8283	*/
8284	ledger_disable_refill(l: ledger, entry: task_ledgers.physical_writes);
8285	ledger_disable_callback(ledger, entry: task_ledgers.physical_writes);
8286	}
8287
8288	task_unlock(task);
8289	return KERN_SUCCESS;
8290	}
8291
8292	void
8293	task_io_rate_exceeded(int warning, const void param0, __unused const* void *param1)
8294	{
8295	if (warning == `0`) {
8296	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(flavor: (int)param0);
8297	}
8298	}
8299
8300	void __attribute__((noinline))
8301	SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
8302	{
8303	int pid = `0`;
8304	task_t task = current_task();
8305	#ifdef EXC_RESOURCE_MONITORS
8306	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8307	#endif /* EXC_RESOURCE_MONITORS */
8308	struct ledger_entry_info lei = {};
8309	kern_return_t kr;
8310
8311	#ifdef MACH_BSD
8312	pid = proc_selfpid();
8313	#endif
8314	/*
8315	* Get the ledger entry info. We need to do this before disabling the exception
8316	* to get correct values for all fields.
8317	*/
8318	switch (flavor) {
8319	case FLAVOR_IO_PHYSICAL_WRITES:
8320	ledger_get_entry_info(ledger: task->ledger, entry: task_ledgers.physical_writes, lei: &lei);
8321	break;
8322	}
8323
8324
8325	/*
8326	* Disable the exception notification so we don't overwhelm
8327	* the listener with an endless stream of redundant exceptions.
8328	* TODO: detect whether another thread is already reporting the violation.
8329	*/
8330	uint32_t flags = IOMON_DISABLE;
8331	task_io_monitor_ctl(task, flags: &flags);
8332
8333	if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
8334	trace_resource_violation(RMON_LOGWRITES_VIOLATED, ledger_info: &lei);
8335	}
8336	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
8337	pid, flavor, (lei.lei_balance / (`1024` * `1024`)), (lei.lei_limit / (`1024` * `1024`)), (lei.lei_refill_period / NSEC_PER_SEC));
8338
8339	kr = send_resource_violation(send_disk_writes_violation, violator: task, ledger_info: &lei, kRNFlagsNone);
8340	if (kr) {
8341	printf(format: "send_resource_violation(disk_writes, ...): error %#x\n", kr);
8342	}
8343
8344	#ifdef EXC_RESOURCE_MONITORS
8345	code[`0`] = code[`1`] = `0`;
8346	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_IO);
8347	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], flavor);
8348	EXC_RESOURCE_IO_ENCODE_INTERVAL(code[`0`], (lei.lei_refill_period / NSEC_PER_SEC));
8349	EXC_RESOURCE_IO_ENCODE_LIMIT(code[`0`], (lei.lei_limit / (`1024` * `1024`)));
8350	EXC_RESOURCE_IO_ENCODE_OBSERVED(code[`1`], (lei.lei_balance / (`1024` * `1024`)));
8351	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8352	#endif /* EXC_RESOURCE_MONITORS */
8353	}
8354
8355	void
8356	task_port_space_ast(__unused task_t task)
8357	{
8358	uint32_t current_size, soft_limit, hard_limit;
8359	assert(task == current_task());
8360	bool should_notify = ipc_space_check_table_size_limit(space: task->itk_space,
8361	current_limit: &current_size, soft_limit: &soft_limit, hard_limit: &hard_limit);
8362	if (should_notify) {
8363	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
8364	}
8365	}
8366
8367	#if CONFIG_PROC_RESOURCE_LIMITS
8368	static mach_port_t
8369	task_allocate_fatal_port(void)
8370	{
8371	mach_port_t task_fatal_port = MACH_PORT_NULL;
8372	task_id_token_t token;
8373
8374	kern_return_t kr = task_create_identity_token(current_task(), &token); / Takes a reference on the token /
8375	if (kr) {
8376	return MACH_PORT_NULL;
8377	}
8378	task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
8379	IPC_KOBJECT_ALLOC_NSREQUEST \| IPC_KOBJECT_ALLOC_MAKE_SEND);
8380
8381	task_id_token_set_port(token, task_fatal_port);
8382
8383	return task_fatal_port;
8384	}
8385
8386	static void
8387	task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
8388	{
8389	task_t task = TASK_NULL;
8390	kern_return_t kr;
8391
8392	task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);
8393
8394	assert(token != NULL);
8395	if (token) {
8396	kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); / takes a reference on task /
8397	if (task) {
8398	task_bsdtask_kill(task);
8399	task_deallocate(task);
8400	}
8401	task_id_token_release(token); / consumes ref given by notification /
8402	}
8403	}
8404	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
8405
8406	void __attribute__((noinline))
8407	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
8408	{
8409	int pid = `0`;
8410	char procname = (char* *) "unknown";
8411	__unused kern_return_t kr;
8412	__unused resource_notify_flags_t flags = kRNFlagsNone;
8413	__unused uint32_t limit;
8414	__unused mach_port_t task_fatal_port = MACH_PORT_NULL;
8415	mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8416
8417	pid = proc_selfpid();
8418	if (get_bsdtask_info(task) != NULL) {
8419	procname = proc_name_address(p: get_bsdtask_info(task));
8420	}
8421
8422	/*
8423	* Only kernel_task and launchd may be allowed to
8424	* have really large ipc space.
8425	*/
8426	if (pid == `0` \|\| pid == `1`) {
8427	return;
8428	}
8429
8430	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
8431	Num of ports allocated %u; \n", procname, pid, current_size);
8432
8433	/ Abort the process if it has hit the system-wide limit for ipc port table size /
8434	if (!hard_limit && !soft_limit) {
8435	code[`0`] = code[`1`] = `0`;
8436	EXC_RESOURCE_ENCODE_TYPE(code[`0`], RESOURCE_TYPE_PORTS);
8437	EXC_RESOURCE_ENCODE_FLAVOR(code[`0`], FLAVOR_PORT_SPACE_FULL);
8438	EXC_RESOURCE_PORTS_ENCODE_PORTS(code[`0`], current_size);
8439
8440	exit_with_port_space_exception(proc: current_proc(), code: code[`0`], subcode: code[`1`]);
8441
8442	return;
8443	}
8444
8445	#if CONFIG_PROC_RESOURCE_LIMITS
8446	if (hard_limit > `0`) {
8447	flags \|= kRNHardLimitFlag;
8448	limit = hard_limit;
8449	task_fatal_port = task_allocate_fatal_port();
8450	if (!task_fatal_port) {
8451	os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8452	task_bsdtask_kill(task);
8453	}
8454	} else {
8455	flags \|= kRNSoftLimitFlag;
8456	limit = soft_limit;
8457	}
8458
8459	kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8460	if (kr) {
8461	os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
8462	}
8463	if (task_fatal_port) {
8464	ipc_port_release_send(task_fatal_port);
8465	}
8466	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
8467	}
8468
8469	#if CONFIG_PROC_RESOURCE_LIMITS
8470	void
8471	task_kqworkloop_ast(task_t task, int current_size, int soft_limit, int hard_limit)
8472	{
8473	assert(task == current_task());
8474	return SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task, current_size, soft_limit, hard_limit);
8475	}
8476
8477	void __attribute__((noinline))
8478	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit)
8479	{
8480	int pid = `0`;
8481	char procname = (char* *) "unknown";
8482	#ifdef MACH_BSD
8483	pid = proc_selfpid();
8484	if (get_bsdtask_info(task) != NULL) {
8485	procname = proc_name_address(get_bsdtask_info(task));
8486	}
8487	#endif
8488	if (pid == `0` \|\| pid == `1`) {
8489	return;
8490	}
8491
8492	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many kqworkloops. \
8493	Num of kqworkloops allocated %u; \n", procname, pid, current_size);
8494
8495	int limit = `0`;
8496	resource_notify_flags_t flags = kRNFlagsNone;
8497	mach_port_t task_fatal_port = MACH_PORT_NULL;
8498	if (hard_limit) {
8499	flags \|= kRNHardLimitFlag;
8500	limit = hard_limit;
8501
8502	task_fatal_port = task_allocate_fatal_port();
8503	if (task_fatal_port == MACH_PORT_NULL) {
8504	os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8505	task_bsdtask_kill(task);
8506	}
8507	} else {
8508	flags \|= kRNSoftLimitFlag;
8509	limit = soft_limit;
8510	}
8511
8512	kern_return_t kr;
8513	kr = send_resource_violation_with_fatal_port(send_kqworkloops_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8514	if (kr) {
8515	os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(kqworkloops, ...): error %#x\n", kr);
8516	}
8517	if (task_fatal_port) {
8518	ipc_port_release_send(task_fatal_port);
8519	}
8520	}
8521
8522
8523	void
8524	task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
8525	{
8526	assert(task == current_task());
8527	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
8528	}
8529
8530	void __attribute__((noinline))
8531	SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
8532	{
8533	int pid = `0`;
8534	char procname = (char* *) "unknown";
8535	kern_return_t kr;
8536	resource_notify_flags_t flags = kRNFlagsNone;
8537	int limit;
8538	mach_port_t task_fatal_port = MACH_PORT_NULL;
8539
8540	#ifdef MACH_BSD
8541	pid = proc_selfpid();
8542	if (get_bsdtask_info(task) != NULL) {
8543	procname = proc_name_address(get_bsdtask_info(task));
8544	}
8545	#endif
8546	/*
8547	* Only kernel_task and launchd may be allowed to
8548	* have really large ipc space.
8549	*/
8550	if (pid == `0` \|\| pid == `1`) {
8551	return;
8552	}
8553
8554	os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
8555	Num of fds allocated %u; \n", procname, pid, current_size);
8556
8557	if (hard_limit > `0`) {
8558	flags \|= kRNHardLimitFlag;
8559	limit = hard_limit;
8560	task_fatal_port = task_allocate_fatal_port();
8561	if (!task_fatal_port) {
8562	os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8563	task_bsdtask_kill(task);
8564	}
8565	} else {
8566	flags \|= kRNSoftLimitFlag;
8567	limit = soft_limit;
8568	}
8569
8570	kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8571	if (kr) {
8572	os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
8573	}
8574	if (task_fatal_port) {
8575	ipc_port_release_send(task_fatal_port);
8576	}
8577	}
8578	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
8579
8580	/ Placeholders for the task set/get voucher interfaces /
8581	kern_return_t
8582	task_get_mach_voucher(
8583	task_t task,
8584	mach_voucher_selector_t __unused which,
8585	ipc_voucher_t *voucher)
8586	{
8587	if (TASK_NULL == task) {
8588	return KERN_INVALID_TASK;
8589	}
8590
8591	*voucher = NULL;
8592	return KERN_SUCCESS;
8593	}
8594
8595	kern_return_t
8596	task_set_mach_voucher(
8597	task_t task,
8598	ipc_voucher_t __unused voucher)
8599	{
8600	if (TASK_NULL == task) {
8601	return KERN_INVALID_TASK;
8602	}
8603
8604	return KERN_SUCCESS;
8605	}
8606
8607	kern_return_t
8608	task_swap_mach_voucher(
8609	__unused task_t task,
8610	__unused ipc_voucher_t new_voucher,
8611	ipc_voucher_t *in_out_old_voucher)
8612	{
8613	/*
8614	* Currently this function is only called from a MIG generated
8615	* routine which doesn't release the reference on the voucher
8616	* addressed by in_out_old_voucher. To avoid leaking this reference,
8617	* a call to release it has been added here.
8618	*/
8619	ipc_voucher_release(voucher: *in_out_old_voucher);
8620	OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
8621	}
8622
8623	void
8624	task_set_gpu_denied(task_t task, boolean_t denied)
8625	{
8626	task_lock(task);
8627
8628	if (denied) {
8629	task->t_flags \|= TF_GPU_DENIED;
8630	} else {
8631	task->t_flags &= ~TF_GPU_DENIED;
8632	}
8633
8634	task_unlock(task);
8635	}
8636
8637	boolean_t
8638	task_is_gpu_denied(task_t task)
8639	{
8640	/ We don't need the lock to read this flag /
8641	return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
8642	}
8643
8644	/*
8645	* Task policy termination uses this path to clear the bit the final time
8646	* during the termination flow, and the TASK_POLICY_TERMINATED bit guarantees
8647	* that it won't be changed again on a terminated task.
8648	*/
8649	bool
8650	task_set_game_mode_locked(task_t task, bool enabled)
8651	{
8652	task_lock_assert_owned(task);
8653
8654	if (enabled) {
8655	assert(proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == `0`);
8656	}
8657
8658	bool previously_enabled = task_get_game_mode(task);
8659	bool needs_update = false;
8660	uint32_t new_count = `0`;
8661
8662	if (enabled) {
8663	task->t_flags \|= TF_GAME_MODE;
8664	} else {
8665	task->t_flags &= ~TF_GAME_MODE;
8666	}
8667
8668	if (enabled && !previously_enabled) {
8669	if (task_coalition_adjust_game_mode_count(task, count: `1`, new_count: &new_count) && (new_count == `1`)) {
8670	needs_update = true;
8671	}
8672	} else if (!enabled && previously_enabled) {
8673	if (task_coalition_adjust_game_mode_count(task, count: -`1`, new_count: &new_count) && (new_count == `0`)) {
8674	needs_update = true;
8675	}
8676	}
8677
8678	return needs_update;
8679	}
8680
8681	void
8682	task_set_game_mode(task_t task, bool enabled)
8683	{
8684	bool needs_update = false;
8685
8686	task_lock(task);
8687
8688	/ After termination, further updates are no longer effective /
8689	if (proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == `0`) {
8690	needs_update = task_set_game_mode_locked(task, enabled);
8691	}
8692
8693	task_unlock(task);
8694
8695	#if CONFIG_THREAD_GROUPS
8696	if (needs_update) {
8697	task_coalition_thread_group_game_mode_update(task);
8698	}
8699	#endif /* CONFIG_THREAD_GROUPS */
8700	}
8701
8702	bool
8703	task_get_game_mode(task_t task)
8704	{
8705	/ We don't need the lock to read this flag /
8706	return task->t_flags & TF_GAME_MODE;
8707	}
8708
8709
8710	uint64_t
8711	get_task_memory_region_count(task_t task)
8712	{
8713	vm_map_t map;
8714	map = (task == kernel_task) ? kernel_map: task->map;
8715	return (uint64_t)get_map_nentries(map);
8716	}
8717
8718	static void
8719	kdebug_trace_dyld_internal(uint32_t base_code,
8720	struct dyld_kernel_image_info *info)
8721	{
8722	static_assert(sizeof(info->uuid) >= `16`);
8723
8724	#if defined(__LP64__)
8725	uint64_t uuid = (uint64_t )&(info->uuid);
8726
8727	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8728	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[`0`],
8729	uuid[`1`], info->load_addr,
8730	(uint64_t)info->fsid.val[`0`] \| ((uint64_t)info->fsid.val[`1`] << `32`),
8731	`0`);
8732	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8733	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `1`),
8734	(uint64_t)info->fsobjid.fid_objno \|
8735	((uint64_t)info->fsobjid.fid_generation << `32`),
8736	`0`, `0`, `0`, `0`);
8737	#else /* defined(__LP64__) */
8738	uint32_t uuid = (uint32_t )&(info->uuid);
8739
8740	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8741	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `2`), uuid[`0`],
8742	uuid[`1`], uuid[`2`], uuid[`3`], `0`);
8743	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8744	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `3`),
8745	(uint32_t)info->load_addr, info->fsid.val[`0`], info->fsid.val[`1`],
8746	info->fsobjid.fid_objno, `0`);
8747	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8748	KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + `4`),
8749	info->fsobjid.fid_generation, `0`, `0`, `0`, `0`);
8750	#endif /* !defined(__LP64__) */
8751	}
8752
8753	static kern_return_t
8754	kdebug_trace_dyld(task_t task, uint32_t base_code,
8755	vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
8756	{
8757	kern_return_t kr;
8758	dyld_kernel_image_info_array_t infos;
8759	vm_map_offset_t map_data;
8760	vm_offset_t data;
8761
8762	if (!infos_copy) {
8763	return KERN_INVALID_ADDRESS;
8764	}
8765
8766	if (!kdebug_enable \|\|
8767	!kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, `0`))) {
8768	vm_map_copy_discard(copy: infos_copy);
8769	return KERN_SUCCESS;
8770	}
8771
8772	if (task == NULL \|\| task != current_task()) {
8773	return KERN_INVALID_TASK;
8774	}
8775
8776	kr = vm_map_copyout(dst_map: ipc_kernel_map, dst_addr: &map_data, copy: (vm_map_copy_t)infos_copy);
8777	if (kr != KERN_SUCCESS) {
8778	return kr;
8779	}
8780
8781	infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
8782
8783	for (mach_msg_type_number_t i = `0`; i < infos_len; i++) {
8784	kdebug_trace_dyld_internal(base_code, info: &(infos[i]));
8785	}
8786
8787	data = CAST_DOWN(vm_offset_t, map_data);
8788	mach_vm_deallocate(target: ipc_kernel_map, address: data, size: infos_len * sizeof(infos[`0`]));
8789	return KERN_SUCCESS;
8790	}
8791
8792	kern_return_t
8793	task_register_dyld_image_infos(task_t task,
8794	dyld_kernel_image_info_array_t infos_copy,
8795	mach_msg_type_number_t infos_len)
8796	{
8797	return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
8798	infos_copy: (vm_map_copy_t)infos_copy, infos_len);
8799	}
8800
8801	kern_return_t
8802	task_unregister_dyld_image_infos(task_t task,
8803	dyld_kernel_image_info_array_t infos_copy,
8804	mach_msg_type_number_t infos_len)
8805	{
8806	return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
8807	infos_copy: (vm_map_copy_t)infos_copy, infos_len);
8808	}
8809
8810	kern_return_t
8811	task_get_dyld_image_infos(__unused task_t task,
8812	__unused dyld_kernel_image_info_array_t * dyld_images,
8813	__unused mach_msg_type_number_t * dyld_imagesCnt)
8814	{
8815	return KERN_NOT_SUPPORTED;
8816	}
8817
8818	kern_return_t
8819	task_register_dyld_shared_cache_image_info(task_t task,
8820	dyld_kernel_image_info_t cache_img,
8821	__unused boolean_t no_cache,
8822	__unused boolean_t private_cache)
8823	{
8824	if (task == NULL \|\| task != current_task()) {
8825	return KERN_INVALID_TASK;
8826	}
8827
8828	kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, info: &cache_img);
8829	return KERN_SUCCESS;
8830	}
8831
8832	kern_return_t
8833	task_register_dyld_set_dyld_state(__unused task_t task,
8834	__unused uint8_t dyld_state)
8835	{
8836	return KERN_NOT_SUPPORTED;
8837	}
8838
8839	kern_return_t
8840	task_register_dyld_get_process_state(__unused task_t task,
8841	__unused dyld_kernel_process_info_t * dyld_process_state)
8842	{
8843	return KERN_NOT_SUPPORTED;
8844	}
8845
8846	kern_return_t
8847	task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
8848	task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
8849	{
8850	#if CONFIG_PERVASIVE_CPI
8851	task_t task = (task_t)task_insp;
8852	kern_return_t kr = KERN_SUCCESS;
8853	mach_msg_type_number_t size;
8854
8855	if (task == TASK_NULL) {
8856	return KERN_INVALID_ARGUMENT;
8857	}
8858
8859	size = *size_in_out;
8860
8861	switch (flavor) {
8862	case TASK_INSPECT_BASIC_COUNTS: {
8863	struct task_inspect_basic_counts *bc =
8864	(struct task_inspect_basic_counts *)info_out;
8865	struct recount_usage stats = { `0` };
8866	if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
8867	kr = KERN_INVALID_ARGUMENT;
8868	break;
8869	}
8870
8871	recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
8872	bc->instructions = recount_usage_instructions(&stats);
8873	bc->cycles = recount_usage_cycles(&stats);
8874	size = TASK_INSPECT_BASIC_COUNTS_COUNT;
8875	break;
8876	}
8877	default:
8878	kr = KERN_INVALID_ARGUMENT;
8879	break;
8880	}
8881
8882	if (kr == KERN_SUCCESS) {
8883	*size_in_out = size;
8884	}
8885	return kr;
8886	#else /* CONFIG_PERVASIVE_CPI */
8887	#pragma unused(task_insp, flavor, info_out, size_in_out)
8888	return KERN_NOT_SUPPORTED;
8889	#endif /* !CONFIG_PERVASIVE_CPI */
8890	}
8891
8892	#if CONFIG_SECLUDED_MEMORY
8893	int num_tasks_can_use_secluded_mem = `0`;
8894
8895	void
8896	task_set_can_use_secluded_mem(
8897	task_t task,
8898	boolean_t can_use_secluded_mem)
8899	{
8900	if (!task->task_could_use_secluded_mem) {
8901	return;
8902	}
8903	task_lock(task);
8904	task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
8905	task_unlock(task);
8906	}
8907
8908	void
8909	task_set_can_use_secluded_mem_locked(
8910	task_t task,
8911	boolean_t can_use_secluded_mem)
8912	{
8913	assert(task->task_could_use_secluded_mem);
8914	if (can_use_secluded_mem &&
8915	secluded_for_apps && / global boot-arg /
8916	!task->task_can_use_secluded_mem) {
8917	assert(num_tasks_can_use_secluded_mem >= `0`);
8918	OSAddAtomic(+`1`,
8919	(volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8920	task->task_can_use_secluded_mem = TRUE;
8921	} else if (!can_use_secluded_mem &&
8922	task->task_can_use_secluded_mem) {
8923	assert(num_tasks_can_use_secluded_mem > `0`);
8924	OSAddAtomic(-`1`,
8925	(volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8926	task->task_can_use_secluded_mem = FALSE;
8927	}
8928	}
8929
8930	void
8931	task_set_could_use_secluded_mem(
8932	task_t task,
8933	boolean_t could_use_secluded_mem)
8934	{
8935	task->task_could_use_secluded_mem = !!could_use_secluded_mem;
8936	}
8937
8938	void
8939	task_set_could_also_use_secluded_mem(
8940	task_t task,
8941	boolean_t could_also_use_secluded_mem)
8942	{
8943	task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
8944	}
8945
8946	boolean_t
8947	task_can_use_secluded_mem(
8948	task_t task,
8949	boolean_t is_alloc)
8950	{
8951	if (task->task_can_use_secluded_mem) {
8952	assert(task->task_could_use_secluded_mem);
8953	assert(num_tasks_can_use_secluded_mem > `0`);
8954	return TRUE;
8955	}
8956	if (task->task_could_also_use_secluded_mem &&
8957	num_tasks_can_use_secluded_mem > `0`) {
8958	assert(num_tasks_can_use_secluded_mem > `0`);
8959	return TRUE;
8960	}
8961
8962	/*
8963	* If a single task is using more than some large amount of
8964	* memory (i.e. secluded_shutoff_trigger) and is approaching
8965	* its task limit, allow it to dip into secluded and begin
8966	* suppression of rebuilding secluded memory until that task exits.
8967	*/
8968	if (is_alloc && secluded_shutoff_trigger != `0`) {
8969	uint64_t phys_used = get_task_phys_footprint(task);
8970	uint64_t limit = get_task_phys_footprint_limit(task);
8971	if (phys_used > secluded_shutoff_trigger &&
8972	limit > secluded_shutoff_trigger &&
8973	phys_used > limit - secluded_shutoff_headroom) {
8974	start_secluded_suppression(task);
8975	return TRUE;
8976	}
8977	}
8978
8979	return FALSE;
8980	}
8981
8982	boolean_t
8983	task_could_use_secluded_mem(
8984	task_t task)
8985	{
8986	return task->task_could_use_secluded_mem;
8987	}
8988
8989	boolean_t
8990	task_could_also_use_secluded_mem(
8991	task_t task)
8992	{
8993	return task->task_could_also_use_secluded_mem;
8994	}
8995	#endif /* CONFIG_SECLUDED_MEMORY */
8996
8997	queue_head_t *
8998	task_io_user_clients(task_t task)
8999	{
9000	return &task->io_user_clients;
9001	}
9002
9003	void
9004	task_set_message_app_suspended(task_t task, boolean_t enable)
9005	{
9006	task->message_app_suspended = enable;
9007	}
9008
9009	void
9010	task_copy_fields_for_exec(task_t dst_task, task_t src_task)
9011	{
9012	dst_task->vtimers = src_task->vtimers;
9013	}
9014
9015	#if DEVELOPMENT \|\| DEBUG
9016	int vm_region_footprint = `0`;
9017	#endif /* DEVELOPMENT \|\| DEBUG */
9018
9019	boolean_t
9020	task_self_region_footprint(void)
9021	{
9022	#if DEVELOPMENT \|\| DEBUG
9023	if (vm_region_footprint) {
9024	/ system-wide override /
9025	return TRUE;
9026	}
9027	#endif /* DEVELOPMENT \|\| DEBUG */
9028	return current_task()->task_region_footprint;
9029	}
9030
9031	void
9032	task_self_region_footprint_set(
9033	boolean_t newval)
9034	{
9035	task_t curtask;
9036
9037	curtask = current_task();
9038	task_lock(task: curtask);
9039	if (newval) {
9040	curtask->task_region_footprint = TRUE;
9041	} else {
9042	curtask->task_region_footprint = FALSE;
9043	}
9044	task_unlock(task: curtask);
9045	}
9046
9047	void
9048	task_set_darkwake_mode(task_t task, boolean_t set_mode)
9049	{
9050	assert(task);
9051
9052	task_lock(task);
9053
9054	if (set_mode) {
9055	task->t_flags \|= TF_DARKWAKE_MODE;
9056	} else {
9057	task->t_flags &= ~(TF_DARKWAKE_MODE);
9058	}
9059
9060	task_unlock(task);
9061	}
9062
9063	boolean_t
9064	task_get_darkwake_mode(task_t task)
9065	{
9066	assert(task);
9067	return (task->t_flags & TF_DARKWAKE_MODE) != `0`;
9068	}
9069
9070	/*
9071	* Set default behavior for task's control port and EXC_GUARD variants that have
9072	* settable behavior.
9073	*
9074	* Platform binaries typically have one behavior, third parties another -
9075	* but there are special exception we may need to account for.
9076	*/
9077	void
9078	task_set_exc_guard_ctrl_port_default(
9079	task_t task,
9080	thread_t main_thread,
9081	const char *name,
9082	unsigned int namelen,
9083	boolean_t is_simulated,
9084	uint32_t platform,
9085	uint32_t sdk)
9086	{
9087	task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;
9088
9089	if (task_is_hardened_binary(task)) {
9090	/ set exc guard default behavior for hardened binaries /
9091	task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
9092
9093	if (`1` == task_pid(task)) {
9094	/ special flags for inittask - delivery every instance as corpse /
9095	task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
9096	} else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
9097	/ honor by-name default setting overrides /
9098
9099	int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);
9100
9101	for (int i = `0`; i < count; i++) {
9102	const struct task_exc_guard_named_default *named_default =
9103	&task_exc_guard_named_defaults[i];
9104	if (strncmp(s1: named_default->name, s2: name, n: namelen) == `0` &&
9105	strlen(s: named_default->name) == namelen) {
9106	task->task_exc_guard = named_default->behavior;
9107	break;
9108	}
9109	}
9110	}
9111
9112	/ set control port options for 1p code, inherited from parent task by default /
9113	opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
9114	} else {
9115	/ set exc guard default behavior for third-party code /
9116	task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
9117	/ set control port options for 3p code, inherited from parent task by default /
9118	opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
9119	}
9120
9121	if (is_simulated) {
9122	/ If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD /
9123	if ((platform == PLATFORM_IOSSIMULATOR && sdk < `0xf0000`) \|\|
9124	(platform == PLATFORM_TVOSSIMULATOR && sdk < `0xf0000`) \|\|
9125	(platform == PLATFORM_WATCHOSSIMULATOR && sdk < `0x80000`)) {
9126	task->task_exc_guard = TASK_EXC_GUARD_NONE;
9127	}
9128	/ Disable protection for control ports for simulated binaries /
9129	opts = TASK_CONTROL_PORT_OPTIONS_NONE;
9130	}
9131
9132
9133	task_set_control_port_options(task, opts);
9134
9135	task_set_immovable_pinned(task);
9136	main_thread_set_immovable_pinned(thread: main_thread);
9137	}
9138
9139	kern_return_t
9140	task_get_exc_guard_behavior(
9141	task_t task,
9142	task_exc_guard_behavior_t *behaviorp)
9143	{
9144	if (task == TASK_NULL) {
9145	return KERN_INVALID_TASK;
9146	}
9147	*behaviorp = task->task_exc_guard;
9148	return KERN_SUCCESS;
9149	}
9150
9151	kern_return_t
9152	task_set_exc_guard_behavior(
9153	task_t task,
9154	task_exc_guard_behavior_t new_behavior)
9155	{
9156	if (task == TASK_NULL) {
9157	return KERN_INVALID_TASK;
9158	}
9159	if (new_behavior & ~TASK_EXC_GUARD_ALL) {
9160	return KERN_INVALID_VALUE;
9161	}
9162
9163	/ limit setting to that allowed for this config /
9164	new_behavior = new_behavior & task_exc_guard_config_mask;
9165
9166	#if !defined (DEBUG) && !defined (DEVELOPMENT)
9167	/ On release kernels, only allow _upgrading_ exc guard behavior /
9168	task_exc_guard_behavior_t cur_behavior;
9169
9170	os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
9171	if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
9172	os_atomic_rmw_loop_give_up(return KERN_DENIED);
9173	}
9174
9175	if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
9176	os_atomic_rmw_loop_give_up(return KERN_DENIED);
9177	}
9178
9179	/ no restrictions on CORPSE bit /
9180	});
9181	#else
9182	task->task_exc_guard = new_behavior;
9183	#endif
9184	return KERN_SUCCESS;
9185	}
9186
9187	kern_return_t
9188	task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
9189	{
9190	#if DEVELOPMENT \|\| DEBUG
9191	if (task == TASK_NULL) {
9192	return KERN_INVALID_TASK;
9193	}
9194
9195	task_lock(task);
9196	if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
9197	task->t_flags \|= TF_NO_CORPSE_FORKING;
9198	} else {
9199	task->t_flags &= ~TF_NO_CORPSE_FORKING;
9200	}
9201	task_unlock(task);
9202
9203	return KERN_SUCCESS;
9204	#else
9205	(void)task;
9206	(void)behavior;
9207	return KERN_NOT_SUPPORTED;
9208	#endif
9209	}
9210
9211	boolean_t
9212	task_corpse_forking_disabled(task_t task)
9213	{
9214	boolean_t disabled = FALSE;
9215
9216	task_lock(task);
9217	disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
9218	task_unlock(task);
9219
9220	return disabled;
9221	}
9222
9223	#if __arm64__
9224	extern int legacy_footprint_entitlement_mode;
9225	extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
9226	extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
9227
9228
9229	void
9230	task_set_legacy_footprint(
9231	task_t task)
9232	{
9233	task_lock(task);
9234	task->task_legacy_footprint = TRUE;
9235	task_unlock(task);
9236	}
9237
9238	void
9239	task_set_extra_footprint_limit(
9240	task_t task)
9241	{
9242	if (task->task_extra_footprint_limit) {
9243	return;
9244	}
9245	task_lock(task);
9246	if (task->task_extra_footprint_limit) {
9247	task_unlock(task);
9248	return;
9249	}
9250	task->task_extra_footprint_limit = TRUE;
9251	task_unlock(task);
9252	memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
9253	}
9254
9255	void
9256	task_set_ios13extended_footprint_limit(
9257	task_t task)
9258	{
9259	if (task->task_ios13extended_footprint_limit) {
9260	return;
9261	}
9262	task_lock(task);
9263	if (task->task_ios13extended_footprint_limit) {
9264	task_unlock(task);
9265	return;
9266	}
9267	task->task_ios13extended_footprint_limit = TRUE;
9268	task_unlock(task);
9269	memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
9270	}
9271	#endif /* __arm64__ */
9272
9273	static inline ledger_amount_t
9274	task_ledger_get_balance(
9275	ledger_t ledger,
9276	int ledger_idx)
9277	{
9278	ledger_amount_t amount;
9279	amount = `0`;
9280	ledger_get_balance(ledger, entry: ledger_idx, balance: &amount);
9281	return amount;
9282	}
9283
9284	/*
9285	* Gather the amount of memory counted in a task's footprint due to
9286	* being in a specific set of ledgers.
9287	*/
9288	void
9289	task_ledgers_footprint(
9290	ledger_t ledger,
9291	ledger_amount_t *ledger_resident,
9292	ledger_amount_t *ledger_compressed)
9293	{
9294	*ledger_resident = `0`;
9295	*ledger_compressed = `0`;
9296
9297	/ purgeable non-volatile memory /
9298	*ledger_resident += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.purgeable_nonvolatile);
9299	*ledger_compressed += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.purgeable_nonvolatile_compressed);
9300
9301	/ "default" tagged memory /
9302	*ledger_resident += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.tagged_footprint);
9303	*ledger_compressed += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.tagged_footprint_compressed);
9304
9305	/ "network" currently never counts in the footprint... /
9306
9307	/ "media" tagged memory /
9308	*ledger_resident += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.media_footprint);
9309	*ledger_compressed += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.media_footprint_compressed);
9310
9311	/ "graphics" tagged memory /
9312	*ledger_resident += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.graphics_footprint);
9313	*ledger_compressed += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.graphics_footprint_compressed);
9314
9315	/ "neural" tagged memory /
9316	*ledger_resident += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.neural_footprint);
9317	*ledger_compressed += task_ledger_get_balance(ledger, ledger_idx: task_ledgers.neural_footprint_compressed);
9318	}
9319
9320	#if CONFIG_MEMORYSTATUS
9321	/*
9322	* Credit any outstanding task dirty time to the ledger.
9323	* memstat_dirty_start is pushed forward to prevent any possibility of double
9324	* counting, making it safe to call this as often as necessary to ensure that
9325	* anyone reading the ledger gets up-to-date information.
9326	*/
9327	void
9328	task_ledger_settle_dirty_time(task_t t)
9329	{
9330	task_lock(task: t);
9331
9332	uint64_t start = t->memstat_dirty_start;
9333	if (start) {
9334	uint64_t now = mach_absolute_time();
9335
9336	uint64_t duration;
9337	absolutetime_to_nanoseconds(abstime: now - start, result: &duration);
9338
9339	ledger_t ledger = get_task_ledger(t);
9340	ledger_credit(ledger, entry: task_ledgers.memorystatus_dirty_time, amount: duration);
9341
9342	t->memstat_dirty_start = now;
9343	}
9344
9345	task_unlock(task: t);
9346	}
9347	#endif /* CONFIG_MEMORYSTATUS */
9348
9349	void
9350	task_set_memory_ownership_transfer(
9351	task_t task,
9352	boolean_t value)
9353	{
9354	task_lock(task);
9355	task->task_can_transfer_memory_ownership = !!value;
9356	task_unlock(task);
9357	}
9358
9359	#if DEVELOPMENT \|\| DEBUG
9360
9361	void
9362	task_set_no_footprint_for_debug(task_t task, boolean_t value)
9363	{
9364	task_lock(task);
9365	task->task_no_footprint_for_debug = !!value;
9366	task_unlock(task);
9367	}
9368
9369	int
9370	task_get_no_footprint_for_debug(task_t task)
9371	{
9372	return task->task_no_footprint_for_debug;
9373	}
9374
9375	#endif /* DEVELOPMENT \|\| DEBUG */
9376
9377	void
9378	task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
9379	{
9380	vm_object_t find_vmo;
9381	size_t size = `0`;
9382
9383	/*
9384	* Allocate a save area for FP state before taking task_objq lock,
9385	* if necessary, to ensure that VM_KERNEL_ADDRHASH() doesn't cause
9386	* an FP state allocation while holding VM locks.
9387	*/
9388	ml_fp_save_area_prealloc();
9389
9390	task_objq_lock(task);
9391	if (query != NULL) {
9392	queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
9393	{
9394	vm_object_query_t p = &query[size++];
9395
9396	/ make sure to not overrun /
9397	if (size * sizeof(vm_object_query_data_t) > len) {
9398	--size;
9399	break;
9400	}
9401
9402	bzero(s: p, n: sizeof(*p));
9403	p->object_id = (vm_object_id_t) VM_KERNEL_ADDRHASH(find_vmo);
9404	p->virtual_size = find_vmo->internal ? find_vmo->vo_size : `0`;
9405	p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
9406	p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
9407	p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
9408	p->vo_no_footprint = find_vmo->vo_no_footprint;
9409	p->vo_ledger_tag = find_vmo->vo_ledger_tag;
9410	p->purgable = find_vmo->purgable;
9411
9412	if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
9413	p->compressed_size = vm_compressor_pager_get_count(mem_obj: find_vmo->pager) * PAGE_SIZE;
9414	} else {
9415	p->compressed_size = `0`;
9416	}
9417	}
9418	} else {
9419	size = (size_t)task->task_owned_objects;
9420	}
9421	task_objq_unlock(task);
9422
9423	*num = size;
9424	}
9425
9426	void
9427	task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
9428	{
9429	assert(output_size);
9430	assert(entries);
9431
9432	/ copy the vmobjects and vmobject data out of the task /
9433	if (buffer_size == `0`) {
9434	task_copy_vmobjects(task, NULL, len: `0`, num: entries);
9435	output_size = (entries > `0`) ? entries sizeof(vm_object_query_data_t) + sizeof(*buffer) : `0`;
9436	} else {
9437	assert(buffer);
9438	task_copy_vmobjects(task, query: &buffer->data[`0`], len: buffer_size - sizeof(*buffer), num: entries);
9439	buffer->entries = (uint64_t)*entries;
9440	output_size = entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
9441	}
9442	}
9443
9444	void
9445	task_store_owned_vmobject_info(task_t to_task, task_t from_task)
9446	{
9447	size_t buffer_size;
9448	vmobject_list_output_t buffer;
9449	size_t output_size;
9450	size_t entries;
9451
9452	assert(to_task != from_task);
9453
9454	/ get the size, allocate a bufferr, and populate /
9455	entries = `0`;
9456	output_size = `0`;
9457	task_get_owned_vmobjects(task: from_task, buffer_size: `0`, NULL, output_size: &output_size, entries: &entries);
9458
9459	if (output_size) {
9460	buffer_size = output_size;
9461	buffer = kalloc_data(buffer_size, Z_WAITOK);
9462
9463	if (buffer) {
9464	entries = `0`;
9465	output_size = `0`;
9466
9467	task_get_owned_vmobjects(task: from_task, buffer_size, buffer, output_size: &output_size, entries: &entries);
9468
9469	if (entries) {
9470	to_task->corpse_vmobject_list = buffer;
9471	to_task->corpse_vmobject_list_size = buffer_size;
9472	}
9473	}
9474	}
9475	}
9476
9477	void
9478	task_set_filter_msg_flag(
9479	task_t task,
9480	boolean_t flag)
9481	{
9482	assert(task != TASK_NULL);
9483
9484	if (flag) {
9485	task_ro_flags_set(task, TFRO_FILTER_MSG);
9486	} else {
9487	task_ro_flags_clear(task, TFRO_FILTER_MSG);
9488	}
9489	}
9490
9491	boolean_t
9492	task_get_filter_msg_flag(
9493	task_t task)
9494	{
9495	if (!task) {
9496	return false;
9497	}
9498
9499	return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
9500	}
9501	bool
9502	task_is_exotic(
9503	task_t task)
9504	{
9505	if (task == TASK_NULL) {
9506	return false;
9507	}
9508	return vm_map_is_exotic(map: get_task_map(task));
9509	}
9510
9511	bool
9512	task_is_alien(
9513	task_t task)
9514	{
9515	if (task == TASK_NULL) {
9516	return false;
9517	}
9518	return vm_map_is_alien(map: get_task_map(task));
9519	}
9520
9521
9522
9523	#if CONFIG_MACF
9524	/ Set the filter mask for Mach traps. /
9525	void
9526	mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
9527	{
9528	assert(task);
9529
9530	task_set_mach_trap_filter_mask(task, mask: maskptr);
9531	}
9532
9533	/ Set the filter mask for kobject msgs. /
9534	void
9535	mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
9536	{
9537	assert(task);
9538
9539	task_set_mach_kobj_filter_mask(task, mask: maskptr);
9540	}
9541
9542	/ Hook for mach trap/sc filter evaluation policy. /
9543	SECURITY_READ_ONLY_LATE(mac_task_mach_filter_cbfunc_t) mac_task_mach_trap_evaluate = NULL;
9544
9545	/ Hook for kobj message filter evaluation policy. /
9546	SECURITY_READ_ONLY_LATE(mac_task_kobj_filter_cbfunc_t) mac_task_kobj_msg_evaluate = NULL;
9547
9548	/ Set the callback hooks for the filtering policy. /
9549	int
9550	mac_task_register_filter_callbacks(
9551	const mac_task_mach_filter_cbfunc_t mach_cbfunc,
9552	const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
9553	{
9554	if (mach_cbfunc != NULL) {
9555	if (mac_task_mach_trap_evaluate != NULL) {
9556	return KERN_FAILURE;
9557	}
9558	mac_task_mach_trap_evaluate = mach_cbfunc;
9559	}
9560	if (kobj_cbfunc != NULL) {
9561	if (mac_task_kobj_msg_evaluate != NULL) {
9562	return KERN_FAILURE;
9563	}
9564	mac_task_kobj_msg_evaluate = kobj_cbfunc;
9565	}
9566
9567	return KERN_SUCCESS;
9568	}
9569	#endif /* CONFIG_MACF */
9570
9571	#if CONFIG_ROSETTA
9572	bool
9573	task_is_translated(task_t task)
9574	{
9575	extern boolean_t proc_is_translated(struct proc* p);
9576	return task && proc_is_translated(get_bsdtask_info(task));
9577	}
9578	#endif
9579
9580
9581
9582	#if __has_feature(ptrauth_calls)
9583	/ On FPAC, we want to deliver all PAC violations as fatal exceptions, regardless*
9584	* of the enable_pac_exception boot-arg value or any other entitlements.
9585	* The only case where we allow non-fatal PAC exceptions on FPAC is for debugging,
9586	* which requires Developer Mode enabled.
9587	*
9588	* On non-FPAC hardware, we gate the decision behind entitlements and the
9589	* enable_pac_exception boot-arg.
9590	*/
9591	extern int gARM_FEAT_FPAC;
9592	/*
9593	* Having the PAC_EXCEPTION_ENTITLEMENT entitlement means we always enforce all
9594	* of the PAC exception hardening: fatal exceptions and signed user state.
9595	*/
9596	#define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
9597	/*
9598	* On non-FPAC hardware, when enable_pac_exception boot-arg is set to true,
9599	* processes can choose to get non-fatal PAC exception delivery by setting
9600	* the SKIP_PAC_EXCEPTION_ENTITLEMENT entitlement.
9601	*/
9602	#define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
9603
9604	void
9605	task_set_pac_exception_fatal_flag(
9606	task_t task)
9607	{
9608	assert(task != TASK_NULL);
9609	bool pac_hardened_task = false;
9610	uint32_t set_flags = `0`;
9611
9612	/*
9613	* We must not apply this security policy on tasks which have opted out of mach hardening to
9614	* avoid regressions in third party plugins and third party apps when using AMFI boot-args
9615	*/
9616	bool platform_binary = task_get_platform_binary(task);
9617	#if XNU_TARGET_OS_OSX
9618	platform_binary &= !task_opted_out_mach_hardening(task);
9619	#endif /* XNU_TARGET_OS_OSX */
9620
9621	/*
9622	* On non-FPAC hardware, we allow gating PAC exceptions behind
9623	* SKIP_PAC_EXCEPTION_ENTITLEMENT and the boot-arg.
9624	*/
9625	if (!gARM_FEAT_FPAC && enable_pac_exception &&
9626	IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
9627	return;
9628	}
9629
9630	if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT) \|\| task_get_hardened_runtime(task)) {
9631	pac_hardened_task = true;
9632	set_flags \|= TFRO_PAC_ENFORCE_USER_STATE;
9633	}
9634
9635	/ On non-FPAC hardware, gate the fatal property behind entitlements and boot-arg. /
9636	if (pac_hardened_task \|\|
9637	((enable_pac_exception \|\| gARM_FEAT_FPAC) && platform_binary)) {
9638	/ If debugging is configured, do not make PAC exception fatal. /
9639	if (address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
9640	set_flags \|= TFRO_PAC_EXC_FATAL;
9641	}
9642	}
9643
9644	if (set_flags != `0`) {
9645	task_ro_flags_set(task, set_flags);
9646	}
9647	}
9648
9649	bool
9650	task_is_pac_exception_fatal(
9651	task_t task)
9652	{
9653	assert(task != TASK_NULL);
9654	return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
9655	}
9656	#endif /* __has_feature(ptrauth_calls) */
9657
9658	/*
9659	* FATAL_EXCEPTION_ENTITLEMENT, if present, will contain a list of
9660	* conditions for which access violations should deliver SIGKILL rather than
9661	* SIGSEGV. This is a hardening measure intended for use by applications
9662	* that are able to handle the stricter error handling behavior. Currently
9663	* this supports FATAL_EXCEPTION_ENTITLEMENT_JIT, which is documented in
9664	* user_fault_in_self_restrict_mode().
9665	*/
9666	#define FATAL_EXCEPTION_ENTITLEMENT "com.apple.security.fatal-exceptions"
9667	#define FATAL_EXCEPTION_ENTITLEMENT_JIT "jit"
9668
9669	void
9670	task_set_jit_exception_fatal_flag(
9671	task_t task)
9672	{
9673	assert(task != TASK_NULL);
9674	if (IOTaskHasStringEntitlement(task, FATAL_EXCEPTION_ENTITLEMENT, FATAL_EXCEPTION_ENTITLEMENT_JIT) &&
9675	address_space_debugged(process: task_get_proc_raw(task)) != KERN_SUCCESS) {
9676	task_ro_flags_set(task, TFRO_JIT_EXC_FATAL);
9677	}
9678	}
9679
9680	bool
9681	task_is_jit_exception_fatal(
9682	__unused task_t task)
9683	{
9684	#if !defined(XNU_PLATFORM_MacOSX)
9685	return true;
9686	#else
9687	assert(task != TASK_NULL);
9688	return !!(task_ro_flags_get(task) & TFRO_JIT_EXC_FATAL);
9689	#endif
9690	}
9691
9692	bool
9693	task_needs_user_signed_thread_state(
9694	task_t task)
9695	{
9696	assert(task != TASK_NULL);
9697	return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
9698	}
9699
9700	void
9701	task_set_tecs(task_t task)
9702	{
9703	if (task == TASK_NULL) {
9704	task = current_task();
9705	}
9706
9707	if (!machine_csv(cve: CPUVN_CI)) {
9708	return;
9709	}
9710
9711	LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);
9712
9713	task_lock(task);
9714
9715	task->t_flags \|= TF_TECS;
9716
9717	thread_t thread;
9718	queue_iterate(&task->threads, thread, thread_t, task_threads) {
9719	machine_tecs(thr: thread);
9720	}
9721	task_unlock(task);
9722	}
9723
9724	kern_return_t
9725	task_test_sync_upcall(
9726	task_t task,
9727	ipc_port_t send_port)
9728	{
9729	#if DEVELOPMENT \|\| DEBUG
9730	if (task != current_task() \|\| !IPC_PORT_VALID(send_port)) {
9731	return KERN_INVALID_ARGUMENT;
9732	}
9733
9734	/ Block on sync kernel upcall on the given send port /
9735	mach_test_sync_upcall(send_port);
9736
9737	ipc_port_release_send(send_port);
9738	return KERN_SUCCESS;
9739	#else
9740	(void)task;
9741	(void)send_port;
9742	return KERN_NOT_SUPPORTED;
9743	#endif
9744	}
9745
9746	kern_return_t
9747	task_test_async_upcall_propagation(
9748	task_t task,
9749	ipc_port_t send_port,
9750	int qos,
9751	int iotier)
9752	{
9753	#if DEVELOPMENT \|\| DEBUG
9754	kern_return_t kr;
9755
9756	if (task != current_task() \|\| !IPC_PORT_VALID(send_port)) {
9757	return KERN_INVALID_ARGUMENT;
9758	}
9759
9760	if (qos < THREAD_QOS_DEFAULT \|\| qos > THREAD_QOS_USER_INTERACTIVE \|\|
9761	iotier < THROTTLE_LEVEL_START \|\| iotier > THROTTLE_LEVEL_END) {
9762	return KERN_INVALID_ARGUMENT;
9763	}
9764
9765	struct thread_attr_for_ipc_propagation attr = {
9766	.tafip_iotier = iotier,
9767	.tafip_qos = qos
9768	};
9769
9770	/ Apply propagate attr to port /
9771	kr = ipc_port_propagate_thread_attr(send_port, attr);
9772	if (kr != KERN_SUCCESS) {
9773	return kr;
9774	}
9775
9776	thread_enable_send_importance(current_thread(), TRUE);
9777
9778	/ Perform an async kernel upcall on the given send port /
9779	mach_test_async_upcall(send_port);
9780	thread_enable_send_importance(current_thread(), FALSE);
9781
9782	ipc_port_release_send(send_port);
9783	return KERN_SUCCESS;
9784	#else
9785	(void)task;
9786	(void)send_port;
9787	(void)qos;
9788	(void)iotier;
9789	return KERN_NOT_SUPPORTED;
9790	#endif
9791	}
9792
9793	#if CONFIG_PROC_RESOURCE_LIMITS
9794	mach_port_name_t
9795	current_task_get_fatal_port_name(void)
9796	{
9797	mach_port_t task_fatal_port = MACH_PORT_NULL;
9798	mach_port_name_t port_name = `0`;
9799
9800	task_fatal_port = task_allocate_fatal_port();
9801
9802	if (task_fatal_port) {
9803	ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
9804	IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
9805	}
9806
9807	return port_name;
9808	}
9809	#endif /* CONFIG_PROC_RESOURCE_LIMITS */
9810
9811	#if defined(__x86_64__)
9812	bool
9813	curtask_get_insn_copy_optout(void)
9814	{
9815	bool optout;
9816	task_t cur_task = current_task();
9817
9818	task_lock(cur_task);
9819	optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9820	task_unlock(cur_task);
9821
9822	return optout;
9823	}
9824
9825	void
9826	curtask_set_insn_copy_optout(void)
9827	{
9828	task_t cur_task = current_task();
9829
9830	task_lock(cur_task);
9831
9832	cur_task->t_flags \|= TF_INSN_COPY_OPTOUT;
9833
9834	thread_t thread;
9835	queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
9836	machine_thread_set_insn_copy_optout(thread);
9837	}
9838	task_unlock(cur_task);
9839	}
9840	#endif /* defined(__x86_64__) */
9841
9842	void
9843	task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9844	{
9845	assert(task);
9846	assert(list_size);
9847
9848	*list = task->corpse_vmobject_list;
9849	*list_size = (size_t)task->corpse_vmobject_list_size;
9850	}
9851
9852	__abortlike
9853	static void
9854	panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
9855	{
9856	panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
9857	"proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
9858	}
9859
9860	proc_ro_t
9861	task_get_ro(task_t t)
9862	{
9863	proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;
9864
9865	zone_require_ro(zone_id: ZONE_ID_PROC_RO, elem_size: sizeof(struct proc_ro), addr: ro);
9866	if (__improbable(proc_ro_task(ro) != t)) {
9867	panic_proc_ro_task_backref_mismatch(t, ro);
9868	}
9869
9870	return ro;
9871	}
9872
9873	uint32_t
9874	task_ro_flags_get(task_t task)
9875	{
9876	return task_get_ro(t: task)->t_flags_ro;
9877	}
9878
9879	void
9880	task_ro_flags_set(task_t task, uint32_t flags)
9881	{
9882	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
9883	t_flags_ro, ZRO_ATOMIC_OR_32, flags);
9884	}
9885
9886	void
9887	task_ro_flags_clear(task_t task, uint32_t flags)
9888	{
9889	zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
9890	t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
9891	}
9892
9893	task_control_port_options_t
9894	task_get_control_port_options(task_t task)
9895	{
9896	return task_get_ro(t: task)->task_control_port_options;
9897	}
9898
9899	void
9900	task_set_control_port_options(task_t task, task_control_port_options_t opts)
9901	{
9902	zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
9903	task_control_port_options, &opts);
9904	}
9905
9906	/!*
9907	* @function kdp_task_is_locked
9908	*
9909	* @abstract
9910	* Checks if task is locked.
9911	*
9912	* @discussion
9913	* NOT SAFE: To be used only by kernel debugger.
9914	*
9915	* @param task task to check
9916	*
9917	* @returns TRUE if the task is locked.
9918	*/
9919	boolean_t
9920	kdp_task_is_locked(task_t task)
9921	{
9922	return kdp_lck_mtx_lock_spin_is_acquired(lck: &task->lock);
9923	}
9924
9925	#if DEBUG \|\| DEVELOPMENT
9926	/**
9927	*
9928	* Check if a threshold limit is valid based on the actual phys memory
9929	* limit. If they are same, race conditions may arise, so we have to prevent
9930	* it to happen.
9931	*/
9932	static diagthreshold_check_return
9933	task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value)
9934	{
9935	int phys_limit_mb;
9936	kern_return_t ret_value;
9937	bool threshold_enabled;
9938	bool dummy;
9939	ret_value = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, &threshold_enabled);
9940	if (ret_value != KERN_SUCCESS) {
9941	return ret_value;
9942	}
9943	if (is_diagnostics_value == true) {
9944	ret_value = task_get_phys_footprint_limit(task, &phys_limit_mb);
9945	} else {
9946	uint64_t diag_limit;
9947	ret_value = task_get_diag_footprint_limit_internal(task, &diag_limit, &dummy);
9948	phys_limit_mb = (int)(diag_limit >> `20`);
9949	}
9950	if (ret_value != KERN_SUCCESS) {
9951	return ret_value;
9952	}
9953	if (phys_limit_mb == (int) new_limit) {
9954	if (threshold_enabled == false) {
9955	return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED;
9956	} else {
9957	return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED;
9958	}
9959	}
9960	if (threshold_enabled == false) {
9961	return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED;
9962	} else {
9963	return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED;
9964	}
9965	}
9966	#endif
9967
9968	#if CONFIG_EXCLAVES
9969	kern_return_t
9970	task_add_conclave(task_t task, void vnode, int64_t off, const* char *task_conclave_id)
9971	{
9972	/*
9973	* Only launchd or properly entitled tasks can attach tasks to
9974	* conclaves.
9975	*/
9976	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
9977	return KERN_DENIED;
9978	}
9979
9980	/*
9981	* Only entitled tasks can have conclaves attached.
9982	* Allow tasks which have the SPAWN privilege to also host conclaves.
9983	* This allows xpc proxy to add a conclave before execing a daemon.
9984	*/
9985	if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST) &&
9986	!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
9987	return KERN_DENIED;
9988	}
9989
9990	/*
9991	* Make this EXCLAVES_BOOT_STAGE_2 until userspace is actually
9992	* triggering the EXCLAVESKIT boot stage.
9993	*/
9994	kern_return_t kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_2);
9995	if (kr != KERN_SUCCESS) {
9996	return kr;
9997	}
9998
9999	return exclaves_conclave_attach(EXCLAVES_DOMAIN_KERNEL, task_conclave_id, task);
10000	}
10001
10002	kern_return_t
10003	task_launch_conclave(mach_port_name_t port __unused)
10004	{
10005	kern_return_t kr = KERN_FAILURE;
10006	assert3u(port, ==, MACH_PORT_NULL);
10007	exclaves_resource_t *conclave = task_get_conclave(current_task());
10008	if (conclave == NULL) {
10009	return kr;
10010	}
10011
10012	kr = exclaves_conclave_launch(conclave);
10013	if (kr != KERN_SUCCESS) {
10014	return kr;
10015	}
10016	task_set_conclave_taint(current_task());
10017
10018	return KERN_SUCCESS;
10019	}
10020
10021	kern_return_t
10022	task_inherit_conclave(task_t old_task, task_t new_task, void *vnode, int64_t off)
10023	{
10024	if (old_task->conclave == NULL \|\|
10025	!exclaves_conclave_is_attached(old_task->conclave)) {
10026	return KERN_SUCCESS;
10027	}
10028
10029	/*
10030	* Only launchd or properly entitled tasks can attach tasks to
10031	* conclaves.
10032	*/
10033	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
10034	return KERN_DENIED;
10035	}
10036
10037	/*
10038	* Only entitled tasks can have conclaves attached.
10039	*/
10040	if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST)) {
10041	return KERN_DENIED;
10042	}
10043
10044	return exclaves_conclave_inherit(old_task->conclave, old_task, new_task);
10045	}
10046
10047	void
10048	task_clear_conclave(task_t task)
10049	{
10050	if (task->exclave_crash_info) {
10051	kfree_data(task->exclave_crash_info, CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE);
10052	task->exclave_crash_info = NULL;
10053	}
10054
10055	if (task->conclave == NULL) {
10056	return;
10057	}
10058
10059	/*
10060	* XXX
10061	* This should only fail if either the conclave is in an unexpected
10062	* state (i.e. not ATTACHED) or if the wrong port is supplied.
10063	* We should re-visit this and make sure we guarantee the above
10064	* constraints.
10065	*/
10066	__assert_only kern_return_t ret =
10067	exclaves_conclave_detach(task->conclave, task);
10068	assert3u(ret, ==, KERN_SUCCESS);
10069	}
10070
10071	void
10072	task_stop_conclave(task_t task, bool gather_crash_bt)
10073	{
10074	thread_t thread = current_thread();
10075
10076	if (task->conclave == NULL) {
10077	return;
10078	}
10079
10080	if (task_should_panic_on_exit_due_to_conclave_taint(task)) {
10081	panic("Conclave tainted task %p terminated\n", task);
10082	}
10083
10084	/ Stash the task on current thread for conclave teardown /
10085	thread->conclave_stop_task = task;
10086
10087	__assert_only kern_return_t ret =
10088	exclaves_conclave_stop(task->conclave, gather_crash_bt);
10089
10090	thread->conclave_stop_task = TASK_NULL;
10091
10092	assert3u(ret, ==, KERN_SUCCESS);
10093	}
10094
10095	kern_return_t
10096	task_stop_conclave_upcall(void)
10097	{
10098	task_t task = current_task();
10099	if (task->conclave == NULL) {
10100	return KERN_INVALID_TASK;
10101	}
10102
10103	return exclaves_conclave_stop_upcall(task->conclave);
10104	}
10105
10106	kern_return_t
10107	task_stop_conclave_upcall_complete(void)
10108	{
10109	task_t task = current_task();
10110	thread_t thread = current_thread();
10111
10112	if (!(thread->th_exclaves_state & TH_EXCLAVES_STOP_UPCALL_PENDING)) {
10113	return KERN_SUCCESS;
10114	}
10115
10116	assert3p(task->conclave, !=, NULL);
10117
10118	return exclaves_conclave_stop_upcall_complete(task->conclave, task);
10119	}
10120
10121	kern_return_t
10122	task_suspend_conclave_upcall(uint64_t *scid_list, size_t scid_list_count)
10123	{
10124	task_t task = current_task();
10125	thread_t thread;
10126	int scid_count = `0`;
10127	kern_return_t kr;
10128	if (task->conclave == NULL) {
10129	return KERN_INVALID_TASK;
10130	}
10131
10132	kr = task_hold_and_wait(task);
10133
10134	task_lock(task);
10135	queue_iterate(&task->threads, thread, thread_t, task_threads)
10136	{
10137	if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
10138	scid_list[scid_count++] = thread->th_exclaves_scheduling_context_id;
10139	if (scid_count >= scid_list_count) {
10140	break;
10141	}
10142	}
10143	}
10144
10145	task_unlock(task);
10146	return kr;
10147	}
10148
10149	kern_return_t
10150	task_crash_info_conclave_upcall(task_t task, const xnuupcalls_conclavesharedbuffer_s *shared_buf,
10151	uint32_t length)
10152	{
10153	if (task->conclave == NULL) {
10154	return KERN_INVALID_TASK;
10155	}
10156
10157	/ Allocate the buffer and memcpy it /
10158	int task_crash_info_buffer_size = `0`;
10159	uint8_t * task_crash_info_buffer;
10160
10161	if (!length) {
10162	printf("Conclave upcall: task_crash_info_conclave_upcall did not return any page addresses\n");
10163	return KERN_INVALID_ARGUMENT;
10164	}
10165
10166	task_crash_info_buffer_size = CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE;
10167	assert3u(task_crash_info_buffer_size, >=, length);
10168
10169	task_crash_info_buffer = kalloc_data(task_crash_info_buffer_size, Z_WAITOK);
10170	if (!task_crash_info_buffer) {
10171	panic("task_crash_info_conclave_upcall: cannot allocate buffer for task_info shared memory");
10172	return KERN_INVALID_ARGUMENT;
10173	}
10174
10175	uint8_t * dst = task_crash_info_buffer;
10176	uint32_t remaining = length;
10177	for (size_t i = `0`; i < CONCLAVE_CRASH_BUFFER_PAGECOUNT; i++) {
10178	if (remaining) {
10179	memcpy(dst, (uint8_t*)phystokv((pmap_paddr_t)shared_buf->physaddr[i]), PAGE_SIZE);
10180	remaining = (remaining >= PAGE_SIZE) ? remaining - PAGE_SIZE : `0`;
10181	dst += PAGE_SIZE;
10182	}
10183	}
10184
10185	task_lock(task);
10186	if (task->exclave_crash_info == NULL && task->active) {
10187	task->exclave_crash_info = task_crash_info_buffer;
10188	task->exclave_crash_info_length = length;
10189	task_crash_info_buffer = NULL;
10190	}
10191	task_unlock(task);
10192
10193	if (task_crash_info_buffer) {
10194	kfree_data(task_crash_info_buffer, task_crash_info_buffer_size);
10195	}
10196
10197	return KERN_SUCCESS;
10198	}
10199
10200	exclaves_resource_t *
10201	task_get_conclave(task_t task)
10202	{
10203	return task->conclave;
10204	}
10205
10206	extern boolean_t IOPMRootDomainGetWillShutdown(void);
10207
10208	TUNABLE(bool, disable_conclave_taint, "disable_conclave_taint", true); / Do not taint processes when they talk to conclave, so system does not panic when exit. /
10209
10210	static bool
10211	task_should_panic_on_exit_due_to_conclave_taint(task_t task)
10212	{
10213	/ Check if boot-arg to disable conclave taint is set /
10214	if (disable_conclave_taint) {
10215	return false;
10216	}
10217
10218	/ Check if the system is shutting down /
10219	if (IOPMRootDomainGetWillShutdown()) {
10220	return false;
10221	}
10222
10223	return task_is_conclave_tainted(task);
10224	}
10225
10226	static bool
10227	task_is_conclave_tainted(task_t task)
10228	{
10229	return (task->t_exclave_state & TES_CONCLAVE_TAINTED) != `0` &&
10230	!(task->t_exclave_state & TES_CONCLAVE_UNTAINTABLE);
10231	}
10232
10233	static void
10234	task_set_conclave_taint(task_t task)
10235	{
10236	os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_TAINTED, relaxed);
10237	}
10238
10239	void
10240	task_set_conclave_untaintable(task_t task)
10241	{
10242	os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_UNTAINTABLE, relaxed);
10243	}
10244
10245	void
10246	task_add_conclave_crash_info(task_t task, void *crash_info_ptr)
10247	{
10248	__block kern_return_t error = KERN_SUCCESS;
10249	tb_error_t tberr = TB_ERROR_SUCCESS;
10250	void *crash_info;
10251	uint32_t crash_info_length = `0`;
10252
10253	if (task->conclave == NULL) {
10254	return;
10255	}
10256
10257	if (task->exclave_crash_info_length == `0`) {
10258	return;
10259	}
10260
10261	error = kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
10262	STACKSHOT_KCCONTAINER_EXCLAVES, `0`);
10263	if (error != KERN_SUCCESS) {
10264	return;
10265	}
10266
10267	crash_info = task->exclave_crash_info;
10268	crash_info_length = task->exclave_crash_info_length;
10269
10270	tberr = stackshot_stackshotresult__unmarshal(crash_info,
10271	(uint64_t)crash_info_length, ^(stackshot_stackshotresult_s result){
10272	error = stackshot_exclaves_process_stackshot(&result, crash_info_ptr);
10273	if (error != KERN_SUCCESS) {
10274	printf("stackshot_exclaves_process_result: error processing stackshot result %d\n", error);
10275	}
10276	});
10277	if (tberr != TB_ERROR_SUCCESS) {
10278	printf("task_conclave_crash: task_add_conclave_crash_info could not unmarshal stackshot data 0x%x\n", tberr);
10279	error = KERN_FAILURE;
10280	goto error_exit;
10281	}
10282
10283	error_exit:
10284	kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_END,
10285	STACKSHOT_KCCONTAINER_EXCLAVES, `0`);
10286
10287	return;
10288	}
10289
10290	#endif /* CONFIG_EXCLAVES */
10291
10292	#pragma mark task utils
10293
10294	/ defined in bsd/kern/kern_proc.c /
10295	extern void proc_name(int pid, char buf, int* size);
10296	extern char proc_best_name(struct* proc *p);
10297
10298	void
10299	task_procname(task_t task, char buf, int* size)
10300	{
10301	proc_name(pid: task_pid(task), buf, size);
10302	}
10303
10304	void
10305	task_best_name(task_t task, char *buf, size_t size)
10306	{
10307	char *name = proc_best_name(p: task_get_proc_raw(task));
10308	strlcpy(dst: buf, src: name, n: size);
10309	}
10310

Browse the source code of xnu/osfmk/kern/task.c