1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63/*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81/*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89#include <mach/mach_types.h>
90#include <mach/boolean.h>
91#include <mach/host_priv.h>
92#include <mach/machine/vm_types.h>
93#include <mach/vm_param.h>
94#include <mach/mach_vm.h>
95#include <mach/semaphore.h>
96#include <mach/task_info.h>
97#include <mach/task_inspect.h>
98#include <mach/task_special_ports.h>
99#include <mach/sdt.h>
100#include <mach/mach_test_upcall.h>
101
102#include <ipc/ipc_importance.h>
103#include <ipc/ipc_types.h>
104#include <ipc/ipc_space.h>
105#include <ipc/ipc_entry.h>
106#include <ipc/ipc_hash.h>
107#include <ipc/ipc_init.h>
108
109#include <kern/kern_types.h>
110#include <kern/mach_param.h>
111#include <kern/misc_protos.h>
112#include <kern/task.h>
113#include <kern/thread.h>
114#include <kern/coalition.h>
115#include <kern/zalloc.h>
116#include <kern/kalloc.h>
117#include <kern/kern_cdata.h>
118#include <kern/processor.h>
119#include <kern/recount.h>
120#include <kern/sched_prim.h> /* for thread_wakeup */
121#include <kern/ipc_tt.h>
122#include <kern/host.h>
123#include <kern/clock.h>
124#include <kern/timer.h>
125#include <kern/assert.h>
126#include <kern/affinity.h>
127#include <kern/exc_resource.h>
128#include <kern/machine.h>
129#include <kern/policy_internal.h>
130#include <kern/restartable.h>
131#include <kern/ipc_kobject.h>
132
133#include <corpses/task_corpse.h>
134#if CONFIG_TELEMETRY
135#include <kern/telemetry.h>
136#endif
137
138#if CONFIG_PERVASIVE_CPI
139#include <kern/monotonic.h>
140#include <machine/monotonic.h>
141#endif /* CONFIG_PERVASIVE_CPI */
142
143#if CONFIG_EXCLAVES
144#include "exclaves_boot.h"
145#include "exclaves_resource.h"
146#include "exclaves_boot.h"
147#include "kern/exclaves.tightbeam.h"
148#endif /* CONFIG_EXCLAVES */
149
150#include <os/log.h>
151
152#include <vm/pmap.h>
153#include <vm/vm_map.h>
154#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
155#include <vm/vm_pageout.h>
156#include <vm/vm_protos.h>
157#include <vm/vm_purgeable_internal.h>
158#include <vm/vm_compressor_pager.h>
159#include <vm/vm_reclaim_internal.h>
160
161#include <sys/proc_ro.h>
162#include <sys/resource.h>
163#include <sys/signalvar.h> /* for coredump */
164#include <sys/bsdtask_info.h>
165#include <sys/kdebug_triage.h>
166#include <sys/code_signing.h> /* for address_space_debugged */
167/*
168 * Exported interfaces
169 */
170
171#include <mach/task_server.h>
172#include <mach/mach_host_server.h>
173#include <mach/mach_port_server.h>
174
175#include <vm/vm_shared_region.h>
176
177#include <libkern/OSDebug.h>
178#include <libkern/OSAtomic.h>
179#include <libkern/section_keywords.h>
180
181#include <mach-o/loader.h>
182#include <kdp/kdp_dyld.h>
183
184#include <kern/sfi.h> /* picks up ledger.h */
185
186#if CONFIG_MACF
187#include <security/mac_mach_internal.h>
188#endif
189
190#include <IOKit/IOBSD.h>
191#include <kdp/processor_core.h>
192
193#include <string.h>
194
195#if KPERF
196extern int kpc_force_all_ctrs(task_t, int);
197#endif
198
199SECURITY_READ_ONLY_LATE(task_t) kernel_task;
200
201int64_t next_taskuniqueid = 0;
202const size_t task_alignment = _Alignof(struct task);
203extern const size_t proc_alignment;
204extern size_t proc_struct_size;
205extern size_t proc_and_task_size;
206size_t task_struct_size;
207
208extern uint32_t ipc_control_port_options;
209
210extern int large_corpse_count;
211
212extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
213extern void task_disown_frozen_csegs(task_t owner_task);
214
215static void task_port_no_senders(ipc_port_t, mach_msg_type_number_t);
216static void task_port_with_flavor_no_senders(ipc_port_t, mach_msg_type_number_t);
217static void task_suspension_no_senders(ipc_port_t, mach_msg_type_number_t);
218static inline void task_zone_init(void);
219
220#if CONFIG_EXCLAVES
221static bool task_should_panic_on_exit_due_to_conclave_taint(task_t task);
222static bool task_is_conclave_tainted(task_t task);
223static void task_set_conclave_taint(task_t task);
224kern_return_t task_crash_info_conclave_upcall(task_t task,
225 const xnuupcalls_conclavesharedbuffer_s *shared_buf, uint32_t length);
226kern_return_t
227stackshot_exclaves_process_stackshot(const stackshot_stackshotresult_s *_Nonnull result, void *kcdata_ptr);
228#endif /* CONFIG_EXCLAVES */
229
230IPC_KOBJECT_DEFINE(IKOT_TASK_NAME);
231IPC_KOBJECT_DEFINE(IKOT_TASK_CONTROL,
232 .iko_op_no_senders = task_port_no_senders);
233IPC_KOBJECT_DEFINE(IKOT_TASK_READ,
234 .iko_op_no_senders = task_port_with_flavor_no_senders);
235IPC_KOBJECT_DEFINE(IKOT_TASK_INSPECT,
236 .iko_op_no_senders = task_port_with_flavor_no_senders);
237IPC_KOBJECT_DEFINE(IKOT_TASK_RESUME,
238 .iko_op_no_senders = task_suspension_no_senders);
239
240#if CONFIG_PROC_RESOURCE_LIMITS
241static void task_fatal_port_no_senders(ipc_port_t, mach_msg_type_number_t);
242static mach_port_t task_allocate_fatal_port(void);
243
244IPC_KOBJECT_DEFINE(IKOT_TASK_FATAL,
245 .iko_op_stable = true,
246 .iko_op_no_senders = task_fatal_port_no_senders);
247
248extern void task_id_token_set_port(task_id_token_t token, ipc_port_t port);
249#endif /* CONFIG_PROC_RESOURCE_LIMITS */
250
251/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
252int audio_active = 0;
253
254/*
255 * structure for tracking zone usage
256 * Used either one per task/thread for all zones or <per-task,per-zone>.
257 */
258typedef struct zinfo_usage_store_t {
259 /* These fields may be updated atomically, and so must be 8 byte aligned */
260 uint64_t alloc __attribute__((aligned(8))); /* allocation counter */
261 uint64_t free __attribute__((aligned(8))); /* free counter */
262} zinfo_usage_store_t;
263
264/**
265 * Return codes related to diag threshold and memory limit
266 */
267__options_decl(diagthreshold_check_return, int, {
268 THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED = 0,
269 THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED = 1,
270 THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED = 2,
271 THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED = 3,
272});
273
274/**
275 * Return codes related to diag threshold and memory limit
276 */
277__options_decl(current_, int, {
278 THRESHOLD_IS_SAME_AS_LIMIT = 0,
279 THRESHOLD_IS_NOT_SAME_AS_LIMIT = 1
280});
281
282zinfo_usage_store_t tasks_tkm_private;
283zinfo_usage_store_t tasks_tkm_shared;
284
285/* A container to accumulate statistics for expired tasks */
286expired_task_statistics_t dead_task_statistics;
287LCK_SPIN_DECLARE_ATTR(dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
288
289ledger_template_t task_ledger_template = NULL;
290
291/* global lock for task_dyld_process_info_notify_{register, deregister, get_trap} */
292LCK_GRP_DECLARE(g_dyldinfo_mtx_grp, "g_dyldinfo");
293LCK_MTX_DECLARE(g_dyldinfo_mtx, &g_dyldinfo_mtx_grp);
294
295SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
296{.cpu_time = -1,
297 .tkm_private = -1,
298 .tkm_shared = -1,
299 .phys_mem = -1,
300 .wired_mem = -1,
301 .internal = -1,
302 .iokit_mapped = -1,
303 .external = -1,
304 .reusable = -1,
305 .alternate_accounting = -1,
306 .alternate_accounting_compressed = -1,
307 .page_table = -1,
308 .phys_footprint = -1,
309 .internal_compressed = -1,
310 .purgeable_volatile = -1,
311 .purgeable_nonvolatile = -1,
312 .purgeable_volatile_compressed = -1,
313 .purgeable_nonvolatile_compressed = -1,
314 .tagged_nofootprint = -1,
315 .tagged_footprint = -1,
316 .tagged_nofootprint_compressed = -1,
317 .tagged_footprint_compressed = -1,
318 .network_volatile = -1,
319 .network_nonvolatile = -1,
320 .network_volatile_compressed = -1,
321 .network_nonvolatile_compressed = -1,
322 .media_nofootprint = -1,
323 .media_footprint = -1,
324 .media_nofootprint_compressed = -1,
325 .media_footprint_compressed = -1,
326 .graphics_nofootprint = -1,
327 .graphics_footprint = -1,
328 .graphics_nofootprint_compressed = -1,
329 .graphics_footprint_compressed = -1,
330 .neural_nofootprint = -1,
331 .neural_footprint = -1,
332 .neural_nofootprint_compressed = -1,
333 .neural_footprint_compressed = -1,
334 .platform_idle_wakeups = -1,
335 .interrupt_wakeups = -1,
336#if CONFIG_SCHED_SFI
337 .sfi_wait_times = { 0 /* initialized at runtime */},
338#endif /* CONFIG_SCHED_SFI */
339 .cpu_time_billed_to_me = -1,
340 .cpu_time_billed_to_others = -1,
341 .physical_writes = -1,
342 .logical_writes = -1,
343 .logical_writes_to_external = -1,
344#if DEBUG || DEVELOPMENT
345 .pages_grabbed = -1,
346 .pages_grabbed_kern = -1,
347 .pages_grabbed_iopl = -1,
348 .pages_grabbed_upl = -1,
349#endif
350#if CONFIG_FREEZE
351 .frozen_to_swap = -1,
352#endif /* CONFIG_FREEZE */
353 .energy_billed_to_me = -1,
354 .energy_billed_to_others = -1,
355#if CONFIG_PHYS_WRITE_ACCT
356 .fs_metadata_writes = -1,
357#endif /* CONFIG_PHYS_WRITE_ACCT */
358#if CONFIG_MEMORYSTATUS
359 .memorystatus_dirty_time = -1,
360#endif /* CONFIG_MEMORYSTATUS */
361 .swapins = -1,
362 .conclave_mem = -1, };
363
364/* System sleep state */
365boolean_t tasks_suspend_state;
366
367__options_decl(send_exec_resource_is_fatal, bool, {
368 IS_NOT_FATAL = false,
369 IS_FATAL = true
370});
371
372__options_decl(send_exec_resource_is_diagnostics, bool, {
373 IS_NOT_DIAGNOSTICS = false,
374 IS_DIAGNOSTICS = true
375});
376
377__options_decl(send_exec_resource_is_warning, bool, {
378 IS_NOT_WARNING = false,
379 IS_WARNING = true
380});
381
382__options_decl(send_exec_resource_options_t, uint8_t, {
383 EXEC_RESOURCE_FATAL = 0x01,
384 EXEC_RESOURCE_DIAGNOSTIC = 0x02,
385 EXEC_RESOURCE_WARNING = 0x04,
386});
387
388/**
389 * Actions to take when a process has reached the memory limit or the diagnostics threshold limits
390 */
391static inline void task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning);
392#if DEBUG || DEVELOPMENT
393static inline void task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size);
394#endif
395void init_task_ledgers(void);
396void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
397void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
398void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
399void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
400void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options);
401void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
402#if CONFIG_PROC_RESOURCE_LIMITS
403void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit);
404mach_port_name_t current_task_get_fatal_port_name(void);
405void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit);
406#endif /* CONFIG_PROC_RESOURCE_LIMITS */
407
408kern_return_t task_suspend_internal_locked(task_t);
409kern_return_t task_suspend_internal(task_t);
410kern_return_t task_resume_internal_locked(task_t);
411kern_return_t task_resume_internal(task_t);
412static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
413
414extern kern_return_t iokit_task_terminate(task_t task, int phase);
415extern void iokit_task_app_suspended_changed(task_t task);
416
417extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
418extern void bsd_copythreadname(void *dst_uth, void *src_uth);
419extern kern_return_t thread_resume(thread_t thread);
420
421extern int exit_with_port_space_exception(void *proc, mach_exception_code_t code, mach_exception_subcode_t subcode);
422
423// Condition to include diag footprints
424#define RESETTABLE_DIAG_FOOTPRINT_LIMITS ((DEBUG || DEVELOPMENT) && CONFIG_MEMORYSTATUS)
425
426// Warn tasks when they hit 80% of their memory limit.
427#define PHYS_FOOTPRINT_WARNING_LEVEL 80
428
429#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
430#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
431
432/*
433 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
434 *
435 * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user
436 * stacktraces, aka micro-stackshots)
437 */
438#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
439
440int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
441int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */
442
443unsigned int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
444
445TUNABLE(bool, disable_exc_resource, "disable_exc_resource", false); /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
446TUNABLE(bool, disable_exc_resource_during_audio, "disable_exc_resource_during_audio", true); /* Global override to suppress EXC_RESOURCE while audio is active */
447
448ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
449unsigned int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
450
451/*
452 * Configure per-task memory limit.
453 * The boot-arg is interpreted as Megabytes,
454 * and takes precedence over the device tree.
455 * Setting the boot-arg to 0 disables task limits.
456 */
457TUNABLE_DT_WRITEABLE(int, max_task_footprint_mb, "/defaults", "kern.max_task_pmem", "max_task_pmem", 0, TUNABLE_DT_NONE);
458
459/* I/O Monitor Limits */
460#define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
461#define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
462
463uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
464uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
465
466#define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
467int64_t io_telemetry_limit; /* Threshold to take a microstackshot (0 indicated I/O telemetry is turned off) */
468int64_t global_logical_writes_count = 0; /* Global count for logical writes */
469int64_t global_logical_writes_to_external_count = 0; /* Global count for logical writes to external storage*/
470static boolean_t global_update_logical_writes(int64_t, int64_t*);
471
472#if DEBUG || DEVELOPMENT
473static diagthreshold_check_return task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value);
474#endif
475#define TASK_MAX_THREAD_LIMIT 256
476
477#if MACH_ASSERT
478int pmap_ledgers_panic = 1;
479int pmap_ledgers_panic_leeway = 3;
480#endif /* MACH_ASSERT */
481
482int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
483
484#if CONFIG_COREDUMP
485int hwm_user_cores = 0; /* high watermark violations generate user core files */
486#endif
487
488#ifdef MACH_BSD
489extern uint32_t proc_platform(const struct proc *);
490extern uint32_t proc_sdk(struct proc *);
491extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
492extern int proc_pid(struct proc *p);
493extern int proc_selfpid(void);
494extern struct proc *current_proc(void);
495extern char *proc_name_address(struct proc *p);
496extern uint64_t get_dispatchqueue_offset_from_proc(void *);
497extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
498extern void workq_proc_suspended(struct proc *p);
499extern void workq_proc_resumed(struct proc *p);
500extern struct proc *kernproc;
501
502#if CONFIG_MEMORYSTATUS
503extern void proc_memstat_skip(struct proc* p, boolean_t set);
504extern void memorystatus_on_ledger_footprint_exceeded(int warning, bool memlimit_is_active, bool memlimit_is_fatal);
505extern void memorystatus_log_exception(const int max_footprint_mb, bool memlimit_is_active, bool memlimit_is_fatal);
506extern void memorystatus_log_diag_threshold_exception(const int diag_threshold_value);
507extern boolean_t memorystatus_allowed_vm_map_fork(task_t task, bool *is_large);
508extern uint64_t memorystatus_available_memory_internal(struct proc *p);
509
510#if DEVELOPMENT || DEBUG
511extern void memorystatus_abort_vm_map_fork(task_t);
512#endif
513
514#endif /* CONFIG_MEMORYSTATUS */
515
516#endif /* MACH_BSD */
517
518/* Boot-arg that turns on fatal pac exception delivery for all first-party apps */
519static TUNABLE(bool, enable_pac_exception, "enable_pac_exception", false);
520
521/*
522 * Defaults for controllable EXC_GUARD behaviors
523 *
524 * Internal builds are fatal by default (except BRIDGE).
525 * Create an alternate set of defaults for special processes by name.
526 */
527struct task_exc_guard_named_default {
528 char *name;
529 uint32_t behavior;
530};
531#define _TASK_EXC_GUARD_MP_CORPSE (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE)
532#define _TASK_EXC_GUARD_MP_ONCE (_TASK_EXC_GUARD_MP_CORPSE | TASK_EXC_GUARD_MP_ONCE)
533#define _TASK_EXC_GUARD_MP_FATAL (TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_FATAL)
534
535#define _TASK_EXC_GUARD_VM_CORPSE (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_ONCE)
536#define _TASK_EXC_GUARD_VM_ONCE (_TASK_EXC_GUARD_VM_CORPSE | TASK_EXC_GUARD_VM_ONCE)
537#define _TASK_EXC_GUARD_VM_FATAL (TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_FATAL)
538
539#define _TASK_EXC_GUARD_ALL_CORPSE (_TASK_EXC_GUARD_MP_CORPSE | _TASK_EXC_GUARD_VM_CORPSE)
540#define _TASK_EXC_GUARD_ALL_ONCE (_TASK_EXC_GUARD_MP_ONCE | _TASK_EXC_GUARD_VM_ONCE)
541#define _TASK_EXC_GUARD_ALL_FATAL (_TASK_EXC_GUARD_MP_FATAL | _TASK_EXC_GUARD_VM_FATAL)
542
543/* cannot turn off FATAL and DELIVER bit if set */
544uint32_t task_exc_guard_no_unset_mask = TASK_EXC_GUARD_MP_FATAL | TASK_EXC_GUARD_VM_FATAL |
545 TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_VM_DELIVER;
546/* cannot turn on ONCE bit if unset */
547uint32_t task_exc_guard_no_set_mask = TASK_EXC_GUARD_MP_ONCE | TASK_EXC_GUARD_VM_ONCE;
548
549#if !defined(XNU_TARGET_OS_BRIDGE)
550
551uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_FATAL;
552uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
553/*
554 * These "by-process-name" default overrides are intended to be a short-term fix to
555 * quickly get over races between changes introducing new EXC_GUARD raising behaviors
556 * in some process and a change in default behavior for same. We should ship with
557 * these lists empty (by fixing the bugs, or explicitly changing the task's EXC_GUARD
558 * exception behavior via task_set_exc_guard_behavior()).
559 *
560 * XXX Remember to add/remove TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS back to
561 * task_exc_guard_default when transitioning this list between empty and
562 * non-empty.
563 */
564static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
565
566#else /* !defined(XNU_TARGET_OS_BRIDGE) */
567
568uint32_t task_exc_guard_default = _TASK_EXC_GUARD_ALL_ONCE;
569uint32_t task_exc_guard_config_mask = TASK_EXC_GUARD_MP_ALL | TASK_EXC_GUARD_VM_ALL;
570static struct task_exc_guard_named_default task_exc_guard_named_defaults[] = {};
571
572#endif /* !defined(XNU_TARGET_OS_BRIDGE) */
573
574/* Forwards */
575
576static void task_hold_locked(task_t task);
577static void task_wait_locked(task_t task, boolean_t until_not_runnable);
578static void task_release_locked(task_t task);
579extern task_t proc_get_task_raw(void *proc);
580extern void task_ref_hold_proc_task_struct(task_t task);
581extern void task_release_proc_task_struct(task_t task);
582
583static void task_synchronizer_destroy_all(task_t task);
584static os_ref_count_t
585task_add_turnstile_watchports_locked(
586 task_t task,
587 struct task_watchports *watchports,
588 struct task_watchport_elem **previous_elem_array,
589 ipc_port_t *portwatch_ports,
590 uint32_t portwatch_count);
591
592static os_ref_count_t
593task_remove_turnstile_watchports_locked(
594 task_t task,
595 struct task_watchports *watchports,
596 ipc_port_t *port_freelist);
597
598static struct task_watchports *
599task_watchports_alloc_init(
600 task_t task,
601 thread_t thread,
602 uint32_t count);
603
604static void
605task_watchports_deallocate(
606 struct task_watchports *watchports);
607
608__attribute__((always_inline)) inline void
609task_lock(task_t task)
610{
611 lck_mtx_lock(lck: &(task)->lock);
612}
613
614__attribute__((always_inline)) inline void
615task_unlock(task_t task)
616{
617 lck_mtx_unlock(lck: &(task)->lock);
618}
619
620void
621task_set_64bit(
622 task_t task,
623 boolean_t is_64bit,
624 boolean_t is_64bit_data)
625{
626#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
627 thread_t thread;
628#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
629
630 task_lock(task);
631
632 /*
633 * Switching to/from 64-bit address spaces
634 */
635 if (is_64bit) {
636 if (!task_has_64Bit_addr(task)) {
637 task_set_64Bit_addr(task);
638 }
639 } else {
640 if (task_has_64Bit_addr(task)) {
641 task_clear_64Bit_addr(task);
642 }
643 }
644
645 /*
646 * Switching to/from 64-bit register state.
647 */
648 if (is_64bit_data) {
649 if (task_has_64Bit_data(task)) {
650 goto out;
651 }
652
653 task_set_64Bit_data(task);
654 } else {
655 if (!task_has_64Bit_data(task)) {
656 goto out;
657 }
658
659 task_clear_64Bit_data(task);
660 }
661
662 /* FIXME: On x86, the thread save state flavor can diverge from the
663 * task's 64-bit feature flag due to the 32-bit/64-bit register save
664 * state dichotomy. Since we can be pre-empted in this interval,
665 * certain routines may observe the thread as being in an inconsistent
666 * state with respect to its task's 64-bitness.
667 */
668
669#if defined(__x86_64__) || defined(__arm64__)
670 queue_iterate(&task->threads, thread, thread_t, task_threads) {
671 thread_mtx_lock(thread);
672 machine_thread_switch_addrmode(thread);
673 thread_mtx_unlock(thread);
674 }
675#endif /* defined(__x86_64__) || defined(__arm64__) */
676
677out:
678 task_unlock(task);
679}
680
681bool
682task_get_64bit_addr(task_t task)
683{
684 return task_has_64Bit_addr(task);
685}
686
687bool
688task_get_64bit_data(task_t task)
689{
690 return task_has_64Bit_data(task);
691}
692
693void
694task_set_platform_binary(
695 task_t task,
696 boolean_t is_platform)
697{
698 if (is_platform) {
699 task_ro_flags_set(task, TFRO_PLATFORM);
700 } else {
701 task_ro_flags_clear(task, TFRO_PLATFORM);
702 }
703}
704
705#if XNU_TARGET_OS_OSX
706#if DEVELOPMENT || DEBUG
707SECURITY_READ_ONLY_LATE(bool) AMFI_bootarg_disable_mach_hardening = false;
708#endif /* DEVELOPMENT || DEBUG */
709
710void
711task_disable_mach_hardening(task_t task)
712{
713 task_ro_flags_set(task, TFRO_MACH_HARDENING_OPT_OUT);
714}
715
716bool
717task_opted_out_mach_hardening(task_t task)
718{
719 return task_ro_flags_get(task) & TFRO_MACH_HARDENING_OPT_OUT;
720}
721#endif /* XNU_TARGET_OS_OSX */
722
723/*
724 * Use the `task_is_hardened_binary` macro below
725 * when applying new security policies.
726 *
727 * Kernel security policies now generally apply to
728 * "hardened binaries" - which are platform binaries, and
729 * third party binaries who adopt hardened runtime on ios.
730 */
731boolean_t
732task_get_platform_binary(task_t task)
733{
734 return (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
735}
736
737static boolean_t
738task_get_hardened_runtime(task_t task)
739{
740 return (task_ro_flags_get(task) & TFRO_HARDENED) != 0;
741}
742
743boolean_t
744task_is_hardened_binary(task_t task)
745{
746 return task_get_platform_binary(task) ||
747 task_get_hardened_runtime(task);
748}
749
750void
751task_set_hardened_runtime(
752 task_t task,
753 bool is_hardened)
754{
755 if (is_hardened) {
756 task_ro_flags_set(task, TFRO_HARDENED);
757 } else {
758 task_ro_flags_clear(task, TFRO_HARDENED);
759 }
760}
761
762boolean_t
763task_is_a_corpse(task_t task)
764{
765 return (task_ro_flags_get(task) & TFRO_CORPSE) != 0;
766}
767
768boolean_t
769task_is_ipc_active(task_t task)
770{
771 return task->ipc_active;
772}
773
774void
775task_set_corpse(task_t task)
776{
777 return task_ro_flags_set(task, TFRO_CORPSE);
778}
779
780void
781task_set_immovable_pinned(task_t task)
782{
783 ipc_task_set_immovable_pinned(task);
784}
785
786/*
787 * Set or clear per-task TF_CA_CLIENT_WI flag according to specified argument.
788 * Returns "false" if flag is already set, and "true" in other cases.
789 */
790bool
791task_set_ca_client_wi(
792 task_t task,
793 boolean_t set_or_clear)
794{
795 bool ret = true;
796 task_lock(task);
797 if (set_or_clear) {
798 /* Tasks can have only one CA_CLIENT work interval */
799 if (task->t_flags & TF_CA_CLIENT_WI) {
800 ret = false;
801 } else {
802 task->t_flags |= TF_CA_CLIENT_WI;
803 }
804 } else {
805 task->t_flags &= ~TF_CA_CLIENT_WI;
806 }
807 task_unlock(task);
808 return ret;
809}
810
811/*
812 * task_set_dyld_info() is called at most three times.
813 * 1) at task struct creation to set addr/size to zero.
814 * 2) in mach_loader.c to set location of __all_image_info section in loaded dyld
815 * 3) is from dyld itself to update location of all_image_info
816 * For security any calls after that are ignored. The TF_DYLD_ALL_IMAGE_SET bit is used to determine state.
817 */
818kern_return_t
819task_set_dyld_info(
820 task_t task,
821 mach_vm_address_t addr,
822 mach_vm_size_t size)
823{
824 mach_vm_address_t end;
825 if (os_add_overflow(addr, size, &end)) {
826 return KERN_FAILURE;
827 }
828
829 task_lock(task);
830 /* don't accept updates if all_image_info_addr is final */
831 if ((task->t_flags & TF_DYLD_ALL_IMAGE_FINAL) == 0) {
832 bool inputNonZero = ((addr != 0) || (size != 0));
833 bool currentNonZero = ((task->all_image_info_addr != 0) || (task->all_image_info_size != 0));
834 task->all_image_info_addr = addr;
835 task->all_image_info_size = size;
836 /* can only change from a non-zero value to another non-zero once */
837 if (inputNonZero && currentNonZero) {
838 task->t_flags |= TF_DYLD_ALL_IMAGE_FINAL;
839 }
840 task_unlock(task);
841 return KERN_SUCCESS;
842 } else {
843 task_unlock(task);
844 return KERN_FAILURE;
845 }
846}
847
848bool
849task_donates_own_pages(
850 task_t task)
851{
852 return task->donates_own_pages;
853}
854
855void
856task_set_mach_header_address(
857 task_t task,
858 mach_vm_address_t addr)
859{
860 task_lock(task);
861 task->mach_header_vm_address = addr;
862 task_unlock(task);
863}
864
865void
866task_bank_reset(__unused task_t task)
867{
868 if (task->bank_context != NULL) {
869 bank_task_destroy(task);
870 }
871}
872
873/*
874 * NOTE: This should only be called when the P_LINTRANSIT
875 * flag is set (the proc_trans lock is held) on the
876 * proc associated with the task.
877 */
878void
879task_bank_init(__unused task_t task)
880{
881 if (task->bank_context != NULL) {
882 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
883 }
884 bank_task_initialize(task);
885}
886
887void
888task_set_did_exec_flag(task_t task)
889{
890 task->t_procflags |= TPF_DID_EXEC;
891}
892
893void
894task_clear_exec_copy_flag(task_t task)
895{
896 task->t_procflags &= ~TPF_EXEC_COPY;
897}
898
899event_t
900task_get_return_wait_event(task_t task)
901{
902 return (event_t)&task->returnwait_inheritor;
903}
904
905void
906task_clear_return_wait(task_t task, uint32_t flags)
907{
908 if (flags & TCRW_CLEAR_INITIAL_WAIT) {
909 thread_wakeup(task_get_return_wait_event(task));
910 }
911
912 if (flags & TCRW_CLEAR_FINAL_WAIT) {
913 is_write_lock(task->itk_space);
914
915 task->t_returnwaitflags &= ~TRW_LRETURNWAIT;
916 task->returnwait_inheritor = NULL;
917
918 if (flags & TCRW_CLEAR_EXEC_COMPLETE) {
919 task->t_returnwaitflags &= ~TRW_LEXEC_COMPLETE;
920 }
921
922 if (task->t_returnwaitflags & TRW_LRETURNWAITER) {
923 struct turnstile *turnstile = turnstile_prepare_hash(proprietor: (uintptr_t) task_get_return_wait_event(task),
924 type: TURNSTILE_ULOCK);
925
926 waitq_wakeup64_all(waitq: &turnstile->ts_waitq,
927 CAST_EVENT64_T(task_get_return_wait_event(task)),
928 THREAD_AWAKENED, flags: WAITQ_UPDATE_INHERITOR);
929
930 turnstile_update_inheritor_complete(turnstile, flags: TURNSTILE_INTERLOCK_HELD);
931
932 turnstile_complete_hash(proprietor: (uintptr_t) task_get_return_wait_event(task), type: TURNSTILE_ULOCK);
933 turnstile_cleanup();
934 task->t_returnwaitflags &= ~TRW_LRETURNWAITER;
935 }
936 is_write_unlock(task->itk_space);
937 }
938}
939
940void __attribute__((noreturn))
941task_wait_to_return(void)
942{
943 task_t task = current_task();
944 uint8_t returnwaitflags;
945
946 is_write_lock(task->itk_space);
947
948 if (task->t_returnwaitflags & TRW_LRETURNWAIT) {
949 struct turnstile *turnstile = turnstile_prepare_hash(proprietor: (uintptr_t) task_get_return_wait_event(task),
950 type: TURNSTILE_ULOCK);
951
952 do {
953 task->t_returnwaitflags |= TRW_LRETURNWAITER;
954 turnstile_update_inheritor(turnstile, new_inheritor: task->returnwait_inheritor,
955 flags: (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));
956
957 waitq_assert_wait64(waitq: &turnstile->ts_waitq,
958 CAST_EVENT64_T(task_get_return_wait_event(task)),
959 THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
960
961 is_write_unlock(task->itk_space);
962
963 turnstile_update_inheritor_complete(turnstile, flags: TURNSTILE_INTERLOCK_NOT_HELD);
964
965 thread_block(THREAD_CONTINUE_NULL);
966
967 is_write_lock(task->itk_space);
968 } while (task->t_returnwaitflags & TRW_LRETURNWAIT);
969
970 turnstile_complete_hash(proprietor: (uintptr_t) task_get_return_wait_event(task), type: TURNSTILE_ULOCK);
971 }
972
973 returnwaitflags = task->t_returnwaitflags;
974 is_write_unlock(task->itk_space);
975 turnstile_cleanup();
976
977
978#if CONFIG_MACF
979 /*
980 * Before jumping to userspace and allowing this process
981 * to execute any code, make sure its credentials are cached,
982 * and notify any interested parties.
983 */
984 extern void current_cached_proc_cred_update(void);
985
986 current_cached_proc_cred_update();
987 if (returnwaitflags & TRW_LEXEC_COMPLETE) {
988 mac_proc_notify_exec_complete(proc: current_proc());
989 }
990#endif
991
992 thread_bootstrap_return();
993}
994
995boolean_t
996task_is_exec_copy(task_t task)
997{
998 return task_is_exec_copy_internal(task);
999}
1000
1001boolean_t
1002task_did_exec(task_t task)
1003{
1004 return task_did_exec_internal(task);
1005}
1006
1007boolean_t
1008task_is_active(task_t task)
1009{
1010 return task->active;
1011}
1012
1013boolean_t
1014task_is_halting(task_t task)
1015{
1016 return task->halting;
1017}
1018
1019void
1020task_init(void)
1021{
1022 if (max_task_footprint_mb != 0) {
1023#if CONFIG_MEMORYSTATUS
1024 if (max_task_footprint_mb < 50) {
1025 printf(format: "Warning: max_task_pmem %d below minimum.\n",
1026 max_task_footprint_mb);
1027 max_task_footprint_mb = 50;
1028 }
1029 printf(format: "Limiting task physical memory footprint to %d MB\n",
1030 max_task_footprint_mb);
1031
1032 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
1033
1034 /*
1035 * Configure the per-task memory limit warning level.
1036 * This is computed as a percentage.
1037 */
1038 max_task_footprint_warning_level = 0;
1039
1040 if (max_mem < 0x40000000) {
1041 /*
1042 * On devices with < 1GB of memory:
1043 * -- set warnings to 50MB below the per-task limit.
1044 */
1045 if (max_task_footprint_mb > 50) {
1046 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
1047 }
1048 } else {
1049 /*
1050 * On devices with >= 1GB of memory:
1051 * -- set warnings to 100MB below the per-task limit.
1052 */
1053 if (max_task_footprint_mb > 100) {
1054 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
1055 }
1056 }
1057
1058 /*
1059 * Never allow warning level to land below the default.
1060 */
1061 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
1062 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
1063 }
1064
1065 printf(format: "Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
1066
1067#else
1068 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
1069#endif /* CONFIG_MEMORYSTATUS */
1070 }
1071
1072#if DEVELOPMENT || DEBUG
1073 PE_parse_boot_argn("task_exc_guard_default",
1074 &task_exc_guard_default,
1075 sizeof(task_exc_guard_default));
1076#endif /* DEVELOPMENT || DEBUG */
1077
1078#if CONFIG_COREDUMP
1079 if (!PE_parse_boot_argn(arg_string: "hwm_user_cores", arg_ptr: &hwm_user_cores,
1080 max_arg: sizeof(hwm_user_cores))) {
1081 hwm_user_cores = 0;
1082 }
1083#endif
1084
1085 proc_init_cpumon_params();
1086
1087 if (!PE_parse_boot_argn(arg_string: "task_wakeups_monitor_rate", arg_ptr: &task_wakeups_monitor_rate, max_arg: sizeof(task_wakeups_monitor_rate))) {
1088 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
1089 }
1090
1091 if (!PE_parse_boot_argn(arg_string: "task_wakeups_monitor_interval", arg_ptr: &task_wakeups_monitor_interval, max_arg: sizeof(task_wakeups_monitor_interval))) {
1092 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
1093 }
1094
1095 if (!PE_parse_boot_argn(arg_string: "task_wakeups_monitor_ustackshots_trigger_pct", arg_ptr: &task_wakeups_monitor_ustackshots_trigger_pct,
1096 max_arg: sizeof(task_wakeups_monitor_ustackshots_trigger_pct))) {
1097 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
1098 }
1099
1100 if (!PE_parse_boot_argn(arg_string: "task_iomon_limit_mb", arg_ptr: &task_iomon_limit_mb, max_arg: sizeof(task_iomon_limit_mb))) {
1101 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
1102 }
1103
1104 if (!PE_parse_boot_argn(arg_string: "task_iomon_interval_secs", arg_ptr: &task_iomon_interval_secs, max_arg: sizeof(task_iomon_interval_secs))) {
1105 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
1106 }
1107
1108 if (!PE_parse_boot_argn(arg_string: "io_telemetry_limit", arg_ptr: &io_telemetry_limit, max_arg: sizeof(io_telemetry_limit))) {
1109 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
1110 }
1111
1112/*
1113 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
1114 * sets up the ledgers for the default coalition. If we don't have coalitions,
1115 * then we have to call it now.
1116 */
1117#if CONFIG_COALITIONS
1118 assert(task_ledger_template);
1119#else /* CONFIG_COALITIONS */
1120 init_task_ledgers();
1121#endif /* CONFIG_COALITIONS */
1122
1123 task_ref_init();
1124 task_zone_init();
1125
1126#ifdef __LP64__
1127 boolean_t is_64bit = TRUE;
1128#else
1129 boolean_t is_64bit = FALSE;
1130#endif
1131
1132 kernproc = (struct proc *)zalloc_flags(proc_task_zone, Z_WAITOK | Z_ZERO);
1133 kernel_task = proc_get_task_raw(proc: kernproc);
1134
1135 /*
1136 * Create the kernel task as the first task.
1137 */
1138 if (task_create_internal(TASK_NULL, NULL, NULL, FALSE, is_64bit,
1139 is_64bit_data: is_64bit, TF_NONE, TF_NONE, TPF_NONE, TWF_NONE, child_task: kernel_task) != KERN_SUCCESS) {
1140 panic("task_init");
1141 }
1142
1143 ipc_task_enable(task: kernel_task);
1144
1145#if defined(HAS_APPLE_PAC)
1146 kernel_task->rop_pid = ml_default_rop_pid();
1147 kernel_task->jop_pid = ml_default_jop_pid();
1148 // kernel_task never runs at EL0, but machine_thread_state_convert_from/to_user() relies on
1149 // disable_user_jop to be false for kernel threads (e.g. in exception delivery on thread_exception_daemon)
1150 ml_task_set_disable_user_jop(task: kernel_task, FALSE);
1151#endif
1152
1153 vm_map_deallocate(map: kernel_task->map);
1154 kernel_task->map = kernel_map;
1155}
1156
1157static inline void
1158task_zone_init(void)
1159{
1160 proc_struct_size = roundup(proc_struct_size, task_alignment);
1161 task_struct_size = roundup(sizeof(struct task), proc_alignment);
1162 proc_and_task_size = proc_struct_size + task_struct_size;
1163
1164 proc_task_zone = zone_create_ext(name: "proc_task", size: proc_and_task_size,
1165 flags: ZC_ZFREE_CLEARMEM | ZC_SEQUESTER, desired_zid: ZONE_ID_PROC_TASK, NULL); /* sequester is needed for proc_rele() */
1166}
1167
1168/*
1169 * Task ledgers
1170 * ------------
1171 *
1172 * phys_footprint
1173 * Physical footprint: This is the sum of:
1174 * + (internal - alternate_accounting)
1175 * + (internal_compressed - alternate_accounting_compressed)
1176 * + iokit_mapped
1177 * + purgeable_nonvolatile
1178 * + purgeable_nonvolatile_compressed
1179 * + page_table
1180 *
1181 * internal
1182 * The task's anonymous memory, which on iOS is always resident.
1183 *
1184 * internal_compressed
1185 * Amount of this task's internal memory which is held by the compressor.
1186 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
1187 * and could be either decompressed back into memory, or paged out to storage, depending
1188 * on our implementation.
1189 *
1190 * iokit_mapped
1191 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
1192 * clean/dirty or internal/external state].
1193 *
1194 * alternate_accounting
1195 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
1196 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
1197 * double counting.
1198 *
1199 * pages_grabbed
1200 * pages_grabbed counts all page grabs in a task. It is also broken out into three subtypes
1201 * which track UPL, IOPL and Kernel page grabs.
1202 */
1203void
1204init_task_ledgers(void)
1205{
1206 ledger_template_t t;
1207
1208 assert(task_ledger_template == NULL);
1209 assert(kernel_task == TASK_NULL);
1210
1211#if MACH_ASSERT
1212 PE_parse_boot_argn("pmap_ledgers_panic",
1213 &pmap_ledgers_panic,
1214 sizeof(pmap_ledgers_panic));
1215 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
1216 &pmap_ledgers_panic_leeway,
1217 sizeof(pmap_ledgers_panic_leeway));
1218#endif /* MACH_ASSERT */
1219
1220 if ((t = ledger_template_create(name: "Per-task ledger")) == NULL) {
1221 panic("couldn't create task ledger template");
1222 }
1223
1224 task_ledgers.cpu_time = ledger_entry_add(template: t, key: "cpu_time", group: "sched", units: "ns");
1225 task_ledgers.tkm_private = ledger_entry_add(template: t, key: "tkm_private",
1226 group: "physmem", units: "bytes");
1227 task_ledgers.tkm_shared = ledger_entry_add(template: t, key: "tkm_shared", group: "physmem",
1228 units: "bytes");
1229 task_ledgers.phys_mem = ledger_entry_add(template: t, key: "phys_mem", group: "physmem",
1230 units: "bytes");
1231 task_ledgers.wired_mem = ledger_entry_add(template: t, key: "wired_mem", group: "physmem",
1232 units: "bytes");
1233 task_ledgers.conclave_mem = ledger_entry_add_with_flags(template: t, key: "conclave_mem", group: "physmem", units: "count",
1234 flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE | LEDGER_ENTRY_ALLOW_DEBIT);
1235 task_ledgers.internal = ledger_entry_add(template: t, key: "internal", group: "physmem",
1236 units: "bytes");
1237 task_ledgers.iokit_mapped = ledger_entry_add_with_flags(template: t, key: "iokit_mapped", group: "mappings",
1238 units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1239 task_ledgers.alternate_accounting = ledger_entry_add_with_flags(template: t, key: "alternate_accounting", group: "physmem",
1240 units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1241 task_ledgers.alternate_accounting_compressed = ledger_entry_add_with_flags(template: t, key: "alternate_accounting_compressed", group: "physmem",
1242 units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1243 task_ledgers.page_table = ledger_entry_add_with_flags(template: t, key: "page_table", group: "physmem",
1244 units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1245 task_ledgers.phys_footprint = ledger_entry_add(template: t, key: "phys_footprint", group: "physmem",
1246 units: "bytes");
1247 task_ledgers.internal_compressed = ledger_entry_add(template: t, key: "internal_compressed", group: "physmem",
1248 units: "bytes");
1249 task_ledgers.reusable = ledger_entry_add(template: t, key: "reusable", group: "physmem", units: "bytes");
1250 task_ledgers.external = ledger_entry_add(template: t, key: "external", group: "physmem", units: "bytes");
1251 task_ledgers.purgeable_volatile = ledger_entry_add_with_flags(template: t, key: "purgeable_volatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1252 task_ledgers.purgeable_nonvolatile = ledger_entry_add_with_flags(template: t, key: "purgeable_nonvolatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1253 task_ledgers.purgeable_volatile_compressed = ledger_entry_add_with_flags(template: t, key: "purgeable_volatile_compress", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1254 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add_with_flags(template: t, key: "purgeable_nonvolatile_compress", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1255#if DEBUG || DEVELOPMENT
1256 task_ledgers.pages_grabbed = ledger_entry_add_with_flags(t, "pages_grabbed", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1257 task_ledgers.pages_grabbed_kern = ledger_entry_add_with_flags(t, "pages_grabbed_kern", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1258 task_ledgers.pages_grabbed_iopl = ledger_entry_add_with_flags(t, "pages_grabbed_iopl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1259 task_ledgers.pages_grabbed_upl = ledger_entry_add_with_flags(t, "pages_grabbed_upl", "physmem", "count", LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1260#endif
1261 task_ledgers.tagged_nofootprint = ledger_entry_add_with_flags(template: t, key: "tagged_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1262 task_ledgers.tagged_footprint = ledger_entry_add_with_flags(template: t, key: "tagged_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1263 task_ledgers.tagged_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "tagged_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1264 task_ledgers.tagged_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "tagged_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1265 task_ledgers.network_volatile = ledger_entry_add_with_flags(template: t, key: "network_volatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1266 task_ledgers.network_nonvolatile = ledger_entry_add_with_flags(template: t, key: "network_nonvolatile", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1267 task_ledgers.network_volatile_compressed = ledger_entry_add_with_flags(template: t, key: "network_volatile_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1268 task_ledgers.network_nonvolatile_compressed = ledger_entry_add_with_flags(template: t, key: "network_nonvolatile_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1269 task_ledgers.media_nofootprint = ledger_entry_add_with_flags(template: t, key: "media_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1270 task_ledgers.media_footprint = ledger_entry_add_with_flags(template: t, key: "media_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1271 task_ledgers.media_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "media_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1272 task_ledgers.media_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "media_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1273 task_ledgers.graphics_nofootprint = ledger_entry_add_with_flags(template: t, key: "graphics_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1274 task_ledgers.graphics_footprint = ledger_entry_add_with_flags(template: t, key: "graphics_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1275 task_ledgers.graphics_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "graphics_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1276 task_ledgers.graphics_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "graphics_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1277 task_ledgers.neural_nofootprint = ledger_entry_add_with_flags(template: t, key: "neural_nofootprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1278 task_ledgers.neural_footprint = ledger_entry_add_with_flags(template: t, key: "neural_footprint", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1279 task_ledgers.neural_nofootprint_compressed = ledger_entry_add_with_flags(template: t, key: "neural_nofootprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1280 task_ledgers.neural_footprint_compressed = ledger_entry_add_with_flags(template: t, key: "neural_footprint_compressed", group: "physmem", units: "bytes", flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1281
1282#if CONFIG_FREEZE
1283 task_ledgers.frozen_to_swap = ledger_entry_add(t, "frozen_to_swap", "physmem", "bytes");
1284#endif /* CONFIG_FREEZE */
1285
1286 task_ledgers.platform_idle_wakeups = ledger_entry_add(template: t, key: "platform_idle_wakeups", group: "power",
1287 units: "count");
1288 task_ledgers.interrupt_wakeups = ledger_entry_add(template: t, key: "interrupt_wakeups", group: "power",
1289 units: "count");
1290
1291#if CONFIG_SCHED_SFI
1292 sfi_class_id_t class_id, ledger_alias;
1293 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1294 task_ledgers.sfi_wait_times[class_id] = -1;
1295 }
1296
1297 /* don't account for UNSPECIFIED */
1298 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1299 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1300 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1301 /* Check to see if alias has been registered yet */
1302 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1303 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1304 } else {
1305 /* Otherwise, initialize it first */
1306 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(template: t, class_id: ledger_alias);
1307 }
1308 } else {
1309 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(template: t, class_id);
1310 }
1311
1312 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1313 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1314 }
1315 }
1316
1317 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID - 1] != -1);
1318#endif /* CONFIG_SCHED_SFI */
1319
1320 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(template: t, key: "cpu_time_billed_to_me", group: "sched", units: "ns");
1321 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(template: t, key: "cpu_time_billed_to_others", group: "sched", units: "ns");
1322 task_ledgers.physical_writes = ledger_entry_add(template: t, key: "physical_writes", group: "res", units: "bytes");
1323 task_ledgers.logical_writes = ledger_entry_add(template: t, key: "logical_writes", group: "res", units: "bytes");
1324 task_ledgers.logical_writes_to_external = ledger_entry_add(template: t, key: "logical_writes_to_external", group: "res", units: "bytes");
1325#if CONFIG_PHYS_WRITE_ACCT
1326 task_ledgers.fs_metadata_writes = ledger_entry_add(template: t, key: "fs_metadata_writes", group: "res", units: "bytes");
1327#endif /* CONFIG_PHYS_WRITE_ACCT */
1328 task_ledgers.energy_billed_to_me = ledger_entry_add(template: t, key: "energy_billed_to_me", group: "power", units: "nj");
1329 task_ledgers.energy_billed_to_others = ledger_entry_add(template: t, key: "energy_billed_to_others", group: "power", units: "nj");
1330
1331#if CONFIG_MEMORYSTATUS
1332 task_ledgers.memorystatus_dirty_time = ledger_entry_add(template: t, key: "memorystatus_dirty_time", group: "physmem", units: "ns");
1333#endif /* CONFIG_MEMORYSTATUS */
1334
1335 task_ledgers.swapins = ledger_entry_add_with_flags(template: t, key: "swapins", group: "physmem", units: "bytes",
1336 flags: LEDGER_ENTRY_ALLOW_PANIC_ON_NEGATIVE);
1337
1338 if ((task_ledgers.cpu_time < 0) ||
1339 (task_ledgers.tkm_private < 0) ||
1340 (task_ledgers.tkm_shared < 0) ||
1341 (task_ledgers.phys_mem < 0) ||
1342 (task_ledgers.wired_mem < 0) ||
1343 (task_ledgers.conclave_mem < 0) ||
1344 (task_ledgers.internal < 0) ||
1345 (task_ledgers.external < 0) ||
1346 (task_ledgers.reusable < 0) ||
1347 (task_ledgers.iokit_mapped < 0) ||
1348 (task_ledgers.alternate_accounting < 0) ||
1349 (task_ledgers.alternate_accounting_compressed < 0) ||
1350 (task_ledgers.page_table < 0) ||
1351 (task_ledgers.phys_footprint < 0) ||
1352 (task_ledgers.internal_compressed < 0) ||
1353 (task_ledgers.purgeable_volatile < 0) ||
1354 (task_ledgers.purgeable_nonvolatile < 0) ||
1355 (task_ledgers.purgeable_volatile_compressed < 0) ||
1356 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1357 (task_ledgers.tagged_nofootprint < 0) ||
1358 (task_ledgers.tagged_footprint < 0) ||
1359 (task_ledgers.tagged_nofootprint_compressed < 0) ||
1360 (task_ledgers.tagged_footprint_compressed < 0) ||
1361#if CONFIG_FREEZE
1362 (task_ledgers.frozen_to_swap < 0) ||
1363#endif /* CONFIG_FREEZE */
1364 (task_ledgers.network_volatile < 0) ||
1365 (task_ledgers.network_nonvolatile < 0) ||
1366 (task_ledgers.network_volatile_compressed < 0) ||
1367 (task_ledgers.network_nonvolatile_compressed < 0) ||
1368 (task_ledgers.media_nofootprint < 0) ||
1369 (task_ledgers.media_footprint < 0) ||
1370 (task_ledgers.media_nofootprint_compressed < 0) ||
1371 (task_ledgers.media_footprint_compressed < 0) ||
1372 (task_ledgers.graphics_nofootprint < 0) ||
1373 (task_ledgers.graphics_footprint < 0) ||
1374 (task_ledgers.graphics_nofootprint_compressed < 0) ||
1375 (task_ledgers.graphics_footprint_compressed < 0) ||
1376 (task_ledgers.neural_nofootprint < 0) ||
1377 (task_ledgers.neural_footprint < 0) ||
1378 (task_ledgers.neural_nofootprint_compressed < 0) ||
1379 (task_ledgers.neural_footprint_compressed < 0) ||
1380 (task_ledgers.platform_idle_wakeups < 0) ||
1381 (task_ledgers.interrupt_wakeups < 0) ||
1382 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1383 (task_ledgers.physical_writes < 0) ||
1384 (task_ledgers.logical_writes < 0) ||
1385 (task_ledgers.logical_writes_to_external < 0) ||
1386#if CONFIG_PHYS_WRITE_ACCT
1387 (task_ledgers.fs_metadata_writes < 0) ||
1388#endif /* CONFIG_PHYS_WRITE_ACCT */
1389#if CONFIG_MEMORYSTATUS
1390 (task_ledgers.memorystatus_dirty_time < 0) ||
1391#endif /* CONFIG_MEMORYSTATUS */
1392 (task_ledgers.energy_billed_to_me < 0) ||
1393 (task_ledgers.energy_billed_to_others < 0) ||
1394 (task_ledgers.swapins < 0)
1395 ) {
1396 panic("couldn't create entries for task ledger template");
1397 }
1398
1399 ledger_track_credit_only(template: t, entry: task_ledgers.phys_footprint);
1400 ledger_track_credit_only(template: t, entry: task_ledgers.internal);
1401 ledger_track_credit_only(template: t, entry: task_ledgers.external);
1402 ledger_track_credit_only(template: t, entry: task_ledgers.reusable);
1403
1404 ledger_track_maximum(template: t, entry: task_ledgers.phys_footprint, period_in_secs: 60);
1405 ledger_track_maximum(template: t, entry: task_ledgers.phys_mem, period_in_secs: 60);
1406 ledger_track_maximum(template: t, entry: task_ledgers.internal, period_in_secs: 60);
1407 ledger_track_maximum(template: t, entry: task_ledgers.internal_compressed, period_in_secs: 60);
1408 ledger_track_maximum(template: t, entry: task_ledgers.reusable, period_in_secs: 60);
1409 ledger_track_maximum(template: t, entry: task_ledgers.external, period_in_secs: 60);
1410#if MACH_ASSERT
1411 if (pmap_ledgers_panic) {
1412 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1413 ledger_panic_on_negative(t, task_ledgers.conclave_mem);
1414 ledger_panic_on_negative(t, task_ledgers.page_table);
1415 ledger_panic_on_negative(t, task_ledgers.internal);
1416 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1417 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1418 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1419 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1420 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1421 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1422 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1423#if CONFIG_PHYS_WRITE_ACCT
1424 ledger_panic_on_negative(t, task_ledgers.fs_metadata_writes);
1425#endif /* CONFIG_PHYS_WRITE_ACCT */
1426
1427 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint);
1428 ledger_panic_on_negative(t, task_ledgers.tagged_footprint);
1429 ledger_panic_on_negative(t, task_ledgers.tagged_nofootprint_compressed);
1430 ledger_panic_on_negative(t, task_ledgers.tagged_footprint_compressed);
1431 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1432 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1433 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1434 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1435 ledger_panic_on_negative(t, task_ledgers.media_nofootprint);
1436 ledger_panic_on_negative(t, task_ledgers.media_footprint);
1437 ledger_panic_on_negative(t, task_ledgers.media_nofootprint_compressed);
1438 ledger_panic_on_negative(t, task_ledgers.media_footprint_compressed);
1439 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint);
1440 ledger_panic_on_negative(t, task_ledgers.graphics_footprint);
1441 ledger_panic_on_negative(t, task_ledgers.graphics_nofootprint_compressed);
1442 ledger_panic_on_negative(t, task_ledgers.graphics_footprint_compressed);
1443 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint);
1444 ledger_panic_on_negative(t, task_ledgers.neural_footprint);
1445 ledger_panic_on_negative(t, task_ledgers.neural_nofootprint_compressed);
1446 ledger_panic_on_negative(t, task_ledgers.neural_footprint_compressed);
1447 }
1448#endif /* MACH_ASSERT */
1449
1450#if CONFIG_MEMORYSTATUS
1451 ledger_set_callback(template: t, entry: task_ledgers.phys_footprint, callback: task_footprint_exceeded, NULL, NULL);
1452#endif /* CONFIG_MEMORYSTATUS */
1453
1454 ledger_set_callback(template: t, entry: task_ledgers.interrupt_wakeups,
1455 callback: task_wakeups_rate_exceeded, NULL, NULL);
1456 ledger_set_callback(template: t, entry: task_ledgers.physical_writes, callback: task_io_rate_exceeded, param0: (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1457
1458#if CONFIG_SPTM || !XNU_MONITOR
1459 ledger_template_complete(template: t);
1460#else /* CONFIG_SPTM || !XNU_MONITOR */
1461 ledger_template_complete_secure_alloc(t);
1462#endif /* CONFIG_SPTM || !XNU_MONITOR */
1463 task_ledger_template = t;
1464}
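/*
 * The completed template is published through task_ledger_template; each
 * new task gets its own ledger instantiated from it in
 * task_create_internal() below, roughly:
 *
 *	ledger = ledger_instantiate(task_ledger_template,
 *	    LEDGER_CREATE_ACTIVE_ENTRIES);
 *	new_task->ledger = ledger;
 */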
1465
1466/* Create a task, but leave the task ports disabled */
1467kern_return_t
1468task_create_internal(
1469 task_t parent_task, /* Null-able */
1470 proc_ro_t proc_ro,
1471 coalition_t *parent_coalitions __unused,
1472 boolean_t inherit_memory,
1473 boolean_t is_64bit,
1474 boolean_t is_64bit_data,
1475 uint32_t t_flags,
1476 uint32_t t_flags_ro,
1477 uint32_t t_procflags,
1478 uint8_t t_returnwaitflags,
1479 task_t child_task)
1480{
1481 task_t new_task;
1482 vm_shared_region_t shared_region;
1483 ledger_t ledger = NULL;
1484 struct task_ro_data task_ro_data = {};
1485 uint32_t parent_t_flags_ro = 0;
1486
1487 new_task = child_task;
1488
1489 if (task_ref_count_init(new_task) != KERN_SUCCESS) {
1490 return KERN_RESOURCE_SHORTAGE;
1491 }
1492
1493 /* allocate with active entries */
1494 assert(task_ledger_template != NULL);
1495 ledger = ledger_instantiate(template: task_ledger_template, LEDGER_CREATE_ACTIVE_ENTRIES);
1496 if (ledger == NULL) {
1497 task_ref_count_fini(new_task);
1498 return KERN_RESOURCE_SHORTAGE;
1499 }
1500
1501 counter_alloc(&(new_task->faults));
1502
1503#if defined(HAS_APPLE_PAC)
1504 const uint8_t disable_user_jop = inherit_memory ? parent_task->disable_user_jop : FALSE;
1505 ml_task_set_rop_pid(task: new_task, parent_task, inherit: inherit_memory);
1506 ml_task_set_jop_pid(task: new_task, parent_task, inherit: inherit_memory, disable_user_jop);
1507 ml_task_set_disable_user_jop(task: new_task, disable_user_jop);
1508#endif
1509
1510
1511 new_task->ledger = ledger;
1512
1513 /* if inherit_memory is true, parent_task MUST not be NULL */
1514 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory) {
1515#if CONFIG_DEFERRED_RECLAIM
1516 if (parent_task->deferred_reclamation_metadata) {
1517 /*
1518 * Prevent concurrent reclaims while we're forking the parent_task's map,
1519 * so that the child's map is in sync with the forked reclamation
1520 * metadata.
1521 */
1522 vm_deferred_reclamation_buffer_lock(
1523 metadata: parent_task->deferred_reclamation_metadata);
1524 }
1525#endif /* CONFIG_DEFERRED_RECLAIM */
1526 new_task->map = vm_map_fork(ledger, old_map: parent_task->map, options: 0);
1527#if CONFIG_DEFERRED_RECLAIM
1528 if (new_task->map != NULL &&
1529 parent_task->deferred_reclamation_metadata) {
1530 new_task->deferred_reclamation_metadata =
1531 vm_deferred_reclamation_buffer_fork(task: new_task,
1532 parent: parent_task->deferred_reclamation_metadata);
1533 }
1534#endif /* CONFIG_DEFERRED_RECLAIM */
1535 } else {
1536 unsigned int pmap_flags = is_64bit ? PMAP_CREATE_64BIT : 0;
1537 pmap_t pmap = pmap_create_options(ledger, size: 0, flags: pmap_flags);
1538 vm_map_t new_map;
1539
1540 if (pmap == NULL) {
1541 counter_free(&new_task->faults);
1542 ledger_dereference(ledger);
1543 task_ref_count_fini(new_task);
1544 return KERN_RESOURCE_SHORTAGE;
1545 }
1546 new_map = vm_map_create_options(pmap,
1547 min_off: (vm_map_offset_t)(VM_MIN_ADDRESS),
1548 max_off: (vm_map_offset_t)(VM_MAX_ADDRESS),
1549 options: VM_MAP_CREATE_PAGEABLE);
1550 if (parent_task) {
1551 vm_map_inherit_limits(new_map, old_map: parent_task->map);
1552 }
1553 new_task->map = new_map;
1554 }
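	/*
	 * new_task->map is now either a copy-on-write fork of the parent's
	 * map (fork / corpse-fork path above) or a fresh pageable map on a
	 * newly created pmap (spawn / kernel path); both paths can fail and
	 * leave the map NULL, which is handled immediately below.
	 */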
1555
1556 if (new_task->map == NULL) {
1557 counter_free(&new_task->faults);
1558 ledger_dereference(ledger);
1559 task_ref_count_fini(new_task);
1560 return KERN_RESOURCE_SHORTAGE;
1561 }
1562
1563#if defined(CONFIG_SCHED_MULTIQ)
1564 new_task->sched_group = sched_group_create();
1565#endif
1566
1567 lck_mtx_init(lck: &new_task->lock, grp: &task_lck_grp, attr: &task_lck_attr);
1568 queue_init(&new_task->threads);
1569 new_task->suspend_count = 0;
1570 new_task->thread_count = 0;
1571 new_task->active_thread_count = 0;
1572 new_task->user_stop_count = 0;
1573 new_task->legacy_stop_count = 0;
1574 new_task->active = TRUE;
1575 new_task->halting = FALSE;
1576 new_task->priv_flags = 0;
1577 new_task->t_flags = t_flags;
1578 task_ro_data.t_flags_ro = t_flags_ro;
1579 new_task->t_procflags = t_procflags;
1580 new_task->t_returnwaitflags = t_returnwaitflags;
1581 new_task->returnwait_inheritor = current_thread();
1582 new_task->importance = 0;
1583 new_task->crashed_thread_id = 0;
1584 new_task->watchports = NULL;
1585 new_task->t_rr_ranges = NULL;
1586
1587 new_task->bank_context = NULL;
1588
1589 if (parent_task) {
1590 parent_t_flags_ro = task_ro_flags_get(task: parent_task);
1591 }
1592
1593 if (parent_task && inherit_memory) {
1594#if __has_feature(ptrauth_calls)
1595 /* Inherit the pac exception flags from parent if in fork */
1596 task_ro_data.t_flags_ro |= (parent_t_flags_ro & (TFRO_PAC_ENFORCE_USER_STATE |
1597 TFRO_PAC_EXC_FATAL));
1598#endif /* __has_feature(ptrauth_calls) */
1599 /* Inherit the hardened binary flags from parent if in fork */
1600 task_ro_data.t_flags_ro |= parent_t_flags_ro & (TFRO_HARDENED | TFRO_PLATFORM | TFRO_JIT_EXC_FATAL);
1601#if XNU_TARGET_OS_OSX
1602 task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_MACH_HARDENING_OPT_OUT;
1603#endif /* XNU_TARGET_OS_OSX */
1604 }
1605
1606#ifdef MACH_BSD
1607 new_task->corpse_info = NULL;
1608#endif /* MACH_BSD */
1609
1610	/* The kernel task, which is not created by this function, has unique id 0; ids assigned here start at 1. */
1611 task_set_uniqueid(task: new_task);
1612
1613#if CONFIG_MACF
1614 set_task_crash_label(task: new_task, NULL);
1615
1616 task_ro_data.task_filters.mach_trap_filter_mask = NULL;
1617 task_ro_data.task_filters.mach_kobj_filter_mask = NULL;
1618#endif
1619
1620#if CONFIG_MEMORYSTATUS
1621 if (max_task_footprint != 0) {
1622 ledger_set_limit(ledger, entry: task_ledgers.phys_footprint, limit: max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1623 }
1624#endif /* CONFIG_MEMORYSTATUS */
1625
1626 if (task_wakeups_monitor_rate != 0) {
1627 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1628 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1629 task_wakeups_monitor_ctl(task: new_task, rate_hz: &flags, flags: &rate);
1630 }
1631
1632#if CONFIG_IO_ACCOUNTING
1633 uint32_t flags = IOMON_ENABLE;
1634 task_io_monitor_ctl(new_task, &flags);
1635#endif /* CONFIG_IO_ACCOUNTING */
1636
1637 machine_task_init(new_task, parent_task, memory_inherit: inherit_memory);
1638
1639 new_task->task_debug = NULL;
1640
1641#if DEVELOPMENT || DEBUG
1642 new_task->task_unnested = FALSE;
1643 new_task->task_disconnected_count = 0;
1644#endif
1645 queue_init(&new_task->semaphore_list);
1646 new_task->semaphores_owned = 0;
1647
1648 new_task->vtimers = 0;
1649
1650 new_task->shared_region = NULL;
1651
1652 new_task->affinity_space = NULL;
1653
1654#if CONFIG_CPU_COUNTERS
1655 new_task->t_kpc = 0;
1656#endif /* CONFIG_CPU_COUNTERS */
1657
1658 new_task->pidsuspended = FALSE;
1659 new_task->frozen = FALSE;
1660 new_task->changing_freeze_state = FALSE;
1661 new_task->rusage_cpu_flags = 0;
1662 new_task->rusage_cpu_percentage = 0;
1663 new_task->rusage_cpu_interval = 0;
1664 new_task->rusage_cpu_deadline = 0;
1665 new_task->rusage_cpu_callt = NULL;
1666#if MACH_ASSERT
1667 new_task->suspends_outstanding = 0;
1668#endif
1669 recount_task_init(tk: &new_task->tk_recount);
1670
1671#if HYPERVISOR
1672 new_task->hv_task_target = NULL;
1673#endif /* HYPERVISOR */
1674
1675#if CONFIG_TASKWATCH
1676 queue_init(&new_task->task_watchers);
1677 new_task->num_taskwatchers = 0;
1678 new_task->watchapplying = 0;
1679#endif /* CONFIG_TASKWATCH */
1680
1681 new_task->mem_notify_reserved = 0;
1682 new_task->memlimit_attrs_reserved = 0;
1683
1684 new_task->requested_policy = default_task_requested_policy;
1685 new_task->effective_policy = default_task_effective_policy;
1686
1687 new_task->task_shared_region_slide = -1;
1688
1689 if (parent_task != NULL) {
1690 task_ro_data.task_tokens.sec_token = *task_get_sec_token(task: parent_task);
1691 task_ro_data.task_tokens.audit_token = *task_get_audit_token(task: parent_task);
1692
1693 /* only inherit the option bits, no effect until task_set_immovable_pinned() */
1694 task_ro_data.task_control_port_options = task_get_control_port_options(task: parent_task);
1695
1696 task_ro_data.t_flags_ro |= parent_t_flags_ro & TFRO_FILTER_MSG;
1697#if CONFIG_MACF
1698 if (!(t_flags & TF_CORPSE_FORK)) {
1699 task_ro_data.task_filters.mach_trap_filter_mask = task_get_mach_trap_filter_mask(task: parent_task);
1700 task_ro_data.task_filters.mach_kobj_filter_mask = task_get_mach_kobj_filter_mask(task: parent_task);
1701 }
1702#endif
1703 } else {
1704 task_ro_data.task_tokens.sec_token = KERNEL_SECURITY_TOKEN;
1705 task_ro_data.task_tokens.audit_token = KERNEL_AUDIT_TOKEN;
1706
1707 task_ro_data.task_control_port_options = TASK_CONTROL_PORT_OPTIONS_NONE;
1708 }
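	/*
	 * Child tasks inherit the parent's security and audit tokens and its
	 * control-port option bits; parentless tasks (such as the kernel
	 * task) get the kernel tokens and no control-port options.
	 */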
1709
1710	/* must be set before task_importance_init_from_parent() */
1711 if (proc_ro != NULL) {
1712 new_task->bsd_info_ro = proc_ro_ref_task(pr: proc_ro, t: new_task, t_data: &task_ro_data);
1713 } else {
1714 new_task->bsd_info_ro = proc_ro_alloc(NULL, NULL, t: new_task, t_data: &task_ro_data);
1715 }
1716
1717 ipc_task_init(task: new_task, parent: parent_task);
1718
1719 task_importance_init_from_parent(new_task, parent_task);
1720
1721 new_task->corpse_vmobject_list = NULL;
1722
1723 if (parent_task != TASK_NULL) {
1724 /* inherit the parent's shared region */
1725 shared_region = vm_shared_region_get(task: parent_task);
1726 if (shared_region != NULL) {
1727 vm_shared_region_set(task: new_task, new_shared_region: shared_region);
1728 }
1729
1730#if __has_feature(ptrauth_calls)
1731 /* use parent's shared_region_id */
1732 char *shared_region_id = task_get_vm_shared_region_id_and_jop_pid(parent_task, NULL);
1733 if (shared_region_id != NULL) {
1734 shared_region_key_alloc(shared_region_id, FALSE, 0); /* get a reference */
1735 }
1736 task_set_shared_region_id(new_task, shared_region_id);
1737#endif /* __has_feature(ptrauth_calls) */
1738
1739 if (task_has_64Bit_addr(parent_task)) {
1740 task_set_64Bit_addr(new_task);
1741 }
1742
1743 if (task_has_64Bit_data(parent_task)) {
1744 task_set_64Bit_data(new_task);
1745 }
1746
1747 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1748 new_task->all_image_info_size = parent_task->all_image_info_size;
1749 new_task->mach_header_vm_address = 0;
1750
1751 if (inherit_memory && parent_task->affinity_space) {
1752 task_affinity_create(parent_task, new_task);
1753 }
1754
1755 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(task: parent_task);
1756
1757 new_task->task_exc_guard = parent_task->task_exc_guard;
1758 if (parent_task->t_flags & TF_NO_SMT) {
1759 new_task->t_flags |= TF_NO_SMT;
1760 }
1761
1762 if (parent_task->t_flags & TF_USE_PSET_HINT_CLUSTER_TYPE) {
1763 new_task->t_flags |= TF_USE_PSET_HINT_CLUSTER_TYPE;
1764 }
1765
1766 if (parent_task->t_flags & TF_TECS) {
1767 new_task->t_flags |= TF_TECS;
1768 }
1769
1770#if defined(__x86_64__)
1771 if (parent_task->t_flags & TF_INSN_COPY_OPTOUT) {
1772 new_task->t_flags |= TF_INSN_COPY_OPTOUT;
1773 }
1774#endif
1775
1776 new_task->priority = BASEPRI_DEFAULT;
1777 new_task->max_priority = MAXPRI_USER;
1778
1779 task_policy_create(task: new_task, parent_task);
1780 } else {
1781#ifdef __LP64__
1782 if (is_64bit) {
1783 task_set_64Bit_addr(new_task);
1784 }
1785#endif
1786
1787 if (is_64bit_data) {
1788 task_set_64Bit_data(new_task);
1789 }
1790
1791 new_task->all_image_info_addr = (mach_vm_address_t)0;
1792 new_task->all_image_info_size = (mach_vm_size_t)0;
1793
1794 new_task->pset_hint = PROCESSOR_SET_NULL;
1795
1796 new_task->task_exc_guard = TASK_EXC_GUARD_NONE;
1797
1798 if (new_task == kernel_task) {
1799 new_task->priority = BASEPRI_KERNEL;
1800 new_task->max_priority = MAXPRI_KERNEL;
1801 } else {
1802 new_task->priority = BASEPRI_DEFAULT;
1803 new_task->max_priority = MAXPRI_USER;
1804 }
1805 }
1806
1807 bzero(s: new_task->coalition, n: sizeof(new_task->coalition));
1808 for (int i = 0; i < COALITION_NUM_TYPES; i++) {
1809 queue_chain_init(new_task->task_coalition[i]);
1810 }
1811
1812 /* Allocate I/O Statistics */
1813 new_task->task_io_stats = kalloc_data(sizeof(struct io_stat_info),
1814 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1815
1816 bzero(s: &(new_task->cpu_time_eqos_stats), n: sizeof(new_task->cpu_time_eqos_stats));
1817 bzero(s: &(new_task->cpu_time_rqos_stats), n: sizeof(new_task->cpu_time_rqos_stats));
1818
1819 bzero(s: &new_task->extmod_statistics, n: sizeof(new_task->extmod_statistics));
1820
1821 counter_alloc(&(new_task->pageins));
1822 counter_alloc(&(new_task->cow_faults));
1823 counter_alloc(&(new_task->messages_sent));
1824 counter_alloc(&(new_task->messages_received));
1825
1826	/* Copy resource accounting info from the parent for a corpse-forked task. */
1827 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1828 task_rollup_accounting_info(new_task, parent_task);
1829 task_store_owned_vmobject_info(to_task: new_task, from_task: parent_task);
1830 } else {
1831 /* Initialize to zero for standard fork/spawn case */
1832 new_task->total_runnable_time = 0;
1833 new_task->syscalls_mach = 0;
1834 new_task->syscalls_unix = 0;
1835 new_task->c_switch = 0;
1836 new_task->p_switch = 0;
1837 new_task->ps_switch = 0;
1838 new_task->decompressions = 0;
1839 new_task->low_mem_notified_warn = 0;
1840 new_task->low_mem_notified_critical = 0;
1841 new_task->purged_memory_warn = 0;
1842 new_task->purged_memory_critical = 0;
1843 new_task->low_mem_privileged_listener = 0;
1844 new_task->memlimit_is_active = 0;
1845 new_task->memlimit_is_fatal = 0;
1846 new_task->memlimit_active_exc_resource = 0;
1847 new_task->memlimit_inactive_exc_resource = 0;
1848 new_task->task_timer_wakeups_bin_1 = 0;
1849 new_task->task_timer_wakeups_bin_2 = 0;
1850 new_task->task_gpu_ns = 0;
1851 new_task->task_writes_counters_internal.task_immediate_writes = 0;
1852 new_task->task_writes_counters_internal.task_deferred_writes = 0;
1853 new_task->task_writes_counters_internal.task_invalidated_writes = 0;
1854 new_task->task_writes_counters_internal.task_metadata_writes = 0;
1855 new_task->task_writes_counters_external.task_immediate_writes = 0;
1856 new_task->task_writes_counters_external.task_deferred_writes = 0;
1857 new_task->task_writes_counters_external.task_invalidated_writes = 0;
1858 new_task->task_writes_counters_external.task_metadata_writes = 0;
1859#if CONFIG_PHYS_WRITE_ACCT
1860 new_task->task_fs_metadata_writes = 0;
1861#endif /* CONFIG_PHYS_WRITE_ACCT */
1862 }
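	/*
	 * Corpse forks take over the parent's accounting wholesale via
	 * task_rollup_accounting_info() above; every other creation path
	 * starts the counters at zero so usage is attributed from scratch.
	 */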
1863
1864
1865 new_task->donates_own_pages = FALSE;
1866#if CONFIG_COALITIONS
1867 if (!(t_flags & TF_CORPSE_FORK)) {
1868 /* TODO: there is no graceful failure path here... */
1869 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1870 coalitions_adopt_task(coaltions: parent_coalitions, task: new_task);
1871 if (parent_coalitions[COALITION_TYPE_JETSAM]) {
1872 new_task->donates_own_pages = coalition_is_swappable(coal: parent_coalitions[COALITION_TYPE_JETSAM]);
1873 }
1874 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1875 /*
1876 * all tasks at least have a resource coalition, so
1877 * if the parent has one then inherit all coalitions
1878 * the parent is a part of
1879 */
1880 coalitions_adopt_task(coaltions: parent_task->coalition, task: new_task);
1881 if (parent_task->coalition[COALITION_TYPE_JETSAM]) {
1882 new_task->donates_own_pages = coalition_is_swappable(coal: parent_task->coalition[COALITION_TYPE_JETSAM]);
1883 }
1884 } else {
1885 /* TODO: assert that new_task will be PID 1 (launchd) */
1886 coalitions_adopt_init_task(task: new_task);
1887 }
1888 /*
1889 * on exec, we need to transfer the coalition roles from the
1890 * parent task to the exec copy task.
1891 */
1892 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1893 int coal_roles[COALITION_NUM_TYPES];
1894 task_coalition_roles(task: parent_task, roles: coal_roles);
1895 (void)coalitions_set_roles(coalitions: new_task->coalition, task: new_task, roles: coal_roles);
1896 }
1897 } else {
1898 coalitions_adopt_corpse_task(task: new_task);
1899 }
1900
1901 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1902 panic("created task is not a member of a resource coalition");
1903 }
1904 task_set_coalition_member(new_task);
1905#endif /* CONFIG_COALITIONS */
1906
1907 new_task->dispatchqueue_offset = 0;
1908 if (parent_task != NULL) {
1909 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1910 }
1911
1912 new_task->task_can_transfer_memory_ownership = FALSE;
1913 new_task->task_volatile_objects = 0;
1914 new_task->task_nonvolatile_objects = 0;
1915 new_task->task_objects_disowning = FALSE;
1916 new_task->task_objects_disowned = FALSE;
1917 new_task->task_owned_objects = 0;
1918 queue_init(&new_task->task_objq);
1919
1920#if CONFIG_FREEZE
1921 queue_init(&new_task->task_frozen_cseg_q);
1922#endif /* CONFIG_FREEZE */
1923
1924 task_objq_lock_init(new_task);
1925
1926#if __arm64__
1927 new_task->task_legacy_footprint = FALSE;
1928 new_task->task_extra_footprint_limit = FALSE;
1929 new_task->task_ios13extended_footprint_limit = FALSE;
1930#endif /* __arm64__ */
1931 new_task->task_region_footprint = FALSE;
1932 new_task->task_has_crossed_thread_limit = FALSE;
1933 new_task->task_thread_limit = 0;
1934#if CONFIG_SECLUDED_MEMORY
1935 new_task->task_can_use_secluded_mem = FALSE;
1936 new_task->task_could_use_secluded_mem = FALSE;
1937 new_task->task_could_also_use_secluded_mem = FALSE;
1938 new_task->task_suppressed_secluded = FALSE;
1939#endif /* CONFIG_SECLUDED_MEMORY */
1940
1941 /*
1942 * t_flags is set up above. But since we don't
1943 * support darkwake mode being set that way
1944 * currently, we clear it out here explicitly.
1945 */
1946 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1947
1948 queue_init(&new_task->io_user_clients);
1949 new_task->loadTag = 0;
1950
1951 lck_mtx_lock(lck: &tasks_threads_lock);
1952 queue_enter(&tasks, new_task, task_t, tasks);
1953 tasks_count++;
1954 if (tasks_suspend_state) {
1955 task_suspend_internal(new_task);
1956 }
1957 lck_mtx_unlock(lck: &tasks_threads_lock);
1958 task_ref_hold_proc_task_struct(task: new_task);
1959
1960 return KERN_SUCCESS;
1961}
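/*
 * On success the new task is linked onto the global tasks list (and
 * suspended right away if tasks_suspend_state is set), owns its ledger,
 * map and proc_ro reference, and carries the extra reference taken by
 * task_ref_hold_proc_task_struct().  Its task ports are still disabled, as
 * the comment above notes, so a caller finishes bring-up before exposing
 * the task.  A minimal sketch of the expected call, with the caller-side
 * setup elided:
 *
 *	kr = task_create_internal(parent_task, proc_ro, parent_coalitions,
 *	    inherit_memory, is_64bit, is_64bit_data, t_flags, t_flags_ro,
 *	    t_procflags, t_returnwaitflags, child_task);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;
 *	}
 */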
1962
1963/*
1964 * task_rollup_accounting_info
1965 *
1966 * Roll up accounting stats. Used to roll up stats
1967 * for the exec copy task and corpse fork.
1968 */
1969void
1970task_rollup_accounting_info(task_t to_task, task_t from_task)
1971{
1972 assert(from_task != to_task);
1973
1974 recount_task_copy(dst: &to_task->tk_recount, src: &from_task->tk_recount);
1975 to_task->total_runnable_time = from_task->total_runnable_time;
1976 counter_add(&to_task->faults, amount: counter_load(&from_task->faults));
1977 counter_add(&to_task->pageins, amount: counter_load(&from_task->pageins));
1978 counter_add(&to_task->cow_faults, amount: counter_load(&from_task->cow_faults));
1979 counter_add(&to_task->messages_sent, amount: counter_load(&from_task->messages_sent));
1980 counter_add(&to_task->messages_received, amount: counter_load(&from_task->messages_received));
1981 to_task->decompressions = from_task->decompressions;
1982 to_task->syscalls_mach = from_task->syscalls_mach;
1983 to_task->syscalls_unix = from_task->syscalls_unix;
1984 to_task->c_switch = from_task->c_switch;
1985 to_task->p_switch = from_task->p_switch;
1986 to_task->ps_switch = from_task->ps_switch;
1987 to_task->extmod_statistics = from_task->extmod_statistics;
1988 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1989 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1990 to_task->purged_memory_warn = from_task->purged_memory_warn;
1991 to_task->purged_memory_critical = from_task->purged_memory_critical;
1992 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1993 *to_task->task_io_stats = *from_task->task_io_stats;
1994 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1995 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1996 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1997 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1998 to_task->task_gpu_ns = from_task->task_gpu_ns;
1999 to_task->task_writes_counters_internal.task_immediate_writes = from_task->task_writes_counters_internal.task_immediate_writes;
2000 to_task->task_writes_counters_internal.task_deferred_writes = from_task->task_writes_counters_internal.task_deferred_writes;
2001 to_task->task_writes_counters_internal.task_invalidated_writes = from_task->task_writes_counters_internal.task_invalidated_writes;
2002 to_task->task_writes_counters_internal.task_metadata_writes = from_task->task_writes_counters_internal.task_metadata_writes;
2003 to_task->task_writes_counters_external.task_immediate_writes = from_task->task_writes_counters_external.task_immediate_writes;
2004 to_task->task_writes_counters_external.task_deferred_writes = from_task->task_writes_counters_external.task_deferred_writes;
2005 to_task->task_writes_counters_external.task_invalidated_writes = from_task->task_writes_counters_external.task_invalidated_writes;
2006 to_task->task_writes_counters_external.task_metadata_writes = from_task->task_writes_counters_external.task_metadata_writes;
2007#if CONFIG_PHYS_WRITE_ACCT
2008 to_task->task_fs_metadata_writes = from_task->task_fs_metadata_writes;
2009#endif /* CONFIG_PHYS_WRITE_ACCT */
2010
2011#if CONFIG_MEMORYSTATUS
2012 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.memorystatus_dirty_time);
2013#endif /* CONFIG_MEMORYSTATUS */
2014
2015	/* The memory accounting entries are deliberately not rolled up; only the entries below are. */
2016 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.cpu_time);
2017 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.platform_idle_wakeups);
2018 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.interrupt_wakeups);
2019#if CONFIG_SCHED_SFI
2020 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
2021 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.sfi_wait_times[class_id]);
2022 }
2023#endif
2024 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.cpu_time_billed_to_me);
2025 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.cpu_time_billed_to_others);
2026 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.physical_writes);
2027 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.logical_writes);
2028 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.energy_billed_to_me);
2029 ledger_rollup_entry(to_ledger: to_task->ledger, from_ledger: from_task->ledger, entry: task_ledgers.energy_billed_to_others);
2030}
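/*
 * The memory ledgers (phys_footprint and friends) are intentionally not
 * rolled up here; for corpse forks they are carried over separately by
 * vm_map_copy_footprint_ledgers() in task_duplicate_map_and_threads().
 */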
2031
2032/*
2033 * task_deallocate_internal:
2034 *
2035 * Drop a reference on a task.
2036 * Don't call this directly.
2037 */
2038extern void task_deallocate_internal(task_t task, os_ref_count_t refs);
2039void
2040task_deallocate_internal(
2041 task_t task,
2042 os_ref_count_t refs)
2043{
2044 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
2045
2046 if (task == TASK_NULL) {
2047 return;
2048 }
2049
2050#if IMPORTANCE_INHERITANCE
2051 if (refs == 1) {
2052 /*
2053 * If last ref potentially comes from the task's importance,
2054 * disconnect it. But more task refs may be added before
2055 * that completes, so wait for the reference to go to zero
2056 * naturally (it may happen on a recursive task_deallocate()
2057 * from the ipc_importance_disconnect_task() call).
2058 */
2059 if (IIT_NULL != task->task_imp_base) {
2060 ipc_importance_disconnect_task(task);
2061 }
2062 return;
2063 }
2064#endif /* IMPORTANCE_INHERITANCE */
2065
2066 if (refs > 0) {
2067 return;
2068 }
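	/*
	 * refs is the count remaining after the caller's drop: a value of 1
	 * may mean only the importance-held reference is left (handled
	 * above), any positive value means other holders remain, and only
	 * zero reaches the teardown below.
	 */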
2069
2070 /*
2071 * The task should be dead at this point. Ensure other resources
2072 * like threads, are gone before we trash the world.
2073 */
2074 assert(queue_empty(&task->threads));
2075 assert(get_bsdtask_info(task) == NULL);
2076 assert(!is_active(task->itk_space));
2077 assert(!task->active);
2078 assert(task->active_thread_count == 0);
2079 assert(!task_get_game_mode(task));
2080
2081 lck_mtx_lock(lck: &tasks_threads_lock);
2082 assert(terminated_tasks_count > 0);
2083 queue_remove(&terminated_tasks, task, task_t, tasks);
2084 terminated_tasks_count--;
2085 lck_mtx_unlock(lck: &tasks_threads_lock);
2086
2087 /*
2088 * remove the reference on bank context
2089 */
2090 task_bank_reset(task);
2091
2092 kfree_data(task->task_io_stats, sizeof(struct io_stat_info));
2093
2094 /*
2095 * Give the machine dependent code a chance
2096 * to perform cleanup before ripping apart
2097 * the task.
2098 */
2099 machine_task_terminate(task);
2100
2101 ipc_task_terminate(task);
2102
2103	/* let IOKit know: termination phase 2 */
2104 iokit_task_terminate(task, phase: 2);
2105
2106 /* Unregister task from userspace coredumps on panic */
2107 kern_unregister_userspace_coredump(task);
2108
2109 if (task->affinity_space) {
2110 task_affinity_deallocate(task);
2111 }
2112
2113#if MACH_ASSERT
2114 if (task->ledger != NULL &&
2115 task->map != NULL &&
2116 task->map->pmap != NULL &&
2117 task->map->pmap->ledger != NULL) {
2118 assert(task->ledger == task->map->pmap->ledger);
2119 }
2120#endif /* MACH_ASSERT */
2121
2122 vm_owned_objects_disown(task);
2123 assert(task->task_objects_disowned);
2124 if (task->task_owned_objects != 0) {
2125 panic("task_deallocate(%p): "
2126 "volatile_objects=%d nonvolatile_objects=%d owned=%d\n",
2127 task,
2128 task->task_volatile_objects,
2129 task->task_nonvolatile_objects,
2130 task->task_owned_objects);
2131 }
2132
2133#if CONFIG_DEFERRED_RECLAIM
2134 if (task->deferred_reclamation_metadata != NULL) {
2135 vm_deferred_reclamation_buffer_deallocate(metadata: task->deferred_reclamation_metadata);
2136 task->deferred_reclamation_metadata = NULL;
2137 }
2138#endif /* CONFIG_DEFERRED_RECLAIM */
2139
2140 vm_map_deallocate(map: task->map);
2141 if (task->is_large_corpse) {
2142 assert(large_corpse_count > 0);
2143 OSDecrementAtomic(&large_corpse_count);
2144 task->is_large_corpse = false;
2145 }
2146 is_release(task->itk_space);
2147
2148 if (task->t_rr_ranges) {
2149 restartable_ranges_release(ranges: task->t_rr_ranges);
2150 }
2151
2152 ledger_get_entries(ledger: task->ledger, entry: task_ledgers.interrupt_wakeups,
2153 credit: &interrupt_wakeups, debit: &debit);
2154 ledger_get_entries(ledger: task->ledger, entry: task_ledgers.platform_idle_wakeups,
2155 credit: &platform_idle_wakeups, debit: &debit);
2156
2157#if defined(CONFIG_SCHED_MULTIQ)
2158 sched_group_destroy(sched_group: task->sched_group);
2159#endif
2160
2161 struct recount_times_mach sum = { 0 };
2162 struct recount_times_mach p_only = { 0 };
2163 recount_task_times_perf_only(task, sum: &sum, sum_perf_only: &p_only);
2164#if CONFIG_PERVASIVE_ENERGY
2165 uint64_t energy = recount_task_energy_nj(task);
2166#endif /* CONFIG_PERVASIVE_ENERGY */
2167 recount_task_deinit(tk: &task->tk_recount);
2168
2169 /* Accumulate statistics for dead tasks */
2170 lck_spin_lock(lck: &dead_task_statistics_lock);
2171 dead_task_statistics.total_user_time += sum.rtm_user;
2172 dead_task_statistics.total_system_time += sum.rtm_system;
2173
2174 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
2175 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
2176
2177 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
2178 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
2179 dead_task_statistics.total_ptime += p_only.rtm_user + p_only.rtm_system;
2180 dead_task_statistics.total_pset_switches += task->ps_switch;
2181 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
2182#if CONFIG_PERVASIVE_ENERGY
2183 dead_task_statistics.task_energy += energy;
2184#endif /* CONFIG_PERVASIVE_ENERGY */
2185
2186 lck_spin_unlock(lck: &dead_task_statistics_lock);
2187 lck_mtx_destroy(lck: &task->lock, grp: &task_lck_grp);
2188
2189 if (!ledger_get_entries(ledger: task->ledger, entry: task_ledgers.tkm_private, credit: &credit,
2190 debit: &debit)) {
2191 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
2192 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
2193 }
2194 if (!ledger_get_entries(ledger: task->ledger, entry: task_ledgers.tkm_shared, credit: &credit,
2195 debit: &debit)) {
2196 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
2197 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
2198 }
2199 ledger_dereference(ledger: task->ledger);
2200
2201 counter_free(&task->faults);
2202 counter_free(&task->pageins);
2203 counter_free(&task->cow_faults);
2204 counter_free(&task->messages_sent);
2205 counter_free(&task->messages_received);
2206
2207#if CONFIG_COALITIONS
2208 task_release_coalitions(task);
2209#endif /* CONFIG_COALITIONS */
2210
2211 bzero(s: task->coalition, n: sizeof(task->coalition));
2212
2213#if MACH_BSD
2214 /* clean up collected information since last reference to task is gone */
2215 if (task->corpse_info) {
2216 void *corpse_info_kernel = kcdata_memory_get_begin_addr(data: task->corpse_info);
2217 task_crashinfo_destroy(data: task->corpse_info);
2218 task->corpse_info = NULL;
2219 kfree_data(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
2220 }
2221#endif
2222
2223#if CONFIG_MACF
2224 if (get_task_crash_label(task)) {
2225 mac_exc_free_label(label: get_task_crash_label(task));
2226 set_task_crash_label(task, NULL);
2227 }
2228#endif
2229
2230 assert(queue_empty(&task->task_objq));
2231 task_objq_lock_destroy(task);
2232
2233 if (task->corpse_vmobject_list) {
2234 kfree_data(task->corpse_vmobject_list,
2235 (vm_size_t)task->corpse_vmobject_list_size);
2236 }
2237
2238 task_ref_count_fini(task);
2239 proc_ro_erase_task(pr: task->bsd_info_ro);
2240 task_release_proc_task_struct(task);
2241}
2242
2243/*
2244 * task_name_deallocate_mig:
2245 *
2246 * Drop a reference on a task name.
2247 */
2248void
2249task_name_deallocate_mig(
2250 task_name_t task_name)
2251{
2252 return task_deallocate_grp((task_t)task_name, TASK_GRP_MIG);
2253}
2254
2255/*
2256 * task_policy_set_deallocate_mig:
2257 *
2258 * Drop a reference on a task policy set port.
2259 */
2260void
2261task_policy_set_deallocate_mig(task_policy_set_t task_policy_set)
2262{
2263 return task_deallocate_grp((task_t)task_policy_set, TASK_GRP_MIG);
2264}
2265
2266/*
2267 * task_policy_get_deallocate_mig:
2268 *
2269 * Drop a reference on a task policy get port.
2270 */
2271void
2272task_policy_get_deallocate_mig(task_policy_get_t task_policy_get)
2273{
2274 return task_deallocate_grp((task_t)task_policy_get, TASK_GRP_MIG);
2275}
2276
2277/*
2278 * task_inspect_deallocate_mig:
2279 *
2280 * Drop a task inspection reference.
2281 */
2282void
2283task_inspect_deallocate_mig(
2284 task_inspect_t task_inspect)
2285{
2286 return task_deallocate_grp((task_t)task_inspect, TASK_GRP_MIG);
2287}
2288
2289/*
2290 * task_read_deallocate_mig:
2291 *
2292 * Drop a reference on a task read port.
2293 */
2294void
2295task_read_deallocate_mig(
2296 task_read_t task_read)
2297{
2298 return task_deallocate_grp((task_t)task_read, TASK_GRP_MIG);
2299}
2300
2301/*
2302 * task_suspension_token_deallocate:
2303 *
2304 * Drop a reference on a task suspension token.
2305 */
2306void
2307task_suspension_token_deallocate(
2308 task_suspension_token_t token)
2309{
2310 return task_deallocate((task_t)token);
2311}
2312
2313void
2314task_suspension_token_deallocate_grp(
2315 task_suspension_token_t token,
2316 task_grp_t grp)
2317{
2318 return task_deallocate_grp((task_t)token, grp);
2319}
2320
2321/*
2322 * task_collect_crash_info:
2323 *
2324 * Collect crash info from BSD- and Mach-based data.
2325 */
2326kern_return_t
2327task_collect_crash_info(
2328 task_t task,
2329#ifdef CONFIG_MACF
2330 struct label *crash_label,
2331#endif
2332 int is_corpse_fork)
2333{
2334 kern_return_t kr = KERN_SUCCESS;
2335
2336 kcdata_descriptor_t crash_data = NULL;
2337 kcdata_descriptor_t crash_data_release = NULL;
2338 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
2339 mach_vm_offset_t crash_data_ptr = 0;
2340 void *crash_data_kernel = NULL;
2341 void *crash_data_kernel_release = NULL;
2342#if CONFIG_MACF
2343 struct label *label, *free_label;
2344#endif
2345
2346 if (!corpses_enabled()) {
2347 return KERN_NOT_SUPPORTED;
2348 }
2349
2350#if CONFIG_MACF
2351 free_label = label = mac_exc_create_label(NULL);
2352#endif
2353
2354 task_lock(task);
2355
2356 assert(is_corpse_fork || get_bsdtask_info(task) != NULL);
2357 if (task->corpse_info == NULL && (is_corpse_fork || get_bsdtask_info(task) != NULL)) {
2358#if CONFIG_MACF
2359 /* Set the crash label, used by the exception delivery mac hook */
2360 free_label = get_task_crash_label(task); // Most likely NULL.
2361 set_task_crash_label(task, label);
2362 mac_exc_update_task_crash_label(task, newlabel: crash_label);
2363#endif
2364 task_unlock(task);
2365
2366 crash_data_kernel = kalloc_data(CORPSEINFO_ALLOCATION_SIZE,
2367 Z_WAITOK | Z_ZERO);
2368 if (crash_data_kernel == NULL) {
2369 kr = KERN_RESOURCE_SHORTAGE;
2370 goto out_no_lock;
2371 }
2372 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
2373
2374 /* Do not get a corpse ref for corpse fork */
2375 crash_data = task_crashinfo_alloc_init(crash_data_p: (mach_vm_address_t)crash_data_ptr, size,
2376 kc_u_flags: is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
2377 KCFLAG_USE_MEMCOPY);
2378 if (crash_data) {
2379 task_lock(task);
2380 crash_data_release = task->corpse_info;
2381 crash_data_kernel_release = kcdata_memory_get_begin_addr(data: crash_data_release);
2382 task->corpse_info = crash_data;
2383
2384 task_unlock(task);
2385 kr = KERN_SUCCESS;
2386 } else {
2387 kfree_data(crash_data_kernel,
2388 CORPSEINFO_ALLOCATION_SIZE);
2389 kr = KERN_FAILURE;
2390 }
2391
2392 if (crash_data_release != NULL) {
2393 task_crashinfo_destroy(data: crash_data_release);
2394 }
2395 kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2396 } else {
2397 task_unlock(task);
2398 }
2399
2400out_no_lock:
2401#if CONFIG_MACF
2402 if (free_label != NULL) {
2403 mac_exc_free_label(label: free_label);
2404 }
2405#endif
2406 return kr;
2407}
2408
2409/*
2410 * task_deliver_crash_notification:
2411 *
2412 * Makes outcall to registered host port for a corpse.
2413 */
2414kern_return_t
2415task_deliver_crash_notification(
2416 task_t corpse, /* corpse or corpse fork */
2417 thread_t thread,
2418 exception_type_t etype,
2419 mach_exception_subcode_t subcode)
2420{
2421 kcdata_descriptor_t crash_info = corpse->corpse_info;
2422 thread_t th_iter = NULL;
2423 kern_return_t kr = KERN_SUCCESS;
2424 wait_interrupt_t wsave;
2425 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2426 ipc_port_t corpse_port;
2427
2428 if (crash_info == NULL) {
2429 return KERN_FAILURE;
2430 }
2431
2432 assert(task_is_a_corpse(corpse));
2433
2434 task_lock(task: corpse);
2435
2436 /*
2437 * Always populate code[0] as the effective exception type for EXC_CORPSE_NOTIFY.
2438	 * Crash reporters should derive whether it was fatal from the corpse blob.
2439 */
2440 code[0] = etype;
2441 code[1] = subcode;
2442
2443 queue_iterate(&corpse->threads, th_iter, thread_t, task_threads)
2444 {
2445 if (th_iter->corpse_dup == FALSE) {
2446 ipc_thread_reset(thread: th_iter);
2447 }
2448 }
2449 task_unlock(task: corpse);
2450
2451 /* Arm the no-sender notification for taskport */
2452 task_reference(corpse);
2453 corpse_port = convert_corpse_to_port_and_nsrequest(task: corpse);
2454
2455 wsave = thread_interrupt_level(THREAD_UNINT);
2456 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
2457 if (kr != KERN_SUCCESS) {
2458 printf(format: "Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task: corpse));
2459 }
2460
2461 (void)thread_interrupt_level(interruptible: wsave);
2462
2463 /*
2464	 * Drop the send right on the corpse port; this fires the
2465	 * no-senders notification if exception delivery failed.
2466 */
2467 ipc_port_release_send(port: corpse_port);
2468 return kr;
2469}
2470
2471/*
2472 * task_terminate:
2473 *
2474 * Terminate the specified task. See comments on thread_terminate
2475 * (kern/thread.c) about problems with terminating the "current task."
2476 */
2477
2478kern_return_t
2479task_terminate(
2480 task_t task)
2481{
2482 if (task == TASK_NULL) {
2483 return KERN_INVALID_ARGUMENT;
2484 }
2485
2486 if (get_bsdtask_info(task)) {
2487 return KERN_FAILURE;
2488 }
2489
2490 return task_terminate_internal(task);
2491}
2492
2493#if MACH_ASSERT
2494extern int proc_pid(struct proc *);
2495extern void proc_name_kdp(struct proc *p, char *buf, int size);
2496#endif /* MACH_ASSERT */
2497
2498#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
2499static void
2500__unused task_partial_reap(task_t task, __unused int pid)
2501{
2502 unsigned int reclaimed_resident = 0;
2503 unsigned int reclaimed_compressed = 0;
2504 uint64_t task_page_count;
2505
2506 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
2507
2508 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
2509 pid, task_page_count, 0, 0, 0);
2510
2511 vm_map_partial_reap(map: task->map, reclaimed_resident: &reclaimed_resident, reclaimed_compressed: &reclaimed_compressed);
2512
2513 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
2514 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
2515}
2516
2517/*
2518 * task_mark_corpse:
2519 *
2520 * Mark the task as a corpse. Called by crashing thread.
2521 */
2522kern_return_t
2523task_mark_corpse(task_t task)
2524{
2525 kern_return_t kr = KERN_SUCCESS;
2526 thread_t self_thread;
2527 (void) self_thread;
2528 wait_interrupt_t wsave;
2529#if CONFIG_MACF
2530 struct label *crash_label = NULL;
2531#endif
2532
2533 assert(task != kernel_task);
2534 assert(task == current_task());
2535 assert(!task_is_a_corpse(task));
2536
2537#if CONFIG_MACF
2538 crash_label = mac_exc_create_label_for_proc(proc: (struct proc*)get_bsdtask_info(task));
2539#endif
2540
2541 kr = task_collect_crash_info(task,
2542#if CONFIG_MACF
2543 crash_label,
2544#endif
2545 FALSE);
2546 if (kr != KERN_SUCCESS) {
2547 goto out;
2548 }
2549
2550 self_thread = current_thread();
2551
2552 wsave = thread_interrupt_level(THREAD_UNINT);
2553 task_lock(task);
2554
2555 /*
2556 * Check if any other thread called task_terminate_internal
2557 * and made the task inactive before we could mark it for
2558 * corpse pending report. Bail out if the task is inactive.
2559 */
2560 if (!task->active) {
2561		kcdata_descriptor_t crash_data_release = task->corpse_info;
2562		void *crash_data_kernel_release = kcdata_memory_get_begin_addr(data: crash_data_release);
2563
2564 task->corpse_info = NULL;
2565 task_unlock(task);
2566
2567 if (crash_data_release != NULL) {
2568 task_crashinfo_destroy(data: crash_data_release);
2569 }
2570 kfree_data(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
2571 return KERN_TERMINATED;
2572 }
2573
2574 task_set_corpse_pending_report(task);
2575 task_set_corpse(task);
2576 task->crashed_thread_id = thread_tid(thread: self_thread);
2577
2578 kr = task_start_halt_locked(task, TRUE);
2579 assert(kr == KERN_SUCCESS);
2580
2581 task_set_uniqueid(task);
2582
2583 task_unlock(task);
2584
2585 /*
2586 * ipc_task_reset() moved to last thread_terminate_self(): rdar://75737960.
2587 * disable old ports here instead.
2588 *
2589 * The vm_map and ipc_space must exist until this function returns,
2590 * convert_port_to_{map,space}_with_flavor relies on this behavior.
2591 */
2592 ipc_task_disable(task);
2593
2594	/* let IOKit know: termination phase 1 */
2595 iokit_task_terminate(task, phase: 1);
2596
2597 /* terminate the ipc space */
2598 ipc_space_terminate(space: task->itk_space);
2599
2600 /* Add it to global corpse task list */
2601 task_add_to_corpse_task_list(corpse_task: task);
2602
2603 thread_terminate_internal(thread: self_thread);
2604
2605 (void) thread_interrupt_level(interruptible: wsave);
2606 assert(task->halting == TRUE);
2607
2608out:
2609#if CONFIG_MACF
2610 mac_exc_free_label(label: crash_label);
2611#endif
2612 return kr;
2613}
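/*
 * From here the crashing thread goes on to terminate itself; the corpse
 * stays on the corpse task list until the EXC_CORPSE_NOTIFY handler drops
 * the last send right, at which point task_port_no_senders() removes it
 * from the list and terminates it (see task_deliver_crash_notification()
 * above).
 */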
2614
2615/*
2616 * task_set_uniqueid
2617 *
2618 * Set task uniqueid to systemwide unique 64 bit value
2619 */
2620void
2621task_set_uniqueid(task_t task)
2622{
2623 task->task_uniqueid = OSIncrementAtomic64(address: &next_taskuniqueid);
2624}
2625
2626/*
2627 * task_clear_corpse
2628 *
2629 * Clears the corpse pending bit on task.
2630 * Removes inspection bit on the threads.
2631 */
2632void
2633task_clear_corpse(task_t task)
2634{
2635 thread_t th_iter = NULL;
2636
2637 task_lock(task);
2638 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2639 {
2640 thread_mtx_lock(thread: th_iter);
2641 th_iter->inspection = FALSE;
2642 ipc_thread_disable(thread: th_iter);
2643 thread_mtx_unlock(thread: th_iter);
2644 }
2645
2646 thread_terminate_crashed_threads();
2647 /* remove the pending corpse report flag */
2648 task_clear_corpse_pending_report(task);
2649
2650 task_unlock(task);
2651}
2652
2653/*
2654 * task_port_no_senders
2655 *
2656 * Called whenever the Mach port system detects no-senders on
2657 * the task port of a corpse.
2658 * Each notification that comes in should terminate the task (corpse).
2659 */
2660static void
2661task_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
2662{
2663 task_t task = ipc_kobject_get_locked(port, type: IKOT_TASK_CONTROL);
2664
2665 assert(task != TASK_NULL);
2666 assert(task_is_a_corpse(task));
2667
2668 /* Remove the task from global corpse task list */
2669 task_remove_from_corpse_task_list(corpse_task: task);
2670
2671 task_clear_corpse(task);
2672 vm_map_unset_corpse_source(map: task->map);
2673 task_terminate_internal(task);
2674}
2675
2676/*
2677 * task_port_with_flavor_no_senders
2678 *
2679 * Called whenever the Mach port system detects no-senders on
2680 * the task inspect or read port. These ports are allocated lazily and
2681 * should be deallocated here when there are no senders remaining.
2682 */
2683static void
2684task_port_with_flavor_no_senders(
2685 ipc_port_t port,
2686 mach_port_mscount_t mscount __unused)
2687{
2688 task_t task;
2689 mach_task_flavor_t flavor;
2690 ipc_kobject_type_t kotype;
2691
2692 ip_mq_lock(port);
2693 if (port->ip_srights > 0) {
2694 ip_mq_unlock(port);
2695 return;
2696 }
2697 kotype = ip_kotype(port);
2698 assert((IKOT_TASK_READ == kotype) || (IKOT_TASK_INSPECT == kotype));
2699 task = ipc_kobject_get_locked(port, type: kotype);
2700 if (task != TASK_NULL) {
2701 task_reference(task);
2702 }
2703 ip_mq_unlock(port);
2704
2705 if (task == TASK_NULL) {
2706 /* The task is exiting or disabled; it will eventually deallocate the port */
2707 return;
2708 }
2709
2710 if (kotype == IKOT_TASK_READ) {
2711 flavor = TASK_FLAVOR_READ;
2712 } else {
2713 flavor = TASK_FLAVOR_INSPECT;
2714 }
2715
2716 itk_lock(task);
2717 ip_mq_lock(port);
2718
2719 /*
2720 * If the port is no longer active, then ipc_task_terminate() ran
2721 * and destroyed the kobject already. Just deallocate the task
2722 * ref we took and go away.
2723 *
2724 * It is also possible that several nsrequests are in flight,
2725 * only one shall NULL-out the port entry, and this is the one
2726 * that gets to dealloc the port.
2727 *
2728 * Check for a stale no-senders notification. A call to any function
2729 * that vends out send rights to this port could resurrect it between
2730 * this notification being generated and actually being handled here.
2731 */
2732 if (!ip_active(port) ||
2733 task->itk_task_ports[flavor] != port ||
2734 port->ip_srights > 0) {
2735 ip_mq_unlock(port);
2736 itk_unlock(task);
2737 task_deallocate(task);
2738 return;
2739 }
2740
2741 assert(task->itk_task_ports[flavor] == port);
2742 task->itk_task_ports[flavor] = IP_NULL;
2743 itk_unlock(task);
2744
2745 ipc_kobject_dealloc_port_and_unlock(port, mscount: 0, type: kotype);
2746
2747 task_deallocate(task);
2748}
2749
2750/*
2751 * task_wait_till_threads_terminate_locked
2752 *
2753 * Wait till all the threads in the task are terminated.
2754 * Might release the task lock and re-acquire it.
2755 */
2756void
2757task_wait_till_threads_terminate_locked(task_t task)
2758{
2759 /* wait for all the threads in the task to terminate */
2760 while (task->active_thread_count != 0) {
2761 assert_wait(event: (event_t)&task->active_thread_count, THREAD_UNINT);
2762 task_unlock(task);
2763 thread_block(THREAD_CONTINUE_NULL);
2764
2765 task_lock(task);
2766 }
2767}
2768
2769/*
2770 * task_duplicate_map_and_threads
2771 *
2772 * Copy the VM map of the source task.
2773 * Copy active threads from the source task to the destination task.
2774 * The source task is suspended for the duration of the copy.
2775 */
2776kern_return_t
2777task_duplicate_map_and_threads(
2778 task_t task,
2779 void *p,
2780 task_t new_task,
2781 thread_t *thread_ret,
2782 uint64_t **udata_buffer,
2783 int *size,
2784 int *num_udata,
2785 bool for_exception)
2786{
2787 kern_return_t kr = KERN_SUCCESS;
2788 int active;
2789 thread_t thread, self, thread_return = THREAD_NULL;
2790 thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2791 thread_t *thread_array;
2792 uint32_t active_thread_count = 0, array_count = 0, i;
2793 vm_map_t oldmap;
2794 uint64_t *buffer = NULL;
2795 int buf_size = 0;
2796 int est_knotes = 0, num_knotes = 0;
2797
2798 self = current_thread();
2799
2800 /*
2801 * Suspend the task to copy thread state, use the internal
2802 * variant so that no user-space process can resume
2803 * the task from under us
2804 */
2805 kr = task_suspend_internal(task);
2806 if (kr != KERN_SUCCESS) {
2807 return kr;
2808 }
2809
2810 if (task->map->disable_vmentry_reuse == TRUE) {
2811 /*
2812 * Quite likely GuardMalloc (or some debugging tool)
2813 * is being used on this task. And it has gone through
2814 * its limit. Making a corpse will likely encounter
2815 * a lot of VM entries that will need COW.
2816 *
2817 * Skip it.
2818 */
2819#if DEVELOPMENT || DEBUG
2820 memorystatus_abort_vm_map_fork(task);
2821#endif
2822 ktriage_record(thread_id: thread_tid(thread: self), KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_CORPSE, KDBG_TRIAGE_RESERVED, KDBG_TRIAGE_CORPSE_FAIL_LIBGMALLOC), arg: 0 /* arg */);
2823 task_resume_internal(task);
2824 return KERN_FAILURE;
2825 }
2826
2827 /* Check with VM if vm_map_fork is allowed for this task */
2828 bool is_large = false;
2829 if (memorystatus_allowed_vm_map_fork(task, is_large: &is_large)) {
2830		/* Set up the new task's VM map: replace it with a COW fork of the source task's map */
2831 oldmap = new_task->map;
2832 new_task->map = vm_map_fork(ledger: new_task->ledger,
2833 old_map: task->map,
2834 options: (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2835 VM_MAP_FORK_PRESERVE_PURGEABLE |
2836 VM_MAP_FORK_CORPSE_FOOTPRINT));
2837 if (new_task->map) {
2838 new_task->is_large_corpse = is_large;
2839 vm_map_deallocate(map: oldmap);
2840
2841 /* copy ledgers that impact the memory footprint */
2842 vm_map_copy_footprint_ledgers(old_task: task, new_task);
2843
2844 /* Get all the udata pointers from kqueue */
2845 est_knotes = kevent_proc_copy_uptrs(proc: p, NULL, bufsize: 0);
2846 if (est_knotes > 0) {
2847 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2848 buffer = kalloc_data(buf_size, Z_WAITOK);
2849 num_knotes = kevent_proc_copy_uptrs(proc: p, buf: buffer, bufsize: buf_size);
2850 if (num_knotes > est_knotes + 32) {
2851 num_knotes = est_knotes + 32;
2852 }
2853 }
2854 } else {
2855 if (is_large) {
2856 assert(large_corpse_count > 0);
2857 OSDecrementAtomic(&large_corpse_count);
2858 }
2859 new_task->map = oldmap;
2860#if DEVELOPMENT || DEBUG
2861 memorystatus_abort_vm_map_fork(task);
2862#endif
2863 task_resume_internal(task);
2864 return KERN_NO_SPACE;
2865 }
2866 } else if (!for_exception) {
2867#if DEVELOPMENT || DEBUG
2868 memorystatus_abort_vm_map_fork(task);
2869#endif
2870 task_resume_internal(task);
2871 return KERN_NO_SPACE;
2872 }
2873
2874 active_thread_count = task->active_thread_count;
2875 if (active_thread_count == 0) {
2876 kfree_data(buffer, buf_size);
2877 task_resume_internal(task);
2878 return KERN_FAILURE;
2879 }
2880
2881 thread_array = kalloc_type(thread_t, active_thread_count, Z_WAITOK);
2882
2883 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2884 task_lock(task);
2885 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2886 /* Skip inactive threads */
2887 active = thread->active;
2888 if (!active) {
2889 continue;
2890 }
2891
2892 if (array_count >= active_thread_count) {
2893 break;
2894 }
2895
2896 thread_array[array_count++] = thread;
2897 thread_reference(thread);
2898 }
2899 task_unlock(task);
2900
2901 for (i = 0; i < array_count; i++) {
2902 kr = thread_create_with_continuation(task: new_task, new_thread: &new_thread, continuation: (thread_continue_t)thread_corpse_continue);
2903 if (kr != KERN_SUCCESS) {
2904 break;
2905 }
2906
2907 /* Equivalent of current thread in corpse */
2908 if (thread_array[i] == self) {
2909 thread_return = new_thread;
2910 new_task->crashed_thread_id = thread_tid(thread: new_thread);
2911 } else if (first_thread == NULL) {
2912 first_thread = new_thread;
2913 } else {
2914 /* drop the extra ref returned by thread_create_with_continuation */
2915 thread_deallocate(thread: new_thread);
2916 }
2917
2918 kr = thread_dup2(thread_array[i], new_thread);
2919 if (kr != KERN_SUCCESS) {
2920 thread_mtx_lock(thread: new_thread);
2921 new_thread->corpse_dup = TRUE;
2922 thread_mtx_unlock(thread: new_thread);
2923 continue;
2924 }
2925
2926 /* Copy thread name */
2927 bsd_copythreadname(dst_uth: get_bsdthread_info(new_thread),
2928 src_uth: get_bsdthread_info(thread_array[i]));
2929 new_thread->thread_tag = thread_array[i]->thread_tag &
2930 ~THREAD_TAG_USER_JOIN;
2931 thread_copy_resource_info(dst_thread: new_thread, src_thread: thread_array[i]);
2932 }
2933
2934 /* return the first thread if we couldn't find the equivalent of current */
2935 if (thread_return == THREAD_NULL) {
2936 thread_return = first_thread;
2937 } else if (first_thread != THREAD_NULL) {
2938 /* drop the extra ref returned by thread_create_with_continuation */
2939 thread_deallocate(thread: first_thread);
2940 }
2941
2942 task_resume_internal(task);
2943
2944 for (i = 0; i < array_count; i++) {
2945 thread_deallocate(thread: thread_array[i]);
2946 }
2947 kfree_type(thread_t, active_thread_count, thread_array);
2948
2949 if (kr == KERN_SUCCESS) {
2950 *thread_ret = thread_return;
2951 *udata_buffer = buffer;
2952 *size = buf_size;
2953 *num_udata = num_knotes;
2954 } else {
2955 if (thread_return != THREAD_NULL) {
2956 thread_deallocate(thread: thread_return);
2957 }
2958 kfree_data(buffer, buf_size);
2959 }
2960
2961 return kr;
2962}
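/*
 * On success *thread_ret is the corpse's stand-in for the crashing thread
 * (or the first duplicated thread when no equivalent was found), and the
 * caller takes ownership of the kqueue udata buffer returned through
 * *udata_buffer, *size and *num_udata.
 */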
2963
2964#if CONFIG_SECLUDED_MEMORY
2965extern void task_set_can_use_secluded_mem_locked(
2966 task_t task,
2967 boolean_t can_use_secluded_mem);
2968#endif /* CONFIG_SECLUDED_MEMORY */
2969
2970#if MACH_ASSERT
2971int debug4k_panic_on_terminate = 0;
2972#endif /* MACH_ASSERT */
2973kern_return_t
2974task_terminate_internal(
2975 task_t task)
2976{
2977 thread_t thread, self;
2978 task_t self_task;
2979 boolean_t interrupt_save;
2980 int pid = 0;
2981
2982 assert(task != kernel_task);
2983
2984 self = current_thread();
2985 self_task = current_task();
2986
2987 /*
2988 * Get the task locked and make sure that we are not racing
2989 * with someone else trying to terminate us.
2990 */
2991 if (task == self_task) {
2992 task_lock(task);
2993 } else if (task < self_task) {
2994 task_lock(task);
2995 task_lock(task: self_task);
2996 } else {
2997 task_lock(task: self_task);
2998 task_lock(task);
2999 }
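	/*
	 * When terminating another task, both task locks are taken in
	 * ascending address order so that two tasks terminating each other
	 * concurrently cannot deadlock.
	 */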
3000
3001#if CONFIG_SECLUDED_MEMORY
3002 if (task->task_can_use_secluded_mem) {
3003 task_set_can_use_secluded_mem_locked(task, FALSE);
3004 }
3005 task->task_could_use_secluded_mem = FALSE;
3006 task->task_could_also_use_secluded_mem = FALSE;
3007
3008 if (task->task_suppressed_secluded) {
3009 stop_secluded_suppression(task);
3010 }
3011#endif /* CONFIG_SECLUDED_MEMORY */
3012
3013 if (!task->active) {
3014 /*
3015 * Task is already being terminated.
3016 * Just return an error. If we are dying, this will
3017 * just get us to our AST special handler and that
3018	 * will let us finish terminating ourselves.
3019 */
3020 task_unlock(task);
3021 if (self_task != task) {
3022 task_unlock(task: self_task);
3023 }
3024
3025 return KERN_FAILURE;
3026 }
3027
3028 if (task_corpse_pending_report(task)) {
3029 /*
3030 * Task is marked for reporting as corpse.
3031 * Just return an error. This will
3032 * just get us to our AST special handler and that
3033	 * will get us to finish the path to death.
3034 */
3035 task_unlock(task);
3036 if (self_task != task) {
3037 task_unlock(task: self_task);
3038 }
3039
3040 return KERN_FAILURE;
3041 }
3042
3043 if (self_task != task) {
3044 task_unlock(task: self_task);
3045 }
3046
3047 /*
3048 * Make sure the current thread does not get aborted out of
3049 * the waits inside these operations.
3050 */
3051 interrupt_save = thread_interrupt_level(THREAD_UNINT);
3052
3053 /*
3054 * Indicate that we want all the threads to stop executing
3055 * at user space by holding the task (we would have held
3056 * each thread independently in thread_terminate_internal -
3057 * but this way we may be more likely to already find it
3058 * held there). Mark the task inactive, and prevent
3059 * further task operations via the task port.
3060 *
3061 * The vm_map and ipc_space must exist until this function returns,
3062 * convert_port_to_{map,space}_with_flavor relies on this behavior.
3063 */
3064 task_hold_locked(task);
3065 task->active = FALSE;
3066 ipc_task_disable(task);
3067
3068#if CONFIG_EXCLAVES
3069 task_stop_conclave(task, false);
3070#endif /* CONFIG_EXCLAVES */
3071
3072#if CONFIG_TELEMETRY
3073 /*
3074 * Notify telemetry that this task is going away.
3075 */
3076 telemetry_task_ctl_locked(task, TF_TELEMETRY, enable_disable: 0);
3077#endif
3078
3079 /*
3080 * Terminate each thread in the task.
3081 */
3082 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3083 thread_terminate_internal(thread);
3084 }
3085
3086#ifdef MACH_BSD
3087 void *bsd_info = get_bsdtask_info(task);
3088 if (bsd_info != NULL) {
3089 pid = proc_pid(p: bsd_info);
3090 }
3091#endif /* MACH_BSD */
3092
3093 task_unlock(task);
3094
3095 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
3096 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
3097
3098 /* Early object reap phase */
3099
3100// PR-17045188: Revisit implementation
3101// task_partial_reap(task, pid);
3102
3103#if CONFIG_TASKWATCH
3104 /*
3105 * remove all task watchers
3106 */
3107 task_removewatchers(task);
3108
3109#endif /* CONFIG_TASKWATCH */
3110
3111 /*
3112 * Destroy all synchronizers owned by the task.
3113 */
3114 task_synchronizer_destroy_all(task);
3115
3116 /*
3117 * Clear the watchport boost on the task.
3118 */
3119 task_remove_turnstile_watchports(task);
3120
3121 /* let iokit know 1 */
3122 iokit_task_terminate(task, phase: 1);
3123
3124 /*
3125 * Destroy the IPC space, leaving just a reference for it.
3126 */
3127 ipc_space_terminate(space: task->itk_space);
3128
3129#if 00
3130 /* if some ledgers go negative on tear-down again... */
3131 ledger_disable_panic_on_negative(task->map->pmap->ledger,
3132 task_ledgers.phys_footprint);
3133 ledger_disable_panic_on_negative(task->map->pmap->ledger,
3134 task_ledgers.internal);
3135 ledger_disable_panic_on_negative(task->map->pmap->ledger,
3136 task_ledgers.iokit_mapped);
3137 ledger_disable_panic_on_negative(task->map->pmap->ledger,
3138 task_ledgers.alternate_accounting);
3139 ledger_disable_panic_on_negative(task->map->pmap->ledger,
3140 task_ledgers.alternate_accounting_compressed);
3141#endif
3142
3143#if CONFIG_DEFERRED_RECLAIM
3144 /*
3145	 * Remove this task's reclaim buffer from the global queues.
3146 */
3147 if (task->deferred_reclamation_metadata != NULL) {
3148 vm_deferred_reclamation_buffer_uninstall(metadata: task->deferred_reclamation_metadata);
3149 }
3150#endif /* CONFIG_DEFERRED_RECLAIM */
3151
3152 /*
3153 * If the current thread is a member of the task
3154 * being terminated, then the last reference to
3155 * the task will not be dropped until the thread
3156 * is finally reaped. To avoid incurring the
3157 * expense of removing the address space regions
3158	 * at reap time, we do it explicitly here.
3159 */
3160
3161#if MACH_ASSERT
3162 /*
3163 * Identify the pmap's process, in case the pmap ledgers drift
3164 * and we have to report it.
3165 */
3166 char procname[17];
3167 void *proc = get_bsdtask_info(task);
3168 if (proc) {
3169 pid = proc_pid(proc);
3170 proc_name_kdp(proc, procname, sizeof(procname));
3171 } else {
3172 pid = 0;
3173 strlcpy(procname, "<unknown>", sizeof(procname));
3174 }
3175 pmap_set_process(task->map->pmap, pid, procname);
3176 if (vm_map_page_shift(task->map) < (int)PAGE_SHIFT) {
3177 DEBUG4K_LIFE("map %p procname: %s\n", task->map, procname);
3178 if (debug4k_panic_on_terminate) {
3179 panic("DEBUG4K: %s:%d %d[%s] map %p", __FUNCTION__, __LINE__, pid, procname, task->map);
3180 }
3181 }
3182#endif /* MACH_ASSERT */
3183
3184 vm_map_terminate(map: task->map);
3185
3186 /* release our shared region */
3187 vm_shared_region_set(task, NULL);
3188
3189#if __has_feature(ptrauth_calls)
3190 task_set_shared_region_id(task, NULL);
3191#endif /* __has_feature(ptrauth_calls) */
3192
3193 lck_mtx_lock(lck: &tasks_threads_lock);
3194 queue_remove(&tasks, task, task_t, tasks);
3195 queue_enter(&terminated_tasks, task, task_t, tasks);
3196 tasks_count--;
3197 terminated_tasks_count++;
3198 lck_mtx_unlock(lck: &tasks_threads_lock);
3199
3200 /*
3201 * We no longer need to guard against being aborted, so restore
3202 * the previous interruptible state.
3203 */
3204 thread_interrupt_level(interruptible: interrupt_save);
3205
3206#if CONFIG_CPU_COUNTERS
3207 /* force the task to release all ctrs */
3208 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS) {
3209 kpc_force_all_ctrs(task, 0);
3210 }
3211#endif /* CONFIG_CPU_COUNTERS */
3212
3213#if CONFIG_COALITIONS
3214 /*
3215	 * Leave the coalition for a corpse task or a task that
3216	 * never had any active threads (e.g. fork or exec failure).
3217	 * For a task with active threads, the task will be removed
3218	 * from the coalition by the last terminating thread.
3219 */
3220 if (task->active_thread_count == 0) {
3221 coalitions_remove_task(task);
3222 }
3223#endif
3224
3225#if CONFIG_FREEZE
3226 extern int vm_compressor_available;
3227 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE && vm_compressor_available) {
3228 task_disown_frozen_csegs(task);
3229 assert(queue_empty(&task->task_frozen_cseg_q));
3230 }
3231#endif /* CONFIG_FREEZE */
3232
3233
3234 /*
3235 * Get rid of the task active reference on itself.
3236 */
3237 task_deallocate_grp(task, TASK_GRP_INTERNAL);
3238
3239 return KERN_SUCCESS;
3240}
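
/*
 * Illustrative sketch (not part of the build): the lock-ordering rule used
 * at the top of task_terminate_internal().  When two task locks must be
 * held at once they are taken in ascending pointer order, so two threads
 * terminating each other's tasks cannot deadlock.  The helper name below
 * is hypothetical and exists only to restate that rule.
 */
#if 0
static void
example_lock_task_pair(task_t a, task_t b)              /* hypothetical */
{
	if (a == b) {
		task_lock(a);
	} else if (a < b) {
		task_lock(a);           /* lower address first */
		task_lock(b);
	} else {
		task_lock(b);
		task_lock(a);
	}
}
#endif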
3241
3242void
3243tasks_system_suspend(boolean_t suspend)
3244{
3245 task_t task;
3246
3247 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SUSPEND_USERSPACE) |
3248 (suspend ? DBG_FUNC_START : DBG_FUNC_END));
3249
3250 lck_mtx_lock(lck: &tasks_threads_lock);
3251 assert(tasks_suspend_state != suspend);
3252 tasks_suspend_state = suspend;
3253 queue_iterate(&tasks, task, task_t, tasks) {
3254 if (task == kernel_task) {
3255 continue;
3256 }
3257 suspend ? task_suspend_internal(task) : task_resume_internal(task);
3258 }
3259 lck_mtx_unlock(lck: &tasks_threads_lock);
3260}
3261
3262/*
3263 * task_start_halt:
3264 *
3265 * Shut the current task down (except for the current thread) in
3266 * preparation for dramatic changes to the task (probably exec).
3267 * We hold the task and mark all other threads in the task for
3268 * termination.
3269 */
3270kern_return_t
3271task_start_halt(task_t task)
3272{
3273 kern_return_t kr = KERN_SUCCESS;
3274 task_lock(task);
3275 kr = task_start_halt_locked(task, FALSE);
3276 task_unlock(task);
3277 return kr;
3278}
3279
3280static kern_return_t
3281task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
3282{
3283 thread_t thread, self;
3284 uint64_t dispatchqueue_offset;
3285
3286 assert(task != kernel_task);
3287
3288 self = current_thread();
3289
3290 if (task != get_threadtask(self) && !task_is_a_corpse_fork(task)) {
3291 return KERN_INVALID_ARGUMENT;
3292 }
3293
3294 if (!should_mark_corpse &&
3295 (task->halting || !task->active || !self->active)) {
3296 /*
3297 * Task or current thread is already being terminated.
3298 * Hurry up and return out of the current kernel context
3299 * so that we run our AST special handler to terminate
3300 * ourselves. If should_mark_corpse is set, the corpse
3301		 * creation might have raced with exec; let the corpse
3302		 * creation continue. Once the current thread reaches the AST,
3303		 * the thread in exec will be woken up from task_complete_halt.
3304		 * Exec will fail because the proc was marked for exit.
3305		 * Once the thread in exec reaches the AST, it will call proc_exit
3306 * and deliver the EXC_CORPSE_NOTIFY.
3307 */
3308 return KERN_FAILURE;
3309 }
3310
3311 /* Thread creation will fail after this point of no return. */
3312 task->halting = TRUE;
3313
3314 /*
3315 * Mark all the threads to keep them from starting any more
3316 * user-level execution. The thread_terminate_internal code
3317 * would do this on a thread by thread basis anyway, but this
3318 * gives us a better chance of not having to wait there.
3319 */
3320 task_hold_locked(task);
3321
3322#if CONFIG_EXCLAVES
3323 if (should_mark_corpse) {
3324 void *crash_info_ptr = task_get_corpseinfo(task);
3325 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3326 if (crash_info_ptr != NULL && thread->th_exclaves_ipc_buffer != NULL) {
3327 struct thread_crash_exclaves_info info = { 0 };
3328
3329 info.tcei_flags = kExclaveRPCActive;
3330 info.tcei_scid = thread->th_exclaves_scheduling_context_id;
3331 info.tcei_thread_id = thread->thread_id;
3332
3333 kcdata_push_data(crash_info_ptr,
3334 STACKSHOT_KCTYPE_KERN_EXCLAVES_CRASH_THREADINFO,
3335 sizeof(struct thread_crash_exclaves_info), &info);
3336 }
3337 }
3338
3339 task_unlock(task);
3340 task_stop_conclave(task, true);
3341 task_lock(task);
3342 }
3343#endif /* CONFIG_EXCLAVES */
3344
3345 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(get_bsdtask_info(task));
3346 /*
3347 * Terminate all the other threads in the task.
3348 */
3349 queue_iterate(&task->threads, thread, thread_t, task_threads)
3350 {
3351 /*
3352		 * Remove priority throttles so that threads terminate in a timely manner. This has
3353 * to be done after task_hold_locked() traps all threads to AST, but before
3354 * threads are marked inactive in thread_terminate_internal(). Takes thread
3355 * mutex lock.
3356 *
3357		 * We need the task_is_a_corpse() check so that we don't accidentally update policy
3358 * for tasks that are doing posix_spawn().
3359 *
3360 * See: thread_policy_update_tasklocked().
3361 */
3362 if (task_is_a_corpse(task)) {
3363 proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
3364 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
3365 }
3366
3367 if (should_mark_corpse) {
3368 thread_mtx_lock(thread);
3369 thread->inspection = TRUE;
3370 thread_mtx_unlock(thread);
3371 }
3372 if (thread != self) {
3373 thread_terminate_internal(thread);
3374 }
3375 }
3376 task->dispatchqueue_offset = dispatchqueue_offset;
3377
3378 task_release_locked(task);
3379
3380 return KERN_SUCCESS;
3381}
3382
3383
3384/*
3385 * task_complete_halt:
3386 *
3387 * Complete task halt by waiting for threads to terminate, then clean
3388 * up task resources (VM, port namespace, etc...) and then let the
3389 * current thread go in the (practically empty) task context.
3390 *
3391 * Note: the task->halting flag is not cleared, in order to avoid
3392 * creation of a new thread in the old exec'ed task.
3393 */
3394void
3395task_complete_halt(task_t task)
3396{
3397 task_lock(task);
3398 assert(task->halting);
3399 assert(task == current_task());
3400
3401 /*
3402 * Wait for the other threads to get shut down.
3403 * When the last other thread is reaped, we'll be
3404 * woken up.
3405 */
3406 if (task->thread_count > 1) {
3407 assert_wait(event: (event_t)&task->halting, THREAD_UNINT);
3408 task_unlock(task);
3409 thread_block(THREAD_CONTINUE_NULL);
3410 } else {
3411 task_unlock(task);
3412 }
3413
3414#if CONFIG_DEFERRED_RECLAIM
3415 if (task->deferred_reclamation_metadata) {
3416 vm_deferred_reclamation_buffer_uninstall(
3417 metadata: task->deferred_reclamation_metadata);
3418 vm_deferred_reclamation_buffer_deallocate(
3419 metadata: task->deferred_reclamation_metadata);
3420 task->deferred_reclamation_metadata = NULL;
3421 }
3422#endif /* CONFIG_DEFERRED_RECLAIM */
3423
3424 /*
3425 * Give the machine dependent code a chance
3426 * to perform cleanup of task-level resources
3427 * associated with the current thread before
3428 * ripping apart the task.
3429 */
3430 machine_task_terminate(task);
3431
3432 /*
3433 * Destroy all synchronizers owned by the task.
3434 */
3435 task_synchronizer_destroy_all(task);
3436
3437 /* let iokit know 1 */
3438 iokit_task_terminate(task, phase: 1);
3439
3440 /*
3441 * Terminate the IPC space. A long time ago,
3442 * this used to be ipc_space_clean() which would
3443 * keep the space active but hollow it.
3444 *
3445	 * We really do not need those semantics now
3446	 * that tasks die with exec.
3447 */
3448 ipc_space_terminate(space: task->itk_space);
3449
3450 /*
3451 * Clean out the address space, as we are going to be
3452 * getting a new one.
3453 */
3454 vm_map_terminate(map: task->map);
3455
3456 /*
3457 * Kick out any IOKitUser handles to the task. At best they're stale,
3458 * at worst someone is racing a SUID exec.
3459 */
3460 /* let iokit know 2 */
3461 iokit_task_terminate(task, phase: 2);
3462}
3463
3464#ifdef CONFIG_TASK_SUSPEND_STATS
3465
3466static void
3467_task_mark_suspend_source(task_t task)
3468{
3469 int idx;
3470 task_suspend_stats_t stats;
3471 task_suspend_source_t source;
3472 task_lock_assert_owned(task);
3473 stats = &task->t_suspend_stats;
3474
3475 idx = stats->tss_count % TASK_SUSPEND_SOURCES_MAX;
3476 source = &task->t_suspend_sources[idx];
3477 bzero(source, sizeof(*source));
3478
3479 source->tss_time = mach_absolute_time();
3480 source->tss_tid = current_thread()->thread_id;
3481 source->tss_pid = task_pid(current_task());
3482 task_best_name(current_task(), source->tss_procname, sizeof(source->tss_procname));
3483
3484 stats->tss_count++;
3485}
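
/*
 * Illustrative sketch (not part of the build): t_suspend_sources behaves as
 * a fixed ring of TASK_SUSPEND_SOURCES_MAX records.  tss_count only grows,
 * so the Nth suspension is recorded in slot N % TASK_SUSPEND_SOURCES_MAX
 * and eventually overwrites the oldest entry; task_get_suspend_sources()
 * therefore returns at most the last TASK_SUSPEND_SOURCES_MAX suspenders.
 * The helper name below is hypothetical and only restates that indexing.
 */
#if 0
static int
example_suspend_source_slot(int nth_suspension)         /* hypothetical */
{
	return nth_suspension % TASK_SUSPEND_SOURCES_MAX;
}
#endif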
3486
3487static inline void
3488_task_mark_suspend_start(task_t task)
3489{
3490 task_lock_assert_owned(task);
3491 task->t_suspend_stats.tss_last_start = mach_absolute_time();
3492}
3493
3494static inline void
3495_task_mark_suspend_end(task_t task)
3496{
3497 task_lock_assert_owned(task);
3498 task->t_suspend_stats.tss_last_end = mach_absolute_time();
3499 task->t_suspend_stats.tss_duration += (task->t_suspend_stats.tss_last_end -
3500 task->t_suspend_stats.tss_last_start);
3501}
3502
3503static kern_return_t
3504_task_get_suspend_stats_locked(task_t task, task_suspend_stats_t stats)
3505{
3506 if (task == TASK_NULL || stats == NULL) {
3507 return KERN_INVALID_ARGUMENT;
3508 }
3509 task_lock_assert_owned(task);
3510 memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3511 return KERN_SUCCESS;
3512}
3513
3514static kern_return_t
3515_task_get_suspend_sources_locked(task_t task, task_suspend_source_t sources)
3516{
3517 if (task == TASK_NULL || sources == NULL) {
3518 return KERN_INVALID_ARGUMENT;
3519 }
3520 task_lock_assert_owned(task);
3521 memcpy(sources, task->t_suspend_sources,
3522 sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3523 return KERN_SUCCESS;
3524}
3525
3526#endif /* CONFIG_TASK_SUSPEND_STATS */
3527
3528kern_return_t
3529task_get_suspend_stats(task_t task, task_suspend_stats_t stats)
3530{
3531#ifdef CONFIG_TASK_SUSPEND_STATS
3532 kern_return_t kr;
3533 if (task == TASK_NULL || stats == NULL) {
3534 return KERN_INVALID_ARGUMENT;
3535 }
3536 task_lock(task);
3537 kr = _task_get_suspend_stats_locked(task, stats);
3538 task_unlock(task);
3539 return kr;
3540#else /* CONFIG_TASK_SUSPEND_STATS */
3541 (void)task;
3542 (void)stats;
3543 return KERN_NOT_SUPPORTED;
3544#endif
3545}
3546
3547kern_return_t
3548task_get_suspend_stats_kdp(task_t task, task_suspend_stats_t stats)
3549{
3550#ifdef CONFIG_TASK_SUSPEND_STATS
3551 if (task == TASK_NULL || stats == NULL) {
3552 return KERN_INVALID_ARGUMENT;
3553 }
3554 memcpy(stats, &task->t_suspend_stats, sizeof(task->t_suspend_stats));
3555 return KERN_SUCCESS;
3556#else /* CONFIG_TASK_SUSPEND_STATS */
3557#pragma unused(task, stats)
3558 return KERN_NOT_SUPPORTED;
3559#endif /* CONFIG_TASK_SUSPEND_STATS */
3560}
3561
3562kern_return_t
3563task_get_suspend_sources(task_t task, task_suspend_source_array_t sources)
3564{
3565#ifdef CONFIG_TASK_SUSPEND_STATS
3566 kern_return_t kr;
3567 if (task == TASK_NULL || sources == NULL) {
3568 return KERN_INVALID_ARGUMENT;
3569 }
3570 task_lock(task);
3571 kr = _task_get_suspend_sources_locked(task, sources);
3572 task_unlock(task);
3573 return kr;
3574#else /* CONFIG_TASK_SUSPEND_STATS */
3575 (void)task;
3576 (void)sources;
3577 return KERN_NOT_SUPPORTED;
3578#endif
3579}
3580
3581kern_return_t
3582task_get_suspend_sources_kdp(task_t task, task_suspend_source_array_t sources)
3583{
3584#ifdef CONFIG_TASK_SUSPEND_STATS
3585 if (task == TASK_NULL || sources == NULL) {
3586 return KERN_INVALID_ARGUMENT;
3587 }
3588 memcpy(sources, task->t_suspend_sources,
3589 sizeof(struct task_suspend_source_s) * TASK_SUSPEND_SOURCES_MAX);
3590 return KERN_SUCCESS;
3591#else /* CONFIG_TASK_SUSPEND_STATS */
3592#pragma unused(task, sources)
3593 return KERN_NOT_SUPPORTED;
3594#endif
3595}
3596
3597/*
3598 * task_hold_locked:
3599 *
3600 * Suspend execution of the specified task.
3601 * This is a recursive-style suspension of the task; a count of
3602 * suspends is maintained.
3603 *
3604 * CONDITIONS: the task is locked and active.
3605 */
3606void
3607task_hold_locked(
3608 task_t task)
3609{
3610 thread_t thread;
3611 void *bsd_info = get_bsdtask_info(task);
3612
3613 assert(task->active);
3614
3615 if (task->suspend_count++ > 0) {
3616 return;
3617 }
3618
3619 if (bsd_info) {
3620 workq_proc_suspended(p: bsd_info);
3621 }
3622
3623 /*
3624 * Iterate through all the threads and hold them.
3625 */
3626 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3627 thread_mtx_lock(thread);
3628 thread_hold(thread);
3629 thread_mtx_unlock(thread);
3630 }
3631
3632#ifdef CONFIG_TASK_SUSPEND_STATS
3633 _task_mark_suspend_start(task);
3634#endif
3635}
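
/*
 * Illustrative sketch (not part of the build): kernel holds nest.  Only the
 * 0 -> 1 transition of suspend_count actually traps the threads; further
 * holds just bump the count, and task_release_locked() lets the threads run
 * again only when the count drops back to zero.  The helper name is
 * hypothetical; it assumes the task stays locked and active throughout.
 */
#if 0
static void
example_nested_hold(task_t task)                        /* hypothetical */
{
	task_hold_locked(task);         /* suspend_count 0 -> 1, threads held   */
	task_hold_locked(task);         /* suspend_count 1 -> 2, no extra work  */
	task_release_locked(task);      /* suspend_count 2 -> 1, still held     */
	task_release_locked(task);      /* suspend_count 1 -> 0, threads resume */
}
#endif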
3636
3637/*
3638 * task_hold_and_wait
3639 *
3640 * Same as the internal routine above, except that it must lock
3641 * and verify that the task is active. This differs from task_suspend
3642 * in that it places a kernel hold on the task rather than just a
3643 * user-level hold. This keeps users from over-resuming and setting
3644 * it running out from under the kernel.
3645 *
3646 * CONDITIONS: the caller holds a reference on the task
3647 */
3648kern_return_t
3649task_hold_and_wait(
3650 task_t task)
3651{
3652 if (task == TASK_NULL) {
3653 return KERN_INVALID_ARGUMENT;
3654 }
3655
3656 task_lock(task);
3657 if (!task->active) {
3658 task_unlock(task);
3659 return KERN_FAILURE;
3660 }
3661
3662#ifdef CONFIG_TASK_SUSPEND_STATS
3663 _task_mark_suspend_source(task);
3664#endif /* CONFIG_TASK_SUSPEND_STATS */
3665
3666 task_hold_locked(task);
3667 task_wait_locked(task, FALSE);
3668 task_unlock(task);
3669
3670 return KERN_SUCCESS;
3671}
3672
3673/*
3674 * task_wait_locked:
3675 *
3676 * Wait for all threads in task to stop.
3677 *
3678 * Conditions:
3679 * Called with task locked, active, and held.
3680 */
3681void
3682task_wait_locked(
3683 task_t task,
3684 boolean_t until_not_runnable)
3685{
3686 thread_t thread, self;
3687
3688 assert(task->active);
3689 assert(task->suspend_count > 0);
3690
3691 self = current_thread();
3692
3693 /*
3694 * Iterate through all the threads and wait for them to
3695 * stop. Do not wait for the current thread if it is within
3696 * the task.
3697 */
3698 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3699 if (thread != self) {
3700 thread_wait(thread, until_not_runnable);
3701 }
3702 }
3703}
3704
3705boolean_t
3706task_is_app_suspended(task_t task)
3707{
3708 return task->pidsuspended;
3709}
3710
3711/*
3712 * task_release_locked:
3713 *
3714 * Release a kernel hold on a task.
3715 *
3716 * CONDITIONS: the task is locked and active
3717 */
3718void
3719task_release_locked(
3720 task_t task)
3721{
3722 thread_t thread;
3723 void *bsd_info = get_bsdtask_info(task);
3724
3725 assert(task->active);
3726 assert(task->suspend_count > 0);
3727
3728 if (--task->suspend_count > 0) {
3729 return;
3730 }
3731
3732 if (bsd_info) {
3733 workq_proc_resumed(p: bsd_info);
3734 }
3735
3736 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3737 thread_mtx_lock(thread);
3738 thread_release(thread);
3739 thread_mtx_unlock(thread);
3740 }
3741
3742#if CONFIG_TASK_SUSPEND_STATS
3743 _task_mark_suspend_end(task);
3744#endif
3745}
3746
3747/*
3748 * task_release:
3749 *
3750 * Same as the internal routine above, except that it must lock
3751 * and verify that the task is active.
3752 *
3753 * CONDITIONS: The caller holds a reference to the task
3754 */
3755kern_return_t
3756task_release(
3757 task_t task)
3758{
3759 if (task == TASK_NULL) {
3760 return KERN_INVALID_ARGUMENT;
3761 }
3762
3763 task_lock(task);
3764
3765 if (!task->active) {
3766 task_unlock(task);
3767
3768 return KERN_FAILURE;
3769 }
3770
3771 task_release_locked(task);
3772 task_unlock(task);
3773
3774 return KERN_SUCCESS;
3775}
3776
3777static kern_return_t
3778task_threads_internal(
3779 task_t task,
3780 thread_act_array_t *threads_out,
3781 mach_msg_type_number_t *countp,
3782 mach_thread_flavor_t flavor)
3783{
3784 mach_msg_type_number_t actual, count, count_needed;
3785 thread_t *thread_list;
3786 thread_t thread;
3787 unsigned int i;
3788
3789 count = 0;
3790 thread_list = NULL;
3791
3792 if (task == TASK_NULL) {
3793 return KERN_INVALID_ARGUMENT;
3794 }
3795
3796 assert(flavor <= THREAD_FLAVOR_INSPECT);
3797
3798 for (;;) {
3799 task_lock(task);
3800 if (!task->active) {
3801 task_unlock(task);
3802
3803 kfree_type(thread_t, count, thread_list);
3804 return KERN_FAILURE;
3805 }
3806
3807 count_needed = actual = task->thread_count;
3808 if (count_needed <= count) {
3809 break;
3810 }
3811
3812 /* unlock the task and allocate more memory */
3813 task_unlock(task);
3814
3815 kfree_type(thread_t, count, thread_list);
3816 count = count_needed;
3817 thread_list = kalloc_type(thread_t, count, Z_WAITOK);
3818
3819 if (thread_list == NULL) {
3820 return KERN_RESOURCE_SHORTAGE;
3821 }
3822 }
3823
3824 i = 0;
3825 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3826 assert(i < actual);
3827 thread_reference(thread);
3828 thread_list[i++] = thread;
3829 }
3830
3831 count_needed = actual;
3832
3833 /* can unlock task now that we've got the thread refs */
3834 task_unlock(task);
3835
3836 if (actual == 0) {
3837 /* no threads, so return null pointer and deallocate memory */
3838
3839 *threads_out = NULL;
3840 *countp = 0;
3841 kfree_type(thread_t, count, thread_list);
3842 } else {
3843 /* if we allocated too much, must copy */
3844 if (count_needed < count) {
3845 void *newaddr;
3846
3847 newaddr = kalloc_type(thread_t, count_needed, Z_WAITOK);
3848 if (newaddr == NULL) {
3849 for (i = 0; i < actual; ++i) {
3850 thread_deallocate(thread: thread_list[i]);
3851 }
3852 kfree_type(thread_t, count, thread_list);
3853 return KERN_RESOURCE_SHORTAGE;
3854 }
3855
3856 bcopy(src: thread_list, dst: newaddr, n: count_needed * sizeof(thread_t));
3857 kfree_type(thread_t, count, thread_list);
3858 thread_list = (thread_t *)newaddr;
3859 }
3860
3861 *threads_out = thread_list;
3862 *countp = actual;
3863
3864		/* do the conversion that MIG should handle */
3865
3866 switch (flavor) {
3867 case THREAD_FLAVOR_CONTROL:
3868 if (task == current_task()) {
3869 for (i = 0; i < actual; ++i) {
3870 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port_pinned(thread_list[i]);
3871 }
3872 } else {
3873 for (i = 0; i < actual; ++i) {
3874 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
3875 }
3876 }
3877 break;
3878 case THREAD_FLAVOR_READ:
3879 for (i = 0; i < actual; ++i) {
3880 ((ipc_port_t *) thread_list)[i] = convert_thread_read_to_port(thread_list[i]);
3881 }
3882 break;
3883 case THREAD_FLAVOR_INSPECT:
3884 for (i = 0; i < actual; ++i) {
3885 ((ipc_port_t *) thread_list)[i] = convert_thread_inspect_to_port(thread_list[i]);
3886 }
3887 break;
3888 }
3889 }
3890
3891 return KERN_SUCCESS;
3892}
3893
3894kern_return_t
3895task_threads(
3896 task_t task,
3897 thread_act_array_t *threads_out,
3898 mach_msg_type_number_t *count)
3899{
3900 return task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_CONTROL);
3901}
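
/*
 * Illustrative sketch (not part of the build): how a user-space caller of
 * task_threads() is expected to clean up.  The MIG routine hands back an
 * out-of-line array of thread ports, so the caller owns one send right per
 * thread plus the array's VM allocation.  This is a user-space fragment
 * shown only as a usage note; "target" is assumed to be a task port the
 * caller already holds, and the function name is hypothetical.
 */
#if 0
#include <mach/mach.h>

static void
example_list_threads(task_t target)                     /* hypothetical */
{
	thread_act_array_t threads;
	mach_msg_type_number_t count;

	if (task_threads(target, &threads, &count) != KERN_SUCCESS) {
		return;
	}
	for (mach_msg_type_number_t i = 0; i < count; i++) {
		/* ... inspect threads[i] ... */
		mach_port_deallocate(mach_task_self(), threads[i]);
	}
	vm_deallocate(mach_task_self(), (vm_address_t)threads,
	    count * sizeof(threads[0]));
}
#endif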
3902
3903
3904kern_return_t
3905task_threads_from_user(
3906 mach_port_t port,
3907 thread_act_array_t *threads_out,
3908 mach_msg_type_number_t *count)
3909{
3910 ipc_kobject_type_t kotype;
3911 kern_return_t kr;
3912
3913 task_t task = convert_port_to_task_inspect_no_eval(port);
3914
3915 if (task == TASK_NULL) {
3916 return KERN_INVALID_ARGUMENT;
3917 }
3918
3919 kotype = ip_kotype(port);
3920
3921 switch (kotype) {
3922 case IKOT_TASK_CONTROL:
3923 kr = task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_CONTROL);
3924 break;
3925 case IKOT_TASK_READ:
3926 kr = task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_READ);
3927 break;
3928 case IKOT_TASK_INSPECT:
3929 kr = task_threads_internal(task, threads_out, countp: count, THREAD_FLAVOR_INSPECT);
3930 break;
3931 default:
3932 panic("strange kobject type");
3933 break;
3934 }
3935
3936 task_deallocate(task);
3937 return kr;
3938}
3939
3940#define TASK_HOLD_NORMAL 0
3941#define TASK_HOLD_PIDSUSPEND 1
3942#define TASK_HOLD_LEGACY 2
3943#define TASK_HOLD_LEGACY_ALL 3
3944
3945static kern_return_t
3946place_task_hold(
3947 task_t task,
3948 int mode)
3949{
3950 if (!task->active && !task_is_a_corpse(task)) {
3951 return KERN_FAILURE;
3952 }
3953
3954 /* Return success for corpse task */
3955 if (task_is_a_corpse(task)) {
3956 return KERN_SUCCESS;
3957 }
3958
3959 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_SUSPEND),
3960 task_pid(task),
3961 task->thread_count > 0 ?((thread_t)queue_first(&task->threads))->thread_id : 0,
3962 task->user_stop_count, task->user_stop_count + 1);
3963
3964#if MACH_ASSERT
3965 current_task()->suspends_outstanding++;
3966#endif
3967
3968 if (mode == TASK_HOLD_LEGACY) {
3969 task->legacy_stop_count++;
3970 }
3971
3972#ifdef CONFIG_TASK_SUSPEND_STATS
3973 _task_mark_suspend_source(task);
3974#endif /* CONFIG_TASK_SUSPEND_STATS */
3975
3976 if (task->user_stop_count++ > 0) {
3977 /*
3978 * If the stop count was positive, the task is
3979 * already stopped and we can exit.
3980 */
3981 return KERN_SUCCESS;
3982 }
3983
3984 /*
3985 * Put a kernel-level hold on the threads in the task (all
3986 * user-level task suspensions added together represent a
3987 * single kernel-level hold). We then wait for the threads
3988 * to stop executing user code.
3989 */
3990 task_hold_locked(task);
3991 task_wait_locked(task, FALSE);
3992
3993 return KERN_SUCCESS;
3994}
3995
3996static kern_return_t
3997release_task_hold(
3998 task_t task,
3999 int mode)
4000{
4001 boolean_t release = FALSE;
4002
4003 if (!task->active && !task_is_a_corpse(task)) {
4004 return KERN_FAILURE;
4005 }
4006
4007 /* Return success for corpse task */
4008 if (task_is_a_corpse(task)) {
4009 return KERN_SUCCESS;
4010 }
4011
4012 if (mode == TASK_HOLD_PIDSUSPEND) {
4013 if (task->pidsuspended == FALSE) {
4014 return KERN_FAILURE;
4015 }
4016 task->pidsuspended = FALSE;
4017 }
4018
4019 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
4020 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
4021 MACHDBG_CODE(DBG_MACH_IPC, MACH_TASK_RESUME) | DBG_FUNC_NONE,
4022 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
4023 task->user_stop_count, mode, task->legacy_stop_count);
4024
4025#if MACH_ASSERT
4026 /*
4027 * This is obviously not robust; if we suspend one task and then resume a different one,
4028 * we'll fly under the radar. This is only meant to catch the common case of a crashed
4029 * or buggy suspender.
4030 */
4031 current_task()->suspends_outstanding--;
4032#endif
4033
4034 if (mode == TASK_HOLD_LEGACY_ALL) {
4035 if (task->legacy_stop_count >= task->user_stop_count) {
4036 task->user_stop_count = 0;
4037 release = TRUE;
4038 } else {
4039 task->user_stop_count -= task->legacy_stop_count;
4040 }
4041 task->legacy_stop_count = 0;
4042 } else {
4043 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) {
4044 task->legacy_stop_count--;
4045 }
4046 if (--task->user_stop_count == 0) {
4047 release = TRUE;
4048 }
4049 }
4050 } else {
4051 return KERN_FAILURE;
4052 }
4053
4054 /*
4055 * Release the task if necessary.
4056 */
4057 if (release) {
4058 task_release_locked(task);
4059 }
4060
4061 return KERN_SUCCESS;
4062}
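
/*
 * Illustrative sketch (not part of the build): how the counters above fit
 * together.  Every user-level suspension shares one kernel-level hold:
 * user_stop_count tracks each outstanding user suspension (with
 * legacy_stop_count and the pidsuspended flag carving out the legacy and
 * pid-suspend flavors), and the threads only run again once the combined
 * count drains back to zero.  Assumes the task is locked and active, as
 * place_task_hold()/release_task_hold() require.
 */
#if 0
	place_task_hold(task, TASK_HOLD_NORMAL);   /* user_stop_count 0 -> 1, threads held   */
	place_task_hold(task, TASK_HOLD_LEGACY);   /* user_stop_count 1 -> 2, legacy count 1 */
	release_task_hold(task, TASK_HOLD_NORMAL); /* user_stop_count 2 -> 1, still held     */
	release_task_hold(task, TASK_HOLD_LEGACY); /* counts reach 0, threads resume         */
#endif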
4063
4064boolean_t
4065get_task_suspended(task_t task)
4066{
4067 return 0 != task->user_stop_count;
4068}
4069
4070/*
4071 * task_suspend:
4072 *
4073 * Implement an (old-fashioned) user-level suspension on a task.
4074 *
4075 * Because the user isn't expecting to have to manage a suspension
4076 * token, we'll track it for him in the kernel in the form of a naked
4077 * send right to the task's resume port. All such send rights
4078 * account for a single suspension against the task (unlike task_suspend2()
4079 * where each caller gets a unique suspension count represented by a
4080 * unique send-once right).
4081 *
4082 * Conditions:
4083 * The caller holds a reference to the task
4084 */
4085kern_return_t
4086task_suspend(
4087 task_t task)
4088{
4089 kern_return_t kr;
4090 mach_port_t port;
4091 mach_port_name_t name;
4092
4093 if (task == TASK_NULL || task == kernel_task) {
4094 return KERN_INVALID_ARGUMENT;
4095 }
4096
4097 /*
4098 * place a legacy hold on the task.
4099 */
4100 task_lock(task);
4101 kr = place_task_hold(task, TASK_HOLD_LEGACY);
4102 task_unlock(task);
4103
4104 if (kr != KERN_SUCCESS) {
4105 return kr;
4106 }
4107
4108 /*
4109 * Claim a send right on the task resume port, and request a no-senders
4110 * notification on that port (if none outstanding).
4111 */
4112 itk_lock(task);
4113 port = task->itk_resume;
4114 if (port == IP_NULL) {
4115 port = ipc_kobject_alloc_port(kobject: task, type: IKOT_TASK_RESUME,
4116 options: IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
4117 task->itk_resume = port;
4118 } else {
4119 (void)ipc_kobject_make_send_nsrequest(port, kobject: task, kotype: IKOT_TASK_RESUME);
4120 }
4121 itk_unlock(task);
4122
4123 /*
4124 * Copyout the send right into the calling task's IPC space. It won't know it is there,
4125 * but we'll look it up when calling a traditional resume. Any IPC operations that
4126 * deallocate the send right will auto-release the suspension.
4127 */
4128 if (IP_VALID(port)) {
4129 kr = ipc_object_copyout(current_space(), ip_to_object(port),
4130 MACH_MSG_TYPE_MOVE_SEND, flags: IPC_OBJECT_COPYOUT_FLAGS_NONE,
4131 NULL, NULL, namep: &name);
4132 } else {
4133 kr = KERN_SUCCESS;
4134 }
4135 if (kr != KERN_SUCCESS) {
4136 printf(format: "warning: %s(%d) failed to copyout suspension "
4137 "token for pid %d with error: %d\n",
4138 proc_name_address(p: get_bsdtask_info(current_task())),
4139 proc_pid(p: get_bsdtask_info(current_task())),
4140 task_pid(task), kr);
4141 }
4142
4143 return kr;
4144}
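
/*
 * Illustrative sketch (not part of the build): the legacy user-space
 * pairing that the routine above services.  Each successful task_suspend()
 * quietly parks a send right to the resume port in the caller's IPC space,
 * and a matching task_resume() (or deallocation of that right) releases
 * that one hold.  This is a user-space fragment; "target" is assumed to be
 * a task port the caller already holds (e.g. obtained via task_for_pid(),
 * which requires the appropriate privileges), and the function name is
 * hypothetical.
 */
#if 0
#include <mach/mach.h>

static void
example_legacy_suspend(task_t target)                   /* hypothetical */
{
	if (task_suspend(target) == KERN_SUCCESS) {
		/* ... target's threads are stopped at user level here ... */
		(void)task_resume(target);      /* balance the hold */
	}
}
#endif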
4145
4146/*
4147 * task_resume:
4148 * Release a user hold on a task.
4149 *
4150 * Conditions:
4151 * The caller holds a reference to the task
4152 */
4153kern_return_t
4154task_resume(
4155 task_t task)
4156{
4157 kern_return_t kr;
4158 mach_port_name_t resume_port_name;
4159 ipc_entry_t resume_port_entry;
4160 ipc_space_t space = current_task()->itk_space;
4161
4162 if (task == TASK_NULL || task == kernel_task) {
4163 return KERN_INVALID_ARGUMENT;
4164 }
4165
4166 /* release a legacy task hold */
4167 task_lock(task);
4168 kr = release_task_hold(task, TASK_HOLD_LEGACY);
4169 task_unlock(task);
4170
4171 itk_lock(task); /* for itk_resume */
4172 is_write_lock(space); /* spin lock */
4173 if (is_active(space) && IP_VALID(task->itk_resume) &&
4174 ipc_hash_lookup(space, ip_to_object(task->itk_resume), namep: &resume_port_name, entryp: &resume_port_entry) == TRUE) {
4175 /*
4176 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
4177 * we are holding one less legacy hold on the task from this caller. If the release failed,
4178 * go ahead and drop all the rights, as someone either already released our holds or the task
4179 * is gone.
4180 */
4181 itk_unlock(task);
4182 if (kr == KERN_SUCCESS) {
4183 ipc_right_dealloc(space, name: resume_port_name, entry: resume_port_entry);
4184 } else {
4185 ipc_right_destroy(space, name: resume_port_name, entry: resume_port_entry, FALSE, guard: 0);
4186 }
4187 /* space unlocked */
4188 } else {
4189 itk_unlock(task);
4190 is_write_unlock(space);
4191 if (kr == KERN_SUCCESS) {
4192 printf(format: "warning: %s(%d) performed out-of-band resume on pid %d\n",
4193 proc_name_address(p: get_bsdtask_info(current_task())), proc_pid(p: get_bsdtask_info(current_task())),
4194 task_pid(task));
4195 }
4196 }
4197
4198 return kr;
4199}
4200
4201/*
4202 * Suspend a task that is already protected by a held lock.
4203 * Making/holding a token/reference/port is the caller's responsibility.
4204 */
4205kern_return_t
4206task_suspend_internal_locked(task_t task)
4207{
4208 if (task == TASK_NULL || task == kernel_task) {
4209 return KERN_INVALID_ARGUMENT;
4210 }
4211
4212 return place_task_hold(task, TASK_HOLD_NORMAL);
4213}
4214
4215/*
4216 * Suspend a task.
4217 * Making/holding a token/reference/port is the caller's responsibility.
4218 */
4219kern_return_t
4220task_suspend_internal(task_t task)
4221{
4222 kern_return_t kr;
4223
4224 if (task == TASK_NULL || task == kernel_task) {
4225 return KERN_INVALID_ARGUMENT;
4226 }
4227
4228 task_lock(task);
4229 kr = task_suspend_internal_locked(task);
4230 task_unlock(task);
4231 return kr;
4232}
4233
4234/*
4235 * Suspend the target task, and return a suspension token. The token
4236 * represents a reference on the suspended task.
4237 */
4238static kern_return_t
4239task_suspend2_grp(
4240 task_t task,
4241 task_suspension_token_t *suspend_token,
4242 task_grp_t grp)
4243{
4244 kern_return_t kr;
4245
4246 kr = task_suspend_internal(task);
4247 if (kr != KERN_SUCCESS) {
4248 *suspend_token = TASK_NULL;
4249 return kr;
4250 }
4251
4252 /*
4253 * Take a reference on the target task and return that to the caller
4254 * as a "suspension token," which can be converted into an SO right to
4255 * the now-suspended task's resume port.
4256 */
4257 task_reference_grp(task, grp);
4258 *suspend_token = task;
4259
4260 return KERN_SUCCESS;
4261}
4262
4263kern_return_t
4264task_suspend2_mig(
4265 task_t task,
4266 task_suspension_token_t *suspend_token)
4267{
4268 return task_suspend2_grp(task, suspend_token, grp: TASK_GRP_MIG);
4269}
4270
4271kern_return_t
4272task_suspend2_external(
4273 task_t task,
4274 task_suspension_token_t *suspend_token)
4275{
4276 return task_suspend2_grp(task, suspend_token, grp: TASK_GRP_EXTERNAL);
4277}
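
/*
 * Illustrative sketch (not part of the build): the token-based variant.
 * Unlike task_suspend(), every successful task_suspend2() call returns its
 * own suspension token, and that hold is released when the token is passed
 * to task_resume2() or when its send-once right dies.  This is a user-space
 * fragment; "target" is assumed to be a task port the caller already holds,
 * and the function name is hypothetical.
 */
#if 0
#include <mach/mach.h>

static void
example_token_suspend(task_t target)                    /* hypothetical */
{
	task_suspension_token_t token = MACH_PORT_NULL;

	if (task_suspend2(target, &token) == KERN_SUCCESS) {
		/* ... do work while the target is suspended ... */
		(void)task_resume2(token);      /* consumes the token */
	}
}
#endif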
4278
4279/*
4280 * Resume a task that is already protected by a held lock.
4281 * (reference/token/port management is caller's responsibility).
4282 */
4283kern_return_t
4284task_resume_internal_locked(
4285 task_suspension_token_t task)
4286{
4287 if (task == TASK_NULL || task == kernel_task) {
4288 return KERN_INVALID_ARGUMENT;
4289 }
4290
4291 return release_task_hold(task, TASK_HOLD_NORMAL);
4292}
4293
4294/*
4295 * Resume a task.
4296 * (reference/token/port management is caller's responsibility).
4297 */
4298kern_return_t
4299task_resume_internal(
4300 task_suspension_token_t task)
4301{
4302 kern_return_t kr;
4303
4304 if (task == TASK_NULL || task == kernel_task) {
4305 return KERN_INVALID_ARGUMENT;
4306 }
4307
4308 task_lock(task);
4309 kr = task_resume_internal_locked(task);
4310 task_unlock(task);
4311 return kr;
4312}
4313
4314/*
4315 * Resume the task using a suspension token. Consumes the token's ref.
4316 */
4317static kern_return_t
4318task_resume2_grp(
4319 task_suspension_token_t task,
4320 task_grp_t grp)
4321{
4322 kern_return_t kr;
4323
4324 kr = task_resume_internal(task);
4325 task_suspension_token_deallocate_grp(token: task, grp);
4326
4327 return kr;
4328}
4329
4330kern_return_t
4331task_resume2_mig(
4332 task_suspension_token_t task)
4333{
4334 return task_resume2_grp(task, grp: TASK_GRP_MIG);
4335}
4336
4337kern_return_t
4338task_resume2_external(
4339 task_suspension_token_t task)
4340{
4341 return task_resume2_grp(task, grp: TASK_GRP_EXTERNAL);
4342}
4343
4344static void
4345task_suspension_no_senders(ipc_port_t port, mach_port_mscount_t mscount)
4346{
4347 task_t task = convert_port_to_task_suspension_token(port);
4348 kern_return_t kr;
4349
4350 if (task == TASK_NULL) {
4351 return;
4352 }
4353
4354 if (task == kernel_task) {
4355 task_suspension_token_deallocate(token: task);
4356 return;
4357 }
4358
4359 task_lock(task);
4360
4361 kr = ipc_kobject_nsrequest(port, sync: mscount, NULL);
4362 if (kr == KERN_FAILURE) {
4363 /* release all the [remaining] outstanding legacy holds */
4364 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
4365 }
4366
4367 task_unlock(task);
4368
4369 task_suspension_token_deallocate(token: task); /* drop token reference */
4370}
4371
4372/*
4373 * Fires when a send-once right made
4374 * by convert_task_suspension_token_to_port() dies.
4375 */
4376void
4377task_suspension_send_once(ipc_port_t port)
4378{
4379 task_t task = convert_port_to_task_suspension_token(port);
4380
4381 if (task == TASK_NULL || task == kernel_task) {
4382 return; /* nothing to do */
4383 }
4384
4385 /* release the hold held by this specific send-once right */
4386 task_lock(task);
4387 release_task_hold(task, TASK_HOLD_NORMAL);
4388 task_unlock(task);
4389
4390 task_suspension_token_deallocate(token: task); /* drop token reference */
4391}
4392
4393static kern_return_t
4394task_pidsuspend_locked(task_t task)
4395{
4396 kern_return_t kr;
4397
4398 if (task->pidsuspended) {
4399 kr = KERN_FAILURE;
4400 goto out;
4401 }
4402
4403 task->pidsuspended = TRUE;
4404
4405 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
4406 if (kr != KERN_SUCCESS) {
4407 task->pidsuspended = FALSE;
4408 }
4409out:
4410 return kr;
4411}
4412
4413
4414/*
4415 * task_pidsuspend:
4416 *
4417 * Suspends a task by placing a hold on its threads.
4418 *
4419 * Conditions:
4420 * The caller holds a reference to the task
4421 */
4422kern_return_t
4423task_pidsuspend(
4424 task_t task)
4425{
4426 kern_return_t kr;
4427
4428 if (task == TASK_NULL || task == kernel_task) {
4429 return KERN_INVALID_ARGUMENT;
4430 }
4431
4432 task_lock(task);
4433
4434 kr = task_pidsuspend_locked(task);
4435
4436 task_unlock(task);
4437
4438 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
4439 iokit_task_app_suspended_changed(task);
4440 }
4441
4442 return kr;
4443}
4444
4445/*
4446 * task_pidresume:
4447 * Resumes a previously suspended task.
4448 *
4449 * Conditions:
4450 * The caller holds a reference to the task
4451 */
4452kern_return_t
4453task_pidresume(
4454 task_t task)
4455{
4456 kern_return_t kr;
4457
4458 if (task == TASK_NULL || task == kernel_task) {
4459 return KERN_INVALID_ARGUMENT;
4460 }
4461
4462 task_lock(task);
4463
4464#if CONFIG_FREEZE
4465
4466 while (task->changing_freeze_state) {
4467 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
4468 task_unlock(task);
4469 thread_block(THREAD_CONTINUE_NULL);
4470
4471 task_lock(task);
4472 }
4473 task->changing_freeze_state = TRUE;
4474#endif
4475
4476 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
4477
4478 task_unlock(task);
4479
4480 if ((KERN_SUCCESS == kr) && task->message_app_suspended) {
4481 iokit_task_app_suspended_changed(task);
4482 }
4483
4484#if CONFIG_FREEZE
4485
4486 task_lock(task);
4487
4488 if (kr == KERN_SUCCESS) {
4489 task->frozen = FALSE;
4490 }
4491 task->changing_freeze_state = FALSE;
4492 thread_wakeup(&task->changing_freeze_state);
4493
4494 task_unlock(task);
4495#endif
4496
4497 return kr;
4498}
4499
4500os_refgrp_decl(static, task_watchports_refgrp, "task_watchports", NULL);
4501
4502/*
4503 * task_add_turnstile_watchports:
4504 * Setup watchports to boost the main thread of the task.
4505 *
4506 * Arguments:
4507 * task: task being spawned
4508 * thread: main thread of task
4509 * portwatch_ports: array of watchports
4510 * portwatch_count: number of watchports
4511 *
4512 * Conditions:
4513 * Nothing locked.
4514 */
4515void
4516task_add_turnstile_watchports(
4517 task_t task,
4518 thread_t thread,
4519 ipc_port_t *portwatch_ports,
4520 uint32_t portwatch_count)
4521{
4522 struct task_watchports *watchports = NULL;
4523 struct task_watchport_elem *previous_elem_array[TASK_MAX_WATCHPORT_COUNT] = {};
4524 os_ref_count_t refs;
4525
4526 /* Check if the task has terminated */
4527 if (!task->active) {
4528 return;
4529 }
4530
4531 assert(portwatch_count <= TASK_MAX_WATCHPORT_COUNT);
4532
4533 watchports = task_watchports_alloc_init(task, thread, count: portwatch_count);
4534
4535 /* Lock the ipc space */
4536 is_write_lock(task->itk_space);
4537
4538 /* Setup watchports to boost the main thread */
4539 refs = task_add_turnstile_watchports_locked(task,
4540 watchports, previous_elem_array, portwatch_ports,
4541 portwatch_count);
4542
4543 /* Drop the space lock */
4544 is_write_unlock(task->itk_space);
4545
4546 if (refs == 0) {
4547 task_watchports_deallocate(watchports);
4548 }
4549
4550 /* Drop the ref on previous_elem_array */
4551 for (uint32_t i = 0; i < portwatch_count && previous_elem_array[i] != NULL; i++) {
4552 task_watchport_elem_deallocate(watchport_elem: previous_elem_array[i]);
4553 }
4554}
4555
4556/*
4557 * task_remove_turnstile_watchports:
4558 * Clear all turnstile boost on the task from watchports.
4559 *
4560 * Arguments:
4561 * task: task being terminated
4562 *
4563 * Conditions:
4564 * Nothing locked.
4565 */
4566void
4567task_remove_turnstile_watchports(
4568 task_t task)
4569{
4570 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4571 struct task_watchports *watchports = NULL;
4572 ipc_port_t port_freelist[TASK_MAX_WATCHPORT_COUNT] = {};
4573 uint32_t portwatch_count;
4574
4575 /* Lock the ipc space */
4576 is_write_lock(task->itk_space);
4577
4578	/* Check if a watchport boost exists */
4579 if (task->watchports == NULL) {
4580 is_write_unlock(task->itk_space);
4581 return;
4582 }
4583 watchports = task->watchports;
4584 portwatch_count = watchports->tw_elem_array_count;
4585
4586 refs = task_remove_turnstile_watchports_locked(task, watchports,
4587 port_freelist);
4588
4589 is_write_unlock(task->itk_space);
4590
4591 /* Drop all the port references */
4592 for (uint32_t i = 0; i < portwatch_count && port_freelist[i] != NULL; i++) {
4593 ip_release(port_freelist[i]);
4594 }
4595
4596 /* Clear the task and thread references for task_watchport */
4597 if (refs == 0) {
4598 task_watchports_deallocate(watchports);
4599 }
4600}
4601
4602/*
4603 * task_transfer_turnstile_watchports:
4604 * Transfer all watchport turnstile boost from old task to new task.
4605 *
4606 * Arguments:
4607 * old_task: task calling exec
4608 * new_task: new exec'ed task
4609 * thread: main thread of new task
4610 *
4611 * Conditions:
4612 * Nothing locked.
4613 */
4614void
4615task_transfer_turnstile_watchports(
4616 task_t old_task,
4617 task_t new_task,
4618 thread_t new_thread)
4619{
4620 struct task_watchports *old_watchports = NULL;
4621 struct task_watchports *new_watchports = NULL;
4622 os_ref_count_t old_refs = TASK_MAX_WATCHPORT_COUNT;
4623 os_ref_count_t new_refs = TASK_MAX_WATCHPORT_COUNT;
4624 uint32_t portwatch_count;
4625
4626 if (old_task->watchports == NULL || !new_task->active) {
4627 return;
4628 }
4629
4630 /* Get the watch port count from the old task */
4631 is_write_lock(old_task->itk_space);
4632 if (old_task->watchports == NULL) {
4633 is_write_unlock(old_task->itk_space);
4634 return;
4635 }
4636
4637 portwatch_count = old_task->watchports->tw_elem_array_count;
4638 is_write_unlock(old_task->itk_space);
4639
4640 new_watchports = task_watchports_alloc_init(task: new_task, thread: new_thread, count: portwatch_count);
4641
4642 /* Lock the ipc space for old task */
4643 is_write_lock(old_task->itk_space);
4644
4645 /* Lock the ipc space for new task */
4646 is_write_lock(new_task->itk_space);
4647
4648	/* Check if a watchport boost exists */
4649 if (old_task->watchports == NULL || !new_task->active) {
4650 is_write_unlock(new_task->itk_space);
4651 is_write_unlock(old_task->itk_space);
4652 (void)task_watchports_release(new_watchports);
4653 task_watchports_deallocate(watchports: new_watchports);
4654 return;
4655 }
4656
4657 old_watchports = old_task->watchports;
4658 assert(portwatch_count == old_task->watchports->tw_elem_array_count);
4659
4660 /* Setup new task watchports */
4661 new_task->watchports = new_watchports;
4662
4663 for (uint32_t i = 0; i < portwatch_count; i++) {
4664 ipc_port_t port = old_watchports->tw_elem[i].twe_port;
4665
4666 if (port == NULL) {
4667 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4668 continue;
4669 }
4670
4671 /* Lock the port and check if it has the entry */
4672 ip_mq_lock(port);
4673
4674 task_watchport_elem_init(&new_watchports->tw_elem[i], new_task, port);
4675
4676 if (ipc_port_replace_watchport_elem_conditional_locked(port,
4677 old_watchport_elem: &old_watchports->tw_elem[i], new_watchport_elem: &new_watchports->tw_elem[i]) == KERN_SUCCESS) {
4678 task_watchport_elem_clear(&old_watchports->tw_elem[i]);
4679
4680 task_watchports_retain(new_watchports);
4681 old_refs = task_watchports_release(old_watchports);
4682
4683 /* Check if all ports are cleaned */
4684 if (old_refs == 0) {
4685 old_task->watchports = NULL;
4686 }
4687 } else {
4688 task_watchport_elem_clear(&new_watchports->tw_elem[i]);
4689 }
4690 /* port unlocked by ipc_port_replace_watchport_elem_conditional_locked */
4691 }
4692
4693 /* Drop the reference on new task_watchports struct returned by task_watchports_alloc_init */
4694 new_refs = task_watchports_release(new_watchports);
4695 if (new_refs == 0) {
4696 new_task->watchports = NULL;
4697 }
4698
4699 is_write_unlock(new_task->itk_space);
4700 is_write_unlock(old_task->itk_space);
4701
4702 /* Clear the task and thread references for old_watchport */
4703 if (old_refs == 0) {
4704 task_watchports_deallocate(watchports: old_watchports);
4705 }
4706
4707 /* Clear the task and thread references for new_watchport */
4708 if (new_refs == 0) {
4709 task_watchports_deallocate(watchports: new_watchports);
4710 }
4711}
4712
4713/*
4714 * task_add_turnstile_watchports_locked:
4715 * Setup watchports to boost the main thread of the task.
4716 *
4717 * Arguments:
4718 * task: task to boost
4719 * watchports: watchport structure to be attached to the task
4720 * previous_elem_array: an array of old watchport_elem to be returned to caller
4721 * portwatch_ports: array of watchports
4722 * portwatch_count: number of watchports
4723 *
4724 * Conditions:
4725 * ipc space of the task locked.
4726 * returns array of old watchport_elem in previous_elem_array
4727 */
4728static os_ref_count_t
4729task_add_turnstile_watchports_locked(
4730 task_t task,
4731 struct task_watchports *watchports,
4732 struct task_watchport_elem **previous_elem_array,
4733 ipc_port_t *portwatch_ports,
4734 uint32_t portwatch_count)
4735{
4736 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4737
4738 /* Check if the task is still active */
4739 if (!task->active) {
4740 refs = task_watchports_release(watchports);
4741 return refs;
4742 }
4743
4744 assert(task->watchports == NULL);
4745 task->watchports = watchports;
4746
4747 for (uint32_t i = 0, j = 0; i < portwatch_count; i++) {
4748 ipc_port_t port = portwatch_ports[i];
4749
4750 task_watchport_elem_init(&watchports->tw_elem[i], task, port);
4751 if (port == NULL) {
4752 task_watchport_elem_clear(&watchports->tw_elem[i]);
4753 continue;
4754 }
4755
4756 ip_mq_lock(port);
4757
4758 /* Check if port is in valid state to be setup as watchport */
4759 if (ipc_port_add_watchport_elem_locked(port, watchport_elem: &watchports->tw_elem[i],
4760 old_elem: &previous_elem_array[j]) != KERN_SUCCESS) {
4761 task_watchport_elem_clear(&watchports->tw_elem[i]);
4762 continue;
4763 }
4764 /* port unlocked on return */
4765
4766 ip_reference(port);
4767 task_watchports_retain(watchports);
4768 if (previous_elem_array[j] != NULL) {
4769 j++;
4770 }
4771 }
4772
4773 /* Drop the reference on task_watchport struct returned by os_ref_init */
4774 refs = task_watchports_release(watchports);
4775 if (refs == 0) {
4776 task->watchports = NULL;
4777 }
4778
4779 return refs;
4780}
4781
4782/*
4783 * task_remove_turnstile_watchports_locked:
4784 * Clear all turnstile boost on the task from watchports.
4785 *
4786 * Arguments:
4787 * task: task to remove watchports from
4788 * watchports: watchports structure for the task
4789 * port_freelist: array of ports returned with ref to caller
4790 *
4791 *
4792 * Conditions:
4793 * ipc space of the task locked.
4794 * array of ports with refs are returned in port_freelist
4795 */
4796static os_ref_count_t
4797task_remove_turnstile_watchports_locked(
4798 task_t task,
4799 struct task_watchports *watchports,
4800 ipc_port_t *port_freelist)
4801{
4802 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4803
4804 for (uint32_t i = 0, j = 0; i < watchports->tw_elem_array_count; i++) {
4805 ipc_port_t port = watchports->tw_elem[i].twe_port;
4806 if (port == NULL) {
4807 continue;
4808 }
4809
4810 /* Lock the port and check if it has the entry */
4811 ip_mq_lock(port);
4812 if (ipc_port_clear_watchport_elem_internal_conditional_locked(port,
4813 watchport_elem: &watchports->tw_elem[i]) == KERN_SUCCESS) {
4814 task_watchport_elem_clear(&watchports->tw_elem[i]);
4815 port_freelist[j++] = port;
4816 refs = task_watchports_release(watchports);
4817
4818 /* Check if all ports are cleaned */
4819 if (refs == 0) {
4820 task->watchports = NULL;
4821 break;
4822 }
4823 }
4824 /* mqueue and port unlocked by ipc_port_clear_watchport_elem_internal_conditional_locked */
4825 }
4826 return refs;
4827}
4828
4829/*
4830 * task_watchports_alloc_init:
4831 * Allocate and initialize task watchport struct.
4832 *
4833 * Conditions:
4834 * Nothing locked.
4835 */
4836static struct task_watchports *
4837task_watchports_alloc_init(
4838 task_t task,
4839 thread_t thread,
4840 uint32_t count)
4841{
4842 struct task_watchports *watchports = kalloc_type(struct task_watchports,
4843 struct task_watchport_elem, count, Z_WAITOK | Z_ZERO | Z_NOFAIL);
4844
4845 task_reference(task);
4846 thread_reference(thread);
4847 watchports->tw_task = task;
4848 watchports->tw_thread = thread;
4849 watchports->tw_elem_array_count = count;
4850 os_ref_init(&watchports->tw_refcount, &task_watchports_refgrp);
4851
4852 return watchports;
4853}
4854
4855/*
4856 * task_watchports_deallocate:
4857 * Deallocate task watchport struct.
4858 *
4859 * Conditions:
4860 * Nothing locked.
4861 */
4862static void
4863task_watchports_deallocate(
4864 struct task_watchports *watchports)
4865{
4866 uint32_t portwatch_count = watchports->tw_elem_array_count;
4867
4868 task_deallocate(watchports->tw_task);
4869 thread_deallocate(thread: watchports->tw_thread);
4870 kfree_type(struct task_watchports, struct task_watchport_elem,
4871 portwatch_count, watchports);
4872}
4873
4874/*
4875 * task_watchport_elem_deallocate:
4876 * Deallocate task watchport element and release its ref on task_watchport.
4877 *
4878 * Conditions:
4879 * Nothing locked.
4880 */
4881void
4882task_watchport_elem_deallocate(
4883 struct task_watchport_elem *watchport_elem)
4884{
4885 os_ref_count_t refs = TASK_MAX_WATCHPORT_COUNT;
4886 task_t task = watchport_elem->twe_task;
4887 struct task_watchports *watchports = NULL;
4888 ipc_port_t port = NULL;
4889
4890 assert(task != NULL);
4891
4892	/* Take the space lock to modify the element */
4893 is_write_lock(task->itk_space);
4894
4895 watchports = task->watchports;
4896 assert(watchports != NULL);
4897
4898 port = watchport_elem->twe_port;
4899 assert(port != NULL);
4900
4901 task_watchport_elem_clear(watchport_elem);
4902 refs = task_watchports_release(watchports);
4903
4904 if (refs == 0) {
4905 task->watchports = NULL;
4906 }
4907
4908 is_write_unlock(task->itk_space);
4909
4910 ip_release(port);
4911 if (refs == 0) {
4912 task_watchports_deallocate(watchports);
4913 }
4914}
4915
4916/*
4917 * task_has_watchports:
4918 * Return TRUE if task has watchport boosts.
4919 *
4920 * Conditions:
4921 * Nothing locked.
4922 */
4923boolean_t
4924task_has_watchports(task_t task)
4925{
4926 return task->watchports != NULL;
4927}
4928
4929#if DEVELOPMENT || DEBUG
4930
4931extern void IOSleep(int);
4932
4933kern_return_t
4934task_disconnect_page_mappings(task_t task)
4935{
4936 int n;
4937
4938 if (task == TASK_NULL || task == kernel_task) {
4939 return KERN_INVALID_ARGUMENT;
4940 }
4941
4942 /*
4943	 * This function strips all of the mappings from the pmap for the
4944	 * specified task, forcing the task to re-fault all of the pages
4945	 * it is actively using. This lets us approximate the true
4946	 * working set of the specified task. We only engage if at least
4947	 * one of the threads in the task is runnable, but we want to
4948	 * sweep continuously, at least for a while - the limit is
4949	 * arbitrarily set at 100 sweeps and should be revisited as we
4950	 * gain experience - to get a better view into which areas within
4951	 * a page are being visited (as opposed to only seeing the first
4952	 * fault of a page after the task becomes runnable). In the
4953	 * future we may try to block until awakened by a thread in this
4954	 * task being made runnable, but for now we periodically poll
4955	 * from the user-level debug tool driving the sysctl.
4956	 * The loop below enforces the 100-sweep cap.
4957 */
4958 for (n = 0; n < 100; n++) {
4959 thread_t thread;
4960 boolean_t runnable;
4961 boolean_t do_unnest;
4962 int page_count;
4963
4964 runnable = FALSE;
4965 do_unnest = FALSE;
4966
4967 task_lock(task);
4968
4969 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4970 if (thread->state & TH_RUN) {
4971 runnable = TRUE;
4972 break;
4973 }
4974 }
4975 if (n == 0) {
4976 task->task_disconnected_count++;
4977 }
4978
4979 if (task->task_unnested == FALSE) {
4980 if (runnable == TRUE) {
4981 task->task_unnested = TRUE;
4982 do_unnest = TRUE;
4983 }
4984 }
4985 task_unlock(task);
4986
4987 if (runnable == FALSE) {
4988 break;
4989 }
4990
4991 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
4992 task, do_unnest, task->task_disconnected_count, 0, 0);
4993
4994 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
4995
4996 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
4997 task, page_count, 0, 0, 0);
4998
4999 if ((n % 5) == 4) {
5000 IOSleep(1);
5001 }
5002 }
5003 return KERN_SUCCESS;
5004}
5005
5006#endif
5007
5008
5009#if CONFIG_FREEZE
5010
5011/*
5012 * task_freeze:
5013 *
5014 * Freeze a task.
5015 *
5016 * Conditions:
5017 * The caller holds a reference to the task
5018 */
5019extern void vm_wake_compactor_swapper(void);
5020extern struct freezer_context freezer_context_global;
5021
5022kern_return_t
5023task_freeze(
5024 task_t task,
5025 uint32_t *purgeable_count,
5026 uint32_t *wired_count,
5027 uint32_t *clean_count,
5028 uint32_t *dirty_count,
5029 uint32_t dirty_budget,
5030 uint32_t *shared_count,
5031 int *freezer_error_code,
5032 boolean_t eval_only)
5033{
5034 kern_return_t kr = KERN_SUCCESS;
5035
5036 if (task == TASK_NULL || task == kernel_task) {
5037 return KERN_INVALID_ARGUMENT;
5038 }
5039
5040 task_lock(task);
5041
5042 while (task->changing_freeze_state) {
5043 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
5044 task_unlock(task);
5045 thread_block(THREAD_CONTINUE_NULL);
5046
5047 task_lock(task);
5048 }
5049 if (task->frozen) {
5050 task_unlock(task);
5051 return KERN_FAILURE;
5052 }
5053 task->changing_freeze_state = TRUE;
5054
5055 freezer_context_global.freezer_ctx_task = task;
5056
5057 task_unlock(task);
5058
5059 kr = vm_map_freeze(task,
5060 purgeable_count,
5061 wired_count,
5062 clean_count,
5063 dirty_count,
5064 dirty_budget,
5065 shared_count,
5066 freezer_error_code,
5067 eval_only);
5068
5069 task_lock(task);
5070
5071 if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
5072 task->frozen = TRUE;
5073
5074 freezer_context_global.freezer_ctx_task = NULL;
5075 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
5076
5077 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
5078 /*
5079 * reset the counter tracking the # of swapped compressed pages
5080 * because we are now done with this freeze session and task.
5081 */
5082
5083 *dirty_count = (uint32_t) (freezer_context_global.freezer_ctx_swapped_bytes / PAGE_SIZE_64); /*used to track pageouts*/
5084 }
5085
5086 freezer_context_global.freezer_ctx_swapped_bytes = 0;
5087 }
5088
5089 task->changing_freeze_state = FALSE;
5090 thread_wakeup(&task->changing_freeze_state);
5091
5092 task_unlock(task);
5093
5094 if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
5095 (kr == KERN_SUCCESS) &&
5096 (eval_only == FALSE)) {
5097 vm_wake_compactor_swapper();
5098 /*
5099 * We do an explicit wakeup of the swapout thread here
5100 * because the compact_and_swap routines don't have
5101		 * knowledge about these kinds of "per-task packed c_segs"
5102 * and so will not be evaluating whether we need to do
5103 * a wakeup there.
5104 */
5105 thread_wakeup((event_t)&vm_swapout_thread);
5106 }
5107
5108 return kr;
5109}
5110
5111/*
5112 * task_thaw:
5113 *
5114 * Thaw a currently frozen task.
5115 *
5116 * Conditions:
5117 * The caller holds a reference to the task
5118 */
5119kern_return_t
5120task_thaw(
5121 task_t task)
5122{
5123 if (task == TASK_NULL || task == kernel_task) {
5124 return KERN_INVALID_ARGUMENT;
5125 }
5126
5127 task_lock(task);
5128
5129 while (task->changing_freeze_state) {
5130 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
5131 task_unlock(task);
5132 thread_block(THREAD_CONTINUE_NULL);
5133
5134 task_lock(task);
5135 }
5136 if (!task->frozen) {
5137 task_unlock(task);
5138 return KERN_FAILURE;
5139 }
5140 task->frozen = FALSE;
5141
5142 task_unlock(task);
5143
5144 return KERN_SUCCESS;
5145}
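
/*
 * Illustrative sketch (not part of the build): the wait pattern used by
 * task_freeze(), task_thaw() and task_pidresume() for the
 * changing_freeze_state flag.  The waiter must drop the task lock before
 * blocking and re-check the flag after re-taking it, because the wakeup
 * only means the state changed at some point, not that it is still clear.
 */
#if 0
	task_lock(task);
	while (task->changing_freeze_state) {
		assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
		task_lock(task);                /* re-check under the lock */
	}
	/* here: the flag is clear and the task lock is held */
#endif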
5146
5147void
5148task_update_frozen_to_swap_acct(task_t task, int64_t amount, freezer_acct_op_t op)
5149{
5150 /*
5151 * We don't assert that the task lock is held because we call this
5152 * routine from the decompression path and we won't be holding the
5153 * task lock. However, since we are running in the context of the task, we are
5154 * safe.
5155 * In the case of the task_freeze path, we call it from behind the task
5156 * lock but we don't need to because we have a reference on the proc
5157 * being frozen.
5158 */
5159
5160 assert(task);
5161 if (amount == 0) {
5162 return;
5163 }
5164
5165 if (op == CREDIT_TO_SWAP) {
5166 ledger_credit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5167 } else if (op == DEBIT_FROM_SWAP) {
5168 ledger_debit_nocheck(task->ledger, task_ledgers.frozen_to_swap, amount);
5169 } else {
5170 panic("task_update_frozen_to_swap_acct: Invalid ledger op");
5171 }
5172}
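
/*
 * Illustrative sketch of the accounting calls this routine supports. The
 * surrounding call sites (compressor swap-out and swap-in paths) are
 * assumptions; only the function above is authoritative.
 *
 *	// after writing `bytes` of a frozen task's compressed data to swap:
 *	task_update_frozen_to_swap_acct(task, (int64_t)bytes, CREDIT_TO_SWAP);
 *
 *	// after bringing that data back in from swap:
 *	task_update_frozen_to_swap_acct(task, (int64_t)bytes, DEBIT_FROM_SWAP);
 */
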
5173#endif /* CONFIG_FREEZE */
5174
5175kern_return_t
5176task_set_security_tokens(
5177 task_t task,
5178 security_token_t sec_token,
5179 audit_token_t audit_token,
5180 host_priv_t host_priv)
5181{
5182 ipc_port_t host_port = IP_NULL;
5183 kern_return_t kr;
5184
5185 if (task == TASK_NULL) {
5186 return KERN_INVALID_ARGUMENT;
5187 }
5188
5189 task_lock(task);
5190 task_set_tokens(task, &sec_token, &audit_token);
5191 task_unlock(task);
5192
5193 if (host_priv != HOST_PRIV_NULL) {
5194 kr = host_get_host_priv_port(host_priv, &host_port);
5195 } else {
5196 kr = host_get_host_port(host_priv_self(), &host_port);
5197 }
5198 assert(kr == KERN_SUCCESS);
5199
5200 kr = task_set_special_port_internal(task, TASK_HOST_PORT, host_port);
5201 return kr;
5202}
5203
5204kern_return_t
5205task_send_trace_memory(
5206 __unused task_t target_task,
5207 __unused uint32_t pid,
5208 __unused uint64_t uniqueid)
5209{
5210 return KERN_INVALID_ARGUMENT;
5211}
5212
5213/*
5214 * This routine was added, pretty much exclusively, for registering the
5215 * RPC glue vector for in-kernel short circuited tasks. Rather than
5216 * removing it completely, I have only disabled that feature (which was
5217 * the only feature at the time). It just appears that we are going to
5218 * want to add some user data to tasks in the future (e.g. bsd info,
5219 * task names, etc...), so I left it in the formal task interface.
5220 */
5221kern_return_t
5222task_set_info(
5223 task_t task,
5224 task_flavor_t flavor,
5225 __unused task_info_t task_info_in, /* pointer to IN array */
5226 __unused mach_msg_type_number_t task_info_count)
5227{
5228 if (task == TASK_NULL) {
5229 return KERN_INVALID_ARGUMENT;
5230 }
5231 switch (flavor) {
5232#if CONFIG_ATM
5233 case TASK_TRACE_MEMORY_INFO:
5234 return KERN_NOT_SUPPORTED;
5235#endif // CONFIG_ATM
5236 default:
5237 return KERN_INVALID_ARGUMENT;
5238 }
5239}
5240
5241static void
5242_task_fill_times(task_t task, time_value_t *user_time, time_value_t *sys_time)
5243{
5244 clock_sec_t sec;
5245 clock_usec_t usec;
5246
5247 struct recount_times_mach times = recount_task_terminated_times(task);
5248 absolutetime_to_microtime(times.rtm_user, &sec, &usec);
5249 user_time->seconds = (typeof(user_time->seconds))sec;
5250 user_time->microseconds = usec;
5251 absolutetime_to_microtime(times.rtm_system, &sec, &usec);
5252 sys_time->seconds = (typeof(sys_time->seconds))sec;
5253 sys_time->microseconds = usec;
5254}
5255
5256int radar_20146450 = 1;
5257kern_return_t
5258task_info(
5259 task_t task,
5260 task_flavor_t flavor,
5261 task_info_t task_info_out,
5262 mach_msg_type_number_t *task_info_count)
5263{
5264 kern_return_t error = KERN_SUCCESS;
5265 mach_msg_type_number_t original_task_info_count;
5266 bool is_kernel_task = (task == kernel_task);
5267
5268 if (task == TASK_NULL) {
5269 return KERN_INVALID_ARGUMENT;
5270 }
5271
5272 original_task_info_count = *task_info_count;
5273 task_lock(task);
5274
5275 if (task != current_task() && !task->active) {
5276 task_unlock(task);
5277 return KERN_INVALID_ARGUMENT;
5278 }
5279
5280
5281 switch (flavor) {
5282 case TASK_BASIC_INFO_32:
5283 case TASK_BASIC2_INFO_32:
5284#if defined(__arm64__)
5285 case TASK_BASIC_INFO_64:
5286#endif
5287 {
5288 task_basic_info_32_t basic_info;
5289 ledger_amount_t tmp;
5290
5291 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
5292 error = KERN_INVALID_ARGUMENT;
5293 break;
5294 }
5295
5296 basic_info = (task_basic_info_32_t)task_info_out;
5297
5298 basic_info->virtual_size = (typeof(basic_info->virtual_size))
5299 vm_map_adjusted_size(is_kernel_task ? kernel_map : task->map);
5300 if (flavor == TASK_BASIC2_INFO_32) {
5301 /*
5302 * The "BASIC2" flavor gets the maximum resident
5303 * size instead of the current resident size...
5304 */
5305 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, &tmp);
5306 } else {
5307 ledger_get_balance(task->ledger, task_ledgers.phys_mem, &tmp);
5308 }
5309 basic_info->resident_size = (natural_t) MIN((ledger_amount_t) UINT32_MAX, tmp);
5310
5311 _task_fill_times(task, &basic_info->user_time,
5312 &basic_info->system_time);
5313
5314 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5315 basic_info->suspend_count = task->user_stop_count;
5316
5317 *task_info_count = TASK_BASIC_INFO_32_COUNT;
5318 break;
5319 }
5320
5321#if defined(__arm64__)
5322 case TASK_BASIC_INFO_64_2:
5323 {
5324 task_basic_info_64_2_t basic_info;
5325
5326 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
5327 error = KERN_INVALID_ARGUMENT;
5328 break;
5329 }
5330
5331 basic_info = (task_basic_info_64_2_t)task_info_out;
5332
5333 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5334 kernel_map : task->map);
5335 ledger_get_balance(task->ledger, task_ledgers.phys_mem,
5336 (ledger_amount_t *)&basic_info->resident_size);
5337 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5338 basic_info->suspend_count = task->user_stop_count;
5339 _task_fill_times(task, &basic_info->user_time,
5340 &basic_info->system_time);
5341
5342 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
5343 break;
5344 }
5345
5346#else /* defined(__arm64__) */
5347 case TASK_BASIC_INFO_64:
5348 {
5349 task_basic_info_64_t basic_info;
5350
5351 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
5352 error = KERN_INVALID_ARGUMENT;
5353 break;
5354 }
5355
5356 basic_info = (task_basic_info_64_t)task_info_out;
5357
5358 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5359 kernel_map : task->map);
5360 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *)&basic_info->resident_size);
5361 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5362 basic_info->suspend_count = task->user_stop_count;
5363 _task_fill_times(task, &basic_info->user_time,
5364 &basic_info->system_time);
5365
5366 *task_info_count = TASK_BASIC_INFO_64_COUNT;
5367 break;
5368 }
5369#endif /* defined(__arm64__) */
5370
5371 case MACH_TASK_BASIC_INFO:
5372 {
5373 mach_task_basic_info_t basic_info;
5374
5375 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
5376 error = KERN_INVALID_ARGUMENT;
5377 break;
5378 }
5379
5380 basic_info = (mach_task_basic_info_t)task_info_out;
5381
5382 basic_info->virtual_size = vm_map_adjusted_size(is_kernel_task ?
5383 kernel_map : task->map);
5384 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size);
5385 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &basic_info->resident_size_max);
5386 basic_info->policy = is_kernel_task ? POLICY_RR : POLICY_TIMESHARE;
5387 basic_info->suspend_count = task->user_stop_count;
5388 _task_fill_times(task, &basic_info->user_time,
5389 &basic_info->system_time);
5390
5391 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
5392 break;
5393 }
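
/*
 * Illustrative user-space sketch of the count-in/count-out protocol used by
 * the basic-info flavors above, shown for MACH_TASK_BASIC_INFO (a minimal
 * example with error handling trimmed):
 *
 *	#include <mach/mach.h>
 *
 *	mach_task_basic_info_data_t binfo;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *	kern_return_t kr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&binfo, &count);
 *	if (kr == KERN_SUCCESS) {
 *		// binfo.resident_size and binfo.resident_size_max are in bytes
 *	}
 */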
5394
5395 case TASK_THREAD_TIMES_INFO:
5396 {
5397 task_thread_times_info_t times_info;
5398 thread_t thread;
5399
5400 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
5401 error = KERN_INVALID_ARGUMENT;
5402 break;
5403 }
5404
5405 times_info = (task_thread_times_info_t)task_info_out;
5406 times_info->user_time = (time_value_t){ 0 };
5407 times_info->system_time = (time_value_t){ 0 };
5408
5409 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5410 if ((thread->options & TH_OPT_IDLE_THREAD) == 0) {
5411 time_value_t user_time, system_time;
5412
5413 thread_read_times(thread, &user_time, &system_time, NULL);
5414 time_value_add(&times_info->user_time, &user_time);
5415 time_value_add(&times_info->system_time, &system_time);
5416 }
5417 }
5418
5419 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
5420 break;
5421 }
5422
5423 case TASK_ABSOLUTETIME_INFO:
5424 {
5425 task_absolutetime_info_t info;
5426
5427 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
5428 error = KERN_INVALID_ARGUMENT;
5429 break;
5430 }
5431
5432 info = (task_absolutetime_info_t)task_info_out;
5433
5434 struct recount_times_mach term_times =
5435 recount_task_terminated_times(task);
5436 struct recount_times_mach total_times = recount_task_times(task);
5437
5438 info->total_user = total_times.rtm_user;
5439 info->total_system = total_times.rtm_system;
5440 info->threads_user = total_times.rtm_user - term_times.rtm_user;
5441 info->threads_system = total_times.rtm_system - term_times.rtm_system;
5442
5443 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
5444 break;
5445 }
5446
5447 case TASK_DYLD_INFO:
5448 {
5449 task_dyld_info_t info;
5450
5451 /*
5452 * We added the format field to TASK_DYLD_INFO output. For
5453 * temporary backward compatibility, accept the fact that
5454 * clients may ask for the old version - distinguished by the
5455 * size of the expected result structure.
5456 */
5457#define TASK_LEGACY_DYLD_INFO_COUNT \
5458 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
5459
5460 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
5461 error = KERN_INVALID_ARGUMENT;
5462 break;
5463 }
5464
5465 info = (task_dyld_info_t)task_info_out;
5466 info->all_image_info_addr = task->all_image_info_addr;
5467 info->all_image_info_size = task->all_image_info_size;
5468
5469 /* only set format on output for those expecting it */
5470 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
5471 info->all_image_info_format = task_has_64Bit_addr(task) ?
5472 TASK_DYLD_ALL_IMAGE_INFO_64 :
5473 TASK_DYLD_ALL_IMAGE_INFO_32;
5474 *task_info_count = TASK_DYLD_INFO_COUNT;
5475 } else {
5476 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
5477 }
5478 break;
5479 }
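
/*
 * Illustrative user-space sketch of the size-based versioning described
 * above: a caller that passes the full TASK_DYLD_INFO_COUNT receives the
 * format field, while a legacy caller passing the smaller count does not.
 *
 *	struct task_dyld_info dyld_info;
 *	mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
 *	if (task_info(mach_task_self(), TASK_DYLD_INFO,
 *	    (task_info_t)&dyld_info, &count) == KERN_SUCCESS &&
 *	    count >= TASK_DYLD_INFO_COUNT) {
 *		// dyld_info.all_image_info_format is valid here
 *	}
 */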
5480
5481 case TASK_EXTMOD_INFO:
5482 {
5483 task_extmod_info_t info;
5484 void *p;
5485
5486 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
5487 error = KERN_INVALID_ARGUMENT;
5488 break;
5489 }
5490
5491 info = (task_extmod_info_t)task_info_out;
5492
5493 p = get_bsdtask_info(task);
5494 if (p) {
5495 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
5496 } else {
5497 bzero(info->task_uuid, sizeof(info->task_uuid));
5498 }
5499 info->extmod_statistics = task->extmod_statistics;
5500 *task_info_count = TASK_EXTMOD_INFO_COUNT;
5501
5502 break;
5503 }
5504
5505 case TASK_KERNELMEMORY_INFO:
5506 {
5507 task_kernelmemory_info_t tkm_info;
5508 ledger_amount_t credit, debit;
5509
5510 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
5511 error = KERN_INVALID_ARGUMENT;
5512 break;
5513 }
5514
5515 tkm_info = (task_kernelmemory_info_t) task_info_out;
5516 tkm_info->total_palloc = 0;
5517 tkm_info->total_pfree = 0;
5518 tkm_info->total_salloc = 0;
5519 tkm_info->total_sfree = 0;
5520
5521 if (task == kernel_task) {
5522 /*
5523 * All shared allocs/frees from other tasks count against
5524 * the kernel private memory usage. If we are looking up
5525 * info for the kernel task, gather from everywhere.
5526 */
5527 task_unlock(task);
5528
5529 /* start by accounting for all the terminated tasks against the kernel */
5530 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
5531 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
5532
5533 /* count all other task/thread shared alloc/free against the kernel */
5534 lck_mtx_lock(&tasks_threads_lock);
5535
5536 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
5537 queue_iterate(&tasks, task, task_t, tasks) {
5538 if (task == kernel_task) {
5539 if (ledger_get_entries(task->ledger,
5540 task_ledgers.tkm_private, &credit,
5541 &debit) == KERN_SUCCESS) {
5542 tkm_info->total_palloc += credit;
5543 tkm_info->total_pfree += debit;
5544 }
5545 }
5546 if (!ledger_get_entries(task->ledger,
5547 task_ledgers.tkm_shared, &credit, &debit)) {
5548 tkm_info->total_palloc += credit;
5549 tkm_info->total_pfree += debit;
5550 }
5551 }
5552 lck_mtx_unlock(&tasks_threads_lock);
5553 } else {
5554 if (!ledger_get_entries(task->ledger,
5555 task_ledgers.tkm_private, &credit, &debit)) {
5556 tkm_info->total_palloc = credit;
5557 tkm_info->total_pfree = debit;
5558 }
5559 if (!ledger_get_entries(task->ledger,
5560 task_ledgers.tkm_shared, &credit, &debit)) {
5561 tkm_info->total_salloc = credit;
5562 tkm_info->total_sfree = debit;
5563 }
5564 task_unlock(task);
5565 }
5566
5567 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
5568 return KERN_SUCCESS;
5569 }
5570
5571 /* OBSOLETE */
5572 case TASK_SCHED_FIFO_INFO:
5573 {
5574 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
5575 error = KERN_INVALID_ARGUMENT;
5576 break;
5577 }
5578
5579 error = KERN_INVALID_POLICY;
5580 break;
5581 }
5582
5583 /* OBSOLETE */
5584 case TASK_SCHED_RR_INFO:
5585 {
5586 policy_rr_base_t rr_base;
5587 uint32_t quantum_time;
5588 uint64_t quantum_ns;
5589
5590 if (*task_info_count < POLICY_RR_BASE_COUNT) {
5591 error = KERN_INVALID_ARGUMENT;
5592 break;
5593 }
5594
5595 rr_base = (policy_rr_base_t) task_info_out;
5596
5597 if (task != kernel_task) {
5598 error = KERN_INVALID_POLICY;
5599 break;
5600 }
5601
5602 rr_base->base_priority = task->priority;
5603
5604 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
5605 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
5606
5607 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
5608
5609 *task_info_count = POLICY_RR_BASE_COUNT;
5610 break;
5611 }
5612
5613 /* OBSOLETE */
5614 case TASK_SCHED_TIMESHARE_INFO:
5615 {
5616 policy_timeshare_base_t ts_base;
5617
5618 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
5619 error = KERN_INVALID_ARGUMENT;
5620 break;
5621 }
5622
5623 ts_base = (policy_timeshare_base_t) task_info_out;
5624
5625 if (task == kernel_task) {
5626 error = KERN_INVALID_POLICY;
5627 break;
5628 }
5629
5630 ts_base->base_priority = task->priority;
5631
5632 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
5633 break;
5634 }
5635
5636 case TASK_SECURITY_TOKEN:
5637 {
5638 security_token_t *sec_token_p;
5639
5640 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
5641 error = KERN_INVALID_ARGUMENT;
5642 break;
5643 }
5644
5645 sec_token_p = (security_token_t *) task_info_out;
5646
5647 *sec_token_p = *task_get_sec_token(task);
5648
5649 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
5650 break;
5651 }
5652
5653 case TASK_AUDIT_TOKEN:
5654 {
5655 audit_token_t *audit_token_p;
5656
5657 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
5658 error = KERN_INVALID_ARGUMENT;
5659 break;
5660 }
5661
5662 audit_token_p = (audit_token_t *) task_info_out;
5663
5664 *audit_token_p = *task_get_audit_token(task);
5665
5666 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
5667 break;
5668 }
5669
5670 case TASK_SCHED_INFO:
5671 error = KERN_INVALID_ARGUMENT;
5672 break;
5673
5674 case TASK_EVENTS_INFO:
5675 {
5676 task_events_info_t events_info;
5677 thread_t thread;
5678 uint64_t n_syscalls_mach, n_syscalls_unix, n_csw;
5679
5680 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
5681 error = KERN_INVALID_ARGUMENT;
5682 break;
5683 }
5684
5685 events_info = (task_events_info_t) task_info_out;
5686
5687
5688 events_info->faults = (int32_t) MIN(counter_load(&task->faults), INT32_MAX);
5689 events_info->pageins = (int32_t) MIN(counter_load(&task->pageins), INT32_MAX);
5690 events_info->cow_faults = (int32_t) MIN(counter_load(&task->cow_faults), INT32_MAX);
5691 events_info->messages_sent = (int32_t) MIN(counter_load(&task->messages_sent), INT32_MAX);
5692 events_info->messages_received = (int32_t) MIN(counter_load(&task->messages_received), INT32_MAX);
5693
5694 n_syscalls_mach = task->syscalls_mach;
5695 n_syscalls_unix = task->syscalls_unix;
5696 n_csw = task->c_switch;
5697
5698 queue_iterate(&task->threads, thread, thread_t, task_threads) {
5699 n_csw += thread->c_switch;
5700 n_syscalls_mach += thread->syscalls_mach;
5701 n_syscalls_unix += thread->syscalls_unix;
5702 }
5703
5704 events_info->syscalls_mach = (int32_t) MIN(n_syscalls_mach, INT32_MAX);
5705 events_info->syscalls_unix = (int32_t) MIN(n_syscalls_unix, INT32_MAX);
5706 events_info->csw = (int32_t) MIN(n_csw, INT32_MAX);
5707
5708 *task_info_count = TASK_EVENTS_INFO_COUNT;
5709 break;
5710 }
5711 case TASK_AFFINITY_TAG_INFO:
5712 {
5713 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
5714 error = KERN_INVALID_ARGUMENT;
5715 break;
5716 }
5717
5718 error = task_affinity_info(task, task_info_out, task_info_count);
5719 break;
5720 }
5721 case TASK_POWER_INFO:
5722 {
5723 if (*task_info_count < TASK_POWER_INFO_COUNT) {
5724 error = KERN_INVALID_ARGUMENT;
5725 break;
5726 }
5727
5728 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL, NULL);
5729 break;
5730 }
5731
5732 case TASK_POWER_INFO_V2:
5733 {
5734 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
5735 error = KERN_INVALID_ARGUMENT;
5736 break;
5737 }
5738 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
5739 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2, NULL);
5740 break;
5741 }
5742
5743 case TASK_VM_INFO:
5744 case TASK_VM_INFO_PURGEABLE:
5745 {
5746 task_vm_info_t vm_info;
5747 vm_map_t map;
5748 ledger_amount_t tmp_amount;
5749
5750 struct proc *p;
5751 uint32_t platform, sdk;
5752 p = current_proc();
5753 platform = proc_platform(p);
5754 sdk = proc_sdk(p);
5755 if (original_task_info_count > TASK_VM_INFO_COUNT) {
5756 /*
5757 * Some iOS apps pass an incorrect value for
5758 * task_info_count, expressed in number of bytes
5759 * instead of number of "natural_t" elements, which
5760 * can lead to binary compatibility issues (including
5761 * stack corruption) when the data structure is
5762 * expanded in the future.
5763 * Let's make this potential issue visible by
5764 * logging about it...
5765 */
5766 printf("%s:%d %d[%s] task_info(flavor=%d) possibly invalid "
5767 "task_info_count=%d > TASK_VM_INFO_COUNT=%d platform %d sdk "
5768 "%d.%d.%d - please use TASK_VM_INFO_COUNT.\n",
5769 __FUNCTION__, __LINE__, proc_pid(p), proc_name_address(p),
5770 flavor, original_task_info_count, TASK_VM_INFO_COUNT,
5771 platform, (sdk >> 16), ((sdk >> 8) & 0xff), (sdk & 0xff));
5772 DTRACE_VM4(suspicious_task_vm_info_count,
5773 mach_msg_type_number_t, original_task_info_count,
5774 mach_msg_type_number_t, TASK_VM_INFO_COUNT,
5775 uint32_t, platform,
5776 uint32_t, sdk);
5777 }
5778#if __arm64__
5779 if (original_task_info_count > TASK_VM_INFO_REV2_COUNT &&
5780 platform == PLATFORM_IOS &&
5781 sdk != 0 &&
5782 (sdk >> 16) <= 12) {
5783 /*
5784 * Some iOS apps pass an incorrect value for
5785 * task_info_count, expressed in number of bytes
5786 * instead of number of "natural_t" elements.
5787 * For the sake of backwards binary compatibility
5788 * for apps built with an iOS12 or older SDK and using
5789 * the "rev2" data structure, let's fix task_info_count
5790 * for them, to avoid stomping past the actual end
5791 * of their buffer.
5792 */
5793#if DEVELOPMENT || DEBUG
5794 printf("%s:%d %d[%s] rdar://49484582 task_info_count %d -> %d "
5795 "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5796 proc_name_address(p), original_task_info_count,
5797 TASK_VM_INFO_REV2_COUNT, platform, (sdk >> 16),
5798 ((sdk >> 8) & 0xff), (sdk & 0xff));
5799#endif /* DEVELOPMENT || DEBUG */
5800 DTRACE_VM4(workaround_task_vm_info_count,
5801 mach_msg_type_number_t, original_task_info_count,
5802 mach_msg_type_number_t, TASK_VM_INFO_REV2_COUNT,
5803 uint32_t, platform,
5804 uint32_t, sdk);
5805 original_task_info_count = TASK_VM_INFO_REV2_COUNT;
5806 *task_info_count = original_task_info_count;
5807 }
5808 if (original_task_info_count > TASK_VM_INFO_REV5_COUNT &&
5809 platform == PLATFORM_IOS &&
5810 sdk != 0 &&
5811 (sdk >> 16) <= 15) {
5812 /*
5813 * Some iOS apps pass an incorrect value for
5814 * task_info_count, expressed in number of bytes
5815 * instead of number of "natural_t" elements.
5816 */
5817 printf("%s:%d %d[%s] task_info_count=%d > TASK_VM_INFO_COUNT=%d "
5818 "platform %d sdk %d.%d.%d\n", __FUNCTION__, __LINE__, proc_pid(p),
5819 proc_name_address(p), original_task_info_count,
5820 TASK_VM_INFO_REV5_COUNT, platform, (sdk >> 16),
5821 ((sdk >> 8) & 0xff), (sdk & 0xff));
5822 DTRACE_VM4(workaround_task_vm_info_count,
5823 mach_msg_type_number_t, original_task_info_count,
5824 mach_msg_type_number_t, TASK_VM_INFO_REV5_COUNT,
5825 uint32_t, platform,
5826 uint32_t, sdk);
5827#if DEVELOPMENT || DEBUG
5828 /*
5829 * For the sake of internal builds livability,
5830 * work around this user-space bug by capping the
5831 * buffer's size to what it was with the iOS15 SDK.
5832 */
5833 original_task_info_count = TASK_VM_INFO_REV5_COUNT;
5834 *task_info_count = original_task_info_count;
5835#endif /* DEVELOPMENT || DEBUG */
5836 }
5837#endif /* __arm64__ */
5838
5839 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
5840 error = KERN_INVALID_ARGUMENT;
5841 break;
5842 }
5843
5844 vm_info = (task_vm_info_t)task_info_out;
5845
5846 /*
5847 * Do not hold both the task and map locks,
5848 * so convert the task lock into a map reference,
5849 * drop the task lock, then lock the map.
5850 */
5851 if (is_kernel_task) {
5852 map = kernel_map;
5853 task_unlock(task);
5854 /* no lock, no reference */
5855 } else {
5856 map = task->map;
5857 vm_map_reference(map);
5858 task_unlock(task);
5859 vm_map_lock_read(map);
5860 }
5861
5862 vm_info->virtual_size = (typeof(vm_info->virtual_size))vm_map_adjusted_size(map);
5863 vm_info->region_count = map->hdr.nentries;
5864 vm_info->page_size = vm_map_page_size(map);
5865
5866 ledger_get_balance(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size);
5867 ledger_get_lifetime_max(task->ledger, task_ledgers.phys_mem, (ledger_amount_t *) &vm_info->resident_size_peak);
5868
5869 vm_info->device = 0;
5870 vm_info->device_peak = 0;
5871 ledger_get_balance(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external);
5872 ledger_get_lifetime_max(task->ledger, task_ledgers.external, (ledger_amount_t *) &vm_info->external_peak);
5873 ledger_get_balance(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal);
5874 ledger_get_lifetime_max(task->ledger, task_ledgers.internal, (ledger_amount_t *) &vm_info->internal_peak);
5875 ledger_get_balance(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable);
5876 ledger_get_lifetime_max(task->ledger, task_ledgers.reusable, (ledger_amount_t *) &vm_info->reusable_peak);
5877 ledger_get_balance(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed);
5878 ledger_get_lifetime_max(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_peak);
5879 ledger_get_entries(task->ledger, task_ledgers.internal_compressed, (ledger_amount_t*) &vm_info->compressed_lifetime, &tmp_amount);
5880
5881 vm_info->purgeable_volatile_pmap = 0;
5882 vm_info->purgeable_volatile_resident = 0;
5883 vm_info->purgeable_volatile_virtual = 0;
5884 if (is_kernel_task) {
5885 /*
5886 * We do not maintain the detailed stats for the
5887 * kernel_pmap, so just count everything as
5888 * "internal"...
5889 */
5890 vm_info->internal = vm_info->resident_size;
5891 /*
5892 * ... but since the memory held by the VM compressor
5893 * in the kernel address space ought to be attributed
5894 * to user-space tasks, we subtract it from "internal"
5895 * to give memory reporting tools a more accurate idea
5896 * of what the kernel itself is actually using, instead
5897 * of making it look like the kernel is leaking memory
5898 * when the system is under memory pressure.
5899 */
5900 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
5901 PAGE_SIZE);
5902 } else {
5903 mach_vm_size_t volatile_virtual_size;
5904 mach_vm_size_t volatile_resident_size;
5905 mach_vm_size_t volatile_compressed_size;
5906 mach_vm_size_t volatile_pmap_size;
5907 mach_vm_size_t volatile_compressed_pmap_size;
5908 kern_return_t kr;
5909
5910 if (flavor == TASK_VM_INFO_PURGEABLE) {
5911 kr = vm_map_query_volatile(
5912 map,
5913 &volatile_virtual_size,
5914 &volatile_resident_size,
5915 &volatile_compressed_size,
5916 &volatile_pmap_size,
5917 &volatile_compressed_pmap_size);
5918 if (kr == KERN_SUCCESS) {
5919 vm_info->purgeable_volatile_pmap =
5920 volatile_pmap_size;
5921 if (radar_20146450) {
5922 vm_info->compressed -=
5923 volatile_compressed_pmap_size;
5924 }
5925 vm_info->purgeable_volatile_resident =
5926 volatile_resident_size;
5927 vm_info->purgeable_volatile_virtual =
5928 volatile_virtual_size;
5929 }
5930 }
5931 }
5932 *task_info_count = TASK_VM_INFO_REV0_COUNT;
5933
5934 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5935 /* must be captured while we still have the map lock */
5936 vm_info->min_address = map->min_offset;
5937 vm_info->max_address = map->max_offset;
5938 }
5939
5940 /*
5941 * Done with vm map things, can drop the map lock and reference,
5942 * and take the task lock back.
5943 *
5944 * Re-validate that the task didn't die on us.
5945 */
5946 if (!is_kernel_task) {
5947 vm_map_unlock_read(map);
5948 vm_map_deallocate(map);
5949 }
5950 map = VM_MAP_NULL;
5951
5952 task_lock(task);
5953
5954 if ((task != current_task()) && (!task->active)) {
5955 error = KERN_INVALID_ARGUMENT;
5956 break;
5957 }
5958
5959 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
5960 vm_info->phys_footprint =
5961 (mach_vm_size_t) get_task_phys_footprint(task);
5962 *task_info_count = TASK_VM_INFO_REV1_COUNT;
5963 }
5964 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
5965 /* data was captured above */
5966 *task_info_count = TASK_VM_INFO_REV2_COUNT;
5967 }
5968
5969 if (original_task_info_count >= TASK_VM_INFO_REV3_COUNT) {
5970 ledger_get_lifetime_max(task->ledger,
5971 task_ledgers.phys_footprint,
5972 &vm_info->ledger_phys_footprint_peak);
5973 ledger_get_balance(task->ledger,
5974 task_ledgers.purgeable_nonvolatile,
5975 &vm_info->ledger_purgeable_nonvolatile);
5976 ledger_get_balance(task->ledger,
5977 task_ledgers.purgeable_nonvolatile_compressed,
5978 &vm_info->ledger_purgeable_novolatile_compressed);
5979 ledger_get_balance(task->ledger,
5980 task_ledgers.purgeable_volatile,
5981 &vm_info->ledger_purgeable_volatile);
5982 ledger_get_balance(task->ledger,
5983 task_ledgers.purgeable_volatile_compressed,
5984 &vm_info->ledger_purgeable_volatile_compressed);
5985 ledger_get_balance(task->ledger,
5986 task_ledgers.network_nonvolatile,
5987 &vm_info->ledger_tag_network_nonvolatile);
5988 ledger_get_balance(task->ledger,
5989 task_ledgers.network_nonvolatile_compressed,
5990 &vm_info->ledger_tag_network_nonvolatile_compressed);
5991 ledger_get_balance(task->ledger,
5992 task_ledgers.network_volatile,
5993 &vm_info->ledger_tag_network_volatile);
5994 ledger_get_balance(task->ledger,
5995 task_ledgers.network_volatile_compressed,
5996 &vm_info->ledger_tag_network_volatile_compressed);
5997 ledger_get_balance(task->ledger,
5998 task_ledgers.media_footprint,
5999 &vm_info->ledger_tag_media_footprint);
6000 ledger_get_balance(task->ledger,
6001 task_ledgers.media_footprint_compressed,
6002 &vm_info->ledger_tag_media_footprint_compressed);
6003 ledger_get_balance(task->ledger,
6004 task_ledgers.media_nofootprint,
6005 &vm_info->ledger_tag_media_nofootprint);
6006 ledger_get_balance(task->ledger,
6007 task_ledgers.media_nofootprint_compressed,
6008 &vm_info->ledger_tag_media_nofootprint_compressed);
6009 ledger_get_balance(task->ledger,
6010 task_ledgers.graphics_footprint,
6011 &vm_info->ledger_tag_graphics_footprint);
6012 ledger_get_balance(task->ledger,
6013 task_ledgers.graphics_footprint_compressed,
6014 &vm_info->ledger_tag_graphics_footprint_compressed);
6015 ledger_get_balance(task->ledger,
6016 task_ledgers.graphics_nofootprint,
6017 &vm_info->ledger_tag_graphics_nofootprint);
6018 ledger_get_balance(task->ledger,
6019 task_ledgers.graphics_nofootprint_compressed,
6020 &vm_info->ledger_tag_graphics_nofootprint_compressed);
6021 ledger_get_balance(task->ledger,
6022 task_ledgers.neural_footprint,
6023 &vm_info->ledger_tag_neural_footprint);
6024 ledger_get_balance(task->ledger,
6025 task_ledgers.neural_footprint_compressed,
6026 &vm_info->ledger_tag_neural_footprint_compressed);
6027 ledger_get_balance(task->ledger,
6028 task_ledgers.neural_nofootprint,
6029 &vm_info->ledger_tag_neural_nofootprint);
6030 ledger_get_balance(task->ledger,
6031 task_ledgers.neural_nofootprint_compressed,
6032 &vm_info->ledger_tag_neural_nofootprint_compressed);
6033 *task_info_count = TASK_VM_INFO_REV3_COUNT;
6034 }
6035 if (original_task_info_count >= TASK_VM_INFO_REV4_COUNT) {
6036 if (get_bsdtask_info(task)) {
6037 vm_info->limit_bytes_remaining =
6038 memorystatus_available_memory_internal(get_bsdtask_info(task));
6039 } else {
6040 vm_info->limit_bytes_remaining = 0;
6041 }
6042 *task_info_count = TASK_VM_INFO_REV4_COUNT;
6043 }
6044 if (original_task_info_count >= TASK_VM_INFO_REV5_COUNT) {
6045 thread_t thread;
6046 uint64_t total = task->decompressions;
6047 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6048 total += thread->decompressions;
6049 }
6050 vm_info->decompressions = (int32_t) MIN(total, INT32_MAX);
6051 *task_info_count = TASK_VM_INFO_REV5_COUNT;
6052 }
6053 if (original_task_info_count >= TASK_VM_INFO_REV6_COUNT) {
6054 ledger_get_balance(task->ledger, task_ledgers.swapins,
6055 &vm_info->ledger_swapins);
6056 *task_info_count = TASK_VM_INFO_REV6_COUNT;
6057 }
6058
6059 break;
6060 }
6061
6062 case TASK_WAIT_STATE_INFO:
6063 {
6064 /*
6065 * Deprecated flavor. Currently allowing some results until all users
6066 * stop calling it. The results may not be accurate.
6067 */
6068 task_wait_state_info_t wait_state_info;
6069 uint64_t total_sfi_ledger_val = 0;
6070
6071 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
6072 error = KERN_INVALID_ARGUMENT;
6073 break;
6074 }
6075
6076 wait_state_info = (task_wait_state_info_t) task_info_out;
6077
6078 wait_state_info->total_wait_state_time = 0;
6079 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
6080
6081#if CONFIG_SCHED_SFI
6082 int i, prev_lentry = -1;
6083 int64_t val_credit, val_debit;
6084
6085 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
6086 val_credit = 0;
6087 /*
6088 * checking with prev_lentry != entry ensures adjacent classes
6089 * which share the same ledger do not add wait times twice.
6090 * Note: Use ledger() call to get data for each individual sfi class.
6091 */
6092 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
6093 KERN_SUCCESS == ledger_get_entries(task->ledger,
6094 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
6095 total_sfi_ledger_val += val_credit;
6096 }
6097 prev_lentry = task_ledgers.sfi_wait_times[i];
6098 }
6099
6100#endif /* CONFIG_SCHED_SFI */
6101 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
6102 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
6103
6104 break;
6105 }
6106 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
6107 {
6108#if DEVELOPMENT || DEBUG
6109 pvm_account_info_t acnt_info;
6110
6111 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
6112 error = KERN_INVALID_ARGUMENT;
6113 break;
6114 }
6115
6116 if (task_info_out == NULL) {
6117 error = KERN_INVALID_ARGUMENT;
6118 break;
6119 }
6120
6121 acnt_info = (pvm_account_info_t) task_info_out;
6122
6123 error = vm_purgeable_account(task, acnt_info);
6124
6125 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
6126
6127 break;
6128#else /* DEVELOPMENT || DEBUG */
6129 error = KERN_NOT_SUPPORTED;
6130 break;
6131#endif /* DEVELOPMENT || DEBUG */
6132 }
6133 case TASK_FLAGS_INFO:
6134 {
6135 task_flags_info_t flags_info;
6136
6137 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
6138 error = KERN_INVALID_ARGUMENT;
6139 break;
6140 }
6141
6142 flags_info = (task_flags_info_t)task_info_out;
6143
6144 /* only publish the 64-bit flag of the task */
6145 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
6146
6147 *task_info_count = TASK_FLAGS_INFO_COUNT;
6148 break;
6149 }
6150
6151 case TASK_DEBUG_INFO_INTERNAL:
6152 {
6153#if DEVELOPMENT || DEBUG
6154 task_debug_info_internal_t dbg_info;
6155 ipc_space_t space = task->itk_space;
6156 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
6157 error = KERN_NOT_SUPPORTED;
6158 break;
6159 }
6160
6161 if (task_info_out == NULL) {
6162 error = KERN_INVALID_ARGUMENT;
6163 break;
6164 }
6165 dbg_info = (task_debug_info_internal_t) task_info_out;
6166 dbg_info->ipc_space_size = 0;
6167
6168 if (space) {
6169 smr_ipc_enter();
6170 ipc_entry_table_t table = smr_entered_load(&space->is_table);
6171 if (table) {
6172 dbg_info->ipc_space_size =
6173 ipc_entry_table_count(table);
6174 }
6175 smr_ipc_leave();
6176 }
6177
6178 dbg_info->suspend_count = task->suspend_count;
6179
6180 error = KERN_SUCCESS;
6181 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
6182 break;
6183#else /* DEVELOPMENT || DEBUG */
6184 error = KERN_NOT_SUPPORTED;
6185 break;
6186#endif /* DEVELOPMENT || DEBUG */
6187 }
6188 case TASK_SUSPEND_STATS_INFO:
6189 {
6190#if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6191 if (*task_info_count < TASK_SUSPEND_STATS_INFO_COUNT || task_info_out == NULL) {
6192 error = KERN_INVALID_ARGUMENT;
6193 break;
6194 }
6195 error = _task_get_suspend_stats_locked(task, (task_suspend_stats_t)task_info_out);
6196 *task_info_count = TASK_SUSPEND_STATS_INFO_COUNT;
6197 break;
6198#else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6199 error = KERN_NOT_SUPPORTED;
6200 break;
6201#endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6202 }
6203 case TASK_SUSPEND_SOURCES_INFO:
6204 {
6205#if CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG)
6206 if (*task_info_count < TASK_SUSPEND_SOURCES_INFO_COUNT || task_info_out == NULL) {
6207 error = KERN_INVALID_ARGUMENT;
6208 break;
6209 }
6210 error = _task_get_suspend_sources_locked(task, (task_suspend_source_t)task_info_out);
6211 *task_info_count = TASK_SUSPEND_SOURCES_INFO_COUNT;
6212 break;
6213#else /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6214 error = KERN_NOT_SUPPORTED;
6215 break;
6216#endif /* CONFIG_TASK_SUSPEND_STATS && (DEVELOPMENT || DEBUG) */
6217 }
6218 default:
6219 error = KERN_INVALID_ARGUMENT;
6220 }
6221
6222 task_unlock(task);
6223 return error;
6224}
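
/*
 * Illustrative user-space sketch of the TASK_VM_INFO usage that the
 * workarounds above log about: *task_info_count must be expressed in
 * natural_t elements (i.e. TASK_VM_INFO_COUNT), never in bytes.
 *
 *	task_vm_info_data_t vm_info;
 *	mach_msg_type_number_t count = TASK_VM_INFO_COUNT;   // elements, not bytes
 *	kern_return_t kr = task_info(mach_task_self(), TASK_VM_INFO,
 *	    (task_info_t)&vm_info, &count);
 *	// On success, `count` is updated to the number of elements actually
 *	// filled in, so callers should check it before reading newer fields.
 */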
6225
6226/*
6227 * task_info_from_user
6228 *
6229 * When task_info is called from user space,
6230 * this function is executed on the MIG server side
6231 * instead of calling directly into task_info.
6232 * This makes it possible to perform additional security
6233 * checks on task_port.
6234 *
6235 * In the case of TASK_DYLD_INFO, we require the more
6236 * privileged task_read_port not the less-privileged task_name_port.
6237 *
6238 */
6239kern_return_t
6240task_info_from_user(
6241 mach_port_t task_port,
6242 task_flavor_t flavor,
6243 task_info_t task_info_out,
6244 mach_msg_type_number_t *task_info_count)
6245{
6246 task_t task;
6247 kern_return_t ret;
6248
6249 if (flavor == TASK_DYLD_INFO) {
6250 task = convert_port_to_task_read(task_port);
6251 } else {
6252 task = convert_port_to_task_name(task_port);
6253 }
6254
6255 ret = task_info(task, flavor, task_info_out, task_info_count);
6256
6257 task_deallocate(task);
6258
6259 return ret;
6260}
6261
6262/*
6263 * Routine: task_dyld_process_info_update_helper
6264 *
6265 * Release send rights in release_ports.
6266 *
6267 * If no active ports are found in the task's dyld notifier array, unset the
6268 * magic value in user space to indicate so.
6269 *
6270 * Condition:
6271 * task's itk_lock is locked, and is unlocked upon return.
6272 * Global g_dyldinfo_mtx is locked, and is unlocked upon return.
6273 */
6274void
6275task_dyld_process_info_update_helper(
6276 task_t task,
6277 size_t active_count,
6278 vm_map_address_t magic_addr, /* a userspace address */
6279 ipc_port_t *release_ports,
6280 size_t release_count)
6281{
6282 void *notifiers_ptr = NULL;
6283
6284 assert(release_count <= DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT);
6285
6286 if (active_count == 0) {
6287 assert(task->itk_dyld_notify != NULL);
6288 notifiers_ptr = task->itk_dyld_notify;
6289 task->itk_dyld_notify = NULL;
6290 itk_unlock(task);
6291
6292 kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
6293 (void)copyoutmap_atomic32(task->map, MACH_PORT_NULL, magic_addr); /* unset magic */
6294 } else {
6295 itk_unlock(task);
6296 (void)copyoutmap_atomic32(task->map, (mach_port_name_t)DYLD_PROCESS_INFO_NOTIFY_MAGIC,
6297 magic_addr); /* reset magic */
6298 }
6299
6300 lck_mtx_unlock(&g_dyldinfo_mtx);
6301
6302 for (size_t i = 0; i < release_count; i++) {
6303 ipc_port_release_send(release_ports[i]);
6304 }
6305}
6306
6307/*
6308 * Routine: task_dyld_process_info_notify_register
6309 *
6310 * Insert a send right into the target task's itk_dyld_notify array. Allocate
6311 * kernel memory for the array if this is the first port to be registered. Also
6312 * clean up any dead rights found in the array.
6313 *
6314 * Consumes sright if returns KERN_SUCCESS, otherwise MIG will destroy it.
6315 *
6316 * Args:
6317 * task: Target task for the registration.
6318 * sright: A send right.
6319 *
6320 * Returns:
6321 * KERN_SUCCESS: Registration succeeded.
6322 * KERN_INVALID_TASK: task is invalid.
6323 * KERN_INVALID_RIGHT: sright is invalid.
6324 * KERN_DENIED: Security policy denied this call.
6325 * KERN_RESOURCE_SHORTAGE: Kernel memory allocation failed.
6326 * KERN_NO_SPACE: No available notifier port slot left for this task.
6327 * KERN_RIGHT_EXISTS: The notifier port is already registered and active.
6328 *
6329 * Other error code see task_info().
6330 *
6331 * See Also:
6332 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6333 */
6334kern_return_t
6335task_dyld_process_info_notify_register(
6336 task_t task,
6337 ipc_port_t sright)
6338{
6339 struct task_dyld_info dyld_info;
6340 mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
6341 ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
6342 uint32_t release_count = 0, active_count = 0;
6343 mach_vm_address_t ports_addr; /* a user space address */
6344 kern_return_t kr;
6345 boolean_t right_exists = false;
6346 ipc_port_t *notifiers_ptr = NULL;
6347 ipc_port_t *portp;
6348
6349 if (task == TASK_NULL || task == kernel_task) {
6350 return KERN_INVALID_TASK;
6351 }
6352
6353 if (!IP_VALID(sright)) {
6354 return KERN_INVALID_RIGHT;
6355 }
6356
6357#if CONFIG_MACF
6358 if (mac_task_check_dyld_process_info_notify_register()) {
6359 return KERN_DENIED;
6360 }
6361#endif
6362
6363 kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
6364 if (kr) {
6365 return kr;
6366 }
6367
6368 if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
6369 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6370 offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
6371 } else {
6372 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6373 offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
6374 }
6375
6376 if (task->itk_dyld_notify == NULL) {
6377 notifiers_ptr = kalloc_type(ipc_port_t,
6378 DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT,
6379 Z_WAITOK | Z_ZERO | Z_NOFAIL);
6380 }
6381
6382 lck_mtx_lock(&g_dyldinfo_mtx);
6383 itk_lock(task);
6384
6385 if (task->itk_dyld_notify == NULL) {
6386 task->itk_dyld_notify = notifiers_ptr;
6387 notifiers_ptr = NULL;
6388 }
6389
6390 assert(task->itk_dyld_notify != NULL);
6391 /* First pass: clear dead names and check for duplicate registration */
6392 for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
6393 portp = &task->itk_dyld_notify[slot];
6394 if (*portp != IPC_PORT_NULL && !ip_active(*portp)) {
6395 release_ports[release_count++] = *portp;
6396 *portp = IPC_PORT_NULL;
6397 } else if (*portp == sright) {
6398 /* the port is already registered and is active */
6399 right_exists = true;
6400 }
6401
6402 if (*portp != IPC_PORT_NULL) {
6403 active_count++;
6404 }
6405 }
6406
6407 if (right_exists) {
6408 /* skip second pass */
6409 kr = KERN_RIGHT_EXISTS;
6410 goto out;
6411 }
6412
6413 /* Second pass: register the port */
6414 kr = KERN_NO_SPACE;
6415 for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
6416 portp = &task->itk_dyld_notify[slot];
6417 if (*portp == IPC_PORT_NULL) {
6418 *portp = sright;
6419 active_count++;
6420 kr = KERN_SUCCESS;
6421 break;
6422 }
6423 }
6424
6425out:
6426 assert(active_count > 0);
6427
6428 task_dyld_process_info_update_helper(task, active_count,
6429 (vm_map_address_t)ports_addr, release_ports, release_count);
6430 /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
6431
6432 kfree_type(ipc_port_t, DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT, notifiers_ptr);
6433
6434 return kr;
6435}
6436
6437/*
6438 * Routine: task_dyld_process_info_notify_deregister
6439 *
6440 * Remove the send right in the target task's itk_dyld_notify array that matches the
6441 * receive right name passed in. Deallocate the kernel memory for the array if it's the
6442 * last port to be deregistered, or if all ports have died. Also clean up any dead rights found in the array.
6443 *
6444 * Does not consume any reference.
6445 *
6446 * Args:
6447 * task: Target task for the deregistration.
6448 * rcv_name: The name denoting the receive right in caller's space.
6449 *
6450 * Returns:
6451 * KERN_SUCCESS: A matching entry was found and deregistration succeeded.
6452 * KERN_INVALID_TASK: task is invalid.
6453 * KERN_INVALID_NAME: name is invalid.
6454 * KERN_DENIED: Security policy denied this call.
6455 * KERN_FAILURE: A matching entry is not found.
6456 * KERN_INVALID_RIGHT: The name passed in does not represent a valid rcv right.
6457 *
6458 * Other error code see task_info().
6459 *
6460 * See Also:
6461 * task_dyld_process_info_notify_get_trap() in mach_kernelrpc.c
6462 */
6463kern_return_t
6464task_dyld_process_info_notify_deregister(
6465 task_t task,
6466 mach_port_name_t rcv_name)
6467{
6468 struct task_dyld_info dyld_info;
6469 mach_msg_type_number_t info_count = TASK_DYLD_INFO_COUNT;
6470 ipc_port_t release_ports[DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT];
6471 uint32_t release_count = 0, active_count = 0;
6472 boolean_t port_found = false;
6473 mach_vm_address_t ports_addr; /* a user space address */
6474 ipc_port_t sright;
6475 kern_return_t kr;
6476 ipc_port_t *portp;
6477
6478 if (task == TASK_NULL || task == kernel_task) {
6479 return KERN_INVALID_TASK;
6480 }
6481
6482 if (!MACH_PORT_VALID(rcv_name)) {
6483 return KERN_INVALID_NAME;
6484 }
6485
6486#if CONFIG_MACF
6487 if (mac_task_check_dyld_process_info_notify_register()) {
6488 return KERN_DENIED;
6489 }
6490#endif
6491
6492 kr = task_info(task, TASK_DYLD_INFO, (task_info_t)&dyld_info, &info_count);
6493 if (kr) {
6494 return kr;
6495 }
6496
6497 if (dyld_info.all_image_info_format == TASK_DYLD_ALL_IMAGE_INFO_32) {
6498 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6499 offsetof(struct user32_dyld_all_image_infos, notifyMachPorts));
6500 } else {
6501 ports_addr = (mach_vm_address_t)(dyld_info.all_image_info_addr +
6502 offsetof(struct user64_dyld_all_image_infos, notifyMachPorts));
6503 }
6504
6505 kr = ipc_port_translate_receive(current_space(), rcv_name, &sright); /* does not produce port ref */
6506 if (kr) {
6507 return KERN_INVALID_RIGHT;
6508 }
6509
6510 ip_reference(sright);
6511 ip_mq_unlock(sright);
6512
6513 assert(sright != IPC_PORT_NULL);
6514
6515 lck_mtx_lock(&g_dyldinfo_mtx);
6516 itk_lock(task);
6517
6518 if (task->itk_dyld_notify == NULL) {
6519 itk_unlock(task);
6520 lck_mtx_unlock(&g_dyldinfo_mtx);
6521 ip_release(sright);
6522 return KERN_FAILURE;
6523 }
6524
6525 for (int slot = 0; slot < DYLD_MAX_PROCESS_INFO_NOTIFY_COUNT; slot++) {
6526 portp = &task->itk_dyld_notify[slot];
6527 if (*portp == sright) {
6528 release_ports[release_count++] = *portp;
6529 *portp = IPC_PORT_NULL;
6530 port_found = true;
6531 } else if ((*portp != IPC_PORT_NULL && !ip_active(*portp))) {
6532 release_ports[release_count++] = *portp;
6533 *portp = IPC_PORT_NULL;
6534 }
6535
6536 if (*portp != IPC_PORT_NULL) {
6537 active_count++;
6538 }
6539 }
6540
6541 task_dyld_process_info_update_helper(task, active_count,
6542 (vm_map_address_t)ports_addr, release_ports, release_count);
6543 /* itk_lock, g_dyldinfo_mtx are unlocked upon return */
6544
6545 ip_release(sright);
6546
6547 return port_found ? KERN_SUCCESS : KERN_FAILURE;
6548}
6549
6550/*
6551 * task_power_info
6552 *
6553 * Returns power stats for the task.
6554 * Note: Called with task locked.
6555 */
6556void
6557task_power_info_locked(
6558 task_t task,
6559 task_power_info_t info,
6560 gpu_energy_data_t ginfo,
6561 task_power_info_v2_t infov2,
6562 struct task_power_info_extra *extra_info)
6563{
6564 thread_t thread;
6565 ledger_amount_t tmp;
6566
6567 uint64_t runnable_time_sum = 0;
6568
6569 task_lock_assert_owned(task);
6570
6571 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
6572 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
6573 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
6574 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
6575
6576 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
6577 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
6578
6579 struct recount_usage usage = { 0 };
6580 struct recount_usage usage_perf = { 0 };
6581 recount_task_usage_perf_only(task, &usage, &usage_perf);
6582
6583 info->total_user = usage.ru_metrics[RCT_LVL_USER].rm_time_mach;
6584 info->total_system = recount_usage_system_time_mach(&usage);
6585 runnable_time_sum = task->total_runnable_time;
6586
6587 if (ginfo) {
6588 ginfo->task_gpu_utilisation = task->task_gpu_ns;
6589 }
6590
6591 if (infov2) {
6592 infov2->task_ptime = recount_usage_time_mach(&usage_perf);
6593 infov2->task_pset_switches = task->ps_switch;
6594#if CONFIG_PERVASIVE_ENERGY
6595 infov2->task_energy = usage.ru_energy_nj;
6596#endif /* CONFIG_PERVASIVE_ENERGY */
6597 }
6598
6599 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6600 spl_t x;
6601
6602 if (thread->options & TH_OPT_IDLE_THREAD) {
6603 continue;
6604 }
6605
6606 x = splsched();
6607 thread_lock(thread);
6608
6609 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
6610 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
6611
6612 if (infov2) {
6613 infov2->task_pset_switches += thread->ps_switch;
6614 }
6615
6616 runnable_time_sum += timer_grab(&thread->runnable_timer);
6617
6618 if (ginfo) {
6619 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
6620 }
6621 thread_unlock(thread);
6622 splx(x);
6623 }
6624
6625 if (extra_info) {
6626 extra_info->runnable_time = runnable_time_sum;
6627#if CONFIG_PERVASIVE_CPI
6628 extra_info->cycles = recount_usage_cycles(&usage);
6629 extra_info->instructions = recount_usage_instructions(&usage);
6630 extra_info->pcycles = recount_usage_cycles(&usage_perf);
6631 extra_info->pinstructions = recount_usage_instructions(&usage_perf);
6632 extra_info->user_ptime = usage_perf.ru_metrics[RCT_LVL_USER].rm_time_mach;
6633 extra_info->system_ptime = recount_usage_system_time_mach(&usage_perf);
6634#endif // CONFIG_PERVASIVE_CPI
6635#if CONFIG_PERVASIVE_ENERGY
6636 extra_info->energy = usage.ru_energy_nj;
6637 extra_info->penergy = usage_perf.ru_energy_nj;
6638#endif // CONFIG_PERVASIVE_ENERGY
6639#if RECOUNT_SECURE_METRICS
6640 if (PE_i_can_has_debugger(NULL)) {
6641 extra_info->secure_time = usage.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
6642 extra_info->secure_ptime = usage_perf.ru_metrics[RCT_LVL_SECURE].rm_time_mach;
6643 }
6644#endif // RECOUNT_SECURE_METRICS
6645 }
6646}
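
/*
 * Illustrative user-space sketch: the data gathered by
 * task_power_info_locked() is typically consumed through the
 * TASK_POWER_INFO_V2 flavor of task_info():
 *
 *	task_power_info_v2_data_t power;
 *	mach_msg_type_number_t count = TASK_POWER_INFO_V2_COUNT;
 *	kern_return_t kr = task_info(mach_task_self(), TASK_POWER_INFO_V2,
 *	    (task_info_t)&power, &count);
 *	// power.cpu_energy carries the wakeup and time totals;
 *	// power.gpu_energy carries the GPU utilisation filled in above.
 */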
6647
6648/*
6649 * task_gpu_utilisation
6650 *
6651 * Returns the total gpu time used by all the threads of the task
6652 * (both dead and alive)
6653 */
6654uint64_t
6655task_gpu_utilisation(
6656 task_t task)
6657{
6658 uint64_t gpu_time = 0;
6659#if defined(__x86_64__)
6660 thread_t thread;
6661
6662 task_lock(task);
6663 gpu_time += task->task_gpu_ns;
6664
6665 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6666 spl_t x;
6667 x = splsched();
6668 thread_lock(thread);
6669 gpu_time += ml_gpu_stat(thread);
6670 thread_unlock(thread);
6671 splx(x);
6672 }
6673
6674 task_unlock(task);
6675#else /* defined(__x86_64__) */
6676 /* silence compiler warning */
6677 (void)task;
6678#endif /* defined(__x86_64__) */
6679 return gpu_time;
6680}
6681
6682/* This function updates the cpu time in the arrays for each
6683 * effective and requested QoS class
6684 */
6685void
6686task_update_cpu_time_qos_stats(
6687 task_t task,
6688 uint64_t *eqos_stats,
6689 uint64_t *rqos_stats)
6690{
6691 if (!eqos_stats && !rqos_stats) {
6692 return;
6693 }
6694
6695 task_lock(task);
6696 thread_t thread;
6697 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6698 if (thread->options & TH_OPT_IDLE_THREAD) {
6699 continue;
6700 }
6701
6702 thread_update_qos_cpu_time(thread);
6703 }
6704
6705 if (eqos_stats) {
6706 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
6707 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
6708 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
6709 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
6710 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
6711 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
6712 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
6713 }
6714
6715 if (rqos_stats) {
6716 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
6717 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
6718 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
6719 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
6720 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
6721 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
6722 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
6723 }
6724
6725 task_unlock(task);
6726}
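
/*
 * Illustrative sketch of a kernel-side caller (the caller itself is an
 * assumption): both arrays are indexed by THREAD_QOS_* class, so they must
 * hold at least THREAD_QOS_LAST entries and should start zeroed because the
 * routine accumulates into them.
 *
 *	uint64_t eqos_cpu_time[THREAD_QOS_LAST] = { 0 };
 *	uint64_t rqos_cpu_time[THREAD_QOS_LAST] = { 0 };
 *	task_update_cpu_time_qos_stats(task, eqos_cpu_time, rqos_cpu_time);
 *	// eqos_cpu_time[THREAD_QOS_UTILITY] now includes the utility-class time
 */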
6727
6728kern_return_t
6729task_purgable_info(
6730 task_t task,
6731 task_purgable_info_t *stats)
6732{
6733 if (task == TASK_NULL || stats == NULL) {
6734 return KERN_INVALID_ARGUMENT;
6735 }
6736 /* Take task reference */
6737 task_reference(task);
6738 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
6739 /* Drop task reference */
6740 task_deallocate(task);
6741 return KERN_SUCCESS;
6742}
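
/*
 * Illustrative sketch of the expected calling pattern: the caller supplies
 * the stats buffer and the routine handles referencing the task.
 *
 *	task_purgable_info_t stats;
 *	kern_return_t kr = task_purgable_info(task, &stats);
 *	if (kr == KERN_SUCCESS) {
 *		// stats now holds the task's purgeable object counts and sizes
 *	}
 */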
6743
6744void
6745task_vtimer_set(
6746 task_t task,
6747 integer_t which)
6748{
6749 thread_t thread;
6750 spl_t x;
6751
6752 task_lock(task);
6753
6754 task->vtimers |= which;
6755
6756 switch (which) {
6757 case TASK_VTIMER_USER:
6758 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6759 x = splsched();
6760 thread_lock(thread);
6761 struct recount_times_mach times = recount_thread_times(thread);
6762 thread->vtimer_user_save = times.rtm_user;
6763 thread_unlock(thread);
6764 splx(x);
6765 }
6766 break;
6767
6768 case TASK_VTIMER_PROF:
6769 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6770 x = splsched();
6771 thread_lock(thread);
6772 thread->vtimer_prof_save = recount_thread_time_mach(thread);
6773 thread_unlock(thread);
6774 splx(x);
6775 }
6776 break;
6777
6778 case TASK_VTIMER_RLIM:
6779 queue_iterate(&task->threads, thread, thread_t, task_threads) {
6780 x = splsched();
6781 thread_lock(thread);
6782 thread->vtimer_rlim_save = recount_thread_time_mach(thread);
6783 thread_unlock(thread);
6784 splx(x);
6785 }
6786 break;
6787 }
6788
6789 task_unlock(task);
6790}
6791
6792void
6793task_vtimer_clear(
6794 task_t task,
6795 integer_t which)
6796{
6797 task_lock(task);
6798
6799 task->vtimers &= ~which;
6800
6801 task_unlock(task);
6802}
6803
6804void
6805task_vtimer_update(
6806 __unused
6807 task_t task,
6808 integer_t which,
6809 uint32_t *microsecs)
6810{
6811 thread_t thread = current_thread();
6812 uint32_t tdelt = 0;
6813 clock_sec_t secs = 0;
6814 uint64_t tsum;
6815
6816 assert(task == current_task());
6817
6818 spl_t s = splsched();
6819 thread_lock(thread);
6820
6821 if ((task->vtimers & which) != (uint32_t)which) {
6822 thread_unlock(thread);
6823 splx(s);
6824 return;
6825 }
6826
6827 switch (which) {
6828 case TASK_VTIMER_USER:;
6829 struct recount_times_mach times = recount_thread_times(thread);
6830 tsum = times.rtm_user;
6831 tdelt = (uint32_t)(tsum - thread->vtimer_user_save);
6832 thread->vtimer_user_save = tsum;
6833 absolutetime_to_microtime(tdelt, &secs, microsecs);
6834 break;
6835
6836 case TASK_VTIMER_PROF:
6837 tsum = recount_current_thread_time_mach();
6838 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
6839 absolutetime_to_microtime(tdelt, &secs, microsecs);
6840 /* if the time delta is smaller than a usec, ignore */
6841 if (*microsecs != 0) {
6842 thread->vtimer_prof_save = tsum;
6843 }
6844 break;
6845
6846 case TASK_VTIMER_RLIM:
6847 tsum = recount_current_thread_time_mach();
6848 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
6849 thread->vtimer_rlim_save = tsum;
6850 absolutetime_to_microtime(tdelt, &secs, microsecs);
6851 break;
6852 }
6853
6854 thread_unlock(thread);
6855 splx(s);
6856}
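
/*
 * Illustrative sketch of the vtimer lifecycle. A BSD-side interval-timer
 * consumer (ITIMER_VIRTUAL / ITIMER_PROF style) is assumed; only the three
 * routines above are authoritative.
 *
 *	task_vtimer_set(task, TASK_VTIMER_USER);    // arm: snapshot per-thread user time
 *
 *	// later, on a thread of that task, harvest the elapsed user time:
 *	uint32_t delta_usecs;
 *	task_vtimer_update(task, TASK_VTIMER_USER, &delta_usecs);
 *
 *	task_vtimer_clear(task, TASK_VTIMER_USER);  // disarm when the timer is cancelled
 */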
6857
6858uint64_t
6859get_task_dispatchqueue_offset(
6860 task_t task)
6861{
6862 return task->dispatchqueue_offset;
6863}
6864
6865void
6866task_synchronizer_destroy_all(task_t task)
6867{
6868 /*
6869 * Destroy owned semaphores
6870 */
6871 semaphore_destroy_all(task);
6872}
6873
6874/*
6875 * Install default (machine-dependent) initial thread state
6876 * on the task. Subsequent thread creation will have this initial
6877 * state set on the thread by machine_thread_inherit_taskwide().
6878 * Flavors and structures are exactly the same as those to thread_set_state()
6879 */
6880kern_return_t
6881task_set_state(
6882 task_t task,
6883 int flavor,
6884 thread_state_t state,
6885 mach_msg_type_number_t state_count)
6886{
6887 kern_return_t ret;
6888
6889 if (task == TASK_NULL) {
6890 return KERN_INVALID_ARGUMENT;
6891 }
6892
6893 task_lock(task);
6894
6895 if (!task->active) {
6896 task_unlock(task);
6897 return KERN_FAILURE;
6898 }
6899
6900 ret = machine_task_set_state(task, flavor, state, state_count);
6901
6902 task_unlock(task);
6903 return ret;
6904}
6905
6906/*
6907 * Examine the default (machine-dependent) initial thread state
6908 * on the task, as set by task_set_state(). Flavors and structures
6909 * are exactly the same as those passed to thread_get_state().
6910 */
6911kern_return_t
6912task_get_state(
6913 task_t task,
6914 int flavor,
6915 thread_state_t state,
6916 mach_msg_type_number_t *state_count)
6917{
6918 kern_return_t ret;
6919
6920 if (task == TASK_NULL) {
6921 return KERN_INVALID_ARGUMENT;
6922 }
6923
6924 task_lock(task);
6925
6926 if (!task->active) {
6927 task_unlock(task);
6928 return KERN_FAILURE;
6929 }
6930
6931 ret = machine_task_get_state(task, flavor, state, state_count);
6932
6933 task_unlock(task);
6934 return ret;
6935}
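
/*
 * Illustrative sketch (arm64 flavor names assumed; any thread_set_state()
 * flavor is accepted): install a default initial thread state on a task,
 * then read it back.
 *
 *	arm_thread_state64_t ts = { 0 };
 *	mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
 *	// ... populate ts ...
 *	kern_return_t kr = task_set_state(task, ARM_THREAD_STATE64,
 *	    (thread_state_t)&ts, count);
 *
 *	kr = task_get_state(task, ARM_THREAD_STATE64,
 *	    (thread_state_t)&ts, &count);
 */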
6936
6937
6938static kern_return_t __attribute__((noinline, not_tail_called))
6939PROC_VIOLATED_GUARD__SEND_EXC_GUARD(
6940 mach_exception_code_t code,
6941 mach_exception_subcode_t subcode,
6942 void *reason,
6943 boolean_t backtrace_only)
6944{
6945#ifdef MACH_BSD
6946 if (1 == proc_selfpid()) {
6947 return KERN_NOT_SUPPORTED; // initproc is immune
6948 }
6949#endif
6950 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
6951 [0] = code,
6952 [1] = subcode,
6953 };
6954 task_t task = current_task();
6955 kern_return_t kr;
6956 void *bsd_info = get_bsdtask_info(task);
6957
6958 /* (See jetsam-related comments below) */
6959
6960 proc_memstat_skip(p: bsd_info, TRUE);
6961 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, code: codes, codeCnt: 2, reason, lightweight: backtrace_only);
6962 proc_memstat_skip(p: bsd_info, FALSE);
6963 return kr;
6964}
6965
6966kern_return_t
6967task_violated_guard(
6968 mach_exception_code_t code,
6969 mach_exception_subcode_t subcode,
6970 void *reason,
6971 bool backtrace_only)
6972{
6973 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD(code, subcode, reason, backtrace_only);
6974}
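
/*
 * Illustrative sketch (not compiled into the kernel): how a subsystem might
 * report a guard violation for the current task.  The encode macros and
 * GUARD_TYPE_MACH_PORT are assumed to come from <kern/exc_guard.h>; "flavor"
 * and "port_name" are hypothetical values supplied by the caller.  A NULL
 * reason means no os_reason is attached, and backtrace_only == false asks
 * for a full corpse rather than a lightweight one.
 *
 *	mach_exception_code_t code = 0;
 *
 *	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_MACH_PORT);
 *	EXC_GUARD_ENCODE_FLAVOR(code, flavor);
 *	EXC_GUARD_ENCODE_TARGET(code, port_name);
 *	(void)task_violated_guard(code, (mach_exception_subcode_t)0, NULL, false);
 */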
6975
6976
6977#if CONFIG_MEMORYSTATUS
6978
6979boolean_t
6980task_get_memlimit_is_active(task_t task)
6981{
6982 assert(task != NULL);
6983
6984 if (task->memlimit_is_active == 1) {
6985 return TRUE;
6986 } else {
6987 return FALSE;
6988 }
6989}
6990
6991void
6992task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
6993{
6994 assert(task != NULL);
6995
6996 if (memlimit_is_active) {
6997 task->memlimit_is_active = 1;
6998 } else {
6999 task->memlimit_is_active = 0;
7000 }
7001}
7002
7003boolean_t
7004task_get_memlimit_is_fatal(task_t task)
7005{
7006 assert(task != NULL);
7007
7008 if (task->memlimit_is_fatal == 1) {
7009 return TRUE;
7010 } else {
7011 return FALSE;
7012 }
7013}
7014
7015void
7016task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
7017{
7018 assert(task != NULL);
7019
7020 if (memlimit_is_fatal) {
7021 task->memlimit_is_fatal = 1;
7022 } else {
7023 task->memlimit_is_fatal = 0;
7024 }
7025}
7026
7027uint64_t
7028task_get_dirty_start(task_t task)
7029{
7030 return task->memstat_dirty_start;
7031}
7032
7033void
7034task_set_dirty_start(task_t task, uint64_t start)
7035{
7036 task_lock(task);
7037 task->memstat_dirty_start = start;
7038 task_unlock(task);
7039}
7040
7041boolean_t
7042task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7043{
7044 boolean_t triggered = FALSE;
7045
7046 assert(task == current_task());
7047
	/*
	 * Returns TRUE if the task has already triggered an EXC_RESOURCE
	 * exception for the given (active/inactive) memlimit.
	 */
7051
7052 if (memlimit_is_active) {
7053 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
7054 } else {
7055 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
7056 }
7057
7058 return triggered;
7059}
7060
7061void
7062task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
7063{
7064 assert(task == current_task());
7065
7066 /*
7067 * We allow one exc_resource per process per active/inactive limit.
7068 * The limit's fatal attribute does not come into play.
7069 */
7070
7071 if (memlimit_is_active) {
7072 task->memlimit_active_exc_resource = 1;
7073 } else {
7074 task->memlimit_inactive_exc_resource = 1;
7075 }
7076}
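
/*
 * Illustrative sketch (not compiled into the kernel): the check-then-mark
 * pattern callers in this file use so that at most one EXC_RESOURCE is
 * raised per task per active/inactive memlimit.  "deliver_exc_resource" is
 * a hypothetical stand-in for whatever notification the caller sends; both
 * helpers above assert that "task" is the current task.
 *
 *	boolean_t active = task_get_memlimit_is_active(task);
 *
 *	if (!task_has_triggered_exc_resource(task, active)) {
 *		deliver_exc_resource(task);                     // hypothetical
 *		task_mark_has_triggered_exc_resource(task, active);
 *	}
 */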
7077
7078#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
7079
7080void __attribute__((noinline))
7081PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, send_exec_resource_options_t exception_options)
7082{
7083 task_t task = current_task();
7084 int pid = 0;
7085 const char *procname = "unknown";
7086 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
7087 boolean_t send_sync_exc_resource = FALSE;
7088 void *cur_bsd_info = get_bsdtask_info(current_task());
7089
7090#ifdef MACH_BSD
7091 pid = proc_selfpid();
7092
7093 if (pid == 1) {
7094 /*
7095 * Cannot have ReportCrash analyzing
7096 * a suspended initproc.
7097 */
7098 return;
7099 }
7100
7101 if (cur_bsd_info != NULL) {
7102 procname = proc_name_address(p: cur_bsd_info);
7103 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(p: cur_bsd_info);
7104 }
7105#endif
7106#if CONFIG_COREDUMP
7107 if (hwm_user_cores) {
7108 int error;
7109 uint64_t starttime, end;
7110 clock_sec_t secs = 0;
7111 uint32_t microsecs = 0;
7112
7113 starttime = mach_absolute_time();
7114 /*
7115 * Trigger a coredump of this process. Don't proceed unless we know we won't
7116 * be filling up the disk; and ignore the core size resource limit for this
7117 * core file.
7118 */
7119 if ((error = coredump(p: cur_bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
7120 printf(format: "couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
7121 }
7122 /*
7123 * coredump() leaves the task suspended.
7124 */
7125 task_resume_internal(task: current_task());
7126
7127 end = mach_absolute_time();
7128 absolutetime_to_microtime(abstime: end - starttime, secs: &secs, microsecs: &microsecs);
7129 printf(format: "coredump of %s[%d] taken in %d secs %d microsecs\n",
7130 proc_name_address(p: cur_bsd_info), pid, (int)secs, microsecs);
7131 }
7132#endif /* CONFIG_COREDUMP */
7133
7134 if (disable_exc_resource) {
7135 printf(format: "process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
7136 "suppressed by a boot-arg.\n", procname, pid, max_footprint_mb);
7137 return;
7138 }
7139 printf(format: "process %s [%d] crossed memory %s (%d MB); EXC_RESOURCE "
7140 "\n", procname, pid, (!(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? "high watermark" : "diagnostics limit"), max_footprint_mb);
7141
7142 /*
7143 * A task that has triggered an EXC_RESOURCE, should not be
7144 * jetsammed when the device is under memory pressure. Here
7145 * we set the P_MEMSTAT_SKIP flag so that the process
7146 * will be skipped if the memorystatus_thread wakes up.
7147 *
7148 * This is a debugging aid to ensure we can get a corpse before
7149 * the jetsam thread kills the process.
7150 * Note that proc_memstat_skip is a no-op on release kernels.
7151 */
7152 proc_memstat_skip(p: cur_bsd_info, TRUE);
7153
7154 code[0] = code[1] = 0;
7155 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
	/*
	 * Regardless of whether a diagnostics memlimit was also violated, fatal
	 * exceptions are always reported as high-watermark violations.  In other
	 * words, if both a diagnostics limit and a watermark are configured and
	 * the watermark is what was crossed, the watermark flavor is reported.
	 */
7161 if (!(exception_options & EXEC_RESOURCE_FATAL)) {
7162 EXC_RESOURCE_ENCODE_FLAVOR(code[0], !(exception_options & EXEC_RESOURCE_DIAGNOSTIC) ? FLAVOR_HIGH_WATERMARK : FLAVOR_DIAG_MEMLIMIT);
7163 } else {
7164 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK );
7165 }
7166 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
7167 /*
7168 * Do not generate a corpse fork if the violation is a fatal one
7169 * or the process wants synchronous EXC_RESOURCE exceptions.
7170 */
7171 if ((exception_options & EXEC_RESOURCE_FATAL) || send_sync_exc_resource || !exc_via_corpse_forking) {
7172 if (exception_options & EXEC_RESOURCE_FATAL) {
7173 vm_map_set_corpse_source(map: task->map);
7174 }
7175
7176 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
7177 if (send_sync_exc_resource || !corpse_for_fatal_memkill) {
7178 /*
7179 * Use the _internal_ variant so that no user-space
7180 * process can resume our task from under us.
7181 */
7182 task_suspend_internal(task);
7183 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
7184 task_resume_internal(task);
7185 }
7186 } else {
7187 if (disable_exc_resource_during_audio && audio_active) {
7188 printf(format: "process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
7189 "suppressed due to audio playback.\n", procname, pid, max_footprint_mb);
7190 } else {
7191 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
7192 code, EXCEPTION_CODE_MAX, NULL, FALSE);
7193 }
7194 }
7195
7196 /*
7197 * After the EXC_RESOURCE has been handled, we must clear the
7198 * P_MEMSTAT_SKIP flag so that the process can again be
7199 * considered for jetsam if the memorystatus_thread wakes up.
7200 */
7201 proc_memstat_skip(p: cur_bsd_info, FALSE); /* clear the flag */
7202}
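
/*
 * Illustrative sketch (not compiled into the kernel): how an EXC_RESOURCE
 * handler could pick apart the code[0] value built above.  The decode macros
 * are assumed to mirror the encode macros in <kern/exc_resource.h>.
 *
 *	if (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code[0]) == RESOURCE_TYPE_MEMORY &&
 *	    EXC_RESOURCE_DECODE_FLAVOR(code[0]) == FLAVOR_HIGH_WATERMARK) {
 *		int limit_mb = (int)EXC_RESOURCE_HWM_DECODE_LIMIT(code[0]);
 *		// react to the high-watermark crossing, e.g. log limit_mb
 *	}
 */
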
7203/*
7204 * Callback invoked when a task exceeds its physical footprint limit.
7205 */
7206void
7207task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
7208{
7209 ledger_amount_t max_footprint = 0;
7210 ledger_amount_t max_footprint_mb = 0;
7211#if DEBUG || DEVELOPMENT
7212 ledger_amount_t diag_threshold_limit_mb = 0;
7213 ledger_amount_t diag_threshold_limit = 0;
7214#endif
7215#if CONFIG_DEFERRED_RECLAIM
7216 ledger_amount_t current_footprint;
7217#endif /* CONFIG_DEFERRED_RECLAIM */
7218 task_t task;
7219 send_exec_resource_is_warning is_warning = IS_NOT_WARNING;
7220 boolean_t memlimit_is_active;
7221 send_exec_resource_is_fatal memlimit_is_fatal;
7222 send_exec_resource_is_diagnostics is_diag_mem_threshold = IS_NOT_DIAGNOSTICS;
7223 if (warning == LEDGER_WARNING_DIAG_MEM_THRESHOLD) {
7224 is_diag_mem_threshold = IS_DIAGNOSTICS;
7225 is_warning = IS_WARNING;
7226 } else if (warning == LEDGER_WARNING_DIPPED_BELOW) {
7227 /*
7228 * Task memory limits only provide a warning on the way up.
7229 */
7230 return;
7231 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
7232 /*
7233 * This task is in danger of violating a memory limit,
7234 * It has exceeded a percentage level of the limit.
7235 */
7236 is_warning = IS_WARNING;
7237 } else {
7238 /*
7239 * The task has exceeded the physical footprint limit.
7240 * This is not a warning but a true limit violation.
7241 */
7242 is_warning = IS_NOT_WARNING;
7243 }
7244
7245 task = current_task();
7246
7247 ledger_get_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint, limit: &max_footprint);
7248#if DEBUG || DEVELOPMENT
7249 ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &diag_threshold_limit);
7250#endif
7251#if CONFIG_DEFERRED_RECLAIM
7252 if (task->deferred_reclamation_metadata != NULL) {
7253 /*
7254 * Task is enrolled in deferred reclamation.
7255 * Do a reclaim to ensure it's really over its limit.
7256 */
7257 vm_deferred_reclamation_reclaim_from_task_sync(task, UINT64_MAX);
7258 ledger_get_balance(ledger: task->ledger, entry: task_ledgers.phys_footprint, balance: &current_footprint);
7259 if (current_footprint < max_footprint) {
7260 return;
7261 }
7262 }
7263#endif /* CONFIG_DEFERRED_RECLAIM */
7264 max_footprint_mb = max_footprint >> 20;
7265#if DEBUG || DEVELOPMENT
7266 diag_threshold_limit_mb = diag_threshold_limit >> 20;
7267#endif
7268 memlimit_is_active = task_get_memlimit_is_active(task);
7269 memlimit_is_fatal = task_get_memlimit_is_fatal(task) == FALSE ? IS_NOT_FATAL : IS_FATAL;
7270#if DEBUG || DEVELOPMENT
7271 if (is_diag_mem_threshold == IS_NOT_DIAGNOSTICS) {
7272 task_process_crossed_limit_no_diag(task, max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
7273 } else {
7274 task_process_crossed_limit_diag(diag_threshold_limit_mb);
7275 }
7276#else
7277 task_process_crossed_limit_no_diag(task, ledger_limit_size: max_footprint_mb, memlimit_is_fatal, memlimit_is_active, is_warning);
7278#endif
7279}
7280
/*
 * Actions to perform when a process has crossed its memory watermark or hit a
 * fatal memory-consumption limit.
 */
7283static inline void
7284task_process_crossed_limit_no_diag(task_t task, ledger_amount_t ledger_limit_size, bool memlimit_is_fatal, bool memlimit_is_active, send_exec_resource_is_warning is_warning)
7285{
7286 send_exec_resource_options_t exception_options = 0;
7287 if (memlimit_is_fatal) {
7288 exception_options |= EXEC_RESOURCE_FATAL;
7289 }
7290 /*
7291 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
7292 * We only generate the exception once per process per memlimit (active/inactive limit).
7293 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
7294 * and we disable it by marking that memlimit as exception triggered.
7295 */
7296 if (is_warning == IS_NOT_WARNING && !task_has_triggered_exc_resource(task, memlimit_is_active)) {
7297 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(max_footprint_mb: (int)ledger_limit_size, exception_options);
		// Since this was a memory limit rather than a diagnostics threshold, no
		// further signalling is wanted; a diagnostics limit, by contrast, may be
		// reloaded with a different value and signal the violation again.
7300 memorystatus_log_exception(max_footprint_mb: (int)ledger_limit_size, memlimit_is_active, memlimit_is_fatal);
7301 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
7302 }
7303 memorystatus_on_ledger_footprint_exceeded(warning: is_warning == IS_NOT_WARNING ? FALSE : TRUE, memlimit_is_active, memlimit_is_fatal);
7304}
7305
7306#if DEBUG || DEVELOPMENT
7307/**
7308 * Actions to take when a process has crossed the diagnostics limit
7309 */
7310static inline void
7311task_process_crossed_limit_diag(ledger_amount_t ledger_limit_size)
7312{
	/*
	 * If this is an actual violation (not a warning), generate an EXC_RESOURCE
	 * exception.  For diagnostics thresholds the exception is signaled only
	 * once; the inhibit / rearm mechanism is handled at the ledger level.
	 */
7318 send_exec_resource_options_t exception_options = EXEC_RESOURCE_DIAGNOSTIC;
7319 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)ledger_limit_size, exception_options);
7320 memorystatus_log_diag_threshold_exception((int)ledger_limit_size);
7321}
7322#endif
7323
7324extern int proc_check_footprint_priv(void);
7325
7326kern_return_t
7327task_set_phys_footprint_limit(
7328 task_t task,
7329 int new_limit_mb,
7330 int *old_limit_mb)
7331{
7332 kern_return_t error;
7333
7334 boolean_t memlimit_is_active;
7335 boolean_t memlimit_is_fatal;
7336
7337 if ((error = proc_check_footprint_priv())) {
7338 return KERN_NO_ACCESS;
7339 }
7340
7341 /*
7342 * This call should probably be obsoleted.
7343 * But for now, we default to current state.
7344 */
7345 memlimit_is_active = task_get_memlimit_is_active(task);
7346 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
7347
7348 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
7349}
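
/*
 * Illustrative sketch (not compiled into the kernel): capping a task's
 * physical footprint at 512 MB and remembering the previous value; passing
 * -1 as the new limit removes the cap again (see the -1 handling in
 * task_set_phys_footprint_limit_internal() below).
 *
 *	int old_limit_mb = 0;
 *	kern_return_t kr;
 *
 *	kr = task_set_phys_footprint_limit(task, 512, &old_limit_mb);
 *	if (kr == KERN_SUCCESS) {
 *		// ... later, remove the cap ...
 *		(void)task_set_phys_footprint_limit(task, -1, NULL);
 *	}
 */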
7350
/*
 * Set the diagnostics memory consumption limit for a given task.
 */
7354#if CONFIG_MEMORYSTATUS
7355#if DEVELOPMENT || DEBUG
7356kern_return_t
7357task_set_diag_footprint_limit(
7358 task_t task,
7359 uint64_t new_limit_mb,
7360 uint64_t *old_limit_mb)
7361{
7362 kern_return_t error;
7363
7364 if ((error = proc_check_footprint_priv())) {
7365 return KERN_NO_ACCESS;
7366 }
7367
7368 return task_set_diag_footprint_limit_internal(task, new_limit_mb, old_limit_mb);
7369}
7370
7371#endif // DEVELOPMENT || DEBUG
7372#endif // CONFIG_MEMORYSTATUS
7373
7374kern_return_t
7375task_convert_phys_footprint_limit(
7376 int limit_mb,
7377 int *converted_limit_mb)
7378{
7379 if (limit_mb == -1) {
7380 /*
7381 * No limit
7382 */
7383 if (max_task_footprint != 0) {
7384 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
7385 } else {
7386 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
7387 }
7388 } else {
7389 /* nothing to convert */
7390 *converted_limit_mb = limit_mb;
7391 }
7392 return KERN_SUCCESS;
7393}
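
/*
 * Illustrative sketch (not compiled into the kernel): the -1 convention in
 * practice.  A caller asking to convert "-1 MB" gets back either the global
 * max_task_footprint default or LEDGER_LIMIT_INFINITY expressed in MB; any
 * other value is passed through unchanged.
 *
 *	int mb = 0;
 *
 *	(void)task_convert_phys_footprint_limit(-1, &mb);   // mb = default or "infinity" in MB
 *	(void)task_convert_phys_footprint_limit(512, &mb);  // mb = 512
 */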
7394
7395kern_return_t
7396task_set_phys_footprint_limit_internal(
7397 task_t task,
7398 int new_limit_mb,
7399 int *old_limit_mb,
7400 boolean_t memlimit_is_active,
7401 boolean_t memlimit_is_fatal)
7402{
7403 ledger_amount_t old;
7404 kern_return_t ret;
7405#if DEVELOPMENT || DEBUG
7406 diagthreshold_check_return diag_threshold_validity;
7407#endif
7408 ret = ledger_get_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint, limit: &old);
7409
7410 if (ret != KERN_SUCCESS) {
7411 return ret;
7412 }
	/**
	 * We may need to re-enable the diagnostics threshold, so fetch its value
	 * and current status first.
	 */
7417#if DEVELOPMENT || DEBUG
7418 diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_mb, false);
	/**
	 * If the footprint limit and the diagnostics threshold would end up the
	 * same, disable the threshold.
	 */
7422 if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7423 ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7424 } else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7425 ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7426 }
7427#endif
7428
7429 /*
7430 * Check that limit >> 20 will not give an "unexpected" 32-bit
7431 * result. There are, however, implicit assumptions that -1 mb limit
7432 * equates to LEDGER_LIMIT_INFINITY.
7433 */
7434 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
7435
7436 if (old_limit_mb) {
7437 *old_limit_mb = (int)(old >> 20);
7438 }
7439
7440 if (new_limit_mb == -1) {
7441 /*
7442 * Caller wishes to remove the limit.
7443 */
7444 ledger_set_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint,
7445 limit: max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
7446 warn_level_percentage: max_task_footprint ? (uint8_t)max_task_footprint_warning_level : 0);
7447
7448 task_lock(task);
7449 task_set_memlimit_is_active(task, memlimit_is_active);
7450 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
7451 task_unlock(task);
	/**
	 * If the diagnostics threshold was disabled and the limit has now changed,
	 * it has to be re-enabled.
	 */
7455#if DEVELOPMENT || DEBUG
7456 if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7457 ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7458 } else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7459 ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7460 }
7461 #endif
7462 return KERN_SUCCESS;
7463 }
7464
7465#ifdef CONFIG_NOMONITORS
7466 return KERN_SUCCESS;
7467#endif /* CONFIG_NOMONITORS */
7468
7469 task_lock(task);
7470
7471 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
7472 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
7473 (((ledger_amount_t)new_limit_mb << 20) == old)) {
7474 /*
7475 * memlimit state is not changing
7476 */
7477 task_unlock(task);
7478 return KERN_SUCCESS;
7479 }
7480
7481 task_set_memlimit_is_active(task, memlimit_is_active);
7482 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
7483
7484 ledger_set_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint,
7485 limit: (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
7486
7487 if (task == current_task()) {
7488 ledger_check_new_balance(thread: current_thread(), ledger: task->ledger,
7489 entry: task_ledgers.phys_footprint);
7490 }
7491
7492 task_unlock(task);
7493#if DEVELOPMENT || DEBUG
7494 if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7495 ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7496 }
7497 #endif
7498
7499 return KERN_SUCCESS;
7500}
7501
7502#if RESETTABLE_DIAG_FOOTPRINT_LIMITS
7503kern_return_t
7504task_set_diag_footprint_limit_internal(
7505 task_t task,
7506 uint64_t new_limit_bytes,
7507 uint64_t *old_limit_bytes)
7508{
7509 ledger_amount_t old = 0;
7510 kern_return_t ret = KERN_SUCCESS;
7511 diagthreshold_check_return diag_threshold_validity;
7512 ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &old);
7513
7514 if (ret != KERN_SUCCESS) {
7515 return ret;
7516 }
	/**
	 * We may need to re-enable the diagnostics threshold, so fetch its value
	 * and current status first.
	 */
7521 diag_threshold_validity = task_check_memorythreshold_is_valid( task, new_limit_bytes >> 20, true);
	/**
	 * If the footprint limit and the diagnostics threshold would end up the
	 * same, disable the threshold.
	 */
7525 if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7526 ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7527 }
7528
	/*
	 * Note the implicit assumption that a -1 limit equates to
	 * LEDGER_LIMIT_INFINITY, i.e. "no limit".
	 */
7534 if (old_limit_bytes) {
7535 *old_limit_bytes = old;
7536 }
7537
7538 if (new_limit_bytes == -1) {
7539 /*
7540 * Caller wishes to remove the limit.
7541 */
7542 ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
7543 LEDGER_LIMIT_INFINITY);
		/*
		 * If the memory diagnostics flag was disabled, enable it again.
		 */
7547 ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7548 return KERN_SUCCESS;
7549 }
7550
7551#ifdef CONFIG_NOMONITORS
7552 return KERN_SUCCESS;
7553#else
7554
7555 task_lock(task);
7556 ledger_set_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint,
7557 (ledger_amount_t)new_limit_bytes );
7558 if (task == current_task()) {
7559 ledger_check_new_balance(current_thread(), task->ledger,
7560 task_ledgers.phys_footprint);
7561 }
7562
7563 task_unlock(task);
7564 if (diag_threshold_validity == THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED) {
7565 ledger_set_diag_mem_threshold_disabled(task->ledger, task_ledgers.phys_footprint);
7566 } else if (diag_threshold_validity == THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED) {
7567 ledger_set_diag_mem_threshold_enabled(task->ledger, task_ledgers.phys_footprint);
7568 }
7569
7570 return KERN_SUCCESS;
7571#endif /* CONFIG_NOMONITORS */
7572}
7573
7574kern_return_t
7575task_get_diag_footprint_limit_internal(
7576 task_t task,
7577 uint64_t *new_limit_bytes,
7578 bool *threshold_disabled)
7579{
7580 ledger_amount_t ledger_limit;
7581 kern_return_t ret = KERN_SUCCESS;
7582 if (new_limit_bytes == NULL || threshold_disabled == NULL) {
7583 return KERN_INVALID_ARGUMENT;
7584 }
7585 ret = ledger_get_diag_mem_threshold(task->ledger, task_ledgers.phys_footprint, &ledger_limit);
7586 if (ledger_limit == LEDGER_LIMIT_INFINITY) {
7587 ledger_limit = -1;
7588 }
7589 if (ret == KERN_SUCCESS) {
7590 *new_limit_bytes = ledger_limit;
7591 ret = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, threshold_disabled);
7592 }
7593 return ret;
7594}
7595#endif /* RESETTABLE_DIAG_FOOTPRINT_LIMITS */
7596
7597
7598kern_return_t
7599task_get_phys_footprint_limit(
7600 task_t task,
7601 int *limit_mb)
7602{
7603 ledger_amount_t limit;
7604 kern_return_t ret;
7605
7606 ret = ledger_get_limit(ledger: task->ledger, entry: task_ledgers.phys_footprint, limit: &limit);
7607 if (ret != KERN_SUCCESS) {
7608 return ret;
7609 }
7610
7611 /*
7612 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
7613 * result. There are, however, implicit assumptions that -1 mb limit
7614 * equates to LEDGER_LIMIT_INFINITY.
7615 */
7616 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
7617 *limit_mb = (int)(limit >> 20);
7618
7619 return KERN_SUCCESS;
7620}
7621#else /* CONFIG_MEMORYSTATUS */
7622kern_return_t
7623task_set_phys_footprint_limit(
7624 __unused task_t task,
7625 __unused int new_limit_mb,
7626 __unused int *old_limit_mb)
7627{
7628 return KERN_FAILURE;
7629}
7630
7631kern_return_t
7632task_get_phys_footprint_limit(
7633 __unused task_t task,
7634 __unused int *limit_mb)
7635{
7636 return KERN_FAILURE;
7637}
7638#endif /* CONFIG_MEMORYSTATUS */
7639
7640security_token_t *
7641task_get_sec_token(task_t task)
7642{
7643 return &task_get_ro(t: task)->task_tokens.sec_token;
7644}
7645
7646void
7647task_set_sec_token(task_t task, security_token_t *token)
7648{
7649 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7650 task_tokens.sec_token, token);
7651}
7652
7653audit_token_t *
7654task_get_audit_token(task_t task)
7655{
7656 return &task_get_ro(t: task)->task_tokens.audit_token;
7657}
7658
7659void
7660task_set_audit_token(task_t task, audit_token_t *token)
7661{
7662 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7663 task_tokens.audit_token, token);
7664}
7665
7666void
7667task_set_tokens(task_t task, security_token_t *sec_token, audit_token_t *audit_token)
7668{
7669 struct task_token_ro_data tokens;
7670
7671 tokens = task_get_ro(t: task)->task_tokens;
7672 tokens.sec_token = *sec_token;
7673 tokens.audit_token = *audit_token;
7674
7675 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task), task_tokens,
7676 &tokens);
7677}
7678
7679boolean_t
7680task_is_privileged(task_t task)
7681{
7682 return task_get_sec_token(task)->val[0] == 0;
7683}
7684
7685#ifdef CONFIG_MACF
7686uint8_t *
7687task_get_mach_trap_filter_mask(task_t task)
7688{
7689 return task_get_ro(t: task)->task_filters.mach_trap_filter_mask;
7690}
7691
7692void
7693task_set_mach_trap_filter_mask(task_t task, uint8_t *mask)
7694{
7695 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7696 task_filters.mach_trap_filter_mask, &mask);
7697}
7698
7699uint8_t *
7700task_get_mach_kobj_filter_mask(task_t task)
7701{
7702 return task_get_ro(t: task)->task_filters.mach_kobj_filter_mask;
7703}
7704
7705mach_vm_address_t
7706task_get_all_image_info_addr(task_t task)
7707{
7708 return task->all_image_info_addr;
7709}
7710
7711void
7712task_set_mach_kobj_filter_mask(task_t task, uint8_t *mask)
7713{
7714 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
7715 task_filters.mach_kobj_filter_mask, &mask);
7716}
7717
7718#endif /* CONFIG_MACF */
7719
7720void
7721task_set_thread_limit(task_t task, uint16_t thread_limit)
7722{
7723 assert(task != kernel_task);
7724 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
7725 task_lock(task);
7726 task->task_thread_limit = thread_limit;
7727 task_unlock(task);
7728 }
7729}
7730
7731#if CONFIG_PROC_RESOURCE_LIMITS
7732kern_return_t
7733task_set_port_space_limits(task_t task, uint32_t soft_limit, uint32_t hard_limit)
7734{
7735 return ipc_space_set_table_size_limits(task->itk_space, soft_limit, hard_limit);
7736}
7737#endif /* CONFIG_PROC_RESOURCE_LIMITS */
7738
7739#if XNU_TARGET_OS_OSX
7740boolean_t
7741task_has_system_version_compat_enabled(task_t task)
7742{
7743 boolean_t enabled = FALSE;
7744
7745 task_lock(task);
7746 enabled = (task->t_flags & TF_SYS_VERSION_COMPAT);
7747 task_unlock(task);
7748
7749 return enabled;
7750}
7751
7752void
7753task_set_system_version_compat_enabled(task_t task, boolean_t enable_system_version_compat)
7754{
7755 assert(task == current_task());
7756 assert(task != kernel_task);
7757
7758 task_lock(task);
7759 if (enable_system_version_compat) {
7760 task->t_flags |= TF_SYS_VERSION_COMPAT;
7761 } else {
7762 task->t_flags &= ~TF_SYS_VERSION_COMPAT;
7763 }
7764 task_unlock(task);
7765}
7766#endif /* XNU_TARGET_OS_OSX */
7767
7768/*
7769 * We need to export some functions to other components that
7770 * are currently implemented in macros within the osfmk
7771 * component. Just export them as functions of the same name.
7772 */
7773boolean_t
7774is_kerneltask(task_t t)
7775{
7776 if (t == kernel_task) {
7777 return TRUE;
7778 }
7779
7780 return FALSE;
7781}
7782
7783boolean_t
7784is_corpsefork(task_t t)
7785{
7786 return task_is_a_corpse_fork(t);
7787}
7788
7789task_t
7790current_task_early(void)
7791{
7792 if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
7793 if (current_thread()->t_tro == NULL) {
7794 return TASK_NULL;
7795 }
7796 }
7797 return get_threadtask(current_thread());
7798}
7799
7800task_t
7801current_task(void)
7802{
7803 return get_threadtask(current_thread());
7804}
7805
7806/* defined in bsd/kern/kern_prot.c */
7807extern int get_audit_token_pid(audit_token_t *audit_token);
7808
7809int
7810task_pid(task_t task)
7811{
7812 if (task) {
7813 return get_audit_token_pid(audit_token: task_get_audit_token(task));
7814 }
7815 return -1;
7816}
7817
7818#if __has_feature(ptrauth_calls)
/*
 * Get the shared region id and JOP signing key for the task.
 * The function allocates a kalloc buffer and returns it to the
 * caller, who is responsible for freeing it.  This is used for
 * getting the information via the task port.
 */
7825char *
7826task_get_vm_shared_region_id_and_jop_pid(task_t task, uint64_t *jop_pid)
7827{
7828 size_t len;
7829 char *shared_region_id = NULL;
7830
7831 task_lock(task);
7832 if (task->shared_region_id == NULL) {
7833 task_unlock(task);
7834 return NULL;
7835 }
7836 len = strlen(task->shared_region_id) + 1;
7837
7838 /* don't hold task lock while allocating */
7839 task_unlock(task);
7840 shared_region_id = kalloc_data(len, Z_WAITOK);
7841 task_lock(task);
7842
7843 if (task->shared_region_id == NULL) {
7844 task_unlock(task);
7845 kfree_data(shared_region_id, len);
7846 return NULL;
7847 }
7848 assert(len == strlen(task->shared_region_id) + 1); /* should never change */
7849 strlcpy(shared_region_id, task->shared_region_id, len);
7850 task_unlock(task);
7851
7852 /* find key from its auth pager */
7853 if (jop_pid != NULL) {
7854 *jop_pid = shared_region_find_key(shared_region_id);
7855 }
7856
7857 return shared_region_id;
7858}
7859
7860/*
7861 * set the shared region id for a task
7862 */
7863void
7864task_set_shared_region_id(task_t task, char *id)
7865{
7866 char *old_id;
7867
7868 task_lock(task);
7869 old_id = task->shared_region_id;
7870 task->shared_region_id = id;
7871 task->shared_region_auth_remapped = FALSE;
7872 task_unlock(task);
7873
7874 /* free any pre-existing shared region id */
7875 if (old_id != NULL) {
7876 shared_region_key_dealloc(old_id);
7877 kfree_data(old_id, strlen(old_id) + 1);
7878 }
7879}
7880#endif /* __has_feature(ptrauth_calls) */
7881
7882/*
7883 * This routine finds a thread in a task by its unique id
7884 * Returns a referenced thread or THREAD_NULL if the thread was not found
7885 *
7886 * TODO: This is super inefficient - it's an O(threads in task) list walk!
7887 * We should make a tid hash, or transition all tid clients to thread ports
7888 *
7889 * Precondition: No locks held (will take task lock)
7890 */
7891thread_t
7892task_findtid(task_t task, uint64_t tid)
7893{
7894 thread_t self = current_thread();
7895 thread_t found_thread = THREAD_NULL;
7896 thread_t iter_thread = THREAD_NULL;
7897
7898 /* Short-circuit the lookup if we're looking up ourselves */
7899 if (tid == self->thread_id || tid == TID_NULL) {
7900 assert(get_threadtask(self) == task);
7901
7902 thread_reference(thread: self);
7903
7904 return self;
7905 }
7906
7907 task_lock(task);
7908
7909 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
7910 if (iter_thread->thread_id == tid) {
7911 found_thread = iter_thread;
7912 thread_reference(thread: found_thread);
7913 break;
7914 }
7915 }
7916
7917 task_unlock(task);
7918
7919 return found_thread;
7920}
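
/*
 * Illustrative sketch (not compiled into the kernel): looking up a thread by
 * its unique id.  The returned thread carries a reference that the caller
 * must drop with thread_deallocate().
 *
 *	thread_t thread = task_findtid(task, tid);
 *
 *	if (thread != THREAD_NULL) {
 *		// ... inspect or manipulate the thread ...
 *		thread_deallocate(thread);
 *	}
 */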
7921
7922int
7923pid_from_task(task_t task)
7924{
7925 int pid = -1;
7926 void *bsd_info = get_bsdtask_info(task);
7927
7928 if (bsd_info) {
7929 pid = proc_pid(p: bsd_info);
7930 } else {
7931 pid = task_pid(task);
7932 }
7933
7934 return pid;
7935}
7936
7937/*
7938 * Control the CPU usage monitor for a task.
7939 */
7940kern_return_t
7941task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
7942{
7943 int error = KERN_SUCCESS;
7944
7945 if (*flags & CPUMON_MAKE_FATAL) {
7946 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
7947 } else {
7948 error = KERN_INVALID_ARGUMENT;
7949 }
7950
7951 return error;
7952}
7953
7954/*
7955 * Control the wakeups monitor for a task.
7956 */
7957kern_return_t
7958task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
7959{
7960 ledger_t ledger = task->ledger;
7961
7962 task_lock(task);
7963 if (*flags & WAKEMON_GET_PARAMS) {
7964 ledger_amount_t limit;
7965 uint64_t period;
7966
7967 ledger_get_limit(ledger, entry: task_ledgers.interrupt_wakeups, limit: &limit);
7968 ledger_get_period(ledger, entry: task_ledgers.interrupt_wakeups, period: &period);
7969
7970 if (limit != LEDGER_LIMIT_INFINITY) {
7971 /*
7972 * An active limit means the wakeups monitor is enabled.
7973 */
7974 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
7975 *flags = WAKEMON_ENABLE;
7976 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
7977 *flags |= WAKEMON_MAKE_FATAL;
7978 }
7979 } else {
7980 *flags = WAKEMON_DISABLE;
7981 *rate_hz = -1;
7982 }
7983
7984 /*
7985 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
7986 */
7987 task_unlock(task);
7988 return KERN_SUCCESS;
7989 }
7990
7991 if (*flags & WAKEMON_ENABLE) {
7992 if (*flags & WAKEMON_SET_DEFAULTS) {
7993 *rate_hz = task_wakeups_monitor_rate;
7994 }
7995
7996#ifndef CONFIG_NOMONITORS
7997 if (*flags & WAKEMON_MAKE_FATAL) {
7998 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
7999 }
8000#endif /* CONFIG_NOMONITORS */
8001
8002 if (*rate_hz <= 0) {
8003 task_unlock(task);
8004 return KERN_INVALID_ARGUMENT;
8005 }
8006
8007#ifndef CONFIG_NOMONITORS
8008 ledger_set_limit(ledger, entry: task_ledgers.interrupt_wakeups, limit: *rate_hz * task_wakeups_monitor_interval,
8009 warn_level_percentage: (uint8_t)task_wakeups_monitor_ustackshots_trigger_pct);
8010 ledger_set_period(ledger, entry: task_ledgers.interrupt_wakeups, period: task_wakeups_monitor_interval * NSEC_PER_SEC);
8011 ledger_enable_callback(ledger, entry: task_ledgers.interrupt_wakeups);
8012#endif /* CONFIG_NOMONITORS */
8013 } else if (*flags & WAKEMON_DISABLE) {
8014 /*
8015 * Caller wishes to disable wakeups monitor on the task.
8016 *
8017 * Disable telemetry if it was triggered by the wakeups monitor, and
8018 * remove the limit & callback on the wakeups ledger entry.
8019 */
8020#if CONFIG_TELEMETRY
8021 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, enable_disable: 0);
8022#endif
8023 ledger_disable_refill(l: ledger, entry: task_ledgers.interrupt_wakeups);
8024 ledger_disable_callback(ledger, entry: task_ledgers.interrupt_wakeups);
8025 }
8026
8027 task_unlock(task);
8028 return KERN_SUCCESS;
8029}
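
/*
 * Illustrative sketch (not compiled into the kernel): enabling the wakeups
 * monitor on the current task at the built-in default rate and making a
 * violation fatal to the task.
 *
 *	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS | WAKEMON_MAKE_FATAL;
 *	int32_t rate_hz = 0;            // overwritten by WAKEMON_SET_DEFAULTS
 *
 *	(void)task_wakeups_monitor_ctl(current_task(), &flags, &rate_hz);
 */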
8030
8031void
8032task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
8033{
8034 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
8035#if CONFIG_TELEMETRY
8036 /*
8037 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
8038 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
8039 */
8040 telemetry_task_ctl(task: current_task(), TF_WAKEMON_WARNING, enable_disable: 1);
8041#endif
8042 return;
8043 }
8044
8045#if CONFIG_TELEMETRY
8046 /*
8047 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
8048 * exceeded the limit, turn telemetry off for the task.
8049 */
8050 telemetry_task_ctl(task: current_task(), TF_WAKEMON_WARNING, enable_disable: 0);
8051#endif
8052
8053 if (warning == 0) {
8054 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
8055 }
8056}
8057
8058TUNABLE(bool, enable_wakeup_reports, "enable_wakeup_reports", false); /* Enable wakeup reports. */
8059
8060void __attribute__((noinline))
8061SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
8062{
8063 task_t task = current_task();
8064 int pid = 0;
8065 const char *procname = "unknown";
8066 boolean_t fatal;
8067 kern_return_t kr;
8068#ifdef EXC_RESOURCE_MONITORS
8069 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8070#endif /* EXC_RESOURCE_MONITORS */
8071 struct ledger_entry_info lei;
8072
8073#ifdef MACH_BSD
8074 pid = proc_selfpid();
8075 if (get_bsdtask_info(task) != NULL) {
8076 procname = proc_name_address(p: get_bsdtask_info(current_task()));
8077 }
8078#endif
8079
8080 ledger_get_entry_info(ledger: task->ledger, entry: task_ledgers.interrupt_wakeups, lei: &lei);
8081
8082 /*
8083 * Disable the exception notification so we don't overwhelm
8084 * the listener with an endless stream of redundant exceptions.
8085 * TODO: detect whether another thread is already reporting the violation.
8086 */
8087 uint32_t flags = WAKEMON_DISABLE;
8088 task_wakeups_monitor_ctl(task, flags: &flags, NULL);
8089
8090 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
8091 trace_resource_violation(RMON_CPUWAKES_VIOLATED, ledger_info: &lei);
8092 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
8093 "over ~%llu seconds, averaging %llu wakes / second and "
8094 "violating a %slimit of %llu wakes over %llu seconds.\n",
8095 procname, pid,
8096 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
8097 lei.lei_last_refill == 0 ? 0 :
8098 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
8099 fatal ? "FATAL " : "",
8100 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
8101
8102 if (enable_wakeup_reports) {
8103 kr = send_resource_violation(send_cpu_wakes_violation, violator: task, ledger_info: &lei,
8104 flags: fatal ? kRNFatalLimitFlag : 0);
8105 if (kr) {
8106 printf(format: "send_resource_violation(CPU wakes, ...): error %#x\n", kr);
8107 }
8108 }
8109
8110#ifdef EXC_RESOURCE_MONITORS
8111 if (disable_exc_resource) {
8112 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8113 "suppressed by a boot-arg\n", procname, pid);
8114 return;
8115 }
8116 if (disable_exc_resource_during_audio && audio_active) {
8117 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8118 "suppressed due to audio playback\n", procname, pid);
8119 return;
8120 }
8121 if (lei.lei_last_refill == 0) {
8122 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
8123 "suppressed due to lei.lei_last_refill = 0 \n", procname, pid);
8124 }
8125
8126 code[0] = code[1] = 0;
8127 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
8128 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
8129 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
8130 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
8131 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
8132 lei.lei_last_refill);
8133 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
8134 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
8135 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8136#endif /* EXC_RESOURCE_MONITORS */
8137
8138 if (fatal) {
8139 task_terminate_internal(task);
8140 }
8141}
8142
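/*
 * Atomically fold a logical-write delta into a global counter and report
 * whether the accumulated total crossed io_telemetry_limit.  The
 * compare-and-swap loop below retries until this caller's read-modify-write
 * wins; the winner of the crossing resets the counter to zero, so only one
 * caller is told to emit telemetry per io_telemetry_limit bytes written.
 */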
8143static boolean_t
8144global_update_logical_writes(int64_t io_delta, int64_t *global_write_count)
8145{
8146 int64_t old_count, new_count;
8147 boolean_t needs_telemetry;
8148
8149 do {
8150 new_count = old_count = *global_write_count;
8151 new_count += io_delta;
8152 if (new_count >= io_telemetry_limit) {
8153 new_count = 0;
8154 needs_telemetry = TRUE;
8155 } else {
8156 needs_telemetry = FALSE;
8157 }
8158 } while (!OSCompareAndSwap64(old_count, new_count, global_write_count));
8159 return needs_telemetry;
8160}
8161
8162void
8163task_update_physical_writes(__unused task_t task, __unused task_physical_write_flavor_t flavor, __unused uint64_t io_size, __unused task_balance_flags_t flags)
8164{
8165#if CONFIG_PHYS_WRITE_ACCT
8166 if (!io_size) {
8167 return;
8168 }
8169
8170 /*
8171 * task == NULL means that we have to update kernel_task ledgers
8172 */
8173 if (!task) {
8174 task = kernel_task;
8175 }
8176
8177 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PHYS_WRITE_ACCT)) | DBG_FUNC_NONE,
8178 task_pid(task), flavor, io_size, flags, 0);
8179 DTRACE_IO4(physical_writes, struct task *, task, task_physical_write_flavor_t, flavor, uint64_t, io_size, task_balance_flags_t, flags);
8180
8181 if (flags & TASK_BALANCE_CREDIT) {
8182 if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
8183 OSAddAtomic64(io_size, (SInt64 *)&(task->task_fs_metadata_writes));
8184 ledger_credit_nocheck(ledger: task->ledger, entry: task_ledgers.fs_metadata_writes, amount: io_size);
8185 }
8186 } else if (flags & TASK_BALANCE_DEBIT) {
8187 if (flavor == TASK_PHYSICAL_WRITE_METADATA) {
8188 OSAddAtomic64(-1 * io_size, (SInt64 *)&(task->task_fs_metadata_writes));
8189 ledger_debit_nocheck(ledger: task->ledger, entry: task_ledgers.fs_metadata_writes, amount: io_size);
8190 }
8191 }
8192#endif /* CONFIG_PHYS_WRITE_ACCT */
8193}
8194
8195void
8196task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
8197{
8198 int64_t io_delta = 0;
8199 int64_t * global_counter_to_update;
8200 boolean_t needs_telemetry = FALSE;
8201 boolean_t is_external_device = FALSE;
8202 int ledger_to_update = 0;
8203 struct task_writes_counters * writes_counters_to_update;
8204
8205 if ((!task) || (!io_size) || (!vp)) {
8206 return;
8207 }
8208
8209 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
8210 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
8211 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
8212
8213 // Is the drive backing this vnode internal or external to the system?
8214 if (vnode_isonexternalstorage(vp) == false) {
8215 global_counter_to_update = &global_logical_writes_count;
8216 ledger_to_update = task_ledgers.logical_writes;
8217 writes_counters_to_update = &task->task_writes_counters_internal;
8218 is_external_device = FALSE;
8219 } else {
8220 global_counter_to_update = &global_logical_writes_to_external_count;
8221 ledger_to_update = task_ledgers.logical_writes_to_external;
8222 writes_counters_to_update = &task->task_writes_counters_external;
8223 is_external_device = TRUE;
8224 }
8225
8226 switch (flags) {
8227 case TASK_WRITE_IMMEDIATE:
8228 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_immediate_writes));
8229 ledger_credit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8230 if (!is_external_device) {
8231 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8232 }
8233 break;
8234 case TASK_WRITE_DEFERRED:
8235 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_deferred_writes));
8236 ledger_credit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8237 if (!is_external_device) {
8238 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8239 }
8240 break;
8241 case TASK_WRITE_INVALIDATED:
8242 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_invalidated_writes));
8243 ledger_debit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8244 if (!is_external_device) {
8245 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, FALSE, io_size);
8246 }
8247 break;
8248 case TASK_WRITE_METADATA:
8249 OSAddAtomic64(io_size, (SInt64 *)&(writes_counters_to_update->task_metadata_writes));
8250 ledger_credit(ledger: task->ledger, entry: ledger_to_update, amount: io_size);
8251 if (!is_external_device) {
8252 coalition_io_ledger_update(task, FLAVOR_IO_LOGICAL_WRITES, TRUE, io_size);
8253 }
8254 break;
8255 }
8256
8257 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
8258 if (io_telemetry_limit != 0) {
8259 /* If io_telemetry_limit is 0, disable global updates and I/O telemetry */
8260 needs_telemetry = global_update_logical_writes(io_delta, global_write_count: global_counter_to_update);
8261 if (needs_telemetry && !is_external_device) {
8262 act_set_io_telemetry_ast(current_thread());
8263 }
8264 }
8265}
8266
8267/*
8268 * Control the I/O monitor for a task.
8269 */
8270kern_return_t
8271task_io_monitor_ctl(task_t task, uint32_t *flags)
8272{
8273 ledger_t ledger = task->ledger;
8274
8275 task_lock(task);
8276 if (*flags & IOMON_ENABLE) {
8277 /* Configure the physical I/O ledger */
8278 ledger_set_limit(ledger, entry: task_ledgers.physical_writes, limit: (task_iomon_limit_mb * 1024 * 1024), warn_level_percentage: 0);
8279 ledger_set_period(ledger, entry: task_ledgers.physical_writes, period: (task_iomon_interval_secs * NSEC_PER_SEC));
8280 } else if (*flags & IOMON_DISABLE) {
8281 /*
8282 * Caller wishes to disable I/O monitor on the task.
8283 */
8284 ledger_disable_refill(l: ledger, entry: task_ledgers.physical_writes);
8285 ledger_disable_callback(ledger, entry: task_ledgers.physical_writes);
8286 }
8287
8288 task_unlock(task);
8289 return KERN_SUCCESS;
8290}
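
/*
 * Illustrative sketch (not compiled into the kernel): turning the physical
 * write monitor on for a task with the boot-time defaults configured above,
 * and later turning it back off.
 *
 *	uint32_t flags = IOMON_ENABLE;
 *
 *	(void)task_io_monitor_ctl(task, &flags);
 *	// ...
 *	flags = IOMON_DISABLE;
 *	(void)task_io_monitor_ctl(task, &flags);
 */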
8291
8292void
8293task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
8294{
8295 if (warning == 0) {
8296 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(flavor: (int)param0);
8297 }
8298}
8299
8300void __attribute__((noinline))
8301SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
8302{
8303 int pid = 0;
8304 task_t task = current_task();
8305#ifdef EXC_RESOURCE_MONITORS
8306 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8307#endif /* EXC_RESOURCE_MONITORS */
8308 struct ledger_entry_info lei = {};
8309 kern_return_t kr;
8310
8311#ifdef MACH_BSD
8312 pid = proc_selfpid();
8313#endif
8314 /*
8315 * Get the ledger entry info. We need to do this before disabling the exception
8316 * to get correct values for all fields.
8317 */
8318 switch (flavor) {
8319 case FLAVOR_IO_PHYSICAL_WRITES:
8320 ledger_get_entry_info(ledger: task->ledger, entry: task_ledgers.physical_writes, lei: &lei);
8321 break;
8322 }
8323
8324
8325 /*
8326 * Disable the exception notification so we don't overwhelm
8327 * the listener with an endless stream of redundant exceptions.
8328 * TODO: detect whether another thread is already reporting the violation.
8329 */
8330 uint32_t flags = IOMON_DISABLE;
8331 task_io_monitor_ctl(task, flags: &flags);
8332
8333 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
8334 trace_resource_violation(RMON_LOGWRITES_VIOLATED, ledger_info: &lei);
8335 }
8336 os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit : %lld MB per %lld secs]\n",
8337 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
8338
8339 kr = send_resource_violation(send_disk_writes_violation, violator: task, ledger_info: &lei, kRNFlagsNone);
8340 if (kr) {
8341 printf(format: "send_resource_violation(disk_writes, ...): error %#x\n", kr);
8342 }
8343
8344#ifdef EXC_RESOURCE_MONITORS
8345 code[0] = code[1] = 0;
8346 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
8347 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
8348 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
8349 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
8350 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
8351 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
8352#endif /* EXC_RESOURCE_MONITORS */
8353}
8354
8355void
8356task_port_space_ast(__unused task_t task)
8357{
8358 uint32_t current_size, soft_limit, hard_limit;
8359 assert(task == current_task());
8360 bool should_notify = ipc_space_check_table_size_limit(space: task->itk_space,
8361 current_limit: &current_size, soft_limit: &soft_limit, hard_limit: &hard_limit);
8362 if (should_notify) {
8363 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task, current_size, soft_limit, hard_limit);
8364 }
8365}
8366
8367#if CONFIG_PROC_RESOURCE_LIMITS
8368static mach_port_t
8369task_allocate_fatal_port(void)
8370{
8371 mach_port_t task_fatal_port = MACH_PORT_NULL;
8372 task_id_token_t token;
8373
8374 kern_return_t kr = task_create_identity_token(current_task(), &token); /* Takes a reference on the token */
8375 if (kr) {
8376 return MACH_PORT_NULL;
8377 }
8378 task_fatal_port = ipc_kobject_alloc_port((ipc_kobject_t)token, IKOT_TASK_FATAL,
8379 IPC_KOBJECT_ALLOC_NSREQUEST | IPC_KOBJECT_ALLOC_MAKE_SEND);
8380
8381 task_id_token_set_port(token, task_fatal_port);
8382
8383 return task_fatal_port;
8384}
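
/*
 * The port above is allocated with IPC_KOBJECT_ALLOC_NSREQUEST and handed to
 * the resource-violation listener.  Once the last send right to it goes away,
 * the no-senders notification below resolves the identity token back to the
 * task and kills it; dropping the fatal port is therefore what ultimately
 * terminates a process that tripped a hard resource limit.
 */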
8385
8386static void
8387task_fatal_port_no_senders(ipc_port_t port, __unused mach_port_mscount_t mscount)
8388{
8389 task_t task = TASK_NULL;
8390 kern_return_t kr;
8391
8392 task_id_token_t token = ipc_kobject_get_stable(port, IKOT_TASK_FATAL);
8393
8394 assert(token != NULL);
8395 if (token) {
8396 kr = task_identity_token_get_task_grp(token, &task, TASK_GRP_KERNEL); /* takes a reference on task */
8397 if (task) {
8398 task_bsdtask_kill(task);
8399 task_deallocate(task);
8400 }
8401 task_id_token_release(token); /* consumes ref given by notification */
8402 }
8403}
8404#endif /* CONFIG_PROC_RESOURCE_LIMITS */
8405
8406void __attribute__((noinline))
8407SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_MACH_PORTS(task_t task, uint32_t current_size, uint32_t soft_limit, uint32_t hard_limit)
8408{
8409 int pid = 0;
8410 char *procname = (char *) "unknown";
8411 __unused kern_return_t kr;
8412 __unused resource_notify_flags_t flags = kRNFlagsNone;
8413 __unused uint32_t limit;
8414 __unused mach_port_t task_fatal_port = MACH_PORT_NULL;
8415 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
8416
8417 pid = proc_selfpid();
8418 if (get_bsdtask_info(task) != NULL) {
8419 procname = proc_name_address(p: get_bsdtask_info(task));
8420 }
8421
8422 /*
8423 * Only kernel_task and launchd may be allowed to
8424 * have really large ipc space.
8425 */
8426 if (pid == 0 || pid == 1) {
8427 return;
8428 }
8429
8430 os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many mach ports. \
8431 Num of ports allocated %u; \n", procname, pid, current_size);
8432
8433 /* Abort the process if it has hit the system-wide limit for ipc port table size */
8434 if (!hard_limit && !soft_limit) {
8435 code[0] = code[1] = 0;
8436 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_PORTS);
8437 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_PORT_SPACE_FULL);
8438 EXC_RESOURCE_PORTS_ENCODE_PORTS(code[0], current_size);
8439
8440 exit_with_port_space_exception(proc: current_proc(), code: code[0], subcode: code[1]);
8441
8442 return;
8443 }
8444
8445#if CONFIG_PROC_RESOURCE_LIMITS
8446 if (hard_limit > 0) {
8447 flags |= kRNHardLimitFlag;
8448 limit = hard_limit;
8449 task_fatal_port = task_allocate_fatal_port();
8450 if (!task_fatal_port) {
8451 os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8452 task_bsdtask_kill(task);
8453 }
8454 } else {
8455 flags |= kRNSoftLimitFlag;
8456 limit = soft_limit;
8457 }
8458
8459 kr = send_resource_violation_with_fatal_port(send_port_space_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8460 if (kr) {
8461 os_log(OS_LOG_DEFAULT, "send_resource_violation(ports, ...): error %#x\n", kr);
8462 }
8463 if (task_fatal_port) {
8464 ipc_port_release_send(task_fatal_port);
8465 }
8466#endif /* CONFIG_PROC_RESOURCE_LIMITS */
8467}
8468
8469#if CONFIG_PROC_RESOURCE_LIMITS
8470void
8471task_kqworkloop_ast(task_t task, int current_size, int soft_limit, int hard_limit)
8472{
8473 assert(task == current_task());
8474 return SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task, current_size, soft_limit, hard_limit);
8475}
8476
8477void __attribute__((noinline))
8478SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_KQWORKLOOPS(task_t task, int current_size, int soft_limit, int hard_limit)
8479{
8480 int pid = 0;
8481 char *procname = (char *) "unknown";
8482#ifdef MACH_BSD
8483 pid = proc_selfpid();
8484 if (get_bsdtask_info(task) != NULL) {
8485 procname = proc_name_address(get_bsdtask_info(task));
8486 }
8487#endif
8488 if (pid == 0 || pid == 1) {
8489 return;
8490 }
8491
8492 os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many kqworkloops. \
8493 Num of kqworkloops allocated %u; \n", procname, pid, current_size);
8494
8495 int limit = 0;
8496 resource_notify_flags_t flags = kRNFlagsNone;
8497 mach_port_t task_fatal_port = MACH_PORT_NULL;
8498 if (hard_limit) {
8499 flags |= kRNHardLimitFlag;
8500 limit = hard_limit;
8501
8502 task_fatal_port = task_allocate_fatal_port();
8503 if (task_fatal_port == MACH_PORT_NULL) {
8504 os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8505 task_bsdtask_kill(task);
8506 }
8507 } else {
8508 flags |= kRNSoftLimitFlag;
8509 limit = soft_limit;
8510 }
8511
8512 kern_return_t kr;
8513 kr = send_resource_violation_with_fatal_port(send_kqworkloops_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8514 if (kr) {
8515 os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(kqworkloops, ...): error %#x\n", kr);
8516 }
8517 if (task_fatal_port) {
8518 ipc_port_release_send(task_fatal_port);
8519 }
8520}
8521
8522
8523void
8524task_filedesc_ast(__unused task_t task, __unused int current_size, __unused int soft_limit, __unused int hard_limit)
8525{
8526 assert(task == current_task());
8527 SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task, current_size, soft_limit, hard_limit);
8528}
8529
8530void __attribute__((noinline))
8531SENDING_NOTIFICATION__THIS_PROCESS_HAS_TOO_MANY_FILE_DESCRIPTORS(task_t task, int current_size, int soft_limit, int hard_limit)
8532{
8533 int pid = 0;
8534 char *procname = (char *) "unknown";
8535 kern_return_t kr;
8536 resource_notify_flags_t flags = kRNFlagsNone;
8537 int limit;
8538 mach_port_t task_fatal_port = MACH_PORT_NULL;
8539
8540#ifdef MACH_BSD
8541 pid = proc_selfpid();
8542 if (get_bsdtask_info(task) != NULL) {
8543 procname = proc_name_address(get_bsdtask_info(task));
8544 }
8545#endif
8546 /*
8547 * Only kernel_task and launchd may be allowed to
8548 * have really large ipc space.
8549 */
8550 if (pid == 0 || pid == 1) {
8551 return;
8552 }
8553
8554 os_log(OS_LOG_DEFAULT, "process %s[%d] caught allocating too many file descriptors. \
8555 Num of fds allocated %u; \n", procname, pid, current_size);
8556
8557 if (hard_limit > 0) {
8558 flags |= kRNHardLimitFlag;
8559 limit = hard_limit;
8560 task_fatal_port = task_allocate_fatal_port();
8561 if (!task_fatal_port) {
8562 os_log(OS_LOG_DEFAULT, "process %s[%d] Unable to create task token ident object", procname, pid);
8563 task_bsdtask_kill(task);
8564 }
8565 } else {
8566 flags |= kRNSoftLimitFlag;
8567 limit = soft_limit;
8568 }
8569
8570 kr = send_resource_violation_with_fatal_port(send_file_descriptors_violation, task, (int64_t)current_size, (int64_t)limit, task_fatal_port, flags);
8571 if (kr) {
8572 os_log(OS_LOG_DEFAULT, "send_resource_violation_with_fatal_port(filedesc, ...): error %#x\n", kr);
8573 }
8574 if (task_fatal_port) {
8575 ipc_port_release_send(task_fatal_port);
8576 }
8577}
8578#endif /* CONFIG_PROC_RESOURCE_LIMITS */
8579
8580/* Placeholders for the task set/get voucher interfaces */
8581kern_return_t
8582task_get_mach_voucher(
8583 task_t task,
8584 mach_voucher_selector_t __unused which,
8585 ipc_voucher_t *voucher)
8586{
8587 if (TASK_NULL == task) {
8588 return KERN_INVALID_TASK;
8589 }
8590
8591 *voucher = NULL;
8592 return KERN_SUCCESS;
8593}
8594
8595kern_return_t
8596task_set_mach_voucher(
8597 task_t task,
8598 ipc_voucher_t __unused voucher)
8599{
8600 if (TASK_NULL == task) {
8601 return KERN_INVALID_TASK;
8602 }
8603
8604 return KERN_SUCCESS;
8605}
8606
8607kern_return_t
8608task_swap_mach_voucher(
8609 __unused task_t task,
8610 __unused ipc_voucher_t new_voucher,
8611 ipc_voucher_t *in_out_old_voucher)
8612{
8613 /*
8614 * Currently this function is only called from a MIG generated
8615 * routine which doesn't release the reference on the voucher
8616 * addressed by in_out_old_voucher. To avoid leaking this reference,
8617 * a call to release it has been added here.
8618 */
8619 ipc_voucher_release(voucher: *in_out_old_voucher);
8620 OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
8621}
8622
8623void
8624task_set_gpu_denied(task_t task, boolean_t denied)
8625{
8626 task_lock(task);
8627
8628 if (denied) {
8629 task->t_flags |= TF_GPU_DENIED;
8630 } else {
8631 task->t_flags &= ~TF_GPU_DENIED;
8632 }
8633
8634 task_unlock(task);
8635}
8636
8637boolean_t
8638task_is_gpu_denied(task_t task)
8639{
8640 /* We don't need the lock to read this flag */
8641 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
8642}
8643
8644/*
8645 * Task policy termination uses this path to clear the bit the final time
8646 * during the termination flow, and the TASK_POLICY_TERMINATED bit guarantees
8647 * that it won't be changed again on a terminated task.
8648 */
8649bool
8650task_set_game_mode_locked(task_t task, bool enabled)
8651{
8652 task_lock_assert_owned(task);
8653
8654 if (enabled) {
8655 assert(proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0);
8656 }
8657
8658 bool previously_enabled = task_get_game_mode(task);
8659 bool needs_update = false;
8660 uint32_t new_count = 0;
8661
8662 if (enabled) {
8663 task->t_flags |= TF_GAME_MODE;
8664 } else {
8665 task->t_flags &= ~TF_GAME_MODE;
8666 }
8667
8668 if (enabled && !previously_enabled) {
8669 if (task_coalition_adjust_game_mode_count(task, count: 1, new_count: &new_count) && (new_count == 1)) {
8670 needs_update = true;
8671 }
8672 } else if (!enabled && previously_enabled) {
8673 if (task_coalition_adjust_game_mode_count(task, count: -1, new_count: &new_count) && (new_count == 0)) {
8674 needs_update = true;
8675 }
8676 }
8677
8678 return needs_update;
8679}
8680
8681void
8682task_set_game_mode(task_t task, bool enabled)
8683{
8684 bool needs_update = false;
8685
8686 task_lock(task);
8687
8688 /* After termination, further updates are no longer effective */
8689 if (proc_get_effective_task_policy(task, TASK_POLICY_TERMINATED) == 0) {
8690 needs_update = task_set_game_mode_locked(task, enabled);
8691 }
8692
8693 task_unlock(task);
8694
8695#if CONFIG_THREAD_GROUPS
8696 if (needs_update) {
8697 task_coalition_thread_group_game_mode_update(task);
8698 }
8699#endif /* CONFIG_THREAD_GROUPS */
8700}
8701
8702bool
8703task_get_game_mode(task_t task)
8704{
8705 /* We don't need the lock to read this flag */
8706 return task->t_flags & TF_GAME_MODE;
8707}
8708
8709
8710uint64_t
8711get_task_memory_region_count(task_t task)
8712{
8713 vm_map_t map;
8714 map = (task == kernel_task) ? kernel_map: task->map;
8715 return (uint64_t)get_map_nentries(map);
8716}
8717
8718static void
8719kdebug_trace_dyld_internal(uint32_t base_code,
8720 struct dyld_kernel_image_info *info)
8721{
8722 static_assert(sizeof(info->uuid) >= 16);
8723
8724#if defined(__LP64__)
8725 uint64_t *uuid = (uint64_t *)&(info->uuid);
8726
8727 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8728 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
8729 uuid[1], info->load_addr,
8730 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
8731 0);
8732 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8733 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
8734 (uint64_t)info->fsobjid.fid_objno |
8735 ((uint64_t)info->fsobjid.fid_generation << 32),
8736 0, 0, 0, 0);
8737#else /* defined(__LP64__) */
8738 uint32_t *uuid = (uint32_t *)&(info->uuid);
8739
8740 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8741 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
8742 uuid[1], uuid[2], uuid[3], 0);
8743 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8744 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
8745 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
8746 info->fsobjid.fid_objno, 0);
8747 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
8748 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
8749 info->fsobjid.fid_generation, 0, 0, 0, 0);
8750#endif /* !defined(__LP64__) */
8751}
8752
8753static kern_return_t
8754kdebug_trace_dyld(task_t task, uint32_t base_code,
8755 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
8756{
8757 kern_return_t kr;
8758 dyld_kernel_image_info_array_t infos;
8759 vm_map_offset_t map_data;
8760 vm_offset_t data;
8761
8762 if (!infos_copy) {
8763 return KERN_INVALID_ADDRESS;
8764 }
8765
8766 if (!kdebug_enable ||
8767 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0))) {
8768 vm_map_copy_discard(infos_copy);
8769 return KERN_SUCCESS;
8770 }
8771
8772 if (task == NULL || task != current_task()) {
8773 return KERN_INVALID_TASK;
8774 }
8775
8776 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
8777 if (kr != KERN_SUCCESS) {
8778 return kr;
8779 }
8780
8781 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
8782
8783 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
8784 kdebug_trace_dyld_internal(base_code, &(infos[i]));
8785 }
8786
8787 data = CAST_DOWN(vm_offset_t, map_data);
8788 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
8789 return KERN_SUCCESS;
8790}
8791
8792kern_return_t
8793task_register_dyld_image_infos(task_t task,
8794 dyld_kernel_image_info_array_t infos_copy,
8795 mach_msg_type_number_t infos_len)
8796{
8797 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
8798 (vm_map_copy_t)infos_copy, infos_len);
8799}
8800
8801kern_return_t
8802task_unregister_dyld_image_infos(task_t task,
8803 dyld_kernel_image_info_array_t infos_copy,
8804 mach_msg_type_number_t infos_len)
8805{
8806 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
8807 (vm_map_copy_t)infos_copy, infos_len);
8808}
8809
8810kern_return_t
8811task_get_dyld_image_infos(__unused task_t task,
8812 __unused dyld_kernel_image_info_array_t * dyld_images,
8813 __unused mach_msg_type_number_t * dyld_imagesCnt)
8814{
8815 return KERN_NOT_SUPPORTED;
8816}
8817
8818kern_return_t
8819task_register_dyld_shared_cache_image_info(task_t task,
8820 dyld_kernel_image_info_t cache_img,
8821 __unused boolean_t no_cache,
8822 __unused boolean_t private_cache)
8823{
8824 if (task == NULL || task != current_task()) {
8825 return KERN_INVALID_TASK;
8826 }
8827
8828 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
8829 return KERN_SUCCESS;
8830}
8831
8832kern_return_t
8833task_register_dyld_set_dyld_state(__unused task_t task,
8834 __unused uint8_t dyld_state)
8835{
8836 return KERN_NOT_SUPPORTED;
8837}
8838
8839kern_return_t
8840task_register_dyld_get_process_state(__unused task_t task,
8841 __unused dyld_kernel_process_info_t * dyld_process_state)
8842{
8843 return KERN_NOT_SUPPORTED;
8844}
8845
8846kern_return_t
8847task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
8848 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
8849{
8850#if CONFIG_PERVASIVE_CPI
8851 task_t task = (task_t)task_insp;
8852 kern_return_t kr = KERN_SUCCESS;
8853 mach_msg_type_number_t size;
8854
8855 if (task == TASK_NULL) {
8856 return KERN_INVALID_ARGUMENT;
8857 }
8858
8859 size = *size_in_out;
8860
8861 switch (flavor) {
8862 case TASK_INSPECT_BASIC_COUNTS: {
8863 struct task_inspect_basic_counts *bc =
8864 (struct task_inspect_basic_counts *)info_out;
8865 struct recount_usage stats = { 0 };
8866 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
8867 kr = KERN_INVALID_ARGUMENT;
8868 break;
8869 }
8870
8871 recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, &stats);
8872 bc->instructions = recount_usage_instructions(&stats);
8873 bc->cycles = recount_usage_cycles(&stats);
8874 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
8875 break;
8876 }
8877 default:
8878 kr = KERN_INVALID_ARGUMENT;
8879 break;
8880 }
8881
8882 if (kr == KERN_SUCCESS) {
8883 *size_in_out = size;
8884 }
8885 return kr;
8886#else /* CONFIG_PERVASIVE_CPI */
8887#pragma unused(task_insp, flavor, info_out, size_in_out)
8888 return KERN_NOT_SUPPORTED;
8889#endif /* !CONFIG_PERVASIVE_CPI */
8890}
8891
8892#if CONFIG_SECLUDED_MEMORY
8893int num_tasks_can_use_secluded_mem = 0;
8894
8895void
8896task_set_can_use_secluded_mem(
8897 task_t task,
8898 boolean_t can_use_secluded_mem)
8899{
8900 if (!task->task_could_use_secluded_mem) {
8901 return;
8902 }
8903 task_lock(task);
8904 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
8905 task_unlock(task);
8906}
8907
8908void
8909task_set_can_use_secluded_mem_locked(
8910 task_t task,
8911 boolean_t can_use_secluded_mem)
8912{
8913 assert(task->task_could_use_secluded_mem);
8914 if (can_use_secluded_mem &&
8915 secluded_for_apps && /* global boot-arg */
8916 !task->task_can_use_secluded_mem) {
8917 assert(num_tasks_can_use_secluded_mem >= 0);
8918 OSAddAtomic(+1,
8919 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8920 task->task_can_use_secluded_mem = TRUE;
8921 } else if (!can_use_secluded_mem &&
8922 task->task_can_use_secluded_mem) {
8923 assert(num_tasks_can_use_secluded_mem > 0);
8924 OSAddAtomic(-1,
8925 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
8926 task->task_can_use_secluded_mem = FALSE;
8927 }
8928}
8929
8930void
8931task_set_could_use_secluded_mem(
8932 task_t task,
8933 boolean_t could_use_secluded_mem)
8934{
8935 task->task_could_use_secluded_mem = !!could_use_secluded_mem;
8936}
8937
8938void
8939task_set_could_also_use_secluded_mem(
8940 task_t task,
8941 boolean_t could_also_use_secluded_mem)
8942{
8943 task->task_could_also_use_secluded_mem = !!could_also_use_secluded_mem;
8944}
8945
8946boolean_t
8947task_can_use_secluded_mem(
8948 task_t task,
8949 boolean_t is_alloc)
8950{
8951 if (task->task_can_use_secluded_mem) {
8952 assert(task->task_could_use_secluded_mem);
8953 assert(num_tasks_can_use_secluded_mem > 0);
8954 return TRUE;
8955 }
8956 if (task->task_could_also_use_secluded_mem &&
8957 num_tasks_can_use_secluded_mem > 0) {
8958 assert(num_tasks_can_use_secluded_mem > 0);
8959 return TRUE;
8960 }
8961
8962 /*
8963 * If a single task is using more than some large amount of
8964 * memory (i.e. secluded_shutoff_trigger) and is approaching
8965 * its task limit, allow it to dip into secluded and begin
8966 * suppression of rebuilding secluded memory until that task exits.
8967 */
8968 if (is_alloc && secluded_shutoff_trigger != 0) {
8969 uint64_t phys_used = get_task_phys_footprint(task);
8970 uint64_t limit = get_task_phys_footprint_limit(task);
8971 if (phys_used > secluded_shutoff_trigger &&
8972 limit > secluded_shutoff_trigger &&
8973 phys_used > limit - secluded_shutoff_headroom) {
8974 start_secluded_suppression(task);
8975 return TRUE;
8976 }
8977 }
8978
8979 return FALSE;
8980}
8981
8982boolean_t
8983task_could_use_secluded_mem(
8984 task_t task)
8985{
8986 return task->task_could_use_secluded_mem;
8987}
8988
8989boolean_t
8990task_could_also_use_secluded_mem(
8991 task_t task)
8992{
8993 return task->task_could_also_use_secluded_mem;
8994}
8995#endif /* CONFIG_SECLUDED_MEMORY */
8996
8997queue_head_t *
8998task_io_user_clients(task_t task)
8999{
9000 return &task->io_user_clients;
9001}
9002
9003void
9004task_set_message_app_suspended(task_t task, boolean_t enable)
9005{
9006 task->message_app_suspended = enable;
9007}
9008
9009void
9010task_copy_fields_for_exec(task_t dst_task, task_t src_task)
9011{
9012 dst_task->vtimers = src_task->vtimers;
9013}
9014
9015#if DEVELOPMENT || DEBUG
9016int vm_region_footprint = 0;
9017#endif /* DEVELOPMENT || DEBUG */
9018
9019boolean_t
9020task_self_region_footprint(void)
9021{
9022#if DEVELOPMENT || DEBUG
9023 if (vm_region_footprint) {
9024 /* system-wide override */
9025 return TRUE;
9026 }
9027#endif /* DEVELOPMENT || DEBUG */
9028 return current_task()->task_region_footprint;
9029}
9030
9031void
9032task_self_region_footprint_set(
9033 boolean_t newval)
9034{
9035 task_t curtask;
9036
9037 curtask = current_task();
9038 task_lock(curtask);
9039 if (newval) {
9040 curtask->task_region_footprint = TRUE;
9041 } else {
9042 curtask->task_region_footprint = FALSE;
9043 }
9044 task_unlock(curtask);
9045}
9046
9047void
9048task_set_darkwake_mode(task_t task, boolean_t set_mode)
9049{
9050 assert(task);
9051
9052 task_lock(task);
9053
9054 if (set_mode) {
9055 task->t_flags |= TF_DARKWAKE_MODE;
9056 } else {
9057 task->t_flags &= ~(TF_DARKWAKE_MODE);
9058 }
9059
9060 task_unlock(task);
9061}
9062
9063boolean_t
9064task_get_darkwake_mode(task_t task)
9065{
9066 assert(task);
9067 return (task->t_flags & TF_DARKWAKE_MODE) != 0;
9068}
9069
9070/*
9071 * Set default behavior for task's control port and EXC_GUARD variants that have
9072 * settable behavior.
9073 *
9074 * Platform binaries typically have one behavior, third parties another -
9075 * but there are special exceptions we may need to account for.
9076 */
9077void
9078task_set_exc_guard_ctrl_port_default(
9079 task_t task,
9080 thread_t main_thread,
9081 const char *name,
9082 unsigned int namelen,
9083 boolean_t is_simulated,
9084 uint32_t platform,
9085 uint32_t sdk)
9086{
9087 task_control_port_options_t opts = TASK_CONTROL_PORT_OPTIONS_NONE;
9088
9089 if (task_is_hardened_binary(task)) {
9090 /* set exc guard default behavior for hardened binaries */
9091 task->task_exc_guard = (task_exc_guard_default & TASK_EXC_GUARD_ALL);
9092
9093 if (1 == task_pid(task)) {
9094 /* special flags for inittask - deliver every instance as a corpse */
9095 task->task_exc_guard = _TASK_EXC_GUARD_ALL_CORPSE;
9096 } else if (task_exc_guard_default & TASK_EXC_GUARD_HONOR_NAMED_DEFAULTS) {
9097 /* honor by-name default setting overrides */
9098
9099 int count = sizeof(task_exc_guard_named_defaults) / sizeof(struct task_exc_guard_named_default);
9100
9101 for (int i = 0; i < count; i++) {
9102 const struct task_exc_guard_named_default *named_default =
9103 &task_exc_guard_named_defaults[i];
9104 if (strncmp(named_default->name, name, namelen) == 0 &&
9105 strlen(named_default->name) == namelen) {
9106 task->task_exc_guard = named_default->behavior;
9107 break;
9108 }
9109 }
9110 }
9111
9112 /* set control port options for 1p code, inherited from parent task by default */
9113 opts = ipc_control_port_options & ICP_OPTIONS_1P_MASK;
9114 } else {
9115 /* set exc guard default behavior for third-party code */
9116 task->task_exc_guard = ((task_exc_guard_default >> TASK_EXC_GUARD_THIRD_PARTY_DEFAULT_SHIFT) & TASK_EXC_GUARD_ALL);
9117 /* set control port options for 3p code, inherited from parent task by default */
9118 opts = (ipc_control_port_options & ICP_OPTIONS_3P_MASK) >> ICP_OPTIONS_3P_SHIFT;
9119 }
9120
9121 if (is_simulated) {
9122 /* If simulated and built against pre-iOS 15 SDK, disable all EXC_GUARD */
9123 if ((platform == PLATFORM_IOSSIMULATOR && sdk < 0xf0000) ||
9124 (platform == PLATFORM_TVOSSIMULATOR && sdk < 0xf0000) ||
9125 (platform == PLATFORM_WATCHOSSIMULATOR && sdk < 0x80000)) {
9126 task->task_exc_guard = TASK_EXC_GUARD_NONE;
9127 }
9128 /* Disable protection for control ports for simulated binaries */
9129 opts = TASK_CONTROL_PORT_OPTIONS_NONE;
9130 }
9131
9132
9133 task_set_control_port_options(task, opts);
9134
9135 task_set_immovable_pinned(task);
9136 main_thread_set_immovable_pinned(main_thread);
9137}
9138
9139kern_return_t
9140task_get_exc_guard_behavior(
9141 task_t task,
9142 task_exc_guard_behavior_t *behaviorp)
9143{
9144 if (task == TASK_NULL) {
9145 return KERN_INVALID_TASK;
9146 }
9147 *behaviorp = task->task_exc_guard;
9148 return KERN_SUCCESS;
9149}
9150
9151kern_return_t
9152task_set_exc_guard_behavior(
9153 task_t task,
9154 task_exc_guard_behavior_t new_behavior)
9155{
9156 if (task == TASK_NULL) {
9157 return KERN_INVALID_TASK;
9158 }
9159 if (new_behavior & ~TASK_EXC_GUARD_ALL) {
9160 return KERN_INVALID_VALUE;
9161 }
9162
9163 /* limit setting to that allowed for this config */
9164 new_behavior = new_behavior & task_exc_guard_config_mask;
9165
9166#if !defined (DEBUG) && !defined (DEVELOPMENT)
9167 /* On release kernels, only allow _upgrading_ exc guard behavior */
9168 task_exc_guard_behavior_t cur_behavior;
9169
9170 os_atomic_rmw_loop(&task->task_exc_guard, cur_behavior, new_behavior, relaxed, {
9171 if ((cur_behavior & task_exc_guard_no_unset_mask) & ~(new_behavior & task_exc_guard_no_unset_mask)) {
9172 os_atomic_rmw_loop_give_up(return KERN_DENIED);
9173 }
9174
9175 if ((new_behavior & task_exc_guard_no_set_mask) & ~(cur_behavior & task_exc_guard_no_set_mask)) {
9176 os_atomic_rmw_loop_give_up(return KERN_DENIED);
9177 }
9178
9179 /* no restrictions on CORPSE bit */
9180 });
9181#else
9182 task->task_exc_guard = new_behavior;
9183#endif
9184 return KERN_SUCCESS;
9185}
9186
9187kern_return_t
9188task_set_corpse_forking_behavior(task_t task, task_corpse_forking_behavior_t behavior)
9189{
9190#if DEVELOPMENT || DEBUG
9191 if (task == TASK_NULL) {
9192 return KERN_INVALID_TASK;
9193 }
9194
9195 task_lock(task);
9196 if (behavior & TASK_CORPSE_FORKING_DISABLED_MEM_DIAG) {
9197 task->t_flags |= TF_NO_CORPSE_FORKING;
9198 } else {
9199 task->t_flags &= ~TF_NO_CORPSE_FORKING;
9200 }
9201 task_unlock(task);
9202
9203 return KERN_SUCCESS;
9204#else
9205 (void)task;
9206 (void)behavior;
9207 return KERN_NOT_SUPPORTED;
9208#endif
9209}
9210
9211boolean_t
9212task_corpse_forking_disabled(task_t task)
9213{
9214 boolean_t disabled = FALSE;
9215
9216 task_lock(task);
9217 disabled = (task->t_flags & TF_NO_CORPSE_FORKING);
9218 task_unlock(task);
9219
9220 return disabled;
9221}
9222
9223#if __arm64__
9224extern int legacy_footprint_entitlement_mode;
9225extern void memorystatus_act_on_legacy_footprint_entitlement(struct proc *, boolean_t);
9226extern void memorystatus_act_on_ios13extended_footprint_entitlement(struct proc *);
9227
9228
9229void
9230task_set_legacy_footprint(
9231 task_t task)
9232{
9233 task_lock(task);
9234 task->task_legacy_footprint = TRUE;
9235 task_unlock(task);
9236}
9237
9238void
9239task_set_extra_footprint_limit(
9240 task_t task)
9241{
9242 if (task->task_extra_footprint_limit) {
9243 return;
9244 }
9245 task_lock(task);
9246 if (task->task_extra_footprint_limit) {
9247 task_unlock(task);
9248 return;
9249 }
9250 task->task_extra_footprint_limit = TRUE;
9251 task_unlock(task);
9252 memorystatus_act_on_legacy_footprint_entitlement(get_bsdtask_info(task), TRUE);
9253}
9254
9255void
9256task_set_ios13extended_footprint_limit(
9257 task_t task)
9258{
9259 if (task->task_ios13extended_footprint_limit) {
9260 return;
9261 }
9262 task_lock(task);
9263 if (task->task_ios13extended_footprint_limit) {
9264 task_unlock(task);
9265 return;
9266 }
9267 task->task_ios13extended_footprint_limit = TRUE;
9268 task_unlock(task);
9269 memorystatus_act_on_ios13extended_footprint_entitlement(get_bsdtask_info(task));
9270}
9271#endif /* __arm64__ */
9272
9273static inline ledger_amount_t
9274task_ledger_get_balance(
9275 ledger_t ledger,
9276 int ledger_idx)
9277{
9278 ledger_amount_t amount;
9279 amount = 0;
9280 ledger_get_balance(ledger, ledger_idx, &amount);
9281 return amount;
9282}
9283
9284/*
9285 * Gather the amount of memory counted in a task's footprint due to
9286 * being in a specific set of ledgers.
9287 */
9288void
9289task_ledgers_footprint(
9290 ledger_t ledger,
9291 ledger_amount_t *ledger_resident,
9292 ledger_amount_t *ledger_compressed)
9293{
9294 *ledger_resident = 0;
9295 *ledger_compressed = 0;
9296
9297 /* purgeable non-volatile memory */
9298 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile);
9299 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed);
9300
9301 /* "default" tagged memory */
9302 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint);
9303 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.tagged_footprint_compressed);
9304
9305 /* "network" currently never counts in the footprint... */
9306
9307 /* "media" tagged memory */
9308 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.media_footprint);
9309 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.media_footprint_compressed);
9310
9311 /* "graphics" tagged memory */
9312 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint);
9313 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.graphics_footprint_compressed);
9314
9315 /* "neural" tagged memory */
9316 *ledger_resident += task_ledger_get_balance(ledger, task_ledgers.neural_footprint);
9317 *ledger_compressed += task_ledger_get_balance(ledger, task_ledgers.neural_footprint_compressed);
9318}
9319
9320#if CONFIG_MEMORYSTATUS
9321/*
9322 * Credit any outstanding task dirty time to the ledger.
9323 * memstat_dirty_start is pushed forward to prevent any possibility of double
9324 * counting, making it safe to call this as often as necessary to ensure that
9325 * anyone reading the ledger gets up-to-date information.
9326 */
9327void
9328task_ledger_settle_dirty_time(task_t t)
9329{
9330 task_lock(t);
9331
9332 uint64_t start = t->memstat_dirty_start;
9333 if (start) {
9334 uint64_t now = mach_absolute_time();
9335
9336 uint64_t duration;
9337 absolutetime_to_nanoseconds(now - start, &duration);
9338
9339 ledger_t ledger = get_task_ledger(t);
9340 ledger_credit(ledger, task_ledgers.memorystatus_dirty_time, duration);
9341
9342 t->memstat_dirty_start = now;
9343 }
9344
9345 task_unlock(t);
9346}
9347#endif /* CONFIG_MEMORYSTATUS */
9348
9349void
9350task_set_memory_ownership_transfer(
9351 task_t task,
9352 boolean_t value)
9353{
9354 task_lock(task);
9355 task->task_can_transfer_memory_ownership = !!value;
9356 task_unlock(task);
9357}
9358
9359#if DEVELOPMENT || DEBUG
9360
9361void
9362task_set_no_footprint_for_debug(task_t task, boolean_t value)
9363{
9364 task_lock(task);
9365 task->task_no_footprint_for_debug = !!value;
9366 task_unlock(task);
9367}
9368
9369int
9370task_get_no_footprint_for_debug(task_t task)
9371{
9372 return task->task_no_footprint_for_debug;
9373}
9374
9375#endif /* DEVELOPMENT || DEBUG */
9376
9377void
9378task_copy_vmobjects(task_t task, vm_object_query_t query, size_t len, size_t *num)
9379{
9380 vm_object_t find_vmo;
9381 size_t size = 0;
9382
9383 /*
9384 * Allocate a save area for FP state before taking task_objq lock,
9385 * if necessary, to ensure that VM_KERNEL_ADDRHASH() doesn't cause
9386 * an FP state allocation while holding VM locks.
9387 */
9388 ml_fp_save_area_prealloc();
9389
9390 task_objq_lock(task);
9391 if (query != NULL) {
9392 queue_iterate(&task->task_objq, find_vmo, vm_object_t, task_objq)
9393 {
9394 vm_object_query_t p = &query[size++];
9395
9396 /* make sure to not overrun */
9397 if (size * sizeof(vm_object_query_data_t) > len) {
9398 --size;
9399 break;
9400 }
9401
9402 bzero(p, sizeof(*p));
9403 p->object_id = (vm_object_id_t) VM_KERNEL_ADDRHASH(find_vmo);
9404 p->virtual_size = find_vmo->internal ? find_vmo->vo_size : 0;
9405 p->resident_size = find_vmo->resident_page_count * PAGE_SIZE;
9406 p->wired_size = find_vmo->wired_page_count * PAGE_SIZE;
9407 p->reusable_size = find_vmo->reusable_page_count * PAGE_SIZE;
9408 p->vo_no_footprint = find_vmo->vo_no_footprint;
9409 p->vo_ledger_tag = find_vmo->vo_ledger_tag;
9410 p->purgable = find_vmo->purgable;
9411
9412 if (find_vmo->internal && find_vmo->pager_created && find_vmo->pager != NULL) {
9413 p->compressed_size = vm_compressor_pager_get_count(find_vmo->pager) * PAGE_SIZE;
9414 } else {
9415 p->compressed_size = 0;
9416 }
9417 }
9418 } else {
9419 size = (size_t)task->task_owned_objects;
9420 }
9421 task_objq_unlock(task);
9422
9423 *num = size;
9424}
9425
9426void
9427task_get_owned_vmobjects(task_t task, size_t buffer_size, vmobject_list_output_t buffer, size_t* output_size, size_t* entries)
9428{
9429 assert(output_size);
9430 assert(entries);
9431
9432 /* copy the vmobjects and vmobject data out of the task */
9433 if (buffer_size == 0) {
9434 task_copy_vmobjects(task, NULL, 0, entries);
9435 *output_size = (*entries > 0) ? *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer) : 0;
9436 } else {
9437 assert(buffer);
9438 task_copy_vmobjects(task, &buffer->data[0], buffer_size - sizeof(*buffer), entries);
9439 buffer->entries = (uint64_t)*entries;
9440 *output_size = *entries * sizeof(vm_object_query_data_t) + sizeof(*buffer);
9441 }
9442}
9443
9444void
9445task_store_owned_vmobject_info(task_t to_task, task_t from_task)
9446{
9447 size_t buffer_size;
9448 vmobject_list_output_t buffer;
9449 size_t output_size;
9450 size_t entries;
9451
9452 assert(to_task != from_task);
9453
9454 /* get the size, allocate a buffer, and populate */
9455 entries = 0;
9456 output_size = 0;
9457 task_get_owned_vmobjects(from_task, 0, NULL, &output_size, &entries);
9458
9459 if (output_size) {
9460 buffer_size = output_size;
9461 buffer = kalloc_data(buffer_size, Z_WAITOK);
9462
9463 if (buffer) {
9464 entries = 0;
9465 output_size = 0;
9466
9467 task_get_owned_vmobjects(from_task, buffer_size, buffer, &output_size, &entries);
9468
9469 if (entries) {
9470 to_task->corpse_vmobject_list = buffer;
9471 to_task->corpse_vmobject_list_size = buffer_size;
9472 }
9473 }
9474 }
9475}
9476
9477void
9478task_set_filter_msg_flag(
9479 task_t task,
9480 boolean_t flag)
9481{
9482 assert(task != TASK_NULL);
9483
9484 if (flag) {
9485 task_ro_flags_set(task, TFRO_FILTER_MSG);
9486 } else {
9487 task_ro_flags_clear(task, TFRO_FILTER_MSG);
9488 }
9489}
9490
9491boolean_t
9492task_get_filter_msg_flag(
9493 task_t task)
9494{
9495 if (!task) {
9496 return false;
9497 }
9498
9499 return (task_ro_flags_get(task) & TFRO_FILTER_MSG) ? TRUE : FALSE;
9500}
9501bool
9502task_is_exotic(
9503 task_t task)
9504{
9505 if (task == TASK_NULL) {
9506 return false;
9507 }
9508 return vm_map_is_exotic(get_task_map(task));
9509}
9510
9511bool
9512task_is_alien(
9513 task_t task)
9514{
9515 if (task == TASK_NULL) {
9516 return false;
9517 }
9518 return vm_map_is_alien(get_task_map(task));
9519}
9520
9521
9522
9523#if CONFIG_MACF
9524/* Set the filter mask for Mach traps. */
9525void
9526mac_task_set_mach_filter_mask(task_t task, uint8_t *maskptr)
9527{
9528 assert(task);
9529
9530 task_set_mach_trap_filter_mask(task, maskptr);
9531}
9532
9533/* Set the filter mask for kobject msgs. */
9534void
9535mac_task_set_kobj_filter_mask(task_t task, uint8_t *maskptr)
9536{
9537 assert(task);
9538
9539 task_set_mach_kobj_filter_mask(task, maskptr);
9540}
9541
9542/* Hook for mach trap/sc filter evaluation policy. */
9543SECURITY_READ_ONLY_LATE(mac_task_mach_filter_cbfunc_t) mac_task_mach_trap_evaluate = NULL;
9544
9545/* Hook for kobj message filter evaluation policy. */
9546SECURITY_READ_ONLY_LATE(mac_task_kobj_filter_cbfunc_t) mac_task_kobj_msg_evaluate = NULL;
9547
9548/* Set the callback hooks for the filtering policy. */
9549int
9550mac_task_register_filter_callbacks(
9551 const mac_task_mach_filter_cbfunc_t mach_cbfunc,
9552 const mac_task_kobj_filter_cbfunc_t kobj_cbfunc)
9553{
9554 if (mach_cbfunc != NULL) {
9555 if (mac_task_mach_trap_evaluate != NULL) {
9556 return KERN_FAILURE;
9557 }
9558 mac_task_mach_trap_evaluate = mach_cbfunc;
9559 }
9560 if (kobj_cbfunc != NULL) {
9561 if (mac_task_kobj_msg_evaluate != NULL) {
9562 return KERN_FAILURE;
9563 }
9564 mac_task_kobj_msg_evaluate = kobj_cbfunc;
9565 }
9566
9567 return KERN_SUCCESS;
9568}
9569#endif /* CONFIG_MACF */
9570
9571#if CONFIG_ROSETTA
9572bool
9573task_is_translated(task_t task)
9574{
9575 extern boolean_t proc_is_translated(struct proc* p);
9576 return task && proc_is_translated(get_bsdtask_info(task));
9577}
9578#endif
9579
9580
9581
9582#if __has_feature(ptrauth_calls)
9583/* On FPAC, we want to deliver all PAC violations as fatal exceptions, regardless
9584 * of the enable_pac_exception boot-arg value or any other entitlements.
9585 * The only case where we allow non-fatal PAC exceptions on FPAC is for debugging,
9586 * which requires Developer Mode enabled.
9587 *
9588 * On non-FPAC hardware, we gate the decision behind entitlements and the
9589 * enable_pac_exception boot-arg.
9590 */
9591extern int gARM_FEAT_FPAC;
9592/*
9593 * Having the PAC_EXCEPTION_ENTITLEMENT entitlement means we always enforce all
9594 * of the PAC exception hardening: fatal exceptions and signed user state.
9595 */
9596#define PAC_EXCEPTION_ENTITLEMENT "com.apple.private.pac.exception"
9597/*
9598 * On non-FPAC hardware, when enable_pac_exception boot-arg is set to true,
9599 * processes can choose to get non-fatal PAC exception delivery by setting
9600 * the SKIP_PAC_EXCEPTION_ENTITLEMENT entitlement.
9601 */
9602#define SKIP_PAC_EXCEPTION_ENTITLEMENT "com.apple.private.skip.pac.exception"
9603
9604void
9605task_set_pac_exception_fatal_flag(
9606 task_t task)
9607{
9608 assert(task != TASK_NULL);
9609 bool pac_hardened_task = false;
9610 uint32_t set_flags = 0;
9611
9612 /*
9613 * We must not apply this security policy on tasks which have opted out of mach hardening to
9614 * avoid regressions in third party plugins and third party apps when using AMFI boot-args
9615 */
9616 bool platform_binary = task_get_platform_binary(task);
9617#if XNU_TARGET_OS_OSX
9618 platform_binary &= !task_opted_out_mach_hardening(task);
9619#endif /* XNU_TARGET_OS_OSX */
9620
9621 /*
9622 * On non-FPAC hardware, we allow gating PAC exceptions behind
9623 * SKIP_PAC_EXCEPTION_ENTITLEMENT and the boot-arg.
9624 */
9625 if (!gARM_FEAT_FPAC && enable_pac_exception &&
9626 IOTaskHasEntitlement(task, SKIP_PAC_EXCEPTION_ENTITLEMENT)) {
9627 return;
9628 }
9629
9630 if (IOTaskHasEntitlement(task, PAC_EXCEPTION_ENTITLEMENT) || task_get_hardened_runtime(task)) {
9631 pac_hardened_task = true;
9632 set_flags |= TFRO_PAC_ENFORCE_USER_STATE;
9633 }
9634
9635 /* On non-FPAC hardware, gate the fatal property behind entitlements and boot-arg. */
9636 if (pac_hardened_task ||
9637 ((enable_pac_exception || gARM_FEAT_FPAC) && platform_binary)) {
9638 /* If debugging is configured, do not make PAC exception fatal. */
9639 if (address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
9640 set_flags |= TFRO_PAC_EXC_FATAL;
9641 }
9642 }
9643
9644 if (set_flags != 0) {
9645 task_ro_flags_set(task, set_flags);
9646 }
9647}
9648
9649bool
9650task_is_pac_exception_fatal(
9651 task_t task)
9652{
9653 assert(task != TASK_NULL);
9654 return !!(task_ro_flags_get(task) & TFRO_PAC_EXC_FATAL);
9655}
9656#endif /* __has_feature(ptrauth_calls) */
9657
9658/*
9659 * FATAL_EXCEPTION_ENTITLEMENT, if present, will contain a list of
9660 * conditions for which access violations should deliver SIGKILL rather than
9661 * SIGSEGV. This is a hardening measure intended for use by applications
9662 * that are able to handle the stricter error handling behavior. Currently
9663 * this supports FATAL_EXCEPTION_ENTITLEMENT_JIT, which is documented in
9664 * user_fault_in_self_restrict_mode().
9665 */
9666#define FATAL_EXCEPTION_ENTITLEMENT "com.apple.security.fatal-exceptions"
9667#define FATAL_EXCEPTION_ENTITLEMENT_JIT "jit"
9668
9669void
9670task_set_jit_exception_fatal_flag(
9671 task_t task)
9672{
9673 assert(task != TASK_NULL);
9674 if (IOTaskHasStringEntitlement(task, FATAL_EXCEPTION_ENTITLEMENT, FATAL_EXCEPTION_ENTITLEMENT_JIT) &&
9675 address_space_debugged(task_get_proc_raw(task)) != KERN_SUCCESS) {
9676 task_ro_flags_set(task, TFRO_JIT_EXC_FATAL);
9677 }
9678}
9679
9680bool
9681task_is_jit_exception_fatal(
9682 __unused task_t task)
9683{
9684#if !defined(XNU_PLATFORM_MacOSX)
9685 return true;
9686#else
9687 assert(task != TASK_NULL);
9688 return !!(task_ro_flags_get(task) & TFRO_JIT_EXC_FATAL);
9689#endif
9690}
9691
9692bool
9693task_needs_user_signed_thread_state(
9694 task_t task)
9695{
9696 assert(task != TASK_NULL);
9697 return !!(task_ro_flags_get(task) & TFRO_PAC_ENFORCE_USER_STATE);
9698}
9699
9700void
9701task_set_tecs(task_t task)
9702{
9703 if (task == TASK_NULL) {
9704 task = current_task();
9705 }
9706
9707 if (!machine_csv(CPUVN_CI)) {
9708 return;
9709 }
9710
9711 LCK_MTX_ASSERT(&task->lock, LCK_MTX_ASSERT_NOTOWNED);
9712
9713 task_lock(task);
9714
9715 task->t_flags |= TF_TECS;
9716
9717 thread_t thread;
9718 queue_iterate(&task->threads, thread, thread_t, task_threads) {
9719 machine_tecs(thread);
9720 }
9721 task_unlock(task);
9722}
9723
9724kern_return_t
9725task_test_sync_upcall(
9726 task_t task,
9727 ipc_port_t send_port)
9728{
9729#if DEVELOPMENT || DEBUG
9730 if (task != current_task() || !IPC_PORT_VALID(send_port)) {
9731 return KERN_INVALID_ARGUMENT;
9732 }
9733
9734 /* Block on sync kernel upcall on the given send port */
9735 mach_test_sync_upcall(send_port);
9736
9737 ipc_port_release_send(send_port);
9738 return KERN_SUCCESS;
9739#else
9740 (void)task;
9741 (void)send_port;
9742 return KERN_NOT_SUPPORTED;
9743#endif
9744}
9745
9746kern_return_t
9747task_test_async_upcall_propagation(
9748 task_t task,
9749 ipc_port_t send_port,
9750 int qos,
9751 int iotier)
9752{
9753#if DEVELOPMENT || DEBUG
9754 kern_return_t kr;
9755
9756 if (task != current_task() || !IPC_PORT_VALID(send_port)) {
9757 return KERN_INVALID_ARGUMENT;
9758 }
9759
9760 if (qos < THREAD_QOS_DEFAULT || qos > THREAD_QOS_USER_INTERACTIVE ||
9761 iotier < THROTTLE_LEVEL_START || iotier > THROTTLE_LEVEL_END) {
9762 return KERN_INVALID_ARGUMENT;
9763 }
9764
9765 struct thread_attr_for_ipc_propagation attr = {
9766 .tafip_iotier = iotier,
9767 .tafip_qos = qos
9768 };
9769
9770 /* Apply propagate attr to port */
9771 kr = ipc_port_propagate_thread_attr(send_port, attr);
9772 if (kr != KERN_SUCCESS) {
9773 return kr;
9774 }
9775
9776 thread_enable_send_importance(current_thread(), TRUE);
9777
9778 /* Perform an async kernel upcall on the given send port */
9779 mach_test_async_upcall(send_port);
9780 thread_enable_send_importance(current_thread(), FALSE);
9781
9782 ipc_port_release_send(send_port);
9783 return KERN_SUCCESS;
9784#else
9785 (void)task;
9786 (void)send_port;
9787 (void)qos;
9788 (void)iotier;
9789 return KERN_NOT_SUPPORTED;
9790#endif
9791}
9792
9793#if CONFIG_PROC_RESOURCE_LIMITS
9794mach_port_name_t
9795current_task_get_fatal_port_name(void)
9796{
9797 mach_port_t task_fatal_port = MACH_PORT_NULL;
9798 mach_port_name_t port_name = 0;
9799
9800 task_fatal_port = task_allocate_fatal_port();
9801
9802 if (task_fatal_port) {
9803 ipc_object_copyout(current_space(), ip_to_object(task_fatal_port), MACH_MSG_TYPE_PORT_SEND,
9804 IPC_OBJECT_COPYOUT_FLAGS_NONE, NULL, NULL, &port_name);
9805 }
9806
9807 return port_name;
9808}
9809#endif /* CONFIG_PROC_RESOURCE_LIMITS */
9810
9811#if defined(__x86_64__)
9812bool
9813curtask_get_insn_copy_optout(void)
9814{
9815 bool optout;
9816 task_t cur_task = current_task();
9817
9818 task_lock(cur_task);
9819 optout = (cur_task->t_flags & TF_INSN_COPY_OPTOUT) ? true : false;
9820 task_unlock(cur_task);
9821
9822 return optout;
9823}
9824
9825void
9826curtask_set_insn_copy_optout(void)
9827{
9828 task_t cur_task = current_task();
9829
9830 task_lock(cur_task);
9831
9832 cur_task->t_flags |= TF_INSN_COPY_OPTOUT;
9833
9834 thread_t thread;
9835 queue_iterate(&cur_task->threads, thread, thread_t, task_threads) {
9836 machine_thread_set_insn_copy_optout(thread);
9837 }
9838 task_unlock(cur_task);
9839}
9840#endif /* defined(__x86_64__) */
9841
9842void
9843task_get_corpse_vmobject_list(task_t task, vmobject_list_output_t* list, size_t* list_size)
9844{
9845 assert(task);
9846 assert(list_size);
9847
9848 *list = task->corpse_vmobject_list;
9849 *list_size = (size_t)task->corpse_vmobject_list_size;
9850}
9851
9852__abortlike
9853static void
9854panic_proc_ro_task_backref_mismatch(task_t t, proc_ro_t ro)
9855{
9856 panic("proc_ro->task backref mismatch: t=%p, ro=%p, "
9857 "proc_ro_task(ro)=%p", t, ro, proc_ro_task(ro));
9858}
9859
9860proc_ro_t
9861task_get_ro(task_t t)
9862{
9863 proc_ro_t ro = (proc_ro_t)t->bsd_info_ro;
9864
9865 zone_require_ro(ZONE_ID_PROC_RO, sizeof(struct proc_ro), ro);
9866 if (__improbable(proc_ro_task(ro) != t)) {
9867 panic_proc_ro_task_backref_mismatch(t, ro);
9868 }
9869
9870 return ro;
9871}
9872
9873uint32_t
9874task_ro_flags_get(task_t task)
9875{
9876 return task_get_ro(task)->t_flags_ro;
9877}
9878
9879void
9880task_ro_flags_set(task_t task, uint32_t flags)
9881{
9882 zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
9883 t_flags_ro, ZRO_ATOMIC_OR_32, flags);
9884}
9885
9886void
9887task_ro_flags_clear(task_t task, uint32_t flags)
9888{
9889 zalloc_ro_update_field_atomic(ZONE_ID_PROC_RO, task_get_ro(task),
9890 t_flags_ro, ZRO_ATOMIC_AND_32, ~flags);
9891}
9892
9893task_control_port_options_t
9894task_get_control_port_options(task_t task)
9895{
9896 return task_get_ro(task)->task_control_port_options;
9897}
9898
9899void
9900task_set_control_port_options(task_t task, task_control_port_options_t opts)
9901{
9902 zalloc_ro_update_field(ZONE_ID_PROC_RO, task_get_ro(task),
9903 task_control_port_options, &opts);
9904}
9905
9906/*!
9907 * @function kdp_task_is_locked
9908 *
9909 * @abstract
9910 * Checks if task is locked.
9911 *
9912 * @discussion
9913 * NOT SAFE: To be used only by kernel debugger.
9914 *
9915 * @param task task to check
9916 *
9917 * @returns TRUE if the task is locked.
9918 */
9919boolean_t
9920kdp_task_is_locked(task_t task)
9921{
9922 return kdp_lck_mtx_lock_spin_is_acquired(&task->lock);
9923}
9924
9925#if DEBUG || DEVELOPMENT
9926/**
9927 *
9928 * Check if a threshold limit is valid based on the actual phys memory
9929 * limit. If they are the same, race conditions may arise, so we have to
9930 * prevent that from happening.
9931 */
9932static diagthreshold_check_return
9933task_check_memorythreshold_is_valid(task_t task, uint64_t new_limit, bool is_diagnostics_value)
9934{
9935 int phys_limit_mb;
9936 kern_return_t ret_value;
9937 bool threshold_enabled;
9938 bool dummy;
9939 ret_value = ledger_is_diag_threshold_enabled(task->ledger, task_ledgers.phys_footprint, &threshold_enabled);
9940 if (ret_value != KERN_SUCCESS) {
9941 return ret_value;
9942 }
9943 if (is_diagnostics_value == true) {
9944 ret_value = task_get_phys_footprint_limit(task, &phys_limit_mb);
9945 } else {
9946 uint64_t diag_limit;
9947 ret_value = task_get_diag_footprint_limit_internal(task, &diag_limit, &dummy);
9948 phys_limit_mb = (int)(diag_limit >> 20);
9949 }
9950 if (ret_value != KERN_SUCCESS) {
9951 return ret_value;
9952 }
9953 if (phys_limit_mb == (int) new_limit) {
9954 if (threshold_enabled == false) {
9955 return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_DISABLED;
9956 } else {
9957 return THRESHOLD_IS_SAME_AS_LIMIT_FLAG_ENABLED;
9958 }
9959 }
9960 if (threshold_enabled == false) {
9961 return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_DISABLED;
9962 } else {
9963 return THRESHOLD_IS_NOT_SAME_AS_LIMIT_FLAG_ENABLED;
9964 }
9965}
9966#endif
9967
9968#if CONFIG_EXCLAVES
9969kern_return_t
9970task_add_conclave(task_t task, void *vnode, int64_t off, const char *task_conclave_id)
9971{
9972 /*
9973 * Only launchd or properly entitled tasks can attach tasks to
9974 * conclaves.
9975 */
9976 if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
9977 return KERN_DENIED;
9978 }
9979
9980 /*
9981 * Only entitled tasks can have conclaves attached.
9982 * Allow tasks which have the SPAWN privilege to also host conclaves.
9983 * This allows xpc proxy to add a conclave before execing a daemon.
9984 */
9985 if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST) &&
9986 !exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
9987 return KERN_DENIED;
9988 }
9989
9990 /*
9991 * Make this EXCLAVES_BOOT_STAGE_2 until userspace is actually
9992 * triggering the EXCLAVESKIT boot stage.
9993 */
9994 kern_return_t kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_2);
9995 if (kr != KERN_SUCCESS) {
9996 return kr;
9997 }
9998
9999 return exclaves_conclave_attach(EXCLAVES_DOMAIN_KERNEL, task_conclave_id, task);
10000}
10001
10002kern_return_t
10003task_launch_conclave(mach_port_name_t port __unused)
10004{
10005 kern_return_t kr = KERN_FAILURE;
10006 assert3u(port, ==, MACH_PORT_NULL);
10007 exclaves_resource_t *conclave = task_get_conclave(current_task());
10008 if (conclave == NULL) {
10009 return kr;
10010 }
10011
10012 kr = exclaves_conclave_launch(conclave);
10013 if (kr != KERN_SUCCESS) {
10014 return kr;
10015 }
10016 task_set_conclave_taint(current_task());
10017
10018 return KERN_SUCCESS;
10019}
10020
10021kern_return_t
10022task_inherit_conclave(task_t old_task, task_t new_task, void *vnode, int64_t off)
10023{
10024 if (old_task->conclave == NULL ||
10025 !exclaves_conclave_is_attached(old_task->conclave)) {
10026 return KERN_SUCCESS;
10027 }
10028
10029 /*
10030 * Only launchd or properly entitled tasks can attach tasks to
10031 * conclaves.
10032 */
10033 if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
10034 return KERN_DENIED;
10035 }
10036
10037 /*
10038 * Only entitled tasks can have conclaves attached.
10039 */
10040 if (!exclaves_has_priv_vnode(vnode, off, EXCLAVES_PRIV_CONCLAVE_HOST)) {
10041 return KERN_DENIED;
10042 }
10043
10044 return exclaves_conclave_inherit(old_task->conclave, old_task, new_task);
10045}
10046
10047void
10048task_clear_conclave(task_t task)
10049{
10050 if (task->exclave_crash_info) {
10051 kfree_data(task->exclave_crash_info, CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE);
10052 task->exclave_crash_info = NULL;
10053 }
10054
10055 if (task->conclave == NULL) {
10056 return;
10057 }
10058
10059 /*
10060 * XXX
10061 * This should only fail if either the conclave is in an unexpected
10062 * state (i.e. not ATTACHED) or if the wrong port is supplied.
10063 * We should re-visit this and make sure we guarantee the above
10064 * constraints.
10065 */
10066 __assert_only kern_return_t ret =
10067 exclaves_conclave_detach(task->conclave, task);
10068 assert3u(ret, ==, KERN_SUCCESS);
10069}
10070
10071void
10072task_stop_conclave(task_t task, bool gather_crash_bt)
10073{
10074 thread_t thread = current_thread();
10075
10076 if (task->conclave == NULL) {
10077 return;
10078 }
10079
10080 if (task_should_panic_on_exit_due_to_conclave_taint(task)) {
10081 panic("Conclave tainted task %p terminated\n", task);
10082 }
10083
10084 /* Stash the task on current thread for conclave teardown */
10085 thread->conclave_stop_task = task;
10086
10087 __assert_only kern_return_t ret =
10088 exclaves_conclave_stop(task->conclave, gather_crash_bt);
10089
10090 thread->conclave_stop_task = TASK_NULL;
10091
10092 assert3u(ret, ==, KERN_SUCCESS);
10093}
10094
10095kern_return_t
10096task_stop_conclave_upcall(void)
10097{
10098 task_t task = current_task();
10099 if (task->conclave == NULL) {
10100 return KERN_INVALID_TASK;
10101 }
10102
10103 return exclaves_conclave_stop_upcall(task->conclave);
10104}
10105
10106kern_return_t
10107task_stop_conclave_upcall_complete(void)
10108{
10109 task_t task = current_task();
10110 thread_t thread = current_thread();
10111
10112 if (!(thread->th_exclaves_state & TH_EXCLAVES_STOP_UPCALL_PENDING)) {
10113 return KERN_SUCCESS;
10114 }
10115
10116 assert3p(task->conclave, !=, NULL);
10117
10118 return exclaves_conclave_stop_upcall_complete(task->conclave, task);
10119}
10120
10121kern_return_t
10122task_suspend_conclave_upcall(uint64_t *scid_list, size_t scid_list_count)
10123{
10124 task_t task = current_task();
10125 thread_t thread;
10126 int scid_count = 0;
10127 kern_return_t kr;
10128 if (task->conclave == NULL) {
10129 return KERN_INVALID_TASK;
10130 }
10131
10132 kr = task_hold_and_wait(task);
10133
10134 task_lock(task);
10135 queue_iterate(&task->threads, thread, thread_t, task_threads)
10136 {
10137 if (thread->th_exclaves_state & TH_EXCLAVES_RPC) {
10138 scid_list[scid_count++] = thread->th_exclaves_scheduling_context_id;
10139 if (scid_count >= scid_list_count) {
10140 break;
10141 }
10142 }
10143 }
10144
10145 task_unlock(task);
10146 return kr;
10147}
10148
10149kern_return_t
10150task_crash_info_conclave_upcall(task_t task, const xnuupcalls_conclavesharedbuffer_s *shared_buf,
10151 uint32_t length)
10152{
10153 if (task->conclave == NULL) {
10154 return KERN_INVALID_TASK;
10155 }
10156
10157 /* Allocate the buffer and memcpy it */
10158 int task_crash_info_buffer_size = 0;
10159 uint8_t * task_crash_info_buffer;
10160
10161 if (!length) {
10162 printf("Conclave upcall: task_crash_info_conclave_upcall did not return any page addresses\n");
10163 return KERN_INVALID_ARGUMENT;
10164 }
10165
10166 task_crash_info_buffer_size = CONCLAVE_CRASH_BUFFER_PAGECOUNT * PAGE_SIZE;
10167 assert3u(task_crash_info_buffer_size, >=, length);
10168
10169 task_crash_info_buffer = kalloc_data(task_crash_info_buffer_size, Z_WAITOK);
10170 if (!task_crash_info_buffer) {
10171 panic("task_crash_info_conclave_upcall: cannot allocate buffer for task_info shared memory");
10172 return KERN_INVALID_ARGUMENT;
10173 }
10174
10175 uint8_t * dst = task_crash_info_buffer;
10176 uint32_t remaining = length;
10177 for (size_t i = 0; i < CONCLAVE_CRASH_BUFFER_PAGECOUNT; i++) {
10178 if (remaining) {
10179 memcpy(dst, (uint8_t*)phystokv((pmap_paddr_t)shared_buf->physaddr[i]), PAGE_SIZE);
10180 remaining = (remaining >= PAGE_SIZE) ? remaining - PAGE_SIZE : 0;
10181 dst += PAGE_SIZE;
10182 }
10183 }
10184
10185 task_lock(task);
10186 if (task->exclave_crash_info == NULL && task->active) {
10187 task->exclave_crash_info = task_crash_info_buffer;
10188 task->exclave_crash_info_length = length;
10189 task_crash_info_buffer = NULL;
10190 }
10191 task_unlock(task);
10192
10193 if (task_crash_info_buffer) {
10194 kfree_data(task_crash_info_buffer, task_crash_info_buffer_size);
10195 }
10196
10197 return KERN_SUCCESS;
10198}
10199
10200exclaves_resource_t *
10201task_get_conclave(task_t task)
10202{
10203 return task->conclave;
10204}
10205
10206extern boolean_t IOPMRootDomainGetWillShutdown(void);
10207
10208 TUNABLE(bool, disable_conclave_taint, "disable_conclave_taint", true); /* Do not taint processes when they talk to a conclave, so the system does not panic when they exit. */
10209
10210static bool
10211task_should_panic_on_exit_due_to_conclave_taint(task_t task)
10212{
10213 /* Check if boot-arg to disable conclave taint is set */
10214 if (disable_conclave_taint) {
10215 return false;
10216 }
10217
10218 /* Check if the system is shutting down */
10219 if (IOPMRootDomainGetWillShutdown()) {
10220 return false;
10221 }
10222
10223 return task_is_conclave_tainted(task);
10224}
10225
10226static bool
10227task_is_conclave_tainted(task_t task)
10228{
10229 return (task->t_exclave_state & TES_CONCLAVE_TAINTED) != 0 &&
10230 !(task->t_exclave_state & TES_CONCLAVE_UNTAINTABLE);
10231}
10232
10233static void
10234task_set_conclave_taint(task_t task)
10235{
10236 os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_TAINTED, relaxed);
10237}
10238
10239void
10240task_set_conclave_untaintable(task_t task)
10241{
10242 os_atomic_or(&task->t_exclave_state, TES_CONCLAVE_UNTAINTABLE, relaxed);
10243}
10244
10245void
10246task_add_conclave_crash_info(task_t task, void *crash_info_ptr)
10247{
10248 __block kern_return_t error = KERN_SUCCESS;
10249 tb_error_t tberr = TB_ERROR_SUCCESS;
10250 void *crash_info;
10251 uint32_t crash_info_length = 0;
10252
10253 if (task->conclave == NULL) {
10254 return;
10255 }
10256
10257 if (task->exclave_crash_info_length == 0) {
10258 return;
10259 }
10260
10261 error = kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_BEGIN,
10262 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
10263 if (error != KERN_SUCCESS) {
10264 return;
10265 }
10266
10267 crash_info = task->exclave_crash_info;
10268 crash_info_length = task->exclave_crash_info_length;
10269
10270 tberr = stackshot_stackshotresult__unmarshal(crash_info,
10271 (uint64_t)crash_info_length, ^(stackshot_stackshotresult_s result){
10272 error = stackshot_exclaves_process_stackshot(&result, crash_info_ptr);
10273 if (error != KERN_SUCCESS) {
10274 printf("stackshot_exclaves_process_result: error processing stackshot result %d\n", error);
10275 }
10276 });
10277 if (tberr != TB_ERROR_SUCCESS) {
10278 printf("task_conclave_crash: task_add_conclave_crash_info could not unmarshal stackshot data 0x%x\n", tberr);
10279 error = KERN_FAILURE;
10280 goto error_exit;
10281 }
10282
10283error_exit:
10284 kcdata_add_container_marker(crash_info_ptr, KCDATA_TYPE_CONTAINER_END,
10285 STACKSHOT_KCCONTAINER_EXCLAVES, 0);
10286
10287 return;
10288}
10289
10290#endif /* CONFIG_EXCLAVES */
10291
10292#pragma mark task utils
10293
10294/* defined in bsd/kern/kern_proc.c */
10295extern void proc_name(int pid, char *buf, int size);
10296extern char *proc_best_name(struct proc *p);
10297
10298void
10299task_procname(task_t task, char *buf, int size)
10300{
10301 proc_name(task_pid(task), buf, size);
10302}
10303
10304void
10305task_best_name(task_t task, char *buf, size_t size)
10306{
10307 char *name = proc_best_name(task_get_proc_raw(task));
10308 strlcpy(buf, name, size);
10309}
10310