1/*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 * File: kern/task.c
58 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub,
59 * David Black
60 *
61 * Task management primitives implementation.
62 */
63/*
64 * Copyright (c) 1993 The University of Utah and
65 * the Computer Systems Laboratory (CSL). All rights reserved.
66 *
67 * Permission to use, copy, modify and distribute this software and its
68 * documentation is hereby granted, provided that both the copyright
69 * notice and this permission notice appear in all copies of the
70 * software, derivative works or modified versions, and any portions
71 * thereof, and that both notices appear in supporting documentation.
72 *
73 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
74 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
75 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
76 *
77 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
78 * improvements that they make and grant CSL redistribution rights.
79 *
80 */
81/*
82 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
83 * support for mandatory and extensible security protections. This notice
84 * is included in support of clause 2.2 (b) of the Apple Public License,
85 * Version 2.0.
86 * Copyright (c) 2005 SPARTA, Inc.
87 */
88
89#include <mach/mach_types.h>
90#include <mach/boolean.h>
91#include <mach/host_priv.h>
92#include <mach/machine/vm_types.h>
93#include <mach/vm_param.h>
94#include <mach/mach_vm.h>
95#include <mach/semaphore.h>
96#include <mach/task_info.h>
97#include <mach/task_inspect.h>
98#include <mach/task_special_ports.h>
99#include <mach/sdt.h>
100
101#include <ipc/ipc_importance.h>
102#include <ipc/ipc_types.h>
103#include <ipc/ipc_space.h>
104#include <ipc/ipc_entry.h>
105#include <ipc/ipc_hash.h>
106
107#include <kern/kern_types.h>
108#include <kern/mach_param.h>
109#include <kern/misc_protos.h>
110#include <kern/task.h>
111#include <kern/thread.h>
112#include <kern/coalition.h>
113#include <kern/zalloc.h>
114#include <kern/kalloc.h>
115#include <kern/kern_cdata.h>
116#include <kern/processor.h>
117#include <kern/sched_prim.h> /* for thread_wakeup */
118#include <kern/ipc_tt.h>
119#include <kern/host.h>
120#include <kern/clock.h>
121#include <kern/timer.h>
122#include <kern/assert.h>
123#include <kern/sync_lock.h>
124#include <kern/affinity.h>
125#include <kern/exc_resource.h>
126#include <kern/machine.h>
127#include <kern/policy_internal.h>
128
129#include <corpses/task_corpse.h>
130#if CONFIG_TELEMETRY
131#include <kern/telemetry.h>
132#endif
133
134#if MONOTONIC
135#include <kern/monotonic.h>
136#include <machine/monotonic.h>
137#endif /* MONOTONIC */
138
139#include <os/log.h>
140
141#include <vm/pmap.h>
142#include <vm/vm_map.h>
143#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */
144#include <vm/vm_pageout.h>
145#include <vm/vm_protos.h>
146#include <vm/vm_purgeable_internal.h>
147
148#include <sys/resource.h>
149#include <sys/signalvar.h> /* for coredump */
150
151/*
152 * Exported interfaces
153 */
154
155#include <mach/task_server.h>
156#include <mach/mach_host_server.h>
157#include <mach/host_security_server.h>
158#include <mach/mach_port_server.h>
159
160#include <vm/vm_shared_region.h>
161
162#include <libkern/OSDebug.h>
163#include <libkern/OSAtomic.h>
164#include <libkern/section_keywords.h>
165
166#if CONFIG_ATM
167#include <atm/atm_internal.h>
168#endif
169
170#include <kern/sfi.h> /* picks up ledger.h */
171
172#if CONFIG_MACF
173#include <security/mac_mach_internal.h>
174#endif
175
176#if KPERF
177extern int kpc_force_all_ctrs(task_t, int);
178#endif
179
180task_t kernel_task;
181zone_t task_zone;
182lck_attr_t task_lck_attr;
183lck_grp_t task_lck_grp;
184lck_grp_attr_t task_lck_grp_attr;
185
186extern int exc_via_corpse_forking;
187extern int corpse_for_fatal_memkill;
188extern boolean_t proc_send_synchronous_EXC_RESOURCE(void *p);
189
190/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */
191int audio_active = 0;
192
193zinfo_usage_store_t tasks_tkm_private;
194zinfo_usage_store_t tasks_tkm_shared;
195
196/* A container to accumulate statistics for expired tasks */
197expired_task_statistics_t dead_task_statistics;
198lck_spin_t dead_task_statistics_lock;
199
200ledger_template_t task_ledger_template = NULL;
201
202SECURITY_READ_ONLY_LATE(struct _task_ledger_indices) task_ledgers __attribute__((used)) =
203{.cpu_time = -1,
204 .tkm_private = -1,
205 .tkm_shared = -1,
206 .phys_mem = -1,
207 .wired_mem = -1,
208 .internal = -1,
209 .iokit_mapped = -1,
210 .alternate_accounting = -1,
211 .alternate_accounting_compressed = -1,
212 .page_table = -1,
213 .phys_footprint = -1,
214 .internal_compressed = -1,
215 .purgeable_volatile = -1,
216 .purgeable_nonvolatile = -1,
217 .purgeable_volatile_compressed = -1,
218 .purgeable_nonvolatile_compressed = -1,
219 .network_volatile = -1,
220 .network_nonvolatile = -1,
221 .network_volatile_compressed = -1,
222 .network_nonvolatile_compressed = -1,
223 .platform_idle_wakeups = -1,
224 .interrupt_wakeups = -1,
225#if !CONFIG_EMBEDDED
226 .sfi_wait_times = { 0 /* initialized at runtime */},
227#endif /* !CONFIG_EMBEDDED */
228 .cpu_time_billed_to_me = -1,
229 .cpu_time_billed_to_others = -1,
230 .physical_writes = -1,
231 .logical_writes = -1,
232 .energy_billed_to_me = -1,
233 .energy_billed_to_others = -1
234};
235
236/* System sleep state */
237boolean_t tasks_suspend_state;
238
239
240void init_task_ledgers(void);
241void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1);
242void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1);
243void task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1);
244void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void);
245void __attribute__((noinline)) PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal);
246void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor);
247
248kern_return_t task_suspend_internal(task_t);
249kern_return_t task_resume_internal(task_t);
250static kern_return_t task_start_halt_locked(task_t task, boolean_t should_mark_corpse);
251
252extern kern_return_t iokit_task_terminate(task_t task);
253
254extern kern_return_t exception_deliver(thread_t, exception_type_t, mach_exception_data_t, mach_msg_type_number_t, struct exception_action *, lck_mtx_t *);
255extern void bsd_copythreadname(void *dst_uth, void *src_uth);
256extern kern_return_t thread_resume(thread_t thread);
257
258// Warn tasks when they hit 80% of their memory limit.
259#define PHYS_FOOTPRINT_WARNING_LEVEL 80
260
261#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */
262#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */
263
264/*
265 * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry.
266 *
 * (i.e., when the task's wakeups rate exceeds 70% of the limit, start taking user
 * stack traces, a.k.a. micro-stackshots).
269 */
270#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70
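/*
 * With the defaults above, a task may average up to 150 wakeups/sec over a
 * 300-second observation window (150 * 300 = 45,000 wakeups per window), and
 * micro-stackshot telemetry begins once the task sustains roughly 70% of that
 * rate, i.e. about 105 wakeups/sec.
 */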
271
272int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */
int task_wakeups_monitor_rate;     /* In wakeups per second. Maximum allowable rate of wakeups per task before EXC_RESOURCE is sent */
274
275int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
276
int disable_exc_resource; /* Global override to suppress EXC_RESOURCE for resource monitor violations. */
278
279ledger_amount_t max_task_footprint = 0; /* Per-task limit on physical memory consumption in bytes */
280int max_task_footprint_warning_level = 0; /* Per-task limit warning percentage */
281int max_task_footprint_mb = 0; /* Per-task limit on physical memory consumption in megabytes */
282
283/* I/O Monitor Limits */
284#define IOMON_DEFAULT_LIMIT (20480ull) /* MB of logical/physical I/O */
285#define IOMON_DEFAULT_INTERVAL (86400ull) /* in seconds */
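/*
 * With the defaults above, the I/O monitor allows 20480 MB (20 GB) of
 * logical/physical I/O per 86400-second (24-hour) interval.
 */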
286
287uint64_t task_iomon_limit_mb; /* Per-task I/O monitor limit in MBs */
288uint64_t task_iomon_interval_secs; /* Per-task I/O monitor interval in secs */
289
290#define IO_TELEMETRY_DEFAULT_LIMIT (10ll * 1024ll * 1024ll)
int64_t io_telemetry_limit;  /* Threshold at which to take a microstackshot (0 indicates I/O telemetry is turned off) */
292int64_t global_logical_writes_count = 0; /* Global count for logical writes */
293static boolean_t global_update_logical_writes(int64_t);
294
295#define TASK_MAX_THREAD_LIMIT 256
296
297#if MACH_ASSERT
298int pmap_ledgers_panic = 1;
299int pmap_ledgers_panic_leeway = 3;
300#endif /* MACH_ASSERT */
301
302int task_max = CONFIG_TASK_MAX; /* Max number of tasks */
303
304#if CONFIG_COREDUMP
305int hwm_user_cores = 0; /* high watermark violations generate user core files */
306#endif
307
308#ifdef MACH_BSD
309extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long);
310extern int proc_pid(struct proc *p);
311extern int proc_selfpid(void);
312extern struct proc *current_proc(void);
313extern char *proc_name_address(struct proc *p);
314extern uint64_t get_dispatchqueue_offset_from_proc(void *);
315extern int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, int bufsize);
316extern void workq_proc_suspended(struct proc *p);
317extern void workq_proc_resumed(struct proc *p);
318
319#if CONFIG_MEMORYSTATUS
320extern void proc_memstat_terminated(struct proc* p, boolean_t set);
321extern void memorystatus_on_ledger_footprint_exceeded(int warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
322extern void memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal);
323extern boolean_t memorystatus_allowed_vm_map_fork(task_t task);
324
325#if DEVELOPMENT || DEBUG
326extern void memorystatus_abort_vm_map_fork(task_t);
327#endif
328
329#endif /* CONFIG_MEMORYSTATUS */
330
331#endif /* MACH_BSD */
332
333#if DEVELOPMENT || DEBUG
334int exc_resource_threads_enabled;
335#endif /* DEVELOPMENT || DEBUG */
336
337#if (DEVELOPMENT || DEBUG) && TASK_EXC_GUARD_DELIVER_CORPSE
338uint32_t task_exc_guard_default = TASK_EXC_GUARD_MP_DELIVER | TASK_EXC_GUARD_MP_CORPSE |
339 TASK_EXC_GUARD_VM_DELIVER | TASK_EXC_GUARD_VM_CORPSE;
340#else
341uint32_t task_exc_guard_default = 0;
342#endif
343
344/* Forwards */
345
346static void task_hold_locked(task_t task);
347static void task_wait_locked(task_t task, boolean_t until_not_runnable);
348static void task_release_locked(task_t task);
349
350static void task_synchronizer_destroy_all(task_t task);
351
352
353void
354task_set_64bit(
355 task_t task,
356 boolean_t is_64bit,
357 boolean_t is_64bit_data)
358{
359#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
360 thread_t thread;
361#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm64__) */
362
363 task_lock(task);
364
365 /*
366 * Switching to/from 64-bit address spaces
367 */
368 if (is_64bit) {
369 if (!task_has_64Bit_addr(task)) {
370 task_set_64Bit_addr(task);
371 }
372 } else {
373 if (task_has_64Bit_addr(task)) {
374 task_clear_64Bit_addr(task);
375 }
376 }
377
378 /*
379 * Switching to/from 64-bit register state.
380 */
381 if (is_64bit_data) {
382 if (task_has_64Bit_data(task))
383 goto out;
384
385 task_set_64Bit_data(task);
386 } else {
387 if ( !task_has_64Bit_data(task))
388 goto out;
389
390 task_clear_64Bit_data(task);
391 }
392
393 /* FIXME: On x86, the thread save state flavor can diverge from the
394 * task's 64-bit feature flag due to the 32-bit/64-bit register save
395 * state dichotomy. Since we can be pre-empted in this interval,
396 * certain routines may observe the thread as being in an inconsistent
397 * state with respect to its task's 64-bitness.
398 */
399
400#if defined(__x86_64__) || defined(__arm64__)
401 queue_iterate(&task->threads, thread, thread_t, task_threads) {
402 thread_mtx_lock(thread);
403 machine_thread_switch_addrmode(thread);
404 thread_mtx_unlock(thread);
405
406#if defined(__arm64__)
407 /* specifically, if running on H9 */
408 if (thread == current_thread()) {
409 uint64_t arg1, arg2;
410 int urgency;
411 spl_t spl = splsched();
412 /*
			 * This call reports that the current thread has changed its 32-bitness.
			 * Other threads were no longer on core when their 32-bitness changed,
			 * but current_thread() is on core, and the 32-bitness reported by the
			 * previous call to machine_thread_going_on_core() is now wrong.
			 *
			 * This is needed for bring-up; a different callback should be used
			 * in the future.
420 *
421 * TODO: Remove this callout when we no longer support 32-bit code on H9
422 */
423 thread_lock(thread);
424 urgency = thread_get_urgency(thread, &arg1, &arg2);
425 machine_thread_going_on_core(thread, urgency, 0, 0, mach_approximate_time());
426 thread_unlock(thread);
427 splx(spl);
428 }
429#endif /* defined(__arm64__) */
430 }
431#endif /* defined(__x86_64__) || defined(__arm64__) */
432
433out:
434 task_unlock(task);
435}
436
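/* Return whether the task uses 64-bit register (data) state. */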
437boolean_t
438task_get_64bit_data(task_t task)
439{
440 return task_has_64Bit_data(task);
441}
442
443void
444task_set_platform_binary(
445 task_t task,
446 boolean_t is_platform)
447{
448 task_lock(task);
449 if (is_platform) {
450 task->t_flags |= TF_PLATFORM;
451 } else {
452 task->t_flags &= ~(TF_PLATFORM);
453 }
454 task_unlock(task);
455}
456
457/*
 * Set or clear the per-task TF_CA_CLIENT_WI flag according to the specified argument.
 * Returns "false" if the flag was already set, and "true" otherwise.
460 */
461bool
462task_set_ca_client_wi(
463 task_t task,
464 boolean_t set_or_clear)
465{
466 bool ret = true;
467 task_lock(task);
468 if (set_or_clear) {
469 /* Tasks can have only one CA_CLIENT work interval */
470 if (task->t_flags & TF_CA_CLIENT_WI)
471 ret = false;
472 else
473 task->t_flags |= TF_CA_CLIENT_WI;
474 } else {
475 task->t_flags &= ~TF_CA_CLIENT_WI;
476 }
477 task_unlock(task);
478 return ret;
479}
480
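/* Record the user address and size of the task's dyld all_image_info data. */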
481void
482task_set_dyld_info(
483 task_t task,
484 mach_vm_address_t addr,
485 mach_vm_size_t size)
486{
487 task_lock(task);
488 task->all_image_info_addr = addr;
489 task->all_image_info_size = size;
490 task_unlock(task);
491}
492
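/* Destroy the task's ATM descriptor, if any (a no-op unless CONFIG_ATM is set). */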
493void
494task_atm_reset(__unused task_t task) {
495
496#if CONFIG_ATM
497 if (task->atm_context != NULL) {
498 atm_task_descriptor_destroy(task->atm_context);
499 task->atm_context = NULL;
500 }
501#endif
502
503}
504
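/* Destroy the task's bank context, if one is attached. */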
505void
506task_bank_reset(__unused task_t task) {
507
508 if (task->bank_context != NULL) {
509 bank_task_destroy(task);
510 }
511}
512
513/*
514 * NOTE: This should only be called when the P_LINTRANSIT
515 * flag is set (the proc_trans lock is held) on the
516 * proc associated with the task.
517 */
518void
519task_bank_init(__unused task_t task) {
520
521 if (task->bank_context != NULL) {
522 panic("Task bank init called with non null bank context for task: %p and bank_context: %p", task, task->bank_context);
523 }
524 bank_task_initialize(task);
525}
526
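/* Mark the task as having performed an exec (TPF_DID_EXEC). */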
527void
528task_set_did_exec_flag(task_t task)
529{
530 task->t_procflags |= TPF_DID_EXEC;
531}
532
533void
534task_clear_exec_copy_flag(task_t task)
535{
536 task->t_procflags &= ~TPF_EXEC_COPY;
537}
538
539/*
540 * This wait event is t_procflags instead of t_flags because t_flags is volatile
541 *
542 * TODO: store the flags in the same place as the event
543 * rdar://problem/28501994
544 */
545event_t
546task_get_return_wait_event(task_t task)
547{
548 return (event_t)&task->t_procflags;
549}
550
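/*
 * Clear TF_LRETURNWAIT and wake any thread parked in task_wait_to_return().
 */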
551void
552task_clear_return_wait(task_t task)
553{
554 task_lock(task);
555
556 task->t_flags &= ~TF_LRETURNWAIT;
557
558 if (task->t_flags & TF_LRETURNWAITER) {
559 thread_wakeup(task_get_return_wait_event(task));
560 task->t_flags &= ~TF_LRETURNWAITER;
561 }
562
563 task_unlock(task);
564}
565
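/*
 * Block the calling thread (uninterruptibly) until TF_LRETURNWAIT is cleared
 * by task_clear_return_wait(), then notify any MAC exec-complete hooks and
 * continue out to user space via thread_bootstrap_return().
 */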
566void __attribute__((noreturn))
567task_wait_to_return(void)
568{
569 task_t task;
570
571 task = current_task();
572 task_lock(task);
573
574 if (task->t_flags & TF_LRETURNWAIT) {
575 do {
576 task->t_flags |= TF_LRETURNWAITER;
577 assert_wait(task_get_return_wait_event(task), THREAD_UNINT);
578 task_unlock(task);
579
580 thread_block(THREAD_CONTINUE_NULL);
581
582 task_lock(task);
583 } while (task->t_flags & TF_LRETURNWAIT);
584 }
585
586 task_unlock(task);
587
588#if CONFIG_MACF
589 /*
590 * Before jumping to userspace and allowing this process to execute any code,
591 * notify any interested parties.
592 */
593 mac_proc_notify_exec_complete(current_proc());
594#endif
595
596 thread_bootstrap_return();
597}
598
599#ifdef CONFIG_32BIT_TELEMETRY
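/*
 * Test-and-clear the task's "log 32-bit telemetry" flag: returns TRUE the
 * first time it is called after task_set_32bit_log_flag() marked the task.
 */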
600boolean_t
601task_consume_32bit_log_flag(task_t task)
602{
603 if ((task->t_procflags & TPF_LOG_32BIT_TELEMETRY) != 0) {
604 task->t_procflags &= ~TPF_LOG_32BIT_TELEMETRY;
605 return TRUE;
606 } else {
607 return FALSE;
608 }
609}
610
611void
612task_set_32bit_log_flag(task_t task)
613{
614 task->t_procflags |= TPF_LOG_32BIT_TELEMETRY;
615}
616#endif /* CONFIG_32BIT_TELEMETRY */
617
618boolean_t
619task_is_exec_copy(task_t task)
620{
621 return task_is_exec_copy_internal(task);
622}
623
624boolean_t
625task_did_exec(task_t task)
626{
627 return task_did_exec_internal(task);
628}
629
630boolean_t
631task_is_active(task_t task)
632{
633 return task->active;
634}
635
636boolean_t
637task_is_halting(task_t task)
638{
639 return task->halting;
640}
641
642#if TASK_REFERENCE_LEAK_DEBUG
643#include <kern/btlog.h>
644
645static btlog_t *task_ref_btlog;
646#define TASK_REF_OP_INCR 0x1
647#define TASK_REF_OP_DECR 0x2
648
649#define TASK_REF_NUM_RECORDS 100000
650#define TASK_REF_BTDEPTH 7
651
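/*
 * Leak-debugging variants of the task reference primitives: each retain and
 * release is recorded in task_ref_btlog along with a short backtrace so that
 * over-retained (leaked) tasks can be traced back to their callers.
 */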
652void
653task_reference_internal(task_t task)
654{
655 void * bt[TASK_REF_BTDEPTH];
656 int numsaved = 0;
657
658 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
659
660 (void)hw_atomic_add(&(task)->ref_count, 1);
661 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR,
662 bt, numsaved);
663}
664
665uint32_t
666task_deallocate_internal(task_t task)
667{
668 void * bt[TASK_REF_BTDEPTH];
669 int numsaved = 0;
670
671 numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH);
672
673 btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR,
674 bt, numsaved);
675 return hw_atomic_sub(&(task)->ref_count, 1);
676}
677
678#endif /* TASK_REFERENCE_LEAK_DEBUG */
679
680void
681task_init(void)
682{
683
684 lck_grp_attr_setdefault(&task_lck_grp_attr);
685 lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr);
686 lck_attr_setdefault(&task_lck_attr);
687 lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr);
688 lck_mtx_init(&tasks_corpse_lock, &task_lck_grp, &task_lck_attr);
689
690 task_zone = zinit(
691 sizeof(struct task),
692 task_max * sizeof(struct task),
693 TASK_CHUNK * sizeof(struct task),
694 "tasks");
695
696 zone_change(task_zone, Z_NOENCRYPT, TRUE);
697
698#if CONFIG_EMBEDDED
699 task_watch_init();
700#endif /* CONFIG_EMBEDDED */
701
702 /*
703 * Configure per-task memory limit.
704 * The boot-arg is interpreted as Megabytes,
705 * and takes precedence over the device tree.
706 * Setting the boot-arg to 0 disables task limits.
707 */
708 if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint_mb,
709 sizeof (max_task_footprint_mb))) {
710 /*
711 * No limit was found in boot-args, so go look in the device tree.
712 */
713 if (!PE_get_default("kern.max_task_pmem", &max_task_footprint_mb,
714 sizeof(max_task_footprint_mb))) {
715 /*
716 * No limit was found in device tree.
717 */
718 max_task_footprint_mb = 0;
719 }
720 }
721
722 if (max_task_footprint_mb != 0) {
723#if CONFIG_MEMORYSTATUS
724 if (max_task_footprint_mb < 50) {
725 printf("Warning: max_task_pmem %d below minimum.\n",
726 max_task_footprint_mb);
727 max_task_footprint_mb = 50;
728 }
729 printf("Limiting task physical memory footprint to %d MB\n",
730 max_task_footprint_mb);
731
732 max_task_footprint = (ledger_amount_t)max_task_footprint_mb * 1024 * 1024; // Convert MB to bytes
733
734 /*
735 * Configure the per-task memory limit warning level.
736 * This is computed as a percentage.
737 */
738 max_task_footprint_warning_level = 0;
739
740 if (max_mem < 0x40000000) {
741 /*
742 * On devices with < 1GB of memory:
743 * -- set warnings to 50MB below the per-task limit.
744 */
745 if (max_task_footprint_mb > 50) {
746 max_task_footprint_warning_level = ((max_task_footprint_mb - 50) * 100) / max_task_footprint_mb;
747 }
748 } else {
749 /*
750 * On devices with >= 1GB of memory:
751 * -- set warnings to 100MB below the per-task limit.
752 */
753 if (max_task_footprint_mb > 100) {
754 max_task_footprint_warning_level = ((max_task_footprint_mb - 100) * 100) / max_task_footprint_mb;
755 }
756 }
757
758 /*
759 * Never allow warning level to land below the default.
760 */
761 if (max_task_footprint_warning_level < PHYS_FOOTPRINT_WARNING_LEVEL) {
762 max_task_footprint_warning_level = PHYS_FOOTPRINT_WARNING_LEVEL;
763 }
764
765 printf("Limiting task physical memory warning to %d%%\n", max_task_footprint_warning_level);
766
767#else
768 printf("Warning: max_task_pmem specified, but jetsam not configured; ignoring.\n");
769#endif /* CONFIG_MEMORYSTATUS */
770 }
771
772#if DEVELOPMENT || DEBUG
773 if (!PE_parse_boot_argn("exc_resource_threads",
774 &exc_resource_threads_enabled,
775 sizeof(exc_resource_threads_enabled))) {
776 exc_resource_threads_enabled = 1;
777 }
778 PE_parse_boot_argn("task_exc_guard_default",
779 &task_exc_guard_default,
780 sizeof(task_exc_guard_default));
781#endif /* DEVELOPMENT || DEBUG */
782
783#if CONFIG_COREDUMP
784 if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores,
785 sizeof (hwm_user_cores))) {
786 hwm_user_cores = 0;
787 }
788#endif
789
790 proc_init_cpumon_params();
791
792 if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) {
793 task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT;
794 }
795
796 if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) {
797 task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL;
798 }
799
800 if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct,
801 sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) {
802 task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER;
803 }
804
805 if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource,
806 sizeof (disable_exc_resource))) {
807 disable_exc_resource = 0;
808 }
809
810 if (!PE_parse_boot_argn("task_iomon_limit_mb", &task_iomon_limit_mb, sizeof (task_iomon_limit_mb))) {
811 task_iomon_limit_mb = IOMON_DEFAULT_LIMIT;
812 }
813
814 if (!PE_parse_boot_argn("task_iomon_interval_secs", &task_iomon_interval_secs, sizeof (task_iomon_interval_secs))) {
815 task_iomon_interval_secs = IOMON_DEFAULT_INTERVAL;
816 }
817
818 if (!PE_parse_boot_argn("io_telemetry_limit", &io_telemetry_limit, sizeof (io_telemetry_limit))) {
819 io_telemetry_limit = IO_TELEMETRY_DEFAULT_LIMIT;
820 }
821
822/*
823 * If we have coalitions, coalition_init() will call init_task_ledgers() as it
824 * sets up the ledgers for the default coalition. If we don't have coalitions,
825 * then we have to call it now.
826 */
827#if CONFIG_COALITIONS
828 assert(task_ledger_template);
829#else /* CONFIG_COALITIONS */
830 init_task_ledgers();
831#endif /* CONFIG_COALITIONS */
832
833#if TASK_REFERENCE_LEAK_DEBUG
834 task_ref_btlog = btlog_create(TASK_REF_NUM_RECORDS, TASK_REF_BTDEPTH, TRUE /* caller_will_remove_entries_for_element? */);
835 assert(task_ref_btlog);
836#endif
837
838 /*
839 * Create the kernel task as the first task.
840 */
841#ifdef __LP64__
842 if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
843#else
844 if (task_create_internal(TASK_NULL, NULL, FALSE, FALSE, FALSE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)
845#endif
846 panic("task_init\n");
847
848
849 vm_map_deallocate(kernel_task->map);
850 kernel_task->map = kernel_map;
851 lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr);
852}
853
854/*
855 * Create a task running in the kernel address space. It may
 * have its own map of size map_size and may have IPC privileges.
857 */
858kern_return_t
859kernel_task_create(
860 __unused task_t parent_task,
861 __unused vm_offset_t map_base,
862 __unused vm_size_t map_size,
863 __unused task_t *child_task)
864{
865 return (KERN_INVALID_ARGUMENT);
866}
867
868kern_return_t
869task_create(
870 task_t parent_task,
871 __unused ledger_port_array_t ledger_ports,
872 __unused mach_msg_type_number_t num_ledger_ports,
873 __unused boolean_t inherit_memory,
874 __unused task_t *child_task) /* OUT */
875{
876 if (parent_task == TASK_NULL)
877 return(KERN_INVALID_ARGUMENT);
878
879 /*
880 * No longer supported: too many calls assume that a task has a valid
881 * process attached.
882 */
883 return(KERN_FAILURE);
884}
885
886kern_return_t
887host_security_create_task_token(
888 host_security_t host_security,
889 task_t parent_task,
890 __unused security_token_t sec_token,
891 __unused audit_token_t audit_token,
892 __unused host_priv_t host_priv,
893 __unused ledger_port_array_t ledger_ports,
894 __unused mach_msg_type_number_t num_ledger_ports,
895 __unused boolean_t inherit_memory,
896 __unused task_t *child_task) /* OUT */
897{
898 if (parent_task == TASK_NULL)
899 return(KERN_INVALID_ARGUMENT);
900
901 if (host_security == HOST_NULL)
902 return(KERN_INVALID_SECURITY);
903
904 /*
905 * No longer supported.
906 */
907 return(KERN_FAILURE);
908}
909
910/*
911 * Task ledgers
912 * ------------
913 *
914 * phys_footprint
915 * Physical footprint: This is the sum of:
916 * + (internal - alternate_accounting)
917 * + (internal_compressed - alternate_accounting_compressed)
918 * + iokit_mapped
919 * + purgeable_nonvolatile
920 * + purgeable_nonvolatile_compressed
921 * + page_table
922 *
923 * internal
924 * The task's anonymous memory, which on iOS is always resident.
925 *
926 * internal_compressed
927 * Amount of this task's internal memory which is held by the compressor.
928 * Such memory is no longer actually resident for the task [i.e., resident in its pmap],
929 * and could be either decompressed back into memory, or paged out to storage, depending
930 * on our implementation.
931 *
932 * iokit_mapped
 * IOKit mappings: The total size of all IOKit mappings in this task, regardless of
 * clean/dirty or internal/external state.
935 *
936 * alternate_accounting
937 * The number of internal dirty pages which are part of IOKit mappings. By definition, these pages
938 * are counted in both internal *and* iokit_mapped, so we must subtract them from the total to avoid
939 * double counting.
940 */
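/*
 * Worked example of the phys_footprint formula above: a task with 100 MB
 * internal (10 MB of it alternate_accounting), 30 MB internal_compressed
 * (none of it alternate), 20 MB iokit_mapped, 5 MB purgeable_nonvolatile and
 * 2 MB of page_table has a footprint of (100 - 10) + 30 + 20 + 5 + 2 = 147 MB.
 */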
941void
942init_task_ledgers(void)
943{
944 ledger_template_t t;
945
946 assert(task_ledger_template == NULL);
947 assert(kernel_task == TASK_NULL);
948
949#if MACH_ASSERT
950 PE_parse_boot_argn("pmap_ledgers_panic",
951 &pmap_ledgers_panic,
952 sizeof (pmap_ledgers_panic));
953 PE_parse_boot_argn("pmap_ledgers_panic_leeway",
954 &pmap_ledgers_panic_leeway,
955 sizeof (pmap_ledgers_panic_leeway));
956#endif /* MACH_ASSERT */
957
958 if ((t = ledger_template_create("Per-task ledger")) == NULL)
959 panic("couldn't create task ledger template");
960
961 task_ledgers.cpu_time = ledger_entry_add(t, "cpu_time", "sched", "ns");
962 task_ledgers.tkm_private = ledger_entry_add(t, "tkm_private",
963 "physmem", "bytes");
964 task_ledgers.tkm_shared = ledger_entry_add(t, "tkm_shared", "physmem",
965 "bytes");
966 task_ledgers.phys_mem = ledger_entry_add(t, "phys_mem", "physmem",
967 "bytes");
968 task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem",
969 "bytes");
970 task_ledgers.internal = ledger_entry_add(t, "internal", "physmem",
971 "bytes");
972 task_ledgers.iokit_mapped = ledger_entry_add(t, "iokit_mapped", "mappings",
973 "bytes");
974 task_ledgers.alternate_accounting = ledger_entry_add(t, "alternate_accounting", "physmem",
975 "bytes");
976 task_ledgers.alternate_accounting_compressed = ledger_entry_add(t, "alternate_accounting_compressed", "physmem",
977 "bytes");
978 task_ledgers.page_table = ledger_entry_add(t, "page_table", "physmem",
979 "bytes");
980 task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem",
981 "bytes");
982 task_ledgers.internal_compressed = ledger_entry_add(t, "internal_compressed", "physmem",
983 "bytes");
984 task_ledgers.purgeable_volatile = ledger_entry_add(t, "purgeable_volatile", "physmem", "bytes");
985 task_ledgers.purgeable_nonvolatile = ledger_entry_add(t, "purgeable_nonvolatile", "physmem", "bytes");
986 task_ledgers.purgeable_volatile_compressed = ledger_entry_add(t, "purgeable_volatile_compress", "physmem", "bytes");
987 task_ledgers.purgeable_nonvolatile_compressed = ledger_entry_add(t, "purgeable_nonvolatile_compress", "physmem", "bytes");
988
989 task_ledgers.network_volatile = ledger_entry_add(t, "network_volatile", "physmem", "bytes");
990 task_ledgers.network_nonvolatile = ledger_entry_add(t, "network_nonvolatile", "physmem", "bytes");
991 task_ledgers.network_volatile_compressed = ledger_entry_add(t, "network_volatile_compressed", "physmem", "bytes");
992 task_ledgers.network_nonvolatile_compressed = ledger_entry_add(t, "network_nonvolatile_compressed", "physmem", "bytes");
993
994 task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power",
995 "count");
996 task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power",
997 "count");
998
999#if CONFIG_SCHED_SFI
1000 sfi_class_id_t class_id, ledger_alias;
1001 for (class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1002 task_ledgers.sfi_wait_times[class_id] = -1;
1003 }
1004
1005 /* don't account for UNSPECIFIED */
1006 for (class_id = SFI_CLASS_UNSPECIFIED + 1; class_id < MAX_SFI_CLASS_ID; class_id++) {
1007 ledger_alias = sfi_get_ledger_alias_for_class(class_id);
1008 if (ledger_alias != SFI_CLASS_UNSPECIFIED) {
1009 /* Check to see if alias has been registered yet */
1010 if (task_ledgers.sfi_wait_times[ledger_alias] != -1) {
1011 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias];
1012 } else {
1013 /* Otherwise, initialize it first */
1014 task_ledgers.sfi_wait_times[class_id] = task_ledgers.sfi_wait_times[ledger_alias] = sfi_ledger_entry_add(t, ledger_alias);
1015 }
1016 } else {
1017 task_ledgers.sfi_wait_times[class_id] = sfi_ledger_entry_add(t, class_id);
1018 }
1019
1020 if (task_ledgers.sfi_wait_times[class_id] < 0) {
1021 panic("couldn't create entries for task ledger template for SFI class 0x%x", class_id);
1022 }
1023 }
1024
1025 assert(task_ledgers.sfi_wait_times[MAX_SFI_CLASS_ID -1] != -1);
1026#endif /* CONFIG_SCHED_SFI */
1027
1028 task_ledgers.cpu_time_billed_to_me = ledger_entry_add(t, "cpu_time_billed_to_me", "sched", "ns");
1029 task_ledgers.cpu_time_billed_to_others = ledger_entry_add(t, "cpu_time_billed_to_others", "sched", "ns");
1030 task_ledgers.physical_writes = ledger_entry_add(t, "physical_writes", "res", "bytes");
1031 task_ledgers.logical_writes = ledger_entry_add(t, "logical_writes", "res", "bytes");
1032 task_ledgers.energy_billed_to_me = ledger_entry_add(t, "energy_billed_to_me", "power", "nj");
1033 task_ledgers.energy_billed_to_others = ledger_entry_add(t, "energy_billed_to_others", "power", "nj");
1034
1035 if ((task_ledgers.cpu_time < 0) ||
1036 (task_ledgers.tkm_private < 0) ||
1037 (task_ledgers.tkm_shared < 0) ||
1038 (task_ledgers.phys_mem < 0) ||
1039 (task_ledgers.wired_mem < 0) ||
1040 (task_ledgers.internal < 0) ||
1041 (task_ledgers.iokit_mapped < 0) ||
1042 (task_ledgers.alternate_accounting < 0) ||
1043 (task_ledgers.alternate_accounting_compressed < 0) ||
1044 (task_ledgers.page_table < 0) ||
1045 (task_ledgers.phys_footprint < 0) ||
1046 (task_ledgers.internal_compressed < 0) ||
1047 (task_ledgers.purgeable_volatile < 0) ||
1048 (task_ledgers.purgeable_nonvolatile < 0) ||
1049 (task_ledgers.purgeable_volatile_compressed < 0) ||
1050 (task_ledgers.purgeable_nonvolatile_compressed < 0) ||
1051 (task_ledgers.network_volatile < 0) ||
1052 (task_ledgers.network_nonvolatile < 0) ||
1053 (task_ledgers.network_volatile_compressed < 0) ||
1054 (task_ledgers.network_nonvolatile_compressed < 0) ||
1055 (task_ledgers.platform_idle_wakeups < 0) ||
1056 (task_ledgers.interrupt_wakeups < 0) ||
1057 (task_ledgers.cpu_time_billed_to_me < 0) || (task_ledgers.cpu_time_billed_to_others < 0) ||
1058 (task_ledgers.physical_writes < 0) ||
1059 (task_ledgers.logical_writes < 0) ||
1060 (task_ledgers.energy_billed_to_me < 0) ||
1061 (task_ledgers.energy_billed_to_others < 0)
1062 ) {
1063 panic("couldn't create entries for task ledger template");
1064 }
1065
1066 ledger_track_credit_only(t, task_ledgers.phys_footprint);
1067 ledger_track_credit_only(t, task_ledgers.page_table);
1068 ledger_track_credit_only(t, task_ledgers.internal);
1069 ledger_track_credit_only(t, task_ledgers.internal_compressed);
1070 ledger_track_credit_only(t, task_ledgers.iokit_mapped);
1071 ledger_track_credit_only(t, task_ledgers.alternate_accounting);
1072 ledger_track_credit_only(t, task_ledgers.alternate_accounting_compressed);
1073 ledger_track_credit_only(t, task_ledgers.purgeable_volatile);
1074 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile);
1075 ledger_track_credit_only(t, task_ledgers.purgeable_volatile_compressed);
1076 ledger_track_credit_only(t, task_ledgers.purgeable_nonvolatile_compressed);
1077
1078 ledger_track_credit_only(t, task_ledgers.network_volatile);
1079 ledger_track_credit_only(t, task_ledgers.network_nonvolatile);
1080 ledger_track_credit_only(t, task_ledgers.network_volatile_compressed);
1081 ledger_track_credit_only(t, task_ledgers.network_nonvolatile_compressed);
1082
1083 ledger_track_maximum(t, task_ledgers.phys_footprint, 60);
1084#if MACH_ASSERT
1085 if (pmap_ledgers_panic) {
1086 ledger_panic_on_negative(t, task_ledgers.phys_footprint);
1087 ledger_panic_on_negative(t, task_ledgers.page_table);
1088 ledger_panic_on_negative(t, task_ledgers.internal);
1089 ledger_panic_on_negative(t, task_ledgers.internal_compressed);
1090 ledger_panic_on_negative(t, task_ledgers.iokit_mapped);
1091 ledger_panic_on_negative(t, task_ledgers.alternate_accounting);
1092 ledger_panic_on_negative(t, task_ledgers.alternate_accounting_compressed);
1093 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile);
1094 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile);
1095 ledger_panic_on_negative(t, task_ledgers.purgeable_volatile_compressed);
1096 ledger_panic_on_negative(t, task_ledgers.purgeable_nonvolatile_compressed);
1097
1098 ledger_panic_on_negative(t, task_ledgers.network_volatile);
1099 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile);
1100 ledger_panic_on_negative(t, task_ledgers.network_volatile_compressed);
1101 ledger_panic_on_negative(t, task_ledgers.network_nonvolatile_compressed);
1102 }
1103#endif /* MACH_ASSERT */
1104
1105#if CONFIG_MEMORYSTATUS
1106 ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL);
1107#endif /* CONFIG_MEMORYSTATUS */
1108
1109 ledger_set_callback(t, task_ledgers.interrupt_wakeups,
1110 task_wakeups_rate_exceeded, NULL, NULL);
1111 ledger_set_callback(t, task_ledgers.physical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_PHYSICAL_WRITES, NULL);
1112 ledger_set_callback(t, task_ledgers.logical_writes, task_io_rate_exceeded, (void *)FLAVOR_IO_LOGICAL_WRITES, NULL);
1113
1114 ledger_template_complete(t);
1115 task_ledger_template = t;
1116}
1117
1118kern_return_t
1119task_create_internal(
1120 task_t parent_task,
1121 coalition_t *parent_coalitions __unused,
1122 boolean_t inherit_memory,
1123 __unused boolean_t is_64bit,
1124 boolean_t is_64bit_data,
1125 uint32_t t_flags,
1126 uint32_t t_procflags,
1127 task_t *child_task) /* OUT */
1128{
1129 task_t new_task;
1130 vm_shared_region_t shared_region;
1131 ledger_t ledger = NULL;
1132
1133 new_task = (task_t) zalloc(task_zone);
1134
1135 if (new_task == TASK_NULL)
1136 return(KERN_RESOURCE_SHORTAGE);
1137
1138 /* one ref for just being alive; one for our caller */
1139 new_task->ref_count = 2;
1140
1141 /* allocate with active entries */
1142 assert(task_ledger_template != NULL);
1143 if ((ledger = ledger_instantiate(task_ledger_template,
1144 LEDGER_CREATE_ACTIVE_ENTRIES)) == NULL) {
1145 zfree(task_zone, new_task);
1146 return(KERN_RESOURCE_SHORTAGE);
1147 }
1148
1149
1150 new_task->ledger = ledger;
1151
1152#if defined(CONFIG_SCHED_MULTIQ)
1153 new_task->sched_group = sched_group_create();
1154#endif
1155
1156 /* if inherit_memory is true, parent_task MUST not be NULL */
1157 if (!(t_flags & TF_CORPSE_FORK) && inherit_memory)
1158 new_task->map = vm_map_fork(ledger, parent_task->map, 0);
1159 else
1160 new_task->map = vm_map_create(pmap_create(ledger, 0, is_64bit),
1161 (vm_map_offset_t)(VM_MIN_ADDRESS),
1162 (vm_map_offset_t)(VM_MAX_ADDRESS), TRUE);
1163
1164 /* Inherit memlock limit from parent */
1165 if (parent_task)
1166 vm_map_set_user_wire_limit(new_task->map, (vm_size_t)parent_task->map->user_wire_limit);
1167
1168 lck_mtx_init(&new_task->lock, &task_lck_grp, &task_lck_attr);
1169 queue_init(&new_task->threads);
1170 new_task->suspend_count = 0;
1171 new_task->thread_count = 0;
1172 new_task->active_thread_count = 0;
1173 new_task->user_stop_count = 0;
1174 new_task->legacy_stop_count = 0;
1175 new_task->active = TRUE;
1176 new_task->halting = FALSE;
1177 new_task->priv_flags = 0;
1178 new_task->t_flags = t_flags;
1179 new_task->t_procflags = t_procflags;
1180 new_task->importance = 0;
1181 new_task->crashed_thread_id = 0;
1182 new_task->exec_token = 0;
1183
1184 new_task->task_exc_guard = task_exc_guard_default;
1185
1186#if CONFIG_ATM
1187 new_task->atm_context = NULL;
1188#endif
1189 new_task->bank_context = NULL;
1190
1191#ifdef MACH_BSD
1192 new_task->bsd_info = NULL;
1193 new_task->corpse_info = NULL;
1194#endif /* MACH_BSD */
1195
1196#if CONFIG_MACF
1197 new_task->crash_label = NULL;
1198#endif
1199
1200#if CONFIG_MEMORYSTATUS
1201 if (max_task_footprint != 0) {
1202 ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL);
1203 }
1204#endif /* CONFIG_MEMORYSTATUS */
1205
1206 if (task_wakeups_monitor_rate != 0) {
1207 uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
1208 int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS
1209 task_wakeups_monitor_ctl(new_task, &flags, &rate);
1210 }
1211
1212#if CONFIG_IO_ACCOUNTING
1213 uint32_t flags = IOMON_ENABLE;
1214 task_io_monitor_ctl(new_task, &flags);
1215#endif /* CONFIG_IO_ACCOUNTING */
1216
1217 machine_task_init(new_task, parent_task, inherit_memory);
1218
1219 new_task->task_debug = NULL;
1220
1221#if DEVELOPMENT || DEBUG
1222 new_task->task_unnested = FALSE;
1223 new_task->task_disconnected_count = 0;
1224#endif
1225 queue_init(&new_task->semaphore_list);
1226 new_task->semaphores_owned = 0;
1227
1228 ipc_task_init(new_task, parent_task);
1229
1230 new_task->vtimers = 0;
1231
1232 new_task->shared_region = NULL;
1233
1234 new_task->affinity_space = NULL;
1235
1236 new_task->t_kpc = 0;
1237
1238 new_task->pidsuspended = FALSE;
1239 new_task->frozen = FALSE;
1240 new_task->changing_freeze_state = FALSE;
1241 new_task->rusage_cpu_flags = 0;
1242 new_task->rusage_cpu_percentage = 0;
1243 new_task->rusage_cpu_interval = 0;
1244 new_task->rusage_cpu_deadline = 0;
1245 new_task->rusage_cpu_callt = NULL;
1246#if MACH_ASSERT
1247 new_task->suspends_outstanding = 0;
1248#endif
1249
1250#if HYPERVISOR
1251 new_task->hv_task_target = NULL;
1252#endif /* HYPERVISOR */
1253
1254#if CONFIG_EMBEDDED
1255 queue_init(&new_task->task_watchers);
1256 new_task->num_taskwatchers = 0;
1257 new_task->watchapplying = 0;
1258#endif /* CONFIG_EMBEDDED */
1259
1260 new_task->mem_notify_reserved = 0;
1261 new_task->memlimit_attrs_reserved = 0;
1262
1263 new_task->requested_policy = default_task_requested_policy;
1264 new_task->effective_policy = default_task_effective_policy;
1265
1266 task_importance_init_from_parent(new_task, parent_task);
1267
1268 if (parent_task != TASK_NULL) {
1269 new_task->sec_token = parent_task->sec_token;
1270 new_task->audit_token = parent_task->audit_token;
1271
1272 /* inherit the parent's shared region */
1273 shared_region = vm_shared_region_get(parent_task);
1274 vm_shared_region_set(new_task, shared_region);
1275
1276 if(task_has_64Bit_addr(parent_task)) {
1277 task_set_64Bit_addr(new_task);
1278 }
1279
1280 if(task_has_64Bit_data(parent_task)) {
1281 task_set_64Bit_data(new_task);
1282 }
1283
1284 new_task->all_image_info_addr = parent_task->all_image_info_addr;
1285 new_task->all_image_info_size = parent_task->all_image_info_size;
1286
1287 if (inherit_memory && parent_task->affinity_space)
1288 task_affinity_create(parent_task, new_task);
1289
1290 new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task);
1291
1292 new_task->priority = BASEPRI_DEFAULT;
1293 new_task->max_priority = MAXPRI_USER;
1294
1295 task_policy_create(new_task, parent_task);
1296 } else {
1297 new_task->sec_token = KERNEL_SECURITY_TOKEN;
1298 new_task->audit_token = KERNEL_AUDIT_TOKEN;
1299#ifdef __LP64__
1300 if(is_64bit) {
1301 task_set_64Bit_addr(new_task);
1302 }
1303#endif
1304
1305 if(is_64bit_data) {
1306 task_set_64Bit_data(new_task);
1307 }
1308
1309 new_task->all_image_info_addr = (mach_vm_address_t)0;
1310 new_task->all_image_info_size = (mach_vm_size_t)0;
1311
1312 new_task->pset_hint = PROCESSOR_SET_NULL;
1313
1314 if (kernel_task == TASK_NULL) {
1315 new_task->priority = BASEPRI_KERNEL;
1316 new_task->max_priority = MAXPRI_KERNEL;
1317 } else {
1318 new_task->priority = BASEPRI_DEFAULT;
1319 new_task->max_priority = MAXPRI_USER;
1320 }
1321 }
1322
1323 bzero(new_task->coalition, sizeof(new_task->coalition));
1324 for (int i = 0; i < COALITION_NUM_TYPES; i++)
1325 queue_chain_init(new_task->task_coalition[i]);
1326
1327 /* Allocate I/O Statistics */
1328 new_task->task_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1329 assert(new_task->task_io_stats != NULL);
1330 bzero(new_task->task_io_stats, sizeof(struct io_stat_info));
1331
1332 bzero(&(new_task->cpu_time_eqos_stats), sizeof(new_task->cpu_time_eqos_stats));
1333 bzero(&(new_task->cpu_time_rqos_stats), sizeof(new_task->cpu_time_rqos_stats));
1334
1335 bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics));
1336
	/* Copy resource accounting info from the parent for a corpse-forked task. */
1338 if (parent_task != NULL && (t_flags & TF_CORPSE_FORK)) {
1339 task_rollup_accounting_info(new_task, parent_task);
1340 } else {
1341 /* Initialize to zero for standard fork/spawn case */
1342 new_task->total_user_time = 0;
1343 new_task->total_system_time = 0;
1344 new_task->total_ptime = 0;
1345 new_task->total_runnable_time = 0;
1346 new_task->faults = 0;
1347 new_task->pageins = 0;
1348 new_task->cow_faults = 0;
1349 new_task->messages_sent = 0;
1350 new_task->messages_received = 0;
1351 new_task->syscalls_mach = 0;
1352 new_task->syscalls_unix = 0;
1353 new_task->c_switch = 0;
1354 new_task->p_switch = 0;
1355 new_task->ps_switch = 0;
1356 new_task->low_mem_notified_warn = 0;
1357 new_task->low_mem_notified_critical = 0;
1358 new_task->purged_memory_warn = 0;
1359 new_task->purged_memory_critical = 0;
1360 new_task->low_mem_privileged_listener = 0;
1361 new_task->memlimit_is_active = 0;
1362 new_task->memlimit_is_fatal = 0;
1363 new_task->memlimit_active_exc_resource = 0;
1364 new_task->memlimit_inactive_exc_resource = 0;
1365 new_task->task_timer_wakeups_bin_1 = 0;
1366 new_task->task_timer_wakeups_bin_2 = 0;
1367 new_task->task_gpu_ns = 0;
1368 new_task->task_immediate_writes = 0;
1369 new_task->task_deferred_writes = 0;
1370 new_task->task_invalidated_writes = 0;
1371 new_task->task_metadata_writes = 0;
1372 new_task->task_energy = 0;
1373#if MONOTONIC
1374 memset(&new_task->task_monotonic, 0, sizeof(new_task->task_monotonic));
1375#endif /* MONOTONIC */
1376 }
1377
1378
1379#if CONFIG_COALITIONS
1380 if (!(t_flags & TF_CORPSE_FORK)) {
1381 /* TODO: there is no graceful failure path here... */
1382 if (parent_coalitions && parent_coalitions[COALITION_TYPE_RESOURCE]) {
1383 coalitions_adopt_task(parent_coalitions, new_task);
1384 } else if (parent_task && parent_task->coalition[COALITION_TYPE_RESOURCE]) {
1385 /*
1386 * all tasks at least have a resource coalition, so
1387 * if the parent has one then inherit all coalitions
1388 * the parent is a part of
1389 */
1390 coalitions_adopt_task(parent_task->coalition, new_task);
1391 } else {
1392 /* TODO: assert that new_task will be PID 1 (launchd) */
1393 coalitions_adopt_init_task(new_task);
1394 }
1395 /*
1396 * on exec, we need to transfer the coalition roles from the
1397 * parent task to the exec copy task.
1398 */
1399 if (parent_task && (t_procflags & TPF_EXEC_COPY)) {
1400 int coal_roles[COALITION_NUM_TYPES];
1401 task_coalition_roles(parent_task, coal_roles);
1402 (void)coalitions_set_roles(new_task->coalition, new_task, coal_roles);
1403 }
1404 } else {
1405 coalitions_adopt_corpse_task(new_task);
1406 }
1407
1408 if (new_task->coalition[COALITION_TYPE_RESOURCE] == COALITION_NULL) {
1409 panic("created task is not a member of a resource coalition");
1410 }
1411#endif /* CONFIG_COALITIONS */
1412
1413 new_task->dispatchqueue_offset = 0;
1414 if (parent_task != NULL) {
1415 new_task->dispatchqueue_offset = parent_task->dispatchqueue_offset;
1416 }
1417
1418 new_task->task_volatile_objects = 0;
1419 new_task->task_nonvolatile_objects = 0;
1420 new_task->task_purgeable_disowning = FALSE;
1421 new_task->task_purgeable_disowned = FALSE;
1422 queue_init(&new_task->task_objq);
1423 task_objq_lock_init(new_task);
1424
1425#if __arm64__
1426 new_task->task_legacy_footprint = FALSE;
1427#endif /* __arm64__ */
1428 new_task->task_region_footprint = FALSE;
1429 new_task->task_has_crossed_thread_limit = FALSE;
1430 new_task->task_thread_limit = 0;
1431#if CONFIG_SECLUDED_MEMORY
1432 new_task->task_can_use_secluded_mem = FALSE;
1433 new_task->task_could_use_secluded_mem = FALSE;
1434 new_task->task_could_also_use_secluded_mem = FALSE;
1435 new_task->task_suppressed_secluded = FALSE;
1436#endif /* CONFIG_SECLUDED_MEMORY */
1437
1438 /*
1439 * t_flags is set up above. But since we don't
1440 * support darkwake mode being set that way
1441 * currently, we clear it out here explicitly.
1442 */
1443 new_task->t_flags &= ~(TF_DARKWAKE_MODE);
1444
1445 queue_init(&new_task->io_user_clients);
1446
1447 ipc_task_enable(new_task);
1448
1449 lck_mtx_lock(&tasks_threads_lock);
1450 queue_enter(&tasks, new_task, task_t, tasks);
1451 tasks_count++;
1452 if (tasks_suspend_state) {
1453 task_suspend_internal(new_task);
1454 }
1455 lck_mtx_unlock(&tasks_threads_lock);
1456
1457 *child_task = new_task;
1458 return(KERN_SUCCESS);
1459}
1460
1461/*
1462 * task_rollup_accounting_info
1463 *
 * Roll up accounting stats. Used to roll up stats
 * for an exec-copy task and for a corpse fork.
1466 */
1467void
1468task_rollup_accounting_info(task_t to_task, task_t from_task)
1469{
1470 assert(from_task != to_task);
1471
1472 to_task->total_user_time = from_task->total_user_time;
1473 to_task->total_system_time = from_task->total_system_time;
1474 to_task->total_ptime = from_task->total_ptime;
1475 to_task->total_runnable_time = from_task->total_runnable_time;
1476 to_task->faults = from_task->faults;
1477 to_task->pageins = from_task->pageins;
1478 to_task->cow_faults = from_task->cow_faults;
1479 to_task->messages_sent = from_task->messages_sent;
1480 to_task->messages_received = from_task->messages_received;
1481 to_task->syscalls_mach = from_task->syscalls_mach;
1482 to_task->syscalls_unix = from_task->syscalls_unix;
1483 to_task->c_switch = from_task->c_switch;
1484 to_task->p_switch = from_task->p_switch;
1485 to_task->ps_switch = from_task->ps_switch;
1486 to_task->extmod_statistics = from_task->extmod_statistics;
1487 to_task->low_mem_notified_warn = from_task->low_mem_notified_warn;
1488 to_task->low_mem_notified_critical = from_task->low_mem_notified_critical;
1489 to_task->purged_memory_warn = from_task->purged_memory_warn;
1490 to_task->purged_memory_critical = from_task->purged_memory_critical;
1491 to_task->low_mem_privileged_listener = from_task->low_mem_privileged_listener;
1492 *to_task->task_io_stats = *from_task->task_io_stats;
1493 to_task->cpu_time_eqos_stats = from_task->cpu_time_eqos_stats;
1494 to_task->cpu_time_rqos_stats = from_task->cpu_time_rqos_stats;
1495 to_task->task_timer_wakeups_bin_1 = from_task->task_timer_wakeups_bin_1;
1496 to_task->task_timer_wakeups_bin_2 = from_task->task_timer_wakeups_bin_2;
1497 to_task->task_gpu_ns = from_task->task_gpu_ns;
1498 to_task->task_immediate_writes = from_task->task_immediate_writes;
1499 to_task->task_deferred_writes = from_task->task_deferred_writes;
1500 to_task->task_invalidated_writes = from_task->task_invalidated_writes;
1501 to_task->task_metadata_writes = from_task->task_metadata_writes;
1502 to_task->task_energy = from_task->task_energy;
1503
1504 /* Skip ledger roll up for memory accounting entries */
1505 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time);
1506 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.platform_idle_wakeups);
1507 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.interrupt_wakeups);
1508#if CONFIG_SCHED_SFI
1509 for (sfi_class_id_t class_id = SFI_CLASS_UNSPECIFIED; class_id < MAX_SFI_CLASS_ID; class_id++) {
1510 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.sfi_wait_times[class_id]);
1511 }
1512#endif
1513 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_me);
1514 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.cpu_time_billed_to_others);
1515 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.physical_writes);
1516 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.logical_writes);
1517 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_me);
1518 ledger_rollup_entry(to_task->ledger, from_task->ledger, task_ledgers.energy_billed_to_others);
1519}
1520
1521int task_dropped_imp_count = 0;
1522
1523/*
1524 * task_deallocate:
1525 *
1526 * Drop a reference on a task.
1527 */
1528void
1529task_deallocate(
1530 task_t task)
1531{
1532 ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups;
1533 uint32_t refs;
1534
1535 if (task == TASK_NULL)
1536 return;
1537
1538 refs = task_deallocate_internal(task);
1539
1540#if IMPORTANCE_INHERITANCE
1541 if (refs > 1)
1542 return;
1543
1544 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1545
1546 if (refs == 1) {
1547 /*
		 * If the last ref potentially comes from the task's importance,
		 * disconnect it.  But more task refs may be added before
		 * that completes, so wait for the reference to go to zero
		 * naturally (it may happen on a recursive task_deallocate()
		 * from the ipc_importance_disconnect_task() call).
1553 */
1554 if (IIT_NULL != task->task_imp_base)
1555 ipc_importance_disconnect_task(task);
1556 return;
1557 }
1558#else
1559 if (refs > 0)
1560 return;
1561
1562 atomic_load_explicit(&task->ref_count, memory_order_acquire);
1563
1564#endif /* IMPORTANCE_INHERITANCE */
1565
1566 lck_mtx_lock(&tasks_threads_lock);
1567 queue_remove(&terminated_tasks, task, task_t, tasks);
1568 terminated_tasks_count--;
1569 lck_mtx_unlock(&tasks_threads_lock);
1570
1571 /*
	 * remove the reference on the ATM descriptor
1573 */
1574 task_atm_reset(task);
1575
1576 /*
	 * remove the reference on the bank context
1578 */
1579 task_bank_reset(task);
1580
1581 if (task->task_io_stats)
1582 kfree(task->task_io_stats, sizeof(struct io_stat_info));
1583
1584 /*
1585 * Give the machine dependent code a chance
1586 * to perform cleanup before ripping apart
1587 * the task.
1588 */
1589 machine_task_terminate(task);
1590
1591 ipc_task_terminate(task);
1592
1593 /* let iokit know */
1594 iokit_task_terminate(task);
1595
1596 if (task->affinity_space)
1597 task_affinity_deallocate(task);
1598
1599#if MACH_ASSERT
1600 if (task->ledger != NULL &&
1601 task->map != NULL &&
1602 task->map->pmap != NULL &&
1603 task->map->pmap->ledger != NULL) {
1604 assert(task->ledger == task->map->pmap->ledger);
1605 }
1606#endif /* MACH_ASSERT */
1607
1608 vm_purgeable_disown(task);
1609 assert(task->task_purgeable_disowned);
1610 if (task->task_volatile_objects != 0 ||
1611 task->task_nonvolatile_objects != 0) {
1612 panic("task_deallocate(%p): "
1613 "volatile_objects=%d nonvolatile_objects=%d\n",
1614 task,
1615 task->task_volatile_objects,
1616 task->task_nonvolatile_objects);
1617 }
1618
1619 vm_map_deallocate(task->map);
1620 is_release(task->itk_space);
1621
1622 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
1623 &interrupt_wakeups, &debit);
1624 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
1625 &platform_idle_wakeups, &debit);
1626
1627#if defined(CONFIG_SCHED_MULTIQ)
1628 sched_group_destroy(task->sched_group);
1629#endif
1630
1631 /* Accumulate statistics for dead tasks */
1632 lck_spin_lock(&dead_task_statistics_lock);
1633 dead_task_statistics.total_user_time += task->total_user_time;
1634 dead_task_statistics.total_system_time += task->total_system_time;
1635
1636 dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups;
1637 dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups;
1638
1639 dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1;
1640 dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2;
1641 dead_task_statistics.total_ptime += task->total_ptime;
1642 dead_task_statistics.total_pset_switches += task->ps_switch;
1643 dead_task_statistics.task_gpu_ns += task->task_gpu_ns;
1644 dead_task_statistics.task_energy += task->task_energy;
1645
1646 lck_spin_unlock(&dead_task_statistics_lock);
1647 lck_mtx_destroy(&task->lock, &task_lck_grp);
1648
1649 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_private, &credit,
1650 &debit)) {
1651 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_private.alloc);
1652 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_private.free);
1653 }
1654 if (!ledger_get_entries(task->ledger, task_ledgers.tkm_shared, &credit,
1655 &debit)) {
1656 OSAddAtomic64(credit, (int64_t *)&tasks_tkm_shared.alloc);
1657 OSAddAtomic64(debit, (int64_t *)&tasks_tkm_shared.free);
1658 }
1659 ledger_dereference(task->ledger);
1660
1661#if TASK_REFERENCE_LEAK_DEBUG
1662 btlog_remove_entries_for_element(task_ref_btlog, task);
1663#endif
1664
1665#if CONFIG_COALITIONS
1666 task_release_coalitions(task);
1667#endif /* CONFIG_COALITIONS */
1668
1669 bzero(task->coalition, sizeof(task->coalition));
1670
1671#if MACH_BSD
1672 /* clean up collected information since last reference to task is gone */
1673 if (task->corpse_info) {
1674 void *corpse_info_kernel = kcdata_memory_get_begin_addr(task->corpse_info);
1675 task_crashinfo_destroy(task->corpse_info);
1676 task->corpse_info = NULL;
1677 if (corpse_info_kernel) {
1678 kfree(corpse_info_kernel, CORPSEINFO_ALLOCATION_SIZE);
1679 }
1680 }
1681#endif
1682
1683#if CONFIG_MACF
1684 if (task->crash_label) {
1685 mac_exc_free_label(task->crash_label);
1686 task->crash_label = NULL;
1687 }
1688#endif
1689
1690 assert(queue_empty(&task->task_objq));
1691
1692 zfree(task_zone, task);
1693}
1694
1695/*
1696 * task_name_deallocate:
1697 *
1698 * Drop a reference on a task name.
1699 */
1700void
1701task_name_deallocate(
1702 task_name_t task_name)
1703{
1704 return(task_deallocate((task_t)task_name));
1705}
1706
1707/*
1708 * task_inspect_deallocate:
1709 *
1710 * Drop a task inspection reference.
1711 */
1712void
1713task_inspect_deallocate(
1714 task_inspect_t task_inspect)
1715{
1716 return(task_deallocate((task_t)task_inspect));
1717}
1718
1719/*
1720 * task_suspension_token_deallocate:
1721 *
1722 * Drop a reference on a task suspension token.
1723 */
1724void
1725task_suspension_token_deallocate(
1726 task_suspension_token_t token)
1727{
1728 return(task_deallocate((task_t)token));
1729}
1730
1731
1732/*
1733 * task_collect_crash_info:
1734 *
1735 *	Collect crash info from BSD and Mach based data.
1736 */
1737kern_return_t
1738task_collect_crash_info(
1739 task_t task,
1740#if CONFIG_MACF
1741 struct label *crash_label,
1742#endif
1743 int is_corpse_fork)
1744{
1745 kern_return_t kr = KERN_SUCCESS;
1746
1747 kcdata_descriptor_t crash_data = NULL;
1748 kcdata_descriptor_t crash_data_release = NULL;
1749 mach_msg_type_number_t size = CORPSEINFO_ALLOCATION_SIZE;
1750 mach_vm_offset_t crash_data_ptr = 0;
1751 void *crash_data_kernel = NULL;
1752 void *crash_data_kernel_release = NULL;
1753#if CONFIG_MACF
1754 struct label *label, *free_label;
1755#endif
1756
1757 if (!corpses_enabled()) {
1758 return KERN_NOT_SUPPORTED;
1759 }
1760
1761#if CONFIG_MACF
1762 free_label = label = mac_exc_create_label();
1763#endif
1764
1765 task_lock(task);
1766
1767 assert(is_corpse_fork || task->bsd_info != NULL);
1768 if (task->corpse_info == NULL && (is_corpse_fork || task->bsd_info != NULL)) {
1769#if CONFIG_MACF
1770 /* Set the crash label, used by the exception delivery mac hook */
1771 free_label = task->crash_label; // Most likely NULL.
1772 task->crash_label = label;
1773 mac_exc_update_task_crash_label(task, crash_label);
1774#endif
1775 task_unlock(task);
1776
1777 crash_data_kernel = (void *) kalloc(CORPSEINFO_ALLOCATION_SIZE);
1778 if (crash_data_kernel == NULL) {
1779 kr = KERN_RESOURCE_SHORTAGE;
1780 goto out_no_lock;
1781 }
1782 bzero(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1783 crash_data_ptr = (mach_vm_offset_t) crash_data_kernel;
1784
1785 /* Do not get a corpse ref for corpse fork */
1786 crash_data = task_crashinfo_alloc_init((mach_vm_address_t)crash_data_ptr, size,
1787 is_corpse_fork ? 0 : CORPSE_CRASHINFO_HAS_REF,
1788 KCFLAG_USE_MEMCOPY);
1789 if (crash_data) {
1790 task_lock(task);
1791 crash_data_release = task->corpse_info;
1792 crash_data_kernel_release = kcdata_memory_get_begin_addr(crash_data_release);
1793 task->corpse_info = crash_data;
1794
1795 task_unlock(task);
1796 kr = KERN_SUCCESS;
1797 } else {
1798 kfree(crash_data_kernel, CORPSEINFO_ALLOCATION_SIZE);
1799 kr = KERN_FAILURE;
1800 }
1801
1802 if (crash_data_release != NULL) {
1803 task_crashinfo_destroy(crash_data_release);
1804 }
1805 if (crash_data_kernel_release != NULL) {
1806 kfree(crash_data_kernel_release, CORPSEINFO_ALLOCATION_SIZE);
1807 }
1808 } else {
1809 task_unlock(task);
1810 }
1811
1812out_no_lock:
1813#if CONFIG_MACF
1814 if (free_label != NULL) {
1815 mac_exc_free_label(free_label);
1816 }
1817#endif
1818 return kr;
1819}
1820
1821/*
1822 * task_deliver_crash_notification:
1823 *
1824 * Makes outcall to registered host port for a corpse.
1825 */
1826kern_return_t
1827task_deliver_crash_notification(
1828 task_t task,
1829 thread_t thread,
1830 exception_type_t etype,
1831 mach_exception_subcode_t subcode)
1832{
1833 kcdata_descriptor_t crash_info = task->corpse_info;
1834 thread_t th_iter = NULL;
1835 kern_return_t kr = KERN_SUCCESS;
1836 wait_interrupt_t wsave;
1837 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
1838 ipc_port_t task_port, old_notify;
1839
1840 if (crash_info == NULL)
1841 return KERN_FAILURE;
1842
1843 task_lock(task);
1844 if (task_is_a_corpse_fork(task)) {
1845 /* Populate code with EXC_{RESOURCE,GUARD} for corpse fork */
1846 code[0] = etype;
1847 code[1] = subcode;
1848 } else {
1849 /* Populate code with EXC_CRASH for corpses */
1850 code[0] = EXC_CRASH;
1851 code[1] = 0;
1852 /* Update the code[1] if the boot-arg corpse_for_fatal_memkill is set */
1853 if (corpse_for_fatal_memkill) {
1854 code[1] = subcode;
1855 }
1856 }
1857
1858 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
1859 {
1860 if (th_iter->corpse_dup == FALSE) {
1861 ipc_thread_reset(th_iter);
1862 }
1863 }
1864 task_unlock(task);
1865
1866 /* Arm the no-sender notification for taskport */
1867 task_reference(task);
1868 task_port = convert_task_to_port(task);
1869 ip_lock(task_port);
1870 assert(ip_active(task_port));
1871 ipc_port_nsrequest(task_port, task_port->ip_mscount, ipc_port_make_sonce_locked(task_port), &old_notify);
1872 /* port unlocked */
1873 assert(IP_NULL == old_notify);
1874
1875 wsave = thread_interrupt_level(THREAD_UNINT);
1876 kr = exception_triage_thread(EXC_CORPSE_NOTIFY, code, EXCEPTION_CODE_MAX, thread);
1877 if (kr != KERN_SUCCESS) {
1878 printf("Failed to send exception EXC_CORPSE_NOTIFY. error code: %d for pid %d\n", kr, task_pid(task));
1879 }
1880
1881 (void)thread_interrupt_level(wsave);
1882
1883 /*
1884	 * Drop the send right on the task port; this will fire the
1885	 * no-senders notification if exception delivery failed.
1886 */
1887 ipc_port_release_send(task_port);
1888 return kr;
1889}
1890
1891/*
1892 * task_terminate:
1893 *
1894 * Terminate the specified task. See comments on thread_terminate
1895 * (kern/thread.c) about problems with terminating the "current task."
1896 */
1897
1898kern_return_t
1899task_terminate(
1900 task_t task)
1901{
1902 if (task == TASK_NULL)
1903 return (KERN_INVALID_ARGUMENT);
1904
1905 if (task->bsd_info)
1906 return (KERN_FAILURE);
1907
1908 return (task_terminate_internal(task));
1909}
1910
1911#if MACH_ASSERT
1912extern int proc_pid(struct proc *);
1913extern void proc_name_kdp(task_t t, char *buf, int size);
1914#endif /* MACH_ASSERT */
1915
1916#define VM_MAP_PARTIAL_REAP 0x54 /* 0x150 */
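/*
 * task_partial_reap:
 *
 * Reclaim what memory can be released early from the task's map,
 * emitting kdebug tracepoints with the resident and compressed page
 * counts reclaimed.  Currently unused (marked __unused); see
 * PR-17045188 below.
 */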
1917static void
1918__unused task_partial_reap(task_t task, __unused int pid)
1919{
1920 unsigned int reclaimed_resident = 0;
1921 unsigned int reclaimed_compressed = 0;
1922 uint64_t task_page_count;
1923
1924 task_page_count = (get_task_phys_footprint(task) / PAGE_SIZE_64);
1925
1926 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_START),
1927 pid, task_page_count, 0, 0, 0);
1928
1929 vm_map_partial_reap(task->map, &reclaimed_resident, &reclaimed_compressed);
1930
1931 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_MAP_PARTIAL_REAP) | DBG_FUNC_END),
1932 pid, reclaimed_resident, reclaimed_compressed, 0, 0);
1933}
1934
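/*
 * task_mark_corpse:
 *
 * Turn the current task into a corpse: collect crash info, mark the
 * task as a corpse with a report pending, halt its other threads,
 * reset its IPC state and terminate its IPC space, add it to the
 * global corpse list, and terminate the calling thread.
 *
 * Conditions:
 *	Called on the current task, which must not be the kernel task
 *	or already a corpse.
 */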
1935kern_return_t
1936task_mark_corpse(task_t task)
1937{
1938 kern_return_t kr = KERN_SUCCESS;
1939 thread_t self_thread;
1940 (void) self_thread;
1941 wait_interrupt_t wsave;
1942#if CONFIG_MACF
1943 struct label *crash_label = NULL;
1944#endif
1945
1946 assert(task != kernel_task);
1947 assert(task == current_task());
1948 assert(!task_is_a_corpse(task));
1949
1950#if CONFIG_MACF
1951 crash_label = mac_exc_create_label_for_proc((struct proc*)task->bsd_info);
1952#endif
1953
1954 kr = task_collect_crash_info(task,
1955#if CONFIG_MACF
1956 crash_label,
1957#endif
1958 FALSE);
1959 if (kr != KERN_SUCCESS) {
1960 goto out;
1961 }
1962
1963 self_thread = current_thread();
1964
1965 wsave = thread_interrupt_level(THREAD_UNINT);
1966 task_lock(task);
1967
1968 task_set_corpse_pending_report(task);
1969 task_set_corpse(task);
1970 task->crashed_thread_id = thread_tid(self_thread);
1971
1972 kr = task_start_halt_locked(task, TRUE);
1973 assert(kr == KERN_SUCCESS);
1974
1975 ipc_task_reset(task);
1976 /* Remove the naked send right for task port, needed to arm no sender notification */
1977 task_set_special_port(task, TASK_KERNEL_PORT, IPC_PORT_NULL);
1978 ipc_task_enable(task);
1979
1980 task_unlock(task);
1981 /* terminate the ipc space */
1982 ipc_space_terminate(task->itk_space);
1983
1984 /* Add it to global corpse task list */
1985 task_add_to_corpse_task_list(task);
1986
1987 task_start_halt(task);
1988 thread_terminate_internal(self_thread);
1989
1990 (void) thread_interrupt_level(wsave);
1991 assert(task->halting == TRUE);
1992
1993out:
1994#if CONFIG_MACF
1995 mac_exc_free_label(crash_label);
1996#endif
1997 return kr;
1998}
1999
2000/*
2001 * task_clear_corpse
2002 *
2003 * Clears the corpse pending bit on task.
2004 * Removes inspection bit on the threads.
2005 */
2006void
2007task_clear_corpse(task_t task)
2008{
2009 thread_t th_iter = NULL;
2010
2011 task_lock(task);
2012 queue_iterate(&task->threads, th_iter, thread_t, task_threads)
2013 {
2014 thread_mtx_lock(th_iter);
2015 th_iter->inspection = FALSE;
2016 thread_mtx_unlock(th_iter);
2017 }
2018
2019 thread_terminate_crashed_threads();
2020 /* remove the pending corpse report flag */
2021 task_clear_corpse_pending_report(task);
2022
2023 task_unlock(task);
2024}
2025
2026/*
2027 * task_port_notify
2028 *
2029 * Called whenever the Mach port system detects no-senders on
2030 * the task port of a corpse.
2031 * Each notification that comes in should terminate the task (corpse).
2032 */
2033void
2034task_port_notify(mach_msg_header_t *msg)
2035{
2036 mach_no_senders_notification_t *notification = (void *)msg;
2037 ipc_port_t port = notification->not_header.msgh_remote_port;
2038 task_t task;
2039
2040 assert(ip_active(port));
2041 assert(IKOT_TASK == ip_kotype(port));
2042 task = (task_t) port->ip_kobject;
2043
2044 assert(task_is_a_corpse(task));
2045
2046 /* Remove the task from global corpse task list */
2047 task_remove_from_corpse_task_list(task);
2048
2049 task_clear_corpse(task);
2050 task_terminate_internal(task);
2051}
2052
2053/*
2054 * task_wait_till_threads_terminate_locked
2055 *
2056 * Wait until all the threads in the task are terminated.
2057 * Might release the task lock and re-acquire it.
2058 */
2059void
2060task_wait_till_threads_terminate_locked(task_t task)
2061{
2062 /* wait for all the threads in the task to terminate */
2063 while (task->active_thread_count != 0) {
2064 assert_wait((event_t)&task->active_thread_count, THREAD_UNINT);
2065 task_unlock(task);
2066 thread_block(THREAD_CONTINUE_NULL);
2067
2068 task_lock(task);
2069 }
2070}
2071
2072/*
2073 * task_duplicate_map_and_threads
2074 *
2075 * Copy the vm map of the source task.
2076 * Copy active threads from the source task to the destination task.
2077 * The source task is suspended during the copy.
2078 */
2079kern_return_t
2080task_duplicate_map_and_threads(
2081 task_t task,
2082 void *p,
2083 task_t new_task,
2084 thread_t *thread_ret,
2085 uint64_t **udata_buffer,
2086 int *size,
2087 int *num_udata)
2088{
2089 kern_return_t kr = KERN_SUCCESS;
2090 int active;
2091 thread_t thread, self, thread_return = THREAD_NULL;
2092 thread_t new_thread = THREAD_NULL, first_thread = THREAD_NULL;
2093 thread_t *thread_array;
2094 uint32_t active_thread_count = 0, array_count = 0, i;
2095 vm_map_t oldmap;
2096 uint64_t *buffer = NULL;
2097 int buf_size = 0;
2098 int est_knotes = 0, num_knotes = 0;
2099
2100 self = current_thread();
2101
2102 /*
2103	 * Suspend the task to copy thread state; use the internal
2104	 * variant so that no user-space process can resume
2105	 * the task from under us.
2106 */
2107 kr = task_suspend_internal(task);
2108 if (kr != KERN_SUCCESS) {
2109 return kr;
2110 }
2111
2112 if (task->map->disable_vmentry_reuse == TRUE) {
2113 /*
2114 * Quite likely GuardMalloc (or some debugging tool)
2115		 * is being used on this task, and it has gone through
2116 * its limit. Making a corpse will likely encounter
2117 * a lot of VM entries that will need COW.
2118 *
2119 * Skip it.
2120 */
2121#if DEVELOPMENT || DEBUG
2122 memorystatus_abort_vm_map_fork(task);
2123#endif
2124 task_resume_internal(task);
2125 return KERN_FAILURE;
2126 }
2127
2128 /* Check with VM if vm_map_fork is allowed for this task */
2129 if (memorystatus_allowed_vm_map_fork(task)) {
2130
2131		/* Set up the new task's vmmap, switching from the parent task's map to its COW map */
2132 oldmap = new_task->map;
2133 new_task->map = vm_map_fork(new_task->ledger,
2134 task->map,
2135 (VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
2136 VM_MAP_FORK_PRESERVE_PURGEABLE |
2137 VM_MAP_FORK_CORPSE_FOOTPRINT));
2138 vm_map_deallocate(oldmap);
2139
2140 /* copy ledgers that impact the memory footprint */
2141 vm_map_copy_footprint_ledgers(task, new_task);
2142
2143 /* Get all the udata pointers from kqueue */
2144 est_knotes = kevent_proc_copy_uptrs(p, NULL, 0);
2145 if (est_knotes > 0) {
2146 buf_size = (est_knotes + 32) * sizeof(uint64_t);
2147 buffer = (uint64_t *) kalloc(buf_size);
2148 num_knotes = kevent_proc_copy_uptrs(p, buffer, buf_size);
2149 if (num_knotes > est_knotes + 32) {
2150 num_knotes = est_knotes + 32;
2151 }
2152 }
2153 }
2154
2155 active_thread_count = task->active_thread_count;
2156 if (active_thread_count == 0) {
2157 if (buffer != NULL) {
2158 kfree(buffer, buf_size);
2159 }
2160 task_resume_internal(task);
2161 return KERN_FAILURE;
2162 }
2163
2164 thread_array = (thread_t *) kalloc(sizeof(thread_t) * active_thread_count);
2165
2166 /* Iterate all the threads and drop the task lock before calling thread_create_with_continuation */
2167 task_lock(task);
2168 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2169 /* Skip inactive threads */
2170 active = thread->active;
2171 if (!active) {
2172 continue;
2173 }
2174
2175 if (array_count >= active_thread_count) {
2176 break;
2177 }
2178
2179 thread_array[array_count++] = thread;
2180 thread_reference(thread);
2181 }
2182 task_unlock(task);
2183
2184 for (i = 0; i < array_count; i++) {
2185
2186 kr = thread_create_with_continuation(new_task, &new_thread, (thread_continue_t)thread_corpse_continue);
2187 if (kr != KERN_SUCCESS) {
2188 break;
2189 }
2190
2191 /* Equivalent of current thread in corpse */
2192 if (thread_array[i] == self) {
2193 thread_return = new_thread;
2194 new_task->crashed_thread_id = thread_tid(new_thread);
2195 } else if (first_thread == NULL) {
2196 first_thread = new_thread;
2197 } else {
2198 /* drop the extra ref returned by thread_create_with_continuation */
2199 thread_deallocate(new_thread);
2200 }
2201
2202 kr = thread_dup2(thread_array[i], new_thread);
2203 if (kr != KERN_SUCCESS) {
2204 thread_mtx_lock(new_thread);
2205 new_thread->corpse_dup = TRUE;
2206 thread_mtx_unlock(new_thread);
2207 continue;
2208 }
2209
2210 /* Copy thread name */
2211 bsd_copythreadname(new_thread->uthread, thread_array[i]->uthread);
2212 new_thread->thread_tag = thread_array[i]->thread_tag;
2213 thread_copy_resource_info(new_thread, thread_array[i]);
2214 }
2215
2216 /* return the first thread if we couldn't find the equivalent of current */
2217 if (thread_return == THREAD_NULL) {
2218 thread_return = first_thread;
2219 }
2220 else if (first_thread != THREAD_NULL) {
2221 /* drop the extra ref returned by thread_create_with_continuation */
2222 thread_deallocate(first_thread);
2223 }
2224
2225 task_resume_internal(task);
2226
2227 for (i = 0; i < array_count; i++) {
2228 thread_deallocate(thread_array[i]);
2229 }
2230 kfree(thread_array, sizeof(thread_t) * active_thread_count);
2231
2232 if (kr == KERN_SUCCESS) {
2233 *thread_ret = thread_return;
2234 *udata_buffer = buffer;
2235 *size = buf_size;
2236 *num_udata = num_knotes;
2237 } else {
2238 if (thread_return != THREAD_NULL) {
2239 thread_deallocate(thread_return);
2240 }
2241 if (buffer != NULL) {
2242 kfree(buffer, buf_size);
2243 }
2244 }
2245
2246 return kr;
2247}
2248
2249#if CONFIG_SECLUDED_MEMORY
2250extern void task_set_can_use_secluded_mem_locked(
2251 task_t task,
2252 boolean_t can_use_secluded_mem);
2253#endif /* CONFIG_SECLUDED_MEMORY */
2254
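/*
 * task_terminate_internal:
 *
 * Terminate the specified task: mark it inactive, terminate all of
 * its threads, destroy its synchronizers, tear down its IPC space
 * and address space, and move it to the terminated_tasks queue.
 * Returns KERN_FAILURE if the task is already being terminated or
 * has a corpse report pending.
 */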
2255kern_return_t
2256task_terminate_internal(
2257 task_t task)
2258{
2259 thread_t thread, self;
2260 task_t self_task;
2261 boolean_t interrupt_save;
2262 int pid = 0;
2263
2264 assert(task != kernel_task);
2265
2266 self = current_thread();
2267 self_task = self->task;
2268
2269 /*
2270 * Get the task locked and make sure that we are not racing
2271 * with someone else trying to terminate us.
2272 */
2273 if (task == self_task)
2274 task_lock(task);
2275 else
2276 if (task < self_task) {
2277 task_lock(task);
2278 task_lock(self_task);
2279 }
2280 else {
2281 task_lock(self_task);
2282 task_lock(task);
2283 }
2284
2285#if CONFIG_SECLUDED_MEMORY
2286 if (task->task_can_use_secluded_mem) {
2287 task_set_can_use_secluded_mem_locked(task, FALSE);
2288 }
2289 task->task_could_use_secluded_mem = FALSE;
2290 task->task_could_also_use_secluded_mem = FALSE;
2291
2292 if (task->task_suppressed_secluded) {
2293 stop_secluded_suppression(task);
2294 }
2295#endif /* CONFIG_SECLUDED_MEMORY */
2296
2297 if (!task->active) {
2298 /*
2299 * Task is already being terminated.
2300 * Just return an error. If we are dying, this will
2301 * just get us to our AST special handler and that
2302 * will get us to finalize the termination of ourselves.
2303 */
2304 task_unlock(task);
2305 if (self_task != task)
2306 task_unlock(self_task);
2307
2308 return (KERN_FAILURE);
2309 }
2310
2311 if (task_corpse_pending_report(task)) {
2312 /*
2313 * Task is marked for reporting as corpse.
2314 * Just return an error. This will
2315 * just get us to our AST special handler and that
2316 * will get us to finish the path to death
2317 */
2318 task_unlock(task);
2319 if (self_task != task)
2320 task_unlock(self_task);
2321
2322 return (KERN_FAILURE);
2323 }
2324
2325 if (self_task != task)
2326 task_unlock(self_task);
2327
2328 /*
2329 * Make sure the current thread does not get aborted out of
2330 * the waits inside these operations.
2331 */
2332 interrupt_save = thread_interrupt_level(THREAD_UNINT);
2333
2334 /*
2335 * Indicate that we want all the threads to stop executing
2336 * at user space by holding the task (we would have held
2337 * each thread independently in thread_terminate_internal -
2338 * but this way we may be more likely to already find it
2339 * held there). Mark the task inactive, and prevent
2340 * further task operations via the task port.
2341 */
2342 task_hold_locked(task);
2343 task->active = FALSE;
2344 ipc_task_disable(task);
2345
2346#if CONFIG_TELEMETRY
2347 /*
2348 * Notify telemetry that this task is going away.
2349 */
2350 telemetry_task_ctl_locked(task, TF_TELEMETRY, 0);
2351#endif
2352
2353 /*
2354 * Terminate each thread in the task.
2355 */
2356 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2357 thread_terminate_internal(thread);
2358 }
2359
2360#ifdef MACH_BSD
2361 if (task->bsd_info != NULL && !task_is_exec_copy(task)) {
2362 pid = proc_pid(task->bsd_info);
2363 }
2364#endif /* MACH_BSD */
2365
2366 task_unlock(task);
2367
2368 proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE,
2369 TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
2370
2371 /* Early object reap phase */
2372
2373// PR-17045188: Revisit implementation
2374// task_partial_reap(task, pid);
2375
2376#if CONFIG_EMBEDDED
2377 /*
2378 * remove all task watchers
2379 */
2380 task_removewatchers(task);
2381
2382#endif /* CONFIG_EMBEDDED */
2383
2384 /*
2385 * Destroy all synchronizers owned by the task.
2386 */
2387 task_synchronizer_destroy_all(task);
2388
2389 /*
2390 * Destroy the IPC space, leaving just a reference for it.
2391 */
2392 ipc_space_terminate(task->itk_space);
2393
2394#if 00
2395 /* if some ledgers go negative on tear-down again... */
2396 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2397 task_ledgers.phys_footprint);
2398 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2399 task_ledgers.internal);
2400 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2401 task_ledgers.internal_compressed);
2402 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2403 task_ledgers.iokit_mapped);
2404 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2405 task_ledgers.alternate_accounting);
2406 ledger_disable_panic_on_negative(task->map->pmap->ledger,
2407 task_ledgers.alternate_accounting_compressed);
2408#endif
2409
2410 /*
2411 * If the current thread is a member of the task
2412 * being terminated, then the last reference to
2413 * the task will not be dropped until the thread
2414 * is finally reaped. To avoid incurring the
2415 * expense of removing the address space regions
2416	 * at reap time, we do it explicitly here.
2417 */
2418
2419 vm_map_lock(task->map);
2420 vm_map_disable_hole_optimization(task->map);
2421 vm_map_unlock(task->map);
2422
2423#if MACH_ASSERT
2424 /*
2425 * Identify the pmap's process, in case the pmap ledgers drift
2426 * and we have to report it.
2427 */
2428 char procname[17];
2429 if (task->bsd_info && !task_is_exec_copy(task)) {
2430 pid = proc_pid(task->bsd_info);
2431 proc_name_kdp(task, procname, sizeof (procname));
2432 } else {
2433 pid = 0;
2434 strlcpy(procname, "<unknown>", sizeof (procname));
2435 }
2436 pmap_set_process(task->map->pmap, pid, procname);
2437#endif /* MACH_ASSERT */
2438
2439 vm_map_remove(task->map,
2440 task->map->min_offset,
2441 task->map->max_offset,
2442 /*
2443 * Final cleanup:
2444 * + no unnesting
2445 * + remove immutable mappings
2446 * + allow gaps in range
2447 */
2448 (VM_MAP_REMOVE_NO_UNNESTING |
2449 VM_MAP_REMOVE_IMMUTABLE |
2450 VM_MAP_REMOVE_GAPS_OK));
2451
2452 /* release our shared region */
2453 vm_shared_region_set(task, NULL);
2454
2455
2456 lck_mtx_lock(&tasks_threads_lock);
2457 queue_remove(&tasks, task, task_t, tasks);
2458 queue_enter(&terminated_tasks, task, task_t, tasks);
2459 tasks_count--;
2460 terminated_tasks_count++;
2461 lck_mtx_unlock(&tasks_threads_lock);
2462
2463 /*
2464 * We no longer need to guard against being aborted, so restore
2465 * the previous interruptible state.
2466 */
2467 thread_interrupt_level(interrupt_save);
2468
2469#if KPC
2470 /* force the task to release all ctrs */
2471 if (task->t_kpc & TASK_KPC_FORCED_ALL_CTRS)
2472 kpc_force_all_ctrs(task, 0);
2473#endif /* KPC */
2474
2475#if CONFIG_COALITIONS
2476 /*
2477 * Leave our coalitions. (drop activation but not reference)
2478 */
2479 coalitions_remove_task(task);
2480#endif
2481
2482 /*
2483 * Get rid of the task active reference on itself.
2484 */
2485 task_deallocate(task);
2486
2487 return (KERN_SUCCESS);
2488}
2489
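/*
 * tasks_system_suspend:
 *
 * Suspend or resume every task in the system except the kernel task,
 * recording the requested state in tasks_suspend_state.
 */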
2490void
2491tasks_system_suspend(boolean_t suspend)
2492{
2493 task_t task;
2494
2495 lck_mtx_lock(&tasks_threads_lock);
2496 assert(tasks_suspend_state != suspend);
2497 tasks_suspend_state = suspend;
2498 queue_iterate(&tasks, task, task_t, tasks) {
2499 if (task == kernel_task) {
2500 continue;
2501 }
2502 suspend ? task_suspend_internal(task) : task_resume_internal(task);
2503 }
2504 lck_mtx_unlock(&tasks_threads_lock);
2505}
2506
2507/*
2508 * task_start_halt:
2509 *
2510 * Shut the current task down (except for the current thread) in
2511 * preparation for dramatic changes to the task (probably exec).
2512 * We hold the task and mark all other threads in the task for
2513 * termination.
2514 */
2515kern_return_t
2516task_start_halt(task_t task)
2517{
2518 kern_return_t kr = KERN_SUCCESS;
2519 task_lock(task);
2520 kr = task_start_halt_locked(task, FALSE);
2521 task_unlock(task);
2522 return kr;
2523}
2524
2525static kern_return_t
2526task_start_halt_locked(task_t task, boolean_t should_mark_corpse)
2527{
2528 thread_t thread, self;
2529 uint64_t dispatchqueue_offset;
2530
2531 assert(task != kernel_task);
2532
2533 self = current_thread();
2534
2535 if (task != self->task && !task_is_a_corpse_fork(task))
2536 return (KERN_INVALID_ARGUMENT);
2537
2538 if (task->halting || !task->active || !self->active) {
2539 /*
2540 * Task or current thread is already being terminated.
2541 * Hurry up and return out of the current kernel context
2542 * so that we run our AST special handler to terminate
2543 * ourselves.
2544 */
2545 return (KERN_FAILURE);
2546 }
2547
2548 task->halting = TRUE;
2549
2550 /*
2551 * Mark all the threads to keep them from starting any more
2552 * user-level execution. The thread_terminate_internal code
2553 * would do this on a thread by thread basis anyway, but this
2554 * gives us a better chance of not having to wait there.
2555 */
2556 task_hold_locked(task);
2557 dispatchqueue_offset = get_dispatchqueue_offset_from_proc(task->bsd_info);
2558
2559 /*
2560 * Terminate all the other threads in the task.
2561 */
2562 queue_iterate(&task->threads, thread, thread_t, task_threads)
2563 {
2564 if (should_mark_corpse) {
2565 thread_mtx_lock(thread);
2566 thread->inspection = TRUE;
2567 thread_mtx_unlock(thread);
2568 }
2569 if (thread != self)
2570 thread_terminate_internal(thread);
2571 }
2572 task->dispatchqueue_offset = dispatchqueue_offset;
2573
2574 task_release_locked(task);
2575
2576 return KERN_SUCCESS;
2577}
2578
2579
2580/*
2581 * task_complete_halt:
2582 *
2583 * Complete task halt by waiting for threads to terminate, then clean
2584 * up task resources (VM, port namespace, etc...) and then let the
2585 * current thread go in the (practically empty) task context.
2586 *
2587 * Note: the task->halting flag is not cleared, in order to avoid the
2588 * creation of new threads in the old exec'ed task.
2589 */
2590void
2591task_complete_halt(task_t task)
2592{
2593 task_lock(task);
2594 assert(task->halting);
2595 assert(task == current_task());
2596
2597 /*
2598 * Wait for the other threads to get shut down.
2599 * When the last other thread is reaped, we'll be
2600 * woken up.
2601 */
2602 if (task->thread_count > 1) {
2603 assert_wait((event_t)&task->halting, THREAD_UNINT);
2604 task_unlock(task);
2605 thread_block(THREAD_CONTINUE_NULL);
2606 } else {
2607 task_unlock(task);
2608 }
2609
2610 /*
2611 * Give the machine dependent code a chance
2612 * to perform cleanup of task-level resources
2613 * associated with the current thread before
2614 * ripping apart the task.
2615 */
2616 machine_task_terminate(task);
2617
2618 /*
2619 * Destroy all synchronizers owned by the task.
2620 */
2621 task_synchronizer_destroy_all(task);
2622
2623 /*
2624 * Destroy the contents of the IPC space, leaving just
2625 * a reference for it.
2626 */
2627 ipc_space_clean(task->itk_space);
2628
2629 /*
2630 * Clean out the address space, as we are going to be
2631 * getting a new one.
2632 */
2633 vm_map_remove(task->map, task->map->min_offset,
2634 task->map->max_offset,
2635 /*
2636 * Final cleanup:
2637 * + no unnesting
2638 * + remove immutable mappings
2639 * + allow gaps in the range
2640 */
2641 (VM_MAP_REMOVE_NO_UNNESTING |
2642 VM_MAP_REMOVE_IMMUTABLE |
2643 VM_MAP_REMOVE_GAPS_OK));
2644
2645 /*
2646 * Kick out any IOKitUser handles to the task. At best they're stale,
2647 * at worst someone is racing a SUID exec.
2648 */
2649 iokit_task_terminate(task);
2650}
2651
2652/*
2653 * task_hold_locked:
2654 *
2655 * Suspend execution of the specified task.
2656 *	This is a recursive-style suspension of the task; a count of
2657 *	suspends is maintained.
2658 *
2659 * CONDITIONS: the task is locked and active.
2660 */
2661void
2662task_hold_locked(
2663 task_t task)
2664{
2665 thread_t thread;
2666
2667 assert(task->active);
2668
2669 if (task->suspend_count++ > 0)
2670 return;
2671
2672 if (task->bsd_info) {
2673 workq_proc_suspended(task->bsd_info);
2674 }
2675
2676 /*
2677 * Iterate through all the threads and hold them.
2678 */
2679 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2680 thread_mtx_lock(thread);
2681 thread_hold(thread);
2682 thread_mtx_unlock(thread);
2683 }
2684}
2685
2686/*
2687 * task_hold:
2688 *
2689 *	Same as the internal routine above, except that it must lock
2690 *	and verify that the task is active.  This differs from task_suspend
2691 *	in that it places a kernel hold on the task rather than just a
2692 *	user-level hold.  This keeps users from over-resuming and setting
2693 *	it running out from under the kernel.
2694 *
2695 * CONDITIONS: the caller holds a reference on the task
2696 */
2697kern_return_t
2698task_hold(
2699 task_t task)
2700{
2701 if (task == TASK_NULL)
2702 return (KERN_INVALID_ARGUMENT);
2703
2704 task_lock(task);
2705
2706 if (!task->active) {
2707 task_unlock(task);
2708
2709 return (KERN_FAILURE);
2710 }
2711
2712 task_hold_locked(task);
2713 task_unlock(task);
2714
2715 return (KERN_SUCCESS);
2716}
2717
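/*
 * task_wait:
 *
 * Lock the task, verify that it is active, and wait for its threads
 * to stop (see task_wait_locked).
 *
 * CONDITIONS: The caller holds a reference to the task.
 */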
2718kern_return_t
2719task_wait(
2720 task_t task,
2721 boolean_t until_not_runnable)
2722{
2723 if (task == TASK_NULL)
2724 return (KERN_INVALID_ARGUMENT);
2725
2726 task_lock(task);
2727
2728 if (!task->active) {
2729 task_unlock(task);
2730
2731 return (KERN_FAILURE);
2732 }
2733
2734 task_wait_locked(task, until_not_runnable);
2735 task_unlock(task);
2736
2737 return (KERN_SUCCESS);
2738}
2739
2740/*
2741 * task_wait_locked:
2742 *
2743 * Wait for all threads in task to stop.
2744 *
2745 * Conditions:
2746 * Called with task locked, active, and held.
2747 */
2748void
2749task_wait_locked(
2750 task_t task,
2751 boolean_t until_not_runnable)
2752{
2753 thread_t thread, self;
2754
2755 assert(task->active);
2756 assert(task->suspend_count > 0);
2757
2758 self = current_thread();
2759
2760 /*
2761 * Iterate through all the threads and wait for them to
2762 * stop. Do not wait for the current thread if it is within
2763 * the task.
2764 */
2765 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2766 if (thread != self)
2767 thread_wait(thread, until_not_runnable);
2768 }
2769}
2770
2771/*
2772 * task_release_locked:
2773 *
2774 * Release a kernel hold on a task.
2775 *
2776 * CONDITIONS: the task is locked and active
2777 */
2778void
2779task_release_locked(
2780 task_t task)
2781{
2782 thread_t thread;
2783
2784 assert(task->active);
2785 assert(task->suspend_count > 0);
2786
2787 if (--task->suspend_count > 0)
2788 return;
2789
2790 if (task->bsd_info) {
2791 workq_proc_resumed(task->bsd_info);
2792 }
2793
2794 queue_iterate(&task->threads, thread, thread_t, task_threads) {
2795 thread_mtx_lock(thread);
2796 thread_release(thread);
2797 thread_mtx_unlock(thread);
2798 }
2799}
2800
2801/*
2802 * task_release:
2803 *
2804 * Same as the internal routine above, except that it must lock
2805 * and verify that the task is active.
2806 *
2807 * CONDITIONS: The caller holds a reference to the task
2808 */
2809kern_return_t
2810task_release(
2811 task_t task)
2812{
2813 if (task == TASK_NULL)
2814 return (KERN_INVALID_ARGUMENT);
2815
2816 task_lock(task);
2817
2818 if (!task->active) {
2819 task_unlock(task);
2820
2821 return (KERN_FAILURE);
2822 }
2823
2824 task_release_locked(task);
2825 task_unlock(task);
2826
2827 return (KERN_SUCCESS);
2828}
2829
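/*
 * task_threads:
 *
 * Return an array of the task's threads, with a reference taken on
 * each; the entries are converted to ports before returning.
 * Fails if the task is no longer active.
 */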
2830kern_return_t
2831task_threads(
2832 task_t task,
2833 thread_act_array_t *threads_out,
2834 mach_msg_type_number_t *count)
2835{
2836 mach_msg_type_number_t actual;
2837 thread_t *thread_list;
2838 thread_t thread;
2839 vm_size_t size, size_needed;
2840 void *addr;
2841 unsigned int i, j;
2842
2843 if (task == TASK_NULL)
2844 return (KERN_INVALID_ARGUMENT);
2845
2846 size = 0; addr = NULL;
2847
2848 for (;;) {
2849 task_lock(task);
2850 if (!task->active) {
2851 task_unlock(task);
2852
2853 if (size != 0)
2854 kfree(addr, size);
2855
2856 return (KERN_FAILURE);
2857 }
2858
2859 actual = task->thread_count;
2860
2861 /* do we have the memory we need? */
2862 size_needed = actual * sizeof (mach_port_t);
2863 if (size_needed <= size)
2864 break;
2865
2866 /* unlock the task and allocate more memory */
2867 task_unlock(task);
2868
2869 if (size != 0)
2870 kfree(addr, size);
2871
2872 assert(size_needed > 0);
2873 size = size_needed;
2874
2875 addr = kalloc(size);
2876 if (addr == 0)
2877 return (KERN_RESOURCE_SHORTAGE);
2878 }
2879
2880 /* OK, have memory and the task is locked & active */
2881 thread_list = (thread_t *)addr;
2882
2883 i = j = 0;
2884
2885 for (thread = (thread_t)queue_first(&task->threads); i < actual;
2886 ++i, thread = (thread_t)queue_next(&thread->task_threads)) {
2887 thread_reference_internal(thread);
2888 thread_list[j++] = thread;
2889 }
2890
2891 assert(queue_end(&task->threads, (queue_entry_t)thread));
2892
2893 actual = j;
2894 size_needed = actual * sizeof (mach_port_t);
2895
2896 /* can unlock task now that we've got the thread refs */
2897 task_unlock(task);
2898
2899 if (actual == 0) {
2900 /* no threads, so return null pointer and deallocate memory */
2901
2902 *threads_out = NULL;
2903 *count = 0;
2904
2905 if (size != 0)
2906 kfree(addr, size);
2907 }
2908 else {
2909 /* if we allocated too much, must copy */
2910
2911 if (size_needed < size) {
2912 void *newaddr;
2913
2914 newaddr = kalloc(size_needed);
2915 if (newaddr == 0) {
2916 for (i = 0; i < actual; ++i)
2917 thread_deallocate(thread_list[i]);
2918 kfree(addr, size);
2919 return (KERN_RESOURCE_SHORTAGE);
2920 }
2921
2922 bcopy(addr, newaddr, size_needed);
2923 kfree(addr, size);
2924 thread_list = (thread_t *)newaddr;
2925 }
2926
2927 *threads_out = thread_list;
2928 *count = actual;
2929
2930		/* do the conversion that MIG should handle */
2931
2932 for (i = 0; i < actual; ++i)
2933 ((ipc_port_t *) thread_list)[i] = convert_thread_to_port(thread_list[i]);
2934 }
2935
2936 return (KERN_SUCCESS);
2937}
2938
2939#define TASK_HOLD_NORMAL 0
2940#define TASK_HOLD_PIDSUSPEND 1
2941#define TASK_HOLD_LEGACY 2
2942#define TASK_HOLD_LEGACY_ALL 3
2943
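/*
 * place_task_hold:
 *
 * Record one more user-level suspension of the given mode on the
 * task; on the transition from zero, place a kernel hold on its
 * threads and wait for them to stop running user code.
 *
 * CONDITIONS: the task is locked.
 */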
2944static kern_return_t
2945place_task_hold (
2946 task_t task,
2947 int mode)
2948{
2949 if (!task->active && !task_is_a_corpse(task)) {
2950 return (KERN_FAILURE);
2951 }
2952
2953 /* Return success for corpse task */
2954 if (task_is_a_corpse(task)) {
2955 return KERN_SUCCESS;
2956 }
2957
2958 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2959 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE,
2960 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
2961 task->user_stop_count, task->user_stop_count + 1, 0);
2962
2963#if MACH_ASSERT
2964 current_task()->suspends_outstanding++;
2965#endif
2966
2967 if (mode == TASK_HOLD_LEGACY)
2968 task->legacy_stop_count++;
2969
2970 if (task->user_stop_count++ > 0) {
2971 /*
2972 * If the stop count was positive, the task is
2973 * already stopped and we can exit.
2974 */
2975 return (KERN_SUCCESS);
2976 }
2977
2978 /*
2979 * Put a kernel-level hold on the threads in the task (all
2980 * user-level task suspensions added together represent a
2981 * single kernel-level hold). We then wait for the threads
2982 * to stop executing user code.
2983 */
2984 task_hold_locked(task);
2985 task_wait_locked(task, FALSE);
2986
2987 return (KERN_SUCCESS);
2988}
2989
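/*
 * release_task_hold:
 *
 * Drop one user-level suspension of the given mode (or all legacy
 * suspensions for TASK_HOLD_LEGACY_ALL) and release the kernel hold
 * once the outstanding count reaches zero.
 *
 * CONDITIONS: the task is locked.
 */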
2990static kern_return_t
2991release_task_hold (
2992 task_t task,
2993 int mode)
2994{
2995 boolean_t release = FALSE;
2996
2997 if (!task->active && !task_is_a_corpse(task)) {
2998 return (KERN_FAILURE);
2999 }
3000
3001 /* Return success for corpse task */
3002 if (task_is_a_corpse(task)) {
3003 return KERN_SUCCESS;
3004 }
3005
3006 if (mode == TASK_HOLD_PIDSUSPEND) {
3007 if (task->pidsuspended == FALSE) {
3008 return (KERN_FAILURE);
3009 }
3010 task->pidsuspended = FALSE;
3011 }
3012
3013 if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) {
3014
3015 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3016 MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE,
3017 task_pid(task), ((thread_t)queue_first(&task->threads))->thread_id,
3018 task->user_stop_count, mode, task->legacy_stop_count);
3019
3020#if MACH_ASSERT
3021 /*
3022 * This is obviously not robust; if we suspend one task and then resume a different one,
3023 * we'll fly under the radar. This is only meant to catch the common case of a crashed
3024 * or buggy suspender.
3025 */
3026 current_task()->suspends_outstanding--;
3027#endif
3028
3029 if (mode == TASK_HOLD_LEGACY_ALL) {
3030 if (task->legacy_stop_count >= task->user_stop_count) {
3031 task->user_stop_count = 0;
3032 release = TRUE;
3033 } else {
3034 task->user_stop_count -= task->legacy_stop_count;
3035 }
3036 task->legacy_stop_count = 0;
3037 } else {
3038 if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0)
3039 task->legacy_stop_count--;
3040 if (--task->user_stop_count == 0)
3041 release = TRUE;
3042 }
3043 }
3044 else {
3045 return (KERN_FAILURE);
3046 }
3047
3048 /*
3049 * Release the task if necessary.
3050 */
3051 if (release)
3052 task_release_locked(task);
3053
3054 return (KERN_SUCCESS);
3055}
3056
3057
3058/*
3059 * task_suspend:
3060 *
3061 * Implement an (old-fashioned) user-level suspension on a task.
3062 *
3063 * Because the user isn't expecting to have to manage a suspension
3064 * token, we'll track it for him in the kernel in the form of a naked
3065 * send right to the task's resume port. All such send rights
3066 * account for a single suspension against the task (unlike task_suspend2()
3067 * where each caller gets a unique suspension count represented by a
3068 * unique send-once right).
3069 *
3070 * Conditions:
3071 * The caller holds a reference to the task
3072 */
3073kern_return_t
3074task_suspend(
3075 task_t task)
3076{
3077 kern_return_t kr;
3078 mach_port_t port, send, old_notify;
3079 mach_port_name_t name;
3080
3081 if (task == TASK_NULL || task == kernel_task)
3082 return (KERN_INVALID_ARGUMENT);
3083
3084 task_lock(task);
3085
3086 /*
3087 * Claim a send right on the task resume port, and request a no-senders
3088 * notification on that port (if none outstanding).
3089 */
3090 if (task->itk_resume == IP_NULL) {
3091 task->itk_resume = ipc_port_alloc_kernel();
3092 if (!IP_VALID(task->itk_resume))
3093 panic("failed to create resume port");
3094 ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME);
3095 }
3096
3097 port = task->itk_resume;
3098 ip_lock(port);
3099 assert(ip_active(port));
3100
3101 send = ipc_port_make_send_locked(port);
3102 assert(IP_VALID(send));
3103
3104 if (port->ip_nsrequest == IP_NULL) {
3105 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3106 assert(old_notify == IP_NULL);
3107 /* port unlocked */
3108 } else {
3109 ip_unlock(port);
3110 }
3111
3112 /*
3113 * place a legacy hold on the task.
3114 */
3115 kr = place_task_hold(task, TASK_HOLD_LEGACY);
3116 if (kr != KERN_SUCCESS) {
3117 task_unlock(task);
3118 ipc_port_release_send(send);
3119 return kr;
3120 }
3121
3122 task_unlock(task);
3123
3124 /*
3125 * Copyout the send right into the calling task's IPC space. It won't know it is there,
3126 * but we'll look it up when calling a traditional resume. Any IPC operations that
3127 * deallocate the send right will auto-release the suspension.
3128 */
3129 if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send,
3130 MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) {
3131 printf("warning: %s(%d) failed to copyout suspension token for pid %d with error: %d\n",
3132 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3133 task_pid(task), kr);
3134 return (kr);
3135 }
3136
3137 return (kr);
3138}
3139
3140/*
3141 * task_resume:
3142 * Release a user hold on a task.
3143 *
3144 * Conditions:
3145 * The caller holds a reference to the task
3146 */
3147kern_return_t
3148task_resume(
3149 task_t task)
3150{
3151 kern_return_t kr;
3152 mach_port_name_t resume_port_name;
3153 ipc_entry_t resume_port_entry;
3154 ipc_space_t space = current_task()->itk_space;
3155
3156 if (task == TASK_NULL || task == kernel_task )
3157 return (KERN_INVALID_ARGUMENT);
3158
3159 /* release a legacy task hold */
3160 task_lock(task);
3161 kr = release_task_hold(task, TASK_HOLD_LEGACY);
3162 task_unlock(task);
3163
3164 is_write_lock(space);
3165 if (is_active(space) && IP_VALID(task->itk_resume) &&
3166 ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) {
3167 /*
3168 * We found a suspension token in the caller's IPC space. Release a send right to indicate that
3169 * we are holding one less legacy hold on the task from this caller. If the release failed,
3170 * go ahead and drop all the rights, as someone either already released our holds or the task
3171 * is gone.
3172 */
3173 if (kr == KERN_SUCCESS)
3174 ipc_right_dealloc(space, resume_port_name, resume_port_entry);
3175 else
3176 ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0);
3177 /* space unlocked */
3178 } else {
3179 is_write_unlock(space);
3180 if (kr == KERN_SUCCESS)
3181 printf("warning: %s(%d) performed out-of-band resume on pid %d\n",
3182 proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info),
3183 task_pid(task));
3184 }
3185
3186 return kr;
3187}
3188
3189/*
3190 * Suspend the target task.
3191 * Making/holding a token/reference/port is the caller's responsibility.
3192 */
3193kern_return_t
3194task_suspend_internal(task_t task)
3195{
3196 kern_return_t kr;
3197
3198 if (task == TASK_NULL || task == kernel_task)
3199 return (KERN_INVALID_ARGUMENT);
3200
3201 task_lock(task);
3202 kr = place_task_hold(task, TASK_HOLD_NORMAL);
3203 task_unlock(task);
3204 return (kr);
3205}
3206
3207/*
3208 * Suspend the target task, and return a suspension token. The token
3209 * represents a reference on the suspended task.
3210 */
3211kern_return_t
3212task_suspend2(
3213 task_t task,
3214 task_suspension_token_t *suspend_token)
3215{
3216 kern_return_t kr;
3217
3218 kr = task_suspend_internal(task);
3219 if (kr != KERN_SUCCESS) {
3220 *suspend_token = TASK_NULL;
3221 return (kr);
3222 }
3223
3224 /*
3225 * Take a reference on the target task and return that to the caller
3226 * as a "suspension token," which can be converted into an SO right to
3227 * the now-suspended task's resume port.
3228 */
3229 task_reference_internal(task);
3230 *suspend_token = task;
3231
3232 return (KERN_SUCCESS);
3233}
3234
3235/*
3236 * Resume the task
3237 * (reference/token/port management is caller's responsibility).
3238 */
3239kern_return_t
3240task_resume_internal(
3241 task_suspension_token_t task)
3242{
3243 kern_return_t kr;
3244
3245 if (task == TASK_NULL || task == kernel_task)
3246 return (KERN_INVALID_ARGUMENT);
3247
3248 task_lock(task);
3249 kr = release_task_hold(task, TASK_HOLD_NORMAL);
3250 task_unlock(task);
3251 return (kr);
3252}
3253
3254/*
3255 * Resume the task using a suspension token. Consumes the token's ref.
3256 */
3257kern_return_t
3258task_resume2(
3259 task_suspension_token_t task)
3260{
3261 kern_return_t kr;
3262
3263 kr = task_resume_internal(task);
3264 task_suspension_token_deallocate(task);
3265
3266 return (kr);
3267}
3268
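/*
 * task_suspension_notify:
 *
 * Handle send-once and no-senders notifications on a task's resume
 * port, releasing the corresponding suspension holds or re-arming
 * the no-senders notification as appropriate.
 */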
3269boolean_t
3270task_suspension_notify(mach_msg_header_t *request_header)
3271{
3272 ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port;
3273 task_t task = convert_port_to_task_suspension_token(port);
3274 mach_msg_type_number_t not_count;
3275
3276 if (task == TASK_NULL || task == kernel_task)
3277 return TRUE; /* nothing to do */
3278
3279 switch (request_header->msgh_id) {
3280
3281 case MACH_NOTIFY_SEND_ONCE:
3282 /* release the hold held by this specific send-once right */
3283 task_lock(task);
3284 release_task_hold(task, TASK_HOLD_NORMAL);
3285 task_unlock(task);
3286 break;
3287
3288 case MACH_NOTIFY_NO_SENDERS:
3289 not_count = ((mach_no_senders_notification_t *)request_header)->not_count;
3290
3291 task_lock(task);
3292 ip_lock(port);
3293 if (port->ip_mscount == not_count) {
3294
3295 /* release all the [remaining] outstanding legacy holds */
3296 assert(port->ip_nsrequest == IP_NULL);
3297 ip_unlock(port);
3298 release_task_hold(task, TASK_HOLD_LEGACY_ALL);
3299 task_unlock(task);
3300
3301 } else if (port->ip_nsrequest == IP_NULL) {
3302 ipc_port_t old_notify;
3303
3304 task_unlock(task);
3305 /* new send rights, re-arm notification at current make-send count */
3306 ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify);
3307 assert(old_notify == IP_NULL);
3308 /* port unlocked */
3309 } else {
3310 ip_unlock(port);
3311 task_unlock(task);
3312 }
3313 break;
3314
3315 default:
3316 break;
3317 }
3318
3319 task_suspension_token_deallocate(task); /* drop token reference */
3320 return TRUE;
3321}
3322
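/*
 * task_pidsuspend_locked:
 *
 * Mark the task pidsuspended and place a pid-suspend hold on it.
 * Fails if the task is already pidsuspended.
 *
 * CONDITIONS: the task is locked.
 */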
3323kern_return_t
3324task_pidsuspend_locked(task_t task)
3325{
3326 kern_return_t kr;
3327
3328 if (task->pidsuspended) {
3329 kr = KERN_FAILURE;
3330 goto out;
3331 }
3332
3333 task->pidsuspended = TRUE;
3334
3335 kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND);
3336 if (kr != KERN_SUCCESS) {
3337 task->pidsuspended = FALSE;
3338 }
3339out:
3340 return(kr);
3341}
3342
3343
3344/*
3345 * task_pidsuspend:
3346 *
3347 * Suspends a task by placing a hold on its threads.
3348 *
3349 * Conditions:
3350 * The caller holds a reference to the task
3351 */
3352kern_return_t
3353task_pidsuspend(
3354 task_t task)
3355{
3356 kern_return_t kr;
3357
3358 if (task == TASK_NULL || task == kernel_task)
3359 return (KERN_INVALID_ARGUMENT);
3360
3361 task_lock(task);
3362
3363 kr = task_pidsuspend_locked(task);
3364
3365 task_unlock(task);
3366
3367 return (kr);
3368}
3369
3370/*
3371 * task_pidresume:
3372 * Resumes a previously suspended task.
3373 *
3374 * Conditions:
3375 * The caller holds a reference to the task
3376 */
3377kern_return_t
3378task_pidresume(
3379 task_t task)
3380{
3381 kern_return_t kr;
3382
3383 if (task == TASK_NULL || task == kernel_task)
3384 return (KERN_INVALID_ARGUMENT);
3385
3386 task_lock(task);
3387
3388#if CONFIG_FREEZE
3389
3390 while (task->changing_freeze_state) {
3391
3392 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3393 task_unlock(task);
3394 thread_block(THREAD_CONTINUE_NULL);
3395
3396 task_lock(task);
3397 }
3398 task->changing_freeze_state = TRUE;
3399#endif
3400
3401 kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND);
3402
3403 task_unlock(task);
3404
3405#if CONFIG_FREEZE
3406
3407 task_lock(task);
3408
3409 if (kr == KERN_SUCCESS)
3410 task->frozen = FALSE;
3411 task->changing_freeze_state = FALSE;
3412 thread_wakeup(&task->changing_freeze_state);
3413
3414 task_unlock(task);
3415#endif
3416
3417 return (kr);
3418}
3419
3420
3421#if DEVELOPMENT || DEBUG
3422
3423extern void IOSleep(int);
3424
3425kern_return_t
3426task_disconnect_page_mappings(task_t task)
3427{
3428 int n;
3429
3430 if (task == TASK_NULL || task == kernel_task)
3431 return (KERN_INVALID_ARGUMENT);
3432
3433	 * This function is used to strip all of the mappings from
3434	 * the pmap for the specified task, forcing the task to
3435	 * re-fault all of the pages it is actively using.  This
3436	 * allows us to approximate the true working set of the
3437	 * specified task.  We only engage if at least one of the
3438	 * threads in the task is runnable, but we want to sweep
3439	 * continuously for a while (the limit of 100 sweeps is
3440	 * arbitrary and should be revisited as we gain experience)
3441	 * to get a better view of which areas within a page are
3442	 * being visited, as opposed to only seeing the first fault
3443	 * of a page after the task becomes runnable.  In the
3444	 * future we may try to block until awakened by a thread
3445	 * in this task being made runnable, but for now we
3446	 * periodically poll from the user-level debug tool
3447	 * driving the sysctl.
3448 */
3449 for (n = 0; n < 100; n++) {
3450 thread_t thread;
3451 boolean_t runnable;
3452 boolean_t do_unnest;
3453 int page_count;
3454
3455 runnable = FALSE;
3456 do_unnest = FALSE;
3457
3458 task_lock(task);
3459
3460 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3461
3462 if (thread->state & TH_RUN) {
3463 runnable = TRUE;
3464 break;
3465 }
3466 }
3467 if (n == 0)
3468 task->task_disconnected_count++;
3469
3470 if (task->task_unnested == FALSE) {
3471 if (runnable == TRUE) {
3472 task->task_unnested = TRUE;
3473 do_unnest = TRUE;
3474 }
3475 }
3476 task_unlock(task);
3477
3478 if (runnable == FALSE)
3479 break;
3480
3481 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_START,
3482 task, do_unnest, task->task_disconnected_count, 0, 0);
3483
3484 page_count = vm_map_disconnect_page_mappings(task->map, do_unnest);
3485
3486 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_TASK_PAGE_MAPPINGS)) | DBG_FUNC_END,
3487 task, page_count, 0, 0, 0);
3488
3489 if ((n % 5) == 4)
3490 IOSleep(1);
3491 }
3492 return (KERN_SUCCESS);
3493}
3494
3495#endif
3496
3497
3498#if CONFIG_FREEZE
3499
3500/*
3501 * task_freeze:
3502 *
3503 * Freeze a task.
3504 *
3505 * Conditions:
3506 * The caller holds a reference to the task
3507 */
3508extern void vm_wake_compactor_swapper(void);
3509extern queue_head_t c_swapout_list_head;
3510
3511kern_return_t
3512task_freeze(
3513 task_t task,
3514 uint32_t *purgeable_count,
3515 uint32_t *wired_count,
3516 uint32_t *clean_count,
3517 uint32_t *dirty_count,
3518 uint32_t dirty_budget,
3519 uint32_t *shared_count,
3520 int *freezer_error_code,
3521 boolean_t eval_only)
3522{
3523 kern_return_t kr = KERN_SUCCESS;
3524
3525 if (task == TASK_NULL || task == kernel_task)
3526 return (KERN_INVALID_ARGUMENT);
3527
3528 task_lock(task);
3529
3530 while (task->changing_freeze_state) {
3531
3532 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3533 task_unlock(task);
3534 thread_block(THREAD_CONTINUE_NULL);
3535
3536 task_lock(task);
3537 }
3538 if (task->frozen) {
3539 task_unlock(task);
3540 return (KERN_FAILURE);
3541 }
3542 task->changing_freeze_state = TRUE;
3543
3544 task_unlock(task);
3545
3546 kr = vm_map_freeze(task->map,
3547 purgeable_count,
3548 wired_count,
3549 clean_count,
3550 dirty_count,
3551 dirty_budget,
3552 shared_count,
3553 freezer_error_code,
3554 eval_only);
3555
3556 task_lock(task);
3557
3558 if ((kr == KERN_SUCCESS) && (eval_only == FALSE)) {
3559 task->frozen = TRUE;
3560 }
3561
3562 task->changing_freeze_state = FALSE;
3563 thread_wakeup(&task->changing_freeze_state);
3564
3565 task_unlock(task);
3566
3567 if (VM_CONFIG_COMPRESSOR_IS_PRESENT &&
3568 (eval_only == FALSE)) {
3569 vm_wake_compactor_swapper();
3570 /*
3571 * We do an explicit wakeup of the swapout thread here
3572 * because the compact_and_swap routines don't have
3573		 * knowledge about these kinds of "per-task packed c_segs"
3574 * and so will not be evaluating whether we need to do
3575 * a wakeup there.
3576 */
3577 thread_wakeup((event_t)&c_swapout_list_head);
3578 }
3579
3580 return (kr);
3581}
3582
3583/*
3584 * task_thaw:
3585 *
3586 * Thaw a currently frozen task.
3587 *
3588 * Conditions:
3589 * The caller holds a reference to the task
3590 */
3591kern_return_t
3592task_thaw(
3593 task_t task)
3594{
3595 if (task == TASK_NULL || task == kernel_task)
3596 return (KERN_INVALID_ARGUMENT);
3597
3598 task_lock(task);
3599
3600 while (task->changing_freeze_state) {
3601
3602 assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT);
3603 task_unlock(task);
3604 thread_block(THREAD_CONTINUE_NULL);
3605
3606 task_lock(task);
3607 }
3608 if (!task->frozen) {
3609 task_unlock(task);
3610 return (KERN_FAILURE);
3611 }
3612 task->frozen = FALSE;
3613
3614 task_unlock(task);
3615
3616 return (KERN_SUCCESS);
3617}
3618
3619#endif /* CONFIG_FREEZE */
3620
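/*
 * host_security_set_task_token:
 *
 * Set the task's security and audit tokens on behalf of the host
 * security port, then update the task's host special port from the
 * supplied host_priv argument.
 */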
3621kern_return_t
3622host_security_set_task_token(
3623 host_security_t host_security,
3624 task_t task,
3625 security_token_t sec_token,
3626 audit_token_t audit_token,
3627 host_priv_t host_priv)
3628{
3629 ipc_port_t host_port;
3630 kern_return_t kr;
3631
3632 if (task == TASK_NULL)
3633 return(KERN_INVALID_ARGUMENT);
3634
3635 if (host_security == HOST_NULL)
3636 return(KERN_INVALID_SECURITY);
3637
3638 task_lock(task);
3639 task->sec_token = sec_token;
3640 task->audit_token = audit_token;
3641
3642 task_unlock(task);
3643
3644 if (host_priv != HOST_PRIV_NULL) {
3645 kr = host_get_host_priv_port(host_priv, &host_port);
3646 } else {
3647 kr = host_get_host_port(host_priv_self(), &host_port);
3648 }
3649 assert(kr == KERN_SUCCESS);
3650 kr = task_set_special_port(task, TASK_HOST_PORT, host_port);
3651 return(kr);
3652}
3653
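/*
 * task_send_trace_memory:
 *
 * Ask the ATM subsystem to send a proc-inspect notification for the
 * target task.  Without CONFIG_ATM this returns KERN_INVALID_ARGUMENT.
 */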
3654kern_return_t
3655task_send_trace_memory(
3656 task_t target_task,
3657 __unused uint32_t pid,
3658 __unused uint64_t uniqueid)
3659{
3660 kern_return_t kr = KERN_INVALID_ARGUMENT;
3661 if (target_task == TASK_NULL)
3662 return (KERN_INVALID_ARGUMENT);
3663
3664#if CONFIG_ATM
3665 kr = atm_send_proc_inspect_notification(target_task,
3666 pid,
3667 uniqueid);
3668
3669#endif
3670 return (kr);
3671}
3672/*
3673 * This routine was added, pretty much exclusively, for registering the
3674 * RPC glue vector for in-kernel short circuited tasks. Rather than
3675 * removing it completely, I have only disabled that feature (which was
3676 * the only feature at the time). It just appears that we are going to
3677 * want to add some user data to tasks in the future (i.e. bsd info,
3678 * task names, etc...), so I left it in the formal task interface.
3679 */
3680kern_return_t
3681task_set_info(
3682 task_t task,
3683 task_flavor_t flavor,
3684 __unused task_info_t task_info_in, /* pointer to IN array */
3685 __unused mach_msg_type_number_t task_info_count)
3686{
3687 if (task == TASK_NULL)
3688 return(KERN_INVALID_ARGUMENT);
3689
3690 switch (flavor) {
3691
3692#if CONFIG_ATM
3693 case TASK_TRACE_MEMORY_INFO:
3694 {
3695 if (task_info_count != TASK_TRACE_MEMORY_INFO_COUNT)
3696 return (KERN_INVALID_ARGUMENT);
3697
3698 assert(task_info_in != NULL);
3699 task_trace_memory_info_t mem_info;
3700 mem_info = (task_trace_memory_info_t) task_info_in;
3701 kern_return_t kr = atm_register_trace_memory(task,
3702 mem_info->user_memory_address,
3703 mem_info->buffer_size);
3704 return kr;
3705 }
3706
3707#endif
3708 default:
3709 return (KERN_INVALID_ARGUMENT);
3710 }
3711 return (KERN_SUCCESS);
3712}
3713
3714int radar_20146450 = 1;
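/*
 * task_info:
 *
 * Return information about the task in the requested flavor,
 * verifying that the caller's buffer is large enough and reporting
 * the count actually returned.
 */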
3715kern_return_t
3716task_info(
3717 task_t task,
3718 task_flavor_t flavor,
3719 task_info_t task_info_out,
3720 mach_msg_type_number_t *task_info_count)
3721{
3722 kern_return_t error = KERN_SUCCESS;
3723 mach_msg_type_number_t original_task_info_count;
3724
3725 if (task == TASK_NULL)
3726 return (KERN_INVALID_ARGUMENT);
3727
3728 original_task_info_count = *task_info_count;
3729 task_lock(task);
3730
3731 if ((task != current_task()) && (!task->active)) {
3732 task_unlock(task);
3733 return (KERN_INVALID_ARGUMENT);
3734 }
3735
3736 switch (flavor) {
3737
3738 case TASK_BASIC_INFO_32:
3739 case TASK_BASIC2_INFO_32:
3740#if defined(__arm__) || defined(__arm64__)
3741 case TASK_BASIC_INFO_64:
3742#endif
3743 {
3744 task_basic_info_32_t basic_info;
3745 vm_map_t map;
3746 clock_sec_t secs;
3747 clock_usec_t usecs;
3748
3749 if (*task_info_count < TASK_BASIC_INFO_32_COUNT) {
3750 error = KERN_INVALID_ARGUMENT;
3751 break;
3752 }
3753
3754 basic_info = (task_basic_info_32_t)task_info_out;
3755
3756 map = (task == kernel_task)? kernel_map: task->map;
3757 basic_info->virtual_size = (typeof(basic_info->virtual_size))map->size;
3758 if (flavor == TASK_BASIC2_INFO_32) {
3759 /*
3760 * The "BASIC2" flavor gets the maximum resident
3761 * size instead of the current resident size...
3762 */
3763 basic_info->resident_size = pmap_resident_max(map->pmap);
3764 } else {
3765 basic_info->resident_size = pmap_resident_count(map->pmap);
3766 }
3767 basic_info->resident_size *= PAGE_SIZE;
3768
3769 basic_info->policy = ((task != kernel_task)?
3770 POLICY_TIMESHARE: POLICY_RR);
3771 basic_info->suspend_count = task->user_stop_count;
3772
3773 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3774 basic_info->user_time.seconds =
3775 (typeof(basic_info->user_time.seconds))secs;
3776 basic_info->user_time.microseconds = usecs;
3777
3778 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3779 basic_info->system_time.seconds =
3780 (typeof(basic_info->system_time.seconds))secs;
3781 basic_info->system_time.microseconds = usecs;
3782
3783 *task_info_count = TASK_BASIC_INFO_32_COUNT;
3784 break;
3785 }
3786
3787#if defined(__arm__) || defined(__arm64__)
3788 case TASK_BASIC_INFO_64_2:
3789 {
3790 task_basic_info_64_2_t basic_info;
3791 vm_map_t map;
3792 clock_sec_t secs;
3793 clock_usec_t usecs;
3794
3795 if (*task_info_count < TASK_BASIC_INFO_64_2_COUNT) {
3796 error = KERN_INVALID_ARGUMENT;
3797 break;
3798 }
3799
3800 basic_info = (task_basic_info_64_2_t)task_info_out;
3801
3802 map = (task == kernel_task)? kernel_map: task->map;
3803 basic_info->virtual_size = map->size;
3804 basic_info->resident_size =
3805 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3806 * PAGE_SIZE_64;
3807
3808 basic_info->policy = ((task != kernel_task)?
3809 POLICY_TIMESHARE: POLICY_RR);
3810 basic_info->suspend_count = task->user_stop_count;
3811
3812 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3813 basic_info->user_time.seconds =
3814 (typeof(basic_info->user_time.seconds))secs;
3815 basic_info->user_time.microseconds = usecs;
3816
3817 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3818 basic_info->system_time.seconds =
3819 (typeof(basic_info->system_time.seconds))secs;
3820 basic_info->system_time.microseconds = usecs;
3821
3822 *task_info_count = TASK_BASIC_INFO_64_2_COUNT;
3823 break;
3824 }
3825
3826#else /* defined(__arm__) || defined(__arm64__) */
3827 case TASK_BASIC_INFO_64:
3828 {
3829 task_basic_info_64_t basic_info;
3830 vm_map_t map;
3831 clock_sec_t secs;
3832 clock_usec_t usecs;
3833
3834 if (*task_info_count < TASK_BASIC_INFO_64_COUNT) {
3835 error = KERN_INVALID_ARGUMENT;
3836 break;
3837 }
3838
3839 basic_info = (task_basic_info_64_t)task_info_out;
3840
3841 map = (task == kernel_task)? kernel_map: task->map;
3842 basic_info->virtual_size = map->size;
3843 basic_info->resident_size =
3844 (mach_vm_size_t)(pmap_resident_count(map->pmap))
3845 * PAGE_SIZE_64;
3846
3847 basic_info->policy = ((task != kernel_task)?
3848 POLICY_TIMESHARE: POLICY_RR);
3849 basic_info->suspend_count = task->user_stop_count;
3850
3851 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3852 basic_info->user_time.seconds =
3853 (typeof(basic_info->user_time.seconds))secs;
3854 basic_info->user_time.microseconds = usecs;
3855
3856 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3857 basic_info->system_time.seconds =
3858 (typeof(basic_info->system_time.seconds))secs;
3859 basic_info->system_time.microseconds = usecs;
3860
3861 *task_info_count = TASK_BASIC_INFO_64_COUNT;
3862 break;
3863 }
3864#endif /* defined(__arm__) || defined(__arm64__) */
3865
3866 case MACH_TASK_BASIC_INFO:
3867 {
3868 mach_task_basic_info_t basic_info;
3869 vm_map_t map;
3870 clock_sec_t secs;
3871 clock_usec_t usecs;
3872
3873 if (*task_info_count < MACH_TASK_BASIC_INFO_COUNT) {
3874 error = KERN_INVALID_ARGUMENT;
3875 break;
3876 }
3877
3878 basic_info = (mach_task_basic_info_t)task_info_out;
3879
3880 map = (task == kernel_task) ? kernel_map : task->map;
3881
3882 basic_info->virtual_size = map->size;
3883
3884 basic_info->resident_size =
3885 (mach_vm_size_t)(pmap_resident_count(map->pmap));
3886 basic_info->resident_size *= PAGE_SIZE_64;
3887
3888 basic_info->resident_size_max =
3889 (mach_vm_size_t)(pmap_resident_max(map->pmap));
3890 basic_info->resident_size_max *= PAGE_SIZE_64;
3891
3892 basic_info->policy = ((task != kernel_task) ?
3893 POLICY_TIMESHARE : POLICY_RR);
3894
3895 basic_info->suspend_count = task->user_stop_count;
3896
3897 absolutetime_to_microtime(task->total_user_time, &secs, &usecs);
3898 basic_info->user_time.seconds =
3899 (typeof(basic_info->user_time.seconds))secs;
3900 basic_info->user_time.microseconds = usecs;
3901
3902 absolutetime_to_microtime(task->total_system_time, &secs, &usecs);
3903 basic_info->system_time.seconds =
3904 (typeof(basic_info->system_time.seconds))secs;
3905 basic_info->system_time.microseconds = usecs;
3906
3907 *task_info_count = MACH_TASK_BASIC_INFO_COUNT;
3908 break;
3909 }
3910
3911 case TASK_THREAD_TIMES_INFO:
3912 {
3913 task_thread_times_info_t times_info;
3914 thread_t thread;
3915
3916 if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) {
3917 error = KERN_INVALID_ARGUMENT;
3918 break;
3919 }
3920
3921 times_info = (task_thread_times_info_t) task_info_out;
3922 times_info->user_time.seconds = 0;
3923 times_info->user_time.microseconds = 0;
3924 times_info->system_time.seconds = 0;
3925 times_info->system_time.microseconds = 0;
3926
3927
3928 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3929 time_value_t user_time, system_time;
3930
3931 if (thread->options & TH_OPT_IDLE_THREAD)
3932 continue;
3933
3934 thread_read_times(thread, &user_time, &system_time, NULL);
3935
3936 time_value_add(&times_info->user_time, &user_time);
3937 time_value_add(&times_info->system_time, &system_time);
3938 }
3939
3940 *task_info_count = TASK_THREAD_TIMES_INFO_COUNT;
3941 break;
3942 }
3943
3944 case TASK_ABSOLUTETIME_INFO:
3945 {
3946 task_absolutetime_info_t info;
3947 thread_t thread;
3948
3949 if (*task_info_count < TASK_ABSOLUTETIME_INFO_COUNT) {
3950 error = KERN_INVALID_ARGUMENT;
3951 break;
3952 }
3953
3954 info = (task_absolutetime_info_t)task_info_out;
3955 info->threads_user = info->threads_system = 0;
3956
3957
3958 info->total_user = task->total_user_time;
3959 info->total_system = task->total_system_time;
3960
3961 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3962 uint64_t tval;
3963 spl_t x;
3964
3965 if (thread->options & TH_OPT_IDLE_THREAD)
3966 continue;
3967
3968 x = splsched();
3969 thread_lock(thread);
3970
3971 tval = timer_grab(&thread->user_timer);
3972 info->threads_user += tval;
3973 info->total_user += tval;
3974
3975 tval = timer_grab(&thread->system_timer);
3976 if (thread->precise_user_kernel_time) {
3977 info->threads_system += tval;
3978 info->total_system += tval;
3979 } else {
3980 /* system_timer may represent either sys or user */
3981 info->threads_user += tval;
3982 info->total_user += tval;
3983 }
3984
3985 thread_unlock(thread);
3986 splx(x);
3987 }
3988
3989
3990 *task_info_count = TASK_ABSOLUTETIME_INFO_COUNT;
3991 break;
3992 }
3993
3994 case TASK_DYLD_INFO:
3995 {
3996 task_dyld_info_t info;
3997
3998 /*
3999 * We added the format field to TASK_DYLD_INFO output. For
4000 * temporary backward compatibility, accept the fact that
4001 * clients may ask for the old version - distinguished by the
4002 * size of the expected result structure.
4003 */
4004#define TASK_LEGACY_DYLD_INFO_COUNT \
4005 offsetof(struct task_dyld_info, all_image_info_format)/sizeof(natural_t)
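/*
 * That is, the number of natural_t units occupied by the fields that
 * precede all_image_info_format: the size of the legacy structure
 * (address and size only) expressed in task_info count units.
 */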
4006
4007 if (*task_info_count < TASK_LEGACY_DYLD_INFO_COUNT) {
4008 error = KERN_INVALID_ARGUMENT;
4009 break;
4010 }
4011
4012 info = (task_dyld_info_t)task_info_out;
4013 info->all_image_info_addr = task->all_image_info_addr;
4014 info->all_image_info_size = task->all_image_info_size;
4015
4016 /* only set format on output for those expecting it */
4017 if (*task_info_count >= TASK_DYLD_INFO_COUNT) {
4018 info->all_image_info_format = task_has_64Bit_addr(task) ?
4019 TASK_DYLD_ALL_IMAGE_INFO_64 :
4020 TASK_DYLD_ALL_IMAGE_INFO_32;
4021 *task_info_count = TASK_DYLD_INFO_COUNT;
4022 } else {
4023 *task_info_count = TASK_LEGACY_DYLD_INFO_COUNT;
4024 }
4025 break;
4026 }
4027
4028 case TASK_EXTMOD_INFO:
4029 {
4030 task_extmod_info_t info;
4031 void *p;
4032
4033 if (*task_info_count < TASK_EXTMOD_INFO_COUNT) {
4034 error = KERN_INVALID_ARGUMENT;
4035 break;
4036 }
4037
4038 info = (task_extmod_info_t)task_info_out;
4039
4040 p = get_bsdtask_info(task);
4041 if (p) {
4042 proc_getexecutableuuid(p, info->task_uuid, sizeof(info->task_uuid));
4043 } else {
4044 bzero(info->task_uuid, sizeof(info->task_uuid));
4045 }
4046 info->extmod_statistics = task->extmod_statistics;
4047 *task_info_count = TASK_EXTMOD_INFO_COUNT;
4048
4049 break;
4050 }
4051
4052 case TASK_KERNELMEMORY_INFO:
4053 {
4054 task_kernelmemory_info_t tkm_info;
4055 ledger_amount_t credit, debit;
4056
4057 if (*task_info_count < TASK_KERNELMEMORY_INFO_COUNT) {
4058 error = KERN_INVALID_ARGUMENT;
4059 break;
4060 }
4061
4062 tkm_info = (task_kernelmemory_info_t) task_info_out;
4063 tkm_info->total_palloc = 0;
4064 tkm_info->total_pfree = 0;
4065 tkm_info->total_salloc = 0;
4066 tkm_info->total_sfree = 0;
4067
4068 if (task == kernel_task) {
4069 /*
4070 * All shared allocs/frees from other tasks count against
4071 * the kernel private memory usage. If we are looking up
4072 * info for the kernel task, gather from everywhere.
4073 */
4074 task_unlock(task);
4075
4076 /* start by accounting for all the terminated tasks against the kernel */
4077 tkm_info->total_palloc = tasks_tkm_private.alloc + tasks_tkm_shared.alloc;
4078 tkm_info->total_pfree = tasks_tkm_private.free + tasks_tkm_shared.free;
4079
4080 /* count all other task/thread shared alloc/free against the kernel */
4081 lck_mtx_lock(&tasks_threads_lock);
4082
4083 /* XXX this really shouldn't be using the function parameter 'task' as a local var! */
4084 queue_iterate(&tasks, task, task_t, tasks) {
4085 if (task == kernel_task) {
4086 if (ledger_get_entries(task->ledger,
4087 task_ledgers.tkm_private, &credit,
4088 &debit) == KERN_SUCCESS) {
4089 tkm_info->total_palloc += credit;
4090 tkm_info->total_pfree += debit;
4091 }
4092 }
4093 if (!ledger_get_entries(task->ledger,
4094 task_ledgers.tkm_shared, &credit, &debit)) {
4095 tkm_info->total_palloc += credit;
4096 tkm_info->total_pfree += debit;
4097 }
4098 }
4099 lck_mtx_unlock(&tasks_threads_lock);
4100 } else {
4101 if (!ledger_get_entries(task->ledger,
4102 task_ledgers.tkm_private, &credit, &debit)) {
4103 tkm_info->total_palloc = credit;
4104 tkm_info->total_pfree = debit;
4105 }
4106 if (!ledger_get_entries(task->ledger,
4107 task_ledgers.tkm_shared, &credit, &debit)) {
4108 tkm_info->total_salloc = credit;
4109 tkm_info->total_sfree = debit;
4110 }
4111 task_unlock(task);
4112 }
4113
4114 *task_info_count = TASK_KERNELMEMORY_INFO_COUNT;
4115 return KERN_SUCCESS;
4116 }
4117
4118 /* OBSOLETE */
4119 case TASK_SCHED_FIFO_INFO:
4120 {
4121
4122 if (*task_info_count < POLICY_FIFO_BASE_COUNT) {
4123 error = KERN_INVALID_ARGUMENT;
4124 break;
4125 }
4126
4127 error = KERN_INVALID_POLICY;
4128 break;
4129 }
4130
4131 /* OBSOLETE */
4132 case TASK_SCHED_RR_INFO:
4133 {
4134 policy_rr_base_t rr_base;
4135 uint32_t quantum_time;
4136 uint64_t quantum_ns;
4137
4138 if (*task_info_count < POLICY_RR_BASE_COUNT) {
4139 error = KERN_INVALID_ARGUMENT;
4140 break;
4141 }
4142
4143 rr_base = (policy_rr_base_t) task_info_out;
4144
4145 if (task != kernel_task) {
4146 error = KERN_INVALID_POLICY;
4147 break;
4148 }
4149
4150 rr_base->base_priority = task->priority;
4151
4152 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
4153 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
4154
4155 rr_base->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
4156
4157 *task_info_count = POLICY_RR_BASE_COUNT;
4158 break;
4159 }
4160
4161 /* OBSOLETE */
4162 case TASK_SCHED_TIMESHARE_INFO:
4163 {
4164 policy_timeshare_base_t ts_base;
4165
4166 if (*task_info_count < POLICY_TIMESHARE_BASE_COUNT) {
4167 error = KERN_INVALID_ARGUMENT;
4168 break;
4169 }
4170
4171 ts_base = (policy_timeshare_base_t) task_info_out;
4172
4173 if (task == kernel_task) {
4174 error = KERN_INVALID_POLICY;
4175 break;
4176 }
4177
4178 ts_base->base_priority = task->priority;
4179
4180 *task_info_count = POLICY_TIMESHARE_BASE_COUNT;
4181 break;
4182 }
4183
4184 case TASK_SECURITY_TOKEN:
4185 {
4186 security_token_t *sec_token_p;
4187
4188 if (*task_info_count < TASK_SECURITY_TOKEN_COUNT) {
4189 error = KERN_INVALID_ARGUMENT;
4190 break;
4191 }
4192
4193 sec_token_p = (security_token_t *) task_info_out;
4194
4195 *sec_token_p = task->sec_token;
4196
4197 *task_info_count = TASK_SECURITY_TOKEN_COUNT;
4198 break;
4199 }
4200
4201 case TASK_AUDIT_TOKEN:
4202 {
4203 audit_token_t *audit_token_p;
4204
4205 if (*task_info_count < TASK_AUDIT_TOKEN_COUNT) {
4206 error = KERN_INVALID_ARGUMENT;
4207 break;
4208 }
4209
4210 audit_token_p = (audit_token_t *) task_info_out;
4211
4212 *audit_token_p = task->audit_token;
4213
4214 *task_info_count = TASK_AUDIT_TOKEN_COUNT;
4215 break;
4216 }
4217
4218 case TASK_SCHED_INFO:
4219 error = KERN_INVALID_ARGUMENT;
4220 break;
4221
4222 case TASK_EVENTS_INFO:
4223 {
4224 task_events_info_t events_info;
4225 thread_t thread;
4226
4227 if (*task_info_count < TASK_EVENTS_INFO_COUNT) {
4228 error = KERN_INVALID_ARGUMENT;
4229 break;
4230 }
4231
4232 events_info = (task_events_info_t) task_info_out;
4233
4234
4235 events_info->faults = task->faults;
4236 events_info->pageins = task->pageins;
4237 events_info->cow_faults = task->cow_faults;
4238 events_info->messages_sent = task->messages_sent;
4239 events_info->messages_received = task->messages_received;
4240 events_info->syscalls_mach = task->syscalls_mach;
4241 events_info->syscalls_unix = task->syscalls_unix;
4242
4243 events_info->csw = task->c_switch;
4244
4245 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4246 events_info->csw += thread->c_switch;
4247 events_info->syscalls_mach += thread->syscalls_mach;
4248 events_info->syscalls_unix += thread->syscalls_unix;
4249 }
4250
4251
4252 *task_info_count = TASK_EVENTS_INFO_COUNT;
4253 break;
4254 }
4255 case TASK_AFFINITY_TAG_INFO:
4256 {
4257 if (*task_info_count < TASK_AFFINITY_TAG_INFO_COUNT) {
4258 error = KERN_INVALID_ARGUMENT;
4259 break;
4260 }
4261
4262 error = task_affinity_info(task, task_info_out, task_info_count);
4263 break;
4264 }
4265 case TASK_POWER_INFO:
4266 {
4267 if (*task_info_count < TASK_POWER_INFO_COUNT) {
4268 error = KERN_INVALID_ARGUMENT;
4269 break;
4270 }
4271
4272 task_power_info_locked(task, (task_power_info_t)task_info_out, NULL, NULL);
4273 break;
4274 }
4275
4276 case TASK_POWER_INFO_V2:
4277 {
4278 if (*task_info_count < TASK_POWER_INFO_V2_COUNT_OLD) {
4279 error = KERN_INVALID_ARGUMENT;
4280 break;
4281 }
4282 task_power_info_v2_t tpiv2 = (task_power_info_v2_t) task_info_out;
4283 task_power_info_locked(task, &tpiv2->cpu_energy, &tpiv2->gpu_energy, tpiv2);
4284 break;
4285 }
4286
4287 case TASK_VM_INFO:
4288 case TASK_VM_INFO_PURGEABLE:
4289 {
4290 task_vm_info_t vm_info;
4291 vm_map_t map;
4292
4293 if (*task_info_count < TASK_VM_INFO_REV0_COUNT) {
4294 error = KERN_INVALID_ARGUMENT;
4295 break;
4296 }
4297
4298 vm_info = (task_vm_info_t)task_info_out;
4299
4300 if (task == kernel_task) {
4301 map = kernel_map;
4302 /* no lock */
4303 } else {
4304 map = task->map;
4305 vm_map_lock_read(map);
4306 }
4307
4308 vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size;
4309 vm_info->region_count = map->hdr.nentries;
4310 vm_info->page_size = vm_map_page_size(map);
4311
4312 vm_info->resident_size = pmap_resident_count(map->pmap);
4313 vm_info->resident_size *= PAGE_SIZE;
4314 vm_info->resident_size_peak = pmap_resident_max(map->pmap);
4315 vm_info->resident_size_peak *= PAGE_SIZE;
4316
4317#define _VM_INFO(_name) \
4318 vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE
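/*
 * _VM_INFO(x) copies the pmap's "x" statistic into the matching
 * task_vm_info field, scaling from pages to bytes.
 */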
4319
4320 _VM_INFO(device);
4321 _VM_INFO(device_peak);
4322 _VM_INFO(external);
4323 _VM_INFO(external_peak);
4324 _VM_INFO(internal);
4325 _VM_INFO(internal_peak);
4326 _VM_INFO(reusable);
4327 _VM_INFO(reusable_peak);
4328 _VM_INFO(compressed);
4329 _VM_INFO(compressed_peak);
4330 _VM_INFO(compressed_lifetime);
4331
4332 vm_info->purgeable_volatile_pmap = 0;
4333 vm_info->purgeable_volatile_resident = 0;
4334 vm_info->purgeable_volatile_virtual = 0;
4335 if (task == kernel_task) {
4336 /*
4337 * We do not maintain the detailed stats for the
4338 * kernel_pmap, so just count everything as
4339 * "internal"...
4340 */
4341 vm_info->internal = vm_info->resident_size;
4342 /*
4343 * ... but since the memory held by the VM compressor
4344 * in the kernel address space ought to be attributed
4345 * to user-space tasks, we subtract it from "internal"
4346 * to give memory reporting tools a more accurate idea
4347 * of what the kernel itself is actually using, instead
4348 * of making it look like the kernel is leaking memory
4349 * when the system is under memory pressure.
4350 */
4351 vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT *
4352 PAGE_SIZE);
4353 } else {
4354 mach_vm_size_t volatile_virtual_size;
4355 mach_vm_size_t volatile_resident_size;
4356 mach_vm_size_t volatile_compressed_size;
4357 mach_vm_size_t volatile_pmap_size;
4358 mach_vm_size_t volatile_compressed_pmap_size;
4359 kern_return_t kr;
4360
4361 if (flavor == TASK_VM_INFO_PURGEABLE) {
4362 kr = vm_map_query_volatile(
4363 map,
4364 &volatile_virtual_size,
4365 &volatile_resident_size,
4366 &volatile_compressed_size,
4367 &volatile_pmap_size,
4368 &volatile_compressed_pmap_size);
4369 if (kr == KERN_SUCCESS) {
4370 vm_info->purgeable_volatile_pmap =
4371 volatile_pmap_size;
4372 if (radar_20146450) {
4373 vm_info->compressed -=
4374 volatile_compressed_pmap_size;
4375 }
4376 vm_info->purgeable_volatile_resident =
4377 volatile_resident_size;
4378 vm_info->purgeable_volatile_virtual =
4379 volatile_virtual_size;
4380 }
4381 }
4382 }
4383 *task_info_count = TASK_VM_INFO_REV0_COUNT;
4384
4385 if (original_task_info_count >= TASK_VM_INFO_REV1_COUNT) {
4386 vm_info->phys_footprint =
4387 (mach_vm_size_t) get_task_phys_footprint(task);
4388 *task_info_count = TASK_VM_INFO_REV1_COUNT;
4389 }
4390 if (original_task_info_count >= TASK_VM_INFO_REV2_COUNT) {
4391 vm_info->min_address = map->min_offset;
4392 vm_info->max_address = map->max_offset;
4393 *task_info_count = TASK_VM_INFO_REV2_COUNT;
4394 }
4395
4396 if (task != kernel_task) {
4397 vm_map_unlock_read(map);
4398 }
4399
4400 break;
4401 }
4402
4403 case TASK_WAIT_STATE_INFO:
4404 {
4405 /*
4406 * Deprecated flavor. Currently allowing some results until all users
4407 * stop calling it. The results may not be accurate.
4408 */
4409 task_wait_state_info_t wait_state_info;
4410 uint64_t total_sfi_ledger_val = 0;
4411
4412 if (*task_info_count < TASK_WAIT_STATE_INFO_COUNT) {
4413 error = KERN_INVALID_ARGUMENT;
4414 break;
4415 }
4416
4417 wait_state_info = (task_wait_state_info_t) task_info_out;
4418
4419 wait_state_info->total_wait_state_time = 0;
4420 bzero(wait_state_info->_reserved, sizeof(wait_state_info->_reserved));
4421
4422#if CONFIG_SCHED_SFI
4423 int i, prev_lentry = -1;
4424 int64_t val_credit, val_debit;
4425
4426 for (i = 0; i < MAX_SFI_CLASS_ID; i++) {
4427 val_credit = 0;
4428 /*
4429 * checking with prev_lentry != entry ensures adjacent classes
4430 * which share the same ledger do not add wait times twice.
4431 * Note: Use ledger() call to get data for each individual sfi class.
4432 */
4433 if (prev_lentry != task_ledgers.sfi_wait_times[i] &&
4434 KERN_SUCCESS == ledger_get_entries(task->ledger,
4435 task_ledgers.sfi_wait_times[i], &val_credit, &val_debit)) {
4436 total_sfi_ledger_val += val_credit;
4437 }
4438 prev_lentry = task_ledgers.sfi_wait_times[i];
4439 }
4440
4441#endif /* CONFIG_SCHED_SFI */
4442 wait_state_info->total_wait_sfi_state_time = total_sfi_ledger_val;
4443 *task_info_count = TASK_WAIT_STATE_INFO_COUNT;
4444
4445 break;
4446 }
4447 case TASK_VM_INFO_PURGEABLE_ACCOUNT:
4448 {
4449#if DEVELOPMENT || DEBUG
4450 pvm_account_info_t acnt_info;
4451
4452 if (*task_info_count < PVM_ACCOUNT_INFO_COUNT) {
4453 error = KERN_INVALID_ARGUMENT;
4454 break;
4455 }
4456
4457 if (task_info_out == NULL) {
4458 error = KERN_INVALID_ARGUMENT;
4459 break;
4460 }
4461
4462 acnt_info = (pvm_account_info_t) task_info_out;
4463
4464 error = vm_purgeable_account(task, acnt_info);
4465
4466 *task_info_count = PVM_ACCOUNT_INFO_COUNT;
4467
4468 break;
4469#else /* DEVELOPMENT || DEBUG */
4470 error = KERN_NOT_SUPPORTED;
4471 break;
4472#endif /* DEVELOPMENT || DEBUG */
4473 }
4474 case TASK_FLAGS_INFO:
4475 {
4476 task_flags_info_t flags_info;
4477
4478 if (*task_info_count < TASK_FLAGS_INFO_COUNT) {
4479 error = KERN_INVALID_ARGUMENT;
4480 break;
4481 }
4482
4483 flags_info = (task_flags_info_t)task_info_out;
4484
4485 /* only publish the 64-bit flag of the task */
4486 flags_info->flags = task->t_flags & (TF_64B_ADDR | TF_64B_DATA);
4487
4488 *task_info_count = TASK_FLAGS_INFO_COUNT;
4489 break;
4490 }
4491
4492 case TASK_DEBUG_INFO_INTERNAL:
4493 {
4494#if DEVELOPMENT || DEBUG
4495 task_debug_info_internal_t dbg_info;
4496 if (*task_info_count < TASK_DEBUG_INFO_INTERNAL_COUNT) {
4497 error = KERN_NOT_SUPPORTED;
4498 break;
4499 }
4500
4501 if (task_info_out == NULL) {
4502 error = KERN_INVALID_ARGUMENT;
4503 break;
4504 }
4505 dbg_info = (task_debug_info_internal_t) task_info_out;
4506 dbg_info->ipc_space_size = 0;
4507 if (task->itk_space){
4508 dbg_info->ipc_space_size = task->itk_space->is_table_size;
4509 }
4510
4511 dbg_info->suspend_count = task->suspend_count;
4512
4513 error = KERN_SUCCESS;
4514 *task_info_count = TASK_DEBUG_INFO_INTERNAL_COUNT;
4515 break;
4516#else /* DEVELOPMENT || DEBUG */
4517 error = KERN_NOT_SUPPORTED;
4518 break;
4519#endif /* DEVELOPMENT || DEBUG */
4520 }
4521 default:
4522 error = KERN_INVALID_ARGUMENT;
4523 }
4524
4525 task_unlock(task);
4526 return (error);
4527}
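/*
 * Illustration only (a user-space caller, not part of this file): the count
 * argument is in/out, so a caller passes the capacity of its buffer and
 * reads back the count actually filled in, e.g.
 *
 *	#include <mach/mach.h>
 *
 *	mach_task_basic_info_data_t info;
 *	mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
 *	kern_return_t kr;
 *
 *	kr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
 *	    (task_info_t)&info, &count);
 *
 * On success, info.resident_size, info.suspend_count, etc. are valid and
 * count reports how many natural_t units were written.
 */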
4528
4529/*
4530 * task_info_from_user
4531 *
4532 * When task_info is called from user space,
4533 * this function is executed as the MIG server side
4534 * instead of calling directly into task_info.
4535 * This makes it possible to perform additional security
4536 * checks on task_port.
4537 *
4538 * In the case of TASK_DYLD_INFO, we require the more
4539 * privileged task_port, not the less-privileged task_name_port.
4540 *
4541 */
4542kern_return_t
4543task_info_from_user(
4544 mach_port_t task_port,
4545 task_flavor_t flavor,
4546 task_info_t task_info_out,
4547 mach_msg_type_number_t *task_info_count)
4548{
4549 task_t task;
4550 kern_return_t ret;
4551
4552 if (flavor == TASK_DYLD_INFO)
4553 task = convert_port_to_task(task_port);
4554 else
4555 task = convert_port_to_task_name(task_port);
4556
4557 ret = task_info(task, flavor, task_info_out, task_info_count);
4558
4559 task_deallocate(task);
4560
4561 return ret;
4562}
4563
4564/*
4565 * task_power_info
4566 *
4567 * Returns power stats for the task.
4568 * Note: Called with task locked.
4569 */
4570void
4571task_power_info_locked(
4572 task_t task,
4573 task_power_info_t info,
4574 gpu_energy_data_t ginfo,
4575 task_power_info_v2_t infov2)
4576{
4577 thread_t thread;
4578 ledger_amount_t tmp;
4579
4580 task_lock_assert_owned(task);
4581
4582 ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups,
4583 (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp);
4584 ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups,
4585 (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp);
4586
4587 info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1;
4588 info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2;
4589
4590 info->total_user = task->total_user_time;
4591 info->total_system = task->total_system_time;
4592
4593#if CONFIG_EMBEDDED
4594 if (infov2) {
4595 infov2->task_energy = task->task_energy;
4596 }
4597#endif
4598
4599 if (ginfo) {
4600 ginfo->task_gpu_utilisation = task->task_gpu_ns;
4601 }
4602
4603 if (infov2) {
4604 infov2->task_ptime = task->total_ptime;
4605 infov2->task_pset_switches = task->ps_switch;
4606 }
4607
4608 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4609 uint64_t tval;
4610 spl_t x;
4611
4612 if (thread->options & TH_OPT_IDLE_THREAD)
4613 continue;
4614
4615 x = splsched();
4616 thread_lock(thread);
4617
4618 info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
4619 info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
4620
4621#if CONFIG_EMBEDDED
4622 if (infov2) {
4623 infov2->task_energy += ml_energy_stat(thread);
4624 }
4625#endif
4626
4627 tval = timer_grab(&thread->user_timer);
4628 info->total_user += tval;
4629
4630 if (infov2) {
4631 tval = timer_grab(&thread->ptime);
4632 infov2->task_ptime += tval;
4633 infov2->task_pset_switches += thread->ps_switch;
4634 }
4635
4636 tval = timer_grab(&thread->system_timer);
4637 if (thread->precise_user_kernel_time) {
4638 info->total_system += tval;
4639 } else {
4640 /* system_timer may represent either sys or user */
4641 info->total_user += tval;
4642 }
4643
4644 if (ginfo) {
4645 ginfo->task_gpu_utilisation += ml_gpu_stat(thread);
4646 }
4647 thread_unlock(thread);
4648 splx(x);
4649 }
4650}
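/*
 * The same counters are visible to user space via task_info() with the
 * TASK_POWER_INFO flavor (a sketch, assuming a user-space caller):
 *
 *	task_power_info_data_t pinfo;
 *	mach_msg_type_number_t pcount = TASK_POWER_INFO_COUNT;
 *	kern_return_t kr = task_info(mach_task_self(), TASK_POWER_INFO,
 *	    (task_info_t)&pinfo, &pcount);
 */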
4651
4652/*
4653 * task_gpu_utilisation
4654 *
4655 * Returns the total GPU time used by all the threads of the task
4656 * (both dead and alive).
4657 */
4658uint64_t
4659task_gpu_utilisation(
4660 task_t task)
4661{
4662 uint64_t gpu_time = 0;
4663#if !CONFIG_EMBEDDED
4664 thread_t thread;
4665
4666 task_lock(task);
4667 gpu_time += task->task_gpu_ns;
4668
4669 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4670 spl_t x;
4671 x = splsched();
4672 thread_lock(thread);
4673 gpu_time += ml_gpu_stat(thread);
4674 thread_unlock(thread);
4675 splx(x);
4676 }
4677
4678 task_unlock(task);
4679#else /* CONFIG_EMBEDDED */
4680 /* silence compiler warning */
4681 (void)task;
4682#endif /* !CONFIG_EMBEDDED */
4683 return gpu_time;
4684}
4685
4686/*
4687 * task_energy
4688 *
4689 * Returns the total energy used by all the threads of the task
4690 * (both dead and alive).
4691 */
4692uint64_t
4693task_energy(
4694 task_t task)
4695{
4696 uint64_t energy = 0;
4697 thread_t thread;
4698
4699 task_lock(task);
4700 energy += task->task_energy;
4701
4702 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4703 spl_t x;
4704 x = splsched();
4705 thread_lock(thread);
4706 energy += ml_energy_stat(thread);
4707 thread_unlock(thread);
4708 splx(x);
4709 }
4710
4711 task_unlock(task);
4712 return energy;
4713}
4714
4715
4716uint64_t
4717task_cpu_ptime(
4718 __unused task_t task)
4719{
4720 return 0;
4721}
4722
4723
4724/* This function updates the CPU time in the arrays for each
4725 * effective and requested QoS class.
4726 */
4727void
4728task_update_cpu_time_qos_stats(
4729 task_t task,
4730 uint64_t *eqos_stats,
4731 uint64_t *rqos_stats)
4732{
4733 if (!eqos_stats && !rqos_stats) {
4734 return;
4735 }
4736
4737 task_lock(task);
4738 thread_t thread;
4739 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4740 if (thread->options & TH_OPT_IDLE_THREAD) {
4741 continue;
4742 }
4743
4744 thread_update_qos_cpu_time(thread);
4745 }
4746
4747 if (eqos_stats) {
4748 eqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_eqos_stats.cpu_time_qos_default;
4749 eqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_eqos_stats.cpu_time_qos_maintenance;
4750 eqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_eqos_stats.cpu_time_qos_background;
4751 eqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_eqos_stats.cpu_time_qos_utility;
4752 eqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_eqos_stats.cpu_time_qos_legacy;
4753 eqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_eqos_stats.cpu_time_qos_user_initiated;
4754 eqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_eqos_stats.cpu_time_qos_user_interactive;
4755 }
4756
4757 if (rqos_stats) {
4758 rqos_stats[THREAD_QOS_DEFAULT] += task->cpu_time_rqos_stats.cpu_time_qos_default;
4759 rqos_stats[THREAD_QOS_MAINTENANCE] += task->cpu_time_rqos_stats.cpu_time_qos_maintenance;
4760 rqos_stats[THREAD_QOS_BACKGROUND] += task->cpu_time_rqos_stats.cpu_time_qos_background;
4761 rqos_stats[THREAD_QOS_UTILITY] += task->cpu_time_rqos_stats.cpu_time_qos_utility;
4762 rqos_stats[THREAD_QOS_LEGACY] += task->cpu_time_rqos_stats.cpu_time_qos_legacy;
4763 rqos_stats[THREAD_QOS_USER_INITIATED] += task->cpu_time_rqos_stats.cpu_time_qos_user_initiated;
4764 rqos_stats[THREAD_QOS_USER_INTERACTIVE] += task->cpu_time_rqos_stats.cpu_time_qos_user_interactive;
4765 }
4766
4767 task_unlock(task);
4768}
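/*
 * Illustrative caller (a sketch; sizing the arrays with THREAD_QOS_LAST is
 * an assumption about the caller, not something this routine requires):
 *
 *	uint64_t eqos[THREAD_QOS_LAST] = { 0 };
 *	uint64_t rqos[THREAD_QOS_LAST] = { 0 };
 *
 *	task_update_cpu_time_qos_stats(task, eqos, rqos);
 *
 * after which eqos[THREAD_QOS_UTILITY], for example, holds the accumulated
 * CPU time charged at effective utility QoS.
 */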
4769
4770kern_return_t
4771task_purgable_info(
4772 task_t task,
4773 task_purgable_info_t *stats)
4774{
4775 if (task == TASK_NULL || stats == NULL)
4776 return KERN_INVALID_ARGUMENT;
4777 /* Take task reference */
4778 task_reference(task);
4779 vm_purgeable_stats((vm_purgeable_info_t)stats, task);
4780 /* Drop task reference */
4781 task_deallocate(task);
4782 return KERN_SUCCESS;
4783}
4784
4785void
4786task_vtimer_set(
4787 task_t task,
4788 integer_t which)
4789{
4790 thread_t thread;
4791 spl_t x;
4792
4793 task_lock(task);
4794
4795 task->vtimers |= which;
4796
4797 switch (which) {
4798
4799 case TASK_VTIMER_USER:
4800 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4801 x = splsched();
4802 thread_lock(thread);
4803 if (thread->precise_user_kernel_time)
4804 thread->vtimer_user_save = timer_grab(&thread->user_timer);
4805 else
4806 thread->vtimer_user_save = timer_grab(&thread->system_timer);
4807 thread_unlock(thread);
4808 splx(x);
4809 }
4810 break;
4811
4812 case TASK_VTIMER_PROF:
4813 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4814 x = splsched();
4815 thread_lock(thread);
4816 thread->vtimer_prof_save = timer_grab(&thread->user_timer);
4817 thread->vtimer_prof_save += timer_grab(&thread->system_timer);
4818 thread_unlock(thread);
4819 splx(x);
4820 }
4821 break;
4822
4823 case TASK_VTIMER_RLIM:
4824 queue_iterate(&task->threads, thread, thread_t, task_threads) {
4825 x = splsched();
4826 thread_lock(thread);
4827 thread->vtimer_rlim_save = timer_grab(&thread->user_timer);
4828 thread->vtimer_rlim_save += timer_grab(&thread->system_timer);
4829 thread_unlock(thread);
4830 splx(x);
4831 }
4832 break;
4833 }
4834
4835 task_unlock(task);
4836}
4837
4838void
4839task_vtimer_clear(
4840 task_t task,
4841 integer_t which)
4842{
4843 assert(task == current_task());
4844
4845 task_lock(task);
4846
4847 task->vtimers &= ~which;
4848
4849 task_unlock(task);
4850}
4851
4852void
4853task_vtimer_update(
4854__unused
4855 task_t task,
4856 integer_t which,
4857 uint32_t *microsecs)
4858{
4859 thread_t thread = current_thread();
4860 uint32_t tdelt = 0;
4861 clock_sec_t secs = 0;
4862 uint64_t tsum;
4863
4864 assert(task == current_task());
4865
4866 spl_t s = splsched();
4867 thread_lock(thread);
4868
4869 if ((task->vtimers & which) != (uint32_t)which) {
4870 thread_unlock(thread);
4871 splx(s);
4872 return;
4873 }
4874
4875 switch (which) {
4876
4877 case TASK_VTIMER_USER:
4878 if (thread->precise_user_kernel_time) {
4879 tdelt = (uint32_t)timer_delta(&thread->user_timer,
4880 &thread->vtimer_user_save);
4881 } else {
4882 tdelt = (uint32_t)timer_delta(&thread->system_timer,
4883 &thread->vtimer_user_save);
4884 }
4885 absolutetime_to_microtime(tdelt, &secs, microsecs);
4886 break;
4887
4888 case TASK_VTIMER_PROF:
4889 tsum = timer_grab(&thread->user_timer);
4890 tsum += timer_grab(&thread->system_timer);
4891 tdelt = (uint32_t)(tsum - thread->vtimer_prof_save);
4892 absolutetime_to_microtime(tdelt, &secs, microsecs);
4893 /* if the time delta is smaller than a usec, ignore */
4894 if (*microsecs != 0)
4895 thread->vtimer_prof_save = tsum;
4896 break;
4897
4898 case TASK_VTIMER_RLIM:
4899 tsum = timer_grab(&thread->user_timer);
4900 tsum += timer_grab(&thread->system_timer);
4901 tdelt = (uint32_t)(tsum - thread->vtimer_rlim_save);
4902 thread->vtimer_rlim_save = tsum;
4903 absolutetime_to_microtime(tdelt, &secs, microsecs);
4904 break;
4905 }
4906
4907 thread_unlock(thread);
4908 splx(s);
4909}
4910
4911/*
4912 * task_assign:
4913 *
4914 * Change the assigned processor set for the task
4915 */
4916kern_return_t
4917task_assign(
4918 __unused task_t task,
4919 __unused processor_set_t new_pset,
4920 __unused boolean_t assign_threads)
4921{
4922 return(KERN_FAILURE);
4923}
4924
4925/*
4926 * task_assign_default:
4927 *
4928 * Version of task_assign to assign to default processor set.
4929 */
4930kern_return_t
4931task_assign_default(
4932 task_t task,
4933 boolean_t assign_threads)
4934{
4935 return (task_assign(task, &pset0, assign_threads));
4936}
4937
4938/*
4939 * task_get_assignment
4940 *
4941 * Return name of processor set that task is assigned to.
4942 */
4943kern_return_t
4944task_get_assignment(
4945 task_t task,
4946 processor_set_t *pset)
4947{
4948 if (!task || !task->active)
4949 return KERN_FAILURE;
4950
4951 *pset = &pset0;
4952
4953 return KERN_SUCCESS;
4954}
4955
4956uint64_t
4957get_task_dispatchqueue_offset(
4958 task_t task)
4959{
4960 return task->dispatchqueue_offset;
4961}
4962
4963/*
4964 * task_policy
4965 *
4966 * Set scheduling policy and parameters, both base and limit, for
4967 * the given task. Policy must be a policy which is enabled for the
4968 * processor set. Change contained threads if requested.
4969 */
4970kern_return_t
4971task_policy(
4972 __unused task_t task,
4973 __unused policy_t policy_id,
4974 __unused policy_base_t base,
4975 __unused mach_msg_type_number_t count,
4976 __unused boolean_t set_limit,
4977 __unused boolean_t change)
4978{
4979 return(KERN_FAILURE);
4980}
4981
4982/*
4983 * task_set_policy
4984 *
4985 * Set scheduling policy and parameters, both base and limit, for
4986 * the given task. Policy can be any policy implemented by the
4987 * processor set, whether enabled or not. Change contained threads
4988 * if requested.
4989 */
4990kern_return_t
4991task_set_policy(
4992 __unused task_t task,
4993 __unused processor_set_t pset,
4994 __unused policy_t policy_id,
4995 __unused policy_base_t base,
4996 __unused mach_msg_type_number_t base_count,
4997 __unused policy_limit_t limit,
4998 __unused mach_msg_type_number_t limit_count,
4999 __unused boolean_t change)
5000{
5001 return(KERN_FAILURE);
5002}
5003
5004kern_return_t
5005task_set_ras_pc(
5006 __unused task_t task,
5007 __unused vm_offset_t pc,
5008 __unused vm_offset_t endpc)
5009{
5010 return KERN_FAILURE;
5011}
5012
5013void
5014task_synchronizer_destroy_all(task_t task)
5015{
5016 /*
5017 * Destroy owned semaphores
5018 */
5019 semaphore_destroy_all(task);
5020}
5021
5022/*
5023 * Install default (machine-dependent) initial thread state
5024 * on the task. Subsequent thread creation will have this initial
5025 * state set on the thread by machine_thread_inherit_taskwide().
5026 * Flavors and structures are exactly the same as those passed to thread_set_state().
5027 */
5028kern_return_t
5029task_set_state(
5030 task_t task,
5031 int flavor,
5032 thread_state_t state,
5033 mach_msg_type_number_t state_count)
5034{
5035 kern_return_t ret;
5036
5037 if (task == TASK_NULL) {
5038 return (KERN_INVALID_ARGUMENT);
5039 }
5040
5041 task_lock(task);
5042
5043 if (!task->active) {
5044 task_unlock(task);
5045 return (KERN_FAILURE);
5046 }
5047
5048 ret = machine_task_set_state(task, flavor, state, state_count);
5049
5050 task_unlock(task);
5051 return ret;
5052}
5053
5054/*
5055 * Examine the default (machine-dependent) initial thread state
5056 * on the task, as set by task_set_state(). Flavors and structures
5057 * are exactly the same as those passed to thread_get_state().
5058 */
5059kern_return_t
5060task_get_state(
5061 task_t task,
5062 int flavor,
5063 thread_state_t state,
5064 mach_msg_type_number_t *state_count)
5065{
5066 kern_return_t ret;
5067
5068 if (task == TASK_NULL) {
5069 return (KERN_INVALID_ARGUMENT);
5070 }
5071
5072 task_lock(task);
5073
5074 if (!task->active) {
5075 task_unlock(task);
5076 return (KERN_FAILURE);
5077 }
5078
5079 ret = machine_task_get_state(task, flavor, state, state_count);
5080
5081 task_unlock(task);
5082 return ret;
5083}
5084
5085
5086static kern_return_t __attribute__((noinline,not_tail_called))
5087PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(
5088 mach_exception_code_t code,
5089 mach_exception_subcode_t subcode,
5090 void *reason)
5091{
5092#ifdef MACH_BSD
5093 if (1 == proc_selfpid())
5094 return KERN_NOT_SUPPORTED; // initproc is immune
5095#endif
5096 mach_exception_data_type_t codes[EXCEPTION_CODE_MAX] = {
5097 [0] = code,
5098 [1] = subcode,
5099 };
5100 task_t task = current_task();
5101 kern_return_t kr;
5102
5103 /* (See jetsam-related comments below) */
5104
5105 proc_memstat_terminated(task->bsd_info, TRUE);
5106 kr = task_enqueue_exception_with_corpse(task, EXC_GUARD, codes, 2, reason);
5107 proc_memstat_terminated(task->bsd_info, FALSE);
5108 return kr;
5109}
5110
5111kern_return_t
5112task_violated_guard(
5113 mach_exception_code_t code,
5114 mach_exception_subcode_t subcode,
5115 void *reason)
5116{
5117 return PROC_VIOLATED_GUARD__SEND_EXC_GUARD_AND_SUSPEND(code, subcode, reason);
5118}
5119
5120
5121#if CONFIG_MEMORYSTATUS
5122
5123boolean_t
5124task_get_memlimit_is_active(task_t task)
5125{
5126 assert (task != NULL);
5127
5128 if (task->memlimit_is_active == 1) {
5129 return(TRUE);
5130 } else {
5131 return (FALSE);
5132 }
5133}
5134
5135void
5136task_set_memlimit_is_active(task_t task, boolean_t memlimit_is_active)
5137{
5138 assert (task != NULL);
5139
5140 if (memlimit_is_active) {
5141 task->memlimit_is_active = 1;
5142 } else {
5143 task->memlimit_is_active = 0;
5144 }
5145}
5146
5147boolean_t
5148task_get_memlimit_is_fatal(task_t task)
5149{
5150 assert(task != NULL);
5151
5152 if (task->memlimit_is_fatal == 1) {
5153 return(TRUE);
5154 } else {
5155 return(FALSE);
5156 }
5157}
5158
5159void
5160task_set_memlimit_is_fatal(task_t task, boolean_t memlimit_is_fatal)
5161{
5162 assert (task != NULL);
5163
5164 if (memlimit_is_fatal) {
5165 task->memlimit_is_fatal = 1;
5166 } else {
5167 task->memlimit_is_fatal = 0;
5168 }
5169}
5170
5171boolean_t
5172task_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5173{
5174 boolean_t triggered = FALSE;
5175
5176 assert(task == current_task());
5177
5178 /*
5179 * Returns true if the task has already triggered an exc_resource exception.
5180 */
5181
5182 if (memlimit_is_active) {
5183 triggered = (task->memlimit_active_exc_resource ? TRUE : FALSE);
5184 } else {
5185 triggered = (task->memlimit_inactive_exc_resource ? TRUE : FALSE);
5186 }
5187
5188 return(triggered);
5189}
5190
5191void
5192task_mark_has_triggered_exc_resource(task_t task, boolean_t memlimit_is_active)
5193{
5194 assert(task == current_task());
5195
5196 /*
5197 * We allow one exc_resource per process per active/inactive limit.
5198 * The limit's fatal attribute does not come into play.
5199 */
5200
5201 if (memlimit_is_active) {
5202 task->memlimit_active_exc_resource = 1;
5203 } else {
5204 task->memlimit_inactive_exc_resource = 1;
5205 }
5206}
5207
5208#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation
5209
5210void __attribute__((noinline))
5211PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND(int max_footprint_mb, boolean_t is_fatal)
5212{
5213 task_t task = current_task();
5214 int pid = 0;
5215 const char *procname = "unknown";
5216 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5217 boolean_t send_sync_exc_resource = FALSE;
5218
5219#ifdef MACH_BSD
5220 pid = proc_selfpid();
5221
5222 if (pid == 1) {
5223 /*
5224 * Cannot have ReportCrash analyzing
5225 * a suspended initproc.
5226 */
5227 return;
5228 }
5229
5230 if (task->bsd_info != NULL) {
5231 procname = proc_name_address(current_task()->bsd_info);
5232 send_sync_exc_resource = proc_send_synchronous_EXC_RESOURCE(current_task()->bsd_info);
5233 }
5234#endif
5235#if CONFIG_COREDUMP
5236 if (hwm_user_cores) {
5237 int error;
5238 uint64_t starttime, end;
5239 clock_sec_t secs = 0;
5240 uint32_t microsecs = 0;
5241
5242 starttime = mach_absolute_time();
5243 /*
5244 * Trigger a coredump of this process. Don't proceed unless we know we won't
5245 * be filling up the disk, and ignore the core size resource limit for this
5246 * core file.
5247 */
5248 if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, COREDUMP_IGNORE_ULIMIT)) != 0) {
5249 printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error);
5250 }
5251 /*
5252 * coredump() leaves the task suspended.
5253 */
5254 task_resume_internal(current_task());
5255
5256 end = mach_absolute_time();
5257 absolutetime_to_microtime(end - starttime, &secs, &microsecs);
5258 printf("coredump of %s[%d] taken in %d secs %d microsecs\n",
5259 proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs);
5260 }
5261#endif /* CONFIG_COREDUMP */
5262
5263 if (disable_exc_resource) {
5264 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5265 "supressed by a boot-arg.\n", procname, pid, max_footprint_mb);
5266 return;
5267 }
5268
5269 /*
5270 * A task that has triggered an EXC_RESOURCE should not be
5271 * jetsammed when the device is under memory pressure. Here
5272 * we set the P_MEMSTAT_TERMINATED flag so that the process
5273 * will be skipped if the memorystatus_thread wakes up.
5274 */
5275 proc_memstat_terminated(current_task()->bsd_info, TRUE);
5276
5277 code[0] = code[1] = 0;
5278 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY);
5279 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK);
5280 EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb);
5281
5282 /*
5283 * Do not generate a corpse fork if the violation is a fatal one
5284 * or the process wants synchronous EXC_RESOURCE exceptions.
5285 */
5286 if (is_fatal || send_sync_exc_resource || exc_via_corpse_forking == 0) {
5287 /* Do not send a EXC_RESOURCE if corpse_for_fatal_memkill is set */
5288 if (send_sync_exc_resource || corpse_for_fatal_memkill == 0) {
5289 /*
5290 * Use the _internal_ variant so that no user-space
5291 * process can resume our task from under us.
5292 */
5293 task_suspend_internal(task);
5294 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5295 task_resume_internal(task);
5296 }
5297 } else {
5298 if (audio_active) {
5299 printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE "
5300 "supressed due to audio playback.\n", procname, pid, max_footprint_mb);
5301 } else {
5302 task_enqueue_exception_with_corpse(task, EXC_RESOURCE,
5303 code, EXCEPTION_CODE_MAX, NULL);
5304 }
5305 }
5306
5307 /*
5308 * After the EXC_RESOURCE has been handled, we must clear the
5309 * P_MEMSTAT_TERMINATED flag so that the process can again be
5310 * considered for jetsam if the memorystatus_thread wakes up.
5311 */
5312 proc_memstat_terminated(current_task()->bsd_info, FALSE); /* clear the flag */
5313}
5314
5315/*
5316 * Callback invoked when a task exceeds its physical footprint limit.
5317 */
5318void
5319task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5320{
5321 ledger_amount_t max_footprint, max_footprint_mb;
5322 task_t task;
5323 boolean_t is_warning;
5324 boolean_t memlimit_is_active;
5325 boolean_t memlimit_is_fatal;
5326
5327 if (warning == LEDGER_WARNING_DIPPED_BELOW) {
5328 /*
5329 * Task memory limits only provide a warning on the way up.
5330 */
5331 return;
5332 } else if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5333 /*
5334 * This task is in danger of violating a memory limit:
5335 * it has exceeded a percentage level of the limit.
5336 */
5337 is_warning = TRUE;
5338 } else {
5339 /*
5340 * The task has exceeded the physical footprint limit.
5341 * This is not a warning but a true limit violation.
5342 */
5343 is_warning = FALSE;
5344 }
5345
5346 task = current_task();
5347
5348 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &max_footprint);
5349 max_footprint_mb = max_footprint >> 20;
5350
5351 memlimit_is_active = task_get_memlimit_is_active(task);
5352 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5353
5354 /*
5355 * If this is an actual violation (not a warning), then generate EXC_RESOURCE exception.
5356 * We only generate the exception once per process per memlimit (active/inactive limit).
5357 * To enforce this, we monitor state based on the memlimit's active/inactive attribute
5358 * and we disable it by marking that memlimit as exception triggered.
5359 */
5360 if ((is_warning == FALSE) && (!task_has_triggered_exc_resource(task, memlimit_is_active))) {
5361 PROC_CROSSED_HIGH_WATERMARK__SEND_EXC_RESOURCE_AND_SUSPEND((int)max_footprint_mb, memlimit_is_fatal);
5362 memorystatus_log_exception((int)max_footprint_mb, memlimit_is_active, memlimit_is_fatal);
5363 task_mark_has_triggered_exc_resource(task, memlimit_is_active);
5364 }
5365
5366 memorystatus_on_ledger_footprint_exceeded(is_warning, memlimit_is_active, memlimit_is_fatal);
5367}
5368
5369extern int proc_check_footprint_priv(void);
5370
5371kern_return_t
5372task_set_phys_footprint_limit(
5373 task_t task,
5374 int new_limit_mb,
5375 int *old_limit_mb)
5376{
5377 kern_return_t error;
5378
5379 boolean_t memlimit_is_active;
5380 boolean_t memlimit_is_fatal;
5381
5382 if ((error = proc_check_footprint_priv())) {
5383 return (KERN_NO_ACCESS);
5384 }
5385
5386 /*
5387 * This call should probably be obsoleted.
5388 * But for now, we default to current state.
5389 */
5390 memlimit_is_active = task_get_memlimit_is_active(task);
5391 memlimit_is_fatal = task_get_memlimit_is_fatal(task);
5392
5393 return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, memlimit_is_active, memlimit_is_fatal);
5394}
5395
5396kern_return_t
5397task_convert_phys_footprint_limit(
5398 int limit_mb,
5399 int *converted_limit_mb)
5400{
5401 if (limit_mb == -1) {
5402 /*
5403 * No limit
5404 */
5405 if (max_task_footprint != 0) {
5406 *converted_limit_mb = (int)(max_task_footprint / 1024 / 1024); /* bytes to MB */
5407 } else {
5408 *converted_limit_mb = (int)(LEDGER_LIMIT_INFINITY >> 20);
5409 }
5410 } else {
5411 /* nothing to convert */
5412 *converted_limit_mb = limit_mb;
5413 }
5414 return (KERN_SUCCESS);
5415}
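/*
 * Example: with max_task_footprint set to 2 GB, a limit_mb of -1 converts
 * to 2048; any explicit limit (say 512) is returned unchanged.
 */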
5416
5417
5418kern_return_t
5419task_set_phys_footprint_limit_internal(
5420 task_t task,
5421 int new_limit_mb,
5422 int *old_limit_mb,
5423 boolean_t memlimit_is_active,
5424 boolean_t memlimit_is_fatal)
5425{
5426 ledger_amount_t old;
5427
5428 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old);
5429
5430 /*
5431 * Check that limit >> 20 will not give an "unexpected" 32-bit
5432 * result. There are, however, implicit assumptions that -1 mb limit
5433 * equates to LEDGER_LIMIT_INFINITY.
5434 */
5435 assert(((old & 0xFFF0000000000000LL) == 0) || (old == LEDGER_LIMIT_INFINITY));
5436
5437 if (old_limit_mb) {
5438 *old_limit_mb = (int)(old >> 20);
5439 }
5440
5441 if (new_limit_mb == -1) {
5442 /*
5443 * Caller wishes to remove the limit.
5444 */
5445 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5446 max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY,
5447 max_task_footprint ? max_task_footprint_warning_level : 0);
5448
5449 task_lock(task);
5450 task_set_memlimit_is_active(task, memlimit_is_active);
5451 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5452 task_unlock(task);
5453
5454 return (KERN_SUCCESS);
5455 }
5456
5457#ifdef CONFIG_NOMONITORS
5458 return (KERN_SUCCESS);
5459#endif /* CONFIG_NOMONITORS */
5460
5461 task_lock(task);
5462
5463 if ((memlimit_is_active == task_get_memlimit_is_active(task)) &&
5464 (memlimit_is_fatal == task_get_memlimit_is_fatal(task)) &&
5465 (((ledger_amount_t)new_limit_mb << 20) == old)) {
5466 /*
5467 * memlimit state is not changing
5468 */
5469 task_unlock(task);
5470 return(KERN_SUCCESS);
5471 }
5472
5473 task_set_memlimit_is_active(task, memlimit_is_active);
5474 task_set_memlimit_is_fatal(task, memlimit_is_fatal);
5475
5476 ledger_set_limit(task->ledger, task_ledgers.phys_footprint,
5477 (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL);
5478
5479 if (task == current_task()) {
5480 ledger_check_new_balance(current_thread(), task->ledger,
5481 task_ledgers.phys_footprint);
5482 }
5483
5484 task_unlock(task);
5485
5486 return (KERN_SUCCESS);
5487}
5488
5489kern_return_t
5490task_get_phys_footprint_limit(
5491 task_t task,
5492 int *limit_mb)
5493{
5494 ledger_amount_t limit;
5495
5496 ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit);
5497 /*
5498 * Check that limit >> 20 will not give an "unexpected" signed, 32-bit
5499 * result. There are, however, implicit assumptions that -1 mb limit
5500 * equates to LEDGER_LIMIT_INFINITY.
5501 */
5502 assert(((limit & 0xFFF0000000000000LL) == 0) || (limit == LEDGER_LIMIT_INFINITY));
5503 *limit_mb = (int)(limit >> 20);
5504
5505 return (KERN_SUCCESS);
5506}
5507#else /* CONFIG_MEMORYSTATUS */
5508kern_return_t
5509task_set_phys_footprint_limit(
5510 __unused task_t task,
5511 __unused int new_limit_mb,
5512 __unused int *old_limit_mb)
5513{
5514 return (KERN_FAILURE);
5515}
5516
5517kern_return_t
5518task_get_phys_footprint_limit(
5519 __unused task_t task,
5520 __unused int *limit_mb)
5521{
5522 return (KERN_FAILURE);
5523}
5524#endif /* CONFIG_MEMORYSTATUS */
5525
5526void
5527task_set_thread_limit(task_t task, uint16_t thread_limit)
5528{
5529 assert(task != kernel_task);
5530 if (thread_limit <= TASK_MAX_THREAD_LIMIT) {
5531 task_lock(task);
5532 task->task_thread_limit = thread_limit;
5533 task_unlock(task);
5534 }
5535}
5536
5537/*
5538 * We need to export some functions to other components that
5539 * are currently implemented in macros within the osfmk
5540 * component. Just export them as functions of the same name.
5541 */
5542boolean_t is_kerneltask(task_t t)
5543{
5544 if (t == kernel_task)
5545 return (TRUE);
5546
5547 return (FALSE);
5548}
5549
5550boolean_t is_corpsetask(task_t t)
5551{
5552 return (task_is_a_corpse(t));
5553}
5554
5555#undef current_task
5556task_t current_task(void);
5557task_t current_task(void)
5558{
5559 return (current_task_fast());
5560}
5561
5562#undef task_reference
5563void task_reference(task_t task);
5564void
5565task_reference(
5566 task_t task)
5567{
5568 if (task != TASK_NULL)
5569 task_reference_internal(task);
5570}
5571
5572/* defined in bsd/kern/kern_prot.c */
5573extern int get_audit_token_pid(audit_token_t *audit_token);
5574
5575int task_pid(task_t task)
5576{
5577 if (task)
5578 return get_audit_token_pid(&task->audit_token);
5579 return -1;
5580}
5581
5582
5583/*
5584 * This routine finds a thread in a task by its unique id
5585 * Returns a referenced thread or THREAD_NULL if the thread was not found
5586 *
5587 * TODO: This is super inefficient - it's an O(threads in task) list walk!
5588 * We should make a tid hash, or transition all tid clients to thread ports
5589 *
5590 * Precondition: No locks held (will take task lock)
5591 */
5592thread_t
5593task_findtid(task_t task, uint64_t tid)
5594{
5595 thread_t self = current_thread();
5596 thread_t found_thread = THREAD_NULL;
5597 thread_t iter_thread = THREAD_NULL;
5598
5599 /* Short-circuit the lookup if we're looking up ourselves */
5600 if (tid == self->thread_id || tid == TID_NULL) {
5601 assert(self->task == task);
5602
5603 thread_reference(self);
5604
5605 return self;
5606 }
5607
5608 task_lock(task);
5609
5610 queue_iterate(&task->threads, iter_thread, thread_t, task_threads) {
5611 if (iter_thread->thread_id == tid) {
5612 found_thread = iter_thread;
5613 thread_reference(found_thread);
5614 break;
5615 }
5616 }
5617
5618 task_unlock(task);
5619
5620 return (found_thread);
5621}
5622
5623int pid_from_task(task_t task)
5624{
5625 int pid = -1;
5626
5627 if (task->bsd_info) {
5628 pid = proc_pid(task->bsd_info);
5629 } else {
5630 pid = task_pid(task);
5631 }
5632
5633 return pid;
5634}
5635
5636/*
5637 * Control the CPU usage monitor for a task.
5638 */
5639kern_return_t
5640task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags)
5641{
5642 int error = KERN_SUCCESS;
5643
5644 if (*flags & CPUMON_MAKE_FATAL) {
5645 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON;
5646 } else {
5647 error = KERN_INVALID_ARGUMENT;
5648 }
5649
5650 return error;
5651}
5652
5653/*
5654 * Control the wakeups monitor for a task.
5655 */
5656kern_return_t
5657task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz)
5658{
5659 ledger_t ledger = task->ledger;
5660
5661 task_lock(task);
5662 if (*flags & WAKEMON_GET_PARAMS) {
5663 ledger_amount_t limit;
5664 uint64_t period;
5665
5666 ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit);
5667 ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period);
5668
5669 if (limit != LEDGER_LIMIT_INFINITY) {
5670 /*
5671 * An active limit means the wakeups monitor is enabled.
5672 */
5673 *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC));
5674 *flags = WAKEMON_ENABLE;
5675 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) {
5676 *flags |= WAKEMON_MAKE_FATAL;
5677 }
5678 } else {
5679 *flags = WAKEMON_DISABLE;
5680 *rate_hz = -1;
5681 }
5682
5683 /*
5684 * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored.
5685 */
5686 task_unlock(task);
5687 return KERN_SUCCESS;
5688 }
5689
5690 if (*flags & WAKEMON_ENABLE) {
5691 if (*flags & WAKEMON_SET_DEFAULTS) {
5692 *rate_hz = task_wakeups_monitor_rate;
5693 }
5694
5695#ifndef CONFIG_NOMONITORS
5696 if (*flags & WAKEMON_MAKE_FATAL) {
5697 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5698 }
5699#endif /* CONFIG_NOMONITORS */
5700
5701 if (*rate_hz <= 0) {
5702 task_unlock(task);
5703 return KERN_INVALID_ARGUMENT;
5704 }
5705
5706#ifndef CONFIG_NOMONITORS
5707 ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval,
5708 task_wakeups_monitor_ustackshots_trigger_pct);
5709 ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC);
5710 ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups);
5711#endif /* CONFIG_NOMONITORS */
5712 } else if (*flags & WAKEMON_DISABLE) {
5713 /*
5714 * Caller wishes to disable wakeups monitor on the task.
5715 *
5716 * Disable telemetry if it was triggered by the wakeups monitor, and
5717 * remove the limit & callback on the wakeups ledger entry.
5718 */
5719#if CONFIG_TELEMETRY
5720 telemetry_task_ctl_locked(task, TF_WAKEMON_WARNING, 0);
5721#endif
5722 ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups);
5723 ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups);
5724 }
5725
5726 task_unlock(task);
5727 return KERN_SUCCESS;
5728}
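/*
 * Illustrative use of the flag protocol above (a sketch, not a required
 * calling sequence): enable the monitor at the default rate, then read the
 * effective parameters back.
 *
 *	uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS;
 *	int32_t rate_hz = 0;
 *
 *	(void)task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 *
 *	flags = WAKEMON_GET_PARAMS;
 *	(void)task_wakeups_monitor_ctl(task, &flags, &rate_hz);
 *
 * On return, flags reports WAKEMON_ENABLE (plus WAKEMON_MAKE_FATAL if that
 * attribute is set) and rate_hz the configured wakeups-per-second limit.
 */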
5729
5730void
5731task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1)
5732{
5733 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
5734#if CONFIG_TELEMETRY
5735 /*
5736 * This task is in danger of violating the wakeups monitor. Enable telemetry on this task
5737 * so there are micro-stackshots available if and when EXC_RESOURCE is triggered.
5738 */
5739 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1);
5740#endif
5741 return;
5742 }
5743
5744#if CONFIG_TELEMETRY
5745 /*
5746 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
5747 * exceeded the limit, turn telemetry off for the task.
5748 */
5749 telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0);
5750#endif
5751
5752 if (warning == 0) {
5753 SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS();
5754 }
5755}
5756
5757void __attribute__((noinline))
5758SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS(void)
5759{
5760 task_t task = current_task();
5761 int pid = 0;
5762 const char *procname = "unknown";
5763 boolean_t fatal;
5764 kern_return_t kr;
5765#ifdef EXC_RESOURCE_MONITORS
5766 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5767#endif /* EXC_RESOURCE_MONITORS */
5768 struct ledger_entry_info lei;
5769
5770#ifdef MACH_BSD
5771 pid = proc_selfpid();
5772 if (task->bsd_info != NULL)
5773 procname = proc_name_address(current_task()->bsd_info);
5774#endif
5775
5776 ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei);
5777
5778 /*
5779 * Disable the exception notification so we don't overwhelm
5780 * the listener with an endless stream of redundant exceptions.
5781 * TODO: detect whether another thread is already reporting the violation.
5782 */
5783 uint32_t flags = WAKEMON_DISABLE;
5784 task_wakeups_monitor_ctl(task, &flags, NULL);
5785
5786 fatal = task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON;
5787 trace_resource_violation(RMON_CPUWAKES_VIOLATED, &lei);
5788 os_log(OS_LOG_DEFAULT, "process %s[%d] caught waking the CPU %llu times "
5789 "over ~%llu seconds, averaging %llu wakes / second and "
5790 "violating a %slimit of %llu wakes over %llu seconds.\n",
5791 procname, pid,
5792 lei.lei_balance, lei.lei_last_refill / NSEC_PER_SEC,
5793 lei.lei_last_refill == 0 ? 0 :
5794 (NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill),
5795 fatal ? "FATAL " : "",
5796 lei.lei_limit, lei.lei_refill_period / NSEC_PER_SEC);
5797
5798 kr = send_resource_violation(send_cpu_wakes_violation, task, &lei,
5799 fatal ? kRNFatalLimitFlag : 0);
5800 if (kr) {
5801 printf("send_resource_violation(CPU wakes, ...): error %#x\n", kr);
5802 }
5803
5804#ifdef EXC_RESOURCE_MONITORS
5805 if (disable_exc_resource) {
5806 printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
5807 "supressed by a boot-arg\n", procname, pid);
5808 return;
5809 }
5810 if (audio_active) {
5811 os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
		       "suppressed due to audio playback\n", procname, pid);
5813 return;
5814 }
	if (lei.lei_last_refill == 0) {
		os_log(OS_LOG_DEFAULT, "process %s[%d] caught causing excessive wakeups. EXC_RESOURCE "
		       "suppressed due to lei.lei_last_refill = 0\n", procname, pid);
		return;
	}
5819
5820 code[0] = code[1] = 0;
5821 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS);
5822 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR);
5823 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0],
5824 NSEC_PER_SEC * lei.lei_limit / lei.lei_refill_period);
5825 EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0],
5826 lei.lei_last_refill);
5827 EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1],
5828 NSEC_PER_SEC * lei.lei_balance / lei.lei_last_refill);
5829 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5830#endif /* EXC_RESOURCE_MONITORS */
5831
5832 if (fatal) {
5833 task_terminate_internal(task);
5834 }
5835}
5836
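/*
 * Accumulate io_delta into the system-wide logical-writes counter using a
 * lock-free compare-and-swap loop.  When the running count reaches
 * io_telemetry_limit it is reset to zero and TRUE is returned so the
 * caller can arm the I/O telemetry AST.
 */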
5837static boolean_t
5838global_update_logical_writes(int64_t io_delta)
5839{
5840 int64_t old_count, new_count;
5841 boolean_t needs_telemetry;
5842
5843 do {
5844 new_count = old_count = global_logical_writes_count;
5845 new_count += io_delta;
5846 if (new_count >= io_telemetry_limit) {
5847 new_count = 0;
5848 needs_telemetry = TRUE;
5849 } else {
5850 needs_telemetry = FALSE;
5851 }
	} while (!OSCompareAndSwap64(old_count, new_count, &global_logical_writes_count));
5853 return needs_telemetry;
5854}
5855
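/*
 * Account a logical write of io_size bytes against the task.
 * TASK_WRITE_IMMEDIATE, TASK_WRITE_DEFERRED and TASK_WRITE_METADATA credit
 * the logical_writes ledger; TASK_WRITE_INVALIDATED debits it and subtracts
 * from the global count.  A kdebug event and a DTrace probe are emitted for
 * every call.  Illustrative use from a writer that already holds a vnode
 * pointer vp (hypothetical caller, shown only as a sketch):
 *
 *	task_update_logical_writes(current_task(), bytes, TASK_WRITE_DEFERRED, vp);
 */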
5856void task_update_logical_writes(task_t task, uint32_t io_size, int flags, void *vp)
5857{
5858 int64_t io_delta = 0;
5859 boolean_t needs_telemetry = FALSE;
5860
5861 if ((!task) || (!io_size) || (!vp))
5862 return;
5863
5864 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_DATA_WRITE)) | DBG_FUNC_NONE,
5865 task_pid(task), io_size, flags, (uintptr_t)VM_KERNEL_ADDRPERM(vp), 0);
5866 DTRACE_IO4(logical_writes, struct task *, task, uint32_t, io_size, int, flags, vnode *, vp);
	switch (flags) {
5868 case TASK_WRITE_IMMEDIATE:
5869 OSAddAtomic64(io_size, (SInt64 *)&(task->task_immediate_writes));
5870 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5871 break;
5872 case TASK_WRITE_DEFERRED:
5873 OSAddAtomic64(io_size, (SInt64 *)&(task->task_deferred_writes));
5874 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5875 break;
5876 case TASK_WRITE_INVALIDATED:
5877 OSAddAtomic64(io_size, (SInt64 *)&(task->task_invalidated_writes));
5878 ledger_debit(task->ledger, task_ledgers.logical_writes, io_size);
5879 break;
5880 case TASK_WRITE_METADATA:
5881 OSAddAtomic64(io_size, (SInt64 *)&(task->task_metadata_writes));
5882 ledger_credit(task->ledger, task_ledgers.logical_writes, io_size);
5883 break;
5884 }
5885
5886 io_delta = (flags == TASK_WRITE_INVALIDATED) ? ((int64_t)io_size * -1ll) : ((int64_t)io_size);
	/* An io_telemetry_limit of 0 disables the global running count and I/O telemetry entirely */
	if (io_telemetry_limit != 0) {
5889 needs_telemetry = global_update_logical_writes(io_delta);
5890 if (needs_telemetry) {
5891 act_set_io_telemetry_ast(current_thread());
5892 }
5893 }
5894}
5895
/*
 * Control the I/O monitor for a task.
 *
 * IOMON_ENABLE configures both the physical and logical writes ledger
 * entries with a limit of task_iomon_limit_mb megabytes per
 * task_iomon_interval_secs seconds; IOMON_DISABLE removes the refill
 * period and callback from both entries.
 */
5899kern_return_t
5900task_io_monitor_ctl(task_t task, uint32_t *flags)
5901{
5902 ledger_t ledger = task->ledger;
5903
5904 task_lock(task);
5905 if (*flags & IOMON_ENABLE) {
5906 /* Configure the physical I/O ledger */
5907 ledger_set_limit(ledger, task_ledgers.physical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5908 ledger_set_period(ledger, task_ledgers.physical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5909
5910 /* Configure the logical I/O ledger */
5911 ledger_set_limit(ledger, task_ledgers.logical_writes, (task_iomon_limit_mb * 1024 * 1024), 0);
5912 ledger_set_period(ledger, task_ledgers.logical_writes, (task_iomon_interval_secs * NSEC_PER_SEC));
5913
5914 } else if (*flags & IOMON_DISABLE) {
5915 /*
5916 * Caller wishes to disable I/O monitor on the task.
5917 */
5918 ledger_disable_refill(ledger, task_ledgers.physical_writes);
5919 ledger_disable_callback(ledger, task_ledgers.physical_writes);
5920 ledger_disable_refill(ledger, task_ledgers.logical_writes);
5921 ledger_disable_callback(ledger, task_ledgers.logical_writes);
5922 }
5923
5924 task_unlock(task);
5925 return KERN_SUCCESS;
5926}
5927
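/*
 * Ledger callback for the physical_writes and logical_writes entries.  A
 * warning value of zero means the limit itself was exceeded; param0 carries
 * the I/O flavor (FLAVOR_IO_PHYSICAL_WRITES or FLAVOR_IO_LOGICAL_WRITES).
 */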
5928void
5929task_io_rate_exceeded(int warning, const void *param0, __unused const void *param1)
5930{
5931 if (warning == 0) {
		SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO((int)(uintptr_t)param0);
5933 }
5934}
5935
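/*
 * Slow path for an I/O monitor violation: snapshot the ledger entry for the
 * violated flavor, disable the monitor so the violation is reported only
 * once, log it, notify any resource-violation listener, and raise
 * EXC_RESOURCE when resource-exception monitors are built in.
 */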
void __attribute__((noinline))
SENDING_NOTIFICATION__THIS_PROCESS_IS_CAUSING_TOO_MUCH_IO(int flavor)
5937{
5938 int pid = 0;
5939 task_t task = current_task();
5940#ifdef EXC_RESOURCE_MONITORS
5941 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
5942#endif /* EXC_RESOURCE_MONITORS */
5943 struct ledger_entry_info lei;
5944 kern_return_t kr;
5945
5946#ifdef MACH_BSD
5947 pid = proc_selfpid();
5948#endif
	/*
	 * Snapshot the ledger entry info for the violated flavor before the
	 * monitor is disabled below, so that all fields (balance, limit,
	 * refill period) still reflect the configuration that was violated.
	 */
	switch (flavor) {
5954 case FLAVOR_IO_PHYSICAL_WRITES:
5955 ledger_get_entry_info(task->ledger, task_ledgers.physical_writes, &lei);
5956 break;
5957 case FLAVOR_IO_LOGICAL_WRITES:
5958 ledger_get_entry_info(task->ledger, task_ledgers.logical_writes, &lei);
5959 break;
5960 }
5961
5962
5963 /*
5964 * Disable the exception notification so we don't overwhelm
5965 * the listener with an endless stream of redundant exceptions.
5966 * TODO: detect whether another thread is already reporting the violation.
5967 */
5968 uint32_t flags = IOMON_DISABLE;
5969 task_io_monitor_ctl(task, &flags);
5970
5971 if (flavor == FLAVOR_IO_LOGICAL_WRITES) {
5972 trace_resource_violation(RMON_LOGWRITES_VIOLATED, &lei);
5973 }
	os_log(OS_LOG_DEFAULT, "process [%d] caught causing excessive I/O (flavor: %d). Task I/O: %lld MB. [Limit: %lld MB per %lld secs]\n",
5975 pid, flavor, (lei.lei_balance / (1024 * 1024)), (lei.lei_limit / (1024 * 1024)), (lei.lei_refill_period / NSEC_PER_SEC));
5976
5977 kr = send_resource_violation(send_disk_writes_violation, task, &lei, kRNFlagsNone);
5978 if (kr) {
5979 printf("send_resource_violation(disk_writes, ...): error %#x\n", kr);
5980 }
5981
5982#ifdef EXC_RESOURCE_MONITORS
5983 code[0] = code[1] = 0;
5984 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_IO);
5985 EXC_RESOURCE_ENCODE_FLAVOR(code[0], flavor);
5986 EXC_RESOURCE_IO_ENCODE_INTERVAL(code[0], (lei.lei_refill_period / NSEC_PER_SEC));
5987 EXC_RESOURCE_IO_ENCODE_LIMIT(code[0], (lei.lei_limit / (1024 * 1024)));
5988 EXC_RESOURCE_IO_ENCODE_OBSERVED(code[1], (lei.lei_balance / (1024 * 1024)));
5989 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
5990#endif /* EXC_RESOURCE_MONITORS */
5991}
5992
5993/* Placeholders for the task set/get voucher interfaces */
5994kern_return_t
5995task_get_mach_voucher(
5996 task_t task,
5997 mach_voucher_selector_t __unused which,
5998 ipc_voucher_t *voucher)
5999{
6000 if (TASK_NULL == task)
6001 return KERN_INVALID_TASK;
6002
6003 *voucher = NULL;
6004 return KERN_SUCCESS;
6005}
6006
6007kern_return_t
6008task_set_mach_voucher(
6009 task_t task,
6010 ipc_voucher_t __unused voucher)
6011{
6012 if (TASK_NULL == task)
6013 return KERN_INVALID_TASK;
6014
6015 return KERN_SUCCESS;
6016}
6017
6018kern_return_t
6019task_swap_mach_voucher(
6020 task_t task,
6021 ipc_voucher_t new_voucher,
6022 ipc_voucher_t *in_out_old_voucher)
6023{
6024 if (TASK_NULL == task)
6025 return KERN_INVALID_TASK;
6026
6027 *in_out_old_voucher = new_voucher;
6028 return KERN_SUCCESS;
6029}
6030
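/*
 * Set or clear TF_GPU_DENIED for the task.  The flag is updated under the
 * task lock but read without it by task_is_gpu_denied().
 */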
6031void task_set_gpu_denied(task_t task, boolean_t denied)
6032{
6033 task_lock(task);
6034
6035 if (denied) {
6036 task->t_flags |= TF_GPU_DENIED;
6037 } else {
6038 task->t_flags &= ~TF_GPU_DENIED;
6039 }
6040
6041 task_unlock(task);
6042}
6043
6044boolean_t task_is_gpu_denied(task_t task)
6045{
6046 /* We don't need the lock to read this flag */
6047 return (task->t_flags & TF_GPU_DENIED) ? TRUE : FALSE;
6048}
6049
6050
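/* Number of entries in the task's VM map (the kernel map for kernel_task). */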
6051uint64_t get_task_memory_region_count(task_t task)
6052{
6053 vm_map_t map;
	map = (task == kernel_task) ? kernel_map : task->map;
6055 return((uint64_t)get_map_nentries(map));
6056}
6057
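/*
 * Emit the kdebug events describing one dyld image: its UUID, load address,
 * filesystem ID and file object ID.  The payload is packed into two events
 * (base_code, base_code + 1) on LP64 and three events (base_code + 2 through
 * base_code + 4) on ILP32.
 */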
6058static void
6059kdebug_trace_dyld_internal(uint32_t base_code,
6060 struct dyld_kernel_image_info *info)
6061{
6062 static_assert(sizeof(info->uuid) >= 16);
6063
6064#if defined(__LP64__)
6065 uint64_t *uuid = (uint64_t *)&(info->uuid);
6066
6067 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6068 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code), uuid[0],
6069 uuid[1], info->load_addr,
6070 (uint64_t)info->fsid.val[0] | ((uint64_t)info->fsid.val[1] << 32),
6071 0);
6072 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6073 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 1),
6074 (uint64_t)info->fsobjid.fid_objno |
6075 ((uint64_t)info->fsobjid.fid_generation << 32),
6076 0, 0, 0, 0);
6077#else /* defined(__LP64__) */
6078 uint32_t *uuid = (uint32_t *)&(info->uuid);
6079
6080 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6081 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 2), uuid[0],
6082 uuid[1], uuid[2], uuid[3], 0);
6083 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6084 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 3),
6085 (uint32_t)info->load_addr, info->fsid.val[0], info->fsid.val[1],
6086 info->fsobjid.fid_objno, 0);
6087 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
6088 KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, base_code + 4),
6089 info->fsobjid.fid_generation, 0, 0, 0, 0);
6090#endif /* !defined(__LP64__) */
6091}
6092
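/*
 * Common implementation for task_register_dyld_image_infos() and
 * task_unregister_dyld_image_infos() below: take ownership of the
 * caller-supplied vm_map_copy_t, bail out early (discarding the copy) if
 * kdebug or the DBG_DYLD_UUID events are disabled, copy the image-info
 * array into the kernel IPC map, emit one set of events per image, and
 * deallocate the copied-in data.
 */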
6093static kern_return_t
6094kdebug_trace_dyld(task_t task, uint32_t base_code,
6095 vm_map_copy_t infos_copy, mach_msg_type_number_t infos_len)
6096{
6097 kern_return_t kr;
6098 dyld_kernel_image_info_array_t infos;
6099 vm_map_offset_t map_data;
6100 vm_offset_t data;
6101
6102 if (!infos_copy) {
6103 return KERN_INVALID_ADDRESS;
6104 }
6105
6106 if (!kdebug_enable ||
6107 !kdebug_debugid_enabled(KDBG_EVENTID(DBG_DYLD, DBG_DYLD_UUID, 0)))
6108 {
6109 vm_map_copy_discard(infos_copy);
6110 return KERN_SUCCESS;
6111 }
6112
6113 if (task == NULL || task != current_task()) {
6114 return KERN_INVALID_TASK;
6115 }
6116
6117 kr = vm_map_copyout(ipc_kernel_map, &map_data, (vm_map_copy_t)infos_copy);
6118 if (kr != KERN_SUCCESS) {
6119 return kr;
6120 }
6121
6122 infos = CAST_DOWN(dyld_kernel_image_info_array_t, map_data);
6123
6124 for (mach_msg_type_number_t i = 0; i < infos_len; i++) {
6125 kdebug_trace_dyld_internal(base_code, &(infos[i]));
6126 }
6127
6128 data = CAST_DOWN(vm_offset_t, map_data);
6129 mach_vm_deallocate(ipc_kernel_map, data, infos_len * sizeof(infos[0]));
6130 return KERN_SUCCESS;
6131}
6132
6133kern_return_t
6134task_register_dyld_image_infos(task_t task,
6135 dyld_kernel_image_info_array_t infos_copy,
6136 mach_msg_type_number_t infos_len)
6137{
6138 return kdebug_trace_dyld(task, DBG_DYLD_UUID_MAP_A,
6139 (vm_map_copy_t)infos_copy, infos_len);
6140}
6141
6142kern_return_t
6143task_unregister_dyld_image_infos(task_t task,
6144 dyld_kernel_image_info_array_t infos_copy,
6145 mach_msg_type_number_t infos_len)
6146{
6147 return kdebug_trace_dyld(task, DBG_DYLD_UUID_UNMAP_A,
6148 (vm_map_copy_t)infos_copy, infos_len);
6149}
6150
6151kern_return_t
6152task_get_dyld_image_infos(__unused task_t task,
6153 __unused dyld_kernel_image_info_array_t * dyld_images,
6154 __unused mach_msg_type_number_t * dyld_imagesCnt)
6155{
6156 return KERN_NOT_SUPPORTED;
6157}
6158
6159kern_return_t
6160task_register_dyld_shared_cache_image_info(task_t task,
6161 dyld_kernel_image_info_t cache_img,
6162 __unused boolean_t no_cache,
6163 __unused boolean_t private_cache)
6164{
6165 if (task == NULL || task != current_task()) {
6166 return KERN_INVALID_TASK;
6167 }
6168
6169 kdebug_trace_dyld_internal(DBG_DYLD_UUID_SHARED_CACHE_A, &cache_img);
6170 return KERN_SUCCESS;
6171}
6172
6173kern_return_t
6174task_register_dyld_set_dyld_state(__unused task_t task,
6175 __unused uint8_t dyld_state)
6176{
6177 return KERN_NOT_SUPPORTED;
6178}
6179
6180kern_return_t
6181task_register_dyld_get_process_state(__unused task_t task,
6182 __unused dyld_kernel_process_info_t * dyld_process_state)
6183{
6184 return KERN_NOT_SUPPORTED;
6185}
6186
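/*
 * With MONOTONIC built in, TASK_INSPECT_BASIC_COUNTS fills a struct
 * task_inspect_basic_counts with the task's fixed monotonic counter values:
 * instructions (where the core provides MT_CORE_INSTRS, otherwise zero) and
 * cycles.  The caller passes the capacity of info_out in *size_in_out and
 * receives the size actually written back.  Without MONOTONIC this returns
 * KERN_NOT_SUPPORTED.
 */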
6187kern_return_t
6188task_inspect(task_inspect_t task_insp, task_inspect_flavor_t flavor,
6189 task_inspect_info_t info_out, mach_msg_type_number_t *size_in_out)
6190{
6191#if MONOTONIC
6192 task_t task = (task_t)task_insp;
6193 kern_return_t kr = KERN_SUCCESS;
6194 mach_msg_type_number_t size;
6195
6196 if (task == TASK_NULL) {
6197 return KERN_INVALID_ARGUMENT;
6198 }
6199
6200 size = *size_in_out;
6201
6202 switch (flavor) {
6203 case TASK_INSPECT_BASIC_COUNTS: {
6204 struct task_inspect_basic_counts *bc;
6205 uint64_t task_counts[MT_CORE_NFIXED];
6206
6207 if (size < TASK_INSPECT_BASIC_COUNTS_COUNT) {
6208 kr = KERN_INVALID_ARGUMENT;
6209 break;
6210 }
6211
6212 mt_fixed_task_counts(task, task_counts);
6213 bc = (struct task_inspect_basic_counts *)info_out;
6214#ifdef MT_CORE_INSTRS
6215 bc->instructions = task_counts[MT_CORE_INSTRS];
6216#else /* defined(MT_CORE_INSTRS) */
6217 bc->instructions = 0;
6218#endif /* !defined(MT_CORE_INSTRS) */
6219 bc->cycles = task_counts[MT_CORE_CYCLES];
6220 size = TASK_INSPECT_BASIC_COUNTS_COUNT;
6221 break;
6222 }
6223 default:
6224 kr = KERN_INVALID_ARGUMENT;
6225 break;
6226 }
6227
6228 if (kr == KERN_SUCCESS) {
6229 *size_in_out = size;
6230 }
6231 return kr;
6232#else /* MONOTONIC */
6233#pragma unused(task_insp, flavor, info_out, size_in_out)
6234 return KERN_NOT_SUPPORTED;
6235#endif /* !MONOTONIC */
6236}
6237
6238#if CONFIG_SECLUDED_MEMORY
6239int num_tasks_can_use_secluded_mem = 0;
6240
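/*
 * Grant or revoke the task's permission to allocate from the secluded pool.
 * This is a no-op unless the task was previously marked eligible via
 * task_set_could_use_secluded_mem(); the locked variant below keeps
 * num_tasks_can_use_secluded_mem in sync with the per-task flag.
 */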
6241void
6242task_set_can_use_secluded_mem(
6243 task_t task,
6244 boolean_t can_use_secluded_mem)
6245{
6246 if (!task->task_could_use_secluded_mem) {
6247 return;
6248 }
6249 task_lock(task);
6250 task_set_can_use_secluded_mem_locked(task, can_use_secluded_mem);
6251 task_unlock(task);
6252}
6253
6254void
6255task_set_can_use_secluded_mem_locked(
6256 task_t task,
6257 boolean_t can_use_secluded_mem)
6258{
6259 assert(task->task_could_use_secluded_mem);
6260 if (can_use_secluded_mem &&
6261 secluded_for_apps && /* global boot-arg */
6262 !task->task_can_use_secluded_mem) {
6263 assert(num_tasks_can_use_secluded_mem >= 0);
6264 OSAddAtomic(+1,
6265 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6266 task->task_can_use_secluded_mem = TRUE;
6267 } else if (!can_use_secluded_mem &&
6268 task->task_can_use_secluded_mem) {
6269 assert(num_tasks_can_use_secluded_mem > 0);
6270 OSAddAtomic(-1,
6271 (volatile SInt32 *)&num_tasks_can_use_secluded_mem);
6272 task->task_can_use_secluded_mem = FALSE;
6273 }
6274}
6275
6276void
6277task_set_could_use_secluded_mem(
6278 task_t task,
6279 boolean_t could_use_secluded_mem)
6280{
6281 task->task_could_use_secluded_mem = could_use_secluded_mem;
6282}
6283
6284void
6285task_set_could_also_use_secluded_mem(
6286 task_t task,
6287 boolean_t could_also_use_secluded_mem)
6288{
6289 task->task_could_also_use_secluded_mem = could_also_use_secluded_mem;
6290}
6291
6292boolean_t
6293task_can_use_secluded_mem(
6294 task_t task,
6295 boolean_t is_alloc)
6296{
6297 if (task->task_can_use_secluded_mem) {
6298 assert(task->task_could_use_secluded_mem);
6299 assert(num_tasks_can_use_secluded_mem > 0);
6300 return TRUE;
6301 }
6302 if (task->task_could_also_use_secluded_mem &&
6303 num_tasks_can_use_secluded_mem > 0) {
6304 assert(num_tasks_can_use_secluded_mem > 0);
6305 return TRUE;
6306 }
6307
	/*
	 * If a single task's physical footprint exceeds the
	 * secluded_shutoff_trigger threshold, allow it to dip into
	 * secluded memory and also begin suppressing secluded memory
	 * until the task exits.
	 */
6313 if (is_alloc && secluded_shutoff_trigger != 0) {
6314 uint64_t phys_used = get_task_phys_footprint(task);
6315 if (phys_used > secluded_shutoff_trigger) {
6316 start_secluded_suppression(task);
6317 return TRUE;
6318 }
6319 }
6320
6321 return FALSE;
6322}
6323
6324boolean_t
6325task_could_use_secluded_mem(
6326 task_t task)
6327{
6328 return task->task_could_use_secluded_mem;
6329}
6330#endif /* CONFIG_SECLUDED_MEMORY */
6331
6332queue_head_t *
6333task_io_user_clients(task_t task)
6334{
6335 return (&task->io_user_clients);
6336}
6337
6338void
6339task_copy_fields_for_exec(task_t dst_task, task_t src_task)
6340{
6341 dst_task->vtimers = src_task->vtimers;
6342}
6343
6344#if DEVELOPMENT || DEBUG
6345int vm_region_footprint = 0;
6346#endif /* DEVELOPMENT || DEBUG */
6347
6348boolean_t
6349task_self_region_footprint(void)
6350{
6351#if DEVELOPMENT || DEBUG
6352 if (vm_region_footprint) {
6353 /* system-wide override */
6354 return TRUE;
6355 }
6356#endif /* DEVELOPMENT || DEBUG */
6357 return current_task()->task_region_footprint;
6358}
6359
6360void
6361task_self_region_footprint_set(
6362 boolean_t newval)
6363{
6364 task_t curtask;
6365
6366 curtask = current_task();
6367 task_lock(curtask);
6368 if (newval) {
6369 curtask->task_region_footprint = TRUE;
6370 } else {
6371 curtask->task_region_footprint = FALSE;
6372 }
6373 task_unlock(curtask);
6374}
6375
6376void
6377task_set_darkwake_mode(task_t task, boolean_t set_mode)
6378{
6379 assert(task);
6380
6381 task_lock(task);
6382
6383 if (set_mode) {
6384 task->t_flags |= TF_DARKWAKE_MODE;
6385 } else {
6386 task->t_flags &= ~(TF_DARKWAKE_MODE);
6387 }
6388
6389 task_unlock(task);
6390}
6391
6392boolean_t
6393task_get_darkwake_mode(task_t task)
6394{
6395 assert(task);
6396 return ((task->t_flags & TF_DARKWAKE_MODE) != 0);
6397}
6398
6399#if __arm64__
6400void
6401task_set_legacy_footprint(
6402 task_t task,
6403 boolean_t new_val)
6404{
6405 task_lock(task);
6406 task->task_legacy_footprint = new_val;
6407 task_unlock(task);
6408}
6409#endif /* __arm64__ */
6410