1/*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Mach Operating System
30 * Copyright (c) 1987 Carnegie-Mellon University
31 * All rights reserved. The CMU software License Agreement specifies
32 * the terms and conditions for use and redistribution.
33 */
34/*
35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
36 * support for mandatory and extensible security protections. This notice
37 * is included in support of clause 2.2 (b) of the Apple Public License,
38 * Version 2.0.
39 */
40#include <vm/vm_options.h>
41
42#include <kern/ecc.h>
43#include <kern/task.h>
44#include <kern/thread.h>
45#include <kern/debug.h>
46#include <kern/extmod_statistics.h>
47#include <mach/mach_traps.h>
48#include <mach/port.h>
49#include <mach/sdt.h>
50#include <mach/task.h>
51#include <mach/task_access.h>
52#include <mach/task_special_ports.h>
53#include <mach/time_value.h>
54#include <mach/vm_map.h>
55#include <mach/vm_param.h>
56#include <mach/vm_prot.h>
57#include <machine/machine_routines.h>
58
59#include <sys/file_internal.h>
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/dir.h>
63#include <sys/namei.h>
64#include <sys/proc_internal.h>
65#include <sys/kauth.h>
66#include <sys/vm.h>
67#include <sys/file.h>
68#include <sys/vnode_internal.h>
69#include <sys/mount.h>
70#include <sys/xattr.h>
71#include <sys/trace.h>
72#include <sys/kernel.h>
73#include <sys/ubc_internal.h>
74#include <sys/user.h>
75#include <sys/syslog.h>
76#include <sys/stat.h>
77#include <sys/sysproto.h>
78#include <sys/mman.h>
79#include <sys/sysctl.h>
80#include <sys/cprotect.h>
81#include <sys/kpi_socket.h>
82#include <sys/kas_info.h>
83#include <sys/socket.h>
84#include <sys/socketvar.h>
85#include <sys/random.h>
86#include <sys/code_signing.h>
87#if NECP
88#include <net/necp.h>
89#endif /* NECP */
90#if SKYWALK
91#include <skywalk/os_channel.h>
92#endif /* SKYWALK */
93
94#include <security/audit/audit.h>
95#include <security/mac.h>
96#include <bsm/audit_kevents.h>
97
98#include <kern/kalloc.h>
99#include <vm/vm_map.h>
100#include <vm/vm_kern.h>
101#include <vm/vm_pageout.h>
102
103#include <mach/shared_region.h>
104#include <vm/vm_shared_region.h>
105
106#include <vm/vm_dyld_pager.h>
107
108#include <vm/vm_protos.h>
109
110#include <sys/kern_memorystatus.h>
111#include <sys/kern_memorystatus_freeze.h>
112#include <sys/proc_internal.h>
113
114#include <mach-o/fixup-chains.h>
115
116#if CONFIG_MACF
117#include <security/mac_framework.h>
118#endif
119
120#include <kern/bits.h>
121
122#if CONFIG_CSR
123#include <sys/csr.h>
124#endif /* CONFIG_CSR */
125#include <sys/trust_caches.h>
126#include <libkern/amfi/amfi.h>
127#include <IOKit/IOBSD.h>
128
129#if VM_MAP_DEBUG_APPLE_PROTECT
130SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
131#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
132
133#if VM_MAP_DEBUG_FOURK
134SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
135#endif /* VM_MAP_DEBUG_FOURK */
136
137#if DEVELOPMENT || DEBUG
138
139static int
140sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS
141{
142#pragma unused(arg1, arg2)
143 vm_offset_t kaddr;
144 kern_return_t kr;
145 int error = 0;
146 int size = 0;
147
148 error = sysctl_handle_int(oidp, &size, 0, req);
149 if (error || !req->newptr) {
150 return error;
151 }
152
153 kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size,
154 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT);
155
156 if (kr == KERN_SUCCESS) {
157 kmem_free(kernel_map, kaddr, size);
158 }
159
160 return error;
161}
162
163SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
164 0, 0, &sysctl_kmem_alloc_contig, "I", "");
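/*
 * Illustrative userspace sketch (not part of this file): on DEVELOPMENT or
 * DEBUG kernels, the write-only vm.kmem_alloc_contig knob registered above
 * can be poked (as root) with sysctlbyname() to exercise a contiguous kernel
 * allocation of the requested size, which the handler frees immediately.
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	int size = 256 * 1024;
 *	if (sysctlbyname("vm.kmem_alloc_contig", NULL, NULL,
 *	    &size, sizeof(size)) != 0) {
 *		perror("vm.kmem_alloc_contig");
 *	}
 */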
165
166extern int vm_region_footprint;
167SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
168
169static int
170sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS
171{
172#pragma unused(arg1, arg2, oidp)
173 kmem_gobj_stats stats = kmem_get_gobj_stats();
174
175 return SYSCTL_OUT(req, &stats, sizeof(stats));
176}
177
178SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
179 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
180 0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
181
182#endif /* DEVELOPMENT || DEBUG */
183
184static int
185sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
186{
187#pragma unused(arg1, arg2, oidp)
188 int error = 0;
189 int value;
190
191 value = task_self_region_footprint();
192 error = SYSCTL_OUT(req, &value, sizeof(int));
193 if (error) {
194 return error;
195 }
196
197 if (!req->newptr) {
198 return 0;
199 }
200
201 error = SYSCTL_IN(req, &value, sizeof(int));
202 if (error) {
203 return error;
204 }
205	task_self_region_footprint_set(value);
206 return 0;
207}
208SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
209
210static int
211sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS
212{
213#pragma unused(arg1, arg2, oidp)
214 int error = 0;
215 int value;
216
217 value = (1 << thread_self_region_page_shift());
218 error = SYSCTL_OUT(req, &value, sizeof(int));
219 if (error) {
220 return error;
221 }
222
223 if (!req->newptr) {
224 return 0;
225 }
226
227 error = SYSCTL_IN(req, &value, sizeof(int));
228 if (error) {
229 return error;
230 }
231
232 if (value != 0 && value != 4096 && value != 16384) {
233 return EINVAL;
234 }
235
236#if !__ARM_MIXED_PAGE_SIZE__
237 if (value != vm_map_page_size(current_map())) {
238 return EINVAL;
239 }
240#endif /* !__ARM_MIXED_PAGE_SIZE__ */
241
242	thread_self_region_page_shift_set(bit_first(value));
243 return 0;
244}
245SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
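/*
 * Illustrative userspace sketch (not part of this file): the handler above
 * lets a thread override the page size used when reporting its own VM
 * regions; only 0 (reset), 4096 and 16384 are accepted, and on kernels
 * without __ARM_MIXED_PAGE_SIZE__ the value must match the map's page size.
 *
 *	#include <sys/sysctl.h>
 *
 *	int pgsz = 16384;
 *	(void) sysctlbyname("vm.self_region_page_size", NULL, NULL,
 *	    &pgsz, sizeof(pgsz));
 */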
246
247
248#if DEVELOPMENT || DEBUG
249extern int panic_on_unsigned_execute;
250SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
251
252extern int vm_log_xnu_user_debug;
253SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
254#endif /* DEVELOPMENT || DEBUG */
255
256extern int cs_executable_create_upl;
257extern int cs_executable_wire;
258SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
259SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
260
261extern int apple_protect_pager_count;
262extern int apple_protect_pager_count_mapped;
263extern unsigned int apple_protect_pager_cache_limit;
264SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
265SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
266SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
267
268#if DEVELOPMENT || DEBUG
269extern int radar_20146450;
270SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
271
272extern int macho_printf;
273SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
274
275extern int apple_protect_pager_data_request_debug;
276SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
277
278#if __arm64__
279/* These are meant to support the page table accounting unit test. */
280extern unsigned int arm_hardware_page_size;
281extern unsigned int arm_pt_desc_size;
282extern unsigned int arm_pt_root_size;
283extern unsigned int inuse_user_tteroot_count;
284extern unsigned int inuse_kernel_tteroot_count;
285extern unsigned int inuse_user_ttepages_count;
286extern unsigned int inuse_kernel_ttepages_count;
287extern unsigned int inuse_user_ptepages_count;
288extern unsigned int inuse_kernel_ptepages_count;
289SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
290SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
291SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
292SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
293SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
294SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
295SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
296SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
297SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
298#if !CONFIG_SPTM
299extern unsigned int free_page_size_tt_count;
300extern unsigned int free_two_page_size_tt_count;
301extern unsigned int free_tt_count;
302SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
303SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
304SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
305#endif
306#if DEVELOPMENT || DEBUG
307extern unsigned long pmap_asid_flushes;
308SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
309extern unsigned long pmap_asid_hits;
310SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
311extern unsigned long pmap_asid_misses;
312SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
313#endif
314#endif /* __arm64__ */
315
316#if __arm64__
317extern int fourk_pager_data_request_debug;
318SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
319#endif /* __arm64__ */
320#endif /* DEVELOPMENT || DEBUG */
321
322SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
323SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
324SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
325SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
326SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
327SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
328SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
329SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
330SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
331#if VM_SCAN_FOR_SHADOW_CHAIN
332static int vm_shadow_max_enabled = 0; /* Disabled by default */
333extern int proc_shadow_max(void);
334static int
335vm_shadow_max SYSCTL_HANDLER_ARGS
336{
337#pragma unused(arg1, arg2, oidp)
338 int value = 0;
339
340 if (vm_shadow_max_enabled) {
341 value = proc_shadow_max();
342 }
343
344 return SYSCTL_OUT(req, &value, sizeof(value));
345}
346SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
347 0, 0, &vm_shadow_max, "I", "");
348
349SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
350
351#endif /* VM_SCAN_FOR_SHADOW_CHAIN */
352
353SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
354
355__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
356 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor);
357/*
358 * Sysctls related to data/stack execution. See osfmk/vm/vm_map.c
359 */
360
361#if DEVELOPMENT || DEBUG
362extern int allow_stack_exec, allow_data_exec;
363
364SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
365SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
366
367#endif /* DEVELOPMENT || DEBUG */
368
369static const char *prot_values[] = {
370 "none",
371 "read-only",
372 "write-only",
373 "read-write",
374 "execute-only",
375 "read-execute",
376 "write-execute",
377 "read-write-execute"
378};
379
380void
381log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
382{
383 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
384 current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]);
385}
386
387/*
388 * shared_region_unnest_logging: level of logging of unnesting events
389 * 0 - no logging
390 * 1 - throttled logging of unexpected unnesting events (default)
391 * 2 - unthrottled logging of unexpected unnesting events
392 * 3+ - unthrottled logging of all unnesting events
393 */
394int shared_region_unnest_logging = 1;
395
396SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
397 &shared_region_unnest_logging, 0, "");
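/*
 * Illustrative userspace sketch (not part of this file): raising the level
 * above its throttled default can help when chasing an unexpected unnest,
 * e.g. level 2 for unthrottled logging of unexpected unnesting events:
 *
 *	#include <sys/sysctl.h>
 *
 *	int level = 2;
 *	(void) sysctlbyname("vm.shared_region_unnest_logging", NULL, NULL,
 *	    &level, sizeof(level));
 */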
398
399int vm_shared_region_unnest_log_interval = 10;
400int shared_region_unnest_log_count_threshold = 5;
401
402
403#if XNU_TARGET_OS_OSX
404
405#if defined (__x86_64__)
406static int scdir_enforce = 1;
407#else /* defined (__x86_64__) */
408static int scdir_enforce = 0; /* AOT caches live elsewhere */
409#endif /* defined (__x86_64__) */
410
411static char *scdir_path[] = {
412 "/System/Library/dyld/",
413 "/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld",
414 "/System/Cryptexes/OS/System/Library/dyld",
415 NULL
416};
417
418#else /* XNU_TARGET_OS_OSX */
419
420static int scdir_enforce = 0;
421static char *scdir_path[] = {
422 "/System/Library/Caches/com.apple.dyld/",
423 "/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
424 "/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld",
425 NULL
426};
427
428#endif /* XNU_TARGET_OS_OSX */
429
430static char *driverkit_scdir_path[] = {
431 "/System/DriverKit/System/Library/dyld/",
432#if XNU_TARGET_OS_OSX
433 "/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
434#else
435 "/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
436#endif /* XNU_TARGET_OS_OSX */
437 "/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
438 NULL
439};
440
441#ifndef SECURE_KERNEL
442static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS
443{
444#if CONFIG_CSR
445 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
446 printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n");
447 return EPERM;
448 }
449#endif /* CONFIG_CSR */
450 return sysctl_handle_int(oidp, arg1, arg2, req);
451}
452
453SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
454#endif
455
456/* These log rate throttling state variables aren't thread safe, but
457 * are sufficient unto the task.
458 */
459static int64_t last_unnest_log_time = 0;
460static int shared_region_unnest_log_count = 0;
461
462void
463log_unnest_badness(
464 vm_map_t m,
465 vm_map_offset_t s,
466 vm_map_offset_t e,
467 boolean_t is_nested_map,
468 vm_map_offset_t lowest_unnestable_addr)
469{
470 struct timeval tv;
471
472 if (shared_region_unnest_logging == 0) {
473 return;
474 }
475
476 if (shared_region_unnest_logging <= 2 &&
477 is_nested_map &&
478 s >= lowest_unnestable_addr) {
479 /*
480 * Unnesting of writable map entries is fine.
481 */
482 return;
483 }
484
485 if (shared_region_unnest_logging <= 1) {
486		microtime(&tv);
487 if ((tv.tv_sec - last_unnest_log_time) <
488 vm_shared_region_unnest_log_interval) {
489 if (shared_region_unnest_log_count++ >
490 shared_region_unnest_log_count_threshold) {
491 return;
492 }
493 } else {
494 last_unnest_log_time = tv.tv_sec;
495 shared_region_unnest_log_count = 0;
496 }
497 }
498
499 DTRACE_VM4(log_unnest_badness,
500 vm_map_t, m,
501 vm_map_offset_t, s,
502 vm_map_offset_t, e,
503 vm_map_offset_t, lowest_unnestable_addr);
504 printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m));
505}
506
507uint64_t
508vm_purge_filebacked_pagers(void)
509{
510 uint64_t pages_purged;
511
512 pages_purged = 0;
513 pages_purged += apple_protect_pager_purge_all();
514 pages_purged += shared_region_pager_purge_all();
515 pages_purged += dyld_pager_purge_all();
516#if DEVELOPMENT || DEBUG
517 printf("%s:%d pages purged: %llu\n", __FUNCTION__, __LINE__, pages_purged);
518#endif /* DEVELOPMENT || DEBUG */
519 return pages_purged;
520}
521
522int
523useracc(
524 user_addr_t addr,
525 user_size_t len,
526 int prot)
527{
528 vm_map_t map;
529
530 map = current_map();
531 return vm_map_check_protection(
532 map,
533 vm_map_trunc_page(addr,
534 vm_map_page_mask(map)),
535 vm_map_round_page(addr + len,
536 vm_map_page_mask(map)),
537	    prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
538}
539
540int
541vslock(
542 user_addr_t addr,
543 user_size_t len)
544{
545 kern_return_t kret;
546 vm_map_t map;
547
548 map = current_map();
549 kret = vm_map_wire_kernel(map,
550 vm_map_trunc_page(addr,
551 vm_map_page_mask(map)),
552 vm_map_round_page(addr + len,
553 vm_map_page_mask(map)),
554 VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD,
555 FALSE);
556
557 switch (kret) {
558 case KERN_SUCCESS:
559 return 0;
560 case KERN_INVALID_ADDRESS:
561 case KERN_NO_SPACE:
562 return ENOMEM;
563 case KERN_PROTECTION_FAILURE:
564 return EACCES;
565 default:
566 return EINVAL;
567 }
568}
569
570int
571vsunlock(
572 user_addr_t addr,
573 user_size_t len,
574 __unused int dirtied)
575{
576#if FIXME /* [ */
577 pmap_t pmap;
578 vm_page_t pg;
579 vm_map_offset_t vaddr;
580 ppnum_t paddr;
581#endif /* FIXME ] */
582 kern_return_t kret;
583 vm_map_t map;
584
585 map = current_map();
586
587#if FIXME /* [ */
588 if (dirtied) {
589 pmap = get_task_pmap(current_task());
590 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK);
591 vaddr < vm_map_round_page(addr + len, PAGE_MASK);
592 vaddr += PAGE_SIZE) {
593 paddr = pmap_find_phys(pmap, vaddr);
594 pg = PHYS_TO_VM_PAGE(paddr);
595 vm_page_set_modified(pg);
596 }
597 }
598#endif /* FIXME ] */
599#ifdef lint
600 dirtied++;
601#endif /* lint */
602 kret = vm_map_unwire(map,
603 vm_map_trunc_page(addr,
604 vm_map_page_mask(map)),
605 vm_map_round_page(addr + len,
606 vm_map_page_mask(map)),
607 FALSE);
608 switch (kret) {
609 case KERN_SUCCESS:
610 return 0;
611 case KERN_INVALID_ADDRESS:
612 case KERN_NO_SPACE:
613 return ENOMEM;
614 case KERN_PROTECTION_FAILURE:
615 return EACCES;
616 default:
617 return EINVAL;
618 }
619}
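/*
 * Typical in-kernel usage of the two routines above (sketch only, with a
 * hypothetical user range uaddr/len): wire the pages so they cannot be
 * paged out while they are being accessed, then unwire them when done.
 *
 *	if (vslock(uaddr, len) == 0) {
 *		... access the wired range, e.g. via copyin()/copyout() ...
 *		(void) vsunlock(uaddr, len, TRUE);
 *	}
 */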
620
621int
622subyte(
623 user_addr_t addr,
624 int byte)
625{
626 char character;
627
628 character = (char)byte;
629 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
630}
631
632int
633suibyte(
634 user_addr_t addr,
635 int byte)
636{
637 char character;
638
639 character = (char)byte;
640 return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1;
641}
642
643int
644fubyte(user_addr_t addr)
645{
646 unsigned char byte;
647
648 if (copyin(addr, (void *) &byte, sizeof(char))) {
649 return -1;
650 }
651 return byte;
652}
653
654int
655fuibyte(user_addr_t addr)
656{
657 unsigned char byte;
658
659 if (copyin(addr, (void *) &(byte), sizeof(char))) {
660 return -1;
661 }
662 return byte;
663}
664
665int
666suword(
667 user_addr_t addr,
668 long word)
669{
670 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
671}
672
673long
674fuword(user_addr_t addr)
675{
676 long word = 0;
677
678 if (copyin(addr, (void *) &word, sizeof(int))) {
679 return -1;
680 }
681 return word;
682}
683
684/* suiword and fuiword are the same as suword and fuword, respectively */
685
686int
687suiword(
688 user_addr_t addr,
689 long word)
690{
691 return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1;
692}
693
694long
695fuiword(user_addr_t addr)
696{
697 long word = 0;
698
699 if (copyin(addr, (void *) &word, sizeof(int))) {
700 return -1;
701 }
702 return word;
703}
704
705/*
706 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
707 * fetching and setting of process-sized size_t and pointer values.
708 */
709int
710sulong(user_addr_t addr, int64_t word)
711{
712 if (IS_64BIT_PROCESS(current_proc())) {
713 return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1;
714 } else {
715		return suiword(addr, (long)word);
716 }
717}
718
719int64_t
720fulong(user_addr_t addr)
721{
722 int64_t longword;
723
724 if (IS_64BIT_PROCESS(current_proc())) {
725 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) {
726 return -1;
727 }
728 return longword;
729 } else {
730 return (int64_t)fuiword(addr);
731 }
732}
733
734int
735suulong(user_addr_t addr, uint64_t uword)
736{
737 if (IS_64BIT_PROCESS(current_proc())) {
738 return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1;
739 } else {
740		return suiword(addr, (uint32_t)uword);
741 }
742}
743
744uint64_t
745fuulong(user_addr_t addr)
746{
747 uint64_t ulongword;
748
749 if (IS_64BIT_PROCESS(current_proc())) {
750 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) {
751 return -1ULL;
752 }
753 return ulongword;
754 } else {
755 return (uint64_t)fuiword(addr);
756 }
757}
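/*
 * Usage sketch for the helpers above, with a hypothetical user address
 * uptr: fetch and store a process-sized value regardless of whether the
 * current process is 32-bit or 64-bit. Note that the fetch routines
 * overload -1 as their error return, so a legitimately stored -1 cannot
 * be distinguished from a fault.
 *
 *	uint64_t val = fuulong(uptr);
 *	if (val != -1ULL) {
 *		(void) suulong(uptr, val + 1);
 *	}
 */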
758
759int
760swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
761{
762 return ENOTSUP;
763}
764
765/*
766 * pid_for_task
767 *
768 * Find the BSD process ID for the Mach task associated with the given Mach port
769 * name
770 *
771 * Parameters: args User argument descriptor (see below)
772 *
773 * Indirect parameters: args->t Mach port name
774 * args->pid Process ID (returned value; see below)
775 *
776 *	Returns:	KERN_SUCCESS		Success
777 * KERN_FAILURE Not success
778 *
779 * Implicit returns: args->pid Process ID
780 *
781 */
782kern_return_t
783pid_for_task(
784 struct pid_for_task_args *args)
785{
786 mach_port_name_t t = args->t;
787 user_addr_t pid_addr = args->pid;
788 proc_t p;
789 task_t t1;
790 int pid = -1;
791 kern_return_t err = KERN_SUCCESS;
792
793 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
794 AUDIT_ARG(mach_port1, t);
795
796	t1 = port_name_to_task_name(t);
797
798 if (t1 == TASK_NULL) {
799 err = KERN_FAILURE;
800 goto pftout;
801 } else {
802 p = get_bsdtask_info(t1);
803 if (p) {
804 pid = proc_pid(p);
805 err = KERN_SUCCESS;
806		} else if (task_is_a_corpse(t1)) {
807			pid = task_pid(t1);
808 err = KERN_SUCCESS;
809 } else {
810 err = KERN_FAILURE;
811 }
812 }
813 task_deallocate(t1);
814pftout:
815 AUDIT_ARG(pid, pid);
816 (void) copyout((char *) &pid, pid_addr, sizeof(int));
817 AUDIT_MACH_SYSCALL_EXIT(err);
818 return err;
819}
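/*
 * Illustrative userspace sketch (not part of this file): the trap above is
 * reachable through the pid_for_task() stub exported by libsystem_kernel,
 * e.g. to map a task port back to its BSD process ID:
 *
 *	#include <mach/mach.h>
 *
 *	int pid = -1;
 *	kern_return_t kr = pid_for_task(mach_task_self(), &pid);
 *	if (kr == KERN_SUCCESS) {
 *		... pid now holds the caller's own process ID ...
 *	}
 */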
820
821/*
822 *
823 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
824 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
825 *
826 */
827static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
828
829/*
830 * Routine: task_for_pid_posix_check
831 * Purpose:
832 * Verify that the current process should be allowed to
833 * get the target process's task port. This is only
834 * permitted if:
835 * - The current process is root
836 * OR all of the following are true:
837 * - The target process's real, effective, and saved uids
838 * are the same as the current proc's euid,
839 * - The target process's group set is a subset of the
840 * calling process's group set, and
841 * - The target process hasn't switched credentials.
842 *
843 * Returns: TRUE: permitted
844 * FALSE: denied
845 */
846static int
847task_for_pid_posix_check(proc_t target)
848{
849 kauth_cred_t targetcred, mycred;
850 bool checkcredentials;
851 uid_t myuid;
852 int allowed;
853
854 /* No task_for_pid on bad targets */
855 if (target->p_stat == SZOMB) {
856 return FALSE;
857 }
858
859 mycred = kauth_cred_get();
860	myuid = kauth_cred_getuid(mycred);
861
862 /* If we're running as root, the check passes */
863	if (kauth_cred_issuser(mycred)) {
864 return TRUE;
865 }
866
867 /* We're allowed to get our own task port */
868 if (target == current_proc()) {
869 return TRUE;
870 }
871
872 /*
873 * Under DENY, only root can get another proc's task port,
874 * so no more checks are needed.
875 */
876 if (tfp_policy == KERN_TFP_POLICY_DENY) {
877 return FALSE;
878 }
879
880	targetcred = kauth_cred_proc_ref(target);
881 allowed = TRUE;
882
883	checkcredentials = !proc_is_third_party_debuggable_driver(target);
884
885 if (checkcredentials) {
886 /* Do target's ruid, euid, and saved uid match my euid? */
887		if ((kauth_cred_getuid(targetcred) != myuid) ||
888		    (kauth_cred_getruid(targetcred) != myuid) ||
889		    (kauth_cred_getsvuid(targetcred) != myuid)) {
890 allowed = FALSE;
891 goto out;
892 }
893 /* Are target's groups a subset of my groups? */
894		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
895 allowed == 0) {
896 allowed = FALSE;
897 goto out;
898 }
899 }
900
901 /* Has target switched credentials? */
902 if (target->p_flag & P_SUGID) {
903 allowed = FALSE;
904 goto out;
905 }
906
907out:
908 kauth_cred_unref(&targetcred);
909 return allowed;
910}
911
912/*
913 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__
914 *
915 * Description: Waits for the user space daemon to respond to the request
916 *		we made. The function is declared non-inline so that it remains
917 *		visible in stackshots and spindumps as well as when debugging.
918 */
919__attribute__((noinline)) int
920__KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(
921 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor)
922{
923 return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor);
924}
925
926/*
927 * Routine: task_for_pid
928 * Purpose:
929 * Get the task port for another "process", named by its
930 * process ID on the same host as "target_task".
931 *
932 * Only permitted to privileged processes, or processes
933 * with the same user ID.
934 *
935 * Note: if pid == 0, an error is returned no matter who is calling.
936 *
937 * XXX This should be a BSD system call, not a Mach trap!!!
938 */
939kern_return_t
940task_for_pid(
941 struct task_for_pid_args *args)
942{
943 mach_port_name_t target_tport = args->target_tport;
944 int pid = args->pid;
945 user_addr_t task_addr = args->t;
946 proc_t p = PROC_NULL;
947 task_t t1 = TASK_NULL;
948 task_t task = TASK_NULL;
949 mach_port_name_t tret = MACH_PORT_NULL;
950 ipc_port_t tfpport = MACH_PORT_NULL;
951 void * sright = NULL;
952 int error = 0;
953 boolean_t is_current_proc = FALSE;
954 struct proc_ident pident = {0};
955
956 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
957 AUDIT_ARG(pid, pid);
958 AUDIT_ARG(mach_port1, target_tport);
959
960 /* Always check if pid == 0 */
961 if (pid == 0) {
962 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
963 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
964 return KERN_FAILURE;
965 }
966
967 t1 = port_name_to_task(target_tport);
968 if (t1 == TASK_NULL) {
969 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
970 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
971 return KERN_FAILURE;
972 }
973
974
975 p = proc_find(pid);
976 if (p == PROC_NULL) {
977 error = KERN_FAILURE;
978 goto tfpout;
979 }
980 pident = proc_ident(p);
981 is_current_proc = (p == current_proc());
982
983#if CONFIG_AUDIT
984 AUDIT_ARG(process, p);
985#endif
986
987 if (!(task_for_pid_posix_check(target: p))) {
988 error = KERN_FAILURE;
989 goto tfpout;
990 }
991
992 if (proc_task(p) == TASK_NULL) {
993 error = KERN_SUCCESS;
994 goto tfpout;
995 }
996
997 /*
998 * Grab a task reference and drop the proc reference as the proc ref
999	 * shouldn't be held across upcalls.
1000 */
1001 task = proc_task(p);
1002 task_reference(task);
1003
1004 proc_rele(p);
1005 p = PROC_NULL;
1006
1007 /* IPC is not active on the task until after `exec_resettextvp` has been called.
1008	 * We don't want to call into MAC hooks until we know that this has occurred;
1009	 * otherwise AMFI and others will read uninitialized fields from the csproc.
1010 */
1011 if (!task_is_ipc_active(task)) {
1012 error = KERN_FAILURE;
1013 goto tfpout;
1014 }
1015
1016#if CONFIG_MACF
1017 error = mac_proc_check_get_task(cred: kauth_cred_get(), pident: &pident, TASK_FLAVOR_CONTROL);
1018 if (error) {
1019 error = KERN_FAILURE;
1020 goto tfpout;
1021 }
1022#endif
1023
1024 /* If we aren't root and target's task access port is set... */
1025 if (!kauth_cred_issuser(cred: kauth_cred_get()) &&
1026 !is_current_proc &&
1027 (task_get_task_access_port(task, &tfpport) == 0) &&
1028 (tfpport != IPC_PORT_NULL)) {
1029 if (tfpport == IPC_PORT_DEAD) {
1030 error = KERN_PROTECTION_FAILURE;
1031 goto tfpout;
1032 }
1033
1034 /* Call up to the task access server */
1035 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(task_access_port: tfpport,
1036 calling_pid: proc_selfpid(), calling_gid: kauth_getgid(), target_pid: pid, TASK_FLAVOR_CONTROL);
1037
1038 if (error != MACH_MSG_SUCCESS) {
1039 if (error == MACH_RCV_INTERRUPTED) {
1040 error = KERN_ABORTED;
1041 } else {
1042 error = KERN_FAILURE;
1043 }
1044 goto tfpout;
1045 }
1046 }
1047
1048 /* Grant task port access */
1049 extmod_statistics_incr_task_for_pid(target: task);
1050
1051 /* this reference will be consumed during conversion */
1052 task_reference(task);
1053 if (task == current_task()) {
1054 /* return pinned self if current_task() so equality check with mach_task_self_ passes */
1055 sright = (void *)convert_task_to_port_pinned(task);
1056 } else {
1057 sright = (void *)convert_task_to_port(task);
1058 }
1059 /* extra task ref consumed */
1060
1061 /*
1062 * Check if the task has been corpsified. We must do so after conversion
1063 * since we don't hold locks and may have grabbed a corpse control port
1064 * above which will prevent no-senders notification delivery.
1065 */
1066 if (task_is_a_corpse(task)) {
1067 ipc_port_release_send(port: sright);
1068 error = KERN_FAILURE;
1069 goto tfpout;
1070 }
1071
1072 tret = ipc_port_copyout_send(
1073 sright,
1074 space: get_task_ipcspace(t: current_task()));
1075
1076 error = KERN_SUCCESS;
1077
1078tfpout:
1079 task_deallocate(t1);
1080 AUDIT_ARG(mach_port2, tret);
1081 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1082
1083 if (tfpport != IPC_PORT_NULL) {
1084 ipc_port_release_send(port: tfpport);
1085 }
1086 if (task != TASK_NULL) {
1087 task_deallocate(task);
1088 }
1089 if (p != PROC_NULL) {
1090 proc_rele(p);
1091 }
1092 AUDIT_MACH_SYSCALL_EXIT(error);
1093 return error;
1094}
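/*
 * Illustrative userspace sketch (not part of this file): the trap above
 * backs the task_for_pid() stub. A caller must pass the posix check, any
 * MAC policy in place, and, when one is registered, the target's task
 * access port check, so in practice only privileged or suitably entitled
 * processes succeed (target_pid below is a hypothetical process ID):
 *
 *	#include <mach/mach.h>
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), target_pid, &task);
 *	if (kr == KERN_SUCCESS) {
 *		... use the control port, then mach_port_deallocate() it ...
 *	}
 */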
1095
1096/*
1097 * Routine: task_name_for_pid
1098 * Purpose:
1099 * Get the task name port for another "process", named by its
1100 * process ID on the same host as "target_task".
1101 *
1102 * Only permitted to privileged processes, or processes
1103 * with the same user ID.
1104 *
1105 * XXX This should be a BSD system call, not a Mach trap!!!
1106 */
1107
1108kern_return_t
1109task_name_for_pid(
1110 struct task_name_for_pid_args *args)
1111{
1112 mach_port_name_t target_tport = args->target_tport;
1113 int pid = args->pid;
1114 user_addr_t task_addr = args->t;
1115 proc_t p = PROC_NULL;
1116 task_t t1 = TASK_NULL;
1117 mach_port_name_t tret = MACH_PORT_NULL;
1118 void * sright;
1119 int error = 0, refheld = 0;
1120 kauth_cred_t target_cred;
1121
1122 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
1123 AUDIT_ARG(pid, pid);
1124 AUDIT_ARG(mach_port1, target_tport);
1125
1126 t1 = port_name_to_task(target_tport);
1127 if (t1 == TASK_NULL) {
1128 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1129 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1130 return KERN_FAILURE;
1131 }
1132
1133 p = proc_find(pid);
1134 if (p != PROC_NULL) {
1135 AUDIT_ARG(process, p);
1136 target_cred = kauth_cred_proc_ref(procp: p);
1137 refheld = 1;
1138
1139 if ((p->p_stat != SZOMB)
1140 && ((current_proc() == p)
1141 || kauth_cred_issuser(cred: kauth_cred_get())
1142 || ((kauth_cred_getuid(cred: target_cred) == kauth_cred_getuid(cred: kauth_cred_get())) &&
1143 ((kauth_cred_getruid(cred: target_cred) == kauth_getruid())))
1144 || IOCurrentTaskHasEntitlement(entitlement: "com.apple.system-task-ports.name.safe")
1145 )) {
1146 if (proc_task(p) != TASK_NULL) {
1147 struct proc_ident pident = proc_ident(p);
1148
1149 task_t task = proc_task(p);
1150
1151 task_reference(task);
1152 proc_rele(p);
1153 p = PROC_NULL;
1154#if CONFIG_MACF
1155 error = mac_proc_check_get_task(cred: kauth_cred_get(), pident: &pident, TASK_FLAVOR_NAME);
1156 if (error) {
1157 task_deallocate(task);
1158 goto noperm;
1159 }
1160#endif
1161 sright = (void *)convert_task_name_to_port(task);
1162 task = NULL;
1163 tret = ipc_port_copyout_send(sright,
1164 space: get_task_ipcspace(t: current_task()));
1165 } else {
1166 tret = MACH_PORT_NULL;
1167 }
1168
1169 AUDIT_ARG(mach_port2, tret);
1170 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1171 task_deallocate(t1);
1172 error = KERN_SUCCESS;
1173 goto tnfpout;
1174 }
1175 }
1176
1177#if CONFIG_MACF
1178noperm:
1179#endif
1180 task_deallocate(t1);
1181 tret = MACH_PORT_NULL;
1182 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1183 error = KERN_FAILURE;
1184tnfpout:
1185 if (refheld != 0) {
1186 kauth_cred_unref(&target_cred);
1187 }
1188 if (p != PROC_NULL) {
1189 proc_rele(p);
1190 }
1191 AUDIT_MACH_SYSCALL_EXIT(error);
1192 return error;
1193}
1194
1195/*
1196 * Routine: task_inspect_for_pid
1197 * Purpose:
1198 * Get the task inspect port for another "process", named by its
1199 * process ID on the same host as "target_task".
1200 */
1201int
1202task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret)
1203{
1204 mach_port_name_t target_tport = args->target_tport;
1205 int pid = args->pid;
1206 user_addr_t task_addr = args->t;
1207
1208 proc_t proc = PROC_NULL;
1209 task_t t1 = TASK_NULL;
1210 task_inspect_t task_insp = TASK_INSPECT_NULL;
1211 mach_port_name_t tret = MACH_PORT_NULL;
1212 ipc_port_t tfpport = MACH_PORT_NULL;
1213 int error = 0;
1214 void *sright = NULL;
1215 boolean_t is_current_proc = FALSE;
1216 struct proc_ident pident = {0};
1217
1218 /* Disallow inspect port for kernel_task */
1219 if (pid == 0) {
1220 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1221 return EPERM;
1222 }
1223
1224 t1 = port_name_to_task(target_tport);
1225 if (t1 == TASK_NULL) {
1226 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1227 return EINVAL;
1228 }
1229
1230 proc = proc_find(pid);
1231 if (proc == PROC_NULL) {
1232 error = ESRCH;
1233 goto tifpout;
1234 }
1235 pident = proc_ident(p: proc);
1236 is_current_proc = (proc == current_proc());
1237
1238 if (!(task_for_pid_posix_check(target: proc))) {
1239 error = EPERM;
1240 goto tifpout;
1241 }
1242
1243 task_insp = proc_task(proc);
1244 if (task_insp == TASK_INSPECT_NULL) {
1245 goto tifpout;
1246 }
1247
1248 /*
1249 * Grab a task reference and drop the proc reference before making any upcalls.
1250 */
1251 task_reference(task_insp);
1252
1253 proc_rele(p: proc);
1254 proc = PROC_NULL;
1255
1256#if CONFIG_MACF
1257 error = mac_proc_check_get_task(cred: kauth_cred_get(), pident: &pident, TASK_FLAVOR_INSPECT);
1258 if (error) {
1259 error = EPERM;
1260 goto tifpout;
1261 }
1262#endif
1263
1264 /* If we aren't root and target's task access port is set... */
1265 if (!kauth_cred_issuser(cred: kauth_cred_get()) &&
1266 !is_current_proc &&
1267 (task_get_task_access_port(task_insp, &tfpport) == 0) &&
1268 (tfpport != IPC_PORT_NULL)) {
1269 if (tfpport == IPC_PORT_DEAD) {
1270 error = EACCES;
1271 goto tifpout;
1272 }
1273
1274
1275 /* Call up to the task access server */
1276 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(task_access_port: tfpport,
1277 calling_pid: proc_selfpid(), calling_gid: kauth_getgid(), target_pid: pid, TASK_FLAVOR_INSPECT);
1278
1279 if (error != MACH_MSG_SUCCESS) {
1280 if (error == MACH_RCV_INTERRUPTED) {
1281 error = EINTR;
1282 } else {
1283 error = EPERM;
1284 }
1285 goto tifpout;
1286 }
1287 }
1288
1289 /* Check if the task has been corpsified */
1290 if (task_is_a_corpse(task: task_insp)) {
1291 error = EACCES;
1292 goto tifpout;
1293 }
1294
1295 /* could be IP_NULL, consumes a ref */
1296 sright = (void*) convert_task_inspect_to_port(task_insp);
1297 task_insp = TASK_INSPECT_NULL;
1298 tret = ipc_port_copyout_send(sright, space: get_task_ipcspace(t: current_task()));
1299
1300tifpout:
1301 task_deallocate(t1);
1302 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1303 if (proc != PROC_NULL) {
1304 proc_rele(p: proc);
1305 }
1306 if (tfpport != IPC_PORT_NULL) {
1307 ipc_port_release_send(port: tfpport);
1308 }
1309 if (task_insp != TASK_INSPECT_NULL) {
1310 task_deallocate(task_insp);
1311 }
1312
1313 *ret = error;
1314 return error;
1315}
1316
1317/*
1318 * Routine: task_read_for_pid
1319 * Purpose:
1320 * Get the task read port for another "process", named by its
1321 * process ID on the same host as "target_task".
1322 */
1323int
1324task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret)
1325{
1326 mach_port_name_t target_tport = args->target_tport;
1327 int pid = args->pid;
1328 user_addr_t task_addr = args->t;
1329
1330 proc_t proc = PROC_NULL;
1331 task_t t1 = TASK_NULL;
1332 task_read_t task_read = TASK_READ_NULL;
1333 mach_port_name_t tret = MACH_PORT_NULL;
1334 ipc_port_t tfpport = MACH_PORT_NULL;
1335 int error = 0;
1336 void *sright = NULL;
1337 boolean_t is_current_proc = FALSE;
1338 struct proc_ident pident = {0};
1339
1340 /* Disallow read port for kernel_task */
1341 if (pid == 0) {
1342 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1343 return EPERM;
1344 }
1345
1346 t1 = port_name_to_task(target_tport);
1347 if (t1 == TASK_NULL) {
1348 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1349 return EINVAL;
1350 }
1351
1352 proc = proc_find(pid);
1353 if (proc == PROC_NULL) {
1354 error = ESRCH;
1355 goto trfpout;
1356 }
1357 pident = proc_ident(p: proc);
1358 is_current_proc = (proc == current_proc());
1359
1360 if (!(task_for_pid_posix_check(target: proc))) {
1361 error = EPERM;
1362 goto trfpout;
1363 }
1364
1365 task_read = proc_task(proc);
1366 if (task_read == TASK_INSPECT_NULL) {
1367 goto trfpout;
1368 }
1369
1370 /*
1371 * Grab a task reference and drop the proc reference before making any upcalls.
1372 */
1373 task_reference(task_read);
1374
1375 proc_rele(p: proc);
1376 proc = PROC_NULL;
1377
1378#if CONFIG_MACF
1379 error = mac_proc_check_get_task(cred: kauth_cred_get(), pident: &pident, TASK_FLAVOR_READ);
1380 if (error) {
1381 error = EPERM;
1382 goto trfpout;
1383 }
1384#endif
1385
1386 /* If we aren't root and target's task access port is set... */
1387 if (!kauth_cred_issuser(cred: kauth_cred_get()) &&
1388 !is_current_proc &&
1389 (task_get_task_access_port(task_read, &tfpport) == 0) &&
1390 (tfpport != IPC_PORT_NULL)) {
1391 if (tfpport == IPC_PORT_DEAD) {
1392 error = EACCES;
1393 goto trfpout;
1394 }
1395
1396
1397 /* Call up to the task access server */
1398 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(task_access_port: tfpport,
1399 calling_pid: proc_selfpid(), calling_gid: kauth_getgid(), target_pid: pid, TASK_FLAVOR_READ);
1400
1401 if (error != MACH_MSG_SUCCESS) {
1402 if (error == MACH_RCV_INTERRUPTED) {
1403 error = EINTR;
1404 } else {
1405 error = EPERM;
1406 }
1407 goto trfpout;
1408 }
1409 }
1410
1411 /* Check if the task has been corpsified */
1412 if (task_is_a_corpse(task: task_read)) {
1413 error = EACCES;
1414 goto trfpout;
1415 }
1416
1417 /* could be IP_NULL, consumes a ref */
1418 sright = (void*) convert_task_read_to_port(task_read);
1419 task_read = TASK_READ_NULL;
1420 tret = ipc_port_copyout_send(sright, space: get_task_ipcspace(t: current_task()));
1421
1422trfpout:
1423 task_deallocate(t1);
1424 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1425 if (proc != PROC_NULL) {
1426 proc_rele(p: proc);
1427 }
1428 if (tfpport != IPC_PORT_NULL) {
1429 ipc_port_release_send(port: tfpport);
1430 }
1431 if (task_read != TASK_READ_NULL) {
1432 task_deallocate(task_read);
1433 }
1434
1435 *ret = error;
1436 return error;
1437}
1438
1439kern_return_t
1440pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
1441{
1442 task_t target = NULL;
1443 proc_t targetproc = PROC_NULL;
1444 int pid = args->pid;
1445 int error = 0;
1446 mach_port_t tfpport = MACH_PORT_NULL;
1447
1448 if (pid == 0) {
1449 error = EPERM;
1450 goto out;
1451 }
1452
1453 targetproc = proc_find(pid);
1454 if (targetproc == PROC_NULL) {
1455 error = ESRCH;
1456 goto out;
1457 }
1458
1459 if (!task_for_pid_posix_check(target: targetproc) &&
1460 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1461 error = EPERM;
1462 goto out;
1463 }
1464
1465#if CONFIG_MACF
1466 error = mac_proc_check_suspend_resume(proc: targetproc, MAC_PROC_CHECK_SUSPEND);
1467 if (error) {
1468 error = EPERM;
1469 goto out;
1470 }
1471#endif
1472
1473 target = proc_task(targetproc);
1474#if XNU_TARGET_OS_OSX
1475 if (target != TASK_NULL) {
1476 /* If we aren't root and target's task access port is set... */
1477 if (!kauth_cred_issuser(cred: kauth_cred_get()) &&
1478 targetproc != current_proc() &&
1479 (task_get_task_access_port(target, &tfpport) == 0) &&
1480 (tfpport != IPC_PORT_NULL)) {
1481 if (tfpport == IPC_PORT_DEAD) {
1482 error = EACCES;
1483 goto out;
1484 }
1485
1486 /* Call up to the task access server */
1487 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(task_access_port: tfpport,
1488 calling_pid: proc_selfpid(), calling_gid: kauth_getgid(), target_pid: pid, TASK_FLAVOR_CONTROL);
1489
1490 if (error != MACH_MSG_SUCCESS) {
1491 if (error == MACH_RCV_INTERRUPTED) {
1492 error = EINTR;
1493 } else {
1494 error = EPERM;
1495 }
1496 goto out;
1497 }
1498 }
1499 }
1500#endif /* XNU_TARGET_OS_OSX */
1501
1502 task_reference(target);
1503 error = task_pidsuspend(task: target);
1504 if (error) {
1505 if (error == KERN_INVALID_ARGUMENT) {
1506 error = EINVAL;
1507 } else {
1508 error = EPERM;
1509 }
1510 }
1511#if CONFIG_MEMORYSTATUS
1512 else {
1513 memorystatus_on_suspend(p: targetproc);
1514 }
1515#endif
1516
1517 task_deallocate(target);
1518
1519out:
1520 if (tfpport != IPC_PORT_NULL) {
1521 ipc_port_release_send(port: tfpport);
1522 }
1523
1524 if (targetproc != PROC_NULL) {
1525 proc_rele(p: targetproc);
1526 }
1527 *ret = error;
1528 return error;
1529}
1530
1531kern_return_t
1532debug_control_port_for_pid(struct debug_control_port_for_pid_args *args)
1533{
1534 mach_port_name_t target_tport = args->target_tport;
1535 int pid = args->pid;
1536 user_addr_t task_addr = args->t;
1537 proc_t p = PROC_NULL;
1538 task_t t1 = TASK_NULL;
1539 task_t task = TASK_NULL;
1540 mach_port_name_t tret = MACH_PORT_NULL;
1541 ipc_port_t tfpport = MACH_PORT_NULL;
1542 ipc_port_t sright = NULL;
1543 int error = 0;
1544 boolean_t is_current_proc = FALSE;
1545 struct proc_ident pident = {0};
1546
1547 AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID);
1548 AUDIT_ARG(pid, pid);
1549 AUDIT_ARG(mach_port1, target_tport);
1550
1551 /* Always check if pid == 0 */
1552 if (pid == 0) {
1553 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1554 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1555 return KERN_FAILURE;
1556 }
1557
1558 t1 = port_name_to_task(target_tport);
1559 if (t1 == TASK_NULL) {
1560 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
1561 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
1562 return KERN_FAILURE;
1563 }
1564
1565 p = proc_find(pid);
1566 if (p == PROC_NULL) {
1567 error = KERN_FAILURE;
1568 goto tfpout;
1569 }
1570 pident = proc_ident(p);
1571 is_current_proc = (p == current_proc());
1572
1573#if CONFIG_AUDIT
1574 AUDIT_ARG(process, p);
1575#endif
1576
1577 if (!(task_for_pid_posix_check(target: p))) {
1578 error = KERN_FAILURE;
1579 goto tfpout;
1580 }
1581
1582 if (proc_task(p) == TASK_NULL) {
1583 error = KERN_SUCCESS;
1584 goto tfpout;
1585 }
1586
1587 /*
1588 * Grab a task reference and drop the proc reference before making any upcalls.
1589 */
1590 task = proc_task(p);
1591 task_reference(task);
1592
1593 proc_rele(p);
1594 p = PROC_NULL;
1595
1596 if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) {
1597#if CONFIG_MACF
1598 error = mac_proc_check_get_task(cred: kauth_cred_get(), pident: &pident, TASK_FLAVOR_CONTROL);
1599 if (error) {
1600 error = KERN_FAILURE;
1601 goto tfpout;
1602 }
1603#endif
1604
1605 /* If we aren't root and target's task access port is set... */
1606 if (!kauth_cred_issuser(cred: kauth_cred_get()) &&
1607 !is_current_proc &&
1608 (task_get_task_access_port(task, &tfpport) == 0) &&
1609 (tfpport != IPC_PORT_NULL)) {
1610 if (tfpport == IPC_PORT_DEAD) {
1611 error = KERN_PROTECTION_FAILURE;
1612 goto tfpout;
1613 }
1614
1615
1616 /* Call up to the task access server */
1617 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(task_access_port: tfpport,
1618 calling_pid: proc_selfpid(), calling_gid: kauth_getgid(), target_pid: pid, TASK_FLAVOR_CONTROL);
1619
1620 if (error != MACH_MSG_SUCCESS) {
1621 if (error == MACH_RCV_INTERRUPTED) {
1622 error = KERN_ABORTED;
1623 } else {
1624 error = KERN_FAILURE;
1625 }
1626 goto tfpout;
1627 }
1628 }
1629 }
1630
1631 /* Check if the task has been corpsified */
1632 if (task_is_a_corpse(task)) {
1633 error = KERN_FAILURE;
1634 goto tfpout;
1635 }
1636
1637 error = task_get_debug_control_port(task, &sright);
1638 if (error != KERN_SUCCESS) {
1639 goto tfpout;
1640 }
1641
1642 tret = ipc_port_copyout_send(
1643 sright,
1644 space: get_task_ipcspace(t: current_task()));
1645
1646 error = KERN_SUCCESS;
1647
1648tfpout:
1649 task_deallocate(t1);
1650 AUDIT_ARG(mach_port2, tret);
1651 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
1652
1653 if (tfpport != IPC_PORT_NULL) {
1654 ipc_port_release_send(port: tfpport);
1655 }
1656 if (task != TASK_NULL) {
1657 task_deallocate(task);
1658 }
1659 if (p != PROC_NULL) {
1660 proc_rele(p);
1661 }
1662 AUDIT_MACH_SYSCALL_EXIT(error);
1663 return error;
1664}
1665
1666kern_return_t
1667pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
1668{
1669 task_t target = NULL;
1670 proc_t targetproc = PROC_NULL;
1671 int pid = args->pid;
1672 int error = 0;
1673 mach_port_t tfpport = MACH_PORT_NULL;
1674
1675 if (pid == 0) {
1676 error = EPERM;
1677 goto out;
1678 }
1679
1680 targetproc = proc_find(pid);
1681 if (targetproc == PROC_NULL) {
1682 error = ESRCH;
1683 goto out;
1684 }
1685
1686 if (!task_for_pid_posix_check(target: targetproc) &&
1687 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1688 error = EPERM;
1689 goto out;
1690 }
1691
1692#if CONFIG_MACF
1693 error = mac_proc_check_suspend_resume(proc: targetproc, MAC_PROC_CHECK_RESUME);
1694 if (error) {
1695 error = EPERM;
1696 goto out;
1697 }
1698#endif
1699
1700 target = proc_task(targetproc);
1701#if XNU_TARGET_OS_OSX
1702 if (target != TASK_NULL) {
1703 /* If we aren't root and target's task access port is set... */
1704 if (!kauth_cred_issuser(cred: kauth_cred_get()) &&
1705 targetproc != current_proc() &&
1706 (task_get_task_access_port(target, &tfpport) == 0) &&
1707 (tfpport != IPC_PORT_NULL)) {
1708 if (tfpport == IPC_PORT_DEAD) {
1709 error = EACCES;
1710 goto out;
1711 }
1712
1713 /* Call up to the task access server */
1714 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(task_access_port: tfpport,
1715 calling_pid: proc_selfpid(), calling_gid: kauth_getgid(), target_pid: pid, TASK_FLAVOR_CONTROL);
1716
1717 if (error != MACH_MSG_SUCCESS) {
1718 if (error == MACH_RCV_INTERRUPTED) {
1719 error = EINTR;
1720 } else {
1721 error = EPERM;
1722 }
1723 goto out;
1724 }
1725 }
1726 }
1727#endif /* XNU_TARGET_OS_OSX */
1728
1729#if !XNU_TARGET_OS_OSX
1730#if SOCKETS
1731 resume_proc_sockets(targetproc);
1732#endif /* SOCKETS */
1733#endif /* !XNU_TARGET_OS_OSX */
1734
1735 task_reference(target);
1736
1737#if CONFIG_MEMORYSTATUS
1738 memorystatus_on_resume(p: targetproc);
1739#endif
1740
1741 error = task_pidresume(task: target);
1742 if (error) {
1743 if (error == KERN_INVALID_ARGUMENT) {
1744 error = EINVAL;
1745 } else {
1746 if (error == KERN_MEMORY_ERROR) {
1747 psignal(p: targetproc, SIGKILL);
1748 error = EIO;
1749 } else {
1750 error = EPERM;
1751 }
1752 }
1753 }
1754
1755 task_deallocate(target);
1756
1757out:
1758 if (tfpport != IPC_PORT_NULL) {
1759 ipc_port_release_send(port: tfpport);
1760 }
1761
1762 if (targetproc != PROC_NULL) {
1763 proc_rele(p: targetproc);
1764 }
1765
1766 *ret = error;
1767 return error;
1768}
1769
1770#if !XNU_TARGET_OS_OSX
1771/*
1772 * Freeze the specified process (provided in args->pid), or find and freeze a PID.
1773 * When a process is specified, this call is blocking; otherwise we wake up the
1774 * freezer thread and do not block on a process being frozen.
1775 */
1776kern_return_t
1777pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
1778{
1779 int error = 0;
1780 proc_t targetproc = PROC_NULL;
1781 int pid = args->pid;
1782
1783#ifndef CONFIG_FREEZE
1784 #pragma unused(pid)
1785#else
1786
1787 /*
1788 * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check().
1789 */
1790
1791 if (pid >= 0) {
1792 targetproc = proc_find(pid);
1793
1794 if (targetproc == PROC_NULL) {
1795 error = ESRCH;
1796 goto out;
1797 }
1798
1799 if (!task_for_pid_posix_check(targetproc)) {
1800 error = EPERM;
1801 goto out;
1802 }
1803 }
1804
1805#if CONFIG_MACF
1806	// Note that targetproc may be null
1807 error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE);
1808 if (error) {
1809 error = EPERM;
1810 goto out;
1811 }
1812#endif
1813
1814 if (pid == -2) {
1815 vm_pageout_anonymous_pages();
1816 } else if (pid == -1) {
1817 memorystatus_on_inactivity(targetproc);
1818 } else {
1819 error = memorystatus_freeze_process_sync(targetproc);
1820 }
1821
1822out:
1823
1824#endif /* CONFIG_FREEZE */
1825
1826 if (targetproc != PROC_NULL) {
1827 proc_rele(targetproc);
1828 }
1829 *ret = error;
1830 return error;
1831}
1832#endif /* !XNU_TARGET_OS_OSX */
1833
1834#if SOCKETS
1835int
1836networking_memstatus_callout(proc_t p, uint32_t status)
1837{
1838 struct fileproc *fp;
1839
1840 /*
1841 * proc list lock NOT held
1842 * proc lock NOT held
1843 * a reference on the proc has been held / shall be dropped by the caller.
1844 */
1845 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
1846 LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED);
1847
1848 proc_fdlock(p);
1849
1850 fdt_foreach(fp, p) {
1851 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
1852#if NECP
1853 case DTYPE_NETPOLICY:
1854 necp_fd_memstatus(proc: p, status,
1855 client_fd: (struct necp_fd_data *)fp_get_data(fp));
1856 break;
1857#endif /* NECP */
1858#if SKYWALK
1859 case DTYPE_CHANNEL:
1860 kern_channel_memstatus(p, status,
1861 (struct kern_channel *)fp_get_data(fp));
1862 break;
1863#endif /* SKYWALK */
1864 default:
1865 break;
1866 }
1867 }
1868 proc_fdunlock(p);
1869
1870 return 1;
1871}
1872
1873#if SKYWALK
1874/*
1875 * Since we make multiple passes across the fileproc array, record the
1876 * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be
1877 * large enough to accommodate most, if not all, cases. If we find more,
1878 * we'll go to the slow path during second pass.
1879 */
1880#define MAX_CHANNELS 8 /* should be more than enough */
1881#endif /* SKYWALK */
1882
1883static int
1884networking_defunct_callout(proc_t p, void *arg)
1885{
1886 struct pid_shutdown_sockets_args *args = arg;
1887 int pid = args->pid;
1888 int level = args->level;
1889 struct fileproc *fp;
1890#if SKYWALK
1891 int i;
1892 int channel_count = 0;
1893 struct kern_channel *channel_array[MAX_CHANNELS];
1894
1895 bzero(s: &channel_array, n: sizeof(channel_array));
1896#endif /* SKYWALK */
1897
1898 proc_fdlock(p);
1899
1900 fdt_foreach(fp, p) {
1901 struct fileglob *fg = fp->fp_glob;
1902
1903 switch (FILEGLOB_DTYPE(fg)) {
1904 case DTYPE_SOCKET: {
1905 struct socket *so = (struct socket *)fg_get_data(fg);
1906 if (proc_getpid(p) == pid || so->last_pid == pid ||
1907 ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) {
1908 /* Call networking stack with socket and level */
1909 (void)socket_defunct(p, so, level);
1910 }
1911 break;
1912 }
1913#if NECP
1914 case DTYPE_NETPOLICY:
1915 /* first pass: defunct necp and get stats for ntstat */
1916 if (proc_getpid(p) == pid) {
1917 necp_fd_defunct(proc: p,
1918 client_fd: (struct necp_fd_data *)fg_get_data(fg));
1919 }
1920 break;
1921#endif /* NECP */
1922#if SKYWALK
1923 case DTYPE_CHANNEL:
1924 /* first pass: get channels and total count */
1925 if (proc_getpid(p) == pid) {
1926 if (channel_count < MAX_CHANNELS) {
1927 channel_array[channel_count] =
1928 (struct kern_channel *)fg_get_data(fg);
1929 }
1930 ++channel_count;
1931 }
1932 break;
1933#endif /* SKYWALK */
1934 default:
1935 break;
1936 }
1937 }
1938
1939#if SKYWALK
1940 /*
1941 * Second pass: defunct channels/flows (after NECP). Handle
1942 * the common case of up to MAX_CHANNELS count with fast path,
1943 * and traverse the fileproc array again only if we exceed it.
1944 */
1945 if (channel_count != 0 && channel_count <= MAX_CHANNELS) {
1946 ASSERT(proc_getpid(p) == pid);
1947 for (i = 0; i < channel_count; i++) {
1948 ASSERT(channel_array[i] != NULL);
1949 kern_channel_defunct(p, channel_array[i]);
1950 }
1951 } else if (channel_count != 0) {
1952 ASSERT(proc_getpid(p) == pid);
1953 fdt_foreach(fp, p) {
1954 struct fileglob *fg = fp->fp_glob;
1955
1956 if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) {
1957 kern_channel_defunct(p,
1958 (struct kern_channel *)fg_get_data(fg));
1959 }
1960 }
1961 }
1962#endif /* SKYWALK */
1963 proc_fdunlock(p);
1964
1965 return PROC_RETURNED;
1966}
1967
1968int
1969pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
1970{
1971 int error = 0;
1972 proc_t targetproc = PROC_NULL;
1973 int pid = args->pid;
1974 int level = args->level;
1975
1976 if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
1977 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
1978 error = EINVAL;
1979 goto out;
1980 }
1981
1982 targetproc = proc_find(pid);
1983 if (targetproc == PROC_NULL) {
1984 error = ESRCH;
1985 goto out;
1986 }
1987
1988 if (!task_for_pid_posix_check(target: targetproc) &&
1989 !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) {
1990 error = EPERM;
1991 goto out;
1992 }
1993
1994#if CONFIG_MACF
1995 error = mac_proc_check_suspend_resume(proc: targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1996 if (error) {
1997 error = EPERM;
1998 goto out;
1999 }
2000#endif
2001
2002 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
2003 callout: networking_defunct_callout, arg: args, NULL, NULL);
2004
2005out:
2006 if (targetproc != PROC_NULL) {
2007 proc_rele(p: targetproc);
2008 }
2009 *ret = error;
2010 return error;
2011}
2012
2013#endif /* SOCKETS */
2014
2015static int
2016sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
2017 __unused int arg2, struct sysctl_req *req)
2018{
2019 int error = 0;
2020 int new_value;
2021
2022 error = SYSCTL_OUT(req, arg1, sizeof(int));
2023 if (error || req->newptr == USER_ADDR_NULL) {
2024 return error;
2025 }
2026
2027 if (!kauth_cred_issuser(cred: kauth_cred_get())) {
2028 return EPERM;
2029 }
2030
2031 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
2032 goto out;
2033 }
2034 if ((new_value == KERN_TFP_POLICY_DENY)
2035 || (new_value == KERN_TFP_POLICY_DEFAULT)) {
2036 tfp_policy = new_value;
2037 } else {
2038 error = EINVAL;
2039 }
2040out:
2041 return error;
2042}
2043
2044#if defined(SECURE_KERNEL)
2045static int kern_secure_kernel = 1;
2046#else
2047static int kern_secure_kernel = 0;
2048#endif
2049
2050SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");
2051
2052SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
2053SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2054 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");
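/*
 * Illustrative sketch (not part of the kernel build): the policy above can
 * be inspected or changed from user space with sysctlbyname(3).  This
 * assumes the KERN_TFP_POLICY_* constants are visible to the caller; only
 * the superuser may change the value, and anything other than
 * KERN_TFP_POLICY_DENY or KERN_TFP_POLICY_DEFAULT is rejected with EINVAL
 * by the handler above.
 *
 *	#include <sys/sysctl.h>
 *
 *	int policy = 0;
 *	size_t len = sizeof(policy);
 *	if (sysctlbyname("kern.tfp.policy", &policy, &len, NULL, 0) == 0) {
 *		int new_policy = KERN_TFP_POLICY_DEFAULT;
 *		(void)sysctlbyname("kern.tfp.policy", NULL, NULL,
 *		    &new_policy, sizeof(new_policy));
 *	}
 */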
2055
2056SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
2057 &shared_region_trace_level, 0, "");
2058SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
2059 &shared_region_version, 0, "");
2060SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
2061 &shared_region_persistence, 0, "");
2062
2063/*
2064 * shared_region_check_np:
2065 *
2066 * This system call is intended for dyld.
2067 *
2068 * dyld calls this when any process starts to see if the process's shared
2069 * region is already set up and ready to use.
2070 * This call returns the base address of the first mapping in the
2071 * process's shared region.
2072 * dyld will then check what's mapped at that address.
2073 *
2074 * If the shared region is empty, dyld will then attempt to map the shared
2075 * cache file in the shared region via the shared_region_map_np() system call.
2076 *
2077 * If something's already mapped in the shared region, dyld will check if it
2078 * matches the shared cache it would like to use for that process.
2079 * If it matches, everything's ready and the process can proceed and use the
2080 * shared region.
2081 * If it doesn't match, dyld will unmap the shared region and map the shared
2082 * cache into the process's address space via mmap().
2083 *
2084 * A NULL pointer argument can be used by dyld to indicate it has unmapped
2085 * the shared region. We will remove the shared_region reference from the task.
2086 *
2087 * ERROR VALUES
2088 * EINVAL no shared region
2089 * ENOMEM shared region is empty
2090 * EFAULT bad address for "start_address"
2091 */
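/*
 * Illustrative sketch of the caller's side (assumes the Libsyscall wrapper
 * __shared_region_check_np(); not dyld's actual code):
 *
 *	uint64_t base = 0;
 *	if (__shared_region_check_np(&base) == 0) {
 *		// something is mapped at "base"; verify it matches the
 *		// shared cache this process wants to use
 *	} else {
 *		// EINVAL/ENOMEM: no usable shared region; map the cache
 *		// via shared_region_map_and_slide_2_np() or fall back
 *		// to a private mmap()
 *	}
 */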
2092int
2093shared_region_check_np(
2094 __unused struct proc *p,
2095 struct shared_region_check_np_args *uap,
2096 __unused int *retvalp)
2097{
2098 vm_shared_region_t shared_region;
2099 mach_vm_offset_t start_address = 0;
2100 int error = 0;
2101 kern_return_t kr;
2102 task_t task = current_task();
2103
2104 SHARED_REGION_TRACE_DEBUG(
2105 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
2106 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2107 proc_getpid(p), p->p_comm,
2108 (uint64_t)uap->start_address));
2109
2110 /*
2111 * Special value of start_address used to indicate that map_with_linking() should
2112 * no longer be allowed in this process
2113 */
2114 if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) {
2115 p->p_disallow_map_with_linking = TRUE;
2116 return 0;
2117 }
2118
2119	/* retrieve the current task's shared region */
2120 shared_region = vm_shared_region_get(task);
2121 if (shared_region != NULL) {
2122 /*
2123 * A NULL argument is used by dyld to indicate the task
2124 * has unmapped its shared region.
2125 */
2126 if (uap->start_address == 0) {
2127 /* unmap it first */
2128 vm_shared_region_remove(task, sr: shared_region);
2129 vm_shared_region_set(task, NULL);
2130 } else {
2131 /* retrieve address of its first mapping... */
2132 kr = vm_shared_region_start_address(shared_region, start_address: &start_address, task);
2133 if (kr != KERN_SUCCESS) {
2134 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2135 "check_np(0x%llx) "
2136 "vm_shared_region_start_address() failed\n",
2137 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2138 proc_getpid(p), p->p_comm,
2139 (uint64_t)uap->start_address));
2140 error = ENOMEM;
2141 } else {
2142#if __has_feature(ptrauth_calls)
2143 /*
2144 * Remap any section of the shared library that
2145 * has authenticated pointers into private memory.
2146 */
2147 if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) {
2148 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] "
2149 "check_np(0x%llx) "
2150 "vm_shared_region_auth_remap() failed\n",
2151 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2152 proc_getpid(p), p->p_comm,
2153 (uint64_t)uap->start_address));
2154 error = ENOMEM;
2155 }
2156#endif /* __has_feature(ptrauth_calls) */
2157
2158 /* ... and give it to the caller */
2159 if (error == 0) {
2160 error = copyout(&start_address,
2161 (user_addr_t) uap->start_address,
2162 sizeof(start_address));
2163 if (error != 0) {
2164 SHARED_REGION_TRACE_ERROR(
2165 ("shared_region: %p [%d(%s)] "
2166 "check_np(0x%llx) "
2167 "copyout(0x%llx) error %d\n",
2168 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2169 proc_getpid(p), p->p_comm,
2170 (uint64_t)uap->start_address, (uint64_t)start_address,
2171 error));
2172 }
2173 }
2174 }
2175 }
2176 vm_shared_region_deallocate(shared_region);
2177 } else {
2178 /* no shared region ! */
2179 error = EINVAL;
2180 }
2181
2182 SHARED_REGION_TRACE_DEBUG(
2183 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
2184 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2185 proc_getpid(p), p->p_comm,
2186 (uint64_t)uap->start_address, (uint64_t)start_address, error));
2187
2188 return error;
2189}
2190
2191
2192static int
2193shared_region_copyin(
2194 struct proc *p,
2195 user_addr_t user_addr,
2196 unsigned int count,
2197 unsigned int element_size,
2198 void *kernel_data)
2199{
2200 int error = 0;
2201 vm_size_t size = count * element_size;
2202
2203 error = copyin(user_addr, kernel_data, size);
2204 if (error) {
2205 SHARED_REGION_TRACE_ERROR(
2206 ("shared_region: %p [%d(%s)] map(): "
2207 "copyin(0x%llx, %ld) failed (error=%d)\n",
2208 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2209 proc_getpid(p), p->p_comm,
2210 (uint64_t)user_addr, (long)size, error));
2211 }
2212 return error;
2213}
2214
2215/*
2216 * A reasonable upper limit to prevent overflow of allocation/copyin.
2217 */
2218#define _SR_FILE_MAPPINGS_MAX_FILES 256
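/*
 * Rough sizing, for reference: with at most 256 files and (see SFM_MAX
 * below) at most 8 mappings per file, the count * element_size computation
 * in shared_region_copyin() tops out around
 * 2048 * sizeof(struct shared_file_mapping_slide_np), i.e. on the order of
 * 100KB, far below any vm_size_t overflow.
 */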
2219
2220/* forward declaration */
2221__attribute__((noinline))
2222static void shared_region_map_and_slide_cleanup(
2223 struct proc *p,
2224 uint32_t files_count,
2225 struct _sr_file_mappings *sr_file_mappings,
2226 struct vm_shared_region *shared_region);
2227
2228/*
2229 * Setup part of _shared_region_map_and_slide().
2230 * It had to be broken out of _shared_region_map_and_slide() to
2231 * prevent compiler inlining from blowing out the stack.
2232 */
2233__attribute__((noinline))
2234static int
2235shared_region_map_and_slide_setup(
2236 struct proc *p,
2237 uint32_t files_count,
2238 struct shared_file_np *files,
2239 uint32_t mappings_count,
2240 struct shared_file_mapping_slide_np *mappings,
2241 struct _sr_file_mappings **sr_file_mappings,
2242 struct vm_shared_region **shared_region_ptr,
2243 struct vnode *rdir_vp)
2244{
2245 int error = 0;
2246 struct _sr_file_mappings *srfmp;
2247 uint32_t mappings_next;
2248 struct vnode_attr va;
2249 off_t fs;
2250#if CONFIG_MACF
2251 vm_prot_t maxprot = VM_PROT_ALL;
2252#endif
2253 uint32_t i;
2254 struct vm_shared_region *shared_region = NULL;
2255 boolean_t is_driverkit = task_is_driver(task: current_task());
2256
2257 SHARED_REGION_TRACE_DEBUG(
2258 ("shared_region: %p [%d(%s)] -> map\n",
2259 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2260 proc_getpid(p), p->p_comm));
2261
2262 if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) {
2263 error = E2BIG;
2264 goto done;
2265 }
2266 if (files_count == 0) {
2267 error = EINVAL;
2268 goto done;
2269 }
2270 *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count,
2271 Z_WAITOK | Z_ZERO);
2272 if (*sr_file_mappings == NULL) {
2273 error = ENOMEM;
2274 goto done;
2275 }
2276 mappings_next = 0;
2277 for (i = 0; i < files_count; i++) {
2278 srfmp = &(*sr_file_mappings)[i];
2279 srfmp->fd = files[i].sf_fd;
2280 srfmp->mappings_count = files[i].sf_mappings_count;
2281 srfmp->mappings = &mappings[mappings_next];
2282 mappings_next += srfmp->mappings_count;
2283 if (mappings_next > mappings_count) {
2284 error = EINVAL;
2285 goto done;
2286 }
2287 srfmp->slide = files[i].sf_slide;
2288 }
2289
2290 /* get the process's shared region (setup in vm_map_exec()) */
2291 shared_region = vm_shared_region_trim_and_get(task: current_task());
2292 *shared_region_ptr = shared_region;
2293 if (shared_region == NULL) {
2294 SHARED_REGION_TRACE_ERROR(
2295 ("shared_region: %p [%d(%s)] map(): "
2296 "no shared region\n",
2297 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2298 proc_getpid(p), p->p_comm));
2299 error = EINVAL;
2300 goto done;
2301 }
2302
2303 /*
2304	 * Check that the shared region matches the current root
2305	 * directory of this process. If it doesn't, deny the mapping
2306	 * to avoid tainting the shared region with something that
2307	 * doesn't quite belong in it.
2308 */
2309 struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region);
2310 if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) {
2311 SHARED_REGION_TRACE_ERROR(
2312 ("shared_region: map(%p) root_dir mismatch\n",
2313 (void *)VM_KERNEL_ADDRPERM(current_thread())));
2314 error = EPERM;
2315 goto done;
2316 }
2317
2318
2319 for (srfmp = &(*sr_file_mappings)[0];
2320 srfmp < &(*sr_file_mappings)[files_count];
2321 srfmp++) {
2322 if (srfmp->mappings_count == 0) {
2323 /* no mappings here... */
2324 continue;
2325 }
2326
2327 /*
2328 * A file descriptor of -1 is used to indicate that the data
2329 * to be put in the shared region for this mapping comes directly
2330		 * from the process's address space. Ensure the mapping is properly aligned.
2331 */
2332 if (srfmp->fd == -1) {
2333 /* only allow one mapping per fd */
2334 if (srfmp->mappings_count > 1) {
2335 SHARED_REGION_TRACE_ERROR(
2336 ("shared_region: %p [%d(%s)] map data >1 mapping\n",
2337 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2338 proc_getpid(p), p->p_comm));
2339 error = EINVAL;
2340 goto done;
2341 }
2342
2343 /*
2344 * The destination address and size must be page aligned.
2345 */
2346 struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0];
2347 mach_vm_address_t dest_addr = mapping->sms_address;
2348 mach_vm_size_t map_size = mapping->sms_size;
2349 if (!vm_map_page_aligned(offset: dest_addr, mask: vm_map_page_mask(map: current_map()))) {
2350 SHARED_REGION_TRACE_ERROR(
2351 ("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n",
2352 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2353 proc_getpid(p), p->p_comm, dest_addr));
2354 error = EINVAL;
2355 goto done;
2356 }
2357 if (!vm_map_page_aligned(offset: map_size, mask: vm_map_page_mask(map: current_map()))) {
2358 SHARED_REGION_TRACE_ERROR(
2359 ("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n",
2360 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2361 proc_getpid(p), p->p_comm, map_size));
2362 error = EINVAL;
2363 goto done;
2364 }
2365 continue;
2366 }
2367
2368 /* get file structure from file descriptor */
2369 error = fp_get_ftype(p, fd: srfmp->fd, ftype: DTYPE_VNODE, EINVAL, fpp: &srfmp->fp);
2370 if (error) {
2371 SHARED_REGION_TRACE_ERROR(
2372 ("shared_region: %p [%d(%s)] map: "
2373 "fd=%d lookup failed (error=%d)\n",
2374 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2375 proc_getpid(p), p->p_comm, srfmp->fd, error));
2376 goto done;
2377 }
2378
2379 /* we need at least read permission on the file */
2380 if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) {
2381 SHARED_REGION_TRACE_ERROR(
2382 ("shared_region: %p [%d(%s)] map: "
2383 "fd=%d not readable\n",
2384 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2385 proc_getpid(p), p->p_comm, srfmp->fd));
2386 error = EPERM;
2387 goto done;
2388 }
2389
2390 /* get vnode from file structure */
2391 error = vnode_getwithref(vp: (vnode_t)fp_get_data(fp: srfmp->fp));
2392 if (error) {
2393 SHARED_REGION_TRACE_ERROR(
2394 ("shared_region: %p [%d(%s)] map: "
2395 "fd=%d getwithref failed (error=%d)\n",
2396 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2397 proc_getpid(p), p->p_comm, srfmp->fd, error));
2398 goto done;
2399 }
2400 srfmp->vp = (struct vnode *)fp_get_data(fp: srfmp->fp);
2401
2402 /* make sure the vnode is a regular file */
2403 if (srfmp->vp->v_type != VREG) {
2404 SHARED_REGION_TRACE_ERROR(
2405 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2406 "not a file (type=%d)\n",
2407 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2408 proc_getpid(p), p->p_comm,
2409 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2410 srfmp->vp->v_name, srfmp->vp->v_type));
2411 error = EINVAL;
2412 goto done;
2413 }
2414
2415#if CONFIG_MACF
2416 /* pass in 0 for the offset argument because AMFI does not need the offset
2417 * of the shared cache */
2418 error = mac_file_check_mmap(cred: vfs_context_ucred(ctx: vfs_context_current()),
2419 fg: srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, file_pos: 0, maxprot: &maxprot);
2420 if (error) {
2421 goto done;
2422 }
2423#endif /* MAC */
2424
2425#if XNU_TARGET_OS_OSX && defined(__arm64__)
2426 /*
2427 * Check if the shared cache is in the trust cache;
2428 * if so, we can skip the root ownership check.
2429 */
2430#if DEVELOPMENT || DEBUG
2431 /*
2432 * Skip both root ownership and trust cache check if
2433 * enforcement is disabled.
2434 */
2435 if (!cs_system_enforcement()) {
2436 goto after_root_check;
2437 }
2438#endif /* DEVELOPMENT || DEBUG */
2439 struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0);
2440 if (blob == NULL) {
2441 SHARED_REGION_TRACE_ERROR(
2442 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2443 "missing CS blob\n",
2444 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2445 proc_getpid(p), p->p_comm,
2446 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2447 srfmp->vp->v_name));
2448 goto root_check;
2449 }
2450 const uint8_t *cdhash = csblob_get_cdhash(blob);
2451 if (cdhash == NULL) {
2452 SHARED_REGION_TRACE_ERROR(
2453 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2454 "missing cdhash\n",
2455 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2456 proc_getpid(p), p->p_comm,
2457 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2458 srfmp->vp->v_name));
2459 goto root_check;
2460 }
2461
2462 bool in_trust_cache = false;
2463 TrustCacheQueryToken_t qt;
2464 if (query_trust_cache(query_type: kTCQueryTypeAll, cdhash, query_token: &qt) == KERN_SUCCESS) {
2465 TCType_t tc_type = kTCTypeInvalid;
2466 TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type);
2467 in_trust_cache = (tc_ret.error == kTCReturnSuccess &&
2468 (tc_type == kTCTypeCryptex1BootOS ||
2469 tc_type == kTCTypeStatic ||
2470 tc_type == kTCTypeEngineering));
2471 }
2472 if (!in_trust_cache) {
2473 SHARED_REGION_TRACE_ERROR(
2474 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2475 "not in trust cache\n",
2476 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2477 proc_getpid(p), p->p_comm,
2478 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2479 srfmp->vp->v_name));
2480 goto root_check;
2481 }
2482 goto after_root_check;
2483root_check:
2484#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2485
2486 /* The shared cache file must be owned by root */
2487 VATTR_INIT(&va);
2488 VATTR_WANTED(&va, va_uid);
2489 error = vnode_getattr(vp: srfmp->vp, vap: &va, ctx: vfs_context_current());
2490 if (error) {
2491 SHARED_REGION_TRACE_ERROR(
2492 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2493 "vnode_getattr(%p) failed (error=%d)\n",
2494 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2495 proc_getpid(p), p->p_comm,
2496 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2497 srfmp->vp->v_name,
2498 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2499 error));
2500 goto done;
2501 }
2502 if (va.va_uid != 0) {
2503 SHARED_REGION_TRACE_ERROR(
2504 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2505 "owned by uid=%d instead of 0\n",
2506 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2507 proc_getpid(p), p->p_comm,
2508 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2509 srfmp->vp->v_name, va.va_uid));
2510 error = EPERM;
2511 goto done;
2512 }
2513
2514#if XNU_TARGET_OS_OSX && defined(__arm64__)
2515after_root_check:
2516#endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */
2517
2518#if CONFIG_CSR
2519 if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) {
2520 VATTR_INIT(&va);
2521 VATTR_WANTED(&va, va_flags);
2522 error = vnode_getattr(vp: srfmp->vp, vap: &va, ctx: vfs_context_current());
2523 if (error) {
2524 SHARED_REGION_TRACE_ERROR(
2525 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2526 "vnode_getattr(%p) failed (error=%d)\n",
2527 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2528 proc_getpid(p), p->p_comm,
2529 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2530 srfmp->vp->v_name,
2531 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2532 error));
2533 goto done;
2534 }
2535
2536 if (!(va.va_flags & SF_RESTRICTED)) {
2537 /*
2538 * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and
2539 * the shared cache file is NOT SIP-protected, so reject the
2540 * mapping request
2541 */
2542 SHARED_REGION_TRACE_ERROR(
2543 ("shared_region: %p [%d(%s)] map(%p:'%s'), "
2544 "vnode is not SIP-protected. \n",
2545 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2546 proc_getpid(p), p->p_comm,
2547 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2548 srfmp->vp->v_name));
2549 error = EPERM;
2550 goto done;
2551 }
2552 }
2553#else /* CONFIG_CSR */
2554
2555 /*
2556 * Devices without SIP/ROSP need to make sure that the shared cache
2557 * is either on the root volume or in the preboot cryptex volume.
2558 */
2559 assert(rdir_vp != NULL);
2560 if (srfmp->vp->v_mount != rdir_vp->v_mount) {
2561 vnode_t preboot_vp = NULL;
2562#if XNU_TARGET_OS_OSX
2563#define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes"
2564#else
2565#define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes"
2566#endif
2567 error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current());
2568 if (error || srfmp->vp->v_mount != preboot_vp->v_mount) {
2569 SHARED_REGION_TRACE_ERROR(
2570 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2571 "not on process' root volume nor preboot volume\n",
2572 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2573 proc_getpid(p), p->p_comm,
2574 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2575 srfmp->vp->v_name));
2576 error = EPERM;
2577 if (preboot_vp) {
2578 (void)vnode_put(preboot_vp);
2579 }
2580 goto done;
2581 } else if (preboot_vp) {
2582 (void)vnode_put(preboot_vp);
2583 }
2584 }
2585#endif /* CONFIG_CSR */
2586
2587 if (scdir_enforce) {
2588 char **expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2589 struct vnode *scdir_vp = NULL;
2590 for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path;
2591 *expected_scdir_path != NULL;
2592 expected_scdir_path++) {
2593 /* get vnode for expected_scdir_path */
2594 error = vnode_lookup(path: *expected_scdir_path, flags: 0, vpp: &scdir_vp, ctx: vfs_context_current());
2595 if (error) {
2596 SHARED_REGION_TRACE_ERROR(
2597 ("shared_region: %p [%d(%s)]: "
2598 "vnode_lookup(%s) failed (error=%d)\n",
2599 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2600 proc_getpid(p), p->p_comm,
2601 *expected_scdir_path, error));
2602 continue;
2603 }
2604
2605 /* check if parent is scdir_vp */
2606 assert(scdir_vp != NULL);
2607 if (vnode_parent(vp: srfmp->vp) == scdir_vp) {
2608 (void)vnode_put(vp: scdir_vp);
2609 scdir_vp = NULL;
2610 goto scdir_ok;
2611 }
2612 (void)vnode_put(vp: scdir_vp);
2613 scdir_vp = NULL;
2614 }
2615 /* nothing matches */
2616 SHARED_REGION_TRACE_ERROR(
2617 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2618 "shared cache file not in expected directory\n",
2619 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2620 proc_getpid(p), p->p_comm,
2621 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2622 srfmp->vp->v_name));
2623 error = EPERM;
2624 goto done;
2625 }
2626scdir_ok:
2627
2628 /* get vnode size */
2629 error = vnode_size(srfmp->vp, &fs, vfs_context_current());
2630 if (error) {
2631 SHARED_REGION_TRACE_ERROR(
2632 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2633 "vnode_size(%p) failed (error=%d)\n",
2634 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2635 proc_getpid(p), p->p_comm,
2636 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2637 srfmp->vp->v_name,
2638 (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error));
2639 goto done;
2640 }
2641 srfmp->file_size = fs;
2642
2643 /* get the file's memory object handle */
2644 srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT);
2645 if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) {
2646 SHARED_REGION_TRACE_ERROR(
2647 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2648 "no memory object\n",
2649 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2650 proc_getpid(p), p->p_comm,
2651 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2652 srfmp->vp->v_name));
2653 error = EINVAL;
2654 goto done;
2655 }
2656
2657 /* check that the mappings are properly covered by code signatures */
2658 if (!cs_system_enforcement()) {
2659 /* code signing is not enforced: no need to check */
2660 } else {
2661 for (i = 0; i < srfmp->mappings_count; i++) {
2662 if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) {
2663 /* zero-filled mapping: not backed by the file */
2664 continue;
2665 }
2666 if (ubc_cs_is_range_codesigned(srfmp->vp,
2667 srfmp->mappings[i].sms_file_offset,
2668 srfmp->mappings[i].sms_size)) {
2669 /* this mapping is fully covered by code signatures */
2670 continue;
2671 }
2672 SHARED_REGION_TRACE_ERROR(
2673 ("shared_region: %p [%d(%s)] map(%p:'%s'): "
2674 "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] "
2675 "is not code-signed\n",
2676 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2677 proc_getpid(p), p->p_comm,
2678 (void *)VM_KERNEL_ADDRPERM(srfmp->vp),
2679 srfmp->vp->v_name,
2680 i, srfmp->mappings_count,
2681 srfmp->mappings[i].sms_address,
2682 srfmp->mappings[i].sms_size,
2683 srfmp->mappings[i].sms_file_offset,
2684 srfmp->mappings[i].sms_max_prot,
2685 srfmp->mappings[i].sms_init_prot));
2686 error = EINVAL;
2687 goto done;
2688 }
2689 }
2690 }
2691done:
2692 if (error != 0) {
2693 shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings: *sr_file_mappings, shared_region);
2694 *sr_file_mappings = NULL;
2695 *shared_region_ptr = NULL;
2696 }
2697 return error;
2698}
2699
2700/*
2701 * shared_region_map_np()
2702 *
2703 * This system call is intended for dyld.
2704 *
2705 * dyld uses this to map a shared cache file into a shared region.
2706 * This is usually done only the first time a shared cache is needed.
2707 * Subsequent processes will just use the populated shared region without
2708 * requiring any further setup.
2709 */
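/*
 * Illustrative sketch (not dyld's actual code) of the argument shape the
 * map-and-slide path below consumes: one shared_file_np per file, with that
 * file's mappings packed back-to-back in a single mappings array.  The
 * addresses, sizes and protections are placeholders.
 *
 *	struct shared_file_np file = {
 *		.sf_fd             = fd,         // open shared cache file
 *		.sf_mappings_count = 3,          // entries in "maps" below
 *		.sf_slide          = max_slide,  // see the 2_np entry point below
 *	};
 *	struct shared_file_mapping_slide_np maps[3] = {
 *		{ .sms_address = text_addr, .sms_size = text_size,
 *		  .sms_file_offset = 0,
 *		  .sms_max_prot  = VM_PROT_READ | VM_PROT_EXECUTE,
 *		  .sms_init_prot = VM_PROT_READ | VM_PROT_EXECUTE },
 *		// ... DATA and LINKEDIT mappings follow ...
 *	};
 */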
2710static int
2711_shared_region_map_and_slide(
2712 struct proc *p,
2713 uint32_t files_count,
2714 struct shared_file_np *files,
2715 uint32_t mappings_count,
2716 struct shared_file_mapping_slide_np *mappings)
2717{
2718 int error = 0;
2719 kern_return_t kr = KERN_SUCCESS;
2720 struct _sr_file_mappings *sr_file_mappings = NULL;
2721 struct vnode *rdir_vp = NULL;
2722 struct vm_shared_region *shared_region = NULL;
2723
2724 /*
2725 * Get a reference to the current proc's root dir.
2726 * Need this to prevent racing with chroot.
2727 */
2728 proc_fdlock(p);
2729 rdir_vp = p->p_fd.fd_rdir;
2730 if (rdir_vp == NULL) {
2731 rdir_vp = rootvnode;
2732 }
2733 assert(rdir_vp != NULL);
2734 vnode_get(rdir_vp);
2735 proc_fdunlock(p);
2736
2737 /*
2738 * Turn files, mappings into sr_file_mappings and other setup.
2739 */
2740 error = shared_region_map_and_slide_setup(p, files_count,
2741 files, mappings_count, mappings,
2742 sr_file_mappings: &sr_file_mappings, shared_region_ptr: &shared_region, rdir_vp);
2743 if (error != 0) {
2744 vnode_put(vp: rdir_vp);
2745 return error;
2746 }
2747
2748 /* map the file(s) into that shared region's submap */
2749 kr = vm_shared_region_map_file(shared_region, sr_mappings_count: files_count, sr_mappings: sr_file_mappings);
2750 if (kr != KERN_SUCCESS) {
2751 SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): "
2752 "vm_shared_region_map_file() failed kr=0x%x\n",
2753 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2754 proc_getpid(p), p->p_comm, kr));
2755 }
2756
2757 /* convert kern_return_t to errno */
2758 switch (kr) {
2759 case KERN_SUCCESS:
2760 error = 0;
2761 break;
2762 case KERN_INVALID_ADDRESS:
2763 error = EFAULT;
2764 break;
2765 case KERN_PROTECTION_FAILURE:
2766 error = EPERM;
2767 break;
2768 case KERN_NO_SPACE:
2769 error = ENOMEM;
2770 break;
2771 case KERN_FAILURE:
2772 case KERN_INVALID_ARGUMENT:
2773 default:
2774 error = EINVAL;
2775 break;
2776 }
2777
2778 /*
2779 * Mark that this process is now using split libraries.
2780 */
2781 if (error == 0 && (p->p_flag & P_NOSHLIB)) {
2782 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
2783 }
2784
2785 vnode_put(vp: rdir_vp);
2786 shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region);
2787
2788 SHARED_REGION_TRACE_DEBUG(
2789 ("shared_region: %p [%d(%s)] <- map\n",
2790 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2791 proc_getpid(p), p->p_comm));
2792
2793 return error;
2794}
2795
2796/*
2797 * Clean up part of _shared_region_map_and_slide()
2798 * It had to be broken out of _shared_region_map_and_slide() to
2799 * prevent compiler inlining from blowing out the stack.
2800 */
2801__attribute__((noinline))
2802static void
2803shared_region_map_and_slide_cleanup(
2804 struct proc *p,
2805 uint32_t files_count,
2806 struct _sr_file_mappings *sr_file_mappings,
2807 struct vm_shared_region *shared_region)
2808{
2809 struct _sr_file_mappings *srfmp;
2810 struct vnode_attr va;
2811
2812 if (sr_file_mappings != NULL) {
2813 for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) {
2814 if (srfmp->vp != NULL) {
2815 vnode_lock_spin(srfmp->vp);
2816 srfmp->vp->v_flag |= VSHARED_DYLD;
2817 vnode_unlock(srfmp->vp);
2818
2819 /* update the vnode's access time */
2820 if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) {
2821 VATTR_INIT(&va);
2822 nanotime(ts: &va.va_access_time);
2823 VATTR_SET_ACTIVE(&va, va_access_time);
2824 vnode_setattr(vp: srfmp->vp, vap: &va, ctx: vfs_context_current());
2825 }
2826
2827#if NAMEDSTREAMS
2828 /*
2829 * If the shared cache is compressed, it may
2830				 * it. That namedstream vnode will also
2831 * for it. That namedstream vnode will also
2832 * have to be marked with VSHARED_DYLD.
2833 */
2834 if (vnode_hasnamedstreams(srfmp->vp)) {
2835 vnode_t svp;
2836 if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME,
2837 NS_OPEN, 0, vfs_context_kernel()) == 0) {
2838 vnode_lock_spin(svp);
2839 svp->v_flag |= VSHARED_DYLD;
2840 vnode_unlock(svp);
2841 vnode_put(vp: svp);
2842 }
2843 }
2844#endif /* NAMEDSTREAMS */
2845 /*
2846 * release the vnode...
2847 * ubc_map() still holds it for us in the non-error case
2848 */
2849 (void) vnode_put(vp: srfmp->vp);
2850 srfmp->vp = NULL;
2851 }
2852 if (srfmp->fp != NULL) {
2853 /* release the file descriptor */
2854 fp_drop(p, fd: srfmp->fd, fp: srfmp->fp, locked: 0);
2855 srfmp->fp = NULL;
2856 }
2857 }
2858 kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings);
2859 }
2860
2861 if (shared_region != NULL) {
2862 vm_shared_region_deallocate(shared_region);
2863 }
2864}
2865
2866
2867/*
2868 * For each file mapped, we may have mappings for:
2869 * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA
2870 * so let's round up to 8 mappings per file.
2871 */
2872#define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */
2873
2874/*
2875 * This is the new interface for setting up shared region mappings.
2876 *
2877 * The slide used for shared regions set up using this interface is handled differently
2878 * from the old interface. The slide value passed in each shared_file_np represents
2879 * a maximum. The kernel chooses a random value up to that maximum, then uses it
2880 * for all shared regions.
2881 */
2882#if defined (__x86_64__)
2883#define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK
2884#else
2885#define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK
2886#endif
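/*
 * Worked example (illustrative numbers only): if dyld passes
 * sf_slide = 0x4000000, the code below reduces a random value modulo that
 * maximum and clears the sub-page bits with SLIDE_AMOUNT_MASK, e.g. on a
 * 16K-page device:
 *
 *	random_val              = 0x12345678
 *	random_val % 0x4000000  = 0x02345678
 *	& ~SIXTEENK_PAGE_MASK   = 0x02344000
 *
 * so every file in this request is slid by the same 0x02344000 bytes.
 */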
2887
2888int
2889shared_region_map_and_slide_2_np(
2890 struct proc *p,
2891 struct shared_region_map_and_slide_2_np_args *uap,
2892 __unused int *retvalp)
2893{
2894 unsigned int files_count;
2895 struct shared_file_np *shared_files = NULL;
2896 unsigned int mappings_count;
2897 struct shared_file_mapping_slide_np *mappings = NULL;
2898 kern_return_t kr = KERN_SUCCESS;
2899
2900 files_count = uap->files_count;
2901 mappings_count = uap->mappings_count;
2902
2903 if (files_count == 0) {
2904 SHARED_REGION_TRACE_INFO(
2905 ("shared_region: %p [%d(%s)] map(): "
2906 "no files\n",
2907 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2908 proc_getpid(p), p->p_comm));
2909 kr = 0; /* no files to map: we're done ! */
2910 goto done;
2911 } else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) {
2912 shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK);
2913 if (shared_files == NULL) {
2914 kr = KERN_RESOURCE_SHORTAGE;
2915 goto done;
2916 }
2917 } else {
2918 SHARED_REGION_TRACE_ERROR(
2919 ("shared_region: %p [%d(%s)] map(): "
2920 "too many files (%d) max %d\n",
2921 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2922 proc_getpid(p), p->p_comm,
2923 files_count, _SR_FILE_MAPPINGS_MAX_FILES));
2924 kr = KERN_FAILURE;
2925 goto done;
2926 }
2927
2928 if (mappings_count == 0) {
2929 SHARED_REGION_TRACE_INFO(
2930 ("shared_region: %p [%d(%s)] map(): "
2931 "no mappings\n",
2932 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2933 proc_getpid(p), p->p_comm));
2934 kr = 0; /* no mappings: we're done ! */
2935 goto done;
2936 } else if (mappings_count <= SFM_MAX) {
2937 mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK);
2938 if (mappings == NULL) {
2939 kr = KERN_RESOURCE_SHORTAGE;
2940 goto done;
2941 }
2942 } else {
2943 SHARED_REGION_TRACE_ERROR(
2944 ("shared_region: %p [%d(%s)] map(): "
2945 "too many mappings (%d) max %d\n",
2946 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2947 proc_getpid(p), p->p_comm,
2948 mappings_count, SFM_MAX));
2949 kr = KERN_FAILURE;
2950 goto done;
2951 }
2952
2953 kr = shared_region_copyin(p, user_addr: uap->files, count: files_count, element_size: sizeof(shared_files[0]), kernel_data: shared_files);
2954 if (kr != KERN_SUCCESS) {
2955 goto done;
2956 }
2957
2958 kr = shared_region_copyin(p, user_addr: uap->mappings, count: mappings_count, element_size: sizeof(mappings[0]), kernel_data: mappings);
2959 if (kr != KERN_SUCCESS) {
2960 goto done;
2961 }
2962
2963 uint32_t max_slide = shared_files[0].sf_slide;
2964 uint32_t random_val;
2965 uint32_t slide_amount;
2966
2967 if (max_slide != 0) {
2968 read_random(buffer: &random_val, numBytes: sizeof random_val);
2969 slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK);
2970 } else {
2971 slide_amount = 0;
2972 }
2973#if DEVELOPMENT || DEBUG
2974 extern bool bootarg_disable_aslr;
2975 if (bootarg_disable_aslr) {
2976 slide_amount = 0;
2977 }
2978#endif /* DEVELOPMENT || DEBUG */
2979
2980 /*
2981 * Fix up the mappings to reflect the desired slide.
2982 */
2983 unsigned int f;
2984 unsigned int m = 0;
2985 unsigned int i;
2986 for (f = 0; f < files_count; ++f) {
2987 shared_files[f].sf_slide = slide_amount;
2988 for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) {
2989 if (m >= mappings_count) {
2990 SHARED_REGION_TRACE_ERROR(
2991 ("shared_region: %p [%d(%s)] map(): "
2992 "mapping count argument was too small\n",
2993 (void *)VM_KERNEL_ADDRPERM(current_thread()),
2994 proc_getpid(p), p->p_comm));
2995 kr = KERN_FAILURE;
2996 goto done;
2997 }
2998 mappings[m].sms_address += slide_amount;
2999 if (mappings[m].sms_slide_size != 0) {
3000 mappings[m].sms_slide_start += slide_amount;
3001 }
3002 }
3003 }
3004
3005 kr = _shared_region_map_and_slide(p, files_count, files: shared_files, mappings_count, mappings);
3006done:
3007 kfree_data(shared_files, files_count * sizeof(shared_files[0]));
3008 kfree_data(mappings, mappings_count * sizeof(mappings[0]));
3009 return kr;
3010}
3011
3012/*
3013 * A syscall for dyld to use to map data pages that need load time relocation fixups.
3014 * The fixups are performed by a custom pager during page-in, so the pages still appear
3015 * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in
3016 * on demand later, all w/o using the compressor.
3017 *
3018 * Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
3019 * running, they are COW'd as normal.
3020 */
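/*
 * Illustrative sketch (field names taken from the checks below; the
 * offsets, sizes and layout are placeholders, not dyld's actual usage): a
 * caller describes each data region needing fixups plus a link-info blob
 * (struct mwl_info_hdr followed by bind and chain data) that the pager
 * applies at page-in time.
 *
 *	struct mwl_region r = {
 *		.mwlr_fd          = fd,              // same fd for every region
 *		.mwlr_protections = VM_PROT_READ | VM_PROT_WRITE,  // no EXECUTE/ZF
 *		.mwlr_file_offset = data_file_offset,
 *		.mwlr_size        = data_size,
 *	};
 *	struct mwl_info_hdr *hdr = link_info;
 *	hdr->mwli_version   = MWL_INFO_VERS;
 *	hdr->mwli_page_size = PAGE_SIZE;         // must match the kernel page size
 */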
3021int
3022map_with_linking_np(
3023 struct proc *p,
3024 struct map_with_linking_np_args *uap,
3025 __unused int *retvalp)
3026{
3027 uint32_t region_count;
3028 uint32_t r;
3029 struct mwl_region *regions = NULL;
3030 struct mwl_region *rp;
3031 uint32_t link_info_size;
3032 void *link_info = NULL; /* starts with a struct mwl_info_hdr */
3033 struct mwl_info_hdr *info_hdr = NULL;
3034 uint64_t binds_size;
3035 int fd;
3036 struct fileproc *fp = NULL;
3037 struct vnode *vp = NULL;
3038 size_t file_size;
3039 off_t fs;
3040 struct vnode_attr va;
3041 memory_object_control_t file_control = NULL;
3042 int error;
3043 kern_return_t kr = KERN_SUCCESS;
3044
3045 /*
3046 * Check if dyld has told us it finished with this call.
3047 */
3048 if (p->p_disallow_map_with_linking) {
3049		printf("%s: [%d(%s)]: map_with_linking() was disabled\n",
3050 __func__, proc_getpid(p), p->p_comm);
3051 kr = KERN_FAILURE;
3052 goto done;
3053 }
3054
3055 /*
3056 * First we do some sanity checking on what dyld has passed us.
3057 */
3058 region_count = uap->region_count;
3059 link_info_size = uap->link_info_size;
3060 if (region_count == 0) {
3061 printf("%s: [%d(%s)]: region_count == 0\n",
3062 __func__, proc_getpid(p), p->p_comm);
3063 kr = KERN_FAILURE;
3064 goto done;
3065 }
3066 if (region_count > MWL_MAX_REGION_COUNT) {
3067 printf("%s: [%d(%s)]: region_count too big %d\n",
3068 __func__, proc_getpid(p), p->p_comm, region_count);
3069 kr = KERN_FAILURE;
3070 goto done;
3071 }
3072
3073 if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) {
3074 printf("%s: [%d(%s)]: link_info_size too small\n",
3075 __func__, proc_getpid(p), p->p_comm);
3076 kr = KERN_FAILURE;
3077 goto done;
3078 }
3079 if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) {
3080 printf("%s: [%d(%s)]: link_info_size too big %d\n",
3081 __func__, proc_getpid(p), p->p_comm, link_info_size);
3082 kr = KERN_FAILURE;
3083 goto done;
3084 }
3085
3086 /*
3087 * Allocate and copyin the regions and link info
3088 */
3089 regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK);
3090 if (regions == NULL) {
3091 printf("%s: [%d(%s)]: failed to allocate regions\n",
3092 __func__, proc_getpid(p), p->p_comm);
3093 kr = KERN_RESOURCE_SHORTAGE;
3094 goto done;
3095 }
3096 kr = shared_region_copyin(p, user_addr: uap->regions, count: region_count, element_size: sizeof(regions[0]), kernel_data: regions);
3097 if (kr != KERN_SUCCESS) {
3098 printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n",
3099 __func__, proc_getpid(p), p->p_comm, kr);
3100 goto done;
3101 }
3102
3103 link_info = kalloc_data(link_info_size, Z_WAITOK);
3104 if (link_info == NULL) {
3105 printf("%s: [%d(%s)]: failed to allocate link_info\n",
3106 __func__, proc_getpid(p), p->p_comm);
3107 kr = KERN_RESOURCE_SHORTAGE;
3108 goto done;
3109 }
3110 kr = shared_region_copyin(p, user_addr: uap->link_info, count: 1, element_size: link_info_size, kernel_data: link_info);
3111 if (kr != KERN_SUCCESS) {
3112 printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n",
3113 __func__, proc_getpid(p), p->p_comm, kr);
3114 goto done;
3115 }
3116
3117 /*
3118	 * Do some verification of the data structures.
3119 */
3120 info_hdr = (struct mwl_info_hdr *)link_info;
3121 if (info_hdr->mwli_version != MWL_INFO_VERS) {
3122 printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n",
3123 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version);
3124 kr = KERN_FAILURE;
3125 goto done;
3126 }
3127
3128 if (info_hdr->mwli_binds_offset > link_info_size) {
3129 printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n",
3130 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset);
3131 kr = KERN_FAILURE;
3132 goto done;
3133 }
3134
3135	/* some older devices have a s/w page size > h/w page size; no need to support them */
3136 if (info_hdr->mwli_page_size != PAGE_SIZE) {
3137 /* no printf, since this is expected on some devices */
3138 kr = KERN_INVALID_ARGUMENT;
3139 goto done;
3140 }
3141
3142 binds_size = (uint64_t)info_hdr->mwli_binds_count *
3143 ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8);
3144 if (binds_size > link_info_size - info_hdr->mwli_binds_offset) {
3145 printf("%s: [%d(%s)]: mwli_binds_count too large %d\n",
3146 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count);
3147 kr = KERN_FAILURE;
3148 goto done;
3149 }
3150
3151 if (info_hdr->mwli_chains_offset > link_info_size) {
3152 printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n",
3153 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset);
3154 kr = KERN_FAILURE;
3155 goto done;
3156 }
3157
3158
3159 /*
3160	 * Ensure the chained-starts data is within the link info and make sure the
3161 * segment info offsets are within bounds.
3162 */
3163 if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) {
3164 printf("%s: [%d(%s)]: mwli_chains_size too small %d\n",
3165 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3166 kr = KERN_FAILURE;
3167 goto done;
3168 }
3169 if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) {
3170 printf("%s: [%d(%s)]: mwli_chains_size too large %d\n",
3171 __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size);
3172 kr = KERN_FAILURE;
3173 goto done;
3174 }
3175
3176 /* Note that more verification of offsets is done in the pager itself */
3177
3178 /*
3179 * Ensure we've only been given one FD and verify valid protections.
3180 */
3181 fd = regions[0].mwlr_fd;
3182 for (r = 0; r < region_count; ++r) {
3183 if (regions[r].mwlr_fd != fd) {
3184 printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n",
3185 __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd);
3186 kr = KERN_FAILURE;
3187 goto done;
3188 }
3189
3190 /*
3191 * Only allow data mappings and not zero fill. Permit TPRO
3192 * mappings only when VM_PROT_READ | VM_PROT_WRITE.
3193 */
3194 if (regions[r].mwlr_protections & VM_PROT_EXECUTE) {
3195 printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n",
3196 __func__, proc_getpid(p), p->p_comm);
3197 kr = KERN_FAILURE;
3198 goto done;
3199 }
3200 if (regions[r].mwlr_protections & VM_PROT_ZF) {
3201 printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n",
3202 __func__, proc_getpid(p), p->p_comm, r);
3203 kr = KERN_FAILURE;
3204 goto done;
3205 }
3206 if ((regions[r].mwlr_protections & VM_PROT_TPRO) &&
3207 !(regions[r].mwlr_protections & VM_PROT_WRITE)) {
3208 printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n",
3209 __func__, proc_getpid(p), p->p_comm, r);
3210 kr = KERN_FAILURE;
3211 goto done;
3212 }
3213 }
3214
3215
3216 /* get file structure from file descriptor */
3217 error = fp_get_ftype(p, fd, ftype: DTYPE_VNODE, EINVAL, fpp: &fp);
3218 if (error) {
3219 printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n",
3220 __func__, proc_getpid(p), p->p_comm, error);
3221 kr = KERN_FAILURE;
3222 goto done;
3223 }
3224
3225 /* We need at least read permission on the file */
3226 if (!(fp->fp_glob->fg_flag & FREAD)) {
3227 printf("%s: [%d(%s)]: not readable\n",
3228 __func__, proc_getpid(p), p->p_comm);
3229 kr = KERN_FAILURE;
3230 goto done;
3231 }
3232
3233 /* Get the vnode from file structure */
3234 vp = (struct vnode *)fp_get_data(fp);
3235 error = vnode_getwithref(vp);
3236 if (error) {
3237 printf("%s: [%d(%s)]: failed to get vnode, error %d\n",
3238 __func__, proc_getpid(p), p->p_comm, error);
3239 kr = KERN_FAILURE;
3240 vp = NULL; /* just to be sure */
3241 goto done;
3242 }
3243
3244 /* Make sure the vnode is a regular file */
3245 if (vp->v_type != VREG) {
3246 printf("%s: [%d(%s)]: vnode not VREG\n",
3247 __func__, proc_getpid(p), p->p_comm);
3248 kr = KERN_FAILURE;
3249 goto done;
3250 }
3251
3252 /* get vnode size */
3253 error = vnode_size(vp, &fs, vfs_context_current());
3254 if (error) {
3255 goto done;
3256 }
3257 file_size = fs;
3258
3259 /* get the file's memory object handle */
3260 file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
3261 if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
3262 printf("%s: [%d(%s)]: no memory object\n",
3263 __func__, proc_getpid(p), p->p_comm);
3264 kr = KERN_FAILURE;
3265 goto done;
3266 }
3267
3268 for (r = 0; r < region_count; ++r) {
3269 rp = &regions[r];
3270
3271#if CONFIG_MACF
3272 vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL);
3273 error = mac_file_check_mmap(cred: vfs_context_ucred(ctx: vfs_context_current()),
3274 fg: fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, file_pos: rp->mwlr_file_offset, maxprot: &prot);
3275 if (error) {
3276 printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n",
3277 __func__, proc_getpid(p), p->p_comm, r, error);
3278 kr = KERN_FAILURE;
3279 goto done;
3280 }
3281#endif /* MAC */
3282
3283 /* check that the mappings are properly covered by code signatures */
3284 if (cs_system_enforcement()) {
3285 if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) {
3286 printf("%s: [%d(%s)]: region %d, not code signed\n",
3287 __func__, proc_getpid(p), p->p_comm, r);
3288 kr = KERN_FAILURE;
3289 goto done;
3290 }
3291 }
3292 }
3293
3294 /* update the vnode's access time */
3295 if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
3296 VATTR_INIT(&va);
3297 nanotime(ts: &va.va_access_time);
3298 VATTR_SET_ACTIVE(&va, va_access_time);
3299 vnode_setattr(vp, vap: &va, ctx: vfs_context_current());
3300 }
3301
3302 /* get the VM to do the work */
3303 kr = vm_map_with_linking(task: proc_task(p), regions, region_cnt: region_count, link_info, link_info_size, file_control);
3304
3305done:
3306 if (fp != NULL) {
3307 /* release the file descriptor */
3308 fp_drop(p, fd, fp, locked: 0);
3309 }
3310 if (vp != NULL) {
3311 (void)vnode_put(vp);
3312 }
3313 if (regions != NULL) {
3314 kfree_data(regions, region_count * sizeof(regions[0]));
3315 }
3316 /* link info is used in the pager if things worked */
3317 if (link_info != NULL && kr != KERN_SUCCESS) {
3318 kfree_data(link_info, link_info_size);
3319 }
3320
3321 switch (kr) {
3322 case KERN_SUCCESS:
3323 return 0;
3324 case KERN_RESOURCE_SHORTAGE:
3325 return ENOMEM;
3326 default:
3327 return EINVAL;
3328 }
3329}
3330
3331#if DEBUG || DEVELOPMENT
3332SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count,
3333 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "");
3334SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max,
3335 CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "");
3336#endif /* DEBUG || DEVELOPMENT */
3337
3338/* sysctl overflow room */
3339
3340SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED,
3341 (int *) &page_size, 0, "vm page size");
3342
3343/* vm_page_free_target is provided as a makeshift solution for applications that want to
3344 * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
3345 * reclaimed. It allows the app to calculate how much memory is free outside the free target. */
3346extern unsigned int vm_page_free_target;
3347SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
3348 &vm_page_free_target, 0, "Pageout daemon free target");
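/*
 * Illustrative user-space sketch of the calculation described above
 * (assumes the Mach-exported vm_page_size global and the vm.page_free_count
 * sysctl defined later in this file):
 *
 *	unsigned int free_target = 0, free_count = 0;
 *	size_t len = sizeof(unsigned int);
 *	sysctlbyname("vm.vm_page_free_target", &free_target, &len, NULL, 0);
 *	len = sizeof(unsigned int);
 *	sysctlbyname("vm.page_free_count", &free_count, &len, NULL, 0);
 *	size_t headroom = (free_count > free_target)
 *	    ? (size_t)(free_count - free_target) * vm_page_size
 *	    : 0;   // bytes usable without pushing below the free target
 */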
3349
3350SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
3351 &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator");
3352
3353static int
3354vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
3355{
3356#pragma unused(oidp, arg1, arg2)
3357 unsigned int page_free_wanted;
3358
3359 page_free_wanted = mach_vm_ctl_page_free_wanted();
3360 return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
3361}
3362SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
3363 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
3364 0, 0, vm_ctl_page_free_wanted, "I", "");
3365
3366extern unsigned int vm_page_purgeable_count;
3367SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3368 &vm_page_purgeable_count, 0, "Purgeable page count");
3369
3370extern unsigned int vm_page_purgeable_wired_count;
3371SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3372 &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");
3373
3374extern unsigned int vm_page_kern_lpage_count;
3375SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3376 &vm_page_kern_lpage_count, 0, "kernel used large pages");
3377
3378#if DEVELOPMENT || DEBUG
3379#if __ARM_MIXED_PAGE_SIZE__
3380static int vm_mixed_pagesize_supported = 1;
3381#else
3382static int vm_mixed_pagesize_supported = 0;
3383#endif /*__ARM_MIXED_PAGE_SIZE__ */
3384SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED,
3385 &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize");
3386
3387SCALABLE_COUNTER_DECLARE(vm_page_grab_count);
3388SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed");
3389SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED,
3390 &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed");
3391
3392SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED,
3393 &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count");
3394SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED,
3395 &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
3396SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED,
3397 &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");
3398
3399SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3400 &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
3401SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3402 &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
3403SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3404    &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated, busy, and nolock (even though those actually get re-deactivated) */
3405SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3406 &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed");
3407SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED,
3408 &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
3409SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED,
3410 &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
3411#endif /* DEVELOPMENT || DEBUG */
3412
3413extern int madvise_free_debug;
3414SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
3415 &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");
3416extern int madvise_free_debug_sometimes;
3417SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED,
3418 &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)");
3419
3420SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
3421 &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
3422SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3423 &vm_page_stats_reusable.reusable_pages_success, "");
3424SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3425 &vm_page_stats_reusable.reusable_pages_failure, "");
3426SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3427 &vm_page_stats_reusable.reusable_pages_shared, "");
3428SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3429 &vm_page_stats_reusable.all_reusable_calls, "");
3430SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3431 &vm_page_stats_reusable.partial_reusable_calls, "");
3432SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3433 &vm_page_stats_reusable.reuse_pages_success, "");
3434SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3435 &vm_page_stats_reusable.reuse_pages_failure, "");
3436SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3437 &vm_page_stats_reusable.all_reuse_calls, "");
3438SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
3439 &vm_page_stats_reusable.partial_reuse_calls, "");
3440SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
3441 &vm_page_stats_reusable.can_reuse_success, "");
3442SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
3443 &vm_page_stats_reusable.can_reuse_failure, "");
3444SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
3445 &vm_page_stats_reusable.reusable_reclaimed, "");
3446SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED,
3447 &vm_page_stats_reusable.reusable_nonwritable, "");
3448SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3449 &vm_page_stats_reusable.reusable_shared, "");
3450SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
3451 &vm_page_stats_reusable.free_shared, "");
3452
3453
3454extern unsigned int vm_page_free_count, vm_page_speculative_count;
3455SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
3456SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");
3457
3458extern unsigned int vm_page_cleaned_count;
3459SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");
3460
3461extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count;
3462SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "");
3463SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "");
3464
3465/* pageout counts */
3466SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "");
3467SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "");
3468
3469SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "");
3470SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "");
3471SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3472SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "");
3473SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "");
3474SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "");
3475
3476SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "");
3477SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "");
3478SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "");
3479SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "");
3480extern unsigned int vm_page_realtime_count;
3481SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "");
3482extern int vm_pageout_protect_realtime;
3483SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "");
3484
3485/* counts of pages prefaulted when entering a memory object */
3486extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
3487SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
3488SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");
3489
3490#if defined (__x86_64__)
3491extern unsigned int vm_clump_promote_threshold;
3492SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes");
3493#if DEVELOPMENT || DEBUG
3494extern unsigned long vm_clump_stats[];
3495SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page");
3496SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages");
3497SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages");
3498SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages");
3499SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages");
3500SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages");
3501SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages");
3502SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages");
3503SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages");
3504SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages");
3505SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages");
3506SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages");
3507SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages");
3508SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages");
3509SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages");
3510SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages");
3511extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;
3512SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations");
3513SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions");
3514SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages");
3515SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head");
3516#endif /* if DEVELOPMENT || DEBUG */
3517#endif /* #if defined (__x86_64__) */
3518
3519#if CONFIG_SECLUDED_MEMORY
3520
3521SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "");
3522extern unsigned int vm_page_secluded_target;
3523extern unsigned int vm_page_secluded_count;
3524extern unsigned int vm_page_secluded_count_free;
3525extern unsigned int vm_page_secluded_count_inuse;
3526extern unsigned int vm_page_secluded_count_over_target;
3527SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "");
3528SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "");
3529SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "");
3530SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "");
3531SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "");
3532
3533extern struct vm_page_secluded_data vm_page_secluded;
3534SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "");
3535SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "");
3536SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "");
3537SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "");
3538SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "");
3539SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "");
3540SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "");
3541SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "");
3542SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "");
3543
3544#endif /* CONFIG_SECLUDED_MEMORY */
3545
3546#pragma mark Deferred Reclaim
3547
3548#if CONFIG_DEFERRED_RECLAIM
3549
3550#if DEVELOPMENT || DEBUG
3551/*
3552 * VM reclaim testing
3553 */
3554extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid);
3555
3556static int
3557sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS
3558{
3559#pragma unused(arg1, arg2)
3560 int error = EINVAL, pid = 0;
3561	/*
3562	 * Only act when a new value is written
3563	 */
3564 error = sysctl_handle_int(oidp, &pid, 0, req);
3565 if (error || !req->newptr) {
3566 return error;
3567 }
3568
3569	bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid);
3570	if (!success) {
3571		error = EINVAL;
3572	}
3573
3574 return error;
3575}
3576
3577SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue,
3578 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0,
3579 &sysctl_vm_reclaim_drain_async_queue, "I", "");
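/*
 * Usage sketch (assumption: run from a userspace test harness on a
 * DEVELOPMENT or DEBUG kernel, where the "vm.reclaim_drain_async_queue"
 * node registered above exists). Writing a pid blocks the caller until
 * that process's deferred-reclaim queue has been drained:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	static int
 *	drain_reclaim_for_pid(pid_t pid)
 *	{
 *		int value = (int)pid;
 *		// write-only, MASKED node: supply only the new value
 *		if (sysctlbyname("vm.reclaim_drain_async_queue", NULL, NULL,
 *		    &value, sizeof(value)) == -1) {
 *			perror("vm.reclaim_drain_async_queue");
 *			return -1;
 *		}
 *		return 0;
 *	}
 */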
3580
3581
3582extern uint64_t vm_reclaim_max_threshold;
3583extern uint64_t vm_reclaim_trim_divisor;
3584
3585SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "");
3586SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "");
3587#endif /* DEVELOPMENT || DEBUG */
3588
3589#endif /* CONFIG_DEFERRED_RECLAIM */
3590
3591#include <kern/thread.h>
3592#include <sys/user.h>
3593
3594void vm_pageout_io_throttle(void);
3595
3596void
3597vm_pageout_io_throttle(void)
3598{
3599 struct uthread *uthread = current_uthread();
3600
3601	/*
3602	 * If this thread is marked as a low-priority I/O type
3603	 * and the I/O issued during this cleaning operation
3604	 * collided with normal I/O operations, delay here in
3605	 * order to mitigate the impact of this pageout task
3606	 * on the normal operation of the system.
3607	 */
3608
3609 if (uthread->uu_lowpri_window) {
3610		throttle_lowpri_io(1);
3611 }
3612}
3613
3614int
3615vm_pressure_monitor(
3616 __unused struct proc *p,
3617 struct vm_pressure_monitor_args *uap,
3618 int *retval)
3619{
3620 kern_return_t kr;
3621 uint32_t pages_reclaimed;
3622 uint32_t pages_wanted;
3623
3624	kr = mach_vm_pressure_monitor(
3625	    (boolean_t) uap->wait_for_pressure,
3626	    uap->nsecs_monitored,
3627	    (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
3628	    &pages_wanted);
3629
3630 switch (kr) {
3631 case KERN_SUCCESS:
3632 break;
3633 case KERN_ABORTED:
3634 return EINTR;
3635 default:
3636 return EINVAL;
3637 }
3638
3639 if (uap->pages_reclaimed) {
3640 if (copyout((void *)&pages_reclaimed,
3641 uap->pages_reclaimed,
3642 sizeof(pages_reclaimed)) != 0) {
3643 return EFAULT;
3644 }
3645 }
3646
3647 *retval = (int) pages_wanted;
3648 return 0;
3649}
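/*
 * Usage sketch (assumption: the raw syscall stub is reachable from userspace;
 * there is no public header, so the prototype below is hand-declared from the
 * argument layout handled above). On success the return value is the number
 * of pages the pageout daemon still wants, and pages_reclaimed reports how
 * many pages were reclaimed during the monitored interval:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	// Hand-rolled prototype matching vm_pressure_monitor_args (assumption).
 *	extern int vm_pressure_monitor(int wait_for_pressure,
 *	    int nsecs_monitored, uint32_t *pages_reclaimed);
 *
 *	int
 *	main(void)
 *	{
 *		uint32_t reclaimed = 0;
 *		// Don't block waiting for pressure; just sample the current state.
 *		int wanted = vm_pressure_monitor(0, 0, &reclaimed);
 *		if (wanted < 0) {
 *			perror("vm_pressure_monitor");
 *			return 1;
 *		}
 *		printf("pages wanted %d, reclaimed %u\n", wanted, reclaimed);
 *		return 0;
 *	}
 */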
3650
3651int
3652kas_info(struct proc *p,
3653 struct kas_info_args *uap,
3654 int *retval __unused)
3655{
3656#ifndef CONFIG_KAS_INFO
3657 (void)p;
3658 (void)uap;
3659 return ENOTSUP;
3660#else /* CONFIG_KAS_INFO */
3661 int selector = uap->selector;
3662 user_addr_t valuep = uap->value;
3663 user_addr_t sizep = uap->size;
3664 user_size_t size, rsize;
3665 int error;
3666
3667	if (!kauth_cred_issuser(kauth_cred_get())) {
3668 return EPERM;
3669 }
3670
3671#if CONFIG_MACF
3672	error = mac_system_check_kas_info(kauth_cred_get(), selector);
3673 if (error) {
3674 return error;
3675 }
3676#endif
3677
3678 if (IS_64BIT_PROCESS(p)) {
3679 user64_size_t size64;
3680 error = copyin(sizep, &size64, sizeof(size64));
3681 size = (user_size_t)size64;
3682 } else {
3683 user32_size_t size32;
3684 error = copyin(sizep, &size32, sizeof(size32));
3685 size = (user_size_t)size32;
3686 }
3687 if (error) {
3688 return error;
3689 }
3690
3691 switch (selector) {
3692 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
3693 {
3694 uint64_t slide = vm_kernel_slide;
3695
3696 if (sizeof(slide) != size) {
3697 return EINVAL;
3698 }
3699
3700 error = copyout(&slide, valuep, sizeof(slide));
3701 if (error) {
3702 return error;
3703 }
3704 rsize = size;
3705 }
3706 break;
3707 case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR:
3708 {
3709 uint32_t i;
3710 kernel_mach_header_t *mh = &_mh_execute_header;
3711 struct load_command *cmd;
3712 cmd = (struct load_command*) &mh[1];
3713 uint64_t *bases;
3714 rsize = mh->ncmds * sizeof(uint64_t);
3715
3716		/*
3717		 * Return only the required size if no output buffer was passed
3718		 */
3719 if (valuep == 0) {
3720 break;
3721 }
3722
3723 if (rsize > size) {
3724 return EINVAL;
3725 }
3726
3727 bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO);
3728
3729 for (i = 0; i < mh->ncmds; i++) {
3730 if (cmd->cmd == LC_SEGMENT_KERNEL) {
3731 __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd);
3732 bases[i] = (uint64_t)sg->vmaddr;
3733 }
3734 cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize);
3735 }
3736
3737 error = copyout(bases, valuep, rsize);
3738
3739 kfree_data(bases, rsize);
3740
3741 if (error) {
3742 return error;
3743 }
3744 }
3745 break;
3746 case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR:
3747 case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR:
3748 {
3749#if CONFIG_SPTM
3750 const uint64_t slide =
3751 (selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide;
3752#else
3753 const uint64_t slide = 0;
3754#endif
3755
3756 if (sizeof(slide) != size) {
3757 return EINVAL;
3758 }
3759
3760 error = copyout(&slide, valuep, sizeof(slide));
3761 if (error) {
3762 return error;
3763 }
3764 rsize = size;
3765 }
3766 break;
3767 default:
3768 return EINVAL;
3769 }
3770
3771 if (IS_64BIT_PROCESS(p)) {
3772 user64_size_t size64 = (user64_size_t)rsize;
3773 error = copyout(&size64, sizep, sizeof(size64));
3774 } else {
3775 user32_size_t size32 = (user32_size_t)rsize;
3776 error = copyout(&size32, sizep, sizeof(size32));
3777 }
3778
3779 return error;
3780#endif /* CONFIG_KAS_INFO */
3781}
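/*
 * Usage sketch (assumption: <sys/kas_info.h> and its libsystem_kernel stub
 * are available to the caller; the interface is root-only, per the
 * kauth_cred_issuser() check above). Fetches the kernel text slide:
 *
 *	#include <sys/kas_info.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t slide = 0;
 *		size_t size = sizeof(slide);	// must match exactly, see above
 *		if (kas_info(KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR,
 *		    &slide, &size) == -1) {
 *			perror("kas_info");	// EPERM unless running as root
 *			return 1;
 *		}
 *		printf("kernel text slide: 0x%llx\n", (unsigned long long)slide);
 *		return 0;
 *	}
 */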
3782
3783#if __has_feature(ptrauth_calls)
3784/*
3785 * Generate a random pointer signing key that isn't 0.
3786 */
3787uint64_t
3788generate_jop_key(void)
3789{
3790 uint64_t key;
3791
3792 do {
3793 read_random(&key, sizeof key);
3794 } while (key == 0);
3795 return key;
3796}
3797#endif /* __has_feature(ptrauth_calls) */
3798
3799
3800#pragma clang diagnostic push
3801#pragma clang diagnostic ignored "-Wcast-qual"
3802#pragma clang diagnostic ignored "-Wunused-function"
3803
3804static void
3805asserts()
3806{
3807 static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long));
3808 static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long));
3809}
3810
3811SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "");
3812SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "");
3813#pragma clang diagnostic pop
3814
3815extern uint32_t vm_page_pages;
3816SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "");
3817
3818extern uint32_t vm_page_busy_absent_skipped;
3819SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "");
3820
3821extern uint32_t vm_page_upl_tainted;
3822SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "");
3823
3824extern uint32_t vm_page_iopl_tainted;
3825SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "");
3826
3827#if __arm64__ && (DEVELOPMENT || DEBUG)
3828extern int vm_footprint_suspend_allowed;
3829SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "");
3830
3831extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);
3832static int
3833sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS
3834{
3835#pragma unused(oidp, arg1, arg2)
3836 int error = 0;
3837 int new_value;
3838
3839 if (req->newptr == USER_ADDR_NULL) {
3840 return 0;
3841 }
3842 error = SYSCTL_IN(req, &new_value, sizeof(int));
3843 if (error) {
3844 return error;
3845 }
3846 if (!vm_footprint_suspend_allowed) {
3847 if (new_value != 0) {
3848 /* suspends are not allowed... */
3849 return 0;
3850 }
3851 /* ... but let resumes proceed */
3852 }
3853 DTRACE_VM2(footprint_suspend,
3854 vm_map_t, current_map(),
3855 int, new_value);
3856
3857 pmap_footprint_suspend(current_map(), new_value);
3858
3859 return 0;
3860}
3861SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend,
3862 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED,
3863 0, 0, &sysctl_vm_footprint_suspend, "I", "");
3864#endif /* __arm64__ && (DEVELOPMENT || DEBUG) */
3865
3866extern uint64_t vm_map_corpse_footprint_count;
3867extern uint64_t vm_map_corpse_footprint_size_avg;
3868extern uint64_t vm_map_corpse_footprint_size_max;
3869extern uint64_t vm_map_corpse_footprint_full;
3870extern uint64_t vm_map_corpse_footprint_no_buf;
3871SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count,
3872 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "");
3873SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg,
3874 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "");
3875SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max,
3876 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "");
3877SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full,
3878 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "");
3879SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf,
3880 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "");
3881
3882#if CODE_SIGNING_MONITOR
3883extern uint64_t vm_cs_defer_to_csm;
3884extern uint64_t vm_cs_defer_to_csm_not;
3885SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm,
3886 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "");
3887SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not,
3888 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "");
3889#endif /* CODE_SIGNING_MONITOR */
3890
3891extern uint64_t shared_region_pager_copied;
3892extern uint64_t shared_region_pager_slid;
3893extern uint64_t shared_region_pager_slid_error;
3894extern uint64_t shared_region_pager_reclaimed;
3895SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied,
3896 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "");
3897SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid,
3898 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "");
3899SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error,
3900 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "");
3901SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed,
3902 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "");
3903extern int shared_region_destroy_delay;
3904SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay,
3905 CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "");
3906
3907#if MACH_ASSERT
3908extern int pmap_ledgers_panic_leeway;
3909SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "");
3910#endif /* MACH_ASSERT */
3911
3912
3913extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count;
3914extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size;
3915extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max;
3916extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart;
3917extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error;
3918extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count;
3919extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size;
3920extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max;
3921extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart;
3922extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error;
3923extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count;
3924extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size;
3925extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max;
3926SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count,
3927 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "");
3928SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size,
3929 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "");
3930SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max,
3931 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "");
3932SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart,
3933 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "");
3934SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error,
3935 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "");
3936SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count,
3937 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "");
3938SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size,
3939 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "");
3940SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max,
3941 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "");
3942SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart,
3943 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "");
3944SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error,
3945 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "");
3946SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count,
3947 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "");
3948SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size,
3949 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "");
3950SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max,
3951 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "");
3952
3953extern int vm_protect_privileged_from_untrusted;
3954SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted,
3955 CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "");
3956extern uint64_t vm_copied_on_read;
3957SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read,
3958 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "");
3959
3960extern int vm_shared_region_count;
3961extern int vm_shared_region_peak;
3962SYSCTL_INT(_vm, OID_AUTO, shared_region_count,
3963 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "");
3964SYSCTL_INT(_vm, OID_AUTO, shared_region_peak,
3965 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "");
3966#if DEVELOPMENT || DEBUG
3967extern unsigned int shared_region_pagers_resident_count;
3968SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count,
3969 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "");
3970extern unsigned int shared_region_pagers_resident_peak;
3971SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak,
3972 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "");
3973extern int shared_region_pager_count;
3974SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count,
3975 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "");
3976#if __has_feature(ptrauth_calls)
3977extern int shared_region_key_count;
3978SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count,
3979 CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "");
3980extern int vm_shared_region_reslide_count;
3981SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count,
3982 CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "");
3983#endif /* __has_feature(ptrauth_calls) */
3984#endif /* DEVELOPMENT || DEBUG */
3985
3986#if MACH_ASSERT
3987extern int debug4k_filter;
3988SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "");
3989extern int debug4k_panic_on_terminate;
3990SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "");
3991extern int debug4k_panic_on_exception;
3992SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "");
3993extern int debug4k_panic_on_misaligned_sharing;
3994SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "");
3995#endif /* MACH_ASSERT */
3996
3997extern uint64_t vm_map_set_size_limit_count;
3998extern uint64_t vm_map_set_data_limit_count;
3999extern uint64_t vm_map_enter_RLIMIT_AS_count;
4000extern uint64_t vm_map_enter_RLIMIT_DATA_count;
4001SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "");
4002SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "");
4003SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "");
4004SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "");
4005
4006extern uint64_t vm_fault_resilient_media_initiate;
4007extern uint64_t vm_fault_resilient_media_retry;
4008extern uint64_t vm_fault_resilient_media_proceed;
4009extern uint64_t vm_fault_resilient_media_release;
4010extern uint64_t vm_fault_resilient_media_abort1;
4011extern uint64_t vm_fault_resilient_media_abort2;
4012SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "");
4013SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "");
4014SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "");
4015SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "");
4016SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "");
4017SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "");
4018#if MACH_ASSERT
4019extern int vm_fault_resilient_media_inject_error1_rate;
4020extern int vm_fault_resilient_media_inject_error1;
4021extern int vm_fault_resilient_media_inject_error2_rate;
4022extern int vm_fault_resilient_media_inject_error2;
4023extern int vm_fault_resilient_media_inject_error3_rate;
4024extern int vm_fault_resilient_media_inject_error3;
4025SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "");
4026SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "");
4027SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "");
4028SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "");
4029SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "");
4030SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "");
4031#endif /* MACH_ASSERT */
4032
4033extern uint64_t pmap_query_page_info_retries;
4034SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "");
4035
4036/*
4037 * A sysctl that causes all existing shared regions to become stale. They
4038 * will no longer be used by anything new and will be torn down as soon as
4039 * the last existing user exits. Writing a non-zero value triggers this.
4040 * This should only be used by launchd, so we check that the caller is initproc.
4041 */
4042static int
4043shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4044{
4045 unsigned int value = 0;
4046 int changed = 0;
4047	int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
4048 if (error || !changed) {
4049 return error;
4050 }
4051 if (current_proc() != initproc) {
4052 return EPERM;
4053 }
4054
4055 vm_shared_region_pivot();
4056
4057 return 0;
4058}
4059
4060SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot,
4061 CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED,
4062 0, 0, shared_region_pivot, "I", "");
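/*
 * Usage sketch (assumption: issued by initproc/launchd, the only caller the
 * handler above permits). Writing any non-zero value makes all current
 * shared regions stale:
 *
 *	#include <sys/sysctl.h>
 *
 *	static int
 *	pivot_shared_regions(void)
 *	{
 *		int one = 1;
 *		return sysctlbyname("vm.shared_region_pivot", NULL, NULL,
 *		    &one, sizeof(one));
 *	}
 */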
4063
4064extern uint64_t vm_object_shadow_forced;
4065extern uint64_t vm_object_shadow_skipped;
4066SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
4067 &vm_object_shadow_forced, "");
4068SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED,
4069 &vm_object_shadow_skipped, "");
4070
4071SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED,
4072 &vmtc_total, 0, "total text page corruptions detected");
4073
4074
4075#if DEBUG || DEVELOPMENT
4076/*
4077 * A sysctl that can be used to corrupt a text page with an illegal instruction.
4078 * Used for testing text page self-healing.
4079 */
4080extern kern_return_t vm_corrupt_text_addr(uintptr_t);
4081static int
4082corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
4083{
4084 uint64_t value = 0;
4085 int error = sysctl_handle_quad(oidp, &value, 0, req);
4086 if (error || !req->newptr) {
4087 return error;
4088 }
4089
4090 if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) {
4091 return 0;
4092 } else {
4093 return EINVAL;
4094 }
4095}
4096
4097SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr,
4098 CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
4099 0, 0, corrupt_text_addr, "-", "");
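/*
 * Usage sketch (assumption: DEBUG or DEVELOPMENT kernel, invoked as root from
 * a test harness, with text_addr being a kernel text address obtained through
 * a test-only interface). The node takes a 64-bit value on write:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdint.h>
 *
 *	static int
 *	corrupt_text_page(uint64_t text_addr)
 *	{
 *		return sysctlbyname("vm.corrupt_text_addr", NULL, NULL,
 *		    &text_addr, sizeof(text_addr));
 *	}
 */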
4100#endif /* DEBUG || DEVELOPMENT */
4101
4102#if CONFIG_MAP_RANGES
4103/*
4104 * vm.malloc_ranges
4105 *
4106 * space-separated list of <left:right> hexadecimal addresses.
4107 */
4108static int
4109vm_map_malloc_ranges SYSCTL_HANDLER_ARGS
4110{
4111 vm_map_t map = current_map();
4112 struct mach_vm_range r1, r2;
4113 char str[20 * 4];
4114 int len;
4115
4116 if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) {
4117 return ENOENT;
4118 }
4119 if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) {
4120 return ENOENT;
4121 }
4122
4123 len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx",
4124 r1.max_address, r2.min_address,
4125 r2.max_address, get_map_max(map));
4126
4127 return SYSCTL_OUT(req, str, len);
4128}
4129
4130SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges,
4131 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
4132 0, 0, &vm_map_malloc_ranges, "A", "");
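/*
 * Usage sketch (assumption: CONFIG_MAP_RANGES kernel; the node is read-only
 * and reflects the calling process's map). Reads the space-separated
 * "<left>:<right>" hex pairs published above:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[80];
 *		size_t len = sizeof(buf);
 *		if (sysctlbyname("vm.malloc_ranges", buf, &len, NULL, 0) == -1) {
 *			perror("vm.malloc_ranges");
 *			return 1;
 *		}
 *		// the handler does not NUL-terminate, so bound the print by len
 *		printf("malloc ranges: %.*s\n", (int)len, buf);
 *		return 0;
 *	}
 */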
4133
4134#if DEBUG || DEVELOPMENT
4135static int
4136vm_map_user_range_default SYSCTL_HANDLER_ARGS
4137{
4138#pragma unused(arg1, arg2, oidp)
4139 struct mach_vm_range range;
4140
4141 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range)
4142 != KERN_SUCCESS) {
4143 return EINVAL;
4144 }
4145
4146 return SYSCTL_OUT(req, &range, sizeof(range));
4147}
4148
4149static int
4150vm_map_user_range_heap SYSCTL_HANDLER_ARGS
4151{
4152#pragma unused(arg1, arg2, oidp)
4153 struct mach_vm_range range;
4154
4155 if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range)
4156 != KERN_SUCCESS) {
4157 return EINVAL;
4158 }
4159
4160 return SYSCTL_OUT(req, &range, sizeof(range));
4161}
4162
4163/*
4164 * A sysctl that can be used to return ranges for the current VM map.
4165 * Used for testing VM ranges.
4166 */
4167SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4168 0, 0, &vm_map_user_range_default, "S,mach_vm_range", "");
4169SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
4170 0, 0, &vm_map_user_range_heap, "S,mach_vm_range", "");
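/*
 * Usage sketch (assumption: DEBUG or DEVELOPMENT kernel with
 * CONFIG_MAP_RANGES; the struct below hand-mirrors the kernel's
 * mach_vm_range layout of two 64-bit addresses). Reads the default user
 * range of the calling process:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	struct user_vm_range {		// assumed layout of mach_vm_range
 *		uint64_t min_address;
 *		uint64_t max_address;
 *	};
 *
 *	int
 *	main(void)
 *	{
 *		struct user_vm_range r;
 *		size_t len = sizeof(r);
 *		if (sysctlbyname("vm.vm_map_user_range_default",
 *		    &r, &len, NULL, 0) == -1) {
 *			perror("vm.vm_map_user_range_default");
 *			return 1;
 *		}
 *		printf("default range: 0x%llx-0x%llx\n",
 *		    (unsigned long long)r.min_address,
 *		    (unsigned long long)r.max_address);
 *		return 0;
 *	}
 */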
4171
4172#endif /* DEBUG || DEVELOPMENT */
4173#endif /* CONFIG_MAP_RANGES */
4174
4175#if DEBUG || DEVELOPMENT
4176#endif /* DEBUG || DEVELOPMENT */
4177
4178extern uint64_t vm_map_range_overflows_count;
4179SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "");
4180extern boolean_t vm_map_range_overflows_log;
4181SYSCTL_INT(_vm, OID_AUTO, map_range_overflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");
4182
4183extern uint64_t c_seg_filled_no_contention;
4184extern uint64_t c_seg_filled_contention;
4185extern clock_sec_t c_seg_filled_contention_sec_max;
4186extern clock_nsec_t c_seg_filled_contention_nsec_max;
4187SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "");
4188SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "");
4189SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "");
4190SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "");
4191#if (XNU_TARGET_OS_OSX && __arm64__)
4192extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */
4193extern int c_process_major_yield_after; /* yield after moving ? segments */
4194extern uint64_t c_process_major_reports;
4195extern clock_sec_t c_process_major_max_sec;
4196extern clock_nsec_t c_process_major_max_nsec;
4197extern uint32_t c_process_major_peak_segcount;
4198SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "");
4199SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "");
4200SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "");
4201SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "");
4202SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "");
4203SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "");
4204#endif /* (XNU_TARGET_OS_OSX && __arm64__) */
4205
4206#if DEVELOPMENT || DEBUG
4207extern int panic_object_not_alive;
4208SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "");
4209#endif /* DEVELOPMENT || DEBUG */
4210
4211#if MACH_ASSERT
4212extern int fbdp_no_panic;
4213SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "");
4214#endif /* MACH_ASSERT */
4215
4216