1 | /* |
2 | * Copyright (c) 2000-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * Mach Operating System |
30 | * Copyright (c) 1987 Carnegie-Mellon University |
31 | * All rights reserved. The CMU software License Agreement specifies |
32 | * the terms and conditions for use and redistribution. |
33 | */ |
34 | /* |
35 | * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce |
36 | * support for mandatory and extensible security protections. This notice |
37 | * is included in support of clause 2.2 (b) of the Apple Public License, |
38 | * Version 2.0. |
39 | */ |
40 | #include <vm/vm_options.h> |
41 | |
42 | #include <kern/ecc.h> |
43 | #include <kern/task.h> |
44 | #include <kern/thread.h> |
45 | #include <kern/debug.h> |
46 | #include <kern/extmod_statistics.h> |
47 | #include <mach/mach_traps.h> |
48 | #include <mach/port.h> |
49 | #include <mach/sdt.h> |
50 | #include <mach/task.h> |
51 | #include <mach/task_access.h> |
52 | #include <mach/task_special_ports.h> |
53 | #include <mach/time_value.h> |
54 | #include <mach/vm_map.h> |
55 | #include <mach/vm_param.h> |
56 | #include <mach/vm_prot.h> |
57 | #include <machine/machine_routines.h> |
58 | |
59 | #include <sys/file_internal.h> |
60 | #include <sys/param.h> |
61 | #include <sys/systm.h> |
62 | #include <sys/dir.h> |
63 | #include <sys/namei.h> |
64 | #include <sys/proc_internal.h> |
65 | #include <sys/kauth.h> |
66 | #include <sys/vm.h> |
67 | #include <sys/file.h> |
68 | #include <sys/vnode_internal.h> |
69 | #include <sys/mount.h> |
70 | #include <sys/xattr.h> |
71 | #include <sys/trace.h> |
72 | #include <sys/kernel.h> |
73 | #include <sys/ubc_internal.h> |
74 | #include <sys/user.h> |
75 | #include <sys/syslog.h> |
76 | #include <sys/stat.h> |
77 | #include <sys/sysproto.h> |
78 | #include <sys/mman.h> |
79 | #include <sys/sysctl.h> |
80 | #include <sys/cprotect.h> |
81 | #include <sys/kpi_socket.h> |
82 | #include <sys/kas_info.h> |
83 | #include <sys/socket.h> |
84 | #include <sys/socketvar.h> |
85 | #include <sys/random.h> |
86 | #include <sys/code_signing.h> |
87 | #if NECP |
88 | #include <net/necp.h> |
89 | #endif /* NECP */ |
90 | #if SKYWALK |
91 | #include <skywalk/os_channel.h> |
92 | #endif /* SKYWALK */ |
93 | |
94 | #include <security/audit/audit.h> |
95 | #include <security/mac.h> |
96 | #include <bsm/audit_kevents.h> |
97 | |
98 | #include <kern/kalloc.h> |
99 | #include <vm/vm_map.h> |
100 | #include <vm/vm_kern.h> |
101 | #include <vm/vm_pageout.h> |
102 | |
103 | #include <mach/shared_region.h> |
104 | #include <vm/vm_shared_region.h> |
105 | |
106 | #include <vm/vm_dyld_pager.h> |
107 | |
108 | #include <vm/vm_protos.h> |
109 | |
110 | #include <sys/kern_memorystatus.h> |
111 | #include <sys/kern_memorystatus_freeze.h> |
112 | #include <sys/proc_internal.h> |
113 | |
114 | #include <mach-o/fixup-chains.h> |
115 | |
116 | #if CONFIG_MACF |
117 | #include <security/mac_framework.h> |
118 | #endif |
119 | |
120 | #include <kern/bits.h> |
121 | |
122 | #if CONFIG_CSR |
123 | #include <sys/csr.h> |
124 | #endif /* CONFIG_CSR */ |
125 | #include <sys/trust_caches.h> |
126 | #include <libkern/amfi/amfi.h> |
127 | #include <IOKit/IOBSD.h> |
128 | |
129 | #if VM_MAP_DEBUG_APPLE_PROTECT |
SYSCTL_INT(_vm, OID_AUTO, map_debug_apple_protect, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_apple_protect, 0, "");
131 | #endif /* VM_MAP_DEBUG_APPLE_PROTECT */ |
132 | |
133 | #if VM_MAP_DEBUG_FOURK |
SYSCTL_INT(_vm, OID_AUTO, map_debug_fourk, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_debug_fourk, 0, "");
135 | #endif /* VM_MAP_DEBUG_FOURK */ |
136 | |
137 | #if DEVELOPMENT || DEBUG |
138 | |
139 | static int |
140 | sysctl_kmem_alloc_contig SYSCTL_HANDLER_ARGS |
141 | { |
142 | #pragma unused(arg1, arg2) |
143 | vm_offset_t kaddr; |
144 | kern_return_t kr; |
145 | int error = 0; |
146 | int size = 0; |
147 | |
148 | error = sysctl_handle_int(oidp, &size, 0, req); |
149 | if (error || !req->newptr) { |
150 | return error; |
151 | } |
152 | |
153 | kr = kmem_alloc_contig(kernel_map, &kaddr, (vm_size_t)size, |
154 | 0, 0, 0, KMA_DATA, VM_KERN_MEMORY_IOKIT); |
155 | |
156 | if (kr == KERN_SUCCESS) { |
157 | kmem_free(kernel_map, kaddr, size); |
158 | } |
159 | |
160 | return error; |
161 | } |
162 | |
SYSCTL_PROC(_vm, OID_AUTO, kmem_alloc_contig, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_alloc_contig, "I", "");
165 | |
166 | extern int vm_region_footprint; |
SYSCTL_INT(_vm, OID_AUTO, region_footprint, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED, &vm_region_footprint, 0, "");
168 | |
169 | static int |
170 | sysctl_kmem_gobj_stats SYSCTL_HANDLER_ARGS |
171 | { |
172 | #pragma unused(arg1, arg2, oidp) |
173 | kmem_gobj_stats stats = kmem_get_gobj_stats(); |
174 | |
175 | return SYSCTL_OUT(req, &stats, sizeof(stats)); |
176 | } |
177 | |
SYSCTL_PROC(_vm, OID_AUTO, sysctl_kmem_gobj_stats,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_kmem_gobj_stats, "S,kmem_gobj_stats", "");
181 | |
182 | #endif /* DEVELOPMENT || DEBUG */ |
183 | |
static int
sysctl_vm_self_region_footprint SYSCTL_HANDLER_ARGS
186 | { |
187 | #pragma unused(arg1, arg2, oidp) |
188 | int error = 0; |
189 | int value; |
190 | |
191 | value = task_self_region_footprint(); |
192 | error = SYSCTL_OUT(req, &value, sizeof(int)); |
193 | if (error) { |
194 | return error; |
195 | } |
196 | |
197 | if (!req->newptr) { |
198 | return 0; |
199 | } |
200 | |
201 | error = SYSCTL_IN(req, &value, sizeof(int)); |
202 | if (error) { |
203 | return error; |
204 | } |
	task_self_region_footprint_set(value);
206 | return 0; |
207 | } |
SYSCTL_PROC(_vm, OID_AUTO, self_region_footprint, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_footprint, "I", "");
209 | |
210 | static int |
211 | sysctl_vm_self_region_page_size SYSCTL_HANDLER_ARGS |
212 | { |
213 | #pragma unused(arg1, arg2, oidp) |
214 | int error = 0; |
215 | int value; |
216 | |
217 | value = (1 << thread_self_region_page_shift()); |
218 | error = SYSCTL_OUT(req, &value, sizeof(int)); |
219 | if (error) { |
220 | return error; |
221 | } |
222 | |
223 | if (!req->newptr) { |
224 | return 0; |
225 | } |
226 | |
227 | error = SYSCTL_IN(req, &value, sizeof(int)); |
228 | if (error) { |
229 | return error; |
230 | } |
231 | |
232 | if (value != 0 && value != 4096 && value != 16384) { |
233 | return EINVAL; |
234 | } |
235 | |
236 | #if !__ARM_MIXED_PAGE_SIZE__ |
237 | if (value != vm_map_page_size(current_map())) { |
238 | return EINVAL; |
239 | } |
240 | #endif /* !__ARM_MIXED_PAGE_SIZE__ */ |
241 | |
	thread_self_region_page_shift_set(bit_first(value));
243 | return 0; |
244 | } |
SYSCTL_PROC(_vm, OID_AUTO, self_region_page_size, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_vm_self_region_page_size, "I", "");
246 | |
247 | |
248 | #if DEVELOPMENT || DEBUG |
249 | extern int panic_on_unsigned_execute; |
SYSCTL_INT(_vm, OID_AUTO, panic_on_unsigned_execute, CTLFLAG_RW | CTLFLAG_LOCKED, &panic_on_unsigned_execute, 0, "");
251 | |
252 | extern int vm_log_xnu_user_debug; |
SYSCTL_INT(_vm, OID_AUTO, log_xnu_user_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_log_xnu_user_debug, 0, "");
254 | #endif /* DEVELOPMENT || DEBUG */ |
255 | |
256 | extern int cs_executable_create_upl; |
257 | extern int cs_executable_wire; |
SYSCTL_INT(_vm, OID_AUTO, cs_executable_create_upl, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_create_upl, 0, "");
SYSCTL_INT(_vm, OID_AUTO, cs_executable_wire, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_executable_wire, 0, "");
260 | |
extern int apple_protect_pager_count;
extern int apple_protect_pager_count_mapped;
extern unsigned int apple_protect_pager_cache_limit;
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count, 0, "");
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_count_mapped, CTLFLAG_RD | CTLFLAG_LOCKED, &apple_protect_pager_count_mapped, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, apple_protect_pager_cache_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_cache_limit, 0, "");
267 | |
268 | #if DEVELOPMENT || DEBUG |
269 | extern int radar_20146450; |
SYSCTL_INT(_vm, OID_AUTO, radar_20146450, CTLFLAG_RW | CTLFLAG_LOCKED, &radar_20146450, 0, "");
271 | |
272 | extern int macho_printf; |
SYSCTL_INT(_vm, OID_AUTO, macho_printf, CTLFLAG_RW | CTLFLAG_LOCKED, &macho_printf, 0, "");
274 | |
275 | extern int apple_protect_pager_data_request_debug; |
SYSCTL_INT(_vm, OID_AUTO, apple_protect_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &apple_protect_pager_data_request_debug, 0, "");
277 | |
278 | #if __arm64__ |
279 | /* These are meant to support the page table accounting unit test. */ |
280 | extern unsigned int arm_hardware_page_size; |
281 | extern unsigned int arm_pt_desc_size; |
282 | extern unsigned int arm_pt_root_size; |
283 | extern unsigned int inuse_user_tteroot_count; |
284 | extern unsigned int inuse_kernel_tteroot_count; |
285 | extern unsigned int inuse_user_ttepages_count; |
286 | extern unsigned int inuse_kernel_ttepages_count; |
287 | extern unsigned int inuse_user_ptepages_count; |
288 | extern unsigned int inuse_kernel_ptepages_count; |
SYSCTL_UINT(_vm, OID_AUTO, native_hw_pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_hardware_page_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_desc_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_desc_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, arm_pt_root_size, CTLFLAG_RD | CTLFLAG_LOCKED, &arm_pt_root_size, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_tteroot_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_tte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ttepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, user_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_user_ptepages_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, kernel_pte_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &inuse_kernel_ptepages_count, 0, "");
298 | #if !CONFIG_SPTM |
299 | extern unsigned int free_page_size_tt_count; |
300 | extern unsigned int free_two_page_size_tt_count; |
301 | extern unsigned int free_tt_count; |
SYSCTL_UINT(_vm, OID_AUTO, free_1page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_2page_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_two_page_size_tt_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, free_tte_root, CTLFLAG_RD | CTLFLAG_LOCKED, &free_tt_count, 0, "");
305 | #endif |
306 | #if DEVELOPMENT || DEBUG |
307 | extern unsigned long pmap_asid_flushes; |
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_flushes, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_flushes, "");
extern unsigned long pmap_asid_hits;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_hits, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_hits, "");
extern unsigned long pmap_asid_misses;
SYSCTL_ULONG(_vm, OID_AUTO, pmap_asid_misses, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_asid_misses, "");
313 | #endif |
314 | #endif /* __arm64__ */ |
315 | |
316 | #if __arm64__ |
317 | extern int fourk_pager_data_request_debug; |
SYSCTL_INT(_vm, OID_AUTO, fourk_pager_data_request_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &fourk_pager_data_request_debug, 0, "");
319 | #endif /* __arm64__ */ |
320 | #endif /* DEVELOPMENT || DEBUG */ |
321 | |
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, "");
SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, "");
331 | #if VM_SCAN_FOR_SHADOW_CHAIN |
332 | static int vm_shadow_max_enabled = 0; /* Disabled by default */ |
333 | extern int proc_shadow_max(void); |
334 | static int |
335 | vm_shadow_max SYSCTL_HANDLER_ARGS |
336 | { |
337 | #pragma unused(arg1, arg2, oidp) |
338 | int value = 0; |
339 | |
340 | if (vm_shadow_max_enabled) { |
341 | value = proc_shadow_max(); |
342 | } |
343 | |
344 | return SYSCTL_OUT(req, &value, sizeof(value)); |
345 | } |
346 | SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, |
347 | 0, 0, &vm_shadow_max, "I" , "" ); |
348 | |
SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, "");
350 | |
351 | #endif /* VM_SCAN_FOR_SHADOW_CHAIN */ |
352 | |
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
354 | |
355 | __attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( |
356 | mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor); |
357 | /* |
358 | * Sysctl's related to data/stack execution. See osfmk/vm/vm_map.c |
359 | */ |
360 | |
361 | #if DEVELOPMENT || DEBUG |
362 | extern int allow_stack_exec, allow_data_exec; |
363 | |
SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
366 | |
367 | #endif /* DEVELOPMENT || DEBUG */ |
368 | |
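/*
 * Indexed by (prot & VM_PROT_ALL): VM_PROT_READ is 0x1, VM_PROT_WRITE is 0x2
 * and VM_PROT_EXECUTE is 0x4, so e.g. VM_PROT_READ | VM_PROT_WRITE selects
 * "read-write" at index 3.
 */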
369 | static const char *prot_values[] = { |
370 | "none" , |
371 | "read-only" , |
372 | "write-only" , |
373 | "read-write" , |
374 | "execute-only" , |
375 | "read-execute" , |
376 | "write-execute" , |
377 | "read-write-execute" |
378 | }; |
379 | |
380 | void |
381 | log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot) |
382 | { |
383 | printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n" , |
384 | current_proc()->p_comm, proc_getpid(current_proc()), vaddr, prot_values[prot & VM_PROT_ALL]); |
385 | } |
386 | |
387 | /* |
388 | * shared_region_unnest_logging: level of logging of unnesting events |
389 | * 0 - no logging |
390 | * 1 - throttled logging of unexpected unnesting events (default) |
391 | * 2 - unthrottled logging of unexpected unnesting events |
392 | * 3+ - unthrottled logging of all unnesting events |
393 | */ |
394 | int shared_region_unnest_logging = 1; |
395 | |
396 | SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED, |
    &shared_region_unnest_logging, 0, "");
398 | |
399 | int vm_shared_region_unnest_log_interval = 10; |
400 | int shared_region_unnest_log_count_threshold = 5; |
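
/*
 * User-space sketch (not kernel code): the logging level above can be read
 * and raised with sysctlbyname(3); the knob name follows from the SYSCTL_INT
 * registration above.
 *
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int level;
 *		size_t len = sizeof(level);
 *
 *		if (sysctlbyname("vm.shared_region_unnest_logging",
 *		    &level, &len, NULL, 0) == 0) {
 *			printf("unnest logging level: %d\n", level);
 *		}
 *		level = 2;	// unthrottled logging of unexpected unnests
 *		return sysctlbyname("vm.shared_region_unnest_logging",
 *		    NULL, NULL, &level, sizeof(level));
 *	}
 */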
401 | |
402 | |
403 | #if XNU_TARGET_OS_OSX |
404 | |
405 | #if defined (__x86_64__) |
406 | static int scdir_enforce = 1; |
407 | #else /* defined (__x86_64__) */ |
408 | static int scdir_enforce = 0; /* AOT caches live elsewhere */ |
409 | #endif /* defined (__x86_64__) */ |
410 | |
411 | static char *scdir_path[] = { |
412 | "/System/Library/dyld/" , |
413 | "/System/Volumes/Preboot/Cryptexes/OS/System/Library/dyld" , |
414 | "/System/Cryptexes/OS/System/Library/dyld" , |
415 | NULL |
416 | }; |
417 | |
418 | #else /* XNU_TARGET_OS_OSX */ |
419 | |
420 | static int scdir_enforce = 0; |
421 | static char *scdir_path[] = { |
422 | "/System/Library/Caches/com.apple.dyld/" , |
423 | "/private/preboot/Cryptexes/OS/System/Library/Caches/com.apple.dyld" , |
424 | "/System/Cryptexes/OS/System/Library/Caches/com.apple.dyld" , |
425 | NULL |
426 | }; |
427 | |
428 | #endif /* XNU_TARGET_OS_OSX */ |
429 | |
static char *driverkit_scdir_path[] = {
	"/System/DriverKit/System/Library/dyld/",
#if XNU_TARGET_OS_OSX
	"/System/Volumes/Preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#else
	"/private/preboot/Cryptexes/OS/System/DriverKit/System/Library/dyld",
#endif /* XNU_TARGET_OS_OSX */
	"/System/Cryptexes/OS/System/DriverKit/System/Library/dyld",
438 | NULL |
439 | }; |
440 | |
441 | #ifndef SECURE_KERNEL |
442 | static int sysctl_scdir_enforce SYSCTL_HANDLER_ARGS |
443 | { |
444 | #if CONFIG_CSR |
445 | if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) { |
446 | printf("Failed attempt to set vm.enforce_shared_cache_dir sysctl\n" ); |
447 | return EPERM; |
448 | } |
449 | #endif /* CONFIG_CSR */ |
450 | return sysctl_handle_int(oidp, arg1, arg2, req); |
451 | } |
452 | |
SYSCTL_PROC(_vm, OID_AUTO, enforce_shared_cache_dir, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, sysctl_scdir_enforce, "I", "");
454 | #endif |
455 | |
456 | /* These log rate throttling state variables aren't thread safe, but |
457 | * are sufficient unto the task. |
458 | */ |
459 | static int64_t last_unnest_log_time = 0; |
460 | static int shared_region_unnest_log_count = 0; |
461 | |
462 | void |
463 | log_unnest_badness( |
464 | vm_map_t m, |
465 | vm_map_offset_t s, |
466 | vm_map_offset_t e, |
467 | boolean_t is_nested_map, |
468 | vm_map_offset_t lowest_unnestable_addr) |
469 | { |
470 | struct timeval tv; |
471 | |
472 | if (shared_region_unnest_logging == 0) { |
473 | return; |
474 | } |
475 | |
476 | if (shared_region_unnest_logging <= 2 && |
477 | is_nested_map && |
478 | s >= lowest_unnestable_addr) { |
479 | /* |
480 | * Unnesting of writable map entries is fine. |
481 | */ |
482 | return; |
483 | } |
484 | |
485 | if (shared_region_unnest_logging <= 1) { |
		microtime(&tv);
487 | if ((tv.tv_sec - last_unnest_log_time) < |
488 | vm_shared_region_unnest_log_interval) { |
489 | if (shared_region_unnest_log_count++ > |
490 | shared_region_unnest_log_count_threshold) { |
491 | return; |
492 | } |
493 | } else { |
494 | last_unnest_log_time = tv.tv_sec; |
495 | shared_region_unnest_log_count = 0; |
496 | } |
497 | } |
498 | |
499 | DTRACE_VM4(log_unnest_badness, |
500 | vm_map_t, m, |
501 | vm_map_offset_t, s, |
502 | vm_map_offset_t, e, |
503 | vm_map_offset_t, lowest_unnestable_addr); |
504 | printf("%s[%d] triggered unnest of range 0x%qx->0x%qx of DYLD shared region in VM map %p. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n" , current_proc()->p_comm, proc_getpid(current_proc()), (uint64_t)s, (uint64_t)e, (void *) VM_KERNEL_ADDRPERM(m)); |
505 | } |
506 | |
uint64_t
vm_purge_filebacked_pagers(void)
509 | { |
510 | uint64_t pages_purged; |
511 | |
512 | pages_purged = 0; |
513 | pages_purged += apple_protect_pager_purge_all(); |
514 | pages_purged += shared_region_pager_purge_all(); |
515 | pages_purged += dyld_pager_purge_all(); |
516 | #if DEVELOPMENT || DEBUG |
517 | printf("%s:%d pages purged: %llu\n" , __FUNCTION__, __LINE__, pages_purged); |
518 | #endif /* DEVELOPMENT || DEBUG */ |
519 | return pages_purged; |
520 | } |
521 | |
522 | int |
523 | useracc( |
524 | user_addr_t addr, |
525 | user_size_t len, |
526 | int prot) |
527 | { |
528 | vm_map_t map; |
529 | |
530 | map = current_map(); |
531 | return vm_map_check_protection( |
532 | map, |
533 | vm_map_trunc_page(addr, |
534 | vm_map_page_mask(map)), |
535 | vm_map_round_page(addr + len, |
536 | vm_map_page_mask(map)), |
		prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE);
538 | } |
539 | |
540 | int |
541 | vslock( |
542 | user_addr_t addr, |
543 | user_size_t len) |
544 | { |
545 | kern_return_t kret; |
546 | vm_map_t map; |
547 | |
548 | map = current_map(); |
549 | kret = vm_map_wire_kernel(map, |
550 | vm_map_trunc_page(addr, |
551 | vm_map_page_mask(map)), |
552 | vm_map_round_page(addr + len, |
553 | vm_map_page_mask(map)), |
554 | VM_PROT_READ | VM_PROT_WRITE, VM_KERN_MEMORY_BSD, |
555 | FALSE); |
556 | |
557 | switch (kret) { |
558 | case KERN_SUCCESS: |
559 | return 0; |
560 | case KERN_INVALID_ADDRESS: |
561 | case KERN_NO_SPACE: |
562 | return ENOMEM; |
563 | case KERN_PROTECTION_FAILURE: |
564 | return EACCES; |
565 | default: |
566 | return EINVAL; |
567 | } |
568 | } |
569 | |
570 | int |
571 | vsunlock( |
572 | user_addr_t addr, |
573 | user_size_t len, |
574 | __unused int dirtied) |
575 | { |
576 | #if FIXME /* [ */ |
577 | pmap_t pmap; |
578 | vm_page_t pg; |
579 | vm_map_offset_t vaddr; |
580 | ppnum_t paddr; |
581 | #endif /* FIXME ] */ |
582 | kern_return_t kret; |
583 | vm_map_t map; |
584 | |
585 | map = current_map(); |
586 | |
587 | #if FIXME /* [ */ |
588 | if (dirtied) { |
589 | pmap = get_task_pmap(current_task()); |
590 | for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); |
591 | vaddr < vm_map_round_page(addr + len, PAGE_MASK); |
592 | vaddr += PAGE_SIZE) { |
593 | paddr = pmap_find_phys(pmap, vaddr); |
594 | pg = PHYS_TO_VM_PAGE(paddr); |
595 | vm_page_set_modified(pg); |
596 | } |
597 | } |
598 | #endif /* FIXME ] */ |
599 | #ifdef lint |
600 | dirtied++; |
601 | #endif /* lint */ |
602 | kret = vm_map_unwire(map, |
603 | vm_map_trunc_page(addr, |
604 | vm_map_page_mask(map)), |
605 | vm_map_round_page(addr + len, |
606 | vm_map_page_mask(map)), |
607 | FALSE); |
608 | switch (kret) { |
609 | case KERN_SUCCESS: |
610 | return 0; |
611 | case KERN_INVALID_ADDRESS: |
612 | case KERN_NO_SPACE: |
613 | return ENOMEM; |
614 | case KERN_PROTECTION_FAILURE: |
615 | return EACCES; |
616 | default: |
617 | return EINVAL; |
618 | } |
619 | } |
620 | |
621 | int |
622 | subyte( |
623 | user_addr_t addr, |
624 | int byte) |
625 | { |
626 | char character; |
627 | |
628 | character = (char)byte; |
629 | return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1; |
630 | } |
631 | |
632 | int |
633 | suibyte( |
634 | user_addr_t addr, |
635 | int byte) |
636 | { |
637 | char character; |
638 | |
639 | character = (char)byte; |
640 | return copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1; |
641 | } |
642 | |
643 | int |
644 | fubyte(user_addr_t addr) |
645 | { |
646 | unsigned char byte; |
647 | |
648 | if (copyin(addr, (void *) &byte, sizeof(char))) { |
649 | return -1; |
650 | } |
651 | return byte; |
652 | } |
653 | |
654 | int |
655 | fuibyte(user_addr_t addr) |
656 | { |
657 | unsigned char byte; |
658 | |
659 | if (copyin(addr, (void *) &(byte), sizeof(char))) { |
660 | return -1; |
661 | } |
662 | return byte; |
663 | } |
664 | |
665 | int |
666 | suword( |
667 | user_addr_t addr, |
668 | long word) |
669 | { |
670 | return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1; |
671 | } |
672 | |
673 | long |
674 | fuword(user_addr_t addr) |
675 | { |
676 | long word = 0; |
677 | |
678 | if (copyin(addr, (void *) &word, sizeof(int))) { |
679 | return -1; |
680 | } |
681 | return word; |
682 | } |
683 | |
684 | /* suiword and fuiword are the same as suword and fuword, respectively */ |
685 | |
686 | int |
687 | suiword( |
688 | user_addr_t addr, |
689 | long word) |
690 | { |
691 | return copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1; |
692 | } |
693 | |
694 | long |
695 | fuiword(user_addr_t addr) |
696 | { |
697 | long word = 0; |
698 | |
699 | if (copyin(addr, (void *) &word, sizeof(int))) { |
700 | return -1; |
701 | } |
702 | return word; |
703 | } |
704 | |
705 | /* |
706 | * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the |
707 | * fetching and setting of process-sized size_t and pointer values. |
708 | */ |
709 | int |
710 | sulong(user_addr_t addr, int64_t word) |
711 | { |
712 | if (IS_64BIT_PROCESS(current_proc())) { |
713 | return copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1; |
714 | } else { |
		return suiword(addr, (long)word);
716 | } |
717 | } |
718 | |
719 | int64_t |
720 | fulong(user_addr_t addr) |
721 | { |
722 | int64_t longword; |
723 | |
724 | if (IS_64BIT_PROCESS(current_proc())) { |
725 | if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) { |
726 | return -1; |
727 | } |
728 | return longword; |
729 | } else { |
730 | return (int64_t)fuiword(addr); |
731 | } |
732 | } |
733 | |
734 | int |
735 | suulong(user_addr_t addr, uint64_t uword) |
736 | { |
737 | if (IS_64BIT_PROCESS(current_proc())) { |
738 | return copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1; |
739 | } else { |
		return suiword(addr, (uint32_t)uword);
741 | } |
742 | } |
743 | |
744 | uint64_t |
745 | fuulong(user_addr_t addr) |
746 | { |
747 | uint64_t ulongword; |
748 | |
749 | if (IS_64BIT_PROCESS(current_proc())) { |
750 | if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) { |
751 | return -1ULL; |
752 | } |
753 | return ulongword; |
754 | } else { |
755 | return (uint64_t)fuiword(addr); |
756 | } |
757 | } |
758 | |
759 | int |
760 | swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval) |
761 | { |
762 | return ENOTSUP; |
763 | } |
764 | |
765 | /* |
766 | * pid_for_task |
767 | * |
768 | * Find the BSD process ID for the Mach task associated with the given Mach port |
769 | * name |
770 | * |
771 | * Parameters: args User argument descriptor (see below) |
772 | * |
773 | * Indirect parameters: args->t Mach port name |
774 | * args->pid Process ID (returned value; see below) |
775 | * |
 * Returns:	KERN_SUCCESS	Success
777 | * KERN_FAILURE Not success |
778 | * |
779 | * Implicit returns: args->pid Process ID |
780 | * |
781 | */ |
782 | kern_return_t |
783 | pid_for_task( |
784 | struct pid_for_task_args *args) |
785 | { |
786 | mach_port_name_t t = args->t; |
787 | user_addr_t pid_addr = args->pid; |
788 | proc_t p; |
789 | task_t t1; |
790 | int pid = -1; |
791 | kern_return_t err = KERN_SUCCESS; |
792 | |
793 | AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK); |
794 | AUDIT_ARG(mach_port1, t); |
795 | |
	t1 = port_name_to_task_name(t);
797 | |
798 | if (t1 == TASK_NULL) { |
799 | err = KERN_FAILURE; |
800 | goto pftout; |
801 | } else { |
802 | p = get_bsdtask_info(t1); |
803 | if (p) { |
804 | pid = proc_pid(p); |
805 | err = KERN_SUCCESS; |
		} else if (task_is_a_corpse(t1)) {
			pid = task_pid(t1);
808 | err = KERN_SUCCESS; |
809 | } else { |
810 | err = KERN_FAILURE; |
811 | } |
812 | } |
813 | task_deallocate(t1); |
814 | pftout: |
815 | AUDIT_ARG(pid, pid); |
816 | (void) copyout((char *) &pid, pid_addr, sizeof(int)); |
817 | AUDIT_MACH_SYSCALL_EXIT(err); |
818 | return err; |
819 | } |
820 | |
821 | /* |
822 | * |
823 | * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self |
824 | * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication |
825 | * |
826 | */ |
827 | static int tfp_policy = KERN_TFP_POLICY_DEFAULT; |
828 | |
829 | /* |
830 | * Routine: task_for_pid_posix_check |
831 | * Purpose: |
832 | * Verify that the current process should be allowed to |
833 | * get the target process's task port. This is only |
834 | * permitted if: |
835 | * - The current process is root |
836 | * OR all of the following are true: |
837 | * - The target process's real, effective, and saved uids |
838 | * are the same as the current proc's euid, |
839 | * - The target process's group set is a subset of the |
840 | * calling process's group set, and |
841 | * - The target process hasn't switched credentials. |
842 | * |
843 | * Returns: TRUE: permitted |
844 | * FALSE: denied |
845 | */ |
846 | static int |
847 | task_for_pid_posix_check(proc_t target) |
848 | { |
849 | kauth_cred_t targetcred, mycred; |
850 | bool checkcredentials; |
851 | uid_t myuid; |
852 | int allowed; |
853 | |
854 | /* No task_for_pid on bad targets */ |
855 | if (target->p_stat == SZOMB) { |
856 | return FALSE; |
857 | } |
858 | |
859 | mycred = kauth_cred_get(); |
	myuid = kauth_cred_getuid(mycred);
861 | |
862 | /* If we're running as root, the check passes */ |
	if (kauth_cred_issuser(mycred)) {
864 | return TRUE; |
865 | } |
866 | |
867 | /* We're allowed to get our own task port */ |
868 | if (target == current_proc()) { |
869 | return TRUE; |
870 | } |
871 | |
872 | /* |
873 | * Under DENY, only root can get another proc's task port, |
874 | * so no more checks are needed. |
875 | */ |
876 | if (tfp_policy == KERN_TFP_POLICY_DENY) { |
877 | return FALSE; |
878 | } |
879 | |
	targetcred = kauth_cred_proc_ref(target);
881 | allowed = TRUE; |
882 | |
	checkcredentials = !proc_is_third_party_debuggable_driver(target);
884 | |
885 | if (checkcredentials) { |
886 | /* Do target's ruid, euid, and saved uid match my euid? */ |
		if ((kauth_cred_getuid(targetcred) != myuid) ||
		    (kauth_cred_getruid(targetcred) != myuid) ||
		    (kauth_cred_getsvuid(targetcred) != myuid)) {
890 | allowed = FALSE; |
891 | goto out; |
892 | } |
893 | /* Are target's groups a subset of my groups? */ |
		if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
895 | allowed == 0) { |
896 | allowed = FALSE; |
897 | goto out; |
898 | } |
899 | } |
900 | |
901 | /* Has target switched credentials? */ |
902 | if (target->p_flag & P_SUGID) { |
903 | allowed = FALSE; |
904 | goto out; |
905 | } |
906 | |
907 | out: |
908 | kauth_cred_unref(&targetcred); |
909 | return allowed; |
910 | } |
911 | |
912 | /* |
913 | * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__ |
914 | * |
915 | * Description: Waits for the user space daemon to respond to the request |
916 | * we made. Function declared non inline to be visible in |
917 | * stackshots and spindumps as well as debugging. |
918 | */ |
919 | __attribute__((noinline)) int |
920 | __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( |
921 | mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid, mach_task_flavor_t flavor) |
922 | { |
923 | return check_task_access_with_flavor(task_access_port, calling_pid, calling_gid, target_pid, flavor); |
924 | } |
925 | |
926 | /* |
927 | * Routine: task_for_pid |
928 | * Purpose: |
929 | * Get the task port for another "process", named by its |
930 | * process ID on the same host as "target_task". |
931 | * |
932 | * Only permitted to privileged processes, or processes |
933 | * with the same user ID. |
934 | * |
 * Note: if pid == 0, an error is returned no matter who is calling.
936 | * |
937 | * XXX This should be a BSD system call, not a Mach trap!!! |
938 | */ |
939 | kern_return_t |
940 | task_for_pid( |
941 | struct task_for_pid_args *args) |
942 | { |
943 | mach_port_name_t target_tport = args->target_tport; |
944 | int pid = args->pid; |
945 | user_addr_t task_addr = args->t; |
946 | proc_t p = PROC_NULL; |
947 | task_t t1 = TASK_NULL; |
948 | task_t task = TASK_NULL; |
949 | mach_port_name_t tret = MACH_PORT_NULL; |
950 | ipc_port_t tfpport = MACH_PORT_NULL; |
951 | void * sright = NULL; |
952 | int error = 0; |
953 | boolean_t is_current_proc = FALSE; |
954 | struct proc_ident pident = {0}; |
955 | |
956 | AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID); |
957 | AUDIT_ARG(pid, pid); |
958 | AUDIT_ARG(mach_port1, target_tport); |
959 | |
960 | /* Always check if pid == 0 */ |
961 | if (pid == 0) { |
962 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
963 | AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); |
964 | return KERN_FAILURE; |
965 | } |
966 | |
967 | t1 = port_name_to_task(target_tport); |
968 | if (t1 == TASK_NULL) { |
969 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
970 | AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); |
971 | return KERN_FAILURE; |
972 | } |
973 | |
974 | |
975 | p = proc_find(pid); |
976 | if (p == PROC_NULL) { |
977 | error = KERN_FAILURE; |
978 | goto tfpout; |
979 | } |
980 | pident = proc_ident(p); |
981 | is_current_proc = (p == current_proc()); |
982 | |
983 | #if CONFIG_AUDIT |
984 | AUDIT_ARG(process, p); |
985 | #endif |
986 | |
	if (!(task_for_pid_posix_check(p))) {
988 | error = KERN_FAILURE; |
989 | goto tfpout; |
990 | } |
991 | |
992 | if (proc_task(p) == TASK_NULL) { |
993 | error = KERN_SUCCESS; |
994 | goto tfpout; |
995 | } |
996 | |
997 | /* |
998 | * Grab a task reference and drop the proc reference as the proc ref |
	 * shouldn't be held across upcalls.
1000 | */ |
1001 | task = proc_task(p); |
1002 | task_reference(task); |
1003 | |
1004 | proc_rele(p); |
1005 | p = PROC_NULL; |
1006 | |
	/* IPC is not active on the task until after `exec_resettextvp` has been called.
	 * We don't want to call into MAC hooks until we know that this has occurred,
	 * otherwise AMFI and others will read uninitialized fields from the csproc.
	 */
1011 | if (!task_is_ipc_active(task)) { |
1012 | error = KERN_FAILURE; |
1013 | goto tfpout; |
1014 | } |
1015 | |
1016 | #if CONFIG_MACF |
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1018 | if (error) { |
1019 | error = KERN_FAILURE; |
1020 | goto tfpout; |
1021 | } |
1022 | #endif |
1023 | |
1024 | /* If we aren't root and target's task access port is set... */ |
	if (!kauth_cred_issuser(kauth_cred_get()) &&
1026 | !is_current_proc && |
1027 | (task_get_task_access_port(task, &tfpport) == 0) && |
1028 | (tfpport != IPC_PORT_NULL)) { |
1029 | if (tfpport == IPC_PORT_DEAD) { |
1030 | error = KERN_PROTECTION_FAILURE; |
1031 | goto tfpout; |
1032 | } |
1033 | |
1034 | /* Call up to the task access server */ |
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1037 | |
1038 | if (error != MACH_MSG_SUCCESS) { |
1039 | if (error == MACH_RCV_INTERRUPTED) { |
1040 | error = KERN_ABORTED; |
1041 | } else { |
1042 | error = KERN_FAILURE; |
1043 | } |
1044 | goto tfpout; |
1045 | } |
1046 | } |
1047 | |
1048 | /* Grant task port access */ |
	extmod_statistics_incr_task_for_pid(task);
1050 | |
1051 | /* this reference will be consumed during conversion */ |
1052 | task_reference(task); |
1053 | if (task == current_task()) { |
1054 | /* return pinned self if current_task() so equality check with mach_task_self_ passes */ |
1055 | sright = (void *)convert_task_to_port_pinned(task); |
1056 | } else { |
1057 | sright = (void *)convert_task_to_port(task); |
1058 | } |
1059 | /* extra task ref consumed */ |
1060 | |
1061 | /* |
1062 | * Check if the task has been corpsified. We must do so after conversion |
1063 | * since we don't hold locks and may have grabbed a corpse control port |
1064 | * above which will prevent no-senders notification delivery. |
1065 | */ |
1066 | if (task_is_a_corpse(task)) { |
		ipc_port_release_send(sright);
1068 | error = KERN_FAILURE; |
1069 | goto tfpout; |
1070 | } |
1071 | |
1072 | tret = ipc_port_copyout_send( |
1073 | sright, |
		get_task_ipcspace(current_task()));
1075 | |
1076 | error = KERN_SUCCESS; |
1077 | |
1078 | tfpout: |
1079 | task_deallocate(t1); |
1080 | AUDIT_ARG(mach_port2, tret); |
1081 | (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); |
1082 | |
1083 | if (tfpport != IPC_PORT_NULL) { |
		ipc_port_release_send(tfpport);
1085 | } |
1086 | if (task != TASK_NULL) { |
1087 | task_deallocate(task); |
1088 | } |
1089 | if (p != PROC_NULL) { |
1090 | proc_rele(p); |
1091 | } |
1092 | AUDIT_MACH_SYSCALL_EXIT(error); |
1093 | return error; |
1094 | } |
1095 | |
1096 | /* |
1097 | * Routine: task_name_for_pid |
1098 | * Purpose: |
1099 | * Get the task name port for another "process", named by its |
1100 | * process ID on the same host as "target_task". |
1101 | * |
1102 | * Only permitted to privileged processes, or processes |
1103 | * with the same user ID. |
1104 | * |
1105 | * XXX This should be a BSD system call, not a Mach trap!!! |
1106 | */ |
1107 | |
1108 | kern_return_t |
1109 | task_name_for_pid( |
1110 | struct task_name_for_pid_args *args) |
1111 | { |
1112 | mach_port_name_t target_tport = args->target_tport; |
1113 | int pid = args->pid; |
1114 | user_addr_t task_addr = args->t; |
1115 | proc_t p = PROC_NULL; |
1116 | task_t t1 = TASK_NULL; |
1117 | mach_port_name_t tret = MACH_PORT_NULL; |
1118 | void * sright; |
1119 | int error = 0, refheld = 0; |
1120 | kauth_cred_t target_cred; |
1121 | |
1122 | AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID); |
1123 | AUDIT_ARG(pid, pid); |
1124 | AUDIT_ARG(mach_port1, target_tport); |
1125 | |
1126 | t1 = port_name_to_task(target_tport); |
1127 | if (t1 == TASK_NULL) { |
1128 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1129 | AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); |
1130 | return KERN_FAILURE; |
1131 | } |
1132 | |
1133 | p = proc_find(pid); |
1134 | if (p != PROC_NULL) { |
1135 | AUDIT_ARG(process, p); |
		target_cred = kauth_cred_proc_ref(p);
1137 | refheld = 1; |
1138 | |
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid())))
		    || IOCurrentTaskHasEntitlement("com.apple.system-task-ports.name.safe")
		    )) {
1146 | if (proc_task(p) != TASK_NULL) { |
1147 | struct proc_ident pident = proc_ident(p); |
1148 | |
1149 | task_t task = proc_task(p); |
1150 | |
1151 | task_reference(task); |
1152 | proc_rele(p); |
1153 | p = PROC_NULL; |
1154 | #if CONFIG_MACF |
				error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_NAME);
1156 | if (error) { |
1157 | task_deallocate(task); |
1158 | goto noperm; |
1159 | } |
1160 | #endif |
1161 | sright = (void *)convert_task_name_to_port(task); |
1162 | task = NULL; |
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
1165 | } else { |
1166 | tret = MACH_PORT_NULL; |
1167 | } |
1168 | |
1169 | AUDIT_ARG(mach_port2, tret); |
1170 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1171 | task_deallocate(t1); |
1172 | error = KERN_SUCCESS; |
1173 | goto tnfpout; |
1174 | } |
1175 | } |
1176 | |
1177 | #if CONFIG_MACF |
1178 | noperm: |
1179 | #endif |
1180 | task_deallocate(t1); |
1181 | tret = MACH_PORT_NULL; |
1182 | (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); |
1183 | error = KERN_FAILURE; |
1184 | tnfpout: |
1185 | if (refheld != 0) { |
1186 | kauth_cred_unref(&target_cred); |
1187 | } |
1188 | if (p != PROC_NULL) { |
1189 | proc_rele(p); |
1190 | } |
1191 | AUDIT_MACH_SYSCALL_EXIT(error); |
1192 | return error; |
1193 | } |
1194 | |
1195 | /* |
1196 | * Routine: task_inspect_for_pid |
1197 | * Purpose: |
1198 | * Get the task inspect port for another "process", named by its |
1199 | * process ID on the same host as "target_task". |
1200 | */ |
1201 | int |
1202 | task_inspect_for_pid(struct proc *p __unused, struct task_inspect_for_pid_args *args, int *ret) |
1203 | { |
1204 | mach_port_name_t target_tport = args->target_tport; |
1205 | int pid = args->pid; |
1206 | user_addr_t task_addr = args->t; |
1207 | |
1208 | proc_t proc = PROC_NULL; |
1209 | task_t t1 = TASK_NULL; |
1210 | task_inspect_t task_insp = TASK_INSPECT_NULL; |
1211 | mach_port_name_t tret = MACH_PORT_NULL; |
1212 | ipc_port_t tfpport = MACH_PORT_NULL; |
1213 | int error = 0; |
1214 | void *sright = NULL; |
1215 | boolean_t is_current_proc = FALSE; |
1216 | struct proc_ident pident = {0}; |
1217 | |
1218 | /* Disallow inspect port for kernel_task */ |
1219 | if (pid == 0) { |
1220 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1221 | return EPERM; |
1222 | } |
1223 | |
1224 | t1 = port_name_to_task(target_tport); |
1225 | if (t1 == TASK_NULL) { |
1226 | (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); |
1227 | return EINVAL; |
1228 | } |
1229 | |
1230 | proc = proc_find(pid); |
1231 | if (proc == PROC_NULL) { |
1232 | error = ESRCH; |
1233 | goto tifpout; |
1234 | } |
	pident = proc_ident(proc);
1236 | is_current_proc = (proc == current_proc()); |
1237 | |
	if (!(task_for_pid_posix_check(proc))) {
1239 | error = EPERM; |
1240 | goto tifpout; |
1241 | } |
1242 | |
1243 | task_insp = proc_task(proc); |
1244 | if (task_insp == TASK_INSPECT_NULL) { |
1245 | goto tifpout; |
1246 | } |
1247 | |
1248 | /* |
1249 | * Grab a task reference and drop the proc reference before making any upcalls. |
1250 | */ |
1251 | task_reference(task_insp); |
1252 | |
	proc_rele(proc);
1254 | proc = PROC_NULL; |
1255 | |
1256 | #if CONFIG_MACF |
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_INSPECT);
1258 | if (error) { |
1259 | error = EPERM; |
1260 | goto tifpout; |
1261 | } |
1262 | #endif |
1263 | |
1264 | /* If we aren't root and target's task access port is set... */ |
	if (!kauth_cred_issuser(kauth_cred_get()) &&
1266 | !is_current_proc && |
1267 | (task_get_task_access_port(task_insp, &tfpport) == 0) && |
1268 | (tfpport != IPC_PORT_NULL)) { |
1269 | if (tfpport == IPC_PORT_DEAD) { |
1270 | error = EACCES; |
1271 | goto tifpout; |
1272 | } |
1273 | |
1274 | |
1275 | /* Call up to the task access server */ |
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_INSPECT);
1278 | |
1279 | if (error != MACH_MSG_SUCCESS) { |
1280 | if (error == MACH_RCV_INTERRUPTED) { |
1281 | error = EINTR; |
1282 | } else { |
1283 | error = EPERM; |
1284 | } |
1285 | goto tifpout; |
1286 | } |
1287 | } |
1288 | |
1289 | /* Check if the task has been corpsified */ |
	if (task_is_a_corpse(task_insp)) {
1291 | error = EACCES; |
1292 | goto tifpout; |
1293 | } |
1294 | |
1295 | /* could be IP_NULL, consumes a ref */ |
1296 | sright = (void*) convert_task_inspect_to_port(task_insp); |
1297 | task_insp = TASK_INSPECT_NULL; |
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1299 | |
1300 | tifpout: |
1301 | task_deallocate(t1); |
1302 | (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); |
1303 | if (proc != PROC_NULL) { |
		proc_rele(proc);
1305 | } |
1306 | if (tfpport != IPC_PORT_NULL) { |
		ipc_port_release_send(tfpport);
1308 | } |
1309 | if (task_insp != TASK_INSPECT_NULL) { |
1310 | task_deallocate(task_insp); |
1311 | } |
1312 | |
1313 | *ret = error; |
1314 | return error; |
1315 | } |
1316 | |
1317 | /* |
1318 | * Routine: task_read_for_pid |
1319 | * Purpose: |
1320 | * Get the task read port for another "process", named by its |
1321 | * process ID on the same host as "target_task". |
1322 | */ |
1323 | int |
1324 | task_read_for_pid(struct proc *p __unused, struct task_read_for_pid_args *args, int *ret) |
1325 | { |
1326 | mach_port_name_t target_tport = args->target_tport; |
1327 | int pid = args->pid; |
1328 | user_addr_t task_addr = args->t; |
1329 | |
1330 | proc_t proc = PROC_NULL; |
1331 | task_t t1 = TASK_NULL; |
1332 | task_read_t task_read = TASK_READ_NULL; |
1333 | mach_port_name_t tret = MACH_PORT_NULL; |
1334 | ipc_port_t tfpport = MACH_PORT_NULL; |
1335 | int error = 0; |
1336 | void *sright = NULL; |
1337 | boolean_t is_current_proc = FALSE; |
1338 | struct proc_ident pident = {0}; |
1339 | |
1340 | /* Disallow read port for kernel_task */ |
1341 | if (pid == 0) { |
1342 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1343 | return EPERM; |
1344 | } |
1345 | |
1346 | t1 = port_name_to_task(target_tport); |
1347 | if (t1 == TASK_NULL) { |
1348 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1349 | return EINVAL; |
1350 | } |
1351 | |
1352 | proc = proc_find(pid); |
1353 | if (proc == PROC_NULL) { |
1354 | error = ESRCH; |
1355 | goto trfpout; |
1356 | } |
	pident = proc_ident(proc);
1358 | is_current_proc = (proc == current_proc()); |
1359 | |
	if (!(task_for_pid_posix_check(proc))) {
1361 | error = EPERM; |
1362 | goto trfpout; |
1363 | } |
1364 | |
1365 | task_read = proc_task(proc); |
	if (task_read == TASK_READ_NULL) {
1367 | goto trfpout; |
1368 | } |
1369 | |
1370 | /* |
1371 | * Grab a task reference and drop the proc reference before making any upcalls. |
1372 | */ |
1373 | task_reference(task_read); |
1374 | |
	proc_rele(proc);
1376 | proc = PROC_NULL; |
1377 | |
1378 | #if CONFIG_MACF |
	error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_READ);
1380 | if (error) { |
1381 | error = EPERM; |
1382 | goto trfpout; |
1383 | } |
1384 | #endif |
1385 | |
1386 | /* If we aren't root and target's task access port is set... */ |
	if (!kauth_cred_issuser(kauth_cred_get()) &&
1388 | !is_current_proc && |
1389 | (task_get_task_access_port(task_read, &tfpport) == 0) && |
1390 | (tfpport != IPC_PORT_NULL)) { |
1391 | if (tfpport == IPC_PORT_DEAD) { |
1392 | error = EACCES; |
1393 | goto trfpout; |
1394 | } |
1395 | |
1396 | |
1397 | /* Call up to the task access server */ |
		error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
		    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_READ);
1400 | |
1401 | if (error != MACH_MSG_SUCCESS) { |
1402 | if (error == MACH_RCV_INTERRUPTED) { |
1403 | error = EINTR; |
1404 | } else { |
1405 | error = EPERM; |
1406 | } |
1407 | goto trfpout; |
1408 | } |
1409 | } |
1410 | |
1411 | /* Check if the task has been corpsified */ |
	if (task_is_a_corpse(task_read)) {
1413 | error = EACCES; |
1414 | goto trfpout; |
1415 | } |
1416 | |
1417 | /* could be IP_NULL, consumes a ref */ |
1418 | sright = (void*) convert_task_read_to_port(task_read); |
1419 | task_read = TASK_READ_NULL; |
	tret = ipc_port_copyout_send(sright, get_task_ipcspace(current_task()));
1421 | |
1422 | trfpout: |
1423 | task_deallocate(t1); |
1424 | (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); |
1425 | if (proc != PROC_NULL) { |
		proc_rele(proc);
1427 | } |
1428 | if (tfpport != IPC_PORT_NULL) { |
		ipc_port_release_send(tfpport);
1430 | } |
1431 | if (task_read != TASK_READ_NULL) { |
1432 | task_deallocate(task_read); |
1433 | } |
1434 | |
1435 | *ret = error; |
1436 | return error; |
1437 | } |
1438 | |
1439 | kern_return_t |
1440 | pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) |
1441 | { |
1442 | task_t target = NULL; |
1443 | proc_t targetproc = PROC_NULL; |
1444 | int pid = args->pid; |
1445 | int error = 0; |
1446 | mach_port_t tfpport = MACH_PORT_NULL; |
1447 | |
1448 | if (pid == 0) { |
1449 | error = EPERM; |
1450 | goto out; |
1451 | } |
1452 | |
1453 | targetproc = proc_find(pid); |
1454 | if (targetproc == PROC_NULL) { |
1455 | error = ESRCH; |
1456 | goto out; |
1457 | } |
1458 | |
	if (!task_for_pid_posix_check(targetproc) &&
1460 | !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) { |
1461 | error = EPERM; |
1462 | goto out; |
1463 | } |
1464 | |
1465 | #if CONFIG_MACF |
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SUSPEND);
1467 | if (error) { |
1468 | error = EPERM; |
1469 | goto out; |
1470 | } |
1471 | #endif |
1472 | |
1473 | target = proc_task(targetproc); |
1474 | #if XNU_TARGET_OS_OSX |
1475 | if (target != TASK_NULL) { |
1476 | /* If we aren't root and target's task access port is set... */ |
		if (!kauth_cred_issuser(kauth_cred_get()) &&
1478 | targetproc != current_proc() && |
1479 | (task_get_task_access_port(target, &tfpport) == 0) && |
1480 | (tfpport != IPC_PORT_NULL)) { |
1481 | if (tfpport == IPC_PORT_DEAD) { |
1482 | error = EACCES; |
1483 | goto out; |
1484 | } |
1485 | |
1486 | /* Call up to the task access server */ |
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1489 | |
1490 | if (error != MACH_MSG_SUCCESS) { |
1491 | if (error == MACH_RCV_INTERRUPTED) { |
1492 | error = EINTR; |
1493 | } else { |
1494 | error = EPERM; |
1495 | } |
1496 | goto out; |
1497 | } |
1498 | } |
1499 | } |
1500 | #endif /* XNU_TARGET_OS_OSX */ |
1501 | |
1502 | task_reference(target); |
	error = task_pidsuspend(target);
1504 | if (error) { |
1505 | if (error == KERN_INVALID_ARGUMENT) { |
1506 | error = EINVAL; |
1507 | } else { |
1508 | error = EPERM; |
1509 | } |
1510 | } |
1511 | #if CONFIG_MEMORYSTATUS |
1512 | else { |
		memorystatus_on_suspend(targetproc);
1514 | } |
1515 | #endif |
1516 | |
1517 | task_deallocate(target); |
1518 | |
1519 | out: |
1520 | if (tfpport != IPC_PORT_NULL) { |
		ipc_port_release_send(tfpport);
1522 | } |
1523 | |
1524 | if (targetproc != PROC_NULL) { |
		proc_rele(targetproc);
1526 | } |
1527 | *ret = error; |
1528 | return error; |
1529 | } |
1530 | |
1531 | kern_return_t |
1532 | debug_control_port_for_pid(struct debug_control_port_for_pid_args *args) |
1533 | { |
1534 | mach_port_name_t target_tport = args->target_tport; |
1535 | int pid = args->pid; |
1536 | user_addr_t task_addr = args->t; |
1537 | proc_t p = PROC_NULL; |
1538 | task_t t1 = TASK_NULL; |
1539 | task_t task = TASK_NULL; |
1540 | mach_port_name_t tret = MACH_PORT_NULL; |
1541 | ipc_port_t tfpport = MACH_PORT_NULL; |
1542 | ipc_port_t sright = NULL; |
1543 | int error = 0; |
1544 | boolean_t is_current_proc = FALSE; |
1545 | struct proc_ident pident = {0}; |
1546 | |
1547 | AUDIT_MACH_SYSCALL_ENTER(AUE_DBGPORTFORPID); |
1548 | AUDIT_ARG(pid, pid); |
1549 | AUDIT_ARG(mach_port1, target_tport); |
1550 | |
1551 | /* Always check if pid == 0 */ |
1552 | if (pid == 0) { |
1553 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1554 | AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); |
1555 | return KERN_FAILURE; |
1556 | } |
1557 | |
1558 | t1 = port_name_to_task(target_tport); |
1559 | if (t1 == TASK_NULL) { |
1560 | (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); |
1561 | AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); |
1562 | return KERN_FAILURE; |
1563 | } |
1564 | |
1565 | p = proc_find(pid); |
1566 | if (p == PROC_NULL) { |
1567 | error = KERN_FAILURE; |
1568 | goto tfpout; |
1569 | } |
1570 | pident = proc_ident(p); |
1571 | is_current_proc = (p == current_proc()); |
1572 | |
1573 | #if CONFIG_AUDIT |
1574 | AUDIT_ARG(process, p); |
1575 | #endif |
1576 | |
	if (!(task_for_pid_posix_check(p))) {
1578 | error = KERN_FAILURE; |
1579 | goto tfpout; |
1580 | } |
1581 | |
1582 | if (proc_task(p) == TASK_NULL) { |
1583 | error = KERN_SUCCESS; |
1584 | goto tfpout; |
1585 | } |
1586 | |
1587 | /* |
1588 | * Grab a task reference and drop the proc reference before making any upcalls. |
1589 | */ |
1590 | task = proc_task(p); |
1591 | task_reference(task); |
1592 | |
1593 | proc_rele(p); |
1594 | p = PROC_NULL; |
1595 | |
1596 | if (!IOCurrentTaskHasEntitlement(DEBUG_PORT_ENTITLEMENT)) { |
1597 | #if CONFIG_MACF |
		error = mac_proc_check_get_task(kauth_cred_get(), &pident, TASK_FLAVOR_CONTROL);
1599 | if (error) { |
1600 | error = KERN_FAILURE; |
1601 | goto tfpout; |
1602 | } |
1603 | #endif |
1604 | |
1605 | /* If we aren't root and target's task access port is set... */ |
		if (!kauth_cred_issuser(kauth_cred_get()) &&
1607 | !is_current_proc && |
1608 | (task_get_task_access_port(task, &tfpport) == 0) && |
1609 | (tfpport != IPC_PORT_NULL)) { |
1610 | if (tfpport == IPC_PORT_DEAD) { |
1611 | error = KERN_PROTECTION_FAILURE; |
1612 | goto tfpout; |
1613 | } |
1614 | |
1615 | |
1616 | /* Call up to the task access server */ |
			error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
			    proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1619 | |
1620 | if (error != MACH_MSG_SUCCESS) { |
1621 | if (error == MACH_RCV_INTERRUPTED) { |
1622 | error = KERN_ABORTED; |
1623 | } else { |
1624 | error = KERN_FAILURE; |
1625 | } |
1626 | goto tfpout; |
1627 | } |
1628 | } |
1629 | } |
1630 | |
1631 | /* Check if the task has been corpsified */ |
1632 | if (task_is_a_corpse(task)) { |
1633 | error = KERN_FAILURE; |
1634 | goto tfpout; |
1635 | } |
1636 | |
1637 | error = task_get_debug_control_port(task, &sright); |
1638 | if (error != KERN_SUCCESS) { |
1639 | goto tfpout; |
1640 | } |
1641 | |
1642 | tret = ipc_port_copyout_send( |
1643 | sright, |
		get_task_ipcspace(current_task()));
1645 | |
1646 | error = KERN_SUCCESS; |
1647 | |
1648 | tfpout: |
1649 | task_deallocate(t1); |
1650 | AUDIT_ARG(mach_port2, tret); |
1651 | (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); |
1652 | |
1653 | if (tfpport != IPC_PORT_NULL) { |
		ipc_port_release_send(tfpport);
1655 | } |
1656 | if (task != TASK_NULL) { |
1657 | task_deallocate(task); |
1658 | } |
1659 | if (p != PROC_NULL) { |
1660 | proc_rele(p); |
1661 | } |
1662 | AUDIT_MACH_SYSCALL_EXIT(error); |
1663 | return error; |
1664 | } |
1665 | |
1666 | kern_return_t |
1667 | pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) |
1668 | { |
1669 | task_t target = NULL; |
1670 | proc_t targetproc = PROC_NULL; |
1671 | int pid = args->pid; |
1672 | int error = 0; |
1673 | mach_port_t tfpport = MACH_PORT_NULL; |
1674 | |
1675 | if (pid == 0) { |
1676 | error = EPERM; |
1677 | goto out; |
1678 | } |
1679 | |
1680 | targetproc = proc_find(pid); |
1681 | if (targetproc == PROC_NULL) { |
1682 | error = ESRCH; |
1683 | goto out; |
1684 | } |
1685 | |
	if (!task_for_pid_posix_check(targetproc) &&
1687 | !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) { |
1688 | error = EPERM; |
1689 | goto out; |
1690 | } |
1691 | |
1692 | #if CONFIG_MACF |
	error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_RESUME);
1694 | if (error) { |
1695 | error = EPERM; |
1696 | goto out; |
1697 | } |
1698 | #endif |
1699 | |
1700 | target = proc_task(targetproc); |
1701 | #if XNU_TARGET_OS_OSX |
1702 | if (target != TASK_NULL) { |
1703 | /* If we aren't root and target's task access port is set... */ |
if (!kauth_cred_issuser(kauth_cred_get()) &&
1705 | targetproc != current_proc() && |
1706 | (task_get_task_access_port(target, &tfpport) == 0) && |
1707 | (tfpport != IPC_PORT_NULL)) { |
1708 | if (tfpport == IPC_PORT_DEAD) { |
1709 | error = EACCES; |
1710 | goto out; |
1711 | } |
1712 | |
1713 | /* Call up to the task access server */ |
error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport,
proc_selfpid(), kauth_getgid(), pid, TASK_FLAVOR_CONTROL);
1716 | |
1717 | if (error != MACH_MSG_SUCCESS) { |
1718 | if (error == MACH_RCV_INTERRUPTED) { |
1719 | error = EINTR; |
1720 | } else { |
1721 | error = EPERM; |
1722 | } |
1723 | goto out; |
1724 | } |
1725 | } |
1726 | } |
1727 | #endif /* XNU_TARGET_OS_OSX */ |
1728 | |
1729 | #if !XNU_TARGET_OS_OSX |
1730 | #if SOCKETS |
1731 | resume_proc_sockets(targetproc); |
1732 | #endif /* SOCKETS */ |
1733 | #endif /* !XNU_TARGET_OS_OSX */ |
1734 | |
1735 | task_reference(target); |
1736 | |
1737 | #if CONFIG_MEMORYSTATUS |
memorystatus_on_resume(targetproc);
1739 | #endif |
1740 | |
error = task_pidresume(target);
1742 | if (error) { |
1743 | if (error == KERN_INVALID_ARGUMENT) { |
1744 | error = EINVAL; |
1745 | } else { |
1746 | if (error == KERN_MEMORY_ERROR) { |
psignal(targetproc, SIGKILL);
1748 | error = EIO; |
1749 | } else { |
1750 | error = EPERM; |
1751 | } |
1752 | } |
1753 | } |
1754 | |
1755 | task_deallocate(target); |
1756 | |
1757 | out: |
1758 | if (tfpport != IPC_PORT_NULL) { |
1759 | ipc_port_release_send(port: tfpport); |
1760 | } |
1761 | |
1762 | if (targetproc != PROC_NULL) { |
proc_rele(targetproc);
1764 | } |
1765 | |
1766 | *ret = error; |
1767 | return error; |
1768 | } |
1769 | |
1770 | #if !XNU_TARGET_OS_OSX |
1771 | /* |
1772 | * Freeze the specified process (provided in args->pid), or find and freeze a PID. |
1773 | * When a process is specified, this call is blocking, otherwise we wake up the |
1774 | * freezer thread and do not block on a process being frozen. |
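*
* Special pid values, mirroring the dispatch below: -2 pages out anonymous
* memory via vm_pageout_anonymous_pages(), -1 wakes the freezer via
* memorystatus_on_inactivity(), and pid >= 0 synchronously freezes that
* process via memorystatus_freeze_process_sync().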
1775 | */ |
1776 | kern_return_t |
1777 | pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret) |
1778 | { |
1779 | int error = 0; |
1780 | proc_t targetproc = PROC_NULL; |
1781 | int pid = args->pid; |
1782 | |
1783 | #ifndef CONFIG_FREEZE |
1784 | #pragma unused(pid) |
1785 | #else |
1786 | |
1787 | /* |
1788 | * If a pid has been provided, we obtain the process handle and call task_for_pid_posix_check(). |
1789 | */ |
1790 | |
1791 | if (pid >= 0) { |
1792 | targetproc = proc_find(pid); |
1793 | |
1794 | if (targetproc == PROC_NULL) { |
1795 | error = ESRCH; |
1796 | goto out; |
1797 | } |
1798 | |
1799 | if (!task_for_pid_posix_check(targetproc)) { |
1800 | error = EPERM; |
1801 | goto out; |
1802 | } |
1803 | } |
1804 | |
1805 | #if CONFIG_MACF |
/* Note that targetproc may be NULL */
1807 | error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_HIBERNATE); |
1808 | if (error) { |
1809 | error = EPERM; |
1810 | goto out; |
1811 | } |
1812 | #endif |
1813 | |
1814 | if (pid == -2) { |
1815 | vm_pageout_anonymous_pages(); |
1816 | } else if (pid == -1) { |
1817 | memorystatus_on_inactivity(targetproc); |
1818 | } else { |
1819 | error = memorystatus_freeze_process_sync(targetproc); |
1820 | } |
1821 | |
1822 | out: |
1823 | |
1824 | #endif /* CONFIG_FREEZE */ |
1825 | |
1826 | if (targetproc != PROC_NULL) { |
1827 | proc_rele(targetproc); |
1828 | } |
1829 | *ret = error; |
1830 | return error; |
1831 | } |
1832 | #endif /* !XNU_TARGET_OS_OSX */ |
1833 | |
1834 | #if SOCKETS |
1835 | int |
1836 | networking_memstatus_callout(proc_t p, uint32_t status) |
1837 | { |
1838 | struct fileproc *fp; |
1839 | |
1840 | /* |
1841 | * proc list lock NOT held |
1842 | * proc lock NOT held |
1843 | * a reference on the proc has been held / shall be dropped by the caller. |
1844 | */ |
1845 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED); |
1846 | LCK_MTX_ASSERT(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); |
1847 | |
1848 | proc_fdlock(p); |
1849 | |
1850 | fdt_foreach(fp, p) { |
1851 | switch (FILEGLOB_DTYPE(fp->fp_glob)) { |
1852 | #if NECP |
1853 | case DTYPE_NETPOLICY: |
necp_fd_memstatus(p, status,
(struct necp_fd_data *)fp_get_data(fp));
1856 | break; |
1857 | #endif /* NECP */ |
1858 | #if SKYWALK |
1859 | case DTYPE_CHANNEL: |
1860 | kern_channel_memstatus(p, status, |
1861 | (struct kern_channel *)fp_get_data(fp)); |
1862 | break; |
1863 | #endif /* SKYWALK */ |
1864 | default: |
1865 | break; |
1866 | } |
1867 | } |
1868 | proc_fdunlock(p); |
1869 | |
1870 | return 1; |
1871 | } |
1872 | |
1873 | #if SKYWALK |
1874 | /* |
1875 | * Since we make multiple passes across the fileproc array, record the |
1876 | * first MAX_CHANNELS channel handles found. MAX_CHANNELS should be |
* large enough to accommodate most, if not all, cases. If we find more,
1878 | * we'll go to the slow path during second pass. |
1879 | */ |
1880 | #define MAX_CHANNELS 8 /* should be more than enough */ |
1881 | #endif /* SKYWALK */ |
1882 | |
1883 | static int |
1884 | networking_defunct_callout(proc_t p, void *arg) |
1885 | { |
1886 | struct pid_shutdown_sockets_args *args = arg; |
1887 | int pid = args->pid; |
1888 | int level = args->level; |
1889 | struct fileproc *fp; |
1890 | #if SKYWALK |
1891 | int i; |
1892 | int channel_count = 0; |
1893 | struct kern_channel *channel_array[MAX_CHANNELS]; |
1894 | |
bzero(&channel_array, sizeof(channel_array));
1896 | #endif /* SKYWALK */ |
1897 | |
1898 | proc_fdlock(p); |
1899 | |
1900 | fdt_foreach(fp, p) { |
1901 | struct fileglob *fg = fp->fp_glob; |
1902 | |
1903 | switch (FILEGLOB_DTYPE(fg)) { |
1904 | case DTYPE_SOCKET: { |
1905 | struct socket *so = (struct socket *)fg_get_data(fg); |
1906 | if (proc_getpid(p) == pid || so->last_pid == pid || |
1907 | ((so->so_flags & SOF_DELEGATED) && so->e_pid == pid)) { |
1908 | /* Call networking stack with socket and level */ |
1909 | (void)socket_defunct(p, so, level); |
1910 | } |
1911 | break; |
1912 | } |
1913 | #if NECP |
1914 | case DTYPE_NETPOLICY: |
1915 | /* first pass: defunct necp and get stats for ntstat */ |
1916 | if (proc_getpid(p) == pid) { |
necp_fd_defunct(p,
(struct necp_fd_data *)fg_get_data(fg));
1919 | } |
1920 | break; |
1921 | #endif /* NECP */ |
1922 | #if SKYWALK |
1923 | case DTYPE_CHANNEL: |
1924 | /* first pass: get channels and total count */ |
1925 | if (proc_getpid(p) == pid) { |
1926 | if (channel_count < MAX_CHANNELS) { |
1927 | channel_array[channel_count] = |
1928 | (struct kern_channel *)fg_get_data(fg); |
1929 | } |
1930 | ++channel_count; |
1931 | } |
1932 | break; |
1933 | #endif /* SKYWALK */ |
1934 | default: |
1935 | break; |
1936 | } |
1937 | } |
1938 | |
1939 | #if SKYWALK |
1940 | /* |
1941 | * Second pass: defunct channels/flows (after NECP). Handle |
1942 | * the common case of up to MAX_CHANNELS count with fast path, |
1943 | * and traverse the fileproc array again only if we exceed it. |
1944 | */ |
1945 | if (channel_count != 0 && channel_count <= MAX_CHANNELS) { |
1946 | ASSERT(proc_getpid(p) == pid); |
1947 | for (i = 0; i < channel_count; i++) { |
1948 | ASSERT(channel_array[i] != NULL); |
1949 | kern_channel_defunct(p, channel_array[i]); |
1950 | } |
1951 | } else if (channel_count != 0) { |
1952 | ASSERT(proc_getpid(p) == pid); |
1953 | fdt_foreach(fp, p) { |
1954 | struct fileglob *fg = fp->fp_glob; |
1955 | |
1956 | if (FILEGLOB_DTYPE(fg) == DTYPE_CHANNEL) { |
1957 | kern_channel_defunct(p, |
1958 | (struct kern_channel *)fg_get_data(fg)); |
1959 | } |
1960 | } |
1961 | } |
1962 | #endif /* SKYWALK */ |
1963 | proc_fdunlock(p); |
1964 | |
1965 | return PROC_RETURNED; |
1966 | } |
1967 | |
1968 | int |
1969 | pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret) |
1970 | { |
1971 | int error = 0; |
1972 | proc_t targetproc = PROC_NULL; |
1973 | int pid = args->pid; |
1974 | int level = args->level; |
1975 | |
1976 | if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC && |
1977 | level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) { |
1978 | error = EINVAL; |
1979 | goto out; |
1980 | } |
1981 | |
1982 | targetproc = proc_find(pid); |
1983 | if (targetproc == PROC_NULL) { |
1984 | error = ESRCH; |
1985 | goto out; |
1986 | } |
1987 | |
if (!task_for_pid_posix_check(targetproc) &&
1989 | !IOCurrentTaskHasEntitlement(PROCESS_RESUME_SUSPEND_ENTITLEMENT)) { |
1990 | error = EPERM; |
1991 | goto out; |
1992 | } |
1993 | |
1994 | #if CONFIG_MACF |
error = mac_proc_check_suspend_resume(targetproc, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
1996 | if (error) { |
1997 | error = EPERM; |
1998 | goto out; |
1999 | } |
2000 | #endif |
2001 | |
2002 | proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, |
networking_defunct_callout, args, NULL, NULL);
2004 | |
2005 | out: |
2006 | if (targetproc != PROC_NULL) { |
proc_rele(targetproc);
2008 | } |
2009 | *ret = error; |
2010 | return error; |
2011 | } |
2012 | |
2013 | #endif /* SOCKETS */ |
2014 | |
2015 | static int |
2016 | sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, |
2017 | __unused int arg2, struct sysctl_req *req) |
2018 | { |
2019 | int error = 0; |
2020 | int new_value; |
2021 | |
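/*
 * Reads return the current policy. Writes require root and are limited
 * to KERN_TFP_POLICY_DENY or KERN_TFP_POLICY_DEFAULT.
 */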
2022 | error = SYSCTL_OUT(req, arg1, sizeof(int)); |
2023 | if (error || req->newptr == USER_ADDR_NULL) { |
2024 | return error; |
2025 | } |
2026 | |
if (!kauth_cred_issuser(kauth_cred_get())) {
2028 | return EPERM; |
2029 | } |
2030 | |
2031 | if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { |
2032 | goto out; |
2033 | } |
2034 | if ((new_value == KERN_TFP_POLICY_DENY) |
2035 | || (new_value == KERN_TFP_POLICY_DEFAULT)) { |
2036 | tfp_policy = new_value; |
2037 | } else { |
2038 | error = EINVAL; |
2039 | } |
2040 | out: |
2041 | return error; |
2042 | } |
2043 | |
2044 | #if defined(SECURE_KERNEL) |
2045 | static int kern_secure_kernel = 1; |
2046 | #else |
2047 | static int kern_secure_kernel = 0; |
2048 | #endif |
2049 | |
2050 | SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "" ); |
2051 | |
2052 | SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp" ); |
2053 | SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, |
2054 | &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I" , "policy" ); |
2055 | |
2056 | SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED, |
2057 | &shared_region_trace_level, 0, "" ); |
2058 | SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED, |
2059 | &shared_region_version, 0, "" ); |
2060 | SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED, |
2061 | &shared_region_persistence, 0, "" ); |
2062 | |
2063 | /* |
2064 | * shared_region_check_np: |
2065 | * |
2066 | * This system call is intended for dyld. |
2067 | * |
2068 | * dyld calls this when any process starts to see if the process's shared |
2069 | * region is already set up and ready to use. |
* This call returns the base address of the first mapping in the
* process's shared region.
2072 | * dyld will then check what's mapped at that address. |
2073 | * |
2074 | * If the shared region is empty, dyld will then attempt to map the shared |
2075 | * cache file in the shared region via the shared_region_map_np() system call. |
2076 | * |
2077 | * If something's already mapped in the shared region, dyld will check if it |
2078 | * matches the shared cache it would like to use for that process. |
* If it matches, everything's ready and the process can proceed and use the
2080 | * shared region. |
2081 | * If it doesn't match, dyld will unmap the shared region and map the shared |
2082 | * cache into the process's address space via mmap(). |
2083 | * |
2084 | * A NULL pointer argument can be used by dyld to indicate it has unmapped |
2085 | * the shared region. We will remove the shared_region reference from the task. |
2086 | * |
2087 | * ERROR VALUES |
2088 | * EINVAL no shared region |
2089 | * ENOMEM shared region is empty |
2090 | * EFAULT bad address for "start_address" |
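*
* Illustrative sketch (not part of this file) of how dyld might use this
* call, assuming a libsyscall wrapper of the same name:
*
*	uint64_t base = 0;
*	if (shared_region_check_np(&base) == 0) {
*		// "base" now holds the address of the region's first mapping
*	}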
2091 | */ |
2092 | int |
2093 | shared_region_check_np( |
2094 | __unused struct proc *p, |
2095 | struct shared_region_check_np_args *uap, |
2096 | __unused int *retvalp) |
2097 | { |
2098 | vm_shared_region_t shared_region; |
2099 | mach_vm_offset_t start_address = 0; |
2100 | int error = 0; |
2101 | kern_return_t kr; |
2102 | task_t task = current_task(); |
2103 | |
2104 | SHARED_REGION_TRACE_DEBUG( |
2105 | ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n" , |
2106 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2107 | proc_getpid(p), p->p_comm, |
2108 | (uint64_t)uap->start_address)); |
2109 | |
2110 | /* |
2111 | * Special value of start_address used to indicate that map_with_linking() should |
2112 | * no longer be allowed in this process |
2113 | */ |
2114 | if (uap->start_address == (task_get_64bit_addr(task) ? DYLD_VM_END_MWL : (uint32_t)DYLD_VM_END_MWL)) { |
2115 | p->p_disallow_map_with_linking = TRUE; |
2116 | return 0; |
2117 | } |
2118 | |
/* retrieve the current task's shared region */
2120 | shared_region = vm_shared_region_get(task); |
2121 | if (shared_region != NULL) { |
2122 | /* |
2123 | * A NULL argument is used by dyld to indicate the task |
2124 | * has unmapped its shared region. |
2125 | */ |
2126 | if (uap->start_address == 0) { |
2127 | /* unmap it first */ |
vm_shared_region_remove(task, shared_region);
2129 | vm_shared_region_set(task, NULL); |
2130 | } else { |
2131 | /* retrieve address of its first mapping... */ |
kr = vm_shared_region_start_address(shared_region, &start_address, task);
2133 | if (kr != KERN_SUCCESS) { |
2134 | SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] " |
2135 | "check_np(0x%llx) " |
2136 | "vm_shared_region_start_address() failed\n" , |
2137 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2138 | proc_getpid(p), p->p_comm, |
2139 | (uint64_t)uap->start_address)); |
2140 | error = ENOMEM; |
2141 | } else { |
2142 | #if __has_feature(ptrauth_calls) |
2143 | /* |
2144 | * Remap any section of the shared library that |
2145 | * has authenticated pointers into private memory. |
2146 | */ |
2147 | if (vm_shared_region_auth_remap(shared_region) != KERN_SUCCESS) { |
2148 | SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] " |
2149 | "check_np(0x%llx) " |
2150 | "vm_shared_region_auth_remap() failed\n" , |
2151 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2152 | proc_getpid(p), p->p_comm, |
2153 | (uint64_t)uap->start_address)); |
2154 | error = ENOMEM; |
2155 | } |
2156 | #endif /* __has_feature(ptrauth_calls) */ |
2157 | |
2158 | /* ... and give it to the caller */ |
2159 | if (error == 0) { |
2160 | error = copyout(&start_address, |
2161 | (user_addr_t) uap->start_address, |
2162 | sizeof(start_address)); |
2163 | if (error != 0) { |
2164 | SHARED_REGION_TRACE_ERROR( |
2165 | ("shared_region: %p [%d(%s)] " |
2166 | "check_np(0x%llx) " |
2167 | "copyout(0x%llx) error %d\n" , |
2168 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2169 | proc_getpid(p), p->p_comm, |
2170 | (uint64_t)uap->start_address, (uint64_t)start_address, |
2171 | error)); |
2172 | } |
2173 | } |
2174 | } |
2175 | } |
2176 | vm_shared_region_deallocate(shared_region); |
2177 | } else { |
2178 | /* no shared region ! */ |
2179 | error = EINVAL; |
2180 | } |
2181 | |
2182 | SHARED_REGION_TRACE_DEBUG( |
2183 | ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n" , |
2184 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2185 | proc_getpid(p), p->p_comm, |
2186 | (uint64_t)uap->start_address, (uint64_t)start_address, error)); |
2187 | |
2188 | return error; |
2189 | } |
2190 | |
2191 | |
2192 | static int |
2193 | shared_region_copyin( |
2194 | struct proc *p, |
2195 | user_addr_t user_addr, |
2196 | unsigned int count, |
2197 | unsigned int element_size, |
2198 | void *kernel_data) |
2199 | { |
2200 | int error = 0; |
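/* callers bound 'count' (e.g. _SR_FILE_MAPPINGS_MAX_FILES, SFM_MAX, MWL_MAX_REGION_COUNT), so this product cannot overflow */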
2201 | vm_size_t size = count * element_size; |
2202 | |
2203 | error = copyin(user_addr, kernel_data, size); |
2204 | if (error) { |
2205 | SHARED_REGION_TRACE_ERROR( |
2206 | ("shared_region: %p [%d(%s)] map(): " |
2207 | "copyin(0x%llx, %ld) failed (error=%d)\n" , |
2208 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2209 | proc_getpid(p), p->p_comm, |
2210 | (uint64_t)user_addr, (long)size, error)); |
2211 | } |
2212 | return error; |
2213 | } |
2214 | |
2215 | /* |
2216 | * A reasonable upper limit to prevent overflow of allocation/copyin. |
2217 | */ |
2218 | #define _SR_FILE_MAPPINGS_MAX_FILES 256 |
2219 | |
2220 | /* forward declaration */ |
2221 | __attribute__((noinline)) |
2222 | static void shared_region_map_and_slide_cleanup( |
2223 | struct proc *p, |
2224 | uint32_t files_count, |
2225 | struct _sr_file_mappings *sr_file_mappings, |
2226 | struct vm_shared_region *shared_region); |
2227 | |
2228 | /* |
2229 | * Setup part of _shared_region_map_and_slide(). |
2230 | * It had to be broken out of _shared_region_map_and_slide() to |
2231 | * prevent compiler inlining from blowing out the stack. |
2232 | */ |
2233 | __attribute__((noinline)) |
2234 | static int |
2235 | shared_region_map_and_slide_setup( |
2236 | struct proc *p, |
2237 | uint32_t files_count, |
2238 | struct shared_file_np *files, |
2239 | uint32_t mappings_count, |
2240 | struct shared_file_mapping_slide_np *mappings, |
2241 | struct _sr_file_mappings **sr_file_mappings, |
2242 | struct vm_shared_region **shared_region_ptr, |
2243 | struct vnode *rdir_vp) |
2244 | { |
2245 | int error = 0; |
2246 | struct _sr_file_mappings *srfmp; |
2247 | uint32_t mappings_next; |
2248 | struct vnode_attr va; |
2249 | off_t fs; |
2250 | #if CONFIG_MACF |
2251 | vm_prot_t maxprot = VM_PROT_ALL; |
2252 | #endif |
2253 | uint32_t i; |
2254 | struct vm_shared_region *shared_region = NULL; |
boolean_t is_driverkit = task_is_driver(current_task());
2256 | |
2257 | SHARED_REGION_TRACE_DEBUG( |
2258 | ("shared_region: %p [%d(%s)] -> map\n" , |
2259 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2260 | proc_getpid(p), p->p_comm)); |
2261 | |
2262 | if (files_count > _SR_FILE_MAPPINGS_MAX_FILES) { |
2263 | error = E2BIG; |
2264 | goto done; |
2265 | } |
2266 | if (files_count == 0) { |
2267 | error = EINVAL; |
2268 | goto done; |
2269 | } |
2270 | *sr_file_mappings = kalloc_type(struct _sr_file_mappings, files_count, |
2271 | Z_WAITOK | Z_ZERO); |
2272 | if (*sr_file_mappings == NULL) { |
2273 | error = ENOMEM; |
2274 | goto done; |
2275 | } |
2276 | mappings_next = 0; |
2277 | for (i = 0; i < files_count; i++) { |
2278 | srfmp = &(*sr_file_mappings)[i]; |
2279 | srfmp->fd = files[i].sf_fd; |
2280 | srfmp->mappings_count = files[i].sf_mappings_count; |
2281 | srfmp->mappings = &mappings[mappings_next]; |
2282 | mappings_next += srfmp->mappings_count; |
2283 | if (mappings_next > mappings_count) { |
2284 | error = EINVAL; |
2285 | goto done; |
2286 | } |
2287 | srfmp->slide = files[i].sf_slide; |
2288 | } |
2289 | |
2290 | /* get the process's shared region (setup in vm_map_exec()) */ |
shared_region = vm_shared_region_trim_and_get(current_task());
2292 | *shared_region_ptr = shared_region; |
2293 | if (shared_region == NULL) { |
2294 | SHARED_REGION_TRACE_ERROR( |
2295 | ("shared_region: %p [%d(%s)] map(): " |
2296 | "no shared region\n" , |
2297 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2298 | proc_getpid(p), p->p_comm)); |
2299 | error = EINVAL; |
2300 | goto done; |
2301 | } |
2302 | |
2303 | /* |
* Check that the shared region matches the current root
* directory of this process. If it doesn't, deny the mapping
* to avoid tainting the shared region with something that
* doesn't belong in it.
2308 | */ |
2309 | struct vnode *sr_vnode = vm_shared_region_root_dir(shared_region); |
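/*
 * If the shared region records an explicit root directory, the
 * process's root must be that same vnode; otherwise the process
 * must be running off the system root (rootvnode).
 */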
2310 | if (sr_vnode != NULL ? rdir_vp != sr_vnode : rdir_vp != rootvnode) { |
2311 | SHARED_REGION_TRACE_ERROR( |
2312 | ("shared_region: map(%p) root_dir mismatch\n" , |
2313 | (void *)VM_KERNEL_ADDRPERM(current_thread()))); |
2314 | error = EPERM; |
2315 | goto done; |
2316 | } |
2317 | |
2318 | |
2319 | for (srfmp = &(*sr_file_mappings)[0]; |
2320 | srfmp < &(*sr_file_mappings)[files_count]; |
2321 | srfmp++) { |
2322 | if (srfmp->mappings_count == 0) { |
2323 | /* no mappings here... */ |
2324 | continue; |
2325 | } |
2326 | |
2327 | /* |
2328 | * A file descriptor of -1 is used to indicate that the data |
2329 | * to be put in the shared region for this mapping comes directly |
* from the process's address space. Ensure we have proper alignment.
2331 | */ |
2332 | if (srfmp->fd == -1) { |
2333 | /* only allow one mapping per fd */ |
2334 | if (srfmp->mappings_count > 1) { |
2335 | SHARED_REGION_TRACE_ERROR( |
2336 | ("shared_region: %p [%d(%s)] map data >1 mapping\n" , |
2337 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2338 | proc_getpid(p), p->p_comm)); |
2339 | error = EINVAL; |
2340 | goto done; |
2341 | } |
2342 | |
2343 | /* |
2344 | * The destination address and size must be page aligned. |
2345 | */ |
2346 | struct shared_file_mapping_slide_np *mapping = &srfmp->mappings[0]; |
2347 | mach_vm_address_t dest_addr = mapping->sms_address; |
2348 | mach_vm_size_t map_size = mapping->sms_size; |
if (!vm_map_page_aligned(dest_addr, vm_map_page_mask(current_map()))) {
2350 | SHARED_REGION_TRACE_ERROR( |
2351 | ("shared_region: %p [%d(%s)] map data destination 0x%llx not aligned\n" , |
2352 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2353 | proc_getpid(p), p->p_comm, dest_addr)); |
2354 | error = EINVAL; |
2355 | goto done; |
2356 | } |
if (!vm_map_page_aligned(map_size, vm_map_page_mask(current_map()))) {
2358 | SHARED_REGION_TRACE_ERROR( |
2359 | ("shared_region: %p [%d(%s)] map data size 0x%llx not aligned\n" , |
2360 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2361 | proc_getpid(p), p->p_comm, map_size)); |
2362 | error = EINVAL; |
2363 | goto done; |
2364 | } |
2365 | continue; |
2366 | } |
2367 | |
2368 | /* get file structure from file descriptor */ |
error = fp_get_ftype(p, srfmp->fd, DTYPE_VNODE, EINVAL, &srfmp->fp);
2370 | if (error) { |
2371 | SHARED_REGION_TRACE_ERROR( |
2372 | ("shared_region: %p [%d(%s)] map: " |
2373 | "fd=%d lookup failed (error=%d)\n" , |
2374 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2375 | proc_getpid(p), p->p_comm, srfmp->fd, error)); |
2376 | goto done; |
2377 | } |
2378 | |
2379 | /* we need at least read permission on the file */ |
2380 | if (!(srfmp->fp->fp_glob->fg_flag & FREAD)) { |
2381 | SHARED_REGION_TRACE_ERROR( |
2382 | ("shared_region: %p [%d(%s)] map: " |
2383 | "fd=%d not readable\n" , |
2384 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2385 | proc_getpid(p), p->p_comm, srfmp->fd)); |
2386 | error = EPERM; |
2387 | goto done; |
2388 | } |
2389 | |
2390 | /* get vnode from file structure */ |
error = vnode_getwithref((vnode_t)fp_get_data(srfmp->fp));
2392 | if (error) { |
2393 | SHARED_REGION_TRACE_ERROR( |
2394 | ("shared_region: %p [%d(%s)] map: " |
2395 | "fd=%d getwithref failed (error=%d)\n" , |
2396 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2397 | proc_getpid(p), p->p_comm, srfmp->fd, error)); |
2398 | goto done; |
2399 | } |
srfmp->vp = (struct vnode *)fp_get_data(srfmp->fp);
2401 | |
2402 | /* make sure the vnode is a regular file */ |
2403 | if (srfmp->vp->v_type != VREG) { |
2404 | SHARED_REGION_TRACE_ERROR( |
2405 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2406 | "not a file (type=%d)\n" , |
2407 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2408 | proc_getpid(p), p->p_comm, |
2409 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2410 | srfmp->vp->v_name, srfmp->vp->v_type)); |
2411 | error = EINVAL; |
2412 | goto done; |
2413 | } |
2414 | |
2415 | #if CONFIG_MACF |
2416 | /* pass in 0 for the offset argument because AMFI does not need the offset |
2417 | * of the shared cache */ |
error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
srfmp->fp->fp_glob, VM_PROT_ALL, MAP_FILE | MAP_PRIVATE | MAP_FIXED, 0, &maxprot);
2420 | if (error) { |
2421 | goto done; |
2422 | } |
2423 | #endif /* MAC */ |
2424 | |
2425 | #if XNU_TARGET_OS_OSX && defined(__arm64__) |
2426 | /* |
2427 | * Check if the shared cache is in the trust cache; |
2428 | * if so, we can skip the root ownership check. |
2429 | */ |
2430 | #if DEVELOPMENT || DEBUG |
2431 | /* |
2432 | * Skip both root ownership and trust cache check if |
2433 | * enforcement is disabled. |
2434 | */ |
2435 | if (!cs_system_enforcement()) { |
2436 | goto after_root_check; |
2437 | } |
2438 | #endif /* DEVELOPMENT || DEBUG */ |
2439 | struct cs_blob *blob = csvnode_get_blob(srfmp->vp, 0); |
2440 | if (blob == NULL) { |
2441 | SHARED_REGION_TRACE_ERROR( |
2442 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2443 | "missing CS blob\n" , |
2444 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2445 | proc_getpid(p), p->p_comm, |
2446 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2447 | srfmp->vp->v_name)); |
2448 | goto root_check; |
2449 | } |
2450 | const uint8_t *cdhash = csblob_get_cdhash(blob); |
2451 | if (cdhash == NULL) { |
2452 | SHARED_REGION_TRACE_ERROR( |
2453 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2454 | "missing cdhash\n" , |
2455 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2456 | proc_getpid(p), p->p_comm, |
2457 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2458 | srfmp->vp->v_name)); |
2459 | goto root_check; |
2460 | } |
2461 | |
2462 | bool in_trust_cache = false; |
2463 | TrustCacheQueryToken_t qt; |
if (query_trust_cache(kTCQueryTypeAll, cdhash, &qt) == KERN_SUCCESS) {
2465 | TCType_t tc_type = kTCTypeInvalid; |
2466 | TCReturn_t tc_ret = amfi->TrustCache.queryGetTCType(&qt, &tc_type); |
2467 | in_trust_cache = (tc_ret.error == kTCReturnSuccess && |
2468 | (tc_type == kTCTypeCryptex1BootOS || |
2469 | tc_type == kTCTypeStatic || |
2470 | tc_type == kTCTypeEngineering)); |
2471 | } |
2472 | if (!in_trust_cache) { |
2473 | SHARED_REGION_TRACE_ERROR( |
2474 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2475 | "not in trust cache\n" , |
2476 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2477 | proc_getpid(p), p->p_comm, |
2478 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2479 | srfmp->vp->v_name)); |
2480 | goto root_check; |
2481 | } |
2482 | goto after_root_check; |
2483 | root_check: |
2484 | #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */ |
2485 | |
2486 | /* The shared cache file must be owned by root */ |
2487 | VATTR_INIT(&va); |
2488 | VATTR_WANTED(&va, va_uid); |
error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2490 | if (error) { |
2491 | SHARED_REGION_TRACE_ERROR( |
2492 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2493 | "vnode_getattr(%p) failed (error=%d)\n" , |
2494 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2495 | proc_getpid(p), p->p_comm, |
2496 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2497 | srfmp->vp->v_name, |
2498 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2499 | error)); |
2500 | goto done; |
2501 | } |
2502 | if (va.va_uid != 0) { |
2503 | SHARED_REGION_TRACE_ERROR( |
2504 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2505 | "owned by uid=%d instead of 0\n" , |
2506 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2507 | proc_getpid(p), p->p_comm, |
2508 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2509 | srfmp->vp->v_name, va.va_uid)); |
2510 | error = EPERM; |
2511 | goto done; |
2512 | } |
2513 | |
2514 | #if XNU_TARGET_OS_OSX && defined(__arm64__) |
2515 | after_root_check: |
2516 | #endif /* XNU_TARGET_OS_OSX && defined(__arm64__) */ |
2517 | |
2518 | #if CONFIG_CSR |
2519 | if (csr_check(CSR_ALLOW_UNRESTRICTED_FS) != 0) { |
2520 | VATTR_INIT(&va); |
2521 | VATTR_WANTED(&va, va_flags); |
error = vnode_getattr(srfmp->vp, &va, vfs_context_current());
2523 | if (error) { |
2524 | SHARED_REGION_TRACE_ERROR( |
2525 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2526 | "vnode_getattr(%p) failed (error=%d)\n" , |
2527 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2528 | proc_getpid(p), p->p_comm, |
2529 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2530 | srfmp->vp->v_name, |
2531 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2532 | error)); |
2533 | goto done; |
2534 | } |
2535 | |
2536 | if (!(va.va_flags & SF_RESTRICTED)) { |
2537 | /* |
2538 | * CSR is not configured in CSR_ALLOW_UNRESTRICTED_FS mode, and |
2539 | * the shared cache file is NOT SIP-protected, so reject the |
2540 | * mapping request |
2541 | */ |
2542 | SHARED_REGION_TRACE_ERROR( |
2543 | ("shared_region: %p [%d(%s)] map(%p:'%s'), " |
2544 | "vnode is not SIP-protected. \n" , |
2545 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2546 | proc_getpid(p), p->p_comm, |
2547 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2548 | srfmp->vp->v_name)); |
2549 | error = EPERM; |
2550 | goto done; |
2551 | } |
2552 | } |
2553 | #else /* CONFIG_CSR */ |
2554 | |
2555 | /* |
2556 | * Devices without SIP/ROSP need to make sure that the shared cache |
2557 | * is either on the root volume or in the preboot cryptex volume. |
2558 | */ |
2559 | assert(rdir_vp != NULL); |
2560 | if (srfmp->vp->v_mount != rdir_vp->v_mount) { |
2561 | vnode_t preboot_vp = NULL; |
2562 | #if XNU_TARGET_OS_OSX |
2563 | #define PREBOOT_CRYPTEX_PATH "/System/Volumes/Preboot/Cryptexes" |
2564 | #else |
2565 | #define PREBOOT_CRYPTEX_PATH "/private/preboot/Cryptexes" |
2566 | #endif |
2567 | error = vnode_lookup(PREBOOT_CRYPTEX_PATH, 0, &preboot_vp, vfs_context_current()); |
2568 | if (error || srfmp->vp->v_mount != preboot_vp->v_mount) { |
2569 | SHARED_REGION_TRACE_ERROR( |
2570 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2571 | "not on process' root volume nor preboot volume\n" , |
2572 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2573 | proc_getpid(p), p->p_comm, |
2574 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2575 | srfmp->vp->v_name)); |
2576 | error = EPERM; |
2577 | if (preboot_vp) { |
2578 | (void)vnode_put(preboot_vp); |
2579 | } |
2580 | goto done; |
2581 | } else if (preboot_vp) { |
2582 | (void)vnode_put(preboot_vp); |
2583 | } |
2584 | } |
2585 | #endif /* CONFIG_CSR */ |
2586 | |
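/*
 * When shared cache directory enforcement is on, the file's parent
 * directory must be one of the expected shared cache directories
 * (DriverKit processes use a separate list).
 */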
2587 | if (scdir_enforce) { |
char **expected_scdir_path;
2589 | struct vnode *scdir_vp = NULL; |
2590 | for (expected_scdir_path = is_driverkit ? driverkit_scdir_path : scdir_path; |
2591 | *expected_scdir_path != NULL; |
2592 | expected_scdir_path++) { |
2593 | /* get vnode for expected_scdir_path */ |
error = vnode_lookup(*expected_scdir_path, 0, &scdir_vp, vfs_context_current());
2595 | if (error) { |
2596 | SHARED_REGION_TRACE_ERROR( |
2597 | ("shared_region: %p [%d(%s)]: " |
2598 | "vnode_lookup(%s) failed (error=%d)\n" , |
2599 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2600 | proc_getpid(p), p->p_comm, |
2601 | *expected_scdir_path, error)); |
2602 | continue; |
2603 | } |
2604 | |
2605 | /* check if parent is scdir_vp */ |
2606 | assert(scdir_vp != NULL); |
if (vnode_parent(srfmp->vp) == scdir_vp) {
(void)vnode_put(scdir_vp);
2609 | scdir_vp = NULL; |
2610 | goto scdir_ok; |
2611 | } |
(void)vnode_put(scdir_vp);
2613 | scdir_vp = NULL; |
2614 | } |
2615 | /* nothing matches */ |
2616 | SHARED_REGION_TRACE_ERROR( |
2617 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2618 | "shared cache file not in expected directory\n" , |
2619 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2620 | proc_getpid(p), p->p_comm, |
2621 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2622 | srfmp->vp->v_name)); |
2623 | error = EPERM; |
2624 | goto done; |
2625 | } |
2626 | scdir_ok: |
2627 | |
2628 | /* get vnode size */ |
2629 | error = vnode_size(srfmp->vp, &fs, vfs_context_current()); |
2630 | if (error) { |
2631 | SHARED_REGION_TRACE_ERROR( |
2632 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2633 | "vnode_size(%p) failed (error=%d)\n" , |
2634 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2635 | proc_getpid(p), p->p_comm, |
2636 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2637 | srfmp->vp->v_name, |
2638 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), error)); |
2639 | goto done; |
2640 | } |
2641 | srfmp->file_size = fs; |
2642 | |
2643 | /* get the file's memory object handle */ |
2644 | srfmp->file_control = ubc_getobject(srfmp->vp, UBC_HOLDOBJECT); |
2645 | if (srfmp->file_control == MEMORY_OBJECT_CONTROL_NULL) { |
2646 | SHARED_REGION_TRACE_ERROR( |
2647 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2648 | "no memory object\n" , |
2649 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2650 | proc_getpid(p), p->p_comm, |
2651 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2652 | srfmp->vp->v_name)); |
2653 | error = EINVAL; |
2654 | goto done; |
2655 | } |
2656 | |
2657 | /* check that the mappings are properly covered by code signatures */ |
2658 | if (!cs_system_enforcement()) { |
2659 | /* code signing is not enforced: no need to check */ |
2660 | } else { |
2661 | for (i = 0; i < srfmp->mappings_count; i++) { |
2662 | if (srfmp->mappings[i].sms_init_prot & VM_PROT_ZF) { |
2663 | /* zero-filled mapping: not backed by the file */ |
2664 | continue; |
2665 | } |
2666 | if (ubc_cs_is_range_codesigned(srfmp->vp, |
2667 | srfmp->mappings[i].sms_file_offset, |
2668 | srfmp->mappings[i].sms_size)) { |
2669 | /* this mapping is fully covered by code signatures */ |
2670 | continue; |
2671 | } |
2672 | SHARED_REGION_TRACE_ERROR( |
2673 | ("shared_region: %p [%d(%s)] map(%p:'%s'): " |
2674 | "mapping #%d/%d [0x%llx:0x%llx:0x%llx:0x%x:0x%x] " |
2675 | "is not code-signed\n" , |
2676 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2677 | proc_getpid(p), p->p_comm, |
2678 | (void *)VM_KERNEL_ADDRPERM(srfmp->vp), |
2679 | srfmp->vp->v_name, |
2680 | i, srfmp->mappings_count, |
2681 | srfmp->mappings[i].sms_address, |
2682 | srfmp->mappings[i].sms_size, |
2683 | srfmp->mappings[i].sms_file_offset, |
2684 | srfmp->mappings[i].sms_max_prot, |
2685 | srfmp->mappings[i].sms_init_prot)); |
2686 | error = EINVAL; |
2687 | goto done; |
2688 | } |
2689 | } |
2690 | } |
2691 | done: |
2692 | if (error != 0) { |
shared_region_map_and_slide_cleanup(p, files_count, *sr_file_mappings, shared_region);
2694 | *sr_file_mappings = NULL; |
2695 | *shared_region_ptr = NULL; |
2696 | } |
2697 | return error; |
2698 | } |
2699 | |
2700 | /* |
2701 | * shared_region_map_np() |
2702 | * |
2703 | * This system call is intended for dyld. |
2704 | * |
2705 | * dyld uses this to map a shared cache file into a shared region. |
2706 | * This is usually done only the first time a shared cache is needed. |
2707 | * Subsequent processes will just use the populated shared region without |
2708 | * requiring any further setup. |
2709 | */ |
2710 | static int |
2711 | _shared_region_map_and_slide( |
2712 | struct proc *p, |
2713 | uint32_t files_count, |
2714 | struct shared_file_np *files, |
2715 | uint32_t mappings_count, |
2716 | struct shared_file_mapping_slide_np *mappings) |
2717 | { |
2718 | int error = 0; |
2719 | kern_return_t kr = KERN_SUCCESS; |
2720 | struct _sr_file_mappings *sr_file_mappings = NULL; |
2721 | struct vnode *rdir_vp = NULL; |
2722 | struct vm_shared_region *shared_region = NULL; |
2723 | |
2724 | /* |
2725 | * Get a reference to the current proc's root dir. |
2726 | * Need this to prevent racing with chroot. |
2727 | */ |
2728 | proc_fdlock(p); |
2729 | rdir_vp = p->p_fd.fd_rdir; |
2730 | if (rdir_vp == NULL) { |
2731 | rdir_vp = rootvnode; |
2732 | } |
2733 | assert(rdir_vp != NULL); |
2734 | vnode_get(rdir_vp); |
2735 | proc_fdunlock(p); |
2736 | |
2737 | /* |
2738 | * Turn files, mappings into sr_file_mappings and other setup. |
2739 | */ |
2740 | error = shared_region_map_and_slide_setup(p, files_count, |
2741 | files, mappings_count, mappings, |
&sr_file_mappings, &shared_region, rdir_vp);
2743 | if (error != 0) { |
vnode_put(rdir_vp);
2745 | return error; |
2746 | } |
2747 | |
2748 | /* map the file(s) into that shared region's submap */ |
kr = vm_shared_region_map_file(shared_region, files_count, sr_file_mappings);
2750 | if (kr != KERN_SUCCESS) { |
2751 | SHARED_REGION_TRACE_ERROR(("shared_region: %p [%d(%s)] map(): " |
2752 | "vm_shared_region_map_file() failed kr=0x%x\n" , |
2753 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2754 | proc_getpid(p), p->p_comm, kr)); |
2755 | } |
2756 | |
2757 | /* convert kern_return_t to errno */ |
2758 | switch (kr) { |
2759 | case KERN_SUCCESS: |
2760 | error = 0; |
2761 | break; |
2762 | case KERN_INVALID_ADDRESS: |
2763 | error = EFAULT; |
2764 | break; |
2765 | case KERN_PROTECTION_FAILURE: |
2766 | error = EPERM; |
2767 | break; |
2768 | case KERN_NO_SPACE: |
2769 | error = ENOMEM; |
2770 | break; |
2771 | case KERN_FAILURE: |
2772 | case KERN_INVALID_ARGUMENT: |
2773 | default: |
2774 | error = EINVAL; |
2775 | break; |
2776 | } |
2777 | |
2778 | /* |
2779 | * Mark that this process is now using split libraries. |
2780 | */ |
2781 | if (error == 0 && (p->p_flag & P_NOSHLIB)) { |
2782 | OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); |
2783 | } |
2784 | |
vnode_put(rdir_vp);
2786 | shared_region_map_and_slide_cleanup(p, files_count, sr_file_mappings, shared_region); |
2787 | |
2788 | SHARED_REGION_TRACE_DEBUG( |
2789 | ("shared_region: %p [%d(%s)] <- map\n" , |
2790 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2791 | proc_getpid(p), p->p_comm)); |
2792 | |
2793 | return error; |
2794 | } |
2795 | |
2796 | /* |
2797 | * Clean up part of _shared_region_map_and_slide() |
2798 | * It had to be broken out of _shared_region_map_and_slide() to |
2799 | * prevent compiler inlining from blowing out the stack. |
2800 | */ |
2801 | __attribute__((noinline)) |
2802 | static void |
2803 | shared_region_map_and_slide_cleanup( |
2804 | struct proc *p, |
2805 | uint32_t files_count, |
2806 | struct _sr_file_mappings *sr_file_mappings, |
2807 | struct vm_shared_region *shared_region) |
2808 | { |
2809 | struct _sr_file_mappings *srfmp; |
2810 | struct vnode_attr va; |
2811 | |
2812 | if (sr_file_mappings != NULL) { |
2813 | for (srfmp = &sr_file_mappings[0]; srfmp < &sr_file_mappings[files_count]; srfmp++) { |
2814 | if (srfmp->vp != NULL) { |
2815 | vnode_lock_spin(srfmp->vp); |
2816 | srfmp->vp->v_flag |= VSHARED_DYLD; |
2817 | vnode_unlock(srfmp->vp); |
2818 | |
2819 | /* update the vnode's access time */ |
2820 | if (!(vnode_vfsvisflags(srfmp->vp) & MNT_NOATIME)) { |
2821 | VATTR_INIT(&va); |
nanotime(&va.va_access_time);
2823 | VATTR_SET_ACTIVE(&va, va_access_time); |
vnode_setattr(srfmp->vp, &va, vfs_context_current());
2825 | } |
2826 | |
2827 | #if NAMEDSTREAMS |
2828 | /* |
2829 | * If the shared cache is compressed, it may |
* have a namedstream vnode instantiated
* for it. That namedstream vnode will also
2832 | * have to be marked with VSHARED_DYLD. |
2833 | */ |
2834 | if (vnode_hasnamedstreams(srfmp->vp)) { |
2835 | vnode_t svp; |
2836 | if (vnode_getnamedstream(srfmp->vp, &svp, XATTR_RESOURCEFORK_NAME, |
2837 | NS_OPEN, 0, vfs_context_kernel()) == 0) { |
2838 | vnode_lock_spin(svp); |
2839 | svp->v_flag |= VSHARED_DYLD; |
2840 | vnode_unlock(svp); |
vnode_put(svp);
2842 | } |
2843 | } |
2844 | #endif /* NAMEDSTREAMS */ |
2845 | /* |
2846 | * release the vnode... |
2847 | * ubc_map() still holds it for us in the non-error case |
2848 | */ |
(void) vnode_put(srfmp->vp);
2850 | srfmp->vp = NULL; |
2851 | } |
2852 | if (srfmp->fp != NULL) { |
2853 | /* release the file descriptor */ |
fp_drop(p, srfmp->fd, srfmp->fp, 0);
2855 | srfmp->fp = NULL; |
2856 | } |
2857 | } |
2858 | kfree_type(struct _sr_file_mappings, files_count, sr_file_mappings); |
2859 | } |
2860 | |
2861 | if (shared_region != NULL) { |
2862 | vm_shared_region_deallocate(shared_region); |
2863 | } |
2864 | } |
2865 | |
2866 | |
2867 | /* |
2868 | * For each file mapped, we may have mappings for: |
2869 | * TEXT, EXECUTE, LINKEDIT, DATA_CONST, __AUTH, DATA |
2870 | * so let's round up to 8 mappings per file. |
2871 | */ |
2872 | #define SFM_MAX (_SR_FILE_MAPPINGS_MAX_FILES * 8) /* max mapping structs allowed to pass in */ |
2873 | |
2874 | /* |
2875 | * This is the new interface for setting up shared region mappings. |
2876 | * |
* The slide used for shared regions set up using this interface is chosen
* differently than with the old interface. The slide value passed in each
* shared_file_np represents a max value. The kernel will choose a random
* value based on that, then use it for all shared regions.
2881 | */ |
2882 | #if defined (__x86_64__) |
2883 | #define SLIDE_AMOUNT_MASK ~FOURK_PAGE_MASK |
2884 | #else |
2885 | #define SLIDE_AMOUNT_MASK ~SIXTEENK_PAGE_MASK |
2886 | #endif |
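
/*
 * Worked example of the slide computation below (illustrative only):
 * on a 16K-page configuration, with a max_slide of 0x4000000 from dyld
 * and a random value of 0x12345678,
 *	(0x12345678 % 0x4000000) & ~0x3FFF == 0x2344000
 * i.e. a page-aligned slide below the requested maximum, applied to
 * every file mapped by the call.
 */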
2887 | |
2888 | int |
2889 | shared_region_map_and_slide_2_np( |
2890 | struct proc *p, |
2891 | struct shared_region_map_and_slide_2_np_args *uap, |
2892 | __unused int *retvalp) |
2893 | { |
2894 | unsigned int files_count; |
2895 | struct shared_file_np *shared_files = NULL; |
2896 | unsigned int mappings_count; |
2897 | struct shared_file_mapping_slide_np *mappings = NULL; |
2898 | kern_return_t kr = KERN_SUCCESS; |
2899 | |
2900 | files_count = uap->files_count; |
2901 | mappings_count = uap->mappings_count; |
2902 | |
2903 | if (files_count == 0) { |
2904 | SHARED_REGION_TRACE_INFO( |
2905 | ("shared_region: %p [%d(%s)] map(): " |
2906 | "no files\n" , |
2907 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2908 | proc_getpid(p), p->p_comm)); |
2909 | kr = 0; /* no files to map: we're done ! */ |
2910 | goto done; |
2911 | } else if (files_count <= _SR_FILE_MAPPINGS_MAX_FILES) { |
2912 | shared_files = kalloc_data(files_count * sizeof(shared_files[0]), Z_WAITOK); |
2913 | if (shared_files == NULL) { |
2914 | kr = KERN_RESOURCE_SHORTAGE; |
2915 | goto done; |
2916 | } |
2917 | } else { |
2918 | SHARED_REGION_TRACE_ERROR( |
2919 | ("shared_region: %p [%d(%s)] map(): " |
2920 | "too many files (%d) max %d\n" , |
2921 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2922 | proc_getpid(p), p->p_comm, |
2923 | files_count, _SR_FILE_MAPPINGS_MAX_FILES)); |
2924 | kr = KERN_FAILURE; |
2925 | goto done; |
2926 | } |
2927 | |
2928 | if (mappings_count == 0) { |
2929 | SHARED_REGION_TRACE_INFO( |
2930 | ("shared_region: %p [%d(%s)] map(): " |
2931 | "no mappings\n" , |
2932 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2933 | proc_getpid(p), p->p_comm)); |
2934 | kr = 0; /* no mappings: we're done ! */ |
2935 | goto done; |
2936 | } else if (mappings_count <= SFM_MAX) { |
2937 | mappings = kalloc_data(mappings_count * sizeof(mappings[0]), Z_WAITOK); |
2938 | if (mappings == NULL) { |
2939 | kr = KERN_RESOURCE_SHORTAGE; |
2940 | goto done; |
2941 | } |
2942 | } else { |
2943 | SHARED_REGION_TRACE_ERROR( |
2944 | ("shared_region: %p [%d(%s)] map(): " |
2945 | "too many mappings (%d) max %d\n" , |
2946 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2947 | proc_getpid(p), p->p_comm, |
2948 | mappings_count, SFM_MAX)); |
2949 | kr = KERN_FAILURE; |
2950 | goto done; |
2951 | } |
2952 | |
kr = shared_region_copyin(p, uap->files, files_count, sizeof(shared_files[0]), shared_files);
2954 | if (kr != KERN_SUCCESS) { |
2955 | goto done; |
2956 | } |
2957 | |
kr = shared_region_copyin(p, uap->mappings, mappings_count, sizeof(mappings[0]), mappings);
2959 | if (kr != KERN_SUCCESS) { |
2960 | goto done; |
2961 | } |
2962 | |
2963 | uint32_t max_slide = shared_files[0].sf_slide; |
2964 | uint32_t random_val; |
2965 | uint32_t slide_amount; |
2966 | |
2967 | if (max_slide != 0) { |
read_random(&random_val, sizeof(random_val));
2969 | slide_amount = ((random_val % max_slide) & SLIDE_AMOUNT_MASK); |
2970 | } else { |
2971 | slide_amount = 0; |
2972 | } |
2973 | #if DEVELOPMENT || DEBUG |
2974 | extern bool bootarg_disable_aslr; |
2975 | if (bootarg_disable_aslr) { |
2976 | slide_amount = 0; |
2977 | } |
2978 | #endif /* DEVELOPMENT || DEBUG */ |
2979 | |
2980 | /* |
2981 | * Fix up the mappings to reflect the desired slide. |
2982 | */ |
2983 | unsigned int f; |
2984 | unsigned int m = 0; |
2985 | unsigned int i; |
2986 | for (f = 0; f < files_count; ++f) { |
2987 | shared_files[f].sf_slide = slide_amount; |
2988 | for (i = 0; i < shared_files[f].sf_mappings_count; ++i, ++m) { |
2989 | if (m >= mappings_count) { |
2990 | SHARED_REGION_TRACE_ERROR( |
2991 | ("shared_region: %p [%d(%s)] map(): " |
2992 | "mapping count argument was too small\n" , |
2993 | (void *)VM_KERNEL_ADDRPERM(current_thread()), |
2994 | proc_getpid(p), p->p_comm)); |
2995 | kr = KERN_FAILURE; |
2996 | goto done; |
2997 | } |
2998 | mappings[m].sms_address += slide_amount; |
2999 | if (mappings[m].sms_slide_size != 0) { |
3000 | mappings[m].sms_slide_start += slide_amount; |
3001 | } |
3002 | } |
3003 | } |
3004 | |
kr = _shared_region_map_and_slide(p, files_count, shared_files, mappings_count, mappings);
3006 | done: |
3007 | kfree_data(shared_files, files_count * sizeof(shared_files[0])); |
3008 | kfree_data(mappings, mappings_count * sizeof(mappings[0])); |
3009 | return kr; |
3010 | } |
3011 | |
3012 | /* |
3013 | * A syscall for dyld to use to map data pages that need load time relocation fixups. |
3014 | * The fixups are performed by a custom pager during page-in, so the pages still appear |
3015 | * "clean" and hence are easily discarded under memory pressure. They can be re-paged-in |
3016 | * on demand later, all w/o using the compressor. |
3017 | * |
* Note these pages are treated as MAP_PRIVATE. So if the application dirties any pages while
3019 | * running, they are COW'd as normal. |
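*
* Sketch of the argument contract, as derived from the validation below
* (not a normative layout): every mwl_region must name the same fd and
* carry data-only protections (no VM_PROT_EXECUTE or VM_PROT_ZF, and
* VM_PROT_TPRO only together with VM_PROT_WRITE); the link info blob must
* begin with a struct mwl_info_hdr whose version, page size, and bind/chain
* offsets and sizes all pass the bounds checks.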
3020 | */ |
3021 | int |
3022 | map_with_linking_np( |
3023 | struct proc *p, |
3024 | struct map_with_linking_np_args *uap, |
3025 | __unused int *retvalp) |
3026 | { |
3027 | uint32_t region_count; |
3028 | uint32_t r; |
3029 | struct mwl_region *regions = NULL; |
3030 | struct mwl_region *rp; |
3031 | uint32_t link_info_size; |
3032 | void *link_info = NULL; /* starts with a struct mwl_info_hdr */ |
3033 | struct mwl_info_hdr *info_hdr = NULL; |
3034 | uint64_t binds_size; |
3035 | int fd; |
3036 | struct fileproc *fp = NULL; |
3037 | struct vnode *vp = NULL; |
3038 | size_t file_size; |
3039 | off_t fs; |
3040 | struct vnode_attr va; |
3041 | memory_object_control_t file_control = NULL; |
3042 | int error; |
3043 | kern_return_t kr = KERN_SUCCESS; |
3044 | |
3045 | /* |
3046 | * Check if dyld has told us it finished with this call. |
3047 | */ |
3048 | if (p->p_disallow_map_with_linking) { |
3049 | printf("%s: [%d(%s)]: map__with_linking() was disabled\n" , |
3050 | __func__, proc_getpid(p), p->p_comm); |
3051 | kr = KERN_FAILURE; |
3052 | goto done; |
3053 | } |
3054 | |
3055 | /* |
3056 | * First we do some sanity checking on what dyld has passed us. |
3057 | */ |
3058 | region_count = uap->region_count; |
3059 | link_info_size = uap->link_info_size; |
3060 | if (region_count == 0) { |
3061 | printf("%s: [%d(%s)]: region_count == 0\n" , |
3062 | __func__, proc_getpid(p), p->p_comm); |
3063 | kr = KERN_FAILURE; |
3064 | goto done; |
3065 | } |
3066 | if (region_count > MWL_MAX_REGION_COUNT) { |
3067 | printf("%s: [%d(%s)]: region_count too big %d\n" , |
3068 | __func__, proc_getpid(p), p->p_comm, region_count); |
3069 | kr = KERN_FAILURE; |
3070 | goto done; |
3071 | } |
3072 | |
3073 | if (link_info_size <= MWL_MIN_LINK_INFO_SIZE) { |
3074 | printf("%s: [%d(%s)]: link_info_size too small\n" , |
3075 | __func__, proc_getpid(p), p->p_comm); |
3076 | kr = KERN_FAILURE; |
3077 | goto done; |
3078 | } |
3079 | if (link_info_size >= MWL_MAX_LINK_INFO_SIZE) { |
3080 | printf("%s: [%d(%s)]: link_info_size too big %d\n" , |
3081 | __func__, proc_getpid(p), p->p_comm, link_info_size); |
3082 | kr = KERN_FAILURE; |
3083 | goto done; |
3084 | } |
3085 | |
3086 | /* |
3087 | * Allocate and copyin the regions and link info |
3088 | */ |
3089 | regions = kalloc_data(region_count * sizeof(regions[0]), Z_WAITOK); |
3090 | if (regions == NULL) { |
3091 | printf("%s: [%d(%s)]: failed to allocate regions\n" , |
3092 | __func__, proc_getpid(p), p->p_comm); |
3093 | kr = KERN_RESOURCE_SHORTAGE; |
3094 | goto done; |
3095 | } |
kr = shared_region_copyin(p, uap->regions, region_count, sizeof(regions[0]), regions);
3097 | if (kr != KERN_SUCCESS) { |
3098 | printf("%s: [%d(%s)]: failed to copyin regions kr=%d\n" , |
3099 | __func__, proc_getpid(p), p->p_comm, kr); |
3100 | goto done; |
3101 | } |
3102 | |
3103 | link_info = kalloc_data(link_info_size, Z_WAITOK); |
3104 | if (link_info == NULL) { |
3105 | printf("%s: [%d(%s)]: failed to allocate link_info\n" , |
3106 | __func__, proc_getpid(p), p->p_comm); |
3107 | kr = KERN_RESOURCE_SHORTAGE; |
3108 | goto done; |
3109 | } |
kr = shared_region_copyin(p, uap->link_info, 1, link_info_size, link_info);
3111 | if (kr != KERN_SUCCESS) { |
3112 | printf("%s: [%d(%s)]: failed to copyin link_info kr=%d\n" , |
3113 | __func__, proc_getpid(p), p->p_comm, kr); |
3114 | goto done; |
3115 | } |
3116 | |
3117 | /* |
* Do some verification of the data structures.
3119 | */ |
3120 | info_hdr = (struct mwl_info_hdr *)link_info; |
3121 | if (info_hdr->mwli_version != MWL_INFO_VERS) { |
3122 | printf("%s: [%d(%s)]: unrecognized mwli_version=%d\n" , |
3123 | __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_version); |
3124 | kr = KERN_FAILURE; |
3125 | goto done; |
3126 | } |
3127 | |
3128 | if (info_hdr->mwli_binds_offset > link_info_size) { |
3129 | printf("%s: [%d(%s)]: mwli_binds_offset too large %d\n" , |
3130 | __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_offset); |
3131 | kr = KERN_FAILURE; |
3132 | goto done; |
3133 | } |
3134 | |
/* some older devices have s/w page size > h/w page size; no need to support them */
3136 | if (info_hdr->mwli_page_size != PAGE_SIZE) { |
3137 | /* no printf, since this is expected on some devices */ |
3138 | kr = KERN_INVALID_ARGUMENT; |
3139 | goto done; |
3140 | } |
3141 | |
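/* each bind entry is a single pointer: 4 bytes for DYLD_CHAINED_PTR_32, 8 otherwise */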
3142 | binds_size = (uint64_t)info_hdr->mwli_binds_count * |
3143 | ((info_hdr->mwli_pointer_format == DYLD_CHAINED_PTR_32) ? 4 : 8); |
3144 | if (binds_size > link_info_size - info_hdr->mwli_binds_offset) { |
3145 | printf("%s: [%d(%s)]: mwli_binds_count too large %d\n" , |
3146 | __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_binds_count); |
3147 | kr = KERN_FAILURE; |
3148 | goto done; |
3149 | } |
3150 | |
3151 | if (info_hdr->mwli_chains_offset > link_info_size) { |
3152 | printf("%s: [%d(%s)]: mwli_chains_offset too large %d\n" , |
3153 | __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_offset); |
3154 | kr = KERN_FAILURE; |
3155 | goto done; |
3156 | } |
3157 | |
3158 | |
3159 | /* |
* Ensure the chained starts info fits within the link info and make sure
* the segment info offsets are within bounds.
3162 | */ |
3163 | if (info_hdr->mwli_chains_size < sizeof(struct dyld_chained_starts_in_image)) { |
3164 | printf("%s: [%d(%s)]: mwli_chains_size too small %d\n" , |
3165 | __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size); |
3166 | kr = KERN_FAILURE; |
3167 | goto done; |
3168 | } |
3169 | if (info_hdr->mwli_chains_size > link_info_size - info_hdr->mwli_chains_offset) { |
3170 | printf("%s: [%d(%s)]: mwli_chains_size too large %d\n" , |
3171 | __func__, proc_getpid(p), p->p_comm, info_hdr->mwli_chains_size); |
3172 | kr = KERN_FAILURE; |
3173 | goto done; |
3174 | } |
3175 | |
3176 | /* Note that more verification of offsets is done in the pager itself */ |
3177 | |
3178 | /* |
3179 | * Ensure we've only been given one FD and verify valid protections. |
3180 | */ |
3181 | fd = regions[0].mwlr_fd; |
3182 | for (r = 0; r < region_count; ++r) { |
3183 | if (regions[r].mwlr_fd != fd) { |
3184 | printf("%s: [%d(%s)]: mwlr_fd mismatch %d and %d\n" , |
3185 | __func__, proc_getpid(p), p->p_comm, fd, regions[r].mwlr_fd); |
3186 | kr = KERN_FAILURE; |
3187 | goto done; |
3188 | } |
3189 | |
3190 | /* |
3191 | * Only allow data mappings and not zero fill. Permit TPRO |
3192 | * mappings only when VM_PROT_READ | VM_PROT_WRITE. |
3193 | */ |
3194 | if (regions[r].mwlr_protections & VM_PROT_EXECUTE) { |
3195 | printf("%s: [%d(%s)]: mwlr_protections EXECUTE not allowed\n" , |
3196 | __func__, proc_getpid(p), p->p_comm); |
3197 | kr = KERN_FAILURE; |
3198 | goto done; |
3199 | } |
3200 | if (regions[r].mwlr_protections & VM_PROT_ZF) { |
3201 | printf("%s: [%d(%s)]: region %d, found VM_PROT_ZF not allowed\n" , |
3202 | __func__, proc_getpid(p), p->p_comm, r); |
3203 | kr = KERN_FAILURE; |
3204 | goto done; |
3205 | } |
3206 | if ((regions[r].mwlr_protections & VM_PROT_TPRO) && |
3207 | !(regions[r].mwlr_protections & VM_PROT_WRITE)) { |
3208 | printf("%s: [%d(%s)]: region %d, found VM_PROT_TPRO without VM_PROT_WRITE\n" , |
3209 | __func__, proc_getpid(p), p->p_comm, r); |
3210 | kr = KERN_FAILURE; |
3211 | goto done; |
3212 | } |
3213 | } |
3214 | |
3215 | |
3216 | /* get file structure from file descriptor */ |
error = fp_get_ftype(p, fd, DTYPE_VNODE, EINVAL, &fp);
3218 | if (error) { |
3219 | printf("%s: [%d(%s)]: fp_get_ftype() failed, error %d\n" , |
3220 | __func__, proc_getpid(p), p->p_comm, error); |
3221 | kr = KERN_FAILURE; |
3222 | goto done; |
3223 | } |
3224 | |
3225 | /* We need at least read permission on the file */ |
3226 | if (!(fp->fp_glob->fg_flag & FREAD)) { |
3227 | printf("%s: [%d(%s)]: not readable\n" , |
3228 | __func__, proc_getpid(p), p->p_comm); |
3229 | kr = KERN_FAILURE; |
3230 | goto done; |
3231 | } |
3232 | |
3233 | /* Get the vnode from file structure */ |
3234 | vp = (struct vnode *)fp_get_data(fp); |
3235 | error = vnode_getwithref(vp); |
3236 | if (error) { |
3237 | printf("%s: [%d(%s)]: failed to get vnode, error %d\n" , |
3238 | __func__, proc_getpid(p), p->p_comm, error); |
3239 | kr = KERN_FAILURE; |
3240 | vp = NULL; /* just to be sure */ |
3241 | goto done; |
3242 | } |
3243 | |
3244 | /* Make sure the vnode is a regular file */ |
3245 | if (vp->v_type != VREG) { |
3246 | printf("%s: [%d(%s)]: vnode not VREG\n" , |
3247 | __func__, proc_getpid(p), p->p_comm); |
3248 | kr = KERN_FAILURE; |
3249 | goto done; |
3250 | } |
3251 | |
3252 | /* get vnode size */ |
3253 | error = vnode_size(vp, &fs, vfs_context_current()); |
3254 | if (error) { |
3255 | goto done; |
3256 | } |
3257 | file_size = fs; |
3258 | |
3259 | /* get the file's memory object handle */ |
3260 | file_control = ubc_getobject(vp, UBC_HOLDOBJECT); |
3261 | if (file_control == MEMORY_OBJECT_CONTROL_NULL) { |
3262 | printf("%s: [%d(%s)]: no memory object\n" , |
3263 | __func__, proc_getpid(p), p->p_comm); |
3264 | kr = KERN_FAILURE; |
3265 | goto done; |
3266 | } |
3267 | |
3268 | for (r = 0; r < region_count; ++r) { |
rp = &regions[r];
3270 | |
3271 | #if CONFIG_MACF |
3272 | vm_prot_t prot = (rp->mwlr_protections & VM_PROT_ALL); |
error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
fp->fp_glob, prot, MAP_FILE | MAP_PRIVATE | MAP_FIXED, rp->mwlr_file_offset, &prot);
3275 | if (error) { |
3276 | printf("%s: [%d(%s)]: mac_file_check_mmap() failed, region %d, error %d\n" , |
3277 | __func__, proc_getpid(p), p->p_comm, r, error); |
3278 | kr = KERN_FAILURE; |
3279 | goto done; |
3280 | } |
3281 | #endif /* MAC */ |
3282 | |
3283 | /* check that the mappings are properly covered by code signatures */ |
3284 | if (cs_system_enforcement()) { |
3285 | if (!ubc_cs_is_range_codesigned(vp, rp->mwlr_file_offset, rp->mwlr_size)) { |
3286 | printf("%s: [%d(%s)]: region %d, not code signed\n" , |
3287 | __func__, proc_getpid(p), p->p_comm, r); |
3288 | kr = KERN_FAILURE; |
3289 | goto done; |
3290 | } |
3291 | } |
3292 | } |
3293 | |
3294 | /* update the vnode's access time */ |
3295 | if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) { |
3296 | VATTR_INIT(&va); |
nanotime(&va.va_access_time);
3298 | VATTR_SET_ACTIVE(&va, va_access_time); |
vnode_setattr(vp, &va, vfs_context_current());
3300 | } |
3301 | |
3302 | /* get the VM to do the work */ |
kr = vm_map_with_linking(proc_task(p), regions, region_count, link_info, link_info_size, file_control);
3304 | |
3305 | done: |
3306 | if (fp != NULL) { |
3307 | /* release the file descriptor */ |
fp_drop(p, fd, fp, 0);
3309 | } |
3310 | if (vp != NULL) { |
3311 | (void)vnode_put(vp); |
3312 | } |
3313 | if (regions != NULL) { |
3314 | kfree_data(regions, region_count * sizeof(regions[0])); |
3315 | } |
3316 | /* link info is used in the pager if things worked */ |
3317 | if (link_info != NULL && kr != KERN_SUCCESS) { |
3318 | kfree_data(link_info, link_info_size); |
3319 | } |
3320 | |
3321 | switch (kr) { |
3322 | case KERN_SUCCESS: |
3323 | return 0; |
3324 | case KERN_RESOURCE_SHORTAGE: |
3325 | return ENOMEM; |
3326 | default: |
3327 | return EINVAL; |
3328 | } |
3329 | } |
3330 | |
3331 | #if DEBUG || DEVELOPMENT |
3332 | SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count, |
3333 | CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count, 0, "" ); |
3334 | SYSCTL_INT(_vm, OID_AUTO, dyld_pager_count_max, |
3335 | CTLFLAG_RD | CTLFLAG_LOCKED, &dyld_pager_count_max, 0, "" ); |
3336 | #endif /* DEBUG || DEVELOPMENT */ |
3337 | |
3338 | /* sysctl overflow room */ |
3339 | |
3340 | SYSCTL_INT(_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, |
3341 | (int *) &page_size, 0, "vm page size" ); |
3342 | |
3343 | /* vm_page_free_target is provided as a makeshift solution for applications that want to |
3344 | * allocate buffer space, possibly purgeable memory, but not cause inactive pages to be |
3345 | * reclaimed. It allows the app to calculate how much memory is free outside the free target. */ |
3346 | extern unsigned int vm_page_free_target; |
3347 | SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED, |
3348 | &vm_page_free_target, 0, "Pageout daemon free target" ); |
3349 | |
3350 | SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED, |
3351 | &vm_pageout_state.vm_memory_pressure, 0, "Memory pressure indicator" ); |
3352 | |
3353 | static int |
3354 | vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS |
3355 | { |
3356 | #pragma unused(oidp, arg1, arg2) |
3357 | unsigned int page_free_wanted; |
3358 | |
3359 | page_free_wanted = mach_vm_ctl_page_free_wanted(); |
3360 | return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted)); |
3361 | } |
3362 | SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted, |
3363 | CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, |
3364 | 0, 0, vm_ctl_page_free_wanted, "I" , "" ); |
3365 | |
3366 | extern unsigned int vm_page_purgeable_count; |
3367 | SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
3368 | &vm_page_purgeable_count, 0, "Purgeable page count" ); |
3369 | |
3370 | extern unsigned int vm_page_purgeable_wired_count; |
3371 | SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
3372 | &vm_page_purgeable_wired_count, 0, "Wired purgeable page count" ); |
3373 | |
3374 | extern unsigned int vm_page_kern_lpage_count; |
3375 | SYSCTL_INT(_vm, OID_AUTO, kern_lpage_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
3376 | &vm_page_kern_lpage_count, 0, "kernel used large pages" ); |
3377 | |
3378 | #if DEVELOPMENT || DEBUG |
3379 | #if __ARM_MIXED_PAGE_SIZE__ |
3380 | static int vm_mixed_pagesize_supported = 1; |
3381 | #else |
3382 | static int vm_mixed_pagesize_supported = 0; |
3383 | #endif /*__ARM_MIXED_PAGE_SIZE__ */ |
3384 | SYSCTL_INT(_debug, OID_AUTO, vm_mixed_pagesize_supported, CTLFLAG_ANYBODY | CTLFLAG_RD | CTLFLAG_LOCKED, |
3385 | &vm_mixed_pagesize_supported, 0, "kernel support for mixed pagesize" ); |
3386 | |
3387 | SCALABLE_COUNTER_DECLARE(vm_page_grab_count); |
3388 | SYSCTL_SCALABLE_COUNTER(_vm, pages_grabbed, vm_page_grab_count, "Total pages grabbed" ); |
3389 | SYSCTL_ULONG(_vm, OID_AUTO, pages_freed, CTLFLAG_RD | CTLFLAG_LOCKED, |
3390 | &vm_pageout_vminfo.vm_page_pages_freed, "Total pages freed" ); |
3391 | |
3392 | SYSCTL_INT(_vm, OID_AUTO, pageout_purged_objects, CTLFLAG_RD | CTLFLAG_LOCKED, |
3393 | &vm_pageout_debug.vm_pageout_purged_objects, 0, "System purged object count" ); |
3394 | SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, |
3395 | &vm_pageout_debug.vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)" ); |
3396 | SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, |
3397 | &vm_pageout_debug.vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)" ); |
3398 | |
3399 | SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, |
3400 | &vm_pageout_debug.vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated" ); |
3401 | SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, |
3402 | &vm_pageout_debug.vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated" ); |
3403 | SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, |
3404 | &vm_pageout_debug.vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated" ); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */ |
3405 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, |
3406 | &vm_pageout_vminfo.vm_pageout_freed_cleaned, "Cleaned pages freed" ); |
3407 | SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, |
3408 | &vm_pageout_debug.vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated" ); |
3409 | SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, |
3410 | &vm_pageout_debug.vm_pageout_enqueued_cleaned, 0, "" ); /* sum of next two */ |
3411 | #endif /* DEVELOPMENT || DEBUG */ |
3412 | |
3413 | extern int madvise_free_debug; |
3414 | SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED, |
3415 | &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)" ); |
3416 | extern int madvise_free_debug_sometimes; |
3417 | SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug_sometimes, CTLFLAG_RW | CTLFLAG_LOCKED, |
3418 | &madvise_free_debug_sometimes, 0, "sometimes zero-fill on madvise(MADV_FREE*)" ); |
3419 | |
3420 | SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED, |
3421 | &vm_page_stats_reusable.reusable_count, 0, "Reusable page count" ); |
3422 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED, |
3423 | &vm_page_stats_reusable.reusable_pages_success, "" ); |
3424 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED, |
3425 | &vm_page_stats_reusable.reusable_pages_failure, "" ); |
3426 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_pages_shared, CTLFLAG_RD | CTLFLAG_LOCKED, |
3427 | &vm_page_stats_reusable.reusable_pages_shared, "" ); |
3428 | SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
3429 | &vm_page_stats_reusable.all_reusable_calls, "" ); |
3430 | SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
3431 | &vm_page_stats_reusable.partial_reusable_calls, "" ); |
3432 | SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, |
3433 | &vm_page_stats_reusable.reuse_pages_success, "" ); |
3434 | SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, |
3435 | &vm_page_stats_reusable.reuse_pages_failure, "" ); |
3436 | SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
3437 | &vm_page_stats_reusable.all_reuse_calls, "" ); |
3438 | SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED, |
3439 | &vm_page_stats_reusable.partial_reuse_calls, "" ); |
3440 | SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, |
3441 | &vm_page_stats_reusable.can_reuse_success, "" ); |
3442 | SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, |
3443 | &vm_page_stats_reusable.can_reuse_failure, "" ); |
3444 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED, |
3445 | &vm_page_stats_reusable.reusable_reclaimed, "" ); |
3446 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_nonwritable, CTLFLAG_RD | CTLFLAG_LOCKED, |
3447 | &vm_page_stats_reusable.reusable_nonwritable, "" ); |
3448 | SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED, |
3449 | &vm_page_stats_reusable.reusable_shared, "" ); |
3450 | SYSCTL_QUAD(_vm, OID_AUTO, free_shared, CTLFLAG_RD | CTLFLAG_LOCKED, |
3451 | &vm_page_stats_reusable.free_shared, "" ); |
3452 | |
3453 | |
3454 | extern unsigned int vm_page_free_count, vm_page_speculative_count; |
3455 | SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "" ); |
3456 | SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "" ); |
3457 | |
3458 | extern unsigned int vm_page_cleaned_count; |
3459 | SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size" ); |
3460 | |
3461 | extern unsigned int vm_page_pageable_internal_count, vm_page_pageable_external_count; |
3462 | SYSCTL_UINT(_vm, OID_AUTO, page_pageable_internal_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_internal_count, 0, "" ); |
3463 | SYSCTL_UINT(_vm, OID_AUTO, page_pageable_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pageable_external_count, 0, "" ); |
3464 | |
3465 | /* pageout counts */ |
3466 | SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_clean, 0, "" ); |
3467 | SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_state.vm_pageout_inactive_used, 0, "" ); |
3468 | |
3469 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_internal, "" ); |
3470 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_inactive_dirty_external, "" ); |
3471 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "" ); |
3472 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_external, "" ); |
3473 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_speculative, "" ); |
3474 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_freed_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_freed_cleaned, "" ); |
3475 | |
3476 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_sharedcache, "" ); |
3477 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_sharedcache, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_sharedcache, "" ); |
3478 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_protected_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_protected_realtime, "" ); |
3479 | SYSCTL_ULONG(_vm, OID_AUTO, pageout_forcereclaimed_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_vminfo.vm_pageout_forcereclaimed_realtime, "" ); |
3480 | extern unsigned int vm_page_realtime_count; |
3481 | SYSCTL_UINT(_vm, OID_AUTO, page_realtime_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_realtime_count, 0, "" ); |
3482 | extern int vm_pageout_protect_realtime; |
3483 | SYSCTL_INT(_vm, OID_AUTO, pageout_protect_realtime, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_protect_realtime, 0, "" ); |
3484 | |
3485 | /* counts of pages prefaulted when entering a memory object */ |
3486 | extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout; |
3487 | SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "" ); |
3488 | SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "" ); |
3489 | |
3490 | #if defined (__x86_64__) |
3491 | extern unsigned int vm_clump_promote_threshold; |
3492 | SYSCTL_UINT(_vm, OID_AUTO, vm_clump_promote_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_clump_promote_threshold, 0, "clump size threshold for promotes" ); |
3493 | #if DEVELOPMENT || DEBUG |
3494 | extern unsigned long vm_clump_stats[]; |
3495 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[1], "free page allocations from clump of 1 page" ); |
3496 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[2], "free page allocations from clump of 2 pages" ); |
3497 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[3], "free page allocations from clump of 3 pages" ); |
3498 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats4, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[4], "free page allocations from clump of 4 pages" ); |
3499 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats5, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[5], "free page allocations from clump of 5 pages" ); |
3500 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats6, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[6], "free page allocations from clump of 6 pages" ); |
3501 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats7, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[7], "free page allocations from clump of 7 pages" ); |
3502 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats8, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[8], "free page allocations from clump of 8 pages" ); |
3503 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats9, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[9], "free page allocations from clump of 9 pages" ); |
3504 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats10, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[10], "free page allocations from clump of 10 pages" ); |
3505 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats11, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[11], "free page allocations from clump of 11 pages" ); |
3506 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats12, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[12], "free page allocations from clump of 12 pages" ); |
3507 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats13, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[13], "free page allocations from clump of 13 pages" ); |
3508 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats14, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[14], "free page allocations from clump of 14 pages" ); |
3509 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats15, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[15], "free page allocations from clump of 15 pages" ); |
3510 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_stats16, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_stats[16], "free page allocations from clump of 16 pages" ); |
3511 | extern unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes; |
3512 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_alloc, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_allocs, "free page allocations" ); |
3513 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inserts, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inserts, "free page insertions" ); |
3514 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_inrange, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_inrange, "free page insertions that are part of vm_pages" ); |
3515 | SYSCTL_LONG(_vm, OID_AUTO, vm_clump_promotes, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_clump_promotes, "pages promoted to head" ); |
3516 | #endif /* if DEVELOPMENT || DEBUG */ |
3517 | #endif /* #if defined (__x86_64__) */ |
3518 | |
3519 | #if CONFIG_SECLUDED_MEMORY |
3520 | |
3521 | SYSCTL_UINT(_vm, OID_AUTO, num_tasks_can_use_secluded_mem, CTLFLAG_RD | CTLFLAG_LOCKED, &num_tasks_can_use_secluded_mem, 0, "" ); |
3522 | extern unsigned int vm_page_secluded_target; |
3523 | extern unsigned int vm_page_secluded_count; |
3524 | extern unsigned int vm_page_secluded_count_free; |
3525 | extern unsigned int vm_page_secluded_count_inuse; |
3526 | extern unsigned int vm_page_secluded_count_over_target; |
3527 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_target, 0, "" ); |
3528 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count, 0, "" ); |
3529 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_free, 0, "" ); |
3530 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_inuse, 0, "" ); |
3531 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_count_over_target, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded_count_over_target, 0, "" ); |
3532 | |
3533 | extern struct vm_page_secluded_data vm_page_secluded; |
3534 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_eligible, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.eligible_for_secluded, 0, "" ); |
3535 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_free, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_free, 0, "" ); |
3536 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_success_other, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_success_other, 0, "" ); |
3537 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_locked, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_locked, 0, "" ); |
3538 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_state, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_state, 0, "" ); |
3539 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_realtime, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_realtime, 0, "" ); |
3540 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_failure_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_failure_dirty, 0, "" ); |
3541 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit, 0, "" ); |
3542 | SYSCTL_UINT(_vm, OID_AUTO, page_secluded_grab_for_iokit_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_secluded.grab_for_iokit_success, 0, "" ); |
3543 | |
3544 | #endif /* CONFIG_SECLUDED_MEMORY */ |
3545 | |
3546 | #pragma mark Deferred Reclaim |
3547 | |
3548 | #if CONFIG_DEFERRED_RECLAIM |
3549 | |
3550 | #if DEVELOPMENT || DEBUG |
3551 | /* |
3552 | * VM reclaim testing |
3553 | */ |
3554 | extern bool vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid_t pid); |
3555 | |
3556 | static int |
3557 | sysctl_vm_reclaim_drain_async_queue SYSCTL_HANDLER_ARGS |
3558 | { |
3559 | #pragma unused(arg1, arg2) |
3560 | int error = EINVAL, pid = 0; |
3561 | /* |
3562 | * Only send on write |
3563 | */ |
3564 | error = sysctl_handle_int(oidp, &pid, 0, req); |
3565 | if (error || !req->newptr) { |
3566 | return error; |
3567 | } |
3568 | |
3569 | bool success = vm_deferred_reclamation_block_until_pid_has_been_reclaimed(pid); |
3570 | if (success) { |
3571 | error = 0; |
3572 | } |
3573 | |
3574 | return error; |
3575 | } |
3576 | |
3577 | SYSCTL_PROC(_vm, OID_AUTO, reclaim_drain_async_queue, |
3578 | CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, |
3579 | &sysctl_vm_reclaim_drain_async_queue, "I" , "" ); |
3580 | |
3581 | |
3582 | extern uint64_t vm_reclaim_max_threshold; |
3583 | extern uint64_t vm_reclaim_trim_divisor; |
3584 | |
3585 | SYSCTL_ULONG(_vm, OID_AUTO, reclaim_max_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_max_threshold, "" ); |
3586 | SYSCTL_ULONG(_vm, OID_AUTO, reclaim_trim_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_reclaim_trim_divisor, "" ); |
3587 | #endif /* DEVELOPMENT || DEBUG */ |
3588 | |
3589 | #endif /* CONFIG_DEFERRED_RECLAIM */ |
3590 | |
3591 | #include <kern/thread.h> |
3592 | #include <sys/user.h> |
3593 | |
3594 | void vm_pageout_io_throttle(void); |
3595 | |
3596 | void |
3597 | vm_pageout_io_throttle(void) |
3598 | { |
3599 | struct uthread *uthread = current_uthread(); |
3600 | |
3601 | /* |
3602 | * thread is marked as a low priority I/O type |
3603 | * and the I/O we issued while in this cleaning operation |
3604 | * collided with normal I/O operations... we'll |
3605 | * delay in order to mitigate the impact of this |
3606 | * task on the normal operation of the system |
3607 | */ |
3608 | |
3609 | if (uthread->uu_lowpri_window) { |
throttle_lowpri_io(1);
3611 | } |
3612 | } |
3613 | |
3614 | int |
3615 | vm_pressure_monitor( |
3616 | __unused struct proc *p, |
3617 | struct vm_pressure_monitor_args *uap, |
3618 | int *retval) |
3619 | { |
3620 | kern_return_t kr; |
3621 | uint32_t pages_reclaimed; |
3622 | uint32_t pages_wanted; |
3623 | |
3624 | kr = mach_vm_pressure_monitor( |
(boolean_t) uap->wait_for_pressure,
uap->nsecs_monitored,
(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
&pages_wanted);
3629 | |
3630 | switch (kr) { |
3631 | case KERN_SUCCESS: |
3632 | break; |
3633 | case KERN_ABORTED: |
3634 | return EINTR; |
3635 | default: |
3636 | return EINVAL; |
3637 | } |
3638 | |
3639 | if (uap->pages_reclaimed) { |
3640 | if (copyout((void *)&pages_reclaimed, |
3641 | uap->pages_reclaimed, |
3642 | sizeof(pages_reclaimed)) != 0) { |
3643 | return EFAULT; |
3644 | } |
3645 | } |
3646 | |
3647 | *retval = (int) pages_wanted; |
3648 | return 0; |
3649 | } |
3650 | |
3651 | int |
3652 | kas_info(struct proc *p, |
3653 | struct kas_info_args *uap, |
3654 | int *retval __unused) |
3655 | { |
3656 | #ifndef CONFIG_KAS_INFO |
3657 | (void)p; |
3658 | (void)uap; |
3659 | return ENOTSUP; |
3660 | #else /* CONFIG_KAS_INFO */ |
3661 | int selector = uap->selector; |
3662 | user_addr_t valuep = uap->value; |
3663 | user_addr_t sizep = uap->size; |
3664 | user_size_t size, rsize; |
3665 | int error; |
3666 | |
if (!kauth_cred_issuser(kauth_cred_get())) {
3668 | return EPERM; |
3669 | } |
3670 | |
3671 | #if CONFIG_MACF |
error = mac_system_check_kas_info(kauth_cred_get(), selector);
3673 | if (error) { |
3674 | return error; |
3675 | } |
3676 | #endif |
3677 | |
3678 | if (IS_64BIT_PROCESS(p)) { |
3679 | user64_size_t size64; |
3680 | error = copyin(sizep, &size64, sizeof(size64)); |
3681 | size = (user_size_t)size64; |
3682 | } else { |
3683 | user32_size_t size32; |
3684 | error = copyin(sizep, &size32, sizeof(size32)); |
3685 | size = (user_size_t)size32; |
3686 | } |
3687 | if (error) { |
3688 | return error; |
3689 | } |
3690 | |
3691 | switch (selector) { |
3692 | case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR: |
3693 | { |
3694 | uint64_t slide = vm_kernel_slide; |
3695 | |
3696 | if (sizeof(slide) != size) { |
3697 | return EINVAL; |
3698 | } |
3699 | |
3700 | error = copyout(&slide, valuep, sizeof(slide)); |
3701 | if (error) { |
3702 | return error; |
3703 | } |
3704 | rsize = size; |
3705 | } |
3706 | break; |
3707 | case KAS_INFO_KERNEL_SEGMENT_VMADDR_SELECTOR: |
3708 | { |
3709 | uint32_t i; |
3710 | kernel_mach_header_t *mh = &_mh_execute_header; |
3711 | struct load_command *cmd; |
3712 | cmd = (struct load_command*) &mh[1]; |
3713 | uint64_t *bases; |
3714 | rsize = mh->ncmds * sizeof(uint64_t); |
3715 | |
3716 | /* |
3717 | * Return the size if no data was passed |
3718 | */ |
3719 | if (valuep == 0) { |
3720 | break; |
3721 | } |
3722 | |
3723 | if (rsize > size) { |
3724 | return EINVAL; |
3725 | } |
3726 | |
3727 | bases = kalloc_data(rsize, Z_WAITOK | Z_ZERO); |
3728 | |
3729 | for (i = 0; i < mh->ncmds; i++) { |
3730 | if (cmd->cmd == LC_SEGMENT_KERNEL) { |
3731 | __IGNORE_WCASTALIGN(kernel_segment_command_t * sg = (kernel_segment_command_t *) cmd); |
3732 | bases[i] = (uint64_t)sg->vmaddr; |
3733 | } |
3734 | cmd = (struct load_command *) ((uintptr_t) cmd + cmd->cmdsize); |
3735 | } |
3736 | |
3737 | error = copyout(bases, valuep, rsize); |
3738 | |
3739 | kfree_data(bases, rsize); |
3740 | |
3741 | if (error) { |
3742 | return error; |
3743 | } |
3744 | } |
3745 | break; |
3746 | case KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR: |
3747 | case KAS_INFO_TXM_TEXT_SLIDE_SELECTOR: |
3748 | { |
3749 | #if CONFIG_SPTM |
3750 | const uint64_t slide = |
3751 | (selector == KAS_INFO_SPTM_TEXT_SLIDE_SELECTOR) ? vm_sptm_offsets.slide : vm_txm_offsets.slide; |
3752 | #else |
3753 | const uint64_t slide = 0; |
3754 | #endif |
3755 | |
3756 | if (sizeof(slide) != size) { |
3757 | return EINVAL; |
3758 | } |
3759 | |
3760 | error = copyout(&slide, valuep, sizeof(slide)); |
3761 | if (error) { |
3762 | return error; |
3763 | } |
3764 | rsize = size; |
3765 | } |
3766 | break; |
3767 | default: |
3768 | return EINVAL; |
3769 | } |
3770 | |
3771 | if (IS_64BIT_PROCESS(p)) { |
3772 | user64_size_t size64 = (user64_size_t)rsize; |
3773 | error = copyout(&size64, sizep, sizeof(size64)); |
3774 | } else { |
3775 | user32_size_t size32 = (user32_size_t)rsize; |
3776 | error = copyout(&size32, sizep, sizeof(size32)); |
3777 | } |
3778 | |
3779 | return error; |
3780 | #endif /* CONFIG_KAS_INFO */ |
3781 | } |
3782 | |
3783 | #if __has_feature(ptrauth_calls) |
3784 | /* |
3785 | * Generate a random pointer signing key that isn't 0. |
3786 | */ |
3787 | uint64_t |
3788 | generate_jop_key(void) |
3789 | { |
3790 | uint64_t key; |
3791 | |
3792 | do { |
3793 | read_random(&key, sizeof key); |
3794 | } while (key == 0); |
3795 | return key; |
3796 | } |
3797 | #endif /* __has_feature(ptrauth_calls) */ |
3798 | |
3799 | |
3800 | #pragma clang diagnostic push |
3801 | #pragma clang diagnostic ignored "-Wcast-qual" |
3802 | #pragma clang diagnostic ignored "-Wunused-function" |
3803 | |
3804 | static void |
3805 | asserts() |
3806 | { |
3807 | static_assert(sizeof(vm_min_kernel_address) == sizeof(unsigned long)); |
3808 | static_assert(sizeof(vm_max_kernel_address) == sizeof(unsigned long)); |
3809 | } |
3810 | |
3811 | SYSCTL_ULONG(_vm, OID_AUTO, vm_min_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_min_kernel_address, "" ); |
3812 | SYSCTL_ULONG(_vm, OID_AUTO, vm_max_kernel_address, CTLFLAG_RD, (unsigned long *) &vm_max_kernel_address, "" ); |
3813 | #pragma clang diagnostic pop |
3814 | |
3815 | extern uint32_t vm_page_pages; |
3816 | SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, "" ); |
3817 | |
3818 | extern uint32_t vm_page_busy_absent_skipped; |
3819 | SYSCTL_UINT(_vm, OID_AUTO, page_busy_absent_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_busy_absent_skipped, 0, "" ); |
3820 | |
3821 | extern uint32_t vm_page_upl_tainted; |
3822 | SYSCTL_UINT(_vm, OID_AUTO, upl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_upl_tainted, 0, "" ); |
3823 | |
3824 | extern uint32_t vm_page_iopl_tainted; |
3825 | SYSCTL_UINT(_vm, OID_AUTO, iopl_pages_tainted, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_iopl_tainted, 0, "" ); |
3826 | |
3827 | #if __arm64__ && (DEVELOPMENT || DEBUG) |
3828 | extern int vm_footprint_suspend_allowed; |
3829 | SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_footprint_suspend_allowed, 0, "" ); |
3830 | |
3831 | extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend); |
3832 | static int |
3833 | sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS |
3834 | { |
3835 | #pragma unused(oidp, arg1, arg2) |
3836 | int error = 0; |
3837 | int new_value; |
3838 | |
3839 | if (req->newptr == USER_ADDR_NULL) { |
3840 | return 0; |
3841 | } |
3842 | error = SYSCTL_IN(req, &new_value, sizeof(int)); |
3843 | if (error) { |
3844 | return error; |
3845 | } |
3846 | if (!vm_footprint_suspend_allowed) { |
3847 | if (new_value != 0) { |
3848 | /* suspends are not allowed... */ |
3849 | return 0; |
3850 | } |
3851 | /* ... but let resumes proceed */ |
3852 | } |
3853 | DTRACE_VM2(footprint_suspend, |
3854 | vm_map_t, current_map(), |
3855 | int, new_value); |
3856 | |
3857 | pmap_footprint_suspend(current_map(), new_value); |
3858 | |
3859 | return 0; |
3860 | } |
3861 | SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend, |
3862 | CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
3863 | 0, 0, &sysctl_vm_footprint_suspend, "I" , "" ); |
3864 | #endif /* __arm64__ && (DEVELOPMENT || DEBUG) */ |
3865 | |
extern uint64_t vm_map_corpse_footprint_count;
extern uint64_t vm_map_corpse_footprint_size_avg;
extern uint64_t vm_map_corpse_footprint_size_max;
extern uint64_t vm_map_corpse_footprint_full;
extern uint64_t vm_map_corpse_footprint_no_buf;
3871 | SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_count, |
3872 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_count, "" ); |
3873 | SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_avg, |
3874 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_avg, "" ); |
3875 | SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_size_max, |
3876 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_size_max, "" ); |
3877 | SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_full, |
3878 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_full, "" ); |
3879 | SYSCTL_QUAD(_vm, OID_AUTO, corpse_footprint_no_buf, |
3880 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_corpse_footprint_no_buf, "" ); |
3881 | |
3882 | #if CODE_SIGNING_MONITOR |
3883 | extern uint64_t vm_cs_defer_to_csm; |
3884 | extern uint64_t vm_cs_defer_to_csm_not; |
3885 | SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm, |
3886 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm, "" ); |
3887 | SYSCTL_QUAD(_vm, OID_AUTO, cs_defer_to_csm_not, |
3888 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_cs_defer_to_csm_not, "" ); |
3889 | #endif /* CODE_SIGNING_MONITOR */ |
3890 | |
extern uint64_t shared_region_pager_copied;
extern uint64_t shared_region_pager_slid;
extern uint64_t shared_region_pager_slid_error;
extern uint64_t shared_region_pager_reclaimed;
3895 | SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_copied, |
3896 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_copied, "" ); |
3897 | SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid, |
3898 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid, "" ); |
3899 | SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_slid_error, |
3900 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_slid_error, "" ); |
3901 | SYSCTL_QUAD(_vm, OID_AUTO, shared_region_pager_reclaimed, |
3902 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_reclaimed, "" ); |
3903 | extern int shared_region_destroy_delay; |
3904 | SYSCTL_INT(_vm, OID_AUTO, shared_region_destroy_delay, |
3905 | CTLFLAG_RW | CTLFLAG_LOCKED, &shared_region_destroy_delay, 0, "" ); |
3906 | |
3907 | #if MACH_ASSERT |
3908 | extern int pmap_ledgers_panic_leeway; |
3909 | SYSCTL_INT(_vm, OID_AUTO, pmap_ledgers_panic_leeway, CTLFLAG_RW | CTLFLAG_LOCKED, &pmap_ledgers_panic_leeway, 0, "" ); |
3910 | #endif /* MACH_ASSERT */ |
3911 | |
3912 | |
3913 | extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_count; |
3914 | extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_size; |
3915 | extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_max; |
3916 | extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_restart; |
3917 | extern uint64_t vm_map_lookup_and_lock_object_copy_slowly_error; |
3918 | extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_count; |
3919 | extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_size; |
3920 | extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_max; |
3921 | extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_restart; |
3922 | extern uint64_t vm_map_lookup_and_lock_object_copy_strategically_error; |
3923 | extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_count; |
3924 | extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_size; |
3925 | extern uint64_t vm_map_lookup_and_lock_object_copy_shadow_max; |
3926 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_count, |
3927 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_count, "" ); |
3928 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_size, |
3929 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_size, "" ); |
3930 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_max, |
3931 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_max, "" ); |
3932 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_restart, |
3933 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_restart, "" ); |
3934 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_slowly_error, |
3935 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_slowly_error, "" ); |
3936 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_count, |
3937 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_count, "" ); |
3938 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_size, |
3939 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_size, "" ); |
3940 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_max, |
3941 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_max, "" ); |
3942 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_restart, |
3943 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_restart, "" ); |
3944 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_strategically_error, |
3945 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_strategically_error, "" ); |
3946 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_count, |
3947 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_count, "" ); |
3948 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_size, |
3949 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_size, "" ); |
3950 | SYSCTL_QUAD(_vm, OID_AUTO, map_lookup_locked_copy_shadow_max, |
3951 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_lookup_and_lock_object_copy_shadow_max, "" ); |
3952 | |
3953 | extern int vm_protect_privileged_from_untrusted; |
3954 | SYSCTL_INT(_vm, OID_AUTO, protect_privileged_from_untrusted, |
3955 | CTLFLAG_RW | CTLFLAG_LOCKED, &vm_protect_privileged_from_untrusted, 0, "" ); |
3956 | extern uint64_t vm_copied_on_read; |
3957 | SYSCTL_QUAD(_vm, OID_AUTO, copied_on_read, |
3958 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_copied_on_read, "" ); |
3959 | |
3960 | extern int vm_shared_region_count; |
3961 | extern int vm_shared_region_peak; |
3962 | SYSCTL_INT(_vm, OID_AUTO, shared_region_count, |
3963 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_count, 0, "" ); |
3964 | SYSCTL_INT(_vm, OID_AUTO, shared_region_peak, |
3965 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_peak, 0, "" ); |
3966 | #if DEVELOPMENT || DEBUG |
3967 | extern unsigned int shared_region_pagers_resident_count; |
3968 | SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_count, |
3969 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_count, 0, "" ); |
3970 | extern unsigned int shared_region_pagers_resident_peak; |
3971 | SYSCTL_INT(_vm, OID_AUTO, shared_region_pagers_resident_peak, |
3972 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pagers_resident_peak, 0, "" ); |
3973 | extern int shared_region_pager_count; |
3974 | SYSCTL_INT(_vm, OID_AUTO, shared_region_pager_count, |
3975 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_pager_count, 0, "" ); |
3976 | #if __has_feature(ptrauth_calls) |
3977 | extern int shared_region_key_count; |
3978 | SYSCTL_INT(_vm, OID_AUTO, shared_region_key_count, |
3979 | CTLFLAG_RD | CTLFLAG_LOCKED, &shared_region_key_count, 0, "" ); |
3980 | extern int vm_shared_region_reslide_count; |
3981 | SYSCTL_INT(_vm, OID_AUTO, shared_region_reslide_count, |
3982 | CTLFLAG_RD | CTLFLAG_LOCKED, &vm_shared_region_reslide_count, 0, "" ); |
3983 | #endif /* __has_feature(ptrauth_calls) */ |
3984 | #endif /* DEVELOPMENT || DEBUG */ |
3985 | |
3986 | #if MACH_ASSERT |
3987 | extern int debug4k_filter; |
3988 | SYSCTL_INT(_vm, OID_AUTO, debug4k_filter, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_filter, 0, "" ); |
3989 | extern int debug4k_panic_on_terminate; |
3990 | SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_terminate, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_terminate, 0, "" ); |
3991 | extern int debug4k_panic_on_exception; |
3992 | SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_exception, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_exception, 0, "" ); |
3993 | extern int debug4k_panic_on_misaligned_sharing; |
3994 | SYSCTL_INT(_vm, OID_AUTO, debug4k_panic_on_misaligned_sharing, CTLFLAG_RW | CTLFLAG_LOCKED, &debug4k_panic_on_misaligned_sharing, 0, "" ); |
3995 | #endif /* MACH_ASSERT */ |
3996 | |
3997 | extern uint64_t vm_map_set_size_limit_count; |
3998 | extern uint64_t vm_map_set_data_limit_count; |
3999 | extern uint64_t vm_map_enter_RLIMIT_AS_count; |
4000 | extern uint64_t vm_map_enter_RLIMIT_DATA_count; |
4001 | SYSCTL_QUAD(_vm, OID_AUTO, map_set_size_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_size_limit_count, "" ); |
4002 | SYSCTL_QUAD(_vm, OID_AUTO, map_set_data_limit_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_set_data_limit_count, "" ); |
4003 | SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_AS_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_AS_count, "" ); |
4004 | SYSCTL_QUAD(_vm, OID_AUTO, map_enter_RLIMIT_DATA_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_enter_RLIMIT_DATA_count, "" ); |
4005 | |
4006 | extern uint64_t vm_fault_resilient_media_initiate; |
4007 | extern uint64_t vm_fault_resilient_media_retry; |
4008 | extern uint64_t vm_fault_resilient_media_proceed; |
4009 | extern uint64_t vm_fault_resilient_media_release; |
4010 | extern uint64_t vm_fault_resilient_media_abort1; |
4011 | extern uint64_t vm_fault_resilient_media_abort2; |
4012 | SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_initiate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_initiate, "" ); |
4013 | SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_retry, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_retry, "" ); |
4014 | SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_proceed, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_proceed, "" ); |
4015 | SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_release, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_release, "" ); |
4016 | SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort1, "" ); |
4017 | SYSCTL_QUAD(_vm, OID_AUTO, fault_resilient_media_abort2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_abort2, "" ); |
4018 | #if MACH_ASSERT |
4019 | extern int vm_fault_resilient_media_inject_error1_rate; |
4020 | extern int vm_fault_resilient_media_inject_error1; |
4021 | extern int vm_fault_resilient_media_inject_error2_rate; |
4022 | extern int vm_fault_resilient_media_inject_error2; |
4023 | extern int vm_fault_resilient_media_inject_error3_rate; |
4024 | extern int vm_fault_resilient_media_inject_error3; |
4025 | SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1_rate, 0, "" ); |
4026 | SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error1, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error1, 0, "" ); |
4027 | SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2_rate, 0, "" ); |
4028 | SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error2, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error2, 0, "" ); |
4029 | SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3_rate, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3_rate, 0, "" ); |
4030 | SYSCTL_INT(_vm, OID_AUTO, fault_resilient_media_inject_error3, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_fault_resilient_media_inject_error3, 0, "" ); |
4031 | #endif /* MACH_ASSERT */ |
4032 | |
4033 | extern uint64_t pmap_query_page_info_retries; |
4034 | SYSCTL_QUAD(_vm, OID_AUTO, pmap_query_page_info_retries, CTLFLAG_RD | CTLFLAG_LOCKED, &pmap_query_page_info_retries, "" ); |
4035 | |
4036 | /* |
4037 | * A sysctl which causes all existing shared regions to become stale. They |
4038 | * will no longer be used by anything new and will be torn down as soon as |
4039 | * the last existing user exits. A write of non-zero value causes that to happen. |
4040 | * This should only be used by launchd, so we check that this is initproc. |
4041 | */ |
4042 | static int |
4043 | shared_region_pivot(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) |
4044 | { |
4045 | unsigned int value = 0; |
4046 | int changed = 0; |
int error = sysctl_io_number(req, 0, sizeof(value), &value, &changed);
4048 | if (error || !changed) { |
4049 | return error; |
4050 | } |
4051 | if (current_proc() != initproc) { |
4052 | return EPERM; |
4053 | } |
4054 | |
4055 | vm_shared_region_pivot(); |
4056 | |
4057 | return 0; |
4058 | } |
4059 | |
4060 | SYSCTL_PROC(_vm, OID_AUTO, shared_region_pivot, |
4061 | CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED, |
4062 | 0, 0, shared_region_pivot, "I" , "" ); |
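
/*
 * Hedged usage sketch: launchd (and only launchd, per the initproc check
 * above) would trigger a pivot by writing any non-zero value:
 *
 *	int one = 1;
 *	(void)sysctlbyname("vm.shared_region_pivot", NULL, NULL, &one, sizeof(one));
 */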
4063 | |
4064 | extern uint64_t vm_object_shadow_forced; |
4065 | extern uint64_t vm_object_shadow_skipped; |
4066 | SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_forced, CTLFLAG_RD | CTLFLAG_LOCKED, |
4067 | &vm_object_shadow_forced, "" ); |
4068 | SYSCTL_QUAD(_vm, OID_AUTO, object_shadow_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, |
4069 | &vm_object_shadow_skipped, "" ); |
4070 | |
4071 | SYSCTL_INT(_vm, OID_AUTO, vmtc_total, CTLFLAG_RD | CTLFLAG_LOCKED, |
4072 | &vmtc_total, 0, "total text page corruptions detected" ); |
4073 | |
4074 | |
4075 | #if DEBUG || DEVELOPMENT |
4076 | /* |
4077 | * A sysctl that can be used to corrupt a text page with an illegal instruction. |
4078 | * Used for testing text page self healing. |
4079 | */ |
4080 | extern kern_return_t vm_corrupt_text_addr(uintptr_t); |
4081 | static int |
4082 | corrupt_text_addr(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) |
4083 | { |
4084 | uint64_t value = 0; |
4085 | int error = sysctl_handle_quad(oidp, &value, 0, req); |
4086 | if (error || !req->newptr) { |
4087 | return error; |
4088 | } |
4089 | |
4090 | if (vm_corrupt_text_addr((uintptr_t)value) == KERN_SUCCESS) { |
4091 | return 0; |
4092 | } else { |
4093 | return EINVAL; |
4094 | } |
4095 | } |
4096 | |
4097 | SYSCTL_PROC(_vm, OID_AUTO, corrupt_text_addr, |
4098 | CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
4099 | 0, 0, corrupt_text_addr, "-" , "" ); |
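
/*
 * Hedged usage sketch (DEBUG/DEVELOPMENT kernels only): a test harness
 * would write a quad-sized virtual address whose backing text page should
 * be corrupted; the symbol below is purely illustrative:
 *
 *	uint64_t addr = (uint64_t)(uintptr_t)&some_text_function;
 *	(void)sysctlbyname("vm.corrupt_text_addr", NULL, NULL, &addr, sizeof(addr));
 */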
4100 | #endif /* DEBUG || DEVELOPMENT */ |
4101 | |
4102 | #if CONFIG_MAP_RANGES |
4103 | /* |
4104 | * vm.malloc_ranges |
4105 | * |
4106 | * space-separated list of <left:right> hexadecimal addresses. |
4107 | */ |
4108 | static int |
4109 | vm_map_malloc_ranges SYSCTL_HANDLER_ARGS |
4110 | { |
4111 | vm_map_t map = current_map(); |
4112 | struct mach_vm_range r1, r2; |
4113 | char str[20 * 4]; |
4114 | int len; |
4115 | |
4116 | if (vm_map_get_user_range(map, UMEM_RANGE_ID_DEFAULT, &r1)) { |
4117 | return ENOENT; |
4118 | } |
4119 | if (vm_map_get_user_range(map, UMEM_RANGE_ID_HEAP, &r2)) { |
4120 | return ENOENT; |
4121 | } |
4122 | |
4123 | len = scnprintf(str, sizeof(str), "0x%llx:0x%llx 0x%llx:0x%llx" , |
4124 | r1.max_address, r2.min_address, |
4125 | r2.max_address, get_map_max(map)); |
4126 | |
4127 | return SYSCTL_OUT(req, str, len); |
4128 | } |
4129 | |
4130 | SYSCTL_PROC(_vm, OID_AUTO, malloc_ranges, |
4131 | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
4132 | 0, 0, &vm_map_malloc_ranges, "A" , "" ); |
4133 | |
4134 | #if DEBUG || DEVELOPMENT |
4135 | static int |
4136 | vm_map_user_range_default SYSCTL_HANDLER_ARGS |
4137 | { |
4138 | #pragma unused(arg1, arg2, oidp) |
4139 | struct mach_vm_range range; |
4140 | |
4141 | if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_DEFAULT, &range) |
4142 | != KERN_SUCCESS) { |
4143 | return EINVAL; |
4144 | } |
4145 | |
4146 | return SYSCTL_OUT(req, &range, sizeof(range)); |
4147 | } |
4148 | |
4149 | static int |
4150 | vm_map_user_range_heap SYSCTL_HANDLER_ARGS |
4151 | { |
4152 | #pragma unused(arg1, arg2, oidp) |
4153 | struct mach_vm_range range; |
4154 | |
4155 | if (vm_map_get_user_range(current_map(), UMEM_RANGE_ID_HEAP, &range) |
4156 | != KERN_SUCCESS) { |
4157 | return EINVAL; |
4158 | } |
4159 | |
4160 | return SYSCTL_OUT(req, &range, sizeof(range)); |
4161 | } |
4162 | |
4163 | /* |
4164 | * A sysctl that can be used to return ranges for the current VM map. |
4165 | * Used for testing VM ranges. |
4166 | */ |
4167 | SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_default, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, |
4168 | 0, 0, &vm_map_user_range_default, "S,mach_vm_range" , "" ); |
4169 | SYSCTL_PROC(_vm, OID_AUTO, vm_map_user_range_heap, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, |
4170 | 0, 0, &vm_map_user_range_heap, "S,mach_vm_range" , "" ); |
4171 | |
4172 | #endif /* DEBUG || DEVELOPMENT */ |
4173 | #endif /* CONFIG_MAP_RANGES */ |
4174 | |
4177 | |
4178 | extern uint64_t vm_map_range_overflows_count; |
4179 | SYSCTL_QUAD(_vm, OID_AUTO, map_range_overflows_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_range_overflows_count, "" ); |
4180 | extern boolean_t vm_map_range_overflows_log; |
SYSCTL_INT(_vm, OID_AUTO, map_range_overflows_log, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_map_range_overflows_log, 0, "");
4182 | |
4183 | extern uint64_t c_seg_filled_no_contention; |
4184 | extern uint64_t c_seg_filled_contention; |
4185 | extern clock_sec_t c_seg_filled_contention_sec_max; |
4186 | extern clock_nsec_t c_seg_filled_contention_nsec_max; |
4187 | SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_no_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_no_contention, "" ); |
4188 | SYSCTL_QUAD(_vm, OID_AUTO, c_seg_filled_contention, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention, "" ); |
4189 | SYSCTL_ULONG(_vm, OID_AUTO, c_seg_filled_contention_sec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_sec_max, "" ); |
4190 | SYSCTL_UINT(_vm, OID_AUTO, c_seg_filled_contention_nsec_max, CTLFLAG_RD | CTLFLAG_LOCKED, &c_seg_filled_contention_nsec_max, 0, "" ); |
4191 | #if (XNU_TARGET_OS_OSX && __arm64__) |
4192 | extern clock_nsec_t c_process_major_report_over_ms; /* report if over ? ms */ |
4193 | extern int c_process_major_yield_after; /* yield after moving ? segments */ |
4194 | extern uint64_t c_process_major_reports; |
4195 | extern clock_sec_t c_process_major_max_sec; |
4196 | extern clock_nsec_t c_process_major_max_nsec; |
4197 | extern uint32_t c_process_major_peak_segcount; |
4198 | SYSCTL_UINT(_vm, OID_AUTO, c_process_major_report_over_ms, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_report_over_ms, 0, "" ); |
4199 | SYSCTL_INT(_vm, OID_AUTO, c_process_major_yield_after, CTLFLAG_RW | CTLFLAG_LOCKED, &c_process_major_yield_after, 0, "" ); |
4200 | SYSCTL_QUAD(_vm, OID_AUTO, c_process_major_reports, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_reports, "" ); |
4201 | SYSCTL_ULONG(_vm, OID_AUTO, c_process_major_max_sec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_sec, "" ); |
4202 | SYSCTL_UINT(_vm, OID_AUTO, c_process_major_max_nsec, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_max_nsec, 0, "" ); |
4203 | SYSCTL_UINT(_vm, OID_AUTO, c_process_major_peak_segcount, CTLFLAG_RD | CTLFLAG_LOCKED, &c_process_major_peak_segcount, 0, "" ); |
4204 | #endif /* (XNU_TARGET_OS_OSX && __arm64__) */ |
4205 | |
4206 | #if DEVELOPMENT || DEBUG |
4207 | extern int panic_object_not_alive; |
4208 | SYSCTL_INT(_vm, OID_AUTO, panic_object_not_alive, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &panic_object_not_alive, 0, "" ); |
4209 | #endif /* DEVELOPMENT || DEBUG */ |
4210 | |
4211 | #if MACH_ASSERT |
4212 | extern int fbdp_no_panic; |
4213 | SYSCTL_INT(_vm, OID_AUTO, fbdp_no_panic, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, &fbdp_no_panic, 0, "" ); |
4214 | #endif /* MACH_ASSERT */ |
4215 | |
4216 | |