1 | /* |
2 | * Copyright (c) 2007-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. Please obtain a copy of the License at |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this |
11 | * file. |
12 | * |
13 | * The Original Code and all software distributed under the License are |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
18 | * Please see the License for the specific language governing rights and |
19 | * limitations under the License. |
20 | * |
21 | * @APPLE_LICENSE_HEADER_END@ |
22 | */ |
23 | |
24 | /* |
25 | * Shared region (... and comm page) |
26 | * |
27 | * This file handles the VM shared region and comm page. |
28 | * |
29 | */ |
30 | /* |
31 | * SHARED REGIONS |
32 | * -------------- |
33 | * |
34 | * A shared region is a submap that contains the most common system shared |
* libraries for a given environment, which is defined by:
36 | * - cpu-type |
37 | * - 64-bitness |
38 | * - root directory |
39 | * - Team ID - when we have pointer authentication. |
40 | * |
41 | * The point of a shared region is to reduce the setup overhead when exec'ing |
42 | * a new process. A shared region uses a shared VM submap that gets mapped |
43 | * automatically at exec() time, see vm_map_exec(). The first process of a given |
44 | * environment sets up the shared region and all further processes in that |
45 | * environment can re-use that shared region without having to re-create |
46 | * the same mappings in their VM map. All they need is contained in the shared |
47 | * region. |
48 | * |
49 | * The region can also share a pmap (mostly for read-only parts but also for the |
50 | * initial version of some writable parts), which gets "nested" into the |
51 | * process's pmap. This reduces the number of soft faults: once one process |
52 | * brings in a page in the shared region, all the other processes can access |
53 | * it without having to enter it in their own pmap. |
54 | * |
55 | * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter() |
56 | * to map the appropriate shared region in the process's address space. |
57 | * We look up the appropriate shared region for the process's environment. |
58 | * If we can't find one, we create a new (empty) one and add it to the list. |
59 | * Otherwise, we just take an extra reference on the shared region we found. |
60 | * |
61 | * The "dyld" runtime, mapped into the process's address space at exec() time, |
62 | * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np() |
63 | * system calls to validate and/or populate the shared region with the |
64 | * appropriate dyld_shared_cache file. |
65 | * |
66 | * The shared region is inherited on fork() and the child simply takes an |
67 | * extra reference on its parent's shared region. |
68 | * |
69 | * When the task terminates, we release the reference on its shared region. |
70 | * When the last reference is released, we destroy the shared region. |
71 | * |
72 | * After a chroot(), the calling process keeps using its original shared region, |
73 | * since that's what was mapped when it was started. But its children |
74 | * will use a different shared region, because they need to use the shared |
75 | * cache that's relative to the new root directory. |
76 | */ |
77 | |
78 | /* |
79 | * COMM PAGE |
80 | * |
81 | * A "comm page" is an area of memory that is populated by the kernel with |
82 | * the appropriate platform-specific version of some commonly used code. |
83 | * There is one "comm page" per platform (cpu-type, 64-bitness) but only |
84 | * for the native cpu-type. No need to overly optimize translated code |
85 | * for hardware that is not really there ! |
86 | * |
87 | * The comm pages are created and populated at boot time. |
88 | * |
89 | * The appropriate comm page is mapped into a process's address space |
90 | * at exec() time, in vm_map_exec(). It is then inherited on fork(). |
91 | * |
92 | * The comm page is shared between the kernel and all applications of |
93 | * a given platform. Only the kernel can modify it. |
94 | * |
95 | * Applications just branch to fixed addresses in the comm page and find |
96 | * the right version of the code for the platform. There is also some |
97 | * data provided and updated by the kernel for processes to retrieve easily |
98 | * without having to do a system call. |
99 | */ |
100 | |
101 | #include <debug.h> |
102 | |
103 | #include <kern/ipc_tt.h> |
104 | #include <kern/kalloc.h> |
105 | #include <kern/thread_call.h> |
106 | |
107 | #include <mach/mach_vm.h> |
108 | #include <mach/machine.h> |
109 | |
110 | #include <vm/vm_map.h> |
111 | #include <vm/vm_map_internal.h> |
112 | #include <vm/vm_shared_region.h> |
113 | |
114 | #include <vm/vm_protos.h> |
115 | |
116 | #include <machine/commpage.h> |
117 | #include <machine/cpu_capabilities.h> |
118 | #include <sys/random.h> |
119 | #include <sys/errno.h> |
120 | |
121 | #if defined(__arm64__) |
122 | #include <arm/cpu_data_internal.h> |
123 | #include <arm/misc_protos.h> |
124 | #endif |
125 | |
126 | /* |
127 | * the following codes are used in the subclass |
128 | * of the DBG_MACH_SHAREDREGION class |
129 | */ |
130 | #define PROCESS_SHARED_CACHE_LAYOUT 0x00 |
131 | |
132 | #if __has_feature(ptrauth_calls) |
133 | #include <ptrauth.h> |
134 | #endif /* __has_feature(ptrauth_calls) */ |
135 | |
136 | /* "dyld" uses this to figure out what the kernel supports */ |
137 | int shared_region_version = 3; |
138 | |
139 | /* trace level, output is sent to the system log file */ |
140 | int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL; |
141 | |
142 | /* should local (non-chroot) shared regions persist when no task uses them ? */ |
143 | int shared_region_persistence = 0; /* no by default */ |
144 | |
145 | |
146 | /* delay in seconds before reclaiming an unused shared region */ |
147 | TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay" , 120); |
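/*
 * Being a TUNABLE_WRITEABLE(), the delay above can be overridden at boot with
 * a "vm_shared_region_destroy_delay=<seconds>" boot-arg (the name comes from
 * the macro argument); setting it to 0 skips the delayed teardown entirely,
 * per the check in vm_shared_region_deallocate() below.
 */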
148 | |
149 | /* |
150 | * Cached pointer to the most recently mapped shared region from PID 1, which should |
151 | * be the most commonly mapped shared region in the system. There are many processes |
152 | * which do not use this, for a variety of reasons. |
153 | * |
154 | * The main consumer of this is stackshot. |
155 | */ |
156 | struct vm_shared_region *primary_system_shared_region = NULL; |
157 | |
158 | #if XNU_TARGET_OS_OSX |
159 | /* |
160 | * Only one cache gets to slide on Desktop, since we can't |
161 | * tear down slide info properly today and the desktop actually |
162 | * produces lots of shared caches. |
163 | */ |
164 | boolean_t shared_region_completed_slide = FALSE; |
165 | #endif /* XNU_TARGET_OS_OSX */ |
166 | |
167 | /* this lock protects all the shared region data structures */ |
168 | static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region" ); |
169 | static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp); |
170 | |
171 | #define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock) |
172 | #define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock) |
173 | #define vm_shared_region_sleep(event, interruptible) \ |
174 | lck_mtx_sleep_with_inheritor(&vm_shared_region_lock, \ |
175 | LCK_SLEEP_DEFAULT, \ |
176 | (event_t) (event), \ |
177 | *(event), \ |
178 | (interruptible) | THREAD_WAIT_NOREPORT, \ |
179 | TIMEOUT_WAIT_FOREVER) |
180 | #define vm_shared_region_wakeup(event) \ |
181 | wakeup_all_with_inheritor((event), THREAD_AWAKENED) |
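
/*
 * Typical usage of the macros above, as seen in the mapping and slide paths
 * later in this file: a thread that needs a consistent view of a shared
 * region waits for any in-progress work, publishes itself as the worker,
 * and wakes waiters when done.
 *
 *	vm_shared_region_lock();
 *	while (shared_region->sr_mapping_in_progress) {
 *		vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
 *		    THREAD_UNINT);
 *	}
 *	shared_region->sr_mapping_in_progress = current_thread();
 *	vm_shared_region_unlock();
 *
 *	... do the work on the shared region ...
 *
 *	vm_shared_region_lock();
 *	shared_region->sr_mapping_in_progress = THREAD_NULL;
 *	vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
 *	vm_shared_region_unlock();
 */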
182 | |
183 | /* the list of currently available shared regions (one per environment) */ |
184 | queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue); |
185 | int vm_shared_region_count = 0; |
186 | int vm_shared_region_peak = 0; |
187 | static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */ |
188 | |
189 | /* |
190 | * the number of times an event has forced the recalculation of the reslide |
191 | * shared region slide. |
192 | */ |
193 | #if __has_feature(ptrauth_calls) |
194 | int vm_shared_region_reslide_count = 0; |
195 | #endif /* __has_feature(ptrauth_calls) */ |
196 | |
197 | static void vm_shared_region_reference_locked(vm_shared_region_t shared_region); |
198 | static vm_shared_region_t vm_shared_region_create( |
199 | void *root_dir, |
200 | cpu_type_t cputype, |
201 | cpu_subtype_t cpu_subtype, |
202 | boolean_t is_64bit, |
203 | int target_page_shift, |
204 | boolean_t reslide, |
205 | boolean_t is_driverkit, |
206 | uint32_t rsr_version); |
207 | static void vm_shared_region_destroy(vm_shared_region_t shared_region); |
208 | |
209 | static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size); |
210 | static void vm_shared_region_timeout(thread_call_param_t param0, |
211 | thread_call_param_t param1); |
212 | static kern_return_t vm_shared_region_slide_mapping( |
213 | vm_shared_region_t sr, |
214 | user_addr_t slide_info_addr, |
215 | mach_vm_size_t slide_info_size, |
216 | mach_vm_offset_t start, |
217 | mach_vm_size_t size, |
218 | mach_vm_offset_t slid_mapping, |
219 | uint32_t slide, |
220 | memory_object_control_t, |
221 | vm_prot_t prot); /* forward */ |
222 | |
223 | static int __commpage_setup = 0; |
224 | #if XNU_TARGET_OS_OSX |
static int __system_power_source = 1; /* init to external power source */
226 | static void post_sys_powersource_internal(int i, int internal); |
227 | #endif /* XNU_TARGET_OS_OSX */ |
228 | |
229 | extern u_int32_t random(void); |
230 | |
231 | /* |
232 | * Retrieve a task's shared region and grab an extra reference to |
233 | * make sure it doesn't disappear while the caller is using it. |
234 | * The caller is responsible for consuming that extra reference if |
235 | * necessary. |
236 | */ |
237 | vm_shared_region_t |
238 | vm_shared_region_get( |
239 | task_t task) |
240 | { |
241 | vm_shared_region_t shared_region; |
242 | |
243 | SHARED_REGION_TRACE_DEBUG( |
244 | ("shared_region: -> get(%p)\n" , |
245 | (void *)VM_KERNEL_ADDRPERM(task))); |
246 | |
247 | task_lock(task); |
248 | vm_shared_region_lock(); |
249 | shared_region = task->shared_region; |
250 | if (shared_region) { |
251 | assert(shared_region->sr_ref_count > 0); |
252 | vm_shared_region_reference_locked(shared_region); |
253 | } |
254 | vm_shared_region_unlock(); |
255 | task_unlock(task); |
256 | |
257 | SHARED_REGION_TRACE_DEBUG( |
258 | ("shared_region: get(%p) <- %p\n" , |
259 | (void *)VM_KERNEL_ADDRPERM(task), |
260 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
261 | |
262 | return shared_region; |
263 | } |
264 | |
265 | vm_map_t |
266 | vm_shared_region_vm_map( |
267 | vm_shared_region_t shared_region) |
268 | { |
269 | ipc_port_t sr_handle; |
270 | vm_named_entry_t sr_mem_entry; |
271 | vm_map_t sr_map; |
272 | |
273 | SHARED_REGION_TRACE_DEBUG( |
274 | ("shared_region: -> vm_map(%p)\n" , |
275 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
276 | assert(shared_region->sr_ref_count > 0); |
277 | |
278 | sr_handle = shared_region->sr_mem_entry; |
sr_mem_entry = mach_memory_entry_from_port(sr_handle);
280 | sr_map = sr_mem_entry->backing.map; |
281 | assert(sr_mem_entry->is_sub_map); |
282 | |
283 | SHARED_REGION_TRACE_DEBUG( |
284 | ("shared_region: vm_map(%p) <- %p\n" , |
285 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
286 | (void *)VM_KERNEL_ADDRPERM(sr_map))); |
287 | return sr_map; |
288 | } |
289 | |
290 | /* |
291 | * Set the shared region the process should use. |
292 | * A NULL new shared region means that we just want to release the old |
293 | * shared region. |
294 | * The caller should already have an extra reference on the new shared region |
295 | * (if any). We release a reference on the old shared region (if any). |
296 | */ |
297 | void |
298 | vm_shared_region_set( |
299 | task_t task, |
300 | vm_shared_region_t new_shared_region) |
301 | { |
302 | vm_shared_region_t old_shared_region; |
303 | |
304 | SHARED_REGION_TRACE_DEBUG( |
305 | ("shared_region: -> set(%p, %p)\n" , |
306 | (void *)VM_KERNEL_ADDRPERM(task), |
307 | (void *)VM_KERNEL_ADDRPERM(new_shared_region))); |
308 | |
309 | task_lock(task); |
310 | vm_shared_region_lock(); |
311 | |
312 | old_shared_region = task->shared_region; |
313 | if (new_shared_region) { |
314 | assert(new_shared_region->sr_ref_count > 0); |
315 | } |
316 | |
317 | task->shared_region = new_shared_region; |
318 | |
319 | vm_shared_region_unlock(); |
320 | task_unlock(task); |
321 | |
322 | if (old_shared_region) { |
323 | assert(old_shared_region->sr_ref_count > 0); |
vm_shared_region_deallocate(old_shared_region);
325 | } |
326 | |
327 | SHARED_REGION_TRACE_DEBUG( |
328 | ("shared_region: set(%p) <- old=%p new=%p\n" , |
329 | (void *)VM_KERNEL_ADDRPERM(task), |
330 | (void *)VM_KERNEL_ADDRPERM(old_shared_region), |
331 | (void *)VM_KERNEL_ADDRPERM(new_shared_region))); |
332 | } |
333 | |
334 | /* |
* A new arm64 shared region can match an existing arm64e region;
* it just gets a private non-authenticating pager.
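* For example, a lookup for <CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL> can be
* satisfied by an existing arm64e (CPU_SUBTYPE_ARM64E) region, but not the
* other way around; see match_subtype() below.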
337 | */ |
338 | static inline bool |
339 | match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new) |
340 | { |
341 | if (exist == new) { |
342 | return true; |
343 | } |
344 | if (cputype == CPU_TYPE_ARM64 && |
345 | exist == CPU_SUBTYPE_ARM64E && |
346 | new == CPU_SUBTYPE_ARM64_ALL) { |
347 | return true; |
348 | } |
349 | return false; |
350 | } |
351 | |
352 | |
353 | /* |
* Look up the shared region for the desired environment.
355 | * If none is found, create a new (empty) one. |
356 | * Grab an extra reference on the returned shared region, to make sure |
357 | * it doesn't get destroyed before the caller is done with it. The caller |
358 | * is responsible for consuming that extra reference if necessary. |
359 | */ |
360 | vm_shared_region_t |
361 | vm_shared_region_lookup( |
362 | void *root_dir, |
363 | cpu_type_t cputype, |
364 | cpu_subtype_t cpu_subtype, |
365 | boolean_t is_64bit, |
366 | int target_page_shift, |
367 | boolean_t reslide, |
368 | boolean_t is_driverkit, |
369 | uint32_t rsr_version) |
370 | { |
371 | vm_shared_region_t shared_region; |
372 | vm_shared_region_t new_shared_region; |
373 | |
374 | SHARED_REGION_TRACE_DEBUG( |
375 | ("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n" , |
376 | (void *)VM_KERNEL_ADDRPERM(root_dir), |
377 | cputype, cpu_subtype, is_64bit, target_page_shift, |
378 | reslide, is_driverkit)); |
379 | |
380 | shared_region = NULL; |
381 | new_shared_region = NULL; |
382 | |
383 | vm_shared_region_lock(); |
384 | for (;;) { |
385 | queue_iterate(&vm_shared_region_queue, |
386 | shared_region, |
387 | vm_shared_region_t, |
388 | sr_q) { |
389 | assert(shared_region->sr_ref_count > 0); |
390 | if (shared_region->sr_cpu_type == cputype && |
match_subtype(cputype, shared_region->sr_cpu_subtype, cpu_subtype) &&
392 | shared_region->sr_root_dir == root_dir && |
393 | shared_region->sr_64bit == is_64bit && |
394 | #if __ARM_MIXED_PAGE_SIZE__ |
395 | shared_region->sr_page_shift == target_page_shift && |
396 | #endif /* __ARM_MIXED_PAGE_SIZE__ */ |
397 | #if __has_feature(ptrauth_calls) |
398 | shared_region->sr_reslide == reslide && |
399 | #endif /* __has_feature(ptrauth_calls) */ |
400 | shared_region->sr_driverkit == is_driverkit && |
401 | shared_region->sr_rsr_version == rsr_version && |
402 | !shared_region->sr_stale) { |
403 | /* found a match ! */ |
404 | vm_shared_region_reference_locked(shared_region); |
405 | goto done; |
406 | } |
407 | } |
408 | if (new_shared_region == NULL) { |
409 | /* no match: create a new one */ |
410 | vm_shared_region_unlock(); |
411 | new_shared_region = vm_shared_region_create(root_dir, |
412 | cputype, |
413 | cpu_subtype, |
414 | is_64bit, |
415 | target_page_shift, |
416 | reslide, |
417 | is_driverkit, |
418 | rsr_version); |
419 | /* do the lookup again, in case we lost a race */ |
420 | vm_shared_region_lock(); |
421 | continue; |
422 | } |
423 | /* still no match: use our new one */ |
424 | shared_region = new_shared_region; |
425 | new_shared_region = NULL; |
426 | uint32_t newid = ++vm_shared_region_lastid; |
427 | if (newid == 0) { |
428 | panic("shared_region: vm_shared_region_lastid wrapped" ); |
429 | } |
430 | shared_region->sr_id = newid; |
431 | shared_region->sr_install_time = mach_absolute_time(); |
432 | queue_enter(&vm_shared_region_queue, |
433 | shared_region, |
434 | vm_shared_region_t, |
435 | sr_q); |
436 | vm_shared_region_count++; |
437 | if (vm_shared_region_count > vm_shared_region_peak) { |
438 | vm_shared_region_peak = vm_shared_region_count; |
439 | } |
440 | break; |
441 | } |
442 | |
443 | done: |
444 | vm_shared_region_unlock(); |
445 | |
446 | if (new_shared_region) { |
447 | /* |
448 | * We lost a race with someone else to create a new shared |
449 | * region for that environment. Get rid of our unused one. |
450 | */ |
451 | assert(new_shared_region->sr_ref_count == 1); |
452 | new_shared_region->sr_ref_count--; |
vm_shared_region_destroy(new_shared_region);
454 | new_shared_region = NULL; |
455 | } |
456 | |
457 | SHARED_REGION_TRACE_DEBUG( |
458 | ("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n" , |
459 | (void *)VM_KERNEL_ADDRPERM(root_dir), |
460 | cputype, cpu_subtype, is_64bit, target_page_shift, |
461 | reslide, is_driverkit, |
462 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
463 | |
464 | assert(shared_region->sr_ref_count > 0); |
465 | return shared_region; |
466 | } |
467 | |
468 | /* |
469 | * Take an extra reference on a shared region. |
470 | * The vm_shared_region_lock should already be held by the caller. |
471 | */ |
472 | static void |
473 | vm_shared_region_reference_locked( |
474 | vm_shared_region_t shared_region) |
475 | { |
476 | LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED); |
477 | |
478 | SHARED_REGION_TRACE_DEBUG( |
479 | ("shared_region: -> reference_locked(%p)\n" , |
480 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
481 | assert(shared_region->sr_ref_count > 0); |
482 | shared_region->sr_ref_count++; |
483 | assert(shared_region->sr_ref_count != 0); |
484 | |
485 | if (shared_region->sr_timer_call != NULL) { |
486 | boolean_t cancelled; |
487 | |
488 | /* cancel and free any pending timeout */ |
cancelled = thread_call_cancel(shared_region->sr_timer_call);
if (cancelled) {
thread_call_free(shared_region->sr_timer_call);
492 | shared_region->sr_timer_call = NULL; |
493 | /* release the reference held by the cancelled timer */ |
494 | shared_region->sr_ref_count--; |
495 | } else { |
496 | /* the timer will drop the reference and free itself */ |
497 | } |
498 | } |
499 | |
500 | SHARED_REGION_TRACE_DEBUG( |
501 | ("shared_region: reference_locked(%p) <- %d\n" , |
502 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
503 | shared_region->sr_ref_count)); |
504 | } |
505 | |
506 | /* |
507 | * Take a reference on a shared region. |
508 | */ |
509 | void |
510 | vm_shared_region_reference(vm_shared_region_t shared_region) |
511 | { |
512 | SHARED_REGION_TRACE_DEBUG( |
513 | ("shared_region: -> reference(%p)\n" , |
514 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
515 | |
516 | vm_shared_region_lock(); |
517 | vm_shared_region_reference_locked(shared_region); |
518 | vm_shared_region_unlock(); |
519 | |
520 | SHARED_REGION_TRACE_DEBUG( |
521 | ("shared_region: reference(%p) <- %d\n" , |
522 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
523 | shared_region->sr_ref_count)); |
524 | } |
525 | |
526 | /* |
527 | * Release a reference on the shared region. |
528 | * Destroy it if there are no references left. |
529 | */ |
530 | void |
531 | vm_shared_region_deallocate( |
532 | vm_shared_region_t shared_region) |
533 | { |
534 | SHARED_REGION_TRACE_DEBUG( |
535 | ("shared_region: -> deallocate(%p)\n" , |
536 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
537 | |
538 | vm_shared_region_lock(); |
539 | |
540 | assert(shared_region->sr_ref_count > 0); |
541 | |
542 | if (shared_region->sr_root_dir == NULL) { |
543 | /* |
544 | * Local (i.e. based on the boot volume) shared regions |
545 | * can persist or not based on the "shared_region_persistence" |
546 | * sysctl. |
547 | * Make sure that this one complies. |
548 | * |
549 | * See comments in vm_shared_region_slide() for notes about |
550 | * shared regions we have slid (which are not torn down currently). |
551 | */ |
552 | if (shared_region_persistence && |
553 | !shared_region->sr_persists) { |
554 | /* make this one persistent */ |
555 | shared_region->sr_ref_count++; |
556 | shared_region->sr_persists = TRUE; |
557 | } else if (!shared_region_persistence && |
558 | shared_region->sr_persists) { |
559 | /* make this one no longer persistent */ |
560 | assert(shared_region->sr_ref_count > 1); |
561 | shared_region->sr_ref_count--; |
562 | shared_region->sr_persists = FALSE; |
563 | } |
564 | } |
565 | |
566 | assert(shared_region->sr_ref_count > 0); |
567 | shared_region->sr_ref_count--; |
568 | SHARED_REGION_TRACE_DEBUG( |
569 | ("shared_region: deallocate(%p): ref now %d\n" , |
570 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
571 | shared_region->sr_ref_count)); |
572 | |
573 | if (shared_region->sr_ref_count == 0) { |
574 | uint64_t deadline; |
575 | |
576 | /* |
577 | * Even though a shared region is unused, delay a while before |
578 | * tearing it down, in case a new app launch can use it. |
579 | * We don't keep around stale shared regions, nor older RSR ones. |
580 | */ |
581 | if (shared_region->sr_timer_call == NULL && |
582 | shared_region_destroy_delay != 0 && |
583 | !shared_region->sr_stale && |
584 | !(shared_region->sr_rsr_version != 0 && |
585 | shared_region->sr_rsr_version != rsr_get_version())) { |
586 | /* hold one reference for the timer */ |
587 | assert(!shared_region->sr_mapping_in_progress); |
588 | shared_region->sr_ref_count++; |
589 | |
590 | /* set up the timer */ |
591 | shared_region->sr_timer_call = thread_call_allocate( |
(thread_call_func_t) vm_shared_region_timeout,
(thread_call_param_t) shared_region);
594 | |
595 | /* schedule the timer */ |
clock_interval_to_deadline(shared_region_destroy_delay,
NSEC_PER_SEC,
&deadline);
thread_call_enter_delayed(shared_region->sr_timer_call,
600 | deadline); |
601 | |
602 | SHARED_REGION_TRACE_DEBUG( |
603 | ("shared_region: deallocate(%p): armed timer\n" , |
604 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
605 | |
606 | vm_shared_region_unlock(); |
607 | } else { |
608 | /* timer expired: let go of this shared region */ |
609 | |
610 | /* Make sure there's no cached pointer to the region. */ |
611 | if (primary_system_shared_region == shared_region) { |
612 | primary_system_shared_region = NULL; |
613 | } |
614 | |
615 | /* |
616 | * Remove it from the queue first, so no one can find |
617 | * it... |
618 | */ |
619 | queue_remove(&vm_shared_region_queue, |
620 | shared_region, |
621 | vm_shared_region_t, |
622 | sr_q); |
623 | vm_shared_region_count--; |
624 | vm_shared_region_unlock(); |
625 | |
626 | /* ... and destroy it */ |
627 | vm_shared_region_destroy(shared_region); |
628 | shared_region = NULL; |
629 | } |
630 | } else { |
631 | vm_shared_region_unlock(); |
632 | } |
633 | |
634 | SHARED_REGION_TRACE_DEBUG( |
635 | ("shared_region: deallocate(%p) <-\n" , |
636 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
637 | } |
638 | |
639 | void |
640 | vm_shared_region_timeout( |
641 | thread_call_param_t param0, |
642 | __unused thread_call_param_t param1) |
643 | { |
644 | vm_shared_region_t shared_region; |
645 | |
646 | shared_region = (vm_shared_region_t) param0; |
647 | |
648 | vm_shared_region_deallocate(shared_region); |
649 | } |
650 | |
651 | |
652 | /* |
653 | * Create a new (empty) shared region for a new environment. |
654 | */ |
655 | static vm_shared_region_t |
656 | vm_shared_region_create( |
657 | void *root_dir, |
658 | cpu_type_t cputype, |
659 | cpu_subtype_t cpu_subtype, |
660 | boolean_t is_64bit, |
661 | int target_page_shift, |
662 | #if !__has_feature(ptrauth_calls) |
663 | __unused |
664 | #endif /* __has_feature(ptrauth_calls) */ |
665 | boolean_t reslide, |
666 | boolean_t is_driverkit, |
667 | uint32_t rsr_version) |
668 | { |
669 | vm_named_entry_t mem_entry; |
670 | ipc_port_t mem_entry_port; |
671 | vm_shared_region_t shared_region; |
672 | vm_map_t sub_map; |
673 | mach_vm_offset_t base_address, pmap_nesting_start; |
674 | mach_vm_size_t size, pmap_nesting_size; |
675 | |
676 | SHARED_REGION_TRACE_INFO( |
677 | ("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n" , |
678 | (void *)VM_KERNEL_ADDRPERM(root_dir), |
679 | cputype, cpu_subtype, is_64bit, target_page_shift, |
680 | reslide, is_driverkit)); |
681 | |
682 | base_address = 0; |
683 | size = 0; |
684 | mem_entry = NULL; |
685 | mem_entry_port = IPC_PORT_NULL; |
686 | sub_map = VM_MAP_NULL; |
687 | |
688 | /* create a new shared region structure... */ |
689 | shared_region = kalloc_type(struct vm_shared_region, |
690 | Z_WAITOK | Z_NOFAIL); |
691 | |
692 | /* figure out the correct settings for the desired environment */ |
693 | if (is_64bit) { |
694 | switch (cputype) { |
695 | #if defined(__arm64__) |
696 | case CPU_TYPE_ARM64: |
697 | base_address = SHARED_REGION_BASE_ARM64; |
698 | size = SHARED_REGION_SIZE_ARM64; |
699 | pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64; |
700 | pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64; |
701 | break; |
702 | #else |
703 | case CPU_TYPE_I386: |
704 | base_address = SHARED_REGION_BASE_X86_64; |
705 | size = SHARED_REGION_SIZE_X86_64; |
706 | pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64; |
707 | pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64; |
708 | break; |
709 | case CPU_TYPE_POWERPC: |
710 | base_address = SHARED_REGION_BASE_PPC64; |
711 | size = SHARED_REGION_SIZE_PPC64; |
712 | pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64; |
713 | pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64; |
714 | break; |
715 | #endif |
716 | default: |
717 | SHARED_REGION_TRACE_ERROR( |
718 | ("shared_region: create: unknown cpu type %d\n" , |
719 | cputype)); |
720 | kfree_type(struct vm_shared_region, shared_region); |
721 | shared_region = NULL; |
722 | goto done; |
723 | } |
724 | } else { |
725 | switch (cputype) { |
726 | #if defined(__arm64__) |
727 | case CPU_TYPE_ARM: |
728 | base_address = SHARED_REGION_BASE_ARM; |
729 | size = SHARED_REGION_SIZE_ARM; |
730 | pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM; |
731 | pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM; |
732 | break; |
733 | #else |
734 | case CPU_TYPE_I386: |
735 | base_address = SHARED_REGION_BASE_I386; |
736 | size = SHARED_REGION_SIZE_I386; |
737 | pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386; |
738 | pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386; |
739 | break; |
740 | case CPU_TYPE_POWERPC: |
741 | base_address = SHARED_REGION_BASE_PPC; |
742 | size = SHARED_REGION_SIZE_PPC; |
743 | pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC; |
744 | pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC; |
745 | break; |
746 | #endif |
747 | default: |
748 | SHARED_REGION_TRACE_ERROR( |
749 | ("shared_region: create: unknown cpu type %d\n" , |
750 | cputype)); |
751 | kfree_type(struct vm_shared_region, shared_region); |
752 | shared_region = NULL; |
753 | goto done; |
754 | } |
755 | } |
756 | |
757 | /* create a memory entry structure and a Mach port handle */ |
mem_entry = mach_memory_entry_allocate(&mem_entry_port);
759 | |
760 | #if defined(__arm64__) |
761 | { |
762 | struct pmap *pmap_nested; |
763 | int pmap_flags = 0; |
764 | pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0; |
765 | |
766 | |
767 | #if __ARM_MIXED_PAGE_SIZE__ |
768 | if (cputype == CPU_TYPE_ARM64 && |
769 | target_page_shift == FOURK_PAGE_SHIFT) { |
770 | /* arm64/4k address space */ |
771 | pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES; |
772 | } |
773 | #endif /* __ARM_MIXED_PAGE_SIZE__ */ |
774 | |
pmap_nested = pmap_create_options(NULL, 0, pmap_flags);
if (pmap_nested != PMAP_NULL) {
pmap_set_nested(pmap_nested);
sub_map = vm_map_create_options(pmap_nested, 0,
(vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
780 | |
781 | if (is_64bit || |
782 | page_shift_user32 == SIXTEENK_PAGE_SHIFT) { |
783 | /* enforce 16KB alignment of VM map entries */ |
vm_map_set_page_shift(sub_map, SIXTEENK_PAGE_SHIFT);
785 | } |
786 | #if __ARM_MIXED_PAGE_SIZE__ |
787 | if (cputype == CPU_TYPE_ARM64 && |
788 | target_page_shift == FOURK_PAGE_SHIFT) { |
789 | /* arm64/4k address space */ |
vm_map_set_page_shift(sub_map, FOURK_PAGE_SHIFT);
791 | } |
792 | #endif /* __ARM_MIXED_PAGE_SIZE__ */ |
793 | } else { |
794 | sub_map = VM_MAP_NULL; |
795 | } |
796 | } |
797 | #else /* defined(__arm64__) */ |
798 | { |
799 | /* create a VM sub map and its pmap */ |
800 | pmap_t pmap = pmap_create_options(NULL, 0, is_64bit); |
801 | if (pmap != NULL) { |
802 | sub_map = vm_map_create_options(pmap, 0, |
803 | (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE); |
804 | } else { |
805 | sub_map = VM_MAP_NULL; |
806 | } |
807 | } |
808 | #endif /* defined(__arm64__) */ |
809 | if (sub_map == VM_MAP_NULL) { |
ipc_port_release_send(mem_entry_port);
811 | kfree_type(struct vm_shared_region, shared_region); |
812 | shared_region = NULL; |
813 | SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n" )); |
814 | goto done; |
815 | } |
816 | |
817 | /* shared regions should always enforce code-signing */ |
vm_map_cs_enforcement_set(sub_map, true);
819 | assert(vm_map_cs_enforcement(sub_map)); |
820 | assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map))); |
821 | |
822 | assert(!sub_map->disable_vmentry_reuse); |
823 | sub_map->is_nested_map = TRUE; |
824 | |
825 | /* make the memory entry point to the VM sub map */ |
826 | mem_entry->is_sub_map = TRUE; |
827 | mem_entry->backing.map = sub_map; |
828 | mem_entry->size = size; |
829 | mem_entry->protection = VM_PROT_ALL; |
830 | |
831 | /* make the shared region point at the memory entry */ |
832 | shared_region->sr_mem_entry = mem_entry_port; |
833 | |
834 | /* fill in the shared region's environment and settings */ |
835 | shared_region->sr_base_address = base_address; |
836 | shared_region->sr_size = size; |
837 | shared_region->sr_pmap_nesting_start = pmap_nesting_start; |
838 | shared_region->sr_pmap_nesting_size = pmap_nesting_size; |
839 | shared_region->sr_cpu_type = cputype; |
840 | shared_region->sr_cpu_subtype = cpu_subtype; |
841 | shared_region->sr_64bit = (uint8_t)is_64bit; |
842 | #if __ARM_MIXED_PAGE_SIZE__ |
843 | shared_region->sr_page_shift = (uint8_t)target_page_shift; |
844 | #endif /* __ARM_MIXED_PAGE_SIZE__ */ |
845 | shared_region->sr_driverkit = (uint8_t)is_driverkit; |
846 | shared_region->sr_rsr_version = rsr_version; |
847 | shared_region->sr_root_dir = root_dir; |
848 | |
849 | queue_init(&shared_region->sr_q); |
850 | shared_region->sr_mapping_in_progress = THREAD_NULL; |
851 | shared_region->sr_slide_in_progress = THREAD_NULL; |
852 | shared_region->sr_persists = FALSE; |
853 | shared_region->sr_stale = FALSE; |
854 | shared_region->sr_timer_call = NULL; |
855 | shared_region->sr_first_mapping = (mach_vm_offset_t) -1; |
856 | |
857 | /* grab a reference for the caller */ |
858 | shared_region->sr_ref_count = 1; |
859 | |
860 | shared_region->sr_slide = 0; /* not slid yet */ |
861 | |
862 | /* Initialize UUID and other metadata */ |
memset(&shared_region->sr_uuid, '\0', sizeof(shared_region->sr_uuid));
864 | shared_region->sr_uuid_copied = FALSE; |
865 | shared_region->sr_images_count = 0; |
866 | shared_region->sr_images = NULL; |
867 | #if __has_feature(ptrauth_calls) |
868 | shared_region->sr_reslide = reslide; |
869 | shared_region->sr_num_auth_section = 0; |
870 | shared_region->sr_next_auth_section = 0; |
871 | shared_region->sr_auth_section = NULL; |
872 | #endif /* __has_feature(ptrauth_calls) */ |
873 | |
874 | done: |
875 | if (shared_region) { |
876 | SHARED_REGION_TRACE_INFO( |
877 | ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d," |
878 | "base=0x%llx,size=0x%llx) <- " |
879 | "%p mem=(%p,%p) map=%p pmap=%p\n" , |
880 | (void *)VM_KERNEL_ADDRPERM(root_dir), |
881 | cputype, cpu_subtype, is_64bit, reslide, is_driverkit, |
882 | (long long)base_address, |
883 | (long long)size, |
884 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
885 | (void *)VM_KERNEL_ADDRPERM(mem_entry_port), |
886 | (void *)VM_KERNEL_ADDRPERM(mem_entry), |
887 | (void *)VM_KERNEL_ADDRPERM(sub_map), |
888 | (void *)VM_KERNEL_ADDRPERM(sub_map->pmap))); |
889 | } else { |
890 | SHARED_REGION_TRACE_INFO( |
891 | ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d," |
892 | "base=0x%llx,size=0x%llx) <- NULL" , |
893 | (void *)VM_KERNEL_ADDRPERM(root_dir), |
894 | cputype, cpu_subtype, is_64bit, is_driverkit, |
895 | (long long)base_address, |
896 | (long long)size)); |
897 | } |
898 | return shared_region; |
899 | } |
900 | |
901 | /* |
902 | * Destroy a now-unused shared region. |
903 | * The shared region is no longer in the queue and can not be looked up. |
904 | */ |
905 | static void |
906 | vm_shared_region_destroy( |
907 | vm_shared_region_t shared_region) |
908 | { |
909 | vm_named_entry_t mem_entry; |
910 | vm_map_t map; |
911 | |
912 | SHARED_REGION_TRACE_INFO( |
913 | ("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n" , |
914 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
915 | (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir), |
916 | shared_region->sr_cpu_type, |
917 | shared_region->sr_cpu_subtype, |
918 | shared_region->sr_64bit, |
919 | shared_region->sr_driverkit)); |
920 | |
921 | assert(shared_region->sr_ref_count == 0); |
922 | assert(!shared_region->sr_persists); |
923 | |
mem_entry = mach_memory_entry_from_port(shared_region->sr_mem_entry);
925 | assert(mem_entry->is_sub_map); |
926 | assert(!mem_entry->internal); |
927 | assert(!mem_entry->is_copy); |
928 | map = mem_entry->backing.map; |
929 | |
930 | /* |
931 | * Clean up the pmap first. The virtual addresses that were |
932 | * entered in this possibly "nested" pmap may have different values |
933 | * than the VM map's min and max offsets, if the VM sub map was |
934 | * mapped at a non-zero offset in the processes' main VM maps, which |
935 | * is usually the case, so the clean-up we do in vm_map_destroy() would |
936 | * not be enough. |
937 | */ |
938 | if (map->pmap) { |
pmap_remove(map->pmap,
(vm_map_offset_t)shared_region->sr_base_address,
(vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
942 | } |
943 | |
944 | /* |
945 | * Release our (one and only) handle on the memory entry. |
946 | * This will generate a no-senders notification, which will be processed |
947 | * by ipc_kobject_notify_no_senders(), which will release the one and only |
948 | * reference on the memory entry and cause it to be destroyed, along |
949 | * with the VM sub map and its pmap. |
950 | */ |
mach_memory_entry_port_release(shared_region->sr_mem_entry);
952 | mem_entry = NULL; |
953 | shared_region->sr_mem_entry = IPC_PORT_NULL; |
954 | |
955 | if (shared_region->sr_timer_call) { |
thread_call_free(shared_region->sr_timer_call);
957 | } |
958 | |
959 | #if __has_feature(ptrauth_calls) |
960 | /* |
961 | * Free the cached copies of slide_info for the AUTH regions. |
962 | */ |
963 | for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) { |
964 | vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i]; |
965 | if (si != NULL) { |
966 | vm_object_deallocate(si->si_slide_object); |
967 | kfree_data(si->si_slide_info_entry, |
968 | si->si_slide_info_size); |
969 | kfree_type(struct vm_shared_region_slide_info, si); |
970 | shared_region->sr_auth_section[i] = NULL; |
971 | } |
972 | } |
973 | if (shared_region->sr_auth_section != NULL) { |
974 | assert(shared_region->sr_num_auth_section > 0); |
975 | kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section); |
976 | shared_region->sr_auth_section = NULL; |
977 | shared_region->sr_num_auth_section = 0; |
978 | } |
979 | #endif /* __has_feature(ptrauth_calls) */ |
980 | |
981 | /* release the shared region structure... */ |
982 | kfree_type(struct vm_shared_region, shared_region); |
983 | |
984 | SHARED_REGION_TRACE_DEBUG( |
985 | ("shared_region: destroy(%p) <-\n" , |
986 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
987 | shared_region = NULL; |
988 | } |
989 | |
990 | /* |
991 | * Gets the address of the first (in time) mapping in the shared region. |
* If used during initial task setup by dyld, task should be non-NULL.
993 | */ |
994 | kern_return_t |
995 | vm_shared_region_start_address( |
996 | vm_shared_region_t shared_region, |
997 | mach_vm_offset_t *start_address, |
998 | task_t task) |
999 | { |
1000 | kern_return_t kr; |
1001 | mach_vm_offset_t sr_base_address; |
1002 | mach_vm_offset_t sr_first_mapping; |
1003 | |
1004 | SHARED_REGION_TRACE_DEBUG( |
1005 | ("shared_region: -> start_address(%p)\n" , |
1006 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
1007 | |
1008 | vm_shared_region_lock(); |
1009 | |
1010 | /* |
1011 | * Wait if there's another thread establishing a mapping |
1012 | * in this shared region right when we're looking at it. |
1013 | * We want a consistent view of the map... |
1014 | */ |
1015 | while (shared_region->sr_mapping_in_progress) { |
1016 | /* wait for our turn... */ |
1017 | vm_shared_region_sleep(&shared_region->sr_mapping_in_progress, |
1018 | THREAD_UNINT); |
1019 | } |
1020 | assert(!shared_region->sr_mapping_in_progress); |
1021 | assert(shared_region->sr_ref_count > 0); |
1022 | |
1023 | sr_base_address = shared_region->sr_base_address; |
1024 | sr_first_mapping = shared_region->sr_first_mapping; |
1025 | |
1026 | if (sr_first_mapping == (mach_vm_offset_t) -1) { |
1027 | /* shared region is empty */ |
1028 | kr = KERN_INVALID_ADDRESS; |
1029 | } else { |
1030 | kr = KERN_SUCCESS; |
1031 | *start_address = sr_base_address + sr_first_mapping; |
1032 | } |
1033 | |
1034 | |
1035 | uint32_t slide = shared_region->sr_slide; |
1036 | |
1037 | vm_shared_region_unlock(); |
1038 | |
1039 | /* |
* Cache shared region info in the task for telemetry gathering, if a task
* was passed in. No task lock needed here, as we're still in initial task setup.
1042 | */ |
1043 | if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) { |
uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1045 | if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset), |
1046 | (char *)&task->task_shared_region_uuid, |
1047 | sizeof(task->task_shared_region_uuid)) == 0) { |
1048 | task->task_shared_region_slide = slide; |
1049 | } |
1050 | } |
1051 | |
1052 | SHARED_REGION_TRACE_DEBUG( |
1053 | ("shared_region: start_address(%p) <- 0x%llx\n" , |
1054 | (void *)VM_KERNEL_ADDRPERM(shared_region), |
1055 | (long long)shared_region->sr_base_address)); |
1056 | |
1057 | return kr; |
1058 | } |
1059 | |
1060 | /* |
* Look up a pre-existing mapping in the shared region, for replacement.
1062 | * Takes an extra object reference if found. |
1063 | */ |
1064 | static kern_return_t |
1065 | find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry) |
1066 | { |
1067 | vm_map_entry_t found; |
1068 | |
1069 | /* find the shared region's map entry to slide */ |
1070 | vm_map_lock_read(map); |
if (!vm_map_lookup_entry_allow_pgz(map, addr, &found)) {
1072 | /* no mapping there */ |
1073 | vm_map_unlock(map); |
1074 | return KERN_INVALID_ARGUMENT; |
1075 | } |
1076 | |
1077 | *entry = *found; |
1078 | /* extra ref to keep object alive while map is unlocked */ |
1079 | vm_object_reference(VME_OBJECT(found)); |
1080 | vm_map_unlock_read(map); |
1081 | return KERN_SUCCESS; |
1082 | } |
1083 | |
1084 | static bool |
1085 | shared_region_make_permanent( |
1086 | vm_shared_region_t sr, |
1087 | vm_prot_t max_prot) |
1088 | { |
1089 | if (sr->sr_cpu_type == CPU_TYPE_X86_64) { |
1090 | return false; |
1091 | } |
1092 | if (max_prot & VM_PROT_WRITE) { |
1093 | /* |
1094 | * Potentially writable mapping: no major issue with allowing |
1095 | * it to be replaced since its contents could be modified |
1096 | * anyway. |
1097 | */ |
1098 | return false; |
1099 | } |
1100 | if (max_prot & VM_PROT_EXECUTE) { |
1101 | /* |
1102 | * Potentially executable mapping: some software might want |
1103 | * to try and replace it to interpose their own code when a |
1104 | * given routine is called or returns, for example. |
1105 | * So let's not make it "permanent". |
1106 | */ |
1107 | return false; |
1108 | } |
1109 | /* |
1110 | * Make this mapping "permanent" to prevent it from being deleted |
1111 | * and/or replaced with another mapping. |
1112 | */ |
1113 | return true; |
1114 | } |
1115 | |
1116 | static bool |
1117 | shared_region_tpro_protect( |
1118 | vm_shared_region_t sr, |
1119 | vm_prot_t max_prot __unused) |
1120 | { |
1121 | if (sr->sr_cpu_type != CPU_TYPE_ARM64 || |
1122 | (sr->sr_cpu_subtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E) { |
1123 | return false; |
1124 | } |
1125 | |
1126 | |
1127 | /* |
1128 | * Unless otherwise explicitly requested all other mappings do not get |
1129 | * TPRO protection. |
1130 | */ |
1131 | return false; |
1132 | } |
1133 | |
1134 | #if __has_feature(ptrauth_calls) |
1135 | |
1136 | /* |
1137 | * Determine if this task is actually using pointer signing. |
1138 | */ |
1139 | static boolean_t |
1140 | task_sign_pointers(task_t task) |
1141 | { |
1142 | if (task->map && |
1143 | task->map->pmap && |
1144 | !task->map->pmap->disable_jop) { |
1145 | return TRUE; |
1146 | } |
1147 | return FALSE; |
1148 | } |
1149 | |
1150 | /* |
1151 | * If the shared region contains mappings that are authenticated, then |
1152 | * remap them into the task private map. |
1153 | * |
1154 | * Failures are possible in this routine when jetsam kills a process |
1155 | * just as dyld is trying to set it up. The vm_map and task shared region |
1156 | * info get torn down w/o waiting for this thread to finish up. |
1157 | */ |
1158 | __attribute__((noinline)) |
1159 | kern_return_t |
1160 | vm_shared_region_auth_remap(vm_shared_region_t sr) |
1161 | { |
1162 | memory_object_t sr_pager = MEMORY_OBJECT_NULL; |
1163 | task_t task = current_task(); |
1164 | vm_shared_region_slide_info_t si; |
1165 | uint_t i; |
1166 | vm_object_t object; |
1167 | vm_map_t sr_map; |
1168 | struct vm_map_entry tmp_entry_store = {0}; |
1169 | vm_map_entry_t tmp_entry = NULL; |
1170 | vm_map_kernel_flags_t vmk_flags; |
1171 | vm_map_offset_t map_addr; |
1172 | kern_return_t kr = KERN_SUCCESS; |
1173 | boolean_t use_ptr_auth = task_sign_pointers(task); |
1174 | |
1175 | /* |
1176 | * Don't do this more than once and avoid any race conditions in finishing it. |
1177 | */ |
1178 | vm_shared_region_lock(); |
1179 | while (sr->sr_mapping_in_progress) { |
1180 | /* wait for our turn... */ |
1181 | vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT); |
1182 | } |
1183 | assert(!sr->sr_mapping_in_progress); |
1184 | assert(sr->sr_ref_count > 0); |
1185 | |
1186 | /* Just return if already done. */ |
1187 | if (task->shared_region_auth_remapped) { |
1188 | vm_shared_region_unlock(); |
1189 | return KERN_SUCCESS; |
1190 | } |
1191 | |
1192 | /* let others know to wait while we're working in this shared region */ |
1193 | sr->sr_mapping_in_progress = current_thread(); |
1194 | vm_shared_region_unlock(); |
1195 | |
1196 | /* |
1197 | * Remap any sections with pointer authentications into the private map. |
1198 | */ |
1199 | for (i = 0; i < sr->sr_num_auth_section; ++i) { |
1200 | si = sr->sr_auth_section[i]; |
1201 | assert(si != NULL); |
1202 | assert(si->si_ptrauth); |
1203 | |
1204 | /* |
* We have a mapping that needs to be private.
1206 | * Look for an existing slid mapping's pager with matching |
1207 | * object, offset, slide info and shared_region_id to reuse. |
1208 | */ |
1209 | object = si->si_slide_object; |
1210 | sr_pager = shared_region_pager_match(object, si->si_start, si, |
1211 | use_ptr_auth ? task->jop_pid : 0); |
1212 | if (sr_pager == MEMORY_OBJECT_NULL) { |
1213 | printf("%s(): shared_region_pager_match() failed\n" , __func__); |
1214 | kr = KERN_FAILURE; |
1215 | goto done; |
1216 | } |
1217 | |
1218 | /* |
1219 | * verify matching jop_pid for this task and this pager |
1220 | */ |
1221 | if (use_ptr_auth) { |
1222 | shared_region_pager_match_task_key(sr_pager, task); |
1223 | } |
1224 | |
1225 | sr_map = vm_shared_region_vm_map(sr); |
1226 | tmp_entry = NULL; |
1227 | |
1228 | kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store); |
1229 | if (kr != KERN_SUCCESS) { |
1230 | printf("%s(): find_mapping_to_slide() failed\n" , __func__); |
1231 | goto done; |
1232 | } |
1233 | tmp_entry = &tmp_entry_store; |
1234 | |
1235 | /* |
1236 | * Check that the object exactly covers the region to slide. |
1237 | */ |
1238 | if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) { |
1239 | printf("%s(): doesn't fully cover\n" , __func__); |
1240 | kr = KERN_FAILURE; |
1241 | goto done; |
1242 | } |
1243 | |
1244 | /* |
1245 | * map the pager over the portion of the mapping that needs sliding |
1246 | */ |
1247 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true); |
1248 | vmk_flags.vmkf_overwrite_immutable = true; |
1249 | vmk_flags.vmf_permanent = shared_region_make_permanent(sr, |
1250 | tmp_entry->max_protection); |
1251 | |
1252 | /* Preserve the TPRO flag if task has TPRO enabled */ |
1253 | vmk_flags.vmf_tpro = (vm_map_tpro(task->map) && |
1254 | tmp_entry->used_for_tpro && |
1255 | task_is_hardened_binary(task)); |
1256 | |
1257 | map_addr = si->si_slid_address; |
1258 | kr = vm_map_enter_mem_object(task->map, |
1259 | &map_addr, |
1260 | si->si_end - si->si_start, |
1261 | (mach_vm_offset_t) 0, |
1262 | vmk_flags, |
1263 | (ipc_port_t)(uintptr_t) sr_pager, |
1264 | 0, |
1265 | TRUE, |
1266 | tmp_entry->protection, |
1267 | tmp_entry->max_protection, |
1268 | tmp_entry->inheritance); |
1269 | memory_object_deallocate(sr_pager); |
1270 | sr_pager = MEMORY_OBJECT_NULL; |
1271 | if (kr != KERN_SUCCESS) { |
1272 | printf("%s(): vm_map_enter_mem_object() failed\n" , __func__); |
1273 | goto done; |
1274 | } |
1275 | assertf(map_addr == si->si_slid_address, |
1276 | "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n" , |
1277 | (uint64_t)map_addr, |
1278 | (uint64_t)si->si_slid_address, |
1279 | tmp_entry); |
1280 | |
1281 | /* Drop the ref count grabbed by find_mapping_to_slide */ |
1282 | vm_object_deallocate(VME_OBJECT(tmp_entry)); |
1283 | tmp_entry = NULL; |
1284 | } |
1285 | |
1286 | done: |
1287 | if (tmp_entry) { |
1288 | /* Drop the ref count grabbed by find_mapping_to_slide */ |
1289 | vm_object_deallocate(VME_OBJECT(tmp_entry)); |
1290 | tmp_entry = NULL; |
1291 | } |
1292 | |
1293 | /* |
1294 | * Drop any extra reference to the pager in case we're quitting due to an error above. |
1295 | */ |
1296 | if (sr_pager != MEMORY_OBJECT_NULL) { |
1297 | memory_object_deallocate(sr_pager); |
1298 | } |
1299 | |
1300 | /* |
* Mark the region as having its auth sections remapped.
1302 | */ |
1303 | vm_shared_region_lock(); |
1304 | task->shared_region_auth_remapped = TRUE; |
1305 | assert(sr->sr_mapping_in_progress == current_thread()); |
1306 | sr->sr_mapping_in_progress = THREAD_NULL; |
1307 | vm_shared_region_wakeup((event_t)&sr->sr_mapping_in_progress); |
1308 | vm_shared_region_unlock(); |
1309 | return kr; |
1310 | } |
1311 | #endif /* __has_feature(ptrauth_calls) */ |
1312 | |
1313 | void |
1314 | vm_shared_region_undo_mappings( |
1315 | vm_map_t sr_map, |
1316 | mach_vm_offset_t sr_base_address, |
1317 | struct _sr_file_mappings *srf_mappings, |
1318 | struct _sr_file_mappings *srf_mappings_current, |
1319 | unsigned int srf_current_mappings_count) |
1320 | { |
1321 | unsigned int j = 0; |
1322 | vm_shared_region_t shared_region = NULL; |
1323 | boolean_t reset_shared_region_state = FALSE; |
1324 | struct _sr_file_mappings *srfmp; |
1325 | unsigned int mappings_count; |
1326 | struct shared_file_mapping_slide_np *mappings; |
1327 | |
shared_region = vm_shared_region_get(current_task());
if (shared_region == NULL) {
printf("Failed to undo mappings because of NULL shared region.\n");
1331 | return; |
1332 | } |
1333 | |
1334 | shared_region->sr_first_mapping = (mach_vm_offset_t) -1; |
1335 | |
1336 | if (sr_map == NULL) { |
1337 | ipc_port_t sr_handle; |
1338 | vm_named_entry_t sr_mem_entry; |
1339 | |
1340 | vm_shared_region_lock(); |
1341 | assert(shared_region->sr_ref_count > 0); |
1342 | |
1343 | while (shared_region->sr_mapping_in_progress) { |
1344 | /* wait for our turn... */ |
1345 | vm_shared_region_sleep(&shared_region->sr_mapping_in_progress, |
1346 | THREAD_UNINT); |
1347 | } |
1348 | assert(!shared_region->sr_mapping_in_progress); |
1349 | assert(shared_region->sr_ref_count > 0); |
1350 | /* let others know we're working in this shared region */ |
1351 | shared_region->sr_mapping_in_progress = current_thread(); |
1352 | |
1353 | vm_shared_region_unlock(); |
1354 | |
1355 | reset_shared_region_state = TRUE; |
1356 | |
1357 | /* no need to lock because this data is never modified... */ |
1358 | sr_handle = shared_region->sr_mem_entry; |
sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1360 | sr_map = sr_mem_entry->backing.map; |
1361 | sr_base_address = shared_region->sr_base_address; |
1362 | } |
1363 | /* |
1364 | * Undo the mappings we've established so far. |
1365 | */ |
1366 | for (srfmp = &srf_mappings[0]; |
1367 | srfmp <= srf_mappings_current; |
1368 | srfmp++) { |
1369 | mappings = srfmp->mappings; |
1370 | mappings_count = srfmp->mappings_count; |
1371 | if (srfmp == srf_mappings_current) { |
1372 | mappings_count = srf_current_mappings_count; |
1373 | } |
1374 | |
1375 | for (j = 0; j < mappings_count; j++) { |
1376 | kern_return_t kr2; |
1377 | mach_vm_offset_t start, end; |
1378 | |
1379 | if (mappings[j].sms_size == 0) { |
1380 | /* |
1381 | * We didn't establish this |
1382 | * mapping, so nothing to undo. |
1383 | */ |
1384 | continue; |
1385 | } |
1386 | SHARED_REGION_TRACE_INFO( |
1387 | ("shared_region: mapping[%d]: " |
1388 | "address:0x%016llx " |
1389 | "size:0x%016llx " |
1390 | "offset:0x%016llx " |
1391 | "maxprot:0x%x prot:0x%x: " |
1392 | "undoing...\n" , |
1393 | j, |
1394 | (long long)mappings[j].sms_address, |
1395 | (long long)mappings[j].sms_size, |
1396 | (long long)mappings[j].sms_file_offset, |
1397 | mappings[j].sms_max_prot, |
1398 | mappings[j].sms_init_prot)); |
1399 | start = (mappings[j].sms_address - sr_base_address); |
1400 | end = start + mappings[j].sms_size; |
1401 | start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map)); |
1402 | end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map)); |
kr2 = vm_map_remove_guard(sr_map,
start,
end,
VM_MAP_REMOVE_IMMUTABLE,
1407 | KMEM_GUARD_NONE).kmr_return; |
1408 | assert(kr2 == KERN_SUCCESS); |
1409 | } |
1410 | } |
1411 | |
1412 | if (reset_shared_region_state) { |
1413 | vm_shared_region_lock(); |
1414 | assert(shared_region->sr_ref_count > 0); |
1415 | assert(shared_region->sr_mapping_in_progress == current_thread()); |
1416 | /* we're done working on that shared region */ |
1417 | shared_region->sr_mapping_in_progress = THREAD_NULL; |
1418 | vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress); |
1419 | vm_shared_region_unlock(); |
1420 | reset_shared_region_state = FALSE; |
1421 | } |
1422 | |
1423 | vm_shared_region_deallocate(shared_region); |
1424 | } |
1425 | |
1426 | /* |
1427 | * First part of vm_shared_region_map_file(). Split out to |
1428 | * avoid kernel stack overflow. |
1429 | */ |
1430 | __attribute__((noinline)) |
1431 | static kern_return_t |
1432 | vm_shared_region_map_file_setup( |
1433 | vm_shared_region_t shared_region, |
1434 | int sr_file_mappings_count, |
1435 | struct _sr_file_mappings *sr_file_mappings, |
1436 | unsigned int *mappings_to_slide_cnt, |
1437 | struct shared_file_mapping_slide_np **mappings_to_slide, |
1438 | mach_vm_offset_t *slid_mappings, |
1439 | memory_object_control_t *slid_file_controls, |
1440 | mach_vm_offset_t *sfm_min_address, |
1441 | mach_vm_offset_t *sfm_max_address, |
1442 | vm_map_t *sr_map_ptr, |
1443 | vm_map_offset_t *lowest_unnestable_addr_ptr, |
1444 | unsigned int vmsr_num_slides) |
1445 | { |
1446 | kern_return_t kr = KERN_SUCCESS; |
1447 | memory_object_control_t file_control; |
1448 | vm_object_t file_object; |
1449 | ipc_port_t sr_handle; |
1450 | vm_named_entry_t sr_mem_entry; |
1451 | vm_map_t sr_map; |
1452 | mach_vm_offset_t sr_base_address; |
1453 | unsigned int i = 0; |
1454 | mach_port_t map_port; |
1455 | vm_map_offset_t target_address; |
1456 | vm_object_t object; |
1457 | vm_object_size_t obj_size; |
1458 | vm_map_offset_t lowest_unnestable_addr = 0; |
1459 | vm_map_kernel_flags_t vmk_flags; |
1460 | mach_vm_offset_t sfm_end; |
1461 | uint32_t mappings_count; |
1462 | struct shared_file_mapping_slide_np *mappings; |
1463 | struct _sr_file_mappings *srfmp; |
1464 | |
1465 | vm_shared_region_lock(); |
1466 | assert(shared_region->sr_ref_count > 0); |
1467 | |
1468 | /* |
1469 | * Make sure we handle only one mapping at a time in a given |
1470 | * shared region, to avoid race conditions. This should not |
1471 | * happen frequently... |
1472 | */ |
1473 | while (shared_region->sr_mapping_in_progress) { |
1474 | /* wait for our turn... */ |
1475 | vm_shared_region_sleep(&shared_region->sr_mapping_in_progress, |
1476 | THREAD_UNINT); |
1477 | } |
1478 | assert(!shared_region->sr_mapping_in_progress); |
1479 | assert(shared_region->sr_ref_count > 0); |
1480 | |
1481 | |
1482 | /* let others know we're working in this shared region */ |
1483 | shared_region->sr_mapping_in_progress = current_thread(); |
1484 | |
1485 | /* |
1486 | * Did someone race in and map this shared region already? |
1487 | */ |
1488 | if (shared_region->sr_first_mapping != -1) { |
1489 | vm_shared_region_unlock(); |
1490 | #if DEVELOPMENT || DEBUG |
1491 | printf("shared_region: caught race in map and slide\n" ); |
1492 | #endif /* DEVELOPMENT || DEBUG */ |
1493 | return KERN_FAILURE; |
1494 | } |
1495 | |
1496 | vm_shared_region_unlock(); |
1497 | |
1498 | /* no need to lock because this data is never modified... */ |
1499 | sr_handle = shared_region->sr_mem_entry; |
sr_mem_entry = mach_memory_entry_from_port(sr_handle);
1501 | sr_map = sr_mem_entry->backing.map; |
1502 | sr_base_address = shared_region->sr_base_address; |
1503 | |
1504 | SHARED_REGION_TRACE_DEBUG( |
1505 | ("shared_region: -> map(%p)\n" , |
1506 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
1507 | |
1508 | mappings_count = 0; |
1509 | mappings = NULL; |
1510 | srfmp = NULL; |
1511 | |
1512 | /* process all the files to be mapped */ |
1513 | for (srfmp = &sr_file_mappings[0]; |
1514 | srfmp < &sr_file_mappings[sr_file_mappings_count]; |
1515 | srfmp++) { |
1516 | mappings_count = srfmp->mappings_count; |
1517 | mappings = srfmp->mappings; |
1518 | file_control = srfmp->file_control; |
1519 | |
1520 | if (mappings_count == 0) { |
1521 | /* no mappings here... */ |
1522 | continue; |
1523 | } |
1524 | |
1525 | /* |
1526 | * The code below can only correctly "slide" (perform relocations) for one |
1527 | * value of the slide amount. So if a file has a non-zero slide, it has to |
1528 | * match any previous value. A zero slide value is ok for things that are |
1529 | * just directly mapped. |
1530 | */ |
1531 | if (shared_region->sr_slide == 0 && srfmp->slide != 0) { |
1532 | shared_region->sr_slide = srfmp->slide; |
1533 | } else if (shared_region->sr_slide != 0 && |
1534 | srfmp->slide != 0 && |
1535 | shared_region->sr_slide != srfmp->slide) { |
			SHARED_REGION_TRACE_ERROR(
				("shared_region: more than one non-zero slide value "
				"slide 1:0x%x slide 2:0x%x\n",
				shared_region->sr_slide, srfmp->slide));
1540 | kr = KERN_INVALID_ARGUMENT; |
1541 | break; |
1542 | } |
1543 | |
1544 | #if __arm64__ |
1545 | if ((shared_region->sr_64bit || |
1546 | page_shift_user32 == SIXTEENK_PAGE_SHIFT) && |
1547 | ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) { |
1548 | printf(format: "FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n" , |
1549 | __FUNCTION__, srfmp->slide); |
1550 | kr = KERN_INVALID_ARGUMENT; |
1551 | break; |
1552 | } |
1553 | #endif /* __arm64__ */ |
1554 | |
1555 | /* |
1556 | * An FD of -1 means we need to copyin the data to an anonymous object. |
1557 | */ |
1558 | if (srfmp->fd == -1) { |
1559 | assert(mappings_count == 1); |
1560 | SHARED_REGION_TRACE_INFO( |
1561 | ("shared_region: mapping[0]: " |
1562 | "address:0x%016llx size:0x%016llx offset/addr:0x%016llx " |
1563 | "maxprot:0x%x prot:0x%x fd==-1\n" , |
1564 | (long long)mappings[0].sms_address, |
1565 | (long long)mappings[0].sms_size, |
1566 | (long long)mappings[0].sms_file_offset, |
1567 | mappings[0].sms_max_prot, |
1568 | mappings[0].sms_init_prot)); |
1569 | |
1570 | /* |
1571 | * We need an anon object to hold the data in the shared region. |
1572 | * The size needs to be suitable to map into kernel. |
1573 | */ |
1574 | obj_size = vm_object_round_page(mappings->sms_size); |
1575 | object = vm_object_allocate(size: obj_size); |
1576 | if (object == VM_OBJECT_NULL) { |
1577 | printf(format: "%s(): for fd==-1 vm_object_allocate() failed\n" , __func__); |
1578 | kr = KERN_RESOURCE_SHORTAGE; |
1579 | break; |
1580 | } |
1581 | |
1582 | /* |
1583 | * map the object into the kernel |
1584 | */ |
1585 | vm_map_offset_t kaddr = 0; |
1586 | vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE(); |
1587 | vmk_flags.vmkf_no_copy_on_read = 1; |
1588 | vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA; |
1589 | |
1590 | kr = vm_map_enter(map: kernel_map, |
1591 | address: &kaddr, |
1592 | size: obj_size, |
1593 | mask: 0, |
1594 | vmk_flags, |
1595 | object, |
1596 | offset: 0, |
1597 | FALSE, |
1598 | cur_protection: (VM_PROT_READ | VM_PROT_WRITE), |
1599 | max_protection: (VM_PROT_READ | VM_PROT_WRITE), |
1600 | VM_INHERIT_NONE); |
1601 | if (kr != KERN_SUCCESS) { |
1602 | printf(format: "%s(): for fd==-1 vm_map_enter() in kernel failed\n" , __func__); |
1603 | vm_object_deallocate(object); |
1604 | object = VM_OBJECT_NULL; |
1605 | break; |
1606 | } |
1607 | |
1608 | /* |
1609 | * We'll need another reference to keep the object alive after |
1610 | * we vm_map_remove() it from the kernel. |
1611 | */ |
1612 | vm_object_reference(object); |
1613 | |
1614 | /* |
1615 | * Zero out the object's pages, so we can't leak data. |
1616 | */ |
1617 | bzero(s: (void *)kaddr, n: obj_size); |
1618 | |
1619 | /* |
1620 | * Copyin the data from dyld to the new object. |
1621 | * Then remove the kernel mapping. |
1622 | */ |
1623 | int copyin_err = |
1624 | copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size); |
1625 | vm_map_remove(map: kernel_map, start: kaddr, end: kaddr + obj_size); |
1626 | if (copyin_err) { |
1627 | printf(format: "%s(): for fd==-1 copyin() failed, errno=%d\n" , __func__, copyin_err); |
1628 | switch (copyin_err) { |
1629 | case EPERM: |
1630 | case EACCES: |
1631 | kr = KERN_PROTECTION_FAILURE; |
1632 | break; |
1633 | case EFAULT: |
1634 | kr = KERN_INVALID_ADDRESS; |
1635 | break; |
1636 | default: |
1637 | kr = KERN_FAILURE; |
1638 | break; |
1639 | } |
1640 | vm_object_deallocate(object); |
1641 | object = VM_OBJECT_NULL; |
1642 | break; |
1643 | } |
1644 | |
1645 | /* |
1646 | * Finally map the object into the shared region. |
1647 | */ |
1648 | target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address); |
1649 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(); |
1650 | vmk_flags.vmkf_already = TRUE; |
1651 | vmk_flags.vmkf_no_copy_on_read = 1; |
1652 | vmk_flags.vmf_permanent = shared_region_make_permanent(sr: shared_region, |
1653 | max_prot: mappings[0].sms_max_prot); |
1654 | |
1655 | kr = vm_map_enter( |
1656 | map: sr_map, |
1657 | address: &target_address, |
1658 | vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)), |
1659 | mask: 0, |
1660 | vmk_flags, |
1661 | object, |
1662 | offset: 0, |
1663 | TRUE, |
1664 | cur_protection: mappings[0].sms_init_prot & VM_PROT_ALL, |
1665 | max_protection: mappings[0].sms_max_prot & VM_PROT_ALL, |
1666 | VM_INHERIT_DEFAULT); |
1667 | if (kr != KERN_SUCCESS) { |
1668 | printf(format: "%s(): for fd==-1 vm_map_enter() in SR failed\n" , __func__); |
1669 | vm_object_deallocate(object); |
1670 | break; |
1671 | } |
1672 | |
1673 | if (mappings[0].sms_address < *sfm_min_address) { |
1674 | *sfm_min_address = mappings[0].sms_address; |
1675 | } |
1676 | |
1677 | if (os_add_overflow(mappings[0].sms_address, |
1678 | mappings[0].sms_size, |
1679 | &sfm_end) || |
1680 | (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) < |
1681 | mappings[0].sms_address)) { |
1682 | /* overflow */ |
1683 | kr = KERN_INVALID_ARGUMENT; |
1684 | break; |
1685 | } |
1686 | |
1687 | if (sfm_end > *sfm_max_address) { |
1688 | *sfm_max_address = sfm_end; |
1689 | } |
1690 | |
1691 | continue; |
1692 | } |
1693 | |
1694 | /* get the VM object associated with the file to be mapped */ |
1695 | file_object = memory_object_control_to_vm_object(control: file_control); |
1696 | assert(file_object); |
1697 | |
1698 | if (!file_object->object_is_shared_cache) { |
1699 | vm_object_lock(file_object); |
1700 | file_object->object_is_shared_cache = true; |
1701 | vm_object_unlock(file_object); |
1702 | } |
1703 | |
1704 | #if CONFIG_SECLUDED_MEMORY |
1705 | /* |
1706 | * Camera will need the shared cache, so don't put the pages |
1707 | * on the secluded queue, assume that's the primary region. |
1708 | * Also keep DEXT shared cache pages off secluded. |
1709 | */ |
1710 | if (primary_system_shared_region == NULL || |
1711 | primary_system_shared_region == shared_region || |
1712 | shared_region->sr_driverkit) { |
1713 | memory_object_mark_eligible_for_secluded(file_control, FALSE); |
1714 | } |
1715 | #endif /* CONFIG_SECLUDED_MEMORY */ |
1716 | |
1717 | /* establish the mappings for that file */ |
1718 | for (i = 0; i < mappings_count; i++) { |
1719 | SHARED_REGION_TRACE_INFO( |
1720 | ("shared_region: mapping[%d]: " |
1721 | "address:0x%016llx size:0x%016llx offset:0x%016llx " |
1722 | "maxprot:0x%x prot:0x%x\n" , |
1723 | i, |
1724 | (long long)mappings[i].sms_address, |
1725 | (long long)mappings[i].sms_size, |
1726 | (long long)mappings[i].sms_file_offset, |
1727 | mappings[i].sms_max_prot, |
1728 | mappings[i].sms_init_prot)); |
1729 | |
1730 | if (mappings[i].sms_address < *sfm_min_address) { |
1731 | *sfm_min_address = mappings[i].sms_address; |
1732 | } |
1733 | |
1734 | if (os_add_overflow(mappings[i].sms_address, |
1735 | mappings[i].sms_size, |
1736 | &sfm_end) || |
1737 | (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) < |
1738 | mappings[i].sms_address)) { |
1739 | /* overflow */ |
1740 | kr = KERN_INVALID_ARGUMENT; |
1741 | break; |
1742 | } |
1743 | |
1744 | if (sfm_end > *sfm_max_address) { |
1745 | *sfm_max_address = sfm_end; |
1746 | } |
1747 | |
1748 | if (mappings[i].sms_init_prot & VM_PROT_ZF) { |
1749 | /* zero-filled memory */ |
1750 | map_port = MACH_PORT_NULL; |
1751 | } else { |
1752 | /* file-backed memory */ |
1753 | __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager); |
1754 | } |
1755 | |
1756 | /* |
1757 | * Remember which mappings need sliding. |
1758 | */ |
1759 | if (mappings[i].sms_max_prot & VM_PROT_SLIDE) { |
1760 | if (*mappings_to_slide_cnt == vmsr_num_slides) { |
1761 | SHARED_REGION_TRACE_INFO( |
1762 | ("shared_region: mapping[%d]: " |
1763 | "address:0x%016llx size:0x%016llx " |
1764 | "offset:0x%016llx " |
1765 | "maxprot:0x%x prot:0x%x " |
1766 | "too many mappings to slide...\n" , |
1767 | i, |
1768 | (long long)mappings[i].sms_address, |
1769 | (long long)mappings[i].sms_size, |
1770 | (long long)mappings[i].sms_file_offset, |
1771 | mappings[i].sms_max_prot, |
1772 | mappings[i].sms_init_prot)); |
1773 | } else { |
1774 | mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i]; |
1775 | *mappings_to_slide_cnt += 1; |
1776 | } |
1777 | } |
1778 | |
1779 | /* mapping's address is relative to the shared region base */ |
1780 | target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address); |
1781 | |
1782 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(); |
1783 | vmk_flags.vmkf_already = TRUE; |
1784 | /* no copy-on-read for mapped binaries */ |
1785 | vmk_flags.vmkf_no_copy_on_read = 1; |
1786 | vmk_flags.vmf_permanent = shared_region_make_permanent( |
1787 | sr: shared_region, |
1788 | max_prot: mappings[i].sms_max_prot); |
1789 | vmk_flags.vmf_tpro = shared_region_tpro_protect( |
1790 | sr: shared_region, |
1791 | max_prot: mappings[i].sms_max_prot); |
1792 | |
1793 | /* establish that mapping, OK if it's "already" there */ |
1794 | if (map_port == MACH_PORT_NULL) { |
1795 | /* |
1796 | * We want to map some anonymous memory in a shared region. |
1797 | * We have to create the VM object now, so that it can be mapped "copy-on-write". |
1798 | */ |
1799 | obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)); |
1800 | object = vm_object_allocate(size: obj_size); |
1801 | if (object == VM_OBJECT_NULL) { |
1802 | kr = KERN_RESOURCE_SHORTAGE; |
1803 | } else { |
1804 | kr = vm_map_enter( |
1805 | map: sr_map, |
1806 | address: &target_address, |
1807 | vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)), |
1808 | mask: 0, |
1809 | vmk_flags, |
1810 | object, |
1811 | offset: 0, |
1812 | TRUE, |
1813 | cur_protection: mappings[i].sms_init_prot & VM_PROT_ALL, |
1814 | max_protection: mappings[i].sms_max_prot & VM_PROT_ALL, |
1815 | VM_INHERIT_DEFAULT); |
1816 | } |
1817 | } else { |
1818 | object = VM_OBJECT_NULL; /* no anonymous memory here */ |
1819 | kr = vm_map_enter_mem_object( |
1820 | map: sr_map, |
1821 | address: &target_address, |
1822 | vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)), |
1823 | mask: 0, |
1824 | vmk_flags, |
1825 | port: map_port, |
1826 | offset: mappings[i].sms_file_offset, |
1827 | TRUE, |
1828 | cur_protection: mappings[i].sms_init_prot & VM_PROT_ALL, |
1829 | max_protection: mappings[i].sms_max_prot & VM_PROT_ALL, |
1830 | VM_INHERIT_DEFAULT); |
1831 | } |
1832 | |
1833 | if (kr == KERN_SUCCESS) { |
1834 | /* |
1835 | * Record the first successful mapping(s) in the shared |
1836 | * region by file. We're protected by "sr_mapping_in_progress" |
1837 | * here, so no need to lock "shared_region". |
1838 | * |
1839 | * Note that if we have an AOT shared cache (ARM) for a |
1840 | * translated task, then it's always the first file. |
1841 | * The original "native" (i.e. x86) shared cache is the |
1842 | * second file. |
1843 | */ |
1844 | |
1845 | if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) { |
1846 | shared_region->sr_first_mapping = target_address; |
1847 | } |
1848 | |
1849 | if (*mappings_to_slide_cnt > 0 && |
1850 | mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) { |
1851 | slid_mappings[*mappings_to_slide_cnt - 1] = target_address; |
1852 | slid_file_controls[*mappings_to_slide_cnt - 1] = file_control; |
1853 | } |
1854 | |
1855 | /* |
1856 | * Record the lowest writable address in this |
1857 | * sub map, to log any unexpected unnesting below |
1858 | * that address (see log_unnest_badness()). |
1859 | */ |
1860 | if ((mappings[i].sms_init_prot & VM_PROT_WRITE) && |
1861 | sr_map->is_nested_map && |
1862 | (lowest_unnestable_addr == 0 || |
1863 | (target_address < lowest_unnestable_addr))) { |
1864 | lowest_unnestable_addr = target_address; |
1865 | } |
1866 | } else { |
1867 | if (map_port == MACH_PORT_NULL) { |
1868 | /* |
1869 | * Get rid of the VM object we just created |
1870 | * but failed to map. |
1871 | */ |
1872 | vm_object_deallocate(object); |
1873 | object = VM_OBJECT_NULL; |
1874 | } |
1875 | if (kr == KERN_MEMORY_PRESENT) { |
1876 | /* |
1877 | * This exact mapping was already there: |
1878 | * that's fine. |
1879 | */ |
1880 | SHARED_REGION_TRACE_INFO( |
1881 | ("shared_region: mapping[%d]: " |
1882 | "address:0x%016llx size:0x%016llx " |
1883 | "offset:0x%016llx " |
1884 | "maxprot:0x%x prot:0x%x " |
1885 | "already mapped...\n" , |
1886 | i, |
1887 | (long long)mappings[i].sms_address, |
1888 | (long long)mappings[i].sms_size, |
1889 | (long long)mappings[i].sms_file_offset, |
1890 | mappings[i].sms_max_prot, |
1891 | mappings[i].sms_init_prot)); |
1892 | /* |
1893 | * We didn't establish this mapping ourselves; |
1894 | * let's reset its size, so that we do not |
1895 | * attempt to undo it if an error occurs later. |
1896 | */ |
1897 | mappings[i].sms_size = 0; |
1898 | kr = KERN_SUCCESS; |
1899 | } else { |
1900 | break; |
1901 | } |
1902 | } |
1903 | } |
1904 | |
1905 | if (kr != KERN_SUCCESS) { |
1906 | break; |
1907 | } |
1908 | } |
1909 | |
1910 | if (kr != KERN_SUCCESS) { |
1911 | /* the last mapping we tried (mappings[i]) failed ! */ |
1912 | assert(i < mappings_count); |
1913 | SHARED_REGION_TRACE_ERROR( |
1914 | ("shared_region: mapping[%d]: " |
1915 | "address:0x%016llx size:0x%016llx " |
1916 | "offset:0x%016llx " |
1917 | "maxprot:0x%x prot:0x%x failed 0x%x\n" , |
1918 | i, |
1919 | (long long)mappings[i].sms_address, |
1920 | (long long)mappings[i].sms_size, |
1921 | (long long)mappings[i].sms_file_offset, |
1922 | mappings[i].sms_max_prot, |
1923 | mappings[i].sms_init_prot, |
1924 | kr)); |
1925 | |
1926 | /* |
1927 | * Respect the design of vm_shared_region_undo_mappings |
1928 | * as we are holding the sr_mapping_in_progress here. |
1929 | * So don't allow sr_map == NULL otherwise vm_shared_region_undo_mappings |
1930 | * will be blocked at waiting sr_mapping_in_progress to be NULL. |
1931 | */ |
1932 | assert(sr_map != NULL); |
1933 | /* undo all the previous mappings */ |
1934 | vm_shared_region_undo_mappings(sr_map, sr_base_address, srf_mappings: sr_file_mappings, srf_mappings_current: srfmp, srf_current_mappings_count: i); |
1935 | return kr; |
1936 | } |
1937 | |
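	/*
	 * Success: hand the results back to the caller. Note that
	 * sr_mapping_in_progress is still held by this thread; it is
	 * cleared (and waiters are woken) by vm_shared_region_map_file().
	 */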
1938 | *lowest_unnestable_addr_ptr = lowest_unnestable_addr; |
1939 | *sr_map_ptr = sr_map; |
1940 | return KERN_SUCCESS; |
1941 | } |
1942 | |
/* forward declaration */
1944 | __attribute__((noinline)) |
1945 | static void |
1946 | vm_shared_region_map_file_final( |
1947 | vm_shared_region_t shared_region, |
1948 | vm_map_t sr_map, |
1949 | mach_vm_offset_t sfm_min_address, |
1950 | mach_vm_offset_t sfm_max_address); |
1951 | |
1952 | /* |
1953 | * Establish some mappings of a file in the shared region. |
1954 | * This is used by "dyld" via the shared_region_map_np() system call |
1955 | * to populate the shared region with the appropriate shared cache. |
1956 | * |
1957 | * One could also call it several times to incrementally load several |
1958 | * libraries, as long as they do not overlap. |
1959 | * It will return KERN_SUCCESS if the mappings were successfully established |
1960 | * or if they were already established identically by another process. |
1961 | */ |
1962 | __attribute__((noinline)) |
1963 | kern_return_t |
1964 | vm_shared_region_map_file( |
1965 | vm_shared_region_t shared_region, |
1966 | int sr_file_mappings_count, |
1967 | struct _sr_file_mappings *sr_file_mappings) |
1968 | { |
1969 | kern_return_t kr = KERN_SUCCESS; |
1970 | unsigned int i; |
1971 | unsigned int mappings_to_slide_cnt = 0; |
1972 | mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1; |
1973 | mach_vm_offset_t sfm_max_address = 0; |
1974 | vm_map_t sr_map = NULL; |
1975 | vm_map_offset_t lowest_unnestable_addr = 0; |
1976 | unsigned int vmsr_num_slides = 0; |
1977 | typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics; |
1978 | slid_mappings_t *slid_mappings = NULL; /* [0..vmsr_num_slides] */ |
1979 | memory_object_control_t *slid_file_controls = NULL; /* [0..vmsr_num_slides] */ |
1980 | struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */ |
1981 | struct _sr_file_mappings *srfmp; |
1982 | |
1983 | /* |
1984 | * Figure out how many of the mappings have slides. |
1985 | */ |
1986 | for (srfmp = &sr_file_mappings[0]; |
1987 | srfmp < &sr_file_mappings[sr_file_mappings_count]; |
1988 | srfmp++) { |
1989 | for (i = 0; i < srfmp->mappings_count; ++i) { |
1990 | if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) { |
1991 | ++vmsr_num_slides; |
1992 | } |
1993 | } |
1994 | } |
1995 | |
1996 | /* Allocate per slide data structures */ |
1997 | if (vmsr_num_slides > 0) { |
1998 | slid_mappings = |
1999 | kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK); |
2000 | slid_file_controls = |
2001 | kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK); |
2002 | mappings_to_slide = |
2003 | kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO); |
2004 | } |
2005 | |
2006 | kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings, |
2007 | mappings_to_slide_cnt: &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls, |
2008 | sfm_min_address: &sfm_min_address, sfm_max_address: &sfm_max_address, sr_map_ptr: &sr_map, lowest_unnestable_addr_ptr: &lowest_unnestable_addr, vmsr_num_slides); |
2009 | if (kr != KERN_SUCCESS) { |
2010 | vm_shared_region_lock(); |
2011 | goto done; |
2012 | } |
2013 | assert(vmsr_num_slides == mappings_to_slide_cnt); |
2014 | |
2015 | /* |
2016 | * The call above installed direct mappings to the shared cache file. |
2017 | * Now we go back and overwrite the mappings that need relocation |
2018 | * with a special shared region pager. |
2019 | * |
2020 | * Note that this does copyin() of data, needed by the pager, which |
2021 | * the previous code just established mappings for. This is why we |
2022 | * do it in a separate pass. |
2023 | */ |
2024 | #if __has_feature(ptrauth_calls) |
2025 | /* |
2026 | * need to allocate storage needed for any sr_auth_sections |
2027 | */ |
2028 | for (i = 0; i < mappings_to_slide_cnt; ++i) { |
2029 | if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 && |
2030 | shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E && |
2031 | !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) { |
2032 | ++shared_region->sr_num_auth_section; |
2033 | } |
2034 | } |
2035 | if (shared_region->sr_num_auth_section > 0) { |
2036 | shared_region->sr_auth_section = |
2037 | kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, |
2038 | Z_WAITOK | Z_ZERO); |
2039 | } |
2040 | #endif /* __has_feature(ptrauth_calls) */ |
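	/*
	 * Second pass: overwrite each mapping marked VM_PROT_SLIDE with a
	 * shared region pager that applies the relocations.
	 */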
2041 | for (i = 0; i < mappings_to_slide_cnt; ++i) { |
2042 | kr = vm_shared_region_slide(shared_region->sr_slide, |
2043 | mappings_to_slide[i]->sms_file_offset, |
2044 | mappings_to_slide[i]->sms_size, |
2045 | mappings_to_slide[i]->sms_slide_start, |
2046 | mappings_to_slide[i]->sms_slide_size, |
2047 | slid_mappings[i], |
2048 | slid_file_controls[i], |
2049 | mappings_to_slide[i]->sms_max_prot); |
2050 | if (kr != KERN_SUCCESS) { |
2051 | SHARED_REGION_TRACE_ERROR( |
2052 | ("shared_region: region_slide(" |
2053 | "slide:0x%x start:0x%016llx " |
2054 | "size:0x%016llx) failed 0x%x\n" , |
2055 | shared_region->sr_slide, |
2056 | (long long)mappings_to_slide[i]->sms_slide_start, |
2057 | (long long)mappings_to_slide[i]->sms_slide_size, |
2058 | kr)); |
2059 | vm_shared_region_undo_mappings(sr_map, sr_base_address: shared_region->sr_base_address, |
2060 | srf_mappings: &sr_file_mappings[0], |
2061 | srf_mappings_current: &sr_file_mappings[sr_file_mappings_count - 1], |
2062 | srf_current_mappings_count: sr_file_mappings_count); |
2063 | vm_shared_region_lock(); |
2064 | goto done; |
2065 | } |
2066 | } |
2067 | |
2068 | assert(kr == KERN_SUCCESS); |
2069 | |
2070 | /* adjust the map's "lowest_unnestable_start" */ |
2071 | lowest_unnestable_addr &= ~(pmap_shared_region_size_min(map: sr_map->pmap) - 1); |
2072 | if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) { |
2073 | vm_map_lock(sr_map); |
2074 | sr_map->lowest_unnestable_start = lowest_unnestable_addr; |
2075 | vm_map_unlock(sr_map); |
2076 | } |
2077 | |
2078 | vm_shared_region_lock(); |
2079 | assert(shared_region->sr_ref_count > 0); |
2080 | assert(shared_region->sr_mapping_in_progress == current_thread()); |
2081 | |
2082 | vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address); |
2083 | |
2084 | done: |
2085 | /* |
2086 | * We're done working on that shared region. |
2087 | * Wake up any waiting threads. |
2088 | */ |
2089 | assert(shared_region->sr_mapping_in_progress == current_thread()); |
2090 | shared_region->sr_mapping_in_progress = THREAD_NULL; |
2091 | vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress); |
2092 | vm_shared_region_unlock(); |
2093 | |
2094 | #if __has_feature(ptrauth_calls) |
2095 | if (kr == KERN_SUCCESS) { |
2096 | /* |
2097 | * Since authenticated mappings were just added to the shared region, |
2098 | * go back and remap them into private mappings for this task. |
2099 | */ |
2100 | kr = vm_shared_region_auth_remap(shared_region); |
2101 | } |
2102 | #endif /* __has_feature(ptrauth_calls) */ |
2103 | |
2104 | /* Cache shared region info needed for telemetry in the task */ |
2105 | task_t task; |
2106 | if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) { |
2107 | mach_vm_offset_t start_address; |
2108 | (void)vm_shared_region_start_address(shared_region, start_address: &start_address, task); |
2109 | } |
2110 | |
2111 | SHARED_REGION_TRACE_DEBUG( |
2112 | ("shared_region: map(%p) <- 0x%x \n" , |
2113 | (void *)VM_KERNEL_ADDRPERM(shared_region), kr)); |
2114 | if (vmsr_num_slides > 0) { |
2115 | kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings)); |
2116 | kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls); |
2117 | kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, |
2118 | mappings_to_slide); |
2119 | } |
2120 | return kr; |
2121 | } |
2122 | |
2123 | /* |
2124 | * Final part of vm_shared_region_map_file(). |
2125 | * Kept in separate function to avoid blowing out the stack. |
2126 | */ |
2127 | __attribute__((noinline)) |
2128 | static void |
2129 | vm_shared_region_map_file_final( |
2130 | vm_shared_region_t shared_region, |
2131 | vm_map_t sr_map __unused, |
2132 | mach_vm_offset_t sfm_min_address __unused, |
2133 | mach_vm_offset_t sfm_max_address __unused) |
2134 | { |
	struct _dyld_cache_header sr_cache_header;
2136 | int error; |
2137 | size_t image_array_length; |
2138 | struct _dyld_cache_image_text_info *sr_image_layout; |
2139 | boolean_t locally_built = FALSE; |
2140 | |
2141 | |
2142 | /* |
2143 | * copy in the shared region UUID to the shared region structure. |
2144 | * we do this indirectly by first copying in the shared cache header |
2145 | * and then copying the UUID from there because we'll need to look |
2146 | * at other content from the shared cache header. |
2147 | */ |
2148 | if (!shared_region->sr_uuid_copied) { |
2149 | error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping), |
2150 | (char *)&sr_cache_header, |
2151 | sizeof(sr_cache_header)); |
2152 | if (error == 0) { |
2153 | memcpy(dst: &shared_region->sr_uuid, src: &sr_cache_header.uuid, n: sizeof(shared_region->sr_uuid)); |
2154 | shared_region->sr_uuid_copied = TRUE; |
2155 | locally_built = sr_cache_header.locallyBuiltCache; |
2156 | } else { |
2157 | #if DEVELOPMENT || DEBUG |
2158 | panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx " |
2159 | "offset:0 size:0x%016llx) failed with %d\n" , |
2160 | (long long)shared_region->sr_base_address, |
2161 | (long long)shared_region->sr_first_mapping, |
2162 | (long long)sizeof(sr_cache_header), |
2163 | error); |
2164 | #endif /* DEVELOPMENT || DEBUG */ |
2165 | shared_region->sr_uuid_copied = FALSE; |
2166 | } |
2167 | } |
2168 | |
2169 | /* |
2170 | * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by |
2171 | * the stackshot code to reduce output size in the common case that everything maps the same shared cache. |
2172 | * One gotcha is that "userspace reboots" can occur which can cause a new shared region to be the primary |
2173 | * region. In that case, launchd re-exec's itself, so we may go through this path multiple times. We |
2174 | * let the most recent one win. |
2175 | * |
2176 | * Check whether the shared cache is a custom built one and copy in the shared cache layout accordingly. |
2177 | */ |
2178 | bool is_init_task = (task_pid(task: current_task()) == 1); |
2179 | if (shared_region->sr_uuid_copied && is_init_task) { |
2180 | /* Copy in the shared cache layout if we're running with a locally built shared cache */ |
2181 | if (locally_built) { |
2182 | KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START); |
2183 | image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info)); |
2184 | sr_image_layout = kalloc_data(image_array_length, Z_WAITOK); |
2185 | error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping + |
2186 | sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length); |
2187 | if (error == 0) { |
2188 | if (sr_cache_header.imagesTextCount >= UINT32_MAX) { |
2189 | panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX" ); |
2190 | } |
2191 | shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK); |
2192 | for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) { |
2193 | memcpy(dst: (char *)&shared_region->sr_images[index].imageUUID, src: (char *)&sr_image_layout[index].uuid, |
2194 | n: sizeof(shared_region->sr_images[index].imageUUID)); |
2195 | shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress; |
2196 | } |
2197 | |
2198 | shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount; |
2199 | } else { |
2200 | #if DEVELOPMENT || DEBUG |
2201 | panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx " |
2202 | "offset:0x%016llx size:0x%016llx) failed with %d\n" , |
2203 | (long long)shared_region->sr_base_address, |
2204 | (long long)shared_region->sr_first_mapping, |
2205 | (long long)sr_cache_header.imagesTextOffset, |
2206 | (long long)image_array_length, |
2207 | error); |
2208 | #endif /* DEVELOPMENT || DEBUG */ |
2209 | } |
2210 | KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count); |
2211 | kfree_data(sr_image_layout, image_array_length); |
2212 | sr_image_layout = NULL; |
2213 | } |
2214 | primary_system_shared_region = shared_region; |
2215 | } |
2216 | |
2217 | /* |
2218 | * If we succeeded, we know the bounds of the shared region. |
2219 | * Trim our pmaps to only cover this range (if applicable to |
2220 | * this platform). |
2221 | */ |
2222 | if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(map: sr_map)) { |
2223 | pmap_trim(current_map()->pmap, subord: sr_map->pmap, vstart: sfm_min_address, size: sfm_max_address - sfm_min_address); |
2224 | } |
2225 | } |
2226 | |
2227 | /* |
2228 | * Retrieve a task's shared region and grab an extra reference to |
2229 | * make sure it doesn't disappear while the caller is using it. |
2230 | * The caller is responsible for consuming that extra reference if |
2231 | * necessary. |
2232 | * |
2233 | * This also tries to trim the pmap for the shared region. |
2234 | */ |
2235 | vm_shared_region_t |
2236 | vm_shared_region_trim_and_get(task_t task) |
2237 | { |
2238 | vm_shared_region_t shared_region; |
2239 | ipc_port_t sr_handle; |
2240 | vm_named_entry_t sr_mem_entry; |
2241 | vm_map_t sr_map; |
2242 | |
2243 | /* Get the shared region and the map. */ |
2244 | shared_region = vm_shared_region_get(task); |
2245 | if (shared_region == NULL) { |
2246 | return NULL; |
2247 | } |
2248 | |
2249 | sr_handle = shared_region->sr_mem_entry; |
2250 | sr_mem_entry = mach_memory_entry_from_port(port: sr_handle); |
2251 | sr_map = sr_mem_entry->backing.map; |
2252 | |
2253 | /* Trim the pmap if possible. */ |
2254 | if (VM_MAP_PAGE_SHIFT(map: task->map) == VM_MAP_PAGE_SHIFT(map: sr_map)) { |
2255 | pmap_trim(grand: task->map->pmap, subord: sr_map->pmap, vstart: 0, size: 0); |
2256 | } |
2257 | |
2258 | return shared_region; |
2259 | } |
2260 | |
2261 | /* |
2262 | * Enter the appropriate shared region into "map" for "task". |
2263 | * This involves looking up the shared region (and possibly creating a new |
2264 | * one) for the desired environment, then mapping the VM sub map into the |
2265 | * task's VM "map", with the appropriate level of pmap-nesting. |
2266 | */ |
2267 | kern_return_t |
2268 | vm_shared_region_enter( |
2269 | struct _vm_map *map, |
2270 | struct task *task, |
2271 | boolean_t is_64bit, |
2272 | void *fsroot, |
2273 | cpu_type_t cpu, |
2274 | cpu_subtype_t cpu_subtype, |
2275 | boolean_t reslide, |
2276 | boolean_t is_driverkit, |
2277 | uint32_t rsr_version) |
2278 | { |
2279 | kern_return_t kr; |
2280 | vm_shared_region_t shared_region; |
2281 | vm_map_offset_t sr_address, sr_offset, target_address; |
2282 | vm_map_size_t sr_size, mapping_size; |
2283 | vm_map_offset_t sr_pmap_nesting_start; |
2284 | vm_map_size_t sr_pmap_nesting_size; |
2285 | ipc_port_t sr_handle; |
2286 | vm_prot_t cur_prot, max_prot; |
2287 | vm_map_kernel_flags_t vmk_flags; |
2288 | |
2289 | SHARED_REGION_TRACE_DEBUG( |
2290 | ("shared_region: -> " |
2291 | "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n" , |
2292 | (void *)VM_KERNEL_ADDRPERM(map), |
2293 | (void *)VM_KERNEL_ADDRPERM(task), |
2294 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2295 | cpu, cpu_subtype, is_64bit, is_driverkit)); |
2296 | |
2297 | /* lookup (create if needed) the shared region for this environment */ |
2298 | shared_region = vm_shared_region_lookup(root_dir: fsroot, cputype: cpu, cpu_subtype, is_64bit, target_page_shift: VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version); |
2299 | if (shared_region == NULL) { |
2300 | /* this should not happen ! */ |
2301 | SHARED_REGION_TRACE_ERROR( |
2302 | ("shared_region: -> " |
2303 | "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): " |
2304 | "lookup failed !\n" , |
2305 | (void *)VM_KERNEL_ADDRPERM(map), |
2306 | (void *)VM_KERNEL_ADDRPERM(task), |
2307 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2308 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit)); |
2309 | //panic("shared_region_enter: lookup failed"); |
2310 | return KERN_FAILURE; |
2311 | } |
2312 | |
2313 | kr = KERN_SUCCESS; |
2314 | /* no need to lock since this data is never modified */ |
2315 | sr_address = (vm_map_offset_t)shared_region->sr_base_address; |
2316 | sr_size = (vm_map_size_t)shared_region->sr_size; |
2317 | sr_handle = shared_region->sr_mem_entry; |
2318 | sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start; |
2319 | sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size; |
2320 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(); |
2321 | |
2322 | cur_prot = VM_PROT_READ; |
2323 | if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) { |
2324 | /* |
2325 | * XXX BINARY COMPATIBILITY |
2326 | * java6 apparently needs to modify some code in the |
2327 | * dyld shared cache and needs to be allowed to add |
2328 | * write access... |
2329 | */ |
2330 | max_prot = VM_PROT_ALL; |
2331 | } else { |
2332 | max_prot = VM_PROT_READ; |
2333 | /* make it "permanent" to protect against re-mappings */ |
2334 | vmk_flags.vmf_permanent = true; |
2335 | } |
2336 | |
2337 | /* |
2338 | * Start mapping the shared region's VM sub map into the task's VM map. |
2339 | */ |
2340 | sr_offset = 0; |
2341 | |
2342 | if (sr_pmap_nesting_start > sr_address) { |
2343 | /* we need to map a range without pmap-nesting first */ |
2344 | target_address = sr_address; |
2345 | mapping_size = sr_pmap_nesting_start - sr_address; |
2346 | kr = vm_map_enter_mem_object( |
2347 | map, |
2348 | address: &target_address, |
2349 | size: mapping_size, |
2350 | mask: 0, |
2351 | vmk_flags, |
2352 | port: sr_handle, |
2353 | offset: sr_offset, |
2354 | TRUE, |
2355 | cur_protection: cur_prot, |
2356 | max_protection: max_prot, |
2357 | VM_INHERIT_SHARE); |
2358 | if (kr != KERN_SUCCESS) { |
2359 | SHARED_REGION_TRACE_ERROR( |
2360 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): " |
2361 | "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n" , |
2362 | (void *)VM_KERNEL_ADDRPERM(map), |
2363 | (void *)VM_KERNEL_ADDRPERM(task), |
2364 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2365 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2366 | (long long)target_address, |
2367 | (long long)mapping_size, |
2368 | (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); |
2369 | goto done; |
2370 | } |
2371 | SHARED_REGION_TRACE_DEBUG( |
2372 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): " |
2373 | "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n" , |
2374 | (void *)VM_KERNEL_ADDRPERM(map), |
2375 | (void *)VM_KERNEL_ADDRPERM(task), |
2376 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2377 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2378 | (long long)target_address, (long long)mapping_size, |
2379 | (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); |
2380 | sr_offset += mapping_size; |
2381 | sr_size -= mapping_size; |
2382 | } |
2383 | |
2384 | /* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */ |
2385 | vmk_flags.vmkf_nested_pmap = true; |
2386 | vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP; |
2387 | |
2388 | /* |
2389 | * Use pmap-nesting to map the majority of the shared region into the task's |
2390 | * VM space. Very rarely will architectures have a shared region that isn't |
2391 | * the same size as the pmap-nesting region, or start at a different address |
2392 | * than the pmap-nesting region, so this code will map the entirety of the |
2393 | * shared region for most architectures. |
2394 | */ |
2395 | assert((sr_address + sr_offset) == sr_pmap_nesting_start); |
2396 | target_address = sr_pmap_nesting_start; |
2397 | kr = vm_map_enter_mem_object( |
2398 | map, |
2399 | address: &target_address, |
2400 | size: sr_pmap_nesting_size, |
2401 | mask: 0, |
2402 | vmk_flags, |
2403 | port: sr_handle, |
2404 | offset: sr_offset, |
2405 | TRUE, |
2406 | cur_protection: cur_prot, |
2407 | max_protection: max_prot, |
2408 | VM_INHERIT_SHARE); |
2409 | if (kr != KERN_SUCCESS) { |
2410 | SHARED_REGION_TRACE_ERROR( |
2411 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): " |
2412 | "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n" , |
2413 | (void *)VM_KERNEL_ADDRPERM(map), |
2414 | (void *)VM_KERNEL_ADDRPERM(task), |
2415 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2416 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2417 | (long long)target_address, |
2418 | (long long)sr_pmap_nesting_size, |
2419 | (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); |
2420 | goto done; |
2421 | } |
2422 | SHARED_REGION_TRACE_DEBUG( |
2423 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): " |
2424 | "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n" , |
2425 | (void *)VM_KERNEL_ADDRPERM(map), |
2426 | (void *)VM_KERNEL_ADDRPERM(task), |
2427 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2428 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2429 | (long long)target_address, (long long)sr_pmap_nesting_size, |
2430 | (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); |
2431 | |
2432 | sr_offset += sr_pmap_nesting_size; |
2433 | sr_size -= sr_pmap_nesting_size; |
2434 | |
2435 | if (sr_size > 0) { |
2436 | /* and there's some left to be mapped without pmap-nesting */ |
2437 | vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */ |
2438 | target_address = sr_address + sr_offset; |
2439 | mapping_size = sr_size; |
2440 | kr = vm_map_enter_mem_object( |
2441 | map, |
2442 | address: &target_address, |
2443 | size: mapping_size, |
2444 | mask: 0, |
2445 | VM_MAP_KERNEL_FLAGS_FIXED(), |
2446 | port: sr_handle, |
2447 | offset: sr_offset, |
2448 | TRUE, |
2449 | cur_protection: cur_prot, |
2450 | max_protection: max_prot, |
2451 | VM_INHERIT_SHARE); |
2452 | if (kr != KERN_SUCCESS) { |
2453 | SHARED_REGION_TRACE_ERROR( |
2454 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): " |
2455 | "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n" , |
2456 | (void *)VM_KERNEL_ADDRPERM(map), |
2457 | (void *)VM_KERNEL_ADDRPERM(task), |
2458 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2459 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2460 | (long long)target_address, |
2461 | (long long)mapping_size, |
2462 | (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); |
2463 | goto done; |
2464 | } |
2465 | SHARED_REGION_TRACE_DEBUG( |
2466 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): " |
2467 | "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n" , |
2468 | (void *)VM_KERNEL_ADDRPERM(map), |
2469 | (void *)VM_KERNEL_ADDRPERM(task), |
2470 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2471 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2472 | (long long)target_address, (long long)mapping_size, |
2473 | (void *)VM_KERNEL_ADDRPERM(sr_handle), kr)); |
2474 | sr_offset += mapping_size; |
2475 | sr_size -= mapping_size; |
2476 | } |
2477 | assert(sr_size == 0); |
2478 | |
2479 | done: |
2480 | if (kr == KERN_SUCCESS) { |
2481 | /* let the task use that shared region */ |
2482 | vm_shared_region_set(task, new_shared_region: shared_region); |
2483 | } else { |
2484 | /* drop our reference since we're not using it */ |
2485 | vm_shared_region_deallocate(shared_region); |
2486 | vm_shared_region_set(task, NULL); |
2487 | } |
2488 | |
2489 | SHARED_REGION_TRACE_DEBUG( |
2490 | ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n" , |
2491 | (void *)VM_KERNEL_ADDRPERM(map), |
2492 | (void *)VM_KERNEL_ADDRPERM(task), |
2493 | (void *)VM_KERNEL_ADDRPERM(fsroot), |
2494 | cpu, cpu_subtype, is_64bit, reslide, is_driverkit, |
2495 | kr)); |
2496 | return kr; |
2497 | } |
2498 | |
2499 | void |
2500 | vm_shared_region_remove( |
2501 | task_t task, |
2502 | vm_shared_region_t sr) |
2503 | { |
2504 | vm_map_t map; |
2505 | mach_vm_offset_t start; |
2506 | mach_vm_size_t size; |
2507 | vm_map_kernel_flags_t vmk_flags; |
2508 | kern_return_t kr; |
2509 | |
2510 | if (sr == NULL) { |
2511 | return; |
2512 | } |
2513 | map = get_task_map(task); |
2514 | start = sr->sr_base_address; |
2515 | size = sr->sr_size; |
2516 | |
2517 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true); |
2518 | vmk_flags.vmkf_overwrite_immutable = true; |
2519 | vmk_flags.vm_tag = VM_MEMORY_DYLD; |
2520 | |
2521 | /* range_id is set by mach_vm_map_kernel */ |
2522 | kr = mach_vm_map_kernel(target_map: map, |
2523 | address: &start, |
2524 | initial_size: size, |
2525 | mask: 0, /* mask */ |
2526 | vmk_flags, |
2527 | MACH_PORT_NULL, |
2528 | offset: 0, |
2529 | FALSE, /* copy */ |
2530 | VM_PROT_NONE, |
2531 | VM_PROT_NONE, |
2532 | VM_INHERIT_DEFAULT); |
2533 | if (kr != KERN_SUCCESS) { |
2534 | printf(format: "%s:%d vm_map(0x%llx, 0x%llx) error %d\n" , __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr); |
2535 | } |
2536 | } |
2537 | |
#define SANE_SLIDE_INFO_SIZE (2560*1024) /* can be changed if needed */
2539 | |
2540 | kern_return_t |
2541 | vm_shared_region_sliding_valid(uint32_t slide) |
2542 | { |
2543 | kern_return_t kr = KERN_SUCCESS; |
2544 | vm_shared_region_t sr = vm_shared_region_get(task: current_task()); |
2545 | |
2546 | /* No region yet? we're fine. */ |
2547 | if (sr == NULL) { |
2548 | return kr; |
2549 | } |
2550 | |
2551 | if (sr->sr_slide != 0 && slide != 0) { |
2552 | if (slide == sr->sr_slide) { |
2553 | /* |
2554 | * Request for sliding when we've |
2555 | * already done it with exactly the |
2556 | * same slide value before. |
2557 | * This isn't wrong technically but |
2558 | * we don't want to slide again and |
2559 | * so we return this value. |
2560 | */ |
2561 | kr = KERN_INVALID_ARGUMENT; |
2562 | } else { |
2563 | printf(format: "Mismatched shared region slide\n" ); |
2564 | kr = KERN_FAILURE; |
2565 | } |
2566 | } |
2567 | vm_shared_region_deallocate(shared_region: sr); |
2568 | return kr; |
2569 | } |
2570 | |
2571 | /* |
2572 | * Actually create (really overwrite) the mapping to part of the shared cache which |
2573 | * undergoes relocation. This routine reads in the relocation info from dyld and |
2574 | * verifies it. It then creates a (or finds a matching) shared region pager which |
2575 | * handles the actual modification of the page contents and installs the mapping |
2576 | * using that pager. |
2577 | */ |
2578 | kern_return_t |
2579 | vm_shared_region_slide_mapping( |
2580 | vm_shared_region_t sr, |
2581 | user_addr_t slide_info_addr, |
2582 | mach_vm_size_t slide_info_size, |
2583 | mach_vm_offset_t start, |
2584 | mach_vm_size_t size, |
2585 | mach_vm_offset_t slid_mapping, |
2586 | uint32_t slide, |
2587 | memory_object_control_t sr_file_control, |
2588 | vm_prot_t prot) |
2589 | { |
2590 | kern_return_t kr; |
2591 | vm_object_t object = VM_OBJECT_NULL; |
2592 | vm_shared_region_slide_info_t si = NULL; |
2593 | vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL; |
2594 | struct vm_map_entry tmp_entry_store; |
	memory_object_t sr_pager = MEMORY_OBJECT_NULL;
2596 | vm_map_t sr_map; |
2597 | vm_map_kernel_flags_t vmk_flags; |
2598 | vm_map_offset_t map_addr; |
2599 | void *slide_info_entry = NULL; |
2600 | int error; |
2601 | |
2602 | assert(sr->sr_slide_in_progress); |
2603 | |
2604 | if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) { |
2605 | return KERN_INVALID_ARGUMENT; |
2606 | } |
2607 | |
2608 | /* |
2609 | * Copy in and verify the relocation information. |
2610 | */ |
2611 | if (slide_info_size < MIN_SLIDE_INFO_SIZE) { |
2612 | printf(format: "Slide_info_size too small: %lx\n" , (uintptr_t)slide_info_size); |
2613 | return KERN_FAILURE; |
2614 | } |
2615 | if (slide_info_size > SANE_SLIDE_INFO_SIZE) { |
2616 | printf(format: "Slide_info_size too large: %lx\n" , (uintptr_t)slide_info_size); |
2617 | return KERN_FAILURE; |
2618 | } |
2619 | |
2620 | slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK); |
2621 | if (slide_info_entry == NULL) { |
2622 | return KERN_RESOURCE_SHORTAGE; |
2623 | } |
2624 | error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size); |
2625 | if (error) { |
2626 | printf(format: "copyin of slide_info failed\n" ); |
2627 | kr = KERN_INVALID_ADDRESS; |
2628 | goto done; |
2629 | } |
2630 | |
2631 | if ((kr = vm_shared_region_slide_sanity_check(entry: slide_info_entry, size: slide_info_size)) != KERN_SUCCESS) { |
2632 | printf(format: "Sanity Check failed for slide_info\n" ); |
2633 | goto done; |
2634 | } |
2635 | |
2636 | /* |
2637 | * Allocate and fill in a vm_shared_region_slide_info. |
2638 | * This will either be used by a new pager, or used to find |
2639 | * a pre-existing matching pager. |
2640 | */ |
2641 | object = memory_object_control_to_vm_object(control: sr_file_control); |
2642 | if (object == VM_OBJECT_NULL || object->internal) { |
2643 | object = VM_OBJECT_NULL; |
2644 | kr = KERN_INVALID_ADDRESS; |
2645 | goto done; |
2646 | } |
2647 | |
2648 | si = kalloc_type(struct vm_shared_region_slide_info, |
2649 | Z_WAITOK | Z_NOFAIL); |
2650 | vm_object_lock(object); |
2651 | |
2652 | vm_object_reference_locked(object); /* for si->slide_object */ |
2653 | object->object_is_shared_cache = TRUE; |
2654 | vm_object_unlock(object); |
2655 | |
2656 | si->si_slide_info_entry = slide_info_entry; |
2657 | si->si_slide_info_size = slide_info_size; |
2658 | |
2659 | assert(slid_mapping != (mach_vm_offset_t) -1); |
2660 | si->si_slid_address = slid_mapping + sr->sr_base_address; |
2661 | si->si_slide_object = object; |
2662 | si->si_start = start; |
2663 | si->si_end = si->si_start + size; |
2664 | si->si_slide = slide; |
2665 | #if __has_feature(ptrauth_calls) |
2666 | /* |
2667 | * If there is authenticated pointer data in this slid mapping, |
2668 | * then just add the information needed to create new pagers for |
2669 | * different shared_region_id's later. |
2670 | */ |
2671 | if (sr->sr_cpu_type == CPU_TYPE_ARM64 && |
2672 | sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E && |
2673 | !(prot & VM_PROT_NOAUTH)) { |
2674 | if (sr->sr_next_auth_section == sr->sr_num_auth_section) { |
2675 | printf("Too many auth/private sections for shared region!!\n" ); |
2676 | kr = KERN_INVALID_ARGUMENT; |
2677 | goto done; |
2678 | } |
2679 | si->si_ptrauth = TRUE; |
2680 | sr->sr_auth_section[sr->sr_next_auth_section++] = si; |
2681 | /* |
2682 | * Remember the shared region, since that's where we'll |
2683 | * stash this info for all auth pagers to share. Each pager |
2684 | * will need to take a reference to it. |
2685 | */ |
2686 | si->si_shared_region = sr; |
2687 | kr = KERN_SUCCESS; |
2688 | goto done; |
2689 | } |
2690 | si->si_shared_region = NULL; |
2691 | si->si_ptrauth = FALSE; |
2692 | #endif /* __has_feature(ptrauth_calls) */ |
2693 | |
2694 | /* |
2695 | * find the pre-existing shared region's map entry to slide |
2696 | */ |
2697 | sr_map = vm_shared_region_vm_map(shared_region: sr); |
2698 | kr = find_mapping_to_slide(map: sr_map, addr: (vm_map_address_t)slid_mapping, entry: &tmp_entry_store); |
2699 | if (kr != KERN_SUCCESS) { |
2700 | goto done; |
2701 | } |
2702 | tmp_entry = &tmp_entry_store; |
2703 | |
2704 | /* |
2705 | * The object must exactly cover the region to slide. |
2706 | */ |
2707 | assert(VME_OFFSET(tmp_entry) == start); |
2708 | assert(tmp_entry->vme_end - tmp_entry->vme_start == size); |
2709 | |
2710 | /* create a "shared_region" sliding pager */ |
2711 | sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), backing_offset: VME_OFFSET(entry: tmp_entry), slide_info: si, jop_key: 0); |
2712 | if (sr_pager == MEMORY_OBJECT_NULL) { |
2713 | kr = KERN_RESOURCE_SHORTAGE; |
2714 | goto done; |
2715 | } |
2716 | |
2717 | #if CONFIG_SECLUDED_MEMORY |
2718 | /* |
2719 | * The shared region pagers used by camera or DEXT should have |
2720 | * pagers that won't go on the secluded queue. |
2721 | */ |
2722 | if (primary_system_shared_region == NULL || |
2723 | primary_system_shared_region == sr || |
2724 | sr->sr_driverkit) { |
2725 | memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE); |
2726 | } |
2727 | #endif /* CONFIG_SECLUDED_MEMORY */ |
2728 | |
2729 | /* map that pager over the portion of the mapping that needs sliding */ |
2730 | map_addr = tmp_entry->vme_start; |
2731 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true); |
2732 | vmk_flags.vmkf_overwrite_immutable = true; |
2733 | vmk_flags.vmf_permanent = shared_region_make_permanent(sr, |
2734 | max_prot: tmp_entry->max_protection); |
2735 | vmk_flags.vmf_tpro = shared_region_tpro_protect(sr, |
2736 | max_prot: prot); |
2737 | kr = vm_map_enter_mem_object(map: sr_map, |
2738 | address: &map_addr, |
2739 | size: (tmp_entry->vme_end - tmp_entry->vme_start), |
2740 | mask: (mach_vm_offset_t) 0, |
2741 | vmk_flags, |
2742 | port: (ipc_port_t)(uintptr_t) sr_pager, |
2743 | offset: 0, |
2744 | TRUE, |
2745 | cur_protection: tmp_entry->protection, |
2746 | max_protection: tmp_entry->max_protection, |
2747 | inheritance: tmp_entry->inheritance); |
2748 | assertf(kr == KERN_SUCCESS, "kr = 0x%x\n" , kr); |
2749 | assertf(map_addr == tmp_entry->vme_start, |
2750 | "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n" , |
2751 | (uint64_t)map_addr, |
2752 | (uint64_t) tmp_entry->vme_start, |
2753 | tmp_entry); |
2754 | |
2755 | /* success! */ |
2756 | kr = KERN_SUCCESS; |
2757 | |
2758 | done: |
2759 | if (sr_pager != NULL) { |
2760 | /* |
2761 | * Release the sr_pager reference obtained by shared_region_pager_setup(). |
2762 | * The mapping, if it succeeded, is now holding a reference on the memory object. |
2763 | */ |
2764 | memory_object_deallocate(object: sr_pager); |
2765 | sr_pager = MEMORY_OBJECT_NULL; |
2766 | } |
2767 | if (tmp_entry != NULL) { |
2768 | /* release extra ref on tmp_entry's VM object */ |
2769 | vm_object_deallocate(VME_OBJECT(tmp_entry)); |
2770 | tmp_entry = VM_MAP_ENTRY_NULL; |
2771 | } |
2772 | |
2773 | if (kr != KERN_SUCCESS) { |
2774 | /* cleanup */ |
2775 | if (si != NULL) { |
2776 | if (si->si_slide_object) { |
2777 | vm_object_deallocate(object: si->si_slide_object); |
2778 | si->si_slide_object = VM_OBJECT_NULL; |
2779 | } |
2780 | kfree_type(struct vm_shared_region_slide_info, si); |
2781 | si = NULL; |
2782 | } |
2783 | if (slide_info_entry != NULL) { |
2784 | kfree_data(slide_info_entry, (vm_size_t)slide_info_size); |
2785 | slide_info_entry = NULL; |
2786 | } |
2787 | } |
2788 | return kr; |
2789 | } |
2790 | |
2791 | static kern_return_t |
2792 | vm_shared_region_slide_sanity_check_v1( |
2793 | vm_shared_region_slide_info_entry_v1_t s_info) |
2794 | { |
2795 | uint32_t pageIndex = 0; |
2796 | uint16_t entryIndex = 0; |
2797 | uint16_t *toc = NULL; |
2798 | |
2799 | toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset); |
2800 | for (; pageIndex < s_info->toc_count; pageIndex++) { |
2801 | entryIndex = (uint16_t)(toc[pageIndex]); |
2802 | |
2803 | if (entryIndex >= s_info->entry_count) { |
2804 | printf(format: "No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n" , pageIndex, entryIndex, s_info->entry_count); |
2805 | return KERN_FAILURE; |
2806 | } |
2807 | } |
2808 | return KERN_SUCCESS; |
2809 | } |
2810 | |
2811 | static kern_return_t |
2812 | vm_shared_region_slide_sanity_check_v2( |
2813 | vm_shared_region_slide_info_entry_v2_t s_info, |
2814 | mach_vm_size_t slide_info_size) |
2815 | { |
2816 | if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) { |
2817 | printf(format: "%s bad slide_info_size: %lx\n" , __func__, (uintptr_t)slide_info_size); |
2818 | return KERN_FAILURE; |
2819 | } |
2820 | if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) { |
2821 | return KERN_FAILURE; |
2822 | } |
2823 | |
2824 | /* Ensure that the slide info doesn't reference any data outside of its bounds. */ |
2825 | |
2826 | uint32_t page_starts_count = s_info->page_starts_count; |
	uint32_t page_extras_count = s_info->page_extras_count;
2828 | mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count; |
2829 | if (num_trailing_entries < page_starts_count) { |
2830 | return KERN_FAILURE; |
2831 | } |
2832 | |
2833 | /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */ |
2834 | mach_vm_size_t trailing_size = num_trailing_entries << 1; |
2835 | if (trailing_size >> 1 != num_trailing_entries) { |
2836 | return KERN_FAILURE; |
2837 | } |
2838 | |
2839 | mach_vm_size_t required_size = sizeof(*s_info) + trailing_size; |
2840 | if (required_size < sizeof(*s_info)) { |
2841 | return KERN_FAILURE; |
2842 | } |
2843 | |
2844 | if (required_size > slide_info_size) { |
2845 | return KERN_FAILURE; |
2846 | } |
2847 | |
2848 | return KERN_SUCCESS; |
2849 | } |
2850 | |
2851 | static kern_return_t |
2852 | vm_shared_region_slide_sanity_check_v3( |
2853 | vm_shared_region_slide_info_entry_v3_t s_info, |
2854 | mach_vm_size_t slide_info_size) |
2855 | { |
2856 | if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) { |
2857 | printf(format: "%s bad slide_info_size: %lx\n" , __func__, (uintptr_t)slide_info_size); |
2858 | return KERN_FAILURE; |
2859 | } |
2860 | if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) { |
2861 | printf(format: "vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n" , (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE); |
2862 | return KERN_FAILURE; |
2863 | } |
2864 | |
2865 | uint32_t page_starts_count = s_info->page_starts_count; |
2866 | mach_vm_size_t num_trailing_entries = page_starts_count; |
2867 | mach_vm_size_t trailing_size = num_trailing_entries << 1; |
2868 | mach_vm_size_t required_size = sizeof(*s_info) + trailing_size; |
2869 | if (required_size < sizeof(*s_info)) { |
2870 | printf(format: "vm_shared_region_slide_sanity_check_v3: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n" , (uint64_t)required_size, (uint64_t)sizeof(*s_info)); |
2871 | return KERN_FAILURE; |
2872 | } |
2873 | |
2874 | if (required_size > slide_info_size) { |
2875 | printf(format: "vm_shared_region_slide_sanity_check_v3: required_size != slide_info_size 0x%llx != 0x%llx\n" , (uint64_t)required_size, (uint64_t)slide_info_size); |
2876 | return KERN_FAILURE; |
2877 | } |
2878 | |
2879 | return KERN_SUCCESS; |
2880 | } |
2881 | |
2882 | static kern_return_t |
2883 | vm_shared_region_slide_sanity_check_v4( |
2884 | vm_shared_region_slide_info_entry_v4_t s_info, |
2885 | mach_vm_size_t slide_info_size) |
2886 | { |
2887 | if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) { |
2888 | printf(format: "%s bad slide_info_size: %lx\n" , __func__, (uintptr_t)slide_info_size); |
2889 | return KERN_FAILURE; |
2890 | } |
2891 | if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) { |
2892 | return KERN_FAILURE; |
2893 | } |
2894 | |
2895 | /* Ensure that the slide info doesn't reference any data outside of its bounds. */ |
2896 | |
2897 | uint32_t page_starts_count = s_info->page_starts_count; |
	uint32_t page_extras_count = s_info->page_extras_count;
2899 | mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count; |
2900 | if (num_trailing_entries < page_starts_count) { |
2901 | return KERN_FAILURE; |
2902 | } |
2903 | |
2904 | /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */ |
2905 | mach_vm_size_t trailing_size = num_trailing_entries << 1; |
2906 | if (trailing_size >> 1 != num_trailing_entries) { |
2907 | return KERN_FAILURE; |
2908 | } |
2909 | |
2910 | mach_vm_size_t required_size = sizeof(*s_info) + trailing_size; |
2911 | if (required_size < sizeof(*s_info)) { |
2912 | return KERN_FAILURE; |
2913 | } |
2914 | |
2915 | if (required_size > slide_info_size) { |
2916 | return KERN_FAILURE; |
2917 | } |
2918 | |
2919 | return KERN_SUCCESS; |
2920 | } |
2921 | |
2922 | static kern_return_t |
2923 | vm_shared_region_slide_sanity_check_v5( |
2924 | vm_shared_region_slide_info_entry_v5_t s_info, |
2925 | mach_vm_size_t slide_info_size) |
2926 | { |
2927 | if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v5)) { |
2928 | printf(format: "%s bad slide_info_size: %lx\n" , __func__, (uintptr_t)slide_info_size); |
2929 | return KERN_FAILURE; |
2930 | } |
2931 | if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB) { |
2932 | printf(format: "vm_shared_region_slide_sanity_check_v5: s_info->page_size != PAGE_SIZE_FOR_SR_SL 0x%llx != 0x%llx\n" , (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE_16KB); |
2933 | return KERN_FAILURE; |
2934 | } |
2935 | |
2936 | uint32_t page_starts_count = s_info->page_starts_count; |
2937 | mach_vm_size_t num_trailing_entries = page_starts_count; |
2938 | mach_vm_size_t trailing_size = num_trailing_entries << 1; |
2939 | mach_vm_size_t required_size = sizeof(*s_info) + trailing_size; |
2940 | if (required_size < sizeof(*s_info)) { |
2941 | printf(format: "vm_shared_region_slide_sanity_check_v5: required_size != sizeof(*s_info) 0x%llx != 0x%llx\n" , (uint64_t)required_size, (uint64_t)sizeof(*s_info)); |
2942 | return KERN_FAILURE; |
2943 | } |
2944 | |
2945 | if (required_size > slide_info_size) { |
2946 | printf(format: "vm_shared_region_slide_sanity_check_v5: required_size != slide_info_size 0x%llx != 0x%llx\n" , (uint64_t)required_size, (uint64_t)slide_info_size); |
2947 | return KERN_FAILURE; |
2948 | } |
2949 | |
2950 | return KERN_SUCCESS; |
2951 | } |
2952 | |
2953 | |
2954 | static kern_return_t |
2955 | vm_shared_region_slide_sanity_check( |
2956 | vm_shared_region_slide_info_entry_t s_info, |
2957 | mach_vm_size_t s_info_size) |
2958 | { |
2959 | kern_return_t kr; |
2960 | |
2961 | switch (s_info->version) { |
2962 | case 1: |
kr = vm_shared_region_slide_sanity_check_v1(&s_info->v1);
break;
case 2:
kr = vm_shared_region_slide_sanity_check_v2(&s_info->v2, s_info_size);
break;
case 3:
kr = vm_shared_region_slide_sanity_check_v3(&s_info->v3, s_info_size);
break;
case 4:
kr = vm_shared_region_slide_sanity_check_v4(&s_info->v4, s_info_size);
break;
case 5:
kr = vm_shared_region_slide_sanity_check_v5(&s_info->v5, s_info_size);
2976 | break; |
2977 | default: |
2978 | kr = KERN_FAILURE; |
2979 | } |
2980 | return kr; |
2981 | } |
2982 | |
2983 | static kern_return_t |
2984 | vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) |
2985 | { |
2986 | uint16_t *toc = NULL; |
2987 | slide_info_entry_toc_t bitmap = NULL; |
2988 | uint32_t i = 0, j = 0; |
2989 | uint8_t b = 0; |
2990 | uint32_t slide = si->si_slide; |
2991 | int is_64 = task_has_64Bit_addr(current_task()); |
2992 | |
2993 | vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1; |
2994 | toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset); |
2995 | |
2996 | if (pageIndex >= s_info->toc_count) { |
2997 | printf(format: "No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n" , pageIndex, s_info->toc_count); |
2998 | } else { |
2999 | uint16_t entryIndex = (uint16_t)(toc[pageIndex]); |
3000 | slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset); |
3001 | |
3002 | if (entryIndex >= s_info->entry_count) { |
3003 | printf(format: "No sliding bitmap entry for entryIndex: %d amongst %d entries\n" , entryIndex, s_info->entry_count); |
3004 | } else { |
3005 | bitmap = &slide_info_entries[entryIndex]; |
3006 | |
3007 | for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) { |
3008 | b = bitmap->entry[i]; |
3009 | if (b != 0) { |
3010 | for (j = 0; j < 8; ++j) { |
3011 | if (b & (1 << j)) { |
3012 | uint32_t *ptr_to_slide; |
3013 | uint32_t old_value; |
3014 | |
3015 | ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j))); |
3016 | old_value = *ptr_to_slide; |
3017 | *ptr_to_slide += slide; |
3018 | if (is_64 && *ptr_to_slide < old_value) { |
3019 | /* |
3020 | * We just slid the low 32 bits of a 64-bit pointer |
3021 | * and it looks like there should have been a carry-over |
3022 | * to the upper 32 bits. |
3023 | * The sliding failed... |
3024 | */ |
3025 | printf(format: "vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n" , |
3026 | i, j, b, slide, old_value, *ptr_to_slide); |
3027 | return KERN_FAILURE; |
3028 | } |
3029 | } |
3030 | } |
3031 | } |
3032 | } |
3033 | } |
3034 | } |
3035 | |
3036 | return KERN_SUCCESS; |
3037 | } |
3038 | |
3039 | static kern_return_t |
3040 | rebase_chain_32( |
3041 | uint8_t *page_content, |
3042 | uint16_t start_offset, |
3043 | uint32_t slide_amount, |
3044 | vm_shared_region_slide_info_entry_v2_t s_info) |
3045 | { |
3046 | const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t); |
3047 | |
3048 | const uint32_t delta_mask = (uint32_t)(s_info->delta_mask); |
3049 | const uint32_t value_mask = ~delta_mask; |
3050 | const uint32_t value_add = (uint32_t)(s_info->value_add); |
3051 | const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2; |
3052 | |
3053 | uint32_t page_offset = start_offset; |
3054 | uint32_t delta = 1; |
3055 | |
3056 | while (delta != 0 && page_offset <= last_page_offset) { |
3057 | uint8_t *loc; |
3058 | uint32_t value; |
3059 | |
3060 | loc = page_content + page_offset; |
memcpy(&value, loc, sizeof(value));
3062 | delta = (value & delta_mask) >> delta_shift; |
3063 | value &= value_mask; |
3064 | |
3065 | if (value != 0) { |
3066 | value += value_add; |
3067 | value += slide_amount; |
3068 | } |
memcpy(loc, &value, sizeof(value));
3070 | page_offset += delta; |
3071 | } |
3072 | |
3073 | /* If the offset went past the end of the page, then the slide data is invalid. */ |
3074 | if (page_offset > last_page_offset) { |
3075 | return KERN_FAILURE; |
3076 | } |
3077 | return KERN_SUCCESS; |
3078 | } |
3079 | |
3080 | static kern_return_t |
3081 | rebase_chain_64( |
3082 | uint8_t *page_content, |
3083 | uint16_t start_offset, |
3084 | uint32_t slide_amount, |
3085 | vm_shared_region_slide_info_entry_v2_t s_info) |
3086 | { |
3087 | const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t); |
3088 | |
3089 | const uint64_t delta_mask = s_info->delta_mask; |
3090 | const uint64_t value_mask = ~delta_mask; |
3091 | const uint64_t value_add = s_info->value_add; |
3092 | const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2; |
3093 | |
3094 | uint32_t page_offset = start_offset; |
3095 | uint32_t delta = 1; |
3096 | |
3097 | while (delta != 0 && page_offset <= last_page_offset) { |
3098 | uint8_t *loc; |
3099 | uint64_t value; |
3100 | |
3101 | loc = page_content + page_offset; |
memcpy(&value, loc, sizeof(value));
3103 | delta = (uint32_t)((value & delta_mask) >> delta_shift); |
3104 | value &= value_mask; |
3105 | |
3106 | if (value != 0) { |
3107 | value += value_add; |
3108 | value += slide_amount; |
3109 | } |
memcpy(loc, &value, sizeof(value));
3111 | page_offset += delta; |
3112 | } |
3113 | |
3114 | if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) { |
3115 | /* If a pointer straddling the page boundary needs to be adjusted, then |
3116 | * add the slide to the lower half. The encoding guarantees that the upper |
3117 | * half on the next page will need no masking. |
3118 | * |
3119 | * This assumes a little-endian machine and that the region being slid |
3120 | * never crosses a 4 GB boundary. */ |
3121 | |
3122 | uint8_t *loc = page_content + page_offset; |
3123 | uint32_t value; |
3124 | |
memcpy(&value, loc, sizeof(value));
3126 | value += slide_amount; |
memcpy(loc, &value, sizeof(value));
3128 | } else if (page_offset > last_page_offset) { |
3129 | return KERN_FAILURE; |
3130 | } |
3131 | |
3132 | return KERN_SUCCESS; |
3133 | } |
3134 | |
3135 | static kern_return_t |
3136 | rebase_chain( |
3137 | boolean_t is_64, |
3138 | uint32_t pageIndex, |
3139 | uint8_t *page_content, |
3140 | uint16_t start_offset, |
3141 | uint32_t slide_amount, |
3142 | vm_shared_region_slide_info_entry_v2_t s_info) |
3143 | { |
3144 | kern_return_t kr; |
3145 | if (is_64) { |
3146 | kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info); |
3147 | } else { |
3148 | kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info); |
3149 | } |
3150 | |
3151 | if (kr != KERN_SUCCESS) { |
3152 | printf(format: "vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n" , |
3153 | pageIndex, start_offset, slide_amount); |
3154 | } |
3155 | return kr; |
3156 | } |
3157 | |
3158 | static kern_return_t |
3159 | vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) |
3160 | { |
3161 | vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2; |
3162 | const uint32_t slide_amount = si->si_slide; |
3163 | |
3164 | /* The high bits of the delta_mask field are nonzero precisely when the shared |
3165 | * cache is 64-bit. */ |
3166 | const boolean_t is_64 = (s_info->delta_mask >> 32) != 0; |
3167 | |
3168 | const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset); |
const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3170 | |
3171 | uint8_t *page_content = (uint8_t *)vaddr; |
3172 | uint16_t page_entry; |
3173 | |
3174 | if (pageIndex >= s_info->page_starts_count) { |
3175 | printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n" , |
3176 | pageIndex, s_info->page_starts_count); |
3177 | return KERN_FAILURE; |
3178 | } |
3179 | page_entry = page_starts[pageIndex]; |
3180 | |
3181 | if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) { |
3182 | return KERN_SUCCESS; |
3183 | } |
3184 | |
3185 | if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) { |
3186 | uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE; |
3187 | uint16_t info; |
3188 | |
3189 | do { |
3190 | uint16_t page_start_offset; |
3191 | kern_return_t kr; |
3192 | |
3193 | if (chain_index >= s_info->page_extras_count) { |
3194 | printf(format: "vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n" , |
3195 | chain_index, s_info->page_extras_count); |
3196 | return KERN_FAILURE; |
3197 | } |
3198 | info = page_extras[chain_index]; |
3199 | page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT); |
3200 | |
kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3202 | if (kr != KERN_SUCCESS) { |
3203 | return KERN_FAILURE; |
3204 | } |
3205 | |
3206 | chain_index++; |
3207 | } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END)); |
3208 | } else { |
3209 | const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT); |
3210 | kern_return_t kr; |
3211 | |
kr = rebase_chain(is_64, pageIndex, page_content, page_start_offset, slide_amount, s_info);
3213 | if (kr != KERN_SUCCESS) { |
3214 | return KERN_FAILURE; |
3215 | } |
3216 | } |
3217 | |
3218 | return KERN_SUCCESS; |
3219 | } |
3220 | |
3221 | |
3222 | static kern_return_t |
3223 | vm_shared_region_slide_page_v3( |
3224 | vm_shared_region_slide_info_t si, |
3225 | vm_offset_t vaddr, |
3226 | __unused mach_vm_offset_t uservaddr, |
3227 | uint32_t pageIndex, |
3228 | #if !__has_feature(ptrauth_calls) |
3229 | __unused |
3230 | #endif /* !__has_feature(ptrauth_calls) */ |
3231 | uint64_t jop_key) |
3232 | { |
3233 | vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3; |
3234 | const uint32_t slide_amount = si->si_slide; |
3235 | |
3236 | uint8_t *page_content = (uint8_t *)vaddr; |
3237 | uint16_t page_entry; |
3238 | |
3239 | if (pageIndex >= s_info->page_starts_count) { |
3240 | printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n" , |
3241 | pageIndex, s_info->page_starts_count); |
3242 | return KERN_FAILURE; |
3243 | } |
3244 | page_entry = s_info->page_starts[pageIndex]; |
3245 | |
3246 | if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) { |
3247 | return KERN_SUCCESS; |
3248 | } |
3249 | |
3250 | uint8_t* rebaseLocation = page_content; |
3251 | uint64_t delta = page_entry; |
3252 | do { |
3253 | rebaseLocation += delta; |
3254 | uint64_t value; |
memcpy(&value, rebaseLocation, sizeof(value));
3256 | delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t); |
3257 | |
3258 | // A pointer is one of : |
3259 | // { |
3260 | // uint64_t pointerValue : 51; |
3261 | // uint64_t offsetToNextPointer : 11; |
3262 | // uint64_t isBind : 1 = 0; |
3263 | // uint64_t authenticated : 1 = 0; |
3264 | // } |
3265 | // { |
3266 | // uint32_t offsetFromSharedCacheBase; |
3267 | // uint16_t diversityData; |
3268 | // uint16_t hasAddressDiversity : 1; |
3269 | // uint16_t hasDKey : 1; |
3270 | // uint16_t hasBKey : 1; |
3271 | // uint16_t offsetToNextPointer : 11; |
3272 | // uint16_t isBind : 1; |
3273 | // uint16_t authenticated : 1 = 1; |
3274 | // } |
3275 | |
3276 | bool isBind = (value & (1ULL << 62)) != 0; |
3277 | if (isBind) { |
3278 | return KERN_FAILURE; |
3279 | } |
3280 | |
3281 | #if __has_feature(ptrauth_calls) |
3282 | uint16_t diversity_data = (uint16_t)(value >> 32); |
3283 | bool hasAddressDiversity = (value & (1ULL << 48)) != 0; |
3284 | ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3); |
3285 | #endif /* __has_feature(ptrauth_calls) */ |
3286 | bool isAuthenticated = (value & (1ULL << 63)) != 0; |
3287 | |
3288 | if (isAuthenticated) { |
3289 | // The new value for a rebase is the low 32-bits of the threaded value plus the slide. |
3290 | value = (value & 0xFFFFFFFF) + slide_amount; |
3291 | // Add in the offset from the mach_header |
3292 | const uint64_t value_add = s_info->value_add; |
3293 | value += value_add; |
3294 | |
3295 | #if __has_feature(ptrauth_calls) |
3296 | uint64_t discriminator = diversity_data; |
3297 | if (hasAddressDiversity) { |
3298 | // First calculate a new discriminator using the address of where we are trying to store the value |
3299 | uintptr_t pageOffset = rebaseLocation - page_content; |
3300 | discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator); |
3301 | } |
3302 | |
3303 | if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) { |
3304 | /* |
3305 | * these pointers are used in user mode. disable the kernel key diversification |
3306 | * so we can sign them for use in user mode. |
3307 | */ |
3308 | value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key); |
3309 | } |
3310 | #endif /* __has_feature(ptrauth_calls) */ |
3311 | } else { |
3312 | // The new value for a rebase is the low 51-bits of the threaded value plus the slide. |
3313 | // Regular pointer which needs to fit in 51-bits of value. |
3314 | // C++ RTTI uses the top bit, so we'll allow the whole top-byte |
3315 | // and the bottom 43-bits to be fit in to 51-bits. |
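// (The top byte sits in bits [43..50] of the on-disk value, so shifting it
// left by 13 moves it back up into bits [56..63] of the rebased pointer.)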
3316 | uint64_t top8Bits = value & 0x0007F80000000000ULL; |
3317 | uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL; |
3318 | uint64_t targetValue = (top8Bits << 13) | bottom43Bits; |
3319 | value = targetValue + slide_amount; |
3320 | } |
3321 | |
memcpy(rebaseLocation, &value, sizeof(value));
3323 | } while (delta != 0); |
3324 | |
3325 | return KERN_SUCCESS; |
3326 | } |
3327 | |
3328 | static kern_return_t |
3329 | rebase_chainv4( |
3330 | uint8_t *page_content, |
3331 | uint16_t start_offset, |
3332 | uint32_t slide_amount, |
3333 | vm_shared_region_slide_info_entry_v4_t s_info) |
3334 | { |
3335 | const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t); |
3336 | |
3337 | const uint32_t delta_mask = (uint32_t)(s_info->delta_mask); |
3338 | const uint32_t value_mask = ~delta_mask; |
3339 | const uint32_t value_add = (uint32_t)(s_info->value_add); |
3340 | const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2; |
3341 | |
3342 | uint32_t page_offset = start_offset; |
3343 | uint32_t delta = 1; |
3344 | |
3345 | while (delta != 0 && page_offset <= last_page_offset) { |
3346 | uint8_t *loc; |
3347 | uint32_t value; |
3348 | |
3349 | loc = page_content + page_offset; |
memcpy(&value, loc, sizeof(value));
3351 | delta = (value & delta_mask) >> delta_shift; |
3352 | value &= value_mask; |
3353 | |
3354 | if ((value & 0xFFFF8000) == 0) { |
3355 | // small positive non-pointer, use as-is |
3356 | } else if ((value & 0x3FFF8000) == 0x3FFF8000) { |
3357 | // small negative non-pointer |
3358 | value |= 0xC0000000; |
3359 | } else { |
3360 | // pointer that needs rebasing |
3361 | value += value_add; |
3362 | value += slide_amount; |
3363 | } |
memcpy(loc, &value, sizeof(value));
3365 | page_offset += delta; |
3366 | } |
3367 | |
3368 | /* If the offset went past the end of the page, then the slide data is invalid. */ |
3369 | if (page_offset > last_page_offset) { |
3370 | return KERN_FAILURE; |
3371 | } |
3372 | return KERN_SUCCESS; |
3373 | } |
3374 | |
3375 | static kern_return_t |
3376 | vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) |
3377 | { |
3378 | vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4; |
3379 | const uint32_t slide_amount = si->si_slide; |
3380 | |
3381 | const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset); |
const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3383 | |
3384 | uint8_t *page_content = (uint8_t *)vaddr; |
3385 | uint16_t page_entry; |
3386 | |
3387 | if (pageIndex >= s_info->page_starts_count) { |
3388 | printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n" , |
3389 | pageIndex, s_info->page_starts_count); |
3390 | return KERN_FAILURE; |
3391 | } |
3392 | page_entry = page_starts[pageIndex]; |
3393 | |
3394 | if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) { |
3395 | return KERN_SUCCESS; |
3396 | } |
3397 | |
3398 | if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) { |
3399 | uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX; |
3400 | uint16_t info; |
3401 | |
3402 | do { |
3403 | uint16_t page_start_offset; |
3404 | kern_return_t kr; |
3405 | |
3406 | if (chain_index >= s_info->page_extras_count) { |
3407 | printf(format: "vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n" , |
3408 | chain_index, s_info->page_extras_count); |
3409 | return KERN_FAILURE; |
3410 | } |
3411 | info = page_extras[chain_index]; |
3412 | page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT); |
3413 | |
kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3415 | if (kr != KERN_SUCCESS) { |
3416 | return KERN_FAILURE; |
3417 | } |
3418 | |
3419 | chain_index++; |
3420 | } while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END)); |
3421 | } else { |
3422 | const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT); |
3423 | kern_return_t kr; |
3424 | |
kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3426 | if (kr != KERN_SUCCESS) { |
3427 | return KERN_FAILURE; |
3428 | } |
3429 | } |
3430 | |
3431 | return KERN_SUCCESS; |
3432 | } |
3433 | |
3434 | |
3435 | static kern_return_t |
3436 | vm_shared_region_slide_page_v5( |
3437 | vm_shared_region_slide_info_t si, |
3438 | vm_offset_t vaddr, |
3439 | __unused mach_vm_offset_t uservaddr, |
3440 | uint32_t pageIndex, |
3441 | #if !__has_feature(ptrauth_calls) |
3442 | __unused |
3443 | #endif /* !__has_feature(ptrauth_calls) */ |
3444 | uint64_t jop_key) |
3445 | { |
3446 | vm_shared_region_slide_info_entry_v5_t s_info = &si->si_slide_info_entry->v5; |
3447 | const uint32_t slide_amount = si->si_slide; |
3448 | const uint64_t value_add = s_info->value_add; |
3449 | |
3450 | uint8_t *page_content = (uint8_t *)vaddr; |
3451 | uint16_t page_entry; |
3452 | |
3453 | if (pageIndex >= s_info->page_starts_count) { |
3454 | printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n" , |
3455 | pageIndex, s_info->page_starts_count); |
3456 | return KERN_FAILURE; |
3457 | } |
3458 | page_entry = s_info->page_starts[pageIndex]; |
3459 | |
3460 | if (page_entry == DYLD_CACHE_SLIDE_V5_PAGE_ATTR_NO_REBASE) { |
3461 | return KERN_SUCCESS; |
3462 | } |
3463 | |
3464 | uint8_t* rebaseLocation = page_content; |
3465 | uint64_t delta = page_entry; |
3466 | do { |
3467 | rebaseLocation += delta; |
3468 | uint64_t value; |
memcpy(&value, rebaseLocation, sizeof(value));
3470 | delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t); |
3471 | |
3472 | // A pointer is one of : |
3473 | // { |
3474 | // uint64_t runtimeOffset : 34, // offset from the start of the shared cache |
3475 | // high8 : 8, |
3476 | // unused : 10, |
// next : 11, // 8-byte stride
3478 | // auth : 1; // == 0 |
3479 | // } |
3480 | // { |
3481 | // uint64_t runtimeOffset : 34, // offset from the start of the shared cache |
3482 | // diversity : 16, |
3483 | // addrDiv : 1, |
3484 | // keyIsData : 1, // implicitly always the 'A' key. 0 -> IA. 1 -> DA |
// next : 11, // 8-byte stride
3486 | // auth : 1; // == 1 |
3487 | // } |
3488 | |
3489 | #if __has_feature(ptrauth_calls) |
3490 | bool addrDiv = ((value & (1ULL << 50)) != 0); |
3491 | bool keyIsData = ((value & (1ULL << 51)) != 0); |
// the key is always A, and the bit tells us if it's IA or DA
3493 | ptrauth_key key = keyIsData ? ptrauth_key_asda : ptrauth_key_asia; |
3494 | uint16_t diversity = (uint16_t)((value >> 34) & 0xFFFF); |
3495 | #endif /* __has_feature(ptrauth_calls) */ |
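// high8 occupies bits [34..41] of the on-disk value; shifting left by 22
// moves it into the top byte (bits [56..63]) of the final pointer.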
3496 | uint64_t high8 = (value << 22) & 0xFF00000000000000ULL; |
3497 | bool isAuthenticated = (value & (1ULL << 63)) != 0; |
3498 | |
3499 | // The new value for a rebase is the low 34-bits of the threaded value plus the base plus slide. |
3500 | value = (value & 0x3FFFFFFFFULL) + value_add + slide_amount; |
3501 | if (isAuthenticated) { |
3502 | #if __has_feature(ptrauth_calls) |
3503 | uint64_t discriminator = diversity; |
3504 | if (addrDiv) { |
3505 | // First calculate a new discriminator using the address of where we are trying to store the value |
3506 | uintptr_t pageOffset = rebaseLocation - page_content; |
3507 | discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator); |
3508 | } |
3509 | |
3510 | if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) { |
3511 | /* |
3512 | * these pointers are used in user mode. disable the kernel key diversification |
3513 | * so we can sign them for use in user mode. |
3514 | */ |
3515 | value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key); |
3516 | } |
3517 | #endif /* __has_feature(ptrauth_calls) */ |
3518 | } else { |
3519 | // the value already has the correct low bits, so just add in the high8 if it exists |
3520 | value += high8; |
3521 | } |
3522 | |
memcpy(rebaseLocation, &value, sizeof(value));
3524 | } while (delta != 0); |
3525 | |
3526 | return KERN_SUCCESS; |
3527 | } |
3528 | |
3529 | |
3530 | |
3531 | kern_return_t |
3532 | vm_shared_region_slide_page( |
3533 | vm_shared_region_slide_info_t si, |
3534 | vm_offset_t vaddr, |
3535 | mach_vm_offset_t uservaddr, |
3536 | uint32_t pageIndex, |
3537 | uint64_t jop_key) |
3538 | { |
3539 | switch (si->si_slide_info_entry->version) { |
3540 | case 1: |
3541 | return vm_shared_region_slide_page_v1(si, vaddr, pageIndex); |
3542 | case 2: |
3543 | return vm_shared_region_slide_page_v2(si, vaddr, pageIndex); |
3544 | case 3: |
3545 | return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key); |
3546 | case 4: |
3547 | return vm_shared_region_slide_page_v4(si, vaddr, pageIndex); |
3548 | case 5: |
3549 | return vm_shared_region_slide_page_v5(si, vaddr, uservaddr, pageIndex, jop_key); |
3550 | default: |
3551 | return KERN_FAILURE; |
3552 | } |
3553 | } |
3554 | |
3555 | /******************************************************************************/ |
3556 | /* Comm page support */ |
3557 | /******************************************************************************/ |
3558 | |
3559 | SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL; |
3560 | SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL; |
3561 | SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL; |
3562 | SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL; |
3563 | SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL; |
3564 | SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL; |
3565 | |
3566 | SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL; |
3567 | SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL; |
3568 | SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL; |
3569 | SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL; |
3570 | SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL; |
3571 | SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL; |
3572 | |
3573 | SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0; |
3574 | SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0; |
3575 | |
3576 | #if defined(__i386__) || defined(__x86_64__) |
3577 | /* |
3578 | * Create a memory entry, VM submap and pmap for one commpage. |
3579 | */ |
3580 | static void |
3581 | _vm_commpage_init( |
3582 | ipc_port_t *handlep, |
3583 | vm_map_size_t size) |
3584 | { |
3585 | vm_named_entry_t mem_entry; |
3586 | vm_map_t new_map; |
3587 | |
3588 | SHARED_REGION_TRACE_DEBUG( |
3589 | ("commpage: -> _init(0x%llx)\n" , |
3590 | (long long)size)); |
3591 | |
3592 | pmap_t new_pmap = pmap_create_options(NULL, 0, 0); |
3593 | if (new_pmap == NULL) { |
3594 | panic("_vm_commpage_init: could not allocate pmap" ); |
3595 | } |
3596 | new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT); |
3597 | |
3598 | mem_entry = mach_memory_entry_allocate(handlep); |
3599 | mem_entry->backing.map = new_map; |
3600 | mem_entry->internal = TRUE; |
3601 | mem_entry->is_sub_map = TRUE; |
3602 | mem_entry->offset = 0; |
3603 | mem_entry->protection = VM_PROT_ALL; |
3604 | mem_entry->size = size; |
3605 | |
3606 | SHARED_REGION_TRACE_DEBUG( |
3607 | ("commpage: _init(0x%llx) <- %p\n" , |
3608 | (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep))); |
3609 | } |
3610 | #endif |
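
/*
 * On Intel, each commpage is its own VM submap backed by a memory entry; the
 * entry is mapped (and, when suitably aligned and sized, pmap-nested) into
 * each task by vm_commpage_enter(). On arm64 there is no per-task VM mapping
 * to set up here: vm_commpage_enter() simply inserts the commpage into the
 * task's pmap via pmap_insert_commpage().
 */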
3611 | |
3612 | |
3613 | /* |
3614 | * Initialize the comm text pages at boot time |
3615 | */ |
3616 | void |
3617 | vm_commpage_text_init(void) |
3618 | { |
3619 | SHARED_REGION_TRACE_DEBUG( |
3620 | ("commpage text: ->init()\n" )); |
3621 | #if defined(__i386__) || defined(__x86_64__) |
3622 | /* create the 32 bit comm text page */ |
3623 | unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* restricting to 32bMAX-2PAGE */ |
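/*
 * random() % _PFZ32_SLIDE_RANGE picks a page index within the allowed slide
 * range and the PAGE_SHIFT converts it to a byte offset, so the commpage
 * text area always starts on a page boundary.
 */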
3624 | _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH); |
3625 | commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle); |
3626 | commpage_text32_map = commpage_text32_entry->backing.map; |
3627 | commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset); |
3628 | /* XXX if (cpu_is_64bit_capable()) ? */ |
3629 | /* create the 64-bit comm page */ |
offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restricting sliding up to 2Mb range */
3631 | _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH); |
3632 | commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle); |
3633 | commpage_text64_map = commpage_text64_entry->backing.map; |
3634 | commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset); |
3635 | #endif |
3636 | |
3637 | commpage_text_populate(); |
3638 | |
3639 | /* populate the routines in here */ |
3640 | SHARED_REGION_TRACE_DEBUG( |
3641 | ("commpage text: init() <-\n" )); |
3642 | } |
3643 | |
3644 | /* |
3645 | * Initialize the comm pages at boot time. |
3646 | */ |
3647 | void |
3648 | vm_commpage_init(void) |
3649 | { |
3650 | SHARED_REGION_TRACE_DEBUG( |
3651 | ("commpage: -> init()\n" )); |
3652 | |
3653 | #if defined(__i386__) || defined(__x86_64__) |
3654 | /* create the 32-bit comm page */ |
3655 | _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH); |
3656 | commpage32_entry = mach_memory_entry_from_port(commpage32_handle); |
3657 | commpage32_map = commpage32_entry->backing.map; |
3658 | |
3659 | /* XXX if (cpu_is_64bit_capable()) ? */ |
3660 | /* create the 64-bit comm page */ |
3661 | _vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH); |
3662 | commpage64_entry = mach_memory_entry_from_port(commpage64_handle); |
3663 | commpage64_map = commpage64_entry->backing.map; |
3664 | |
3665 | #endif /* __i386__ || __x86_64__ */ |
3666 | |
3667 | /* populate them according to this specific platform */ |
3668 | commpage_populate(); |
3669 | __commpage_setup = 1; |
3670 | #if XNU_TARGET_OS_OSX |
3671 | if (__system_power_source == 0) { |
post_sys_powersource_internal(0, 1);
3673 | } |
3674 | #endif /* XNU_TARGET_OS_OSX */ |
3675 | |
3676 | SHARED_REGION_TRACE_DEBUG( |
3677 | ("commpage: init() <-\n" )); |
3678 | } |
3679 | |
3680 | /* |
3681 | * Enter the appropriate comm page into the task's address space. |
3682 | * This is called at exec() time via vm_map_exec(). |
3683 | */ |
3684 | kern_return_t |
3685 | vm_commpage_enter( |
3686 | vm_map_t map, |
3687 | task_t task, |
3688 | boolean_t is64bit) |
3689 | { |
3690 | #if defined(__arm64__) |
3691 | #pragma unused(is64bit) |
3692 | (void)task; |
3693 | (void)map; |
3694 | pmap_insert_commpage(vm_map_pmap(map)); |
3695 | return KERN_SUCCESS; |
3696 | #else |
3697 | ipc_port_t commpage_handle, commpage_text_handle; |
3698 | vm_map_offset_t commpage_address, objc_address, commpage_text_address; |
3699 | vm_map_size_t commpage_size, objc_size, commpage_text_size; |
3700 | vm_map_kernel_flags_t vmk_flags; |
3701 | kern_return_t kr; |
3702 | |
3703 | SHARED_REGION_TRACE_DEBUG( |
3704 | ("commpage: -> enter(%p,%p)\n" , |
3705 | (void *)VM_KERNEL_ADDRPERM(map), |
3706 | (void *)VM_KERNEL_ADDRPERM(task))); |
3707 | |
3708 | commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH; |
3709 | /* the comm page is likely to be beyond the actual end of the VM map */ |
3710 | vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(); |
3711 | vmk_flags.vmkf_beyond_max = TRUE; |
3712 | |
3713 | /* select the appropriate comm page for this task */ |
3714 | assert(!(is64bit ^ vm_map_is_64bit(map))); |
3715 | if (is64bit) { |
3716 | commpage_handle = commpage64_handle; |
3717 | commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS; |
3718 | commpage_size = _COMM_PAGE64_AREA_LENGTH; |
3719 | objc_size = _COMM_PAGE64_OBJC_SIZE; |
3720 | objc_address = _COMM_PAGE64_OBJC_BASE; |
3721 | commpage_text_handle = commpage_text64_handle; |
3722 | commpage_text_address = (vm_map_offset_t) commpage_text64_location; |
3723 | } else { |
3724 | commpage_handle = commpage32_handle; |
3725 | commpage_address = |
3726 | (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS; |
3727 | commpage_size = _COMM_PAGE32_AREA_LENGTH; |
3728 | objc_size = _COMM_PAGE32_OBJC_SIZE; |
3729 | objc_address = _COMM_PAGE32_OBJC_BASE; |
3730 | commpage_text_handle = commpage_text32_handle; |
3731 | commpage_text_address = (vm_map_offset_t) commpage_text32_location; |
3732 | } |
3733 | |
3734 | if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 && |
3735 | (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) { |
3736 | /* the commpage is properly aligned or sized for pmap-nesting */ |
3737 | vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP; |
3738 | vmk_flags.vmkf_nested_pmap = TRUE; |
3739 | } |
3740 | |
3741 | /* map the comm page in the task's address space */ |
3742 | assert(commpage_handle != IPC_PORT_NULL); |
3743 | kr = vm_map_enter_mem_object( |
3744 | map, |
3745 | &commpage_address, |
3746 | commpage_size, |
3747 | 0, |
3748 | vmk_flags, |
3749 | commpage_handle, |
3750 | 0, |
3751 | FALSE, |
3752 | VM_PROT_READ, |
3753 | VM_PROT_READ, |
3754 | VM_INHERIT_SHARE); |
3755 | if (kr != KERN_SUCCESS) { |
3756 | SHARED_REGION_TRACE_ERROR( |
3757 | ("commpage: enter(%p,0x%llx,0x%llx) " |
3758 | "commpage %p mapping failed 0x%x\n" , |
3759 | (void *)VM_KERNEL_ADDRPERM(map), |
3760 | (long long)commpage_address, |
3761 | (long long)commpage_size, |
3762 | (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr)); |
3763 | } |
3764 | |
3765 | /* map the comm text page in the task's address space */ |
3766 | assert(commpage_text_handle != IPC_PORT_NULL); |
3767 | kr = vm_map_enter_mem_object( |
3768 | map, |
3769 | &commpage_text_address, |
3770 | commpage_text_size, |
3771 | 0, |
3772 | vmk_flags, |
3773 | commpage_text_handle, |
3774 | 0, |
3775 | FALSE, |
3776 | VM_PROT_READ | VM_PROT_EXECUTE, |
3777 | VM_PROT_READ | VM_PROT_EXECUTE, |
3778 | VM_INHERIT_SHARE); |
3779 | if (kr != KERN_SUCCESS) { |
3780 | SHARED_REGION_TRACE_ERROR( |
3781 | ("commpage text: enter(%p,0x%llx,0x%llx) " |
3782 | "commpage text %p mapping failed 0x%x\n" , |
3783 | (void *)VM_KERNEL_ADDRPERM(map), |
3784 | (long long)commpage_text_address, |
3785 | (long long)commpage_text_size, |
3786 | (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr)); |
3787 | } |
3788 | |
3789 | /* |
3790 | * Since we're here, we also pre-allocate some virtual space for the |
3791 | * Objective-C run-time, if needed... |
3792 | */ |
3793 | if (objc_size != 0) { |
3794 | kr = vm_map_enter_mem_object( |
3795 | map, |
3796 | &objc_address, |
3797 | objc_size, |
3798 | 0, |
3799 | vmk_flags, |
3800 | IPC_PORT_NULL, |
3801 | 0, |
3802 | FALSE, |
3803 | VM_PROT_ALL, |
3804 | VM_PROT_ALL, |
3805 | VM_INHERIT_DEFAULT); |
3806 | if (kr != KERN_SUCCESS) { |
3807 | SHARED_REGION_TRACE_ERROR( |
3808 | ("commpage: enter(%p,0x%llx,0x%llx) " |
3809 | "objc mapping failed 0x%x\n" , |
3810 | (void *)VM_KERNEL_ADDRPERM(map), |
3811 | (long long)objc_address, |
3812 | (long long)objc_size, kr)); |
3813 | } |
3814 | } |
3815 | |
3816 | SHARED_REGION_TRACE_DEBUG( |
3817 | ("commpage: enter(%p,%p) <- 0x%x\n" , |
3818 | (void *)VM_KERNEL_ADDRPERM(map), |
3819 | (void *)VM_KERNEL_ADDRPERM(task), kr)); |
3820 | return kr; |
3821 | #endif |
3822 | } |
3823 | |
3824 | int |
3825 | vm_shared_region_slide( |
3826 | uint32_t slide, |
3827 | mach_vm_offset_t entry_start_address, |
3828 | mach_vm_size_t entry_size, |
3829 | mach_vm_offset_t slide_start, |
3830 | mach_vm_size_t slide_size, |
3831 | mach_vm_offset_t slid_mapping, |
3832 | memory_object_control_t sr_file_control, |
3833 | vm_prot_t prot) |
3834 | { |
3835 | vm_shared_region_t sr; |
3836 | kern_return_t error; |
3837 | |
3838 | SHARED_REGION_TRACE_DEBUG( |
3839 | ("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n" , |
3840 | slide, entry_start_address, entry_size, slide_start, slide_size)); |
3841 | |
sr = vm_shared_region_get(current_task());
3843 | if (sr == NULL) { |
3844 | printf(format: "%s: no shared region?\n" , __FUNCTION__); |
3845 | SHARED_REGION_TRACE_DEBUG( |
3846 | ("vm_shared_region_slide: <- %d (no shared region)\n" , |
3847 | KERN_FAILURE)); |
3848 | return KERN_FAILURE; |
3849 | } |
3850 | |
3851 | /* |
3852 | * Protect from concurrent access. |
3853 | */ |
3854 | vm_shared_region_lock(); |
3855 | while (sr->sr_slide_in_progress) { |
3856 | vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT); |
3857 | } |
3858 | |
3859 | sr->sr_slide_in_progress = current_thread(); |
3860 | vm_shared_region_unlock(); |
3861 | |
3862 | error = vm_shared_region_slide_mapping(sr, |
(user_addr_t)slide_start,
slide_size,
entry_start_address,
entry_size,
3867 | slid_mapping, |
3868 | slide, |
3869 | sr_file_control, |
3870 | prot); |
3871 | if (error) { |
3872 | printf(format: "slide_info initialization failed with kr=%d\n" , error); |
3873 | } |
3874 | |
3875 | vm_shared_region_lock(); |
3876 | |
3877 | assert(sr->sr_slide_in_progress == current_thread()); |
3878 | sr->sr_slide_in_progress = THREAD_NULL; |
3879 | vm_shared_region_wakeup(&sr->sr_slide_in_progress); |
3880 | |
3881 | #if XNU_TARGET_OS_OSX |
3882 | if (error == KERN_SUCCESS) { |
3883 | shared_region_completed_slide = TRUE; |
3884 | } |
3885 | #endif /* XNU_TARGET_OS_OSX */ |
3886 | vm_shared_region_unlock(); |
3887 | |
vm_shared_region_deallocate(sr);
3889 | |
3890 | SHARED_REGION_TRACE_DEBUG( |
3891 | ("vm_shared_region_slide: <- %d\n" , |
3892 | error)); |
3893 | |
3894 | return error; |
3895 | } |
3896 | |
3897 | /* |
3898 | * Used during Authenticated Root Volume macOS boot. |
3899 | * Launchd re-execs itself and wants the new launchd to use |
3900 | * the shared cache from the new root volume. This call |
3901 | * makes all the existing shared caches stale to allow |
3902 | * that to happen. |
3903 | */ |
3904 | void |
3905 | vm_shared_region_pivot(void) |
3906 | { |
3907 | vm_shared_region_t shared_region = NULL; |
3908 | |
3909 | vm_shared_region_lock(); |
3910 | |
3911 | queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) { |
3912 | assert(shared_region->sr_ref_count > 0); |
3913 | shared_region->sr_stale = TRUE; |
3914 | if (shared_region->sr_timer_call) { |
3915 | /* |
3916 | * We have a shared region ready to be destroyed |
3917 | * and just waiting for a delayed timer to fire. |
3918 | * Marking it stale cements its ineligibility to |
3919 | * be used ever again. So let's shorten the timer |
3920 | * aggressively down to 10 milliseconds and get rid of it. |
3921 | * This is a single quantum and we don't need to go |
3922 | * shorter than this duration. We want it to be short |
3923 | * enough, however, because we could have an unmount |
3924 | * of the volume hosting this shared region just behind |
3925 | * us. |
3926 | */ |
3927 | uint64_t deadline; |
3928 | assert(shared_region->sr_ref_count == 1); |
3929 | |
3930 | /* |
3931 | * Free the old timer call. Returns with a reference held. |
3932 | * If the old timer has fired and is waiting for the vm_shared_region_lock |
3933 | * lock, we will just return with an additional ref_count i.e. 2. |
3934 | * The old timer will then fire and just drop the ref count down to 1 |
3935 | * with no other modifications. |
3936 | */ |
3937 | vm_shared_region_reference_locked(shared_region); |
3938 | |
3939 | /* set up the timer. Keep the reference from above for this timer.*/ |
3940 | shared_region->sr_timer_call = thread_call_allocate( |
(thread_call_func_t) vm_shared_region_timeout,
(thread_call_param_t) shared_region);

/* schedule the timer */
clock_interval_to_deadline(10, /* 10 milliseconds */
NSEC_PER_MSEC,
&deadline);
thread_call_enter_delayed(shared_region->sr_timer_call,
3949 | deadline); |
3950 | |
3951 | SHARED_REGION_TRACE_DEBUG( |
3952 | ("shared_region: pivot(%p): armed timer\n" , |
3953 | (void *)VM_KERNEL_ADDRPERM(shared_region))); |
3954 | } |
3955 | } |
3956 | |
3957 | vm_shared_region_unlock(); |
3958 | } |
3959 | |
3960 | /* |
3961 | * Routine to mark any non-standard slide shared cache region as stale. |
3962 | * This causes the next "reslide" spawn to create a new shared region. |
3963 | */ |
3964 | void |
3965 | vm_shared_region_reslide_stale(boolean_t driverkit) |
3966 | { |
3967 | #if __has_feature(ptrauth_calls) |
3968 | vm_shared_region_t shared_region = NULL; |
3969 | |
3970 | vm_shared_region_lock(); |
3971 | |
3972 | queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) { |
3973 | assert(shared_region->sr_ref_count > 0); |
3974 | if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) { |
3975 | shared_region->sr_stale = TRUE; |
3976 | vm_shared_region_reslide_count++; |
3977 | } |
3978 | } |
3979 | |
3980 | vm_shared_region_unlock(); |
3981 | #else |
3982 | (void)driverkit; |
3983 | #endif /* __has_feature(ptrauth_calls) */ |
3984 | } |
3985 | |
3986 | /* |
3987 | * report if the task is using a reslide shared cache region. |
3988 | */ |
3989 | bool |
3990 | vm_shared_region_is_reslide(__unused struct task *task) |
3991 | { |
3992 | bool is_reslide = FALSE; |
3993 | #if __has_feature(ptrauth_calls) |
3994 | vm_shared_region_t sr = vm_shared_region_get(task); |
3995 | |
3996 | if (sr != NULL) { |
3997 | is_reslide = sr->sr_reslide; |
3998 | vm_shared_region_deallocate(sr); |
3999 | } |
4000 | #endif /* __has_feature(ptrauth_calls) */ |
4001 | return is_reslide; |
4002 | } |
4003 | |
4004 | /* |
* This is called from power management code to let the kernel know the current source of power.
* 0 if it is an external source (connected to power)
* 1 if it is an internal power source, i.e. battery
4008 | */ |
4009 | void |
4010 | #if XNU_TARGET_OS_OSX |
4011 | post_sys_powersource(int i) |
4012 | #else /* XNU_TARGET_OS_OSX */ |
4013 | post_sys_powersource(__unused int i) |
4014 | #endif /* XNU_TARGET_OS_OSX */ |
4015 | { |
4016 | #if XNU_TARGET_OS_OSX |
post_sys_powersource_internal(i, 0);
4018 | #endif /* XNU_TARGET_OS_OSX */ |
4019 | } |
4020 | |
4021 | |
4022 | #if XNU_TARGET_OS_OSX |
4023 | static void |
4024 | post_sys_powersource_internal(int i, int internal) |
4025 | { |
4026 | if (internal == 0) { |
4027 | __system_power_source = i; |
4028 | } |
4029 | } |
4030 | #endif /* XNU_TARGET_OS_OSX */ |
4031 | |
4032 | void * |
4033 | vm_shared_region_root_dir( |
4034 | struct vm_shared_region *sr) |
4035 | { |
4036 | void *vnode; |
4037 | |
4038 | vm_shared_region_lock(); |
4039 | vnode = sr->sr_root_dir; |
4040 | vm_shared_region_unlock(); |
4041 | return vnode; |
4042 | } |
4043 | |