1/*
2 * Copyright (c) 2007-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25 * Shared region (... and comm page)
26 *
27 * This file handles the VM shared region and comm page.
28 *
29 */
30/*
31 * SHARED REGIONS
32 * --------------
33 *
34 * A shared region is a submap that contains the most common system shared
35 * libraries for a given environment which is defined by:
36 * - cpu-type
37 * - 64-bitness
38 * - root directory
39 * - Team ID - when we have pointer authentication.
40 *
41 * The point of a shared region is to reduce the setup overhead when exec'ing
42 * a new process. A shared region uses a shared VM submap that gets mapped
43 * automatically at exec() time, see vm_map_exec(). The first process of a given
44 * environment sets up the shared region and all further processes in that
45 * environment can re-use that shared region without having to re-create
46 * the same mappings in their VM map. All they need is contained in the shared
47 * region.
48 *
49 * The region can also share a pmap (mostly for read-only parts but also for the
50 * initial version of some writable parts), which gets "nested" into the
51 * process's pmap. This reduces the number of soft faults: once one process
52 * brings in a page in the shared region, all the other processes can access
53 * it without having to enter it in their own pmap.
54 *
55 * When a process is being exec'ed, vm_map_exec() calls vm_shared_region_enter()
56 * to map the appropriate shared region in the process's address space.
57 * We look up the appropriate shared region for the process's environment.
58 * If we can't find one, we create a new (empty) one and add it to the list.
59 * Otherwise, we just take an extra reference on the shared region we found.
60 *
61 * The "dyld" runtime, mapped into the process's address space at exec() time,
62 * will then use the shared_region_check_np() and shared_region_map_and_slide_2_np()
63 * system calls to validate and/or populate the shared region with the
64 * appropriate dyld_shared_cache file.
65 *
66 * The shared region is inherited on fork() and the child simply takes an
67 * extra reference on its parent's shared region.
68 *
69 * When the task terminates, we release the reference on its shared region.
70 * When the last reference is released, we destroy the shared region.
71 *
72 * After a chroot(), the calling process keeps using its original shared region,
73 * since that's what was mapped when it was started. But its children
74 * will use a different shared region, because they need to use the shared
75 * cache that's relative to the new root directory.
76 */
77
78/*
79 * COMM PAGE
80 *
81 * A "comm page" is an area of memory that is populated by the kernel with
82 * the appropriate platform-specific version of some commonly used code.
83 * There is one "comm page" per platform (cpu-type, 64-bitness) but only
84 * for the native cpu-type. No need to overly optimize translated code
85 * for hardware that is not really there !
86 *
87 * The comm pages are created and populated at boot time.
88 *
89 * The appropriate comm page is mapped into a process's address space
90 * at exec() time, in vm_map_exec(). It is then inherited on fork().
91 *
92 * The comm page is shared between the kernel and all applications of
93 * a given platform. Only the kernel can modify it.
94 *
95 * Applications just branch to fixed addresses in the comm page and find
96 * the right version of the code for the platform. There is also some
97 * data provided and updated by the kernel for processes to retrieve easily
98 * without having to do a system call.
99 */
100
101#include <debug.h>
102
103#include <kern/ipc_tt.h>
104#include <kern/kalloc.h>
105#include <kern/thread_call.h>
106
107#include <mach/mach_vm.h>
108#include <mach/machine.h>
109
110#include <vm/vm_map.h>
111#include <vm/vm_map_internal.h>
112#include <vm/vm_shared_region.h>
113
114#include <vm/vm_protos.h>
115
116#include <machine/commpage.h>
117#include <machine/cpu_capabilities.h>
118#include <sys/random.h>
119#include <sys/errno.h>
120
121#if defined(__arm64__)
122#include <arm/cpu_data_internal.h>
123#include <arm/misc_protos.h>
124#endif
125
126/*
127 * the following codes are used in the subclass
128 * of the DBG_MACH_SHAREDREGION class
129 */
130#define PROCESS_SHARED_CACHE_LAYOUT 0x00
131
132#if __has_feature(ptrauth_calls)
133#include <ptrauth.h>
134#endif /* __has_feature(ptrauth_calls) */
135
136/* "dyld" uses this to figure out what the kernel supports */
137int shared_region_version = 3;
138
139/* trace level, output is sent to the system log file */
140int shared_region_trace_level = SHARED_REGION_TRACE_ERROR_LVL;
141
142/* should local (non-chroot) shared regions persist when no task uses them ? */
143int shared_region_persistence = 0; /* no by default */
144
145
146/* delay in seconds before reclaiming an unused shared region */
147TUNABLE_WRITEABLE(int, shared_region_destroy_delay, "vm_shared_region_destroy_delay", 120);
148
149/*
150 * Cached pointer to the most recently mapped shared region from PID 1, which should
151 * be the most commonly mapped shared region in the system. There are many processes
152 * which do not use this, for a variety of reasons.
153 *
154 * The main consumer of this is stackshot.
155 */
156struct vm_shared_region *primary_system_shared_region = NULL;
157
158#if XNU_TARGET_OS_OSX
159/*
160 * Only one cache gets to slide on Desktop, since we can't
161 * tear down slide info properly today and the desktop actually
162 * produces lots of shared caches.
163 */
164boolean_t shared_region_completed_slide = FALSE;
165#endif /* XNU_TARGET_OS_OSX */
166
167/* this lock protects all the shared region data structures */
168static LCK_GRP_DECLARE(vm_shared_region_lck_grp, "vm shared region");
169static LCK_MTX_DECLARE(vm_shared_region_lock, &vm_shared_region_lck_grp);
170
171#define vm_shared_region_lock() lck_mtx_lock(&vm_shared_region_lock)
172#define vm_shared_region_unlock() lck_mtx_unlock(&vm_shared_region_lock)
173#define vm_shared_region_sleep(event, interruptible) \
174 lck_mtx_sleep_with_inheritor(&vm_shared_region_lock, \
175 LCK_SLEEP_DEFAULT, \
176 (event_t) (event), \
177 *(event), \
178 (interruptible) | THREAD_WAIT_NOREPORT, \
179 TIMEOUT_WAIT_FOREVER)
180#define vm_shared_region_wakeup(event) \
181 wakeup_all_with_inheritor((event), THREAD_AWAKENED)
182
183/* the list of currently available shared regions (one per environment) */
184queue_head_t vm_shared_region_queue = QUEUE_HEAD_INITIALIZER(vm_shared_region_queue);
185int vm_shared_region_count = 0;
186int vm_shared_region_peak = 0;
187static uint32_t vm_shared_region_lastid = 0; /* for sr_id field */
188
189/*
190 * the number of times an event has forced the recalculation of the reslide
191 * shared region slide.
192 */
193#if __has_feature(ptrauth_calls)
194int vm_shared_region_reslide_count = 0;
195#endif /* __has_feature(ptrauth_calls) */
196
197static void vm_shared_region_reference_locked(vm_shared_region_t shared_region);
198static vm_shared_region_t vm_shared_region_create(
199 void *root_dir,
200 cpu_type_t cputype,
201 cpu_subtype_t cpu_subtype,
202 boolean_t is_64bit,
203 int target_page_shift,
204 boolean_t reslide,
205 boolean_t is_driverkit,
206 uint32_t rsr_version);
207static void vm_shared_region_destroy(vm_shared_region_t shared_region);
208
209static kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_slide_info_entry_t entry, mach_vm_size_t size);
210static void vm_shared_region_timeout(thread_call_param_t param0,
211 thread_call_param_t param1);
212static kern_return_t vm_shared_region_slide_mapping(
213 vm_shared_region_t sr,
214 user_addr_t slide_info_addr,
215 mach_vm_size_t slide_info_size,
216 mach_vm_offset_t start,
217 mach_vm_size_t size,
218 mach_vm_offset_t slid_mapping,
219 uint32_t slide,
220 memory_object_control_t,
221 vm_prot_t prot); /* forward */
222
223static int __commpage_setup = 0;
224#if XNU_TARGET_OS_OSX
225static int __system_power_source = 1; /* init to extrnal power source */
226static void post_sys_powersource_internal(int i, int internal);
227#endif /* XNU_TARGET_OS_OSX */
228
229extern u_int32_t random(void);
230
231/*
232 * Retrieve a task's shared region and grab an extra reference to
233 * make sure it doesn't disappear while the caller is using it.
234 * The caller is responsible for consuming that extra reference if
235 * necessary.
236 */
237vm_shared_region_t
238vm_shared_region_get(
239 task_t task)
240{
241 vm_shared_region_t shared_region;
242
243 SHARED_REGION_TRACE_DEBUG(
244 ("shared_region: -> get(%p)\n",
245 (void *)VM_KERNEL_ADDRPERM(task)));
246
247 task_lock(task);
248 vm_shared_region_lock();
249 shared_region = task->shared_region;
250 if (shared_region) {
251 assert(shared_region->sr_ref_count > 0);
252 vm_shared_region_reference_locked(shared_region);
253 }
254 vm_shared_region_unlock();
255 task_unlock(task);
256
257 SHARED_REGION_TRACE_DEBUG(
258 ("shared_region: get(%p) <- %p\n",
259 (void *)VM_KERNEL_ADDRPERM(task),
260 (void *)VM_KERNEL_ADDRPERM(shared_region)));
261
262 return shared_region;
263}
264
265vm_map_t
266vm_shared_region_vm_map(
267 vm_shared_region_t shared_region)
268{
269 ipc_port_t sr_handle;
270 vm_named_entry_t sr_mem_entry;
271 vm_map_t sr_map;
272
273 SHARED_REGION_TRACE_DEBUG(
274 ("shared_region: -> vm_map(%p)\n",
275 (void *)VM_KERNEL_ADDRPERM(shared_region)));
276 assert(shared_region->sr_ref_count > 0);
277
278 sr_handle = shared_region->sr_mem_entry;
279 sr_mem_entry = mach_memory_entry_from_port(port: sr_handle);
280 sr_map = sr_mem_entry->backing.map;
281 assert(sr_mem_entry->is_sub_map);
282
283 SHARED_REGION_TRACE_DEBUG(
284 ("shared_region: vm_map(%p) <- %p\n",
285 (void *)VM_KERNEL_ADDRPERM(shared_region),
286 (void *)VM_KERNEL_ADDRPERM(sr_map)));
287 return sr_map;
288}
289
290/*
291 * Set the shared region the process should use.
292 * A NULL new shared region means that we just want to release the old
293 * shared region.
294 * The caller should already have an extra reference on the new shared region
295 * (if any). We release a reference on the old shared region (if any).
296 */
297void
298vm_shared_region_set(
299 task_t task,
300 vm_shared_region_t new_shared_region)
301{
302 vm_shared_region_t old_shared_region;
303
304 SHARED_REGION_TRACE_DEBUG(
305 ("shared_region: -> set(%p, %p)\n",
306 (void *)VM_KERNEL_ADDRPERM(task),
307 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
308
309 task_lock(task);
310 vm_shared_region_lock();
311
312 old_shared_region = task->shared_region;
313 if (new_shared_region) {
314 assert(new_shared_region->sr_ref_count > 0);
315 }
316
317 task->shared_region = new_shared_region;
318
319 vm_shared_region_unlock();
320 task_unlock(task);
321
322 if (old_shared_region) {
323 assert(old_shared_region->sr_ref_count > 0);
324 vm_shared_region_deallocate(shared_region: old_shared_region);
325 }
326
327 SHARED_REGION_TRACE_DEBUG(
328 ("shared_region: set(%p) <- old=%p new=%p\n",
329 (void *)VM_KERNEL_ADDRPERM(task),
330 (void *)VM_KERNEL_ADDRPERM(old_shared_region),
331 (void *)VM_KERNEL_ADDRPERM(new_shared_region)));
332}
333
334/*
335 * New arm64 shared regions match with an existing arm64e region.
336 * They just get a private non-authenticating pager.
337 */
338static inline bool
339match_subtype(cpu_type_t cputype, cpu_subtype_t exist, cpu_subtype_t new)
340{
341 if (exist == new) {
342 return true;
343 }
344 if (cputype == CPU_TYPE_ARM64 &&
345 exist == CPU_SUBTYPE_ARM64E &&
346 new == CPU_SUBTYPE_ARM64_ALL) {
347 return true;
348 }
349 return false;
350}
351
352
353/*
354 * Lookup up the shared region for the desired environment.
355 * If none is found, create a new (empty) one.
356 * Grab an extra reference on the returned shared region, to make sure
357 * it doesn't get destroyed before the caller is done with it. The caller
358 * is responsible for consuming that extra reference if necessary.
359 */
360vm_shared_region_t
361vm_shared_region_lookup(
362 void *root_dir,
363 cpu_type_t cputype,
364 cpu_subtype_t cpu_subtype,
365 boolean_t is_64bit,
366 int target_page_shift,
367 boolean_t reslide,
368 boolean_t is_driverkit,
369 uint32_t rsr_version)
370{
371 vm_shared_region_t shared_region;
372 vm_shared_region_t new_shared_region;
373
374 SHARED_REGION_TRACE_DEBUG(
375 ("shared_region: -> lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
376 (void *)VM_KERNEL_ADDRPERM(root_dir),
377 cputype, cpu_subtype, is_64bit, target_page_shift,
378 reslide, is_driverkit));
379
380 shared_region = NULL;
381 new_shared_region = NULL;
382
383 vm_shared_region_lock();
384 for (;;) {
385 queue_iterate(&vm_shared_region_queue,
386 shared_region,
387 vm_shared_region_t,
388 sr_q) {
389 assert(shared_region->sr_ref_count > 0);
390 if (shared_region->sr_cpu_type == cputype &&
391 match_subtype(cputype, exist: shared_region->sr_cpu_subtype, new: cpu_subtype) &&
392 shared_region->sr_root_dir == root_dir &&
393 shared_region->sr_64bit == is_64bit &&
394#if __ARM_MIXED_PAGE_SIZE__
395 shared_region->sr_page_shift == target_page_shift &&
396#endif /* __ARM_MIXED_PAGE_SIZE__ */
397#if __has_feature(ptrauth_calls)
398 shared_region->sr_reslide == reslide &&
399#endif /* __has_feature(ptrauth_calls) */
400 shared_region->sr_driverkit == is_driverkit &&
401 shared_region->sr_rsr_version == rsr_version &&
402 !shared_region->sr_stale) {
403 /* found a match ! */
404 vm_shared_region_reference_locked(shared_region);
405 goto done;
406 }
407 }
408 if (new_shared_region == NULL) {
409 /* no match: create a new one */
410 vm_shared_region_unlock();
411 new_shared_region = vm_shared_region_create(root_dir,
412 cputype,
413 cpu_subtype,
414 is_64bit,
415 target_page_shift,
416 reslide,
417 is_driverkit,
418 rsr_version);
419 /* do the lookup again, in case we lost a race */
420 vm_shared_region_lock();
421 continue;
422 }
423 /* still no match: use our new one */
424 shared_region = new_shared_region;
425 new_shared_region = NULL;
426 uint32_t newid = ++vm_shared_region_lastid;
427 if (newid == 0) {
428 panic("shared_region: vm_shared_region_lastid wrapped");
429 }
430 shared_region->sr_id = newid;
431 shared_region->sr_install_time = mach_absolute_time();
432 queue_enter(&vm_shared_region_queue,
433 shared_region,
434 vm_shared_region_t,
435 sr_q);
436 vm_shared_region_count++;
437 if (vm_shared_region_count > vm_shared_region_peak) {
438 vm_shared_region_peak = vm_shared_region_count;
439 }
440 break;
441 }
442
443done:
444 vm_shared_region_unlock();
445
446 if (new_shared_region) {
447 /*
448 * We lost a race with someone else to create a new shared
449 * region for that environment. Get rid of our unused one.
450 */
451 assert(new_shared_region->sr_ref_count == 1);
452 new_shared_region->sr_ref_count--;
453 vm_shared_region_destroy(shared_region: new_shared_region);
454 new_shared_region = NULL;
455 }
456
457 SHARED_REGION_TRACE_DEBUG(
458 ("shared_region: lookup(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d) <- %p\n",
459 (void *)VM_KERNEL_ADDRPERM(root_dir),
460 cputype, cpu_subtype, is_64bit, target_page_shift,
461 reslide, is_driverkit,
462 (void *)VM_KERNEL_ADDRPERM(shared_region)));
463
464 assert(shared_region->sr_ref_count > 0);
465 return shared_region;
466}
467
468/*
469 * Take an extra reference on a shared region.
470 * The vm_shared_region_lock should already be held by the caller.
471 */
472static void
473vm_shared_region_reference_locked(
474 vm_shared_region_t shared_region)
475{
476 LCK_MTX_ASSERT(&vm_shared_region_lock, LCK_MTX_ASSERT_OWNED);
477
478 SHARED_REGION_TRACE_DEBUG(
479 ("shared_region: -> reference_locked(%p)\n",
480 (void *)VM_KERNEL_ADDRPERM(shared_region)));
481 assert(shared_region->sr_ref_count > 0);
482 shared_region->sr_ref_count++;
483 assert(shared_region->sr_ref_count != 0);
484
485 if (shared_region->sr_timer_call != NULL) {
486 boolean_t cancelled;
487
488 /* cancel and free any pending timeout */
489 cancelled = thread_call_cancel(call: shared_region->sr_timer_call);
490 if (cancelled) {
491 thread_call_free(call: shared_region->sr_timer_call);
492 shared_region->sr_timer_call = NULL;
493 /* release the reference held by the cancelled timer */
494 shared_region->sr_ref_count--;
495 } else {
496 /* the timer will drop the reference and free itself */
497 }
498 }
499
500 SHARED_REGION_TRACE_DEBUG(
501 ("shared_region: reference_locked(%p) <- %d\n",
502 (void *)VM_KERNEL_ADDRPERM(shared_region),
503 shared_region->sr_ref_count));
504}
505
506/*
507 * Take a reference on a shared region.
508 */
509void
510vm_shared_region_reference(vm_shared_region_t shared_region)
511{
512 SHARED_REGION_TRACE_DEBUG(
513 ("shared_region: -> reference(%p)\n",
514 (void *)VM_KERNEL_ADDRPERM(shared_region)));
515
516 vm_shared_region_lock();
517 vm_shared_region_reference_locked(shared_region);
518 vm_shared_region_unlock();
519
520 SHARED_REGION_TRACE_DEBUG(
521 ("shared_region: reference(%p) <- %d\n",
522 (void *)VM_KERNEL_ADDRPERM(shared_region),
523 shared_region->sr_ref_count));
524}
525
526/*
527 * Release a reference on the shared region.
528 * Destroy it if there are no references left.
529 */
530void
531vm_shared_region_deallocate(
532 vm_shared_region_t shared_region)
533{
534 SHARED_REGION_TRACE_DEBUG(
535 ("shared_region: -> deallocate(%p)\n",
536 (void *)VM_KERNEL_ADDRPERM(shared_region)));
537
538 vm_shared_region_lock();
539
540 assert(shared_region->sr_ref_count > 0);
541
542 if (shared_region->sr_root_dir == NULL) {
543 /*
544 * Local (i.e. based on the boot volume) shared regions
545 * can persist or not based on the "shared_region_persistence"
546 * sysctl.
547 * Make sure that this one complies.
548 *
549 * See comments in vm_shared_region_slide() for notes about
550 * shared regions we have slid (which are not torn down currently).
551 */
552 if (shared_region_persistence &&
553 !shared_region->sr_persists) {
554 /* make this one persistent */
555 shared_region->sr_ref_count++;
556 shared_region->sr_persists = TRUE;
557 } else if (!shared_region_persistence &&
558 shared_region->sr_persists) {
559 /* make this one no longer persistent */
560 assert(shared_region->sr_ref_count > 1);
561 shared_region->sr_ref_count--;
562 shared_region->sr_persists = FALSE;
563 }
564 }
565
566 assert(shared_region->sr_ref_count > 0);
567 shared_region->sr_ref_count--;
568 SHARED_REGION_TRACE_DEBUG(
569 ("shared_region: deallocate(%p): ref now %d\n",
570 (void *)VM_KERNEL_ADDRPERM(shared_region),
571 shared_region->sr_ref_count));
572
573 if (shared_region->sr_ref_count == 0) {
574 uint64_t deadline;
575
576 /*
577 * Even though a shared region is unused, delay a while before
578 * tearing it down, in case a new app launch can use it.
579 * We don't keep around stale shared regions, nor older RSR ones.
580 */
581 if (shared_region->sr_timer_call == NULL &&
582 shared_region_destroy_delay != 0 &&
583 !shared_region->sr_stale &&
584 !(shared_region->sr_rsr_version != 0 &&
585 shared_region->sr_rsr_version != rsr_get_version())) {
586 /* hold one reference for the timer */
587 assert(!shared_region->sr_mapping_in_progress);
588 shared_region->sr_ref_count++;
589
590 /* set up the timer */
591 shared_region->sr_timer_call = thread_call_allocate(
592 func: (thread_call_func_t) vm_shared_region_timeout,
593 param0: (thread_call_param_t) shared_region);
594
595 /* schedule the timer */
596 clock_interval_to_deadline(interval: shared_region_destroy_delay,
597 NSEC_PER_SEC,
598 result: &deadline);
599 thread_call_enter_delayed(call: shared_region->sr_timer_call,
600 deadline);
601
602 SHARED_REGION_TRACE_DEBUG(
603 ("shared_region: deallocate(%p): armed timer\n",
604 (void *)VM_KERNEL_ADDRPERM(shared_region)));
605
606 vm_shared_region_unlock();
607 } else {
608 /* timer expired: let go of this shared region */
609
610 /* Make sure there's no cached pointer to the region. */
611 if (primary_system_shared_region == shared_region) {
612 primary_system_shared_region = NULL;
613 }
614
615 /*
616 * Remove it from the queue first, so no one can find
617 * it...
618 */
619 queue_remove(&vm_shared_region_queue,
620 shared_region,
621 vm_shared_region_t,
622 sr_q);
623 vm_shared_region_count--;
624 vm_shared_region_unlock();
625
626 /* ... and destroy it */
627 vm_shared_region_destroy(shared_region);
628 shared_region = NULL;
629 }
630 } else {
631 vm_shared_region_unlock();
632 }
633
634 SHARED_REGION_TRACE_DEBUG(
635 ("shared_region: deallocate(%p) <-\n",
636 (void *)VM_KERNEL_ADDRPERM(shared_region)));
637}
638
639void
640vm_shared_region_timeout(
641 thread_call_param_t param0,
642 __unused thread_call_param_t param1)
643{
644 vm_shared_region_t shared_region;
645
646 shared_region = (vm_shared_region_t) param0;
647
648 vm_shared_region_deallocate(shared_region);
649}
650
651
652/*
653 * Create a new (empty) shared region for a new environment.
654 */
655static vm_shared_region_t
656vm_shared_region_create(
657 void *root_dir,
658 cpu_type_t cputype,
659 cpu_subtype_t cpu_subtype,
660 boolean_t is_64bit,
661 int target_page_shift,
662#if !__has_feature(ptrauth_calls)
663 __unused
664#endif /* __has_feature(ptrauth_calls) */
665 boolean_t reslide,
666 boolean_t is_driverkit,
667 uint32_t rsr_version)
668{
669 vm_named_entry_t mem_entry;
670 ipc_port_t mem_entry_port;
671 vm_shared_region_t shared_region;
672 vm_map_t sub_map;
673 mach_vm_offset_t base_address, pmap_nesting_start;
674 mach_vm_size_t size, pmap_nesting_size;
675
676 SHARED_REGION_TRACE_INFO(
677 ("shared_region: -> create(root=%p,cpu=<%d,%d>,64bit=%d,pgshift=%d,reslide=%d,driverkit=%d)\n",
678 (void *)VM_KERNEL_ADDRPERM(root_dir),
679 cputype, cpu_subtype, is_64bit, target_page_shift,
680 reslide, is_driverkit));
681
682 base_address = 0;
683 size = 0;
684 mem_entry = NULL;
685 mem_entry_port = IPC_PORT_NULL;
686 sub_map = VM_MAP_NULL;
687
688 /* create a new shared region structure... */
689 shared_region = kalloc_type(struct vm_shared_region,
690 Z_WAITOK | Z_NOFAIL);
691
692 /* figure out the correct settings for the desired environment */
693 if (is_64bit) {
694 switch (cputype) {
695#if defined(__arm64__)
696 case CPU_TYPE_ARM64:
697 base_address = SHARED_REGION_BASE_ARM64;
698 size = SHARED_REGION_SIZE_ARM64;
699 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM64;
700 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM64;
701 break;
702#else
703 case CPU_TYPE_I386:
704 base_address = SHARED_REGION_BASE_X86_64;
705 size = SHARED_REGION_SIZE_X86_64;
706 pmap_nesting_start = SHARED_REGION_NESTING_BASE_X86_64;
707 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_X86_64;
708 break;
709 case CPU_TYPE_POWERPC:
710 base_address = SHARED_REGION_BASE_PPC64;
711 size = SHARED_REGION_SIZE_PPC64;
712 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC64;
713 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC64;
714 break;
715#endif
716 default:
717 SHARED_REGION_TRACE_ERROR(
718 ("shared_region: create: unknown cpu type %d\n",
719 cputype));
720 kfree_type(struct vm_shared_region, shared_region);
721 shared_region = NULL;
722 goto done;
723 }
724 } else {
725 switch (cputype) {
726#if defined(__arm64__)
727 case CPU_TYPE_ARM:
728 base_address = SHARED_REGION_BASE_ARM;
729 size = SHARED_REGION_SIZE_ARM;
730 pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM;
731 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM;
732 break;
733#else
734 case CPU_TYPE_I386:
735 base_address = SHARED_REGION_BASE_I386;
736 size = SHARED_REGION_SIZE_I386;
737 pmap_nesting_start = SHARED_REGION_NESTING_BASE_I386;
738 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_I386;
739 break;
740 case CPU_TYPE_POWERPC:
741 base_address = SHARED_REGION_BASE_PPC;
742 size = SHARED_REGION_SIZE_PPC;
743 pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC;
744 pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC;
745 break;
746#endif
747 default:
748 SHARED_REGION_TRACE_ERROR(
749 ("shared_region: create: unknown cpu type %d\n",
750 cputype));
751 kfree_type(struct vm_shared_region, shared_region);
752 shared_region = NULL;
753 goto done;
754 }
755 }
756
757 /* create a memory entry structure and a Mach port handle */
758 mem_entry = mach_memory_entry_allocate(user_handle_p: &mem_entry_port);
759
760#if defined(__arm64__)
761 {
762 struct pmap *pmap_nested;
763 int pmap_flags = 0;
764 pmap_flags |= is_64bit ? PMAP_CREATE_64BIT : 0;
765
766
767#if __ARM_MIXED_PAGE_SIZE__
768 if (cputype == CPU_TYPE_ARM64 &&
769 target_page_shift == FOURK_PAGE_SHIFT) {
770 /* arm64/4k address space */
771 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
772 }
773#endif /* __ARM_MIXED_PAGE_SIZE__ */
774
775 pmap_nested = pmap_create_options(NULL, size: 0, flags: pmap_flags);
776 if (pmap_nested != PMAP_NULL) {
777 pmap_set_nested(pmap: pmap_nested);
778 sub_map = vm_map_create_options(pmap: pmap_nested, min_off: 0,
779 max_off: (vm_map_offset_t)size, options: VM_MAP_CREATE_PAGEABLE);
780
781 if (is_64bit ||
782 page_shift_user32 == SIXTEENK_PAGE_SHIFT) {
783 /* enforce 16KB alignment of VM map entries */
784 vm_map_set_page_shift(map: sub_map, SIXTEENK_PAGE_SHIFT);
785 }
786#if __ARM_MIXED_PAGE_SIZE__
787 if (cputype == CPU_TYPE_ARM64 &&
788 target_page_shift == FOURK_PAGE_SHIFT) {
789 /* arm64/4k address space */
790 vm_map_set_page_shift(map: sub_map, FOURK_PAGE_SHIFT);
791 }
792#endif /* __ARM_MIXED_PAGE_SIZE__ */
793 } else {
794 sub_map = VM_MAP_NULL;
795 }
796 }
797#else /* defined(__arm64__) */
798 {
799 /* create a VM sub map and its pmap */
800 pmap_t pmap = pmap_create_options(NULL, 0, is_64bit);
801 if (pmap != NULL) {
802 sub_map = vm_map_create_options(pmap, 0,
803 (vm_map_offset_t)size, VM_MAP_CREATE_PAGEABLE);
804 } else {
805 sub_map = VM_MAP_NULL;
806 }
807 }
808#endif /* defined(__arm64__) */
809 if (sub_map == VM_MAP_NULL) {
810 ipc_port_release_send(port: mem_entry_port);
811 kfree_type(struct vm_shared_region, shared_region);
812 shared_region = NULL;
813 SHARED_REGION_TRACE_ERROR(("shared_region: create: couldn't allocate map\n"));
814 goto done;
815 }
816
817 /* shared regions should always enforce code-signing */
818 vm_map_cs_enforcement_set(map: sub_map, true);
819 assert(vm_map_cs_enforcement(sub_map));
820 assert(pmap_get_vm_map_cs_enforced(vm_map_pmap(sub_map)));
821
822 assert(!sub_map->disable_vmentry_reuse);
823 sub_map->is_nested_map = TRUE;
824
825 /* make the memory entry point to the VM sub map */
826 mem_entry->is_sub_map = TRUE;
827 mem_entry->backing.map = sub_map;
828 mem_entry->size = size;
829 mem_entry->protection = VM_PROT_ALL;
830
831 /* make the shared region point at the memory entry */
832 shared_region->sr_mem_entry = mem_entry_port;
833
834 /* fill in the shared region's environment and settings */
835 shared_region->sr_base_address = base_address;
836 shared_region->sr_size = size;
837 shared_region->sr_pmap_nesting_start = pmap_nesting_start;
838 shared_region->sr_pmap_nesting_size = pmap_nesting_size;
839 shared_region->sr_cpu_type = cputype;
840 shared_region->sr_cpu_subtype = cpu_subtype;
841 shared_region->sr_64bit = (uint8_t)is_64bit;
842#if __ARM_MIXED_PAGE_SIZE__
843 shared_region->sr_page_shift = (uint8_t)target_page_shift;
844#endif /* __ARM_MIXED_PAGE_SIZE__ */
845 shared_region->sr_driverkit = (uint8_t)is_driverkit;
846 shared_region->sr_rsr_version = rsr_version;
847 shared_region->sr_root_dir = root_dir;
848
849 queue_init(&shared_region->sr_q);
850 shared_region->sr_mapping_in_progress = THREAD_NULL;
851 shared_region->sr_slide_in_progress = THREAD_NULL;
852 shared_region->sr_persists = FALSE;
853 shared_region->sr_stale = FALSE;
854 shared_region->sr_timer_call = NULL;
855 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
856
857 /* grab a reference for the caller */
858 shared_region->sr_ref_count = 1;
859
860 shared_region->sr_slide = 0; /* not slid yet */
861
862 /* Initialize UUID and other metadata */
863 memset(s: &shared_region->sr_uuid, c: '\0', n: sizeof(shared_region->sr_uuid));
864 shared_region->sr_uuid_copied = FALSE;
865 shared_region->sr_images_count = 0;
866 shared_region->sr_images = NULL;
867#if __has_feature(ptrauth_calls)
868 shared_region->sr_reslide = reslide;
869 shared_region->sr_num_auth_section = 0;
870 shared_region->sr_next_auth_section = 0;
871 shared_region->sr_auth_section = NULL;
872#endif /* __has_feature(ptrauth_calls) */
873
874done:
875 if (shared_region) {
876 SHARED_REGION_TRACE_INFO(
877 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d,"
878 "base=0x%llx,size=0x%llx) <- "
879 "%p mem=(%p,%p) map=%p pmap=%p\n",
880 (void *)VM_KERNEL_ADDRPERM(root_dir),
881 cputype, cpu_subtype, is_64bit, reslide, is_driverkit,
882 (long long)base_address,
883 (long long)size,
884 (void *)VM_KERNEL_ADDRPERM(shared_region),
885 (void *)VM_KERNEL_ADDRPERM(mem_entry_port),
886 (void *)VM_KERNEL_ADDRPERM(mem_entry),
887 (void *)VM_KERNEL_ADDRPERM(sub_map),
888 (void *)VM_KERNEL_ADDRPERM(sub_map->pmap)));
889 } else {
890 SHARED_REGION_TRACE_INFO(
891 ("shared_region: create(root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d,"
892 "base=0x%llx,size=0x%llx) <- NULL",
893 (void *)VM_KERNEL_ADDRPERM(root_dir),
894 cputype, cpu_subtype, is_64bit, is_driverkit,
895 (long long)base_address,
896 (long long)size));
897 }
898 return shared_region;
899}
900
901/*
902 * Destroy a now-unused shared region.
903 * The shared region is no longer in the queue and can not be looked up.
904 */
905static void
906vm_shared_region_destroy(
907 vm_shared_region_t shared_region)
908{
909 vm_named_entry_t mem_entry;
910 vm_map_t map;
911
912 SHARED_REGION_TRACE_INFO(
913 ("shared_region: -> destroy(%p) (root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
914 (void *)VM_KERNEL_ADDRPERM(shared_region),
915 (void *)VM_KERNEL_ADDRPERM(shared_region->sr_root_dir),
916 shared_region->sr_cpu_type,
917 shared_region->sr_cpu_subtype,
918 shared_region->sr_64bit,
919 shared_region->sr_driverkit));
920
921 assert(shared_region->sr_ref_count == 0);
922 assert(!shared_region->sr_persists);
923
924 mem_entry = mach_memory_entry_from_port(port: shared_region->sr_mem_entry);
925 assert(mem_entry->is_sub_map);
926 assert(!mem_entry->internal);
927 assert(!mem_entry->is_copy);
928 map = mem_entry->backing.map;
929
930 /*
931 * Clean up the pmap first. The virtual addresses that were
932 * entered in this possibly "nested" pmap may have different values
933 * than the VM map's min and max offsets, if the VM sub map was
934 * mapped at a non-zero offset in the processes' main VM maps, which
935 * is usually the case, so the clean-up we do in vm_map_destroy() would
936 * not be enough.
937 */
938 if (map->pmap) {
939 pmap_remove(map: map->pmap,
940 s: (vm_map_offset_t)shared_region->sr_base_address,
941 e: (vm_map_offset_t)(shared_region->sr_base_address + shared_region->sr_size));
942 }
943
944 /*
945 * Release our (one and only) handle on the memory entry.
946 * This will generate a no-senders notification, which will be processed
947 * by ipc_kobject_notify_no_senders(), which will release the one and only
948 * reference on the memory entry and cause it to be destroyed, along
949 * with the VM sub map and its pmap.
950 */
951 mach_memory_entry_port_release(port: shared_region->sr_mem_entry);
952 mem_entry = NULL;
953 shared_region->sr_mem_entry = IPC_PORT_NULL;
954
955 if (shared_region->sr_timer_call) {
956 thread_call_free(call: shared_region->sr_timer_call);
957 }
958
959#if __has_feature(ptrauth_calls)
960 /*
961 * Free the cached copies of slide_info for the AUTH regions.
962 */
963 for (uint_t i = 0; i < shared_region->sr_num_auth_section; ++i) {
964 vm_shared_region_slide_info_t si = shared_region->sr_auth_section[i];
965 if (si != NULL) {
966 vm_object_deallocate(si->si_slide_object);
967 kfree_data(si->si_slide_info_entry,
968 si->si_slide_info_size);
969 kfree_type(struct vm_shared_region_slide_info, si);
970 shared_region->sr_auth_section[i] = NULL;
971 }
972 }
973 if (shared_region->sr_auth_section != NULL) {
974 assert(shared_region->sr_num_auth_section > 0);
975 kfree_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section, shared_region->sr_auth_section);
976 shared_region->sr_auth_section = NULL;
977 shared_region->sr_num_auth_section = 0;
978 }
979#endif /* __has_feature(ptrauth_calls) */
980
981 /* release the shared region structure... */
982 kfree_type(struct vm_shared_region, shared_region);
983
984 SHARED_REGION_TRACE_DEBUG(
985 ("shared_region: destroy(%p) <-\n",
986 (void *)VM_KERNEL_ADDRPERM(shared_region)));
987 shared_region = NULL;
988}
989
990/*
991 * Gets the address of the first (in time) mapping in the shared region.
992 * If used during initial task setup by dyld, task should non-NULL.
993 */
994kern_return_t
995vm_shared_region_start_address(
996 vm_shared_region_t shared_region,
997 mach_vm_offset_t *start_address,
998 task_t task)
999{
1000 kern_return_t kr;
1001 mach_vm_offset_t sr_base_address;
1002 mach_vm_offset_t sr_first_mapping;
1003
1004 SHARED_REGION_TRACE_DEBUG(
1005 ("shared_region: -> start_address(%p)\n",
1006 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1007
1008 vm_shared_region_lock();
1009
1010 /*
1011 * Wait if there's another thread establishing a mapping
1012 * in this shared region right when we're looking at it.
1013 * We want a consistent view of the map...
1014 */
1015 while (shared_region->sr_mapping_in_progress) {
1016 /* wait for our turn... */
1017 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1018 THREAD_UNINT);
1019 }
1020 assert(!shared_region->sr_mapping_in_progress);
1021 assert(shared_region->sr_ref_count > 0);
1022
1023 sr_base_address = shared_region->sr_base_address;
1024 sr_first_mapping = shared_region->sr_first_mapping;
1025
1026 if (sr_first_mapping == (mach_vm_offset_t) -1) {
1027 /* shared region is empty */
1028 kr = KERN_INVALID_ADDRESS;
1029 } else {
1030 kr = KERN_SUCCESS;
1031 *start_address = sr_base_address + sr_first_mapping;
1032 }
1033
1034
1035 uint32_t slide = shared_region->sr_slide;
1036
1037 vm_shared_region_unlock();
1038
1039 /*
1040 * Cache shared region info in the task for telemetry gathering, if we're
1041 * passed in the task. No task lock here as we're still in intial task set up.
1042 */
1043 if (kr == KERN_SUCCESS && task != NULL && task->task_shared_region_slide == -1) {
1044 uint_t sc_header_uuid_offset = offsetof(struct _dyld_cache_header, uuid);
1045 if (copyin((user_addr_t)(*start_address + sc_header_uuid_offset),
1046 (char *)&task->task_shared_region_uuid,
1047 sizeof(task->task_shared_region_uuid)) == 0) {
1048 task->task_shared_region_slide = slide;
1049 }
1050 }
1051
1052 SHARED_REGION_TRACE_DEBUG(
1053 ("shared_region: start_address(%p) <- 0x%llx\n",
1054 (void *)VM_KERNEL_ADDRPERM(shared_region),
1055 (long long)shared_region->sr_base_address));
1056
1057 return kr;
1058}
1059
1060/*
1061 * Look up a pre-existing mapping in shared region, for replacement.
1062 * Takes an extra object reference if found.
1063 */
1064static kern_return_t
1065find_mapping_to_slide(vm_map_t map, vm_map_address_t addr, vm_map_entry_t entry)
1066{
1067 vm_map_entry_t found;
1068
1069 /* find the shared region's map entry to slide */
1070 vm_map_lock_read(map);
1071 if (!vm_map_lookup_entry_allow_pgz(map, address: addr, entry: &found)) {
1072 /* no mapping there */
1073 vm_map_unlock(map);
1074 return KERN_INVALID_ARGUMENT;
1075 }
1076
1077 *entry = *found;
1078 /* extra ref to keep object alive while map is unlocked */
1079 vm_object_reference(VME_OBJECT(found));
1080 vm_map_unlock_read(map);
1081 return KERN_SUCCESS;
1082}
1083
1084static bool
1085shared_region_make_permanent(
1086 vm_shared_region_t sr,
1087 vm_prot_t max_prot)
1088{
1089 if (sr->sr_cpu_type == CPU_TYPE_X86_64) {
1090 return false;
1091 }
1092 if (max_prot & VM_PROT_WRITE) {
1093 /*
1094 * Potentially writable mapping: no major issue with allowing
1095 * it to be replaced since its contents could be modified
1096 * anyway.
1097 */
1098 return false;
1099 }
1100 if (max_prot & VM_PROT_EXECUTE) {
1101 /*
1102 * Potentially executable mapping: some software might want
1103 * to try and replace it to interpose their own code when a
1104 * given routine is called or returns, for example.
1105 * So let's not make it "permanent".
1106 */
1107 return false;
1108 }
1109 /*
1110 * Make this mapping "permanent" to prevent it from being deleted
1111 * and/or replaced with another mapping.
1112 */
1113 return true;
1114}
1115
1116static bool
1117shared_region_tpro_protect(
1118 vm_shared_region_t sr,
1119 vm_prot_t max_prot __unused)
1120{
1121 if (sr->sr_cpu_type != CPU_TYPE_ARM64 ||
1122 (sr->sr_cpu_subtype & ~CPU_SUBTYPE_MASK) != CPU_SUBTYPE_ARM64E) {
1123 return false;
1124 }
1125
1126
1127 /*
1128 * Unless otherwise explicitly requested all other mappings do not get
1129 * TPRO protection.
1130 */
1131 return false;
1132}
1133
1134#if __has_feature(ptrauth_calls)
1135
1136/*
1137 * Determine if this task is actually using pointer signing.
1138 */
1139static boolean_t
1140task_sign_pointers(task_t task)
1141{
1142 if (task->map &&
1143 task->map->pmap &&
1144 !task->map->pmap->disable_jop) {
1145 return TRUE;
1146 }
1147 return FALSE;
1148}
1149
1150/*
1151 * If the shared region contains mappings that are authenticated, then
1152 * remap them into the task private map.
1153 *
1154 * Failures are possible in this routine when jetsam kills a process
1155 * just as dyld is trying to set it up. The vm_map and task shared region
1156 * info get torn down w/o waiting for this thread to finish up.
1157 */
1158__attribute__((noinline))
1159kern_return_t
1160vm_shared_region_auth_remap(vm_shared_region_t sr)
1161{
1162 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
1163 task_t task = current_task();
1164 vm_shared_region_slide_info_t si;
1165 uint_t i;
1166 vm_object_t object;
1167 vm_map_t sr_map;
1168 struct vm_map_entry tmp_entry_store = {0};
1169 vm_map_entry_t tmp_entry = NULL;
1170 vm_map_kernel_flags_t vmk_flags;
1171 vm_map_offset_t map_addr;
1172 kern_return_t kr = KERN_SUCCESS;
1173 boolean_t use_ptr_auth = task_sign_pointers(task);
1174
1175 /*
1176 * Don't do this more than once and avoid any race conditions in finishing it.
1177 */
1178 vm_shared_region_lock();
1179 while (sr->sr_mapping_in_progress) {
1180 /* wait for our turn... */
1181 vm_shared_region_sleep(&sr->sr_mapping_in_progress, THREAD_UNINT);
1182 }
1183 assert(!sr->sr_mapping_in_progress);
1184 assert(sr->sr_ref_count > 0);
1185
1186 /* Just return if already done. */
1187 if (task->shared_region_auth_remapped) {
1188 vm_shared_region_unlock();
1189 return KERN_SUCCESS;
1190 }
1191
1192 /* let others know to wait while we're working in this shared region */
1193 sr->sr_mapping_in_progress = current_thread();
1194 vm_shared_region_unlock();
1195
1196 /*
1197 * Remap any sections with pointer authentications into the private map.
1198 */
1199 for (i = 0; i < sr->sr_num_auth_section; ++i) {
1200 si = sr->sr_auth_section[i];
1201 assert(si != NULL);
1202 assert(si->si_ptrauth);
1203
1204 /*
1205 * We have mapping that needs to be private.
1206 * Look for an existing slid mapping's pager with matching
1207 * object, offset, slide info and shared_region_id to reuse.
1208 */
1209 object = si->si_slide_object;
1210 sr_pager = shared_region_pager_match(object, si->si_start, si,
1211 use_ptr_auth ? task->jop_pid : 0);
1212 if (sr_pager == MEMORY_OBJECT_NULL) {
1213 printf("%s(): shared_region_pager_match() failed\n", __func__);
1214 kr = KERN_FAILURE;
1215 goto done;
1216 }
1217
1218 /*
1219 * verify matching jop_pid for this task and this pager
1220 */
1221 if (use_ptr_auth) {
1222 shared_region_pager_match_task_key(sr_pager, task);
1223 }
1224
1225 sr_map = vm_shared_region_vm_map(sr);
1226 tmp_entry = NULL;
1227
1228 kr = find_mapping_to_slide(sr_map, si->si_slid_address - sr->sr_base_address, &tmp_entry_store);
1229 if (kr != KERN_SUCCESS) {
1230 printf("%s(): find_mapping_to_slide() failed\n", __func__);
1231 goto done;
1232 }
1233 tmp_entry = &tmp_entry_store;
1234
1235 /*
1236 * Check that the object exactly covers the region to slide.
1237 */
1238 if (tmp_entry->vme_end - tmp_entry->vme_start != si->si_end - si->si_start) {
1239 printf("%s(): doesn't fully cover\n", __func__);
1240 kr = KERN_FAILURE;
1241 goto done;
1242 }
1243
1244 /*
1245 * map the pager over the portion of the mapping that needs sliding
1246 */
1247 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
1248 vmk_flags.vmkf_overwrite_immutable = true;
1249 vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
1250 tmp_entry->max_protection);
1251
1252 /* Preserve the TPRO flag if task has TPRO enabled */
1253 vmk_flags.vmf_tpro = (vm_map_tpro(task->map) &&
1254 tmp_entry->used_for_tpro &&
1255 task_is_hardened_binary(task));
1256
1257 map_addr = si->si_slid_address;
1258 kr = vm_map_enter_mem_object(task->map,
1259 &map_addr,
1260 si->si_end - si->si_start,
1261 (mach_vm_offset_t) 0,
1262 vmk_flags,
1263 (ipc_port_t)(uintptr_t) sr_pager,
1264 0,
1265 TRUE,
1266 tmp_entry->protection,
1267 tmp_entry->max_protection,
1268 tmp_entry->inheritance);
1269 memory_object_deallocate(sr_pager);
1270 sr_pager = MEMORY_OBJECT_NULL;
1271 if (kr != KERN_SUCCESS) {
1272 printf("%s(): vm_map_enter_mem_object() failed\n", __func__);
1273 goto done;
1274 }
1275 assertf(map_addr == si->si_slid_address,
1276 "map_addr=0x%llx si_slid_address=0x%llx tmp_entry=%p\n",
1277 (uint64_t)map_addr,
1278 (uint64_t)si->si_slid_address,
1279 tmp_entry);
1280
1281 /* Drop the ref count grabbed by find_mapping_to_slide */
1282 vm_object_deallocate(VME_OBJECT(tmp_entry));
1283 tmp_entry = NULL;
1284 }
1285
1286done:
1287 if (tmp_entry) {
1288 /* Drop the ref count grabbed by find_mapping_to_slide */
1289 vm_object_deallocate(VME_OBJECT(tmp_entry));
1290 tmp_entry = NULL;
1291 }
1292
1293 /*
1294 * Drop any extra reference to the pager in case we're quitting due to an error above.
1295 */
1296 if (sr_pager != MEMORY_OBJECT_NULL) {
1297 memory_object_deallocate(sr_pager);
1298 }
1299
1300 /*
1301 * Mark the region as having it's auth sections remapped.
1302 */
1303 vm_shared_region_lock();
1304 task->shared_region_auth_remapped = TRUE;
1305 assert(sr->sr_mapping_in_progress == current_thread());
1306 sr->sr_mapping_in_progress = THREAD_NULL;
1307 vm_shared_region_wakeup((event_t)&sr->sr_mapping_in_progress);
1308 vm_shared_region_unlock();
1309 return kr;
1310}
1311#endif /* __has_feature(ptrauth_calls) */
1312
1313void
1314vm_shared_region_undo_mappings(
1315 vm_map_t sr_map,
1316 mach_vm_offset_t sr_base_address,
1317 struct _sr_file_mappings *srf_mappings,
1318 struct _sr_file_mappings *srf_mappings_current,
1319 unsigned int srf_current_mappings_count)
1320{
1321 unsigned int j = 0;
1322 vm_shared_region_t shared_region = NULL;
1323 boolean_t reset_shared_region_state = FALSE;
1324 struct _sr_file_mappings *srfmp;
1325 unsigned int mappings_count;
1326 struct shared_file_mapping_slide_np *mappings;
1327
1328 shared_region = vm_shared_region_get(task: current_task());
1329 if (shared_region == NULL) {
1330 printf(format: "Failed to undo mappings because of NULL shared region.\n");
1331 return;
1332 }
1333
1334 shared_region->sr_first_mapping = (mach_vm_offset_t) -1;
1335
1336 if (sr_map == NULL) {
1337 ipc_port_t sr_handle;
1338 vm_named_entry_t sr_mem_entry;
1339
1340 vm_shared_region_lock();
1341 assert(shared_region->sr_ref_count > 0);
1342
1343 while (shared_region->sr_mapping_in_progress) {
1344 /* wait for our turn... */
1345 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1346 THREAD_UNINT);
1347 }
1348 assert(!shared_region->sr_mapping_in_progress);
1349 assert(shared_region->sr_ref_count > 0);
1350 /* let others know we're working in this shared region */
1351 shared_region->sr_mapping_in_progress = current_thread();
1352
1353 vm_shared_region_unlock();
1354
1355 reset_shared_region_state = TRUE;
1356
1357 /* no need to lock because this data is never modified... */
1358 sr_handle = shared_region->sr_mem_entry;
1359 sr_mem_entry = mach_memory_entry_from_port(port: sr_handle);
1360 sr_map = sr_mem_entry->backing.map;
1361 sr_base_address = shared_region->sr_base_address;
1362 }
1363 /*
1364 * Undo the mappings we've established so far.
1365 */
1366 for (srfmp = &srf_mappings[0];
1367 srfmp <= srf_mappings_current;
1368 srfmp++) {
1369 mappings = srfmp->mappings;
1370 mappings_count = srfmp->mappings_count;
1371 if (srfmp == srf_mappings_current) {
1372 mappings_count = srf_current_mappings_count;
1373 }
1374
1375 for (j = 0; j < mappings_count; j++) {
1376 kern_return_t kr2;
1377 mach_vm_offset_t start, end;
1378
1379 if (mappings[j].sms_size == 0) {
1380 /*
1381 * We didn't establish this
1382 * mapping, so nothing to undo.
1383 */
1384 continue;
1385 }
1386 SHARED_REGION_TRACE_INFO(
1387 ("shared_region: mapping[%d]: "
1388 "address:0x%016llx "
1389 "size:0x%016llx "
1390 "offset:0x%016llx "
1391 "maxprot:0x%x prot:0x%x: "
1392 "undoing...\n",
1393 j,
1394 (long long)mappings[j].sms_address,
1395 (long long)mappings[j].sms_size,
1396 (long long)mappings[j].sms_file_offset,
1397 mappings[j].sms_max_prot,
1398 mappings[j].sms_init_prot));
1399 start = (mappings[j].sms_address - sr_base_address);
1400 end = start + mappings[j].sms_size;
1401 start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(sr_map));
1402 end = vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map));
1403 kr2 = vm_map_remove_guard(map: sr_map,
1404 start,
1405 end,
1406 flags: VM_MAP_REMOVE_IMMUTABLE,
1407 KMEM_GUARD_NONE).kmr_return;
1408 assert(kr2 == KERN_SUCCESS);
1409 }
1410 }
1411
1412 if (reset_shared_region_state) {
1413 vm_shared_region_lock();
1414 assert(shared_region->sr_ref_count > 0);
1415 assert(shared_region->sr_mapping_in_progress == current_thread());
1416 /* we're done working on that shared region */
1417 shared_region->sr_mapping_in_progress = THREAD_NULL;
1418 vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
1419 vm_shared_region_unlock();
1420 reset_shared_region_state = FALSE;
1421 }
1422
1423 vm_shared_region_deallocate(shared_region);
1424}
1425
1426/*
1427 * First part of vm_shared_region_map_file(). Split out to
1428 * avoid kernel stack overflow.
1429 */
1430__attribute__((noinline))
1431static kern_return_t
1432vm_shared_region_map_file_setup(
1433 vm_shared_region_t shared_region,
1434 int sr_file_mappings_count,
1435 struct _sr_file_mappings *sr_file_mappings,
1436 unsigned int *mappings_to_slide_cnt,
1437 struct shared_file_mapping_slide_np **mappings_to_slide,
1438 mach_vm_offset_t *slid_mappings,
1439 memory_object_control_t *slid_file_controls,
1440 mach_vm_offset_t *sfm_min_address,
1441 mach_vm_offset_t *sfm_max_address,
1442 vm_map_t *sr_map_ptr,
1443 vm_map_offset_t *lowest_unnestable_addr_ptr,
1444 unsigned int vmsr_num_slides)
1445{
1446 kern_return_t kr = KERN_SUCCESS;
1447 memory_object_control_t file_control;
1448 vm_object_t file_object;
1449 ipc_port_t sr_handle;
1450 vm_named_entry_t sr_mem_entry;
1451 vm_map_t sr_map;
1452 mach_vm_offset_t sr_base_address;
1453 unsigned int i = 0;
1454 mach_port_t map_port;
1455 vm_map_offset_t target_address;
1456 vm_object_t object;
1457 vm_object_size_t obj_size;
1458 vm_map_offset_t lowest_unnestable_addr = 0;
1459 vm_map_kernel_flags_t vmk_flags;
1460 mach_vm_offset_t sfm_end;
1461 uint32_t mappings_count;
1462 struct shared_file_mapping_slide_np *mappings;
1463 struct _sr_file_mappings *srfmp;
1464
1465 vm_shared_region_lock();
1466 assert(shared_region->sr_ref_count > 0);
1467
1468 /*
1469 * Make sure we handle only one mapping at a time in a given
1470 * shared region, to avoid race conditions. This should not
1471 * happen frequently...
1472 */
1473 while (shared_region->sr_mapping_in_progress) {
1474 /* wait for our turn... */
1475 vm_shared_region_sleep(&shared_region->sr_mapping_in_progress,
1476 THREAD_UNINT);
1477 }
1478 assert(!shared_region->sr_mapping_in_progress);
1479 assert(shared_region->sr_ref_count > 0);
1480
1481
1482 /* let others know we're working in this shared region */
1483 shared_region->sr_mapping_in_progress = current_thread();
1484
1485 /*
1486 * Did someone race in and map this shared region already?
1487 */
1488 if (shared_region->sr_first_mapping != -1) {
1489 vm_shared_region_unlock();
1490#if DEVELOPMENT || DEBUG
1491 printf("shared_region: caught race in map and slide\n");
1492#endif /* DEVELOPMENT || DEBUG */
1493 return KERN_FAILURE;
1494 }
1495
1496 vm_shared_region_unlock();
1497
1498 /* no need to lock because this data is never modified... */
1499 sr_handle = shared_region->sr_mem_entry;
1500 sr_mem_entry = mach_memory_entry_from_port(port: sr_handle);
1501 sr_map = sr_mem_entry->backing.map;
1502 sr_base_address = shared_region->sr_base_address;
1503
1504 SHARED_REGION_TRACE_DEBUG(
1505 ("shared_region: -> map(%p)\n",
1506 (void *)VM_KERNEL_ADDRPERM(shared_region)));
1507
1508 mappings_count = 0;
1509 mappings = NULL;
1510 srfmp = NULL;
1511
1512 /* process all the files to be mapped */
1513 for (srfmp = &sr_file_mappings[0];
1514 srfmp < &sr_file_mappings[sr_file_mappings_count];
1515 srfmp++) {
1516 mappings_count = srfmp->mappings_count;
1517 mappings = srfmp->mappings;
1518 file_control = srfmp->file_control;
1519
1520 if (mappings_count == 0) {
1521 /* no mappings here... */
1522 continue;
1523 }
1524
1525 /*
1526 * The code below can only correctly "slide" (perform relocations) for one
1527 * value of the slide amount. So if a file has a non-zero slide, it has to
1528 * match any previous value. A zero slide value is ok for things that are
1529 * just directly mapped.
1530 */
1531 if (shared_region->sr_slide == 0 && srfmp->slide != 0) {
1532 shared_region->sr_slide = srfmp->slide;
1533 } else if (shared_region->sr_slide != 0 &&
1534 srfmp->slide != 0 &&
1535 shared_region->sr_slide != srfmp->slide) {
1536 SHARED_REGION_TRACE_ERROR(
1537 ("shared_region: more than 1 non-zero slide value amount "
1538 "slide 1:0x%x slide 2:0x%x\n ",
1539 shared_region->sr_slide, srfmp->slide));
1540 kr = KERN_INVALID_ARGUMENT;
1541 break;
1542 }
1543
1544#if __arm64__
1545 if ((shared_region->sr_64bit ||
1546 page_shift_user32 == SIXTEENK_PAGE_SHIFT) &&
1547 ((srfmp->slide & SIXTEENK_PAGE_MASK) != 0)) {
1548 printf(format: "FOURK_COMPAT: %s: rejecting mis-aligned slide 0x%x\n",
1549 __FUNCTION__, srfmp->slide);
1550 kr = KERN_INVALID_ARGUMENT;
1551 break;
1552 }
1553#endif /* __arm64__ */
1554
1555 /*
1556 * An FD of -1 means we need to copyin the data to an anonymous object.
1557 */
1558 if (srfmp->fd == -1) {
1559 assert(mappings_count == 1);
1560 SHARED_REGION_TRACE_INFO(
1561 ("shared_region: mapping[0]: "
1562 "address:0x%016llx size:0x%016llx offset/addr:0x%016llx "
1563 "maxprot:0x%x prot:0x%x fd==-1\n",
1564 (long long)mappings[0].sms_address,
1565 (long long)mappings[0].sms_size,
1566 (long long)mappings[0].sms_file_offset,
1567 mappings[0].sms_max_prot,
1568 mappings[0].sms_init_prot));
1569
1570 /*
1571 * We need an anon object to hold the data in the shared region.
1572 * The size needs to be suitable to map into kernel.
1573 */
1574 obj_size = vm_object_round_page(mappings->sms_size);
1575 object = vm_object_allocate(size: obj_size);
1576 if (object == VM_OBJECT_NULL) {
1577 printf(format: "%s(): for fd==-1 vm_object_allocate() failed\n", __func__);
1578 kr = KERN_RESOURCE_SHORTAGE;
1579 break;
1580 }
1581
1582 /*
1583 * map the object into the kernel
1584 */
1585 vm_map_offset_t kaddr = 0;
1586 vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
1587 vmk_flags.vmkf_no_copy_on_read = 1;
1588 vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
1589
1590 kr = vm_map_enter(map: kernel_map,
1591 address: &kaddr,
1592 size: obj_size,
1593 mask: 0,
1594 vmk_flags,
1595 object,
1596 offset: 0,
1597 FALSE,
1598 cur_protection: (VM_PROT_READ | VM_PROT_WRITE),
1599 max_protection: (VM_PROT_READ | VM_PROT_WRITE),
1600 VM_INHERIT_NONE);
1601 if (kr != KERN_SUCCESS) {
1602 printf(format: "%s(): for fd==-1 vm_map_enter() in kernel failed\n", __func__);
1603 vm_object_deallocate(object);
1604 object = VM_OBJECT_NULL;
1605 break;
1606 }
1607
1608 /*
1609 * We'll need another reference to keep the object alive after
1610 * we vm_map_remove() it from the kernel.
1611 */
1612 vm_object_reference(object);
1613
1614 /*
1615 * Zero out the object's pages, so we can't leak data.
1616 */
1617 bzero(s: (void *)kaddr, n: obj_size);
1618
1619 /*
1620 * Copyin the data from dyld to the new object.
1621 * Then remove the kernel mapping.
1622 */
1623 int copyin_err =
1624 copyin((user_addr_t)mappings->sms_file_offset, (void *)kaddr, mappings->sms_size);
1625 vm_map_remove(map: kernel_map, start: kaddr, end: kaddr + obj_size);
1626 if (copyin_err) {
1627 printf(format: "%s(): for fd==-1 copyin() failed, errno=%d\n", __func__, copyin_err);
1628 switch (copyin_err) {
1629 case EPERM:
1630 case EACCES:
1631 kr = KERN_PROTECTION_FAILURE;
1632 break;
1633 case EFAULT:
1634 kr = KERN_INVALID_ADDRESS;
1635 break;
1636 default:
1637 kr = KERN_FAILURE;
1638 break;
1639 }
1640 vm_object_deallocate(object);
1641 object = VM_OBJECT_NULL;
1642 break;
1643 }
1644
1645 /*
1646 * Finally map the object into the shared region.
1647 */
1648 target_address = (vm_map_offset_t)(mappings[0].sms_address - sr_base_address);
1649 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1650 vmk_flags.vmkf_already = TRUE;
1651 vmk_flags.vmkf_no_copy_on_read = 1;
1652 vmk_flags.vmf_permanent = shared_region_make_permanent(sr: shared_region,
1653 max_prot: mappings[0].sms_max_prot);
1654
1655 kr = vm_map_enter(
1656 map: sr_map,
1657 address: &target_address,
1658 vm_map_round_page(mappings[0].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1659 mask: 0,
1660 vmk_flags,
1661 object,
1662 offset: 0,
1663 TRUE,
1664 cur_protection: mappings[0].sms_init_prot & VM_PROT_ALL,
1665 max_protection: mappings[0].sms_max_prot & VM_PROT_ALL,
1666 VM_INHERIT_DEFAULT);
1667 if (kr != KERN_SUCCESS) {
1668 printf(format: "%s(): for fd==-1 vm_map_enter() in SR failed\n", __func__);
1669 vm_object_deallocate(object);
1670 break;
1671 }
1672
1673 if (mappings[0].sms_address < *sfm_min_address) {
1674 *sfm_min_address = mappings[0].sms_address;
1675 }
1676
1677 if (os_add_overflow(mappings[0].sms_address,
1678 mappings[0].sms_size,
1679 &sfm_end) ||
1680 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1681 mappings[0].sms_address)) {
1682 /* overflow */
1683 kr = KERN_INVALID_ARGUMENT;
1684 break;
1685 }
1686
1687 if (sfm_end > *sfm_max_address) {
1688 *sfm_max_address = sfm_end;
1689 }
1690
1691 continue;
1692 }
1693
1694 /* get the VM object associated with the file to be mapped */
1695 file_object = memory_object_control_to_vm_object(control: file_control);
1696 assert(file_object);
1697
1698 if (!file_object->object_is_shared_cache) {
1699 vm_object_lock(file_object);
1700 file_object->object_is_shared_cache = true;
1701 vm_object_unlock(file_object);
1702 }
1703
1704#if CONFIG_SECLUDED_MEMORY
1705 /*
1706 * Camera will need the shared cache, so don't put the pages
1707 * on the secluded queue, assume that's the primary region.
1708 * Also keep DEXT shared cache pages off secluded.
1709 */
1710 if (primary_system_shared_region == NULL ||
1711 primary_system_shared_region == shared_region ||
1712 shared_region->sr_driverkit) {
1713 memory_object_mark_eligible_for_secluded(file_control, FALSE);
1714 }
1715#endif /* CONFIG_SECLUDED_MEMORY */
1716
1717 /* establish the mappings for that file */
1718 for (i = 0; i < mappings_count; i++) {
1719 SHARED_REGION_TRACE_INFO(
1720 ("shared_region: mapping[%d]: "
1721 "address:0x%016llx size:0x%016llx offset:0x%016llx "
1722 "maxprot:0x%x prot:0x%x\n",
1723 i,
1724 (long long)mappings[i].sms_address,
1725 (long long)mappings[i].sms_size,
1726 (long long)mappings[i].sms_file_offset,
1727 mappings[i].sms_max_prot,
1728 mappings[i].sms_init_prot));
1729
1730 if (mappings[i].sms_address < *sfm_min_address) {
1731 *sfm_min_address = mappings[i].sms_address;
1732 }
1733
1734 if (os_add_overflow(mappings[i].sms_address,
1735 mappings[i].sms_size,
1736 &sfm_end) ||
1737 (vm_map_round_page(sfm_end, VM_MAP_PAGE_MASK(sr_map)) <
1738 mappings[i].sms_address)) {
1739 /* overflow */
1740 kr = KERN_INVALID_ARGUMENT;
1741 break;
1742 }
1743
1744 if (sfm_end > *sfm_max_address) {
1745 *sfm_max_address = sfm_end;
1746 }
1747
1748 if (mappings[i].sms_init_prot & VM_PROT_ZF) {
1749 /* zero-filled memory */
1750 map_port = MACH_PORT_NULL;
1751 } else {
1752 /* file-backed memory */
1753 __IGNORE_WCASTALIGN(map_port = (ipc_port_t) file_object->pager);
1754 }
1755
1756 /*
1757 * Remember which mappings need sliding.
1758 */
1759 if (mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1760 if (*mappings_to_slide_cnt == vmsr_num_slides) {
1761 SHARED_REGION_TRACE_INFO(
1762 ("shared_region: mapping[%d]: "
1763 "address:0x%016llx size:0x%016llx "
1764 "offset:0x%016llx "
1765 "maxprot:0x%x prot:0x%x "
1766 "too many mappings to slide...\n",
1767 i,
1768 (long long)mappings[i].sms_address,
1769 (long long)mappings[i].sms_size,
1770 (long long)mappings[i].sms_file_offset,
1771 mappings[i].sms_max_prot,
1772 mappings[i].sms_init_prot));
1773 } else {
1774 mappings_to_slide[*mappings_to_slide_cnt] = &mappings[i];
1775 *mappings_to_slide_cnt += 1;
1776 }
1777 }
1778
1779 /* mapping's address is relative to the shared region base */
1780 target_address = (vm_map_offset_t)(mappings[i].sms_address - sr_base_address);
1781
1782 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
1783 vmk_flags.vmkf_already = TRUE;
1784 /* no copy-on-read for mapped binaries */
1785 vmk_flags.vmkf_no_copy_on_read = 1;
1786 vmk_flags.vmf_permanent = shared_region_make_permanent(
1787 sr: shared_region,
1788 max_prot: mappings[i].sms_max_prot);
1789 vmk_flags.vmf_tpro = shared_region_tpro_protect(
1790 sr: shared_region,
1791 max_prot: mappings[i].sms_max_prot);
1792
1793 /* establish that mapping, OK if it's "already" there */
1794 if (map_port == MACH_PORT_NULL) {
1795 /*
1796 * We want to map some anonymous memory in a shared region.
1797 * We have to create the VM object now, so that it can be mapped "copy-on-write".
1798 */
1799 obj_size = vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map));
1800 object = vm_object_allocate(size: obj_size);
1801 if (object == VM_OBJECT_NULL) {
1802 kr = KERN_RESOURCE_SHORTAGE;
1803 } else {
1804 kr = vm_map_enter(
1805 map: sr_map,
1806 address: &target_address,
1807 vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1808 mask: 0,
1809 vmk_flags,
1810 object,
1811 offset: 0,
1812 TRUE,
1813 cur_protection: mappings[i].sms_init_prot & VM_PROT_ALL,
1814 max_protection: mappings[i].sms_max_prot & VM_PROT_ALL,
1815 VM_INHERIT_DEFAULT);
1816 }
1817 } else {
1818 object = VM_OBJECT_NULL; /* no anonymous memory here */
1819 kr = vm_map_enter_mem_object(
1820 map: sr_map,
1821 address: &target_address,
1822 vm_map_round_page(mappings[i].sms_size, VM_MAP_PAGE_MASK(sr_map)),
1823 mask: 0,
1824 vmk_flags,
1825 port: map_port,
1826 offset: mappings[i].sms_file_offset,
1827 TRUE,
1828 cur_protection: mappings[i].sms_init_prot & VM_PROT_ALL,
1829 max_protection: mappings[i].sms_max_prot & VM_PROT_ALL,
1830 VM_INHERIT_DEFAULT);
1831 }
1832
1833 if (kr == KERN_SUCCESS) {
1834 /*
1835 * Record the first successful mapping(s) in the shared
1836 * region by file. We're protected by "sr_mapping_in_progress"
1837 * here, so no need to lock "shared_region".
1838 *
1839 * Note that if we have an AOT shared cache (ARM) for a
1840 * translated task, then it's always the first file.
1841 * The original "native" (i.e. x86) shared cache is the
1842 * second file.
1843 */
1844
1845 if (shared_region->sr_first_mapping == (mach_vm_offset_t)-1) {
1846 shared_region->sr_first_mapping = target_address;
1847 }
1848
1849 if (*mappings_to_slide_cnt > 0 &&
1850 mappings_to_slide[*mappings_to_slide_cnt - 1] == &mappings[i]) {
1851 slid_mappings[*mappings_to_slide_cnt - 1] = target_address;
1852 slid_file_controls[*mappings_to_slide_cnt - 1] = file_control;
1853 }
1854
1855 /*
1856 * Record the lowest writable address in this
1857 * sub map, to log any unexpected unnesting below
1858 * that address (see log_unnest_badness()).
1859 */
1860 if ((mappings[i].sms_init_prot & VM_PROT_WRITE) &&
1861 sr_map->is_nested_map &&
1862 (lowest_unnestable_addr == 0 ||
1863 (target_address < lowest_unnestable_addr))) {
1864 lowest_unnestable_addr = target_address;
1865 }
1866 } else {
1867 if (map_port == MACH_PORT_NULL) {
1868 /*
1869 * Get rid of the VM object we just created
1870 * but failed to map.
1871 */
1872 vm_object_deallocate(object);
1873 object = VM_OBJECT_NULL;
1874 }
1875 if (kr == KERN_MEMORY_PRESENT) {
1876 /*
1877 * This exact mapping was already there:
1878 * that's fine.
1879 */
1880 SHARED_REGION_TRACE_INFO(
1881 ("shared_region: mapping[%d]: "
1882 "address:0x%016llx size:0x%016llx "
1883 "offset:0x%016llx "
1884 "maxprot:0x%x prot:0x%x "
1885 "already mapped...\n",
1886 i,
1887 (long long)mappings[i].sms_address,
1888 (long long)mappings[i].sms_size,
1889 (long long)mappings[i].sms_file_offset,
1890 mappings[i].sms_max_prot,
1891 mappings[i].sms_init_prot));
1892 /*
1893 * We didn't establish this mapping ourselves;
1894 * let's reset its size, so that we do not
1895 * attempt to undo it if an error occurs later.
1896 */
1897 mappings[i].sms_size = 0;
1898 kr = KERN_SUCCESS;
1899 } else {
1900 break;
1901 }
1902 }
1903 }
1904
1905 if (kr != KERN_SUCCESS) {
1906 break;
1907 }
1908 }
1909
1910 if (kr != KERN_SUCCESS) {
1911 /* the last mapping we tried (mappings[i]) failed ! */
1912 assert(i < mappings_count);
1913 SHARED_REGION_TRACE_ERROR(
1914 ("shared_region: mapping[%d]: "
1915 "address:0x%016llx size:0x%016llx "
1916 "offset:0x%016llx "
1917 "maxprot:0x%x prot:0x%x failed 0x%x\n",
1918 i,
1919 (long long)mappings[i].sms_address,
1920 (long long)mappings[i].sms_size,
1921 (long long)mappings[i].sms_file_offset,
1922 mappings[i].sms_max_prot,
1923 mappings[i].sms_init_prot,
1924 kr));
1925
1926 /*
1927 * Respect the design of vm_shared_region_undo_mappings():
1928 * we are holding sr_mapping_in_progress here, so sr_map must not be NULL.
1929 * Otherwise vm_shared_region_undo_mappings() would block waiting for
1930 * sr_mapping_in_progress to become NULL, which would never happen.
1931 */
1932 assert(sr_map != NULL);
1933 /* undo all the previous mappings */
1934 vm_shared_region_undo_mappings(sr_map, sr_base_address, srf_mappings: sr_file_mappings, srf_mappings_current: srfmp, srf_current_mappings_count: i);
1935 return kr;
1936 }
1937
1938 *lowest_unnestable_addr_ptr = lowest_unnestable_addr;
1939 *sr_map_ptr = sr_map;
1940 return KERN_SUCCESS;
1941}
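
/*
 * Illustrative sketch only (never compiled): the bounds accumulation that
 * vm_shared_region_map_file_setup() performs inline above, factored into a
 * hypothetical helper. "sfm_accumulate_bounds" is not a real XNU routine;
 * it only restates the os_add_overflow() + round-to-page checks.
 */
#if 0
static kern_return_t
sfm_accumulate_bounds(
	vm_map_t sr_map,
	mach_vm_offset_t sms_address,
	mach_vm_size_t sms_size,
	mach_vm_offset_t *min_address, /* in/out */
	mach_vm_offset_t *max_address) /* in/out */
{
	mach_vm_offset_t end;

	if (sms_address < *min_address) {
		*min_address = sms_address;
	}
	/* reject a mapping whose end would wrap around the address space */
	if (os_add_overflow(sms_address, sms_size, &end) ||
	    (vm_map_round_page(end, VM_MAP_PAGE_MASK(sr_map)) < sms_address)) {
		return KERN_INVALID_ARGUMENT;
	}
	if (end > *max_address) {
		*max_address = end;
	}
	return KERN_SUCCESS;
}
#endif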
1942
1943/* forward declaration */
1944__attribute__((noinline))
1945static void
1946vm_shared_region_map_file_final(
1947 vm_shared_region_t shared_region,
1948 vm_map_t sr_map,
1949 mach_vm_offset_t sfm_min_address,
1950 mach_vm_offset_t sfm_max_address);
1951
1952/*
1953 * Establish some mappings of a file in the shared region.
1954 * This is used by "dyld" via the shared_region_map_np() system call
1955 * to populate the shared region with the appropriate shared cache.
1956 *
1957 * One could also call it several times to incrementally load several
1958 * libraries, as long as they do not overlap.
1959 * It will return KERN_SUCCESS if the mappings were successfully established
1960 * or if they were already established identically by another process.
1961 */
1962__attribute__((noinline))
1963kern_return_t
1964vm_shared_region_map_file(
1965 vm_shared_region_t shared_region,
1966 int sr_file_mappings_count,
1967 struct _sr_file_mappings *sr_file_mappings)
1968{
1969 kern_return_t kr = KERN_SUCCESS;
1970 unsigned int i;
1971 unsigned int mappings_to_slide_cnt = 0;
1972 mach_vm_offset_t sfm_min_address = (mach_vm_offset_t)-1;
1973 mach_vm_offset_t sfm_max_address = 0;
1974 vm_map_t sr_map = NULL;
1975 vm_map_offset_t lowest_unnestable_addr = 0;
1976 unsigned int vmsr_num_slides = 0;
1977 typedef mach_vm_offset_t slid_mappings_t __kernel_data_semantics;
1978 slid_mappings_t *slid_mappings = NULL; /* [0..vmsr_num_slides] */
1979 memory_object_control_t *slid_file_controls = NULL; /* [0..vmsr_num_slides] */
1980 struct shared_file_mapping_slide_np **mappings_to_slide = NULL; /* [0..vmsr_num_slides] */
1981 struct _sr_file_mappings *srfmp;
1982
1983 /*
1984 * Figure out how many of the mappings have slides.
1985 */
1986 for (srfmp = &sr_file_mappings[0];
1987 srfmp < &sr_file_mappings[sr_file_mappings_count];
1988 srfmp++) {
1989 for (i = 0; i < srfmp->mappings_count; ++i) {
1990 if (srfmp->mappings[i].sms_max_prot & VM_PROT_SLIDE) {
1991 ++vmsr_num_slides;
1992 }
1993 }
1994 }
1995
1996 /* Allocate per slide data structures */
1997 if (vmsr_num_slides > 0) {
1998 slid_mappings =
1999 kalloc_data(vmsr_num_slides * sizeof(*slid_mappings), Z_WAITOK);
2000 slid_file_controls =
2001 kalloc_type(memory_object_control_t, vmsr_num_slides, Z_WAITOK);
2002 mappings_to_slide =
2003 kalloc_type(struct shared_file_mapping_slide_np *, vmsr_num_slides, Z_WAITOK | Z_ZERO);
2004 }
2005
2006 kr = vm_shared_region_map_file_setup(shared_region, sr_file_mappings_count, sr_file_mappings,
2007 mappings_to_slide_cnt: &mappings_to_slide_cnt, mappings_to_slide, slid_mappings, slid_file_controls,
2008 sfm_min_address: &sfm_min_address, sfm_max_address: &sfm_max_address, sr_map_ptr: &sr_map, lowest_unnestable_addr_ptr: &lowest_unnestable_addr, vmsr_num_slides);
2009 if (kr != KERN_SUCCESS) {
2010 vm_shared_region_lock();
2011 goto done;
2012 }
2013 assert(vmsr_num_slides == mappings_to_slide_cnt);
2014
2015 /*
2016 * The call above installed direct mappings to the shared cache file.
2017 * Now we go back and overwrite the mappings that need relocation
2018 * with a special shared region pager.
2019 *
2020 * Note that the pager needs to copyin() data from the very mappings
2021 * that the previous pass just established, which is why the sliding
2022 * is done in a separate pass.
2023 */
2024#if __has_feature(ptrauth_calls)
2025 /*
2026 * Allocate the storage needed for any sr_auth_section entries.
2027 */
2028 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2029 if (shared_region->sr_cpu_type == CPU_TYPE_ARM64 &&
2030 shared_region->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2031 !(mappings_to_slide[i]->sms_max_prot & VM_PROT_NOAUTH)) {
2032 ++shared_region->sr_num_auth_section;
2033 }
2034 }
2035 if (shared_region->sr_num_auth_section > 0) {
2036 shared_region->sr_auth_section =
2037 kalloc_type(vm_shared_region_slide_info_t, shared_region->sr_num_auth_section,
2038 Z_WAITOK | Z_ZERO);
2039 }
2040#endif /* __has_feature(ptrauth_calls) */
2041 for (i = 0; i < mappings_to_slide_cnt; ++i) {
2042 kr = vm_shared_region_slide(shared_region->sr_slide,
2043 mappings_to_slide[i]->sms_file_offset,
2044 mappings_to_slide[i]->sms_size,
2045 mappings_to_slide[i]->sms_slide_start,
2046 mappings_to_slide[i]->sms_slide_size,
2047 slid_mappings[i],
2048 slid_file_controls[i],
2049 mappings_to_slide[i]->sms_max_prot);
2050 if (kr != KERN_SUCCESS) {
2051 SHARED_REGION_TRACE_ERROR(
2052 ("shared_region: region_slide("
2053 "slide:0x%x start:0x%016llx "
2054 "size:0x%016llx) failed 0x%x\n",
2055 shared_region->sr_slide,
2056 (long long)mappings_to_slide[i]->sms_slide_start,
2057 (long long)mappings_to_slide[i]->sms_slide_size,
2058 kr));
2059 vm_shared_region_undo_mappings(sr_map, sr_base_address: shared_region->sr_base_address,
2060 srf_mappings: &sr_file_mappings[0],
2061 srf_mappings_current: &sr_file_mappings[sr_file_mappings_count - 1],
2062 srf_current_mappings_count: sr_file_mappings_count);
2063 vm_shared_region_lock();
2064 goto done;
2065 }
2066 }
2067
2068 assert(kr == KERN_SUCCESS);
2069
2070 /* adjust the map's "lowest_unnestable_start" */
2071 lowest_unnestable_addr &= ~(pmap_shared_region_size_min(map: sr_map->pmap) - 1);
2072 if (lowest_unnestable_addr != sr_map->lowest_unnestable_start) {
2073 vm_map_lock(sr_map);
2074 sr_map->lowest_unnestable_start = lowest_unnestable_addr;
2075 vm_map_unlock(sr_map);
2076 }
2077
2078 vm_shared_region_lock();
2079 assert(shared_region->sr_ref_count > 0);
2080 assert(shared_region->sr_mapping_in_progress == current_thread());
2081
2082 vm_shared_region_map_file_final(shared_region, sr_map, sfm_min_address, sfm_max_address);
2083
2084done:
2085 /*
2086 * We're done working on that shared region.
2087 * Wake up any waiting threads.
2088 */
2089 assert(shared_region->sr_mapping_in_progress == current_thread());
2090 shared_region->sr_mapping_in_progress = THREAD_NULL;
2091 vm_shared_region_wakeup((event_t) &shared_region->sr_mapping_in_progress);
2092 vm_shared_region_unlock();
2093
2094#if __has_feature(ptrauth_calls)
2095 if (kr == KERN_SUCCESS) {
2096 /*
2097 * Since authenticated mappings were just added to the shared region,
2098 * go back and remap them into private mappings for this task.
2099 */
2100 kr = vm_shared_region_auth_remap(shared_region);
2101 }
2102#endif /* __has_feature(ptrauth_calls) */
2103
2104 /* Cache shared region info needed for telemetry in the task */
2105 task_t task;
2106 if (kr == KERN_SUCCESS && (task = current_task())->task_shared_region_slide == -1) {
2107 mach_vm_offset_t start_address;
2108 (void)vm_shared_region_start_address(shared_region, start_address: &start_address, task);
2109 }
2110
2111 SHARED_REGION_TRACE_DEBUG(
2112 ("shared_region: map(%p) <- 0x%x \n",
2113 (void *)VM_KERNEL_ADDRPERM(shared_region), kr));
2114 if (vmsr_num_slides > 0) {
2115 kfree_data(slid_mappings, vmsr_num_slides * sizeof(*slid_mappings));
2116 kfree_type(memory_object_control_t, vmsr_num_slides, slid_file_controls);
2117 kfree_type(struct shared_file_mapping_slide_np *, vmsr_num_slides,
2118 mappings_to_slide);
2119 }
2120 return kr;
2121}
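
/*
 * Illustrative sketch only (never compiled): the two-pass structure of
 * vm_shared_region_map_file() above, with locking, telemetry and error
 * handling elided. Names mirror the real code; nothing new is introduced.
 */
#if 0
	/* pass 1: install direct mappings of the cache file and note which
	 * of them carry VM_PROT_SLIDE, i.e. contain rebase chains */
	kr = vm_shared_region_map_file_setup(shared_region, ...);

	/* pass 2: overwrite each slid range with a shared region pager that
	 * reads the relocation info and rewrites page contents as they are
	 * brought in */
	for (i = 0; i < mappings_to_slide_cnt; i++) {
		kr = vm_shared_region_slide(shared_region->sr_slide, ...);
	}
#endif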
2122
2123/*
2124 * Final part of vm_shared_region_map_file().
2125 * Kept in a separate function to avoid blowing out the stack.
2126 */
2127__attribute__((noinline))
2128static void
2129vm_shared_region_map_file_final(
2130 vm_shared_region_t shared_region,
2131 vm_map_t sr_map __unused,
2132 mach_vm_offset_t sfm_min_address __unused,
2133 mach_vm_offset_t sfm_max_address __unused)
2134{
2135 struct _dyld_cache_header sr_cache_header;
2136 int error;
2137 size_t image_array_length;
2138 struct _dyld_cache_image_text_info *sr_image_layout;
2139 boolean_t locally_built = FALSE;
2140
2141
2142 /*
2143 * Copy the shared region UUID into the shared region structure.
2144 * We do this indirectly by first copying in the whole shared cache header
2145 * and then taking the UUID from it, because we also need to look
2146 * at other fields of the shared cache header.
2147 */
2148 if (!shared_region->sr_uuid_copied) {
2149 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping),
2150 (char *)&sr_cache_header,
2151 sizeof(sr_cache_header));
2152 if (error == 0) {
2153 memcpy(dst: &shared_region->sr_uuid, src: &sr_cache_header.uuid, n: sizeof(shared_region->sr_uuid));
2154 shared_region->sr_uuid_copied = TRUE;
2155 locally_built = sr_cache_header.locallyBuiltCache;
2156 } else {
2157#if DEVELOPMENT || DEBUG
2158 panic("shared_region: copyin shared_cache_header(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2159 "offset:0 size:0x%016llx) failed with %d\n",
2160 (long long)shared_region->sr_base_address,
2161 (long long)shared_region->sr_first_mapping,
2162 (long long)sizeof(sr_cache_header),
2163 error);
2164#endif /* DEVELOPMENT || DEBUG */
2165 shared_region->sr_uuid_copied = FALSE;
2166 }
2167 }
2168
2169 /*
2170 * We save a pointer to the shared cache mapped by the "init task", i.e. launchd. This is used by
2171 * the stackshot code to reduce output size in the common case that everything maps the same shared cache.
2172 * One gotcha is that a "userspace reboot" can cause a new shared region to become the primary
2173 * region. In that case, launchd re-exec's itself, so we may go through this path multiple times; we
2174 * let the most recent one win.
2175 *
2176 * Check whether the shared cache is a locally built one and, if so, copy in the shared cache layout.
2177 */
2178 bool is_init_task = (task_pid(task: current_task()) == 1);
2179 if (shared_region->sr_uuid_copied && is_init_task) {
2180 /* Copy in the shared cache layout if we're running with a locally built shared cache */
2181 if (locally_built) {
2182 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_START);
2183 image_array_length = (size_t)(sr_cache_header.imagesTextCount * sizeof(struct _dyld_cache_image_text_info));
2184 sr_image_layout = kalloc_data(image_array_length, Z_WAITOK);
2185 error = copyin((user_addr_t)(shared_region->sr_base_address + shared_region->sr_first_mapping +
2186 sr_cache_header.imagesTextOffset), (char *)sr_image_layout, image_array_length);
2187 if (error == 0) {
2188 if (sr_cache_header.imagesTextCount >= UINT32_MAX) {
2189 panic("shared_region: sr_cache_header.imagesTextCount >= UINT32_MAX");
2190 }
2191 shared_region->sr_images = kalloc_data((vm_size_t)(sr_cache_header.imagesTextCount * sizeof(struct dyld_uuid_info_64)), Z_WAITOK);
2192 for (size_t index = 0; index < sr_cache_header.imagesTextCount; index++) {
2193 memcpy(dst: (char *)&shared_region->sr_images[index].imageUUID, src: (char *)&sr_image_layout[index].uuid,
2194 n: sizeof(shared_region->sr_images[index].imageUUID));
2195 shared_region->sr_images[index].imageLoadAddress = sr_image_layout[index].loadAddress;
2196 }
2197
2198 shared_region->sr_images_count = (uint32_t) sr_cache_header.imagesTextCount;
2199 } else {
2200#if DEVELOPMENT || DEBUG
2201 panic("shared_region: copyin shared_cache_layout(sr_base_addr:0x%016llx sr_first_mapping:0x%016llx "
2202 "offset:0x%016llx size:0x%016llx) failed with %d\n",
2203 (long long)shared_region->sr_base_address,
2204 (long long)shared_region->sr_first_mapping,
2205 (long long)sr_cache_header.imagesTextOffset,
2206 (long long)image_array_length,
2207 error);
2208#endif /* DEVELOPMENT || DEBUG */
2209 }
2210 KDBG((MACHDBG_CODE(DBG_MACH_SHAREDREGION, PROCESS_SHARED_CACHE_LAYOUT)) | DBG_FUNC_END, shared_region->sr_images_count);
2211 kfree_data(sr_image_layout, image_array_length);
2212 sr_image_layout = NULL;
2213 }
2214 primary_system_shared_region = shared_region;
2215 }
2216
2217 /*
2218 * If we succeeded, we know the bounds of the shared region.
2219 * Trim our pmaps to only cover this range (if applicable to
2220 * this platform).
2221 */
2222 if (VM_MAP_PAGE_SHIFT(current_map()) == VM_MAP_PAGE_SHIFT(map: sr_map)) {
2223 pmap_trim(current_map()->pmap, subord: sr_map->pmap, vstart: sfm_min_address, size: sfm_max_address - sfm_min_address);
2224 }
2225}
2226
2227/*
2228 * Retrieve a task's shared region and grab an extra reference to
2229 * make sure it doesn't disappear while the caller is using it.
2230 * The caller is responsible for consuming that extra reference if
2231 * necessary.
2232 *
2233 * This also tries to trim the pmap for the shared region.
2234 */
2235vm_shared_region_t
2236vm_shared_region_trim_and_get(task_t task)
2237{
2238 vm_shared_region_t shared_region;
2239 ipc_port_t sr_handle;
2240 vm_named_entry_t sr_mem_entry;
2241 vm_map_t sr_map;
2242
2243 /* Get the shared region and the map. */
2244 shared_region = vm_shared_region_get(task);
2245 if (shared_region == NULL) {
2246 return NULL;
2247 }
2248
2249 sr_handle = shared_region->sr_mem_entry;
2250 sr_mem_entry = mach_memory_entry_from_port(port: sr_handle);
2251 sr_map = sr_mem_entry->backing.map;
2252
2253 /* Trim the pmap if possible. */
2254 if (VM_MAP_PAGE_SHIFT(map: task->map) == VM_MAP_PAGE_SHIFT(map: sr_map)) {
2255 pmap_trim(grand: task->map->pmap, subord: sr_map->pmap, vstart: 0, size: 0);
2256 }
2257
2258 return shared_region;
2259}
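
/*
 * Illustrative sketch only (never compiled): the reference discipline a
 * caller of vm_shared_region_trim_and_get() is expected to follow.
 */
#if 0
	vm_shared_region_t sr;

	sr = vm_shared_region_trim_and_get(task);
	if (sr == NULL) {
		return; /* the task has no shared region */
	}
	/* ... use sr (e.g. sr->sr_base_address, sr->sr_size) ... */
	vm_shared_region_deallocate(sr); /* consume the extra reference */
#endif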
2260
2261/*
2262 * Enter the appropriate shared region into "map" for "task".
2263 * This involves looking up the shared region (and possibly creating a new
2264 * one) for the desired environment, then mapping the VM sub map into the
2265 * task's VM "map", with the appropriate level of pmap-nesting.
2266 */
2267kern_return_t
2268vm_shared_region_enter(
2269 struct _vm_map *map,
2270 struct task *task,
2271 boolean_t is_64bit,
2272 void *fsroot,
2273 cpu_type_t cpu,
2274 cpu_subtype_t cpu_subtype,
2275 boolean_t reslide,
2276 boolean_t is_driverkit,
2277 uint32_t rsr_version)
2278{
2279 kern_return_t kr;
2280 vm_shared_region_t shared_region;
2281 vm_map_offset_t sr_address, sr_offset, target_address;
2282 vm_map_size_t sr_size, mapping_size;
2283 vm_map_offset_t sr_pmap_nesting_start;
2284 vm_map_size_t sr_pmap_nesting_size;
2285 ipc_port_t sr_handle;
2286 vm_prot_t cur_prot, max_prot;
2287 vm_map_kernel_flags_t vmk_flags;
2288
2289 SHARED_REGION_TRACE_DEBUG(
2290 ("shared_region: -> "
2291 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,driverkit=%d)\n",
2292 (void *)VM_KERNEL_ADDRPERM(map),
2293 (void *)VM_KERNEL_ADDRPERM(task),
2294 (void *)VM_KERNEL_ADDRPERM(fsroot),
2295 cpu, cpu_subtype, is_64bit, is_driverkit));
2296
2297 /* lookup (create if needed) the shared region for this environment */
2298 shared_region = vm_shared_region_lookup(root_dir: fsroot, cputype: cpu, cpu_subtype, is_64bit, target_page_shift: VM_MAP_PAGE_SHIFT(map), reslide, is_driverkit, rsr_version);
2299 if (shared_region == NULL) {
2300 /* this should not happen ! */
2301 SHARED_REGION_TRACE_ERROR(
2302 ("shared_region: -> "
2303 "enter(map=%p,task=%p,root=%p,cpu=<%d,%d>,64bit=%d,reslide=%d,driverkit=%d): "
2304 "lookup failed !\n",
2305 (void *)VM_KERNEL_ADDRPERM(map),
2306 (void *)VM_KERNEL_ADDRPERM(task),
2307 (void *)VM_KERNEL_ADDRPERM(fsroot),
2308 cpu, cpu_subtype, is_64bit, reslide, is_driverkit));
2309 //panic("shared_region_enter: lookup failed");
2310 return KERN_FAILURE;
2311 }
2312
2313 kr = KERN_SUCCESS;
2314 /* no need to lock since this data is never modified */
2315 sr_address = (vm_map_offset_t)shared_region->sr_base_address;
2316 sr_size = (vm_map_size_t)shared_region->sr_size;
2317 sr_handle = shared_region->sr_mem_entry;
2318 sr_pmap_nesting_start = (vm_map_offset_t)shared_region->sr_pmap_nesting_start;
2319 sr_pmap_nesting_size = (vm_map_size_t)shared_region->sr_pmap_nesting_size;
2320 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
2321
2322 cur_prot = VM_PROT_READ;
2323 if (VM_MAP_POLICY_WRITABLE_SHARED_REGION(map)) {
2324 /*
2325 * XXX BINARY COMPATIBILITY
2326 * java6 apparently needs to modify some code in the
2327 * dyld shared cache and needs to be allowed to add
2328 * write access...
2329 */
2330 max_prot = VM_PROT_ALL;
2331 } else {
2332 max_prot = VM_PROT_READ;
2333 /* make it "permanent" to protect against re-mappings */
2334 vmk_flags.vmf_permanent = true;
2335 }
2336
2337 /*
2338 * Start mapping the shared region's VM sub map into the task's VM map.
2339 */
2340 sr_offset = 0;
2341
2342 if (sr_pmap_nesting_start > sr_address) {
2343 /* we need to map a range without pmap-nesting first */
2344 target_address = sr_address;
2345 mapping_size = sr_pmap_nesting_start - sr_address;
2346 kr = vm_map_enter_mem_object(
2347 map,
2348 address: &target_address,
2349 size: mapping_size,
2350 mask: 0,
2351 vmk_flags,
2352 port: sr_handle,
2353 offset: sr_offset,
2354 TRUE,
2355 cur_protection: cur_prot,
2356 max_protection: max_prot,
2357 VM_INHERIT_SHARE);
2358 if (kr != KERN_SUCCESS) {
2359 SHARED_REGION_TRACE_ERROR(
2360 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2361 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2362 (void *)VM_KERNEL_ADDRPERM(map),
2363 (void *)VM_KERNEL_ADDRPERM(task),
2364 (void *)VM_KERNEL_ADDRPERM(fsroot),
2365 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2366 (long long)target_address,
2367 (long long)mapping_size,
2368 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2369 goto done;
2370 }
2371 SHARED_REGION_TRACE_DEBUG(
2372 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2373 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2374 (void *)VM_KERNEL_ADDRPERM(map),
2375 (void *)VM_KERNEL_ADDRPERM(task),
2376 (void *)VM_KERNEL_ADDRPERM(fsroot),
2377 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2378 (long long)target_address, (long long)mapping_size,
2379 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2380 sr_offset += mapping_size;
2381 sr_size -= mapping_size;
2382 }
2383
2384 /* The pmap-nesting is triggered by the "vmkf_nested_pmap" flag. */
2385 vmk_flags.vmkf_nested_pmap = true;
2386 vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
2387
2388 /*
2389 * Use pmap-nesting to map the majority of the shared region into the task's
2390 * VM space. Very rarely will architectures have a shared region that isn't
2391 * the same size as the pmap-nesting region, or start at a different address
2392 * than the pmap-nesting region, so this code will map the entirety of the
2393 * shared region for most architectures.
2394 */
2395 assert((sr_address + sr_offset) == sr_pmap_nesting_start);
2396 target_address = sr_pmap_nesting_start;
2397 kr = vm_map_enter_mem_object(
2398 map,
2399 address: &target_address,
2400 size: sr_pmap_nesting_size,
2401 mask: 0,
2402 vmk_flags,
2403 port: sr_handle,
2404 offset: sr_offset,
2405 TRUE,
2406 cur_protection: cur_prot,
2407 max_protection: max_prot,
2408 VM_INHERIT_SHARE);
2409 if (kr != KERN_SUCCESS) {
2410 SHARED_REGION_TRACE_ERROR(
2411 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2412 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2413 (void *)VM_KERNEL_ADDRPERM(map),
2414 (void *)VM_KERNEL_ADDRPERM(task),
2415 (void *)VM_KERNEL_ADDRPERM(fsroot),
2416 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2417 (long long)target_address,
2418 (long long)sr_pmap_nesting_size,
2419 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2420 goto done;
2421 }
2422 SHARED_REGION_TRACE_DEBUG(
2423 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2424 "nested vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2425 (void *)VM_KERNEL_ADDRPERM(map),
2426 (void *)VM_KERNEL_ADDRPERM(task),
2427 (void *)VM_KERNEL_ADDRPERM(fsroot),
2428 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2429 (long long)target_address, (long long)sr_pmap_nesting_size,
2430 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2431
2432 sr_offset += sr_pmap_nesting_size;
2433 sr_size -= sr_pmap_nesting_size;
2434
2435 if (sr_size > 0) {
2436 /* and there's some left to be mapped without pmap-nesting */
2437 vmk_flags.vmkf_nested_pmap = false; /* no pmap nesting */
2438 target_address = sr_address + sr_offset;
2439 mapping_size = sr_size;
2440 kr = vm_map_enter_mem_object(
2441 map,
2442 address: &target_address,
2443 size: mapping_size,
2444 mask: 0,
2445 VM_MAP_KERNEL_FLAGS_FIXED(),
2446 port: sr_handle,
2447 offset: sr_offset,
2448 TRUE,
2449 cur_protection: cur_prot,
2450 max_protection: max_prot,
2451 VM_INHERIT_SHARE);
2452 if (kr != KERN_SUCCESS) {
2453 SHARED_REGION_TRACE_ERROR(
2454 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2455 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2456 (void *)VM_KERNEL_ADDRPERM(map),
2457 (void *)VM_KERNEL_ADDRPERM(task),
2458 (void *)VM_KERNEL_ADDRPERM(fsroot),
2459 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2460 (long long)target_address,
2461 (long long)mapping_size,
2462 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2463 goto done;
2464 }
2465 SHARED_REGION_TRACE_DEBUG(
2466 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d): "
2467 "vm_map_enter(0x%llx,0x%llx,%p) error 0x%x\n",
2468 (void *)VM_KERNEL_ADDRPERM(map),
2469 (void *)VM_KERNEL_ADDRPERM(task),
2470 (void *)VM_KERNEL_ADDRPERM(fsroot),
2471 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2472 (long long)target_address, (long long)mapping_size,
2473 (void *)VM_KERNEL_ADDRPERM(sr_handle), kr));
2474 sr_offset += mapping_size;
2475 sr_size -= mapping_size;
2476 }
2477 assert(sr_size == 0);
2478
2479done:
2480 if (kr == KERN_SUCCESS) {
2481 /* let the task use that shared region */
2482 vm_shared_region_set(task, new_shared_region: shared_region);
2483 } else {
2484 /* drop our reference since we're not using it */
2485 vm_shared_region_deallocate(shared_region);
2486 vm_shared_region_set(task, NULL);
2487 }
2488
2489 SHARED_REGION_TRACE_DEBUG(
2490 ("shared_region: enter(%p,%p,%p,%d,%d,%d,%d,%d) <- 0x%x\n",
2491 (void *)VM_KERNEL_ADDRPERM(map),
2492 (void *)VM_KERNEL_ADDRPERM(task),
2493 (void *)VM_KERNEL_ADDRPERM(fsroot),
2494 cpu, cpu_subtype, is_64bit, reslide, is_driverkit,
2495 kr));
2496 return kr;
2497}
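
/*
 * Illustrative sketch only (never compiled): how vm_shared_region_enter()
 * above splits the region into up to three consecutive mappings. On most
 * architectures the nested range covers the entire region, so only the
 * middle, pmap-nested mapping is actually created.
 */
#if 0
	vm_map_size_t head_size, tail_size;

	head_size = sr_pmap_nesting_start - sr_address;          /* not nested, usually 0 */
	tail_size = shared_region->sr_size - head_size -
	    sr_pmap_nesting_size;                                /* not nested, usually 0 */
	assert(head_size + sr_pmap_nesting_size + tail_size == shared_region->sr_size);
#endif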
2498
2499void
2500vm_shared_region_remove(
2501 task_t task,
2502 vm_shared_region_t sr)
2503{
2504 vm_map_t map;
2505 mach_vm_offset_t start;
2506 mach_vm_size_t size;
2507 vm_map_kernel_flags_t vmk_flags;
2508 kern_return_t kr;
2509
2510 if (sr == NULL) {
2511 return;
2512 }
2513 map = get_task_map(task);
2514 start = sr->sr_base_address;
2515 size = sr->sr_size;
2516
2517 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2518 vmk_flags.vmkf_overwrite_immutable = true;
2519 vmk_flags.vm_tag = VM_MEMORY_DYLD;
2520
2521 /* range_id is set by mach_vm_map_kernel */
2522 kr = mach_vm_map_kernel(target_map: map,
2523 address: &start,
2524 initial_size: size,
2525 mask: 0, /* mask */
2526 vmk_flags,
2527 MACH_PORT_NULL,
2528 offset: 0,
2529 FALSE, /* copy */
2530 VM_PROT_NONE,
2531 VM_PROT_NONE,
2532 VM_INHERIT_DEFAULT);
2533 if (kr != KERN_SUCCESS) {
2534 printf(format: "%s:%d vm_map(0x%llx, 0x%llx) error %d\n", __FUNCTION__, __LINE__, (uint64_t)sr->sr_base_address, (uint64_t)size, kr);
2535 }
2536}
2537
2538#define SANE_SLIDE_INFO_SIZE (2560*1024) /* Can be changed if needed */
2539
2540kern_return_t
2541vm_shared_region_sliding_valid(uint32_t slide)
2542{
2543 kern_return_t kr = KERN_SUCCESS;
2544 vm_shared_region_t sr = vm_shared_region_get(task: current_task());
2545
2546 /* No region yet? we're fine. */
2547 if (sr == NULL) {
2548 return kr;
2549 }
2550
2551 if (sr->sr_slide != 0 && slide != 0) {
2552 if (slide == sr->sr_slide) {
2553 /*
2554 * A request to slide when we've
2555 * already done it with exactly the
2556 * same slide value before.
2557 * This isn't technically wrong, but
2558 * we don't want to slide again, so
2559 * we return KERN_INVALID_ARGUMENT.
2560 */
2561 kr = KERN_INVALID_ARGUMENT;
2562 } else {
2563 printf(format: "Mismatched shared region slide\n");
2564 kr = KERN_FAILURE;
2565 }
2566 }
2567 vm_shared_region_deallocate(shared_region: sr);
2568 return kr;
2569}
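
/*
 * Illustrative sketch only (never compiled): how a caller might interpret
 * the result of vm_shared_region_sliding_valid(). The surrounding caller
 * code is hypothetical; only the meaning of the return values comes from
 * the routine above.
 */
#if 0
	kr = vm_shared_region_sliding_valid(slide);
	if (kr == KERN_INVALID_ARGUMENT) {
		/* already slid by exactly this amount: nothing left to do */
		kr = KERN_SUCCESS;
	} else if (kr != KERN_SUCCESS) {
		/* mismatched slide: give up */
		return kr;
	}
#endif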
2570
2571/*
2572 * Actually create (really overwrite) the mapping to part of the shared cache which
2573 * undergoes relocation. This routine reads in the relocation info from dyld and
2574 * verifies it. It then creates (or finds a matching) shared region pager which
2575 * handles the actual modification of the page contents and installs the mapping
2576 * using that pager.
2577 */
2578kern_return_t
2579vm_shared_region_slide_mapping(
2580 vm_shared_region_t sr,
2581 user_addr_t slide_info_addr,
2582 mach_vm_size_t slide_info_size,
2583 mach_vm_offset_t start,
2584 mach_vm_size_t size,
2585 mach_vm_offset_t slid_mapping,
2586 uint32_t slide,
2587 memory_object_control_t sr_file_control,
2588 vm_prot_t prot)
2589{
2590 kern_return_t kr;
2591 vm_object_t object = VM_OBJECT_NULL;
2592 vm_shared_region_slide_info_t si = NULL;
2593 vm_map_entry_t tmp_entry = VM_MAP_ENTRY_NULL;
2594 struct vm_map_entry tmp_entry_store;
2595 memory_object_t sr_pager = MEMORY_OBJECT_NULL;
2596 vm_map_t sr_map;
2597 vm_map_kernel_flags_t vmk_flags;
2598 vm_map_offset_t map_addr;
2599 void *slide_info_entry = NULL;
2600 int error;
2601
2602 assert(sr->sr_slide_in_progress);
2603
2604 if (sr_file_control == MEMORY_OBJECT_CONTROL_NULL) {
2605 return KERN_INVALID_ARGUMENT;
2606 }
2607
2608 /*
2609 * Copy in and verify the relocation information.
2610 */
2611 if (slide_info_size < MIN_SLIDE_INFO_SIZE) {
2612 printf(format: "Slide_info_size too small: %lx\n", (uintptr_t)slide_info_size);
2613 return KERN_FAILURE;
2614 }
2615 if (slide_info_size > SANE_SLIDE_INFO_SIZE) {
2616 printf(format: "Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size);
2617 return KERN_FAILURE;
2618 }
2619
2620 slide_info_entry = kalloc_data((vm_size_t)slide_info_size, Z_WAITOK);
2621 if (slide_info_entry == NULL) {
2622 return KERN_RESOURCE_SHORTAGE;
2623 }
2624 error = copyin(slide_info_addr, slide_info_entry, (size_t)slide_info_size);
2625 if (error) {
2626 printf(format: "copyin of slide_info failed\n");
2627 kr = KERN_INVALID_ADDRESS;
2628 goto done;
2629 }
2630
2631 if ((kr = vm_shared_region_slide_sanity_check(entry: slide_info_entry, size: slide_info_size)) != KERN_SUCCESS) {
2632 printf(format: "Sanity Check failed for slide_info\n");
2633 goto done;
2634 }
2635
2636 /*
2637 * Allocate and fill in a vm_shared_region_slide_info.
2638 * This will either be used by a new pager, or used to find
2639 * a pre-existing matching pager.
2640 */
2641 object = memory_object_control_to_vm_object(control: sr_file_control);
2642 if (object == VM_OBJECT_NULL || object->internal) {
2643 object = VM_OBJECT_NULL;
2644 kr = KERN_INVALID_ADDRESS;
2645 goto done;
2646 }
2647
2648 si = kalloc_type(struct vm_shared_region_slide_info,
2649 Z_WAITOK | Z_NOFAIL);
2650 vm_object_lock(object);
2651
2652 vm_object_reference_locked(object); /* for si->slide_object */
2653 object->object_is_shared_cache = TRUE;
2654 vm_object_unlock(object);
2655
2656 si->si_slide_info_entry = slide_info_entry;
2657 si->si_slide_info_size = slide_info_size;
2658
2659 assert(slid_mapping != (mach_vm_offset_t) -1);
2660 si->si_slid_address = slid_mapping + sr->sr_base_address;
2661 si->si_slide_object = object;
2662 si->si_start = start;
2663 si->si_end = si->si_start + size;
2664 si->si_slide = slide;
2665#if __has_feature(ptrauth_calls)
2666 /*
2667 * If there is authenticated pointer data in this slid mapping,
2668 * then just add the information needed to create new pagers for
2669 * different shared_region_id's later.
2670 */
2671 if (sr->sr_cpu_type == CPU_TYPE_ARM64 &&
2672 sr->sr_cpu_subtype == CPU_SUBTYPE_ARM64E &&
2673 !(prot & VM_PROT_NOAUTH)) {
2674 if (sr->sr_next_auth_section == sr->sr_num_auth_section) {
2675 printf("Too many auth/private sections for shared region!!\n");
2676 kr = KERN_INVALID_ARGUMENT;
2677 goto done;
2678 }
2679 si->si_ptrauth = TRUE;
2680 sr->sr_auth_section[sr->sr_next_auth_section++] = si;
2681 /*
2682 * Remember the shared region, since that's where we'll
2683 * stash this info for all auth pagers to share. Each pager
2684 * will need to take a reference to it.
2685 */
2686 si->si_shared_region = sr;
2687 kr = KERN_SUCCESS;
2688 goto done;
2689 }
2690 si->si_shared_region = NULL;
2691 si->si_ptrauth = FALSE;
2692#endif /* __has_feature(ptrauth_calls) */
2693
2694 /*
2695 * find the pre-existing shared region's map entry to slide
2696 */
2697 sr_map = vm_shared_region_vm_map(shared_region: sr);
2698 kr = find_mapping_to_slide(map: sr_map, addr: (vm_map_address_t)slid_mapping, entry: &tmp_entry_store);
2699 if (kr != KERN_SUCCESS) {
2700 goto done;
2701 }
2702 tmp_entry = &tmp_entry_store;
2703
2704 /*
2705 * The object must exactly cover the region to slide.
2706 */
2707 assert(VME_OFFSET(tmp_entry) == start);
2708 assert(tmp_entry->vme_end - tmp_entry->vme_start == size);
2709
2710 /* create a "shared_region" sliding pager */
2711 sr_pager = shared_region_pager_setup(VME_OBJECT(tmp_entry), backing_offset: VME_OFFSET(entry: tmp_entry), slide_info: si, jop_key: 0);
2712 if (sr_pager == MEMORY_OBJECT_NULL) {
2713 kr = KERN_RESOURCE_SHORTAGE;
2714 goto done;
2715 }
2716
2717#if CONFIG_SECLUDED_MEMORY
2718 /*
2719 * Pages backed by the shared region pagers used by camera or
2720 * DEXTs should not go on the secluded queue.
2721 */
2722 if (primary_system_shared_region == NULL ||
2723 primary_system_shared_region == sr ||
2724 sr->sr_driverkit) {
2725 memory_object_mark_eligible_for_secluded(sr_pager->mo_control, FALSE);
2726 }
2727#endif /* CONFIG_SECLUDED_MEMORY */
2728
2729 /* map that pager over the portion of the mapping that needs sliding */
2730 map_addr = tmp_entry->vme_start;
2731 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED(.vmf_overwrite = true);
2732 vmk_flags.vmkf_overwrite_immutable = true;
2733 vmk_flags.vmf_permanent = shared_region_make_permanent(sr,
2734 max_prot: tmp_entry->max_protection);
2735 vmk_flags.vmf_tpro = shared_region_tpro_protect(sr,
2736 max_prot: prot);
2737 kr = vm_map_enter_mem_object(map: sr_map,
2738 address: &map_addr,
2739 size: (tmp_entry->vme_end - tmp_entry->vme_start),
2740 mask: (mach_vm_offset_t) 0,
2741 vmk_flags,
2742 port: (ipc_port_t)(uintptr_t) sr_pager,
2743 offset: 0,
2744 TRUE,
2745 cur_protection: tmp_entry->protection,
2746 max_protection: tmp_entry->max_protection,
2747 inheritance: tmp_entry->inheritance);
2748 assertf(kr == KERN_SUCCESS, "kr = 0x%x\n", kr);
2749 assertf(map_addr == tmp_entry->vme_start,
2750 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
2751 (uint64_t)map_addr,
2752 (uint64_t) tmp_entry->vme_start,
2753 tmp_entry);
2754
2755 /* success! */
2756 kr = KERN_SUCCESS;
2757
2758done:
2759 if (sr_pager != NULL) {
2760 /*
2761 * Release the sr_pager reference obtained by shared_region_pager_setup().
2762 * The mapping, if it succeeded, is now holding a reference on the memory object.
2763 */
2764 memory_object_deallocate(object: sr_pager);
2765 sr_pager = MEMORY_OBJECT_NULL;
2766 }
2767 if (tmp_entry != NULL) {
2768 /* release extra ref on tmp_entry's VM object */
2769 vm_object_deallocate(VME_OBJECT(tmp_entry));
2770 tmp_entry = VM_MAP_ENTRY_NULL;
2771 }
2772
2773 if (kr != KERN_SUCCESS) {
2774 /* cleanup */
2775 if (si != NULL) {
2776 if (si->si_slide_object) {
2777 vm_object_deallocate(object: si->si_slide_object);
2778 si->si_slide_object = VM_OBJECT_NULL;
2779 }
2780 kfree_type(struct vm_shared_region_slide_info, si);
2781 si = NULL;
2782 }
2783 if (slide_info_entry != NULL) {
2784 kfree_data(slide_info_entry, (vm_size_t)slide_info_size);
2785 slide_info_entry = NULL;
2786 }
2787 }
2788 return kr;
2789}
2790
2791static kern_return_t
2792vm_shared_region_slide_sanity_check_v1(
2793 vm_shared_region_slide_info_entry_v1_t s_info)
2794{
2795 uint32_t pageIndex = 0;
2796 uint16_t entryIndex = 0;
2797 uint16_t *toc = NULL;
2798
2799 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2800 for (; pageIndex < s_info->toc_count; pageIndex++) {
2801 entryIndex = (uint16_t)(toc[pageIndex]);
2802
2803 if (entryIndex >= s_info->entry_count) {
2804 printf(format: "No sliding bitmap entry for pageIndex: %d at entryIndex: %d amongst %d entries\n", pageIndex, entryIndex, s_info->entry_count);
2805 return KERN_FAILURE;
2806 }
2807 }
2808 return KERN_SUCCESS;
2809}
2810
2811static kern_return_t
2812vm_shared_region_slide_sanity_check_v2(
2813 vm_shared_region_slide_info_entry_v2_t s_info,
2814 mach_vm_size_t slide_info_size)
2815{
2816 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v2)) {
2817 printf(format: "%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2818 return KERN_FAILURE;
2819 }
2820 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2821 return KERN_FAILURE;
2822 }
2823
2824 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2825
2826 uint32_t page_starts_count = s_info->page_starts_count;
2827 uint32_t page_extras_count = s_info->page_extras_count;
2828 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2829 if (num_trailing_entries < page_starts_count) {
2830 return KERN_FAILURE;
2831 }
2832
2833 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2834 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2835 if (trailing_size >> 1 != num_trailing_entries) {
2836 return KERN_FAILURE;
2837 }
2838
2839 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2840 if (required_size < sizeof(*s_info)) {
2841 return KERN_FAILURE;
2842 }
2843
2844 if (required_size > slide_info_size) {
2845 return KERN_FAILURE;
2846 }
2847
2848 return KERN_SUCCESS;
2849}
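
/*
 * Illustrative sketch only (never compiled): the same bounds check as in
 * vm_shared_region_slide_sanity_check_v2() above, expressed with the
 * os_*_overflow() helpers instead of the hard-coded shift-by-1 trick.
 * Shown for clarity only, not as a replacement.
 */
#if 0
	mach_vm_size_t num_trailing_entries, trailing_size, required_size;

	if (os_add_overflow(s_info->page_starts_count, s_info->page_extras_count,
	    &num_trailing_entries) ||
	    os_mul_overflow(num_trailing_entries, sizeof(uint16_t), &trailing_size) ||
	    os_add_overflow(sizeof(*s_info), trailing_size, &required_size) ||
	    required_size > slide_info_size) {
		return KERN_FAILURE;
	}
#endif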
2850
2851static kern_return_t
2852vm_shared_region_slide_sanity_check_v3(
2853 vm_shared_region_slide_info_entry_v3_t s_info,
2854 mach_vm_size_t slide_info_size)
2855{
2856 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v3)) {
2857 printf(format: "%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2858 return KERN_FAILURE;
2859 }
2860 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2861 printf(format: "vm_shared_region_slide_sanity_check_v3: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE);
2862 return KERN_FAILURE;
2863 }
2864
2865 uint32_t page_starts_count = s_info->page_starts_count;
2866 mach_vm_size_t num_trailing_entries = page_starts_count;
2867 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2868 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2869 if (required_size < sizeof(*s_info)) {
2870 printf(format: "vm_shared_region_slide_sanity_check_v3: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2871 return KERN_FAILURE;
2872 }
2873
2874 if (required_size > slide_info_size) {
2875 printf(format: "vm_shared_region_slide_sanity_check_v3: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2876 return KERN_FAILURE;
2877 }
2878
2879 return KERN_SUCCESS;
2880}
2881
2882static kern_return_t
2883vm_shared_region_slide_sanity_check_v4(
2884 vm_shared_region_slide_info_entry_v4_t s_info,
2885 mach_vm_size_t slide_info_size)
2886{
2887 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v4)) {
2888 printf(format: "%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2889 return KERN_FAILURE;
2890 }
2891 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE) {
2892 return KERN_FAILURE;
2893 }
2894
2895 /* Ensure that the slide info doesn't reference any data outside of its bounds. */
2896
2897 uint32_t page_starts_count = s_info->page_starts_count;
2898 uint32_t page_extras_count = s_info->page_extras_count;
2899 mach_vm_size_t num_trailing_entries = page_starts_count + page_extras_count;
2900 if (num_trailing_entries < page_starts_count) {
2901 return KERN_FAILURE;
2902 }
2903
2904 /* Scale by sizeof(uint16_t). Hard-coding the size simplifies the overflow check. */
2905 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2906 if (trailing_size >> 1 != num_trailing_entries) {
2907 return KERN_FAILURE;
2908 }
2909
2910 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2911 if (required_size < sizeof(*s_info)) {
2912 return KERN_FAILURE;
2913 }
2914
2915 if (required_size > slide_info_size) {
2916 return KERN_FAILURE;
2917 }
2918
2919 return KERN_SUCCESS;
2920}
2921
2922static kern_return_t
2923vm_shared_region_slide_sanity_check_v5(
2924 vm_shared_region_slide_info_entry_v5_t s_info,
2925 mach_vm_size_t slide_info_size)
2926{
2927 if (slide_info_size < sizeof(struct vm_shared_region_slide_info_entry_v5)) {
2928 printf(format: "%s bad slide_info_size: %lx\n", __func__, (uintptr_t)slide_info_size);
2929 return KERN_FAILURE;
2930 }
2931 if (s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB) {
2932 printf(format: "vm_shared_region_slide_sanity_check_v5: s_info->page_size != PAGE_SIZE_FOR_SR_SLIDE_16KB 0x%llx != 0x%llx\n", (uint64_t)s_info->page_size, (uint64_t)PAGE_SIZE_FOR_SR_SLIDE_16KB);
2933 return KERN_FAILURE;
2934 }
2935
2936 uint32_t page_starts_count = s_info->page_starts_count;
2937 mach_vm_size_t num_trailing_entries = page_starts_count;
2938 mach_vm_size_t trailing_size = num_trailing_entries << 1;
2939 mach_vm_size_t required_size = sizeof(*s_info) + trailing_size;
2940 if (required_size < sizeof(*s_info)) {
2941 printf(format: "vm_shared_region_slide_sanity_check_v5: required_size < sizeof(*s_info) 0x%llx < 0x%llx\n", (uint64_t)required_size, (uint64_t)sizeof(*s_info));
2942 return KERN_FAILURE;
2943 }
2944
2945 if (required_size > slide_info_size) {
2946 printf(format: "vm_shared_region_slide_sanity_check_v5: required_size > slide_info_size 0x%llx > 0x%llx\n", (uint64_t)required_size, (uint64_t)slide_info_size);
2947 return KERN_FAILURE;
2948 }
2949
2950 return KERN_SUCCESS;
2951}
2952
2953
2954static kern_return_t
2955vm_shared_region_slide_sanity_check(
2956 vm_shared_region_slide_info_entry_t s_info,
2957 mach_vm_size_t s_info_size)
2958{
2959 kern_return_t kr;
2960
2961 switch (s_info->version) {
2962 case 1:
2963 kr = vm_shared_region_slide_sanity_check_v1(s_info: &s_info->v1);
2964 break;
2965 case 2:
2966 kr = vm_shared_region_slide_sanity_check_v2(s_info: &s_info->v2, slide_info_size: s_info_size);
2967 break;
2968 case 3:
2969 kr = vm_shared_region_slide_sanity_check_v3(s_info: &s_info->v3, slide_info_size: s_info_size);
2970 break;
2971 case 4:
2972 kr = vm_shared_region_slide_sanity_check_v4(s_info: &s_info->v4, slide_info_size: s_info_size);
2973 break;
2974 case 5:
2975 kr = vm_shared_region_slide_sanity_check_v5(s_info: &s_info->v5, slide_info_size: s_info_size);
2976 break;
2977 default:
2978 kr = KERN_FAILURE;
2979 }
2980 return kr;
2981}
2982
2983static kern_return_t
2984vm_shared_region_slide_page_v1(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
2985{
2986 uint16_t *toc = NULL;
2987 slide_info_entry_toc_t bitmap = NULL;
2988 uint32_t i = 0, j = 0;
2989 uint8_t b = 0;
2990 uint32_t slide = si->si_slide;
2991 int is_64 = task_has_64Bit_addr(current_task());
2992
2993 vm_shared_region_slide_info_entry_v1_t s_info = &si->si_slide_info_entry->v1;
2994 toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset);
2995
2996 if (pageIndex >= s_info->toc_count) {
2997 printf(format: "No slide entry for this page in toc. PageIndex: %d Toc Count: %d\n", pageIndex, s_info->toc_count);
2998 } else {
2999 uint16_t entryIndex = (uint16_t)(toc[pageIndex]);
3000 slide_info_entry_toc_t slide_info_entries = (slide_info_entry_toc_t)((uintptr_t)s_info + s_info->entry_offset);
3001
3002 if (entryIndex >= s_info->entry_count) {
3003 printf(format: "No sliding bitmap entry for entryIndex: %d amongst %d entries\n", entryIndex, s_info->entry_count);
3004 } else {
3005 bitmap = &slide_info_entries[entryIndex];
3006
3007 for (i = 0; i < NUM_SLIDING_BITMAPS_PER_PAGE; ++i) {
3008 b = bitmap->entry[i];
3009 if (b != 0) {
3010 for (j = 0; j < 8; ++j) {
3011 if (b & (1 << j)) {
3012 uint32_t *ptr_to_slide;
3013 uint32_t old_value;
3014
3015 ptr_to_slide = (uint32_t*)((uintptr_t)(vaddr) + (sizeof(uint32_t) * (i * 8 + j)));
3016 old_value = *ptr_to_slide;
3017 *ptr_to_slide += slide;
3018 if (is_64 && *ptr_to_slide < old_value) {
3019 /*
3020 * We just slid the low 32 bits of a 64-bit pointer
3021 * and it looks like there should have been a carry-over
3022 * to the upper 32 bits.
3023 * The sliding failed...
3024 */
3025 printf(format: "vm_shared_region_slide() carry over: i=%d j=%d b=0x%x slide=0x%x old=0x%x new=0x%x\n",
3026 i, j, b, slide, old_value, *ptr_to_slide);
3027 return KERN_FAILURE;
3028 }
3029 }
3030 }
3031 }
3032 }
3033 }
3034 }
3035
3036 return KERN_SUCCESS;
3037}
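
/*
 * Illustrative worked example (never compiled) for the v1 bitmap walk
 * above: each set bit j in bitmap byte i marks the 32-bit word at page
 * offset sizeof(uint32_t) * (i * 8 + j) as needing the slide added.
 */
#if 0
	uint32_t i = 3;         /* fourth bitmap byte */
	uint8_t  b = 0x05;      /* bits 0 and 2 set   */
	/* -> the words at byte offsets 4 * (3*8 + 0) = 96 and 4 * (3*8 + 2) = 104
	 *    each get "slide" added to them */
#endif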
3038
3039static kern_return_t
3040rebase_chain_32(
3041 uint8_t *page_content,
3042 uint16_t start_offset,
3043 uint32_t slide_amount,
3044 vm_shared_region_slide_info_entry_v2_t s_info)
3045{
3046 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3047
3048 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3049 const uint32_t value_mask = ~delta_mask;
3050 const uint32_t value_add = (uint32_t)(s_info->value_add);
3051 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3052
3053 uint32_t page_offset = start_offset;
3054 uint32_t delta = 1;
3055
3056 while (delta != 0 && page_offset <= last_page_offset) {
3057 uint8_t *loc;
3058 uint32_t value;
3059
3060 loc = page_content + page_offset;
3061 memcpy(dst: &value, src: loc, n: sizeof(value));
3062 delta = (value & delta_mask) >> delta_shift;
3063 value &= value_mask;
3064
3065 if (value != 0) {
3066 value += value_add;
3067 value += slide_amount;
3068 }
3069 memcpy(dst: loc, src: &value, n: sizeof(value));
3070 page_offset += delta;
3071 }
3072
3073 /* If the offset went past the end of the page, then the slide data is invalid. */
3074 if (page_offset > last_page_offset) {
3075 return KERN_FAILURE;
3076 }
3077 return KERN_SUCCESS;
3078}
3079
3080static kern_return_t
3081rebase_chain_64(
3082 uint8_t *page_content,
3083 uint16_t start_offset,
3084 uint32_t slide_amount,
3085 vm_shared_region_slide_info_entry_v2_t s_info)
3086{
3087 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint64_t);
3088
3089 const uint64_t delta_mask = s_info->delta_mask;
3090 const uint64_t value_mask = ~delta_mask;
3091 const uint64_t value_add = s_info->value_add;
3092 const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3093
3094 uint32_t page_offset = start_offset;
3095 uint32_t delta = 1;
3096
3097 while (delta != 0 && page_offset <= last_page_offset) {
3098 uint8_t *loc;
3099 uint64_t value;
3100
3101 loc = page_content + page_offset;
3102 memcpy(dst: &value, src: loc, n: sizeof(value));
3103 delta = (uint32_t)((value & delta_mask) >> delta_shift);
3104 value &= value_mask;
3105
3106 if (value != 0) {
3107 value += value_add;
3108 value += slide_amount;
3109 }
3110 memcpy(dst: loc, src: &value, n: sizeof(value));
3111 page_offset += delta;
3112 }
3113
3114 if (page_offset + sizeof(uint32_t) == PAGE_SIZE_FOR_SR_SLIDE) {
3115 /* If a pointer straddling the page boundary needs to be adjusted, then
3116 * add the slide to the lower half. The encoding guarantees that the upper
3117 * half on the next page will need no masking.
3118 *
3119 * This assumes a little-endian machine and that the region being slid
3120 * never crosses a 4 GB boundary. */
3121
3122 uint8_t *loc = page_content + page_offset;
3123 uint32_t value;
3124
3125 memcpy(dst: &value, src: loc, n: sizeof(value));
3126 value += slide_amount;
3127 memcpy(dst: loc, src: &value, n: sizeof(value));
3128 } else if (page_offset > last_page_offset) {
3129 return KERN_FAILURE;
3130 }
3131
3132 return KERN_SUCCESS;
3133}
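
/*
 * Illustrative worked example (never compiled) of one step of the 64-bit
 * delta chain above, using made-up constants: with a delta_mask of
 * 0x00FF000000000000, delta_shift is ctz(mask) - 2 = 46, so the encoded
 * byte 0x02 yields a delta of 8 bytes to the next pointer in the chain
 * (deltas are scaled by 4); a delta of 0 ends the chain.
 */
#if 0
	const uint64_t delta_mask  = 0x00FF000000000000ULL;            /* made up */
	const uint64_t value       = 0x0002000012345678ULL;            /* made up */
	const uint64_t delta_shift = __builtin_ctzll(delta_mask) - 2;  /* 46 */

	assert(((value & delta_mask) >> delta_shift) == 8);     /* next ptr is 8 bytes on */
	assert((value & ~delta_mask) == 0x0000000012345678ULL); /* payload before value_add/slide */
#endif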
3134
3135static kern_return_t
3136rebase_chain(
3137 boolean_t is_64,
3138 uint32_t pageIndex,
3139 uint8_t *page_content,
3140 uint16_t start_offset,
3141 uint32_t slide_amount,
3142 vm_shared_region_slide_info_entry_v2_t s_info)
3143{
3144 kern_return_t kr;
3145 if (is_64) {
3146 kr = rebase_chain_64(page_content, start_offset, slide_amount, s_info);
3147 } else {
3148 kr = rebase_chain_32(page_content, start_offset, slide_amount, s_info);
3149 }
3150
3151 if (kr != KERN_SUCCESS) {
3152 printf(format: "vm_shared_region_slide_page() offset overflow: pageIndex=%u, start_offset=%u, slide_amount=%u\n",
3153 pageIndex, start_offset, slide_amount);
3154 }
3155 return kr;
3156}
3157
3158static kern_return_t
3159vm_shared_region_slide_page_v2(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3160{
3161 vm_shared_region_slide_info_entry_v2_t s_info = &si->si_slide_info_entry->v2;
3162 const uint32_t slide_amount = si->si_slide;
3163
3164 /* The high bits of the delta_mask field are nonzero precisely when the shared
3165 * cache is 64-bit. */
3166 const boolean_t is_64 = (s_info->delta_mask >> 32) != 0;
3167
3168 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3169 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3170
3171 uint8_t *page_content = (uint8_t *)vaddr;
3172 uint16_t page_entry;
3173
3174 if (pageIndex >= s_info->page_starts_count) {
3175 printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3176 pageIndex, s_info->page_starts_count);
3177 return KERN_FAILURE;
3178 }
3179 page_entry = page_starts[pageIndex];
3180
3181 if (page_entry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
3182 return KERN_SUCCESS;
3183 }
3184
3185 if (page_entry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) {
3186 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE_PAGE_VALUE;
3187 uint16_t info;
3188
3189 do {
3190 uint16_t page_start_offset;
3191 kern_return_t kr;
3192
3193 if (chain_index >= s_info->page_extras_count) {
3194 printf(format: "vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3195 chain_index, s_info->page_extras_count);
3196 return KERN_FAILURE;
3197 }
3198 info = page_extras[chain_index];
3199 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE_PAGE_VALUE) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3200
3201 kr = rebase_chain(is_64, pageIndex, page_content, start_offset: page_start_offset, slide_amount, s_info);
3202 if (kr != KERN_SUCCESS) {
3203 return KERN_FAILURE;
3204 }
3205
3206 chain_index++;
3207 } while (!(info & DYLD_CACHE_SLIDE_PAGE_ATTR_END));
3208 } else {
3209 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3210 kern_return_t kr;
3211
3212 kr = rebase_chain(is_64, pageIndex, page_content, start_offset: page_start_offset, slide_amount, s_info);
3213 if (kr != KERN_SUCCESS) {
3214 return KERN_FAILURE;
3215 }
3216 }
3217
3218 return KERN_SUCCESS;
3219}
3220
3221
3222static kern_return_t
3223vm_shared_region_slide_page_v3(
3224 vm_shared_region_slide_info_t si,
3225 vm_offset_t vaddr,
3226 __unused mach_vm_offset_t uservaddr,
3227 uint32_t pageIndex,
3228#if !__has_feature(ptrauth_calls)
3229 __unused
3230#endif /* !__has_feature(ptrauth_calls) */
3231 uint64_t jop_key)
3232{
3233 vm_shared_region_slide_info_entry_v3_t s_info = &si->si_slide_info_entry->v3;
3234 const uint32_t slide_amount = si->si_slide;
3235
3236 uint8_t *page_content = (uint8_t *)vaddr;
3237 uint16_t page_entry;
3238
3239 if (pageIndex >= s_info->page_starts_count) {
3240 printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3241 pageIndex, s_info->page_starts_count);
3242 return KERN_FAILURE;
3243 }
3244 page_entry = s_info->page_starts[pageIndex];
3245
3246 if (page_entry == DYLD_CACHE_SLIDE_V3_PAGE_ATTR_NO_REBASE) {
3247 return KERN_SUCCESS;
3248 }
3249
3250 uint8_t* rebaseLocation = page_content;
3251 uint64_t delta = page_entry;
3252 do {
3253 rebaseLocation += delta;
3254 uint64_t value;
3255 memcpy(dst: &value, src: rebaseLocation, n: sizeof(value));
3256 delta = ((value & 0x3FF8000000000000) >> 51) * sizeof(uint64_t);
3257
3258 // A pointer is one of :
3259 // {
3260 // uint64_t pointerValue : 51;
3261 // uint64_t offsetToNextPointer : 11;
3262 // uint64_t isBind : 1 = 0;
3263 // uint64_t authenticated : 1 = 0;
3264 // }
3265 // {
3266 // uint32_t offsetFromSharedCacheBase;
3267 // uint16_t diversityData;
3268 // uint16_t hasAddressDiversity : 1;
3269 // uint16_t hasDKey : 1;
3270 // uint16_t hasBKey : 1;
3271 // uint16_t offsetToNextPointer : 11;
3272 // uint16_t isBind : 1;
3273 // uint16_t authenticated : 1 = 1;
3274 // }
3275
3276 bool isBind = (value & (1ULL << 62)) != 0;
3277 if (isBind) {
3278 return KERN_FAILURE;
3279 }
3280
3281#if __has_feature(ptrauth_calls)
3282 uint16_t diversity_data = (uint16_t)(value >> 32);
3283 bool hasAddressDiversity = (value & (1ULL << 48)) != 0;
3284 ptrauth_key key = (ptrauth_key)((value >> 49) & 0x3);
3285#endif /* __has_feature(ptrauth_calls) */
3286 bool isAuthenticated = (value & (1ULL << 63)) != 0;
3287
3288 if (isAuthenticated) {
3289 // The new value for a rebase is the low 32-bits of the threaded value plus the slide.
3290 value = (value & 0xFFFFFFFF) + slide_amount;
3291 // Add in the offset from the mach_header
3292 const uint64_t value_add = s_info->value_add;
3293 value += value_add;
3294
3295#if __has_feature(ptrauth_calls)
3296 uint64_t discriminator = diversity_data;
3297 if (hasAddressDiversity) {
3298 // First calculate a new discriminator using the address of where we are trying to store the value
3299 uintptr_t pageOffset = rebaseLocation - page_content;
3300 discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3301 }
3302
3303 if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3304 /*
3305 * These pointers are used in user mode, so disable the kernel key
3306 * diversification and sign them for use in user mode.
3307 */
3308 value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3309 }
3310#endif /* __has_feature(ptrauth_calls) */
3311 } else {
3312 // The new value for a rebase is the low 51 bits of the threaded value plus the slide.
3313 // This is a regular pointer that needs to fit in the 51-bit value.
3314 // C++ RTTI uses the top bit, so we allow the whole top byte
3315 // and the bottom 43 bits to fit into the 51 bits.
3316 uint64_t top8Bits = value & 0x0007F80000000000ULL;
3317 uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;
3318 uint64_t targetValue = (top8Bits << 13) | bottom43Bits;
3319 value = targetValue + slide_amount;
3320 }
3321
3322 memcpy(dst: rebaseLocation, src: &value, n: sizeof(value));
3323 } while (delta != 0);
3324
3325 return KERN_SUCCESS;
3326}
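
/*
 * Illustrative worked example (never compiled) of the non-authenticated v3
 * rebase above, with a made-up stored value: the top 8 pointer bits live in
 * bits 43..50 of the 51-bit payload and are shifted back up by 13 before
 * the slide is added.
 */
#if 0
	const uint64_t value        = 0x0000080000001000ULL;          /* made up */
	const uint64_t top8Bits     = value & 0x0007F80000000000ULL;  /* bit 43 set */
	const uint64_t bottom43Bits = value & 0x000007FFFFFFFFFFULL;  /* 0x1000 */
	const uint64_t target       = (top8Bits << 13) | bottom43Bits;

	assert(target == 0x0100000000001000ULL);   /* slide_amount is then added */
#endif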
3327
3328static kern_return_t
3329rebase_chainv4(
3330 uint8_t *page_content,
3331 uint16_t start_offset,
3332 uint32_t slide_amount,
3333 vm_shared_region_slide_info_entry_v4_t s_info)
3334{
3335 const uint32_t last_page_offset = PAGE_SIZE_FOR_SR_SLIDE - sizeof(uint32_t);
3336
3337 const uint32_t delta_mask = (uint32_t)(s_info->delta_mask);
3338 const uint32_t value_mask = ~delta_mask;
3339 const uint32_t value_add = (uint32_t)(s_info->value_add);
3340 const uint32_t delta_shift = __builtin_ctzll(delta_mask) - 2;
3341
3342 uint32_t page_offset = start_offset;
3343 uint32_t delta = 1;
3344
3345 while (delta != 0 && page_offset <= last_page_offset) {
3346 uint8_t *loc;
3347 uint32_t value;
3348
3349 loc = page_content + page_offset;
		memcpy(&value, loc, sizeof(value));
3351 delta = (value & delta_mask) >> delta_shift;
3352 value &= value_mask;
3353
3354 if ((value & 0xFFFF8000) == 0) {
3355 // small positive non-pointer, use as-is
3356 } else if ((value & 0x3FFF8000) == 0x3FFF8000) {
3357 // small negative non-pointer
3358 value |= 0xC0000000;
3359 } else {
3360 // pointer that needs rebasing
3361 value += value_add;
3362 value += slide_amount;
3363 }
		memcpy(loc, &value, sizeof(value));
3365 page_offset += delta;
3366 }
3367
3368 /* If the offset went past the end of the page, then the slide data is invalid. */
3369 if (page_offset > last_page_offset) {
3370 return KERN_FAILURE;
3371 }
3372 return KERN_SUCCESS;
3373}
3374
3375static kern_return_t
3376vm_shared_region_slide_page_v4(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex)
3377{
3378 vm_shared_region_slide_info_entry_v4_t s_info = &si->si_slide_info_entry->v4;
3379 const uint32_t slide_amount = si->si_slide;
3380
3381 const uint16_t *page_starts = (uint16_t *)((uintptr_t)s_info + s_info->page_starts_offset);
3382 const uint16_t *page_extras = (uint16_t *)((uintptr_t)s_info + s_info->page_extras_offset);
3383
3384 uint8_t *page_content = (uint8_t *)vaddr;
3385 uint16_t page_entry;
3386
3387 if (pageIndex >= s_info->page_starts_count) {
3388 printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3389 pageIndex, s_info->page_starts_count);
3390 return KERN_FAILURE;
3391 }
3392 page_entry = page_starts[pageIndex];
3393
3394 if (page_entry == DYLD_CACHE_SLIDE4_PAGE_NO_REBASE) {
3395 return KERN_SUCCESS;
3396 }
3397
3398 if (page_entry & DYLD_CACHE_SLIDE4_PAGE_USE_EXTRA) {
3399 uint16_t chain_index = page_entry & DYLD_CACHE_SLIDE4_PAGE_INDEX;
3400 uint16_t info;
3401
3402 do {
3403 uint16_t page_start_offset;
3404 kern_return_t kr;
3405
3406 if (chain_index >= s_info->page_extras_count) {
3407 printf(format: "vm_shared_region_slide_page() out-of-bounds extras index: index=%u, count=%u\n",
3408 chain_index, s_info->page_extras_count);
3409 return KERN_FAILURE;
3410 }
3411 info = page_extras[chain_index];
3412 page_start_offset = (uint16_t)((info & DYLD_CACHE_SLIDE4_PAGE_INDEX) << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3413
			kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3415 if (kr != KERN_SUCCESS) {
3416 return KERN_FAILURE;
3417 }
3418
3419 chain_index++;
3420 } while (!(info & DYLD_CACHE_SLIDE4_PAGE_EXTRA_END));
3421 } else {
3422 const uint16_t page_start_offset = (uint16_t)(page_entry << DYLD_CACHE_SLIDE_PAGE_OFFSET_SHIFT);
3423 kern_return_t kr;
3424
		kr = rebase_chainv4(page_content, page_start_offset, slide_amount, s_info);
3426 if (kr != KERN_SUCCESS) {
3427 return KERN_FAILURE;
3428 }
3429 }
3430
3431 return KERN_SUCCESS;
3432}
3433
3434
3435static kern_return_t
3436vm_shared_region_slide_page_v5(
3437 vm_shared_region_slide_info_t si,
3438 vm_offset_t vaddr,
3439 __unused mach_vm_offset_t uservaddr,
3440 uint32_t pageIndex,
3441#if !__has_feature(ptrauth_calls)
3442 __unused
3443#endif /* !__has_feature(ptrauth_calls) */
3444 uint64_t jop_key)
3445{
3446 vm_shared_region_slide_info_entry_v5_t s_info = &si->si_slide_info_entry->v5;
3447 const uint32_t slide_amount = si->si_slide;
3448 const uint64_t value_add = s_info->value_add;
3449
3450 uint8_t *page_content = (uint8_t *)vaddr;
3451 uint16_t page_entry;
3452
3453 if (pageIndex >= s_info->page_starts_count) {
3454 printf(format: "vm_shared_region_slide_page() did not find page start in slide info: pageIndex=%u, count=%u\n",
3455 pageIndex, s_info->page_starts_count);
3456 return KERN_FAILURE;
3457 }
3458 page_entry = s_info->page_starts[pageIndex];
3459
3460 if (page_entry == DYLD_CACHE_SLIDE_V5_PAGE_ATTR_NO_REBASE) {
3461 return KERN_SUCCESS;
3462 }
3463
3464 uint8_t* rebaseLocation = page_content;
3465 uint64_t delta = page_entry;
3466 do {
3467 rebaseLocation += delta;
3468 uint64_t value;
		memcpy(&value, rebaseLocation, sizeof(value));
3470 delta = ((value & 0x7FF0000000000000ULL) >> 52) * sizeof(uint64_t);
3471
3472 // A pointer is one of :
3473 // {
3474 // uint64_t runtimeOffset : 34, // offset from the start of the shared cache
3475 // high8 : 8,
3476 // unused : 10,
		// next : 11, // 8-byte stride
3478 // auth : 1; // == 0
3479 // }
3480 // {
3481 // uint64_t runtimeOffset : 34, // offset from the start of the shared cache
3482 // diversity : 16,
3483 // addrDiv : 1,
3484 // keyIsData : 1, // implicitly always the 'A' key. 0 -> IA. 1 -> DA
		// next : 11, // 8-byte stride
3486 // auth : 1; // == 1
3487 // }
3488
3489#if __has_feature(ptrauth_calls)
3490 bool addrDiv = ((value & (1ULL << 50)) != 0);
3491 bool keyIsData = ((value & (1ULL << 51)) != 0);
		// the key is always an A key; the bit tells us whether it's IA or DA
3493 ptrauth_key key = keyIsData ? ptrauth_key_asda : ptrauth_key_asia;
3494 uint16_t diversity = (uint16_t)((value >> 34) & 0xFFFF);
3495#endif /* __has_feature(ptrauth_calls) */
3496 uint64_t high8 = (value << 22) & 0xFF00000000000000ULL;
3497 bool isAuthenticated = (value & (1ULL << 63)) != 0;
3498
3499 // The new value for a rebase is the low 34-bits of the threaded value plus the base plus slide.
3500 value = (value & 0x3FFFFFFFFULL) + value_add + slide_amount;
3501 if (isAuthenticated) {
3502#if __has_feature(ptrauth_calls)
3503 uint64_t discriminator = diversity;
3504 if (addrDiv) {
3505 // First calculate a new discriminator using the address of where we are trying to store the value
3506 uintptr_t pageOffset = rebaseLocation - page_content;
3507 discriminator = __builtin_ptrauth_blend_discriminator((void*)(((uintptr_t)uservaddr) + pageOffset), discriminator);
3508 }
3509
3510 if (jop_key != 0 && si->si_ptrauth && !arm_user_jop_disabled()) {
3511 /*
3512 * these pointers are used in user mode. disable the kernel key diversification
3513 * so we can sign them for use in user mode.
3514 */
3515 value = (uintptr_t)pmap_sign_user_ptr((void *)value, key, discriminator, jop_key);
3516 }
3517#endif /* __has_feature(ptrauth_calls) */
3518 } else {
3519 // the value already has the correct low bits, so just add in the high8 if it exists
3520 value += high8;
3521 }
3522
		memcpy(rebaseLocation, &value, sizeof(value));
3524 } while (delta != 0);
3525
3526 return KERN_SUCCESS;
3527}
3528
3529
3530
3531kern_return_t
3532vm_shared_region_slide_page(
3533 vm_shared_region_slide_info_t si,
3534 vm_offset_t vaddr,
3535 mach_vm_offset_t uservaddr,
3536 uint32_t pageIndex,
3537 uint64_t jop_key)
3538{
3539 switch (si->si_slide_info_entry->version) {
3540 case 1:
3541 return vm_shared_region_slide_page_v1(si, vaddr, pageIndex);
3542 case 2:
3543 return vm_shared_region_slide_page_v2(si, vaddr, pageIndex);
3544 case 3:
3545 return vm_shared_region_slide_page_v3(si, vaddr, uservaddr, pageIndex, jop_key);
3546 case 4:
3547 return vm_shared_region_slide_page_v4(si, vaddr, pageIndex);
3548 case 5:
3549 return vm_shared_region_slide_page_v5(si, vaddr, uservaddr, pageIndex, jop_key);
3550 default:
3551 return KERN_FAILURE;
3552 }
3553}
3554
3555/******************************************************************************/
3556/* Comm page support */
3557/******************************************************************************/
3558
3559SECURITY_READ_ONLY_LATE(ipc_port_t) commpage32_handle = IPC_PORT_NULL;
3560SECURITY_READ_ONLY_LATE(ipc_port_t) commpage64_handle = IPC_PORT_NULL;
3561SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage32_entry = NULL;
3562SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage64_entry = NULL;
3563SECURITY_READ_ONLY_LATE(vm_map_t) commpage32_map = VM_MAP_NULL;
3564SECURITY_READ_ONLY_LATE(vm_map_t) commpage64_map = VM_MAP_NULL;
3565
3566SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text32_handle = IPC_PORT_NULL;
3567SECURITY_READ_ONLY_LATE(ipc_port_t) commpage_text64_handle = IPC_PORT_NULL;
3568SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text32_entry = NULL;
3569SECURITY_READ_ONLY_LATE(vm_named_entry_t) commpage_text64_entry = NULL;
3570SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text32_map = VM_MAP_NULL;
3571SECURITY_READ_ONLY_LATE(vm_map_t) commpage_text64_map = VM_MAP_NULL;
3572
3573SECURITY_READ_ONLY_LATE(user32_addr_t) commpage_text32_location = 0;
3574SECURITY_READ_ONLY_LATE(user64_addr_t) commpage_text64_location = 0;
3575
3576#if defined(__i386__) || defined(__x86_64__)
3577/*
3578 * Create a memory entry, VM submap and pmap for one commpage.
3579 */
3580static void
3581_vm_commpage_init(
3582 ipc_port_t *handlep,
3583 vm_map_size_t size)
3584{
3585 vm_named_entry_t mem_entry;
3586 vm_map_t new_map;
3587
3588 SHARED_REGION_TRACE_DEBUG(
3589 ("commpage: -> _init(0x%llx)\n",
3590 (long long)size));
3591
3592 pmap_t new_pmap = pmap_create_options(NULL, 0, 0);
3593 if (new_pmap == NULL) {
3594 panic("_vm_commpage_init: could not allocate pmap");
3595 }
3596 new_map = vm_map_create_options(new_pmap, 0, size, VM_MAP_CREATE_DEFAULT);
3597
3598 mem_entry = mach_memory_entry_allocate(handlep);
3599 mem_entry->backing.map = new_map;
3600 mem_entry->internal = TRUE;
3601 mem_entry->is_sub_map = TRUE;
3602 mem_entry->offset = 0;
3603 mem_entry->protection = VM_PROT_ALL;
3604 mem_entry->size = size;
3605
3606 SHARED_REGION_TRACE_DEBUG(
3607 ("commpage: _init(0x%llx) <- %p\n",
3608 (long long)size, (void *)VM_KERNEL_ADDRPERM(*handlep)));
3609}
3610#endif
3611
3612
3613/*
3614 * Initialize the comm text pages at boot time
3615 */
3616void
3617vm_commpage_text_init(void)
3618{
3619 SHARED_REGION_TRACE_DEBUG(
3620 ("commpage text: ->init()\n"));
3621#if defined(__i386__) || defined(__x86_64__)
	/* create the 32-bit comm text page */
	unsigned int offset = (random() % _PFZ32_SLIDE_RANGE) << PAGE_SHIFT; /* keep the slide within 32-bit max minus 2 pages */
3624 _vm_commpage_init(&commpage_text32_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3625 commpage_text32_entry = mach_memory_entry_from_port(commpage_text32_handle);
3626 commpage_text32_map = commpage_text32_entry->backing.map;
3627 commpage_text32_location = (user32_addr_t) (_COMM_PAGE32_TEXT_START + offset);
3628 /* XXX if (cpu_is_64bit_capable()) ? */
	/* create the 64-bit comm text page */
	offset = (random() % _PFZ64_SLIDE_RANGE) << PAGE_SHIFT; /* restrict the slide to a range of up to 2MB */
3631 _vm_commpage_init(&commpage_text64_handle, _COMM_PAGE_TEXT_AREA_LENGTH);
3632 commpage_text64_entry = mach_memory_entry_from_port(commpage_text64_handle);
3633 commpage_text64_map = commpage_text64_entry->backing.map;
3634 commpage_text64_location = (user64_addr_t) (_COMM_PAGE64_TEXT_START + offset);
3635#endif
3636
	/* populate the routines in here */
	commpage_text_populate();

3640 SHARED_REGION_TRACE_DEBUG(
3641 ("commpage text: init() <-\n"));
3642}
3643
3644/*
3645 * Initialize the comm pages at boot time.
3646 */
3647void
3648vm_commpage_init(void)
3649{
3650 SHARED_REGION_TRACE_DEBUG(
3651 ("commpage: -> init()\n"));
3652
3653#if defined(__i386__) || defined(__x86_64__)
3654 /* create the 32-bit comm page */
3655 _vm_commpage_init(&commpage32_handle, _COMM_PAGE32_AREA_LENGTH);
3656 commpage32_entry = mach_memory_entry_from_port(commpage32_handle);
3657 commpage32_map = commpage32_entry->backing.map;
3658
3659 /* XXX if (cpu_is_64bit_capable()) ? */
3660 /* create the 64-bit comm page */
3661 _vm_commpage_init(&commpage64_handle, _COMM_PAGE64_AREA_LENGTH);
3662 commpage64_entry = mach_memory_entry_from_port(commpage64_handle);
3663 commpage64_map = commpage64_entry->backing.map;
3664
3665#endif /* __i386__ || __x86_64__ */
3666
3667 /* populate them according to this specific platform */
3668 commpage_populate();
3669 __commpage_setup = 1;
3670#if XNU_TARGET_OS_OSX
3671 if (__system_power_source == 0) {
		post_sys_powersource_internal(0, 1);
3673 }
3674#endif /* XNU_TARGET_OS_OSX */
3675
3676 SHARED_REGION_TRACE_DEBUG(
3677 ("commpage: init() <-\n"));
3678}
3679
3680/*
3681 * Enter the appropriate comm page into the task's address space.
3682 * This is called at exec() time via vm_map_exec().
3683 */
3684kern_return_t
3685vm_commpage_enter(
3686 vm_map_t map,
3687 task_t task,
3688 boolean_t is64bit)
3689{
3690#if defined(__arm64__)
3691#pragma unused(is64bit)
3692 (void)task;
3693 (void)map;
3694 pmap_insert_commpage(vm_map_pmap(map));
3695 return KERN_SUCCESS;
3696#else
3697 ipc_port_t commpage_handle, commpage_text_handle;
3698 vm_map_offset_t commpage_address, objc_address, commpage_text_address;
3699 vm_map_size_t commpage_size, objc_size, commpage_text_size;
3700 vm_map_kernel_flags_t vmk_flags;
3701 kern_return_t kr;
3702
3703 SHARED_REGION_TRACE_DEBUG(
3704 ("commpage: -> enter(%p,%p)\n",
3705 (void *)VM_KERNEL_ADDRPERM(map),
3706 (void *)VM_KERNEL_ADDRPERM(task)));
3707
3708 commpage_text_size = _COMM_PAGE_TEXT_AREA_LENGTH;
3709 /* the comm page is likely to be beyond the actual end of the VM map */
3710 vmk_flags = VM_MAP_KERNEL_FLAGS_FIXED();
3711 vmk_flags.vmkf_beyond_max = TRUE;
3712
3713 /* select the appropriate comm page for this task */
3714 assert(!(is64bit ^ vm_map_is_64bit(map)));
3715 if (is64bit) {
3716 commpage_handle = commpage64_handle;
3717 commpage_address = (vm_map_offset_t) _COMM_PAGE64_BASE_ADDRESS;
3718 commpage_size = _COMM_PAGE64_AREA_LENGTH;
3719 objc_size = _COMM_PAGE64_OBJC_SIZE;
3720 objc_address = _COMM_PAGE64_OBJC_BASE;
3721 commpage_text_handle = commpage_text64_handle;
3722 commpage_text_address = (vm_map_offset_t) commpage_text64_location;
3723 } else {
3724 commpage_handle = commpage32_handle;
3725 commpage_address =
3726 (vm_map_offset_t)(unsigned) _COMM_PAGE32_BASE_ADDRESS;
3727 commpage_size = _COMM_PAGE32_AREA_LENGTH;
3728 objc_size = _COMM_PAGE32_OBJC_SIZE;
3729 objc_address = _COMM_PAGE32_OBJC_BASE;
3730 commpage_text_handle = commpage_text32_handle;
3731 commpage_text_address = (vm_map_offset_t) commpage_text32_location;
3732 }
3733
3734 if ((commpage_address & (pmap_commpage_size_min(map->pmap) - 1)) == 0 &&
3735 (commpage_size & (pmap_commpage_size_min(map->pmap) - 1)) == 0) {
		/* the commpage is properly aligned and sized for pmap-nesting */
3737 vmk_flags.vm_tag = VM_MEMORY_SHARED_PMAP;
3738 vmk_flags.vmkf_nested_pmap = TRUE;
3739 }
3740
3741 /* map the comm page in the task's address space */
3742 assert(commpage_handle != IPC_PORT_NULL);
3743 kr = vm_map_enter_mem_object(
3744 map,
3745 &commpage_address,
3746 commpage_size,
3747 0,
3748 vmk_flags,
3749 commpage_handle,
3750 0,
3751 FALSE,
3752 VM_PROT_READ,
3753 VM_PROT_READ,
3754 VM_INHERIT_SHARE);
3755 if (kr != KERN_SUCCESS) {
3756 SHARED_REGION_TRACE_ERROR(
3757 ("commpage: enter(%p,0x%llx,0x%llx) "
3758 "commpage %p mapping failed 0x%x\n",
3759 (void *)VM_KERNEL_ADDRPERM(map),
3760 (long long)commpage_address,
3761 (long long)commpage_size,
3762 (void *)VM_KERNEL_ADDRPERM(commpage_handle), kr));
3763 }
3764
3765 /* map the comm text page in the task's address space */
3766 assert(commpage_text_handle != IPC_PORT_NULL);
3767 kr = vm_map_enter_mem_object(
3768 map,
3769 &commpage_text_address,
3770 commpage_text_size,
3771 0,
3772 vmk_flags,
3773 commpage_text_handle,
3774 0,
3775 FALSE,
3776 VM_PROT_READ | VM_PROT_EXECUTE,
3777 VM_PROT_READ | VM_PROT_EXECUTE,
3778 VM_INHERIT_SHARE);
3779 if (kr != KERN_SUCCESS) {
3780 SHARED_REGION_TRACE_ERROR(
3781 ("commpage text: enter(%p,0x%llx,0x%llx) "
3782 "commpage text %p mapping failed 0x%x\n",
3783 (void *)VM_KERNEL_ADDRPERM(map),
3784 (long long)commpage_text_address,
3785 (long long)commpage_text_size,
3786 (void *)VM_KERNEL_ADDRPERM(commpage_text_handle), kr));
3787 }
3788
3789 /*
3790 * Since we're here, we also pre-allocate some virtual space for the
3791 * Objective-C run-time, if needed...
3792 */
3793 if (objc_size != 0) {
3794 kr = vm_map_enter_mem_object(
3795 map,
3796 &objc_address,
3797 objc_size,
3798 0,
3799 vmk_flags,
3800 IPC_PORT_NULL,
3801 0,
3802 FALSE,
3803 VM_PROT_ALL,
3804 VM_PROT_ALL,
3805 VM_INHERIT_DEFAULT);
3806 if (kr != KERN_SUCCESS) {
3807 SHARED_REGION_TRACE_ERROR(
3808 ("commpage: enter(%p,0x%llx,0x%llx) "
3809 "objc mapping failed 0x%x\n",
3810 (void *)VM_KERNEL_ADDRPERM(map),
3811 (long long)objc_address,
3812 (long long)objc_size, kr));
3813 }
3814 }
3815
3816 SHARED_REGION_TRACE_DEBUG(
3817 ("commpage: enter(%p,%p) <- 0x%x\n",
3818 (void *)VM_KERNEL_ADDRPERM(map),
3819 (void *)VM_KERNEL_ADDRPERM(task), kr));
3820 return kr;
3821#endif
3822}
3823
3824int
3825vm_shared_region_slide(
3826 uint32_t slide,
3827 mach_vm_offset_t entry_start_address,
3828 mach_vm_size_t entry_size,
3829 mach_vm_offset_t slide_start,
3830 mach_vm_size_t slide_size,
3831 mach_vm_offset_t slid_mapping,
3832 memory_object_control_t sr_file_control,
3833 vm_prot_t prot)
3834{
3835 vm_shared_region_t sr;
3836 kern_return_t error;
3837
3838 SHARED_REGION_TRACE_DEBUG(
3839 ("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, slide_size %#llx\n",
3840 slide, entry_start_address, entry_size, slide_start, slide_size));
3841
	sr = vm_shared_region_get(current_task());
3843 if (sr == NULL) {
3844 printf(format: "%s: no shared region?\n", __FUNCTION__);
3845 SHARED_REGION_TRACE_DEBUG(
3846 ("vm_shared_region_slide: <- %d (no shared region)\n",
3847 KERN_FAILURE));
3848 return KERN_FAILURE;
3849 }
3850
3851 /*
3852 * Protect from concurrent access.
3853 */
3854 vm_shared_region_lock();
3855 while (sr->sr_slide_in_progress) {
3856 vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT);
3857 }
3858
3859 sr->sr_slide_in_progress = current_thread();
3860 vm_shared_region_unlock();
3861
	error = vm_shared_region_slide_mapping(sr,
	    (user_addr_t)slide_start,
	    slide_size,
	    entry_start_address,
	    entry_size,
	    slid_mapping,
	    slide,
	    sr_file_control,
	    prot);
3871 if (error) {
3872 printf(format: "slide_info initialization failed with kr=%d\n", error);
3873 }
3874
3875 vm_shared_region_lock();
3876
3877 assert(sr->sr_slide_in_progress == current_thread());
3878 sr->sr_slide_in_progress = THREAD_NULL;
3879 vm_shared_region_wakeup(&sr->sr_slide_in_progress);
3880
3881#if XNU_TARGET_OS_OSX
3882 if (error == KERN_SUCCESS) {
3883 shared_region_completed_slide = TRUE;
3884 }
3885#endif /* XNU_TARGET_OS_OSX */
3886 vm_shared_region_unlock();
3887
	vm_shared_region_deallocate(sr);
3889
3890 SHARED_REGION_TRACE_DEBUG(
3891 ("vm_shared_region_slide: <- %d\n",
3892 error));
3893
3894 return error;
3895}
3896
3897/*
3898 * Used during Authenticated Root Volume macOS boot.
3899 * Launchd re-execs itself and wants the new launchd to use
3900 * the shared cache from the new root volume. This call
3901 * makes all the existing shared caches stale to allow
3902 * that to happen.
3903 */
3904void
3905vm_shared_region_pivot(void)
3906{
3907 vm_shared_region_t shared_region = NULL;
3908
3909 vm_shared_region_lock();
3910
3911 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3912 assert(shared_region->sr_ref_count > 0);
3913 shared_region->sr_stale = TRUE;
3914 if (shared_region->sr_timer_call) {
3915 /*
3916 * We have a shared region ready to be destroyed
3917 * and just waiting for a delayed timer to fire.
3918 * Marking it stale cements its ineligibility to
3919 * be used ever again. So let's shorten the timer
3920 * aggressively down to 10 milliseconds and get rid of it.
3921 * This is a single quantum and we don't need to go
3922 * shorter than this duration. We want it to be short
3923 * enough, however, because we could have an unmount
3924 * of the volume hosting this shared region just behind
3925 * us.
3926 */
3927 uint64_t deadline;
3928 assert(shared_region->sr_ref_count == 1);
3929
3930 /*
3931 * Free the old timer call. Returns with a reference held.
3932 * If the old timer has fired and is waiting for the vm_shared_region_lock
3933 * lock, we will just return with an additional ref_count i.e. 2.
3934 * The old timer will then fire and just drop the ref count down to 1
3935 * with no other modifications.
3936 */
3937 vm_shared_region_reference_locked(shared_region);
3938
			/* Set up the timer. Keep the reference from above for this timer. */
			shared_region->sr_timer_call = thread_call_allocate(
				(thread_call_func_t) vm_shared_region_timeout,
				(thread_call_param_t) shared_region);
3943
3944 /* schedule the timer */
			clock_interval_to_deadline(10, /* 10 milliseconds */
				NSEC_PER_MSEC,
				&deadline);
			thread_call_enter_delayed(shared_region->sr_timer_call,
				deadline);
3950
3951 SHARED_REGION_TRACE_DEBUG(
3952 ("shared_region: pivot(%p): armed timer\n",
3953 (void *)VM_KERNEL_ADDRPERM(shared_region)));
3954 }
3955 }
3956
3957 vm_shared_region_unlock();
3958}
3959
3960/*
3961 * Routine to mark any non-standard slide shared cache region as stale.
3962 * This causes the next "reslide" spawn to create a new shared region.
3963 */
3964void
3965vm_shared_region_reslide_stale(boolean_t driverkit)
3966{
3967#if __has_feature(ptrauth_calls)
3968 vm_shared_region_t shared_region = NULL;
3969
3970 vm_shared_region_lock();
3971
3972 queue_iterate(&vm_shared_region_queue, shared_region, vm_shared_region_t, sr_q) {
3973 assert(shared_region->sr_ref_count > 0);
3974 if (shared_region->sr_driverkit == driverkit && !shared_region->sr_stale && shared_region->sr_reslide) {
3975 shared_region->sr_stale = TRUE;
3976 vm_shared_region_reslide_count++;
3977 }
3978 }
3979
3980 vm_shared_region_unlock();
3981#else
3982 (void)driverkit;
3983#endif /* __has_feature(ptrauth_calls) */
3984}
3985
3986/*
3987 * report if the task is using a reslide shared cache region.
3988 */
3989bool
3990vm_shared_region_is_reslide(__unused struct task *task)
3991{
3992 bool is_reslide = FALSE;
3993#if __has_feature(ptrauth_calls)
3994 vm_shared_region_t sr = vm_shared_region_get(task);
3995
3996 if (sr != NULL) {
3997 is_reslide = sr->sr_reslide;
3998 vm_shared_region_deallocate(sr);
3999 }
4000#endif /* __has_feature(ptrauth_calls) */
4001 return is_reslide;
4002}
4003
4004/*
4005 * This is called from powermanagement code to let kernel know the current source of power.
4006 * 0 if it is external source (connected to power )
4007 * 1 if it is internal power source ie battery
4008 */
4009void
4010#if XNU_TARGET_OS_OSX
4011post_sys_powersource(int i)
4012#else /* XNU_TARGET_OS_OSX */
4013post_sys_powersource(__unused int i)
4014#endif /* XNU_TARGET_OS_OSX */
4015{
4016#if XNU_TARGET_OS_OSX
	post_sys_powersource_internal(i, 0);
4018#endif /* XNU_TARGET_OS_OSX */
4019}
4020
4021
4022#if XNU_TARGET_OS_OSX
4023static void
4024post_sys_powersource_internal(int i, int internal)
4025{
4026 if (internal == 0) {
4027 __system_power_source = i;
4028 }
4029}
4030#endif /* XNU_TARGET_OS_OSX */
4031
4032void *
4033vm_shared_region_root_dir(
4034 struct vm_shared_region *sr)
4035{
4036 void *vnode;
4037
4038 vm_shared_region_lock();
4039 vnode = sr->sr_root_dir;
4040 vm_shared_region_unlock();
4041 return vnode;
4042}
4043