1/*
2 * Copyright (c) 2007-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach_kdp.h>
30#include <debug.h>
31
32#include <kern/assert.h>
33#include <kern/misc_protos.h>
34#include <kern/monotonic.h>
35#include <mach/vm_types.h>
36#include <mach/vm_param.h>
37#include <vm/vm_kern.h>
38#include <vm/vm_page.h>
39#include <vm/pmap.h>
40
41#include <machine/atomic.h>
42#include <arm64/proc_reg.h>
43#include <arm64/lowglobals.h>
44#include <arm/cpu_data_internal.h>
45#include <arm/misc_protos.h>
46#include <pexpert/arm64/boot.h>
47#include <pexpert/device_tree.h>
48
49#include <libkern/kernel_mach_header.h>
50#include <libkern/section_keywords.h>
51
52#include <san/kasan.h>
53
54#if __ARM_KERNEL_PROTECT__
/*
 * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of
 * mappable space preceding the kernel (as we unmap the kernel by cutting the
 * range covered by TTBR1 in half). This must also cover the exception vectors.
 */
60static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START);
61
62/* The exception vectors and the kernel cannot share root TTEs. */
63static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START);
64
65/*
66 * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of
67 * the exception vectors.
68 */
69static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK));
70#endif /* __ARM_KERNEL_PROTECT__ */
71
72#define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN)
73
74#if KASAN
75extern vm_offset_t shadow_pbase;
76extern vm_offset_t shadow_ptop;
77extern vm_offset_t physmap_vbase;
78extern vm_offset_t physmap_vtop;
79#endif
80
81/*
82 * We explicitly place this in const, as it is not const from a language
83 * perspective, but it is only modified before we actually switch away from
84 * the bootstrap page tables.
85 */
86SECURITY_READ_ONLY_LATE(uint8_t) bootstrap_pagetables[BOOTSTRAP_TABLE_SIZE] __attribute__((aligned(ARM_PGBYTES)));
87
88/*
89 * Denotes the end of xnu.
90 */
91extern void *last_kernel_symbol;
92
93extern void arm64_replace_bootstack(cpu_data_t*);
94extern void PE_slide_devicetree(vm_offset_t);
95
96/*
97 * KASLR parameters
98 */
99SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base;
100SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top;
101SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base;
102SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top;
103SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext;
104SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext;
105SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide;
106SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base;
107SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top;
108
109SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext;
110SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext;
111SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata;
112SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata;
113SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo;
114SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo;
115SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit;
116SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit;
117
118SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text;
119SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end;
120
121SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_base;
122SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernelcache_top;
123
124/* Used by <mach/arm/vm_param.h> */
125SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase;
126SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase;
127SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize;
128SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT;
129SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT;
130
131/* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move
132 * all kexts before the kernel. This is only for arm64 devices and looks
133 * something like the following:
134 * -- vmaddr order --
135 * 0xffffff8004004000 __PRELINK_TEXT
136 * 0xffffff8007004000 __TEXT (xnu)
137 * 0xffffff80075ec000 __DATA (xnu)
138 * 0xffffff80076dc000 __KLD (xnu)
139 * 0xffffff80076e0000 __LAST (xnu)
140 * 0xffffff80076e4000 __LINKEDIT (xnu)
141 * 0xffffff80076e4000 __PRELINK_DATA (not used yet)
142 * 0xffffff800782c000 __PRELINK_INFO
143 * 0xffffff80078e4000 -- End of kernelcache
144 */
145
146/* 24921709 - make XNU ready for KTRR
147 *
148 * Two possible kernel cache layouts, depending on which kcgen is being used.
149 * VAs increasing downwards.
150 * Old KCGEN:
151 *
152 * __PRELINK_TEXT
153 * __TEXT
154 * __DATA_CONST
155 * __TEXT_EXEC
156 * __KLD
157 * __LAST
158 * __DATA
159 * __PRELINK_DATA (expected empty)
160 * __LINKEDIT
161 * __PRELINK_INFO
162 *
163 * New kcgen:
164 *
165 * __PRELINK_TEXT <--- First KTRR (ReadOnly) segment
166 * __PLK_DATA_CONST
167 * __PLK_TEXT_EXEC
168 * __TEXT
169 * __DATA_CONST
170 * __TEXT_EXEC
171 * __KLD
172 * __LAST <--- Last KTRR (ReadOnly) segment
173 * __DATA
174 * __BOOTDATA (if present)
175 * __LINKEDIT
176 * __PRELINK_DATA (expected populated now)
177 * __PLK_LINKEDIT
178 * __PRELINK_INFO
179 *
180 */
181
182vm_offset_t mem_size; /* Size of actual physical memory present
183 * minus any performance buffer and possibly
184 * limited by mem_limit in bytes */
185uint64_t mem_actual; /* The "One True" physical memory size
186 * actually, it's the highest physical
187 * address + 1 */
188uint64_t max_mem; /* Size of physical memory (bytes), adjusted
189 * by maxmem */
190uint64_t max_mem_actual; /* Actual size of physical memory (bytes),
191 * adjusted by the maxmem boot-arg */
192uint64_t sane_size; /* Memory size to use for defaults
193 * calculations */
194/* This no longer appears to be used; kill it? */
195addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel
196 * virtual address known
197 * to the VM system */
198
199SECURITY_READ_ONLY_LATE(vm_offset_t) segEXTRADATA;
200SECURITY_READ_ONLY_LATE(unsigned long) segSizeEXTRADATA;
201
202/* Trust cache portion of EXTRADATA (if within it) */
203SECURITY_READ_ONLY_LATE(vm_offset_t) segTRUSTCACHE;
204SECURITY_READ_ONLY_LATE(unsigned long) segSizeTRUSTCACHE;
205
206SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT;
207SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWEST;
208SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRO;
209SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRO;
210
211/* Only set when booted from MH_FILESET kernel collections */
212SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTKC;
213SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTKC;
214SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROKC;
215SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROKC;
216SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTAuxKC;
217SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTAuxKC;
218SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTROAuxKC;
219SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTROAuxKC;
220SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTRXAuxKC;
221SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTRXAuxKC;
222SECURITY_READ_ONLY_LATE(vm_offset_t) segHIGHESTNLEAuxKC;
223
224SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB;
225SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT;
226
227#if XNU_MONITOR
228SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTEXTB;
229SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTEXT;
230
231SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLTRAMPB;
232SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLTRAMP;
233
234SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATACONSTB;
235SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATACONST;
236SECURITY_READ_ONLY_LATE(void *) pmap_stacks_start = NULL;
237SECURITY_READ_ONLY_LATE(void *) pmap_stacks_end = NULL;
238#if HAS_GUARDED_IO_FILTER
239SECURITY_READ_ONLY_LATE(void *) iofilter_stacks_start = NULL;
240SECURITY_READ_ONLY_LATE(void *) iofilter_stacks_end = NULL;
241#endif
242#endif
243
244SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB;
245SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST;
246
247SECURITY_READ_ONLY_LATE(vm_offset_t) segTEXTEXECB;
248SECURITY_READ_ONLY_LATE(unsigned long) segSizeTEXTEXEC;
249
250SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB;
251SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA;
252
253#if XNU_MONITOR
254SECURITY_READ_ONLY_LATE(vm_offset_t) segPPLDATAB;
255SECURITY_READ_ONLY_LATE(unsigned long) segSizePPLDATA;
256#endif
257
258SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB;
259SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA;
260extern vm_offset_t intstack_low_guard;
261extern vm_offset_t intstack_high_guard;
262extern vm_offset_t excepstack_high_guard;
263
264SECURITY_READ_ONLY_LATE(vm_offset_t) segLINKB;
265SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK;
266
267SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB;
268SECURITY_READ_ONLY_LATE(unsigned long) segSizeKLD;
269SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDDATAB;
270SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLDDATA;
271SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB;
272SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST;
273SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTDATACONSTB;
274SECURITY_READ_ONLY_LATE(unsigned long) segSizeLASTDATACONST;
275
276SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBTEXTB;
277SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBTEXT;
278SECURITY_READ_ONLY_LATE(vm_offset_t) segHIBDATAB;
279SECURITY_READ_ONLY_LATE(unsigned long) segSizeHIBDATA;
280SECURITY_READ_ONLY_LATE(vm_offset_t) sectHIBDATACONSTB;
281SECURITY_READ_ONLY_LATE(unsigned long) sectSizeHIBDATACONST;
282
283SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB;
284SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT;
285
286SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB;
287SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC;
288
289SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB;
290SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST;
291
292SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB;
293SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA;
294
295SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0;
296SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0;
297
298SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB;
299SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT;
300
301SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB;
302SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO;
303
304/* Only set when booted from MH_FILESET primary kernel collection */
305SECURITY_READ_ONLY_LATE(vm_offset_t) segKCTEXTEXECB;
306SECURITY_READ_ONLY_LATE(unsigned long) segSizeKCTEXTEXEC;
307SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATACONSTB;
308SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATACONST;
309SECURITY_READ_ONLY_LATE(static vm_offset_t) segKCDATAB;
310SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKCDATA;
311
312SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE;
313
314SECURITY_READ_ONLY_LATE(int) PAGE_SHIFT_CONST;
315
316SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern;
317SECURITY_READ_ONLY_LATE(vm_offset_t) etext;
318SECURITY_READ_ONLY_LATE(vm_offset_t) sdata;
319SECURITY_READ_ONLY_LATE(vm_offset_t) edata;
320
321SECURITY_READ_ONLY_LATE(static vm_offset_t) auxkc_mh, auxkc_base, auxkc_right_above;
322
323vm_offset_t alloc_ptpage(boolean_t map_static);
324SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next;
325extern int dtrace_keep_kernel_symbols(void);
326
327/*
328 * Bootstrap the system enough to run with virtual memory.
329 * Map the kernel's code and data, and allocate the system page table.
330 * Page_size must already be set.
331 *
332 * Parameters:
333 * first_avail: first available physical page -
334 * after kernel page tables
335 * avail_start: PA of first physical page
336 * avail_end: PA of last physical page
337 */
338SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail;
339SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end;
340SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start;
341SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end;
342SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end;
343SECURITY_READ_ONLY_LATE(unsigned long) real_phys_size;
344SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_base = (vm_map_address_t)0;
345SECURITY_READ_ONLY_LATE(vm_map_address_t) physmap_end = (vm_map_address_t)0;
346
347/**
348 * First physical address freely available to xnu.
349 */
350SECURITY_READ_ONLY_LATE(addr64_t) first_avail_phys = 0;
351
352/*
353 * Bounds of the kernelcache; used for accounting.
354 */
355SECURITY_READ_ONLY_LATE(vm_offset_t) arm_vm_kernelcache_phys_start;
356SECURITY_READ_ONLY_LATE(vm_offset_t) arm_vm_kernelcache_phys_end;
357
358#if __ARM_KERNEL_PROTECT__
359extern void ExceptionVectorsBase;
360extern void ExceptionVectorsEnd;
361#endif /* __ARM_KERNEL_PROTECT__ */
362
363typedef struct {
364 pmap_paddr_t pa;
365 vm_map_address_t va;
366 vm_size_t len;
367} ptov_table_entry;
368
369#define PTOV_TABLE_SIZE 8
370SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE];
371SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE;
372
373
374vm_map_address_t
375phystokv(pmap_paddr_t pa)
376{
377
378 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
379 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
380 return pa - ptov_table[i].pa + ptov_table[i].va;
381 }
382 }
383 if (__improbable((pa < gPhysBase) || ((pa - gPhysBase) >= real_phys_size))) {
384 panic("%s: illegal PA: 0x%llx; phys base 0x%llx, size 0x%llx", __func__,
385 (unsigned long long)pa, (unsigned long long)gPhysBase, (unsigned long long)real_phys_size);
386 }
387 return pa - gPhysBase + gVirtBase;
388}
389
390vm_map_address_t
391phystokv_range(pmap_paddr_t pa, vm_size_t *max_len)
392{
393
394 vm_size_t len;
395 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
396 if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) {
397 len = ptov_table[i].len - (pa - ptov_table[i].pa);
398 if (*max_len > len) {
399 *max_len = len;
400 }
401 return pa - ptov_table[i].pa + ptov_table[i].va;
402 }
403 }
404 len = PAGE_SIZE - (pa & PAGE_MASK);
405 if (*max_len > len) {
406 *max_len = len;
407 }
408 if (__improbable((pa < gPhysBase) || ((pa - gPhysBase) >= real_phys_size))) {
409 panic("%s: illegal PA: 0x%llx; phys base 0x%llx, size 0x%llx", __func__,
410 (unsigned long long)pa, (unsigned long long)gPhysBase, (unsigned long long)real_phys_size);
411 }
412 return pa - gPhysBase + gVirtBase;
413}
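
/*
 * Usage sketch (illustrative only, not part of the boot path): phystokv_range()
 * clamps *max_len to the number of bytes that remain virtually contiguous in
 * the physical aperture starting at pa, so a caller walking a physically
 * contiguous range re-translates after each chunk. The helper name below is
 * hypothetical.
 */
static inline void __unused
physmap_bzero_sketch(pmap_paddr_t pa, vm_size_t size)
{
	while (size > 0) {
		vm_size_t chunk = size;
		/* chunk is reduced to the virtually contiguous span available at pa */
		vm_map_address_t va = phystokv_range(pa, &chunk);
		bzero((void *)va, chunk);
		pa += chunk;
		size -= chunk;
	}
}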
414
415vm_offset_t
416ml_static_vtop(vm_offset_t va)
417{
418 for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) {
419 if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len))) {
420 return va - ptov_table[i].va + ptov_table[i].pa;
421 }
422 }
423 if (__improbable((va < gVirtBase) || (((vm_address_t)(va) - gVirtBase) >= gPhysSize))) {
424 panic("%s: illegal VA: %p; virt base 0x%llx, size 0x%llx", __func__,
425 (void*)va, (unsigned long long)gVirtBase, (unsigned long long)gPhysSize);
426 }
427 return (vm_address_t)(va) - gVirtBase + gPhysBase;
428}
429
430/*
431 * This rounds the given address up to the nearest boundary for a PTE contiguous
432 * hint.
433 */
434static vm_offset_t
435round_up_pte_hint_address(vm_offset_t address)
436{
437 vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT;
438 return (address + (hint_size - 1)) & ~(hint_size - 1);
439}
440
441/* allocate a page for a page table: we support static and dynamic mappings.
442 *
443 * returns a virtual address for the allocated page
444 *
 * for static mappings, we allocate from the region ropagetable_begin to ropagetable_end-1,
446 * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs.
447 *
448 * for dynamic mappings, we allocate from avail_start, which should remain RWNX.
449 */
450
451vm_offset_t
452alloc_ptpage(boolean_t map_static)
453{
454 vm_offset_t vaddr;
455
456#if !(defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR))
457 map_static = FALSE;
458#endif
459
460 if (!ropage_next) {
461 ropage_next = (vm_offset_t)&ropagetable_begin;
462 }
463
464 if (map_static) {
465 assert(ropage_next < (vm_offset_t)&ropagetable_end);
466
467 vaddr = ropage_next;
468 ropage_next += ARM_PGBYTES;
469
470 return vaddr;
471 } else {
		vaddr = phystokv(avail_start);
473 avail_start += ARM_PGBYTES;
474
475 return vaddr;
476 }
477}
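
/*
 * Typical boot-time use of alloc_ptpage() (see arm_vm_map() below for the real
 * thing): allocate a page-table page, zero it, and point a table entry at it.
 * A minimal sketch:
 *
 *	vm_offset_t ptp = alloc_ptpage(TRUE);	// static: comes from the RO page-table region
 *	bzero((void *)ptp, ARM_PGBYTES);
 *	*ttep = (kvtophys(ptp) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
 */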
478
479#if DEBUG
480
481void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);
482
483void
484dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out)
485{
486 unsigned int i;
487 boolean_t cur_ro, prev_ro = 0;
488 int start_entry = -1;
489 tt_entry_t cur, prev = 0;
490 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
491 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
492 boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
493 kvtophys((vm_offset_t)tt) < roend;
494
495 for (i = 0; i < TTE_PGENTRIES; i++) {
496 int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
497 cur = tt[i] & ARM_TTE_TABLE_MASK;
498
499 if (tt_static) {
500 /* addresses mapped by this entry are static if it is a block mapping,
501 * or the table was allocated from the RO page table region */
502 cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
503 } else {
504 cur_ro = 0;
505 }
506
507 if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
508 uintptr_t start, end, sz;
509
510 start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
511 start += tt_base;
512 end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
513 end += tt_base;
514
515 sz = end - start + 1;
516 printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n",
517 indent * 4, "",
518 (uint32_t)(start >> 32), (uint32_t)start,
519 (uint32_t)(end >> 32), (uint32_t)end,
520 prev_ro ? "Static " : "Dynamic",
521 (sz >> 20));
522
523 if (prev_ro) {
524 *rosz_out += sz;
525 } else {
526 *rwsz_out += sz;
527 }
528 }
529
530 if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
531 start_entry = i;
532 }
533
534 prev = cur;
535 prev_ro = cur_ro;
536 }
537}
538
539void
540dump_kva_space()
541{
542 uint64_t tot_rosz = 0, tot_rwsz = 0;
543 int ro_ptpages, rw_ptpages;
544 pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
545 pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
546 boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin &&
547 kvtophys((vm_offset_t)cpu_tte) < roend;
548 uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1);
549
550 printf("Root page table: %s\n", root_static ? "Static" : "Dynamic");
551
552 for (unsigned int i = 0; i < TTE_PGENTRIES; i++) {
553 pmap_paddr_t cur;
554 boolean_t cur_ro;
555 uintptr_t start, end;
556 uint64_t rosz = 0, rwsz = 0;
557
558 if ((cpu_tte[i] & ARM_TTE_VALID) == 0) {
559 continue;
560 }
561
562 cur = cpu_tte[i] & ARM_TTE_TABLE_MASK;
563 start = (uint64_t)i << ARM_TT_L1_SHIFT;
564 start = start + kva_base;
565 end = start + (ARM_TT_L1_SIZE - 1);
566 cur_ro = cur >= robegin && cur < roend;
567
568 printf("0x%08x_%08x-0x%08x_%08x %s\n",
569 (uint32_t)(start >> 32), (uint32_t)start,
570 (uint32_t)(end >> 32), (uint32_t)end,
571 cur_ro ? "Static " : "Dynamic");
572
573 dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz);
574 tot_rosz += rosz;
575 tot_rwsz += rwsz;
576 }
577
578 printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n",
579 tot_rosz >> 20,
580 tot_rwsz >> 20,
581 (tot_rosz >> 20) + (tot_rwsz >> 20));
582
583 ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT);
584 rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT);
585 printf("Pages used: static %d dynamic %d\n", ro_ptpages, rw_ptpages);
586}
587
588#endif /* DEBUG */
589
590#if __ARM_KERNEL_PROTECT__ || XNU_MONITOR
591/*
592 * arm_vm_map:
593 * root_ttp: The kernel virtual address for the root of the target page tables
594 * vaddr: The target virtual address
595 * pte: A page table entry value (may be ARM_PTE_EMPTY)
596 *
597 * This function installs pte at vaddr in root_ttp. Any page table pages needed
598 * to install pte will be allocated by this function.
599 */
600static void
601arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
602{
603 vm_offset_t ptpage = 0;
604 tt_entry_t * ttp = root_ttp;
605
606 tt_entry_t * l1_ttep = NULL;
607 tt_entry_t l1_tte = 0;
608
609 tt_entry_t * l2_ttep = NULL;
610 tt_entry_t l2_tte = 0;
611 pt_entry_t * ptep = NULL;
612 pt_entry_t cpte = 0;
613
614 /*
615 * Walk the target page table to find the PTE for the given virtual
616 * address. Allocate any page table pages needed to do this.
617 */
618 l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
619 l1_tte = *l1_ttep;
620
621 if (l1_tte == ARM_TTE_EMPTY) {
622 ptpage = alloc_ptpage(TRUE);
623 bzero((void *)ptpage, ARM_PGBYTES);
624 l1_tte = kvtophys(ptpage);
625 l1_tte &= ARM_TTE_TABLE_MASK;
626 l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA);
627 *l1_ttep = l1_tte;
628 ptpage = 0;
629 }
630
631 ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
632
633 l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
634 l2_tte = *l2_ttep;
635
636 if (l2_tte == ARM_TTE_EMPTY) {
637 ptpage = alloc_ptpage(TRUE);
638 bzero((void *)ptpage, ARM_PGBYTES);
639 l2_tte = kvtophys(ptpage);
640 l2_tte &= ARM_TTE_TABLE_MASK;
641 l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
642 *l2_ttep = l2_tte;
643 ptpage = 0;
644 }
645
646 ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);
647
648 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
649 cpte = *ptep;
650
651 /*
652 * If the existing PTE is not empty, then we are replacing a valid
653 * mapping.
654 */
655 if (cpte != ARM_PTE_EMPTY) {
656 panic("%s: cpte=%#llx is not empty, "
657 "vaddr=%#lx, pte=%#llx",
658 __FUNCTION__, cpte,
659 vaddr, pte);
660 }
661
662 *ptep = pte;
663}
664
#endif // __ARM_KERNEL_PROTECT__ || XNU_MONITOR
666
667#if __ARM_KERNEL_PROTECT__
668
669/*
670 * arm_vm_kernel_el0_map:
671 * vaddr: The target virtual address
672 * pte: A page table entry value (may be ARM_PTE_EMPTY)
673 *
674 * This function installs pte at vaddr for the EL0 kernel mappings.
675 */
676static void
677arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte)
678{
679 /* Calculate where vaddr will be in the EL1 kernel page tables. */
680 vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL);
681 arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte);
682}
683
684/*
685 * arm_vm_kernel_el1_map:
686 * vaddr: The target virtual address
687 * pte: A page table entry value (may be ARM_PTE_EMPTY)
688 *
689 * This function installs pte at vaddr for the EL1 kernel mappings.
690 */
691static void
692arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte)
693{
694 arm_vm_map(cpu_tte, vaddr, pte);
695}
696
697/*
698 * arm_vm_kernel_pte:
699 * vaddr: The target virtual address
700 *
701 * This function returns the PTE value for the given vaddr from the kernel page
 * tables. If the region has been block mapped, we return what an
 * equivalent PTE value would be (as regards permissions and flags). We also
 * remove the HINT bit (as we are not necessarily creating contiguous mappings).
705 */
706static pt_entry_t
707arm_vm_kernel_pte(vm_offset_t vaddr)
708{
709 tt_entry_t * ttp = cpu_tte;
710 tt_entry_t * ttep = NULL;
711 tt_entry_t tte = 0;
712 pt_entry_t * ptep = NULL;
713 pt_entry_t pte = 0;
714
715 ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
716 tte = *ttep;
717
718 assert(tte & ARM_TTE_VALID);
719
720 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
721 /* This is a block mapping; return the equivalent PTE value. */
722 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
723 pte |= ARM_PTE_TYPE_VALID;
724 pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK);
725 pte &= ~ARM_PTE_HINT_MASK;
726 return pte;
727 }
728
729 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
730 ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
731 tte = *ttep;
732
733 assert(tte & ARM_TTE_VALID);
734
735 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
736 /* This is a block mapping; return the equivalent PTE value. */
737 pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK);
738 pte |= ARM_PTE_TYPE_VALID;
739 pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK);
740 pte &= ~ARM_PTE_HINT_MASK;
741 return pte;
742 }
743
744 ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
745
746 ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
747 pte = *ptep;
748 pte &= ~ARM_PTE_HINT_MASK;
749 return pte;
750}
751
752/*
753 * arm_vm_prepare_kernel_el0_mappings:
754 * alloc_only: Indicates if PTE values should be copied from the EL1 kernel
755 * mappings.
756 *
757 * This function expands the kernel page tables to support the EL0 kernel
758 * mappings, and conditionally installs the PTE values for the EL0 kernel
759 * mappings (if alloc_only is false).
760 */
761static void
762arm_vm_prepare_kernel_el0_mappings(bool alloc_only)
763{
764 pt_entry_t pte = 0;
765 vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK;
766 vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK;
767 vm_offset_t cur = 0;
768 vm_offset_t cur_fixed = 0;
769
770 /* Expand for/map the exceptions vectors in the EL0 kernel mappings. */
771 for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) {
772 /*
773 * We map the exception vectors at a different address than that
774 * of the kernelcache to avoid sharing page table pages with the
775 * kernelcache (as this may cause issues with TLB caching of
		 * page table pages).
777 */
778 if (!alloc_only) {
779 pte = arm_vm_kernel_pte(cur);
780 }
781
782 arm_vm_kernel_el1_map(cur_fixed, pte);
783 arm_vm_kernel_el0_map(cur_fixed, pte);
784 }
785
786 __builtin_arm_dmb(DMB_ISH);
787 __builtin_arm_isb(ISB_SY);
788
789 if (!alloc_only) {
790 /*
791 * If we have created the alternate exception vector mappings,
792 * the boot CPU may now switch over to them.
793 */
794 set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START);
795 __builtin_arm_isb(ISB_SY);
796 }
797}
798
799/*
800 * arm_vm_populate_kernel_el0_mappings:
801 *
802 * This function adds all required mappings to the EL0 kernel mappings.
803 */
804static void
805arm_vm_populate_kernel_el0_mappings(void)
806{
807 arm_vm_prepare_kernel_el0_mappings(FALSE);
808}
809
810/*
811 * arm_vm_expand_kernel_el0_mappings:
812 *
 * This function expands the kernel page tables to accommodate the EL0 kernel
814 * mappings.
815 */
816static void
817arm_vm_expand_kernel_el0_mappings(void)
818{
819 arm_vm_prepare_kernel_el0_mappings(TRUE);
820}
821#endif /* __ARM_KERNEL_PROTECT__ */
822
823#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
824extern void bootstrap_instructions;
825
826/*
827 * arm_replace_identity_map takes the V=P map that we construct in start.s
828 * and repurposes it in order to have it map only the page we need in order
829 * to turn on the MMU. This prevents us from running into issues where
830 * KTRR will cause us to fault on executable block mappings that cross the
831 * KTRR boundary.
832 */
833static void
834arm_replace_identity_map(void)
835{
836 vm_offset_t addr;
837 pmap_paddr_t paddr;
838
839 pmap_paddr_t l1_ptp_phys = 0;
840 tt_entry_t *l1_ptp_virt = NULL;
841 tt_entry_t *tte1 = NULL;
842 pmap_paddr_t l2_ptp_phys = 0;
843 tt_entry_t *l2_ptp_virt = NULL;
844 tt_entry_t *tte2 = NULL;
845 pmap_paddr_t l3_ptp_phys = 0;
846 pt_entry_t *l3_ptp_virt = NULL;
847 pt_entry_t *ptep = NULL;
848
849 addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
850 paddr = kvtophys(addr);
851
852 /*
853 * Grab references to the V=P page tables, and allocate an L3 page.
854 */
855 l1_ptp_phys = kvtophys((vm_offset_t)&bootstrap_pagetables);
856 l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
857 tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];
858
859 l2_ptp_virt = L2_TABLE_VA(tte1);
860 l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
861 tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];
862
863 l3_ptp_virt = (pt_entry_t *)alloc_ptpage(TRUE);
864 l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
865 ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];
866
867 /*
868 * Replace the large V=P mapping with a mapping that provides only the
869 * mappings needed to turn on the MMU.
870 */
871
872 bzero(l1_ptp_virt, ARM_PGBYTES);
873 *tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);
874
875 bzero(l2_ptp_virt, ARM_PGBYTES);
876 *tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);
877
878 *ptep = (paddr & ARM_PTE_MASK) |
879 ARM_PTE_TYPE_VALID |
880 ARM_PTE_SH(SH_OUTER_MEMORY) |
881 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
882 ARM_PTE_AF |
883 ARM_PTE_AP(AP_RONA) |
884 ARM_PTE_NX;
885}
886#endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */
887
888tt_entry_t *arm_kva_to_tte(vm_offset_t);
889
890tt_entry_t *
891arm_kva_to_tte(vm_offset_t va)
892{
893 tt_entry_t *tte1, *tte2;
894 tte1 = cpu_tte + L1_TABLE_INDEX(va);
895 tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va);
896
897 return tte2;
898}
899
900#if XNU_MONITOR
901
902static inline pt_entry_t *
903arm_kva_to_pte(vm_offset_t va)
904{
905 tt_entry_t *tte2 = arm_kva_to_tte(va);
906 return L3_TABLE_VA(tte2) + L3_TABLE_INDEX(va);
907}
908
909#endif
910
911#define ARM64_GRANULE_ALLOW_BLOCK (1 << 0)
912#define ARM64_GRANULE_ALLOW_HINT (1 << 1)
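
/*
 * Callers below combine these per segment: ARM64_GRANULE_ALLOW_BLOCK permits L2
 * block mappings where size and alignment allow, ARM64_GRANULE_ALLOW_HINT
 * additionally permits the PTE contiguous hint, and 0 forces page-granule L3
 * mappings (used for regions such as the boot-data guard pages whose
 * protections must be managed per page).
 */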
913
914/**
915 * Updates a translation table entry (TTE) with the supplied value, unless doing so might render
916 * the pagetable region read-only before subsequent updates have finished. In that case, the TTE
917 * value will be saved off for deferred processing.
918 *
919 * @param ttep address of the TTE to update
920 * @param entry the value to store in ttep
921 * @param pa the base physical address mapped by the TTE
922 * @param ttebase L3-page- or L2-block-aligned base virtual address of the pagetable region
923 * @param granule mask indicating whether L2 block or L3 hint mappings are allowed for this segment
924 * @param deferred_ttep_pair 2-element array of addresses of deferred TTEs
925 * @param deferred_tte_pair 2-element array containing TTE values for deferred assignment to
926 * corresponding elements of deferred_ttep_pair
927 */
928static void
929update_or_defer_tte(tt_entry_t *ttep, tt_entry_t entry, pmap_paddr_t pa, vm_map_address_t ttebase,
930 unsigned granule __unused, tt_entry_t **deferred_ttep_pair, tt_entry_t *deferred_tte_pair)
931{
932 /*
933 * If we're trying to assign an entry that maps the current TTE region (identified by ttebase),
934 * and the pagetable is already live (indicated by kva_active), defer assignment of the current
935 * entry and possibly the entry after it until all other mappings in the segment have been
	 * updated. Otherwise we may end up immediately marking the pagetable region read-only,
	 * leading to a fault on a later assignment if we manage to outrun the TLB. This can
938 * happen on KTRR/CTRR-enabled devices when marking segDATACONST read-only, as the pagetables
939 * that map that segment must come from the segment itself. We therefore store the initial
940 * recursive TTE in deferred_ttep_pair[0] and its value in deferred_tte_pair[0]. We may also
941 * defer assignment of the TTE following that recursive TTE and store its value in
942 * deferred_tte_pair[1], because the TTE region following the current one may also contain
943 * pagetables and we must avoid marking that region read-only before updating those tables.
944 *
945 * We require that such recursive mappings must exist in regions that can be mapped with L2
946 * block entries if they are sufficiently large. This is what allows us to assume that no
947 * more than 2 deferred TTEs will be required, because:
948 * --If more than 2 adjacent L3 PTEs were required to map our pagetables, that would mean
949 * we would have at least one full L3 pagetable page and would instead use an L2 block.
950 * --If more than 2 adjacent L2 blocks were required to map our pagetables, that would
951 * mean we would have at least one full L2-block-sized region of TTEs and something
952 * is very wrong because no segment should be that large.
953 */
954 if ((deferred_ttep_pair != NULL) && (deferred_ttep_pair[0] != NULL) && (ttep == (deferred_ttep_pair[0] + 1))) {
955 assert(deferred_tte_pair[1] == 0);
956 deferred_ttep_pair[1] = ttep;
957 deferred_tte_pair[1] = entry;
958 } else if (kva_active && (phystokv(pa) == ttebase)) {
959 assert(deferred_ttep_pair != NULL);
960 assert(granule & ARM64_GRANULE_ALLOW_BLOCK);
961 if (deferred_ttep_pair[0] == NULL) {
962 deferred_ttep_pair[0] = ttep;
963 deferred_tte_pair[0] = entry;
964 } else {
965 assert(deferred_ttep_pair[1] == NULL);
966 deferred_ttep_pair[1] = ttep;
967 deferred_tte_pair[1] = entry;
968 }
969 } else {
970 *ttep = entry;
971 }
972}
973
974
975/*
976 * arm_vm_page_granular_helper updates protections at the L3 level. It will (if
977 * neccessary) allocate a page for the L3 table and update the corresponding L2
978 * entry. Then, it will iterate over the L3 table, updating protections as necessary.
979 * This expects to be invoked on a L2 entry or sub L2 entry granularity, so this should
980 * not be invoked from a context that does not do L2 iteration separately (basically,
981 * don't call this except from arm_vm_page_granular_prot).
982 *
983 * unsigned granule: 0 => force to page granule, or a combination of
984 * ARM64_GRANULE_* flags declared above.
985 */
986
987static void
988arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset,
989 int pte_prot_APX, int pte_prot_XN, unsigned granule,
990 tt_entry_t **deferred_ttep_pair, tt_entry_t *deferred_tte_pair)
991{
992 if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */
993 tt_entry_t *tte2;
994 tt_entry_t tmplate;
995 pmap_paddr_t pa;
996 pt_entry_t *ppte, ptmp;
997 addr64_t ppte_phys;
998 unsigned i;
999
1000 va &= ~ARM_TT_L2_OFFMASK;
1001 pa = va - gVirtBase + gPhysBase - pa_offset;
1002
1003 if (pa >= real_avail_end) {
1004 return;
1005 }
1006
1007 tte2 = arm_kva_to_tte(va);
1008
1009 assert(_end >= va);
1010 tmplate = *tte2;
1011
1012 if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) {
1013 /* pick up the existing page table. */
			ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK));
1015 } else {
1016 // TTE must be reincarnated with page level mappings.
1017
1018 // ... but we don't want to break up blocks on live
1019 // translation tables.
1020 assert(!kva_active);
1021
			ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0);
			bzero(ppte, ARM_PGBYTES);
			ppte_phys = kvtophys((vm_offset_t)ppte);
1025
1026 *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
1027 }
1028
1029 vm_offset_t len = _end - va;
1030 if ((pa + len) > real_avail_end) {
1031 _end -= (pa + len - real_avail_end);
1032 }
1033 assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase);
1034
1035 /* Round up to the nearest PAGE_SIZE boundary when creating mappings:
1036 * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave
1037 * a ragged non-PAGE_SIZE-aligned edge. */
		vm_offset_t rounded_end = round_page(_end);
1039 /* Apply the desired protections to the specified page range */
1040 for (i = 0; i <= (ARM_TT_L3_INDEX_MASK >> ARM_TT_L3_SHIFT); i++) {
1041 if ((start <= va) && (va < rounded_end)) {
1042 ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE;
1043 ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
1044 ptmp = ptmp | ARM_PTE_AP(pte_prot_APX);
1045 ptmp = ptmp | ARM_PTE_NX;
1046#if __ARM_KERNEL_PROTECT__
1047 ptmp = ptmp | ARM_PTE_NG;
1048#endif /* __ARM_KERNEL_PROTECT__ */
1049
1050 if (pte_prot_XN) {
1051 ptmp = ptmp | ARM_PTE_PNX;
1052 }
1053
1054 /*
1055 * If we can, apply the contiguous hint to this range. The hint is
1056 * applicable if the current address falls within a hint-sized range that will
1057 * be fully covered by this mapping request.
1058 */
				if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) &&
1060 (granule & ARM64_GRANULE_ALLOW_HINT) && use_contiguous_hint) {
1061 assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1))));
1062 ptmp |= ARM_PTE_HINT;
1063 /* Do not attempt to reapply the hint bit to an already-active mapping.
1064 * This very likely means we're attempting to change attributes on an already-active mapping,
1065 * which violates the requirement of the hint bit.*/
1066 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT));
1067 }
1068 /*
1069 * Do not change the contiguous bit on an active mapping. Even in a single-threaded
1070 * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in
1071 * a hint-sized entry on top of one or more existing page-sized entries. It's also useful
1072 * to make sure we're not trying to unhint a sub-range of a larger hinted range, which
1073 * could produce a later TLB conflict.
1074 */
1075 assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT)));
1076
				update_or_defer_tte(&ppte[i], ptmp, pa, (vm_map_address_t)ppte, granule, deferred_ttep_pair, deferred_tte_pair);
1078 }
1079
1080 va += ARM_PGBYTES;
1081 pa += ARM_PGBYTES;
1082 }
1083 }
1084}
1085
1086/*
1087 * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and
1088 * changing them. If a particular chunk necessitates L3 entries (for reasons of
1089 * alignment or length, or an explicit request that the entry be fully expanded), we
1090 * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic.
1091 */
1092static void
1093arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset,
1094 int tte_prot_XN, int pte_prot_APX, int pte_prot_XN,
1095 unsigned granule)
1096{
1097 tt_entry_t *deferred_ttep_pair[2] = {NULL};
1098 tt_entry_t deferred_tte_pair[2] = {0};
1099 vm_offset_t _end = start + size;
1100 vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK;
1101
1102 if (size == 0x0UL) {
1103 return;
1104 }
1105
1106 if (align_start > _end) {
1107 align_start = _end;
1108 }
1109
	arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
1111
1112 while ((_end - align_start) >= ARM_TT_L2_SIZE) {
1113 if (!(granule & ARM64_GRANULE_ALLOW_BLOCK)) {
			arm_vm_page_granular_helper(align_start, align_start + ARM_TT_L2_SIZE, align_start + 1, pa_offset,
			    pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
1116 } else {
1117 pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset;
1118 assert((pa & ARM_TT_L2_OFFMASK) == 0);
1119 tt_entry_t *tte2;
1120 tt_entry_t tmplate;
1121
			tte2 = arm_kva_to_tte(align_start);
1123
1124 if ((pa >= gPhysBase) && (pa < real_avail_end)) {
1125 tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK
1126 | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX
1127 | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY)
1128 | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
1129
1130#if __ARM_KERNEL_PROTECT__
1131 tmplate = tmplate | ARM_TTE_BLOCK_NG;
1132#endif /* __ARM_KERNEL_PROTECT__ */
1133 if (tte_prot_XN) {
1134 tmplate = tmplate | ARM_TTE_BLOCK_PNX;
1135 }
1136
				update_or_defer_tte(tte2, tmplate, pa, (vm_map_address_t)tte2 & ~ARM_TT_L2_OFFMASK,
				    granule, deferred_ttep_pair, deferred_tte_pair);
1139 }
1140 }
1141 align_start += ARM_TT_L2_SIZE;
1142 }
1143
1144 if (align_start < _end) {
		arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, granule, deferred_ttep_pair, deferred_tte_pair);
1146 }
1147
1148 if (deferred_ttep_pair[0] != NULL) {
1149#if DEBUG || DEVELOPMENT
1150 /*
1151 * Flush the TLB to catch bugs that might cause us to prematurely revoke write access from the pagetable page.
1152 * These bugs may otherwise be hidden by TLB entries in most cases, resulting in very rare panics.
1153 * Note that we always flush the TLB at the end of arm_vm_prot_finalize().
1154 */
1155 flush_mmu_tlb();
1156#endif
1157 /*
1158 * The first TTE in the pair is a recursive mapping of the pagetable region, so we must update it last
		 * to avoid potentially marking the page containing deferred_ttep_pair[1] read-only.
1160 */
1161 if (deferred_tte_pair[1] != 0) {
1162 os_atomic_store(deferred_ttep_pair[1], deferred_tte_pair[1], release);
1163 }
1164 os_atomic_store(deferred_ttep_pair[0], deferred_tte_pair[0], release);
1165 }
1166}
1167
1168static inline void
1169arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, unsigned granule)
1170{
	arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, granule);
1172}
1173
1174static inline void
1175arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, unsigned granule)
1176{
	arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, granule);
1178}
1179
1180static inline void
1181arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, unsigned granule)
1182{
	arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, granule);
1184}
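
/*
 * Quick reference for the three wrappers above (EL1 permissions; AP_RONA/AP_RWNA
 * give EL0 no access):
 *	RNX	read-only,  never executable
 *	ROX	read-only,  executable at EL1
 *	RWNX	read-write, never executable
 */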
1185
1186// Populate seg...AuxKC and fixup AuxKC permissions
1187static bool
1188arm_vm_auxkc_init(void)
1189{
1190 if (auxkc_mh == 0 || auxkc_base == 0) {
1191 return false; // no auxKC.
1192 }
1193
1194 /* Fixup AuxKC and populate seg*AuxKC globals used below */
	arm_auxkc_init((void*)auxkc_mh, (void*)auxkc_base);
1196
1197 if (segLOWESTAuxKC != segLOWEST) {
1198 panic("segLOWESTAuxKC (%p) not equal to segLOWEST (%p). auxkc_mh: %p, auxkc_base: %p",
1199 (void*)segLOWESTAuxKC, (void*)segLOWEST,
1200 (void*)auxkc_mh, (void*)auxkc_base);
1201 }
1202
1203 /*
1204 * The AuxKC LINKEDIT segment needs to be covered by the RO region but is excluded
1205 * from the RO address range returned by kernel_collection_adjust_mh_addrs().
1206 * Ensure the highest non-LINKEDIT address in the AuxKC is the current end of
1207 * its RO region before extending it.
1208 */
1209 assert(segHIGHESTROAuxKC == segHIGHESTNLEAuxKC);
1210 assert(segHIGHESTAuxKC >= segHIGHESTROAuxKC);
1211 if (segHIGHESTAuxKC > segHIGHESTROAuxKC) {
1212 segHIGHESTROAuxKC = segHIGHESTAuxKC;
1213 }
1214
1215 /*
1216 * The AuxKC RO region must be right below the device tree/trustcache so that it can be covered
1217 * by CTRR, and the AuxKC RX region must be within the RO region.
1218 */
1219 assert(segHIGHESTROAuxKC == auxkc_right_above);
1220 assert(segHIGHESTRXAuxKC <= segHIGHESTROAuxKC);
1221 assert(segLOWESTRXAuxKC <= segHIGHESTRXAuxKC);
1222 assert(segLOWESTROAuxKC <= segLOWESTRXAuxKC);
1223 assert(segLOWESTAuxKC <= segLOWESTROAuxKC);
1224
	if (segHIGHESTRXAuxKC < segLOWEST) {
		arm_vm_page_granular_RNX(segHIGHESTRXAuxKC, segLOWEST - segHIGHESTRXAuxKC, 0);
	}
	if (segLOWESTRXAuxKC < segHIGHESTRXAuxKC) {
		arm_vm_page_granular_ROX(segLOWESTRXAuxKC, segHIGHESTRXAuxKC - segLOWESTRXAuxKC, 0); // Refined in OSKext::readPrelinkedExtensions
	}
	if (segLOWESTROAuxKC < segLOWESTRXAuxKC) {
		arm_vm_page_granular_RNX(segLOWESTROAuxKC, segLOWESTRXAuxKC - segLOWESTROAuxKC, 0);
	}
	if (segLOWESTAuxKC < segLOWESTROAuxKC) {
		arm_vm_page_granular_RWNX(segLOWESTAuxKC, segLOWESTROAuxKC - segLOWESTAuxKC, 0);
	}
1237
1238 return true;
1239}
1240
1241void
1242arm_vm_prot_init(__unused boot_args * args)
1243{
1244 segLOWESTTEXT = UINT64_MAX;
1245 if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) {
1246 segLOWESTTEXT = segPRELINKTEXTB;
1247 }
1248 assert(segSizeTEXT);
1249 if (segTEXTB < segLOWESTTEXT) {
1250 segLOWESTTEXT = segTEXTB;
1251 }
1252 assert(segLOWESTTEXT < UINT64_MAX);
1253
1254 segEXTRADATA = 0;
1255 segSizeEXTRADATA = 0;
1256 segTRUSTCACHE = 0;
1257 segSizeTRUSTCACHE = 0;
1258
1259 segLOWEST = segLOWESTTEXT;
1260 segLOWESTRO = segLOWESTTEXT;
1261
1262 if (segLOWESTKC && segLOWESTKC < segLOWEST) {
1263 /*
1264 * kernel collections have segments below the kernel. In particular the collection mach header
1265 * is below PRELINK_TEXT and is not covered by any other segments already tracked.
1266 */
		arm_vm_page_granular_RNX(segLOWESTKC, segLOWEST - segLOWESTKC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1268 segLOWEST = segLOWESTKC;
1269 if (segLOWESTROKC && segLOWESTROKC < segLOWESTRO) {
1270 segLOWESTRO = segLOWESTROKC;
1271 }
1272 if (segHIGHESTROKC && segHIGHESTROKC > segHIGHESTRO) {
1273 segHIGHESTRO = segHIGHESTROKC;
1274 }
1275 }
1276
1277 DTEntry memory_map;
1278 int err;
1279
1280 // Device Tree portion of EXTRADATA
1281 if (SecureDTIsLockedDown()) {
1282 segEXTRADATA = (vm_offset_t)PE_state.deviceTreeHead;
1283 segSizeEXTRADATA = PE_state.deviceTreeSize;
1284 }
1285
1286 // Trust Caches portion of EXTRADATA
1287 {
1288 DTMemoryMapRange const *trustCacheRange;
1289 unsigned int trustCacheRangeSize;
1290
		err = SecureDTLookupEntry(NULL, "chosen/memory-map", &memory_map);
1292 assert(err == kSuccess);
1293
		err = SecureDTGetProperty(memory_map, "TrustCache", (void const **)&trustCacheRange, &trustCacheRangeSize);
1295 if (err == kSuccess) {
1296 if (trustCacheRangeSize != sizeof(DTMemoryMapRange)) {
1297 panic("Unexpected /chosen/memory-map/TrustCache property size %u != %zu", trustCacheRangeSize, sizeof(DTMemoryMapRange));
1298 }
1299
			vm_offset_t const trustCacheRegion = phystokv(trustCacheRange->paddr);
1301 if (trustCacheRegion < segLOWEST) {
1302 if (segEXTRADATA != 0) {
1303 if (trustCacheRegion != segEXTRADATA + segSizeEXTRADATA) {
1304 panic("Unexpected location of TrustCache region: %#lx != %#lx",
1305 trustCacheRegion, segEXTRADATA + segSizeEXTRADATA);
1306 }
1307 segSizeEXTRADATA += trustCacheRange->length;
1308 } else {
1309 // Not all devices support CTRR device trees.
1310 segEXTRADATA = trustCacheRegion;
1311 segSizeEXTRADATA = trustCacheRange->length;
1312 }
1313 }
1314#if !(DEVELOPMENT || DEBUG)
1315 else {
1316 panic("TrustCache region is in an unexpected place: %#lx > %#lx", trustCacheRegion, segLOWEST);
1317 }
1318#endif
1319 segTRUSTCACHE = trustCacheRegion;
1320 segSizeTRUSTCACHE = trustCacheRange->length;
1321 }
1322 }
1323
1324 if (segSizeEXTRADATA != 0) {
1325 if (segEXTRADATA <= segLOWEST) {
1326 segLOWEST = segEXTRADATA;
1327 if (segEXTRADATA <= segLOWESTRO) {
1328 segLOWESTRO = segEXTRADATA;
1329 }
1330 } else {
1331 panic("EXTRADATA is in an unexpected place: %#lx > %#lx", segEXTRADATA, segLOWEST);
1332 }
1333
		arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1335 }
1336
1337 const DTMemoryMapRange *auxKC_range, *auxKC_header_range;
1338 unsigned int auxKC_range_size, auxKC_header_range_size;
1339
	err = SecureDTGetProperty(memory_map, "AuxKC", (const void**)&auxKC_range,
	    &auxKC_range_size);
1342 if (err != kSuccess) {
1343 goto noAuxKC;
1344 }
1345 assert(auxKC_range_size == sizeof(DTMemoryMapRange));
	err = SecureDTGetProperty(memory_map, "AuxKC-mach_header",
	    (const void**)&auxKC_header_range, &auxKC_header_range_size);
1348 if (err != kSuccess) {
1349 goto noAuxKC;
1350 }
1351 assert(auxKC_header_range_size == sizeof(DTMemoryMapRange));
1352
1353 if (auxKC_header_range->paddr == 0 || auxKC_range->paddr == 0) {
1354 goto noAuxKC;
1355 }
1356
	auxkc_mh = phystokv(auxKC_header_range->paddr);
	auxkc_base = phystokv(auxKC_range->paddr);
1359
1360 if (auxkc_base < segLOWEST) {
1361 auxkc_right_above = segLOWEST;
1362 segLOWEST = auxkc_base;
1363 } else {
1364 panic("auxkc_base (%p) not below segLOWEST (%p)", (void*)auxkc_base, (void*)segLOWEST);
1365 }
1366
1367 /* Map AuxKC RWNX initially so that arm_vm_auxkc_init can traverse
1368 * it and apply fixups (after we're off the bootstrap translation
1369 * tables).
1370 */
	arm_vm_page_granular_RWNX(auxkc_base, auxKC_range->length, 0);
1372
1373noAuxKC:
1374 /* Map coalesced kext TEXT segment RWNX for now */
	arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions

	/* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
	arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // Refined in OSKext::readPrelinkedExtensions

	/* Map coalesced kext TEXT_EXEC segment RX (could be empty) */
	arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Refined in OSKext::readPrelinkedExtensions
1382
1383 /* if new segments not present, set space between PRELINK_TEXT and xnu TEXT to RWNX
1384 * otherwise we no longer expect any space between the coalesced kext read only segments and xnu rosegments
1385 */
1386 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
1387 if (segSizePRELINKTEXT) {
			arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT),
			    ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1390 }
1391 } else {
1392 /*
1393 * If we have the new segments, we should still protect the gap between kext
1394 * read-only pages and kernel read-only pages, in the event that this gap
1395 * exists.
1396 */
1397 if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
			arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST),
			    ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1400 }
1401 }
1402
1403 /*
1404 * Protection on kernel text is loose here to allow shenanigans early on. These
1405 * protections are tightened in arm_vm_prot_finalize(). This is necessary because
1406 * we currently patch LowResetVectorBase in cpu.c.
1407 *
1408 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
1409 */
	arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1411
1412 /* Can DATACONST start out and stay RNX?
1413 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
1414 * Make RNX in prot_finalize
1415 */
	arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);

	arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1419
1420#if XNU_MONITOR
1421 arm_vm_page_granular_ROX(segPPLTEXTB, segSizePPLTEXT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1422 arm_vm_page_granular_ROX(segPPLTRAMPB, segSizePPLTRAMP, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1423 arm_vm_page_granular_RNX(segPPLDATACONSTB, segSizePPLDATACONST, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1424#endif
1425
1426 /* DATA segment will remain RWNX */
	arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1428#if XNU_MONITOR
1429 arm_vm_page_granular_RWNX(segPPLDATAB, segSizePPLDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1430#endif
1431
	arm_vm_page_granular_RWNX(segHIBDATAB, segSizeHIBDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);

	arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, 0);
	arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, 0);
	arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, 0);
	arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, 0);

	arm_vm_page_granular_ROX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
	arm_vm_page_granular_RNX(segKLDDATAB, segSizeKLDDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
	arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
	arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Coalesced kext LINKEDIT segment
	arm_vm_page_granular_ROX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK); // __LAST may be empty, but we cannot assume this
	if (segLASTDATACONSTB) {
		arm_vm_page_granular_RWNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK); // __LASTDATA_CONST may be empty, but we cannot assume this
	}
	arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // Prelink __DATA for kexts (RW data)

	if (segSizePLKLLVMCOV > 0) {
		arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // LLVM code coverage data
	}
	arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* PreLinkInfoDictionary */
1453
1454 /* Record the bounds of the kernelcache. */
1455 vm_kernelcache_base = segLOWEST;
1456 vm_kernelcache_top = end_kern;
1457}
1458
1459/*
1460 * return < 0 for a < b
1461 * 0 for a == b
1462 * > 0 for a > b
1463 */
1464typedef int (*cmpfunc_t)(const void *a, const void *b);
1465
1466extern void
1467qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
1468
1469static int
1470cmp_ptov_entries(const void *a, const void *b)
1471{
1472 const ptov_table_entry *entry_a = a;
1473 const ptov_table_entry *entry_b = b;
1474 // Sort in descending order of segment length
1475 if (entry_a->len < entry_b->len) {
1476 return 1;
1477 } else if (entry_a->len > entry_b->len) {
1478 return -1;
1479 } else {
1480 return 0;
1481 }
1482}
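
/*
 * Sorting in descending order of length keeps the largest physmap ranges in the
 * first slots of ptov_table[], so the linear scans in phystokv() and
 * ml_static_vtop() resolve the most commonly hit translations with the fewest
 * comparisons.
 */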
1483
1484SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;
1485
1486#define ROUND_L1(addr) (((addr) + ARM_TT_L1_OFFMASK) & ~(ARM_TT_L1_OFFMASK))
1487#define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1488
1489static void
1490arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, unsigned granule)
1491{
1492 pmap_paddr_t pa_offset;
1493
1494 if (__improbable(ptov_index >= PTOV_TABLE_SIZE)) {
1495 panic("%s: PTOV table limit exceeded; segment va = 0x%llx, size = 0x%llx", __func__,
1496 (unsigned long long)orig_va, (unsigned long long)len);
1497 }
1498 assert((orig_va & ARM_PGMASK) == 0);
1499 temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
1500 if (ptov_index == 0) {
1501 temp_ptov_table[ptov_index].va = physmap_base;
1502 } else {
1503 temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
1504 }
1505 if (granule & ARM64_GRANULE_ALLOW_BLOCK) {
1506 vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
1507 vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
1508 if (new_offset < orig_offset) {
1509 temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
1510 } else if (new_offset > orig_offset) {
1511 temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
1512 }
1513 }
1514 assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
1515 temp_ptov_table[ptov_index].len = round_page(len);
1516 pa_offset = temp_ptov_table[ptov_index].va - orig_va;
1517 arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, granule);
1518 ++ptov_index;
1519}
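/*
 * The ARM64_GRANULE_ALLOW_BLOCK branch above nudges the physmap VA so that it shares the
 * physical address's offset within a twig-sized block, which is what allows block mappings
 * (and contiguous hints) to be used for the slid aperture. A minimal sketch of that
 * adjustment, with a hypothetical helper name (not part of xnu):
 */
#if 0 /* illustrative sketch only -- never compiled */
static vm_map_address_t
align_va_to_pa_twig_offset(vm_map_address_t va, pmap_paddr_t pa)
{
	vm_map_address_t pa_off = pa & ARM_TT_TWIG_OFFMASK;
	vm_map_address_t va_off = va & ARM_TT_TWIG_OFFMASK;

	if (va_off < pa_off) {
		va += (pa_off - va_off);      /* move forward within the current twig block */
	} else if (va_off > pa_off) {
		va = ROUND_TWIG(va) + pa_off; /* start at the matching offset in the next block */
	}
	return va; /* now (va & ARM_TT_TWIG_OFFMASK) == (pa & ARM_TT_TWIG_OFFMASK) */
}
#endif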
1520
1521#if XNU_MONITOR
1522
1523SECURITY_READ_ONLY_LATE(static boolean_t) keep_linkedit = FALSE;
1524
1525static void
1526arm_vm_physmap_init(boot_args *args)
1527{
1528 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1529 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1530
1531 // This is memory that will either be handed back to the VM layer via ml_static_mfree(),
1532 // or will be available for general-purpose use. Physical aperture mappings for this memory
1533 // must be at page granularity, so that PPL ownership or cache attribute changes can be reflected
1534 // in the physical aperture mappings.
1535
1536 // Slid region between gPhysBase and beginning of protected text
1537 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA, 0);
1538
1539 // kext bootstrap segments
1540#if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
1541 /* __KLD,__text is covered by the rorgn */
1542 arm_vm_physmap_slide(temp_ptov_table, segKLDB, segSizeKLD, AP_RONA, 0);
1543#endif
1544 arm_vm_physmap_slide(temp_ptov_table, segKLDDATAB, segSizeKLDDATA, AP_RONA, 0);
1545
1546 // Early-boot data
1547 arm_vm_physmap_slide(temp_ptov_table, segBOOTDATAB, segSizeBOOTDATA, AP_RONA, 0);
1548
1549 PE_parse_boot_argn("keepsyms", &keep_linkedit, sizeof(keep_linkedit));
1550#if CONFIG_DTRACE
1551 if (dtrace_keep_kernel_symbols()) {
1552 keep_linkedit = TRUE;
1553 }
1554#endif /* CONFIG_DTRACE */
1555#if KASAN_DYNAMIC_BLACKLIST
1556 /* KASAN's dynamic blacklist needs to query the LINKEDIT segment at runtime. As such, the
1557 * kext bootstrap code will not jettison LINKEDIT on kasan kernels, so don't bother to relocate it. */
1558 keep_linkedit = TRUE;
1559#endif
1560 if (!keep_linkedit) {
1561 // Kernel LINKEDIT
1562 arm_vm_physmap_slide(temp_ptov_table, segLINKB, segSizeLINK, AP_RWNA, 0);
1563
1564 if (segSizePLKLINKEDIT) {
1565 // Prelinked kernel LINKEDIT
1566 arm_vm_physmap_slide(temp_ptov_table, segPLKLINKEDITB, segSizePLKLINKEDIT, AP_RWNA, 0);
1567 }
1568 }
1569
1570 // Prelinked kernel plists
1571 arm_vm_physmap_slide(temp_ptov_table, segPRELINKINFOB, segSizePRELINKINFO, AP_RWNA, 0);
1572
1573 // Device tree (if not locked down), ramdisk, boot args
1574 arm_vm_physmap_slide(temp_ptov_table, end_kern, (args->topOfKernelData - gPhysBase + gVirtBase) - end_kern, AP_RWNA, 0);
1575 if (!SecureDTIsLockedDown()) {
1576 PE_slide_devicetree(temp_ptov_table[ptov_index - 1].va - end_kern);
1577 }
1578
1579 // Remainder of physical memory
1580 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1581 real_avail_end - args->topOfKernelData, AP_RWNA, 0);
1582
1583 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1584
1585 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1586 // segments should be placed earliest in the table to optimize lookup performance.
1587 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1588
1589 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1590}
1591
1592#else
1593
1594static void
1595arm_vm_physmap_init(boot_args *args)
1596{
1597 ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
1598 bzero(temp_ptov_table, sizeof(temp_ptov_table));
1599
1600 // Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
1601 arm_vm_physmap_slide(temp_ptov_table, gVirtBase, segLOWEST - gVirtBase, AP_RWNA,
1602 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT);
1603
1604 arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern,
1605 ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); /* Device Tree (if not locked down), RAM Disk (if present), bootArgs */
1606
1607 arm_vm_physmap_slide(temp_ptov_table, (args->topOfKernelData - gPhysBase + gVirtBase),
1608 real_avail_end - args->topOfKernelData, AP_RWNA, ARM64_GRANULE_ALLOW_BLOCK | ARM64_GRANULE_ALLOW_HINT); // rest of physmem
1609
1610 assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= physmap_end);
1611
1612 // Sort in descending order of segment length. LUT traversal is linear, so largest (most likely used)
1613 // segments should be placed earliest in the table to optimize lookup performance.
1614 qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);
1615
1616 memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
1617}
1618
1619#endif // XNU_MONITOR
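/*
 * Both variants of arm_vm_physmap_init() leave ptov_table sorted by descending length so a
 * linear lookup hits the largest ranges first. A minimal sketch of how a consumer might walk
 * it; the helper name is hypothetical, and the real physical-to-virtual translation lives in
 * the pmap/ml layers and may differ:
 */
#if 0 /* illustrative sketch only -- never compiled */
static vm_offset_t
example_ptov_lookup(const ptov_table_entry *table, unsigned int count, pmap_paddr_t pa)
{
	/* Largest (most frequently hit) ranges are checked first because of the descending sort. */
	for (unsigned int i = 0; i < count; i++) {
		if ((pa >= table[i].pa) && (pa < (table[i].pa + table[i].len))) {
			return (vm_offset_t)(table[i].va + (pa - table[i].pa));
		}
	}
	return 0; /* physical address not covered by the physical aperture */
}
#endif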
1620
1621void
1622arm_vm_prot_finalize(boot_args * args __unused)
1623{
1624 /*
1625 * At this point, we are far enough along in the boot process that it will be
1626 * safe to free up all of the memory preceding the kernel. It may in fact
1627 * be safe to do this earlier.
1628 *
1629 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
1630 * as usable.
1631 */
1632
1633 /*
1634 * If an old-style PRELINK segment exists, free the memory before it and the memory
1635 * between it and XNU's text. Otherwise we're dealing with a new-style kernel cache,
1636 * so we should just free the memory before the PRELINK_TEXT segment, since the rest
1637 * of the KEXT read-only data segments should be immediately followed by XNU's TEXT segment.
1638 */
1639
1640 ml_static_mfree(phystokv(gPhysBase), segLOWEST - gVirtBase);
1641
1642 /*
1643 * KTRR support means we will be mucking with these pages and trying to
1644 * protect them; we cannot free the pages to the VM if we do this.
1645 */
1646 if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
1647 /* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
1648 ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
1649 }
1650
1651 /* tighten permissions on kext read only data and code */
1652 arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, ARM64_GRANULE_ALLOW_BLOCK);
1653 arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1654
1655 cpu_stack_alloc(&BootCpuData);
1656 arm64_replace_bootstack(&BootCpuData);
1657 ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);
1658
1659#if __ARM_KERNEL_PROTECT__
1660 arm_vm_populate_kernel_el0_mappings();
1661#endif /* __ARM_KERNEL_PROTECT__ */
1662
1663#if XNU_MONITOR
1664#if !defined(KERNEL_INTEGRITY_KTRR) && !defined(KERNEL_INTEGRITY_CTRR)
1665 /* __KLD,__text is covered by the rorgn */
1666 for (vm_offset_t va = segKLDB; va < (segKLDB + segSizeKLD); va += ARM_PGBYTES) {
1667 pt_entry_t *pte = arm_kva_to_pte(va);
1668 *pte = ARM_PTE_EMPTY;
1669 }
1670#endif
1671 for (vm_offset_t va = segKLDDATAB; va < (segKLDDATAB + segSizeKLDDATA); va += ARM_PGBYTES) {
1672 pt_entry_t *pte = arm_kva_to_pte(va);
1673 *pte = ARM_PTE_EMPTY;
1674 }
1675 /* Clear the original stack mappings; these pages should be mapped through ptov_table. */
1676 for (vm_offset_t va = segBOOTDATAB; va < (segBOOTDATAB + segSizeBOOTDATA); va += ARM_PGBYTES) {
1677 pt_entry_t *pte = arm_kva_to_pte(va);
1678 *pte = ARM_PTE_EMPTY;
1679 }
1680 /* Clear the original PRELINKINFO mapping. This segment should be jettisoned during I/O Kit
1681 * initialization before we reach this point. */
1682 for (vm_offset_t va = segPRELINKINFOB; va < (segPRELINKINFOB + segSizePRELINKINFO); va += ARM_PGBYTES) {
1683 pt_entry_t *pte = arm_kva_to_pte(va);
1684 *pte = ARM_PTE_EMPTY;
1685 }
1686 if (!keep_linkedit) {
1687 for (vm_offset_t va = segLINKB; va < (segLINKB + segSizeLINK); va += ARM_PGBYTES) {
1688 pt_entry_t *pte = arm_kva_to_pte(va);
1689 *pte = ARM_PTE_EMPTY;
1690 }
1691 if (segSizePLKLINKEDIT) {
1692 for (vm_offset_t va = segPLKLINKEDITB; va < (segPLKLINKEDITB + segSizePLKLINKEDIT); va += ARM_PGBYTES) {
1693 pt_entry_t *pte = arm_kva_to_pte(va);
1694 *pte = ARM_PTE_EMPTY;
1695 }
1696 }
1697 }
1698#endif /* XNU_MONITOR */
1699
1700#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1701 /*
1702 * __LAST,__pinst should no longer be executable.
1703 */
1704 arm_vm_page_granular_RNX(segLASTB, segSizeLAST, ARM64_GRANULE_ALLOW_BLOCK);
1705
1706 /* __LASTDATA_CONST should no longer be writable. */
1707 if (segLASTDATACONSTB) {
1708 arm_vm_page_granular_RNX(segLASTDATACONSTB, segSizeLASTDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1709 }
1710
1711 /*
1712 * __KLD,__text should no longer be executable.
1713 */
1714 arm_vm_page_granular_RNX(segKLDB, segSizeKLD, ARM64_GRANULE_ALLOW_BLOCK);
1715
1716 /*
1717 * Must wait until all other region permissions are set before locking down DATA_CONST
1718 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
1719 * and will become immutable.
1720 */
1721#endif
1722
1723 arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, ARM64_GRANULE_ALLOW_BLOCK);
1724
1725 __builtin_arm_dsb(DSB_ISH);
1726 flush_mmu_tlb();
1727}
1728
1729/*
1730 * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of
1731 * address accesses. It can be enabled separately for TTBR0 (user) and
1732 * TTBR1 (kernel).
1733 */
1734void
1735arm_set_kernel_tbi(void)
1736{
1737#if !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI
1738 uint64_t old_tcr, new_tcr;
1739
1740 old_tcr = new_tcr = get_tcr();
1741 /*
1742 * For kernel configurations that require TBI support on
1743 * PAC systems, we enable DATA TBI only.
1744 */
1745 new_tcr |= TCR_TBI1_TOPBYTE_IGNORED;
1746 new_tcr |= TCR_TBID1_ENABLE;
1747
1748 if (old_tcr != new_tcr) {
1749 set_tcr(new_tcr);
1750 sysreg_restore.tcr_el1 = new_tcr;
1751 }
1752#endif /* !__ARM_KERNEL_PROTECT__ && CONFIG_KERNEL_TBI */
1753}
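/*
 * With TCR_EL1.TBI1 set (and TBID1 limiting it to data accesses), the top byte of a TTBR1
 * data address is ignored by translation, so software may stash a tag there. A minimal
 * sketch of tagging and stripping; the macro and function names below are hypothetical and
 * not part of xnu:
 */
#if 0 /* illustrative sketch only -- never compiled */
#define EXAMPLE_TBI_SHIFT 56
#define EXAMPLE_TBI_MASK  (0xffULL << EXAMPLE_TBI_SHIFT)

static inline vm_offset_t
example_tbi_tag(vm_offset_t addr, uint8_t tag)
{
	/* Replace the top byte with the tag; translation ignores it for data accesses. */
	return (addr & ~EXAMPLE_TBI_MASK) | ((vm_offset_t)tag << EXAMPLE_TBI_SHIFT);
}

static inline vm_offset_t
example_tbi_strip(vm_offset_t addr)
{
	/* Kernel (TTBR1) addresses are canonically all-ones in the top byte; restore that. */
	return addr | EXAMPLE_TBI_MASK;
}
#endif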
1754
1755static void
1756arm_set_user_tbi(void)
1757{
1758#if !__ARM_KERNEL_PROTECT__
1759 uint64_t old_tcr, new_tcr;
1760
1761 old_tcr = new_tcr = get_tcr();
1762 new_tcr |= TCR_TBI0_TOPBYTE_IGNORED;
1763
1764 if (old_tcr != new_tcr) {
1765 set_tcr(new_tcr);
1766 sysreg_restore.tcr_el1 = new_tcr;
1767 }
1768#endif /* !__ARM_KERNEL_PROTECT__ */
1769}
1770
1771/*
1772 * Initialize and enter blank (invalid) page tables in a L1 translation table for a given VA range.
1773 *
1774 * This is a helper function used to build up the initial page tables for the kernel translation table.
1775 * With KERNEL_INTEGRITY we keep at least the root level of the kernel page table immutable, thus the need
1776 * to preallocate before machine_lockdown any L1 entries necessary during the entire kernel runtime.
1777 *
1778 * For a given VA range, if necessary, allocate new L2 translation tables and install the table entries in
1779 * the appropriate L1 table indexes. This is called before the translation table is active.
1780 *
1781 * parameters:
1782 *
1783 * tt: virtual address of L1 translation table to modify
1784 * start: beginning of VA range
1785 * end: end of VA range
1786 * static_map: whether to allocate the new translation table page from read-only memory
1787 * table_attrs: attributes of new table entry in addition to VALID and TYPE_TABLE attributes
1788 *
1789 */
1790
1791static void
1792init_ptpages(tt_entry_t *tt, vm_map_address_t start, vm_map_address_t end, bool static_map, uint64_t table_attrs)
1793{
1794 tt_entry_t *l1_tte;
1795 vm_offset_t ptpage_vaddr;
1796
1797 l1_tte = tt + ((start & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
1798
1799 while (start < end) {
1800 if (*l1_tte == ARM_TTE_EMPTY) {
1801 /* Allocate a page and setup L1 Table TTE in L1 */
1802 ptpage_vaddr = alloc_ptpage(static_map);
1803 *l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | table_attrs;
1804 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
1805 }
1806
1807 if ((start + ARM_TT_L1_SIZE) < start) {
1808 /* If this is the last L1 entry, it must cover the last mapping. */
1809 break;
1810 }
1811
1812 start += ARM_TT_L1_SIZE;
1813 l1_tte++;
1814 }
1815}
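/*
 * The walk above relies on each L1 entry covering exactly ARM_TT_L1_SIZE of VA and on the
 * index arithmetic used on entry. A minimal sketch of that slot computation (hypothetical
 * helper name, not part of xnu):
 */
#if 0 /* illustrative sketch only -- never compiled */
static tt_entry_t *
example_l1_slot(tt_entry_t *tt, vm_map_address_t va)
{
	/* Keep only the L1 index bits of the VA, then shift them down to get the slot number. */
	return tt + ((va & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
}
#endif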
1816
1817#define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
1818#define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
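/*
 * Worked example (illustrative) of the slide computed in arm_vm_init() below:
 *
 *   uint64_t slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
 *   // slide is page aligned and strictly less than ARM64_PHYSMAP_SLIDE_RANGE (1GB),
 *   // so physmap_base moves by at most 1GB - PAGE_SIZE.
 */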
1819
1820void
1821arm_vm_init(uint64_t memory_size, boot_args * args)
1822{
1823 vm_map_address_t va_l1, va_l1_end;
1824 tt_entry_t *cpu_l1_tte;
1825 vm_map_address_t va_l2, va_l2_end;
1826 tt_entry_t *cpu_l2_tte;
1827 pmap_paddr_t boot_ttep;
1828 tt_entry_t *boot_tte;
1829 uint64_t mem_segments;
1830 vm_offset_t ptpage_vaddr;
1831 vm_map_address_t dynamic_memory_begin;
1832
1833 /*
1834 * Get the virtual and physical kernel-managed memory base from boot_args.
1835 */
1836 gVirtBase = args->virtBase;
1837 gPhysBase = args->physBase;
1838#if KASAN
1839 real_phys_size = args->memSize + (shadow_ptop - shadow_pbase);
1840#else
1841 real_phys_size = args->memSize;
1842#endif
1843 /*
1844 * Ensure the physical region we specify for the VM to manage ends on a
1845 * software page boundary. Note that the software page size (PAGE_SIZE)
1846 * may be a multiple of the hardware page size specified in ARM_PGBYTES.
1847 * We must round the reported memory size down to the nearest PAGE_SIZE
1848 * boundary to ensure the VM does not try to manage a page it does not
1849 * completely own. The KASAN shadow region, if present, is managed entirely
1850 * in units of the hardware page size and should not need similar treatment.
1851 */
1852 gPhysSize = mem_size = ((gPhysBase + args->memSize) & ~PAGE_MASK) - gPhysBase;
1853
1854 mem_actual = args->memSizeActual ? args->memSizeActual : mem_size;
1855
1856 if ((memory_size != 0) && (mem_size > memory_size)) {
1857 mem_size = memory_size;
1858 max_mem_actual = memory_size;
1859 } else {
1860 max_mem_actual = mem_actual;
1861 }
1862#if !defined(ARM_LARGE_MEMORY)
1863 if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 2)) {
1864 panic("Unsupported memory configuration %lx", mem_size);
1865 }
1866#endif
1867
1868#if defined(ARM_LARGE_MEMORY)
1869 unsigned long physmap_l1_entries = ((real_phys_size + ARM64_PHYSMAP_SLIDE_RANGE) >> ARM_TT_L1_SHIFT) + 1;
1870 physmap_base = VM_MIN_KERNEL_ADDRESS - (physmap_l1_entries << ARM_TT_L1_SHIFT);
1871#else
1872 physmap_base = phystokv(args->topOfKernelData);
1873#endif
1874
1875 // Slide the physical aperture to a random page-aligned location within the slide range
1876 uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK);
1877 assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE);
1878
1879 physmap_base += physmap_slide;
1880
1881#if XNU_MONITOR
1882 physmap_base = ROUND_TWIG(physmap_base);
1883#if defined(ARM_LARGE_MEMORY)
1884 static_memory_end = phystokv(args->topOfKernelData);
1885#else
1886 static_memory_end = physmap_base + mem_size;
1887#endif // ARM_LARGE_MEMORY
1888 physmap_end = physmap_base + real_phys_size;
1889#else
1890#if defined(ARM_LARGE_MEMORY)
1891 /* For large memory systems with no PPL such as virtual machines */
1892 static_memory_end = phystokv(args->topOfKernelData);
1893 physmap_end = physmap_base + real_phys_size;
1894#else
1895 static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment
1896 physmap_end = physmap_base + real_phys_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE);
1897#endif // ARM_LARGE_MEMORY
1898#endif
1899
1900#if KASAN && !defined(ARM_LARGE_MEMORY)
1901 /* add the KASAN stolen memory to the physmap */
1902 dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase);
1903#else
1904 dynamic_memory_begin = static_memory_end;
1905#endif
1906#if XNU_MONITOR
1907 pmap_stacks_start = (void*)dynamic_memory_begin;
1908 dynamic_memory_begin += PPL_STACK_REGION_SIZE;
1909 pmap_stacks_end = (void*)dynamic_memory_begin;
1910
1911#if HAS_GUARDED_IO_FILTER
1912 iofilter_stacks_start = (void*)dynamic_memory_begin;
1913 dynamic_memory_begin += IOFILTER_STACK_REGION_SIZE;
1914 iofilter_stacks_end = (void*)dynamic_memory_begin;
1915#endif
1916#endif
1917 if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) {
1918 panic("Unsupported memory configuration %lx", mem_size);
1919 }
1920
1921 boot_tte = (tt_entry_t *)&bootstrap_pagetables;
1922 boot_ttep = kvtophys((vm_offset_t)boot_tte);
1923
1924#if DEVELOPMENT || DEBUG
1925 /* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently large to
1926 * hold our bootstrap mappings for any possible slide */
1927 size_t bytes_mapped = dynamic_memory_begin - gVirtBase;
1928 size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE);
1929 /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */
1930 size_t pages_used = 2 * (l1_entries + 1);
1931 if (pages_used > BOOTSTRAP_TABLE_SIZE) {
1932 panic("BOOTSTRAP_TABLE_SIZE too small for memory config");
1933 }
1934#endif
1935
1936 /*
1937 * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping.
1938 * TTBR1 L1, TTBR1 L2 - kernel mapping
1939 */
1940
1941 /*
1942 * TODO: free bootstrap table memory back to the allocator.
1943 * On large memory systems the bootstrap tables could be quite large.
1944 * After bootstrap is complete, xnu can warm start with a single 16KB page mapping
1945 * to trampoline to KVA. This requires only 3 pages to stay resident.
1946 */
1947 first_avail_phys = avail_start = args->topOfKernelData;
1948
1949#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
1950 arm_replace_identity_map();
1951#endif
1952
1953 /* Initialize invalid tte page */
1954 invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1955 invalid_ttep = kvtophys((vm_offset_t)invalid_tte);
1956 bzero(invalid_tte, ARM_PGBYTES);
1957
1958 /*
1959 * Initialize l1 page table page
1960 */
1961 cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE);
1962 cpu_ttep = kvtophys((vm_offset_t)cpu_tte);
1963 bzero(cpu_tte, ARM_PGBYTES);
1964 avail_end = gPhysBase + mem_size;
1965 assert(!(avail_end & PAGE_MASK));
1966
1967#if KASAN
1968 real_avail_end = gPhysBase + real_phys_size;
1969#else
1970 real_avail_end = avail_end;
1971#endif
1972
1973 /*
1974 * Initialize l1 and l2 page table pages:
1975 * map physical memory at the kernel base virtual address and
1976 * cover the kernel dynamic address range section.
1977 *
1978 * The so-called physical aperture should be statically mapped.
1979 */
1980 init_ptpages(cpu_tte, gVirtBase, dynamic_memory_begin, TRUE, ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
1981
1982#if defined(ARM_LARGE_MEMORY)
1983 /*
1984 * Initialize l1 page table pages :
1985 * on large memory systems the physical aperture exists separately below
1986 * the rest of the kernel virtual address space
1987 */
1988 init_ptpages(cpu_tte, physmap_base, ROUND_L1(physmap_end), TRUE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
1989#endif
1990
1991
1992#if __ARM_KERNEL_PROTECT__
1993 /* Expand the page tables to prepare for the EL0 mappings. */
1994 arm_vm_expand_kernel_el0_mappings();
1995#endif /* __ARM_KERNEL_PROTECT__ */
1996
1997 /*
1998 * Now retrieve addresses for various segments from kernel mach-o header
1999 */
2000 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT", &segSizePRELINKTEXT);
2001 segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST", &segSizePLKDATACONST);
2002 segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC", &segSizePLKTEXTEXEC);
2003 segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT", &segSizeTEXT);
2004 segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST", &segSizeDATACONST);
2005 segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC", &segSizeTEXTEXEC);
2006#if XNU_MONITOR
2007 segPPLTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTEXT", &segSizePPLTEXT);
2008 segPPLTRAMPB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLTRAMP", &segSizePPLTRAMP);
2009 segPPLDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA_CONST", &segSizePPLDATACONST);
2010#endif
2011 segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA", &segSizeDATA);
2012#if XNU_MONITOR
2013 segPPLDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PPLDATA", &segSizePPLDATA);
2014#endif
2015
2016 segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA", &segSizeBOOTDATA);
2017 segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT", &segSizeLINK);
2018 segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD", &segSizeKLD);
2019 segKLDDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLDDATA", &segSizeKLDDATA);
2020 segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA", &segSizePRELINKDATA);
2021 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO", &segSizePRELINKINFO);
2022 segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV", &segSizePLKLLVMCOV);
2023 segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT", &segSizePLKLINKEDIT);
2024 segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST", &segSizeLAST);
2025 segLASTDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LASTDATA_CONST", &segSizeLASTDATACONST);
2026
2027 sectHIBTEXTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__TEXT_EXEC", "__hib_text", &sectSizeHIBTEXT);
2028 sectHIBDATACONSTB = (vm_offset_t) getsectdatafromheader(&_mh_execute_header, "__DATA_CONST", "__hib_const", &sectSizeHIBDATACONST);
2029 segHIBDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__HIBDATA", &segSizeHIBDATA);
2030
2031 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
2032 kernel_mach_header_t *kc_mh = PE_get_kc_header(KCKindPrimary);
2033
2034 // fileset has kext PLK_TEXT_EXEC under kernel collection TEXT_EXEC following kernel's LAST
2035 segKCTEXTEXECB = (vm_offset_t) getsegdatafromheader(kc_mh, "__TEXT_EXEC", &segSizeKCTEXTEXEC);
2036 assert(segPLKTEXTEXECB && !segSizePLKTEXTEXEC); // kernel PLK_TEXT_EXEC must be empty
2037
2038 assert(segLASTB); // kernel LAST can be empty, but it must have
2039 // a valid address for computations below.
2040
2041 assert(segKCTEXTEXECB <= segLASTB); // KC TEXT_EXEC must contain kernel LAST
2042 assert(segKCTEXTEXECB + segSizeKCTEXTEXEC >= segLASTB + segSizeLAST);
2043 segPLKTEXTEXECB = segLASTB + segSizeLAST;
2044 segSizePLKTEXTEXEC = segSizeKCTEXTEXEC - (segPLKTEXTEXECB - segKCTEXTEXECB);
2045
2046 // fileset has kext PLK_DATA_CONST under kernel collection DATA_CONST following kernel's LASTDATA_CONST
2047 segKCDATACONSTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA_CONST", &segSizeKCDATACONST);
2048 assert(segPLKDATACONSTB && !segSizePLKDATACONST); // kernel PLK_DATA_CONST must be empty
2049 assert(segLASTDATACONSTB && segSizeLASTDATACONST); // kernel LASTDATA_CONST must be non-empty
2050 assert(segKCDATACONSTB <= segLASTDATACONSTB); // KC DATA_CONST must contain kernel LASTDATA_CONST
2051 assert(segKCDATACONSTB + segSizeKCDATACONST >= segLASTDATACONSTB + segSizeLASTDATACONST);
2052 segPLKDATACONSTB = segLASTDATACONSTB + segSizeLASTDATACONST;
2053 segSizePLKDATACONST = segSizeKCDATACONST - (segPLKDATACONSTB - segKCDATACONSTB);
2054
2055 // fileset has kext PRELINK_DATA under kernel collection DATA following kernel's empty PRELINK_DATA
2056 segKCDATAB = (vm_offset_t) getsegdatafromheader(kc_mh, "__DATA", &segSizeKCDATA);
2057 assert(segPRELINKDATAB && !segSizePRELINKDATA); // kernel PRELINK_DATA must be empty
2058 assert(segKCDATAB <= segPRELINKDATAB); // KC DATA must contain kernel PRELINK_DATA
2059 assert(segKCDATAB + segSizeKCDATA >= segPRELINKDATAB + segSizePRELINKDATA);
2060 segSizePRELINKDATA = segSizeKCDATA - (segPRELINKDATAB - segKCDATAB);
2061
2062 // fileset has consolidated PRELINK_TEXT, PRELINK_INFO and LINKEDIT at the kernel collection level
2063 assert(segPRELINKTEXTB && !segSizePRELINKTEXT); // kernel PRELINK_TEXT must be empty
2064 segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_TEXT", &segSizePRELINKTEXT);
2065 assert(segPRELINKINFOB && !segSizePRELINKINFO); // kernel PRELINK_INFO must be empty
2066 segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(kc_mh, "__PRELINK_INFO", &segSizePRELINKINFO);
2067 segLINKB = (vm_offset_t) getsegdatafromheader(kc_mh, "__LINKEDIT", &segSizeLINK);
2068 }
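/*
 * Worked example (illustrative, hypothetical addresses) of the fileset bounds math above:
 * if the kernel collection __TEXT_EXEC starts at 0xfffffff008000000 with size 0x900000 and
 * the kernel's __LAST starts at 0xfffffff008700000 with size 0x4000, then:
 *
 *   segPLKTEXTEXECB    = 0xfffffff008704000              // segLASTB + segSizeLAST
 *   segSizePLKTEXTEXEC = 0x900000 - 0x704000 = 0x1fc000  // remainder of the KC __TEXT_EXEC
 */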
2069
2070 (void) PE_parse_boot_argn("use_contiguous_hint", &use_contiguous_hint, sizeof(use_contiguous_hint));
2071 assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */
2072
2073 /* if one of the new segments is present, the other one better be as well */
2074 if (segSizePLKDATACONST || segSizePLKTEXTEXEC) {
2075 assert(segSizePLKDATACONST && segSizePLKTEXTEXEC);
2076 }
2077
2078 etext = (vm_offset_t) segTEXTB + segSizeTEXT;
2079 sdata = (vm_offset_t) segDATAB;
2080 edata = (vm_offset_t) segDATAB + segSizeDATA;
2081 end_kern = round_page(segHIGHESTKC ? segHIGHESTKC : getlastkerneladdr()); /* Force end to next page */
2082
2083 vm_set_page_size();
2084
2085 vm_kernel_base = segTEXTB;
2086 vm_kernel_top = (vm_offset_t) &last_kernel_symbol;
2087 vm_kext_base = segPRELINKTEXTB;
2088 vm_kext_top = vm_kext_base + segSizePRELINKTEXT;
2089
2090 vm_prelink_stext = segPRELINKTEXTB;
2091 if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) {
2092 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT;
2093 } else {
2094 vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC;
2095 }
2096 vm_prelink_sinfo = segPRELINKINFOB;
2097 vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO;
2098 vm_slinkedit = segLINKB;
2099 vm_elinkedit = segLINKB + segSizeLINK;
2100
2101 vm_prelink_sdata = segPRELINKDATAB;
2102 vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA;
2103
2104 arm_vm_prot_init(args);
2105
2106 /*
2107 * Initialize the page tables for the low globals:
2108 * cover this address range:
2109 * LOW_GLOBAL_BASE_ADDRESS + 2MB
2110 */
2111 va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS;
2112 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2113 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2114 ptpage_vaddr = alloc_ptpage(TRUE);
2115 *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN;
2116 bzero((void *)ptpage_vaddr, ARM_PGBYTES);
2117
2118 /*
2119 * Initialize l2 page table pages :
2120 * cover this address range:
2121 * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS
2122 */
2123#if defined(ARM_LARGE_MEMORY)
2124 /*
2125 * Dynamically mapped memory outside the VM allocator VA range, required to bootstrap the VM system.
2126 * We don't expect it to exceed 64GB; there's no sense mapping any more space between here and the VM heap range.
2127 */
2128 init_ptpages(cpu_tte, dynamic_memory_begin, ROUND_L1(dynamic_memory_begin), FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2129#else
2130 /*
2131 * TODO: do these pages really need to come from RO memory?
2132 * With legacy 3-level table systems we never mapped more than a single L1 entry, so this may be dead code
2133 */
2134 init_ptpages(cpu_tte, dynamic_memory_begin, VM_MAX_KERNEL_ADDRESS, TRUE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2135#endif
2136
2137#if KASAN
2138 /* record the extent of the physmap */
2139 physmap_vbase = physmap_base;
2140 physmap_vtop = physmap_end;
2141 kasan_init();
2142#endif /* KASAN */
2143
2144#if CONFIG_CPU_COUNTERS
2145 mt_early_init();
2146#endif /* CONFIG_CPU_COUNTERS */
2147
2148 arm_set_user_tbi();
2149
2150 arm_vm_physmap_init(args);
2151 set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK);
2152
2153 ml_enable_monitor();
2154
2155 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
2156
2157 flush_mmu_tlb();
2158 kva_active = TRUE;
2159 // global table pointers may need to be different due to physical aperture remapping
2160 cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep));
2161 invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep));
2162
2163 // From here on out, we're off the bootstrap translation tables.
2164
2165
2166 /* AuxKC initialization has to be deferred until this point, since
2167 * the AuxKC may not have been fully mapped in the bootstrap
2168 * tables, if it spilled downwards into the prior L2 block.
2169 *
2170 * Now that its mapping, as set up by arm_vm_prot_init(), is active,
2171 * we can traverse and fix it up.
2172 */
2173
2174 /* Calculate the physical bounds of the kernelcache; using
2175 * gVirtBase/gPhysBase math to do this directly is generally a bad idea
2176 * as the physmap is no longer physically contiguous. However, this is
2177 * done here as segLOWEST and end_kern are both virtual addresses within the
2178 * bootstrap physmap, and because kvtophys references the page tables
2179 * (at least at the time this comment was written), meaning that at
2180 * least end_kern may not point to a valid mapping on some kernelcache
2181 * configurations, so kvtophys would report a physical address of 0.
2182 *
2183 * Long term, the kernelcache should probably be described in terms of
2184 * multiple physical ranges, as there is no strong guarantee or
2185 * requirement that the kernelcache will always be physically
2186 * contiguous.
2187 */
2188 arm_vm_kernelcache_phys_start = segLOWEST - gVirtBase + gPhysBase;
2189 arm_vm_kernelcache_phys_end = end_kern - gVirtBase + gPhysBase;
2190
2191 /* Calculate the number of pages that belong to the kernelcache. */
2192 vm_page_kernelcache_count = (unsigned int) (atop_64(arm_vm_kernelcache_phys_end - arm_vm_kernelcache_phys_start));
2193
2194 if (arm_vm_auxkc_init()) {
2195 if (segLOWESTROAuxKC < segLOWESTRO) {
2196 segLOWESTRO = segLOWESTROAuxKC;
2197 }
2198 if (segHIGHESTROAuxKC > segHIGHESTRO) {
2199 segHIGHESTRO = segHIGHESTROAuxKC;
2200 }
2201 if (segLOWESTRXAuxKC < segLOWESTTEXT) {
2202 segLOWESTTEXT = segLOWESTRXAuxKC;
2203 }
2204 assert(segLOWEST == segLOWESTAuxKC);
2205
2206 // The preliminary auxKC mapping has been broken up.
2207 flush_mmu_tlb();
2208 }
2209
2210 sane_size = mem_size - (avail_start - gPhysBase);
2211 max_mem = mem_size;
2212 vm_kernel_slid_base = segLOWESTTEXT;
2213 // vm_kernel_slide is set by arm_init()->arm_slide_rebase_and_sign_image()
2214 vm_kernel_stext = segTEXTB;
2215
2216 if (kernel_mach_header_is_in_fileset(&_mh_execute_header)) {
2217 vm_kernel_etext = segTEXTEXECB + segSizeTEXTEXEC;
2218 vm_kernel_slid_top = vm_slinkedit;
2219 } else {
2220 assert(segDATACONSTB == segTEXTB + segSizeTEXT);
2221 assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST);
2222 vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC;
2223 vm_kernel_slid_top = vm_prelink_einfo;
2224 }
2225
2226 dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin);
2227#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
2228 // reserve a 32MB region without permission overrides to use later for a CTRR unit test
2229 {
2230 extern vm_offset_t ctrr_test_page;
2231 tt_entry_t *new_tte;
2232
2233 ctrr_test_page = dynamic_memory_begin;
2234 dynamic_memory_begin += ARM_TT_L2_SIZE;
2235 cpu_l1_tte = cpu_tte + ((ctrr_test_page & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2236 assert((*cpu_l1_tte) & ARM_TTE_VALID);
2237 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((ctrr_test_page & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2238 assert((*cpu_l2_tte) == ARM_TTE_EMPTY);
2239 new_tte = (tt_entry_t *)alloc_ptpage(FALSE);
2240 bzero(new_tte, ARM_PGBYTES);
2241 *cpu_l2_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
2242 }
2243#endif /* defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST) */
2244#if XNU_MONITOR
2245 for (vm_offset_t cur = (vm_offset_t)pmap_stacks_start; cur < (vm_offset_t)pmap_stacks_end; cur += ARM_PGBYTES) {
2246 arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
2247 }
2248#if HAS_GUARDED_IO_FILTER
2249 for (vm_offset_t cur = (vm_offset_t)iofilter_stacks_start; cur < (vm_offset_t)iofilter_stacks_end; cur += ARM_PGBYTES) {
2250 arm_vm_map(cpu_tte, cur, ARM_PTE_EMPTY);
2251 }
2252#endif
2253#endif
2254 pmap_bootstrap(dynamic_memory_begin);
2255
2256 disable_preemption();
2257
2258 /*
2259 * Initialize l3 page table pages :
2260 * cover this address range:
2261 * 2MB + FrameBuffer size + 10MB for each 256MB segment
2262 */
2263
2264 mem_segments = (mem_size + 0x0FFFFFFF) >> 28;
2265
2266 va_l1 = dynamic_memory_begin;
2267 va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20);
2268 va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes);
2269 va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL;
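	/*
	 * Worked example (illustrative, ignoring the framebuffer term): for a hypothetical 8GB device,
	 *
	 *   mem_segments = (0x200000000ULL + 0x0FFFFFFF) >> 28;        // 32 x 256MB segments
	 *   window       = (2 + (mem_segments * 10)) << 20;            // 322MB
	 *   window       = (window + 0x007FFFFFULL) & ~0x007FFFFFULL;  // rounded up to 328MB
	 */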
2270
2271 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2272
2273 while (va_l1 < va_l1_end) {
2274 va_l2 = va_l1;
2275
2276 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2277 /* If this is the last L1 entry, it must cover the last mapping. */
2278 va_l2_end = va_l1_end;
2279 } else {
2280 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2281 }
2282
2283 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2284
2285 while (va_l2 < va_l2_end) {
2286 pt_entry_t * ptp;
2287 pmap_paddr_t ptp_phys;
2288
2289 /* Allocate a page and setup L3 Table TTE in L2 */
2290 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2291 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2292
2293 bzero(ptp, ARM_PGBYTES);
2294 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2295
2296 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2297
2298 va_l2 += ARM_TT_L2_SIZE;
2299 cpu_l2_tte++;
2300 }
2301
2302 va_l1 = va_l2_end;
2303 cpu_l1_tte++;
2304 }
2305
2306#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2307 /*
2308 * In this configuration, the bootstrap mappings (arm_vm_init) and
2309 * the heap mappings occupy separate L1 regions. Explicitly set up
2310 * the heap L1 allocations here.
2311 */
2312#if defined(ARM_LARGE_MEMORY)
2313 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2314#else // defined(ARM_LARGE_MEMORY)
2315 va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK;
2316 init_ptpages(cpu_tte, VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2317#endif // defined(ARM_LARGE_MEMORY)
2318#else
2319#if defined(ARM_LARGE_MEMORY)
2320 /* For large memory systems with no KTRR/CTRR such as virtual machines */
2321 init_ptpages(cpu_tte, KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_L1_OFFMASK, VM_MAX_KERNEL_ADDRESS, FALSE, ARM_DYNAMIC_TABLE_XN | ARM_TTE_TABLE_AP(ARM_TTE_TABLE_AP_USER_NA));
2322#endif
2323#endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
2324
2325 /*
2326 * Initialize l3 page table pages :
2327 * cover this address range:
2328 * ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA) to VM_MAX_KERNEL_ADDRESS
2329 */
2330 va_l1 = (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - PE_EARLY_BOOT_VA;
2331 va_l1_end = VM_MAX_KERNEL_ADDRESS;
2332
2333 cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
2334
2335 while (va_l1 < va_l1_end) {
2336 va_l2 = va_l1;
2337
2338 if (((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE) < va_l1) {
2339 /* If this is the last L1 entry, it must cover the last mapping. */
2340 va_l2_end = va_l1_end;
2341 } else {
2342 va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE, va_l1_end);
2343 }
2344
2345 cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
2346
2347 while (va_l2 < va_l2_end) {
2348 pt_entry_t * ptp;
2349 pmap_paddr_t ptp_phys;
2350
2351 /* Allocate a page and setup L3 Table TTE in L2 */
2352 ptp = (pt_entry_t *) alloc_ptpage(FALSE);
2353 ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp);
2354
2355 bzero(ptp, ARM_PGBYTES);
2356 pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE);
2357
2358 *cpu_l2_tte = (pa_to_tte(ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN;
2359
2360 va_l2 += ARM_TT_L2_SIZE;
2361 cpu_l2_tte++;
2362 }
2363
2364 va_l1 = va_l2_end;
2365 cpu_l1_tte++;
2366 }
2367
2368
2369 /*
2370 * Adjust avail_start so that the range that the VM owns
2371 * starts on a PAGE_SIZE aligned boundary.
2372 */
2373 avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK;
2374
2375#if XNU_MONITOR
2376 pmap_static_allocations_done();
2377#endif
2378 first_avail = avail_start;
2379 patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData);
2380 enable_preemption();
2381}
2382
2383/*
2384 * Returns true if the address is within __TEXT, __TEXT_EXEC or __DATA_CONST
2385 * segment range. This is what the [vm_kernel_stext, vm_kernel_etext) range used to
2386 * cover. The segments together may no longer be contiguous, and so the individual
2387 * intervals are inspected.
2388 */
2389bool
2390kernel_text_contains(vm_offset_t addr)
2391{
2392 if (segTEXTB <= addr && addr < (segTEXTB + segSizeTEXT)) {
2393 return true;
2394 }
2395 if (segTEXTEXECB <= addr && addr < (segTEXTEXECB + segSizeTEXTEXEC)) {
2396 return true;
2397 }
2398 return segDATACONSTB <= addr && addr < (segDATACONSTB + segSizeDATACONST);
2399}
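/*
 * Hypothetical usage sketch (not from the original source), e.g. when classifying a faulting
 * address in a diagnostic path; fault_addr is an assumed local:
 *
 *   if (kernel_text_contains(fault_addr)) {
 *       // fault_addr lies within the kernel's __TEXT, __TEXT_EXEC, or __DATA_CONST segment
 *   }
 */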
2400