1 | /* |
2 | * Copyright (c) 2007-2011 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <mach_debug.h> |
30 | #include <mach_kdp.h> |
31 | #include <debug.h> |
32 | |
33 | #include <mach/vm_types.h> |
34 | #include <mach/vm_param.h> |
35 | #include <kern/misc_protos.h> |
36 | #include <kern/assert.h> |
37 | #include <vm/vm_kern.h> |
38 | #include <vm/vm_page.h> |
39 | #include <vm/pmap.h> |
40 | |
41 | #include <arm/atomic.h> |
42 | #include <arm64/proc_reg.h> |
43 | #include <arm64/lowglobals.h> |
44 | #include <arm/cpu_data_internal.h> |
45 | #include <arm/misc_protos.h> |
46 | #include <pexpert/arm64/boot.h> |
47 | #include <pexpert/device_tree.h> |
48 | |
49 | #include <libkern/kernel_mach_header.h> |
50 | #include <libkern/section_keywords.h> |
51 | |
52 | #include <san/kasan.h> |
53 | |
54 | #if __ARM_KERNEL_PROTECT__ |
55 | /* |
56 | * If we want to support __ARM_KERNEL_PROTECT__, we need a sufficient amount of |
 * mappable space preceding the kernel (as we unmap the kernel by cutting the
58 | * range covered by TTBR1 in half). This must also cover the exception vectors. |
59 | */ |
60 | static_assert(KERNEL_PMAP_HEAP_RANGE_START > ARM_KERNEL_PROTECT_EXCEPTION_START); |
61 | |
62 | /* The exception vectors and the kernel cannot share root TTEs. */ |
63 | static_assert((KERNEL_PMAP_HEAP_RANGE_START & ~ARM_TT_ROOT_OFFMASK) > ARM_KERNEL_PROTECT_EXCEPTION_START); |
64 | |
65 | /* |
66 | * We must have enough space in the TTBR1_EL1 range to create the EL0 mapping of |
67 | * the exception vectors. |
68 | */ |
69 | static_assert((((~ARM_KERNEL_PROTECT_EXCEPTION_START) + 1) * 2ULL) <= (ARM_TT_ROOT_SIZE + ARM_TT_ROOT_INDEX_MASK)); |
70 | #endif /* __ARM_KERNEL_PROTECT__ */ |
71 | |
72 | #define ARM_DYNAMIC_TABLE_XN (ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN) |
73 | |
74 | #if KASAN |
75 | extern vm_offset_t shadow_pbase; |
76 | extern vm_offset_t shadow_ptop; |
77 | extern vm_offset_t physmap_vbase; |
78 | extern vm_offset_t physmap_vtop; |
79 | #endif |
80 | |
81 | /* |
82 | * Denotes the end of xnu. |
83 | */ |
84 | extern void *last_kernel_symbol; |
85 | |
86 | extern void arm64_replace_bootstack(cpu_data_t*); |
87 | extern void PE_slide_devicetree(vm_offset_t); |
88 | |
89 | /* |
90 | * KASLR parameters |
91 | */ |
92 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_base; |
93 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_top; |
94 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_base; |
95 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kext_top; |
96 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_stext; |
97 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_etext; |
98 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slide; |
99 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_base; |
100 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_slid_top; |
101 | |
102 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_stext; |
103 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_etext; |
104 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sdata; |
105 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_edata; |
106 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_sinfo; |
107 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_prelink_einfo; |
108 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_slinkedit; |
109 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_elinkedit; |
110 | |
111 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text; |
112 | SECURITY_READ_ONLY_LATE(vm_offset_t) vm_kernel_builtinkmod_text_end; |
113 | |
114 | /* Used by <mach/arm/vm_param.h> */ |
115 | SECURITY_READ_ONLY_LATE(unsigned long) gVirtBase; |
116 | SECURITY_READ_ONLY_LATE(unsigned long) gPhysBase; |
117 | SECURITY_READ_ONLY_LATE(unsigned long) gPhysSize; |
118 | SECURITY_READ_ONLY_LATE(unsigned long) gT0Sz = T0SZ_BOOT; |
119 | SECURITY_READ_ONLY_LATE(unsigned long) gT1Sz = T1SZ_BOOT; |
120 | |
121 | /* 23543331 - step 1 of kext / kernel __TEXT and __DATA colocation is to move |
122 | * all kexts before the kernel. This is only for arm64 devices and looks |
123 | * something like the following: |
124 | * -- vmaddr order -- |
125 | * 0xffffff8004004000 __PRELINK_TEXT |
126 | * 0xffffff8007004000 __TEXT (xnu) |
127 | * 0xffffff80075ec000 __DATA (xnu) |
128 | * 0xffffff80076dc000 __KLD (xnu) |
129 | * 0xffffff80076e0000 __LAST (xnu) |
130 | * 0xffffff80076e4000 __LINKEDIT (xnu) |
131 | * 0xffffff80076e4000 __PRELINK_DATA (not used yet) |
132 | * 0xffffff800782c000 __PRELINK_INFO |
133 | * 0xffffff80078e4000 -- End of kernelcache |
134 | */ |
135 | |
136 | /* 24921709 - make XNU ready for KTRR |
137 | * |
138 | * Two possible kernel cache layouts, depending on which kcgen is being used. |
139 | * VAs increasing downwards. |
140 | * Old KCGEN: |
141 | * |
142 | * __PRELINK_TEXT |
143 | * __TEXT |
144 | * __DATA_CONST |
145 | * __TEXT_EXEC |
146 | * __KLD |
147 | * __LAST |
148 | * __DATA |
149 | * __PRELINK_DATA (expected empty) |
150 | * __LINKEDIT |
151 | * __PRELINK_INFO |
152 | * |
153 | * New kcgen: |
154 | * |
155 | * __PRELINK_TEXT <--- First KTRR (ReadOnly) segment |
156 | * __PLK_DATA_CONST |
157 | * __PLK_TEXT_EXEC |
158 | * __TEXT |
159 | * __DATA_CONST |
160 | * __TEXT_EXEC |
161 | * __KLD |
162 | * __LAST <--- Last KTRR (ReadOnly) segment |
163 | * __DATA |
164 | * __BOOTDATA (if present) |
165 | * __LINKEDIT |
166 | * __PRELINK_DATA (expected populated now) |
167 | * __PLK_LINKEDIT |
168 | * __PRELINK_INFO |
169 | * |
170 | */ |
171 | |
172 | vm_offset_t mem_size; /* Size of actual physical memory present |
173 | * minus any performance buffer and possibly |
174 | * limited by mem_limit in bytes */ |
175 | uint64_t mem_actual; /* The "One True" physical memory size |
176 | * actually, it's the highest physical |
177 | * address + 1 */ |
178 | uint64_t max_mem; /* Size of physical memory (bytes), adjusted |
179 | * by maxmem */ |
180 | uint64_t sane_size; /* Memory size to use for defaults |
181 | * calculations */ |
182 | /* This no longer appears to be used; kill it? */ |
183 | addr64_t vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Highest kernel |
184 | * virtual address known |
185 | * to the VM system */ |
186 | |
187 | SECURITY_READ_ONLY_LATE(vm_offset_t) ; |
188 | SECURITY_READ_ONLY_LATE(unsigned long) ; |
189 | |
190 | SECURITY_READ_ONLY_LATE(vm_offset_t) segLOWESTTEXT; |
191 | |
192 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTB; |
193 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXT; |
194 | |
195 | |
196 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATACONSTB; |
197 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATACONST; |
198 | |
199 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segTEXTEXECB; |
200 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizeTEXTEXEC; |
201 | |
202 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segDATAB; |
203 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizeDATA; |
204 | |
205 | |
206 | SECURITY_READ_ONLY_LATE(vm_offset_t) segBOOTDATAB; |
207 | SECURITY_READ_ONLY_LATE(unsigned long) segSizeBOOTDATA; |
208 | extern vm_offset_t intstack_low_guard; |
209 | extern vm_offset_t intstack_high_guard; |
210 | extern vm_offset_t excepstack_high_guard; |
211 | |
212 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segLINKB; |
213 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizeLINK; |
214 | |
215 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segKLDB; |
216 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizeKLD; |
217 | SECURITY_READ_ONLY_LATE(vm_offset_t) segLASTB; |
218 | SECURITY_READ_ONLY_LATE(unsigned long) segSizeLAST; |
219 | |
220 | SECURITY_READ_ONLY_LATE(vm_offset_t) segPRELINKTEXTB; |
221 | SECURITY_READ_ONLY_LATE(unsigned long) segSizePRELINKTEXT; |
222 | |
223 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKTEXTEXECB; |
224 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKTEXTEXEC; |
225 | |
226 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKDATACONSTB; |
227 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKDATACONST; |
228 | |
229 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKDATAB; |
230 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKDATA; |
231 | |
232 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLLVMCOVB = 0; |
233 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLLVMCOV = 0; |
234 | |
235 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segPLKLINKEDITB; |
236 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizePLKLINKEDIT; |
237 | |
238 | SECURITY_READ_ONLY_LATE(static vm_offset_t) segPRELINKINFOB; |
239 | SECURITY_READ_ONLY_LATE(static unsigned long) segSizePRELINKINFO; |
240 | |
241 | SECURITY_READ_ONLY_LATE(static boolean_t) use_contiguous_hint = TRUE; |
242 | |
243 | SECURITY_READ_ONLY_LATE(unsigned) PAGE_SHIFT_CONST; |
244 | |
245 | SECURITY_READ_ONLY_LATE(vm_offset_t) end_kern; |
246 | SECURITY_READ_ONLY_LATE(vm_offset_t) etext; |
247 | SECURITY_READ_ONLY_LATE(vm_offset_t) sdata; |
248 | SECURITY_READ_ONLY_LATE(vm_offset_t) edata; |
249 | |
250 | vm_offset_t alloc_ptpage(boolean_t map_static); |
251 | SECURITY_READ_ONLY_LATE(vm_offset_t) ropage_next; |
252 | |
253 | /* |
254 | * Bootstrap the system enough to run with virtual memory. |
255 | * Map the kernel's code and data, and allocate the system page table. |
256 | * Page_size must already be set. |
257 | * |
258 | * Parameters: |
259 | * first_avail: first available physical page - |
260 | * after kernel page tables |
261 | * avail_start: PA of first physical page |
262 | * avail_end: PA of last physical page |
263 | */ |
264 | SECURITY_READ_ONLY_LATE(vm_offset_t) first_avail; |
265 | SECURITY_READ_ONLY_LATE(vm_offset_t) static_memory_end; |
266 | SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_start; |
267 | SECURITY_READ_ONLY_LATE(pmap_paddr_t) avail_end; |
268 | SECURITY_READ_ONLY_LATE(pmap_paddr_t) real_avail_end; |
269 | |
270 | #if __ARM_KERNEL_PROTECT__ |
271 | extern void ExceptionVectorsBase; |
272 | extern void ExceptionVectorsEnd; |
273 | #endif /* __ARM_KERNEL_PROTECT__ */ |
274 | |
275 | #if defined(KERNEL_INTEGRITY_KTRR) |
276 | #if __ARM64_TWO_LEVEL_PMAP__ |
277 | /* We could support this configuration, but it adds memory overhead. */ |
278 | #error This configuration is not supported |
279 | #endif |
280 | #endif |
281 | |
282 | typedef struct { |
283 | pmap_paddr_t pa; |
284 | vm_map_address_t va; |
285 | vm_size_t len; |
286 | } ptov_table_entry; |
287 | |
288 | #define PTOV_TABLE_SIZE 8 |
289 | SECURITY_READ_ONLY_LATE(static ptov_table_entry) ptov_table[PTOV_TABLE_SIZE]; |
290 | SECURITY_READ_ONLY_LATE(static boolean_t) kva_active = FALSE; |
291 | |
292 | |
293 | vm_map_address_t |
294 | phystokv(pmap_paddr_t pa) |
295 | { |
296 | for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) { |
297 | if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) |
298 | return (pa - ptov_table[i].pa + ptov_table[i].va); |
299 | } |
300 | return (pa - gPhysBase + gVirtBase); |
301 | } |
302 | |
303 | vm_map_address_t |
304 | phystokv_range(pmap_paddr_t pa, vm_size_t *max_len) |
305 | { |
306 | vm_size_t len; |
307 | for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) { |
308 | if ((pa >= ptov_table[i].pa) && (pa < (ptov_table[i].pa + ptov_table[i].len))) { |
309 | len = ptov_table[i].len - (pa - ptov_table[i].pa); |
310 | if (*max_len > len) |
311 | *max_len = len; |
312 | return (pa - ptov_table[i].pa + ptov_table[i].va); |
313 | } |
314 | } |
315 | len = PAGE_SIZE - (pa & PAGE_MASK); |
316 | if (*max_len > len) |
317 | *max_len = len; |
318 | return (pa - gPhysBase + gVirtBase); |
319 | } |
320 | |
321 | vm_offset_t |
322 | ml_static_vtop(vm_offset_t va) |
323 | { |
324 | for (size_t i = 0; (i < PTOV_TABLE_SIZE) && (ptov_table[i].len != 0); i++) { |
325 | if ((va >= ptov_table[i].va) && (va < (ptov_table[i].va + ptov_table[i].len))) |
326 | return (va - ptov_table[i].va + ptov_table[i].pa); |
327 | } |
328 | if (((vm_address_t)(va) - gVirtBase) >= gPhysSize) |
329 | panic("ml_static_vtop(): illegal VA: %p\n" , (void*)va); |
330 | return ((vm_address_t)(va) - gVirtBase + gPhysBase); |
331 | } |
332 | |
333 | /* |
334 | * This rounds the given address up to the nearest boundary for a PTE contiguous |
335 | * hint. |
336 | */ |
337 | static vm_offset_t |
338 | round_up_pte_hint_address(vm_offset_t address) |
339 | { |
340 | vm_offset_t hint_size = ARM_PTE_SIZE << ARM_PTE_HINT_ENTRIES_SHIFT; |
341 | return ((address + (hint_size - 1)) & ~(hint_size - 1)); |
342 | } |
343 | |
344 | /* allocate a page for a page table: we support static and dynamic mappings. |
345 | * |
346 | * returns a virtual address for the allocated page |
347 | * |
 * for static mappings, we allocate from the region ropagetable_begin to ropagetable_end-1,
349 | * which is defined in the DATA_CONST segment and will be protected RNX when vm_prot_finalize runs. |
350 | * |
351 | * for dynamic mappings, we allocate from avail_start, which should remain RWNX. |
352 | */ |
353 | |
354 | vm_offset_t alloc_ptpage(boolean_t map_static) { |
355 | vm_offset_t vaddr; |
356 | |
357 | #if !(defined(KERNEL_INTEGRITY_KTRR)) |
358 | map_static = FALSE; |
359 | #endif |
360 | |
361 | if (!ropage_next) { |
362 | ropage_next = (vm_offset_t)&ropagetable_begin; |
363 | } |
364 | |
365 | if (map_static) { |
366 | assert(ropage_next < (vm_offset_t)&ropagetable_end); |
367 | |
368 | vaddr = ropage_next; |
369 | ropage_next += ARM_PGBYTES; |
370 | |
371 | return vaddr; |
372 | } else { |
373 | vaddr = phystokv(avail_start); |
374 | avail_start += ARM_PGBYTES; |
375 | |
376 | return vaddr; |
377 | } |
378 | } |
379 | |
380 | #if DEBUG |
381 | |
void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out);

/*
 * Walk one L2 translation table (covering VAs starting at tt_base) and print
 * each contiguous run of mapped space, labeled "Static" when backed by the RO
 * page table region (or block-mapped) and "Dynamic" otherwise.  Run sizes are
 * accumulated into *rosz_out (static) and *rwsz_out (dynamic).
 */
void dump_kva_l2(vm_offset_t tt_base, tt_entry_t *tt, int indent, uint64_t *rosz_out, uint64_t *rwsz_out) {
	unsigned int i;
	boolean_t cur_ro, prev_ro = 0;
	int start_entry = -1;
	tt_entry_t cur, prev = 0;
	pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin);
	pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end);
	/* This table itself is static iff its PA lies in the RO page table region. */
	boolean_t tt_static = kvtophys((vm_offset_t)tt) >= robegin &&
	    kvtophys((vm_offset_t)tt) < roend;

	/* Edge-detection scan: a run is reported when it ends ("falling edge"). */
	for(i=0; i<TTE_PGENTRIES; i++) {
		int tte_type = tt[i] & ARM_TTE_TYPE_MASK;
		cur = tt[i] & ARM_TTE_TABLE_MASK;

		if (tt_static) {
			/* addresses mapped by this entry are static if it is a block mapping,
			 * or the table was allocated from the RO page table region */
			cur_ro = (tte_type == ARM_TTE_TYPE_BLOCK) || (cur >= robegin && cur < roend);
		} else {
			cur_ro = 0;
		}

		if ((cur == 0 && prev != 0) || (cur_ro != prev_ro && prev != 0)) { // falling edge
			/* Run ended at entry i-1: compute its VA span and report it. */
			uintptr_t start,end,sz;

			start = (uintptr_t)start_entry << ARM_TT_L2_SHIFT;
			start += tt_base;
			end = ((uintptr_t)i << ARM_TT_L2_SHIFT) - 1;
			end += tt_base;

			sz = end - start + 1;
			printf("%*s0x%08x_%08x-0x%08x_%08x %s (%luMB)\n" ,
			    indent*4, "" ,
			    (uint32_t)(start >> 32),(uint32_t)start,
			    (uint32_t)(end >> 32),(uint32_t)end,
			    prev_ro ? "Static " : "Dynamic" ,
			    (sz >> 20));

			if (prev_ro) {
				*rosz_out += sz;
			} else {
				*rwsz_out += sz;
			}
		}

		if ((prev == 0 && cur != 0) || cur_ro != prev_ro) { // rising edge: set start
			start_entry = i;
		}

		prev = cur;
		prev_ro = cur_ro;
	}
}
437 | |
438 | void dump_kva_space() { |
439 | uint64_t tot_rosz=0, tot_rwsz=0; |
440 | int ro_ptpages, rw_ptpages; |
441 | pmap_paddr_t robegin = kvtophys((vm_offset_t)&ropagetable_begin); |
442 | pmap_paddr_t roend = kvtophys((vm_offset_t)&ropagetable_end); |
443 | boolean_t root_static = kvtophys((vm_offset_t)cpu_tte) >= robegin && |
444 | kvtophys((vm_offset_t)cpu_tte) < roend; |
445 | uint64_t kva_base = ~((1ULL << (64 - T1SZ_BOOT)) - 1); |
446 | |
447 | printf("Root page table: %s\n" , root_static ? "Static" : "Dynamic" ); |
448 | |
449 | #if !__ARM64_TWO_LEVEL_PMAP__ |
450 | for(unsigned int i=0; i<TTE_PGENTRIES; i++) { |
451 | pmap_paddr_t cur; |
452 | boolean_t cur_ro; |
453 | uintptr_t start,end; |
454 | uint64_t rosz = 0, rwsz = 0; |
455 | |
456 | if ((cpu_tte[i] & ARM_TTE_VALID) == 0) |
457 | continue; |
458 | |
459 | cur = cpu_tte[i] & ARM_TTE_TABLE_MASK; |
460 | start = (uint64_t)i << ARM_TT_L1_SHIFT; |
461 | start = start + kva_base; |
462 | end = start + (ARM_TT_L1_SIZE - 1); |
463 | cur_ro = cur >= robegin && cur < roend; |
464 | |
465 | printf("0x%08x_%08x-0x%08x_%08x %s\n" , |
466 | (uint32_t)(start >> 32),(uint32_t)start, |
467 | (uint32_t)(end >> 32),(uint32_t)end, |
468 | cur_ro ? "Static " : "Dynamic" ); |
469 | |
470 | dump_kva_l2(start, (tt_entry_t*)phystokv(cur), 1, &rosz, &rwsz); |
471 | tot_rosz += rosz; |
472 | tot_rwsz += rwsz; |
473 | } |
474 | #else |
475 | dump_kva_l2(kva_base, cpu_tte, 0, &tot_rosz, &tot_rwsz); |
476 | #endif /* !_ARM64_TWO_LEVEL_PMAP__ */ |
477 | |
478 | printf("L2 Address space mapped: Static %lluMB Dynamic %lluMB Total %lluMB\n" , |
479 | tot_rosz >> 20, |
480 | tot_rwsz >> 20, |
481 | (tot_rosz >> 20) + (tot_rwsz >> 20)); |
482 | |
483 | ro_ptpages = (int)((ropage_next - (vm_offset_t)&ropagetable_begin) >> ARM_PGSHIFT); |
484 | rw_ptpages = (int)(lowGlo.lgStaticSize >> ARM_PGSHIFT); |
485 | printf("Pages used: static %d dynamic %d\n" , ro_ptpages, rw_ptpages); |
486 | } |
487 | |
488 | #endif /* DEBUG */ |
489 | |
490 | #if __ARM_KERNEL_PROTECT__ |
491 | /* |
492 | * arm_vm_map: |
493 | * root_ttp: The kernel virtual address for the root of the target page tables |
494 | * vaddr: The target virtual address |
495 | * pte: A page table entry value (may be ARM_PTE_EMPTY) |
496 | * |
497 | * This function installs pte at vaddr in root_ttp. Any page table pages needed |
498 | * to install pte will be allocated by this function. |
499 | */ |
static void
arm_vm_map(tt_entry_t * root_ttp, vm_offset_t vaddr, pt_entry_t pte)
{
	vm_offset_t ptpage = 0;
	tt_entry_t * ttp = root_ttp;

#if !__ARM64_TWO_LEVEL_PMAP__
	tt_entry_t * l1_ttep = NULL;
	tt_entry_t l1_tte = 0;
#endif

	tt_entry_t * l2_ttep = NULL;
	tt_entry_t l2_tte = 0;
	pt_entry_t * ptep = NULL;
	pt_entry_t cpte = 0;

	/*
	 * Walk the target page table to find the PTE for the given virtual
	 * address. Allocate any page table pages needed to do this.
	 */
#if !__ARM64_TWO_LEVEL_PMAP__
	l1_ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT);
	l1_tte = *l1_ttep;

	if (l1_tte == ARM_TTE_EMPTY) {
		/* No L2 table yet: allocate one from the static (RO) pool, zero
		 * it, and install a valid table descriptor pointing at it. */
		ptpage = alloc_ptpage(TRUE);
		bzero((void *)ptpage, ARM_PGBYTES);
		l1_tte = kvtophys(ptpage);
		l1_tte &= ARM_TTE_TABLE_MASK;
		l1_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
		*l1_ttep = l1_tte;
		ptpage = 0;
	}

	/* Descend to the L2 table this entry references. */
	ttp = (tt_entry_t *)phystokv(l1_tte & ARM_TTE_TABLE_MASK);
#endif

	l2_ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT);
	l2_tte = *l2_ttep;

	if (l2_tte == ARM_TTE_EMPTY) {
		/* No L3 table yet: allocate and install one, as above. */
		ptpage = alloc_ptpage(TRUE);
		bzero((void *)ptpage, ARM_PGBYTES);
		l2_tte = kvtophys(ptpage);
		l2_tte &= ARM_TTE_TABLE_MASK;
		l2_tte |= ARM_TTE_VALID | ARM_TTE_TYPE_TABLE;
		*l2_ttep = l2_tte;
		ptpage = 0;
	}

	/* Descend to the L3 (leaf) table and locate the target PTE slot. */
	ttp = (tt_entry_t *)phystokv(l2_tte & ARM_TTE_TABLE_MASK);

	ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);
	cpte = *ptep;

	/*
	 * If the existing PTE is not empty, then we are replacing a valid
	 * mapping.
	 */
	if (cpte != ARM_PTE_EMPTY) {
		panic("%s: cpte=%#llx is not empty, "
		    "vaddr=%#lx, pte=%#llx" ,
		    __FUNCTION__, cpte,
		    vaddr, pte);
	}

	*ptep = pte;
}
568 | |
569 | /* |
570 | * arm_vm_kernel_el0_map: |
571 | * vaddr: The target virtual address |
572 | * pte: A page table entry value (may be ARM_PTE_EMPTY) |
573 | * |
574 | * This function installs pte at vaddr for the EL0 kernel mappings. |
575 | */ |
576 | static void |
577 | arm_vm_kernel_el0_map(vm_offset_t vaddr, pt_entry_t pte) |
578 | { |
579 | /* Calculate where vaddr will be in the EL1 kernel page tables. */ |
580 | vm_offset_t kernel_pmap_vaddr = vaddr - ((ARM_TT_ROOT_INDEX_MASK + ARM_TT_ROOT_SIZE) / 2ULL); |
581 | arm_vm_map(cpu_tte, kernel_pmap_vaddr, pte); |
582 | } |
583 | |
584 | /* |
585 | * arm_vm_kernel_el1_map: |
586 | * vaddr: The target virtual address |
587 | * pte: A page table entry value (may be ARM_PTE_EMPTY) |
588 | * |
589 | * This function installs pte at vaddr for the EL1 kernel mappings. |
590 | */ |
591 | static void |
592 | arm_vm_kernel_el1_map(vm_offset_t vaddr, pt_entry_t pte) { |
593 | arm_vm_map(cpu_tte, vaddr, pte); |
594 | } |
595 | |
596 | /* |
597 | * arm_vm_kernel_pte: |
598 | * vaddr: The target virtual address |
599 | * |
600 | * This function returns the PTE value for the given vaddr from the kernel page |
601 | * tables. If the region has been been block mapped, we return what an |
602 | * equivalent PTE value would be (as regards permissions and flags). We also |
603 | * remove the HINT bit (as we are not necessarily creating contiguous mappings. |
604 | */ |
605 | static pt_entry_t |
606 | arm_vm_kernel_pte(vm_offset_t vaddr) |
607 | { |
608 | tt_entry_t * ttp = cpu_tte; |
609 | tt_entry_t * ttep = NULL; |
610 | tt_entry_t tte = 0; |
611 | pt_entry_t * ptep = NULL; |
612 | pt_entry_t pte = 0; |
613 | |
614 | #if !__ARM64_TWO_LEVEL_PMAP__ |
615 | ttep = ttp + ((vaddr & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
616 | tte = *ttep; |
617 | |
618 | assert(tte & ARM_TTE_VALID); |
619 | |
620 | if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) { |
621 | /* This is a block mapping; return the equivalent PTE value. */ |
622 | pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK); |
623 | pte |= ARM_PTE_TYPE_VALID; |
624 | pte |= vaddr & ((ARM_TT_L1_SIZE - 1) & ARM_PTE_PAGE_MASK); |
625 | pte &= ~ARM_PTE_HINT_MASK; |
626 | return pte; |
627 | } |
628 | |
629 | ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK); |
630 | #endif |
631 | ttep = ttp + ((vaddr & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
632 | tte = *ttep; |
633 | |
634 | assert(tte & ARM_TTE_VALID); |
635 | |
636 | if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) { |
637 | /* This is a block mapping; return the equivalent PTE value. */ |
638 | pte = (pt_entry_t)(tte & ~ARM_TTE_TYPE_MASK); |
639 | pte |= ARM_PTE_TYPE_VALID; |
640 | pte |= vaddr & ((ARM_TT_L2_SIZE - 1) & ARM_PTE_PAGE_MASK); |
641 | pte &= ~ARM_PTE_HINT_MASK; |
642 | return pte; |
643 | } |
644 | |
645 | ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK); |
646 | |
647 | ptep = ttp + ((vaddr & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT); |
648 | pte = *ptep; |
649 | pte &= ~ARM_PTE_HINT_MASK; |
650 | return pte; |
651 | } |
652 | |
653 | /* |
654 | * arm_vm_prepare_kernel_el0_mappings: |
655 | * alloc_only: Indicates if PTE values should be copied from the EL1 kernel |
656 | * mappings. |
657 | * |
658 | * This function expands the kernel page tables to support the EL0 kernel |
659 | * mappings, and conditionally installs the PTE values for the EL0 kernel |
660 | * mappings (if alloc_only is false). |
661 | */ |
662 | static void |
663 | arm_vm_prepare_kernel_el0_mappings(bool alloc_only) |
664 | { |
665 | pt_entry_t pte = 0; |
666 | vm_offset_t start = ((vm_offset_t)&ExceptionVectorsBase) & ~PAGE_MASK; |
667 | vm_offset_t end = (((vm_offset_t)&ExceptionVectorsEnd) + PAGE_MASK) & ~PAGE_MASK; |
668 | vm_offset_t cur = 0; |
669 | vm_offset_t cur_fixed = 0; |
670 | |
671 | /* Expand for/map the exceptions vectors in the EL0 kernel mappings. */ |
672 | for (cur = start, cur_fixed = ARM_KERNEL_PROTECT_EXCEPTION_START; cur < end; cur += ARM_PGBYTES, cur_fixed += ARM_PGBYTES) { |
673 | /* |
674 | * We map the exception vectors at a different address than that |
675 | * of the kernelcache to avoid sharing page table pages with the |
676 | * kernelcache (as this may cause issues with TLB caching of |
677 | * page table pages. |
678 | */ |
679 | if (!alloc_only) { |
680 | pte = arm_vm_kernel_pte(cur); |
681 | } |
682 | |
683 | arm_vm_kernel_el1_map(cur_fixed, pte); |
684 | arm_vm_kernel_el0_map(cur_fixed, pte); |
685 | } |
686 | |
687 | __builtin_arm_dmb(DMB_ISH); |
688 | __builtin_arm_isb(ISB_SY); |
689 | |
690 | if (!alloc_only) { |
691 | /* |
692 | * If we have created the alternate exception vector mappings, |
693 | * the boot CPU may now switch over to them. |
694 | */ |
695 | set_vbar_el1(ARM_KERNEL_PROTECT_EXCEPTION_START); |
696 | __builtin_arm_isb(ISB_SY); |
697 | } |
698 | } |
699 | |
700 | /* |
701 | * arm_vm_populate_kernel_el0_mappings: |
702 | * |
703 | * This function adds all required mappings to the EL0 kernel mappings. |
704 | */ |
705 | static void |
706 | arm_vm_populate_kernel_el0_mappings(void) |
707 | { |
708 | arm_vm_prepare_kernel_el0_mappings(FALSE); |
709 | } |
710 | |
711 | /* |
712 | * arm_vm_expand_kernel_el0_mappings: |
713 | * |
714 | * This function expands the kernel page tables to accomodate the EL0 kernel |
715 | * mappings. |
716 | */ |
717 | static void |
718 | arm_vm_expand_kernel_el0_mappings(void) |
719 | { |
720 | arm_vm_prepare_kernel_el0_mappings(TRUE); |
721 | } |
722 | #endif /* __ARM_KERNEL_PROTECT__ */ |
723 | |
724 | #if defined(KERNEL_INTEGRITY_KTRR) |
725 | extern void bootstrap_instructions; |
726 | |
727 | /* |
728 | * arm_replace_identity_map takes the V=P map that we construct in start.s |
729 | * and repurposes it in order to have it map only the page we need in order |
730 | * to turn on the MMU. This prevents us from running into issues where |
731 | * KTRR will cause us to fault on executable block mappings that cross the |
732 | * KTRR boundary. |
733 | */ |
static void arm_replace_identity_map(boot_args * args)
{
	vm_offset_t addr;
	pmap_paddr_t paddr;

#if !__ARM64_TWO_LEVEL_PMAP__
	pmap_paddr_t l1_ptp_phys = 0;
	tt_entry_t *l1_ptp_virt = NULL;
	tt_entry_t *tte1 = NULL;
#endif
	pmap_paddr_t l2_ptp_phys = 0;
	tt_entry_t *l2_ptp_virt = NULL;
	tt_entry_t *tte2 = NULL;
	pmap_paddr_t l3_ptp_phys = 0;
	pt_entry_t *l3_ptp_virt = NULL;
	pt_entry_t *ptep = NULL;

	/* The single page we must keep identity-mapped: the one holding the
	 * MMU-enable bootstrap instructions. */
	addr = ((vm_offset_t)&bootstrap_instructions) & ~ARM_PGMASK;
	paddr = kvtophys(addr);

	/*
	 * The V=P page tables (at the time this comment was written) start
	 * after the last bit of kernel data, and consist of 1 L1 page and 1 or
	 * more L2 pages.
	 * Grab references to those pages, and allocate an L3 page.
	 */
	l1_ptp_phys = args->topOfKernelData;
	l1_ptp_virt = (tt_entry_t *)phystokv(l1_ptp_phys);
	tte1 = &l1_ptp_virt[L1_TABLE_INDEX(paddr)];

	l2_ptp_virt = L2_TABLE_VA(tte1);
	l2_ptp_phys = (*tte1) & ARM_TTE_TABLE_MASK;
	tte2 = &l2_ptp_virt[L2_TABLE_INDEX(paddr)];

	/* A fresh (dynamically allocated) L3 page for the single-page mapping. */
	l3_ptp_virt = (pt_entry_t *)alloc_ptpage(FALSE);
	l3_ptp_phys = kvtophys((vm_offset_t)l3_ptp_virt);
	ptep = &l3_ptp_virt[L3_TABLE_INDEX(paddr)];

	/*
	 * Replace the large V=P mapping with a mapping that provides only the
	 * mappings needed to turn on the MMU.
	 */

	/* Wipe each level, then relink only the path leading to our one page. */
	bzero(l1_ptp_virt, ARM_PGBYTES);
	*tte1 = ARM_TTE_BOOT_TABLE | (l2_ptp_phys & ARM_TTE_TABLE_MASK);

	bzero(l2_ptp_virt, ARM_PGBYTES);
	*tte2 = ARM_TTE_BOOT_TABLE | (l3_ptp_phys & ARM_TTE_TABLE_MASK);

	/* Read-only, non-executable-at-EL0, cacheable mapping of the bootstrap page. */
	*ptep = (paddr & ARM_PTE_MASK) |
	        ARM_PTE_TYPE_VALID |
	        ARM_PTE_SH(SH_OUTER_MEMORY) |
	        ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) |
	        ARM_PTE_AF |
	        ARM_PTE_AP(AP_RONA) |
	        ARM_PTE_NX;
}
791 | #endif /* defined(KERNEL_INTEGRITY_KTRR)*/ |
792 | |
793 | tt_entry_t *arm_kva_to_tte(vm_offset_t); |
794 | |
795 | tt_entry_t * |
796 | arm_kva_to_tte(vm_offset_t va) |
797 | { |
798 | #if __ARM64_TWO_LEVEL_PMAP__ |
799 | tt_entry_t *tte2; |
800 | tte2 = cpu_tte + L2_TABLE_INDEX(va); |
801 | #else |
802 | tt_entry_t *tte1, *tte2; |
803 | tte1 = cpu_tte + L1_TABLE_INDEX(va); |
804 | tte2 = L2_TABLE_VA(tte1) + L2_TABLE_INDEX(va); |
805 | #endif |
806 | return tte2; |
807 | } |
808 | |
809 | |
810 | /* |
811 | * arm_vm_page_granular_helper updates protections at the L3 level. It will (if |
812 | * neccessary) allocate a page for the L3 table and update the corresponding L2 |
813 | * entry. Then, it will iterate over the L3 table, updating protections as necessary. |
814 | * This expects to be invoked on a L2 entry or sub L2 entry granularity, so this should |
815 | * not be invoked from a context that does not do L2 iteration separately (basically, |
816 | * don't call this except from arm_vm_page_granular_prot). |
817 | * |
818 | * bool force_page_granule: true: will force page level mappings for this entry |
819 | * false: will try to use block level mappings |
820 | */ |
821 | |
822 | static void |
823 | arm_vm_page_granular_helper(vm_offset_t start, vm_offset_t _end, vm_offset_t va, pmap_paddr_t pa_offset, |
824 | int pte_prot_APX, int pte_prot_XN, bool force_page_granule, |
825 | pt_entry_t **deferred_pte, pt_entry_t *deferred_ptmp) |
826 | { |
827 | if (va & ARM_TT_L2_OFFMASK) { /* ragged edge hanging over a ARM_TT_L2_SIZE boundary */ |
828 | tt_entry_t *tte2; |
829 | tt_entry_t tmplate; |
830 | pmap_paddr_t pa; |
831 | pt_entry_t *ppte, *recursive_pte = NULL, ptmp, recursive_ptmp = 0; |
832 | addr64_t ppte_phys; |
833 | unsigned i; |
834 | |
835 | va &= ~ARM_TT_L2_OFFMASK; |
836 | pa = va - gVirtBase + gPhysBase - pa_offset; |
837 | |
838 | if (pa >= real_avail_end) |
839 | return; |
840 | |
841 | tte2 = arm_kva_to_tte(va); |
842 | |
843 | assert(_end >= va); |
844 | tmplate = *tte2; |
845 | |
846 | if (ARM_TTE_TYPE_TABLE == (tmplate & ARM_TTE_TYPE_MASK)) { |
847 | /* pick up the existing page table. */ |
848 | ppte = (pt_entry_t *)phystokv((tmplate & ARM_TTE_TABLE_MASK)); |
849 | } else { |
850 | // TTE must be reincarnated with page level mappings. |
851 | ppte = (pt_entry_t*)alloc_ptpage(pa_offset == 0); |
852 | bzero(ppte, ARM_PGBYTES); |
853 | ppte_phys = kvtophys((vm_offset_t)ppte); |
854 | |
855 | *tte2 = pa_to_tte(ppte_phys) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID; |
856 | } |
857 | |
858 | vm_offset_t len = _end - va; |
859 | if ((pa + len) > real_avail_end) |
860 | _end -= (pa + len - real_avail_end); |
861 | assert((start - gVirtBase + gPhysBase - pa_offset) >= gPhysBase); |
862 | |
863 | /* Round up to the nearest PAGE_SIZE boundary when creating mappings: |
864 | * PAGE_SIZE may be a multiple of ARM_PGBYTES, and we don't want to leave |
865 | * a ragged non-PAGE_SIZE-aligned edge. */ |
866 | vm_offset_t rounded_end = round_page(_end); |
867 | /* Apply the desired protections to the specified page range */ |
868 | for (i = 0; i <= (ARM_TT_L3_INDEX_MASK>>ARM_TT_L3_SHIFT); i++) { |
869 | if ((start <= va) && (va < rounded_end)) { |
870 | |
871 | ptmp = pa | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE; |
872 | ptmp = ptmp | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT); |
873 | ptmp = ptmp | ARM_PTE_AP(pte_prot_APX); |
874 | ptmp = ptmp | ARM_PTE_NX; |
875 | #if __ARM_KERNEL_PROTECT__ |
876 | ptmp = ptmp | ARM_PTE_NG; |
877 | #endif /* __ARM_KERNEL_PROTECT__ */ |
878 | |
879 | if (pte_prot_XN) { |
880 | ptmp = ptmp | ARM_PTE_PNX; |
881 | } |
882 | |
883 | /* |
884 | * If we can, apply the contiguous hint to this range. The hint is |
885 | * applicable if the current address falls within a hint-sized range that will |
886 | * be fully covered by this mapping request. |
887 | */ |
888 | if ((va >= round_up_pte_hint_address(start)) && (round_up_pte_hint_address(va + 1) <= _end) && |
889 | !force_page_granule && use_contiguous_hint) { |
890 | assert((va & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)) == ((pa & ((1 << ARM_PTE_HINT_ADDR_SHIFT) - 1)))); |
891 | ptmp |= ARM_PTE_HINT; |
892 | } |
893 | /* |
894 | * Do not change the contiguous bit on an active mapping. Even in a single-threaded |
895 | * environment, it's possible for prefetch to produce a TLB conflict by trying to pull in |
896 | * a hint-sized entry on top of one or more existing page-sized entries. It's also useful |
897 | * to make sure we're not trying to unhint a sub-range of a larger hinted range, which |
898 | * could produce a later TLB conflict. |
899 | */ |
900 | assert(!kva_active || (ppte[i] == ARM_PTE_TYPE_FAULT) || ((ppte[i] & ARM_PTE_HINT) == (ptmp & ARM_PTE_HINT))); |
901 | |
902 | /* |
903 | * If we reach an entry that maps the current pte page, delay updating it until the very end. |
904 | * Otherwise we might end up making the PTE page read-only, leading to a fault later on in |
905 | * this function if we manage to outrun the TLB. This can happen on KTRR-enabled devices when |
906 | * marking segDATACONST read-only. Mappings for this region may straddle a PT page boundary, |
907 | * so we must also defer assignment of the following PTE. We will assume that if the region |
908 | * were to require one or more full L3 pages, it would instead use L2 blocks where possible, |
909 | * therefore only requiring at most one L3 page at the beginning and one at the end. |
910 | */ |
911 | if (kva_active && ((pt_entry_t*)(phystokv(pa)) == ppte)) { |
912 | assert(recursive_pte == NULL); |
913 | assert(!force_page_granule); |
914 | recursive_pte = &ppte[i]; |
915 | recursive_ptmp = ptmp; |
916 | } else if ((deferred_pte != NULL) && (&ppte[i] == &recursive_pte[1])) { |
917 | assert(*deferred_pte == NULL); |
918 | assert(deferred_ptmp != NULL); |
919 | *deferred_pte = &ppte[i]; |
920 | *deferred_ptmp = ptmp; |
921 | } else { |
922 | ppte[i] = ptmp; |
923 | } |
924 | } |
925 | |
926 | va += ARM_PGBYTES; |
927 | pa += ARM_PGBYTES; |
928 | } |
929 | if (recursive_pte != NULL) |
930 | *recursive_pte = recursive_ptmp; |
931 | } |
932 | } |
933 | |
934 | /* |
935 | * arm_vm_page_granular_prot updates protections by iterating over the L2 entries and |
936 | * changing them. If a particular chunk necessitates L3 entries (for reasons of |
937 | * alignment or length, or an explicit request that the entry be fully expanded), we |
938 | * hand off to arm_vm_page_granular_helper to deal with the L3 chunk of the logic. |
939 | */ |
940 | static void |
941 | arm_vm_page_granular_prot(vm_offset_t start, unsigned long size, pmap_paddr_t pa_offset, |
942 | int tte_prot_XN, int pte_prot_APX, int pte_prot_XN, |
943 | bool force_page_granule) |
944 | { |
945 | pt_entry_t *deferred_pte = NULL, deferred_ptmp = 0; |
946 | vm_offset_t _end = start + size; |
947 | vm_offset_t align_start = (start + ARM_TT_L2_OFFMASK) & ~ARM_TT_L2_OFFMASK; |
948 | |
949 | if (size == 0x0UL) |
950 | return; |
951 | |
952 | if (align_start > _end) { |
953 | arm_vm_page_granular_helper(start, _end, start, pa_offset, pte_prot_APX, pte_prot_XN, force_page_granule, NULL, NULL); |
954 | return; |
955 | } |
956 | |
957 | arm_vm_page_granular_helper(start, align_start, start, pa_offset, pte_prot_APX, pte_prot_XN, force_page_granule, &deferred_pte, &deferred_ptmp); |
958 | |
959 | while ((_end - align_start) >= ARM_TT_L2_SIZE) { |
960 | if (force_page_granule) |
961 | arm_vm_page_granular_helper(align_start, align_start+ARM_TT_L2_SIZE, align_start + 1, pa_offset, |
962 | pte_prot_APX, pte_prot_XN, force_page_granule, NULL, NULL); |
963 | else { |
964 | pmap_paddr_t pa = align_start - gVirtBase + gPhysBase - pa_offset; |
965 | assert((pa & ARM_TT_L2_OFFMASK) == 0); |
966 | tt_entry_t *tte2; |
967 | tt_entry_t tmplate; |
968 | |
969 | tte2 = arm_kva_to_tte(align_start); |
970 | |
971 | if ((pa >= gPhysBase) && (pa < real_avail_end)) { |
972 | tmplate = (pa & ARM_TTE_BLOCK_L2_MASK) | ARM_TTE_TYPE_BLOCK |
973 | | ARM_TTE_VALID | ARM_TTE_BLOCK_AF | ARM_TTE_BLOCK_NX |
974 | | ARM_TTE_BLOCK_AP(pte_prot_APX) | ARM_TTE_BLOCK_SH(SH_OUTER_MEMORY) |
975 | | ARM_TTE_BLOCK_ATTRINDX(CACHE_ATTRINDX_WRITEBACK); |
976 | |
977 | #if __ARM_KERNEL_PROTECT__ |
978 | tmplate = tmplate | ARM_TTE_BLOCK_NG; |
979 | #endif /* __ARM_KERNEL_PROTECT__ */ |
980 | if (tte_prot_XN) |
981 | tmplate = tmplate | ARM_TTE_BLOCK_PNX; |
982 | |
983 | *tte2 = tmplate; |
984 | } |
985 | } |
986 | align_start += ARM_TT_L2_SIZE; |
987 | } |
988 | |
989 | if (align_start < _end) |
990 | arm_vm_page_granular_helper(align_start, _end, _end, pa_offset, pte_prot_APX, pte_prot_XN, force_page_granule, &deferred_pte, &deferred_ptmp); |
991 | |
992 | if (deferred_pte != NULL) |
993 | *deferred_pte = deferred_ptmp; |
994 | } |
995 | |
996 | static inline void |
997 | arm_vm_page_granular_RNX(vm_offset_t start, unsigned long size, bool force_page_granule) |
998 | { |
999 | arm_vm_page_granular_prot(start, size, 0, 1, AP_RONA, 1, force_page_granule); |
1000 | } |
1001 | |
1002 | static inline void |
1003 | arm_vm_page_granular_ROX(vm_offset_t start, unsigned long size, bool force_page_granule) |
1004 | { |
1005 | arm_vm_page_granular_prot(start, size, 0, 0, AP_RONA, 0, force_page_granule); |
1006 | } |
1007 | |
1008 | static inline void |
1009 | arm_vm_page_granular_RWNX(vm_offset_t start, unsigned long size, bool force_page_granule) |
1010 | { |
1011 | arm_vm_page_granular_prot(start, size, 0, 1, AP_RWNA, 1, force_page_granule); |
1012 | } |
1013 | |
1014 | static inline void |
1015 | arm_vm_page_granular_RWX(vm_offset_t start, unsigned long size, bool force_page_granule) |
1016 | { |
1017 | arm_vm_page_granular_prot(start, size, 0, 0, AP_RWNA, 0, force_page_granule); |
1018 | } |
1019 | |
/* used in the chosen/memory-map node, populated by iBoot.
 * NOTE(review): layout must match what iBoot writes into the device tree --
 * do not reorder or resize fields. */
typedef struct MemoryMapFileInfo {
	vm_offset_t paddr;	/* physical base of the region */
	size_t length;		/* region length in bytes */
} MemoryMapFileInfo;
1025 | |
1026 | |
/*
 * Apply initial (boot-time) protections to every kernel and prelinked-kext
 * segment.  Some regions are deliberately left looser than their final state
 * (e.g. TEXT_EXEC is RWX here) and are tightened later in arm_vm_prot_finalize().
 */
void
arm_vm_prot_init(boot_args * args)
{

	/* Find the lowest-addressed text segment: PRELINK_TEXT if present, else TEXT. */
	segLOWESTTEXT = UINT64_MAX;
	if (segSizePRELINKTEXT && (segPRELINKTEXTB < segLOWESTTEXT)) segLOWESTTEXT = segPRELINKTEXTB;
	assert(segSizeTEXT);
	if (segTEXTB < segLOWESTTEXT) segLOWESTTEXT = segTEXTB;
	assert(segLOWESTTEXT < UINT64_MAX);

	segEXTRADATA = segLOWESTTEXT;
	segSizeEXTRADATA = 0;

	DTEntry memory_map;
	MemoryMapFileInfo *trustCacheRange;
	unsigned int trustCacheRangeSize;
	int err;

	err = DTLookupEntry(NULL, "chosen/memory-map", &memory_map);
	assert(err == kSuccess);

	/* If iBoot provided a trust cache, record its range and make it RNX. */
	err = DTGetProperty(memory_map, "TrustCache", (void**)&trustCacheRange, &trustCacheRangeSize);
	if (err == kSuccess) {
		assert(trustCacheRangeSize == sizeof(MemoryMapFileInfo));

		segEXTRADATA = phystokv(trustCacheRange->paddr);
		segSizeEXTRADATA = trustCacheRange->length;

		arm_vm_page_granular_RNX(segEXTRADATA, segSizeEXTRADATA, FALSE);
	}

	/* Map coalesced kext TEXT segment RWNX for now */
	arm_vm_page_granular_RWNX(segPRELINKTEXTB, segSizePRELINKTEXT, FALSE); // Refined in OSKext::readPrelinkedExtensions

	/* Map coalesced kext DATA_CONST segment RWNX (could be empty) */
	arm_vm_page_granular_RWNX(segPLKDATACONSTB, segSizePLKDATACONST, FALSE); // Refined in OSKext::readPrelinkedExtensions

	/* Map coalesced kext TEXT_EXEC segment ROX (could be empty) */
	arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, FALSE); // Refined in OSKext::readPrelinkedExtensions

	/* if new segments not present, set space between PRELINK_TEXT and xnu TEXT to RWNX
	 * otherwise we no longer expect any space between the coalesced kext read only segments and xnu rosegments
	 */
	if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC) {
		if (segSizePRELINKTEXT)
			arm_vm_page_granular_RWNX(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT), FALSE);
	} else {
		/*
		 * If we have the new segments, we should still protect the gap between kext
		 * read-only pages and kernel read-only pages, in the event that this gap
		 * exists.
		 */
		if ((segPLKDATACONSTB + segSizePLKDATACONST) < segTEXTB) {
			arm_vm_page_granular_RWNX(segPLKDATACONSTB + segSizePLKDATACONST, segTEXTB - (segPLKDATACONSTB + segSizePLKDATACONST), FALSE);
		}
	}

	/*
	 * Protection on kernel text is loose here to allow shenanigans early on.  These
	 * protections are tightened in arm_vm_prot_finalize().  This is necessary because
	 * we currently patch LowResetVectorBase in cpu.c.
	 *
	 * TEXT segment contains mach headers and other non-executable data. This will become RONX later.
	 */
	arm_vm_page_granular_RNX(segTEXTB, segSizeTEXT, FALSE);

	/* Can DATACONST start out and stay RNX?
	 * NO, stuff in this segment gets modified during startup (viz. mac_policy_init()/mac_policy_list)
	 * Make RNX in prot_finalize
	 */
	arm_vm_page_granular_RWNX(segDATACONSTB, segSizeDATACONST, FALSE);

	/* TEXTEXEC contains read only executable code: becomes ROX in prot_finalize */
	arm_vm_page_granular_RWX(segTEXTEXECB, segSizeTEXTEXEC, FALSE);


	/* DATA segment will remain RWNX */
	arm_vm_page_granular_RWNX(segDATAB, segSizeDATA, FALSE);

	/* BOOTDATA and the interrupt/exception stack guard pages use forced page granularity. */
	arm_vm_page_granular_RWNX(segBOOTDATAB, segSizeBOOTDATA, TRUE);
	arm_vm_page_granular_RNX((vm_offset_t)&intstack_low_guard, PAGE_MAX_SIZE, TRUE);
	arm_vm_page_granular_RNX((vm_offset_t)&intstack_high_guard, PAGE_MAX_SIZE, TRUE);
	arm_vm_page_granular_RNX((vm_offset_t)&excepstack_high_guard, PAGE_MAX_SIZE, TRUE);

	arm_vm_page_granular_ROX(segKLDB, segSizeKLD, FALSE);
	arm_vm_page_granular_RWNX(segLINKB, segSizeLINK, FALSE);
	arm_vm_page_granular_RWNX(segPLKLINKEDITB, segSizePLKLINKEDIT, FALSE); // Coalesced kext LINKEDIT segment
	arm_vm_page_granular_ROX(segLASTB, segSizeLAST, FALSE); // __LAST may be empty, but we cannot assume this

	arm_vm_page_granular_RWNX(segPRELINKDATAB, segSizePRELINKDATA, FALSE); // Prelink __DATA for kexts (RW data)

	if (segSizePLKLLVMCOV > 0)
		arm_vm_page_granular_RWNX(segPLKLLVMCOVB, segSizePLKLLVMCOV, FALSE); // LLVM code coverage data

	arm_vm_page_granular_RWNX(segPRELINKINFOB, segSizePRELINKINFO, FALSE); /* PreLinkInfoDictionary */

	arm_vm_page_granular_RNX(phystokv(args->topOfKernelData), BOOTSTRAP_TABLE_SIZE, FALSE); // Boot page tables; they should not be mutable.
}
1125 | |
1126 | /* |
1127 | * return < 0 for a < b |
1128 | * 0 for a == b |
1129 | * > 0 for a > b |
1130 | */ |
1131 | typedef int (*cmpfunc_t)(const void *a, const void *b); |
1132 | |
1133 | extern void |
1134 | qsort(void *a, size_t n, size_t es, cmpfunc_t cmp); |
1135 | |
1136 | static int |
1137 | cmp_ptov_entries(const void *a, const void *b) |
1138 | { |
1139 | const ptov_table_entry *entry_a = a; |
1140 | const ptov_table_entry *entry_b = b; |
1141 | // Sort in descending order of segment length |
1142 | if (entry_a->len < entry_b->len) |
1143 | return 1; |
1144 | else if (entry_a->len > entry_b->len) |
1145 | return -1; |
1146 | else |
1147 | return 0; |
1148 | } |
1149 | |
/* Count of populated ptov_table entries; becomes read-only late in boot. */
SECURITY_READ_ONLY_LATE(static unsigned int) ptov_index = 0;

/* Round addr up to the next ARM_TT_TWIG_SIZE boundary. */
#define ROUND_TWIG(addr) (((addr) + ARM_TT_TWIG_OFFMASK) & ~(ARM_TT_TWIG_OFFMASK))
1153 | |
/*
 * Add one slid physical-aperture segment to temp_ptov_table and map it.
 * The entry records the PA of orig_va and the (possibly slid) VA it will be
 * remapped to; entries are packed back-to-back starting at physmap_base.
 */
static void
arm_vm_physmap_slide(ptov_table_entry *temp_ptov_table, vm_map_address_t physmap_base, vm_map_address_t orig_va, vm_size_t len, int pte_prot_APX, boolean_t force_page_granule)
{
	pmap_paddr_t pa_offset;

	assert(ptov_index < PTOV_TABLE_SIZE);
	assert((orig_va & ARM_PGMASK) == 0);
	temp_ptov_table[ptov_index].pa = orig_va - gVirtBase + gPhysBase;
	if (ptov_index == 0)
		temp_ptov_table[ptov_index].va = physmap_base;
	else
		temp_ptov_table[ptov_index].va = temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len;
	if (!force_page_granule) {
		/* Nudge the VA so its offset within a twig (L2 region) matches the PA's
		 * offset; that keeps block-level mappings possible for this segment. */
		vm_map_address_t orig_offset = temp_ptov_table[ptov_index].pa & ARM_TT_TWIG_OFFMASK;
		vm_map_address_t new_offset = temp_ptov_table[ptov_index].va & ARM_TT_TWIG_OFFMASK;
		if (new_offset < orig_offset)
			temp_ptov_table[ptov_index].va += (orig_offset - new_offset);
		else if (new_offset > orig_offset)
			temp_ptov_table[ptov_index].va = ROUND_TWIG(temp_ptov_table[ptov_index].va) + orig_offset;
	}
	assert((temp_ptov_table[ptov_index].va & ARM_PGMASK) == 0);
	temp_ptov_table[ptov_index].len = round_page(len);
	pa_offset = temp_ptov_table[ptov_index].va - orig_va;
	/* RWNX-style mapping (XN at both levels); AP comes from the caller. */
	arm_vm_page_granular_prot(temp_ptov_table[ptov_index].va, temp_ptov_table[ptov_index].len, pa_offset, 1, pte_prot_APX, 1, force_page_granule);
	++ptov_index;
}
1180 | |
1181 | |
/*
 * Build the physical-to-virtual translation table (ptov_table) for the slid
 * physical aperture and establish the corresponding mappings.  Entries are
 * assembled in a temporary table, sorted, then copied into the live table.
 */
static void
arm_vm_physmap_init(boot_args *args, vm_map_address_t physmap_base, vm_map_address_t dynamic_memory_begin __unused)
{
	ptov_table_entry temp_ptov_table[PTOV_TABLE_SIZE];
	bzero(temp_ptov_table, sizeof(temp_ptov_table));

	// Will be handed back to VM layer through ml_static_mfree() in arm_vm_prot_finalize()
	arm_vm_physmap_slide(temp_ptov_table, physmap_base, gVirtBase, segEXTRADATA - gVirtBase, AP_RWNA, FALSE);

	arm_vm_page_granular_RWNX(end_kern, phystokv(args->topOfKernelData) - end_kern, FALSE); /* Device Tree, RAM Disk (if present), bootArgs */

	arm_vm_physmap_slide(temp_ptov_table, physmap_base, (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE - gPhysBase + gVirtBase),
			     real_avail_end - (args->topOfKernelData + BOOTSTRAP_TABLE_SIZE), AP_RWNA, FALSE); // rest of physmem

	/* The slid aperture must not spill into the dynamic VA range. */
	assert((temp_ptov_table[ptov_index - 1].va + temp_ptov_table[ptov_index - 1].len) <= dynamic_memory_begin);

	// Sort in descending order of segment length.  LUT traversal is linear, so largest (most likely used)
	// segments should be placed earliest in the table to optimize lookup performance.
	qsort(temp_ptov_table, PTOV_TABLE_SIZE, sizeof(temp_ptov_table[0]), cmp_ptov_entries);

	memcpy(ptov_table, temp_ptov_table, sizeof(ptov_table));
}
1204 | |
1205 | |
/*
 * Tighten kernel memory protections to their final state and return
 * no-longer-needed boot memory to the VM.  Runs after early boot patching
 * (e.g. LowResetVectorBase) is complete.
 */
void
arm_vm_prot_finalize(boot_args * args __unused)
{
	/*
	 * At this point, we are far enough along in the boot process that it will be
	 * safe to free up all of the memory preceding the kernel.  It may in fact
	 * be safe to do this earlier.
	 *
	 * This keeps the memory in the V-to-P mapping, but advertises it to the VM
	 * as usable.
	 */

	/*
	 * if old style PRELINK segment exists, free memory before it, and after it before XNU text
	 * otherwise we're dealing with a new style kernel cache, so we should just free the
	 * memory before PRELINK_TEXT segment, since the rest of the KEXT read only data segments
	 * should be immediately followed by XNU's TEXT segment
	 */

	ml_static_mfree(phystokv(gPhysBase), segEXTRADATA - gVirtBase);

	/*
	 * KTRR support means we will be mucking with these pages and trying to
	 * protect them; we cannot free the pages to the VM if we do this.
	 */
	if (!segSizePLKDATACONST && !segSizePLKTEXTEXEC && segSizePRELINKTEXT) {
		/* If new segments not present, PRELINK_TEXT is not dynamically sized, free DRAM between it and xnu TEXT */
		ml_static_mfree(segPRELINKTEXTB + segSizePRELINKTEXT, segTEXTB - (segPRELINKTEXTB + segSizePRELINKTEXT));
	}

	/*
	 * LowResetVectorBase patching should be done by now, so tighten executable
	 * protections.
	 */
	arm_vm_page_granular_ROX(segTEXTEXECB, segSizeTEXTEXEC, FALSE);

	/* tighten permissions on kext read only data and code */
	if (segSizePLKDATACONST && segSizePLKTEXTEXEC) {
		arm_vm_page_granular_RNX(segPRELINKTEXTB, segSizePRELINKTEXT, FALSE);
		arm_vm_page_granular_ROX(segPLKTEXTEXECB, segSizePLKTEXTEXEC, FALSE);
		arm_vm_page_granular_RNX(segPLKDATACONSTB, segSizePLKDATACONST, FALSE);
	}

	/* Switch the boot CPU off the bootstrap stack, then free BOOTDATA. */
	cpu_stack_alloc(&BootCpuData);
	arm64_replace_bootstack(&BootCpuData);
	ml_static_mfree(phystokv(segBOOTDATAB - gVirtBase + gPhysBase), segSizeBOOTDATA);

#if __ARM_KERNEL_PROTECT__
	arm_vm_populate_kernel_el0_mappings();
#endif /* __ARM_KERNEL_PROTECT__ */


#if defined(KERNEL_INTEGRITY_KTRR)
	/*
	 * __LAST,__pinst should no longer be executable.
	 */
	arm_vm_page_granular_RNX(segLASTB, segSizeLAST, FALSE);

	/*
	 * Must wait until all other region permissions are set before locking down DATA_CONST
	 * as the kernel static page tables live in DATA_CONST on KTRR enabled systems
	 * and will become immutable.
	 */
#endif

	arm_vm_page_granular_RNX(segDATACONSTB, segSizeDATACONST, FALSE);

#ifndef  __ARM_L1_PTW__
	FlushPoC_Dcache();
#endif
	/* Ensure the table updates are visible before invalidating the TLB. */
	__builtin_arm_dsb(DSB_ISH);
	flush_mmu_tlb();
}
1279 | |
/* Bit flags accepted by the 'tbi' boot-arg (see set_tbi()). */
#define TBI_USER 0x1	/* enable top-byte ignore for TTBR0 (user) */
#define TBI_KERNEL 0x2	/* enable top-byte ignore for TTBR1 (kernel) */

/* User TBI defaults to on; may be overridden by the 'tbi' boot-arg. */
boolean_t user_tbi = TRUE;
1284 | |
1285 | /* |
1286 | * TBI (top-byte ignore) is an ARMv8 feature for ignoring the top 8 bits of |
1287 | * address accesses. It can be enabled separately for TTBR0 (user) and |
1288 | * TTBR1 (kernel). We enable it by default for user only, but allow both |
1289 | * to be controlled by the 'tbi' boot-arg. |
1290 | */ |
1291 | static void |
1292 | set_tbi(void) |
1293 | { |
1294 | #if !__ARM_KERNEL_PROTECT__ |
1295 | /* If we are not built with __ARM_KERNEL_PROTECT__, TBI can be turned |
1296 | * off with a boot-arg. |
1297 | */ |
1298 | uint64_t old_tcr, new_tcr; |
1299 | int tbi = 0; |
1300 | |
1301 | if (PE_parse_boot_argn("tbi" , &tbi, sizeof(tbi))) |
1302 | user_tbi = ((tbi & TBI_USER) == TBI_USER); |
1303 | old_tcr = new_tcr = get_tcr(); |
1304 | new_tcr |= (user_tbi) ? TCR_TBI0_TOPBYTE_IGNORED : 0; |
1305 | new_tcr |= (tbi & TBI_KERNEL) ? TCR_TBI1_TOPBYTE_IGNORED : 0; |
1306 | |
1307 | if (old_tcr != new_tcr) { |
1308 | set_tcr(new_tcr); |
1309 | sysreg_restore.tcr_el1 = new_tcr; |
1310 | } |
1311 | #endif /* !__ARM_KERNEL_PROTECT__ */ |
1312 | } |
1313 | |
/* Size of the window within which the physical aperture may be randomly slid. */
#define ARM64_PHYSMAP_SLIDE_RANGE (1ULL << 30) // 1 GB
#define ARM64_PHYSMAP_SLIDE_MASK (ARM64_PHYSMAP_SLIDE_RANGE - 1)
1316 | |
1317 | void |
1318 | arm_vm_init(uint64_t memory_size, boot_args * args) |
1319 | { |
1320 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1321 | vm_map_address_t va_l1, va_l1_end; |
1322 | tt_entry_t *cpu_l1_tte; |
1323 | #else |
1324 | /* |
1325 | * If we are using two level page tables, rather than the |
1326 | * 3 level page tables that xnu defaults to for ARM64, |
1327 | * then a great deal of the code in this path becomes |
1328 | * redundant. As a result, most of the logic having to |
1329 | * do with L1 pages will be excluded from such |
1330 | * configurations in this function. |
1331 | */ |
1332 | #endif |
1333 | vm_map_address_t va_l2, va_l2_end; |
1334 | tt_entry_t *cpu_l2_tte; |
1335 | pmap_paddr_t boot_ttep; |
1336 | tt_entry_t *boot_tte; |
1337 | uint64_t mem_segments; |
1338 | vm_offset_t ptpage_vaddr; |
1339 | vm_map_address_t dynamic_memory_begin; |
1340 | vm_map_address_t physmap_base; |
1341 | |
1342 | |
1343 | /* |
1344 | * Get the virtual and physical memory base from boot_args. |
1345 | */ |
1346 | gVirtBase = args->virtBase; |
1347 | gPhysBase = args->physBase; |
1348 | gPhysSize = args->memSize; |
1349 | mem_size = args->memSize; |
1350 | if ((memory_size != 0) && (mem_size > memory_size)) |
1351 | mem_size = memory_size; |
1352 | if (mem_size >= ((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 4)) |
1353 | panic("Unsupported memory configuration %lx\n" , mem_size); |
1354 | |
1355 | physmap_base = phystokv(args->topOfKernelData) + BOOTSTRAP_TABLE_SIZE; |
1356 | |
1357 | // Slide the physical aperture to a random page-aligned location within the slide range |
1358 | uint64_t physmap_slide = early_random() & ARM64_PHYSMAP_SLIDE_MASK & ~((uint64_t)PAGE_MASK); |
1359 | assert(physmap_slide < ARM64_PHYSMAP_SLIDE_RANGE); |
1360 | |
1361 | physmap_base += physmap_slide; |
1362 | |
1363 | static_memory_end = physmap_base + mem_size + (PTOV_TABLE_SIZE * ARM_TT_TWIG_SIZE); // worst possible case for block alignment |
1364 | #if KASAN |
1365 | /* add the KASAN stolen memory to the physmap */ |
1366 | dynamic_memory_begin = static_memory_end + (shadow_ptop - shadow_pbase); |
1367 | #else |
1368 | dynamic_memory_begin = static_memory_end; |
1369 | #endif |
1370 | if (dynamic_memory_begin > VM_MAX_KERNEL_ADDRESS) |
1371 | panic("Unsupported memory configuration %lx\n" , mem_size); |
1372 | |
1373 | boot_ttep = args->topOfKernelData; |
1374 | boot_tte = (tt_entry_t *) phystokv(boot_ttep); |
1375 | |
1376 | #if DEVELOPMENT || DEBUG |
1377 | /* Sanity check - assert that BOOTSTRAP_TABLE_SIZE is sufficiently-large to |
1378 | * hold our bootstrap mappings for any possible slide */ |
1379 | size_t bytes_mapped = dynamic_memory_begin - gVirtBase; |
1380 | size_t l1_entries = 1 + ((bytes_mapped + ARM_TT_L1_SIZE - 1) / ARM_TT_L1_SIZE); |
1381 | /* 1 L1 each for V=P and KVA, plus 1 page for each L2 */ |
1382 | size_t pages_used = 2 * (l1_entries + 1); |
1383 | if (pages_used > BOOTSTRAP_TABLE_SIZE) { |
1384 | panic("BOOTSTRAP_TABLE_SIZE too small for memory config\n" ); |
1385 | } |
1386 | #endif |
1387 | |
1388 | /* |
1389 | * TTBR0 L1, TTBR0 L2 - 1:1 bootstrap mapping. |
1390 | * TTBR1 L1, TTBR1 L2 - kernel mapping |
1391 | */ |
1392 | avail_start = boot_ttep + BOOTSTRAP_TABLE_SIZE; |
1393 | |
1394 | #if defined(KERNEL_INTEGRITY_KTRR) |
1395 | arm_replace_identity_map(args); |
1396 | #endif |
1397 | |
1398 | /* Initialize invalid tte page */ |
1399 | invalid_tte = (tt_entry_t *)alloc_ptpage(TRUE); |
1400 | invalid_ttep = kvtophys((vm_offset_t)invalid_tte); |
1401 | bzero(invalid_tte, ARM_PGBYTES); |
1402 | |
1403 | /* |
1404 | * Initialize l1 page table page |
1405 | */ |
1406 | #if __ARM64_TWO_LEVEL_PMAP__ |
1407 | /* |
1408 | * If we're using a two level page table, we still need to |
1409 | * set the cpu_ttep to avail_start, as this will be the root |
1410 | * of our page table regardless of how many levels we are |
1411 | * using. |
1412 | */ |
1413 | #endif |
1414 | cpu_tte = (tt_entry_t *)alloc_ptpage(TRUE); |
1415 | cpu_ttep = kvtophys((vm_offset_t)cpu_tte); |
1416 | bzero(cpu_tte, ARM_PGBYTES); |
1417 | avail_end = gPhysBase + mem_size; |
1418 | |
1419 | #if KASAN |
1420 | real_avail_end = avail_end + (shadow_ptop - shadow_pbase); |
1421 | #else |
1422 | real_avail_end = avail_end; |
1423 | #endif |
1424 | |
1425 | /* |
1426 | * Initialize l1 and l2 page table pages : |
1427 | * map physical memory at the kernel base virtual address |
1428 | * cover the kernel dynamic address range section |
1429 | * |
1430 | * the so called physical aperture should be statically mapped |
1431 | */ |
1432 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1433 | va_l1 = gVirtBase; |
1434 | va_l1_end = dynamic_memory_begin; |
1435 | cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
1436 | |
1437 | while (va_l1 < va_l1_end) { |
1438 | if (*cpu_l1_tte == ARM_TTE_EMPTY) { |
1439 | /* Allocate a page and setup L1 Table TTE in L1 */ |
1440 | ptpage_vaddr = alloc_ptpage(TRUE); |
1441 | *cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID; |
1442 | bzero((void *)ptpage_vaddr, ARM_PGBYTES); |
1443 | } |
1444 | |
1445 | if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) { |
1446 | /* If this is the last L1 entry, it must cover the last mapping. */ |
1447 | break; |
1448 | } |
1449 | |
1450 | va_l1 += ARM_TT_L1_SIZE; |
1451 | cpu_l1_tte++; |
1452 | } |
1453 | #endif |
1454 | |
1455 | #if __ARM_KERNEL_PROTECT__ |
1456 | /* Expand the page tables to prepare for the EL0 mappings. */ |
1457 | arm_vm_expand_kernel_el0_mappings(); |
1458 | #endif /* __ARM_KERNEL_PROTECT__ */ |
1459 | |
1460 | /* |
1461 | * Now retrieve addresses for end, edata, and etext from MACH-O headers |
1462 | */ |
1463 | segPRELINKTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_TEXT" , &segSizePRELINKTEXT); |
1464 | segPLKDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_DATA_CONST" , &segSizePLKDATACONST); |
1465 | segPLKTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_TEXT_EXEC" , &segSizePLKTEXTEXEC); |
1466 | segTEXTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT" , &segSizeTEXT); |
1467 | segDATACONSTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA_CONST" , &segSizeDATACONST); |
1468 | segTEXTEXECB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__TEXT_EXEC" , &segSizeTEXTEXEC); |
1469 | segDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__DATA" , &segSizeDATA); |
1470 | |
1471 | segBOOTDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__BOOTDATA" , &segSizeBOOTDATA); |
1472 | segLINKB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LINKEDIT" , &segSizeLINK); |
1473 | segKLDB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__KLD" , &segSizeKLD); |
1474 | segPRELINKDATAB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_DATA" , &segSizePRELINKDATA); |
1475 | segPRELINKINFOB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PRELINK_INFO" , &segSizePRELINKINFO); |
1476 | segPLKLLVMCOVB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LLVM_COV" , &segSizePLKLLVMCOV); |
1477 | segPLKLINKEDITB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__PLK_LINKEDIT" , &segSizePLKLINKEDIT); |
1478 | segLASTB = (vm_offset_t) getsegdatafromheader(&_mh_execute_header, "__LAST" , &segSizeLAST); |
1479 | |
1480 | (void) PE_parse_boot_argn("use_contiguous_hint" , &use_contiguous_hint, sizeof(use_contiguous_hint)); |
1481 | assert(segSizePRELINKTEXT < 0x03000000); /* 23355738 */ |
1482 | |
1483 | /* if one of the new segments is present, the other one better be as well */ |
1484 | if (segSizePLKDATACONST || segSizePLKTEXTEXEC) { |
1485 | assert(segSizePLKDATACONST && segSizePLKTEXTEXEC); |
1486 | } |
1487 | |
1488 | etext = (vm_offset_t) segTEXTB + segSizeTEXT; |
1489 | sdata = (vm_offset_t) segDATAB; |
1490 | edata = (vm_offset_t) segDATAB + segSizeDATA; |
1491 | end_kern = round_page(getlastaddr()); /* Force end to next page */ |
1492 | |
1493 | vm_set_page_size(); |
1494 | |
1495 | vm_kernel_base = segTEXTB; |
1496 | vm_kernel_top = (vm_offset_t) &last_kernel_symbol; |
1497 | vm_kext_base = segPRELINKTEXTB; |
1498 | vm_kext_top = vm_kext_base + segSizePRELINKTEXT; |
1499 | |
1500 | vm_prelink_stext = segPRELINKTEXTB; |
1501 | if (!segSizePLKTEXTEXEC && !segSizePLKDATACONST) { |
1502 | vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT; |
1503 | } else { |
1504 | vm_prelink_etext = segPRELINKTEXTB + segSizePRELINKTEXT + segSizePLKDATACONST + segSizePLKTEXTEXEC; |
1505 | } |
1506 | vm_prelink_sinfo = segPRELINKINFOB; |
1507 | vm_prelink_einfo = segPRELINKINFOB + segSizePRELINKINFO; |
1508 | vm_slinkedit = segLINKB; |
1509 | vm_elinkedit = segLINKB + segSizeLINK; |
1510 | |
1511 | vm_prelink_sdata = segPRELINKDATAB; |
1512 | vm_prelink_edata = segPRELINKDATAB + segSizePRELINKDATA; |
1513 | |
1514 | arm_vm_prot_init(args); |
1515 | |
1516 | |
1517 | /* |
1518 | * Initialize the page tables for the low globals: |
1519 | * cover this address range: |
1520 | * LOW_GLOBAL_BASE_ADDRESS + 2MB |
1521 | */ |
1522 | #if __ARM64_TWO_LEVEL_PMAP__ |
1523 | va_l2 = LOW_GLOBAL_BASE_ADDRESS; |
1524 | cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
1525 | #else |
1526 | va_l1 = va_l2 = LOW_GLOBAL_BASE_ADDRESS; |
1527 | cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
1528 | cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
1529 | #endif |
1530 | ptpage_vaddr = alloc_ptpage(TRUE); |
1531 | *cpu_l2_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_TTE_TABLE_PXN | ARM_TTE_TABLE_XN; |
1532 | bzero((void *)ptpage_vaddr, ARM_PGBYTES); |
1533 | |
1534 | /* |
1535 | * Initialize l2 page table pages : |
1536 | * cover this address range: |
1537 | * KERNEL_DYNAMIC_ADDR - VM_MAX_KERNEL_ADDRESS |
1538 | */ |
1539 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1540 | va_l1 = dynamic_memory_begin; |
1541 | va_l1_end = VM_MAX_KERNEL_ADDRESS; |
1542 | cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
1543 | |
1544 | while (va_l1 < va_l1_end) { |
1545 | if (*cpu_l1_tte == ARM_TTE_EMPTY) { |
1546 | /* Allocate a page and setup L1 Table TTE in L1 */ |
1547 | ptpage_vaddr = alloc_ptpage(TRUE); |
1548 | *cpu_l1_tte = (kvtophys(ptpage_vaddr) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN; |
1549 | bzero((void *)ptpage_vaddr, ARM_PGBYTES); |
1550 | } |
1551 | |
1552 | if ((va_l1 + ARM_TT_L1_SIZE) < va_l1) { |
1553 | /* If this is the last L1 entry, it must cover the last mapping. */ |
1554 | break; |
1555 | } |
1556 | |
1557 | va_l1 += ARM_TT_L1_SIZE; |
1558 | cpu_l1_tte++; |
1559 | } |
1560 | #endif |
1561 | |
1562 | #if KASAN |
1563 | /* record the extent of the physmap */ |
1564 | physmap_vbase = physmap_base; |
1565 | physmap_vtop = static_memory_end; |
1566 | kasan_init(); |
1567 | #endif |
1568 | |
1569 | set_tbi(); |
1570 | set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK); |
1571 | |
1572 | arm_vm_physmap_init(args, physmap_base, dynamic_memory_begin); |
1573 | set_mmu_ttb_alternate(cpu_ttep & TTBR_BADDR_MASK); |
1574 | flush_mmu_tlb(); |
1575 | kva_active = TRUE; |
1576 | // global table pointers may need to be different due to physical aperture remapping |
1577 | cpu_tte = (tt_entry_t*)(phystokv(cpu_ttep)); |
1578 | invalid_tte = (tt_entry_t*)(phystokv(invalid_ttep)); |
1579 | |
1580 | sane_size = mem_size - (avail_start - gPhysBase); |
1581 | max_mem = mem_size; |
1582 | vm_kernel_slid_base = segLOWESTTEXT; |
1583 | vm_kernel_slid_top = vm_prelink_einfo; |
1584 | vm_kernel_slide = segTEXTB-VM_KERNEL_LINK_ADDRESS; |
1585 | vm_kernel_stext = segTEXTB; |
1586 | assert(segDATACONSTB == segTEXTB + segSizeTEXT); |
1587 | assert(segTEXTEXECB == segDATACONSTB + segSizeDATACONST); |
1588 | vm_kernel_etext = segTEXTB + segSizeTEXT + segSizeDATACONST + segSizeTEXTEXEC; |
1589 | |
1590 | dynamic_memory_begin = ROUND_TWIG(dynamic_memory_begin); |
1591 | pmap_bootstrap(dynamic_memory_begin); |
1592 | |
1593 | disable_preemption(); |
1594 | |
1595 | /* |
1596 | * Initialize l3 page table pages : |
1597 | * cover this address range: |
1598 | * 2MB + FrameBuffer size + 10MB for each 256MB segment |
1599 | */ |
1600 | |
1601 | mem_segments = (mem_size + 0x0FFFFFFF) >> 28; |
1602 | |
1603 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1604 | va_l1 = dynamic_memory_begin; |
1605 | va_l1_end = va_l1 + ((2 + (mem_segments * 10)) << 20); |
1606 | va_l1_end += round_page(args->Video.v_height * args->Video.v_rowBytes); |
1607 | va_l1_end = (va_l1_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL; |
1608 | |
1609 | cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
1610 | |
1611 | while (va_l1 < va_l1_end) { |
1612 | |
1613 | va_l2 = va_l1; |
1614 | |
1615 | if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) { |
1616 | /* If this is the last L1 entry, it must cover the last mapping. */ |
1617 | va_l2_end = va_l1_end; |
1618 | } else { |
1619 | va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end); |
1620 | } |
1621 | |
1622 | cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
1623 | #else |
1624 | va_l2 = dynamic_memory_begin; |
1625 | va_l2_end = va_l2 + ((2 + (mem_segments * 10)) << 20); |
1626 | va_l2_end += round_page(args->Video.v_height * args->Video.v_rowBytes); |
1627 | va_l2_end = (va_l2_end + 0x00000000007FFFFFULL) & 0xFFFFFFFFFF800000ULL; |
1628 | cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
1629 | #endif |
1630 | |
1631 | while (va_l2 < va_l2_end) { |
1632 | pt_entry_t * ptp; |
1633 | pmap_paddr_t ptp_phys; |
1634 | |
1635 | /* Allocate a page and setup L3 Table TTE in L2 */ |
1636 | ptp = (pt_entry_t *) alloc_ptpage(FALSE); |
1637 | ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp); |
1638 | |
1639 | pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE); |
1640 | |
1641 | *cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN; |
1642 | |
1643 | va_l2 += ARM_TT_L2_SIZE; |
1644 | cpu_l2_tte++; |
1645 | }; |
1646 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1647 | va_l1 = va_l2_end; |
1648 | cpu_l1_tte++; |
1649 | } |
1650 | #endif |
1651 | |
1652 | /* |
1653 | * Initialize l3 page table pages : |
1654 | * cover this address range: |
1655 | * (VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VM_MAX_KERNEL_ADDRESS |
1656 | */ |
1657 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1658 | va_l1 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK; |
1659 | va_l1_end = VM_MAX_KERNEL_ADDRESS; |
1660 | |
1661 | cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
1662 | |
1663 | while (va_l1 < va_l1_end) { |
1664 | |
1665 | va_l2 = va_l1; |
1666 | |
1667 | if (((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE) < va_l1) { |
1668 | /* If this is the last L1 entry, it must cover the last mapping. */ |
1669 | va_l2_end = va_l1_end; |
1670 | } else { |
1671 | va_l2_end = MIN((va_l1 & ~ARM_TT_L1_OFFMASK)+ARM_TT_L1_SIZE, va_l1_end); |
1672 | } |
1673 | |
1674 | cpu_l2_tte = ((tt_entry_t *) phystokv(((*cpu_l1_tte) & ARM_TTE_TABLE_MASK))) + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
1675 | #else |
1676 | va_l2 = VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK; |
1677 | va_l2_end = VM_MAX_KERNEL_ADDRESS; |
1678 | cpu_l2_tte = cpu_tte + ((va_l2 & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT); |
1679 | #endif |
1680 | |
1681 | while (va_l2 < va_l2_end) { |
1682 | pt_entry_t * ptp; |
1683 | pmap_paddr_t ptp_phys; |
1684 | |
1685 | /* Allocate a page and setup L3 Table TTE in L2 */ |
1686 | ptp = (pt_entry_t *) alloc_ptpage(FALSE); |
1687 | ptp_phys = (pmap_paddr_t)kvtophys((vm_offset_t)ptp); |
1688 | |
1689 | pmap_init_pte_page(kernel_pmap, ptp, va_l2, 3, TRUE); |
1690 | |
1691 | *cpu_l2_tte = (pa_to_tte (ptp_phys)) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN; |
1692 | |
1693 | va_l2 += ARM_TT_L2_SIZE; |
1694 | cpu_l2_tte++; |
1695 | }; |
1696 | #if !__ARM64_TWO_LEVEL_PMAP__ |
1697 | va_l1 = va_l2_end; |
1698 | cpu_l1_tte++; |
1699 | } |
1700 | #endif |
1701 | |
1702 | #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__ |
1703 | /* |
1704 | * In this configuration, the bootstrap mappings (arm_vm_init) and |
1705 | * the heap mappings occupy separate L1 regions. Explicitly set up |
1706 | * the heap L1 allocations here. |
1707 | */ |
1708 | va_l1 = VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK; |
1709 | cpu_l1_tte = cpu_tte + ((va_l1 & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT); |
1710 | |
1711 | while ((va_l1 >= (VM_MIN_KERNEL_ADDRESS & ~ARM_TT_L1_OFFMASK)) && (va_l1 < VM_MAX_KERNEL_ADDRESS)) { |
1712 | /* |
1713 | * If the L1 entry has not yet been allocated, allocate it |
1714 | * now and treat it as a heap table. |
1715 | */ |
1716 | if (*cpu_l1_tte == ARM_TTE_EMPTY) { |
1717 | tt_entry_t *new_tte = (tt_entry_t*)alloc_ptpage(FALSE); |
1718 | bzero(new_tte, ARM_PGBYTES); |
1719 | *cpu_l1_tte = (kvtophys((vm_offset_t)new_tte) & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID | ARM_DYNAMIC_TABLE_XN; |
1720 | } |
1721 | |
1722 | cpu_l1_tte++; |
1723 | va_l1 += ARM_TT_L1_SIZE; |
1724 | } |
1725 | #endif |
1726 | |
1727 | /* |
1728 | * Adjust avail_start so that the range that the VM owns |
1729 | * starts on a PAGE_SIZE aligned boundary. |
1730 | */ |
1731 | avail_start = (avail_start + PAGE_MASK) & ~PAGE_MASK; |
1732 | |
1733 | first_avail = avail_start; |
1734 | patch_low_glo_static_region(args->topOfKernelData, avail_start - args->topOfKernelData); |
1735 | enable_preemption(); |
1736 | } |
1737 | |
1738 | |