1 | /* |
2 | * Copyright (c) 2019-2022 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #pragma once |
30 | |
31 | #include <arm64/proc_reg.h> |
32 | #include <machine/atomic.h> |
33 | |
34 | #define tlbi_addr(x) ((((x) >> 12) & TLBI_ADDR_MASK) << TLBI_ADDR_SHIFT) |
35 | #define tlbi_asid(x) (((uintptr_t)(x) & TLBI_ASID_MASK) << TLBI_ASID_SHIFT) |
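
/*
 * Illustrative sketch (not part of this header's API): the macros above can
 * be combined into a single per-VA TLBI operand, assuming a hypothetical
 * virtual address `va` and 16-bit ASID `asid`:
 *
 *     uint64_t operand = tlbi_asid(asid) | tlbi_addr(va);
 *
 * tlbi_addr() takes the 4KB page number (va >> 12) and shifts it into the
 * operand's address field; tlbi_asid() shifts the ASID into the ASID field.
 */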
36 | |
37 | #if __ARM_KERNEL_PROTECT__ |
38 | /* |
39 | * __ARM_KERNEL_PROTECT__ adds two complications to TLB management: |
40 | * |
41 | * 1. As each pmap has two ASIDs, every TLB operation that targets an ASID must |
42 | * target both ASIDs for the pmap that owns the target ASID. |
43 | * |
44 | * 2. Any TLB operation targeting the kernel_pmap ASID (ASID 0) must target all |
45 | * ASIDs (as kernel_pmap mappings may be referenced while using an ASID that |
46 | * belongs to another pmap). We expect these routines to be called with the |
 *    EL0 ASID for the target, not the EL1 ASID.
48 | */ |
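
/*
 * Illustrative sketch of how the routines below handle (1), assuming (as the
 * bit manipulation in the __ARM_KERNEL_PROTECT__ paths suggests) that a
 * pmap's two ASIDs differ only in the low bit of the ASID field:
 *
 *     uint64_t val = tlbi_asid(el0_asid);    // hypothetical EL0 ASID from the caller
 *     asm volatile ("tlbi aside1is, %0" : : "r" (val & ~(1ULL << TLBI_ASID_SHIFT)));
 *     asm volatile ("tlbi aside1is, %0" : : "r" (val | (1ULL << TLBI_ASID_SHIFT)));
 *
 * (2) is handled by checking for ASID 0 and falling back to a flush of all
 * ASIDs (e.g. "tlbi vmalle1is").
 */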
49 | #endif /* __ARM_KERNEL_PROTECT__ */ |
50 | |
51 | static inline void |
52 | sync_tlb_flush(void) |
53 | { |
54 | #if HAS_FEAT_XS |
55 | asm volatile ("dsb ishnxs" :::"memory" ); |
56 | #else |
57 | __builtin_arm_dsb(DSB_ISH); |
58 | #endif /* HAS_FEAT_XS */ |
59 | __builtin_arm_isb(ISB_SY); |
60 | } |
61 | |
62 | static inline void |
63 | sync_tlb_flush_local(void) |
64 | { |
65 | #if HAS_FEAT_XS |
66 | asm volatile ("dsb nshnxs" :::"memory" ); |
67 | #else |
68 | __builtin_arm_dsb(DSB_NSH); |
69 | #endif /* HAS_FEAT_XS */ |
70 | __builtin_arm_isb(ISB_SY); |
71 | } |
72 | |
73 | #if HAS_FEAT_XS |
74 | |
75 | static inline void |
76 | sync_tlb_flush_strong(void) |
77 | { |
78 | __builtin_arm_dsb(DSB_ISH); |
79 | __builtin_arm_isb(ISB_SY); |
80 | } |
81 | |
#endif /* HAS_FEAT_XS */

85 | static inline void |
86 | arm64_sync_tlb(bool strong __unused) |
87 | { |
88 | sync_tlb_flush(); |
89 | } |
90 | |
91 | // flush_mmu_tlb: full TLB flush on all cores |
92 | static inline void |
93 | flush_mmu_tlb_async(void) |
94 | { |
95 | asm volatile ("tlbi vmalle1is" ); |
96 | } |
97 | |
98 | static inline void |
99 | flush_mmu_tlb(void) |
100 | { |
101 | flush_mmu_tlb_async(); |
102 | #if HAS_FEAT_XS |
103 | /* Full flush is always treated as "strong" when there is a HW-level distinction. */ |
104 | sync_tlb_flush_strong(); |
105 | #else |
106 | sync_tlb_flush(); |
107 | #endif /* HAS_FEAT_XS */ |
108 | } |
109 | |
110 | // flush_core_tlb: full TLB flush on local core only |
111 | static inline void |
112 | flush_core_tlb_async(void) |
113 | { |
114 | #if HAS_FEAT_XS |
115 | asm volatile ("tlbi vmalle1nxs" ); |
116 | #else |
117 | asm volatile ("tlbi vmalle1" ); |
118 | #endif /* HAS_FEAT_XS */ |
119 | } |
120 | |
121 | static inline void |
122 | flush_core_tlb(void) |
123 | { |
124 | flush_core_tlb_async(); |
125 | sync_tlb_flush_local(); |
126 | } |
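
/*
 * The non-async wrappers in this header pair each TLBI with a DSB/ISB via
 * sync_tlb_flush()/sync_tlb_flush_local().  Illustrative sketch (hypothetical
 * caller) of why the *_async variants exist: several invalidations can be
 * batched behind a single barrier sequence.
 *
 *     flush_core_tlb_asid_async(tlbi_asid(asid_a));    // per-ASID helpers are
 *     flush_core_tlb_asid_async(tlbi_asid(asid_b));    // defined later in this header
 *     sync_tlb_flush_local();                          // one barrier completes both
 */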
127 | |
// flush_mmu_tlb_allentries_async: flush entries that map VA range, all ASIDs, all cores
129 | // start and end are in units of 4K pages. |
130 | static inline void |
131 | flush_mmu_tlb_allentries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size, |
132 | bool last_level_only, bool strong __unused) |
133 | { |
134 | #if __ARM_16K_PG__ |
135 | if (pmap_page_size == 16384) { |
136 | start = start & ~0x3ULL; |
137 | |
138 | /* |
139 | * The code below is not necessarily correct. From an overview of |
140 | * the client code, the expected contract for TLB flushes is that |
141 | * we will expand from an "address, length" pair to "start address, |
142 | * end address" in the course of a TLB flush. This suggests that |
143 | * a flush for "X, X+4" is actually only asking for a flush of a |
144 | * single 16KB page. At the same time, we'd like to be prepared |
145 | * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page |
146 | * number to a 16KB page boundary. This should deal correctly with |
147 | * unaligned inputs. |
148 | * |
 * If our expectations about client behavior are wrong, however, this
150 | * will lead to occasional TLB corruption on platforms with 16KB |
151 | * pages. |
152 | */ |
153 | end = (end + 0x3ULL) & ~0x3ULL; |
154 | } |
155 | #endif // __ARM_16K_PG__ |
156 | if (last_level_only) { |
157 | for (; start < end; start += (pmap_page_size / 4096)) { |
158 | #if HAS_FEAT_XS |
159 | if (__probable(!strong)) { |
160 | asm volatile ("tlbi vaale1isnxs, %0" : : "r" (start)); |
161 | } else |
162 | #endif /* HAS_FEAT_XS */ |
163 | { |
164 | asm volatile ("tlbi vaale1is, %0" : : "r" (start)); |
165 | } |
166 | } |
167 | } else { |
168 | for (; start < end; start += (pmap_page_size / 4096)) { |
169 | #if HAS_FEAT_XS |
170 | if (__probable(!strong)) { |
171 | asm volatile ("tlbi vaae1isnxs, %0" : : "r" (start)); |
172 | } else |
173 | #endif /* HAS_FEAT_XS */ |
174 | { |
175 | asm volatile ("tlbi vaae1is, %0" : : "r" (start)); |
176 | } |
177 | } |
178 | } |
179 | } |
180 | |
181 | static inline void |
182 | flush_mmu_tlb_allentries(uint64_t start, uint64_t end, uint64_t pmap_page_size, bool last_level_only, bool strong) |
183 | { |
184 | flush_mmu_tlb_allentries_async(start, end, pmap_page_size, last_level_only, strong); |
185 | arm64_sync_tlb(strong); |
186 | } |
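
/*
 * Illustrative sketch (hypothetical caller): start and end are 4KB page
 * numbers regardless of the pmap's page size.  Invalidating `size` bytes at
 * `va` on a 16KB-page pmap, with va and size assumed 16KB-aligned and the
 * last two arguments chosen arbitrarily for this example, might look like:
 *
 *     uint64_t start = va >> 12;            // 4KB page number
 *     uint64_t end   = (va + size) >> 12;
 *     flush_mmu_tlb_allentries(start, end, 16384, true, false);
 *
 * For size == 16384 this yields end == start + 4, i.e. a single 16KB page,
 * which the rounding at the top of the _async routine preserves.
 */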
187 | |
188 | // flush_mmu_tlb_entries: flush TLB entries that map a VA range and ASID, all cores |
// start and end must have the ASID in the high 16 bits, with the VA (in units of 4K pages) in the low bits
190 | // Will also flush global entries that match the VA range |
191 | static inline void |
192 | flush_mmu_tlb_entries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size, |
193 | bool last_level_only, bool strong __unused) |
194 | { |
195 | #if __ARM_16K_PG__ |
196 | if (pmap_page_size == 16384) { |
197 | start = start & ~0x3ULL; |
198 | |
199 | /* |
200 | * The code below is not necessarily correct. From an overview of |
201 | * the client code, the expected contract for TLB flushes is that |
202 | * we will expand from an "address, length" pair to "start address, |
203 | * end address" in the course of a TLB flush. This suggests that |
204 | * a flush for "X, X+4" is actually only asking for a flush of a |
205 | * single 16KB page. At the same time, we'd like to be prepared |
206 | * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page |
207 | * number to a 16KB page boundary. This should deal correctly with |
208 | * unaligned inputs. |
209 | * |
 * If our expectations about client behavior are wrong, however, this
211 | * will lead to occasional TLB corruption on platforms with 16KB |
212 | * pages. |
213 | */ |
214 | end = (end + 0x3ULL) & ~0x3ULL; |
215 | } |
216 | #endif // __ARM_16K_PG__ |
217 | #if __ARM_KERNEL_PROTECT__ |
218 | uint64_t asid = start >> TLBI_ASID_SHIFT; |
219 | /* |
220 | * If we are flushing ASID 0, this is a kernel operation. With this |
221 | * ASID scheme, this means we should flush all ASIDs. |
222 | */ |
223 | if (asid == 0) { |
224 | if (last_level_only) { |
225 | for (; start < end; start += (pmap_page_size / 4096)) { |
226 | asm volatile ("tlbi vaale1is, %0" : : "r" (start)); |
227 | } |
228 | } else { |
229 | for (; start < end; start += (pmap_page_size / 4096)) { |
230 | asm volatile ("tlbi vaae1is, %0" : : "r" (start)); |
231 | } |
232 | } |
233 | return; |
234 | } |
235 | start = start | (1ULL << TLBI_ASID_SHIFT); |
236 | end = end | (1ULL << TLBI_ASID_SHIFT); |
237 | if (last_level_only) { |
238 | for (; start < end; start += (pmap_page_size / 4096)) { |
239 | start = start & ~(1ULL << TLBI_ASID_SHIFT); |
240 | asm volatile ("tlbi vale1is, %0" : : "r" (start)); |
241 | start = start | (1ULL << TLBI_ASID_SHIFT); |
242 | asm volatile ("tlbi vale1is, %0" : : "r" (start)); |
243 | } |
244 | } else { |
245 | for (; start < end; start += (pmap_page_size / 4096)) { |
246 | start = start & ~(1ULL << TLBI_ASID_SHIFT); |
247 | asm volatile ("tlbi vae1is, %0" : : "r" (start)); |
248 | start = start | (1ULL << TLBI_ASID_SHIFT); |
249 | asm volatile ("tlbi vae1is, %0" : : "r" (start)); |
250 | } |
251 | } |
252 | #else |
253 | if (last_level_only) { |
254 | for (; start < end; start += (pmap_page_size / 4096)) { |
255 | #if HAS_FEAT_XS |
256 | if (__probable(!strong)) { |
257 | asm volatile ("tlbi vale1isnxs, %0" : : "r" (start)); |
258 | } else |
259 | #endif /* HAS_FEAT_XS */ |
260 | { |
261 | asm volatile ("tlbi vale1is, %0" : : "r" (start)); |
262 | } |
263 | } |
264 | } else { |
265 | for (; start < end; start += (pmap_page_size / 4096)) { |
266 | #if HAS_FEAT_XS |
267 | if (__probable(!strong)) { |
268 | asm volatile ("tlbi vae1isnxs, %0" : : "r" (start)); |
269 | } else |
270 | #endif /* HAS_FEAT_XS */ |
271 | { |
272 | asm volatile ("tlbi vae1is, %0" : : "r" (start)); |
273 | } |
274 | } |
275 | } |
276 | #endif /* __ARM_KERNEL_PROTECT__ */ |
277 | } |
278 | |
279 | static inline void |
280 | flush_mmu_tlb_entries(uint64_t start, uint64_t end, uint64_t pmap_page_size, bool last_level_only, bool strong) |
281 | { |
282 | flush_mmu_tlb_entries_async(start, end, pmap_page_size, last_level_only, strong); |
283 | arm64_sync_tlb(strong); |
284 | } |
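
/*
 * Illustrative sketch (hypothetical caller): the operands carry the ASID in
 * the upper bits and the VA, as a 4KB page number, in the low bits, so the
 * macros at the top of this header can be combined to build them (va and
 * size assumed page-aligned; the last two arguments are arbitrary choices
 * for this example):
 *
 *     uint64_t start = tlbi_asid(asid) | tlbi_addr(va);
 *     uint64_t end   = tlbi_asid(asid) | tlbi_addr(va + size);
 *     flush_mmu_tlb_entries(start, end, pmap_page_size, true, false);
 */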
285 | |
286 | // flush_mmu_tlb_asid: flush all entries that match an ASID, on all cores |
287 | // ASID must be in high 16 bits of argument |
288 | // Will not flush global entries |
289 | static inline void |
290 | flush_mmu_tlb_asid_async(uint64_t val, bool strong __unused) |
291 | { |
292 | #if __ARM_KERNEL_PROTECT__ |
293 | /* |
294 | * If we are flushing ASID 0, this is a kernel operation. With this |
295 | * ASID scheme, this means we should flush all ASIDs. |
296 | */ |
297 | uint64_t asid = val >> TLBI_ASID_SHIFT; |
298 | if (asid == 0) { |
299 | asm volatile ("tlbi vmalle1is" ); |
300 | return; |
301 | } |
302 | val = val & ~(1ULL << TLBI_ASID_SHIFT); |
303 | asm volatile ("tlbi aside1is, %0" : : "r" (val)); |
304 | val = val | (1ULL << TLBI_ASID_SHIFT); |
305 | #endif /* __ARM_KERNEL_PROTECT__ */ |
306 | #if HAS_FEAT_XS |
307 | if (__probable(!strong)) { |
308 | asm volatile ("tlbi aside1isnxs, %0" : : "r" (val)); |
309 | } else |
310 | #endif /* HAS_FEAT_XS */ |
311 | { |
312 | asm volatile ("tlbi aside1is, %0" : : "r" (val)); |
313 | } |
314 | } |
315 | |
316 | static inline void |
317 | flush_mmu_tlb_asid(uint64_t val, bool strong) |
318 | { |
319 | flush_mmu_tlb_asid_async(val, strong); |
320 | arm64_sync_tlb(strong); |
321 | } |
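
/*
 * Illustrative usage (hypothetical caller): per the comment above, the ASID
 * sits in the upper bits of the operand, which tlbi_asid() arranges:
 *
 *     flush_mmu_tlb_asid(tlbi_asid(pmap_asid), false);    // "strong" left false here
 */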
322 | |
323 | // flush_core_tlb_asid: flush all entries that match an ASID, local core only |
324 | // ASID must be in high 16 bits of argument |
325 | // Will not flush global entries |
326 | static inline void |
327 | flush_core_tlb_asid_async(uint64_t val) |
328 | { |
329 | #if __ARM_KERNEL_PROTECT__ |
330 | /* |
331 | * If we are flushing ASID 0, this is a kernel operation. With this |
332 | * ASID scheme, this means we should flush all ASIDs. |
333 | */ |
334 | uint64_t asid = val >> TLBI_ASID_SHIFT; |
335 | if (asid == 0) { |
336 | asm volatile ("tlbi vmalle1" ); |
337 | return; |
338 | } |
339 | val = val & ~(1ULL << TLBI_ASID_SHIFT); |
340 | asm volatile ("tlbi aside1, %0" : : "r" (val)); |
341 | val = val | (1ULL << TLBI_ASID_SHIFT); |
342 | #endif /* __ARM_KERNEL_PROTECT__ */ |
343 | #if HAS_FEAT_XS |
344 | asm volatile ("tlbi aside1nxs, %0" : : "r" (val)); |
345 | #else |
346 | asm volatile ("tlbi aside1, %0" : : "r" (val)); |
347 | #endif /* HAS_FEAT_XS */ |
348 | } |
349 | |
350 | static inline void |
351 | flush_core_tlb_asid(uint64_t val) |
352 | { |
353 | flush_core_tlb_asid_async(val); |
354 | sync_tlb_flush_local(); |
355 | } |
356 | |
357 | #if __ARM_RANGE_TLBI__ |
358 | #if __ARM_KERNEL_PROTECT__ |
359 | #error __ARM_RANGE_TLBI__ + __ARM_KERNEL_PROTECT__ is not currently supported |
360 | #endif |
361 | |
362 | #define ARM64_TLB_RANGE_MIN_PAGES 2 |
363 | #define ARM64_TLB_RANGE_MAX_PAGES (1ULL << 21) |
364 | #define rtlbi_addr(x, shift) (((x) >> (shift)) & RTLBI_ADDR_MASK) |
365 | #define rtlbi_scale(x) ((uint64_t)(x) << RTLBI_SCALE_SHIFT) |
366 | #define rtlbi_num(x) ((uint64_t)(x) << RTLBI_NUM_SHIFT) |
367 | |
368 | /** |
369 | * Given the number of pages to invalidate, generate the correct parameter to |
370 | * pass to any of the TLBI by range methods. |
371 | */ |
372 | static inline uint64_t |
373 | generate_rtlbi_param(ppnum_t npages, uint32_t asid, vm_offset_t va, uint64_t pmap_page_shift) |
374 | { |
375 | assert(npages > 1); |
376 | /** |
377 | * Per the armv8.4 RTLBI extension spec, the range encoded in the rtlbi register operand is defined by: |
378 | * BaseADDR <= VA < BaseADDR+((NUM+1)*2^(5*SCALE+1) * Translation_Granule_Size) |
379 | */ |
380 | unsigned order = (unsigned)(sizeof(npages) * 8) - (unsigned)__builtin_clz(npages - 1) - 1; |
381 | unsigned scale = ((order ? order : 1) - 1) / 5; |
382 | unsigned granule = 1 << ((5 * scale) + 1); |
383 | unsigned num = (((npages + granule - 1) & ~(granule - 1)) / granule) - 1; |
384 | return tlbi_asid(asid) | RTLBI_TG(pmap_page_shift) | rtlbi_scale(scale) | rtlbi_num(num) | rtlbi_addr(va, pmap_page_shift); |
385 | } |
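
/*
 * Worked example (illustrative): for npages == 40, the computation above
 * yields order == 5, scale == 0, granule == 2 and num == 19, so the encoded
 * range spans (NUM+1) * 2^(5*SCALE+1) == 20 * 2 == 40 translation-granule
 * pages starting at va, i.e. exactly the requested number of pages.
 */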
386 | |
387 | // flush_mmu_tlb_range: flush TLB entries that map a VA range using a single instruction |
388 | // The argument should be encoded according to generate_rtlbi_param(). |
389 | // Follows the same ASID matching behavior as flush_mmu_tlb_entries() |
390 | static inline void |
391 | flush_mmu_tlb_range_async(uint64_t val, bool last_level_only, bool strong __unused) |
392 | { |
393 | if (last_level_only) { |
394 | #if HAS_FEAT_XS |
395 | if (__probable(!strong)) { |
396 | asm volatile ("tlbi rvale1isnxs, %0" : : "r" (val)); |
397 | } else |
398 | #endif /* HAS_FEAT_XS */ |
399 | { |
400 | asm volatile ("tlbi rvale1is, %0" : : "r" (val)); |
401 | } |
402 | } else { |
403 | #if HAS_FEAT_XS |
404 | if (__probable(!strong)) { |
405 | asm volatile ("tlbi rvae1isnxs, %0" : : "r" (val)); |
406 | } else |
407 | #endif /* HAS_FEAT_XS */ |
408 | { |
409 | asm volatile ("tlbi rvae1is, %0" : : "r" (val)); |
410 | } |
411 | } |
412 | } |
413 | |
414 | static inline void |
415 | flush_mmu_tlb_range(uint64_t val, bool last_level_only, bool strong) |
416 | { |
417 | flush_mmu_tlb_range_async(val, last_level_only, strong); |
418 | arm64_sync_tlb(strong); |
419 | } |
420 | |
421 | // flush_mmu_tlb_allrange: flush TLB entries that map a VA range using a single instruction |
422 | // The argument should be encoded according to generate_rtlbi_param(). |
423 | // Follows the same ASID matching behavior as flush_mmu_tlb_allentries() |
424 | static inline void |
425 | flush_mmu_tlb_allrange_async(uint64_t val, bool last_level_only, bool strong __unused) |
426 | { |
427 | if (last_level_only) { |
428 | #if HAS_FEAT_XS |
429 | if (__probable(!strong)) { |
430 | asm volatile ("tlbi rvaale1isnxs, %0" : : "r" (val)); |
431 | } else |
432 | #endif /* HAS_FEAT_XS */ |
433 | { |
434 | asm volatile ("tlbi rvaale1is, %0" : : "r" (val)); |
435 | } |
436 | } else { |
437 | #if HAS_FEAT_XS |
438 | if (__probable(!strong)) { |
439 | asm volatile ("tlbi rvaae1isnxs, %0" : : "r" (val)); |
440 | } else |
441 | #endif /* HAS_FEAT_XS */ |
442 | { |
443 | asm volatile ("tlbi rvaae1is, %0" : : "r" (val)); |
444 | } |
445 | } |
446 | } |
447 | |
448 | static inline void |
449 | flush_mmu_tlb_allrange(uint64_t val, bool last_level_only, bool strong) |
450 | { |
451 | flush_mmu_tlb_allrange_async(val, last_level_only, strong); |
452 | arm64_sync_tlb(strong); |
453 | } |
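
/*
 * Illustrative sketch (hypothetical caller): a range operand built by
 * generate_rtlbi_param() is handed to one of the range routines, provided
 * the page count fits the encodable window (the exact boundary checks are
 * the caller's responsibility and are only assumed here):
 *
 *     if (npages >= ARM64_TLB_RANGE_MIN_PAGES && npages <= ARM64_TLB_RANGE_MAX_PAGES) {
 *         uint64_t param = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
 *         flush_mmu_tlb_allrange(param, true, false);    // last-level only, not "strong"
 *     }
 */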
454 | |
455 | // flush_core_tlb_allrange: flush TLB entries that map a VA range using a single instruction, local core only |
456 | // The argument should be encoded according to generate_rtlbi_param(). |
457 | // Follows the same ASID matching behavior as flush_mmu_tlb_allentries() |
458 | static inline void |
459 | flush_core_tlb_allrange_async(uint64_t val) |
460 | { |
461 | #if HAS_FEAT_XS |
462 | asm volatile ("tlbi rvaae1nxs, %0" : : "r" (val)); |
463 | #else |
464 | asm volatile ("tlbi rvaae1, %0" : : "r" (val)); |
465 | #endif /* HAS_FEAT_XS */ |
466 | } |
467 | |
468 | static inline void |
469 | flush_core_tlb_allrange(uint64_t val) |
470 | { |
471 | flush_core_tlb_allrange_async(val); |
472 | sync_tlb_flush_local(); |
473 | } |
474 | |
475 | #endif // __ARM_RANGE_TLBI__ |
476 | |
477 | |