/*
 * Copyright (c) 2019-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#pragma once

#include <arm64/proc_reg.h>
#include <machine/atomic.h>

#define tlbi_addr(x) ((((x) >> 12) & TLBI_ADDR_MASK) << TLBI_ADDR_SHIFT)
#define tlbi_asid(x) (((uintptr_t)(x) & TLBI_ASID_MASK) << TLBI_ASID_SHIFT)
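/*
 * tlbi_addr() places a byte address into the VA field of a TLBI operand
 * (shifted down to 4K-page granularity first), and tlbi_asid() places an
 * ASID into the operand's ASID field. A typical per-VA, per-ASID operand is
 * therefore built as tlbi_addr(va) | tlbi_asid(asid); the exact field
 * positions come from the TLBI_* definitions in <arm64/proc_reg.h>.
 */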

#if __ARM_KERNEL_PROTECT__
/*
 * __ARM_KERNEL_PROTECT__ adds two complications to TLB management:
 *
 * 1. As each pmap has two ASIDs, every TLB operation that targets an ASID must
 *    target both ASIDs for the pmap that owns the target ASID.
 *
 * 2. Any TLB operation targeting the kernel_pmap ASID (ASID 0) must target all
 *    ASIDs (as kernel_pmap mappings may be referenced while using an ASID that
 *    belongs to another pmap). We expect these routines to be called with the
 *    EL0 ASID for the target, not the EL1 ASID.
 */
#endif /* __ARM_KERNEL_PROTECT__ */

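/*
 * sync_tlb_flush()/sync_tlb_flush_local() complete previously-issued TLBI
 * operations: a DSB (inner-shareable, or non-shareable for the local variant)
 * waits for the invalidates to finish, and the ISB ensures this CPU fetches
 * and translates subsequent instructions using the updated entries. On
 * FEAT_XS hardware the nXS form of the barrier is used, which does not wait
 * for accesses to memory with the XS attribute.
 */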
static inline void
sync_tlb_flush(void)
{
#if HAS_FEAT_XS
	asm volatile ("dsb ishnxs" ::: "memory");
#else
	__builtin_arm_dsb(DSB_ISH);
#endif /* HAS_FEAT_XS */
	__builtin_arm_isb(ISB_SY);
}

static inline void
sync_tlb_flush_local(void)
{
#if HAS_FEAT_XS
	asm volatile ("dsb nshnxs" ::: "memory");
#else
	__builtin_arm_dsb(DSB_NSH);
#endif /* HAS_FEAT_XS */
	__builtin_arm_isb(ISB_SY);
}

#if HAS_FEAT_XS

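/*
 * When FEAT_XS is available, the ordinary sync above uses the relaxed nXS
 * barrier; sync_tlb_flush_strong() keeps the full DSB ISH for callers that
 * must also wait for accesses with the XS attribute (presumably flushes that
 * may cover device or otherwise XS-attributed mappings).
 */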
static inline void
sync_tlb_flush_strong(void)
{
	__builtin_arm_dsb(DSB_ISH);
	__builtin_arm_isb(ISB_SY);
}

#endif /* HAS_FEAT_XS */


static inline void
arm64_sync_tlb(bool strong __unused)
{
	sync_tlb_flush();
}

// flush_mmu_tlb: full TLB flush on all cores
static inline void
flush_mmu_tlb_async(void)
{
	asm volatile ("tlbi vmalle1is");
}

static inline void
flush_mmu_tlb(void)
{
	flush_mmu_tlb_async();
#if HAS_FEAT_XS
	/* Full flush is always treated as "strong" when there is a HW-level distinction. */
	sync_tlb_flush_strong();
#else
	sync_tlb_flush();
#endif /* HAS_FEAT_XS */
}

// flush_core_tlb: full TLB flush on local core only
static inline void
flush_core_tlb_async(void)
{
#if HAS_FEAT_XS
	asm volatile ("tlbi vmalle1nxs");
#else
	asm volatile ("tlbi vmalle1");
#endif /* HAS_FEAT_XS */
}

static inline void
flush_core_tlb(void)
{
	flush_core_tlb_async();
	sync_tlb_flush_local();
}

// flush_mmu_tlb_allentries_async: flush entries that map a VA range, all ASIDs, all cores
// start and end are in units of 4K pages.
static inline void
flush_mmu_tlb_allentries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size,
    bool last_level_only, bool strong __unused)
{
#if __ARM_16K_PG__
	if (pmap_page_size == 16384) {
		start = start & ~0x3ULL;

		/*
		 * The code below is not necessarily correct. From an overview of
		 * the client code, the expected contract for TLB flushes is that
		 * we will expand from an "address, length" pair to "start address,
		 * end address" in the course of a TLB flush. This suggests that
		 * a flush for "X, X+4" is actually only asking for a flush of a
		 * single 16KB page. At the same time, we'd like to be prepared
		 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
		 * number to a 16KB page boundary. This should deal correctly with
		 * unaligned inputs.
		 *
		 * If our expectations about client behavior are wrong, however,
		 * this will lead to occasional TLB corruption on platforms with
		 * 16KB pages.
		 */
		end = (end + 0x3ULL) & ~0x3ULL;
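		/*
		 * For example, a request covering 4K page numbers [5, 9) becomes
		 * [4, 12), i.e. exactly the two 16KB pages that contain the
		 * original range.
		 */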
	}
#endif // __ARM_16K_PG__
	if (last_level_only) {
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			if (__probable(!strong)) {
				asm volatile ("tlbi vaale1isnxs, %0" : : "r" (start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vaale1is, %0" : : "r" (start));
			}
		}
	} else {
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			if (__probable(!strong)) {
				asm volatile ("tlbi vaae1isnxs, %0" : : "r" (start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vaae1is, %0" : : "r" (start));
			}
		}
	}
}

static inline void
flush_mmu_tlb_allentries(uint64_t start, uint64_t end, uint64_t pmap_page_size, bool last_level_only, bool strong)
{
	flush_mmu_tlb_allentries_async(start, end, pmap_page_size, last_level_only, strong);
	arm64_sync_tlb(strong);
}

// flush_mmu_tlb_entries: flush TLB entries that map a VA range and ASID, all cores
// start and end must encode the ASID in the high 16 bits, with the VA (in units of 4K pages) in the low bits
// Will also flush global entries that match the VA range
static inline void
flush_mmu_tlb_entries_async(uint64_t start, uint64_t end, uint64_t pmap_page_size,
    bool last_level_only, bool strong __unused)
{
#if __ARM_16K_PG__
	if (pmap_page_size == 16384) {
		start = start & ~0x3ULL;

		/*
		 * The code below is not necessarily correct. From an overview of
		 * the client code, the expected contract for TLB flushes is that
		 * we will expand from an "address, length" pair to "start address,
		 * end address" in the course of a TLB flush. This suggests that
		 * a flush for "X, X+4" is actually only asking for a flush of a
		 * single 16KB page. At the same time, we'd like to be prepared
		 * for bad inputs (X, X+3), so add 3 and then truncate the 4KB page
		 * number to a 16KB page boundary. This should deal correctly with
		 * unaligned inputs.
		 *
		 * If our expectations about client behavior are wrong, however,
		 * this will lead to occasional TLB corruption on platforms with
		 * 16KB pages.
		 */
		end = (end + 0x3ULL) & ~0x3ULL;
	}
#endif // __ARM_16K_PG__
#if __ARM_KERNEL_PROTECT__
	uint64_t asid = start >> TLBI_ASID_SHIFT;
	/*
	 * If we are flushing ASID 0, this is a kernel operation. With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	if (asid == 0) {
		if (last_level_only) {
			for (; start < end; start += (pmap_page_size / 4096)) {
				asm volatile ("tlbi vaale1is, %0" : : "r" (start));
			}
		} else {
			for (; start < end; start += (pmap_page_size / 4096)) {
				asm volatile ("tlbi vaae1is, %0" : : "r" (start));
			}
		}
		return;
	}
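	/*
	 * The two ASIDs belonging to a pmap differ only in the low bit of the
	 * ASID field of the TLBI operand, so issue each invalidate twice: once
	 * with that bit clear and once with it set.
	 */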
	start = start | (1ULL << TLBI_ASID_SHIFT);
	end = end | (1ULL << TLBI_ASID_SHIFT);
	if (last_level_only) {
		for (; start < end; start += (pmap_page_size / 4096)) {
			start = start & ~(1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vale1is, %0" : : "r" (start));
			start = start | (1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vale1is, %0" : : "r" (start));
		}
	} else {
		for (; start < end; start += (pmap_page_size / 4096)) {
			start = start & ~(1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vae1is, %0" : : "r" (start));
			start = start | (1ULL << TLBI_ASID_SHIFT);
			asm volatile ("tlbi vae1is, %0" : : "r" (start));
		}
	}
#else
	if (last_level_only) {
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			if (__probable(!strong)) {
				asm volatile ("tlbi vale1isnxs, %0" : : "r" (start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vale1is, %0" : : "r" (start));
			}
		}
	} else {
		for (; start < end; start += (pmap_page_size / 4096)) {
#if HAS_FEAT_XS
			if (__probable(!strong)) {
				asm volatile ("tlbi vae1isnxs, %0" : : "r" (start));
			} else
#endif /* HAS_FEAT_XS */
			{
				asm volatile ("tlbi vae1is, %0" : : "r" (start));
			}
		}
	}
#endif /* __ARM_KERNEL_PROTECT__ */
}

static inline void
flush_mmu_tlb_entries(uint64_t start, uint64_t end, uint64_t pmap_page_size, bool last_level_only, bool strong)
{
	flush_mmu_tlb_entries_async(start, end, pmap_page_size, last_level_only, strong);
	arm64_sync_tlb(strong);
}

// flush_mmu_tlb_asid: flush all entries that match an ASID, on all cores
// ASID must be in high 16 bits of argument
// Will not flush global entries
static inline void
flush_mmu_tlb_asid_async(uint64_t val, bool strong __unused)
{
#if __ARM_KERNEL_PROTECT__
	/*
	 * If we are flushing ASID 0, this is a kernel operation. With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vmalle1is");
		return;
	}
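	/*
	 * Under this scheme, each pmap's two ASIDs differ only in the low bit
	 * of the ASID field: flush the bit-clear ASID here, then let the
	 * common path below flush the bit-set ASID.
	 */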
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi aside1is, %0" : : "r" (val));
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
#if HAS_FEAT_XS
	if (__probable(!strong)) {
		asm volatile ("tlbi aside1isnxs, %0" : : "r" (val));
	} else
#endif /* HAS_FEAT_XS */
	{
		asm volatile ("tlbi aside1is, %0" : : "r" (val));
	}
}

static inline void
flush_mmu_tlb_asid(uint64_t val, bool strong)
{
	flush_mmu_tlb_asid_async(val, strong);
	arm64_sync_tlb(strong);
}

// flush_core_tlb_asid: flush all entries that match an ASID, local core only
// ASID must be in high 16 bits of argument
// Will not flush global entries
static inline void
flush_core_tlb_asid_async(uint64_t val)
{
#if __ARM_KERNEL_PROTECT__
	/*
	 * If we are flushing ASID 0, this is a kernel operation. With this
	 * ASID scheme, this means we should flush all ASIDs.
	 */
	uint64_t asid = val >> TLBI_ASID_SHIFT;
	if (asid == 0) {
		asm volatile ("tlbi vmalle1");
		return;
	}
	val = val & ~(1ULL << TLBI_ASID_SHIFT);
	asm volatile ("tlbi aside1, %0" : : "r" (val));
	val = val | (1ULL << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
#if HAS_FEAT_XS
	asm volatile ("tlbi aside1nxs, %0" : : "r" (val));
#else
	asm volatile ("tlbi aside1, %0" : : "r" (val));
#endif /* HAS_FEAT_XS */
}

static inline void
flush_core_tlb_asid(uint64_t val)
{
	flush_core_tlb_asid_async(val);
	sync_tlb_flush_local();
}

#if __ARM_RANGE_TLBI__
#if __ARM_KERNEL_PROTECT__
#error __ARM_RANGE_TLBI__ + __ARM_KERNEL_PROTECT__ is not currently supported
#endif

#define ARM64_TLB_RANGE_MIN_PAGES 2
#define ARM64_TLB_RANGE_MAX_PAGES (1ULL << 21)
#define rtlbi_addr(x, shift) (((x) >> (shift)) & RTLBI_ADDR_MASK)
#define rtlbi_scale(x) ((uint64_t)(x) << RTLBI_SCALE_SHIFT)
#define rtlbi_num(x) ((uint64_t)(x) << RTLBI_NUM_SHIFT)
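/*
 * The range forms cover BaseADDR <= VA < BaseADDR +
 * (NUM + 1) * 2^(5*SCALE + 1) translation granules. With NUM <= 31 and
 * SCALE <= 3, the largest encodable range is 32 * 2^16 == 2^21 pages, which
 * is where ARM64_TLB_RANGE_MAX_PAGES comes from; a single-page flush (below
 * ARM64_TLB_RANGE_MIN_PAGES) can simply use a plain per-VA TLBI instead.
 */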

/**
 * Given the number of pages to invalidate, generate the correct parameter to
 * pass to any of the TLBI-by-range methods.
 */
static inline uint64_t
generate_rtlbi_param(ppnum_t npages, uint32_t asid, vm_offset_t va, uint64_t pmap_page_shift)
{
	assert(npages > 1);
	/**
	 * Per the ARMv8.4 RTLBI extension spec, the range encoded in the rtlbi register operand is defined by:
	 * BaseADDR <= VA < BaseADDR+((NUM+1)*2^(5*SCALE+1) * Translation_Granule_Size)
	 */
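	/*
	 * For example, npages == 32 yields order == 4, scale == 0, and
	 * num == 15, i.e. a range of (15 + 1) * 2^(5*0 + 1) == 32 granules
	 * starting at va.
	 */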
	unsigned order = (unsigned)(sizeof(npages) * 8) - (unsigned)__builtin_clz(npages - 1) - 1;
	unsigned scale = ((order ? order : 1) - 1) / 5;
	unsigned granule = 1 << ((5 * scale) + 1);
	unsigned num = (((npages + granule - 1) & ~(granule - 1)) / granule) - 1;
	return tlbi_asid(asid) | RTLBI_TG(pmap_page_shift) | rtlbi_scale(scale) | rtlbi_num(num) | rtlbi_addr(va, pmap_page_shift);
}
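
/*
 * Sketch of intended usage (values are illustrative): build the operand once
 * and hand it to one of the range flushes below, e.g.
 *     flush_mmu_tlb_allrange(generate_rtlbi_param(npages, 0, va, 14), true, false);
 * for a last-level, non-strong flush of npages 16KB pages at va, matching all ASIDs.
 */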

// flush_mmu_tlb_range: flush TLB entries that map a VA range using a single instruction
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_entries()
static inline void
flush_mmu_tlb_range_async(uint64_t val, bool last_level_only, bool strong __unused)
{
	if (last_level_only) {
#if HAS_FEAT_XS
		if (__probable(!strong)) {
			asm volatile ("tlbi rvale1isnxs, %0" : : "r" (val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvale1is, %0" : : "r" (val));
		}
	} else {
#if HAS_FEAT_XS
		if (__probable(!strong)) {
			asm volatile ("tlbi rvae1isnxs, %0" : : "r" (val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvae1is, %0" : : "r" (val));
		}
	}
}

static inline void
flush_mmu_tlb_range(uint64_t val, bool last_level_only, bool strong)
{
	flush_mmu_tlb_range_async(val, last_level_only, strong);
	arm64_sync_tlb(strong);
}

// flush_mmu_tlb_allrange: flush TLB entries that map a VA range using a single instruction
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_allentries()
static inline void
flush_mmu_tlb_allrange_async(uint64_t val, bool last_level_only, bool strong __unused)
{
	if (last_level_only) {
#if HAS_FEAT_XS
		if (__probable(!strong)) {
			asm volatile ("tlbi rvaale1isnxs, %0" : : "r" (val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvaale1is, %0" : : "r" (val));
		}
	} else {
#if HAS_FEAT_XS
		if (__probable(!strong)) {
			asm volatile ("tlbi rvaae1isnxs, %0" : : "r" (val));
		} else
#endif /* HAS_FEAT_XS */
		{
			asm volatile ("tlbi rvaae1is, %0" : : "r" (val));
		}
	}
}

static inline void
flush_mmu_tlb_allrange(uint64_t val, bool last_level_only, bool strong)
{
	flush_mmu_tlb_allrange_async(val, last_level_only, strong);
	arm64_sync_tlb(strong);
}

// flush_core_tlb_allrange: flush TLB entries that map a VA range using a single instruction, local core only
// The argument should be encoded according to generate_rtlbi_param().
// Follows the same ASID matching behavior as flush_mmu_tlb_allentries()
static inline void
flush_core_tlb_allrange_async(uint64_t val)
{
#if HAS_FEAT_XS
	asm volatile ("tlbi rvaae1nxs, %0" : : "r" (val));
#else
	asm volatile ("tlbi rvaae1, %0" : : "r" (val));
#endif /* HAS_FEAT_XS */
}

static inline void
flush_core_tlb_allrange(uint64_t val)
{
	flush_core_tlb_allrange_async(val);
	sync_tlb_flush_local();
}

#endif // __ARM_RANGE_TLBI__