1 | /* |
2 | * Copyright (c) 2007-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <arm64/machine_machdep.h> |
30 | #include <arm64/proc_reg.h> |
31 | #include <arm/machine_cpu.h> |
32 | #include <arm/cpu_internal.h> |
33 | #include <arm/cpuid.h> |
34 | #include <arm/cpu_data.h> |
35 | #include <arm/cpu_data_internal.h> |
36 | #include <arm/caches_internal.h> |
37 | #include <arm/misc_protos.h> |
38 | #include <arm/machdep_call.h> |
39 | #include <arm/machine_routines.h> |
40 | #include <arm/rtclock.h> |
41 | #include <arm/cpuid_internal.h> |
42 | #include <arm/cpu_capabilities.h> |
43 | #include <console/serial_protos.h> |
44 | #include <kern/machine.h> |
45 | #include <kern/misc_protos.h> |
46 | #include <prng/random.h> |
47 | #include <kern/startup.h> |
48 | #include <kern/thread.h> |
49 | #include <kern/timer_queue.h> |
50 | #include <mach/machine.h> |
51 | #include <machine/atomic.h> |
52 | #include <machine/config.h> |
53 | #include <vm/pmap.h> |
54 | #include <vm/vm_page.h> |
55 | #include <vm/vm_shared_region.h> |
56 | #include <vm/vm_map.h> |
57 | #include <sys/codesign.h> |
58 | #include <sys/kdebug.h> |
59 | #include <kern/coalition.h> |
60 | #include <pexpert/device_tree.h> |
61 | |
62 | #include <IOKit/IOPlatformExpert.h> |
63 | #if HIBERNATION |
64 | #include <IOKit/IOHibernatePrivate.h> |
65 | #endif /* HIBERNATION */ |
66 | |
67 | #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) |
68 | #include <arm64/amcc_rorgn.h> |
69 | #endif |
70 | |
71 | |
72 | #if CONFIG_SPTM |
73 | #include <arm64/sptm/sptm.h> |
74 | #endif /* CONFIG_SPTM */ |
75 | |
76 | #include <libkern/section_keywords.h> |
77 | |
78 | /** |
79 | * On supported hardware, debuggable builds make the HID bits read-only |
80 | * without locking them. This lets people manually modify HID bits while |
81 | * debugging, since they can use a debugging tool to first reset the HID |
 * bits back to read/write. However, it will still catch xnu changes that
83 | * accidentally write to HID bits after they've been made read-only. |
84 | */ |
SECURITY_READ_ONLY_LATE(bool) skip_spr_lockdown_glb = false;
86 | |
87 | /* |
88 | * On some SoCs, PIO lockdown is applied in assembly in early boot by |
89 | * secondary CPUs. |
90 | * Since the cluster_pio_ro_ctl value is dynamic, it is stored here by the |
91 | * primary CPU so that it doesn't have to be computed each time by the |
92 | * startup code. |
93 | */ |
94 | SECURITY_READ_ONLY_LATE(uint64_t) cluster_pio_ro_ctl_mask_glb = 0; |
95 | |
96 | #if CONFIG_CPU_COUNTERS |
97 | #include <kern/kpc.h> |
98 | #endif /* CONFIG_CPU_COUNTERS */ |
99 | |
100 | #define MPIDR_CPU_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF0_MASK) >> MPIDR_AFF0_SHIFT) |
101 | #define MPIDR_CLUSTER_ID(mpidr_el1_val) (((mpidr_el1_val) & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT) |
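
/*
 * Illustrative sketch (not part of the code below): decoding a raw MPIDR_EL1
 * value with the macros above; the local variable names are hypothetical.
 *
 *	uint64_t mpidr;
 *	MRS(mpidr, "MPIDR_EL1");
 *	unsigned int core_in_cluster = MPIDR_CPU_ID(mpidr);     // AFF0 field
 *	unsigned int cluster         = MPIDR_CLUSTER_ID(mpidr); // AFF1 field
 */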
102 | |
103 | #if HAS_CLUSTER |
104 | static uint8_t cluster_initialized = 0; |
105 | #endif |
106 | |
MACHINE_TIMEOUT_DEV_WRITEABLE(LockTimeOut, "lock", 6e6 /* 0.25s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
machine_timeout_t LockTimeOutUsec; // computed in ml_init_lock_timeout

MACHINE_TIMEOUT_DEV_WRITEABLE(TLockTimeOut, "ticket-lock", 3e6 /* 0.125s */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);

MACHINE_TIMEOUT_DEV_WRITEABLE(MutexSpin, "mutex-spin", 240 /* 10us */, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
113 | |
114 | uint64_t low_MutexSpin; |
115 | int64_t high_MutexSpin; |
116 | |
117 | |
118 | |
119 | static uint64_t ml_wfe_hint_max_interval; |
120 | #define MAX_WFE_HINT_INTERVAL_US (500ULL) |
121 | |
122 | /* Must be less than cpu_idle_latency to ensure ml_delay_should_spin is true */ |
TUNABLE(uint32_t, yield_delay_us, "yield_delay_us", 0);
124 | |
125 | extern vm_offset_t segLOWEST; |
126 | extern vm_offset_t segLOWESTTEXT; |
127 | extern vm_offset_t segLASTB; |
128 | extern unsigned long segSizeLAST; |
129 | |
130 | /* ARM64 specific bounds; used to test for presence in the kernelcache. */ |
131 | extern vm_offset_t vm_kernelcache_base; |
132 | extern vm_offset_t vm_kernelcache_top; |
133 | |
134 | /* Location of the physmap / physical aperture */ |
135 | extern uint64_t physmap_base; |
136 | |
137 | #if defined(CONFIG_SPTM) |
138 | extern const arm_physrange_t *arm_vm_kernelcache_ranges; |
139 | extern int arm_vm_kernelcache_numranges; |
140 | #else /* defined(CONFIG_SPTM) */ |
141 | extern vm_offset_t arm_vm_kernelcache_phys_start; |
142 | extern vm_offset_t arm_vm_kernelcache_phys_end; |
143 | #endif /* defined(CONFIG_SPTM) */ |
144 | |
145 | #if defined(HAS_IPI) |
146 | unsigned int gFastIPI = 1; |
147 | #define kDeferredIPITimerDefault (64 * NSEC_PER_USEC) /* in nanoseconds */ |
static TUNABLE_WRITEABLE(uint64_t, deferred_ipi_timer_ns, "fastipitimeout",
149 | kDeferredIPITimerDefault); |
150 | #endif /* defined(HAS_IPI) */ |
151 | |
152 | thread_t Idle_context(void); |
153 | |
154 | SECURITY_READ_ONLY_LATE(bool) cpu_config_correct = true; |
155 | |
156 | SECURITY_READ_ONLY_LATE(static ml_topology_cpu_t) topology_cpu_array[MAX_CPUS]; |
157 | SECURITY_READ_ONLY_LATE(static ml_topology_cluster_t) topology_cluster_array[MAX_CPU_CLUSTERS]; |
158 | SECURITY_READ_ONLY_LATE(static ml_topology_info_t) topology_info = { |
159 | .version = CPU_TOPOLOGY_VERSION, |
160 | .cpus = topology_cpu_array, |
161 | .clusters = topology_cluster_array, |
162 | }; |
163 | |
164 | _Atomic unsigned int cluster_type_num_active_cpus[MAX_CPU_TYPES]; |
165 | |
166 | /** |
167 | * Represents the offset of each cluster within a hypothetical array of MAX_CPUS |
168 | * entries of an arbitrary data type. This is intended for use by specialized consumers |
169 | * that must quickly access per-CPU data using only the physical CPU ID (MPIDR_EL1), |
170 | * as follows: |
171 | * hypothetical_array[cluster_offsets[AFF1] + AFF0] |
172 | * Most consumers should instead use general-purpose facilities such as PERCPU or |
173 | * ml_get_cpu_number(). |
174 | */ |
175 | SECURITY_READ_ONLY_LATE(int64_t) cluster_offsets[MAX_CPU_CLUSTER_PHY_ID + 1]; |
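
/*
 * Illustrative sketch of the lookup described above, assuming a hypothetical
 * per-CPU array sized MAX_CPUS and a raw MPIDR_EL1 value:
 *
 *	uint64_t mpidr;
 *	MRS(mpidr, "MPIDR_EL1");
 *	entry = &hypothetical_array[cluster_offsets[MPIDR_CLUSTER_ID(mpidr)] +
 *	    MPIDR_CPU_ID(mpidr)];
 *
 * General-purpose code should still prefer PERCPU or ml_get_cpu_number().
 */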
176 | |
177 | SECURITY_READ_ONLY_LATE(static uint32_t) arm64_eventi = UINT32_MAX; |
178 | |
179 | extern uint32_t lockdown_done; |
180 | |
181 | /** |
182 | * Represents regions of virtual address space that should be reserved |
183 | * (pre-mapped) in each user address space. |
184 | */ |
185 | static const struct vm_reserved_region vm_reserved_regions[] = { |
186 | { |
187 | .vmrr_name = "GPU Carveout" , |
188 | .vmrr_addr = MACH_VM_MIN_GPU_CARVEOUT_ADDRESS, |
189 | .vmrr_size = (vm_map_size_t)(MACH_VM_MAX_GPU_CARVEOUT_ADDRESS - MACH_VM_MIN_GPU_CARVEOUT_ADDRESS) |
190 | }, |
191 | /* |
192 | * Reserve the virtual memory space representing the commpage nesting region |
193 | * to prevent user processes from allocating memory within it. The actual |
194 | * page table entries for the commpage are inserted by vm_commpage_enter(). |
195 | * This vm_map_enter() just prevents userspace from allocating/deallocating |
196 | * anything within the entire commpage nested region. |
197 | */ |
198 | { |
199 | .vmrr_name = "commpage nesting" , |
200 | .vmrr_addr = _COMM_PAGE64_NESTING_START, |
201 | .vmrr_size = _COMM_PAGE64_NESTING_SIZE |
202 | } |
203 | }; |
204 | |
205 | uint32_t get_arm_cpu_version(void); |
206 | |
207 | #if defined(HAS_IPI) |
208 | static inline void |
209 | ml_cpu_signal_type(unsigned int cpu_mpidr, uint32_t type) |
210 | { |
211 | #if HAS_CLUSTER |
212 | uint64_t local_mpidr; |
213 | /* NOTE: this logic expects that we are called in a non-preemptible |
214 | * context, or at least one in which the calling thread is bound |
215 | * to a single CPU. Otherwise we may migrate between choosing which |
216 | * IPI mechanism to use and issuing the IPI. */ |
	MRS(local_mpidr, "MPIDR_EL1");
	if (MPIDR_CLUSTER_ID(local_mpidr) == MPIDR_CLUSTER_ID(cpu_mpidr)) {
		uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_0", x);
	} else {
#define IPI_RR_TARGET_CLUSTER_SHIFT 16
		uint64_t x = type | (MPIDR_CLUSTER_ID(cpu_mpidr) << IPI_RR_TARGET_CLUSTER_SHIFT) | MPIDR_CPU_ID(cpu_mpidr);
		MSR("S3_5_C15_C0_1", x);
	}
#else
	uint64_t x = type | MPIDR_CPU_ID(cpu_mpidr);
	MSR("S3_5_C15_C0_1", x);
229 | #endif |
230 | /* The recommended local/global IPI sequence is: |
231 | * DSB <sys> (This ensures visibility of e.g. older stores to the |
232 | * pending CPU signals bit vector in DRAM prior to IPI reception, |
233 | * and is present in cpu_signal_internal()) |
234 | * MSR S3_5_C15_C0_1, Xt |
235 | * ISB |
236 | */ |
237 | __builtin_arm_isb(ISB_SY); |
238 | } |
239 | #endif |
240 | |
241 | #if !defined(HAS_IPI) |
242 | __dead2 |
243 | #endif |
244 | void |
245 | ml_cpu_signal(unsigned int cpu_mpidr __unused) |
246 | { |
247 | #if defined(HAS_IPI) |
248 | ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_IMMEDIATE); |
249 | #else |
250 | panic("Platform does not support ACC Fast IPI" ); |
251 | #endif |
252 | } |
253 | |
254 | #if !defined(HAS_IPI) |
255 | __dead2 |
256 | #endif |
257 | void |
258 | ml_cpu_signal_deferred_adjust_timer(uint64_t nanosecs) |
259 | { |
260 | #if defined(HAS_IPI) |
261 | /* adjust IPI_CR timer countdown value for deferred IPI |
262 | * accepts input in nanosecs, convert to absolutetime (REFCLK ticks), |
263 | * clamp maximum REFCLK ticks to 0xFFFF (16 bit field) |
264 | * |
265 | * global register, should only require a single write to update all |
266 | * CPU cores: from Skye ACC user spec section 5.7.3.3 |
267 | * |
268 | * IPICR is a global register but there are two copies in ACC: one at pBLK and one at eBLK. |
269 | * IPICR write SPR token also traverses both pCPM and eCPM rings and updates both copies. |
270 | */ |
271 | uint64_t abstime; |
272 | |
273 | nanoseconds_to_absolutetime(nanosecs, &abstime); |
274 | |
275 | abstime = MIN(abstime, 0xFFFF); |
276 | |
277 | /* update deferred_ipi_timer_ns with the new clamped value */ |
278 | absolutetime_to_nanoseconds(abstime, &deferred_ipi_timer_ns); |
279 | |
280 | MSR("S3_5_C15_C3_1" , abstime); |
281 | #else |
282 | (void)nanosecs; |
283 | panic("Platform does not support ACC Fast IPI" ); |
284 | #endif |
285 | } |
286 | |
287 | uint64_t |
288 | ml_cpu_signal_deferred_get_timer() |
289 | { |
290 | #if defined(HAS_IPI) |
291 | return deferred_ipi_timer_ns; |
292 | #else |
293 | return 0; |
294 | #endif |
295 | } |
296 | |
297 | #if !defined(HAS_IPI) |
298 | __dead2 |
299 | #endif |
300 | void |
301 | ml_cpu_signal_deferred(unsigned int cpu_mpidr __unused) |
302 | { |
303 | #if defined(HAS_IPI) |
304 | ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_DEFERRED); |
305 | #else |
306 | panic("Platform does not support ACC Fast IPI deferral" ); |
307 | #endif |
308 | } |
309 | |
310 | #if !defined(HAS_IPI) |
311 | __dead2 |
312 | #endif |
313 | void |
314 | ml_cpu_signal_retract(unsigned int cpu_mpidr __unused) |
315 | { |
316 | #if defined(HAS_IPI) |
317 | ml_cpu_signal_type(cpu_mpidr, ARM64_REG_IPI_RR_TYPE_RETRACT); |
318 | #else |
319 | panic("Platform does not support ACC Fast IPI retraction" ); |
320 | #endif |
321 | } |
322 | |
323 | extern uint32_t idle_proximate_io_wfe_unmasked; |
324 | |
325 | #define CPUPM_IDLE_WFE 0x5310300 |
326 | static bool |
327 | wfe_process_recommendation(void) |
328 | { |
329 | bool ipending = false; |
330 | if (__probable(idle_proximate_io_wfe_unmasked == 1)) { |
331 | /* Check for an active perf. controller generated |
332 | * WFE recommendation for this cluster. |
333 | */ |
334 | cpu_data_t *cdp = getCpuDatap(); |
335 | uint32_t cid = cdp->cpu_cluster_id; |
336 | uint64_t wfe_ttd = 0; |
337 | uint64_t wfe_deadline = 0; |
338 | |
		if ((wfe_ttd = ml_cluster_wfe_timeout(cid)) != 0) {
340 | wfe_deadline = mach_absolute_time() + wfe_ttd; |
341 | } |
342 | |
343 | if (wfe_deadline != 0) { |
344 | /* Poll issuing event-bounded WFEs until an interrupt |
345 | * arrives or the WFE recommendation expires |
346 | */ |
347 | #if DEVELOPMENT || DEBUG |
348 | uint64_t wc = cdp->wfe_count; |
349 | KDBG(CPUPM_IDLE_WFE | DBG_FUNC_START, ipending, wc, wfe_ttd, cdp->cpu_stat.irq_ex_cnt_wake); |
350 | #endif |
351 | /* Issue WFE until the recommendation expires, |
352 | * with IRQs unmasked. |
353 | */ |
354 | ipending = wfe_to_deadline_or_interrupt(cid, wfe_deadline, cdp, true, true); |
355 | #if DEVELOPMENT || DEBUG |
356 | KDBG(CPUPM_IDLE_WFE | DBG_FUNC_END, ipending, cdp->wfe_count - wc, wfe_deadline, cdp->cpu_stat.irq_ex_cnt_wake); |
357 | #endif |
358 | } |
359 | } |
360 | return ipending; |
361 | } |
362 | |
363 | void |
364 | machine_idle(void) |
365 | { |
366 | /* Interrupts are expected to be masked on entry or re-entry via |
367 | * Idle_load_context() |
368 | */ |
369 | assert((__builtin_arm_rsr("DAIF" ) & (DAIF_IRQF | DAIF_FIQF)) == (DAIF_IRQF | DAIF_FIQF)); |
370 | /* Check for, and act on, a WFE recommendation. |
371 | * Bypasses context spill/fill for a minor perf. increment. |
372 | * May unmask and restore IRQ+FIQ mask. |
373 | */ |
374 | if (wfe_process_recommendation() == false) { |
375 | /* If WFE recommendation absent, or WFE deadline |
376 | * arrived with no interrupt pending/processed, |
377 | * fall back to WFI. |
378 | */ |
379 | Idle_context(); |
380 | } |
381 | __builtin_arm_wsr("DAIFClr" , (DAIFSC_IRQF | DAIFSC_FIQF)); |
382 | } |
383 | |
384 | void |
385 | OSSynchronizeIO(void) |
386 | { |
387 | __builtin_arm_dsb(DSB_SY); |
388 | } |
389 | |
390 | uint64_t |
391 | get_aux_control(void) |
392 | { |
393 | uint64_t value; |
394 | |
395 | MRS(value, "ACTLR_EL1" ); |
396 | return value; |
397 | } |
398 | |
399 | uint64_t |
400 | get_mmu_control(void) |
401 | { |
402 | uint64_t value; |
403 | |
404 | MRS(value, "SCTLR_EL1" ); |
405 | return value; |
406 | } |
407 | |
408 | uint64_t |
409 | get_tcr(void) |
410 | { |
411 | uint64_t value; |
412 | |
413 | MRS(value, "TCR_EL1" ); |
414 | return value; |
415 | } |
416 | |
417 | boolean_t |
418 | ml_get_interrupts_enabled(void) |
419 | { |
420 | uint64_t value; |
421 | |
422 | MRS(value, "DAIF" ); |
423 | if (value & DAIF_IRQF) { |
424 | return FALSE; |
425 | } |
426 | return TRUE; |
427 | } |
428 | |
429 | pmap_paddr_t |
430 | get_mmu_ttb(void) |
431 | { |
432 | pmap_paddr_t value; |
433 | |
434 | MRS(value, "TTBR0_EL1" ); |
435 | return value; |
436 | } |
437 | |
438 | uint32_t |
439 | get_arm_cpu_version(void) |
440 | { |
441 | uint32_t value = machine_read_midr(); |
442 | |
443 | /* Compose the register values into 8 bits; variant[7:4], revision[3:0]. */ |
444 | return ((value & MIDR_EL1_REV_MASK) >> MIDR_EL1_REV_SHIFT) | ((value & MIDR_EL1_VAR_MASK) >> (MIDR_EL1_VAR_SHIFT - 4)); |
445 | } |
446 | |
447 | bool |
448 | ml_feature_supported(uint32_t feature_bit) |
449 | { |
450 | uint64_t aidr_el1_value = 0; |
451 | |
452 | MRS(aidr_el1_value, "AIDR_EL1" ); |
453 | |
454 | #ifdef APPLEAVALANCHE |
455 | #endif // APPLEAVALANCHE |
456 | |
457 | return aidr_el1_value & feature_bit; |
458 | } |
459 | |
460 | /* |
461 | * user_cont_hwclock_allowed() |
462 | * |
463 | * Indicates whether we allow EL0 to read the virtual timebase (CNTVCT_EL0) |
464 | * as a continuous time source (e.g. from mach_continuous_time) |
465 | */ |
466 | boolean_t |
467 | user_cont_hwclock_allowed(void) |
468 | { |
469 | #if HAS_CONTINUOUS_HWCLOCK |
470 | return TRUE; |
471 | #else |
472 | return FALSE; |
473 | #endif |
474 | } |
475 | |
476 | /* |
477 | * user_timebase_type() |
478 | * |
479 | * Indicates type of EL0 virtual timebase read (CNTVCT_EL0). |
480 | * |
481 | * USER_TIMEBASE_NONE: EL0 has no access to timebase register |
482 | * USER_TIMEBASE_SPEC: EL0 has access to speculative timebase reads (CNTVCT_EL0) |
 * USER_TIMEBASE_NOSPEC: EL0 has access to non-speculative timebase reads (CNTVCTSS_EL0)
484 | * |
485 | */ |
486 | |
487 | uint8_t |
488 | user_timebase_type(void) |
489 | { |
490 | #if HAS_ACNTVCT |
491 | return USER_TIMEBASE_NOSPEC_APPLE; |
492 | #elif __ARM_ARCH_8_6__ |
493 | return USER_TIMEBASE_NOSPEC; |
494 | #else |
495 | return USER_TIMEBASE_SPEC; |
496 | #endif |
497 | } |
498 | |
499 | void |
500 | machine_startup(__unused boot_args * args) |
501 | { |
502 | #if defined(HAS_IPI) && (DEVELOPMENT || DEBUG) |
503 | if (!PE_parse_boot_argn("fastipi" , &gFastIPI, sizeof(gFastIPI))) { |
504 | gFastIPI = 1; |
505 | } |
506 | #endif /* defined(HAS_IPI) && (DEVELOPMENT || DEBUG)*/ |
507 | |
508 | |
509 | machine_conf(); |
510 | |
511 | |
512 | /* |
513 | * Kick off the kernel bootstrap. |
514 | */ |
515 | kernel_bootstrap(); |
516 | /* NOTREACHED */ |
517 | } |
518 | |
519 | typedef void (*invalidate_fn_t)(void); |
520 | |
521 | static SECURITY_READ_ONLY_LATE(invalidate_fn_t) invalidate_hmac_function = NULL; |
522 | |
523 | void set_invalidate_hmac_function(invalidate_fn_t fn); |
524 | |
525 | void |
526 | set_invalidate_hmac_function(invalidate_fn_t fn) |
527 | { |
528 | if (NULL != invalidate_hmac_function) { |
529 | panic("Invalidate HMAC function already set" ); |
530 | } |
531 | |
532 | invalidate_hmac_function = fn; |
533 | } |
534 | |
535 | void |
536 | machine_lockdown(void) |
537 | { |
538 | |
539 | #if CONFIG_SPTM |
540 | /** |
541 | * On devices that make use of the SPTM, the SPTM is responsible for |
542 | * managing system register locks. Due to this, we skip the call to |
543 | * spr_lockdown() below. |
544 | */ |
545 | #else |
546 | #endif |
547 | |
	arm_vm_prot_finalize(PE_state.bootArgs);
549 | |
550 | #if CONFIG_KERNEL_INTEGRITY |
551 | #if KERNEL_INTEGRITY_WT |
552 | /* Watchtower |
553 | * |
554 | * Notify the monitor about the completion of early kernel bootstrap. |
555 | * From this point forward it will enforce the integrity of kernel text, |
556 | * rodata and page tables. |
557 | */ |
558 | |
559 | #ifdef MONITOR |
560 | monitor_call(MONITOR_LOCKDOWN, 0, 0, 0); |
561 | #endif |
562 | #endif /* KERNEL_INTEGRITY_WT */ |
563 | |
564 | #if CONFIG_SPTM |
565 | extern void pmap_prepare_commpages(void); |
566 | pmap_prepare_commpages(); |
567 | |
568 | /** |
569 | * sptm_lockdown_xnu() disables preemption like all SPTM calls, but may take |
570 | * a fair amount of time as it involves retyping a large number of pages. |
571 | * This preemption latency is not really a concern since we're still fairly |
572 | * early in the boot process, so just explicitly disable preemption before |
573 | * invoking the SPTM and abandon preemption latency measurements before |
574 | * re-enabling it. |
575 | */ |
576 | disable_preemption(); |
577 | /* Signal the SPTM that XNU is ready for RO memory to actually become read-only */ |
578 | sptm_lockdown_xnu(); |
579 | #if SCHED_HYGIENE_DEBUG |
580 | abandon_preemption_disable_measurement(); |
581 | #endif /* SCHED_HYGIENE_DEBUG */ |
582 | enable_preemption(); |
583 | #else |
584 | #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) |
585 | /* KTRR |
586 | * |
587 | * Lock physical KTRR region. KTRR region is read-only. Memory outside |
588 | * the region is not executable at EL1. |
589 | */ |
590 | |
591 | rorgn_lockdown(); |
592 | #endif /* defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) */ |
593 | #endif /* CONFIG_SPTM */ |
594 | |
595 | #if XNU_MONITOR |
596 | pmap_lockdown_ppl(); |
597 | #endif |
598 | |
599 | #endif /* CONFIG_KERNEL_INTEGRITY */ |
600 | |
601 | |
602 | if (NULL != invalidate_hmac_function) { |
603 | invalidate_hmac_function(); |
604 | } |
605 | |
606 | lockdown_done = 1; |
607 | } |
608 | |
609 | |
610 | char * |
611 | machine_boot_info( |
612 | __unused char *buf, |
613 | __unused vm_size_t size) |
614 | { |
615 | return PE_boot_args(); |
616 | } |
617 | |
618 | void |
619 | slave_machine_init(__unused void *param) |
620 | { |
621 | cpu_machine_init(); /* Initialize the processor */ |
622 | clock_init(); /* Init the clock */ |
623 | } |
624 | |
625 | /* |
626 | * Routine: machine_processor_shutdown |
627 | * Function: |
628 | */ |
629 | thread_t |
630 | machine_processor_shutdown( |
631 | __unused thread_t thread, |
632 | void (*doshutdown)(processor_t), |
633 | processor_t processor) |
634 | { |
635 | return Shutdown_context(doshutdown, processor); |
636 | } |
637 | |
638 | /* |
639 | * Routine: ml_init_lock_timeout |
640 | * Function: |
641 | */ |
642 | static void __startup_func |
643 | ml_init_lock_timeout(void) |
644 | { |
645 | /* |
646 | * This function is called after STARTUP_SUB_TIMEOUTS |
647 | * initialization, so using the "legacy" boot-args here overrides |
648 | * the ml-timeout-... configuration. (Given that these boot-args |
649 | * here are usually explicitly specified, this makes sense by |
	 * overriding ml-timeout-..., which may come from the device tree.)
651 | */ |
652 | |
653 | uint64_t lto_timeout_ns; |
654 | uint64_t lto_abstime; |
655 | uint32_t slto; |
656 | |
657 | if (PE_parse_boot_argn(arg_string: "slto_us" , arg_ptr: &slto, max_arg: sizeof(slto))) { |
658 | lto_timeout_ns = slto * NSEC_PER_USEC; |
659 | nanoseconds_to_absolutetime(nanoseconds: lto_timeout_ns, result: <o_abstime); |
660 | os_atomic_store(&LockTimeOut, lto_abstime, relaxed); |
661 | } else { |
662 | lto_abstime = os_atomic_load(&LockTimeOut, relaxed); |
663 | absolutetime_to_nanoseconds(abstime: lto_abstime, result: <o_timeout_ns); |
664 | } |
665 | |
666 | os_atomic_store(&LockTimeOutUsec, lto_timeout_ns / NSEC_PER_USEC, relaxed); |
667 | |
668 | if (PE_parse_boot_argn(arg_string: "tlto_us" , arg_ptr: &slto, max_arg: sizeof(slto))) { |
669 | nanoseconds_to_absolutetime(nanoseconds: slto * NSEC_PER_USEC, result: <o_abstime); |
670 | os_atomic_store(&TLockTimeOut, lto_abstime, relaxed); |
671 | } else if (lto_abstime != 0) { |
672 | os_atomic_store(&TLockTimeOut, lto_abstime >> 1, relaxed); |
673 | } // else take default from MACHINE_TIMEOUT. |
674 | |
675 | uint64_t mtxspin; |
676 | uint64_t mtx_abstime; |
677 | if (PE_parse_boot_argn(arg_string: "mtxspin" , arg_ptr: &mtxspin, max_arg: sizeof(mtxspin))) { |
678 | if (mtxspin > USEC_PER_SEC >> 4) { |
679 | mtxspin = USEC_PER_SEC >> 4; |
680 | } |
681 | nanoseconds_to_absolutetime(nanoseconds: mtxspin * NSEC_PER_USEC, result: &mtx_abstime); |
682 | os_atomic_store(&MutexSpin, mtx_abstime, relaxed); |
683 | } else { |
684 | mtx_abstime = os_atomic_load(&MutexSpin, relaxed); |
685 | } |
686 | |
687 | low_MutexSpin = os_atomic_load(&MutexSpin, relaxed); |
688 | /* |
689 | * high_MutexSpin should be initialized as low_MutexSpin * real_ncpus, but |
690 | * real_ncpus is not set at this time |
691 | * |
692 | * NOTE: active spinning is disabled in arm. It can be activated |
693 | * by setting high_MutexSpin through the sysctl. |
694 | */ |
695 | high_MutexSpin = low_MutexSpin; |
696 | |
697 | uint64_t maxwfeus = MAX_WFE_HINT_INTERVAL_US; |
698 | PE_parse_boot_argn(arg_string: "max_wfe_us" , arg_ptr: &maxwfeus, max_arg: sizeof(maxwfeus)); |
699 | nanoseconds_to_absolutetime(nanoseconds: maxwfeus * NSEC_PER_USEC, result: &ml_wfe_hint_max_interval); |
700 | } |
701 | STARTUP(TIMEOUTS, STARTUP_RANK_MIDDLE, ml_init_lock_timeout); |
702 | |
703 | |
704 | /* |
705 | * This is called when all of the ml_processor_info_t structures have been |
706 | * initialized and all the processors have been started through processor_start(). |
707 | * |
708 | * Required by the scheduler subsystem. |
709 | */ |
710 | void |
711 | ml_cpu_init_completed(void) |
712 | { |
713 | if (SCHED(cpu_init_completed) != NULL) { |
714 | SCHED(cpu_init_completed)(); |
715 | } |
716 | } |
717 | |
718 | /* |
719 | * These are called from the machine-independent routine cpu_up() |
720 | * to perform machine-dependent info updates. |
721 | * |
722 | * The update to CPU counts needs to be separate from other actions |
723 | * because we don't update the counts when CLPC causes temporary |
724 | * cluster powerdown events, as these must be transparent to the user. |
725 | */ |
726 | void |
727 | ml_cpu_up(void) |
728 | { |
729 | } |
730 | |
731 | void |
732 | ml_cpu_up_update_counts(int cpu_id) |
733 | { |
734 | ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id]; |
735 | |
736 | os_atomic_inc(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed); |
737 | |
738 | os_atomic_inc(&machine_info.physical_cpu, relaxed); |
739 | os_atomic_inc(&machine_info.logical_cpu, relaxed); |
740 | } |
741 | |
742 | /* |
743 | * These are called from the machine-independent routine cpu_down() |
744 | * to perform machine-dependent info updates. |
745 | * |
746 | * The update to CPU counts needs to be separate from other actions |
747 | * because we don't update the counts when CLPC causes temporary |
748 | * cluster powerdown events, as these must be transparent to the user. |
749 | */ |
750 | void |
751 | ml_cpu_down(void) |
752 | { |
753 | /* |
754 | * If we want to deal with outstanding IPIs, we need to |
	 * do so relatively early in the processor_doshutdown path,
756 | * as we pend decrementer interrupts using the IPI |
757 | * mechanism if we cannot immediately service them (if |
758 | * IRQ is masked). Do so now. |
759 | * |
760 | * We aren't on the interrupt stack here; would it make |
761 | * more sense to disable signaling and then enable |
762 | * interrupts? It might be a bit cleaner. |
763 | */ |
764 | cpu_data_t *cpu_data_ptr = getCpuDatap(); |
765 | cpu_data_ptr->cpu_running = FALSE; |
766 | |
767 | if (cpu_data_ptr != &BootCpuData) { |
768 | /* |
769 | * Move all of this cpu's timers to the master/boot cpu, |
770 | * and poke it in case there's a sooner deadline for it to schedule. |
771 | */ |
		timer_queue_shutdown(&cpu_data_ptr->rtclock_timer.queue);
		kern_return_t rv = cpu_xcall(BootCpuData.cpu_number, &timer_queue_expire_local, &ml_cpu_down);
		if (rv != KERN_SUCCESS) {
			panic("ml_cpu_down: IPI failure %d", rv);
776 | } |
777 | } |
778 | |
779 | cpu_signal_handler_internal(TRUE); |
780 | } |
781 | void |
782 | ml_cpu_down_update_counts(int cpu_id) |
783 | { |
784 | ml_topology_cpu_t *cpu = &ml_get_topology_info()->cpus[cpu_id]; |
785 | |
786 | os_atomic_dec(&cluster_type_num_active_cpus[cpu->cluster_type], relaxed); |
787 | |
788 | os_atomic_dec(&machine_info.physical_cpu, relaxed); |
789 | os_atomic_dec(&machine_info.logical_cpu, relaxed); |
790 | } |
791 | |
792 | |
793 | unsigned int |
794 | ml_get_machine_mem(void) |
795 | { |
796 | return machine_info.memory_size; |
797 | } |
798 | |
799 | __attribute__((noreturn)) |
800 | void |
801 | halt_all_cpus(boolean_t reboot) |
802 | { |
803 | if (reboot) { |
804 | printf(format: "MACH Reboot\n" ); |
805 | PEHaltRestart(type: kPERestartCPU); |
806 | } else { |
807 | printf(format: "CPU halted\n" ); |
808 | PEHaltRestart(type: kPEHaltCPU); |
809 | } |
810 | while (1) { |
811 | ; |
812 | } |
813 | } |
814 | |
815 | __attribute__((noreturn)) |
816 | void |
817 | halt_cpu(void) |
818 | { |
819 | halt_all_cpus(FALSE); |
820 | } |
821 | |
822 | /* |
823 | * Routine: machine_signal_idle |
824 | * Function: |
825 | */ |
826 | void |
827 | machine_signal_idle( |
828 | processor_t processor) |
829 | { |
	cpu_signal(processor_to_cpu_datap(processor), SIGPnop, (void *)NULL, (void *)NULL);
831 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0); |
832 | } |
833 | |
834 | void |
835 | machine_signal_idle_deferred( |
836 | processor_t processor) |
837 | { |
	cpu_signal_deferred(processor_to_cpu_datap(processor));
839 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_DEFERRED_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0); |
840 | } |
841 | |
842 | void |
843 | machine_signal_idle_cancel( |
844 | processor_t processor) |
845 | { |
	cpu_signal_cancel(processor_to_cpu_datap(processor));
847 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_CANCEL_AST), processor->cpu_id, 0 /* nop */, 0, 0, 0); |
848 | } |
849 | |
850 | /* |
851 | * Routine: ml_install_interrupt_handler |
852 | * Function: Initialize Interrupt Handler |
853 | */ |
854 | void |
855 | ml_install_interrupt_handler( |
856 | void *nub, |
857 | int source, |
858 | void *target, |
859 | IOInterruptHandler handler, |
860 | void *refCon) |
861 | { |
862 | cpu_data_t *cpu_data_ptr; |
863 | boolean_t current_state; |
864 | |
865 | current_state = ml_set_interrupts_enabled(FALSE); |
866 | cpu_data_ptr = getCpuDatap(); |
867 | |
868 | cpu_data_ptr->interrupt_nub = nub; |
869 | cpu_data_ptr->interrupt_source = source; |
870 | cpu_data_ptr->interrupt_target = target; |
871 | cpu_data_ptr->interrupt_handler = handler; |
872 | cpu_data_ptr->interrupt_refCon = refCon; |
873 | |
	(void) ml_set_interrupts_enabled(current_state);
875 | } |
876 | |
877 | /* |
878 | * Routine: ml_init_interrupt |
879 | * Function: Initialize Interrupts |
880 | */ |
881 | void |
882 | ml_init_interrupt(void) |
883 | { |
884 | #if defined(HAS_IPI) |
885 | /* |
886 | * ml_init_interrupt will get called once for each CPU, but this is redundant |
	 * because there is only one global copy of the register for Skye. Do it only
	 * on the bootstrap CPU.
889 | */ |
890 | if (getCpuDatap()->cluster_master) { |
891 | ml_cpu_signal_deferred_adjust_timer(deferred_ipi_timer_ns); |
892 | } |
893 | #endif |
894 | } |
895 | |
896 | /* |
897 | * Routine: ml_init_timebase |
 * Function: register and setup Timebase, Decrementer services
899 | */ |
900 | void |
901 | ml_init_timebase( |
902 | void *args, |
903 | tbd_ops_t tbd_funcs, |
904 | vm_offset_t int_address, |
905 | vm_offset_t int_value __unused) |
906 | { |
907 | cpu_data_t *cpu_data_ptr; |
908 | |
909 | cpu_data_ptr = (cpu_data_t *)args; |
910 | |
911 | if ((cpu_data_ptr == &BootCpuData) |
912 | && (rtclock_timebase_func.tbd_fiq_handler == (void *)NULL)) { |
913 | rtclock_timebase_func = *tbd_funcs; |
914 | rtclock_timebase_addr = int_address; |
915 | } |
916 | } |
917 | |
918 | #define ML_READPROP_MANDATORY UINT64_MAX |
919 | |
920 | static uint64_t |
921 | ml_readprop(const DTEntry entry, const char *propertyName, uint64_t default_value) |
922 | { |
923 | void const *prop; |
924 | unsigned int propSize; |
925 | |
	if (SecureDTGetProperty(entry, propertyName, &prop, &propSize) == kSuccess) {
927 | if (propSize == sizeof(uint8_t)) { |
928 | return *((uint8_t const *)prop); |
929 | } else if (propSize == sizeof(uint16_t)) { |
930 | return *((uint16_t const *)prop); |
931 | } else if (propSize == sizeof(uint32_t)) { |
932 | return *((uint32_t const *)prop); |
933 | } else if (propSize == sizeof(uint64_t)) { |
934 | return *((uint64_t const *)prop); |
935 | } else { |
936 | panic("CPU property '%s' has bad size %u" , propertyName, propSize); |
937 | } |
938 | } else { |
939 | if (default_value == ML_READPROP_MANDATORY) { |
940 | panic("Missing mandatory property '%s'" , propertyName); |
941 | } |
942 | return default_value; |
943 | } |
944 | } |
945 | |
946 | static boolean_t |
947 | ml_read_reg_range(const DTEntry entry, const char *propertyName, uint64_t *pa_ptr, uint64_t *len_ptr) |
948 | { |
949 | uint64_t const *prop; |
950 | unsigned int propSize; |
951 | |
	if (SecureDTGetProperty(entry, propertyName, (void const **)&prop, &propSize) != kSuccess) {
		return FALSE;
	}

	if (propSize != sizeof(uint64_t) * 2) {
		panic("Wrong property size for %s", propertyName);
958 | } |
959 | |
960 | *pa_ptr = prop[0]; |
961 | *len_ptr = prop[1]; |
962 | return TRUE; |
963 | } |
964 | |
965 | static boolean_t |
966 | ml_is_boot_cpu(const DTEntry entry) |
967 | { |
968 | void const *prop; |
969 | unsigned int propSize; |
970 | |
971 | if (SecureDTGetProperty(entry, propertyName: "state" , propertyValue: &prop, propertySize: &propSize) != kSuccess) { |
972 | panic("unable to retrieve state for cpu" ); |
973 | } |
974 | |
975 | if (strncmp(s1: (char const *)prop, s2: "running" , n: propSize) == 0) { |
976 | return TRUE; |
977 | } else { |
978 | return FALSE; |
979 | } |
980 | } |
981 | |
982 | static void |
983 | ml_read_chip_revision(unsigned int *rev __unused) |
984 | { |
985 | // The CPU_VERSION_* macros are only defined on APPLE_ARM64_ARCH_FAMILY builds |
986 | #ifdef APPLE_ARM64_ARCH_FAMILY |
987 | DTEntry entryP; |
988 | |
	if ((SecureDTFindEntry("name", "arm-io", &entryP) == kSuccess)) {
		*rev = (unsigned int)ml_readprop(entryP, "chip-revision", CPU_VERSION_UNKNOWN);
991 | } else { |
992 | *rev = CPU_VERSION_UNKNOWN; |
993 | } |
994 | #endif |
995 | } |
996 | |
997 | void |
998 | ml_parse_cpu_topology(void) |
999 | { |
1000 | DTEntry entry, child __unused; |
1001 | OpaqueDTEntryIterator iter; |
1002 | uint32_t cpu_boot_arg = MAX_CPUS; |
1003 | uint64_t cpumask_boot_arg = ULLONG_MAX; |
1004 | int err; |
1005 | |
1006 | int64_t cluster_phys_to_logical[MAX_CPU_CLUSTER_PHY_ID + 1]; |
1007 | int64_t cluster_max_cpu_phys_id[MAX_CPU_CLUSTER_PHY_ID + 1]; |
	const boolean_t cpus_boot_arg_present = PE_parse_boot_argn("cpus", &cpu_boot_arg, sizeof(cpu_boot_arg));
	const boolean_t cpumask_boot_arg_present = PE_parse_boot_argn("cpumask", &cpumask_boot_arg, sizeof(cpumask_boot_arg));
1010 | |
1011 | // The cpus=N and cpumask=N boot args cannot be used simultaneously. Flag this |
1012 | // so that we trigger a panic later in the boot process, once serial is enabled. |
1013 | if (cpus_boot_arg_present && cpumask_boot_arg_present) { |
1014 | cpu_config_correct = false; |
1015 | } |
1016 | |
	err = SecureDTLookupEntry(NULL, "/cpus", &entry);
	assert(err == kSuccess);

	err = SecureDTInitEntryIterator(entry, &iter);
1021 | assert(err == kSuccess); |
1022 | |
1023 | for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) { |
1024 | cluster_offsets[i] = -1; |
1025 | cluster_phys_to_logical[i] = -1; |
1026 | cluster_max_cpu_phys_id[i] = 0; |
1027 | } |
1028 | |
	while (kSuccess == SecureDTIterateEntries(&iter, &child)) {
		boolean_t is_boot_cpu = ml_is_boot_cpu(child);
1031 | boolean_t cpu_enabled = cpumask_boot_arg & 1; |
1032 | cpumask_boot_arg >>= 1; |
1033 | |
1034 | // Boot CPU disabled in cpumask. Flag this so that we trigger a panic |
1035 | // later in the boot process, once serial is enabled. |
1036 | if (is_boot_cpu && !cpu_enabled) { |
1037 | cpu_config_correct = false; |
1038 | } |
1039 | |
1040 | // Ignore this CPU if it has been disabled by the cpumask= boot-arg. |
1041 | if (!is_boot_cpu && !cpu_enabled) { |
1042 | continue; |
1043 | } |
1044 | |
1045 | // If the number of CPUs is constrained by the cpus= boot-arg, and the boot CPU hasn't |
1046 | // been added to the topology struct yet, and we only have one slot left, then skip |
1047 | // every other non-boot CPU in order to leave room for the boot CPU. |
1048 | // |
1049 | // e.g. if the boot-args say "cpus=3" and CPU4 is the boot CPU, then the cpus[] |
1050 | // array will list CPU0, CPU1, and CPU4. CPU2-CPU3 and CPU5-CPUn will be omitted. |
1051 | if (topology_info.num_cpus >= (cpu_boot_arg - 1) && topology_info.boot_cpu == NULL && !is_boot_cpu) { |
1052 | continue; |
1053 | } |
1054 | if (topology_info.num_cpus >= cpu_boot_arg) { |
1055 | break; |
1056 | } |
1057 | |
1058 | ml_topology_cpu_t *cpu = &topology_info.cpus[topology_info.num_cpus]; |
1059 | |
1060 | cpu->cpu_id = topology_info.num_cpus++; |
1061 | assert(cpu->cpu_id < MAX_CPUS); |
1062 | topology_info.max_cpu_id = MAX(topology_info.max_cpu_id, cpu->cpu_id); |
1063 | |
1064 | cpu->die_id = 0; |
1065 | topology_info.max_die_id = 0; |
1066 | |
		cpu->phys_id = (uint32_t)ml_readprop(child, "reg", ML_READPROP_MANDATORY);

		cpu->l2_access_penalty = (uint32_t)ml_readprop(child, "l2-access-penalty", 0);
		cpu->l2_cache_size = (uint32_t)ml_readprop(child, "l2-cache-size", 0);
		cpu->l2_cache_id = (uint32_t)ml_readprop(child, "l2-cache-id", 0);
		cpu->l3_cache_size = (uint32_t)ml_readprop(child, "l3-cache-size", 0);
		cpu->l3_cache_id = (uint32_t)ml_readprop(child, "l3-cache-id", 0);

		ml_read_reg_range(child, "cpu-uttdbg-reg", &cpu->cpu_UTTDBG_pa, &cpu->cpu_UTTDBG_len);
		ml_read_reg_range(child, "cpu-impl-reg", &cpu->cpu_IMPL_pa, &cpu->cpu_IMPL_len);
		ml_read_reg_range(child, "coresight-reg", &cpu->coresight_pa, &cpu->coresight_len);
1078 | cpu->cluster_type = CLUSTER_TYPE_SMP; |
1079 | |
		int cluster_type = (int)ml_readprop(child, "cluster-type", 0);
1081 | if (cluster_type == 'E') { |
1082 | cpu->cluster_type = CLUSTER_TYPE_E; |
1083 | } else if (cluster_type == 'P') { |
1084 | cpu->cluster_type = CLUSTER_TYPE_P; |
1085 | } |
1086 | |
1087 | topology_info.cluster_type_num_cpus[cpu->cluster_type]++; |
1088 | |
1089 | /* |
1090 | * Since we want to keep a linear cluster ID space, we cannot just rely |
1091 | * on the value provided by EDT. Instead, use the MPIDR value to see if we have |
1092 | * seen this exact cluster before. If so, then reuse that cluster ID for this CPU. |
1093 | */ |
1094 | #if HAS_CLUSTER |
1095 | uint32_t phys_cluster_id = MPIDR_CLUSTER_ID(cpu->phys_id); |
1096 | #else |
1097 | uint32_t phys_cluster_id = (cpu->cluster_type == CLUSTER_TYPE_P); |
1098 | #endif |
1099 | assert(phys_cluster_id <= MAX_CPU_CLUSTER_PHY_ID); |
1100 | cpu->cluster_id = ((cluster_phys_to_logical[phys_cluster_id] == -1) ? |
1101 | topology_info.num_clusters : cluster_phys_to_logical[phys_cluster_id]); |
1102 | |
1103 | assert(cpu->cluster_id < MAX_CPU_CLUSTERS); |
1104 | |
1105 | ml_topology_cluster_t *cluster = &topology_info.clusters[cpu->cluster_id]; |
1106 | if (cluster->num_cpus == 0) { |
1107 | assert(topology_info.num_clusters < MAX_CPU_CLUSTERS); |
1108 | |
1109 | topology_info.num_clusters++; |
1110 | topology_info.max_cluster_id = MAX(topology_info.max_cluster_id, cpu->cluster_id); |
1111 | topology_info.cluster_types |= (1 << cpu->cluster_type); |
1112 | |
1113 | cluster->cluster_id = cpu->cluster_id; |
1114 | cluster->cluster_type = cpu->cluster_type; |
1115 | cluster->first_cpu_id = cpu->cpu_id; |
1116 | assert(cluster_phys_to_logical[phys_cluster_id] == -1); |
1117 | cluster_phys_to_logical[phys_cluster_id] = cpu->cluster_id; |
1118 | |
1119 | topology_info.cluster_type_num_clusters[cluster->cluster_type]++; |
1120 | |
1121 | // Since we don't have a per-cluster EDT node, this is repeated in each CPU node. |
1122 | // If we wind up with a bunch of these, we might want to create separate per-cluster |
1123 | // EDT nodes and have the CPU nodes reference them through a phandle. |
1124 | ml_read_reg_range(entry: child, propertyName: "acc-impl-reg" , pa_ptr: &cluster->acc_IMPL_pa, len_ptr: &cluster->acc_IMPL_len); |
1125 | ml_read_reg_range(entry: child, propertyName: "cpm-impl-reg" , pa_ptr: &cluster->cpm_IMPL_pa, len_ptr: &cluster->cpm_IMPL_len); |
1126 | } |
1127 | |
1128 | #if HAS_CLUSTER |
1129 | if (MPIDR_CPU_ID(cpu->phys_id) > cluster_max_cpu_phys_id[phys_cluster_id]) { |
1130 | cluster_max_cpu_phys_id[phys_cluster_id] = MPIDR_CPU_ID(cpu->phys_id); |
1131 | } |
1132 | #endif |
1133 | |
		cpu->die_cluster_id = (int)ml_readprop(child, "die-cluster-id", MPIDR_CLUSTER_ID(cpu->phys_id));
		cpu->cluster_core_id = (int)ml_readprop(child, "cluster-core-id", MPIDR_CPU_ID(cpu->phys_id));
1136 | |
1137 | cluster->num_cpus++; |
1138 | cluster->cpu_mask |= 1ULL << cpu->cpu_id; |
1139 | |
1140 | if (is_boot_cpu) { |
1141 | assert(topology_info.boot_cpu == NULL); |
1142 | topology_info.boot_cpu = cpu; |
1143 | topology_info.boot_cluster = cluster; |
1144 | } |
1145 | |
1146 | #if CONFIG_SPTM |
1147 | sptm_register_cpu(cpu->phys_id); |
1148 | #endif |
1149 | } |
1150 | |
1151 | #if HAS_CLUSTER |
1152 | /* |
1153 | * Build the cluster offset array, ensuring that the region reserved |
1154 | * for each physical cluster contains enough entries to be indexed |
1155 | * by the maximum physical CPU ID (AFF0) within the cluster. |
1156 | */ |
1157 | unsigned int cur_cluster_offset = 0; |
1158 | for (int i = 0; i <= MAX_CPU_CLUSTER_PHY_ID; i++) { |
1159 | if (cluster_phys_to_logical[i] != -1) { |
1160 | cluster_offsets[i] = cur_cluster_offset; |
1161 | cur_cluster_offset += (cluster_max_cpu_phys_id[i] + 1); |
1162 | } |
1163 | } |
1164 | assert(cur_cluster_offset <= MAX_CPUS); |
1165 | #else |
1166 | /* |
1167 | * For H10, there are really 2 physical clusters, but they are not separated |
1168 | * into distinct ACCs. AFF1 therefore always reports 0, and AFF0 numbering |
1169 | * is linear across both clusters. For the purpose of MPIDR_EL1-based indexing, |
1170 | * treat H10 and earlier devices as though they contain a single cluster. |
1171 | */ |
1172 | cluster_offsets[0] = 0; |
1173 | #endif |
1174 | assert(topology_info.boot_cpu != NULL); |
	ml_read_chip_revision(&topology_info.chip_revision);
1176 | |
1177 | /* |
1178 | * Set TPIDR_EL0 to indicate the correct cpu number & cluster id, |
1179 | * as we may not be booting from cpu 0. Userspace will consume |
1180 | * the current CPU number through this register. For non-boot |
1181 | * cores, this is done in start.s (start_cpu) using the per-cpu |
1182 | * data object. |
1183 | */ |
1184 | ml_topology_cpu_t *boot_cpu = topology_info.boot_cpu; |
1185 | uint64_t tpidr_el0 = ((boot_cpu->cpu_id << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \ |
1186 | ((boot_cpu->cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK); |
1187 | assert(((tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == boot_cpu->cpu_id); |
1188 | assert(((tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == boot_cpu->cluster_id); |
1189 | __builtin_arm_wsr64("TPIDR_EL0" , tpidr_el0); |
1190 | |
1191 | __builtin_arm_wsr64("TPIDRRO_EL0" , 0); |
1192 | } |
1193 | |
1194 | const ml_topology_info_t * |
1195 | ml_get_topology_info(void) |
1196 | { |
1197 | return &topology_info; |
1198 | } |
1199 | |
1200 | void |
1201 | ml_map_cpu_pio(void) |
1202 | { |
1203 | unsigned int i; |
1204 | |
1205 | for (i = 0; i < topology_info.num_cpus; i++) { |
1206 | ml_topology_cpu_t *cpu = &topology_info.cpus[i]; |
1207 | if (cpu->cpu_IMPL_pa) { |
			cpu->cpu_IMPL_regs = (vm_offset_t)ml_io_map(cpu->cpu_IMPL_pa, cpu->cpu_IMPL_len);
			cpu->coresight_regs = (vm_offset_t)ml_io_map(cpu->coresight_pa, cpu->coresight_len);
		}
		if (cpu->cpu_UTTDBG_pa) {
			cpu->cpu_UTTDBG_regs = (vm_offset_t)ml_io_map(cpu->cpu_UTTDBG_pa, cpu->cpu_UTTDBG_len);
1213 | } |
1214 | } |
1215 | |
1216 | for (i = 0; i < topology_info.num_clusters; i++) { |
1217 | ml_topology_cluster_t *cluster = &topology_info.clusters[i]; |
1218 | if (cluster->acc_IMPL_pa) { |
			cluster->acc_IMPL_regs = (vm_offset_t)ml_io_map(cluster->acc_IMPL_pa, cluster->acc_IMPL_len);
		}
		if (cluster->cpm_IMPL_pa) {
			cluster->cpm_IMPL_regs = (vm_offset_t)ml_io_map(cluster->cpm_IMPL_pa, cluster->cpm_IMPL_len);
1223 | } |
1224 | } |
1225 | } |
1226 | |
1227 | unsigned int |
1228 | ml_get_cpu_count(void) |
1229 | { |
1230 | return topology_info.num_cpus; |
1231 | } |
1232 | |
1233 | unsigned int |
1234 | ml_get_cluster_count(void) |
1235 | { |
1236 | return topology_info.num_clusters; |
1237 | } |
1238 | |
1239 | int |
1240 | ml_get_boot_cpu_number(void) |
1241 | { |
1242 | return topology_info.boot_cpu->cpu_id; |
1243 | } |
1244 | |
1245 | cluster_type_t |
1246 | ml_get_boot_cluster_type(void) |
1247 | { |
1248 | return topology_info.boot_cluster->cluster_type; |
1249 | } |
1250 | |
1251 | int |
1252 | ml_get_cpu_number(uint32_t phys_id) |
1253 | { |
1254 | phys_id &= MPIDR_AFF1_MASK | MPIDR_AFF0_MASK; |
1255 | |
1256 | for (unsigned i = 0; i < topology_info.num_cpus; i++) { |
1257 | if (topology_info.cpus[i].phys_id == phys_id) { |
1258 | return i; |
1259 | } |
1260 | } |
1261 | |
1262 | return -1; |
1263 | } |
1264 | |
1265 | int |
1266 | ml_get_cluster_number(uint32_t phys_id) |
1267 | { |
1268 | int cpu_id = ml_get_cpu_number(phys_id); |
1269 | if (cpu_id < 0) { |
1270 | return -1; |
1271 | } |
1272 | |
1273 | ml_topology_cpu_t *cpu = &topology_info.cpus[cpu_id]; |
1274 | |
1275 | return cpu->cluster_id; |
1276 | } |
1277 | |
1278 | unsigned int |
1279 | ml_get_cpu_number_local(void) |
1280 | { |
1281 | uint64_t mpidr_el1_value = 0; |
1282 | unsigned cpu_id; |
1283 | |
1284 | /* We identify the CPU based on the constant bits of MPIDR_EL1. */ |
1285 | MRS(mpidr_el1_value, "MPIDR_EL1" ); |
1286 | cpu_id = ml_get_cpu_number(phys_id: (uint32_t)mpidr_el1_value); |
1287 | |
1288 | assert(cpu_id <= (unsigned int)ml_get_max_cpu_number()); |
1289 | |
1290 | return cpu_id; |
1291 | } |
1292 | |
1293 | int |
1294 | ml_get_cluster_number_local() |
1295 | { |
1296 | uint64_t mpidr_el1_value = 0; |
1297 | unsigned cluster_id; |
1298 | |
1299 | /* We identify the cluster based on the constant bits of MPIDR_EL1. */ |
1300 | MRS(mpidr_el1_value, "MPIDR_EL1" ); |
1301 | cluster_id = ml_get_cluster_number(phys_id: (uint32_t)mpidr_el1_value); |
1302 | |
1303 | assert(cluster_id <= (unsigned int)ml_get_max_cluster_number()); |
1304 | |
1305 | return cluster_id; |
1306 | } |
1307 | |
1308 | int |
1309 | ml_get_max_cpu_number(void) |
1310 | { |
1311 | return topology_info.max_cpu_id; |
1312 | } |
1313 | |
1314 | int |
1315 | ml_get_max_cluster_number(void) |
1316 | { |
1317 | return topology_info.max_cluster_id; |
1318 | } |
1319 | |
1320 | unsigned int |
1321 | ml_get_first_cpu_id(unsigned int cluster_id) |
1322 | { |
1323 | return topology_info.clusters[cluster_id].first_cpu_id; |
1324 | } |
1325 | |
static_assert(MAX_CPUS <= 256, "MAX_CPUS must fit in _COMM_PAGE_CPU_TO_CLUSTER; Increase table size if needed");
1327 | |
1328 | void |
1329 | ml_map_cpus_to_clusters(uint8_t *table) |
1330 | { |
1331 | for (uint16_t cpu_id = 0; cpu_id < topology_info.num_cpus; cpu_id++) { |
1332 | *(table + cpu_id) = (uint8_t)(topology_info.cpus[cpu_id].cluster_id); |
1333 | } |
1334 | } |
1335 | |
1336 | /* |
1337 | * Return the die id of a cluster. |
1338 | */ |
1339 | unsigned int |
1340 | ml_get_die_id(unsigned int cluster_id) |
1341 | { |
1342 | /* |
1343 | * The current implementation gets the die_id from the |
1344 | * first CPU of the cluster. |
1345 | * rdar://80917654 (Add the die_id field to the cluster topology info) |
1346 | */ |
1347 | unsigned int first_cpu = ml_get_first_cpu_id(cluster_id); |
1348 | return topology_info.cpus[first_cpu].die_id; |
1349 | } |
1350 | |
1351 | /* |
1352 | * Return the index of a cluster in its die. |
1353 | */ |
1354 | unsigned int |
1355 | ml_get_die_cluster_id(unsigned int cluster_id) |
1356 | { |
1357 | /* |
1358 | * The current implementation gets the die_id from the |
1359 | * first CPU of the cluster. |
1360 | * rdar://80917654 (Add the die_id field to the cluster topology info) |
1361 | */ |
1362 | unsigned int first_cpu = ml_get_first_cpu_id(cluster_id); |
1363 | return topology_info.cpus[first_cpu].die_cluster_id; |
1364 | } |
1365 | |
1366 | /* |
1367 | * Return the highest die id of the system. |
1368 | */ |
1369 | unsigned int |
1370 | ml_get_max_die_id(void) |
1371 | { |
1372 | return topology_info.max_die_id; |
1373 | } |
1374 | |
1375 | void |
1376 | ml_lockdown_init() |
1377 | { |
1378 | #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR) |
1379 | rorgn_stash_range(); |
1380 | #endif |
1381 | } |
1382 | |
1383 | kern_return_t |
1384 | ml_lockdown_handler_register(lockdown_handler_t f, void *this) |
1385 | { |
1386 | if (!f) { |
1387 | return KERN_FAILURE; |
1388 | } |
1389 | |
1390 | assert(lockdown_done); |
1391 | f(this); // XXX: f this whole function |
1392 | |
1393 | return KERN_SUCCESS; |
1394 | } |
1395 | |
1396 | static mcache_flush_function mcache_flush_func; |
1397 | static void* mcache_flush_service; |
1398 | kern_return_t |
1399 | ml_mcache_flush_callback_register(mcache_flush_function func, void *service) |
1400 | { |
1401 | mcache_flush_service = service; |
1402 | mcache_flush_func = func; |
1403 | |
1404 | return KERN_SUCCESS; |
1405 | } |
1406 | |
1407 | kern_return_t |
1408 | ml_mcache_flush(void) |
1409 | { |
1410 | if (!mcache_flush_func) { |
1411 | panic("Cannot flush M$ with no flush callback registered" ); |
1412 | |
1413 | return KERN_FAILURE; |
1414 | } else { |
1415 | return mcache_flush_func(mcache_flush_service); |
1416 | } |
1417 | } |
1418 | |
1419 | |
1420 | extern lck_mtx_t pset_create_lock; |
1421 | |
1422 | kern_return_t |
1423 | ml_processor_register(ml_processor_info_t *in_processor_info, |
1424 | processor_t *processor_out, ipi_handler_t *ipi_handler_out, |
1425 | perfmon_interrupt_handler_func *pmi_handler_out) |
1426 | { |
1427 | cpu_data_t *this_cpu_datap; |
1428 | processor_set_t pset; |
1429 | boolean_t is_boot_cpu; |
1430 | static unsigned int reg_cpu_count = 0; |
1431 | |
1432 | if (in_processor_info->log_id > (uint32_t)ml_get_max_cpu_number()) { |
1433 | return KERN_FAILURE; |
1434 | } |
1435 | |
	if ((unsigned)OSIncrementAtomic((SInt32*)&reg_cpu_count) >= topology_info.num_cpus) {
1437 | return KERN_FAILURE; |
1438 | } |
1439 | |
1440 | if (in_processor_info->log_id != (uint32_t)ml_get_boot_cpu_number()) { |
1441 | is_boot_cpu = FALSE; |
1442 | this_cpu_datap = cpu_data_alloc(FALSE); |
		cpu_data_init(this_cpu_datap);
1444 | } else { |
1445 | this_cpu_datap = &BootCpuData; |
1446 | is_boot_cpu = TRUE; |
1447 | } |
1448 | |
1449 | assert(in_processor_info->log_id <= (uint32_t)ml_get_max_cpu_number()); |
1450 | |
1451 | this_cpu_datap->cpu_id = in_processor_info->cpu_id; |
1452 | |
1453 | if (!is_boot_cpu) { |
1454 | this_cpu_datap->cpu_number = (unsigned short)(in_processor_info->log_id); |
1455 | |
		if (cpu_data_register(this_cpu_datap) != KERN_SUCCESS) {
1457 | goto processor_register_error; |
1458 | } |
1459 | assert((this_cpu_datap->cpu_number & MACHDEP_TPIDR_CPUNUM_MASK) == this_cpu_datap->cpu_number); |
1460 | } |
1461 | |
1462 | this_cpu_datap->cpu_idle_notify = in_processor_info->processor_idle; |
1463 | this_cpu_datap->cpu_cache_dispatch = (cache_dispatch_t)in_processor_info->platform_cache_dispatch; |
	nanoseconds_to_absolutetime((uint64_t) in_processor_info->powergate_latency, &this_cpu_datap->cpu_idle_latency);
	this_cpu_datap->cpu_reset_assist = kvtophys(in_processor_info->powergate_stub_addr);
1466 | |
1467 | this_cpu_datap->idle_timer_notify = in_processor_info->idle_timer; |
1468 | this_cpu_datap->idle_timer_refcon = in_processor_info->idle_timer_refcon; |
1469 | |
1470 | this_cpu_datap->platform_error_handler = in_processor_info->platform_error_handler; |
1471 | this_cpu_datap->cpu_regmap_paddr = in_processor_info->regmap_paddr; |
1472 | this_cpu_datap->cpu_phys_id = in_processor_info->phys_id; |
1473 | this_cpu_datap->cpu_l2_access_penalty = in_processor_info->l2_access_penalty; |
1474 | |
1475 | this_cpu_datap->cpu_cluster_type = in_processor_info->cluster_type; |
1476 | this_cpu_datap->cpu_cluster_id = in_processor_info->cluster_id; |
1477 | this_cpu_datap->cpu_l2_id = in_processor_info->l2_cache_id; |
1478 | this_cpu_datap->cpu_l2_size = in_processor_info->l2_cache_size; |
1479 | this_cpu_datap->cpu_l3_id = in_processor_info->l3_cache_id; |
1480 | this_cpu_datap->cpu_l3_size = in_processor_info->l3_cache_size; |
1481 | |
1482 | /* |
1483 | * Encode cpu_id, cluster_id to be stored in TPIDR_EL0 (see |
1484 | * cswitch.s:set_thread_registers, start.s:start_cpu) for consumption |
1485 | * by userspace. |
1486 | */ |
1487 | this_cpu_datap->cpu_tpidr_el0 = ((this_cpu_datap->cpu_number << MACHDEP_TPIDR_CPUNUM_SHIFT) & MACHDEP_TPIDR_CPUNUM_MASK) | \ |
1488 | ((this_cpu_datap->cpu_cluster_id << MACHDEP_TPIDR_CLUSTERID_SHIFT) & MACHDEP_TPIDR_CLUSTERID_MASK); |
1489 | assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CPUNUM_MASK) >> MACHDEP_TPIDR_CPUNUM_SHIFT) == this_cpu_datap->cpu_number); |
1490 | assert(((this_cpu_datap->cpu_tpidr_el0 & MACHDEP_TPIDR_CLUSTERID_MASK) >> MACHDEP_TPIDR_CLUSTERID_SHIFT) == this_cpu_datap->cpu_cluster_id); |
1491 | |
1492 | #if HAS_CLUSTER |
1493 | this_cpu_datap->cluster_master = !OSTestAndSet(this_cpu_datap->cpu_cluster_id, &cluster_initialized); |
1494 | #else /* HAS_CLUSTER */ |
1495 | this_cpu_datap->cluster_master = is_boot_cpu; |
1496 | #endif /* HAS_CLUSTER */ |
	lck_mtx_lock(&pset_create_lock);
	pset = pset_find(in_processor_info->cluster_id, NULL);
	kprintf("[%d]%s>pset_find(cluster_id=%d) returned pset %d\n", current_processor()->cpu_id, __FUNCTION__, in_processor_info->cluster_id, pset ? pset->pset_id : -1);
1500 | if (pset == NULL) { |
1501 | #if __AMP__ |
1502 | pset_cluster_type_t pset_cluster_type = this_cpu_datap->cpu_cluster_type == CLUSTER_TYPE_E ? PSET_AMP_E : PSET_AMP_P; |
1503 | pset = pset_create(ml_get_boot_cluster_type() == this_cpu_datap->cpu_cluster_type ? &pset_node0 : &pset_node1, pset_cluster_type, this_cpu_datap->cpu_cluster_id, this_cpu_datap->cpu_cluster_id); |
1504 | assert(pset != PROCESSOR_SET_NULL); |
1505 | kprintf("[%d]%s>pset_create(cluster_id=%d) returned pset %d\n" , current_processor()->cpu_id, __FUNCTION__, this_cpu_datap->cpu_cluster_id, pset->pset_id); |
1506 | #else /* __AMP__ */ |
1507 | pset_cluster_type_t pset_cluster_type = PSET_SMP; |
1508 | pset = pset_create(node: &pset_node0, pset_type: pset_cluster_type, pset_cluster_id: this_cpu_datap->cpu_cluster_id, pset_id: this_cpu_datap->cpu_cluster_id); |
1509 | assert(pset != PROCESSOR_SET_NULL); |
1510 | #endif /* __AMP__ */ |
1511 | } |
1512 | kprintf(fmt: "[%d]%s>cpu_id %p cluster_id %d cpu_number %d is type %d\n" , current_processor()->cpu_id, __FUNCTION__, in_processor_info->cpu_id, in_processor_info->cluster_id, this_cpu_datap->cpu_number, in_processor_info->cluster_type); |
1513 | lck_mtx_unlock(lck: &pset_create_lock); |
1514 | |
1515 | processor_t processor = PERCPU_GET_RELATIVE(processor, cpu_data, this_cpu_datap); |
1516 | if (!is_boot_cpu) { |
		processor_init(processor, this_cpu_datap->cpu_number, pset);
1518 | |
1519 | if (this_cpu_datap->cpu_l2_access_penalty) { |
1520 | /* |
1521 | * Cores that have a non-zero L2 access penalty compared |
1522 | * to the boot processor should be de-prioritized by the |
1523 | * scheduler, so that threads use the cores with better L2 |
1524 | * preferentially. |
1525 | */ |
1526 | processor_set_primary(processor, master_processor); |
1527 | } |
1528 | } |
1529 | |
1530 | *processor_out = processor; |
1531 | *ipi_handler_out = cpu_signal_handler; |
1532 | #if CPMU_AIC_PMI && CONFIG_CPU_COUNTERS |
1533 | *pmi_handler_out = mt_cpmu_aic_pmi; |
1534 | #else |
1535 | *pmi_handler_out = NULL; |
1536 | #endif /* CPMU_AIC_PMI && CONFIG_CPU_COUNTERS */ |
1537 | if (in_processor_info->idle_tickle != (idle_tickle_t *) NULL) { |
1538 | *in_processor_info->idle_tickle = (idle_tickle_t) cpu_idle_tickle; |
1539 | } |
1540 | |
1541 | #if CONFIG_CPU_COUNTERS |
1542 | if (kpc_register_cpu(this_cpu_datap) != TRUE) { |
1543 | goto processor_register_error; |
1544 | } |
1545 | #endif /* CONFIG_CPU_COUNTERS */ |
1546 | |
1547 | |
1548 | if (!is_boot_cpu) { |
random_cpu_init(this_cpu_datap->cpu_number);
1550 | // now let next CPU register itself |
1551 | OSIncrementAtomic((SInt32*)&real_ncpus); |
1552 | } |
1553 | |
1554 | return KERN_SUCCESS; |
1555 | |
1556 | processor_register_error: |
1557 | #if CONFIG_CPU_COUNTERS |
1558 | kpc_unregister_cpu(this_cpu_datap); |
1559 | #endif /* CONFIG_CPU_COUNTERS */ |
1560 | if (!is_boot_cpu) { |
cpu_data_free(this_cpu_datap);
1562 | } |
1563 | |
1564 | return KERN_FAILURE; |
1565 | } |
1566 | |
1567 | void |
1568 | ml_init_arm_debug_interface( |
1569 | void * in_cpu_datap, |
1570 | vm_offset_t virt_address) |
1571 | { |
1572 | ((cpu_data_t *)in_cpu_datap)->cpu_debug_interface_map = virt_address; |
1573 | do_debugid(); |
1574 | } |
1575 | |
1576 | /* |
1577 | * Routine: init_ast_check |
1578 | * Function: |
1579 | */ |
1580 | void |
1581 | init_ast_check( |
1582 | __unused processor_t processor) |
1583 | { |
1584 | } |
1585 | |
1586 | /* |
1587 | * Routine: cause_ast_check |
1588 | * Function: |
1589 | */ |
1590 | void |
1591 | cause_ast_check( |
1592 | processor_t processor) |
1593 | { |
1594 | if (current_processor() != processor) { |
cpu_signal(processor_to_cpu_datap(processor), SIGPast, (void *)NULL, (void *)NULL);
1596 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), processor->cpu_id, 1 /* ast */, 0, 0, 0); |
1597 | } |
1598 | } |
1599 | |
1600 | extern uint32_t cpu_idle_count; |
1601 | |
1602 | void |
1603 | ml_get_power_state(boolean_t *icp, boolean_t *pidlep) |
1604 | { |
1605 | *icp = ml_at_interrupt_context(); |
1606 | *pidlep = (cpu_idle_count == real_ncpus); |
1607 | } |
1608 | |
1609 | /* |
1610 | * Routine: ml_cause_interrupt |
1611 | * Function: Generate a fake interrupt |
1612 | */ |
1613 | void |
1614 | ml_cause_interrupt(void) |
1615 | { |
1616 | return; /* BS_XXX */ |
1617 | } |
1618 | |
1619 | /* Map memory map IO space */ |
1620 | vm_offset_t |
1621 | ml_io_map( |
1622 | vm_offset_t phys_addr, |
1623 | vm_size_t size) |
1624 | { |
1625 | return io_map(phys_addr, size, VM_WIMG_IO, VM_PROT_DEFAULT, false); |
1626 | } |
1627 | |
1628 | /* Map memory map IO space (with protections specified) */ |
1629 | vm_offset_t |
1630 | ml_io_map_with_prot( |
1631 | vm_offset_t phys_addr, |
1632 | vm_size_t size, |
1633 | vm_prot_t prot) |
1634 | { |
1635 | return io_map(phys_addr, size, VM_WIMG_IO, prot, false); |
1636 | } |
1637 | |
1638 | vm_offset_t |
1639 | ml_io_map_unmappable( |
1640 | vm_offset_t phys_addr, |
1641 | vm_size_t size, |
1642 | unsigned int flags) |
1643 | { |
1644 | return io_map(phys_addr, size, flags, VM_PROT_DEFAULT, true); |
1645 | } |
1646 | |
1647 | vm_offset_t |
1648 | ml_io_map_wcomb( |
1649 | vm_offset_t phys_addr, |
1650 | vm_size_t size) |
1651 | { |
1652 | return io_map(phys_addr, size, VM_WIMG_WCOMB, VM_PROT_DEFAULT, false); |
1653 | } |
1654 | |
1655 | void |
1656 | ml_io_unmap(vm_offset_t addr, vm_size_t sz) |
1657 | { |
pmap_remove(kernel_pmap, addr, addr + sz);
kmem_free(kernel_map, addr, sz);
1660 | } |
1661 | |
1662 | vm_map_address_t |
1663 | ml_map_high_window( |
1664 | vm_offset_t phys_addr, |
1665 | vm_size_t len) |
1666 | { |
return pmap_map_high_window_bd(phys_addr, len, VM_PROT_READ | VM_PROT_WRITE);
1668 | } |
1669 | |
1670 | vm_offset_t |
1671 | ml_static_ptovirt( |
1672 | vm_offset_t paddr) |
1673 | { |
return phystokv(paddr);
1675 | } |
1676 | |
1677 | vm_offset_t |
1678 | ml_static_slide( |
1679 | vm_offset_t vaddr) |
1680 | { |
1681 | vm_offset_t slid_vaddr = 0; |
1682 | |
1683 | #if CONFIG_SPTM |
1684 | if ((vaddr >= vm_sptm_offsets.unslid_base) && (vaddr < vm_sptm_offsets.unslid_top)) { |
1685 | slid_vaddr = vaddr + vm_sptm_offsets.slide; |
1686 | } else if ((vaddr >= vm_txm_offsets.unslid_base) && (vaddr < vm_txm_offsets.unslid_top)) { |
1687 | slid_vaddr = vaddr + vm_txm_offsets.slide; |
1688 | } else |
1689 | #endif /* CONFIG_SPTM */ |
1690 | { |
1691 | slid_vaddr = vaddr + vm_kernel_slide; |
1692 | } |
1693 | |
1694 | if (!VM_KERNEL_IS_SLID(slid_vaddr)) { |
1695 | /* This is only intended for use on static kernel addresses. */ |
1696 | return 0; |
1697 | } |
1698 | |
1699 | return slid_vaddr; |
1700 | } |
1701 | |
1702 | vm_offset_t |
1703 | ml_static_unslide( |
1704 | vm_offset_t vaddr) |
1705 | { |
1706 | if (!VM_KERNEL_IS_SLID(vaddr)) { |
1707 | /* This is only intended for use on static kernel addresses. */ |
1708 | return 0; |
1709 | } |
1710 | |
1711 | #if CONFIG_SPTM |
1712 | /** |
1713 | * Addresses coming from the SPTM and TXM have a different slide than the |
1714 | * rest of the kernel. |
1715 | */ |
1716 | if ((vaddr >= vm_sptm_offsets.slid_base) && (vaddr < vm_sptm_offsets.slid_top)) { |
1717 | return vaddr - vm_sptm_offsets.slide; |
1718 | } |
1719 | |
1720 | if ((vaddr >= vm_txm_offsets.slid_base) && (vaddr < vm_txm_offsets.slid_top)) { |
1721 | return vaddr - vm_txm_offsets.slide; |
1722 | } |
1723 | #endif /* CONFIG_SPTM */ |
1724 | |
1725 | return vaddr - vm_kernel_slide; |
1726 | } |
1727 | |
1728 | extern tt_entry_t *arm_kva_to_tte(vm_offset_t va); |
1729 | |
1730 | kern_return_t |
1731 | ml_static_protect( |
1732 | vm_offset_t vaddr, /* kernel virtual address */ |
1733 | vm_size_t size, |
1734 | vm_prot_t new_prot __unused) |
1735 | { |
1736 | #if CONFIG_SPTM |
1737 | /** |
1738 | * Retype any frames that may be passed to the VM to XNU_DEFAULT. |
1739 | */ |
1740 | for (vm_offset_t sptm_vaddr_cur = vaddr; sptm_vaddr_cur < trunc_page_64(vaddr + size); sptm_vaddr_cur += PAGE_SIZE) { |
/* Check if this frame is XNU_DEFAULT and only retype it if it is not */
1742 | sptm_paddr_t sptm_paddr_cur = kvtophys_nofail(sptm_vaddr_cur); |
1743 | sptm_frame_type_t current_type = sptm_get_frame_type(sptm_paddr_cur); |
1744 | if (current_type != XNU_DEFAULT) { |
1745 | sptm_retype_params_t retype_params = {.raw = SPTM_RETYPE_PARAMS_NULL}; |
1746 | sptm_retype(sptm_paddr_cur, current_type, XNU_DEFAULT, retype_params); |
1747 | } |
1748 | } |
1749 | |
1750 | return KERN_SUCCESS; |
1751 | #else /* CONFIG_SPTM */ |
1752 | pt_entry_t arm_prot = 0; |
1753 | pt_entry_t arm_block_prot = 0; |
1754 | vm_offset_t vaddr_cur; |
1755 | ppnum_t ppn; |
1756 | kern_return_t result = KERN_SUCCESS; |
1757 | |
1758 | if (vaddr < physmap_base) { |
1759 | panic("ml_static_protect(): %p < %p" , (void *) vaddr, (void *) physmap_base); |
1760 | return KERN_FAILURE; |
1761 | } |
1762 | |
1763 | assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */ |
1764 | |
1765 | if ((new_prot & VM_PROT_WRITE) && (new_prot & VM_PROT_EXECUTE)) { |
1766 | panic("ml_static_protect(): WX request on %p" , (void *) vaddr); |
1767 | } |
1768 | if (lockdown_done && (new_prot & VM_PROT_EXECUTE)) { |
1769 | panic("ml_static_protect(): attempt to inject executable mapping on %p" , (void *) vaddr); |
1770 | } |
1771 | |
1772 | /* Set up the protection bits, and block bits so we can validate block mappings. */ |
1773 | if (new_prot & VM_PROT_WRITE) { |
1774 | arm_prot |= ARM_PTE_AP(AP_RWNA); |
1775 | arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RWNA); |
1776 | } else { |
1777 | arm_prot |= ARM_PTE_AP(AP_RONA); |
1778 | arm_block_prot |= ARM_TTE_BLOCK_AP(AP_RONA); |
1779 | } |
1780 | |
1781 | arm_prot |= ARM_PTE_NX; |
1782 | arm_block_prot |= ARM_TTE_BLOCK_NX; |
1783 | |
1784 | if (!(new_prot & VM_PROT_EXECUTE)) { |
1785 | arm_prot |= ARM_PTE_PNX; |
1786 | arm_block_prot |= ARM_TTE_BLOCK_PNX; |
1787 | } |
1788 | |
1789 | for (vaddr_cur = vaddr; |
1790 | vaddr_cur < trunc_page_64(vaddr + size); |
1791 | vaddr_cur += PAGE_SIZE) { |
ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1793 | if (ppn != (vm_offset_t) NULL) { |
1794 | tt_entry_t *tte2; |
1795 | pt_entry_t *pte_p; |
1796 | pt_entry_t ptmp; |
1797 | |
1798 | #if XNU_MONITOR |
1799 | assert(!pmap_is_monitor(ppn)); |
1800 | assert(!TEST_PAGE_RATIO_4); |
1801 | #endif |
1802 | |
tte2 = arm_kva_to_tte(vaddr_cur);
1804 | |
1805 | if (((*tte2) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) { |
1806 | if ((((*tte2) & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) && |
1807 | ((*tte2 & (ARM_TTE_BLOCK_NXMASK | ARM_TTE_BLOCK_PNXMASK | ARM_TTE_BLOCK_APMASK)) == arm_block_prot)) { |
1808 | /* |
1809 | * We can support ml_static_protect on a block mapping if the mapping already has |
1810 | * the desired protections. We still want to run checks on a per-page basis. |
1811 | */ |
1812 | continue; |
1813 | } |
1814 | |
1815 | result = KERN_FAILURE; |
1816 | break; |
1817 | } |
1818 | |
pte_p = (pt_entry_t *)&((tt_entry_t*)(phystokv((*tte2) & ARM_TTE_TABLE_MASK)))[(((vaddr_cur) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)];
1820 | ptmp = *pte_p; |
1821 | |
1822 | if ((ptmp & ARM_PTE_HINT_MASK) && ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot)) { |
1823 | /* |
1824 | * The contiguous hint is similar to a block mapping for ml_static_protect; if the existing |
1825 | * protections do not match the desired protections, then we will fail (as we cannot update |
1826 | * this mapping without updating other mappings as well). |
1827 | */ |
1828 | result = KERN_FAILURE; |
1829 | break; |
1830 | } |
1831 | |
1832 | __unreachable_ok_push |
1833 | if (TEST_PAGE_RATIO_4) { |
1834 | { |
1835 | unsigned int i; |
1836 | pt_entry_t *ptep_iter; |
1837 | |
1838 | ptep_iter = pte_p; |
1839 | for (i = 0; i < 4; i++, ptep_iter++) { |
1840 | /* Note that there is a hole in the HINT sanity checking here. */ |
1841 | ptmp = *ptep_iter; |
1842 | |
1843 | /* We only need to update the page tables if the protections do not match. */ |
1844 | if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) { |
1845 | ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot; |
1846 | *ptep_iter = ptmp; |
1847 | } |
1848 | } |
1849 | } |
1850 | } else { |
1851 | ptmp = *pte_p; |
1852 | /* We only need to update the page tables if the protections do not match. */ |
1853 | if ((ptmp & (ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) != arm_prot) { |
1854 | ptmp = (ptmp & ~(ARM_PTE_APMASK | ARM_PTE_PNXMASK | ARM_PTE_NXMASK)) | arm_prot; |
1855 | *pte_p = ptmp; |
1856 | } |
1857 | } |
1858 | __unreachable_ok_pop |
1859 | } |
1860 | } |
1861 | |
1862 | if (vaddr_cur > vaddr) { |
1863 | assert(((vaddr_cur - vaddr) & 0xFFFFFFFF00000000ULL) == 0); |
flush_mmu_tlb_region(vaddr, (uint32_t)(vaddr_cur - vaddr));
1865 | } |
1866 | |
1867 | |
1868 | return result; |
1869 | #endif /* CONFIG_SPTM */ |
1870 | } |
1871 | |
1872 | #if defined(CONFIG_SPTM) |
1873 | /* |
1874 | * Returns true if the given physical address is in one of the boot kernelcache ranges. |
1875 | */ |
1876 | static bool |
1877 | ml_physaddr_in_bootkc_range(vm_offset_t physaddr) |
1878 | { |
1879 | for (int i = 0; i < arm_vm_kernelcache_numranges; i++) { |
1880 | if (physaddr >= arm_vm_kernelcache_ranges[i].start_phys && physaddr < arm_vm_kernelcache_ranges[i].end_phys) { |
1881 | return true; |
1882 | } |
1883 | } |
1884 | return false; |
1885 | } |
1886 | #endif /* defined(CONFIG_SPTM) */ |
1887 | |
1888 | /* |
1889 | * Routine: ml_static_mfree |
1890 | * Function: |
1891 | */ |
1892 | void |
1893 | ml_static_mfree( |
1894 | vm_offset_t vaddr, |
1895 | vm_size_t size) |
1896 | { |
1897 | vm_offset_t vaddr_cur; |
1898 | vm_offset_t paddr_cur; |
1899 | ppnum_t ppn; |
1900 | uint32_t freed_pages = 0; |
1901 | uint32_t freed_kernelcache_pages = 0; |
1902 | |
1903 | |
1904 | /* It is acceptable (if bad) to fail to free. */ |
1905 | if (vaddr < physmap_base) { |
1906 | return; |
1907 | } |
1908 | |
1909 | assert((vaddr & (PAGE_SIZE - 1)) == 0); /* must be page aligned */ |
1910 | |
1911 | for (vaddr_cur = vaddr; |
1912 | vaddr_cur < trunc_page_64(vaddr + size); |
1913 | vaddr_cur += PAGE_SIZE) { |
ppn = pmap_find_phys(kernel_pmap, vaddr_cur);
1915 | if (ppn != (vm_offset_t) NULL) { |
1916 | /* |
1917 | * It is not acceptable to fail to update the protections on a page |
1918 | * we will release to the VM. We need to either panic or continue. |
1919 | * For now, we'll panic (to help flag if there is memory we can |
1920 | * reclaim). |
1921 | */ |
if (ml_static_protect(vaddr_cur, PAGE_SIZE, VM_PROT_WRITE | VM_PROT_READ) != KERN_SUCCESS) {
panic("Failed ml_static_mfree on %p", (void *) vaddr_cur);
1924 | } |
1925 | |
1926 | paddr_cur = ptoa(ppn); |
1927 | |
1928 | |
vm_page_create(ppn, (ppn + 1));
1930 | freed_pages++; |
1931 | #if defined(CONFIG_SPTM) |
1932 | if (ml_physaddr_in_bootkc_range(paddr_cur)) { |
1933 | #else |
1934 | if (paddr_cur >= arm_vm_kernelcache_phys_start && paddr_cur < arm_vm_kernelcache_phys_end) { |
1935 | #endif |
1936 | freed_kernelcache_pages++; |
1937 | } |
1938 | } |
1939 | } |
1940 | vm_page_lockspin_queues(); |
1941 | vm_page_wire_count -= freed_pages; |
1942 | vm_page_wire_count_initial -= freed_pages; |
1943 | vm_page_kernelcache_count -= freed_kernelcache_pages; |
1944 | vm_page_unlock_queues(); |
1945 | #if DEBUG |
1946 | kprintf("ml_static_mfree: Released 0x%x pages at VA %p, size:0x%llx, last ppn: 0x%x, +%d bad\n" , freed_pages, (void *)vaddr, (uint64_t)size, ppn, bad_page_cnt); |
1947 | #endif |
1948 | } |
1949 | |
1950 | /* |
1951 | * Routine: ml_page_protection_type |
1952 | * Function: Returns the type of page protection that the system supports. |
1953 | */ |
1954 | ml_page_protection_t |
1955 | ml_page_protection_type(void) |
1956 | { |
1957 | #if CONFIG_SPTM |
1958 | return 2; |
1959 | #elif XNU_MONITOR |
1960 | return 1; |
1961 | #else |
1962 | return 0; |
1963 | #endif |
1964 | } |
1965 | |
1966 | /* virtual to physical on wired pages */ |
1967 | vm_offset_t |
1968 | ml_vtophys(vm_offset_t vaddr) |
1969 | { |
return kvtophys(vaddr);
1971 | } |
1972 | |
1973 | /* |
1974 | * Routine: ml_nofault_copy |
1975 | * Function: Perform a physical mode copy if the source and destination have |
1976 | * valid translations in the kernel pmap. If translations are present, they are |
1977 | * assumed to be wired; e.g., no attempt is made to guarantee that the |
1978 | * translations obtained remain valid for the duration of the copy process. |
1979 | */ |
1980 | vm_size_t |
1981 | ml_nofault_copy(vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size) |
1982 | { |
1983 | addr64_t cur_phys_dst, cur_phys_src; |
1984 | vm_size_t count, nbytes = 0; |
1985 | |
1986 | while (size > 0) { |
if (!(cur_phys_src = kvtophys(virtsrc))) {
break;
}
if (!(cur_phys_dst = kvtophys(virtdst))) {
1991 | break; |
1992 | } |
1993 | if (!pmap_valid_address(trunc_page_64(cur_phys_dst)) || |
1994 | !pmap_valid_address(trunc_page_64(cur_phys_src))) { |
1995 | break; |
1996 | } |
1997 | count = PAGE_SIZE - (cur_phys_src & PAGE_MASK); |
1998 | if (count > (PAGE_SIZE - (cur_phys_dst & PAGE_MASK))) { |
1999 | count = PAGE_SIZE - (cur_phys_dst & PAGE_MASK); |
2000 | } |
2001 | if (count > size) { |
2002 | count = size; |
2003 | } |
2004 | |
bcopy_phys(cur_phys_src, cur_phys_dst, count);
2006 | |
2007 | nbytes += count; |
2008 | virtsrc += count; |
2009 | virtdst += count; |
2010 | size -= count; |
2011 | } |
2012 | |
2013 | return nbytes; |
2014 | } |
2015 | |
2016 | /* |
2017 | * Routine: ml_validate_nofault |
* Function: Validate that this address range has valid translations
* in the kernel pmap. If translations are present, they are
* assumed to be wired; i.e. no attempt is made to guarantee
* that the translations persist after the check.
2022 | * Returns: TRUE if the range is mapped and will not cause a fault, |
2023 | * FALSE otherwise. |
2024 | */ |
2025 | |
2026 | boolean_t |
2027 | ml_validate_nofault( |
2028 | vm_offset_t virtsrc, vm_size_t size) |
2029 | { |
2030 | addr64_t cur_phys_src; |
2031 | uint32_t count; |
2032 | |
2033 | while (size > 0) { |
if (!(cur_phys_src = kvtophys(virtsrc))) {
2035 | return FALSE; |
2036 | } |
2037 | if (!pmap_valid_address(trunc_page_64(cur_phys_src))) { |
2038 | return FALSE; |
2039 | } |
2040 | count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); |
2041 | if (count > size) { |
2042 | count = (uint32_t)size; |
2043 | } |
2044 | |
2045 | virtsrc += count; |
2046 | size -= count; |
2047 | } |
2048 | |
2049 | return TRUE; |
2050 | } |
2051 | |
2052 | void |
2053 | ml_get_bouncepool_info(vm_offset_t * phys_addr, vm_size_t * size) |
2054 | { |
2055 | *phys_addr = 0; |
2056 | *size = 0; |
2057 | } |
2058 | |
2059 | void |
2060 | active_rt_threads(__unused boolean_t active) |
2061 | { |
2062 | } |
2063 | |
2064 | static void |
2065 | cpu_qos_cb_default(__unused int urgency, __unused uint64_t qos_param1, __unused uint64_t qos_param2) |
2066 | { |
2067 | return; |
2068 | } |
2069 | |
2070 | cpu_qos_update_t cpu_qos_update = cpu_qos_cb_default; |
2071 | |
2072 | void |
2073 | cpu_qos_update_register(cpu_qos_update_t cpu_qos_cb) |
2074 | { |
2075 | if (cpu_qos_cb != NULL) { |
2076 | cpu_qos_update = cpu_qos_cb; |
2077 | } else { |
2078 | cpu_qos_update = cpu_qos_cb_default; |
2079 | } |
2080 | } |
2081 | |
2082 | void |
2083 | thread_tell_urgency(thread_urgency_t urgency, uint64_t rt_period, uint64_t rt_deadline, uint64_t sched_latency __unused, __unused thread_t nthread) |
2084 | { |
2085 | SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0); |
2086 | |
2087 | cpu_qos_update((int)urgency, rt_period, rt_deadline); |
2088 | |
2089 | SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0); |
2090 | } |
2091 | |
2092 | void |
2093 | machine_run_count(__unused uint32_t count) |
2094 | { |
2095 | } |
2096 | |
2097 | processor_t |
2098 | machine_choose_processor(__unused processor_set_t pset, processor_t processor) |
2099 | { |
2100 | return processor; |
2101 | } |
2102 | |
2103 | #if KASAN |
2104 | vm_offset_t ml_stack_base(void); |
2105 | vm_size_t ml_stack_size(void); |
2106 | |
2107 | vm_offset_t |
2108 | ml_stack_base(void) |
2109 | { |
2110 | uintptr_t local = (uintptr_t) &local; |
2111 | vm_offset_t intstack_top_ptr; |
2112 | |
2113 | intstack_top_ptr = getCpuDatap()->intstack_top; |
2114 | if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) { |
2115 | return intstack_top_ptr - INTSTACK_SIZE; |
2116 | } else { |
2117 | return current_thread()->kernel_stack; |
2118 | } |
2119 | } |
2120 | vm_size_t |
2121 | ml_stack_size(void) |
2122 | { |
2123 | uintptr_t local = (uintptr_t) &local; |
2124 | vm_offset_t intstack_top_ptr; |
2125 | |
2126 | intstack_top_ptr = getCpuDatap()->intstack_top; |
2127 | if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) { |
2128 | return INTSTACK_SIZE; |
2129 | } else { |
2130 | return kernel_stack_size; |
2131 | } |
2132 | } |
2133 | #endif |
2134 | |
2135 | #ifdef CONFIG_KCOV |
2136 | |
2137 | kcov_cpu_data_t * |
2138 | current_kcov_data(void) |
2139 | { |
return &current_cpu_datap()->cpu_kcov_data;
2141 | } |
2142 | |
2143 | kcov_cpu_data_t * |
2144 | cpu_kcov_data(int cpuid) |
2145 | { |
2146 | return &cpu_datap(cpuid)->cpu_kcov_data; |
2147 | } |
2148 | |
2149 | #endif /* CONFIG_KCOV */ |
2150 | |
2151 | boolean_t |
2152 | machine_timeout_suspended(void) |
2153 | { |
2154 | return FALSE; |
2155 | } |
2156 | |
2157 | kern_return_t |
2158 | ml_interrupt_prewarm(__unused uint64_t deadline) |
2159 | { |
2160 | return KERN_FAILURE; |
2161 | } |
2162 | |
2163 | /* |
2164 | * Assumes fiq, irq disabled. |
2165 | */ |
2166 | void |
2167 | ml_set_decrementer(uint32_t dec_value) |
2168 | { |
2169 | cpu_data_t *cdp = getCpuDatap(); |
2170 | |
2171 | assert(ml_get_interrupts_enabled() == FALSE); |
2172 | cdp->cpu_decrementer = dec_value; |
2173 | |
2174 | if (cdp->cpu_set_decrementer_func) { |
2175 | cdp->cpu_set_decrementer_func(dec_value); |
2176 | } else { |
2177 | __builtin_arm_wsr64("CNTV_TVAL_EL0" , (uint64_t)dec_value); |
2178 | } |
2179 | } |
2180 | |
2181 | /** |
2182 | * Perform a read of the timebase which is permitted to be executed |
2183 | * speculatively and/or out of program order. |
2184 | */ |
2185 | static inline uint64_t |
2186 | speculative_timebase(void) |
2187 | { |
2188 | return __builtin_arm_rsr64("CNTVCT_EL0" ); |
2189 | } |
2190 | |
2191 | /** |
2192 | * Read a non-speculative view of the timebase if one is available, |
* otherwise fall back on an ISB to prevent speculation and
2194 | * enforce ordering. |
2195 | */ |
2196 | static inline uint64_t |
2197 | nonspeculative_timebase(void) |
2198 | { |
2199 | #if defined(HAS_ACNTVCT) |
2200 | return __builtin_arm_rsr64("S3_4_c15_c10_6" ); |
2201 | #elif __ARM_ARCH_8_6__ |
2202 | return __builtin_arm_rsr64("CNTVCTSS_EL0" ); |
2203 | #else |
2204 | // ISB required by ARMV7C.b section B8.1.2 & ARMv8 section D6.1.2 |
2205 | // "Reads of CNT[PV]CT[_EL0] can occur speculatively and out of order relative |
2206 | // to other instructions executed on the same processor." |
2207 | __builtin_arm_isb(ISB_SY); |
2208 | return speculative_timebase(); |
2209 | #endif |
2210 | } |
2211 | |
2212 | |
2213 | uint64_t |
2214 | ml_get_hwclock() |
2215 | { |
2216 | uint64_t timebase = nonspeculative_timebase(); |
2217 | return timebase; |
2218 | } |
2219 | |
2220 | uint64_t |
2221 | ml_get_timebase() |
2222 | { |
2223 | uint64_t clock, timebase; |
2224 | |
2225 | //the retry is for the case where S2R catches us in the middle of this. see rdar://77019633 |
2226 | do { |
2227 | timebase = getCpuDatap()->cpu_base_timebase; |
2228 | os_compiler_barrier(); |
2229 | clock = ml_get_hwclock(); |
2230 | os_compiler_barrier(); |
2231 | } while (getCpuDatap()->cpu_base_timebase != timebase); |
2232 | |
2233 | return clock + timebase; |
2234 | } |
2235 | |
2236 | /** |
2237 | * Issue a barrier that guarantees all prior memory accesses will complete |
2238 | * before any subsequent timebase reads. |
2239 | */ |
2240 | void |
2241 | ml_memory_to_timebase_fence(void) |
2242 | { |
2243 | __builtin_arm_dmb(DMB_SY); |
2244 | const uint64_t take_backwards_branch = 0; |
2245 | asm volatile ( |
2246 | "1:" |
2247 | "ldr x0, [%[take_backwards_branch]]" "\n" |
2248 | "cbnz x0, 1b" "\n" |
2249 | : |
2250 | : [take_backwards_branch] "r" (&take_backwards_branch) |
2251 | : "x0" |
2252 | ); |
2253 | |
2254 | /* throwaway read to prevent ml_get_speculative_timebase() reordering */ |
2255 | (void)ml_get_hwclock(); |
2256 | } |
2257 | |
2258 | /** |
2259 | * Issue a barrier that guarantees all prior timebase reads will |
2260 | * be ordered before any subsequent memory accesses. |
2261 | */ |
2262 | void |
2263 | ml_timebase_to_memory_fence(void) |
2264 | { |
2265 | __builtin_arm_isb(ISB_SY); |
2266 | } |
2267 | |
2268 | /* |
2269 | * Get the speculative timebase without an ISB. |
2270 | */ |
2271 | uint64_t |
2272 | ml_get_speculative_timebase(void) |
2273 | { |
2274 | uint64_t clock, timebase; |
2275 | |
2276 | //the retry is for the case where S2R catches us in the middle of this. see rdar://77019633&77697482 |
2277 | do { |
2278 | timebase = getCpuDatap()->cpu_base_timebase; |
2279 | os_compiler_barrier(); |
2280 | clock = speculative_timebase(); |
2281 | |
2282 | os_compiler_barrier(); |
2283 | } while (getCpuDatap()->cpu_base_timebase != timebase); |
2284 | |
2285 | return clock + timebase; |
2286 | } |
2287 | |
2288 | uint64_t |
2289 | ml_get_timebase_entropy(void) |
2290 | { |
2291 | return ml_get_speculative_timebase(); |
2292 | } |
2293 | |
2294 | uint32_t |
2295 | ml_get_decrementer(void) |
2296 | { |
2297 | cpu_data_t *cdp = getCpuDatap(); |
2298 | uint32_t dec; |
2299 | |
2300 | assert(ml_get_interrupts_enabled() == FALSE); |
2301 | |
2302 | if (cdp->cpu_get_decrementer_func) { |
2303 | dec = cdp->cpu_get_decrementer_func(); |
2304 | } else { |
2305 | uint64_t wide_val; |
2306 | |
wide_val = __builtin_arm_rsr64("CNTV_TVAL_EL0");
2308 | dec = (uint32_t)wide_val; |
2309 | assert(wide_val == (uint64_t)dec); |
2310 | } |
2311 | |
2312 | return dec; |
2313 | } |
2314 | |
2315 | boolean_t |
2316 | ml_get_timer_pending(void) |
2317 | { |
uint64_t cntv_ctl = __builtin_arm_rsr64("CNTV_CTL_EL0");
2319 | return ((cntv_ctl & CNTV_CTL_EL0_ISTATUS) != 0) ? TRUE : FALSE; |
2320 | } |
2321 | |
2322 | __attribute__((noreturn)) |
2323 | void |
2324 | platform_syscall(arm_saved_state_t *state) |
2325 | { |
2326 | uint32_t code; |
2327 | |
2328 | #define platform_syscall_kprintf(x...) /* kprintf("platform_syscall: " x) */ |
2329 | |
code = (uint32_t)get_saved_state_reg(state, 3);
2331 | |
2332 | KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_START, |
2333 | get_saved_state_reg(state, 0), |
2334 | get_saved_state_reg(state, 1), |
2335 | get_saved_state_reg(state, 2)); |
2336 | |
2337 | switch (code) { |
2338 | case 2: |
2339 | /* set cthread */ |
2340 | platform_syscall_kprintf("set cthread self.\n" ); |
2341 | thread_set_cthread_self(get_saved_state_reg(iss: state, reg: 0)); |
2342 | break; |
2343 | case 3: |
2344 | /* get cthread */ |
2345 | platform_syscall_kprintf("get cthread self.\n" ); |
2346 | set_user_saved_state_reg(iss: state, reg: 0, value: thread_get_cthread_self()); |
2347 | break; |
2348 | case 0: /* I-Cache flush (removed) */ |
2349 | case 1: /* D-Cache flush (removed) */ |
2350 | default: |
2351 | platform_syscall_kprintf("unknown: %d\n" , code); |
2352 | break; |
2353 | } |
2354 | |
2355 | KDBG(MACHDBG_CODE(DBG_MACH_MACHDEP_EXCP_SC_ARM, code) | DBG_FUNC_END, |
2356 | get_saved_state_reg(state, 0)); |
2357 | |
2358 | thread_exception_return(); |
2359 | } |
2360 | |
2361 | static void |
2362 | _enable_timebase_event_stream(uint32_t bit_index) |
2363 | { |
2364 | uint64_t cntkctl; /* One wants to use 32 bits, but "mrs" prefers it this way */ |
2365 | |
2366 | if (bit_index >= 64) { |
2367 | panic("%s: invalid bit index (%u)" , __FUNCTION__, bit_index); |
2368 | } |
2369 | |
2370 | __asm__ volatile ("mrs %0, CNTKCTL_EL1" : "=r" (cntkctl)); |
2371 | |
2372 | cntkctl |= (bit_index << CNTKCTL_EL1_EVENTI_SHIFT); |
2373 | cntkctl |= CNTKCTL_EL1_EVNTEN; |
2374 | cntkctl |= CNTKCTL_EL1_EVENTDIR; /* 1->0; why not? */ |
2375 | |
2376 | /* |
2377 | * If the SOC supports it (and it isn't broken), enable |
2378 | * EL0 access to the timebase registers. |
2379 | */ |
2380 | if (user_timebase_type() != USER_TIMEBASE_NONE) { |
2381 | cntkctl |= (CNTKCTL_EL1_PL0PCTEN | CNTKCTL_EL1_PL0VCTEN); |
2382 | } |
2383 | |
2384 | __builtin_arm_wsr64("CNTKCTL_EL1" , cntkctl); |
2385 | } |
2386 | |
2387 | /* |
2388 | * Turn timer on, unmask that interrupt. |
2389 | */ |
2390 | static void |
2391 | _enable_virtual_timer(void) |
2392 | { |
2393 | uint64_t cntvctl = CNTV_CTL_EL0_ENABLE; /* One wants to use 32 bits, but "mrs" prefers it this way */ |
2394 | |
2395 | __builtin_arm_wsr64("CNTV_CTL_EL0" , cntvctl); |
2396 | /* disable the physical timer as a precaution, as its registers reset to architecturally unknown values */ |
2397 | __builtin_arm_wsr64("CNTP_CTL_EL0" , CNTP_CTL_EL0_IMASKED); |
2398 | } |
2399 | |
2400 | void |
2401 | fiq_context_init(boolean_t enable_fiq __unused) |
2402 | { |
2403 | /* Interrupts still disabled. */ |
2404 | assert(ml_get_interrupts_enabled() == FALSE); |
2405 | _enable_virtual_timer(); |
2406 | } |
2407 | |
2408 | void |
2409 | wfe_timeout_init(void) |
2410 | { |
_enable_timebase_event_stream(arm64_eventi);
2412 | } |
2413 | |
2414 | /** |
2415 | * Configures, but does not enable, the WFE event stream. The event stream |
2416 | * generates an event at a set interval to act as a timeout for WFEs. |
2417 | * |
2418 | * This function sets the static global variable arm64_eventi to be the proper |
2419 | * bit index for the CNTKCTL_EL1.EVENTI field to generate events at the correct |
2420 | * period (1us unless specified by the "wfe_events_sec" boot-arg). arm64_eventi |
2421 | * is used by wfe_timeout_init to actually poke the registers and enable the |
2422 | * event stream. |
2423 | * |
2424 | * The CNTKCTL_EL1.EVENTI field contains the index of the bit of CNTVCT_EL0 that |
2425 | * is the trigger for the system to generate an event. The trigger can occur on |
2426 | * either the rising or falling edge of the bit depending on the value of |
2427 | * CNTKCTL_EL1.EVNTDIR. This is arbitrary for our purposes, so we use the |
2428 | * falling edge (1->0) transition to generate events. |
2429 | */ |
2430 | void |
2431 | wfe_timeout_configure(void) |
2432 | { |
2433 | /* Could fill in our own ops here, if we needed them */ |
2434 | uint64_t ticks_per_sec, ticks_per_event, events_per_sec = 0; |
2435 | uint32_t bit_index; |
2436 | |
if (PE_parse_boot_argn("wfe_events_sec", &events_per_sec, sizeof(events_per_sec))) {
2438 | if (events_per_sec <= 0) { |
2439 | events_per_sec = 1; |
2440 | } else if (events_per_sec > USEC_PER_SEC) { |
2441 | events_per_sec = USEC_PER_SEC; |
2442 | } |
2443 | } else { |
2444 | events_per_sec = USEC_PER_SEC; |
2445 | } |
2446 | ticks_per_sec = gPEClockFrequencyInfo.timebase_frequency_hz; |
2447 | ticks_per_event = ticks_per_sec / events_per_sec; |
2448 | |
2449 | /* Bit index of next power of two greater than ticks_per_event */ |
bit_index = flsll(ticks_per_event) - 1;
/* Round up to the next power of two if ticks_per_event is not already a power of two */
2452 | if ((ticks_per_event & ((1 << bit_index) - 1)) != 0) { |
2453 | bit_index++; |
2454 | } |
2455 | |
2456 | /* |
2457 | * The timer can only trigger on rising or falling edge, not both; we don't |
2458 | * care which we trigger on, but we do need to adjust which bit we are |
2459 | * interested in to account for this. |
2460 | * |
2461 | * In particular, we set CNTKCTL_EL1.EVENTDIR to trigger events on the |
2462 | * falling edge of the given bit. Therefore, we must decrement the bit index |
2463 | * by one as when the bit before the one we care about makes a 1 -> 0 |
2464 | * transition, the bit we care about makes a 0 -> 1 transition. |
2465 | * |
2466 | * For example if we want an event generated every 8 ticks (if we calculated |
2467 | * a bit_index of 3), we would want the event to be generated whenever the |
2468 | * lower four bits of the counter transition from 0b0111 -> 0b1000. We can |
2469 | * see that the bit at index 2 makes a falling transition in this scenario, |
2470 | * so we would want EVENTI to be 2 instead of 3. |
2471 | */ |
2472 | if (bit_index != 0) { |
2473 | bit_index--; |
2474 | } |
2475 | |
2476 | arm64_eventi = bit_index; |
2477 | } |
2478 | |
2479 | boolean_t |
2480 | ml_delay_should_spin(uint64_t interval) |
2481 | { |
2482 | cpu_data_t *cdp = getCpuDatap(); |
2483 | |
2484 | if (cdp->cpu_idle_latency) { |
2485 | return (interval < cdp->cpu_idle_latency) ? TRUE : FALSE; |
2486 | } else { |
2487 | /* |
2488 | * Early boot, latency is unknown. Err on the side of blocking, |
2489 | * which should always be safe, even if slow |
2490 | */ |
2491 | return FALSE; |
2492 | } |
2493 | } |
2494 | |
2495 | boolean_t |
2496 | ml_thread_is64bit(thread_t thread) |
2497 | { |
2498 | return thread_is_64bit_addr(thread); |
2499 | } |
2500 | |
2501 | void |
2502 | ml_delay_on_yield(void) |
2503 | { |
2504 | #if DEVELOPMENT || DEBUG |
2505 | if (yield_delay_us) { |
2506 | delay(yield_delay_us); |
2507 | } |
2508 | #endif |
2509 | } |
2510 | |
2511 | void |
2512 | ml_timer_evaluate(void) |
2513 | { |
2514 | } |
2515 | |
2516 | boolean_t |
2517 | ml_timer_forced_evaluation(void) |
2518 | { |
2519 | return FALSE; |
2520 | } |
2521 | |
2522 | void |
2523 | ml_gpu_stat_update(__unused uint64_t gpu_ns_delta) |
2524 | { |
2525 | /* |
2526 | * For now: update the resource coalition stats of the |
2527 | * current thread's coalition |
2528 | */ |
task_coalition_update_gpu_stats(current_task(), gpu_ns_delta);
2530 | } |
2531 | |
2532 | uint64_t |
2533 | ml_gpu_stat(__unused thread_t t) |
2534 | { |
2535 | return 0; |
2536 | } |
2537 | |
2538 | thread_t |
2539 | current_thread(void) |
2540 | { |
2541 | return current_thread_fast(); |
2542 | } |
2543 | |
2544 | #if defined(HAS_APPLE_PAC) |
2545 | uint8_t |
2546 | ml_task_get_disable_user_jop(task_t task) |
2547 | { |
2548 | assert(task); |
2549 | return task->disable_user_jop; |
2550 | } |
2551 | |
2552 | void |
2553 | ml_task_set_disable_user_jop(task_t task, uint8_t disable_user_jop) |
2554 | { |
2555 | assert(task); |
2556 | task->disable_user_jop = disable_user_jop; |
2557 | } |
2558 | |
2559 | void |
2560 | ml_thread_set_disable_user_jop(thread_t thread, uint8_t disable_user_jop) |
2561 | { |
2562 | assert(thread); |
2563 | if (disable_user_jop) { |
2564 | thread->machine.arm_machine_flags |= ARM_MACHINE_THREAD_DISABLE_USER_JOP; |
2565 | } else { |
2566 | thread->machine.arm_machine_flags &= ~ARM_MACHINE_THREAD_DISABLE_USER_JOP; |
2567 | } |
2568 | } |
2569 | |
2570 | void |
2571 | ml_task_set_rop_pid(task_t task, task_t parent_task, boolean_t inherit) |
2572 | { |
2573 | if (inherit) { |
2574 | task->rop_pid = parent_task->rop_pid; |
2575 | } else { |
2576 | task->rop_pid = early_random(); |
2577 | } |
2578 | } |
2579 | |
2580 | /** |
2581 | * jop_pid may be inherited from the parent task or generated inside the shared |
2582 | * region. Unfortunately these two parameters are available at very different |
2583 | * times during task creation, so we need to split this into two steps. |
2584 | */ |
2585 | void |
2586 | ml_task_set_jop_pid(task_t task, task_t parent_task, boolean_t inherit, boolean_t disable_user_jop) |
2587 | { |
2588 | if (inherit) { |
2589 | task->jop_pid = parent_task->jop_pid; |
2590 | } else if (disable_user_jop) { |
2591 | task->jop_pid = ml_non_arm64e_user_jop_pid(); |
2592 | } else { |
2593 | task->jop_pid = ml_default_jop_pid(); |
2594 | } |
2595 | } |
2596 | |
2597 | void |
2598 | ml_task_set_jop_pid_from_shared_region(task_t task, boolean_t disable_user_jop) |
2599 | { |
2600 | if (disable_user_jop) { |
2601 | task->jop_pid = ml_non_arm64e_user_jop_pid(); |
2602 | return; |
2603 | } |
2604 | |
2605 | vm_shared_region_t sr = vm_shared_region_get(task); |
2606 | /* |
2607 | * If there's no shared region, we can assign the key arbitrarily. This |
2608 | * typically happens when Mach-O image activation failed part of the way |
2609 | * through, and this task is in the middle of dying with SIGKILL anyway. |
2610 | */ |
2611 | if (__improbable(!sr)) { |
2612 | task->jop_pid = early_random(); |
2613 | return; |
2614 | } |
vm_shared_region_deallocate(sr);
2616 | |
2617 | /* |
2618 | * Similarly we have to worry about jetsam having killed the task and |
2619 | * already cleared the shared_region_id. |
2620 | */ |
2621 | task_lock(task); |
2622 | if (task->shared_region_id != NULL) { |
task->jop_pid = shared_region_find_key(task->shared_region_id);
2624 | } else { |
2625 | task->jop_pid = early_random(); |
2626 | } |
2627 | task_unlock(task); |
2628 | } |
2629 | |
2630 | void |
2631 | ml_thread_set_jop_pid(thread_t thread, task_t task) |
2632 | { |
2633 | thread->machine.jop_pid = task->jop_pid; |
2634 | } |
2635 | #endif /* defined(HAS_APPLE_PAC) */ |
2636 | |
2637 | #if DEVELOPMENT || DEBUG |
2638 | static uint64_t minor_badness_suffered = 0; |
2639 | #endif |
2640 | void |
2641 | ml_report_minor_badness(uint32_t __unused badness_id) |
2642 | { |
2643 | #if DEVELOPMENT || DEBUG |
2644 | (void)os_atomic_or(&minor_badness_suffered, 1ULL << badness_id, relaxed); |
2645 | #endif |
2646 | } |
2647 | |
2648 | #if defined(HAS_APPLE_PAC) |
2649 | #if __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM |
2650 | /** |
2651 | * The ARMv8.6 implementation is also safe for non-FPAC CPUs, but less efficient; |
* guest kernels need to use it because they do not know at compile time whether
2653 | * the host CPU supports FPAC. |
2654 | */ |
2655 | |
2656 | /** |
2657 | * Emulates the poisoning done by ARMv8.3-PAuth instructions on auth failure. |
2658 | */ |
2659 | static void * |
2660 | ml_poison_ptr(void *ptr, ptrauth_key key) |
2661 | { |
2662 | bool b_key = key & (1ULL << 0); |
2663 | uint64_t error_code; |
2664 | if (b_key) { |
2665 | error_code = 2; |
2666 | } else { |
2667 | error_code = 1; |
2668 | } |
2669 | |
2670 | bool kernel_pointer = (uintptr_t)ptr & (1ULL << 55); |
2671 | bool data_key = key & (1ULL << 1); |
2672 | /* When PAC is enabled, only userspace data pointers use TBI, regardless of boot parameters */ |
2673 | bool tbi = data_key && !kernel_pointer; |
2674 | unsigned int poison_shift; |
2675 | if (tbi) { |
2676 | poison_shift = 53; |
2677 | } else { |
2678 | poison_shift = 61; |
2679 | } |
2680 | |
2681 | uintptr_t poisoned = (uintptr_t)ptr; |
2682 | poisoned &= ~(3ULL << poison_shift); |
2683 | poisoned |= error_code << poison_shift; |
2684 | return (void *)poisoned; |
2685 | } |
2686 | |
2687 | /* |
2688 | * ptrauth_sign_unauthenticated() reimplemented using asm volatile, forcing the |
2689 | * compiler to assume this operation has side-effects and cannot be reordered |
2690 | */ |
2691 | #define ptrauth_sign_volatile(__value, __suffix, __data) \ |
2692 | ({ \ |
2693 | void *__ret = __value; \ |
2694 | asm volatile ( \ |
2695 | "pac" #__suffix " %[value], %[data]" \ |
2696 | : [value] "+r"(__ret) \ |
2697 | : [data] "r"(__data) \ |
2698 | ); \ |
2699 | __ret; \ |
2700 | }) |
2701 | |
2702 | #define ml_auth_ptr_unchecked_for_key(_ptr, _suffix, _key, _modifier) \ |
2703 | do { \ |
2704 | void *stripped = ptrauth_strip(_ptr, _key); \ |
2705 | void *reauthed = ptrauth_sign_volatile(stripped, _suffix, _modifier); \ |
2706 | if (__probable(_ptr == reauthed)) { \ |
2707 | _ptr = stripped; \ |
2708 | } else { \ |
2709 | _ptr = ml_poison_ptr(stripped, _key); \ |
2710 | } \ |
2711 | } while (0) |
2712 | |
2713 | #define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \ |
2714 | ml_auth_ptr_unchecked_for_key(_ptr, _suffix, ptrauth_key_as ## _suffix, _modifier) |
2715 | #else |
2716 | #define _ml_auth_ptr_unchecked(_ptr, _suffix, _modifier) \ |
2717 | asm volatile ("aut" #_suffix " %[ptr], %[modifier]" : [ptr] "+r"(_ptr) : [modifier] "r"(_modifier)); |
2718 | #endif /* __ARM_ARCH_8_6__ || APPLEVIRTUALPLATFORM */ |
2719 | |
2720 | /** |
2721 | * Authenticates a signed pointer without trapping on failure. |
2722 | * |
2723 | * @warning This function must be called with interrupts disabled. |
2724 | * |
2725 | * @warning Pointer authentication failure should normally be treated as a fatal |
2726 | * error. This function is intended for a handful of callers that cannot panic |
2727 | * on failure, and that understand the risks in handling a poisoned return |
2728 | * value. Other code should generally use the trapping variant |
2729 | * ptrauth_auth_data() instead. |
2730 | * |
2731 | * @param ptr the pointer to authenticate |
2732 | * @param key which key to use for authentication |
2733 | * @param modifier a modifier to mix into the key |
2734 | * @return an authenticated version of ptr, possibly with poison bits set |
2735 | */ |
2736 | void * |
2737 | ml_auth_ptr_unchecked(void *ptr, ptrauth_key key, uint64_t modifier) |
2738 | { |
2739 | switch (key & 0x3) { |
2740 | case ptrauth_key_asia: |
2741 | _ml_auth_ptr_unchecked(ptr, ia, modifier); |
2742 | break; |
2743 | case ptrauth_key_asib: |
2744 | _ml_auth_ptr_unchecked(ptr, ib, modifier); |
2745 | break; |
2746 | case ptrauth_key_asda: |
2747 | _ml_auth_ptr_unchecked(ptr, da, modifier); |
2748 | break; |
2749 | case ptrauth_key_asdb: |
2750 | _ml_auth_ptr_unchecked(ptr, db, modifier); |
2751 | break; |
2752 | } |
2753 | |
2754 | return ptr; |
2755 | } |
2756 | #endif /* defined(HAS_APPLE_PAC) */ |
2757 | |
2758 | #ifdef CONFIG_XNUPOST |
2759 | void |
2760 | ml_expect_fault_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_addr) |
2761 | { |
2762 | thread_t thread = current_thread(); |
2763 | thread->machine.expected_fault_handler = expected_fault_handler; |
2764 | thread->machine.expected_fault_addr = expected_fault_addr; |
2765 | thread->machine.expected_fault_pc = 0; |
2766 | } |
2767 | |
2768 | /** Expect an exception to be thrown at EXPECTED_FAULT_PC */ |
2769 | void |
2770 | ml_expect_fault_pc_begin(expected_fault_handler_t expected_fault_handler, uintptr_t expected_fault_pc) |
2771 | { |
2772 | thread_t thread = current_thread(); |
2773 | thread->machine.expected_fault_handler = expected_fault_handler; |
2774 | thread->machine.expected_fault_addr = 0; |
2775 | uintptr_t raw_func = (uintptr_t)ptrauth_strip( |
2776 | (void *)expected_fault_pc, |
2777 | ptrauth_key_function_pointer); |
2778 | thread->machine.expected_fault_pc = raw_func; |
2779 | } |
2780 | |
2781 | void |
2782 | ml_expect_fault_end(void) |
2783 | { |
2784 | thread_t thread = current_thread(); |
2785 | thread->machine.expected_fault_handler = NULL; |
2786 | thread->machine.expected_fault_addr = 0; |
2787 | thread->machine.expected_fault_pc = 0; |
2788 | } |
2789 | #endif /* CONFIG_XNUPOST */ |
2790 | |
2791 | void |
2792 | ml_hibernate_active_pre(void) |
2793 | { |
2794 | #if HIBERNATION |
2795 | if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) { |
2796 | |
2797 | hibernate_rebuild_vm_structs(); |
2798 | } |
2799 | #endif /* HIBERNATION */ |
2800 | } |
2801 | |
2802 | void |
2803 | ml_hibernate_active_post(void) |
2804 | { |
2805 | #if HIBERNATION |
2806 | if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) { |
2807 | hibernate_machine_init(); |
2808 | hibernate_vm_lock_end(); |
2809 | current_cpu_datap()->cpu_hibernate = 0; |
2810 | } |
2811 | #endif /* HIBERNATION */ |
2812 | } |
2813 | |
2814 | /** |
2815 | * Return back a machine-dependent array of address space regions that should be |
2816 | * reserved by the VM (pre-mapped in the address space). This will prevent user |
2817 | * processes from allocating or deallocating from within these regions. |
2818 | * |
2819 | * @param vm_is64bit True if the process has a 64-bit address space. |
2820 | * @param regions An out parameter representing an array of regions to reserve. |
2821 | * |
2822 | * @return The number of reserved regions returned through `regions`. |
2823 | */ |
2824 | size_t |
2825 | ml_get_vm_reserved_regions(bool vm_is64bit, const struct vm_reserved_region **regions) |
2826 | { |
2827 | assert(regions != NULL); |
2828 | |
2829 | /** |
2830 | * Reserved regions only apply to 64-bit address spaces. This is because |
2831 | * we only expect to grow the maximum user VA address on 64-bit address spaces |
2832 | * (we've essentially already reached the max for 32-bit spaces). The reserved |
2833 | * regions should safely fall outside of the max user VA for 32-bit processes. |
2834 | */ |
2835 | if (vm_is64bit) { |
2836 | *regions = vm_reserved_regions; |
2837 | return ARRAY_COUNT(vm_reserved_regions); |
2838 | } else { |
2839 | /* Don't reserve any VA regions on arm64_32 processes. */ |
2840 | *regions = NULL; |
2841 | return 0; |
2842 | } |
2843 | } |
2844 | |
2845 | /* These WFE recommendations are expected to be updated on a relatively |
2846 | * infrequent cadence, possibly from a different cluster, hence |
2847 | * false cacheline sharing isn't expected to be material |
2848 | */ |
2849 | static uint64_t arm64_cluster_wfe_recs[MAX_CPU_CLUSTERS]; |
2850 | |
2851 | uint32_t |
2852 | ml_update_cluster_wfe_recommendation(uint32_t wfe_cluster_id, uint64_t wfe_timeout_abstime_interval, __unused uint64_t wfe_hint_flags) |
2853 | { |
2854 | assert(wfe_cluster_id < MAX_CPU_CLUSTERS); |
2855 | assert(wfe_timeout_abstime_interval <= ml_wfe_hint_max_interval); |
2856 | os_atomic_store(&arm64_cluster_wfe_recs[wfe_cluster_id], wfe_timeout_abstime_interval, relaxed); |
2857 | return 0; /* Success */ |
2858 | } |
2859 | |
2860 | #if DEVELOPMENT || DEBUG |
2861 | int wfe_rec_max = 0; |
2862 | int wfe_rec_none = 0; |
2863 | uint64_t wfe_rec_override_mat = 0; |
2864 | uint64_t wfe_rec_clamp = 0; |
2865 | #endif |
2866 | |
2867 | uint64_t |
2868 | ml_cluster_wfe_timeout(uint32_t wfe_cluster_id) |
2869 | { |
/* This and its consumer do not synchronize vis-a-vis updates
2871 | * of the recommendation; races are acceptable. |
2872 | */ |
2873 | uint64_t wfet = os_atomic_load(&arm64_cluster_wfe_recs[wfe_cluster_id], relaxed); |
2874 | #if DEVELOPMENT || DEBUG |
2875 | if (wfe_rec_clamp) { |
2876 | wfet = MIN(wfe_rec_clamp, wfet); |
2877 | } |
2878 | |
2879 | if (wfe_rec_max) { |
2880 | for (int i = 0; i < MAX_CPU_CLUSTERS; i++) { |
2881 | if (arm64_cluster_wfe_recs[i] > wfet) { |
2882 | wfet = arm64_cluster_wfe_recs[i]; |
2883 | } |
2884 | } |
2885 | } |
2886 | |
2887 | if (wfe_rec_none) { |
2888 | wfet = 0; |
2889 | } |
2890 | |
2891 | if (wfe_rec_override_mat) { |
2892 | wfet = wfe_rec_override_mat; |
2893 | } |
2894 | #endif |
2895 | return wfet; |
2896 | } |
2897 | |
2898 | __pure2 bool |
2899 | ml_addr_in_non_xnu_stack(__unused uintptr_t addr) |
2900 | { |
2901 | #if CONFIG_SPTM |
2902 | /** |
2903 | * If the address is within one of the SPTM-allocated per-cpu stacks, then |
2904 | * return true. |
2905 | */ |
2906 | if ((addr >= SPTMArgs->cpu_stack_papt_start) && |
2907 | (addr < SPTMArgs->cpu_stack_papt_end)) { |
2908 | return true; |
2909 | } |
2910 | |
2911 | /** |
2912 | * If the address is within one of the TXM thread stacks, then return true. |
2913 | * The SPTM guarantees that these stacks are virtually contiguous. |
2914 | */ |
2915 | if ((addr >= SPTMArgs->txm_thread_stacks[0]) && |
2916 | (addr < SPTMArgs->txm_thread_stacks[MAX_CPUS - 1])) { |
2917 | return true; |
2918 | } |
2919 | |
2920 | return false; |
2921 | #elif XNU_MONITOR |
2922 | return (addr >= (uintptr_t)pmap_stacks_start) && (addr < (uintptr_t)pmap_stacks_end); |
2923 | #else |
2924 | return false; |
2925 | #endif /* CONFIG_SPTM || XNU_MONITOR */ |
2926 | } |
2927 | |
2928 | uint64_t |
2929 | ml_get_backtrace_pc(struct arm_saved_state *state) |
2930 | { |
2931 | assert((state != NULL) && is_saved_state64(state)); |
2932 | |
2933 | #if CONFIG_SPTM |
2934 | /** |
2935 | * On SPTM-based systems, when a non-XNU domain (e.g., SPTM) is interrupted, |
2936 | * the PC value saved into the state is not the actual PC at the interrupted |
2937 | * point, but a fixed value to a handler that knows how to re-enter the |
2938 | * interrupted domain. The interrupted domain's actual PC value is saved |
2939 | * into x14, so let's return that instead. |
2940 | */ |
2941 | if (ml_addr_in_non_xnu_stack(get_saved_state_fp(state))) { |
2942 | return saved_state64(state)->x[14]; |
2943 | } |
2944 | #endif /* CONFIG_SPTM */ |
2945 | |
return get_saved_state_pc(state);
2947 | } |
2948 | |
2949 | |
2950 | bool |
2951 | ml_paddr_is_exclaves_owned(vm_offset_t paddr) |
2952 | { |
2953 | #if CONFIG_SPTM |
2954 | const sptm_frame_type_t type = sptm_get_frame_type(paddr); |
2955 | return type == SK_DEFAULT || type == SK_IO; // SK_SHARED_R[OW] are not exclusively exclaves frames |
2956 | #else |
2957 | #pragma unused(paddr) |
2958 | return false; |
2959 | #endif /* CONFIG_SPTM */ |
2960 | } |
2961 | |
2962 | /** |
2963 | * Panic because an ARM saved-state accessor expected user saved-state but was |
2964 | * passed non-user saved-state. |
2965 | * |
2966 | * @param ss invalid saved-state (CPSR.M != EL0) |
2967 | */ |
2968 | void |
2969 | ml_panic_on_invalid_old_cpsr(const arm_saved_state_t *ss) |
2970 | { |
2971 | panic("invalid CPSR in user saved-state %p" , ss); |
2972 | } |
2973 | |
2974 | /** |
2975 | * Panic because an ARM saved-state accessor was passed user saved-state and |
2976 | * asked to assign a non-user CPSR. |
2977 | * |
2978 | * @param ss original EL0 saved-state |
2979 | * @param cpsr invalid new CPSR value (CPSR.M != EL0) |
2980 | */ |
2981 | void |
2982 | ml_panic_on_invalid_new_cpsr(const arm_saved_state_t *ss, uint32_t cpsr) |
2983 | { |
2984 | panic("attempt to set non-user CPSR %#010x on user saved-state %p" , cpsr, ss); |
2985 | } |
2986 | |
2987 | /** |
2988 | * Explicitly preallocates a floating point save area. |
2989 | * This is a noop on ARM because preallocation isn't required at this time. |
2990 | */ |
2991 | void |
2992 | ml_fp_save_area_prealloc(void) |
2993 | { |
2994 | } |
2995 | |