1 | /* |
2 | * Copyright (c) 2007 Apple Inc. All rights reserved. |
3 | * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. |
4 | * |
5 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
6 | * |
7 | * This file contains Original Code and/or Modifications of Original Code |
8 | * as defined in and that are subject to the Apple Public Source License |
9 | * Version 2.0 (the 'License'). You may not use this file except in |
10 | * compliance with the License. The rights granted to you under the License |
11 | * may not be used to create, or enable the creation or redistribution of, |
12 | * unlawful or unlicensed copies of an Apple operating system, or to |
13 | * circumvent, violate, or enable the circumvention or violation of, any |
14 | * terms of an Apple operating system software license agreement. |
15 | * |
16 | * Please obtain a copy of the License at |
17 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
18 | * |
19 | * The Original Code and all software distributed under the License are |
20 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
21 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
22 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
23 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
24 | * Please see the License for the specific language governing rights and |
25 | * limitations under the License. |
26 | * |
27 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
28 | */ |
29 | /* |
30 | * @OSF_COPYRIGHT@ |
31 | */ |
32 | /* |
33 | * @APPLE_FREE_COPYRIGHT@ |
34 | */ |
35 | /* |
36 | * File: arm/commpage/commpage.c |
37 | * Purpose: Set up and export a RO/RW page |
38 | */ |
39 | #include <libkern/section_keywords.h> |
40 | #include <mach/mach_types.h> |
41 | #include <mach/machine.h> |
42 | #include <mach/vm_map.h> |
43 | #include <machine/cpu_capabilities.h> |
44 | #include <machine/commpage.h> |
45 | #include <machine/pmap.h> |
46 | #include <vm/vm_kern.h> |
47 | #include <vm/vm_map.h> |
48 | #include <vm/vm_protos.h> |
49 | #include <ipc/ipc_port.h> |
50 | #include <arm/cpuid.h> /* for cpuid_info() & cache_info() */ |
51 | #include <arm/rtclock.h> |
52 | #include <libkern/OSAtomic.h> |
53 | #include <stdatomic.h> |
54 | |
55 | #include <sys/kdebug.h> |
56 | |
57 | #if CONFIG_ATM |
58 | #include <atm/atm_internal.h> |
59 | #endif |
60 | |
61 | static void commpage_init_cpu_capabilities( void ); |
62 | static int commpage_cpus( void ); |
63 | |
SECURITY_READ_ONLY_LATE(vm_address_t) commPagePtr = 0;
65 | SECURITY_READ_ONLY_LATE(vm_address_t) sharedpage_rw_addr = 0; |
66 | SECURITY_READ_ONLY_LATE(uint32_t) _cpu_capabilities = 0; |
67 | |
68 | /* For sysctl access from BSD side */ |
69 | extern int gARMv81Atomics; |
70 | extern int gARMv8Crc32; |
71 | |
72 | void |
73 | commpage_populate( |
74 | void) |
75 | { |
76 | uint16_t c2; |
77 | int cpufamily; |
78 | |
79 | sharedpage_rw_addr = pmap_create_sharedpage(); |
80 | commPagePtr = (vm_address_t)_COMM_PAGE_BASE_ADDRESS; |
81 | |
82 | *((uint16_t*)(_COMM_PAGE_VERSION+_COMM_PAGE_RW_OFFSET)) = (uint16_t) _COMM_PAGE_THIS_VERSION; |
83 | |
84 | commpage_init_cpu_capabilities(); |
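	/*
	 * Publish an all-zero timestamp first: a TimeStamp_tick of 0 marks the
	 * gettimeofday data as not yet valid, so readers must treat it as
	 * invalid until the clock subsystem supplies real values.
	 */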
85 | commpage_set_timestamp(0, 0, 0, 0, 0); |
86 | |
87 | if (_cpu_capabilities & kCache32) |
88 | c2 = 32; |
89 | else if (_cpu_capabilities & kCache64) |
90 | c2 = 64; |
91 | else if (_cpu_capabilities & kCache128) |
92 | c2 = 128; |
93 | else |
94 | c2 = 0; |
95 | |
96 | *((uint16_t*)(_COMM_PAGE_CACHE_LINESIZE+_COMM_PAGE_RW_OFFSET)) = c2; |
97 | *((uint32_t*)(_COMM_PAGE_SPIN_COUNT+_COMM_PAGE_RW_OFFSET)) = 1; |
98 | |
99 | commpage_update_active_cpus(); |
100 | cpufamily = cpuid_get_cpufamily(); |
101 | |
102 | /* machine_info valid after ml_get_max_cpus() */ |
103 | *((uint8_t*)(_COMM_PAGE_PHYSICAL_CPUS+_COMM_PAGE_RW_OFFSET)) = (uint8_t) machine_info.physical_cpu_max; |
	*((uint8_t*)(_COMM_PAGE_LOGICAL_CPUS+_COMM_PAGE_RW_OFFSET)) = (uint8_t) machine_info.logical_cpu_max;
105 | *((uint64_t*)(_COMM_PAGE_MEMORY_SIZE+_COMM_PAGE_RW_OFFSET)) = machine_info.max_mem; |
106 | *((uint32_t*)(_COMM_PAGE_CPUFAMILY+_COMM_PAGE_RW_OFFSET)) = (uint32_t)cpufamily; |
107 | *((uint32_t*)(_COMM_PAGE_DEV_FIRM+_COMM_PAGE_RW_OFFSET)) = (uint32_t)PE_i_can_has_debugger(NULL); |
108 | *((uint8_t*)(_COMM_PAGE_USER_TIMEBASE+_COMM_PAGE_RW_OFFSET)) = user_timebase_allowed(); |
109 | *((uint8_t*)(_COMM_PAGE_CONT_HWCLOCK+_COMM_PAGE_RW_OFFSET)) = user_cont_hwclock_allowed(); |
110 | *((uint8_t*)(_COMM_PAGE_KERNEL_PAGE_SHIFT+_COMM_PAGE_RW_OFFSET)) = (uint8_t) page_shift; |
111 | |
112 | #if __arm64__ |
113 | *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_32+_COMM_PAGE_RW_OFFSET)) = (uint8_t) page_shift_user32; |
114 | *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_64+_COMM_PAGE_RW_OFFSET)) = (uint8_t) SIXTEENK_PAGE_SHIFT; |
115 | #elif (__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS) |
116 | /* enforce 16KB alignment for watch targets with new ABI */ |
117 | *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_32+_COMM_PAGE_RW_OFFSET)) = (uint8_t) SIXTEENK_PAGE_SHIFT; |
118 | *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_64+_COMM_PAGE_RW_OFFSET)) = (uint8_t) SIXTEENK_PAGE_SHIFT; |
119 | #else /* __arm64__ */ |
120 | *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_32+_COMM_PAGE_RW_OFFSET)) = (uint8_t) PAGE_SHIFT; |
121 | *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_64+_COMM_PAGE_RW_OFFSET)) = (uint8_t) PAGE_SHIFT; |
122 | #endif /* __arm64__ */ |
123 | |
124 | commpage_update_timebase(); |
125 | commpage_update_mach_continuous_time(0); |
126 | |
127 | clock_sec_t secs; |
128 | clock_usec_t microsecs; |
	clock_get_boottime_microtime(&secs, &microsecs);
130 | commpage_update_boottime(secs * USEC_PER_SEC + microsecs); |
131 | |
132 | /* |
133 | * set commpage approximate time to zero for initialization. |
134 | * scheduler shall populate correct value before running user thread |
135 | */ |
	*((uint64_t *)(_COMM_PAGE_APPROX_TIME+_COMM_PAGE_RW_OFFSET)) = 0;
137 | #ifdef CONFIG_MACH_APPROXIMATE_TIME |
138 | *((uint8_t *)(_COMM_PAGE_APPROX_TIME_SUPPORTED+_COMM_PAGE_RW_OFFSET)) = 1; |
139 | #else |
140 | *((uint8_t *)(_COMM_PAGE_APPROX_TIME_SUPPORTED+_COMM_PAGE_RW_OFFSET)) = 0; |
141 | #endif |
142 | |
143 | commpage_update_kdebug_state(); |
144 | |
145 | #if CONFIG_ATM |
146 | commpage_update_atm_diagnostic_config(atm_get_diagnostic_config()); |
147 | #endif |
148 | |
149 | } |
150 | |
151 | struct mu { |
152 | uint64_t m; // magic number |
153 | int32_t a; // add indicator |
154 | int32_t s; // shift amount |
155 | }; |
156 | |
157 | void |
158 | commpage_set_timestamp( |
159 | uint64_t tbr, |
160 | uint64_t secs, |
161 | uint64_t frac, |
162 | uint64_t scale, |
163 | uint64_t tick_per_sec) |
164 | { |
165 | new_commpage_timeofday_data_t *commpage_timeofday_datap; |
166 | |
167 | if (commPagePtr == 0) |
168 | return; |
169 | |
170 | commpage_timeofday_datap = (new_commpage_timeofday_data_t *)(_COMM_PAGE_NEWTIMEOFDAY_DATA+_COMM_PAGE_RW_OFFSET); |
171 | |
172 | commpage_timeofday_datap->TimeStamp_tick = 0x0ULL; |
173 | |
174 | #if (__ARM_ARCH__ >= 7) |
175 | __asm__ volatile("dmb ish" ); |
176 | #endif |
177 | commpage_timeofday_datap->TimeStamp_sec = secs; |
178 | commpage_timeofday_datap->TimeStamp_frac = frac; |
179 | commpage_timeofday_datap->Ticks_scale = scale; |
180 | commpage_timeofday_datap->Ticks_per_sec = tick_per_sec; |
181 | |
182 | #if (__ARM_ARCH__ >= 7) |
183 | __asm__ volatile("dmb ish" ); |
184 | #endif |
185 | commpage_timeofday_datap->TimeStamp_tick = tbr; |
186 | } |
187 | |
188 | /* |
189 | * Update _COMM_PAGE_MEMORY_PRESSURE. Called periodically from vm's compute_memory_pressure() |
190 | */ |
191 | |
192 | void |
193 | commpage_set_memory_pressure( |
194 | unsigned int pressure ) |
195 | { |
196 | if (commPagePtr == 0) |
197 | return; |
198 | *((uint32_t *)(_COMM_PAGE_MEMORY_PRESSURE+_COMM_PAGE_RW_OFFSET)) = pressure; |
199 | } |
200 | |
201 | /* |
202 | * Update _COMM_PAGE_SPIN_COUNT. We might want to reduce when running on a battery, etc. |
203 | */ |
204 | |
205 | void |
206 | commpage_set_spin_count( |
207 | unsigned int count ) |
208 | { |
209 | if (count == 0) /* we test for 0 after decrement, not before */ |
210 | count = 1; |
211 | |
212 | if (commPagePtr == 0) |
213 | return; |
214 | *((uint32_t *)(_COMM_PAGE_SPIN_COUNT+_COMM_PAGE_RW_OFFSET)) = count; |
215 | } |
216 | |
217 | /* |
218 | * Determine number of CPUs on this system. |
219 | */ |
220 | static int |
221 | commpage_cpus( void ) |
222 | { |
223 | int cpus; |
224 | |
225 | cpus = ml_get_max_cpus(); // NB: this call can block |
226 | |
227 | if (cpus == 0) |
228 | panic("commpage cpus==0" ); |
229 | if (cpus > 0xFF) |
230 | cpus = 0xFF; |
231 | |
232 | return cpus; |
233 | } |
234 | |
235 | vm_address_t |
236 | _get_commpage_priv_address(void) |
237 | { |
238 | return sharedpage_rw_addr; |
239 | } |
240 | |
241 | /* |
242 | * Initialize _cpu_capabilities vector |
243 | */ |
244 | static void |
245 | commpage_init_cpu_capabilities( void ) |
246 | { |
247 | uint32_t bits; |
248 | int cpus; |
249 | ml_cpu_info_t cpu_info; |
250 | |
251 | bits = 0; |
252 | ml_cpu_get_info(&cpu_info); |
253 | |
254 | switch (cpu_info.cache_line_size) { |
255 | case 128: |
256 | bits |= kCache128; |
257 | break; |
258 | case 64: |
259 | bits |= kCache64; |
260 | break; |
261 | case 32: |
262 | bits |= kCache32; |
263 | break; |
264 | default: |
265 | break; |
266 | } |
267 | cpus = commpage_cpus(); |
268 | |
269 | if (cpus == 1) |
270 | bits |= kUP; |
271 | |
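	/* Pack the (clamped) CPU count into the capabilities word. */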
272 | bits |= (cpus << kNumCPUsShift); |
273 | |
274 | bits |= kFastThreadLocalStorage; // TPIDRURO for TLS |
275 | |
276 | #if __ARM_VFP__ |
277 | bits |= kHasVfp; |
278 | arm_mvfp_info_t *mvfp_info = arm_mvfp_info(); |
279 | if (mvfp_info->neon) |
280 | bits |= kHasNeon; |
281 | if (mvfp_info->neon_hpfp) |
282 | bits |= kHasNeonHPFP; |
283 | if (mvfp_info->neon_fp16) |
284 | bits |= kHasNeonFP16; |
285 | #endif |
286 | #if defined(__arm64__) |
287 | bits |= kHasFMA; |
288 | #endif |
289 | #if __ARM_ENABLE_WFE_ |
290 | #ifdef __arm64__ |
291 | if (arm64_wfe_allowed()) { |
292 | bits |= kHasEvent; |
293 | } |
294 | #else |
295 | bits |= kHasEvent; |
296 | #endif |
297 | #endif |
298 | #if __ARM_V8_CRYPTO_EXTENSIONS__ |
299 | bits |= kHasARMv8Crypto; |
300 | #endif |
301 | #ifdef __arm64__ |
	uint64_t isar0 = __builtin_arm_rsr64("ID_AA64ISAR0_EL1");
303 | if ((isar0 & ID_AA64ISAR0_EL1_ATOMIC_MASK) == ID_AA64ISAR0_EL1_ATOMIC_8_1) { |
304 | bits |= kHasARMv81Atomics; |
305 | gARMv81Atomics = 1; |
306 | } |
307 | if ((isar0 & ID_AA64ISAR0_EL1_CRC32_MASK) == ID_AA64ISAR0_EL1_CRC32_EN) { |
308 | bits |= kHasARMv8Crc32; |
309 | gARMv8Crc32 = 1; |
310 | } |
311 | #endif |
312 | _cpu_capabilities = bits; |
313 | |
314 | *((uint32_t *)(_COMM_PAGE_CPU_CAPABILITIES+_COMM_PAGE_RW_OFFSET)) = _cpu_capabilities; |
315 | } |
316 | |
317 | /* |
318 | * Updated every time a logical CPU goes offline/online |
319 | */ |
320 | void |
321 | commpage_update_active_cpus(void) |
322 | { |
323 | if (!commPagePtr) |
324 | return; |
325 | *((uint8_t *)(_COMM_PAGE_ACTIVE_CPUS+_COMM_PAGE_RW_OFFSET)) = processor_avail_count; |
326 | } |
327 | |
328 | /* |
329 | * Update the commpage bits for mach_absolute_time and mach_continuous_time (for userspace) |
330 | */ |
331 | void |
332 | commpage_update_timebase(void) |
333 | { |
334 | if (commPagePtr) { |
335 | *((uint64_t*)(_COMM_PAGE_TIMEBASE_OFFSET+_COMM_PAGE_RW_OFFSET)) = rtclock_base_abstime; |
336 | } |
337 | } |
338 | |
339 | /* |
340 | * Update the commpage with current kdebug state. This currently has bits for |
341 | * global trace state, and typefilter enablement. It is likely additional state |
342 | * will be tracked in the future. |
343 | * |
344 | * INVARIANT: This value will always be 0 if global tracing is disabled. This |
345 | * allows simple guard tests of "if (*_COMM_PAGE_KDEBUG_ENABLE) { ... }" |
346 | */ |
347 | void |
348 | commpage_update_kdebug_state(void) |
349 | { |
350 | if (commPagePtr) |
351 | *((volatile uint32_t*)(_COMM_PAGE_KDEBUG_ENABLE+_COMM_PAGE_RW_OFFSET)) = kdebug_commpage_state(); |
352 | } |
353 | |
354 | /* Ditto for atm_diagnostic_config */ |
355 | void |
356 | commpage_update_atm_diagnostic_config(uint32_t diagnostic_config) |
357 | { |
358 | if (commPagePtr) |
359 | *((volatile uint32_t*)(_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG+_COMM_PAGE_RW_OFFSET)) = diagnostic_config; |
360 | } |
361 | |
362 | /* |
363 | * Update the commpage data with the state of multiuser mode for |
364 | * this device. Allowing various services in userspace to avoid |
365 | * IPC in the (more common) non-multiuser environment. |
366 | */ |
367 | void |
368 | commpage_update_multiuser_config(uint32_t multiuser_config) |
369 | { |
370 | if (commPagePtr) |
371 | *((volatile uint32_t *)(_COMM_PAGE_MULTIUSER_CONFIG+_COMM_PAGE_RW_OFFSET)) = multiuser_config; |
372 | } |
373 | |
374 | /* |
375 | * update the commpage data for |
376 | * last known value of mach_absolute_time() |
377 | */ |
378 | |
379 | void |
380 | commpage_update_mach_approximate_time(uint64_t abstime) |
381 | { |
382 | #ifdef CONFIG_MACH_APPROXIMATE_TIME |
383 | uintptr_t approx_time_base = (uintptr_t)(_COMM_PAGE_APPROX_TIME + _COMM_PAGE_RW_OFFSET); |
384 | uint64_t saved_data; |
385 | |
386 | if (commPagePtr) { |
387 | saved_data = atomic_load_explicit((_Atomic uint64_t *)approx_time_base, |
388 | memory_order_relaxed); |
389 | if (saved_data < abstime) { |
			/* Ignore the success/fail return value: if the value has
			 * been updated since we last read it, "someone" has a
			 * newer timestamp than us and ours is now stale. */
394 | atomic_compare_exchange_strong_explicit((_Atomic uint64_t *)approx_time_base, |
395 | &saved_data, abstime, memory_order_relaxed, memory_order_relaxed); |
396 | } |
397 | } |
398 | #else |
399 | #pragma unused (abstime) |
400 | #endif |
401 | } |
402 | |
403 | /* |
404 | * update the commpage data's total system sleep time for |
405 | * userspace call to mach_continuous_time() |
406 | */ |
407 | void |
408 | commpage_update_mach_continuous_time(uint64_t sleeptime) |
409 | { |
410 | if (commPagePtr) { |
411 | #ifdef __arm64__ |
412 | *((uint64_t *)(_COMM_PAGE_CONT_TIMEBASE + _COMM_PAGE_RW_OFFSET)) = sleeptime; |
413 | #else |
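		/*
		 * 32-bit ARM has no single-copy-atomic 64-bit store, so publish
		 * the value with a compare-and-swap loop to keep readers from
		 * observing a torn, half-written timebase.
		 */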
414 | uint64_t *c_time_base = (uint64_t *)(_COMM_PAGE_CONT_TIMEBASE + _COMM_PAGE_RW_OFFSET); |
415 | uint64_t old; |
416 | do { |
417 | old = *c_time_base; |
		} while (!OSCompareAndSwap64(old, sleeptime, c_time_base));
419 | #endif /* __arm64__ */ |
420 | } |
421 | } |
422 | |
423 | /* |
424 | * update the commpage's value for the boot time |
425 | */ |
426 | void |
427 | commpage_update_boottime(uint64_t value) |
428 | { |
429 | if (commPagePtr) { |
430 | #ifdef __arm64__ |
431 | *((uint64_t *)(_COMM_PAGE_BOOTTIME_USEC + _COMM_PAGE_RW_OFFSET)) = value; |
432 | #else |
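		/* As above: the CAS loop avoids a torn 64-bit store on 32-bit ARM. */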
433 | uint64_t *cp = (uint64_t *)(_COMM_PAGE_BOOTTIME_USEC + _COMM_PAGE_RW_OFFSET); |
434 | uint64_t old_value; |
435 | do { |
436 | old_value = *cp; |
437 | } while (!OSCompareAndSwap64(old_value, value, cp)); |
438 | #endif /* __arm64__ */ |
439 | } |
440 | } |
441 | |
442 | |
443 | /* |
444 | * After this counter has incremented, all running CPUs are guaranteed to |
445 | * have quiesced, i.e. executed serially dependent memory barriers. |
446 | * This is only tracked for CPUs running in userspace, therefore only useful |
447 | * outside the kernel. |
448 | * |
449 | * Note that you can't know which side of those barriers your read was from, |
450 | * so you have to observe 2 increments in order to ensure that you saw a |
451 | * serially dependent barrier chain across all running CPUs. |
452 | */ |
453 | uint64_t |
454 | commpage_increment_cpu_quiescent_counter(void) |
455 | { |
456 | if (!commPagePtr) |
457 | return 0; |
458 | |
459 | uint64_t old_gen; |
460 | |
461 | _Atomic uint64_t *sched_gen = (_Atomic uint64_t *)(_COMM_PAGE_CPU_QUIESCENT_COUNTER + |
462 | _COMM_PAGE_RW_OFFSET); |
463 | /* |
464 | * On 32bit architectures, double-wide atomic load or stores are a CAS, |
465 | * so the atomic increment is the most efficient way to increment the |
466 | * counter. |
467 | * |
468 | * On 64bit architectures however, because the update is synchronized by |
469 | * the cpu mask, relaxed loads and stores is more efficient. |
470 | */ |
471 | #if __LP64__ |
472 | old_gen = atomic_load_explicit(sched_gen, memory_order_relaxed); |
473 | atomic_store_explicit(sched_gen, old_gen + 1, memory_order_relaxed); |
474 | #else |
475 | old_gen = atomic_fetch_add_explicit(sched_gen, 1, memory_order_relaxed); |
476 | #endif |
477 | return old_gen; |
478 | } |
479 | |
480 | |