1/*
2 * Copyright (c) 2007 Apple Inc. All rights reserved.
3 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. The rights granted to you under the License
11 * may not be used to create, or enable the creation or redistribution of,
12 * unlawful or unlicensed copies of an Apple operating system, or to
13 * circumvent, violate, or enable the circumvention or violation of, any
14 * terms of an Apple operating system software license agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 *
19 * The Original Code and all software distributed under the License are
20 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
21 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
22 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
24 * Please see the License for the specific language governing rights and
25 * limitations under the License.
26 *
27 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 */
29/*
30 * @OSF_COPYRIGHT@
31 */
32/*
33 * @APPLE_FREE_COPYRIGHT@
34 */
35/*
36 * File: arm/commpage/commpage.c
37 * Purpose: Set up and export a RO/RW page
38 */
39#include <libkern/section_keywords.h>
40#include <mach/mach_types.h>
41#include <mach/machine.h>
42#include <mach/vm_map.h>
43#include <machine/cpu_capabilities.h>
44#include <machine/commpage.h>
45#include <machine/pmap.h>
46#include <vm/vm_kern.h>
47#include <vm/vm_map.h>
48#include <vm/vm_protos.h>
49#include <ipc/ipc_port.h>
50#include <arm/cpuid.h> /* for cpuid_info() & cache_info() */
51#include <arm/rtclock.h>
52#include <libkern/OSAtomic.h>
53#include <stdatomic.h>
54
55#include <sys/kdebug.h>
56
57#if CONFIG_ATM
58#include <atm/atm_internal.h>
59#endif
60
61static void commpage_init_cpu_capabilities( void );
62static int commpage_cpus( void );
63
64SECURITY_READ_ONLY_LATE(vm_address_t) commPagePtr=0;
65SECURITY_READ_ONLY_LATE(vm_address_t) sharedpage_rw_addr = 0;
66SECURITY_READ_ONLY_LATE(uint32_t) _cpu_capabilities = 0;
67
68/* For sysctl access from BSD side */
69extern int gARMv81Atomics;
70extern int gARMv8Crc32;
71
72void
73commpage_populate(
74 void)
75{
76 uint16_t c2;
77 int cpufamily;
78
79 sharedpage_rw_addr = pmap_create_sharedpage();
80 commPagePtr = (vm_address_t)_COMM_PAGE_BASE_ADDRESS;
81
82 *((uint16_t*)(_COMM_PAGE_VERSION+_COMM_PAGE_RW_OFFSET)) = (uint16_t) _COMM_PAGE_THIS_VERSION;
83
84 commpage_init_cpu_capabilities();
85 commpage_set_timestamp(0, 0, 0, 0, 0);
86
87 if (_cpu_capabilities & kCache32)
88 c2 = 32;
89 else if (_cpu_capabilities & kCache64)
90 c2 = 64;
91 else if (_cpu_capabilities & kCache128)
92 c2 = 128;
93 else
94 c2 = 0;
95
96 *((uint16_t*)(_COMM_PAGE_CACHE_LINESIZE+_COMM_PAGE_RW_OFFSET)) = c2;
97 *((uint32_t*)(_COMM_PAGE_SPIN_COUNT+_COMM_PAGE_RW_OFFSET)) = 1;
98
99 commpage_update_active_cpus();
100 cpufamily = cpuid_get_cpufamily();
101
102 /* machine_info valid after ml_get_max_cpus() */
103 *((uint8_t*)(_COMM_PAGE_PHYSICAL_CPUS+_COMM_PAGE_RW_OFFSET)) = (uint8_t) machine_info.physical_cpu_max;
104 *((uint8_t*)(_COMM_PAGE_LOGICAL_CPUS+_COMM_PAGE_RW_OFFSET))= (uint8_t) machine_info.logical_cpu_max;
105 *((uint64_t*)(_COMM_PAGE_MEMORY_SIZE+_COMM_PAGE_RW_OFFSET)) = machine_info.max_mem;
106 *((uint32_t*)(_COMM_PAGE_CPUFAMILY+_COMM_PAGE_RW_OFFSET)) = (uint32_t)cpufamily;
107 *((uint32_t*)(_COMM_PAGE_DEV_FIRM+_COMM_PAGE_RW_OFFSET)) = (uint32_t)PE_i_can_has_debugger(NULL);
108 *((uint8_t*)(_COMM_PAGE_USER_TIMEBASE+_COMM_PAGE_RW_OFFSET)) = user_timebase_allowed();
109 *((uint8_t*)(_COMM_PAGE_CONT_HWCLOCK+_COMM_PAGE_RW_OFFSET)) = user_cont_hwclock_allowed();
110 *((uint8_t*)(_COMM_PAGE_KERNEL_PAGE_SHIFT+_COMM_PAGE_RW_OFFSET)) = (uint8_t) page_shift;
111
112#if __arm64__
113 *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_32+_COMM_PAGE_RW_OFFSET)) = (uint8_t) page_shift_user32;
114 *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_64+_COMM_PAGE_RW_OFFSET)) = (uint8_t) SIXTEENK_PAGE_SHIFT;
115#elif (__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS)
116 /* enforce 16KB alignment for watch targets with new ABI */
117 *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_32+_COMM_PAGE_RW_OFFSET)) = (uint8_t) SIXTEENK_PAGE_SHIFT;
118 *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_64+_COMM_PAGE_RW_OFFSET)) = (uint8_t) SIXTEENK_PAGE_SHIFT;
119#else /* __arm64__ */
120 *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_32+_COMM_PAGE_RW_OFFSET)) = (uint8_t) PAGE_SHIFT;
121 *((uint8_t*)(_COMM_PAGE_USER_PAGE_SHIFT_64+_COMM_PAGE_RW_OFFSET)) = (uint8_t) PAGE_SHIFT;
122#endif /* __arm64__ */
123
124 commpage_update_timebase();
125 commpage_update_mach_continuous_time(0);
126
127 clock_sec_t secs;
128 clock_usec_t microsecs;
129 clock_get_boottime_microtime(&secs, &microsecs);
130 commpage_update_boottime(secs * USEC_PER_SEC + microsecs);
131
132 /*
133 * set commpage approximate time to zero for initialization.
134 * scheduler shall populate correct value before running user thread
135 */
136 *((uint64_t *)(_COMM_PAGE_APPROX_TIME+ _COMM_PAGE_RW_OFFSET)) = 0;
137#ifdef CONFIG_MACH_APPROXIMATE_TIME
138 *((uint8_t *)(_COMM_PAGE_APPROX_TIME_SUPPORTED+_COMM_PAGE_RW_OFFSET)) = 1;
139#else
140 *((uint8_t *)(_COMM_PAGE_APPROX_TIME_SUPPORTED+_COMM_PAGE_RW_OFFSET)) = 0;
141#endif
142
143 commpage_update_kdebug_state();
144
145#if CONFIG_ATM
146 commpage_update_atm_diagnostic_config(atm_get_diagnostic_config());
147#endif
148
149}
150
/*
 * Triple of a 64-bit magic number, an add indicator and a shift amount.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
struct mu {
	uint64_t        m;      /* magic number */
	int32_t         a;      /* add indicator */
	int32_t         s;      /* shift amount */
};
156
157void
158commpage_set_timestamp(
159 uint64_t tbr,
160 uint64_t secs,
161 uint64_t frac,
162 uint64_t scale,
163 uint64_t tick_per_sec)
164{
165 new_commpage_timeofday_data_t *commpage_timeofday_datap;
166
167 if (commPagePtr == 0)
168 return;
169
170 commpage_timeofday_datap = (new_commpage_timeofday_data_t *)(_COMM_PAGE_NEWTIMEOFDAY_DATA+_COMM_PAGE_RW_OFFSET);
171
172 commpage_timeofday_datap->TimeStamp_tick = 0x0ULL;
173
174#if (__ARM_ARCH__ >= 7)
175 __asm__ volatile("dmb ish");
176#endif
177 commpage_timeofday_datap->TimeStamp_sec = secs;
178 commpage_timeofday_datap->TimeStamp_frac = frac;
179 commpage_timeofday_datap->Ticks_scale = scale;
180 commpage_timeofday_datap->Ticks_per_sec = tick_per_sec;
181
182#if (__ARM_ARCH__ >= 7)
183 __asm__ volatile("dmb ish");
184#endif
185 commpage_timeofday_datap->TimeStamp_tick = tbr;
186}
187
188/*
189 * Update _COMM_PAGE_MEMORY_PRESSURE. Called periodically from vm's compute_memory_pressure()
190 */
191
192void
193commpage_set_memory_pressure(
194 unsigned int pressure )
195{
196 if (commPagePtr == 0)
197 return;
198 *((uint32_t *)(_COMM_PAGE_MEMORY_PRESSURE+_COMM_PAGE_RW_OFFSET)) = pressure;
199}
200
201/*
202 * Update _COMM_PAGE_SPIN_COUNT. We might want to reduce when running on a battery, etc.
203 */
204
205void
206commpage_set_spin_count(
207 unsigned int count )
208{
209 if (count == 0) /* we test for 0 after decrement, not before */
210 count = 1;
211
212 if (commPagePtr == 0)
213 return;
214 *((uint32_t *)(_COMM_PAGE_SPIN_COUNT+_COMM_PAGE_RW_OFFSET)) = count;
215}
216
/*
 * Determine number of CPUs on this system.
 *
 * Returns the value of ml_get_max_cpus(), capped at 0xFF. Panics if that
 * value is 0.
 */
static int
commpage_cpus(void)
{
	int ncpus = ml_get_max_cpus();  /* NB: this call can block */

	if (ncpus == 0) {
		panic("commpage cpus==0");
	}

	return (ncpus > 0xFF) ? 0xFF : ncpus;
}
234
235vm_address_t
236_get_commpage_priv_address(void)
237{
238 return sharedpage_rw_addr;
239}
240
241/*
242 * Initialize _cpu_capabilities vector
243 */
244static void
245commpage_init_cpu_capabilities( void )
246{
247 uint32_t bits;
248 int cpus;
249 ml_cpu_info_t cpu_info;
250
251 bits = 0;
252 ml_cpu_get_info(&cpu_info);
253
254 switch (cpu_info.cache_line_size) {
255 case 128:
256 bits |= kCache128;
257 break;
258 case 64:
259 bits |= kCache64;
260 break;
261 case 32:
262 bits |= kCache32;
263 break;
264 default:
265 break;
266 }
267 cpus = commpage_cpus();
268
269 if (cpus == 1)
270 bits |= kUP;
271
272 bits |= (cpus << kNumCPUsShift);
273
274 bits |= kFastThreadLocalStorage; // TPIDRURO for TLS
275
276#if __ARM_VFP__
277 bits |= kHasVfp;
278 arm_mvfp_info_t *mvfp_info = arm_mvfp_info();
279 if (mvfp_info->neon)
280 bits |= kHasNeon;
281 if (mvfp_info->neon_hpfp)
282 bits |= kHasNeonHPFP;
283 if (mvfp_info->neon_fp16)
284 bits |= kHasNeonFP16;
285#endif
286#if defined(__arm64__)
287 bits |= kHasFMA;
288#endif
289#if __ARM_ENABLE_WFE_
290#ifdef __arm64__
291 if (arm64_wfe_allowed()) {
292 bits |= kHasEvent;
293 }
294#else
295 bits |= kHasEvent;
296#endif
297#endif
298#if __ARM_V8_CRYPTO_EXTENSIONS__
299 bits |= kHasARMv8Crypto;
300#endif
301#ifdef __arm64__
302 uint64_t isar0 = __builtin_arm_rsr64("ID_AA64ISAR0_EL1");
303 if ((isar0 & ID_AA64ISAR0_EL1_ATOMIC_MASK) == ID_AA64ISAR0_EL1_ATOMIC_8_1) {
304 bits |= kHasARMv81Atomics;
305 gARMv81Atomics = 1;
306 }
307 if ((isar0 & ID_AA64ISAR0_EL1_CRC32_MASK) == ID_AA64ISAR0_EL1_CRC32_EN) {
308 bits |= kHasARMv8Crc32;
309 gARMv8Crc32 = 1;
310 }
311#endif
312 _cpu_capabilities = bits;
313
314 *((uint32_t *)(_COMM_PAGE_CPU_CAPABILITIES+_COMM_PAGE_RW_OFFSET)) = _cpu_capabilities;
315}
316
317/*
318 * Updated every time a logical CPU goes offline/online
319 */
320void
321commpage_update_active_cpus(void)
322{
323 if (!commPagePtr)
324 return;
325 *((uint8_t *)(_COMM_PAGE_ACTIVE_CPUS+_COMM_PAGE_RW_OFFSET)) = processor_avail_count;
326}
327
328/*
329 * Update the commpage bits for mach_absolute_time and mach_continuous_time (for userspace)
330 */
331void
332commpage_update_timebase(void)
333{
334 if (commPagePtr) {
335 *((uint64_t*)(_COMM_PAGE_TIMEBASE_OFFSET+_COMM_PAGE_RW_OFFSET)) = rtclock_base_abstime;
336 }
337}
338
339/*
340 * Update the commpage with current kdebug state. This currently has bits for
341 * global trace state, and typefilter enablement. It is likely additional state
342 * will be tracked in the future.
343 *
344 * INVARIANT: This value will always be 0 if global tracing is disabled. This
345 * allows simple guard tests of "if (*_COMM_PAGE_KDEBUG_ENABLE) { ... }"
346 */
347void
348commpage_update_kdebug_state(void)
349{
350 if (commPagePtr)
351 *((volatile uint32_t*)(_COMM_PAGE_KDEBUG_ENABLE+_COMM_PAGE_RW_OFFSET)) = kdebug_commpage_state();
352}
353
354/* Ditto for atm_diagnostic_config */
355void
356commpage_update_atm_diagnostic_config(uint32_t diagnostic_config)
357{
358 if (commPagePtr)
359 *((volatile uint32_t*)(_COMM_PAGE_ATM_DIAGNOSTIC_CONFIG+_COMM_PAGE_RW_OFFSET)) = diagnostic_config;
360}
361
362/*
363 * Update the commpage data with the state of multiuser mode for
364 * this device. Allowing various services in userspace to avoid
365 * IPC in the (more common) non-multiuser environment.
366 */
367void
368commpage_update_multiuser_config(uint32_t multiuser_config)
369{
370 if (commPagePtr)
371 *((volatile uint32_t *)(_COMM_PAGE_MULTIUSER_CONFIG+_COMM_PAGE_RW_OFFSET)) = multiuser_config;
372}
373
/*
 * update the commpage data for
 * last known value of mach_absolute_time()
 *
 *   abstime - candidate timestamp; it is only written when it is larger
 *             than the value currently stored in _COMM_PAGE_APPROX_TIME
 *
 * Compiled to a no-op unless CONFIG_MACH_APPROXIMATE_TIME is defined, and
 * does nothing until the commpage has been set up.
 */
void
commpage_update_mach_approximate_time(uint64_t abstime)
{
#ifdef CONFIG_MACH_APPROXIMATE_TIME
	uintptr_t field_addr = (uintptr_t)(_COMM_PAGE_APPROX_TIME + _COMM_PAGE_RW_OFFSET);
	_Atomic uint64_t *approx_time = (_Atomic uint64_t *)field_addr;
	uint64_t current;

	if (!commPagePtr) {
		return;
	}

	current = atomic_load_explicit(approx_time, memory_order_relaxed);
	if (current >= abstime) {
		/* stored value is already at least as new as ours */
		return;
	}

	/*
	 * The success/fail result is deliberately ignored: if the value
	 * changed since we read it, "someone" has a newer timestamp than
	 * us and ours is now invalid.
	 */
	(void)atomic_compare_exchange_strong_explicit(approx_time, &current, abstime,
	    memory_order_relaxed, memory_order_relaxed);
#else
#pragma unused (abstime)
#endif
}
402
403/*
404 * update the commpage data's total system sleep time for
405 * userspace call to mach_continuous_time()
406 */
407void
408commpage_update_mach_continuous_time(uint64_t sleeptime)
409{
410 if (commPagePtr) {
411#ifdef __arm64__
412 *((uint64_t *)(_COMM_PAGE_CONT_TIMEBASE + _COMM_PAGE_RW_OFFSET)) = sleeptime;
413#else
414 uint64_t *c_time_base = (uint64_t *)(_COMM_PAGE_CONT_TIMEBASE + _COMM_PAGE_RW_OFFSET);
415 uint64_t old;
416 do {
417 old = *c_time_base;
418 } while(!OSCompareAndSwap64(old, sleeptime, c_time_base));
419#endif /* __arm64__ */
420 }
421}
422
423/*
424 * update the commpage's value for the boot time
425 */
426void
427commpage_update_boottime(uint64_t value)
428{
429 if (commPagePtr) {
430#ifdef __arm64__
431 *((uint64_t *)(_COMM_PAGE_BOOTTIME_USEC + _COMM_PAGE_RW_OFFSET)) = value;
432#else
433 uint64_t *cp = (uint64_t *)(_COMM_PAGE_BOOTTIME_USEC + _COMM_PAGE_RW_OFFSET);
434 uint64_t old_value;
435 do {
436 old_value = *cp;
437 } while (!OSCompareAndSwap64(old_value, value, cp));
438#endif /* __arm64__ */
439 }
440}
441
442
443/*
444 * After this counter has incremented, all running CPUs are guaranteed to
445 * have quiesced, i.e. executed serially dependent memory barriers.
446 * This is only tracked for CPUs running in userspace, therefore only useful
447 * outside the kernel.
448 *
449 * Note that you can't know which side of those barriers your read was from,
450 * so you have to observe 2 increments in order to ensure that you saw a
451 * serially dependent barrier chain across all running CPUs.
452 */
453uint64_t
454commpage_increment_cpu_quiescent_counter(void)
455{
456 if (!commPagePtr)
457 return 0;
458
459 uint64_t old_gen;
460
461 _Atomic uint64_t *sched_gen = (_Atomic uint64_t *)(_COMM_PAGE_CPU_QUIESCENT_COUNTER +
462 _COMM_PAGE_RW_OFFSET);
463 /*
464 * On 32bit architectures, double-wide atomic load or stores are a CAS,
465 * so the atomic increment is the most efficient way to increment the
466 * counter.
467 *
468 * On 64bit architectures however, because the update is synchronized by
469 * the cpu mask, relaxed loads and stores is more efficient.
470 */
471#if __LP64__
472 old_gen = atomic_load_explicit(sched_gen, memory_order_relaxed);
473 atomic_store_explicit(sched_gen, old_gen + 1, memory_order_relaxed);
474#else
475 old_gen = atomic_fetch_add_explicit(sched_gen, 1, memory_order_relaxed);
476#endif
477 return old_gen;
478}
479
480