1 | /* |
2 | * Copyright (c) 2007-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | |
32 | #include <pexpert/pexpert.h> |
33 | #include <arm/cpuid.h> |
34 | #include <arm/cpuid_internal.h> |
35 | #include <arm/cpu_data_internal.h> |
36 | #include <arm64/proc_reg.h> |
37 | #include <kern/lock_rw.h> |
38 | #include <vm/vm_page.h> |
39 | |
40 | #include <libkern/section_keywords.h> |
41 | |
/* Temporary types to aid decoding;
 * everything is little-endian. */
44 | |
45 | typedef struct { |
46 | uint32_t |
47 | Ctype1:3, /* 2:0 */ |
48 | Ctype2:3, /* 5:3 */ |
49 | Ctype3:3, /* 8:6 */ |
	    Ctypes:15, /* 23:9 - Don't Care */
	    LoC:3, /* 26:24 - Level of Coherency */
52 | LoU:3, /* 29:27 - Level of Unification */ |
53 | RAZ:2; /* 31:30 - Read-As-Zero */ |
54 | } arm_cache_clidr_t; |
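
/*
 * Per the ARM architecture, each CLIDR Ctype<n> field encodes what is
 * implemented at that cache level: 0b000 = no cache, 0b001 = instruction
 * only, 0b010 = data only, 0b011 = separate I+D, 0b100 = unified. This is
 * why do_cacheid() below treats 0x4 as unified and 0x2 as data-present.
 */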
55 | |
56 | typedef union { |
57 | arm_cache_clidr_t bits; |
58 | uint32_t value; |
59 | } arm_cache_clidr_info_t; |
60 | |
61 | |
62 | typedef struct { |
63 | uint32_t |
	    LineSize:3, /* 2:0 - Log2(words per cache line) minus 2 */
65 | Assoc:10, /* 12:3 - Associativity of cache */ |
66 | NumSets:15, /* 27:13 - Number of sets in cache */ |
67 | c_type:4; /* 31:28 - Cache type */ |
68 | } arm_cache_ccsidr_t; |
69 | |
70 | |
71 | typedef union { |
72 | arm_cache_ccsidr_t bits; |
73 | uint32_t value; |
74 | } arm_cache_ccsidr_info_t; |
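
/*
 * Worked CCSIDR decode with illustrative (hypothetical) values: a 128KB,
 * 8-way cache with 64-byte lines reads back as LineSize = 2
 * (log2(16 words) - 2), Assoc = 7, NumSets = 255, so:
 *	line size  = 4 * (1 << (2 + 2))  = 64 bytes
 *	total size = (255 + 1) * 64 * 8  = 131072 bytes = 128KB
 * matching the arithmetic in do_cacheid() below.
 */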
75 | |
76 | /* Statics */ |
77 | |
78 | static SECURITY_READ_ONLY_LATE(arm_cpu_info_t) cpuid_cpu_info; |
79 | static SECURITY_READ_ONLY_LATE(cache_info_t *) cpuid_cache_info_boot_cpu; |
80 | static cache_info_t cpuid_cache_info[MAX_CPU_TYPES] = { 0 }; |
81 | static _Atomic uint8_t cpuid_cache_info_bitmap = 0; |
82 | |
static LCK_GRP_DECLARE(cpuid_grp, "cpuid");
84 | static LCK_RW_DECLARE(cpuid_cache_info_lck_rw, &cpuid_grp); |
85 | |
86 | /* Code */ |
87 | |
88 | __private_extern__ |
89 | void |
90 | do_cpuid(void) |
91 | { |
92 | cpuid_cpu_info.value = machine_read_midr(); |
93 | #if (__ARM_ARCH__ == 8) |
94 | |
95 | #if defined(HAS_APPLE_PAC) |
96 | cpuid_cpu_info.arm_info.arm_arch = CPU_ARCH_ARMv8E; |
97 | #else /* defined(HAS_APPLE_PAC) */ |
98 | cpuid_cpu_info.arm_info.arm_arch = CPU_ARCH_ARMv8; |
99 | #endif /* defined(HAS_APPLE_PAC) */ |
100 | |
101 | #else /* (__ARM_ARCH__ != 8) */ |
102 | #error Unsupported arch |
103 | #endif /* (__ARM_ARCH__ != 8) */ |
104 | } |
105 | |
106 | arm_cpu_info_t * |
107 | cpuid_info(void) |
108 | { |
109 | return &cpuid_cpu_info; |
110 | } |
111 | |
112 | int |
113 | cpuid_get_cpufamily(void) |
114 | { |
115 | int cpufamily = 0; |
116 | |
117 | switch (cpuid_info()->arm_info.arm_implementor) { |
118 | case CPU_VID_ARM: |
119 | switch (cpuid_info()->arm_info.arm_part) { |
120 | case CPU_PART_CORTEXA9: |
121 | cpufamily = CPUFAMILY_ARM_14; |
122 | break; |
123 | case CPU_PART_CORTEXA8: |
124 | cpufamily = CPUFAMILY_ARM_13; |
125 | break; |
126 | case CPU_PART_CORTEXA7: |
127 | cpufamily = CPUFAMILY_ARM_15; |
128 | break; |
129 | case CPU_PART_1136JFS: |
130 | case CPU_PART_1176JZFS: |
131 | cpufamily = CPUFAMILY_ARM_11; |
132 | break; |
133 | case CPU_PART_926EJS: |
134 | case CPU_PART_920T: |
135 | cpufamily = CPUFAMILY_ARM_9; |
136 | break; |
137 | default: |
138 | cpufamily = CPUFAMILY_UNKNOWN; |
139 | break; |
140 | } |
141 | break; |
142 | |
143 | case CPU_VID_INTEL: |
144 | cpufamily = CPUFAMILY_ARM_XSCALE; |
145 | break; |
146 | |
147 | case CPU_VID_APPLE: |
148 | switch (cpuid_info()->arm_info.arm_part) { |
149 | case CPU_PART_TYPHOON: |
150 | case CPU_PART_TYPHOON_CAPRI: |
151 | cpufamily = CPUFAMILY_ARM_TYPHOON; |
152 | break; |
153 | case CPU_PART_TWISTER: |
154 | case CPU_PART_TWISTER_ELBA_MALTA: |
155 | cpufamily = CPUFAMILY_ARM_TWISTER; |
156 | break; |
157 | case CPU_PART_HURRICANE: |
158 | case CPU_PART_HURRICANE_MYST: |
159 | cpufamily = CPUFAMILY_ARM_HURRICANE; |
160 | break; |
161 | case CPU_PART_MONSOON: |
162 | case CPU_PART_MISTRAL: |
163 | cpufamily = CPUFAMILY_ARM_MONSOON_MISTRAL; |
164 | break; |
165 | case CPU_PART_VORTEX: |
166 | case CPU_PART_TEMPEST: |
167 | case CPU_PART_TEMPEST_M9: |
168 | case CPU_PART_VORTEX_ARUBA: |
169 | case CPU_PART_TEMPEST_ARUBA: |
170 | cpufamily = CPUFAMILY_ARM_VORTEX_TEMPEST; |
171 | break; |
172 | case CPU_PART_LIGHTNING: |
173 | case CPU_PART_THUNDER: |
174 | #ifndef RC_HIDE_XNU_FIRESTORM |
175 | case CPU_PART_THUNDER_M10: |
176 | #endif |
177 | cpufamily = CPUFAMILY_ARM_LIGHTNING_THUNDER; |
178 | break; |
179 | case CPU_PART_FIRESTORM_JADE_CHOP: |
180 | case CPU_PART_FIRESTORM_JADE_DIE: |
181 | case CPU_PART_ICESTORM_JADE_CHOP: |
182 | case CPU_PART_ICESTORM_JADE_DIE: |
183 | #ifndef RC_HIDE_XNU_FIRESTORM |
184 | case CPU_PART_FIRESTORM: |
185 | case CPU_PART_ICESTORM: |
186 | case CPU_PART_FIRESTORM_TONGA: |
187 | case CPU_PART_ICESTORM_TONGA: |
188 | cpufamily = CPUFAMILY_ARM_FIRESTORM_ICESTORM; |
189 | break; |
190 | #endif |
191 | default: |
192 | cpufamily = CPUFAMILY_UNKNOWN; |
193 | break; |
194 | } |
195 | break; |
196 | |
197 | default: |
198 | cpufamily = CPUFAMILY_UNKNOWN; |
199 | break; |
200 | } |
201 | |
202 | return cpufamily; |
203 | } |
204 | |
205 | int |
206 | cpuid_get_cpusubfamily(void) |
207 | { |
208 | int cpusubfamily = CPUSUBFAMILY_UNKNOWN; |
209 | |
210 | if (cpuid_info()->arm_info.arm_implementor != CPU_VID_APPLE) { |
211 | return cpusubfamily; |
212 | } |
213 | |
214 | switch (cpuid_info()->arm_info.arm_part) { |
215 | case CPU_PART_TYPHOON: |
216 | case CPU_PART_TWISTER: |
217 | case CPU_PART_HURRICANE: |
218 | case CPU_PART_MONSOON: |
219 | case CPU_PART_MISTRAL: |
220 | case CPU_PART_VORTEX: |
221 | case CPU_PART_TEMPEST: |
222 | case CPU_PART_LIGHTNING: |
223 | case CPU_PART_THUNDER: |
224 | #ifndef RC_HIDE_XNU_FIRESTORM |
225 | case CPU_PART_FIRESTORM: |
226 | case CPU_PART_ICESTORM: |
227 | #endif |
228 | cpusubfamily = CPUSUBFAMILY_ARM_HP; |
229 | break; |
230 | case CPU_PART_TYPHOON_CAPRI: |
231 | case CPU_PART_TWISTER_ELBA_MALTA: |
232 | case CPU_PART_HURRICANE_MYST: |
233 | case CPU_PART_VORTEX_ARUBA: |
234 | case CPU_PART_TEMPEST_ARUBA: |
235 | #ifndef RC_HIDE_XNU_FIRESTORM |
236 | case CPU_PART_FIRESTORM_TONGA: |
237 | case CPU_PART_ICESTORM_TONGA: |
238 | #endif |
239 | cpusubfamily = CPUSUBFAMILY_ARM_HG; |
240 | break; |
241 | case CPU_PART_TEMPEST_M9: |
242 | #ifndef RC_HIDE_XNU_FIRESTORM |
243 | case CPU_PART_THUNDER_M10: |
244 | #endif |
245 | cpusubfamily = CPUSUBFAMILY_ARM_M; |
246 | break; |
247 | case CPU_PART_FIRESTORM_JADE_CHOP: |
248 | case CPU_PART_ICESTORM_JADE_CHOP: |
249 | cpusubfamily = CPUSUBFAMILY_ARM_HS; |
250 | break; |
251 | case CPU_PART_FIRESTORM_JADE_DIE: |
252 | case CPU_PART_ICESTORM_JADE_DIE: |
253 | cpusubfamily = CPUSUBFAMILY_ARM_HC_HD; |
254 | break; |
255 | default: |
256 | cpusubfamily = CPUSUBFAMILY_UNKNOWN; |
257 | break; |
258 | } |
259 | |
260 | return cpusubfamily; |
261 | } |
262 | |
263 | void |
264 | do_debugid(void) |
265 | { |
266 | machine_do_debugid(); |
267 | } |
268 | |
269 | arm_debug_info_t * |
270 | arm_debug_info(void) |
271 | { |
272 | return machine_arm_debug_info(); |
273 | } |
274 | |
275 | void |
276 | do_mvfpid(void) |
277 | { |
278 | return machine_do_mvfpid(); |
279 | } |
280 | |
arm_mvfp_info_t *
arm_mvfp_info(void)
284 | { |
285 | return machine_arm_mvfp_info(); |
286 | } |
287 | |
288 | void |
289 | do_cacheid(void) |
290 | { |
291 | arm_cache_clidr_info_t arm_cache_clidr_info; |
292 | arm_cache_ccsidr_info_t arm_cache_ccsidr_info; |
293 | |
294 | /* |
295 | * We only need to parse cache geometry parameters once per cluster type. |
296 | * Skip this if some other core of the same type has already parsed them. |
297 | */ |
298 | cluster_type_t cluster_type = ml_get_topology_info()->cpus[ml_get_cpu_number_local()].cluster_type; |
299 | uint8_t prev_cpuid_cache_info_bitmap = os_atomic_or_orig(&cpuid_cache_info_bitmap, |
300 | (uint8_t)(1 << cluster_type), acq_rel); |
301 | if (prev_cpuid_cache_info_bitmap & (1 << cluster_type)) { |
302 | return; |
303 | } |
304 | |
305 | cache_info_t *cpuid_cache_info_p = &cpuid_cache_info[cluster_type]; |
306 | |
307 | arm_cache_clidr_info.value = machine_read_clidr(); |
308 | |
309 | /* |
310 | * For compatibility purposes with existing callers, let's cache the boot CPU |
311 | * cache parameters and return those upon any call to cache_info(); |
312 | */ |
313 | if (prev_cpuid_cache_info_bitmap == 0) { |
314 | cpuid_cache_info_boot_cpu = cpuid_cache_info_p; |
315 | } |
316 | |
317 | /* Select L1 data/unified cache */ |
318 | |
	machine_write_csselr(CSSELR_L1, CSSELR_DATA_UNIFIED);
320 | arm_cache_ccsidr_info.value = machine_read_ccsidr(); |
321 | |
322 | cpuid_cache_info_p->c_unified = (arm_cache_clidr_info.bits.Ctype1 == 0x4) ? 1 : 0; |
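	/*
	 * The c_type nibble is CCSIDR[31:28]: in the ARMv7-style encoding
	 * these are the WA (bit 28), RA (29), WB (30), and WT (31) flags,
	 * hence the one-hot 0x1/0x2/0x4/0x8 cases in the switch below.
	 */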
323 | |
324 | switch (arm_cache_ccsidr_info.bits.c_type) { |
325 | case 0x1: |
326 | cpuid_cache_info_p->c_type = CACHE_WRITE_ALLOCATION; |
327 | break; |
328 | case 0x2: |
329 | cpuid_cache_info_p->c_type = CACHE_READ_ALLOCATION; |
330 | break; |
331 | case 0x4: |
332 | cpuid_cache_info_p->c_type = CACHE_WRITE_BACK; |
333 | break; |
334 | case 0x8: |
335 | cpuid_cache_info_p->c_type = CACHE_WRITE_THROUGH; |
336 | break; |
337 | default: |
338 | cpuid_cache_info_p->c_type = CACHE_UNKNOWN; |
339 | } |
340 | |
341 | cpuid_cache_info_p->c_linesz = 4 * (1 << (arm_cache_ccsidr_info.bits.LineSize + 2)); |
342 | cpuid_cache_info_p->c_assoc = (arm_cache_ccsidr_info.bits.Assoc + 1); |
343 | |
344 | /* I cache size */ |
345 | cpuid_cache_info_p->c_isize = (arm_cache_ccsidr_info.bits.NumSets + 1) * cpuid_cache_info_p->c_linesz * cpuid_cache_info_p->c_assoc; |
346 | |
347 | /* D cache size */ |
348 | cpuid_cache_info_p->c_dsize = (arm_cache_ccsidr_info.bits.NumSets + 1) * cpuid_cache_info_p->c_linesz * cpuid_cache_info_p->c_assoc; |
349 | |
350 | |
351 | if ((arm_cache_clidr_info.bits.Ctype3 == 0x4) || |
352 | (arm_cache_clidr_info.bits.Ctype2 == 0x4) || (arm_cache_clidr_info.bits.Ctype2 == 0x2)) { |
353 | if (arm_cache_clidr_info.bits.Ctype3 == 0x4) { |
354 | /* Select L3 (LLC) if the SoC is new enough to have that. |
355 | * This will be the second-level cache for the highest-performing ACC. */ |
			machine_write_csselr(CSSELR_L3, CSSELR_DATA_UNIFIED);
357 | } else { |
358 | /* Select L2 data cache */ |
			machine_write_csselr(CSSELR_L2, CSSELR_DATA_UNIFIED);
360 | } |
361 | arm_cache_ccsidr_info.value = machine_read_ccsidr(); |
362 | |
363 | cpuid_cache_info_p->c_linesz = 4 * (1 << (arm_cache_ccsidr_info.bits.LineSize + 2)); |
364 | cpuid_cache_info_p->c_assoc = (arm_cache_ccsidr_info.bits.Assoc + 1); |
365 | cpuid_cache_info_p->c_l2size = (arm_cache_ccsidr_info.bits.NumSets + 1) * cpuid_cache_info_p->c_linesz * cpuid_cache_info_p->c_assoc; |
366 | cpuid_cache_info_p->c_inner_cache_size = cpuid_cache_info_p->c_dsize; |
367 | cpuid_cache_info_p->c_bulksize_op = cpuid_cache_info_p->c_l2size; |
368 | |
		/* capri has a 2MB L2 cache, unlike every other SoC up to this
		 * point, which had a 1MB L2 cache; to get the same performance
		 * gain from coloring, we have to double the number of colors.
		 * Note that in general (and in fact as it's implemented in
		 * i386/cpuid.c), the number of colors is calculated as the
		 * cache line size * the number of sets divided by the page
		 * size. Also note that for H8 devices and up, the page size
		 * will be 16KB instead of 4KB, which reduces the number of
		 * colors required. Thus, this is really a temporary solution
		 * for capri specifically that we may want to generalize later:
		 *
		 * TODO: Are there any special considerations for our unusual
		 * cache geometries (3MB)?
		 */
383 | vm_cache_geometry_colors = ((arm_cache_ccsidr_info.bits.NumSets + 1) * cpuid_cache_info_p->c_linesz) / PAGE_SIZE; |
		kprintf(" vm_cache_geometry_colors: %d\n", vm_cache_geometry_colors);
385 | } else { |
386 | cpuid_cache_info_p->c_l2size = 0; |
387 | |
388 | cpuid_cache_info_p->c_inner_cache_size = cpuid_cache_info_p->c_dsize; |
389 | cpuid_cache_info_p->c_bulksize_op = cpuid_cache_info_p->c_dsize; |
390 | } |
391 | |
392 | if (cpuid_cache_info_p->c_unified == 0) { |
		machine_write_csselr(CSSELR_L1, CSSELR_INSTR);
394 | arm_cache_ccsidr_info.value = machine_read_ccsidr(); |
395 | uint32_t c_linesz = 4 * (1 << (arm_cache_ccsidr_info.bits.LineSize + 2)); |
396 | uint32_t c_assoc = (arm_cache_ccsidr_info.bits.Assoc + 1); |
397 | /* I cache size */ |
398 | cpuid_cache_info_p->c_isize = (arm_cache_ccsidr_info.bits.NumSets + 1) * c_linesz * c_assoc; |
399 | } |
400 | |
401 | if (cpuid_cache_info_p == cpuid_cache_info_boot_cpu) { |
402 | cpuid_cache_info_p->c_valid = true; |
403 | } else { |
		lck_rw_lock_exclusive(&cpuid_cache_info_lck_rw);
		cpuid_cache_info_p->c_valid = true;
		thread_wakeup((event_t)&cpuid_cache_info_p->c_valid);
		lck_rw_unlock_exclusive(&cpuid_cache_info_lck_rw);
408 | } |
409 | |
	kprintf("%s() - %u bytes %s cache (I:%u D:%u (%s)), %u-way assoc, %u bytes/line\n",
	    __FUNCTION__,
	    cpuid_cache_info_p->c_dsize + cpuid_cache_info_p->c_isize,
	    ((cpuid_cache_info_p->c_type == CACHE_WRITE_BACK) ? "WB" :
	    (cpuid_cache_info_p->c_type == CACHE_WRITE_THROUGH ? "WT" : "Unknown")),
	    cpuid_cache_info_p->c_isize,
	    cpuid_cache_info_p->c_dsize,
	    (cpuid_cache_info_p->c_unified) ? "unified" : "separate",
418 | cpuid_cache_info_p->c_assoc, |
419 | cpuid_cache_info_p->c_linesz); |
420 | } |
421 | |
422 | cache_info_t * |
423 | cache_info(void) |
424 | { |
425 | return cpuid_cache_info_boot_cpu; |
426 | } |
427 | |
428 | cache_info_t * |
429 | cache_info_type(cluster_type_t cluster_type) |
430 | { |
431 | assert((cluster_type >= 0) && (cluster_type < MAX_CPU_TYPES)); |
432 | cache_info_t *ret = &cpuid_cache_info[cluster_type]; |
433 | |
434 | /* |
435 | * cpuid_cache_info_boot_cpu is always populated by the time |
436 | * cache_info_type() is callable. Other clusters may not have completed |
437 | * do_cacheid() yet. |
438 | */ |
439 | if (ret != cpuid_cache_info_boot_cpu) { |
		lck_rw_lock_shared(&cpuid_cache_info_lck_rw);
		while (CC_UNLIKELY(!ret->c_valid)) {
			lck_rw_sleep(&cpuid_cache_info_lck_rw, LCK_SLEEP_DEFAULT,
			    (event_t)&ret->c_valid, THREAD_UNINT);
		}
		lck_rw_unlock_shared(&cpuid_cache_info_lck_rw);
446 | } |
447 | |
448 | return ret; |
449 | } |
450 | |