machine_routines_common.c source code [xnu/osfmk/arm/machine_routines_common.c]

1	/*
2	* Copyright (c) 2007-2021 Apple Inc. All rights reserved.
3	*
4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. The rights granted to you under the License
10	* may not be used to create, or enable the creation or redistribution of,
11	* unlawful or unlicensed copies of an Apple operating system, or to
12	* circumvent, violate, or enable the circumvention or violation of, any
13	* terms of an Apple operating system software license agreement.
14	*
15	* Please obtain a copy of the License at
16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
17	*
18	* The Original Code and all software distributed under the License are
19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23	* Please see the License for the specific language governing rights and
24	* limitations under the License.
25	*
26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27	*/
28
29	#include <arm/machine_cpu.h>
30	#include <arm/cpu_internal.h>
31	#include <arm/cpuid.h>
32	#include <arm/cpuid_internal.h>
33	#include <arm/cpu_data.h>
34	#include <arm/cpu_data_internal.h>
35	#include <arm/misc_protos.h>
36	#include <arm/machdep_call.h>
37	#include <arm/machine_routines.h>
38	#include <arm/rtclock.h>
39	#include <kern/machine.h>
40	#include <kern/thread.h>
41	#include <kern/thread_group.h>
42	#include <kern/policy_internal.h>
43	#include <kern/sched_hygiene.h>
44	#include <kern/startup.h>
45	#include <kern/monotonic.h>
46	#include <machine/config.h>
47	#include <machine/atomic.h>
48	#include <machine/monotonic.h>
49	#include <pexpert/pexpert.h>
50	#include <pexpert/device_tree.h>
51
52	#include <mach/machine.h>
53	#include <mach/machine/sdt.h>
54
55	#if !HAS_CONTINUOUS_HWCLOCK
56	extern uint64_t mach_absolutetime_asleep;
57	#else
58	extern uint64_t wake_abstime;
59	static uint64_t wake_conttime = UINT64_MAX;
60	#endif
61
62	extern volatile uint32_t debug_enabled;
63	extern _Atomic unsigned int cluster_type_num_active_cpus[MAX_CPU_TYPES];
64	const char *cluster_type_names[MAX_CPU_TYPES] = {
65	[CLUSTER_TYPE_SMP] = "Standard",
66	[CLUSTER_TYPE_P] = "Performance",
67	[CLUSTER_TYPE_E] = "Efficiency",
68	};
69
70	static int max_cpus_initialized = `0`;
71	#define MAX_CPUS_SET 0x1
72	#define MAX_CPUS_WAIT 0x2
73
74	LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
75	LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
76	uint32_t lockdown_done = `0`;
77	boolean_t is_clock_configured = FALSE;
78
79
80	static void
81	sched_perfcontrol_oncore_default(perfcontrol_state_t new_thread_state __unused, going_on_core_t on __unused)
82	{
83	}
84
85	static void
86	sched_perfcontrol_switch_default(perfcontrol_state_t old_thread_state __unused, perfcontrol_state_t new_thread_state __unused)
87	{
88	}
89
90	static void
91	sched_perfcontrol_offcore_default(perfcontrol_state_t old_thread_state __unused, going_off_core_t off __unused, boolean_t thread_terminating __unused)
92	{
93	}
94
95	static void
96	sched_perfcontrol_thread_group_default(thread_group_data_t data __unused)
97	{
98	}
99
100	static void
101	sched_perfcontrol_max_runnable_latency_default(perfcontrol_max_runnable_latency_t latencies __unused)
102	{
103	}
104
105	static void
106	sched_perfcontrol_work_interval_notify_default(perfcontrol_state_t thread_state __unused,
107	perfcontrol_work_interval_t work_interval __unused)
108	{
109	}
110
111	static void
112	sched_perfcontrol_work_interval_ctl_default(perfcontrol_state_t thread_state __unused,
113	perfcontrol_work_interval_instance_t instance __unused)
114	{
115	}
116
117	static void
118	sched_perfcontrol_deadline_passed_default(__unused uint64_t deadline)
119	{
120	}
121
122	static void
123	sched_perfcontrol_csw_default(
124	__unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
125	__unused uint32_t flags, __unused struct perfcontrol_thread_data *offcore,
126	__unused struct perfcontrol_thread_data *oncore,
127	__unused struct perfcontrol_cpu_counters cpu_counters, __unused void* *unused)
128	{
129	}
130
131	static void
132	sched_perfcontrol_state_update_default(
133	__unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
134	__unused uint32_t flags, __unused struct perfcontrol_thread_data *thr_data,
135	__unused void *unused)
136	{
137	}
138
139	static void
140	sched_perfcontrol_thread_group_blocked_default(
141	__unused thread_group_data_t blocked_tg, __unused thread_group_data_t blocking_tg,
142	__unused uint32_t flags, __unused perfcontrol_state_t blocked_thr_state)
143	{
144	}
145
146	static void
147	sched_perfcontrol_thread_group_unblocked_default(
148	__unused thread_group_data_t unblocked_tg, __unused thread_group_data_t unblocking_tg,
149	__unused uint32_t flags, __unused perfcontrol_state_t unblocked_thr_state)
150	{
151	}
152
153	sched_perfcontrol_offcore_t sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
154	sched_perfcontrol_context_switch_t sched_perfcontrol_switch = sched_perfcontrol_switch_default;
155	sched_perfcontrol_oncore_t sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
156	sched_perfcontrol_thread_group_init_t sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
157	sched_perfcontrol_thread_group_deinit_t sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
158	sched_perfcontrol_thread_group_flags_update_t sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
159	sched_perfcontrol_max_runnable_latency_t sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
160	sched_perfcontrol_work_interval_notify_t sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
161	sched_perfcontrol_work_interval_ctl_t sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
162	sched_perfcontrol_deadline_passed_t sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
163	sched_perfcontrol_csw_t sched_perfcontrol_csw = sched_perfcontrol_csw_default;
164	sched_perfcontrol_state_update_t sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
165	sched_perfcontrol_thread_group_blocked_t sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
166	sched_perfcontrol_thread_group_unblocked_t sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
167	boolean_t sched_perfcontrol_thread_shared_rsrc_flags_enabled = false;
168
169	void
170	sched_perfcontrol_register_callbacks(sched_perfcontrol_callbacks_t callbacks, unsigned long size_of_state)
171	{
172	assert(callbacks == NULL \|\| callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_2);
173
174	if (size_of_state > sizeof(struct perfcontrol_state)) {
175	panic("%s: Invalid required state size %lu", __FUNCTION__, size_of_state);
176	}
177
178	if (callbacks) {
179	#if CONFIG_THREAD_GROUPS
180	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_3) {
181	if (callbacks->thread_group_init != NULL) {
182	sched_perfcontrol_thread_group_init = callbacks->thread_group_init;
183	} else {
184	sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
185	}
186	if (callbacks->thread_group_deinit != NULL) {
187	sched_perfcontrol_thread_group_deinit = callbacks->thread_group_deinit;
188	} else {
189	sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
190	}
191	// tell CLPC about existing thread groups
192	thread_group_resync(TRUE);
193	}
194
195	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_6) {
196	if (callbacks->thread_group_flags_update != NULL) {
197	sched_perfcontrol_thread_group_flags_update = callbacks->thread_group_flags_update;
198	} else {
199	sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
200	}
201	}
202
203	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_8) {
204	if (callbacks->thread_group_blocked != NULL) {
205	sched_perfcontrol_thread_group_blocked = callbacks->thread_group_blocked;
206	} else {
207	sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
208	}
209
210	if (callbacks->thread_group_unblocked != NULL) {
211	sched_perfcontrol_thread_group_unblocked = callbacks->thread_group_unblocked;
212	} else {
213	sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
214	}
215	}
216	#endif
217	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_9) {
218	sched_perfcontrol_thread_shared_rsrc_flags_enabled = true;
219	}
220
221	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_7) {
222	if (callbacks->work_interval_ctl != NULL) {
223	sched_perfcontrol_work_interval_ctl = callbacks->work_interval_ctl;
224	} else {
225	sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
226	}
227	}
228
229	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_5) {
230	if (callbacks->csw != NULL) {
231	sched_perfcontrol_csw = callbacks->csw;
232	} else {
233	sched_perfcontrol_csw = sched_perfcontrol_csw_default;
234	}
235
236	if (callbacks->state_update != NULL) {
237	sched_perfcontrol_state_update = callbacks->state_update;
238	} else {
239	sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
240	}
241	}
242
243	if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_4) {
244	if (callbacks->deadline_passed != NULL) {
245	sched_perfcontrol_deadline_passed = callbacks->deadline_passed;
246	} else {
247	sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
248	}
249	}
250
251	if (callbacks->offcore != NULL) {
252	sched_perfcontrol_offcore = callbacks->offcore;
253	} else {
254	sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
255	}
256
257	if (callbacks->context_switch != NULL) {
258	sched_perfcontrol_switch = callbacks->context_switch;
259	} else {
260	sched_perfcontrol_switch = sched_perfcontrol_switch_default;
261	}
262
263	if (callbacks->oncore != NULL) {
264	sched_perfcontrol_oncore = callbacks->oncore;
265	} else {
266	sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
267	}
268
269	if (callbacks->max_runnable_latency != NULL) {
270	sched_perfcontrol_max_runnable_latency = callbacks->max_runnable_latency;
271	} else {
272	sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
273	}
274
275	if (callbacks->work_interval_notify != NULL) {
276	sched_perfcontrol_work_interval_notify = callbacks->work_interval_notify;
277	} else {
278	sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
279	}
280	} else {
281	/ reset to defaults /
282	#if CONFIG_THREAD_GROUPS
283	thread_group_resync(FALSE);
284	#endif
285	sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
286	sched_perfcontrol_switch = sched_perfcontrol_switch_default;
287	sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
288	sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
289	sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
290	sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
291	sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
292	sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
293	sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
294	sched_perfcontrol_csw = sched_perfcontrol_csw_default;
295	sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
296	sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
297	sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
298	}
299	}
300
301
302	static void
303	machine_switch_populate_perfcontrol_thread_data(struct perfcontrol_thread_data *data,
304	thread_t thread,
305	uint64_t same_pri_latency)
306	{
307	bzero(s: data, n: sizeof(struct perfcontrol_thread_data));
308	data->perfctl_class = thread_get_perfcontrol_class(thread);
309	data->energy_estimate_nj = `0`;
310	data->thread_id = thread->thread_id;
311	#if CONFIG_THREAD_GROUPS
312	struct thread_group *tg = thread_group_get(t: thread);
313	data->thread_group_id = thread_group_get_id(tg);
314	data->thread_group_data = thread_group_get_machine_data(tg);
315	#endif
316	data->scheduling_latency_at_same_basepri = same_pri_latency;
317	data->perfctl_state = FIND_PERFCONTROL_STATE(thread);
318	}
319
320	static void
321	machine_switch_populate_perfcontrol_cpu_counters(struct perfcontrol_cpu_counters *cpu_counters)
322	{
323	#if CONFIG_CPU_COUNTERS
324	mt_perfcontrol(&cpu_counters->instructions, &cpu_counters->cycles);
325	#else /* CONFIG_CPU_COUNTERS */
326	cpu_counters->instructions = `0`;
327	cpu_counters->cycles = `0`;
328	#endif /* !CONFIG_CPU_COUNTERS */
329	}
330
331	int perfcontrol_callout_stats_enabled = `0`;
332	static _Atomic uint64_t perfcontrol_callout_stats[PERFCONTROL_CALLOUT_MAX][PERFCONTROL_STAT_MAX];
333	static _Atomic uint64_t perfcontrol_callout_count[PERFCONTROL_CALLOUT_MAX];
334
335	#if CONFIG_CPU_COUNTERS
336	static inline
337	bool
338	perfcontrol_callout_counters_begin(uint64_t *counters)
339	{
340	if (!perfcontrol_callout_stats_enabled) {
341	return false;
342	}
343	mt_fixed_counts(counters);
344	return true;
345	}
346
347	static inline
348	void
349	perfcontrol_callout_counters_end(uint64_t *start_counters,
350	perfcontrol_callout_type_t type)
351	{
352	uint64_t end_counters[MT_CORE_NFIXED];
353	mt_fixed_counts(end_counters);
354	os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_CYCLES],
355	end_counters[MT_CORE_CYCLES] - start_counters[MT_CORE_CYCLES], relaxed);
356	os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_INSTRS],
357	end_counters[MT_CORE_INSTRS] - start_counters[MT_CORE_INSTRS], relaxed);
358	os_atomic_inc(&perfcontrol_callout_count[type], relaxed);
359	}
360	#endif /* CONFIG_CPU_COUNTERS */
361
362	uint64_t
363	perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type,
364	perfcontrol_callout_stat_t stat)
365	{
366	if (!perfcontrol_callout_stats_enabled) {
367	return `0`;
368	}
369	return os_atomic_load_wide(&perfcontrol_callout_stats[type][stat], relaxed) /
370	os_atomic_load_wide(&perfcontrol_callout_count[type], relaxed);
371	}
372
373
374
375	#if CONFIG_SCHED_EDGE
376
377	/*
378	* The Edge scheduler allows the performance controller to update properties about the
379	* threads as part of the callouts. These properties typically include shared cluster
380	* resource usage. This allows the scheduler to manage specific threads within the
381	* workload more optimally.
382	*/
383	static void
384	sched_perfcontrol_thread_flags_update(thread_t thread,
385	struct perfcontrol_thread_data *thread_data,
386	shared_rsrc_policy_agent_t agent)
387	{
388	kern_return_t kr = KERN_SUCCESS;
389	if (thread_data->thread_flags_mask & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_RR) {
390	if (thread_data->thread_flags & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_RR) {
391	kr = thread_shared_rsrc_policy_set(thread, `0`, CLUSTER_SHARED_RSRC_TYPE_RR, agent);
392	} else {
393	kr = thread_shared_rsrc_policy_clear(thread, CLUSTER_SHARED_RSRC_TYPE_RR, agent);
394	}
395	}
396	if (thread_data->thread_flags_mask & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_NATIVE_FIRST) {
397	if (thread_data->thread_flags & PERFCTL_THREAD_FLAGS_MASK_CLUSTER_SHARED_RSRC_NATIVE_FIRST) {
398	kr = thread_shared_rsrc_policy_set(thread, `0`, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST, agent);
399	} else {
400	kr = thread_shared_rsrc_policy_clear(thread, CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST, agent);
401	}
402	}
403	/*
404	* The thread_shared_rsrc_policy_* routines only fail if the performance controller is
405	* attempting to double set/clear a policy on the thread.
406	*/
407	assert(kr == KERN_SUCCESS);
408	}
409
410	#endif /* CONFIG_SCHED_EDGE */
411
412	void
413	machine_switch_perfcontrol_context(perfcontrol_event event,
414	uint64_t timestamp,
415	uint32_t flags,
416	uint64_t new_thread_same_pri_latency,
417	thread_t old,
418	thread_t new)
419	{
420
421	if (sched_perfcontrol_switch != sched_perfcontrol_switch_default) {
422	perfcontrol_state_t old_perfcontrol_state = FIND_PERFCONTROL_STATE(old);
423	perfcontrol_state_t new_perfcontrol_state = FIND_PERFCONTROL_STATE(new);
424	sched_perfcontrol_switch(old_perfcontrol_state, new_perfcontrol_state);
425	}
426
427	if (sched_perfcontrol_csw != sched_perfcontrol_csw_default) {
428	uint32_t cpu_id = (uint32_t)cpu_number();
429	struct perfcontrol_cpu_counters cpu_counters;
430	struct perfcontrol_thread_data offcore, oncore;
431	machine_switch_populate_perfcontrol_thread_data(data: &offcore, thread: old, same_pri_latency: `0`);
432	machine_switch_populate_perfcontrol_thread_data(data: &oncore, thread: new,
433	same_pri_latency: new_thread_same_pri_latency);
434	machine_switch_populate_perfcontrol_cpu_counters(cpu_counters: &cpu_counters);
435
436	#if CONFIG_CPU_COUNTERS
437	uint64_t counters[MT_CORE_NFIXED];
438	bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
439	#endif /* CONFIG_CPU_COUNTERS */
440	sched_perfcontrol_csw(event, cpu_id, timestamp, flags,
441	&offcore, &oncore, &cpu_counters, NULL);
442	#if CONFIG_CPU_COUNTERS
443	if (ctrs_enabled) {
444	perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_CONTEXT);
445	}
446	#endif /* CONFIG_CPU_COUNTERS */
447
448	recount_add_energy(off_thread: old, off_task: get_threadtask(old),
449	energy_nj: offcore.energy_estimate_nj);
450
451	#if CONFIG_SCHED_EDGE
452	if (sched_perfcontrol_thread_shared_rsrc_flags_enabled) {
453	sched_perfcontrol_thread_flags_update(old, &offcore, SHARED_RSRC_POLICY_AGENT_PERFCTL_CSW);
454	}
455	#endif /* CONFIG_SCHED_EDGE */
456	}
457	}
458
459	void
460	machine_switch_perfcontrol_state_update(perfcontrol_event event,
461	uint64_t timestamp,
462	uint32_t flags,
463	thread_t thread)
464	{
465
466	if (sched_perfcontrol_state_update == sched_perfcontrol_state_update_default) {
467	return;
468	}
469	uint32_t cpu_id = (uint32_t)cpu_number();
470	struct perfcontrol_thread_data data;
471	machine_switch_populate_perfcontrol_thread_data(data: &data, thread, same_pri_latency: `0`);
472
473	#if CONFIG_CPU_COUNTERS
474	uint64_t counters[MT_CORE_NFIXED];
475	bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
476	#endif /* CONFIG_CPU_COUNTERS */
477	sched_perfcontrol_state_update(event, cpu_id, timestamp, flags,
478	&data, NULL);
479	#if CONFIG_CPU_COUNTERS
480	if (ctrs_enabled) {
481	perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_STATE_UPDATE);
482	}
483	#endif /* CONFIG_CPU_COUNTERS */
484
485	#if CONFIG_PERVASIVE_ENERGY
486	recount_add_energy(thread, get_threadtask(thread), data.energy_estimate_nj);
487	#endif /* CONFIG_PERVASIVE_ENERGY */
488
489	#if CONFIG_SCHED_EDGE
490	if (sched_perfcontrol_thread_shared_rsrc_flags_enabled && (event == QUANTUM_EXPIRY)) {
491	sched_perfcontrol_thread_flags_update(thread, &data, SHARED_RSRC_POLICY_AGENT_PERFCTL_QUANTUM);
492	} else {
493	assert(data.thread_flags_mask == `0`);
494	}
495	#endif /* CONFIG_SCHED_EDGE */
496	}
497
498	void
499	machine_thread_going_on_core(thread_t new_thread,
500	thread_urgency_t urgency,
501	uint64_t sched_latency,
502	uint64_t same_pri_latency,
503	uint64_t timestamp)
504	{
505	if (sched_perfcontrol_oncore == sched_perfcontrol_oncore_default) {
506	return;
507	}
508	struct going_on_core on_core;
509	perfcontrol_state_t state = FIND_PERFCONTROL_STATE(new_thread);
510
511	on_core.thread_id = new_thread->thread_id;
512	on_core.energy_estimate_nj = `0`;
513	on_core.qos_class = (uint16_t)proc_get_effective_thread_policy(thread: new_thread, TASK_POLICY_QOS);
514	on_core.urgency = (uint16_t)urgency;
515	on_core.is_32_bit = thread_is_64bit_data(new_thread) ? FALSE : TRUE;
516	on_core.is_kernel_thread = get_threadtask(new_thread) == kernel_task;
517	#if CONFIG_THREAD_GROUPS
518	struct thread_group *tg = thread_group_get(t: new_thread);
519	on_core.thread_group_id = thread_group_get_id(tg);
520	on_core.thread_group_data = thread_group_get_machine_data(tg);
521	#endif
522	on_core.scheduling_latency = sched_latency;
523	on_core.start_time = timestamp;
524	on_core.scheduling_latency_at_same_basepri = same_pri_latency;
525
526	#if CONFIG_CPU_COUNTERS
527	uint64_t counters[MT_CORE_NFIXED];
528	bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
529	#endif /* CONFIG_CPU_COUNTERS */
530	sched_perfcontrol_oncore(state, &on_core);
531	#if CONFIG_CPU_COUNTERS
532	if (ctrs_enabled) {
533	perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_ON_CORE);
534	}
535	#endif /* CONFIG_CPU_COUNTERS */
536	}
537
538	void
539	machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
540	uint64_t last_dispatch, __unused boolean_t thread_runnable)
541	{
542	if (sched_perfcontrol_offcore == sched_perfcontrol_offcore_default) {
543	return;
544	}
545	struct going_off_core off_core;
546	perfcontrol_state_t state = FIND_PERFCONTROL_STATE(old_thread);
547
548	off_core.thread_id = old_thread->thread_id;
549	off_core.energy_estimate_nj = `0`;
550	off_core.end_time = last_dispatch;
551	#if CONFIG_THREAD_GROUPS
552	struct thread_group *tg = thread_group_get(t: old_thread);
553	off_core.thread_group_id = thread_group_get_id(tg);
554	off_core.thread_group_data = thread_group_get_machine_data(tg);
555	#endif
556
557	#if CONFIG_CPU_COUNTERS
558	uint64_t counters[MT_CORE_NFIXED];
559	bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
560	#endif /* CONFIG_CPU_COUNTERS */
561	sched_perfcontrol_offcore(state, &off_core, thread_terminating);
562	#if CONFIG_CPU_COUNTERS
563	if (ctrs_enabled) {
564	perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_OFF_CORE);
565	}
566	#endif /* CONFIG_CPU_COUNTERS */
567	}
568
569	#if CONFIG_THREAD_GROUPS
570	void
571	machine_thread_group_init(struct thread_group *tg)
572	{
573	if (sched_perfcontrol_thread_group_init == sched_perfcontrol_thread_group_default) {
574	return;
575	}
576	struct thread_group_data data;
577	data.thread_group_id = thread_group_get_id(tg);
578	data.thread_group_data = thread_group_get_machine_data(tg);
579	data.thread_group_size = thread_group_machine_data_size();
580	data.thread_group_flags = thread_group_get_flags(tg);
581	sched_perfcontrol_thread_group_init(&data);
582	}
583
584	void
585	machine_thread_group_deinit(struct thread_group *tg)
586	{
587	if (sched_perfcontrol_thread_group_deinit == sched_perfcontrol_thread_group_default) {
588	return;
589	}
590	struct thread_group_data data;
591	data.thread_group_id = thread_group_get_id(tg);
592	data.thread_group_data = thread_group_get_machine_data(tg);
593	data.thread_group_size = thread_group_machine_data_size();
594	data.thread_group_flags = thread_group_get_flags(tg);
595	sched_perfcontrol_thread_group_deinit(&data);
596	}
597
598	void
599	machine_thread_group_flags_update(struct thread_group *tg, uint32_t flags)
600	{
601	if (sched_perfcontrol_thread_group_flags_update == sched_perfcontrol_thread_group_default) {
602	return;
603	}
604	struct thread_group_data data;
605	data.thread_group_id = thread_group_get_id(tg);
606	data.thread_group_data = thread_group_get_machine_data(tg);
607	data.thread_group_size = thread_group_machine_data_size();
608	data.thread_group_flags = flags;
609	sched_perfcontrol_thread_group_flags_update(&data);
610	}
611
612	void
613	machine_thread_group_blocked(struct thread_group *blocked_tg,
614	struct thread_group *blocking_tg,
615	uint32_t flags,
616	thread_t blocked_thread)
617	{
618	if (sched_perfcontrol_thread_group_blocked == sched_perfcontrol_thread_group_blocked_default) {
619	return;
620	}
621
622	spl_t s = splsched();
623
624	perfcontrol_state_t state = FIND_PERFCONTROL_STATE(blocked_thread);
625	struct thread_group_data blocked_data;
626	assert(blocked_tg != NULL);
627
628	blocked_data.thread_group_id = thread_group_get_id(tg: blocked_tg);
629	blocked_data.thread_group_data = thread_group_get_machine_data(tg: blocked_tg);
630	blocked_data.thread_group_size = thread_group_machine_data_size();
631
632	if (blocking_tg == NULL) {
633	/*
634	* For special cases such as the render server, the blocking TG is a
635	* well known TG. Only in that case, the blocking_tg should be NULL.
636	*/
637	assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
638	sched_perfcontrol_thread_group_blocked(&blocked_data, NULL, flags, state);
639	} else {
640	struct thread_group_data blocking_data;
641	blocking_data.thread_group_id = thread_group_get_id(tg: blocking_tg);
642	blocking_data.thread_group_data = thread_group_get_machine_data(tg: blocking_tg);
643	blocking_data.thread_group_size = thread_group_machine_data_size();
644	sched_perfcontrol_thread_group_blocked(&blocked_data, &blocking_data, flags, state);
645	}
646	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) \| DBG_FUNC_START,
647	thread_tid(blocked_thread), thread_group_get_id(blocked_tg),
648	blocking_tg ? thread_group_get_id(blocking_tg) : THREAD_GROUP_INVALID,
649	flags);
650
651	splx(s);
652	}
653
654	void
655	machine_thread_group_unblocked(struct thread_group *unblocked_tg,
656	struct thread_group *unblocking_tg,
657	uint32_t flags,
658	thread_t unblocked_thread)
659	{
660	if (sched_perfcontrol_thread_group_unblocked == sched_perfcontrol_thread_group_unblocked_default) {
661	return;
662	}
663
664	spl_t s = splsched();
665
666	perfcontrol_state_t state = FIND_PERFCONTROL_STATE(unblocked_thread);
667	struct thread_group_data unblocked_data;
668	assert(unblocked_tg != NULL);
669
670	unblocked_data.thread_group_id = thread_group_get_id(tg: unblocked_tg);
671	unblocked_data.thread_group_data = thread_group_get_machine_data(tg: unblocked_tg);
672	unblocked_data.thread_group_size = thread_group_machine_data_size();
673
674	if (unblocking_tg == NULL) {
675	/*
676	* For special cases such as the render server, the unblocking TG is a
677	* well known TG. Only in that case, the unblocking_tg should be NULL.
678	*/
679	assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
680	sched_perfcontrol_thread_group_unblocked(&unblocked_data, NULL, flags, state);
681	} else {
682	struct thread_group_data unblocking_data;
683	unblocking_data.thread_group_id = thread_group_get_id(tg: unblocking_tg);
684	unblocking_data.thread_group_data = thread_group_get_machine_data(tg: unblocking_tg);
685	unblocking_data.thread_group_size = thread_group_machine_data_size();
686	sched_perfcontrol_thread_group_unblocked(&unblocked_data, &unblocking_data, flags, state);
687	}
688	KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) \| DBG_FUNC_END,
689	thread_tid(unblocked_thread), thread_group_get_id(unblocked_tg),
690	unblocking_tg ? thread_group_get_id(unblocking_tg) : THREAD_GROUP_INVALID,
691	flags);
692
693	splx(s);
694	}
695
696	#endif /* CONFIG_THREAD_GROUPS */
697
698	void
699	machine_max_runnable_latency(uint64_t bg_max_latency,
700	uint64_t default_max_latency,
701	uint64_t realtime_max_latency)
702	{
703	if (sched_perfcontrol_max_runnable_latency == sched_perfcontrol_max_runnable_latency_default) {
704	return;
705	}
706	struct perfcontrol_max_runnable_latency latencies = {
707	.max_scheduling_latencies = {
708	[THREAD_URGENCY_NONE] = `0`,
709	[THREAD_URGENCY_BACKGROUND] = bg_max_latency,
710	[THREAD_URGENCY_NORMAL] = default_max_latency,
711	[THREAD_URGENCY_REAL_TIME] = realtime_max_latency
712	}
713	};
714
715	sched_perfcontrol_max_runnable_latency(&latencies);
716	}
717
718	void
719	machine_work_interval_notify(thread_t thread,
720	struct kern_work_interval_args* kwi_args)
721	{
722	if (sched_perfcontrol_work_interval_notify == sched_perfcontrol_work_interval_notify_default) {
723	return;
724	}
725	perfcontrol_state_t state = FIND_PERFCONTROL_STATE(thread);
726	struct perfcontrol_work_interval work_interval = {
727	.thread_id = thread->thread_id,
728	.qos_class = (uint16_t)proc_get_effective_thread_policy(thread, TASK_POLICY_QOS),
729	.urgency = kwi_args->urgency,
730	.flags = kwi_args->notify_flags,
731	.work_interval_id = kwi_args->work_interval_id,
732	.start = kwi_args->start,
733	.finish = kwi_args->finish,
734	.deadline = kwi_args->deadline,
735	.next_start = kwi_args->next_start,
736	.create_flags = kwi_args->create_flags,
737	};
738	#if CONFIG_THREAD_GROUPS
739	struct thread_group *tg;
740	tg = thread_group_get(t: thread);
741	work_interval.thread_group_id = thread_group_get_id(tg);
742	work_interval.thread_group_data = thread_group_get_machine_data(tg);
743	#endif
744	sched_perfcontrol_work_interval_notify(state, &work_interval);
745	}
746
747
748	void
749	machine_perfcontrol_deadline_passed(uint64_t deadline)
750	{
751	if (sched_perfcontrol_deadline_passed != sched_perfcontrol_deadline_passed_default) {
752	sched_perfcontrol_deadline_passed(deadline);
753	}
754	}
755
756	#if SCHED_HYGIENE_DEBUG
757
758	__options_decl(int_mask_hygiene_flags_t, uint8_t, {
759	INT_MASK_BASE = `0x00`,
760	INT_MASK_FROM_HANDLER = `0x01`,
761	INT_MASK_IS_STACKSHOT = `0x02`,
762	});
763
764	/*
765	* ml_spin_debug_reset()
766	* Reset the timestamp on a thread that has been unscheduled
767	* to avoid false alarms. Alarm will go off if interrupts are held
768	* disabled for too long, starting from now.
769	*
770	* Call ml_get_timebase() directly to prevent extra overhead on newer
771	* platforms that's enabled in DEVELOPMENT kernel configurations.
772	*/
773	void
774	ml_spin_debug_reset(thread_t thread)
775	{
776	if (thread->machine.intmask_timestamp) {
777	thread->machine.intmask_timestamp = ml_get_sched_hygiene_timebase();
778	INTERRUPT_MASKED_DEBUG_CAPTURE_PMC(thread);
779	}
780	}
781
782	/*
783	* ml_spin_debug_clear()
784	* Clear the timestamp and cycle/instruction counts on a thread that
785	* has been unscheduled to avoid false alarms
786	*/
787	void
788	ml_spin_debug_clear(thread_t thread)
789	{
790	thread->machine.intmask_timestamp = `0`;
791	thread->machine.intmask_cycles = `0`;
792	thread->machine.intmask_instr = `0`;
793	}
794
795	/*
796	* ml_spin_debug_clear_self()
797	* Clear the timestamp on the current thread to prevent
798	* false alarms
799	*/
800	void
801	ml_spin_debug_clear_self(void)
802	{
803	ml_spin_debug_clear(current_thread());
804	}
805
806	#ifndef KASAN
807
808	/*
809	* Get a character representing the provided thread's kind of CPU.
810	*/
811	#if !CONFIG_CPU_COUNTERS
812	__unused
813	#endif // !CONFIG_CPU_COUNTERS
814	static char
815	__ml_interrupts_disabled_cpu_kind(thread_t thread)
816	{
817	#if __AMP__
818	processor_t processor = thread->last_processor;
819	if (!processor) {
820	return `'!'`;
821	}
822
823	switch (processor->processor_set->pset_cluster_type) {
824	case PSET_AMP_P:
825	return `'P'`;
826	case PSET_AMP_E:
827	return `'E'`;
828	default:
829	return `'?'`;
830	}
831	#else // __AMP__
832	#pragma unused(thread)
833	return `'-'`;
834	#endif // !__AMP__
835	}
836
/* Size of the buffer holding the optional frequency/CPI suffix of the panic message. */
#define EXTRA_INFO_STRING_SIZE  256
/* Average frequencies below this many MHz are labelled "low" in that suffix. */
#define LOW_FREQ_THRESHOLD_MHZ  500
/* Whole cycles-per-instruction at or above this value are labelled "high". */
#define HIGH_CPI_THRESHOLD      3
840
841	static void
842	__ml_trigger_interrupts_disabled_handle(thread_t thread, uint64_t start, uint64_t now, uint64_t timeout, int_mask_hygiene_flags_t flags)
843	{
844	mach_timebase_info_data_t timebase;
845	clock_timebase_info(&timebase);
846	bool is_int_handler = flags & INT_MASK_FROM_HANDLER;
847	bool is_stackshot = flags & INT_MASK_IS_STACKSHOT;
848
849	const uint64_t time_elapsed = now - start;
850	const uint64_t time_elapsed_ns = (time_elapsed * timebase.numer) / timebase.denom;
851
852	uint64_t current_cycles = `0`, current_instrs = `0`;
853
854	#if CONFIG_CPU_COUNTERS
855	if (sched_hygiene_debug_pmc) {
856	mt_cur_cpu_cycles_instrs_speculative(&current_cycles, &current_instrs);
857	}
858	#endif // CONFIG_CPU_COUNTERS
859
860	const uint64_t cycles_elapsed = current_cycles - thread->machine.intmask_cycles;
861	const uint64_t instrs_elapsed = current_instrs - thread->machine.intmask_instr;
862
863	if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
864	const uint64_t timeout_ns = ((timeout * debug_cpu_performance_degradation_factor) * timebase.numer) / timebase.denom;
865	char extra_info_string[EXTRA_INFO_STRING_SIZE] = { `'\0'` };
866	#if CONFIG_CPU_COUNTERS
867	if (sched_hygiene_debug_pmc) {
868	const uint64_t time_elapsed_us = time_elapsed_ns / `1000`;
869	const uint64_t average_freq_mhz = cycles_elapsed / time_elapsed_us;
870	const uint64_t average_cpi_whole = cycles_elapsed / instrs_elapsed;
871	const uint64_t average_cpi_fractional = ((cycles_elapsed * `100`) / instrs_elapsed) % `100`;
872	bool high_cpi = average_cpi_whole >= HIGH_CPI_THRESHOLD;
873	char core_kind = __ml_interrupts_disabled_cpu_kind(thread);
874	bool low_mhz = average_freq_mhz < LOW_FREQ_THRESHOLD_MHZ;
875
876	snprintf(extra_info_string, EXTRA_INFO_STRING_SIZE,
877	", %sfreq = %llu MHz, %sCPI = %llu.%llu, CPU kind = %c",
878	low_mhz ? "low " : "",
879	average_freq_mhz,
880	high_cpi ? "high " : "",
881	average_cpi_whole,
882	average_cpi_fractional,
883	core_kind);
884	}
885	#endif // CONFIG_CPU_COUNTERS
886
887	if (is_int_handler) {
888	panic("Processing of an interrupt (type = %u, handler address = %p, vector = %p) "
889	"took %llu nanoseconds (start = %llu, now = %llu, timeout = %llu ns%s)",
890	thread->machine.int_type, (void )thread->machine.int_handler_addr, (void* *)thread->machine.int_vector,
891	time_elapsed_ns, start, now, timeout_ns, extra_info_string);
892	} else {
893	panic("%s for %llu nanoseconds (start = %llu, now = %llu, timeout = %llu ns%s)",
894	is_stackshot ? "Stackshot disabled interrupts" : "Interrupts held disabled",
895	time_elapsed_ns, start, now, timeout_ns, extra_info_string);
896	}
897	} else if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_TRACE) {
898	if (is_int_handler) {
899	static const uint32_t interrupt_handled_dbgid =
900	MACHDBG_CODE(DBG_MACH_SCHED, MACH_INT_HANDLED_EXPIRED);
901	DTRACE_SCHED3(interrupt_handled_dbgid, uint64_t, time_elapsed,
902	uint64_t, cycles_elapsed, uint64_t, instrs_elapsed);
903	KDBG(interrupt_handled_dbgid, time_elapsed,
904	cycles_elapsed, instrs_elapsed);
905	} else {
906	static const uint32_t interrupt_masked_dbgid =
907	MACHDBG_CODE(DBG_MACH_SCHED, MACH_INT_MASKED_EXPIRED);
908	DTRACE_SCHED3(interrupt_masked_dbgid, uint64_t, time_elapsed,
909	uint64_t, cycles_elapsed, uint64_t, instrs_elapsed);
910	KDBG(interrupt_masked_dbgid, time_elapsed,
911	cycles_elapsed, instrs_elapsed);
912	}
913	}
914	}
915	#endif // !defined(KASAN)
916
917	static inline void
918	__ml_handle_interrupts_disabled_duration(thread_t thread, uint64_t timeout, bool is_int_handler)
919	{
920	if (timeout == `0`) {
921	return; // 0 means timeout disabled.
922	}
923	uint64_t start = is_int_handler ? thread->machine.inthandler_timestamp : thread->machine.intmask_timestamp;
924	if (start != `0`) {
925	uint64_t now = ml_get_sched_hygiene_timebase();
926
927	if (interrupt_masked_debug_mode &&
928	((now - start) > timeout * debug_cpu_performance_degradation_factor) &&
929	!thread->machine.inthandler_abandon) {
930	/*
931	* Disable the actual panic for KASAN due to the overhead of KASAN itself, leave the rest of the
932	* mechanism enabled so that KASAN can catch any bugs in the mechanism itself.
933	*/
934	#ifndef KASAN
935	__ml_trigger_interrupts_disabled_handle(thread, start, now, timeout, is_int_handler);
936	#endif
937	}
938
939	if (is_int_handler) {
940	uint64_t const duration = now - start;
941	/*
942	* No need for an atomic add, the only thread modifying
943	* this is ourselves. Other threads querying will just see
944	* either the old or the new value. (This will also just
945	* resolve to regular loads and stores on relevant
946	* platforms.)
947	*/
948	uint64_t const old_duration = os_atomic_load_wide(&thread->machine.int_time_mt, relaxed);
949	os_atomic_store_wide(&thread->machine.int_time_mt, old_duration + duration, relaxed);
950	}
951	}
952	}
953
954	void
955	ml_handle_interrupts_disabled_duration(thread_t thread)
956	{
957	__ml_handle_interrupts_disabled_duration(thread, os_atomic_load(&interrupt_masked_timeout, relaxed), INT_MASK_BASE);
958	}
959
960	void
961	ml_handle_stackshot_interrupt_disabled_duration(thread_t thread)
962	{
963	/ Use MAX() to let the user bump the timeout further if needed /
964	uint64_t stackshot_timeout = os_atomic_load(&stackshot_interrupt_masked_timeout, relaxed);
965	uint64_t normal_timeout = os_atomic_load(&interrupt_masked_timeout, relaxed);
966	uint64_t timeout = MAX(stackshot_timeout, normal_timeout);
967	__ml_handle_interrupts_disabled_duration(thread, timeout, INT_MASK_IS_STACKSHOT);
968	}
969
970	void
971	ml_handle_interrupt_handler_duration(thread_t thread)
972	{
973	__ml_handle_interrupts_disabled_duration(thread, os_atomic_load(&interrupt_masked_timeout, relaxed), INT_MASK_FROM_HANDLER);
974	}
975
976	void
977	ml_irq_debug_start(uintptr_t handler, uintptr_t vector)
978	{
979	INTERRUPT_MASKED_DEBUG_START(handler, DBG_INTR_TYPE_OTHER);
980	current_thread()->machine.int_vector = (uintptr_t)VM_KERNEL_STRIP_PTR(vector);
981	}
982
/*
 * End the interrupt-handler measurement started by ml_irq_debug_start().
 * Takes no arguments; declared with an explicit (void) prototype so that
 * calls with arguments are rejected at compile time.
 */
void
ml_irq_debug_end(void)
{
	INTERRUPT_MASKED_DEBUG_END();
}
988
989	/*
990	* Abandon a potential timeout when handling an interrupt. It is important to
991	* continue to keep track of the interrupt time so the time-stamp can't be
992	* reset. (Interrupt time is subtracted from preemption time to maintain
993	* accurate preemption time measurement).
994	* When `inthandler_abandon` is true, a timeout will be ignored when the
995	* interrupt handler finishes.
996	*/
997	void
998	ml_irq_debug_abandon(void)
999	{
1000	assert(!ml_get_interrupts_enabled());
1001
1002	thread_t t = current_thread();
1003	if (t->machine.inthandler_timestamp != `0`) {
1004	t->machine.inthandler_abandon = true;
1005	}
1006	}
1007	#endif // SCHED_HYGIENE_DEBUG
1008
#if SCHED_HYGIENE_DEBUG
/*
 * Arm the interrupts-masked measurement on `thread`: stamp the current
 * sched-hygiene time and run the PMC capture macro. Kept out of line
 * (noinline).
 */
__attribute__((noinline))
static void
ml_interrupt_masked_debug_timestamp(thread_t thread)
{
	const uint64_t masked_at = ml_get_sched_hygiene_timebase();

	thread->machine.intmask_timestamp = masked_at;
	INTERRUPT_MASKED_DEBUG_CAPTURE_PMC(thread);
}
#endif /* SCHED_HYGIENE_DEBUG */
1018
1019	boolean_t
1020	ml_set_interrupts_enabled_with_debug(boolean_t enable, boolean_t __unused debug)
1021	{
1022	thread_t thread;
1023	uint64_t state;
1024
1025	thread = current_thread();
1026
1027	state = __builtin_arm_rsr("DAIF");
1028
1029	if (enable && (state & DAIF_IRQF)) {
1030	assert(getCpuDatap()->cpu_int_state == NULL); // Make sure we're not enabling interrupts from primary interrupt context
1031	#if SCHED_HYGIENE_DEBUG
1032	if (__probable(debug && (interrupt_masked_debug_mode \|\| sched_preemption_disable_debug_mode))) {
1033	// Interrupts are currently masked, we will enable them (after finishing this check)
1034	if (stackshot_active()) {
1035	ml_handle_stackshot_interrupt_disabled_duration(thread);
1036	} else {
1037	ml_handle_interrupts_disabled_duration(thread);
1038	}
1039	thread->machine.intmask_timestamp = `0`;
1040	thread->machine.intmask_cycles = `0`;
1041	thread->machine.intmask_instr = `0`;
1042	}
1043	#endif // SCHED_HYGIENE_DEBUG
1044	if (get_preemption_level() == `0`) {
1045	while (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
1046	#if __ARM_USER_PROTECT__
1047	uintptr_t up = arm_user_protect_begin(thread);
1048	#endif
1049	ast_taken_kernel();
1050	#if __ARM_USER_PROTECT__
1051	arm_user_protect_end(thread, up, FALSE);
1052	#endif
1053	}
1054	}
1055	__builtin_arm_wsr("DAIFClr", DAIFSC_STANDARD_DISABLE);
1056	} else if (!enable && ((state & DAIF_IRQF) == `0`)) {
1057	__builtin_arm_wsr("DAIFSet", DAIFSC_STANDARD_DISABLE);
1058
1059	#if SCHED_HYGIENE_DEBUG
1060	if (__probable(debug && (interrupt_masked_debug_mode \|\| sched_preemption_disable_debug_mode))) {
1061	// Interrupts were enabled, we just masked them
1062	ml_interrupt_masked_debug_timestamp(thread);
1063	}
1064	#endif
1065	}
1066	return (state & DAIF_IRQF) == `0`;
1067	}
1068
1069	boolean_t
1070	ml_set_interrupts_enabled(boolean_t enable)
1071	{
1072	return ml_set_interrupts_enabled_with_debug(enable, true);
1073	}
1074
1075	boolean_t
1076	ml_early_set_interrupts_enabled(boolean_t enable)
1077	{
1078	return ml_set_interrupts_enabled(enable);
1079	}
1080
1081	/*
1082	* Interrupt enable function exported for AppleCLPC without
1083	* measurements enabled.
1084	*
1085	* Only for AppleCLPC!
1086	*/
1087	boolean_t
1088	sched_perfcontrol_ml_set_interrupts_without_measurement(boolean_t enable)
1089	{
1090	return ml_set_interrupts_enabled_with_debug(enable, false);
1091	}
1092
1093	/*
1094	* Routine: ml_at_interrupt_context
1095	* Function: Check if running at interrupt context
1096	*/
1097	boolean_t
1098	ml_at_interrupt_context(void)
1099	{
1100	/ Do not use a stack-based check here, as the top-level exception handler*
1101	* is free to use some other stack besides the per-CPU interrupt stack.
1102	* Interrupts should always be disabled if we're at interrupt context.
1103	* Check that first, as we may be in a preemptible non-interrupt context, in
1104	* which case we could be migrated to a different CPU between obtaining
1105	* the per-cpu data pointer and loading cpu_int_state. We then might end
1106	* up checking the interrupt state of a different CPU, resulting in a false
1107	* positive. But if interrupts are disabled, we also know we cannot be
1108	* preempted. */
1109	return !ml_get_interrupts_enabled() && (getCpuDatap()->cpu_int_state != NULL);
1110	}
1111
1112	/*
1113	* This answers the question
1114	* "after returning from this interrupt handler with the AST_URGENT bit set,
1115	* will I end up in ast_taken_user or ast_taken_kernel?"
1116	*
1117	* If it's called in non-interrupt context (e.g. regular syscall), it should
1118	* return false.
1119	*
1120	* Must be called with interrupts disabled.
1121	*/
1122	bool
1123	ml_did_interrupt_userspace(void)
1124	{
1125	assert(ml_get_interrupts_enabled() == false);
1126
1127	struct arm_saved_state *state = getCpuDatap()->cpu_int_state;
1128
1129	return state && PSR64_IS_USER(get_saved_state_cpsr(state));
1130	}
1131
1132
1133	vm_offset_t
1134	ml_stack_remaining(void)
1135	{
1136	uintptr_t local = (uintptr_t) &local;
1137	vm_offset_t intstack_top_ptr;
1138
1139	/ Since this is a stack-based check, we don't need to worry about*
1140	* preemption as we do in ml_at_interrupt_context(). If we are preemptible,
1141	* then the sp should never be within any CPU's interrupt stack unless
1142	* something has gone horribly wrong. */
1143	intstack_top_ptr = getCpuDatap()->intstack_top;
1144	if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
1145	return local - (getCpuDatap()->intstack_top - INTSTACK_SIZE);
1146	} else {
1147	return local - current_thread()->kernel_stack;
1148	}
1149	}
1150
1151	static boolean_t ml_quiescing = FALSE;
1152
1153	void
1154	ml_set_is_quiescing(boolean_t quiescing)
1155	{
1156	ml_quiescing = quiescing;
1157	os_atomic_thread_fence(release);
1158	}
1159
1160	boolean_t
1161	ml_is_quiescing(void)
1162	{
1163	os_atomic_thread_fence(acquire);
1164	return ml_quiescing;
1165	}
1166
1167	uint64_t
1168	ml_get_booter_memory_size(void)
1169	{
1170	uint64_t size;
1171	uint64_t roundsize = `512` * `1024` * `1024ULL`;
1172	size = BootArgs->memSizeActual;
1173	if (!size) {
1174	size = BootArgs->memSize;
1175	if (size < (`2` * roundsize)) {
1176	roundsize >>= `1`;
1177	}
1178	size = (size + roundsize - `1`) & ~(roundsize - `1`);
1179	}
1180
1181	size -= BootArgs->memSize;
1182
1183	return size;
1184	}
1185
1186	uint64_t
1187	ml_get_abstime_offset(void)
1188	{
1189	return rtclock_base_abstime;
1190	}
1191
1192	uint64_t
1193	ml_get_conttime_offset(void)
1194	{
1195	#if HIBERNATION && HAS_CONTINUOUS_HWCLOCK
1196	return hwclock_conttime_offset;
1197	#elif HAS_CONTINUOUS_HWCLOCK
1198	return `0`;
1199	#else
1200	return rtclock_base_abstime + mach_absolutetime_asleep;
1201	#endif
1202	}
1203
/*
 * Return the time elapsed since reset.
 *
 * With a continuous hardware clock this is mach_continuous_time() minus the
 * recorded wake_conttime, or UINT64_MAX while wake_conttime still holds
 * UINT64_MAX. Without one, the raw hardware clock value is returned.
 */
uint64_t
ml_get_time_since_reset(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	if (wake_conttime != UINT64_MAX) {
		return mach_continuous_time() - wake_conttime;
	}
	return UINT64_MAX;
#else /* !HAS_CONTINUOUS_HWCLOCK */
	/* The timebase resets across S2R, so just return the raw value. */
	return ml_get_hwclock();
#endif /* HAS_CONTINUOUS_HWCLOCK */
}
1218
1219	void
1220	ml_set_reset_time(__unused uint64_t wake_time)
1221	{
1222	#if HAS_CONTINUOUS_HWCLOCK
1223	wake_conttime = wake_time;
1224	#endif
1225	}
1226
/*
 * Return the wake time.
 */
uint64_t
ml_get_conttime_wake_time(void)
{
#if HAS_CONTINUOUS_HWCLOCK
	/*
	 * For now, we will reconstitute the timebase value from
	 * cpu_timebase_init and use it as the wake time.
	 */
	const uint64_t abstime_offset = ml_get_abstime_offset();

	return wake_abstime - abstime_offset;
#else /* !HAS_CONTINUOUS_HWCLOCK */
	/* The wake time is simply our continuous time offset. */
	return ml_get_conttime_offset();
#endif /* HAS_CONTINUOUS_HWCLOCK */
}
1241
1242	/*
1243	* ml_snoop_thread_is_on_core(thread_t thread)
1244	* Check if the given thread is currently on core. This function does not take
1245	* locks, disable preemption, or otherwise guarantee synchronization. The
1246	* result should be considered advisory.
1247	*/
1248	bool
1249	ml_snoop_thread_is_on_core(thread_t thread)
1250	{
1251	unsigned int cur_cpu_num = `0`;
1252	const unsigned int max_cpu_id = ml_get_max_cpu_number();
1253
1254	for (cur_cpu_num = `0`; cur_cpu_num <= max_cpu_id; cur_cpu_num++) {
1255	if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr) {
1256	if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr->cpu_active_thread == thread) {
1257	return true;
1258	}
1259	}
1260	}
1261
1262	return false;
1263	}
1264
1265	int
1266	ml_early_cpu_max_number(void)
1267	{
1268	assert(startup_phase >= STARTUP_SUB_TUNABLES);
1269	return ml_get_max_cpu_number();
1270	}
1271
1272	void
1273	ml_set_max_cpus(unsigned int max_cpus __unused)
1274	{
1275	lck_mtx_lock(lck: &max_cpus_lock);
1276	if (max_cpus_initialized != MAX_CPUS_SET) {
1277	if (max_cpus_initialized == MAX_CPUS_WAIT) {
1278	thread_wakeup((event_t) &max_cpus_initialized);
1279	}
1280	max_cpus_initialized = MAX_CPUS_SET;
1281	}
1282	lck_mtx_unlock(lck: &max_cpus_lock);
1283	}
1284
1285	unsigned int
1286	ml_wait_max_cpus(void)
1287	{
1288	assert(lockdown_done);
1289	lck_mtx_lock(lck: &max_cpus_lock);
1290	while (max_cpus_initialized != MAX_CPUS_SET) {
1291	max_cpus_initialized = MAX_CPUS_WAIT;
1292	lck_mtx_sleep(lck: &max_cpus_lock, lck_sleep_action: LCK_SLEEP_DEFAULT, event: &max_cpus_initialized, THREAD_UNINT);
1293	}
1294	lck_mtx_unlock(lck: &max_cpus_lock);
1295	return machine_info.max_cpus;
1296	}
1297
1298	void
1299	ml_cpu_get_info_type(ml_cpu_info_t * ml_cpu_info, cluster_type_t cluster_type)
1300	{
1301	cache_info_t *cpuid_cache_info;
1302
1303	cpuid_cache_info = cache_info_type(cluster_type);
1304	ml_cpu_info->vector_unit = `0`;
1305	ml_cpu_info->cache_line_size = cpuid_cache_info->c_linesz;
1306	ml_cpu_info->l1_icache_size = cpuid_cache_info->c_isize;
1307	ml_cpu_info->l1_dcache_size = cpuid_cache_info->c_dsize;
1308
1309	#if (__ARM_ARCH__ >= 8)
1310	ml_cpu_info->l2_settings = `1`;
1311	ml_cpu_info->l2_cache_size = cpuid_cache_info->c_l2size;
1312	#else
1313	#error Unsupported arch
1314	#endif
1315	ml_cpu_info->l3_settings = `0`;
1316	ml_cpu_info->l3_cache_size = `0xFFFFFFFF`;
1317	}
1318
1319	/*
1320	* Routine: ml_cpu_get_info
1321	* Function: Fill out the ml_cpu_info_t structure with parameters associated
1322	* with the boot cluster.
1323	*/
1324	void
1325	ml_cpu_get_info(ml_cpu_info_t * ml_cpu_info)
1326	{
1327	ml_cpu_get_info_type(ml_cpu_info, cluster_type: ml_get_topology_info()->boot_cpu->cluster_type);
1328	}
1329
1330	unsigned int
1331	ml_get_cpu_number_type(cluster_type_t cluster_type, bool logical, bool available)
1332	{
1333	/*
1334	* At present no supported ARM system features SMT, so the "logical"
1335	* parameter doesn't have an impact on the result.
1336	*/
1337	if (logical && available) {
1338	return os_atomic_load(&cluster_type_num_active_cpus[cluster_type], relaxed);
1339	} else if (logical && !available) {
1340	return ml_get_topology_info()->cluster_type_num_cpus[cluster_type];
1341	} else if (!logical && available) {
1342	return os_atomic_load(&cluster_type_num_active_cpus[cluster_type], relaxed);
1343	} else {
1344	return ml_get_topology_info()->cluster_type_num_cpus[cluster_type];
1345	}
1346	}
1347
1348	void
1349	ml_get_cluster_type_name(cluster_type_t cluster_type, char *name, size_t name_size)
1350	{
1351	strlcpy(dst: name, src: cluster_type_names[cluster_type], n: name_size);
1352	}
1353
1354	unsigned int
1355	ml_get_cluster_number_type(cluster_type_t cluster_type)
1356	{
1357	return ml_get_topology_info()->cluster_type_num_clusters[cluster_type];
1358	}
1359
1360	unsigned int
1361	ml_cpu_cache_sharing(unsigned int level, cluster_type_t cluster_type, bool include_all_cpu_types __unused)
1362	{
1363	unsigned int cpu_number = `0`, cluster_types = `0`;
1364
1365	/*
1366	* Level 0 corresponds to main memory, which is shared across all cores.
1367	*/
1368	if (level == `0`) {
1369	return ml_get_topology_info()->num_cpus;
1370	}
1371
1372	/*
1373	* At present no supported ARM system features more than 2 levels of caches.
1374	*/
1375	if (level > `2`) {
1376	return `0`;
1377	}
1378
1379	/*
1380	* L1 caches are always per core.
1381	*/
1382	if (level == `1`) {
1383	return `1`;
1384	}
1385
1386	cluster_types = (`1` << cluster_type);
1387
1388	/*
1389	* Traverse clusters until we find the one(s) of the desired type(s).
1390	*/
1391	for (int i = `0`; i < ml_get_topology_info()->num_clusters; i++) {
1392	ml_topology_cluster_t *cluster = &ml_get_topology_info()->clusters[i];
1393	if ((`1` << cluster->cluster_type) & cluster_types) {
1394	cpu_number += cluster->num_cpus;
1395	cluster_types &= ~(`1` << cluster->cluster_type);
1396	if (!cluster_types) {
1397	break;
1398	}
1399	}
1400	}
1401
1402	return cpu_number;
1403	}
1404
1405	unsigned int
1406	ml_get_cpu_types(void)
1407	{
1408	return ml_get_topology_info()->cluster_types;
1409	}
1410
1411	void
1412	machine_conf(void)
1413	{
1414	/*
1415	* This is known to be inaccurate. mem_size should always be capped at 2 GB
1416	*/
1417	machine_info.memory_size = (uint32_t)mem_size;
1418
1419	// rdar://problem/58285685: Userland expects _COMM_PAGE_LOGICAL_CPUS to report
1420	// (max_cpu_id+1) rather than a literal count* of logical CPUs.*
1421	unsigned int num_cpus = ml_get_topology_info()->max_cpu_id + `1`;
1422	machine_info.max_cpus = num_cpus;
1423	machine_info.physical_cpu_max = num_cpus;
1424	machine_info.logical_cpu_max = num_cpus;
1425	}
1426
1427	void
1428	machine_init(void)
1429	{
1430	debug_log_init();
1431	clock_config();
1432	is_clock_configured = TRUE;
1433	if (debug_enabled) {
1434	pmap_map_globals();
1435	}
1436	ml_lockdown_init();
1437	}
1438

Browse the source code of xnu/osfmk/arm/machine_routines_common.c