/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>
#include <kern/sched_amp_common.h>
#include <stdatomic.h>

#if __AMP__

/* Exported globals */
processor_set_t ecore_set = NULL;
processor_set_t pcore_set = NULL;

/*
 * sched_amp_init()
 *
 * Initialize the pcore_set and ecore_set globals which describe the
 * P/E processor sets.
 */
void
sched_amp_init(void)
{
	sched_timeshare_init();
}

/* Spill threshold load average is ncpus in pset + (sched_amp_spill_count / (1 << PSET_LOAD_FRACTIONAL_SHIFT)) */
int sched_amp_spill_count = 3;
int sched_amp_idle_steal = 1;
int sched_amp_spill_steal = 1;

/*
 * We see performance gains from sending immediate IPIs to P-cores to run
 * P-eligible threads, and fewer P-to-E migrations when deferred IPIs are
 * used for spill.
 */
int sched_amp_spill_deferred_ipi = 1;
int sched_amp_pcores_preempt_immediate_ipi = 1;

/*
 * sched_perfcontrol_inherit_recommendation_from_tg changes the AMP
 * scheduling policy away from the default and allows the policy to be
 * modified at run-time.
 *
 * Once modified from the default, the policy toggles between "follow
 * thread group" and "restrict to E-cores".
 */

_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util = SCHED_PERFCTL_POLICY_DEFAULT;
_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg = SCHED_PERFCTL_POLICY_DEFAULT;

/*
 * sched_amp_spill_threshold()
 *
 * Routine to calculate the spill threshold, which decides whether the cluster should spill.
 */
int
sched_amp_spill_threshold(processor_set_t pset)
{
	int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

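	/*
	 * Both the pset load average and this threshold are fixed-point values
	 * with PSET_LOAD_FRACTIONAL_SHIFT fractional bits.  As an illustrative
	 * sketch (assuming, purely for the example, a shift of 4): a fully
	 * recommended 4-CPU cluster with the default sched_amp_spill_count of 3
	 * yields (4 << 4) + 3 = 67, i.e. a load average of just over 4 runnable
	 * threads before the cluster is considered for spill.
	 */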
	return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + sched_amp_spill_count;
}

/*
 * pset_signal_spill()
 *
 * Routine to signal a running/idle CPU to cause a spill onto that CPU.
 * Called with pset locked, returns unlocked
 */
void
pset_signal_spill(processor_set_t pset, int spilled_thread_priority)
{
	processor_t processor;
	sched_ipi_type_t ipi_type = SCHED_IPI_NONE;

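	/*
	 * First pass: prefer an idle recommended CPU.  Mark it spill-pending and
	 * either dispatch it directly (if it is the current processor) or pick an
	 * IPI to wake it, then drop the pset lock and deliver the IPI.
	 */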
	uint64_t idle_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE];
	for (int cpuid = lsb_first(idle_map); cpuid >= 0; cpuid = lsb_next(idle_map, cpuid)) {
		processor = processor_array[cpuid];
		if (bit_set_if_clear(pset->pending_spill_cpu_mask, processor->cpu_id)) {
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 0, 0, 0);

			processor->deadline = UINT64_MAX;

			if (processor == current_processor()) {
				pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);
				if (bit_set_if_clear(pset->pending_AST_URGENT_cpu_mask, processor->cpu_id)) {
					KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PENDING_AST_URGENT) | DBG_FUNC_START,
					    processor->cpu_id, pset->pending_AST_URGENT_cpu_mask, 0, 6);
				}
			} else {
				ipi_type = sched_ipi_action(processor, NULL, SCHED_IPI_EVENT_SPILL);
			}
			pset_unlock(pset);
			sched_ipi_perform(processor, ipi_type);
			return;
		}
	}

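	/*
	 * Second pass: no idle CPU took the spill, so look for a running CPU that
	 * is not already spill-pending and is running a lower-priority thread
	 * that is not recommended for the P-cluster.
	 */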
	processor_t ast_processor = NULL;
	ast_t preempt = AST_NONE;
	uint64_t running_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING];
	for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
		processor = processor_array[cpuid];
		if (processor->current_recommended_pset_type == PSET_AMP_P) {
			/* Already running a spilled P-core recommended thread */
			continue;
		}
		if (bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
			/* Already received a spill signal */
			continue;
		}
		if (processor->current_pri >= spilled_thread_priority) {
			/* Already running a higher or equal priority thread */
			continue;
		}

		/* Found a suitable processor */
		bit_set(pset->pending_spill_cpu_mask, processor->cpu_id);
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 1, 0, 0);
		if (processor == current_processor()) {
			preempt = AST_PREEMPT;
		}
		ipi_type = sched_ipi_action(processor, NULL, SCHED_IPI_EVENT_SPILL);
		if (ipi_type != SCHED_IPI_NONE) {
			ast_processor = processor;
		}
		break;
	}

	pset_unlock(pset);
	sched_ipi_perform(ast_processor, ipi_type);

	if (preempt != AST_NONE) {
		ast_t new_preempt = update_pending_nonurgent_preemption(processor, preempt);
		ast_on(new_preempt);
	}
}

/*
 * pset_should_accept_spilled_thread()
 *
 * Routine to decide if pset should accept spilled threads.
 * This function must be safe to call (to use as a hint) without holding the pset lock.
 */
bool
pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority)
{
	if (!pset) {
		return false;
	}

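	/* Any idle recommended CPU can absorb the spilled thread immediately */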
	if ((pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
		return true;
	}

	uint64_t cpu_map = (pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING]);

	for (int cpuid = lsb_first(cpu_map); cpuid >= 0; cpuid = lsb_next(cpu_map, cpuid)) {
		processor_t processor = processor_array[cpuid];

		if (processor->current_recommended_pset_type == PSET_AMP_P) {
			/* This processor is already running a spilled thread */
			continue;
		}

		if (processor->current_pri < spilled_thread_priority) {
			return true;
		}
	}

	return false;
}

/*
 * should_spill_to_ecores()
 *
 * Spill policy is implemented here
 */
bool
should_spill_to_ecores(processor_set_t nset, thread_t thread)
{
	if (nset->pset_cluster_type == PSET_AMP_E) {
		/* Not relevant if ecores already preferred */
		return false;
	}

	if (!pset_is_recommended(ecore_set)) {
		/* E cores must be recommended */
		return false;
	}

	if (thread->th_bound_cluster_id == pcore_set->pset_id) {
		/* Thread bound to the P-cluster */
		return false;
	}

	if (thread->sched_pri >= BASEPRI_RTQUEUES) {
		/* Never spill realtime threads */
		return false;
	}

	if ((nset->recommended_bitmask & nset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
		/* Don't spill if the P-cluster still has idle cores */
		return false;
	}

	if ((sched_get_pset_load_average(nset, 0) >= sched_amp_spill_threshold(nset)) &&   /* P-cores are already loaded past the spill threshold */
	    pset_should_accept_spilled_thread(ecore_set, thread->sched_pri)) {             /* and an E-core is idle or running a lower priority thread */
		return true;
	}

	return false;
}

/*
 * sched_amp_check_spill()
 *
 * Routine to check if the thread should be spilled and signal the pset if needed.
 */
void
sched_amp_check_spill(processor_set_t pset, thread_t thread)
{
	/* pset is unlocked */

	/* Bound threads don't call this function */
	assert(thread->bound_processor == PROCESSOR_NULL);

	if (should_spill_to_ecores(pset, thread)) {
		pset_lock(ecore_set);

		pset_signal_spill(ecore_set, thread->sched_pri);
		/* returns with ecore_set unlocked */
	}
}

/*
 * sched_amp_steal_threshold()
 *
 * Routine to calculate the steal threshold
 */
int
sched_amp_steal_threshold(processor_set_t pset, bool spill_pending)
{
	int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

	return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + (spill_pending ? sched_amp_spill_steal : sched_amp_idle_steal);
}

/*
 * sched_amp_steal_thread_enabled()
 *
 * Stealing is only enabled for the E-pset, and only while a P-cluster
 * exists and has processors online.
 */
bool
sched_amp_steal_thread_enabled(processor_set_t pset)
{
	return (pset->pset_cluster_type == PSET_AMP_E) && (pcore_set != NULL) && (pcore_set->online_processor_count > 0);
}

/*
 * sched_amp_balance()
 *
 * Invoked with pset locked, returns with pset unlocked
 */
bool
sched_amp_balance(processor_t cprocessor, processor_set_t cpset)
{
	assert(cprocessor == current_processor());

	pset_unlock(cpset);

	if (!ecore_set || cpset->pset_cluster_type == PSET_AMP_E || !cprocessor->is_recommended) {
		return false;
	}

	/*
	 * cprocessor is an idle, recommended P core processor.
	 * Look for P-eligible threads that have spilled to an E core
	 * and coax them to come back.
	 */
	processor_set_t pset = ecore_set;

	pset_lock(pset);

	processor_t eprocessor;
	uint64_t ast_processor_map = 0;

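	/*
	 * Collect rebalance IPIs for E-cores that are currently running non-RT
	 * threads recommended for the P-cluster.
	 */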
	sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
	uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
	for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
		eprocessor = processor_array[cpuid];
		if ((eprocessor->current_pri < BASEPRI_RTQUEUES) &&
		    (eprocessor->current_recommended_pset_type == PSET_AMP_P)) {
			ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, SCHED_IPI_EVENT_REBALANCE);
			if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
				bit_set(ast_processor_map, eprocessor->cpu_id);
				assert(eprocessor != cprocessor);
			}
		}
	}

	pset_unlock(pset);

	for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
		processor_t ast_processor = processor_array[cpuid];
		sched_ipi_perform(ast_processor, ipi_type[cpuid]);
	}

	/* The core should do a lightweight idle (WFE) if it just sent out rebalance IPIs */
	return ast_processor_map != 0;
}

/*
 * Helper function for sched_amp_thread_group_recommendation_change().
 * Find all the cores in the pset running threads from the thread_group tg
 * and send them a rebalance interrupt.
 */
void
sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg)
{
	if (!pset) {
		return;
	}

	assert(pset->pset_cluster_type == PSET_AMP_E);
	uint64_t ast_processor_map = 0;
	sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};

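	/* Raise to splsched and take the pset lock so per-CPU state stays stable while scanning */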
	spl_t s = splsched();
	pset_lock(pset);

	uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
	for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
		processor_t eprocessor = processor_array[cpuid];
		if (eprocessor->current_thread_group == tg) {
			ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, SCHED_IPI_EVENT_REBALANCE);
			if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
				bit_set(ast_processor_map, eprocessor->cpu_id);
			} else if (eprocessor == current_processor()) {
				ast_on(AST_PREEMPT);
				bit_set(pset->pending_AST_PREEMPT_cpu_mask, eprocessor->cpu_id);
			}
		}
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_RECOMMENDATION_CHANGE) | DBG_FUNC_NONE, tg, ast_processor_map, 0, 0);

	pset_unlock(pset);

	for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
		processor_t ast_processor = processor_array[cpuid];
		sched_ipi_perform(ast_processor, ipi_type[cpuid]);
	}

	splx(s);
}

/*
 * sched_amp_ipi_policy()
 */
sched_ipi_type_t
sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event)
{
	processor_set_t pset = dst->processor_set;
	assert(dst != current_processor());

	boolean_t deferred_ipi_supported = false;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	deferred_ipi_supported = true;
#endif /* CONFIG_SCHED_DEFERRED_AST */

	switch (event) {
	case SCHED_IPI_EVENT_SPILL:
		/* For spill events, use deferred IPIs if sched_amp_spill_deferred_ipi is set */
		if (deferred_ipi_supported && sched_amp_spill_deferred_ipi) {
			return sched_ipi_deferred_policy(pset, dst, thread, event);
		}
		break;
	case SCHED_IPI_EVENT_PREEMPT:
		/*
		 * For preemption, the default policy is to use deferred IPIs
		 * for non-RT P-core preemption. Override that behavior if
		 * sched_amp_pcores_preempt_immediate_ipi is set.
		 */
		if (thread && thread->sched_pri < BASEPRI_RTQUEUES) {
			if (sched_amp_pcores_preempt_immediate_ipi && (pset == pcore_set)) {
				return dst_idle ? SCHED_IPI_IDLE : SCHED_IPI_IMMEDIATE;
			}
		}
		break;
	default:
		break;
	}
	/* Default back to the global policy for all other scenarios */
	return sched_ipi_policy(dst, thread, dst_idle, event);
}

/*
 * sched_amp_qos_max_parallelism()
 */
uint32_t
sched_amp_qos_max_parallelism(int qos, uint64_t options)
{
	uint32_t ecount = ecore_set ? ecore_set->cpu_set_count : 0;
	uint32_t pcount = pcore_set ? pcore_set->cpu_set_count : 0;

	/*
	 * The AMP scheduler does not support more than 1 of each type of cluster
	 * but the P-cluster is optional (e.g. watchOS)
	 */
	uint32_t ecluster_count = ecount ? 1 : 0;
	uint32_t pcluster_count = pcount ? 1 : 0;

	if (options & QOS_PARALLELISM_REALTIME) {
		/*
		 * For realtime threads on AMP, limit the width to just the
		 * P-cores, since we do not spill/rebalance RT threads.
		 */
		return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? pcluster_count : pcount;
	}

	/*
	 * The default AMP scheduler policy is to run utility and bg
	 * threads on E-cores only. Run-time policy adjustment unlocks the
	 * ability of utility and bg threads to be scheduled based on
	 * run-time conditions.
	 */
	switch (qos) {
	case THREAD_QOS_UTILITY:
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? ecluster_count : ecount;
		} else {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? (ecluster_count + pcluster_count) : (ecount + pcount);
		}
	case THREAD_QOS_BACKGROUND:
	case THREAD_QOS_MAINTENANCE:
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? ecluster_count : ecount;
		} else {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? (ecluster_count + pcluster_count) : (ecount + pcount);
		}
	default:
		return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? (ecluster_count + pcluster_count) : (ecount + pcount);
	}
}

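/*
 * sched_amp_choose_node()
 *
 * Return the pset node matching the thread's recommended cluster type,
 * falling back to pset_node0 if that node is absent or has no psets.
 */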
pset_node_t
sched_amp_choose_node(thread_t thread)
{
	pset_node_t node = (recommended_pset_type(thread) == PSET_AMP_P) ? pcore_node : ecore_node;
	return ((node != NULL) && (node->pset_map != 0)) ? node : &pset_node0;
}

#endif /* __AMP__ */