/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>
#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>
#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <sys/kdebug.h>
#include <kern/sched_amp_common.h>
#include <stdatomic.h>

#if __AMP__

/* Exported globals */
processor_set_t ecore_set = NULL;
processor_set_t pcore_set = NULL;

/*
 * sched_amp_init()
 *
 * Initialize the AMP scheduler. The exported ecore_set and pcore_set
 * globals, which describe the E/P processor sets, are populated
 * separately once the psets are brought up.
 */
void
sched_amp_init(void)
{
	sched_timeshare_init();
}

/* Spill threshold load average is ncpus in pset + (sched_amp_spill_count / (1 << PSET_LOAD_FRACTIONAL_SHIFT)) */
int sched_amp_spill_count = 3;
int sched_amp_idle_steal = 1;
int sched_amp_spill_steal = 1;

/*
 * We see performance gains from using immediate IPIs to P-cores to run
 * P-eligible threads, and fewer P-to-E migrations from using deferred
 * IPIs for spill.
 */
int sched_amp_spill_deferred_ipi = 1;
int sched_amp_pcores_preempt_immediate_ipi = 1;

/*
 * sched_perfcontrol_inherit_recommendation_from_tg changes the AMP
 * scheduling policy away from the default and allows the policy to be
 * modified at run-time.
 *
 * Once modified from the default, the policy toggles between "follow
 * thread group" and "restrict to E".
 */

_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util = SCHED_PERFCTL_POLICY_DEFAULT;
_Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg = SCHED_PERFCTL_POLICY_DEFAULT;
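
/*
 * Illustrative sketch only (the enumerator name is an assumption, not a
 * call site from this file): a perfcontrol callout that opts utility
 * threads into "follow thread group" would flip the policy with a relaxed
 * atomic store, which sched_amp_qos_max_parallelism() below observes via
 * os_atomic_load(..., relaxed):
 *
 *	os_atomic_store(&sched_perfctl_policy_util,
 *	    SCHED_PERFCTL_POLICY_FOLLOW_GROUP, relaxed);
 */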

/*
 * sched_amp_spill_threshold()
 *
 * Routine to calculate the spill threshold, which decides whether a cluster should spill.
 */
int
sched_amp_spill_threshold(processor_set_t pset)
{
	int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

	return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + sched_amp_spill_count;
}
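
/*
 * Illustrative arithmetic: with 4 recommended CPUs in the pset and the
 * default sched_amp_spill_count of 3, the threshold is
 * (4 << PSET_LOAD_FRACTIONAL_SHIFT) + 3, i.e. spilling is considered once
 * the fixed-point load average climbs just past 4.
 */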

/*
 * pset_signal_spill()
 *
 * Routine to signal a running/idle CPU to cause a spill onto that CPU.
 * Called with pset locked, returns unlocked
 */
void
pset_signal_spill(processor_set_t pset, int spilled_thread_priority)
{
	processor_t processor;
	sched_ipi_type_t ipi_type = SCHED_IPI_NONE;

	uint64_t idle_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE];
	for (int cpuid = lsb_first(idle_map); cpuid >= 0; cpuid = lsb_next(idle_map, cpuid)) {
		processor = processor_array[cpuid];
		if (bit_set_if_clear(pset->pending_spill_cpu_mask, processor->cpu_id)) {
			KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 0, 0, 0);

			processor->deadline = UINT64_MAX;

			if (processor == current_processor()) {
				pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);
				if (bit_set_if_clear(pset->pending_AST_URGENT_cpu_mask, processor->cpu_id)) {
					KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PENDING_AST_URGENT) | DBG_FUNC_START,
					    processor->cpu_id, pset->pending_AST_URGENT_cpu_mask, 0, 6);
				}
			} else {
				ipi_type = sched_ipi_action(processor, NULL, SCHED_IPI_EVENT_SPILL);
			}
			pset_unlock(pset);
			sched_ipi_perform(processor, ipi_type);
			return;
		}
	}

	processor_t ast_processor = NULL;
	ast_t preempt = AST_NONE;
	uint64_t running_map = pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING];
	for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
		processor = processor_array[cpuid];
		if (processor->current_recommended_pset_type == PSET_AMP_P) {
			/* Already running a spilled P-core recommended thread */
			continue;
		}
		if (bit_test(pset->pending_spill_cpu_mask, processor->cpu_id)) {
			/* Already received a spill signal */
			continue;
		}
		if (processor->current_pri >= spilled_thread_priority) {
			/* Already running a higher or equal priority thread */
			continue;
		}

		/* Found a suitable processor */
		bit_set(pset->pending_spill_cpu_mask, processor->cpu_id);
		KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_SIGNAL_SPILL) | DBG_FUNC_NONE, processor->cpu_id, 1, 0, 0);
		if (processor == current_processor()) {
			preempt = AST_PREEMPT;
		}
		ipi_type = sched_ipi_action(processor, NULL, SCHED_IPI_EVENT_SPILL);
		if (ipi_type != SCHED_IPI_NONE) {
			ast_processor = processor;
		}
		break;
	}

	pset_unlock(pset);
	sched_ipi_perform(ast_processor, ipi_type);

	if (preempt != AST_NONE) {
		ast_t new_preempt = update_pending_nonurgent_preemption(processor, preempt);
		ast_on(new_preempt);
	}
}
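
/*
 * Usage sketch (mirrors sched_amp_check_spill() below): the caller takes
 * the target pset lock and this routine always drops it before returning,
 * whether or not a CPU was signalled:
 *
 *	pset_lock(ecore_set);
 *	pset_signal_spill(ecore_set, thread->sched_pri);
 *	// ecore_set is unlocked here
 */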

/*
 * pset_should_accept_spilled_thread()
 *
 * Routine to decide if pset should accept spilled threads.
 * This function must be safe to call (to use as a hint) without holding the pset lock.
 */
bool
pset_should_accept_spilled_thread(processor_set_t pset, int spilled_thread_priority)
{
	if (!pset) {
		return false;
	}

	if ((pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
		return true;
	}

	uint64_t cpu_map = (pset->recommended_bitmask & pset->cpu_state_map[PROCESSOR_RUNNING]);

	for (int cpuid = lsb_first(cpu_map); cpuid >= 0; cpuid = lsb_next(cpu_map, cpuid)) {
		processor_t processor = processor_array[cpuid];

		if (processor->current_recommended_pset_type == PSET_AMP_P) {
			/* This processor is already running a spilled thread */
			continue;
		}

		if (processor->current_pri < spilled_thread_priority) {
			return true;
		}
	}

	return false;
}

/*
 * should_spill_to_ecores()
 *
 * The spill policy is implemented here.
 */
bool
should_spill_to_ecores(processor_set_t nset, thread_t thread)
{
	if (nset->pset_cluster_type == PSET_AMP_E) {
		/* Not relevant if E-cores are already preferred */
		return false;
	}

	if (!pset_is_recommended(ecore_set)) {
		/* E-cores must be recommended */
		return false;
	}

	if (thread->th_bound_cluster_id == pcore_set->pset_id) {
		/* Thread bound to the P-cluster */
		return false;
	}

	if (thread->sched_pri >= BASEPRI_RTQUEUES) {
		/* Never spill realtime threads */
		return false;
	}

	if ((nset->recommended_bitmask & nset->cpu_state_map[PROCESSOR_IDLE]) != 0) {
		/* Don't spill if the P-cluster has idle cores */
		return false;
	}

	if ((sched_get_pset_load_average(nset, 0) >= sched_amp_spill_threshold(nset)) && /* The P-cores are already loaded */
	    pset_should_accept_spilled_thread(ecore_set, thread->sched_pri)) {           /* An E-core is idle or running a lower priority thread */
		return true;
	}

	return false;
}

/*
 * sched_amp_check_spill()
 *
 * Routine to check if the thread should be spilled and signal the pset if needed.
 */
void
sched_amp_check_spill(processor_set_t pset, thread_t thread)
{
	/* pset is unlocked */

	/* Bound threads don't call this function */
	assert(thread->bound_processor == PROCESSOR_NULL);

	if (should_spill_to_ecores(pset, thread)) {
		pset_lock(ecore_set);

		pset_signal_spill(ecore_set, thread->sched_pri);
		/* returns with ecore_set unlocked */
	}
}

/*
 * sched_amp_steal_threshold()
 *
 * Routine to calculate the steal threshold
 */
int
sched_amp_steal_threshold(processor_set_t pset, bool spill_pending)
{
	int recommended_processor_count = bit_count(pset->recommended_bitmask & pset->cpu_bitmask);

	return (recommended_processor_count << PSET_LOAD_FRACTIONAL_SHIFT) + (spill_pending ? sched_amp_spill_steal : sched_amp_idle_steal);
}
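
/*
 * Illustrative arithmetic: with 2 recommended CPUs and the default
 * tunables above (sched_amp_spill_steal = sched_amp_idle_steal = 1), the
 * threshold evaluates to (2 << PSET_LOAD_FRACTIONAL_SHIFT) + 1 whether or
 * not a spill is pending, i.e. a fixed-point load average just above 2.
 */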

/*
 * sched_amp_steal_thread_enabled()
 *
 * Stealing is only enabled for the E-pset, and only while the P-pset
 * exists and has processors online to steal from.
 */
bool
sched_amp_steal_thread_enabled(processor_set_t pset)
{
	return (pset->pset_cluster_type == PSET_AMP_E) && (pcore_set != NULL) && (pcore_set->online_processor_count > 0);
}

/*
 * sched_amp_balance()
 *
 * Invoked with pset locked, returns with pset unlocked
 */
bool
sched_amp_balance(processor_t cprocessor, processor_set_t cpset)
{
	assert(cprocessor == current_processor());

	pset_unlock(cpset);

	if (!ecore_set || cpset->pset_cluster_type == PSET_AMP_E || !cprocessor->is_recommended) {
		return false;
	}

	/*
	 * cprocessor is an idle, recommended P core processor.
	 * Look for P-eligible threads that have spilled to an E core
	 * and coax them to come back.
	 */
	processor_set_t pset = ecore_set;

	pset_lock(pset);

	processor_t eprocessor;
	uint64_t ast_processor_map = 0;

	sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};
	uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
	for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
		eprocessor = processor_array[cpuid];
		if ((eprocessor->current_pri < BASEPRI_RTQUEUES) &&
		    (eprocessor->current_recommended_pset_type == PSET_AMP_P)) {
			ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, SCHED_IPI_EVENT_REBALANCE);
			if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
				bit_set(ast_processor_map, eprocessor->cpu_id);
				assert(eprocessor != cprocessor);
			}
		}
	}

	pset_unlock(pset);

	for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
		processor_t ast_processor = processor_array[cpuid];
		sched_ipi_perform(ast_processor, ipi_type[cpuid]);
	}

	/* Core should light-weight idle using WFE if it just sent out rebalance IPIs */
	return ast_processor_map != 0;
}
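
/*
 * Usage sketch (hypothetical caller, shown only to illustrate the
 * contract): the idle path invokes this with its own pset locked and uses
 * the return value to decide whether to do a light-weight WFE wait for an
 * incoming rebalanced thread:
 *
 *	pset_lock(cpset);
 *	if (sched_amp_balance(cprocessor, cpset)) {
 *		// returned with cpset unlocked; WFE briefly before idling
 *	}
 */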

/*
 * Helper function for sched_amp_thread_group_recommendation_change()
 * Find all the cores in the pset running threads from the thread_group tg
 * and send them a rebalance interrupt.
 */
void
sched_amp_bounce_thread_group_from_ecores(processor_set_t pset, struct thread_group *tg)
{
	if (!pset) {
		return;
	}

	assert(pset->pset_cluster_type == PSET_AMP_E);
	uint64_t ast_processor_map = 0;
	sched_ipi_type_t ipi_type[MAX_CPUS] = {SCHED_IPI_NONE};

	spl_t s = splsched();
	pset_lock(pset);

	uint64_t running_map = pset->cpu_state_map[PROCESSOR_RUNNING];
	for (int cpuid = lsb_first(running_map); cpuid >= 0; cpuid = lsb_next(running_map, cpuid)) {
		processor_t eprocessor = processor_array[cpuid];
		if (eprocessor->current_thread_group == tg) {
			ipi_type[eprocessor->cpu_id] = sched_ipi_action(eprocessor, NULL, SCHED_IPI_EVENT_REBALANCE);
			if (ipi_type[eprocessor->cpu_id] != SCHED_IPI_NONE) {
				bit_set(ast_processor_map, eprocessor->cpu_id);
			} else if (eprocessor == current_processor()) {
				ast_on(AST_PREEMPT);
				bit_set(pset->pending_AST_PREEMPT_cpu_mask, eprocessor->cpu_id);
			}
		}
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_AMP_RECOMMENDATION_CHANGE) | DBG_FUNC_NONE, tg, ast_processor_map, 0, 0);

	pset_unlock(pset);

	for (int cpuid = lsb_first(ast_processor_map); cpuid >= 0; cpuid = lsb_next(ast_processor_map, cpuid)) {
		processor_t ast_processor = processor_array[cpuid];
		sched_ipi_perform(ast_processor, ipi_type[cpuid]);
	}

	splx(s);
}

/*
 * sched_amp_ipi_policy()
 */
sched_ipi_type_t
sched_amp_ipi_policy(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event)
{
	processor_set_t pset = dst->processor_set;
	assert(dst != current_processor());

	boolean_t deferred_ipi_supported = false;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	deferred_ipi_supported = true;
#endif /* CONFIG_SCHED_DEFERRED_AST */

	switch (event) {
	case SCHED_IPI_EVENT_SPILL:
		/* For spill events, use deferred IPIs if sched_amp_spill_deferred_ipi is set */
		if (deferred_ipi_supported && sched_amp_spill_deferred_ipi) {
			return sched_ipi_deferred_policy(pset, dst, thread, event);
		}
		break;
	case SCHED_IPI_EVENT_PREEMPT:
		/*
		 * For preemption, the default policy is to use deferred IPIs
		 * for non-RT P-core preemption. Override that behavior if
		 * sched_amp_pcores_preempt_immediate_ipi is set.
		 */
		if (thread && thread->sched_pri < BASEPRI_RTQUEUES) {
			if (sched_amp_pcores_preempt_immediate_ipi && (pset == pcore_set)) {
				return dst_idle ? SCHED_IPI_IDLE : SCHED_IPI_IMMEDIATE;
			}
		}
		break;
	default:
		break;
	}
	/* Default back to the global policy for all other scenarios */
	return sched_ipi_policy(dst, thread, dst_idle, event);
}
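
/*
 * Net effect with the default tunables above: spill signals to E-cores use
 * deferred IPIs when CONFIG_SCHED_DEFERRED_AST is available, while non-RT
 * preemptions targeting P-cores get an immediate (or idle) IPI instead of
 * the deferred default.
 */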

/*
 * sched_amp_qos_max_parallelism()
 */
uint32_t
sched_amp_qos_max_parallelism(int qos, uint64_t options)
{
	uint32_t ecount = ecore_set ? ecore_set->cpu_set_count : 0;
	uint32_t pcount = pcore_set ? pcore_set->cpu_set_count : 0;

	/*
	 * The AMP scheduler does not support more than 1 of each type of cluster,
	 * but the P-cluster is optional (e.g. watchOS).
	 */
	uint32_t ecluster_count = ecount ? 1 : 0;
	uint32_t pcluster_count = pcount ? 1 : 0;

	if (options & QOS_PARALLELISM_REALTIME) {
		/*
		 * For realtime threads on AMP, limit the width to just the
		 * P-cores, since we do not spill/rebalance RT threads.
		 */
		return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? pcluster_count : pcount;
	}

	/*
	 * The default AMP scheduler policy is to run utility and bg
	 * threads on E-cores only. Run-time policy adjustment unlocks the
	 * ability of utility and bg threads to be scheduled based on
	 * run-time conditions.
	 */
	switch (qos) {
	case THREAD_QOS_UTILITY:
		if (os_atomic_load(&sched_perfctl_policy_util, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? ecluster_count : ecount;
		} else {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? (ecluster_count + pcluster_count) : (ecount + pcount);
		}
	case THREAD_QOS_BACKGROUND:
	case THREAD_QOS_MAINTENANCE:
		if (os_atomic_load(&sched_perfctl_policy_bg, relaxed) == SCHED_PERFCTL_POLICY_DEFAULT) {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? ecluster_count : ecount;
		} else {
			return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? (ecluster_count + pcluster_count) : (ecount + pcount);
		}
	default:
		return (options & QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE) ? (ecluster_count + pcluster_count) : (ecount + pcount);
	}
}
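
/*
 * Worked example (illustrative): on a system with 4 E-cores and 2 P-cores
 * under the default policies, THREAD_QOS_UTILITY reports a width of 4
 * (E-cores only), QOS_PARALLELISM_REALTIME reports 2 (P-cores only), and
 * any other QoS reports 6. With QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE
 * the same queries report 1, 1, and 2 cluster(s) respectively.
 */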

pset_node_t
sched_amp_choose_node(thread_t thread)
{
	pset_node_t node = (recommended_pset_type(thread) == PSET_AMP_P) ? pcore_node : ecore_node;
	return ((node != NULL) && (node->pset_map != 0)) ? node : &pset_node0;
}

#endif /* __AMP__ */