/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 * processor.h: Processor and processor-related definitions.
 */

#ifndef _KERN_PROCESSOR_H_
#define _KERN_PROCESSOR_H_

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <kern/kern_types.h>

#include <sys/cdefs.h>

#ifdef MACH_KERNEL_PRIVATE
#include <mach/mach_types.h>
#include <kern/ast.h>
#include <kern/cpu_number.h>
#include <kern/smp.h>
#include <kern/simple_lock.h>
#include <kern/locks.h>
#include <kern/percpu.h>
#include <kern/queue.h>
#include <kern/recount.h>
#include <kern/sched.h>
#include <kern/sched_urgency.h>
#include <kern/timer.h>
#include <mach/sfi_class.h>
#include <kern/sched_clutch.h>
#include <kern/timer_call.h>
#include <kern/assert.h>
#include <machine/limits.h>
#endif

__BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN

#ifdef MACH_KERNEL_PRIVATE

/*
 * Processor state is accessed by locking the scheduling lock
 * for the assigned processor set.
 *
 *            --- PENDING <------- SHUTDOWN
 *           /                     ^      ^
 *         _/                      |       \
 *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
 *         \_________________^   ^ ^______/           /
 *                                \__________________/
 *
 * Most of these state transitions are externally driven as a
 * directive (for instance telling an IDLE processor to start
 * coming out of the idle state to run a thread). However, these
 * are typically paired with a handshake by the processor itself
 * to indicate that it has completed a transition of indeterminate
 * length (for example, the DISPATCHING->RUNNING or START->RUNNING
 * transitions must occur on the processor itself).
 *
 * The boot processor has some special cases, and skips the START state,
 * since it has already bootstrapped and is ready to context switch threads.
 *
 * When a processor is in DISPATCHING or RUNNING state, the current_pri,
 * current_thmode, and deadline fields should be set, so that other
 * processors can evaluate if it is an appropriate candidate for preemption.
 */
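
/*
 * Illustrative sketch only (not part of this interface): with the pset's
 * scheduling lock held, another CPU can use those fields to decide whether
 * this processor is a preemption candidate for a newly runnable thread.
 * The "thread" and "processor" variables here are hypothetical locals, and
 * the real decision logic in the scheduler is more involved.
 *
 *      pset_assert_locked(processor->processor_set);
 *      if ((processor->state == PROCESSOR_RUNNING ||
 *          processor->state == PROCESSOR_DISPATCHING) &&
 *          processor->current_pri < thread->sched_pri) {
 *              cause_ast_check(processor);     // ask it to reschedule
 *      }
 */
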
#if defined(CONFIG_SCHED_DEFERRED_AST)
/*
 *            --- PENDING <------- SHUTDOWN
 *           /                     ^      ^
 *         _/                      |       \
 *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
 *         \_________________^   ^ ^______/ ^_____ /  /
 *                                \__________________/
 *
 * A DISPATCHING processor may be put back into IDLE, if another
 * processor determines that the target processor will have nothing to do
 * upon reaching the RUNNING state. This is racy, but if the target
 * responds and becomes RUNNING, it will not break the processor state
 * machine.
 *
 * This change allows us to cancel an outstanding signal/AST on a processor
 * (if such an operation is supported through hardware or software), and
 * push the processor back into the IDLE state as a power optimization.
 */
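
/*
 * Illustrative sketch only, under CONFIG_SCHED_DEFERRED_AST: roughly how a
 * deferred AST could be quashed when the target turns out to have nothing
 * to run. The mask and helpers referenced here are declared in this header,
 * but the real cancellation codepath lives in the scheduler, so treat this
 * as an approximation rather than the actual implementation.
 *
 *      pset_assert_locked(pset);
 *      if (processor->state == PROCESSOR_DISPATCHING &&
 *          bit_test(pset->pending_deferred_AST_cpu_mask, processor->cpu_id)) {
 *              bit_clear(pset->pending_deferred_AST_cpu_mask, processor->cpu_id);
 *              pset_update_processor_state(pset, processor, PROCESSOR_IDLE);
 *      }
 */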
#endif

typedef enum {
    PROCESSOR_OFF_LINE        = 0, /* Not available */
    PROCESSOR_SHUTDOWN        = 1, /* Going off-line, but schedulable */
    PROCESSOR_START           = 2, /* Being started */
    PROCESSOR_PENDING_OFFLINE = 3, /* Going off-line, not schedulable */
    PROCESSOR_IDLE            = 4, /* Idle (available) */
    PROCESSOR_DISPATCHING     = 5, /* Dispatching (idle -> active) */
    PROCESSOR_RUNNING         = 6, /* Normal execution */
    PROCESSOR_STATE_LEN       = (PROCESSOR_RUNNING + 1)
} processor_state_t;

typedef enum {
    PSET_SMP,
#if __AMP__
    PSET_AMP_E,
    PSET_AMP_P,
#endif
} pset_cluster_type_t;

#if __AMP__

typedef enum {
    SCHED_PERFCTL_POLICY_DEFAULT,      /* static policy: set at boot */
    SCHED_PERFCTL_POLICY_FOLLOW_GROUP, /* dynamic policy: perfctl_class follows thread group across amp clusters */
    SCHED_PERFCTL_POLICY_RESTRICT_E,   /* dynamic policy: limits perfctl_class to amp e cluster */
} sched_perfctl_class_policy_t;

extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util;
extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg;

#endif /* __AMP__ */

typedef bitmap_t cpumap_t;

#if __arm64__

/*
 * pset_execution_time_t
 *
 * The pset_execution_time_t type is used to maintain the average
 * execution time of threads on a pset. Since the avg. execution time is
 * updated from contexts where the pset lock is not held, it uses a
 * double-wide RMW loop to update these values atomically.
 */
typedef union {
    struct {
        uint64_t pset_avg_thread_execution_time;
        uint64_t pset_execution_time_last_update;
    };
    unsigned __int128 pset_execution_time_packed;
} pset_execution_time_t;
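
/*
 * Illustrative sketch only: the double-wide update is typically done with a
 * 128-bit compare-exchange loop over the packed member, e.g. (assuming the
 * os_atomic_rmw_loop() primitive from <os/atomic_private.h>):
 *
 *      pset_execution_time_t old_value, new_value;
 *      os_atomic_rmw_loop(&stats->pset_execution_time_packed,
 *          old_value.pset_execution_time_packed,
 *          new_value.pset_execution_time_packed, relaxed, {
 *              new_value.pset_avg_thread_execution_time = updated_ewma;
 *              new_value.pset_execution_time_last_update = curtime;
 *      });
 *
 * where "stats", "updated_ewma" and "curtime" are hypothetical locals; see
 * sched_update_pset_avg_execution_time() for the real updater.
 */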

#endif /* __arm64__ */

struct processor_set {
    int pset_id;
    int online_processor_count;
    int cpu_set_low, cpu_set_hi;
    int cpu_set_count;
    int last_chosen;

    uint64_t load_average;
    uint64_t pset_load_average[TH_BUCKET_SCHED_MAX];
    uint64_t pset_load_last_update;
    cpumap_t cpu_bitmask;
    cpumap_t recommended_bitmask;
    cpumap_t cpu_state_map[PROCESSOR_STATE_LEN];
    cpumap_t primary_map;
    cpumap_t realtime_map;
    cpumap_t cpu_available_map;

#define SCHED_PSET_TLOCK (1)
#if defined(SCHED_PSET_TLOCK)
    /* TODO: reorder struct for temporal cache locality */
    __attribute__((aligned(128))) lck_ticket_t sched_lock;
#else /* SCHED_PSET_TLOCK */
    __attribute__((aligned(128))) lck_spin_t sched_lock; /* lock for above */
#endif /* SCHED_PSET_TLOCK */

#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ)
    struct run_queue pset_runq; /* runq for this processor set */
#endif
    struct rt_queue rt_runq; /* realtime runq for this processor set */
    uint64_t stealable_rt_threads_earliest_deadline; /* if this pset has stealable RT threads, the earliest deadline; else UINT64_MAX */
#if CONFIG_SCHED_CLUTCH
    struct sched_clutch_root pset_clutch_root; /* clutch hierarchy root */
#endif /* CONFIG_SCHED_CLUTCH */

#if defined(CONFIG_SCHED_TRADITIONAL)
    int pset_runq_bound_count;
    /* # of threads in runq bound to any processor in pset */
#endif

    /* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */
    cpumap_t pending_AST_URGENT_cpu_mask;
    cpumap_t pending_AST_PREEMPT_cpu_mask;
#if defined(CONFIG_SCHED_DEFERRED_AST)
    /*
     * A separate mask, for ASTs that we may be able to cancel. This is dependent on
     * some level of support for requesting an AST on a processor, and then quashing
     * that request later.
     *
     * The purpose of this field (and the associated codepaths) is to infer when we
     * no longer need a processor that is DISPATCHING to come up, and to prevent it
     * from coming out of IDLE if possible. This should serve to decrease the number
     * of spurious ASTs in the system, and let processors spend longer periods in
     * IDLE.
     */
    cpumap_t pending_deferred_AST_cpu_mask;
#endif
    cpumap_t pending_spill_cpu_mask;
    cpumap_t rt_pending_spill_cpu_mask;

    struct ipc_port *pset_self;      /* port for operations */
    struct ipc_port *pset_name_self; /* port for information */

    processor_set_t pset_list;       /* chain of associated psets */
    pset_node_t node;
    uint32_t pset_cluster_id;

    /*
     * Currently the scheduler uses a mix of pset_cluster_type_t & cluster_type_t
     * for recommendations etc. It might be useful to unify these as a single type.
     */
    pset_cluster_type_t pset_cluster_type;
    cluster_type_t pset_type;

#if CONFIG_SCHED_EDGE
    cpumap_t cpu_running_foreign;
    cpumap_t cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_COUNT];
    sched_bucket_t cpu_running_buckets[MAX_CPUS];

    bitmap_t foreign_psets[BITMAP_LEN(MAX_PSETS)];
    bitmap_t native_psets[BITMAP_LEN(MAX_PSETS)];
    bitmap_t local_psets[BITMAP_LEN(MAX_PSETS)];
    bitmap_t remote_psets[BITMAP_LEN(MAX_PSETS)];
    sched_clutch_edge sched_edges[MAX_PSETS];
    pset_execution_time_t pset_execution_time[TH_BUCKET_SCHED_MAX];
    uint64_t pset_cluster_shared_rsrc_load[CLUSTER_SHARED_RSRC_TYPE_COUNT];
#endif /* CONFIG_SCHED_EDGE */
    cpumap_t perfcontrol_cpu_preferred_bitmask;
    cpumap_t perfcontrol_cpu_migration_bitmask;
    int cpu_preferred_last_chosen;
    bool is_SMT; /* pset contains SMT processors */
};

extern struct processor_set pset0;

typedef bitmap_t pset_map_t;

struct pset_node {
    processor_set_t psets;     /* list of associated psets */

    pset_node_t nodes;         /* list of associated subnodes */
    pset_node_t node_list;     /* chain of associated nodes */

    pset_node_t parent;

    pset_cluster_type_t pset_cluster_type; /* Same as the type of all psets in this node */

    pset_map_t pset_map;       /* map of associated psets */
    _Atomic pset_map_t pset_idle_map;         /* psets with at least one IDLE CPU */
    _Atomic pset_map_t pset_idle_primary_map; /* psets with at least one IDLE primary CPU */
    _Atomic pset_map_t pset_non_rt_map;       /* psets with at least one available CPU not running a realtime thread */
    _Atomic pset_map_t pset_non_rt_primary_map; /* psets with at least one available primary CPU not running a realtime thread */
};

extern struct pset_node pset_node0;
#if __AMP__
extern struct pset_node pset_node1;
extern pset_node_t ecore_node;
extern pset_node_t pcore_node;
#endif

extern queue_head_t tasks, threads, corpse_tasks;
extern int tasks_count, terminated_tasks_count, threads_count, terminated_threads_count;
decl_lck_mtx_data(extern, tasks_threads_lock);
decl_lck_mtx_data(extern, tasks_corpse_lock);

/*
 * The terminated tasks queue should only be inspected elsewhere by stackshot.
 */
extern queue_head_t terminated_tasks;

extern queue_head_t terminated_threads;

struct processor {
    processor_state_t state;          /* See above */
    bool is_SMT;
    bool is_recommended;
    bool current_is_NO_SMT;           /* cached TH_SFLAG_NO_SMT of current thread */
    bool current_is_bound;            /* current thread is bound to this processor */
    bool current_is_eagerpreempt;     /* current thread is TH_SFLAG_EAGERPREEMPT */
    bool pending_nonurgent_preemption; /* RUNNING_TIMER_PREEMPT is armed */
    struct thread *active_thread;     /* thread running on processor */
    struct thread *idle_thread;       /* this processor's idle thread. */
    struct thread *startup_thread;

    processor_set_t processor_set;    /* assigned set */

    /*
     * XXX All current_* fields should be grouped together, as they're
     * updated at the same time.
     */
    int current_pri;                  /* priority of current thread */
    sfi_class_id_t current_sfi_class; /* SFI class of current thread */
    perfcontrol_class_t current_perfctl_class; /* Perfcontrol class for current thread */
    /*
     * The cluster type recommended for the current thread.
     */
    pset_cluster_type_t current_recommended_pset_type;
    thread_urgency_t current_urgency; /* cached urgency of current thread */

#if CONFIG_SCHED_TRADITIONAL
    int runq_bound_count;             /* # of threads bound to this processor */
#endif /* CONFIG_SCHED_TRADITIONAL */

#if CONFIG_THREAD_GROUPS
    struct thread_group *current_thread_group; /* thread_group of current thread */
#endif
    int starting_pri;                 /* priority of current thread as it was when scheduled */
    int cpu_id;                       /* platform numeric id */

    uint64_t quantum_end;             /* time when current quantum ends */
    uint64_t last_dispatch;           /* time of last dispatch */

#if KPERF
    uint64_t kperf_last_sample_time;  /* time of last kperf sample */
#endif /* KPERF */

    uint64_t deadline;                /* for next realtime thread */
    bool first_timeslice;             /* has the quantum expired since context switch */

    bool processor_offlined;          /* has the processor been explicitly processor_offline'ed */
    bool must_idle;                   /* Needs to be forced idle as next selected thread is allowed on this processor */
    bool next_idle_short;             /* Expecting a response IPI soon, so the next idle period is likely very brief */

    bool running_timers_active;       /* whether the running timers should fire */
    struct timer_call running_timers[RUNNING_TIMER_MAX];

#if CONFIG_SCHED_TRADITIONAL || CONFIG_SCHED_MULTIQ
    struct run_queue runq;            /* runq for this processor */
#endif /* CONFIG_SCHED_TRADITIONAL || CONFIG_SCHED_MULTIQ */

#if CONFIG_SCHED_GRRR
    struct grrr_run_queue grrr_runq;  /* Group Ratio Round-Robin runq */
#endif /* CONFIG_SCHED_GRRR */

    struct recount_processor pr_recount;

    /*
     * Pointer to primary processor for secondary SMT processors, or a
     * pointer to ourselves for primaries or non-SMT.
     */
    processor_t processor_primary;
    processor_t processor_secondary;
    struct ipc_port *processor_self;  /* port for operations */

    processor_t processor_list;       /* all existing processors */

    uint64_t timer_call_ttd;          /* current timer call time-to-deadline */
    decl_simple_lock_data(, start_state_lock);
    processor_reason_t last_startup_reason;
    processor_reason_t last_shutdown_reason;
    processor_reason_t last_recommend_reason;
    processor_reason_t last_derecommend_reason;
    bool shutdown_temporary;          /* Shutdown should be transparent to user - don't update CPU counts */
    bool shutdown_locked;             /* Processor may not be shutdown (or started up) except by SYSTEM */
};

extern processor_t processor_list;
decl_simple_lock_data(extern, processor_list_lock);

/*
 * Maximum number of CPUs supported by the scheduler. bits.h bitmap macros
 * need to be used to support greater than 64.
 */
#define MAX_SCHED_CPUS 64
extern processor_t __single processor_array[MAX_SCHED_CPUS]; /* array indexed by cpuid */
extern processor_set_t __single pset_array[MAX_PSETS];        /* array indexed by pset_id */

extern uint32_t processor_avail_count;
extern uint32_t processor_avail_count_user;
extern uint32_t primary_processor_avail_count;
extern uint32_t primary_processor_avail_count_user;

#define master_processor PERCPU_GET_MASTER(processor)
PERCPU_DECL(struct processor, processor);

extern processor_t current_processor(void);

/* Lock macros, always acquired and released with interrupts disabled (splsched()) */

extern lck_grp_t pset_lck_grp;

#if defined(SCHED_PSET_TLOCK)
#define pset_lock_init(p)       lck_ticket_init(&(p)->sched_lock, &pset_lck_grp)
#define pset_lock(p)            lck_ticket_lock(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)          lck_ticket_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)   lck_ticket_assert_owned(&(p)->sched_lock)
#else /* SCHED_PSET_TLOCK */
#define pset_lock_init(p)       lck_spin_init(&(p)->sched_lock, &pset_lck_grp, NULL)
#define pset_lock(p)            lck_spin_lock_grp(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)          lck_spin_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)   LCK_SPIN_ASSERT(&(p)->sched_lock, LCK_ASSERT_OWNED)
#endif /* !SCHED_PSET_TLOCK */
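
/*
 * Illustrative sketch only: typical usage of the pset lock macros above.
 * Interrupts are disabled with splsched() before taking the lock and
 * restored after it is dropped; the "pset" variable is a hypothetical local.
 *
 *      spl_t s = splsched();
 *      pset_lock(pset);
 *      // examine or update pset->cpu_state_map, runqueues, etc.
 *      pset_unlock(pset);
 *      splx(s);
 */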

extern lck_spin_t pset_node_lock;

extern void processor_bootstrap(void);

extern void processor_init(
    processor_t processor,
    int cpu_id,
    processor_set_t processor_set);

extern void processor_set_primary(
    processor_t processor,
    processor_t primary);

extern kern_return_t processor_shutdown(
    processor_t processor,
    processor_reason_t reason,
    uint32_t flags);

extern void processor_wait_for_start(
    processor_t processor);

extern kern_return_t processor_start_from_user(
    processor_t processor);
extern kern_return_t processor_exit_from_user(
    processor_t processor);

extern kern_return_t processor_start_reason(
    processor_t processor,
    processor_reason_t reason,
    uint32_t flags);
extern kern_return_t processor_exit_reason(
    processor_t processor,
    processor_reason_t reason,
    uint32_t flags);

extern kern_return_t sched_processor_enable(
    processor_t processor,
    boolean_t enable);

extern void processor_queue_shutdown(
    processor_t processor);

extern processor_set_t processor_pset(
    processor_t processor);

extern pset_node_t pset_node_root(void);

extern processor_set_t pset_create(
    pset_node_t node,
    pset_cluster_type_t pset_type,
    uint32_t pset_cluster_id,
    int pset_id);

extern void pset_init(
    processor_set_t pset,
    pset_node_t node);

extern processor_set_t pset_find(
    uint32_t cluster_id,
    processor_set_t default_pset);

extern kern_return_t processor_info_count(
    processor_flavor_t flavor,
    mach_msg_type_number_t *count);

extern void processor_cpu_load_info(
    processor_t processor,
    natural_t ticks[static CPU_STATE_MAX]);

extern void machine_run_count(
    uint32_t count);

extern processor_t machine_choose_processor(
    processor_set_t pset,
    processor_t processor);

inline static processor_set_t
next_pset(processor_set_t pset)
{
    pset_map_t map = pset->node->pset_map;

    int pset_id = lsb_next(map, pset->pset_id);
    if (pset_id == -1) {
        pset_id = lsb_first(map);
    }

    return pset_array[pset_id];
}
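
/*
 * Illustrative sketch only: next_pset() wraps around within the node, so a
 * hypothetical caller can visit every pset in a node by iterating until it
 * returns to its starting point.
 *
 *      processor_set_t start = pset;
 *      processor_set_t cur = pset;
 *      do {
 *              // examine cur
 *              cur = next_pset(cur);
 *      } while (cur != start);
 */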

#define PSET_THING_TASK         0
#define PSET_THING_THREAD       1

extern pset_cluster_type_t recommended_pset_type(
    thread_t thread);

extern void processor_state_update_idle(
    processor_t processor);

extern void processor_state_update_from_thread(
    processor_t processor,
    thread_t thread,
    boolean_t pset_lock_held);

extern void processor_state_update_explicit(
    processor_t processor,
    int pri,
    sfi_class_id_t sfi_class,
    pset_cluster_type_t pset_type,
    perfcontrol_class_t perfctl_class,
    thread_urgency_t urgency,
    sched_bucket_t bucket);

#define PSET_LOAD_NUMERATOR_SHIFT   16
#define PSET_LOAD_FRACTIONAL_SHIFT   4

#if CONFIG_SCHED_EDGE

extern cluster_type_t pset_type_for_id(uint32_t cluster_id);
extern uint64_t sched_pset_cluster_shared_rsrc_load(processor_set_t pset, cluster_shared_rsrc_type_t shared_rsrc_type);

/*
 * The Edge scheduler uses average scheduling latency as the metric for making
 * thread migration decisions. One component of avg scheduling latency is the load
 * average on the cluster.
 *
 * Load Average Fixed Point Arithmetic
 *
 * The load average is maintained as a 24.8 fixed point arithmetic value for precision.
 * When multiplied by the average execution time, it needs to be rounded up (based on
 * the most significant bit of the fractional part) for better accuracy. After rounding
 * up, the whole number part of the value is used as the actual load value for
 * migrate/steal decisions.
 */
#define SCHED_PSET_LOAD_EWMA_FRACTION_BITS 8
#define SCHED_PSET_LOAD_EWMA_ROUND_BIT     (1 << (SCHED_PSET_LOAD_EWMA_FRACTION_BITS - 1))
#define SCHED_PSET_LOAD_EWMA_FRACTION_MASK ((1 << SCHED_PSET_LOAD_EWMA_FRACTION_BITS) - 1)
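
/*
 * Worked example (values are hypothetical): with 8 fraction bits, a load
 * average of 2.75 is stored as 2.75 * 256 = 704 (0x2C0). Adding
 * SCHED_PSET_LOAD_EWMA_ROUND_BIT (128) and shifting right by
 * SCHED_PSET_LOAD_EWMA_FRACTION_BITS gives (704 + 128) >> 8 = 3, i.e. the
 * load rounded to the nearest whole number. sched_get_pset_load_average()
 * below then scales that rounded load by the pset's average thread
 * execution time, so with an average execution time of 10 us the resulting
 * metric would be 3 * 10 = 30.
 */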

inline static int
sched_get_pset_load_average(processor_set_t pset, sched_bucket_t sched_bucket)
{
    uint64_t load_average = os_atomic_load(&pset->pset_load_average[sched_bucket], relaxed);
    return (int)(((load_average + SCHED_PSET_LOAD_EWMA_ROUND_BIT) >> SCHED_PSET_LOAD_EWMA_FRACTION_BITS) *
           pset->pset_execution_time[sched_bucket].pset_avg_thread_execution_time);
}

#else /* CONFIG_SCHED_EDGE */

inline static int
sched_get_pset_load_average(processor_set_t pset, __unused sched_bucket_t sched_bucket)
{
    return (int)pset->load_average >> (PSET_LOAD_NUMERATOR_SHIFT - PSET_LOAD_FRACTIONAL_SHIFT);
}
#endif /* CONFIG_SCHED_EDGE */

extern void sched_update_pset_load_average(processor_set_t pset, uint64_t curtime);
extern void sched_update_pset_avg_execution_time(processor_set_t pset, uint64_t delta, uint64_t curtime, sched_bucket_t sched_bucket);

inline static void
pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state)
{
    pset_assert_locked(pset);

    uint old_state = processor->state;
    uint cpuid = (uint)processor->cpu_id;

    assert(processor->processor_set == pset);
    assert(bit_test(pset->cpu_bitmask, cpuid));

    assert(old_state < PROCESSOR_STATE_LEN);
    assert(new_state < PROCESSOR_STATE_LEN);

    processor->state = new_state;

    bit_clear(pset->cpu_state_map[old_state], cpuid);
    bit_set(pset->cpu_state_map[new_state], cpuid);

    if (bit_test(pset->cpu_available_map, cpuid) && (new_state < PROCESSOR_IDLE)) {
        /* No longer available for scheduling */
        bit_clear(pset->cpu_available_map, cpuid);
    } else if (!bit_test(pset->cpu_available_map, cpuid) && (new_state >= PROCESSOR_IDLE)) {
        /* Newly available for scheduling */
        bit_set(pset->cpu_available_map, cpuid);
    }

    if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) {
        sched_update_pset_load_average(pset, 0);
        if (new_state == PROCESSOR_RUNNING) {
            assert(processor == current_processor());
        }
    }
    if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) {
        if (new_state == PROCESSOR_IDLE) {
            bit_clear(pset->realtime_map, cpuid);
        }

        pset_node_t node = pset->node;

        if (bit_count(node->pset_map) == 1) {
            /* Node has only a single pset, so skip node pset map updates */
            return;
        }

        if (new_state == PROCESSOR_IDLE) {
            if (processor->processor_primary == processor) {
                if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) {
                    atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
                }
                if (!bit_test(atomic_load(&node->pset_idle_primary_map), pset->pset_id)) {
                    atomic_bit_set(&node->pset_idle_primary_map, pset->pset_id, memory_order_relaxed);
                }
            }
            if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) {
                atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
            }
            if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
                atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
            }
        } else {
            cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
            if (idle_map == 0) {
                /* No more IDLE CPUs */
                if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
                    atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
                }
            }
            if (processor->processor_primary == processor) {
                idle_map &= pset->primary_map;
                if (idle_map == 0) {
                    /* No more IDLE primary CPUs */
                    if (bit_test(atomic_load(&node->pset_idle_primary_map), pset->pset_id)) {
                        atomic_bit_clear(&node->pset_idle_primary_map, pset->pset_id, memory_order_relaxed);
                    }
                }
            }
        }
    }
}

decl_simple_lock_data(extern, sched_available_cores_lock);

#endif /* MACH_KERNEL_PRIVATE */
#ifdef KERNEL_PRIVATE

extern unsigned int processor_count;
extern processor_t cpu_to_processor(int cpu);

extern kern_return_t enable_smt_processors(bool enable);

/*
 * Update the scheduler with the set of cores that should be used to dispatch new threads.
 * Non-recommended cores can still be used to field interrupts or run bound threads.
 * This should be called with interrupts enabled and no scheduler locks held.
 */
#define ALL_CORES_RECOMMENDED   (~(uint64_t)0)
#define ALL_CORES_POWERED       (~(uint64_t)0)

extern void sched_perfcontrol_update_recommended_cores(uint32_t recommended_cores);
extern void sched_perfcontrol_update_recommended_cores_reason(uint64_t recommended_cores, processor_reason_t reason, uint32_t flags);
extern void sched_perfcontrol_update_powered_cores(uint64_t powered_cores, processor_reason_t reason, uint32_t flags);
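
/*
 * Illustrative sketch only: a hypothetical performance-controller client
 * restricting dispatch to CPUs 0-3 and later restoring the full set. Bit
 * positions correspond to cpu_id values; the mask shown is made up.
 *
 *      sched_perfcontrol_update_recommended_cores(0x0000000Fu);
 *      ...
 *      // later, with an appropriate processor_reason_t value "reason":
 *      sched_perfcontrol_update_recommended_cores_reason(ALL_CORES_RECOMMENDED, reason, 0);
 */
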
extern void sched_override_available_cores_for_sleep(void);
extern void sched_restore_available_cores_after_sleep(void);
extern bool sched_is_in_sleep(void);
extern void sched_mark_processor_online_locked(processor_t processor, processor_reason_t reason);
extern kern_return_t sched_mark_processor_offline(processor_t processor, processor_reason_t reason);
extern bool processor_should_kprintf(processor_t processor, bool starting);
extern void suspend_cluster_powerdown(void);
extern void resume_cluster_powerdown(void);
extern kern_return_t suspend_cluster_powerdown_from_user(void);
extern kern_return_t resume_cluster_powerdown_from_user(void);
extern int get_cluster_powerdown_user_suspended(void);

#endif /* KERNEL_PRIVATE */

__ASSUME_PTR_ABI_SINGLE_END __END_DECLS

#endif /* _KERN_PROCESSOR_H_ */