| 1 | /* |
| 2 | * Copyright (c) 2000-2019 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | /* |
| 29 | * @OSF_COPYRIGHT@ |
| 30 | */ |
| 31 | /* |
| 32 | * Mach Operating System |
| 33 | * Copyright (c) 1991,1990,1989 Carnegie Mellon University |
| 34 | * All Rights Reserved. |
| 35 | * |
| 36 | * Permission to use, copy, modify and distribute this software and its |
| 37 | * documentation is hereby granted, provided that both the copyright |
| 38 | * notice and this permission notice appear in all copies of the |
| 39 | * software, derivative works or modified versions, and any portions |
| 40 | * thereof, and that both notices appear in supporting documentation. |
| 41 | * |
| 42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| 43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR |
| 44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
| 45 | * |
| 46 | * Carnegie Mellon requests users of this software to return to |
| 47 | * |
| 48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
| 49 | * School of Computer Science |
| 50 | * Carnegie Mellon University |
| 51 | * Pittsburgh PA 15213-3890 |
| 52 | * |
| 53 | * any improvements or extensions that they make and grant Carnegie Mellon |
| 54 | * the rights to redistribute these changes. |
| 55 | */ |
| 56 | /* |
| 57 | */ |
| 58 | |
| 59 | /* |
| 60 | * processor.h: Processor and processor-related definitions. |
| 61 | */ |
| 62 | |
| 63 | #ifndef _KERN_PROCESSOR_H_ |
| 64 | #define _KERN_PROCESSOR_H_ |
| 65 | |
| 66 | #include <mach/boolean.h> |
| 67 | #include <mach/kern_return.h> |
| 68 | #include <kern/kern_types.h> |
| 69 | |
| 70 | #include <sys/cdefs.h> |
| 71 | |
| 72 | #ifdef MACH_KERNEL_PRIVATE |
| 73 | #include <mach/mach_types.h> |
| 74 | #include <kern/ast.h> |
| 75 | #include <kern/cpu_number.h> |
| 76 | #include <kern/smp.h> |
| 77 | #include <kern/simple_lock.h> |
| 78 | #include <kern/locks.h> |
| 79 | #include <kern/percpu.h> |
| 80 | #include <kern/queue.h> |
| 81 | #include <kern/recount.h> |
| 82 | #include <kern/sched.h> |
| 83 | #include <kern/sched_urgency.h> |
| 84 | #include <kern/timer.h> |
| 85 | #include <mach/sfi_class.h> |
| 86 | #include <kern/sched_clutch.h> |
| 87 | #include <kern/timer_call.h> |
| 88 | #include <kern/assert.h> |
| 89 | #include <machine/limits.h> |
| 90 | #endif |
| 91 | |
| 92 | __BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN |
| 93 | |
| 94 | #ifdef MACH_KERNEL_PRIVATE |
| 95 | |
| 96 | /* |
| 97 | * Processor state is accessed by locking the scheduling lock |
| 98 | * for the assigned processor set. |
| 99 | * |
| 100 | * --- PENDING <------- SHUTDOWN |
| 101 | * / ^ ^ |
| 102 | * _/ | \ |
| 103 | * OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING |
| 104 | * \_________________^ ^ ^______/ / |
| 105 | * \__________________/ |
| 106 | * |
 * Most of these state transitions are externally driven as a
 * directive (for instance telling an IDLE processor to start
| 109 | * coming out of the idle state to run a thread). However these |
| 110 | * are typically paired with a handshake by the processor itself |
| 111 | * to indicate that it has completed a transition of indeterminate |
| 112 | * length (for example, the DISPATCHING->RUNNING or START->RUNNING |
| 113 | * transitions must occur on the processor itself). |
| 114 | * |
| 115 | * The boot processor has some special cases, and skips the START state, |
| 116 | * since it has already bootstrapped and is ready to context switch threads. |
| 117 | * |
| 118 | * When a processor is in DISPATCHING or RUNNING state, the current_pri, |
| 119 | * current_thmode, and deadline fields should be set, so that other |
| 120 | * processors can evaluate if it is an appropriate candidate for preemption. |
| 121 | */ |
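/*
 * Illustrative sketch only (not an actual scheduler code path; the IPI and
 * thread-selection steps are elided): an externally driven transition is made
 * under the pset lock, and the target processor later completes the handshake
 * on-core, e.g. for an IDLE -> DISPATCHING -> RUNNING sequence:
 *
 *      pset_lock(pset);
 *      pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);
 *      pset_unlock(pset);
 *      ... later, on the target processor itself, once a thread is on-core ...
 *      pset_lock(pset);
 *      pset_update_processor_state(pset, processor, PROCESSOR_RUNNING);
 *      pset_unlock(pset);
 *
 * pset_lock()/pset_unlock() and pset_update_processor_state() are defined
 * later in this header.
 */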
| 122 | #if defined(CONFIG_SCHED_DEFERRED_AST) |
| 123 | /* |
| 124 | * --- PENDING <------- SHUTDOWN |
| 125 | * / ^ ^ |
| 126 | * _/ | \ |
| 127 | * OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING |
| 128 | * \_________________^ ^ ^______/ ^_____ / / |
| 129 | * \__________________/ |
| 130 | * |
| 131 | * A DISPATCHING processor may be put back into IDLE, if another |
| 132 | * processor determines that the target processor will have nothing to do |
| 133 | * upon reaching the RUNNING state. This is racy, but if the target |
| 134 | * responds and becomes RUNNING, it will not break the processor state |
| 135 | * machine. |
| 136 | * |
| 137 | * This change allows us to cancel an outstanding signal/AST on a processor |
| 138 | * (if such an operation is supported through hardware or software), and |
| 139 | * push the processor back into the IDLE state as a power optimization. |
| 140 | */ |
| 141 | #endif |
| 142 | |
| 143 | typedef enum { |
| 144 | PROCESSOR_OFF_LINE = 0, /* Not available */ |
| 145 | PROCESSOR_SHUTDOWN = 1, /* Going off-line, but schedulable */ |
| 146 | PROCESSOR_START = 2, /* Being started */ |
| 147 | PROCESSOR_PENDING_OFFLINE = 3, /* Going off-line, not schedulable */ |
| 148 | PROCESSOR_IDLE = 4, /* Idle (available) */ |
| 149 | PROCESSOR_DISPATCHING = 5, /* Dispatching (idle -> active) */ |
| 150 | PROCESSOR_RUNNING = 6, /* Normal execution */ |
| 151 | PROCESSOR_STATE_LEN = (PROCESSOR_RUNNING + 1) |
| 152 | } processor_state_t; |
| 153 | |
| 154 | typedef enum { |
| 155 | PSET_SMP, |
| 156 | #if __AMP__ |
| 157 | PSET_AMP_E, |
| 158 | PSET_AMP_P, |
| 159 | #endif |
| 160 | } pset_cluster_type_t; |
| 161 | |
| 162 | #if __AMP__ |
| 163 | |
| 164 | typedef enum { |
| 165 | SCHED_PERFCTL_POLICY_DEFAULT, /* static policy: set at boot */ |
| 166 | SCHED_PERFCTL_POLICY_FOLLOW_GROUP, /* dynamic policy: perfctl_class follows thread group across amp clusters */ |
| 167 | SCHED_PERFCTL_POLICY_RESTRICT_E, /* dynamic policy: limits perfctl_class to amp e cluster */ |
| 168 | } sched_perfctl_class_policy_t; |
| 169 | |
| 170 | extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util; |
| 171 | extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg; |
| 172 | |
| 173 | #endif /* __AMP__ */ |
| 174 | |
| 175 | typedef bitmap_t cpumap_t; |
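/*
 * Illustrative sketch: a cpumap_t is a single 64-bit bitmap indexed by cpu_id
 * and manipulated with the bits.h helpers used throughout this header, e.g.
 *
 *      if (bit_test(pset->recommended_bitmask, processor->cpu_id)) {
 *              ... this CPU is currently recommended for scheduling ...
 *      }
 */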
| 176 | |
| 177 | #if __arm64__ |
| 178 | |
| 179 | /* |
| 180 | * pset_execution_time_t |
| 181 | * |
| 182 | * The pset_execution_time_t type is used to maintain the average |
| 183 | * execution time of threads on a pset. Since the avg. execution time is |
| 184 | * updated from contexts where the pset lock is not held, it uses a |
| 185 | * double-wide RMW loop to update these values atomically. |
| 186 | */ |
| 187 | typedef union { |
| 188 | struct { |
| 189 | uint64_t pset_avg_thread_execution_time; |
| 190 | uint64_t pset_execution_time_last_update; |
| 191 | }; |
| 192 | unsigned __int128 pset_execution_time_packed; |
| 193 | } pset_execution_time_t; |
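/*
 * Illustrative sketch of the update pattern (the real routine lives in the
 * scheduler implementation; 'pet', 'new_avg', and 'curtime' are hypothetical
 * locals): both fields are written together with a double-wide RMW loop so
 * that readers never observe a torn pair.
 *
 *      pset_execution_time_t old_value, new_value;
 *      os_atomic_rmw_loop(&pet->pset_execution_time_packed,
 *          old_value.pset_execution_time_packed,
 *          new_value.pset_execution_time_packed, relaxed, {
 *              new_value.pset_avg_thread_execution_time = new_avg;
 *              new_value.pset_execution_time_last_update = curtime;
 *      });
 */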
| 194 | |
| 195 | #endif /* __arm64__ */ |
| 196 | |
| 197 | struct processor_set { |
| 198 | int pset_id; |
| 199 | int online_processor_count; |
| 200 | int cpu_set_low, cpu_set_hi; |
| 201 | int cpu_set_count; |
| 202 | int last_chosen; |
| 203 | |
| 204 | uint64_t load_average; |
| 205 | uint64_t pset_load_average[TH_BUCKET_SCHED_MAX]; |
| 206 | uint64_t pset_load_last_update; |
| 207 | cpumap_t cpu_bitmask; |
| 208 | cpumap_t recommended_bitmask; |
| 209 | cpumap_t cpu_state_map[PROCESSOR_STATE_LEN]; |
| 210 | cpumap_t primary_map; |
| 211 | cpumap_t realtime_map; |
| 212 | cpumap_t cpu_available_map; |
| 213 | |
| 214 | #define SCHED_PSET_TLOCK (1) |
| 215 | #if defined(SCHED_PSET_TLOCK) |
| 216 | /* TODO: reorder struct for temporal cache locality */ |
| 217 | __attribute__((aligned(128))) lck_ticket_t sched_lock; |
| 218 | #else /* SCHED_PSET_TLOCK*/ |
| 219 | __attribute__((aligned(128))) lck_spin_t sched_lock; /* lock for above */ |
| 220 | #endif /* SCHED_PSET_TLOCK*/ |
| 221 | |
| 222 | #if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ) |
| 223 | struct run_queue pset_runq; /* runq for this processor set */ |
| 224 | #endif |
| 225 | struct rt_queue rt_runq; /* realtime runq for this processor set */ |
| 226 | uint64_t stealable_rt_threads_earliest_deadline; /* if this pset has stealable RT threads, the earliest deadline; else UINT64_MAX */ |
| 227 | #if CONFIG_SCHED_CLUTCH |
| 228 | struct sched_clutch_root pset_clutch_root; /* clutch hierarchy root */ |
| 229 | #endif /* CONFIG_SCHED_CLUTCH */ |
| 230 | |
| 231 | #if defined(CONFIG_SCHED_TRADITIONAL) |
| 232 | int pset_runq_bound_count; |
| 233 | /* # of threads in runq bound to any processor in pset */ |
| 234 | #endif |
| 235 | |
| 236 | /* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */ |
| 237 | cpumap_t pending_AST_URGENT_cpu_mask; |
| 238 | cpumap_t pending_AST_PREEMPT_cpu_mask; |
| 239 | #if defined(CONFIG_SCHED_DEFERRED_AST) |
| 240 | /* |
| 241 | * A separate mask, for ASTs that we may be able to cancel. This is dependent on |
| 242 | * some level of support for requesting an AST on a processor, and then quashing |
| 243 | * that request later. |
| 244 | * |
| 245 | * The purpose of this field (and the associated codepaths) is to infer when we |
| 246 | * no longer need a processor that is DISPATCHING to come up, and to prevent it |
| 247 | * from coming out of IDLE if possible. This should serve to decrease the number |
| 248 | * of spurious ASTs in the system, and let processors spend longer periods in |
| 249 | * IDLE. |
| 250 | */ |
| 251 | cpumap_t pending_deferred_AST_cpu_mask; |
| 252 | #endif |
| 253 | cpumap_t pending_spill_cpu_mask; |
| 254 | cpumap_t rt_pending_spill_cpu_mask; |
| 255 | |
| 256 | struct ipc_port * pset_self; /* port for operations */ |
| 257 | struct ipc_port * pset_name_self; /* port for information */ |
| 258 | |
| 259 | processor_set_t pset_list; /* chain of associated psets */ |
| 260 | pset_node_t node; |
| 261 | uint32_t pset_cluster_id; |
| 262 | |
| 263 | /* |
| 264 | * Currently the scheduler uses a mix of pset_cluster_type_t & cluster_type_t |
| 265 | * for recommendations etc. It might be useful to unify these as a single type. |
| 266 | */ |
| 267 | pset_cluster_type_t pset_cluster_type; |
| 268 | cluster_type_t pset_type; |
| 269 | |
| 270 | #if CONFIG_SCHED_EDGE |
| 271 | cpumap_t cpu_running_foreign; |
| 272 | cpumap_t cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_COUNT]; |
| 273 | sched_bucket_t cpu_running_buckets[MAX_CPUS]; |
| 274 | |
| 275 | bitmap_t foreign_psets[BITMAP_LEN(MAX_PSETS)]; |
| 276 | bitmap_t native_psets[BITMAP_LEN(MAX_PSETS)]; |
| 277 | bitmap_t local_psets[BITMAP_LEN(MAX_PSETS)]; |
| 278 | bitmap_t remote_psets[BITMAP_LEN(MAX_PSETS)]; |
| 279 | sched_clutch_edge sched_edges[MAX_PSETS]; |
| 280 | pset_execution_time_t pset_execution_time[TH_BUCKET_SCHED_MAX]; |
| 281 | uint64_t pset_cluster_shared_rsrc_load[CLUSTER_SHARED_RSRC_TYPE_COUNT]; |
| 282 | #endif /* CONFIG_SCHED_EDGE */ |
| 283 | cpumap_t perfcontrol_cpu_preferred_bitmask; |
| 284 | cpumap_t perfcontrol_cpu_migration_bitmask; |
| 285 | int cpu_preferred_last_chosen; |
| 286 | bool is_SMT; /* pset contains SMT processors */ |
| 287 | }; |
| 288 | |
| 289 | extern struct processor_set pset0; |
| 290 | |
| 291 | typedef bitmap_t pset_map_t; |
| 292 | |
| 293 | struct pset_node { |
| 294 | processor_set_t psets; /* list of associated psets */ |
| 295 | |
| 296 | pset_node_t nodes; /* list of associated subnodes */ |
| 297 | pset_node_t node_list; /* chain of associated nodes */ |
| 298 | |
| 299 | pset_node_t parent; |
| 300 | |
| 301 | pset_cluster_type_t pset_cluster_type; /* Same as the type of all psets in this node */ |
| 302 | |
| 303 | pset_map_t pset_map; /* map of associated psets */ |
| 304 | _Atomic pset_map_t pset_idle_map; /* psets with at least one IDLE CPU */ |
| 305 | _Atomic pset_map_t pset_idle_primary_map; /* psets with at least one IDLE primary CPU */ |
| 306 | _Atomic pset_map_t pset_non_rt_map; /* psets with at least one available CPU not running a realtime thread */ |
| 307 | _Atomic pset_map_t pset_non_rt_primary_map;/* psets with at least one available primary CPU not running a realtime thread */ |
| 308 | }; |
| 309 | |
| 310 | extern struct pset_node pset_node0; |
| 311 | #if __AMP__ |
| 312 | extern struct pset_node pset_node1; |
| 313 | extern pset_node_t ecore_node; |
| 314 | extern pset_node_t pcore_node; |
| 315 | #endif |
| 316 | |
| 317 | extern queue_head_t tasks, threads, corpse_tasks; |
| 318 | extern int tasks_count, terminated_tasks_count, threads_count, terminated_threads_count; |
| 319 | decl_lck_mtx_data(extern, tasks_threads_lock); |
| 320 | decl_lck_mtx_data(extern, tasks_corpse_lock); |
| 321 | |
| 322 | /* |
 * Outside of the task-termination path itself, the terminated tasks queue
 * should only be inspected by stackshot.
| 324 | */ |
| 325 | extern queue_head_t terminated_tasks; |
| 326 | |
| 327 | extern queue_head_t terminated_threads; |
| 328 | |
| 329 | struct processor { |
| 330 | processor_state_t state; /* See above */ |
| 331 | bool is_SMT; |
| 332 | bool is_recommended; |
| 333 | bool current_is_NO_SMT; /* cached TH_SFLAG_NO_SMT of current thread */ |
| 334 | bool current_is_bound; /* current thread is bound to this processor */ |
| 335 | bool current_is_eagerpreempt;/* current thread is TH_SFLAG_EAGERPREEMPT */ |
| 336 | bool pending_nonurgent_preemption; /* RUNNING_TIMER_PREEMPT is armed */ |
| 337 | struct thread *active_thread; /* thread running on processor */ |
| 338 | struct thread *idle_thread; /* this processor's idle thread. */ |
| 339 | struct thread *startup_thread; |
| 340 | |
| 341 | processor_set_t processor_set; /* assigned set */ |
| 342 | |
| 343 | /* |
| 344 | * XXX All current_* fields should be grouped together, as they're |
| 345 | * updated at the same time. |
| 346 | */ |
| 347 | int current_pri; /* priority of current thread */ |
| 348 | sfi_class_id_t current_sfi_class; /* SFI class of current thread */ |
| 349 | perfcontrol_class_t current_perfctl_class; /* Perfcontrol class for current thread */ |
| 350 | /* |
| 351 | * The cluster type recommended for the current thread. |
| 352 | */ |
| 353 | pset_cluster_type_t current_recommended_pset_type; |
| 354 | thread_urgency_t current_urgency; /* cached urgency of current thread */ |
| 355 | |
| 356 | #if CONFIG_SCHED_TRADITIONAL |
| 357 | int runq_bound_count; /* # of threads bound to this processor */ |
| 358 | #endif /* CONFIG_SCHED_TRADITIONAL */ |
| 359 | |
| 360 | #if CONFIG_THREAD_GROUPS |
| 361 | struct thread_group *current_thread_group; /* thread_group of current thread */ |
| 362 | #endif |
| 363 | int starting_pri; /* priority of current thread as it was when scheduled */ |
| 364 | int cpu_id; /* platform numeric id */ |
| 365 | |
| 366 | uint64_t quantum_end; /* time when current quantum ends */ |
| 367 | uint64_t last_dispatch; /* time of last dispatch */ |
| 368 | |
| 369 | #if KPERF |
| 370 | uint64_t kperf_last_sample_time; /* time of last kperf sample */ |
| 371 | #endif /* KPERF */ |
| 372 | |
| 373 | uint64_t deadline; /* for next realtime thread */ |
| 374 | bool first_timeslice; /* has the quantum expired since context switch */ |
| 375 | |
| 376 | bool processor_offlined; /* has the processor been explicitly processor_offline'ed */ |
| 377 | bool must_idle; /* Needs to be forced idle as next selected thread is allowed on this processor */ |
| 378 | bool next_idle_short; /* Expecting a response IPI soon, so the next idle period is likely very brief */ |
| 379 | |
| 380 | bool running_timers_active; /* whether the running timers should fire */ |
| 381 | struct timer_call running_timers[RUNNING_TIMER_MAX]; |
| 382 | |
| 383 | #if CONFIG_SCHED_TRADITIONAL || CONFIG_SCHED_MULTIQ |
| 384 | struct run_queue runq; /* runq for this processor */ |
| 385 | #endif /* CONFIG_SCHED_TRADITIONAL || CONFIG_SCHED_MULTIQ */ |
| 386 | |
| 387 | #if CONFIG_SCHED_GRRR |
| 388 | struct grrr_run_queue grrr_runq; /* Group Ratio Round-Robin runq */ |
| 389 | #endif /* CONFIG_SCHED_GRRR */ |
| 390 | |
| 391 | struct recount_processor pr_recount; |
| 392 | |
| 393 | /* |
| 394 | * Pointer to primary processor for secondary SMT processors, or a |
| 395 | * pointer to ourselves for primaries or non-SMT. |
| 396 | */ |
| 397 | processor_t processor_primary; |
| 398 | processor_t processor_secondary; |
| 399 | struct ipc_port *processor_self; /* port for operations */ |
| 400 | |
| 401 | processor_t processor_list; /* all existing processors */ |
| 402 | |
| 403 | uint64_t timer_call_ttd; /* current timer call time-to-deadline */ |
| 404 | decl_simple_lock_data(, start_state_lock); |
| 405 | processor_reason_t last_startup_reason; |
| 406 | processor_reason_t last_shutdown_reason; |
| 407 | processor_reason_t last_recommend_reason; |
| 408 | processor_reason_t last_derecommend_reason; |
| 409 | bool shutdown_temporary; /* Shutdown should be transparent to user - don't update CPU counts */ |
| 410 | bool shutdown_locked; /* Processor may not be shutdown (or started up) except by SYSTEM */ |
| 411 | }; |
| 412 | |
| 413 | extern processor_t processor_list; |
| 414 | decl_simple_lock_data(extern, processor_list_lock); |
| 415 | |
| 416 | /* |
 * Maximum number of CPUs supported by the scheduler. Supporting more than 64
 * CPUs would require switching to the multi-word bitmap macros in bits.h.
| 419 | */ |
| 420 | #define MAX_SCHED_CPUS 64 |
| 421 | extern processor_t __single processor_array[MAX_SCHED_CPUS]; /* array indexed by cpuid */ |
| 422 | extern processor_set_t __single pset_array[MAX_PSETS]; /* array indexed by pset_id */ |
| 423 | |
| 424 | extern uint32_t processor_avail_count; |
| 425 | extern uint32_t processor_avail_count_user; |
| 426 | extern uint32_t primary_processor_avail_count; |
| 427 | extern uint32_t primary_processor_avail_count_user; |
| 428 | |
| 429 | #define master_processor PERCPU_GET_MASTER(processor) |
| 430 | PERCPU_DECL(struct processor, processor); |
| 431 | |
| 432 | extern processor_t current_processor(void); |
| 433 | |
| 434 | /* Lock macros, always acquired and released with interrupts disabled (splsched()) */ |
| 435 | |
| 436 | extern lck_grp_t pset_lck_grp; |
| 437 | |
| 438 | #if defined(SCHED_PSET_TLOCK) |
| 439 | #define pset_lock_init(p) lck_ticket_init(&(p)->sched_lock, &pset_lck_grp) |
| 440 | #define pset_lock(p) lck_ticket_lock(&(p)->sched_lock, &pset_lck_grp) |
| 441 | #define pset_unlock(p) lck_ticket_unlock(&(p)->sched_lock) |
| 442 | #define pset_assert_locked(p) lck_ticket_assert_owned(&(p)->sched_lock) |
| 443 | #else /* SCHED_PSET_TLOCK*/ |
| 444 | #define pset_lock_init(p) lck_spin_init(&(p)->sched_lock, &pset_lck_grp, NULL) |
| 445 | #define pset_lock(p) lck_spin_lock_grp(&(p)->sched_lock, &pset_lck_grp) |
| 446 | #define pset_unlock(p) lck_spin_unlock(&(p)->sched_lock) |
| 447 | #define pset_assert_locked(p) LCK_SPIN_ASSERT(&(p)->sched_lock, LCK_ASSERT_OWNED) |
| 448 | #endif /*!SCHED_PSET_TLOCK*/ |
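/*
 * Illustrative usage sketch: callers disable interrupts with splsched()
 * before taking a pset lock and restore them after dropping it, e.g.
 *
 *      spl_t s = splsched();
 *      pset_lock(pset);
 *      ... examine or update pset scheduling state ...
 *      pset_unlock(pset);
 *      splx(s);
 */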
| 449 | |
| 450 | extern lck_spin_t pset_node_lock; |
| 451 | |
| 452 | extern void processor_bootstrap(void); |
| 453 | |
| 454 | extern void processor_init( |
| 455 | processor_t processor, |
| 456 | int cpu_id, |
| 457 | processor_set_t processor_set); |
| 458 | |
| 459 | extern void processor_set_primary( |
| 460 | processor_t processor, |
| 461 | processor_t primary); |
| 462 | |
| 463 | extern kern_return_t processor_shutdown( |
| 464 | processor_t processor, |
| 465 | processor_reason_t reason, |
| 466 | uint32_t flags); |
| 467 | |
| 468 | extern void processor_wait_for_start( |
| 469 | processor_t processor); |
| 470 | |
| 471 | extern kern_return_t processor_start_from_user( |
| 472 | processor_t processor); |
| 473 | extern kern_return_t processor_exit_from_user( |
| 474 | processor_t processor); |
| 475 | |
| 476 | extern kern_return_t processor_start_reason( |
| 477 | processor_t processor, |
| 478 | processor_reason_t reason, |
| 479 | uint32_t flags); |
| 480 | extern kern_return_t processor_exit_reason( |
| 481 | processor_t processor, |
| 482 | processor_reason_t reason, |
| 483 | uint32_t flags); |
| 484 | |
| 485 | |
| 486 | extern kern_return_t sched_processor_enable( |
| 487 | processor_t processor, |
| 488 | boolean_t enable); |
| 489 | |
| 490 | extern void processor_queue_shutdown( |
| 491 | processor_t processor); |
| 492 | |
| 496 | extern processor_set_t processor_pset( |
| 497 | processor_t processor); |
| 498 | |
| 499 | extern pset_node_t pset_node_root(void); |
| 500 | |
| 501 | extern processor_set_t pset_create( |
| 502 | pset_node_t node, |
| 503 | pset_cluster_type_t pset_type, |
| 504 | uint32_t pset_cluster_id, |
| 505 | int pset_id); |
| 506 | |
| 507 | extern void pset_init( |
| 508 | processor_set_t pset, |
| 509 | pset_node_t node); |
| 510 | |
| 511 | extern processor_set_t pset_find( |
| 512 | uint32_t cluster_id, |
| 513 | processor_set_t default_pset); |
| 514 | |
| 515 | extern kern_return_t processor_info_count( |
| 516 | processor_flavor_t flavor, |
| 517 | mach_msg_type_number_t *count); |
| 518 | |
| 519 | extern void processor_cpu_load_info( |
| 520 | processor_t processor, |
| 521 | natural_t ticks[static CPU_STATE_MAX]); |
| 522 | |
| 523 | extern void machine_run_count( |
| 524 | uint32_t count); |
| 525 | |
| 526 | extern processor_t machine_choose_processor( |
| 527 | processor_set_t pset, |
| 528 | processor_t processor); |
| 529 | |
| 530 | inline static processor_set_t |
| 531 | next_pset(processor_set_t pset) |
| 532 | { |
| 533 | pset_map_t map = pset->node->pset_map; |
| 534 | |
int pset_id = lsb_next(map, pset->pset_id);
if (pset_id == -1) {
pset_id = lsb_first(map);
| 538 | } |
| 539 | |
| 540 | return pset_array[pset_id]; |
| 541 | } |
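/*
 * Illustrative sketch ('starting_pset' is a hypothetical local): next_pset()
 * wraps around the node's pset map, so the usual pattern visits every pset in
 * the node exactly once, ending back at the starting pset.
 *
 *      processor_set_t nset = starting_pset;
 *      do {
 *              nset = next_pset(nset);
 *              ... consider nset ...
 *      } while (nset != starting_pset);
 */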
| 542 | |
| 543 | #define PSET_THING_TASK 0 |
| 544 | #define PSET_THING_THREAD 1 |
| 545 | |
| 546 | extern pset_cluster_type_t recommended_pset_type( |
| 547 | thread_t thread); |
| 548 | |
| 549 | extern void processor_state_update_idle( |
| 550 | processor_t processor); |
| 551 | |
| 552 | extern void processor_state_update_from_thread( |
| 553 | processor_t processor, |
| 554 | thread_t thread, |
| 555 | boolean_t pset_lock_held); |
| 556 | |
| 557 | extern void processor_state_update_explicit( |
| 558 | processor_t processor, |
| 559 | int pri, |
| 560 | sfi_class_id_t sfi_class, |
| 561 | pset_cluster_type_t pset_type, |
| 562 | perfcontrol_class_t perfctl_class, |
| 563 | thread_urgency_t urgency, |
| 564 | sched_bucket_t bucket); |
| 565 | |
| 566 | #define PSET_LOAD_NUMERATOR_SHIFT 16 |
| 567 | #define PSET_LOAD_FRACTIONAL_SHIFT 4 |
| 568 | |
| 569 | #if CONFIG_SCHED_EDGE |
| 570 | |
| 571 | extern cluster_type_t pset_type_for_id(uint32_t cluster_id); |
| 572 | extern uint64_t sched_pset_cluster_shared_rsrc_load(processor_set_t pset, cluster_shared_rsrc_type_t shared_rsrc_type); |
| 573 | |
| 574 | /* |
| 575 | * The Edge scheduler uses average scheduling latency as the metric for making |
| 576 | * thread migration decisions. One component of avg scheduling latency is the load |
| 577 | * average on the cluster. |
| 578 | * |
| 579 | * Load Average Fixed Point Arithmetic |
| 580 | * |
| 581 | * The load average is maintained as a 24.8 fixed point arithmetic value for precision. |
| 582 | * When multiplied by the average execution time, it needs to be rounded up (based on |
| 583 | * the most significant bit of the fractional part) for better accuracy. After rounding |
| 584 | * up, the whole number part of the value is used as the actual load value for |
| 585 | * migrate/steal decisions. |
| 586 | */ |
| 587 | #define SCHED_PSET_LOAD_EWMA_FRACTION_BITS 8 |
| 588 | #define SCHED_PSET_LOAD_EWMA_ROUND_BIT (1 << (SCHED_PSET_LOAD_EWMA_FRACTION_BITS - 1)) |
| 589 | #define SCHED_PSET_LOAD_EWMA_FRACTION_MASK ((1 << SCHED_PSET_LOAD_EWMA_FRACTION_BITS) - 1) |
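/*
 * Worked example with illustrative numbers: a 24.8 load average of 0x2c0
 * represents 2.75 runnable threads. Adding SCHED_PSET_LOAD_EWMA_ROUND_BIT
 * (0x80) gives 0x340, and shifting right by SCHED_PSET_LOAD_EWMA_FRACTION_BITS
 * rounds it to 3, which sched_get_pset_load_average() below then scales by
 * the average thread execution time.
 */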
| 590 | |
| 591 | inline static int |
| 592 | sched_get_pset_load_average(processor_set_t pset, sched_bucket_t sched_bucket) |
| 593 | { |
| 594 | uint64_t load_average = os_atomic_load(&pset->pset_load_average[sched_bucket], relaxed); |
| 595 | return (int)(((load_average + SCHED_PSET_LOAD_EWMA_ROUND_BIT) >> SCHED_PSET_LOAD_EWMA_FRACTION_BITS) * |
| 596 | pset->pset_execution_time[sched_bucket].pset_avg_thread_execution_time); |
| 597 | } |
| 598 | |
| 599 | #else /* CONFIG_SCHED_EDGE */ |
| 600 | inline static int |
| 601 | sched_get_pset_load_average(processor_set_t pset, __unused sched_bucket_t sched_bucket) |
| 602 | { |
| 603 | return (int)pset->load_average >> (PSET_LOAD_NUMERATOR_SHIFT - PSET_LOAD_FRACTIONAL_SHIFT); |
| 604 | } |
| 605 | #endif /* CONFIG_SCHED_EDGE */ |
| 606 | |
| 607 | extern void sched_update_pset_load_average(processor_set_t pset, uint64_t curtime); |
| 608 | extern void sched_update_pset_avg_execution_time(processor_set_t pset, uint64_t delta, uint64_t curtime, sched_bucket_t sched_bucket); |
| 609 | |
| 610 | inline static void |
| 611 | pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state) |
| 612 | { |
| 613 | pset_assert_locked(pset); |
| 614 | |
| 615 | uint old_state = processor->state; |
| 616 | uint cpuid = (uint)processor->cpu_id; |
| 617 | |
| 618 | assert(processor->processor_set == pset); |
| 619 | assert(bit_test(pset->cpu_bitmask, cpuid)); |
| 620 | |
| 621 | assert(old_state < PROCESSOR_STATE_LEN); |
| 622 | assert(new_state < PROCESSOR_STATE_LEN); |
| 623 | |
| 624 | processor->state = new_state; |
| 625 | |
| 626 | bit_clear(pset->cpu_state_map[old_state], cpuid); |
| 627 | bit_set(pset->cpu_state_map[new_state], cpuid); |
| 628 | |
| 629 | if (bit_test(pset->cpu_available_map, cpuid) && (new_state < PROCESSOR_IDLE)) { |
| 630 | /* No longer available for scheduling */ |
| 631 | bit_clear(pset->cpu_available_map, cpuid); |
| 632 | } else if (!bit_test(pset->cpu_available_map, cpuid) && (new_state >= PROCESSOR_IDLE)) { |
| 633 | /* Newly available for scheduling */ |
| 634 | bit_set(pset->cpu_available_map, cpuid); |
| 635 | } |
| 636 | |
| 637 | if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) { |
sched_update_pset_load_average(pset, 0);
| 639 | if (new_state == PROCESSOR_RUNNING) { |
| 640 | assert(processor == current_processor()); |
| 641 | } |
| 642 | } |
| 643 | if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) { |
| 644 | if (new_state == PROCESSOR_IDLE) { |
| 645 | bit_clear(pset->realtime_map, cpuid); |
| 646 | } |
| 647 | |
| 648 | pset_node_t node = pset->node; |
| 649 | |
if (bit_count(node->pset_map) == 1) {
| 651 | /* Node has only a single pset, so skip node pset map updates */ |
| 652 | return; |
| 653 | } |
| 654 | |
| 655 | if (new_state == PROCESSOR_IDLE) { |
| 656 | if (processor->processor_primary == processor) { |
| 657 | if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) { |
atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
| 659 | } |
| 660 | if (!bit_test(atomic_load(&node->pset_idle_primary_map), pset->pset_id)) { |
atomic_bit_set(&node->pset_idle_primary_map, pset->pset_id, memory_order_relaxed);
| 662 | } |
| 663 | } |
| 664 | if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) { |
atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
| 666 | } |
| 667 | if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) { |
atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
| 669 | } |
| 670 | } else { |
| 671 | cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE]; |
| 672 | if (idle_map == 0) { |
| 673 | /* No more IDLE CPUs */ |
| 674 | if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) { |
atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
| 676 | } |
| 677 | } |
| 678 | if (processor->processor_primary == processor) { |
| 679 | idle_map &= pset->primary_map; |
| 680 | if (idle_map == 0) { |
| 681 | /* No more IDLE primary CPUs */ |
| 682 | if (bit_test(atomic_load(&node->pset_idle_primary_map), pset->pset_id)) { |
atomic_bit_clear(&node->pset_idle_primary_map, pset->pset_id, memory_order_relaxed);
| 684 | } |
| 685 | } |
| 686 | } |
| 687 | } |
| 688 | } |
| 689 | } |
| 690 | |
| 691 | decl_simple_lock_data(extern, sched_available_cores_lock); |
| 692 | |
| 693 | #endif /* MACH_KERNEL_PRIVATE */ |
| 694 | #ifdef KERNEL_PRIVATE |
| 695 | |
| 696 | extern unsigned int processor_count; |
| 697 | extern processor_t cpu_to_processor(int cpu); |
| 698 | |
| 699 | extern kern_return_t enable_smt_processors(bool enable); |
| 700 | |
| 701 | /* |
| 702 | * Update the scheduler with the set of cores that should be used to dispatch new threads. |
| 703 | * Non-recommended cores can still be used to field interrupts or run bound threads. |
| 704 | * This should be called with interrupts enabled and no scheduler locks held. |
| 705 | */ |
| 706 | #define ALL_CORES_RECOMMENDED (~(uint64_t)0) |
| 707 | #define ALL_CORES_POWERED (~(uint64_t)0) |
| 708 | |
| 709 | extern void sched_perfcontrol_update_recommended_cores(uint32_t recommended_cores); |
| 710 | extern void sched_perfcontrol_update_recommended_cores_reason(uint64_t recommended_cores, processor_reason_t reason, uint32_t flags); |
| 711 | extern void sched_perfcontrol_update_powered_cores(uint64_t powered_cores, processor_reason_t reason, uint32_t flags); |
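/*
 * Illustrative sketch (not a real CLPC call site): recommend only CPUs 0-3
 * for dispatching new threads, then later restore the full set.
 *
 *      sched_perfcontrol_update_recommended_cores(0x0000000f);
 *      ...
 *      sched_perfcontrol_update_recommended_cores((uint32_t)ALL_CORES_RECOMMENDED);
 */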
| 712 | extern void sched_override_available_cores_for_sleep(void); |
| 713 | extern void sched_restore_available_cores_after_sleep(void); |
| 714 | extern bool sched_is_in_sleep(void); |
| 715 | extern void sched_mark_processor_online_locked(processor_t processor, processor_reason_t reason); |
| 716 | extern kern_return_t sched_mark_processor_offline(processor_t processor, processor_reason_t reason); |
| 717 | extern bool processor_should_kprintf(processor_t processor, bool starting); |
| 718 | extern void suspend_cluster_powerdown(void); |
| 719 | extern void resume_cluster_powerdown(void); |
| 720 | extern kern_return_t suspend_cluster_powerdown_from_user(void); |
| 721 | extern kern_return_t resume_cluster_powerdown_from_user(void); |
| 722 | extern int get_cluster_powerdown_user_suspended(void); |
| 723 | |
| 724 | #endif /* KERNEL_PRIVATE */ |
| 725 | |
| 726 | __ASSUME_PTR_ABI_SINGLE_END __END_DECLS |
| 727 | |
| 728 | #endif /* _KERN_PROCESSOR_H_ */ |
| 729 | |