/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * File:    sched.h
 * Author:  Avadis Tevanian, Jr.
 * Date:    1985
 *
 * Header file for scheduler.
 *
 */

#ifndef _KERN_SCHED_H_
#define _KERN_SCHED_H_

#include <mach/policy.h>
#include <kern/kern_types.h>
#include <kern/smp.h>
#include <kern/circle_queue.h>
#include <kern/macro_help.h>
#include <kern/timer_call.h>
#include <kern/ast.h>
#include <kern/bits.h>

#define NRQS_MAX (128) /* maximum number of priority levels */

#define MAXPRI (NRQS_MAX-1)
#define MINPRI 0 /* lowest legal priority schedulable */
#define IDLEPRI MINPRI /* idle thread priority */
#define NOPRI -1

/*
 * High-level priority assignments
 *
 *************************************************************************
 * 127          Reserved (real-time)
 *                  A
 *                  +
 *              (32 levels)
 *                  +
 *                  V
 * 96           Reserved (real-time)
 * 95           Kernel mode only
 *                  A
 *                  +
 *              (16 levels)
 *                  +
 *                  V
 * 80           Kernel mode only
 * 79           System high priority
 *                  A
 *                  +
 *              (16 levels)
 *                  +
 *                  V
 * 64           System high priority
 * 63           Elevated priorities
 *                  A
 *                  +
 *              (12 levels)
 *                  +
 *                  V
 * 52           Elevated priorities
 * 51           Elevated priorities (incl. BSD +nice)
 *                  A
 *                  +
 *              (20 levels)
 *                  +
 *                  V
 * 32           Elevated priorities (incl. BSD +nice)
 * 31           Default (default base for threads)
 * 30           Lowered priorities (incl. BSD -nice)
 *                  A
 *                  +
 *              (20 levels)
 *                  +
 *                  V
 * 11           Lowered priorities (incl. BSD -nice)
 * 10           Lowered priorities (aged priorities)
 *                  A
 *                  +
 *              (11 levels)
 *                  +
 *                  V
 * 0            Lowered priorities (aged priorities / idle)
 *************************************************************************
 */

#define BASEPRI_RTQUEUES (BASEPRI_REALTIME + 1) /* 97 */
#define BASEPRI_REALTIME (MAXPRI - (NRQS_MAX / 4) + 1) /* 96 */

#define MAXPRI_KERNEL (BASEPRI_REALTIME - 1) /* 95 */
#define BASEPRI_PREEMPT_HIGH (BASEPRI_PREEMPT + 1) /* 93 */
#define BASEPRI_PREEMPT (MAXPRI_KERNEL - 3) /* 92 */
#define BASEPRI_VM (BASEPRI_PREEMPT - 1) /* 91 */

#define BASEPRI_KERNEL (MINPRI_KERNEL + 1) /* 81 */
#define MINPRI_KERNEL (MAXPRI_KERNEL - (NRQS_MAX / 8) + 1) /* 80 */

#define MAXPRI_RESERVED (MINPRI_KERNEL - 1) /* 79 */
#define BASEPRI_GRAPHICS (MAXPRI_RESERVED - 3) /* 76 */
#define MINPRI_RESERVED (MAXPRI_RESERVED - (NRQS_MAX / 8) + 1) /* 64 */

#define MAXPRI_USER (MINPRI_RESERVED - 1) /* 63 */
#define BASEPRI_CONTROL (BASEPRI_DEFAULT + 17) /* 48 */
#define BASEPRI_FOREGROUND (BASEPRI_DEFAULT + 16) /* 47 */
#define BASEPRI_BACKGROUND (BASEPRI_DEFAULT + 15) /* 46 */
#define BASEPRI_USER_INITIATED (BASEPRI_DEFAULT + 6) /* 37 */
#define BASEPRI_DEFAULT (MAXPRI_USER - (NRQS_MAX / 4)) /* 31 */
#define MAXPRI_SUPPRESSED (BASEPRI_DEFAULT - 3) /* 28 */
#define BASEPRI_UTILITY (BASEPRI_DEFAULT - 11) /* 20 */
#define MAXPRI_THROTTLE (MINPRI + 4) /* 4 */
#define MINPRI_USER MINPRI /* 0 */
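
/*
 * Compile-time checks that the macro arithmetic above matches the
 * documented values (the bracketed comments on each definition).
 */
_Static_assert(BASEPRI_RTQUEUES == 97, "BASEPRI_RTQUEUES should be 97");
_Static_assert(BASEPRI_REALTIME == 96, "BASEPRI_REALTIME should be 96");
_Static_assert(MINPRI_KERNEL == 80, "MINPRI_KERNEL should be 80");
_Static_assert(MAXPRI_USER == 63, "MAXPRI_USER should be 63");
_Static_assert(BASEPRI_DEFAULT == 31, "BASEPRI_DEFAULT should be 31");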

#define DEPRESSPRI (MINPRI) /* depress priority */

#define MAXPRI_PROMOTE (MAXPRI_KERNEL) /* ceiling for mutex promotion */
#define MINPRI_RWLOCK (BASEPRI_BACKGROUND) /* floor when holding rwlock count */
#define MINPRI_EXEC (BASEPRI_DEFAULT) /* floor when in exec state */
#define MINPRI_WAITQ (BASEPRI_DEFAULT) /* floor when in waitq handover state */
#define MINPRI_FLOOR (BASEPRI_BACKGROUND) /* floor when boost requested */

#define NRQS (BASEPRI_REALTIME) /* Non-realtime levels for runqs */
#define NRTQS (MAXPRI - BASEPRI_REALTIME) /* Realtime levels for runqs */

/* Ensure that NRQS is large enough to represent all non-realtime threads, even promoted ones */
_Static_assert((NRQS == (MAXPRI_PROMOTE + 1)), "Runqueues are too small to hold all non-realtime threads");

/* Type used for thread->sched_mode and saved_mode */
typedef enum {
        TH_MODE_NONE = 0,       /* unassigned, usually for saved_mode only */
        TH_MODE_REALTIME,       /* time constraints supplied */
        TH_MODE_FIXED,          /* use fixed priorities, no decay */
        TH_MODE_TIMESHARE,      /* use timesharing algorithm */
} sched_mode_t;

/*
 * Determine whether the target platform should run the Clutch/Edge Scheduler.
 * All arm64 platforms are eligible to do so.
 */
#if defined(__arm64__) && CONFIG_CLUTCH && !CONFIG_SCHED_EDGE_OPT_OUT

/*
 * Single-cluster, symmetric (SMP) systems can run with just the Clutch policy, but
 * multi-cluster, asymmetric (AMP) systems must further enable the Edge policy
 * extension to Clutch in order to manage scheduling across the multiple CPU clusters.
 */
#define CONFIG_SCHED_CLUTCH 1
#if __AMP__
#define CONFIG_SCHED_EDGE 1
#endif /* __AMP__ */

#endif /* defined(__arm64__) && CONFIG_CLUTCH && !CONFIG_SCHED_EDGE_OPT_OUT */

/*
 * Since the Clutch scheduler organizes threads by thread group and
 * scheduling bucket, it is important not to mix threads from multiple
 * priority bands in the same bucket. To achieve that, the Clutch world
 * effectively has one scheduling bucket per QoS class.
 */

/* Buckets used for load calculation */
typedef enum {
        TH_BUCKET_FIXPRI = 0,   /* Fixed-priority */
        TH_BUCKET_SHARE_FG,     /* Timeshare thread above BASEPRI_DEFAULT */
#if CONFIG_SCHED_CLUTCH
        TH_BUCKET_SHARE_IN,     /* Timeshare thread between BASEPRI_USER_INITIATED and BASEPRI_DEFAULT */
#endif /* CONFIG_SCHED_CLUTCH */
        TH_BUCKET_SHARE_DF,     /* Timeshare thread between BASEPRI_DEFAULT and BASEPRI_UTILITY */
        TH_BUCKET_SHARE_UT,     /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
        TH_BUCKET_SHARE_BG,     /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
        TH_BUCKET_RUN,          /* All runnable threads */
        TH_BUCKET_SCHED_MAX = TH_BUCKET_RUN, /* Maximum schedulable buckets */
        TH_BUCKET_MAX,
} sched_bucket_t;
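
/*
 * Illustrative sketch (an assumption for exposition, not the scheduler's
 * actual mapping routine): how a timeshare thread's base priority could be
 * bucketed by the band boundaries described above. Fixed-priority and
 * realtime threads are accounted to TH_BUCKET_FIXPRI instead.
 */
static inline sched_bucket_t
sched_bucket_for_base_pri_sketch(int pri)
{
#if CONFIG_SCHED_CLUTCH
        if (pri > BASEPRI_USER_INITIATED) {
                return TH_BUCKET_SHARE_FG;
        }
        if (pri > BASEPRI_DEFAULT) {
                return TH_BUCKET_SHARE_IN;
        }
#else /* CONFIG_SCHED_CLUTCH */
        if (pri > BASEPRI_DEFAULT) {
                return TH_BUCKET_SHARE_FG;
        }
#endif /* CONFIG_SCHED_CLUTCH */
        if (pri > BASEPRI_UTILITY) {
                return TH_BUCKET_SHARE_DF;
        }
        if (pri > MAXPRI_THROTTLE) {
                return TH_BUCKET_SHARE_UT;
        }
        return TH_BUCKET_SHARE_BG;
}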

/*
 * Macro to check for invalid priorities.
 */
#define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)

struct runq_stats {
        uint64_t count_sum;
        uint64_t last_change_timestamp;
};
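
/*
 * Sketch of the intended bookkeeping (an assumption for exposition; the
 * real update code lives in the scheduler implementation): count_sum
 * integrates run-queue depth over time, so the average depth across any
 * window is the delta of count_sum divided by the elapsed time.
 */
static inline void
runq_stats_change_sketch(struct runq_stats *stats, int old_count, uint64_t now)
{
        stats->count_sum += (uint64_t)old_count * (now - stats->last_change_timestamp);
        stats->last_change_timestamp = now;
}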

#if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)

struct run_queue {
        int highq;                              /* highest runnable queue */
        bitmap_t bitmap[BITMAP_LEN(NRQS)];      /* run queue bitmap array */
        int count;                              /* # of threads total */
        int urgency;                            /* level of preemption urgency */
        circle_queue_head_t queues[NRQS];       /* one for each priority */

        struct runq_stats runq_stats;
};

inline static void
rq_bitmap_set(bitmap_t *__header_indexable map, u_int n)
{
        assert(n < NRQS);
        bitmap_set(map, n);
}

inline static void
rq_bitmap_clear(bitmap_t *__header_indexable map, u_int n)
{
        assert(n < NRQS);
        bitmap_clear(map, n);
}
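
/*
 * Usage sketch (illustrative): the highest runnable priority can be
 * recomputed from the bitmap, assuming bitmap_first() from <kern/bits.h>,
 * which returns -1 when no bit is set.
 */
static inline int
rq_highest_pri_sketch(struct run_queue *rq)
{
        return bitmap_first(rq->bitmap, NRQS);
}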

#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */

typedef struct {
        queue_head_t pri_queue;          /* runnable RT threads for this priority */
        uint64_t pri_earliest_deadline;  /* earliest deadline for this priority */
        int pri_count;                   /* # of threads for this priority */
        uint32_t pri_constraint;         /* constraint of earliest deadline thread for this priority */
} rt_queue_pri_t;

struct rt_queue {
        _Atomic uint64_t earliest_deadline;  /* earliest deadline */
        _Atomic int count;                   /* # of threads total */
        _Atomic uint32_t constraint;         /* constraint of earliest deadline thread */
        _Atomic int ed_index;                /* index of earliest deadline thread */

        bitmap_t bitmap[BITMAP_LEN(NRTQS)];

        rt_queue_pri_t rt_queue_pri[NRTQS];

        struct runq_stats runq_stats;
};
typedef struct rt_queue *rt_queue_t;

#define RT_CONSTRAINT_NONE UINT32_MAX
#define RT_DEADLINE_NONE UINT64_MAX
#define RT_DEADLINE_QUANTUM_EXPIRED (UINT64_MAX - 1)
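
/*
 * Illustrative sketch (an assumption for exposition; real consumers live
 * in the scheduler sources, and os_atomic_load() comes from
 * <machine/atomic.h>): the cached earliest deadline lets a processor test
 * for urgent realtime work without walking the per-priority queues.
 */
static inline bool
rt_runq_has_urgent_work_sketch(rt_queue_t rt_runq, uint64_t now)
{
        uint64_t deadline = os_atomic_load(&rt_runq->earliest_deadline, relaxed);
        return (deadline != RT_DEADLINE_NONE) && (deadline <= now);
}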

#if defined(CONFIG_SCHED_GRRR_CORE)

/*
 * We map standard Mach priorities to an abstract scale that more properly
 * indicates how we want processor time allocated under contention.
 */
typedef uint8_t grrr_proportional_priority_t;
typedef uint8_t grrr_group_index_t;

#define NUM_GRRR_PROPORTIONAL_PRIORITIES 256
#define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255)

#if 0
#define NUM_GRRR_GROUPS 8 /* log(256) */
#endif

#define NUM_GRRR_GROUPS 64 /* 256/4 */
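
/*
 * Worked example: 256 proportional priorities spread across 64 groups
 * means each group spans 4 adjacent proportional-priority values
 * (NUM_GRRR_PROPORTIONAL_PRIORITIES / NUM_GRRR_GROUPS == 256 / 64 == 4).
 */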

struct grrr_group {
        queue_chain_t priority_order;   /* next greatest weight group */
        grrr_proportional_priority_t minpriority;
        grrr_group_index_t index;

        queue_head_t clients;
        int count;
        uint32_t weight;
#if 0
        uint32_t deferred_removal_weight;
#endif
        uint32_t work;
        thread_t current_client;
};

struct grrr_run_queue {
        int count;
        uint32_t last_rescale_tick;
        struct grrr_group groups[NUM_GRRR_GROUPS];
        queue_head_t sorted_group_list;
        uint32_t weight;
        grrr_group_t current_group;

        struct runq_stats runq_stats;
};

#endif /* defined(CONFIG_SCHED_GRRR_CORE) */

extern int rt_runq_count(processor_set_t);
extern uint64_t rt_runq_earliest_deadline(processor_set_t);

#if defined(CONFIG_SCHED_MULTIQ)
sched_group_t sched_group_create(void);
void sched_group_destroy(sched_group_t sched_group);
#endif /* defined(CONFIG_SCHED_MULTIQ) */

/*
 * Scheduler routines.
 */

/* Handle quantum expiration for an executing thread */
extern void thread_quantum_expire(
        timer_call_param_t processor,
        timer_call_param_t thread);

/* Handle preemption timer expiration for an executing thread */
extern void thread_preempt_expire(
        timer_call_param_t processor,
        timer_call_param_t thread);

/* Context switch check for current processor */
extern ast_t csw_check(
        thread_t thread,
        processor_t processor,
        ast_t check_reason);

/* Check for pending ASTs */
extern void ast_check(processor_t processor);

extern ast_t update_pending_nonurgent_preemption(processor_t processor, ast_t reason);
extern void clear_pending_nonurgent_preemption(processor_t processor);

extern void sched_update_generation_count(void);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)
extern uint32_t std_quantum, min_std_quantum;
extern uint32_t std_quantum_us;
#endif /* CONFIG_SCHED_TIMESHARE_CORE */

extern uint32_t thread_depress_time;
extern uint32_t default_timeshare_computation;
extern uint32_t default_timeshare_constraint;

extern uint32_t max_rt_quantum, min_rt_quantum;

extern int default_preemption_rate;

#if defined(CONFIG_SCHED_TIMESHARE_CORE)

/*
 * Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second.
 * Aging may be deferred while all processors are idle and then applied
 * cumulatively during subsequent periods of activity.
 */
#define SCHED_TICK_SHIFT 3
#define SCHED_TICK_MAX_DELTA (8)
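
/*
 * Worked example: with SCHED_TICK_SHIFT == 3, usage ages (1 << 3) == 8
 * times per second, i.e. a scheduler tick roughly every 125 ms.
 */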

extern unsigned sched_tick;
extern uint32_t sched_tick_interval;

#endif /* CONFIG_SCHED_TIMESHARE_CORE */

extern uint64_t sched_one_second_interval;

/* Periodic computation of various averages */
extern void compute_sched_load(void);

extern void compute_averages(uint64_t);

extern void compute_averunnable(
        void *nrun);

extern void compute_stack_target(
        void *arg);

extern void compute_pageout_gc_throttle(
        void *arg);

extern void compute_pmap_gc_throttle(
        void *arg);

/*
 * Conversion factor from usage to priority.
 */
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

#define MAX_LOAD (NRQS - 1)
#define SCHED_PRI_SHIFT_MAX ((8 * sizeof(uint32_t)) - 1)
extern uint32_t sched_pri_shifts[TH_BUCKET_MAX];
extern uint32_t sched_fixed_shift;
extern int8_t sched_load_shifts[NRQS];
extern uint32_t sched_decay_usage_age_factor;
void sched_timeshare_consider_maintenance(uint64_t ctime, bool safe_point);
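
/*
 * Illustrative sketch (an assumption for exposition; the real computation
 * lives in the timeshare policy sources): a thread's scheduled priority
 * decays from its base priority by its aged CPU usage, scaled down by the
 * per-bucket shift, and is clamped to stay schedulable.
 */
static inline int
timeshare_pri_sketch(int base_pri, uint32_t sched_usage, uint32_t pri_shift)
{
        int pri = base_pri - (int)(sched_usage >> pri_shift);
        return (pri < MINPRI) ? MINPRI : pri;
}
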
#endif /* CONFIG_SCHED_TIMESHARE_CORE */

void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);

extern int32_t sched_poll_yield_shift;
extern uint64_t sched_safe_rt_duration;
extern uint64_t sched_safe_fixed_duration;

extern uint32_t sched_load_average, sched_mach_factor;

extern uint32_t avenrun[3], mach_factor[3];

extern uint64_t max_unsafe_rt_computation;
extern uint64_t max_unsafe_fixed_computation;
extern uint64_t max_poll_computation;

extern uint32_t sched_run_buckets[TH_BUCKET_MAX];

extern uint32_t sched_run_incr(thread_t thread);
extern uint32_t sched_run_decr(thread_t thread);
extern void sched_update_thread_bucket(thread_t thread);

extern uint32_t sched_smt_run_incr(thread_t thread);
extern uint32_t sched_smt_run_decr(thread_t thread);
extern void sched_smt_update_thread_bucket(thread_t thread);

#define SCHED_DECAY_TICKS 32
struct shift_data {
        int shift1;
        int shift2;
};
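
/*
 * Illustrative note (an assumption for exposition; the decay itself is
 * applied in the priority computation code): each entry approximates one
 * tick of exponential usage decay using two shifts, on the order of
 * usage = (usage >> shift1) + (usage >> shift2), with a negative shift2
 * flipping the second term's sign.
 */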

/*
 * Save the current thread time and compute a delta since the last call for the
 * scheduler tick.
 */
#define sched_tick_delta(thread, delta)                                 \
MACRO_BEGIN                                                             \
        uint64_t _total = recount_thread_time_mach(thread);             \
        (delta) = (typeof(delta))(_total - thread->sched_time_save);    \
        thread->sched_time_save = _total;                               \
MACRO_END
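
/*
 * Usage sketch (illustrative): callers pass an integral lvalue for `delta`,
 * e.g.
 *
 *      uint32_t ticks;
 *      sched_tick_delta(thread, ticks);
 */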

#define SCHED_MAX_BACKUP_PROCESSORS 7
#if defined(__x86_64__)
#define SCHED_DEFAULT_BACKUP_PROCESSORS 1
#define SCHED_DEFAULT_BACKUP_PROCESSORS_SMT 2
#else
#define SCHED_DEFAULT_BACKUP_PROCESSORS 0
#define SCHED_DEFAULT_BACKUP_PROCESSORS_SMT 0
#endif
extern int sched_rt_n_backup_processors;

extern bool system_is_SMT;

#endif /* _KERN_SCHED_H_ */