1/*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: sched.h
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1985
62 *
63 * Header file for scheduler.
64 *
65 */
66
67#ifndef _KERN_SCHED_H_
68#define _KERN_SCHED_H_
69
70#include <mach/policy.h>
71#include <kern/kern_types.h>
72#include <kern/smp.h>
73#include <kern/circle_queue.h>
74#include <kern/macro_help.h>
75#include <kern/timer_call.h>
76#include <kern/ast.h>
77#include <kern/bits.h>
78
/*
 * Scheduler priority range.
 *
 * NRQS_MAX is the total number of priority levels, so legal priorities run
 * from MINPRI (0) through MAXPRI (NRQS_MAX - 1 = 127).
 */
#define NRQS_MAX        (128)                   /* maximum number of priority levels */

#define MAXPRI          (NRQS_MAX-1)
#define MINPRI          0                       /* lowest legal priority schedulable */
#define IDLEPRI         MINPRI                  /* idle thread priority */
/*
 * Value outside [MINPRI, MAXPRI].  Parenthesized so the expansion is always
 * a single operand and can never be absorbed into a neighbouring expression
 * as a binary subtraction.
 */
#define NOPRI           (-1)
85
86/*
87 * High-level priority assignments
88 *
89 *************************************************************************
90 * 127 Reserved (real-time)
91 * A
92 * +
93 * (32 levels)
94 * +
95 * V
96 * 96 Reserved (real-time)
97 * 95 Kernel mode only
98 * A
99 * +
100 * (16 levels)
101 * +
102 * V
103 * 80 Kernel mode only
104 * 79 System high priority
105 * A
106 * +
107 * (16 levels)
108 * +
109 * V
110 * 64 System high priority
111 * 63 Elevated priorities
112 * A
113 * +
114 * (12 levels)
115 * +
116 * V
117 * 52 Elevated priorities
118 * 51 Elevated priorities (incl. BSD +nice)
119 * A
120 * +
121 * (20 levels)
122 * +
123 * V
124 * 32 Elevated priorities (incl. BSD +nice)
125 * 31 Default (default base for threads)
126 * 30 Lowered priorities (incl. BSD -nice)
127 * A
128 * +
129 * (20 levels)
130 * +
131 * V
132 * 11 Lowered priorities (incl. BSD -nice)
133 * 10 Lowered priorities (aged pri's)
134 * A
135 * +
136 * (11 levels)
137 * +
138 * V
139 * 0 Lowered priorities (aged pri's / idle)
140 *************************************************************************
141 */
142
/*
 * Priority band boundaries, listed from highest to lowest.
 *
 * Several macros refer to a name that is defined on a later line (for
 * example BASEPRI_RTQUEUES uses BASEPRI_REALTIME).  That is safe because a
 * macro body is only expanded where the macro is used, by which point every
 * name in this list has been defined.
 *
 * Real-time band: the top quarter of the priority space
 * (NRQS_MAX / 4 = 32 levels, 96..127).
 */
#define BASEPRI_RTQUEUES        (BASEPRI_REALTIME + 1)                          /* 97 */
#define BASEPRI_REALTIME        (MAXPRI - (NRQS_MAX / 4) + 1)                   /* 96 */

/* Kernel-mode-only band: NRQS_MAX / 8 = 16 levels, 80..95. */
#define MAXPRI_KERNEL           (BASEPRI_REALTIME - 1)                          /* 95 */
#define BASEPRI_PREEMPT_HIGH    (BASEPRI_PREEMPT + 1)                           /* 93 */
#define BASEPRI_PREEMPT         (MAXPRI_KERNEL - 3)                             /* 92 */
#define BASEPRI_VM              (BASEPRI_PREEMPT - 1)                           /* 91 */

#define BASEPRI_KERNEL          (MINPRI_KERNEL + 1)                             /* 81 */
#define MINPRI_KERNEL           (MAXPRI_KERNEL - (NRQS_MAX / 8) + 1)            /* 80 */

/* System high priority ("reserved") band: NRQS_MAX / 8 = 16 levels, 64..79. */
#define MAXPRI_RESERVED         (MINPRI_KERNEL - 1)                             /* 79 */
#define BASEPRI_GRAPHICS        (MAXPRI_RESERVED - 3)                           /* 76 */
#define MINPRI_RESERVED         (MAXPRI_RESERVED - (NRQS_MAX / 8) + 1)          /* 64 */

/*
 * User band: 0..63.  BASEPRI_DEFAULT (31) is the anchor; the other user
 * base priorities are expressed as offsets from it.
 */
#define MAXPRI_USER             (MINPRI_RESERVED - 1)                           /* 63 */
#define BASEPRI_CONTROL         (BASEPRI_DEFAULT + 17)                          /* 48 */
#define BASEPRI_FOREGROUND      (BASEPRI_DEFAULT + 16)                          /* 47 */
#define BASEPRI_BACKGROUND      (BASEPRI_DEFAULT + 15)                          /* 46 */
#define BASEPRI_USER_INITIATED  (BASEPRI_DEFAULT +  6)                          /* 37 */
#define BASEPRI_DEFAULT         (MAXPRI_USER - (NRQS_MAX / 4))                  /* 31 */
#define MAXPRI_SUPPRESSED       (BASEPRI_DEFAULT - 3)                           /* 28 */
#define BASEPRI_UTILITY         (BASEPRI_DEFAULT - 11)                          /* 20 */
#define MAXPRI_THROTTLE         (MINPRI + 4)                                    /*  4 */
#define MINPRI_USER             MINPRI                                          /*  0 */

#define DEPRESSPRI              (MINPRI)                /* depress priority */

/* Ceilings and floors applied in specific thread states. */
#define MAXPRI_PROMOTE          (MAXPRI_KERNEL)         /* ceiling for mutex promotion */
#define MINPRI_RWLOCK           (BASEPRI_BACKGROUND)    /* floor when holding rwlock count */
#define MINPRI_EXEC             (BASEPRI_DEFAULT)       /* floor when in exec state */
#define MINPRI_WAITQ            (BASEPRI_DEFAULT)       /* floor when in waitq handover state */
#define MINPRI_FLOOR            (BASEPRI_BACKGROUND)    /* floor when boost requested */
176
/*
 * Run-queue sizing.
 *
 * NRQS equals BASEPRI_REALTIME (96), i.e. one queue for each priority in
 * 0..95.  NRTQS evaluates to MAXPRI - BASEPRI_REALTIME = 31, which is one
 * fewer than the 32 priorities in 96..127.
 * NOTE(review): presumably real-time queues are indexed starting at
 * BASEPRI_RTQUEUES (97) rather than BASEPRI_REALTIME -- confirm against the
 * code that indexes rt_queue_pri[].
 */
#define NRQS    (BASEPRI_REALTIME)                      /* Non-realtime levels for runqs */
#define NRTQS   (MAXPRI - BASEPRI_REALTIME)             /* Realtime levels for runqs */

/*
 * Ensure that NRQS is large enough to represent all non-realtime threads; even promoted ones.
 * MAXPRI_PROMOTE is MAXPRI_KERNEL (95), so the check requires NRQS == 96.
 */
_Static_assert((NRQS == (MAXPRI_PROMOTE + 1)), "Runqueues are too small to hold all non-realtime threads");
182
/*
 * Scheduling mode of a thread; the type of thread->sched_mode and of the
 * saved_mode field.  Values are spelled out so the numeric encoding is
 * visible at a glance.
 */
typedef enum {
	TH_MODE_NONE      = 0,          /* unassigned, usually for saved_mode only */
	TH_MODE_REALTIME  = 1,          /* time constraints supplied */
	TH_MODE_FIXED     = 2,          /* use fixed priorities, no decay */
	TH_MODE_TIMESHARE = 3,          /* use timesharing algorithm */
} sched_mode_t;
190
/*
 * Determine whether the target platform should run the Clutch/Edge Scheduler.
 * All arm64 platforms are eligible to do so, provided the build sets
 * CONFIG_CLUTCH and does not set CONFIG_SCHED_EDGE_OPT_OUT.
 */
#if defined(__arm64__) && CONFIG_CLUTCH && !CONFIG_SCHED_EDGE_OPT_OUT

/*
 * Single-cluster, symmetric (SMP) systems can run with just the Clutch policy, but
 * multi-cluster, asymmetric (AMP) systems must further enable the Edge policy
 * extension to Clutch in order to manage scheduling across the multiple CPU clusters.
 *
 * Result: CONFIG_SCHED_CLUTCH is defined to 1 on every eligible build, and
 * CONFIG_SCHED_EDGE is additionally defined to 1 when __AMP__ is non-zero.
 */
#define CONFIG_SCHED_CLUTCH 1
#if __AMP__
#define CONFIG_SCHED_EDGE 1
#endif /* __AMP__ */

#endif /* defined(__arm64__) && CONFIG_CLUTCH && !CONFIG_SCHED_EDGE_OPT_OUT */
208
/*
 * Since the clutch scheduler organizes threads based on the thread group
 * and the scheduling bucket, it's important not to mix threads from multiple
 * priority bands into the same bucket. To achieve that, in the clutch bucket
 * world, there is effectively one scheduling bucket per QoS.
 */
215
/*
 * Buckets used for load calculation.
 *
 * TH_BUCKET_SHARE_IN exists only when CONFIG_SCHED_CLUTCH is set, so the
 * numeric value of every later enumerator depends on the build
 * configuration; do not hard-code these values.
 * TH_BUCKET_SCHED_MAX is an alias of TH_BUCKET_RUN.  TH_BUCKET_MAX is one
 * past the last bucket and is the dimension of sched_pri_shifts[] and
 * sched_run_buckets[].
 */
typedef enum {
	TH_BUCKET_FIXPRI = 0,                   /* Fixed-priority */
	TH_BUCKET_SHARE_FG,                     /* Timeshare thread above BASEPRI_DEFAULT */
#if CONFIG_SCHED_CLUTCH
	TH_BUCKET_SHARE_IN,                     /* Timeshare thread between BASEPRI_USER_INITIATED and BASEPRI_DEFAULT */
#endif /* CONFIG_SCHED_CLUTCH */
	TH_BUCKET_SHARE_DF,                     /* Timeshare thread between BASEPRI_DEFAULT and BASEPRI_UTILITY */
	TH_BUCKET_SHARE_UT,                     /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
	TH_BUCKET_SHARE_BG,                     /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
	TH_BUCKET_RUN,                          /* All runnable threads */
	TH_BUCKET_SCHED_MAX = TH_BUCKET_RUN,    /* Maximum schedulable buckets */
	TH_BUCKET_MAX,
} sched_bucket_t;
230
/*
 * Range check for scheduler priorities: non-zero when pri falls outside
 * the closed interval [MINPRI, MAXPRI].  The argument can be expanded
 * twice, so it should be free of side effects.
 */
#define invalid_pri(pri)        ((pri) < MINPRI || MAXPRI < (pri))
235
/*
 * Statistics record embedded as `runq_stats` in each run-queue structure
 * declared in this header (run_queue, rt_queue, grrr_run_queue).
 * NOTE(review): the field meanings below are inferred from their names --
 * confirm against the code that updates them.
 */
struct runq_stats {
	uint64_t                count_sum;              /* presumably an accumulated sum of queue counts */
	uint64_t                last_change_timestamp;  /* presumably when the queue count last changed */
};
240
241#if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)
242
/*
 * Run queue for non-realtime priorities: one circular queue per priority
 * level (NRQS of them) plus a bitmap with one bit per level.
 * NOTE(review): presumably a set bit means the matching queue is non-empty;
 * confirm against the enqueue/dequeue code.
 */
struct run_queue {
	int                     highq;                          /* highest runnable queue */
	bitmap_t                bitmap[BITMAP_LEN(NRQS)];       /* run queue bitmap array */
	int                     count;                          /* # of threads total */
	int                     urgency;                        /* level of preemption urgency */
	circle_queue_head_t     queues[NRQS];                   /* one for each priority */

	struct runq_stats       runq_stats;                     /* statistics for this queue */
};
252
253inline static void
254rq_bitmap_set(bitmap_t *__header_indexable map, u_int n)
255{
256 assert(n < NRQS);
257 bitmap_set(map, n);
258}
259
260inline static void
261rq_bitmap_clear(bitmap_t *__header_indexable map, u_int n)
262{
263 assert(n < NRQS);
264 bitmap_clear(map, n);
265}
266
267#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */
268
/*
 * State kept for one real-time priority level; struct rt_queue holds an
 * array of NRTQS of these.
 */
typedef struct {
	queue_head_t            pri_queue;                      /* runnable RT threads for this priority */
	uint64_t                pri_earliest_deadline;          /* earliest deadline for this priority */
	int                     pri_count;                      /* # of threads for this priority */
	uint32_t                pri_constraint;                 /* constraint of earliest deadline thread for this priority */
} rt_queue_pri_t;
275
/*
 * Real-time run queue: NRTQS per-priority entries, a bitmap with one bit
 * per entry, and queue-wide summary fields.  The summary fields are
 * declared _Atomic.
 * NOTE(review): the locking and memory-ordering rules for the summary
 * fields are not visible in this header -- check the users before relying
 * on any particular access pattern.
 */
struct rt_queue {
	_Atomic uint64_t        earliest_deadline;              /* earliest deadline */
	_Atomic int             count;                          /* # of threads total */
	_Atomic uint32_t        constraint;                     /* constraint of earliest deadline thread */
	_Atomic int             ed_index;                       /* index of earliest deadline thread */

	bitmap_t                bitmap[BITMAP_LEN(NRTQS)];      /* one bit per rt_queue_pri[] entry */

	rt_queue_pri_t          rt_queue_pri[NRTQS];            /* per-priority state */

	struct runq_stats       runq_stats;                     /* statistics for this queue */
};
typedef struct rt_queue *rt_queue_t;
289
/*
 * Out-of-band values for real-time constraints (uint32_t) and deadlines
 * (uint64_t), taken from the top of each type's range.
 * NOTE(review): by name these stand for "no constraint", "no deadline" and
 * "quantum expired"; confirm the exact usage at the call sites.
 */
#define RT_CONSTRAINT_NONE              UINT32_MAX
#define RT_DEADLINE_NONE                UINT64_MAX
#define RT_DEADLINE_QUANTUM_EXPIRED     (UINT64_MAX - 1)
293
294#if defined(CONFIG_SCHED_GRRR_CORE)
295
/*
 * We map standard Mach priorities to an abstract scale that more properly
 * indicates how we want processor time allocated under contention.
 *
 * Both the proportional priority and the group index fit in 8 bits.
 */
typedef uint8_t grrr_proportional_priority_t;
typedef uint8_t grrr_group_index_t;

#define NUM_GRRR_PROPORTIONAL_PRIORITIES        256
#define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255)

/* Disabled alternative grouping (8 groups); the active definition follows. */
#if 0
#define NUM_GRRR_GROUPS 8                                       /* log(256) */
#endif

/* Active grouping: 64 groups, i.e. 4 proportional priorities per group. */
#define NUM_GRRR_GROUPS 64                                      /* 256/4 */
311
/*
 * One GRRR scheduling group; struct grrr_run_queue holds NUM_GRRR_GROUPS
 * of these.
 * NOTE(review): comments marked "presumably" are inferred from field names
 * only -- confirm against the GRRR implementation.
 */
struct grrr_group {
	queue_chain_t                   priority_order;         /* next greatest weight group */
	grrr_proportional_priority_t    minpriority;            /* presumably the lowest proportional priority in this group */
	grrr_group_index_t              index;                  /* presumably this group's slot in groups[] */

	queue_head_t                    clients;                /* presumably the threads belonging to this group */
	int                             count;                  /* presumably the number of clients */
	uint32_t                        weight;
	/* Disabled field, kept under #if 0. */
#if 0
	uint32_t                        deferred_removal_weight;
#endif
	uint32_t                        work;
	thread_t                        current_client;
};
326
/*
 * Run queue for the GRRR scheduler: a fixed array of NUM_GRRR_GROUPS groups
 * plus a list head for keeping groups in sorted order.
 * NOTE(review): the sort key and the meaning of weight/last_rescale_tick
 * are not visible in this header -- confirm against the GRRR
 * implementation.
 */
struct grrr_run_queue {
	int                             count;
	uint32_t                        last_rescale_tick;
	struct grrr_group               groups[NUM_GRRR_GROUPS];
	queue_head_t                    sorted_group_list;
	uint32_t                        weight;
	grrr_group_t                    current_group;

	struct runq_stats               runq_stats;             /* statistics for this queue */
};
337
338#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
339
/*
 * Real-time run-queue queries for a processor set; defined elsewhere.
 * NOTE(review): by name these return the thread count and earliest deadline
 * of the set's real-time queue -- confirm against the definitions.
 */
extern int rt_runq_count(processor_set_t);
extern uint64_t rt_runq_earliest_deadline(processor_set_t);

/* Scheduling-group lifecycle; only declared when CONFIG_SCHED_MULTIQ is defined. */
#if defined(CONFIG_SCHED_MULTIQ)
sched_group_t   sched_group_create(void);
void            sched_group_destroy(sched_group_t sched_group);
#endif /* defined(CONFIG_SCHED_MULTIQ) */
347
348
349
/*
 *	Scheduler routines.
 *
 *	Declarations only; the definitions live elsewhere.
 */

/*
 * Handle quantum expiration for an executing thread.
 * Both arguments are typed timer_call_param_t.
 * NOTE(review): presumably invoked as a timer-call callback -- confirm.
 */
extern void             thread_quantum_expire(
	timer_call_param_t      processor,
	timer_call_param_t      thread);

/*
 * Handle preemption timer expiration for an executing thread.
 * Same parameter shape as thread_quantum_expire().
 */
extern void             thread_preempt_expire(
	timer_call_param_t      processor,
	timer_call_param_t      thread);

/* Context switch check for current processor; returns an ast_t value. */
extern ast_t    csw_check(
	thread_t                thread,
	processor_t             processor,
	ast_t                   check_reason);

/* Check for pending ASTs */
extern void ast_check(processor_t processor);

/*
 * NOTE(review): by name, these arm and clear a pending non-urgent
 * preemption on a processor -- confirm against the definitions.
 */
extern ast_t update_pending_nonurgent_preemption(processor_t processor, ast_t reason);
extern void clear_pending_nonurgent_preemption(processor_t processor);

extern void sched_update_generation_count(void);
377
/*
 * Scheduler tunables defined elsewhere.  Units are not visible from this
 * header, except that std_quantum_us is presumably in microseconds
 * (NOTE(review): confirm).
 */

/* Standard quantum values; only declared with the timeshare core. */
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
extern uint32_t std_quantum, min_std_quantum;
extern uint32_t std_quantum_us;
#endif /* CONFIG_SCHED_TIMESHARE_CORE */

extern uint32_t thread_depress_time;
extern uint32_t default_timeshare_computation;
extern uint32_t default_timeshare_constraint;

/* Upper and lower bounds on the real-time quantum. */
extern uint32_t max_rt_quantum, min_rt_quantum;

extern int default_preemption_rate;
390
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

/*
 *	Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second
 *	Aging may be deferred during periods where all processors are idle
 *	and cumulatively applied during periods of activity.
 *
 *	With SCHED_TICK_SHIFT == 3 that is about 8 times per second.
 */
#define SCHED_TICK_SHIFT        3
#define SCHED_TICK_MAX_DELTA    (8)

/*
 * Tick state defined elsewhere.
 * NOTE(review): presumably sched_tick counts elapsed scheduler ticks and
 * sched_tick_interval is the tick length -- confirm units and semantics.
 */
extern unsigned         sched_tick;
extern uint32_t         sched_tick_interval;

#endif /* CONFIG_SCHED_TIMESHARE_CORE */
405
/* NOTE(review): presumably one second expressed in scheduler time units -- confirm. */
extern uint64_t         sched_one_second_interval;

/* Periodic computation of various averages */
extern void            compute_sched_load(void);

extern void             compute_averages(uint64_t);

/*
 * The routines below all take a single untyped pointer argument.
 * NOTE(review): what each one expects behind that pointer is not visible
 * here -- check the definitions before calling.
 */
extern void             compute_averunnable(
	void                    *nrun);

extern void             compute_stack_target(
	void                    *arg);

extern void             compute_pageout_gc_throttle(
	void                    *arg);

extern void             compute_pmap_gc_throttle(
	void                    *arg);
424
/*
 *	Conversion factor from usage
 *	to priority.
 *
 *	Only declared with the timeshare core.  MAX_LOAD evaluates to
 *	NRQS - 1 (95) and SCHED_PRI_SHIFT_MAX to 31, the highest bit
 *	position of a uint32_t.
 */
#if defined(CONFIG_SCHED_TIMESHARE_CORE)

#define MAX_LOAD (NRQS - 1)
#define SCHED_PRI_SHIFT_MAX ((8 * sizeof(uint32_t)) - 1)
extern uint32_t         sched_pri_shifts[TH_BUCKET_MAX];        /* one entry per sched_bucket_t value */
extern uint32_t         sched_fixed_shift;
extern int8_t           sched_load_shifts[NRQS];                /* NRQS entries, indices 0..MAX_LOAD */
extern uint32_t         sched_decay_usage_age_factor;
void sched_timeshare_consider_maintenance(uint64_t ctime, bool safe_point);
#endif /* CONFIG_SCHED_TIMESHARE_CORE */
439
/*
 * Miscellaneous scheduler state and helpers defined elsewhere.
 * NOTE(review): units and exact semantics of the variables below are not
 * visible from this header -- consult the definitions.
 */
void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);

extern int32_t          sched_poll_yield_shift;
extern uint64_t         sched_safe_rt_duration;
extern uint64_t         sched_safe_fixed_duration;

extern uint32_t         sched_load_average, sched_mach_factor;

/* Three-element arrays; presumably one entry per averaging window -- confirm. */
extern uint32_t         avenrun[3], mach_factor[3];

extern uint64_t         max_unsafe_rt_computation;
extern uint64_t         max_unsafe_fixed_computation;
extern uint64_t         max_poll_computation;

/* One counter per sched_bucket_t value (TH_BUCKET_MAX entries). */
extern uint32_t         sched_run_buckets[TH_BUCKET_MAX];

/*
 * Run-count bookkeeping for a thread.  The incr/decr routines return a
 * uint32_t; presumably the updated count -- confirm.
 */
extern uint32_t sched_run_incr(thread_t thread);
extern uint32_t sched_run_decr(thread_t thread);
extern void sched_update_thread_bucket(thread_t thread);

/* Variants of the three routines above carrying an `smt` prefix. */
extern uint32_t sched_smt_run_incr(thread_t thread);
extern uint32_t sched_smt_run_decr(thread_t thread);
extern void sched_smt_update_thread_bucket(thread_t thread);
463
/*
 * SCHED_DECAY_TICKS and the pair-of-shifts record.
 * NOTE(review): presumably each shift_data entry describes a usage decay
 * factor as a combination of two shifts, with SCHED_DECAY_TICKS bounding
 * the table of such entries -- confirm against the decay code.
 */
#define SCHED_DECAY_TICKS       32
struct shift_data {
	int     shift1;
	int     shift2;
};
469
/*
 * Save the current thread time and compute a delta since the last call for the
 * scheduler tick.
 *
 * thread: pointer to the thread.  Its sched_time_save field is read and then
 *         overwritten with the value of recount_thread_time_mach(thread).
 * delta:  lvalue that receives (new total - previous sched_time_save),
 *         converted to delta's own type.
 *
 * `thread` is parenthesized wherever it is dereferenced so that a compound
 * argument such as `base + i` binds as a unit before `->`.  Both arguments
 * are still expanded more than once, so pass expressions without side
 * effects.
 */
#define sched_tick_delta(thread, delta)                                 \
MACRO_BEGIN                                                             \
	uint64_t _total = recount_thread_time_mach(thread);             \
	(delta) = (typeof(delta))(_total - (thread)->sched_time_save);  \
	(thread)->sched_time_save = _total;                             \
MACRO_END
480
/*
 * Backup-processor counts.  The maximum is 7 on every platform; the
 * defaults are 1 (2 for the _SMT variant) on x86_64 and 0 elsewhere.
 * NOTE(review): presumably these bound and seed sched_rt_n_backup_processors
 * below -- confirm where that variable is initialized.
 */
#define SCHED_MAX_BACKUP_PROCESSORS 7
#if defined(__x86_64__)
#define SCHED_DEFAULT_BACKUP_PROCESSORS 1
#define SCHED_DEFAULT_BACKUP_PROCESSORS_SMT 2
#else
#define SCHED_DEFAULT_BACKUP_PROCESSORS 0
#define SCHED_DEFAULT_BACKUP_PROCESSORS_SMT 0
#endif
extern int sched_rt_n_backup_processors;

/* NOTE(review): by name, true when the system has SMT processors -- confirm where it is set. */
extern bool system_is_SMT;
492
493#endif /* _KERN_SCHED_H_ */
494