/*
 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/machine.h>

#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>

#include <kern/kern_types.h>
#include <kern/debug.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>

#include <sys/kdebug.h>

/*
 * Theory Statement
 *
 * How does the task scheduler work?
 *
 * It schedules threads across a few levels.
 *
 * RT threads are dealt with above us
 * Bound threads go into the per-processor runq
 * Non-bound threads are linked on their task's sched_group's runq
 * sched_groups' sched_entries are linked on the pset's runq
 *
 * TODO: make this explicit - bound threads should have a different enqueue function
 *
 * When we choose a new thread, we decide whether to look at the bound runqueue, the global runqueue
 * or the current group's runqueue, then dequeue the next thread from that runqueue.
 *
 * We then manipulate the sched_entries to maintain the invariant that:
 * Each non-empty priority level in a group's runq is represented by one sched_entry enqueued in the global
 * runqueue.
 *
 * A sched_entry represents a chance at running - for each priority in each task, there is one chance of getting
 * to run.  This reduces the excess contention bonus given to processes which spread their work among many threads
 * as compared to processes which do the same amount of work with fewer threads.
 *
 * NOTE: Currently, the multiq scheduler only supports one pset.
 *
 * NOTE ABOUT thread->sched_pri:
 *
 * It can change after enqueue - it's changed without the pset lock, but with the thread lock, if thread->runq is 0.
 * Therefore we can only depend on it not changing during the enqueue and remove path, not the dequeue path.
 *
 * TODO: Future features:
 *
 * Decouple the task priority from the sched_entry priority, allowing for:
 *      fast task priority change without having to iterate and re-dispatch all threads in the task.
 *              i.e. task-wide priority, task-wide boosting
 *      fancier group decay features
 *
 * Group (or task) decay:
 *      Decay is used for a few different things:
 *              Prioritizing latency-needing threads over throughput-needing threads for time-to-running
 *              Balancing work between threads in a process
 *              Balancing work done at the same priority between different processes
 *              Recovering from priority inversions between two threads in the same process
 *              Recovering from priority inversions between two threads in different processes
 *              Simulating a proportional share scheduler by allowing lower priority threads
 *                to run for a certain percentage of the time
 *
 *      Task decay lets us separately address the 'same process' and 'different process' needs,
 *      which will allow us to make smarter tradeoffs in different cases.
 *      For example, we could resolve priority inversion in the same process by reordering threads without dropping the
 *      process below low priority threads in other processes.
 *
 * One lock to rule them all (or at least all the runqueues) instead of the pset locks
 *
 * Shrink sched_entry size to the size of a queue_chain_t by inferring priority, group, and perhaps the runq field.
 *      The entries array is 5K currently so it'd be really great to reduce.
 *      One way to get sched_group below 4K without a new runq structure would be to remove the extra queues above realtime.
 *
 * When preempting a processor, store a flag saying whether the preemption
 * was from a thread in the same group or a different group,
 * and tell choose_thread about it.
 *
 * When choosing a processor, bias towards those running in the same
 * group as I am running (at the same priority, or within a certain band?).
 *
 * Decide if we need to support psets.
 * Decide how to support psets - do we need duplicate entries for each pset,
 * or can we get away with putting the entry in either one or the other pset?
 *
 * Consider the right way to handle runq count - I don't want to iterate groups.
 * Perhaps keep a global counter.
 * Alternate option - remove it from choose_processor. It doesn't add much value
 * now that we have a global runq.
 *
 * Need a better way of finding the group to target instead of looking at current_task.
 * Perhaps choose_thread could pass in the current thread?
 *
 * Consider unifying runq copy-pastes.
 *
 * Thoughts on having a group central quantum bucket:
 *
 * I see two algorithms to decide quanta:
 * A) Hand off only when switching thread to thread in the same group
 * B) Allocate and return quanta to the group's pool
 *
 * Issues:
 * If a task blocks completely, should it come back with the leftover quantum
 * or a brand new quantum?
 *
 * Should I put a flag saying 'zero out any quantum you grab when you're dispatched'?
 *
 * Resolution:
 * Handing off the quantum between threads will help with jumping around in the current task
 * but will not help when a thread from a different task is involved.
 * Need an algorithm that works with round-robining between threads in different tasks.
 *
 * But wait - round-robining can only be triggered by quantum expiration or blocking.
 * We need something that works with preemption or yielding - that's the more interesting idea.
 *
 * Existing algorithm - preemption doesn't reset the quantum, and puts the thread on the head of the runq.
 * Blocking or quantum expiration does reset the quantum, and puts the thread on the tail of the runq.
 *
 * New algorithm -
 * Hand off the quantum when hopping between threads with the same sched_group.
 * Even if a thread was blocked, it uses the last thread's remaining quantum when it starts.
 *
 * If we use the 'only cycle the entry at quantum expiration' algorithm, then the quantum pool starts getting
 * interesting.
 *
 * A thought - perhaps the handoff approach doesn't work so well in the presence of
 * non-handoff wakeups, i.e. wake another thread, then wait, then block - it doesn't mean that the
 * woken thread will be what I switch to - another processor may have stolen it.
 * What do we do there?
 *
 * Conclusions:
 * We currently don't know of a scenario where quantum buckets on the task are beneficial.
 * We will instead hand off the quantum between threads in the task, and keep the quantum
 * on the preempted thread if it's preempted by something outside the task.
 *
 */
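
/*
 * Illustrative sketch of the invariant described above (an addition for
 * exposition, not part of the build): with the pset lock held, every non-empty
 * priority level of a group's runq should have that group's sched_entry for the
 * level sitting on the main entry queue, and vice versa.  The MULTIQ_SANITY_CHECK
 * code later in this file verifies roughly this for the highest level only.
 *
 *	static void
 *	check_group_invariant(sched_group_t group)
 *	{
 *		for (int pri = 0; pri < NRQS; pri++) {
 *			boolean_t level_nonempty =
 *			    !circle_queue_empty(&group->runq.queues[pri]);
 *			boolean_t entry_enqueued =
 *			    (group->entries[pri].runq == MULTIQ_ERUNQ);
 *			assert(level_nonempty == entry_enqueued);
 *		}
 *	}
 */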

#if DEBUG || DEVELOPMENT
#define MULTIQ_SANITY_CHECK
#endif

typedef struct sched_entry {
	queue_chain_t entry_links;
	int16_t sched_pri;      /* scheduled (current) priority */
	int16_t runq;
	int32_t pad;
} *sched_entry_t;

typedef run_queue_t entry_queue_t;      /* A run queue that holds sched_entries instead of threads */
typedef run_queue_t group_runq_t;       /* A run queue that is part of a sched_group */

#define SCHED_ENTRY_NULL ((sched_entry_t) 0)
#define MULTIQ_ERUNQ (-4)               /* Indicates entry is on the main runq */

/* Each level in the run queue corresponds to one entry in the entries array */
struct sched_group {
	struct sched_entry entries[NRQS];
	struct run_queue   runq;
	queue_chain_t      sched_groups;
};
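
/*
 * Layout note (sketch, not authoritative): entries[pri] is the sched_entry that
 * stands in for priority level 'pri' of this group's runq, so after
 * sched_group_create() the relation entries[pri].sched_pri == pri holds for every
 * level.  'entries' must also stay the first member so that group_for_entry()
 * below can recover the group from an entry pointer.
 *
 *	sched_group_t g = sched_group_create();
 *	assert(g->entries[BASEPRI_DEFAULT].sched_pri == BASEPRI_DEFAULT);
 *	assert((void *)g == (void *)&g->entries[0]);
 */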

/*
 * Keep entry on the head of the runqueue while dequeueing threads.
 * Only cycle it to the end of the runqueue when a thread in the task
 * hits its quantum.
 */
static boolean_t deep_drain = FALSE;

/* Verify the consistency of the runq before touching it */
static boolean_t multiq_sanity_check = FALSE;

/*
 * Draining threads from the current task is preferred
 * when they're less than X steps below the current
 * global highest priority
 */
#define DEFAULT_DRAIN_BAND_LIMIT MAXPRI
static integer_t drain_band_limit;

/*
 * Don't go below this priority level if there is something above it in another task
 */
#define DEFAULT_DRAIN_DEPTH_LIMIT MAXPRI_THROTTLE
static integer_t drain_depth_limit;

/*
 * Don't favor the task when there's something above this priority in another task.
 */
#define DEFAULT_DRAIN_CEILING BASEPRI_FOREGROUND
static integer_t drain_ceiling;
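
/*
 * The three drain tunables above (and the deep-drain and sanity-check flags) are
 * read from boot-args in sched_multiq_init() below.  A hypothetical boot-args
 * line overriding them might look like the following; the numeric values here
 * are made up for illustration only:
 *
 *	nvram boot-args="multiq_drain_ceiling=47 multiq_drain_depth_limit=4 multiq_drain_band_limit=20 -multiq-deep-drain"
 */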

static ZONE_DEFINE_TYPE(sched_group_zone, "sched groups",
    struct sched_group, ZC_NOCALLOUT);

static uint64_t num_sched_groups = 0;
static queue_head_t sched_groups;

static LCK_GRP_DECLARE(sched_groups_lock_grp, "sched_groups");
static LCK_MTX_DECLARE(sched_groups_lock, &sched_groups_lock_grp);

static void
sched_multiq_init(void);

static thread_t
sched_multiq_steal_thread(processor_set_t pset);

static void
sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context);

static boolean_t
sched_multiq_processor_enqueue(processor_t processor, thread_t thread,
    sched_options_t options);

static boolean_t
sched_multiq_processor_queue_remove(processor_t processor, thread_t thread);

void
sched_multiq_quantum_expire(thread_t thread);

static ast_t
sched_multiq_processor_csw_check(processor_t processor);

static boolean_t
sched_multiq_processor_queue_has_priority(processor_t processor, int priority, boolean_t gte);

static int
sched_multiq_runq_count(processor_t processor);

static boolean_t
sched_multiq_processor_queue_empty(processor_t processor);

static uint64_t
sched_multiq_runq_stats_count_sum(processor_t processor);

static int
sched_multiq_processor_bound_count(processor_t processor);

static void
sched_multiq_pset_init(processor_set_t pset);

static void
sched_multiq_processor_init(processor_t processor);

static thread_t
sched_multiq_choose_thread(processor_t processor, int priority, ast_t reason);

static void
sched_multiq_processor_queue_shutdown(processor_t processor);

static sched_mode_t
sched_multiq_initial_thread_sched_mode(task_t parent_task);

static bool
sched_multiq_thread_avoid_processor(processor_t processor, thread_t thread, __unused ast_t reason);

const struct sched_dispatch_table sched_multiq_dispatch = {
	.sched_name = "multiq",
	.init = sched_multiq_init,
	.timebase_init = sched_timeshare_timebase_init,
	.processor_init = sched_multiq_processor_init,
	.pset_init = sched_multiq_pset_init,
	.maintenance_continuation = sched_timeshare_maintenance_continue,
	.choose_thread = sched_multiq_choose_thread,
	.steal_thread_enabled = sched_steal_thread_DISABLED,
	.steal_thread = sched_multiq_steal_thread,
	.compute_timeshare_priority = sched_compute_timeshare_priority,
	.choose_node = sched_choose_node,
	.choose_processor = choose_processor,
	.processor_enqueue = sched_multiq_processor_enqueue,
	.processor_queue_shutdown = sched_multiq_processor_queue_shutdown,
	.processor_queue_remove = sched_multiq_processor_queue_remove,
	.processor_queue_empty = sched_multiq_processor_queue_empty,
	.priority_is_urgent = priority_is_urgent,
	.processor_csw_check = sched_multiq_processor_csw_check,
	.processor_queue_has_priority = sched_multiq_processor_queue_has_priority,
	.initial_quantum_size = sched_timeshare_initial_quantum_size,
	.initial_thread_sched_mode = sched_multiq_initial_thread_sched_mode,
	.can_update_priority = can_update_priority,
	.update_priority = update_priority,
	.lightweight_update_priority = lightweight_update_priority,
	.quantum_expire = sched_multiq_quantum_expire,
	.processor_runq_count = sched_multiq_runq_count,
	.processor_runq_stats_count_sum = sched_multiq_runq_stats_count_sum,
	.processor_bound_count = sched_multiq_processor_bound_count,
	.thread_update_scan = sched_multiq_thread_update_scan,
	.multiple_psets_enabled = FALSE,
	.sched_groups_enabled = TRUE,
	.avoid_processor_enabled = TRUE,
	.thread_avoid_processor = sched_multiq_thread_avoid_processor,
	.processor_balance = sched_SMT_balance,

	.rt_runq = sched_rtlocal_runq,
	.rt_init = sched_rtlocal_init,
	.rt_queue_shutdown = sched_rtlocal_queue_shutdown,
	.rt_runq_scan = sched_rtlocal_runq_scan,
	.rt_runq_count_sum = sched_rtlocal_runq_count_sum,
	.rt_steal_thread = sched_rtlocal_steal_thread,

	.qos_max_parallelism = sched_qos_max_parallelism,
	.check_spill = sched_check_spill,
	.ipi_policy = sched_ipi_policy,
	.thread_should_yield = sched_thread_should_yield,
	.run_count_incr = sched_run_incr,
	.run_count_decr = sched_run_decr,
	.update_thread_bucket = sched_update_thread_bucket,
	.pset_made_schedulable = sched_pset_made_schedulable,
	.cpu_init_completed = NULL,
	.thread_eligible_for_pset = NULL,
};


static void
sched_multiq_init(void)
{
#if defined(MULTIQ_SANITY_CHECK)
	PE_parse_boot_argn("-multiq-sanity-check", &multiq_sanity_check, sizeof(multiq_sanity_check));
#endif

	PE_parse_boot_argn("-multiq-deep-drain", &deep_drain, sizeof(deep_drain));

	if (!PE_parse_boot_argn("multiq_drain_ceiling", &drain_ceiling, sizeof(drain_ceiling))) {
		drain_ceiling = DEFAULT_DRAIN_CEILING;
	}

	if (!PE_parse_boot_argn("multiq_drain_depth_limit", &drain_depth_limit, sizeof(drain_depth_limit))) {
		drain_depth_limit = DEFAULT_DRAIN_DEPTH_LIMIT;
	}

	if (!PE_parse_boot_argn("multiq_drain_band_limit", &drain_band_limit, sizeof(drain_band_limit))) {
		drain_band_limit = DEFAULT_DRAIN_BAND_LIMIT;
	}

	printf("multiq scheduler config: deep-drain %d, ceiling %d, depth limit %d, band limit %d, sanity check %d\n",
	    deep_drain, drain_ceiling, drain_depth_limit, drain_band_limit, multiq_sanity_check);

	queue_init(&sched_groups);

	sched_timeshare_init();
}

static void
sched_multiq_processor_init(processor_t processor)
{
	run_queue_init(&processor->runq);
}

static void
sched_multiq_pset_init(processor_set_t pset)
{
	run_queue_init(&pset->pset_runq);
}

static sched_mode_t
sched_multiq_initial_thread_sched_mode(task_t parent_task)
{
	if (parent_task == kernel_task) {
		return TH_MODE_FIXED;
	} else {
		return TH_MODE_TIMESHARE;
	}
}

sched_group_t
sched_group_create(void)
{
	sched_group_t sched_group;

	if (!SCHED(sched_groups_enabled)) {
		return SCHED_GROUP_NULL;
	}

	sched_group = zalloc_flags(sched_group_zone, Z_WAITOK | Z_ZERO);

	run_queue_init(&sched_group->runq);

	for (size_t i = 0; i < NRQS; i++) {
		sched_group->entries[i].runq = 0;
		sched_group->entries[i].sched_pri = (int16_t)i;
	}

	lck_mtx_lock(&sched_groups_lock);
	queue_enter(&sched_groups, sched_group, sched_group_t, sched_groups);
	num_sched_groups++;
	lck_mtx_unlock(&sched_groups_lock);

	return sched_group;
}

void
sched_group_destroy(sched_group_t sched_group)
{
	if (!SCHED(sched_groups_enabled)) {
		assert(sched_group == SCHED_GROUP_NULL);
		return;
	}

	assert(sched_group != SCHED_GROUP_NULL);
	assert(sched_group->runq.count == 0);

	for (int i = 0; i < NRQS; i++) {
		assert(sched_group->entries[i].runq == 0);
		assert(sched_group->entries[i].sched_pri == i);
	}

	lck_mtx_lock(&sched_groups_lock);
	queue_remove(&sched_groups, sched_group, sched_group_t, sched_groups);
	num_sched_groups--;
	lck_mtx_unlock(&sched_groups_lock);

	zfree(sched_group_zone, sched_group);
}

__attribute__((always_inline))
static inline entry_queue_t
multiq_main_entryq(processor_t processor)
{
	return (entry_queue_t)&processor->processor_set->pset_runq;
}

__attribute__((always_inline))
static inline run_queue_t
multiq_bound_runq(processor_t processor)
{
	return &processor->runq;
}

__attribute__((always_inline))
static inline sched_entry_t
group_entry_for_pri(sched_group_t group, integer_t pri)
{
	return &group->entries[pri];
}

__attribute__((always_inline))
static inline sched_group_t
group_for_entry(sched_entry_t entry)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"
	sched_group_t group = (sched_group_t)(entry - entry->sched_pri);
#pragma clang diagnostic pop
	return group;
}
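
/*
 * Why the arithmetic in group_for_entry() works (explanatory sketch): 'entries'
 * is the first member of struct sched_group and entry->sched_pri records the
 * entry's index in that array, so stepping back sched_pri elements lands on
 * &group->entries[0], which has the same address as the group itself.  A
 * compile-time check along these lines (an assumption added for illustration,
 * not present in the original) would document that dependency:
 *
 *	static_assert(offsetof(struct sched_group, entries) == 0,
 *	    "group_for_entry() relies on entries being the first member");
 */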

/* Peek at the head of the runqueue */
static sched_entry_t
entry_queue_first_entry(entry_queue_t rq)
{
	assert(rq->count != 0);

	circle_queue_t queue = &rq->queues[rq->highq];

	sched_entry_t entry = cqe_queue_first(queue, struct sched_entry, entry_links);

	assert(entry->sched_pri == rq->highq);

	return entry;
}

#if defined(MULTIQ_SANITY_CHECK)

#if MACH_ASSERT
__attribute__((always_inline))
static inline boolean_t
queue_chain_linked(queue_chain_t* chain)
{
	if (chain->next != NULL) {
		assert(chain->prev != NULL);
		return TRUE;
	} else {
		assert(chain->prev == NULL);
		return FALSE;
	}
}
#endif /* MACH_ASSERT */

static thread_t
group_first_thread(sched_group_t group)
{
	group_runq_t rq = &group->runq;

	assert(rq->count != 0);

	circle_queue_t queue = &rq->queues[rq->highq];

	thread_t thread = cqe_queue_first(queue, struct thread, runq_links);

	assert(thread != THREAD_NULL);
	assert_thread_magic(thread);

	assert(thread->sched_group == group);

	/* TODO: May not be safe */
	assert(thread->sched_pri == rq->highq);

	return thread;
}

/* Asserts if entry is not in entry runq at pri */
static void
entry_queue_check_entry(entry_queue_t runq, sched_entry_t entry, int expected_pri)
{
	circle_queue_t q;
	sched_entry_t elem;

	assert(queue_chain_linked(&entry->entry_links));
	assert(entry->runq == MULTIQ_ERUNQ);

	q = &runq->queues[expected_pri];

	cqe_foreach_element(elem, q, entry_links) {
		if (elem == entry) {
			return;
		}
	}

	panic("runq %p doesn't contain entry %p at pri %d", runq, entry, expected_pri);
}

/* Asserts if thread is not in group at its priority */
static void
sched_group_check_thread(sched_group_t group, thread_t thread)
{
	circle_queue_t q;
	thread_t elem;
	int pri = thread->sched_pri;

	thread_assert_runq_nonnull(thread);

	q = &group->runq.queues[pri];

	cqe_foreach_element(elem, q, runq_links) {
		if (elem == thread) {
			return;
		}
	}

	panic("group %p doesn't contain thread %p at pri %d", group, thread, pri);
}

static void
global_check_entry_queue(entry_queue_t main_entryq)
{
	if (main_entryq->count == 0) {
		return;
	}

	sched_entry_t entry = entry_queue_first_entry(main_entryq);

	assert(entry->runq == MULTIQ_ERUNQ);

	sched_group_t group = group_for_entry(entry);

	thread_t thread = group_first_thread(group);

	__assert_only sched_entry_t thread_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);

	assert(entry->sched_pri == group->runq.highq);

	assert(entry == thread_entry);
	thread_assert_runq_nonnull(thread);
}

static void
group_check_run_queue(entry_queue_t main_entryq, sched_group_t group)
{
	if (group->runq.count == 0) {
		return;
	}

	thread_t thread = group_first_thread(group);

	thread_assert_runq_nonnull(thread);

	sched_entry_t sched_entry = group_entry_for_pri(thread->sched_group, thread->sched_pri);

	entry_queue_check_entry(main_entryq, sched_entry, thread->sched_pri);

	assert(sched_entry->sched_pri == thread->sched_pri);
	assert(sched_entry->runq == MULTIQ_ERUNQ);
}

#endif /* defined(MULTIQ_SANITY_CHECK) */

/*
 * The run queue must not be empty.
 */
static sched_entry_t
entry_queue_dequeue_entry(entry_queue_t rq)
{
	sched_entry_t sched_entry;
	circle_queue_t queue = &rq->queues[rq->highq];

	assert(rq->count > 0);
	assert(!circle_queue_empty(queue));

	sched_entry = cqe_dequeue_head(queue, struct sched_entry, entry_links);

	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count--;
	if (SCHED(priority_is_urgent)(rq->highq)) {
		rq->urgency--; assert(rq->urgency >= 0);
	}
	if (circle_queue_empty(queue)) {
		rq_bitmap_clear(rq->bitmap, rq->highq);
		rq->highq = bitmap_first(rq->bitmap, NRQS);
	}

	sched_entry->runq = 0;

	return sched_entry;
}

/*
 * Enqueue an entry onto the run queue.
 * Returns TRUE if the entry is now the highest priority entry on the run queue.
 */
static boolean_t
entry_queue_enqueue_entry(
	entry_queue_t rq,
	sched_entry_t entry,
	integer_t     options)
{
	int sched_pri = entry->sched_pri;
	circle_queue_t queue = &rq->queues[sched_pri];
	boolean_t result = FALSE;

	assert(entry->runq == 0);

	if (circle_queue_empty(queue)) {
		circle_enqueue_tail(queue, &entry->entry_links);

		rq_bitmap_set(rq->bitmap, sched_pri);
		if (sched_pri > rq->highq) {
			rq->highq = sched_pri;
			result = TRUE;
		}
	} else {
		if (options & SCHED_TAILQ) {
			circle_enqueue_tail(queue, &entry->entry_links);
		} else {
			circle_enqueue_head(queue, &entry->entry_links);
		}
	}
	if (SCHED(priority_is_urgent)(sched_pri)) {
		rq->urgency++;
	}
	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count++;

	entry->runq = MULTIQ_ERUNQ;

	return result;
}
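
/*
 * Bookkeeping sketch for the dequeue/enqueue pair above (illustrative only):
 * each priority level owns a bit in rq->bitmap, rq->highq caches the highest set
 * bit, and rq->urgency counts enqueued entries whose priority is urgent per
 * SCHED(priority_is_urgent).  A consistency check would look roughly like:
 *
 *	static void
 *	check_entry_queue_counters(entry_queue_t rq)
 *	{
 *		assert(rq->urgency >= 0);
 *		if (rq->count > 0) {
 *			assert(bitmap_test(rq->bitmap, rq->highq));
 *			assert(rq->highq == bitmap_first(rq->bitmap, NRQS));
 *		}
 *	}
 */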

/*
 * The entry must be in this runqueue.
 */
static void
entry_queue_remove_entry(
	entry_queue_t rq,
	sched_entry_t entry)
{
	int sched_pri = entry->sched_pri;

#if defined(MULTIQ_SANITY_CHECK)
	if (multiq_sanity_check) {
		entry_queue_check_entry(rq, entry, sched_pri);
	}
#endif

	remqueue(&entry->entry_links);

	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count--;
	if (SCHED(priority_is_urgent)(sched_pri)) {
		rq->urgency--; assert(rq->urgency >= 0);
	}

	if (circle_queue_empty(&rq->queues[sched_pri])) {
		/* update run queue status */
		rq_bitmap_clear(rq->bitmap, sched_pri);
		rq->highq = bitmap_first(rq->bitmap, NRQS);
	}

	entry->runq = 0;
}

static void
entry_queue_change_entry(
	entry_queue_t rq,
	sched_entry_t entry,
	integer_t     options)
{
	int sched_pri = entry->sched_pri;
	circle_queue_t queue = &rq->queues[sched_pri];

#if defined(MULTIQ_SANITY_CHECK)
	if (multiq_sanity_check) {
		entry_queue_check_entry(rq, entry, sched_pri);
	}
#endif

	circle_dequeue(queue, &entry->entry_links);
	if (options & SCHED_TAILQ) {
		circle_enqueue_tail(queue, &entry->entry_links);
	} else {
		circle_enqueue_head(queue, &entry->entry_links);
	}
}

/*
 * The run queue must not be empty.
 *
 * Sets queue_empty to TRUE if the queue is now empty at thread_pri.
 */
static thread_t
group_run_queue_dequeue_thread(
	group_runq_t rq,
	integer_t    *thread_pri,
	boolean_t    *queue_empty)
{
	thread_t thread;
	circle_queue_t queue = &rq->queues[rq->highq];

	assert(rq->count > 0);
	assert(!circle_queue_empty(queue));

	*thread_pri = rq->highq;

	thread = cqe_dequeue_head(queue, struct thread, runq_links);
	assert_thread_magic(thread);

	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count--;
	if (SCHED(priority_is_urgent)(rq->highq)) {
		rq->urgency--; assert(rq->urgency >= 0);
	}
	if (circle_queue_empty(queue)) {
		rq_bitmap_clear(rq->bitmap, rq->highq);
		rq->highq = bitmap_first(rq->bitmap, NRQS);
		*queue_empty = TRUE;
	} else {
		*queue_empty = FALSE;
	}

	return thread;
}

/*
 * Enqueue a thread onto the group run queue.
 * Returns TRUE if the queue was empty at thread_pri.
 */
static boolean_t
group_run_queue_enqueue_thread(
	group_runq_t rq,
	thread_t     thread,
	integer_t    thread_pri,
	integer_t    options)
{
	circle_queue_t queue = &rq->queues[thread_pri];
	boolean_t result = FALSE;

	thread_assert_runq_null(thread);
	assert_thread_magic(thread);

	if (circle_queue_empty(queue)) {
		circle_enqueue_tail(queue, &thread->runq_links);

		rq_bitmap_set(rq->bitmap, thread_pri);
		if (thread_pri > rq->highq) {
			rq->highq = thread_pri;
		}
		result = TRUE;
	} else {
		if (options & SCHED_TAILQ) {
			circle_enqueue_tail(queue, &thread->runq_links);
		} else {
			circle_enqueue_head(queue, &thread->runq_links);
		}
	}
	if (SCHED(priority_is_urgent)(thread_pri)) {
		rq->urgency++;
	}
	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count++;

	return result;
}

/*
 * The thread must be in this runqueue.
 * Returns TRUE if the queue is now empty at thread_pri.
 */
static boolean_t
group_run_queue_remove_thread(
	group_runq_t rq,
	thread_t     thread,
	integer_t    thread_pri)
{
	circle_queue_t queue = &rq->queues[thread_pri];
	boolean_t result = FALSE;

	assert_thread_magic(thread);
	thread_assert_runq_nonnull(thread);

	circle_dequeue(queue, &thread->runq_links);

	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	rq->count--;
	if (SCHED(priority_is_urgent)(thread_pri)) {
		rq->urgency--; assert(rq->urgency >= 0);
	}

	if (circle_queue_empty(queue)) {
		/* update run queue status */
		rq_bitmap_clear(rq->bitmap, thread_pri);
		rq->highq = bitmap_first(rq->bitmap, NRQS);
		result = TRUE;
	}

	thread_clear_runq_locked(thread);

	return result;
}

/*
 * A thread's sched pri may change out from under us because
 * we're clearing thread->runq here without the thread locked.
 * Do not rely on it to be the same as when we enqueued.
 */
static thread_t
sched_global_dequeue_thread(entry_queue_t main_entryq)
{
	boolean_t pri_level_empty = FALSE;
	sched_entry_t entry;
	group_runq_t group_runq;
	thread_t thread;
	integer_t thread_pri;
	sched_group_t group;

	assert(main_entryq->count > 0);

	entry = entry_queue_dequeue_entry(main_entryq);

	group = group_for_entry(entry);
	group_runq = &group->runq;

	thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);

	thread_clear_runq(thread);

	if (!pri_level_empty) {
		entry_queue_enqueue_entry(main_entryq, entry, SCHED_TAILQ);
	}

	return thread;
}
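
/*
 * The dequeue/re-enqueue dance above is what round-robins groups that share a
 * priority level: the entry comes off the head and goes back on the tail, so if
 * groups A and B both have pri-31 work, successive global picks at that level
 * alternate between them instead of draining A first.  Illustrative trace
 * (hypothetical groups A and B):
 *
 *	main_entryq @31: [A][B]  -> pick a thread from A; A still non-empty
 *	main_entryq @31: [B][A]  -> pick a thread from B; ...
 */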

/* Dequeue a thread from the global runq without moving the entry */
static thread_t
sched_global_deep_drain_dequeue_thread(entry_queue_t main_entryq)
{
	boolean_t pri_level_empty = FALSE;
	sched_entry_t entry;
	group_runq_t group_runq;
	thread_t thread;
	integer_t thread_pri;
	sched_group_t group;

	assert(main_entryq->count > 0);

	entry = entry_queue_first_entry(main_entryq);

	group = group_for_entry(entry);
	group_runq = &group->runq;

	thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);

	thread_clear_runq(thread);

	if (pri_level_empty) {
		entry_queue_remove_entry(main_entryq, entry);
	}

	return thread;
}


static thread_t
sched_group_dequeue_thread(
	entry_queue_t main_entryq,
	sched_group_t group)
{
	group_runq_t group_runq = &group->runq;
	boolean_t pri_level_empty = FALSE;
	thread_t thread;
	integer_t thread_pri;

	thread = group_run_queue_dequeue_thread(group_runq, &thread_pri, &pri_level_empty);

	thread_clear_runq(thread);

	if (pri_level_empty) {
		entry_queue_remove_entry(main_entryq, group_entry_for_pri(group, thread_pri));
	}

	return thread;
}

static void
sched_group_remove_thread(
	entry_queue_t main_entryq,
	sched_group_t group,
	thread_t thread)
{
	integer_t thread_pri = thread->sched_pri;
	sched_entry_t sched_entry = group_entry_for_pri(group, thread_pri);

#if defined(MULTIQ_SANITY_CHECK)
	if (multiq_sanity_check) {
		global_check_entry_queue(main_entryq);
		group_check_run_queue(main_entryq, group);

		sched_group_check_thread(group, thread);
		entry_queue_check_entry(main_entryq, sched_entry, thread_pri);
	}
#endif

	boolean_t pri_level_empty = group_run_queue_remove_thread(&group->runq, thread, thread_pri);

	if (pri_level_empty) {
		entry_queue_remove_entry(main_entryq, sched_entry);
	}

#if defined(MULTIQ_SANITY_CHECK)
	if (multiq_sanity_check) {
		global_check_entry_queue(main_entryq);
		group_check_run_queue(main_entryq, group);
	}
#endif
}

static void
sched_group_enqueue_thread(
	entry_queue_t main_entryq,
	sched_group_t group,
	thread_t      thread,
	integer_t     options)
{
#if defined(MULTIQ_SANITY_CHECK)
	if (multiq_sanity_check) {
		global_check_entry_queue(main_entryq);
		group_check_run_queue(main_entryq, group);
	}
#endif

	int sched_pri = thread->sched_pri;

	boolean_t pri_level_was_empty = group_run_queue_enqueue_thread(&group->runq, thread, sched_pri, options);

	if (pri_level_was_empty) {
		/*
		 * TODO: Need to figure out if passing options here is a good idea or not
		 * What effects would it have?
		 */
		entry_queue_enqueue_entry(main_entryq, &group->entries[sched_pri], options);
	} else if (options & SCHED_HEADQ) {
		/* The thread should be at the head of the line - move its entry to the front */
		entry_queue_change_entry(main_entryq, &group->entries[sched_pri], options);
	}
}
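
/*
 * Putting the two levels together (sketch of the flow under hypothetical state):
 * enqueueing a pri-31 thread T from group G with SCHED_TAILQ does the group-level
 * insert first and touches the entry queue only when it has to.
 *
 *	Before:  G->runq empty at 31, G->entries[31].runq == 0
 *	sched_group_enqueue_thread(main_entryq, G, T, SCHED_TAILQ);
 *	After:   T is on G->runq.queues[31], and because that level was empty,
 *	         G->entries[31] is now on main_entryq (runq == MULTIQ_ERUNQ).
 *
 * A second pri-31 thread from G enqueued with SCHED_TAILQ only lengthens the
 * group-level queue; the single entry already on main_entryq keeps representing
 * the whole level.
 */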

/*
 * Locate a thread to execute from the run queue and return it.
 * Only choose a thread with greater or equal priority.
 *
 * pset is locked, thread is not locked.
 *
 * Returns THREAD_NULL if it cannot find a valid thread.
 *
 * Note: we cannot rely on the value of thread->sched_pri in this path because
 * we don't have the thread locked.
 *
 * TODO: Remove tracepoints
 */
static thread_t
sched_multiq_choose_thread(
	processor_t processor,
	int         priority,
	ast_t       reason)
{
	entry_queue_t main_entryq = multiq_main_entryq(processor);
	run_queue_t   bound_runq  = multiq_bound_runq(processor);

	boolean_t choose_bound_runq = FALSE;

	if (bound_runq->highq < priority &&
	    main_entryq->highq < priority) {
		return THREAD_NULL;
	}

	if (bound_runq->count && main_entryq->count) {
		if (bound_runq->highq >= main_entryq->highq) {
			choose_bound_runq = TRUE;
		} else {
			/* Use main runq */
		}
	} else if (bound_runq->count) {
		choose_bound_runq = TRUE;
	} else if (main_entryq->count) {
		/* Use main runq */
	} else {
		return THREAD_NULL;
	}

	if (choose_bound_runq) {
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
		    MACH_MULTIQ_BOUND, main_entryq->highq, bound_runq->highq, 0, 0);

		return run_queue_dequeue(bound_runq, SCHED_HEADQ);
	}

	sched_group_t group = current_thread()->sched_group;

#if defined(MULTIQ_SANITY_CHECK)
	if (multiq_sanity_check) {
		global_check_entry_queue(main_entryq);
		group_check_run_queue(main_entryq, group);
	}
#endif

	/*
	 * Determine if we should look at the group or the global queue
	 *
	 * TODO:
	 * Perhaps pass reason as a 'should look inside' argument to choose_thread
	 * Should YIELD AST override drain limit?
	 */
	if (group->runq.count != 0 && (reason & AST_PREEMPTION) == 0) {
		boolean_t favor_group = TRUE;

		integer_t global_pri = main_entryq->highq;
		integer_t group_pri  = group->runq.highq;

		/*
		 * Favor the current group if the group is still the globally highest.
		 *
		 * Otherwise, consider choosing a thread from the current group
		 * even if it's lower priority than the global highest priority.
		 */
		if (global_pri > group_pri) {
			/*
			 * If there's something elsewhere above the depth limit,
			 * don't pick a thread below the limit.
			 */
			if (global_pri > drain_depth_limit && group_pri <= drain_depth_limit) {
				favor_group = FALSE;
			}

			/*
			 * If there's something at or above the ceiling,
			 * don't favor the group.
			 */
			if (global_pri >= drain_ceiling) {
				favor_group = FALSE;
			}

			/*
			 * Don't go more than X steps below the global highest
			 */
			if ((global_pri - group_pri) >= drain_band_limit) {
				favor_group = FALSE;
			}
		}

		if (favor_group) {
			/* Pull from local runq */
			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			    MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
			    MACH_MULTIQ_GROUP, global_pri, group_pri, 0, 0);

			return sched_group_dequeue_thread(main_entryq, group);
		}
	}

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_SCHED, MACH_MULTIQ_DEQUEUE) | DBG_FUNC_NONE,
	    MACH_MULTIQ_GLOBAL, main_entryq->highq, group->runq.highq, 0, 0);

	/* Couldn't pull from local runq, pull from global runq instead */
	if (deep_drain) {
		return sched_global_deep_drain_dequeue_thread(main_entryq);
	} else {
		return sched_global_dequeue_thread(main_entryq);
	}
}
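
/*
 * Worked example of the drain heuristic above, assuming the default tunables
 * (drain_ceiling = BASEPRI_FOREGROUND, drain_depth_limit = MAXPRI_THROTTLE,
 * drain_band_limit = MAXPRI) and their usual values of 47, 4 and 127, with the
 * current group runnable and no AST_PREEMPTION reason:
 *
 *	global_pri 31, group_pri 20 -> favor the group:
 *	    31 < 47 (ceiling), 20 > 4 (depth), 31 - 20 < 127 (band)
 *	global_pri 60, group_pri 31 -> don't favor: 60 >= 47 trips the ceiling
 *	global_pri 31, group_pri  4 -> don't favor: the group is at or below the
 *	    depth limit while higher-priority work exists elsewhere
 *
 * Every "don't favor" case falls through to the global dequeue path at the end
 * of the function above.
 */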


/*
 * Thread must be locked, and not already be on a run queue.
 * pset is locked.
 */
static boolean_t
sched_multiq_processor_enqueue(
	processor_t     processor,
	thread_t        thread,
	sched_options_t options)
{
	boolean_t result;

	assert(processor == thread->chosen_processor);

	if (thread->bound_processor != PROCESSOR_NULL) {
		assert(thread->bound_processor == processor);

		result = run_queue_enqueue(multiq_bound_runq(processor), thread, options);
		thread_set_runq_locked(thread, processor);

		return result;
	}

	sched_group_enqueue_thread(multiq_main_entryq(processor),
	    thread->sched_group,
	    thread, options);

	thread_set_runq_locked(thread, processor);

	return FALSE;
}

/*
 * Called in the context of thread with thread and pset unlocked,
 * after updating thread priority but before propagating that priority
 * to the processor
 */
void
sched_multiq_quantum_expire(thread_t thread)
{
	if (deep_drain) {
		/*
		 * Move the entry at this priority to the end of the queue,
		 * to allow the next task a shot at running.
		 */

		processor_t processor = thread->last_processor;
		processor_set_t pset = processor->processor_set;
		entry_queue_t entryq = multiq_main_entryq(processor);

		pset_lock(pset);

		sched_entry_t entry = group_entry_for_pri(thread->sched_group, processor->current_pri);

		if (entry->runq == MULTIQ_ERUNQ) {
			entry_queue_change_entry(entryq, entry, SCHED_TAILQ);
		}

		pset_unlock(pset);
	}
}

static boolean_t
sched_multiq_processor_queue_empty(processor_t processor)
{
	return multiq_main_entryq(processor)->count == 0 &&
	       multiq_bound_runq(processor)->count == 0;
}

static ast_t
sched_multiq_processor_csw_check(processor_t processor)
{
	boolean_t has_higher;
	int pri;

	if (sched_multiq_thread_avoid_processor(processor, current_thread(), AST_NONE)) {
		return AST_PREEMPT | AST_URGENT;
	}

	entry_queue_t main_entryq = multiq_main_entryq(processor);
	run_queue_t   bound_runq  = multiq_bound_runq(processor);

	assert(processor->active_thread != NULL);

	pri = MAX(main_entryq->highq, bound_runq->highq);

	if (processor->first_timeslice) {
		has_higher = (pri > processor->current_pri);
	} else {
		has_higher = (pri >= processor->current_pri);
	}

	if (has_higher) {
		if (main_entryq->urgency > 0) {
			return AST_PREEMPT | AST_URGENT;
		}

		if (bound_runq->urgency > 0) {
			return AST_PREEMPT | AST_URGENT;
		}

		return AST_PREEMPT;
	}

	return AST_NONE;
}

static boolean_t
sched_multiq_processor_queue_has_priority(
	processor_t processor,
	int         priority,
	boolean_t   gte)
{
	run_queue_t main_runq  = multiq_main_entryq(processor);
	run_queue_t bound_runq = multiq_bound_runq(processor);

	int qpri = MAX(main_runq->highq, bound_runq->highq);

	if (gte) {
		return qpri >= priority;
	} else {
		return qpri > priority;
	}
}

static int
sched_multiq_runq_count(processor_t processor)
{
	/*
	 * TODO: Decide whether to keep a count of runnable threads in the pset
	 * or just return something less than the true count.
	 *
	 * This needs to be fast, so no iterating the whole runq.
	 *
	 * Another possible decision is to remove this - with global runq
	 * it doesn't make much sense.
	 */
	return multiq_main_entryq(processor)->count + multiq_bound_runq(processor)->count;
}

static uint64_t
sched_multiq_runq_stats_count_sum(processor_t processor)
{
	/*
	 * TODO: This one does need to go through all the runqueues, but it's only needed for
	 * the sched stats tool
	 */

	uint64_t bound_sum = multiq_bound_runq(processor)->runq_stats.count_sum;

	if (processor->cpu_id == processor->processor_set->cpu_set_low) {
		return bound_sum + multiq_main_entryq(processor)->runq_stats.count_sum;
	} else {
		return bound_sum;
	}
}

static int
sched_multiq_processor_bound_count(processor_t processor)
{
	return multiq_bound_runq(processor)->count;
}

static void
sched_multiq_processor_queue_shutdown(processor_t processor)
{
	processor_set_t pset = processor->processor_set;
	entry_queue_t main_entryq = multiq_main_entryq(processor);
	thread_t thread;
	queue_head_t tqueue;

	/* We only need to migrate threads if this is the last active processor in the pset */
	if (pset->online_processor_count > 0) {
		pset_unlock(pset);
		return;
	}

	queue_init(&tqueue);

	/* Note that we do not remove bound threads from the queues here */

	while (main_entryq->count > 0) {
		thread = sched_global_dequeue_thread(main_entryq);
		enqueue_tail(&tqueue, &thread->runq_links);
	}

	pset_unlock(pset);

	qe_foreach_element_safe(thread, &tqueue, runq_links) {
		remqueue(&thread->runq_links);

		thread_lock(thread);

		thread_setrun(thread, SCHED_TAILQ);

		thread_unlock(thread);
	}
}

/*
 * Thread is locked.
 *
 * This is why we can never read sched_pri unless we have the thread locked:
 * we hold the thread lock in the enqueue and remove cases, but not in the dequeue case.
 */
static boolean_t
sched_multiq_processor_queue_remove(
	processor_t processor,
	thread_t    thread)
{
	boolean_t removed = FALSE;
	processor_set_t pset = processor->processor_set;

	pset_lock(pset);

	if (thread_get_runq_locked(thread) != PROCESSOR_NULL) {
		/*
		 * Thread is on a run queue and we have a lock on
		 * that run queue.
		 */

		thread_assert_runq_nonnull(thread);

		if (thread->bound_processor != PROCESSOR_NULL) {
			assert(processor == thread->bound_processor);
			run_queue_remove(multiq_bound_runq(processor), thread);
			thread_clear_runq_locked(thread);
		} else {
			sched_group_remove_thread(multiq_main_entryq(processor),
			    thread->sched_group,
			    thread);
		}

		removed = TRUE;
	}

	pset_unlock(pset);

	return removed;
}

/* pset is locked, returned unlocked */
static thread_t
sched_multiq_steal_thread(processor_set_t pset)
{
	pset_unlock(pset);
	return THREAD_NULL;
}

/*
 * Scan the global queue for candidate groups, and scan those groups for
 * candidate threads.
 *
 * TODO: This iterates every group runq in its entirety for each entry it has in the runq, which is O(N^2)
 * Instead, iterate only the queue in the group runq matching the priority of the entry.
 *
 * Returns TRUE if retry is needed.
 */
static boolean_t
group_scan(entry_queue_t runq, sched_update_scan_context_t scan_context)
{
	int count = runq->count;
	int queue_index;

	assert(count >= 0);

	if (count == 0) {
		return FALSE;
	}

	for (queue_index = bitmap_first(runq->bitmap, NRQS);
	    queue_index >= 0;
	    queue_index = bitmap_next(runq->bitmap, queue_index)) {
		sched_entry_t entry;

		cqe_foreach_element(entry, &runq->queues[queue_index], entry_links) {
			assert(count > 0);

			sched_group_t group = group_for_entry(entry);
			if (group->runq.count > 0) {
				if (runq_scan(&group->runq, scan_context)) {
					return TRUE;
				}
			}
			count--;
		}
	}

	return FALSE;
}

static void
sched_multiq_thread_update_scan(sched_update_scan_context_t scan_context)
{
	boolean_t restart_needed = FALSE;
	processor_t processor = processor_list;
	processor_set_t pset;
	thread_t thread;
	spl_t s;

	/*
	 * We update the threads associated with each processor (bound and idle threads)
	 * and then update the threads in each pset runqueue.
	 */

	do {
		do {
			pset = processor->processor_set;

			s = splsched();
			pset_lock(pset);

			restart_needed = runq_scan(multiq_bound_runq(processor), scan_context);

			pset_unlock(pset);
			splx(s);

			if (restart_needed) {
				break;
			}

			thread = processor->idle_thread;
			if (thread != THREAD_NULL && thread->sched_stamp != sched_tick) {
				if (thread_update_add_thread(thread) == FALSE) {
					restart_needed = TRUE;
					break;
				}
			}
		} while ((processor = processor->processor_list) != NULL);

		/* Ok, we now have a collection of candidates -- fix them. */
		thread_update_process_threads();
	} while (restart_needed);

	pset = &pset0;

	do {
		do {
			s = splsched();
			pset_lock(pset);

			restart_needed = group_scan(&pset->pset_runq, scan_context);

			pset_unlock(pset);
			splx(s);

			if (restart_needed) {
				break;
			}
		} while ((pset = pset->pset_list) != NULL);

		/* Ok, we now have a collection of candidates -- fix them. */
		thread_update_process_threads();
	} while (restart_needed);
}

extern int sched_allow_rt_smt;

/* Return true if this thread should not continue running on this processor */
static bool
sched_multiq_thread_avoid_processor(processor_t processor, thread_t thread, __unused ast_t reason)
{
	if (processor->processor_primary != processor) {
		/*
		 * This is a secondary SMT processor.  If the primary is running
		 * a realtime thread, only allow realtime threads on the secondary.
		 */
		if ((processor->processor_primary->current_pri >= BASEPRI_RTQUEUES) && ((thread->sched_pri < BASEPRI_RTQUEUES) || !sched_allow_rt_smt)) {
			return true;
		}
	}

	return false;
}