1/*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
#include <mach/mach_types.h>
#include <mach/thread_act_server.h>

#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/affinity.h>
#include <kern/work_interval.h>
#include <mach/task_policy.h>
#include <kern/sfi.h>
#include <kern/policy_internal.h>
#include <sys/errno.h>
#include <sys/ulock.h>

#include <mach/machine/sdt.h>
44
45static KALLOC_TYPE_DEFINE(thread_qos_override_zone,
46 struct thread_qos_override, KT_DEFAULT);
47
48#ifdef MACH_BSD
49extern int proc_selfpid(void);
50extern char * proc_name_address(void *p);
51extern void rethrottle_thread(void * uthread);
52#endif /* MACH_BSD */
53
54#define QOS_EXTRACT(q) ((q) & 0xff)
55
56#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
57#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
58#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
59#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
60
61TUNABLE(uint32_t, qos_override_mode, "qos_override_mode",
62 QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE);
63
64static void
65proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
66
67const int thread_default_iotier_override = THROTTLE_LEVEL_END;
68
69const struct thread_requested_policy default_thread_requested_policy = {
70 .thrp_iotier_kevent_override = thread_default_iotier_override
71};
72
73/*
74 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
75 * to threads that don't have a QoS class set.
76 */
77const qos_policy_params_t thread_qos_policy_params = {
78 /*
79 * This table defines the starting base priority of the thread,
80 * which will be modified by the thread importance and the task max priority
81 * before being applied.
82 */
83 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
84 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
85 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
86 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
87 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
88 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
89 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
90
91 /*
92 * This table defines the highest IO priority that a thread marked with this
93 * QoS class can have.
94 */
95 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
96 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
97 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
98 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
99 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
100 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
101 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
102
	/*
	 * These tables define the highest throughput and latency QoS tiers
	 * that a thread marked with this QoS class can have.
	 */
107
108 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
109 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
110 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
111 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
112 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
113 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
114 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
115
116 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
117 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
118 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
119 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
120 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
121 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
122 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
123};
124
125static void
126thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
127
128static int
129thread_qos_scaled_relative_priority(int qos, int qos_relprio);
130
131static void
132proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
133
134static void
135proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
136
137static void
138proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
139
140static void
141thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
142
143static int
144thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
145
146static int
147proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
148
149static void
150thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
151
152static void
153thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
154
155boolean_t
156thread_has_qos_policy(thread_t thread)
157{
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159}
160
161
162static void
163thread_remove_qos_policy_locked(thread_t thread,
164 task_pend_token_t pend_token)
165{
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
	    THREAD_QOS_UNSPECIFIED, 0, pend_token);
172}
173
174kern_return_t
175thread_remove_qos_policy(thread_t thread)
176{
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
	thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
	thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192}
193
194
195boolean_t
196thread_is_static_param(thread_t thread)
197{
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203}
204
205/*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210static int
211thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212{
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255}
256
257/*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262
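/*
 * thread_policy_set:
 *
 * Userspace-facing entry point for applying a scheduling policy flavor to a
 * thread.  Unless the -qos-policy-allow boot-arg is set, static-param threads
 * are rejected, THREAD_QOS_POLICY cannot be set directly, and
 * time-constraint-with-priority requests must use BASEPRI_RTQUEUES.  Any
 * existing QoS policy is removed first and restored if the set fails.
 *
 * Called with nothing locked.
 */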
263kern_return_t
264thread_policy_set(
265 thread_t thread,
266 thread_policy_flavor_t flavor,
267 thread_policy_t policy_info,
268 mach_msg_type_number_t count)
269{
270 thread_qos_policy_data_t req_qos;
271 kern_return_t kr;
272
273 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274
275 if (thread == THREAD_NULL) {
276 return KERN_INVALID_ARGUMENT;
277 }
278
279 if (!allow_qos_policy_set) {
280 if (thread_is_static_param(thread)) {
281 return KERN_POLICY_STATIC;
282 }
283
284 if (flavor == THREAD_QOS_POLICY) {
285 return KERN_INVALID_ARGUMENT;
286 }
287
288 if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
289 if (count < THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT) {
290 return KERN_INVALID_ARGUMENT;
291 }
292 thread_time_constraint_with_priority_policy_t info = (thread_time_constraint_with_priority_policy_t)policy_info;
293 if (info->priority != BASEPRI_RTQUEUES) {
294 return KERN_INVALID_ARGUMENT;
295 }
296 }
297 }
298
299 if (flavor == THREAD_TIME_CONSTRAINT_POLICY || flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
300 thread_work_interval_flags_t th_wi_flags = os_atomic_load(
301 &thread->th_work_interval_flags, relaxed);
302 if ((th_wi_flags & TH_WORK_INTERVAL_FLAGS_HAS_WORKLOAD_ID) &&
303 !(th_wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED)) {
304 /* Fail requests to become realtime for threads having joined workintervals
305 * with workload ID that don't have the rt-allowed flag. */
306 return KERN_INVALID_POLICY;
307 }
308 }
309
310 /* Threads without static_param set reset their QoS when other policies are applied. */
311 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
312 /* Store the existing tier, if we fail this call it is used to reset back. */
313 req_qos.qos_tier = thread->requested_policy.thrp_qos;
314 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
315
316 kr = thread_remove_qos_policy(thread);
317 if (kr != KERN_SUCCESS) {
318 return kr;
319 }
320 }
321
322 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
323
324 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
325 if (kr != KERN_SUCCESS) {
326 /* Reset back to our original tier as the set failed. */
			(void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
328 }
329 }
330
331 return kr;
332}
333
334static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, period) == offsetof(thread_time_constraint_policy_data_t, period));
335static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, computation) == offsetof(thread_time_constraint_policy_data_t, computation));
336static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, constraint) == offsetof(thread_time_constraint_policy_data_t, constraint));
337static_assert(offsetof(thread_time_constraint_with_priority_policy_data_t, preemptible) == offsetof(thread_time_constraint_policy_data_t, preemptible));
338
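/*
 * thread_policy_set_internal:
 *
 * Apply a policy flavor to the thread without the static-param and QoS
 * restrictions enforced by thread_policy_set().
 *
 * Called with nothing locked; takes the thread mutex.
 */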
339kern_return_t
340thread_policy_set_internal(
341 thread_t thread,
342 thread_policy_flavor_t flavor,
343 thread_policy_t policy_info,
344 mach_msg_type_number_t count)
345{
346 kern_return_t result = KERN_SUCCESS;
347 struct task_pend_token pend_token = {};
348
349 thread_mtx_lock(thread);
350 if (!thread->active) {
351 thread_mtx_unlock(thread);
352
353 return KERN_TERMINATED;
354 }
355
356 switch (flavor) {
357 case THREAD_EXTENDED_POLICY:
358 {
359 boolean_t timeshare = TRUE;
360
361 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
362 thread_extended_policy_t info;
363
364 info = (thread_extended_policy_t)policy_info;
365 timeshare = info->timeshare;
366 }
367
368 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
369
370 spl_t s = splsched();
371 thread_lock(thread);
372
373 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
374
375 thread_unlock(thread);
376 splx(s);
377
378 /*
379 * The thread may be demoted with RT_DISALLOWED but has just
380 * changed its sched mode to TIMESHARE or FIXED. Make sure to
381 * undemote the thread so the new sched mode takes effect.
382 */
383 thread_rt_evaluate(thread);
384
385 pend_token.tpt_update_thread_sfi = 1;
386
387 break;
388 }
389
390 case THREAD_TIME_CONSTRAINT_POLICY:
391 case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
392 {
393 thread_time_constraint_with_priority_policy_t info;
394
395 mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
396 THREAD_TIME_CONSTRAINT_POLICY_COUNT :
397 THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);
398
399 if (count < min_count) {
400 result = KERN_INVALID_ARGUMENT;
401 break;
402 }
403
404 info = (thread_time_constraint_with_priority_policy_t)policy_info;
405
406
407 if (info->constraint < info->computation ||
408 info->computation > max_rt_quantum ||
409 info->computation < min_rt_quantum) {
410 result = KERN_INVALID_ARGUMENT;
411 break;
412 }
413
414 if (info->computation < (info->constraint / 2)) {
415 info->computation = (info->constraint / 2);
416 if (info->computation > max_rt_quantum) {
417 info->computation = max_rt_quantum;
418 }
419 }
420
421 if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
422 if ((info->priority < BASEPRI_RTQUEUES) || (info->priority > MAXPRI)) {
423 result = KERN_INVALID_ARGUMENT;
424 break;
425 }
426 }
427
428 spl_t s = splsched();
429 thread_lock(thread);
430
431 thread->realtime.period = info->period;
432 thread->realtime.computation = info->computation;
433 thread->realtime.constraint = info->constraint;
434 thread->realtime.preemptible = info->preemptible;
435
436 /*
437 * If the thread has a work interval driven policy, the priority
438 * offset has been set by the work interval.
439 */
440 if (!thread->requested_policy.thrp_wi_driven) {
441 if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
442 thread->realtime.priority_offset = (uint8_t)(info->priority - BASEPRI_RTQUEUES);
443 } else {
444 thread->realtime.priority_offset = 0;
445 }
446 }
447
		thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
449
450 thread_unlock(thread);
451 splx(s);
452
453 thread_rt_evaluate(thread);
454
455 pend_token.tpt_update_thread_sfi = 1;
456
457 break;
458 }
459
460 case THREAD_PRECEDENCE_POLICY:
461 {
462 thread_precedence_policy_t info;
463
464 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
465 result = KERN_INVALID_ARGUMENT;
466 break;
467 }
468 info = (thread_precedence_policy_t)policy_info;
469
470 spl_t s = splsched();
471 thread_lock(thread);
472
473 thread->importance = info->importance;
474
475 thread_recompute_priority(thread);
476
477 thread_unlock(thread);
478 splx(s);
479
480 break;
481 }
482
483 case THREAD_AFFINITY_POLICY:
484 {
485 extern boolean_t affinity_sets_enabled;
486 thread_affinity_policy_t info;
487
488 if (!affinity_sets_enabled) {
489 result = KERN_INVALID_POLICY;
490 break;
491 }
492
493 if (!thread_affinity_is_supported()) {
494 result = KERN_NOT_SUPPORTED;
495 break;
496 }
497 if (count < THREAD_AFFINITY_POLICY_COUNT) {
498 result = KERN_INVALID_ARGUMENT;
499 break;
500 }
501
502 info = (thread_affinity_policy_t) policy_info;
503 /*
504 * Unlock the thread mutex here and
505 * return directly after calling thread_affinity_set().
506 * This is necessary for correct lock ordering because
507 * thread_affinity_set() takes the task lock.
508 */
509 thread_mtx_unlock(thread);
		return thread_affinity_set(thread, info->affinity_tag);
511 }
512
513#if !defined(XNU_TARGET_OS_OSX)
514 case THREAD_BACKGROUND_POLICY:
515 {
516 thread_background_policy_t info;
517
518 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
519 result = KERN_INVALID_ARGUMENT;
520 break;
521 }
522
523 if (get_threadtask(thread) != current_task()) {
524 result = KERN_PROTECTION_FAILURE;
525 break;
526 }
527
528 info = (thread_background_policy_t) policy_info;
529
530 int enable;
531
532 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
533 enable = TASK_POLICY_ENABLE;
534 } else {
535 enable = TASK_POLICY_DISABLE;
536 }
537
538 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
539
540 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
541
542 break;
543 }
544#endif /* !defined(XNU_TARGET_OS_OSX) */
545
546 case THREAD_THROUGHPUT_QOS_POLICY:
547 {
548 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
549 thread_throughput_qos_t tqos;
550
551 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
552 result = KERN_INVALID_ARGUMENT;
553 break;
554 }
555
556 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
557 break;
558 }
559
560 tqos = qos_extract(info->thread_throughput_qos_tier);
561
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
564
565 break;
566 }
567
568 case THREAD_LATENCY_QOS_POLICY:
569 {
570 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
571 thread_latency_qos_t lqos;
572
573 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
574 result = KERN_INVALID_ARGUMENT;
575 break;
576 }
577
578 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
579 break;
580 }
581
582 lqos = qos_extract(info->thread_latency_qos_tier);
583
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
		    TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
586
587 break;
588 }
589
590 case THREAD_QOS_POLICY:
591 {
592 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
593
594 if (count < THREAD_QOS_POLICY_COUNT) {
595 result = KERN_INVALID_ARGUMENT;
596 break;
597 }
598
599 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
600 result = KERN_INVALID_ARGUMENT;
601 break;
602 }
603
604 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
605 result = KERN_INVALID_ARGUMENT;
606 break;
607 }
608
609 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
610 result = KERN_INVALID_ARGUMENT;
611 break;
612 }
613
		proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
		    info->qos_tier, -info->tier_importance, &pend_token);
616
617 break;
618 }
619
620 default:
621 result = KERN_INVALID_ARGUMENT;
622 break;
623 }
624
625 thread_mtx_unlock(thread);
626
	thread_policy_update_complete_unlocked(thread, &pend_token);
628
629 return result;
630}
631
632/*
633 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
634 * Both result in FIXED mode scheduling.
635 */
636static sched_mode_t
637convert_policy_to_sched_mode(integer_t policy)
638{
639 switch (policy) {
640 case POLICY_TIMESHARE:
641 return TH_MODE_TIMESHARE;
642 case POLICY_RR:
643 case POLICY_FIFO:
644 return TH_MODE_FIXED;
645 default:
646 panic("unexpected sched policy: %d", policy);
647 return TH_MODE_NONE;
648 }
649}
650
651/*
652 * Called either with the thread mutex locked
653 * or from the pthread kext in a 'safe place'.
654 */
655static kern_return_t
656thread_set_mode_and_absolute_pri_internal(thread_t thread,
657 sched_mode_t mode,
658 integer_t priority,
659 task_pend_token_t pend_token)
660{
661 kern_return_t kr = KERN_SUCCESS;
662
663 spl_t s = splsched();
664 thread_lock(thread);
665
666 /* This path isn't allowed to change a thread out of realtime. */
667 if ((thread->sched_mode == TH_MODE_REALTIME) ||
668 (thread->saved_mode == TH_MODE_REALTIME)) {
669 kr = KERN_FAILURE;
670 goto unlock;
671 }
672
673 if (thread->policy_reset) {
674 kr = KERN_SUCCESS;
675 goto unlock;
676 }
677
678 sched_mode_t old_mode = thread->sched_mode;
679 integer_t old_base_pri = thread->base_pri;
680 integer_t old_sched_pri = thread->sched_pri;
681
682 /*
683 * Reverse engineer and apply the correct importance value
684 * from the requested absolute priority value.
685 *
686 * TODO: Store the absolute priority value instead
687 */
688
689 if (priority >= thread->max_priority) {
690 priority = thread->max_priority - thread->task_priority;
691 } else if (priority >= MINPRI_KERNEL) {
692 priority -= MINPRI_KERNEL;
693 } else if (priority >= MINPRI_RESERVED) {
694 priority -= MINPRI_RESERVED;
695 } else {
696 priority -= BASEPRI_DEFAULT;
697 }
698
699 priority += thread->task_priority;
700
701 if (priority > thread->max_priority) {
702 priority = thread->max_priority;
703 } else if (priority < MINPRI) {
704 priority = MINPRI;
705 }
706
707 thread->importance = priority - thread->task_priority;
708
709 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
710
711 if (mode != old_mode) {
712 pend_token->tpt_update_thread_sfi = 1;
713 }
714
715 if (thread->base_pri != old_base_pri ||
716 thread->sched_pri != old_sched_pri) {
717 pend_token->tpt_update_turnstile = 1;
718 }
719
720unlock:
721 thread_unlock(thread);
722 splx(s);
723
724 return kr;
725}
726
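/*
 * Freeze the current thread's base priority so that recomputations are
 * deferred until thread_unfreeze_base_pri(), which applies any pending
 * req_base_pri change and reports whether it likely caused a preemption.
 */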
727void
728thread_freeze_base_pri(thread_t thread)
729{
730 assert(thread == current_thread());
731
732 spl_t s = splsched();
733 thread_lock(thread);
734
735 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
736 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
737
738 thread_unlock(thread);
739 splx(s);
740}
741
742bool
743thread_unfreeze_base_pri(thread_t thread)
744{
745 assert(thread == current_thread());
746 integer_t base_pri;
747 ast_t ast = 0;
748
749 spl_t s = splsched();
750 thread_lock(thread);
751
752 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
753 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
754
755 base_pri = thread->req_base_pri;
756 if (base_pri != thread->base_pri) {
757 /*
758 * This function returns "true" if the base pri change
759 * is the most likely cause for the preemption.
760 */
		sched_set_thread_base_priority(thread, base_pri);
762 ast = ast_peek(AST_PREEMPT);
763 }
764
765 thread_unlock(thread);
766 splx(s);
767
768 return ast != 0;
769}
770
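/*
 * Translate between QoS classes and workqueue thread priorities using
 * thread_qos_policy_params.  Priorities above the user-interactive band map
 * to THREAD_QOS_UNSPECIFIED so the workqueue can tag them as above-UI work.
 */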
771uint8_t
772thread_workq_pri_for_qos(thread_qos_t qos)
773{
774 assert(qos < THREAD_QOS_LAST);
775 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
776}
777
778thread_qos_t
779thread_workq_qos_for_pri(int priority)
780{
781 thread_qos_t qos;
782 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
783 // indicate that workq should map >UI threads to workq's
784 // internal notation for above-UI work.
785 return THREAD_QOS_UNSPECIFIED;
786 }
787 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
788 // map a given priority up to the next nearest qos band.
789 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
790 return qos;
791 }
792 }
793 return THREAD_QOS_MAINTENANCE;
794}
795
/*
 * private interface for pthread workqueues
 *
 * Reset the thread's QoS and clear its workqueue QoS override
 * May be called with spinlocks held
 * Thread mutex lock is not held
 */
803void
804thread_reset_workq_qos(thread_t thread, uint32_t qos)
805{
806 struct task_pend_token pend_token = {};
807
808 assert(qos < THREAD_QOS_LAST);
809
810 spl_t s = splsched();
811 thread_lock(thread);
812
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
	    &pend_token);
818
819 assert(pend_token.tpt_update_sockets == 0);
820
821 thread_unlock(thread);
822 splx(s);
823
	thread_policy_update_complete_unlocked(thread, &pend_token);
825}
826
/*
 * private interface for pthread workqueues
 *
 * Set a workqueue QoS override on the thread
 * May be called with spinlocks held
 * Thread mutex lock is held
 */
834void
835thread_set_workq_override(thread_t thread, uint32_t qos)
836{
837 struct task_pend_token pend_token = {};
838
839 assert(qos < THREAD_QOS_LAST);
840
841 spl_t s = splsched();
842 thread_lock(thread);
843
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
846
847 assert(pend_token.tpt_update_sockets == 0);
848
849 thread_unlock(thread);
850 splx(s);
851
	thread_policy_update_complete_unlocked(thread, &pend_token);
853}
854
855/*
856 * private interface for pthread workqueues
857 *
858 * Set scheduling policy & absolute priority for thread
859 * May be called with spinlocks held
860 * Thread mutex lock is not held
861 */
862void
863thread_set_workq_pri(thread_t thread,
864 thread_qos_t qos,
865 integer_t priority,
866 integer_t policy)
867{
868 struct task_pend_token pend_token = {};
869 sched_mode_t mode = convert_policy_to_sched_mode(policy);
870
871 assert(qos < THREAD_QOS_LAST);
872 assert(thread->static_param);
873
874 if (!thread->static_param || !thread->active) {
875 return;
876 }
877
878 spl_t s = splsched();
879 thread_lock(thread);
880
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
	proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
	    TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
	    0, &pend_token);
886
887 thread_unlock(thread);
888 splx(s);
889
890 /* Concern: this doesn't hold the mutex... */
891
892 __assert_only kern_return_t kr;
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
	    &pend_token);
895 assert(kr == KERN_SUCCESS);
896
897 assert(pend_token.tpt_update_sockets == 0);
898
	thread_policy_update_complete_unlocked(thread, &pend_token);
900}
901
902/*
903 * thread_set_mode_and_absolute_pri:
904 *
905 * Set scheduling policy & absolute priority for thread, for deprecated
906 * thread_set_policy and thread_policy interfaces.
907 *
908 * Called with nothing locked.
909 */
910kern_return_t
911thread_set_mode_and_absolute_pri(thread_t thread,
912 integer_t policy,
913 integer_t priority)
914{
915 kern_return_t kr = KERN_SUCCESS;
916 struct task_pend_token pend_token = {};
917
918 sched_mode_t mode = convert_policy_to_sched_mode(policy);
919
920 thread_mtx_lock(thread);
921
922 if (!thread->active) {
923 kr = KERN_TERMINATED;
924 goto unlock;
925 }
926
927 if (thread_is_static_param(thread)) {
928 kr = KERN_POLICY_STATIC;
929 goto unlock;
930 }
931
932 /* Setting legacy policies on threads kills the current QoS */
933 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
		thread_remove_qos_policy_locked(thread, &pend_token);
935 }
936
	kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
938
939unlock:
940 thread_mtx_unlock(thread);
941
	thread_policy_update_complete_unlocked(thread, &pend_token);
943
944 return kr;
945}
946
947/*
948 * Set the thread's requested mode and recompute priority
949 * Called with thread mutex and thread locked
950 *
951 * TODO: Mitigate potential problems caused by moving thread to end of runq
952 * whenever its priority is recomputed
953 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
954 */
955static void
956thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
957{
958 if (thread->policy_reset) {
959 return;
960 }
961
962 boolean_t removed = thread_run_queue_remove(thread);
963
964 sched_set_thread_mode_user(thread, mode);
965
966 thread_recompute_priority(thread);
967
968 if (removed) {
		thread_run_queue_reinsert(thread, SCHED_TAILQ);
970 }
971}
972
973/* called at splsched with thread lock locked */
974static void
975thread_update_qos_cpu_time_locked(thread_t thread)
976{
977 task_t task = get_threadtask(thread);
978 uint64_t timer_sum, timer_delta;
979
980 /*
981 * This is only as accurate the thread's last context switch or user/kernel
982 * transition (unless precise user/kernel time is disabled).
983 *
984 * TODO: Consider running an update operation here to update it first.
985 * Maybe doable with interrupts disabled from current thread.
986 * If the thread is on a different core, may not be easy to get right.
987 */
988
989 timer_sum = recount_thread_time_mach(thread);
990 timer_delta = timer_sum - thread->vtimer_qos_save;
991
992 thread->vtimer_qos_save = timer_sum;
993
994 uint64_t* task_counter = NULL;
995
996 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
997 switch (thread->effective_policy.thep_qos) {
998 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
999 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
1000 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
1001 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
1002 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
1003 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
1004 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
1005 default:
1006 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
1007 }
1008
1009 OSAddAtomic64(timer_delta, task_counter);
1010
1011 /* Update the task-level qos stats atomically, because we don't have the task lock. */
1012 switch (thread->requested_policy.thrp_qos) {
1013 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
1014 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
1015 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
1016 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
1017 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
1018 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
1019 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
1020 default:
1021 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
1022 }
1023
1024 OSAddAtomic64(timer_delta, task_counter);
1025}
1026
1027/*
1028 * called with no thread locks held
1029 * may hold task lock
1030 */
1031void
1032thread_update_qos_cpu_time(thread_t thread)
1033{
1034 thread_mtx_lock(thread);
1035
1036 spl_t s = splsched();
1037 thread_lock(thread);
1038
1039 thread_update_qos_cpu_time_locked(thread);
1040
1041 thread_unlock(thread);
1042 splx(s);
1043
1044 thread_mtx_unlock(thread);
1045}
1046
1047/*
1048 * Calculate base priority from thread attributes, and set it on the thread
1049 *
1050 * Called with thread_lock and thread mutex held.
1051 */
1052void
1053thread_recompute_priority(
1054 thread_t thread)
1055{
1056 integer_t priority;
1057 integer_t adj_priority;
1058 bool wi_priority = false;
1059
1060 if (thread->policy_reset) {
1061 return;
1062 }
1063
1064 if (thread->sched_mode == TH_MODE_REALTIME) {
1065 uint8_t i = thread->realtime.priority_offset;
1066 assert((i >= 0) && (i < NRTQS));
1067 priority = BASEPRI_RTQUEUES + i;
1068
1069 sched_set_thread_base_priority(thread, priority);
1070 if (thread->realtime.deadline == RT_DEADLINE_NONE) {
1071 /* Make sure the thread has a valid deadline */
1072 uint64_t ctime = mach_absolute_time();
1073 thread->realtime.deadline = thread->realtime.constraint + ctime;
1074 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SET_RT_DEADLINE) | DBG_FUNC_NONE,
1075 (uintptr_t)thread_tid(thread), thread->realtime.deadline, thread->realtime.computation, 1);
1076 }
1077 return;
1078
1079 /*
1080 * A thread may have joined a RT work interval but then never
1081 * changed its sched mode or have been demoted. RT work
1082 * intervals will have RT priorities - ignore the priority if
1083 * the thread isn't RT.
1084 */
1085 } else if (thread->effective_policy.thep_wi_driven &&
1086 work_interval_get_priority(thread) < BASEPRI_RTQUEUES) {
1087 priority = work_interval_get_priority(thread);
1088 wi_priority = true;
1089 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1090 int qos = thread->effective_policy.thep_qos;
1091 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1092 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1093 int qos_scaled_relprio;
1094
1095 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1096 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1097
1098 priority = thread_qos_policy_params.qos_pri[qos];
1099 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1100
1101 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1102 /* Bump priority 46 to 47 when in a frontmost app */
1103 qos_scaled_relprio += 1;
1104 }
1105
1106 /* TODO: factor in renice priority here? */
1107
1108 priority += qos_scaled_relprio;
1109 } else {
1110 if (thread->importance > MAXPRI) {
1111 priority = MAXPRI;
1112 } else if (thread->importance < -MAXPRI) {
1113 priority = -MAXPRI;
1114 } else {
1115 priority = thread->importance;
1116 }
1117
1118 priority += thread->task_priority;
1119 }
1120
1121 /* Boost the priority of threads which are RT demoted. */
1122 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
1123 priority = MAX(priority, MAXPRI_USER);
1124 }
1125
1126 priority = MAX(priority, thread->user_promotion_basepri);
1127
1128 /*
1129 * Clamp priority back into the allowed range for this task.
1130 * The initial priority value could be out of this range due to:
1131 * Task clamped to BG or Utility (max-pri is 4, or 20)
1132 * Task is user task (max-pri is 63)
1133 * Task is kernel task (max-pri is 95)
1134 * Note that thread->importance is user-settable to any integer
1135 * via THREAD_PRECEDENCE_POLICY.
1136 */
1137 adj_priority = priority;
1138 adj_priority = MIN(adj_priority, thread->max_priority);
1139 adj_priority = MAX(adj_priority, MINPRI);
1140
1141 /* Allow workload driven priorities to exceed max_priority. */
1142 if (wi_priority) {
1143 adj_priority = MAX(adj_priority, priority);
1144 }
1145
1146 /* Allow priority to exceed max_priority for promotions. */
1147 if (thread->effective_policy.thep_promote_above_task) {
1148 adj_priority = MAX(adj_priority, thread->user_promotion_basepri);
1149 }
1150 priority = adj_priority;
1151 assert3u(priority, <=, MAXPRI);
1152
1153 if (thread->saved_mode == TH_MODE_REALTIME &&
1154 sched_thread_mode_has_demotion(thread, TH_SFLAG_FAILSAFE)) {
1155 priority = DEPRESSPRI;
1156 }
1157
1158 if (thread->effective_policy.thep_terminated == TRUE) {
1159 /*
1160 * We temporarily want to override the expected priority to
1161 * ensure that the thread exits in a timely manner.
1162 * Note that this is allowed to exceed thread->max_priority
1163 * so that the thread is no longer clamped to background
1164 * during the final exit phase.
1165 */
1166 if (priority < thread->task_priority) {
1167 priority = thread->task_priority;
1168 }
1169 if (priority < BASEPRI_DEFAULT) {
1170 priority = BASEPRI_DEFAULT;
1171 }
1172 }
1173
1174#if !defined(XNU_TARGET_OS_OSX)
1175 /* No one can have a base priority less than MAXPRI_THROTTLE */
1176 if (priority < MAXPRI_THROTTLE) {
1177 priority = MAXPRI_THROTTLE;
1178 }
1179#endif /* !defined(XNU_TARGET_OS_OSX) */
1180
1181 sched_set_thread_base_priority(thread, priority);
1182}
1183
1184/* Called with the task lock held, but not the thread mutex or spinlock */
1185void
1186thread_policy_update_tasklocked(
1187 thread_t thread,
1188 integer_t priority,
1189 integer_t max_priority,
1190 task_pend_token_t pend_token)
1191{
1192 thread_mtx_lock(thread);
1193
1194 if (!thread->active || thread->policy_reset) {
1195 thread_mtx_unlock(thread);
1196 return;
1197 }
1198
1199 spl_t s = splsched();
1200 thread_lock(thread);
1201
1202 __unused
1203 integer_t old_max_priority = thread->max_priority;
1204
1205 assert(priority >= INT16_MIN && priority <= INT16_MAX);
1206 thread->task_priority = (int16_t)priority;
1207
1208 assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1209 thread->max_priority = (int16_t)max_priority;
1210
1211 /*
1212 * When backgrounding a thread, realtime and fixed priority threads
1213 * should be demoted to timeshare background threads.
1214 *
1215 * TODO: Do this inside the thread policy update routine in order to avoid double
1216 * remove/reinsert for a runnable thread
1217 */
1218 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1219 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1220 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1221 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1222 }
1223
1224 thread_policy_update_spinlocked(thread, true, pend_token);
1225
1226 thread_unlock(thread);
1227 splx(s);
1228
1229 thread_mtx_unlock(thread);
1230}
1231
1232/*
1233 * Reset thread to default state in preparation for termination
1234 * Called with thread mutex locked
1235 *
1236 * Always called on current thread, so we don't need a run queue remove
1237 */
1238void
1239thread_policy_reset(
1240 thread_t thread)
1241{
1242 spl_t s;
1243
1244 assert(thread == current_thread());
1245
1246 s = splsched();
1247 thread_lock(thread);
1248
1249 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1250 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1251 }
1252
1253 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1254 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1255 }
1256
1257 if (thread->sched_flags & TH_SFLAG_RT_DISALLOWED) {
1258 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
1259 }
1260
1261 /* At this point, the various demotions should be inactive */
1262 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1263 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1264
1265 /* Reset thread back to task-default basepri and mode */
1266 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(get_threadtask(thread));
1267
	sched_set_thread_mode(thread, newmode);
1269
1270 thread->importance = 0;
1271
1272 /* Prevent further changes to thread base priority or mode */
1273 thread->policy_reset = 1;
1274
	sched_set_thread_base_priority(thread, thread->task_priority);
1276
1277 thread_unlock(thread);
1278 splx(s);
1279}
1280
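/*
 * thread_policy_get:
 *
 * Return the current parameters for a policy flavor, or the defaults when
 * *get_default is set on entry; *get_default is updated to report whether
 * defaults were returned.  THREAD_POLICY_STATE is restricted to privileged
 * tasks.
 *
 * Called with nothing locked.
 */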
1281kern_return_t
1282thread_policy_get(
1283 thread_t thread,
1284 thread_policy_flavor_t flavor,
1285 thread_policy_t policy_info,
1286 mach_msg_type_number_t *count,
1287 boolean_t *get_default)
1288{
1289 kern_return_t result = KERN_SUCCESS;
1290
1291 if (thread == THREAD_NULL) {
1292 return KERN_INVALID_ARGUMENT;
1293 }
1294
1295 thread_mtx_lock(thread);
1296 if (!thread->active) {
1297 thread_mtx_unlock(thread);
1298
1299 return KERN_TERMINATED;
1300 }
1301
1302 switch (flavor) {
1303 case THREAD_EXTENDED_POLICY:
1304 {
1305 boolean_t timeshare = TRUE;
1306
1307 if (!(*get_default)) {
1308 spl_t s = splsched();
1309 thread_lock(thread);
1310
1311 if ((thread->sched_mode != TH_MODE_REALTIME) &&
1312 (thread->saved_mode != TH_MODE_REALTIME)) {
1313 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1314 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1315 } else {
1316 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1317 }
1318 } else {
1319 *get_default = TRUE;
1320 }
1321
1322 thread_unlock(thread);
1323 splx(s);
1324 }
1325
1326 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1327 thread_extended_policy_t info;
1328
1329 info = (thread_extended_policy_t)policy_info;
1330 info->timeshare = timeshare;
1331 }
1332
1333 break;
1334 }
1335
1336 case THREAD_TIME_CONSTRAINT_POLICY:
1337 case THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY:
1338 {
1339 thread_time_constraint_with_priority_policy_t info;
1340
1341 mach_msg_type_number_t min_count = (flavor == THREAD_TIME_CONSTRAINT_POLICY ?
1342 THREAD_TIME_CONSTRAINT_POLICY_COUNT :
1343 THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY_COUNT);
1344
1345 if (*count < min_count) {
1346 result = KERN_INVALID_ARGUMENT;
1347 break;
1348 }
1349
1350 info = (thread_time_constraint_with_priority_policy_t)policy_info;
1351
1352 if (!(*get_default)) {
1353 spl_t s = splsched();
1354 thread_lock(thread);
1355
1356 if ((thread->sched_mode == TH_MODE_REALTIME) ||
1357 (thread->saved_mode == TH_MODE_REALTIME)) {
1358 info->period = thread->realtime.period;
1359 info->computation = thread->realtime.computation;
1360 info->constraint = thread->realtime.constraint;
1361 info->preemptible = thread->realtime.preemptible;
1362 if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
1363 info->priority = thread->realtime.priority_offset + BASEPRI_RTQUEUES;
1364 }
1365 } else {
1366 *get_default = TRUE;
1367 }
1368
1369 thread_unlock(thread);
1370 splx(s);
1371 }
1372
1373 if (*get_default) {
1374 info->period = 0;
1375 info->computation = default_timeshare_computation;
1376 info->constraint = default_timeshare_constraint;
1377 info->preemptible = TRUE;
1378 if (flavor == THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY) {
1379 info->priority = BASEPRI_RTQUEUES;
1380 }
1381 }
1382
1383
1384 break;
1385 }
1386
1387 case THREAD_PRECEDENCE_POLICY:
1388 {
1389 thread_precedence_policy_t info;
1390
1391 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1392 result = KERN_INVALID_ARGUMENT;
1393 break;
1394 }
1395
1396 info = (thread_precedence_policy_t)policy_info;
1397
1398 if (!(*get_default)) {
1399 spl_t s = splsched();
1400 thread_lock(thread);
1401
1402 info->importance = thread->importance;
1403
1404 thread_unlock(thread);
1405 splx(s);
1406 } else {
1407 info->importance = 0;
1408 }
1409
1410 break;
1411 }
1412
1413 case THREAD_AFFINITY_POLICY:
1414 {
1415 thread_affinity_policy_t info;
1416
1417 if (!thread_affinity_is_supported()) {
1418 result = KERN_NOT_SUPPORTED;
1419 break;
1420 }
1421 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1422 result = KERN_INVALID_ARGUMENT;
1423 break;
1424 }
1425
1426 info = (thread_affinity_policy_t)policy_info;
1427
1428 if (!(*get_default)) {
1429 info->affinity_tag = thread_affinity_get(thread);
1430 } else {
1431 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1432 }
1433
1434 break;
1435 }
1436
1437 case THREAD_POLICY_STATE:
1438 {
1439 thread_policy_state_t info;
1440
1441 if (*count < THREAD_POLICY_STATE_COUNT) {
1442 result = KERN_INVALID_ARGUMENT;
1443 break;
1444 }
1445
1446 /* Only root can get this info */
		if (!task_is_privileged(current_task())) {
1448 result = KERN_PROTECTION_FAILURE;
1449 break;
1450 }
1451
1452 info = (thread_policy_state_t)(void*)policy_info;
1453
1454 if (!(*get_default)) {
1455 info->flags = 0;
1456
1457 spl_t s = splsched();
1458 thread_lock(thread);
1459
1460 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1461
1462 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1463 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1464
1465 info->thps_user_promotions = 0;
1466 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1467 info->thps_ipc_overrides = thread->kevent_overrides;
1468
1469 proc_get_thread_policy_bitfield(thread, info);
1470
1471 thread_unlock(thread);
1472 splx(s);
1473 } else {
1474 info->requested = 0;
1475 info->effective = 0;
1476 info->pending = 0;
1477 }
1478
1479 break;
1480 }
1481
1482 case THREAD_REQUESTED_STATE_POLICY:
1483 {
1484 if (*count < THREAD_REQUESTED_STATE_POLICY_COUNT) {
1485 result = KERN_INVALID_ARGUMENT;
1486 break;
1487 }
1488
1489 thread_requested_qos_policy_t info = (thread_requested_qos_policy_t) policy_info;
1490 struct thread_requested_policy *req_policy = &thread->requested_policy;
1491
1492 info->thrq_base_qos = req_policy->thrp_qos;
1493 info->thrq_qos_relprio = req_policy->thrp_qos_relprio;
1494 info->thrq_qos_override = req_policy->thrp_qos_override;
1495 info->thrq_qos_promote = req_policy->thrp_qos_promote;
1496 info->thrq_qos_kevent_override = req_policy->thrp_qos_kevent_override;
1497 info->thrq_qos_workq_override = req_policy->thrp_qos_workq_override;
1498 info->thrq_qos_wlsvc_override = req_policy->thrp_qos_wlsvc_override;
1499
1500 break;
1501 }
1502
1503 case THREAD_LATENCY_QOS_POLICY:
1504 {
1505 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1506 thread_latency_qos_t plqos;
1507
1508 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1509 result = KERN_INVALID_ARGUMENT;
1510 break;
1511 }
1512
1513 if (*get_default) {
1514 plqos = 0;
1515 } else {
1516 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1517 }
1518
1519 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1520 }
1521 break;
1522
1523 case THREAD_THROUGHPUT_QOS_POLICY:
1524 {
1525 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1526 thread_throughput_qos_t ptqos;
1527
1528 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1529 result = KERN_INVALID_ARGUMENT;
1530 break;
1531 }
1532
1533 if (*get_default) {
1534 ptqos = 0;
1535 } else {
1536 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1537 }
1538
1539 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1540 }
1541 break;
1542
1543 case THREAD_QOS_POLICY:
1544 {
1545 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1546
1547 if (*count < THREAD_QOS_POLICY_COUNT) {
1548 result = KERN_INVALID_ARGUMENT;
1549 break;
1550 }
1551
1552 if (!(*get_default)) {
1553 int relprio_value = 0;
			info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
			    TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1556
1557 info->tier_importance = -relprio_value;
1558 } else {
1559 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1560 info->tier_importance = 0;
1561 }
1562
1563 break;
1564 }
1565
1566 default:
1567 result = KERN_INVALID_ARGUMENT;
1568 break;
1569 }
1570
1571 thread_mtx_unlock(thread);
1572
1573 return result;
1574}
1575
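/*
 * Compute the initial effective policy for a newly created thread from its
 * requested policy and the owning task's effective policy.
 */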
1576void
1577thread_policy_create(thread_t thread)
1578{
1579 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1580 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1581 thread_tid(thread), theffective_0(thread),
1582 theffective_1(thread), thread->base_pri, 0);
1583
1584 /* We pass a pend token but ignore it */
1585 struct task_pend_token pend_token = {};
1586
	thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1588
1589 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1590 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1591 thread_tid(thread), theffective_0(thread),
1592 theffective_1(thread), thread->base_pri, 0);
1593}
1594
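/*
 * Trace-wrapped entry point for re-evaluating a thread's effective policy.
 * Called with the thread spinlock held.
 */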
1595static void
1596thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1597{
1598 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1599 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1600 thread_tid(thread), theffective_0(thread),
1601 theffective_1(thread), thread->base_pri, 0);
1602
1603 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1604
1605 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1606 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1607 thread_tid(thread), theffective_0(thread),
1608 theffective_1(thread), thread->base_pri, 0);
1609}
1610
1611
1612
1613/*
1614 * One thread state update function TO RULE THEM ALL
1615 *
1616 * This function updates the thread effective policy fields
1617 * and pushes the results to the relevant subsystems.
1618 *
1619 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1620 */
1621static void
1622thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1623 task_pend_token_t pend_token)
1624{
1625 /*
1626 * Step 1:
1627 * Gather requested policy and effective task state
1628 */
1629
1630 const struct thread_requested_policy requested = thread->requested_policy;
1631 const struct task_effective_policy task_effective = get_threadtask(thread)->effective_policy;
1632
1633 /*
1634 * Step 2:
1635 * Calculate new effective policies from requested policy, task and thread state
1636 * Rules:
1637 * Don't change requested, it won't take effect
1638 */
1639
1640 struct thread_effective_policy next = {};
1641
1642 next.thep_wi_driven = requested.thrp_wi_driven;
1643
1644 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1645
1646 uint32_t next_qos = requested.thrp_qos;
1647
1648 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1649 next_qos = MAX(requested.thrp_qos_override, next_qos);
1650 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1651 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1652 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1653 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1654 }
1655
1656 if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1657 requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1658 /*
1659 * This thread is turnstile-boosted higher than the adaptive clamp
1660 * by a synchronous waiter. Allow that to override the adaptive
1661 * clamp temporarily for this thread only.
1662 */
1663 next.thep_promote_above_task = true;
1664 next_qos = requested.thrp_qos_promote;
1665 }
1666
1667 next.thep_qos = next_qos;
1668
1669 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1670 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1671 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1672 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1673 } else {
1674 next.thep_qos = task_effective.tep_qos_clamp;
1675 }
1676 next.thep_wi_driven = 0;
1677 }
1678
1679 /*
1680 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1681 * This allows QoS promotions to work properly even after the process is unclamped.
1682 */
1683 next.thep_qos_promote = next.thep_qos;
1684
1685 /* The ceiling only applies to threads that are in the QoS world */
1686 /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1687 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1688 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1689 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1690 }
1691
1692 /*
1693 * The QoS relative priority is only applicable when the original programmer's
1694 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1695 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1696 * since otherwise it would be lower than unclamped threads. Similarly, in the
1697 * presence of boosting, the programmer doesn't know what other actors
1698 * are boosting the thread.
1699 */
1700 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1701 (requested.thrp_qos == next.thep_qos) &&
1702 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1703 next.thep_qos_relprio = requested.thrp_qos_relprio;
1704 } else {
1705 next.thep_qos_relprio = 0;
1706 }
1707
1708 /* Calculate DARWIN_BG */
1709 bool wants_darwinbg = false;
1710 bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */
1711
1712 if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1713 wants_darwinbg = true;
1714 }
1715
1716 /*
1717 * If DARWIN_BG has been requested at either level, it's engaged.
1718 * darwinbg threads always create bg sockets,
1719 * but only some types of darwinbg change the sockets
1720 * after they're created
1721 */
1722 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1723 wants_all_sockets_bg = wants_darwinbg = true;
1724 }
1725
1726 if (requested.thrp_pidbind_bg) {
1727 wants_all_sockets_bg = wants_darwinbg = true;
1728 }
1729
1730 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1731 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1732 wants_darwinbg = true;
1733 }
1734
1735 /* Calculate side effects of DARWIN_BG */
1736
1737 if (wants_darwinbg) {
1738 next.thep_darwinbg = 1;
1739 next.thep_wi_driven = 0;
1740 }
1741
1742 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1743 next.thep_new_sockets_bg = 1;
1744 }
1745
1746 /* Don't use task_effective.tep_all_sockets_bg here */
1747 if (wants_all_sockets_bg) {
1748 next.thep_all_sockets_bg = 1;
1749 }
1750
1751 /* darwinbg implies background QOS (or lower) */
1752 if (next.thep_darwinbg &&
1753 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1754 next.thep_qos = THREAD_QOS_BACKGROUND;
1755 next.thep_qos_relprio = 0;
1756 }
1757
1758 /* Calculate IO policy */
1759
1760 int iopol = THROTTLE_LEVEL_TIER0;
1761
1762 /* Factor in the task's IO policy */
1763 if (next.thep_darwinbg) {
1764 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1765 }
1766
1767 if (!next.thep_promote_above_task) {
1768 iopol = MAX(iopol, task_effective.tep_io_tier);
1769 }
1770
1771 /* Look up the associated IO tier value for the QoS class */
1772 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1773
1774 iopol = MAX(iopol, requested.thrp_int_iotier);
1775 iopol = MAX(iopol, requested.thrp_ext_iotier);
1776
1777 /* Apply the kevent iotier override */
1778 iopol = MIN(iopol, requested.thrp_iotier_kevent_override);
1779
1780 next.thep_io_tier = iopol;
1781
1782 /*
1783 * If a QoS override is causing IO to go into a lower tier, we also set
1784 * the passive bit so that a thread doesn't end up stuck in its own throttle
1785 * window when the override goes away.
1786 */
1787
1788 int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1789 int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1790 bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1791
1792 /* Calculate Passive IO policy */
1793 if (requested.thrp_ext_iopassive ||
1794 requested.thrp_int_iopassive ||
1795 qos_io_override_active ||
1796 task_effective.tep_io_passive) {
1797 next.thep_io_passive = 1;
1798 }
1799
1800 /* Calculate timer QOS */
1801 uint32_t latency_qos = requested.thrp_latency_qos;
1802
1803 if (!next.thep_promote_above_task) {
1804 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1805 }
1806
1807 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1808
1809 next.thep_latency_qos = latency_qos;
1810
1811 /* Calculate throughput QOS */
1812 uint32_t through_qos = requested.thrp_through_qos;
1813
1814 if (!next.thep_promote_above_task) {
1815 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1816 }
1817
1818 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1819
1820 next.thep_through_qos = through_qos;
1821
1822 if (task_effective.tep_terminated || requested.thrp_terminated) {
1823 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1824 next.thep_terminated = 1;
1825 next.thep_darwinbg = 0;
1826 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1827 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1828 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1829 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1830 next.thep_wi_driven = 0;
1831 }
1832
1833 /*
1834 * Step 3:
1835 * Swap out old policy for new policy
1836 */
1837
1838 struct thread_effective_policy prev = thread->effective_policy;
1839
1840 thread_update_qos_cpu_time_locked(thread);
1841
1842 /* This is the point where the new values become visible to other threads */
1843 thread->effective_policy = next;
1844
1845 /*
1846 * Step 4:
1847 * Pend updates that can't be done while holding the thread lock
1848 */
1849
1850 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1851 pend_token->tpt_update_sockets = 1;
1852 }
1853
1854 /* TODO: Doesn't this only need to be done if the throttle went up? */
1855 if (prev.thep_io_tier != next.thep_io_tier) {
1856 pend_token->tpt_update_throttle = 1;
1857 }
1858
1859 /*
1860 * Check for the attributes that sfi_thread_classify() consults,
1861 * and trigger SFI re-evaluation.
1862 */
1863 if (prev.thep_qos != next.thep_qos ||
1864 prev.thep_darwinbg != next.thep_darwinbg) {
1865 pend_token->tpt_update_thread_sfi = 1;
1866 }
1867
1868 integer_t old_base_pri = thread->base_pri;
1869
1870 /*
1871 * Step 5:
1872 * Update other subsystems as necessary if something has changed
1873 */
1874
1875 /* Check for the attributes that thread_recompute_priority() consults */
1876 if (prev.thep_qos != next.thep_qos ||
1877 prev.thep_qos_relprio != next.thep_qos_relprio ||
1878 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1879 prev.thep_promote_above_task != next.thep_promote_above_task ||
1880 prev.thep_terminated != next.thep_terminated ||
1881 prev.thep_wi_driven != next.thep_wi_driven ||
1882 pend_token->tpt_force_recompute_pri == 1 ||
1883 recompute_priority) {
1884 thread_recompute_priority(thread);
1885 }
1886
1887 /*
1888 * Check if the thread is waiting on a turnstile and needs priority propagation.
1889 */
1890 if (pend_token->tpt_update_turnstile &&
1891 ((old_base_pri == thread->base_pri) ||
1892 !thread_get_waiting_turnstile(thread))) {
1893 /*
1894 * Reset update turnstile pend token since either
1895 * the thread priority did not change or thread is
1896 * not blocked on a turnstile.
1897 */
1898 pend_token->tpt_update_turnstile = 0;
1899 }
1900}
1901
1902
1903/*
1904 * Initiate a thread policy state transition on a thread with its TID
1905 * Useful if you cannot guarantee that the thread won't be terminated
1906 * Precondition: No locks are held
1907 * Will take the task lock - the non-tid variant is faster
1908 * if you already have a thread ref.
1909 */
1910void
1911proc_set_thread_policy_with_tid(task_t task,
1912 uint64_t tid,
1913 int category,
1914 int flavor,
1915 int value)
1916{
1917 /* takes task lock, returns ref'ed thread or NULL */
1918 thread_t thread = task_findtid(task, tid);
1919
1920 if (thread == THREAD_NULL) {
1921 return;
1922 }
1923
1924 proc_set_thread_policy(thread, category, flavor, value);
1925
1926 thread_deallocate(thread);
1927}
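
/*
 * Illustrative sketch (not an existing call site in this file): a caller that
 * only holds a task reference and a thread ID could mark that thread
 * terminated through the TID-based entry point above; the task/tid values
 * are assumed to come from the caller's own context.
 *
 *	proc_set_thread_policy_with_tid(task, tid,
 *	    TASK_POLICY_ATTRIBUTE, TASK_POLICY_TERMINATED, 1);
 *
 * With a thread reference already in hand, proc_set_thread_policy() skips
 * the task_findtid() lookup and the task lock.
 */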
1928
1929/*
1930 * Initiate a thread policy transition on a thread
1931 * This path supports networking transitions (i.e. darwinbg transitions)
1932 * Precondition: No locks are held
1933 */
1934void
1935proc_set_thread_policy(thread_t thread,
1936 int category,
1937 int flavor,
1938 int value)
1939{
1940 proc_set_thread_policy_ext(thread, category, flavor, value, 0);
1941}
1942
1943void
1944proc_set_thread_policy_ext(thread_t thread,
1945 int category,
1946 int flavor,
1947 int value,
1948 int value2)
1949{
1950 struct task_pend_token pend_token = {};
1951
1952 thread_mtx_lock(thread);
1953
1954 proc_set_thread_policy_locked(thread, category, flavor, value, value2, &pend_token);
1955
1956 thread_mtx_unlock(thread);
1957
1958 thread_policy_update_complete_unlocked(thread, &pend_token);
1959}
1960
1961/*
1962 * Do the things that can't be done while holding a thread mutex.
1963 * These are set up to call back into thread policy to get the latest value,
1964 * so they don't have to be synchronized with the update.
1965 * The only required semantic is 'call this sometime after updating effective policy'
1966 *
1967 * Precondition: Thread mutex is not held
1968 *
1969 * This may be called with the task lock held, but in that case it won't be
1970 * called with tpt_update_sockets set.
1971 */
1972void
1973thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1974{
1975#ifdef MACH_BSD
1976 if (pend_token->tpt_update_sockets) {
1977 proc_apply_task_networkbg(task_pid(get_threadtask(thread)), thread);
1978 }
1979#endif /* MACH_BSD */
1980
1981 if (pend_token->tpt_update_throttle) {
1982 rethrottle_thread(get_bsdthread_info(thread));
1983 }
1984
1985 if (pend_token->tpt_update_thread_sfi) {
1986 sfi_reevaluate(thread);
1987 }
1988
1989 if (pend_token->tpt_update_turnstile) {
1990 turnstile_update_thread_priority_chain(thread);
1991 }
1992}
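
/*
 * Illustrative sketch of the pend-token pattern used throughout this file
 * (a restatement of proc_set_thread_policy_ext() above, not new machinery):
 * mutate requested policy under the appropriate lock, collect side effects
 * in a task_pend_token, then complete them once all locks are dropped.
 *
 *	struct task_pend_token pend_token = {};
 *
 *	thread_mtx_lock(thread);
 *	proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
 *	    TASK_POLICY_LATENCY_QOS, LATENCY_QOS_TIER_UNSPECIFIED, 0, &pend_token);
 *	thread_mtx_unlock(thread);
 *
 *	thread_policy_update_complete_unlocked(thread, &pend_token);
 */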
1993
1994/*
1995 * Set and update thread policy
1996 * Thread mutex might be held
1997 */
1998static void
1999proc_set_thread_policy_locked(thread_t thread,
2000 int category,
2001 int flavor,
2002 int value,
2003 int value2,
2004 task_pend_token_t pend_token)
2005{
2006 spl_t s = splsched();
2007 thread_lock(thread);
2008
2009 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2010
2011 thread_unlock(thread);
2012 splx(s);
2013}
2014
2015/*
2016 * Set and update thread policy
2017 * Thread spinlock is held
2018 */
2019static void
2020proc_set_thread_policy_spinlocked(thread_t thread,
2021 int category,
2022 int flavor,
2023 int value,
2024 int value2,
2025 task_pend_token_t pend_token)
2026{
2027 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2028 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
2029 thread_tid(thread), threquested_0(thread),
2030 threquested_1(thread), value, 0);
2031
2032 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
2033
2034 thread_policy_update_spinlocked(thread, false, pend_token);
2035
2036 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2037 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
2038 thread_tid(thread), threquested_0(thread),
2039 threquested_1(thread), tpending(pend_token), 0);
2040}
2041
2042/*
2043 * Set the requested state for a specific flavor to a specific value.
2044 */
2045static void
2046thread_set_requested_policy_spinlocked(thread_t thread,
2047 int category,
2048 int flavor,
2049 int value,
2050 int value2,
2051 task_pend_token_t pend_token)
2052{
2053 int tier, passive;
2054
2055 struct thread_requested_policy requested = thread->requested_policy;
2056
2057 switch (flavor) {
2058 /* Category: EXTERNAL and INTERNAL, thread and task */
2059
2060 case TASK_POLICY_DARWIN_BG:
2061 if (category == TASK_POLICY_EXTERNAL) {
2062 requested.thrp_ext_darwinbg = value;
2063 } else {
2064 requested.thrp_int_darwinbg = value;
2065 }
2066 pend_token->tpt_update_turnstile = 1;
2067 break;
2068
2069 case TASK_POLICY_IOPOL:
2070 proc_iopol_to_tier(value, &tier, &passive);
2071 if (category == TASK_POLICY_EXTERNAL) {
2072 requested.thrp_ext_iotier = tier;
2073 requested.thrp_ext_iopassive = passive;
2074 } else {
2075 requested.thrp_int_iotier = tier;
2076 requested.thrp_int_iopassive = passive;
2077 }
2078 break;
2079
2080 case TASK_POLICY_IO:
2081 if (category == TASK_POLICY_EXTERNAL) {
2082 requested.thrp_ext_iotier = value;
2083 } else {
2084 requested.thrp_int_iotier = value;
2085 }
2086 break;
2087
2088 case TASK_POLICY_PASSIVE_IO:
2089 if (category == TASK_POLICY_EXTERNAL) {
2090 requested.thrp_ext_iopassive = value;
2091 } else {
2092 requested.thrp_int_iopassive = value;
2093 }
2094 break;
2095
2096 /* Category: ATTRIBUTE, thread only */
2097
2098 case TASK_POLICY_PIDBIND_BG:
2099 assert(category == TASK_POLICY_ATTRIBUTE);
2100 requested.thrp_pidbind_bg = value;
2101 pend_token->tpt_update_turnstile = 1;
2102 break;
2103
2104 case TASK_POLICY_LATENCY_QOS:
2105 assert(category == TASK_POLICY_ATTRIBUTE);
2106 requested.thrp_latency_qos = value;
2107 break;
2108
2109 case TASK_POLICY_THROUGH_QOS:
2110 assert(category == TASK_POLICY_ATTRIBUTE);
2111 requested.thrp_through_qos = value;
2112 break;
2113
2114 case TASK_POLICY_QOS_OVERRIDE:
2115 assert(category == TASK_POLICY_ATTRIBUTE);
2116 requested.thrp_qos_override = value;
2117 pend_token->tpt_update_turnstile = 1;
2118 break;
2119
2120 case TASK_POLICY_QOS_AND_RELPRIO:
2121 assert(category == TASK_POLICY_ATTRIBUTE);
2122 requested.thrp_qos = value;
2123 requested.thrp_qos_relprio = value2;
2124 pend_token->tpt_update_turnstile = 1;
2125 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
2126 break;
2127
2128 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2129 assert(category == TASK_POLICY_ATTRIBUTE);
2130 requested.thrp_qos_workq_override = value;
2131 pend_token->tpt_update_turnstile = 1;
2132 break;
2133
2134 case TASK_POLICY_QOS_PROMOTE:
2135 assert(category == TASK_POLICY_ATTRIBUTE);
2136 requested.thrp_qos_promote = value;
2137 break;
2138
2139 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2140 assert(category == TASK_POLICY_ATTRIBUTE);
2141 requested.thrp_qos_kevent_override = value;
2142 pend_token->tpt_update_turnstile = 1;
2143 break;
2144
2145 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2146 assert(category == TASK_POLICY_ATTRIBUTE);
2147 requested.thrp_qos_wlsvc_override = value;
2148 pend_token->tpt_update_turnstile = 1;
2149 break;
2150
2151 case TASK_POLICY_TERMINATED:
2152 assert(category == TASK_POLICY_ATTRIBUTE);
2153 requested.thrp_terminated = value;
2154 break;
2155
2156 case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2157 assert(category == TASK_POLICY_ATTRIBUTE);
2158 requested.thrp_iotier_kevent_override = value;
2159 break;
2160
2161 case TASK_POLICY_WI_DRIVEN:
2162 assert(category == TASK_POLICY_ATTRIBUTE);
2163 assert(thread == current_thread());
2164
2165 const bool set_policy = value;
2166 const sched_mode_t mode = value2;
2167
2168 requested.thrp_wi_driven = set_policy ? 1 : 0;
2169
2170 /*
2171 * No sched mode change for REALTIME (threads must explicitly
2172 * opt-in), however the priority_offset needs to be updated.
2173 */
2174 if (mode == TH_MODE_REALTIME) {
2175 const int pri = work_interval_get_priority(thread);
2176 assert3u(pri, >=, BASEPRI_RTQUEUES);
2177 thread->realtime.priority_offset = set_policy ?
2178 (uint8_t)(pri - BASEPRI_RTQUEUES) : 0;
2179 } else {
2180 sched_set_thread_mode_user(thread, mode);
2181 if (set_policy) {
2182 thread->static_param = true;
2183 }
2184 }
2185 break;
2186
2187 default:
2188 panic("unknown task policy: %d %d %d", category, flavor, value);
2189 break;
2190 }
2191
2192 thread->requested_policy = requested;
2193}
2194
2195/*
2196 * Gets what you set. Effective values may be different.
2197 * Precondition: No locks are held
2198 */
2199int
2200proc_get_thread_policy(thread_t thread,
2201 int category,
2202 int flavor)
2203{
2204 int value = 0;
2205 thread_mtx_lock(thread);
2206 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2207 thread_mtx_unlock(thread);
2208 return value;
2209}
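
/*
 * Illustrative sketch: the requested value read here can differ from the
 * effective value computed by the update path. The raw request is reported
 * as-is, while proc_get_effective_thread_policy() (below) reflects the
 * fully evaluated policy, including task state and overrides.
 *
 *	int req = proc_get_thread_policy(thread,
 *	    TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG);
 *	int eff = proc_get_effective_thread_policy(thread, TASK_POLICY_DARWIN_BG);
 *	// req and eff may disagree, e.g. when the task is darwinbg or the
 *	// thread is terminated.
 */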
2210
2211static int
2212proc_get_thread_policy_locked(thread_t thread,
2213 int category,
2214 int flavor,
2215 int* value2)
2216{
2217 int value = 0;
2218
2219 spl_t s = splsched();
2220 thread_lock(thread);
2221
2222 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2223
2224 thread_unlock(thread);
2225 splx(s);
2226
2227 return value;
2228}
2229
2230/*
2231 * Gets what you set. Effective values may be different.
2232 */
2233static int
2234thread_get_requested_policy_spinlocked(thread_t thread,
2235 int category,
2236 int flavor,
2237 int* value2)
2238{
2239 int value = 0;
2240
2241 struct thread_requested_policy requested = thread->requested_policy;
2242
2243 switch (flavor) {
2244 case TASK_POLICY_DARWIN_BG:
2245 if (category == TASK_POLICY_EXTERNAL) {
2246 value = requested.thrp_ext_darwinbg;
2247 } else {
2248 value = requested.thrp_int_darwinbg;
2249 }
2250 break;
2251 case TASK_POLICY_IOPOL:
2252 if (category == TASK_POLICY_EXTERNAL) {
2253 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2254 requested.thrp_ext_iopassive);
2255 } else {
2256 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2257 requested.thrp_int_iopassive);
2258 }
2259 break;
2260 case TASK_POLICY_IO:
2261 if (category == TASK_POLICY_EXTERNAL) {
2262 value = requested.thrp_ext_iotier;
2263 } else {
2264 value = requested.thrp_int_iotier;
2265 }
2266 break;
2267 case TASK_POLICY_PASSIVE_IO:
2268 if (category == TASK_POLICY_EXTERNAL) {
2269 value = requested.thrp_ext_iopassive;
2270 } else {
2271 value = requested.thrp_int_iopassive;
2272 }
2273 break;
2274 case TASK_POLICY_QOS:
2275 assert(category == TASK_POLICY_ATTRIBUTE);
2276 value = requested.thrp_qos;
2277 break;
2278 case TASK_POLICY_QOS_OVERRIDE:
2279 assert(category == TASK_POLICY_ATTRIBUTE);
2280 value = requested.thrp_qos_override;
2281 break;
2282 case TASK_POLICY_LATENCY_QOS:
2283 assert(category == TASK_POLICY_ATTRIBUTE);
2284 value = requested.thrp_latency_qos;
2285 break;
2286 case TASK_POLICY_THROUGH_QOS:
2287 assert(category == TASK_POLICY_ATTRIBUTE);
2288 value = requested.thrp_through_qos;
2289 break;
2290 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2291 assert(category == TASK_POLICY_ATTRIBUTE);
2292 value = requested.thrp_qos_workq_override;
2293 break;
2294 case TASK_POLICY_QOS_AND_RELPRIO:
2295 assert(category == TASK_POLICY_ATTRIBUTE);
2296 assert(value2 != NULL);
2297 value = requested.thrp_qos;
2298 *value2 = requested.thrp_qos_relprio;
2299 break;
2300 case TASK_POLICY_QOS_PROMOTE:
2301 assert(category == TASK_POLICY_ATTRIBUTE);
2302 value = requested.thrp_qos_promote;
2303 break;
2304 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2305 assert(category == TASK_POLICY_ATTRIBUTE);
2306 value = requested.thrp_qos_kevent_override;
2307 break;
2308 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2309 assert(category == TASK_POLICY_ATTRIBUTE);
2310 value = requested.thrp_qos_wlsvc_override;
2311 break;
2312 case TASK_POLICY_TERMINATED:
2313 assert(category == TASK_POLICY_ATTRIBUTE);
2314 value = requested.thrp_terminated;
2315 break;
2316 case TASK_POLICY_IOTIER_KEVENT_OVERRIDE:
2317 assert(category == TASK_POLICY_ATTRIBUTE);
2318 value = requested.thrp_iotier_kevent_override;
2319 break;
2320
2321 case TASK_POLICY_WI_DRIVEN:
2322 assert(category == TASK_POLICY_ATTRIBUTE);
2323 value = requested.thrp_wi_driven;
2324 break;
2325
2326 default:
2327 panic("unknown policy_flavor %d", flavor);
2328 break;
2329 }
2330
2331 return value;
2332}
2333
2334/*
2335 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2336 *
2337 * NOTE: This accessor does not take the task or thread lock.
2338 * Notifications of state updates need to be externally synchronized with state queries.
2339 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2340 * within the context of a timer interrupt.
2341 *
2342 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2343 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2344 * I don't think that cost is worth not having the right answer.
2345 */
2346int
2347proc_get_effective_thread_policy(thread_t thread,
2348 int flavor)
2349{
2350 int value = 0;
2351
2352 switch (flavor) {
2353 case TASK_POLICY_DARWIN_BG:
2354 /*
2355 * This call is used within the timer layer, as well as
2356 * prioritizing requests to the graphics system.
2357 * It also informs SFI and originator-bg-state.
2358 * Returns 1 for background mode, 0 for normal mode
2359 */
2360
2361 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2362 break;
2363 case TASK_POLICY_IO:
2364 /*
2365 * The I/O system calls here to find out what throttling tier to apply to an operation.
2366 * Returns THROTTLE_LEVEL_* values
2367 */
2368 value = thread->effective_policy.thep_io_tier;
2369 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2370 value = MIN(value, thread->iotier_override);
2371 }
2372 break;
2373 case TASK_POLICY_PASSIVE_IO:
2374 /*
2375 * The I/O system calls here to find out whether an operation should be passive.
2376 * (i.e. not cause operations with lower throttle tiers to be throttled)
2377 * Returns 1 for passive mode, 0 for normal mode
2378 *
2379 * If an override is causing IO to go into a lower tier, we also set
2380 * the passive bit so that a thread doesn't end up stuck in its own throttle
2381 * window when the override goes away.
2382 */
2383 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2384 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2385 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2386 value = 1;
2387 }
2388 break;
2389 case TASK_POLICY_ALL_SOCKETS_BG:
2390 /*
2391 * do_background_socket() calls this to determine whether
2392 * it should change the thread's sockets
2393 * Returns 1 for background mode, 0 for normal mode
2394 * This consults both thread and task so un-DBGing a thread while the task is BG
2395 * doesn't get you out of the network throttle.
2396 */
2397 value = (thread->effective_policy.thep_all_sockets_bg ||
2398 get_threadtask(thread)->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2399 break;
2400 case TASK_POLICY_NEW_SOCKETS_BG:
2401 /*
2402 * socreate() calls this to determine if it should mark a new socket as background
2403 * Returns 1 for background mode, 0 for normal mode
2404 */
2405 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2406 break;
2407 case TASK_POLICY_LATENCY_QOS:
2408 /*
2409 * timer arming calls into here to find out the timer coalescing level
2410 * Returns a latency QoS tier (0-6)
2411 */
2412 value = thread->effective_policy.thep_latency_qos;
2413 break;
2414 case TASK_POLICY_THROUGH_QOS:
2415 /*
2416 * This value is passed into the urgency callout from the scheduler
2417 * to the performance management subsystem.
2418 *
2419 * Returns a throughput QoS tier (0-6)
2420 */
2421 value = thread->effective_policy.thep_through_qos;
2422 break;
2423 case TASK_POLICY_QOS:
2424 /*
2425 * This is communicated to the performance management layer and SFI.
2426 *
2427 * Returns a QoS policy tier
2428 */
2429 value = thread->effective_policy.thep_qos;
2430 break;
2431 default:
2432 panic("unknown thread policy flavor %d", flavor);
2433 break;
2434 }
2435
2436 return value;
2437}
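
/*
 * Illustrative sketch: roughly how an I/O-issuing path might consult the
 * effective policy for the current thread (a sketch only; the real callers
 * live in the BSD I/O throttling code).
 *
 *	int tier = proc_get_effective_thread_policy(current_thread(),
 *	    TASK_POLICY_IO);
 *	int passive = proc_get_effective_thread_policy(current_thread(),
 *	    TASK_POLICY_PASSIVE_IO);
 *	// tier is a THROTTLE_LEVEL_* value; passive I/O does not push
 *	// lower-tier work into a throttle window.
 */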
2438
2439
2440/*
2441 * (integer_t) casts limit the number of bits we can fit here
2442 * this interface is deprecated and replaced by the _EXT struct?
2443 */
2444static void
2445proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2446{
2447 uint64_t bits = 0;
2448 struct thread_requested_policy requested = thread->requested_policy;
2449
2450 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2451 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2452 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2453 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2454 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2455 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2456
2457 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2458 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2459
2460 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2461
2462 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2463 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2464
2465 info->requested = (integer_t) bits;
2466 bits = 0;
2467
2468 struct thread_effective_policy effective = thread->effective_policy;
2469
2470 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2471
2472 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2473 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2474 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2475 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2476
2477 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2478
2479 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2480 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2481
2482 info->effective = (integer_t)bits;
2483 bits = 0;
2484
2485 info->pending = 0;
2486}
2487
2488/*
2489 * Sneakily trace either the task and thread requested
2490 * or just the thread requested, depending on whether we have enough room.
2491 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2492 *
2493 * LP32 LP64
2494 * threquested_0(thread) thread[0] task[0]
2495 * threquested_1(thread) thread[1] thread[0]
2496 *
2497 */
2498
2499uintptr_t
2500threquested_0(thread_t thread)
2501{
2502 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2503
2504 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2505
2506 return raw[0];
2507}
2508
2509uintptr_t
2510threquested_1(thread_t thread)
2511{
2512#if defined __LP64__
2513 return *(uintptr_t*)&get_threadtask(thread)->requested_policy;
2514#else
2515 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2516 return raw[1];
2517#endif
2518}
2519
2520uintptr_t
2521theffective_0(thread_t thread)
2522{
2523 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2524
2525 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2526 return raw[0];
2527}
2528
2529uintptr_t
2530theffective_1(thread_t thread)
2531{
2532#if defined __LP64__
2533 return *(uintptr_t*)&get_threadtask(thread)->effective_policy;
2534#else
2535 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2536 return raw[1];
2537#endif
2538}
2539
2540
2541/*
2542 * Set an override on the thread which is consulted with a
2543 * higher priority than the task/thread policy. This should
2544 * only be set for temporary grants until the thread
2545 * returns to the userspace boundary
2546 *
2547 * We use atomic operations to swap in the override, with
2548 * the assumption that the thread itself can
2549 * read the override and clear it on return to userspace.
2550 *
2551 * No locking is performed, since it is acceptable to see
2552 * a stale override for one loop through throttle_lowpri_io().
2553 * However a thread reference must be held on the thread.
2554 */
2555
2556void
2557set_thread_iotier_override(thread_t thread, int policy)
2558{
2559 int current_override;
2560
2561 /* Let most aggressive I/O policy win until user boundary */
2562 do {
2563 current_override = thread->iotier_override;
2564
2565 if (current_override != THROTTLE_LEVEL_NONE) {
2566 policy = MIN(current_override, policy);
2567 }
2568
2569 if (current_override == policy) {
2570 /* no effective change */
2571 return;
2572 }
2573 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2574
2575 /*
2576 * Since the thread may be currently throttled,
2577 * re-evaluate tiers and potentially break out
2578 * of an msleep
2579 */
2580 rethrottle_thread(get_bsdthread_info(thread));
2581}
2582
2583/*
2584 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2585 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2586 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2587 * priority thread. In these cases, we attempt to propagate the priority token, as long
2588 * as the subsystem informs us of the relationships between the threads. The userspace
2589 * synchronization subsystem should maintain the information of owner->resource and
2590 * resource->waiters itself.
2591 */
2592
2593/*
2594 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2595 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2596 * to be handled specially in the future, but for now it's fine to slam
2597 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2598 */
2599static void
2600canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2601{
2602 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2603 /* Map all input resource/type to a single one */
2604 *resource = USER_ADDR_NULL;
2605 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2606 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2607 /* no transform */
2608 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2609 /* Map all mutex overrides to a single one, to avoid memory overhead */
2610 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2611 *resource = USER_ADDR_NULL;
2612 }
2613 }
2614}
2615
2616/* This helper routine finds an existing override if known. Locking should be done by caller */
2617static struct thread_qos_override *
2618find_qos_override(thread_t thread,
2619 user_addr_t resource,
2620 int resource_type)
2621{
2622 struct thread_qos_override *override;
2623
2624 override = thread->overrides;
2625 while (override) {
2626 if (override->override_resource == resource &&
2627 override->override_resource_type == resource_type) {
2628 return override;
2629 }
2630
2631 override = override->override_next;
2632 }
2633
2634 return NULL;
2635}
2636
2637static void
2638find_and_decrement_qos_override(thread_t thread,
2639 user_addr_t resource,
2640 int resource_type,
2641 boolean_t reset,
2642 struct thread_qos_override **free_override_list)
2643{
2644 struct thread_qos_override *override, *override_prev;
2645
2646 override_prev = NULL;
2647 override = thread->overrides;
2648 while (override) {
2649 struct thread_qos_override *override_next = override->override_next;
2650
2651 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2652 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2653 if (reset) {
2654 override->override_contended_resource_count = 0;
2655 } else {
2656 override->override_contended_resource_count--;
2657 }
2658
2659 if (override->override_contended_resource_count == 0) {
2660 if (override_prev == NULL) {
2661 thread->overrides = override_next;
2662 } else {
2663 override_prev->override_next = override_next;
2664 }
2665
2666 /* Add to out-param for later zfree */
2667 override->override_next = *free_override_list;
2668 *free_override_list = override;
2669 } else {
2670 override_prev = override;
2671 }
2672
2673 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2674 return;
2675 }
2676 } else {
2677 override_prev = override;
2678 }
2679
2680 override = override_next;
2681 }
2682}
2683
2684/* This helper recalculates the current requested override using the policy selected at boot */
2685static int
2686calculate_requested_qos_override(thread_t thread)
2687{
2688 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2689 return THREAD_QOS_UNSPECIFIED;
2690 }
2691
2692 /* iterate over all overrides and calculate MAX */
2693 struct thread_qos_override *override;
2694 int qos_override = THREAD_QOS_UNSPECIFIED;
2695
2696 override = thread->overrides;
2697 while (override) {
2698 qos_override = MAX(qos_override, override->override_qos);
2699 override = override->override_next;
2700 }
2701
2702 return qos_override;
2703}
2704
2705/*
2706 * Returns:
2707 * - 0 on success
2708 * - EINVAL if some invalid input was passed
2709 */
2710static int
2711proc_thread_qos_add_override_internal(thread_t thread,
2712 int override_qos,
2713 boolean_t first_override_for_resource,
2714 user_addr_t resource,
2715 int resource_type)
2716{
2717 struct task_pend_token pend_token = {};
2718 int rc = 0;
2719
2720 thread_mtx_lock(thread);
2721
2722 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2723 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2724
2725 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2726 uint64_t, thread->requested_policy.thrp_qos,
2727 uint64_t, thread->effective_policy.thep_qos,
2728 int, override_qos, boolean_t, first_override_for_resource);
2729
2730 struct thread_qos_override *override;
2731 struct thread_qos_override *override_new = NULL;
2732 int new_qos_override, prev_qos_override;
2733 int new_effective_qos;
2734
2735 canonicalize_resource_and_type(&resource, &resource_type);
2736
2737 override = find_qos_override(thread, resource, resource_type);
2738 if (first_override_for_resource && !override) {
2739 /* We need to allocate a new object. Drop the thread mutex and
2740 * recheck afterwards in case someone else added the override
2741 */
2742 thread_mtx_unlock(thread);
2743 override_new = zalloc(thread_qos_override_zone);
2744 thread_mtx_lock(thread);
2745 override = find_qos_override(thread, resource, resource_type);
2746 }
2747 if (first_override_for_resource && override) {
2748 /* Someone else already allocated while the thread mutex was dropped */
2749 override->override_contended_resource_count++;
2750 } else if (!override && override_new) {
2751 override = override_new;
2752 override_new = NULL;
2753 override->override_next = thread->overrides;
2754 /* since first_override_for_resource was TRUE */
2755 override->override_contended_resource_count = 1;
2756 override->override_resource = resource;
2757 override->override_resource_type = (int16_t)resource_type;
2758 override->override_qos = THREAD_QOS_UNSPECIFIED;
2759 thread->overrides = override;
2760 }
2761
2762 if (override) {
2763 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2764 override->override_qos = (int16_t)override_qos;
2765 } else {
2766 override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2767 }
2768 }
2769
2770 /* Determine how to combine the various overrides into a single current
2771 * requested override
2772 */
2773 new_qos_override = calculate_requested_qos_override(thread);
2774
2775 prev_qos_override = proc_get_thread_policy_locked(thread,
2776 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2777
2778 if (new_qos_override != prev_qos_override) {
2779 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2780 TASK_POLICY_QOS_OVERRIDE,
2781 new_qos_override, 0, &pend_token);
2782 }
2783
2784 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2785
2786 thread_mtx_unlock(thread);
2787
2788 thread_policy_update_complete_unlocked(thread, &pend_token);
2789
2790 if (override_new) {
2791 zfree(thread_qos_override_zone, override_new);
2792 }
2793
2794 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2795 int, new_qos_override, int, new_effective_qos, int, rc);
2796
2797 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2798 new_qos_override, resource, resource_type, 0, 0);
2799
2800 return rc;
2801}
2802
2803int
2804proc_thread_qos_add_override(task_t task,
2805 thread_t thread,
2806 uint64_t tid,
2807 int override_qos,
2808 boolean_t first_override_for_resource,
2809 user_addr_t resource,
2810 int resource_type)
2811{
2812 boolean_t has_thread_reference = FALSE;
2813 int rc = 0;
2814
2815 if (thread == THREAD_NULL) {
2816 thread = task_findtid(task, tid);
2817 /* returns referenced thread */
2818
2819 if (thread == THREAD_NULL) {
2820 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2821 tid, 0, 0xdead, 0, 0);
2822 return ESRCH;
2823 }
2824 has_thread_reference = TRUE;
2825 } else {
2826 assert(get_threadtask(thread) == task);
2827 }
2828 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2829 first_override_for_resource, resource, resource_type);
2830 if (has_thread_reference) {
2831 thread_deallocate(thread);
2832 }
2833
2834 return rc;
2835}
2836
2837static void
2838proc_thread_qos_remove_override_internal(thread_t thread,
2839 user_addr_t resource,
2840 int resource_type,
2841 boolean_t reset)
2842{
2843 struct task_pend_token pend_token = {};
2844
2845 struct thread_qos_override *deferred_free_override_list = NULL;
2846 int new_qos_override, prev_qos_override, new_effective_qos;
2847
2848 thread_mtx_lock(thread);
2849
2850 canonicalize_resource_and_type(&resource, &resource_type);
2851
2852 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2853
2854 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2855 thread_tid(thread), resource, reset, 0, 0);
2856
2857 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2858 uint64_t, thread->requested_policy.thrp_qos,
2859 uint64_t, thread->effective_policy.thep_qos);
2860
2861 /* Determine how to combine the various overrides into a single current requested override */
2862 new_qos_override = calculate_requested_qos_override(thread);
2863
2864 spl_t s = splsched();
2865 thread_lock(thread);
2866
2867 /*
2868 * The override chain and therefore the value of the current override is locked with thread mutex,
2869 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2870 * This means you can't change the current override from a spinlock-only setter.
2871 */
2872 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2873
2874 if (new_qos_override != prev_qos_override) {
2875 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2876 }
2877
2878 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2879
2880 thread_unlock(thread);
2881 splx(s);
2882
2883 thread_mtx_unlock(thread);
2884
2885 thread_policy_update_complete_unlocked(thread, &pend_token);
2886
2887 while (deferred_free_override_list) {
2888 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2889
2890 zfree(thread_qos_override_zone, deferred_free_override_list);
2891 deferred_free_override_list = override_next;
2892 }
2893
2894 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2895 int, new_qos_override, int, new_effective_qos);
2896
2897 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2898 thread_tid(thread), 0, 0, 0, 0);
2899}
2900
2901int
2902proc_thread_qos_remove_override(task_t task,
2903 thread_t thread,
2904 uint64_t tid,
2905 user_addr_t resource,
2906 int resource_type)
2907{
2908 boolean_t has_thread_reference = FALSE;
2909
2910 if (thread == THREAD_NULL) {
2911 thread = task_findtid(task, tid);
2912 /* returns referenced thread */
2913
2914 if (thread == THREAD_NULL) {
2915 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2916 tid, 0, 0xdead, 0, 0);
2917 return ESRCH;
2918 }
2919 has_thread_reference = TRUE;
2920 } else {
2921 assert(task == get_threadtask(thread));
2922 }
2923
2924 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2925
2926 if (has_thread_reference) {
2927 thread_deallocate(thread);
2928 }
2929
2930 return 0;
2931}
2932
2933/* Deallocate before thread termination */
2934void
2935proc_thread_qos_deallocate(thread_t thread)
2936{
2937 /* This thread must have no more IPC overrides. */
2938 assert(thread->kevent_overrides == 0);
2939 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2940 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2941
2942 /*
2943 * Clear out any lingering override objects.
2944 */
2945 struct thread_qos_override *override;
2946
2947 thread_mtx_lock(thread);
2948 override = thread->overrides;
2949 thread->overrides = NULL;
2950 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2951 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2952 thread_mtx_unlock(thread);
2953
2954 while (override) {
2955 struct thread_qos_override *override_next = override->override_next;
2956
2957 zfree(thread_qos_override_zone, override);
2958 override = override_next;
2959 }
2960}
2961
2962/*
2963 * Set up the primordial thread's QoS
2964 */
2965void
2966task_set_main_thread_qos(task_t task, thread_t thread)
2967{
2968 struct task_pend_token pend_token = {};
2969
2970 assert(get_threadtask(thread) == task);
2971
2972 thread_mtx_lock(thread);
2973
2974 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2975 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2976 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2977 thread->requested_policy.thrp_qos, 0);
2978
2979 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2980
2981 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2982 primordial_qos, 0, &pend_token);
2983
2984 thread_mtx_unlock(thread);
2985
2986 thread_policy_update_complete_unlocked(thread, &pend_token);
2987
2988 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2989 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2990 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2991 primordial_qos, 0);
2992}
2993
2994/*
2995 * KPI for pthread kext
2996 *
2997 * Return a good guess at what the initial manager QoS will be
2998 * Dispatch can override this in userspace if it so chooses
2999 */
3000thread_qos_t
3001task_get_default_manager_qos(task_t task)
3002{
3003 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
3004
3005 if (primordial_qos == THREAD_QOS_LEGACY) {
3006 primordial_qos = THREAD_QOS_USER_INITIATED;
3007 }
3008
3009 return primordial_qos;
3010}
3011
3012/*
3013 * Check if the kernel promotion on thread has changed
3014 * and apply it.
3015 *
3016 * thread locked on entry and exit
3017 */
3018boolean_t
3019thread_recompute_kernel_promotion_locked(thread_t thread)
3020{
3021 boolean_t needs_update = FALSE;
3022 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
3023
3024 /*
3025 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
3026 * TURNSTILE_KERNEL_PROMOTE adds threads to the waitq already capped to MAXPRI_PROMOTE
3027 * and propagates the priority through the chain with the same cap, because for now it does
3028 * not differentiate on the kernel primitive.
3029 *
3030 * If this assumption changes with the adoption of a kernel primitive that does not
3031 * cap the priority when adding/propagating,
3032 * then this is the place to put the generic cap for all kernel primitives
3033 * (convert the assert into kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
3034 */
3035 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
3036
3037 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
3038 KDBG(MACHDBG_CODE(
3039 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
3040 thread_tid(thread),
3041 kern_promotion_schedpri,
3042 thread->kern_promotion_schedpri);
3043
3044 needs_update = TRUE;
3045 thread->kern_promotion_schedpri = kern_promotion_schedpri;
3046 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
3047 }
3048
3049 return needs_update;
3050}
3051
3052/*
3053 * Check if the user promotion on thread has changed
3054 * and apply it.
3055 *
3056 * thread locked on entry, might drop the thread lock
3057 * and reacquire it.
3058 */
3059boolean_t
3060thread_recompute_user_promotion_locked(thread_t thread)
3061{
3062 boolean_t needs_update = FALSE;
3063 struct task_pend_token pend_token = {};
3064 uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
3065 int old_base_pri = thread->base_pri;
3066 thread_qos_t qos_promotion;
3067
3068 /* Check if user promotion has changed */
3069 if (thread->user_promotion_basepri == user_promotion_basepri) {
3070 return needs_update;
3071 } else {
3072 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3073 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
3074 thread_tid(thread),
3075 user_promotion_basepri,
3076 thread->user_promotion_basepri,
3077 0, 0);
3078 KDBG(MACHDBG_CODE(
3079 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
3080 thread_tid(thread),
3081 user_promotion_basepri,
3082 thread->user_promotion_basepri);
3083 }
3084
3085 /* Update the user promotion base pri */
3086 thread->user_promotion_basepri = user_promotion_basepri;
3087 pend_token.tpt_force_recompute_pri = 1;
3088
3089 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
3090 qos_promotion = THREAD_QOS_UNSPECIFIED;
3091 } else {
3092 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
3093 }
3094
3095 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3096 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
3097
3098 if (thread_get_waiting_turnstile(thread) &&
3099 thread->base_pri != old_base_pri) {
3100 needs_update = TRUE;
3101 }
3102
3103 thread_unlock(thread);
3104
3105 thread_policy_update_complete_unlocked(thread, &pend_token);
3106
3107 thread_lock(thread);
3108
3109 return needs_update;
3110}
3111
3112/*
3113 * Convert the thread's user promotion base pri to a QoS for threads in the QoS world.
3114 * For priorities above the UI QoS base pri, the result is capped at THREAD_QOS_USER_INTERACTIVE.
3115 */
3116thread_qos_t
3117thread_user_promotion_qos_for_pri(int priority)
3118{
3119 thread_qos_t qos;
3120 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
3121 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
3122 return qos;
3123 }
3124 }
3125 return THREAD_QOS_MAINTENANCE;
3126}
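
/*
 * Illustrative sketch: callers hand in a base priority and get back the
 * highest QoS class whose qos_pri entry does not exceed it; priorities
 * below the BACKGROUND entry fall through to THREAD_QOS_MAINTENANCE.
 *
 *	thread_qos_t qos =
 *	    thread_user_promotion_qos_for_pri(thread->user_promotion_basepri);
 */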
3127
3128/*
3129 * Set the thread's QoS Kevent override
3130 * Owned by the Kevent subsystem
3131 *
3132 * May be called with spinlocks held, but not spinlocks
3133 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3134 *
3135 * One 'add' must be balanced by one 'drop'.
3136 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3137 * Before the thread is deallocated, there must be 0 remaining overrides.
3138 */
3139static void
3140thread_kevent_override(thread_t thread,
3141 uint32_t qos_override,
3142 boolean_t is_new_override)
3143{
3144 struct task_pend_token pend_token = {};
3145 boolean_t needs_update;
3146
3147 spl_t s = splsched();
3148 thread_lock(thread);
3149
3150 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
3151
3152 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3153 assert(qos_override < THREAD_QOS_LAST);
3154
3155 if (is_new_override) {
3156 if (thread->kevent_overrides++ == 0) {
3157 /* This add is the first override for this thread */
3158 assert(old_override == THREAD_QOS_UNSPECIFIED);
3159 } else {
3160 /* There are already other overrides in effect for this thread */
3161 assert(old_override > THREAD_QOS_UNSPECIFIED);
3162 }
3163 } else {
3164 /* There must be at least one override (the previous add call) in effect */
3165 assert(thread->kevent_overrides > 0);
3166 assert(old_override > THREAD_QOS_UNSPECIFIED);
3167 }
3168
3169 /*
3170 * We can't allow lowering if there are several IPC overrides because
3171 * the caller can't possibly know the whole truth
3172 */
3173 if (thread->kevent_overrides == 1) {
3174 needs_update = qos_override != old_override;
3175 } else {
3176 needs_update = qos_override > old_override;
3177 }
3178
3179 if (needs_update) {
3180 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3181 TASK_POLICY_QOS_KEVENT_OVERRIDE,
3182 qos_override, 0, &pend_token);
3183 assert(pend_token.tpt_update_sockets == 0);
3184 }
3185
3186 thread_unlock(thread);
3187 splx(s);
3188
3189 thread_policy_update_complete_unlocked(thread, &pend_token);
3190}
3191
3192void
3193thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3194{
3195 thread_kevent_override(thread, qos_override, TRUE);
3196}
3197
3198void
3199thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3200{
3201 thread_kevent_override(thread, qos_override, FALSE);
3202}
3203
3204void
3205thread_drop_kevent_override(thread_t thread)
3206{
3207 struct task_pend_token pend_token = {};
3208
3209 spl_t s = splsched();
3210 thread_lock(thread);
3211
3212 assert(thread->kevent_overrides > 0);
3213
3214 if (--thread->kevent_overrides == 0) {
3215 /*
3216 * There are no more overrides for this thread, so we should
3217 * clear out the saturated override value
3218 */
3219
3220 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3221 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3222 0, &pend_token);
3223 }
3224
3225 thread_unlock(thread);
3226 splx(s);
3227
3228 thread_policy_update_complete_unlocked(thread, &pend_token);
3229}
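
/*
 * Illustrative sketch of the add/update/drop contract documented above (the
 * real caller is the kevent subsystem; this is not an existing code path):
 *
 *	thread_add_kevent_override(thread, THREAD_QOS_USER_INITIATED);
 *	...
 *	thread_update_kevent_override(thread, THREAD_QOS_USER_INTERACTIVE);
 *	...
 *	thread_drop_kevent_override(thread);	// balances the single 'add'
 */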
3230
3231/*
3232 * Set the thread's QoS Workloop Servicer override
3233 * Owned by the Kevent subsystem
3234 *
3235 * May be called with spinlocks held, but not spinlocks
3236 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3237 *
3238 * One 'add' must be balanced by one 'drop'.
3239 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3240 * Before the thread is deallocated, there must be 0 remaining overrides.
3241 */
3242static void
3243thread_servicer_override(thread_t thread,
3244 uint32_t qos_override,
3245 boolean_t is_new_override)
3246{
3247 struct task_pend_token pend_token = {};
3248
3249 spl_t s = splsched();
3250 thread_lock(thread);
3251
3252 if (is_new_override) {
3253 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3254 } else {
3255 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3256 }
3257
3258 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3259 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3260 qos_override, 0, &pend_token);
3261
3262 thread_unlock(thread);
3263 splx(s);
3264
3265 assert(pend_token.tpt_update_sockets == 0);
3266 thread_policy_update_complete_unlocked(thread, &pend_token);
3267}
3268
3269void
3270thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3271{
3272 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3273 assert(qos_override < THREAD_QOS_LAST);
3274
3275 thread_servicer_override(thread, qos_override, TRUE);
3276}
3277
3278void
3279thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3280{
3281 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3282 assert(qos_override < THREAD_QOS_LAST);
3283
3284 thread_servicer_override(thread, qos_override, FALSE);
3285}
3286
3287void
3288thread_drop_servicer_override(thread_t thread)
3289{
3290 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3291}
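
/*
 * Illustrative sketch (kevent workloop servicer side; not an existing call
 * site in this file): unlike the kevent override above there is no refcount,
 * so a single add is updated in place and dropped by writing
 * THREAD_QOS_UNSPECIFIED back.
 *
 *	thread_add_servicer_override(thread, THREAD_QOS_USER_INITIATED);
 *	thread_update_servicer_override(thread, THREAD_QOS_LEGACY);
 *	thread_drop_servicer_override(thread);
 */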
3292
3293void
3294thread_update_servicer_iotier_override(thread_t thread, uint8_t iotier_override)
3295{
3296 struct task_pend_token pend_token = {};
3297 uint8_t current_iotier;
3298
3299 /* Check if the update is needed */
3300 current_iotier = (uint8_t)thread_get_requested_policy_spinlocked(thread,
3301 TASK_POLICY_ATTRIBUTE, TASK_POLICY_IOTIER_KEVENT_OVERRIDE, NULL);
3302
3303 if (iotier_override == current_iotier) {
3304 return;
3305 }
3306
3307 spl_t s = splsched();
3308 thread_lock(thread);
3309
3310 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3311 TASK_POLICY_IOTIER_KEVENT_OVERRIDE,
3312 iotier_override, 0, &pend_token);
3313
3314 thread_unlock(thread);
3315 splx(s);
3316
3317 assert(pend_token.tpt_update_sockets == 0);
3318 thread_policy_update_complete_unlocked(thread, &pend_token);
3319}
3320
3321/* Get current requested qos / relpri, may be called from spinlock context */
3322thread_qos_t
3323thread_get_requested_qos(thread_t thread, int *relpri)
3324{
3325 int relprio_value = 0;
3326 thread_qos_t qos;
3327
3328 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3329 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3330 if (relpri) {
3331 *relpri = -relprio_value;
3332 }
3333 return qos;
3334}
3335
3336/*
3337 * This function will promote the thread priority
3338 * since exec could block other threads calling
3339 * proc_find on the proc. This boost must be removed
3340 * via call to thread_clear_exec_promotion.
3341 *
3342 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3343 */
3344void
3345thread_set_exec_promotion(thread_t thread)
3346{
3347 spl_t s = splsched();
3348 thread_lock(thread);
3349
3350 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3351
3352 thread_unlock(thread);
3353 splx(s);
3354}
3355
3356/*
3357 * This function will clear the exec thread
3358 * promotion set on the thread by thread_set_exec_promotion.
3359 */
3360void
3361thread_clear_exec_promotion(thread_t thread)
3362{
3363 spl_t s = splsched();
3364 thread_lock(thread);
3365
3366 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3367
3368 thread_unlock(thread);
3369 splx(s);
3370}
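
/*
 * Illustrative sketch (exec path; not an existing call site in this file):
 * the promotion is paired across the window in which other threads may
 * block in proc_find() against the exec'ing process.
 *
 *	thread_set_exec_promotion(current_thread());
 *	// ... blocking portion of exec ...
 *	thread_clear_exec_promotion(current_thread());
 */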
3371
3372#if CONFIG_SCHED_RT_ALLOW
3373
3374/*
3375 * flag set by the -rt-allow_policy-enable boot-arg to restrict use of
3376 * THREAD_TIME_CONSTRAINT_POLICY and THREAD_TIME_CONSTRAINT_WITH_PRIORITY_POLICY
3377 * to threads that have joined a workinterval with WORK_INTERVAL_WORKLOAD_ID_RT_ALLOWED.
3378 */
3379static TUNABLE(
3380 bool,
3381 rt_allow_policy_enabled,
3382 "-rt-allow_policy-enable",
3383 false
3384 );
3385
3386/*
3387 * When the RT allow policy is enabled and a thread is allowed to become RT,
3388 * the thread will sometimes (if the process's RT allow policy is restricted)
3389 * have a CPU limit enforced. The following two tunables determine the
3390 * parameters for that CPU limit.
3391 */
3392
3393/* % of the interval allowed to run. */
3394TUNABLE_DEV_WRITEABLE(uint8_t, rt_allow_limit_percent,
3395 "rt_allow_limit_percent", 70);
3396
3397/* The length of the interval in milliseconds. */
3398TUNABLE_DEV_WRITEABLE(uint16_t, rt_allow_limit_interval_ms,
3399 "rt_allow_limit_interval", 10);
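
/*
 * With the defaults above, a restricted RT thread may run for roughly 70%
 * of each 10 ms window, i.e. about 7 ms of CPU time per window, before the
 * THREAD_CPULIMIT_BLOCK action takes effect (a sketch of the arithmetic,
 * assuming the boot-args are left at their defaults).
 */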
3400
3401static bool
3402thread_has_rt(thread_t thread)
3403{
3404 return
3405 thread->sched_mode == TH_MODE_REALTIME ||
3406 thread->saved_mode == TH_MODE_REALTIME;
3407}
3408
3409/*
3410 * Set a CPU limit on a thread based on the RT allow policy. This will be picked
3411 * up by the target thread via the ledger AST.
3412 */
3413static void
3414thread_rt_set_cpulimit(thread_t thread)
3415{
3416 /* Force reasonable values for the cpu limit. */
3417 const uint8_t percent = MAX(MIN(rt_allow_limit_percent, 99), 1);
3418 const uint16_t interval_ms = MAX(rt_allow_limit_interval_ms, 1);
3419
3420 thread->t_ledger_req_percentage = percent;
3421 thread->t_ledger_req_interval_ms = interval_ms;
3422 thread->t_ledger_req_action = THREAD_CPULIMIT_BLOCK;
3423
3424 thread->sched_flags |= TH_SFLAG_RT_CPULIMIT;
3425}
3426
3427/* Similar to the above but removes any CPU limit. */
3428static void
3429thread_rt_clear_cpulimit(thread_t thread)
3430{
3431 thread->sched_flags &= ~TH_SFLAG_RT_CPULIMIT;
3432
3433 thread->t_ledger_req_percentage = 0;
3434 thread->t_ledger_req_interval_ms = 0;
3435 thread->t_ledger_req_action = THREAD_CPULIMIT_DISABLE;
3436}
3437
3438/*
3439 * Evaluate RT policy for a thread, demoting and undemoting as needed.
3440 */
3441void
3442thread_rt_evaluate(thread_t thread)
3443{
3444 task_t task = get_threadtask(thread);
3445 bool platform_binary = false;
3446
3447 /* If the RT allow policy is not enabled - nothing to do. */
3448 if (!rt_allow_policy_enabled) {
3449 return;
3450 }
3451
3452 /* User threads only. */
3453 if (task == kernel_task) {
3454 return;
3455 }
3456
3457 /* Check for platform binary. */
3458 platform_binary = (task_ro_flags_get(task) & TFRO_PLATFORM) != 0;
3459
3460 spl_t s = splsched();
3461 thread_lock(thread);
3462
3463 const thread_work_interval_flags_t wi_flags =
3464 os_atomic_load(&thread->th_work_interval_flags, relaxed);
3465
3466 /*
3467 * RT threads that are not joined to a work interval which allows RT
3468 * threads are demoted. Once those conditions no longer hold, the thread
3469 * is undemoted.
3470 */
3471 if (thread_has_rt(thread) && (wi_flags & TH_WORK_INTERVAL_FLAGS_RT_ALLOWED) == 0) {
3472 if (!sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3473 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_DISALLOWED_WORK_INTERVAL),
3474 thread_tid(thread));
3475 sched_thread_mode_demote(thread, TH_SFLAG_RT_DISALLOWED);
3476 }
3477 } else {
3478 if (sched_thread_mode_has_demotion(thread, TH_SFLAG_RT_DISALLOWED)) {
3479 sched_thread_mode_undemote(thread, TH_SFLAG_RT_DISALLOWED);
3480 }
3481 }
3482
3483 /*
3484 * RT threads get a CPU limit unless they're part of a platform binary
3485 * task. If the thread is no longer RT, any existing CPU limit should be
3486 * removed.
3487 */
3488 bool set_ast = false;
3489 if (!platform_binary &&
3490 thread_has_rt(thread) &&
3491 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) == 0) {
3492 thread_rt_set_cpulimit(thread);
3493 set_ast = true;
3494 }
3495
3496 if (!platform_binary &&
3497 !thread_has_rt(thread) &&
3498 (thread->sched_flags & TH_SFLAG_RT_CPULIMIT) != 0) {
3499 thread_rt_clear_cpulimit(thread);
3500 set_ast = true;
3501 }
3502
3503 thread_unlock(thread);
3504 splx(s);
3505
3506 if (set_ast) {
3507 /* Ensure the target thread picks up any CPU limit change. */
3508 act_set_astledger(thread);
3509 }
3510}
3511
3512#else
3513
3514void
3515thread_rt_evaluate(__unused thread_t thread)
3516{
3517}
3518
3519#endif /* CONFIG_SCHED_RT_ALLOW */
3520