1/*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <mach/mach_types.h>
30#include <mach/thread_act_server.h>
31
32#include <kern/kern_types.h>
33#include <kern/processor.h>
34#include <kern/thread.h>
35#include <kern/affinity.h>
36#include <mach/task_policy.h>
37#include <kern/sfi.h>
38#include <kern/policy_internal.h>
39#include <sys/errno.h>
40#include <sys/ulock.h>
41
42#include <mach/machine/sdt.h>
43
#ifdef MACH_BSD
/*
 * Prototypes for routines implemented outside this file.
 * They are only declared when MACH_BSD is defined.
 */
extern int   proc_selfpid(void);
extern char *proc_name_address(void *p);
extern void  rethrottle_thread(void *uthread);
#endif /* MACH_BSD */
49
50#define QOS_EXTRACT(q) ((q) & 0xff)
51
52uint32_t qos_override_mode;
53#define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54#define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56#define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58extern zone_t thread_qos_override_zone;
59
60static void
61proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63/*
64 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65 * to threads that don't have a QoS class set.
66 */
67const qos_policy_params_t thread_qos_policy_params = {
68 /*
69 * This table defines the starting base priority of the thread,
70 * which will be modified by the thread importance and the task max priority
71 * before being applied.
72 */
73 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
74 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
75 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81 /*
82 * This table defines the highest IO priority that a thread marked with this
83 * QoS class can have.
84 */
85 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
91 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93 /*
94 * This table defines the highest QoS level that
95 * a thread marked with this QoS class can have.
96 */
97
98 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113};
114
115static void
116thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118static int
119thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121static void
122proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124static void
125proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127static void
128proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130static void
131thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
132
133static int
134thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136static int
137proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139static void
140thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
141
142static void
143thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
144
145void
146thread_policy_init(void) {
147 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
148 printf("QOS override mode: 0x%08x\n", qos_override_mode);
149 } else {
150 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
151 }
152}
153
154boolean_t
155thread_has_qos_policy(thread_t thread) {
156 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
157}
158
159
160static void
161thread_remove_qos_policy_locked(thread_t thread,
162 task_pend_token_t pend_token)
163{
164
165 __unused int prev_qos = thread->requested_policy.thrp_qos;
166
167 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
168
169 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
170 THREAD_QOS_UNSPECIFIED, 0, pend_token);
171}
172
173kern_return_t
174thread_remove_qos_policy(thread_t thread)
175{
176 struct task_pend_token pend_token = {};
177
178 thread_mtx_lock(thread);
179 if (!thread->active) {
180 thread_mtx_unlock(thread);
181 return KERN_TERMINATED;
182 }
183
184 thread_remove_qos_policy_locked(thread, &pend_token);
185
186 thread_mtx_unlock(thread);
187
188 thread_policy_update_complete_unlocked(thread, &pend_token);
189
190 return KERN_SUCCESS;
191}
192
193
194boolean_t
195thread_is_static_param(thread_t thread)
196{
197 if (thread->static_param) {
198 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
199 return TRUE;
200 }
201 return FALSE;
202}
203
204/*
205 * Relative priorities can range between 0REL and -15REL. These
206 * map to QoS-specific ranges, to create non-overlapping priority
207 * ranges.
208 */
209static int
210thread_qos_scaled_relative_priority(int qos, int qos_relprio)
211{
212 int next_lower_qos;
213
214 /* Fast path, since no validation or scaling is needed */
215 if (qos_relprio == 0) return 0;
216
217 switch (qos) {
218 case THREAD_QOS_USER_INTERACTIVE:
219 next_lower_qos = THREAD_QOS_USER_INITIATED;
220 break;
221 case THREAD_QOS_USER_INITIATED:
222 next_lower_qos = THREAD_QOS_LEGACY;
223 break;
224 case THREAD_QOS_LEGACY:
225 next_lower_qos = THREAD_QOS_UTILITY;
226 break;
227 case THREAD_QOS_UTILITY:
228 next_lower_qos = THREAD_QOS_BACKGROUND;
229 break;
230 case THREAD_QOS_MAINTENANCE:
231 case THREAD_QOS_BACKGROUND:
232 next_lower_qos = 0;
233 break;
234 default:
235 panic("Unrecognized QoS %d", qos);
236 return 0;
237 }
238
239 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
240 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
241
242 /*
243 * We now have the valid range that the scaled relative priority can map to. Note
244 * that the lower bound is exclusive, but the upper bound is inclusive. If the
245 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
246 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
247 * remainder.
248 */
249 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
250
251 return scaled_relprio;
252}
253
254/*
255 * flag set by -qos-policy-allow boot-arg to allow
256 * testing thread qos policy from userspace
257 */
258boolean_t allow_qos_policy_set = FALSE;
259
260kern_return_t
261thread_policy_set(
262 thread_t thread,
263 thread_policy_flavor_t flavor,
264 thread_policy_t policy_info,
265 mach_msg_type_number_t count)
266{
267 thread_qos_policy_data_t req_qos;
268 kern_return_t kr;
269
270 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
271
272 if (thread == THREAD_NULL)
273 return (KERN_INVALID_ARGUMENT);
274
275 if (allow_qos_policy_set == FALSE) {
276 if (thread_is_static_param(thread))
277 return (KERN_POLICY_STATIC);
278
279 if (flavor == THREAD_QOS_POLICY)
280 return (KERN_INVALID_ARGUMENT);
281 }
282
283 /* Threads without static_param set reset their QoS when other policies are applied. */
284 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
285 /* Store the existing tier, if we fail this call it is used to reset back. */
286 req_qos.qos_tier = thread->requested_policy.thrp_qos;
287 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
288
289 kr = thread_remove_qos_policy(thread);
290 if (kr != KERN_SUCCESS) {
291 return kr;
292 }
293 }
294
295 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
296
297 /* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
298 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
299 if (kr != KERN_SUCCESS) {
300 /* Reset back to our original tier as the set failed. */
301 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
302 }
303 }
304
305 return kr;
306}
307
308kern_return_t
309thread_policy_set_internal(
310 thread_t thread,
311 thread_policy_flavor_t flavor,
312 thread_policy_t policy_info,
313 mach_msg_type_number_t count)
314{
315 kern_return_t result = KERN_SUCCESS;
316 struct task_pend_token pend_token = {};
317
318 thread_mtx_lock(thread);
319 if (!thread->active) {
320 thread_mtx_unlock(thread);
321
322 return (KERN_TERMINATED);
323 }
324
325 switch (flavor) {
326
327 case THREAD_EXTENDED_POLICY:
328 {
329 boolean_t timeshare = TRUE;
330
331 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
332 thread_extended_policy_t info;
333
334 info = (thread_extended_policy_t)policy_info;
335 timeshare = info->timeshare;
336 }
337
338 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
339
340 spl_t s = splsched();
341 thread_lock(thread);
342
343 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
344
345 thread_unlock(thread);
346 splx(s);
347
348 pend_token.tpt_update_thread_sfi = 1;
349
350 break;
351 }
352
353 case THREAD_TIME_CONSTRAINT_POLICY:
354 {
355 thread_time_constraint_policy_t info;
356
357 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
358 result = KERN_INVALID_ARGUMENT;
359 break;
360 }
361
362 info = (thread_time_constraint_policy_t)policy_info;
363 if (info->constraint < info->computation ||
364 info->computation > max_rt_quantum ||
365 info->computation < min_rt_quantum ) {
366 result = KERN_INVALID_ARGUMENT;
367 break;
368 }
369
370 spl_t s = splsched();
371 thread_lock(thread);
372
373 thread->realtime.period = info->period;
374 thread->realtime.computation = info->computation;
375 thread->realtime.constraint = info->constraint;
376 thread->realtime.preemptible = info->preemptible;
377
378 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
379
380 thread_unlock(thread);
381 splx(s);
382
383 pend_token.tpt_update_thread_sfi = 1;
384
385 break;
386 }
387
388 case THREAD_PRECEDENCE_POLICY:
389 {
390 thread_precedence_policy_t info;
391
392 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
393 result = KERN_INVALID_ARGUMENT;
394 break;
395 }
396 info = (thread_precedence_policy_t)policy_info;
397
398 spl_t s = splsched();
399 thread_lock(thread);
400
401 thread->importance = info->importance;
402
403 thread_recompute_priority(thread);
404
405 thread_unlock(thread);
406 splx(s);
407
408 break;
409 }
410
411 case THREAD_AFFINITY_POLICY:
412 {
413 thread_affinity_policy_t info;
414
415 if (!thread_affinity_is_supported()) {
416 result = KERN_NOT_SUPPORTED;
417 break;
418 }
419 if (count < THREAD_AFFINITY_POLICY_COUNT) {
420 result = KERN_INVALID_ARGUMENT;
421 break;
422 }
423
424 info = (thread_affinity_policy_t) policy_info;
425 /*
426 * Unlock the thread mutex here and
427 * return directly after calling thread_affinity_set().
428 * This is necessary for correct lock ordering because
429 * thread_affinity_set() takes the task lock.
430 */
431 thread_mtx_unlock(thread);
432 return thread_affinity_set(thread, info->affinity_tag);
433 }
434
435#if CONFIG_EMBEDDED
436 case THREAD_BACKGROUND_POLICY:
437 {
438 thread_background_policy_t info;
439
440 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
441 result = KERN_INVALID_ARGUMENT;
442 break;
443 }
444
445 if (thread->task != current_task()) {
446 result = KERN_PROTECTION_FAILURE;
447 break;
448 }
449
450 info = (thread_background_policy_t) policy_info;
451
452 int enable;
453
454 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG)
455 enable = TASK_POLICY_ENABLE;
456 else
457 enable = TASK_POLICY_DISABLE;
458
459 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
460
461 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
462
463 break;
464 }
465#endif /* CONFIG_EMBEDDED */
466
467 case THREAD_THROUGHPUT_QOS_POLICY:
468 {
469 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
470 thread_throughput_qos_t tqos;
471
472 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
473 result = KERN_INVALID_ARGUMENT;
474 break;
475 }
476
477 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
478 break;
479
480 tqos = qos_extract(info->thread_throughput_qos_tier);
481
482 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
483 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
484
485 break;
486 }
487
488 case THREAD_LATENCY_QOS_POLICY:
489 {
490 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
491 thread_latency_qos_t lqos;
492
493 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
494 result = KERN_INVALID_ARGUMENT;
495 break;
496 }
497
498 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
499 break;
500
501 lqos = qos_extract(info->thread_latency_qos_tier);
502
503 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
504 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
505
506 break;
507 }
508
509 case THREAD_QOS_POLICY:
510 {
511 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
512
513 if (count < THREAD_QOS_POLICY_COUNT) {
514 result = KERN_INVALID_ARGUMENT;
515 break;
516 }
517
518 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
519 result = KERN_INVALID_ARGUMENT;
520 break;
521 }
522
523 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
524 result = KERN_INVALID_ARGUMENT;
525 break;
526 }
527
528 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
529 result = KERN_INVALID_ARGUMENT;
530 break;
531 }
532
533 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
534 info->qos_tier, -info->tier_importance, &pend_token);
535
536 break;
537 }
538
539 default:
540 result = KERN_INVALID_ARGUMENT;
541 break;
542 }
543
544 thread_mtx_unlock(thread);
545
546 thread_policy_update_complete_unlocked(thread, &pend_token);
547
548 return (result);
549}
550
551/*
552 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
553 * Both result in FIXED mode scheduling.
554 */
555static sched_mode_t
556convert_policy_to_sched_mode(integer_t policy) {
557 switch (policy) {
558 case POLICY_TIMESHARE:
559 return TH_MODE_TIMESHARE;
560 case POLICY_RR:
561 case POLICY_FIFO:
562 return TH_MODE_FIXED;
563 default:
564 panic("unexpected sched policy: %d", policy);
565 return TH_MODE_NONE;
566 }
567}
568
569/*
570 * Called either with the thread mutex locked
571 * or from the pthread kext in a 'safe place'.
572 */
573static kern_return_t
574thread_set_mode_and_absolute_pri_internal(thread_t thread,
575 sched_mode_t mode,
576 integer_t priority,
577 task_pend_token_t pend_token)
578{
579 kern_return_t kr = KERN_SUCCESS;
580
581 spl_t s = splsched();
582 thread_lock(thread);
583
584 /* This path isn't allowed to change a thread out of realtime. */
585 if ((thread->sched_mode == TH_MODE_REALTIME) ||
586 (thread->saved_mode == TH_MODE_REALTIME)) {
587 kr = KERN_FAILURE;
588 goto unlock;
589 }
590
591 if (thread->policy_reset) {
592 kr = KERN_SUCCESS;
593 goto unlock;
594 }
595
596 sched_mode_t old_mode = thread->sched_mode;
597
598 /*
599 * Reverse engineer and apply the correct importance value
600 * from the requested absolute priority value.
601 *
602 * TODO: Store the absolute priority value instead
603 */
604
605 if (priority >= thread->max_priority)
606 priority = thread->max_priority - thread->task_priority;
607 else if (priority >= MINPRI_KERNEL)
608 priority -= MINPRI_KERNEL;
609 else if (priority >= MINPRI_RESERVED)
610 priority -= MINPRI_RESERVED;
611 else
612 priority -= BASEPRI_DEFAULT;
613
614 priority += thread->task_priority;
615
616 if (priority > thread->max_priority)
617 priority = thread->max_priority;
618 else if (priority < MINPRI)
619 priority = MINPRI;
620
621 thread->importance = priority - thread->task_priority;
622
623 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
624
625 if (mode != old_mode)
626 pend_token->tpt_update_thread_sfi = 1;
627
628unlock:
629 thread_unlock(thread);
630 splx(s);
631
632 return kr;
633}
634
635uint8_t
636thread_workq_pri_for_qos(thread_qos_t qos)
637{
638 assert(qos < THREAD_QOS_LAST);
639 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
640}
641
642thread_qos_t
643thread_workq_qos_for_pri(int priority)
644{
645 int qos;
646 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
647 // indicate that workq should map >UI threads to workq's
648 // internal notation for above-UI work.
649 return THREAD_QOS_UNSPECIFIED;
650 }
651 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
652 // map a given priority up to the next nearest qos band.
653 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
654 return qos;
655 }
656 }
657 return THREAD_QOS_MAINTENANCE;
658}
659
660/*
661 * private interface for pthread workqueues
662 *
663 * Set scheduling policy & absolute priority for thread
664 * May be called with spinlocks held
665 * Thread mutex lock is not held
666 */
667void
668thread_reset_workq_qos(thread_t thread, uint32_t qos)
669{
670 struct task_pend_token pend_token = {};
671
672 assert(qos < THREAD_QOS_LAST);
673
674 spl_t s = splsched();
675 thread_lock(thread);
676
677 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
678 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
679 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
680 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
681 &pend_token);
682
683 assert(pend_token.tpt_update_sockets == 0);
684
685 thread_unlock(thread);
686 splx(s);
687
688 thread_policy_update_complete_unlocked(thread, &pend_token);
689}
690
691/*
692 * private interface for pthread workqueues
693 *
694 * Set scheduling policy & absolute priority for thread
695 * May be called with spinlocks held
696 * Thread mutex lock is held
697 */
698void
699thread_set_workq_override(thread_t thread, uint32_t qos)
700{
701 struct task_pend_token pend_token = {};
702
703 assert(qos < THREAD_QOS_LAST);
704
705 spl_t s = splsched();
706 thread_lock(thread);
707
708 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
709 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
710
711 assert(pend_token.tpt_update_sockets == 0);
712
713 thread_unlock(thread);
714 splx(s);
715
716 thread_policy_update_complete_unlocked(thread, &pend_token);
717}
718
719/*
720 * private interface for pthread workqueues
721 *
722 * Set scheduling policy & absolute priority for thread
723 * May be called with spinlocks held
724 * Thread mutex lock is not held
725 */
726void
727thread_set_workq_pri(thread_t thread,
728 thread_qos_t qos,
729 integer_t priority,
730 integer_t policy)
731{
732 struct task_pend_token pend_token = {};
733 sched_mode_t mode = convert_policy_to_sched_mode(policy);
734
735 assert(qos < THREAD_QOS_LAST);
736 assert(thread->static_param);
737
738 if (!thread->static_param || !thread->active)
739 return;
740
741 spl_t s = splsched();
742 thread_lock(thread);
743
744 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
745 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
746 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
747 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
748 0, &pend_token);
749
750 thread_unlock(thread);
751 splx(s);
752
753 /* Concern: this doesn't hold the mutex... */
754
755 __assert_only kern_return_t kr;
756 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
757 &pend_token);
758 assert(kr == KERN_SUCCESS);
759
760 if (pend_token.tpt_update_thread_sfi)
761 sfi_reevaluate(thread);
762}
763
764/*
765 * thread_set_mode_and_absolute_pri:
766 *
767 * Set scheduling policy & absolute priority for thread, for deprecated
768 * thread_set_policy and thread_policy interfaces.
769 *
770 * Called with nothing locked.
771 */
772kern_return_t
773thread_set_mode_and_absolute_pri(thread_t thread,
774 integer_t policy,
775 integer_t priority)
776{
777 kern_return_t kr = KERN_SUCCESS;
778 struct task_pend_token pend_token = {};
779
780 sched_mode_t mode = convert_policy_to_sched_mode(policy);
781
782 thread_mtx_lock(thread);
783
784 if (!thread->active) {
785 kr = KERN_TERMINATED;
786 goto unlock;
787 }
788
789 if (thread_is_static_param(thread)) {
790 kr = KERN_POLICY_STATIC;
791 goto unlock;
792 }
793
794 /* Setting legacy policies on threads kills the current QoS */
795 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
796 thread_remove_qos_policy_locked(thread, &pend_token);
797
798 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
799
800unlock:
801 thread_mtx_unlock(thread);
802
803 thread_policy_update_complete_unlocked(thread, &pend_token);
804
805 return (kr);
806}
807
808/*
809 * Set the thread's requested mode and recompute priority
810 * Called with thread mutex and thread locked
811 *
812 * TODO: Mitigate potential problems caused by moving thread to end of runq
813 * whenever its priority is recomputed
814 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
815 */
816static void
817thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
818{
819 if (thread->policy_reset)
820 return;
821
822 boolean_t removed = thread_run_queue_remove(thread);
823
824 /*
825 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
826 * That way there's zero confusion over which the user wants
827 * and which the kernel wants.
828 */
829 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
830 thread->saved_mode = mode;
831 else
832 sched_set_thread_mode(thread, mode);
833
834 thread_recompute_priority(thread);
835
836 if (removed)
837 thread_run_queue_reinsert(thread, SCHED_TAILQ);
838}
839
840/* called at splsched with thread lock locked */
841static void
842thread_update_qos_cpu_time_locked(thread_t thread)
843{
844 task_t task = thread->task;
845 uint64_t timer_sum, timer_delta;
846
847 /*
848 * This is only as accurate as the distance between
849 * last context switch (embedded) or last user/kernel boundary transition (desktop)
850 * because user_timer and system_timer are only updated then.
851 *
852 * TODO: Consider running a timer_update operation here to update it first.
853 * Maybe doable with interrupts disabled from current thread.
854 * If the thread is on a different core, may not be easy to get right.
855 *
856 * TODO: There should be a function for this in timer.c
857 */
858
859 timer_sum = timer_grab(&thread->user_timer);
860 timer_sum += timer_grab(&thread->system_timer);
861 timer_delta = timer_sum - thread->vtimer_qos_save;
862
863 thread->vtimer_qos_save = timer_sum;
864
865 uint64_t* task_counter = NULL;
866
867 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
868 switch (thread->effective_policy.thep_qos) {
869 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
870 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
871 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
872 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
873 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
874 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
875 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
876 default:
877 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
878 }
879
880 OSAddAtomic64(timer_delta, task_counter);
881
882 /* Update the task-level qos stats atomically, because we don't have the task lock. */
883 switch (thread->requested_policy.thrp_qos) {
884 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
885 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
886 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
887 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
888 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
889 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
890 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
891 default:
892 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
893 }
894
895 OSAddAtomic64(timer_delta, task_counter);
896}
897
898/*
899 * called with no thread locks held
900 * may hold task lock
901 */
902void
903thread_update_qos_cpu_time(thread_t thread)
904{
905 thread_mtx_lock(thread);
906
907 spl_t s = splsched();
908 thread_lock(thread);
909
910 thread_update_qos_cpu_time_locked(thread);
911
912 thread_unlock(thread);
913 splx(s);
914
915 thread_mtx_unlock(thread);
916}
917
918/*
919 * Calculate base priority from thread attributes, and set it on the thread
920 *
921 * Called with thread_lock and thread mutex held.
922 */
923void
924thread_recompute_priority(
925 thread_t thread)
926{
927 integer_t priority;
928
929 if (thread->policy_reset)
930 return;
931
932 if (thread->sched_mode == TH_MODE_REALTIME) {
933 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
934 return;
935 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
936 int qos = thread->effective_policy.thep_qos;
937 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
938 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
939 int qos_scaled_relprio;
940
941 assert(qos >= 0 && qos < THREAD_QOS_LAST);
942 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
943
944 priority = thread_qos_policy_params.qos_pri[qos];
945 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
946
947 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
948 /* Bump priority 46 to 47 when in a frontmost app */
949 qos_scaled_relprio += 1;
950 }
951
952 /* TODO: factor in renice priority here? */
953
954 priority += qos_scaled_relprio;
955 } else {
956 if (thread->importance > MAXPRI)
957 priority = MAXPRI;
958 else if (thread->importance < -MAXPRI)
959 priority = -MAXPRI;
960 else
961 priority = thread->importance;
962
963 priority += thread->task_priority;
964 }
965
966 priority = MAX(priority, thread->user_promotion_basepri);
967
968 /*
969 * Clamp priority back into the allowed range for this task.
970 * The initial priority value could be out of this range due to:
971 * Task clamped to BG or Utility (max-pri is 4, or 20)
972 * Task is user task (max-pri is 63)
973 * Task is kernel task (max-pri is 95)
974 * Note that thread->importance is user-settable to any integer
975 * via THREAD_PRECEDENCE_POLICY.
976 */
977 if (priority > thread->max_priority)
978 priority = thread->max_priority;
979 else if (priority < MINPRI)
980 priority = MINPRI;
981
982 if (thread->saved_mode == TH_MODE_REALTIME &&
983 thread->sched_flags & TH_SFLAG_FAILSAFE)
984 priority = DEPRESSPRI;
985
986 if (thread->effective_policy.thep_terminated == TRUE) {
987 /*
988 * We temporarily want to override the expected priority to
989 * ensure that the thread exits in a timely manner.
990 * Note that this is allowed to exceed thread->max_priority
991 * so that the thread is no longer clamped to background
992 * during the final exit phase.
993 */
994 if (priority < thread->task_priority)
995 priority = thread->task_priority;
996 if (priority < BASEPRI_DEFAULT)
997 priority = BASEPRI_DEFAULT;
998 }
999
1000#if CONFIG_EMBEDDED
1001 /* No one can have a base priority less than MAXPRI_THROTTLE */
1002 if (priority < MAXPRI_THROTTLE)
1003 priority = MAXPRI_THROTTLE;
1004#endif /* CONFIG_EMBEDDED */
1005
1006 sched_set_thread_base_priority(thread, priority);
1007}
1008
1009/* Called with the task lock held, but not the thread mutex or spinlock */
1010void
1011thread_policy_update_tasklocked(
1012 thread_t thread,
1013 integer_t priority,
1014 integer_t max_priority,
1015 task_pend_token_t pend_token)
1016{
1017 thread_mtx_lock(thread);
1018
1019 if (!thread->active || thread->policy_reset) {
1020 thread_mtx_unlock(thread);
1021 return;
1022 }
1023
1024 spl_t s = splsched();
1025 thread_lock(thread);
1026
1027 __unused
1028 integer_t old_max_priority = thread->max_priority;
1029
1030 thread->task_priority = priority;
1031 thread->max_priority = max_priority;
1032
1033#if CONFIG_EMBEDDED
1034 /*
1035 * When backgrounding a thread, iOS has the semantic that
1036 * realtime and fixed priority threads should be demoted
1037 * to timeshare background threads.
1038 *
1039 * On OSX, realtime and fixed priority threads don't lose their mode.
1040 *
1041 * TODO: Do this inside the thread policy update routine in order to avoid double
1042 * remove/reinsert for a runnable thread
1043 */
1044 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1045 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1046 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1047 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1048 }
1049#endif /* CONFIG_EMBEDDED */
1050
1051 thread_policy_update_spinlocked(thread, TRUE, pend_token);
1052
1053 thread_unlock(thread);
1054 splx(s);
1055
1056 thread_mtx_unlock(thread);
1057}
1058
1059/*
1060 * Reset thread to default state in preparation for termination
1061 * Called with thread mutex locked
1062 *
1063 * Always called on current thread, so we don't need a run queue remove
1064 */
1065void
1066thread_policy_reset(
1067 thread_t thread)
1068{
1069 spl_t s;
1070
1071 assert(thread == current_thread());
1072
1073 s = splsched();
1074 thread_lock(thread);
1075
1076 if (thread->sched_flags & TH_SFLAG_FAILSAFE)
1077 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1078
1079 if (thread->sched_flags & TH_SFLAG_THROTTLED)
1080 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1081
1082 /* At this point, the various demotions should be inactive */
1083 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1084 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1085 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1086
1087 /* Reset thread back to task-default basepri and mode */
1088 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1089
1090 sched_set_thread_mode(thread, newmode);
1091
1092 thread->importance = 0;
1093
1094 /* Prevent further changes to thread base priority or mode */
1095 thread->policy_reset = 1;
1096
1097 sched_set_thread_base_priority(thread, thread->task_priority);
1098
1099 thread_unlock(thread);
1100 splx(s);
1101}
1102
1103kern_return_t
1104thread_policy_get(
1105 thread_t thread,
1106 thread_policy_flavor_t flavor,
1107 thread_policy_t policy_info,
1108 mach_msg_type_number_t *count,
1109 boolean_t *get_default)
1110{
1111 kern_return_t result = KERN_SUCCESS;
1112
1113 if (thread == THREAD_NULL)
1114 return (KERN_INVALID_ARGUMENT);
1115
1116 thread_mtx_lock(thread);
1117 if (!thread->active) {
1118 thread_mtx_unlock(thread);
1119
1120 return (KERN_TERMINATED);
1121 }
1122
1123 switch (flavor) {
1124
1125 case THREAD_EXTENDED_POLICY:
1126 {
1127 boolean_t timeshare = TRUE;
1128
1129 if (!(*get_default)) {
1130 spl_t s = splsched();
1131 thread_lock(thread);
1132
1133 if ( (thread->sched_mode != TH_MODE_REALTIME) &&
1134 (thread->saved_mode != TH_MODE_REALTIME) ) {
1135 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
1136 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1137 else
1138 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1139 }
1140 else
1141 *get_default = TRUE;
1142
1143 thread_unlock(thread);
1144 splx(s);
1145 }
1146
1147 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1148 thread_extended_policy_t info;
1149
1150 info = (thread_extended_policy_t)policy_info;
1151 info->timeshare = timeshare;
1152 }
1153
1154 break;
1155 }
1156
1157 case THREAD_TIME_CONSTRAINT_POLICY:
1158 {
1159 thread_time_constraint_policy_t info;
1160
1161 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1162 result = KERN_INVALID_ARGUMENT;
1163 break;
1164 }
1165
1166 info = (thread_time_constraint_policy_t)policy_info;
1167
1168 if (!(*get_default)) {
1169 spl_t s = splsched();
1170 thread_lock(thread);
1171
1172 if ( (thread->sched_mode == TH_MODE_REALTIME) ||
1173 (thread->saved_mode == TH_MODE_REALTIME) ) {
1174 info->period = thread->realtime.period;
1175 info->computation = thread->realtime.computation;
1176 info->constraint = thread->realtime.constraint;
1177 info->preemptible = thread->realtime.preemptible;
1178 }
1179 else
1180 *get_default = TRUE;
1181
1182 thread_unlock(thread);
1183 splx(s);
1184 }
1185
1186 if (*get_default) {
1187 info->period = 0;
1188 info->computation = default_timeshare_computation;
1189 info->constraint = default_timeshare_constraint;
1190 info->preemptible = TRUE;
1191 }
1192
1193 break;
1194 }
1195
1196 case THREAD_PRECEDENCE_POLICY:
1197 {
1198 thread_precedence_policy_t info;
1199
1200 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1201 result = KERN_INVALID_ARGUMENT;
1202 break;
1203 }
1204
1205 info = (thread_precedence_policy_t)policy_info;
1206
1207 if (!(*get_default)) {
1208 spl_t s = splsched();
1209 thread_lock(thread);
1210
1211 info->importance = thread->importance;
1212
1213 thread_unlock(thread);
1214 splx(s);
1215 }
1216 else
1217 info->importance = 0;
1218
1219 break;
1220 }
1221
1222 case THREAD_AFFINITY_POLICY:
1223 {
1224 thread_affinity_policy_t info;
1225
1226 if (!thread_affinity_is_supported()) {
1227 result = KERN_NOT_SUPPORTED;
1228 break;
1229 }
1230 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1231 result = KERN_INVALID_ARGUMENT;
1232 break;
1233 }
1234
1235 info = (thread_affinity_policy_t)policy_info;
1236
1237 if (!(*get_default))
1238 info->affinity_tag = thread_affinity_get(thread);
1239 else
1240 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1241
1242 break;
1243 }
1244
1245 case THREAD_POLICY_STATE:
1246 {
1247 thread_policy_state_t info;
1248
1249 if (*count < THREAD_POLICY_STATE_COUNT) {
1250 result = KERN_INVALID_ARGUMENT;
1251 break;
1252 }
1253
1254 /* Only root can get this info */
1255 if (current_task()->sec_token.val[0] != 0) {
1256 result = KERN_PROTECTION_FAILURE;
1257 break;
1258 }
1259
1260 info = (thread_policy_state_t)(void*)policy_info;
1261
1262 if (!(*get_default)) {
1263 info->flags = 0;
1264
1265 spl_t s = splsched();
1266 thread_lock(thread);
1267
1268 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1269
1270 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1271 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1272
1273 info->thps_user_promotions = 0;
1274 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1275 info->thps_ipc_overrides = thread->ipc_overrides;
1276
1277 proc_get_thread_policy_bitfield(thread, info);
1278
1279 thread_unlock(thread);
1280 splx(s);
1281 } else {
1282 info->requested = 0;
1283 info->effective = 0;
1284 info->pending = 0;
1285 }
1286
1287 break;
1288 }
1289
1290 case THREAD_LATENCY_QOS_POLICY:
1291 {
1292 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1293 thread_latency_qos_t plqos;
1294
1295 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1296 result = KERN_INVALID_ARGUMENT;
1297 break;
1298 }
1299
1300 if (*get_default) {
1301 plqos = 0;
1302 } else {
1303 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1304 }
1305
1306 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1307 }
1308 break;
1309
1310 case THREAD_THROUGHPUT_QOS_POLICY:
1311 {
1312 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1313 thread_throughput_qos_t ptqos;
1314
1315 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1316 result = KERN_INVALID_ARGUMENT;
1317 break;
1318 }
1319
1320 if (*get_default) {
1321 ptqos = 0;
1322 } else {
1323 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1324 }
1325
1326 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1327 }
1328 break;
1329
1330 case THREAD_QOS_POLICY:
1331 {
1332 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1333
1334 if (*count < THREAD_QOS_POLICY_COUNT) {
1335 result = KERN_INVALID_ARGUMENT;
1336 break;
1337 }
1338
1339 if (!(*get_default)) {
1340 int relprio_value = 0;
1341 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1342 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1343
1344 info->tier_importance = -relprio_value;
1345 } else {
1346 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1347 info->tier_importance = 0;
1348 }
1349
1350 break;
1351 }
1352
1353 default:
1354 result = KERN_INVALID_ARGUMENT;
1355 break;
1356 }
1357
1358 thread_mtx_unlock(thread);
1359
1360 return (result);
1361}
1362
1363void
1364thread_policy_create(thread_t thread)
1365{
1366 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1367 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1368 thread_tid(thread), theffective_0(thread),
1369 theffective_1(thread), thread->base_pri, 0);
1370
1371 /* We pass a pend token but ignore it */
1372 struct task_pend_token pend_token = {};
1373
1374 thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1375
1376 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1377 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1378 thread_tid(thread), theffective_0(thread),
1379 theffective_1(thread), thread->base_pri, 0);
1380}
1381
1382static void
1383thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1384{
1385 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1386 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1387 thread_tid(thread), theffective_0(thread),
1388 theffective_1(thread), thread->base_pri, 0);
1389
1390 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1391
1392 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1393 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1394 thread_tid(thread), theffective_0(thread),
1395 theffective_1(thread), thread->base_pri, 0);
1396}
1397
1398
1399
1400/*
1401 * One thread state update function TO RULE THEM ALL
1402 *
1403 * This function updates the thread effective policy fields
1404 * and pushes the results to the relevant subsystems.
1405 *
1406 * Returns TRUE if a pended action needs to be run.
1407 *
1408 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1409 */
1410static void
1411thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1412 task_pend_token_t pend_token)
1413{
1414 /*
1415 * Step 1:
1416 * Gather requested policy and effective task state
1417 */
1418
1419 struct thread_requested_policy requested = thread->requested_policy;
1420 struct task_effective_policy task_effective = thread->task->effective_policy;
1421
1422 /*
1423 * Step 2:
1424 * Calculate new effective policies from requested policy, task and thread state
1425 * Rules:
1426 * Don't change requested, it won't take effect
1427 */
1428
1429 struct thread_effective_policy next = {};
1430
1431 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1432
1433 uint32_t next_qos = requested.thrp_qos;
1434
1435 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1436 next_qos = MAX(requested.thrp_qos_override, next_qos);
1437 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1438 next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
1439 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1440 }
1441
1442 next.thep_qos = next_qos;
1443
1444 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1445 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1446 if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
1447 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1448 else
1449 next.thep_qos = task_effective.tep_qos_clamp;
1450 }
1451
1452 /*
1453 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1454 * This allows QoS promotions to work properly even after the process is unclamped.
1455 */
1456 next.thep_qos_promote = next.thep_qos;
1457
1458 /* The ceiling only applies to threads that are in the QoS world */
1459 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1460 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1461 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1462 }
1463
1464 /* Apply the sync ipc qos override */
1465 assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1466
1467 /*
1468 * The QoS relative priority is only applicable when the original programmer's
1469 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1470 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1471 * since otherwise it would be lower than unclamped threads. Similarly, in the
1472 * presence of boosting, the programmer doesn't know what other actors
1473 * are boosting the thread.
1474 */
1475 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1476 (requested.thrp_qos == next.thep_qos) &&
1477 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1478 next.thep_qos_relprio = requested.thrp_qos_relprio;
1479 } else {
1480 next.thep_qos_relprio = 0;
1481 }
1482
1483 /* Calculate DARWIN_BG */
1484 boolean_t wants_darwinbg = FALSE;
1485 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
1486
1487 /*
1488 * If DARWIN_BG has been requested at either level, it's engaged.
1489 * darwinbg threads always create bg sockets,
1490 * but only some types of darwinbg change the sockets
1491 * after they're created
1492 */
1493 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg)
1494 wants_all_sockets_bg = wants_darwinbg = TRUE;
1495
1496 if (requested.thrp_pidbind_bg)
1497 wants_all_sockets_bg = wants_darwinbg = TRUE;
1498
1499 if (task_effective.tep_darwinbg)
1500 wants_darwinbg = TRUE;
1501
1502 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1503 next.thep_qos == THREAD_QOS_MAINTENANCE)
1504 wants_darwinbg = TRUE;
1505
1506 /* Calculate side effects of DARWIN_BG */
1507
1508 if (wants_darwinbg)
1509 next.thep_darwinbg = 1;
1510
1511 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg)
1512 next.thep_new_sockets_bg = 1;
1513
1514 /* Don't use task_effective.tep_all_sockets_bg here */
1515 if (wants_all_sockets_bg)
1516 next.thep_all_sockets_bg = 1;
1517
1518 /* darwinbg implies background QOS (or lower) */
1519 if (next.thep_darwinbg &&
1520 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1521 next.thep_qos = THREAD_QOS_BACKGROUND;
1522 next.thep_qos_relprio = 0;
1523 }
1524
1525 /* Calculate IO policy */
1526
1527 int iopol = THROTTLE_LEVEL_TIER0;
1528
1529 /* Factor in the task's IO policy */
1530 if (next.thep_darwinbg)
1531 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1532
1533 iopol = MAX(iopol, task_effective.tep_io_tier);
1534
1535 /* Look up the associated IO tier value for the QoS class */
1536 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1537
1538 iopol = MAX(iopol, requested.thrp_int_iotier);
1539 iopol = MAX(iopol, requested.thrp_ext_iotier);
1540
1541 next.thep_io_tier = iopol;
1542
1543 /*
1544 * If a QoS override is causing IO to go into a lower tier, we also set
1545 * the passive bit so that a thread doesn't end up stuck in its own throttle
1546 * window when the override goes away.
1547 */
1548 boolean_t qos_io_override_active = FALSE;
1549 if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1550 thread_qos_policy_params.qos_iotier[requested.thrp_qos])
1551 qos_io_override_active = TRUE;
1552
1553 /* Calculate Passive IO policy */
1554 if (requested.thrp_ext_iopassive ||
1555 requested.thrp_int_iopassive ||
1556 qos_io_override_active ||
1557 task_effective.tep_io_passive )
1558 next.thep_io_passive = 1;
1559
1560 /* Calculate timer QOS */
1561 uint32_t latency_qos = requested.thrp_latency_qos;
1562
1563 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1564 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1565
1566 next.thep_latency_qos = latency_qos;
1567
1568 /* Calculate throughput QOS */
1569 uint32_t through_qos = requested.thrp_through_qos;
1570
1571 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1572 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1573
1574 next.thep_through_qos = through_qos;
1575
1576 if (task_effective.tep_terminated || requested.thrp_terminated) {
1577 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1578 next.thep_terminated = 1;
1579 next.thep_darwinbg = 0;
1580 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1581 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1582 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1583 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1584 }
1585
1586 /*
1587 * Step 3:
1588 * Swap out old policy for new policy
1589 */
1590
1591 struct thread_effective_policy prev = thread->effective_policy;
1592
1593 thread_update_qos_cpu_time_locked(thread);
1594
1595 /* This is the point where the new values become visible to other threads */
1596 thread->effective_policy = next;
1597
1598 /*
1599 * Step 4:
1600 * Pend updates that can't be done while holding the thread lock
1601 */
1602
1603 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg)
1604 pend_token->tpt_update_sockets = 1;
1605
1606 /* TODO: Doesn't this only need to be done if the throttle went up? */
1607 if (prev.thep_io_tier != next.thep_io_tier)
1608 pend_token->tpt_update_throttle = 1;
1609
1610 /*
1611 * Check for the attributes that sfi_thread_classify() consults,
1612 * and trigger SFI re-evaluation.
1613 */
1614 if (prev.thep_qos != next.thep_qos ||
1615 prev.thep_darwinbg != next.thep_darwinbg )
1616 pend_token->tpt_update_thread_sfi = 1;
1617
1618 /*
1619 * Step 5:
1620 * Update other subsystems as necessary if something has changed
1621 */
1622
1623 /* Check for the attributes that thread_recompute_priority() consults */
1624 if (prev.thep_qos != next.thep_qos ||
1625 prev.thep_qos_relprio != next.thep_qos_relprio ||
1626 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1627 prev.thep_terminated != next.thep_terminated ||
1628 pend_token->tpt_force_recompute_pri == 1 ||
1629 recompute_priority) {
1630 thread_recompute_priority(thread);
1631 }
1632}
1633
1634
1635/*
1636 * Initiate a thread policy state transition on a thread with its TID
1637 * Useful if you cannot guarantee the thread won't get terminated
1638 * Precondition: No locks are held
1639 * Will take task lock - using the non-tid variant is faster
1640 * if you already have a thread ref.
1641 */
1642void
1643proc_set_thread_policy_with_tid(task_t task,
1644 uint64_t tid,
1645 int category,
1646 int flavor,
1647 int value)
1648{
1649 /* takes task lock, returns ref'ed thread or NULL */
1650 thread_t thread = task_findtid(task, tid);
1651
1652 if (thread == THREAD_NULL)
1653 return;
1654
1655 proc_set_thread_policy(thread, category, flavor, value);
1656
1657 thread_deallocate(thread);
1658}
1659
1660/*
1661 * Initiate a thread policy transition on a thread
1662 * This path supports networking transitions (i.e. darwinbg transitions)
1663 * Precondition: No locks are held
1664 */
1665void
1666proc_set_thread_policy(thread_t thread,
1667 int category,
1668 int flavor,
1669 int value)
1670{
1671 struct task_pend_token pend_token = {};
1672
1673 thread_mtx_lock(thread);
1674
1675 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1676
1677 thread_mtx_unlock(thread);
1678
1679 thread_policy_update_complete_unlocked(thread, &pend_token);
1680}
1681
1682/*
1683 * Do the things that can't be done while holding a thread mutex.
1684 * These are set up to call back into thread policy to get the latest value,
1685 * so they don't have to be synchronized with the update.
1686 * The only required semantic is 'call this sometime after updating effective policy'
1687 *
1688 * Precondition: Thread mutex is not held
1689 *
1690 * This may be called with the task lock held, but in that case it won't be
1691 * called with tpt_update_sockets set.
1692 */
1693void
1694thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1695{
1696#ifdef MACH_BSD
1697 if (pend_token->tpt_update_sockets)
1698 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1699#endif /* MACH_BSD */
1700
1701 if (pend_token->tpt_update_throttle)
1702 rethrottle_thread(thread->uthread);
1703
1704 if (pend_token->tpt_update_thread_sfi)
1705 sfi_reevaluate(thread);
1706}
1707
1708/*
1709 * Set and update thread policy
1710 * Thread mutex might be held
1711 */
1712static void
1713proc_set_thread_policy_locked(thread_t thread,
1714 int category,
1715 int flavor,
1716 int value,
1717 int value2,
1718 task_pend_token_t pend_token)
1719{
1720 spl_t s = splsched();
1721 thread_lock(thread);
1722
1723 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1724
1725 thread_unlock(thread);
1726 splx(s);
1727}
1728
1729/*
1730 * Set and update thread policy
1731 * Thread spinlock is held
1732 */
1733static void
1734proc_set_thread_policy_spinlocked(thread_t thread,
1735 int category,
1736 int flavor,
1737 int value,
1738 int value2,
1739 task_pend_token_t pend_token)
1740{
1741 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1742 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1743 thread_tid(thread), threquested_0(thread),
1744 threquested_1(thread), value, 0);
1745
1746 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
1747
1748 thread_policy_update_spinlocked(thread, FALSE, pend_token);
1749
1750 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1751 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1752 thread_tid(thread), threquested_0(thread),
1753 threquested_1(thread), tpending(pend_token), 0);
1754}
1755
1756/*
1757 * Set the requested state for a specific flavor to a specific value.
1758 */
1759static void
1760thread_set_requested_policy_spinlocked(thread_t thread,
1761 int category,
1762 int flavor,
1763 int value,
1764 int value2)
1765{
1766 int tier, passive;
1767
1768 struct thread_requested_policy requested = thread->requested_policy;
1769
1770 switch (flavor) {
1771
1772 /* Category: EXTERNAL and INTERNAL, thread and task */
1773
1774 case TASK_POLICY_DARWIN_BG:
1775 if (category == TASK_POLICY_EXTERNAL)
1776 requested.thrp_ext_darwinbg = value;
1777 else
1778 requested.thrp_int_darwinbg = value;
1779 break;
1780
1781 case TASK_POLICY_IOPOL:
1782 proc_iopol_to_tier(value, &tier, &passive);
1783 if (category == TASK_POLICY_EXTERNAL) {
1784 requested.thrp_ext_iotier = tier;
1785 requested.thrp_ext_iopassive = passive;
1786 } else {
1787 requested.thrp_int_iotier = tier;
1788 requested.thrp_int_iopassive = passive;
1789 }
1790 break;
1791
1792 case TASK_POLICY_IO:
1793 if (category == TASK_POLICY_EXTERNAL)
1794 requested.thrp_ext_iotier = value;
1795 else
1796 requested.thrp_int_iotier = value;
1797 break;
1798
1799 case TASK_POLICY_PASSIVE_IO:
1800 if (category == TASK_POLICY_EXTERNAL)
1801 requested.thrp_ext_iopassive = value;
1802 else
1803 requested.thrp_int_iopassive = value;
1804 break;
1805
1806 /* Category: ATTRIBUTE, thread only */
1807
1808 case TASK_POLICY_PIDBIND_BG:
1809 assert(category == TASK_POLICY_ATTRIBUTE);
1810 requested.thrp_pidbind_bg = value;
1811 break;
1812
1813 case TASK_POLICY_LATENCY_QOS:
1814 assert(category == TASK_POLICY_ATTRIBUTE);
1815 requested.thrp_latency_qos = value;
1816 break;
1817
1818 case TASK_POLICY_THROUGH_QOS:
1819 assert(category == TASK_POLICY_ATTRIBUTE);
1820 requested.thrp_through_qos = value;
1821 break;
1822
1823 case TASK_POLICY_QOS:
1824 assert(category == TASK_POLICY_ATTRIBUTE);
1825 requested.thrp_qos = value;
1826 break;
1827
1828 case TASK_POLICY_QOS_OVERRIDE:
1829 assert(category == TASK_POLICY_ATTRIBUTE);
1830 requested.thrp_qos_override = value;
1831 break;
1832
1833 case TASK_POLICY_QOS_AND_RELPRIO:
1834 assert(category == TASK_POLICY_ATTRIBUTE);
1835 requested.thrp_qos = value;
1836 requested.thrp_qos_relprio = value2;
1837 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1838 break;
1839
1840 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1841 assert(category == TASK_POLICY_ATTRIBUTE);
1842 requested.thrp_qos_workq_override = value;
1843 break;
1844
1845 case TASK_POLICY_QOS_PROMOTE:
1846 assert(category == TASK_POLICY_ATTRIBUTE);
1847 requested.thrp_qos_promote = value;
1848 break;
1849
1850 case TASK_POLICY_QOS_IPC_OVERRIDE:
1851 assert(category == TASK_POLICY_ATTRIBUTE);
1852 requested.thrp_qos_ipc_override = value;
1853 break;
1854
1855 case TASK_POLICY_TERMINATED:
1856 assert(category == TASK_POLICY_ATTRIBUTE);
1857 requested.thrp_terminated = value;
1858 break;
1859
1860 default:
1861 panic("unknown task policy: %d %d %d", category, flavor, value);
1862 break;
1863 }
1864
1865 thread->requested_policy = requested;
1866}
1867
1868/*
1869 * Gets what you set. Effective values may be different.
1870 * Precondition: No locks are held
1871 */
1872int
1873proc_get_thread_policy(thread_t thread,
1874 int category,
1875 int flavor)
1876{
1877 int value = 0;
1878 thread_mtx_lock(thread);
1879 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
1880 thread_mtx_unlock(thread);
1881 return value;
1882}
1883
1884static int
1885proc_get_thread_policy_locked(thread_t thread,
1886 int category,
1887 int flavor,
1888 int* value2)
1889{
1890 int value = 0;
1891
1892 spl_t s = splsched();
1893 thread_lock(thread);
1894
1895 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
1896
1897 thread_unlock(thread);
1898 splx(s);
1899
1900 return value;
1901}
1902
1903/*
1904 * Gets what you set. Effective values may be different.
1905 */
1906static int
1907thread_get_requested_policy_spinlocked(thread_t thread,
1908 int category,
1909 int flavor,
1910 int* value2)
1911{
1912 int value = 0;
1913
1914 struct thread_requested_policy requested = thread->requested_policy;
1915
1916 switch (flavor) {
1917 case TASK_POLICY_DARWIN_BG:
1918 if (category == TASK_POLICY_EXTERNAL)
1919 value = requested.thrp_ext_darwinbg;
1920 else
1921 value = requested.thrp_int_darwinbg;
1922 break;
1923 case TASK_POLICY_IOPOL:
1924 if (category == TASK_POLICY_EXTERNAL)
1925 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
1926 requested.thrp_ext_iopassive);
1927 else
1928 value = proc_tier_to_iopol(requested.thrp_int_iotier,
1929 requested.thrp_int_iopassive);
1930 break;
1931 case TASK_POLICY_IO:
1932 if (category == TASK_POLICY_EXTERNAL)
1933 value = requested.thrp_ext_iotier;
1934 else
1935 value = requested.thrp_int_iotier;
1936 break;
1937 case TASK_POLICY_PASSIVE_IO:
1938 if (category == TASK_POLICY_EXTERNAL)
1939 value = requested.thrp_ext_iopassive;
1940 else
1941 value = requested.thrp_int_iopassive;
1942 break;
1943 case TASK_POLICY_QOS:
1944 assert(category == TASK_POLICY_ATTRIBUTE);
1945 value = requested.thrp_qos;
1946 break;
1947 case TASK_POLICY_QOS_OVERRIDE:
1948 assert(category == TASK_POLICY_ATTRIBUTE);
1949 value = requested.thrp_qos_override;
1950 break;
1951 case TASK_POLICY_LATENCY_QOS:
1952 assert(category == TASK_POLICY_ATTRIBUTE);
1953 value = requested.thrp_latency_qos;
1954 break;
1955 case TASK_POLICY_THROUGH_QOS:
1956 assert(category == TASK_POLICY_ATTRIBUTE);
1957 value = requested.thrp_through_qos;
1958 break;
1959 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1960 assert(category == TASK_POLICY_ATTRIBUTE);
1961 value = requested.thrp_qos_workq_override;
1962 break;
1963 case TASK_POLICY_QOS_AND_RELPRIO:
1964 assert(category == TASK_POLICY_ATTRIBUTE);
1965 assert(value2 != NULL);
1966 value = requested.thrp_qos;
1967 *value2 = requested.thrp_qos_relprio;
1968 break;
1969 case TASK_POLICY_QOS_PROMOTE:
1970 assert(category == TASK_POLICY_ATTRIBUTE);
1971 value = requested.thrp_qos_promote;
1972 break;
1973 case TASK_POLICY_QOS_IPC_OVERRIDE:
1974 assert(category == TASK_POLICY_ATTRIBUTE);
1975 value = requested.thrp_qos_ipc_override;
1976 break;
1977 case TASK_POLICY_TERMINATED:
1978 assert(category == TASK_POLICY_ATTRIBUTE);
1979 value = requested.thrp_terminated;
1980 break;
1981
1982 default:
1983 panic("unknown policy_flavor %d", flavor);
1984 break;
1985 }
1986
1987 return value;
1988}
1989
1990/*
1991 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1992 *
1993 * NOTE: This accessor does not take the task or thread lock.
1994 * Notifications of state updates need to be externally synchronized with state queries.
1995 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1996 * within the context of a timer interrupt.
1997 *
1998 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
1999 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2000 * I don't think that cost is worth not having the right answer.
2001 */
2002int
2003proc_get_effective_thread_policy(thread_t thread,
2004 int flavor)
2005{
2006 int value = 0;
2007
2008 switch (flavor) {
2009 case TASK_POLICY_DARWIN_BG:
2010 /*
2011 * This call is used within the timer layer, as well as
2012 * prioritizing requests to the graphics system.
2013 * It also informs SFI and originator-bg-state.
2014 * Returns 1 for background mode, 0 for normal mode
2015 */
2016
2017 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2018 break;
2019 case TASK_POLICY_IO:
2020 /*
2021 * The I/O system calls here to find out what throttling tier to apply to an operation.
2022 * Returns THROTTLE_LEVEL_* values
2023 */
2024 value = thread->effective_policy.thep_io_tier;
2025 if (thread->iotier_override != THROTTLE_LEVEL_NONE)
2026 value = MIN(value, thread->iotier_override);
2027 break;
2028 case TASK_POLICY_PASSIVE_IO:
2029 /*
2030 * The I/O system calls here to find out whether an operation should be passive.
2031 * (i.e. not cause operations with lower throttle tiers to be throttled)
2032 * Returns 1 for passive mode, 0 for normal mode
2033 *
2034 * If an override is causing IO to go into a lower tier, we also set
2035 * the passive bit so that a thread doesn't end up stuck in its own throttle
2036 * window when the override goes away.
2037 */
2038 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2039 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2040 thread->iotier_override < thread->effective_policy.thep_io_tier)
2041 value = 1;
2042 break;
2043 case TASK_POLICY_ALL_SOCKETS_BG:
2044 /*
2045 * do_background_socket() calls this to determine whether
2046 * it should change the thread's sockets
2047 * Returns 1 for background mode, 0 for normal mode
2048 * This consults both thread and task so un-DBGing a thread while the task is BG
2049 * doesn't get you out of the network throttle.
2050 */
2051 value = (thread->effective_policy.thep_all_sockets_bg ||
2052 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2053 break;
2054 case TASK_POLICY_NEW_SOCKETS_BG:
2055 /*
2056 * socreate() calls this to determine if it should mark a new socket as background
2057 * Returns 1 for background mode, 0 for normal mode
2058 */
2059 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2060 break;
2061 case TASK_POLICY_LATENCY_QOS:
2062 /*
2063 * timer arming calls into here to find out the timer coalescing level
2064 * Returns a latency QoS tier (0-6)
2065 */
2066 value = thread->effective_policy.thep_latency_qos;
2067 break;
2068 case TASK_POLICY_THROUGH_QOS:
2069 /*
2070 * This value is passed into the urgency callout from the scheduler
2071 * to the performance management subsystem.
2072 *
2073 * Returns a throughput QoS tier (0-6)
2074 */
2075 value = thread->effective_policy.thep_through_qos;
2076 break;
2077 case TASK_POLICY_QOS:
2078 /*
2079 * This is communicated to the performance management layer and SFI.
2080 *
2081 * Returns a QoS policy tier
2082 */
2083 value = thread->effective_policy.thep_qos;
2084 break;
2085 default:
2086 panic("unknown thread policy flavor %d", flavor);
2087 break;
2088 }
2089
2090 return value;
2091}
2092
2093
2094/*
2095 * (integer_t) casts limit the number of bits we can fit here
2096 * this interface is deprecated and replaced by the _EXT struct ?
2097 */
2098static void
2099proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2100{
2101 uint64_t bits = 0;
2102 struct thread_requested_policy requested = thread->requested_policy;
2103
2104 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2105 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2106 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2107 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2108 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2109 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2110
2111 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2112 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2113
2114 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2115
2116 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2117 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2118
2119 info->requested = (integer_t) bits;
2120 bits = 0;
2121
2122 struct thread_effective_policy effective = thread->effective_policy;
2123
2124 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2125
2126 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2127 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2128 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2129 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2130
2131 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2132
2133 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2134 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2135
2136 info->effective = (integer_t)bits;
2137 bits = 0;
2138
2139 info->pending = 0;
2140}
2141
/*
 * Sneakily trace either the task and thread requested
 * or just the thread requested, depending on if we have enough room.
 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
 *
 *                              LP32            LP64
 * threquested_0(thread)        thread[0]       thread[0]
 * threquested_1(thread)        thread[1]       task[0]
 *
 */
2152
2153uintptr_t
2154threquested_0(thread_t thread)
2155{
2156 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2157
2158 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2159
2160 return raw[0];
2161}
2162
2163uintptr_t
2164threquested_1(thread_t thread)
2165{
2166#if defined __LP64__
2167 return *(uintptr_t*)&thread->task->requested_policy;
2168#else
2169 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2170 return raw[1];
2171#endif
2172}
2173
2174uintptr_t
2175theffective_0(thread_t thread)
2176{
2177 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2178
2179 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2180 return raw[0];
2181}
2182
2183uintptr_t
2184theffective_1(thread_t thread)
2185{
2186#if defined __LP64__
2187 return *(uintptr_t*)&thread->task->effective_policy;
2188#else
2189 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2190 return raw[1];
2191#endif
2192}
2193
2194
2195/*
2196 * Set an override on the thread which is consulted with a
2197 * higher priority than the task/thread policy. This should
2198 * only be set for temporary grants until the thread
2199 * returns to the userspace boundary
2200 *
2201 * We use atomic operations to swap in the override, with
2202 * the assumption that the thread itself can
2203 * read the override and clear it on return to userspace.
2204 *
2205 * No locking is performed, since it is acceptable to see
2206 * a stale override for one loop through throttle_lowpri_io().
2207 * However a thread reference must be held on the thread.
2208 */
2209
2210void set_thread_iotier_override(thread_t thread, int policy)
2211{
2212 int current_override;
2213
2214 /* Let most aggressive I/O policy win until user boundary */
2215 do {
2216 current_override = thread->iotier_override;
2217
2218 if (current_override != THROTTLE_LEVEL_NONE)
2219 policy = MIN(current_override, policy);
2220
2221 if (current_override == policy) {
2222 /* no effective change */
2223 return;
2224 }
2225 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2226
2227 /*
2228 * Since the thread may be currently throttled,
2229 * re-evaluate tiers and potentially break out
2230 * of an msleep
2231 */
2232 rethrottle_thread(thread->uthread);
2233}
2234
2235/*
2236 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2237 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2238 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2239 * priority thread. In these cases, we attempt to propagate the priority token, as long
2240 * as the subsystem informs us of the relationships between the threads. The userspace
2241 * synchronization subsystem should maintain the information of owner->resource and
2242 * resource->waiters itself.
2243 */
2244
2245/*
2246 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2247 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2248 * to be handled specially in the future, but for now it's fine to slam
2249 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2250 */
2251static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
2252 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2253 /* Map all input resource/type to a single one */
2254 *resource = USER_ADDR_NULL;
2255 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2256 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2257 /* no transform */
2258 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2259 /* Map all mutex overrides to a single one, to avoid memory overhead */
2260 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2261 *resource = USER_ADDR_NULL;
2262 }
2263 }
2264}
2265
2266/* This helper routine finds an existing override if known. Locking should be done by caller */
2267static struct thread_qos_override *
2268find_qos_override(thread_t thread,
2269 user_addr_t resource,
2270 int resource_type)
2271{
2272 struct thread_qos_override *override;
2273
2274 override = thread->overrides;
2275 while (override) {
2276 if (override->override_resource == resource &&
2277 override->override_resource_type == resource_type) {
2278 return override;
2279 }
2280
2281 override = override->override_next;
2282 }
2283
2284 return NULL;
2285}
2286
2287static void
2288find_and_decrement_qos_override(thread_t thread,
2289 user_addr_t resource,
2290 int resource_type,
2291 boolean_t reset,
2292 struct thread_qos_override **free_override_list)
2293{
2294 struct thread_qos_override *override, *override_prev;
2295
2296 override_prev = NULL;
2297 override = thread->overrides;
2298 while (override) {
2299 struct thread_qos_override *override_next = override->override_next;
2300
2301 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2302 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2303
2304 if (reset) {
2305 override->override_contended_resource_count = 0;
2306 } else {
2307 override->override_contended_resource_count--;
2308 }
2309
2310 if (override->override_contended_resource_count == 0) {
2311 if (override_prev == NULL) {
2312 thread->overrides = override_next;
2313 } else {
2314 override_prev->override_next = override_next;
2315 }
2316
2317 /* Add to out-param for later zfree */
2318 override->override_next = *free_override_list;
2319 *free_override_list = override;
2320 } else {
2321 override_prev = override;
2322 }
2323
2324 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2325 return;
2326 }
2327 } else {
2328 override_prev = override;
2329 }
2330
2331 override = override_next;
2332 }
2333}
2334
2335/* This helper recalculates the current requested override using the policy selected at boot */
2336static int
2337calculate_requested_qos_override(thread_t thread)
2338{
2339 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2340 return THREAD_QOS_UNSPECIFIED;
2341 }
2342
2343 /* iterate over all overrides and calculate MAX */
2344 struct thread_qos_override *override;
2345 int qos_override = THREAD_QOS_UNSPECIFIED;
2346
2347 override = thread->overrides;
2348 while (override) {
2349 qos_override = MAX(qos_override, override->override_qos);
2350 override = override->override_next;
2351 }
2352
2353 return qos_override;
2354}
2355
2356/*
2357 * Returns:
2358 * - 0 on success
2359 * - EINVAL if some invalid input was passed
2360 */
2361static int
2362proc_thread_qos_add_override_internal(thread_t thread,
2363 int override_qos,
2364 boolean_t first_override_for_resource,
2365 user_addr_t resource,
2366 int resource_type)
2367{
2368 struct task_pend_token pend_token = {};
2369 int rc = 0;
2370
2371 thread_mtx_lock(thread);
2372
2373 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2374 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2375
2376 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2377 uint64_t, thread->requested_policy.thrp_qos,
2378 uint64_t, thread->effective_policy.thep_qos,
2379 int, override_qos, boolean_t, first_override_for_resource);
2380
2381 struct thread_qos_override *override;
2382 struct thread_qos_override *override_new = NULL;
2383 int new_qos_override, prev_qos_override;
2384 int new_effective_qos;
2385
2386 canonicalize_resource_and_type(&resource, &resource_type);
2387
2388 override = find_qos_override(thread, resource, resource_type);
2389 if (first_override_for_resource && !override) {
2390 /* We need to allocate a new object. Drop the thread lock and
2391 * recheck afterwards in case someone else added the override
2392 */
2393 thread_mtx_unlock(thread);
2394 override_new = zalloc(thread_qos_override_zone);
2395 thread_mtx_lock(thread);
2396 override = find_qos_override(thread, resource, resource_type);
2397 }
2398 if (first_override_for_resource && override) {
2399 /* Someone else already allocated while the thread lock was dropped */
2400 override->override_contended_resource_count++;
2401 } else if (!override && override_new) {
2402 override = override_new;
2403 override_new = NULL;
2404 override->override_next = thread->overrides;
2405 /* since first_override_for_resource was TRUE */
2406 override->override_contended_resource_count = 1;
2407 override->override_resource = resource;
2408 override->override_resource_type = resource_type;
2409 override->override_qos = THREAD_QOS_UNSPECIFIED;
2410 thread->overrides = override;
2411 }
2412
2413 if (override) {
2414 if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2415 override->override_qos = override_qos;
2416 else
2417 override->override_qos = MAX(override->override_qos, override_qos);
2418 }
2419
2420 /* Determine how to combine the various overrides into a single current
2421 * requested override
2422 */
2423 new_qos_override = calculate_requested_qos_override(thread);
2424
2425 prev_qos_override = proc_get_thread_policy_locked(thread,
2426 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2427
2428 if (new_qos_override != prev_qos_override) {
2429 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2430 TASK_POLICY_QOS_OVERRIDE,
2431 new_qos_override, 0, &pend_token);
2432 }
2433
2434 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2435
2436 thread_mtx_unlock(thread);
2437
2438 thread_policy_update_complete_unlocked(thread, &pend_token);
2439
2440 if (override_new) {
2441 zfree(thread_qos_override_zone, override_new);
2442 }
2443
2444 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2445 int, new_qos_override, int, new_effective_qos, int, rc);
2446
2447 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2448 new_qos_override, resource, resource_type, 0, 0);
2449
2450 return rc;
2451}
2452
2453int
2454proc_thread_qos_add_override(task_t task,
2455 thread_t thread,
2456 uint64_t tid,
2457 int override_qos,
2458 boolean_t first_override_for_resource,
2459 user_addr_t resource,
2460 int resource_type)
2461{
2462 boolean_t has_thread_reference = FALSE;
2463 int rc = 0;
2464
2465 if (thread == THREAD_NULL) {
2466 thread = task_findtid(task, tid);
2467 /* returns referenced thread */
2468
2469 if (thread == THREAD_NULL) {
2470 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2471 tid, 0, 0xdead, 0, 0);
2472 return ESRCH;
2473 }
2474 has_thread_reference = TRUE;
2475 } else {
2476 assert(thread->task == task);
2477 }
2478 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2479 first_override_for_resource, resource, resource_type);
2480 if (has_thread_reference) {
2481 thread_deallocate(thread);
2482 }
2483
2484 return rc;
2485}
2486
2487static void
2488proc_thread_qos_remove_override_internal(thread_t thread,
2489 user_addr_t resource,
2490 int resource_type,
2491 boolean_t reset)
2492{
2493 struct task_pend_token pend_token = {};
2494
2495 struct thread_qos_override *deferred_free_override_list = NULL;
2496 int new_qos_override, prev_qos_override, new_effective_qos;
2497
2498 thread_mtx_lock(thread);
2499
2500 canonicalize_resource_and_type(&resource, &resource_type);
2501
2502 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2503
2504 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2505 thread_tid(thread), resource, reset, 0, 0);
2506
2507 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2508 uint64_t, thread->requested_policy.thrp_qos,
2509 uint64_t, thread->effective_policy.thep_qos);
2510
2511 /* Determine how to combine the various overrides into a single current requested override */
2512 new_qos_override = calculate_requested_qos_override(thread);
2513
2514 spl_t s = splsched();
2515 thread_lock(thread);
2516
2517 /*
2518 * The override chain and therefore the value of the current override is locked with thread mutex,
2519 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2520 * This means you can't change the current override from a spinlock-only setter.
2521 */
2522 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2523
2524 if (new_qos_override != prev_qos_override)
2525 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2526
2527 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2528
2529 thread_unlock(thread);
2530 splx(s);
2531
2532 thread_mtx_unlock(thread);
2533
2534 thread_policy_update_complete_unlocked(thread, &pend_token);
2535
2536 while (deferred_free_override_list) {
2537 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2538
2539 zfree(thread_qos_override_zone, deferred_free_override_list);
2540 deferred_free_override_list = override_next;
2541 }
2542
2543 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2544 int, new_qos_override, int, new_effective_qos);
2545
2546 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2547 thread_tid(thread), 0, 0, 0, 0);
2548}
2549
2550int
2551proc_thread_qos_remove_override(task_t task,
2552 thread_t thread,
2553 uint64_t tid,
2554 user_addr_t resource,
2555 int resource_type)
2556{
2557 boolean_t has_thread_reference = FALSE;
2558
2559 if (thread == THREAD_NULL) {
2560 thread = task_findtid(task, tid);
2561 /* returns referenced thread */
2562
2563 if (thread == THREAD_NULL) {
2564 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2565 tid, 0, 0xdead, 0, 0);
2566 return ESRCH;
2567 }
2568 has_thread_reference = TRUE;
2569 } else {
2570 assert(task == thread->task);
2571 }
2572
2573 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2574
2575 if (has_thread_reference)
2576 thread_deallocate(thread);
2577
2578 return 0;
2579}
2580
2581/* Deallocate before thread termination */
2582void proc_thread_qos_deallocate(thread_t thread)
2583{
2584 /* This thread must have no more IPC overrides. */
2585 assert(thread->ipc_overrides == 0);
2586 assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
2587 assert(thread->sync_ipc_overrides == 0);
2588 assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
2589
2590 /*
2591 * Clear out any lingering override objects.
2592 */
2593 struct thread_qos_override *override;
2594
2595 thread_mtx_lock(thread);
2596 override = thread->overrides;
2597 thread->overrides = NULL;
2598 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2599 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2600 thread_mtx_unlock(thread);
2601
2602 while (override) {
2603 struct thread_qos_override *override_next = override->override_next;
2604
2605 zfree(thread_qos_override_zone, override);
2606 override = override_next;
2607 }
2608}
2609
2610/*
2611 * Set up the primordial thread's QoS
2612 */
2613void
2614task_set_main_thread_qos(task_t task, thread_t thread) {
2615 struct task_pend_token pend_token = {};
2616
2617 assert(thread->task == task);
2618
2619 thread_mtx_lock(thread);
2620
2621 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2622 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2623 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2624 thread->requested_policy.thrp_qos, 0);
2625
2626 int primordial_qos = task_compute_main_thread_qos(task);
2627
2628 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
2629 primordial_qos, 0, &pend_token);
2630
2631 thread_mtx_unlock(thread);
2632
2633 thread_policy_update_complete_unlocked(thread, &pend_token);
2634
2635 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2636 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2637 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2638 primordial_qos, 0);
2639}
2640
2641/*
2642 * KPI for pthread kext
2643 *
2644 * Return a good guess at what the initial manager QoS will be
2645 * Dispatch can override this in userspace if it so chooses
2646 */
2647int
2648task_get_default_manager_qos(task_t task)
2649{
2650 int primordial_qos = task_compute_main_thread_qos(task);
2651
2652 if (primordial_qos == THREAD_QOS_LEGACY)
2653 primordial_qos = THREAD_QOS_USER_INITIATED;
2654
2655 return primordial_qos;
2656}
2657
2658/*
2659 * Check if the user promotion on thread has changed
2660 * and apply it.
2661 *
2662 * thread locked on entry, might drop the thread lock
2663 * and reacquire it.
2664 */
2665boolean_t
2666thread_recompute_user_promotion_locked(thread_t thread)
2667{
2668 boolean_t needs_update = FALSE;
2669 struct task_pend_token pend_token = {};
2670 int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_priority(thread), MAXPRI_USER);
2671 int old_base_pri = thread->base_pri;
2672 thread_qos_t qos_promotion;
2673
2674 /* Check if user promotion has changed */
2675 if (thread->user_promotion_basepri == user_promotion_basepri) {
2676 return needs_update;
2677 } else {
2678 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2679 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2680 thread_tid(thread),
2681 user_promotion_basepri,
2682 thread->user_promotion_basepri,
2683 0, 0);
2684 }
2685
2686 /* Update the user promotion base pri */
2687 thread->user_promotion_basepri = user_promotion_basepri;
2688 pend_token.tpt_force_recompute_pri = 1;
2689
2690 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2691 qos_promotion = THREAD_QOS_UNSPECIFIED;
2692 } else {
2693 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2694 }
2695
2696 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2697 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2698
2699 if (thread_get_waiting_turnstile(thread) &&
2700 thread->base_pri != old_base_pri) {
2701 needs_update = TRUE;
2702 }
2703
2704 thread_unlock(thread);
2705
2706 thread_policy_update_complete_unlocked(thread, &pend_token);
2707
2708 thread_lock(thread);
2709
2710 return needs_update;
2711}
2712
2713/*
2714 * Convert the thread user promotion base pri to qos for threads in qos world.
2715 * For priority above UI qos, the qos would be set to UI.
2716 */
2717thread_qos_t
2718thread_user_promotion_qos_for_pri(int priority)
2719{
2720 int qos;
2721 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2722 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2723 return qos;
2724 }
2725 }
2726 return THREAD_QOS_MAINTENANCE;
2727}
2728
2729/*
2730 * Set the thread's QoS IPC override
2731 * Owned by the IPC subsystem
2732 *
2733 * May be called with spinlocks held, but not spinlocks
2734 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2735 *
2736 * One 'add' must be balanced by one 'drop'.
2737 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
2738 * Before the thread is deallocated, there must be 0 remaining overrides.
2739 */
2740static void
2741thread_ipc_override(thread_t thread,
2742 uint32_t qos_override,
2743 boolean_t is_new_override)
2744{
2745 struct task_pend_token pend_token = {};
2746 boolean_t needs_update;
2747
2748 spl_t s = splsched();
2749 thread_lock(thread);
2750
2751 uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
2752
2753 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2754 assert(qos_override < THREAD_QOS_LAST);
2755
2756 if (is_new_override) {
2757 if (thread->ipc_overrides++ == 0) {
2758 /* This add is the first override for this thread */
2759 assert(old_override == THREAD_QOS_UNSPECIFIED);
2760 } else {
2761 /* There are already other overrides in effect for this thread */
2762 assert(old_override > THREAD_QOS_UNSPECIFIED);
2763 }
2764 } else {
2765 /* There must be at least one override (the previous add call) in effect */
2766 assert(thread->ipc_overrides > 0);
2767 assert(old_override > THREAD_QOS_UNSPECIFIED);
2768 }
2769
2770 /*
2771 * We can't allow lowering if there are several IPC overrides because
2772 * the caller can't possibly know the whole truth
2773 */
2774 if (thread->ipc_overrides == 1) {
2775 needs_update = qos_override != old_override;
2776 } else {
2777 needs_update = qos_override > old_override;
2778 }
2779
2780 if (needs_update) {
2781 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2782 TASK_POLICY_QOS_IPC_OVERRIDE,
2783 qos_override, 0, &pend_token);
2784 assert(pend_token.tpt_update_sockets == 0);
2785 }
2786
2787 thread_unlock(thread);
2788 splx(s);
2789
2790 thread_policy_update_complete_unlocked(thread, &pend_token);
2791}
2792
2793void
2794thread_add_ipc_override(thread_t thread,
2795 uint32_t qos_override)
2796{
2797 thread_ipc_override(thread, qos_override, TRUE);
2798}
2799
2800void
2801thread_update_ipc_override(thread_t thread,
2802 uint32_t qos_override)
2803{
2804 thread_ipc_override(thread, qos_override, FALSE);
2805}
2806
2807void
2808thread_drop_ipc_override(thread_t thread)
2809{
2810 struct task_pend_token pend_token = {};
2811
2812 spl_t s = splsched();
2813 thread_lock(thread);
2814
2815 assert(thread->ipc_overrides > 0);
2816
2817 if (--thread->ipc_overrides == 0) {
2818 /*
2819 * There are no more overrides for this thread, so we should
2820 * clear out the saturated override value
2821 */
2822
2823 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2824 TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
2825 0, &pend_token);
2826 }
2827
2828 thread_unlock(thread);
2829 splx(s);
2830
2831 thread_policy_update_complete_unlocked(thread, &pend_token);
2832}
2833
2834/* Get current requested qos / relpri, may be called from spinlock context */
2835thread_qos_t
2836thread_get_requested_qos(thread_t thread, int *relpri)
2837{
2838 int relprio_value = 0;
2839 thread_qos_t qos;
2840
2841 qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2842 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
2843 if (relpri) *relpri = -relprio_value;
2844 return qos;
2845}
2846
2847/*
2848 * This function will promote the thread priority
2849 * since exec could block other threads calling
2850 * proc_find on the proc. This boost must be removed
2851 * via call to thread_clear_exec_promotion.
2852 *
2853 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
2854 */
2855void
2856thread_set_exec_promotion(thread_t thread)
2857{
2858 spl_t s = splsched();
2859 thread_lock(thread);
2860
2861 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2862
2863 thread_unlock(thread);
2864 splx(s);
2865}
2866
2867/*
2868 * This function will clear the exec thread
2869 * promotion set on the thread by thread_set_exec_promotion.
2870 */
2871void
2872thread_clear_exec_promotion(thread_t thread)
2873{
2874 spl_t s = splsched();
2875 thread_lock(thread);
2876
2877 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2878
2879 thread_unlock(thread);
2880 splx(s);
2881}
2882
2883