1/*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57#include <mach/boolean.h>
58#include <mach/thread_switch.h>
59#include <ipc/ipc_port.h>
60#include <ipc/ipc_space.h>
61#include <kern/counter.h>
62#include <kern/ipc_kobject.h>
63#include <kern/processor.h>
64#include <kern/sched.h>
65#include <kern/sched_prim.h>
66#include <kern/spl.h>
67#include <kern/task.h>
68#include <kern/thread.h>
69#include <kern/policy_internal.h>
70
71#include <mach/policy.h>
72
73#include <kern/syscall_subr.h>
74#include <mach/mach_host_server.h>
75#include <mach/mach_syscalls.h>
76#include <sys/kdebug.h>
77#include <kern/ast.h>
78
79static void thread_depress_abstime(uint64_t interval);
80static void thread_depress_ms(mach_msg_timeout_t interval);
81
82/* Called from commpage to take a delayed preemption when exiting
83 * the "Preemption Free Zone" (PFZ).
84 */
85kern_return_t
86pfz_exit(
87 __unused struct pfz_exit_args *args)
88{
89 /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */
90
91 return KERN_SUCCESS;
92}
93
94
95/*
96 * swtch and swtch_pri both attempt to context switch (logic in
97 * thread_block no-ops the context switch if nothing would happen).
98 * A boolean is returned that indicates whether there is anything
99 * else runnable. That's no excuse to spin, though.
100 */
101
102static void
103swtch_continue(void)
104{
105 processor_t myprocessor;
106 boolean_t result;
107
108 disable_preemption();
109 myprocessor = current_processor();
110 result = SCHED(thread_should_yield)(myprocessor, current_thread());
111 enable_preemption();
112
113 ml_delay_on_yield();
114
115 thread_syscall_return(ret: result);
116 /*NOTREACHED*/
117}
118
119boolean_t
120swtch(
121 __unused struct swtch_args *args)
122{
123 processor_t myprocessor;
124
125 disable_preemption();
126 myprocessor = current_processor();
127 if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
128 mp_enable_preemption();
129
130 return FALSE;
131 }
132 enable_preemption();
133
134 thread_yield_with_continuation(continuation: (thread_continue_t)swtch_continue, NULL);
135}
136
137static void
138swtch_pri_continue(void)
139{
140 processor_t myprocessor;
141 boolean_t result;
142
143 thread_depress_abort(thread: current_thread());
144
145 disable_preemption();
146 myprocessor = current_processor();
147 result = SCHED(thread_should_yield)(myprocessor, current_thread());
148 mp_enable_preemption();
149
150 ml_delay_on_yield();
151
152 thread_syscall_return(ret: result);
153 /*NOTREACHED*/
154}
155
156boolean_t
157swtch_pri(
158 __unused struct swtch_pri_args *args)
159{
160 processor_t myprocessor;
161
162 disable_preemption();
163 myprocessor = current_processor();
164 if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
165 mp_enable_preemption();
166
167 return FALSE;
168 }
169 enable_preemption();
170
171 thread_depress_abstime(interval: thread_depress_time);
172
173 thread_yield_with_continuation(continuation: (thread_continue_t)swtch_pri_continue, NULL);
174}
175
176static void
177thread_switch_continue(void *parameter, __unused int ret)
178{
179 thread_t self = current_thread();
180 int option = (int)(intptr_t)parameter;
181
182 if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS) {
183 thread_depress_abort(thread: self);
184 }
185
186 ml_delay_on_yield();
187
188 thread_syscall_return(KERN_SUCCESS);
189 /*NOTREACHED*/
190}
191
192/*
193 * thread_switch:
194 *
195 * Context switch. User may supply thread hint.
196 */
197kern_return_t
198thread_switch(
199 struct thread_switch_args *args)
200{
201 thread_t thread = THREAD_NULL;
202 thread_t self = current_thread();
203 mach_port_name_t thread_name = args->thread_name;
204 int option = args->option;
205 mach_msg_timeout_t option_time = args->option_time;
206 uint32_t scale_factor = NSEC_PER_MSEC;
207 boolean_t depress_option = FALSE;
208 boolean_t wait_option = FALSE;
209 wait_interrupt_t interruptible = THREAD_ABORTSAFE;
210 port_intrans_options_t ptt_options = PORT_INTRANS_THREAD_NOT_CURRENT_THREAD;
211
212 /*
213 * Validate and process option.
214 *
215 * OSLock boosting only applies to other threads
216 * in your same task (even if you have a port for
217 * a thread in another task)
218 */
219 switch (option) {
220 case SWITCH_OPTION_NONE:
221 break;
222 case SWITCH_OPTION_WAIT:
223 wait_option = TRUE;
224 break;
225 case SWITCH_OPTION_DEPRESS:
226 depress_option = TRUE;
227 break;
228 case SWITCH_OPTION_DISPATCH_CONTENTION:
229 scale_factor = NSEC_PER_USEC;
230 wait_option = TRUE;
231 interruptible |= THREAD_WAIT_NOREPORT;
232 break;
233 case SWITCH_OPTION_OSLOCK_DEPRESS:
234 depress_option = TRUE;
235 interruptible |= THREAD_WAIT_NOREPORT;
236 ptt_options |= PORT_INTRANS_THREAD_IN_CURRENT_TASK;
237 break;
238 case SWITCH_OPTION_OSLOCK_WAIT:
239 wait_option = TRUE;
240 interruptible |= THREAD_WAIT_NOREPORT;
241 ptt_options |= PORT_INTRANS_THREAD_IN_CURRENT_TASK;
242 break;
243 default:
244 return KERN_INVALID_ARGUMENT;
245 }
246
247 /*
248 * Translate the port name if supplied.
249 */
250 if (thread_name != MACH_PORT_NULL) {
251 thread = port_name_to_thread(port_name: thread_name, options: ptt_options);
252 }
253
254 if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
255 if (thread != THREAD_NULL) {
256 /*
257 * Attempt to kick the lock owner up to our same IO throttling tier.
258 * If the thread is currently blocked in throttle_lowpri_io(),
259 * it will immediately break out.
260 *
261 * TODO: SFI break out?
262 */
263 int new_policy = proc_get_effective_thread_policy(thread: self, TASK_POLICY_IO);
264
265 set_thread_iotier_override(thread, policy: new_policy);
266 }
267 }
268
269 /*
270 * Try to handoff if supplied.
271 */
272 if (thread != THREAD_NULL) {
273 spl_t s = splsched();
274
275 /* This may return a different thread if the target is pushing on something */
276 thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);
277
278 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
279 thread_tid(thread), thread->state,
280 pulled_thread ? TRUE : FALSE, 0, 0);
281
282 if (pulled_thread != THREAD_NULL) {
283 /* We can't be dropping the last ref here */
284 thread_deallocate_safe(thread);
285
286 if (wait_option) {
287 assert_wait_timeout(event: (event_t)assert_wait_timeout, interruptible,
288 interval: option_time, scale_factor);
289 } else if (depress_option) {
290 thread_depress_ms(interval: option_time);
291 }
292
293 thread_run(self, continuation: thread_switch_continue, parameter: (void *)(intptr_t)option, new_thread: pulled_thread);
294 __builtin_unreachable();
295 }
296
297 splx(s);
298
299 thread_deallocate(thread);
300 }
301
302 if (wait_option) {
303 assert_wait_timeout(event: (event_t)assert_wait_timeout, interruptible, interval: option_time, scale_factor);
304 } else {
305 disable_preemption();
306 bool should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
307 enable_preemption();
308
309 if (should_yield == false) {
310 /* Early-return if yielding to the scheduler will not be beneficial */
311 return KERN_SUCCESS;
312 }
313
314 if (depress_option) {
315 thread_depress_ms(interval: option_time);
316 }
317 }
318
319 thread_yield_with_continuation(continuation: thread_switch_continue, parameter: (void *)(intptr_t)option);
320 __builtin_unreachable();
321}
322
323void
324thread_yield_with_continuation(
325 thread_continue_t continuation,
326 void *parameter)
327{
328 assert(continuation);
329 thread_block_reason(continuation, parameter, AST_YIELD);
330 __builtin_unreachable();
331}
332
333/* This function is called after an assert_wait(), therefore it must not
334 * cause another wait until after the thread_run() or thread_block()
335 *
336 * Following are the calling convention for thread ref deallocation.
337 *
338 * 1) If no continuation is provided, then thread ref is consumed.
339 * (thread_handoff_deallocate convention).
340 *
341 * 2) If continuation is provided with option THREAD_HANDOFF_SETRUN_NEEDED
342 * then thread ref is always consumed.
343 *
344 * 3) If continuation is provided with option THREAD_HANDOFF_NONE then thread
345 * ref is not consumed and it is upto the continuation to deallocate
346 * the thread reference.
347 */
348static wait_result_t
349thread_handoff_internal(thread_t thread, thread_continue_t continuation,
350 void *parameter, thread_handoff_option_t option)
351{
352 thread_t self = current_thread();
353
354 /*
355 * Try to handoff if supplied.
356 */
357 if (thread != THREAD_NULL) {
358 spl_t s = splsched();
359
360 thread_t pulled_thread = thread_prepare_for_handoff(thread, option);
361
362 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
363 thread_tid(thread), thread->state,
364 pulled_thread ? TRUE : FALSE, 0, 0);
365
366 /* Deallocate thread ref if needed */
367 if (continuation == NULL || (option & THREAD_HANDOFF_SETRUN_NEEDED)) {
368 /* Use the safe version of thread deallocate */
369 thread_deallocate_safe(thread);
370 }
371
372 if (pulled_thread != THREAD_NULL) {
373 int result = thread_run(self, continuation, parameter, new_thread: pulled_thread);
374
375 splx(s);
376 return result;
377 }
378
379 splx(s);
380 }
381
382 int result = thread_block_parameter(continuation, parameter);
383 return result;
384}
385
386void
387thread_handoff_parameter(thread_t thread, thread_continue_t continuation,
388 void *parameter, thread_handoff_option_t option)
389{
390 thread_handoff_internal(thread, continuation, parameter, option);
391 panic("NULL continuation passed to %s", __func__);
392 __builtin_unreachable();
393}
394
395wait_result_t
396thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option)
397{
398 return thread_handoff_internal(thread, NULL, NULL, option);
399}
400
/*
 * Thread depression
 *
 * This mechanism drops a thread to priority 0 in order for it to yield to
 * all other runnable threads on the system.  It can be canceled or timed out,
 * whereupon the thread goes back to where it was.
 *
 * Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
 * same time.  DEPRESS always defers to POLLDEPRESS.
 *
 * DEPRESS only lasts across a single thread_block call, and never returns
 * to userspace.
 * POLLDEPRESS can be active anywhere up until thread termination.
 */
415
416void
417thread_depress_timer_setup(thread_t self)
418{
419 self->depress_timer = kalloc_type(struct timer_call,
420 Z_ZERO | Z_WAITOK | Z_NOFAIL);
421 timer_call_setup(call: self->depress_timer, func: thread_depress_expire, param0: self);
422}
423
424/*
425 * Depress thread's priority to lowest possible for the specified interval,
426 * with an interval of zero resulting in no timeout being scheduled.
427 *
428 * Must block with AST_YIELD afterwards to take effect
429 */
430void
431thread_depress_abstime(uint64_t interval)
432{
433 thread_t self = current_thread();
434
435 spl_t s = splsched();
436 thread_lock(self);
437
438 assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);
439
440 if ((self->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
441 self->sched_flags |= TH_SFLAG_DEPRESS;
442 thread_recompute_sched_pri(thread: self, options: SETPRI_LAZY);
443
444 if (interval != 0) {
445 uint64_t deadline;
446
447 clock_absolutetime_interval_to_deadline(abstime: interval, result: &deadline);
448 if (!timer_call_enter(call: self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) {
449 self->depress_timer_active++;
450 }
451 }
452 }
453
454 thread_unlock(self);
455 splx(s);
456}
457
458void
459thread_depress_ms(mach_msg_timeout_t interval)
460{
461 uint64_t abstime;
462
463 clock_interval_to_absolutetime_interval(interval, NSEC_PER_MSEC, result: &abstime);
464 thread_depress_abstime(interval: abstime);
465}
466
467/*
468 * Priority depression expiration.
469 */
470void
471thread_depress_expire(void *p0,
472 __unused void *p1)
473{
474 thread_t thread = (thread_t)p0;
475
476 spl_t s = splsched();
477 thread_lock(thread);
478
479 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
480
481 if (--thread->depress_timer_active == 0) {
482 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
483 if ((thread->state & TH_RUN) == TH_RUN) {
484 thread->last_basepri_change_time = mach_absolute_time();
485 }
486 thread_recompute_sched_pri(thread, options: SETPRI_DEFAULT);
487 }
488
489 thread_unlock(thread);
490 splx(s);
491}
492
493/*
494 * Prematurely abort priority depression if there is one.
495 */
496kern_return_t
497thread_depress_abort(thread_t thread)
498{
499 kern_return_t result = KERN_NOT_DEPRESSED;
500
501 spl_t s = splsched();
502 thread_lock(thread);
503
504 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
505
506 /*
507 * User-triggered depress-aborts should not get out
508 * of the poll-depress, but they should cancel a regular depress.
509 */
510 if ((thread->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
511 result = thread_depress_abort_locked(thread);
512 }
513
514 thread_unlock(thread);
515 splx(s);
516
517 return result;
518}
519
520/*
521 * Prematurely abort priority depression or poll depression if one is active.
522 * Called with the thread locked.
523 */
524kern_return_t
525thread_depress_abort_locked(thread_t thread)
526{
527 if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0) {
528 return KERN_NOT_DEPRESSED;
529 }
530
531 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
532
533 thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
534 if ((thread->state & TH_RUN) == TH_RUN) {
535 thread->last_basepri_change_time = mach_absolute_time();
536 }
537
538 thread_recompute_sched_pri(thread, options: SETPRI_LAZY);
539
540 if (timer_call_cancel(call: thread->depress_timer)) {
541 thread->depress_timer_active--;
542 }
543
544 return KERN_SUCCESS;
545}
546
547/*
548 * Invoked as part of a polling operation like a no-timeout port receive
549 *
550 * Forces a fixpri thread to yield if it is detected polling without blocking for too long.
551 */
552void
553thread_poll_yield(thread_t self)
554{
555 assert(self == current_thread());
556 assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);
557
558 if (self->sched_mode != TH_MODE_FIXED) {
559 return;
560 }
561
562 spl_t s = splsched();
563
564 uint64_t abstime = mach_absolute_time();
565 uint64_t total_computation = abstime -
566 self->computation_epoch + self->computation_metered;
567
568 if (total_computation >= max_poll_computation) {
569 thread_lock(self);
570
571 self->computation_epoch = abstime;
572 self->computation_interrupt_epoch = recount_current_thread_interrupt_time_mach();
573 self->computation_metered = 0;
574
575 uint64_t yield_expiration = abstime +
576 (total_computation >> sched_poll_yield_shift);
577
578 if (!timer_call_enter(call: self->depress_timer, deadline: yield_expiration,
579 TIMER_CALL_USER_CRITICAL)) {
580 self->depress_timer_active++;
581 }
582
583 self->sched_flags |= TH_SFLAG_POLLDEPRESS;
584 thread_recompute_sched_pri(thread: self, options: SETPRI_DEFAULT);
585
586 thread_unlock(self);
587 }
588 splx(s);
589}
590
591/*
592 * Kernel-internal interface to yield for a specified period
593 *
594 * WARNING: Will still yield to priority 0 even if the thread is holding a contended lock!
595 */
596void
597thread_yield_internal(mach_msg_timeout_t ms)
598{
599 thread_t self = current_thread();
600
601 assert((self->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);
602
603 processor_t myprocessor;
604
605 disable_preemption();
606 myprocessor = current_processor();
607 if (!SCHED(thread_should_yield)(myprocessor, self)) {
608 mp_enable_preemption();
609
610 return;
611 }
612 enable_preemption();
613
614 thread_depress_ms(interval: ms);
615
616 thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);
617
618 thread_depress_abort(thread: self);
619}
620
621/*
622 * This yields to a possible non-urgent preemption pending on the current processor.
623 *
624 * This is useful when doing a long computation in the kernel without returning to userspace.
625 *
626 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
627 */
628void
629thread_yield_to_preemption()
630{
631 /*
632 * ast_pending() should ideally be called with interrupts disabled, but
633 * the check here is fine because csw_check() will do the right thing.
634 */
635 ast_t *pending_ast = ast_pending();
636 ast_t ast = AST_NONE;
637 processor_t p;
638
639 if (*pending_ast & AST_PREEMPT) {
640 thread_t self = current_thread();
641
642 spl_t s = splsched();
643
644 p = current_processor();
645 thread_lock(self);
646 ast = csw_check(thread: self, processor: p, AST_YIELD);
647 ast_on(reasons: ast);
648 thread_unlock(self);
649
650 if (ast != AST_NONE) {
651 (void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, reason: ast);
652 }
653
654 splx(s);
655 }
656}
657