/*
 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/counters.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>

#include <mach/policy.h>

#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <sys/kdebug.h>
#include <kern/ast.h>

static void thread_depress_abstime(uint64_t interval);
static void thread_depress_ms(mach_msg_timeout_t interval);

/* Called from commpage to take a delayed preemption when exiting
 * the "Preemption Free Zone" (PFZ).
 */
kern_return_t
pfz_exit(
    __unused struct pfz_exit_args *args)
{
    /* For now, nothing special to do. We'll pick up the ASTs on kernel exit. */

    return (KERN_SUCCESS);
}


/*
 * swtch and swtch_pri both attempt to context switch (logic in
 * thread_block no-ops the context switch if nothing would happen).
 * A boolean is returned that indicates whether there is anything
 * else runnable.  That's no excuse to spin, though.
 */
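/*
 * Illustrative sketch (not kernel code): a user-space adaptive spin might use
 * the swtch_pri() trap, assumed here to be declared in <mach/mach_traps.h>,
 * to stop spinning once the kernel reports that other threads are runnable.
 * The try_lock()/lock_wait() helpers below are hypothetical.
 *
 *	while (!try_lock(&l)) {
 *		if (swtch_pri(0)) {
 *			// something else is runnable -- stop spinning, block instead
 *			lock_wait(&l);
 *		}
 *	}
 */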

static void
swtch_continue(void)
{
    processor_t myprocessor;
    boolean_t result;

    disable_preemption();
    myprocessor = current_processor();
    result = SCHED(thread_should_yield)(myprocessor, current_thread());
    enable_preemption();

    thread_syscall_return(result);
    /*NOTREACHED*/
}

boolean_t
swtch(
    __unused struct swtch_args *args)
{
    processor_t myprocessor;

    disable_preemption();
    myprocessor = current_processor();
    if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
        mp_enable_preemption();

        return (FALSE);
    }
    enable_preemption();

    counter(c_swtch_block++);

    thread_yield_with_continuation((thread_continue_t)swtch_continue, NULL);
}
136
137static void
138swtch_pri_continue(void)
139{
140 processor_t myprocessor;
141 boolean_t result;
142
143 thread_depress_abort(current_thread());
144
145 disable_preemption();
146 myprocessor = current_processor();
147 result = SCHED(thread_should_yield)(myprocessor, current_thread());
148 mp_enable_preemption();
149
150 thread_syscall_return(result);
151 /*NOTREACHED*/
152}
153
154boolean_t
155swtch_pri(
156__unused struct swtch_pri_args *args)
157{
158 processor_t myprocessor;
159
160 disable_preemption();
161 myprocessor = current_processor();
162 if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
163 mp_enable_preemption();
164
165 return (FALSE);
166 }
167 enable_preemption();
168
169 counter(c_swtch_pri_block++);
170
171 thread_depress_abstime(thread_depress_time);
172
173 thread_yield_with_continuation((thread_continue_t)swtch_pri_continue, NULL);
174}

static void
thread_switch_continue(void *parameter, __unused int ret)
{
    thread_t self = current_thread();
    int option = (int)(intptr_t)parameter;

    if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS)
        thread_depress_abort(self);

    thread_syscall_return(KERN_SUCCESS);
    /*NOTREACHED*/
}

/*
 * thread_switch:
 *
 *	Context switch.  User may supply thread hint.
 */
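/*
 * Illustrative sketch (not kernel code): a user-space caller that wants to
 * hand the processor to a known lock owner could invoke the trap as below,
 * assuming the thread_switch() declaration from <mach/mach_traps.h> and a
 * hypothetical owner_port naming the owner thread.  With
 * SWITCH_OPTION_DEPRESS, option_time is interpreted in milliseconds
 * (scale_factor == NSEC_PER_MSEC below).
 *
 *	(void)thread_switch(owner_port, SWITCH_OPTION_DEPRESS, 1);
 */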
kern_return_t
thread_switch(
    struct thread_switch_args *args)
{
    thread_t thread = THREAD_NULL;
    thread_t self = current_thread();
    mach_port_name_t thread_name = args->thread_name;
    int option = args->option;
    mach_msg_timeout_t option_time = args->option_time;
    uint32_t scale_factor = NSEC_PER_MSEC;
    boolean_t depress_option = FALSE;
    boolean_t wait_option = FALSE;
    wait_interrupt_t interruptible = THREAD_ABORTSAFE;

    /*
     * Validate and process option.
     */
    switch (option) {
    case SWITCH_OPTION_NONE:
        break;
    case SWITCH_OPTION_WAIT:
        wait_option = TRUE;
        break;
    case SWITCH_OPTION_DEPRESS:
        depress_option = TRUE;
        break;
    case SWITCH_OPTION_DISPATCH_CONTENTION:
        scale_factor = NSEC_PER_USEC;
        wait_option = TRUE;
        interruptible |= THREAD_WAIT_NOREPORT;
        break;
    case SWITCH_OPTION_OSLOCK_DEPRESS:
        depress_option = TRUE;
        interruptible |= THREAD_WAIT_NOREPORT;
        break;
    case SWITCH_OPTION_OSLOCK_WAIT:
        wait_option = TRUE;
        interruptible |= THREAD_WAIT_NOREPORT;
        break;
    default:
        return (KERN_INVALID_ARGUMENT);
    }

    /*
     * Translate the port name if supplied.
     */
    if (thread_name != MACH_PORT_NULL) {
        ipc_port_t port;

        if (ipc_port_translate_send(self->task->itk_space,
                                    thread_name, &port) == KERN_SUCCESS) {
            ip_reference(port);
            ip_unlock(port);

            thread = convert_port_to_thread(port);
            ip_release(port);

            if (thread == self) {
                thread_deallocate(thread);
                thread = THREAD_NULL;
            }
        }
    }

    if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
        if (thread != THREAD_NULL) {

            if (thread->task != self->task) {
                /*
                 * OSLock boosting only applies to other threads
                 * in your same task (even if you have a port for
                 * a thread in another task)
                 */

                thread_deallocate(thread);
                thread = THREAD_NULL;
            } else {
                /*
                 * Attempt to kick the lock owner up to our same IO throttling tier.
                 * If the thread is currently blocked in throttle_lowpri_io(),
                 * it will immediately break out.
                 *
                 * TODO: SFI break out?
                 */
                int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

                set_thread_iotier_override(thread, new_policy);
            }
        }
    }

    /*
     * Try to handoff if supplied.
     */
    if (thread != THREAD_NULL) {
        spl_t s = splsched();

        /* This may return a different thread if the target is pushing on something */
        thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
            thread_tid(thread), thread->state,
            pulled_thread ? TRUE : FALSE, 0, 0);

        if (pulled_thread != THREAD_NULL) {
            /* We can't be dropping the last ref here */
            thread_deallocate_safe(thread);

            if (wait_option)
                assert_wait_timeout((event_t)assert_wait_timeout, interruptible,
                                    option_time, scale_factor);
            else if (depress_option)
                thread_depress_ms(option_time);

            thread_run(self, thread_switch_continue, (void *)(intptr_t)option, pulled_thread);
            __builtin_unreachable();
        }

        splx(s);

        thread_deallocate(thread);
    }

    if (wait_option)
        assert_wait_timeout((event_t)assert_wait_timeout, interruptible, option_time, scale_factor);
    else if (depress_option)
        thread_depress_ms(option_time);

    thread_yield_with_continuation(thread_switch_continue, (void *)(intptr_t)option);
    __builtin_unreachable();
}

void
thread_yield_with_continuation(
    thread_continue_t continuation,
    void *parameter)
{
    assert(continuation);
    thread_block_reason(continuation, parameter, AST_YIELD);
    __builtin_unreachable();
}


/* Returns a +1 thread reference */
thread_t
port_name_to_thread_for_ulock(mach_port_name_t thread_name)
{
    thread_t thread = THREAD_NULL;
    thread_t self = current_thread();

    /*
     * Translate the port name if supplied.
     */
    if (thread_name != MACH_PORT_NULL) {
        ipc_port_t port;

        if (ipc_port_translate_send(self->task->itk_space,
                                    thread_name, &port) == KERN_SUCCESS) {
            ip_reference(port);
            ip_unlock(port);

            thread = convert_port_to_thread(port);
            ip_release(port);

            if (thread == THREAD_NULL) {
                return thread;
            }

            if ((thread == self) || (thread->task != self->task)) {
                thread_deallocate(thread);
                thread = THREAD_NULL;
            }
        }
    }

    return thread;
}

/*
 * This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block().
 *
 * When called with a NULL continuation, the thread ref is consumed
 * (thread_handoff_deallocate calling convention); otherwise the cleanup is
 * left to the continuation (thread_handoff_parameter calling convention),
 * and the call does not return.
 */
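/*
 * Hedged usage sketch of the two calling conventions (the target thread,
 * continuation, and argument below are hypothetical):
 *
 *	// thread_handoff_deallocate: returns, and consumes the +1 ref on target
 *	wait_result_t wr = thread_handoff_deallocate(target);
 *
 *	// thread_handoff_parameter: never returns; the continuation must do the
 *	// cleanup, including dropping the reference on target
 *	thread_handoff_parameter(target, my_continuation, my_arg);
 */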
static wait_result_t
thread_handoff_internal(thread_t thread, thread_continue_t continuation,
                        void *parameter)
{
    thread_t deallocate_thread = THREAD_NULL;
    thread_t self = current_thread();

    /*
     * Try to handoff if supplied.
     */
    if (thread != THREAD_NULL) {
        spl_t s = splsched();

        thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);

        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
            thread_tid(thread), thread->state,
            pulled_thread ? TRUE : FALSE, 0, 0);

        if (pulled_thread != THREAD_NULL) {
            if (continuation == NULL) {
                /* We can't be dropping the last ref here */
                thread_deallocate_safe(thread);
            }

            int result = thread_run(self, continuation, parameter, pulled_thread);

            splx(s);
            return result;
        }

        splx(s);

        deallocate_thread = thread;
        thread = THREAD_NULL;
    }

    int result = thread_block_parameter(continuation, parameter);
    if (deallocate_thread != THREAD_NULL) {
        thread_deallocate(deallocate_thread);
    }

    return result;
}

void
thread_handoff_parameter(thread_t thread, thread_continue_t continuation,
                         void *parameter)
{
    thread_handoff_internal(thread, continuation, parameter);
    panic("NULL continuation passed to %s", __func__);
    __builtin_unreachable();
}

wait_result_t
thread_handoff_deallocate(thread_t thread)
{
    return thread_handoff_internal(thread, NULL, NULL);
}

/*
 * Thread depression
 *
 * This mechanism drops a thread to priority 0 in order for it to yield to
 * all other runnable threads on the system.  It can be canceled or timed out,
 * whereupon the thread goes back to where it was.
 *
 * Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
 * same time.  DEPRESS always defers to POLLDEPRESS.
 *
 * DEPRESS only lasts across a single thread_block call, and never returns
 * to userspace.
 * POLLDEPRESS can be active anywhere up until thread termination.
 */
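/*
 * Sketch of the usual DEPRESS lifecycle, using the routines defined below:
 *
 *	thread_depress_abstime(interval);	// set TH_SFLAG_DEPRESS, arm timer
 *	thread_block_reason(..., AST_YIELD);	// actually yield at the depressed priority
 *	// later, either the timer fires and thread_depress_expire() restores
 *	// the priority, or thread_depress_abort() cancels the depression early
 */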

/*
 * Depress thread's priority to lowest possible for the specified interval,
 * with an interval of zero resulting in no timeout being scheduled.
 *
 * Must block with AST_YIELD afterwards to take effect.
 */
void
thread_depress_abstime(uint64_t interval)
{
    thread_t self = current_thread();

    spl_t s = splsched();
    thread_lock(self);

    assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

    if ((self->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
        self->sched_flags |= TH_SFLAG_DEPRESS;
        thread_recompute_sched_pri(self, SETPRI_LAZY);

        if (interval != 0) {
            uint64_t deadline;

            clock_absolutetime_interval_to_deadline(interval, &deadline);
            if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL))
                self->depress_timer_active++;
        }
    }

    thread_unlock(self);
    splx(s);
}

void
thread_depress_ms(mach_msg_timeout_t interval)
{
    uint64_t abstime;

    clock_interval_to_absolutetime_interval(interval, NSEC_PER_MSEC, &abstime);
    thread_depress_abstime(abstime);
}

/*
 * Priority depression expiration.
 */
void
thread_depress_expire(void *p0,
                      __unused void *p1)
{
    thread_t thread = (thread_t)p0;

    spl_t s = splsched();
    thread_lock(thread);

    assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

    if (--thread->depress_timer_active == 0) {
        thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
        thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
    }

    thread_unlock(thread);
    splx(s);
}

/*
 * Prematurely abort priority depression if there is one.
 */
kern_return_t
thread_depress_abort(thread_t thread)
{
    kern_return_t result = KERN_NOT_DEPRESSED;

    spl_t s = splsched();
    thread_lock(thread);

    assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

    /*
     * A user-triggered depress-abort must not break the thread out of a
     * poll-depress, but it should cancel a regular depress.
     */
    if ((thread->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
        result = thread_depress_abort_locked(thread);
    }

    thread_unlock(thread);
    splx(s);

    return result;
}

/*
 * Prematurely abort priority depression or poll depression if one is active.
 * Called with the thread locked.
 */
kern_return_t
thread_depress_abort_locked(thread_t thread)
{
    if ((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0)
        return KERN_NOT_DEPRESSED;

    assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

    thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

    thread_recompute_sched_pri(thread, SETPRI_LAZY);

    if (timer_call_cancel(&thread->depress_timer))
        thread->depress_timer_active--;

    return KERN_SUCCESS;
}

/*
 * Invoked as part of a polling operation like a no-timeout port receive.
 *
 * Forces a fixed-priority thread to yield if it has been polling for too long
 * without blocking.
 */
void
thread_poll_yield(thread_t self)
{
    assert(self == current_thread());
    assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

    if (self->sched_mode != TH_MODE_FIXED)
        return;

    spl_t s = splsched();

    uint64_t abstime = mach_absolute_time();
    uint64_t total_computation = abstime -
        self->computation_epoch + self->computation_metered;

    if (total_computation >= max_poll_computation) {
        thread_lock(self);

        self->computation_epoch = abstime;
        self->computation_metered = 0;

        uint64_t yield_expiration = abstime +
            (total_computation >> sched_poll_yield_shift);

        if (!timer_call_enter(&self->depress_timer, yield_expiration,
                TIMER_CALL_USER_CRITICAL))
            self->depress_timer_active++;

        self->sched_flags |= TH_SFLAG_POLLDEPRESS;
        thread_recompute_sched_pri(self, SETPRI_DEFAULT);

        thread_unlock(self);
    }
    splx(s);
}

/*
 * Kernel-internal interface to yield for a specified period.
 *
 * WARNING: Will still yield to priority 0 even if the thread is holding a contended lock!
 */
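/*
 * Hypothetical in-kernel caller (sketch only): yield for ~1 ms every so often
 * during a long loop.  do_chunk_of_work() is a made-up helper.
 *
 *	for (i = 0; i < total; i++) {
 *		do_chunk_of_work(i);
 *		if ((i % 1024) == 0)
 *			thread_yield_internal(1);
 *	}
 */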
void
thread_yield_internal(mach_msg_timeout_t ms)
{
    thread_t self = current_thread();

    assert((self->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

    processor_t myprocessor;

    disable_preemption();
    myprocessor = current_processor();
    if (!SCHED(thread_should_yield)(myprocessor, self)) {
        mp_enable_preemption();

        return;
    }
    enable_preemption();

    thread_depress_ms(ms);

    thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

    thread_depress_abort(self);
}

/*
 * This yields to a possible non-urgent preemption pending on the current processor.
 *
 * This is useful when doing a long computation in the kernel without returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
 */
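/*
 * Hypothetical in-kernel caller (sketch only): let a pending non-urgent
 * preemption take effect during a long computation without depressing the
 * thread's priority.  more_work()/process_one_item() are made-up helpers.
 *
 *	while (more_work()) {
 *		process_one_item();
 *		thread_yield_to_preemption();
 *	}
 */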
void
thread_yield_to_preemption(void)
{
    /*
     * ast_pending() should ideally be called with interrupts disabled, but
     * the check here is fine because csw_check() will do the right thing.
     */
    ast_t *pending_ast = ast_pending();
    ast_t ast = AST_NONE;
    processor_t p;

    if (*pending_ast & AST_PREEMPT) {
        thread_t self = current_thread();

        spl_t s = splsched();

        p = current_processor();
        thread_lock(self);
        ast = csw_check(p, AST_YIELD);
        ast_on(ast);
        thread_unlock(self);

        if (ast != AST_NONE) {
            (void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
        }

        splx(s);
    }
}