1/*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65/*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84#include <mach/mach_types.h>
85#include <mach/boolean.h>
86#include <mach/policy.h>
87#include <mach/thread_info.h>
88#include <mach/thread_special_ports.h>
89#include <mach/thread_status.h>
90#include <mach/time_value.h>
91#include <mach/vm_param.h>
92
93#include <machine/thread.h>
94#include <machine/pal_routines.h>
95#include <machine/limits.h>
96
97#include <kern/kern_types.h>
98#include <kern/kalloc.h>
99#include <kern/cpu_data.h>
100#include <kern/counters.h>
101#include <kern/extmod_statistics.h>
102#include <kern/ipc_mig.h>
103#include <kern/ipc_tt.h>
104#include <kern/mach_param.h>
105#include <kern/machine.h>
106#include <kern/misc_protos.h>
107#include <kern/processor.h>
108#include <kern/queue.h>
109#include <kern/sched.h>
110#include <kern/sched_prim.h>
111#include <kern/sync_lock.h>
112#include <kern/syscall_subr.h>
113#include <kern/task.h>
114#include <kern/thread.h>
115#include <kern/thread_group.h>
116#include <kern/coalition.h>
117#include <kern/host.h>
118#include <kern/zalloc.h>
119#include <kern/assert.h>
120#include <kern/exc_resource.h>
121#include <kern/exc_guard.h>
122#include <kern/telemetry.h>
123#include <kern/policy_internal.h>
124#include <kern/turnstile.h>
125
126#include <corpses/task_corpse.h>
127#if KPC
128#include <kern/kpc.h>
129#endif
130
131#if MONOTONIC
132#include <kern/monotonic.h>
133#include <machine/monotonic.h>
134#endif /* MONOTONIC */
135
136#include <ipc/ipc_kmsg.h>
137#include <ipc/ipc_port.h>
138#include <bank/bank_types.h>
139
140#include <vm/vm_kern.h>
141#include <vm/vm_pageout.h>
142
143#include <sys/kdebug.h>
144#include <sys/bsdtask_info.h>
145#include <mach/sdt.h>
146#include <san/kasan.h>
147
148#include <stdatomic.h>
149
150/*
151 * Exported interfaces
152 */
153#include <mach/task_server.h>
154#include <mach/thread_act_server.h>
155#include <mach/mach_host_server.h>
156#include <mach/host_priv_server.h>
157#include <mach/mach_voucher_server.h>
158#include <kern/policy_internal.h>
159
160static struct zone *thread_zone;
161static lck_grp_attr_t thread_lck_grp_attr;
162lck_attr_t thread_lck_attr;
163lck_grp_t thread_lck_grp;
164
165struct zone *thread_qos_override_zone;
166
167decl_simple_lock_data(static,thread_stack_lock)
168static queue_head_t thread_stack_queue;
169
170decl_simple_lock_data(static,thread_terminate_lock)
171static queue_head_t thread_terminate_queue;
172
173static queue_head_t thread_deallocate_queue;
174
175static queue_head_t turnstile_deallocate_queue;
176
177static queue_head_t crashed_threads_queue;
178
179static queue_head_t workq_deallocate_queue;
180
181decl_simple_lock_data(static,thread_exception_lock)
182static queue_head_t thread_exception_queue;
183
184struct thread_exception_elt {
185 queue_chain_t elt;
186 exception_type_t exception_type;
187 task_t exception_task;
188 thread_t exception_thread;
189};
190
191static struct thread thread_template, init_thread;
192static void thread_deallocate_enqueue(thread_t thread);
193static void thread_deallocate_complete(thread_t thread);
194
195#ifdef MACH_BSD
196extern void proc_exit(void *);
197extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
198extern uint64_t get_dispatchqueue_offset_from_proc(void *);
199extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
200extern int proc_selfpid(void);
201extern void proc_name(int, char*, int);
202extern char * proc_name_address(void *p);
203#endif /* MACH_BSD */
204
205extern int disable_exc_resource;
206extern int audio_active;
207extern int debug_task;
208int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
209int task_threadmax = CONFIG_THREAD_MAX;
210
211static uint64_t thread_unique_id = 100;
212
213struct _thread_ledger_indices thread_ledgers = { -1 };
214static ledger_template_t thread_ledger_template = NULL;
215static void init_thread_ledgers(void);
216
217#if CONFIG_JETSAM
218void jetsam_on_ledger_cpulimit_exceeded(void);
219#endif
220
221extern int task_thread_soft_limit;
222extern int exc_via_corpse_forking;
223
224#if DEVELOPMENT || DEBUG
225extern int exc_resource_threads_enabled;
226#endif /* DEVELOPMENT || DEBUG */
227
228/*
229 * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry.
230 *
231 * (i.e., when any thread's CPU consumption exceeds 70% of the limit, start taking user
232 * stacktraces, a.k.a. micro-stackshots)
233 */
234#define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
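/*
 * Worked example (illustrative numbers only, not a fixed configuration): with
 * a per-thread CPU limit of 50% over a 180s interval, a thread may consume up
 * to 90s of CPU time per interval; telemetry (micro-stackshots) begins once it
 * has used 70% of that allowance, i.e. after roughly 63s of CPU time.
 */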
235
236int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */
237void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
238#if DEVELOPMENT || DEBUG
239void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
240#endif /* DEVELOPMENT || DEBUG */
241
242/*
243 * The smallest interval over which we support limiting CPU consumption is 1ms
244 */
245#define MINIMUM_CPULIMIT_INTERVAL_MS 1
246
247void
248thread_bootstrap(void)
249{
250 /*
251 * Fill in a template thread for fast initialization.
252 */
253
254#if MACH_ASSERT
255 thread_template.thread_magic = THREAD_MAGIC;
256#endif /* MACH_ASSERT */
257
258 thread_template.runq = PROCESSOR_NULL;
259
260 thread_template.ref_count = 2;
261
262 thread_template.reason = AST_NONE;
263 thread_template.at_safe_point = FALSE;
264 thread_template.wait_event = NO_EVENT64;
265 thread_template.waitq = NULL;
266 thread_template.wait_result = THREAD_WAITING;
267 thread_template.options = THREAD_ABORTSAFE;
268 thread_template.state = TH_WAIT | TH_UNINT;
269 thread_template.wake_active = FALSE;
270 thread_template.continuation = THREAD_CONTINUE_NULL;
271 thread_template.parameter = NULL;
272
273 thread_template.importance = 0;
274 thread_template.sched_mode = TH_MODE_NONE;
275 thread_template.sched_flags = 0;
276 thread_template.saved_mode = TH_MODE_NONE;
277 thread_template.safe_release = 0;
278 thread_template.th_sched_bucket = TH_BUCKET_RUN;
279
280 thread_template.sfi_class = SFI_CLASS_UNSPECIFIED;
281 thread_template.sfi_wait_class = SFI_CLASS_UNSPECIFIED;
282
283 thread_template.active = 0;
284 thread_template.started = 0;
285 thread_template.static_param = 0;
286 thread_template.policy_reset = 0;
287
288 thread_template.base_pri = BASEPRI_DEFAULT;
289 thread_template.sched_pri = 0;
290 thread_template.max_priority = 0;
291 thread_template.task_priority = 0;
292 thread_template.promotions = 0;
293 thread_template.rwlock_count = 0;
294 thread_template.waiting_for_mutex = NULL;
295
296
297 thread_template.realtime.deadline = UINT64_MAX;
298
299 thread_template.quantum_remaining = 0;
300 thread_template.last_run_time = 0;
301 thread_template.last_made_runnable_time = THREAD_NOT_RUNNABLE;
302 thread_template.last_basepri_change_time = THREAD_NOT_RUNNABLE;
303 thread_template.same_pri_latency = 0;
304
305 thread_template.computation_metered = 0;
306 thread_template.computation_epoch = 0;
307
308#if defined(CONFIG_SCHED_TIMESHARE_CORE)
309 thread_template.sched_stamp = 0;
310 thread_template.pri_shift = INT8_MAX;
311 thread_template.sched_usage = 0;
312 thread_template.cpu_usage = thread_template.cpu_delta = 0;
313#endif
314 thread_template.c_switch = thread_template.p_switch = thread_template.ps_switch = 0;
315
316#if MONOTONIC
317 memset(&thread_template.t_monotonic, 0,
318 sizeof(thread_template.t_monotonic));
319#endif /* MONOTONIC */
320
321 thread_template.bound_processor = PROCESSOR_NULL;
322 thread_template.last_processor = PROCESSOR_NULL;
323
324 thread_template.sched_call = NULL;
325
326 timer_init(&thread_template.user_timer);
327 timer_init(&thread_template.system_timer);
328 timer_init(&thread_template.ptime);
329 timer_init(&thread_template.runnable_timer);
330 thread_template.user_timer_save = 0;
331 thread_template.system_timer_save = 0;
332 thread_template.vtimer_user_save = 0;
333 thread_template.vtimer_prof_save = 0;
334 thread_template.vtimer_rlim_save = 0;
335 thread_template.vtimer_qos_save = 0;
336
337#if CONFIG_SCHED_SFI
338 thread_template.wait_sfi_begin_time = 0;
339#endif
340
341 thread_template.wait_timer_is_set = FALSE;
342 thread_template.wait_timer_active = 0;
343
344 thread_template.depress_timer_active = 0;
345
346 thread_template.recover = (vm_offset_t)NULL;
347
348 thread_template.map = VM_MAP_NULL;
349#if DEVELOPMENT || DEBUG
350 thread_template.pmap_footprint_suspended = FALSE;
351#endif /* DEVELOPMENT || DEBUG */
352
353#if CONFIG_DTRACE
354 thread_template.t_dtrace_predcache = 0;
355 thread_template.t_dtrace_vtime = 0;
356 thread_template.t_dtrace_tracing = 0;
357#endif /* CONFIG_DTRACE */
358
359#if KPERF
360 thread_template.kperf_flags = 0;
361 thread_template.kperf_pet_gen = 0;
362 thread_template.kperf_c_switch = 0;
363 thread_template.kperf_pet_cnt = 0;
364#endif
365
366#if KPC
367 thread_template.kpc_buf = NULL;
368#endif
369
370#if HYPERVISOR
371 thread_template.hv_thread_target = NULL;
372#endif /* HYPERVISOR */
373
374#if (DEVELOPMENT || DEBUG)
375 thread_template.t_page_creation_throttled_hard = 0;
376 thread_template.t_page_creation_throttled_soft = 0;
377#endif /* DEVELOPMENT || DEBUG */
378 thread_template.t_page_creation_throttled = 0;
379 thread_template.t_page_creation_count = 0;
380 thread_template.t_page_creation_time = 0;
381
382 thread_template.affinity_set = NULL;
383
384 thread_template.syscalls_unix = 0;
385 thread_template.syscalls_mach = 0;
386
387 thread_template.t_ledger = LEDGER_NULL;
388 thread_template.t_threadledger = LEDGER_NULL;
389 thread_template.t_bankledger = LEDGER_NULL;
390 thread_template.t_deduct_bank_ledger_time = 0;
391
392 thread_template.requested_policy = (struct thread_requested_policy) {};
393 thread_template.effective_policy = (struct thread_effective_policy) {};
394
395 bzero(&thread_template.overrides, sizeof(thread_template.overrides));
396 thread_template.sync_ipc_overrides = 0;
397
398 thread_template.iotier_override = THROTTLE_LEVEL_NONE;
399 thread_template.thread_io_stats = NULL;
400#if CONFIG_EMBEDDED
401 thread_template.taskwatch = NULL;
402#endif /* CONFIG_EMBEDDED */
403 thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0;
404
405 thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0;
406 thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0;
407
408 thread_template.thread_tag = 0;
409
410 thread_template.ith_voucher_name = MACH_PORT_NULL;
411 thread_template.ith_voucher = IPC_VOUCHER_NULL;
412
413 thread_template.th_work_interval = NULL;
414
415 init_thread = thread_template;
416 machine_set_current_thread(&init_thread);
417}
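
/*
 * Note: fields not explicitly initialized above rely on thread_template being
 * statically zero-initialized; thread_create_internal() below copies the whole
 * template into each newly allocated thread, so per-thread defaults only need
 * to be established once here.
 */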
418
419extern boolean_t allow_qos_policy_set;
420
421void
422thread_init(void)
423{
424 thread_zone = zinit(
425 sizeof(struct thread),
426 thread_max * sizeof(struct thread),
427 THREAD_CHUNK * sizeof(struct thread),
428 "threads");
429
430 thread_qos_override_zone = zinit(
431 sizeof(struct thread_qos_override),
432 4 * thread_max * sizeof(struct thread_qos_override),
433 PAGE_SIZE,
434 "thread qos override");
435 zone_change(thread_qos_override_zone, Z_EXPAND, TRUE);
436 zone_change(thread_qos_override_zone, Z_COLLECT, TRUE);
437 zone_change(thread_qos_override_zone, Z_CALLERACCT, FALSE);
438 zone_change(thread_qos_override_zone, Z_NOENCRYPT, TRUE);
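
 /*
  * A brief note on the calls above (summarizing the zone API as used here):
  * zinit() takes the element size, the maximum zone size in bytes, the
  * allocation chunk size in bytes, and a zone name; the zone_change() calls
  * then mark the override zone expandable and collectable, exempt it from
  * caller accounting, and flag it as not requiring encryption.
  */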
439
440 lck_grp_attr_setdefault(&thread_lck_grp_attr);
441 lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr);
442 lck_attr_setdefault(&thread_lck_attr);
443
444 stack_init();
445
446 thread_policy_init();
447
448 /*
449 * Initialize any machine-dependent
450 * per-thread structures necessary.
451 */
452 machine_thread_init();
453
454 if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct,
455 sizeof (cpumon_ustackshots_trigger_pct))) {
456 cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT;
457 }
458
459 PE_parse_boot_argn("-qos-policy-allow", &allow_qos_policy_set, sizeof(allow_qos_policy_set));
460
461 init_thread_ledgers();
462}
463
464boolean_t
465thread_is_active(thread_t thread)
466{
467 return (thread->active);
468}
469
470void
471thread_corpse_continue(void)
472{
473 thread_t thread = current_thread();
474
475 thread_terminate_internal(thread);
476
477 /*
478 * Handle the thread termination directly
479 * here instead of returning to userspace.
480 */
481 assert(thread->active == FALSE);
482 thread_ast_clear(thread, AST_APC);
483 thread_apc_ast(thread);
484
485 panic("thread_corpse_continue");
486 /*NOTREACHED*/
487}
488
489static void
490thread_terminate_continue(void)
491{
492 panic("thread_terminate_continue");
493 /*NOTREACHED*/
494}
495
496/*
497 * thread_terminate_self:
498 */
499void
500thread_terminate_self(void)
501{
502 thread_t thread = current_thread();
503 task_t task;
504 int threadcnt;
505
506 pal_thread_terminate_self(thread);
507
508 DTRACE_PROC(lwp__exit);
509
510 thread_mtx_lock(thread);
511
512 ipc_thread_disable(thread);
513
514 thread_mtx_unlock(thread);
515
516 thread_sched_call(thread, NULL);
517
518 spl_t s = splsched();
519 thread_lock(thread);
520
521 thread_depress_abort_locked(thread);
522
523 thread_unlock(thread);
524 splx(s);
525
526#if CONFIG_EMBEDDED
527 thead_remove_taskwatch(thread);
528#endif /* CONFIG_EMBEDDED */
529
530 work_interval_thread_terminate(thread);
531
532 thread_mtx_lock(thread);
533
534 thread_policy_reset(thread);
535
536 thread_mtx_unlock(thread);
537
538 bank_swap_thread_bank_ledger(thread, NULL);
539
540 if (kdebug_enable && bsd_hasthreadname(thread->uthread)) {
541 char threadname[MAXTHREADNAMESIZE];
542 bsd_getthreadname(thread->uthread, threadname);
543 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, threadname);
544 }
545
546 task = thread->task;
547 uthread_cleanup(task, thread->uthread, task->bsd_info);
548
549 if (kdebug_enable && task->bsd_info && !task_is_exec_copy(task)) {
550 /* trace out pid before we sign off */
551 long dbg_arg1 = 0;
552 long dbg_arg2 = 0;
553
554 kdbg_trace_data(thread->task->bsd_info, &dbg_arg1, &dbg_arg2);
555 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
556 }
557
558 /*
559 * After this subtraction, this thread should never access
560 * task->bsd_info unless it got 0 back from the hw_atomic_sub. It
561 * could be racing with other threads to be the last thread in the
562 * process, and the last thread in the process will tear down the proc
563 * structure and zero-out task->bsd_info.
564 */
565 threadcnt = hw_atomic_sub(&task->active_thread_count, 1);
566
567 /*
568 * If we are the last thread to terminate and the task is
569 * associated with a BSD process, perform BSD process exit.
570 */
571 if (threadcnt == 0 && task->bsd_info != NULL && !task_is_exec_copy(task)) {
572 mach_exception_data_type_t subcode = 0;
573 if (kdebug_enable) {
574 /* since we're the last thread in this process, trace out the command name too */
575 long args[4] = {};
576 kdbg_trace_string(thread->task->bsd_info, &args[0], &args[1], &args[2], &args[3]);
577 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
578 }
579
580 /* Get the exit reason before proc_exit */
581 subcode = proc_encode_exit_exception_code(task->bsd_info);
582 proc_exit(task->bsd_info);
583 /*
584 * If there is crash info in the task, deliver the crash
585 * notification now, since this is the last thread for
586 * this task.
587 */
588 if (task->corpse_info) {
589 task_deliver_crash_notification(task, current_thread(), EXC_RESOURCE, subcode);
590 }
591 }
592
593 if (threadcnt == 0) {
594 task_lock(task);
595 if (task_is_a_corpse_fork(task)) {
596 thread_wakeup((event_t)&task->active_thread_count);
597 }
598 task_unlock(task);
599 }
600
601 uthread_cred_free(thread->uthread);
602
603 s = splsched();
604 thread_lock(thread);
605
606 /*
607 * Ensure that the depress timer is no longer enqueued,
608 * so the timer (stored in the thread) can be safely deallocated
609 *
610 * TODO: build timer_call_cancel_wait
611 */
612
613 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
614
615 uint32_t delay_us = 1;
616
617 while (thread->depress_timer_active > 0) {
618 thread_unlock(thread);
619 splx(s);
620
621 delay(delay_us++);
622
623 if (delay_us > USEC_PER_SEC)
624 panic("depress timer failed to inactivate! "
625 "thread: %p depress_timer_active: %d",
626 thread, thread->depress_timer_active);
627
628 s = splsched();
629 thread_lock(thread);
630 }
631
632 /*
633 * Cancel wait timer, and wait for
634 * concurrent expirations.
635 */
636 if (thread->wait_timer_is_set) {
637 thread->wait_timer_is_set = FALSE;
638
639 if (timer_call_cancel(&thread->wait_timer))
640 thread->wait_timer_active--;
641 }
642
643 delay_us = 1;
644
645 while (thread->wait_timer_active > 0) {
646 thread_unlock(thread);
647 splx(s);
648
649 delay(delay_us++);
650
651 if (delay_us > USEC_PER_SEC)
652 panic("wait timer failed to inactivate! "
653 "thread: %p wait_timer_active: %d",
654 thread, thread->wait_timer_active);
655
656 s = splsched();
657 thread_lock(thread);
658 }
659
660 /*
661 * If there is a reserved stack, release it.
662 */
663 if (thread->reserved_stack != 0) {
664 stack_free_reserved(thread);
665 thread->reserved_stack = 0;
666 }
667
668 /*
669 * Mark thread as terminating, and block.
670 */
671 thread->state |= TH_TERMINATE;
672 thread_mark_wait_locked(thread, THREAD_UNINT);
673
674 assert((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) == 0);
675 assert((thread->sched_flags & TH_SFLAG_RW_PROMOTED) == 0);
676 assert((thread->sched_flags & TH_SFLAG_EXEC_PROMOTED) == 0);
677 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == 0);
678 assert(thread->promotions == 0);
679 assert(thread->was_promoted_on_wakeup == 0);
680 assert(thread->waiting_for_mutex == NULL);
681 assert(thread->rwlock_count == 0);
682
683 thread_unlock(thread);
684 /* splsched */
685
686 thread_block((thread_continue_t)thread_terminate_continue);
687 /*NOTREACHED*/
688}
689
690/* Drop a thread refcount safely without triggering a zfree */
691void
692thread_deallocate_safe(thread_t thread)
693{
694 __assert_only uint32_t th_ref_count;
695
696 if (thread == THREAD_NULL)
697 return;
698
699 assert_thread_magic(thread);
700
701 if (__probable(atomic_fetch_sub_explicit(&thread->ref_count, 1,
702 memory_order_release) - 1 > 0)) {
703 return;
704 }
705
706 th_ref_count = atomic_load_explicit(&thread->ref_count, memory_order_acquire);
707 assert(th_ref_count == 0);
708
709 /* enqueue the thread for the thread deallocate daemon to call thread_deallocate_complete */
710 thread_deallocate_enqueue(thread);
711}
712
713void
714thread_deallocate(
715 thread_t thread)
716{
717 __assert_only uint32_t th_ref_count;
718
719 if (thread == THREAD_NULL)
720 return;
721
722 assert_thread_magic(thread);
723
724 if (__probable(atomic_fetch_sub_explicit(&thread->ref_count, 1,
725 memory_order_release) - 1 > 0)) {
726 return;
727 }
728
729 th_ref_count = atomic_load_explicit(&thread->ref_count, memory_order_acquire);
730 assert(th_ref_count == 0);
731
732 thread_deallocate_complete(thread);
733}
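
/*
 * Both deallocate paths above use the same reference-counting idiom: the
 * decrement is a release operation so that this thread's prior writes are
 * published before the count can reach zero, and the final acquire load
 * synchronizes with the releases performed by other droppers before teardown.
 * The "safe" variant defers the actual teardown to the terminate daemon so
 * that it can be called from contexts where a zfree() would be unsafe.
 */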
734
735void
736thread_deallocate_complete(
737 thread_t thread)
738{
739 task_t task;
740
741 assert_thread_magic(thread);
742
743 assert(thread->ref_count == 0);
744
745 assert(thread_owned_workloops_count(thread) == 0);
746
747 if (!(thread->state & TH_TERMINATE2))
748 panic("thread_deallocate: thread not properly terminated\n");
749
750 assert(thread->runq == PROCESSOR_NULL);
751
752#if KPC
753 kpc_thread_destroy(thread);
754#endif
755
756 ipc_thread_terminate(thread);
757
758 proc_thread_qos_deallocate(thread);
759
760 task = thread->task;
761
762#ifdef MACH_BSD
763 {
764 void *ut = thread->uthread;
765
766 thread->uthread = NULL;
767 uthread_zone_free(ut);
768 }
769#endif /* MACH_BSD */
770
771 if (thread->t_ledger)
772 ledger_dereference(thread->t_ledger);
773 if (thread->t_threadledger)
774 ledger_dereference(thread->t_threadledger);
775
776 assert(thread->turnstile != TURNSTILE_NULL);
777 if (thread->turnstile)
778 turnstile_deallocate(thread->turnstile);
779
780 if (IPC_VOUCHER_NULL != thread->ith_voucher)
781 ipc_voucher_release(thread->ith_voucher);
782
783 if (thread->thread_io_stats)
784 kfree(thread->thread_io_stats, sizeof(struct io_stat_info));
785
786 if (thread->kernel_stack != 0)
787 stack_free(thread);
788
789 lck_mtx_destroy(&thread->mutex, &thread_lck_grp);
790 machine_thread_destroy(thread);
791
792 task_deallocate(task);
793
794#if MACH_ASSERT
795 assert_thread_magic(thread);
796 thread->thread_magic = 0;
797#endif /* MACH_ASSERT */
798
799 zfree(thread_zone, thread);
800}
801
802void
803thread_starts_owning_workloop(thread_t thread)
804{
805 atomic_fetch_add_explicit(&thread->kqwl_owning_count, 1,
806 memory_order_relaxed);
807}
808
809void
810thread_ends_owning_workloop(thread_t thread)
811{
812 __assert_only uint32_t count;
813 count = atomic_fetch_sub_explicit(&thread->kqwl_owning_count, 1,
814 memory_order_relaxed);
815 assert(count > 0);
816}
817
818uint32_t
819thread_owned_workloops_count(thread_t thread)
820{
821 return atomic_load_explicit(&thread->kqwl_owning_count,
822 memory_order_relaxed);
823}
824
825/*
826 * thread_inspect_deallocate:
827 *
828 * Drop a thread inspection reference.
829 */
830void
831thread_inspect_deallocate(
832 thread_inspect_t thread_inspect)
833{
834 thread_deallocate((thread_t)thread_inspect);
835}
836
837/*
838 * thread_exception_daemon:
839 *
840 * Deliver EXC_{RESOURCE,GUARD} exception
841 */
842static void
843thread_exception_daemon(void)
844{
845 struct thread_exception_elt *elt;
846 task_t task;
847 thread_t thread;
848 exception_type_t etype;
849
850 simple_lock(&thread_exception_lock);
851 while ((elt = (struct thread_exception_elt *)dequeue_head(&thread_exception_queue)) != NULL) {
852 simple_unlock(&thread_exception_lock);
853
854 etype = elt->exception_type;
855 task = elt->exception_task;
856 thread = elt->exception_thread;
857 assert_thread_magic(thread);
858
859 kfree(elt, sizeof (*elt));
860
861 /* wait for all the threads in the task to terminate */
862 task_lock(task);
863 task_wait_till_threads_terminate_locked(task);
864 task_unlock(task);
865
866 /* Consumes the task ref returned by task_generate_corpse_internal */
867 task_deallocate(task);
868 /* Consumes the thread ref returned by task_generate_corpse_internal */
869 thread_deallocate(thread);
870
871 /* Deliver the notification, also clears the corpse. */
872 task_deliver_crash_notification(task, thread, etype, 0);
873
874 simple_lock(&thread_exception_lock);
875 }
876
877 assert_wait((event_t)&thread_exception_queue, THREAD_UNINT);
878 simple_unlock(&thread_exception_lock);
879
880 thread_block((thread_continue_t)thread_exception_daemon);
881}
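
/*
 * Like the other daemons in this file, thread_exception_daemon never returns:
 * it parks on its queue event with assert_wait() and passes itself as the
 * continuation to thread_block(), so each wakeup restarts the function from
 * the top rather than resuming a saved stack.
 */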
882
883/*
884 * thread_exception_enqueue:
885 *
886 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
887 */
888void
889thread_exception_enqueue(
890 task_t task,
891 thread_t thread,
892 exception_type_t etype)
893{
894 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
895 struct thread_exception_elt *elt = kalloc(sizeof (*elt));
896 elt->exception_type = etype;
897 elt->exception_task = task;
898 elt->exception_thread = thread;
899
900 simple_lock(&thread_exception_lock);
901 enqueue_tail(&thread_exception_queue, (queue_entry_t)elt);
902 simple_unlock(&thread_exception_lock);
903
904 thread_wakeup((event_t)&thread_exception_queue);
905}
906
907/*
908 * thread_copy_resource_info
909 *
910 * Copy the resource info counters from source
911 * thread to destination thread.
912 */
913void
914thread_copy_resource_info(
915 thread_t dst_thread,
916 thread_t src_thread)
917{
918 dst_thread->c_switch = src_thread->c_switch;
919 dst_thread->p_switch = src_thread->p_switch;
920 dst_thread->ps_switch = src_thread->ps_switch;
921 dst_thread->precise_user_kernel_time = src_thread->precise_user_kernel_time;
922 dst_thread->user_timer = src_thread->user_timer;
923 dst_thread->user_timer_save = src_thread->user_timer_save;
924 dst_thread->system_timer = src_thread->system_timer;
925 dst_thread->system_timer_save = src_thread->system_timer_save;
926 dst_thread->runnable_timer = src_thread->runnable_timer;
927 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
928 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
929 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
930 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
931 dst_thread->syscalls_unix = src_thread->syscalls_unix;
932 dst_thread->syscalls_mach = src_thread->syscalls_mach;
933 ledger_rollup(dst_thread->t_threadledger, src_thread->t_threadledger);
934 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
935}
936
937/*
938 * thread_terminate_daemon:
939 *
940 * Perform final clean up for terminating threads.
941 */
942static void
943thread_terminate_daemon(void)
944{
945 thread_t self, thread;
946 task_t task;
947
948 self = current_thread();
949 self->options |= TH_OPT_SYSTEM_CRITICAL;
950
951 (void)splsched();
952 simple_lock(&thread_terminate_lock);
953
954thread_terminate_start:
955 while ((thread = qe_dequeue_head(&thread_terminate_queue, struct thread, runq_links)) != THREAD_NULL) {
956 assert_thread_magic(thread);
957
958 /*
959 * If marked for crash reporting, skip reaping.
960 * The corpse delivery thread will clear the bit and re-enqueue
961 * the thread for reaping when done.
962 */
963 if (thread->inspection){
964 enqueue_tail(&crashed_threads_queue, &thread->runq_links);
965 continue;
966 }
967
968 simple_unlock(&thread_terminate_lock);
969 (void)spllo();
970
971 task = thread->task;
972
973 task_lock(task);
974 task->total_user_time += timer_grab(&thread->user_timer);
975 task->total_ptime += timer_grab(&thread->ptime);
976 task->total_runnable_time += timer_grab(&thread->runnable_timer);
977 if (thread->precise_user_kernel_time) {
978 task->total_system_time += timer_grab(&thread->system_timer);
979 } else {
980 task->total_user_time += timer_grab(&thread->system_timer);
981 }
982
983 task->c_switch += thread->c_switch;
984 task->p_switch += thread->p_switch;
985 task->ps_switch += thread->ps_switch;
986
987 task->syscalls_unix += thread->syscalls_unix;
988 task->syscalls_mach += thread->syscalls_mach;
989
990 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
991 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
992 task->task_gpu_ns += ml_gpu_stat(thread);
993 task->task_energy += ml_energy_stat(thread);
994
995#if MONOTONIC
996 mt_terminate_update(task, thread);
997#endif /* MONOTONIC */
998
999 thread_update_qos_cpu_time(thread);
1000
1001 queue_remove(&task->threads, thread, thread_t, task_threads);
1002 task->thread_count--;
1003
1004 /*
1005 * If the task is being halted, and there is only one thread
1006 * left in the task after this one, then wakeup that thread.
1007 */
1008 if (task->thread_count == 1 && task->halting)
1009 thread_wakeup((event_t)&task->halting);
1010
1011 task_unlock(task);
1012
1013 lck_mtx_lock(&tasks_threads_lock);
1014 queue_remove(&threads, thread, thread_t, threads);
1015 threads_count--;
1016 lck_mtx_unlock(&tasks_threads_lock);
1017
1018 thread_deallocate(thread);
1019
1020 (void)splsched();
1021 simple_lock(&thread_terminate_lock);
1022 }
1023
1024 while ((thread = qe_dequeue_head(&thread_deallocate_queue, struct thread, runq_links)) != THREAD_NULL) {
1025 assert_thread_magic(thread);
1026
1027 simple_unlock(&thread_terminate_lock);
1028 (void)spllo();
1029
1030 thread_deallocate_complete(thread);
1031
1032 (void)splsched();
1033 simple_lock(&thread_terminate_lock);
1034 }
1035
1036 struct turnstile *turnstile;
1037 while ((turnstile = qe_dequeue_head(&turnstile_deallocate_queue, struct turnstile, ts_deallocate_link)) != TURNSTILE_NULL) {
1038
1039 simple_unlock(&thread_terminate_lock);
1040 (void)spllo();
1041
1042 turnstile_destroy(turnstile);
1043
1044 (void)splsched();
1045 simple_lock(&thread_terminate_lock);
1046 }
1047
1048 queue_entry_t qe;
1049
1050 /*
1051 * see workq_deallocate_enqueue: struct workqueue is opaque to thread.c, so
1052 * this queue simply links the memory without interpreting it
1053 */
1054 while ((qe = dequeue_head(&workq_deallocate_queue))) {
1055 simple_unlock(&thread_terminate_lock);
1056 (void)spllo();
1057
1058 workq_destroy((struct workqueue *)qe);
1059
1060 (void)splsched();
1061 simple_lock(&thread_terminate_lock);
1062 }
1063
1064 /*
1065 * Check whether anything was enqueued on the thread terminate/deallocate
1066 * queues while the workq deallocate queue was being processed.
1067 */
1068 if (!queue_empty(&thread_terminate_queue) ||
1069 !queue_empty(&thread_deallocate_queue) ||
1070 !queue_empty(&turnstile_deallocate_queue))
1071 goto thread_terminate_start;
1072
1073 assert_wait((event_t)&thread_terminate_queue, THREAD_UNINT);
1074 simple_unlock(&thread_terminate_lock);
1075 /* splsched */
1076
1077 self->options &= ~TH_OPT_SYSTEM_CRITICAL;
1078 thread_block((thread_continue_t)thread_terminate_daemon);
1079 /*NOTREACHED*/
1080}
1081
1082/*
1083 * thread_terminate_enqueue:
1084 *
1085 * Enqueue a terminating thread for final disposition.
1086 *
1087 * Called at splsched.
1088 */
1089void
1090thread_terminate_enqueue(
1091 thread_t thread)
1092{
1093 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
1094
1095 simple_lock(&thread_terminate_lock);
1096 enqueue_tail(&thread_terminate_queue, &thread->runq_links);
1097 simple_unlock(&thread_terminate_lock);
1098
1099 thread_wakeup((event_t)&thread_terminate_queue);
1100}
1101
1102/*
1103 * thread_deallocate_enqueue:
1104 *
1105 * Enqueue a thread for final deallocation.
1106 */
1107static void
1108thread_deallocate_enqueue(
1109 thread_t thread)
1110{
1111 spl_t s = splsched();
1112
1113 simple_lock(&thread_terminate_lock);
1114 enqueue_tail(&thread_deallocate_queue, &thread->runq_links);
1115 simple_unlock(&thread_terminate_lock);
1116
1117 thread_wakeup((event_t)&thread_terminate_queue);
1118 splx(s);
1119}
1120
1121/*
1122 * turnstile_deallocate_enqueue:
1123 *
1124 * Enqueue a turnstile for final deallocation.
1125 */
1126void
1127turnstile_deallocate_enqueue(
1128 struct turnstile *turnstile)
1129{
1130 spl_t s = splsched();
1131
1132 simple_lock(&thread_terminate_lock);
1133 enqueue_tail(&turnstile_deallocate_queue, &turnstile->ts_deallocate_link);
1134 simple_unlock(&thread_terminate_lock);
1135
1136 thread_wakeup((event_t)&thread_terminate_queue);
1137 splx(s);
1138}
1139
1140/*
1141 * workq_deallocate_enqueue:
1142 *
1143 * Enqueue a workqueue for final deallocation.
1144 */
1145void
1146workq_deallocate_enqueue(
1147 struct workqueue *wq)
1148{
1149 spl_t s = splsched();
1150
1151 simple_lock(&thread_terminate_lock);
1152 /*
1153 * this is just to delay a zfree(), so we link the memory without regard
1154 * for the struct's layout.
1155 */
1156 enqueue_tail(&workq_deallocate_queue, (queue_entry_t)wq);
1157 simple_unlock(&thread_terminate_lock);
1158
1159 thread_wakeup((event_t)&thread_terminate_queue);
1160 splx(s);
1161}
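
/*
 * Design note: the terminate, deallocate, turnstile and workq queues above all
 * share thread_terminate_lock and are drained by the same terminate daemon,
 * which is why every enqueue path issues its wakeup on the single
 * &thread_terminate_queue event rather than on a per-queue event.
 */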
1162
1163/*
1164 * thread_terminate_crashed_threads:
1165 * walk the list of crashed threads and move any that are no longer
1166 * being inspected back onto the terminate queue.
1167 */
1168void
1169thread_terminate_crashed_threads()
1170{
1171 thread_t th_remove;
1172 boolean_t should_wake_terminate_queue = FALSE;
1173 spl_t s = splsched();
1174
1175 simple_lock(&thread_terminate_lock);
1176 /*
1177 * loop through the crashed threads queue and re-queue any threads
1178 * that are no longer being inspected onto the terminate queue
1179 */
1180
1181 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1182 /* make sure current_thread is never in crashed queue */
1183 assert(th_remove != current_thread());
1184
1185 if (th_remove->inspection == FALSE) {
1186 re_queue_tail(&thread_terminate_queue, &th_remove->runq_links);
1187 should_wake_terminate_queue = TRUE;
1188 }
1189 }
1190
1191 simple_unlock(&thread_terminate_lock);
1192 splx(s);
1193 if (should_wake_terminate_queue == TRUE) {
1194 thread_wakeup((event_t)&thread_terminate_queue);
1195 }
1196}
1197
1198/*
1199 * thread_stack_daemon:
1200 *
1201 * Perform stack allocation for threads that could not be handed
1202 * a kernel stack at thread_invoke (context switch) time.
1203 */
1204static void
1205thread_stack_daemon(void)
1206{
1207 thread_t thread;
1208 spl_t s;
1209
1210 s = splsched();
1211 simple_lock(&thread_stack_lock);
1212
1213 while ((thread = qe_dequeue_head(&thread_stack_queue, struct thread, runq_links)) != THREAD_NULL) {
1214 assert_thread_magic(thread);
1215
1216 simple_unlock(&thread_stack_lock);
1217 splx(s);
1218
1219 /* allocate stack with interrupts enabled so that we can call into VM */
1220 stack_alloc(thread);
1221
1222 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1223
1224 s = splsched();
1225 thread_lock(thread);
1226 thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ);
1227 thread_unlock(thread);
1228
1229 simple_lock(&thread_stack_lock);
1230 }
1231
1232 assert_wait((event_t)&thread_stack_queue, THREAD_UNINT);
1233 simple_unlock(&thread_stack_lock);
1234 splx(s);
1235
1236 thread_block((thread_continue_t)thread_stack_daemon);
1237 /*NOTREACHED*/
1238}
1239
1240/*
1241 * thread_stack_enqueue:
1242 *
1243 * Enqueue a thread for stack allocation.
1244 *
1245 * Called at splsched.
1246 */
1247void
1248thread_stack_enqueue(
1249 thread_t thread)
1250{
1251 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1252 assert_thread_magic(thread);
1253
1254 simple_lock(&thread_stack_lock);
1255 enqueue_tail(&thread_stack_queue, &thread->runq_links);
1256 simple_unlock(&thread_stack_lock);
1257
1258 thread_wakeup((event_t)&thread_stack_queue);
1259}
1260
1261void
1262thread_daemon_init(void)
1263{
1264 kern_return_t result;
1265 thread_t thread = NULL;
1266
1267 simple_lock_init(&thread_terminate_lock, 0);
1268 queue_init(&thread_terminate_queue);
1269 queue_init(&thread_deallocate_queue);
1270 queue_init(&workq_deallocate_queue);
1271 queue_init(&turnstile_deallocate_queue);
1272 queue_init(&crashed_threads_queue);
1273
1274 result = kernel_thread_start_priority((thread_continue_t)thread_terminate_daemon, NULL, MINPRI_KERNEL, &thread);
1275 if (result != KERN_SUCCESS)
1276 panic("thread_daemon_init: thread_terminate_daemon");
1277
1278 thread_deallocate(thread);
1279
1280 simple_lock_init(&thread_stack_lock, 0);
1281 queue_init(&thread_stack_queue);
1282
1283 result = kernel_thread_start_priority((thread_continue_t)thread_stack_daemon, NULL, BASEPRI_PREEMPT_HIGH, &thread);
1284 if (result != KERN_SUCCESS)
1285 panic("thread_daemon_init: thread_stack_daemon");
1286
1287 thread_deallocate(thread);
1288
1289 simple_lock_init(&thread_exception_lock, 0);
1290 queue_init(&thread_exception_queue);
1291
1292 result = kernel_thread_start_priority((thread_continue_t)thread_exception_daemon, NULL, MINPRI_KERNEL, &thread);
1293 if (result != KERN_SUCCESS)
1294 panic("thread_daemon_init: thread_exception_daemon");
1295
1296 thread_deallocate(thread);
1297}
1298
1299#define TH_OPTION_NONE 0x00
1300#define TH_OPTION_NOCRED 0x01
1301#define TH_OPTION_NOSUSP 0x02
1302#define TH_OPTION_WORKQ 0x04
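
/*
 * A short summary of the options above, based on how they are consumed below:
 * TH_OPTION_NOCRED - do not inherit a credential reference in uthread_alloc()
 * TH_OPTION_NOSUSP - fail creation while the parent task is suspended
 * TH_OPTION_WORKQ  - initialize the thread as a parked workqueue thread
 */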
1303
1304/*
1305 * Create a new thread.
1306 * Doesn't start the thread running.
1307 *
1308 * Task and tasks_threads_lock are returned locked on success.
1309 */
1310static kern_return_t
1311thread_create_internal(
1312 task_t parent_task,
1313 integer_t priority,
1314 thread_continue_t continuation,
1315 void *parameter,
1316 int options,
1317 thread_t *out_thread)
1318{
1319 thread_t new_thread;
1320 static thread_t first_thread;
1321
1322 /*
1323 * Allocate a thread and initialize static fields
1324 */
1325 if (first_thread == THREAD_NULL)
1326 new_thread = first_thread = current_thread();
1327 else
1328 new_thread = (thread_t)zalloc(thread_zone);
1329 if (new_thread == THREAD_NULL)
1330 return (KERN_RESOURCE_SHORTAGE);
1331
1332 if (new_thread != first_thread)
1333 *new_thread = thread_template;
1334
1335#ifdef MACH_BSD
1336 new_thread->uthread = uthread_alloc(parent_task, new_thread, (options & TH_OPTION_NOCRED) != 0);
1337 if (new_thread->uthread == NULL) {
1338#if MACH_ASSERT
1339 new_thread->thread_magic = 0;
1340#endif /* MACH_ASSERT */
1341
1342 zfree(thread_zone, new_thread);
1343 return (KERN_RESOURCE_SHORTAGE);
1344 }
1345#endif /* MACH_BSD */
1346
1347 if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
1348#ifdef MACH_BSD
1349 void *ut = new_thread->uthread;
1350
1351 new_thread->uthread = NULL;
1352 /* cred free may not be necessary */
1353 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1354 uthread_cred_free(ut);
1355 uthread_zone_free(ut);
1356#endif /* MACH_BSD */
1357
1358#if MACH_ASSERT
1359 new_thread->thread_magic = 0;
1360#endif /* MACH_ASSERT */
1361
1362 zfree(thread_zone, new_thread);
1363 return (KERN_FAILURE);
1364 }
1365
1366 new_thread->task = parent_task;
1367
1368 thread_lock_init(new_thread);
1369 wake_lock_init(new_thread);
1370
1371 lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr);
1372
1373 ipc_thread_init(new_thread);
1374
1375 new_thread->continuation = continuation;
1376 new_thread->parameter = parameter;
1377 new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
1378 priority_queue_init(&new_thread->inheritor_queue,
1379 PRIORITY_QUEUE_BUILTIN_MAX_HEAP);
1380
1381 /* Allocate I/O Statistics structure */
1382 new_thread->thread_io_stats = (io_stat_info_t)kalloc(sizeof(struct io_stat_info));
1383 assert(new_thread->thread_io_stats != NULL);
1384 bzero(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1385 new_thread->sync_ipc_overrides = 0;
1386
1387#if KASAN
1388 kasan_init_thread(&new_thread->kasan_data);
1389#endif
1390
1391#if CONFIG_IOSCHED
1392 /* Clear out the I/O Scheduling info for AppleFSCompression */
1393 new_thread->decmp_upl = NULL;
1394#endif /* CONFIG_IOSCHED */
1395
1396#if DEVELOPMENT || DEBUG
1397 task_lock(parent_task);
1398 uint16_t thread_limit = parent_task->task_thread_limit;
1399 if (exc_resource_threads_enabled &&
1400 thread_limit > 0 &&
1401 parent_task->thread_count >= thread_limit &&
1402 !parent_task->task_has_crossed_thread_limit &&
1403 !(parent_task->t_flags & TF_CORPSE)) {
1404 int thread_count = parent_task->thread_count;
1405 parent_task->task_has_crossed_thread_limit = TRUE;
1406 task_unlock(parent_task);
1407 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
1408 }
1409 else {
1410 task_unlock(parent_task);
1411 }
1412#endif
1413
1414 lck_mtx_lock(&tasks_threads_lock);
1415 task_lock(parent_task);
1416
1417 /*
1418 * Fail thread creation if parent task is being torn down or has too many threads
1419 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended
1420 */
1421 if (parent_task->active == 0 || parent_task->halting ||
1422 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1423 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1424 task_unlock(parent_task);
1425 lck_mtx_unlock(&tasks_threads_lock);
1426
1427#ifdef MACH_BSD
1428 {
1429 void *ut = new_thread->uthread;
1430
1431 new_thread->uthread = NULL;
1432 uthread_cleanup(parent_task, ut, parent_task->bsd_info);
1433 /* cred free may not be necessary */
1434 uthread_cred_free(ut);
1435 uthread_zone_free(ut);
1436 }
1437#endif /* MACH_BSD */
1438 ipc_thread_disable(new_thread);
1439 ipc_thread_terminate(new_thread);
1440 kfree(new_thread->thread_io_stats, sizeof(struct io_stat_info));
1441 lck_mtx_destroy(&new_thread->mutex, &thread_lck_grp);
1442 machine_thread_destroy(new_thread);
1443 zfree(thread_zone, new_thread);
1444 return (KERN_FAILURE);
1445 }
1446
1447 /* New threads inherit any default state on the task */
1448 machine_thread_inherit_taskwide(new_thread, parent_task);
1449
1450 task_reference_internal(parent_task);
1451
1452 if (new_thread->task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1453 /*
1454 * This task has a per-thread CPU limit; make sure this new thread
1455 * gets its limit set too, before it gets out of the kernel.
1456 */
1457 act_set_astledger(new_thread);
1458 }
1459
1460 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1461 if ((new_thread->t_threadledger = ledger_instantiate(thread_ledger_template,
1462 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1463
1464 ledger_entry_setactive(new_thread->t_threadledger, thread_ledgers.cpu_time);
1465 }
1466
1467 new_thread->t_bankledger = LEDGER_NULL;
1468 new_thread->t_deduct_bank_ledger_time = 0;
1469 new_thread->t_deduct_bank_ledger_energy = 0;
1470
1471 new_thread->t_ledger = new_thread->task->ledger;
1472 if (new_thread->t_ledger)
1473 ledger_reference(new_thread->t_ledger);
1474
1475#if defined(CONFIG_SCHED_MULTIQ)
1476 /* Cache the task's sched_group */
1477 new_thread->sched_group = parent_task->sched_group;
1478#endif /* defined(CONFIG_SCHED_MULTIQ) */
1479
1480 /* Cache the task's map */
1481 new_thread->map = parent_task->map;
1482
1483 timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
1484 timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);
1485
1486#if KPC
1487 kpc_thread_create(new_thread);
1488#endif
1489
1490 /* Set the thread's scheduling parameters */
1491 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1492 new_thread->max_priority = parent_task->max_priority;
1493 new_thread->task_priority = parent_task->priority;
1494
1495 int new_priority = (priority < 0) ? parent_task->priority : priority;
1497 if (new_priority > new_thread->max_priority)
1498 new_priority = new_thread->max_priority;
1499#if CONFIG_EMBEDDED
1500 if (new_priority < MAXPRI_THROTTLE) {
1501 new_priority = MAXPRI_THROTTLE;
1502 }
1503#endif /* CONFIG_EMBEDDED */
1504
1505 new_thread->importance = new_priority - new_thread->task_priority;
1506
1507 sched_set_thread_base_priority(new_thread, new_priority);
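
 /*
  * Illustrative example of the priority selection above: with a parent task
  * priority of 31 and max_priority of 63, a caller passing -1 inherits 31
  * (importance 0), while a caller passing 80 is clamped to 63 (importance 32).
  * On embedded configurations the result is additionally floored at
  * MAXPRI_THROTTLE.
  */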
1508
1509#if defined(CONFIG_SCHED_TIMESHARE_CORE)
1510 new_thread->sched_stamp = sched_tick;
1511 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1512#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1513
1514#if CONFIG_EMBEDDED
1515 if (parent_task->max_priority <= MAXPRI_THROTTLE)
1516 sched_thread_mode_demote(new_thread, TH_SFLAG_THROTTLED);
1517#endif /* CONFIG_EMBEDDED */
1518
1519 thread_policy_create(new_thread);
1520
1521 /* Chain the thread onto the task's list */
1522 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1523 parent_task->thread_count++;
1524
1525 /* So terminating threads don't need to take the task lock to decrement */
1526 hw_atomic_add(&parent_task->active_thread_count, 1);
1527
1528 /* Protected by the tasks_threads_lock */
1529 new_thread->thread_id = ++thread_unique_id;
1530
1531
1532 queue_enter(&threads, new_thread, thread_t, threads);
1533 threads_count++;
1534
1535 new_thread->active = TRUE;
1536 if (task_is_a_corpse_fork(parent_task)) {
1537 /* Set the inspection bit if the task is a corpse fork */
1538 new_thread->inspection = TRUE;
1539 } else {
1540 new_thread->inspection = FALSE;
1541 }
1542 new_thread->corpse_dup = FALSE;
1543 new_thread->turnstile = turnstile_alloc();
1544 *out_thread = new_thread;
1545
1546 if (kdebug_enable) {
1547 long args[4] = {};
1548
1549 kdbg_trace_data(parent_task->bsd_info, &args[1], &args[3]);
1550
1551 /*
1552 * Starting with 26604425, exec'ing creates a new task/thread.
1553 *
1554 * NEWTHREAD in the current process has two possible meanings:
1555 *
1556 * 1) Create a new thread for this process.
1557 * 2) Create a new thread for the future process this will become in an
1558 * exec.
1559 *
1560 * To disambiguate these, arg3 will be set to TRUE for case #2.
1561 *
1562 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1563 * task exec'ing. The read of t_procflags does not take the proc_lock.
1564 */
1565 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1566
1567 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1568 args[1], args[2], args[3]);
1569
1570 kdbg_trace_string(parent_task->bsd_info, &args[0], &args[1],
1571 &args[2], &args[3]);
1572 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1573 args[3]);
1574 }
1575
1576 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1577
1578 return (KERN_SUCCESS);
1579}
1580
1581static kern_return_t
1582thread_create_internal2(
1583 task_t task,
1584 thread_t *new_thread,
1585 boolean_t from_user,
1586 thread_continue_t continuation)
1587{
1588 kern_return_t result;
1589 thread_t thread;
1590
1591 if (task == TASK_NULL || task == kernel_task)
1592 return (KERN_INVALID_ARGUMENT);
1593
1594 result = thread_create_internal(task, -1, continuation, NULL, TH_OPTION_NONE, &thread);
1595 if (result != KERN_SUCCESS)
1596 return (result);
1597
1598 thread->user_stop_count = 1;
1599 thread_hold(thread);
1600 if (task->suspend_count > 0)
1601 thread_hold(thread);
1602
1603 if (from_user)
1604 extmod_statistics_incr_thread_create(task);
1605
1606 task_unlock(task);
1607 lck_mtx_unlock(&tasks_threads_lock);
1608
1609 *new_thread = thread;
1610
1611 return (KERN_SUCCESS);
1612}
1613
1614/* No prototype, since task_server.h has the _from_user version if KERNEL_SERVER */
1615kern_return_t
1616thread_create(
1617 task_t task,
1618 thread_t *new_thread);
1619
1620kern_return_t
1621thread_create(
1622 task_t task,
1623 thread_t *new_thread)
1624{
1625 return thread_create_internal2(task, new_thread, FALSE, (thread_continue_t)thread_bootstrap_return);
1626}
1627
1628kern_return_t
1629thread_create_from_user(
1630 task_t task,
1631 thread_t *new_thread)
1632{
1633 return thread_create_internal2(task, new_thread, TRUE, (thread_continue_t)thread_bootstrap_return);
1634}
1635
1636kern_return_t
1637thread_create_with_continuation(
1638 task_t task,
1639 thread_t *new_thread,
1640 thread_continue_t continuation)
1641{
1642 return thread_create_internal2(task, new_thread, FALSE, continuation);
1643}
1644
1645/*
1646 * Create a thread that is already started, but is waiting on an event
1647 */
1648static kern_return_t
1649thread_create_waiting_internal(
1650 task_t task,
1651 thread_continue_t continuation,
1652 event_t event,
1653 block_hint_t block_hint,
1654 int options,
1655 thread_t *new_thread)
1656{
1657 kern_return_t result;
1658 thread_t thread;
1659
1660 if (task == TASK_NULL || task == kernel_task)
1661 return (KERN_INVALID_ARGUMENT);
1662
1663 result = thread_create_internal(task, -1, continuation, NULL,
1664 options, &thread);
1665 if (result != KERN_SUCCESS)
1666 return (result);
1667
1668 /* note no user_stop_count or thread_hold here */
1669
1670 if (task->suspend_count > 0)
1671 thread_hold(thread);
1672
1673 thread_mtx_lock(thread);
1674 thread_set_pending_block_hint(thread, block_hint);
1675 if (options & TH_OPTION_WORKQ) {
1676 thread->static_param = true;
1677 event = workq_thread_init_and_wq_lock(task, thread);
1678 }
1679 thread_start_in_assert_wait(thread, event, THREAD_INTERRUPTIBLE);
1680 thread_mtx_unlock(thread);
1681
1682 task_unlock(task);
1683 lck_mtx_unlock(&tasks_threads_lock);
1684
1685 *new_thread = thread;
1686
1687 return (KERN_SUCCESS);
1688}
1689
1690kern_return_t
1691thread_create_waiting(
1692 task_t task,
1693 thread_continue_t continuation,
1694 event_t event,
1695 thread_t *new_thread)
1696{
1697 return thread_create_waiting_internal(task, continuation, event,
1698 kThreadWaitNone, TH_OPTION_NONE, new_thread);
1699}
1700
1701
1702static kern_return_t
1703thread_create_running_internal2(
1704 task_t task,
1705 int flavor,
1706 thread_state_t new_state,
1707 mach_msg_type_number_t new_state_count,
1708 thread_t *new_thread,
1709 boolean_t from_user)
1710{
1711 kern_return_t result;
1712 thread_t thread;
1713
1714 if (task == TASK_NULL || task == kernel_task)
1715 return (KERN_INVALID_ARGUMENT);
1716
1717 result = thread_create_internal(task, -1,
1718 (thread_continue_t)thread_bootstrap_return, NULL,
1719 TH_OPTION_NONE, &thread);
1720 if (result != KERN_SUCCESS)
1721 return (result);
1722
1723 if (task->suspend_count > 0)
1724 thread_hold(thread);
1725
1726 if (from_user) {
1727 result = machine_thread_state_convert_from_user(thread, flavor,
1728 new_state, new_state_count);
1729 }
1730 if (result == KERN_SUCCESS) {
1731 result = machine_thread_set_state(thread, flavor, new_state,
1732 new_state_count);
1733 }
1734 if (result != KERN_SUCCESS) {
1735 task_unlock(task);
1736 lck_mtx_unlock(&tasks_threads_lock);
1737
1738 thread_terminate(thread);
1739 thread_deallocate(thread);
1740 return (result);
1741 }
1742
1743 thread_mtx_lock(thread);
1744 thread_start(thread);
1745 thread_mtx_unlock(thread);
1746
1747 if (from_user)
1748 extmod_statistics_incr_thread_create(task);
1749
1750 task_unlock(task);
1751 lck_mtx_unlock(&tasks_threads_lock);
1752
1753 *new_thread = thread;
1754
1755 return (result);
1756}
1757
1758/* Prototype, see justification above */
1759kern_return_t
1760thread_create_running(
1761 task_t task,
1762 int flavor,
1763 thread_state_t new_state,
1764 mach_msg_type_number_t new_state_count,
1765 thread_t *new_thread);
1766
1767kern_return_t
1768thread_create_running(
1769 task_t task,
1770 int flavor,
1771 thread_state_t new_state,
1772 mach_msg_type_number_t new_state_count,
1773 thread_t *new_thread)
1774{
1775 return thread_create_running_internal2(
1776 task, flavor, new_state, new_state_count,
1777 new_thread, FALSE);
1778}
1779
1780kern_return_t
1781thread_create_running_from_user(
1782 task_t task,
1783 int flavor,
1784 thread_state_t new_state,
1785 mach_msg_type_number_t new_state_count,
1786 thread_t *new_thread)
1787{
1788 return thread_create_running_internal2(
1789 task, flavor, new_state, new_state_count,
1790 new_thread, TRUE);
1791}
1792
1793kern_return_t
1794thread_create_workq_waiting(
1795 task_t task,
1796 thread_continue_t continuation,
1797 thread_t *new_thread)
1798{
1799 int options = TH_OPTION_NOCRED | TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
1800 return thread_create_waiting_internal(task, continuation, NULL,
1801 kThreadWaitParkedWorkQueue, options, new_thread);
1802}
1803
1804/*
1805 * kernel_thread_create:
1806 *
1807 * Create a thread in the kernel task
1808 * to execute in kernel context.
1809 */
1810kern_return_t
1811kernel_thread_create(
1812 thread_continue_t continuation,
1813 void *parameter,
1814 integer_t priority,
1815 thread_t *new_thread)
1816{
1817 kern_return_t result;
1818 thread_t thread;
1819 task_t task = kernel_task;
1820
1821 result = thread_create_internal(task, priority, continuation, parameter,
1822 TH_OPTION_NOCRED | TH_OPTION_NONE, &thread);
1823 if (result != KERN_SUCCESS)
1824 return (result);
1825
1826 task_unlock(task);
1827 lck_mtx_unlock(&tasks_threads_lock);
1828
1829 stack_alloc(thread);
1830 assert(thread->kernel_stack != 0);
1831#if CONFIG_EMBEDDED
1832 if (priority > BASEPRI_KERNEL)
1833#endif
1834 thread->reserved_stack = thread->kernel_stack;
1835
1836 if (debug_task & 1)
1837 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1838 *new_thread = thread;
1839
1840 return (result);
1841}
1842
1843kern_return_t
1844kernel_thread_start_priority(
1845 thread_continue_t continuation,
1846 void *parameter,
1847 integer_t priority,
1848 thread_t *new_thread)
1849{
1850 kern_return_t result;
1851 thread_t thread;
1852
1853 result = kernel_thread_create(continuation, parameter, priority, &thread);
1854 if (result != KERN_SUCCESS)
1855 return (result);
1856
1857 *new_thread = thread;
1858
1859 thread_mtx_lock(thread);
1860 thread_start(thread);
1861 thread_mtx_unlock(thread);
1862
1863 return (result);
1864}
1865
1866kern_return_t
1867kernel_thread_start(
1868 thread_continue_t continuation,
1869 void *parameter,
1870 thread_t *new_thread)
1871{
1872 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1873}
1874
1875/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1876/* it is assumed that the thread is locked by the caller */
1877static void
1878retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1879{
1880 int state, flags;
1881
1882 /* fill in info */
1883
1884 thread_read_times(thread, &basic_info->user_time,
1885 &basic_info->system_time, NULL);
1886
1887 /*
1888 * Update lazy-evaluated scheduler info because someone wants it.
1889 */
1890 if (SCHED(can_update_priority)(thread))
1891 SCHED(update_priority)(thread);
1892
1893 basic_info->sleep_time = 0;
1894
1895 /*
1896 * To calculate cpu_usage, first correct for timer rate,
1897 * then for 5/8 ageing. The correction factor [3/5] is
1898 * (1/(5/8) - 1).
1899 */
1900 basic_info->cpu_usage = 0;
1901#if defined(CONFIG_SCHED_TIMESHARE_CORE)
1902 if (sched_tick_interval) {
1903 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1904 * TH_USAGE_SCALE) / sched_tick_interval);
1905 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1906 }
1907#endif
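
 /*
  * Illustrative example of the scaling above: TH_USAGE_SCALE corresponds to
  * 100% usage, so a thread whose aged cpu_usage equals half of
  * sched_tick_interval reports roughly TH_USAGE_SCALE / 2 before the 3/5
  * ageing correction is applied, and the result is capped at TH_USAGE_SCALE
  * below.
  */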
1908
1909 if (basic_info->cpu_usage > TH_USAGE_SCALE)
1910 basic_info->cpu_usage = TH_USAGE_SCALE;
1911
1912 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1913 POLICY_TIMESHARE: POLICY_RR);
1914
1915 flags = 0;
1916 if (thread->options & TH_OPT_IDLE_THREAD)
1917 flags |= TH_FLAGS_IDLE;
1918
1919 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
1920 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
1921 }
1922
1923 if (!thread->kernel_stack)
1924 flags |= TH_FLAGS_SWAPPED;
1925
1926 state = 0;
1927 if (thread->state & TH_TERMINATE)
1928 state = TH_STATE_HALTED;
1929 else
1930 if (thread->state & TH_RUN)
1931 state = TH_STATE_RUNNING;
1932 else
1933 if (thread->state & TH_UNINT)
1934 state = TH_STATE_UNINTERRUPTIBLE;
1935 else
1936 if (thread->state & TH_SUSP)
1937 state = TH_STATE_STOPPED;
1938 else
1939 if (thread->state & TH_WAIT)
1940 state = TH_STATE_WAITING;
1941
1942 basic_info->run_state = state;
1943 basic_info->flags = flags;
1944
1945 basic_info->suspend_count = thread->user_stop_count;
1946
1947 return;
1948}
1949
1950kern_return_t
1951thread_info_internal(
1952 thread_t thread,
1953 thread_flavor_t flavor,
1954 thread_info_t thread_info_out, /* ptr to OUT array */
1955 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
1956{
1957 spl_t s;
1958
1959 if (thread == THREAD_NULL)
1960 return (KERN_INVALID_ARGUMENT);
1961
1962 if (flavor == THREAD_BASIC_INFO) {
1963
1964 if (*thread_info_count < THREAD_BASIC_INFO_COUNT)
1965 return (KERN_INVALID_ARGUMENT);
1966
1967 s = splsched();
1968 thread_lock(thread);
1969
1970 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
1971
1972 thread_unlock(thread);
1973 splx(s);
1974
1975 *thread_info_count = THREAD_BASIC_INFO_COUNT;
1976
1977 return (KERN_SUCCESS);
1978 }
1979 else
1980 if (flavor == THREAD_IDENTIFIER_INFO) {
1981 thread_identifier_info_t identifier_info;
1982
1983 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT)
1984 return (KERN_INVALID_ARGUMENT);
1985
1986 identifier_info = (thread_identifier_info_t) thread_info_out;
1987
1988 s = splsched();
1989 thread_lock(thread);
1990
1991 identifier_info->thread_id = thread->thread_id;
1992 identifier_info->thread_handle = thread->machine.cthread_self;
1993 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
1994
1995 thread_unlock(thread);
1996 splx(s);
1997 return KERN_SUCCESS;
1998 }
1999 else
2000 if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
2001 policy_timeshare_info_t ts_info;
2002
2003 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT)
2004 return (KERN_INVALID_ARGUMENT);
2005
2006 ts_info = (policy_timeshare_info_t)thread_info_out;
2007
2008 s = splsched();
2009 thread_lock(thread);
2010
2011 if (thread->sched_mode != TH_MODE_TIMESHARE) {
2012 thread_unlock(thread);
2013 splx(s);
2014 return (KERN_INVALID_POLICY);
2015 }
2016
2017 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2018 if (ts_info->depressed) {
2019 ts_info->base_priority = DEPRESSPRI;
2020 ts_info->depress_priority = thread->base_pri;
2021 }
2022 else {
2023 ts_info->base_priority = thread->base_pri;
2024 ts_info->depress_priority = -1;
2025 }
2026
2027 ts_info->cur_priority = thread->sched_pri;
2028 ts_info->max_priority = thread->max_priority;
2029
2030 thread_unlock(thread);
2031 splx(s);
2032
2033 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
2034
2035 return (KERN_SUCCESS);
2036 }
2037 else
2038 if (flavor == THREAD_SCHED_FIFO_INFO) {
2039 if (*thread_info_count < POLICY_FIFO_INFO_COUNT)
2040 return (KERN_INVALID_ARGUMENT);
2041
2042 return (KERN_INVALID_POLICY);
2043 }
2044 else
2045 if (flavor == THREAD_SCHED_RR_INFO) {
2046 policy_rr_info_t rr_info;
2047 uint32_t quantum_time;
2048 uint64_t quantum_ns;
2049
2050 if (*thread_info_count < POLICY_RR_INFO_COUNT)
2051 return (KERN_INVALID_ARGUMENT);
2052
2053 rr_info = (policy_rr_info_t) thread_info_out;
2054
2055 s = splsched();
2056 thread_lock(thread);
2057
2058 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2059 thread_unlock(thread);
2060 splx(s);
2061
2062 return (KERN_INVALID_POLICY);
2063 }
2064
2065 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2066 if (rr_info->depressed) {
2067 rr_info->base_priority = DEPRESSPRI;
2068 rr_info->depress_priority = thread->base_pri;
2069 }
2070 else {
2071 rr_info->base_priority = thread->base_pri;
2072 rr_info->depress_priority = -1;
2073 }
2074
2075 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2076 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2077
2078 rr_info->max_priority = thread->max_priority;
2079 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2080
2081 thread_unlock(thread);
2082 splx(s);
2083
2084 *thread_info_count = POLICY_RR_INFO_COUNT;
2085
2086 return (KERN_SUCCESS);
2087 }
2088 else
2089 if (flavor == THREAD_EXTENDED_INFO) {
2090 thread_basic_info_data_t basic_info;
2091 thread_extended_info_t extended_info = (thread_extended_info_t) thread_info_out;
2092
2093 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
2094 return (KERN_INVALID_ARGUMENT);
2095 }
2096
2097 s = splsched();
2098 thread_lock(thread);
2099
2100 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
2101 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
2102 */
2103 retrieve_thread_basic_info(thread, &basic_info);
2104 extended_info->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC));
2105 extended_info->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC));
2106
2107 extended_info->pth_cpu_usage = basic_info.cpu_usage;
2108 extended_info->pth_policy = basic_info.policy;
2109 extended_info->pth_run_state = basic_info.run_state;
2110 extended_info->pth_flags = basic_info.flags;
2111 extended_info->pth_sleep_time = basic_info.sleep_time;
2112 extended_info->pth_curpri = thread->sched_pri;
2113 extended_info->pth_priority = thread->base_pri;
2114 extended_info->pth_maxpriority = thread->max_priority;
2115
		bsd_getthreadname(thread->uthread, extended_info->pth_name);
2117
2118 thread_unlock(thread);
2119 splx(s);
2120
2121 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
2122
2123 return (KERN_SUCCESS);
2124 }
2125 else
2126 if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
2127#if DEVELOPMENT || DEBUG
2128 thread_debug_info_internal_t dbg_info;
2129 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT)
2130 return (KERN_NOT_SUPPORTED);
2131
2132 if (thread_info_out == NULL)
2133 return (KERN_INVALID_ARGUMENT);
2134
2135 dbg_info = (thread_debug_info_internal_t) thread_info_out;
2136 dbg_info->page_creation_count = thread->t_page_creation_count;
2137
2138 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
2139 return (KERN_SUCCESS);
2140#endif /* DEVELOPMENT || DEBUG */
2141 return (KERN_NOT_SUPPORTED);
2142 }
2143
2144 return (KERN_INVALID_ARGUMENT);
2145}
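/*
 * Illustrative usage (a minimal sketch, not part of this file): the
 * THREAD_BASIC_INFO branch above is what ultimately services a user-space
 * query such as:
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *	kern_return_t kr = thread_info(mach_thread_self(), THREAD_BASIC_INFO,
 *	    (thread_info_t)&info, &count);
 *
 * On success, count is updated to THREAD_BASIC_INFO_COUNT and info holds the
 * snapshot produced by retrieve_thread_basic_info().
 */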
2146
2147void
2148thread_read_times(
2149 thread_t thread,
2150 time_value_t *user_time,
2151 time_value_t *system_time,
2152 time_value_t *runnable_time)
2153{
2154 clock_sec_t secs;
2155 clock_usec_t usecs;
2156 uint64_t tval_user, tval_system;
2157
2158 tval_user = timer_grab(&thread->user_timer);
2159 tval_system = timer_grab(&thread->system_timer);
2160
2161 if (thread->precise_user_kernel_time) {
2162 absolutetime_to_microtime(tval_user, &secs, &usecs);
2163 user_time->seconds = (typeof(user_time->seconds))secs;
2164 user_time->microseconds = usecs;
2165
2166 absolutetime_to_microtime(tval_system, &secs, &usecs);
2167 system_time->seconds = (typeof(system_time->seconds))secs;
2168 system_time->microseconds = usecs;
2169 } else {
2170 /* system_timer may represent either sys or user */
2171 tval_user += tval_system;
2172 absolutetime_to_microtime(tval_user, &secs, &usecs);
2173 user_time->seconds = (typeof(user_time->seconds))secs;
2174 user_time->microseconds = usecs;
2175
2176 system_time->seconds = 0;
2177 system_time->microseconds = 0;
2178 }
2179
2180 if (runnable_time) {
2181 uint64_t tval_runnable = timer_grab(&thread->runnable_timer);
2182 absolutetime_to_microtime(tval_runnable, &secs, &usecs);
2183 runnable_time->seconds = (typeof(runnable_time->seconds))secs;
2184 runnable_time->microseconds = usecs;
2185 }
2186}
2187
2188uint64_t thread_get_runtime_self(void)
2189{
2190 boolean_t interrupt_state;
2191 uint64_t runtime;
2192 thread_t thread = NULL;
2193 processor_t processor = NULL;
2194
2195 thread = current_thread();
2196
2197 /* Not interrupt safe, as the scheduler may otherwise update timer values underneath us */
2198 interrupt_state = ml_set_interrupts_enabled(FALSE);
2199 processor = current_processor();
2200 timer_update(PROCESSOR_DATA(processor, thread_timer), mach_absolute_time());
2201 runtime = (timer_grab(&thread->user_timer) + timer_grab(&thread->system_timer));
2202 ml_set_interrupts_enabled(interrupt_state);
2203
2204 return runtime;
2205}
2206
2207kern_return_t
2208thread_assign(
2209 __unused thread_t thread,
2210 __unused processor_set_t new_pset)
2211{
2212 return (KERN_FAILURE);
2213}
2214
2215/*
2216 * thread_assign_default:
2217 *
2218 * Special version of thread_assign for assigning threads to default
2219 * processor set.
2220 */
2221kern_return_t
2222thread_assign_default(
2223 thread_t thread)
2224{
2225 return (thread_assign(thread, &pset0));
2226}
2227
2228/*
2229 * thread_get_assignment
2230 *
2231 * Return current assignment for this thread.
2232 */
2233kern_return_t
2234thread_get_assignment(
2235 thread_t thread,
2236 processor_set_t *pset)
2237{
2238 if (thread == NULL)
2239 return (KERN_INVALID_ARGUMENT);
2240
2241 *pset = &pset0;
2242
2243 return (KERN_SUCCESS);
2244}
2245
2246/*
2247 * thread_wire_internal:
2248 *
2249 * Specify that the target thread must always be able
2250 * to run and to allocate memory.
2251 */
2252kern_return_t
2253thread_wire_internal(
2254 host_priv_t host_priv,
2255 thread_t thread,
2256 boolean_t wired,
2257 boolean_t *prev_state)
2258{
2259 if (host_priv == NULL || thread != current_thread())
2260 return (KERN_INVALID_ARGUMENT);
2261
2262 assert(host_priv == &realhost);
2263
2264 if (prev_state)
2265 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2266
2267 if (wired) {
2268 if (!(thread->options & TH_OPT_VMPRIV))
2269 vm_page_free_reserve(1); /* XXX */
2270 thread->options |= TH_OPT_VMPRIV;
2271 }
2272 else {
2273 if (thread->options & TH_OPT_VMPRIV)
2274 vm_page_free_reserve(-1); /* XXX */
2275 thread->options &= ~TH_OPT_VMPRIV;
2276 }
2277
2278 return (KERN_SUCCESS);
2279}
2280
2281
2282/*
2283 * thread_wire:
2284 *
2285 * User-api wrapper for thread_wire_internal()
2286 */
2287kern_return_t
2288thread_wire(
2289 host_priv_t host_priv,
2290 thread_t thread,
2291 boolean_t wired)
2292{
2293 return (thread_wire_internal(host_priv, thread, wired, NULL));
2294}
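/*
 * Illustrative usage (a minimal sketch, not part of this file): a kernel
 * caller that must never block on memory wires itself, e.g.:
 *
 *	thread_wire(host_priv_self(), current_thread(), TRUE);
 *
 * Since thread_wire_internal() rejects any thread other than the caller,
 * the thread argument is always current_thread() in practice.
 */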
2295
2296
2297boolean_t
2298is_vm_privileged(void)
2299{
2300 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2301}
2302
2303boolean_t
2304set_vm_privilege(boolean_t privileged)
2305{
2306 boolean_t was_vmpriv;
2307
2308 if (current_thread()->options & TH_OPT_VMPRIV)
2309 was_vmpriv = TRUE;
2310 else
2311 was_vmpriv = FALSE;
2312
2313 if (privileged != FALSE)
2314 current_thread()->options |= TH_OPT_VMPRIV;
2315 else
2316 current_thread()->options &= ~TH_OPT_VMPRIV;
2317
2318 return (was_vmpriv);
2319}
2320
2321void
2322set_thread_rwlock_boost(void)
2323{
2324 current_thread()->rwlock_count++;
2325}
2326
2327void
2328clear_thread_rwlock_boost(void)
2329{
2330 thread_t thread = current_thread();
2331
	if ((thread->rwlock_count-- == 1) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		lck_rw_clear_promotion(thread, 0);
	}
2336}
2337
2338
2339/*
2340 * XXX assuming current thread only, for now...
2341 */
2342void
2343thread_guard_violation(thread_t thread,
2344 mach_exception_data_type_t code, mach_exception_data_type_t subcode)
2345{
2346 assert(thread == current_thread());
2347
2348 /* don't set up the AST for kernel threads */
2349 if (thread->task == kernel_task)
2350 return;
2351
2352 spl_t s = splsched();
2353 /*
2354 * Use the saved state area of the thread structure
2355 * to store all info required to handle the AST when
2356 * returning to userspace
2357 */
2358 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2359 thread->guard_exc_info.code = code;
2360 thread->guard_exc_info.subcode = subcode;
2361 thread_ast_set(thread, AST_GUARD);
2362 ast_propagate(thread);
2363
2364 splx(s);
2365}
2366
2367/*
2368 * guard_ast:
2369 *
2370 * Handle AST_GUARD for a thread. This routine looks at the
2371 * state saved in the thread structure to determine the cause
2372 * of this exception. Based on this value, it invokes the
2373 * appropriate routine which determines other exception related
2374 * info and raises the exception.
2375 */
2376void
2377guard_ast(thread_t t)
2378{
2379 const mach_exception_data_type_t
2380 code = t->guard_exc_info.code,
2381 subcode = t->guard_exc_info.subcode;
2382
2383 t->guard_exc_info.code = 0;
2384 t->guard_exc_info.subcode = 0;
2385
2386 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2387 case GUARD_TYPE_NONE:
2388 /* lingering AST_GUARD on the processor? */
2389 break;
2390 case GUARD_TYPE_MACH_PORT:
2391 mach_port_guard_ast(t, code, subcode);
2392 break;
2393 case GUARD_TYPE_FD:
2394 fd_guard_ast(t, code, subcode);
2395 break;
2396#if CONFIG_VNGUARD
2397 case GUARD_TYPE_VN:
2398 vn_guard_ast(t, code, subcode);
2399 break;
2400#endif
2401 case GUARD_TYPE_VIRT_MEMORY:
2402 virt_memory_guard_ast(t, code, subcode);
2403 break;
2404 default:
2405 panic("guard_exc_info %llx %llx", code, subcode);
2406 }
2407}
2408
2409static void
2410thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2411{
2412 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2413#if CONFIG_TELEMETRY
2414 /*
2415 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2416 * on the entire task so there are micro-stackshots available if and when
2417 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2418 * for this thread only; but now that this task is suspect, knowing what all of
2419 * its threads are up to will be useful.
2420 */
2421 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2422#endif
2423 return;
2424 }
2425
2426#if CONFIG_TELEMETRY
2427 /*
2428 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2429 * exceeded the limit, turn telemetry off for the task.
2430 */
2431 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2432#endif
2433
2434 if (warning == 0) {
2435 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2436 }
2437}
2438
2439void __attribute__((noinline))
2440SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2441{
2442 int pid = 0;
2443 task_t task = current_task();
2444 thread_t thread = current_thread();
2445 uint64_t tid = thread->thread_id;
2446 const char *procname = "unknown";
2447 time_value_t thread_total_time = {0, 0};
2448 time_value_t thread_system_time;
2449 time_value_t thread_user_time;
2450 int action;
2451 uint8_t percentage;
2452 uint32_t usage_percent = 0;
2453 uint32_t interval_sec;
2454 uint64_t interval_ns;
2455 uint64_t balance_ns;
2456 boolean_t fatal = FALSE;
2457 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2458 kern_return_t kr;
2459
2460#ifdef EXC_RESOURCE_MONITORS
2461 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2462#endif /* EXC_RESOURCE_MONITORS */
2463 struct ledger_entry_info lei;
2464
2465 assert(thread->t_threadledger != LEDGER_NULL);
2466
2467 /*
2468 * Extract the fatal bit and suspend the monitor (which clears the bit).
2469 */
2470 task_lock(task);
2471 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2472 fatal = TRUE;
2473 send_exc_resource = TRUE;
2474 }
2475 /* Only one thread can be here at a time. Whichever makes it through
2476 first will successfully suspend the monitor and proceed to send the
2477 notification. Other threads will get an error trying to suspend the
2478 monitor and give up on sending the notification. In the first release,
2479 the monitor won't be resumed for a number of seconds, but we may
2480 eventually need to handle low-latency resume.
2481 */
2482 kr = task_suspend_cpumon(task);
2483 task_unlock(task);
	if (kr == KERN_INVALID_ARGUMENT)
		return;
2485
2486#ifdef MACH_BSD
2487 pid = proc_selfpid();
2488 if (task->bsd_info != NULL) {
2489 procname = proc_name_address(task->bsd_info);
2490 }
2491#endif
2492
2493 thread_get_cpulimit(&action, &percentage, &interval_ns);
2494
2495 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2496
2497 thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
2498 time_value_add(&thread_total_time, &thread_user_time);
2499 time_value_add(&thread_total_time, &thread_system_time);
2500 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2501
2502 /* credit/debit/balance/limit are in absolute time units;
2503 the refill info is in nanoseconds. */
2504 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2505 if (lei.lei_last_refill > 0) {
2506 usage_percent = (uint32_t)((balance_ns*100ULL) / lei.lei_last_refill);
2507 }
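	/*
	 * Worked example: a balance of 3,000,000,000 ns against a last refill
	 * of 6,000,000,000 ns yields (3e9 * 100) / 6e9 = 50 percent.
	 */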
2508
2509 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2510 printf("process %s[%d] thread %llu caught burning CPU! "
2511 "It used more than %d%% CPU over %u seconds "
2512 "(actual recent usage: %d%% over ~%llu seconds). "
2513 "Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys) "
2514 "ledger balance: %lld mabs credit: %lld mabs debit: %lld mabs "
2515 "limit: %llu mabs period: %llu ns last refill: %llu ns%s.\n",
2516 procname, pid, tid,
2517 percentage, interval_sec,
2518 usage_percent,
2519 (lei.lei_last_refill + NSEC_PER_SEC/2) / NSEC_PER_SEC,
2520 thread_total_time.seconds, thread_total_time.microseconds,
2521 thread_user_time.seconds, thread_user_time.microseconds,
	    thread_system_time.seconds, thread_system_time.microseconds,
2523 lei.lei_balance, lei.lei_credit, lei.lei_debit,
2524 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2525 (fatal ? " [fatal violation]" : ""));
2526
2527 /*
2528 For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2529 we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2530 */
2531
2532 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2533 lei.lei_balance = balance_ns;
2534 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2535 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2536 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2537 fatal ? kRNFatalLimitFlag : 0);
2538 if (kr) {
2539 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2540 }
2541
2542#ifdef EXC_RESOURCE_MONITORS
2543 if (send_exc_resource) {
2544 if (disable_exc_resource) {
2545 printf("process %s[%d] thread %llu caught burning CPU! "
2546 "EXC_RESOURCE%s supressed by a boot-arg\n",
2547 procname, pid, tid, fatal ? " (and termination)" : "");
2548 return;
2549 }
2550
2551 if (audio_active) {
2552 printf("process %s[%d] thread %llu caught burning CPU! "
2553 "EXC_RESOURCE & termination supressed due to audio playback\n",
2554 procname, pid, tid);
2555 return;
2556 }
2557 }
2558
2559
2560 if (send_exc_resource) {
2561 code[0] = code[1] = 0;
2562 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
		if (fatal) {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
		} else {
			EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
		}
2568 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2569 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2570 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2571 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2572 }
2573#endif /* EXC_RESOURCE_MONITORS */
2574
2575 if (fatal) {
2576#if CONFIG_JETSAM
2577 jetsam_on_ledger_cpulimit_exceeded();
2578#else
2579 task_terminate_internal(task);
2580#endif
2581 }
2582}
2583
2584#if DEVELOPMENT || DEBUG
2585void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
2586{
2587 mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
2588 int pid = task_pid(task);
2589 char procname[MAXCOMLEN+1] = "unknown";
2590
2591 if (pid == 1) {
2592 /*
2593 * Cannot suspend launchd
2594 */
2595 return;
2596 }
2597
2598 proc_name(pid, procname, sizeof(procname));
2599
2600 if (disable_exc_resource) {
2601 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2602 "supressed by a boot-arg. \n", procname, pid, thread_count);
2603 return;
2604 }
2605
2606 if (audio_active) {
2607 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2608 "supressed due to audio playback.\n", procname, pid, thread_count);
2609 return;
2610 }
2611
2612 if (exc_via_corpse_forking == 0) {
2613 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2614 "supressed due to corpse forking being disabled.\n", procname, pid,
2615 thread_count);
2616 return;
2617 }
2618
2619 printf("process %s[%d] crossed thread count high watermark (%d), sending "
2620 "EXC_RESOURCE\n", procname, pid, thread_count);
2621
2622 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
2623 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
2624 EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);
2625
2626 task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL);
2627}
2628#endif /* DEVELOPMENT || DEBUG */
2629
2630void thread_update_io_stats(thread_t thread, int size, int io_flags)
2631{
2632 int io_tier;
2633
2634 if (thread->thread_io_stats == NULL || thread->task->task_io_stats == NULL)
2635 return;
2636
2637 if (io_flags & DKIO_READ) {
2638 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2639 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->disk_reads, size);
2640 }
2641
2642 if (io_flags & DKIO_META) {
2643 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2644 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->metadata, size);
2645 }
2646
2647 if (io_flags & DKIO_PAGING) {
2648 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2649 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->paging, size);
2650 }
2651
2652 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
	assert(io_tier < IO_NUM_PRIORITIES);
2654
2655 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2656 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->io_priority[io_tier], size);
2657
2658 /* Update Total I/O Counts */
2659 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2660 UPDATE_IO_STATS_ATOMIC(thread->task->task_io_stats->total_io, size);
2661
2662 if (!(io_flags & DKIO_READ)) {
2663 DTRACE_IO3(physical_writes, struct task *, thread->task, uint32_t, size, int, io_flags);
2664 ledger_credit(thread->task->ledger, task_ledgers.physical_writes, size);
2665 }
2666}
2667
2668static void
2669init_thread_ledgers(void) {
2670 ledger_template_t t;
2671 int idx;
2672
2673 assert(thread_ledger_template == NULL);
2674
2675 if ((t = ledger_template_create("Per-thread ledger")) == NULL)
2676 panic("couldn't create thread ledger template");
2677
2678 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2679 panic("couldn't create cpu_time entry for thread ledger template");
2680 }
2681
2682 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2683 panic("couldn't set thread ledger callback for cpu_time entry");
2684 }
2685
2686 thread_ledgers.cpu_time = idx;
2687
2688 ledger_template_complete(t);
2689 thread_ledger_template = t;
2690}
2691
2692/*
2693 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2694 */
2695int
2696thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2697{
2698 int64_t abstime = 0;
2699 uint64_t limittime = 0;
2700 thread_t thread = current_thread();
2701
2702 *percentage = 0;
2703 *interval_ns = 0;
2704 *action = 0;
2705
2706 if (thread->t_threadledger == LEDGER_NULL) {
2707 /*
2708 * This thread has no per-thread ledger, so it can't possibly
2709 * have a CPU limit applied.
2710 */
2711 return (KERN_SUCCESS);
2712 }
2713
2714 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2715 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2716
2717 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2718 /*
2719 * This thread's CPU time ledger has no period or limit; so it
2720 * doesn't have a CPU limit applied.
2721 */
2722 return (KERN_SUCCESS);
2723 }
2724
2725 /*
2726 * This calculation is the converse to the one in thread_set_cpulimit().
2727 */
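	/*
	 * For example, a 500,000,000 ns (500 ms) limit over a 1,000,000,000 ns
	 * period reads back as (500,000,000 * 100) / 1,000,000,000 = 50 percent.
	 */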
2728 absolutetime_to_nanoseconds(abstime, &limittime);
2729 *percentage = (limittime * 100ULL) / *interval_ns;
2730 assert(*percentage <= 100);
2731
2732 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2733 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2734
2735 *action = THREAD_CPULIMIT_BLOCK;
2736 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2737 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2738
2739 *action = THREAD_CPULIMIT_EXCEPTION;
2740 } else {
2741 *action = THREAD_CPULIMIT_DISABLE;
2742 }
2743
2744 return (KERN_SUCCESS);
2745}
2746
2747/*
2748 * Set CPU usage limit on a thread.
2749 *
2750 * Calling with percentage of 0 will unset the limit for this thread.
2751 */
2752int
2753thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2754{
2755 thread_t thread = current_thread();
2756 ledger_t l;
2757 uint64_t limittime = 0;
2758 uint64_t abstime = 0;
2759
2760 assert(percentage <= 100);
2761
2762 if (action == THREAD_CPULIMIT_DISABLE) {
2763 /*
2764 * Remove CPU limit, if any exists.
2765 */
2766 if (thread->t_threadledger != LEDGER_NULL) {
2767 l = thread->t_threadledger;
2768 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2769 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2770 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2771 }
2772
2773 return (0);
2774 }
2775
2776 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2777 return (KERN_INVALID_ARGUMENT);
2778 }
2779
2780 l = thread->t_threadledger;
2781 if (l == LEDGER_NULL) {
2782 /*
		 * This thread doesn't yet have a per-thread ledger, so create one and activate the CPU time entry.
2784 */
2785 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL)
2786 return (KERN_RESOURCE_SHORTAGE);
2787
2788 /*
2789 * We are the first to create this thread's ledger, so only activate our entry.
2790 */
2791 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2792 thread->t_threadledger = l;
2793 }
2794
2795 /*
2796 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2797 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2798 */
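	/*
	 * e.g. percentage = 50 over interval_ns = 1,000,000,000 gives
	 * limittime = (1,000,000,000 * 50) / 100 = 500,000,000 ns per period.
	 */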
2799 limittime = (interval_ns * percentage) / 100;
2800 nanoseconds_to_absolutetime(limittime, &abstime);
2801 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2802 /*
2803 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2804 */
2805 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
2806
2807 if (action == THREAD_CPULIMIT_EXCEPTION) {
2808 /*
2809 * We don't support programming the CPU usage monitor on a task if any of its
2810 * threads have a per-thread blocking CPU limit configured.
2811 */
2812 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2813 panic("CPU usage monitor activated, but blocking thread limit exists");
2814 }
2815
2816 /*
2817 * Make a note that this thread's CPU limit is being used for the task-wide CPU
2818 * usage monitor. We don't have to arm the callback which will trigger the
2819 * exception, because that was done for us in ledger_instantiate (because the
2820 * ledger template used has a default callback).
2821 */
2822 thread->options |= TH_OPT_PROC_CPULIMIT;
2823 } else {
2824 /*
		 * We deliberately override any CPU limit imposed by a task-wide limit (e.g.
		 * the CPU usage monitor).
2827 */
2828 thread->options &= ~TH_OPT_PROC_CPULIMIT;
2829
2830 thread->options |= TH_OPT_PRVT_CPULIMIT;
2831 /* The per-thread ledger template by default has a callback for CPU time */
2832 ledger_disable_callback(l, thread_ledgers.cpu_time);
2833 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
2834 }
2835
2836 return (0);
2837}
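/*
 * Illustrative usage (a minimal sketch, not part of this file): a blocking
 * 50% limit over a 10 second window could be installed on the calling
 * thread with:
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 10 * NSEC_PER_SEC);
 */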
2838
2839void
2840thread_sched_call(
2841 thread_t thread,
2842 sched_call_t call)
2843{
2844 assert((thread->state & TH_WAIT_REPORT) == 0);
2845 thread->sched_call = call;
2846}
2847
2848uint64_t
2849thread_tid(
2850 thread_t thread)
2851{
2852 return (thread != THREAD_NULL? thread->thread_id: 0);
2853}
2854
2855uint16_t
2856thread_set_tag(thread_t th, uint16_t tag)
2857{
2858 return thread_set_tag_internal(th, tag);
2859}
2860
2861uint16_t
2862thread_get_tag(thread_t th)
2863{
2864 return thread_get_tag_internal(th);
2865}
2866
2867uint64_t
2868thread_last_run_time(thread_t th)
2869{
2870 return th->last_run_time;
2871}
2872
2873uint64_t
2874thread_dispatchqaddr(
2875 thread_t thread)
2876{
2877 uint64_t dispatchqueue_addr;
2878 uint64_t thread_handle;
2879
2880 if (thread == THREAD_NULL)
2881 return 0;
2882
2883 thread_handle = thread->machine.cthread_self;
2884 if (thread_handle == 0)
2885 return 0;
2886
2887 if (thread->inspection == TRUE)
2888 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(thread->task);
2889 else if (thread->task->bsd_info)
2890 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(thread->task->bsd_info);
2891 else
2892 dispatchqueue_addr = 0;
2893
2894 return dispatchqueue_addr;
2895}
2896
2897uint64_t
2898thread_rettokern_addr(
2899 thread_t thread)
2900{
2901 uint64_t rettokern_addr;
2902 uint64_t rettokern_offset;
2903 uint64_t thread_handle;
2904
2905 if (thread == THREAD_NULL)
2906 return 0;
2907
2908 thread_handle = thread->machine.cthread_self;
2909 if (thread_handle == 0)
2910 return 0;
2911
2912 if (thread->task->bsd_info) {
2913 rettokern_offset = get_return_to_kernel_offset_from_proc(thread->task->bsd_info);
2914
2915 /* Return 0 if return to kernel offset is not initialized. */
2916 if (rettokern_offset == 0) {
2917 rettokern_addr = 0;
2918 } else {
2919 rettokern_addr = thread_handle + rettokern_offset;
2920 }
2921 } else {
2922 rettokern_addr = 0;
2923 }
2924
2925 return rettokern_addr;
2926}
2927
2928/*
2929 * Export routines to other components for things that are done as macros
2930 * within the osfmk component.
2931 */
2932
2933#undef thread_mtx_lock
2934void thread_mtx_lock(thread_t thread);
2935void
2936thread_mtx_lock(thread_t thread)
2937{
2938 lck_mtx_lock(&thread->mutex);
2939}
2940
2941#undef thread_mtx_unlock
2942void thread_mtx_unlock(thread_t thread);
2943void
2944thread_mtx_unlock(thread_t thread)
2945{
2946 lck_mtx_unlock(&thread->mutex);
2947}
2948
2949#undef thread_reference
2950void thread_reference(thread_t thread);
2951void
2952thread_reference(
2953 thread_t thread)
2954{
2955 if (thread != THREAD_NULL)
2956 thread_reference_internal(thread);
2957}
2958
2959#undef thread_should_halt
2960
2961boolean_t
2962thread_should_halt(
2963 thread_t th)
2964{
2965 return (thread_should_halt_fast(th));
2966}
2967
2968/*
2969 * thread_set_voucher_name - reset the voucher port name bound to this thread
2970 *
2971 * Conditions: nothing locked
2972 *
2973 * If we already converted the previous name to a cached voucher
2974 * reference, then we discard that reference here. The next lookup
2975 * will cache it again.
2976 */
2977
2978kern_return_t
2979thread_set_voucher_name(mach_port_name_t voucher_name)
2980{
2981 thread_t thread = current_thread();
2982 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
2983 ipc_voucher_t voucher;
2984 ledger_t bankledger = NULL;
2985 struct thread_group *banktg = NULL;
2986
2987 if (MACH_PORT_DEAD == voucher_name)
2988 return KERN_INVALID_RIGHT;
2989
2990 /*
	 * aggressively convert to voucher reference
2992 */
2993 if (MACH_PORT_VALID(voucher_name)) {
2994 new_voucher = convert_port_name_to_voucher(voucher_name);
2995 if (IPC_VOUCHER_NULL == new_voucher)
2996 return KERN_INVALID_ARGUMENT;
2997 }
2998 bank_get_bank_ledger_and_thread_group(new_voucher, &bankledger, &banktg);
2999
3000 thread_mtx_lock(thread);
3001 voucher = thread->ith_voucher;
3002 thread->ith_voucher_name = voucher_name;
3003 thread->ith_voucher = new_voucher;
3004 thread_mtx_unlock(thread);
3005
3006 bank_swap_thread_bank_ledger(thread, bankledger);
3007
3008 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3009 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3010 (uintptr_t)thread_tid(thread),
3011 (uintptr_t)voucher_name,
3012 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
3013 1, 0);
3014
3015 if (IPC_VOUCHER_NULL != voucher)
3016 ipc_voucher_release(voucher);
3017
3018 return KERN_SUCCESS;
3019}
3020
3021/*
3022 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
3023 *
3024 * Conditions: nothing locked
3025 *
3026 * A reference to the voucher may be lazily pending, if someone set the voucher name
3027 * but nobody has done a lookup yet. In that case, we'll have to do the equivalent
3028 * lookup here.
3029 *
 * NOTE: There is currently no distinction between the current and effective
 * vouchers because we only set them at the thread level.
3032 */
3033kern_return_t
3034thread_get_mach_voucher(
3035 thread_act_t thread,
3036 mach_voucher_selector_t __unused which,
3037 ipc_voucher_t *voucherp)
3038{
3039 ipc_voucher_t voucher;
3040 mach_port_name_t voucher_name;
3041
3042 if (THREAD_NULL == thread)
3043 return KERN_INVALID_ARGUMENT;
3044
3045 thread_mtx_lock(thread);
3046 voucher = thread->ith_voucher;
3047
3048 /* if already cached, just return a ref */
3049 if (IPC_VOUCHER_NULL != voucher) {
3050 ipc_voucher_reference(voucher);
3051 thread_mtx_unlock(thread);
3052 *voucherp = voucher;
3053 return KERN_SUCCESS;
3054 }
3055
3056 voucher_name = thread->ith_voucher_name;
3057
3058 /* convert the name to a port, then voucher reference */
3059 if (MACH_PORT_VALID(voucher_name)) {
3060 ipc_port_t port;
3061
3062 if (KERN_SUCCESS !=
3063 ipc_object_copyin(thread->task->itk_space, voucher_name,
3064 MACH_MSG_TYPE_COPY_SEND, (ipc_object_t *)&port)) {
3065 thread->ith_voucher_name = MACH_PORT_NULL;
3066 thread_mtx_unlock(thread);
3067 *voucherp = IPC_VOUCHER_NULL;
3068 return KERN_SUCCESS;
3069 }
3070
3071 /* convert to a voucher ref to return, and cache a ref on thread */
3072 voucher = convert_port_to_voucher(port);
3073 ipc_voucher_reference(voucher);
3074 thread->ith_voucher = voucher;
3075 thread_mtx_unlock(thread);
3076
3077 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3078 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3079 (uintptr_t)thread_tid(thread),
3080 (uintptr_t)port,
3081 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3082 2, 0);
3083
3084
3085 ipc_port_release_send(port);
3086 } else
3087 thread_mtx_unlock(thread);
3088
3089 *voucherp = voucher;
3090 return KERN_SUCCESS;
3091}
3092
3093/*
3094 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
3095 *
 * Conditions: caller holds a reference on the voucher.
3097 * nothing locked.
3098 *
3099 * We grab another reference to the voucher and bind it to the thread. Any lazy
3100 * binding is erased. The old voucher reference associated with the thread is
3101 * discarded.
3102 */
3103kern_return_t
3104thread_set_mach_voucher(
3105 thread_t thread,
3106 ipc_voucher_t voucher)
3107{
3108 ipc_voucher_t old_voucher;
3109 ledger_t bankledger = NULL;
3110 struct thread_group *banktg = NULL;
3111
3112 if (THREAD_NULL == thread)
3113 return KERN_INVALID_ARGUMENT;
3114
3115 if (thread != current_thread() && thread->started)
3116 return KERN_INVALID_ARGUMENT;
3117
3118 ipc_voucher_reference(voucher);
3119 bank_get_bank_ledger_and_thread_group(voucher, &bankledger, &banktg);
3120
3121 thread_mtx_lock(thread);
3122 old_voucher = thread->ith_voucher;
3123 thread->ith_voucher = voucher;
3124 thread->ith_voucher_name = MACH_PORT_NULL;
3125 thread_mtx_unlock(thread);
3126
3127 bank_swap_thread_bank_ledger(thread, bankledger);
3128
3129 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3130 MACHDBG_CODE(DBG_MACH_IPC,MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3131 (uintptr_t)thread_tid(thread),
3132 (uintptr_t)MACH_PORT_NULL,
3133 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3134 3, 0);
3135
3136 ipc_voucher_release(old_voucher);
3137
3138 return KERN_SUCCESS;
3139}
3140
3141/*
3142 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
3143 *
 * Conditions: caller holds a reference on the new and presumed old voucher(s).
3145 * nothing locked.
3146 *
3147 * This function is no longer supported.
3148 */
3149kern_return_t
3150thread_swap_mach_voucher(
3151 __unused thread_t thread,
3152 __unused ipc_voucher_t new_voucher,
3153 ipc_voucher_t *in_out_old_voucher)
3154{
3155 /*
3156 * Currently this function is only called from a MIG generated
3157 * routine which doesn't release the reference on the voucher
3158 * addressed by in_out_old_voucher. To avoid leaking this reference,
3159 * a call to release it has been added here.
3160 */
3161 ipc_voucher_release(*in_out_old_voucher);
3162 return KERN_NOT_SUPPORTED;
3163}
3164
3165/*
3166 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
3167 */
3168kern_return_t
3169thread_get_current_voucher_origin_pid(
3170 int32_t *pid)
3171{
3172 uint32_t buf_size;
3173 kern_return_t kr;
3174 thread_t thread = current_thread();
3175
3176 buf_size = sizeof(*pid);
3177 kr = mach_voucher_attr_command(thread->ith_voucher,
3178 MACH_VOUCHER_ATTR_KEY_BANK,
3179 BANK_ORIGINATOR_PID,
3180 NULL,
3181 0,
3182 (mach_voucher_attr_content_t)pid,
3183 &buf_size);
3184
3185 return kr;
3186}
3187
3188
3189boolean_t
3190thread_has_thread_name(thread_t th)
3191{
3192 if ((th) && (th->uthread)) {
3193 return bsd_hasthreadname(th->uthread);
3194 }
3195
3196 /*
3197 * This is an odd case; clients may set the thread name based on the lack of
3198 * a name, but in this context there is no uthread to attach the name to.
3199 */
3200 return FALSE;
3201}
3202
3203void
3204thread_set_thread_name(thread_t th, const char* name)
3205{
3206 if ((th) && (th->uthread) && name) {
3207 bsd_setthreadname(th->uthread, name);
3208 }
3209}
3210
3211void
3212thread_set_honor_qlimit(thread_t thread)
3213{
3214 thread->options |= TH_OPT_HONOR_QLIMIT;
3215}
3216
3217void
3218thread_clear_honor_qlimit(thread_t thread)
3219{
3220 thread->options &= (~TH_OPT_HONOR_QLIMIT);
3221}
3222
3223/*
3224 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
3225 */
3226void thread_enable_send_importance(thread_t thread, boolean_t enable)
3227{
3228 if (enable == TRUE)
3229 thread->options |= TH_OPT_SEND_IMPORTANCE;
3230 else
3231 thread->options &= ~TH_OPT_SEND_IMPORTANCE;
3232}
3233
/*
 * thread_set_allocation_name - set or clear the allocation name recorded in
 * the current thread's kernel state, returning the previous value.
 */
3237
3238kern_allocation_name_t thread_set_allocation_name(kern_allocation_name_t new_name)
3239{
3240 kern_allocation_name_t ret;
3241 thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
3242 ret = kstate->allocation_name;
	/* first setter wins: only install new_name if none is set, or clear with NULL */
	if (!new_name || !kstate->allocation_name)
		kstate->allocation_name = new_name;
3245 return ret;
3246}
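/*
 * Illustrative usage (a minimal sketch, not part of this file): callers
 * typically bracket an allocation site, restoring the previous name when
 * done; "site_name" is a hypothetical kern_allocation_name_t.
 *
 *	kern_allocation_name_t prior = thread_set_allocation_name(site_name);
 *	... perform the allocations to be attributed to site_name ...
 *	thread_set_allocation_name(prior);
 */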
3247
3248uint64_t
3249thread_get_last_wait_duration(thread_t thread)
3250{
3251 return thread->last_made_runnable_time - thread->last_run_time;
3252}
3253
3254#if CONFIG_DTRACE
3255uint32_t dtrace_get_thread_predcache(thread_t thread)
3256{
3257 if (thread != THREAD_NULL)
3258 return thread->t_dtrace_predcache;
3259 else
3260 return 0;
3261}
3262
3263int64_t dtrace_get_thread_vtime(thread_t thread)
3264{
3265 if (thread != THREAD_NULL)
3266 return thread->t_dtrace_vtime;
3267 else
3268 return 0;
3269}
3270
3271int dtrace_get_thread_last_cpu_id(thread_t thread)
3272{
3273 if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
3274 return thread->last_processor->cpu_id;
3275 } else {
3276 return -1;
3277 }
3278}
3279
3280int64_t dtrace_get_thread_tracing(thread_t thread)
3281{
3282 if (thread != THREAD_NULL)
3283 return thread->t_dtrace_tracing;
3284 else
3285 return 0;
3286}
3287
3288boolean_t dtrace_get_thread_reentering(thread_t thread)
3289{
3290 if (thread != THREAD_NULL)
3291 return (thread->options & TH_OPT_DTRACE) ? TRUE : FALSE;
3292 else
3293 return 0;
3294}
3295
3296vm_offset_t dtrace_get_kernel_stack(thread_t thread)
3297{
3298 if (thread != THREAD_NULL)
3299 return thread->kernel_stack;
3300 else
3301 return 0;
3302}
3303
3304#if KASAN
3305struct kasan_thread_data *
3306kasan_get_thread_data(thread_t thread)
3307{
3308 return &thread->kasan_data;
3309}
3310#endif
3311
3312int64_t dtrace_calc_thread_recent_vtime(thread_t thread)
3313{
3314 if (thread != THREAD_NULL) {
3315 processor_t processor = current_processor();
3316 uint64_t abstime = mach_absolute_time();
3317 timer_t timer;
3318
3319 timer = PROCESSOR_DATA(processor, thread_timer);
3320
3321 return timer_grab(&(thread->system_timer)) + timer_grab(&(thread->user_timer)) +
3322 (abstime - timer->tstamp); /* XXX need interrupts off to prevent missed time? */
3323 } else
3324 return 0;
3325}
3326
3327void dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
3328{
3329 if (thread != THREAD_NULL)
3330 thread->t_dtrace_predcache = predcache;
3331}
3332
3333void dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
3334{
3335 if (thread != THREAD_NULL)
3336 thread->t_dtrace_vtime = vtime;
3337}
3338
3339void dtrace_set_thread_tracing(thread_t thread, int64_t accum)
3340{
3341 if (thread != THREAD_NULL)
3342 thread->t_dtrace_tracing = accum;
3343}
3344
3345void dtrace_set_thread_reentering(thread_t thread, boolean_t vbool)
3346{
3347 if (thread != THREAD_NULL) {
3348 if (vbool)
3349 thread->options |= TH_OPT_DTRACE;
3350 else
3351 thread->options &= (~TH_OPT_DTRACE);
3352 }
3353}
3354
3355vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover)
3356{
3357 vm_offset_t prev = 0;
3358
3359 if (thread != THREAD_NULL) {
3360 prev = thread->recover;
3361 thread->recover = recover;
3362 }
3363 return prev;
3364}
3365
3366void dtrace_thread_bootstrap(void)
3367{
3368 task_t task = current_task();
3369
3370 if (task->thread_count == 1) {
3371 thread_t thread = current_thread();
3372 if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
3373 thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
3374 DTRACE_PROC(exec__success);
3375 KDBG(BSDDBG_CODE(DBG_BSD_PROC,BSD_PROC_EXEC),
3376 task_pid(task));
3377 }
3378 DTRACE_PROC(start);
3379 }
	DTRACE_PROC(lwp__start);
}
3383
3384void
3385dtrace_thread_didexec(thread_t thread)
3386{
3387 thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
3388}
3389#endif /* CONFIG_DTRACE */
3390