1/*
2 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_FREE_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: kern/thread.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub
61 * Date: 1986
62 *
63 * Thread management primitives implementation.
64 */
65/*
66 * Copyright (c) 1993 The University of Utah and
67 * the Computer Systems Laboratory (CSL). All rights reserved.
68 *
69 * Permission to use, copy, modify and distribute this software and its
70 * documentation is hereby granted, provided that both the copyright
71 * notice and this permission notice appear in all copies of the
72 * software, derivative works or modified versions, and any portions
73 * thereof, and that both notices appear in supporting documentation.
74 *
75 * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
76 * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
77 * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
78 *
79 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
80 * improvements that they make and grant CSL redistribution rights.
81 *
82 */
83
84#include <mach/mach_types.h>
85#include <mach/boolean.h>
86#include <mach/policy.h>
87#include <mach/thread_info.h>
88#include <mach/thread_special_ports.h>
89#include <mach/thread_act.h>
90#include <mach/thread_status.h>
91#include <mach/time_value.h>
92#include <mach/vm_param.h>
93
94#include <machine/thread.h>
95#include <machine/pal_routines.h>
96#include <machine/limits.h>
97
98#include <kern/kern_types.h>
99#include <kern/kalloc.h>
100#include <kern/cpu_data.h>
101#include <kern/extmod_statistics.h>
102#include <kern/ipc_mig.h>
103#include <kern/ipc_tt.h>
104#include <kern/mach_param.h>
105#include <kern/machine.h>
106#include <kern/misc_protos.h>
107#include <kern/processor.h>
108#include <kern/queue.h>
109#include <kern/restartable.h>
110#include <kern/sched.h>
111#include <kern/sched_prim.h>
112#include <kern/syscall_subr.h>
113#include <kern/task.h>
114#include <kern/thread.h>
115#include <kern/thread_group.h>
116#include <kern/coalition.h>
117#include <kern/host.h>
118#include <kern/zalloc.h>
119#include <kern/assert.h>
120#include <kern/exc_resource.h>
121#include <kern/exc_guard.h>
122#include <kern/telemetry.h>
123#include <kern/policy_internal.h>
124#include <kern/turnstile.h>
125#include <kern/sched_clutch.h>
126#include <kern/recount.h>
127#include <kern/smr.h>
128#include <kern/ast.h>
129#include <kern/compact_id.h>
130
131#include <corpses/task_corpse.h>
132#include <kern/kpc.h>
133
134#if CONFIG_PERVASIVE_CPI
135#include <kern/monotonic.h>
136#include <machine/monotonic.h>
137#endif /* CONFIG_PERVASIVE_CPI */
138
139#include <ipc/ipc_kmsg.h>
140#include <ipc/ipc_port.h>
141#include <bank/bank_types.h>
142
143#include <vm/vm_kern.h>
144#include <vm/vm_pageout.h>
145
146#include <sys/kdebug.h>
147#include <sys/bsdtask_info.h>
148#include <mach/sdt.h>
149#include <san/kasan.h>
150#include <san/kcov_stksz.h>
151
152#include <stdatomic.h>
153
154#if defined(HAS_APPLE_PAC)
155#include <ptrauth.h>
156#include <arm64/proc_reg.h>
157#endif /* defined(HAS_APPLE_PAC) */
158
159/*
160 * Exported interfaces
161 */
162#include <mach/task_server.h>
163#include <mach/thread_act_server.h>
164#include <mach/mach_host_server.h>
165#include <mach/host_priv_server.h>
166#include <mach/mach_voucher_server.h>
167#include <kern/policy_internal.h>
168
169#if CONFIG_MACF
170#include <security/mac_mach_internal.h>
171#endif
172
173#include <pthread/workqueue_trace.h>
174
175#if CONFIG_EXCLAVES
176#include <mach/exclaves.h>
177#endif
178
179LCK_GRP_DECLARE(thread_lck_grp, "thread");
180
181static SECURITY_READ_ONLY_LATE(zone_t) thread_zone;
182ZONE_DEFINE_ID(ZONE_ID_THREAD_RO, "threads_ro", struct thread_ro, ZC_READONLY);
183
184static void thread_port_with_flavor_no_senders(ipc_port_t, mach_port_mscount_t);
185
186IPC_KOBJECT_DEFINE(IKOT_THREAD_CONTROL);
187IPC_KOBJECT_DEFINE(IKOT_THREAD_READ,
188 .iko_op_no_senders = thread_port_with_flavor_no_senders);
189IPC_KOBJECT_DEFINE(IKOT_THREAD_INSPECT,
190 .iko_op_no_senders = thread_port_with_flavor_no_senders);
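
/*
 * Note that only the read and inspect kobject types register a no-senders
 * hook: the control port's lifetime is tied to the thread itself, while the
 * read/inspect ports appear to be created on demand per flavor (not shown in
 * this file) and torn down in thread_port_with_flavor_no_senders() once the
 * last send right goes away.
 */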
191
192static struct mpsc_daemon_queue thread_stack_queue;
193static struct mpsc_daemon_queue thread_terminate_queue;
194static struct mpsc_daemon_queue thread_deallocate_queue;
195static struct mpsc_daemon_queue thread_exception_queue;
196static struct mpsc_daemon_queue thread_backtrace_queue;
197
198decl_simple_lock_data(static, crashed_threads_lock);
199static queue_head_t crashed_threads_queue;
200
201struct thread_exception_elt {
202 struct mpsc_queue_chain link;
203 exception_type_t exception_type;
204 task_t exception_task;
205 thread_t exception_thread;
206};
207
208struct thread_backtrace_elt {
209 struct mpsc_queue_chain link;
210 exception_type_t exception_type;
211 kcdata_object_t obj;
212 exception_port_t exc_ports[BT_EXC_PORTS_COUNT]; /* send rights */
213};
214
215static SECURITY_READ_ONLY_LATE(struct thread) thread_template = {
216#if MACH_ASSERT
217 .thread_magic = THREAD_MAGIC,
218#endif /* MACH_ASSERT */
219 .wait_result = THREAD_WAITING,
220 .options = THREAD_ABORTSAFE,
221 .state = TH_WAIT | TH_UNINT,
222 .th_sched_bucket = TH_BUCKET_RUN,
223 .base_pri = BASEPRI_DEFAULT,
224 .realtime.deadline = UINT64_MAX,
225 .last_made_runnable_time = THREAD_NOT_RUNNABLE,
226 .last_basepri_change_time = THREAD_NOT_RUNNABLE,
227#if defined(CONFIG_SCHED_TIMESHARE_CORE)
228 .pri_shift = INT8_MAX,
229#endif
230 /* timers are initialized in thread_bootstrap */
231};
232
233#define CTID_SIZE_BIT 20
234#define CTID_MASK ((1u << CTID_SIZE_BIT) - 1)
235#define CTID_MAX_THREAD_NUMBER (CTID_MASK - 1)
236static_assert(CTID_MAX_THREAD_NUMBER <= COMPACT_ID_MAX);
237
238#ifndef __LITTLE_ENDIAN__
#error "ctid relies on the least significant bits of uint32_t to be populated"
240#endif
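
/*
 * A worked example of the constants above: with CTID_SIZE_BIT == 20,
 * CTID_MASK == 0xFFFFF (1,048,575), so a compact thread id occupies the low
 * 20 bits of a uint32_t. CTID_MAX_THREAD_NUMBER == 0xFFFFE, which keeps the
 * top value (CTID_MASK itself) out of normal use; thread_bootstrap() below
 * assigns exactly that value to init_thread.ctid as a placeholder before the
 * ctid table is functional.
 */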
241
242__startup_data
243static struct thread init_thread;
244static SECURITY_READ_ONLY_LATE(uint32_t) ctid_nonce;
245COMPACT_ID_TABLE_DEFINE(static, ctid_table);
246
247__startup_func
248static void
249thread_zone_startup(void)
250{
251 size_t size = sizeof(struct thread);
252
253#ifdef MACH_BSD
254 size += roundup(uthread_size, _Alignof(struct thread));
255#endif
256 thread_zone = zone_create_ext(name: "threads", size,
257 flags: ZC_SEQUESTER | ZC_ZFREE_CLEARMEM, desired_zid: ZONE_ID_THREAD, NULL);
258}
259STARTUP(ZALLOC, STARTUP_RANK_FOURTH, thread_zone_startup);
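
/*
 * Note on sizing: under MACH_BSD the zone element is large enough to hold
 * both the Mach thread and the BSD uthread, with uthread_size rounded up to
 * the alignment of struct thread. Presumably this lets the uthread live
 * immediately after the thread in the same allocation (see the
 * get_bsdthread_info() callers throughout this file).
 */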
260
261static void thread_deallocate_enqueue(thread_t thread);
262static void thread_deallocate_complete(thread_t thread);
263
264static void ctid_table_remove(thread_t thread);
265static void ctid_table_add(thread_t thread);
266static void ctid_table_init(void);
267
268#ifdef MACH_BSD
269extern void proc_exit(void *);
270extern mach_exception_data_type_t proc_encode_exit_exception_code(void *);
271extern uint64_t get_dispatchqueue_offset_from_proc(void *);
272extern uint64_t get_return_to_kernel_offset_from_proc(void *p);
273extern uint64_t get_wq_quantum_offset_from_proc(void *);
274extern int proc_selfpid(void);
275extern void proc_name(int, char*, int);
276extern char * proc_name_address(void *p);
277exception_type_t get_exception_from_corpse_crashinfo(kcdata_descriptor_t corpse_info);
278extern void kdebug_proc_name_args(struct proc *proc, long args[static 4]);
279#endif /* MACH_BSD */
280
281extern bool bsdthread_part_of_cooperative_workqueue(struct uthread *uth);
282extern bool disable_exc_resource;
283extern bool disable_exc_resource_during_audio;
284extern int audio_active;
285extern int debug_task;
286int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */
287int task_threadmax = CONFIG_THREAD_MAX;
288
289static uint64_t thread_unique_id = 100;
290
291struct _thread_ledger_indices thread_ledgers = { .cpu_time = -1 };
292static ledger_template_t thread_ledger_template = NULL;
293static void init_thread_ledgers(void);
294
295#if CONFIG_JETSAM
296void jetsam_on_ledger_cpulimit_exceeded(void);
297#endif
298
299extern int task_thread_soft_limit;
300
301
/*
 * Level (as a percentage of the limit) at which the CPU usage monitor triggers telemetry,
 * i.e. when any thread's CPU consumption exceeds 70% of the limit, start taking user
 * stack traces (micro-stackshots).
 */
308#define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70
309
310/* Percentage. Level at which we start gathering telemetry. */
311static TUNABLE(uint8_t, cpumon_ustackshots_trigger_pct,
312 "cpumon_ustackshots_trigger_pct", CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT);
313void __attribute__((noinline)) SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void);
314
315#if DEVELOPMENT || DEBUG
316TUNABLE_WRITEABLE(int, exc_resource_threads_enabled, "exc_resource_threads_enabled", 1);
317
318void __attribute__((noinline)) SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t, int);
319#endif /* DEVELOPMENT || DEBUG */
320
321/*
322 * The smallest interval over which we support limiting CPU consumption is 1ms
323 */
324#define MINIMUM_CPULIMIT_INTERVAL_MS 1
325
326os_refgrp_decl(static, thread_refgrp, "thread", NULL);
327
328static inline void
329init_thread_from_template(thread_t thread)
330{
331 /*
332 * In general, struct thread isn't trivially-copyable, since it may
333 * contain pointers to thread-specific state. This may be enforced at
334 * compile time on architectures that store authed + diversified
335 * pointers in machine_thread.
336 *
337 * In this specific case, where we're initializing a new thread from a
338 * thread_template, we know all diversified pointers are NULL; these are
339 * safe to bitwise copy.
340 */
341#pragma clang diagnostic push
342#pragma clang diagnostic ignored "-Wnontrivial-memaccess"
343 memcpy(dst: thread, src: &thread_template, n: sizeof(*thread));
344#pragma clang diagnostic pop
345}
346
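/*
 * thread_ro_create:
 *
 * Record the owning thread and task in the caller-provided template,
 * allocate the thread_ro element from the ZONE_ID_THREAD_RO read-only zone,
 * and publish the template into it. The caller fills in the remaining
 * template fields (e.g. credentials) before calling.
 */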
347static void
348thread_ro_create(task_t parent_task, thread_t th, thread_ro_t tro_tpl)
349{
350#if __x86_64__
351 th->t_task = parent_task;
352#endif
353 tro_tpl->tro_owner = th;
354 tro_tpl->tro_task = parent_task;
355 th->t_tro = zalloc_ro(ZONE_ID_THREAD_RO, Z_WAITOK | Z_ZERO | Z_NOFAIL);
356 zalloc_ro_update_elem(ZONE_ID_THREAD_RO, th->t_tro, tro_tpl);
357}
358
359static void
360thread_ro_destroy(thread_t th)
361{
362 thread_ro_t tro = get_thread_ro(th);
363#if MACH_BSD
364 struct ucred *cred = tro->tro_cred;
365 struct ucred *rcred = tro->tro_realcred;
366#endif
367 zfree_ro(ZONE_ID_THREAD_RO, tro);
368#if MACH_BSD
369 uthread_cred_free(cred);
370 uthread_cred_free(rcred);
371#endif
372}
373
374__startup_func
375thread_t
376thread_bootstrap(void)
377{
378 /*
379 * Fill in a template thread for fast initialization.
380 */
381 timer_init(timer: &thread_template.runnable_timer);
382
383 init_thread_from_template(thread: &init_thread);
384 /* fiddle with init thread to skip asserts in set_sched_pri */
385 init_thread.sched_pri = MAXPRI_KERNEL;
386
	/*
	 * We can't fully set up ctid yet: on ARM, thread_bootstrap() runs
	 * before the random subsystem (or much else) is available, so we
	 * install just enough state to limp along; it is fixed up when the
	 * first real thread is created.
	 */
393 *compact_id_resolve(table: &ctid_table, compact_id: 0) = &init_thread;
394 init_thread.ctid = CTID_MASK;
395
396 return &init_thread;
397}
398
399void
400thread_machine_init_template(void)
401{
402 machine_thread_template_init(thr_template: &thread_template);
403}
404
405void
406thread_init(void)
407{
408 /*
409 * Initialize any machine-dependent
410 * per-thread structures necessary.
411 */
412 machine_thread_init();
413
414 init_thread_ledgers();
415}
416
417boolean_t
418thread_is_active(thread_t thread)
419{
420 return thread->active;
421}
422
423void
424thread_corpse_continue(void)
425{
426 thread_t thread = current_thread();
427
428 thread_terminate_internal(thread);
429
430 /*
431 * Handle the thread termination directly
432 * here instead of returning to userspace.
433 */
434 assert(thread->active == FALSE);
435 thread_ast_clear(thread, AST_APC);
436 thread_apc_ast(thread);
437
438 panic("thread_corpse_continue");
439 /*NOTREACHED*/
440}
441
442__dead2
443static void
444thread_terminate_continue(void)
445{
446 panic("thread_terminate_continue");
447 /*NOTREACHED*/
448}
449
/*
 * thread_terminate_self:
 *
 * Final teardown performed on the current thread as it exits: detach it from
 * IPC, scheduling callouts, work intervals and the bank ledger, run BSD
 * process exit if it is the last thread in the task, then mark itself
 * TH_TERMINATE and block so the terminate daemon can reap it.
 */
453void
454thread_terminate_self(void)
455{
456 thread_t thread = current_thread();
457 thread_ro_t tro = get_thread_ro(thread);
458 task_t task = tro->tro_task;
459 void *bsd_info = get_bsdtask_info(task);
460 int threadcnt;
461
462 pal_thread_terminate_self(thread);
463
464 DTRACE_PROC(lwp__exit);
465
466 thread_mtx_lock(thread);
467
468 ipc_thread_disable(thread);
469
470 thread_mtx_unlock(thread);
471
472 thread_sched_call(thread, NULL);
473
474 spl_t s = splsched();
475 thread_lock(thread);
476
477 thread_depress_abort_locked(thread);
478
479 /*
480 * Before we take the thread_lock right above,
481 * act_set_ast_reset_pcs() might not yet observe
482 * that the thread is inactive, and could have
483 * requested an IPI Ack.
484 *
485 * Once we unlock the thread, we know that
486 * act_set_ast_reset_pcs() can't fail to notice
487 * that thread->active is false,
	 * and won't request new IPI Acks.
489 */
490 thread_reset_pcs_ack_IPI(thread);
491
492 thread_unlock(thread);
493
494 splx(s);
495
496#if CONFIG_TASKWATCH
497 thead_remove_taskwatch(thread);
498#endif /* CONFIG_TASKWATCH */
499
500 work_interval_thread_terminate(thread);
501
502 thread_mtx_lock(thread);
503
504 thread_policy_reset(thread);
505
506 thread_mtx_unlock(thread);
507
508 assert(thread->th_work_interval == NULL);
509
510 bank_swap_thread_bank_ledger(thread, NULL);
511
512 if (kdebug_enable && bsd_hasthreadname(uth: get_bsdthread_info(thread))) {
513 char threadname[MAXTHREADNAMESIZE];
514 bsd_getthreadname(uth: get_bsdthread_info(thread), buffer: threadname);
515 kernel_debug_string_simple(TRACE_STRING_THREADNAME_PREV, str: threadname);
516 }
517
518 uthread_cleanup(get_bsdthread_info(thread), tro);
519
520 if (kdebug_enable && bsd_info && !task_is_exec_copy(task)) {
521 /* trace out pid before we sign off */
522 long dbg_arg1 = 0;
523 long dbg_arg2 = 0;
524
525 kdbg_trace_data(proc: get_bsdtask_info(task), arg_pid: &dbg_arg1, arg_uniqueid: &dbg_arg2);
526#if CONFIG_PERVASIVE_CPI
527 if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_THR_EXIT)) {
528 struct recount_usage usage = { 0 };
529 struct recount_usage perf_only = { 0 };
530 boolean_t intrs_end = ml_set_interrupts_enabled(FALSE);
531 recount_current_thread_usage_perf_only(&usage, &perf_only);
532 ml_set_interrupts_enabled(intrs_end);
533 KDBG_RELEASE(DBG_MT_INSTRS_CYCLES_THR_EXIT,
534 recount_usage_instructions(&usage),
535 recount_usage_cycles(&usage),
536 recount_usage_system_time_mach(&usage),
537 usage.ru_metrics[RCT_LVL_USER].rm_time_mach);
538#if __AMP__
539 KDBG_RELEASE(DBG_MT_P_INSTRS_CYCLES_THR_EXIT,
540 recount_usage_instructions(&perf_only),
541 recount_usage_cycles(&perf_only),
542 recount_usage_system_time_mach(&perf_only),
543 perf_only.ru_metrics[RCT_LVL_USER].rm_time_mach);
544#endif // __AMP__
545 }
546#endif/* CONFIG_PERVASIVE_CPI */
547 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE_PID, dbg_arg1, dbg_arg2);
548 }
549
550 /*
551 * After this subtraction, this thread should never access
552 * task->bsd_info unless it got 0 back from the os_atomic_dec. It
553 * could be racing with other threads to be the last thread in the
554 * process, and the last thread in the process will tear down the proc
555 * structure and zero-out task->bsd_info.
556 */
557 threadcnt = os_atomic_dec(&task->active_thread_count, relaxed);
558
559#if CONFIG_COALITIONS
560 /*
561 * Leave the coalitions when last thread of task is exiting and the
562 * task is not a corpse.
563 */
564 if (threadcnt == 0 && !task->corpse_info) {
565 coalitions_remove_task(task);
566 }
567#endif
568
569 /*
570 * If we are the last thread to terminate and the task is
571 * associated with a BSD process, perform BSD process exit.
572 */
573 if (threadcnt == 0 && bsd_info != NULL) {
574 mach_exception_data_type_t subcode = 0;
575 if (kdebug_enable) {
576 /* since we're the last thread in this process, trace out the command name too */
577 long args[4] = { 0 };
578 kdebug_proc_name_args(proc: bsd_info, args);
579#if CONFIG_PERVASIVE_CPI
580 if (kdebug_debugid_enabled(DBG_MT_INSTRS_CYCLES_PROC_EXIT)) {
581 struct recount_usage usage = { 0 };
582 struct recount_usage perf_only = { 0 };
583 recount_current_task_usage_perf_only(&usage, &perf_only);
584 KDBG_RELEASE(DBG_MT_INSTRS_CYCLES_PROC_EXIT,
585 recount_usage_instructions(&usage),
586 recount_usage_cycles(&usage),
587 recount_usage_system_time_mach(&usage),
588 usage.ru_metrics[RCT_LVL_USER].rm_time_mach);
589#if __AMP__
590 KDBG_RELEASE(DBG_MT_P_INSTRS_CYCLES_PROC_EXIT,
591 recount_usage_instructions(&perf_only),
592 recount_usage_cycles(&perf_only),
593 recount_usage_system_time_mach(&perf_only),
594 perf_only.ru_metrics[RCT_LVL_USER].rm_time_mach);
595#endif // __AMP__
596 }
597#endif/* CONFIG_PERVASIVE_CPI */
598 KDBG_RELEASE(TRACE_STRING_PROC_EXIT, args[0], args[1], args[2], args[3]);
599 }
600
601 /* Get the exit reason before proc_exit */
602 subcode = proc_encode_exit_exception_code(bsd_info);
603 proc_exit(bsd_info);
604 bsd_info = NULL;
605#if CONFIG_EXCLAVES
606 task_clear_conclave(task);
607#endif
608 /*
609 * if there is crash info in task
610 * then do the deliver action since this is
611 * last thread for this task.
612 */
613 if (task->corpse_info) {
614 /* reset all except task name port */
615 ipc_task_reset(task);
616 /* enable all task ports (name port unchanged) */
617 ipc_task_enable(task);
618 exception_type_t etype = get_exception_from_corpse_crashinfo(corpse_info: task->corpse_info);
619 task_deliver_crash_notification(task, current_thread(), etype, subcode);
620 }
621 }
622
623 if (threadcnt == 0) {
624 task_lock(task);
625 if (task_is_a_corpse_fork(task)) {
626 thread_wakeup((event_t)&task->active_thread_count);
627 }
628 task_unlock(task);
629 }
630
631#if CONFIG_EXCLAVES
632 exclaves_thread_terminate(thread);
633#endif
634
635 s = splsched();
636 thread_lock(thread);
637
638 /*
639 * Ensure that the depress timer is no longer enqueued,
640 * so the timer can be safely deallocated
641 *
642 * TODO: build timer_call_cancel_wait
643 */
644
645 assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) == 0);
646
647 uint32_t delay_us = 1;
648
649 while (thread->depress_timer_active > 0) {
650 thread_unlock(thread);
651 splx(s);
652
653 delay(usec: delay_us++);
654
655 if (delay_us > USEC_PER_SEC) {
656 panic("depress timer failed to inactivate!"
657 "thread: %p depress_timer_active: %d",
658 thread, thread->depress_timer_active);
659 }
660
661 s = splsched();
662 thread_lock(thread);
663 }
664
665 /*
666 * Cancel wait timer, and wait for
667 * concurrent expirations.
668 */
669 if (thread->wait_timer_armed) {
670 thread->wait_timer_armed = false;
671
672 if (timer_call_cancel(call: thread->wait_timer)) {
673 thread->wait_timer_active--;
674 }
675 }
676
677 delay_us = 1;
678
679 while (thread->wait_timer_active > 0) {
680 thread_unlock(thread);
681 splx(s);
682
683 delay(usec: delay_us++);
684
685 if (delay_us > USEC_PER_SEC) {
686 panic("wait timer failed to inactivate!"
687 "thread: %p, wait_timer_active: %d, "
688 "wait_timer_armed: %d",
689 thread, thread->wait_timer_active,
690 thread->wait_timer_armed);
691 }
692
693 s = splsched();
694 thread_lock(thread);
695 }
696
697 /*
698 * If there is a reserved stack, release it.
699 */
700 if (thread->reserved_stack != 0) {
701 stack_free_reserved(thread);
702 thread->reserved_stack = 0;
703 }
704
705 /*
706 * Mark thread as terminating, and block.
707 */
708 thread->state |= TH_TERMINATE;
709 thread_mark_wait_locked(thread, THREAD_UNINT);
710
711#if CONFIG_EXCLAVES
712 assert(thread->th_exclaves_ipc_buffer == NULL);
713 assert(thread->th_exclaves_scheduling_context_id == 0);
714 assert(thread->th_exclaves_intstate == 0);
715 assert(thread->th_exclaves_state == 0);
716#endif
717 assert(thread->th_work_interval_flags == TH_WORK_INTERVAL_FLAGS_NONE);
718 assert(thread->kern_promotion_schedpri == 0);
719 if (thread->rwlock_count > 0) {
720 panic("rwlock_count is %d for thread %p, possibly it still holds a rwlock", thread->rwlock_count, thread);
721 }
722 assert(thread->priority_floor_count == 0);
723 assert(thread->handoff_thread == THREAD_NULL);
724 assert(thread->th_work_interval == NULL);
725 assert(thread->t_rr_state.trr_value == 0);
726
727 assert3u(0, ==, thread->sched_flags &
728 (TH_SFLAG_WAITQ_PROMOTED |
729 TH_SFLAG_RW_PROMOTED |
730 TH_SFLAG_EXEC_PROMOTED |
731 TH_SFLAG_FLOOR_PROMOTED |
732 TH_SFLAG_PROMOTED |
733 TH_SFLAG_DEPRESS));
734
735 thread_unlock(thread);
736 /* splsched */
737
738 thread_block(continuation: (thread_continue_t)thread_terminate_continue);
739 /*NOTREACHED*/
740}
741
742static bool
743thread_ref_release(thread_t thread)
744{
745 if (thread == THREAD_NULL) {
746 return false;
747 }
748
749 assert_thread_magic(thread);
750
751 return os_ref_release_raw(&thread->ref_count, &thread_refgrp) == 0;
752}
753
754/* Drop a thread refcount safely without triggering a zfree */
755void
756thread_deallocate_safe(thread_t thread)
757{
758 if (__improbable(thread_ref_release(thread))) {
		/* enqueue the thread for the thread deallocate daemon to call thread_deallocate_complete */
760 thread_deallocate_enqueue(thread);
761 }
762}
763
764void
765thread_deallocate(thread_t thread)
766{
767 if (__improbable(thread_ref_release(thread))) {
768 thread_deallocate_complete(thread);
769 }
770}
771
772void
773thread_deallocate_complete(
774 thread_t thread)
775{
776 task_t task;
777
778 assert_thread_magic(thread);
779
780 assert(os_ref_get_count_raw(&thread->ref_count) == 0);
781
782 if (!(thread->state & TH_TERMINATE2)) {
783 panic("thread_deallocate: thread not properly terminated");
784 }
785
786 thread_assert_runq_null(thread);
787 assert(!(thread->state & TH_WAKING));
788
789#if CONFIG_CPU_COUNTERS
790 kpc_thread_destroy(thread);
791#endif /* CONFIG_CPU_COUNTERS */
792
793 ipc_thread_terminate(thread);
794
795 proc_thread_qos_deallocate(thread);
796
797 task = get_threadtask(thread);
798
799#ifdef MACH_BSD
800 uthread_destroy(get_bsdthread_info(thread));
801#endif /* MACH_BSD */
802
803 if (thread->t_ledger) {
804 ledger_dereference(ledger: thread->t_ledger);
805 }
806 if (thread->t_threadledger) {
807 ledger_dereference(ledger: thread->t_threadledger);
808 }
809
810 assert(thread->turnstile != TURNSTILE_NULL);
811 if (thread->turnstile) {
812 turnstile_deallocate(turnstile: thread->turnstile);
813 }
814 turnstile_compact_id_put(cid: thread->ctsid);
815
816 if (IPC_VOUCHER_NULL != thread->ith_voucher) {
817 ipc_voucher_release(voucher: thread->ith_voucher);
818 }
819
820 kfree_data(thread->thread_io_stats, sizeof(struct io_stat_info));
821#if CONFIG_PREADOPT_TG
822 if (thread->old_preadopt_thread_group) {
823 thread_group_release(tg: thread->old_preadopt_thread_group);
824 }
825
826 if (thread->preadopt_thread_group) {
827 thread_group_release(tg: thread->preadopt_thread_group);
828 }
829#endif /* CONFIG_PREADOPT_TG */
830
831 if (thread->kernel_stack != 0) {
832 stack_free(thread);
833 }
834
835 recount_thread_deinit(th: &thread->th_recount);
836
837 lck_mtx_destroy(lck: &thread->mutex, grp: &thread_lck_grp);
838 machine_thread_destroy(thread);
839
840 task_deallocate_grp(task, TASK_GRP_INTERNAL);
841
842#if MACH_ASSERT
843 assert_thread_magic(thread);
844 thread->thread_magic = 0;
845#endif /* MACH_ASSERT */
846
847 lck_mtx_lock(lck: &tasks_threads_lock);
848 assert(terminated_threads_count > 0);
849 queue_remove(&terminated_threads, thread, thread_t, threads);
850 terminated_threads_count--;
851 lck_mtx_unlock(lck: &tasks_threads_lock);
852
853 timer_call_free(call: thread->depress_timer);
854 timer_call_free(call: thread->wait_timer);
855
856 ctid_table_remove(thread);
857
858 thread_ro_destroy(th: thread);
859 zfree(thread_zone, thread);
860}
861
862/*
863 * thread_inspect_deallocate:
864 *
865 * Drop a thread inspection reference.
866 */
867void
868thread_inspect_deallocate(
869 thread_inspect_t thread_inspect)
870{
871 return thread_deallocate(thread: (thread_t)thread_inspect);
872}
873
874/*
875 * thread_read_deallocate:
876 *
877 * Drop a reference on thread read port.
878 */
879void
880thread_read_deallocate(
881 thread_read_t thread_read)
882{
883 return thread_deallocate(thread: (thread_t)thread_read);
884}
885
886
887/*
888 * thread_exception_queue_invoke:
889 *
890 * Deliver EXC_{RESOURCE,GUARD} exception
891 */
892static void
893thread_exception_queue_invoke(mpsc_queue_chain_t elm,
894 __assert_only mpsc_daemon_queue_t dq)
895{
896 struct thread_exception_elt *elt;
897 task_t task;
898 thread_t thread;
899 exception_type_t etype;
900
901 assert(dq == &thread_exception_queue);
902 elt = mpsc_queue_element(elm, struct thread_exception_elt, link);
903
904 etype = elt->exception_type;
905 task = elt->exception_task;
906 thread = elt->exception_thread;
907 assert_thread_magic(thread);
908
909 kfree_type(struct thread_exception_elt, elt);
910
911 /* wait for all the threads in the task to terminate */
912 task_lock(task);
913 task_wait_till_threads_terminate_locked(task);
914 task_unlock(task);
915
916 /* Consumes the task ref returned by task_generate_corpse_internal */
917 task_deallocate(task);
918 /* Consumes the thread ref returned by task_generate_corpse_internal */
919 thread_deallocate(thread);
920
921 /* Deliver the notification, also clears the corpse. */
922 task_deliver_crash_notification(task, thread, etype, 0);
923}
924
925static void
926thread_backtrace_queue_invoke(mpsc_queue_chain_t elm,
927 __assert_only mpsc_daemon_queue_t dq)
928{
929 struct thread_backtrace_elt *elt;
930 kcdata_object_t obj;
931 exception_port_t exc_ports[BT_EXC_PORTS_COUNT]; /* send rights */
932 exception_type_t etype;
933
934 assert(dq == &thread_backtrace_queue);
935 elt = mpsc_queue_element(elm, struct thread_backtrace_elt, link);
936
937 obj = elt->obj;
938 memcpy(dst: exc_ports, src: elt->exc_ports, n: sizeof(ipc_port_t) * BT_EXC_PORTS_COUNT);
939 etype = elt->exception_type;
940
941 kfree_type(struct thread_backtrace_elt, elt);
942
943 /* Deliver to backtrace exception ports */
944 exception_deliver_backtrace(bt_object: obj, exc_ports, exception: etype);
945
946 /*
947 * Release port right and kcdata object refs given by
948 * task_enqueue_exception_with_corpse()
949 */
950
951 for (unsigned int i = 0; i < BT_EXC_PORTS_COUNT; i++) {
952 ipc_port_release_send(port: exc_ports[i]);
953 }
954
955 kcdata_object_release(obj);
956}
957
958/*
959 * thread_exception_enqueue:
960 *
961 * Enqueue a corpse port to be delivered an EXC_{RESOURCE,GUARD}.
962 */
963void
964thread_exception_enqueue(
965 task_t task,
966 thread_t thread,
967 exception_type_t etype)
968{
969 assert(EXC_RESOURCE == etype || EXC_GUARD == etype);
970 struct thread_exception_elt *elt = kalloc_type(struct thread_exception_elt, Z_WAITOK | Z_NOFAIL);
971 elt->exception_type = etype;
972 elt->exception_task = task;
973 elt->exception_thread = thread;
974
975 mpsc_daemon_enqueue(dq: &thread_exception_queue, elm: &elt->link,
976 options: MPSC_QUEUE_DISABLE_PREEMPTION);
977}
978
979void
980thread_backtrace_enqueue(
981 kcdata_object_t obj,
982 exception_port_t ports[static BT_EXC_PORTS_COUNT],
983 exception_type_t etype)
984{
985 struct thread_backtrace_elt *elt = kalloc_type(struct thread_backtrace_elt, Z_WAITOK | Z_NOFAIL);
986 elt->obj = obj;
987 elt->exception_type = etype;
988
989 memcpy(dst: elt->exc_ports, src: ports, n: sizeof(ipc_port_t) * BT_EXC_PORTS_COUNT);
990
991 mpsc_daemon_enqueue(dq: &thread_backtrace_queue, elm: &elt->link,
992 options: MPSC_QUEUE_DISABLE_PREEMPTION);
993}
994
995/*
996 * thread_copy_resource_info
997 *
998 * Copy the resource info counters from source
999 * thread to destination thread.
1000 */
1001void
1002thread_copy_resource_info(
1003 thread_t dst_thread,
1004 thread_t src_thread)
1005{
1006 dst_thread->c_switch = src_thread->c_switch;
1007 dst_thread->p_switch = src_thread->p_switch;
1008 dst_thread->ps_switch = src_thread->ps_switch;
1009 dst_thread->sched_time_save = src_thread->sched_time_save;
1010 dst_thread->runnable_timer = src_thread->runnable_timer;
1011 dst_thread->vtimer_user_save = src_thread->vtimer_user_save;
1012 dst_thread->vtimer_prof_save = src_thread->vtimer_prof_save;
1013 dst_thread->vtimer_rlim_save = src_thread->vtimer_rlim_save;
1014 dst_thread->vtimer_qos_save = src_thread->vtimer_qos_save;
1015 dst_thread->syscalls_unix = src_thread->syscalls_unix;
1016 dst_thread->syscalls_mach = src_thread->syscalls_mach;
1017 ledger_rollup(to_ledger: dst_thread->t_threadledger, from_ledger: src_thread->t_threadledger);
1018 recount_thread_copy(dst: &dst_thread->th_recount, src: &src_thread->th_recount);
1019 *dst_thread->thread_io_stats = *src_thread->thread_io_stats;
1020}
1021
1022static void
1023thread_terminate_queue_invoke(mpsc_queue_chain_t e,
1024 __assert_only mpsc_daemon_queue_t dq)
1025{
1026 thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
1027 task_t task = get_threadtask(thread);
1028
1029 assert(dq == &thread_terminate_queue);
1030
1031 task_lock(task);
1032
1033 /*
	 * If marked for crash reporting, skip reaping.
	 * The corpse delivery thread will clear the bit and re-enqueue
	 * the thread for reaping when done.
1037 *
1038 * Note: the inspection field is set under the task lock
1039 *
1040 * FIXME[mad]: why enqueue for termination before `inspection` is false ?
1041 */
1042 if (__improbable(thread->inspection)) {
1043 simple_lock(&crashed_threads_lock, &thread_lck_grp);
1044 task_unlock(task);
1045
1046 enqueue_tail(que: &crashed_threads_queue, elt: &thread->runq_links);
1047 simple_unlock(&crashed_threads_lock);
1048 return;
1049 }
1050
1051 recount_task_rollup_thread(tk: &task->tk_recount, th: &thread->th_recount);
1052
1053 task->total_runnable_time += timer_grab(timer: &thread->runnable_timer);
1054 task->c_switch += thread->c_switch;
1055 task->p_switch += thread->p_switch;
1056 task->ps_switch += thread->ps_switch;
1057
1058 task->syscalls_unix += thread->syscalls_unix;
1059 task->syscalls_mach += thread->syscalls_mach;
1060
1061 task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1;
1062 task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2;
1063 task->task_gpu_ns += ml_gpu_stat(thread);
1064 task->decompressions += thread->decompressions;
1065
1066 thread_update_qos_cpu_time(thread);
1067
1068 queue_remove(&task->threads, thread, thread_t, task_threads);
1069 task->thread_count--;
1070
1071 /*
1072 * If the task is being halted, and there is only one thread
1073 * left in the task after this one, then wakeup that thread.
1074 */
1075 if (task->thread_count == 1 && task->halting) {
1076 thread_wakeup((event_t)&task->halting);
1077 }
1078
1079 task_unlock(task);
1080
1081 lck_mtx_lock(lck: &tasks_threads_lock);
1082 queue_remove(&threads, thread, thread_t, threads);
1083 threads_count--;
1084 queue_enter(&terminated_threads, thread, thread_t, threads);
1085 terminated_threads_count++;
1086 lck_mtx_unlock(lck: &tasks_threads_lock);
1087
1088#if MACH_BSD
1089 /*
1090 * The thread no longer counts against the task's thread count,
1091 * we can now wake up any pending joiner.
1092 *
1093 * Note that the inheritor will be set to `thread` which is
1094 * incorrect once it is on the termination queue, however
1095 * the termination queue runs at MINPRI_KERNEL which is higher
1096 * than any user thread, so this isn't a priority inversion.
1097 */
1098 if (thread_get_tag(thread) & THREAD_TAG_USER_JOIN) {
1099 struct uthread *uth = get_bsdthread_info(thread);
1100 mach_port_name_t kport = uthread_joiner_port(uth);
1101
1102 /*
		 * Clear the low two bits of the port name to tell pthread that the thread is gone.
1104 */
1105#ifndef NO_PORT_GEN
1106 kport &= ~MACH_PORT_MAKE(0, IE_BITS_GEN_MASK + IE_BITS_GEN_ONE);
1107#else
1108 kport |= MACH_PORT_MAKE(0, ~(IE_BITS_GEN_MASK + IE_BITS_GEN_ONE));
1109#endif
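		/*
		 * Whichever branch ran, the munged name can no longer match a
		 * valid port name for this thread; pthread, reading back the
		 * value copied out below, presumably uses that to tell a
		 * still-running thread apart from one that has exited.
		 */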
1110 (void)copyoutmap_atomic32(map: task->map, value: kport,
1111 toaddr: uthread_joiner_address(uth));
1112 uthread_joiner_wake(task, uth);
1113 }
1114#endif
1115
1116 thread_deallocate(thread);
1117}
1118
1119static void
1120thread_deallocate_queue_invoke(mpsc_queue_chain_t e,
1121 __assert_only mpsc_daemon_queue_t dq)
1122{
1123 thread_t thread = mpsc_queue_element(e, struct thread, mpsc_links);
1124
1125 assert(dq == &thread_deallocate_queue);
1126
1127 thread_deallocate_complete(thread);
1128}
1129
1130/*
1131 * thread_terminate_enqueue:
1132 *
1133 * Enqueue a terminating thread for final disposition.
1134 *
1135 * Called at splsched.
1136 */
1137void
1138thread_terminate_enqueue(
1139 thread_t thread)
1140{
1141 KDBG_RELEASE(TRACE_DATA_THREAD_TERMINATE, thread->thread_id);
1142
1143 mpsc_daemon_enqueue(dq: &thread_terminate_queue, elm: &thread->mpsc_links,
1144 options: MPSC_QUEUE_DISABLE_PREEMPTION);
1145}
1146
1147/*
1148 * thread_deallocate_enqueue:
1149 *
1150 * Enqueue a thread for final deallocation.
1151 */
1152static void
1153thread_deallocate_enqueue(
1154 thread_t thread)
1155{
1156 mpsc_daemon_enqueue(dq: &thread_deallocate_queue, elm: &thread->mpsc_links,
1157 options: MPSC_QUEUE_DISABLE_PREEMPTION);
1158}
1159
1160/*
1161 * thread_terminate_crashed_threads:
 * walk the list of crashed threads and hand back to the terminate queue
 * any threads that are no longer being inspected.
1164 */
1165void
1166thread_terminate_crashed_threads(void)
1167{
1168 thread_t th_remove;
1169
1170 simple_lock(&crashed_threads_lock, &thread_lck_grp);
1171 /*
	 * loop through the crashed threads queue and move any threads that
	 * are no longer being inspected back onto the terminate queue
1174 */
1175
1176 qe_foreach_element_safe(th_remove, &crashed_threads_queue, runq_links) {
1177 /* make sure current_thread is never in crashed queue */
1178 assert(th_remove != current_thread());
1179
1180 if (th_remove->inspection == FALSE) {
1181 remqueue(elt: &th_remove->runq_links);
1182 mpsc_daemon_enqueue(dq: &thread_terminate_queue, elm: &th_remove->mpsc_links,
1183 options: MPSC_QUEUE_NONE);
1184 }
1185 }
1186
1187 simple_unlock(&crashed_threads_lock);
1188}
1189
1190/*
1191 * thread_stack_queue_invoke:
1192 *
1193 * Perform stack allocation as required due to
1194 * invoke failures.
1195 */
1196static void
1197thread_stack_queue_invoke(mpsc_queue_chain_t elm,
1198 __assert_only mpsc_daemon_queue_t dq)
1199{
1200 thread_t thread = mpsc_queue_element(elm, struct thread, mpsc_links);
1201
1202 assert(dq == &thread_stack_queue);
1203
1204 /* allocate stack with interrupts enabled so that we can call into VM */
1205 stack_alloc(thread);
1206
1207 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_END, thread_tid(thread), 0, 0, 0, 0);
1208
1209 spl_t s = splsched();
1210 thread_lock(thread);
1211 thread_setrun(thread, options: SCHED_PREEMPT | SCHED_TAILQ);
1212 thread_unlock(thread);
1213 splx(s);
1214}
1215
1216/*
1217 * thread_stack_enqueue:
1218 *
1219 * Enqueue a thread for stack allocation.
1220 *
1221 * Called at splsched.
1222 */
1223void
1224thread_stack_enqueue(
1225 thread_t thread)
1226{
1227 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_WAIT) | DBG_FUNC_START, thread_tid(thread), 0, 0, 0, 0);
1228 assert_thread_magic(thread);
1229
1230 mpsc_daemon_enqueue(dq: &thread_stack_queue, elm: &thread->mpsc_links,
1231 options: MPSC_QUEUE_DISABLE_PREEMPTION);
1232}
1233
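/*
 * thread_daemon_init:
 *
 * Set up the kernel daemons used above: the terminate and deallocate queues
 * are serviced by the shared thread_deallocate daemon, while the stack,
 * exception and backtrace queues each get a dedicated mpsc daemon thread
 * (the stack daemon at BASEPRI_PREEMPT_HIGH, the other two at MINPRI_KERNEL).
 */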
1234void
1235thread_daemon_init(void)
1236{
1237 kern_return_t result;
1238
1239 thread_deallocate_daemon_init();
1240
1241 thread_deallocate_daemon_register_queue(dq: &thread_terminate_queue,
1242 invoke: thread_terminate_queue_invoke);
1243
1244 thread_deallocate_daemon_register_queue(dq: &thread_deallocate_queue,
1245 invoke: thread_deallocate_queue_invoke);
1246
1247 ipc_object_deallocate_register_queue();
1248
1249 simple_lock_init(&crashed_threads_lock, 0);
1250 queue_init(&crashed_threads_queue);
1251
1252 result = mpsc_daemon_queue_init_with_thread(dq: &thread_stack_queue,
1253 invoke: thread_stack_queue_invoke, BASEPRI_PREEMPT_HIGH,
1254 name: "daemon.thread-stack", flags: MPSC_DAEMON_INIT_NONE);
1255 if (result != KERN_SUCCESS) {
1256 panic("thread_daemon_init: thread_stack_daemon");
1257 }
1258
1259 result = mpsc_daemon_queue_init_with_thread(dq: &thread_exception_queue,
1260 invoke: thread_exception_queue_invoke, MINPRI_KERNEL,
1261 name: "daemon.thread-exception", flags: MPSC_DAEMON_INIT_NONE);
1262
1263 if (result != KERN_SUCCESS) {
1264 panic("thread_daemon_init: thread_exception_daemon");
1265 }
1266
1267 result = mpsc_daemon_queue_init_with_thread(dq: &thread_backtrace_queue,
1268 invoke: thread_backtrace_queue_invoke, MINPRI_KERNEL,
1269 name: "daemon.thread-backtrace", flags: MPSC_DAEMON_INIT_NONE);
1270
1271 if (result != KERN_SUCCESS) {
1272 panic("thread_daemon_init: thread_backtrace_daemon");
1273 }
1274}
1275
1276__options_decl(thread_create_internal_options_t, uint32_t, {
1277 TH_OPTION_NONE = 0x00,
1278 TH_OPTION_NOSUSP = 0x02,
1279 TH_OPTION_WORKQ = 0x04,
1280 TH_OPTION_MAINTHREAD = 0x08,
1281});
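
/*
 * How these options are used by the call sites in this file: TH_OPTION_NOSUSP
 * makes thread_create_internal() refuse to create a thread while the parent
 * task is suspended, TH_OPTION_WORKQ marks a parked workqueue thread
 * (static_param is set and the wait event comes from
 * workq_thread_init_and_wq_lock()), and TH_OPTION_MAINTHREAD is used for a
 * task's first thread (uninterruptible initial wait, and
 * IPC_THREAD_INIT_MAINTHREAD is passed to ipc_thread_init()).
 */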
1282
1283void
1284main_thread_set_immovable_pinned(thread_t thread)
1285{
1286 ipc_main_thread_set_immovable_pinned(thread);
1287}
1288
1289/*
1290 * Create a new thread.
1291 * Doesn't start the thread running.
1292 *
1293 * Task and tasks_threads_lock are returned locked on success.
1294 */
1295static kern_return_t
1296thread_create_internal(
1297 task_t parent_task,
1298 integer_t priority,
1299 thread_continue_t continuation,
1300 void *parameter,
1301 thread_create_internal_options_t options,
1302 thread_t *out_thread)
1303{
1304 thread_t new_thread;
1305 ipc_thread_init_options_t init_options = IPC_THREAD_INIT_NONE;
1306 struct thread_ro tro_tpl = { };
1307 bool first_thread = false;
1308 kern_return_t kr = KERN_FAILURE;
1309
1310 /*
1311 * Allocate a thread and initialize static fields
1312 */
1313 new_thread = zalloc_flags(thread_zone, Z_WAITOK | Z_NOFAIL);
1314
1315 if (__improbable(current_thread() == &init_thread)) {
1316 /*
1317 * The first thread ever is a global, but because we want to be
1318 * able to zone_id_require() threads, we have to stop using the
		 * global piece of memory we used to bootstrap the kernel and
1320 * jump to a proper thread from a zone.
1321 *
1322 * This is why that one thread will inherit its original
1323 * state differently.
1324 *
1325 * Also remember this thread in `vm_pageout_scan_thread`
1326 * as this is what the first thread ever becomes.
1327 *
1328 * Also pre-warm the depress timer since the VM pageout scan
1329 * daemon might need to use it.
1330 */
1331 assert(vm_pageout_scan_thread == THREAD_NULL);
1332 vm_pageout_scan_thread = new_thread;
1333
1334 first_thread = true;
1335#pragma clang diagnostic push
1336#pragma clang diagnostic ignored "-Wnontrivial-memaccess"
1337 /* work around 74481146 */
1338 memcpy(dst: new_thread, src: &init_thread, n: sizeof(*new_thread));
1339#pragma clang diagnostic pop
1340
1341 /*
1342 * Make the ctid table functional
1343 */
1344 ctid_table_init();
1345 new_thread->ctid = 0;
1346 } else {
1347 init_thread_from_template(thread: new_thread);
1348 }
1349
1350 if (options & TH_OPTION_MAINTHREAD) {
1351 init_options |= IPC_THREAD_INIT_MAINTHREAD;
1352 }
1353
1354 os_ref_init_count_raw(&new_thread->ref_count, &thread_refgrp, 2);
1355 machine_thread_create(thread: new_thread, task: parent_task, first_thread);
1356
1357 machine_thread_process_signature(thread: new_thread, task: parent_task);
1358
1359#ifdef MACH_BSD
1360 uthread_init(parent_task, get_bsdthread_info(new_thread),
1361 &tro_tpl, (options & TH_OPTION_WORKQ) != 0);
1362 if (!task_is_a_corpse(task: parent_task)) {
1363 /*
1364 * uthread_init will set tro_cred (with a +1)
1365 * and tro_proc for live tasks.
1366 */
1367 assert(tro_tpl.tro_cred && tro_tpl.tro_proc);
1368 }
1369#endif /* MACH_BSD */
1370
1371 thread_lock_init(new_thread);
1372 wake_lock_init(new_thread);
1373
1374 lck_mtx_init(lck: &new_thread->mutex, grp: &thread_lck_grp, LCK_ATTR_NULL);
1375
1376 ipc_thread_init(task: parent_task, thread: new_thread, tro: &tro_tpl, options: init_options);
1377
1378 thread_ro_create(parent_task, th: new_thread, tro_tpl: &tro_tpl);
1379
1380 new_thread->continuation = continuation;
1381 new_thread->parameter = parameter;
1382 new_thread->inheritor_flags = TURNSTILE_UPDATE_FLAGS_NONE;
1383 new_thread->requested_policy = default_thread_requested_policy;
1384 new_thread->__runq.runq = PROCESSOR_NULL;
1385 priority_queue_init(que: &new_thread->sched_inheritor_queue);
1386 priority_queue_init(que: &new_thread->base_inheritor_queue);
1387#if CONFIG_SCHED_CLUTCH
1388 priority_queue_entry_init(&new_thread->th_clutch_runq_link);
1389 priority_queue_entry_init(&new_thread->th_clutch_pri_link);
1390#endif /* CONFIG_SCHED_CLUTCH */
1391
1392#if CONFIG_SCHED_EDGE
1393 new_thread->th_bound_cluster_enqueued = false;
1394 for (cluster_shared_rsrc_type_t shared_rsrc_type = CLUSTER_SHARED_RSRC_TYPE_MIN; shared_rsrc_type < CLUSTER_SHARED_RSRC_TYPE_COUNT; shared_rsrc_type++) {
1395 new_thread->th_shared_rsrc_enqueued[shared_rsrc_type] = false;
1396 new_thread->th_shared_rsrc_heavy_user[shared_rsrc_type] = false;
1397 new_thread->th_shared_rsrc_heavy_perf_control[shared_rsrc_type] = false;
1398 }
1399#endif /* CONFIG_SCHED_EDGE */
1400 new_thread->th_bound_cluster_id = THREAD_BOUND_CLUSTER_NONE;
1401
1402 /* Allocate I/O Statistics structure */
1403 new_thread->thread_io_stats = kalloc_data(sizeof(struct io_stat_info),
1404 Z_WAITOK | Z_ZERO | Z_NOFAIL);
1405
1406#if KASAN_CLASSIC
1407 kasan_init_thread(&new_thread->kasan_data);
1408#endif /* KASAN_CLASSIC */
1409
1410#if CONFIG_KCOV
1411 kcov_init_thread(&new_thread->kcov_data);
1412#endif
1413
1414#if CONFIG_IOSCHED
1415 /* Clear out the I/O Scheduling info for AppleFSCompression */
1416 new_thread->decmp_upl = NULL;
1417#endif /* CONFIG_IOSCHED */
1418
1419 new_thread->thread_region_page_shift = 0;
1420
1421#if DEVELOPMENT || DEBUG
1422 task_lock(parent_task);
1423 uint16_t thread_limit = parent_task->task_thread_limit;
1424 if (exc_resource_threads_enabled &&
1425 thread_limit > 0 &&
1426 parent_task->thread_count >= thread_limit &&
1427 !parent_task->task_has_crossed_thread_limit &&
1428 !(task_is_a_corpse(parent_task))) {
1429 int thread_count = parent_task->thread_count;
1430 parent_task->task_has_crossed_thread_limit = TRUE;
1431 task_unlock(parent_task);
1432 SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(parent_task, thread_count);
1433 } else {
1434 task_unlock(parent_task);
1435 }
1436#endif
1437
1438 lck_mtx_lock(lck: &tasks_threads_lock);
1439 task_lock(parent_task);
1440
1441 /*
	 * Fail thread creation if the parent task is being torn down or has too many threads.
	 * If the caller asked for TH_OPTION_NOSUSP, also fail if the parent task is suspended.
1444 */
1445 if (parent_task->active == 0 || parent_task->halting ||
1446 (parent_task->suspend_count > 0 && (options & TH_OPTION_NOSUSP) != 0) ||
1447 (parent_task->thread_count >= task_threadmax && parent_task != kernel_task)) {
1448 task_unlock(parent_task);
1449 lck_mtx_unlock(lck: &tasks_threads_lock);
1450
1451 ipc_thread_disable(thread: new_thread);
1452 ipc_thread_terminate(thread: new_thread);
1453 kfree_data(new_thread->thread_io_stats,
1454 sizeof(struct io_stat_info));
1455 lck_mtx_destroy(lck: &new_thread->mutex, grp: &thread_lck_grp);
1456 kr = KERN_FAILURE;
1457 goto out_thread_cleanup;
1458 }
1459
1460 /* Protected by the tasks_threads_lock */
1461 new_thread->thread_id = ++thread_unique_id;
1462
1463 ctid_table_add(thread: new_thread);
1464
1465 /* New threads inherit any default state on the task */
1466 machine_thread_inherit_taskwide(thread: new_thread, parent_task);
1467
1468 task_reference_grp(parent_task, TASK_GRP_INTERNAL);
1469
1470 if (parent_task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
1471 /*
1472 * This task has a per-thread CPU limit; make sure this new thread
1473 * gets its limit set too, before it gets out of the kernel.
1474 */
1475 act_set_astledger(thread: new_thread);
1476 }
1477
1478 /* Instantiate a thread ledger. Do not fail thread creation if ledger creation fails. */
1479 if ((new_thread->t_threadledger = ledger_instantiate(template: thread_ledger_template,
1480 LEDGER_CREATE_INACTIVE_ENTRIES)) != LEDGER_NULL) {
1481 ledger_entry_setactive(ledger: new_thread->t_threadledger, entry: thread_ledgers.cpu_time);
1482 }
1483
1484 new_thread->t_bankledger = LEDGER_NULL;
1485 new_thread->t_deduct_bank_ledger_time = 0;
1486 new_thread->t_deduct_bank_ledger_energy = 0;
1487
1488 new_thread->t_ledger = parent_task->ledger;
1489 if (new_thread->t_ledger) {
1490 ledger_reference(ledger: new_thread->t_ledger);
1491 }
1492
1493 recount_thread_init(th: &new_thread->th_recount);
1494
1495#if defined(CONFIG_SCHED_MULTIQ)
1496 /* Cache the task's sched_group */
1497 new_thread->sched_group = parent_task->sched_group;
1498#endif /* defined(CONFIG_SCHED_MULTIQ) */
1499
1500 /* Cache the task's map */
1501 new_thread->map = parent_task->map;
1502
1503 new_thread->depress_timer = timer_call_alloc(func: thread_depress_expire, param0: new_thread);
1504 new_thread->wait_timer = timer_call_alloc(func: thread_timer_expire, param0: new_thread);
1505
1506#if CONFIG_CPU_COUNTERS
1507 kpc_thread_create(new_thread);
1508#endif /* CONFIG_CPU_COUNTERS */
1509
1510 /* Set the thread's scheduling parameters */
1511 new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task);
1512 new_thread->max_priority = parent_task->max_priority;
1513 new_thread->task_priority = parent_task->priority;
1514
1515#if CONFIG_THREAD_GROUPS
1516 thread_group_init_thread(t: new_thread, task: parent_task);
1517#endif /* CONFIG_THREAD_GROUPS */
1518
	int new_priority = (priority < 0) ? parent_task->priority : priority;
1521 if (new_priority > new_thread->max_priority) {
1522 new_priority = new_thread->max_priority;
1523 }
1524#if !defined(XNU_TARGET_OS_OSX)
1525 if (new_priority < MAXPRI_THROTTLE) {
1526 new_priority = MAXPRI_THROTTLE;
1527 }
1528#endif /* !defined(XNU_TARGET_OS_OSX) */
1529
1530 new_thread->importance = new_priority - new_thread->task_priority;
1531
1532 sched_set_thread_base_priority(thread: new_thread, priority: new_priority);
1533
1534#if defined(CONFIG_SCHED_TIMESHARE_CORE)
1535 new_thread->sched_stamp = sched_tick;
1536#if CONFIG_SCHED_CLUTCH
1537 new_thread->pri_shift = sched_clutch_thread_pri_shift(new_thread, new_thread->th_sched_bucket);
1538#else /* CONFIG_SCHED_CLUTCH */
1539 new_thread->pri_shift = sched_pri_shifts[new_thread->th_sched_bucket];
1540#endif /* CONFIG_SCHED_CLUTCH */
1541#endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) */
1542
1543 if (parent_task->max_priority <= MAXPRI_THROTTLE) {
1544 sched_thread_mode_demote(thread: new_thread, TH_SFLAG_THROTTLED);
1545 }
1546
1547 thread_policy_create(thread: new_thread);
1548
1549 /* Chain the thread onto the task's list */
1550 queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
1551 parent_task->thread_count++;
1552
1553 /* So terminating threads don't need to take the task lock to decrement */
1554 os_atomic_inc(&parent_task->active_thread_count, relaxed);
1555
1556 queue_enter(&threads, new_thread, thread_t, threads);
1557 threads_count++;
1558
1559 new_thread->active = TRUE;
1560 if (task_is_a_corpse_fork(parent_task)) {
1561 /* Set the inspection bit if the task is a corpse fork */
1562 new_thread->inspection = TRUE;
1563 } else {
1564 new_thread->inspection = FALSE;
1565 }
1566 new_thread->corpse_dup = FALSE;
1567 new_thread->turnstile = turnstile_alloc();
1568 new_thread->ctsid = turnstile_compact_id_get();
1569
1570
1571 *out_thread = new_thread;
1572
1573 if (kdebug_enable) {
1574 long args[4] = {};
1575
1576 kdbg_trace_data(proc: get_bsdtask_info(parent_task), arg_pid: &args[1], arg_uniqueid: &args[3]);
1577
1578 /*
1579 * Starting with 26604425, exec'ing creates a new task/thread.
1580 *
1581 * NEWTHREAD in the current process has two possible meanings:
1582 *
1583 * 1) Create a new thread for this process.
1584 * 2) Create a new thread for the future process this will become in an
1585 * exec.
1586 *
1587 * To disambiguate these, arg3 will be set to TRUE for case #2.
1588 *
1589 * The value we need to find (TPF_EXEC_COPY) is stable in the case of a
1590 * task exec'ing. The read of t_procflags does not take the proc_lock.
1591 */
1592 args[2] = task_is_exec_copy(parent_task) ? 1 : 0;
1593
1594 KDBG_RELEASE(TRACE_DATA_NEWTHREAD, (uintptr_t)thread_tid(new_thread),
1595 args[1], args[2], args[3]);
1596
1597 kdebug_proc_name_args(proc: get_bsdtask_info(parent_task), args);
1598 KDBG_RELEASE(TRACE_STRING_NEWTHREAD, args[0], args[1], args[2],
1599 args[3]);
1600 }
1601
1602 DTRACE_PROC1(lwp__create, thread_t, *out_thread);
1603
1604 kr = KERN_SUCCESS;
1605 goto done;
1606
1607out_thread_cleanup:
1608#ifdef MACH_BSD
1609 {
1610 struct uthread *ut = get_bsdthread_info(new_thread);
1611
1612 uthread_cleanup(ut, &tro_tpl);
1613 uthread_destroy(ut);
1614 }
1615#endif /* MACH_BSD */
1616
1617 machine_thread_destroy(thread: new_thread);
1618
1619 thread_ro_destroy(th: new_thread);
1620 zfree(thread_zone, new_thread);
1621
1622done:
1623 return kr;
1624}
1625
1626static kern_return_t
1627thread_create_with_options_internal(
1628 task_t task,
1629 thread_t *new_thread,
1630 boolean_t from_user,
1631 thread_create_internal_options_t options,
1632 thread_continue_t continuation)
1633{
1634 kern_return_t result;
1635 thread_t thread;
1636
1637 if (task == TASK_NULL || task == kernel_task) {
1638 return KERN_INVALID_ARGUMENT;
1639 }
1640
1641#if CONFIG_MACF
1642 if (from_user && current_task() != task &&
1643 mac_proc_check_remote_thread_create(task, flavor: -1, NULL, new_state_count: 0) != 0) {
1644 return KERN_DENIED;
1645 }
1646#endif
1647
1648 result = thread_create_internal(parent_task: task, priority: -1, continuation, NULL, options, out_thread: &thread);
1649 if (result != KERN_SUCCESS) {
1650 return result;
1651 }
1652
1653 thread->user_stop_count = 1;
1654 thread_hold(thread);
1655 if (task->suspend_count > 0) {
1656 thread_hold(thread);
1657 }
1658
1659 if (from_user) {
1660 extmod_statistics_incr_thread_create(target: task);
1661 }
1662
1663 task_unlock(task);
1664 lck_mtx_unlock(lck: &tasks_threads_lock);
1665
1666 *new_thread = thread;
1667
1668 return KERN_SUCCESS;
1669}
1670
1671kern_return_t
1672thread_create_immovable(
1673 task_t task,
1674 thread_t *new_thread)
1675{
1676 return thread_create_with_options_internal(task, new_thread, FALSE,
1677 options: TH_OPTION_NONE, continuation: (thread_continue_t)thread_bootstrap_return);
1678}
1679
1680kern_return_t
1681thread_create_from_user(
1682 task_t task,
1683 thread_t *new_thread)
1684{
1685 /* All thread ports are created immovable by default */
1686 return thread_create_with_options_internal(task, new_thread, TRUE, options: TH_OPTION_NONE,
1687 continuation: (thread_continue_t)thread_bootstrap_return);
1688}
1689
1690kern_return_t
1691thread_create_with_continuation(
1692 task_t task,
1693 thread_t *new_thread,
1694 thread_continue_t continuation)
1695{
1696 return thread_create_with_options_internal(task, new_thread, FALSE, options: TH_OPTION_NONE, continuation);
1697}
1698
1699/*
1700 * Create a thread that is already started, but is waiting on an event
1701 */
1702static kern_return_t
1703thread_create_waiting_internal(
1704 task_t task,
1705 thread_continue_t continuation,
1706 event_t event,
1707 block_hint_t block_hint,
1708 thread_create_internal_options_t options,
1709 thread_t *new_thread)
1710{
1711 kern_return_t result;
1712 thread_t thread;
1713 wait_interrupt_t wait_interrupt = THREAD_INTERRUPTIBLE;
1714
1715 if (task == TASK_NULL || task == kernel_task) {
1716 return KERN_INVALID_ARGUMENT;
1717 }
1718
1719 result = thread_create_internal(parent_task: task, priority: -1, continuation, NULL,
1720 options, out_thread: &thread);
1721 if (result != KERN_SUCCESS) {
1722 return result;
1723 }
1724
1725 /* note no user_stop_count or thread_hold here */
1726
1727 if (task->suspend_count > 0) {
1728 thread_hold(thread);
1729 }
1730
1731 thread_mtx_lock(thread);
1732 thread_set_pending_block_hint(thread, block_hint);
1733 if (options & TH_OPTION_WORKQ) {
1734 thread->static_param = true;
1735 event = workq_thread_init_and_wq_lock(task, thread);
1736 } else if (options & TH_OPTION_MAINTHREAD) {
1737 wait_interrupt = THREAD_UNINT;
1738 }
1739 thread_start_in_assert_wait(thread,
1740 waitq: assert_wait_queue(event), CAST_EVENT64_T(event),
1741 interruptible: wait_interrupt);
1742 thread_mtx_unlock(thread);
1743
1744 task_unlock(task);
1745 lck_mtx_unlock(lck: &tasks_threads_lock);
1746
1747 *new_thread = thread;
1748
1749 return KERN_SUCCESS;
1750}
1751
1752kern_return_t
1753main_thread_create_waiting(
1754 task_t task,
1755 thread_continue_t continuation,
1756 event_t event,
1757 thread_t *new_thread)
1758{
1759 return thread_create_waiting_internal(task, continuation, event,
1760 block_hint: kThreadWaitNone, options: TH_OPTION_MAINTHREAD, new_thread);
1761}
1762
1763
1764static kern_return_t
1765thread_create_running_internal2(
1766 task_t task,
1767 int flavor,
1768 thread_state_t new_state,
1769 mach_msg_type_number_t new_state_count,
1770 thread_t *new_thread,
1771 boolean_t from_user)
1772{
1773 kern_return_t result;
1774 thread_t thread;
1775
1776 if (task == TASK_NULL || task == kernel_task) {
1777 return KERN_INVALID_ARGUMENT;
1778 }
1779
1780#if CONFIG_MACF
1781 if (from_user && current_task() != task &&
1782 mac_proc_check_remote_thread_create(task, flavor, new_state, new_state_count) != 0) {
1783 return KERN_DENIED;
1784 }
1785#endif
1786
1787 result = thread_create_internal(parent_task: task, priority: -1,
1788 continuation: (thread_continue_t)thread_bootstrap_return, NULL,
1789 options: TH_OPTION_NONE, out_thread: &thread);
1790 if (result != KERN_SUCCESS) {
1791 return result;
1792 }
1793
1794 if (task->suspend_count > 0) {
1795 thread_hold(thread);
1796 }
1797
1798 if (from_user) {
1799 result = machine_thread_state_convert_from_user(thread, flavor,
1800 tstate: new_state, count: new_state_count, NULL, old_count: 0, tssf_flags: TSSF_FLAGS_NONE);
1801 }
1802 if (result == KERN_SUCCESS) {
1803 result = machine_thread_set_state(thread, flavor, state: new_state,
1804 count: new_state_count);
1805 }
1806 if (result != KERN_SUCCESS) {
1807 task_unlock(task);
1808 lck_mtx_unlock(lck: &tasks_threads_lock);
1809
1810 thread_terminate(thread);
1811 thread_deallocate(thread);
1812 return result;
1813 }
1814
1815 thread_mtx_lock(thread);
1816 thread_start(thread);
1817 thread_mtx_unlock(thread);
1818
1819 if (from_user) {
1820 extmod_statistics_incr_thread_create(task);
1821 }
1822
1823 task_unlock(task);
1824 lck_mtx_unlock(&tasks_threads_lock);
1825
1826 *new_thread = thread;
1827
1828 return result;
1829}
1830
1831/* Prototype, see justification above */
1832kern_return_t
1833thread_create_running(
1834 task_t task,
1835 int flavor,
1836 thread_state_t new_state,
1837 mach_msg_type_number_t new_state_count,
1838 thread_t *new_thread);
1839
1840kern_return_t
1841thread_create_running(
1842 task_t task,
1843 int flavor,
1844 thread_state_t new_state,
1845 mach_msg_type_number_t new_state_count,
1846 thread_t *new_thread)
1847{
1848 return thread_create_running_internal2(
1849 task, flavor, new_state, new_state_count,
1850 new_thread, FALSE);
1851}
1852
1853kern_return_t
1854thread_create_running_from_user(
1855 task_t task,
1856 int flavor,
1857 thread_state_t new_state,
1858 mach_msg_type_number_t new_state_count,
1859 thread_t *new_thread)
1860{
1861 return thread_create_running_internal2(
1862 task, flavor, new_state, new_state_count,
1863 new_thread, TRUE);
1864}
1865
1866kern_return_t
1867thread_create_workq_waiting(
1868 task_t task,
1869 thread_continue_t continuation,
1870 thread_t *new_thread)
1871{
1872 /*
1873 * Create the thread, but don't pin its control port just yet, in case someone calls
1874 * task_threads() and deallocates the pinned port before the kernel copyout happens,
1875 * which would result in a pinned port guard exception. Instead, pin and copy out
1876 * atomically during workq_setup_and_run().
1877 */
1878 int options = TH_OPTION_NOSUSP | TH_OPTION_WORKQ;
1879 return thread_create_waiting_internal(task, continuation, NULL,
1880 kThreadWaitParkedWorkQueue, options, new_thread);
1881}
1882
1883/*
1884 * kernel_thread_create:
1885 *
1886 * Create a thread in the kernel task
1887 * to execute in kernel context.
1888 */
1889kern_return_t
1890kernel_thread_create(
1891 thread_continue_t continuation,
1892 void *parameter,
1893 integer_t priority,
1894 thread_t *new_thread)
1895{
1896 kern_return_t result;
1897 thread_t thread;
1898 task_t task = kernel_task;
1899
1900 result = thread_create_internal(task, priority, continuation, parameter,
1901 TH_OPTION_NONE, &thread);
1902 if (result != KERN_SUCCESS) {
1903 return result;
1904 }
1905
1906 task_unlock(task);
1907 lck_mtx_unlock(&tasks_threads_lock);
1908
1909 stack_alloc(thread);
1910 assert(thread->kernel_stack != 0);
1911#if !defined(XNU_TARGET_OS_OSX)
1912 if (priority > BASEPRI_KERNEL)
1913#endif
1914 thread->reserved_stack = thread->kernel_stack;
1915
1916 if (debug_task & 1) {
1917 kprintf("kernel_thread_create: thread = %p continuation = %p\n", thread, continuation);
1918 }
1919 *new_thread = thread;
1920
1921 return result;
1922}
1923
1924kern_return_t
1925kernel_thread_start_priority(
1926 thread_continue_t continuation,
1927 void *parameter,
1928 integer_t priority,
1929 thread_t *new_thread)
1930{
1931 kern_return_t result;
1932 thread_t thread;
1933
1934 result = kernel_thread_create(continuation, parameter, priority, &thread);
1935 if (result != KERN_SUCCESS) {
1936 return result;
1937 }
1938
1939 *new_thread = thread;
1940
1941 thread_mtx_lock(thread);
1942 thread_start(thread);
1943 thread_mtx_unlock(thread);
1944
1945 return result;
1946}
1947
1948kern_return_t
1949kernel_thread_start(
1950 thread_continue_t continuation,
1951 void *parameter,
1952 thread_t *new_thread)
1953{
1954 return kernel_thread_start_priority(continuation, parameter, -1, new_thread);
1955}
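/*
 * Illustrative sketch (not from the original source): a typical in-kernel
 * caller starts a worker with kernel_thread_start() and drops the creation
 * reference returned in *new_thread once the handle is no longer needed.
 * The continuation and its parameter are assumptions for the example.
 *
 *	static void my_worker_continuation(void *param, wait_result_t wr)
 *	{
 *		// ... do work with param ...
 *		thread_terminate(current_thread());
 *	}
 *
 *	thread_t worker;
 *	if (kernel_thread_start(my_worker_continuation, my_param, &worker) == KERN_SUCCESS) {
 *		thread_deallocate(worker);	// done with the handle
 *	}
 */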
1956
1957/* Separated into helper function so it can be used by THREAD_BASIC_INFO and THREAD_EXTENDED_INFO */
1958/* it is assumed that the thread is locked by the caller */
1959static void
1960retrieve_thread_basic_info(thread_t thread, thread_basic_info_t basic_info)
1961{
1962 int state, flags;
1963
1964 /* fill in info */
1965
1966 thread_read_times(thread, &basic_info->user_time,
1967 &basic_info->system_time, NULL);
1968
1969 /*
1970 * Update lazy-evaluated scheduler info because someone wants it.
1971 */
1972 if (SCHED(can_update_priority)(thread)) {
1973 SCHED(update_priority)(thread);
1974 }
1975
1976 basic_info->sleep_time = 0;
1977
1978 /*
1979 * To calculate cpu_usage, first correct for timer rate,
1980 * then for 5/8 ageing. The correction factor [3/5] is
1981 * (1/(5/8) - 1).
1982 */
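	/*
	 * Arithmetic behind the factor quoted above (added note):
	 * 1/(5/8) - 1 = 8/5 - 1 = 3/5, which is the "* 3 / 5" scale
	 * applied below after the timer-rate correction.
	 */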
1983 basic_info->cpu_usage = 0;
1984#if defined(CONFIG_SCHED_TIMESHARE_CORE)
1985 if (sched_tick_interval) {
1986 basic_info->cpu_usage = (integer_t)(((uint64_t)thread->cpu_usage
1987 * TH_USAGE_SCALE) / sched_tick_interval);
1988 basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
1989 }
1990#endif
1991
1992 if (basic_info->cpu_usage > TH_USAGE_SCALE) {
1993 basic_info->cpu_usage = TH_USAGE_SCALE;
1994 }
1995
1996 basic_info->policy = ((thread->sched_mode == TH_MODE_TIMESHARE)?
1997 POLICY_TIMESHARE: POLICY_RR);
1998
1999 flags = 0;
2000 if (thread->options & TH_OPT_IDLE_THREAD) {
2001 flags |= TH_FLAGS_IDLE;
2002 }
2003
2004 if (thread->options & TH_OPT_GLOBAL_FORCED_IDLE) {
2005 flags |= TH_FLAGS_GLOBAL_FORCED_IDLE;
2006 }
2007
2008 if (!thread->kernel_stack) {
2009 flags |= TH_FLAGS_SWAPPED;
2010 }
2011
2012 state = 0;
2013 if (thread->state & TH_TERMINATE) {
2014 state = TH_STATE_HALTED;
2015 } else if (thread->state & TH_RUN) {
2016 state = TH_STATE_RUNNING;
2017 } else if (thread->state & TH_UNINT) {
2018 state = TH_STATE_UNINTERRUPTIBLE;
2019 } else if (thread->state & TH_SUSP) {
2020 state = TH_STATE_STOPPED;
2021 } else if (thread->state & TH_WAIT) {
2022 state = TH_STATE_WAITING;
2023 }
2024
2025 basic_info->run_state = state;
2026 basic_info->flags = flags;
2027
2028 basic_info->suspend_count = thread->user_stop_count;
2029
2030 return;
2031}
2032
2033kern_return_t
2034thread_info_internal(
2035 thread_t thread,
2036 thread_flavor_t flavor,
2037 thread_info_t thread_info_out, /* ptr to OUT array */
2038 mach_msg_type_number_t *thread_info_count) /*IN/OUT*/
2039{
2040 spl_t s;
2041
2042 if (thread == THREAD_NULL) {
2043 return KERN_INVALID_ARGUMENT;
2044 }
2045
2046 if (flavor == THREAD_BASIC_INFO) {
2047 if (*thread_info_count < THREAD_BASIC_INFO_COUNT) {
2048 return KERN_INVALID_ARGUMENT;
2049 }
2050
2051 s = splsched();
2052 thread_lock(thread);
2053
2054 retrieve_thread_basic_info(thread, (thread_basic_info_t) thread_info_out);
2055
2056 thread_unlock(thread);
2057 splx(s);
2058
2059 *thread_info_count = THREAD_BASIC_INFO_COUNT;
2060
2061 return KERN_SUCCESS;
2062 } else if (flavor == THREAD_IDENTIFIER_INFO) {
2063 thread_identifier_info_t identifier_info;
2064
2065 if (*thread_info_count < THREAD_IDENTIFIER_INFO_COUNT) {
2066 return KERN_INVALID_ARGUMENT;
2067 }
2068
2069 identifier_info = __IGNORE_WCASTALIGN((thread_identifier_info_t)thread_info_out);
2070
2071 s = splsched();
2072 thread_lock(thread);
2073
2074 identifier_info->thread_id = thread->thread_id;
2075 identifier_info->thread_handle = thread->machine.cthread_self;
2076 identifier_info->dispatch_qaddr = thread_dispatchqaddr(thread);
2077
2078 thread_unlock(thread);
2079 splx(s);
2080 return KERN_SUCCESS;
2081 } else if (flavor == THREAD_SCHED_TIMESHARE_INFO) {
2082 policy_timeshare_info_t ts_info;
2083
2084 if (*thread_info_count < POLICY_TIMESHARE_INFO_COUNT) {
2085 return KERN_INVALID_ARGUMENT;
2086 }
2087
2088 ts_info = (policy_timeshare_info_t)thread_info_out;
2089
2090 s = splsched();
2091 thread_lock(thread);
2092
2093 if (thread->sched_mode != TH_MODE_TIMESHARE) {
2094 thread_unlock(thread);
2095 splx(s);
2096 return KERN_INVALID_POLICY;
2097 }
2098
2099 ts_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2100 if (ts_info->depressed) {
2101 ts_info->base_priority = DEPRESSPRI;
2102 ts_info->depress_priority = thread->base_pri;
2103 } else {
2104 ts_info->base_priority = thread->base_pri;
2105 ts_info->depress_priority = -1;
2106 }
2107
2108 ts_info->cur_priority = thread->sched_pri;
2109 ts_info->max_priority = thread->max_priority;
2110
2111 thread_unlock(thread);
2112 splx(s);
2113
2114 *thread_info_count = POLICY_TIMESHARE_INFO_COUNT;
2115
2116 return KERN_SUCCESS;
2117 } else if (flavor == THREAD_SCHED_FIFO_INFO) {
2118 if (*thread_info_count < POLICY_FIFO_INFO_COUNT) {
2119 return KERN_INVALID_ARGUMENT;
2120 }
2121
2122 return KERN_INVALID_POLICY;
2123 } else if (flavor == THREAD_SCHED_RR_INFO) {
2124 policy_rr_info_t rr_info;
2125 uint32_t quantum_time;
2126 uint64_t quantum_ns;
2127
2128 if (*thread_info_count < POLICY_RR_INFO_COUNT) {
2129 return KERN_INVALID_ARGUMENT;
2130 }
2131
2132 rr_info = (policy_rr_info_t) thread_info_out;
2133
2134 s = splsched();
2135 thread_lock(thread);
2136
2137 if (thread->sched_mode == TH_MODE_TIMESHARE) {
2138 thread_unlock(thread);
2139 splx(s);
2140
2141 return KERN_INVALID_POLICY;
2142 }
2143
2144 rr_info->depressed = (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != 0;
2145 if (rr_info->depressed) {
2146 rr_info->base_priority = DEPRESSPRI;
2147 rr_info->depress_priority = thread->base_pri;
2148 } else {
2149 rr_info->base_priority = thread->base_pri;
2150 rr_info->depress_priority = -1;
2151 }
2152
2153 quantum_time = SCHED(initial_quantum_size)(THREAD_NULL);
2154 absolutetime_to_nanoseconds(quantum_time, &quantum_ns);
2155
2156 rr_info->max_priority = thread->max_priority;
2157 rr_info->quantum = (uint32_t)(quantum_ns / 1000 / 1000);
2158
2159 thread_unlock(thread);
2160 splx(s);
2161
2162 *thread_info_count = POLICY_RR_INFO_COUNT;
2163
2164 return KERN_SUCCESS;
2165 } else if (flavor == THREAD_EXTENDED_INFO) {
2166 thread_basic_info_data_t basic_info;
2167 thread_extended_info_t extended_info = __IGNORE_WCASTALIGN((thread_extended_info_t)thread_info_out);
2168
2169 if (*thread_info_count < THREAD_EXTENDED_INFO_COUNT) {
2170 return KERN_INVALID_ARGUMENT;
2171 }
2172
2173 s = splsched();
2174 thread_lock(thread);
2175
2176 /* NOTE: This mimics fill_taskthreadinfo(), which is the function used by proc_pidinfo() for
2177 * the PROC_PIDTHREADINFO flavor (which can't be used on corpses)
2178 */
2179 retrieve_thread_basic_info(thread, &basic_info);
2180 extended_info->pth_user_time = (((uint64_t)basic_info.user_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.user_time.microseconds * NSEC_PER_USEC));
2181 extended_info->pth_system_time = (((uint64_t)basic_info.system_time.seconds * NSEC_PER_SEC) + ((uint64_t)basic_info.system_time.microseconds * NSEC_PER_USEC));
2182
2183 extended_info->pth_cpu_usage = basic_info.cpu_usage;
2184 extended_info->pth_policy = basic_info.policy;
2185 extended_info->pth_run_state = basic_info.run_state;
2186 extended_info->pth_flags = basic_info.flags;
2187 extended_info->pth_sleep_time = basic_info.sleep_time;
2188 extended_info->pth_curpri = thread->sched_pri;
2189 extended_info->pth_priority = thread->base_pri;
2190 extended_info->pth_maxpriority = thread->max_priority;
2191
2192 bsd_getthreadname(get_bsdthread_info(thread), extended_info->pth_name);
2193
2194 thread_unlock(thread);
2195 splx(s);
2196
2197 *thread_info_count = THREAD_EXTENDED_INFO_COUNT;
2198
2199 return KERN_SUCCESS;
2200 } else if (flavor == THREAD_DEBUG_INFO_INTERNAL) {
2201#if DEVELOPMENT || DEBUG
2202 thread_debug_info_internal_t dbg_info;
2203 if (*thread_info_count < THREAD_DEBUG_INFO_INTERNAL_COUNT) {
2204 return KERN_NOT_SUPPORTED;
2205 }
2206
2207 if (thread_info_out == NULL) {
2208 return KERN_INVALID_ARGUMENT;
2209 }
2210
2211 dbg_info = __IGNORE_WCASTALIGN((thread_debug_info_internal_t)thread_info_out);
2212 dbg_info->page_creation_count = thread->t_page_creation_count;
2213
2214 *thread_info_count = THREAD_DEBUG_INFO_INTERNAL_COUNT;
2215 return KERN_SUCCESS;
2216#endif /* DEVELOPMENT || DEBUG */
2217 return KERN_NOT_SUPPORTED;
2218 }
2219
2220 return KERN_INVALID_ARGUMENT;
2221}
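/*
 * Illustrative sketch (assumption, not part of the original source): the
 * flavors handled above back the Mach thread_info() call. A user-space
 * caller of the THREAD_BASIC_INFO flavor looks roughly like this:
 *
 *	#include <mach/mach.h>
 *
 *	thread_basic_info_data_t info;
 *	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
 *	kern_return_t kr = thread_info(mach_thread_self(), THREAD_BASIC_INFO,
 *	    (thread_info_t)&info, &count);
 *	if (kr == KERN_SUCCESS) {
 *		// info.cpu_usage is scaled by TH_USAGE_SCALE
 *	}
 */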
2222
2223static void
2224_convert_mach_to_time_value(uint64_t time_mach, time_value_t *time)
2225{
2226 clock_sec_t secs;
2227 clock_usec_t usecs;
2228 absolutetime_to_microtime(time_mach, &secs, &usecs);
2229 time->seconds = (typeof(time->seconds))secs;
2230 time->microseconds = usecs;
2231}
2232
2233void
2234thread_read_times(
2235 thread_t thread,
2236 time_value_t *user_time,
2237 time_value_t *system_time,
2238 time_value_t *runnable_time)
2239{
2240 if (user_time && system_time) {
2241 struct recount_times_mach times = recount_thread_times(thread);
2242 _convert_mach_to_time_value(times.rtm_user, user_time);
2243 _convert_mach_to_time_value(times.rtm_system, system_time);
2244 }
2245
2246 if (runnable_time) {
2247 uint64_t runnable_time_mach = timer_grab(&thread->runnable_timer);
2248 _convert_mach_to_time_value(runnable_time_mach, runnable_time);
2249 }
2250}
2251
2252uint64_t
2253thread_get_runtime_self(void)
2254{
2255 /*
2256 * Must be guaranteed to stay on the same CPU and not be updated by the
2257 * scheduler.
2258 */
2259 boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
2260 uint64_t time_mach = recount_current_thread_time_mach();
2261 ml_set_interrupts_enabled(interrupt_state);
2262 return time_mach;
2263}
2264
2265/*
2266 * thread_wire_internal:
2267 *
2268 * Specify that the target thread must always be able
2269 * to run and to allocate memory.
2270 */
2271kern_return_t
2272thread_wire_internal(
2273 host_priv_t host_priv,
2274 thread_t thread,
2275 boolean_t wired,
2276 boolean_t *prev_state)
2277{
2278 if (host_priv == NULL || thread != current_thread()) {
2279 return KERN_INVALID_ARGUMENT;
2280 }
2281
2282 if (prev_state) {
2283 *prev_state = (thread->options & TH_OPT_VMPRIV) != 0;
2284 }
2285
2286 if (wired) {
2287 if (!(thread->options & TH_OPT_VMPRIV)) {
2288 vm_page_free_reserve(1); /* XXX */
2289 }
2290 thread->options |= TH_OPT_VMPRIV;
2291 } else {
2292 if (thread->options & TH_OPT_VMPRIV) {
2293 vm_page_free_reserve(-1); /* XXX */
2294 }
2295 thread->options &= ~TH_OPT_VMPRIV;
2296 }
2297
2298 return KERN_SUCCESS;
2299}
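/*
 * Illustrative pairing for the routine above (sketch, not from the original
 * source): a caller wires the current thread before a critical allocation
 * path and restores the previous state afterwards.
 *
 *	boolean_t prev;
 *	thread_wire_internal(host_priv_self(), current_thread(), TRUE, &prev);
 *	// ... allocation path that must always be able to make progress ...
 *	thread_wire_internal(host_priv_self(), current_thread(), prev, NULL);
 */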
2300
2301
2302/*
2303 * thread_wire:
2304 *
2305 * User-api wrapper for thread_wire_internal()
2306 */
2307kern_return_t
2308thread_wire(
2309 host_priv_t host_priv __unused,
2310 thread_t thread __unused,
2311 boolean_t wired __unused)
2312{
2313 return KERN_NOT_SUPPORTED;
2314}
2315
2316boolean_t
2317is_external_pageout_thread(void)
2318{
2319 return current_thread() == pgo_iothread_external_state.pgo_iothread;
2320}
2321
2322boolean_t
2323is_vm_privileged(void)
2324{
2325 return current_thread()->options & TH_OPT_VMPRIV ? TRUE : FALSE;
2326}
2327
2328boolean_t
2329set_vm_privilege(boolean_t privileged)
2330{
2331 boolean_t was_vmpriv;
2332
2333 if (current_thread()->options & TH_OPT_VMPRIV) {
2334 was_vmpriv = TRUE;
2335 } else {
2336 was_vmpriv = FALSE;
2337 }
2338
2339 if (privileged != FALSE) {
2340 current_thread()->options |= TH_OPT_VMPRIV;
2341 } else {
2342 current_thread()->options &= ~TH_OPT_VMPRIV;
2343 }
2344
2345 return was_vmpriv;
2346}
2347
2348void
2349thread_floor_boost_set_promotion_locked(thread_t thread)
2350{
2351 assert(thread->priority_floor_count > 0);
2352
2353 if (!(thread->sched_flags & TH_SFLAG_FLOOR_PROMOTED)) {
2354 sched_thread_promote_reason(thread, TH_SFLAG_FLOOR_PROMOTED, 0);
2355 }
2356}
2357
2358/*! @function thread_priority_floor_start
2359 * @abstract boost the current thread priority to floor.
2360 * @discussion Increase the priority of the current thread to at least MINPRI_FLOOR.
2361 * The boost will be maintained until a corresponding thread_priority_floor_end()
2362 * is called. Every call to thread_priority_floor_start() needs to have a corresponding
2363 * call to thread_priority_floor_end() from the same thread.
2364 * No thread can return to userspace before calling thread_priority_floor_end().
2365 *
2366 * NOTE: Avoid using this function. Try to use gate_t or sleep_with_inheritor()
2367 * instead.
2368 * @result a token to be given to the corresponding thread_priority_floor_end()
2369 */
2370thread_pri_floor_t
2371thread_priority_floor_start(void)
2372{
2373 thread_pri_floor_t ret;
2374 thread_t thread = current_thread();
2375 __assert_only uint16_t prev_priority_floor_count;
2376
2377 assert(thread->priority_floor_count < UINT16_MAX);
2378 prev_priority_floor_count = thread->priority_floor_count++;
2379#if MACH_ASSERT
2380 /*
2381 * Set the ast to check that the
2382 * priority_floor_count is going to be set to zero when
2383 * going back to userspace.
2384 * Set it only once when we increment it for the first time.
2385 */
2386 if (prev_priority_floor_count == 0) {
2387 act_set_debug_assert();
2388 }
2389#endif
2390
2391 ret.thread = thread;
2392 return ret;
2393}
2394
2395/*! @function thread_priority_floor_end
2396 * @abstract ends the floor boost.
2397 * @param token the token obtained from thread_priority_floor_start()
2398 * @discussion ends the priority floor boost started with thread_priority_floor_start()
2399 */
2400void
2401thread_priority_floor_end(thread_pri_floor_t *token)
2402{
2403 thread_t thread = current_thread();
2404
2405 assert(thread->priority_floor_count > 0);
2406 assertf(token->thread == thread, "thread_priority_floor_end called from a different thread from thread_priority_floor_start %p %p", thread, token->thread);
2407
2408 if ((thread->priority_floor_count-- == 1) && (thread->sched_flags & TH_SFLAG_FLOOR_PROMOTED)) {
2409 spl_t s = splsched();
2410 thread_lock(thread);
2411
2412 if (thread->sched_flags & TH_SFLAG_FLOOR_PROMOTED) {
2413 sched_thread_unpromote_reason(thread, TH_SFLAG_FLOOR_PROMOTED, 0);
2414 }
2415
2416 thread_unlock(thread);
2417 splx(s);
2418 }
2419
2420 token->thread = NULL;
2421}
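/*
 * Illustrative pairing of the two routines above (sketch, not from the
 * original source); the work in between is a placeholder:
 *
 *	thread_pri_floor_t token = thread_priority_floor_start();
 *	// ... section that must not run below MINPRI_FLOOR ...
 *	thread_priority_floor_end(&token);
 */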
2422
2423/*
2424 * XXX assuming current thread only, for now...
2425 */
2426void
2427thread_guard_violation(thread_t thread,
2428 mach_exception_data_type_t code, mach_exception_data_type_t subcode, boolean_t fatal)
2429{
2430 assert(thread == current_thread());
2431
2432 /* Don't set up the AST for kernel threads; this check is needed to ensure
2433 * that the guard_exc_* fields in the thread structure are set only by the
2434 * current thread and therefore, don't require a lock.
2435 */
2436 if (get_threadtask(thread) == kernel_task) {
2437 return;
2438 }
2439
2440 assert(EXC_GUARD_DECODE_GUARD_TYPE(code));
2441
2442 /*
2443 * Use the saved state area of the thread structure
2444 * to store all info required to handle the AST when
2445 * returning to userspace. It's possible that there is
2446 * already a pending guard exception. If it's non-fatal,
2447 * it can only be over-written by a fatal exception code.
2448 */
2449 if (thread->guard_exc_info.code && (thread->guard_exc_fatal || !fatal)) {
2450 return;
2451 }
2452
2453 thread->guard_exc_info.code = code;
2454 thread->guard_exc_info.subcode = subcode;
2455 thread->guard_exc_fatal = fatal ? 1 : 0;
2456
2457 spl_t s = splsched();
2458 thread_ast_set(thread, AST_GUARD);
2459 ast_propagate(thread);
2460 splx(s);
2461}
2462
2463#if CONFIG_DEBUG_SYSCALL_REJECTION
2464extern void rejected_syscall_guard_ast(thread_t __unused t, mach_exception_data_type_t code, mach_exception_data_type_t subcode);
2465#endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
2466
2467/*
2468 * guard_ast:
2469 *
2470 * Handle AST_GUARD for a thread. This routine looks at the
2471 * state saved in the thread structure to determine the cause
2472 * of this exception. Based on this value, it invokes the
2473 * appropriate routine which determines other exception related
2474 * info and raises the exception.
2475 */
2476void
2477guard_ast(thread_t t)
2478{
2479 const mach_exception_data_type_t
2480 code = t->guard_exc_info.code,
2481 subcode = t->guard_exc_info.subcode;
2482
2483 t->guard_exc_info.code = 0;
2484 t->guard_exc_info.subcode = 0;
2485 t->guard_exc_fatal = 0;
2486
2487 switch (EXC_GUARD_DECODE_GUARD_TYPE(code)) {
2488 case GUARD_TYPE_NONE:
2489 /* lingering AST_GUARD on the processor? */
2490 break;
2491 case GUARD_TYPE_MACH_PORT:
2492 mach_port_guard_ast(t, code, subcode);
2493 break;
2494 case GUARD_TYPE_FD:
2495 fd_guard_ast(t, code, subcode);
2496 break;
2497#if CONFIG_VNGUARD
2498 case GUARD_TYPE_VN:
2499 vn_guard_ast(t, code, subcode);
2500 break;
2501#endif
2502 case GUARD_TYPE_VIRT_MEMORY:
2503 virt_memory_guard_ast(t, code, subcode);
2504 break;
2505#if CONFIG_DEBUG_SYSCALL_REJECTION
2506 case GUARD_TYPE_REJECTED_SC:
2507 rejected_syscall_guard_ast(t, code, subcode);
2508 break;
2509#endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
2510 default:
2511 panic("guard_exc_info %llx %llx", code, subcode);
2512 }
2513}
2514
2515static void
2516thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1)
2517{
2518 if (warning == LEDGER_WARNING_ROSE_ABOVE) {
2519#if CONFIG_TELEMETRY
2520 /*
2521 * This thread is in danger of violating the CPU usage monitor. Enable telemetry
2522 * on the entire task so there are micro-stackshots available if and when
2523 * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots
2524 * for this thread only; but now that this task is suspect, knowing what all of
2525 * its threads are up to will be useful.
2526 */
2527 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1);
2528#endif
2529 return;
2530 }
2531
2532#if CONFIG_TELEMETRY
2533 /*
2534 * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or
2535 * exceeded the limit, turn telemetry off for the task.
2536 */
2537 telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0);
2538#endif
2539
2540 if (warning == 0) {
2541 SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU();
2542 }
2543}
2544
2545void __attribute__((noinline))
2546SENDING_NOTIFICATION__THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU(void)
2547{
2548 int pid = 0;
2549 task_t task = current_task();
2550 thread_t thread = current_thread();
2551 uint64_t tid = thread->thread_id;
2552 const char *procname = "unknown";
2553 time_value_t thread_total_time = {0, 0};
2554 time_value_t thread_system_time;
2555 time_value_t thread_user_time;
2556 int action;
2557 uint8_t percentage;
2558 uint32_t usage_percent = 0;
2559 uint32_t interval_sec;
2560 uint64_t interval_ns;
2561 uint64_t balance_ns;
2562 boolean_t fatal = FALSE;
2563 boolean_t send_exc_resource = TRUE; /* in addition to RESOURCE_NOTIFY */
2564 kern_return_t kr;
2565
2566#ifdef EXC_RESOURCE_MONITORS
2567 mach_exception_data_type_t code[EXCEPTION_CODE_MAX];
2568#endif /* EXC_RESOURCE_MONITORS */
2569 struct ledger_entry_info lei;
2570
2571 assert(thread->t_threadledger != LEDGER_NULL);
2572
2573 /*
2574 * Extract the fatal bit and suspend the monitor (which clears the bit).
2575 */
2576 task_lock(task);
2577 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
2578 fatal = TRUE;
2579 send_exc_resource = TRUE;
2580 }
2581 /* Only one thread can be here at a time. Whichever makes it through
2582 * first will successfully suspend the monitor and proceed to send the
2583 * notification. Other threads will get an error trying to suspend the
2584 * monitor and give up on sending the notification. In the first release,
2585 * the monitor won't be resumed for a number of seconds, but we may
2586 * eventually need to handle low-latency resume.
2587 */
2588 kr = task_suspend_cpumon(task);
2589 task_unlock(task);
2590 if (kr == KERN_INVALID_ARGUMENT) {
2591 return;
2592 }
2593
2594#ifdef MACH_BSD
2595 pid = proc_selfpid();
2596 void *bsd_info = get_bsdtask_info(task);
2597 if (bsd_info != NULL) {
2598 procname = proc_name_address(bsd_info);
2599 }
2600#endif
2601
2602 thread_get_cpulimit(&action, &percentage, &interval_ns);
2603
2604 interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
2605
2606 thread_read_times(thread, &thread_user_time, &thread_system_time, NULL);
2607 time_value_add(&thread_total_time, &thread_user_time);
2608 time_value_add(&thread_total_time, &thread_system_time);
2609 ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
2610
2611 /* credit/debit/balance/limit are in absolute time units;
2612 * the refill info is in nanoseconds. */
2613 absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
2614 if (lei.lei_last_refill > 0) {
2615 usage_percent = (uint32_t)((balance_ns * 100ULL) / lei.lei_last_refill);
2616 }
2617
2618 /* TODO: show task total runtime (via TASK_ABSOLUTETIME_INFO)? */
2619 printf("process %s[%d] thread %llu caught burning CPU! It used more than %d%% CPU over %u seconds\n",
2620 procname, pid, tid, percentage, interval_sec);
2621 printf(" (actual recent usage: %d%% over ~%llu seconds)\n",
2622 usage_percent, (lei.lei_last_refill + NSEC_PER_SEC / 2) / NSEC_PER_SEC);
2623 printf(" Thread lifetime cpu usage %d.%06ds, (%d.%06d user, %d.%06d sys)\n",
2624 thread_total_time.seconds, thread_total_time.microseconds,
2625 thread_user_time.seconds, thread_user_time.microseconds,
2626 thread_system_time.seconds, thread_system_time.microseconds);
2627 printf(" Ledger balance: %lld; mabs credit: %lld; mabs debit: %lld\n",
2628 lei.lei_balance, lei.lei_credit, lei.lei_debit);
2629 printf(" mabs limit: %llu; mabs period: %llu ns; last refill: %llu ns%s.\n",
2630 lei.lei_limit, lei.lei_refill_period, lei.lei_last_refill,
2631 (fatal ? " [fatal violation]" : ""));
2632
2633 /*
2634 * For now, send RESOURCE_NOTIFY in parallel with EXC_RESOURCE. Once
2635 * we have logging parity, we will stop sending EXC_RESOURCE (24508922).
2636 */
2637
2638 /* RESOURCE_NOTIFY MIG specifies nanoseconds of CPU time */
2639 lei.lei_balance = balance_ns;
2640 absolutetime_to_nanoseconds(lei.lei_limit, &lei.lei_limit);
2641 trace_resource_violation(RMON_CPUUSAGE_VIOLATED, &lei);
2642 kr = send_resource_violation(send_cpu_usage_violation, task, &lei,
2643 fatal ? kRNFatalLimitFlag : 0);
2644 if (kr) {
2645 printf("send_resource_violation(CPU usage, ...): error %#x\n", kr);
2646 }
2647
2648#ifdef EXC_RESOURCE_MONITORS
2649 if (send_exc_resource) {
2650 if (disable_exc_resource) {
2651 printf("process %s[%d] thread %llu caught burning CPU! "
2652 "EXC_RESOURCE%s suppressed by a boot-arg\n",
2653 procname, pid, tid, fatal ? " (and termination)" : "");
2654 return;
2655 }
2656
2657 if (disable_exc_resource_during_audio && audio_active) {
2658 printf("process %s[%d] thread %llu caught burning CPU! "
2659 "EXC_RESOURCE & termination suppressed due to audio playback\n",
2660 procname, pid, tid);
2661 return;
2662 }
2663 }
2664
2665
2666 if (send_exc_resource) {
2667 code[0] = code[1] = 0;
2668 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
2669 if (fatal) {
2670 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR_FATAL);
2671 } else {
2672 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
2673 }
2674 EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
2675 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], percentage);
2676 EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
2677 exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
2678 }
2679#endif /* EXC_RESOURCE_MONITORS */
2680
2681 if (fatal) {
2682#if CONFIG_JETSAM
2683 jetsam_on_ledger_cpulimit_exceeded();
2684#else
2685 task_terminate_internal(task);
2686#endif
2687 }
2688}
2689
2690bool os_variant_has_internal_diagnostics(const char *subsystem);
2691
2692#if DEVELOPMENT || DEBUG
2693
2694void __attribute__((noinline))
2695SENDING_NOTIFICATION__TASK_HAS_TOO_MANY_THREADS(task_t task, int thread_count)
2696{
2697 mach_exception_data_type_t code[EXCEPTION_CODE_MAX] = {0};
2698 int pid = task_pid(task);
2699 char procname[MAXCOMLEN + 1] = "unknown";
2700
2701 if (pid == 1) {
2702 /*
2703 * Cannot suspend launchd
2704 */
2705 return;
2706 }
2707
2708 proc_name(pid, procname, sizeof(procname));
2709
2710 /*
2711 * Skip all checks for testing when exc_resource_threads_enabled is overridden
2712 */
2713 if (exc_resource_threads_enabled == 2) {
2714 goto skip_checks;
2715 }
2716
2717 if (disable_exc_resource) {
2718 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2719 "suppressed by a boot-arg.\n", procname, pid, thread_count);
2720 return;
2721 }
2722
2723 if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
2724 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2725 "suppressed, internal diagnostics disabled.\n", procname, pid, thread_count);
2726 return;
2727 }
2728
2729 if (disable_exc_resource_during_audio && audio_active) {
2730 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2731 "suppressed due to audio playback.\n", procname, pid, thread_count);
2732 return;
2733 }
2734
2735 if (!exc_via_corpse_forking) {
2736 printf("process %s[%d] crossed thread count high watermark (%d), EXC_RESOURCE "
2737 "suppressed due to corpse forking being disabled.\n", procname, pid,
2738 thread_count);
2739 return;
2740 }
2741
2742skip_checks:
2743 printf("process %s[%d] crossed thread count high watermark (%d), sending "
2744 "EXC_RESOURCE\n", procname, pid, thread_count);
2745
2746 EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_THREADS);
2747 EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_THREADS_HIGH_WATERMARK);
2748 EXC_RESOURCE_THREADS_ENCODE_THREADS(code[0], thread_count);
2749
2750 task_enqueue_exception_with_corpse(task, EXC_RESOURCE, code, EXCEPTION_CODE_MAX, NULL, FALSE);
2751}
2752#endif /* DEVELOPMENT || DEBUG */
2753
2754void
2755thread_update_io_stats(thread_t thread, int size, int io_flags)
2756{
2757 task_t task = get_threadtask(thread);
2758 int io_tier;
2759
2760 if (thread->thread_io_stats == NULL || task->task_io_stats == NULL) {
2761 return;
2762 }
2763
2764 if (io_flags & DKIO_READ) {
2765 UPDATE_IO_STATS(thread->thread_io_stats->disk_reads, size);
2766 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->disk_reads, size);
2767 }
2768
2769 if (io_flags & DKIO_META) {
2770 UPDATE_IO_STATS(thread->thread_io_stats->metadata, size);
2771 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->metadata, size);
2772 }
2773
2774 if (io_flags & DKIO_PAGING) {
2775 UPDATE_IO_STATS(thread->thread_io_stats->paging, size);
2776 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->paging, size);
2777 }
2778
2779 io_tier = ((io_flags & DKIO_TIER_MASK) >> DKIO_TIER_SHIFT);
2780 assert(io_tier < IO_NUM_PRIORITIES);
2781
2782 UPDATE_IO_STATS(thread->thread_io_stats->io_priority[io_tier], size);
2783 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->io_priority[io_tier], size);
2784
2785 /* Update Total I/O Counts */
2786 UPDATE_IO_STATS(thread->thread_io_stats->total_io, size);
2787 UPDATE_IO_STATS_ATOMIC(task->task_io_stats->total_io, size);
2788
2789 if (!(io_flags & DKIO_READ)) {
2790 DTRACE_IO3(physical_writes, struct task *, task, uint32_t, size, int, io_flags);
2791 ledger_credit(task->ledger, task_ledgers.physical_writes, size);
2792 }
2793}
2794
2795static void
2796init_thread_ledgers(void)
2797{
2798 ledger_template_t t;
2799 int idx;
2800
2801 assert(thread_ledger_template == NULL);
2802
2803 if ((t = ledger_template_create("Per-thread ledger")) == NULL) {
2804 panic("couldn't create thread ledger template");
2805 }
2806
2807 if ((idx = ledger_entry_add(t, "cpu_time", "sched", "ns")) < 0) {
2808 panic("couldn't create cpu_time entry for thread ledger template");
2809 }
2810
2811 if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
2812 panic("couldn't set thread ledger callback for cpu_time entry");
2813 }
2814
2815 thread_ledgers.cpu_time = idx;
2816
2817 ledger_template_complete(t);
2818 thread_ledger_template = t;
2819}
2820
2821/*
2822 * Returns the amount of (abs) CPU time that remains before the limit would be
2823 * hit or the amount of time left in the current interval, whichever is smaller.
2824 * This value changes as CPU time is consumed and the ledgers refilled.
2825 * Used to limit the quantum of a thread.
2826 */
2827uint64_t
2828thread_cpulimit_remaining(uint64_t now)
2829{
2830 thread_t thread = current_thread();
2831
2832 if ((thread->options &
2833 (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT)) == 0) {
2834 return UINT64_MAX;
2835 }
2836
2837 /* Amount of time left in the current interval. */
2838 const uint64_t interval_remaining =
2839 ledger_get_interval_remaining(thread->t_threadledger, thread_ledgers.cpu_time, now);
2840
2841 /* Amount that can be spent until the limit is hit. */
2842 const uint64_t remaining =
2843 ledger_get_remaining(thread->t_threadledger, thread_ledgers.cpu_time);
2844
2845 return MIN(interval_remaining, remaining);
2846}
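/*
 * Illustrative use of the value returned above (sketch; the quantum
 * variable is an assumption, not taken from the scheduler code):
 *
 *	uint64_t quantum = std_quantum;
 *	quantum = MIN(quantum, thread_cpulimit_remaining(mach_absolute_time()));
 */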
2847
2848/*
2849 * Returns true if a new interval should be started.
2850 */
2851bool
2852thread_cpulimit_interval_has_expired(uint64_t now)
2853{
2854 thread_t thread = current_thread();
2855
2856 if ((thread->options &
2857 (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT)) == 0) {
2858 return false;
2859 }
2860
2861 return ledger_get_interval_remaining(thread->t_threadledger,
2862 thread_ledgers.cpu_time, now) == 0;
2863}
2864
2865/*
2866 * Balances the ledger and sets the last refill time to `now`.
2867 */
2868void
2869thread_cpulimit_restart(uint64_t now)
2870{
2871 thread_t thread = current_thread();
2872
2873 assert3u(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT), !=, 0);
2874
2875 ledger_restart(thread->t_threadledger, thread_ledgers.cpu_time, now);
2876}
2877
2878/*
2879 * Returns currently applied CPU usage limit, or 0/0 if none is applied.
2880 */
2881int
2882thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
2883{
2884 int64_t abstime = 0;
2885 uint64_t limittime = 0;
2886 thread_t thread = current_thread();
2887
2888 *percentage = 0;
2889 *interval_ns = 0;
2890 *action = 0;
2891
2892 if (thread->t_threadledger == LEDGER_NULL) {
2893 /*
2894 * This thread has no per-thread ledger, so it can't possibly
2895 * have a CPU limit applied.
2896 */
2897 return KERN_SUCCESS;
2898 }
2899
2900 ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns);
2901 ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime);
2902
2903 if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) {
2904 /*
2905 * This thread's CPU time ledger has no period or limit; so it
2906 * doesn't have a CPU limit applied.
2907 */
2908 return KERN_SUCCESS;
2909 }
2910
2911 /*
2912 * This calculation is the converse to the one in thread_set_cpulimit().
2913 */
2914 absolutetime_to_nanoseconds(abstime, &limittime);
2915 *percentage = (uint8_t)((limittime * 100ULL) / *interval_ns);
2916 assert(*percentage <= 100);
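	/*
	 * Worked example of the conversion above (added note): a limit of
	 * 500 ms of CPU time over a 1 s interval yields
	 * (500,000,000 * 100) / 1,000,000,000 = 50, i.e. *percentage = 50.
	 */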
2917
2918 if (thread->options & TH_OPT_PROC_CPULIMIT) {
2919 assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0);
2920
2921 *action = THREAD_CPULIMIT_BLOCK;
2922 } else if (thread->options & TH_OPT_PRVT_CPULIMIT) {
2923 assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0);
2924
2925 *action = THREAD_CPULIMIT_EXCEPTION;
2926 } else {
2927 *action = THREAD_CPULIMIT_DISABLE;
2928 }
2929
2930 return KERN_SUCCESS;
2931}
2932
2933/*
2934 * Set CPU usage limit on a thread.
2935 */
2936int
2937thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns)
2938{
2939 thread_t thread = current_thread();
2940 ledger_t l;
2941 uint64_t limittime = 0;
2942 uint64_t abstime = 0;
2943
2944 assert(percentage <= 100);
2945 assert(percentage > 0 || action == THREAD_CPULIMIT_DISABLE);
2946
2947 /*
2948 * Disallow any change to the CPU limit if the TH_OPT_FORCED_LEDGER
2949 * flag is set.
2950 */
2951 if ((thread->options & TH_OPT_FORCED_LEDGER) != 0) {
2952 return KERN_FAILURE;
2953 }
2954
2955 if (action == THREAD_CPULIMIT_DISABLE) {
2956 /*
2957 * Remove CPU limit, if any exists.
2958 */
2959 if (thread->t_threadledger != LEDGER_NULL) {
2960 l = thread->t_threadledger;
2961 ledger_set_limit(l, thread_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
2962 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_IGNORE);
2963 thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT);
2964 }
2965
2966 return 0;
2967 }
2968
2969 if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) {
2970 return KERN_INVALID_ARGUMENT;
2971 }
2972
2973 l = thread->t_threadledger;
2974 if (l == LEDGER_NULL) {
2975 /*
2976 * This thread doesn't yet have a per-thread ledger, so create one with the CPU time entry active.
2977 */
2978 if ((l = ledger_instantiate(thread_ledger_template, LEDGER_CREATE_INACTIVE_ENTRIES)) == LEDGER_NULL) {
2979 return KERN_RESOURCE_SHORTAGE;
2980 }
2981
2982 /*
2983 * We are the first to create this thread's ledger, so only activate our entry.
2984 */
2985 ledger_entry_setactive(l, thread_ledgers.cpu_time);
2986 thread->t_threadledger = l;
2987 }
2988
2989 /*
2990 * The limit is specified as a percentage of CPU over an interval in nanoseconds.
2991 * Calculate the amount of CPU time that the thread needs to consume in order to hit the limit.
2992 */
2993 limittime = (interval_ns * percentage) / 100;
2994 nanoseconds_to_absolutetime(limittime, &abstime);
2995 ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct);
2996 /*
2997 * Refill the thread's allotted CPU time every interval_ns nanoseconds.
2998 */
2999 ledger_set_period(l, thread_ledgers.cpu_time, interval_ns);
3000
3001 if (action == THREAD_CPULIMIT_EXCEPTION) {
3002 /*
3003 * We don't support programming the CPU usage monitor on a task if any of its
3004 * threads have a per-thread blocking CPU limit configured.
3005 */
3006 if (thread->options & TH_OPT_PRVT_CPULIMIT) {
3007 panic("CPU usage monitor activated, but blocking thread limit exists");
3008 }
3009
3010 /*
3011 * Make a note that this thread's CPU limit is being used for the task-wide CPU
3012 * usage monitor. We don't have to arm the callback which will trigger the
3013 * exception, because that was done for us in ledger_instantiate (because the
3014 * ledger template used has a default callback).
3015 */
3016 thread->options |= TH_OPT_PROC_CPULIMIT;
3017 } else {
3018 /*
3019 * We deliberately override any CPU limit imposed by a task-wide limit (eg
3020 * CPU usage monitor).
3021 */
3022 thread->options &= ~TH_OPT_PROC_CPULIMIT;
3023
3024 thread->options |= TH_OPT_PRVT_CPULIMIT;
3025 /* The per-thread ledger template by default has a callback for CPU time */
3026 ledger_disable_callback(l, thread_ledgers.cpu_time);
3027 ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
3028 }
3029
3030 return 0;
3031}
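/*
 * Illustrative call of the routine above (sketch, not from the original
 * source): block the calling thread once it has consumed 50% of the CPU
 * over each 1-second interval.
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 50, 1 * NSEC_PER_SEC);
 */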
3032
3033void
3034thread_sched_call(
3035 thread_t thread,
3036 sched_call_t call)
3037{
3038 assert((thread->state & TH_WAIT_REPORT) == 0);
3039 thread->sched_call = call;
3040}
3041
3042uint64_t
3043thread_tid(
3044 thread_t thread)
3045{
3046 return thread != THREAD_NULL? thread->thread_id: 0;
3047}
3048
3049uint64_t
3050uthread_tid(
3051 struct uthread *uth)
3052{
3053 if (uth) {
3054 return thread_tid(get_machthread(uth));
3055 }
3056 return 0;
3057}
3058
3059uint16_t
3060thread_set_tag(thread_t th, uint16_t tag)
3061{
3062 return thread_set_tag_internal(th, tag);
3063}
3064
3065uint16_t
3066thread_get_tag(thread_t th)
3067{
3068 return thread_get_tag_internal(th);
3069}
3070
3071uint64_t
3072thread_last_run_time(thread_t th)
3073{
3074 return th->last_run_time;
3075}
3076
3077/*
3078 * Shared resource contention management
3079 *
3080 * The scheduler attempts to load balance the shared resource intensive
3081 * workloads across clusters to ensure that the resource is not heavily
3082 * contended. The kernel relies on external agents (userspace or
3083 * performance controller) to identify shared resource heavy threads.
3084 * The load balancing is achieved based on the scheduler configuration
3085 * enabled on the platform.
3086 */
3087
3088
3089#if CONFIG_SCHED_EDGE
3090
3091/*
3092 * On the Edge scheduler, the load balancing is achieved by looking
3093 * at cluster level shared resource loads and migrating resource heavy
3094 * threads dynamically to under utilized cluster. Therefore, when a
3095 * thread is indicated as a resource heavy thread, the policy set
3096 * routine simply adds a flag to the thread which is looked at by
3097 * the scheduler on thread migration decisions.
3098 */
3099
3100boolean_t
3101thread_shared_rsrc_policy_get(thread_t thread, cluster_shared_rsrc_type_t type)
3102{
3103 return thread->th_shared_rsrc_heavy_user[type] || thread->th_shared_rsrc_heavy_perf_control[type];
3104}
3105
3106__options_decl(sched_edge_rsrc_heavy_thread_state, uint32_t, {
3107 SCHED_EDGE_RSRC_HEAVY_THREAD_SET = 1,
3108 SCHED_EDGE_RSRC_HEAVY_THREAD_CLR = 2,
3109});
3110
3111kern_return_t
3112thread_shared_rsrc_policy_set(thread_t thread, __unused uint32_t index, cluster_shared_rsrc_type_t type, shared_rsrc_policy_agent_t agent)
3113{
3114 spl_t s = splsched();
3115 thread_lock(thread);
3116
3117 bool user = (agent == SHARED_RSRC_POLICY_AGENT_DISPATCH) || (agent == SHARED_RSRC_POLICY_AGENT_SYSCTL);
3118 bool *thread_flags = (user) ? thread->th_shared_rsrc_heavy_user : thread->th_shared_rsrc_heavy_perf_control;
3119 if (thread_flags[type]) {
3120 thread_unlock(thread);
3121 splx(s);
3122 return KERN_FAILURE;
3123 }
3124
3125 thread_flags[type] = true;
3126 thread_unlock(thread);
3127 splx(s);
3128
3129 KDBG(MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_EDGE_RSRC_HEAVY_THREAD) | DBG_FUNC_NONE, SCHED_EDGE_RSRC_HEAVY_THREAD_SET, thread_tid(thread), type, agent);
3130 if (thread == current_thread()) {
3131 if (agent == SHARED_RSRC_POLICY_AGENT_PERFCTL_QUANTUM) {
3132 ast_on(AST_PREEMPT);
3133 } else {
3134 assert(agent != SHARED_RSRC_POLICY_AGENT_PERFCTL_CSW);
3135 thread_block(THREAD_CONTINUE_NULL);
3136 }
3137 }
3138 return KERN_SUCCESS;
3139}
3140
3141kern_return_t
3142thread_shared_rsrc_policy_clear(thread_t thread, cluster_shared_rsrc_type_t type, shared_rsrc_policy_agent_t agent)
3143{
3144 spl_t s = splsched();
3145 thread_lock(thread);
3146
3147 bool user = (agent == SHARED_RSRC_POLICY_AGENT_DISPATCH) || (agent == SHARED_RSRC_POLICY_AGENT_SYSCTL);
3148 bool *thread_flags = (user) ? thread->th_shared_rsrc_heavy_user : thread->th_shared_rsrc_heavy_perf_control;
3149 if (!thread_flags[type]) {
3150 thread_unlock(thread);
3151 splx(s);
3152 return KERN_FAILURE;
3153 }
3154
3155 thread_flags[type] = false;
3156 thread_unlock(thread);
3157 splx(s);
3158
3159 KDBG(MACHDBG_CODE(DBG_MACH_SCHED_CLUTCH, MACH_SCHED_EDGE_RSRC_HEAVY_THREAD) | DBG_FUNC_NONE, SCHED_EDGE_RSRC_HEAVY_THREAD_CLR, thread_tid(thread), type, agent);
3160 if (thread == current_thread()) {
3161 if (agent == SHARED_RSRC_POLICY_AGENT_PERFCTL_QUANTUM) {
3162 ast_on(AST_PREEMPT);
3163 } else {
3164 assert(agent != SHARED_RSRC_POLICY_AGENT_PERFCTL_CSW);
3165 thread_block(THREAD_CONTINUE_NULL);
3166 }
3167 }
3168 return KERN_SUCCESS;
3169}
3170
3171#else /* CONFIG_SCHED_EDGE */
3172
3173/*
3174 * On non-Edge schedulers, the shared resource contention
3175 * is managed by simply binding threads to specific clusters
3176 * based on the worker index passed by the agents marking
3177 * this thread as resource heavy threads. The thread binding
3178 * approach does not provide any rebalancing opportunities;
3179 * it can also suffer from scheduling delays if the cluster
3180 * where the thread is bound is contended.
3181 */
3182
3183boolean_t
3184thread_shared_rsrc_policy_get(__unused thread_t thread, __unused cluster_shared_rsrc_type_t type)
3185{
3186 return false;
3187}
3188
3189kern_return_t
3190thread_shared_rsrc_policy_set(thread_t thread, uint32_t index, __unused cluster_shared_rsrc_type_t type, __unused shared_rsrc_policy_agent_t agent)
3191{
3192 return thread_bind_cluster_id(thread, index, THREAD_BIND_SOFT | THREAD_BIND_ELIGIBLE_ONLY);
3193}
3194
3195kern_return_t
3196thread_shared_rsrc_policy_clear(thread_t thread, __unused cluster_shared_rsrc_type_t type, __unused shared_rsrc_policy_agent_t agent)
3197{
3198 return thread_bind_cluster_id(thread, 0, THREAD_UNBIND);
3199}
3200
3201#endif /* CONFIG_SCHED_EDGE */
3202
3203uint64_t
3204thread_dispatchqaddr(
3205 thread_t thread)
3206{
3207 uint64_t dispatchqueue_addr;
3208 uint64_t thread_handle;
3209 task_t task;
3210
3211 if (thread == THREAD_NULL) {
3212 return 0;
3213 }
3214
3215 thread_handle = thread->machine.cthread_self;
3216 if (thread_handle == 0) {
3217 return 0;
3218 }
3219
3220 task = get_threadtask(thread);
3221 void *bsd_info = get_bsdtask_info(task);
3222 if (thread->inspection == TRUE) {
3223 dispatchqueue_addr = thread_handle + get_task_dispatchqueue_offset(task);
3224 } else if (bsd_info) {
3225 dispatchqueue_addr = thread_handle + get_dispatchqueue_offset_from_proc(bsd_info);
3226 } else {
3227 dispatchqueue_addr = 0;
3228 }
3229
3230 return dispatchqueue_addr;
3231}
3232
3233
3234uint64_t
3235thread_wqquantum_addr(thread_t thread)
3236{
3237 uint64_t thread_handle;
3238 task_t task;
3239
3240 if (thread == THREAD_NULL) {
3241 return 0;
3242 }
3243
3244 thread_handle = thread->machine.cthread_self;
3245 if (thread_handle == 0) {
3246 return 0;
3247 }
3248 task = get_threadtask(thread);
3249
3250 uint64_t wq_quantum_expiry_offset = get_wq_quantum_offset_from_proc(get_bsdtask_info(task));
3251 if (wq_quantum_expiry_offset == 0) {
3252 return 0;
3253 }
3254
3255 return wq_quantum_expiry_offset + thread_handle;
3256}
3257
3258uint64_t
3259thread_rettokern_addr(
3260 thread_t thread)
3261{
3262 uint64_t rettokern_addr;
3263 uint64_t rettokern_offset;
3264 uint64_t thread_handle;
3265 task_t task;
3266 void *bsd_info;
3267
3268 if (thread == THREAD_NULL) {
3269 return 0;
3270 }
3271
3272 thread_handle = thread->machine.cthread_self;
3273 if (thread_handle == 0) {
3274 return 0;
3275 }
3276 task = get_threadtask(thread);
3277 bsd_info = get_bsdtask_info(task);
3278
3279 if (bsd_info) {
3280 rettokern_offset = get_return_to_kernel_offset_from_proc(bsd_info);
3281
3282 /* Return 0 if return to kernel offset is not initialized. */
3283 if (rettokern_offset == 0) {
3284 rettokern_addr = 0;
3285 } else {
3286 rettokern_addr = thread_handle + rettokern_offset;
3287 }
3288 } else {
3289 rettokern_addr = 0;
3290 }
3291
3292 return rettokern_addr;
3293}
3294
3295/*
3296 * Export routines to other components for things that are done as macros
3297 * within the osfmk component.
3298 */
3299
3300void
3301thread_mtx_lock(thread_t thread)
3302{
3303 lck_mtx_lock(&thread->mutex);
3304}
3305
3306void
3307thread_mtx_unlock(thread_t thread)
3308{
3309 lck_mtx_unlock(&thread->mutex);
3310}
3311
3312void
3313thread_reference(
3314 thread_t thread)
3315{
3316 if (thread != THREAD_NULL) {
3317 zone_id_require(ZONE_ID_THREAD, sizeof(struct thread), thread);
3318 os_ref_retain_raw(&thread->ref_count, &thread_refgrp);
3319 }
3320}
3321
3322void
3323thread_require(thread_t thread)
3324{
3325 zone_id_require(ZONE_ID_THREAD, sizeof(struct thread), thread);
3326}
3327
3328#undef thread_should_halt
3329
3330boolean_t
3331thread_should_halt(
3332 thread_t th)
3333{
3334 return thread_should_halt_fast(th);
3335}
3336
3337/*
3338 * thread_set_voucher_name - reset the voucher port name bound to this thread
3339 *
3340 * Conditions: nothing locked
3341 */
3342
3343kern_return_t
3344thread_set_voucher_name(mach_port_name_t voucher_name)
3345{
3346 thread_t thread = current_thread();
3347 ipc_voucher_t new_voucher = IPC_VOUCHER_NULL;
3348 ipc_voucher_t voucher;
3349 ledger_t bankledger = NULL;
3350 struct thread_group *banktg = NULL;
3351 uint32_t persona_id = 0;
3352
3353 if (MACH_PORT_DEAD == voucher_name) {
3354 return KERN_INVALID_RIGHT;
3355 }
3356
3357 /*
3358 * aggressively convert to voucher reference
3359 */
3360 if (MACH_PORT_VALID(voucher_name)) {
3361 new_voucher = convert_port_name_to_voucher(voucher_name);
3362 if (IPC_VOUCHER_NULL == new_voucher) {
3363 return KERN_INVALID_ARGUMENT;
3364 }
3365 }
3366 bank_get_bank_ledger_thread_group_and_persona(new_voucher, &bankledger, &banktg, &persona_id);
3367
3368 thread_mtx_lock(thread);
3369 voucher = thread->ith_voucher;
3370 thread->ith_voucher_name = voucher_name;
3371 thread->ith_voucher = new_voucher;
3372 thread_mtx_unlock(thread);
3373
3374 bank_swap_thread_bank_ledger(thread, bankledger);
3375#if CONFIG_THREAD_GROUPS
3376 thread_group_set_bank(thread, banktg);
3377#endif /* CONFIG_THREAD_GROUPS */
3378
3379 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3380 MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3381 (uintptr_t)thread_tid(thread),
3382 (uintptr_t)voucher_name,
3383 VM_KERNEL_ADDRPERM((uintptr_t)new_voucher),
3384 persona_id, 0);
3385
3386 if (IPC_VOUCHER_NULL != voucher) {
3387 ipc_voucher_release(voucher);
3388 }
3389
3390 return KERN_SUCCESS;
3391}
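/*
 * Illustrative call of the routine above (sketch, not from the original
 * source); voucher_port_name is an assumed send-right name in the calling
 * task's IPC space:
 *
 *	kern_return_t kr = thread_set_voucher_name(voucher_port_name);
 *	// passing an invalid (but not dead) name such as MACH_PORT_NULL
 *	// unbinds whatever voucher the thread currently carries
 */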
3392
3393/*
3394 * thread_get_mach_voucher - return a voucher reference for the specified thread voucher
3395 *
3396 * Conditions: nothing locked
3397 *
3398 * NOTE: At the moment, there is no distinction between the current and effective
3399 * vouchers because we only set them at the thread level currently.
3400 */
3401kern_return_t
3402thread_get_mach_voucher(
3403 thread_act_t thread,
3404 mach_voucher_selector_t __unused which,
3405 ipc_voucher_t *voucherp)
3406{
3407 ipc_voucher_t voucher;
3408
3409 if (THREAD_NULL == thread) {
3410 return KERN_INVALID_ARGUMENT;
3411 }
3412
3413 thread_mtx_lock(thread);
3414 voucher = thread->ith_voucher;
3415
3416 if (IPC_VOUCHER_NULL != voucher) {
3417 ipc_voucher_reference(voucher);
3418 thread_mtx_unlock(thread);
3419 *voucherp = voucher;
3420 return KERN_SUCCESS;
3421 }
3422
3423 thread_mtx_unlock(thread);
3424
3425 *voucherp = IPC_VOUCHER_NULL;
3426 return KERN_SUCCESS;
3427}
3428
3429/*
3430 * thread_set_mach_voucher - set a voucher reference for the specified thread voucher
3431 *
3432 * Conditions: caller holds a reference on the voucher.
3433 * nothing locked.
3434 *
3435 * We grab another reference to the voucher and bind it to the thread.
3436 * The old voucher reference associated with the thread is
3437 * discarded.
3438 */
3439kern_return_t
3440thread_set_mach_voucher(
3441 thread_t thread,
3442 ipc_voucher_t voucher)
3443{
3444 ipc_voucher_t old_voucher;
3445 ledger_t bankledger = NULL;
3446 struct thread_group *banktg = NULL;
3447 uint32_t persona_id = 0;
3448
3449 if (THREAD_NULL == thread) {
3450 return KERN_INVALID_ARGUMENT;
3451 }
3452
3453 bank_get_bank_ledger_thread_group_and_persona(voucher, &bankledger, &banktg, &persona_id);
3454
3455 thread_mtx_lock(thread);
3456 /*
3457 * Once the thread is started, we will look at `ith_voucher` without
3458 * holding any lock.
3459 *
3460 * Setting the voucher hence can only be done by current_thread() or
3461 * before it started. "started" flips under the thread mutex and must be
3462 * tested under it too.
3463 */
3464 if (thread != current_thread() && thread->started) {
3465 thread_mtx_unlock(thread);
3466 return KERN_INVALID_ARGUMENT;
3467 }
3468
3469 ipc_voucher_reference(voucher);
3470 old_voucher = thread->ith_voucher;
3471 thread->ith_voucher = voucher;
3472 thread->ith_voucher_name = MACH_PORT_NULL;
3473 thread_mtx_unlock(thread);
3474
3475 bank_swap_thread_bank_ledger(thread, bankledger);
3476#if CONFIG_THREAD_GROUPS
3477 thread_group_set_bank(thread, banktg);
3478#endif /* CONFIG_THREAD_GROUPS */
3479
3480 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3481 MACHDBG_CODE(DBG_MACH_IPC, MACH_THREAD_SET_VOUCHER) | DBG_FUNC_NONE,
3482 (uintptr_t)thread_tid(thread),
3483 (uintptr_t)MACH_PORT_NULL,
3484 VM_KERNEL_ADDRPERM((uintptr_t)voucher),
3485 persona_id, 0);
3486
3487 ipc_voucher_release(old_voucher);
3488
3489 return KERN_SUCCESS;
3490}
3491
3492/*
3493 * thread_swap_mach_voucher - swap a voucher reference for the specified thread voucher
3494 *
3495 * Conditions: caller holds a reference on the new and presumed old voucher(s).
3496 * nothing locked.
3497 *
3498 * This function is no longer supported.
3499 */
3500kern_return_t
3501thread_swap_mach_voucher(
3502 __unused thread_t thread,
3503 __unused ipc_voucher_t new_voucher,
3504 ipc_voucher_t *in_out_old_voucher)
3505{
3506 /*
3507 * Currently this function is only called from a MIG generated
3508 * routine which doesn't release the reference on the voucher
3509 * addressed by in_out_old_voucher. To avoid leaking this reference,
3510 * a call to release it has been added here.
3511 */
3512 ipc_voucher_release(*in_out_old_voucher);
3513 OS_ANALYZER_SUPPRESS("81787115") return KERN_NOT_SUPPORTED;
3514}
3515
3516/*
3517 * thread_get_current_voucher_origin_pid - get the pid of the originator of the current voucher.
3518 */
3519kern_return_t
3520thread_get_current_voucher_origin_pid(
3521 int32_t *pid)
3522{
3523 return thread_get_voucher_origin_pid(current_thread(), pid);
3524}
3525
3526/*
3527 * thread_get_voucher_origin_pid - get the pid of the originator of the given thread's voucher.
3528 */
3529kern_return_t
3530thread_get_voucher_origin_pid(thread_t thread, int32_t *pid)
3531{
3532 uint32_t buf_size = sizeof(*pid);
3533 return mach_voucher_attr_command(thread->ith_voucher,
3534 MACH_VOUCHER_ATTR_KEY_BANK,
3535 BANK_ORIGINATOR_PID,
3536 NULL,
3537 0,
3538 (mach_voucher_attr_content_t)pid,
3539 &buf_size);
3540}
3541
3542/*
3543 * thread_get_voucher_origin_proximate_pid - get the pids of the originator and the proximate process of the given thread's voucher.
3544 */
3545kern_return_t
3546thread_get_voucher_origin_proximate_pid(thread_t thread, int32_t *origin_pid, int32_t *proximate_pid)
3547{
3548 int32_t origin_proximate_pids[2] = { };
3549 uint32_t buf_size = sizeof(origin_proximate_pids);
3550 kern_return_t kr = mach_voucher_attr_command(thread->ith_voucher,
3551 MACH_VOUCHER_ATTR_KEY_BANK,
3552 BANK_ORIGINATOR_PROXIMATE_PID,
3553 NULL,
3554 0,
3555 (mach_voucher_attr_content_t)origin_proximate_pids,
3556 &buf_size);
3557 if (kr == KERN_SUCCESS) {
3558 *origin_pid = origin_proximate_pids[0];
3559 *proximate_pid = origin_proximate_pids[1];
3560 }
3561 return kr;
3562}
3563
3564#if CONFIG_THREAD_GROUPS
3565/*
3566 * Returns the current thread's voucher-carried thread group
3567 *
3568 * Reference is borrowed from this being the current voucher, so it does NOT
3569 * return a reference to the group.
3570 */
3571struct thread_group *
3572thread_get_current_voucher_thread_group(thread_t thread)
3573{
3574 assert(thread == current_thread());
3575
3576 if (thread->ith_voucher == NULL) {
3577 return NULL;
3578 }
3579
3580 ledger_t bankledger = NULL;
3581 struct thread_group *banktg = NULL;
3582
3583 bank_get_bank_ledger_thread_group_and_persona(thread->ith_voucher, &bankledger, &banktg, NULL);
3584
3585 return banktg;
3586}
3587
3588#endif /* CONFIG_THREAD_GROUPS */
3589
3590#if CONFIG_COALITIONS
3591
3592uint64_t
3593thread_get_current_voucher_resource_coalition_id(thread_t thread)
3594{
3595 uint64_t id = 0;
3596 assert(thread == current_thread());
3597 if (thread->ith_voucher != NULL) {
3598 id = bank_get_bank_ledger_resource_coalition_id(thread->ith_voucher);
3599 }
3600 return id;
3601}
3602
3603#endif /* CONFIG_COALITIONS */
3604
3605extern struct workqueue *
3606proc_get_wqptr(void *proc);
3607
3608static bool
3609task_supports_cooperative_workqueue(task_t task)
3610{
3611 void *bsd_info = get_bsdtask_info(task);
3612
3613 assert(task == current_task());
3614 if (bsd_info == NULL) {
3615 return false;
3616 }
3617
3618 uint64_t wq_quantum_expiry_offset = get_wq_quantum_offset_from_proc(bsd_info);
3619 /* userspace may not have called workq_open yet */
3620 struct workqueue *wq = proc_get_wqptr(bsd_info);
3621
3622 return (wq != NULL) && (wq_quantum_expiry_offset != 0);
3623}

/* Not safe to call from scheduler paths - should only be called on self */
bool
thread_supports_cooperative_workqueue(thread_t thread)
{
	struct uthread *uth = get_bsdthread_info(thread);
	task_t task = get_threadtask(thread);

	assert(thread == current_thread());

	return task_supports_cooperative_workqueue(task) &&
	       bsdthread_part_of_cooperative_workqueue(uth);
}

static inline bool
thread_has_armed_workqueue_quantum(thread_t thread)
{
	return thread->workq_quantum_deadline != 0;
}

/*
 * The workq quantum is a lazy timer that is evaluated at two specific times in
 * the scheduler:
 *
 * - context switch time
 * - scheduler quantum expiry time.
 *
 * The workq quantum is currently half of the scheduler quantum. If the workq
 * quantum is rearmed shortly after the scheduler quantum begins, there can be
 * a large delay between when the workq quantum next expires and when that
 * expiry is actually noticed.
 *
 * A potential future improvement for the wq quantum expiry logic is to compare
 * it to the next actual scheduler quantum deadline and expire it if it is
 * within a certain leeway.
 */
static inline uint64_t
thread_workq_quantum_size(thread_t thread)
{
	return (uint64_t) (SCHED(initial_quantum_size)(thread) / 2);
}
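
/*
 * Worked example (illustrative, with assumed numbers): if the scheduler
 * quantum for the thread corresponds to 10ms of mach time, the workq quantum
 * is 5ms. When the quantum is armed while the thread's accumulated runtime is,
 * say, 42ms, the deadline becomes 47ms; the expiry is only *noticed* at the
 * next context switch or scheduler quantum expiry after the thread's runtime
 * crosses 47ms, which is what makes this a lazy timer.
 */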

/*
 * Always called by a thread on itself - either at the AST boundary after
 * processing an existing quantum expiry, or when a new quantum is armed before
 * the thread goes out to userspace to handle a thread request
 */
void
thread_arm_workqueue_quantum(thread_t thread)
{
	/*
	 * If the task is not opted into wq quantum notification, or if the thread
	 * is not part of the cooperative workqueue, don't even bother with tracking
	 * the quantum or calculating expiry
	 */
	if (!thread_supports_cooperative_workqueue(thread)) {
		assert(thread->workq_quantum_deadline == 0);
		return;
	}

	assert(current_thread() == thread);
	assert(thread_get_tag(thread) & THREAD_TAG_WORKQUEUE);

	uint64_t current_runtime = thread_get_runtime_self();
	uint64_t deadline = thread_workq_quantum_size(thread) + current_runtime;

	/*
	 * The update of a workqueue quantum should always be followed by the update
	 * of the AST - see the explanation in kern/thread.h for synchronization of
	 * this field
	 */
	thread->workq_quantum_deadline = deadline;

	/* We're arming a new quantum, clear any previous expiry notification */
	act_clear_astkevent(thread, AST_KEVENT_WORKQ_QUANTUM_EXPIRED);

	WQ_TRACE(TRACE_wq_quantum_arm, current_runtime, deadline, 0, 0);

	WORKQ_QUANTUM_HISTORY_WRITE_ENTRY(thread, thread->workq_quantum_deadline, true);
}

/* Called by a thread on itself when it is about to park */
void
thread_disarm_workqueue_quantum(thread_t thread)
{
	/*
	 * The update of a workqueue quantum should always be followed by the update
	 * of the AST - see the explanation in kern/thread.h for synchronization of
	 * this field
	 */
	thread->workq_quantum_deadline = 0;
	act_clear_astkevent(thread, AST_KEVENT_WORKQ_QUANTUM_EXPIRED);

	WQ_TRACE(TRACE_wq_quantum_disarm, 0, 0, 0, 0);

	WORKQ_QUANTUM_HISTORY_WRITE_ENTRY(thread, thread->workq_quantum_deadline, false);
}

/*
 * This is called at context switch time on a thread that may not be self,
 * and at AST time
 */
bool
thread_has_expired_workqueue_quantum(thread_t thread, bool should_trace)
{
	if (!thread_has_armed_workqueue_quantum(thread)) {
		return false;
	}
	/*
	 * We do not use thread_get_runtime_self() here since this function is
	 * called at context switch time or during scheduler quantum expiry and
	 * therefore we may not be evaluating it on the current thread/self.
	 *
	 * In addition, the timers on the thread have just been updated recently so
	 * we don't need to update them again.
	 */
	uint64_t runtime = recount_thread_time_mach(thread);
	bool expired = runtime > thread->workq_quantum_deadline;

	if (expired && should_trace) {
		WQ_TRACE(TRACE_wq_quantum_expired, runtime, thread->workq_quantum_deadline, 0, 0);
	}

	return expired;
}

/*
 * Called on a thread that is being context switched out or during quantum
 * expiry on self. Only called from scheduler paths.
 */
void
thread_evaluate_workqueue_quantum_expiry(thread_t thread)
{
	if (thread_has_expired_workqueue_quantum(thread, true)) {
		act_set_astkevent(thread, AST_KEVENT_WORKQ_QUANTUM_EXPIRED);
	}
}
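
/*
 * Lifecycle sketch (illustrative only) of how the routines above fit
 * together for a cooperative workqueue thread:
 *
 *	thread_arm_workqueue_quantum(self);     // before returning to userspace
 *	...                                     // thread runs user work
 *	// at context switch / scheduler quantum expiry, the scheduler calls:
 *	thread_evaluate_workqueue_quantum_expiry(thread);
 *	//   -> sets AST_KEVENT_WORKQ_QUANTUM_EXPIRED if the deadline passed
 *	// the thread later notices the AST, reports to the workqueue subsystem,
 *	// and either re-arms a fresh quantum or disarms it:
 *	thread_disarm_workqueue_quantum(self);  // when parking
 */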

boolean_t
thread_has_thread_name(thread_t th)
{
	if (th) {
		return bsd_hasthreadname(get_bsdthread_info(th));
	}

	/*
	 * This is an odd case; clients may set the thread name based on the lack of
	 * a name, but in this context there is no uthread to attach the name to.
	 */
	return FALSE;
}

void
thread_set_thread_name(thread_t th, const char* name)
{
	if (th && name) {
		bsd_setthreadname(get_bsdthread_info(th), thread_tid(th), name);
	}
}

void
thread_get_thread_name(thread_t th, char* name)
{
	if (!name) {
		return;
	}
	if (th) {
		bsd_getthreadname(get_bsdthread_info(th), name);
	} else {
		name[0] = '\0';
	}
}

processor_t
thread_get_runq(thread_t thread)
{
	thread_lock_assert(thread, LCK_ASSERT_OWNED);
	processor_t runq = thread->__runq.runq;
	os_atomic_thread_fence(acquire);
	return runq;
}

processor_t
thread_get_runq_locked(thread_t thread)
{
	thread_lock_assert(thread, LCK_ASSERT_OWNED);
	processor_t runq = thread->__runq.runq;
	if (runq != PROCESSOR_NULL) {
		pset_assert_locked(runq->processor_set);
	}
	return runq;
}

void
thread_set_runq_locked(thread_t thread, processor_t new_runq)
{
	thread_lock_assert(thread, LCK_ASSERT_OWNED);
	pset_assert_locked(new_runq->processor_set);
	thread_assert_runq_null(thread);
	thread->__runq.runq = new_runq;
}

void
thread_clear_runq(thread_t thread)
{
	thread_assert_runq_nonnull(thread);
	os_atomic_thread_fence(release);
	thread->__runq.runq = PROCESSOR_NULL;
}

void
thread_clear_runq_locked(thread_t thread)
{
	thread_lock_assert(thread, LCK_ASSERT_OWNED);
	thread_assert_runq_nonnull(thread);
	thread->__runq.runq = PROCESSOR_NULL;
}

void
thread_assert_runq_null(__assert_only thread_t thread)
{
	assert(thread->__runq.runq == PROCESSOR_NULL);
}

void
thread_assert_runq_nonnull(thread_t thread)
{
	pset_assert_locked(thread->__runq.runq->processor_set);
	assert(thread->__runq.runq != PROCESSOR_NULL);
}

void
thread_set_honor_qlimit(thread_t thread)
{
	thread->options |= TH_OPT_HONOR_QLIMIT;
}

void
thread_clear_honor_qlimit(thread_t thread)
{
	thread->options &= (~TH_OPT_HONOR_QLIMIT);
}

/*
 * thread_enable_send_importance - set/clear the SEND_IMPORTANCE thread option bit.
 */
void
thread_enable_send_importance(thread_t thread, boolean_t enable)
{
	if (enable == TRUE) {
		thread->options |= TH_OPT_SEND_IMPORTANCE;
	} else {
		thread->options &= ~TH_OPT_SEND_IMPORTANCE;
	}
}

kern_return_t
thread_get_ipc_propagate_attr(thread_t thread, struct thread_attr_for_ipc_propagation *attr)
{
	int iotier;
	int qos;

	if (thread == NULL || attr == NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	iotier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
	qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);

	if (!qos) {
		qos = thread_user_promotion_qos_for_pri(thread->base_pri);
	}

	attr->tafip_iotier = iotier;
	attr->tafip_qos = qos;

	return KERN_SUCCESS;
}

/*
 * thread_set_allocation_name - set or clear the current thread's kernel
 * allocation name, returning the previous value.
 */

kern_allocation_name_t
thread_set_allocation_name(kern_allocation_name_t new_name)
{
	kern_allocation_name_t ret;
	thread_kernel_state_t kstate = thread_get_kernel_state(current_thread());
	ret = kstate->allocation_name;
	/* FIFO: the first non-NULL name wins; passing NULL always clears it */
	if (!new_name || !kstate->allocation_name) {
		kstate->allocation_name = new_name;
	}
	return ret;
}
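
/*
 * Usage sketch (illustrative, hypothetical caller): because the previous name
 * is returned, a caller can bracket an allocation-heavy region and restore the
 * prior state afterwards. "my_name" below is a hypothetical
 * kern_allocation_name_t owned by the caller.
 *
 *	kern_allocation_name_t prev;
 *
 *	prev = thread_set_allocation_name(my_name);  // takes effect only if unset
 *	...                                          // allocations attributed here
 *	thread_set_allocation_name(prev);            // restores the previous state
 */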

void *
thread_iokit_tls_get(uint32_t index)
{
	assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
	return current_thread()->saved.iokit.tls[index];
}

void
thread_iokit_tls_set(uint32_t index, void * data)
{
	assert(index < THREAD_SAVE_IOKIT_TLS_COUNT);
	current_thread()->saved.iokit.tls[index] = data;
}

uint64_t
thread_get_last_wait_duration(thread_t thread)
{
	return thread->last_made_runnable_time - thread->last_run_time;
}

integer_t
thread_kern_get_pri(thread_t thr)
{
	return thr->base_pri;
}

void
thread_kern_set_pri(thread_t thr, integer_t pri)
{
	sched_set_kernel_thread_priority(thr, pri);
}

integer_t
thread_kern_get_kernel_maxpri(void)
{
	return MAXPRI_KERNEL;
}
/*
 * thread_port_with_flavor_no_senders
 *
 * Called whenever the Mach port system detects no-senders on
 * the thread inspect or read port. These ports are allocated lazily and
 * should be deallocated here when there are no senders remaining.
 */
static void
thread_port_with_flavor_no_senders(
	ipc_port_t          port,
	mach_port_mscount_t mscount __unused)
{
	thread_ro_t tro;
	thread_t thread;
	mach_thread_flavor_t flavor;
	ipc_kobject_type_t kotype;

	ip_mq_lock(port);
	if (port->ip_srights > 0) {
		ip_mq_unlock(port);
		return;
	}
	kotype = ip_kotype(port);
	assert((IKOT_THREAD_READ == kotype) || (IKOT_THREAD_INSPECT == kotype));
	thread = ipc_kobject_get_locked(port, kotype);
	if (thread != THREAD_NULL) {
		thread_reference(thread);
	}
	ip_mq_unlock(port);

	if (thread == THREAD_NULL) {
		/* The thread is exiting or disabled; it will eventually deallocate the port */
		return;
	}

	if (kotype == IKOT_THREAD_READ) {
		flavor = THREAD_FLAVOR_READ;
	} else {
		flavor = THREAD_FLAVOR_INSPECT;
	}

	thread_mtx_lock(thread);
	ip_mq_lock(port);

	/*
	 * If the port is no longer active, then ipc_thread_terminate() ran
	 * and destroyed the kobject already. Just drop the thread
	 * reference we took and go away.
	 *
	 * It is also possible that several nsrequests are in flight,
	 * only one shall NULL-out the port entry, and this is the one
	 * that gets to dealloc the port.
	 *
	 * Check for a stale no-senders notification. A call to any function
	 * that vends out send rights to this port could resurrect it between
	 * this notification being generated and actually being handled here.
	 */
	tro = get_thread_ro(thread);
	if (!ip_active(port) ||
	    tro->tro_ports[flavor] != port ||
	    port->ip_srights > 0) {
		ip_mq_unlock(port);
		thread_mtx_unlock(thread);
		thread_deallocate(thread);
		return;
	}

	assert(tro->tro_ports[flavor] == port);
	zalloc_ro_clear_field(ZONE_ID_THREAD_RO, tro, tro_ports[flavor]);
	thread_mtx_unlock(thread);

	ipc_kobject_dealloc_port_and_unlock(port, 0, kotype);

	thread_deallocate(thread);
}

/*
 * The 'thread_region_page_shift' is used by footprint
 * to specify the page size that it will use to
 * accomplish its accounting work on the task being
 * inspected. Since footprint uses a thread for each
 * task that it works on, we need to keep the page_shift
 * on a per-thread basis.
 */

int
thread_self_region_page_shift(void)
{
	/*
	 * Return the page shift that this thread
	 * would like to use for its accounting work.
	 */
	return current_thread()->thread_region_page_shift;
}

void
thread_self_region_page_shift_set(
	int pgshift)
{
	/*
	 * Set the page shift that this thread
	 * would like to use for its accounting work
	 * when dealing with a task.
	 */
	current_thread()->thread_region_page_shift = pgshift;
}
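
/*
 * Usage sketch (illustrative): a footprint-style worker thread that wants
 * 16KB accounting granularity regardless of the inspected task's native page
 * size could do something like the following (the value 14 and the use of 0
 * as "no override" are assumptions for illustration):
 *
 *	thread_self_region_page_shift_set(14);  // account in 16KB units
 *	...                                     // perform per-task accounting
 *	thread_self_region_page_shift_set(0);   // drop the override when done
 */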

__startup_func
static void
ctid_table_init(void)
{
	/*
	 * Pretend the early boot setup didn't exist,
	 * and pick a mangling nonce.
	 */
	*compact_id_resolve(&ctid_table, 0) = THREAD_NULL;
	ctid_nonce = (uint32_t)early_random() & CTID_MASK;
}


/*
 * This maps the [0, CTID_MAX_THREAD_NUMBER] range
 * to [1, CTID_MAX_THREAD_NUMBER + 1 == CTID_MASK]
 * so that in mangled form, '0' is an invalid CTID.
 */
static ctid_t
ctid_mangle(compact_id_t cid)
{
	return (cid == ctid_nonce ? CTID_MASK : cid) ^ ctid_nonce;
}

static compact_id_t
ctid_unmangle(ctid_t ctid)
{
	ctid ^= ctid_nonce;
	return ctid == CTID_MASK ? ctid_nonce : ctid;
}
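
/*
 * Worked example (with an assumed nonce, purely illustrative): suppose
 * ctid_nonce == 5.
 *
 *	ctid_mangle(3) == 3 ^ 5 == 6         ctid_unmangle(6) == 6 ^ 5 == 3
 *	ctid_mangle(5) == CTID_MASK ^ 5      ctid_unmangle(CTID_MASK ^ 5) == 5
 *
 * The special-casing of cid == ctid_nonce is what guarantees that a mangled
 * ctid of 0 can never be produced, so 0 can serve as the "no ctid" value.
 */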

void
ctid_table_add(thread_t thread)
{
	compact_id_t cid;

	cid = compact_id_get(&ctid_table, CTID_MAX_THREAD_NUMBER, thread);
	thread->ctid = ctid_mangle(cid);
}

void
ctid_table_remove(thread_t thread)
{
	__assert_only thread_t value;

	value = compact_id_put(&ctid_table, ctid_unmangle(thread->ctid));
	assert3p(value, ==, thread);
	thread->ctid = 0;
}

thread_t
ctid_get_thread_unsafe(ctid_t ctid)
{
	if (ctid) {
		return *compact_id_resolve(&ctid_table, ctid_unmangle(ctid));
	}
	return THREAD_NULL;
}

thread_t
ctid_get_thread(ctid_t ctid)
{
	thread_t thread = THREAD_NULL;

	if (ctid) {
		thread = *compact_id_resolve(&ctid_table, ctid_unmangle(ctid));
		assert(thread && thread->ctid == ctid);
	}
	return thread;
}

ctid_t
thread_get_ctid(thread_t thread)
{
	return thread->ctid;
}
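
/*
 * Round-trip sketch (illustrative): once a thread has been added to the ctid
 * table, its compact id can be stored in space-constrained structures and
 * later resolved back to the thread:
 *
 *	ctid_t id = thread_get_ctid(thread);  // nonzero once ctid_table_add ran
 *	...
 *	thread_t t = ctid_get_thread(id);     // resolves back to the same thread
 *	assert(t == thread);
 */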

/*
 * Adjust code signature dependent thread state.
 *
 * Called to allow code signature dependent adjustments to the thread
 * state. Note that this is usually called twice for the main thread:
 * Once at thread creation by thread_create, when the signature is
 * potentially not attached yet (which is usually the case for the
 * first/main thread of a task), and once after the task's signature
 * has actually been attached.
 */
kern_return_t
thread_process_signature(thread_t thread, task_t task)
{
	return machine_thread_process_signature(thread, task);
}

#if CONFIG_SPTM

void
thread_associate_txm_thread_stack(uintptr_t thread_stack)
{
	thread_t self = current_thread();

	if (self->txm_thread_stack != 0) {
		panic("attempted multiple TXM thread associations: %lu | %lu",
		    self->txm_thread_stack, thread_stack);
	}

	self->txm_thread_stack = thread_stack;
}

void
thread_disassociate_txm_thread_stack(uintptr_t thread_stack)
{
	thread_t self = current_thread();

	if (self->txm_thread_stack == 0) {
		panic("attempted to disassociate non-existent TXM thread");
	} else if (self->txm_thread_stack != thread_stack) {
		panic("invalid disassociation for TXM thread: %lu | %lu",
		    self->txm_thread_stack, thread_stack);
	}

	self->txm_thread_stack = 0;
}

uintptr_t
thread_get_txm_thread_stack(void)
{
	return current_thread()->txm_thread_stack;
}

#endif /* CONFIG_SPTM */

#if CONFIG_DTRACE
uint32_t
dtrace_get_thread_predcache(thread_t thread)
{
	if (thread != THREAD_NULL) {
		return thread->t_dtrace_predcache;
	} else {
		return 0;
	}
}

int64_t
dtrace_get_thread_vtime(thread_t thread)
{
	if (thread != THREAD_NULL) {
		return thread->t_dtrace_vtime;
	} else {
		return 0;
	}
}

int
dtrace_get_thread_last_cpu_id(thread_t thread)
{
	if ((thread != THREAD_NULL) && (thread->last_processor != PROCESSOR_NULL)) {
		return thread->last_processor->cpu_id;
	} else {
		return -1;
	}
}

int64_t
dtrace_get_thread_tracing(thread_t thread)
{
	if (thread != THREAD_NULL) {
		return thread->t_dtrace_tracing;
	} else {
		return 0;
	}
}

uint16_t
dtrace_get_thread_inprobe(thread_t thread)
{
	if (thread != THREAD_NULL) {
		return thread->t_dtrace_inprobe;
	} else {
		return 0;
	}
}

vm_offset_t
thread_get_kernel_stack(thread_t thread)
{
	if (thread != THREAD_NULL) {
		return thread->kernel_stack;
	} else {
		return 0;
	}
}

#if KASAN
struct kasan_thread_data *
kasan_get_thread_data(thread_t thread)
{
	return &thread->kasan_data;
}
#endif

#if CONFIG_KCOV
kcov_thread_data_t *
kcov_get_thread_data(thread_t thread)
{
	return &thread->kcov_data;
}
#endif

#if CONFIG_STKSZ
/*
 * Returns the base of a thread's kernel stack.
 *
 * The coverage sanitizer instruments every function, including those that
 * participate in stack handoff between threads. There is a window in which
 * the CPU still holds the old values but the stack has already been handed
 * over to another thread. In this window kernel_stack is 0, yet the CPU still
 * uses the original stack (until a context switch occurs). The original
 * kernel_stack value is preserved in kst_stack during this window.
 */
vm_offset_t
kcov_stksz_get_thread_stkbase(thread_t thread)
{
	if (thread != THREAD_NULL) {
		kcov_thread_data_t *data = kcov_get_thread_data(thread);
		if (data->ktd_stksz.kst_stack) {
			return data->ktd_stksz.kst_stack;
		} else {
			return thread->kernel_stack;
		}
	} else {
		return 0;
	}
}

vm_offset_t
kcov_stksz_get_thread_stksize(thread_t thread)
{
	if (thread != THREAD_NULL) {
		return kernel_stack_size;
	} else {
		return 0;
	}
}

void
kcov_stksz_set_thread_stack(thread_t thread, vm_offset_t stack)
{
	kcov_thread_data_t *data = kcov_get_thread_data(thread);
	data->ktd_stksz.kst_stack = stack;
}
#endif /* CONFIG_STKSZ */

int64_t
dtrace_calc_thread_recent_vtime(thread_t thread)
{
	if (thread == THREAD_NULL) {
		return 0;
	}

	struct recount_usage usage = { 0 };
	recount_current_thread_usage(&usage);
	return (int64_t)(recount_usage_time_mach(&usage));
}

void
dtrace_set_thread_predcache(thread_t thread, uint32_t predcache)
{
	if (thread != THREAD_NULL) {
		thread->t_dtrace_predcache = predcache;
	}
}

void
dtrace_set_thread_vtime(thread_t thread, int64_t vtime)
{
	if (thread != THREAD_NULL) {
		thread->t_dtrace_vtime = vtime;
	}
}

void
dtrace_set_thread_tracing(thread_t thread, int64_t accum)
{
	if (thread != THREAD_NULL) {
		thread->t_dtrace_tracing = accum;
	}
}

void
dtrace_set_thread_inprobe(thread_t thread, uint16_t inprobe)
{
	if (thread != THREAD_NULL) {
		thread->t_dtrace_inprobe = inprobe;
	}
}

void
dtrace_thread_bootstrap(void)
{
	task_t task = current_task();

	if (task->thread_count == 1) {
		thread_t thread = current_thread();
		if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) {
			thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS;
			DTRACE_PROC(exec__success);
			KDBG(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_EXEC),
			    task_pid(task));
		}
		DTRACE_PROC(start);
	}
	DTRACE_PROC(lwp__start);
}

void
dtrace_thread_didexec(thread_t thread)
{
	thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS;
}
#endif /* CONFIG_DTRACE */
