1/*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57#define ATOMIC_PRIVATE 1
58#define LOCK_PRIVATE 1
59
60#include <mach_ldebug.h>
61#include <debug.h>
62
63#include <mach/kern_return.h>
64#include <mach/mach_host_server.h>
65#include <mach_debug/lockgroup_info.h>
66
67#include <kern/locks.h>
68#include <kern/misc_protos.h>
69#include <kern/kalloc.h>
70#include <kern/thread.h>
71#include <kern/processor.h>
72#include <kern/sched_prim.h>
73#include <kern/debug.h>
74#include <libkern/section_keywords.h>
75#include <machine/atomic.h>
76#include <machine/machine_cpu.h>
77#include <string.h>
78
79#include <sys/kdebug.h>
80
81#if CONFIG_DTRACE
82/*
83 * We need only enough declarations from the BSD-side to be able to
84 * test if our probe is active, and to call __dtrace_probe(). Setting
85 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
86 */
87#define NEED_DTRACE_DEFS
88#include <../bsd/sys/lockstat.h>
89#endif
90
91#define LCK_MTX_SLEEP_CODE 0
92#define LCK_MTX_SLEEP_DEADLINE_CODE 1
93#define LCK_MTX_LCK_WAIT_CODE 2
94#define LCK_MTX_UNLCK_WAKEUP_CODE 3
95
96#if MACH_LDEBUG
97#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
98#else
99#define ALIGN_TEST(p,t) do{}while(0)
100#endif
101
102/* Silence the volatile to _Atomic cast warning */
103#define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))
104
105/* Enforce program order of loads and stores. */
106#define ordered_load(target, type) \
107 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
108#define ordered_store(target, type, value) \
109 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
110
111#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
112#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
113
114#define NOINLINE __attribute__((noinline))
115
116
117static queue_head_t lck_grp_queue;
118static unsigned int lck_grp_cnt;
119
120decl_lck_mtx_data(static,lck_grp_lock)
121static lck_mtx_ext_t lck_grp_lock_ext;
122
123SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
124
125lck_grp_attr_t LockDefaultGroupAttr;
126lck_grp_t LockCompatGroup;
127lck_attr_t LockDefaultLckAttr;
128
129#if CONFIG_DTRACE && __SMP__
130#if defined (__x86_64__)
131uint64_t dtrace_spin_threshold = 500; // 500ns
132#elif defined(__arm__) || defined(__arm64__)
133uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // ~500ns, expressed in timebase ticks
134#endif
135#endif
136
137uintptr_t
138unslide_for_kdebug(void* object) {
139 if (__improbable(kdebug_enable))
140 return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 else
142 return 0;
143}
144
145/*
146 * Routine: lck_mod_init
147 */
148
149void
150lck_mod_init(
151 void)
152{
153 /*
154 * Obtain "lcks" options: this currently controls lock statistics
155 */
156 if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
157 LcksOpts = 0;
158
159
160#if (DEVELOPMENT || DEBUG) && defined(__x86_64__)
161 if (!PE_parse_boot_argn("-disable_mtx_chk", &LckDisablePreemptCheck, sizeof (LckDisablePreemptCheck)))
162 LckDisablePreemptCheck = 0;
163#endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */
164
165 queue_init(&lck_grp_queue);
166
167 /*
168 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
169 * grabbing the lck_grp_lock before it is initialized.
170 */
171
172 bzero(&LockCompatGroup, sizeof(lck_grp_t));
173 (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
174
175 if (LcksOpts & enaLkStat)
176 LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
177 else
178 LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
179
180 LockCompatGroup.lck_grp_refcnt = 1;
181
182 enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
183 lck_grp_cnt = 1;
184
185 lck_grp_attr_setdefault(&LockDefaultGroupAttr);
186 lck_attr_setdefault(&LockDefaultLckAttr);
187
188 lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
189}
190
191/*
192 * Routine: lck_grp_attr_alloc_init
193 */
194
195lck_grp_attr_t *
196lck_grp_attr_alloc_init(
197 void)
198{
199 lck_grp_attr_t *attr;
200
201 if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
202 lck_grp_attr_setdefault(attr);
203
204 return(attr);
205}
206
207
208/*
209 * Routine: lck_grp_attr_setdefault
210 */
211
212void
213lck_grp_attr_setdefault(
214 lck_grp_attr_t *attr)
215{
216 if (LcksOpts & enaLkStat)
217 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
218 else
219 attr->grp_attr_val = 0;
220}
221
222
223/*
224 * Routine: lck_grp_attr_setstat
225 */
226
227void
228lck_grp_attr_setstat(
229 lck_grp_attr_t *attr)
230{
231 (void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
232}
233
234
235/*
236 * Routine: lck_grp_attr_free
237 */
238
239void
240lck_grp_attr_free(
241 lck_grp_attr_t *attr)
242{
243 kfree(attr, sizeof(lck_grp_attr_t));
244}
245
246
247/*
248 * Routine: lck_grp_alloc_init
249 */
250
251lck_grp_t *
252lck_grp_alloc_init(
253 const char* grp_name,
254 lck_grp_attr_t *attr)
255{
256 lck_grp_t *grp;
257
258 if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
259 lck_grp_init(grp, grp_name, attr);
260
261 return(grp);
262}
263
264/*
265 * Routine: lck_grp_init
266 */
267
268void
269lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
270{
271 /* make sure locking infrastructure has been initialized */
272 assert(lck_grp_cnt > 0);
273
274 bzero((void *)grp, sizeof(lck_grp_t));
275
276 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
277
278 if (attr != LCK_GRP_ATTR_NULL)
279 grp->lck_grp_attr = attr->grp_attr_val;
280 else if (LcksOpts & enaLkStat)
281 grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
282 else
283 grp->lck_grp_attr = LCK_ATTR_NONE;
284
285 grp->lck_grp_refcnt = 1;
286
287 lck_mtx_lock(&lck_grp_lock);
288 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
289 lck_grp_cnt++;
290 lck_mtx_unlock(&lck_grp_lock);
291}
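
/*
 * Illustrative sketch (not part of the build): a typical client of this KPI
 * allocates a group and attributes once at init time, then creates locks
 * against them. The group name "com.example.mysubsys" and the my_subsys_*
 * identifiers below are hypothetical.
 *
 *	static lck_grp_attr_t	*my_subsys_grp_attr;
 *	static lck_grp_t	*my_subsys_grp;
 *	static lck_attr_t	*my_subsys_attr;
 *	static lck_mtx_t	*my_subsys_mtx;
 *
 *	void
 *	my_subsys_locks_init(void)
 *	{
 *		my_subsys_grp_attr = lck_grp_attr_alloc_init();
 *		my_subsys_grp = lck_grp_alloc_init("com.example.mysubsys", my_subsys_grp_attr);
 *		my_subsys_attr = lck_attr_alloc_init();
 *		my_subsys_mtx = lck_mtx_alloc_init(my_subsys_grp, my_subsys_attr);
 *	}
 *
 * Teardown, if the subsystem can ever unload, is the mirror image:
 * lck_mtx_free(), lck_attr_free(), lck_grp_free(), lck_grp_attr_free().
 */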
292
293/*
294 * Routine: lck_grp_free
295 */
296
297void
298lck_grp_free(
299 lck_grp_t *grp)
300{
301 lck_mtx_lock(&lck_grp_lock);
302 lck_grp_cnt--;
303 (void)remque((queue_entry_t)grp);
304 lck_mtx_unlock(&lck_grp_lock);
305 lck_grp_deallocate(grp);
306}
307
308
309/*
310 * Routine: lck_grp_reference
311 */
312
313void
314lck_grp_reference(
315 lck_grp_t *grp)
316{
317 (void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
318}
319
320
321/*
322 * Routine: lck_grp_deallocate
323 */
324
325void
326lck_grp_deallocate(
327 lck_grp_t *grp)
328{
329 if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
330 kfree(grp, sizeof(lck_grp_t));
331}
332
333/*
334 * Routine: lck_grp_lckcnt_incr
335 */
336
337void
338lck_grp_lckcnt_incr(
339 lck_grp_t *grp,
340 lck_type_t lck_type)
341{
342 unsigned int *lckcnt;
343
344 switch (lck_type) {
345 case LCK_TYPE_SPIN:
346 lckcnt = &grp->lck_grp_spincnt;
347 break;
348 case LCK_TYPE_MTX:
349 lckcnt = &grp->lck_grp_mtxcnt;
350 break;
351 case LCK_TYPE_RW:
352 lckcnt = &grp->lck_grp_rwcnt;
353 break;
354 default:
355 panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
 return;
356 }
357
358 (void)hw_atomic_add(lckcnt, 1);
359}
360
361/*
362 * Routine: lck_grp_lckcnt_decr
363 */
364
365void
366lck_grp_lckcnt_decr(
367 lck_grp_t *grp,
368 lck_type_t lck_type)
369{
370 unsigned int *lckcnt;
371 int updated;
372
373 switch (lck_type) {
374 case LCK_TYPE_SPIN:
375 lckcnt = &grp->lck_grp_spincnt;
376 break;
377 case LCK_TYPE_MTX:
378 lckcnt = &grp->lck_grp_mtxcnt;
379 break;
380 case LCK_TYPE_RW:
381 lckcnt = &grp->lck_grp_rwcnt;
382 break;
383 default:
384 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
385 return;
386 }
387
388 updated = (int)hw_atomic_sub(lckcnt, 1);
389 assert(updated >= 0);
390}
391
392/*
393 * Routine: lck_attr_alloc_init
394 */
395
396lck_attr_t *
397lck_attr_alloc_init(
398 void)
399{
400 lck_attr_t *attr;
401
402 if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
403 lck_attr_setdefault(attr);
404
405 return(attr);
406}
407
408
409/*
410 * Routine: lck_attr_setdefault
411 */
412
413void
414lck_attr_setdefault(
415 lck_attr_t *attr)
416{
417#if __arm__ || __arm64__
418 /* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
419 attr->lck_attr_val = LCK_ATTR_NONE;
420#elif __i386__ || __x86_64__
421#if !DEBUG
422 if (LcksOpts & enaLkDeb)
423 attr->lck_attr_val = LCK_ATTR_DEBUG;
424 else
425 attr->lck_attr_val = LCK_ATTR_NONE;
426#else
427 attr->lck_attr_val = LCK_ATTR_DEBUG;
428#endif /* !DEBUG */
429#else
430#error Unknown architecture.
431#endif /* __arm__ */
432}
433
434
435/*
436 * Routine: lck_attr_setdebug
437 */
438void
439lck_attr_setdebug(
440 lck_attr_t *attr)
441{
442 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
443}
444
445/*
446 * Routine: lck_attr_cleardebug
447 */
448void
449lck_attr_cleardebug(
450 lck_attr_t *attr)
451{
452 (void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
453}
454
455
456/*
457 * Routine: lck_attr_rw_shared_priority
458 */
459void
460lck_attr_rw_shared_priority(
461 lck_attr_t *attr)
462{
463 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
464}
465
466
467/*
468 * Routine: lck_attr_free
469 */
470void
471lck_attr_free(
472 lck_attr_t *attr)
473{
474 kfree(attr, sizeof(lck_attr_t));
475}
476
477/*
478 * Routine: hw_lock_init
479 *
480 * Initialize a hardware lock.
481 */
482void
483hw_lock_init(hw_lock_t lock)
484{
485 ordered_store_hw(lock, 0);
486}
487
488/*
489 * Routine: hw_lock_lock_contended
490 *
491 * Spin until lock is acquired or timeout expires.
492 * timeout is in mach_absolute_time ticks. Called with
493 * preemption disabled.
494 */
495
496#if __SMP__
497static unsigned int NOINLINE
498hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
499{
500 uint64_t end = 0;
501 uintptr_t holder = lock->lock_data;
502 int i;
503
504 if (timeout == 0)
505 timeout = LOCK_PANIC_TIMEOUT;
506#if CONFIG_DTRACE
507 uint64_t begin;
508 boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
509 if (__improbable(dtrace_enabled))
510 begin = mach_absolute_time();
511#endif
512 for ( ; ; ) {
513 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
514 cpu_pause();
515#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
516 holder = ordered_load_hw(lock);
517 if (holder != 0)
518 continue;
519#endif
520 if (atomic_compare_exchange(&lock->lock_data, 0, data,
521 memory_order_acquire_smp, TRUE)) {
522#if CONFIG_DTRACE
523 if (__improbable(dtrace_enabled)) {
524 uint64_t spintime = mach_absolute_time() - begin;
525 if (spintime > dtrace_spin_threshold)
526 LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, dtrace_spin_threshold);
527 }
528#endif
529 return 1;
530 }
531 }
532 if (end == 0) {
533 end = ml_get_timebase() + timeout;
534 }
535 else if (ml_get_timebase() >= end)
536 break;
537 }
538 if (do_panic) {
539 // Capture the actual time spent spinning, which may be higher than the timeout
540 // if a misbehaving interrupt stole this thread's CPU time.
541 panic("Spinlock timeout after %llu ticks, %p = %lx",
542 (ml_get_timebase() - end + timeout), lock, holder);
543 }
544 return 0;
545}
546#endif // __SMP__
547
548static inline void
549hw_lock_lock_internal(hw_lock_t lock, thread_t thread)
550{
551 uintptr_t state;
552
553 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
554#if __SMP__
555
556#if LOCK_PRETEST
557 if (ordered_load_hw(lock))
558 goto contended;
559#endif // LOCK_PRETEST
560 if (atomic_compare_exchange(&lock->lock_data, 0, state,
561 memory_order_acquire_smp, TRUE)) {
562 goto end;
563 }
564#if LOCK_PRETEST
565contended:
566#endif // LOCK_PRETEST
567 hw_lock_lock_contended(lock, state, 0, spinlock_timeout_panic);
568end:
569#else // __SMP__
570 if (lock->lock_data)
571 panic("Spinlock held %p", lock);
572 lock->lock_data = state;
573#endif // __SMP__
574#if CONFIG_DTRACE
575 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
576#endif
577 return;
578}
579
580/*
581 * Routine: hw_lock_lock
582 *
583 * Acquire lock, spinning until it becomes available,
584 * return with preemption disabled.
585 */
586void
587hw_lock_lock(hw_lock_t lock)
588{
589 thread_t thread = current_thread();
590 disable_preemption_for_thread(thread);
591 hw_lock_lock_internal(lock, thread);
592}
593
594/*
595 * Routine: hw_lock_lock_nopreempt
596 *
597 * Acquire lock, spinning until it becomes available.
598 */
599void
600hw_lock_lock_nopreempt(hw_lock_t lock)
601{
602 thread_t thread = current_thread();
603 if (__improbable(!preemption_disabled_for_thread(thread)))
604 panic("Attempt to take no-preempt spinlock %p in preemptible context", lock);
605 hw_lock_lock_internal(lock, thread);
606}
607
608/*
609 * Routine: hw_lock_to
610 *
611 * Acquire lock, spinning until it becomes available or timeout.
612 * Timeout is in mach_absolute_time ticks, return with
613 * preemption disabled.
614 */
615unsigned int
616hw_lock_to(hw_lock_t lock, uint64_t timeout)
617{
618 thread_t thread;
619 uintptr_t state;
620 unsigned int success = 0;
621
622 thread = current_thread();
623 disable_preemption_for_thread(thread);
624 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
625#if __SMP__
626
627#if LOCK_PRETEST
628 if (ordered_load_hw(lock))
629 goto contended;
630#endif // LOCK_PRETEST
631 if (atomic_compare_exchange(&lock->lock_data, 0, state,
632 memory_order_acquire_smp, TRUE)) {
633 success = 1;
634 goto end;
635 }
636#if LOCK_PRETEST
637contended:
638#endif // LOCK_PRETEST
639 success = hw_lock_lock_contended(lock, state, timeout, FALSE);
640end:
641#else // __SMP__
642 (void)timeout;
643 if (ordered_load_hw(lock) == 0) {
644 ordered_store_hw(lock, state);
645 success = 1;
646 }
647#endif // __SMP__
648#if CONFIG_DTRACE
649 if (success)
650 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
651#endif
652 return success;
653}
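
/*
 * Illustrative sketch (not part of the build): hw_lock_to() takes its timeout
 * in mach_absolute_time ticks, so a caller with a nanosecond budget converts
 * first. The my_lock variable (a hw_lock_data_t) and the 100us budget below
 * are hypothetical.
 *
 *	uint64_t ticks;
 *
 *	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &ticks);
 *	if (!hw_lock_to(&my_lock, ticks))
 *		panic("example: spinlock %p not acquired within 100us", &my_lock);
 *	// ... critical section, preemption is disabled ...
 *	hw_lock_unlock(&my_lock);
 */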
654
655/*
656 * Routine: hw_lock_try
657 *
658 * returns with preemption disabled on success.
659 */
660static inline unsigned int
661hw_lock_try_internal(hw_lock_t lock, thread_t thread)
662{
663 int success = 0;
664
665#if __SMP__
666#if LOCK_PRETEST
667 if (ordered_load_hw(lock))
668 goto failed;
669#endif // LOCK_PRETEST
670 success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
671 memory_order_acquire_smp, FALSE);
672#else
673 if (lock->lock_data == 0) {
674 lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
675 success = 1;
676 }
677#endif // __SMP__
678
679#if LOCK_PRETEST
680failed:
681#endif // LOCK_PRETEST
682#if CONFIG_DTRACE
683 if (success)
684 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
685#endif
686 return success;
687}
688
689unsigned int
690hw_lock_try(hw_lock_t lock)
691{
692 thread_t thread = current_thread();
693 disable_preemption_for_thread(thread);
694 unsigned int success = hw_lock_try_internal(lock, thread);
695 if (!success)
696 enable_preemption();
697 return success;
698}
699
700unsigned int
701hw_lock_try_nopreempt(hw_lock_t lock)
702{
703 thread_t thread = current_thread();
704 if (__improbable(!preemption_disabled_for_thread(thread)))
705 panic("Attempt to test no-preempt spinlock %p in preemptible context", lock);
706 return hw_lock_try_internal(lock, thread);
707}
708
709/*
710 * Routine: hw_lock_unlock
711 *
712 * Unconditionally release lock, release preemption level.
713 */
714static inline void
715hw_lock_unlock_internal(hw_lock_t lock)
716{
717 __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
718#if __arm__ || __arm64__
719 // ARM tests are only for open-source exclusion
720 set_event();
721#endif // __arm__ || __arm64__
722#if CONFIG_DTRACE
723 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
724#endif /* CONFIG_DTRACE */
725}
726
727void
728hw_lock_unlock(hw_lock_t lock)
729{
730 hw_lock_unlock_internal(lock);
731 enable_preemption();
732}
733
734void
735hw_lock_unlock_nopreempt(hw_lock_t lock)
736{
737 if (__improbable(!preemption_disabled_for_thread(current_thread())))
738 panic("Attempt to release no-preempt spinlock %p in preemptible context", lock);
739 hw_lock_unlock_internal(lock);
740}
741
742/*
743 * Routine: hw_lock_held
 *
 * Does not change preemption state. N.B. Racy, of course.
745 */
746unsigned int
747hw_lock_held(hw_lock_t lock)
748{
749 return (ordered_load_hw(lock) != 0);
750}
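
/*
 * Illustrative sketch (not part of the build): basic hw_lock usage. The
 * my_dev structure is hypothetical; its embedded hw_lock_data_t protects a
 * short critical section. hw_lock_lock()/hw_lock_try() return with
 * preemption disabled, and hw_lock_unlock() re-enables it.
 *
 *	struct my_dev {
 *		hw_lock_data_t	lock;
 *		uint32_t	count;
 *	};
 *
 *	void
 *	my_dev_init(struct my_dev *dev)
 *	{
 *		hw_lock_init(&dev->lock);
 *	}
 *
 *	void
 *	my_dev_bump(struct my_dev *dev)
 *	{
 *		if (!hw_lock_try(&dev->lock))	// opportunistic attempt
 *			hw_lock_lock(&dev->lock);	// otherwise spin until acquired
 *		dev->count++;
 *		hw_lock_unlock(&dev->lock);
 *	}
 */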
751
752/*
753 * Routine: lck_spin_sleep
754 */
755wait_result_t
756lck_spin_sleep(
757 lck_spin_t *lck,
758 lck_sleep_action_t lck_sleep_action,
759 event_t event,
760 wait_interrupt_t interruptible)
761{
762 wait_result_t res;
763
764 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
765 panic("Invalid lock sleep action %x\n", lck_sleep_action);
766
767 res = assert_wait(event, interruptible);
768 if (res == THREAD_WAITING) {
769 lck_spin_unlock(lck);
770 res = thread_block(THREAD_CONTINUE_NULL);
771 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
772 lck_spin_lock(lck);
773 }
774 else
775 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
776 lck_spin_unlock(lck);
777
778 return res;
779}
780
781
782/*
783 * Routine: lck_spin_sleep_deadline
784 */
785wait_result_t
786lck_spin_sleep_deadline(
787 lck_spin_t *lck,
788 lck_sleep_action_t lck_sleep_action,
789 event_t event,
790 wait_interrupt_t interruptible,
791 uint64_t deadline)
792{
793 wait_result_t res;
794
795 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
796 panic("Invalid lock sleep action %x\n", lck_sleep_action);
797
798 res = assert_wait_deadline(event, interruptible, deadline);
799 if (res == THREAD_WAITING) {
800 lck_spin_unlock(lck);
801 res = thread_block(THREAD_CONTINUE_NULL);
802 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
803 lck_spin_lock(lck);
804 }
805 else
806 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
807 lck_spin_unlock(lck);
808
809 return res;
810}
811
812/*
813 * Routine: lck_mtx_sleep
814 */
815wait_result_t
816lck_mtx_sleep(
817 lck_mtx_t *lck,
818 lck_sleep_action_t lck_sleep_action,
819 event_t event,
820 wait_interrupt_t interruptible)
821{
822 wait_result_t res;
823 thread_t thread = current_thread();
824
825 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
826 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
827
828 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
829 panic("Invalid lock sleep action %x\n", lck_sleep_action);
830
831 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
832 /*
833 * We overload the RW lock promotion to give us a priority ceiling
834 * during the time that this thread is asleep, so that when it
835 * is re-awakened (and not yet contending on the mutex), it is
836 * runnable at a reasonably high priority.
837 */
838 thread->rwlock_count++;
839 }
840
841 res = assert_wait(event, interruptible);
842 if (res == THREAD_WAITING) {
843 lck_mtx_unlock(lck);
844 res = thread_block(THREAD_CONTINUE_NULL);
845 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
846 if ((lck_sleep_action & LCK_SLEEP_SPIN))
847 lck_mtx_lock_spin(lck);
848 else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS))
849 lck_mtx_lock_spin_always(lck);
850 else
851 lck_mtx_lock(lck);
852 }
853 }
854 else
855 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
856 lck_mtx_unlock(lck);
857
858 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
859 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
860 /* sched_flags checked without lock, but will be rechecked while clearing */
861 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
862 }
863 }
864
865 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
866
867 return res;
868}
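
/*
 * Illustrative sketch (not part of the build): the usual pattern around
 * lck_mtx_sleep() is a condition re-checked under the mutex. The queue
 * structure and field names below are hypothetical.
 *
 *	lck_mtx_lock(&q->lock);
 *	while (q->nitems == 0) {
 *		// Drops q->lock while blocked, reacquires it before returning.
 *		(void) lck_mtx_sleep(&q->lock, LCK_SLEEP_DEFAULT, (event_t)q, THREAD_UNINT);
 *	}
 *	item = my_dequeue(q);
 *	lck_mtx_unlock(&q->lock);
 *
 * The producer side enqueues under the same mutex and then calls
 * thread_wakeup((event_t)q) to wake any sleepers.
 */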
869
870
871/*
872 * Routine: lck_mtx_sleep_deadline
873 */
874wait_result_t
875lck_mtx_sleep_deadline(
876 lck_mtx_t *lck,
877 lck_sleep_action_t lck_sleep_action,
878 event_t event,
879 wait_interrupt_t interruptible,
880 uint64_t deadline)
881{
882 wait_result_t res;
883 thread_t thread = current_thread();
884
885 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
886 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
887
888 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
889 panic("Invalid lock sleep action %x\n", lck_sleep_action);
890
891 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
892 /*
893 * See lck_mtx_sleep().
894 */
895 thread->rwlock_count++;
896 }
897
898 res = assert_wait_deadline(event, interruptible, deadline);
899 if (res == THREAD_WAITING) {
900 lck_mtx_unlock(lck);
901 res = thread_block(THREAD_CONTINUE_NULL);
902 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
903 if ((lck_sleep_action & LCK_SLEEP_SPIN))
904 lck_mtx_lock_spin(lck);
905 else
906 lck_mtx_lock(lck);
907 }
908 }
909 else
910 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
911 lck_mtx_unlock(lck);
912
913 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
914 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
915 /* sched_flags checked without lock, but will be rechecked while clearing */
916 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
917 }
918 }
919
920 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
921
922 return res;
923}
924
925/*
926 * Lock Boosting Invariants:
927 *
928 * The lock owner is always promoted to the max priority of all its waiters.
929 * Max priority is capped at MAXPRI_PROMOTE.
930 *
931 * lck_mtx_pri being set implies that the lock owner is promoted to at least lck_mtx_pri
932 * This prevents the thread from dropping in priority while holding a mutex
933 * (note: Intel locks currently don't do this, to avoid thread lock churn)
934 *
935 * thread->promotions has a +1 for every mutex currently promoting the thread
936 * and a +1 for was_promoted_on_wakeup being set.
937 * TH_SFLAG_PROMOTED is set on a thread whenever it has any promotions
938 * from any mutex (i.e. thread->promotions != 0)
939 *
940 * was_promoted_on_wakeup is set on a thread which is woken up by a mutex when
941 * it raises the priority of the woken thread to match lck_mtx_pri.
942 * It can be set for multiple iterations of wait, fail to acquire, re-wait, etc
943 * was_promoted_on_wakeup being set always implies a +1 promotions count.
944 *
945 * The last waiter is not given a promotion when it wakes up or acquires the lock.
946 * When the last waiter is waking up, a new contender can always come in and
947 * steal the lock without having to wait for the last waiter to make forward progress.
948 *
949 * lck_mtx_waiters has a +1 for every waiter currently between wait and acquire
950 * This prevents us from asserting that every wakeup wakes up a thread.
951 * This also causes excess thread_wakeup calls in the unlock path.
952 * It can only be fooled into thinking there are more waiters than are
953 * actually blocked, not less.
954 * It does allow us to reduce the complexity of the lock state.
955 *
956 * This also means that a starved bg thread as the last waiter could end up
957 * keeping the lock in the contended state for a long period of time, which
958 * may keep lck_mtx_pri artificially high for a very long time even though
959 * it is not participating or blocking anyone else.
960 * Intel locks don't have this problem because they can go uncontended
961 * as soon as there are no blocked threads involved.
962 */
963
964/*
965 * Routine: lck_mtx_lock_wait
966 *
967 * Invoked in order to wait on contention.
968 *
969 * Called with the interlock locked and
970 * returns it unlocked.
971 *
972 * Always aggressively sets the owning thread to promoted,
973 * even if it is already at the same or higher priority.
974 * This prevents it from lowering its own priority while holding the lock.
975 *
976 * TODO: Come up with a more efficient way to handle same-priority promotions
977 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
978 */
979void
980lck_mtx_lock_wait (
981 lck_mtx_t *lck,
982 thread_t holder)
983{
984 thread_t self = current_thread();
985 lck_mtx_t *mutex;
986 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
987
988#if CONFIG_DTRACE
989 uint64_t sleep_start = 0;
990
991 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
992 sleep_start = mach_absolute_time();
993 }
994#endif
995
996 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
997 mutex = lck;
998 else
999 mutex = &lck->lck_mtx_ptr->lck_mtx;
1000
1001 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1002 trace_lck, (uintptr_t)thread_tid(holder), 0, 0, 0);
1003
1004 spl_t s = splsched();
1005 thread_lock(holder);
1006
1007 assert_promotions_invariant(holder);
1008
1009 if ((holder->sched_flags & TH_SFLAG_DEPRESS) == 0)
1010 assert(holder->sched_pri >= mutex->lck_mtx_pri);
1011
1012 integer_t priority = self->sched_pri;
1013 priority = MAX(priority, self->base_pri);
1014 priority = MAX(priority, BASEPRI_DEFAULT);
1015 priority = MIN(priority, MAXPRI_PROMOTE);
1016
1017 if (mutex->lck_mtx_pri == 0) {
1018 /* This is the first promotion for this mutex */
1019 if (holder->promotions++ == 0) {
1020 /* This is the first promotion for holder */
1021 sched_thread_promote_to_pri(holder, priority, trace_lck);
1022 } else {
1023 /* Holder was previously promoted due to a different mutex, raise to match this one */
1024 sched_thread_update_promotion_to_pri(holder, priority, trace_lck);
1025 }
1026 } else {
1027 /* Holder was previously promoted due to this mutex, check if the pri needs to go up */
1028 sched_thread_update_promotion_to_pri(holder, priority, trace_lck);
1029 }
1030
1031 assert(holder->promotions > 0);
1032 assert(holder->promotion_priority >= priority);
1033
1034 if ((holder->sched_flags & TH_SFLAG_DEPRESS) == 0)
1035 assert(holder->sched_pri >= mutex->lck_mtx_pri);
1036
1037 assert_promotions_invariant(holder);
1038
1039 thread_unlock(holder);
1040 splx(s);
1041
1042 if (mutex->lck_mtx_pri < priority)
1043 mutex->lck_mtx_pri = priority;
1044
1045 if (self->waiting_for_mutex == NULL) {
1046 self->waiting_for_mutex = mutex;
1047 mutex->lck_mtx_waiters++;
1048 }
1049
1050 assert(self->waiting_for_mutex == mutex);
1051
1052 thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
1053 assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1054 lck_mtx_ilk_unlock(mutex);
1055
1056 thread_block(THREAD_CONTINUE_NULL);
1057
1058 assert(mutex->lck_mtx_waiters > 0);
1059
1060 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1061#if CONFIG_DTRACE
1062 /*
1063 * Record the DTrace lockstat probe for blocking, block time
1064 * measured from when we were entered.
1065 */
1066 if (sleep_start) {
1067 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1068 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1069 mach_absolute_time() - sleep_start);
1070 } else {
1071 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1072 mach_absolute_time() - sleep_start);
1073 }
1074 }
1075#endif
1076}
1077
1078/*
1079 * Routine: lck_mtx_lock_acquire
1080 *
1081 * Invoked on acquiring the mutex when there is
1082 * contention.
1083 *
1084 * Returns the current number of waiters.
1085 *
1086 * Called with the interlock locked.
1087 */
1088int
1089lck_mtx_lock_acquire(
1090 lck_mtx_t *lck)
1091{
1092 thread_t thread = current_thread();
1093 lck_mtx_t *mutex;
1094 integer_t priority;
1095
1096 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1097 mutex = lck;
1098 else
1099 mutex = &lck->lck_mtx_ptr->lck_mtx;
1100
1101 /*
1102 * If waiting_for_mutex is set, then this thread was previously blocked waiting on this lock
1103 * If it's un-set, then this thread stole the lock from another waiter.
1104 */
1105 if (thread->waiting_for_mutex == mutex) {
1106 assert(mutex->lck_mtx_waiters > 0);
1107
1108 thread->waiting_for_mutex = NULL;
1109 mutex->lck_mtx_waiters--;
1110 }
1111
1112 assert(thread->waiting_for_mutex == NULL);
1113
1114 if (mutex->lck_mtx_waiters > 0) {
1115 priority = mutex->lck_mtx_pri;
1116 } else {
1117 /* I was the last waiter, so the mutex is no longer promoted or contended */
1118 mutex->lck_mtx_pri = 0;
1119 priority = 0;
1120 }
1121
1122 if (priority || thread->was_promoted_on_wakeup) {
1123 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1124
1125 /*
1126 * Note: was_promoted_on_wakeup can happen for multiple wakeups in a row without
1127 * an intervening acquire if a thread keeps failing to acquire the lock
1128 *
1129 * If priority is set but was_promoted_on_wakeup is not,
1130 * then this is a lock steal of a promoted mutex, so it needs a ++ of promotions.
1131 *
1132 * If was_promoted_on_wakeup is set but priority is not,
1133 * then this was the last waiter, and the last waiter does not need a promotion.
1134 */
1135
1136 spl_t s = splsched();
1137 thread_lock(thread);
1138
1139 assert_promotions_invariant(thread);
1140
1141 if (thread->was_promoted_on_wakeup)
1142 assert(thread->promotions > 0);
1143
1144 if (priority) {
1145 if (thread->promotions++ == 0) {
1146 /* This is the first promotion for holder */
1147 sched_thread_promote_to_pri(thread, priority, trace_lck);
1148 } else {
1149 /*
1150 * Holder was previously promoted due to a different mutex, raise to match this one
1151 * Or, this thread was promoted on wakeup but someone else later contended on mutex
1152 * at higher priority before we got here
1153 */
1154 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
1155 }
1156 }
1157
1158 if (thread->was_promoted_on_wakeup) {
1159 thread->was_promoted_on_wakeup = 0;
1160 if (--thread->promotions == 0)
1161 sched_thread_unpromote(thread, trace_lck);
1162 }
1163
1164 assert_promotions_invariant(thread);
1165
1166 if (priority && (thread->sched_flags & TH_SFLAG_DEPRESS) == 0)
1167 assert(thread->sched_pri >= priority);
1168
1169 thread_unlock(thread);
1170 splx(s);
1171 }
1172
1173#if CONFIG_DTRACE
1174 if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
1175 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1176 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
1177 } else {
1178 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
1179 }
1180 }
1181#endif
1182 return (mutex->lck_mtx_waiters);
1183}
1184
1185/*
1186 * Routine: lck_mtx_unlock_wakeup
1187 *
1188 * Invoked on unlock when there is contention.
1189 *
1190 * Called with the interlock locked.
1191 *
1192 * TODO: the 'waiters' flag does not indicate waiters exist on the waitqueue,
1193 * it indicates waiters exist between wait and acquire.
1194 * This means that here we may do extra unneeded wakeups.
1195 */
1196void
1197lck_mtx_unlock_wakeup (
1198 lck_mtx_t *lck,
1199 thread_t holder)
1200{
1201 thread_t thread = current_thread();
1202 lck_mtx_t *mutex;
1203 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1204
1205 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1206 mutex = lck;
1207 else
1208 mutex = &lck->lck_mtx_ptr->lck_mtx;
1209
1210 if (thread != holder)
1211 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
1212
1213 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1214 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1215
1216 assert(mutex->lck_mtx_waiters > 0);
1217 assert(thread->was_promoted_on_wakeup == 0);
1218 assert(thread->waiting_for_mutex == NULL);
1219
1220 /*
1221 * The waiters count does not precisely match the number of threads on the waitqueue,
1222 * therefore we cannot assert that we actually wake up a thread here
1223 */
1224 if (mutex->lck_mtx_waiters > 1)
1225 thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), mutex->lck_mtx_pri);
1226 else
1227 thread_wakeup_one(LCK_MTX_EVENT(mutex));
1228
1229 /* When mutex->lck_mtx_pri is set, it means I as the owner have a promotion. */
1230 if (mutex->lck_mtx_pri) {
1231 spl_t s = splsched();
1232 thread_lock(thread);
1233
1234 assert(thread->promotions > 0);
1235
1236 assert_promotions_invariant(thread);
1237
1238 if (--thread->promotions == 0)
1239 sched_thread_unpromote(thread, trace_lck);
1240
1241 assert_promotions_invariant(thread);
1242
1243 thread_unlock(thread);
1244 splx(s);
1245 }
1246
1247 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1248}
1249
1250/*
1251 * Callout from the waitqueue code from inside thread_wakeup_one_with_pri
1252 * At splsched, thread is pulled from waitq, still locked, not on runqueue yet
1253 *
1254 * We always make sure to set the promotion flag, even if the thread is already at this priority,
1255 * so that it doesn't go down.
1256 */
1257void
1258lck_mtx_wakeup_adjust_pri(thread_t thread, integer_t priority)
1259{
1260 assert(priority <= MAXPRI_PROMOTE);
1261 assert(thread->waiting_for_mutex != NULL);
1262
1263 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(thread->waiting_for_mutex);
1264
1265 assert_promotions_invariant(thread);
1266
1267 if (thread->was_promoted_on_wakeup) {
1268 /* Thread was previously promoted, but contended again */
1269 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
1270 return;
1271 }
1272
1273 if (thread->promotions > 0 && priority <= thread->promotion_priority) {
1274 /*
1275 * Thread is already promoted to the right level, no need to do more
1276 * I can draft off of another promotion here, which is OK
1277 * because I know the thread will soon run acquire to get its own promotion
1278 */
1279 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == TH_SFLAG_PROMOTED);
1280 return;
1281 }
1282
1283 thread->was_promoted_on_wakeup = 1;
1284
1285 if (thread->promotions++ == 0) {
1286 /* This is the first promotion for this thread */
1287 sched_thread_promote_to_pri(thread, priority, trace_lck);
1288 } else {
1289 /* Holder was previously promoted due to a different mutex, raise to match this one */
1290 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
1291 }
1292
1293 assert_promotions_invariant(thread);
1294}
1295
1296
1297/*
1298 * Routine: mutex_pause
1299 *
1300 * Called by former callers of simple_lock_pause().
1301 */
1302#define MAX_COLLISION_COUNTS 32
1303#define MAX_COLLISION 8
1304
1305unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1306
1307uint32_t collision_backoffs[MAX_COLLISION] = {
1308 10, 50, 100, 200, 400, 600, 800, 1000
1309};
1310
1311
1312void
1313mutex_pause(uint32_t collisions)
1314{
1315 wait_result_t wait_result;
1316 uint32_t back_off;
1317
1318 if (collisions >= MAX_COLLISION_COUNTS)
1319 collisions = MAX_COLLISION_COUNTS - 1;
1320 max_collision_count[collisions]++;
1321
1322 if (collisions >= MAX_COLLISION)
1323 collisions = MAX_COLLISION - 1;
1324 back_off = collision_backoffs[collisions];
1325
1326 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1327 assert(wait_result == THREAD_WAITING);
1328
1329 wait_result = thread_block(THREAD_CONTINUE_NULL);
1330 assert(wait_result == THREAD_TIMED_OUT);
1331}
1332
1333
1334unsigned int mutex_yield_wait = 0;
1335unsigned int mutex_yield_no_wait = 0;
1336
1337void
1338lck_mtx_yield(
1339 lck_mtx_t *lck)
1340{
1341 int waiters;
1342
1343#if DEBUG
1344 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1345#endif /* DEBUG */
1346
1347 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
1348 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1349 else
1350 waiters = lck->lck_mtx_waiters;
1351
1352 if ( !waiters) {
1353 mutex_yield_no_wait++;
1354 } else {
1355 mutex_yield_wait++;
1356 lck_mtx_unlock(lck);
1357 mutex_pause(0);
1358 lck_mtx_lock(lck);
1359 }
1360}
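
/*
 * Illustrative sketch (not part of the build): lck_mtx_yield() is meant for
 * long loops run under a mutex, letting waiters in periodically without
 * restructuring the loop. The table walk below is hypothetical.
 *
 *	lck_mtx_lock(&table->lock);
 *	for (i = 0; i < table->nentries; i++) {
 *		scrub_entry(&table->entries[i]);
 *		lck_mtx_yield(&table->lock);	// drops and retakes the lock only if someone is waiting
 *	}
 *	lck_mtx_unlock(&table->lock);
 */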
1361
1362
1363/*
1364 * Routine: lck_rw_sleep
1365 */
1366wait_result_t
1367lck_rw_sleep(
1368 lck_rw_t *lck,
1369 lck_sleep_action_t lck_sleep_action,
1370 event_t event,
1371 wait_interrupt_t interruptible)
1372{
1373 wait_result_t res;
1374 lck_rw_type_t lck_rw_type;
1375 thread_t thread = current_thread();
1376
1377 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1378 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1379
1380 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1381 /*
1382 * Although we are dropping the RW lock, the intent in most cases
1383 * is that this thread remains as an observer, since it may hold
1384 * some secondary resource, but must yield to avoid deadlock. In
1385 * this situation, make sure that the thread is boosted to the
1386 * RW lock ceiling while blocked, so that it can re-acquire the
1387 * RW lock at that priority.
1388 */
1389 thread->rwlock_count++;
1390 }
1391
1392 res = assert_wait(event, interruptible);
1393 if (res == THREAD_WAITING) {
1394 lck_rw_type = lck_rw_done(lck);
1395 res = thread_block(THREAD_CONTINUE_NULL);
1396 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1397 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1398 lck_rw_lock(lck, lck_rw_type);
1399 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1400 lck_rw_lock_exclusive(lck);
1401 else
1402 lck_rw_lock_shared(lck);
1403 }
1404 }
1405 else
1406 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1407 (void)lck_rw_done(lck);
1408
1409 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1410 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1411 /* sched_flags checked without lock, but will be rechecked while clearing */
1412
1413 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1414 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1415
1416 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1417 }
1418 }
1419
1420 return res;
1421}
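
/*
 * Illustrative sketch (not part of the build): lck_rw_sleep() drops the rw
 * lock in whichever mode it is held and, unless LCK_SLEEP_UNLOCK is given,
 * reacquires it on wakeup, either in the original mode or in the mode forced
 * by LCK_SLEEP_SHARED / LCK_SLEEP_EXCLUSIVE. The cache object below is
 * hypothetical.
 *
 *	lck_rw_lock_shared(&cache->rwlock);
 *	while (cache->rebuilding) {
 *		// Wake up holding the lock exclusive, ready to modify the cache.
 *		(void) lck_rw_sleep(&cache->rwlock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&cache->rebuilding, THREAD_UNINT);
 *	}
 */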
1422
1423
1424/*
1425 * Routine: lck_rw_sleep_deadline
1426 */
1427wait_result_t
1428lck_rw_sleep_deadline(
1429 lck_rw_t *lck,
1430 lck_sleep_action_t lck_sleep_action,
1431 event_t event,
1432 wait_interrupt_t interruptible,
1433 uint64_t deadline)
1434{
1435 wait_result_t res;
1436 lck_rw_type_t lck_rw_type;
1437 thread_t thread = current_thread();
1438
1439 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1440 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1441
1442 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1443 thread->rwlock_count++;
1444 }
1445
1446 res = assert_wait_deadline(event, interruptible, deadline);
1447 if (res == THREAD_WAITING) {
1448 lck_rw_type = lck_rw_done(lck);
1449 res = thread_block(THREAD_CONTINUE_NULL);
1450 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1451 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1452 lck_rw_lock(lck, lck_rw_type);
1453 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1454 lck_rw_lock_exclusive(lck);
1455 else
1456 lck_rw_lock_shared(lck);
1457 }
1458 }
1459 else
1460 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1461 (void)lck_rw_done(lck);
1462
1463 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1464 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1465 /* sched_flags checked without lock, but will be rechecked while clearing */
1466
1467 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1468 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1469
1470 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1471 }
1472 }
1473
1474 return res;
1475}
1476
1477/*
1478 * Reader-writer lock promotion
1479 *
1480 * We support a limited form of reader-writer
1481 * lock promotion whose effects are:
1482 *
1483 * * Qualifying threads have decay disabled
1484 * * Scheduler priority is reset to a floor of
1485 * their statically assigned priority
1486 * or MINPRI_RWLOCK
1487 *
1488 * The rationale is that lck_rw_ts do not have
1489 * a single owner, so we cannot apply a directed
1490 * priority boost from all waiting threads
1491 * to all holding threads without maintaining
1492 * lists of all shared owners and all waiting
1493 * threads for every lock.
1494 *
1495 * Instead (and to preserve the uncontended fast-
1496 * path), acquiring (or attempting to acquire)
1497 * a RW lock in shared or exclusive lock increments
1498 * a per-thread counter. Only if that thread stops
1499 * making forward progress (for instance blocking
1500 * on a mutex, or being preempted) do we consult
1501 * the counter and apply the priority floor.
1502 * When the thread becomes runnable again (or in
1503 * the case of preemption it never stopped being
1504 * runnable), it has the priority boost and should
1505 * be in a good position to run on the CPU and
1506 * release all RW locks (at which point the priority
1507 * boost is cleared).
1508 *
1509 * Care must be taken to ensure that priority
1510 * boosts are not retained indefinitely, since unlike
1511 * mutex priority boosts (where the boost is tied
1512 * to the mutex lifecycle), the boost is tied
1513 * to the thread and independent of any particular
1514 * lck_rw_t. Assertions are in place on return
1515 * to userspace so that the boost is not held
1516 * indefinitely.
1517 *
1518 * The routines that increment/decrement the
1519 * per-thread counter should err on the side of
1520 * incrementing any time a preemption is possible
1521 * and the lock would be visible to the rest of the
1522 * system as held (so it should be incremented before
1523 * interlocks are dropped/preemption is enabled, or
1524 * before a CAS is executed to acquire the lock).
1525 *
1526 */
1527
1528/*
1529 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1530 * lock is released by a thread (if a promotion was active)
1531 */
1532void lck_rw_clear_promotion(thread_t thread, uintptr_t trace_obj)
1533{
1534 assert(thread->rwlock_count == 0);
1535
1536 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1537 spl_t s = splsched();
1538 thread_lock(thread);
1539
1540 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED)
1541 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
1542
1543 thread_unlock(thread);
1544 splx(s);
1545}
1546
1547/*
1548 * Callout from context switch if the thread goes
1549 * off core with a positive rwlock_count
1550 *
1551 * Called at splsched with the thread locked
1552 */
1553void
1554lck_rw_set_promotion_locked(thread_t thread)
1555{
1556 if (LcksOpts & disLkRWPrio)
1557 return;
1558
1559 assert(thread->rwlock_count > 0);
1560
1561 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED))
1562 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
1563}
1564
1565kern_return_t
1566host_lockgroup_info(
1567 host_t host,
1568 lockgroup_info_array_t *lockgroup_infop,
1569 mach_msg_type_number_t *lockgroup_infoCntp)
1570{
1571 lockgroup_info_t *lockgroup_info_base;
1572 lockgroup_info_t *lockgroup_info;
1573 vm_offset_t lockgroup_info_addr;
1574 vm_size_t lockgroup_info_size;
1575 vm_size_t lockgroup_info_vmsize;
1576 lck_grp_t *lck_grp;
1577 unsigned int i;
1578 vm_map_copy_t copy;
1579 kern_return_t kr;
1580
1581 if (host == HOST_NULL)
1582 return KERN_INVALID_HOST;
1583
1584 lck_mtx_lock(&lck_grp_lock);
1585
1586 lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1587 lockgroup_info_vmsize = round_page(lockgroup_info_size);
1588 kr = kmem_alloc_pageable(ipc_kernel_map,
1589 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
1590 if (kr != KERN_SUCCESS) {
1591 lck_mtx_unlock(&lck_grp_lock);
1592 return(kr);
1593 }
1594
1595 lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1596 lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1597 lockgroup_info = lockgroup_info_base;
1598
1599 for (i = 0; i < lck_grp_cnt; i++) {
1600
1601 lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1602 lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
1603 lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
1604 lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
1605 lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
1606 lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;
1607
1608 lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1609 lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
1610 lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
1611 lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
1612 lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
1613 lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
1614 lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
1615 lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
1616 lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;
1617
1618 lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1619 lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
1620 lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
1621 lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
1622 lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
1623 lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
1624 lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
1625 lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
1626 lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;
1627
1628 (void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1629
1630 lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1631 lockgroup_info++;
1632 }
1633
1634 *lockgroup_infoCntp = lck_grp_cnt;
1635 lck_mtx_unlock(&lck_grp_lock);
1636
1637 if (lockgroup_info_size != lockgroup_info_vmsize)
1638 bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1639
1640 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1641 (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1642 assert(kr == KERN_SUCCESS);
1643
1644 *lockgroup_infop = (lockgroup_info_t *) copy;
1645
1646 return(KERN_SUCCESS);
1647}
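
/*
 * Illustrative sketch (not part of the build): host_lockgroup_info() is the
 * kernel side of a MIG routine. A user-space caller receives the array as
 * out-of-line memory and is expected to deallocate it; error handling is
 * omitted here for brevity.
 *
 *	lockgroup_info_array_t	info;
 *	mach_msg_type_number_t	count;
 *
 *	if (host_lockgroup_info(mach_host_self(), &info, &count) == KERN_SUCCESS) {
 *		for (unsigned int i = 0; i < count; i++)
 *			printf("%s: mutex util %llu\n", info[i].lockgroup_name,
 *			    (unsigned long long)info[i].lock_mtx_util_cnt);
 *		vm_deallocate(mach_task_self(), (vm_address_t)info,
 *		    count * sizeof(info[0]));
 *	}
 */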
1648
1649/*
1650 * Atomic primitives, prototyped in kern/simple_lock.h
1651 * Noret versions are more efficient on some architectures
1652 */
1653
1654uint32_t
1655hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
1656{
1657 ALIGN_TEST(dest,uint32_t);
1658 return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
1659}
1660
1661uint32_t
1662hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
1663{
1664 ALIGN_TEST(dest,uint32_t);
1665 return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
1666}
1667
1668uint32_t
1669hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
1670{
1671 ALIGN_TEST(dest,uint32_t);
1672 return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
1673}
1674
1675void
1676hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
1677{
1678 ALIGN_TEST(dest,uint32_t);
1679 __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1680}
1681
1682uint32_t
1683hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
1684{
1685 ALIGN_TEST(dest,uint32_t);
1686 return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
1687}
1688
1689void
1690hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
1691{
1692 ALIGN_TEST(dest,uint32_t);
1693 __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1694}
1695
1696uint32_t
1697hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
1698{
1699 ALIGN_TEST(dest,uint32_t);
1700 return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
1701 memory_order_acq_rel_smp, memory_order_relaxed);
1702}
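
/*
 * Illustrative sketch (not part of the build): hw_compare_and_store() returns
 * non-zero only if *dest still held oldval, so read-modify-write updates that
 * cannot be expressed with the fetch-op primitives above are retried in a
 * loop. The saturating increment below is hypothetical.
 *
 *	static uint32_t
 *	example_saturating_inc(volatile uint32_t *counter)
 *	{
 *		uint32_t old, new;
 *
 *		do {
 *			old = *counter;
 *			new = (old == UINT32_MAX) ? old : old + 1;
 *		} while (!hw_compare_and_store(old, new, counter));
 *		return new;
 *	}
 */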
1703
1704