/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/thread.h>

#include <mach/machine/sdt.h>

#include <machine/cpu_data.h>
#include <machine/machine_cpu.h>

#if !LCK_MTX_USE_ARCH

/*
 * lck_mtx_t
 * ~~~~~~~~~
 *
 * Kernel mutexes in this implementation are made of four 32-bit words:
 *
 * - word 0: turnstile compact ID (24 bits) and the 0x22 lock tag
 * - word 1: padding (to be used for group compact IDs)
 * - word 2: mutex state (lock owner + interlock, spin and waiters bits),
 *   referred to as "data" in the code.
 * - word 3: adaptive spin and interlock MCS queue tails.
 *
 * The 64-bit word made of the last two words is referred to
 * as the "mutex state" in the code.
 *
 *
 * Core serialization rules
 * ~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The mutex has a bit (lck_mtx_t::lck_mtx.ilocked or bit LCK_MTX_ILOCK
 * of the data word) that serves as a spinlock for the mutex state.
 *
 *
 * Updates to the lock fields must follow these rules:
 *
 * - It is OK to "steal" the mutex (updating its data field) if no one
 *   holds the interlock.
 *
 * - Holding the interlock allows its holder to update the first 3 words
 *   of the kernel mutex without using RMW atomics (plain stores are OK).
 *
 * - Holding the interlock is required for a thread to remove itself
 *   from the adaptive spin queue.
 *
 * - Threads can enqueue themselves onto the adaptive spin wait queue
 *   or the interlock wait queue at any time.
 *
 *
 * Waiters bit and turnstiles
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The turnstile on a kernel mutex is set by waiters, and cleared
 * once they have all been resumed and successfully acquired the lock.
 *
 * LCK_MTX_NEEDS_WAKEUP being set (always with an owner set too)
 * forces threads into the lck_mtx_unlock slowpath,
 * in order to evaluate whether lck_mtx_unlock_wakeup() must be called.
 *
 * As a result, it only needs to be set at select times:
 *
 * - when a thread blocks and "snitches" on the current owner thread,
 *   so that when that thread unlocks it calls wakeup,
 *
 * - when a thread that was woken up resumes its work and becomes
 *   the inheritor.
 */
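
/*
 * For illustration only (a sketch, not an authoritative definition; the
 * real lck_mtx_t layout lives in the locks headers included above): the
 * "data" word of a mutex held in spin mode by the current thread, with
 * profiling enabled, would be composed from the bits used throughout
 * this file as
 *
 *	uint32_t data = current_thread()->ctid |
 *	    LCK_MTX_PROFILE | LCK_MTX_ILOCK | LCK_MTX_SPIN_MODE;
 *
 * lck_mtx_lock_contended() below builds the word this way for
 * spin-mode acquisitions.
 */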

#define ADAPTIVE_SPIN_ENABLE 0x1

#define NOINLINE __attribute__((noinline))
#define LCK_MTX_EVENT(lck) CAST_EVENT64_T(&(lck)->lck_mtx.data)
#define LCK_EVENT_TO_MUTEX(e) __container_of((uint32_t *)(e), lck_mtx_t, lck_mtx.data)
#define LCK_MTX_HAS_WAITERS(l) ((l)->lck_mtx.data & LCK_MTX_NEEDS_WAKEUP)

#if DEVELOPMENT || DEBUG
TUNABLE(bool, LckDisablePreemptCheck, "-disable_mtx_chk", false);
#endif /* DEVELOPMENT || DEBUG */

extern unsigned int not_in_kdp;

KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);

#define LCK_MTX_NULL_CTID 0x00000000u

__enum_decl(lck_mtx_mode_t, uint32_t, {
	LCK_MTX_MODE_SLEEPABLE,
	LCK_MTX_MODE_SPIN,
	LCK_MTX_MODE_SPIN_ALWAYS,
});

__enum_decl(lck_ilk_mode_t, uint32_t, {
	LCK_ILK_MODE_UNLOCK,
	LCK_ILK_MODE_DIRECT,
	LCK_ILK_MODE_FROM_AS,
});

static inline void
lck_mtx_mcs_clear(lck_mtx_mcs_t mcs)
{
	*mcs = (struct lck_mtx_mcs){ };
}

static inline lck_mcs_id_t
lck_mtx_get_mcs_id(void)
{
	return lck_mcs_id_current(LCK_MCS_SLOT_0);
}

__pure2
static inline lck_mtx_mcs_t
lck_mtx_get_mcs(lck_mcs_id_t idx)
{
	return &lck_mcs_get_other(idx)->mcs_mtx;
}


#pragma mark lck_mtx_t: validation

__abortlike
static void
__lck_mtx_invalid_panic(lck_mtx_t *lck)
{
	panic("Invalid/destroyed mutex %p: "
	    "<0x%06x 0x%02x 0x%08x 0x%08x/%p 0x%04x 0x%04x>",
	    lck, lck->lck_mtx_tsid, lck->lck_mtx_type, lck->lck_mtx_grp,
	    lck->lck_mtx.data, ctid_get_thread_unsafe(lck->lck_mtx.owner),
	    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail);
}

__abortlike
static void
__lck_mtx_not_owned_panic(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly not owned by thread %p", lock, thread);
}

__abortlike
static void
__lck_mtx_owned_panic(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly owned by thread %p", lock, thread);
}

__abortlike
static void
__lck_mtx_lock_is_sleepable_panic(lck_mtx_t *lck)
{
	// "Always" variants can never block. If the lock is held as a normal mutex
	// then someone is mixing always and non-always calls on the same lock, which is
	// forbidden.
	panic("Mutex %p is held as a full-mutex (spin-always lock attempted)", lck);
}

#if DEVELOPMENT || DEBUG
__abortlike
static void
__lck_mtx_preemption_disabled_panic(lck_mtx_t *lck, int expected)
{
	panic("Attempt to take mutex %p with preemption disabled (%d)",
	    lck, get_preemption_level() - expected);
}

__abortlike
static void
__lck_mtx_at_irq_panic(lck_mtx_t *lck)
{
	panic("Attempt to take mutex %p in IRQ context", lck);
}

/*
 * Routine: lck_mtx_check_preemption
 *
 * Verify preemption is enabled when attempting to acquire a mutex.
 */
static inline void
lck_mtx_check_preemption(lck_mtx_t *lock, thread_t thread, int expected)
{
#pragma unused(thread)
	if (lock_preemption_level_for_thread(thread) == expected) {
		return;
	}
	if (LckDisablePreemptCheck) {
		return;
	}
	if (current_cpu_datap()->cpu_hibernate) {
		return;
	}
	if (startup_phase < STARTUP_SUB_EARLY_BOOT) {
		return;
	}
	__lck_mtx_preemption_disabled_panic(lock, expected);
}

static inline void
lck_mtx_check_irq(lck_mtx_t *lock)
{
	if (ml_at_interrupt_context()) {
		__lck_mtx_at_irq_panic(lock);
	}
}

#define LCK_MTX_SNIFF_PREEMPTION(thread) lock_preemption_level_for_thread(thread)
#define LCK_MTX_CHECK_INVARIANTS 1
#else
#define lck_mtx_check_irq(lck) ((void)0)
#define LCK_MTX_SNIFF_PREEMPTION(thread) 0
#define LCK_MTX_CHECK_INVARIANTS 0
#endif /* !DEVELOPMENT && !DEBUG */

#if CONFIG_DTRACE
#define LCK_MTX_SNIFF_DTRACE() lck_debug_state.lds_value
#else
#define LCK_MTX_SNIFF_DTRACE() 0
#endif


#pragma mark lck_mtx_t: alloc/init/destroy/free

lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_mtx_t *lck;

	lck = zalloc(KT_LCK_MTX);
	lck_mtx_init(lck, grp, attr);
	return lck;
}

void
lck_mtx_free(lck_mtx_t *lck, lck_grp_t *grp)
{
	lck_mtx_destroy(lck, grp);
	zfree(KT_LCK_MTX, lck);
}

void
lck_mtx_init(lck_mtx_t *lck, lck_grp_t *grp, lck_attr_t *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &lck_attr_default;
	}

	*lck = (lck_mtx_t){
		.lck_mtx_type = LCK_TYPE_MUTEX,
		.lck_mtx_grp = grp->lck_grp_attr_id,
	};
	if (attr->lck_attr_val & LCK_ATTR_DEBUG) {
		lck->lck_mtx.data |= LCK_MTX_PROFILE;
	}

	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
}

void
lck_mtx_destroy(lck_mtx_t *lck, lck_grp_t *grp)
{
	if (lck->lck_mtx_tsid && lck->lck_mtx_type == LCK_TYPE_MUTEX) {
		panic("Mutex to destroy still has waiters: %p: "
		    "<0x%06x 0x%02x 0x%08x 0x%08x/%p 0x%04x 0x%04x>",
		    lck, lck->lck_mtx_tsid, lck->lck_mtx_type, lck->lck_mtx_grp,
		    lck->lck_mtx.data, ctid_get_thread_unsafe(lck->lck_mtx.owner),
		    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail);
	}
	if (lck->lck_mtx_type != LCK_TYPE_MUTEX ||
	    (lck->lck_mtx.data & ~LCK_MTX_PROFILE) ||
	    lck->lck_mtx.as_tail || lck->lck_mtx.ilk_tail) {
		__lck_mtx_invalid_panic(lck);
	}
	LCK_GRP_ASSERT_ID(grp, lck->lck_mtx_grp);
	lck->lck_mtx_type = LCK_TYPE_NONE;
	lck->lck_mtx.data = LCK_MTX_TAG_DESTROYED;
	lck->lck_mtx_grp = 0;
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}
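
/*
 * Example (an illustrative sketch, not taken from any caller): typical
 * dynamic lifecycle of a kernel mutex.  The lock group @c my_grp is an
 * assumption here; callers create one elsewhere (e.g. with
 * lck_grp_alloc_init()) and pass it to every call below.
 *
 *	lck_mtx_t *mtx = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(mtx);
 *	... critical section ...
 *	lck_mtx_unlock(mtx);
 *
 *	lck_mtx_free(mtx, my_grp);	// calls lck_mtx_destroy() internally
 */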


#pragma mark lck_mtx_t: lck_mtx_ilk*

static hw_spin_timeout_status_t
lck_mtx_ilk_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	lck_mtx_t *lck = _lock;

	panic("Mutex interlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p, "
	    "<0x%06x 0x%02x 0x%08x 0x%08x 0x%04x 0x%04x>, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lck, HW_SPIN_TIMEOUT_ARG(to, st),
	    ctid_get_thread_unsafe(lck->lck_mtx.owner),
	    lck->lck_mtx_tsid, lck->lck_mtx_type,
	    lck->lck_mtx_grp, lck->lck_mtx.data,
	    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy lck_mtx_ilk_timeout_policy = {
	.hwsp_name = "lck_mtx_t (ilk)",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = lck_mtx_ilk_timeout_panic,
};

static void
lck_mtx_ilk_lock_cleanup_as_mcs(
	lck_mtx_t *lock,
	lck_mcs_id_t idx,
	lck_mtx_mcs_t mcs,
	hw_spin_timeout_t to,
	hw_spin_state_t *ss)
{
	lck_mtx_mcs_t nnode = NULL;
	lck_mcs_id_t pidx = (lck_mcs_id_t)mcs->lmm_as_prev;
	bool was_last;

	/*
	 * This is called when the thread made use
	 * of the adaptive spin queue and needs
	 * to remove itself from it.
	 */

	/*
	 * If the thread is last, set the tail to the node before us.
	 */
	was_last = lock_cmpxchg(&lock->lck_mtx.as_tail, idx, pidx, release);

	if (was_last) {
		/*
		 * If @c mcs was last, we need to erase the previous
		 * node's link to it.
		 *
		 * However, new nodes could have now taken our place
		 * and set the previous node's @c lmm_as_next field
		 * already, so we must CAS rather than blindly set.
		 *
		 * We know the previous node is stable because
		 * we hold the interlock (preventing concurrent
		 * removals).
		 */
		if (pidx) {
			os_atomic_cmpxchg(&lck_mtx_get_mcs(pidx)->lmm_as_next,
			    mcs, nnode, relaxed);
		}
	} else {
		/*
		 * If @c mcs wasn't last, then wait to make sure
		 * we observe @c lmm_as_next. Once we do, we know
		 * the field is stable since we hold the interlock
		 * (preventing concurrent dequeues).
		 *
		 * We can then update it to @c mcs next node index
		 * (which is also stable for similar reasons).
		 *
		 * Lastly, update the previous node's @c lmm_as_next
		 * field as well to terminate the dequeue.
		 */
		while (!hw_spin_wait_until(&mcs->lmm_as_next, nnode, nnode)) {
			hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
			hw_spin_should_keep_spinning(lock, pol, to, ss);
		}

		os_atomic_store(&nnode->lmm_as_prev, pidx, relaxed);
		if (pidx) {
			os_atomic_store(&lck_mtx_get_mcs(pidx)->lmm_as_next,
			    nnode, relaxed);
		}
	}

	/*
	 * @c mcs's fields are left dangling;
	 * it is the responsibility of the caller
	 * to terminate the cleanup.
	 */
}

static NOINLINE void
lck_mtx_ilk_lock_contended(
	lck_mtx_t *lock,
	lck_mtx_state_t state,
	lck_ilk_mode_t mode)
{
	hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };

	lck_mtx_mcs_t mcs, nnode, pnode;
	lck_mcs_id_t idx, pidx;
	lck_mtx_state_t nstate;
	unsigned long ready;
	uint64_t spin_start;

	/*
	 * Take a spot in the interlock MCS queue,
	 * and then spin until we're at the head of it.
	 */

	idx = lck_mtx_get_mcs_id();
	mcs = &lck_mcs_get_current()->mcs_mtx;
	if (mode != LCK_MTX_MODE_SPIN) {
		spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
	}

	mcs->lmm_ilk_current = lock;
	pidx = os_atomic_xchg(&lock->lck_mtx.ilk_tail, idx, release);
	if (pidx) {
		pnode = lck_mtx_get_mcs(pidx);
		os_atomic_store(&pnode->lmm_ilk_next, mcs, relaxed);

		while (!hw_spin_wait_until(&mcs->lmm_ilk_ready, ready, ready)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}
	}


	/*
	 * We're now the first in line, wait for the interlock
	 * to look ready and take it.
	 *
	 * We can't just assume the lock is ours for the taking,
	 * because the fastpath of lck_mtx_lock_spin{,_always}
	 * only looks at the mutex "data" and might steal it.
	 *
	 * Also clear the interlock MCS tail if @c mcs is last.
	 */
	do {
		while (!hw_spin_wait_until(&lock->lck_mtx.val,
		    state.val, state.ilocked == 0)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		nstate = state;
		nstate.ilocked = 1;
		if (nstate.ilk_tail == idx) {
			nstate.ilk_tail = 0;
		}
	} while (!os_atomic_cmpxchg(&lock->lck_mtx, state, nstate, acquire));


	/*
	 * We now have the interlock, let's clean up the MCS state.
	 *
	 * First, if there is a node after us, notify that it
	 * is at the head of the interlock queue.
	 *
	 * Second, perform the adaptive spin MCS cleanup if needed.
	 *
	 * Lastly, clear the MCS node.
	 */
	if (state.ilk_tail != idx) {
		while (!hw_spin_wait_until(&mcs->lmm_ilk_next, nnode, nnode)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		os_atomic_store(&nnode->lmm_ilk_ready, 1, relaxed);
	}

	if (mode == LCK_ILK_MODE_FROM_AS) {
		lck_mtx_ilk_lock_cleanup_as_mcs(lock, idx, mcs, to, &ss);
	}
	lck_mtx_mcs_clear(mcs);

	if (mode != LCK_MTX_MODE_SPIN) {
		LCK_MTX_ADAPTIVE_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
	}
}

static void
lck_mtx_ilk_lock_nopreempt(lck_mtx_t *lock, lck_ilk_mode_t mode)
{
	lck_mtx_state_t state, nstate;

	os_atomic_rmw_loop(&lock->lck_mtx.val, state.val, nstate.val, acquire, {
		if (__improbable(state.ilocked || state.ilk_tail)) {
			os_atomic_rmw_loop_give_up({
				return lck_mtx_ilk_lock_contended(lock, state, mode);
			});
		}

		nstate = state;
		nstate.ilocked = true;
	});
}

static void
lck_mtx_ilk_unlock_v(lck_mtx_t *lock, uint32_t data)
{
	os_atomic_store(&lock->lck_mtx.data, data, release);
	lock_enable_preemption();
}

static void
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	lck_mtx_ilk_unlock_v(lock, lock->lck_mtx.data & ~LCK_MTX_ILOCK);
}


#pragma mark lck_mtx_t: turnstile integration

/*
 * Routine: lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 *
 * Always aggressively sets the owning thread to promoted,
 * even if it's the same or higher priority.
 * This prevents it from lowering its own priority while holding a lock.
 *
 * TODO: Come up with a more efficient way to handle same-priority promotions
 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
 */
static struct turnstile *
lck_mtx_lock_wait(
	lck_mtx_t *lck,
	thread_t self,
	thread_t holder,
	struct turnstile *ts)
{
	uint64_t sleep_start = LCK_MTX_BLOCK_BEGIN();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    unslide_for_kdebug(lck), (uintptr_t)thread_tid(self), 0, 0, 0);

	if (ts == TURNSTILE_NULL) {
		ts = turnstile_prepare_compact_id((uintptr_t)lck,
		    lck->lck_mtx_tsid, TURNSTILE_KERNEL_MUTEX);
		if (lck->lck_mtx_tsid == 0) {
			lck->lck_mtx_tsid = ts->ts_compact_id;
		}
	}
	assert3u(ts->ts_compact_id, ==, lck->lck_mtx_tsid);

	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	turnstile_update_inheritor(ts, holder, (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	waitq_assert_wait64(&ts->ts_waitq, LCK_MTX_EVENT(lck),
	    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(lck);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	LCK_MTX_BLOCK_END(lck, lck->lck_mtx_grp, sleep_start);

	return ts;
}

static void
lck_mtx_lock_wait_done(lck_mtx_t *lck, struct turnstile *ts)
{
	if (turnstile_complete_compact_id((uintptr_t)lck, ts,
	    TURNSTILE_KERNEL_MUTEX)) {
		lck->lck_mtx_tsid = 0;
	}
}

/*
 * Routine: lck_mtx_lock_will_need_wakeup
 *
 * Returns whether the thread is the current turnstile inheritor,
 * which means it will have to call lck_mtx_unlock_wakeup()
 * on unlock.
 */
__attribute__((always_inline))
static bool
lck_mtx_lock_will_need_wakeup(lck_mtx_t *lck, thread_t self)
{
	uint32_t tsid = lck->lck_mtx_tsid;

	return tsid && turnstile_get_by_id(tsid)->ts_inheritor == self;
}

/*
 * Routine: lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 *
 * NOTE: callers should call turnstile_cleanup() after
 * dropping the interlock.
 */
static void
lck_mtx_unlock_wakeup(
	lck_mtx_t *lck,
	__kdebug_only thread_t thread)
{
	struct turnstile *ts;
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
	    unslide_for_kdebug(lck), (uintptr_t)thread_tid(thread), 0, 0, 0);

	ts = turnstile_get_by_id(lck->lck_mtx_tsid);

	/*
	 * We can skip turnstile_{prepare,cleanup} because
	 * we hold the interlock of the primitive,
	 * and enqueues/wakeups all happen under the interlock,
	 * which means the turnstile is stable.
	 */
	did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(lck),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
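
/*
 * Illustrative sketch (no new functionality) of how the helpers above
 * interleave between a contended waiter and the lock owner; see
 * lck_mtx_lock_contended() and lck_mtx_unlock_contended() for the
 * authoritative sequence:
 *
 *	waiter (interlock held)                owner, in lck_mtx_unlock()
 *	-------------------------------        --------------------------------
 *	sets LCK_MTX_NEEDS_WAKEUP in data
 *	ts = lck_mtx_lock_wait(lck, self,
 *	    holder, ts);   // drops the ILK
 *	                                       sees LCK_MTX_NEEDS_WAKEUP, takes ILK
 *	                                       lck_mtx_unlock_wakeup(lck, thread)
 *	                                       drops the ILK, turnstile_cleanup()
 *	wakes up, re-acquires the lock,
 *	lck_mtx_lock_wait_done(lck, ts)
 */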


#pragma mark lck_mtx_t: lck_mtx_lock

static inline bool
lck_mtx_ctid_on_core(uint32_t ctid)
{
	thread_t th = ctid_get_thread_unsafe(ctid);

	return th && machine_thread_on_core_allow_invalid(th);
}

#define LCK_MTX_OWNER_FOR_TRACE(lock) \
	VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe((lock)->lck_mtx.data))

static void
lck_mtx_lock_adaptive_spin(lck_mtx_t *lock, lck_mtx_state_t state)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };
	uint64_t deadline;

	lck_mtx_mcs_t mcs, node;
	lck_mcs_id_t idx, pidx, clear_idx;
	unsigned long prev;
	lck_mtx_state_t nstate;
	ast_t *const astp = ast_pending();

	idx = lck_mtx_get_mcs_id();
	mcs = &lck_mcs_get_current()->mcs_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
	    trace_lck, LCK_MTX_OWNER_FOR_TRACE(lock), lock->lck_mtx_tsid, 0, 0);

	/*
	 * Take a spot in the adaptive spin queue,
	 * and then spin until we're at the head of it.
	 *
	 * Until we're at the head, we do not need to monitor
	 * for whether the current owner is on core or not:
	 *
	 * 1. the head of the queue is doing it already,
	 *
	 * 2. when the entire adaptive spin queue will "give up"
	 *    as a result of the owner going off core, we want
	 *    to avoid a thundering herd and let the AS queue
	 *    pour into the interlock one at a time.
	 *
	 * Do give up if the scheduler made noises that something
	 * more important has shown up.
	 *
	 * Note: this function is optimized so that we do not touch
	 *       our local mcs node when we're the head of the queue.
	 *
	 *       This allows us, when contention is between 2 cores
	 *       only, to not have to touch this cacheline at all.
	 */
	pidx = os_atomic_xchg(&lock->lck_mtx.as_tail, idx, release);
	if (pidx) {
		node = lck_mtx_get_mcs(pidx);
		mcs->lmm_as_prev = pidx;
		os_atomic_store(&node->lmm_as_next, mcs, release);

		while (!hw_spin_wait_until(&mcs->lmm_as_prev, prev,
		    prev == 0 || (os_atomic_load(astp, relaxed) & AST_URGENT))) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		if (__improbable(prev)) {
			goto adaptive_spin_fail;
		}

		clear_idx = 0;
	} else {
		clear_idx = idx;
	}

	/*
	 * We're now first in line.
	 *
	 * It's our responsibility to monitor the lock's state
	 * for whether (1) the lock has become available,
	 * (2) its owner has gone off core, (3) the scheduler
	 * wants its CPU back, or (4) we've spun for too long.
	 */
	deadline = ml_get_timebase() + os_atomic_load(&MutexSpin, relaxed);

	for (;;) {
		state.val = lock_load_exclusive(&lock->lck_mtx.val, acquire);

		if (__probable(!state.ilocked && !state.ilk_tail && !state.owner)) {
			/*
			 * 2-core contention: if we can, try to dequeue
			 * ourselves from the adaptive spin queue
			 * as part of this CAS in order to avoid
			 * the cost of lck_mtx_ilk_lock_cleanup_as_mcs()
			 * and zeroing the mcs node at all.
			 *
			 * Because the queue is designed to limit contention,
			 * using store-exclusive over an armv8.1 LSE atomic
			 * is actually marginally better (presumably due to
			 * the better codegen).
			 */
			nstate = state;
			nstate.ilocked = true;
			if (state.as_tail == clear_idx) {
				nstate.as_tail = 0;
			}
			if (__probable(lock_store_exclusive(&lock->lck_mtx.val,
			    state.val, nstate.val, acquire))) {
				break;
			}
		} else {
			lock_wait_for_event();
		}

		if (__improbable(ml_get_timebase() > deadline ||
		    (os_atomic_load(astp, relaxed) & AST_URGENT) ||
		    (!state.ilocked && !state.ilk_tail && state.owner &&
		    !lck_mtx_ctid_on_core(state.owner)))) {
			goto adaptive_spin_fail;
		}
	}

	/*
	 * If we're here, we got the lock, we just have to clean up
	 * the MCS nodes and return.
	 */
	if (state.as_tail != clear_idx) {
		lck_mtx_ilk_lock_cleanup_as_mcs(lock, idx, mcs, to, &ss);
		lck_mtx_mcs_clear(mcs);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(current_thread()),
	    lock->lck_mtx_tsid, 0, 0);
	return;

adaptive_spin_fail:
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, LCK_MTX_OWNER_FOR_TRACE(lock), lock->lck_mtx_tsid, 0, 0);
	return lck_mtx_ilk_lock_contended(lock, state, LCK_ILK_MODE_FROM_AS);
}

static NOINLINE void
lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, lck_mtx_mode_t mode)
{
	struct turnstile *ts = TURNSTILE_NULL;
	lck_mtx_state_t state;
	uint32_t ctid = thread->ctid;
	uint32_t data;
#if CONFIG_DTRACE
	int first_miss = 0;
#endif /* CONFIG_DTRACE */
	bool direct_wait = false;
	uint64_t spin_start;
	uint32_t profile;

	lck_mtx_check_irq(lock);
	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
	}

	for (;;) {
		/*
		 * Load the current state and perform sanity checks
		 *
		 * Note that the various "corrupt" values are designed
		 * so that the slowpath is taken when a mutex was used
		 * after destruction, so that we do not have to do
		 * sanity checks in the fast path.
		 */
		state = os_atomic_load(&lock->lck_mtx, relaxed);
		if (state.owner == ctid) {
			__lck_mtx_owned_panic(lock, thread);
		}
		if (lock->lck_mtx_type != LCK_TYPE_MUTEX ||
		    state.data == LCK_MTX_TAG_DESTROYED) {
			__lck_mtx_invalid_panic(lock);
		}
		profile = (state.data & LCK_MTX_PROFILE);

		/*
		 * Attempt steal
		 *
		 * When the lock state is 0, then no thread can be queued
		 * for adaptive spinning or for the interlock yet.
		 *
		 * As such we can attempt to try to take the interlock.
		 * (we can't take the mutex directly because we need
		 * the interlock to do turnstile operations on the way out).
		 */
		if ((state.val & ~(uint64_t)LCK_MTX_PROFILE) == 0) {
			if (!os_atomic_cmpxchgv(&lock->lck_mtx.val,
			    state.val, state.val | LCK_MTX_ILOCK,
			    &state.val, acquire)) {
				continue;
			}
			break;
		}

#if CONFIG_DTRACE
		if (profile) {
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
#endif /* CONFIG_DTRACE */

		if (mode == LCK_MTX_MODE_SLEEPABLE) {
			spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
		} else {
			spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
		}

		/*
		 * Adaptive spin or interlock
		 *
		 * Evaluate if adaptive spinning should be attempted,
		 * and if yes go to adaptive spin.
		 *
		 * Otherwise (and this includes always-spin mutexes),
		 * go for the interlock.
		 */
		if (mode != LCK_MTX_MODE_SPIN_ALWAYS &&
		    (state.ilocked || state.as_tail || !state.owner ||
		    lck_mtx_ctid_on_core(state.owner))) {
			lck_mtx_lock_adaptive_spin(lock, state);
		} else {
			direct_wait = true;
			lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_DIRECT);
		}

		if (mode == LCK_MTX_MODE_SLEEPABLE) {
			LCK_MTX_ADAPTIVE_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
		} else {
			LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
		}

		/*
		 * Take or sleep
		 *
		 * We now have the interlock. Either the owner
		 * isn't set, and the mutex is ours to claim,
		 * or we must go to sleep.
		 *
		 * If we go to sleep, we need to set LCK_MTX_NEEDS_WAKEUP
		 * to force the current lock owner to call
		 * lck_mtx_unlock_wakeup().
		 */
		state = os_atomic_load(&lock->lck_mtx, relaxed);
		if (state.owner == LCK_MTX_NULL_CTID) {
			break;
		}

		if (mode == LCK_MTX_MODE_SPIN_ALWAYS) {
			__lck_mtx_lock_is_sleepable_panic(lock);
		}

#if CONFIG_DTRACE
		if (profile) {
			LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
			    direct_wait, &first_miss);
		}
#endif /* CONFIG_DTRACE */
		os_atomic_store(&lock->lck_mtx.data,
		    state.data | LCK_MTX_ILOCK | LCK_MTX_NEEDS_WAKEUP,
		    compiler_acq_rel);
		ts = lck_mtx_lock_wait(lock, thread,
		    ctid_get_thread(state.owner), ts);

		/* returns interlock unlocked and preemption re-enabled */
		lock_disable_preemption_for_thread(thread);
	}

	/*
	 * We can take the lock!
	 *
	 * We only have the interlock and the owner field is 0.
	 *
	 * Perform various turnstile cleanups if needed,
	 * claim the lock, and reenable preemption (if needed).
	 */
	if (ts) {
		lck_mtx_lock_wait_done(lock, ts);
	}
	data = ctid | profile;
	if (lck_mtx_lock_will_need_wakeup(lock, thread)) {
		data |= LCK_MTX_NEEDS_WAKEUP;
	}
	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		data |= LCK_MTX_ILOCK | LCK_MTX_SPIN_MODE;
	}
	os_atomic_store(&lock->lck_mtx.data, data, release);

	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		lock_enable_preemption();
	}

	assert(thread->turnstile != NULL);

	if (ts) {
		turnstile_cleanup();
	}
	LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp,
	    mode != LCK_MTX_MODE_SLEEPABLE, profile);
}

#if LCK_MTX_CHECK_INVARIANTS || CONFIG_DTRACE
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static void
lck_mtx_lock_slow(
	lck_mtx_t *lock,
	thread_t thread,
	lck_mtx_state_t state,
	lck_mtx_mode_t mode)
{
#pragma unused(state)
#if CONFIG_DTRACE
	lck_mtx_state_t ostate = {
		.data = LCK_MTX_PROFILE,
	};
#endif /* CONFIG_DTRACE */

#if LCK_MTX_CHECK_INVARIANTS
	if (mode != LCK_MTX_MODE_SPIN_ALWAYS) {
		lck_mtx_check_preemption(lock, thread,
		    (mode == LCK_MTX_MODE_SPIN));
	}
#endif /* LCK_MTX_CHECK_INVARIANTS */
#if CONFIG_DTRACE
	if (state.val == ostate.val) {
		state.data = thread->ctid | LCK_MTX_PROFILE;
		if (mode != LCK_MTX_MODE_SLEEPABLE) {
			state.ilocked = true;
			state.spin_mode = true;
		}
		os_atomic_cmpxchgv(&lock->lck_mtx.val,
		    ostate.val, state.val, &state.val, acquire);
	}
	if ((state.val & ~ostate.val) == 0) {
		LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp,
		    mode != LCK_MTX_MODE_SLEEPABLE,
		    state.data & LCK_MTX_PROFILE);
		return;
	}
#endif /* CONFIG_DTRACE */
	lck_mtx_lock_contended(lock, thread, mode);
}

static __attribute__((always_inline)) void
lck_mtx_lock_fastpath(lck_mtx_t *lock, lck_mtx_mode_t mode)
{
	thread_t thread = current_thread();
	lck_mtx_state_t state = {
		.data = thread->ctid,
	};
	uint64_t take_slowpath = 0;

	if (mode != LCK_MTX_MODE_SPIN_ALWAYS) {
		take_slowpath |= LCK_MTX_SNIFF_PREEMPTION(thread);
	}
	take_slowpath |= LCK_MTX_SNIFF_DTRACE();

	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
		state.ilocked = true;
		state.spin_mode = true;
	}

	/*
	 * Do the CAS on the entire mutex state,
	 * which therefore requires the ILK/AS queues
	 * to be empty (which is fairer).
	 */
	lock_cmpxchgv(&lock->lck_mtx.val,
	    0, state.val, &state.val, acquire);

	take_slowpath |= state.val;
	if (__improbable(take_slowpath)) {
		return lck_mtx_lock_slow(lock, thread, state, mode);
	}
}

void
lck_mtx_lock(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SLEEPABLE);
}

void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SPIN);
}

void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SPIN_ALWAYS);
}
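
/*
 * Usage sketch (illustrative only; @c mtx is a hypothetical mutex):
 * the spin variants return with the interlock held and preemption
 * disabled, so their critical sections must not block unless the lock
 * is first converted with lck_mtx_convert_spin().
 *
 *	lck_mtx_lock(mtx);		// full mutex, may block
 *	... may block while held ...
 *	lck_mtx_unlock(mtx);
 *
 *	lck_mtx_lock_spin(mtx);		// spin acquire, preemption disabled
 *	... short, non-blocking critical section ...
 *	lck_mtx_unlock(mtx);
 */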


#pragma mark lck_mtx_t: lck_mtx_try_lock

static __attribute__((always_inline)) bool
lck_mtx_try_lock_slow_inline(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata,
	bool spin)
{
#pragma unused(lock, thread, odata, ndata)
#if CONFIG_DTRACE
	if (odata == LCK_MTX_PROFILE) {
		os_atomic_cmpxchgv(&lock->lck_mtx.data,
		    odata, ndata | LCK_MTX_PROFILE, &odata, acquire);
	}
	if ((odata & ~LCK_MTX_PROFILE) == 0) {
		LCK_MTX_TRY_ACQUIRED(lock, lock->lck_mtx_grp,
		    spin, odata & LCK_MTX_PROFILE);
		return true;
	}
	if (odata & LCK_MTX_PROFILE) {
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &(int){ 0 });
	}
#endif /* CONFIG_DTRACE */

	if (spin) {
		lock_enable_preemption();
	}
	return false;
}

#if CONFIG_DTRACE || LCK_MTX_CHECK_INVARIANTS
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static bool
lck_mtx_try_lock_slow(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata)
{
	return lck_mtx_try_lock_slow_inline(lock, thread, odata, ndata, false);
}

#if CONFIG_DTRACE || LCK_MTX_CHECK_INVARIANTS
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static bool
lck_mtx_try_lock_slow_spin(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata)
{
	return lck_mtx_try_lock_slow_inline(lock, thread, odata, ndata, true);
}

static __attribute__((always_inline)) bool
lck_mtx_try_lock_fastpath(lck_mtx_t *lock, lck_mtx_mode_t mode)
{
	thread_t thread = current_thread();
	uint32_t odata, ndata = thread->ctid;
	uint32_t take_slowpath = 0;

#if CONFIG_DTRACE
	take_slowpath |= lck_debug_state.lds_value;
#endif
	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
		ndata |= LCK_MTX_SPIN_MODE | LCK_MTX_ILOCK;
	}

	/*
	 * try_lock, because it's likely to be used for cases
	 * like lock inversion resolution, tries a bit harder
	 * than lck_mtx_lock() to take the lock, and ignores the
	 * adaptive spin / interlock queues by doing the CAS
	 * on the 32-bit mutex data only.
	 */
	lock_cmpxchgv(&lock->lck_mtx.data, 0, ndata, &odata, acquire);

	take_slowpath |= odata;
	if (__probable(!take_slowpath)) {
		return true;
	}

	if (mode == LCK_MTX_MODE_SPIN_ALWAYS &&
	    (odata & LCK_MTX_CTID_MASK) &&
	    !(odata & LCK_MTX_SPIN_MODE)) {
		__lck_mtx_lock_is_sleepable_panic(lock);
	}

	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		return lck_mtx_try_lock_slow(lock, thread, odata, ndata);
	} else {
		return lck_mtx_try_lock_slow_spin(lock, thread, odata, ndata);
	}
}

boolean_t
lck_mtx_try_lock(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SLEEPABLE);
}

boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SPIN);
}

boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SPIN_ALWAYS);
}
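
/*
 * Usage sketch (illustrative only) of the lock-inversion use case
 * mentioned above; @c a and @c b are hypothetical mutexes that must
 * nominally be taken in the order b then a:
 *
 *	lck_mtx_lock(a);
 *	if (!lck_mtx_try_lock(b)) {
 *	        // back off to respect the b -> a ordering
 *	        lck_mtx_unlock(a);
 *	        lck_mtx_lock(b);
 *	        lck_mtx_lock(a);
 *	}
 *	... both locks held ...
 *	lck_mtx_unlock(b);
 *	lck_mtx_unlock(a);
 */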


#pragma mark lck_mtx_t: lck_mtx_unlock

static NOINLINE void
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, uint32_t data)
{
	bool cleanup = false;

#if !CONFIG_DTRACE
	/*
	 * This check is done by lck_mtx_unlock_slow() when it is enabled.
	 */
	if (thread->ctid != (data & LCK_MTX_CTID_MASK)) {
		__lck_mtx_not_owned_panic(lock, thread);
	}
#endif /* !CONFIG_DTRACE */

	if ((data & LCK_MTX_SPIN_MODE) == 0) {
		lock_disable_preemption_for_thread(thread);
		lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_UNLOCK);
	}

	/*
	 * We must re-load the data: we might have taken
	 * the slowpath because another thread had taken
	 * the interlock and set the NEEDS_WAKEUP bit
	 * while we were spinning to get it.
	 */
	data = os_atomic_load(&lock->lck_mtx.data, compiler_acq_rel);
	if (data & LCK_MTX_NEEDS_WAKEUP) {
		lck_mtx_unlock_wakeup(lock, thread);
		cleanup = true;
	}
	lck_mtx_ilk_unlock_v(lock, data & LCK_MTX_PROFILE);

	LCK_MTX_RELEASED(lock, lock->lck_mtx_grp, data & LCK_MTX_PROFILE);

	/*
	 * Do not do any turnstile operations outside of this block.
	 *
	 * lock/unlock is called at an early stage of boot while single
	 * threaded, without turnstiles being available yet.
	 * Even without contention we can come through the slow path
	 * if the mutex is acquired as a spin lock.
	 */
	if (cleanup) {
		turnstile_cleanup();
	}
}

#if CONFIG_DTRACE
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static void
lck_mtx_unlock_slow(lck_mtx_t *lock, thread_t thread, uint32_t data)
{
#if CONFIG_DTRACE
	/*
	 * If DTrace is enabled, locks can be profiled,
	 * which causes the fastpath of unlock to fail.
	 */
	if ((data & LCK_MTX_BITS_MASK) == LCK_MTX_PROFILE) {
		os_atomic_cmpxchgv(&lock->lck_mtx.data, data, LCK_MTX_PROFILE,
		    &data, release);
	}
	if (thread->ctid != (data & LCK_MTX_CTID_MASK)) {
		__lck_mtx_not_owned_panic(lock, thread);
	}
	if ((data & (LCK_MTX_BITS_MASK & ~LCK_MTX_PROFILE)) == 0) {
		LCK_MTX_RELEASED(lock, lock->lck_mtx_grp, false);
		return;
	}
#endif /* CONFIG_DTRACE */

	lck_mtx_unlock_contended(lock, thread, data);
}

void
lck_mtx_unlock(lck_mtx_t *lock)
{
	thread_t thread = current_thread();
	uint32_t take_slowpath = 0;
	uint32_t data;

	take_slowpath |= LCK_MTX_SNIFF_DTRACE();

	/*
	 * The fast path ignores the ILK/AS queues on purpose,
	 * those really are a "lock" concept, not unlock.
	 */
	if (__probable(lock_cmpxchgv(&lock->lck_mtx.data,
	    thread->ctid, 0, &data, release))) {
		if (__probable(!take_slowpath)) {
			return;
		}
	}

	lck_mtx_unlock_slow(lock, thread, data);
}


#pragma mark lck_mtx_t: misc

void
lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();

	if (type == LCK_MTX_ASSERT_OWNED) {
		if (state.owner != thread->ctid) {
			__lck_mtx_not_owned_panic(lock, thread);
		}
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (state.owner == thread->ctid) {
			__lck_mtx_owned_panic(lock, thread);
		}
	} else {
		panic("lck_mtx_assert(): invalid arg (%u)", type);
	}
}
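
/*
 * Usage sketch (illustrative only; @c mtx is a hypothetical mutex):
 * functions that require their caller to hold a mutex typically assert
 * ownership on entry,
 *
 *	lck_mtx_assert(mtx, LCK_MTX_ASSERT_OWNED);
 *
 * and code that must not already hold it can assert the converse with
 * LCK_MTX_ASSERT_NOTOWNED.
 */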

/*
 * Routine: lck_mtx_convert_spin
 *
 * Convert a mutex held for spin into a held full mutex
 */
void
lck_mtx_convert_spin(lck_mtx_t *lock)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();
	uint32_t data = thread->ctid;

	if (state.owner != data) {
		__lck_mtx_not_owned_panic(lock, thread);
	}

	if (state.spin_mode) {
		/*
		 * Note: we can acquire the lock in spin mode
		 * _and_ be the inheritor if we waited.
		 *
		 * We must only clear ilocked and spin_mode,
		 * but preserve owner and needs_wakeup.
		 */
		state.ilocked = false;
		state.spin_mode = false;
		lck_mtx_ilk_unlock_v(lock, state.data);
		turnstile_cleanup();
	}
}
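
/*
 * Usage sketch (illustrative only; @c mtx is a hypothetical mutex):
 * take the lock in spin mode for a short lookup, then convert it to a
 * full mutex before doing anything that may block:
 *
 *	lck_mtx_lock_spin(mtx);
 *	... short, non-blocking work ...
 *	lck_mtx_convert_spin(mtx);	// now held as a full mutex
 *	... may block while held ...
 *	lck_mtx_unlock(mtx);
 */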

/*
 * Routine: kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	lck_mtx_state_t state = os_atomic_load(&lck->lck_mtx, relaxed);

	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	if (state.data == LCK_MTX_TAG_DESTROYED) {
		return false;
	}
	return state.owner || state.ilocked;
}

void
kdp_lck_mtx_find_owner(
	struct waitq *waitq __unused,
	event64_t event,
	thread_waitinfo_t *waitinfo)
{
	lck_mtx_t *mutex = LCK_EVENT_TO_MUTEX(event);
	lck_mtx_state_t state = os_atomic_load(&mutex->lck_mtx, relaxed);

	assert3u(state.data, !=, LCK_MTX_TAG_DESTROYED);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	waitinfo->owner = thread_tid(ctid_get_thread(state.owner));
}

#endif /* !LCK_MTX_USE_ARCH */

/*
 * Routine: mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define MAX_COLLISION_COUNTS 32
#define MAX_COLLISION 8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};


void
mutex_pause(uint32_t collisions)
{
	wait_result_t wait_result;
	uint32_t back_off;

	if (collisions >= MAX_COLLISION_COUNTS) {
		collisions = MAX_COLLISION_COUNTS - 1;
	}
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION) {
		collisions = MAX_COLLISION - 1;
	}
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}
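
/*
 * Usage sketch (illustrative only): callers typically retry a contended
 * operation and escalate the backoff with the number of collisions seen
 * so far; @c try_the_operation() is a hypothetical helper:
 *
 *	uint32_t collisions = 0;
 *
 *	while (!try_the_operation()) {
 *	        mutex_pause(collisions++);
 *	}
 */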


unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

boolean_t
lck_mtx_yield(
	lck_mtx_t *lck)
{
	bool has_waiters = LCK_MTX_HAS_WAITERS(lck);

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (!has_waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
	return has_waiters;
}
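
/*
 * Usage sketch (illustrative only; @c mtx is a hypothetical mutex):
 * a long-running holder can periodically give waiters a chance to run.
 * Because lck_mtx_yield() may drop and re-take the lock, any state it
 * protects must be revalidated when the yield returns TRUE.
 *
 *	lck_mtx_lock(mtx);
 *	while (more_work_to_do()) {	// hypothetical helper
 *	        ... process one batch under the lock ...
 *	        if (lck_mtx_yield(mtx)) {
 *	                ... revalidate cached state ...
 *	        }
 *	}
 *	lck_mtx_unlock(mtx);
 */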
| 1429 | |