/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#define LOCK_PRIVATE 1

#include <mach_ldebug.h>
#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/locks.h>
#include <kern/kalloc.h>
#include <kern/thread.h>

#include <mach/machine/sdt.h>

#include <machine/cpu_data.h>
#include <machine/machine_cpu.h>

#if !LCK_MTX_USE_ARCH

/*
 * lck_mtx_t
 * ~~~~~~~~~
 *
 * Kernel mutexes in this implementation are made of four 32-bit words:
 *
 * - word 0: turnstile compact ID (24 bits) and the 0x22 lock tag
 * - word 1: padding (to be used for group compact IDs)
 * - word 2: mutex state (lock owner + interlock, spin and waiters bits),
 *           referred to as "data" in the code.
 * - word 3: adaptive spin and interlock MCS queue tails.
 *
 * The 64-bit word made of the last two words is referred to
 * as the "mutex state" in the code.
 *
 *
 * Core serialization rules
 * ~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The mutex has a bit (lck_mtx_t::lck_mtx.ilocked, or bit LCK_MTX_ILOCK
 * of the data word) that serves as a spinlock for the mutex state.
 *
 *
 * Updates to the lock fields must follow these rules:
 *
 * - It is OK to "steal" the mutex (updating its data field) if no one
 *   holds the interlock.
 *
 * - Holding the interlock allows its holder to update the first 3 words
 *   of the kernel mutex without using RMW atomics (plain stores are OK).
 *
 * - Holding the interlock is required for a thread to remove itself
 *   from the adaptive spin queue.
 *
 * - Threads can enqueue themselves onto the adaptive spin wait queue
 *   or the interlock wait queue at any time.
 *
 *
 * Waiters bit and turnstiles
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The turnstile on a kernel mutex is set by waiters, and cleared
 * once they have all been resumed and successfully acquired the lock.
 *
 * LCK_MTX_NEEDS_WAKEUP being set (always with an owner set too)
 * forces threads into the lck_mtx_unlock slowpath,
 * in order to evaluate whether lck_mtx_unlock_wakeup() must be called.
 *
 * As a result, it only needs to be set at select times:
 *
 * - when a thread blocks and "snitches" on the current lock owner,
 *   so that when that owner unlocks it performs a wakeup,
 *
 * - when a thread that was woken up resumes its work and becomes
 *   the inheritor.
 */
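
/*
 * Illustrative usage sketch (not part of this implementation): the public
 * entry points defined below behave like a regular sleepable mutex, with
 * spin variants layered on top. The lock group setup ("my_grp") is an
 * assumption made only for this example.
 *
 *	lck_mtx_t mtx;
 *
 *	lck_mtx_init(&mtx, my_grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(&mtx);
 *	lck_mtx_assert(&mtx, LCK_MTX_ASSERT_OWNED);
 *	// ... critical section, may block ...
 *	lck_mtx_unlock(&mtx);
 *
 *	lck_mtx_lock_spin(&mtx);        // interlock held, no blocking allowed
 *	lck_mtx_convert_spin(&mtx);     // promote to a full mutex if needed
 *	lck_mtx_unlock(&mtx);
 */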

#define ADAPTIVE_SPIN_ENABLE 0x1

#define NOINLINE __attribute__((noinline))
#define LCK_MTX_EVENT(lck) CAST_EVENT64_T(&(lck)->lck_mtx.data)
#define LCK_EVENT_TO_MUTEX(e) __container_of((uint32_t *)(e), lck_mtx_t, lck_mtx.data)
#define LCK_MTX_HAS_WAITERS(l) ((l)->lck_mtx.data & LCK_MTX_NEEDS_WAKEUP)

#if DEVELOPMENT || DEBUG
TUNABLE(bool, LckDisablePreemptCheck, "-disable_mtx_chk", false);
#endif /* DEVELOPMENT || DEBUG */

extern unsigned int not_in_kdp;

KALLOC_TYPE_DEFINE(KT_LCK_MTX, lck_mtx_t, KT_PRIV_ACCT);

#define LCK_MTX_NULL_CTID 0x00000000u

__enum_decl(lck_mtx_mode_t, uint32_t, {
	LCK_MTX_MODE_SLEEPABLE,
	LCK_MTX_MODE_SPIN,
	LCK_MTX_MODE_SPIN_ALWAYS,
});

__enum_decl(lck_ilk_mode_t, uint32_t, {
	LCK_ILK_MODE_UNLOCK,
	LCK_ILK_MODE_DIRECT,
	LCK_ILK_MODE_FROM_AS,
});

static inline void
lck_mtx_mcs_clear(lck_mtx_mcs_t mcs)
{
	*mcs = (struct lck_mtx_mcs){ };
}

static inline lck_mcs_id_t
lck_mtx_get_mcs_id(void)
{
	return lck_mcs_id_current(LCK_MCS_SLOT_0);
}

__pure2
static inline lck_mtx_mcs_t
lck_mtx_get_mcs(lck_mcs_id_t idx)
{
	return &lck_mcs_get_other(idx)->mcs_mtx;
}


#pragma mark lck_mtx_t: validation

__abortlike
static void
__lck_mtx_invalid_panic(lck_mtx_t *lck)
{
	panic("Invalid/destroyed mutex %p: "
	    "<0x%06x 0x%02x 0x%08x 0x%08x/%p 0x%04x 0x%04x>",
	    lck, lck->lck_mtx_tsid, lck->lck_mtx_type, lck->lck_mtx_grp,
	    lck->lck_mtx.data, ctid_get_thread_unsafe(lck->lck_mtx.owner),
	    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail);
}

__abortlike
static void
__lck_mtx_not_owned_panic(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly not owned by thread %p", lock, thread);
}

__abortlike
static void
__lck_mtx_owned_panic(lck_mtx_t *lock, thread_t thread)
{
	panic("Mutex %p is unexpectedly owned by thread %p", lock, thread);
}

__abortlike
static void
__lck_mtx_lock_is_sleepable_panic(lck_mtx_t *lck)
{
	// "Always" variants can never block. If the lock is held as a normal mutex
	// then someone is mixing always and non-always calls on the same lock, which is
	// forbidden.
	panic("Mutex %p is held as a full-mutex (spin-always lock attempted)", lck);
}

#if DEVELOPMENT || DEBUG
__abortlike
static void
__lck_mtx_preemption_disabled_panic(lck_mtx_t *lck, int expected)
{
	panic("Attempt to take mutex %p with preemption disabled (%d)",
	    lck, get_preemption_level() - expected);
}

__abortlike
static void
__lck_mtx_at_irq_panic(lck_mtx_t *lck)
{
	panic("Attempt to take mutex %p in IRQ context", lck);
}

/*
 * Routine: lck_mtx_check_preemption
 *
 * Verify preemption is enabled when attempting to acquire a mutex.
 */
static inline void
lck_mtx_check_preemption(lck_mtx_t *lock, thread_t thread, int expected)
{
#pragma unused(thread)
	if (lock_preemption_level_for_thread(thread) == expected) {
		return;
	}
	if (LckDisablePreemptCheck) {
		return;
	}
	if (current_cpu_datap()->cpu_hibernate) {
		return;
	}
	if (startup_phase < STARTUP_SUB_EARLY_BOOT) {
		return;
	}
	__lck_mtx_preemption_disabled_panic(lock, expected);
}

static inline void
lck_mtx_check_irq(lck_mtx_t *lock)
{
	if (ml_at_interrupt_context()) {
		__lck_mtx_at_irq_panic(lock);
	}
}

#define LCK_MTX_SNIFF_PREEMPTION(thread) lock_preemption_level_for_thread(thread)
#define LCK_MTX_CHECK_INVARIANTS 1
#else
#define lck_mtx_check_irq(lck) ((void)0)
#define LCK_MTX_SNIFF_PREEMPTION(thread) 0
#define LCK_MTX_CHECK_INVARIANTS 0
#endif /* !DEVELOPMENT && !DEBUG */

#if CONFIG_DTRACE
#define LCK_MTX_SNIFF_DTRACE() lck_debug_state.lds_value
#else
#define LCK_MTX_SNIFF_DTRACE() 0
#endif


#pragma mark lck_mtx_t: alloc/init/destroy/free

lck_mtx_t *
lck_mtx_alloc_init(lck_grp_t *grp, lck_attr_t *attr)
{
	lck_mtx_t *lck;

	lck = zalloc(KT_LCK_MTX);
	lck_mtx_init(lck, grp, attr);
	return lck;
}

void
lck_mtx_free(lck_mtx_t *lck, lck_grp_t *grp)
{
	lck_mtx_destroy(lck, grp);
	zfree(KT_LCK_MTX, lck);
}

void
lck_mtx_init(lck_mtx_t *lck, lck_grp_t *grp, lck_attr_t *attr)
{
	if (attr == LCK_ATTR_NULL) {
		attr = &lck_attr_default;
	}

	*lck = (lck_mtx_t){
		.lck_mtx_type = LCK_TYPE_MUTEX,
		.lck_mtx_grp = grp->lck_grp_attr_id,
	};
	if (attr->lck_attr_val & LCK_ATTR_DEBUG) {
		lck->lck_mtx.data |= LCK_MTX_PROFILE;
	}

	lck_grp_reference(grp, &grp->lck_grp_mtxcnt);
}

void
lck_mtx_destroy(lck_mtx_t *lck, lck_grp_t *grp)
{
	if (lck->lck_mtx_tsid && lck->lck_mtx_type == LCK_TYPE_MUTEX) {
		panic("Mutex to destroy still has waiters: %p: "
		    "<0x%06x 0x%02x 0x%08x 0x%08x/%p 0x%04x 0x%04x>",
		    lck, lck->lck_mtx_tsid, lck->lck_mtx_type, lck->lck_mtx_grp,
		    lck->lck_mtx.data, ctid_get_thread_unsafe(lck->lck_mtx.owner),
		    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail);
	}
	if (lck->lck_mtx_type != LCK_TYPE_MUTEX ||
	    (lck->lck_mtx.data & ~LCK_MTX_PROFILE) ||
	    lck->lck_mtx.as_tail || lck->lck_mtx.ilk_tail) {
		__lck_mtx_invalid_panic(lck);
	}
	LCK_GRP_ASSERT_ID(grp, lck->lck_mtx_grp);
	lck->lck_mtx_type = LCK_TYPE_NONE;
	lck->lck_mtx.data = LCK_MTX_TAG_DESTROYED;
	lck->lck_mtx_grp = 0;
	lck_grp_deallocate(grp, &grp->lck_grp_mtxcnt);
}
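
/*
 * Illustrative lifecycle sketch (assumption, not code used by this file):
 * a typical client allocates a lock group once, then creates and destroys
 * mutexes against it. "my_subsystem" and the group variable are made up
 * for the example.
 *
 *	static lck_grp_t *my_grp;
 *
 *	my_grp = lck_grp_alloc_init("my_subsystem", LCK_GRP_ATTR_NULL);
 *
 *	lck_mtx_t *mtx = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *	// ... use mtx ...
 *	lck_mtx_free(mtx, my_grp);      // calls lck_mtx_destroy() internally
 */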


#pragma mark lck_mtx_t: lck_mtx_ilk*

static hw_spin_timeout_status_t
lck_mtx_ilk_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
	lck_mtx_t *lck = _lock;

	panic("Mutex interlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
	    "current owner: %p, "
	    "<0x%06x 0x%02x 0x%08x 0x%08x 0x%04x 0x%04x>, "
	    HW_SPIN_TIMEOUT_DETAILS_FMT,
	    lck, HW_SPIN_TIMEOUT_ARG(to, st),
	    ctid_get_thread_unsafe(lck->lck_mtx.owner),
	    lck->lck_mtx_tsid, lck->lck_mtx_type,
	    lck->lck_mtx_grp, lck->lck_mtx.data,
	    lck->lck_mtx.as_tail, lck->lck_mtx.ilk_tail,
	    HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}

static const struct hw_spin_policy lck_mtx_ilk_timeout_policy = {
	.hwsp_name = "lck_mtx_t (ilk)",
	.hwsp_timeout_atomic = &lock_panic_timeout,
	.hwsp_op_timeout = lck_mtx_ilk_timeout_panic,
};

static void
lck_mtx_ilk_lock_cleanup_as_mcs(
	lck_mtx_t *lock,
	lck_mcs_id_t idx,
	lck_mtx_mcs_t mcs,
	hw_spin_timeout_t to,
	hw_spin_state_t *ss)
{
	lck_mtx_mcs_t nnode = NULL;
	lck_mcs_id_t pidx = (lck_mcs_id_t)mcs->lmm_as_prev;
	bool was_last;

	/*
	 * This is called when the thread made use
	 * of the adaptive spin queue and needs
	 * to remove itself from it.
	 */

	/*
	 * If the thread is last, set the tail to the node before us.
	 */
	was_last = lock_cmpxchg(&lock->lck_mtx.as_tail, idx, pidx, release);

	if (was_last) {
		/*
		 * If @c mcs was last, we need to erase the previous
		 * node's link to it.
		 *
		 * However, new nodes could have now taken our place
		 * and set the previous node's @c lmm_as_next field
		 * already, so we must CAS rather than blindly set.
		 *
		 * We know the previous node is stable because
		 * we hold the interlock (preventing concurrent
		 * removals).
		 */
		if (pidx) {
			os_atomic_cmpxchg(&lck_mtx_get_mcs(pidx)->lmm_as_next,
			    mcs, nnode, relaxed);
		}
	} else {
		/*
		 * If @c mcs wasn't last, then wait to make sure
		 * we observe @c lmm_as_next. Once we do, we know
		 * the field is stable since we hold the interlock
		 * (preventing concurrent dequeues).
		 *
		 * We can then update it to @c mcs next node index
		 * (which is also stable for similar reasons).
		 *
		 * Lastly, update the previous node's @c lmm_as_next
		 * field as well to terminate the dequeue.
		 */
		while (!hw_spin_wait_until(&mcs->lmm_as_next, nnode, nnode)) {
			hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
			hw_spin_should_keep_spinning(lock, pol, to, ss);
		}

		os_atomic_store(&nnode->lmm_as_prev, pidx, relaxed);
		if (pidx) {
			os_atomic_store(&lck_mtx_get_mcs(pidx)->lmm_as_next,
			    nnode, relaxed);
		}
	}

	/*
	 * @c mcs's fields are left dangling;
	 * it is the responsibility of the caller
	 * to terminate the cleanup.
	 */
}

static NOINLINE void
lck_mtx_ilk_lock_contended(
	lck_mtx_t *lock,
	lck_mtx_state_t state,
	lck_ilk_mode_t mode)
{
	hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };

	lck_mtx_mcs_t mcs, nnode, pnode;
	lck_mcs_id_t idx, pidx;
	lck_mtx_state_t nstate;
	unsigned long ready;
	uint64_t spin_start;

	/*
	 * Take a spot in the interlock MCS queue,
	 * and then spin until we're at the head of it.
	 */

	idx = lck_mtx_get_mcs_id();
	mcs = &lck_mcs_get_current()->mcs_mtx;
	if (mode != LCK_MTX_MODE_SPIN) {
		spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
	}

	mcs->lmm_ilk_current = lock;
	pidx = os_atomic_xchg(&lock->lck_mtx.ilk_tail, idx, release);
	if (pidx) {
		pnode = lck_mtx_get_mcs(pidx);
		os_atomic_store(&pnode->lmm_ilk_next, mcs, relaxed);

		while (!hw_spin_wait_until(&mcs->lmm_ilk_ready, ready, ready)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}
	}


	/*
	 * We're now the first in line, wait for the interlock
	 * to look ready and take it.
	 *
	 * We can't just assume the lock is ours for the taking,
	 * because the fastpath of lck_mtx_lock_spin{,_always}
	 * only looks at the mutex "data" and might steal it.
	 *
	 * Also clear the interlock MCS tail if @c mcs is last.
	 */
	do {
		while (!hw_spin_wait_until(&lock->lck_mtx.val,
		    state.val, state.ilocked == 0)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		nstate = state;
		nstate.ilocked = 1;
		if (nstate.ilk_tail == idx) {
			nstate.ilk_tail = 0;
		}
	} while (!os_atomic_cmpxchg(&lock->lck_mtx, state, nstate, acquire));


	/*
	 * We now have the interlock, let's clean up the MCS state.
	 *
	 * First, if there is a node after us, notify it that it
	 * is now at the head of the interlock queue.
	 *
	 * Second, perform the adaptive spin MCS cleanup if needed.
	 *
	 * Lastly, clear the MCS node.
	 */
	if (state.ilk_tail != idx) {
		while (!hw_spin_wait_until(&mcs->lmm_ilk_next, nnode, nnode)) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		os_atomic_store(&nnode->lmm_ilk_ready, 1, relaxed);
	}

	if (mode == LCK_ILK_MODE_FROM_AS) {
		lck_mtx_ilk_lock_cleanup_as_mcs(lock, idx, mcs, to, &ss);
	}
	lck_mtx_mcs_clear(mcs);

	if (mode != LCK_MTX_MODE_SPIN) {
		LCK_MTX_ADAPTIVE_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
	}
}

static void
lck_mtx_ilk_lock_nopreempt(lck_mtx_t *lock, lck_ilk_mode_t mode)
{
	lck_mtx_state_t state, nstate;

	os_atomic_rmw_loop(&lock->lck_mtx.val, state.val, nstate.val, acquire, {
		if (__improbable(state.ilocked || state.ilk_tail)) {
			os_atomic_rmw_loop_give_up({
				return lck_mtx_ilk_lock_contended(lock, state, mode);
			});
		}

		nstate = state;
		nstate.ilocked = true;
	});
}

static void
lck_mtx_ilk_unlock_v(lck_mtx_t *lock, uint32_t data)
{
	os_atomic_store(&lock->lck_mtx.data, data, release);
	lock_enable_preemption();
}

static void
lck_mtx_ilk_unlock(lck_mtx_t *lock)
{
	lck_mtx_ilk_unlock_v(lock, lock->lck_mtx.data & ~LCK_MTX_ILOCK);
}


#pragma mark lck_mtx_t: turnstile integration

/*
 * Routine: lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 *
 * Always aggressively sets the owning thread to promoted,
 * even if it's the same or higher priority.
 * This prevents it from lowering its own priority while holding a lock.
 *
 * TODO: Come up with a more efficient way to handle same-priority promotions
 *      <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
 */
static struct turnstile *
lck_mtx_lock_wait(
	lck_mtx_t *lck,
	thread_t self,
	thread_t holder,
	struct turnstile *ts)
{
	uint64_t sleep_start = LCK_MTX_BLOCK_BEGIN();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
	    unslide_for_kdebug(lck), (uintptr_t)thread_tid(self), 0, 0, 0);

	if (ts == TURNSTILE_NULL) {
		ts = turnstile_prepare_compact_id((uintptr_t)lck,
		    lck->lck_mtx_tsid, TURNSTILE_KERNEL_MUTEX);
		if (lck->lck_mtx_tsid == 0) {
			lck->lck_mtx_tsid = ts->ts_compact_id;
		}
	}
	assert3u(ts->ts_compact_id, ==, lck->lck_mtx_tsid);

	thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
	turnstile_update_inheritor(ts, holder,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	waitq_assert_wait64(&ts->ts_waitq, LCK_MTX_EVENT(lck),
	    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER, TIMEOUT_WAIT_FOREVER);

	lck_mtx_ilk_unlock(lck);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	LCK_MTX_BLOCK_END(lck, lck->lck_mtx_grp, sleep_start);

	return ts;
}

static void
lck_mtx_lock_wait_done(lck_mtx_t *lck, struct turnstile *ts)
{
	if (turnstile_complete_compact_id((uintptr_t)lck, ts,
	    TURNSTILE_KERNEL_MUTEX)) {
		lck->lck_mtx_tsid = 0;
	}
}

/*
 * Routine: lck_mtx_lock_will_need_wakeup
 *
 * Returns whether the thread is the current turnstile inheritor,
 * which means it will have to call lck_mtx_unlock_wakeup()
 * on unlock.
 */
__attribute__((always_inline))
static bool
lck_mtx_lock_will_need_wakeup(lck_mtx_t *lck, thread_t self)
{
	uint32_t tsid = lck->lck_mtx_tsid;

	return tsid && turnstile_get_by_id(tsid)->ts_inheritor == self;
}

/*
 * Routine: lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 *
 * NOTE: callers should call turnstile_cleanup() after
 * dropping the interlock.
 */
static void
lck_mtx_unlock_wakeup(
	lck_mtx_t *lck,
	__kdebug_only thread_t thread)
{
	struct turnstile *ts;
	kern_return_t did_wake;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
	    unslide_for_kdebug(lck), (uintptr_t)thread_tid(thread), 0, 0, 0);

	ts = turnstile_get_by_id(lck->lck_mtx_tsid);

	/*
	 * We can skip turnstile_{prepare,cleanup} because
	 * we hold the interlock of the primitive,
	 * and enqueues/wakeups all happen under the interlock,
	 * which means the turnstile is stable.
	 */
	did_wake = waitq_wakeup64_one(&ts->ts_waitq, LCK_MTX_EVENT(lck),
	    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	assert(did_wake == KERN_SUCCESS);

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}


#pragma mark lck_mtx_t: lck_mtx_lock

static inline bool
lck_mtx_ctid_on_core(uint32_t ctid)
{
	thread_t th = ctid_get_thread_unsafe(ctid);

	return th && machine_thread_on_core_allow_invalid(th);
}

#define LCK_MTX_OWNER_FOR_TRACE(lock) \
	VM_KERNEL_UNSLIDE_OR_PERM(ctid_get_thread_unsafe((lock)->lck_mtx.data))

static void
lck_mtx_lock_adaptive_spin(lck_mtx_t *lock, lck_mtx_state_t state)
{
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
	hw_spin_policy_t pol = &lck_mtx_ilk_timeout_policy;
	hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
	hw_spin_state_t ss = { };
	uint64_t deadline;

	lck_mtx_mcs_t mcs, node;
	lck_mcs_id_t idx, pidx, clear_idx;
	unsigned long prev;
	lck_mtx_state_t nstate;
	ast_t *const astp = ast_pending();

	idx = lck_mtx_get_mcs_id();
	mcs = &lck_mcs_get_current()->mcs_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_START,
	    trace_lck, LCK_MTX_OWNER_FOR_TRACE(lock), lock->lck_mtx_tsid, 0, 0);

	/*
	 * Take a spot in the adaptive spin queue,
	 * and then spin until we're at the head of it.
	 *
	 * Until we're at the head, we do not need to monitor
	 * whether the current owner is on core or not:
	 *
	 * 1. the head of the queue is doing it already,
	 *
	 * 2. when the entire adaptive spin queue "gives up"
	 *    as a result of the owner going off core, we want
	 *    to avoid a thundering herd and let the AS queue
	 *    pour into the interlock queue one at a time.
	 *
	 * Do give up if the scheduler signals that something
	 * more important has shown up.
	 *
	 * Note: this function is optimized so that we do not touch
	 *       our local MCS node when we're the head of the queue.
	 *
	 *       This allows us, in the case when the contention is
	 *       between 2 cores only, to not touch this
	 *       cacheline at all.
	 */
	pidx = os_atomic_xchg(&lock->lck_mtx.as_tail, idx, release);
	if (pidx) {
		node = lck_mtx_get_mcs(pidx);
		mcs->lmm_as_prev = pidx;
		os_atomic_store(&node->lmm_as_next, mcs, release);

		while (!hw_spin_wait_until(&mcs->lmm_as_prev, prev,
		    prev == 0 || (os_atomic_load(astp, relaxed) & AST_URGENT))) {
			hw_spin_should_keep_spinning(lock, pol, to, &ss);
		}

		if (__improbable(prev)) {
			goto adaptive_spin_fail;
		}

		clear_idx = 0;
	} else {
		clear_idx = idx;
	}

	/*
	 * We're now first in line.
	 *
	 * It's our responsibility to monitor the lock's state
	 * for whether (1) the lock has become available,
	 * (2) its owner has gone off core, (3) the scheduler
	 * wants its CPU back, or (4) we've spun for too long.
	 */
	deadline = ml_get_timebase() + os_atomic_load(&MutexSpin, relaxed);

	for (;;) {
		state.val = lock_load_exclusive(&lock->lck_mtx.val, acquire);

		if (__probable(!state.ilocked && !state.ilk_tail && !state.owner)) {
			/*
			 * 2-core contention: if we can, try to dequeue
			 * ourselves from the adaptive spin queue
			 * as part of this CAS, in order to avoid
			 * the cost of lck_mtx_ilk_lock_cleanup_as_mcs()
			 * and of zeroing the MCS node at all.
			 *
			 * Because the queue is designed to limit contention,
			 * using store-exclusive over an armv8.1 LSE atomic
			 * is actually marginally better (presumably due to
			 * the better codegen).
			 */
			nstate = state;
			nstate.ilocked = true;
			if (state.as_tail == clear_idx) {
				nstate.as_tail = 0;
			}
			if (__probable(lock_store_exclusive(&lock->lck_mtx.val,
			    state.val, nstate.val, acquire))) {
				break;
			}
		} else {
			lock_wait_for_event();
		}

		if (__improbable(ml_get_timebase() > deadline ||
		    (os_atomic_load(astp, relaxed) & AST_URGENT) ||
		    (!state.ilocked && !state.ilk_tail && state.owner &&
		    !lck_mtx_ctid_on_core(state.owner)))) {
			goto adaptive_spin_fail;
		}
	}

	/*
	 * If we're here, we got the lock, and we just have to clean up
	 * the MCS nodes and return.
	 */
	if (state.as_tail != clear_idx) {
		lck_mtx_ilk_lock_cleanup_as_mcs(lock, idx, mcs, to, &ss);
		lck_mtx_mcs_clear(mcs);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(current_thread()),
	    lock->lck_mtx_tsid, 0, 0);
	return;

adaptive_spin_fail:
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN_CODE) | DBG_FUNC_END,
	    trace_lck, LCK_MTX_OWNER_FOR_TRACE(lock), lock->lck_mtx_tsid, 0, 0);
	return lck_mtx_ilk_lock_contended(lock, state, LCK_ILK_MODE_FROM_AS);
}

static NOINLINE void
lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, lck_mtx_mode_t mode)
{
	struct turnstile *ts = TURNSTILE_NULL;
	lck_mtx_state_t state;
	uint32_t ctid = thread->ctid;
	uint32_t data;
#if CONFIG_DTRACE
	int first_miss = 0;
#endif /* CONFIG_DTRACE */
	bool direct_wait = false;
	uint64_t spin_start;
	uint32_t profile;

	lck_mtx_check_irq(lock);
	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
	}

	for (;;) {
		/*
		 * Load the current state and perform sanity checks.
		 *
		 * Note that the various "corrupt" values are designed
		 * so that the slowpath is taken when a mutex was used
		 * after destruction, so that we do not have to do
		 * sanity checks in the fast path.
		 */
		state = os_atomic_load(&lock->lck_mtx, relaxed);
		if (state.owner == ctid) {
			__lck_mtx_owned_panic(lock, thread);
		}
		if (lock->lck_mtx_type != LCK_TYPE_MUTEX ||
		    state.data == LCK_MTX_TAG_DESTROYED) {
			__lck_mtx_invalid_panic(lock);
		}
		profile = (state.data & LCK_MTX_PROFILE);

		/*
		 * Attempt steal
		 *
		 * When the lock state is 0, then no thread can be queued
		 * for adaptive spinning or for the interlock yet.
		 *
		 * As such we can attempt to take the interlock.
		 * (We can't take the mutex directly because we need
		 * the interlock to do turnstile operations on the way out.)
		 */
		if ((state.val & ~(uint64_t)LCK_MTX_PROFILE) == 0) {
			if (!os_atomic_cmpxchgv(&lock->lck_mtx.val,
			    state.val, state.val | LCK_MTX_ILOCK,
			    &state.val, acquire)) {
				continue;
			}
			break;
		}

#if CONFIG_DTRACE
		if (profile) {
			LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &first_miss);
		}
#endif /* CONFIG_DTRACE */

		if (mode == LCK_MTX_MODE_SLEEPABLE) {
			spin_start = LCK_MTX_ADAPTIVE_SPIN_BEGIN();
		} else {
			spin_start = LCK_MTX_SPIN_SPIN_BEGIN();
		}

		/*
		 * Adaptive spin or interlock
		 *
		 * Evaluate if adaptive spinning should be attempted,
		 * and if yes go to adaptive spin.
		 *
		 * Otherwise (and this includes always-spin mutexes),
		 * go for the interlock.
		 */
		if (mode != LCK_MTX_MODE_SPIN_ALWAYS &&
		    (state.ilocked || state.as_tail || !state.owner ||
		    lck_mtx_ctid_on_core(state.owner))) {
			lck_mtx_lock_adaptive_spin(lock, state);
		} else {
			direct_wait = true;
			lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_DIRECT);
		}

		if (mode == LCK_MTX_MODE_SLEEPABLE) {
			LCK_MTX_ADAPTIVE_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
		} else {
			LCK_MTX_SPIN_SPIN_END(lock, lock->lck_mtx_grp, spin_start);
		}

		/*
		 * Take or sleep
		 *
		 * We now have the interlock. Either the owner
		 * isn't set, and the mutex is ours to claim,
		 * or we must go to sleep.
		 *
		 * If we go to sleep, we need to set LCK_MTX_NEEDS_WAKEUP
		 * to force the current lock owner to call
		 * lck_mtx_unlock_wakeup().
		 */
		state = os_atomic_load(&lock->lck_mtx, relaxed);
		if (state.owner == LCK_MTX_NULL_CTID) {
			break;
		}

		if (mode == LCK_MTX_MODE_SPIN_ALWAYS) {
			__lck_mtx_lock_is_sleepable_panic(lock);
		}

#if CONFIG_DTRACE
		if (profile) {
			LCK_MTX_PROF_WAIT(lock, lock->lck_mtx_grp,
			    direct_wait, &first_miss);
		}
#endif /* CONFIG_DTRACE */
		os_atomic_store(&lock->lck_mtx.data,
		    state.data | LCK_MTX_ILOCK | LCK_MTX_NEEDS_WAKEUP,
		    compiler_acq_rel);
		ts = lck_mtx_lock_wait(lock, thread,
		    ctid_get_thread(state.owner), ts);

		/* returns with the interlock unlocked and preemption re-enabled */
		lock_disable_preemption_for_thread(thread);
	}

	/*
	 * We can take the lock!
	 *
	 * We only have the interlock and the owner field is 0.
	 *
	 * Perform various turnstile cleanups if needed,
	 * claim the lock, and re-enable preemption (if needed).
	 */
	if (ts) {
		lck_mtx_lock_wait_done(lock, ts);
	}
	data = ctid | profile;
	if (lck_mtx_lock_will_need_wakeup(lock, thread)) {
		data |= LCK_MTX_NEEDS_WAKEUP;
	}
	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		data |= LCK_MTX_ILOCK | LCK_MTX_SPIN_MODE;
	}
	os_atomic_store(&lock->lck_mtx.data, data, release);

	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		lock_enable_preemption();
	}

	assert(thread->turnstile != NULL);

	if (ts) {
		turnstile_cleanup();
	}
	LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp,
	    mode != LCK_MTX_MODE_SLEEPABLE, profile);
}

#if LCK_MTX_CHECK_INVARIANTS || CONFIG_DTRACE
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static void
lck_mtx_lock_slow(
	lck_mtx_t *lock,
	thread_t thread,
	lck_mtx_state_t state,
	lck_mtx_mode_t mode)
{
#pragma unused(state)
#if CONFIG_DTRACE
	lck_mtx_state_t ostate = {
		.data = LCK_MTX_PROFILE,
	};
#endif /* CONFIG_DTRACE */

#if LCK_MTX_CHECK_INVARIANTS
	if (mode != LCK_MTX_MODE_SPIN_ALWAYS) {
		lck_mtx_check_preemption(lock, thread,
		    (mode == LCK_MTX_MODE_SPIN));
	}
#endif /* LCK_MTX_CHECK_INVARIANTS */
#if CONFIG_DTRACE
	if (state.val == ostate.val) {
		state.data = thread->ctid | LCK_MTX_PROFILE;
		if (mode != LCK_MTX_MODE_SLEEPABLE) {
			state.ilocked = true;
			state.spin_mode = true;
		}
		os_atomic_cmpxchgv(&lock->lck_mtx.val,
		    ostate.val, state.val, &state.val, acquire);
	}
	if ((state.val & ~ostate.val) == 0) {
		LCK_MTX_ACQUIRED(lock, lock->lck_mtx_grp,
		    mode != LCK_MTX_MODE_SLEEPABLE,
		    state.data & LCK_MTX_PROFILE);
		return;
	}
#endif /* CONFIG_DTRACE */
	lck_mtx_lock_contended(lock, thread, mode);
}

static __attribute__((always_inline)) void
lck_mtx_lock_fastpath(lck_mtx_t *lock, lck_mtx_mode_t mode)
{
	thread_t thread = current_thread();
	lck_mtx_state_t state = {
		.data = thread->ctid,
	};
	uint64_t take_slowpath = 0;

	if (mode != LCK_MTX_MODE_SPIN_ALWAYS) {
		take_slowpath |= LCK_MTX_SNIFF_PREEMPTION(thread);
	}
	take_slowpath |= LCK_MTX_SNIFF_DTRACE();

	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
		state.ilocked = true;
		state.spin_mode = true;
	}

	/*
	 * Do the CAS on the entire mutex state,
	 * which hence requires the ILK/AS queues
	 * to be empty (which is fairer).
	 */
	lock_cmpxchgv(&lock->lck_mtx.val,
	    0, state.val, &state.val, acquire);

	take_slowpath |= state.val;
	if (__improbable(take_slowpath)) {
		return lck_mtx_lock_slow(lock, thread, state, mode);
	}
}

void
lck_mtx_lock(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SLEEPABLE);
}

void
lck_mtx_lock_spin(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SPIN);
}

void
lck_mtx_lock_spin_always(lck_mtx_t *lock)
{
	lck_mtx_lock_fastpath(lock, LCK_MTX_MODE_SPIN_ALWAYS);
}


#pragma mark lck_mtx_t: lck_mtx_try_lock

static __attribute__((always_inline)) bool
lck_mtx_try_lock_slow_inline(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata,
	bool spin)
{
#pragma unused(lock, thread, odata, ndata)
#if CONFIG_DTRACE
	if (odata == LCK_MTX_PROFILE) {
		os_atomic_cmpxchgv(&lock->lck_mtx.data,
		    odata, ndata | LCK_MTX_PROFILE, &odata, acquire);
	}
	if ((odata & ~LCK_MTX_PROFILE) == 0) {
		LCK_MTX_TRY_ACQUIRED(lock, lock->lck_mtx_grp,
		    spin, odata & LCK_MTX_PROFILE);
		return true;
	}
	if (odata & LCK_MTX_PROFILE) {
		LCK_MTX_PROF_MISS(lock, lock->lck_mtx_grp, &(int){ 0 });
	}
#endif /* CONFIG_DTRACE */

	if (spin) {
		lock_enable_preemption();
	}
	return false;
}

#if CONFIG_DTRACE || LCK_MTX_CHECK_INVARIANTS
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static bool
lck_mtx_try_lock_slow(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata)
{
	return lck_mtx_try_lock_slow_inline(lock, thread, odata, ndata, false);
}

#if CONFIG_DTRACE || LCK_MTX_CHECK_INVARIANTS
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static bool
lck_mtx_try_lock_slow_spin(
	lck_mtx_t *lock,
	thread_t thread,
	uint32_t odata,
	uint32_t ndata)
{
	return lck_mtx_try_lock_slow_inline(lock, thread, odata, ndata, true);
}

static __attribute__((always_inline)) bool
lck_mtx_try_lock_fastpath(lck_mtx_t *lock, lck_mtx_mode_t mode)
{
	thread_t thread = current_thread();
	uint32_t odata, ndata = thread->ctid;
	uint32_t take_slowpath = 0;

#if CONFIG_DTRACE
	take_slowpath |= lck_debug_state.lds_value;
#endif
	if (mode != LCK_MTX_MODE_SLEEPABLE) {
		lock_disable_preemption_for_thread(thread);
		ndata |= LCK_MTX_SPIN_MODE | LCK_MTX_ILOCK;
	}

	/*
	 * Because try_lock is likely to be used for cases
	 * like lock inversion resolution, it tries a bit harder
	 * than lck_mtx_lock() to take the lock, and ignores the
	 * adaptive spin / interlock queues by doing the CAS
	 * on the 32-bit mutex data only.
	 */
	lock_cmpxchgv(&lock->lck_mtx.data, 0, ndata, &odata, acquire);

	take_slowpath |= odata;
	if (__probable(!take_slowpath)) {
		return true;
	}

	if (mode == LCK_MTX_MODE_SPIN_ALWAYS &&
	    (odata & LCK_MTX_CTID_MASK) &&
	    !(odata & LCK_MTX_SPIN_MODE)) {
		__lck_mtx_lock_is_sleepable_panic(lock);
	}

	if (mode == LCK_MTX_MODE_SLEEPABLE) {
		return lck_mtx_try_lock_slow(lock, thread, odata, ndata);
	} else {
		return lck_mtx_try_lock_slow_spin(lock, thread, odata, ndata);
	}
}

boolean_t
lck_mtx_try_lock(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SLEEPABLE);
}

boolean_t
lck_mtx_try_lock_spin(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SPIN);
}

boolean_t
lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
{
	return lck_mtx_try_lock_fastpath(lock, LCK_MTX_MODE_SPIN_ALWAYS);
}
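
/*
 * Illustrative usage sketch (assumption, not code used by this file):
 * try-lock is typically used to resolve lock ordering problems, backing
 * off when the attempt fails. "other_mtx" is made up for the example.
 *
 *	if (lck_mtx_try_lock(&mtx)) {
 *		// ... both locks held ...
 *		lck_mtx_unlock(&mtx);
 *	} else {
 *		lck_mtx_unlock(&other_mtx);     // back off, retake in order
 *	}
 */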


#pragma mark lck_mtx_t: lck_mtx_unlock

static NOINLINE void
lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, uint32_t data)
{
	bool cleanup = false;

#if !CONFIG_DTRACE
	/*
	 * This check is done by lck_mtx_unlock_slow() when it is enabled.
	 */
	if (thread->ctid != (data & LCK_MTX_CTID_MASK)) {
		__lck_mtx_not_owned_panic(lock, thread);
	}
#endif /* !CONFIG_DTRACE */

	if ((data & LCK_MTX_SPIN_MODE) == 0) {
		lock_disable_preemption_for_thread(thread);
		lck_mtx_ilk_lock_nopreempt(lock, LCK_ILK_MODE_UNLOCK);
	}

	/*
	 * We must re-load the data: we might have taken
	 * the slowpath because another thread had taken
	 * the interlock and set the NEEDS_WAKEUP bit
	 * while we were spinning to get it.
	 */
	data = os_atomic_load(&lock->lck_mtx.data, compiler_acq_rel);
	if (data & LCK_MTX_NEEDS_WAKEUP) {
		lck_mtx_unlock_wakeup(lock, thread);
		cleanup = true;
	}
	lck_mtx_ilk_unlock_v(lock, data & LCK_MTX_PROFILE);

	LCK_MTX_RELEASED(lock, lock->lck_mtx_grp, data & LCK_MTX_PROFILE);

	/*
	 * Do not do any turnstile operations outside of this block.
	 *
	 * lock/unlock is called at an early stage of boot while single
	 * threaded, before turnstiles are available.
	 * Even without contention we can come through the slow path
	 * if the mutex is acquired as a spin lock.
	 */
	if (cleanup) {
		turnstile_cleanup();
	}
}

#if CONFIG_DTRACE
__attribute__((noinline))
#else
__attribute__((always_inline))
#endif
static void
lck_mtx_unlock_slow(lck_mtx_t *lock, thread_t thread, uint32_t data)
{
#if CONFIG_DTRACE
	/*
	 * If DTrace is enabled, locks can be profiled,
	 * which causes the fastpath of unlock to fail.
	 */
	if ((data & LCK_MTX_BITS_MASK) == LCK_MTX_PROFILE) {
		os_atomic_cmpxchgv(&lock->lck_mtx.data, data, LCK_MTX_PROFILE,
		    &data, release);
	}
	if (thread->ctid != (data & LCK_MTX_CTID_MASK)) {
		__lck_mtx_not_owned_panic(lock, thread);
	}
	if ((data & (LCK_MTX_BITS_MASK & ~LCK_MTX_PROFILE)) == 0) {
		LCK_MTX_RELEASED(lock, lock->lck_mtx_grp, false);
		return;
	}
#endif /* CONFIG_DTRACE */

	lck_mtx_unlock_contended(lock, thread, data);
}

void
lck_mtx_unlock(lck_mtx_t *lock)
{
	thread_t thread = current_thread();
	uint32_t take_slowpath = 0;
	uint32_t data;

	take_slowpath |= LCK_MTX_SNIFF_DTRACE();

	/*
	 * The fast path ignores the ILK/AS queues on purpose;
	 * those really are a "lock" concept, not an unlock one.
	 */
	if (__probable(lock_cmpxchgv(&lock->lck_mtx.data,
	    thread->ctid, 0, &data, release))) {
		if (__probable(!take_slowpath)) {
			return;
		}
	}

	lck_mtx_unlock_slow(lock, thread, data);
}


#pragma mark lck_mtx_t: misc

void
lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();

	if (type == LCK_MTX_ASSERT_OWNED) {
		if (state.owner != thread->ctid) {
			__lck_mtx_not_owned_panic(lock, thread);
		}
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (state.owner == thread->ctid) {
			__lck_mtx_owned_panic(lock, thread);
		}
	} else {
		panic("lck_mtx_assert(): invalid arg (%u)", type);
	}
}

/*
 * Routine: lck_mtx_convert_spin
 *
 * Convert a mutex held for spin into a held full mutex.
 */
void
lck_mtx_convert_spin(lck_mtx_t *lock)
{
	lck_mtx_state_t state = os_atomic_load(&lock->lck_mtx, relaxed);
	thread_t thread = current_thread();
	uint32_t data = thread->ctid;

	if (state.owner != data) {
		__lck_mtx_not_owned_panic(lock, thread);
	}

	if (state.spin_mode) {
		/*
		 * Note: we can acquire the lock in spin mode
		 * _and_ be the inheritor if we waited.
		 *
		 * We must only clear ilocked and spin_mode,
		 * but preserve owner and needs_wakeup.
		 */
		state.ilocked = false;
		state.spin_mode = false;
		lck_mtx_ilk_unlock_v(lock, state.data);
		turnstile_cleanup();
	}
}
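
/*
 * Illustrative pattern (assumption, not code from this file): take the lock
 * in spin mode for a short critical section, and convert it to a full mutex
 * only when it turns out the holder may need to block.
 *
 *	lck_mtx_lock_spin(&mtx);
 *	if (need_to_block()) {          // hypothetical predicate
 *		lck_mtx_convert_spin(&mtx);
 *		// ... may now block while holding the mutex ...
 *	}
 *	lck_mtx_unlock(&mtx);
 */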

/*
 * Routine: kdp_lck_mtx_lock_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
{
	lck_mtx_state_t state = os_atomic_load(&lck->lck_mtx, relaxed);

	if (not_in_kdp) {
		panic("panic: spinlock acquired check done outside of kernel debugger");
	}
	if (state.data == LCK_MTX_TAG_DESTROYED) {
		return false;
	}
	return state.owner || state.ilocked;
}

void
kdp_lck_mtx_find_owner(
	struct waitq *waitq __unused,
	event64_t event,
	thread_waitinfo_t *waitinfo)
{
	lck_mtx_t *mutex = LCK_EVENT_TO_MUTEX(event);
	lck_mtx_state_t state = os_atomic_load(&mutex->lck_mtx, relaxed);

	assert3u(state.data, !=, LCK_MTX_TAG_DESTROYED);
	waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
	waitinfo->owner = thread_tid(ctid_get_thread(state.owner));
}

#endif /* !LCK_MTX_USE_ARCH */

/*
 * Routine: mutex_pause
 *
 * Called by former callers of simple_lock_pause().
 */
#define MAX_COLLISION_COUNTS 32
#define MAX_COLLISION 8

unsigned int max_collision_count[MAX_COLLISION_COUNTS];

uint32_t collision_backoffs[MAX_COLLISION] = {
	10, 50, 100, 200, 400, 600, 800, 1000
};


void
mutex_pause(uint32_t collisions)
{
	wait_result_t wait_result;
	uint32_t back_off;

	if (collisions >= MAX_COLLISION_COUNTS) {
		collisions = MAX_COLLISION_COUNTS - 1;
	}
	max_collision_count[collisions]++;

	if (collisions >= MAX_COLLISION) {
		collisions = MAX_COLLISION - 1;
	}
	back_off = collision_backoffs[collisions];

	wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT,
	    back_off, NSEC_PER_USEC);
	assert(wait_result == THREAD_WAITING);

	wait_result = thread_block(THREAD_CONTINUE_NULL);
	assert(wait_result == THREAD_TIMED_OUT);
}


unsigned int mutex_yield_wait = 0;
unsigned int mutex_yield_no_wait = 0;

boolean_t
lck_mtx_yield(
	lck_mtx_t *lck)
{
	bool has_waiters = LCK_MTX_HAS_WAITERS(lck);

#if DEBUG
	lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
#endif /* DEBUG */

	if (!has_waiters) {
		mutex_yield_no_wait++;
	} else {
		mutex_yield_wait++;
		lck_mtx_unlock(lck);
		mutex_pause(0);
		lck_mtx_lock(lck);
	}
	return has_waiters;
}