/*
 * Copyright (c) 2007-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie
 * Mellon University All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright notice
 * and this permission notice appear in all copies of the software,
 * derivative works or modified versions, and any portions thereof, and that
 * both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science Carnegie Mellon University Pittsburgh PA
 * 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon the
 * rights to redistribute these changes.
 */
/*
 * File:    kern/lock.c
 * Author:  Avadis Tevanian, Jr., Michael Wayne Young
 * Date:    1985
 *
 * Locking primitives implementation
 */

#define ATOMIC_PRIVATE 1
#define LOCK_PRIVATE 1

#include <mach_ldebug.h>

#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/misc_protos.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/sched_prim.h>
#include <kern/xpr.h>
#include <kern/debug.h>
#include <kern/kcdata.h>
#include <string.h>

#include <arm/cpu_data_internal.h>
#include <arm/proc_reg.h>
#include <arm/smp.h>
#include <machine/atomic.h>
#include <machine/machine_cpu.h>

#include <sys/kdebug.h>

/*
 * We need only enough declarations from the BSD-side to be able to
 * test if our probe is active, and to call __dtrace_probe(). Setting
 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
 */
#if CONFIG_DTRACE
#define NEED_DTRACE_DEFS
#include <../bsd/sys/lockstat.h>

#define DTRACE_RW_SHARED    0x0     // reader
#define DTRACE_RW_EXCL      0x1     // writer
#define DTRACE_NO_FLAG      0x0     // not applicable

#endif /* CONFIG_DTRACE */

#define LCK_RW_LCK_EXCLUSIVE_CODE   0x100
#define LCK_RW_LCK_EXCLUSIVE1_CODE  0x101
#define LCK_RW_LCK_SHARED_CODE      0x102
#define LCK_RW_LCK_SH_TO_EX_CODE    0x103
#define LCK_RW_LCK_SH_TO_EX1_CODE   0x104
#define LCK_RW_LCK_EX_TO_SH_CODE    0x105


#define ANY_LOCK_DEBUG  (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG)
// Panic in tests that check lock usage correctness.
// These panics are undesirable when already in a panic or when a debugger is running.
#define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0)

unsigned int LcksOpts = 0;

#if CONFIG_DTRACE && __SMP__
extern uint64_t dtrace_spin_threshold;
#endif

/* Forwards */


#if USLOCK_DEBUG
/*
 * Perform simple lock checks.
 */
int uslock_check = 1;
int max_lock_loops = 100000000;
decl_simple_lock_data(extern, printf_lock)
decl_simple_lock_data(extern, panic_lock)
#endif /* USLOCK_DEBUG */

extern unsigned int not_in_kdp;

/*
 * We often want to know the addresses of the callers
 * of the various lock routines. However, this information
 * is only used for debugging and statistics.
 */
typedef void *pc_t;
#define INVALID_PC      ((void *) VM_MAX_KERNEL_ADDRESS)
#define INVALID_THREAD  ((void *) VM_MAX_KERNEL_ADDRESS)

#ifdef lint
/*
 * Eliminate lint complaints about unused local pc variables.
 */
#define OBTAIN_PC(pc,l) ++pc
#else /* lint */
#define OBTAIN_PC(pc,l)
#endif /* lint */


/*
 * Portable lock package implementation of usimple_locks.
 */

#if USLOCK_DEBUG
#define USLDBG(stmt) stmt
    void usld_lock_init(usimple_lock_t, unsigned short);
    void usld_lock_pre(usimple_lock_t, pc_t);
    void usld_lock_post(usimple_lock_t, pc_t);
    void usld_unlock(usimple_lock_t, pc_t);
    void usld_lock_try_pre(usimple_lock_t, pc_t);
    void usld_lock_try_post(usimple_lock_t, pc_t);
    int usld_lock_common_checks(usimple_lock_t, const char *);
#else /* USLOCK_DEBUG */
#define USLDBG(stmt)
#endif /* USLOCK_DEBUG */

/*
 * Owner thread pointer when lock held in spin mode
 */
#define LCK_MTX_SPIN_TAG 0xfffffff0


#define interlock_lock(lock)    hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define interlock_try(lock)     hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define interlock_unlock(lock)  hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT)
#define lck_rw_ilk_lock(lock)   hw_lock_bit    ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
#define lck_rw_ilk_unlock(lock) hw_unlock_bit  ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)

#define memory_barrier()        __c11_atomic_thread_fence(memory_order_acq_rel_smp)
#define load_memory_barrier()   __c11_atomic_thread_fence(memory_order_acquire_smp)
#define store_memory_barrier()  __c11_atomic_thread_fence(memory_order_release_smp)

// Enforce program order of loads and stores.
#define ordered_load(target, type) \
        __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
#define ordered_store(target, type, value) \
        __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)

#define ordered_load_mtx(lock)                  ordered_load(&(lock)->lck_mtx_data, uintptr_t)
#define ordered_store_mtx(lock, value)          ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value))
#define ordered_load_rw(lock)                   ordered_load(&(lock)->lck_rw_data, uint32_t)
#define ordered_store_rw(lock, value)           ordered_store(&(lock)->lck_rw_data, uint32_t, (value))
#define ordered_load_rw_owner(lock)             ordered_load(&(lock)->lck_rw_owner, thread_t)
#define ordered_store_rw_owner(lock, value)     ordered_store(&(lock)->lck_rw_owner, thread_t, (value))
#define ordered_load_hw(lock)                   ordered_load(&(lock)->lock_data, uintptr_t)
#define ordered_store_hw(lock, value)           ordered_store(&(lock)->lock_data, uintptr_t, (value))
#define ordered_load_bit(lock)                  ordered_load((lock), uint32_t)
#define ordered_store_bit(lock, value)          ordered_store((lock), uint32_t, (value))
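
/*
 * Usage sketch: the ordered_* accessors above are relaxed atomics; callers
 * provide ordering via the interlock or the explicit barrier macros. The
 * rw-lock slow paths below typically use them like this ('lock' being a
 * lck_rw_t whose interlock is already held):
 *
 *	lck_rw_word_t word;
 *
 *	word.data = ordered_load_rw(lock);	// relaxed snapshot of the lock word
 *	word.w_waiting = 1;			// update the local copy
 *	ordered_store_rw(lock, word.data);	// relaxed publish; the interlock provides ordering
 */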


// Prevent the compiler from reordering memory operations around this
#define compiler_memory_fence() __asm__ volatile ("" ::: "memory")

#define LOCK_PANIC_TIMEOUT  0xc00000
#define NOINLINE            __attribute__((noinline))


#if __arm__
#define interrupts_disabled(mask) (mask & PSR_INTMASK)
#else
#define interrupts_disabled(mask) (mask & DAIF_IRQF)
#endif


#if __arm__
#define enable_fiq()        __asm__ volatile ("cpsie f" ::: "memory");
#define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory");
#endif

/*
 * Forward declarations
 */

static void lck_rw_lock_shared_gen(lck_rw_t *lck);
static void lck_rw_lock_exclusive_gen(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck);
static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state);
static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state);
static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait);

/*
 * atomic exchange API is a low level abstraction of the operations
 * to atomically read, modify, and write a pointer. This abstraction works
 * for both Intel and ARMv8.1 compare and exchange atomic instructions as
 * well as the ARM exclusive instructions.
 *
 * atomic_exchange_begin() - begin exchange and retrieve current value
 * atomic_exchange_complete() - conclude an exchange
 * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin()
 */
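/*
 * Usage sketch: a load/modify/store-exclusive retry loop built on this API,
 * mirroring the pattern used throughout the rw-lock paths below ('lock' is
 * a lck_rw_t; the constants come from this file):
 *
 *	uint32_t data, prev;
 *
 *	for ( ; ; ) {
 *		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
 *		if (data & LCK_RW_INTERLOCK) {
 *			atomic_exchange_abort();	// drop the exclusive monitor
 *			continue;			// retry once the interlock clears
 *		}
 *		data |= LCK_RW_WANT_EXCL;		// the state change we want to make
 *		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
 *			break;				// exchange succeeded
 *		cpu_pause();				// lost the race, retry
 *	}
 */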
static uint32_t
atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord)
{
    uint32_t val;

    val = load_exclusive32(target, ord);
    *previous = val;
    return val;
}

static boolean_t
atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord)
{
    (void)previous;         // Previous not needed, monitor is held
    return store_exclusive32(target, newval, ord);
}

static void
atomic_exchange_abort(void)
{
    clear_exclusive();
}

static boolean_t
atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait)
{
    uint32_t value, prev;

    for ( ; ; ) {
        value = atomic_exchange_begin32(target, &prev, ord);
        if (value & test_mask) {
            if (wait)
                wait_for_event();           // Wait with monitor held
            else
                atomic_exchange_abort();    // Clear exclusive monitor
            return FALSE;
        }
        value |= set_mask;
        if (atomic_exchange_complete32(target, prev, value, ord))
            return TRUE;
    }
}

void _disable_preemption(void)
{
    thread_t     thread = current_thread();
    unsigned int count;

    count = thread->machine.preemption_count + 1;
    ordered_store(&thread->machine.preemption_count, unsigned int, count);
}

void _enable_preemption(void)
{
    thread_t     thread = current_thread();
    long         state;
    unsigned int count;
#if __arm__
#define INTERRUPT_MASK PSR_IRQF
#else   // __arm__
#define INTERRUPT_MASK DAIF_IRQF
#endif  // __arm__

    count = thread->machine.preemption_count;
    if (count == 0)
        panic("Preemption count negative");     // Count will go negative when released
    count--;
    if (count > 0)
        goto update_count;                      // Preemption is still disabled, just update
    state = get_interrupts();                   // Get interrupt state
    if (state & INTERRUPT_MASK)
        goto update_count;                      // Interrupts are already masked, can't take AST here

    disable_interrupts_noread();                // Disable interrupts
    ordered_store(&thread->machine.preemption_count, unsigned int, count);
    if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __arm__
#if __ARM_USER_PROTECT__
        uintptr_t up = arm_user_protect_begin(thread);
#endif  // __ARM_USER_PROTECT__
        enable_fiq();
#endif  // __arm__
        ast_taken_kernel();                     // Handle urgent AST
#if __arm__
#if __ARM_USER_PROTECT__
        arm_user_protect_end(thread, up, TRUE);
#endif  // __ARM_USER_PROTECT__
        enable_interrupts();
        return;                                 // Return early on arm only due to FIQ enabling
#endif  // __arm__
    }
    restore_interrupts(state);                  // Enable interrupts
    return;

update_count:
    ordered_store(&thread->machine.preemption_count, unsigned int, count);
    return;
}

int get_preemption_level(void)
{
    return current_thread()->machine.preemption_count;
}

/* Forward declarations for unexported functions that are used externally */
void hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit);
void hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit);

#if __SMP__
static unsigned int
hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout);
#endif

static inline unsigned int
hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
{
    unsigned int success = 0;
    uint32_t mask = (1 << bit);
#if !__SMP__
    uint32_t state;
#endif

#if __SMP__
    if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE)))
        success = hw_lock_bit_to_contended(lock, mask, timeout);
    else
        success = 1;
#else   // __SMP__
    (void)timeout;
    state = ordered_load_bit(lock);
    if (!(mask & state)) {
        ordered_store_bit(lock, state | mask);
        success = 1;
    }
#endif  // __SMP__

#if CONFIG_DTRACE
    if (success)
        LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
#endif

    return success;
}

unsigned int
hw_lock_bit_to(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout)
{
    _disable_preemption();
    return hw_lock_bit_to_internal(lock, bit, timeout);
}

#if __SMP__
static unsigned int NOINLINE
hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout)
{
    uint64_t end = 0;
    int      i;
#if CONFIG_DTRACE
    uint64_t begin;
    boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
    if (__improbable(dtrace_enabled))
        begin = mach_absolute_time();
#endif
    for ( ; ; ) {
        for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
            // Always load-exclusive before wfe
            // This grabs the monitor and wakes up on a release event
            if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) {
                goto end;
            }
        }
        if (end == 0)
            end = ml_get_timebase() + timeout;
        else if (ml_get_timebase() >= end)
            break;
    }
    return 0;
end:
#if CONFIG_DTRACE
    if (__improbable(dtrace_enabled)) {
        uint64_t spintime = mach_absolute_time() - begin;
        if (spintime > dtrace_spin_threshold)
            LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, mask);
    }
#endif
    return 1;
}
#endif  // __SMP__

void
hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit)
{
    if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT))
        return;
#if __SMP__
    panic("hw_lock_bit(): timed out (%p)", lock);
#else
    panic("hw_lock_bit(): interlock held (%p)", lock);
#endif
}

void
hw_lock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
{
    if (__improbable(get_preemption_level() == 0))
        panic("Attempt to take no-preempt bitlock %p in preemptible context", lock);
    if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT))
        return;
#if __SMP__
    panic("hw_lock_bit_nopreempt(): timed out (%p)", lock);
#else
    panic("hw_lock_bit_nopreempt(): interlock held (%p)", lock);
#endif
}

unsigned int
hw_lock_bit_try(hw_lock_bit_t *lock, unsigned int bit)
{
    uint32_t mask = (1 << bit);
#if !__SMP__
    uint32_t state;
#endif
    boolean_t success = FALSE;

    _disable_preemption();
#if __SMP__
    // TODO: consider weak (non-looping) atomic test-and-set
    success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE);
#else
    state = ordered_load_bit(lock);
    if (!(mask & state)) {
        ordered_store_bit(lock, state | mask);
        success = TRUE;
    }
#endif  // __SMP__
    if (!success)
        _enable_preemption();

#if CONFIG_DTRACE
    if (success)
        LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit);
#endif

    return success;
}

static inline void
hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit)
{
    uint32_t mask = (1 << bit);
#if !__SMP__
    uint32_t state;
#endif

#if __SMP__
    __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release);
    set_event();
#else   // __SMP__
    state = ordered_load_bit(lock);
    ordered_store_bit(lock, state & ~mask);
#endif  // __SMP__
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit);
#endif
}

/*
 * Routine: hw_unlock_bit
 *
 * Release spin-lock. The second parameter is the bit number to test and set.
 * Decrement the preemption level.
 */
void
hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit)
{
    hw_unlock_bit_internal(lock, bit);
    _enable_preemption();
}
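
/*
 * Usage sketch: hw_lock_bit()/hw_unlock_bit() operate on a single bit of a
 * lock word and bracket it with preemption disable/enable. The interlock
 * macros near the top of this file wrap exactly this pattern, e.g.:
 *
 *	hw_lock_bit((hw_lock_bit_t *)&mutex->lck_mtx_data, LCK_ILOCK_BIT);	// take the interlock bit
 *	// ... short critical section, preemption disabled ...
 *	hw_unlock_bit((hw_lock_bit_t *)&mutex->lck_mtx_data, LCK_ILOCK_BIT);	// release and re-enable preemption
 *
 * ('mutex' here stands for any lck_mtx_t; see interlock_lock()/interlock_unlock() above.)
 */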

void
hw_unlock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit)
{
    if (__improbable(get_preemption_level() == 0))
        panic("Attempt to release no-preempt bitlock %p in preemptible context", lock);
    hw_unlock_bit_internal(lock, bit);
}

/*
 * Routine: lck_spin_alloc_init
 */
lck_spin_t *
lck_spin_alloc_init(
    lck_grp_t * grp,
    lck_attr_t * attr)
{
    lck_spin_t *lck;

    if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0)
        lck_spin_init(lck, grp, attr);

    return (lck);
}

/*
 * Routine: lck_spin_free
 */
void
lck_spin_free(
    lck_spin_t * lck,
    lck_grp_t * grp)
{
    lck_spin_destroy(lck, grp);
    kfree((void *) lck, sizeof(lck_spin_t));
}

/*
 * Routine: lck_spin_init
 */
void
lck_spin_init(
    lck_spin_t * lck,
    lck_grp_t * grp,
    __unused lck_attr_t * attr)
{
    hw_lock_init(&lck->hwlock);
    lck->type = LCK_SPIN_TYPE;
    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN);
    store_memory_barrier();
}

/*
 * arm_usimple_lock is a lck_spin_t without a group or attributes
 */
void inline
arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value)
{
    lck->type = LCK_SPIN_TYPE;
    hw_lock_init(&lck->hwlock);
    store_memory_barrier();
}


/*
 * Routine: lck_spin_lock
 */
void
lck_spin_lock(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
    if (lock->type != LCK_SPIN_TYPE)
        panic("Invalid spinlock %p", lock);
#endif  // DEVELOPMENT || DEBUG
    hw_lock_lock(&lock->hwlock);
}

/*
 * Routine: lck_spin_lock_nopreempt
 */
void
lck_spin_lock_nopreempt(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
    if (lock->type != LCK_SPIN_TYPE)
        panic("Invalid spinlock %p", lock);
#endif  // DEVELOPMENT || DEBUG
    hw_lock_lock_nopreempt(&lock->hwlock);
}

/*
 * Routine: lck_spin_try_lock
 */
int
lck_spin_try_lock(lck_spin_t *lock)
{
    return hw_lock_try(&lock->hwlock);
}

/*
 * Routine: lck_spin_try_lock_nopreempt
 */
int
lck_spin_try_lock_nopreempt(lck_spin_t *lock)
{
    return hw_lock_try_nopreempt(&lock->hwlock);
}

/*
 * Routine: lck_spin_unlock
 */
void
lck_spin_unlock(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
    if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
        panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
    if (lock->type != LCK_SPIN_TYPE)
        panic("Invalid spinlock type %p", lock);
#endif  // DEVELOPMENT || DEBUG
    hw_lock_unlock(&lock->hwlock);
}

/*
 * Routine: lck_spin_unlock_nopreempt
 */
void
lck_spin_unlock_nopreempt(lck_spin_t *lock)
{
#if DEVELOPMENT || DEBUG
    if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC())
        panic("Spinlock not owned by thread %p = %lx", lock, lock->lck_spin_data);
    if (lock->type != LCK_SPIN_TYPE)
        panic("Invalid spinlock type %p", lock);
#endif  // DEVELOPMENT || DEBUG
    hw_lock_unlock_nopreempt(&lock->hwlock);
}

/*
 * Routine: lck_spin_destroy
 */
void
lck_spin_destroy(
    lck_spin_t * lck,
    lck_grp_t * grp)
{
    if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED)
        return;
    lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED;
    lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN);
    lck_grp_deallocate(grp);
}

/*
 * Routine: kdp_lck_spin_is_acquired
 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
 */
boolean_t
kdp_lck_spin_is_acquired(lck_spin_t *lck) {
    if (not_in_kdp) {
686 panic("panic: spinlock acquired check done outside of kernel debugger");
    }
    return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE;
}

/*
 * Initialize a usimple_lock.
 *
 * No change in preemption state.
 */
void
usimple_lock_init(
    usimple_lock_t l,
    unsigned short tag)
{
#ifndef MACHINE_SIMPLE_LOCK
    USLDBG(usld_lock_init(l, tag));
    hw_lock_init(&l->lck_spin_data);
#else
    simple_lock_init((simple_lock_t) l, tag);
#endif
}


/*
 * Acquire a usimple_lock.
 *
 * Returns with preemption disabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_lock(
    usimple_lock_t l)
{
#ifndef MACHINE_SIMPLE_LOCK
    pc_t pc;

    OBTAIN_PC(pc, l);
    USLDBG(usld_lock_pre(l, pc));

    if (!hw_lock_to(&l->lck_spin_data, LockTimeOut))    /* Try to get the lock with a timeout */
        panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p", l, cpu_number(), pc);

    USLDBG(usld_lock_post(l, pc));
#else
    simple_lock((simple_lock_t) l);
#endif
}


extern void sync(void);

/*
 * Release a usimple_lock.
 *
 * Returns with preemption enabled. Note
 * that the hw_lock routines are responsible for
 * maintaining preemption state.
 */
void
usimple_unlock(
    usimple_lock_t l)
{
#ifndef MACHINE_SIMPLE_LOCK
    pc_t pc;

    OBTAIN_PC(pc, l);
    USLDBG(usld_unlock(l, pc));
    sync();
    hw_lock_unlock(&l->lck_spin_data);
#else
    simple_unlock((simple_lock_t) l);
#endif
}
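
/*
 * Usage sketch of the usimple_lock API above ('l' is any usimple_lock_t the
 * caller owns):
 *
 *	usimple_lock_init(l, 0);	// no change in preemption state
 *	usimple_lock(l);		// returns with preemption disabled
 *	// ... critical section ...
 *	usimple_unlock(l);		// returns with preemption enabled
 *
 * usimple_lock_try() below is the conditional variant: it only leaves
 * preemption disabled when the acquisition succeeds.
 */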


/*
 * Conditionally acquire a usimple_lock.
 *
 * On success, returns with preemption disabled.
 * On failure, returns with preemption in the same state
 * as when first invoked. Note that the hw_lock routines
 * are responsible for maintaining preemption state.
 *
 * XXX No stats are gathered on a miss; I preserved this
 * behavior from the original assembly-language code, but
 * doesn't it make sense to log misses? XXX
 */
unsigned int
usimple_lock_try(
    usimple_lock_t l)
{
#ifndef MACHINE_SIMPLE_LOCK
    pc_t pc;
    unsigned int success;

    OBTAIN_PC(pc, l);
    USLDBG(usld_lock_try_pre(l, pc));
    if ((success = hw_lock_try(&l->lck_spin_data))) {
        USLDBG(usld_lock_try_post(l, pc));
    }
    return success;
#else
    return (simple_lock_try((simple_lock_t) l));
#endif
}

#if USLOCK_DEBUG
/*
 * States of a usimple_lock. The default when initializing
 * a usimple_lock is setting it up for debug checking.
 */
#define USLOCK_CHECKED      0x0001  /* lock is being checked */
#define USLOCK_TAKEN        0x0002  /* lock has been taken */
#define USLOCK_INIT         0xBAA0  /* lock has been initialized */
#define USLOCK_INITIALIZED  (USLOCK_INIT|USLOCK_CHECKED)
#define USLOCK_CHECKING(l)  (uslock_check && \
                             ((l)->debug.state & USLOCK_CHECKED))

/*
 * Trace activities of a particularly interesting lock.
 */
void usl_trace(usimple_lock_t, int, pc_t, const char *);


/*
 * Initialize the debugging information contained
 * in a usimple_lock.
 */
void
usld_lock_init(
    usimple_lock_t l,
    __unused unsigned short tag)
{
    if (l == USIMPLE_LOCK_NULL)
        panic("lock initialization: null lock pointer");
    l->lock_type = USLOCK_TAG;
    l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0;
    l->debug.lock_cpu = l->debug.unlock_cpu = 0;
    l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC;
    l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD;
    l->debug.duration[0] = l->debug.duration[1] = 0;
    l->debug.unlock_cpu = l->debug.unlock_cpu = 0;
    l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC;
    l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD;
}


/*
 * These checks apply to all usimple_locks, not just
 * those with USLOCK_CHECKED turned on.
 */
int
usld_lock_common_checks(
    usimple_lock_t l,
    const char *caller)
{
    if (l == USIMPLE_LOCK_NULL)
        panic("%s: null lock pointer", caller);
    if (l->lock_type != USLOCK_TAG)
        panic("%s: 0x%x is not a usimple lock", caller, (integer_t) l);
    if (!(l->debug.state & USLOCK_INIT))
        panic("%s: 0x%x is not an initialized lock",
              caller, (integer_t) l);
    return USLOCK_CHECKING(l);
}


/*
 * Debug checks on a usimple_lock just before attempting
 * to acquire it.
 */
/* ARGSUSED */
void
usld_lock_pre(
    usimple_lock_t l,
    pc_t pc)
{
    const char *caller = "usimple_lock";


    if (!usld_lock_common_checks(l, caller))
        return;

    /*
     * Note that we have a weird case where we are getting a lock when we are
     * in the process of putting the system to sleep. We are running with no
     * current threads, therefore we can't tell whether we are trying to retake
     * a lock we already hold or whether another processor holds it. Therefore
     * we just ignore this test if the locking thread is 0.
     */

    if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread &&
        l->debug.lock_thread == (void *) current_thread()) {
        printf("%s: lock 0x%x already locked (at %p) by",
               caller, (integer_t) l, l->debug.lock_pc);
        printf(" current thread %p (new attempt at pc %p)\n",
               l->debug.lock_thread, pc);
        panic("%s", caller);
    }
    mp_disable_preemption();
    usl_trace(l, cpu_number(), pc, caller);
    mp_enable_preemption();
}


/*
 * Debug checks on a usimple_lock just after acquiring it.
 *
 * Pre-emption has been disabled at this point,
 * so we are safe in using cpu_number.
 */
void
usld_lock_post(
    usimple_lock_t l,
    pc_t pc)
{
    int mycpu;
    const char *caller = "successful usimple_lock";


    if (!usld_lock_common_checks(l, caller))
        return;

    if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
        panic("%s: lock 0x%x became uninitialized",
              caller, (integer_t) l);
    if ((l->debug.state & USLOCK_TAKEN))
        panic("%s: lock 0x%x became TAKEN by someone else",
              caller, (integer_t) l);

    mycpu = cpu_number();
    l->debug.lock_thread = (void *) current_thread();
    l->debug.state |= USLOCK_TAKEN;
    l->debug.lock_pc = pc;
    l->debug.lock_cpu = mycpu;

    usl_trace(l, mycpu, pc, caller);
}


/*
 * Debug checks on a usimple_lock just before
 * releasing it. Note that the caller has not
 * yet released the hardware lock.
 *
 * Preemption is still disabled, so there's
 * no problem using cpu_number.
 */
void
usld_unlock(
    usimple_lock_t l,
    pc_t pc)
{
    int mycpu;
    const char *caller = "usimple_unlock";


    if (!usld_lock_common_checks(l, caller))
        return;

    mycpu = cpu_number();

    if (!(l->debug.state & USLOCK_TAKEN))
        panic("%s: lock 0x%x hasn't been taken",
              caller, (integer_t) l);
    if (l->debug.lock_thread != (void *) current_thread())
        panic("%s: unlocking lock 0x%x, owned by thread %p",
              caller, (integer_t) l, l->debug.lock_thread);
    if (l->debug.lock_cpu != mycpu) {
        printf("%s: unlocking lock 0x%x on cpu 0x%x",
               caller, (integer_t) l, mycpu);
        printf(" (acquired on cpu 0x%x)\n", l->debug.lock_cpu);
        panic("%s", caller);
    }
    usl_trace(l, mycpu, pc, caller);

    l->debug.unlock_thread = l->debug.lock_thread;
    l->debug.lock_thread = INVALID_THREAD;
    l->debug.state &= ~USLOCK_TAKEN;
    l->debug.unlock_pc = pc;
    l->debug.unlock_cpu = mycpu;
}


/*
 * Debug checks on a usimple_lock just before
 * attempting to acquire it.
 *
 * Preemption isn't guaranteed to be disabled.
 */
void
usld_lock_try_pre(
    usimple_lock_t l,
    pc_t pc)
{
    const char *caller = "usimple_lock_try";

    if (!usld_lock_common_checks(l, caller))
        return;
    mp_disable_preemption();
    usl_trace(l, cpu_number(), pc, caller);
    mp_enable_preemption();
}


/*
 * Debug checks on a usimple_lock just after
 * successfully attempting to acquire it.
 *
 * Preemption has been disabled by the
 * lock acquisition attempt, so it's safe
 * to use cpu_number.
 */
void
usld_lock_try_post(
    usimple_lock_t l,
    pc_t pc)
{
    int mycpu;
    const char *caller = "successful usimple_lock_try";

    if (!usld_lock_common_checks(l, caller))
        return;

    if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED))
        panic("%s: lock 0x%x became uninitialized",
              caller, (integer_t) l);
    if ((l->debug.state & USLOCK_TAKEN))
        panic("%s: lock 0x%x became TAKEN by someone else",
              caller, (integer_t) l);

    mycpu = cpu_number();
    l->debug.lock_thread = (void *) current_thread();
    l->debug.state |= USLOCK_TAKEN;
    l->debug.lock_pc = pc;
    l->debug.lock_cpu = mycpu;

    usl_trace(l, mycpu, pc, caller);
}


/*
 * For very special cases, set traced_lock to point to a
 * specific lock of interest. The result is a series of
 * XPRs showing lock operations on that lock. The lock_seq
 * value is used to show the order of those operations.
 */
usimple_lock_t traced_lock;
unsigned int lock_seq;
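
/*
 * Usage sketch: to trace one lock, point traced_lock at it (for example from
 * the debugger); each subsequent operation on that lock then emits an
 * XPR_SLOCK record via usl_trace(), ordered by lock_seq:
 *
 *	traced_lock = lock_of_interest;	// 'lock_of_interest' is a hypothetical usimple_lock_t
 */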

void
usl_trace(
    usimple_lock_t l,
    int mycpu,
    pc_t pc,
    const char *op_name)
{
    if (traced_lock == l) {
        XPR(XPR_SLOCK,
            "seq %d, cpu %d, %s @ %x\n",
            (integer_t) lock_seq, (integer_t) mycpu,
            (integer_t) op_name, (integer_t) pc, 0);
        lock_seq++;
    }
}


#endif  /* USLOCK_DEBUG */

/*
 * The C portion of the shared/exclusive locks package.
 */

/*
 * compute the deadline to spin against when
 * waiting for a change of state on a lck_rw_t
 */
#if __SMP__
static inline uint64_t
lck_rw_deadline_for_spin(lck_rw_t *lck)
{
    lck_rw_word_t word;

    word.data = ordered_load_rw(lck);
    if (word.can_sleep) {
        if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
            /*
             * there are already threads waiting on this lock... this
             * implies that they have spun beyond their deadlines waiting for
             * the desired state to show up so we will not bother spinning at this time...
             * or
             * the current number of threads sharing this lock exceeds our capacity to run them
             * concurrently and since all states we're going to spin for require the rw_shared_count
             * to be at 0, we'll not bother spinning since the latency for this to happen is
             * unpredictable...
             */
            return (mach_absolute_time());
        }
        return (mach_absolute_time() + MutexSpin);
    } else
        return (mach_absolute_time() + (100000LL * 1000000000LL));
}
#endif  // __SMP__

static boolean_t
lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused)
{
#if __SMP__
    uint64_t deadline = 0;
    uint32_t data;

    if (wait)
        deadline = lck_rw_deadline_for_spin(lock);

    for ( ; ; ) {
        data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
        if ((data & status_mask) == 0)
            break;
        if (wait)
            wait_for_event();
        else
            clear_exclusive();
        if (!wait || (mach_absolute_time() >= deadline))
            return FALSE;
    }
    clear_exclusive();
    return TRUE;
#else
    uint32_t data;

    data = ordered_load_rw(lock);
    if ((data & status_mask) == 0)
        return TRUE;
    else
        return FALSE;
#endif  // __SMP__
}

/*
 * Spin while interlock is held.
 */
static inline void
lck_rw_interlock_spin(lck_rw_t *lock)
{
#if __SMP__
    uint32_t data;

    for ( ; ; ) {
        data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed);
        if (data & LCK_RW_INTERLOCK)
            wait_for_event();
        else {
            clear_exclusive();
            return;
        }
    }
#else
    panic("lck_rw_interlock_spin(): Interlock locked %p %x", lock, lock->lck_rw_data);
#endif
}

/*
 * We disable interrupts while holding the RW interlock to prevent an
 * interrupt from exacerbating hold time.
 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
 */
static inline boolean_t
lck_interlock_lock(lck_rw_t *lck)
{
    boolean_t istate;

    istate = ml_set_interrupts_enabled(FALSE);
    lck_rw_ilk_lock(lck);
    return istate;
}

static inline void
lck_interlock_unlock(lck_rw_t *lck, boolean_t istate)
{
    lck_rw_ilk_unlock(lck);
    ml_set_interrupts_enabled(istate);
}


#define LCK_RW_GRAB_WANT    0
#define LCK_RW_GRAB_SHARED  1

static boolean_t
lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait)
{
    uint64_t  deadline = 0;
    uint32_t  data, prev;
    boolean_t do_exch;

#if __SMP__
    if (wait)
        deadline = lck_rw_deadline_for_spin(lock);
#else
    wait = FALSE;   // Don't spin on UP systems
#endif

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        do_exch = FALSE;
        if (mode == LCK_RW_GRAB_WANT) {
            if ((data & LCK_RW_WANT_EXCL) == 0) {
                data |= LCK_RW_WANT_EXCL;
                do_exch = TRUE;
            }
        } else {    // LCK_RW_GRAB_SHARED
            if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
                (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
                data += LCK_RW_SHARED_READER;
                do_exch = TRUE;
            }
        }
        if (do_exch) {
            if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
                return TRUE;
        } else {
            if (wait)                       // Wait with monitor held
                wait_for_event();
            else
                atomic_exchange_abort();
            if (!wait || (mach_absolute_time() >= deadline))
                return FALSE;
        }
    }
}


/*
 * Routine: lck_rw_alloc_init
 */
lck_rw_t *
lck_rw_alloc_init(
    lck_grp_t *grp,
    lck_attr_t *attr)
{
    lck_rw_t *lck;

    if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0)
        lck_rw_init(lck, grp, attr);

    return lck;
}

/*
 * Routine: lck_rw_free
 */
void
lck_rw_free(
    lck_rw_t *lck,
    lck_grp_t *grp)
{
    lck_rw_destroy(lck, grp);
    kfree(lck, sizeof(lck_rw_t));
}

/*
 * Routine: lck_rw_init
 */
void
lck_rw_init(
    lck_rw_t *lck,
    lck_grp_t *grp,
    lck_attr_t *attr)
{
    if (attr == LCK_ATTR_NULL)
        attr = &LockDefaultLckAttr;
    memset(lck, 0, sizeof(lck_rw_t));
    lck->lck_rw_can_sleep = TRUE;
    if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0)
        lck->lck_rw_priv_excl = TRUE;

    lck_grp_reference(grp);
    lck_grp_lckcnt_incr(grp, LCK_TYPE_RW);
}


/*
 * Routine: lck_rw_destroy
 */
void
lck_rw_destroy(
    lck_rw_t *lck,
    lck_grp_t *grp)
{
    if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED)
        return;
#if MACH_LDEBUG
    lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
#endif
    lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
    lck_grp_lckcnt_decr(grp, LCK_TYPE_RW);
    lck_grp_deallocate(grp);
    return;
}

/*
 * Routine: lck_rw_lock
 */
void
lck_rw_lock(
    lck_rw_t *lck,
    lck_rw_type_t lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED)
        lck_rw_lock_shared(lck);
    else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
        lck_rw_lock_exclusive(lck);
    else
        panic("lck_rw_lock(): Invalid RW lock type: %x", lck_rw_type);
}

/*
 * Routine: lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(lck_rw_t *lock)
{
    thread_t thread = current_thread();

    thread->rwlock_count++;
    if (atomic_test_and_set32(&lock->lck_rw_data,
        (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK),
        LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) {
#if CONFIG_DTRACE
        LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif  /* CONFIG_DTRACE */
    } else
        lck_rw_lock_exclusive_gen(lock);
#if MACH_ASSERT
    thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
    ordered_store_rw_owner(lock, thread);
}

/*
 * Routine: lck_rw_lock_shared
 */
void
lck_rw_lock_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    current_thread()->rwlock_count++;
    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
            atomic_exchange_abort();
            lck_rw_lock_shared_gen(lock);
            break;
        }
        data += LCK_RW_SHARED_READER;
        if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
            break;
        cpu_pause();
    }
#if MACH_ASSERT
    thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif  /* CONFIG_DTRACE */
    return;
}

/*
 * Routine: lck_rw_lock_shared_to_exclusive
 */
boolean_t
lck_rw_lock_shared_to_exclusive(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
        }
        if (data & LCK_RW_WANT_UPGRADE) {
            data -= LCK_RW_SHARED_READER;
            if ((data & LCK_RW_SHARED_MASK) == 0)       /* we were the last reader */
                data &= ~(LCK_RW_W_WAITING);            /* so clear the wait indicator */
            if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
                return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
        } else {
            data |= LCK_RW_WANT_UPGRADE;        /* ask for WANT_UPGRADE */
            data -= LCK_RW_SHARED_READER;       /* and shed our read count */
            if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
                break;
        }
        cpu_pause();
    }
    /* we now own the WANT_UPGRADE */
    if (data & LCK_RW_SHARED_MASK)      /* check to see if all of the readers are drained */
        lck_rw_lock_shared_to_exclusive_success(lock);  /* if not, we need to go wait */
#if MACH_ASSERT
    thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
    ordered_store_rw_owner(lock, current_thread());
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
#endif  /* CONFIG_DTRACE */
    return TRUE;
}


/*
 * Routine: lck_rw_lock_shared_to_exclusive_failure
 * Function:
 *	Fast path code has already dropped our read
 *	count and determined that someone else owns 'lck_rw_want_upgrade';
 *	if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'.
 *	All we need to do here is determine whether a wakeup is needed.
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_failure(
    lck_rw_t *lck,
    uint32_t prior_lock_state)
{
    thread_t thread = current_thread();
    uint32_t rwlock_count;

    /* Check if dropping the lock means that we need to unpromote */
    rwlock_count = thread->rwlock_count--;
#if MACH_LDEBUG
    if (rwlock_count == 0) {
        panic("rw lock count underflow for thread %p", thread);
    }
#endif
    if ((prior_lock_state & LCK_RW_W_WAITING) &&
        ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
        /*
         * Someone else has requested upgrade.
         * Since we've released the read lock, wake
         * him up if he's blocked waiting
         */
        thread_wakeup(LCK_RW_WRITER_EVENT(lck));
    }

    if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
        /* sched_flags checked without lock, but will be rechecked while clearing */
        lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
        VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

    return (FALSE);
}

/*
 * Routine: lck_rw_lock_shared_to_exclusive_success
 * Function:
 *	assembly fast path code has already dropped our read
 *	count and successfully acquired 'lck_rw_want_upgrade'
 *	we just need to wait for the rest of the readers to drain
 *	and then we can return as the exclusive holder of this lock
 */
static boolean_t
lck_rw_lock_shared_to_exclusive_success(
    lck_rw_t *lock)
{
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
    int           slept = 0;
    lck_rw_word_t word;
    wait_result_t res;
    boolean_t     istate;
    boolean_t     not_shared;

#if CONFIG_DTRACE
    uint64_t  wait_interval = 0;
    int       readers_at_sleep = 0;
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
#endif

    while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) {

        word.data = ordered_load_rw(lock);
#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
            dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = word.shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
            trace_lck, word.shared_count, 0, 0, 0);

        not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
            trace_lck, lock->lck_rw_shared_count, 0, 0, 0);

        if (not_shared)
            break;

        /*
         * if we get here, the spin deadline in lck_rw_drain_status()
         * has expired w/o the rw_shared_count having drained to 0
         * check to see if we're allowed to do a thread_block
         */
        if (word.can_sleep) {

            istate = lck_interlock_lock(lock);

            word.data = ordered_load_rw(lock);
            if (word.shared_count != 0) {
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
                    trace_lck, word.shared_count, 0, 0, 0);

                word.w_waiting = 1;
                ordered_store_rw(lock, word.data);

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lock, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
                    trace_lck, res, slept, 0, 0);
            } else {
                lck_interlock_unlock(lock, istate);
                break;
            }
        }
    }
#if CONFIG_DTRACE
    /*
     * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
     */
    if (dtrace_ls_enabled == TRUE) {
        if (slept == 0) {
            LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
#endif
    return (TRUE);
}


/*
 * Routine: lck_rw_lock_exclusive_to_shared
 */

void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
    ordered_store_rw_owner(lock, THREAD_NULL);
    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
        if (data & LCK_RW_INTERLOCK) {
#if __SMP__
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);    /* wait for interlock to clear */
            continue;
#else
            panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x", lock, data);
#endif // __SMP__
        }
        data += LCK_RW_SHARED_READER;
        if (data & LCK_RW_WANT_UPGRADE)
            data &= ~(LCK_RW_WANT_UPGRADE);
        else
            data &= ~(LCK_RW_WANT_EXCL);
        if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL)))
            data &= ~(LCK_RW_W_WAITING);
        if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
            break;
        cpu_pause();
    }
    return lck_rw_lock_exclusive_to_shared_gen(lock, prev);
}

/*
 * Routine: lck_rw_lock_exclusive_to_shared_gen
 * Function:
 *	Fast path has already dropped
 *	our exclusive state and bumped lck_rw_shared_count
 *	all we need to do here is determine if anyone
 *	needs to be awakened.
 */
static void
lck_rw_lock_exclusive_to_shared_gen(
    lck_rw_t *lck,
    uint32_t prior_lock_state)
{
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
    lck_rw_word_t fake_lck;

    /*
     * prior_lock_state is a snapshot of the 1st word of the
     * lock in question... we'll fake up a local copy of it
     * and carefully not access anything beyond what's defined
     * in the first word of a lck_rw_t
     */
    fake_lck.data = prior_lock_state;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
        trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);

    /*
     * don't wake up anyone waiting to take the lock exclusively
     * since we hold a read count... when the read count drops to 0,
     * the writers will be woken.
     *
     * wake up any waiting readers if we don't have any writers waiting,
     * or the lock is NOT marked as rw_priv_excl (writers have privilege)
     */
    if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
        thread_wakeup(LCK_RW_READER_EVENT(lck));

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
        trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}


/*
 * Routine: lck_rw_try_lock
 */
boolean_t
lck_rw_try_lock(
    lck_rw_t *lck,
    lck_rw_type_t lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED)
        return lck_rw_try_lock_shared(lck);
    else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
        return lck_rw_try_lock_exclusive(lck);
    else
        panic("lck_rw_try_lock(): Invalid rw lock type: %x", lck_rw_type);
    return FALSE;
}

/*
 * Routine: lck_rw_try_lock_shared
 */

boolean_t lck_rw_try_lock_shared(lck_rw_t *lock)
{
    uint32_t data, prev;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
#if __SMP__
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
#else
            panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x", lock, data);
#endif
        }
        if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
            atomic_exchange_abort();
            return FALSE;                       /* lock is busy */
        }
        data += LCK_RW_SHARED_READER;           /* Increment reader refcount */
        if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
            break;
        cpu_pause();
    }
#if MACH_ASSERT
    thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
    current_thread()->rwlock_count++;
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
#endif  /* CONFIG_DTRACE */
    return TRUE;
}


/*
 * Routine: lck_rw_try_lock_exclusive
 */

boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock)
{
    uint32_t data, prev;
    thread_t thread;

    for ( ; ; ) {
        data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
        if (data & LCK_RW_INTERLOCK) {
#if __SMP__
            atomic_exchange_abort();
            lck_rw_interlock_spin(lock);
            continue;
#else
            panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
#endif
        }
        if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
            atomic_exchange_abort();
            return FALSE;
        }
        data |= LCK_RW_WANT_EXCL;
        if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp))
            break;
        cpu_pause();
    }
    thread = current_thread();
    thread->rwlock_count++;
#if MACH_ASSERT
    thread_t owner = ordered_load_rw_owner(lock);
    assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
#endif
    ordered_store_rw_owner(lock, thread);
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
#endif  /* CONFIG_DTRACE */
    return TRUE;
}


/*
 * Routine: lck_rw_unlock
 */
void
lck_rw_unlock(
    lck_rw_t *lck,
    lck_rw_type_t lck_rw_type)
{
    if (lck_rw_type == LCK_RW_TYPE_SHARED)
        lck_rw_unlock_shared(lck);
    else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE)
        lck_rw_unlock_exclusive(lck);
    else
        panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
}


/*
 * Routine: lck_rw_unlock_shared
 */
void
lck_rw_unlock_shared(
    lck_rw_t *lck)
{
    lck_rw_type_t ret;

    assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
    assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
    ret = lck_rw_done(lck);

    if (ret != LCK_RW_TYPE_SHARED)
        panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
}


/*
 * Routine: lck_rw_unlock_exclusive
 */
void
lck_rw_unlock_exclusive(
    lck_rw_t *lck)
{
    lck_rw_type_t ret;

    assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
    ret = lck_rw_done(lck);

    if (ret != LCK_RW_TYPE_EXCLUSIVE)
        panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
}


/*
 * Routine: lck_rw_lock_exclusive_gen
 */
static void
lck_rw_lock_exclusive_gen(
    lck_rw_t *lock)
{
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
    lck_rw_word_t word;
    int           slept = 0;
    boolean_t     gotlock = 0;
    boolean_t     not_shared_or_upgrade = 0;
    wait_result_t res = 0;
    boolean_t     istate;

#if CONFIG_DTRACE
    boolean_t dtrace_ls_initialized = FALSE;
    boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
    uint64_t  wait_interval = 0;
    int       readers_at_sleep = 0;
#endif

    /*
     * Try to acquire the lck_rw_want_excl bit.
     */
    while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) {

#if CONFIG_DTRACE
        if (dtrace_ls_initialized == FALSE) {
            dtrace_ls_initialized = TRUE;
            dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
            dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
            dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
            if (dtrace_ls_enabled) {
                /*
                 * Either sleeping or spinning is happening,
                 * start a timing of our delay interval now.
                 */
                readers_at_sleep = lock->lck_rw_shared_count;
                wait_interval = mach_absolute_time();
            }
        }
#endif

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

        gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE);

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0);

        if (gotlock)
            break;
        /*
         * if we get here, the deadline has expired w/o us
         * being able to grab the lock exclusively
         * check to see if we're allowed to do a thread_block
         */
        word.data = ordered_load_rw(lock);
        if (word.can_sleep) {

            istate = lck_interlock_lock(lock);
            word.data = ordered_load_rw(lock);

            if (word.want_excl) {

                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);

                word.w_waiting = 1;
                ordered_store_rw(lock, word.data);

                thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
                res = assert_wait(LCK_RW_WRITER_EVENT(lock),
                    THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
                lck_interlock_unlock(lock, istate);

                if (res == THREAD_WAITING) {
                    res = thread_block(THREAD_CONTINUE_NULL);
                    slept++;
                }
                KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
            } else {
                word.want_excl = 1;
                ordered_store_rw(lock, word.data);
                lck_interlock_unlock(lock, istate);
                break;
            }
        }
    }
1884 /*
1885 * Wait for readers (and upgrades) to finish...
1886 */
1887 while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) {
1888
1889#if CONFIG_DTRACE
1890 /*
1891 * Either sleeping or spinning is happening, start
1892 * a timing of our delay interval now. If we set it
1893 * to -1 we don't have accurate data so we cannot later
1894 * decide to record a dtrace spin or sleep event.
1895 */
1896 if (dtrace_ls_initialized == FALSE) {
1897 dtrace_ls_initialized = TRUE;
1898 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
1899 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
1900 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
1901 if (dtrace_ls_enabled) {
1902 /*
1903 * Either sleeping or spinning is happening,
1904 * start a timing of our delay interval now.
1905 */
1906 readers_at_sleep = lock->lck_rw_shared_count;
1907 wait_interval = mach_absolute_time();
1908 }
1909 }
1910#endif
1911
1912 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1913
1914 not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE);
1915
1916 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0);
1917
1918 if (not_shared_or_upgrade)
1919 break;
		/*
		 * If we get here, the deadline has expired without us
		 * being able to grab the lock exclusively.
		 * Check to see if we're allowed to do a thread_block().
		 */
1925 word.data = ordered_load_rw(lock);
1926 if (word.can_sleep) {
1927
1928 istate = lck_interlock_lock(lock);
1929 word.data = ordered_load_rw(lock);
1930
1931 if (word.shared_count != 0 || word.want_upgrade) {
1932 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1933
1934 word.w_waiting = 1;
1935 ordered_store_rw(lock, word.data);
1936
1937 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1938 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1939 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1940 lck_interlock_unlock(lock, istate);
1941
1942 if (res == THREAD_WAITING) {
1943 res = thread_block(THREAD_CONTINUE_NULL);
1944 slept++;
1945 }
1946 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1947 } else {
1948 lck_interlock_unlock(lock, istate);
				/*
				 * We must own the lock now, since we checked for
				 * readers or an upgrade owner behind the interlock;
				 * no need for a call to 'lck_rw_drain_status'.
				 */
1954 break;
1955 }
1956 }
1957 }
1958
1959#if CONFIG_DTRACE
	/*
	 * Decide which latencies we suffered that are DTrace events.
	 * If we set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we
	 * record the event, which is the best we can do here to
	 * minimize the impact of the tracing.
	 * If dtrace was not enabled when we started sleeping/spinning,
	 * wait_interval was never set and we don't record this event.
	 */
1969 if (dtrace_ls_enabled == TRUE) {
1970 if (slept == 0) {
1971 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1972 mach_absolute_time() - wait_interval, 1);
1973 } else {
			/*
			 * For the blocking case, we also record whether the lock
			 * was held for read or write when we blocked, and how
			 * many readers there were. Note that above we recorded
			 * this before we dropped the interlock, so the count is
			 * accurate.
			 */
1980 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1981 mach_absolute_time() - wait_interval, 1,
1982 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1983 }
1984 }
1985 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1986#endif /* CONFIG_DTRACE */
1987}
1988
1989/*
1990 * Routine: lck_rw_done
1991 */
1992
lck_rw_type_t
lck_rw_done(lck_rw_t *lock)
1994{
1995 uint32_t data, prev;
1996 boolean_t once = FALSE;
1997
1998 for ( ; ; ) {
1999 data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2000 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2001#if __SMP__
2002 atomic_exchange_abort();
2003 lck_rw_interlock_spin(lock);
2004 continue;
2005#else
2006 panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
2007#endif // __SMP__
2008 }
2009 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2010 assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2011 data -= LCK_RW_SHARED_READER;
2012 if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */
2013 goto check_waiters;
2014 } else { /* if reader count == 0, must be exclusive lock */
2015 if (data & LCK_RW_WANT_UPGRADE) {
2016 data &= ~(LCK_RW_WANT_UPGRADE);
2017 } else {
2018 if (data & LCK_RW_WANT_EXCL)
2019 data &= ~(LCK_RW_WANT_EXCL);
2020 else /* lock is not 'owned', panic */
2021 panic("Releasing non-exclusive RW lock without a reader refcount!");
2022 }
2023 if (!once) {
2024 // Only check for holder and clear it once
2025 assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2026 ordered_store_rw_owner(lock, THREAD_NULL);
2027 once = TRUE;
2028 }
2029check_waiters:
			/*
			 * Test the original values to match what
			 * lck_rw_done_gen() is going to do to determine
			 * which wakeups need to happen... readers are woken
			 * (and r_waiting cleared) only if
			 * !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
			 */
2037 if (prev & LCK_RW_W_WAITING) {
2038 data &= ~(LCK_RW_W_WAITING);
2039 if ((prev & LCK_RW_PRIV_EXCL) == 0)
2040 data &= ~(LCK_RW_R_WAITING);
2041 } else
2042 data &= ~(LCK_RW_R_WAITING);
2043 }
2044 if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp))
2045 break;
2046 cpu_pause();
2047 }
2048 return lck_rw_done_gen(lock, prev);
2049}
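
/*
 * Illustrative usage sketch (not compiled): lck_rw_done() releases a
 * read-write lock whether it is held shared or exclusive and returns
 * which kind of hold was dropped. Assuming a lock 'my_rw' initialized
 * elsewhere (e.g. with lck_rw_init()) and acquired with
 * lck_rw_lock_shared():
 *
 *	lck_rw_lock_shared(my_rw);
 *	// ... read-only access to the protected data ...
 *	lck_rw_type_t t = lck_rw_done(my_rw);	// t == LCK_RW_TYPE_SHARED here
 */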
2050
/*
 * Routine:	lck_rw_done_gen
 *
 *	Called from lck_rw_done().
 *
 *	prior_lock_state is the value in the first word of the lock
 *	at the time of a successful atomic compare-and-exchange with
 *	the new value. It represents the state of the lock before we
 *	decremented the rw_shared_count or cleared either
 *	rw_want_upgrade or rw_want_excl and the w_waiting/r_waiting
 *	bits. Since the caller has already changed the state
 *	atomically, we just need to decide whether we should wake
 *	anyone up and what value to return. We do this by examining
 *	the state of the lock before we changed it.
 */
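
/*
 * Worked example (illustrative, values assumed): if prior_lock_state
 * shows shared_count == 1, w_waiting == 1 and priv_excl == 1, then the
 * last reader is leaving, so the waiting writer is woken, any waiting
 * readers stay asleep (writer priority), and LCK_RW_TYPE_SHARED is
 * returned because the hold being dropped was a read hold.
 */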
2068static lck_rw_type_t
2069lck_rw_done_gen(
2070 lck_rw_t *lck,
2071 uint32_t prior_lock_state)
2072{
2073 lck_rw_word_t fake_lck;
2074 lck_rw_type_t lock_type;
2075 thread_t thread;
2076 uint32_t rwlock_count;
2077
	/*
	 * prior_lock_state is a snapshot of the first word of the
	 * lock in question... we copy it into a local lck_rw_word_t
	 * and carefully avoid accessing anything beyond what's defined
	 * in the first word of a lck_rw_t.
	 */
2084 fake_lck.data = prior_lock_state;
2085
2086 if (fake_lck.shared_count <= 1) {
2087 if (fake_lck.w_waiting)
2088 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2089
2090 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting)
2091 thread_wakeup(LCK_RW_READER_EVENT(lck));
2092 }
2093 if (fake_lck.shared_count)
2094 lock_type = LCK_RW_TYPE_SHARED;
2095 else
2096 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2097
2098 /* Check if dropping the lock means that we need to unpromote */
2099 thread = current_thread();
2100 rwlock_count = thread->rwlock_count--;
2101#if MACH_LDEBUG
2102 if (rwlock_count == 0)
2103 panic("rw lock count underflow for thread %p", thread);
2104#endif
2105 if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2106 /* sched_flags checked without lock, but will be rechecked while clearing */
2107 lck_rw_clear_promotion(thread, unslide_for_kdebug(lck));
2108 }
2109#if CONFIG_DTRACE
2110 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2111#endif
2112 return lock_type;
2113}
2114
2115/*
2116 * Routine: lck_rw_lock_shared_gen
2117 * Function:
 *	Fast path code has determined that this lock
 *	is held exclusively... this is where we spin/block
 *	until we can acquire the lock in shared mode.
2121 */
2122static void
2123lck_rw_lock_shared_gen(
2124 lck_rw_t *lck)
2125{
2126 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
2127 lck_rw_word_t word;
2128 boolean_t gotlock = 0;
2129 int slept = 0;
2130 wait_result_t res = 0;
2131 boolean_t istate;
2132
2133#if CONFIG_DTRACE
2134 uint64_t wait_interval = 0;
2135 int readers_at_sleep = 0;
2136 boolean_t dtrace_ls_initialized = FALSE;
2137 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
2138#endif /* CONFIG_DTRACE */
2139
	while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
2141
2142#if CONFIG_DTRACE
2143 if (dtrace_ls_initialized == FALSE) {
2144 dtrace_ls_initialized = TRUE;
2145 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
2146 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
2147 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
2148 if (dtrace_ls_enabled) {
			/*
			 * Either sleeping or spinning is about to happen;
			 * start timing our delay interval now.
			 */
2153 readers_at_sleep = lck->lck_rw_shared_count;
2154 wait_interval = mach_absolute_time();
2155 }
2156 }
2157#endif
2158
2159 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
2160 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
2161
2162 gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE);
2163
2164 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
2165 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0);
2166
2167 if (gotlock)
2168 break;
		/*
		 * If we get here, the deadline has expired without us
		 * being able to grab the lock for read.
		 * Check to see if we're allowed to do a thread_block().
		 */
2174 if (lck->lck_rw_can_sleep) {
2175
2176 istate = lck_interlock_lock(lck);
2177
2178 word.data = ordered_load_rw(lck);
2179 if ((word.want_excl || word.want_upgrade) &&
2180 ((word.shared_count == 0) || word.priv_excl)) {
2181
2182 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
2183 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
2184
2185 word.r_waiting = 1;
2186 ordered_store_rw(lck, word.data);
2187
2188 thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
2189 res = assert_wait(LCK_RW_READER_EVENT(lck),
2190 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
2191 lck_interlock_unlock(lck, istate);
2192
2193 if (res == THREAD_WAITING) {
2194 res = thread_block(THREAD_CONTINUE_NULL);
2195 slept++;
2196 }
2197 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
2198 trace_lck, res, slept, 0, 0);
2199 } else {
2200 word.shared_count++;
2201 ordered_store_rw(lck, word.data);
2202 lck_interlock_unlock(lck, istate);
2203 break;
2204 }
2205 }
2206 }
2207
2208#if CONFIG_DTRACE
2209 if (dtrace_ls_enabled == TRUE) {
2210 if (slept == 0) {
2211 LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
2212 } else {
2213 LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
2214 mach_absolute_time() - wait_interval, 0,
2215 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
2216 }
2217 }
2218 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
2219#endif /* CONFIG_DTRACE */
2220}
2221
2222
2223void
2224lck_rw_assert(
2225 lck_rw_t *lck,
2226 unsigned int type)
2227{
2228 switch (type) {
2229 case LCK_RW_ASSERT_SHARED:
2230 if ((lck->lck_rw_shared_count != 0) &&
2231 (lck->lck_rw_owner == THREAD_NULL)) {
2232 return;
2233 }
2234 break;
2235 case LCK_RW_ASSERT_EXCLUSIVE:
2236 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2237 (lck->lck_rw_shared_count == 0) &&
2238 (lck->lck_rw_owner == current_thread())) {
2239 return;
2240 }
2241 break;
2242 case LCK_RW_ASSERT_HELD:
2243 if (lck->lck_rw_shared_count != 0)
2244 return; // Held shared
2245 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2246 (lck->lck_rw_owner == current_thread())) {
2247 return; // Held exclusive
2248 }
2249 break;
2250 case LCK_RW_ASSERT_NOTHELD:
2251 if ((lck->lck_rw_shared_count == 0) &&
2252 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2253 (lck->lck_rw_owner == THREAD_NULL)) {
2254 return;
2255 }
2256 break;
2257 default:
2258 break;
2259 }
2260 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2261}
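
/*
 * Illustrative usage sketch (not compiled): lck_rw_assert() documents
 * and enforces locking preconditions. For a hypothetical function that
 * must only run with a lock 'my_rw' held for writing:
 *
 *	static void
 *	update_protected_state(void)
 *	{
 *		lck_rw_assert(my_rw, LCK_RW_ASSERT_EXCLUSIVE);
 *		// ... mutate the data protected by my_rw ...
 *	}
 */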
2262
2263
2264/*
2265 * Routine: kdp_lck_rw_lock_is_acquired_exclusive
2266 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2267 */
2268boolean_t
kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
{
2270 if (not_in_kdp) {
		panic("rw lock exclusive check done outside of kernel debugger");
2272 }
2273 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2274}
2275
2276/*
2277 * The C portion of the mutex package. These routines are only invoked
2278 * if the optimized assembler routines can't do the work.
2279 */
2280
2281/*
2282 * Forward declaration
2283 */
2284
2285void
2286lck_mtx_ext_init(
2287 lck_mtx_ext_t * lck,
2288 lck_grp_t * grp,
2289 lck_attr_t * attr);
2290
2291/*
2292 * Routine: lck_mtx_alloc_init
2293 */
2294lck_mtx_t *
2295lck_mtx_alloc_init(
2296 lck_grp_t * grp,
2297 lck_attr_t * attr)
2298{
2299 lck_mtx_t *lck;
2300
2301 if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0)
2302 lck_mtx_init(lck, grp, attr);
2303
2304 return (lck);
2305}
2306
2307/*
2308 * Routine: lck_mtx_free
2309 */
2310void
2311lck_mtx_free(
2312 lck_mtx_t * lck,
2313 lck_grp_t * grp)
2314{
2315 lck_mtx_destroy(lck, grp);
2316 kfree((void *) lck, sizeof(lck_mtx_t));
2317}
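
/*
 * Illustrative lifecycle sketch (not compiled), assuming a lock group
 * 'my_grp' created elsewhere (e.g. with lck_grp_alloc_init()):
 *
 *	lck_mtx_t *m = lck_mtx_alloc_init(my_grp, LCK_ATTR_NULL);
 *
 *	lck_mtx_lock(m);
 *	// ... critical section ...
 *	lck_mtx_unlock(m);
 *
 *	lck_mtx_free(m, my_grp);	// destroys the mutex and frees its memory
 */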
2318
2319/*
2320 * Routine: lck_mtx_init
2321 */
2322void
2323lck_mtx_init(
2324 lck_mtx_t * lck,
2325 lck_grp_t * grp,
2326 lck_attr_t * attr)
2327{
2328#ifdef BER_XXX
2329 lck_mtx_ext_t *lck_ext;
2330#endif
2331 lck_attr_t *lck_attr;
2332
2333 if (attr != LCK_ATTR_NULL)
2334 lck_attr = attr;
2335 else
2336 lck_attr = &LockDefaultLckAttr;
2337
2338#ifdef BER_XXX
2339 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2340 if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) {
2341 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2342 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2343 lck->lck_mtx_ptr = lck_ext;
2344 lck->lck_mtx_type = LCK_MTX_TYPE;
2345 }
2346 } else
2347#endif
2348 {
2349 lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below
2350 lck->lck_mtx_waiters = 0;
2351 lck->lck_mtx_pri = 0;
2352 lck->lck_mtx_type = LCK_MTX_TYPE;
2353 ordered_store_mtx(lck, 0);
2354 }
2355 lck_grp_reference(grp);
2356 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2357}
2358
2359/*
2360 * Routine: lck_mtx_init_ext
2361 */
2362void
2363lck_mtx_init_ext(
2364 lck_mtx_t * lck,
2365 lck_mtx_ext_t * lck_ext,
2366 lck_grp_t * grp,
2367 lck_attr_t * attr)
2368{
2369 lck_attr_t *lck_attr;
2370
2371 if (attr != LCK_ATTR_NULL)
2372 lck_attr = attr;
2373 else
2374 lck_attr = &LockDefaultLckAttr;
2375
2376 if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2377 lck_mtx_ext_init(lck_ext, grp, lck_attr);
2378 lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT;
2379 lck->lck_mtx_ptr = lck_ext;
2380 lck->lck_mtx_type = LCK_MTX_TYPE;
2381 } else {
2382 lck->lck_mtx_waiters = 0;
2383 lck->lck_mtx_pri = 0;
2384 lck->lck_mtx_type = LCK_MTX_TYPE;
2385 ordered_store_mtx(lck, 0);
2386 }
2387 lck_grp_reference(grp);
2388 lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX);
2389}
2390
2391/*
2392 * Routine: lck_mtx_ext_init
2393 */
2394void
2395lck_mtx_ext_init(
2396 lck_mtx_ext_t * lck,
2397 lck_grp_t * grp,
2398 lck_attr_t * attr)
2399{
2400 bzero((void *) lck, sizeof(lck_mtx_ext_t));
2401
2402 lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE;
2403
2404 if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) {
2405 lck->lck_mtx_deb.type = MUTEX_TAG;
2406 lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG;
2407 }
2408 lck->lck_mtx_grp = grp;
2409
2410 if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT)
2411 lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT;
2412}
2413
2414/* The slow versions */
2415static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2416static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread);
2417static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked);
2418
2419/*
2420 * Routine: lck_mtx_verify
2421 *
 *	Verify that a mutex is valid
2423 */
2424static inline void
2425lck_mtx_verify(lck_mtx_t *lock)
2426{
2427 if (lock->lck_mtx_type != LCK_MTX_TYPE)
2428 panic("Invalid mutex %p", lock);
2429#if DEVELOPMENT || DEBUG
2430 if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2431 panic("Mutex destroyed %p", lock);
2432#endif /* DEVELOPMENT || DEBUG */
2433}
2434
2435/*
2436 * Routine: lck_mtx_check_preemption
2437 *
2438 * Verify preemption is enabled when attempting to acquire a mutex.
2439 */
2440
2441static inline void
2442lck_mtx_check_preemption(lck_mtx_t *lock)
2443{
2444#if DEVELOPMENT || DEBUG
2445 int pl = get_preemption_level();
2446
2447 if (pl != 0)
2448 panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
2449#else
2450 (void)lock;
2451#endif
2452}
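
/*
 * For example (illustrative, 'm1' and 'm2' are hypothetical locks):
 * taking a mutex while already holding another mutex in spin mode runs
 * with preemption disabled, so on DEVELOPMENT/DEBUG kernels the check
 * above will panic:
 *
 *	lck_mtx_lock_spin(m1);	// returns with the interlock held, preemption disabled
 *	lck_mtx_lock(m2);	// trips lck_mtx_check_preemption() here
 */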
2453
2454/*
2455 * Routine: lck_mtx_lock
2456 */
2457void
2458lck_mtx_lock(lck_mtx_t *lock)
2459{
2460 thread_t thread;
2461
2462 lck_mtx_verify(lock);
2463 lck_mtx_check_preemption(lock);
2464 thread = current_thread();
2465 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2466 memory_order_acquire_smp, FALSE)) {
2467#if CONFIG_DTRACE
2468 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2469#endif /* CONFIG_DTRACE */
2470 return;
2471 }
2472 lck_mtx_lock_contended(lock, thread, FALSE);
2473}
2474
/*
 * This is the slow version of mutex locking.
 */
2478static void NOINLINE
2479lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked)
2480{
2481 thread_t holding_thread;
2482 uintptr_t state;
2483 int waiters;
2484
2485 if (interlocked)
2486 goto interlock_held;
2487
2488 /* TODO: short-duration spin for on-core contention <rdar://problem/10234625> */
2489
2490 /* Loop waiting until I see that the mutex is unowned */
2491 for ( ; ; ) {
2492 interlock_lock(lock);
2493interlock_held:
2494 state = ordered_load_mtx(lock);
2495 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2496 if (holding_thread == NULL)
2497 break;
2498 ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait
2499 lck_mtx_lock_wait(lock, holding_thread);
2500 /* returns interlock unlocked */
2501 }
2502
2503 /* Hooray, I'm the new owner! */
2504 waiters = lck_mtx_lock_acquire(lock);
2505 state = LCK_MTX_THREAD_TO_STATE(thread);
2506 if (waiters != 0)
2507 state |= ARM_LCK_WAITERS;
2508#if __SMP__
2509 state |= LCK_ILOCK; // Preserve interlock
2510 ordered_store_mtx(lock, state); // Set ownership
2511 interlock_unlock(lock); // Release interlock, enable preemption
2512#else
2513 ordered_store_mtx(lock, state); // Set ownership
2514 enable_preemption();
2515#endif
2516 load_memory_barrier();
2517
2518#if CONFIG_DTRACE
2519 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0);
2520#endif /* CONFIG_DTRACE */
2521}
2522
2523/*
2524 * Common code for mutex locking as spinlock
2525 */
2526static inline void
2527lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2528{
2529 uintptr_t state;
2530
2531 interlock_lock(lock);
2532 state = ordered_load_mtx(lock);
2533 if (LCK_MTX_STATE_TO_THREAD(state)) {
2534 if (allow_held_as_mutex)
2535 lck_mtx_lock_contended(lock, current_thread(), TRUE);
2536 else
2537 // "Always" variants can never block. If the lock is held and blocking is not allowed
2538 // then someone is mixing always and non-always calls on the same lock, which is
2539 // forbidden.
2540 panic("Attempting to block on a lock taken as spin-always %p", lock);
2541 return;
2542 }
2543 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2544 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2545 ordered_store_mtx(lock, state);
2546 load_memory_barrier();
2547
2548#if CONFIG_DTRACE
2549 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0);
2550#endif /* CONFIG_DTRACE */
2551}
2552
2553/*
2554 * Routine: lck_mtx_lock_spin
2555 */
2556void
2557lck_mtx_lock_spin(lck_mtx_t *lock)
2558{
2559 lck_mtx_check_preemption(lock);
2560 lck_mtx_lock_spin_internal(lock, TRUE);
2561}
2562
2563/*
2564 * Routine: lck_mtx_lock_spin_always
2565 */
2566void
2567lck_mtx_lock_spin_always(lck_mtx_t *lock)
2568{
2569 lck_mtx_lock_spin_internal(lock, FALSE);
2570}
2571
2572/*
2573 * Routine: lck_mtx_try_lock
2574 */
2575boolean_t
2576lck_mtx_try_lock(lck_mtx_t *lock)
2577{
2578 thread_t thread = current_thread();
2579
2580 lck_mtx_verify(lock);
2581 if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread),
2582 memory_order_acquire_smp, FALSE)) {
2583#if CONFIG_DTRACE
2584 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0);
2585#endif /* CONFIG_DTRACE */
2586 return TRUE;
2587 }
2588 return lck_mtx_try_lock_contended(lock, thread);
2589}
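
/*
 * Illustrative usage sketch (not compiled): the try variant is useful
 * when acquiring out of the documented lock order, where blocking could
 * deadlock. For hypothetical locks 'a' and 'b' whose order is a before
 * b, a path that already holds 'b' can do:
 *
 *	if (!lck_mtx_try_lock(a)) {
 *		lck_mtx_unlock(b);	// back off and retake in order
 *		lck_mtx_lock(a);
 *		lck_mtx_lock(b);
 *	}
 */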
2590
2591static boolean_t NOINLINE
2592lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread)
2593{
2594 thread_t holding_thread;
2595 uintptr_t state;
2596 int waiters;
2597
2598#if __SMP__
2599 interlock_lock(lock);
2600 state = ordered_load_mtx(lock);
2601 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2602 if (holding_thread) {
2603 interlock_unlock(lock);
2604 return FALSE;
2605 }
2606#else
2607 disable_preemption_for_thread(thread);
2608 state = ordered_load_mtx(lock);
2609 if (state & LCK_ILOCK)
2610 panic("Unexpected interlock set (%p)", lock);
2611 holding_thread = LCK_MTX_STATE_TO_THREAD(state);
2612 if (holding_thread) {
2613 enable_preemption();
2614 return FALSE;
2615 }
2616 state |= LCK_ILOCK;
2617 ordered_store_mtx(lock, state);
2618#endif // __SMP__
2619 waiters = lck_mtx_lock_acquire(lock);
2620 state = LCK_MTX_THREAD_TO_STATE(thread);
2621 if (waiters != 0)
2622 state |= ARM_LCK_WAITERS;
2623#if __SMP__
2624 state |= LCK_ILOCK; // Preserve interlock
2625 ordered_store_mtx(lock, state); // Set ownership
2626 interlock_unlock(lock); // Release interlock, enable preemption
2627#else
2628 ordered_store_mtx(lock, state); // Set ownership
2629 enable_preemption();
2630#endif
2631 load_memory_barrier();
2632 return TRUE;
2633}
2634
2635static inline boolean_t
2636lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex)
2637{
2638 uintptr_t state;
2639
2640 if (!interlock_try(lock))
2641 return FALSE;
2642 state = ordered_load_mtx(lock);
	if (LCK_MTX_STATE_TO_THREAD(state)) {
2644 // Lock is held as mutex
2645 if (allow_held_as_mutex)
2646 interlock_unlock(lock);
2647 else
2648 // "Always" variants can never block. If the lock is held as a normal mutex
2649 // then someone is mixing always and non-always calls on the same lock, which is
2650 // forbidden.
2651 panic("Spin-mutex held as full mutex %p", lock);
2652 return FALSE;
2653 }
2654 state &= ARM_LCK_WAITERS; // Preserve waiters bit
2655 state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock
2656 ordered_store_mtx(lock, state);
2657 load_memory_barrier();
2658
2659#if CONFIG_DTRACE
2660 LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0);
2661#endif /* CONFIG_DTRACE */
2662 return TRUE;
2663}
2664
2665/*
2666 * Routine: lck_mtx_try_lock_spin
2667 */
2668boolean_t
2669lck_mtx_try_lock_spin(lck_mtx_t *lock)
2670{
2671 return lck_mtx_try_lock_spin_internal(lock, TRUE);
2672}
2673
2674/*
2675 * Routine: lck_mtx_try_lock_spin_always
2676 */
2677boolean_t
2678lck_mtx_try_lock_spin_always(lck_mtx_t *lock)
2679{
2680 return lck_mtx_try_lock_spin_internal(lock, FALSE);
2681}
2682
2683
2684
2685/*
2686 * Routine: lck_mtx_unlock
2687 */
2688void
2689lck_mtx_unlock(lck_mtx_t *lock)
2690{
2691 thread_t thread = current_thread();
2692 uintptr_t state;
2693 boolean_t ilk_held = FALSE;
2694
2695 lck_mtx_verify(lock);
2696
2697 state = ordered_load_mtx(lock);
2698 if (state & LCK_ILOCK) {
		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG)
2700 ilk_held = TRUE; // Interlock is held by (presumably) this thread
2701 goto slow_case;
2702 }
2703 // Locked as a mutex
2704 if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0,
2705 memory_order_release_smp, FALSE)) {
2706#if CONFIG_DTRACE
2707 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2708#endif /* CONFIG_DTRACE */
2709 return;
2710 }
2711slow_case:
2712 lck_mtx_unlock_contended(lock, thread, ilk_held);
2713}
2714
2715static void NOINLINE
2716lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held)
2717{
2718 uintptr_t state;
2719
2720 if (ilk_held) {
2721 state = ordered_load_mtx(lock);
2722 } else {
2723#if __SMP__
2724 interlock_lock(lock);
2725 state = ordered_load_mtx(lock);
2726 if (thread != LCK_MTX_STATE_TO_THREAD(state))
2727 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2728#else
2729 disable_preemption_for_thread(thread);
2730 state = ordered_load_mtx(lock);
2731 if (state & LCK_ILOCK)
2732 panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
2733 if (thread != LCK_MTX_STATE_TO_THREAD(state))
2734 panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2735 state |= LCK_ILOCK;
2736 ordered_store_mtx(lock, state);
2737#endif
2738 if (state & ARM_LCK_WAITERS) {
2739 lck_mtx_unlock_wakeup(lock, thread);
2740 state = ordered_load_mtx(lock);
2741 } else {
2742 assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
2743 }
2744 }
2745 state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */
2746#if __SMP__
2747 state |= LCK_ILOCK;
2748 ordered_store_mtx(lock, state);
2749 interlock_unlock(lock);
2750#else
2751 ordered_store_mtx(lock, state);
2752 enable_preemption();
2753#endif
2754
2755#if CONFIG_DTRACE
2756 LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0);
2757#endif /* CONFIG_DTRACE */
2758}
2759
2760/*
2761 * Routine: lck_mtx_assert
2762 */
2763void
2764lck_mtx_assert(lck_mtx_t *lock, unsigned int type)
2765{
2766 thread_t thread, holder;
2767 uintptr_t state;
2768
2769 state = ordered_load_mtx(lock);
2770 holder = LCK_MTX_STATE_TO_THREAD(state);
2771 if (holder == (thread_t)LCK_MTX_SPIN_TAG) {
2772 // Lock is held in spin mode, owner is unknown.
2773 return; // Punt
2774 }
2775 thread = current_thread();
	if (type == LCK_MTX_ASSERT_OWNED) {
		if (thread != holder)
			panic("lck_mtx_assert(): mutex (%p) not owned by current thread", lock);
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (thread == holder)
			panic("lck_mtx_assert(): mutex (%p) owned by current thread", lock);
	} else
		panic("lck_mtx_assert(): invalid arg (%u)", type);
2784}
2785
2786/*
2787 * Routine: lck_mtx_ilk_unlock
2788 */
2789boolean_t
2790lck_mtx_ilk_unlock(lck_mtx_t *lock)
2791{
2792 interlock_unlock(lock);
2793 return TRUE;
2794}
2795
2796/*
2797 * Routine: lck_mtx_convert_spin
2798 *
 *	Convert a mutex held as a spinlock into a held full mutex
2800 */
2801void
2802lck_mtx_convert_spin(lck_mtx_t *lock)
2803{
2804 thread_t thread = current_thread();
2805 uintptr_t state;
2806 int waiters;
2807
2808 state = ordered_load_mtx(lock);
2809 if (LCK_MTX_STATE_TO_THREAD(state) == thread)
2810 return; // Already owned as mutex, return
2811 if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG))
2812 panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
2813 state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag
2814 ordered_store_mtx(lock, state);
2815 waiters = lck_mtx_lock_acquire(lock); // Acquire to manage priority boosts
2816 state = LCK_MTX_THREAD_TO_STATE(thread);
2817 if (waiters != 0)
2818 state |= ARM_LCK_WAITERS;
2819#if __SMP__
2820 state |= LCK_ILOCK;
2821 ordered_store_mtx(lock, state); // Set ownership
2822 interlock_unlock(lock); // Release interlock, enable preemption
2823#else
2824 ordered_store_mtx(lock, state); // Set ownership
2825 enable_preemption();
2826#endif
2827}
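
/*
 * Illustrative usage sketch (not compiled, 'm' and 'needs_more_work'
 * are hypothetical): take the mutex in spin mode for a cheap check and
 * convert to a full mutex hold only if blocking work is required:
 *
 *	lck_mtx_lock_spin(m);		// holds the interlock, cannot block
 *	if (needs_more_work) {
 *		lck_mtx_convert_spin(m);	// now a normal mutex hold
 *		// ... code that may block ...
 *	}
 *	lck_mtx_unlock(m);		// handles either mode
 */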
2828
2829
2830/*
2831 * Routine: lck_mtx_destroy
2832 */
2833void
2834lck_mtx_destroy(
2835 lck_mtx_t * lck,
2836 lck_grp_t * grp)
2837{
2838 if (lck->lck_mtx_type != LCK_MTX_TYPE)
2839 panic("Destroying invalid mutex %p", lck);
2840 if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
2841 panic("Destroying previously destroyed lock %p", lck);
2842 lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED);
2843 lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED;
2844 lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX);
2845 lck_grp_deallocate(grp);
2846 return;
2847}
2848
2849/*
2850 * Routine: lck_spin_assert
2851 */
2852void
2853lck_spin_assert(lck_spin_t *lock, unsigned int type)
2854{
2855 thread_t thread, holder;
2856 uintptr_t state;
2857
2858 if (lock->type != LCK_SPIN_TYPE)
2859 panic("Invalid spinlock %p", lock);
2860
2861 state = lock->lck_spin_data;
2862 holder = (thread_t)(state & ~LCK_ILOCK);
2863 thread = current_thread();
2864 if (type == LCK_ASSERT_OWNED) {
2865 if (holder == 0)
2866 panic("Lock not owned %p = %lx", lock, state);
2867 if (holder != thread)
2868 panic("Lock not owned by current thread %p = %lx", lock, state);
2869 if ((state & LCK_ILOCK) == 0)
2870 panic("Lock bit not set %p = %lx", lock, state);
2871 } else if (type == LCK_ASSERT_NOTOWNED) {
2872 if (holder != 0) {
2873 if (holder == thread)
2874 panic("Lock owned by current thread %p = %lx", lock, state);
2875 else
2876 panic("Lock %p owned by thread %p", lock, holder);
2877 }
2878 if (state & LCK_ILOCK)
2879 panic("Lock bit set %p = %lx", lock, state);
2880 } else
2881 panic("lck_spin_assert(): invalid arg (%u)", type);
2882}
2883
2884boolean_t
2885lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield)
2886{
2887 lck_rw_word_t word;
2888
2889 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2890
2891 word.data = ordered_load_rw(lck);
2892 if (word.want_excl || word.want_upgrade || force_yield) {
2893 lck_rw_unlock_shared(lck);
2894 mutex_pause(2);
2895 lck_rw_lock_shared(lck);
2896 return TRUE;
2897 }
2898
2899 return FALSE;
2900}
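
/*
 * Illustrative usage sketch (not compiled): a long-running reader can
 * periodically yield to queued writers. For a hypothetical list
 * protected by 'my_rw':
 *
 *	lck_rw_lock_shared(my_rw);
 *	for (e = list_head; e != NULL; e = e->next) {
 *		process(e);
 *		if (lck_rw_lock_yield_shared(my_rw, FALSE)) {
 *			// the lock was dropped and re-acquired; 'e' and the
 *			// list may have changed, so revalidate before continuing
 *		}
 *	}
 *	lck_rw_done(my_rw);
 */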
2901
2902/*
2903 * Routine: kdp_lck_mtx_lock_spin_is_acquired
2904 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2905 */
2906boolean_t
2907kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck)
2908{
2909 uintptr_t state;
2910
2911 if (not_in_kdp) {
		panic("spinlock acquired check done outside of kernel debugger");
2913 }
2914 state = ordered_load_mtx(lck);
2915 if (state == LCK_MTX_TAG_DESTROYED)
2916 return FALSE;
2917 if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK))
2918 return TRUE;
2919 return FALSE;
2920}
2921
2922void
2923kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2924{
2925 lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event);
2926 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex);
2927 uintptr_t state = ordered_load_mtx(mutex);
2928 thread_t holder = LCK_MTX_STATE_TO_THREAD(state);
2929 if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) {
2930 waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN;
2931 } else {
2932 assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
2933 assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
2934 waitinfo->owner = thread_tid(holder);
2935 }
2936}
2937
2938void
2939kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
2940{
2941 lck_rw_t *rwlck = NULL;
2942 switch(waitinfo->wait_type) {
2943 case kThreadWaitKernelRWLockRead:
2944 rwlck = READ_EVENT_TO_RWLOCK(event);
2945 break;
2946 case kThreadWaitKernelRWLockWrite:
2947 case kThreadWaitKernelRWLockUpgrade:
2948 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2949 break;
2950 default:
2951 panic("%s was called with an invalid blocking type", __FUNCTION__);
2952 break;
2953 }
2954 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2955 waitinfo->owner = thread_tid(rwlck->lck_rw_owner);
2956}
2957