1/*
2 * Copyright (c) 2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56#define LOCK_PRIVATE 1
57#include <debug.h>
58#include <kern/locks_internal.h>
59#include <kern/lock_stat.h>
60#include <kern/locks.h>
61#include <kern/zalloc.h>
62#include <kern/thread.h>
63#include <kern/processor.h>
64#include <kern/sched_prim.h>
65#include <kern/debug.h>
66#include <machine/atomic.h>
67#include <machine/machine_cpu.h>
68
69KALLOC_TYPE_DEFINE(KT_LCK_RW, lck_rw_t, KT_PRIV_ACCT);
70
71#define LCK_RW_WRITER_EVENT(lck) (event_t)((uintptr_t)(lck)+1)
72#define LCK_RW_READER_EVENT(lck) (event_t)((uintptr_t)(lck)+2)
73#define WRITE_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-1))
74#define READ_EVENT_TO_RWLOCK(event) ((lck_rw_t *)((uintptr_t)(event)-2))
75
76#if CONFIG_DTRACE
77#define DTRACE_RW_SHARED 0x0 //reader
78#define DTRACE_RW_EXCL 0x1 //writer
79#define DTRACE_NO_FLAG 0x0 //not applicable
80#endif /* CONFIG_DTRACE */
81
82#define LCK_RW_LCK_EXCLUSIVE_CODE 0x100
83#define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101
84#define LCK_RW_LCK_SHARED_CODE 0x102
85#define LCK_RW_LCK_SH_TO_EX_CODE 0x103
86#define LCK_RW_LCK_SH_TO_EX1_CODE 0x104
87#define LCK_RW_LCK_EX_TO_SH_CODE 0x105
88
89#if __x86_64__
90#define LCK_RW_LCK_EX_WRITER_SPIN_CODE 0x106
91#define LCK_RW_LCK_EX_WRITER_WAIT_CODE 0x107
92#define LCK_RW_LCK_EX_READER_SPIN_CODE 0x108
93#define LCK_RW_LCK_EX_READER_WAIT_CODE 0x109
94#define LCK_RW_LCK_SHARED_SPIN_CODE 0x110
95#define LCK_RW_LCK_SHARED_WAIT_CODE 0x111
96#define LCK_RW_LCK_SH_TO_EX_SPIN_CODE 0x112
97#define LCK_RW_LCK_SH_TO_EX_WAIT_CODE 0x113
98#endif
99
100#define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT, LCK_GRP_NULL)
101#define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT)
102
103#define ordered_load_rw(lock) os_atomic_load(&(lock)->lck_rw_data, compiler_acq_rel)
104#define ordered_store_rw(lock, value) os_atomic_store(&(lock)->lck_rw_data, (value), compiler_acq_rel)
105#define ordered_store_rw_owner(lock, value) os_atomic_store(&(lock)->lck_rw_owner, (value), compiler_acq_rel)
106
107#ifdef DEBUG_RW
108static TUNABLE(bool, lck_rw_recursive_shared_assert_74048094, "lck_rw_recursive_shared_assert", false);
109SECURITY_READ_ONLY_EARLY(vm_packing_params_t) rwlde_caller_packing_params =
110 VM_PACKING_PARAMS(LCK_RW_CALLER_PACKED);
111#define rw_lock_debug_disabled() (lck_opts_get() & LCK_OPTION_DISABLE_RW_DEBUG)
112
113#define set_rwlde_caller_packed(entry, caller) ((entry)->rwlde_caller_packed = VM_PACK_POINTER((vm_offset_t)caller, LCK_RW_CALLER_PACKED))
114#define get_rwlde_caller(entry) ((void*)VM_UNPACK_POINTER(entry->rwlde_caller_packed, LCK_RW_CALLER_PACKED))
115
116#endif /* DEBUG_RW */
117
118/*!
119 * @function lck_rw_alloc_init
120 *
121 * @abstract
122 * Allocates and initializes a rw_lock_t.
123 *
124 * @discussion
125 * The function can block. See lck_rw_init() for initialization details.
126 *
127 * @param grp lock group to associate with the lock.
128 * @param attr lock attribute to initialize the lock.
129 *
130 * @returns NULL or the allocated lock
131 */
132lck_rw_t *
133lck_rw_alloc_init(
134 lck_grp_t *grp,
135 lck_attr_t *attr)
136{
137 lck_rw_t *lck;
138
139 lck = zalloc_flags(KT_LCK_RW, Z_WAITOK | Z_ZERO);
140 lck_rw_init(lck, grp, attr);
141 return lck;
142}
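
/*
 * A minimal usage sketch of the dynamic allocation path (illustrative only,
 * not compiled; the "my_subsystem" names are hypothetical).
 */
#if 0
static lck_grp_t *my_grp;
static lck_rw_t  *my_lock;

static void
my_subsystem_init(void)
{
	my_grp  = lck_grp_alloc_init("my_subsystem", LCK_GRP_ATTR_NULL);
	my_lock = lck_rw_alloc_init(my_grp, LCK_ATTR_NULL);
}

static void
my_subsystem_teardown(void)
{
	lck_rw_free(my_lock, my_grp);   /* must not be held by any thread */
	lck_grp_free(my_grp);
}
#endif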
143
144/*!
145 * @function lck_rw_init
146 *
147 * @abstract
148 * Initializes a rw_lock_t.
149 *
150 * @discussion
151 * Usage statistics for the lock are going to be added to the lock group provided.
152 *
153 * The lock attribute can be used to specify the lock contention behaviour.
154 * RW_WRITER_PRIORITY is the default behaviour (LCK_ATTR_NULL defaults to RW_WRITER_PRIORITY)
155 * and lck_attr_rw_shared_priority() can be used to set the behaviour to RW_SHARED_PRIORITY.
156 *
157 * RW_WRITER_PRIORITY gives priority to the writers upon contention with the readers;
158 * if the lock is held and a writer starts waiting for the lock, readers will not be able
159 * to acquire the lock until all writers stop contending. Readers could
160 * potentially starve.
161 * RW_SHARED_PRIORITY gives priority to the readers upon contention with the writers:
 * unless the lock is held in exclusive mode, readers will always be able to acquire the lock.
163 * Readers can lock a shared lock even if there are writers waiting. Writers could potentially
164 * starve.
165 *
166 * @param lck lock to initialize.
167 * @param grp lock group to associate with the lock.
168 * @param attr lock attribute to initialize the lock.
169 *
170 */
171void
172lck_rw_init(
173 lck_rw_t *lck,
174 lck_grp_t *grp,
175 lck_attr_t *attr)
176{
177 /* keep this so that the lck_type_t type is referenced for lldb */
178 lck_type_t type = LCK_TYPE_RW;
179
180 if (attr == LCK_ATTR_NULL) {
181 attr = &lck_attr_default;
182 }
183 *lck = (lck_rw_t){
184 .lck_rw_type = type,
185 .lck_rw_can_sleep = true,
186 .lck_rw_priv_excl = !(attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY),
187 };
	lck_grp_reference(grp, &grp->lck_grp_rwcnt);
189}
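
/*
 * Sketch of initializing an embedded lock with reader (shared) priority
 * (illustrative only; "my_embedded_lock" is hypothetical).
 */
#if 0
static lck_rw_t my_embedded_lock;

static void
my_init_with_reader_priority(lck_grp_t *grp)
{
	lck_attr_t *attr = lck_attr_alloc_init();

	lck_attr_rw_shared_priority(attr);          /* select RW_SHARED_PRIORITY */
	lck_rw_init(&my_embedded_lock, grp, attr);
	lck_attr_free(attr);
}
#endif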
190
191/*!
192 * @function lck_rw_free
193 *
194 * @abstract
195 * Frees a rw_lock previously allocated with lck_rw_alloc_init().
196 *
197 * @discussion
 * The lock must not be held by any thread.
199 *
200 * @param lck rw_lock to free.
201 */
202void
203lck_rw_free(
204 lck_rw_t *lck,
205 lck_grp_t *grp)
206{
207 lck_rw_destroy(lck, grp);
208 zfree(KT_LCK_RW, lck);
209}
210
211/*!
212 * @function lck_rw_destroy
213 *
214 * @abstract
215 * Destroys a rw_lock previously initialized with lck_rw_init().
216 *
217 * @discussion
 * The lock must not be held by any thread.
219 *
220 * @param lck rw_lock to destroy.
221 */
222void
223lck_rw_destroy(
224 lck_rw_t *lck,
225 lck_grp_t *grp)
226{
227 if (lck->lck_rw_type != LCK_TYPE_RW ||
228 lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) {
229 panic("Destroying previously destroyed lock %p", lck);
230 }
231 lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD);
232
233 lck->lck_rw_type = LCK_TYPE_NONE;
234 lck->lck_rw_tag = LCK_RW_TAG_DESTROYED;
	lck_grp_deallocate(grp, &grp->lck_grp_rwcnt);
236}
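
/*
 * A minimal teardown sketch for a lock embedded in a caller-owned structure
 * (hypothetical names); contrast with lck_rw_free() above, which also returns
 * the lck_rw_t storage to the allocator.
 */
#if 0
static void
my_subsystem_retire(struct my_subsystem *s, lck_grp_t *grp)
{
	lck_rw_destroy(&s->mys_lock, grp);   /* must not be held; storage stays with 's' */
}
#endif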
237
238#ifdef DEBUG_RW
239
240/*
241 * Best effort mechanism to debug rw_locks.
242 *
243 * This mechanism is in addition to the owner checks. The owner is set
244 * only when the lock is held in exclusive mode so the checks do not cover
245 * the cases in which the lock is held in shared mode.
246 *
247 * This mechanism tentatively stores the rw_lock acquired and its debug
248 * information on the thread struct.
 * Only up to LCK_RW_EXPECTED_MAX_NUMBER rw_lock debug entries can be stored.
250 *
251 * NOTE: LCK_RW_EXPECTED_MAX_NUMBER is the expected number of rw_locks held
252 * at the same time. If a thread holds more than this number of rw_locks we
253 * will start losing debug information.
254 * Increasing LCK_RW_EXPECTED_MAX_NUMBER will increase the probability we will
255 * store the debug information but it will require more memory per thread
256 * and longer lock/unlock time.
257 *
 * If an empty slot is found for the debug information, we record the lock;
 * otherwise we set the overflow flag.
 *
 * Once the overflow flag is set we might stop asserting, because we can no
 * longer be sure whether the lock was acquired or not.
 *
 * Even after overflowing, we keep trying to store the debug information
 * for newly acquired locks. This can be useful in core dumps to debug
266 * possible return to userspace without unlocking and to find possible readers
267 * holding the lock.
268 */
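
/*
 * Illustrative helper (not part of this code) showing how rwlde_mode_count
 * encodes the hold state of one debug entry, as relied upon by the checks
 * below.
 */
#if 0
static const char *
rwlde_mode_string(const struct rw_lock_debug_entry *entry)
{
	if (entry->rwlde_lock == NULL) {
		return "free slot";                   /* mode_count is 0 */
	}
	if (entry->rwlde_mode_count == -1) {
		return "held in exclusive mode";
	}
	return "held in shared mode";                 /* mode_count is the recursion depth */
}
#endif
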
269__startup_func
270static void
271rw_lock_init(void)
272{
273 if (kern_feature_override(KF_RW_LOCK_DEBUG_OVRD)) {
274 LcksOpts |= LCK_OPTION_DISABLE_RW_DEBUG;
275 }
276}
277STARTUP(LOCKS, STARTUP_RANK_FIRST, rw_lock_init);
278
279static inline struct rw_lock_debug_entry *
280find_lock_in_savedlocks(lck_rw_t* lock, rw_lock_debug_t *rw_locks_held)
281{
282 int i;
283 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
284 struct rw_lock_debug_entry *existing = &rw_locks_held->rwld_locks[i];
285 if (existing->rwlde_lock == lock) {
286 return existing;
287 }
288 }
289
290 return NULL;
291}
292
293__abortlike
294static void
295rwlock_slot_panic(rw_lock_debug_t *rw_locks_held)
296{
297 panic("No empty slot found in %p slot_used %d", rw_locks_held, rw_locks_held->rwld_locks_saved);
298}
299
300static inline struct rw_lock_debug_entry *
301find_empty_slot(rw_lock_debug_t *rw_locks_held)
302{
303 int i;
304 for (i = 0; i < LCK_RW_EXPECTED_MAX_NUMBER; i++) {
305 struct rw_lock_debug_entry *entry = &rw_locks_held->rwld_locks[i];
306 if (entry->rwlde_lock == NULL) {
307 return entry;
308 }
309 }
310 rwlock_slot_panic(rw_locks_held);
311}
312
313__abortlike
314static void
315canlock_rwlock_panic(lck_rw_t* lock, thread_t thread, struct rw_lock_debug_entry *entry)
316{
317 panic("RW lock %p already held by %p caller %p mode_count %d state 0x%x owner 0x%p ",
318 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
319 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
320}
321
322__attribute__((noinline))
323static void
324assert_canlock_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
325{
326 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
327 if (__probable(rw_locks_held->rwld_locks_acquired == 0)) {
328 //no locks saved, safe to lock
329 return;
330 }
331
332 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
333 if (__improbable(entry != NULL)) {
334 boolean_t can_be_shared_recursive;
335 if (lck_rw_recursive_shared_assert_74048094) {
336 can_be_shared_recursive = (lock->lck_rw_priv_excl == 0);
337 } else {
			/*
			 * lck_rw_lock_shared() is currently called recursively in
			 * some places; until that code is fixed, allow recursive
			 * locking in shared mode.
			 */
342 can_be_shared_recursive = TRUE;
343 }
344 if ((type == LCK_RW_TYPE_SHARED) && can_be_shared_recursive && entry->rwlde_mode_count >= 1) {
345 return;
346 }
347 canlock_rwlock_panic(lock, thread, entry);
348 }
349}
350
351static inline void
352assert_canlock_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
353{
354 if (__improbable(!rw_lock_debug_disabled())) {
355 assert_canlock_rwlock_slow(lock, thread, type);
356 }
357}
358
359__abortlike
360static void
361held_rwlock_notheld_panic(lck_rw_t* lock, thread_t thread)
362{
363 panic("RW lock %p not held by %p", lock, thread);
364}
365
366__abortlike
367static void
368held_rwlock_notheld_with_info_panic(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, struct rw_lock_debug_entry *entry)
369{
370 if (type == LCK_RW_TYPE_EXCLUSIVE) {
371 panic("RW lock %p not held in exclusive by %p caller %p read %d state 0x%x owner 0x%p ",
372 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
373 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
374 } else {
375 panic("RW lock %p not held in shared by %p caller %p read %d state 0x%x owner 0x%p ",
376 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
377 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
378 }
379}
380
381__attribute__((noinline))
382static void
383assert_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
384{
385 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
386
387 if (__improbable(rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_locks_saved == 0)) {
388 if (rw_locks_held->rwld_locks_acquired == 0 || rw_locks_held->rwld_overflow == 0) {
389 held_rwlock_notheld_panic(lock, thread);
390 }
391 return;
392 }
393
394 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
395 if (__probable(entry != NULL)) {
396 if (type == LCK_RW_TYPE_EXCLUSIVE && entry->rwlde_mode_count != -1) {
397 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
398 } else {
399 if (type == LCK_RW_TYPE_SHARED && entry->rwlde_mode_count <= 0) {
400 held_rwlock_notheld_with_info_panic(lock, thread, type, entry);
401 }
402 }
403 } else {
404 if (rw_locks_held->rwld_overflow == 0) {
405 held_rwlock_notheld_panic(lock, thread);
406 }
407 }
408}
409
410static inline void
411assert_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
412{
413 if (__improbable(!rw_lock_debug_disabled())) {
414 assert_held_rwlock_slow(lock, thread, type);
415 }
416}
417
418__attribute__((noinline))
419static void
420change_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
421{
422 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
423 if (__improbable(rw_locks_held->rwld_locks_saved == 0)) {
424 if (rw_locks_held->rwld_overflow == 0) {
425 held_rwlock_notheld_panic(lock, thread);
426 }
427 return;
428 }
429
430 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
431 if (__probable(entry != NULL)) {
432 if (typeFrom == LCK_RW_TYPE_SHARED) {
433 //We are upgrading
434 assertf(entry->rwlde_mode_count == 1,
435 "RW lock %p not held by a single shared when upgrading "
436 "by %p caller %p read %d state 0x%x owner 0x%p ",
437 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
438 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
439 entry->rwlde_mode_count = -1;
440 set_rwlde_caller_packed(entry, caller);
441 } else {
442 //We are downgrading
443 assertf(entry->rwlde_mode_count == -1,
444 "RW lock %p not held in write mode when downgrading "
445 "by %p caller %p read %d state 0x%x owner 0x%p ",
446 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
447 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
448 entry->rwlde_mode_count = 1;
449 set_rwlde_caller_packed(entry, caller);
450 }
451 return;
452 }
453
454 if (rw_locks_held->rwld_overflow == 0) {
455 held_rwlock_notheld_panic(lock, thread);
456 }
457
458 if (rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER) {
459 //array is full
460 return;
461 }
462
463 struct rw_lock_debug_entry *null_entry = find_empty_slot(rw_locks_held);
464 null_entry->rwlde_lock = lock;
465 set_rwlde_caller_packed(null_entry, caller);
466 if (typeFrom == LCK_RW_TYPE_SHARED) {
467 null_entry->rwlde_mode_count = -1;
468 } else {
469 null_entry->rwlde_mode_count = 1;
470 }
471 rw_locks_held->rwld_locks_saved++;
472}
473
474static inline void
475change_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t typeFrom, void* caller)
476{
477 if (__improbable(!rw_lock_debug_disabled())) {
478 change_held_rwlock_slow(lock, thread, typeFrom, caller);
479 }
480}
481
482__abortlike
483static void
484add_held_rwlock_too_many_panic(thread_t thread)
485{
486 panic("RW lock too many rw locks held, rwld_locks_acquired maxed out for thread %p", thread);
487}
488
489static __attribute__((noinline)) void
490add_held_rwlock_slow(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
491{
492 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
493 struct rw_lock_debug_entry *null_entry;
494 if (__improbable(rw_locks_held->rwld_locks_acquired == UINT32_MAX)) {
495 add_held_rwlock_too_many_panic(thread);
496 }
497 rw_locks_held->rwld_locks_acquired++;
498
499 if (type == LCK_RW_TYPE_EXCLUSIVE) {
500 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
501 //array is full
502 rw_locks_held->rwld_overflow = 1;
503 return;
504 }
505 null_entry = find_empty_slot(rw_locks_held);
506 null_entry->rwlde_lock = lock;
507 set_rwlde_caller_packed(null_entry, caller);
508 null_entry->rwlde_mode_count = -1;
509 rw_locks_held->rwld_locks_saved++;
510 return;
511 } else {
512 if (__probable(rw_locks_held->rwld_locks_saved == 0)) {
513 //array is empty
514 goto add_shared;
515 }
516
517 boolean_t allow_shared_recursive;
518 if (lck_rw_recursive_shared_assert_74048094) {
519 allow_shared_recursive = (lock->lck_rw_priv_excl == 0);
520 } else {
521 allow_shared_recursive = TRUE;
522 }
523 if (allow_shared_recursive) {
524 //It could be already locked in shared mode
525 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
526 if (entry != NULL) {
527 assert(entry->rwlde_mode_count > 0);
528 assertf(entry->rwlde_mode_count != INT8_MAX,
529 "RW lock %p with too many recursive shared held "
530 "from %p caller %p read %d state 0x%x owner 0x%p",
531 lock, thread, get_rwlde_caller(entry), entry->rwlde_mode_count,
532 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
533 entry->rwlde_mode_count += 1;
534 return;
535 }
536 }
537
538 //none of the locks were a match
539 //try to add a new entry
540 if (__improbable(rw_locks_held->rwld_locks_saved == LCK_RW_EXPECTED_MAX_NUMBER)) {
541 //array is full
542 rw_locks_held->rwld_overflow = 1;
543 return;
544 }
545
546add_shared:
547 null_entry = find_empty_slot(rw_locks_held);
548 null_entry->rwlde_lock = lock;
549 set_rwlde_caller_packed(null_entry, caller);
550 null_entry->rwlde_mode_count = 1;
551 rw_locks_held->rwld_locks_saved++;
552 }
553}
554
555static inline void
556add_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type, void* caller)
557{
558 if (__improbable(!rw_lock_debug_disabled())) {
559 add_held_rwlock_slow(lock, thread, type, caller);
560 }
561}
562
563static void
564remove_held_rwlock_slow(lck_rw_t *lock, thread_t thread, lck_rw_type_t type)
565{
566 rw_lock_debug_t *rw_locks_held = &thread->rw_lock_held;
567 if (__improbable(rw_locks_held->rwld_locks_acquired == 0)) {
568 return;
569 }
570 rw_locks_held->rwld_locks_acquired--;
571
572 if (rw_locks_held->rwld_locks_saved == 0) {
573 assert(rw_locks_held->rwld_overflow == 1);
574 goto out;
575 }
576
577 struct rw_lock_debug_entry *entry = find_lock_in_savedlocks(lock, rw_locks_held);
578 if (__probable(entry != NULL)) {
579 if (type == LCK_RW_TYPE_EXCLUSIVE) {
580 assert(entry->rwlde_mode_count == -1);
581 entry->rwlde_mode_count = 0;
582 } else {
583 assert(entry->rwlde_mode_count > 0);
584 entry->rwlde_mode_count--;
585 if (entry->rwlde_mode_count > 0) {
586 goto out;
587 }
588 }
589 entry->rwlde_caller_packed = 0;
590 entry->rwlde_lock = NULL;
591 rw_locks_held->rwld_locks_saved--;
592 } else {
593 assert(rw_locks_held->rwld_overflow == 1);
594 }
595
596out:
597 if (rw_locks_held->rwld_locks_acquired == 0) {
598 rw_locks_held->rwld_overflow = 0;
599 }
600 return;
601}
602
603static inline void
604remove_held_rwlock(lck_rw_t* lock, thread_t thread, lck_rw_type_t type)
605{
606 if (__improbable(!rw_lock_debug_disabled())) {
607 remove_held_rwlock_slow(lock, thread, type);
608 }
609}
610#endif /* DEBUG_RW */
611
612/*
613 * We disable interrupts while holding the RW interlock to prevent an
614 * interrupt from exacerbating hold time.
615 * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock().
616 */
617static inline boolean_t
618lck_interlock_lock(
619 lck_rw_t *lck)
620{
621 boolean_t istate;
622
623 istate = ml_set_interrupts_enabled(FALSE);
624 lck_rw_ilk_lock(lck);
625 return istate;
626}
627
628static inline void
629lck_interlock_unlock(
630 lck_rw_t *lck,
631 boolean_t istate)
632{
633 lck_rw_ilk_unlock(lck);
	ml_set_interrupts_enabled(istate);
635}
636
637/*
638 * compute the deadline to spin against when
639 * waiting for a change of state on a lck_rw_t
640 */
641static inline uint64_t
642lck_rw_deadline_for_spin(
643 lck_rw_t *lck)
644{
645 lck_rw_word_t word;
646
647 word.data = ordered_load_rw(lck);
648 if (word.can_sleep) {
649 if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) {
650 /*
651 * there are already threads waiting on this lock... this
652 * implies that they have spun beyond their deadlines waiting for
653 * the desired state to show up so we will not bother spinning at this time...
654 * or
655 * the current number of threads sharing this lock exceeds our capacity to run them
656 * concurrently and since all states we're going to spin for require the rw_shared_count
657 * to be at 0, we'll not bother spinning since the latency for this to happen is
658 * unpredictable...
659 */
660 return mach_absolute_time();
661 }
662 return mach_absolute_time() + os_atomic_load(&MutexSpin, relaxed);
663 } else {
664 return mach_absolute_time() + (100000LL * 1000000000LL);
665 }
666}
667
668/*
669 * This inline is used when busy-waiting for an rw lock.
670 * If interrupts were disabled when the lock primitive was called,
671 * we poll the IPI handler for pending tlb flushes in x86.
672 */
673static inline void
674lck_rw_lock_pause(
675 boolean_t interrupts_enabled)
676{
677#if X86_64
678 if (!interrupts_enabled) {
679 handle_pending_TLB_flushes();
680 }
681 cpu_pause();
682#else
683 (void) interrupts_enabled;
684 wait_for_event();
685#endif
686}
687
688typedef enum __enum_closed {
689 LCK_RW_DRAIN_S_DRAINED = 0,
690 LCK_RW_DRAIN_S_NOT_DRAINED = 1,
691 LCK_RW_DRAIN_S_EARLY_RETURN = 2,
692 LCK_RW_DRAIN_S_TIMED_OUT = 3,
693} lck_rw_drain_state_t;
694
695static lck_rw_drain_state_t
696lck_rw_drain_status(
697 lck_rw_t *lock,
698 uint32_t status_mask,
699 boolean_t wait,
700 bool (^lock_pause)(void))
701{
702 uint64_t deadline = 0;
703 uint32_t data;
704 boolean_t istate = FALSE;
705
706 if (wait) {
		deadline = lck_rw_deadline_for_spin(lock);
708#if __x86_64__
709 istate = ml_get_interrupts_enabled();
710#endif
711 }
712
713 for (;;) {
714#if __x86_64__
715 data = os_atomic_load(&lock->lck_rw_data, relaxed);
716#else
		data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp);
718#endif
719 if ((data & status_mask) == 0) {
720 atomic_exchange_abort();
721 return LCK_RW_DRAIN_S_DRAINED;
722 }
723
724 if (!wait) {
725 atomic_exchange_abort();
726 return LCK_RW_DRAIN_S_NOT_DRAINED;
727 }
728
		lck_rw_lock_pause(istate);
730
731 if (mach_absolute_time() >= deadline) {
732 return LCK_RW_DRAIN_S_TIMED_OUT;
733 }
734
735 if (lock_pause && lock_pause()) {
736 return LCK_RW_DRAIN_S_EARLY_RETURN;
737 }
738 }
739}
740
741/*
742 * Spin while interlock is held.
743 */
744static inline void
745lck_rw_interlock_spin(
746 lck_rw_t *lock)
747{
748 uint32_t data, prev;
749
750 for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
752 if (data & LCK_RW_INTERLOCK) {
753#if __x86_64__
754 cpu_pause();
755#else
756 wait_for_event();
757#endif
758 } else {
759 atomic_exchange_abort();
760 return;
761 }
762 }
763}
764
765#define LCK_RW_GRAB_WANT 0
766#define LCK_RW_GRAB_SHARED 1
767
768typedef enum __enum_closed __enum_options {
769 LCK_RW_GRAB_F_SHARED = 0x0, // Not really a flag obviously but makes call sites more readable.
770 LCK_RW_GRAB_F_WANT_EXCL = 0x1,
771 LCK_RW_GRAB_F_WAIT = 0x2,
772} lck_rw_grab_flags_t;
773
774typedef enum __enum_closed {
775 LCK_RW_GRAB_S_NOT_LOCKED = 0,
776 LCK_RW_GRAB_S_LOCKED = 1,
777 LCK_RW_GRAB_S_EARLY_RETURN = 2,
778 LCK_RW_GRAB_S_TIMED_OUT = 3,
779} lck_rw_grab_state_t;
780
781static lck_rw_grab_state_t
782lck_rw_grab(
783 lck_rw_t *lock,
784 lck_rw_grab_flags_t flags,
785 bool (^lock_pause)(void))
786{
787 uint64_t deadline = 0;
788 uint32_t data, prev;
789 boolean_t do_exch, istate = FALSE;
790
791 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
792
793 if ((flags & LCK_RW_GRAB_F_WAIT) != 0) {
		deadline = lck_rw_deadline_for_spin(lock);
795#if __x86_64__
796 istate = ml_get_interrupts_enabled();
797#endif
798 }
799
800 for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
802 if (data & LCK_RW_INTERLOCK) {
803 atomic_exchange_abort();
804 lck_rw_interlock_spin(lock);
805 continue;
806 }
807 do_exch = FALSE;
808 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
809 if ((data & LCK_RW_WANT_EXCL) == 0) {
810 data |= LCK_RW_WANT_EXCL;
811 do_exch = TRUE;
812 }
813 } else { // LCK_RW_GRAB_SHARED
814 if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) ||
815 (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) {
816 data += LCK_RW_SHARED_READER;
817 do_exch = TRUE;
818 }
819 }
820 if (do_exch) {
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
822 return LCK_RW_GRAB_S_LOCKED;
823 }
824 } else {
825 if ((flags & LCK_RW_GRAB_F_WAIT) == 0) {
826 atomic_exchange_abort();
827 return LCK_RW_GRAB_S_NOT_LOCKED;
828 }
829
			lck_rw_lock_pause(istate);
831
832 if (mach_absolute_time() >= deadline) {
833 return LCK_RW_GRAB_S_TIMED_OUT;
834 }
835 if (lock_pause && lock_pause()) {
836 return LCK_RW_GRAB_S_EARLY_RETURN;
837 }
838 }
839 }
840}
841
842/*
843 * The inverse of lck_rw_grab - drops either the LCK_RW_WANT_EXCL bit or
844 * decrements the reader count. Doesn't deal with waking up waiters - i.e.
845 * should only be called when can_sleep is false.
846 */
847static void
848lck_rw_drop(lck_rw_t *lock, lck_rw_grab_flags_t flags)
849{
850 uint32_t data, prev;
851
852 assert3u(flags & ~(LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT), ==, 0);
853 assert(!lock->lck_rw_can_sleep);
854
855 for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
857
858 /* Interlock should never be taken when can_sleep is false. */
859 assert3u(data & LCK_RW_INTERLOCK, ==, 0);
860
861 if ((flags & LCK_RW_GRAB_F_WANT_EXCL) != 0) {
862 data &= ~LCK_RW_WANT_EXCL;
863 } else {
864 data -= LCK_RW_SHARED_READER;
865 }
866
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
868 break;
869 }
870
871 cpu_pause();
872 }
873
874 return;
875}
876
877static boolean_t
878lck_rw_lock_exclusive_gen(
879 lck_rw_t *lock,
880 bool (^lock_pause)(void))
881{
882 __assert_only thread_t self = current_thread();
883 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
884 lck_rw_word_t word;
885 int slept = 0;
886 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
887 lck_rw_drain_state_t drain_state = LCK_RW_DRAIN_S_NOT_DRAINED;
888 wait_result_t res = 0;
889 boolean_t istate;
890
891#if CONFIG_DTRACE
892 boolean_t dtrace_ls_initialized = FALSE;
893 boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
894 uint64_t wait_interval = 0;
895 int readers_at_sleep = 0;
896#endif
897
898 assertf(lock->lck_rw_owner != self->ctid,
899 "Lock already held state=0x%x, owner=%p",
900 ordered_load_rw(lock), self);
901
902#ifdef DEBUG_RW
903 /*
904 * Best effort attempt to check that this thread
905 * is not already holding the lock (this checks read mode too).
906 */
907 assert_canlock_rwlock(lock, self, LCK_RW_TYPE_EXCLUSIVE);
908#endif /* DEBUG_RW */
909
910 /*
911 * Try to acquire the lck_rw_want_excl bit.
912 */
	while (lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL, NULL) != LCK_RW_GRAB_S_LOCKED) {
914#if CONFIG_DTRACE
915 if (dtrace_ls_initialized == FALSE) {
916 dtrace_ls_initialized = TRUE;
917 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
918 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
919 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
920 if (dtrace_ls_enabled) {
921 /*
922 * Either sleeping or spinning is happening,
923 * start a timing of our delay interval now.
924 */
925 readers_at_sleep = lock->lck_rw_shared_count;
926 wait_interval = mach_absolute_time();
927 }
928 }
929#endif
930
931 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START,
932 trace_lck, 0, 0, 0, 0);
933
		grab_state = lck_rw_grab(lock, LCK_RW_GRAB_F_WANT_EXCL | LCK_RW_GRAB_F_WAIT, lock_pause);
935
936 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END,
937 trace_lck, 0, 0, grab_state, 0);
938
939 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
940 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
941 break;
942 }
943 /*
944 * if we get here, the deadline has expired w/o us
945 * being able to grab the lock exclusively
946 * check to see if we're allowed to do a thread_block
947 */
948 word.data = ordered_load_rw(lock);
949 if (word.can_sleep) {
			istate = lck_interlock_lock(lock);
951 word.data = ordered_load_rw(lock);
952
953 if (word.want_excl) {
954 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
955
956 word.w_waiting = 1;
957 ordered_store_rw(lock, word.data);
958
				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
960 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
961 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);
963 if (res == THREAD_WAITING) {
964 res = thread_block(THREAD_CONTINUE_NULL);
965 slept++;
966 }
967 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
968 } else {
969 word.want_excl = 1;
970 ordered_store_rw(lock, word.data);
				lck_interlock_unlock(lock, istate);
972 break;
973 }
974 }
975 }
976
977 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
978 assert(lock_pause);
979 return FALSE;
980 }
981
982 /*
983 * Wait for readers (and upgrades) to finish...
984 */
985 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
986#if CONFIG_DTRACE
987 /*
988 * Either sleeping or spinning is happening, start
989 * a timing of our delay interval now. If we set it
990 * to -1 we don't have accurate data so we cannot later
991 * decide to record a dtrace spin or sleep event.
992 */
993 if (dtrace_ls_initialized == FALSE) {
994 dtrace_ls_initialized = TRUE;
995 dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0);
996 dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0);
997 dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block;
998 if (dtrace_ls_enabled) {
999 /*
1000 * Either sleeping or spinning is happening,
1001 * start a timing of our delay interval now.
1002 */
1003 readers_at_sleep = lock->lck_rw_shared_count;
1004 wait_interval = mach_absolute_time();
1005 }
1006 }
1007#endif
1008
1009 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1010
1011 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE, lock_pause);
1012
1013 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, drain_state, 0);
1014
1015 if (drain_state == LCK_RW_DRAIN_S_DRAINED ||
1016 drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
1017 break;
1018 }
1019 /*
1020 * if we get here, the deadline has expired w/o us
1021 * being able to grab the lock exclusively
1022 * check to see if we're allowed to do a thread_block
1023 */
1024 word.data = ordered_load_rw(lock);
1025 if (word.can_sleep) {
			istate = lck_interlock_lock(lock);
1027 word.data = ordered_load_rw(lock);
1028
1029 if (word.shared_count != 0 || word.want_upgrade) {
1030 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0);
1031
1032 word.w_waiting = 1;
1033 ordered_store_rw(lock, word.data);
1034
				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite);
1036 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1037 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);
1039
1040 if (res == THREAD_WAITING) {
1041 res = thread_block(THREAD_CONTINUE_NULL);
1042 slept++;
1043 }
1044 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0);
1045 } else {
				lck_interlock_unlock(lock, istate);
1047 /*
1048 * must own the lock now, since we checked for
1049 * readers or upgrade owner behind the interlock
1050 * no need for a call to 'lck_rw_drain_status'
1051 */
1052 break;
1053 }
1054 }
1055 }
1056
1057#if CONFIG_DTRACE
1058 /*
1059 * Decide what latencies we suffered that are Dtrace events.
1060 * If we have set wait_interval, then we either spun or slept.
1061 * At least we get out from under the interlock before we record
1062 * which is the best we can do here to minimize the impact
1063 * of the tracing.
1064 * If we have set wait_interval to -1, then dtrace was not enabled when we
1065 * started sleeping/spinning so we don't record this event.
1066 */
1067 if (dtrace_ls_enabled == TRUE) {
1068 if (slept == 0) {
1069 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_SPIN, lock,
1070 mach_absolute_time() - wait_interval, 1);
1071 } else {
1072 /*
1073 * For the blocking case, we also record if when we blocked
1074 * it was held for read or write, and how many readers.
1075 * Notice that above we recorded this before we dropped
1076 * the interlock so the count is accurate.
1077 */
1078 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_BLOCK, lock,
1079 mach_absolute_time() - wait_interval, 1,
1080 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1081 }
1082 }
1083#endif /* CONFIG_DTRACE */
1084
1085 if (drain_state == LCK_RW_DRAIN_S_EARLY_RETURN) {
		lck_rw_drop(lock, LCK_RW_GRAB_F_WANT_EXCL);
1087 assert(lock_pause);
1088 return FALSE;
1089 }
1090
1091#if CONFIG_DTRACE
1092 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1);
1093#endif /* CONFIG_DTRACE */
1094
1095 return TRUE;
1096}
1097
1098#define LCK_RW_LOCK_EXCLUSIVE_TAS(lck) (atomic_test_and_set32(&(lck)->lck_rw_data, \
1099 (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), \
1100 LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE))
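
/*
 * Rough sketch of what the test-and-set fast path above amounts to
 * (illustrative only): claim LCK_RW_WANT_EXCL in one CAS, provided there are
 * no readers and none of the writer/upgrade/interlock bits is set.
 */
#if 0
static bool
lck_rw_claim_excl_fast(lck_rw_t *lck)
{
	uint32_t data = os_atomic_load(&lck->lck_rw_data, relaxed);

	if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL |
	    LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
		return false;
	}
	return os_atomic_cmpxchg(&lck->lck_rw_data, data,
	           data | LCK_RW_WANT_EXCL, acquire);
}
#endif
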
1101/*!
1102 * @function lck_rw_lock_exclusive_check_contended
1103 *
1104 * @abstract
1105 * Locks a rw_lock in exclusive mode.
1106 *
1107 * @discussion
1108 * This routine IS EXPERIMENTAL.
1109 * It's only used for the vm object lock, and use for other subsystems is UNSUPPORTED.
1110 * Note that the return value is ONLY A HEURISTIC w.r.t. the lock's contention.
1111 *
1112 * @param lock rw_lock to lock.
1113 *
1114 * @returns Returns TRUE if the thread spun or blocked while attempting to acquire the lock, FALSE
1115 * otherwise.
1116 */
1117bool
1118lck_rw_lock_exclusive_check_contended(
1119 lck_rw_t *lock)
1120{
1121 thread_t thread = current_thread();
1122 bool contended = false;
1123
1124 if (lock->lck_rw_can_sleep) {
1125 lck_rw_lock_count_inc(thread, lock);
1126 } else if (get_preemption_level() == 0) {
1127 panic("Taking non-sleepable RW lock with preemption enabled");
1128 }
1129
1130 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1131#if CONFIG_DTRACE
1132 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1133#endif /* CONFIG_DTRACE */
1134 } else {
1135 contended = true;
1136 (void) lck_rw_lock_exclusive_gen(lock, NULL);
1137 }
1138 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1139 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1140 ordered_store_rw_owner(lock, thread->ctid);
1141
1142#ifdef DEBUG_RW
1143 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, __builtin_return_address(0));
1144#endif /* DEBUG_RW */
1145 return contended;
1146}
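
/*
 * Usage sketch, in the spirit of the VM object lock: treat the return value
 * only as a contention statistic ("my_contended_count" is hypothetical).
 */
#if 0
static uint64_t my_contended_count;

static void
my_lock_and_count(lck_rw_t *lock)
{
	if (lck_rw_lock_exclusive_check_contended(lock)) {
		os_atomic_inc(&my_contended_count, relaxed);
	}
	/* ... critical section ... */
	lck_rw_unlock_exclusive(lock);
}
#endif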
1147
1148__attribute__((always_inline))
1149static boolean_t
1150lck_rw_lock_exclusive_internal_inline(
1151 lck_rw_t *lock,
1152 void *caller,
1153 bool (^lock_pause)(void))
1154{
1155#pragma unused(caller)
1156 thread_t thread = current_thread();
1157
1158 if (lock->lck_rw_can_sleep) {
1159 lck_rw_lock_count_inc(thread, lock);
1160 } else if (get_preemption_level() == 0) {
1161 panic("Taking non-sleepable RW lock with preemption enabled");
1162 }
1163
1164 if (LCK_RW_LOCK_EXCLUSIVE_TAS(lock)) {
1165#if CONFIG_DTRACE
1166 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
1167#endif /* CONFIG_DTRACE */
1168 } else if (!lck_rw_lock_exclusive_gen(lock, lock_pause)) {
1169 /*
1170 * lck_rw_lock_exclusive_gen() should only return
1171 * early if lock_pause has been passed and
1172 * returns FALSE. lock_pause is exclusive with
1173 * lck_rw_can_sleep().
1174 */
1175 assert(!lock->lck_rw_can_sleep);
1176 return FALSE;
1177 }
1178
1179 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1180 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1181 ordered_store_rw_owner(lock, thread->ctid);
1182
1183#if DEBUG_RW
1184 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1185#endif /* DEBUG_RW */
1186
1187 return TRUE;
1188}
1189
1190__attribute__((noinline))
1191static void
1192lck_rw_lock_exclusive_internal(
1193 lck_rw_t *lock,
1194 void *caller)
1195{
1196 (void) lck_rw_lock_exclusive_internal_inline(lock, caller, NULL);
1197}
1198
1199/*!
1200 * @function lck_rw_lock_exclusive
1201 *
1202 * @abstract
1203 * Locks a rw_lock in exclusive mode.
1204 *
1205 * @discussion
1206 * This function can block.
1207 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1208 * can acquire it in exclusive mode.
1209 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1210 *
1211 * @param lock rw_lock to lock.
1212 */
1213void
1214lck_rw_lock_exclusive(
1215 lck_rw_t *lock)
1216{
	(void) lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), NULL);
1218}
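
/*
 * A minimal writer-side sketch (the "my_data" structure is hypothetical): the
 * lock must be released before the thread returns to userspace.
 */
#if 0
static void
my_update(lck_rw_t *lock, struct my_data *d, int value)
{
	lck_rw_lock_exclusive(lock);
	d->myd_value = value;               /* single writer inside the critical section */
	lck_rw_unlock_exclusive(lock);
}
#endif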
1219
1220/*!
1221 * @function lck_rw_lock_exclusive_b
1222 *
1223 * @abstract
1224 * Locks a rw_lock in exclusive mode. Returns early if the lock can't be acquired
1225 * and the specified block returns true.
1226 *
1227 * @discussion
1228 * Identical to lck_rw_lock_exclusive() but can return early if the lock can't be
1229 * acquired and the specified block returns true. The block is called
1230 * repeatedly when waiting to acquire the lock.
1231 * Should only be called when the lock cannot sleep (i.e. when
1232 * lock->lck_rw_can_sleep is false).
1233 *
1234 * @param lock rw_lock to lock.
1235 * @param lock_pause block invoked while waiting to acquire lock
1236 *
1237 * @returns Returns TRUE if the lock is successfully taken,
1238 * FALSE if the block returns true and the lock has
1239 * not been acquired.
1240 */
1241boolean_t
1242lck_rw_lock_exclusive_b(
1243 lck_rw_t *lock,
1244 bool (^lock_pause)(void))
1245{
1246 assert(!lock->lck_rw_can_sleep);
1247
	return lck_rw_lock_exclusive_internal_inline(lock, __builtin_return_address(0), lock_pause);
1249}
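
/*
 * Usage sketch: give up on the acquisition when an external condition fires.
 * Assumes "lock" was set up elsewhere as a non-sleepable lock and that
 * "should_abort" is a hypothetical flag owned by the caller.
 */
#if 0
static bool
my_lock_unless_aborted(lck_rw_t *lock, volatile bool *should_abort)
{
	return lck_rw_lock_exclusive_b(lock, ^bool (void) {
		return *should_abort;       /* true -> stop waiting, lock not taken */
	});
}
#endif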
1250
1251/*
1252 * Routine: lck_rw_lock_shared_gen
1253 * Function:
1254 * Fast path code has determined that this lock
1255 * is held exclusively... this is where we spin/block
1256 * until we can acquire the lock in the shared mode
1257 */
1258static boolean_t
1259lck_rw_lock_shared_gen(
1260 lck_rw_t *lck,
1261 bool (^lock_pause)(void))
1262{
1263 __assert_only thread_t self = current_thread();
1264 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1265 lck_rw_word_t word;
1266 lck_rw_grab_state_t grab_state = LCK_RW_GRAB_S_NOT_LOCKED;
1267 int slept = 0;
1268 wait_result_t res = 0;
1269 boolean_t istate;
1270
1271#if CONFIG_DTRACE
1272 uint64_t wait_interval = 0;
1273 int readers_at_sleep = 0;
1274 boolean_t dtrace_ls_initialized = FALSE;
1275 boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE;
1276#endif /* CONFIG_DTRACE */
1277
1278 assertf(lck->lck_rw_owner != self->ctid,
1279 "Lock already held state=0x%x, owner=%p",
1280 ordered_load_rw(lck), self);
1281
1282#ifdef DEBUG_RW
1283 /*
1284 * Best effort attempt to check that this thread
1285 * is not already holding the lock in shared mode.
1286 */
1287 assert_canlock_rwlock(lck, self, LCK_RW_TYPE_SHARED);
1288#endif
1289
	while (lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED, NULL) != LCK_RW_GRAB_S_LOCKED) {
1291#if CONFIG_DTRACE
1292 if (dtrace_ls_initialized == FALSE) {
1293 dtrace_ls_initialized = TRUE;
1294 dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0);
1295 dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0);
1296 dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block;
1297 if (dtrace_ls_enabled) {
1298 /*
1299 * Either sleeping or spinning is happening,
1300 * start a timing of our delay interval now.
1301 */
1302 readers_at_sleep = lck->lck_rw_shared_count;
1303 wait_interval = mach_absolute_time();
1304 }
1305 }
1306#endif
1307
1308 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START,
1309 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0);
1310
		grab_state = lck_rw_grab(lck, LCK_RW_GRAB_F_SHARED | LCK_RW_GRAB_F_WAIT, lock_pause);
1312
1313 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END,
1314 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, grab_state, 0);
1315
1316 if (grab_state == LCK_RW_GRAB_S_LOCKED ||
1317 grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1318 break;
1319 }
1320
1321 /*
1322 * if we get here, the deadline has expired w/o us
1323 * being able to grab the lock for read
1324 * check to see if we're allowed to do a thread_block
1325 */
1326 if (lck->lck_rw_can_sleep) {
1327 istate = lck_interlock_lock(lck);
1328
1329 word.data = ordered_load_rw(lck);
1330 if ((word.want_excl || word.want_upgrade) &&
1331 ((word.shared_count == 0) || word.priv_excl)) {
1332 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START,
1333 trace_lck, word.want_excl, word.want_upgrade, 0, 0);
1334
1335 word.r_waiting = 1;
1336 ordered_store_rw(lck, word.data);
1337
				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead);
1339 res = assert_wait(LCK_RW_READER_EVENT(lck),
1340 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1341 lck_interlock_unlock(lck, istate);
1342
1343 if (res == THREAD_WAITING) {
1344 res = thread_block(THREAD_CONTINUE_NULL);
1345 slept++;
1346 }
1347 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END,
1348 trace_lck, res, slept, 0, 0);
1349 } else {
1350 word.shared_count++;
1351 ordered_store_rw(lck, word.data);
1352 lck_interlock_unlock(lck, istate);
1353 break;
1354 }
1355 }
1356 }
1357
1358#if CONFIG_DTRACE
1359 if (dtrace_ls_enabled == TRUE) {
1360 if (slept == 0) {
1361 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
1362 } else {
1363 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
1364 mach_absolute_time() - wait_interval, 0,
1365 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1366 }
1367 }
1368#endif /* CONFIG_DTRACE */
1369
1370 if (grab_state == LCK_RW_GRAB_S_EARLY_RETURN) {
1371 assert(lock_pause);
1372 return FALSE;
1373 }
1374
1375#if CONFIG_DTRACE
1376 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
1377#endif /* CONFIG_DTRACE */
1378
1379 return TRUE;
1380}
1381
1382__attribute__((always_inline))
1383static boolean_t
1384lck_rw_lock_shared_internal_inline(
1385 lck_rw_t *lock,
1386 void *caller,
1387 bool (^lock_pause)(void))
1388{
1389#pragma unused(caller)
1390
1391 uint32_t data, prev;
1392 thread_t thread = current_thread();
1393#ifdef DEBUG_RW
1394 boolean_t check_canlock = TRUE;
1395#endif
1396
1397 if (lock->lck_rw_can_sleep) {
1398 lck_rw_lock_count_inc(thread, lock);
1399 } else if (get_preemption_level() == 0) {
1400 panic("Taking non-sleepable RW lock with preemption enabled");
1401 }
1402
1403 for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1405 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) {
1406 atomic_exchange_abort();
			if (!lck_rw_lock_shared_gen(lock, lock_pause)) {
1408 /*
1409 * lck_rw_lock_shared_gen() should only return
1410 * early if lock_pause has been passed and
1411 * returns FALSE. lock_pause is exclusive with
1412 * lck_rw_can_sleep().
1413 */
1414 assert(!lock->lck_rw_can_sleep);
1415 return FALSE;
1416 }
1417
1418 goto locked;
1419 }
1420#ifdef DEBUG_RW
1421 if ((data & LCK_RW_SHARED_MASK) == 0) {
1422 /*
1423 * If the lock is uncontended,
1424 * we do not need to check if we can lock it
1425 */
1426 check_canlock = FALSE;
1427 }
1428#endif
1429 data += LCK_RW_SHARED_READER;
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1431 break;
1432 }
1433 cpu_pause();
1434 }
1435#ifdef DEBUG_RW
1436 if (check_canlock) {
1437 /*
1438 * Best effort attempt to check that this thread
1439 * is not already holding the lock (this checks read mode too).
1440 */
1441 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1442 }
1443#endif
1444locked:
1445 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1446 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1447
1448#if CONFIG_DTRACE
1449 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1450#endif /* CONFIG_DTRACE */
1451
1452#ifdef DEBUG_RW
1453 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1454#endif /* DEBUG_RW */
1455
1456 return TRUE;
1457}
1458
1459__attribute__((noinline))
1460static void
1461lck_rw_lock_shared_internal(
1462 lck_rw_t *lock,
1463 void *caller)
1464{
1465 (void) lck_rw_lock_shared_internal_inline(lock, caller, NULL);
1466}
1467
1468/*!
1469 * @function lck_rw_lock_shared
1470 *
1471 * @abstract
1472 * Locks a rw_lock in shared mode.
1473 *
1474 * @discussion
1475 * This function can block.
1476 * Multiple threads can acquire the lock in shared mode at the same time, but only one thread at a time
1477 * can acquire it in exclusive mode.
1478 * If the lock is held in shared mode and there are no writers waiting, a reader will be able to acquire
1479 * the lock without waiting.
 * If the lock is held in shared mode and at least one writer is waiting, a reader will wait
 * for all the writers to make progress if the lock was initialized with the default settings. If instead
 * RW_SHARED_PRIORITY was selected at initialization time, a reader will never wait if the lock is held
 * in shared mode.
1484 * NOTE: the thread cannot return to userspace while the lock is held. Recursive locking is not supported.
1485 *
1486 * @param lock rw_lock to lock.
1487 */
1488void
1489lck_rw_lock_shared(
1490 lck_rw_t *lock)
1491{
	(void) lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), NULL);
1493}
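
/*
 * A minimal reader-side sketch (the "my_data" structure is hypothetical): any
 * number of readers may hold the lock concurrently, none may modify the
 * protected state.
 */
#if 0
static int
my_read(lck_rw_t *lock, struct my_data *d)
{
	int value;

	lck_rw_lock_shared(lock);
	value = d->myd_value;               /* read-only access under the shared hold */
	lck_rw_unlock_shared(lock);
	return value;
}
#endif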
1494
1495/*!
1496 * @function lck_rw_lock_shared_b
1497 *
1498 * @abstract
1499 * Locks a rw_lock in shared mode. Returns early if the lock can't be acquired
1500 * and the specified block returns true.
1501 *
1502 * @discussion
1503 * Identical to lck_rw_lock_shared() but can return early if the lock can't be
1504 * acquired and the specified block returns true. The block is called
1505 * repeatedly when waiting to acquire the lock.
1506 * Should only be called when the lock cannot sleep (i.e. when
1507 * lock->lck_rw_can_sleep is false).
1508 *
1509 * @param lock rw_lock to lock.
1510 * @param lock_pause block invoked while waiting to acquire lock
1511 *
1512 * @returns Returns TRUE if the lock is successfully taken,
1513 * FALSE if the block returns true and the lock has
1514 * not been acquired.
1515 */
1516boolean_t
1517lck_rw_lock_shared_b(
1518 lck_rw_t *lock,
1519 bool (^lock_pause)(void))
1520{
1521 assert(!lock->lck_rw_can_sleep);
1522
	return lck_rw_lock_shared_internal_inline(lock, __builtin_return_address(0), lock_pause);
1524}
1525
1526/*
1527 * Routine: lck_rw_lock_shared_to_exclusive_failure
1528 * Function:
1529 * Fast path code has already dropped our read
1530 * count and determined that someone else owns 'lck_rw_want_upgrade'
 * if 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting'
1532 * all we need to do here is determine if a wakeup is needed
1533 */
1534static boolean_t
1535lck_rw_lock_shared_to_exclusive_failure(
1536 lck_rw_t *lck,
1537 uint32_t prior_lock_state)
1538{
1539 thread_t thread = current_thread();
1540
1541 if ((prior_lock_state & LCK_RW_W_WAITING) &&
1542 ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) {
1543 /*
1544 * Someone else has requested upgrade.
1545 * Since we've released the read lock, wake
1546 * him up if he's blocked waiting
1547 */
1548 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
1549 }
1550
1551 /* Check if dropping the lock means that we need to unpromote */
1552 if (lck->lck_rw_can_sleep) {
		lck_rw_lock_count_dec(thread, lck);
1554 }
1555
1556 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE,
1557 VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
1558
1559#ifdef DEBUG_RW
1560 remove_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
1561#endif /* DEBUG_RW */
1562
1563 return FALSE;
1564}
1565
1566/*
1567 * Routine: lck_rw_lock_shared_to_exclusive_success
1568 * Function:
1569 * the fast path code has already dropped our read
1570 * count and successfully acquired 'lck_rw_want_upgrade'
1571 * we just need to wait for the rest of the readers to drain
1572 * and then we can return as the exclusive holder of this lock
1573 */
1574static void
1575lck_rw_lock_shared_to_exclusive_success(
1576 lck_rw_t *lock)
1577{
1578 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock);
1579 int slept = 0;
1580 lck_rw_word_t word;
1581 wait_result_t res;
1582 boolean_t istate;
1583 lck_rw_drain_state_t drain_state;
1584
1585#if CONFIG_DTRACE
1586 uint64_t wait_interval = 0;
1587 int readers_at_sleep = 0;
1588 boolean_t dtrace_ls_initialized = FALSE;
1589 boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE;
1590#endif
1591
1592 while (lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE, NULL) != LCK_RW_DRAIN_S_DRAINED) {
1593 word.data = ordered_load_rw(lock);
1594#if CONFIG_DTRACE
1595 if (dtrace_ls_initialized == FALSE) {
1596 dtrace_ls_initialized = TRUE;
1597 dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0);
1598 dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0);
1599 dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block;
1600 if (dtrace_ls_enabled) {
1601 /*
1602 * Either sleeping or spinning is happening,
1603 * start a timing of our delay interval now.
1604 */
1605 readers_at_sleep = word.shared_count;
1606 wait_interval = mach_absolute_time();
1607 }
1608 }
1609#endif
1610
1611 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START,
1612 trace_lck, word.shared_count, 0, 0, 0);
1613
1614 drain_state = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE, NULL);
1615
1616 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END,
1617 trace_lck, lock->lck_rw_shared_count, 0, 0, 0);
1618
1619 if (drain_state == LCK_RW_DRAIN_S_DRAINED) {
1620 break;
1621 }
1622
1623 /*
		 * if we get here, the spin deadline in lck_rw_drain_status()
		 * has expired w/o the rw_shared_count having drained to 0
1626 * check to see if we're allowed to do a thread_block
1627 */
1628 if (word.can_sleep) {
			istate = lck_interlock_lock(lock);
1630
1631 word.data = ordered_load_rw(lock);
1632 if (word.shared_count != 0) {
1633 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START,
1634 trace_lck, word.shared_count, 0, 0, 0);
1635
1636 word.w_waiting = 1;
1637 ordered_store_rw(lock, word.data);
1638
				thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade);
1640 res = assert_wait(LCK_RW_WRITER_EVENT(lock),
1641 THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
				lck_interlock_unlock(lock, istate);
1643
1644 if (res == THREAD_WAITING) {
1645 res = thread_block(THREAD_CONTINUE_NULL);
1646 slept++;
1647 }
1648 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END,
1649 trace_lck, res, slept, 0, 0);
1650 } else {
				lck_interlock_unlock(lock, istate);
1652 break;
1653 }
1654 }
1655 }
1656#if CONFIG_DTRACE
1657 /*
1658 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
1659 */
1660 if (dtrace_ls_enabled == TRUE) {
1661 if (slept == 0) {
1662 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0);
1663 } else {
1664 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock,
1665 mach_absolute_time() - wait_interval, 1,
1666 (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
1667 }
1668 }
1669 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1);
1670#endif
1671}
1672
1673/*!
1674 * @function lck_rw_lock_shared_to_exclusive
1675 *
1676 * @abstract
1677 * Upgrades a rw_lock held in shared mode to exclusive.
1678 *
1679 * @discussion
1680 * This function can block.
 * Only one reader at a time can upgrade to exclusive mode. If the upgrade fails, the function will
 * return with the lock not held.
1683 * The caller needs to hold the lock in shared mode to upgrade it.
1684 *
1685 * @param lock rw_lock already held in shared mode to upgrade.
1686 *
1687 * @returns TRUE if the lock was upgraded, FALSE if it was not possible.
1688 * If the function was not able to upgrade the lock, the lock will be dropped
1689 * by the function.
1690 */
1691boolean_t
1692lck_rw_lock_shared_to_exclusive(
1693 lck_rw_t *lock)
1694{
1695 thread_t thread = current_thread();
1696 uint32_t data, prev;
1697
1698 assertf(lock->lck_rw_priv_excl != 0, "lock %p thread %p", lock, current_thread());
1699
1700#if DEBUG_RW
1701 assert_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1702#endif /* DEBUG_RW */
1703
1704 for (;;) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1706 if (data & LCK_RW_INTERLOCK) {
1707 atomic_exchange_abort();
1708 lck_rw_interlock_spin(lock);
1709 continue;
1710 }
1711 if (data & LCK_RW_WANT_UPGRADE) {
1712 data -= LCK_RW_SHARED_READER;
1713 if ((data & LCK_RW_SHARED_MASK) == 0) { /* we were the last reader */
1714 data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */
1715 }
			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
				return lck_rw_lock_shared_to_exclusive_failure(lock, prev);
1718 }
1719 } else {
1720 data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */
1721 data -= LCK_RW_SHARED_READER; /* and shed our read count */
1722			if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1723 break;
1724 }
1725 }
1726 cpu_pause();
1727 }
1728 /* we now own the WANT_UPGRADE */
1729 if (data & LCK_RW_SHARED_MASK) { /* check to see if all of the readers are drained */
1730 lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */
1731 }
1732
1733 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1734 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1735
1736 ordered_store_rw_owner(lock, thread->ctid);
1737#if CONFIG_DTRACE
1738 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0);
1739#endif /* CONFIG_DTRACE */
1740
1741#if DEBUG_RW
1742 change_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, __builtin_return_address(0));
1743#endif /* DEBUG_RW */
1744 return TRUE;
1745}
1746
1747/*
1748 * Routine: lck_rw_lock_exclusive_to_shared_gen
1749 * Function:
1750 *	The fast path has already dropped our exclusive
1751 *	state and bumped lck_rw_shared_count; all we need
1752 *	to do here is determine if anyone needs to be
1753 *	awakened.
1754 */
1755static void
1756lck_rw_lock_exclusive_to_shared_gen(
1757 lck_rw_t *lck,
1758 uint32_t prior_lock_state,
1759 void *caller)
1760{
1761#pragma unused(caller)
1762 __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
1763 lck_rw_word_t fake_lck;
1764
1765 /*
1766 * prior_lock state is a snapshot of the 1st word of the
1767 * lock in question... we'll fake up a pointer to it
1768	 * and carefully not access anything beyond what's defined
1769 * in the first word of a lck_rw_t
1770 */
1771 fake_lck.data = prior_lock_state;
1772
1773	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
1774	    trace_lck, fake_lck.want_excl, fake_lck.want_upgrade, 0, 0);
1775
1776 /*
1777 * don't wake up anyone waiting to take the lock exclusively
1778 * since we hold a read count... when the read count drops to 0,
1779 * the writers will be woken.
1780 *
1781 * wake up any waiting readers if we don't have any writers waiting,
1782 * or the lock is NOT marked as rw_priv_excl (writers have privilege)
1783 */
1784 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
1785 thread_wakeup(LCK_RW_READER_EVENT(lck));
1786 }
1787
1788 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
1789 trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);
1790
1791#if CONFIG_DTRACE
1792 LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
1793#endif
1794
1795#if DEBUG_RW
1796 thread_t thread = current_thread();
1797 change_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
1798#endif /* DEBUG_RW */
1799}
1800
1801/*!
1802 * @function lck_rw_lock_exclusive_to_shared
1803 *
1804 * @abstract
1805 * Downgrades a rw_lock held in exclusive mode to shared.
1806 *
1807 * @discussion
1808 * The caller needs to hold the lock in exclusive mode to be able to downgrade it.
1809 *
1810 * @param lock rw_lock already held in exclusive mode to downgrade.
1811 */
1812void
1813lck_rw_lock_exclusive_to_shared(
1814 lck_rw_t *lock)
1815{
1816 uint32_t data, prev;
1817
1818 assertf(lock->lck_rw_owner == current_thread()->ctid,
1819 "state=0x%x, owner=%p", lock->lck_rw_data,
1820 ctid_get_thread_unsafe(lock->lck_rw_owner));
1821 ordered_store_rw_owner(lock, 0);
1822
1823 for (;;) {
1824		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
1825 if (data & LCK_RW_INTERLOCK) {
1826 atomic_exchange_abort();
1827 lck_rw_interlock_spin(lock); /* wait for interlock to clear */
1828 continue;
1829 }
1830 data += LCK_RW_SHARED_READER;
1831 if (data & LCK_RW_WANT_UPGRADE) {
1832 data &= ~(LCK_RW_WANT_UPGRADE);
1833 } else {
1834 data &= ~(LCK_RW_WANT_EXCL);
1835 }
1836 if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) {
1837 data &= ~(LCK_RW_W_WAITING);
1838 }
1839		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
1840 break;
1841 }
1842 cpu_pause();
1843 }
1844	lck_rw_lock_exclusive_to_shared_gen(lock, prev, __builtin_return_address(0));
1845}
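
/*
 * Usage sketch (illustrative only): a writer that publishes an update and
 * then downgrades so it can keep reading the data it just wrote without
 * excluding other readers. table and table_insert_locked() are
 * hypothetical; only the lck_rw_* calls are real.
 *
 *	lck_rw_lock_exclusive(&table->t_lock);
 *	table_insert_locked(table, item);
 *	lck_rw_lock_exclusive_to_shared(&table->t_lock);
 *	// ... read-only post-processing under the shared hold ...
 *	lck_rw_unlock_shared(&table->t_lock);
 */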
1846
1847/*
1848 * Very sad hack, but the codegen for lck_rw_lock
1849 * is very unhappy with the combination of __builtin_return_address()
1850 * and a noreturn function. For some reason it adds more frames
1851 * than it should. rdar://76570684
1852 */
1853void
1854_lck_rw_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
1855#pragma clang diagnostic push
1856#pragma clang diagnostic ignored "-Wmissing-noreturn"
1857__attribute__((noinline, weak))
1858void
1859_lck_rw_lock_type_panic(
1860 lck_rw_t *lck,
1861 lck_rw_type_t lck_rw_type)
1862{
1863 panic("lck_rw_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
1864}
1865#pragma clang diagnostic pop
1866
1867/*!
1868 * @function lck_rw_lock
1869 *
1870 * @abstract
1871 * Locks a rw_lock with the specified type.
1872 *
1873 * @discussion
1874 * See lck_rw_lock_shared() or lck_rw_lock_exclusive() for more details.
1875 *
1876 * @param lck rw_lock to lock.
1877 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
1878 */
1879void
1880lck_rw_lock(
1881 lck_rw_t *lck,
1882 lck_rw_type_t lck_rw_type)
1883{
1884 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
1885		return lck_rw_lock_shared_internal(lck, __builtin_return_address(0));
1886	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
1887		return lck_rw_lock_exclusive_internal(lck, __builtin_return_address(0));
1888 }
1889 _lck_rw_lock_type_panic(lck, lck_rw_type);
1890}
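
/*
 * Usage sketch (illustrative only): callers that pick the mode at run time
 * can carry a lck_rw_type_t and hand it to lck_rw_lock()/lck_rw_unlock().
 * obj and want_write are hypothetical.
 *
 *	lck_rw_type_t mode = want_write ? LCK_RW_TYPE_EXCLUSIVE
 *	                                : LCK_RW_TYPE_SHARED;
 *	lck_rw_lock(&obj->o_lock, mode);
 *	// ... access obj according to the mode taken ...
 *	lck_rw_unlock(&obj->o_lock, mode);
 */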
1891
1892__attribute__((always_inline))
1893static boolean_t
1894lck_rw_try_lock_shared_internal_inline(
1895 lck_rw_t *lock,
1896 void *caller)
1897{
1898#pragma unused(caller)
1899
1900 uint32_t data, prev;
1901 thread_t thread = current_thread();
1902#ifdef DEBUG_RW
1903 boolean_t check_canlock = TRUE;
1904#endif
1905
1906 for (;;) {
1907		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
1908 if (data & LCK_RW_INTERLOCK) {
1909 atomic_exchange_abort();
1910 lck_rw_interlock_spin(lock);
1911 continue;
1912 }
1913 if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
1914 atomic_exchange_abort();
1915 return FALSE; /* lock is busy */
1916 }
1917#ifdef DEBUG_RW
1918 if ((data & LCK_RW_SHARED_MASK) == 0) {
1919 /*
1920 * If the lock is uncontended,
1921 * we do not need to check if we can lock it
1922 */
1923 check_canlock = FALSE;
1924 }
1925#endif
1926 data += LCK_RW_SHARED_READER; /* Increment reader refcount */
1927		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
1928 break;
1929 }
1930 cpu_pause();
1931 }
1932#ifdef DEBUG_RW
1933 if (check_canlock) {
1934 /*
1935 * Best effort attempt to check that this thread
1936 * is not already holding the lock (this checks read mode too).
1937 */
1938 assert_canlock_rwlock(lock, thread, LCK_RW_TYPE_SHARED);
1939 }
1940#endif
1941 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
1942 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
1943
1944 if (lock->lck_rw_can_sleep) {
1945 lck_rw_lock_count_inc(thread, lock);
1946 } else if (get_preemption_level() == 0) {
1947 panic("Taking non-sleepable RW lock with preemption enabled");
1948 }
1949
1950#if CONFIG_DTRACE
1951 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED);
1952#endif /* CONFIG_DTRACE */
1953
1954#ifdef DEBUG_RW
1955 add_held_rwlock(lock, thread, LCK_RW_TYPE_SHARED, caller);
1956#endif /* DEBUG_RW */
1957 return TRUE;
1958}
1959
1960__attribute__((noinline))
1961static boolean_t
1962lck_rw_try_lock_shared_internal(
1963 lck_rw_t *lock,
1964 void *caller)
1965{
1966 return lck_rw_try_lock_shared_internal_inline(lock, caller);
1967}
1968
1969/*!
1970 * @function lck_rw_try_lock_shared
1971 *
1972 * @abstract
1973 * Tries to lock a rw_lock in read mode.
1974 *
1975 * @discussion
1976 * This function will return and not block in case the lock cannot be acquired in shared mode.
1977 * See lck_rw_lock_shared for more details.
1978 *
1979 * @param lock rw_lock to lock.
1980 *
1981 * @returns TRUE if the lock is successfully acquired, FALSE in case it is held in a conflicting mode.
1982 */
1983boolean_t
1984lck_rw_try_lock_shared(
1985 lck_rw_t *lock)
1986{
1987	return lck_rw_try_lock_shared_internal_inline(lock, __builtin_return_address(0));
1988}
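
/*
 * Usage sketch (illustrative only): an opportunistic reader on a path that
 * must not block skips its work when the trylock fails. stats and its
 * fields are hypothetical.
 *
 *	if (lck_rw_try_lock_shared(&stats->s_lock)) {
 *		snapshot = stats->s_counters;
 *		lck_rw_unlock_shared(&stats->s_lock);
 *	} else {
 *		// a writer holds (or is waiting to upgrade) the lock;
 *		// skip this sample rather than block
 *	}
 */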
1989
1990__attribute__((always_inline))
1991static boolean_t
1992lck_rw_try_lock_exclusive_internal_inline(
1993 lck_rw_t *lock,
1994 void *caller)
1995{
1996#pragma unused(caller)
1997 uint32_t data, prev;
1998
1999 for (;;) {
2000		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp);
2001 if (data & LCK_RW_INTERLOCK) {
2002 atomic_exchange_abort();
2003 lck_rw_interlock_spin(lock);
2004 continue;
2005 }
2006 if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) {
2007 atomic_exchange_abort();
2008 return FALSE;
2009 }
2010 data |= LCK_RW_WANT_EXCL;
2011		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) {
2012 break;
2013 }
2014 cpu_pause();
2015 }
2016 thread_t thread = current_thread();
2017
2018 if (lock->lck_rw_can_sleep) {
2019 lck_rw_lock_count_inc(thread, lock);
2020 } else if (get_preemption_level() == 0) {
2021 panic("Taking non-sleepable RW lock with preemption enabled");
2022 }
2023
2024 assertf(lock->lck_rw_owner == 0, "state=0x%x, owner=%p",
2025 ordered_load_rw(lock), ctid_get_thread_unsafe(lock->lck_rw_owner));
2026
2027 ordered_store_rw_owner(lock, thread->ctid);
2028#if CONFIG_DTRACE
2029 LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL);
2030#endif /* CONFIG_DTRACE */
2031
2032#ifdef DEBUG_RW
2033 add_held_rwlock(lock, thread, LCK_RW_TYPE_EXCLUSIVE, caller);
2034#endif /* DEBUG_RW */
2035 return TRUE;
2036}
2037
2038__attribute__((noinline))
2039static boolean_t
2040lck_rw_try_lock_exclusive_internal(
2041 lck_rw_t *lock,
2042 void *caller)
2043{
2044 return lck_rw_try_lock_exclusive_internal_inline(lock, caller);
2045}
2046
2047/*!
2048 * @function lck_rw_try_lock_exclusive
2049 *
2050 * @abstract
2051 * Tries to lock a rw_lock in write mode.
2052 *
2053 * @discussion
2054 * This function will return and not block in case the lock is already held.
2055 * See lck_rw_lock_exclusive for more details.
2056 *
2057 * @param lock rw_lock to lock.
2058 *
2059 * @returns TRUE if the lock is successfully acquired, FALSE in case it was already held.
2060 */
2061boolean_t
2062lck_rw_try_lock_exclusive(
2063 lck_rw_t *lock)
2064{
2065	return lck_rw_try_lock_exclusive_internal_inline(lock, __builtin_return_address(0));
2066}
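
/*
 * Usage sketch (illustrative only): attempt an exclusive acquisition
 * without blocking and fall back to the blocking path only when needed.
 * obj is hypothetical.
 *
 *	if (!lck_rw_try_lock_exclusive(&obj->o_lock)) {
 *		// contended: take the blocking path
 *		lck_rw_lock_exclusive(&obj->o_lock);
 *	}
 *	// ... modify obj ...
 *	lck_rw_unlock_exclusive(&obj->o_lock);
 */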
2067
2068/*
2069 * Very sad hack, but the codegen for lck_rw_try_lock
2070 * is very unhappy with the combination of __builtin_return_address()
2071 * and a noreturn function. For some reason it adds more frames
2072 * than it should. rdar://76570684
2073 */
2074boolean_t
2075_lck_rw_try_lock_type_panic(lck_rw_t *lck, lck_rw_type_t lck_rw_type);
2076#pragma clang diagnostic push
2077#pragma clang diagnostic ignored "-Wmissing-noreturn"
2078__attribute__((noinline, weak))
2079boolean_t
2080_lck_rw_try_lock_type_panic(
2081 lck_rw_t *lck,
2082 lck_rw_type_t lck_rw_type)
2083{
2084	panic("lck_rw_try_lock(): Invalid RW lock type: %x for lock %p", lck_rw_type, lck);
2085}
2086#pragma clang diagnostic pop
2087
2088/*!
2089 * @function lck_rw_try_lock
2090 *
2091 * @abstract
2092 * Tries to lock a rw_lock with the specified type.
2093 *
2094 * @discussion
2095 * This function will return and not wait/block in case the lock cannot be acquired immediately.
2096 * See lck_rw_try_lock_shared() or lck_rw_try_lock_exclusive() for more details.
2097 *
2098 * @param lck rw_lock to lock.
2099 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2100 *
2101 * @returns TRUE if the lock is successfully acquired, FALSE in case it could not be acquired without blocking.
2102 */
2103boolean_t
2104lck_rw_try_lock(
2105 lck_rw_t *lck,
2106 lck_rw_type_t lck_rw_type)
2107{
2108 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2109		return lck_rw_try_lock_shared_internal(lck, __builtin_return_address(0));
2110	} else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2111		return lck_rw_try_lock_exclusive_internal(lck, __builtin_return_address(0));
2112 }
2113 return _lck_rw_try_lock_type_panic(lck, lck_rw_type);
2114}
2115
2116/*
2117 * Routine: lck_rw_done_gen
2118 *
2119 * prior_lock_state is the value in the 1st
2120 * word of the lock at the time of a successful
2121 * atomic compare and exchange with the new value...
2122 * it represents the state of the lock before we
2123 * decremented the rw_shared_count or cleared either
2124 * rw_want_upgrade or rw_want_write and
2125 * the lck_x_waiting bits... since the wrapper
2126 * routine has already changed the state atomically,
2127 * we just need to decide if we should
2128 * wake up anyone and what value to return... we do
2129 * this by examining the state of the lock before
2130 * we changed it
2131 */
2132static lck_rw_type_t
2133lck_rw_done_gen(
2134 lck_rw_t *lck,
2135 uint32_t prior_lock_state)
2136{
2137 lck_rw_word_t fake_lck;
2138 lck_rw_type_t lock_type;
2139 thread_t thread;
2140
2141 /*
2142 * prior_lock state is a snapshot of the 1st word of the
2143 * lock in question... we'll fake up a pointer to it
2144	 * and carefully not access anything beyond what's defined
2145 * in the first word of a lck_rw_t
2146 */
2147 fake_lck.data = prior_lock_state;
2148
2149 if (fake_lck.shared_count <= 1) {
2150 if (fake_lck.w_waiting) {
2151 thread_wakeup(LCK_RW_WRITER_EVENT(lck));
2152 }
2153
2154 if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) {
2155 thread_wakeup(LCK_RW_READER_EVENT(lck));
2156 }
2157 }
2158 if (fake_lck.shared_count) {
2159 lock_type = LCK_RW_TYPE_SHARED;
2160 } else {
2161 lock_type = LCK_RW_TYPE_EXCLUSIVE;
2162 }
2163
2164 /* Check if dropping the lock means that we need to unpromote */
2165 thread = current_thread();
2166 if (fake_lck.can_sleep) {
2167		lck_rw_lock_count_dec(thread, lck);
2168 }
2169
2170#if CONFIG_DTRACE
2171 LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1);
2172#endif
2173
2174#ifdef DEBUG_RW
2175 remove_held_rwlock(lck, thread, lock_type);
2176#endif /* DEBUG_RW */
2177 return lock_type;
2178}
2179
2180/*!
2181 * @function lck_rw_done
2182 *
2183 * @abstract
2184 * Force unlocks a rw_lock without consistency checks.
2185 *
2186 * @discussion
2187 * Do not use unless sure you can avoid consistency checks.
2188 *
2189 * @param lock rw_lock to unlock.
2190 *
 * @returns LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE, matching the mode in which the lock was held.
 */
2191lck_rw_type_t
2192lck_rw_done(
2193 lck_rw_t *lock)
2194{
2195 uint32_t data, prev;
2196 boolean_t once = FALSE;
2197
2198#ifdef DEBUG_RW
2199 /*
2200 * Best effort attempt to check that this thread
2201 * is holding the lock.
2202 */
2203 thread_t thread = current_thread();
2204 assert_held_rwlock(lock, thread, 0);
2205#endif /* DEBUG_RW */
2206 for (;;) {
2207		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp);
2208 if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */
2209 atomic_exchange_abort();
2210 lck_rw_interlock_spin(lock);
2211 continue;
2212 }
2213 if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */
2214 assertf(lock->lck_rw_owner == 0,
2215 "state=0x%x, owner=%p", lock->lck_rw_data,
2216 ctid_get_thread_unsafe(lock->lck_rw_owner));
2217 data -= LCK_RW_SHARED_READER;
2218 if ((data & LCK_RW_SHARED_MASK) == 0) { /* if reader count has now gone to 0, check for waiters */
2219 goto check_waiters;
2220 }
2221 } else { /* if reader count == 0, must be exclusive lock */
2222 if (data & LCK_RW_WANT_UPGRADE) {
2223 data &= ~(LCK_RW_WANT_UPGRADE);
2224 } else {
2225 if (data & LCK_RW_WANT_EXCL) {
2226 data &= ~(LCK_RW_WANT_EXCL);
2227 } else { /* lock is not 'owned', panic */
2228 panic("Releasing non-exclusive RW lock without a reader refcount!");
2229 }
2230 }
2231 if (!once) {
2232 // Only check for holder and clear it once
2233 assertf(lock->lck_rw_owner == current_thread()->ctid,
2234 "state=0x%x, owner=%p", lock->lck_rw_data,
2235 ctid_get_thread_unsafe(lock->lck_rw_owner));
2236 ordered_store_rw_owner(lock, 0);
2237 once = TRUE;
2238 }
2239check_waiters:
2240 /*
2241 * test the original values to match what
2242 * lck_rw_done_gen is going to do to determine
2243 * which wakeups need to happen...
2244 *
2245 * if !(fake_lck->lck_rw_priv_excl && fake_lck->lck_w_waiting)
2246 */
2247 if (prev & LCK_RW_W_WAITING) {
2248 data &= ~(LCK_RW_W_WAITING);
2249 if ((prev & LCK_RW_PRIV_EXCL) == 0) {
2250 data &= ~(LCK_RW_R_WAITING);
2251 }
2252 } else {
2253 data &= ~(LCK_RW_R_WAITING);
2254 }
2255 }
2256		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) {
2257 break;
2258 }
2259 cpu_pause();
2260 }
2261	return lck_rw_done_gen(lock, prev);
2262}
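
/*
 * Usage sketch (illustrative only): lck_rw_done() is useful when the caller
 * no longer tracks the mode in which the lock is held; the returned type can
 * be used to re-take the lock in the same mode later (the pattern
 * lck_rw_sleep() itself uses). obj is hypothetical.
 *
 *	lck_rw_type_t held = lck_rw_done(&obj->o_lock);
 *	// ... lock released, do work that may block ...
 *	lck_rw_lock(&obj->o_lock, held);
 */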
2263
2264/*!
2265 * @function lck_rw_unlock_shared
2266 *
2267 * @abstract
2268 * Unlocks a rw_lock previously locked in shared mode.
2269 *
2270 * @discussion
2271 * The same thread that locked the lock needs to unlock it.
2272 *
2273 * @param lck rw_lock held in shared mode to unlock.
2274 */
2275void
2276lck_rw_unlock_shared(
2277 lck_rw_t *lck)
2278{
2279 lck_rw_type_t ret;
2280
2281 assertf(lck->lck_rw_owner == 0,
2282 "state=0x%x, owner=%p", lck->lck_rw_data,
2283 ctid_get_thread_unsafe(lck->lck_rw_owner));
2284 assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
2285	ret = lck_rw_done(lck);
2286
2287 if (ret != LCK_RW_TYPE_SHARED) {
2288 panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
2289 }
2290}
2291
2292/*!
2293 * @function lck_rw_unlock_exclusive
2294 *
2295 * @abstract
2296 * Unlocks a rw_lock previously locked in exclusive mode.
2297 *
2298 * @discussion
2299 * The same thread that locked the lock needs to unlock it.
2300 *
2301 * @param lck rw_lock held in exclusive mode to unlock.
2302 */
2303void
2304lck_rw_unlock_exclusive(
2305 lck_rw_t *lck)
2306{
2307 lck_rw_type_t ret;
2308
2309 assertf(lck->lck_rw_owner == current_thread()->ctid,
2310 "state=0x%x, owner=%p", lck->lck_rw_data,
2311 ctid_get_thread_unsafe(lck->lck_rw_owner));
2312	ret = lck_rw_done(lck);
2313
2314 if (ret != LCK_RW_TYPE_EXCLUSIVE) {
2315 panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
2316 }
2317}
2318
2319/*!
2320 * @function lck_rw_unlock
2321 *
2322 * @abstract
2323 * Unlocks a rw_lock previously locked with lck_rw_type.
2324 *
2325 * @discussion
2326 * The lock must be unlocked by the same thread it was locked from.
2327 * The type of the lock/unlock have to match, unless an upgrade/downgrade was performed while
2328 * holding the lock.
2329 *
2330 * @param lck rw_lock to unlock.
2331 * @param lck_rw_type LCK_RW_TYPE_SHARED or LCK_RW_TYPE_EXCLUSIVE
2332 */
2333void
2334lck_rw_unlock(
2335 lck_rw_t *lck,
2336 lck_rw_type_t lck_rw_type)
2337{
2338 if (lck_rw_type == LCK_RW_TYPE_SHARED) {
2339 lck_rw_unlock_shared(lck);
2340 } else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) {
2341 lck_rw_unlock_exclusive(lck);
2342 } else {
2343 panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
2344 }
2345}
2346
2347/*!
2348 * @function lck_rw_assert
2349 *
2350 * @abstract
2351 * Asserts the rw_lock is held.
2352 *
2353 * @discussion
2354 * read-write locks do not have a concept of ownership when held in shared mode,
2355 * so this function merely asserts that someone is holding the lock, not necessarily the caller.
2356 * However, if rw_lock_debug is on, a best-effort mechanism to track the owners is in place, and
2357 * this function can be more accurate.
2358 * Type can be LCK_RW_ASSERT_SHARED, LCK_RW_ASSERT_EXCLUSIVE, LCK_RW_ASSERT_HELD or
2359 * LCK_RW_ASSERT_NOTHELD.
2360 *
2361 * @param lck rw_lock to check.
2362 * @param type assert type
2363 */
2364void
2365lck_rw_assert(
2366 lck_rw_t *lck,
2367 unsigned int type)
2368{
2369 thread_t thread = current_thread();
2370
2371 switch (type) {
2372 case LCK_RW_ASSERT_SHARED:
2373 if ((lck->lck_rw_shared_count != 0) &&
2374 (lck->lck_rw_owner == 0)) {
2375#if DEBUG_RW
2376 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2377#endif /* DEBUG_RW */
2378 return;
2379 }
2380 break;
2381 case LCK_RW_ASSERT_EXCLUSIVE:
2382 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2383 (lck->lck_rw_shared_count == 0) &&
2384 (lck->lck_rw_owner == thread->ctid)) {
2385#if DEBUG_RW
2386 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2387#endif /* DEBUG_RW */
2388 return;
2389 }
2390 break;
2391 case LCK_RW_ASSERT_HELD:
2392 if (lck->lck_rw_shared_count != 0) {
2393#if DEBUG_RW
2394 assert_held_rwlock(lck, thread, LCK_RW_TYPE_SHARED);
2395#endif /* DEBUG_RW */
2396 return; // Held shared
2397 }
2398 if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2399 (lck->lck_rw_owner == thread->ctid)) {
2400#if DEBUG_RW
2401 assert_held_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2402#endif /* DEBUG_RW */
2403 return; // Held exclusive
2404 }
2405 break;
2406 case LCK_RW_ASSERT_NOTHELD:
2407 if ((lck->lck_rw_shared_count == 0) &&
2408 !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
2409 (lck->lck_rw_owner == 0)) {
2410#ifdef DEBUG_RW
2411 assert_canlock_rwlock(lck, thread, LCK_RW_TYPE_EXCLUSIVE);
2412#endif /* DEBUG_RW */
2413 return;
2414 }
2415 break;
2416 default:
2417 break;
2418 }
2419 panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2420}
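
/*
 * Usage sketch (illustrative only): internal helpers commonly document their
 * locking contract with lck_rw_assert() at entry. table_remove_locked() and
 * its types are hypothetical.
 *
 *	static void
 *	table_remove_locked(struct table *t, struct item *it)
 *	{
 *		lck_rw_assert(&t->t_lock, LCK_RW_ASSERT_EXCLUSIVE);
 *		// ... caller guarantees the exclusive hold, safe to unlink ...
 *	}
 */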
2421
2422/*!
2423 * @function kdp_lck_rw_lock_is_acquired_exclusive
2424 *
2425 * @abstract
2426 * Checks if a rw_lock is held exclusively.
2427 *
2428 * @discussion
2429 * NOT SAFE: To be used only by kernel debugger to avoid deadlock.
2430 *
2431 * @param lck lock to check
2432 *
2433 * @returns TRUE if the lock is held exclusively
2434 */
2435boolean_t
2436kdp_lck_rw_lock_is_acquired_exclusive(
2437 lck_rw_t *lck)
2438{
2439 if (not_in_kdp) {
2440		panic("rw lock exclusive check done outside of kernel debugger");
2441 }
2442 return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE;
2443}
2444
2445void
2446kdp_rwlck_find_owner(
2447 __unused struct waitq *waitq,
2448 event64_t event,
2449 thread_waitinfo_t *waitinfo)
2450{
2451 lck_rw_t *rwlck = NULL;
2452 switch (waitinfo->wait_type) {
2453 case kThreadWaitKernelRWLockRead:
2454 rwlck = READ_EVENT_TO_RWLOCK(event);
2455 break;
2456 case kThreadWaitKernelRWLockWrite:
2457 case kThreadWaitKernelRWLockUpgrade:
2458 rwlck = WRITE_EVENT_TO_RWLOCK(event);
2459 break;
2460 default:
2461 panic("%s was called with an invalid blocking type", __FUNCTION__);
2462 break;
2463 }
2464 waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck);
2465	waitinfo->owner = thread_tid(ctid_get_thread(rwlck->lck_rw_owner));
2466}
2467
2468/*!
2469 * @function lck_rw_lock_yield_shared
2470 *
2471 * @abstract
2472 * Yields a rw_lock held in shared mode.
2473 *
2474 * @discussion
2475 * This function can block.
2476 * Yields the lock in case there are writers waiting.
2477 * The yield will unlock, block, and re-lock the lock in shared mode.
2478 *
2479 * @param lck rw_lock already held in shared mode to yield.
2480 * @param force_yield if set to true it will always yield irrespective of the lock status
2481 *
2482 * @returns TRUE if the lock was yielded, FALSE otherwise
2483 */
2484bool
2485lck_rw_lock_yield_shared(
2486 lck_rw_t *lck,
2487 boolean_t force_yield)
2488{
2489 lck_rw_word_t word;
2490
2491 lck_rw_assert(lck, LCK_RW_ASSERT_SHARED);
2492
2493 word.data = ordered_load_rw(lck);
2494 if (word.want_excl || word.want_upgrade || force_yield) {
2495 lck_rw_unlock_shared(lck);
2496 mutex_pause(2);
2497		lck_rw_lock_shared(lck);
2498 return true;
2499 }
2500
2501 return false;
2502}
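
/*
 * Usage sketch (illustrative only): a long shared-mode walk periodically
 * offers the lock to waiting writers; if the yield happened, state derived
 * from the protected data must be revalidated. list, list_first(),
 * list_next() and inspect() are hypothetical.
 *
 *	lck_rw_lock_shared(&list->l_lock);
 *restart:
 *	for (elem = list_first(list); elem != NULL; elem = list_next(elem)) {
 *		inspect(elem);
 *		if (lck_rw_lock_yield_shared(&list->l_lock, FALSE)) {
 *			// the lock was dropped and re-taken:
 *			// elem may be stale, restart the walk
 *			goto restart;
 *		}
 *	}
 *	lck_rw_unlock_shared(&list->l_lock);
 */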
2503
2504/*!
2505 * @function lck_rw_lock_yield_exclusive
2506 *
2507 * @abstract
2508 * Yields a rw_lock held in exclusive mode.
2509 *
2510 * @discussion
2511 * This function can block.
2512 * Yields the lock in case there are waiters, according to the requested mode.
2513 * The yield will unlock, block, and re-lock the lock in exclusive mode.
2514 *
2515 * @param lck rw_lock already held in exclusive mode to yield.
2516 * @param mode when to yield.
2517 *
2518 * @returns TRUE if the lock was yielded, FALSE otherwise
2519 */
2520bool
2521lck_rw_lock_yield_exclusive(
2522 lck_rw_t *lck,
2523 lck_rw_yield_t mode)
2524{
2525 lck_rw_word_t word;
2526 bool yield = false;
2527
2528 lck_rw_assert(lck, LCK_RW_ASSERT_EXCLUSIVE);
2529
2530 if (mode == LCK_RW_YIELD_ALWAYS) {
2531 yield = true;
2532 } else {
2533 word.data = ordered_load_rw(lck);
2534 if (word.w_waiting) {
2535 yield = true;
2536 } else if (mode == LCK_RW_YIELD_ANY_WAITER) {
2537 yield = (word.r_waiting != 0);
2538 }
2539 }
2540
2541 if (yield) {
2542 lck_rw_unlock_exclusive(lck);
2543 mutex_pause(2);
2544		lck_rw_lock_exclusive(lck);
2545 }
2546
2547 return yield;
2548}
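
/*
 * Usage sketch (illustrative only): a bulk update done under the exclusive
 * lock yields whenever another waiter shows up, bounding how long readers
 * and writers are starved. table_has_work() and table_do_one_update() are
 * hypothetical.
 *
 *	lck_rw_lock_exclusive(&table->t_lock);
 *	while (table_has_work(table)) {
 *		table_do_one_update(table);
 *		(void)lck_rw_lock_yield_exclusive(&table->t_lock,
 *		    LCK_RW_YIELD_ANY_WAITER);
 *	}
 *	lck_rw_unlock_exclusive(&table->t_lock);
 */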
2549
2550/*!
2551 * @function lck_rw_sleep
2552 *
2553 * @abstract
2554 * Assert_wait on an event while holding the rw_lock.
2555 *
2556 * @discussion
2557 * the flags can decide how to re-acquire the lock upon wake up
2558 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2559 * and if the priority needs to be kept boosted until the lock is
2560 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2561 *
2562 * @param lck rw_lock to use to synch the assert_wait.
2563 * @param lck_sleep_action flags.
2564 * @param event event to assert_wait on.
2565 * @param interruptible wait type.
2566 */
2567wait_result_t
2568lck_rw_sleep(
2569 lck_rw_t *lck,
2570 lck_sleep_action_t lck_sleep_action,
2571 event_t event,
2572 wait_interrupt_t interruptible)
2573{
2574 wait_result_t res;
2575 lck_rw_type_t lck_rw_type;
2576 thread_pri_floor_t token;
2577
2578 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2579 panic("Invalid lock sleep action %x", lck_sleep_action);
2580 }
2581
2582 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2583 /*
2584 * Although we are dropping the RW lock, the intent in most cases
2585 * is that this thread remains as an observer, since it may hold
2586 * some secondary resource, but must yield to avoid deadlock. In
2587 * this situation, make sure that the thread is boosted to the
2588 * ceiling while blocked, so that it can re-acquire the
2589 * RW lock at that priority.
2590 */
2591 token = thread_priority_floor_start();
2592 }
2593
2594 res = assert_wait(event, interruptible);
2595 if (res == THREAD_WAITING) {
2596		lck_rw_type = lck_rw_done(lck);
2597 res = thread_block(THREAD_CONTINUE_NULL);
2598 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2599 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2600 lck_rw_lock(lck, lck_rw_type);
2601 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2602				lck_rw_lock_exclusive(lck);
2603			} else {
2604				lck_rw_lock_shared(lck);
2605 }
2606 }
2607 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2608		(void)lck_rw_done(lck);
2609 }
2610
2611 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2612		thread_priority_floor_end(&token);
2613 }
2614
2615 return res;
2616}
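
/*
 * Usage sketch (illustrative only): the classic wait-for-condition loop,
 * re-acquiring the lock in exclusive mode on each wakeup. q and its fields
 * are hypothetical; the waker would call thread_wakeup() on the same event.
 *
 *	lck_rw_lock_exclusive(&q->q_lock);
 *	while (q->q_count == 0) {
 *		(void)lck_rw_sleep(&q->q_lock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&q->q_count, THREAD_UNINT);
 *	}
 *	// ... consume an element under the exclusive hold ...
 *	lck_rw_unlock_exclusive(&q->q_lock);
 */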
2617
2618/*!
2619 * @function lck_rw_sleep_deadline
2620 *
2621 * @abstract
2622 * Assert_wait_deadline on an event while holding the rw_lock.
2623 *
2624 * @discussion
2625 * the flags can decide how to re-acquire the lock upon wake up
2626 * (LCK_SLEEP_SHARED, or LCK_SLEEP_EXCLUSIVE, or LCK_SLEEP_UNLOCK)
2627 * and if the priority needs to be kept boosted until the lock is
2628 * re-acquired (LCK_SLEEP_PROMOTED_PRI).
2629 *
2630 * @param lck rw_lock to use to synch the assert_wait.
2631 * @param lck_sleep_action flags.
2632 * @param event event to assert_wait on.
2633 * @param interruptible wait type.
2634 * @param deadline time by which the thread will be woken up if the event has not been signaled.
2635 */
2636wait_result_t
2637lck_rw_sleep_deadline(
2638 lck_rw_t *lck,
2639 lck_sleep_action_t lck_sleep_action,
2640 event_t event,
2641 wait_interrupt_t interruptible,
2642 uint64_t deadline)
2643{
2644 wait_result_t res;
2645 lck_rw_type_t lck_rw_type;
2646 thread_pri_floor_t token;
2647
2648 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0) {
2649 panic("Invalid lock sleep action %x", lck_sleep_action);
2650 }
2651
2652 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2653 token = thread_priority_floor_start();
2654 }
2655
2656 res = assert_wait_deadline(event, interruptible, deadline);
2657 if (res == THREAD_WAITING) {
2658		lck_rw_type = lck_rw_done(lck);
2659 res = thread_block(THREAD_CONTINUE_NULL);
2660 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
2661 if (!(lck_sleep_action & (LCK_SLEEP_SHARED | LCK_SLEEP_EXCLUSIVE))) {
2662 lck_rw_lock(lck, lck_rw_type);
2663 } else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE) {
2664				lck_rw_lock_exclusive(lck);
2665			} else {
2666				lck_rw_lock_shared(lck);
2667 }
2668 }
2669 } else if (lck_sleep_action & LCK_SLEEP_UNLOCK) {
2670		(void)lck_rw_done(lck);
2671 }
2672
2673 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
2674		thread_priority_floor_end(&token);
2675 }
2676
2677 return res;
2678}
2679
2680/*
2681 * Reader-writer lock promotion
2682 *
2683 * We support a limited form of reader-writer
2684 * lock promotion whose effects are:
2685 *
2686 * * Qualifying threads have decay disabled
2687 * * Scheduler priority is reset to a floor of
2688 *     their statically assigned priority
2689 * or MINPRI_RWLOCK
2690 *
2691 * The rationale is that lck_rw_ts do not have
2692 * a single owner, so we cannot apply a directed
2693 * priority boost from all waiting threads
2694 * to all holding threads without maintaining
2695 * lists of all shared owners and all waiting
2696 * threads for every lock.
2697 *
2698 * Instead (and to preserve the uncontended fast-
2699 * path), acquiring (or attempting to acquire)
2700 * a RW lock in shared or exclusive lock increments
2701 * a per-thread counter. Only if that thread stops
2702 * making forward progress (for instance blocking
2703 * on a mutex, or being preempted) do we consult
2704 * the counter and apply the priority floor.
2705 * When the thread becomes runnable again (or in
2706 * the case of preemption it never stopped being
2707 * runnable), it has the priority boost and should
2708 * be in a good position to run on the CPU and
2709 * release all RW locks (at which point the priority
2710 * boost is cleared).
2711 *
2712 * Care must be taken to ensure that priority
2713 * boosts are not retained indefinitely, since unlike
2714 * mutex priority boosts (where the boost is tied
2715 * to the mutex lifecycle), the boost is tied
2716 * to the thread and independent of any particular
2717 * lck_rw_t. Assertions are in place on return
2718 * to userspace so that the boost is not held
2719 * indefinitely.
2720 *
2721 * The routines that increment/decrement the
2722 * per-thread counter should err on the side of
2723 * incrementing any time a preemption is possible
2724 * and the lock would be visible to the rest of the
2725 * system as held (so it should be incremented before
2726 * interlocks are dropped/preemption is enabled, or
2727 * before a CAS is executed to acquire the lock).
2728 *
2729 */
2730
2731/*!
2732 * @function lck_rw_clear_promotion
2733 *
2734 * @abstract
2735 * Undo priority promotions when the last rw_lock
2736 * is released by a thread (if a promotion was active).
2737 *
2738 * @param thread thread to demote.
2739 * @param lock object reason for the demotion.
2740 */
2741__attribute__((noinline))
2742static void
2743lck_rw_clear_promotion(thread_t thread, const void *lock)
2744{
2745 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
2746 spl_t s = splsched();
2747 thread_lock(thread);
2748
2749 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
2750 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED,
2751		    unslide_for_kdebug(lock));
2752 }
2753
2754 thread_unlock(thread);
2755 splx(s);
2756}
2757
2758/*!
2759 * @function lck_rw_set_promotion_locked
2760 *
2761 * @abstract
2762 * Callout from context switch if the thread goes
2763 * off core with a positive rwlock_count.
2764 *
2765 * @discussion
2766 * Called at splsched with the thread locked.
2767 *
2768 * @param thread thread to promote.
2769 */
2770__attribute__((always_inline))
2771void
2772lck_rw_set_promotion_locked(thread_t thread)
2773{
2774 if (LcksOpts & LCK_OPTION_DISABLE_RW_PRIO) {
2775 return;
2776 }
2777
2778 assert(thread->rwlock_count > 0);
2779
2780 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2781		sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
2782 }
2783}
2784
2785__attribute__((always_inline))
2786void
2787lck_rw_lock_count_inc(thread_t thread, const void *lock __unused)
2788{
2789 if (thread->rwlock_count++ == 0) {
2790#if MACH_ASSERT
2791 /*
2792 * Set the ast to check that the
2793 * rwlock_count is going to be set to zero when
2794 * going back to userspace.
2795 * Set it only once when we increment it for the first time.
2796 */
2797 act_set_debug_assert();
2798#endif
2799 }
2800}
2801
2802__abortlike
2803static void
2804__lck_rw_lock_count_dec_panic(thread_t thread)
2805{
2806 panic("rw lock count underflow for thread %p", thread);
2807}
2808
2809__attribute__((always_inline))
2810void
2811lck_rw_lock_count_dec(thread_t thread, const void *lock)
2812{
2813 uint32_t rwlock_count = thread->rwlock_count--;
2814
2815 if (rwlock_count == 0) {
2816 __lck_rw_lock_count_dec_panic(thread);
2817 }
2818
2819 if (__probable(rwlock_count == 1)) {
2820 /* sched_flags checked without lock, but will be rechecked while clearing */
2821 if (__improbable(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
2822 lck_rw_clear_promotion(thread, lock);
2823 }
2824 }
2825}
2826