1 | /* |
2 | * Copyright (c) 2007-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * @OSF_COPYRIGHT@ |
30 | */ |
31 | /* |
32 | * Mach Operating System Copyright (c) 1991,1990,1989,1988,1987 Carnegie |
33 | * Mellon University All Rights Reserved. |
34 | * |
35 | * Permission to use, copy, modify and distribute this software and its |
36 | * documentation is hereby granted, provided that both the copyright notice |
37 | * and this permission notice appear in all copies of the software, |
38 | * derivative works or modified versions, and any portions thereof, and that |
39 | * both notices appear in supporting documentation. |
40 | * |
41 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. |
42 | * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES |
43 | * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
44 | * |
45 | * Carnegie Mellon requests users of this software to return to |
46 | * |
47 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
48 | * School of Computer Science Carnegie Mellon University Pittsburgh PA |
49 | * 15213-3890 |
50 | * |
51 | * any improvements or extensions that they make and grant Carnegie Mellon the |
52 | * rights to redistribute these changes. |
53 | */ |
54 | /* |
55 | * File: kern/lock.c |
56 | * Author: Avadis Tevanian, Jr., Michael Wayne Young |
57 | * Date: 1985 |
58 | * |
59 | * Locking primitives implementation |
60 | */ |
61 | |
62 | #define ATOMIC_PRIVATE 1 |
63 | #define LOCK_PRIVATE 1 |
64 | |
65 | #include <mach_ldebug.h> |
66 | |
67 | #include <kern/kalloc.h> |
68 | #include <kern/locks.h> |
69 | #include <kern/misc_protos.h> |
70 | #include <kern/thread.h> |
71 | #include <kern/processor.h> |
72 | #include <kern/sched_prim.h> |
73 | #include <kern/xpr.h> |
74 | #include <kern/debug.h> |
75 | #include <kern/kcdata.h> |
76 | #include <string.h> |
77 | |
78 | #include <arm/cpu_data_internal.h> |
79 | #include <arm/proc_reg.h> |
80 | #include <arm/smp.h> |
81 | #include <machine/atomic.h> |
82 | #include <machine/machine_cpu.h> |
83 | |
84 | #include <sys/kdebug.h> |
85 | |
86 | /* |
87 | * We need only enough declarations from the BSD-side to be able to |
88 | * test if our probe is active, and to call __dtrace_probe(). Setting |
89 | * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in. |
90 | */ |
91 | #if CONFIG_DTRACE |
92 | #define NEED_DTRACE_DEFS |
93 | #include <../bsd/sys/lockstat.h> |
94 | |
95 | #define DTRACE_RW_SHARED 0x0 //reader |
96 | #define DTRACE_RW_EXCL 0x1 //writer |
97 | #define DTRACE_NO_FLAG 0x0 //not applicable |
98 | |
99 | #endif /* CONFIG_DTRACE */ |
100 | |
101 | #define LCK_RW_LCK_EXCLUSIVE_CODE 0x100 |
102 | #define LCK_RW_LCK_EXCLUSIVE1_CODE 0x101 |
103 | #define LCK_RW_LCK_SHARED_CODE 0x102 |
104 | #define LCK_RW_LCK_SH_TO_EX_CODE 0x103 |
105 | #define LCK_RW_LCK_SH_TO_EX1_CODE 0x104 |
106 | #define LCK_RW_LCK_EX_TO_SH_CODE 0x105 |
107 | |
108 | |
109 | #define ANY_LOCK_DEBUG (USLOCK_DEBUG || LOCK_DEBUG || MUTEX_DEBUG) |
110 | |
111 | // Panic in tests that check lock usage correctness |
// These are undesirable when in a panic or a debugger is running.
113 | #define LOCK_CORRECTNESS_PANIC() (kernel_debugger_entry_count == 0) |
114 | |
115 | unsigned int LcksOpts = 0; |
116 | |
117 | #if CONFIG_DTRACE && __SMP__ |
118 | extern uint64_t dtrace_spin_threshold; |
119 | #endif |
120 | |
121 | /* Forwards */ |
122 | |
123 | |
124 | #if USLOCK_DEBUG |
125 | /* |
126 | * Perform simple lock checks. |
127 | */ |
128 | int uslock_check = 1; |
129 | int max_lock_loops = 100000000; |
130 | decl_simple_lock_data(extern, printf_lock) |
131 | decl_simple_lock_data(extern, panic_lock) |
132 | #endif /* USLOCK_DEBUG */ |
133 | |
134 | extern unsigned int not_in_kdp; |
135 | |
136 | /* |
137 | * We often want to know the addresses of the callers |
138 | * of the various lock routines. However, this information |
139 | * is only used for debugging and statistics. |
140 | */ |
141 | typedef void *pc_t; |
142 | #define INVALID_PC ((void *) VM_MAX_KERNEL_ADDRESS) |
143 | #define INVALID_THREAD ((void *) VM_MAX_KERNEL_ADDRESS) |
144 | |
145 | #ifdef lint |
146 | /* |
147 | * Eliminate lint complaints about unused local pc variables. |
148 | */ |
149 | #define OBTAIN_PC(pc,l) ++pc |
150 | #else /* lint */ |
151 | #define OBTAIN_PC(pc,l) |
152 | #endif /* lint */ |
153 | |
154 | |
155 | /* |
156 | * Portable lock package implementation of usimple_locks. |
157 | */ |
158 | |
159 | #if USLOCK_DEBUG |
160 | #define USLDBG(stmt) stmt |
161 | void usld_lock_init(usimple_lock_t, unsigned short); |
162 | void usld_lock_pre(usimple_lock_t, pc_t); |
163 | void usld_lock_post(usimple_lock_t, pc_t); |
164 | void usld_unlock(usimple_lock_t, pc_t); |
165 | void usld_lock_try_pre(usimple_lock_t, pc_t); |
166 | void usld_lock_try_post(usimple_lock_t, pc_t); |
167 | int usld_lock_common_checks(usimple_lock_t, const char *); |
168 | #else /* USLOCK_DEBUG */ |
169 | #define USLDBG(stmt) |
170 | #endif /* USLOCK_DEBUG */ |
171 | |
172 | /* |
173 | * Owner thread pointer when lock held in spin mode |
174 | */ |
175 | #define LCK_MTX_SPIN_TAG 0xfffffff0 |
176 | |
177 | |
178 | #define interlock_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT) |
179 | #define interlock_try(lock) hw_lock_bit_try((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT) |
180 | #define interlock_unlock(lock) hw_unlock_bit ((hw_lock_bit_t*)(&(lock)->lck_mtx_data), LCK_ILOCK_BIT) |
181 | #define lck_rw_ilk_lock(lock) hw_lock_bit ((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT) |
182 | #define lck_rw_ilk_unlock(lock) hw_unlock_bit((hw_lock_bit_t*)(&(lock)->lck_rw_tag), LCK_RW_INTERLOCK_BIT) |
183 | |
184 | #define memory_barrier() __c11_atomic_thread_fence(memory_order_acq_rel_smp) |
185 | #define load_memory_barrier() __c11_atomic_thread_fence(memory_order_acquire_smp) |
186 | #define store_memory_barrier() __c11_atomic_thread_fence(memory_order_release_smp) |
187 | |
188 | // Enforce program order of loads and stores. |
189 | #define ordered_load(target, type) \ |
190 | __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed) |
191 | #define ordered_store(target, type, value) \ |
192 | __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed) |
193 | |
194 | #define ordered_load_mtx(lock) ordered_load(&(lock)->lck_mtx_data, uintptr_t) |
195 | #define ordered_store_mtx(lock, value) ordered_store(&(lock)->lck_mtx_data, uintptr_t, (value)) |
196 | #define ordered_load_rw(lock) ordered_load(&(lock)->lck_rw_data, uint32_t) |
197 | #define ordered_store_rw(lock, value) ordered_store(&(lock)->lck_rw_data, uint32_t, (value)) |
198 | #define ordered_load_rw_owner(lock) ordered_load(&(lock)->lck_rw_owner, thread_t) |
199 | #define ordered_store_rw_owner(lock, value) ordered_store(&(lock)->lck_rw_owner, thread_t, (value)) |
200 | #define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t) |
201 | #define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value)) |
202 | #define ordered_load_bit(lock) ordered_load((lock), uint32_t) |
203 | #define ordered_store_bit(lock, value) ordered_store((lock), uint32_t, (value)) |
204 | |
205 | |
206 | // Prevent the compiler from reordering memory operations around this |
207 | #define compiler_memory_fence() __asm__ volatile ("" ::: "memory") |
208 | |
209 | #define LOCK_PANIC_TIMEOUT 0xc00000 |
210 | #define NOINLINE __attribute__((noinline)) |
211 | |
212 | |
213 | #if __arm__ |
#define interrupts_disabled(mask) (((mask) & PSR_INTMASK))
215 | #else |
#define interrupts_disabled(mask) (((mask) & DAIF_IRQF))
217 | #endif |
218 | |
219 | |
220 | #if __arm__ |
221 | #define enable_fiq() __asm__ volatile ("cpsie f" ::: "memory"); |
222 | #define enable_interrupts() __asm__ volatile ("cpsie if" ::: "memory"); |
223 | #endif |
224 | |
225 | /* |
226 | * Forward declarations |
227 | */ |
228 | |
229 | static void lck_rw_lock_shared_gen(lck_rw_t *lck); |
230 | static void lck_rw_lock_exclusive_gen(lck_rw_t *lck); |
231 | static boolean_t lck_rw_lock_shared_to_exclusive_success(lck_rw_t *lck); |
232 | static boolean_t lck_rw_lock_shared_to_exclusive_failure(lck_rw_t *lck, uint32_t prior_lock_state); |
233 | static void lck_rw_lock_exclusive_to_shared_gen(lck_rw_t *lck, uint32_t prior_lock_state); |
234 | static lck_rw_type_t lck_rw_done_gen(lck_rw_t *lck, uint32_t prior_lock_state); |
235 | static boolean_t lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait); |
236 | |
237 | /* |
 * The atomic exchange API is a low-level abstraction of the operations
 * to atomically read, modify, and write a memory word. This abstraction works
 * for both the Intel and ARMv8.1 compare-and-exchange atomic instructions, as
 * well as the ARM exclusive instructions.
242 | * |
243 | * atomic_exchange_begin() - begin exchange and retrieve current value |
244 | * atomic_exchange_complete() - conclude an exchange |
245 | * atomic_exchange_abort() - cancel an exchange started with atomic_exchange_begin() |
246 | */ |
247 | static uint32_t |
248 | atomic_exchange_begin32(uint32_t *target, uint32_t *previous, enum memory_order ord) |
249 | { |
250 | uint32_t val; |
251 | |
252 | val = load_exclusive32(target, ord); |
253 | *previous = val; |
254 | return val; |
255 | } |
256 | |
257 | static boolean_t |
258 | atomic_exchange_complete32(uint32_t *target, uint32_t previous, uint32_t newval, enum memory_order ord) |
259 | { |
260 | (void)previous; // Previous not needed, monitor is held |
261 | return store_exclusive32(target, newval, ord); |
262 | } |
263 | |
264 | static void |
265 | atomic_exchange_abort(void) |
266 | { |
267 | clear_exclusive(); |
268 | } |
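
/*
 * Illustrative sketch (not compiled): the canonical retry loop a caller
 * builds from the atomic exchange API above. 'target' and EXAMPLE_BIT are
 * hypothetical names; on LL/SC architectures a failed
 * atomic_exchange_complete32() drops the monitor and the loop retries.
 *
 *	uint32_t	data, prev;
 *
 *	for ( ; ; ) {
 *		data = atomic_exchange_begin32(target, &prev, memory_order_acquire);
 *		data |= EXAMPLE_BIT;		// hypothetical modification
 *		if (atomic_exchange_complete32(target, prev, data, memory_order_acquire))
 *			break;			// exchange committed
 *		cpu_pause();			// lost the reservation, retry
 *	}
 */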
269 | |
270 | static boolean_t |
271 | atomic_test_and_set32(uint32_t *target, uint32_t test_mask, uint32_t set_mask, enum memory_order ord, boolean_t wait) |
272 | { |
273 | uint32_t value, prev; |
274 | |
275 | for ( ; ; ) { |
276 | value = atomic_exchange_begin32(target, &prev, ord); |
277 | if (value & test_mask) { |
278 | if (wait) |
279 | wait_for_event(); // Wait with monitor held |
280 | else |
281 | atomic_exchange_abort(); // Clear exclusive monitor |
282 | return FALSE; |
283 | } |
284 | value |= set_mask; |
285 | if (atomic_exchange_complete32(target, prev, value, ord)) |
286 | return TRUE; |
287 | } |
288 | } |
289 | |
290 | void _disable_preemption(void) |
291 | { |
292 | thread_t thread = current_thread(); |
293 | unsigned int count; |
294 | |
295 | count = thread->machine.preemption_count + 1; |
296 | ordered_store(&thread->machine.preemption_count, unsigned int, count); |
297 | } |
298 | |
299 | void _enable_preemption(void) |
300 | { |
301 | thread_t thread = current_thread(); |
302 | long state; |
303 | unsigned int count; |
304 | #if __arm__ |
305 | #define INTERRUPT_MASK PSR_IRQF |
306 | #else // __arm__ |
307 | #define INTERRUPT_MASK DAIF_IRQF |
308 | #endif // __arm__ |
309 | |
310 | count = thread->machine.preemption_count; |
311 | if (count == 0) |
312 | panic("Preemption count negative" ); // Count will go negative when released |
313 | count--; |
314 | if (count > 0) |
315 | goto update_count; // Preemption is still disabled, just update |
316 | state = get_interrupts(); // Get interrupt state |
317 | if (state & INTERRUPT_MASK) |
318 | goto update_count; // Interrupts are already masked, can't take AST here |
319 | |
320 | disable_interrupts_noread(); // Disable interrupts |
321 | ordered_store(&thread->machine.preemption_count, unsigned int, count); |
322 | if (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) { |
323 | #if __arm__ |
324 | #if __ARM_USER_PROTECT__ |
325 | uintptr_t up = arm_user_protect_begin(thread); |
326 | #endif // __ARM_USER_PROTECT__ |
327 | enable_fiq(); |
328 | #endif // __arm__ |
329 | ast_taken_kernel(); // Handle urgent AST |
330 | #if __arm__ |
331 | #if __ARM_USER_PROTECT__ |
332 | arm_user_protect_end(thread, up, TRUE); |
333 | #endif // __ARM_USER_PROTECT__ |
334 | enable_interrupts(); |
335 | return; // Return early on arm only due to FIQ enabling |
336 | #endif // __arm__ |
337 | } |
338 | restore_interrupts(state); // Enable interrupts |
339 | return; |
340 | |
341 | update_count: |
342 | ordered_store(&thread->machine.preemption_count, unsigned int, count); |
343 | return; |
344 | } |
345 | |
346 | int get_preemption_level(void) |
347 | { |
348 | return current_thread()->machine.preemption_count; |
349 | } |
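
/*
 * Usage sketch (illustrative only): preemption disable/enable must be
 * balanced, and _enable_preemption() is the point where a pending urgent
 * AST is taken once the count drops back to zero.
 *
 *	_disable_preemption();
 *	// ... per-CPU work that must not migrate or be preempted ...
 *	_enable_preemption();		// may invoke ast_taken_kernel()
 */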
350 | |
351 | /* Forward declarations for unexported functions that are used externally */ |
352 | void hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit); |
353 | void hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit); |
354 | |
355 | #if __SMP__ |
356 | static unsigned int |
357 | hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout); |
358 | #endif |
359 | |
360 | static inline unsigned int |
361 | hw_lock_bit_to_internal(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout) |
362 | { |
363 | unsigned int success = 0; |
364 | uint32_t mask = (1 << bit); |
365 | #if !__SMP__ |
366 | uint32_t state; |
367 | #endif |
368 | |
369 | #if __SMP__ |
370 | if (__improbable(!atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE))) |
371 | success = hw_lock_bit_to_contended(lock, mask, timeout); |
372 | else |
373 | success = 1; |
374 | #else // __SMP__ |
375 | (void)timeout; |
376 | state = ordered_load_bit(lock); |
377 | if (!(mask & state)) { |
378 | ordered_store_bit(lock, state | mask); |
379 | success = 1; |
380 | } |
381 | #endif // __SMP__ |
382 | |
383 | #if CONFIG_DTRACE |
384 | if (success) |
385 | LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit); |
386 | #endif |
387 | |
388 | return success; |
389 | } |
390 | |
391 | unsigned int |
392 | hw_lock_bit_to(hw_lock_bit_t *lock, unsigned int bit, uint32_t timeout) |
393 | { |
394 | _disable_preemption(); |
395 | return hw_lock_bit_to_internal(lock, bit, timeout); |
396 | } |
397 | |
398 | #if __SMP__ |
399 | static unsigned int NOINLINE |
400 | hw_lock_bit_to_contended(hw_lock_bit_t *lock, uint32_t mask, uint32_t timeout) |
401 | { |
402 | uint64_t end = 0; |
403 | int i; |
404 | #if CONFIG_DTRACE |
405 | uint64_t begin; |
406 | boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0; |
407 | if (__improbable(dtrace_enabled)) |
408 | begin = mach_absolute_time(); |
409 | #endif |
410 | for ( ; ; ) { |
411 | for (i = 0; i < LOCK_SNOOP_SPINS; i++) { |
412 | // Always load-exclusive before wfe |
413 | // This grabs the monitor and wakes up on a release event |
414 | if (atomic_test_and_set32(lock, mask, mask, memory_order_acquire, TRUE)) { |
415 | goto end; |
416 | } |
417 | } |
418 | if (end == 0) |
419 | end = ml_get_timebase() + timeout; |
420 | else if (ml_get_timebase() >= end) |
421 | break; |
422 | } |
423 | return 0; |
424 | end: |
425 | #if CONFIG_DTRACE |
426 | if (__improbable(dtrace_enabled)) { |
427 | uint64_t spintime = mach_absolute_time() - begin; |
428 | if (spintime > dtrace_spin_threshold) |
429 | LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, mask); |
430 | } |
431 | #endif |
432 | return 1; |
433 | } |
434 | #endif // __SMP__ |
435 | |
436 | void |
437 | hw_lock_bit(hw_lock_bit_t *lock, unsigned int bit) |
438 | { |
439 | if (hw_lock_bit_to(lock, bit, LOCK_PANIC_TIMEOUT)) |
440 | return; |
441 | #if __SMP__ |
442 | panic("hw_lock_bit(): timed out (%p)" , lock); |
443 | #else |
444 | panic("hw_lock_bit(): interlock held (%p)" , lock); |
445 | #endif |
446 | } |
447 | |
448 | void |
449 | hw_lock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit) |
450 | { |
451 | if (__improbable(get_preemption_level() == 0)) |
452 | panic("Attempt to take no-preempt bitlock %p in preemptible context" , lock); |
453 | if (hw_lock_bit_to_internal(lock, bit, LOCK_PANIC_TIMEOUT)) |
454 | return; |
455 | #if __SMP__ |
456 | panic("hw_lock_bit_nopreempt(): timed out (%p)" , lock); |
457 | #else |
458 | panic("hw_lock_bit_nopreempt(): interlock held (%p)" , lock); |
459 | #endif |
460 | } |
461 | |
462 | unsigned int |
463 | hw_lock_bit_try(hw_lock_bit_t *lock, unsigned int bit) |
464 | { |
465 | uint32_t mask = (1 << bit); |
466 | #if !__SMP__ |
467 | uint32_t state; |
468 | #endif |
469 | boolean_t success = FALSE; |
470 | |
471 | _disable_preemption(); |
472 | #if __SMP__ |
473 | // TODO: consider weak (non-looping) atomic test-and-set |
474 | success = atomic_test_and_set32(lock, mask, mask, memory_order_acquire, FALSE); |
475 | #else |
476 | state = ordered_load_bit(lock); |
477 | if (!(mask & state)) { |
478 | ordered_store_bit(lock, state | mask); |
479 | success = TRUE; |
480 | } |
481 | #endif // __SMP__ |
482 | if (!success) |
483 | _enable_preemption(); |
484 | |
485 | #if CONFIG_DTRACE |
486 | if (success) |
487 | LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, bit); |
488 | #endif |
489 | |
490 | return success; |
491 | } |
492 | |
493 | static inline void |
494 | hw_unlock_bit_internal(hw_lock_bit_t *lock, unsigned int bit) |
495 | { |
496 | uint32_t mask = (1 << bit); |
497 | #if !__SMP__ |
498 | uint32_t state; |
499 | #endif |
500 | |
501 | #if __SMP__ |
502 | __c11_atomic_fetch_and((_Atomic uint32_t *)lock, ~mask, memory_order_release); |
503 | set_event(); |
504 | #else // __SMP__ |
505 | state = ordered_load_bit(lock); |
506 | ordered_store_bit(lock, state & ~mask); |
507 | #endif // __SMP__ |
508 | #if CONFIG_DTRACE |
509 | LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, bit); |
510 | #endif |
511 | } |
512 | |
513 | /* |
514 | * Routine: hw_unlock_bit |
515 | * |
 * Release spin-lock. The second parameter is the bit number to clear.
517 | * Decrement the preemption level. |
518 | */ |
519 | void |
520 | hw_unlock_bit(hw_lock_bit_t *lock, unsigned int bit) |
521 | { |
522 | hw_unlock_bit_internal(lock, bit); |
523 | _enable_preemption(); |
524 | } |
525 | |
526 | void |
527 | hw_unlock_bit_nopreempt(hw_lock_bit_t *lock, unsigned int bit) |
528 | { |
529 | if (__improbable(get_preemption_level() == 0)) |
530 | panic("Attempt to release no-preempt bitlock %p in preemptible context" , lock); |
531 | hw_unlock_bit_internal(lock, bit); |
532 | } |
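
/*
 * Illustrative pairing (not compiled): a bit lock serializes on a single
 * bit of a 32-bit word, so unrelated state may share the rest of the word.
 * 'flags' and EXAMPLE_LOCK_BIT are hypothetical names.
 *
 *	hw_lock_bit_t	flags;
 *
 *	hw_lock_bit(&flags, EXAMPLE_LOCK_BIT);		// spins; disables preemption
 *	// ... critical section ...
 *	hw_unlock_bit(&flags, EXAMPLE_LOCK_BIT);	// releases; re-enables preemption
 */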
533 | |
534 | /* |
535 | * Routine: lck_spin_alloc_init |
536 | */ |
537 | lck_spin_t * |
538 | lck_spin_alloc_init( |
539 | lck_grp_t * grp, |
540 | lck_attr_t * attr) |
541 | { |
542 | lck_spin_t *lck; |
543 | |
544 | if ((lck = (lck_spin_t *) kalloc(sizeof(lck_spin_t))) != 0) |
545 | lck_spin_init(lck, grp, attr); |
546 | |
547 | return (lck); |
548 | } |
549 | |
550 | /* |
551 | * Routine: lck_spin_free |
552 | */ |
553 | void |
554 | lck_spin_free( |
555 | lck_spin_t * lck, |
556 | lck_grp_t * grp) |
557 | { |
558 | lck_spin_destroy(lck, grp); |
559 | kfree((void *) lck, sizeof(lck_spin_t)); |
560 | } |
561 | |
562 | /* |
563 | * Routine: lck_spin_init |
564 | */ |
565 | void |
566 | lck_spin_init( |
567 | lck_spin_t * lck, |
568 | lck_grp_t * grp, |
569 | __unused lck_attr_t * attr) |
570 | { |
571 | hw_lock_init(&lck->hwlock); |
572 | lck->type = LCK_SPIN_TYPE; |
573 | lck_grp_reference(grp); |
574 | lck_grp_lckcnt_incr(grp, LCK_TYPE_SPIN); |
575 | store_memory_barrier(); |
576 | } |
577 | |
578 | /* |
579 | * arm_usimple_lock is a lck_spin_t without a group or attributes |
580 | */ |
581 | void inline |
582 | arm_usimple_lock_init(simple_lock_t lck, __unused unsigned short initial_value) |
583 | { |
584 | lck->type = LCK_SPIN_TYPE; |
585 | hw_lock_init(&lck->hwlock); |
586 | store_memory_barrier(); |
587 | } |
588 | |
589 | |
590 | /* |
591 | * Routine: lck_spin_lock |
592 | */ |
593 | void |
594 | lck_spin_lock(lck_spin_t *lock) |
595 | { |
596 | #if DEVELOPMENT || DEBUG |
597 | if (lock->type != LCK_SPIN_TYPE) |
598 | panic("Invalid spinlock %p" , lock); |
599 | #endif // DEVELOPMENT || DEBUG |
600 | hw_lock_lock(&lock->hwlock); |
601 | } |
602 | |
603 | /* |
604 | * Routine: lck_spin_lock_nopreempt |
605 | */ |
606 | void |
607 | lck_spin_lock_nopreempt(lck_spin_t *lock) |
608 | { |
609 | #if DEVELOPMENT || DEBUG |
610 | if (lock->type != LCK_SPIN_TYPE) |
611 | panic("Invalid spinlock %p" , lock); |
612 | #endif // DEVELOPMENT || DEBUG |
613 | hw_lock_lock_nopreempt(&lock->hwlock); |
614 | } |
615 | |
616 | /* |
617 | * Routine: lck_spin_try_lock |
618 | */ |
619 | int |
620 | lck_spin_try_lock(lck_spin_t *lock) |
621 | { |
622 | return hw_lock_try(&lock->hwlock); |
623 | } |
624 | |
625 | /* |
626 | * Routine: lck_spin_try_lock_nopreempt |
627 | */ |
628 | int |
629 | lck_spin_try_lock_nopreempt(lck_spin_t *lock) |
630 | { |
631 | return hw_lock_try_nopreempt(&lock->hwlock); |
632 | } |
633 | |
634 | /* |
635 | * Routine: lck_spin_unlock |
636 | */ |
637 | void |
638 | lck_spin_unlock(lck_spin_t *lock) |
639 | { |
640 | #if DEVELOPMENT || DEBUG |
641 | if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) |
642 | panic("Spinlock not owned by thread %p = %lx" , lock, lock->lck_spin_data); |
643 | if (lock->type != LCK_SPIN_TYPE) |
644 | panic("Invalid spinlock type %p" , lock); |
645 | #endif // DEVELOPMENT || DEBUG |
646 | hw_lock_unlock(&lock->hwlock); |
647 | } |
648 | |
649 | /* |
650 | * Routine: lck_spin_unlock_nopreempt |
651 | */ |
652 | void |
653 | lck_spin_unlock_nopreempt(lck_spin_t *lock) |
654 | { |
655 | #if DEVELOPMENT || DEBUG |
656 | if ((LCK_MTX_STATE_TO_THREAD(lock->lck_spin_data) != current_thread()) && LOCK_CORRECTNESS_PANIC()) |
657 | panic("Spinlock not owned by thread %p = %lx" , lock, lock->lck_spin_data); |
658 | if (lock->type != LCK_SPIN_TYPE) |
659 | panic("Invalid spinlock type %p" , lock); |
660 | #endif // DEVELOPMENT || DEBUG |
661 | hw_lock_unlock_nopreempt(&lock->hwlock); |
662 | } |
663 | |
664 | /* |
665 | * Routine: lck_spin_destroy |
666 | */ |
667 | void |
668 | lck_spin_destroy( |
669 | lck_spin_t * lck, |
670 | lck_grp_t * grp) |
671 | { |
672 | if (lck->lck_spin_data == LCK_SPIN_TAG_DESTROYED) |
673 | return; |
674 | lck->lck_spin_data = LCK_SPIN_TAG_DESTROYED; |
675 | lck_grp_lckcnt_decr(grp, LCK_TYPE_SPIN); |
676 | lck_grp_deallocate(grp); |
677 | } |
678 | |
679 | /* |
680 | * Routine: kdp_lck_spin_is_acquired |
681 | * NOT SAFE: To be used only by kernel debugger to avoid deadlock. |
682 | */ |
683 | boolean_t |
684 | kdp_lck_spin_is_acquired(lck_spin_t *lck) { |
685 | if (not_in_kdp) { |
686 | panic("panic: spinlock acquired check done outside of kernel debugger" ); |
687 | } |
688 | return ((lck->lck_spin_data & ~LCK_SPIN_TAG_DESTROYED) != 0) ? TRUE:FALSE; |
689 | } |
690 | |
691 | /* |
692 | * Initialize a usimple_lock. |
693 | * |
694 | * No change in preemption state. |
695 | */ |
696 | void |
697 | usimple_lock_init( |
698 | usimple_lock_t l, |
699 | unsigned short tag) |
700 | { |
701 | #ifndef MACHINE_SIMPLE_LOCK |
702 | USLDBG(usld_lock_init(l, tag)); |
703 | hw_lock_init(&l->lck_spin_data); |
704 | #else |
705 | simple_lock_init((simple_lock_t) l, tag); |
706 | #endif |
707 | } |
708 | |
709 | |
710 | /* |
711 | * Acquire a usimple_lock. |
712 | * |
713 | * Returns with preemption disabled. Note |
714 | * that the hw_lock routines are responsible for |
715 | * maintaining preemption state. |
716 | */ |
717 | void |
718 | usimple_lock( |
719 | usimple_lock_t l) |
720 | { |
721 | #ifndef MACHINE_SIMPLE_LOCK |
722 | pc_t pc; |
723 | |
724 | OBTAIN_PC(pc, l); |
725 | USLDBG(usld_lock_pre(l, pc)); |
726 | |
727 | if (!hw_lock_to(&l->lck_spin_data, LockTimeOut)) /* Try to get the lock |
728 | * with a timeout */ |
729 | panic("simple lock deadlock detection - l=%p, cpu=%d, ret=%p" , &l, cpu_number(), pc); |
730 | |
731 | USLDBG(usld_lock_post(l, pc)); |
732 | #else |
733 | simple_lock((simple_lock_t) l); |
734 | #endif |
735 | } |
736 | |
737 | |
738 | extern void sync(void); |
739 | |
740 | /* |
741 | * Release a usimple_lock. |
742 | * |
743 | * Returns with preemption enabled. Note |
744 | * that the hw_lock routines are responsible for |
745 | * maintaining preemption state. |
746 | */ |
747 | void |
748 | usimple_unlock( |
749 | usimple_lock_t l) |
750 | { |
751 | #ifndef MACHINE_SIMPLE_LOCK |
752 | pc_t pc; |
753 | |
754 | OBTAIN_PC(pc, l); |
755 | USLDBG(usld_unlock(l, pc)); |
756 | sync(); |
757 | hw_lock_unlock(&l->lck_spin_data); |
758 | #else |
759 | simple_unlock((simple_lock_t) l); |
760 | #endif |
761 | } |
762 | |
763 | |
764 | /* |
765 | * Conditionally acquire a usimple_lock. |
766 | * |
767 | * On success, returns with preemption disabled. |
768 | * On failure, returns with preemption in the same state |
769 | * as when first invoked. Note that the hw_lock routines |
770 | * are responsible for maintaining preemption state. |
771 | * |
772 | * XXX No stats are gathered on a miss; I preserved this |
773 | * behavior from the original assembly-language code, but |
774 | * doesn't it make sense to log misses? XXX |
775 | */ |
776 | unsigned int |
777 | usimple_lock_try( |
778 | usimple_lock_t l) |
779 | { |
780 | #ifndef MACHINE_SIMPLE_LOCK |
781 | pc_t pc; |
782 | unsigned int success; |
783 | |
784 | OBTAIN_PC(pc, l); |
785 | USLDBG(usld_lock_try_pre(l, pc)); |
786 | if ((success = hw_lock_try(&l->lck_spin_data))) { |
787 | USLDBG(usld_lock_try_post(l, pc)); |
788 | } |
789 | return success; |
790 | #else |
791 | return (simple_lock_try((simple_lock_t) l)); |
792 | #endif |
793 | } |
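
/*
 * Typical conditional-acquire pattern (illustrative; 'usl' is a
 * hypothetical lock):
 *
 *	if (usimple_lock_try(usl)) {
 *		// ... critical section, preemption disabled ...
 *		usimple_unlock(usl);
 *	} else {
 *		// lock busy: fall back, e.g. spin with usimple_lock(usl)
 *	}
 */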
794 | |
795 | #if USLOCK_DEBUG |
796 | /* |
797 | * States of a usimple_lock. The default when initializing |
798 | * a usimple_lock is setting it up for debug checking. |
799 | */ |
800 | #define USLOCK_CHECKED 0x0001 /* lock is being checked */ |
801 | #define USLOCK_TAKEN 0x0002 /* lock has been taken */ |
802 | #define USLOCK_INIT 0xBAA0 /* lock has been initialized */ |
803 | #define USLOCK_INITIALIZED (USLOCK_INIT|USLOCK_CHECKED) |
804 | #define USLOCK_CHECKING(l) (uslock_check && \ |
805 | ((l)->debug.state & USLOCK_CHECKED)) |
806 | |
807 | /* |
808 | * Trace activities of a particularly interesting lock. |
809 | */ |
810 | void usl_trace(usimple_lock_t, int, pc_t, const char *); |
811 | |
812 | |
813 | /* |
814 | * Initialize the debugging information contained |
815 | * in a usimple_lock. |
816 | */ |
817 | void |
818 | usld_lock_init( |
819 | usimple_lock_t l, |
820 | __unused unsigned short tag) |
821 | { |
822 | if (l == USIMPLE_LOCK_NULL) |
823 | panic("lock initialization: null lock pointer" ); |
824 | l->lock_type = USLOCK_TAG; |
825 | l->debug.state = uslock_check ? USLOCK_INITIALIZED : 0; |
826 | l->debug.lock_cpu = l->debug.unlock_cpu = 0; |
827 | l->debug.lock_pc = l->debug.unlock_pc = INVALID_PC; |
828 | l->debug.lock_thread = l->debug.unlock_thread = INVALID_THREAD; |
829 | l->debug.duration[0] = l->debug.duration[1] = 0; |
830 | l->debug.unlock_cpu = l->debug.unlock_cpu = 0; |
831 | l->debug.unlock_pc = l->debug.unlock_pc = INVALID_PC; |
832 | l->debug.unlock_thread = l->debug.unlock_thread = INVALID_THREAD; |
833 | } |
834 | |
835 | |
836 | /* |
837 | * These checks apply to all usimple_locks, not just |
838 | * those with USLOCK_CHECKED turned on. |
839 | */ |
840 | int |
841 | usld_lock_common_checks( |
842 | usimple_lock_t l, |
843 | const char *caller) |
844 | { |
845 | if (l == USIMPLE_LOCK_NULL) |
846 | panic("%s: null lock pointer" , caller); |
847 | if (l->lock_type != USLOCK_TAG) |
848 | panic("%s: 0x%x is not a usimple lock" , caller, (integer_t) l); |
849 | if (!(l->debug.state & USLOCK_INIT)) |
850 | panic("%s: 0x%x is not an initialized lock" , |
851 | caller, (integer_t) l); |
852 | return USLOCK_CHECKING(l); |
853 | } |
854 | |
855 | |
856 | /* |
857 | * Debug checks on a usimple_lock just before attempting |
858 | * to acquire it. |
859 | */ |
860 | /* ARGSUSED */ |
861 | void |
862 | usld_lock_pre( |
863 | usimple_lock_t l, |
864 | pc_t pc) |
865 | { |
866 | const char *caller = "usimple_lock" ; |
867 | |
868 | |
869 | if (!usld_lock_common_checks(l, caller)) |
870 | return; |
871 | |
872 | /* |
	 * Note that we have a weird case where we are getting a lock when we are
874 | * in the process of putting the system to sleep. We are running with no |
875 | * current threads, therefore we can't tell if we are trying to retake a lock |
876 | * we have or someone on the other processor has it. Therefore we just |
877 | * ignore this test if the locking thread is 0. |
878 | */ |
879 | |
880 | if ((l->debug.state & USLOCK_TAKEN) && l->debug.lock_thread && |
881 | l->debug.lock_thread == (void *) current_thread()) { |
882 | printf("%s: lock 0x%x already locked (at %p) by" , |
883 | caller, (integer_t) l, l->debug.lock_pc); |
884 | printf(" current thread %p (new attempt at pc %p)\n" , |
885 | l->debug.lock_thread, pc); |
886 | panic("%s" , caller); |
887 | } |
888 | mp_disable_preemption(); |
889 | usl_trace(l, cpu_number(), pc, caller); |
890 | mp_enable_preemption(); |
891 | } |
892 | |
893 | |
894 | /* |
895 | * Debug checks on a usimple_lock just after acquiring it. |
896 | * |
897 | * Pre-emption has been disabled at this point, |
898 | * so we are safe in using cpu_number. |
899 | */ |
900 | void |
901 | usld_lock_post( |
902 | usimple_lock_t l, |
903 | pc_t pc) |
904 | { |
905 | int mycpu; |
906 | const char *caller = "successful usimple_lock" ; |
907 | |
908 | |
909 | if (!usld_lock_common_checks(l, caller)) |
910 | return; |
911 | |
912 | if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) |
913 | panic("%s: lock 0x%x became uninitialized" , |
914 | caller, (integer_t) l); |
915 | if ((l->debug.state & USLOCK_TAKEN)) |
916 | panic("%s: lock 0x%x became TAKEN by someone else" , |
917 | caller, (integer_t) l); |
918 | |
919 | mycpu = cpu_number(); |
920 | l->debug.lock_thread = (void *) current_thread(); |
921 | l->debug.state |= USLOCK_TAKEN; |
922 | l->debug.lock_pc = pc; |
923 | l->debug.lock_cpu = mycpu; |
924 | |
925 | usl_trace(l, mycpu, pc, caller); |
926 | } |
927 | |
928 | |
929 | /* |
930 | * Debug checks on a usimple_lock just before |
931 | * releasing it. Note that the caller has not |
932 | * yet released the hardware lock. |
933 | * |
934 | * Preemption is still disabled, so there's |
935 | * no problem using cpu_number. |
936 | */ |
937 | void |
938 | usld_unlock( |
939 | usimple_lock_t l, |
940 | pc_t pc) |
941 | { |
942 | int mycpu; |
943 | const char *caller = "usimple_unlock" ; |
944 | |
945 | |
946 | if (!usld_lock_common_checks(l, caller)) |
947 | return; |
948 | |
949 | mycpu = cpu_number(); |
950 | |
951 | if (!(l->debug.state & USLOCK_TAKEN)) |
952 | panic("%s: lock 0x%x hasn't been taken" , |
953 | caller, (integer_t) l); |
954 | if (l->debug.lock_thread != (void *) current_thread()) |
955 | panic("%s: unlocking lock 0x%x, owned by thread %p" , |
956 | caller, (integer_t) l, l->debug.lock_thread); |
957 | if (l->debug.lock_cpu != mycpu) { |
958 | printf("%s: unlocking lock 0x%x on cpu 0x%x" , |
959 | caller, (integer_t) l, mycpu); |
960 | printf(" (acquired on cpu 0x%x)\n" , l->debug.lock_cpu); |
961 | panic("%s" , caller); |
962 | } |
963 | usl_trace(l, mycpu, pc, caller); |
964 | |
965 | l->debug.unlock_thread = l->debug.lock_thread; |
966 | l->debug.lock_thread = INVALID_PC; |
967 | l->debug.state &= ~USLOCK_TAKEN; |
968 | l->debug.unlock_pc = pc; |
969 | l->debug.unlock_cpu = mycpu; |
970 | } |
971 | |
972 | |
973 | /* |
974 | * Debug checks on a usimple_lock just before |
975 | * attempting to acquire it. |
976 | * |
977 | * Preemption isn't guaranteed to be disabled. |
978 | */ |
979 | void |
980 | usld_lock_try_pre( |
981 | usimple_lock_t l, |
982 | pc_t pc) |
983 | { |
984 | const char *caller = "usimple_lock_try" ; |
985 | |
986 | if (!usld_lock_common_checks(l, caller)) |
987 | return; |
988 | mp_disable_preemption(); |
989 | usl_trace(l, cpu_number(), pc, caller); |
990 | mp_enable_preemption(); |
991 | } |
992 | |
993 | |
994 | /* |
995 | * Debug checks on a usimple_lock just after |
996 | * successfully attempting to acquire it. |
997 | * |
998 | * Preemption has been disabled by the |
999 | * lock acquisition attempt, so it's safe |
1000 | * to use cpu_number. |
1001 | */ |
1002 | void |
1003 | usld_lock_try_post( |
1004 | usimple_lock_t l, |
1005 | pc_t pc) |
1006 | { |
1007 | int mycpu; |
	const char *caller = "successful usimple_lock_try";
1009 | |
1010 | if (!usld_lock_common_checks(l, caller)) |
1011 | return; |
1012 | |
1013 | if (!((l->debug.state & ~USLOCK_TAKEN) == USLOCK_INITIALIZED)) |
1014 | panic("%s: lock 0x%x became uninitialized" , |
1015 | caller, (integer_t) l); |
1016 | if ((l->debug.state & USLOCK_TAKEN)) |
1017 | panic("%s: lock 0x%x became TAKEN by someone else" , |
1018 | caller, (integer_t) l); |
1019 | |
1020 | mycpu = cpu_number(); |
1021 | l->debug.lock_thread = (void *) current_thread(); |
1022 | l->debug.state |= USLOCK_TAKEN; |
1023 | l->debug.lock_pc = pc; |
1024 | l->debug.lock_cpu = mycpu; |
1025 | |
1026 | usl_trace(l, mycpu, pc, caller); |
1027 | } |
1028 | |
1029 | |
1030 | /* |
1031 | * For very special cases, set traced_lock to point to a |
1032 | * specific lock of interest. The result is a series of |
1033 | * XPRs showing lock operations on that lock. The lock_seq |
1034 | * value is used to show the order of those operations. |
1035 | */ |
1036 | usimple_lock_t traced_lock; |
1037 | unsigned int lock_seq; |
1038 | |
1039 | void |
1040 | usl_trace( |
1041 | usimple_lock_t l, |
1042 | int mycpu, |
1043 | pc_t pc, |
1044 | const char *op_name) |
1045 | { |
1046 | if (traced_lock == l) { |
1047 | XPR(XPR_SLOCK, |
1048 | "seq %d, cpu %d, %s @ %x\n" , |
1049 | (integer_t) lock_seq, (integer_t) mycpu, |
1050 | (integer_t) op_name, (integer_t) pc, 0); |
1051 | lock_seq++; |
1052 | } |
1053 | } |
1054 | |
1055 | |
1056 | #endif /* USLOCK_DEBUG */ |
1057 | |
1058 | /* |
1059 | * The C portion of the shared/exclusive locks package. |
1060 | */ |
1061 | |
1062 | /* |
1063 | * compute the deadline to spin against when |
1064 | * waiting for a change of state on a lck_rw_t |
1065 | */ |
1066 | #if __SMP__ |
1067 | static inline uint64_t |
1068 | lck_rw_deadline_for_spin(lck_rw_t *lck) |
1069 | { |
1070 | lck_rw_word_t word; |
1071 | |
1072 | word.data = ordered_load_rw(lck); |
1073 | if (word.can_sleep) { |
1074 | if (word.r_waiting || word.w_waiting || (word.shared_count > machine_info.max_cpus)) { |
1075 | /* |
1076 | * there are already threads waiting on this lock... this |
1077 | * implies that they have spun beyond their deadlines waiting for |
1078 | * the desired state to show up so we will not bother spinning at this time... |
1079 | * or |
1080 | * the current number of threads sharing this lock exceeds our capacity to run them |
1081 | * concurrently and since all states we're going to spin for require the rw_shared_count |
1082 | * to be at 0, we'll not bother spinning since the latency for this to happen is |
1083 | * unpredictable... |
1084 | */ |
1085 | return (mach_absolute_time()); |
1086 | } |
1087 | return (mach_absolute_time() + MutexSpin); |
1088 | } else |
1089 | return (mach_absolute_time() + (100000LL * 1000000000LL)); |
1090 | } |
1091 | #endif // __SMP__ |
1092 | |
1093 | static boolean_t |
1094 | lck_rw_drain_status(lck_rw_t *lock, uint32_t status_mask, boolean_t wait __unused) |
1095 | { |
1096 | #if __SMP__ |
1097 | uint64_t deadline = 0; |
1098 | uint32_t data; |
1099 | |
1100 | if (wait) |
1101 | deadline = lck_rw_deadline_for_spin(lock); |
1102 | |
1103 | for ( ; ; ) { |
1104 | data = load_exclusive32(&lock->lck_rw_data, memory_order_acquire_smp); |
1105 | if ((data & status_mask) == 0) |
1106 | break; |
1107 | if (wait) |
1108 | wait_for_event(); |
1109 | else |
1110 | clear_exclusive(); |
1111 | if (!wait || (mach_absolute_time() >= deadline)) |
1112 | return FALSE; |
1113 | } |
1114 | clear_exclusive(); |
1115 | return TRUE; |
1116 | #else |
1117 | uint32_t data; |
1118 | |
1119 | data = ordered_load_rw(lock); |
1120 | if ((data & status_mask) == 0) |
1121 | return TRUE; |
1122 | else |
1123 | return FALSE; |
1124 | #endif // __SMP__ |
1125 | } |
1126 | |
1127 | /* |
1128 | * Spin while interlock is held. |
1129 | */ |
1130 | static inline void |
1131 | lck_rw_interlock_spin(lck_rw_t *lock) |
1132 | { |
1133 | #if __SMP__ |
1134 | uint32_t data; |
1135 | |
1136 | for ( ; ; ) { |
1137 | data = load_exclusive32(&lock->lck_rw_data, memory_order_relaxed); |
1138 | if (data & LCK_RW_INTERLOCK) |
1139 | wait_for_event(); |
1140 | else { |
1141 | clear_exclusive(); |
1142 | return; |
1143 | } |
1144 | } |
1145 | #else |
1146 | panic("lck_rw_interlock_spin(): Interlock locked %p %x" , lock, lock->lck_rw_data); |
1147 | #endif |
1148 | } |
1149 | |
1150 | /* |
1151 | * We disable interrupts while holding the RW interlock to prevent an |
1152 | * interrupt from exacerbating hold time. |
1153 | * Hence, local helper functions lck_interlock_lock()/lck_interlock_unlock(). |
1154 | */ |
1155 | static inline boolean_t |
1156 | lck_interlock_lock(lck_rw_t *lck) |
1157 | { |
1158 | boolean_t istate; |
1159 | |
1160 | istate = ml_set_interrupts_enabled(FALSE); |
1161 | lck_rw_ilk_lock(lck); |
1162 | return istate; |
1163 | } |
1164 | |
1165 | static inline void |
1166 | lck_interlock_unlock(lck_rw_t *lck, boolean_t istate) |
1167 | { |
1168 | lck_rw_ilk_unlock(lck); |
1169 | ml_set_interrupts_enabled(istate); |
1170 | } |
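
/*
 * Usage sketch (illustrative): the interlock is always bracketed by an
 * interrupt-state save/restore so that hold times stay short.
 *
 *	boolean_t	istate;
 *
 *	istate = lck_interlock_lock(lock);	// interrupts off, interlock held
 *	// ... examine/update the lock word and wait bits ...
 *	lck_interlock_unlock(lock, istate);	// drop interlock, restore interrupts
 */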
1171 | |
1172 | |
1173 | #define LCK_RW_GRAB_WANT 0 |
1174 | #define LCK_RW_GRAB_SHARED 1 |
1175 | |
1176 | static boolean_t |
1177 | lck_rw_grab(lck_rw_t *lock, int mode, boolean_t wait) |
1178 | { |
1179 | uint64_t deadline = 0; |
1180 | uint32_t data, prev; |
1181 | boolean_t do_exch; |
1182 | |
1183 | #if __SMP__ |
1184 | if (wait) |
1185 | deadline = lck_rw_deadline_for_spin(lock); |
1186 | #else |
1187 | wait = FALSE; // Don't spin on UP systems |
1188 | #endif |
1189 | |
1190 | for ( ; ; ) { |
1191 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp); |
1192 | if (data & LCK_RW_INTERLOCK) { |
1193 | atomic_exchange_abort(); |
1194 | lck_rw_interlock_spin(lock); |
1195 | continue; |
1196 | } |
1197 | do_exch = FALSE; |
1198 | if (mode == LCK_RW_GRAB_WANT) { |
1199 | if ((data & LCK_RW_WANT_EXCL) == 0) { |
1200 | data |= LCK_RW_WANT_EXCL; |
1201 | do_exch = TRUE; |
1202 | } |
1203 | } else { // LCK_RW_GRAB_SHARED |
1204 | if (((data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) == 0) || |
1205 | (((data & LCK_RW_SHARED_MASK)) && ((data & LCK_RW_PRIV_EXCL) == 0))) { |
1206 | data += LCK_RW_SHARED_READER; |
1207 | do_exch = TRUE; |
1208 | } |
1209 | } |
1210 | if (do_exch) { |
1211 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) |
1212 | return TRUE; |
1213 | } else { |
			if (wait)
				wait_for_event();	// Wait with monitor held
1216 | else |
1217 | atomic_exchange_abort(); |
1218 | if (!wait || (mach_absolute_time() >= deadline)) |
1219 | return FALSE; |
1220 | } |
1221 | } |
1222 | } |
1223 | |
1224 | |
1225 | /* |
1226 | * Routine: lck_rw_alloc_init |
1227 | */ |
1228 | lck_rw_t * |
1229 | lck_rw_alloc_init( |
1230 | lck_grp_t *grp, |
1231 | lck_attr_t *attr) |
1232 | { |
1233 | lck_rw_t *lck; |
1234 | |
1235 | if ((lck = (lck_rw_t *)kalloc(sizeof(lck_rw_t))) != 0) |
1236 | lck_rw_init(lck, grp, attr); |
1237 | |
1238 | return lck; |
1239 | } |
1240 | |
1241 | /* |
1242 | * Routine: lck_rw_free |
1243 | */ |
1244 | void |
1245 | lck_rw_free( |
1246 | lck_rw_t *lck, |
1247 | lck_grp_t *grp) |
1248 | { |
1249 | lck_rw_destroy(lck, grp); |
1250 | kfree(lck, sizeof(lck_rw_t)); |
1251 | } |
1252 | |
1253 | /* |
1254 | * Routine: lck_rw_init |
1255 | */ |
1256 | void |
1257 | lck_rw_init( |
1258 | lck_rw_t *lck, |
1259 | lck_grp_t *grp, |
1260 | lck_attr_t *attr) |
1261 | { |
1262 | if (attr == LCK_ATTR_NULL) |
1263 | attr = &LockDefaultLckAttr; |
1264 | memset(lck, 0, sizeof(lck_rw_t)); |
1265 | lck->lck_rw_can_sleep = TRUE; |
1266 | if ((attr->lck_attr_val & LCK_ATTR_RW_SHARED_PRIORITY) == 0) |
1267 | lck->lck_rw_priv_excl = TRUE; |
1268 | |
1269 | lck_grp_reference(grp); |
1270 | lck_grp_lckcnt_incr(grp, LCK_TYPE_RW); |
1271 | } |
1272 | |
1273 | |
1274 | /* |
1275 | * Routine: lck_rw_destroy |
1276 | */ |
1277 | void |
1278 | lck_rw_destroy( |
1279 | lck_rw_t *lck, |
1280 | lck_grp_t *grp) |
1281 | { |
1282 | if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) |
1283 | return; |
1284 | #if MACH_LDEBUG |
1285 | lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD); |
1286 | #endif |
1287 | lck->lck_rw_tag = LCK_RW_TAG_DESTROYED; |
1288 | lck_grp_lckcnt_decr(grp, LCK_TYPE_RW); |
1289 | lck_grp_deallocate(grp); |
1290 | return; |
1291 | } |
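
/*
 * Lifecycle sketch (illustrative; 'my_grp' is a hypothetical lock group):
 *
 *	lck_rw_t	my_lock;
 *
 *	lck_rw_init(&my_lock, my_grp, LCK_ATTR_NULL);
 *	lck_rw_lock_exclusive(&my_lock);
 *	// ... writer critical section ...
 *	lck_rw_unlock_exclusive(&my_lock);
 *	lck_rw_destroy(&my_lock, my_grp);
 */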
1292 | |
1293 | /* |
1294 | * Routine: lck_rw_lock |
1295 | */ |
1296 | void |
1297 | lck_rw_lock( |
1298 | lck_rw_t *lck, |
1299 | lck_rw_type_t lck_rw_type) |
1300 | { |
1301 | if (lck_rw_type == LCK_RW_TYPE_SHARED) |
1302 | lck_rw_lock_shared(lck); |
1303 | else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) |
1304 | lck_rw_lock_exclusive(lck); |
1305 | else |
1306 | panic("lck_rw_lock(): Invalid RW lock type: %x" , lck_rw_type); |
1307 | } |
1308 | |
1309 | /* |
1310 | * Routine: lck_rw_lock_exclusive |
1311 | */ |
1312 | void |
1313 | lck_rw_lock_exclusive(lck_rw_t *lock) |
1314 | { |
1315 | thread_t thread = current_thread(); |
1316 | |
1317 | thread->rwlock_count++; |
1318 | if (atomic_test_and_set32(&lock->lck_rw_data, |
1319 | (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK), |
1320 | LCK_RW_WANT_EXCL, memory_order_acquire_smp, FALSE)) { |
1321 | #if CONFIG_DTRACE |
1322 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL); |
1323 | #endif /* CONFIG_DTRACE */ |
1324 | } else |
1325 | lck_rw_lock_exclusive_gen(lock); |
1326 | #if MACH_ASSERT |
1327 | thread_t owner = ordered_load_rw_owner(lock); |
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1329 | #endif |
1330 | ordered_store_rw_owner(lock, thread); |
1331 | } |
1332 | |
1333 | /* |
1334 | * Routine: lck_rw_lock_shared |
1335 | */ |
1336 | void |
1337 | lck_rw_lock_shared(lck_rw_t *lock) |
1338 | { |
1339 | uint32_t data, prev; |
1340 | |
1341 | current_thread()->rwlock_count++; |
1342 | for ( ; ; ) { |
1343 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp); |
1344 | if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE | LCK_RW_INTERLOCK)) { |
1345 | atomic_exchange_abort(); |
1346 | lck_rw_lock_shared_gen(lock); |
1347 | break; |
1348 | } |
1349 | data += LCK_RW_SHARED_READER; |
1350 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) |
1351 | break; |
1352 | cpu_pause(); |
1353 | } |
1354 | #if MACH_ASSERT |
1355 | thread_t owner = ordered_load_rw_owner(lock); |
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1357 | #endif |
1358 | #if CONFIG_DTRACE |
1359 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED); |
1360 | #endif /* CONFIG_DTRACE */ |
1361 | return; |
1362 | } |
1363 | |
1364 | /* |
1365 | * Routine: lck_rw_lock_shared_to_exclusive |
1366 | */ |
1367 | boolean_t |
1368 | lck_rw_lock_shared_to_exclusive(lck_rw_t *lock) |
1369 | { |
1370 | uint32_t data, prev; |
1371 | |
1372 | for ( ; ; ) { |
1373 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp); |
1374 | if (data & LCK_RW_INTERLOCK) { |
1375 | atomic_exchange_abort(); |
1376 | lck_rw_interlock_spin(lock); |
1377 | continue; |
1378 | } |
1379 | if (data & LCK_RW_WANT_UPGRADE) { |
1380 | data -= LCK_RW_SHARED_READER; |
1381 | if ((data & LCK_RW_SHARED_MASK) == 0) /* we were the last reader */ |
1382 | data &= ~(LCK_RW_W_WAITING); /* so clear the wait indicator */ |
1383 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) |
1384 | return lck_rw_lock_shared_to_exclusive_failure(lock, prev); |
1385 | } else { |
1386 | data |= LCK_RW_WANT_UPGRADE; /* ask for WANT_UPGRADE */ |
1387 | data -= LCK_RW_SHARED_READER; /* and shed our read count */ |
1388 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) |
1389 | break; |
1390 | } |
1391 | cpu_pause(); |
1392 | } |
1393 | /* we now own the WANT_UPGRADE */ |
1394 | if (data & LCK_RW_SHARED_MASK) /* check to see if all of the readers are drained */ |
1395 | lck_rw_lock_shared_to_exclusive_success(lock); /* if not, we need to go wait */ |
1396 | #if MACH_ASSERT |
1397 | thread_t owner = ordered_load_rw_owner(lock); |
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1399 | #endif |
1400 | ordered_store_rw_owner(lock, current_thread()); |
1401 | #if CONFIG_DTRACE |
1402 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 0); |
1403 | #endif /* CONFIG_DTRACE */ |
1404 | return TRUE; |
1405 | } |
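
/*
 * Caller pattern (illustrative): on FALSE the shared hold has already been
 * dropped, so the caller must not assume it still owns the lock and
 * typically re-acquires from scratch.
 *
 *	lck_rw_lock_shared(lock);
 *	// ... decide an exclusive hold is needed ...
 *	if (!lck_rw_lock_shared_to_exclusive(lock)) {
 *		// shared hold was lost during the upgrade attempt
 *		lck_rw_lock_exclusive(lock);
 *		// ... revalidate state observed under the shared hold ...
 *	}
 *	// ... exclusive critical section ...
 *	lck_rw_unlock_exclusive(lock);
 */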
1406 | |
1407 | |
1408 | /* |
1409 | * Routine: lck_rw_lock_shared_to_exclusive_failure |
1410 | * Function: |
 *	Fast path code has already dropped our read
 *	count and determined that someone else owns 'lck_rw_want_upgrade'.
 *	If 'lck_rw_shared_count' == 0, it has also already dropped 'lck_w_waiting';
 *	all we need to do here is determine if a wakeup is needed.
1415 | */ |
1416 | static boolean_t |
1417 | lck_rw_lock_shared_to_exclusive_failure( |
1418 | lck_rw_t *lck, |
1419 | uint32_t prior_lock_state) |
1420 | { |
1421 | thread_t thread = current_thread(); |
1422 | uint32_t rwlock_count; |
1423 | |
1424 | /* Check if dropping the lock means that we need to unpromote */ |
1425 | rwlock_count = thread->rwlock_count--; |
1426 | #if MACH_LDEBUG |
1427 | if (rwlock_count == 0) { |
1428 | panic("rw lock count underflow for thread %p" , thread); |
1429 | } |
1430 | #endif |
1431 | if ((prior_lock_state & LCK_RW_W_WAITING) && |
1432 | ((prior_lock_state & LCK_RW_SHARED_MASK) == LCK_RW_SHARED_READER)) { |
1433 | /* |
1434 | * Someone else has requested upgrade. |
1435 | * Since we've released the read lock, wake |
1436 | * him up if he's blocked waiting |
1437 | */ |
1438 | thread_wakeup(LCK_RW_WRITER_EVENT(lck)); |
1439 | } |
1440 | |
1441 | if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { |
1442 | /* sched_flags checked without lock, but will be rechecked while clearing */ |
1443 | lck_rw_clear_promotion(thread, unslide_for_kdebug(lck)); |
1444 | } |
1445 | |
1446 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_NONE, |
1447 | VM_KERNEL_UNSLIDE_OR_PERM(lck), lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0); |
1448 | |
1449 | return (FALSE); |
1450 | } |
1451 | |
1452 | /* |
1453 | * Routine: lck_rw_lock_shared_to_exclusive_success |
1454 | * Function: |
 *	Fast path code has already dropped our read
 *	count and successfully acquired 'lck_rw_want_upgrade'.
 *	We just need to wait for the rest of the readers to drain
 *	and then we can return as the exclusive holder of this lock.
1459 | */ |
1460 | static boolean_t |
1461 | lck_rw_lock_shared_to_exclusive_success( |
1462 | lck_rw_t *lock) |
1463 | { |
1464 | __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock); |
1465 | int slept = 0; |
1466 | lck_rw_word_t word; |
1467 | wait_result_t res; |
1468 | boolean_t istate; |
1469 | boolean_t not_shared; |
1470 | |
1471 | #if CONFIG_DTRACE |
1472 | uint64_t wait_interval = 0; |
1473 | int readers_at_sleep = 0; |
1474 | boolean_t dtrace_ls_initialized = FALSE; |
1475 | boolean_t dtrace_rwl_shared_to_excl_spin, dtrace_rwl_shared_to_excl_block, dtrace_ls_enabled = FALSE; |
1476 | #endif |
1477 | |
1478 | while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, FALSE)) { |
1479 | |
1480 | word.data = ordered_load_rw(lock); |
1481 | #if CONFIG_DTRACE |
1482 | if (dtrace_ls_initialized == FALSE) { |
1483 | dtrace_ls_initialized = TRUE; |
1484 | dtrace_rwl_shared_to_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] != 0); |
1485 | dtrace_rwl_shared_to_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK] != 0); |
1486 | dtrace_ls_enabled = dtrace_rwl_shared_to_excl_spin || dtrace_rwl_shared_to_excl_block; |
1487 | if (dtrace_ls_enabled) { |
1488 | /* |
1489 | * Either sleeping or spinning is happening, |
1490 | * start a timing of our delay interval now. |
1491 | */ |
1492 | readers_at_sleep = word.shared_count; |
1493 | wait_interval = mach_absolute_time(); |
1494 | } |
1495 | } |
1496 | #endif |
1497 | |
1498 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_START, |
1499 | trace_lck, word.shared_count, 0, 0, 0); |
1500 | |
1501 | not_shared = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK, TRUE); |
1502 | |
1503 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_SPIN_CODE) | DBG_FUNC_END, |
1504 | trace_lck, lock->lck_rw_shared_count, 0, 0, 0); |
1505 | |
1506 | if (not_shared) |
1507 | break; |
1508 | |
1509 | /* |
		 * if we get here, the spin deadline in lck_rw_drain_status()
		 * has expired w/o the rw_shared_count having drained to 0
1512 | * check to see if we're allowed to do a thread_block |
1513 | */ |
1514 | if (word.can_sleep) { |
1515 | |
1516 | istate = lck_interlock_lock(lock); |
1517 | |
1518 | word.data = ordered_load_rw(lock); |
1519 | if (word.shared_count != 0) { |
1520 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_START, |
1521 | trace_lck, word.shared_count, 0, 0, 0); |
1522 | |
1523 | word.w_waiting = 1; |
1524 | ordered_store_rw(lock, word.data); |
1525 | |
1526 | thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockUpgrade); |
1527 | res = assert_wait(LCK_RW_WRITER_EVENT(lock), |
1528 | THREAD_UNINT | THREAD_WAIT_NOREPORT_USER); |
1529 | lck_interlock_unlock(lock, istate); |
1530 | |
1531 | if (res == THREAD_WAITING) { |
1532 | res = thread_block(THREAD_CONTINUE_NULL); |
1533 | slept++; |
1534 | } |
1535 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_WAIT_CODE) | DBG_FUNC_END, |
1536 | trace_lck, res, slept, 0, 0); |
1537 | } else { |
1538 | lck_interlock_unlock(lock, istate); |
1539 | break; |
1540 | } |
1541 | } |
1542 | } |
1543 | #if CONFIG_DTRACE |
1544 | /* |
1545 | * We infer whether we took the sleep/spin path above by checking readers_at_sleep. |
1546 | */ |
1547 | if (dtrace_ls_enabled == TRUE) { |
1548 | if (slept == 0) { |
1549 | LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lock, mach_absolute_time() - wait_interval, 0); |
1550 | } else { |
1551 | LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lock, |
1552 | mach_absolute_time() - wait_interval, 1, |
1553 | (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep); |
1554 | } |
1555 | } |
1556 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lock, 1); |
1557 | #endif |
1558 | return (TRUE); |
1559 | } |
1560 | |
1561 | |
1562 | /* |
1563 | * Routine: lck_rw_lock_exclusive_to_shared |
1564 | */ |
1565 | |
1566 | void lck_rw_lock_exclusive_to_shared(lck_rw_t *lock) |
1567 | { |
1568 | uint32_t data, prev; |
1569 | |
1570 | assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p" , lock->lck_rw_data, lock->lck_rw_owner); |
1571 | ordered_store_rw_owner(lock, THREAD_NULL); |
1572 | for ( ; ; ) { |
1573 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp); |
1574 | if (data & LCK_RW_INTERLOCK) { |
1575 | #if __SMP__ |
1576 | atomic_exchange_abort(); |
1577 | lck_rw_interlock_spin(lock); /* wait for interlock to clear */ |
1578 | continue; |
1579 | #else |
1580 | panic("lck_rw_lock_exclusive_to_shared(): Interlock locked (%p): %x" , lock, data); |
1581 | #endif // __SMP__ |
1582 | } |
1583 | data += LCK_RW_SHARED_READER; |
1584 | if (data & LCK_RW_WANT_UPGRADE) |
1585 | data &= ~(LCK_RW_WANT_UPGRADE); |
1586 | else |
1587 | data &= ~(LCK_RW_WANT_EXCL); |
1588 | if (!((prev & LCK_RW_W_WAITING) && (prev & LCK_RW_PRIV_EXCL))) |
1589 | data &= ~(LCK_RW_W_WAITING); |
1590 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) |
1591 | break; |
1592 | cpu_pause(); |
1593 | } |
1594 | return lck_rw_lock_exclusive_to_shared_gen(lock, prev); |
1595 | } |
1596 | |
1597 | /* |
1598 | * Routine: lck_rw_lock_exclusive_to_shared_gen |
1599 | * Function: |
 *	Fast path has already dropped
 *	our exclusive state and bumped lck_rw_shared_count;
 *	all we need to do here is determine if anyone
 *	needs to be awakened.
1604 | */ |
1605 | static void |
1606 | lck_rw_lock_exclusive_to_shared_gen( |
1607 | lck_rw_t *lck, |
1608 | uint32_t prior_lock_state) |
1609 | { |
1610 | __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck); |
1611 | lck_rw_word_t fake_lck; |
1612 | |
1613 | /* |
	 * prior_lock_state is a snapshot of the 1st word of the
	 * lock in question... we'll overlay a lck_rw_word_t on it
	 * and carefully not access anything beyond what's defined
	 * in the first word of a lck_rw_t
1618 | */ |
1619 | fake_lck.data = prior_lock_state; |
1620 | |
1621 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START, |
1622 | trace_lck, fake_lck->want_excl, fake_lck->want_upgrade, 0, 0); |
1623 | |
1624 | /* |
1625 | * don't wake up anyone waiting to take the lock exclusively |
1626 | * since we hold a read count... when the read count drops to 0, |
1627 | * the writers will be woken. |
1628 | * |
1629 | * wake up any waiting readers if we don't have any writers waiting, |
1630 | * or the lock is NOT marked as rw_priv_excl (writers have privilege) |
1631 | */ |
1632 | if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) |
1633 | thread_wakeup(LCK_RW_READER_EVENT(lck)); |
1634 | |
1635 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END, |
1636 | trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0); |
1637 | |
1638 | #if CONFIG_DTRACE |
1639 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0); |
1640 | #endif |
1641 | } |
1642 | |
1643 | |
1644 | /* |
1645 | * Routine: lck_rw_try_lock |
1646 | */ |
1647 | boolean_t |
1648 | lck_rw_try_lock( |
1649 | lck_rw_t *lck, |
1650 | lck_rw_type_t lck_rw_type) |
1651 | { |
1652 | if (lck_rw_type == LCK_RW_TYPE_SHARED) |
1653 | return lck_rw_try_lock_shared(lck); |
1654 | else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) |
1655 | return lck_rw_try_lock_exclusive(lck); |
1656 | else |
1657 | panic("lck_rw_try_lock(): Invalid rw lock type: %x" , lck_rw_type); |
1658 | return FALSE; |
1659 | } |
1660 | |
1661 | /* |
1662 | * Routine: lck_rw_try_lock_shared |
1663 | */ |
1664 | |
1665 | boolean_t lck_rw_try_lock_shared(lck_rw_t *lock) |
1666 | { |
1667 | uint32_t data, prev; |
1668 | |
1669 | for ( ; ; ) { |
1670 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp); |
1671 | if (data & LCK_RW_INTERLOCK) { |
1672 | #if __SMP__ |
1673 | atomic_exchange_abort(); |
1674 | lck_rw_interlock_spin(lock); |
1675 | continue; |
1676 | #else |
1677 | panic("lck_rw_try_lock_shared(): Interlock locked (%p): %x" , lock, data); |
1678 | #endif |
1679 | } |
1680 | if (data & (LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) { |
1681 | atomic_exchange_abort(); |
1682 | return FALSE; /* lock is busy */ |
1683 | } |
1684 | data += LCK_RW_SHARED_READER; /* Increment reader refcount */ |
1685 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) |
1686 | break; |
1687 | cpu_pause(); |
1688 | } |
1689 | #if MACH_ASSERT |
1690 | thread_t owner = ordered_load_rw_owner(lock); |
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1692 | #endif |
1693 | current_thread()->rwlock_count++; |
1694 | #if CONFIG_DTRACE |
1695 | LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lock, DTRACE_RW_SHARED); |
1696 | #endif /* CONFIG_DTRACE */ |
1697 | return TRUE; |
1698 | } |
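
/*
 * Illustrative sketch, not part of the original file and not built:
 * a non-blocking read attempt built on lck_rw_try_lock_shared().
 * The helper name is hypothetical.
 */
#if 0
static boolean_t
example_try_read(lck_rw_t *rwlock)
{
	if (!lck_rw_try_lock_shared(rwlock))
		return FALSE;			/* a writer or upgrader holds the lock */
	/* ... read the protected state ... */
	lck_rw_unlock_shared(rwlock);
	return TRUE;
}
#endif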
1699 | |
1700 | |
1701 | /* |
1702 | * Routine: lck_rw_try_lock_exclusive |
1703 | */ |
1704 | |
1705 | boolean_t lck_rw_try_lock_exclusive(lck_rw_t *lock) |
1706 | { |
1707 | uint32_t data, prev; |
1708 | thread_t thread; |
1709 | |
1710 | for ( ; ; ) { |
1711 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_acquire_smp); |
1712 | if (data & LCK_RW_INTERLOCK) { |
1713 | #if __SMP__ |
1714 | atomic_exchange_abort(); |
1715 | lck_rw_interlock_spin(lock); |
1716 | continue; |
1717 | #else |
			panic("lck_rw_try_lock_exclusive(): Interlock locked (%p): %x", lock, data);
1719 | #endif |
1720 | } |
1721 | if (data & (LCK_RW_SHARED_MASK | LCK_RW_WANT_EXCL | LCK_RW_WANT_UPGRADE)) { |
1722 | atomic_exchange_abort(); |
1723 | return FALSE; |
1724 | } |
1725 | data |= LCK_RW_WANT_EXCL; |
1726 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_acquire_smp)) |
1727 | break; |
1728 | cpu_pause(); |
1729 | } |
1730 | thread = current_thread(); |
1731 | thread->rwlock_count++; |
1732 | #if MACH_ASSERT |
1733 | thread_t owner = ordered_load_rw_owner(lock); |
	assertf(owner == THREAD_NULL, "state=0x%x, owner=%p", ordered_load_rw(lock), owner);
1735 | #endif |
1736 | ordered_store_rw_owner(lock, thread); |
1737 | #if CONFIG_DTRACE |
1738 | LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lock, DTRACE_RW_EXCL); |
1739 | #endif /* CONFIG_DTRACE */ |
1740 | return TRUE; |
1741 | } |
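
/*
 * Illustrative sketch, not part of the original file and not built:
 * an opportunistic exclusive attempt that falls back to the blocking
 * path. The helper name is hypothetical.
 */
#if 0
static void
example_update(lck_rw_t *rwlock)
{
	if (!lck_rw_try_lock_exclusive(rwlock))
		lck_rw_lock_exclusive(rwlock);	/* contended: take the blocking path */
	/* ... modify the protected state ... */
	lck_rw_unlock_exclusive(rwlock);
}
#endif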
1742 | |
1743 | |
1744 | /* |
1745 | * Routine: lck_rw_unlock |
1746 | */ |
1747 | void |
1748 | lck_rw_unlock( |
1749 | lck_rw_t *lck, |
1750 | lck_rw_type_t lck_rw_type) |
1751 | { |
1752 | if (lck_rw_type == LCK_RW_TYPE_SHARED) |
1753 | lck_rw_unlock_shared(lck); |
1754 | else if (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE) |
1755 | lck_rw_unlock_exclusive(lck); |
1756 | else |
		panic("lck_rw_unlock(): Invalid RW lock type: %d", lck_rw_type);
1758 | } |
1759 | |
1760 | |
1761 | /* |
1762 | * Routine: lck_rw_unlock_shared |
1763 | */ |
1764 | void |
1765 | lck_rw_unlock_shared( |
1766 | lck_rw_t *lck) |
1767 | { |
1768 | lck_rw_type_t ret; |
1769 | |
	assertf(lck->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
	assertf(lck->lck_rw_shared_count > 0, "shared_count=0x%x", lck->lck_rw_shared_count);
1772 | ret = lck_rw_done(lck); |
1773 | |
1774 | if (ret != LCK_RW_TYPE_SHARED) |
		panic("lck_rw_unlock_shared(): lock %p held in mode: %d", lck, ret);
1776 | } |
1777 | |
1778 | |
1779 | /* |
1780 | * Routine: lck_rw_unlock_exclusive |
1781 | */ |
1782 | void |
1783 | lck_rw_unlock_exclusive( |
1784 | lck_rw_t *lck) |
1785 | { |
1786 | lck_rw_type_t ret; |
1787 | |
	assertf(lck->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lck->lck_rw_data, lck->lck_rw_owner);
1789 | ret = lck_rw_done(lck); |
1790 | |
1791 | if (ret != LCK_RW_TYPE_EXCLUSIVE) |
		panic("lck_rw_unlock_exclusive(): lock %p held in mode: %d", lck, ret);
1793 | } |
1794 | |
1795 | |
1796 | /* |
1797 | * Routine: lck_rw_lock_exclusive_gen |
1798 | */ |
1799 | static void |
1800 | lck_rw_lock_exclusive_gen( |
1801 | lck_rw_t *lock) |
1802 | { |
1803 | __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lock); |
1804 | lck_rw_word_t word; |
1805 | int slept = 0; |
1806 | boolean_t gotlock = 0; |
1807 | boolean_t not_shared_or_upgrade = 0; |
1808 | wait_result_t res = 0; |
1809 | boolean_t istate; |
1810 | |
1811 | #if CONFIG_DTRACE |
1812 | boolean_t dtrace_ls_initialized = FALSE; |
	boolean_t dtrace_rwl_excl_spin, dtrace_rwl_excl_block, dtrace_ls_enabled = FALSE;
1814 | uint64_t wait_interval = 0; |
1815 | int readers_at_sleep = 0; |
1816 | #endif |
1817 | |
1818 | /* |
1819 | * Try to acquire the lck_rw_want_excl bit. |
1820 | */ |
1821 | while (!lck_rw_grab(lock, LCK_RW_GRAB_WANT, FALSE)) { |
1822 | |
1823 | #if CONFIG_DTRACE |
1824 | if (dtrace_ls_initialized == FALSE) { |
1825 | dtrace_ls_initialized = TRUE; |
1826 | dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0); |
1827 | dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0); |
1828 | dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block; |
1829 | if (dtrace_ls_enabled) { |
1830 | /* |
1831 | * Either sleeping or spinning is happening, |
1832 | * start a timing of our delay interval now. |
1833 | */ |
1834 | readers_at_sleep = lock->lck_rw_shared_count; |
1835 | wait_interval = mach_absolute_time(); |
1836 | } |
1837 | } |
1838 | #endif |
1839 | |
1840 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0); |
1841 | |
1842 | gotlock = lck_rw_grab(lock, LCK_RW_GRAB_WANT, TRUE); |
1843 | |
1844 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, gotlock, 0); |
1845 | |
1846 | if (gotlock) |
1847 | break; |
		/*
		 * if we get here, the deadline has expired without us
		 * being able to grab the lock exclusively;
		 * check to see if we're allowed to do a thread_block
		 */
1853 | word.data = ordered_load_rw(lock); |
1854 | if (word.can_sleep) { |
1855 | |
1856 | istate = lck_interlock_lock(lock); |
1857 | word.data = ordered_load_rw(lock); |
1858 | |
1859 | if (word.want_excl) { |
1860 | |
1861 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0); |
1862 | |
1863 | word.w_waiting = 1; |
1864 | ordered_store_rw(lock, word.data); |
1865 | |
1866 | thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite); |
1867 | res = assert_wait(LCK_RW_WRITER_EVENT(lock), |
1868 | THREAD_UNINT | THREAD_WAIT_NOREPORT_USER); |
1869 | lck_interlock_unlock(lock, istate); |
1870 | |
1871 | if (res == THREAD_WAITING) { |
1872 | res = thread_block(THREAD_CONTINUE_NULL); |
1873 | slept++; |
1874 | } |
1875 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_WRITER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0); |
1876 | } else { |
1877 | word.want_excl = 1; |
1878 | ordered_store_rw(lock, word.data); |
1879 | lck_interlock_unlock(lock, istate); |
1880 | break; |
1881 | } |
1882 | } |
1883 | } |
1884 | /* |
1885 | * Wait for readers (and upgrades) to finish... |
1886 | */ |
1887 | while (!lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, FALSE)) { |
1888 | |
1889 | #if CONFIG_DTRACE |
		/*
		 * On the first pass through this loop, set up the DTrace
		 * lockstat state; if either the spin or the block probe is
		 * enabled, note the current reader count and start timing
		 * our delay interval now.
		 */
1896 | if (dtrace_ls_initialized == FALSE) { |
1897 | dtrace_ls_initialized = TRUE; |
1898 | dtrace_rwl_excl_spin = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] != 0); |
1899 | dtrace_rwl_excl_block = (lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK] != 0); |
1900 | dtrace_ls_enabled = dtrace_rwl_excl_spin || dtrace_rwl_excl_block; |
1901 | if (dtrace_ls_enabled) { |
1902 | /* |
1903 | * Either sleeping or spinning is happening, |
1904 | * start a timing of our delay interval now. |
1905 | */ |
1906 | readers_at_sleep = lock->lck_rw_shared_count; |
1907 | wait_interval = mach_absolute_time(); |
1908 | } |
1909 | } |
1910 | #endif |
1911 | |
1912 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0); |
1913 | |
1914 | not_shared_or_upgrade = lck_rw_drain_status(lock, LCK_RW_SHARED_MASK | LCK_RW_WANT_UPGRADE, TRUE); |
1915 | |
1916 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_SPIN_CODE) | DBG_FUNC_END, trace_lck, 0, 0, not_shared_or_upgrade, 0); |
1917 | |
1918 | if (not_shared_or_upgrade) |
1919 | break; |
		/*
		 * if we get here, the deadline has expired without us
		 * being able to grab the lock exclusively;
		 * check to see if we're allowed to do a thread_block
		 */
1925 | word.data = ordered_load_rw(lock); |
1926 | if (word.can_sleep) { |
1927 | |
1928 | istate = lck_interlock_lock(lock); |
1929 | word.data = ordered_load_rw(lock); |
1930 | |
1931 | if (word.shared_count != 0 || word.want_upgrade) { |
1932 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_START, trace_lck, 0, 0, 0, 0); |
1933 | |
1934 | word.w_waiting = 1; |
1935 | ordered_store_rw(lock, word.data); |
1936 | |
1937 | thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockWrite); |
1938 | res = assert_wait(LCK_RW_WRITER_EVENT(lock), |
1939 | THREAD_UNINT | THREAD_WAIT_NOREPORT_USER); |
1940 | lck_interlock_unlock(lock, istate); |
1941 | |
1942 | if (res == THREAD_WAITING) { |
1943 | res = thread_block(THREAD_CONTINUE_NULL); |
1944 | slept++; |
1945 | } |
1946 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_READER_WAIT_CODE) | DBG_FUNC_END, trace_lck, res, slept, 0, 0); |
1947 | } else { |
1948 | lck_interlock_unlock(lock, istate); |
1949 | /* |
1950 | * must own the lock now, since we checked for |
1951 | * readers or upgrade owner behind the interlock |
1952 | * no need for a call to 'lck_rw_drain_status' |
1953 | */ |
1954 | break; |
1955 | } |
1956 | } |
1957 | } |
1958 | |
1959 | #if CONFIG_DTRACE |
	/*
	 * Decide what latencies we suffered that are DTrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If dtrace_ls_enabled was never set, lockstat was not enabled when
	 * we started sleeping/spinning, so we don't record this event.
	 */
1969 | if (dtrace_ls_enabled == TRUE) { |
1970 | if (slept == 0) { |
1971 | LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lock, |
1972 | mach_absolute_time() - wait_interval, 1); |
1973 | } else { |
			/*
			 * For the blocking case, we also record whether the lock
			 * was held for read or write when we blocked, and how
			 * many readers there were. Notice that above we recorded
			 * this before we dropped the interlock, so the count is
			 * accurate.
			 */
1980 | LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lock, |
1981 | mach_absolute_time() - wait_interval, 1, |
1982 | (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep); |
1983 | } |
1984 | } |
1985 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lock, 1); |
1986 | #endif /* CONFIG_DTRACE */ |
1987 | } |
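
/*
 * The blocking protocol above, condensed into a sketch (illustrative
 * only, not built): the waiter bit is published and assert_wait() is
 * issued while the interlock is held, so a releasing thread cannot
 * test the bit and miss the wakeup in between. The helper name is
 * hypothetical.
 */
#if 0
static void
example_writer_wait(lck_rw_t *lock)
{
	lck_rw_word_t	word;
	wait_result_t	res;
	boolean_t	istate;

	istate = lck_interlock_lock(lock);	/* close the lost-wakeup window */
	word.data = ordered_load_rw(lock);	/* re-examine state under interlock */
	word.w_waiting = 1;			/* publish that a writer is waiting */
	ordered_store_rw(lock, word.data);
	res = assert_wait(LCK_RW_WRITER_EVENT(lock), THREAD_UNINT);
	lck_interlock_unlock(lock, istate);	/* releaser can now see w_waiting */
	if (res == THREAD_WAITING)
		(void) thread_block(THREAD_CONTINUE_NULL);
}
#endif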
1988 | |
1989 | /* |
1990 | * Routine: lck_rw_done |
1991 | */ |
1992 | |
1993 | lck_rw_type_t lck_rw_done(lck_rw_t *lock) |
1994 | { |
1995 | uint32_t data, prev; |
1996 | boolean_t once = FALSE; |
1997 | |
1998 | for ( ; ; ) { |
1999 | data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_release_smp); |
2000 | if (data & LCK_RW_INTERLOCK) { /* wait for interlock to clear */ |
2001 | #if __SMP__ |
2002 | atomic_exchange_abort(); |
2003 | lck_rw_interlock_spin(lock); |
2004 | continue; |
2005 | #else |
			panic("lck_rw_done(): Interlock locked (%p): %x", lock, data);
2007 | #endif // __SMP__ |
2008 | } |
2009 | if (data & LCK_RW_SHARED_MASK) { /* lock is held shared */ |
			assertf(lock->lck_rw_owner == THREAD_NULL, "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2011 | data -= LCK_RW_SHARED_READER; |
2012 | if ((data & LCK_RW_SHARED_MASK) == 0) /* if reader count has now gone to 0, check for waiters */ |
2013 | goto check_waiters; |
2014 | } else { /* if reader count == 0, must be exclusive lock */ |
2015 | if (data & LCK_RW_WANT_UPGRADE) { |
2016 | data &= ~(LCK_RW_WANT_UPGRADE); |
2017 | } else { |
2018 | if (data & LCK_RW_WANT_EXCL) |
2019 | data &= ~(LCK_RW_WANT_EXCL); |
2020 | else /* lock is not 'owned', panic */ |
					panic("Releasing non-exclusive RW lock without a reader refcount!");
2022 | } |
2023 | if (!once) { |
2024 | // Only check for holder and clear it once |
				assertf(lock->lck_rw_owner == current_thread(), "state=0x%x, owner=%p", lock->lck_rw_data, lock->lck_rw_owner);
2026 | ordered_store_rw_owner(lock, THREAD_NULL); |
2027 | once = TRUE; |
2028 | } |
2029 | check_waiters: |
			/*
			 * test the original values to match what
			 * lck_rw_done_gen is going to do to determine
			 * which wakeups need to happen...
			 *
			 * if !(fake_lck.priv_excl && fake_lck.w_waiting)
			 */
2037 | if (prev & LCK_RW_W_WAITING) { |
2038 | data &= ~(LCK_RW_W_WAITING); |
2039 | if ((prev & LCK_RW_PRIV_EXCL) == 0) |
2040 | data &= ~(LCK_RW_R_WAITING); |
2041 | } else |
2042 | data &= ~(LCK_RW_R_WAITING); |
2043 | } |
2044 | if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_release_smp)) |
2045 | break; |
2046 | cpu_pause(); |
2047 | } |
2048 | return lck_rw_done_gen(lock, prev); |
2049 | } |
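
/*
 * The read-modify-write idiom used by lck_rw_done() and its siblings,
 * in miniature (illustrative only, not built): snapshot the word,
 * compute the new value, and retry if another CPU raced us; the
 * interlock is the one state that is waited out rather than retried
 * over. The helper name and the relaxed memory order are hypothetical.
 */
#if 0
static void
example_rmw(lck_rw_t *lock)
{
	uint32_t	data, prev;

	for ( ; ; ) {
		data = atomic_exchange_begin32(&lock->lck_rw_data, &prev, memory_order_relaxed);
		if (data & LCK_RW_INTERLOCK) {
			atomic_exchange_abort();	/* drop the reservation */
			lck_rw_interlock_spin(lock);	/* wait for the interlock holder */
			continue;
		}
		/* ... compute the new value of 'data' here ... */
		if (atomic_exchange_complete32(&lock->lck_rw_data, prev, data, memory_order_relaxed))
			break;				/* new value published atomically */
		cpu_pause();				/* lost the race; retry */
	}
}
#endif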
2050 | |
2051 | /* |
2052 | * Routine: lck_rw_done_gen |
2053 | * |
 * called from lck_rw_done()...
 * prior_lock_state is the value in the 1st
 * word of the lock at the time of a successful
 * atomic compare and exchange with the new value...
 * it represents the state of the lock before we
 * decremented the rw_shared_count or cleared either
 * rw_want_upgrade or rw_want_excl and
 * the w_waiting/r_waiting bits... since lck_rw_done()
 * has already changed the state atomically,
 * we just need to decide if we should
 * wake up anyone and what value to return... we do
 * this by examining the state of the lock before
 * we changed it
2067 | */ |
2068 | static lck_rw_type_t |
2069 | lck_rw_done_gen( |
2070 | lck_rw_t *lck, |
2071 | uint32_t prior_lock_state) |
2072 | { |
2073 | lck_rw_word_t fake_lck; |
2074 | lck_rw_type_t lock_type; |
2075 | thread_t thread; |
2076 | uint32_t rwlock_count; |
2077 | |
	/*
	 * prior_lock_state is a snapshot of the first word of the
	 * lock in question... we fake up a local copy of it
	 * and carefully avoid accessing anything beyond what's defined
	 * in the first word of a lck_rw_t
	 */
2084 | fake_lck.data = prior_lock_state; |
2085 | |
2086 | if (fake_lck.shared_count <= 1) { |
2087 | if (fake_lck.w_waiting) |
2088 | thread_wakeup(LCK_RW_WRITER_EVENT(lck)); |
2089 | |
2090 | if (!(fake_lck.priv_excl && fake_lck.w_waiting) && fake_lck.r_waiting) |
2091 | thread_wakeup(LCK_RW_READER_EVENT(lck)); |
2092 | } |
2093 | if (fake_lck.shared_count) |
2094 | lock_type = LCK_RW_TYPE_SHARED; |
2095 | else |
2096 | lock_type = LCK_RW_TYPE_EXCLUSIVE; |
2097 | |
2098 | /* Check if dropping the lock means that we need to unpromote */ |
2099 | thread = current_thread(); |
2100 | rwlock_count = thread->rwlock_count--; |
2101 | #if MACH_LDEBUG |
2102 | if (rwlock_count == 0) |
		panic("rw lock count underflow for thread %p", thread);
2104 | #endif |
2105 | if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { |
2106 | /* sched_flags checked without lock, but will be rechecked while clearing */ |
2107 | lck_rw_clear_promotion(thread, unslide_for_kdebug(lck)); |
2108 | } |
2109 | #if CONFIG_DTRACE |
2110 | LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, lock_type == LCK_RW_TYPE_SHARED ? 0 : 1); |
2111 | #endif |
2112 | return lock_type; |
2113 | } |
2114 | |
2115 | /* |
2116 | * Routine: lck_rw_lock_shared_gen |
2117 | * Function: |
 *	Fast path code has determined that this lock
 *	is held exclusively... this is where we spin/block
 *	until we can acquire the lock in shared mode
2121 | */ |
2122 | static void |
2123 | lck_rw_lock_shared_gen( |
2124 | lck_rw_t *lck) |
2125 | { |
2126 | __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck); |
2127 | lck_rw_word_t word; |
2128 | boolean_t gotlock = 0; |
2129 | int slept = 0; |
2130 | wait_result_t res = 0; |
2131 | boolean_t istate; |
2132 | |
2133 | #if CONFIG_DTRACE |
2134 | uint64_t wait_interval = 0; |
2135 | int readers_at_sleep = 0; |
2136 | boolean_t dtrace_ls_initialized = FALSE; |
2137 | boolean_t dtrace_rwl_shared_spin, dtrace_rwl_shared_block, dtrace_ls_enabled = FALSE; |
2138 | #endif /* CONFIG_DTRACE */ |
2139 | |
	while (!lck_rw_grab(lck, LCK_RW_GRAB_SHARED, FALSE)) {
2141 | |
2142 | #if CONFIG_DTRACE |
2143 | if (dtrace_ls_initialized == FALSE) { |
2144 | dtrace_ls_initialized = TRUE; |
2145 | dtrace_rwl_shared_spin = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] != 0); |
2146 | dtrace_rwl_shared_block = (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK] != 0); |
2147 | dtrace_ls_enabled = dtrace_rwl_shared_spin || dtrace_rwl_shared_block; |
2148 | if (dtrace_ls_enabled) { |
2149 | /* |
2150 | * Either sleeping or spinning is happening, |
2151 | * start a timing of our delay interval now. |
2152 | */ |
2153 | readers_at_sleep = lck->lck_rw_shared_count; |
2154 | wait_interval = mach_absolute_time(); |
2155 | } |
2156 | } |
2157 | #endif |
2158 | |
2159 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_START, |
2160 | trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, 0, 0); |
2161 | |
2162 | gotlock = lck_rw_grab(lck, LCK_RW_GRAB_SHARED, TRUE); |
2163 | |
2164 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_SPIN_CODE) | DBG_FUNC_END, |
2165 | trace_lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, gotlock, 0); |
2166 | |
2167 | if (gotlock) |
2168 | break; |
		/*
		 * if we get here, the deadline has expired without us
		 * being able to grab the lock for read;
		 * check to see if we're allowed to do a thread_block
		 */
2174 | if (lck->lck_rw_can_sleep) { |
2175 | |
2176 | istate = lck_interlock_lock(lck); |
2177 | |
2178 | word.data = ordered_load_rw(lck); |
2179 | if ((word.want_excl || word.want_upgrade) && |
2180 | ((word.shared_count == 0) || word.priv_excl)) { |
2181 | |
2182 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_START, |
2183 | trace_lck, word.want_excl, word.want_upgrade, 0, 0); |
2184 | |
2185 | word.r_waiting = 1; |
2186 | ordered_store_rw(lck, word.data); |
2187 | |
2188 | thread_set_pending_block_hint(current_thread(), kThreadWaitKernelRWLockRead); |
2189 | res = assert_wait(LCK_RW_READER_EVENT(lck), |
2190 | THREAD_UNINT | THREAD_WAIT_NOREPORT_USER); |
2191 | lck_interlock_unlock(lck, istate); |
2192 | |
2193 | if (res == THREAD_WAITING) { |
2194 | res = thread_block(THREAD_CONTINUE_NULL); |
2195 | slept++; |
2196 | } |
2197 | KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_WAIT_CODE) | DBG_FUNC_END, |
2198 | trace_lck, res, slept, 0, 0); |
2199 | } else { |
2200 | word.shared_count++; |
2201 | ordered_store_rw(lck, word.data); |
2202 | lck_interlock_unlock(lck, istate); |
2203 | break; |
2204 | } |
2205 | } |
2206 | } |
2207 | |
2208 | #if CONFIG_DTRACE |
2209 | if (dtrace_ls_enabled == TRUE) { |
2210 | if (slept == 0) { |
2211 | LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0); |
2212 | } else { |
2213 | LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck, |
2214 | mach_absolute_time() - wait_interval, 0, |
2215 | (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep); |
2216 | } |
2217 | } |
2218 | LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0); |
2219 | #endif /* CONFIG_DTRACE */ |
2220 | } |
2221 | |
2222 | |
2223 | void |
2224 | lck_rw_assert( |
2225 | lck_rw_t *lck, |
2226 | unsigned int type) |
2227 | { |
2228 | switch (type) { |
2229 | case LCK_RW_ASSERT_SHARED: |
2230 | if ((lck->lck_rw_shared_count != 0) && |
2231 | (lck->lck_rw_owner == THREAD_NULL)) { |
2232 | return; |
2233 | } |
2234 | break; |
2235 | case LCK_RW_ASSERT_EXCLUSIVE: |
2236 | if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && |
2237 | (lck->lck_rw_shared_count == 0) && |
2238 | (lck->lck_rw_owner == current_thread())) { |
2239 | return; |
2240 | } |
2241 | break; |
2242 | case LCK_RW_ASSERT_HELD: |
2243 | if (lck->lck_rw_shared_count != 0) |
2244 | return; // Held shared |
2245 | if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && |
2246 | (lck->lck_rw_owner == current_thread())) { |
2247 | return; // Held exclusive |
2248 | } |
2249 | break; |
2250 | case LCK_RW_ASSERT_NOTHELD: |
2251 | if ((lck->lck_rw_shared_count == 0) && |
2252 | !(lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) && |
2253 | (lck->lck_rw_owner == THREAD_NULL)) { |
2254 | return; |
2255 | } |
2256 | break; |
2257 | default: |
2258 | break; |
2259 | } |
	panic("rw lock (%p)%s held (mode=%u)", lck, (type == LCK_RW_ASSERT_NOTHELD ? "" : " not"), type);
2261 | } |
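
/*
 * Illustrative sketch, not part of the original file and not built:
 * a helper documenting its locking precondition with lck_rw_assert().
 * The function name is hypothetical.
 */
#if 0
static void
example_requires_exclusive(lck_rw_t *rwlock)
{
	lck_rw_assert(rwlock, LCK_RW_ASSERT_EXCLUSIVE);	/* panics unless we hold it exclusive */
	/* ... mutate state that requires the exclusive hold ... */
}
#endif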
2262 | |
2263 | |
2264 | /* |
2265 | * Routine: kdp_lck_rw_lock_is_acquired_exclusive |
2266 | * NOT SAFE: To be used only by kernel debugger to avoid deadlock. |
2267 | */ |
2268 | boolean_t |
kdp_lck_rw_lock_is_acquired_exclusive(lck_rw_t *lck)
{
	if (not_in_kdp) {
		panic("panic: rw lock exclusive check done outside of kernel debugger");
	}
2273 | return ((lck->lck_rw_want_upgrade || lck->lck_rw_want_excl) && (lck->lck_rw_shared_count == 0)) ? TRUE : FALSE; |
2274 | } |
2275 | |
2276 | /* |
2277 | * The C portion of the mutex package. These routines are only invoked |
2278 | * if the optimized assembler routines can't do the work. |
2279 | */ |
2280 | |
2281 | /* |
2282 | * Forward declaration |
2283 | */ |
2284 | |
2285 | void |
2286 | lck_mtx_ext_init( |
2287 | lck_mtx_ext_t * lck, |
2288 | lck_grp_t * grp, |
2289 | lck_attr_t * attr); |
2290 | |
2291 | /* |
2292 | * Routine: lck_mtx_alloc_init |
2293 | */ |
2294 | lck_mtx_t * |
2295 | lck_mtx_alloc_init( |
2296 | lck_grp_t * grp, |
2297 | lck_attr_t * attr) |
2298 | { |
2299 | lck_mtx_t *lck; |
2300 | |
2301 | if ((lck = (lck_mtx_t *) kalloc(sizeof(lck_mtx_t))) != 0) |
2302 | lck_mtx_init(lck, grp, attr); |
2303 | |
2304 | return (lck); |
2305 | } |
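
/*
 * Illustrative sketch, not part of the original file and not built:
 * the usual allocate/use/free cycle for a heap-allocated mutex. The
 * group name and variable names are hypothetical.
 */
#if 0
static lck_grp_t	*example_grp;
static lck_mtx_t	*example_mtx;

static void
example_setup(void)
{
	example_grp = lck_grp_alloc_init("example", LCK_GRP_ATTR_NULL);
	example_mtx = lck_mtx_alloc_init(example_grp, LCK_ATTR_NULL);
}

static void
example_teardown(void)
{
	lck_mtx_free(example_mtx, example_grp);		/* destroys, then kfrees */
	lck_grp_free(example_grp);
}
#endif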
2306 | |
2307 | /* |
2308 | * Routine: lck_mtx_free |
2309 | */ |
2310 | void |
2311 | lck_mtx_free( |
2312 | lck_mtx_t * lck, |
2313 | lck_grp_t * grp) |
2314 | { |
2315 | lck_mtx_destroy(lck, grp); |
2316 | kfree((void *) lck, sizeof(lck_mtx_t)); |
2317 | } |
2318 | |
2319 | /* |
2320 | * Routine: lck_mtx_init |
2321 | */ |
2322 | void |
2323 | lck_mtx_init( |
2324 | lck_mtx_t * lck, |
2325 | lck_grp_t * grp, |
2326 | lck_attr_t * attr) |
2327 | { |
2328 | #ifdef BER_XXX |
2329 | lck_mtx_ext_t *lck_ext; |
2330 | #endif |
2331 | lck_attr_t *lck_attr; |
2332 | |
2333 | if (attr != LCK_ATTR_NULL) |
2334 | lck_attr = attr; |
2335 | else |
2336 | lck_attr = &LockDefaultLckAttr; |
2337 | |
2338 | #ifdef BER_XXX |
2339 | if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) { |
2340 | if ((lck_ext = (lck_mtx_ext_t *) kalloc(sizeof(lck_mtx_ext_t))) != 0) { |
2341 | lck_mtx_ext_init(lck_ext, grp, lck_attr); |
2342 | lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; |
2343 | lck->lck_mtx_ptr = lck_ext; |
2344 | lck->lck_mtx_type = LCK_MTX_TYPE; |
2345 | } |
2346 | } else |
2347 | #endif |
2348 | { |
2349 | lck->lck_mtx_ptr = NULL; // Clear any padding in the union fields below |
2350 | lck->lck_mtx_waiters = 0; |
2351 | lck->lck_mtx_pri = 0; |
2352 | lck->lck_mtx_type = LCK_MTX_TYPE; |
2353 | ordered_store_mtx(lck, 0); |
2354 | } |
2355 | lck_grp_reference(grp); |
2356 | lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); |
2357 | } |
2358 | |
2359 | /* |
2360 | * Routine: lck_mtx_init_ext |
2361 | */ |
2362 | void |
2363 | lck_mtx_init_ext( |
2364 | lck_mtx_t * lck, |
2365 | lck_mtx_ext_t * lck_ext, |
2366 | lck_grp_t * grp, |
2367 | lck_attr_t * attr) |
2368 | { |
2369 | lck_attr_t *lck_attr; |
2370 | |
2371 | if (attr != LCK_ATTR_NULL) |
2372 | lck_attr = attr; |
2373 | else |
2374 | lck_attr = &LockDefaultLckAttr; |
2375 | |
2376 | if ((lck_attr->lck_attr_val) & LCK_ATTR_DEBUG) { |
2377 | lck_mtx_ext_init(lck_ext, grp, lck_attr); |
2378 | lck->lck_mtx_tag = LCK_MTX_TAG_INDIRECT; |
2379 | lck->lck_mtx_ptr = lck_ext; |
2380 | lck->lck_mtx_type = LCK_MTX_TYPE; |
2381 | } else { |
2382 | lck->lck_mtx_waiters = 0; |
2383 | lck->lck_mtx_pri = 0; |
2384 | lck->lck_mtx_type = LCK_MTX_TYPE; |
2385 | ordered_store_mtx(lck, 0); |
2386 | } |
2387 | lck_grp_reference(grp); |
2388 | lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); |
2389 | } |
2390 | |
2391 | /* |
2392 | * Routine: lck_mtx_ext_init |
2393 | */ |
2394 | void |
2395 | lck_mtx_ext_init( |
2396 | lck_mtx_ext_t * lck, |
2397 | lck_grp_t * grp, |
2398 | lck_attr_t * attr) |
2399 | { |
2400 | bzero((void *) lck, sizeof(lck_mtx_ext_t)); |
2401 | |
2402 | lck->lck_mtx.lck_mtx_type = LCK_MTX_TYPE; |
2403 | |
2404 | if ((attr->lck_attr_val) & LCK_ATTR_DEBUG) { |
2405 | lck->lck_mtx_deb.type = MUTEX_TAG; |
2406 | lck->lck_mtx_attr |= LCK_MTX_ATTR_DEBUG; |
2407 | } |
2408 | lck->lck_mtx_grp = grp; |
2409 | |
2410 | if (grp->lck_grp_attr & LCK_GRP_ATTR_STAT) |
2411 | lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT; |
2412 | } |
2413 | |
2414 | /* The slow versions */ |
2415 | static void lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked); |
2416 | static boolean_t lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread); |
2417 | static void lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked); |
2418 | |
2419 | /* |
2420 | * Routine: lck_mtx_verify |
2421 | * |
2422 | * Verify if a mutex is valid |
2423 | */ |
2424 | static inline void |
2425 | lck_mtx_verify(lck_mtx_t *lock) |
2426 | { |
	if (lock->lck_mtx_type != LCK_MTX_TYPE)
		panic("Invalid mutex %p", lock);
#if DEVELOPMENT || DEBUG
	if (lock->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		panic("Mutex destroyed %p", lock);
2432 | #endif /* DEVELOPMENT || DEBUG */ |
2433 | } |
2434 | |
2435 | /* |
2436 | * Routine: lck_mtx_check_preemption |
2437 | * |
2438 | * Verify preemption is enabled when attempting to acquire a mutex. |
2439 | */ |
2440 | |
2441 | static inline void |
2442 | lck_mtx_check_preemption(lck_mtx_t *lock) |
2443 | { |
2444 | #if DEVELOPMENT || DEBUG |
2445 | int pl = get_preemption_level(); |
2446 | |
2447 | if (pl != 0) |
		panic("Attempt to take mutex with preemption disabled. Lock=%p, level=%d", lock, pl);
2449 | #else |
2450 | (void)lock; |
2451 | #endif |
2452 | } |
2453 | |
2454 | /* |
2455 | * Routine: lck_mtx_lock |
2456 | */ |
2457 | void |
2458 | lck_mtx_lock(lck_mtx_t *lock) |
2459 | { |
2460 | thread_t thread; |
2461 | |
2462 | lck_mtx_verify(lock); |
2463 | lck_mtx_check_preemption(lock); |
2464 | thread = current_thread(); |
2465 | if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread), |
2466 | memory_order_acquire_smp, FALSE)) { |
2467 | #if CONFIG_DTRACE |
2468 | LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0); |
2469 | #endif /* CONFIG_DTRACE */ |
2470 | return; |
2471 | } |
2472 | lck_mtx_lock_contended(lock, thread, FALSE); |
2473 | } |
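
/*
 * Illustrative sketch, not part of the original file and not built:
 * the fast path above makes the uncontended case a single
 * compare-and-swap, so a caller simply brackets its critical section.
 * The helper name is hypothetical.
 */
#if 0
static void
example_critical_section(lck_mtx_t *mtx)
{
	lck_mtx_lock(mtx);		/* one CAS when uncontended */
	/* ... touch the protected state ... */
	lck_mtx_unlock(mtx);		/* one CAS when there are no waiters */
}
#endif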
2474 | |
/*
 * This is the slow version of mutex locking.
 */
2478 | static void NOINLINE |
2479 | lck_mtx_lock_contended(lck_mtx_t *lock, thread_t thread, boolean_t interlocked) |
2480 | { |
2481 | thread_t holding_thread; |
2482 | uintptr_t state; |
2483 | int waiters; |
2484 | |
2485 | if (interlocked) |
2486 | goto interlock_held; |
2487 | |
2488 | /* TODO: short-duration spin for on-core contention <rdar://problem/10234625> */ |
2489 | |
2490 | /* Loop waiting until I see that the mutex is unowned */ |
2491 | for ( ; ; ) { |
2492 | interlock_lock(lock); |
2493 | interlock_held: |
2494 | state = ordered_load_mtx(lock); |
2495 | holding_thread = LCK_MTX_STATE_TO_THREAD(state); |
2496 | if (holding_thread == NULL) |
2497 | break; |
2498 | ordered_store_mtx(lock, (state | LCK_ILOCK | ARM_LCK_WAITERS)); // Set waiters bit and wait |
2499 | lck_mtx_lock_wait(lock, holding_thread); |
2500 | /* returns interlock unlocked */ |
2501 | } |
2502 | |
2503 | /* Hooray, I'm the new owner! */ |
2504 | waiters = lck_mtx_lock_acquire(lock); |
2505 | state = LCK_MTX_THREAD_TO_STATE(thread); |
2506 | if (waiters != 0) |
2507 | state |= ARM_LCK_WAITERS; |
2508 | #if __SMP__ |
2509 | state |= LCK_ILOCK; // Preserve interlock |
2510 | ordered_store_mtx(lock, state); // Set ownership |
2511 | interlock_unlock(lock); // Release interlock, enable preemption |
2512 | #else |
2513 | ordered_store_mtx(lock, state); // Set ownership |
2514 | enable_preemption(); |
2515 | #endif |
2516 | load_memory_barrier(); |
2517 | |
2518 | #if CONFIG_DTRACE |
2519 | LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lock, 0); |
2520 | #endif /* CONFIG_DTRACE */ |
2521 | } |
2522 | |
2523 | /* |
2524 | * Common code for mutex locking as spinlock |
2525 | */ |
2526 | static inline void |
2527 | lck_mtx_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex) |
2528 | { |
2529 | uintptr_t state; |
2530 | |
2531 | interlock_lock(lock); |
2532 | state = ordered_load_mtx(lock); |
2533 | if (LCK_MTX_STATE_TO_THREAD(state)) { |
2534 | if (allow_held_as_mutex) |
2535 | lck_mtx_lock_contended(lock, current_thread(), TRUE); |
2536 | else |
2537 | // "Always" variants can never block. If the lock is held and blocking is not allowed |
2538 | // then someone is mixing always and non-always calls on the same lock, which is |
2539 | // forbidden. |
			panic("Attempting to block on a lock taken as spin-always %p", lock);
2541 | return; |
2542 | } |
2543 | state &= ARM_LCK_WAITERS; // Preserve waiters bit |
2544 | state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock |
2545 | ordered_store_mtx(lock, state); |
2546 | load_memory_barrier(); |
2547 | |
2548 | #if CONFIG_DTRACE |
2549 | LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN_ACQUIRE, lock, 0); |
2550 | #endif /* CONFIG_DTRACE */ |
2551 | } |
2552 | |
2553 | /* |
2554 | * Routine: lck_mtx_lock_spin |
2555 | */ |
2556 | void |
2557 | lck_mtx_lock_spin(lck_mtx_t *lock) |
2558 | { |
2559 | lck_mtx_check_preemption(lock); |
2560 | lck_mtx_lock_spin_internal(lock, TRUE); |
2561 | } |
2562 | |
2563 | /* |
2564 | * Routine: lck_mtx_lock_spin_always |
2565 | */ |
2566 | void |
2567 | lck_mtx_lock_spin_always(lck_mtx_t *lock) |
2568 | { |
2569 | lck_mtx_lock_spin_internal(lock, FALSE); |
2570 | } |
2571 | |
2572 | /* |
2573 | * Routine: lck_mtx_try_lock |
2574 | */ |
2575 | boolean_t |
2576 | lck_mtx_try_lock(lck_mtx_t *lock) |
2577 | { |
2578 | thread_t thread = current_thread(); |
2579 | |
2580 | lck_mtx_verify(lock); |
2581 | if (atomic_compare_exchange(&lock->lck_mtx_data, 0, LCK_MTX_THREAD_TO_STATE(thread), |
2582 | memory_order_acquire_smp, FALSE)) { |
2583 | #if CONFIG_DTRACE |
2584 | LOCKSTAT_RECORD(LS_LCK_MTX_TRY_LOCK_ACQUIRE, lock, 0); |
2585 | #endif /* CONFIG_DTRACE */ |
2586 | return TRUE; |
2587 | } |
2588 | return lck_mtx_try_lock_contended(lock, thread); |
2589 | } |
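
/*
 * Illustrative sketch, not part of the original file and not built:
 * using lck_mtx_try_lock() to skip optional work rather than block.
 * The helper name is hypothetical.
 */
#if 0
static void
example_opportunistic(lck_mtx_t *mtx)
{
	if (lck_mtx_try_lock(mtx)) {
		/* ... do the optional work ... */
		lck_mtx_unlock(mtx);
	}
	/* otherwise skip it; the current holder will get there */
}
#endif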
2590 | |
2591 | static boolean_t NOINLINE |
2592 | lck_mtx_try_lock_contended(lck_mtx_t *lock, thread_t thread) |
2593 | { |
2594 | thread_t holding_thread; |
2595 | uintptr_t state; |
2596 | int waiters; |
2597 | |
2598 | #if __SMP__ |
2599 | interlock_lock(lock); |
2600 | state = ordered_load_mtx(lock); |
2601 | holding_thread = LCK_MTX_STATE_TO_THREAD(state); |
2602 | if (holding_thread) { |
2603 | interlock_unlock(lock); |
2604 | return FALSE; |
2605 | } |
2606 | #else |
2607 | disable_preemption_for_thread(thread); |
2608 | state = ordered_load_mtx(lock); |
2609 | if (state & LCK_ILOCK) |
		panic("Unexpected interlock set (%p)", lock);
2611 | holding_thread = LCK_MTX_STATE_TO_THREAD(state); |
2612 | if (holding_thread) { |
2613 | enable_preemption(); |
2614 | return FALSE; |
2615 | } |
2616 | state |= LCK_ILOCK; |
2617 | ordered_store_mtx(lock, state); |
2618 | #endif // __SMP__ |
2619 | waiters = lck_mtx_lock_acquire(lock); |
2620 | state = LCK_MTX_THREAD_TO_STATE(thread); |
2621 | if (waiters != 0) |
2622 | state |= ARM_LCK_WAITERS; |
2623 | #if __SMP__ |
2624 | state |= LCK_ILOCK; // Preserve interlock |
2625 | ordered_store_mtx(lock, state); // Set ownership |
2626 | interlock_unlock(lock); // Release interlock, enable preemption |
2627 | #else |
2628 | ordered_store_mtx(lock, state); // Set ownership |
2629 | enable_preemption(); |
2630 | #endif |
2631 | load_memory_barrier(); |
2632 | return TRUE; |
2633 | } |
2634 | |
2635 | static inline boolean_t |
2636 | lck_mtx_try_lock_spin_internal(lck_mtx_t *lock, boolean_t allow_held_as_mutex) |
2637 | { |
2638 | uintptr_t state; |
2639 | |
2640 | if (!interlock_try(lock)) |
2641 | return FALSE; |
2642 | state = ordered_load_mtx(lock); |
	if (LCK_MTX_STATE_TO_THREAD(state)) {
2644 | // Lock is held as mutex |
2645 | if (allow_held_as_mutex) |
2646 | interlock_unlock(lock); |
2647 | else |
2648 | // "Always" variants can never block. If the lock is held as a normal mutex |
2649 | // then someone is mixing always and non-always calls on the same lock, which is |
2650 | // forbidden. |
			panic("Spin-mutex held as full mutex %p", lock);
2652 | return FALSE; |
2653 | } |
2654 | state &= ARM_LCK_WAITERS; // Preserve waiters bit |
2655 | state |= (LCK_MTX_SPIN_TAG | LCK_ILOCK); // Add spin tag and maintain interlock |
2656 | ordered_store_mtx(lock, state); |
2657 | load_memory_barrier(); |
2658 | |
2659 | #if CONFIG_DTRACE |
2660 | LOCKSTAT_RECORD(LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, lock, 0); |
2661 | #endif /* CONFIG_DTRACE */ |
2662 | return TRUE; |
2663 | } |
2664 | |
2665 | /* |
2666 | * Routine: lck_mtx_try_lock_spin |
2667 | */ |
2668 | boolean_t |
2669 | lck_mtx_try_lock_spin(lck_mtx_t *lock) |
2670 | { |
2671 | return lck_mtx_try_lock_spin_internal(lock, TRUE); |
2672 | } |
2673 | |
2674 | /* |
2675 | * Routine: lck_mtx_try_lock_spin_always |
2676 | */ |
2677 | boolean_t |
2678 | lck_mtx_try_lock_spin_always(lck_mtx_t *lock) |
2679 | { |
2680 | return lck_mtx_try_lock_spin_internal(lock, FALSE); |
2681 | } |
2682 | |
2683 | |
2684 | |
2685 | /* |
2686 | * Routine: lck_mtx_unlock |
2687 | */ |
2688 | void |
2689 | lck_mtx_unlock(lck_mtx_t *lock) |
2690 | { |
2691 | thread_t thread = current_thread(); |
2692 | uintptr_t state; |
2693 | boolean_t ilk_held = FALSE; |
2694 | |
2695 | lck_mtx_verify(lock); |
2696 | |
2697 | state = ordered_load_mtx(lock); |
2698 | if (state & LCK_ILOCK) { |
		if (LCK_MTX_STATE_TO_THREAD(state) == (thread_t)LCK_MTX_SPIN_TAG)
2700 | ilk_held = TRUE; // Interlock is held by (presumably) this thread |
2701 | goto slow_case; |
2702 | } |
2703 | // Locked as a mutex |
2704 | if (atomic_compare_exchange(&lock->lck_mtx_data, LCK_MTX_THREAD_TO_STATE(thread), 0, |
2705 | memory_order_release_smp, FALSE)) { |
2706 | #if CONFIG_DTRACE |
2707 | LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0); |
2708 | #endif /* CONFIG_DTRACE */ |
2709 | return; |
2710 | } |
2711 | slow_case: |
2712 | lck_mtx_unlock_contended(lock, thread, ilk_held); |
2713 | } |
2714 | |
2715 | static void NOINLINE |
2716 | lck_mtx_unlock_contended(lck_mtx_t *lock, thread_t thread, boolean_t ilk_held) |
2717 | { |
2718 | uintptr_t state; |
2719 | |
2720 | if (ilk_held) { |
2721 | state = ordered_load_mtx(lock); |
2722 | } else { |
2723 | #if __SMP__ |
2724 | interlock_lock(lock); |
2725 | state = ordered_load_mtx(lock); |
2726 | if (thread != LCK_MTX_STATE_TO_THREAD(state)) |
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2728 | #else |
2729 | disable_preemption_for_thread(thread); |
2730 | state = ordered_load_mtx(lock); |
2731 | if (state & LCK_ILOCK) |
			panic("lck_mtx_unlock(): Unexpected interlock set (%p)", lock);
		if (thread != LCK_MTX_STATE_TO_THREAD(state))
			panic("lck_mtx_unlock(): Attempt to release lock not owned by thread (%p)", lock);
2735 | state |= LCK_ILOCK; |
2736 | ordered_store_mtx(lock, state); |
2737 | #endif |
2738 | if (state & ARM_LCK_WAITERS) { |
2739 | lck_mtx_unlock_wakeup(lock, thread); |
2740 | state = ordered_load_mtx(lock); |
2741 | } else { |
			assertf(lock->lck_mtx_pri == 0, "pri=0x%x", lock->lck_mtx_pri);
2743 | } |
2744 | } |
2745 | state &= ARM_LCK_WAITERS; /* Clear state, retain waiters bit */ |
2746 | #if __SMP__ |
2747 | state |= LCK_ILOCK; |
2748 | ordered_store_mtx(lock, state); |
2749 | interlock_unlock(lock); |
2750 | #else |
2751 | ordered_store_mtx(lock, state); |
2752 | enable_preemption(); |
2753 | #endif |
2754 | |
2755 | #if CONFIG_DTRACE |
2756 | LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lock, 0); |
2757 | #endif /* CONFIG_DTRACE */ |
2758 | } |
2759 | |
2760 | /* |
2761 | * Routine: lck_mtx_assert |
2762 | */ |
2763 | void |
2764 | lck_mtx_assert(lck_mtx_t *lock, unsigned int type) |
2765 | { |
2766 | thread_t thread, holder; |
2767 | uintptr_t state; |
2768 | |
2769 | state = ordered_load_mtx(lock); |
2770 | holder = LCK_MTX_STATE_TO_THREAD(state); |
2771 | if (holder == (thread_t)LCK_MTX_SPIN_TAG) { |
2772 | // Lock is held in spin mode, owner is unknown. |
2773 | return; // Punt |
2774 | } |
2775 | thread = current_thread(); |
	if (type == LCK_MTX_ASSERT_OWNED) {
		if (thread != holder)
			panic("lck_mtx_assert(): mutex (%p) not owned by current thread", lock);
	} else if (type == LCK_MTX_ASSERT_NOTOWNED) {
		if (thread == holder)
			panic("lck_mtx_assert(): mutex (%p) unexpectedly owned by current thread", lock);
	} else
		panic("lck_mtx_assert(): invalid arg (%u)", type);
2784 | } |
2785 | |
2786 | /* |
2787 | * Routine: lck_mtx_ilk_unlock |
2788 | */ |
2789 | boolean_t |
2790 | lck_mtx_ilk_unlock(lck_mtx_t *lock) |
2791 | { |
2792 | interlock_unlock(lock); |
2793 | return TRUE; |
2794 | } |
2795 | |
2796 | /* |
2797 | * Routine: lck_mtx_convert_spin |
2798 | * |
2799 | * Convert a mutex held for spin into a held full mutex |
2800 | */ |
2801 | void |
2802 | lck_mtx_convert_spin(lck_mtx_t *lock) |
2803 | { |
2804 | thread_t thread = current_thread(); |
2805 | uintptr_t state; |
2806 | int waiters; |
2807 | |
2808 | state = ordered_load_mtx(lock); |
2809 | if (LCK_MTX_STATE_TO_THREAD(state) == thread) |
2810 | return; // Already owned as mutex, return |
2811 | if ((state & LCK_ILOCK) == 0 || (LCK_MTX_STATE_TO_THREAD(state) != (thread_t)LCK_MTX_SPIN_TAG)) |
		panic("lck_mtx_convert_spin: Not held as spinlock (%p)", lock);
2813 | state &= ~(LCK_MTX_THREAD_MASK); // Clear the spin tag |
2814 | ordered_store_mtx(lock, state); |
2815 | waiters = lck_mtx_lock_acquire(lock); // Acquire to manage priority boosts |
2816 | state = LCK_MTX_THREAD_TO_STATE(thread); |
2817 | if (waiters != 0) |
2818 | state |= ARM_LCK_WAITERS; |
2819 | #if __SMP__ |
2820 | state |= LCK_ILOCK; |
2821 | ordered_store_mtx(lock, state); // Set ownership |
2822 | interlock_unlock(lock); // Release interlock, enable preemption |
2823 | #else |
2824 | ordered_store_mtx(lock, state); // Set ownership |
2825 | enable_preemption(); |
2826 | #endif |
2827 | } |
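
/*
 * Illustrative sketch, not part of the original file and not built:
 * take the lock in spin mode for a short peek, then convert to a full
 * mutex before doing anything that might block. example_quick_check()
 * and the helper name are hypothetical.
 */
#if 0
extern boolean_t example_quick_check(void);	/* hypothetical short peek */

static void
example_peek_then_sleep(lck_mtx_t *mtx)
{
	lck_mtx_lock_spin(mtx);			/* cheap: interlock + spin tag */
	if (example_quick_check()) {
		lck_mtx_convert_spin(mtx);	/* now a full mutex; blocking is OK */
		/* ... longer, possibly blocking work ... */
	}
	lck_mtx_unlock(mtx);			/* releases either mode */
}
#endif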
2828 | |
2829 | |
2830 | /* |
2831 | * Routine: lck_mtx_destroy |
2832 | */ |
2833 | void |
2834 | lck_mtx_destroy( |
2835 | lck_mtx_t * lck, |
2836 | lck_grp_t * grp) |
2837 | { |
	if (lck->lck_mtx_type != LCK_MTX_TYPE)
		panic("Destroying invalid mutex %p", lck);
	if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED)
		panic("Destroying previously destroyed lock %p", lck);
2842 | lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED); |
2843 | lck->lck_mtx_tag = LCK_MTX_TAG_DESTROYED; |
2844 | lck_grp_lckcnt_decr(grp, LCK_TYPE_MTX); |
2845 | lck_grp_deallocate(grp); |
2846 | return; |
2847 | } |
2848 | |
2849 | /* |
2850 | * Routine: lck_spin_assert |
2851 | */ |
2852 | void |
2853 | lck_spin_assert(lck_spin_t *lock, unsigned int type) |
2854 | { |
2855 | thread_t thread, holder; |
2856 | uintptr_t state; |
2857 | |
2858 | if (lock->type != LCK_SPIN_TYPE) |
		panic("Invalid spinlock %p", lock);
2860 | |
2861 | state = lock->lck_spin_data; |
2862 | holder = (thread_t)(state & ~LCK_ILOCK); |
2863 | thread = current_thread(); |
	if (type == LCK_ASSERT_OWNED) {
		if (holder == 0)
			panic("Lock not owned %p = %lx", lock, state);
		if (holder != thread)
			panic("Lock not owned by current thread %p = %lx", lock, state);
		if ((state & LCK_ILOCK) == 0)
			panic("Lock bit not set %p = %lx", lock, state);
	} else if (type == LCK_ASSERT_NOTOWNED) {
		if (holder != 0) {
			if (holder == thread)
				panic("Lock owned by current thread %p = %lx", lock, state);
			else
				panic("Lock %p owned by thread %p", lock, holder);
		}
		if (state & LCK_ILOCK)
			panic("Lock bit set %p = %lx", lock, state);
	} else
		panic("lck_spin_assert(): invalid arg (%u)", type);
2882 | } |
2883 | |
2884 | boolean_t |
2885 | lck_rw_lock_yield_shared(lck_rw_t *lck, boolean_t force_yield) |
2886 | { |
2887 | lck_rw_word_t word; |
2888 | |
2889 | lck_rw_assert(lck, LCK_RW_ASSERT_SHARED); |
2890 | |
2891 | word.data = ordered_load_rw(lck); |
2892 | if (word.want_excl || word.want_upgrade || force_yield) { |
2893 | lck_rw_unlock_shared(lck); |
2894 | mutex_pause(2); |
2895 | lck_rw_lock_shared(lck); |
2896 | return TRUE; |
2897 | } |
2898 | |
2899 | return FALSE; |
2900 | } |
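
/*
 * Illustrative sketch, not part of the original file and not built:
 * a long scan that periodically offers the lock to waiting writers
 * via lck_rw_lock_yield_shared(). The helper and its parameters are
 * hypothetical.
 */
#if 0
static void
example_long_scan(lck_rw_t *rwlock, int nelem)
{
	int i;

	lck_rw_lock_shared(rwlock);
	for (i = 0; i < nelem; i++) {
		/* ... examine element i ... */
		if (lck_rw_lock_yield_shared(rwlock, FALSE)) {
			/*
			 * lock was dropped and retaken: revalidate
			 * anything captured under the previous hold
			 */
		}
	}
	lck_rw_unlock_shared(rwlock);
}
#endif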
2901 | |
2902 | /* |
2903 | * Routine: kdp_lck_mtx_lock_spin_is_acquired |
2904 | * NOT SAFE: To be used only by kernel debugger to avoid deadlock. |
2905 | */ |
2906 | boolean_t |
2907 | kdp_lck_mtx_lock_spin_is_acquired(lck_mtx_t *lck) |
2908 | { |
2909 | uintptr_t state; |
2910 | |
2911 | if (not_in_kdp) { |
		panic("panic: spinlock acquired check done outside of kernel debugger");
2913 | } |
2914 | state = ordered_load_mtx(lck); |
2915 | if (state == LCK_MTX_TAG_DESTROYED) |
2916 | return FALSE; |
2917 | if (LCK_MTX_STATE_TO_THREAD(state) || (state & LCK_ILOCK)) |
2918 | return TRUE; |
2919 | return FALSE; |
2920 | } |
2921 | |
2922 | void |
2923 | kdp_lck_mtx_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo) |
2924 | { |
2925 | lck_mtx_t * mutex = LCK_EVENT_TO_MUTEX(event); |
2926 | waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(mutex); |
2927 | uintptr_t state = ordered_load_mtx(mutex); |
2928 | thread_t holder = LCK_MTX_STATE_TO_THREAD(state); |
2929 | if ((uintptr_t)holder == (uintptr_t)LCK_MTX_SPIN_TAG) { |
2930 | waitinfo->owner = STACKSHOT_WAITOWNER_MTXSPIN; |
2931 | } else { |
		assertf(state != (uintptr_t)LCK_MTX_TAG_DESTROYED, "state=0x%llx", (uint64_t)state);
		assertf(state != (uintptr_t)LCK_MTX_TAG_INDIRECT, "state=0x%llx", (uint64_t)state);
2934 | waitinfo->owner = thread_tid(holder); |
2935 | } |
2936 | } |
2937 | |
2938 | void |
2939 | kdp_rwlck_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo) |
2940 | { |
2941 | lck_rw_t *rwlck = NULL; |
	switch (waitinfo->wait_type) {
2943 | case kThreadWaitKernelRWLockRead: |
2944 | rwlck = READ_EVENT_TO_RWLOCK(event); |
2945 | break; |
2946 | case kThreadWaitKernelRWLockWrite: |
2947 | case kThreadWaitKernelRWLockUpgrade: |
2948 | rwlck = WRITE_EVENT_TO_RWLOCK(event); |
2949 | break; |
2950 | default: |
		panic("%s was called with an invalid blocking type", __FUNCTION__);
2952 | break; |
2953 | } |
2954 | waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(rwlck); |
2955 | waitinfo->owner = thread_tid(rwlck->lck_rw_owner); |
2956 | } |
2957 | |