1/*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57#define ATOMIC_PRIVATE 1
58#define LOCK_PRIVATE 1
59
60#include <mach_ldebug.h>
61#include <debug.h>
62
63#include <mach/kern_return.h>
64#include <mach/mach_host_server.h>
65#include <mach_debug/lockgroup_info.h>
66
67#include <kern/locks.h>
68#include <kern/misc_protos.h>
69#include <kern/kalloc.h>
70#include <kern/thread.h>
71#include <kern/processor.h>
72#include <kern/sched_prim.h>
73#include <kern/debug.h>
74#include <libkern/section_keywords.h>
75#include <machine/atomic.h>
76#include <machine/machine_cpu.h>
77#include <string.h>
78
79#include <sys/kdebug.h>
80
81#if CONFIG_DTRACE
82/*
83 * We need only enough declarations from the BSD-side to be able to
84 * test if our probe is active, and to call __dtrace_probe(). Setting
85 * NEED_DTRACE_DEFS gets a local copy of those definitions pulled in.
86 */
87#define NEED_DTRACE_DEFS
88#include <../bsd/sys/lockstat.h>
89#endif
90
91#define LCK_MTX_SLEEP_CODE 0
92#define LCK_MTX_SLEEP_DEADLINE_CODE 1
93#define LCK_MTX_LCK_WAIT_CODE 2
94#define LCK_MTX_UNLCK_WAKEUP_CODE 3
95
96#if MACH_LDEBUG
97#define ALIGN_TEST(p,t) do{if((uintptr_t)p&(sizeof(t)-1)) __builtin_trap();}while(0)
98#else
99#define ALIGN_TEST(p,t) do{}while(0)
100#endif
101
102/* Silence the volatile to _Atomic cast warning */
103#define ATOMIC_CAST(t,p) ((_Atomic t*)(uintptr_t)(p))
104
105/* Enforce program order of loads and stores. */
106#define ordered_load(target, type) \
107 __c11_atomic_load((_Atomic type *)(target), memory_order_relaxed)
108#define ordered_store(target, type, value) \
109 __c11_atomic_store((_Atomic type *)(target), value, memory_order_relaxed)
110
111#define ordered_load_hw(lock) ordered_load(&(lock)->lock_data, uintptr_t)
112#define ordered_store_hw(lock, value) ordered_store(&(lock)->lock_data, uintptr_t, (value))
113
114#define NOINLINE __attribute__((noinline))
115
116
117static queue_head_t lck_grp_queue;
118static unsigned int lck_grp_cnt;
119
120decl_lck_mtx_data(static,lck_grp_lock)
121static lck_mtx_ext_t lck_grp_lock_ext;
122
123SECURITY_READ_ONLY_LATE(boolean_t) spinlock_timeout_panic = TRUE;
124
125lck_grp_attr_t LockDefaultGroupAttr;
126lck_grp_t LockCompatGroup;
127lck_attr_t LockDefaultLckAttr;
128
129#if CONFIG_DTRACE && __SMP__
130#if defined (__x86_64__)
131uint64_t dtrace_spin_threshold = 500; // 500ns
132#elif defined(__arm__) || defined(__arm64__)
133uint64_t dtrace_spin_threshold = LOCK_PANIC_TIMEOUT / 1000000; // ~500ns, expressed in timebase ticks
134#endif
135#endif
136
137uintptr_t
138unslide_for_kdebug(void* object) {
139 if (__improbable(kdebug_enable))
140 return VM_KERNEL_UNSLIDE_OR_PERM(object);
141 else
142 return 0;
143}
144
145/*
146 * Routine: lck_mod_init
147 */
148
149void
150lck_mod_init(
151 void)
152{
153 /*
154 * Obtain "lcks" options: this currently controls lock statistics
155 */
156 if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
157 LcksOpts = 0;
158
159
160#if (DEVELOPMENT || DEBUG) && defined(__x86_64__)
161 if (!PE_parse_boot_argn("-disable_mtx_chk", &LckDisablePreemptCheck, sizeof (LckDisablePreemptCheck)))
162 LckDisablePreemptCheck = 0;
163#endif /* (DEVELOPMENT || DEBUG) && defined(__x86_64__) */
164
165 queue_init(&lck_grp_queue);
166
167 /*
168 * Need to bootstrap the LockCompatGroup instead of calling lck_grp_init() here. This avoids
169 * grabbing the lck_grp_lock before it is initialized.
170 */
171
172 bzero(&LockCompatGroup, sizeof(lck_grp_t));
173 (void) strncpy(LockCompatGroup.lck_grp_name, "Compatibility APIs", LCK_GRP_MAX_NAME);
174
175 if (LcksOpts & enaLkStat)
176 LockCompatGroup.lck_grp_attr = LCK_GRP_ATTR_STAT;
177 else
178 LockCompatGroup.lck_grp_attr = LCK_ATTR_NONE;
179
180 LockCompatGroup.lck_grp_refcnt = 1;
181
182 enqueue_tail(&lck_grp_queue, (queue_entry_t)&LockCompatGroup);
183 lck_grp_cnt = 1;
184
185 lck_grp_attr_setdefault(&LockDefaultGroupAttr);
186 lck_attr_setdefault(&LockDefaultLckAttr);
187
188 lck_mtx_init_ext(&lck_grp_lock, &lck_grp_lock_ext, &LockCompatGroup, &LockDefaultLckAttr);
189}
190
191/*
192 * Routine: lck_grp_attr_alloc_init
193 */
194
195lck_grp_attr_t *
196lck_grp_attr_alloc_init(
197 void)
198{
199 lck_grp_attr_t *attr;
200
201 if ((attr = (lck_grp_attr_t *)kalloc(sizeof(lck_grp_attr_t))) != 0)
202 lck_grp_attr_setdefault(attr);
203
204 return(attr);
205}
206
207
208/*
209 * Routine: lck_grp_attr_setdefault
210 */
211
212void
213lck_grp_attr_setdefault(
214 lck_grp_attr_t *attr)
215{
216 if (LcksOpts & enaLkStat)
217 attr->grp_attr_val = LCK_GRP_ATTR_STAT;
218 else
219 attr->grp_attr_val = 0;
220}
221
222
223/*
224 * Routine: lck_grp_attr_setstat
225 */
226
227void
228lck_grp_attr_setstat(
229 lck_grp_attr_t *attr)
230{
231 (void)hw_atomic_or(&attr->grp_attr_val, LCK_GRP_ATTR_STAT);
232}
233
234
235/*
236 * Routine: lck_grp_attr_free
237 */
238
239void
240lck_grp_attr_free(
241 lck_grp_attr_t *attr)
242{
243 kfree(attr, sizeof(lck_grp_attr_t));
244}
245
246
247/*
248 * Routine: lck_grp_alloc_init
249 */
250
251lck_grp_t *
252lck_grp_alloc_init(
253 const char* grp_name,
254 lck_grp_attr_t *attr)
255{
256 lck_grp_t *grp;
257
258 if ((grp = (lck_grp_t *)kalloc(sizeof(lck_grp_t))) != 0)
259 lck_grp_init(grp, grp_name, attr);
260
261 return(grp);
262}
263
264/*
265 * Routine: lck_grp_init
266 */
267
268void
269lck_grp_init(lck_grp_t * grp, const char * grp_name, lck_grp_attr_t * attr)
270{
271 /* make sure locking infrastructure has been initialized */
272 assert(lck_grp_cnt > 0);
273
274 bzero((void *)grp, sizeof(lck_grp_t));
275
276 (void)strlcpy(grp->lck_grp_name, grp_name, LCK_GRP_MAX_NAME);
277
278 if (attr != LCK_GRP_ATTR_NULL)
279 grp->lck_grp_attr = attr->grp_attr_val;
280 else if (LcksOpts & enaLkStat)
281 grp->lck_grp_attr = LCK_GRP_ATTR_STAT;
282 else
283 grp->lck_grp_attr = LCK_ATTR_NONE;
284
285 grp->lck_grp_refcnt = 1;
286
287 lck_mtx_lock(&lck_grp_lock);
288 enqueue_tail(&lck_grp_queue, (queue_entry_t)grp);
289 lck_grp_cnt++;
290 lck_mtx_unlock(&lck_grp_lock);
291}
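
/*
 * Illustrative sketch (not part of the build): a typical client of this KPI
 * allocates a group and attributes once at init time, then creates locks
 * against them. The group name "com.example.mysubsys" and the my_subsys_*
 * identifiers below are hypothetical.
 *
 *	static lck_grp_attr_t	*my_subsys_grp_attr;
 *	static lck_grp_t	*my_subsys_grp;
 *	static lck_attr_t	*my_subsys_attr;
 *	static lck_mtx_t	*my_subsys_mtx;
 *
 *	void
 *	my_subsys_locks_init(void)
 *	{
 *		my_subsys_grp_attr = lck_grp_attr_alloc_init();
 *		my_subsys_grp = lck_grp_alloc_init("com.example.mysubsys", my_subsys_grp_attr);
 *		my_subsys_attr = lck_attr_alloc_init();
 *		my_subsys_mtx = lck_mtx_alloc_init(my_subsys_grp, my_subsys_attr);
 *	}
 *
 * Teardown, if the subsystem can ever unload, is the mirror image:
 * lck_mtx_free(), lck_attr_free(), lck_grp_free(), lck_grp_attr_free().
 */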
292
293/*
294 * Routine: lck_grp_free
295 */
296
297void
298lck_grp_free(
299 lck_grp_t *grp)
300{
301 lck_mtx_lock(&lck_grp_lock);
302 lck_grp_cnt--;
303 (void)remque((queue_entry_t)grp);
304 lck_mtx_unlock(&lck_grp_lock);
305 lck_grp_deallocate(grp);
306}
307
308
309/*
310 * Routine: lck_grp_reference
311 */
312
313void
314lck_grp_reference(
315 lck_grp_t *grp)
316{
317 (void)hw_atomic_add(&grp->lck_grp_refcnt, 1);
318}
319
320
321/*
322 * Routine: lck_grp_deallocate
323 */
324
325void
326lck_grp_deallocate(
327 lck_grp_t *grp)
328{
329 if (hw_atomic_sub(&grp->lck_grp_refcnt, 1) == 0)
330 kfree(grp, sizeof(lck_grp_t));
331}
332
333/*
334 * Routine: lck_grp_lckcnt_incr
335 */
336
337void
338lck_grp_lckcnt_incr(
339 lck_grp_t *grp,
340 lck_type_t lck_type)
341{
342 unsigned int *lckcnt;
343
344 switch (lck_type) {
345 case LCK_TYPE_SPIN:
346 lckcnt = &grp->lck_grp_spincnt;
347 break;
348 case LCK_TYPE_MTX:
349 lckcnt = &grp->lck_grp_mtxcnt;
350 break;
351 case LCK_TYPE_RW:
352 lckcnt = &grp->lck_grp_rwcnt;
353 break;
354 default:
355 panic("lck_grp_lckcnt_incr(): invalid lock type: %d\n", lck_type);
 return;
356 }
357
358 (void)hw_atomic_add(lckcnt, 1);
359}
360
361/*
362 * Routine: lck_grp_lckcnt_decr
363 */
364
365void
366lck_grp_lckcnt_decr(
367 lck_grp_t *grp,
368 lck_type_t lck_type)
369{
370 unsigned int *lckcnt;
371 int updated;
372
373 switch (lck_type) {
374 case LCK_TYPE_SPIN:
375 lckcnt = &grp->lck_grp_spincnt;
376 break;
377 case LCK_TYPE_MTX:
378 lckcnt = &grp->lck_grp_mtxcnt;
379 break;
380 case LCK_TYPE_RW:
381 lckcnt = &grp->lck_grp_rwcnt;
382 break;
383 default:
384 panic("lck_grp_lckcnt_decr(): invalid lock type: %d\n", lck_type);
385 return;
386 }
387
388 updated = (int)hw_atomic_sub(lckcnt, 1);
389 assert(updated >= 0);
390}
391
392/*
393 * Routine: lck_attr_alloc_init
394 */
395
396lck_attr_t *
397lck_attr_alloc_init(
398 void)
399{
400 lck_attr_t *attr;
401
402 if ((attr = (lck_attr_t *)kalloc(sizeof(lck_attr_t))) != 0)
403 lck_attr_setdefault(attr);
404
405 return(attr);
406}
407
408
409/*
410 * Routine: lck_attr_setdefault
411 */
412
413void
414lck_attr_setdefault(
415 lck_attr_t *attr)
416{
417#if __arm__ || __arm64__
418 /* <rdar://problem/4404579>: Using LCK_ATTR_DEBUG here causes panic at boot time for arm */
419 attr->lck_attr_val = LCK_ATTR_NONE;
420#elif __i386__ || __x86_64__
421#if !DEBUG
422 if (LcksOpts & enaLkDeb)
423 attr->lck_attr_val = LCK_ATTR_DEBUG;
424 else
425 attr->lck_attr_val = LCK_ATTR_NONE;
426#else
427 attr->lck_attr_val = LCK_ATTR_DEBUG;
428#endif /* !DEBUG */
429#else
430#error Unknown architecture.
431#endif /* __arm__ */
432}
433
434
435/*
436 * Routine: lck_attr_setdebug
437 */
438void
439lck_attr_setdebug(
440 lck_attr_t *attr)
441{
442 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_DEBUG);
443}
444
445/*
446 * Routine: lck_attr_cleardebug
447 */
448void
449lck_attr_cleardebug(
450 lck_attr_t *attr)
451{
452 (void)hw_atomic_and(&attr->lck_attr_val, ~LCK_ATTR_DEBUG);
453}
454
455
456/*
457 * Routine: lck_attr_rw_shared_priority
458 */
459void
460lck_attr_rw_shared_priority(
461 lck_attr_t *attr)
462{
463 (void)hw_atomic_or(&attr->lck_attr_val, LCK_ATTR_RW_SHARED_PRIORITY);
464}
465
466
467/*
468 * Routine: lck_attr_free
469 */
470void
471lck_attr_free(
472 lck_attr_t *attr)
473{
474 kfree(attr, sizeof(lck_attr_t));
475}
476
477/*
478 * Routine: hw_lock_init
479 *
480 * Initialize a hardware lock.
481 */
482void
483hw_lock_init(hw_lock_t lock)
484{
485 ordered_store_hw(lock, 0);
486}
487
488/*
489 * Routine: hw_lock_lock_contended
490 *
491 * Spin until lock is acquired or timeout expires.
492 * timeout is in mach_absolute_time ticks. Called with
493 * preemption disabled.
494 */
495
496#if __SMP__
497static unsigned int NOINLINE
498hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
499{
500 uint64_t end = 0;
501 uintptr_t holder = lock->lock_data;
502 int i;
503
504 if (timeout == 0)
505 timeout = LOCK_PANIC_TIMEOUT;
506#if CONFIG_DTRACE
507 uint64_t begin;
508 boolean_t dtrace_enabled = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
509 if (__improbable(dtrace_enabled))
510 begin = mach_absolute_time();
511#endif
512 for ( ; ; ) {
513 for (i = 0; i < LOCK_SNOOP_SPINS; i++) {
514 cpu_pause();
515#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
516 holder = ordered_load_hw(lock);
517 if (holder != 0)
518 continue;
519#endif
520 if (atomic_compare_exchange(&lock->lock_data, 0, data,
521 memory_order_acquire_smp, TRUE)) {
522#if CONFIG_DTRACE
523 if (__improbable(dtrace_enabled)) {
524 uint64_t spintime = mach_absolute_time() - begin;
525 if (spintime > dtrace_spin_threshold)
526 LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spintime, dtrace_spin_threshold);
527 }
528#endif
529 return 1;
530 }
531 }
532 if (end == 0) {
533 end = ml_get_timebase() + timeout;
534 }
535 else if (ml_get_timebase() >= end)
536 break;
537 }
538 if (do_panic) {
539 // Capture the actual time spent spinning, which may be higher than the timeout
540 // if a misbehaving interrupt stole this thread's CPU time.
541 panic("Spinlock timeout after %llu ticks, %p = %lx",
542 (ml_get_timebase() - end + timeout), lock, holder);
543 }
544 return 0;
545}
546#endif // __SMP__
547
548static inline void
549hw_lock_lock_internal(hw_lock_t lock, thread_t thread)
550{
551 uintptr_t state;
552
553 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
554#if __SMP__
555
556#if LOCK_PRETEST
557 if (ordered_load_hw(lock))
558 goto contended;
559#endif // LOCK_PRETEST
560 if (atomic_compare_exchange(&lock->lock_data, 0, state,
561 memory_order_acquire_smp, TRUE)) {
562 goto end;
563 }
564#if LOCK_PRETEST
565contended:
566#endif // LOCK_PRETEST
567 hw_lock_lock_contended(lock, state, 0, spinlock_timeout_panic);
568end:
569#else // __SMP__
570 if (lock->lock_data)
571 panic("Spinlock held %p", lock);
572 lock->lock_data = state;
573#endif // __SMP__
574#if CONFIG_DTRACE
575 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
576#endif
577 return;
578}
579
580/*
581 * Routine: hw_lock_lock
582 *
583 * Acquire lock, spinning until it becomes available,
584 * return with preemption disabled.
585 */
586void
587hw_lock_lock(hw_lock_t lock)
588{
589 thread_t thread = current_thread();
590 disable_preemption_for_thread(thread);
591 hw_lock_lock_internal(lock, thread);
592}
593
594/*
595 * Routine: hw_lock_lock_nopreempt
596 *
597 * Acquire lock, spinning until it becomes available.
598 */
599void
600hw_lock_lock_nopreempt(hw_lock_t lock)
601{
602 thread_t thread = current_thread();
603 if (__improbable(!preemption_disabled_for_thread(thread)))
604 panic("Attempt to take no-preempt spinlock %p in preemptible context", lock);
605 hw_lock_lock_internal(lock, thread);
606}
607
608/*
609 * Routine: hw_lock_to
610 *
611 * Acquire lock, spinning until it becomes available or timeout.
612 * Timeout is in mach_absolute_time ticks, return with
613 * preemption disabled.
614 */
615unsigned int
616hw_lock_to(hw_lock_t lock, uint64_t timeout)
617{
618 thread_t thread;
619 uintptr_t state;
620 unsigned int success = 0;
621
622 thread = current_thread();
623 disable_preemption_for_thread(thread);
624 state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
625#if __SMP__
626
627#if LOCK_PRETEST
628 if (ordered_load_hw(lock))
629 goto contended;
630#endif // LOCK_PRETEST
631 if (atomic_compare_exchange(&lock->lock_data, 0, state,
632 memory_order_acquire_smp, TRUE)) {
633 success = 1;
634 goto end;
635 }
636#if LOCK_PRETEST
637contended:
638#endif // LOCK_PRETEST
639 success = hw_lock_lock_contended(lock, state, timeout, FALSE);
640end:
641#else // __SMP__
642 (void)timeout;
643 if (ordered_load_hw(lock) == 0) {
644 ordered_store_hw(lock, state);
645 success = 1;
646 }
647#endif // __SMP__
648#if CONFIG_DTRACE
649 if (success)
650 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
651#endif
652 return success;
653}
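
/*
 * Illustrative sketch (not part of the build): hw_lock_to() takes its timeout
 * in mach_absolute_time ticks, so a caller with a nanosecond budget converts
 * first. The my_lock variable (a hw_lock_data_t) and the 100us budget below
 * are hypothetical.
 *
 *	uint64_t ticks;
 *
 *	nanoseconds_to_absolutetime(100 * NSEC_PER_USEC, &ticks);
 *	if (!hw_lock_to(&my_lock, ticks))
 *		panic("example: spinlock %p not acquired within 100us", &my_lock);
 *	// ... critical section, preemption is disabled ...
 *	hw_lock_unlock(&my_lock);
 */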
654
655/*
656 * Routine: hw_lock_try
657 *
658 * returns with preemption disabled on success.
659 */
660static inline unsigned int
661hw_lock_try_internal(hw_lock_t lock, thread_t thread)
662{
663 int success = 0;
664
665#if __SMP__
666#if LOCK_PRETEST
667 if (ordered_load_hw(lock))
668 goto failed;
669#endif // LOCK_PRETEST
670 success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
671 memory_order_acquire_smp, FALSE);
672#else
673 if (lock->lock_data == 0) {
674 lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
675 success = 1;
676 }
677#endif // __SMP__
678
679#if LOCK_PRETEST
680failed:
681#endif // LOCK_PRETEST
682#if CONFIG_DTRACE
683 if (success)
684 LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
685#endif
686 return success;
687}
688
689unsigned int
690hw_lock_try(hw_lock_t lock)
691{
692 thread_t thread = current_thread();
693 disable_preemption_for_thread(thread);
694 unsigned int success = hw_lock_try_internal(lock, thread);
695 if (!success)
696 enable_preemption();
697 return success;
698}
699
700unsigned int
701hw_lock_try_nopreempt(hw_lock_t lock)
702{
703 thread_t thread = current_thread();
704 if (__improbable(!preemption_disabled_for_thread(thread)))
705 panic("Attempt to test no-preempt spinlock %p in preemptible context", lock);
706 return hw_lock_try_internal(lock, thread);
707}
708
709/*
710 * Routine: hw_lock_unlock
711 *
712 * Unconditionally release lock, release preemption level.
713 */
714static inline void
715hw_lock_unlock_internal(hw_lock_t lock)
716{
717 __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
718#if __arm__ || __arm64__
719 // ARM tests are only for open-source exclusion
720 set_event();
721#endif // __arm__ || __arm64__
722#if CONFIG_DTRACE
723 LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
724#endif /* CONFIG_DTRACE */
725}
726
727void
728hw_lock_unlock(hw_lock_t lock)
729{
730 hw_lock_unlock_internal(lock);
731 enable_preemption();
732}
733
734void
735hw_lock_unlock_nopreempt(hw_lock_t lock)
736{
737 if (__improbable(!preemption_disabled_for_thread(current_thread())))
738 panic("Attempt to release no-preempt spinlock %p in preemptible context", lock);
739 hw_lock_unlock_internal(lock);
740}
741
742/*
743 * Routine: hw_lock_held
 *
 * Does not change preemption state. N.B. Racy, of course.
745 */
746unsigned int
747hw_lock_held(hw_lock_t lock)
748{
749 return (ordered_load_hw(lock) != 0);
750}
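
/*
 * Illustrative sketch (not part of the build): basic hw_lock usage. The
 * my_dev structure is hypothetical; its embedded hw_lock_data_t protects a
 * short critical section. hw_lock_lock()/hw_lock_try() return with
 * preemption disabled, and hw_lock_unlock() re-enables it.
 *
 *	struct my_dev {
 *		hw_lock_data_t	lock;
 *		uint32_t	count;
 *	};
 *
 *	void
 *	my_dev_init(struct my_dev *dev)
 *	{
 *		hw_lock_init(&dev->lock);
 *	}
 *
 *	void
 *	my_dev_bump(struct my_dev *dev)
 *	{
 *		if (!hw_lock_try(&dev->lock))	// opportunistic attempt
 *			hw_lock_lock(&dev->lock);	// otherwise spin until acquired
 *		dev->count++;
 *		hw_lock_unlock(&dev->lock);
 *	}
 */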
751
752/*
753 * Routine: lck_spin_sleep
754 */
755wait_result_t
756lck_spin_sleep(
757 lck_spin_t *lck,
758 lck_sleep_action_t lck_sleep_action,
759 event_t event,
760 wait_interrupt_t interruptible)
761{
762 wait_result_t res;
763
764 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
765 panic("Invalid lock sleep action %x\n", lck_sleep_action);
766
767 res = assert_wait(event, interruptible);
768 if (res == THREAD_WAITING) {
769 lck_spin_unlock(lck);
770 res = thread_block(THREAD_CONTINUE_NULL);
771 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
772 lck_spin_lock(lck);
773 }
774 else
775 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
776 lck_spin_unlock(lck);
777
778 return res;
779}
780
781
782/*
783 * Routine: lck_spin_sleep_deadline
784 */
785wait_result_t
786lck_spin_sleep_deadline(
787 lck_spin_t *lck,
788 lck_sleep_action_t lck_sleep_action,
789 event_t event,
790 wait_interrupt_t interruptible,
791 uint64_t deadline)
792{
793 wait_result_t res;
794
795 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
796 panic("Invalid lock sleep action %x\n", lck_sleep_action);
797
798 res = assert_wait_deadline(event, interruptible, deadline);
799 if (res == THREAD_WAITING) {
800 lck_spin_unlock(lck);
801 res = thread_block(THREAD_CONTINUE_NULL);
802 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
803 lck_spin_lock(lck);
804 }
805 else
806 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
807 lck_spin_unlock(lck);
808
809 return res;
810}
811
812/*
813 * Routine: lck_mtx_sleep
814 */
815wait_result_t
816lck_mtx_sleep(
817 lck_mtx_t *lck,
818 lck_sleep_action_t lck_sleep_action,
819 event_t event,
820 wait_interrupt_t interruptible)
821{
822 wait_result_t res;
823 thread_t thread = current_thread();
824
825 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
826 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
827
828 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
829 panic("Invalid lock sleep action %x\n", lck_sleep_action);
830
831 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
832 /*
833 * We overload the RW lock promotion to give us a priority ceiling
834 * during the time that this thread is asleep, so that when it
835 * is re-awakened (and not yet contending on the mutex), it is
836 * runnable at a reasonably high priority.
837 */
838 thread->rwlock_count++;
839 }
840
841 res = assert_wait(event, interruptible);
842 if (res == THREAD_WAITING) {
843 lck_mtx_unlock(lck);
844 res = thread_block(THREAD_CONTINUE_NULL);
845 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
846 if ((lck_sleep_action & LCK_SLEEP_SPIN))
847 lck_mtx_lock_spin(lck);
848 else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS))
849 lck_mtx_lock_spin_always(lck);
850 else
851 lck_mtx_lock(lck);
852 }
853 }
854 else
855 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
856 lck_mtx_unlock(lck);
857
858 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
859 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
860 /* sched_flags checked without lock, but will be rechecked while clearing */
861 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
862 }
863 }
864
865 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
866
867 return res;
868}
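
/*
 * Illustrative sketch (not part of the build): the usual pattern around
 * lck_mtx_sleep() is a condition re-checked under the mutex. The queue
 * structure and field names below are hypothetical.
 *
 *	lck_mtx_lock(&q->lock);
 *	while (q->nitems == 0) {
 *		// Drops q->lock while blocked, reacquires it before returning.
 *		(void) lck_mtx_sleep(&q->lock, LCK_SLEEP_DEFAULT, (event_t)q, THREAD_UNINT);
 *	}
 *	item = my_dequeue(q);
 *	lck_mtx_unlock(&q->lock);
 *
 * The producer side enqueues under the same mutex and then calls
 * thread_wakeup((event_t)q) to wake any sleepers.
 */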
869
870
871/*
872 * Routine: lck_mtx_sleep_deadline
873 */
874wait_result_t
875lck_mtx_sleep_deadline(
876 lck_mtx_t *lck,
877 lck_sleep_action_t lck_sleep_action,
878 event_t event,
879 wait_interrupt_t interruptible,
880 uint64_t deadline)
881{
882 wait_result_t res;
883 thread_t thread = current_thread();
884
885 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
886 VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);
887
888 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
889 panic("Invalid lock sleep action %x\n", lck_sleep_action);
890
891 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
892 /*
893 * See lck_mtx_sleep().
894 */
895 thread->rwlock_count++;
896 }
897
898 res = assert_wait_deadline(event, interruptible, deadline);
899 if (res == THREAD_WAITING) {
900 lck_mtx_unlock(lck);
901 res = thread_block(THREAD_CONTINUE_NULL);
902 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
903 if ((lck_sleep_action & LCK_SLEEP_SPIN))
904 lck_mtx_lock_spin(lck);
905 else
906 lck_mtx_lock(lck);
907 }
908 }
909 else
910 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
911 lck_mtx_unlock(lck);
912
913 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
914 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
915 /* sched_flags checked without lock, but will be rechecked while clearing */
916 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
917 }
918 }
919
920 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);
921
922 return res;
923}
924
925/*
926 * Lock Boosting Invariants:
927 *
928 * The lock owner is always promoted to the max priority of all its waiters.
929 * Max priority is capped at MAXPRI_PROMOTE.
930 *
931 * lck_mtx_pri being set implies that the lock owner is promoted to at least lck_mtx_pri
932 * This prevents the thread from dropping in priority while holding a mutex
933 * (note: Intel locks currently don't do this, to avoid thread lock churn)
934 *
935 * thread->promotions has a +1 for every mutex currently promoting the thread
936 * and a +1 for was_promoted_on_wakeup being set.
937 * TH_SFLAG_PROMOTED is set on a thread whenever it has any promotions
938 * from any mutex (i.e. thread->promotions != 0)
939 *
940 * was_promoted_on_wakeup is set on a thread which is woken up by a mutex when
941 * it raises the priority of the woken thread to match lck_mtx_pri.
942 * It can be set for multiple iterations of wait, fail to acquire, re-wait, etc
943 * was_promoted_on_wakeup being set always implies a +1 promotions count.
944 *
945 * The last waiter is not given a promotion when it wakes up or acquires the lock.
946 * When the last waiter is waking up, a new contender can always come in and
947 * steal the lock without having to wait for the last waiter to make forward progress.
948 *
949 * lck_mtx_waiters has a +1 for every waiter currently between wait and acquire
950 * This prevents us from asserting that every wakeup wakes up a thread.
951 * This also causes excess thread_wakeup calls in the unlock path.
952 * It can only be fooled into thinking there are more waiters than are
953 * actually blocked, not less.
954 * It does allow us to reduce the complexity of the lock state.
955 *
956 * This also means that a starved bg thread as the last waiter could end up
957 * keeping the lock in the contended state for a long period of time, which
958 * may keep lck_mtx_pri artificially high for a very long time even though
959 * it is not participating or blocking anyone else.
960 * Intel locks don't have this problem because they can go uncontended
961 * as soon as there are no blocked threads involved.
962 */
963
964/*
965 * Routine: lck_mtx_lock_wait
966 *
967 * Invoked in order to wait on contention.
968 *
969 * Called with the interlock locked and
970 * returns it unlocked.
971 *
972 * Always aggressively sets the owning thread to promoted,
973 * even if it is already at the same or higher priority.
974 * This prevents it from lowering its own priority while holding the lock.
975 *
976 * TODO: Come up with a more efficient way to handle same-priority promotions
977 * <rdar://problem/30737670> ARM mutex contention logic could avoid taking the thread lock
978 */
979void
980lck_mtx_lock_wait (
981 lck_mtx_t *lck,
982 thread_t holder)
983{
984 thread_t self = current_thread();
985 lck_mtx_t *mutex;
986 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
987
988#if CONFIG_DTRACE
989 uint64_t sleep_start = 0;
990
991 if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
992 sleep_start = mach_absolute_time();
993 }
994#endif
995
996 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
997 mutex = lck;
998 else
999 mutex = &lck->lck_mtx_ptr->lck_mtx;
1000
1001 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
1002 trace_lck, (uintptr_t)thread_tid(holder), 0, 0, 0);
1003
1004 spl_t s = splsched();
1005 thread_lock(holder);
1006
1007 assert_promotions_invariant(holder);
1008
1009 if ((holder->sched_flags & TH_SFLAG_DEPRESS) == 0)
1010 assert(holder->sched_pri >= mutex->lck_mtx_pri);
1011
1012 integer_t priority = self->sched_pri;
1013 priority = MAX(priority, self->base_pri);
1014 priority = MAX(priority, BASEPRI_DEFAULT);
1015 priority = MIN(priority, MAXPRI_PROMOTE);
1016
1017 if (mutex->lck_mtx_pri == 0) {
1018 /* This is the first promotion for this mutex */
1019 if (holder->promotions++ == 0) {
1020 /* This is the first promotion for holder */
1021 sched_thread_promote_to_pri(holder, priority, trace_lck);
1022 } else {
1023 /* Holder was previously promoted due to a different mutex, raise to match this one */
1024 sched_thread_update_promotion_to_pri(holder, priority, trace_lck);
1025 }
1026 } else {
1027 /* Holder was previously promoted due to this mutex, check if the pri needs to go up */
1028 sched_thread_update_promotion_to_pri(holder, priority, trace_lck);
1029 }
1030
1031 assert(holder->promotions > 0);
1032 assert(holder->promotion_priority >= priority);
1033
1034 if ((holder->sched_flags & TH_SFLAG_DEPRESS) == 0)
1035 assert(holder->sched_pri >= mutex->lck_mtx_pri);
1036
1037 assert_promotions_invariant(holder);
1038
1039 thread_unlock(holder);
1040 splx(s);
1041
1042 if (mutex->lck_mtx_pri < priority)
1043 mutex->lck_mtx_pri = priority;
1044
1045 if (self->waiting_for_mutex == NULL) {
1046 self->waiting_for_mutex = mutex;
1047 mutex->lck_mtx_waiters++;
1048 }
1049
1050 assert(self->waiting_for_mutex == mutex);
1051
1052 thread_set_pending_block_hint(self, kThreadWaitKernelMutex);
1053 assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT | THREAD_WAIT_NOREPORT_USER);
1054 lck_mtx_ilk_unlock(mutex);
1055
1056 thread_block(THREAD_CONTINUE_NULL);
1057
1058 assert(mutex->lck_mtx_waiters > 0);
1059
1060 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1061#if CONFIG_DTRACE
1062 /*
1063 * Record the DTrace lockstat probe for blocking, block time
1064 * measured from when we were entered.
1065 */
1066 if (sleep_start) {
1067 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1068 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
1069 mach_absolute_time() - sleep_start);
1070 } else {
1071 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
1072 mach_absolute_time() - sleep_start);
1073 }
1074 }
1075#endif
1076}
1077
1078/*
1079 * Routine: lck_mtx_lock_acquire
1080 *
1081 * Invoked on acquiring the mutex when there is
1082 * contention.
1083 *
1084 * Returns the current number of waiters.
1085 *
1086 * Called with the interlock locked.
1087 */
1088int
1089lck_mtx_lock_acquire(
1090 lck_mtx_t *lck)
1091{
1092 thread_t thread = current_thread();
1093 lck_mtx_t *mutex;
1094 integer_t priority;
1095
1096 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1097 mutex = lck;
1098 else
1099 mutex = &lck->lck_mtx_ptr->lck_mtx;
1100
1101 /*
1102 * If waiting_for_mutex is set, then this thread was previously blocked waiting on this lock
1103 * If it's un-set, then this thread stole the lock from another waiter.
1104 */
1105 if (thread->waiting_for_mutex == mutex) {
1106 assert(mutex->lck_mtx_waiters > 0);
1107
1108 thread->waiting_for_mutex = NULL;
1109 mutex->lck_mtx_waiters--;
1110 }
1111
1112 assert(thread->waiting_for_mutex == NULL);
1113
1114 if (mutex->lck_mtx_waiters > 0) {
1115 priority = mutex->lck_mtx_pri;
1116 } else {
1117 /* I was the last waiter, so the mutex is no longer promoted or contended */
1118 mutex->lck_mtx_pri = 0;
1119 priority = 0;
1120 }
1121
1122 if (priority || thread->was_promoted_on_wakeup) {
1123 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1124
1125 /*
1126 * Note: was_promoted_on_wakeup can happen for multiple wakeups in a row without
1127 * an intervening acquire if a thread keeps failing to acquire the lock
1128 *
1129 * If priority is set but was_promoted_on_wakeup is not,
1130 * then this is a lock steal of a promoted mutex, so it needs a ++ of promotions.
1131 *
1132 * If was_promoted_on_wakeup is set but priority is not,
1133 * then this was the last waiter, and the last waiter does not need a promotion.
1134 */
1135
1136 spl_t s = splsched();
1137 thread_lock(thread);
1138
1139 assert_promotions_invariant(thread);
1140
1141 if (thread->was_promoted_on_wakeup)
1142 assert(thread->promotions > 0);
1143
1144 if (priority) {
1145 if (thread->promotions++ == 0) {
1146 /* This is the first promotion for holder */
1147 sched_thread_promote_to_pri(thread, priority, trace_lck);
1148 } else {
1149 /*
1150 * Holder was previously promoted due to a different mutex, raise to match this one
1151 * Or, this thread was promoted on wakeup but someone else later contended on mutex
1152 * at higher priority before we got here
1153 */
1154 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
1155 }
1156 }
1157
1158 if (thread->was_promoted_on_wakeup) {
1159 thread->was_promoted_on_wakeup = 0;
1160 if (--thread->promotions == 0)
1161 sched_thread_unpromote(thread, trace_lck);
1162 }
1163
1164 assert_promotions_invariant(thread);
1165
1166 if (priority && (thread->sched_flags & TH_SFLAG_DEPRESS) == 0)
1167 assert(thread->sched_pri >= priority);
1168
1169 thread_unlock(thread);
1170 splx(s);
1171 }
1172
1173#if CONFIG_DTRACE
1174 if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
1175 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
1176 LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
1177 } else {
1178 LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
1179 }
1180 }
1181#endif
1182 return (mutex->lck_mtx_waiters);
1183}
1184
1185/*
1186 * Routine: lck_mtx_unlock_wakeup
1187 *
1188 * Invoked on unlock when there is contention.
1189 *
1190 * Called with the interlock locked.
1191 *
1192 * TODO: the 'waiters' flag does not indicate waiters exist on the waitqueue,
1193 * it indicates waiters exist between wait and acquire.
1194 * This means that here we may do extra unneeded wakeups.
1195 */
1196void
1197lck_mtx_unlock_wakeup (
1198 lck_mtx_t *lck,
1199 thread_t holder)
1200{
1201 thread_t thread = current_thread();
1202 lck_mtx_t *mutex;
1203 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(lck);
1204
1205 if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
1206 mutex = lck;
1207 else
1208 mutex = &lck->lck_mtx_ptr->lck_mtx;
1209
1210 if (thread != holder)
1211 panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);
1212
1213 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
1214 trace_lck, (uintptr_t)thread_tid(thread), 0, 0, 0);
1215
1216 assert(mutex->lck_mtx_waiters > 0);
1217 assert(thread->was_promoted_on_wakeup == 0);
1218 assert(thread->waiting_for_mutex == NULL);
1219
1220 /*
1221 * The waiters count does not precisely match the number of threads on the waitqueue,
1222 * therefore we cannot assert that we actually wake up a thread here
1223 */
1224 if (mutex->lck_mtx_waiters > 1)
1225 thread_wakeup_one_with_pri(LCK_MTX_EVENT(mutex), mutex->lck_mtx_pri);
1226 else
1227 thread_wakeup_one(LCK_MTX_EVENT(mutex));
1228
1229 /* When mutex->lck_mtx_pri is set, it means I as the owner have a promotion. */
1230 if (mutex->lck_mtx_pri) {
1231 spl_t s = splsched();
1232 thread_lock(thread);
1233
1234 assert(thread->promotions > 0);
1235
1236 assert_promotions_invariant(thread);
1237
1238 if (--thread->promotions == 0)
1239 sched_thread_unpromote(thread, trace_lck);
1240
1241 assert_promotions_invariant(thread);
1242
1243 thread_unlock(thread);
1244 splx(s);
1245 }
1246
1247 KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1248}
1249
1250/*
1251 * Callout from the waitqueue code from inside thread_wakeup_one_with_pri
1252 * At splsched, thread is pulled from waitq, still locked, not on runqueue yet
1253 *
1254 * We always make sure to set the promotion flag, even if the thread is already at this priority,
1255 * so that it doesn't go down.
1256 */
1257void
1258lck_mtx_wakeup_adjust_pri(thread_t thread, integer_t priority)
1259{
1260 assert(priority <= MAXPRI_PROMOTE);
1261 assert(thread->waiting_for_mutex != NULL);
1262
1263 __kdebug_only uintptr_t trace_lck = unslide_for_kdebug(thread->waiting_for_mutex);
1264
1265 assert_promotions_invariant(thread);
1266
1267 if (thread->was_promoted_on_wakeup) {
1268 /* Thread was previously promoted, but contended again */
1269 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
1270 return;
1271 }
1272
1273 if (thread->promotions > 0 && priority <= thread->promotion_priority) {
1274 /*
1275 * Thread is already promoted to the right level, no need to do more
1276 * I can draft off of another promotion here, which is OK
1277 * because I know the thread will soon run acquire to get its own promotion
1278 */
1279 assert((thread->sched_flags & TH_SFLAG_PROMOTED) == TH_SFLAG_PROMOTED);
1280 return;
1281 }
1282
1283 thread->was_promoted_on_wakeup = 1;
1284
1285 if (thread->promotions++ == 0) {
1286 /* This is the first promotion for this thread */
1287 sched_thread_promote_to_pri(thread, priority, trace_lck);
1288 } else {
1289 /* Holder was previously promoted due to a different mutex, raise to match this one */
1290 sched_thread_update_promotion_to_pri(thread, priority, trace_lck);
1291 }
1292
1293 assert_promotions_invariant(thread);
1294}
1295
1296
1297/*
1298 * Routine: mutex_pause
1299 *
1300 * Called by former callers of simple_lock_pause().
1301 */
1302#define MAX_COLLISION_COUNTS 32
1303#define MAX_COLLISION 8
1304
1305unsigned int max_collision_count[MAX_COLLISION_COUNTS];
1306
1307uint32_t collision_backoffs[MAX_COLLISION] = {
1308 10, 50, 100, 200, 400, 600, 800, 1000
1309};
1310
1311
1312void
1313mutex_pause(uint32_t collisions)
1314{
1315 wait_result_t wait_result;
1316 uint32_t back_off;
1317
1318 if (collisions >= MAX_COLLISION_COUNTS)
1319 collisions = MAX_COLLISION_COUNTS - 1;
1320 max_collision_count[collisions]++;
1321
1322 if (collisions >= MAX_COLLISION)
1323 collisions = MAX_COLLISION - 1;
1324 back_off = collision_backoffs[collisions];
1325
1326 wait_result = assert_wait_timeout((event_t)mutex_pause, THREAD_UNINT, back_off, NSEC_PER_USEC);
1327 assert(wait_result == THREAD_WAITING);
1328
1329 wait_result = thread_block(THREAD_CONTINUE_NULL);
1330 assert(wait_result == THREAD_TIMED_OUT);
1331}
1332
1333
1334unsigned int mutex_yield_wait = 0;
1335unsigned int mutex_yield_no_wait = 0;
1336
1337void
1338lck_mtx_yield(
1339 lck_mtx_t *lck)
1340{
1341 int waiters;
1342
1343#if DEBUG
1344 lck_mtx_assert(lck, LCK_MTX_ASSERT_OWNED);
1345#endif /* DEBUG */
1346
1347 if (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT)
1348 waiters = lck->lck_mtx_ptr->lck_mtx.lck_mtx_waiters;
1349 else
1350 waiters = lck->lck_mtx_waiters;
1351
1352 if ( !waiters) {
1353 mutex_yield_no_wait++;
1354 } else {
1355 mutex_yield_wait++;
1356 lck_mtx_unlock(lck);
1357 mutex_pause(0);
1358 lck_mtx_lock(lck);
1359 }
1360}
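
/*
 * Illustrative sketch (not part of the build): lck_mtx_yield() is meant for
 * long loops run under a mutex, letting waiters in periodically without
 * restructuring the loop. The table walk below is hypothetical.
 *
 *	lck_mtx_lock(&table->lock);
 *	for (i = 0; i < table->nentries; i++) {
 *		scrub_entry(&table->entries[i]);
 *		lck_mtx_yield(&table->lock);	// drops and retakes the lock only if someone is waiting
 *	}
 *	lck_mtx_unlock(&table->lock);
 */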
1361
1362
1363/*
1364 * Routine: lck_rw_sleep
1365 */
1366wait_result_t
1367lck_rw_sleep(
1368 lck_rw_t *lck,
1369 lck_sleep_action_t lck_sleep_action,
1370 event_t event,
1371 wait_interrupt_t interruptible)
1372{
1373 wait_result_t res;
1374 lck_rw_type_t lck_rw_type;
1375 thread_t thread = current_thread();
1376
1377 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1378 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1379
1380 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1381 /*
1382 * Although we are dropping the RW lock, the intent in most cases
1383 * is that this thread remains as an observer, since it may hold
1384 * some secondary resource, but must yield to avoid deadlock. In
1385 * this situation, make sure that the thread is boosted to the
1386 * RW lock ceiling while blocked, so that it can re-acquire the
1387 * RW lock at that priority.
1388 */
1389 thread->rwlock_count++;
1390 }
1391
1392 res = assert_wait(event, interruptible);
1393 if (res == THREAD_WAITING) {
1394 lck_rw_type = lck_rw_done(lck);
1395 res = thread_block(THREAD_CONTINUE_NULL);
1396 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1397 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1398 lck_rw_lock(lck, lck_rw_type);
1399 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1400 lck_rw_lock_exclusive(lck);
1401 else
1402 lck_rw_lock_shared(lck);
1403 }
1404 }
1405 else
1406 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1407 (void)lck_rw_done(lck);
1408
1409 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1410 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1411 /* sched_flags checked without lock, but will be rechecked while clearing */
1412
1413 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1414 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1415
1416 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1417 }
1418 }
1419
1420 return res;
1421}
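
/*
 * Illustrative sketch (not part of the build): lck_rw_sleep() drops the rw
 * lock in whichever mode it is held and, unless LCK_SLEEP_UNLOCK is given,
 * reacquires it on wakeup, either in the original mode or in the mode forced
 * by LCK_SLEEP_SHARED / LCK_SLEEP_EXCLUSIVE. The cache object below is
 * hypothetical.
 *
 *	lck_rw_lock_shared(&cache->rwlock);
 *	while (cache->rebuilding) {
 *		// Wake up holding the lock exclusive, ready to modify the cache.
 *		(void) lck_rw_sleep(&cache->rwlock, LCK_SLEEP_EXCLUSIVE,
 *		    (event_t)&cache->rebuilding, THREAD_UNINT);
 *	}
 */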
1422
1423
1424/*
1425 * Routine: lck_rw_sleep_deadline
1426 */
1427wait_result_t
1428lck_rw_sleep_deadline(
1429 lck_rw_t *lck,
1430 lck_sleep_action_t lck_sleep_action,
1431 event_t event,
1432 wait_interrupt_t interruptible,
1433 uint64_t deadline)
1434{
1435 wait_result_t res;
1436 lck_rw_type_t lck_rw_type;
1437 thread_t thread = current_thread();
1438
1439 if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
1440 panic("Invalid lock sleep action %x\n", lck_sleep_action);
1441
1442 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1443 thread->rwlock_count++;
1444 }
1445
1446 res = assert_wait_deadline(event, interruptible, deadline);
1447 if (res == THREAD_WAITING) {
1448 lck_rw_type = lck_rw_done(lck);
1449 res = thread_block(THREAD_CONTINUE_NULL);
1450 if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
1451 if (!(lck_sleep_action & (LCK_SLEEP_SHARED|LCK_SLEEP_EXCLUSIVE)))
1452 lck_rw_lock(lck, lck_rw_type);
1453 else if (lck_sleep_action & LCK_SLEEP_EXCLUSIVE)
1454 lck_rw_lock_exclusive(lck);
1455 else
1456 lck_rw_lock_shared(lck);
1457 }
1458 }
1459 else
1460 if (lck_sleep_action & LCK_SLEEP_UNLOCK)
1461 (void)lck_rw_done(lck);
1462
1463 if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
1464 if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
1465 /* sched_flags checked without lock, but will be rechecked while clearing */
1466
1467 /* Only if the caller wanted the lck_rw_t returned unlocked should we drop to 0 */
1468 assert(lck_sleep_action & LCK_SLEEP_UNLOCK);
1469
1470 lck_rw_clear_promotion(thread, unslide_for_kdebug(event));
1471 }
1472 }
1473
1474 return res;
1475}
1476
1477/*
1478 * Reader-writer lock promotion
1479 *
1480 * We support a limited form of reader-writer
1481 * lock promotion whose effects are:
1482 *
1483 * * Qualifying threads have decay disabled
1484 * * Scheduler priority is reset to a floor of
1485 * their statically assigned priority
1486 * or MINPRI_RWLOCK
1487 *
1488 * The rationale is that lck_rw_ts do not have
1489 * a single owner, so we cannot apply a directed
1490 * priority boost from all waiting threads
1491 * to all holding threads without maintaining
1492 * lists of all shared owners and all waiting
1493 * threads for every lock.
1494 *
1495 * Instead (and to preserve the uncontended fast-
1496 * path), acquiring (or attempting to acquire)
1497 * a RW lock in shared or exclusive lock increments
1498 * a per-thread counter. Only if that thread stops
1499 * making forward progress (for instance blocking
1500 * on a mutex, or being preempted) do we consult
1501 * the counter and apply the priority floor.
1502 * When the thread becomes runnable again (or in
1503 * the case of preemption it never stopped being
1504 * runnable), it has the priority boost and should
1505 * be in a good position to run on the CPU and
1506 * release all RW locks (at which point the priority
1507 * boost is cleared).
1508 *
1509 * Care must be taken to ensure that priority
1510 * boosts are not retained indefinitely, since unlike
1511 * mutex priority boosts (where the boost is tied
1512 * to the mutex lifecycle), the boost is tied
1513 * to the thread and independent of any particular
1514 * lck_rw_t. Assertions are in place on return
1515 * to userspace so that the boost is not held
1516 * indefinitely.
1517 *
1518 * The routines that increment/decrement the
1519 * per-thread counter should err on the side of
1520 * incrementing any time a preemption is possible
1521 * and the lock would be visible to the rest of the
1522 * system as held (so it should be incremented before
1523 * interlocks are dropped/preemption is enabled, or
1524 * before a CAS is executed to acquire the lock).
1525 *
1526 */
1527
1528/*
1529 * lck_rw_clear_promotion: Undo priority promotions when the last RW
1530 * lock is released by a thread (if a promotion was active)
1531 */
1532void lck_rw_clear_promotion(thread_t thread, uintptr_t trace_obj)
1533{
1534 assert(thread->rwlock_count == 0);
1535
1536 /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
1537 spl_t s = splsched();
1538 thread_lock(thread);
1539
1540 if (thread->sched_flags & TH_SFLAG_RW_PROMOTED)
1541 sched_thread_unpromote_reason(thread, TH_SFLAG_RW_PROMOTED, trace_obj);
1542
1543 thread_unlock(thread);
1544 splx(s);
1545}
1546
1547/*
1548 * Callout from context switch if the thread goes
1549 * off core with a positive rwlock_count
1550 *
1551 * Called at splsched with the thread locked
1552 */
1553void
1554lck_rw_set_promotion_locked(thread_t thread)
1555{
1556 if (LcksOpts & disLkRWPrio)
1557 return;
1558
1559 assert(thread->rwlock_count > 0);
1560
1561 if (!(thread->sched_flags & TH_SFLAG_RW_PROMOTED))
1562 sched_thread_promote_reason(thread, TH_SFLAG_RW_PROMOTED, 0);
1563}
1564
1565kern_return_t
1566host_lockgroup_info(
1567 host_t host,
1568 lockgroup_info_array_t *lockgroup_infop,
1569 mach_msg_type_number_t *lockgroup_infoCntp)
1570{
1571 lockgroup_info_t *lockgroup_info_base;
1572 lockgroup_info_t *lockgroup_info;
1573 vm_offset_t lockgroup_info_addr;
1574 vm_size_t lockgroup_info_size;
1575 vm_size_t lockgroup_info_vmsize;
1576 lck_grp_t *lck_grp;
1577 unsigned int i;
1578 vm_map_copy_t copy;
1579 kern_return_t kr;
1580
1581 if (host == HOST_NULL)
1582 return KERN_INVALID_HOST;
1583
1584 lck_mtx_lock(&lck_grp_lock);
1585
1586 lockgroup_info_size = lck_grp_cnt * sizeof(*lockgroup_info);
1587 lockgroup_info_vmsize = round_page(lockgroup_info_size);
1588 kr = kmem_alloc_pageable(ipc_kernel_map,
1589 &lockgroup_info_addr, lockgroup_info_vmsize, VM_KERN_MEMORY_IPC);
1590 if (kr != KERN_SUCCESS) {
1591 lck_mtx_unlock(&lck_grp_lock);
1592 return(kr);
1593 }
1594
1595 lockgroup_info_base = (lockgroup_info_t *) lockgroup_info_addr;
1596 lck_grp = (lck_grp_t *)queue_first(&lck_grp_queue);
1597 lockgroup_info = lockgroup_info_base;
1598
1599 for (i = 0; i < lck_grp_cnt; i++) {
1600
1601 lockgroup_info->lock_spin_cnt = lck_grp->lck_grp_spincnt;
1602 lockgroup_info->lock_spin_util_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_util_cnt;
1603 lockgroup_info->lock_spin_held_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cnt;
1604 lockgroup_info->lock_spin_miss_cnt = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_miss_cnt;
1605 lockgroup_info->lock_spin_held_max = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_max;
1606 lockgroup_info->lock_spin_held_cum = lck_grp->lck_grp_stat.lck_grp_spin_stat.lck_grp_spin_held_cum;
1607
1608 lockgroup_info->lock_mtx_cnt = lck_grp->lck_grp_mtxcnt;
1609 lockgroup_info->lock_mtx_util_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt;
1610 lockgroup_info->lock_mtx_held_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cnt;
1611 lockgroup_info->lock_mtx_miss_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt;
1612 lockgroup_info->lock_mtx_wait_cnt = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt;
1613 lockgroup_info->lock_mtx_held_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_max;
1614 lockgroup_info->lock_mtx_held_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_held_cum;
1615 lockgroup_info->lock_mtx_wait_max = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_max;
1616 lockgroup_info->lock_mtx_wait_cum = lck_grp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cum;
1617
1618 lockgroup_info->lock_rw_cnt = lck_grp->lck_grp_rwcnt;
1619 lockgroup_info->lock_rw_util_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt;
1620 lockgroup_info->lock_rw_held_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cnt;
1621 lockgroup_info->lock_rw_miss_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt;
1622 lockgroup_info->lock_rw_wait_cnt = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt;
1623 lockgroup_info->lock_rw_held_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_max;
1624 lockgroup_info->lock_rw_held_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_held_cum;
1625 lockgroup_info->lock_rw_wait_max = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_max;
1626 lockgroup_info->lock_rw_wait_cum = lck_grp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cum;
1627
1628 (void) strncpy(lockgroup_info->lockgroup_name,lck_grp->lck_grp_name, LOCKGROUP_MAX_NAME);
1629
1630 lck_grp = (lck_grp_t *)(queue_next((queue_entry_t)(lck_grp)));
1631 lockgroup_info++;
1632 }
1633
1634 *lockgroup_infoCntp = lck_grp_cnt;
1635 lck_mtx_unlock(&lck_grp_lock);
1636
1637 if (lockgroup_info_size != lockgroup_info_vmsize)
1638 bzero((char *)lockgroup_info, lockgroup_info_vmsize - lockgroup_info_size);
1639
1640 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)lockgroup_info_addr,
1641 (vm_map_size_t)lockgroup_info_size, TRUE, &copy);
1642 assert(kr == KERN_SUCCESS);
1643
1644 *lockgroup_infop = (lockgroup_info_t *) copy;
1645
1646 return(KERN_SUCCESS);
1647}
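
/*
 * Illustrative sketch (not part of the build): host_lockgroup_info() is the
 * kernel side of a MIG routine. A user-space caller receives the array as
 * out-of-line memory and is expected to deallocate it; error handling is
 * omitted here for brevity.
 *
 *	lockgroup_info_array_t	info;
 *	mach_msg_type_number_t	count;
 *
 *	if (host_lockgroup_info(mach_host_self(), &info, &count) == KERN_SUCCESS) {
 *		for (unsigned int i = 0; i < count; i++)
 *			printf("%s: mutex util %llu\n", info[i].lockgroup_name,
 *			    (unsigned long long)info[i].lock_mtx_util_cnt);
 *		vm_deallocate(mach_task_self(), (vm_address_t)info,
 *		    count * sizeof(info[0]));
 *	}
 */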
1648
1649/*
1650 * Atomic primitives, prototyped in kern/simple_lock.h
1651 * Noret versions are more efficient on some architectures
1652 */
1653
1654uint32_t
1655hw_atomic_add(volatile uint32_t *dest, uint32_t delt)
1656{
1657 ALIGN_TEST(dest,uint32_t);
1658 return __c11_atomic_fetch_add(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) + delt;
1659}
1660
1661uint32_t
1662hw_atomic_sub(volatile uint32_t *dest, uint32_t delt)
1663{
1664 ALIGN_TEST(dest,uint32_t);
1665 return __c11_atomic_fetch_sub(ATOMIC_CAST(uint32_t,dest), delt, memory_order_relaxed) - delt;
1666}
1667
1668uint32_t
1669hw_atomic_or(volatile uint32_t *dest, uint32_t mask)
1670{
1671 ALIGN_TEST(dest,uint32_t);
1672 return __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) | mask;
1673}
1674
1675void
1676hw_atomic_or_noret(volatile uint32_t *dest, uint32_t mask)
1677{
1678 ALIGN_TEST(dest,uint32_t);
1679 __c11_atomic_fetch_or(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1680}
1681
1682uint32_t
1683hw_atomic_and(volatile uint32_t *dest, uint32_t mask)
1684{
1685 ALIGN_TEST(dest,uint32_t);
1686 return __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed) & mask;
1687}
1688
1689void
1690hw_atomic_and_noret(volatile uint32_t *dest, uint32_t mask)
1691{
1692 ALIGN_TEST(dest,uint32_t);
1693 __c11_atomic_fetch_and(ATOMIC_CAST(uint32_t,dest), mask, memory_order_relaxed);
1694}
1695
1696uint32_t
1697hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest)
1698{
1699 ALIGN_TEST(dest,uint32_t);
1700 return __c11_atomic_compare_exchange_strong(ATOMIC_CAST(uint32_t,dest), &oldval, newval,
1701 memory_order_acq_rel_smp, memory_order_relaxed);
1702}
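
/*
 * Illustrative sketch (not part of the build): hw_compare_and_store() returns
 * non-zero only if *dest still held oldval, so read-modify-write updates that
 * cannot be expressed with the fetch-op primitives above are retried in a
 * loop. The saturating increment below is hypothetical.
 *
 *	static uint32_t
 *	example_saturating_inc(volatile uint32_t *counter)
 *	{
 *		uint32_t old, new;
 *
 *		do {
 *			old = *counter;
 *			new = (old == UINT32_MAX) ? old : old + 1;
 *		} while (!hw_compare_and_store(old, new, counter));
 *		return new;
 *	}
 */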
1703
1704