/*
 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/atomic.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/zalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <os/hash.h>
#include <sys/ulock.h>

/*
 * How ulock promotion works:
 *
 * There’s a requested policy field on every thread called ‘promotions’, which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
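
/*
 * Illustrative sketch (not part of the kernel build): roughly how a
 * userspace unfair lock is expected to drive these syscalls through the
 * __ulock_wait()/__ulock_wake() wrappers declared for userspace in
 * <sys/ulock.h>.  The placeholder names, lock-word encoding, and retry
 * policy below are assumptions for illustration only.
 *
 *	// Contention path: tell the kernel which thread owns the lock
 *	// (the lock word encodes the owner) and block until woken.
 *	int rc = __ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO,
 *	    &lock->value, value_with_owner, 0);   // timeout of 0 == wait forever
 *	if (rc < 0) {
 *		// with ULF_NO_ERRNO, errors come back as negative values
 *	}
 *
 *	// Unlock path, taken only when waiters were observed:
 *	__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock->value, 0);
 */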

static LCK_GRP_DECLARE(ull_lck_grp, "ulocks");


#if ULL_TICKET_LOCK
typedef lck_ticket_t ull_lock_t;
#define ull_lock_init(ull) lck_ticket_init(&ull->ull_lock, &ull_lck_grp)
#define ull_lock_destroy(ull) lck_ticket_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull) lck_ticket_lock(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull) lck_ticket_unlock(&ull->ull_lock)
#define ull_assert_owned(ull) lck_ticket_assert_owned(&ull->ull_lock)
#define ull_assert_notwned(ull) lck_ticket_assert_not_owned(&ull->ull_lock)
#else
typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull) lck_spin_init(&ull->ull_lock, &ull_lck_grp, NULL)
#define ull_lock_destroy(ull) lck_spin_destroy(&ull->ull_lock, &ull_lck_grp)
#define ull_lock(ull) lck_spin_lock_grp(&ull->ull_lock, &ull_lck_grp)
#define ull_unlock(ull) lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notwned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)
#endif /* ULL_TICKET_LOCK */

#define ULOCK_TO_EVENT(ull) ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef enum {
	ULK_INVALID = 0,
	ULK_UADDR,
	ULK_XPROC,
} ulk_type;

typedef struct {
	union {
		struct __attribute__((packed)) {
			user_addr_t ulk_addr;
			/*
			 * We use the task address as a hashing key,
			 * so that ulock wakes across exec can't
			 * be confused.
			 */
			task_t ulk_task __kernel_data_semantics;
		};
		struct __attribute__((packed)) {
			uint64_t ulk_object;
			uint64_t ulk_offset;
		};
	};
	ulk_type ulk_key_type;
} ulk_t;

#define ULK_UADDR_LEN (sizeof(user_addr_t) + sizeof(task_t))
#define ULK_XPROC_LEN (sizeof(uint64_t) + sizeof(uint64_t))

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	if (a->ulk_key_type != b->ulk_key_type) {
		return false;
	}

	if (a->ulk_key_type == ULK_UADDR) {
		return (a->ulk_task == b->ulk_task) &&
		       (a->ulk_addr == b->ulk_addr);
	}

	assert(a->ulk_key_type == ULK_XPROC);
	return (a->ulk_object == b->ulk_object) &&
	       (a->ulk_offset == b->ulk_offset);
}

typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t ull_owner; /* holds +1 thread reference */
	ulk_t ull_key;
	ull_lock_t ull_lock;
	uint ull_bucket_index;
	int32_t ull_nwaiters;
	int32_t ull_refcount;
	uint8_t ull_opcode;
	struct turnstile *ull_turnstile;
	queue_chain_t ull_hash_link;
} ull_t;

#define ULL_MUST_EXIST 0x0001
static void ull_put(ull_t *);

static uint32_t ulock_adaptive_spin_usecs = 20;

SYSCTL_INT(_kern, OID_AUTO, ulock_adaptive_spin_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &ulock_adaptive_spin_usecs, 0, "ulock adaptive spin duration");

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	switch (ull->ull_key.ulk_key_type) {
	case ULK_UADDR:
		kprintf("ull_key.ulk_key_type\tULK_UADDR\n");
		kprintf("ull_key.ulk_task\t%p\n", ull->ull_key.ulk_task);
		kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
		break;
	case ULK_XPROC:
		kprintf("ull_key.ulk_key_type\tULK_XPROC\n");
		kprintf("ull_key.ulk_object\t%p\n", (void *)(ull->ull_key.ulk_object));
		kprintf("ull_key.ulk_offset\t%p\n", (void *)(ull->ull_key.ulk_offset));
		break;
	default:
		kprintf("ull_key.ulk_key_type\tUNKNOWN %d\n", ull->ull_key.ulk_key_type);
		break;
	}
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
	queue_head_t ulb_head;
#if ULL_TICKET_LOCK
	lck_ticket_t ulb_lock;
#else
	lck_spin_t ulb_lock;
#endif /* ULL_TICKET_LOCK */
} ull_bucket_t;

static SECURITY_READ_ONLY_LATE(int) ull_hash_buckets;
static SECURITY_READ_ONLY_LATE(ull_bucket_t *) ull_bucket;
static uint32_t ull_nzalloc = 0;
static KALLOC_TYPE_DEFINE(ull_zone, ull_t, KT_DEFAULT);

#if ULL_TICKET_LOCK
#define ull_bucket_lock(i) lck_ticket_lock(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_ticket_unlock(&ull_bucket[i].ulb_lock)
#else
#define ull_bucket_lock(i) lck_spin_lock_grp(&ull_bucket[i].ulb_lock, &ull_lck_grp)
#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)
#endif /* ULL_TICKET_LOCK */
static __inline__ uint32_t
ull_hash_index(const void *key, size_t length)
{
	uint32_t hash = os_hash_jenkins(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

#define ULL_INDEX(keyp) ull_hash_index(keyp, keyp->ulk_key_type == ULK_UADDR ? ULK_UADDR_LEN : ULK_XPROC_LEN)

static void
ulock_initialize(void)
{
	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
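	/*
	 * Worked example (illustrative numbers only): with thread_max = 2560,
	 * the next power of 2 is 4096, so ull_hash_buckets = 4096 / 4 = 1024,
	 * which satisfies the ull_hash_buckets >= thread_max / 4 assert below.
	 */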
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	ull_bucket = zalloc_permanent(sizeof(ull_bucket_t) * ull_hash_buckets,
	    ZALIGN_PTR);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
#if ULL_TICKET_LOCK
		lck_ticket_init(&ull_bucket[i].ulb_lock, &ull_lck_grp);
#else
		lck_spin_init(&ull_bucket[i].ulb_lock, &ull_lck_grp, NULL);
#endif /* ULL_TICKET_LOCK */
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_FIRST, ulock_initialize);

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given task address.
 * If task == TASK_NULL, dump the whole table.
 */
static int
ull_hash_dump(task_t task)
{
	int count = 0;
	if (task == TASK_NULL) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (task == TASK_NULL) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((task == TASK_NULL) || ((elem->ull_key.ulk_key_type == ULK_UADDR)
				    && (task == elem->ull_key.ulk_task))) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (task == TASK_NULL) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc_flags(ull_zone, Z_SET_NOTSHARED);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_bucket_index = ULL_INDEX(key);
	ull->ull_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_turnstile = TURNSTILE_NULL;

	ull_lock_init(ull);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);

	ull_assert_notwned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If the MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock held.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_key_type == ULK_INVALID) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}
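
/*
 * Typical get/put pairing (an illustrative sketch mirroring the callers
 * below; not additional API):
 *
 *	ull_t *unused_ull = NULL;
 *	ull_t *ull = ull_get(&key, 0, &unused_ull);  // returns with ull_lock held
 *	if (ull != NULL) {
 *		...
 *		ull_put(ull);             // drops the +1 from ull_get and unlocks
 *	}
 *	if (unused_ull != NULL) {
 *		ull_free(unused_ull);     // speculative allocation that wasn't needed
 *	}
 */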

extern kern_return_t vm_map_page_info(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count);
extern vm_map_t current_map(void);
extern boolean_t machine_thread_on_core(thread_t thread);

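/*
 * Resolve a user address to a (VM object id, offset) pair.  This is what
 * gives the UL_*_SHARED operations a key that is stable across address
 * spaces: two processes that map the same object hash to the same ull_t
 * even though their user addresses differ.  Returns 0 or EINVAL.
 */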
static int
uaddr_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = vm_map_page_info(current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return 0;
}

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

inline static int
wait_result_to_return_code(wait_result_t wr)
{
	int ret = 0;

	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	case THREAD_RESTART:
	default:
		ret = EINTR;
		break;
	}

	return ret;
}

static int
ulock_resolve_owner(uint32_t value, thread_t *owner)
{
	mach_port_name_t owner_name = ulock_owner_value_to_port_name(value);

	*owner = port_name_to_thread(owner_name,
	    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
	    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
	if (*owner == THREAD_NULL) {
		/*
		 * Translation failed - even though the lock value is up to date,
		 * whatever was stored in the lock wasn't actually a thread port.
		 */
		return owner_name == MACH_PORT_DEAD ? ESRCH : EOWNERDEAD;
	}
	return 0;
}

int
sys_ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	struct ulock_wait2_args args2;

	args2.operation = args->operation;
	args2.addr = args->addr;
	args2.value = args->value;
	args2.timeout = (uint64_t)(args->timeout) * NSEC_PER_USEC;
	args2.value2 = 0;

	return sys_ulock_wait2(p, &args2, retval);
}

int
sys_ulock_wait2(struct proc *p, struct ulock_wait2_args *args, int32_t *retval)
{
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	bool set_owner = false;
	bool xproc = false;
	size_t lock_size = sizeof(uint32_t);
	int copy_ret;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	case UL_COMPARE_AND_WAIT64:
		lock_size = sizeof(uint64_t);
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
		xproc = true;
		break;
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		lock_size = sizeof(uint64_t);
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	uint64_t value = 0;

	if ((args->addr == 0) || (args->addr & (lock_size - 1))) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(args->addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = proc_task(p);
		key.ulk_addr = args->addr;
	}

	if ((flags & ULF_WAIT_ADAPTIVE_SPIN) && set_owner) {
		/*
		 * Attempt the copyin outside of the lock once,
		 *
		 * If it doesn't match (which is common), return right away.
		 *
		 * If it matches, resolve the current owner, and if it is on core,
		 * spin a bit waiting for the value to change. If the owner isn't on
		 * core, or if the value stays stable, then go on with the regular
		 * blocking code.
		 */
		uint64_t end = 0;
		uint32_t u32;

		ret = copyin_atomic32(args->addr, &u32);
		if (ret || u32 != args->value) {
			goto munge_retval;
		}
		for (;;) {
			if (owner_thread == NULL && ulock_resolve_owner(u32, &owner_thread) != 0) {
				break;
			}

			/* owner_thread may have a +1 starting here */

			if (!machine_thread_on_core(owner_thread)) {
				break;
			}
			if (end == 0) {
				clock_interval_to_deadline(ulock_adaptive_spin_usecs,
				    NSEC_PER_USEC, &end);
			} else if (mach_absolute_time() > end) {
				break;
			}
			if (copyin_atomic32_wait_if_equals(args->addr, u32) != 0) {
				goto munge_retval;
			}
		}
	}

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */

	/* copyin_atomicXX always checks alignment */

	if (lock_size == 4) {
		uint32_t u32;
		copy_ret = copyin_atomic32(args->addr, &u32);
		value = u32;
	} else {
		copy_ret = copyin_atomic64(args->addr, &value);
	}

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == proc_getpid(p)) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (os_atomic_inc_orig(&fault_inject, relaxed) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin() will return an error if the access to the user addr would have faulted,
		 * so just return and let the user level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		if (owner_thread == THREAD_NULL) {
			ret = ulock_resolve_owner((uint32_t)args->value, &owner_thread);
			if (ret == EOWNERDEAD) {
				/*
				 * Translation failed - even though the lock value is up to date,
				 * whatever was stored in the lock wasn't actually a thread port.
				 */
				goto out_locked;
			}
			/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
			ret = 0;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *    i) holding the user lock or
		 *    ii) has just unlocked the user lock after I looked
		 *        and is heading toward the kernel to call ull_wake.
		 *        If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint64_t timeout = args->timeout; /* nanoseconds */
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	if (timeout) {
		if (flags & ULF_DEADLINE) {
			deadline = timeout;
		} else {
			nanoseconds_to_deadline(timeout, &deadline);
		}
	}

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	if (wr == THREAD_WAITING) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.ull = ull;
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
	}

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull, THREAD_HANDOFF_NONE);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);
	owner_thread = NULL;

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if (owner_thread) {
		thread_deallocate(owner_thread);
	}
	if (ret == ESTALE) {
		ret = 0;
	}
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		memset(&ull->ull_key, 0, sizeof ull->ull_key);
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Needs to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}

__attribute__((noreturn))
static void
ulock_wait_continue(__unused void * parameter, wait_result_t wr)
{
	uthread_t uthread = current_uthread();
	int ret = 0;

	ull_t *ull = uthread->uu_save.uus_ulock_wait_data.ull;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}
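
/*
 * Note on the continuation pattern above (descriptive only, no new behavior):
 * thread_handoff_parameter()/thread_block_parameter() do not return to the
 * caller, so sys_ulock_wait2() stashes everything the wakeup path needs in
 * uthread->uu_save.uus_ulock_wait_data before blocking, and
 * ulock_wait_continue() picks that state back up when the thread resumes.
 */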

int
sys_ulock_wake(struct proc *p, struct ulock_wake_args *args, int32_t *retval)
{
	int ret = 0;
#if DEVELOPMENT || DEBUG
	uint8_t opcode = (uint8_t)(args->operation & UL_OPCODE_MASK);

	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(proc_task(p));
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(TASK_NULL);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif
	ret = ulock_wake(proc_task(p), args->operation, args->addr, args->wake_value);

	if ((args->operation & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	return ret;
}

int
ulock_wake(task_t task, uint32_t operation, user_addr_t addr, uint64_t wake_value)
{
	uint8_t opcode = (uint8_t)(operation & UL_OPCODE_MASK);
	uint flags = operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;

	bool set_owner = false;
	bool allow_non_owner = false;
	bool xproc = false;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = true;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
		break;
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		xproc = true;
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	if ((flags & ULF_WAKE_THREAD) && ((flags & ULF_WAKE_ALL) || set_owner)) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_ALLOW_NON_OWNER) {
		if (!set_owner) {
			ret = EINVAL;
			goto munge_retval;
		}

		allow_non_owner = true;
	}

	if (addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (xproc) {
		uint64_t object = 0;
		uint64_t offset = 0;

		ret = uaddr_findobj(addr, &object, &offset);
		if (ret) {
			ret = EINVAL;
			goto munge_retval;
		}
		key.ulk_key_type = ULK_XPROC;
		key.ulk_object = object;
		key.ulk_offset = offset;
	} else {
		key.ulk_key_type = ULK_UADDR;
		key.ulk_task = task;
		key.ulk_addr = addr;
	}

	if (flags & ULF_WAKE_THREAD) {
		mach_port_name_t wake_thread_name = (mach_port_name_t)(wake_value);
		wake_thread = port_name_to_thread(wake_thread_name,
		    PORT_INTRANS_THREAD_IN_CURRENT_TASK |
		    PORT_INTRANS_THREAD_NOT_CURRENT_THREAD);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	thread_t new_owner = THREAD_NULL;
	struct turnstile *ts = TURNSTILE_NULL;
	thread_t cleanup_thread = THREAD_NULL;

	if (ull == NULL) {
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_ull_put;
	}

	if (set_owner) {
		if ((ull->ull_owner != current_thread()) && !allow_non_owner) {
			/*
			 * If the current thread isn't the known owner,
			 * then this wake call was late to the party,
			 * and the kernel already knows who owns the lock.
			 *
			 * The current owner already knows the lock is contended
			 * and will redrive wakes, so just bail out.
			 */
			goto out_ull_put;
		}
	} else {
		assert(ull->ull_owner == THREAD_NULL);
	}

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	assert(ts != TURNSTILE_NULL);

	if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else if (flags & ULF_WAKE_ALL) {
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED,
		    set_owner ? WAITQ_UPDATE_INHERITOR : WAITQ_WAKEUP_DEFAULT);
	} else if (set_owner) {
		/*
		 * The turnstile waitq is priority ordered,
		 * and will wake up the highest priority waiter
		 * and set it as the inheritor for us.
		 */
		new_owner = waitq_wakeup64_identify(&ts->ts_waitq,
		    CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_UPDATE_INHERITOR);
	} else {
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
	}

	if (set_owner) {
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		cleanup_thread = ull->ull_owner;
		ull->ull_owner = new_owner;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL, TURNSTILE_ULOCK);

out_ull_put:
	ull_put(ull);

	if (ts != TURNSTILE_NULL) {
		/* Needs to be called after dropping the interlock */
		turnstile_cleanup();
	}

	if (cleanup_thread != THREAD_NULL) {
		thread_deallocate(cleanup_thread);
	}

munge_retval:
	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	return ret;
}

void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);

	zone_require(ull_zone->kt_zv.zv_zone, ull);

	switch (ull->ull_opcode) {
	case UL_UNFAIR_LOCK:
	case UL_UNFAIR_LOCK64_SHARED:
		waitinfo->owner = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	case UL_COMPARE_AND_WAIT:
	case UL_COMPARE_AND_WAIT64:
	case UL_COMPARE_AND_WAIT_SHARED:
	case UL_COMPARE_AND_WAIT64_SHARED:
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
		break;
	default:
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void*)ull);
		break;
	}
	return;
}