/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file_internal.h>
#include <sys/proc_internal.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/sysproto.h>
#include <sys/pthread_shims.h>

#include <mach/mach_types.h>

#include <kern/cpu_data.h>
#include <kern/mach_param.h>
#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/kalloc.h>
#include <kern/thread.h>
#include <kern/clock.h>
#include <kern/ledger.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/telemetry.h>
#include <kern/waitq.h>
#include <kern/sched_prim.h>
#include <kern/turnstile.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <pexpert/pexpert.h>

#define XNU_TEST_BITMAP
#include <kern/bits.h>

#include <sys/ulock.h>

/*
 * How ulock promotion works:
 *
 * There's a requested policy field on every thread called 'promotions', which
 * expresses which ulock promotions are happening to this thread.
 * The promotion priority saturates until the promotion count goes to 0.
 *
 * We also track effective promotion qos, which is the qos before clamping.
 * This value is used for promoting a thread that another thread is waiting on,
 * so that the lock owner reinflates to the right priority after unclamping.
 *
 * This also works for non-QoS threads, which can donate base priority to QoS
 * and non-QoS threads alike.
 *
 * ulock wait applies a promotion to the owner communicated through
 * UL_UNFAIR_LOCK as waiters block, and that promotion is saturated as long as
 * there is still an owner. In ulock wake, if the waker is still the owner,
 * then it clears its ownership and drops the boost. It does NOT transfer
 * ownership/priority boost to the new thread. Instead, it selects the
 * waiting thread with the highest base priority to be woken next, and
 * relies on that thread to carry the torch for the other waiting threads.
 */
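/*
 * Illustrative timeline (a sketch, not normative; the thread names and
 * priorities are hypothetical):
 *
 *   T1 (base pri 37) owns an unfair lock; T2 (base pri 47) calls ulock_wait.
 *   - T2 names T1 as the turnstile inheritor, so T1 runs at >= 47 for as
 *     long as waiters are blocked and T1 remains the known owner.
 *   - T1 calls ulock_wake: it is still the owner, so it clears ull_owner
 *     and sheds the boost, falling back toward base pri 37.
 *   - The waiter with the highest base priority (T2 here) is woken; it must
 *     re-acquire the userspace lock word itself, since neither ownership
 *     nor the boost is handed off in the kernel.
 */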

static lck_grp_t *ull_lck_grp;

typedef lck_spin_t ull_lock_t;
#define ull_lock_init(ull) lck_spin_init(&ull->ull_lock, ull_lck_grp, NULL)
#define ull_lock_destroy(ull) lck_spin_destroy(&ull->ull_lock, ull_lck_grp)
#define ull_lock(ull) lck_spin_lock(&ull->ull_lock)
#define ull_unlock(ull) lck_spin_unlock(&ull->ull_lock)
#define ull_assert_owned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_OWNED)
#define ull_assert_notowned(ull) LCK_SPIN_ASSERT(&ull->ull_lock, LCK_ASSERT_NOTOWNED)

#define ULOCK_TO_EVENT(ull) ((event_t)ull)
#define EVENT_TO_ULOCK(event) ((ull_t *)event)

typedef struct __attribute__((packed)) {
	user_addr_t ulk_addr;
	pid_t ulk_pid;
} ulk_t;

inline static bool
ull_key_match(ulk_t *a, ulk_t *b)
{
	return ((a->ulk_pid == b->ulk_pid) &&
	        (a->ulk_addr == b->ulk_addr));
}

typedef struct ull {
	/*
	 * ull_owner is the most recent known value for the owner of this ulock
	 * i.e. it may be out of date WRT the real value in userspace.
	 */
	thread_t ull_owner; /* holds +1 thread reference */
	ulk_t ull_key;
	ulk_t ull_saved_key;
	ull_lock_t ull_lock;
	uint ull_bucket_index;
	int32_t ull_nwaiters;
	int32_t ull_max_nwaiters;
	int32_t ull_refcount;
	uint8_t ull_opcode;
	struct turnstile *ull_turnstile;
	queue_chain_t ull_hash_link;
} ull_t;

extern void ulock_initialize(void);

#define ULL_MUST_EXIST 0x0001
static ull_t *ull_get(ulk_t *, uint32_t, ull_t **);
static void ull_put(ull_t *);

#if DEVELOPMENT || DEBUG
static int ull_simulate_copyin_fault = 0;

static void
ull_dump(ull_t *ull)
{
	kprintf("ull\t%p\n", ull);
	kprintf("ull_key.ulk_pid\t%d\n", ull->ull_key.ulk_pid);
	kprintf("ull_key.ulk_addr\t%p\n", (void *)(ull->ull_key.ulk_addr));
	kprintf("ull_saved_key.ulk_pid\t%d\n", ull->ull_saved_key.ulk_pid);
	kprintf("ull_saved_key.ulk_addr\t%p\n", (void *)(ull->ull_saved_key.ulk_addr));
	kprintf("ull_nwaiters\t%d\n", ull->ull_nwaiters);
	kprintf("ull_max_nwaiters\t%d\n", ull->ull_max_nwaiters);
	kprintf("ull_refcount\t%d\n", ull->ull_refcount);
	kprintf("ull_opcode\t%d\n\n", ull->ull_opcode);
	kprintf("ull_owner\t0x%llx\n\n", thread_tid(ull->ull_owner));
	kprintf("ull_turnstile\t%p\n\n", ull->ull_turnstile);
}
#endif

typedef struct ull_bucket {
	queue_head_t ulb_head;
	lck_spin_t ulb_lock;
} ull_bucket_t;

static int ull_hash_buckets;
static ull_bucket_t *ull_bucket;
static uint32_t ull_nzalloc = 0;
static zone_t ull_zone;

#define ull_bucket_lock(i) lck_spin_lock(&ull_bucket[i].ulb_lock)
#define ull_bucket_unlock(i) lck_spin_unlock(&ull_bucket[i].ulb_lock)

static __inline__ uint32_t
ull_hash_index(char *key, size_t length)
{
	uint32_t hash = jenkins_hash(key, length);

	hash &= (ull_hash_buckets - 1);

	return hash;
}

/* Ensure that the key structure is packed,
 * so that no undefined memory is passed to
 * ull_hash_index()
 */
static_assert(sizeof(ulk_t) == sizeof(user_addr_t) + sizeof(pid_t));

#define ULL_INDEX(keyp) ull_hash_index((char *)keyp, sizeof *keyp)
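/*
 * Worked example (a sketch): for a process with pid 123 waiting on user
 * address 0x100004000, ULL_INDEX hashes the packed 12-byte key
 * { .ulk_addr = 0x100004000, .ulk_pid = 123 } in its entirety. Without
 * __attribute__((packed)), the struct would carry 4 bytes of tail padding
 * after ulk_pid, and jenkins_hash would consume that undefined memory, so
 * two lookups with equal keys could land in different buckets.
 */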

void
ulock_initialize(void)
{
	ull_lck_grp = lck_grp_alloc_init("ulocks", NULL);

	assert(thread_max > 16);
	/* Size ull_hash_buckets based on thread_max.
	 * Round up to nearest power of 2, then divide by 4
	 */
	ull_hash_buckets = (1 << (bit_ceiling(thread_max) - 2));

	kprintf("%s>thread_max=%d, ull_hash_buckets=%d\n", __FUNCTION__, thread_max, ull_hash_buckets);
	assert(ull_hash_buckets >= thread_max / 4);

	ull_bucket = (ull_bucket_t *)kalloc(sizeof(ull_bucket_t) * ull_hash_buckets);
	assert(ull_bucket != NULL);

	for (int i = 0; i < ull_hash_buckets; i++) {
		queue_init(&ull_bucket[i].ulb_head);
		lck_spin_init(&ull_bucket[i].ulb_lock, ull_lck_grp, NULL);
	}

	ull_zone = zinit(sizeof(ull_t),
	    thread_max * sizeof(ull_t),
	    0, "ulocks");

	zone_change(ull_zone, Z_NOENCRYPT, TRUE);
}
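/*
 * Sizing sketch (assuming a hypothetical thread_max of 2560):
 * bit_ceiling(2560) == 12, since 1 << 12 == 4096 is the nearest power of
 * two >= 2560, so ull_hash_buckets == 1 << (12 - 2) == 1024, which
 * satisfies the assertion above (1024 >= 2560 / 4 == 640).
 */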

#if DEVELOPMENT || DEBUG
/* Count the number of hash entries for a given pid.
 * If pid == 0, dump the whole table.
 */
static int
ull_hash_dump(pid_t pid)
{
	int count = 0;
	if (pid == 0) {
		kprintf("%s>total number of ull_t allocated %d\n", __FUNCTION__, ull_nzalloc);
		kprintf("%s>BEGIN\n", __FUNCTION__);
	}
	for (int i = 0; i < ull_hash_buckets; i++) {
		ull_bucket_lock(i);
		if (!queue_empty(&ull_bucket[i].ulb_head)) {
			ull_t *elem;
			if (pid == 0) {
				kprintf("%s>index %d:\n", __FUNCTION__, i);
			}
			qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
				if ((pid == 0) || (pid == elem->ull_key.ulk_pid)) {
					ull_dump(elem);
					count++;
				}
			}
		}
		ull_bucket_unlock(i);
	}
	if (pid == 0) {
		kprintf("%s>END\n", __FUNCTION__);
		ull_nzalloc = 0;
	}
	return count;
}
#endif

static ull_t *
ull_alloc(ulk_t *key)
{
	ull_t *ull = (ull_t *)zalloc(ull_zone);
	assert(ull != NULL);

	ull->ull_refcount = 1;
	ull->ull_key = *key;
	ull->ull_saved_key = *key;
	ull->ull_bucket_index = ULL_INDEX(key);
	ull->ull_nwaiters = 0;
	ull->ull_max_nwaiters = 0;
	ull->ull_opcode = 0;

	ull->ull_owner = THREAD_NULL;
	ull->ull_turnstile = TURNSTILE_NULL;

	ull_lock_init(ull);

	ull_nzalloc++;
	return ull;
}

static void
ull_free(ull_t *ull)
{
	assert(ull->ull_owner == THREAD_NULL);
	assert(ull->ull_turnstile == TURNSTILE_NULL);
	ull_assert_notowned(ull);

	ull_lock_destroy(ull);

	zfree(ull_zone, ull);
}

/* Finds an existing ulock structure (ull_t), or creates a new one.
 * If the ULL_MUST_EXIST flag is set, returns NULL instead of creating a new one.
 * The ulock structure is returned with ull_lock locked.
 */
static ull_t *
ull_get(ulk_t *key, uint32_t flags, ull_t **unused_ull)
{
	ull_t *ull = NULL;
	uint i = ULL_INDEX(key);
	ull_t *new_ull = (flags & ULL_MUST_EXIST) ? NULL : ull_alloc(key);
	ull_t *elem;

	ull_bucket_lock(i);
	qe_foreach_element(elem, &ull_bucket[i].ulb_head, ull_hash_link) {
		ull_lock(elem);
		if (ull_key_match(&elem->ull_key, key)) {
			ull = elem;
			break;
		} else {
			ull_unlock(elem);
		}
	}
	if (ull == NULL) {
		if (flags & ULL_MUST_EXIST) {
			/* Must already exist (called from wake) */
			ull_bucket_unlock(i);
			assert(new_ull == NULL);
			assert(unused_ull == NULL);
			return NULL;
		}

		if (new_ull == NULL) {
			/* Alloc above failed */
			ull_bucket_unlock(i);
			return NULL;
		}

		ull = new_ull;
		ull_lock(ull);
		enqueue(&ull_bucket[i].ulb_head, &ull->ull_hash_link);
	} else if (!(flags & ULL_MUST_EXIST)) {
		assert(new_ull);
		assert(unused_ull);
		assert(*unused_ull == NULL);
		*unused_ull = new_ull;
	}

	ull->ull_refcount++;

	ull_bucket_unlock(i);

	return ull; /* still locked */
}
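/*
 * Usage sketch of the get/put protocol (this mirrors what ulock_wait does
 * below; it is illustrative, not additional kernel code):
 *
 *	ull_t *unused_ull = NULL;
 *	ull_t *ull = ull_get(&key, 0, &unused_ull);
 *	if (ull == NULL) { ... ENOMEM ... }
 *	... operate on ull under ull_lock ...
 *	ull_put(ull);			// drops the lock, frees on last ref
 *	if (unused_ull != NULL) {
 *		ull_free(unused_ull);	// we lost the insertion race
 *	}
 */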

/*
 * Must be called with ull_lock held
 */
static void
ull_put(ull_t *ull)
{
	ull_assert_owned(ull);
	int refcount = --ull->ull_refcount;
	assert(refcount == 0 ? (ull->ull_key.ulk_pid == 0 && ull->ull_key.ulk_addr == 0) : 1);
	ull_unlock(ull);

	if (refcount > 0) {
		return;
	}

	ull_bucket_lock(ull->ull_bucket_index);
	remqueue(&ull->ull_hash_link);
	ull_bucket_unlock(ull->ull_bucket_index);

	ull_free(ull);
}

static void ulock_wait_continue(void *, wait_result_t);
static void ulock_wait_cleanup(ull_t *, thread_t, thread_t, int32_t *);

inline static int
wait_result_to_return_code(wait_result_t wr)
{
	int ret = 0;

	switch (wr) {
	case THREAD_AWAKENED:
		break;
	case THREAD_TIMED_OUT:
		ret = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
	case THREAD_RESTART:
	default:
		ret = EINTR;
		break;
	}

	return ret;
}

int
ulock_wait(struct proc *p, struct ulock_wait_args *args, int32_t *retval)
{
	uint opcode = args->operation & UL_OPCODE_MASK;
	uint flags = args->operation & UL_FLAGS_MASK;

	if (flags & ULF_WAIT_CANCEL_POINT) {
		__pthread_testcancel(1);
	}

	int ret = 0;
	thread_t self = current_thread();
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t owner_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	ull_t *unused_ull = NULL;

	if ((flags & ULF_WAIT_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

	boolean_t set_owner = FALSE;

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		set_owner = TRUE;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	default:
		ret = EINVAL;
		goto munge_retval;
	}

	/* 32-bit lock type for UL_COMPARE_AND_WAIT and UL_UNFAIR_LOCK */
	uint32_t value = 0;

	if ((args->addr == 0) || (args->addr % _Alignof(_Atomic(typeof(value))))) {
		ret = EINVAL;
		goto munge_retval;
	}

	key.ulk_pid = p->p_pid;
	key.ulk_addr = args->addr;

	ull_t *ull = ull_get(&key, 0, &unused_ull);
	if (ull == NULL) {
		ret = ENOMEM;
		goto munge_retval;
	}
	/* ull is locked */

	ull->ull_nwaiters++;

	if (ull->ull_nwaiters > ull->ull_max_nwaiters) {
		ull->ull_max_nwaiters = ull->ull_nwaiters;
	}

	if (ull->ull_opcode == 0) {
		ull->ull_opcode = opcode;
	} else if (ull->ull_opcode != opcode) {
		ret = EDOM;
		goto out_locked;
	}

	/*
	 * We don't want this copyin to get wedged behind VM operations,
	 * but we have to read the userspace value under the ull lock for correctness.
	 *
	 * Until <rdar://problem/24999882> exists,
	 * holding the ull spinlock across copyin forces any
	 * vm_fault we encounter to fail.
	 */
	uint64_t val64; /* copyin_word always zero-extends to 64-bits */

	int copy_ret = copyin_word(args->addr, &val64, sizeof(value));

	value = (uint32_t)val64;

#if DEVELOPMENT || DEBUG
	/* Occasionally simulate copyin finding the user address paged out */
	if (((ull_simulate_copyin_fault == p->p_pid) || (ull_simulate_copyin_fault == 1)) && (copy_ret == 0)) {
		static _Atomic int fault_inject = 0;
		if (__c11_atomic_fetch_add(&fault_inject, 1, __ATOMIC_RELAXED) % 73 == 0) {
			copy_ret = EFAULT;
		}
	}
#endif
	if (copy_ret != 0) {
		/* copyin_word() returns an error if access to the user addr would have faulted,
		 * so just return and let the user-level code fault it in.
		 */
		ret = copy_ret;
		goto out_locked;
	}

	if (value != args->value) {
		/* Lock value has changed from expected so bail out */
		goto out_locked;
	}

	if (set_owner) {
		mach_port_name_t owner_name = ulock_owner_value_to_port_name(args->value);
		owner_thread = port_name_to_thread_for_ulock(owner_name);

		/* HACK: don't bail on MACH_PORT_DEAD, to avoid blowing up the no-tsd pthread lock */
		if (owner_name != MACH_PORT_DEAD && owner_thread == THREAD_NULL) {
			/*
			 * Translation failed - even though the lock value is up to date,
			 * whatever was stored in the lock wasn't actually a thread port.
			 */
			ret = EOWNERDEAD;
			goto out_locked;
		}
		/* owner_thread has a +1 reference */

		/*
		 * At this point, I know:
		 * a) owner_thread is definitely the current owner, because I just read the value
		 * b) owner_thread is either:
		 *    i) holding the user lock or
		 *    ii) has just unlocked the user lock after I looked
		 *        and is heading toward the kernel to call ull_wake.
		 *        If so, it's going to have to wait for the ull mutex.
		 *
		 * Therefore, I can ask the turnstile to promote its priority, and I can rely
		 * on it to come by later to issue the wakeup and lose its promotion.
		 */

		/* Return the +1 ref from the ull_owner field */
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		if (owner_thread != THREAD_NULL) {
			/* The ull_owner field now owns a +1 ref on owner_thread */
			thread_reference(owner_thread);
			ull->ull_owner = owner_thread;
		}
	}

	wait_result_t wr;
	uint32_t timeout = args->timeout;
	uint64_t deadline = TIMEOUT_WAIT_FOREVER;
	wait_interrupt_t interruptible = THREAD_ABORTSAFE;
	struct turnstile *ts;

	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);
	thread_set_pending_block_hint(self, kThreadWaitUserLock);

	if (flags & ULF_WAIT_WORKQ_DATA_CONTENTION) {
		interruptible |= THREAD_WAIT_NOREPORT;
	}

	if (timeout) {
		clock_interval_to_deadline(timeout, NSEC_PER_USEC, &deadline);
	}

	turnstile_update_inheritor(ts, owner_thread,
	    (TURNSTILE_DELAYED_UPDATE | TURNSTILE_INHERITOR_THREAD));

	wr = waitq_assert_wait64(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
	    interruptible, deadline);

	ull_unlock(ull);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_NOT_HELD);

	if (wr == THREAD_WAITING) {
		uthread_t uthread = (uthread_t)get_bsdthread_info(self);
		uthread->uu_save.uus_ulock_wait_data.retval = retval;
		uthread->uu_save.uus_ulock_wait_data.flags = flags;
		uthread->uu_save.uus_ulock_wait_data.owner_thread = owner_thread;
		uthread->uu_save.uus_ulock_wait_data.old_owner = old_owner;
		if (set_owner && owner_thread != THREAD_NULL) {
			thread_handoff_parameter(owner_thread, ulock_wait_continue, ull);
		} else {
			assert(owner_thread == THREAD_NULL);
			thread_block_parameter(ulock_wait_continue, ull);
		}
		/* NOT REACHED */
	}

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL);

out_locked:
	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if (unused_ull) {
		ull_free(unused_ull);
		unused_ull = NULL;
	}

	assert(*retval >= 0);

munge_retval:
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}
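/*
 * Userspace view (a sketch of the acquire-side contract this syscall
 * assumes, modeled on an os_unfair_lock-style lock word; everything here
 * except __ulock_wait is illustrative):
 *
 *	uint32_t self = my_thread_port_name();	// owner encoded in lock word
 *	while (!atomic_cas(&lock, 0, self)) {
 *		uint32_t owner = atomic_load(&lock);
 *		// Block until the lock word changes from 'owner', donating
 *		// priority to the thread whose port name is stored in it.
 *		__ulock_wait(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock, owner, 0);
 *	}
 */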

/*
 * Must be called with ull_lock held
 */
static void
ulock_wait_cleanup(ull_t *ull, thread_t owner_thread, thread_t old_owner, int32_t *retval)
{
	ull_assert_owned(ull);

	thread_t old_lingering_owner = THREAD_NULL;

	*retval = --ull->ull_nwaiters;
	if (ull->ull_nwaiters == 0) {
		/*
		 * If the wait was canceled early, we might need to
		 * clear out the lingering owner reference before
		 * freeing the ull.
		 */
		old_lingering_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;

		ull->ull_key.ulk_pid = 0;
		ull->ull_key.ulk_addr = 0;
		ull->ull_refcount--;
		assert(ull->ull_refcount > 0);
	}
	ull_put(ull);

	/* Needs to be called after dropping the interlock */
	turnstile_cleanup();

	if (owner_thread != THREAD_NULL) {
		thread_deallocate(owner_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

	if (old_lingering_owner != THREAD_NULL) {
		thread_deallocate(old_lingering_owner);
	}

	assert(*retval >= 0);
}

__attribute__((noreturn))
static void
ulock_wait_continue(void * parameter, wait_result_t wr)
{
	thread_t self = current_thread();
	uthread_t uthread = (uthread_t)get_bsdthread_info(self);
	int ret = 0;

	ull_t *ull = (ull_t *)parameter;
	int32_t *retval = uthread->uu_save.uus_ulock_wait_data.retval;
	uint flags = uthread->uu_save.uus_ulock_wait_data.flags;
	thread_t owner_thread = uthread->uu_save.uus_ulock_wait_data.owner_thread;
	thread_t old_owner = uthread->uu_save.uus_ulock_wait_data.old_owner;

	ret = wait_result_to_return_code(wr);

	ull_lock(ull);
	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL);

	ulock_wait_cleanup(ull, owner_thread, old_owner, retval);

	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}

	unix_syscall_return(ret);
}

int
ulock_wake(struct proc *p, struct ulock_wake_args *args, __unused int32_t *retval)
{
	uint opcode = args->operation & UL_OPCODE_MASK;
	uint flags = args->operation & UL_FLAGS_MASK;
	int ret = 0;
	ulk_t key;

	/* involved threads - each variable holds +1 ref if not null */
	thread_t wake_thread = THREAD_NULL;
	thread_t old_owner = THREAD_NULL;

	if ((flags & ULF_WAKE_MASK) != flags) {
		ret = EINVAL;
		goto munge_retval;
	}

#if DEVELOPMENT || DEBUG
	if (opcode == UL_DEBUG_HASH_DUMP_PID) {
		*retval = ull_hash_dump(p->p_pid);
		return ret;
	} else if (opcode == UL_DEBUG_HASH_DUMP_ALL) {
		*retval = ull_hash_dump(0);
		return ret;
	} else if (opcode == UL_DEBUG_SIMULATE_COPYIN_FAULT) {
		ull_simulate_copyin_fault = (int)(args->wake_value);
		return ret;
	}
#endif

	if (args->addr == 0) {
		ret = EINVAL;
		goto munge_retval;
	}

	if (flags & ULF_WAKE_THREAD) {
		if (flags & ULF_WAKE_ALL) {
			ret = EINVAL;
			goto munge_retval;
		}
		mach_port_name_t wake_thread_name = (mach_port_name_t)(args->wake_value);
		wake_thread = port_name_to_thread_for_ulock(wake_thread_name);
		if (wake_thread == THREAD_NULL) {
			ret = ESRCH;
			goto munge_retval;
		}
	}

	key.ulk_pid = p->p_pid;
	key.ulk_addr = args->addr;

	ull_t *ull = ull_get(&key, ULL_MUST_EXIST, NULL);
	if (ull == NULL) {
		if (wake_thread != THREAD_NULL) {
			thread_deallocate(wake_thread);
		}
		ret = ENOENT;
		goto munge_retval;
	}
	/* ull is locked */

	boolean_t clear_owner = FALSE; /* need to reset owner */

	switch (opcode) {
	case UL_UNFAIR_LOCK:
		clear_owner = TRUE;
		break;
	case UL_COMPARE_AND_WAIT:
		break;
	default:
		ret = EINVAL;
		goto out_locked;
	}

	if (opcode != ull->ull_opcode) {
		ret = EDOM;
		goto out_locked;
	}

	if (!clear_owner) {
		assert(ull->ull_owner == THREAD_NULL);
	}

	struct turnstile *ts;
	ts = turnstile_prepare((uintptr_t)ull, &ull->ull_turnstile,
	    TURNSTILE_NULL, TURNSTILE_ULOCK);

	if (flags & ULF_WAKE_ALL) {
		waitq_wakeup64_all(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, 0);
	} else if (flags & ULF_WAKE_THREAD) {
		kern_return_t kr = waitq_wakeup64_thread(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    wake_thread, THREAD_AWAKENED);
		if (kr != KERN_SUCCESS) {
			assert(kr == KERN_NOT_WAITING);
			ret = EALREADY;
		}
	} else {
		/*
		 * TODO: WAITQ_SELECT_MAX_PRI forces a linear scan of the (hashed) global waitq.
		 * Move to a ulock-private, priority sorted waitq (i.e. SYNC_POLICY_FIXED_PRIORITY) to avoid that.
		 *
		 * TODO: 'owner is not current_thread (or null)' likely means we can avoid this wakeup
		 * <rdar://problem/25487001>
		 */
		waitq_wakeup64_one(&ts->ts_waitq, CAST_EVENT64_T(ULOCK_TO_EVENT(ull)),
		    THREAD_AWAKENED, WAITQ_SELECT_MAX_PRI);
	}

	/*
	 * Reaching this point means I previously moved the lock to 'unowned' state in userspace.
	 * Therefore I need to relinquish my promotion.
	 *
	 * However, someone else could have locked it after I unlocked, and then had a third thread
	 * block on the lock, causing a promotion of some other owner.
	 *
	 * I don't want to stomp over that, so only remove the promotion if I'm the current owner.
	 */

	if (ull->ull_owner == current_thread()) {
		turnstile_update_inheritor(ts, THREAD_NULL,
		    (TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD));
		turnstile_update_inheritor_complete(ts, TURNSTILE_INTERLOCK_HELD);
		old_owner = ull->ull_owner;
		ull->ull_owner = THREAD_NULL;
	}

	turnstile_complete((uintptr_t)ull, &ull->ull_turnstile, NULL);

out_locked:
	ull_put(ull);

	/* Needs to be called after dropping the interlock */
	turnstile_cleanup();

	if (wake_thread != THREAD_NULL) {
		thread_deallocate(wake_thread);
	}

	if (old_owner != THREAD_NULL) {
		thread_deallocate(old_owner);
	}

munge_retval:
	if ((flags & ULF_NO_ERRNO) && (ret != 0)) {
		*retval = -ret;
		ret = 0;
	}
	return ret;
}
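/*
 * Userspace view (sketch; the release-side counterpart of the example
 * after ulock_wait above, where only __ulock_wake is a real interface):
 * the unlocker first stores 0 to the lock word, then calls
 *
 *	__ulock_wake(UL_UNFAIR_LOCK | ULF_NO_ERRNO, &lock, 0);
 *
 * so the kernel can drop the unlocker's promotion and wake the
 * highest-base-priority waiter, per the policy described at the top of
 * this file.
 */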

void
kdp_ulock_find_owner(__unused struct waitq * waitq, event64_t event, thread_waitinfo_t * waitinfo)
{
	ull_t *ull = EVENT_TO_ULOCK(event);
	assert(kdp_is_in_zone(ull, "ulocks"));

	if (ull->ull_opcode == UL_UNFAIR_LOCK) { // owner is only set if it's an os_unfair_lock
		waitinfo->owner = thread_tid(ull->ull_owner);
		waitinfo->context = ull->ull_key.ulk_addr;
	} else if (ull->ull_opcode == UL_COMPARE_AND_WAIT) { // otherwise, this is a spinlock
		waitinfo->owner = 0;
		waitinfo->context = ull->ull_key.ulk_addr;
	} else {
		panic("%s: Invalid ulock opcode %d addr %p", __FUNCTION__, ull->ull_opcode, (void *)ull);
	}
	return;
}