/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/task.h>

#include <kern/ast.h>
#include <kern/kalloc.h>
#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/restartable.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>

#include <os/atomic_private.h>
#include <os/hash.h>
#include <os/refcnt.h>

/**
 * @file osfmk/kern/restartable.c
 *
 * @brief
 * This module implements restartable userspace functions.
 *
 * @discussion
 * task_restartable_ranges_register() allows a task to configure
 * its restartable ranges, only once per task,
 * and only before it has created its second thread.
 *
 * task_restartable_ranges_synchronize() can later be used to trigger
 * restarts for threads with a PC in a restartable region.
 *
 * It is implemented with an AST (AST_RESET_PCS) that will cause threads,
 * as they return to userspace, to reset any PC inside a restartable region
 * to the recovery address of that region.
 *
 * Because signal delivery would mask the proper saved PC for threads,
 * sigreturn also forcefully sets the AST and will go through the logic
 * every single time.
 */
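
/*
 * Illustrative sketch only (not used by this file): how a userspace client
 * might drive these interfaces, assuming the MiG-generated prototypes from
 * <mach/task.h> and placeholder addresses `start', `end' and `recovery'
 * computed by the caller for one restartable function:
 *
 *	task_restartable_range_t range = {
 *		.location      = start,
 *		.length        = end - start,       // <= TASK_RESTARTABLE_OFFSET_MAX
 *		.recovery_offs = recovery - start,  // <= TASK_RESTARTABLE_OFFSET_MAX
 *		.flags         = 0,                 // must be 0
 *	};
 *
 *	// once, while the process is still single threaded
 *	kr = task_restartable_ranges_register(mach_task_self(), &range, 1);
 *
 *	// later, push any thread currently inside the range back to recovery
 *	kr = task_restartable_ranges_synchronize(mach_task_self());
 */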

typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

#define RR_RANGES_MAX 64
struct restartable_ranges {
	queue_chain_t rr_link;
	os_refcnt_t rr_ref;
	uint32_t rr_count;
	uint32_t rr_hash;
	task_restartable_range_t rr_ranges[RR_RANGES_MAX];
};

#if DEBUG || DEVELOPMENT
#define RR_HASH_SIZE 256
#else
// Release kernel userspace should have shared caches and a single registration
#define RR_HASH_SIZE 16
#endif

static queue_head_t rr_hash[RR_HASH_SIZE];
LCK_GRP_DECLARE(rr_lock_grp, "restartable ranges");
LCK_SPIN_DECLARE(rr_spinlock, &rr_lock_grp);

#define rr_lock()   lck_spin_lock_grp(&rr_spinlock, &rr_lock_grp)
#define rr_unlock() lck_spin_unlock(&rr_spinlock);

#pragma mark internals

/**
 * @function _ranges_cmp
 *
 * @brief
 * Compares two ranges, ordering by location and then by length.
 */
static int
_ranges_cmp(const void *_r1, const void *_r2)
{
	const task_restartable_range_t *r1 = _r1;
	const task_restartable_range_t *r2 = _r2;

	if (r1->location != r2->location) {
		return r1->location < r2->location ? -1 : 1;
	}
	if (r1->length == r2->length) {
		return 0;
	}
	return r1->length < r2->length ? -1 : 1;
}

/**
 * @function _ranges_validate
 *
 * @brief
 * Validates an array of PC ranges for wraps and intersections.
 *
 * @discussion
 * This sorts and modifies the input.
 *
 * The ranges must:
 * - not wrap around,
 * - fit in the task's (32-bit or 64-bit) address space,
 * - not overlap each other,
 * - have no flags set,
 * - have a length and recovery offset no larger than
 *   TASK_RESTARTABLE_OFFSET_MAX from the range start.
 *
 * @returns
 * - KERN_SUCCESS: ranges are valid
 * - KERN_INVALID_ARGUMENT: ranges are invalid
 */
static kern_return_t
_ranges_validate(task_t task, task_restartable_range_t *ranges, uint32_t count)
{
	qsort(ranges, count, sizeof(task_restartable_range_t), _ranges_cmp);
	uint64_t limit = task_has_64Bit_data(task) ? UINT64_MAX : UINT32_MAX;
	uint64_t end, recovery;

	if (count == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	for (size_t i = 0; i < count; i++) {
		if (ranges[i].length > TASK_RESTARTABLE_OFFSET_MAX ||
		    ranges[i].recovery_offs > TASK_RESTARTABLE_OFFSET_MAX) {
			return KERN_INVALID_ARGUMENT;
		}
		if (ranges[i].flags) {
			return KERN_INVALID_ARGUMENT;
		}
		if (os_add_overflow(ranges[i].location, ranges[i].length, &end)) {
			return KERN_INVALID_ARGUMENT;
		}
		if (os_add_overflow(ranges[i].location, ranges[i].recovery_offs, &recovery)) {
			return KERN_INVALID_ARGUMENT;
		}
		if (ranges[i].location > limit || end > limit || recovery > limit) {
			return KERN_INVALID_ARGUMENT;
		}
		if (i + 1 < count && end > ranges[i + 1].location) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	return KERN_SUCCESS;
}
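
/*
 * Worked example (hypothetical addresses): for the two ranges
 *	{ .location = 0x1000, .length = 0x40, .recovery_offs = 0x20 }
 *	{ .location = 0x1040, .length = 0x20, .recovery_offs = 0x10 }
 * _ranges_validate() sorts them by location and returns KERN_SUCCESS:
 * neither wraps, both lengths and recovery offsets are within
 * TASK_RESTARTABLE_OFFSET_MAX, and the first range ends exactly where the
 * second begins (ranges may abut, they just may not overlap).  Moving the
 * second location back to 0x103f would make it overlap the first one and
 * fail with KERN_INVALID_ARGUMENT.
 */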

/**
 * @function _ranges_lookup
 *
 * @brief
 * Looks up the recovery address for a given PC within a set of ranges.
 *
 * @returns
 * - 0: the PC isn't strictly inside any of the ranges (no reset needed)
 * - the recovery address (range start + recovery offset) otherwise.
 */
__attribute__((always_inline))
static mach_vm_address_t
_ranges_lookup(struct restartable_ranges *rr, mach_vm_address_t pc)
{
	task_restartable_range_t *ranges = rr->rr_ranges;
	uint32_t l = 0, r = rr->rr_count;

	if (pc <= ranges[0].location) {
		return 0;
	}
	if (pc >= ranges[r - 1].location + ranges[r - 1].length) {
		return 0;
	}

	while (l < r) {
		uint32_t i = (r + l) / 2;
		mach_vm_address_t location = ranges[i].location;

		if (pc <= location) {
			/* if the PC is exactly at pc_start, no reset is needed */
			r = i;
		} else if (location + ranges[i].length <= pc) {
			/* if the PC is exactly at the end, it's out of the function */
			l = i + 1;
		} else {
			/* else it's strictly in the range, return the recovery pc */
			return location + ranges[i].recovery_offs;
		}
	}

	return 0;
}
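
/*
 * Example (hypothetical values): for a single registered range
 *	{ .location = 0x1000, .length = 0x40, .recovery_offs = 0x20 }
 * _ranges_lookup() returns 0 for pc == 0x1000 (exactly at the start, no
 * reset needed) and for pc >= 0x1040 (at or past the end), and returns
 * the recovery address 0x1020 for any pc strictly inside (0x1000, 0x1040).
 */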

/**
 * @function _restartable_ranges_dispose
 *
 * @brief
 * Helper to dispose of a range that has reached a 0 refcount.
 */
__attribute__((noinline))
static void
_restartable_ranges_dispose(struct restartable_ranges *rr, bool hash_remove)
{
	if (hash_remove) {
		rr_lock();
		remqueue(&rr->rr_link);
		rr_unlock();
	}
	kfree_type(struct restartable_ranges, rr);
}

/**
 * @function _restartable_ranges_equals
 *
 * @brief
 * Helper to compare two restartable ranges.
 */
static bool
_restartable_ranges_equals(
	const struct restartable_ranges *rr1,
	const struct restartable_ranges *rr2)
{
	size_t rr1_size = rr1->rr_count * sizeof(task_restartable_range_t);
	return rr1->rr_hash == rr2->rr_hash &&
	       rr1->rr_count == rr2->rr_count &&
	       memcmp(rr1->rr_ranges, rr2->rr_ranges, rr1_size) == 0;
}

/**
 * @function _restartable_ranges_create
 *
 * @brief
 * Helper to create a uniqued restartable range.
 *
 * @returns
 * - KERN_SUCCESS
 * - KERN_INVALID_ARGUMENT: the validation of the new ranges failed.
 * - KERN_RESOURCE_SHORTAGE: too many ranges, out of memory
 */
static kern_return_t
_restartable_ranges_create(task_t task, task_restartable_range_t *ranges,
    uint32_t count, struct restartable_ranges **rr_storage)
{
	struct restartable_ranges *rr, *rr_found, *rr_base;
	queue_head_t *head;
	uint32_t base_count, total_count;
	size_t base_size, size;
	kern_return_t kr;

	rr_base = *rr_storage;
	base_count = rr_base ? rr_base->rr_count : 0;
	base_size = sizeof(task_restartable_range_t) * base_count;
	size = sizeof(task_restartable_range_t) * count;

	if (os_add_overflow(base_count, count, &total_count)) {
		return KERN_INVALID_ARGUMENT;
	}
	if (total_count > RR_RANGES_MAX) {
		return KERN_RESOURCE_SHORTAGE;
	}

	rr = kalloc_type(struct restartable_ranges,
	    (zalloc_flags_t) (Z_WAITOK | Z_ZERO | Z_NOFAIL));

	queue_chain_init(rr->rr_link);
	os_ref_init(&rr->rr_ref, NULL);
	rr->rr_count = total_count;
	if (base_size) {
		memcpy(rr->rr_ranges, rr_base->rr_ranges, base_size);
	}
	memcpy(rr->rr_ranges + base_count, ranges, size);
	kr = _ranges_validate(task, rr->rr_ranges, total_count);
	if (kr) {
		_restartable_ranges_dispose(rr, false);
		return kr;
	}
	rr->rr_hash = os_hash_jenkins(rr->rr_ranges,
	    rr->rr_count * sizeof(task_restartable_range_t));

	head = &rr_hash[rr->rr_hash % RR_HASH_SIZE];

	rr_lock();
	queue_iterate(head, rr_found, struct restartable_ranges *, rr_link) {
		if (_restartable_ranges_equals(rr, rr_found) &&
		    os_ref_retain_try(&rr_found->rr_ref)) {
			goto found;
		}
	}

	enqueue_tail(head, &rr->rr_link);
	rr_found = rr;

found:
	if (rr_base && os_ref_release_relaxed(&rr_base->rr_ref) == 0) {
		remqueue(&rr_base->rr_link);
	} else {
		rr_base = NULL;
	}
	rr_unlock();

	*rr_storage = rr_found;

	if (rr_found != rr) {
		_restartable_ranges_dispose(rr, false);
	}
	if (rr_base) {
		_restartable_ranges_dispose(rr_base, false);
	}
	return KERN_SUCCESS;
}

#pragma mark extern interfaces

__attribute__((always_inline))
void
restartable_ranges_release(struct restartable_ranges *rr)
{
	if (os_ref_release_relaxed(&rr->rr_ref) == 0) {
		_restartable_ranges_dispose(rr, true);
	}
}

__attribute__((always_inline))
void
thread_reset_pcs_will_fault(thread_t thread)
{
	/*
	 * Called in the exception handling code while interrupts
	 * are still disabled.
	 */
	os_atomic_store(&thread->t_rr_state.trr_fault_state,
	    (uint8_t)TRR_FAULT_PENDING, relaxed);
}

__attribute__((always_inline))
void
thread_reset_pcs_done_faulting(struct thread *thread)
{
	thread_rr_state_t state = {
		.trr_ipi_ack_pending = ~0,
	};

	/*
	 * Called by the exception handling code on the way back,
	 * or when the thread is terminated.
	 */
	state.trr_value = os_atomic_and_orig(&thread->t_rr_state.trr_value,
	    state.trr_value, relaxed);

	if (__improbable(state.trr_sync_waiting)) {
		task_t task = get_threadtask(thread);

		task_lock(task);
		wakeup_all_with_inheritor(&thread->t_rr_state, THREAD_AWAKENED);
		task_unlock(task);
	}
}

void
thread_reset_pcs_ack_IPI(struct thread *thread)
{
	thread_rr_state_t trrs;

	/*
	 * Called under the thread lock from IPI or CSwitch context.
	 */
	trrs.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	if (__improbable(trrs.trr_ipi_ack_pending)) {
		trrs.trr_ipi_ack_pending = false;
		if (trrs.trr_fault_state) {
			assert3u(trrs.trr_fault_state, ==, TRR_FAULT_PENDING);
			trrs.trr_fault_state = TRR_FAULT_OBSERVED;
		}
		os_atomic_store(&thread->t_rr_state.trr_value,
		    trrs.trr_value, relaxed);
	}
}

static bool
thread_rr_wait_if_needed(task_t task, thread_t thread)
{
	thread_rr_state_t state;
	bool did_unlock = false;

	state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	if (state.trr_value == 0) {
		return did_unlock;
	}

	assert(state.trr_sync_waiting == 0);

	thread_reference(thread);

	/*
	 * The thread_rr_state state machine is depicted below
	 * (f = trr_fault_state: N(one), P(ending), O(bserved);
	 *  w = trr_sync_waiting; ipi = trr_ipi_ack_pending):
	 *
	 *                     ,------------ IPI ack --------------.
	 *                     v                                   |
	 *     .-----> {f:N, w:0, ipi:0} --- IPI sent ---> {f:N, w:0, ipi:1}
	 *     |         |      ^                                  |
	 *     |         |      |                                  |
	 *   fault     will                                      fault
	 *   done      fault                                     done
	 *     |         |      |                                  |
	 *     |         v      |                                  v
	 *     |       {f:P, w:0, ipi:0} --- IPI sent ---> {f:P, w:0, ipi:1}
	 *     |         |                                         |
	 *     |         |                                         |
	 *     |   act_set_ast_reset_pcs()                         |
	 *     |         |                                         |
	 *     |         v                                         |
	 *     +------ {f:O, w:0, ipi:0} <--- IPI Ack -------------'
	 *     |         |
	 *     |         |
	 *     |   wait_if_needed()
	 *     |         |
	 *     |         v
	 *     `------ {f:O, w:1, ipi:0}
	 */

	while (state.trr_ipi_ack_pending) {
		disable_preemption();
		task_unlock(task);

		state.trr_value =
		    hw_wait_while_equals32(&thread->t_rr_state.trr_value,
		        state.trr_value);

		enable_preemption();
		task_lock(task);

		did_unlock = true;
	}

	/*
	 * If a VM fault is in flight we must wait for it to resolve
	 * before we can return from task_restartable_ranges_synchronize(),
	 * as the memory we're faulting against might be freed by the caller
	 * as soon as it returns, leading to a crash.
	 */
	if (state.trr_fault_state == TRR_FAULT_OBSERVED) {
		thread_rr_state_t nstate = {
			.trr_fault_state = TRR_FAULT_OBSERVED,
			.trr_sync_waiting = 1,
		};

		if (os_atomic_cmpxchg(&thread->t_rr_state, state,
		    nstate, relaxed)) {
			lck_mtx_sleep_with_inheritor(&task->lock,
			    LCK_SLEEP_DEFAULT, &thread->t_rr_state,
			    thread, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
			did_unlock = true;
		}
	}

#if MACH_ASSERT
	state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	assert3u(state.trr_fault_state, !=, TRR_FAULT_OBSERVED);
	assert3u(state.trr_ipi_ack_pending, ==, 0);
	assert3u(state.trr_sync_waiting, ==, 0);
#endif

	thread_deallocate_safe(thread);
	return did_unlock;
}

bool
thread_reset_pcs_in_range(task_t task, thread_t thread)
{
	return _ranges_lookup(task->t_rr_ranges, machine_thread_pc(thread)) != 0;
}

void
thread_reset_pcs_ast(task_t task, thread_t thread)
{
	struct restartable_ranges *rr;
	mach_vm_address_t pc;

	/*
	 * Because restartable_ranges are set while the task only has one
	 * thread and can't be mutated outside of this, no lock is required
	 * to read this.
	 */
	rr = task->t_rr_ranges;
	if (thread->active && rr) {
		pc = _ranges_lookup(rr, machine_thread_pc(thread));

		if (pc) {
			machine_thread_reset_pc(thread, pc);
		}
	}

#if MACH_ASSERT
	thread_rr_state_t state;

	state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
	assert3u(state.trr_fault_state, ==, TRR_FAULT_NONE);
	assert3u(state.trr_sync_waiting, ==, 0);
#endif
}

void
restartable_init(void)
{
	for (size_t i = 0; i < RR_HASH_SIZE; i++) {
		queue_head_init(rr_hash[i]);
	}
}

#pragma mark MiG interfaces

kern_return_t
task_restartable_ranges_register(
	task_t task,
	task_restartable_range_t *ranges,
	mach_msg_type_number_t count)
{
	kern_return_t kr;

	if (task != current_task()) {
		return KERN_FAILURE;
	}

#if CONFIG_ROSETTA
	// <rdar://problem/48527888> Obj-C adoption of task_restartable_ranges_register breaks Cambria
	if (task_is_translated(task)) {
		return KERN_RESOURCE_SHORTAGE;
	}
#endif

	kr = _ranges_validate(task, ranges, count);

	if (kr == KERN_SUCCESS) {
		task_lock(task);

		if (task->thread_count > 1) {
			kr = KERN_NOT_SUPPORTED;
#if !DEBUG && !DEVELOPMENT
		} else if (task->t_rr_ranges) {
			/*
			 * For security reasons, on release kernels,
			 * only allow for this to be configured once.
			 *
			 * But to be able to test the feature we need
			 * to relax this for dev kernels.
			 */
			kr = KERN_NOT_SUPPORTED;
#endif
		} else {
			kr = _restartable_ranges_create(task, ranges, count,
			    &task->t_rr_ranges);
		}

		task_unlock(task);
	}

	return kr;
}

kern_return_t
task_restartable_ranges_synchronize(task_t task)
{
	thread_pri_floor_t token;
	thread_t thread;
	bool needs_wait = false;
	kern_return_t kr = KERN_SUCCESS;

	if (task != current_task()) {
		return KERN_FAILURE;
	}

	/*
	 * t_rr_ranges can only be set if the process is single threaded.
	 * As a result, `t_rr_ranges` can _always_ be looked at
	 * from current_thread() without holding a lock:
	 * - either because it's the only thread in the task
	 * - or because the existence of another thread precludes
	 *   modification
	 */
	if (!task->t_rr_ranges) {
		return KERN_SUCCESS;
	}

	/*
	 * When initiating a GC, artificially raise the priority for the
	 * duration of sending ASTs: we want to remain preemptible, but this
	 * sequence has to terminate in a timely fashion.
	 */
	token = thread_priority_floor_start();

	task_lock(task);

	/*
	 * In order to avoid trivial deadlocks of 2 threads trying
	 * to wait on each other while in kernel, disallow
	 * concurrent usage of task_restartable_ranges_synchronize().
	 *
	 * At the time this code was written, the one client (Objective-C)
	 * does this under lock which guarantees ordering. If we ever need
	 * more clients, the library around restartable ranges will have
	 * to synchronize in userspace.
	 */
	if (task->task_rr_in_flight) {
		kr = KERN_ALREADY_WAITING;
		goto out;
	}

	task->task_rr_in_flight = true;

	/*
	 * Pair with the acquire barriers handling RR_TSTATE_ONCORE.
	 *
	 * For threads that weren't on core, we rely on the fact
	 * that we are taking their lock in act_set_ast_reset_pcs()
	 * and that the context switch path will also take it before
	 * resuming them, which provides the required ordering.
	 *
	 * For threads that do not exist yet, the task_lock()
	 * taken to add them to the task thread list
	 * also synchronizes with this code.
	 */
	os_atomic_thread_fence(release);

	/*
	 * Set all the AST_RESET_PCS, and see if any thread needs
	 * actual acknowledgement.
	 */
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		if (thread != current_thread()) {
			needs_wait |= act_set_ast_reset_pcs(task, thread);
		}
	}

	/*
	 * Now wait for acknowledgement if we need any
	 */
	while (needs_wait) {
		needs_wait = false;

		queue_iterate(&task->threads, thread, thread_t, task_threads) {
			if (thread == current_thread()) {
				continue;
			}

			needs_wait = thread_rr_wait_if_needed(task, thread);
			if (needs_wait) {
				/*
				 * We drop the task lock,
				 * we need to restart enumerating threads.
				 */
				break;
			}
		}
	}

	task->task_rr_in_flight = false;

out:
	task_unlock(task);

	thread_priority_floor_end(&token);

	return kr;
}