/*
 * Copyright (c) 2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/mach_types.h>
#include <mach/task.h>

#include <kern/ast.h>
#include <kern/kalloc.h>
#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/restartable.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/waitq.h>

#include <os/atomic_private.h>
#include <os/hash.h>
#include <os/refcnt.h>

/**
 * @file osfmk/kern/restartable.c
 *
 * @brief
 * This module implements restartable userspace functions.
 *
 * @discussion
 * task_restartable_ranges_register() allows a task to configure
 * its restartable ranges, only once per task,
 * before it has created its second thread.
 *
 * task_restartable_ranges_synchronize() can later be used to trigger
 * restarts for threads with a PC in a restartable region.
 *
 * It is implemented with an AST (AST_RESET_PCS) that will cause threads,
 * as they return to userspace, to reset PCs in a restartable region
 * to the recovery offset of this region.
 *
 * Because signal delivery would mask the proper saved PC for threads,
 * sigreturn also forcefully sets the AST and will go through the logic
 * every single time.
 */
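
/*
 * For reference, a minimal userspace usage sketch of the two MIG calls
 * implemented below (illustrative only; the identifiers my_ranges,
 * my_func_start, my_func_len and my_recovery_offs are hypothetical and
 * stand in for whatever the caller actually protects):
 *
 *     task_restartable_range_t my_ranges[] = {{
 *         .location      = (mach_vm_address_t)my_func_start,
 *         .length        = my_func_len,
 *         .recovery_offs = my_recovery_offs,
 *         .flags         = 0,
 *     }};
 *
 *     // once, while the process is still single threaded
 *     kern_return_t kr = task_restartable_ranges_register(mach_task_self(),
 *         my_ranges, 1);
 *
 *     // later, whenever threads must be pushed out of those ranges
 *     kr = task_restartable_ranges_synchronize(mach_task_self());
 */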

typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

#define RR_RANGES_MAX 64
struct restartable_ranges {
    queue_chain_t            rr_link;
    os_refcnt_t              rr_ref;
    uint32_t                 rr_count;
    uint32_t                 rr_hash;
    task_restartable_range_t rr_ranges[RR_RANGES_MAX];
};

#if DEBUG || DEVELOPMENT
#define RR_HASH_SIZE 256
#else
// Release kernel userspace should have shared caches and a single registration
#define RR_HASH_SIZE 16
#endif

static queue_head_t rr_hash[RR_HASH_SIZE];
LCK_GRP_DECLARE(rr_lock_grp, "restartable ranges");
LCK_SPIN_DECLARE(rr_spinlock, &rr_lock_grp);

#define rr_lock()   lck_spin_lock_grp(&rr_spinlock, &rr_lock_grp)
#define rr_unlock() lck_spin_unlock(&rr_spinlock);

#pragma mark internals

/**
 * @function _ranges_cmp
 *
 * @brief
 * Compares two ranges, ordering by location and then by length.
 */
static int
_ranges_cmp(const void *_r1, const void *_r2)
{
    const task_restartable_range_t *r1 = _r1;
    const task_restartable_range_t *r2 = _r2;

    if (r1->location != r2->location) {
        return r1->location < r2->location ? -1 : 1;
    }
    if (r1->length == r2->length) {
        return 0;
    }
    return r1->length < r2->length ? -1 : 1;
}

/**
 * @function _ranges_validate
 *
 * @brief
 * Validates an array of PC ranges for wraps and intersections.
 *
 * @discussion
 * This sorts and modifies the input.
 *
 * The ranges must:
 * - not wrap around,
 * - not overlap each other,
 * - have a length/recovery offset within a page of the range start.
 *
 * @returns
 * - KERN_SUCCESS: ranges are valid
 * - KERN_INVALID_ARGUMENT: ranges are invalid
 */
static kern_return_t
_ranges_validate(task_t task, task_restartable_range_t *ranges, uint32_t count)
{
    qsort(ranges, count, sizeof(task_restartable_range_t), _ranges_cmp);
    uint64_t limit = task_has_64Bit_data(task) ? UINT64_MAX : UINT32_MAX;
    uint64_t end, recovery;

    if (count == 0) {
        return KERN_INVALID_ARGUMENT;
    }

    for (size_t i = 0; i < count; i++) {
        if (ranges[i].length > TASK_RESTARTABLE_OFFSET_MAX ||
            ranges[i].recovery_offs > TASK_RESTARTABLE_OFFSET_MAX) {
            return KERN_INVALID_ARGUMENT;
        }
        if (ranges[i].flags) {
            return KERN_INVALID_ARGUMENT;
        }
        if (os_add_overflow(ranges[i].location, ranges[i].length, &end)) {
            return KERN_INVALID_ARGUMENT;
        }
        if (os_add_overflow(ranges[i].location, ranges[i].recovery_offs, &recovery)) {
            return KERN_INVALID_ARGUMENT;
        }
        if (ranges[i].location > limit || end > limit || recovery > limit) {
            return KERN_INVALID_ARGUMENT;
        }
        if (i + 1 < count && end > ranges[i + 1].location) {
            return KERN_INVALID_ARGUMENT;
        }
    }

    return KERN_SUCCESS;
}
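
/*
 * As an illustration of the rules enforced by _ranges_validate() above
 * (the concrete numbers are made up for this comment and assume
 * TASK_RESTARTABLE_OFFSET_MAX is one page, per the "within a page" rule):
 *
 *     // accepted: short range, recovery offset inside the same page
 *     { .location = 0x10000, .length = 0x40, .recovery_offs = 0x40, .flags = 0 }
 *
 *     // rejected: length exceeds TASK_RESTARTABLE_OFFSET_MAX
 *     { .location = 0x10000, .length = 0x2000, .recovery_offs = 0, .flags = 0 }
 *
 *     // rejected: once sorted, the second range starts inside the first
 *     { .location = 0x10000, .length = 0x80, .recovery_offs = 0x80, .flags = 0 },
 *     { .location = 0x10040, .length = 0x40, .recovery_offs = 0x40, .flags = 0 }
 */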

/**
 * @function _ranges_lookup
 *
 * @brief
 * Looks up the recovery address for a given PC within a set of ranges.
 *
 * @returns
 * - 0: the PC isn't strictly inside any range (no reset needed)
 * - the recovery address (range start + recovery offset) otherwise.
 */
__attribute__((always_inline))
static mach_vm_address_t
_ranges_lookup(struct restartable_ranges *rr, mach_vm_address_t pc)
{
    task_restartable_range_t *ranges = rr->rr_ranges;
    uint32_t l = 0, r = rr->rr_count;

    if (pc <= ranges[0].location) {
        return 0;
    }
    if (pc >= ranges[r - 1].location + ranges[r - 1].length) {
        return 0;
    }

    while (l < r) {
        uint32_t i = (r + l) / 2;
        mach_vm_address_t location = ranges[i].location;

        if (pc <= location) {
            /* if the PC is exactly at pc_start, no reset is needed */
            r = i;
        } else if (location + ranges[i].length <= pc) {
            /* if the PC is exactly at the end, it's out of the function */
            l = i + 1;
        } else {
            /* else it's strictly in the range, return the recovery pc */
            return location + ranges[i].recovery_offs;
        }
    }

    return 0;
}
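
/*
 * Boundary behavior of _ranges_lookup(), illustrated with made-up numbers:
 * given a single registered range
 *
 *     { .location = 0x1000, .length = 0x40, .recovery_offs = 0x40 }
 *
 * a PC of 0x1010 (strictly inside) resolves to 0x1040 (the recovery
 * address), while PCs of exactly 0x1000 (the start) or 0x1040 (the end)
 * return 0: no reset is performed at the range boundaries.
 */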

/**
 * @function _restartable_ranges_dispose
 *
 * @brief
 * Helper to dispose of a range that has reached a 0 refcount.
 */
__attribute__((noinline))
static void
_restartable_ranges_dispose(struct restartable_ranges *rr, bool hash_remove)
{
    if (hash_remove) {
        rr_lock();
        remqueue(&rr->rr_link);
        rr_unlock();
    }
    kfree_type(struct restartable_ranges, rr);
}

/**
 * @function _restartable_ranges_equals
 *
 * @brief
 * Helper to compare two restartable ranges.
 */
static bool
_restartable_ranges_equals(
    const struct restartable_ranges *rr1,
    const struct restartable_ranges *rr2)
{
    size_t rr1_size = rr1->rr_count * sizeof(task_restartable_range_t);
    return rr1->rr_hash == rr2->rr_hash &&
           rr1->rr_count == rr2->rr_count &&
           memcmp(rr1->rr_ranges, rr2->rr_ranges, rr1_size) == 0;
}

/**
 * @function _restartable_ranges_create
 *
 * @brief
 * Helper to create a uniqued restartable range.
 *
 * @returns
 * - KERN_SUCCESS
 * - KERN_INVALID_ARGUMENT: the validation of the new ranges failed.
 * - KERN_RESOURCE_SHORTAGE: too many ranges, out of memory
 */
static kern_return_t
_restartable_ranges_create(task_t task, task_restartable_range_t *ranges,
    uint32_t count, struct restartable_ranges **rr_storage)
{
    struct restartable_ranges *rr, *rr_found, *rr_base;
    queue_head_t *head;
    uint32_t base_count, total_count;
    size_t base_size, size;
    kern_return_t kr;

    rr_base = *rr_storage;
    base_count = rr_base ? rr_base->rr_count : 0;
    base_size = sizeof(task_restartable_range_t) * base_count;
    size = sizeof(task_restartable_range_t) * count;

    if (os_add_overflow(base_count, count, &total_count)) {
        return KERN_INVALID_ARGUMENT;
    }
    if (total_count > RR_RANGES_MAX) {
        return KERN_RESOURCE_SHORTAGE;
    }

    rr = kalloc_type(struct restartable_ranges,
        (zalloc_flags_t) (Z_WAITOK | Z_ZERO | Z_NOFAIL));

    queue_chain_init(rr->rr_link);
    os_ref_init(&rr->rr_ref, NULL);
    rr->rr_count = total_count;
    if (base_size) {
        memcpy(rr->rr_ranges, rr_base->rr_ranges, base_size);
    }
    memcpy(rr->rr_ranges + base_count, ranges, size);
    kr = _ranges_validate(task, rr->rr_ranges, total_count);
    if (kr) {
        _restartable_ranges_dispose(rr, false);
        return kr;
    }
    rr->rr_hash = os_hash_jenkins(rr->rr_ranges,
        rr->rr_count * sizeof(task_restartable_range_t));

    head = &rr_hash[rr->rr_hash % RR_HASH_SIZE];

    rr_lock();
    queue_iterate(head, rr_found, struct restartable_ranges *, rr_link) {
        if (_restartable_ranges_equals(rr, rr_found) &&
            os_ref_retain_try(&rr_found->rr_ref)) {
            goto found;
        }
    }

    enqueue_tail(head, &rr->rr_link);
    rr_found = rr;

found:
    if (rr_base && os_ref_release_relaxed(&rr_base->rr_ref) == 0) {
        remqueue(&rr_base->rr_link);
    } else {
        rr_base = NULL;
    }
    rr_unlock();

    *rr_storage = rr_found;

    if (rr_found != rr) {
        _restartable_ranges_dispose(rr, false);
    }
    if (rr_base) {
        _restartable_ranges_dispose(rr_base, false);
    }
    return KERN_SUCCESS;
}

#pragma mark extern interfaces

__attribute__((always_inline))
void
restartable_ranges_release(struct restartable_ranges *rr)
{
    if (os_ref_release_relaxed(&rr->rr_ref) == 0) {
        _restartable_ranges_dispose(rr, true);
    }
}

__attribute__((always_inline))
void
thread_reset_pcs_will_fault(thread_t thread)
{
    /*
     * Called in the exception handling code while interrupts
     * are still disabled.
     */
    os_atomic_store(&thread->t_rr_state.trr_fault_state,
        (uint8_t)TRR_FAULT_PENDING, relaxed);
}

__attribute__((always_inline))
void
thread_reset_pcs_done_faulting(struct thread *thread)
{
    thread_rr_state_t state = {
        .trr_ipi_ack_pending = ~0,
    };

    /*
     * Called by the exception handling code on the way back,
     * or when the thread is terminated.
     */
    state.trr_value = os_atomic_and_orig(&thread->t_rr_state.trr_value,
        state.trr_value, relaxed);

    if (__improbable(state.trr_sync_waiting)) {
        task_t task = get_threadtask(thread);

        task_lock(task);
        wakeup_all_with_inheritor(&thread->t_rr_state, THREAD_AWAKENED);
        task_unlock(task);
    }
}

void
thread_reset_pcs_ack_IPI(struct thread *thread)
{
    thread_rr_state_t trrs;

    /*
     * Called under the thread lock from IPI or CSwitch context.
     */
    trrs.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
    if (__improbable(trrs.trr_ipi_ack_pending)) {
        trrs.trr_ipi_ack_pending = false;
        if (trrs.trr_fault_state) {
            assert3u(trrs.trr_fault_state, ==, TRR_FAULT_PENDING);
            trrs.trr_fault_state = TRR_FAULT_OBSERVED;
        }
        os_atomic_store(&thread->t_rr_state.trr_value,
            trrs.trr_value, relaxed);
    }
}

static bool
thread_rr_wait_if_needed(task_t task, thread_t thread)
{
    thread_rr_state_t state;
    bool did_unlock = false;

    state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
    if (state.trr_value == 0) {
        return did_unlock;
    }

    assert(state.trr_sync_waiting == 0);

    thread_reference(thread);

    /*
     * The thread_rr_state state machine is:
     *
     *                  ,------------ IPI ack --------------.
     *                  v                                   |
     *  .-----> {f:N, w:0, ipi:0} --- IPI sent ---> {f:N, w:0, ipi:1}
     *  |               |    ^                              |
     *  |               |    |                              |
     *  |            fault  will                         fault  will
     *  |            done   fault                        done   fault
     *  |               |    |                              |
     *  |               v    |                              v
     *  |       {f:P, w:0, ipi:0} --- IPI sent ---> {f:P, w:0, ipi:1}
     *  |               |                                   |
     *  |               |                                   |
     *  |      act_set_ast_reset_pcs()                      |
     *  |               |                                   |
     *  |               v                                   |
     *  +------ {f:O, w:0, ipi:0} <--- IPI Ack -------------'
     *  |               |
     *  |               |
     *  |        wait_if_needed()
     *  |               |
     *  |               v
     *  `------ {f:O, w:1, ipi:0}
     */

    while (state.trr_ipi_ack_pending) {
        disable_preemption();
        task_unlock(task);

        state.trr_value =
            hw_wait_while_equals32(&thread->t_rr_state.trr_value,
                state.trr_value);

        enable_preemption();
        task_lock(task);

        did_unlock = true;
    }

    /*
     * If a VM fault is in flight we must wait for it to resolve
     * before we can return from task_restartable_ranges_synchronize(),
     * as the memory we're faulting against might be freed by the caller
     * as soon as it returns, leading to a crash.
     */
    if (state.trr_fault_state == TRR_FAULT_OBSERVED) {
        thread_rr_state_t nstate = {
            .trr_fault_state = TRR_FAULT_OBSERVED,
            .trr_sync_waiting = 1,
        };

        if (os_atomic_cmpxchg(&thread->t_rr_state, state,
            nstate, relaxed)) {
            lck_mtx_sleep_with_inheritor(&task->lock,
                LCK_SLEEP_DEFAULT, &thread->t_rr_state,
                thread, THREAD_UNINT, TIMEOUT_WAIT_FOREVER);
            did_unlock = true;
        }
    }

#if MACH_ASSERT
    state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
    assert3u(state.trr_fault_state, !=, TRR_FAULT_OBSERVED);
    assert3u(state.trr_ipi_ack_pending, ==, 0);
    assert3u(state.trr_sync_waiting, ==, 0);
#endif

    thread_deallocate_safe(thread);
    return did_unlock;
}

bool
thread_reset_pcs_in_range(task_t task, thread_t thread)
{
    return _ranges_lookup(task->t_rr_ranges, machine_thread_pc(thread)) != 0;
}

void
thread_reset_pcs_ast(task_t task, thread_t thread)
{
    struct restartable_ranges *rr;
    mach_vm_address_t pc;

    /*
     * Because restartable_ranges are set while the task only has one thread
     * and can't be mutated after that, no lock is required to read this.
     */
    rr = task->t_rr_ranges;
    if (thread->active && rr) {
        pc = _ranges_lookup(rr, machine_thread_pc(thread));

        if (pc) {
            machine_thread_reset_pc(thread, pc);
        }
    }

#if MACH_ASSERT
    thread_rr_state_t state;

    state.trr_value = os_atomic_load(&thread->t_rr_state.trr_value, relaxed);
    assert3u(state.trr_fault_state, ==, TRR_FAULT_NONE);
    assert3u(state.trr_sync_waiting, ==, 0);
#endif
}

void
restartable_init(void)
{
    for (size_t i = 0; i < RR_HASH_SIZE; i++) {
        queue_head_init(rr_hash[i]);
    }
}

#pragma mark MiG interfaces

kern_return_t
task_restartable_ranges_register(
    task_t                    task,
    task_restartable_range_t *ranges,
    mach_msg_type_number_t    count)
{
    kern_return_t kr;

    if (task != current_task()) {
        return KERN_FAILURE;
    }

#if CONFIG_ROSETTA
    // <rdar://problem/48527888> Obj-C adoption of task_restartable_ranges_register breaks Cambria
    if (task_is_translated(task)) {
        return KERN_RESOURCE_SHORTAGE;
    }
#endif

    kr = _ranges_validate(task, ranges, count);

    if (kr == KERN_SUCCESS) {
        task_lock(task);

        if (task->thread_count > 1) {
            kr = KERN_NOT_SUPPORTED;
#if !DEBUG && !DEVELOPMENT
        } else if (task->t_rr_ranges) {
            /*
             * For security reasons, on release kernels,
             * only allow for this to be configured once.
             *
             * But to be able to test the feature we need
             * to relax this for dev kernels.
             */
            kr = KERN_NOT_SUPPORTED;
#endif
        } else {
            kr = _restartable_ranges_create(task, ranges, count,
                &task->t_rr_ranges);
        }

        task_unlock(task);
    }

    return kr;
}

kern_return_t
task_restartable_ranges_synchronize(task_t task)
{
    thread_pri_floor_t token;
    thread_t thread;
    bool needs_wait = false;
    kern_return_t kr = KERN_SUCCESS;

    if (task != current_task()) {
        return KERN_FAILURE;
    }

    /*
     * t_rr_ranges can only be set if the process is single threaded.
     * As a result, `t_rr_ranges` can _always_ be looked at
     * from current_thread() without holding a lock:
     * - either because it's the only thread in the task
     * - or because the existence of another thread precludes
     *   modification
     */
    if (!task->t_rr_ranges) {
        return KERN_SUCCESS;
    }

    /*
     * When initiating a GC, artificially raise the priority for the
     * duration of sending ASTs: we want to be preemptible, but this
     * sequence has to terminate in a timely fashion.
     */
    token = thread_priority_floor_start();

    task_lock(task);

    /*
     * In order to avoid trivial deadlocks of 2 threads trying
     * to wait on each other while in kernel, disallow
     * concurrent usage of task_restartable_ranges_synchronize().
     *
     * At the time this code was written, the one client (Objective-C)
     * does this under lock which guarantees ordering. If we ever need
     * more clients, the library around restartable ranges will have
     * to synchronize in userspace.
     */
    if (task->task_rr_in_flight) {
        kr = KERN_ALREADY_WAITING;
        goto out;
    }

    task->task_rr_in_flight = true;

    /*
     * Pair with the acquire barriers handling RR_TSTATE_ONCORE.
     *
     * For threads that weren't on core, we rely on the fact
     * that we are taking their lock in act_set_ast_reset_pcs()
     * and that the context switch path will also take it before
     * resuming them, which provides the required ordering.
     *
     * For new threads that don't exist yet, the task_lock()
     * is taken to add them to the task thread list,
     * which also synchronizes with this code.
     */
    os_atomic_thread_fence(release);

    /*
     * Set all the AST_RESET_PCS, and see if any thread needs
     * actual acknowledgement.
     */
    queue_iterate(&task->threads, thread, thread_t, task_threads) {
        if (thread != current_thread()) {
            needs_wait |= act_set_ast_reset_pcs(task, thread);
        }
    }

    /*
     * Now wait for acknowledgement if we need any
     */
    while (needs_wait) {
        needs_wait = false;

        queue_iterate(&task->threads, thread, thread_t, task_threads) {
            if (thread == current_thread()) {
                continue;
            }

            needs_wait = thread_rr_wait_if_needed(task, thread);
            if (needs_wait) {
                /*
                 * The task lock was dropped,
                 * we need to restart enumerating threads.
                 */
                break;
            }
        }
    }

    task->task_rr_in_flight = false;

out:
    task_unlock(task);

    thread_priority_floor_end(&token);

    return kr;
}