/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/asm.h>

/* This section has all the code necessary for the atomic operations backing
 * the OSAtomicFifoEnqueue and OSAtomicFifoDequeue APIs in libplatform.
 *
 * This code needs to be compiled as one section and should not make branches
 * outside of this section. This allows us to copy the entire section to the
 * text comm page once it is created - see osfmk/arm/commpage/commpage.c
 *
 * This section is split into two parts - the preemption-free zone (PFZ)
 * routines and the preemptible (non-PFZ) routines. The PFZ routines will not
 * be preempted by the scheduler if the pc of the userspace process is in that
 * region while handling asynchronous interrupts (note that traps are still
 * possible in the PFZ). Instead, the scheduler will mark x15 (known through
 * coordination with the functions in the commpage section) to indicate to the
 * userspace code that it needs to take a delayed preemption. The PFZ functions
 * may make callouts to preemptible routines and vice-versa. When a function
 * returns to a preemptible routine after a callout to a function in the PFZ, it
 * needs to check x15 to determine if a delayed preemption needs to be taken. In
 * addition, functions in the PFZ should not have backwards branches.
 *
 * The entry point to execute code in the commpage text section is through the
 * jump table at the very top of the section. The base of the jump table is
 * exposed to userspace via the APPLE array, and the offsets from the base of
 * the jump table are listed in the arm/cpu_capabilities.h header. Adding any
 * new functions in the PFZ requires a lockstep change to the
 * cpu_capabilities.h header.
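 *
 * For illustration, here is roughly how a userspace caller would form a
 * function pointer from the jump table (a sketch only: commpage_text_base
 * stands in for the base address learned from the APPLE array, and the slot
 * offsets - 0 for enqueue, 4 for dequeue, since each slot is a single 4-byte
 * branch - should really come from the constants in arm/cpu_capabilities.h):
 *
 *     typedef void (*pfz_enqueue_fn_t)(OSFifoQueueHead *, void *, size_t);
 *
 *     pfz_enqueue_fn_t pfz_enqueue_fn =
 *         (pfz_enqueue_fn_t)(commpage_text_base + 0);   // slot 0 = enqueue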
 *
 * Functions in PFZ:
 *     Enqueue function
 *     Dequeue function
 *
 * Functions not in PFZ:
 *     Backoff function as part of spin loop
 *     Preempt function to take delayed preemption as indicated by kernel
 *
 * ----------------------------------------------------------------------
 *
 * The high level goal of the asm code in this section is to enqueue and dequeue
 * from a FIFO linked list.
 *
 * typedef volatile struct {
 *     void *opaque1; <-- ptr to first queue element or null
 *     void *opaque2; <-- ptr to last queue element or null
 *     int opaque3; <-- spinlock
 * } OSFifoQueueHead;
 *
 * This is done through a userspace spin lock stored in the linked list head
 * for synchronization.
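 *
 * This asm backs the public C interface declared in <libkern/OSAtomic.h>. A
 * minimal usage sketch (elem_t and its link field are made up for
 * illustration, and the queue head initializer is assumed to be the usual
 * OS_ATOMIC_FIFO_QUEUE_INIT):
 *
 *     #include <stddef.h>
 *     #include <libkern/OSAtomic.h>
 *
 *     typedef struct elem {
 *         int value;
 *         struct elem *link;   // linkage field; its offset is passed in
 *     } elem_t;
 *
 *     OSFifoQueueHead head = OS_ATOMIC_FIFO_QUEUE_INIT;
 *     elem_t e = { .value = 42 };
 *
 *     OSAtomicFifoEnqueue(&head, &e, offsetof(elem_t, link));
 *     elem_t *out = OSAtomicFifoDequeue(&head, offsetof(elem_t, link));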
 *
 * Here is the pseudocode for the spin lock acquire algorithm which is split
 * between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation but it is symmetrical for
 * the dequeue operation.
 *
 * // Not in the PFZ. Entry from jump table.
 * ENQUEUE()
 *     enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *     // We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *     // the PFZ so we need to check if we need to take a delayed
 *     // preemption.
 *     if (kernel_wants_to_preempt_us) {
 *         // This is done through the pfz_exit() mach trap which is a dummy
 *         // syscall whose sole purpose is to allow the thread to enter the
 *         // kernel so that it can be preempted at AST.
 *         enter_kernel_to_take_delayed_preemption()
 *     }
 *
 *     if (!enqueued) {
 *         ARM_MONITOR;
 *         WFE;
 *         enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *         if (!enqueued) {
 *             // We failed twice, take a backoff
 *             BACKOFF();
 *             goto ENQUEUE()
 *         } else {
 *             // We got here from the PFZ, check for delayed preemption
 *             if (kernel_wants_to_preempt_us) {
 *                 enter_kernel_to_take_delayed_preemption()
 *             }
 *         }
 *     }
 *
 * // In the PFZ
 * TRY_LOCK_AND_ENQUEUE():
 *     is_locked = try_lock(lock_addr);
 *     if (is_locked) {
 *         <do enqueue operation>
 *         return true
 *     } else {
 *         return false
 *     }
 *
 * // Not in the PFZ
 * BACKOFF():
 *     // We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *     // the PFZ so we need to check if we need to take a delayed
 *     // preemption.
 *     if (kernel_wants_to_preempt_us) {
 *         enter_kernel_to_take_delayed_preemption()
 *     } else {
 *         // Note that it is safe to do this loop here since the entire
 *         // BACKOFF function isn't in the PFZ and so can be preempted at
 *         // any time
 *         do {
 *             lock_is_free = peek(lock_addr);
 *             if (lock_is_free) {
 *                 return
 *             } else {
 *                 pause_with_monitor(lock_addr)
 *             }
 *         } while (1)
 *     }
 */
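
/*
 * A rough C11 rendering of the lock protocol above, for readers who prefer C
 * to pseudocode. This is a sketch only - the real routines below use casa and
 * stlr directly and must obey the PFZ rules, which plain C cannot express:
 *
 *     #include <stdbool.h>
 *     #include <stdint.h>
 *     #include <stdatomic.h>
 *
 *     static bool
 *     try_lock(_Atomic uint32_t *lock_addr)
 *     {
 *         uint32_t expected = 0;
 *         // Acquire semantics on success, matching the casa below
 *         return atomic_compare_exchange_strong_explicit(lock_addr,
 *             &expected, 1, memory_order_acquire, memory_order_relaxed);
 *     }
 *
 *     static void
 *     unlock(_Atomic uint32_t *lock_addr)
 *     {
 *         // Release store, matching the stlr below
 *         atomic_store_explicit(lock_addr, 0, memory_order_release);
 *     }
 */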

/* Macros and helpers */

.macro BACKOFF lock_addr
        // Save registers we can't clobber
        stp     x0, x1, [sp, #-16]!
        stp     x2, x9, [sp, #-16]!

        // Pass the lock address to the backoff function
        mov     x0, \lock_addr
        bl      _backoff                // Jump out of the PFZ now

        // Restore registers
        ldp     x2, x9, [sp], #16
        ldp     x0, x1, [sp], #16
.endmacro

/* x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_ENQUEUE result
        stp     x0, xzr, [sp, #-16]!    // Save x0 since it'll be clobbered by the return value

        bl      _pfz_trylock_and_enqueue
        mov     \result, x0

        ldp     x0, xzr, [sp], #16      // Restore saved x0 (the second value is discarded)
.endmacro

/* x0 = pointer to queue head
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_DEQUEUE result
        stp     x0, xzr, [sp, #-16]!    // Save x0 since it'll be clobbered by the return value

        bl      _pfz_trylock_and_dequeue
        mov     \result, x0

        ldp     x0, xzr, [sp], #16      // Restore saved x0 (the second value is discarded)
.endmacro

/*
 * Takes a delayed preemption if needed and then branches to the label
 * specified.
 *
 * Modifies x15
 */
.macro PREEMPT_SELF_THEN branch_to_take_on_success
        cbz     x15, \branch_to_take_on_success // No delayed preemption to take, just branch

        mov     x15, xzr                // Zero out the preemption pending field
        bl      _preempt_self
        b       \branch_to_take_on_success
.endmacro

.section __TEXT_EXEC,__commpage_text,regular,pure_instructions

/* Preemption-free functions */
.align 2
_jump_table:    // 32-entry jump table, only 2 are used
        b       _pfz_enqueue
        b       _pfz_dequeue
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666
        brk     #666

/*
 * typedef volatile struct {
 *     void *opaque1; <-- ptr to first queue element or null
 *     void *opaque2; <-- ptr to last queue element or null
 *     int opaque3; <-- spinlock
 * } OSFifoQueueHead;
 */

/* Non-preemptible helper routine to FIFO enqueue:
 * int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = pointer to new element to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and a non-zero value on failure
 */
.globl _pfz_trylock_and_enqueue
.align 2
_pfz_trylock_and_enqueue:
        ARM64_STACK_PROLOG
        PUSH_FRAME

        mov     w10, wzr                // unlock value = w10 = 0
        mov     w11, #1                 // locked value = w11 = 1

        // Try to grab the lock
        casa    w10, w11, [x3]          // Atomic CAS with acquire barrier
        cbz     w10, Ltrylock_enqueue_success

        mov     x0, #-1                 // Failed
        b       Ltrylock_enqueue_exit

        /* We got the lock, enqueue the element */

Ltrylock_enqueue_success:
        ldr     x10, [x0, #8]           // x10 = tail of the queue
        cbnz    x10, Lnon_empty_queue   // tail not NULL
        str     x1, [x0]                // Set head to new element
        b       Lset_new_tail

Lnon_empty_queue:
        str     x1, [x10, x2]           // Set old tail -> offset = new elem

Lset_new_tail:
        str     x1, [x0, #8]            // Set tail = new elem

        // Drop spin lock with release barrier (pairs with acquire in casa)
        stlr    wzr, [x3]

        mov     x0, xzr                 // Mark success

Ltrylock_enqueue_exit:
        POP_FRAME
        ARM64_STACK_EPILOG
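
/*
 * Equivalent C for the list manipulation above (a sketch; the asm works on
 * the raw head/tail slots of the queue head and a caller-supplied link
 * offset):
 *
 *     void *tail = list->opaque2;
 *     if (tail == NULL) {
 *         list->opaque1 = new;                         // empty queue: new elem becomes the head
 *     } else {
 *         *(void **)((char *)tail + offset) = new;     // chain behind the old tail
 *     }
 *     list->opaque2 = new;                             // new elem is always the new tail
 */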

/* Non-preemptible helper routine to FIFO dequeue:
 * void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns -1 on failure, and the pointer on success (can be NULL)
 */
.globl _pfz_trylock_and_dequeue
.align 2
_pfz_trylock_and_dequeue:
        ARM64_STACK_PROLOG
        PUSH_FRAME

        // Try to grab the lock
        mov     w10, wzr                // unlock value = w10 = 0
        mov     w11, #1                 // locked value = w11 = 1

        casa    w10, w11, [x2]          // Atomic CAS with acquire barrier
        cbz     w10, Ltrylock_dequeue_success

        mov     x0, #-1                 // Failed
        b       Ltrylock_dequeue_exit

        /* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
        ldr     x10, [x0]               // x10 = head of the queue
        cbz     x10, Lreturn_head       // if head is null, return

        ldr     x11, [x10, x1]          // get ptr to new head
        cbnz    x11, Lupdate_new_head   // If new head != NULL, not a singleton; only need to update head

        // Singleton case
        str     xzr, [x0, #8]           // dequeuing from a singleton queue, update tail to NULL

Lupdate_new_head:
        str     xzr, [x10, x1]          // zero the link in the old head
        str     x11, [x0]               // Set up the new head

Lreturn_head:
        mov     x0, x10                 // Move head to x0
        stlr    wzr, [x2]               // Drop spin lock with release barrier (pairs with acquire in casa)

Ltrylock_dequeue_exit:
        POP_FRAME
        ARM64_STACK_EPILOG
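
/*
 * Equivalent C for the dequeue above (a sketch, same caveats as the enqueue
 * version; "return head" stands for the value the helper hands back in x0):
 *
 *     void *head = list->opaque1;
 *     if (head != NULL) {
 *         void *next = *(void **)((char *)head + offset);
 *         if (next == NULL) {
 *             list->opaque2 = NULL;                    // singleton: tail becomes NULL
 *         }
 *         *(void **)((char *)head + offset) = NULL;    // clear the old head's link
 *         list->opaque1 = next;
 *     }
 *     return head;                                     // NULL if the queue was empty
 */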


/* Preemptible functions */
.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:


/*
 * void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
 * x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the
 * PFZ (_pfz_trylock_and_enqueue)
 */
.globl _pfz_enqueue
.align 2
_pfz_enqueue:
        ARM64_STACK_PROLOG
        PUSH_FRAME

        str     xzr, [x1, x2]           // Zero the forward link in the new element
        mov     x15, xzr                // Zero out the register used to communicate with the kernel

        add     x3, x0, #16             // address of lock = x3 = x0 + 16
Lenqueue_trylock_loop:

        // Attempt #1
        TRYLOCK_ENQUEUE x9
        PREEMPT_SELF_THEN Lenqueue_determine_success

Lenqueue_determine_success:

        cbz     x9, Lenqueue_success    // did we succeed? if so, exit

        ldxr    w9, [x3]                // arm the monitor for the lock address
        cbz     w9, Lenqueue_clear_monitor // lock is available, retry

        wfe                             // Wait with the monitor armed

        // Attempt #2
        TRYLOCK_ENQUEUE x9
        cbz     x9, Lenqueue_take_delayed_preemption_upon_success // did we succeed? if so, exit

        // We failed twice - back off, then try again

        BACKOFF x3
        b       Lenqueue_trylock_loop

Lenqueue_clear_monitor:
        clrex                           // Pairs with the ldxr

        // Take a preemption if needed, then branch to Lenqueue_trylock_loop
        PREEMPT_SELF_THEN Lenqueue_trylock_loop

Lenqueue_take_delayed_preemption_upon_success:
        PREEMPT_SELF_THEN Lenqueue_success

Lenqueue_success:
        POP_FRAME
        ARM64_STACK_EPILOG

/*
 * void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
 * x0 = pointer to queue head
 * x1 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the
 * PFZ (_pfz_trylock_and_dequeue)
 */
.globl _pfz_dequeue
.align 2
_pfz_dequeue:
        ARM64_STACK_PROLOG
        PUSH_FRAME

        mov     x15, xzr                // Zero out the register used to communicate with the kernel

        add     x2, x0, #16             // address of lock = x2 = x0 + 16
Ldequeue_trylock_loop:

        // Attempt #1
        TRYLOCK_DEQUEUE x9
        PREEMPT_SELF_THEN Ldequeue_determine_success

Ldequeue_determine_success:
        cmp     x9, #-1                 // is the result of the dequeue == -1?
        b.ne    Ldequeue_success        // no, we succeeded

        ldxr    w9, [x2]                // arm the monitor for the lock address
        cbz     w9, Ldequeue_clear_monitor // lock is available, retry

        wfe                             // Wait with the monitor armed

        // Attempt #2
        TRYLOCK_DEQUEUE x9
        cmp     x9, #-1                 // did we fail?
        b.ne    Ldequeue_take_delayed_preemption_upon_success // no, we succeeded

        // We failed twice - back off, then try again

        BACKOFF x2
        b       Ldequeue_trylock_loop

Ldequeue_take_delayed_preemption_upon_success:
        // We just got here after executing PFZ code, check if we need a preemption
        PREEMPT_SELF_THEN Ldequeue_success

Ldequeue_clear_monitor:
        clrex                           // Pairs with the ldxr
        // Take a preemption if needed, then branch to Ldequeue_trylock_loop
        PREEMPT_SELF_THEN Ldequeue_trylock_loop

Ldequeue_success:
        mov     x0, x9                  // Move x9 (where the result was stored earlier) to x0
        POP_FRAME
        ARM64_STACK_EPILOG


/* void preempt_self(void)
 *
 * Make a syscall to take a preemption. This function is not in the PFZ.
 */
.align 2
_preempt_self:
        ARM64_STACK_PROLOG
        PUSH_FRAME

        // Save the registers that will be clobbered by the mach trap on the
        // stack, keeping the stack 16-byte aligned
        stp     x0, x1, [sp, #-16]!

        // Note: We don't need to save the other caller-saved registers since
        // svc will trigger an exception and the kernel will save and restore
        // register state

        // Make the syscall to take the delayed preemption
        mov     x16, #-58               // -58 = pfz_exit mach trap
        svc     #0x80

        // Restore registers from the stack
        ldp     x0, x1, [sp], #16

        POP_FRAME
        ARM64_STACK_EPILOG

/*
 * void backoff(uint32_t *lock_addr);
 * The function returns when it observes that the lock has become available.
 * This function is not in the PFZ.
 *
 * x0 = lock address
 */
.align 2
.globl _backoff
_backoff:
        ARM64_STACK_PROLOG
        PUSH_FRAME

        cbz     x15, Lno_preempt        // Kernel doesn't want to preempt us, jump to the loop

        mov     x15, xzr                // Zero out the preemption pending field
        bl      _preempt_self

Lno_preempt:
        ldxr    w9, [x0]                // Snoop on the lock and arm the monitor
        cbz     w9, Lend_backoff        // The lock seems to be available, return

        wfe                             // pause

        b       Lno_preempt

Lend_backoff:
        clrex

        POP_FRAME
        ARM64_STACK_EPILOG
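
/*
 * The monitor + wfe wait in _backoff has a rough C analogue using clang's
 * exclusive-monitor builtins and the ACLE hint intrinsics. A sketch under the
 * assumption that __builtin_arm_ldrex, __builtin_arm_clrex and __wfe() are
 * available (they are in recent clang), with no PFZ semantics attached:
 *
 *     #include <stdint.h>
 *     #include <arm_acle.h>
 *
 *     static void
 *     backoff_wait(volatile uint32_t *lock_addr)
 *     {
 *         for (;;) {
 *             // Load-exclusive arms the monitor for this address
 *             if (__builtin_arm_ldrex(lock_addr) == 0) {
 *                 __builtin_arm_clrex();       // lock looks free, stop waiting
 *                 return;
 *             }
 *             __wfe();    // sleep until the monitor is cleared or an event fires
 *         }
 *     }
 */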