/*
 * Copyright (c) 2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <machine/asm.h>

/* This section contains all the code needed by the atomic operations behind
 * the OSAtomicFifoEnqueue and OSAtomicFifoDequeue APIs in libplatform.
 *
 * This code needs to be compiled as a single section and should not make
 * branches outside of it. This allows us to copy the entire section to the
 * text commpage once it is created - see osfmk/arm/commpage/commpage.c.
 *
 * This section is split into 2 parts - the preemption-free zone (PFZ)
 * routines and the preemptible routines (non-PFZ). The PFZ routines will not
 * be preempted by the scheduler if the PC of the userspace process is in that
 * region while handling asynchronous interrupts (note that traps are still
 * possible in the PFZ). Instead, the scheduler will mark x15 (known through
 * coordination with the functions in the commpage section) to indicate to the
 * userspace code that it needs to take a delayed preemption. The PFZ
 * functions may make callouts to preemptible routines and vice versa. When a
 * preemptible routine regains control after a callout to a function in the
 * PFZ, it needs to check x15 to determine whether a delayed preemption must
 * be taken. In addition, functions in the PFZ must not contain backward
 * branches.
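 *
 * In C-like pseudocode (an illustrative sketch of the x15 protocol, in the
 * style of the pseudocode further below; preempt_self() stands in for the
 * pfz_exit mach trap implemented by _preempt_self at the bottom of this
 * file), that check is:
 *
 *     if (x15 != 0) {      // kernel marked a delayed preemption as pending
 *         x15 = 0;         // acknowledge the mark
 *         preempt_self();  // trap into the kernel so the AST can be taken
 *     }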
 *
 * The entry point to execute code in the commpage text section is through
 * the jump table at the very top of the section. The base of the jump table
 * is exposed to userspace via the APPLE array, and the offsets from the base
 * of the jump table are listed in the arm/cpu_capabilities.h header. Adding
 * any new function to the PFZ requires a lockstep change to that header.
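 *
 * For illustration, a userspace caller would reach the enqueue entry roughly
 * as follows (a hedged sketch: the offset macro name follows
 * arm/cpu_capabilities.h, and the lookup of the jump table base from the
 * APPLE array is elided):
 *
 *     uintptr_t base = ...;  // jump table base, parsed from the APPLE array
 *     typedef void (*pfz_enqueue_fn)(OSFifoQueueHead *, void *, size_t);
 *     pfz_enqueue_fn enqueue =
 *         (pfz_enqueue_fn)(base + _COMM_TEXT_PFZ_ENQUEUE_OFFSET);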
 *
 * Functions in PFZ:
 *     Enqueue function
 *     Dequeue function
 *
 * Functions not in PFZ:
 *     Backoff function as part of the spin loop
 *     Preempt function to take a delayed preemption as indicated by the kernel
 *
 * ----------------------------------------------------------------------
 *
 * The high level goal of the asm code in this section is to enqueue to and
 * dequeue from a FIFO linked list.
 *
 * typedef volatile struct {
 *     void *opaque1;  <-- ptr to first queue element or null
 *     void *opaque2;  <-- ptr to last queue element or null
 *     int opaque3;    <-- spinlock
 * } OSFifoQueueHead;
 *
 * This is done through a userspace spin lock stored in the linked list head
 * for synchronization.
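 *
 * For reference, a hedged sketch of how the corresponding public API is used
 * (OSAtomicFifoEnqueue/OSAtomicFifoDequeue and OS_ATOMIC_FIFO_QUEUE_INIT come
 * from <libkern/OSAtomic.h>; the element type is illustrative):
 *
 *     #include <stddef.h>
 *     #include <libkern/OSAtomic.h>
 *
 *     typedef struct elem {
 *         int data;
 *         struct elem *link;  // the queue's link field
 *     } elem_t;
 *
 *     OSFifoQueueHead q = OS_ATOMIC_FIFO_QUEUE_INIT;
 *     elem_t e = { .data = 42, .link = NULL };
 *
 *     OSAtomicFifoEnqueue(&q, &e, offsetof(elem_t, link));
 *     elem_t *out = OSAtomicFifoDequeue(&q, offsetof(elem_t, link)); // == &e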
 *
 * Here is the pseudocode for the spin lock acquire algorithm, which is split
 * between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation but it is symmetrical
 * for the dequeue operation.
 *
 * // Not in the PFZ. Entry from the jump table.
 * ENQUEUE()
 *     enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *     // We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *     // the PFZ so we need to check if we need to take a delayed
 *     // preemption.
 *     if (kernel_wants_to_preempt_us) {
 *         // This is done through the pfz_exit() mach trap, a dummy syscall
 *         // whose sole purpose is to allow the thread to enter the kernel
 *         // so that it can be preempted at AST.
 *         enter_kernel_to_take_delayed_preemption()
 *     }
 *
 *     if (!enqueued) {
 *         ARM_MONITOR;
 *         WFE;
 *         enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
 *         if (!enqueued) {
 *             // We failed twice, take a backoff
 *             BACKOFF();
 *             goto ENQUEUE()
 *         } else {
 *             // We got here from the PFZ, check for a delayed preemption
 *             if (kernel_wants_to_preempt_us) {
 *                 enter_kernel_to_take_delayed_preemption()
 *             }
 *         }
 *     }
 *
 * // In the PFZ
 * TRY_LOCK_AND_ENQUEUE():
 *     is_locked = try_lock(lock_addr);
 *     if (is_locked) {
 *         <do enqueue operation>
 *         return true
 *     } else {
 *         return false
 *     }
 *
 *
 * // Not in the PFZ
 * BACKOFF():
 *     // We're running here after running the TRY_LOCK_AND_ENQUEUE code in
 *     // the PFZ so we need to check if we need to take a delayed
 *     // preemption.
 *     if (kernel_wants_to_preempt_us) {
 *         enter_kernel_to_take_delayed_preemption()
 *     } else {
 *         // Note that it is safe to do this loop here since the entire
 *         // BACKOFF function isn't in the PFZ and so can be preempted at
 *         // any time
 *         do {
 *             lock_is_free = peek(lock_addr);
 *             if (lock_is_free) {
 *                 return
 *             } else {
 *                 pause_with_monitor(lock_addr)
 *             }
 *         } while (1)
 *     }
 */
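
/*
 * The lock discipline implemented below by the casa/stlr pairs corresponds
 * roughly to the following C11 atomics (an illustrative sketch, not code
 * that is compiled here):
 *
 *     #include <stdatomic.h>
 *     #include <stdbool.h>
 *     #include <stdint.h>
 *
 *     static bool
 *     try_lock(_Atomic uint32_t *lock_addr)
 *     {
 *         uint32_t unlocked = 0;
 *         // casa: compare-and-swap with acquire semantics on success
 *         return atomic_compare_exchange_strong_explicit(lock_addr,
 *             &unlocked, 1, memory_order_acquire, memory_order_relaxed);
 *     }
 *
 *     static void
 *     unlock(_Atomic uint32_t *lock_addr)
 *     {
 *         // stlr: store with release semantics
 *         atomic_store_explicit(lock_addr, 0, memory_order_release);
 *     }
 */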

/* Macros and helpers */

.macro BACKOFF lock_addr
	// Save registers we can't clobber
	stp	x0, x1, [sp, #-16]!
	stp	x2, x9, [sp, #-16]!

	// Pass the lock address to the backoff function
	mov	x0, \lock_addr
	bl	_backoff		// Branch out of the PFZ now

	// Restore registers
	ldp	x2, x9, [sp], #16
	ldp	x0, x1, [sp], #16
.endmacro

/* x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_ENQUEUE result
	stp	x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl	_pfz_trylock_and_enqueue
	mov	\result, x0

	ldp	x0, xzr, [sp], #16	// Restore x0; the second slot is just alignment padding
.endmacro

/* x0 = pointer to queue head
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Moves the result of the helper function to the register specified
 */
.macro TRYLOCK_DEQUEUE result
	stp	x0, xzr, [sp, #-16]!	// Save x0 since it'll be clobbered by the return value

	bl	_pfz_trylock_and_dequeue
	mov	\result, x0

	ldp	x0, xzr, [sp], #16	// Restore x0; the second slot is just alignment padding
.endmacro

/*
 * Takes a delayed preemption if needed and then branches to the label
 * specified.
 *
 * Modifies x15
 */
.macro PREEMPT_SELF_THEN branch_to_take_on_success
	cbz	x15, \branch_to_take_on_success	// No delayed preemption to take, branch to the label

	mov	x15, xzr		// zero out the preemption pending field
	bl	_preempt_self
	b	\branch_to_take_on_success
.endmacro

.section __TEXT_EXEC,__commpage_text,regular,pure_instructions

/* Preemption free functions */
.align 2
_jump_table:	// 32-entry jump table; only the first 2 entries are used
	b	_pfz_enqueue
	b	_pfz_dequeue
	brk	#666	// Unused entries trap
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666
	brk	#666


/*
 * typedef volatile struct {
 *     void *opaque1;  <-- ptr to first queue element or null
 *     void *opaque2;  <-- ptr to last queue element or null
 *     int opaque3;    <-- spinlock
 * } OSFifoQueueHead;
 */

/* Non-preemptible helper routine for FIFO enqueue:
 * int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = pointer to new element to enqueue
 * x2 = offset of link field inside element
 * x3 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and a non-zero value on failure
 */
.globl _pfz_trylock_and_enqueue
.align 2
_pfz_trylock_and_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov	w10, wzr		// unlock value = w10 = 0
	mov	w11, #1			// locked value = w11 = 1

	// Try to grab the lock
	casa	w10, w11, [x3]		// Atomic CAS with acquire barrier
	cbz	w10, Ltrylock_enqueue_success

	mov	x0, #-1			// Failed
	b	Ltrylock_enqueue_exit

	/* We got the lock, enqueue the element */

Ltrylock_enqueue_success:
	ldr	x10, [x0, #8]		// x10 = tail of the queue
	cbnz	x10, Lnon_empty_queue	// tail not NULL
	str	x1, [x0]		// Set head to new element
	b	Lset_new_tail

Lnon_empty_queue:
	str	x1, [x10, x2]		// Set old tail -> offset = new elem

Lset_new_tail:
	str	x1, [x0, #8]		// Set tail = new elem

	// Drop spin lock with release barrier (pairs with acquire in casa)
	stlr	wzr, [x3]

	mov	x0, xzr			// Mark success

Ltrylock_enqueue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG
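
/*
 * The list manipulation above is, in effect (an illustrative C sketch;
 * head/tail stand for opaque1/opaque2 and the lock is held throughout):
 *
 *     if (list->tail == NULL)
 *         list->head = new;      // empty queue: new elem becomes the head
 *     else                       // link the new elem after the old tail
 *         *(void **)((char *)list->tail + offset) = new;
 *     list->tail = new;          // the new elem is always the new tail
 */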

/* Non-preemptible helper routine for FIFO dequeue:
 * void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
 *
 * x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns -1 on failure, and the pointer on success (can be NULL)
 */
.globl _pfz_trylock_and_dequeue
.align 2
_pfz_trylock_and_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Try to grab the lock
	mov	w10, wzr		// unlock value = w10 = 0
	mov	w11, #1			// locked value = w11 = 1

	casa	w10, w11, [x2]		// Atomic CAS with acquire barrier
	cbz	w10, Ltrylock_dequeue_success

	mov	x0, #-1			// Failed
	b	Ltrylock_dequeue_exit

	/* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
	ldr	x10, [x0]		// x10 = head of the queue
	cbz	x10, Lreturn_head	// if head is null, return

	ldr	x11, [x10, x1]		// get ptr to new head
	cbnz	x11, Lupdate_new_head	// If new head != NULL, then not singleton. Only need to update head

	// Singleton case
	str	xzr, [x0, #8]		// dequeuing from singleton queue, update tail to NULL

Lupdate_new_head:
	str	xzr, [x10, x1]		// zero the link in the old head
	str	x11, [x0]		// Set up a new head

Lreturn_head:
	mov	x0, x10			// Move head to x0
	stlr	wzr, [x2]		// Drop spin lock with release barrier (pairs with acquire in casa)

Ltrylock_dequeue_exit:
	POP_FRAME
	ARM64_STACK_EPILOG
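
/*
 * The dequeue above is, in effect (an illustrative C sketch; head/tail stand
 * for opaque1/opaque2 and the lock is held throughout):
 *
 *     elem = list->head;
 *     if (elem != NULL) {
 *         next = *(void **)((char *)elem + offset);
 *         if (next == NULL)
 *             list->tail = NULL;  // singleton: the queue is now empty
 *         *(void **)((char *)elem + offset) = NULL;  // unlink the old head
 *         list->head = next;
 *     }
 *     return elem;                // NULL if the queue was empty
 */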


/* Preemptible functions */
.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:


/*
 * void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
 * x0 = pointer to queue head
 * x1 = pointer to new elem to enqueue
 * x2 = offset of link field inside element
 */
.globl _pfz_enqueue

.align 2
_pfz_enqueue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	str	xzr, [x1, x2]		// Zero the forward link in the new element
	mov	x15, xzr		// zero out the register used to communicate with the kernel

	add	x3, x0, #16		// address of lock = x3 = x0 + 16
Lenqueue_trylock_loop:

	// Attempt #1
	TRYLOCK_ENQUEUE x9
	PREEMPT_SELF_THEN Lenqueue_determine_success

Lenqueue_determine_success:

	cbz	x9, Lenqueue_success	// did we succeed? if so, exit

	ldxr	w9, [x3]		// arm the monitor for the lock address
	cbz	w9, Lenqueue_clear_monitor	// lock is available, retry

	wfe				// Wait with the monitor armed

	// Attempt #2
	TRYLOCK_ENQUEUE x9
	cbz	x9, Lenqueue_take_delayed_preemption_upon_success	// did we succeed? if so, exit

	// We failed twice - back off, then try again

	BACKOFF x3
	b	Lenqueue_trylock_loop

Lenqueue_clear_monitor:
	clrex				// Pairs with the ldxr

	// Take a preemption if needed, then branch to Lenqueue_trylock_loop
	PREEMPT_SELF_THEN Lenqueue_trylock_loop

Lenqueue_take_delayed_preemption_upon_success:
	PREEMPT_SELF_THEN Lenqueue_success

Lenqueue_success:
	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
 * x0 = pointer to queue head
 * x1 = offset of link field inside element
 *
 * This function is not in the PFZ but calls out to a helper which is in the
 * PFZ (_pfz_trylock_and_dequeue).
 */
.globl _pfz_dequeue
.align 2
_pfz_dequeue:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	mov	x15, xzr		// zero out the register used to communicate with the kernel

	add	x2, x0, #16		// address of lock = x2 = x0 + 16
Ldequeue_trylock_loop:

	// Attempt #1
	TRYLOCK_DEQUEUE x9
	PREEMPT_SELF_THEN Ldequeue_determine_success

Ldequeue_determine_success:
	cmp	x9, #-1			// is the result of the dequeue == -1?
	b.ne	Ldequeue_success	// no, we succeeded

	ldxr	w9, [x2]		// arm the monitor for the lock address
	cbz	w9, Ldequeue_clear_monitor	// lock is available, retry

	wfe				// Wait with the monitor armed

	// Attempt #2
	TRYLOCK_DEQUEUE x9
	cmp	x9, #-1			// did we fail?
	b.ne	Ldequeue_take_delayed_preemption_upon_success	// no, we succeeded

	// We failed twice - back off, then try again

	BACKOFF x2
	b	Ldequeue_trylock_loop

Ldequeue_take_delayed_preemption_upon_success:
	// We just got here after executing PFZ code, check if we need a preemption
	PREEMPT_SELF_THEN Ldequeue_success

Ldequeue_clear_monitor:
	clrex				// Pairs with the ldxr
	// Take a preemption if needed, then branch to Ldequeue_trylock_loop
	PREEMPT_SELF_THEN Ldequeue_trylock_loop

Ldequeue_success:
	mov	x0, x9			// Move x9 (where the result was stored earlier) to x0
	POP_FRAME
	ARM64_STACK_EPILOG


/* void preempt_self(void)
 *
 * Make a syscall to take a preemption. This function is not in the PFZ.
 */
.align 2
_preempt_self:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	// Save the registers which will be clobbered by the mach trap on the
	// stack, keeping it 16-byte aligned
	stp	x0, x1, [sp, #-16]!

	// Note: we don't need to save the caller-saved registers since the svc
	// will trigger an exception and the kernel will save and restore the
	// register state

	// Make a syscall to take the delayed preemption
	mov	x16, #-58		// Mach trap #58 = pfz_exit (Mach trap numbers are negative)
	svc	#0x80

	// Restore registers from the stack
	ldp	x0, x1, [sp], #16

	POP_FRAME
	ARM64_STACK_EPILOG

/*
 * void backoff(uint32_t *lock_addr);
 * The function returns when it observes that the lock has become available.
 * This function is not in the PFZ.
 *
 * x0 = lock address
 */
.align 2
.globl _backoff
_backoff:
	ARM64_STACK_PROLOG
	PUSH_FRAME

	cbz	x15, Lno_preempt	// Kernel doesn't want to preempt us, jump to loop

	mov	x15, xzr		// zero out the preemption pending field
	bl	_preempt_self

Lno_preempt:
	ldxr	w9, [x0]		// Snoop on lock and arm the monitor
	cbz	w9, Lend_backoff	// The lock seems to be available, return

	wfe				// pause

	b	Lno_preempt

Lend_backoff:
	clrex

	POP_FRAME
	ARM64_STACK_EPILOG