/*
 * Copyright (c) 2007-2022 Apple Inc. All rights reserved.
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <kern/task.h>
#include <arm/thread.h>

#include <sys/dtrace_ptss.h>

#if __has_include(<ptrauth.h>)
#include <ptrauth.h>
#endif

extern dtrace_id_t dtrace_probeid_error;

/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

extern uint8_t dtrace_decode_arm64(uint32_t instr);

#define IS_ARM64_NOP(x) ((x) == 0xD503201F)
/* Marker for is-enabled probes */
#define IS_ARM64_IS_ENABLED(x) ((x) == 0xD2800000)
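/*
 * For reference (derived from the A64 encoding tables rather than anything
 * defined in this file): 0xD503201F is the canonical A64 NOP encoding, and
 * 0xD2800000 encodes "movz x0, #0", which is what an is-enabled probe site
 * looks like before DTrace patches it.
 */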

int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp,
    user_addr_t pc, fasttrap_probe_type_t type)
{
#pragma unused(type)
	uint32_t instr = 0;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set. Since instructions can span
	 * pages, we potentially read the instruction in two parts. If the
	 * second part fails, we just zero out that part of the instruction.
	 */
	/*
	 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey...
	 */

	if (uread(p, &instr, 4, pc) != 0) {
		return -1;
	}

	tp->ftt_instr = instr;

	if (tp->ftt_fntype != FASTTRAP_FN_DONE_INIT) {
		switch (tp->ftt_fntype) {
		case FASTTRAP_FN_UNKNOWN:
		case FASTTRAP_FN_ARM64:
		case FASTTRAP_FN_ARM64_32:
			/*
			 * On arm64 there is no distinction between
			 * arm vs. thumb mode instruction types.
			 */
			tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
			break;

		case FASTTRAP_FN_USDT:
			if (IS_ARM64_NOP(instr) || IS_ARM64_IS_ENABLED(instr)) {
				tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
			} else {
				/*
				 * Shouldn't reach here - this means we don't
				 * recognize the instruction at one of the
				 * USDT probe locations
				 */
				return -1;
			}

			break;

		case FASTTRAP_FN_ARM:
		case FASTTRAP_FN_THUMB:
		default:
			/*
			 * If we get an arm or thumb mode type
			 * then we are clearly in the wrong path.
			 */
			return -1;
		}
	}

	tp->ftt_type = dtrace_decode_arm64(instr);

	if (tp->ftt_type == FASTTRAP_T_ARM64_EXCLUSIVE_MEM) {
		kprintf("Detected attempt to place DTrace probe on exclusive memory instruction (pc = 0x%llx); refusing to trace (or exclusive operation could never succeed).\n", pc);
		tp->ftt_type = FASTTRAP_T_INV;
		return -1;
	}

	if (tp->ftt_type == FASTTRAP_T_INV) {
		/* This is an instruction we either don't recognize or can't instrument */
		printf("dtrace: fasttrap init64: Unrecognized instruction: %08x at %08llx\n", instr, pc);
		return -1;
	}

	return 0;
}

int
fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
{
	uint32_t instr;
	int size;

	if (proc_is64bit_data(p)) {
		size = 4;
		instr = FASTTRAP_ARM64_INSTR;
	} else {
		return -1;
	}

	if (uwrite(p, &instr, size, tp->ftt_pc) != 0) {
		return -1;
	}

	tp->ftt_installed = 1;

	return 0;
}

int
fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
{
	uint32_t instr;
	int size = 4;

	if (proc_is64bit_data(p)) {
		/*
		 * Distinguish between read or write failures and a changed
		 * instruction.
		 */
		if (uread(p, &instr, size, tp->ftt_pc) != 0) {
			goto end;
		}

		if (instr != FASTTRAP_ARM64_INSTR) {
			goto end;
		}
	} else {
		return -1;
	}

	if (uwrite(p, &tp->ftt_instr, size, tp->ftt_pc) != 0) {
		return -1;
	}

end:
	tp->ftt_installed = 0;

	return 0;
}
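
/*
 * Rough lifecycle sketch (illustrative only; the actual call sites live in the
 * machine-independent fasttrap provider code, not in this file):
 *
 *	fasttrap_tracepoint_init(p, tp, pc, type);	// decode and validate the original instruction
 *	fasttrap_tracepoint_install(p, tp);		// patch FASTTRAP_ARM64_INSTR over it
 *	...						// probe fires, handled by fasttrap_pid_probe()
 *	fasttrap_tracepoint_remove(p, tp);		// restore the saved tp->ftt_instr
 */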

static void
fasttrap_return_common(proc_t *p, arm_saved_state_t *regs, user_addr_t pc, user_addr_t new_pc)
{
	pid_t pid = proc_getpid(p);
	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	lck_mtx_t *pid_mtx;
	int retire_tp = 1;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	lck_mtx_lock(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0) {
			break;
		}
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL) {
		lck_mtx_unlock(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		/* ARM64_TODO - check for FASTTRAP_T_RET */
		if ((tp->ftt_type != FASTTRAP_T_ARM64_RET && tp->ftt_type != FASTTRAP_T_ARM64_RETAB) &&
		    new_pc - probe->ftp_faddr < probe->ftp_fsize) {
			continue;
		}
		if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
			if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
				/* already triggered */
				continue;
			}
		}
		/*
		 * If we have at least one probe associated that
		 * is not a oneshot probe, don't remove the
		 * tracepoint
		 */
		else {
			retire_tp = 0;
		}

#if defined(XNU_TARGET_OS_OSX)
		if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
			dtrace_probe(dtrace_probeid_error, 0 /* state */, id->fti_probe->ftp_id,
			    1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
#else
		if (FALSE) {
#endif /* defined(XNU_TARGET_OS_OSX) */
		} else {
			dtrace_probe(probe->ftp_id,
			    pc - id->fti_probe->ftp_faddr,
			    saved_state64(regs)->x[0], 0, 0, 0);
		}
	}
	if (retire_tp) {
		fasttrap_tracepoint_retire(p, tp);
	}

	lck_mtx_unlock(pid_mtx);
}

#if DEBUG
__dead2
#endif
static void
fasttrap_sigsegv(proc_t *p, uthread_t t, user_addr_t addr, arm_saved_state_t *regs)
{
	/* TODO: This function isn't implemented yet. In debug mode, panic the system to
	 * find out why we're hitting this point. In other modes, kill the process.
	 */
#if DEBUG
#pragma unused(p,t,addr,regs)
	panic("fasttrap: sigsegv not yet implemented");
#else
#pragma unused(p,t,addr)
	/* Kill the process */
	set_saved_state_pc(regs, 0);
#endif

#if 0
	proc_lock(p);

	/* Set fault address and mark signal */
	t->uu_code = addr;
	t->uu_siglist |= sigmask(SIGSEGV);

	/*
	 * XXX These two lines may be redundant; if not, then we need
	 * XXX to potentially set the data address in the machine
	 * XXX specific thread state structure to indicate the address.
	 */
	t->uu_exception = KERN_INVALID_ADDRESS; /* SIGSEGV */
	t->uu_subcode = 0; /* XXX pad */

	proc_unlock(p);

	/* raise signal */
	signal_setast(get_machthread(t));
#endif
}

static void
fasttrap_usdt_args64(fasttrap_probe_t *probe, arm_saved_state64_t *regs64, int argc,
    uint64_t *argv)
{
	int i, x, cap = MIN(argc, probe->ftp_nargs);

	for (i = 0; i < cap; i++) {
		x = probe->ftp_argmap[i];

		/* Up to 8 args are passed in registers on arm64 */
		if (x < 8) {
			argv[i] = regs64->x[x];
		} else {
			fasttrap_fuword64_noerr(regs64->sp + (x - 8) * sizeof(uint64_t), &argv[i]);
		}
	}

	for (; i < argc; i++) {
		argv[i] = 0;
	}
}
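/*
 * Worked example (hypothetical argmap, not taken from any real provider): with
 * probe->ftp_argmap = { 2, 9 } and argc = 2, argv[0] is read from register x2
 * and argv[1] from the user stack at sp + (9 - 8) * 8 = sp + 8, i.e. the second
 * stacked argument slot under the arm64 calling convention.
 */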

static int
condition_true(int cond, int cpsr)
{
	int taken = 0;
	int zf = (cpsr & PSR64_Z) ? 1 : 0,
	    nf = (cpsr & PSR64_N) ? 1 : 0,
	    cf = (cpsr & PSR64_C) ? 1 : 0,
	    vf = (cpsr & PSR64_V) ? 1 : 0;

	switch (cond) {
	case 0: taken = zf; break;
	case 1: taken = !zf; break;
	case 2: taken = cf; break;
	case 3: taken = !cf; break;
	case 4: taken = nf; break;
	case 5: taken = !nf; break;
	case 6: taken = vf; break;
	case 7: taken = !vf; break;
	case 8: taken = (cf && !zf); break;
	case 9: taken = (!cf || zf); break;
	case 10: taken = (nf == vf); break;
	case 11: taken = (nf != vf); break;
	case 12: taken = (!zf && (nf == vf)); break;
	case 13: taken = (zf || (nf != vf)); break;
	case 14: taken = 1; break;
	case 15: taken = 1; break; /* always "true" for ARM, unpredictable for THUMB. */
	}

	return taken;
}
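/*
 * Example, using the standard AArch64 condition encodings (an architectural
 * fact, not something defined in this file): cond 0xB is "LT", taken when
 * N != V, so condition_true(0xB, cpsr) returns 1 only when exactly one of
 * PSR64_N and PSR64_V is set in the saved CPSR.
 */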

/*
 * Copy out an instruction for execution in userland.
 * Trap back to kernel to handle return to original flow of execution, because
 * direct branches don't have sufficient range (+/- 128MB) and we
 * cannot clobber a GPR. Note that we have to specially handle PC-rel loads/stores
 * as well, which have range +/- 1MB (convert to an indirect load). Instruction buffer
 * layout:
 *
 * [ Thunked instruction sequence ]
 * [ Trap for return to original code and return probe handling ]
 *
 * This *does* make it impossible for an ldxr/stxr pair to succeed if we trace on or between
 * them... may need to get fancy at some point.
 */
static void
fasttrap_pid_probe_thunk_instr64(arm_saved_state_t *state, fasttrap_tracepoint_t *tp, proc_t *p, uthread_t uthread,
    const uint32_t *instructions, uint32_t num_instrs, user_addr_t *pc_out)
{
	uint32_t local_scratch[8];
	user_addr_t pc = get_saved_state_pc(state);
	user_addr_t user_scratch_area;

	assert(num_instrs < 8);

	bcopy(instructions, local_scratch, num_instrs * sizeof(uint32_t));
	local_scratch[num_instrs] = FASTTRAP_ARM64_RET_INSTR;

	uthread->t_dtrace_astpc = uthread->t_dtrace_scrpc = uthread->t_dtrace_scratch->addr;
	user_scratch_area = uthread->t_dtrace_scratch->write_addr;

	if (user_scratch_area == (user_addr_t)0) {
		fasttrap_sigtrap(p, uthread, pc); // Should be killing target proc
		*pc_out = pc;
		return;
	}

	if (uwrite(p, local_scratch, (num_instrs + 1) * sizeof(uint32_t), user_scratch_area) != KERN_SUCCESS) {
		fasttrap_sigtrap(p, uthread, pc);
		*pc_out = pc;
		return;
	}

	/* We're stepping (come back to kernel to adjust PC for return to regular code). */
	uthread->t_dtrace_step = 1;

	/* We may or may not be about to run a return probe (but we wouldn't thunk ret lr) */
	uthread->t_dtrace_ret = (tp->ftt_retids != NULL);
	assert(tp->ftt_type != FASTTRAP_T_ARM64_RET);
	assert(tp->ftt_type != FASTTRAP_T_ARM64_RETAB);

	/* Set address of instruction we've patched */
	uthread->t_dtrace_pc = pc;

	/* Any branch would be emulated, next instruction should be one ahead */
	uthread->t_dtrace_npc = pc + 4;

	/* We are certainly handling a probe */
	uthread->t_dtrace_on = 1;

	/* Let's jump to the scratch area */
	*pc_out = uthread->t_dtrace_scratch->addr;
}
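/*
 * For a single thunked instruction, the scratch buffer written by uwrite()
 * above ends up looking like this (illustrative; the actual encoding of
 * FASTTRAP_ARM64_RET_INSTR is defined in fasttrap_isa.h, not here):
 *
 *	scratch + 0:	the original instruction from tp->ftt_instr
 *	scratch + 4:	FASTTRAP_ARM64_RET_INSTR, which traps back into the
 *			kernel so the PC can be moved to t_dtrace_npc and any
 *			return probes can fire
 */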

/*
 * Sign-extend bit "sign_bit_index" out to bit 64.
 */
static int64_t
sign_extend(int64_t input, uint32_t sign_bit_index)
{
	assert(sign_bit_index < 63);
	if (input & (1ULL << sign_bit_index)) {
		/* All 1's & ~[1's from 0 to sign bit] */
		input |= ((~0ULL) & ~((1ULL << (sign_bit_index + 1)) - 1ULL));
	}

	return input;
}
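/*
 * Worked example: a B.cond imm19 field of 0x7FFFF (all ones) has its sign bit
 * at index 18, so sign_extend(0x7FFFF, 18) yields -1; after the << 2 applied by
 * extract_address_literal_sign_extended() below, that becomes a branch offset
 * of -4 bytes, i.e. one instruction backwards.
 */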

/*
 * Handle xzr vs. sp, fp, lr, etc. Will *not* read the SP.
 */
static uint64_t
get_saved_state64_regno(arm_saved_state64_t *regs64, uint32_t regno, int use_xzr)
{
	/* Set PC to register value */
	switch (regno) {
	case 29:
		return regs64->fp;
	case 30:
		return regs64->lr;
	case 31:
		/* xzr */
		if (use_xzr) {
			return 0;
		} else {
			return regs64->sp;
		}
	default:
		return regs64->x[regno];
	}
}

static void
set_saved_state_regno(arm_saved_state_t *state, uint32_t regno, int use_xzr, register_t value)
{
	/* Set PC to register value */
	switch (regno) {
	case 29:
		set_saved_state_fp(state, value);
		break;
	case 30:
		set_saved_state_lr(state, value);
		break;
	case 31:
		if (!use_xzr) {
			set_saved_state_sp(state, value);
		}
		break;
	default:
		set_saved_state_reg(state, regno, value);
		break;
	}
}

/*
 * Common operation: extract sign-extended PC offset from instruction
 * Left-shifts result by two bits.
 */
static uint64_t
extract_address_literal_sign_extended(uint32_t instr, uint32_t base, uint32_t numbits)
{
	uint64_t offset;

	offset = (instr >> base) & ((1 << numbits) - 1);
	offset = sign_extend(offset, numbits - 1);
	offset = offset << 2;

	return offset;
}
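/*
 * Worked example: for B.cond the imm19 field occupies bits [23:5], so callers
 * below use extract_address_literal_sign_extended(instr, 5, 19). An encoding
 * with imm19 == 0x2 produces 0x2 << 2 = 8, a branch two instructions forward
 * of the probed pc.
 */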

static void
do_cbz_cnbz(arm_saved_state64_t *regs64, uint32_t regwidth, uint32_t instr, int is_cbz, user_addr_t *pc_out)
{
	uint32_t regno;
	uint64_t regval;
	uint64_t offset;

	/* Extract register */
	regno = (instr & 0x1f);
	assert(regno <= 31);
	regval = get_saved_state64_regno(regs64, regno, 1);

	/* Control for size */
	if (regwidth == 32) {
		regval &= 0xFFFFFFFFULL;
	}

	/* Extract offset */
	offset = extract_address_literal_sign_extended(instr, 5, 19);

	/* Do test */
	if ((is_cbz && regval == 0) || ((!is_cbz) && regval != 0)) {
		/* Set PC from label */
		*pc_out = regs64->pc + offset;
	} else {
		/* Advance PC */
		*pc_out = regs64->pc + 4;
	}
}

static void
do_tbz_tbnz(arm_saved_state64_t *regs64, uint32_t instr, int is_tbz, user_addr_t *pc_out)
{
	uint64_t offset, regval;
	uint32_t bit_index, b5, b40, regno, bit_set;

	/* Compute offset */
	offset = extract_address_literal_sign_extended(instr, 5, 14);

	/* Extract bit index */
	b5 = (instr >> 31);
	b40 = ((instr >> 19) & 0x1f);
	bit_index = (b5 << 5) | b40;
	assert(bit_index <= 63);

	/* Extract register */
	regno = (instr & 0x1f);
	assert(regno <= 31);
	regval = get_saved_state64_regno(regs64, regno, 1);

	/* Test bit (64-bit constant so bit indices above 31 work) */
	bit_set = ((regval & (1ULL << bit_index)) != 0);

	if ((is_tbz && (!bit_set)) || ((!is_tbz) && bit_set)) {
		/* Branch: unsigned addition so overflow defined */
		*pc_out = regs64->pc + offset;
	} else {
		/* Advance PC */
		*pc_out = regs64->pc + 4;
	}
}
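/*
 * Example: "tbz x7, #45, label" encodes b5 = 1 and b40 = 13, so bit_index
 * becomes (1 << 5) | 13 = 45 and the branch is taken when bit 45 of x7 is
 * clear.
 */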


static void
fasttrap_pid_probe_handle_patched_instr64(arm_saved_state_t *state, fasttrap_tracepoint_t *tp __unused, uthread_t uthread,
    proc_t *p, uint_t is_enabled, int *was_simulated)
{
	thread_t th = get_machthread(uthread);
	int res1, res2;
	arm_saved_state64_t *regs64 = saved_state64(state);
	uint32_t instr = tp->ftt_instr;
	user_addr_t new_pc = 0;

	/* Neon state should be threaded through, but hack it until we have better arm/arm64 integration */
	arm_neon_saved_state64_t *ns64 = &(get_user_neon_regs(th)->ns_64);

	/* is-enabled probe: set x0 to 1 and step forwards */
	if (is_enabled) {
		regs64->x[0] = 1;
		add_saved_state_pc(state, 4);
		return;
	}

	/* For USDT probes, bypass all the emulation logic for the nop instruction */
	if (IS_ARM64_NOP(tp->ftt_instr)) {
		add_saved_state_pc(state, 4);
		return;
	}


	/* Only one of many cases in the switch doesn't simulate */
	switch (tp->ftt_type) {
	/*
	 * Function entry: emulate for speed.
	 * stp fp, lr, [sp, #-16]!
	 */
	case FASTTRAP_T_ARM64_STANDARD_FUNCTION_ENTRY:
	{
		/* Store values to stack */
		res1 = fasttrap_suword64(regs64->sp - 16, regs64->fp);
		res2 = fasttrap_suword64(regs64->sp - 8, regs64->lr);
		if (res1 != 0 || res2 != 0) {
			fasttrap_sigsegv(p, uthread, regs64->sp - (res1 ? 16 : 8), state);
#ifndef DEBUG
			new_pc = regs64->pc; /* Bit of a hack */
			break;
#endif
		}

		/* Move stack pointer */
		regs64->sp -= 16;

		/* Move PC forward */
		new_pc = regs64->pc + 4;
		*was_simulated = 1;
		break;
	}

	/*
	 * PC-relative loads/stores: emulate for correctness.
	 * All loads are 32bits or greater (no need to handle byte or halfword accesses).
	 * LDR Wt, addr
	 * LDR Xt, addr
	 * LDRSW Xt, addr
	 *
	 * LDR St, addr
	 * LDR Dt, addr
	 * LDR Qt, addr
	 * PRFM label -> becomes a NOP
	 */
	case FASTTRAP_T_ARM64_LDR_S_PC_REL:
	case FASTTRAP_T_ARM64_LDR_W_PC_REL:
	case FASTTRAP_T_ARM64_LDR_D_PC_REL:
	case FASTTRAP_T_ARM64_LDR_X_PC_REL:
	case FASTTRAP_T_ARM64_LDR_Q_PC_REL:
	case FASTTRAP_T_ARM64_LDRSW_PC_REL:
	{
		uint64_t offset;
		uint32_t valsize, regno;
		user_addr_t address;
		union {
			uint32_t val32;
			uint64_t val64;
			uint128_t val128;
		} value;

		/* Extract 19-bit offset, add to pc */
		offset = extract_address_literal_sign_extended(instr, 5, 19);
		address = regs64->pc + offset;

		/* Extract destination register */
		regno = (instr & 0x1f);
		assert(regno <= 31);

		/* Read value of desired size from memory */
		switch (tp->ftt_type) {
		case FASTTRAP_T_ARM64_LDR_S_PC_REL:
		case FASTTRAP_T_ARM64_LDR_W_PC_REL:
		case FASTTRAP_T_ARM64_LDRSW_PC_REL:
			valsize = 4;
			break;
		case FASTTRAP_T_ARM64_LDR_D_PC_REL:
		case FASTTRAP_T_ARM64_LDR_X_PC_REL:
			valsize = 8;
			break;
		case FASTTRAP_T_ARM64_LDR_Q_PC_REL:
			valsize = 16;
			break;
		default:
			panic("Should never get here!");
			valsize = -1;
			break;
		}

		if (copyin(address, &value, valsize) != 0) {
			fasttrap_sigsegv(p, uthread, address, state);
#ifndef DEBUG
			new_pc = regs64->pc; /* Bit of a hack, we know about update in fasttrap_sigsegv() */
			break;
#endif
		}

		/* Stash in correct register slot */
		switch (tp->ftt_type) {
		case FASTTRAP_T_ARM64_LDR_W_PC_REL:
			set_saved_state_regno(state, regno, 1, value.val32);
			break;
		case FASTTRAP_T_ARM64_LDRSW_PC_REL:
			set_saved_state_regno(state, regno, 1, sign_extend(value.val32, 31));
			break;
		case FASTTRAP_T_ARM64_LDR_X_PC_REL:
			set_saved_state_regno(state, regno, 1, value.val64);
			break;
		case FASTTRAP_T_ARM64_LDR_S_PC_REL:
			ns64->v.s[regno][0] = value.val32;
			break;
		case FASTTRAP_T_ARM64_LDR_D_PC_REL:
			ns64->v.d[regno][0] = value.val64;
			break;
		case FASTTRAP_T_ARM64_LDR_Q_PC_REL:
			ns64->v.q[regno] = value.val128;
			break;
		default:
			panic("Should never get here!");
		}


		/* Move PC forward */
		new_pc = regs64->pc + 4;
		*was_simulated = 1;
		break;
	}

	case FASTTRAP_T_ARM64_PRFM:
	{
		/* Becomes a NOP (architecturally permitted). Just move PC forward */
		new_pc = regs64->pc + 4;
		*was_simulated = 1;
		break;
	}

	/*
	 * End explicit memory accesses.
	 */

	/*
	 * Branches: parse condition codes if needed, emulate for correctness and
	 * in the case of the indirect branches, convenience
	 * B.cond
	 * CBNZ Wn, label
	 * CBNZ Xn, label
	 * CBZ Wn, label
	 * CBZ Xn, label
	 * TBNZ, Xn|Wn, #uimm16, label
	 * TBZ, Xn|Wn, #uimm16, label
	 *
	 * B label
	 * BL label
	 *
	 * BLR Xm
	 * BR Xm
	 * RET Xm
	 */
	case FASTTRAP_T_ARM64_B_COND:
	{
		int cond;

		/* Extract condition code */
		cond = (instr & 0xf);

		/* Determine if it passes */
		if (condition_true(cond, regs64->cpsr)) {
			uint64_t offset;

			/* Extract 19-bit target offset, add to PC */
			offset = extract_address_literal_sign_extended(instr, 5, 19);
			new_pc = regs64->pc + offset;
		} else {
			/* Move forwards */
			new_pc = regs64->pc + 4;
		}

		*was_simulated = 1;
		break;
	}

	case FASTTRAP_T_ARM64_CBNZ_W:
	{
		do_cbz_cnbz(regs64, 32, instr, 0, &new_pc);
		*was_simulated = 1;
		break;
	}
	case FASTTRAP_T_ARM64_CBNZ_X:
	{
		do_cbz_cnbz(regs64, 64, instr, 0, &new_pc);
		*was_simulated = 1;
		break;
	}
	case FASTTRAP_T_ARM64_CBZ_W:
	{
		do_cbz_cnbz(regs64, 32, instr, 1, &new_pc);
		*was_simulated = 1;
		break;
	}
	case FASTTRAP_T_ARM64_CBZ_X:
	{
		do_cbz_cnbz(regs64, 64, instr, 1, &new_pc);
		*was_simulated = 1;
		break;
	}

	case FASTTRAP_T_ARM64_TBNZ:
	{
		do_tbz_tbnz(regs64, instr, 0, &new_pc);
		*was_simulated = 1;
		break;
	}
	case FASTTRAP_T_ARM64_TBZ:
	{
		do_tbz_tbnz(regs64, instr, 1, &new_pc);
		*was_simulated = 1;
		break;
	}
	case FASTTRAP_T_ARM64_B:
	case FASTTRAP_T_ARM64_BL:
	{
		uint64_t offset;

		/* Extract offset from instruction */
		offset = extract_address_literal_sign_extended(instr, 0, 26);

		/* Update LR if appropriate */
		if (tp->ftt_type == FASTTRAP_T_ARM64_BL) {
			set_saved_state_lr(state, regs64->pc + 4);
		}

		/* Compute PC (unsigned addition for defined overflow) */
		new_pc = regs64->pc + offset;
		*was_simulated = 1;
		break;
	}

	case FASTTRAP_T_ARM64_BLR:
	case FASTTRAP_T_ARM64_BR:
	{
		uint32_t regno;

		/* Extract register from instruction */
		regno = ((instr >> 5) & 0x1f);
		assert(regno <= 31);

		/* Update LR if appropriate */
		if (tp->ftt_type == FASTTRAP_T_ARM64_BLR) {
			set_saved_state_lr(state, regs64->pc + 4);
		}

		/* Update PC in saved state */
		new_pc = get_saved_state64_regno(regs64, regno, 1);
		*was_simulated = 1;
		break;
	}

	case FASTTRAP_T_ARM64_RET:
	{
		/* Extract register */
		unsigned regno = ((instr >> 5) & 0x1f);
		assert(regno <= 31);

		/* Set PC to register value (xzr, not sp) */
		new_pc = get_saved_state64_regno(regs64, regno, 1);

		*was_simulated = 1;
		break;
	}
	case FASTTRAP_T_ARM64_RETAB:
	{
		/* Set PC to register value (xzr, not sp) */
		new_pc = get_saved_state64_regno(regs64, 30, 1);
#if __has_feature(ptrauth_calls)
		new_pc = (user_addr_t) ptrauth_strip((void *)new_pc, ptrauth_key_return_address);
#endif

		*was_simulated = 1;
		break;
	}
	/*
	 * End branches.
	 */

	/*
	 * Address calculations: emulate for correctness.
	 *
	 * ADRP Xd, label
	 * ADR Xd, label
	 */
	case FASTTRAP_T_ARM64_ADRP:
	case FASTTRAP_T_ARM64_ADR:
	{
		uint64_t immhi, immlo, offset, result;
		uint32_t regno;

		/* Extract destination register */
		regno = (instr & 0x1f);
		assert(regno <= 31);

		/* Extract offset */
		immhi = ((instr & 0x00ffffe0) >> 5);    /* bits [23,5]: 19 bits */
		immlo = ((instr & 0x60000000) >> 29);   /* bits [30,29]: 2 bits */

		/* Add to PC. Use unsigned addition so that overflow wraps (rather than being undefined). */
		if (tp->ftt_type == FASTTRAP_T_ARM64_ADRP) {
			offset = (immhi << 14) | (immlo << 12);         /* Concatenate bits into [32,12] */
			offset = sign_extend(offset, 32);               /* Sign extend from bit 32 */
			result = (regs64->pc & ~0xfffULL) + offset;     /* And add to page of current pc */
		} else {
			assert(tp->ftt_type == FASTTRAP_T_ARM64_ADR);
			offset = (immhi << 2) | immlo;                  /* Concatenate bits into [20,0] */
			offset = sign_extend(offset, 20);               /* Sign-extend */
			result = regs64->pc + offset;                   /* And add to page of current pc */
		}

		/* xzr, not sp */
		set_saved_state_regno(state, regno, 1, result);

		/* Move PC forward */
		new_pc = regs64->pc + 4;
		*was_simulated = 1;
		break;
	}

	/*
	 * End address calculations.
	 */

	/*
	 * Everything else: thunk to userland
	 */
	case FASTTRAP_T_COMMON:
	{
		fasttrap_pid_probe_thunk_instr64(state, tp, p, uthread, &tp->ftt_instr, 1, &new_pc);
		*was_simulated = 0;
		break;
	}
	default:
	{
		panic("An instruction DTrace doesn't expect: %d", tp->ftt_type);
		break;
	}
	}

	set_saved_state_pc(state, new_pc);
	return;
}
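/*
 * ADRP worked example (values chosen for illustration): an immhi:immlo of 1
 * (immhi = 0, immlo = 1) gives offset = 1 << 12 = 0x1000, so an ADRP at
 * pc 0x100004abc writes (0x100004000 + 0x1000) = 0x100005000 into Xd, matching
 * the "page of pc plus page offset" arithmetic emulated above.
 */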

int
fasttrap_pid_probe(arm_saved_state_t *state)
{
	proc_t *p = current_proc();
	fasttrap_bucket_t *bucket;
	lck_mtx_t *pid_mtx;
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;
	int was_simulated, retire_tp = 1;

	uint64_t pc = get_saved_state_pc(state);

	assert(is_saved_state64(state));

	uthread_t uthread = current_uthread();

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (uthread->t_dtrace_step) {
		ASSERT(uthread->t_dtrace_on);
		fasttrap_sigtrap(p, uthread, (user_addr_t)pc);
		return 0;
	}

	/*
	 * Clear all user tracing flags.
	 */
	uthread->t_dtrace_ft = 0;
	uthread->t_dtrace_pc = 0;
	uthread->t_dtrace_npc = 0;
	uthread->t_dtrace_scrpc = 0;
	uthread->t_dtrace_astpc = 0;
	uthread->t_dtrace_reg = 0;


	pid = proc_getpid(p);
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	lck_mtx_lock(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0) {
			break;
		}
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		lck_mtx_unlock(pid_mtx);
		return -1;
	}

	/* Execute the actual probe */
	if (tp->ftt_ids != NULL) {
		fasttrap_id_t *id;
		uint64_t arg4;

		if (is_saved_state64(state)) {
			arg4 = get_saved_state_reg(state, 4);
		} else {
			return -1;
		}


		/* First four parameters are passed in registers */

		for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
			fasttrap_probe_t *probe = id->fti_probe;

#if defined(XNU_TARGET_OS_OSX)
			if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
				dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id,
				    1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
#else
			if (FALSE) {
#endif /* defined(XNU_TARGET_OS_OSX) */
			} else {
				if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
					if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
						/* already triggered */
						continue;
					}
				}
				/*
				 * If we have at least one probe associated that
				 * is not a oneshot probe, don't remove the
				 * tracepoint
				 */
				else {
					retire_tp = 0;
				}
				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id,
					    get_saved_state_reg(state, 0),
					    get_saved_state_reg(state, 1),
					    get_saved_state_reg(state, 2),
					    get_saved_state_reg(state, 3),
					    arg4);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id,
					    get_saved_state_reg(state, 0),
					    get_saved_state_reg(state, 1),
					    get_saved_state_reg(state, 2),
					    get_saved_state_reg(state, 3),
					    arg4);
				} else {
					uint64_t t[5];

					fasttrap_usdt_args64(probe, saved_state64(state), 5, t);
					dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]);
				}
			}
		}
		if (retire_tp) {
			fasttrap_tracepoint_retire(p, tp);
		}
	}
	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	lck_mtx_unlock(pid_mtx);
	tp = &tp_local;

	/*
	 * APPLE NOTE:
	 *
	 * Subroutines should update PC.
	 * We're setting this earlier than Solaris does, to get a "correct"
	 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is
	 * reported at: d, b, a. The new way gives c, b, a, which is closer
	 * to correct, as the return instruction has already executed.
	 */
	fasttrap_pid_probe_handle_patched_instr64(state, tp, uthread, p, is_enabled, &was_simulated);

	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until the user thread returns to the kernel.
		 */
		/*
		 * It used to be that only common instructions were simulated.
		 * For performance reasons, we now simulate some instructions
		 * when safe and go back to userland otherwise. The was_simulated
		 * flag means we don't need to go back to userland.
		 */
		if (was_simulated) {
			fasttrap_return_common(p, state, (user_addr_t)pc, (user_addr_t)get_saved_state_pc(state));
		} else {
			ASSERT(uthread->t_dtrace_ret != 0);
			ASSERT(uthread->t_dtrace_pc == pc);
			ASSERT(uthread->t_dtrace_scrpc != 0);
			ASSERT(((user_addr_t)get_saved_state_pc(state)) == uthread->t_dtrace_astpc);
		}
	}

	return 0;
}

int
fasttrap_return_probe(arm_saved_state_t *regs)
{
	proc_t *p = current_proc();
	uthread_t uthread = current_uthread();
	user_addr_t pc = uthread->t_dtrace_pc;
	user_addr_t npc = uthread->t_dtrace_npc;

	uthread->t_dtrace_pc = 0;
	uthread->t_dtrace_npc = 0;
	uthread->t_dtrace_scrpc = 0;
	uthread->t_dtrace_astpc = 0;


	/*
	 * We set rp->r_pc to the address of the traced instruction so
	 * that it appears to dtrace_probe() that we're on the original
	 * instruction, and so that the user can't easily detect our
	 * complex web of lies. dtrace_return_probe() (our caller)
	 * will correctly set %pc after we return.
	 */
	set_saved_state_pc(regs, pc);

	fasttrap_return_common(p, regs, pc, npc);

	return 0;
}

uint64_t
fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
    int aframes)
{
#pragma unused(arg, id, parg, aframes)
	arm_saved_state_t *regs = find_user_regs(current_thread());

	/* First eight arguments are in registers */
	if (argno < 8) {
		return saved_state64(regs)->x[argno];
	}

	/* Look on the stack for the rest */
	uint64_t value;
	uint64_t *sp = (uint64_t *) saved_state64(regs)->sp;
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	value = dtrace_fuword64((user_addr_t) (sp + argno - 8));
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);

	return value;
}

uint64_t
fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
{
#pragma unused(arg, id, parg, argno, aframes)
	return 0;
}