1/*
2 * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3 */
4
5#include <kern/bits.h>
6#include <kern/task.h>
7#include <kern/thread.h>
8#include <kern/assert.h>
9#include <kern/clock.h>
10#include <kern/locks.h>
11#include <kern/sched_prim.h>
12#include <mach/machine/thread_status.h>
13#include <mach/thread_act.h>
14#include <machine/machine_routines.h>
15#include <arm/thread.h>
16#include <arm64/proc_reg.h>
17#include <pexpert/pexpert.h>
18
19#include <sys/kernel.h>
20#include <sys/kern_debug.h>
21#include <sys/vm.h>
22#include <sys/proc_internal.h>
23#include <sys/syscall.h>
24#include <sys/systm.h>
25#include <sys/user.h>
26#include <sys/errno.h>
27#include <sys/kdebug.h>
28#include <sys/sysent.h>
29#include <sys/sysproto.h>
30#include <sys/kauth.h>
31#include <sys/bitstring.h>
32
33#include <security/audit/audit.h>
34
35#if CONFIG_MACF
36#include <security/mac_framework.h>
37#endif
38
39#if CONFIG_DTRACE
40extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
41extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
42#endif /* CONFIG_DTRACE */
43
44extern void
45unix_syscall(struct arm_saved_state * regs, thread_t thread_act, struct proc * proc);
46
47static int arm_get_syscall_args(uthread_t, struct arm_saved_state *, const struct sysent *);
48static int arm_get_u32_syscall_args(uthread_t, arm_saved_state32_t *, const struct sysent *);
49static void arm_prepare_u32_syscall_return(const struct sysent *, arm_saved_state_t *, uthread_t, int);
50static void arm_prepare_syscall_return(const struct sysent *, struct arm_saved_state *, uthread_t, int);
51static unsigned short arm_get_syscall_number(struct arm_saved_state *);
52static void arm_trace_unix_syscall(int, struct arm_saved_state *);
53static void arm_clear_syscall_error(struct arm_saved_state *);
54#define save_r0 r[0]
55#define save_r1 r[1]
56#define save_r2 r[2]
57#define save_r3 r[3]
58#define save_r4 r[4]
59#define save_r5 r[5]
60#define save_r6 r[6]
61#define save_r7 r[7]
62#define save_r8 r[8]
63#define save_r9 r[9]
64#define save_r10 r[10]
65#define save_r11 r[11]
66#define save_r12 r[12]
67#define save_r13 r[13]
68
69#if COUNT_SYSCALLS
70__XNU_PRIVATE_EXTERN int do_count_syscalls = 1;
71__XNU_PRIVATE_EXTERN int syscalls_log[SYS_MAXSYSCALL];
72#endif
73
74#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
75 ((code) == SYS_kdebug_trace64) || \
76 ((code) == SYS_kdebug_trace_string))
77
78#if CONFIG_DEBUG_SYSCALL_REJECTION
79extern int mach_trap_count;
80#endif
81
82/*
83 * Function: unix_syscall
84 *
85 * Inputs: regs - pointer to Process Control Block
86 *
87 * Outputs: none
88 */
89void
90unix_syscall(
91 struct arm_saved_state * state,
92 thread_t thread_act,
93 struct proc * proc)
94{
95 const struct sysent *callp;
96 int error;
97 unsigned short code, syscode;
98 pid_t pid;
99 struct uthread *uthread = get_bsdthread_info(thread_act);
100
101 uthread_reset_proc_refcount(uthread);
102
103 code = arm_get_syscall_number(state);
104
105#define unix_syscall_kprintf(x...) /* kprintf("unix_syscall: " x) */
106
107 if (kdebug_enable && !code_is_kdebug_trace(code)) {
108 arm_trace_unix_syscall(code, state);
109 }
110
111
112 syscode = (code < nsysent) ? code : SYS_invalid;
113 callp = &sysent[syscode];
114
115 /*
116 * sy_narg is inaccurate on ARM if a 64 bit parameter is specified. Since user_addr_t
117 * is currently a 32 bit type, this is really a long word count. See rdar://problem/6104668.
118 */
119 if (callp->sy_narg != 0) {
120 if (arm_get_syscall_args(uthread, state, callp) != 0) {
121 /* Too many arguments, or something failed */
122 unix_syscall_kprintf("arm_get_syscall_args failed.\n");
123 callp = &sysent[SYS_invalid];
124 }
125 }
126
127 uthread->uu_flag |= UT_NOTCANCELPT;
128 uthread->syscall_code = code;
129
130 uthread->uu_rval[0] = 0;
131
132 /*
133 * r4 is volatile, if we set it to regs->save_r4 here the child
134 * will have parents r4 after execve
135 */
136 uthread->uu_rval[1] = 0;
137
138 error = 0;
139
140 /*
141 * ARM runtime will call cerror if the carry bit is set after a
142 * system call, so clear it here for the common case of success.
143 */
144 arm_clear_syscall_error(state);
145
146#if COUNT_SYSCALLS
147 if (do_count_syscalls > 0) {
148 syscalls_log[code]++;
149 }
150#endif
151 pid = proc_pid(proc);
152
153#ifdef CONFIG_IOCOUNT_TRACE
154 uthread->uu_iocount = 0;
155 uthread->uu_vpindex = 0;
156#endif
157 unix_syscall_kprintf("code %d (pid %d - %s, tid %lld)\n", code,
158 pid, proc->p_comm, thread_tid(current_thread()));
159
160#if CONFIG_MACF
161 if (__improbable(proc_syscall_filter_mask(proc) != NULL && !bitstr_test(proc_syscall_filter_mask(proc), syscode))) {
162 error = mac_proc_check_syscall_unix(proc, scnum: syscode);
163 if (error) {
164 goto skip_syscall;
165 }
166 }
167#endif /* CONFIG_MACF */
168
169#if CONFIG_DEBUG_SYSCALL_REJECTION
170 unsigned int call_number = mach_trap_count + syscode;
171 if (__improbable(uthread->syscall_rejection_mask != NULL &&
172 uthread_syscall_rejection_is_enabled(uthread)) &&
173 !bitmap_test(uthread->syscall_rejection_mask, call_number)) {
174 if (debug_syscall_rejection_handle(syscode)) {
175 goto skip_syscall;
176 }
177 }
178#endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
179
180 AUDIT_SYSCALL_ENTER(code, proc, uthread);
181 error = (*(callp->sy_call))(proc, &uthread->uu_arg[0], &(uthread->uu_rval[0]));
182 AUDIT_SYSCALL_EXIT(code, proc, uthread, error);
183
184#if CONFIG_MACF
185skip_syscall:
186#endif /* CONFIG_MACF */
187
188 unix_syscall_kprintf("code %d, error %d, results %x, %x (pid %d - %s, tid %lld)\n", code, error,
189 uthread->uu_rval[0], uthread->uu_rval[1],
190 pid, get_bsdtask_info(current_task()) ? proc->p_comm : "unknown", thread_tid(current_thread()));
191
192#ifdef CONFIG_IOCOUNT_TRACE
193 if (uthread->uu_iocount) {
194 printf("system call returned with uu_iocount(%d) != 0",
195 uthread->uu_iocount);
196 }
197#endif
198#if CONFIG_DTRACE
199 uthread->t_dtrace_errno = error;
200#endif /* CONFIG_DTRACE */
201#if DEBUG || DEVELOPMENT
202 kern_allocation_name_t
203 prior __assert_only = thread_set_allocation_name(NULL);
204 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
205#endif /* DEBUG || DEVELOPMENT */
206
207 arm_prepare_syscall_return(callp, state, uthread, error);
208
209 uthread->uu_flag &= ~UT_NOTCANCELPT;
210 uthread->syscall_code = 0;
211
212 if (uthread->uu_lowpri_window) {
213 /*
214 * task is marked as a low priority I/O type
215 * and the I/O we issued while in this system call
216 * collided with normal I/O operations... we'll
217 * delay in order to mitigate the impact of this
218 * task on the normal operation of the system
219 */
220 throttle_lowpri_io(sleep_amount: 1);
221 }
222 if (kdebug_enable && !code_is_kdebug_trace(code)) {
223 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
224 error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
225 }
226
227 uthread_assert_zero_proc_refcount(uthread);
228}
229
230void
231unix_syscall_return(int error)
232{
233 thread_t thread_act;
234 struct uthread *uthread;
235 struct proc *proc;
236 struct arm_saved_state *regs;
237 unsigned short code;
238 const struct sysent *callp;
239
240#define unix_syscall_return_kprintf(x...) /* kprintf("unix_syscall_retur
241 * n: " x) */
242
243 thread_act = current_thread();
244 proc = current_proc();
245 uthread = get_bsdthread_info(thread_act);
246
247 regs = find_user_regs(thread_act);
248 code = uthread->syscall_code;
249 callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
250
251#if CONFIG_DTRACE
252 if (callp->sy_call == dtrace_systrace_syscall) {
253 dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
254 }
255#endif /* CONFIG_DTRACE */
256#if DEBUG || DEVELOPMENT
257 kern_allocation_name_t
258 prior __assert_only = thread_set_allocation_name(NULL);
259 assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
260#endif /* DEBUG || DEVELOPMENT */
261
262 AUDIT_SYSCALL_EXIT(code, proc, uthread, error);
263
264 /*
265 * Get index into sysent table
266 */
267 arm_prepare_syscall_return(callp, regs, uthread, error);
268
269 uthread->uu_flag &= ~UT_NOTCANCELPT;
270 uthread->syscall_code = 0;
271
272 if (uthread->uu_lowpri_window) {
273 /*
274 * task is marked as a low priority I/O type
275 * and the I/O we issued while in this system call
276 * collided with normal I/O operations... we'll
277 * delay in order to mitigate the impact of this
278 * task on the normal operation of the system
279 */
280 throttle_lowpri_io(sleep_amount: 1);
281 }
282 if (kdebug_enable && !code_is_kdebug_trace(code)) {
283 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
284 error, uthread->uu_rval[0], uthread->uu_rval[1], proc_getpid(proc));
285 }
286
287 thread_exception_return();
288 /* NOTREACHED */
289}
290
291static void
292arm_prepare_u32_syscall_return(const struct sysent *callp, arm_saved_state_t *regs, uthread_t uthread, int error)
293{
294 assert(is_saved_state32(regs));
295
296 arm_saved_state32_t *ss32 = saved_state32(iss: regs);
297
298 if (error == ERESTART) {
299 ss32->pc -= 4;
300 } else if (error != EJUSTRETURN) {
301 if (error) {
302 ss32->save_r0 = error;
303 ss32->save_r1 = 0;
304 /* set the carry bit to execute cerror routine */
305 ss32->cpsr |= PSR_CF;
306 unix_syscall_return_kprintf("error: setting carry to trigger cerror call\n");
307 } else { /* (not error) */
308 switch (callp->sy_return_type) {
309 case _SYSCALL_RET_INT_T:
310 case _SYSCALL_RET_UINT_T:
311 case _SYSCALL_RET_OFF_T:
312 case _SYSCALL_RET_ADDR_T:
313 case _SYSCALL_RET_SIZE_T:
314 case _SYSCALL_RET_SSIZE_T:
315 case _SYSCALL_RET_UINT64_T:
316 ss32->save_r0 = uthread->uu_rval[0];
317 ss32->save_r1 = uthread->uu_rval[1];
318 break;
319 case _SYSCALL_RET_NONE:
320 ss32->save_r0 = 0;
321 ss32->save_r1 = 0;
322 break;
323 default:
324 panic("unix_syscall: unknown return type");
325 break;
326 }
327 }
328 }
329 /* else (error == EJUSTRETURN) { nothing } */
330}
331
332static void
333arm_trace_u32_unix_syscall(int code, arm_saved_state32_t *regs)
334{
335 bool indirect = (regs->save_r12 == 0);
336 if (indirect) {
337 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
338 regs->save_r1, regs->save_r2, regs->save_r3, regs->save_r4);
339 } else {
340 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
341 regs->save_r0, regs->save_r1, regs->save_r2, regs->save_r3);
342 }
343}
344
345static void
346arm_clear_u32_syscall_error(arm_saved_state32_t *regs)
347{
348 regs->cpsr &= ~PSR_CF;
349}
350
351#if defined(__arm64__)
352static void arm_prepare_u64_syscall_return(const struct sysent *, arm_saved_state_t *, uthread_t, int);
353static int arm_get_u64_syscall_args(uthread_t, arm_saved_state64_t *, const struct sysent *);
354
355static int
356arm_get_syscall_args(uthread_t uthread, struct arm_saved_state *state, const struct sysent *callp)
357{
358 if (is_saved_state32(iss: state)) {
359 return arm_get_u32_syscall_args(uthread, saved_state32(iss: state), callp);
360 } else {
361 return arm_get_u64_syscall_args(uthread, saved_state64(iss: state), callp);
362 }
363}
364
365/*
366 * 64-bit: all arguments in registers. We're willing to use x9, a temporary
367 * register per the ABI, to pass an argument to the kernel for one case,
368 * an indirect syscall with 8 arguments. No munging required, as all arguments
369 * are in 64-bit wide registers already.
370 */
371static int
372arm_get_u64_syscall_args(uthread_t uthread, arm_saved_state64_t *regs, const struct sysent *callp)
373{
374 int indirect_offset;
375
376#if CONFIG_REQUIRES_U32_MUNGING
377 sy_munge_t *mungerp;
378#endif
379
380 indirect_offset = (regs->x[ARM64_SYSCALL_CODE_REG_NUM] == 0) ? 1 : 0;
381
382 /*
383 * Everything should fit in registers for now.
384 */
385 if (callp->sy_narg > (int)(sizeof(uthread->uu_arg) / sizeof(uthread->uu_arg[0]))) {
386 return -1;
387 }
388
389 memcpy(dst: &uthread->uu_arg[0], src: &regs->x[indirect_offset], n: callp->sy_narg * sizeof(uint64_t));
390
391#if CONFIG_REQUIRES_U32_MUNGING
392 /*
393 * The indirect system call interface is vararg based. For armv7k, arm64_32,
394 * and arm64, this means we simply lay the values down on the stack, padded to
395 * a width multiple (4 bytes for armv7k and arm64_32, 8 bytes for arm64).
396 * The arm64(_32) stub for syscall will load this data into the registers and
397 * then trap. This gives us register state that corresponds to what we would
398 * expect from a armv7 task, so in this particular case we need to munge the
399 * arguments.
400 *
401 * TODO: Is there a cleaner way to do this check? What we're actually
402 * interested in is whether the task is arm64_32. We don't appear to guarantee
403 * that uu_proc is populated here, which is why this currently uses the
404 * thread_t.
405 */
406 mungerp = callp->sy_arg_munge32;
407
408 if (indirect_offset && !ml_thread_is64bit(thread: get_machthread(uthread))) {
409 (*mungerp)(&uthread->uu_arg[0]);
410 }
411#endif
412
413 return 0;
414}
415/*
416 * When the kernel is running AArch64, munge arguments from 32-bit
417 * userland out to 64-bit.
418 *
419 * flavor == 1 indicates an indirect syscall.
420 */
421static int
422arm_get_u32_syscall_args(uthread_t uthread, arm_saved_state32_t *regs, const struct sysent *callp)
423{
424 int regparams;
425#if CONFIG_REQUIRES_U32_MUNGING
426 sy_munge_t *mungerp;
427#else
428#error U32 syscalls on ARM64 kernel requires munging
429#endif
430 int flavor = (regs->save_r12 == 0 ? 1 : 0);
431
432 regparams = (7 - flavor); /* Indirect value consumes a register */
433
434 assert((unsigned) callp->sy_arg_bytes <= sizeof(uthread->uu_arg));
435
436 if (callp->sy_arg_bytes <= (sizeof(uint32_t) * regparams)) {
437 /*
438 * Seven arguments or less are passed in registers.
439 */
440 memcpy(dst: &uthread->uu_arg[0], src: &regs->r[flavor], n: callp->sy_arg_bytes);
441 } else if (callp->sy_arg_bytes <= sizeof(uthread->uu_arg)) {
442 /*
443 * In this case, we composite - take the first args from registers,
444 * the remainder from the stack (offset by the 7 regs therein).
445 */
446 unix_syscall_kprintf("%s: spillover...\n", __FUNCTION__);
447 memcpy(dst: &uthread->uu_arg[0], src: &regs->r[flavor], n: regparams * sizeof(int));
448 if (copyin((user_addr_t)regs->sp + 7 * sizeof(int), (int *)&uthread->uu_arg[0] + regparams,
449 (callp->sy_arg_bytes - (sizeof(uint32_t) * regparams))) != 0) {
450 return -1;
451 }
452 } else {
453 return -1;
454 }
455
456#if CONFIG_REQUIRES_U32_MUNGING
457 /* Munge here */
458 mungerp = callp->sy_arg_munge32;
459 if (mungerp != NULL) {
460 (*mungerp)(&uthread->uu_arg[0]);
461 }
462#endif
463
464 return 0;
465}
466
467static unsigned short
468arm_get_syscall_number(struct arm_saved_state *state)
469{
470 if (is_saved_state32(iss: state)) {
471 if (saved_state32(iss: state)->save_r12 != 0) {
472 return (unsigned short)saved_state32(iss: state)->save_r12;
473 } else {
474 return (unsigned short)saved_state32(iss: state)->save_r0;
475 }
476 } else {
477 if (saved_state64(iss: state)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
478 return (unsigned short)saved_state64(iss: state)->x[ARM64_SYSCALL_CODE_REG_NUM];
479 } else {
480 return (unsigned short)saved_state64(iss: state)->x[0];
481 }
482 }
483}
484
485static void
486arm_prepare_syscall_return(const struct sysent *callp, struct arm_saved_state *state, uthread_t uthread, int error)
487{
488 if (is_saved_state32(iss: state)) {
489 arm_prepare_u32_syscall_return(callp, regs: state, uthread, error);
490 } else {
491 arm_prepare_u64_syscall_return(callp, state, uthread, error);
492 }
493}
494
495static void
496arm_prepare_u64_syscall_return(const struct sysent *callp, arm_saved_state_t *regs, uthread_t uthread, int error)
497{
498 assert(is_saved_state64(regs));
499
500 arm_saved_state64_t *ss64 = saved_state64(iss: regs);
501
502 if (error == ERESTART) {
503 add_user_saved_state_pc(iss: regs, diff: -4);
504 } else if (error != EJUSTRETURN) {
505 if (error) {
506 ss64->x[0] = error;
507 ss64->x[1] = 0;
508 /*
509 * Set the carry bit to execute cerror routine.
510 * ARM64_TODO: should we have a separate definition?
511 * The bits are the same.
512 */
513 ss64->cpsr |= PSR64_CF;
514 unix_syscall_return_kprintf("error: setting carry to trigger cerror call\n");
515 } else { /* (not error) */
516 switch (callp->sy_return_type) {
517 case _SYSCALL_RET_INT_T:
518 ss64->x[0] = uthread->uu_rval[0];
519 ss64->x[1] = uthread->uu_rval[1];
520 break;
521 case _SYSCALL_RET_UINT_T:
522 ss64->x[0] = (u_int)uthread->uu_rval[0];
523 ss64->x[1] = (u_int)uthread->uu_rval[1];
524 break;
525 case _SYSCALL_RET_OFF_T:
526 case _SYSCALL_RET_ADDR_T:
527 case _SYSCALL_RET_SIZE_T:
528 case _SYSCALL_RET_SSIZE_T:
529 case _SYSCALL_RET_UINT64_T:
530 ss64->x[0] = *((uint64_t *)(&uthread->uu_rval[0]));
531 ss64->x[1] = 0;
532 break;
533 case _SYSCALL_RET_NONE:
534 break;
535 default:
536 panic("unix_syscall: unknown return type");
537 break;
538 }
539 }
540 }
541 /* else (error == EJUSTRETURN) { nothing } */
542}
543static void
544arm_trace_u64_unix_syscall(int code, arm_saved_state64_t *regs)
545{
546 bool indirect = (regs->x[ARM64_SYSCALL_CODE_REG_NUM] == 0);
547 if (indirect) {
548 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
549 regs->x[1], regs->x[2], regs->x[3], regs->x[4]);
550 } else {
551 KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
552 regs->x[0], regs->x[1], regs->x[2], regs->x[3]);
553 }
554}
555
/* Emit the syscall-start trace event, using the tracer that matches the saved state's width. */
static void
arm_trace_unix_syscall(int code, struct arm_saved_state *state)
{
	if (!is_saved_state32(state)) {
		arm_trace_u64_unix_syscall(code, saved_state64(state));
		return;
	}
	arm_trace_u32_unix_syscall(code, saved_state32(state));
}
565
566static void
567arm_clear_u64_syscall_error(arm_saved_state64_t *regs)
568{
569 regs->cpsr &= ~PSR64_CF;
570}
571
/* Clear the carry (error) flag in the saved user state, whichever width it has. */
static void
arm_clear_syscall_error(struct arm_saved_state * state)
{
	if (!is_saved_state32(state)) {
		arm_clear_u64_syscall_error(saved_state64(state));
		return;
	}
	arm_clear_u32_syscall_error(saved_state32(state));
}
581
582#else
583#error Unknown architecture.
584#endif
585