1/*
2 * Copyright (c) 2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/sysctl.h>
30
31#include <kern/cpu_data.h>
32
33#if __arm64__
34#include <arm/machine_routines.h>
35#endif /* __arm64__ */
36
37#if CONFIG_DEBUG_SYSCALL_REJECTION
38
39#include <mach/mach_time.h>
40
41#include <kern/bits.h>
42#include <kern/clock.h>
43#include <kern/exc_guard.h>
44#include <kern/exception.h>
45#include <kern/kalloc.h>
46#include <kern/simple_lock.h>
47#include <kern/startup.h>
48#include <kern/syscall_sw.h>
49#include <kern/task.h>
50
51#include <pexpert/pexpert.h>
52
53#include <sys/syscall.h>
54#include <sys/sysent.h>
55#include <sys/systm.h>
56#include <sys/types.h>
57#include <sys/user.h>
58#include <sys/variant_internal.h>
59
60#include <sys/kern_debug.h>
61
62#define SYSCALL_REJECTION_MODE_IGNORE 0
63#define SYSCALL_REJECTION_MODE_GUARD 1
64#define SYSCALL_REJECTION_MODE_CRASH 2
65
66TUNABLE_WRITEABLE(int, debug_syscall_rejection_mode, "syscall_rejection_mode",
67#if DEVELOPMENT || DEBUG
68 SYSCALL_REJECTION_MODE_GUARD
69#else
70 SYSCALL_REJECTION_MODE_IGNORE
71#endif
72 );
73
74static int
75sysctl_debug_syscall_rejection_mode(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
76 struct sysctl_req *req)
77{
78 int error, changed;
79 int value = *(int *) arg1;
80
81 if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
82 return ENOTSUP;
83 }
84
85 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
86 if (!error && changed) {
87 debug_syscall_rejection_mode = value;
88 }
89 return error;
90}
91
92void
93reset_debug_syscall_rejection_mode(void)
94{
95 if (!os_variant_has_internal_diagnostics("com.apple.xnu")) {
96 debug_syscall_rejection_mode = 0;
97 }
98}
99
100SYSCTL_PROC(_kern, OID_AUTO, debug_syscall_rejection_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
101 &debug_syscall_rejection_mode, 0, sysctl_debug_syscall_rejection_mode, "I", "0: ignore, 1: non-fatal, 2: crash");
102
103
104static size_t const predefined_masks = 2; // 0: null mask (all 0), 1: all mask (all 1)
105
106/*
107 * The number of masks is derived from the mask selector width:
108 *
109 * A selector is just made of an index into syscall_rejection_masks,
110 * with the exception of the highest bit, which indicates whether the
111 * mask is to be added as an "allow" mask or a "deny" mask.
112 * Additionally, predefined masks don't actually have storage and are
113 * handled specially, so syscall_rejection_masks starts with the first
114 * non-predefined mask (and is sized appropriately).
115 */
116static size_t const syscall_rejection_mask_count = SYSCALL_REJECTION_SELECTOR_MASK_COUNT - predefined_masks;
117static syscall_rejection_mask_t syscall_rejection_masks[syscall_rejection_mask_count];
118
119#define SR_MASK_SIZE (BITMAP_SIZE(mach_trap_count + nsysent))
120
121static LCK_GRP_DECLARE(syscall_rejection_lck_grp, "syscall rejection lock");
122static LCK_MTX_DECLARE(syscall_rejection_mtx, &syscall_rejection_lck_grp);
123
124bool
125debug_syscall_rejection_handle(int syscall_mach_trap_number)
126{
127 uthread_t ut = current_uthread();
128 uint64_t const flags = ut->syscall_rejection_flags;
129 bool fatal = (bool)(flags & SYSCALL_REJECTION_FLAGS_FORCE_FATAL);
130
131 switch (debug_syscall_rejection_mode) {
132 case SYSCALL_REJECTION_MODE_IGNORE:
133 if (!fatal) {
134 /* ignore */
135 break;
136 }
137 OS_FALLTHROUGH;
138 case SYSCALL_REJECTION_MODE_CRASH:
139 fatal = true;
140 OS_FALLTHROUGH;
141 case SYSCALL_REJECTION_MODE_GUARD: {
142 if (flags & SYSCALL_REJECTION_FLAGS_ONCE) {
143 int const number = syscall_mach_trap_number < 0 ? -syscall_mach_trap_number : (mach_trap_count + syscall_mach_trap_number);
144
145 // don't trip on this system call again
146 bitmap_set(ut->syscall_rejection_mask, number);
147 bitmap_set(ut->syscall_rejection_once_mask, number);
148 }
149
150 mach_exception_code_t code = 0;
151 EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_REJECTED_SC);
152 EXC_GUARD_ENCODE_FLAVOR(code, 0);
153 EXC_GUARD_ENCODE_TARGET(code, syscall_mach_trap_number < 0);
154 mach_exception_subcode_t subcode =
155 syscall_mach_trap_number < 0 ? -syscall_mach_trap_number : syscall_mach_trap_number;
156
157 if (!fatal) {
158 task_violated_guard(code, subcode, NULL, TRUE);
159 } else {
160 thread_guard_violation(current_thread(), code, subcode, fatal);
161 }
162 break;
163 };
164 default:
165 /* ignore */
166 ;
167 }
168 return fatal;
169}
170
171extern int exit_with_guard_exception(void *p, mach_exception_data_type_t code,
172 mach_exception_data_type_t subcode);
173
174void
175rejected_syscall_guard_ast(
176 thread_t t,
177 mach_exception_data_type_t code,
178 mach_exception_data_type_t subcode)
179{
180 const bool fatal = true;
181 /*
182 * Check if anyone has registered for Synchronous EXC_GUARD, if yes then,
183 * deliver it synchronously and then kill the process, else kill the process
184 * and deliver the exception via EXC_CORPSE_NOTIFY. Always kill the process if we are not in dev mode.
185 */
186 if (task_exception_notify(EXC_GUARD, code, subcode, fatal) == KERN_SUCCESS) {
187 psignal_uthread(t, SIGSYS);
188 } else {
189 exit_with_guard_exception(current_proc(), code, subcode);
190 }
191}
192
193
194static void
195_syscall_rejection_apply_mask(syscall_rejection_mask_t dest, const syscall_rejection_mask_t src, bool apply_as_allow)
196{
197 assert(dest != NULL);
198 assert(src != NULL);
199
200 if (apply_as_allow) {
201 bitmap_or(dest, dest, src, mach_trap_count + nsysent);
202 } else {
203 bitmap_and_not(dest, dest, src, mach_trap_count + nsysent);
204 }
205}
206
207/*
208 * The masks to apply are passed to the kernel as packed selectors,
209 * which are just however many of the selector data type fit into one
210 * (or more) fields of the natural word size (i.e. a register). This
211 * avoids copying from user space.
212 *
213 * More specifically, at the time of this writing, a selector is 7
214 * bits wide, and there are two uint64_t arguments
215 * (args->packed_selectors<n>), so up to 18 selectors can be
216 * specified, which are then stuffed into the 128 bits of the
 * arguments. If fewer than 18 masks are requested to be applied, the
 * remaining selectors will just be left as 0, which naturally
 * resolves to the "empty" or "NULL" mask that changes nothing.
220 *
221 * The libsyscall wrapper provides a more convenient interface where
222 * an array (up to 18 elements long) and its length are passed in,
223 * which the wrapper then packs into packed_selectors of the actual
224 * system call.
225 */
226
227int
228sys_debug_syscall_reject_config(struct proc *p __unused, struct debug_syscall_reject_config_args *args, int *retval)
229{
230 int error = 0;
231
232 *retval = 0;
233
234 uthread_t ut = current_uthread();
235
236 bitmap_t mask[SR_MASK_SIZE / sizeof(bitmap_t)];
237 // syscall rejection masks are always reset to "deny all"
238 memset(mask, 0, SR_MASK_SIZE);
239
240 lck_mtx_lock(&syscall_rejection_mtx);
241
242 for (int i = 0;
243 i + SYSCALL_REJECTION_SELECTOR_BITS < (sizeof(args->packed_selectors1) + sizeof(args->packed_selectors2)) * 8;
244 i += SYSCALL_REJECTION_SELECTOR_BITS) {
245#define s_left_shift(x, n) ((n) < 0 ? ((x) >> -(n)) : ((x) << (n)))
246
247 syscall_rejection_selector_t const selector = (syscall_rejection_selector_t)
248 (((i < 64 ? (args->packed_selectors1 >> i) : 0) |
249 (i > 64 - SYSCALL_REJECTION_SELECTOR_BITS ? s_left_shift(args->packed_selectors2, 64 - i) : 0)) & SYSCALL_REJECTION_SELECTOR_MASK);
250 bool const is_allow_mask = selector & SYSCALL_REJECTION_IS_ALLOW_MASK;
251 int const mask_index = selector & SYSCALL_REJECTION_INDEX_MASK;
252
253 if (mask_index == SYSCALL_REJECTION_NULL) {
254 // mask 0 is always empty (nothing to apply)
255 continue;
256 }
257
258 if (mask_index == SYSCALL_REJECTION_ALL) {
259 // mask 1 is always full (overrides everything)
260 memset(mask, is_allow_mask ? 0xff : 0x00, SR_MASK_SIZE);
261 continue;
262 }
263
264 syscall_rejection_mask_t mask_to_apply = syscall_rejection_masks[mask_index - predefined_masks];
265
266 if (mask_to_apply == NULL) {
267 error = ENOENT;
268 goto out_locked;
269 }
270
271 _syscall_rejection_apply_mask(mask, mask_to_apply, is_allow_mask);
272 }
273
274 /* Not RT-safe, but only necessary once. */
275 if (ut->syscall_rejection_mask == NULL) {
276 ut->syscall_rejection_mask = kalloc_data(SR_MASK_SIZE, Z_WAITOK);
277
278 if (ut->syscall_rejection_mask == NULL) {
279 error = ENOMEM;
280 goto out_locked;
281 }
282 }
283
284 memcpy(ut->syscall_rejection_mask, mask, SR_MASK_SIZE);
285
286 if ((args->flags & SYSCALL_REJECTION_FLAGS_ONCE)) {
287 if (ut->syscall_rejection_once_mask == NULL) {
288 ut->syscall_rejection_once_mask = kalloc_data(SR_MASK_SIZE, Z_WAITOK);
289
290 if (ut->syscall_rejection_once_mask == NULL) {
291 kfree_data(ut->syscall_rejection_mask, SR_MASK_SIZE);
292 ut->syscall_rejection_mask = NULL;
293 error = ENOMEM;
294 goto out_locked;
295 }
296
297 memset(ut->syscall_rejection_once_mask, 0, SR_MASK_SIZE);
298 } else {
299 // prevent the already hit syscalls from hitting again.
300 bitmap_or(ut->syscall_rejection_mask, ut->syscall_rejection_mask, ut->syscall_rejection_once_mask, mach_trap_count + nsysent);
301 }
302 }
303
304out_locked:
305 lck_mtx_unlock(&syscall_rejection_mtx);
306
307 if (error == 0) {
308 ut->syscall_rejection_flags = args->flags;
309 }
310
311 if (error == ENOENT && debug_syscall_rejection_mode == SYSCALL_REJECTION_MODE_IGNORE) {
312 /* Existing code may rely on the system call failing
313 * gracefully if syscall rejection is currently off. */
314 error = 0;
315 }
316
317 return error;
318}
319
320/*
321 * debug_syscall_reject
322 *
323 * Compatibility interface to the old form of the system call.
324 */
325int
326debug_syscall_reject(struct proc *p, struct debug_syscall_reject_args *args, int *retval)
327{
328 struct debug_syscall_reject_config_args new_args;
329
330 bzero(&new_args, sizeof(new_args));
331 new_args.packed_selectors1 = args->packed_selectors;
332 // packed_selectors2 left empty
333 new_args.flags = SYSCALL_REJECTION_FLAGS_DEFAULT;
334
335 return sys_debug_syscall_reject_config(p, &new_args, retval);
336}
337
338
339static bool
340_syscall_rejection_add(syscall_rejection_mask_t dst, char const *name)
341{
342 /*
343 * Yes, this function is O(n+m), making the whole act of setting a
344 * mask O(l*(n+m)), but defining masks is done rarely enough (and
345 * i, n and m small enough) for this to not matter.
346 */
347
348 for (int i = 0; i < mach_trap_count; i++) {
349 if (strcmp(mach_syscall_name_table[i], name) == 0) {
350 bitmap_set(dst, i);
351 return true;
352 }
353 }
354
355 extern char const *syscallnames[];
356
357 for (int i = 0; i < nsysent; i++) {
358 if (strcmp(syscallnames[i], name) == 0) {
359 bitmap_set(dst, i + mach_trap_count);
360 return true;
361 }
362 }
363
364 printf("%s: trying to add non-existing syscall/mach trap '%s'\n", __func__, name);
365 return false;
366}
367
368/* Pretty much arbitrary, we just don't want userspace to pass
369 * unreasonably large buffers to parse. */
370static size_t const max_input_size = 16 * PAGE_MAX_SIZE;
371
372static int
373_sysctl_debug_syscall_rejection_masks(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
374 struct sysctl_req *req)
375{
376 size_t const max_name_len = 128;
377 char name[max_name_len];
378
379 if (req->newptr == 0) {
380 return 0;
381 }
382
383 if (req->newlen > max_input_size) {
384 return E2BIG;
385 }
386
387 size_t const len = req->newlen;
388 char *buf = kalloc_data(len + 1, Z_WAITOK);
389
390 if (buf == NULL) {
391 return ENOMEM;
392 }
393
394 /*
395 * sysctl_io_string always copies out the given buffer as the
396 * "old" value if requested. We could construct a text
397 * representation of existing masks, but this is not particularly
398 * interesting, so we just return the dummy string "<masks>".
399 */
400 strlcpy(buf, "<masks>", len + 1);
401 int changed = 0;
402 int error = sysctl_io_string(req, buf, len + 1, 0, &changed);
403
404 if (error != 0 || !changed) {
405 goto out;
406 }
407
408 char const *p = buf;
409
410 int id = 0;
411 int l = 0;
412 int n = sscanf(p, "%i: %n", &id, &l);
413
414 if (n != 1 || id < predefined_masks || id > syscall_rejection_mask_count + predefined_masks) {
415 printf("%s: invalid mask id %i (or conversion failed)\n", __FUNCTION__, id);
416 error = EINVAL;
417 goto out;
418 }
419
420 p += l;
421
422 syscall_rejection_mask_t new_mask = kalloc_data(SR_MASK_SIZE,
423 Z_WAITOK | Z_ZERO);
424 if (new_mask == NULL) {
425 printf("%s: allocating new mask for id %i failed\n", __FUNCTION__, id);
426 error = ENOMEM;
427 goto out;
428 }
429
430 error = 0;
431
432 while (p < buf + len && *p != 0) {
433 name[0] = 0;
434 n = sscanf(p, "%127s %n", name, &l);
435 if (n != 1 || name[0] == 0) {
436 error = EINVAL;
437 kfree_data(new_mask, SR_MASK_SIZE);
438 goto out;
439 }
440
441 if (!_syscall_rejection_add(new_mask, name)) {
442 error = ENOENT;
443 kfree_data(new_mask, SR_MASK_SIZE);
444 goto out;
445 }
446
447 p += l;
448 }
449
450
451 syscall_rejection_mask_t to_free = NULL;
452
453 lck_mtx_lock(&syscall_rejection_mtx);
454
455 syscall_rejection_mask_t *target_mask = &syscall_rejection_masks[id - predefined_masks];
456
457 to_free = *target_mask;
458 *target_mask = new_mask;
459
460 lck_mtx_unlock(&syscall_rejection_mtx);
461
462 kfree_data(to_free, SR_MASK_SIZE);
463out:
464
465 kfree_data(buf, len + 1);
466 return error;
467}
468
469SYSCTL_PROC(_kern, OID_AUTO, syscall_rejection_masks, CTLTYPE_STRING | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
470 0, 0, _sysctl_debug_syscall_rejection_masks, "A", "system call rejection masks");
471
472#else /* CONFIG_DEBUG_SYSCALL_REJECTION */
473
474#include <sys/kern_debug.h>
475
476int
477sys_debug_syscall_reject_config(struct proc * __unused p, struct debug_syscall_reject_config_args * __unused args, int __unused *ret)
478{
479 /* not supported. */
480 return ENOTSUP;
481}
482
483int
484debug_syscall_reject(struct proc * __unused p, struct debug_syscall_reject_args * __unused args, int * __unused retval)
485{
486 /* not supported. */
487 return ENOTSUP;
488}
489
/*
 * Variant for kernels built without CONFIG_DEBUG_SYSCALL_REJECTION:
 * there is no rejection mode, so there is nothing to reset.
 */
void
reset_debug_syscall_rejection_mode(void)
{
}
495
496#endif /* CONFIG_DEBUG_SYSCALL_REJECTION */
497
498#if __arm64__ && (DEBUG || DEVELOPMENT)
499
/*
 * Busy-wait until the given number of nanoseconds has elapsed, as
 * measured by mach_absolute_time().
 */
static void
_spinfor(uint64_t nanoseconds)
{
	uint64_t duration = 0;
	nanoseconds_to_absolutetime(nanoseconds, &duration);

	uint64_t const deadline = mach_absolute_time() + duration;

	while (mach_absolute_time() < deadline) {
		/* spin */
		continue;
	}
}
512
513static int
514_sysctl_debug_disable_interrupts_test(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
515 struct sysctl_req *req)
516{
517 int error = 0;
518
519 if (req->newptr == 0) {
520 goto out;
521 }
522
523 uint64_t val = 0;
524 error = sysctl_io_number(req, 0, sizeof(val), &val, NULL);
525
526 if (error != 0 || val == 0) {
527 goto out;
528 }
529
530 boolean_t istate = ml_set_interrupts_enabled(false);
531 _spinfor(val);
532 ml_set_interrupts_enabled(istate);
533
534out:
535 return error;
536}
537
538static int
539_sysctl_debug_disable_preemption_test(struct sysctl_oid __unused *oidp, void * __unused arg1, int __unused arg2,
540 struct sysctl_req *req)
541{
542 int error = 0;
543
544 if (req->newptr == 0) {
545 goto out;
546 }
547
548 uint64_t val = 0;
549 error = sysctl_io_number(req, 0, sizeof(val), &val, NULL);
550
551 if (error != 0 || val == 0) {
552 goto out;
553 }
554
555 disable_preemption();
556 _spinfor(val);
557 enable_preemption();
558
559out:
560 return error;
561}
562
563SYSCTL_PROC(_kern, OID_AUTO, debug_disable_interrupts_test, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
564 0, 0, _sysctl_debug_disable_interrupts_test, "Q", "disable interrupts for specified number of nanoseconds, for testing");
565
566SYSCTL_PROC(_kern, OID_AUTO, debug_disable_preemption_test, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_MASKED | CTLFLAG_LOCKED,
567 0, 0, _sysctl_debug_disable_preemption_test, "Q", "disable preemption for specified number of nanoseconds, for testing");
568
569#endif /* __arm64__ && (DEBUG || DEVELOPMENT) */
570