1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <mach-o/loader.h>
27#include <libkern/kernel_mach_header.h>
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/sysctl.h>
32#include <sys/errno.h>
33#include <sys/stat.h>
34#include <sys/ioctl.h>
35#include <sys/conf.h>
36#include <sys/fcntl.h>
37#include <miscfs/devfs/devfs.h>
38#include <pexpert/pexpert.h>
39
40#include <sys/dtrace.h>
41#include <sys/dtrace_impl.h>
42#include <sys/fbt.h>
43
44#include <sys/dtrace_glue.h>
45#include <san/kasan.h>
46
47#include <ptrauth.h>
48
49/* #include <machine/trap.h> */
struct savearea_t; /* Used anonymously */

/*
 * Trap hook plumbing. tempDTraceTrapHook is a hook pointer defined outside
 * this file; fbt_enable() and fbt_resume() try to install fbt_perfCallback
 * into it. The callback signature differs between the two supported
 * architectures, and any other architecture is a build error.
 */
#if defined(__arm64__)
typedef kern_return_t (*perfCallback)(int, struct savearea_t *, __unused int, __unused int);
extern perfCallback tempDTraceTrapHook;
extern kern_return_t fbt_perfCallback(int, struct savearea_t *, __unused int, __unused int);
#elif defined(__x86_64__)
typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int);
extern perfCallback tempDTraceTrapHook;
extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int);
#else
#error Unknown architecture
#endif

/* Sort routine used below to order a copy of a module's symbol table. */
__private_extern__
void
qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));

/*
 * Hash a patch-point address into an fbt_probetab bucket: discard the low
 * four address bits, then mask with fbt_probetab_mask (table size - 1, so
 * the table size is expected to be a power of two).
 */
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
/*
 * Default bucket count: 32k entries. The table occupies
 * FBT_PROBETAB_SIZE * sizeof(fbt_probe_t *) bytes, i.e. 256K with the
 * 8-byte pointers of the architectures accepted above.
 */
#define FBT_PROBETAB_SIZE 0x8000

/* Bucket count; fbt_attach() defaults it to FBT_PROBETAB_SIZE while it is 0. */
static int fbt_probetab_size;
/* Provider id filled in by dtrace_register() in fbt_attach(). */
dtrace_provider_id_t fbt_id;
/* Hash table of probes, indexed by FBT_ADDR2NDX(patch point). */
fbt_probe_t **fbt_probetab;
/* fbt_probetab_size - 1; reset to 0 by fbt_cleanup(). */
int fbt_probetab_mask;
/* When non-zero, enable/resume failures are reported through cmn_err(). */
static int fbt_verbose = 0;

/* When non-zero, fbt_provide_module() flags modules to provide blacklisted probes. */
extern int ignore_fbt_blacklist;

/* Asserted not to be DTRACE_KERNEL_SYMBOLS_NEVER in fbt_provide_module(). */
extern int dtrace_kernel_symbol_mode;


void fbt_init( void );
83
84/*ARGSUSED*/
85static void
86fbt_destroy(void *arg, dtrace_id_t id, void *parg)
87{
88#pragma unused(arg,id)
89 fbt_probe_t *fbt = parg, *next, *hash, *last;
90 int ndx;
91
92 do {
93 /*
94 * Now we need to remove this probe from the fbt_probetab.
95 */
96 ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
97 last = NULL;
98 hash = fbt_probetab[ndx];
99
100 while (hash != fbt) {
101 ASSERT(hash != NULL);
102 last = hash;
103 hash = hash->fbtp_hashnext;
104 }
105
106 if (last != NULL) {
107 last->fbtp_hashnext = fbt->fbtp_hashnext;
108 } else {
109 fbt_probetab[ndx] = fbt->fbtp_hashnext;
110 }
111
112 next = fbt->fbtp_next;
113 kmem_free(fbt, sizeof(fbt_probe_t));
114
115 fbt = next;
116 } while (fbt != NULL);
117}
118
119/*ARGSUSED*/
120int
121fbt_enable(void *arg, dtrace_id_t id, void *parg)
122{
123#pragma unused(arg,id)
124 fbt_probe_t *fbt = parg;
125 struct modctl *ctl = NULL;
126
127 for (; fbt != NULL; fbt = fbt->fbtp_next) {
128 ctl = fbt->fbtp_ctl;
129
130 if (!ctl->mod_loaded) {
131 if (fbt_verbose) {
132 cmn_err(CE_NOTE, "fbt is failing for probe %s "
133 "(module %s unloaded)",
134 fbt->fbtp_name, ctl->mod_modname);
135 }
136
137 continue;
138 }
139
140 /*
141 * Now check that our modctl has the expected load count. If it
142 * doesn't, this module must have been unloaded and reloaded -- and
143 * we're not going to touch it.
144 */
145 if (ctl->mod_loadcnt != fbt->fbtp_loadcnt) {
146 if (fbt_verbose) {
147 cmn_err(CE_NOTE, "fbt is failing for probe %s "
148 "(module %s reloaded)",
149 fbt->fbtp_name, ctl->mod_modname);
150 }
151
152 continue;
153 }
154
155 dtrace_casptr(&tempDTraceTrapHook, NULL, ptrauth_nop_cast(void *, &fbt_perfCallback));
156 if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
157 if (fbt_verbose) {
158 cmn_err(CE_NOTE, "fbt_enable is failing for probe %s "
159 "in module %s: tempDTraceTrapHook already occupied.",
160 fbt->fbtp_name, ctl->mod_modname);
161 }
162 continue;
163 }
164
165 if (fbt->fbtp_currentval != fbt->fbtp_patchval) {
166#if KASAN_CLASSIC
167 /* Since dtrace probes can call into KASan and vice versa, things can get
168 * very slow if we have a lot of probes. This call will disable the KASan
169 * fakestack after a threshold of probes is reached. */
170 kasan_fakestack_suspend();
171#endif /* KASAN_CLASSIC */
172
173 (void)ml_nofault_copy(virtsrc: (vm_offset_t)&fbt->fbtp_patchval, virtdst: (vm_offset_t)fbt->fbtp_patchpoint,
174 size: sizeof(fbt->fbtp_patchval));
175 /*
176 * Make the patched instruction visible via a data + instruction
177 * cache flush for the platforms that need it
178 */
179 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_patchval), 0);
180 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_patchval), 0);
181 fbt->fbtp_currentval = fbt->fbtp_patchval;
182
183 ctl->mod_nenabled++;
184 }
185 }
186
187 dtrace_membar_consumer();
188
189 return 0;
190}
191
192/*ARGSUSED*/
193static void
194fbt_disable(void *arg, dtrace_id_t id, void *parg)
195{
196#pragma unused(arg,id)
197 fbt_probe_t *fbt = parg;
198 struct modctl *ctl = NULL;
199
200 for (; fbt != NULL; fbt = fbt->fbtp_next) {
201 ctl = fbt->fbtp_ctl;
202
203 if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) {
204 continue;
205 }
206
207 if (fbt->fbtp_currentval != fbt->fbtp_savedval) {
208 (void)ml_nofault_copy(virtsrc: (vm_offset_t)&fbt->fbtp_savedval, virtdst: (vm_offset_t)fbt->fbtp_patchpoint,
209 size: sizeof(fbt->fbtp_savedval));
210 /*
211 * Make the patched instruction visible via a data + instruction
212 * cache flush for the platforms that need it
213 */
214 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_patchval), 0);
215 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_patchval), 0);
216
217 fbt->fbtp_currentval = fbt->fbtp_savedval;
218 ASSERT(ctl->mod_nenabled > 0);
219 ctl->mod_nenabled--;
220
221#if KASAN && KASAN_CLASSIC
222 kasan_fakestack_resume();
223#endif /* KASAN && KASAN_CLASSIC */
224 }
225 }
226 dtrace_membar_consumer();
227}
228
229/*ARGSUSED*/
230static void
231fbt_suspend(void *arg, dtrace_id_t id, void *parg)
232{
233#pragma unused(arg,id)
234 fbt_probe_t *fbt = parg;
235 struct modctl *ctl = NULL;
236
237 for (; fbt != NULL; fbt = fbt->fbtp_next) {
238 ctl = fbt->fbtp_ctl;
239
240 ASSERT(ctl->mod_nenabled > 0);
241 if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) {
242 continue;
243 }
244
245 (void)ml_nofault_copy(virtsrc: (vm_offset_t)&fbt->fbtp_savedval, virtdst: (vm_offset_t)fbt->fbtp_patchpoint,
246 size: sizeof(fbt->fbtp_savedval));
247
248 /*
249 * Make the patched instruction visible via a data + instruction
250 * cache flush for the platforms that need it
251 */
252 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_savedval), 0);
253 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_savedval), 0);
254
255 fbt->fbtp_currentval = fbt->fbtp_savedval;
256 }
257
258 dtrace_membar_consumer();
259}
260
261/*ARGSUSED*/
262static void
263fbt_resume(void *arg, dtrace_id_t id, void *parg)
264{
265#pragma unused(arg,id)
266 fbt_probe_t *fbt = parg;
267 struct modctl *ctl = NULL;
268
269 for (; fbt != NULL; fbt = fbt->fbtp_next) {
270 ctl = fbt->fbtp_ctl;
271
272 ASSERT(ctl->mod_nenabled > 0);
273 if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt)) {
274 continue;
275 }
276
277 dtrace_casptr(&tempDTraceTrapHook, NULL, ptrauth_nop_cast(void *, &fbt_perfCallback));
278 if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
279 if (fbt_verbose) {
280 cmn_err(CE_NOTE, "fbt_resume is failing for probe %s "
281 "in module %s: tempDTraceTrapHook already occupied.",
282 fbt->fbtp_name, ctl->mod_modname);
283 }
284 return;
285 }
286
287 (void)ml_nofault_copy(virtsrc: (vm_offset_t)&fbt->fbtp_patchval, virtdst: (vm_offset_t)fbt->fbtp_patchpoint,
288 size: sizeof(fbt->fbtp_patchval));
289
290 /*
291 * Make the patched instruction visible via a data + instruction cache flush.
292 */
293 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_patchval), 0);
294 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint, (vm_size_t)sizeof(fbt->fbtp_patchval), 0);
295
296 fbt->fbtp_currentval = fbt->fbtp_patchval;
297 }
298
299 dtrace_membar_consumer();
300}
301
302static void
303fbt_provide_module_user_syms(struct modctl *ctl)
304{
305 unsigned int i;
306 char *modname = ctl->mod_modname;
307
308 dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
309 if (module_symbols) {
310 for (i = 0; i < module_symbols->dtmodsyms_count; i++) {
311 /*
312 * symbol->dtsym_addr (the symbol address) passed in from
313 * user space, is already slid for both kexts and kernel.
314 */
315 dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
316
317 char* name = symbol->dtsym_name;
318
319 /* Lop off omnipresent leading underscore. */
320 if (*name == '_') {
321 name += 1;
322 }
323
324 if (fbt_excluded(name)) {
325 continue;
326 }
327
328 /*
329 * Ignore symbols with a null address
330 */
331 if (!symbol->dtsym_addr) {
332 continue;
333 }
334
335 /*
336 * Ignore symbols not part of this module
337 */
338 if (!dtrace_addr_in_module((void*)symbol->dtsym_addr, ctl)) {
339 continue;
340 }
341
342 fbt_provide_probe(ctl, modname, name, instr: (machine_inst_t*)(uintptr_t)symbol->dtsym_addr, limit: (machine_inst_t*)(uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size));
343 }
344 }
345}
346static void
347fbt_provide_kernel_section(struct modctl *ctl, kernel_section_t *sect, kernel_nlist_t *sym, uint32_t nsyms, const char *strings)
348{
349 uintptr_t sect_start = (uintptr_t)sect->addr;
350 uintptr_t sect_end = (uintptr_t)sect->size + sect->addr;
351 unsigned int i;
352
353 if ((sect->flags & S_ATTR_PURE_INSTRUCTIONS) != S_ATTR_PURE_INSTRUCTIONS) {
354 return;
355 }
356
357 for (i = 0; i < nsyms; i++) {
358 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
359 const char *name = strings + sym[i].n_un.n_strx;
360 uint64_t limit;
361
362 if (sym[i].n_value < sect_start || sym[i].n_value > sect_end) {
363 continue;
364 }
365
366 /* Check that the symbol is a global and that it has a name. */
367 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) {
368 continue;
369 }
370
371 if (0 == sym[i].n_un.n_strx) { /* iff a null, "", name. */
372 continue;
373 }
374
375 /* Lop off omnipresent leading underscore. */
376 if (*name == '_') {
377 name += 1;
378 }
379
380 if (fbt_excluded(name)) {
381 continue;
382 }
383
384 /*
385 * Find the function boundary by looking at either the
386 * end of the section or the beginning of the next symbol
387 */
388 if (i == nsyms - 1 || sym[i + 1].n_value > sect_end) {
389 limit = sect_end;
390 } else {
391 limit = sym[i + 1].n_value;
392 }
393
394 fbt_provide_probe(ctl, modname: ctl->mod_modname, name, instr: (machine_inst_t*)sym[i].n_value, limit: (machine_inst_t*)limit);
395 }
396}
397
398static int
399fbt_sym_cmp(const void *ap, const void *bp)
400{
401 return (int)(((const kernel_nlist_t*)ap)->n_value - ((const kernel_nlist_t*)bp)->n_value);
402}
403
404static void
405fbt_provide_module_kernel_syms(struct modctl *ctl)
406{
407 kernel_mach_header_t *mh = (kernel_mach_header_t *)(ctl->mod_address);
408 kernel_segment_command_t *seg;
409 struct load_command *cmd;
410 kernel_segment_command_t *linkedit = NULL;
411 struct symtab_command *symtab = NULL;
412 kernel_nlist_t *syms = NULL, *sorted_syms = NULL;
413 const char *strings;
414 unsigned int i;
415 size_t symlen;
416
417 if (mh->magic != MH_MAGIC_KERNEL) {
418 return;
419 }
420
421 cmd = (struct load_command *) &mh[1];
422 for (i = 0; i < mh->ncmds; i++) {
423 if (cmd->cmd == LC_SEGMENT_KERNEL) {
424 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
425 if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) {
426 linkedit = orig_sg;
427 }
428 } else if (cmd->cmd == LC_SYMTAB) {
429 symtab = (struct symtab_command *) cmd;
430 }
431 if (symtab && linkedit) {
432 break;
433 }
434 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
435 }
436
437 if ((symtab == NULL) || (linkedit == NULL)) {
438 return;
439 }
440
441 syms = (kernel_nlist_t *)(linkedit->vmaddr + symtab->symoff - linkedit->fileoff);
442 strings = (const char *)(linkedit->vmaddr + symtab->stroff - linkedit->fileoff);
443
444 /*
445 * Make a copy of the symbol table and sort it to not cross into the next function
446 * when disassembling the function
447 */
448 symlen = sizeof(kernel_nlist_t) * symtab->nsyms;
449 sorted_syms = kmem_alloc(symlen, KM_SLEEP);
450 bcopy(src: syms, dst: sorted_syms, n: symlen);
451 qsort(a: sorted_syms, n: symtab->nsyms, es: sizeof(kernel_nlist_t), cmp: fbt_sym_cmp);
452
453 for (seg = firstsegfromheader(header: mh); seg != NULL; seg = nextsegfromheader(header: mh, seg)) {
454 kernel_section_t *sect = firstsect(sgp: seg);
455
456 if (strcmp(s1: seg->segname, s2: "__KLD") == 0 || strcmp(s1: seg->segname, s2: "__KLDDATA") == 0) {
457 continue;
458 }
459
460 for (sect = firstsect(sgp: seg); sect != NULL; sect = nextsect(sgp: seg, sp: sect)) {
461 fbt_provide_kernel_section(ctl, sect, sym: sorted_syms, nsyms: symtab->nsyms, strings);
462 }
463 }
464
465 kmem_free(sorted_syms, symlen);
466}
467
468void
469fbt_provide_module(void *arg, struct modctl *ctl)
470{
471#pragma unused(arg)
472 ASSERT(ctl != NULL);
473 ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
474 LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
475
476 if (dtrace_fbt_probes_restricted()) {
477 return;
478 }
479
480 // Update the "ignore blacklist" bit
481 if (ignore_fbt_blacklist) {
482 ctl->mod_flags |= MODCTL_FBT_PROVIDE_BLACKLISTED_PROBES;
483 }
484
485 if (MOD_FBT_DONE(ctl)) {
486 return;
487 }
488
489 if (fbt_module_excluded(ctl)) {
490 ctl->mod_flags |= MODCTL_FBT_INVALID;
491 return;
492 }
493
494 if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
495 fbt_provide_module_kernel_syms(ctl);
496 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
497 if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) {
498 ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED;
499 }
500 return;
501 }
502
503 if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
504 fbt_provide_module_user_syms(ctl);
505 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
506 if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) {
507 ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED;
508 }
509 return;
510 }
511}
512
/*
 * Attribute table handed to dtrace_register() in fbt_attach(). It holds
 * five { stability, stability, class } triples.
 * NOTE(review): the rows presumably follow the member order of
 * dtrace_pattr_t (provider, module, function, name, args) -- confirm
 * against the structure definition in <sys/dtrace.h>.
 */
static dtrace_pattr_t fbt_attr = {
	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
	{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
	{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
};
520
/*
 * Provider operations vector handed to dtrace_register() in fbt_attach().
 * Probes are offered per module only (dtps_provide is NULL); the argument
 * description/value and usermode callbacks are not implemented here.
 */
static dtrace_pops_t fbt_pops = {
	.dtps_provide =         NULL,
	.dtps_provide_module =  fbt_provide_module,
	.dtps_enable =          fbt_enable,
	.dtps_disable =         fbt_disable,
	.dtps_suspend =         fbt_suspend,
	.dtps_resume =          fbt_resume,
	.dtps_getargdesc =      NULL, /* APPLE NOTE: fbt_getargdesc implemented in userspace */
	.dtps_getargval =       NULL,
	.dtps_usermode =        NULL,
	.dtps_destroy =         fbt_destroy
};
533
534static void
535fbt_cleanup(dev_info_t *devi)
536{
537 dtrace_invop_remove(fbt_invop);
538 ddi_remove_minor_node(devi, NULL);
539 kmem_free(fbt_probetab, fbt_probetab_size * sizeof(fbt_probe_t *));
540 fbt_probetab = NULL;
541 fbt_probetab_mask = 0;
542}
543
544static int
545fbt_attach(dev_info_t *devi)
546{
547 if (fbt_probetab_size == 0) {
548 fbt_probetab_size = FBT_PROBETAB_SIZE;
549 }
550
551 fbt_probetab_mask = fbt_probetab_size - 1;
552 fbt_probetab =
553 kmem_zalloc(fbt_probetab_size * sizeof(fbt_probe_t *), KM_SLEEP);
554
555 dtrace_invop_add(fbt_invop);
556
557 if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0,
558 DDI_PSEUDO, 0) == DDI_FAILURE ||
559 dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL,
560 &fbt_pops, NULL, &fbt_id) != 0) {
561 fbt_cleanup(devi);
562 return DDI_FAILURE;
563 }
564
565 return DDI_SUCCESS;
566}
567
/* Forward declaration, so the definition is checked against d_open_t. */
static d_open_t _fbt_open;

/*
 * Open handler installed in fbt_cdevsw. Every argument is ignored and the
 * call always succeeds (returns 0).
 */
static int
_fbt_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}
576
/*
 * Major number requested from cdevsw_add() in fbt_init(); the major that is
 * actually used is whatever cdevsw_add() returns.
 * NOTE(review): a negative value presumably asks cdevsw_add() to search for
 * a free slot -- confirm against the cdevsw_add() implementation.
 */
#define FBT_MAJOR  -24 /* let the kernel pick the device number */

/*
 * Character device switch for the fbt device. Only open is implemented
 * locally (_fbt_open); every other slot is filled with an eno_* handler.
 */
static const struct cdevsw fbt_cdevsw =
{
	.d_open = _fbt_open,
	.d_close = eno_opcl,
	.d_read = eno_rdwrt,
	.d_write = eno_rdwrt,
	.d_ioctl = eno_ioctl,
	.d_stop = eno_stop,
	.d_reset = eno_reset,
	.d_select = eno_select,
	.d_mmap = eno_mmap,
	.d_strategy = eno_strat,
	.d_reserved_1 = eno_getc,
	.d_reserved_2 = eno_putc,
};
594
595#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
596#undef kmem_free /* from its binding to dt_kmem_free glue */
597#include <vm/vm_kern.h>
598
599
600void
601fbt_init( void )
602{
603 int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw);
604
605 if (majdevno < 0) {
606 printf("fbt_init: failed to allocate a major number!\n");
607 return;
608 }
609
610 fbt_blacklist_init();
611 fbt_attach(devi: (dev_info_t*)(uintptr_t)majdevno);
612}
613#undef FBT_MAJOR
614