/*
 * Copyright (c) 2000-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/kdebug_common.h>

LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
int kdbg_debug = 0;

extern struct kd_control kd_control_trace, kd_control_triage;

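/*
 * Take the storage spin lock for a control page with interrupts disabled,
 * returning the previous interrupt state so the caller can hand it back to
 * kdebug_storage_unlock().
 */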
int
kdebug_storage_lock(struct kd_control *kd_ctrl_page)
{
	int intrs_en = ml_set_interrupts_enabled(false);
	lck_spin_lock_grp(&kd_ctrl_page->kdc_storage_lock, &kdebug_lck_grp);
	return intrs_en;
}

void
kdebug_storage_unlock(struct kd_control *kd_ctrl_page, int intrs_en)
{
	lck_spin_unlock(&kd_ctrl_page->kdc_storage_lock);
	ml_set_interrupts_enabled(intrs_en);
}

// Turn on boot tracing and set the number of events.
static TUNABLE(unsigned int, new_nkdbufs, "trace", 0);
// Enable wrapping during boot tracing.
TUNABLE(unsigned int, trace_wrap, "trace_wrap", 0);
// The filter description to apply to boot tracing.
static TUNABLE_STR(trace_typefilter, 256, "trace_typefilter", "");

// Turn on wake tracing and set the number of events.
TUNABLE(unsigned int, wake_nkdbufs, "trace_wake", 0);
// Write trace events to a file in the event of a panic.
TUNABLE(unsigned int, write_trace_on_panic, "trace_panic", 0);

// Obsolete leak logging system.
TUNABLE(int, log_leaks, "-l", 0);

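/*
 * Set up kdebug at startup: initialize the storage locks, start boot tracing
 * according to the "trace" boot-args, and create the triage buffers.
 */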
void
kdebug_startup(void)
{
	lck_spin_init(&kd_control_trace.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	lck_spin_init(&kd_control_triage.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	kdebug_init(new_nkdbufs, trace_typefilter,
	    (trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);
	create_buffers_triage();
}

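// Number of CPUs to allocate per-CPU trace buffers for.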
uint32_t
kdbg_cpu_count(void)
{
#if defined(__x86_64__)
	return ml_early_cpu_max_number() + 1;
#else // defined(__x86_64__)
	return ml_get_cpu_count();
#endif // !defined(__x86_64__)
}

/*
 * Both kdebug_timestamp and kdebug_using_continuous_time are known to kexts,
 * and going forward we always want to use mach_continuous_time().  So keep
 * these two routines as-is to leave the TRACE mode use outside the kernel
 * intact.  TRIAGE mode explicitly uses only mach_continuous_time() for its
 * timestamps.
 */
bool
kdebug_using_continuous_time(void)
{
	return kd_control_trace.kdc_flags & KDBG_CONTINUOUS_TIME;
}

uint64_t
kdebug_timestamp(void)
{
	if (kdebug_using_continuous_time()) {
		return mach_continuous_time();
	} else {
		return mach_absolute_time();
	}
}

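/*
 * Allocate the per-CPU bufinfo array, the copy buffer, and the storage regions
 * for a control page, then thread every storage unit onto the free list.
 * Returns 0 on success or ENOSPC if any allocation fails, in which case
 * everything allocated so far is torn down.
 */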
int
create_buffers(
	struct kd_control *kd_ctrl_page,
	struct kd_buffer *kd_data_page,
	vm_tag_t tag)
{
	unsigned int i;
	unsigned int p_buffer_size;
	unsigned int f_buffer_size;
	unsigned int f_buffers;
	int error = 0;
	int ncpus, count_storage_units = 0;

	struct kd_bufinfo *kdbip = NULL;
	struct kd_region *kd_bufs = NULL;
	int kdb_storage_count = kd_data_page->kdb_storage_count;

	ncpus = kd_ctrl_page->alloc_cpus;

	kdbip = kalloc_type_tag(struct kd_bufinfo, ncpus, Z_WAITOK | Z_ZERO, tag);
	if (kdbip == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kdb_info = kdbip;

	f_buffers = kdb_storage_count / N_STORAGE_UNITS_PER_BUFFER;
	kd_data_page->kdb_region_count = f_buffers;

	f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage);
	p_buffer_size = (kdb_storage_count % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage);

	if (p_buffer_size) {
		kd_data_page->kdb_region_count++;
	}

	if (kd_data_page->kdcopybuf == 0) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t)kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}
	}

	kd_bufs = kalloc_type_tag(struct kd_region, kd_data_page->kdb_region_count,
	    Z_WAITOK | Z_ZERO, tag);
	if (kd_bufs == NULL) {
		error = ENOSPC;
		goto out;
	}
	kd_data_page->kd_bufs = kd_bufs;

	for (i = 0; i < f_buffers; i++) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdr_size = f_buffer_size;
	}
	if (p_buffer_size) {
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
			error = ENOSPC;
			goto out;
		}

		kd_bufs[i].kdr_size = p_buffer_size;
	}

	count_storage_units = 0;
	for (i = 0; i < kd_data_page->kdb_region_count; i++) {
		struct kd_storage *kds;
		uint16_t n_elements;
		static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX);
		assert(kd_bufs[i].kdr_size <= N_STORAGE_UNITS_PER_BUFFER *
		    sizeof(struct kd_storage));

		n_elements = kd_bufs[i].kdr_size / sizeof(struct kd_storage);
		kds = kd_bufs[i].kdr_addr;

		for (uint16_t n = 0; n < n_elements; n++) {
			kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index;
			kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset;

			kd_ctrl_page->kds_free_list.buffer_index = i;
			kd_ctrl_page->kds_free_list.offset = n;
		}
		count_storage_units += n_elements;
	}

	kd_data_page->kdb_storage_count = count_storage_units;

	for (i = 0; i < ncpus; i++) {
		kdbip[i].kd_list_head.raw = KDS_PTR_NULL;
		kdbip[i].kd_list_tail.raw = KDS_PTR_NULL;
		kdbip[i].kd_lostevents = false;
		kdbip[i].num_bufs = 0;
	}

	kd_ctrl_page->kdc_flags |= KDBG_BUFINIT;

	kd_ctrl_page->kdc_storage_used = 0;
out:
	if (error) {
		delete_buffers(kd_ctrl_page, kd_data_page);
	}

	return error;
}

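/*
 * Free the storage regions, the copy buffer, and the per-CPU bufinfo array
 * for a control page, and reset the free list and CPU accounting.  Safe to
 * call on a partially constructed kd_buffer.
 */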
void
delete_buffers(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page)
{
	unsigned int i;
	int kdb_region_count = kd_data_page->kdb_region_count;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	if (kd_bufs) {
		for (i = 0; i < kdb_region_count; i++) {
			if (kd_bufs[i].kdr_addr) {
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdr_addr, (vm_size_t)kd_bufs[i].kdr_size);
			}
		}
		kfree_type(struct kd_region, kdb_region_count, kd_bufs);

		kd_data_page->kd_bufs = NULL;
		kd_data_page->kdb_region_count = 0;
	}
	if (kd_data_page->kdcopybuf) {
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);

		kd_data_page->kdcopybuf = NULL;
	}
	kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL;

	if (kdbip) {
		kfree_type(struct kd_bufinfo, kd_ctrl_page->alloc_cpus, kdbip);
		kd_data_page->kdb_info = NULL;
	}
	kd_ctrl_page->kdc_coprocs = NULL;
	kd_ctrl_page->kdebug_cpus = 0;
	kd_ctrl_page->alloc_cpus = 0;
	kd_ctrl_page->kdc_flags &= ~KDBG_BUFINIT;
}

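/*
 * Attach a fresh storage unit to the given CPU's list, taking one from the
 * free list if possible and otherwise stealing the oldest full unit from
 * another CPU (wrapping).  Returns false if tracing had to be disabled
 * because wrapping is not allowed or no unit could be stolen.
 */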
static bool
allocate_storage_unit(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, int cpu)
{
	union kds_ptr kdsp;
	struct kd_storage *kdsp_actual, *kdsp_next_actual;
	struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try;
	uint64_t oldest_ts, ts;
	bool retval = true;
	struct kd_region *kd_bufs;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	kdbp = &kd_data_page->kdb_info[cpu];
	kd_bufs = kd_data_page->kd_bufs;
	kdbip = kd_data_page->kdb_info;

	/* If someone beat us to the allocation, return success. */
	if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail);

		if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) {
			goto out;
		}
	}

	if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) {
		/*
		 * If there's a free page, grab it from the free list.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kd_ctrl_page->kds_free_list = kdsp_actual->kds_next;

		kd_ctrl_page->kdc_storage_used++;
	} else {
		/*
		 * Otherwise, we're going to lose events and repurpose the oldest
		 * storage unit we can find.
		 */
		if (kd_ctrl_page->kdc_live_flags & KDBG_NOWRAP) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			kdbp->kd_lostevents = true;
			retval = false;
			goto out;
		}
		kdbp_vict = NULL;
		oldest_ts = UINT64_MAX;

		for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) {
			if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) {
				/*
				 * No storage unit to steal.
				 */
				continue;
			}

			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head);

			if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) {
				/*
				 * Make sure we don't steal the storage unit being
				 * actively recorded to...  Need to move on because we
				 * don't want an out-of-order set of events showing up
				 * later.
				 */
				continue;
			}

			/*
			 * When wrapping, steal the storage unit with the
			 * earliest timestamp on its last event, instead of the
			 * earliest timestamp on the first event.  This allows a
			 * storage unit with more recent events to be preserved,
			 * even if the storage unit contains events that are
			 * older than those found in other CPUs.
			 */
			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
			if (ts < oldest_ts) {
				oldest_ts = ts;
				kdbp_vict = kdbp_try;
			}
		}
		if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			kd_ctrl_page->kdc_emit = KDEMIT_DISABLE;
			kdebug_enable = 0;
			kd_ctrl_page->enabled = 0;
			commpage_update_kdebug_state();
			retval = false;
			goto out;
		}
		kdsp = kdbp_vict->kd_list_head;
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
		kdbp_vict->kd_list_head = kdsp_actual->kds_next;

		if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) {
			kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head);
			kdsp_next_actual->kds_lostevents = true;
		} else {
			kdbp_vict->kd_lostevents = true;
		}

		if (kd_ctrl_page->kdc_oldest_time < oldest_ts) {
			kd_ctrl_page->kdc_oldest_time = oldest_ts;
		}
		kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		kdsp_actual->kds_timestamp = kdebug_timestamp();
	} else {
		kdsp_actual->kds_timestamp = mach_continuous_time();
	}

	kdsp_actual->kds_next.raw = KDS_PTR_NULL;
	kdsp_actual->kds_bufcnt = 0;
	kdsp_actual->kds_readlast = 0;

	kdsp_actual->kds_lostevents = kdbp->kd_lostevents;
	kdbp->kd_lostevents = false;
	kdsp_actual->kds_bufindx = 0;

	if (kdbp->kd_list_head.raw == KDS_PTR_NULL) {
		kdbp->kd_list_head = kdsp;
	} else {
		POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp;
	}
	kdbp->kd_list_tail = kdsp;
out:
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);

	return retval;
}

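/*
 * Return the storage unit at the head of the given CPU's list to the free
 * list, unless it has already been stolen out from under us.
 */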
static void
release_storage_unit(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page, int cpu, uint32_t kdsp_raw)
{
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	union kds_ptr kdsp;

	kdbp = &kd_data_page->kdb_info[cpu];

	kdsp.raw = kdsp_raw;

	int intrs_en = kdebug_storage_lock(kd_ctrl_page);

	if (kdsp.raw == kdbp->kd_list_head.raw) {
		/*
		 * It's possible for the storage unit pointed to by kdsp to have
		 * already been stolen...  So check to see if it's still the head
		 * of the list now that we're behind the lock that protects adding
		 * and removing from the queue.  Since we only ever release and
		 * steal units from that position, if it's no longer the head we
		 * have nothing to do in this context.
		 */
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp);
		kdbp->kd_list_head = kdsp_actual->kds_next;

		kdsp_actual->kds_next = kd_ctrl_page->kds_free_list;
		kd_ctrl_page->kds_free_list = kdsp;

		kd_ctrl_page->kdc_storage_used--;
	}

	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

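/*
 * Prevent storage units from being stolen (wrapped) while a reader is merging
 * events.  Returns whether the buffers had already wrapped and reports the
 * previous emit filter and live flags so they can be restored later.
 */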
bool
kdebug_disable_wrap(struct kd_control *ctl,
    kdebug_emit_filter_t *old_emit, kdebug_live_flags_t *old_live)
{
	int intrs_en = kdebug_storage_lock(ctl);

	*old_emit = ctl->kdc_emit;
	*old_live = ctl->kdc_live_flags;

	bool wrapped = ctl->kdc_live_flags & KDBG_WRAPPED;
	ctl->kdc_live_flags &= ~KDBG_WRAPPED;
	ctl->kdc_live_flags |= KDBG_NOWRAP;

	kdebug_storage_unlock(ctl, intrs_en);

	return wrapped;
}

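// Re-allow wrapping and restore the emit filter saved by kdebug_disable_wrap().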
static void
_enable_wrap(struct kd_control *kd_ctrl_page, kdebug_emit_filter_t emit)
{
	int intrs_en = kdebug_storage_lock(kd_ctrl_page);
	kd_ctrl_page->kdc_live_flags &= ~KDBG_NOWRAP;
	if (emit) {
		kd_ctrl_page->kdc_emit = emit;
	}
	kdebug_storage_unlock(kd_ctrl_page, intrs_en);
}

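/*
 * Record a single event into the per-CPU storage units of a control page:
 * normalize the timestamp, allocate a new storage unit if the current one is
 * full, and drop the event if tracing is disabled or the event is older than
 * anything still in the buffers.
 */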
__attribute__((always_inline))
void
kernel_debug_write(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page,
    struct kd_record kd_rec)
{
	uint64_t now = 0;
	uint32_t bindx;
	kd_buf *kd;
	int cpu;
	struct kd_bufinfo *kdbp;
	struct kd_storage *kdsp_actual;
	union kds_ptr kds_raw;

	disable_preemption();

	if (kd_ctrl_page->enabled == 0) {
		goto out;
	}

	if (kd_rec.cpu == -1) {
		cpu = cpu_number();
	} else {
		cpu = kd_rec.cpu;
	}

	kdbp = &kd_data_page->kdb_info[cpu];

	bool timestamp_is_continuous = kdbp->continuous_timestamps;

	if (kd_rec.timestamp != -1) {
		if (kdebug_using_continuous_time()) {
			if (!timestamp_is_continuous) {
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
			}
		} else {
			if (timestamp_is_continuous) {
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
			}
		}
		kd_rec.timestamp &= KDBG_TIMESTAMP_MASK;
		if (kd_rec.timestamp < kd_ctrl_page->kdc_oldest_time) {
			if (kdbp->latest_past_event_timestamp < kd_rec.timestamp) {
				kdbp->latest_past_event_timestamp = kd_rec.timestamp;
			}
			goto out;
		}
	}

retry_q:
	kds_raw = kdbp->kd_list_tail;

	if (kds_raw.raw != KDS_PTR_NULL) {
		kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw);
		bindx = kdsp_actual->kds_bufindx;
	} else {
		kdsp_actual = NULL;
		bindx = kd_ctrl_page->kdebug_events_per_storage_unit;
	}

	if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) {
		if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) {
			/*
			 * This can only happen if wrapping has been disabled.
			 */
			goto out;
		}
		goto retry_q;
	}

	if (kd_rec.timestamp != -1) {
		/*
		 * IOP entries can be allocated before xnu allocates and initializes
		 * the buffer.  And Intel uses a special 0 value as an early-tracing
		 * timestamp sentinel to mark the start of the trace time of interest.
		 */
		if (kd_rec.timestamp < kdsp_actual->kds_timestamp) {
			kdsp_actual->kds_timestamp = kd_rec.timestamp;
		}
		now = kd_rec.timestamp;
	} else {
		if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
			now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
		} else {
			now = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
		}
	}

	if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) {
		goto retry_q;
	}

	kd = &kdsp_actual->kds_records[bindx];

	if (kd_ctrl_page->kdc_flags & KDBG_DEBUGID_64) {
		/* The debugid has been passed in arg4. */
		kd->debugid = 0;
	} else {
		kd->debugid = kd_rec.debugid;
	}

	kd->arg1 = kd_rec.arg1;
	kd->arg2 = kd_rec.arg2;
	kd->arg3 = kd_rec.arg3;
	kd->arg4 = kd_rec.arg4;
	kd->arg5 = kd_rec.arg5;

	kdbg_set_timestamp_and_cpu(kd, now, cpu);

	OSAddAtomic(1, &kdsp_actual->kds_bufcnt);

out:
	enable_preemption();
}

// Read events from kdebug storage units into a user space buffer or file.
//
// This code runs while events are emitted -- storage unit allocation and
// deallocation will synchronize with the emitters.  Only one reader per
// control structure is allowed.
int
kernel_debug_read(struct kd_control *kd_ctrl_page,
    struct kd_buffer *kd_data_page, user_addr_t buffer, size_t *number,
    vnode_t vp, vfs_context_t ctx, uint32_t file_version)
{
	size_t count;
	unsigned int cpu, min_cpu;
	uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time;
	int error = 0;
	kd_buf *tempbuf;
	uint32_t rcursor;
	kd_buf lostevent;
	union kds_ptr kdsp;
	bool traced_retrograde = false;
	struct kd_storage *kdsp_actual;
	struct kd_bufinfo *kdbp;
	struct kd_bufinfo *min_kdbp;
	size_t tempbuf_count;
	uint32_t tempbuf_number;
	kdebug_emit_filter_t old_emit;
	uint32_t old_live_flags;
	bool out_of_events = false;
	bool wrapped = false;
	bool set_preempt = true;
	bool should_disable = false;

	struct kd_bufinfo *kdbip = kd_data_page->kdb_info;
	struct kd_region *kd_bufs = kd_data_page->kd_bufs;

	assert(number != NULL);
	count = *number / sizeof(kd_buf);
	*number = 0;

	if (count == 0 || !(kd_ctrl_page->kdc_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) {
		return EINVAL;
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS',
		 * and that can be handled by a callout thread that already has
		 * eager preemption set.  So check to see if we are dealing with
		 * one such thread.
		 */
		set_preempt = !(thread_is_eager_preempt(current_thread()));
	}

	if (set_preempt) {
		thread_set_eager_preempt(current_thread());
	}

	memset(&lostevent, 0, sizeof(lostevent));
	lostevent.debugid = TRACE_LOST_EVENTS;

	/*
	 * Capture the current time.  Only sort events that have occurred
	 * before now.  Since the IOPs are being flushed here, it is possible
	 * that events occur on the AP while running live tracing.
	 */
	if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
		barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK;
	} else {
		barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK;
	}

	/*
	 * Disable wrap so storage units cannot be stolen out from underneath us
	 * while merging events.
	 *
	 * Because we hold ktrace_lock, no other control threads can be playing
	 * with kdc_flags.  The code that emits new events could be running,
	 * but it grabs kdc_storage_lock if it needs to acquire a new storage
	 * chunk, which is where it examines kdc_flags.  If it is adding to
	 * the same chunk we're reading from, check for that below.
	 */
	wrapped = kdebug_disable_wrap(kd_ctrl_page, &old_emit, &old_live_flags);

	if (count > kd_data_page->kdb_event_count) {
		count = kd_data_page->kdb_event_count;
	}

	if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
		tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
	}

	/*
	 * If the buffers have wrapped, do not emit additional lost events for the
	 * oldest storage units.
	 */
	if (wrapped) {
		kd_ctrl_page->kdc_live_flags &= ~KDBG_WRAPPED;

		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_lostevents = false;
		}
	}

	if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
		/*
		 * In TRIAGE mode we want to extract all the current records
		 * regardless of where we stopped reading last time, so that we
		 * have the best shot at getting older records for threads
		 * before the buffers are wrapped.  So set:
		 * a) kd_prev_timebase to 0 so we (re-)consider older records
		 * b) readlast to 0 to initiate the search from the 1st record.
		 */
		for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
			kdbp->kd_prev_timebase = 0;
			if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
				continue;
			}
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
			kdsp_actual->kds_readlast = 0;
		}
	}

	/*
	 * Capture the earliest time where there are events for all CPUs and don't
	 * emit events with timestamps prior.
	 */
	barrier_min = kd_ctrl_page->kdc_oldest_time;

	while (count) {
		tempbuf = kd_data_page->kdcopybuf;
		tempbuf_number = 0;

		if (wrapped) {
			/*
			 * Emit a lost events tracepoint to indicate that previous events
			 * were lost -- the thread map cannot be trusted.  A new one must
			 * be taken so tools can analyze the trace in a backwards-facing
			 * fashion.
			 */
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
			*tempbuf = lostevent;
			wrapped = false;
			goto nextevent;
		}

		/* While space left in merged events scratch buffer. */
		while (tempbuf_count) {
			bool lostevents = false;
			int lostcpu = 0;
			earliest_time = UINT64_MAX;
			min_kdbp = NULL;
			min_cpu = 0;

			/* Check each CPU's buffers for the earliest event. */
			for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) {
				/* Skip CPUs without data in their oldest storage unit. */
				if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
next_cpu:
					continue;
				}
				/* From CPU data to buffer header to buffer. */
				kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

next_event:
				/* The next event to be read from this buffer. */
				rcursor = kdsp_actual->kds_readlast;

				/* Skip this buffer if there are no events left. */
				if (rcursor == kdsp_actual->kds_bufindx) {
					continue;
				}

				if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
					/*
					 * TRIAGE mode record keeping doesn't (currently)
					 * use lostevent markers.  It also doesn't want to
					 * call release_storage_unit() in this read call.
					 * It expects the buffers to wrap and records to be
					 * reclaimed solely in that way.
					 */
					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
					goto skip_record_checks;
				}

				/*
				 * Check that this storage unit wasn't stolen and events were
				 * lost.  This must have happened while wrapping was disabled
				 * in this function.
				 */
				if (kdsp_actual->kds_lostevents) {
					lostevents = true;
					kdsp_actual->kds_lostevents = false;

					/*
					 * The earliest event we can trust is the first one in this
					 * stolen storage unit.
					 */
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
					if (kd_ctrl_page->kdc_oldest_time < lost_time) {
						/*
						 * If this is the first time we've seen lost events for
						 * this gap, record its timestamp as the oldest
						 * timestamp we're willing to merge for the lost events
						 * tracepoint.
						 */
						kd_ctrl_page->kdc_oldest_time = barrier_min = lost_time;
						lostcpu = cpu;
					}
				}

				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);

				if (t > barrier_max) {
					goto next_cpu;
				}
				if (t < kdsp_actual->kds_timestamp) {
					/*
					 * This indicates the event emitter hasn't completed
					 * filling in the event (because we're looking at the
					 * buffer that the record head is using).  The max barrier
					 * timestamp should have saved us from seeing these kinds
					 * of things, but other CPUs might be slow on the uptake.
					 *
					 * Bail out so we don't get out-of-order events by
					 * continuing to read events from other CPUs.
					 */
					out_of_events = true;
					break;
				}

				/*
				 * Ignore events that have aged out due to wrapping or storage
				 * unit exhaustion while merging events.
				 */
				if (t < barrier_min) {
					kdsp_actual->kds_readlast++;
					if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) {
						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);

						if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) {
							goto next_cpu;
						}
						kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);
					}
					goto next_event;
				}

				/*
				 * Don't worry about merging any events -- just walk through
				 * the CPUs and find the latest timestamp of lost events.
				 */
				if (lostevents) {
					continue;
				}
skip_record_checks:
				if (t < earliest_time) {
					earliest_time = t;
					min_kdbp = kdbp;
					min_cpu = cpu;
				}
			}
			if (lostevents) {
				/*
				 * If any lost events were hit in the buffers, emit an event
				 * with the latest timestamp.
				 */
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
				*tempbuf = lostevent;
				tempbuf->arg1 = 1;
				goto nextevent;
			}
			if (min_kdbp == NULL) {
				/* All buffers ran empty. */
				out_of_events = true;
			}
			if (out_of_events) {
				break;
			}

			kdsp = min_kdbp->kd_list_head;
			kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp);

			if (min_kdbp->latest_past_event_timestamp != 0) {
				if (kdbg_debug) {
					printf("kdebug: PAST EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, earliest_time, min_cpu);
				tempbuf->arg1 = (kd_buf_argtype)min_kdbp->latest_past_event_timestamp;
				tempbuf->arg2 = 0;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_PAST_EVENTS;
				min_kdbp->latest_past_event_timestamp = 0;
				goto nextevent;
			}

			/* Copy earliest event into merged events scratch buffer. */
			*tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++];
			kd_buf *earliest_event = tempbuf;
			if (kd_control_trace.kdc_flags & KDBG_MATCH_DISABLE) {
				kd_event_matcher *match = &kd_control_trace.disable_event_match;
				kd_event_matcher *mask = &kd_control_trace.disable_event_mask;
				if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid &&
				    (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] &&
				    (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] &&
				    (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] &&
				    (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) {
					should_disable = true;
				}
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) {
					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
				}
			}

			/*
			 * Watch for out of order timestamps (from IOPs).
			 */
			if (earliest_time < min_kdbp->kd_prev_timebase) {
				/*
				 * If we haven't already, emit a retrograde events event.
				 * Otherwise, ignore this event.
				 */
				if (traced_retrograde) {
					continue;
				}
				if (kdbg_debug) {
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
				}

				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase,
				    kdbg_get_cpu(tempbuf));
				tempbuf->arg1 = tempbuf->debugid;
				tempbuf->arg2 = (kd_buf_argtype)earliest_time;
				tempbuf->arg3 = 0;
				tempbuf->arg4 = 0;
				tempbuf->debugid = TRACE_RETROGRADE_EVENTS;
				traced_retrograde = true;
			} else {
				min_kdbp->kd_prev_timebase = earliest_time;
			}
nextevent:
			tempbuf_count--;
			tempbuf_number++;
			tempbuf++;

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE &&
			    (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) {
				break;
			}
		}

		if (tempbuf_number) {
			/*
			 * Remember the latest timestamp of events that we've merged so we
			 * don't think we've lost events later.
			 */
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
			if (kd_ctrl_page->kdc_oldest_time < latest_time) {
				kd_ctrl_page->kdc_oldest_time = latest_time;
			}

			if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) {
				extern int kernel_debug_trace_write_to_file(user_addr_t *buffer,
				    size_t *number, size_t *count, size_t tempbuf_number,
				    vnode_t vp, vfs_context_t ctx, uint32_t file_version);
				error = kernel_debug_trace_write_to_file(&buffer, number,
				    &count, tempbuf_number, vp, ctx, file_version);
			} else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) {
				memcpy((void *)buffer, kd_data_page->kdcopybuf,
				    tempbuf_number * sizeof(kd_buf));
				buffer += tempbuf_number * sizeof(kd_buf);
			} else {
				panic("kdebug: invalid kdebug mode %d", kd_ctrl_page->mode);
			}
			if (error) {
				*number = 0;
				error = EINVAL;
				break;
			}
			count -= tempbuf_number;
			*number += tempbuf_number;
		}
		if (out_of_events) {
			break;
		}

		if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) {
			tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count;
		}
	}
	if ((old_live_flags & KDBG_NOWRAP) == 0) {
		_enable_wrap(kd_ctrl_page, old_emit);
	}

	if (set_preempt) {
		thread_clear_eager_preempt(current_thread());
	}

	if (should_disable) {
		kernel_debug_disable();
	}

	return error;
}