1 | /* |
2 | * Copyright (c) 2000-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @Apple_LICENSE_HEADER_START@ |
5 | * |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the |
8 | * "License"). You may not use this file except in compliance with the |
9 | * License. Please obtain a copy of the License at |
10 | * http://www.apple.com/publicsource and read it before using this file. |
11 | * |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations |
18 | * under the License. |
19 | * |
20 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
21 | */ |
22 | |
23 | #include <sys/kdebug_common.h> |
24 | |
LCK_GRP_DECLARE(kdebug_lck_grp, "kdebug");
26 | int kdbg_debug = 0; |
27 | |
28 | extern struct kd_control kd_control_trace, kd_control_triage; |
29 | |
30 | int |
31 | kdebug_storage_lock(struct kd_control *kd_ctrl_page) |
32 | { |
33 | int intrs_en = ml_set_interrupts_enabled(false); |
	lck_spin_lock_grp(&kd_ctrl_page->kdc_storage_lock, &kdebug_lck_grp);
35 | return intrs_en; |
36 | } |
37 | |
38 | void |
39 | kdebug_storage_unlock(struct kd_control *kd_ctrl_page, int intrs_en) |
40 | { |
	lck_spin_unlock(&kd_ctrl_page->kdc_storage_lock);
	ml_set_interrupts_enabled(intrs_en);
43 | } |
44 | |
// Turn on boot tracing and set the number of events.
static TUNABLE(unsigned int, new_nkdbufs, "trace", 0);
// Enable wrapping during boot tracing.
TUNABLE(unsigned int, trace_wrap, "trace_wrap", 0);
// The filter description to apply to boot tracing.
static TUNABLE_STR(trace_typefilter, 256, "trace_typefilter", "");

// Turn on wake tracing and set the number of events.
TUNABLE(unsigned int, wake_nkdbufs, "trace_wake", 0);
// Write trace events to a file in the event of a panic.
TUNABLE(unsigned int, write_trace_on_panic, "trace_panic", 0);

// Obsolete leak logging system.
TUNABLE(int, log_leaks, "-l", 0);
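
/*
 * Illustrative only: these tunables are boot-args, so a hypothetical
 * boot-args line that enables boot tracing with wrapping might look like
 *
 *     trace=2000000 trace_wrap=1
 *
 * where the value of "trace" is the number of events to allocate.  The
 * "trace_typefilter" string is passed through to kdebug_init() below.
 */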
59 | |
60 | void |
61 | kdebug_startup(void) |
62 | { |
	lck_spin_init(&kd_control_trace.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	lck_spin_init(&kd_control_triage.kdc_storage_lock, &kdebug_lck_grp, LCK_ATTR_NULL);
	kdebug_init(new_nkdbufs, trace_typefilter,
	    (trace_wrap ? KDOPT_WRAPPING : 0) | KDOPT_ATBOOT);
67 | create_buffers_triage(); |
68 | } |
69 | |
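/*
 * The number of CPUs to size the per-CPU trace buffers for.  On Intel this
 * may be called before every CPU has been brought up, so use the maximum
 * possible CPU number rather than the count of CPUs currently running.
 */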
70 | uint32_t |
71 | kdbg_cpu_count(void) |
72 | { |
73 | #if defined(__x86_64__) |
74 | return ml_early_cpu_max_number() + 1; |
75 | #else // defined(__x86_64__) |
76 | return ml_get_cpu_count(); |
77 | #endif // !defined(__x86_64__) |
78 | } |
79 | |
/*
 * Both kdebug_timestamp and kdebug_using_continuous_time are known to kexts,
 * and going forward we always want to use mach_continuous_time().  So we keep
 * these two routines as-is to keep TRACE mode use outside the kernel intact.
 * TRIAGE mode will explicitly use only mach_continuous_time() for its
 * timestamps.
 */
87 | bool |
88 | kdebug_using_continuous_time(void) |
89 | { |
90 | return kd_control_trace.kdc_flags & KDBG_CONTINUOUS_TIME; |
91 | } |
92 | |
93 | uint64_t |
94 | kdebug_timestamp(void) |
95 | { |
96 | if (kdebug_using_continuous_time()) { |
97 | return mach_continuous_time(); |
98 | } else { |
99 | return mach_absolute_time(); |
100 | } |
101 | } |
102 | |
103 | int |
104 | create_buffers( |
105 | struct kd_control *kd_ctrl_page, |
106 | struct kd_buffer *kd_data_page, |
107 | vm_tag_t tag) |
108 | { |
109 | unsigned int i; |
110 | unsigned int p_buffer_size; |
111 | unsigned int f_buffer_size; |
112 | unsigned int f_buffers; |
113 | int error = 0; |
114 | int ncpus, count_storage_units = 0; |
115 | |
116 | struct kd_bufinfo *kdbip = NULL; |
117 | struct kd_region *kd_bufs = NULL; |
118 | int kdb_storage_count = kd_data_page->kdb_storage_count; |
119 | |
120 | ncpus = kd_ctrl_page->alloc_cpus; |
121 | |
122 | kdbip = kalloc_type_tag(struct kd_bufinfo, ncpus, Z_WAITOK | Z_ZERO, tag); |
123 | if (kdbip == NULL) { |
124 | error = ENOSPC; |
125 | goto out; |
126 | } |
127 | kd_data_page->kdb_info = kdbip; |
128 | |
129 | f_buffers = kdb_storage_count / N_STORAGE_UNITS_PER_BUFFER; |
130 | kd_data_page->kdb_region_count = f_buffers; |
131 | |
132 | f_buffer_size = N_STORAGE_UNITS_PER_BUFFER * sizeof(struct kd_storage); |
133 | p_buffer_size = (kdb_storage_count % N_STORAGE_UNITS_PER_BUFFER) * sizeof(struct kd_storage); |
134 | |
135 | if (p_buffer_size) { |
136 | kd_data_page->kdb_region_count++; |
137 | } |
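
	/*
	 * Illustrative sizing, assuming (hypothetically) that
	 * N_STORAGE_UNITS_PER_BUFFER is 2048: a request for 5000 storage units
	 * yields two full regions of 2048 units each plus a partial region of
	 * 904 units, so kdb_region_count would be 3.
	 */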
138 | |
139 | if (kd_data_page->kdcopybuf == 0) { |
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_data_page->kdcopybuf,
		    (vm_size_t)kd_ctrl_page->kdebug_kdcopybuf_size,
		    KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
143 | error = ENOSPC; |
144 | goto out; |
145 | } |
146 | } |
147 | |
148 | kd_bufs = kalloc_type_tag(struct kd_region, kd_data_page->kdb_region_count, |
149 | Z_WAITOK | Z_ZERO, tag); |
150 | if (kd_bufs == NULL) { |
151 | error = ENOSPC; |
152 | goto out; |
153 | } |
154 | kd_data_page->kd_bufs = kd_bufs; |
155 | |
156 | for (i = 0; i < f_buffers; i++) { |
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)f_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
159 | error = ENOSPC; |
160 | goto out; |
161 | } |
162 | |
163 | kd_bufs[i].kdr_size = f_buffer_size; |
164 | } |
165 | if (p_buffer_size) { |
		if (kmem_alloc(kernel_map, (vm_offset_t *)&kd_bufs[i].kdr_addr,
		    (vm_size_t)p_buffer_size, KMA_DATA | KMA_ZERO, tag) != KERN_SUCCESS) {
168 | error = ENOSPC; |
169 | goto out; |
170 | } |
171 | |
172 | kd_bufs[i].kdr_size = p_buffer_size; |
173 | } |
174 | |
175 | count_storage_units = 0; |
176 | for (i = 0; i < kd_data_page->kdb_region_count; i++) { |
177 | struct kd_storage *kds; |
178 | uint16_t n_elements; |
179 | static_assert(N_STORAGE_UNITS_PER_BUFFER <= UINT16_MAX); |
180 | assert(kd_bufs[i].kdr_size <= N_STORAGE_UNITS_PER_BUFFER * |
181 | sizeof(struct kd_storage)); |
182 | |
183 | n_elements = kd_bufs[i].kdr_size / sizeof(struct kd_storage); |
184 | kds = kd_bufs[i].kdr_addr; |
185 | |
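		/*
		 * Thread every storage unit in this region onto the global
		 * free list, LIFO.  A kds_ptr is a compact (buffer_index,
		 * offset) pair that POINTER_FROM_KDS_PTR() resolves back to
		 * the struct kd_storage it names.
		 */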
186 | for (uint16_t n = 0; n < n_elements; n++) { |
187 | kds[n].kds_next.buffer_index = kd_ctrl_page->kds_free_list.buffer_index; |
188 | kds[n].kds_next.offset = kd_ctrl_page->kds_free_list.offset; |
189 | |
190 | kd_ctrl_page->kds_free_list.buffer_index = i; |
191 | kd_ctrl_page->kds_free_list.offset = n; |
192 | } |
193 | count_storage_units += n_elements; |
194 | } |
195 | |
196 | kd_data_page->kdb_storage_count = count_storage_units; |
197 | |
198 | for (i = 0; i < ncpus; i++) { |
199 | kdbip[i].kd_list_head.raw = KDS_PTR_NULL; |
200 | kdbip[i].kd_list_tail.raw = KDS_PTR_NULL; |
201 | kdbip[i].kd_lostevents = false; |
202 | kdbip[i].num_bufs = 0; |
203 | } |
204 | |
205 | kd_ctrl_page->kdc_flags |= KDBG_BUFINIT; |
206 | |
207 | kd_ctrl_page->kdc_storage_used = 0; |
208 | out: |
209 | if (error) { |
		delete_buffers(kd_ctrl_page, kd_data_page);
211 | } |
212 | |
213 | return error; |
214 | } |
215 | |
216 | void |
217 | delete_buffers(struct kd_control *kd_ctrl_page, |
218 | struct kd_buffer *kd_data_page) |
219 | { |
220 | unsigned int i; |
221 | int kdb_region_count = kd_data_page->kdb_region_count; |
222 | |
223 | struct kd_bufinfo *kdbip = kd_data_page->kdb_info; |
224 | struct kd_region *kd_bufs = kd_data_page->kd_bufs; |
225 | |
226 | if (kd_bufs) { |
227 | for (i = 0; i < kdb_region_count; i++) { |
228 | if (kd_bufs[i].kdr_addr) { |
				kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdr_addr, (vm_size_t)kd_bufs[i].kdr_size);
230 | } |
231 | } |
232 | kfree_type(struct kd_region, kdb_region_count, kd_bufs); |
233 | |
234 | kd_data_page->kd_bufs = NULL; |
235 | kd_data_page->kdb_region_count = 0; |
236 | } |
237 | if (kd_data_page->kdcopybuf) { |
		kmem_free(kernel_map, (vm_offset_t)kd_data_page->kdcopybuf, kd_ctrl_page->kdebug_kdcopybuf_size);
239 | |
240 | kd_data_page->kdcopybuf = NULL; |
241 | } |
242 | kd_ctrl_page->kds_free_list.raw = KDS_PTR_NULL; |
243 | |
244 | if (kdbip) { |
245 | kfree_type(struct kd_bufinfo, kd_ctrl_page->alloc_cpus, kdbip); |
246 | kd_data_page->kdb_info = NULL; |
247 | } |
248 | kd_ctrl_page->kdc_coprocs = NULL; |
249 | kd_ctrl_page->kdebug_cpus = 0; |
250 | kd_ctrl_page->alloc_cpus = 0; |
251 | kd_ctrl_page->kdc_flags &= ~KDBG_BUFINIT; |
252 | } |
253 | |
254 | static bool |
255 | allocate_storage_unit(struct kd_control *kd_ctrl_page, |
256 | struct kd_buffer *kd_data_page, int cpu) |
257 | { |
258 | union kds_ptr kdsp; |
259 | struct kd_storage *kdsp_actual, *kdsp_next_actual; |
260 | struct kd_bufinfo *kdbip, *kdbp, *kdbp_vict, *kdbp_try; |
261 | uint64_t oldest_ts, ts; |
262 | bool retval = true; |
263 | struct kd_region *kd_bufs; |
264 | |
265 | int intrs_en = kdebug_storage_lock(kd_ctrl_page); |
266 | |
267 | kdbp = &kd_data_page->kdb_info[cpu]; |
268 | kd_bufs = kd_data_page->kd_bufs; |
269 | kdbip = kd_data_page->kdb_info; |
270 | |
271 | /* If someone beat us to the allocate, return success */ |
272 | if (kdbp->kd_list_tail.raw != KDS_PTR_NULL) { |
273 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail); |
274 | |
275 | if (kdsp_actual->kds_bufindx < kd_ctrl_page->kdebug_events_per_storage_unit) { |
276 | goto out; |
277 | } |
278 | } |
279 | |
280 | if ((kdsp = kd_ctrl_page->kds_free_list).raw != KDS_PTR_NULL) { |
281 | /* |
282 | * If there's a free page, grab it from the free list. |
283 | */ |
284 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
285 | kd_ctrl_page->kds_free_list = kdsp_actual->kds_next; |
286 | |
287 | kd_ctrl_page->kdc_storage_used++; |
288 | } else { |
289 | /* |
290 | * Otherwise, we're going to lose events and repurpose the oldest |
291 | * storage unit we can find. |
292 | */ |
293 | if (kd_ctrl_page->kdc_live_flags & KDBG_NOWRAP) { |
294 | kd_ctrl_page->kdc_emit = KDEMIT_DISABLE; |
295 | kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED; |
296 | kdebug_enable = 0; |
297 | kd_ctrl_page->enabled = 0; |
298 | commpage_update_kdebug_state(); |
299 | kdbp->kd_lostevents = true; |
300 | retval = false; |
301 | goto out; |
302 | } |
303 | kdbp_vict = NULL; |
304 | oldest_ts = UINT64_MAX; |
305 | |
306 | for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page->kdebug_cpus]; kdbp_try++) { |
307 | if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) { |
308 | /* |
309 | * no storage unit to steal |
310 | */ |
311 | continue; |
312 | } |
313 | |
314 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_try->kd_list_head); |
315 | |
316 | if (kdsp_actual->kds_bufcnt < kd_ctrl_page->kdebug_events_per_storage_unit) { |
317 | /* |
318 | * make sure we don't steal the storage unit |
319 | * being actively recorded to... need to |
320 | * move on because we don't want an out-of-order |
321 | * set of events showing up later |
322 | */ |
323 | continue; |
324 | } |
325 | |
326 | /* |
327 | * When wrapping, steal the storage unit with the |
328 | * earliest timestamp on its last event, instead of the |
329 | * earliest timestamp on the first event. This allows a |
330 | * storage unit with more recent events to be preserved, |
331 | * even if the storage unit contains events that are |
332 | * older than those found in other CPUs. |
333 | */ |
			ts = kdbg_get_timestamp(&kdsp_actual->kds_records[kd_ctrl_page->kdebug_events_per_storage_unit - 1]);
335 | if (ts < oldest_ts) { |
336 | oldest_ts = ts; |
337 | kdbp_vict = kdbp_try; |
338 | } |
339 | } |
340 | if (kdbp_vict == NULL && kd_ctrl_page->mode == KDEBUG_MODE_TRACE) { |
341 | kd_ctrl_page->kdc_emit = KDEMIT_DISABLE; |
342 | kdebug_enable = 0; |
343 | kd_ctrl_page->enabled = 0; |
344 | commpage_update_kdebug_state(); |
345 | retval = false; |
346 | goto out; |
347 | } |
348 | kdsp = kdbp_vict->kd_list_head; |
349 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
350 | kdbp_vict->kd_list_head = kdsp_actual->kds_next; |
351 | |
352 | if (kdbp_vict->kd_list_head.raw != KDS_PTR_NULL) { |
353 | kdsp_next_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdbp_vict->kd_list_head); |
354 | kdsp_next_actual->kds_lostevents = true; |
355 | } else { |
356 | kdbp_vict->kd_lostevents = true; |
357 | } |
358 | |
359 | if (kd_ctrl_page->kdc_oldest_time < oldest_ts) { |
360 | kd_ctrl_page->kdc_oldest_time = oldest_ts; |
361 | } |
362 | kd_ctrl_page->kdc_live_flags |= KDBG_WRAPPED; |
363 | } |
364 | |
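	/*
	 * kdsp/kdsp_actual now name a storage unit taken from the free list or
	 * stolen from another CPU; reset it before appending it to this CPU's
	 * list.
	 */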
365 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) { |
366 | kdsp_actual->kds_timestamp = kdebug_timestamp(); |
367 | } else { |
368 | kdsp_actual->kds_timestamp = mach_continuous_time(); |
369 | } |
370 | |
371 | kdsp_actual->kds_next.raw = KDS_PTR_NULL; |
372 | kdsp_actual->kds_bufcnt = 0; |
373 | kdsp_actual->kds_readlast = 0; |
374 | |
375 | kdsp_actual->kds_lostevents = kdbp->kd_lostevents; |
376 | kdbp->kd_lostevents = false; |
377 | kdsp_actual->kds_bufindx = 0; |
378 | |
379 | if (kdbp->kd_list_head.raw == KDS_PTR_NULL) { |
380 | kdbp->kd_list_head = kdsp; |
381 | } else { |
382 | POINTER_FROM_KDS_PTR(kd_bufs, kdbp->kd_list_tail)->kds_next = kdsp; |
383 | } |
384 | kdbp->kd_list_tail = kdsp; |
385 | out: |
386 | kdebug_storage_unlock(kd_ctrl_page, intrs_en); |
387 | |
388 | return retval; |
389 | } |
390 | |
391 | static void |
392 | release_storage_unit(struct kd_control *kd_ctrl_page, struct kd_buffer *kd_data_page, int cpu, uint32_t kdsp_raw) |
393 | { |
394 | struct kd_storage *kdsp_actual; |
395 | struct kd_bufinfo *kdbp; |
396 | union kds_ptr kdsp; |
397 | |
398 | kdbp = &kd_data_page->kdb_info[cpu]; |
399 | |
400 | kdsp.raw = kdsp_raw; |
401 | |
402 | int intrs_en = kdebug_storage_lock(kd_ctrl_page); |
403 | |
404 | if (kdsp.raw == kdbp->kd_list_head.raw) { |
405 | /* |
406 | * it's possible for the storage unit pointed to |
407 | * by kdsp to have already been stolen... so |
408 | * check to see if it's still the head of the list |
409 | * now that we're behind the lock that protects |
410 | * adding and removing from the queue... |
411 | * since we only ever release and steal units from |
412 | * that position, if it's no longer the head |
		 * we have nothing to do in this context
414 | */ |
415 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kdsp); |
416 | kdbp->kd_list_head = kdsp_actual->kds_next; |
417 | |
418 | kdsp_actual->kds_next = kd_ctrl_page->kds_free_list; |
419 | kd_ctrl_page->kds_free_list = kdsp; |
420 | |
421 | kd_ctrl_page->kdc_storage_used--; |
422 | } |
423 | |
424 | kdebug_storage_unlock(kd_ctrl_page, intrs_en); |
425 | } |
426 | |
427 | bool |
428 | kdebug_disable_wrap(struct kd_control *ctl, |
429 | kdebug_emit_filter_t *old_emit, kdebug_live_flags_t *old_live) |
430 | { |
	int intrs_en = kdebug_storage_lock(ctl);
432 | |
433 | *old_emit = ctl->kdc_emit; |
434 | *old_live = ctl->kdc_live_flags; |
435 | |
436 | bool wrapped = ctl->kdc_live_flags & KDBG_WRAPPED; |
437 | ctl->kdc_live_flags &= ~KDBG_WRAPPED; |
438 | ctl->kdc_live_flags |= KDBG_NOWRAP; |
439 | |
	kdebug_storage_unlock(ctl, intrs_en);
441 | |
442 | return wrapped; |
443 | } |
444 | |
445 | static void |
446 | _enable_wrap(struct kd_control *kd_ctrl_page, kdebug_emit_filter_t emit) |
447 | { |
448 | int intrs_en = kdebug_storage_lock(kd_ctrl_page); |
449 | kd_ctrl_page->kdc_live_flags &= ~KDBG_NOWRAP; |
450 | if (emit) { |
451 | kd_ctrl_page->kdc_emit = emit; |
452 | } |
453 | kdebug_storage_unlock(kd_ctrl_page, intrs_en); |
454 | } |
455 | |
456 | __attribute__((always_inline)) |
457 | void |
458 | kernel_debug_write(struct kd_control *kd_ctrl_page, |
459 | struct kd_buffer *kd_data_page, |
460 | struct kd_record kd_rec) |
461 | { |
462 | uint64_t now = 0; |
463 | uint32_t bindx; |
464 | kd_buf *kd; |
465 | int cpu; |
466 | struct kd_bufinfo *kdbp; |
467 | struct kd_storage *kdsp_actual; |
468 | union kds_ptr kds_raw; |
469 | |
470 | disable_preemption(); |
471 | |
472 | if (kd_ctrl_page->enabled == 0) { |
473 | goto out; |
474 | } |
475 | |
476 | if (kd_rec.cpu == -1) { |
477 | cpu = cpu_number(); |
478 | } else { |
479 | cpu = kd_rec.cpu; |
480 | } |
481 | |
482 | kdbp = &kd_data_page->kdb_info[cpu]; |
483 | |
484 | bool timestamp_is_continuous = kdbp->continuous_timestamps; |
485 | |
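	/*
	 * If the caller supplied a timestamp, normalize it to the clock this
	 * session records with, then drop the event if it predates the oldest
	 * time still represented in the buffers -- emitting it would appear
	 * out of order.
	 */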
486 | if (kd_rec.timestamp != -1) { |
487 | if (kdebug_using_continuous_time()) { |
488 | if (!timestamp_is_continuous) { |
				kd_rec.timestamp = absolutetime_to_continuoustime(kd_rec.timestamp);
490 | } |
491 | } else { |
492 | if (timestamp_is_continuous) { |
				kd_rec.timestamp = continuoustime_to_absolutetime(kd_rec.timestamp);
494 | } |
495 | } |
496 | kd_rec.timestamp &= KDBG_TIMESTAMP_MASK; |
497 | if (kd_rec.timestamp < kd_ctrl_page->kdc_oldest_time) { |
498 | if (kdbp->latest_past_event_timestamp < kd_rec.timestamp) { |
499 | kdbp->latest_past_event_timestamp = kd_rec.timestamp; |
500 | } |
501 | goto out; |
502 | } |
503 | } |
504 | |
505 | retry_q: |
506 | kds_raw = kdbp->kd_list_tail; |
507 | |
508 | if (kds_raw.raw != KDS_PTR_NULL) { |
509 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_data_page->kd_bufs, kds_raw); |
510 | bindx = kdsp_actual->kds_bufindx; |
511 | } else { |
512 | kdsp_actual = NULL; |
513 | bindx = kd_ctrl_page->kdebug_events_per_storage_unit; |
514 | } |
515 | |
516 | if (kdsp_actual == NULL || bindx >= kd_ctrl_page->kdebug_events_per_storage_unit) { |
517 | if (allocate_storage_unit(kd_ctrl_page, kd_data_page, cpu) == false) { |
518 | /* |
519 | * this can only happen if wrapping |
520 | * has been disabled |
521 | */ |
522 | goto out; |
523 | } |
524 | goto retry_q; |
525 | } |
526 | |
527 | if (kd_rec.timestamp != -1) { |
		/*
		 * IOP entries can be allocated before xnu allocates and
		 * initializes the buffer.  Also, Intel uses a special 0 value
		 * as an early-tracing timestamp sentinel to mark the start of
		 * the trace time of interest.
		 */
533 | if (kd_rec.timestamp < kdsp_actual->kds_timestamp) { |
534 | kdsp_actual->kds_timestamp = kd_rec.timestamp; |
535 | } |
536 | now = kd_rec.timestamp; |
537 | } else { |
538 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) { |
539 | now = kdebug_timestamp() & KDBG_TIMESTAMP_MASK; |
540 | } else { |
541 | now = mach_continuous_time() & KDBG_TIMESTAMP_MASK; |
542 | } |
543 | } |
544 | |
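	/*
	 * Reserve slot bindx with a compare-and-swap; if another writer racing
	 * on this per-CPU buffer claimed the slot first, start over.
	 */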
545 | if (!OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) { |
546 | goto retry_q; |
547 | } |
548 | |
549 | kd = &kdsp_actual->kds_records[bindx]; |
550 | |
551 | if (kd_ctrl_page->kdc_flags & KDBG_DEBUGID_64) { |
		/* The debugid has been passed in arg4. */
553 | kd->debugid = 0; |
554 | } else { |
555 | kd->debugid = kd_rec.debugid; |
556 | } |
557 | |
558 | kd->arg1 = kd_rec.arg1; |
559 | kd->arg2 = kd_rec.arg2; |
560 | kd->arg3 = kd_rec.arg3; |
561 | kd->arg4 = kd_rec.arg4; |
562 | kd->arg5 = kd_rec.arg5; |
563 | |
	kdbg_set_timestamp_and_cpu(kd, now, cpu);
565 | |
566 | OSAddAtomic(1, &kdsp_actual->kds_bufcnt); |
567 | |
568 | out: |
569 | enable_preemption(); |
570 | } |
571 | |
// Read events from kdebug storage units into a user space buffer or file.
//
// This code runs while events are emitted -- storage unit allocation and
// deallocation will synchronize with the emitters. Only one reader per
// control structure is allowed.
577 | int |
578 | kernel_debug_read(struct kd_control *kd_ctrl_page, |
579 | struct kd_buffer *kd_data_page, user_addr_t buffer, size_t *number, |
580 | vnode_t vp, vfs_context_t ctx, uint32_t file_version) |
581 | { |
582 | size_t count; |
583 | unsigned int cpu, min_cpu; |
584 | uint64_t barrier_min = 0, barrier_max = 0, t, earliest_time; |
585 | int error = 0; |
586 | kd_buf *tempbuf; |
587 | uint32_t rcursor; |
588 | kd_buf lostevent; |
589 | union kds_ptr kdsp; |
590 | bool traced_retrograde = false; |
591 | struct kd_storage *kdsp_actual; |
592 | struct kd_bufinfo *kdbp; |
593 | struct kd_bufinfo *min_kdbp; |
594 | size_t tempbuf_count; |
595 | uint32_t tempbuf_number; |
596 | kdebug_emit_filter_t old_emit; |
597 | uint32_t old_live_flags; |
598 | bool out_of_events = false; |
599 | bool wrapped = false; |
600 | bool set_preempt = true; |
601 | bool should_disable = false; |
602 | |
603 | struct kd_bufinfo *kdbip = kd_data_page->kdb_info; |
604 | struct kd_region *kd_bufs = kd_data_page->kd_bufs; |
605 | |
606 | assert(number != NULL); |
607 | count = *number / sizeof(kd_buf); |
608 | *number = 0; |
609 | |
610 | if (count == 0 || !(kd_ctrl_page->kdc_flags & KDBG_BUFINIT) || kd_data_page->kdcopybuf == 0) { |
611 | return EINVAL; |
612 | } |
613 | |
614 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) { |
615 | /* |
616 | * A corpse can be created due to 'TASK_HAS_TOO_MANY_THREADS' |
617 | * and that can be handled by a callout thread that already |
618 | * has the eager-preemption set. |
619 | * So check to see if we are dealing with one such thread. |
620 | */ |
		set_preempt = !thread_is_eager_preempt(current_thread());
622 | } |
623 | |
624 | if (set_preempt) { |
		thread_set_eager_preempt(current_thread());
626 | } |
627 | |
	memset(&lostevent, 0, sizeof(lostevent));
629 | lostevent.debugid = TRACE_LOST_EVENTS; |
630 | |
631 | /* |
	 * Capture the current time. Only sort events that have occurred
633 | * before now. Since the IOPs are being flushed here, it is possible |
634 | * that events occur on the AP while running live tracing. |
635 | */ |
636 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) { |
637 | barrier_max = kdebug_timestamp() & KDBG_TIMESTAMP_MASK; |
638 | } else { |
639 | barrier_max = mach_continuous_time() & KDBG_TIMESTAMP_MASK; |
640 | } |
641 | |
642 | /* |
643 | * Disable wrap so storage units cannot be stolen out from underneath us |
644 | * while merging events. |
645 | * |
646 | * Because we hold ktrace_lock, no other control threads can be playing |
647 | * with kdc_flags. The code that emits new events could be running, |
648 | * but it grabs kdc_storage_lock if it needs to acquire a new storage |
649 | * chunk, which is where it examines kdc_flags. If it is adding to |
650 | * the same chunk we're reading from, check for that below. |
651 | */ |
	wrapped = kdebug_disable_wrap(kd_ctrl_page, &old_emit, &old_live_flags);
653 | |
654 | if (count > kd_data_page->kdb_event_count) { |
655 | count = kd_data_page->kdb_event_count; |
656 | } |
657 | |
658 | if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) { |
659 | tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count; |
660 | } |
661 | |
662 | /* |
663 | * If the buffers have wrapped, do not emit additional lost events for the |
664 | * oldest storage units. |
665 | */ |
666 | if (wrapped) { |
667 | kd_ctrl_page->kdc_live_flags &= ~KDBG_WRAPPED; |
668 | |
669 | for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) { |
670 | if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) { |
671 | continue; |
672 | } |
673 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
674 | kdsp_actual->kds_lostevents = false; |
675 | } |
676 | } |
677 | |
678 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) { |
679 | /* |
680 | * In TRIAGE mode we want to extract all the current |
681 | * records regardless of where we stopped reading last |
682 | * time so that we have the best shot at getting older |
683 | * records for threads before the buffers are wrapped. |
		 * So:
		 *   a) set kd_prev_timebase to 0 so we (re-)consider older records, and
		 *   b) set kds_readlast to 0 to start the search from the first record.
688 | */ |
689 | for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) { |
690 | kdbp->kd_prev_timebase = 0; |
691 | if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) { |
692 | continue; |
693 | } |
694 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
695 | kdsp_actual->kds_readlast = 0; |
696 | } |
697 | } |
698 | |
699 | /* |
700 | * Capture the earliest time where there are events for all CPUs and don't |
701 | * emit events with timestamps prior. |
702 | */ |
703 | barrier_min = kd_ctrl_page->kdc_oldest_time; |
704 | |
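	/*
	 * Merge loop: repeatedly scan each CPU's oldest storage unit, copy the
	 * event with the earliest timestamp into the kdcopybuf scratch buffer,
	 * and flush that scratch buffer to the output whenever it fills or the
	 * available events are exhausted.
	 */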
705 | while (count) { |
706 | tempbuf = kd_data_page->kdcopybuf; |
707 | tempbuf_number = 0; |
708 | |
709 | if (wrapped) { |
710 | /* |
711 | * Emit a lost events tracepoint to indicate that previous events |
712 | * were lost -- the thread map cannot be trusted. A new one must |
713 | * be taken so tools can analyze the trace in a backwards-facing |
714 | * fashion. |
715 | */ |
			kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, 0);
717 | *tempbuf = lostevent; |
718 | wrapped = false; |
719 | goto nextevent; |
720 | } |
721 | |
722 | /* While space left in merged events scratch buffer. */ |
723 | while (tempbuf_count) { |
724 | bool lostevents = false; |
725 | int lostcpu = 0; |
726 | earliest_time = UINT64_MAX; |
727 | min_kdbp = NULL; |
728 | min_cpu = 0; |
729 | |
730 | /* Check each CPU's buffers for the earliest event. */ |
731 | for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page->kdebug_cpus; cpu++, kdbp++) { |
732 | /* Skip CPUs without data in their oldest storage unit. */ |
733 | if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) { |
734 | next_cpu: |
735 | continue; |
736 | } |
737 | /* From CPU data to buffer header to buffer. */ |
738 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
739 | |
740 | next_event: |
741 | /* The next event to be read from this buffer. */ |
742 | rcursor = kdsp_actual->kds_readlast; |
743 | |
744 | /* Skip this buffer if there are no events left. */ |
745 | if (rcursor == kdsp_actual->kds_bufindx) { |
746 | continue; |
747 | } |
748 | |
749 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) { |
750 | /* |
751 | * TRIAGE mode record keeping doesn't (currently) |
752 | * use lostevent markers. It also doesn't want to |
753 | * call release_storage_unit() in this read call. |
					 * It expects the buffers to wrap and records to be
					 * reclaimed solely in that way.
756 | */ |
					t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
758 | goto skip_record_checks; |
759 | } |
760 | |
761 | /* |
762 | * Check that this storage unit wasn't stolen and events were |
763 | * lost. This must have happened while wrapping was disabled |
764 | * in this function. |
765 | */ |
766 | if (kdsp_actual->kds_lostevents) { |
767 | lostevents = true; |
768 | kdsp_actual->kds_lostevents = false; |
769 | |
770 | /* |
771 | * The earliest event we can trust is the first one in this |
772 | * stolen storage unit. |
773 | */ |
					uint64_t lost_time =
					    kdbg_get_timestamp(&kdsp_actual->kds_records[0]);
776 | if (kd_ctrl_page->kdc_oldest_time < lost_time) { |
777 | /* |
778 | * If this is the first time we've seen lost events for |
779 | * this gap, record its timestamp as the oldest |
780 | * timestamp we're willing to merge for the lost events |
781 | * tracepoint. |
782 | */ |
783 | kd_ctrl_page->kdc_oldest_time = barrier_min = lost_time; |
784 | lostcpu = cpu; |
785 | } |
786 | } |
787 | |
				t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]);
789 | |
790 | if (t > barrier_max) { |
791 | goto next_cpu; |
792 | } |
793 | if (t < kdsp_actual->kds_timestamp) { |
794 | /* |
795 | * This indicates the event emitter hasn't completed |
					 * filling in the event (because we're looking at the
797 | * buffer that the record head is using). The max barrier |
798 | * timestamp should have saved us from seeing these kinds |
799 | * of things, but other CPUs might be slow on the up-take. |
800 | * |
801 | * Bail out so we don't get out-of-order events by |
					 * continuing to read events from other CPUs.
803 | */ |
804 | out_of_events = true; |
805 | break; |
806 | } |
807 | |
808 | /* |
809 | * Ignore events that have aged out due to wrapping or storage |
810 | * unit exhaustion while merging events. |
811 | */ |
812 | if (t < barrier_min) { |
813 | kdsp_actual->kds_readlast++; |
814 | if (kdsp_actual->kds_readlast >= kd_ctrl_page->kdebug_events_per_storage_unit) { |
						release_storage_unit(kd_ctrl_page, kd_data_page, cpu, kdsp.raw);
816 | |
817 | if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) { |
818 | goto next_cpu; |
819 | } |
820 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
821 | } |
822 | goto next_event; |
823 | } |
824 | |
825 | /* |
826 | * Don't worry about merging any events -- just walk through |
827 | * the CPUs and find the latest timestamp of lost events. |
828 | */ |
829 | if (lostevents) { |
830 | continue; |
831 | } |
832 | skip_record_checks: |
833 | if (t < earliest_time) { |
834 | earliest_time = t; |
835 | min_kdbp = kdbp; |
836 | min_cpu = cpu; |
837 | } |
838 | } |
839 | if (lostevents) { |
840 | /* |
841 | * If any lost events were hit in the buffers, emit an event |
842 | * with the latest timestamp. |
843 | */ |
				kdbg_set_timestamp_and_cpu(&lostevent, barrier_min, lostcpu);
845 | *tempbuf = lostevent; |
846 | tempbuf->arg1 = 1; |
847 | goto nextevent; |
848 | } |
849 | if (min_kdbp == NULL) { |
850 | /* All buffers ran empty. */ |
851 | out_of_events = true; |
852 | } |
853 | if (out_of_events) { |
854 | break; |
855 | } |
856 | |
857 | kdsp = min_kdbp->kd_list_head; |
858 | kdsp_actual = POINTER_FROM_KDS_PTR(kd_bufs, kdsp); |
859 | |
860 | if (min_kdbp->latest_past_event_timestamp != 0) { |
861 | if (kdbg_debug) { |
					printf("kdebug: PAST EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
867 | } |
868 | |
				kdbg_set_timestamp_and_cpu(tempbuf, earliest_time, min_cpu);
				tempbuf->arg1 = (kd_buf_argtype)min_kdbp->latest_past_event_timestamp;
871 | tempbuf->arg2 = 0; |
872 | tempbuf->arg3 = 0; |
873 | tempbuf->arg4 = 0; |
874 | tempbuf->debugid = TRACE_PAST_EVENTS; |
875 | min_kdbp->latest_past_event_timestamp = 0; |
876 | goto nextevent; |
877 | } |
878 | |
879 | /* Copy earliest event into merged events scratch buffer. */ |
880 | *tempbuf = kdsp_actual->kds_records[kdsp_actual->kds_readlast++]; |
881 | kd_buf *earliest_event = tempbuf; |
882 | if (kd_control_trace.kdc_flags & KDBG_MATCH_DISABLE) { |
883 | kd_event_matcher *match = &kd_control_trace.disable_event_match; |
884 | kd_event_matcher *mask = &kd_control_trace.disable_event_mask; |
885 | if ((earliest_event->debugid & mask->kem_debugid) == match->kem_debugid && |
886 | (earliest_event->arg1 & mask->kem_args[0]) == match->kem_args[0] && |
887 | (earliest_event->arg2 & mask->kem_args[1]) == match->kem_args[1] && |
888 | (earliest_event->arg3 & mask->kem_args[2]) == match->kem_args[2] && |
889 | (earliest_event->arg4 & mask->kem_args[3]) == match->kem_args[3]) { |
890 | should_disable = true; |
891 | } |
892 | } |
893 | |
894 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) { |
895 | if (kdsp_actual->kds_readlast == kd_ctrl_page->kdebug_events_per_storage_unit) { |
					release_storage_unit(kd_ctrl_page, kd_data_page, min_cpu, kdsp.raw);
897 | } |
898 | } |
899 | |
900 | /* |
901 | * Watch for out of order timestamps (from IOPs). |
902 | */ |
903 | if (earliest_time < min_kdbp->kd_prev_timebase) { |
904 | /* |
905 | * If we haven't already, emit a retrograde events event. |
906 | * Otherwise, ignore this event. |
907 | */ |
908 | if (traced_retrograde) { |
909 | continue; |
910 | } |
911 | if (kdbg_debug) { |
					printf("kdebug: RETRO EVENT: debugid %#8x: "
					    "time %lld from CPU %u "
					    "(barrier at time %lld)\n",
					    kdsp_actual->kds_records[rcursor].debugid,
					    t, cpu, barrier_min);
917 | } |
918 | |
				kdbg_set_timestamp_and_cpu(tempbuf, min_kdbp->kd_prev_timebase,
				    kdbg_get_cpu(tempbuf));
921 | tempbuf->arg1 = tempbuf->debugid; |
922 | tempbuf->arg2 = (kd_buf_argtype)earliest_time; |
923 | tempbuf->arg3 = 0; |
924 | tempbuf->arg4 = 0; |
925 | tempbuf->debugid = TRACE_RETROGRADE_EVENTS; |
926 | traced_retrograde = true; |
927 | } else { |
928 | min_kdbp->kd_prev_timebase = earliest_time; |
929 | } |
930 | nextevent: |
931 | tempbuf_count--; |
932 | tempbuf_number++; |
933 | tempbuf++; |
934 | |
935 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE && |
936 | (RAW_file_written += sizeof(kd_buf)) >= RAW_FLUSH_SIZE) { |
937 | break; |
938 | } |
939 | } |
940 | |
941 | if (tempbuf_number) { |
942 | /* |
943 | * Remember the latest timestamp of events that we've merged so we |
944 | * don't think we've lost events later. |
945 | */ |
			uint64_t latest_time = kdbg_get_timestamp(tempbuf - 1);
947 | if (kd_ctrl_page->kdc_oldest_time < latest_time) { |
948 | kd_ctrl_page->kdc_oldest_time = latest_time; |
949 | } |
950 | |
951 | if (kd_ctrl_page->mode == KDEBUG_MODE_TRACE) { |
952 | extern int kernel_debug_trace_write_to_file(user_addr_t *buffer, |
953 | size_t *number, size_t *count, size_t tempbuf_number, |
954 | vnode_t vp, vfs_context_t ctx, uint32_t file_version); |
				error = kernel_debug_trace_write_to_file(&buffer, number,
				    &count, tempbuf_number, vp, ctx, file_version);
957 | } else if (kd_ctrl_page->mode == KDEBUG_MODE_TRIAGE) { |
				memcpy((void *)buffer, kd_data_page->kdcopybuf,
				    tempbuf_number * sizeof(kd_buf));
960 | buffer += tempbuf_number * sizeof(kd_buf); |
961 | } else { |
				panic("kdebug: invalid kdebug mode %d", kd_ctrl_page->mode);
963 | } |
964 | if (error) { |
965 | *number = 0; |
966 | error = EINVAL; |
967 | break; |
968 | } |
969 | count -= tempbuf_number; |
970 | *number += tempbuf_number; |
971 | } |
972 | if (out_of_events) { |
973 | break; |
974 | } |
975 | |
976 | if ((tempbuf_count = count) > kd_ctrl_page->kdebug_kdcopybuf_count) { |
977 | tempbuf_count = kd_ctrl_page->kdebug_kdcopybuf_count; |
978 | } |
979 | } |
980 | if ((old_live_flags & KDBG_NOWRAP) == 0) { |
		_enable_wrap(kd_ctrl_page, old_emit);
982 | } |
983 | |
984 | if (set_preempt) { |
		thread_clear_eager_preempt(current_thread());
986 | } |
987 | |
988 | if (should_disable) { |
989 | kernel_debug_disable(); |
990 | } |
991 | |
992 | return error; |
993 | } |
994 | |