/*
 * Copyright (c) 2011-2016 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* PET (Profile Every Thread) sampling */
#include <mach/mach_types.h>
#include <sys/errno.h>

#include <kperf/kperf.h>
#include <kperf/buffer.h>
#include <kperf/sample.h>
#include <kperf/context.h>
#include <kperf/action.h>
#include <kperf/pet.h>
#include <kperf/kperf_timer.h>

#include <kern/task.h>
#include <kern/kalloc.h>

/*
 * Action ID to run for each sample.
 *
 * The address of this variable is used as the sync point that the sampling
 * thread waits on.
 */
static unsigned int pet_action_id = 0;

static lck_mtx_t *pet_lock;
static boolean_t pet_initted = FALSE;
static boolean_t pet_running = FALSE;

/* number of callstack samples to skip for idle threads */
static uint32_t pet_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE;
/*
 * Lightweight PET mode samples the system less-intrusively than normal PET
 * mode.  Instead of iterating tasks and threads on each sample, it increments
 * a global generation count, kperf_pet_gen, which is checked as threads are
 * context switched on-core.  If the thread's local generation count is older
 * than the global generation, the thread samples itself.
 *
 *             |  |
 * thread A    +--+---------|
 *             |  |
 * thread B    |--+---------------|
 *             |  |
 * thread C    |  |     |-------------------------------------
 *             |  |     |
 * thread D    |  |     |     |-------------------------------
 *             |  |     |     |
 *             +--+-----+-----+--------------------------------> time
 *                |     |     |
 *                |     +-----+--- threads sampled when they come on-core in
 *                |                kperf_pet_switch_context
 *                |
 *                +--- PET timer fire, sample on-core threads A and B,
 *                     increment kperf_pet_gen
 */
static boolean_t lightweight_pet = FALSE;
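
/*
 * A minimal sketch of the lightweight check, roughly as it runs from the
 * context-switch path (illustrative only; the real hook is
 * kperf_pet_on_cpu(), below, and the thread's generation is brought up to
 * date as part of taking the sample):
 *
 *	if (thread->kperf_pet_gen != kperf_pet_gen) {
 *		sample(thread);
 *		thread->kperf_pet_gen = kperf_pet_gen;
 *	}
 */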

/*
 * Whether lightweight PET and sampling are active.
 */
boolean_t kperf_lightweight_pet_active = FALSE;

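/* global generation count, bumped by kperf_pet_fire_before() on each PET timer fire */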
uint32_t kperf_pet_gen = 0;

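/* sample buffer for the PET thread, allocated in pet_start() and guarded by pet_lock */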
static struct kperf_sample *pet_sample;

/* thread lifecycle */

static kern_return_t pet_init(void);
static void pet_start(void);
static void pet_stop(void);

/* PET thread-only */

static void pet_thread_loop(void *param, wait_result_t wr);
static void pet_thread_idle(void);
static void pet_thread_work_unit(void);

/* listing things to sample */

static task_array_t pet_tasks = NULL;
static vm_size_t pet_tasks_size = 0;
static vm_size_t pet_tasks_count = 0;

static thread_array_t pet_threads = NULL;
static vm_size_t pet_threads_size = 0;
static vm_size_t pet_threads_count = 0;

static kern_return_t pet_tasks_prepare(void);
static kern_return_t pet_tasks_prepare_internal(void);

static kern_return_t pet_threads_prepare(task_t task);

/* sampling */

static void pet_sample_all_tasks(uint32_t idle_rate);
static void pet_sample_task(task_t task, uint32_t idle_rate);
static void pet_sample_thread(int pid, task_t task, thread_t thread,
    uint32_t idle_rate);

/* functions called by other areas of kperf */

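/*
 * Called around each PET timer fire.  In lightweight mode, bump the global
 * generation count so threads sample themselves as they come on-core;
 * otherwise, kperf_pet_fire_after() wakes the sampling thread.
 */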
void
kperf_pet_fire_before(void)
{
	if (!pet_initted || !pet_running) {
		return;
	}

	if (lightweight_pet) {
		BUF_INFO(PERF_PET_SAMPLE);
		OSIncrementAtomic(&kperf_pet_gen);
	}
}

void
kperf_pet_fire_after(void)
{
	if (!pet_initted || !pet_running) {
		return;
	}

	if (lightweight_pet) {
		kperf_timer_pet_rearm(0);
	} else {
		thread_wakeup(&pet_action_id);
	}
}

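/*
 * Sample a thread as it comes on-core, if it has not been sampled in the
 * current generation.  Called from the scheduler with interrupts disabled.
 */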
void
kperf_pet_on_cpu(thread_t thread, thread_continue_t continuation,
    uintptr_t *starting_fp)
{
	assert(thread != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	if (thread->kperf_pet_gen != kperf_pet_gen) {
		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START, kperf_pet_gen, thread->kperf_pet_gen);

		task_t task = get_threadtask(thread);
		struct kperf_context ctx = {
			.cur_thread = thread,
			.cur_task = task,
			.cur_pid = task_pid(task),
			.starting_fp = starting_fp,
		};
		/*
		 * Use a per-CPU interrupt buffer, since this is only called
		 * while interrupts are disabled, from the scheduler.
		 */
		struct kperf_sample *sample = kperf_intr_sample_buffer();
		if (!sample) {
			BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
			return;
		}

		unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
		if (continuation != NULL) {
			flags |= SAMPLE_FLAG_CONTINUATION;
		}
		kperf_sample(sample, &ctx, pet_action_id, flags);

		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
	} else {
		BUF_VERB(PERF_PET_SAMPLE_THREAD, kperf_pet_gen, thread->kperf_pet_gen);
	}
}

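/*
 * Configure PET to run the given action on each sample.  An action_id of 0
 * stops PET; any other value starts it.
 */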
void
kperf_pet_config(unsigned int action_id)
{
	kern_return_t kr = pet_init();
	if (kr != KERN_SUCCESS) {
		return;
	}

	lck_mtx_lock(pet_lock);

	BUF_INFO(PERF_PET_THREAD, 3, action_id);

	if (action_id == 0) {
		pet_stop();
	} else {
		pet_start();
	}

	pet_action_id = action_id;

	lck_mtx_unlock(pet_lock);
}

/* handle resource allocation */

static void
pet_start(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	if (pet_running) {
		return;
	}

	pet_sample = kalloc(sizeof(struct kperf_sample));
	if (!pet_sample) {
		return;
	}

	pet_running = TRUE;
}

static void
pet_stop(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	if (!pet_initted) {
		return;
	}

	if (pet_tasks != NULL) {
		assert(pet_tasks_size != 0);
		kfree(pet_tasks, pet_tasks_size);

		pet_tasks = NULL;
		pet_tasks_size = 0;
		pet_tasks_count = 0;
	}

	if (pet_threads != NULL) {
		assert(pet_threads_size != 0);
		kfree(pet_threads, pet_threads_size);

		pet_threads = NULL;
		pet_threads_size = 0;
		pet_threads_count = 0;
	}

	if (pet_sample != NULL) {
		kfree(pet_sample, sizeof(struct kperf_sample));
		pet_sample = NULL;
	}

	pet_running = FALSE;
}

/*
 * Lazily initialize PET.  Once PET has been used, its thread never exits.
 */
static kern_return_t
pet_init(void)
{
	if (pet_initted) {
		return KERN_SUCCESS;
	}

	/* make the sync point */
	pet_lock = lck_mtx_alloc_init(&kperf_lck_grp, NULL);
	assert(pet_lock);

	/* create the thread */

	BUF_INFO(PERF_PET_THREAD, 0);
	thread_t t;
	kern_return_t kr = kernel_thread_start(pet_thread_loop, NULL, &t);
	if (kr != KERN_SUCCESS) {
		lck_mtx_free(pet_lock, &kperf_lck_grp);
		return kr;
	}

	thread_set_thread_name(t, "kperf sampling");
	/* let the thread hold the only reference */
	thread_deallocate(t);

	pet_initted = TRUE;

	return KERN_SUCCESS;
}

/* called by PET thread only */

static void
pet_thread_work_unit(void)
{
	pet_sample_all_tasks(pet_idle_rate);
}

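/*
 * Wait to be woken up by kperf_pet_fire_after().  lck_mtx_sleep() drops
 * pet_lock while asleep and reacquires it before returning.
 */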
static void
pet_thread_idle(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	(void)lck_mtx_sleep(pet_lock, LCK_SLEEP_DEFAULT, &pet_action_id,
	    THREAD_UNINT);
}

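/*
 * Main loop for the sampling thread: sleep until the PET timer fires, sample
 * every task, then hand the elapsed time to kperf_timer_pet_rearm() so the
 * next deadline can account for how long sampling took.
 */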
__attribute__((noreturn))
static void
pet_thread_loop(void *param, wait_result_t wr)
{
#pragma unused(param, wr)
	uint64_t work_unit_ticks;

	BUF_INFO(PERF_PET_THREAD, 1);

	lck_mtx_lock(pet_lock);
	for (;;) {
		BUF_INFO(PERF_PET_IDLE);
		pet_thread_idle();

		BUF_INFO(PERF_PET_RUN);

		/* measure how long the work unit takes */
		work_unit_ticks = mach_absolute_time();
		pet_thread_work_unit();
		work_unit_ticks = mach_absolute_time() - work_unit_ticks;

		/* re-program the timer */
		kperf_timer_pet_rearm(work_unit_ticks);
	}
}

/* sampling */

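/*
 * Sample a single thread for the configured action.  A clean (idle) thread
 * only gets a full callstack on one in every idle_rate samples; for example,
 * with an idle_rate of 10, a thread that stays clean is callstack-sampled on
 * every tenth timer fire and recorded with an empty callstack otherwise.
 */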
static void
pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | SAMPLE_FLAG_THREAD_ONLY;

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);

	/* work out the context */
	struct kperf_context ctx = {
		.cur_thread = thread,
		.cur_task = task,
		.cur_pid = pid,
	};

	boolean_t thread_dirty = kperf_thread_get_dirty(thread);

	/*
	 * If the thread was dirty, clean it and take a full sample.
	 * Otherwise, only sample its callstack when its sample count is a
	 * multiple of idle_rate; the rest of the time, record an empty
	 * callstack.
	 */
	if (thread_dirty) {
		kperf_thread_set_dirty(thread, FALSE);
	} else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
		sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
	}
	thread->kperf_pet_cnt++;

	kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
}

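/*
 * Snapshot the task's threads into pet_threads, growing the array as needed
 * while the task is unlocked.  Each thread is referenced so it stays valid
 * once the task lock is dropped.
 */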
static kern_return_t
pet_threads_prepare(task_t task)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	vm_size_t threads_size_needed;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	for (;;) {
		task_lock(task);

		if (!task->active) {
			task_unlock(task);

			return KERN_FAILURE;
		}

		/* do we have the memory we need? */
		threads_size_needed = task->thread_count * sizeof(thread_t);
		if (threads_size_needed <= pet_threads_size) {
			break;
		}

		/* not enough memory, unlock the task and increase allocation */
		task_unlock(task);

		if (pet_threads_size != 0) {
			kfree(pet_threads, pet_threads_size);
		}

		assert(threads_size_needed > 0);
		pet_threads_size = threads_size_needed;

		pet_threads = kalloc(pet_threads_size);
		if (pet_threads == NULL) {
			pet_threads_size = 0;
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* have memory and the task is locked and active */
	thread_t thread;
	pet_threads_count = 0;
	queue_iterate(&(task->threads), thread, thread_t, task_threads) {
		thread_reference_internal(thread);
		pet_threads[pet_threads_count++] = thread;
	}

	/* can unlock task now that threads are referenced */
	task_unlock(task);

	return (pet_threads_count == 0) ? KERN_FAILURE : KERN_SUCCESS;
}

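/*
 * Sample a task and, if the action wants thread data, each of its threads.
 * The task is suspended (unless it is the kernel task) so the thread list is
 * a consistent snapshot.  Threads that were on a CPU when the timer fired
 * are skipped, since they were already sampled at the fire.
 */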
static void
pet_sample_task(task_t task, uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);

	int pid = task_pid(task);
	if (kperf_action_has_task(pet_action_id)) {
		struct kperf_context ctx = {
			.cur_task = task,
			.cur_pid = pid,
		};

		kperf_sample(pet_sample, &ctx, pet_action_id, SAMPLE_FLAG_TASK_ONLY);
	}

	if (!kperf_action_has_thread(pet_action_id)) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
		return;
	}

	kern_return_t kr = KERN_SUCCESS;

	/*
	 * Suspend the task to see an atomic snapshot of all its threads.  This
	 * is expensive and disruptive.
	 */
	bool needs_suspend = task != kernel_task;
	if (needs_suspend) {
		kr = task_suspend_internal(task);
		if (kr != KERN_SUCCESS) {
			BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
			return;
		}
	}

	kr = pet_threads_prepare(task);
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
		goto out;
	}

	for (unsigned int i = 0; i < pet_threads_count; i++) {
		thread_t thread = pet_threads[i];
		assert(thread != THREAD_NULL);

		/*
		 * Do not sample the thread if it was on a CPU when the timer
		 * fired.
		 */
		int cpu;
		for (cpu = 0; cpu < machine_info.logical_cpu_max; cpu++) {
			if (kperf_tid_on_cpus[cpu] == thread_tid(thread)) {
				break;
			}
		}

		/* the thread was not on a CPU */
		if (cpu == machine_info.logical_cpu_max) {
			pet_sample_thread(pid, task, thread, idle_rate);
		}

		thread_deallocate(pet_threads[i]);
	}

out:
	if (needs_suspend) {
		task_resume_internal(task);
	}

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, pet_threads_count);
}

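/*
 * Grow pet_tasks until it can hold an entry for every task in the system.
 * On success, returns with tasks_threads_lock held; the caller must drop it.
 */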
static kern_return_t
pet_tasks_prepare_internal(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	vm_size_t tasks_size_needed = 0;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/* do we have the memory we need? */
		tasks_size_needed = tasks_count * sizeof(task_t);
		if (tasks_size_needed <= pet_tasks_size) {
			break;
		}

		/* unlock and allocate more memory */
		lck_mtx_unlock(&tasks_threads_lock);

		/* grow task array */
		if (tasks_size_needed > pet_tasks_size) {
			if (pet_tasks_size != 0) {
				kfree(pet_tasks, pet_tasks_size);
			}

			assert(tasks_size_needed > 0);
			pet_tasks_size = tasks_size_needed;

			pet_tasks = (task_array_t)kalloc(pet_tasks_size);
			if (pet_tasks == NULL) {
				pet_tasks_size = 0;
				return KERN_RESOURCE_SHORTAGE;
			}
		}
	}

	return KERN_SUCCESS;
}

static kern_return_t
pet_tasks_prepare(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	/* allocate space and take the tasks_threads_lock */
	kern_return_t kr = pet_tasks_prepare_internal();
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	lck_mtx_assert(&tasks_threads_lock, LCK_MTX_ASSERT_OWNED);

	/* make sure the tasks are not deallocated after dropping the lock */
	task_t task;
	pet_tasks_count = 0;
	queue_iterate(&tasks, task, task_t, tasks) {
		if (task != kernel_task) {
			task_reference_internal(task);
			pet_tasks[pet_tasks_count++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

	return KERN_SUCCESS;
}

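/*
 * Sample every task in the system, except the kernel task.  The task
 * references taken in pet_tasks_prepare() are dropped once sampling is done.
 */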
static void
pet_sample_all_tasks(uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);

	kern_return_t kr = pet_tasks_prepare();
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
		BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, 0);
		return;
	}

	for (unsigned int i = 0; i < pet_tasks_count; i++) {
		task_t task = pet_tasks[i];

		pet_sample_task(task, idle_rate);
	}

	for (unsigned int i = 0; i < pet_tasks_count; i++) {
		task_deallocate(pet_tasks[i]);
	}

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, pet_tasks_count);
}

/* support sysctls */

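/*
 * These accessors back the kperf sysctl interface.  pet_idle_rate and
 * lightweight_pet are read by samplers without taking pet_lock, so a racing
 * update may be observed late.
 */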
int
kperf_get_pet_idle_rate(void)
{
	return pet_idle_rate;
}

int
kperf_set_pet_idle_rate(int val)
{
	/*
	 * pet_idle_rate is used as a modulus in pet_sample_thread(), so
	 * reject values that would divide by zero.
	 */
	if (val <= 0) {
		return EINVAL;
	}

	pet_idle_rate = val;

	return 0;
}

int
kperf_get_lightweight_pet(void)
{
	return lightweight_pet;
}

int
kperf_set_lightweight_pet(int val)
{
	if (kperf_sampling_status() == KPERF_SAMPLING_ON) {
		return EBUSY;
	}

	lightweight_pet = (val == 1);
	kperf_lightweight_pet_active_update();

	return 0;
}

void
kperf_lightweight_pet_active_update(void)
{
	kperf_lightweight_pet_active = (kperf_sampling_status() && lightweight_pet);
	kperf_on_cpu_update();
}