1/*
2 * Copyright (c) 2011-2016 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* all thread states code */
30#include <mach/mach_types.h>
31#include <sys/errno.h>
32
33#include <kperf/kperf.h>
34#include <kperf/buffer.h>
35#include <kperf/sample.h>
36#include <kperf/context.h>
37#include <kperf/action.h>
38#include <kperf/pet.h>
39#include <kperf/kperf_timer.h>
40
41#include <kern/task.h>
42#include <kern/kalloc.h>
43
44/* action ID to call for each sample
45 *
46 * Address is used as the sync point for waiting.
47 */
48static unsigned int pet_action_id = 0;
49
50static lck_mtx_t *pet_lock;
51static boolean_t pet_initted = FALSE;
52static boolean_t pet_running = FALSE;
53
54/* number of callstack samples to skip for idle threads */
55static uint32_t pet_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE;
56
/*
 * Lightweight PET mode samples the system less intrusively than normal PET
 * mode.  Instead of iterating tasks and threads on each sample, it increments
 * a global generation count, kperf_pet_gen, which is checked as threads are
 * context switched on-core.  If the thread's local generation count differs
 * from the global generation, the thread samples itself.
 *
 *            |  |
 * thread A   +--+---------|
 *            |  |
 * thread B   |--+---------------|
 *            |  |
 * thread C   |  |         |-------------------------------------
 *            |  |         |
 * thread D   |  |         |     |-------------------------------
 *            |  |         |     |
 *            +--+---------+-----+--------------------------------> time
 *               |         |     |
 *               |         +-----+--- threads sampled when they come on-core in
 *               |                    kperf_pet_on_cpu
 *               |
 *               +--- PET timer fire, sample on-core threads A and B,
 *                    increment kperf_pet_gen
 */
81static boolean_t lightweight_pet = FALSE;
82
83/*
84 * Whether or not lightweight PET and sampling is active.
85 */
86boolean_t kperf_lightweight_pet_active = FALSE;
87
88uint32_t kperf_pet_gen = 0;
89
90static struct kperf_sample *pet_sample;
91
92/* thread lifecycle */
93
94static kern_return_t pet_init(void);
95static void pet_start(void);
96static void pet_stop(void);
97
98/* PET thread-only */
99
100static void pet_thread_loop(void *param, wait_result_t wr);
101static void pet_thread_idle(void);
102static void pet_thread_work_unit(void);
103
104/* listing things to sample */
105
106static task_array_t pet_tasks = NULL;
107static vm_size_t pet_tasks_size = 0;
108static vm_size_t pet_tasks_count = 0;
109
110static thread_array_t pet_threads = NULL;
111static vm_size_t pet_threads_size = 0;
112static vm_size_t pet_threads_count = 0;
113
114static kern_return_t pet_tasks_prepare(void);
115static kern_return_t pet_tasks_prepare_internal(void);
116
117static kern_return_t pet_threads_prepare(task_t task);
118
119/* sampling */
120
121static void pet_sample_all_tasks(uint32_t idle_rate);
122static void pet_sample_task(task_t task, uint32_t idle_rate);
123static void pet_sample_thread(int pid, task_t task, thread_t thread,
124 uint32_t idle_rate);
125
126/* functions called by other areas of kperf */
127
128void
129kperf_pet_fire_before(void)
130{
131 if (!pet_initted || !pet_running) {
132 return;
133 }
134
135 if (lightweight_pet) {
136 BUF_INFO(PERF_PET_SAMPLE);
137 OSIncrementAtomic(&kperf_pet_gen);
138 }
139}
140
141void
142kperf_pet_fire_after(void)
143{
144 if (!pet_initted || !pet_running) {
145 return;
146 }
147
148 if (lightweight_pet) {
149 kperf_timer_pet_rearm(0);
150 } else {
151 thread_wakeup(&pet_action_id);
152 }
153}
154
155void
156kperf_pet_on_cpu(thread_t thread, thread_continue_t continuation,
157 uintptr_t *starting_fp)
158{
159 assert(thread != NULL);
160 assert(ml_get_interrupts_enabled() == FALSE);
161
162 if (thread->kperf_pet_gen != kperf_pet_gen) {
163 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START, kperf_pet_gen, thread->kperf_pet_gen);
164
165 task_t task = get_threadtask(thread);
166 struct kperf_context ctx = {
167 .cur_thread = thread,
168 .cur_task = task,
169 .cur_pid = task_pid(task),
170 .starting_fp = starting_fp,
171 };
172 /*
173 * Use a per-CPU interrupt buffer, since this is only called
174 * while interrupts are disabled, from the scheduler.
175 */
176 struct kperf_sample *sample = kperf_intr_sample_buffer();
177 if (!sample) {
178 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
179 return;
180 }
181
182 unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
183 if (continuation != NULL) {
184 flags |= SAMPLE_FLAG_CONTINUATION;
185 }
186 kperf_sample(sample, &ctx, pet_action_id, flags);
187
188 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
189 } else {
190 BUF_VERB(PERF_PET_SAMPLE_THREAD, kperf_pet_gen, thread->kperf_pet_gen);
191 }
192}
193
194void
195kperf_pet_config(unsigned int action_id)
196{
197 kern_return_t kr = pet_init();
198 if (kr != KERN_SUCCESS) {
199 return;
200 }
201
202 lck_mtx_lock(pet_lock);
203
204 BUF_INFO(PERF_PET_THREAD, 3, action_id);
205
206 if (action_id == 0) {
207 pet_stop();
208 } else {
209 pet_start();
210 }
211
212 pet_action_id = action_id;
213
214 lck_mtx_unlock(pet_lock);
215}
216
217/* handle resource allocation */
218
219void
220pet_start(void)
221{
222 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
223
224 if (pet_running) {
225 return;
226 }
227
228 pet_sample = kalloc(sizeof(struct kperf_sample));
229 if (!pet_sample) {
230 return;
231 }
232
233 pet_running = TRUE;
234}
235
236void
237pet_stop(void)
238{
239 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
240
241 if (!pet_initted) {
242 return;
243 }
244
245 if (pet_tasks != NULL) {
246 assert(pet_tasks_size != 0);
247 kfree(pet_tasks, pet_tasks_size);
248
249 pet_tasks = NULL;
250 pet_tasks_size = 0;
251 pet_tasks_count = 0;
252 }
253
254 if (pet_threads != NULL) {
255 assert(pet_threads_size != 0);
256 kfree(pet_threads, pet_threads_size);
257
258 pet_threads = NULL;
259 pet_threads_size = 0;
260 pet_threads_count = 0;
261 }
262
263 if (pet_sample != NULL) {
264 kfree(pet_sample, sizeof(struct kperf_sample));
265 pet_sample = NULL;
266 }
267
268 pet_running = FALSE;
269}
270
271/*
272 * Lazily initialize PET. The PET thread never exits once PET has been used
273 * once.
274 */
275static kern_return_t
276pet_init(void)
277{
278 if (pet_initted) {
279 return KERN_SUCCESS;
280 }
281
282 /* make the sync point */
283 pet_lock = lck_mtx_alloc_init(&kperf_lck_grp, NULL);
284 assert(pet_lock);
285
286 /* create the thread */
287
288 BUF_INFO(PERF_PET_THREAD, 0);
289 thread_t t;
290 kern_return_t kr = kernel_thread_start(pet_thread_loop, NULL, &t);
291 if (kr != KERN_SUCCESS) {
292 lck_mtx_free(pet_lock, &kperf_lck_grp);
293 return kr;
294 }
295
296 thread_set_thread_name(t, "kperf sampling");
297 /* let the thread hold the only reference */
298 thread_deallocate(t);
299
300 pet_initted = TRUE;
301
302 return KERN_SUCCESS;
303}
304
305/* called by PET thread only */
306
307static void
308pet_thread_work_unit(void)
309{
310 pet_sample_all_tasks(pet_idle_rate);
311}
312
313static void
314pet_thread_idle(void)
315{
316 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
317
318 (void)lck_mtx_sleep(pet_lock, LCK_SLEEP_DEFAULT, &pet_action_id,
319 THREAD_UNINT);
320}
321
322__attribute__((noreturn))
323static void
324pet_thread_loop(void *param, wait_result_t wr)
325{
326#pragma unused(param, wr)
327 uint64_t work_unit_ticks;
328
329 BUF_INFO(PERF_PET_THREAD, 1);
330
331 lck_mtx_lock(pet_lock);
332 for (;;) {
333 BUF_INFO(PERF_PET_IDLE);
334 pet_thread_idle();
335
336 BUF_INFO(PERF_PET_RUN);
337
338 /* measure how long the work unit takes */
339 work_unit_ticks = mach_absolute_time();
340 pet_thread_work_unit();
341 work_unit_ticks = mach_absolute_time() - work_unit_ticks;
342
343 /* re-program the timer */
344 kperf_timer_pet_rearm(work_unit_ticks);
345 }
346}
347
348/* sampling */
349
350static void
351pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
352{
353 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
354
355 uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | SAMPLE_FLAG_THREAD_ONLY;
356
357 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);
358
359 /* work out the context */
360 struct kperf_context ctx = {
361 .cur_thread = thread,
362 .cur_task = task,
363 .cur_pid = pid,
364 };
365
366 boolean_t thread_dirty = kperf_thread_get_dirty(thread);
367
368 /*
369 * Clean a dirty thread and skip callstack sample if the thread was not
370 * dirty and thread has skipped less than pet_idle_rate samples.
371 */
372 if (thread_dirty) {
373 kperf_thread_set_dirty(thread, FALSE);
374 } else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
375 sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
376 }
377 thread->kperf_pet_cnt++;
378
379 kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);
380
381 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
382}
383
384static kern_return_t
385pet_threads_prepare(task_t task)
386{
387 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
388
389 vm_size_t threads_size_needed;
390
391 if (task == TASK_NULL) {
392 return KERN_INVALID_ARGUMENT;
393 }
394
395 for (;;) {
396 task_lock(task);
397
398 if (!task->active) {
399 task_unlock(task);
400
401 return KERN_FAILURE;
402 }
403
404 /* do we have the memory we need? */
405 threads_size_needed = task->thread_count * sizeof(thread_t);
406 if (threads_size_needed <= pet_threads_size) {
407 break;
408 }
409
410 /* not enough memory, unlock the task and increase allocation */
411 task_unlock(task);
412
413 if (pet_threads_size != 0) {
414 kfree(pet_threads, pet_threads_size);
415 }
416
417 assert(threads_size_needed > 0);
418 pet_threads_size = threads_size_needed;
419
420 pet_threads = kalloc(pet_threads_size);
421 if (pet_threads == NULL) {
422 pet_threads_size = 0;
423 return KERN_RESOURCE_SHORTAGE;
424 }
425 }
426
427 /* have memory and the task is locked and active */
428 thread_t thread;
429 pet_threads_count = 0;
430 queue_iterate(&(task->threads), thread, thread_t, task_threads) {
431 thread_reference_internal(thread);
432 pet_threads[pet_threads_count++] = thread;
433 }
434
435 /* can unlock task now that threads are referenced */
436 task_unlock(task);
437
438 return (pet_threads_count == 0) ? KERN_FAILURE : KERN_SUCCESS;
439}
440
441static void
442pet_sample_task(task_t task, uint32_t idle_rate)
443{
444 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
445
446 BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);
447
448 int pid = task_pid(task);
449 if (kperf_action_has_task(pet_action_id)) {
450 struct kperf_context ctx = {
451 .cur_task = task,
452 .cur_pid = pid,
453 };
454
455 kperf_sample(pet_sample, &ctx, pet_action_id, SAMPLE_FLAG_TASK_ONLY);
456 }
457
458 if (!kperf_action_has_thread(pet_action_id)) {
459 BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
460 return;
461 }
462
463 kern_return_t kr = KERN_SUCCESS;
464
465 /*
466 * Suspend the task to see an atomic snapshot of all its threads. This
467 * is expensive, and disruptive.
468 */
469 bool needs_suspend = task != kernel_task;
470 if (needs_suspend) {
471 kr = task_suspend_internal(task);
472 if (kr != KERN_SUCCESS) {
473 BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
474 return;
475 }
476 needs_suspend = true;
477 }
478
479 kr = pet_threads_prepare(task);
480 if (kr != KERN_SUCCESS) {
481 BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
482 goto out;
483 }
484
485 for (unsigned int i = 0; i < pet_threads_count; i++) {
486 thread_t thread = pet_threads[i];
487 assert(thread != THREAD_NULL);
488
489 /*
490 * Do not sample the thread if it was on a CPU when the timer fired.
491 */
492 int cpu = 0;
493 for (cpu = 0; cpu < machine_info.logical_cpu_max; cpu++) {
494 if (kperf_tid_on_cpus[cpu] == thread_tid(thread)) {
495 break;
496 }
497 }
498
499 /* the thread was not on a CPU */
500 if (cpu == machine_info.logical_cpu_max) {
501 pet_sample_thread(pid, task, thread, idle_rate);
502 }
503
504 thread_deallocate(pet_threads[i]);
505 }
506
507out:
508 if (needs_suspend) {
509 task_resume_internal(task);
510 }
511
512 BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, pet_threads_count);
513}
514
515static kern_return_t
516pet_tasks_prepare_internal(void)
517{
518 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
519
520 vm_size_t tasks_size_needed = 0;
521
522 for (;;) {
523 lck_mtx_lock(&tasks_threads_lock);
524
525 /* do we have the memory we need? */
526 tasks_size_needed = tasks_count * sizeof(task_t);
527 if (tasks_size_needed <= pet_tasks_size) {
528 break;
529 }
530
531 /* unlock and allocate more memory */
532 lck_mtx_unlock(&tasks_threads_lock);
533
534 /* grow task array */
535 if (tasks_size_needed > pet_tasks_size) {
536 if (pet_tasks_size != 0) {
537 kfree(pet_tasks, pet_tasks_size);
538 }
539
540 assert(tasks_size_needed > 0);
541 pet_tasks_size = tasks_size_needed;
542
543 pet_tasks = (task_array_t)kalloc(pet_tasks_size);
544 if (pet_tasks == NULL) {
545 pet_tasks_size = 0;
546 return KERN_RESOURCE_SHORTAGE;
547 }
548 }
549 }
550
551 return KERN_SUCCESS;
552}
553
554static kern_return_t
555pet_tasks_prepare(void)
556{
557 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
558
559 /* allocate space and take the tasks_threads_lock */
560 kern_return_t kr = pet_tasks_prepare_internal();
561 if (KERN_SUCCESS != kr) {
562 return kr;
563 }
564 lck_mtx_assert(&tasks_threads_lock, LCK_MTX_ASSERT_OWNED);
565
566 /* make sure the tasks are not deallocated after dropping the lock */
567 task_t task;
568 pet_tasks_count = 0;
569 queue_iterate(&tasks, task, task_t, tasks) {
570 if (task != kernel_task) {
571 task_reference_internal(task);
572 pet_tasks[pet_tasks_count++] = task;
573 }
574 }
575
576 lck_mtx_unlock(&tasks_threads_lock);
577
578 return KERN_SUCCESS;
579}
580
581static void
582pet_sample_all_tasks(uint32_t idle_rate)
583{
584 lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
585
586 BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);
587
588 kern_return_t kr = pet_tasks_prepare();
589 if (kr != KERN_SUCCESS) {
590 BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
591 BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, 0);
592 return;
593 }
594
595 for (unsigned int i = 0; i < pet_tasks_count; i++) {
596 task_t task = pet_tasks[i];
597
598 pet_sample_task(task, idle_rate);
599 }
600
601 for(unsigned int i = 0; i < pet_tasks_count; i++) {
602 task_deallocate(pet_tasks[i]);
603 }
604
605 BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, pet_tasks_count);
606}
607
608/* support sysctls */
609
610int
611kperf_get_pet_idle_rate(void)
612{
613 return pet_idle_rate;
614}
615
616int
617kperf_set_pet_idle_rate(int val)
618{
619 pet_idle_rate = val;
620
621 return 0;
622}
623
624int
625kperf_get_lightweight_pet(void)
626{
627 return lightweight_pet;
628}
629
630int
631kperf_set_lightweight_pet(int val)
632{
633 if (kperf_sampling_status() == KPERF_SAMPLING_ON) {
634 return EBUSY;
635 }
636
637 lightweight_pet = (val == 1);
638 kperf_lightweight_pet_active_update();
639
640 return 0;
641}
642
643void
644kperf_lightweight_pet_active_update(void)
645{
646 kperf_lightweight_pet_active = (kperf_sampling_status() && lightweight_pet);
647 kperf_on_cpu_update();
648}
649