1/*
2 * Copyright (c) 2011 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* Manage timers */
30
31#include <mach/mach_types.h>
32#include <kern/cpu_data.h> /* current_thread() */
33#include <kern/kalloc.h>
34#include <stdatomic.h>
35#include <sys/errno.h>
36#include <sys/vm.h>
37#include <sys/ktrace.h>
38
39#include <machine/machine_routines.h>
40#if defined(__x86_64__)
41#include <i386/mp.h>
42#endif /* defined(__x86_64__) */
43
44#include <kperf/kperf.h>
45#include <kperf/buffer.h>
46#include <kperf/context.h>
47#include <kperf/action.h>
48#include <kperf/kperf_timer.h>
49#include <kperf/kperf_arch.h>
50#include <kperf/pet.h>
51#include <kperf/sample.h>
52
/* the list of timers and the number of entries in it */
struct kperf_timer *kperf_timerv = NULL;
unsigned int kperf_timerc = 0;

/* maximum number of timers we can construct */
#define TIMER_MAX (16)

/*
 * Timer ID that means "no timer drives PET".  Any ID that is not below
 * kperf_timerc is treated as disabled by the PET paths in this file, so the
 * sentinel only has to be out of range for every possible timer count.
 */
#define TIMER_PETID_INVALID (999)
_Static_assert(TIMER_PETID_INVALID >= TIMER_MAX,
    "the invalid PET timer ID must never be a usable timer index");

/* ID of the timer currently acting as the PET trigger */
static unsigned int pet_timer_id = TIMER_PETID_INVALID;

/*
 * Lower bounds on timer periods, in Mach absolute time units.  They are
 * filled in lazily by kperf_timer_set_count(); the "bg" variants are the ones
 * selected while ktrace_background_active() returns true.
 */
static uint64_t min_period_abstime;
static uint64_t min_period_bg_abstime;
static uint64_t min_period_pet_abstime;
static uint64_t min_period_pet_bg_abstime;
66
67static uint64_t
68kperf_timer_min_period_abstime(void)
69{
70 if (ktrace_background_active()) {
71 return min_period_bg_abstime;
72 } else {
73 return min_period_abstime;
74 }
75}
76
77static uint64_t
78kperf_timer_min_pet_period_abstime(void)
79{
80 if (ktrace_background_active()) {
81 return min_period_pet_bg_abstime;
82 } else {
83 return min_period_pet_abstime;
84 }
85}
86
87static void
88kperf_timer_schedule(struct kperf_timer *timer, uint64_t now)
89{
90 BUF_INFO(PERF_TM_SCHED, timer->period);
91
92 /* if we re-programmed the timer to zero, just drop it */
93 if (timer->period == 0) {
94 return;
95 }
96
97 /* calculate deadline */
98 uint64_t deadline = now + timer->period;
99
100 /* re-schedule the timer, making sure we don't apply slop */
101 timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
102}
103
104static void
105kperf_sample_cpu(struct kperf_timer *timer, bool system_sample,
106 bool only_system)
107{
108 assert(timer != NULL);
109
110 /* Always cut a tracepoint to show a sample event occurred */
111 BUF_DATA(PERF_TM_HNDLR | DBG_FUNC_START, 0);
112
113 int ncpu = cpu_number();
114
115 struct kperf_sample *intbuf = kperf_intr_sample_buffer();
116#if DEVELOPMENT || DEBUG
117 intbuf->sample_time = mach_absolute_time();
118#endif /* DEVELOPMENT || DEBUG */
119
120 /* On a timer, we can see the "real" current thread */
121 thread_t thread = current_thread();
122 task_t task = get_threadtask(thread);
123 struct kperf_context ctx = {
124 .cur_thread = thread,
125 .cur_task = task,
126 .cur_pid = task_pid(task),
127 .trigger_type = TRIGGER_TYPE_TIMER,
128 .trigger_id = (unsigned int)(timer - kperf_timerv),
129 };
130
131 if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) {
132 kperf_tid_on_cpus[ncpu] = thread_tid(ctx.cur_thread);
133 }
134
135 /* make sure sampling is on */
136 unsigned int status = kperf_sampling_status();
137 if (status == KPERF_SAMPLING_OFF) {
138 BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_OFF);
139 return;
140 } else if (status == KPERF_SAMPLING_SHUTDOWN) {
141 BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_SHUTDOWN);
142 return;
143 }
144
145 /* call the action -- kernel-only from interrupt, pend user */
146 int r = kperf_sample(intbuf, &ctx, timer->actionid,
147 SAMPLE_FLAG_PEND_USER | (system_sample ? SAMPLE_FLAG_SYSTEM : 0) |
148 (only_system ? SAMPLE_FLAG_ONLY_SYSTEM : 0));
149
150 /* end tracepoint is informational */
151 BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r);
152
153 (void)atomic_fetch_and_explicit(&timer->pending_cpus,
154 ~(UINT64_C(1) << ncpu), memory_order_relaxed);
155}
156
/*
 * Sample the current CPU for the timer passed in `param` (a
 * struct kperf_timer *), with neither the system nor the only-system flag.
 * NOTE(review): presumably run on the CPUs signalled by
 * kperf_mp_broadcast_other_running() -- confirm against the arch code.
 */
void
kperf_ipi_handler(void *param)
{
	struct kperf_timer *timer = param;

	kperf_sample_cpu(timer, false, false);
}
162
163static void
164kperf_timer_handler(void *param0, __unused void *param1)
165{
166 struct kperf_timer *timer = param0;
167 unsigned int ntimer = (unsigned int)(timer - kperf_timerv);
168 unsigned int ncpus = machine_info.logical_cpu_max;
169 bool system_only_self = true;
170
171 if (timer->actionid == 0) {
172 return;
173 }
174
175 timer->active = 1;
176#if DEVELOPMENT || DEBUG
177 timer->fire_time = mach_absolute_time();
178#endif /* DEVELOPMENT || DEBUG */
179
180 /* along the lines of do not ipi if we are all shutting down */
181 if (kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN) {
182 goto deactivate;
183 }
184
185 BUF_DATA(PERF_TM_FIRE, ntimer, ntimer == pet_timer_id, timer->period,
186 timer->actionid);
187
188 if (ntimer == pet_timer_id) {
189 kperf_pet_fire_before();
190
191 /* clean-up the thread-on-CPUs cache */
192 bzero(kperf_tid_on_cpus, ncpus * sizeof(*kperf_tid_on_cpus));
193 }
194
195 /*
196 * IPI other cores only if the action has non-system samplers.
197 */
198 if (kperf_action_has_non_system(timer->actionid)) {
199 /*
200 * If the core that's handling the timer is not scheduling
201 * threads, only run system samplers.
202 */
203 system_only_self = kperf_mp_broadcast_other_running(timer);
204 }
205 kperf_sample_cpu(timer, true, system_only_self);
206
207 /* release the pet thread? */
208 if (ntimer == pet_timer_id) {
209 /* PET mode is responsible for rearming the timer */
210 kperf_pet_fire_after();
211 } else {
212 /*
213 * FIXME: Get the current time from elsewhere. The next
214 * timer's period now includes the time taken to reach this
215 * point. This causes a bias towards longer sampling periods
216 * than requested.
217 */
218 kperf_timer_schedule(timer, mach_absolute_time());
219 }
220
221deactivate:
222 timer->active = 0;
223}
224
225/* program the timer from the PET thread */
226void
227kperf_timer_pet_rearm(uint64_t elapsed_ticks)
228{
229 struct kperf_timer *timer = NULL;
230 uint64_t period = 0;
231 uint64_t deadline;
232
233 /*
234 * If the pet_timer_id is invalid, it has been disabled, so this should
235 * do nothing.
236 */
237 if (pet_timer_id >= kperf_timerc) {
238 return;
239 }
240
241 unsigned int status = kperf_sampling_status();
242 /* do not reprogram the timer if it has been shutdown or sampling is off */
243 if (status == KPERF_SAMPLING_OFF) {
244 BUF_INFO(PERF_PET_END, SAMPLE_OFF);
245 return;
246 } else if (status == KPERF_SAMPLING_SHUTDOWN) {
247 BUF_INFO(PERF_PET_END, SAMPLE_SHUTDOWN);
248 return;
249 }
250
251 timer = &(kperf_timerv[pet_timer_id]);
252
253 /* if we re-programmed the timer to zero, just drop it */
254 if (!timer->period) {
255 return;
256 }
257
258 /* subtract the time the pet sample took being careful not to underflow */
259 if (timer->period > elapsed_ticks) {
260 period = timer->period - elapsed_ticks;
261 }
262
263 /* make sure we don't set the next PET sample to happen too soon */
264 if (period < min_period_pet_abstime) {
265 period = min_period_pet_abstime;
266 }
267
268 /* we probably took so long in the PET thread, it makes sense to take
269 * the time again.
270 */
271 deadline = mach_absolute_time() + period;
272
273 BUF_INFO(PERF_PET_SCHED, timer->period, period, elapsed_ticks, deadline);
274
275 /* re-schedule the timer, making sure we don't apply slop */
276 timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL);
277
278 return;
279}
280
281/* turn on all the timers */
282void
283kperf_timer_go(void)
284{
285 /* get the PET thread going */
286 if (pet_timer_id < kperf_timerc) {
287 kperf_pet_config(kperf_timerv[pet_timer_id].actionid);
288 }
289
290 uint64_t now = mach_absolute_time();
291
292 for (unsigned int i = 0; i < kperf_timerc; i++) {
293 if (kperf_timerv[i].period == 0) {
294 continue;
295 }
296
297 kperf_timer_schedule(&(kperf_timerv[i]), now);
298 }
299}
300
301void
302kperf_timer_stop(void)
303{
304 for (unsigned int i = 0; i < kperf_timerc; i++) {
305 if (kperf_timerv[i].period == 0) {
306 continue;
307 }
308
309 /* wait for the timer to stop */
310 while (kperf_timerv[i].active);
311
312 timer_call_cancel(&kperf_timerv[i].tcall);
313 }
314
315 /* wait for PET to stop, too */
316 kperf_pet_config(0);
317}
318
319unsigned int
320kperf_timer_get_petid(void)
321{
322 return pet_timer_id;
323}
324
325int
326kperf_timer_set_petid(unsigned int timerid)
327{
328 if (timerid < kperf_timerc) {
329 uint64_t min_period;
330
331 min_period = kperf_timer_min_pet_period_abstime();
332 if (kperf_timerv[timerid].period < min_period) {
333 kperf_timerv[timerid].period = min_period;
334 }
335 kperf_pet_config(kperf_timerv[timerid].actionid);
336 } else {
337 /* clear the PET trigger if it's a bogus ID */
338 kperf_pet_config(0);
339 }
340
341 pet_timer_id = timerid;
342
343 return 0;
344}
345
346int
347kperf_timer_get_period(unsigned int timerid, uint64_t *period_abstime)
348{
349 if (timerid >= kperf_timerc) {
350 return EINVAL;
351 }
352
353 *period_abstime = kperf_timerv[timerid].period;
354 return 0;
355}
356
357int
358kperf_timer_set_period(unsigned int timerid, uint64_t period_abstime)
359{
360 uint64_t min_period;
361
362 if (timerid >= kperf_timerc) {
363 return EINVAL;
364 }
365
366 if (pet_timer_id == timerid) {
367 min_period = kperf_timer_min_pet_period_abstime();
368 } else {
369 min_period = kperf_timer_min_period_abstime();
370 }
371
372 if (period_abstime > 0 && period_abstime < min_period) {
373 period_abstime = min_period;
374 }
375
376 kperf_timerv[timerid].period = period_abstime;
377
378 /* FIXME: re-program running timers? */
379
380 return 0;
381}
382
383int
384kperf_timer_get_action(unsigned int timerid, uint32_t *action)
385{
386 if (timerid >= kperf_timerc) {
387 return EINVAL;
388 }
389
390 *action = kperf_timerv[timerid].actionid;
391 return 0;
392}
393
394int
395kperf_timer_set_action(unsigned int timerid, uint32_t action)
396{
397 if (timerid >= kperf_timerc) {
398 return EINVAL;
399 }
400
401 kperf_timerv[timerid].actionid = action;
402 return 0;
403}
404
405unsigned int
406kperf_timer_get_count(void)
407{
408 return kperf_timerc;
409}
410
411void
412kperf_timer_reset(void)
413{
414 kperf_timer_set_petid(999);
415 kperf_set_pet_idle_rate(KPERF_PET_DEFAULT_IDLE_RATE);
416 kperf_set_lightweight_pet(0);
417 for (unsigned int i = 0; i < kperf_timerc; i++) {
418 kperf_timerv[i].period = 0;
419 kperf_timerv[i].actionid = 0;
420 kperf_timerv[i].pending_cpus = 0;
421 }
422}
423
424extern int
425kperf_timer_set_count(unsigned int count)
426{
427 struct kperf_timer *new_timerv = NULL, *old_timerv = NULL;
428 unsigned int old_count;
429
430 if (min_period_abstime == 0) {
431 nanoseconds_to_absolutetime(KP_MIN_PERIOD_NS, &min_period_abstime);
432 nanoseconds_to_absolutetime(KP_MIN_PERIOD_BG_NS, &min_period_bg_abstime);
433 nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_NS, &min_period_pet_abstime);
434 nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_BG_NS,
435 &min_period_pet_bg_abstime);
436 assert(min_period_abstime > 0);
437 }
438
439 if (count == kperf_timerc) {
440 return 0;
441 }
442 if (count > TIMER_MAX) {
443 return EINVAL;
444 }
445
446 /* TODO: allow shrinking? */
447 if (count < kperf_timerc) {
448 return EINVAL;
449 }
450
451 /*
452 * Make sure kperf is initialized when creating the array for the first
453 * time.
454 */
455 if (kperf_timerc == 0) {
456 int r;
457
458 /* main kperf */
459 if ((r = kperf_init())) {
460 return r;
461 }
462 }
463
464 /*
465 * Shut down any running timers since we will be messing with the timer
466 * call structures.
467 */
468 kperf_timer_stop();
469
470 /* create a new array */
471 new_timerv = kalloc_tag(count * sizeof(struct kperf_timer),
472 VM_KERN_MEMORY_DIAG);
473 if (new_timerv == NULL) {
474 return ENOMEM;
475 }
476 old_timerv = kperf_timerv;
477 old_count = kperf_timerc;
478
479 if (old_timerv != NULL) {
480 bcopy(kperf_timerv, new_timerv,
481 kperf_timerc * sizeof(struct kperf_timer));
482 }
483
484 /* zero the new entries */
485 bzero(&(new_timerv[kperf_timerc]),
486 (count - old_count) * sizeof(struct kperf_timer));
487
488 /* (re-)setup the timer call info for all entries */
489 for (unsigned int i = 0; i < count; i++) {
490 timer_call_setup(&new_timerv[i].tcall, kperf_timer_handler, &new_timerv[i]);
491 }
492
493 kperf_timerv = new_timerv;
494 kperf_timerc = count;
495
496 if (old_timerv != NULL) {
497 kfree(old_timerv, old_count * sizeof(struct kperf_timer));
498 }
499
500 return 0;
501}
502