1 | /* |
2 | * Copyright (c) 2011 Apple Computer, Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* Manage timers */ |
30 | |
31 | #include <mach/mach_types.h> |
32 | #include <kern/cpu_data.h> /* current_thread() */ |
33 | #include <kern/kalloc.h> |
34 | #include <stdatomic.h> |
35 | #include <sys/errno.h> |
36 | #include <sys/vm.h> |
37 | #include <sys/ktrace.h> |
38 | |
39 | #include <machine/machine_routines.h> |
40 | #if defined(__x86_64__) |
41 | #include <i386/mp.h> |
42 | #endif /* defined(__x86_64__) */ |
43 | |
44 | #include <kperf/kperf.h> |
45 | #include <kperf/buffer.h> |
46 | #include <kperf/context.h> |
47 | #include <kperf/action.h> |
48 | #include <kperf/kperf_timer.h> |
49 | #include <kperf/kperf_arch.h> |
50 | #include <kperf/pet.h> |
51 | #include <kperf/sample.h> |
52 | |
/*
 * The list of timers: kperf_timerv points to an array of kperf_timerc
 * entries.  Both are replaced by kperf_timer_set_count() when the array is
 * grown.
 */
struct kperf_timer *kperf_timerv = NULL;
unsigned int kperf_timerc = 0;

/*
 * Index of the timer that drives PET.  Any value that is not a valid index
 * into kperf_timerv (such as the initial 999, which kperf_timer_reset() also
 * uses) means PET is disabled.
 */
static unsigned int pet_timer_id = 999;

/* maximum number of timers we can construct */
#define TIMER_MAX (16)

/*
 * Minimum timer periods, in absolute-time units, for regular and PET timers,
 * each with a separate limit that applies while background tracing is active.
 * They are zero until kperf_timer_set_count() first converts them from the
 * KP_MIN_PERIOD_*_NS nanosecond constants.
 */
static uint64_t min_period_abstime;
static uint64_t min_period_bg_abstime;
static uint64_t min_period_pet_abstime;
static uint64_t min_period_pet_bg_abstime;
66 | |
67 | static uint64_t |
68 | kperf_timer_min_period_abstime(void) |
69 | { |
70 | if (ktrace_background_active()) { |
71 | return min_period_bg_abstime; |
72 | } else { |
73 | return min_period_abstime; |
74 | } |
75 | } |
76 | |
77 | static uint64_t |
78 | kperf_timer_min_pet_period_abstime(void) |
79 | { |
80 | if (ktrace_background_active()) { |
81 | return min_period_pet_bg_abstime; |
82 | } else { |
83 | return min_period_pet_abstime; |
84 | } |
85 | } |
86 | |
87 | static void |
88 | kperf_timer_schedule(struct kperf_timer *timer, uint64_t now) |
89 | { |
90 | BUF_INFO(PERF_TM_SCHED, timer->period); |
91 | |
92 | /* if we re-programmed the timer to zero, just drop it */ |
93 | if (timer->period == 0) { |
94 | return; |
95 | } |
96 | |
97 | /* calculate deadline */ |
98 | uint64_t deadline = now + timer->period; |
99 | |
100 | /* re-schedule the timer, making sure we don't apply slop */ |
101 | timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL); |
102 | } |
103 | |
104 | static void |
105 | kperf_sample_cpu(struct kperf_timer *timer, bool system_sample, |
106 | bool only_system) |
107 | { |
108 | assert(timer != NULL); |
109 | |
110 | /* Always cut a tracepoint to show a sample event occurred */ |
111 | BUF_DATA(PERF_TM_HNDLR | DBG_FUNC_START, 0); |
112 | |
113 | int ncpu = cpu_number(); |
114 | |
115 | struct kperf_sample *intbuf = kperf_intr_sample_buffer(); |
116 | #if DEVELOPMENT || DEBUG |
117 | intbuf->sample_time = mach_absolute_time(); |
118 | #endif /* DEVELOPMENT || DEBUG */ |
119 | |
120 | /* On a timer, we can see the "real" current thread */ |
121 | thread_t thread = current_thread(); |
122 | task_t task = get_threadtask(thread); |
123 | struct kperf_context ctx = { |
124 | .cur_thread = thread, |
125 | .cur_task = task, |
126 | .cur_pid = task_pid(task), |
127 | .trigger_type = TRIGGER_TYPE_TIMER, |
128 | .trigger_id = (unsigned int)(timer - kperf_timerv), |
129 | }; |
130 | |
131 | if (ctx.trigger_id == pet_timer_id && ncpu < machine_info.logical_cpu_max) { |
132 | kperf_tid_on_cpus[ncpu] = thread_tid(ctx.cur_thread); |
133 | } |
134 | |
135 | /* make sure sampling is on */ |
136 | unsigned int status = kperf_sampling_status(); |
137 | if (status == KPERF_SAMPLING_OFF) { |
138 | BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_OFF); |
139 | return; |
140 | } else if (status == KPERF_SAMPLING_SHUTDOWN) { |
141 | BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_SHUTDOWN); |
142 | return; |
143 | } |
144 | |
145 | /* call the action -- kernel-only from interrupt, pend user */ |
146 | int r = kperf_sample(intbuf, &ctx, timer->actionid, |
147 | SAMPLE_FLAG_PEND_USER | (system_sample ? SAMPLE_FLAG_SYSTEM : 0) | |
148 | (only_system ? SAMPLE_FLAG_ONLY_SYSTEM : 0)); |
149 | |
150 | /* end tracepoint is informational */ |
151 | BUF_INFO(PERF_TM_HNDLR | DBG_FUNC_END, r); |
152 | |
153 | (void)atomic_fetch_and_explicit(&timer->pending_cpus, |
154 | ~(UINT64_C(1) << ncpu), memory_order_relaxed); |
155 | } |
156 | |
/*
 * Sample the current CPU on behalf of the timer passed as `param`.
 *
 * `param` is interpreted as a struct kperf_timer pointer.  Neither the
 * system-sample nor the only-system flag is requested.
 */
void
kperf_ipi_handler(void *param)
{
	struct kperf_timer *timer = param;

	kperf_sample_cpu(timer, false, false);
}
162 | |
/*
 * Timer-call callback registered for every entry of kperf_timerv by
 * kperf_timer_set_count().
 *
 * param0 - the struct kperf_timer that fired.
 * param1 - unused.
 *
 * Samples the current CPU, optionally broadcasts to other CPUs, and then
 * either hands off to PET (for the PET timer) or re-arms the timer itself.
 * timer->active is held non-zero for the duration so that kperf_timer_stop()
 * can wait for an in-flight handler.
 */
static void
kperf_timer_handler(void *param0, __unused void *param1)
{
	struct kperf_timer *timer = param0;
	/* index of this timer within kperf_timerv */
	unsigned int ntimer = (unsigned int)(timer - kperf_timerv);
	unsigned int ncpus = machine_info.logical_cpu_max;
	/* stays true unless the broadcast below reports otherwise */
	bool system_only_self = true;

	/* no action configured: nothing to sample, and the timer is not re-armed */
	if (timer->actionid == 0) {
		return;
	}

	timer->active = 1;
#if DEVELOPMENT || DEBUG
	timer->fire_time = mach_absolute_time();
#endif /* DEVELOPMENT || DEBUG */

	/* along the lines of do not ipi if we are all shutting down */
	if (kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN) {
		goto deactivate;
	}

	BUF_DATA(PERF_TM_FIRE, ntimer, ntimer == pet_timer_id, timer->period,
	    timer->actionid);

	if (ntimer == pet_timer_id) {
		kperf_pet_fire_before();

		/* clean-up the thread-on-CPUs cache */
		bzero(kperf_tid_on_cpus, ncpus * sizeof(*kperf_tid_on_cpus));
	}

	/*
	 * IPI other cores only if the action has non-system samplers.
	 */
	if (kperf_action_has_non_system(timer->actionid)) {
		/*
		 * If the core that's handling the timer is not scheduling
		 * threads, only run system samplers.
		 */
		system_only_self = kperf_mp_broadcast_other_running(timer);
	}
	/* this CPU always takes the system sample */
	kperf_sample_cpu(timer, true, system_only_self);

	/* release the pet thread? */
	if (ntimer == pet_timer_id) {
		/* PET mode is responsible for rearming the timer */
		kperf_pet_fire_after();
	} else {
		/*
		 * FIXME: Get the current time from elsewhere.  The next
		 * timer's period now includes the time taken to reach this
		 * point.  This causes a bias towards longer sampling periods
		 * than requested.
		 */
		kperf_timer_schedule(timer, mach_absolute_time());
	}

deactivate:
	/* let kperf_timer_stop() know the handler is no longer running */
	timer->active = 0;
}
224 | |
225 | /* program the timer from the PET thread */ |
226 | void |
227 | kperf_timer_pet_rearm(uint64_t elapsed_ticks) |
228 | { |
229 | struct kperf_timer *timer = NULL; |
230 | uint64_t period = 0; |
231 | uint64_t deadline; |
232 | |
233 | /* |
234 | * If the pet_timer_id is invalid, it has been disabled, so this should |
235 | * do nothing. |
236 | */ |
237 | if (pet_timer_id >= kperf_timerc) { |
238 | return; |
239 | } |
240 | |
241 | unsigned int status = kperf_sampling_status(); |
242 | /* do not reprogram the timer if it has been shutdown or sampling is off */ |
243 | if (status == KPERF_SAMPLING_OFF) { |
244 | BUF_INFO(PERF_PET_END, SAMPLE_OFF); |
245 | return; |
246 | } else if (status == KPERF_SAMPLING_SHUTDOWN) { |
247 | BUF_INFO(PERF_PET_END, SAMPLE_SHUTDOWN); |
248 | return; |
249 | } |
250 | |
251 | timer = &(kperf_timerv[pet_timer_id]); |
252 | |
253 | /* if we re-programmed the timer to zero, just drop it */ |
254 | if (!timer->period) { |
255 | return; |
256 | } |
257 | |
258 | /* subtract the time the pet sample took being careful not to underflow */ |
259 | if (timer->period > elapsed_ticks) { |
260 | period = timer->period - elapsed_ticks; |
261 | } |
262 | |
263 | /* make sure we don't set the next PET sample to happen too soon */ |
264 | if (period < min_period_pet_abstime) { |
265 | period = min_period_pet_abstime; |
266 | } |
267 | |
268 | /* we probably took so long in the PET thread, it makes sense to take |
269 | * the time again. |
270 | */ |
271 | deadline = mach_absolute_time() + period; |
272 | |
273 | BUF_INFO(PERF_PET_SCHED, timer->period, period, elapsed_ticks, deadline); |
274 | |
275 | /* re-schedule the timer, making sure we don't apply slop */ |
276 | timer_call_enter(&timer->tcall, deadline, TIMER_CALL_SYS_CRITICAL); |
277 | |
278 | return; |
279 | } |
280 | |
281 | /* turn on all the timers */ |
282 | void |
283 | kperf_timer_go(void) |
284 | { |
285 | /* get the PET thread going */ |
286 | if (pet_timer_id < kperf_timerc) { |
287 | kperf_pet_config(kperf_timerv[pet_timer_id].actionid); |
288 | } |
289 | |
290 | uint64_t now = mach_absolute_time(); |
291 | |
292 | for (unsigned int i = 0; i < kperf_timerc; i++) { |
293 | if (kperf_timerv[i].period == 0) { |
294 | continue; |
295 | } |
296 | |
297 | kperf_timer_schedule(&(kperf_timerv[i]), now); |
298 | } |
299 | } |
300 | |
301 | void |
302 | kperf_timer_stop(void) |
303 | { |
304 | for (unsigned int i = 0; i < kperf_timerc; i++) { |
305 | if (kperf_timerv[i].period == 0) { |
306 | continue; |
307 | } |
308 | |
309 | /* wait for the timer to stop */ |
310 | while (kperf_timerv[i].active); |
311 | |
312 | timer_call_cancel(&kperf_timerv[i].tcall); |
313 | } |
314 | |
315 | /* wait for PET to stop, too */ |
316 | kperf_pet_config(0); |
317 | } |
318 | |
319 | unsigned int |
320 | kperf_timer_get_petid(void) |
321 | { |
322 | return pet_timer_id; |
323 | } |
324 | |
325 | int |
326 | kperf_timer_set_petid(unsigned int timerid) |
327 | { |
328 | if (timerid < kperf_timerc) { |
329 | uint64_t min_period; |
330 | |
331 | min_period = kperf_timer_min_pet_period_abstime(); |
332 | if (kperf_timerv[timerid].period < min_period) { |
333 | kperf_timerv[timerid].period = min_period; |
334 | } |
335 | kperf_pet_config(kperf_timerv[timerid].actionid); |
336 | } else { |
337 | /* clear the PET trigger if it's a bogus ID */ |
338 | kperf_pet_config(0); |
339 | } |
340 | |
341 | pet_timer_id = timerid; |
342 | |
343 | return 0; |
344 | } |
345 | |
346 | int |
347 | kperf_timer_get_period(unsigned int timerid, uint64_t *period_abstime) |
348 | { |
349 | if (timerid >= kperf_timerc) { |
350 | return EINVAL; |
351 | } |
352 | |
353 | *period_abstime = kperf_timerv[timerid].period; |
354 | return 0; |
355 | } |
356 | |
357 | int |
358 | kperf_timer_set_period(unsigned int timerid, uint64_t period_abstime) |
359 | { |
360 | uint64_t min_period; |
361 | |
362 | if (timerid >= kperf_timerc) { |
363 | return EINVAL; |
364 | } |
365 | |
366 | if (pet_timer_id == timerid) { |
367 | min_period = kperf_timer_min_pet_period_abstime(); |
368 | } else { |
369 | min_period = kperf_timer_min_period_abstime(); |
370 | } |
371 | |
372 | if (period_abstime > 0 && period_abstime < min_period) { |
373 | period_abstime = min_period; |
374 | } |
375 | |
376 | kperf_timerv[timerid].period = period_abstime; |
377 | |
378 | /* FIXME: re-program running timers? */ |
379 | |
380 | return 0; |
381 | } |
382 | |
383 | int |
384 | kperf_timer_get_action(unsigned int timerid, uint32_t *action) |
385 | { |
386 | if (timerid >= kperf_timerc) { |
387 | return EINVAL; |
388 | } |
389 | |
390 | *action = kperf_timerv[timerid].actionid; |
391 | return 0; |
392 | } |
393 | |
394 | int |
395 | kperf_timer_set_action(unsigned int timerid, uint32_t action) |
396 | { |
397 | if (timerid >= kperf_timerc) { |
398 | return EINVAL; |
399 | } |
400 | |
401 | kperf_timerv[timerid].actionid = action; |
402 | return 0; |
403 | } |
404 | |
405 | unsigned int |
406 | kperf_timer_get_count(void) |
407 | { |
408 | return kperf_timerc; |
409 | } |
410 | |
411 | void |
412 | kperf_timer_reset(void) |
413 | { |
414 | kperf_timer_set_petid(999); |
415 | kperf_set_pet_idle_rate(KPERF_PET_DEFAULT_IDLE_RATE); |
416 | kperf_set_lightweight_pet(0); |
417 | for (unsigned int i = 0; i < kperf_timerc; i++) { |
418 | kperf_timerv[i].period = 0; |
419 | kperf_timerv[i].actionid = 0; |
420 | kperf_timerv[i].pending_cpus = 0; |
421 | } |
422 | } |
423 | |
424 | extern int |
425 | kperf_timer_set_count(unsigned int count) |
426 | { |
427 | struct kperf_timer *new_timerv = NULL, *old_timerv = NULL; |
428 | unsigned int old_count; |
429 | |
430 | if (min_period_abstime == 0) { |
431 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_NS, &min_period_abstime); |
432 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_BG_NS, &min_period_bg_abstime); |
433 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_NS, &min_period_pet_abstime); |
434 | nanoseconds_to_absolutetime(KP_MIN_PERIOD_PET_BG_NS, |
435 | &min_period_pet_bg_abstime); |
436 | assert(min_period_abstime > 0); |
437 | } |
438 | |
439 | if (count == kperf_timerc) { |
440 | return 0; |
441 | } |
442 | if (count > TIMER_MAX) { |
443 | return EINVAL; |
444 | } |
445 | |
446 | /* TODO: allow shrinking? */ |
447 | if (count < kperf_timerc) { |
448 | return EINVAL; |
449 | } |
450 | |
451 | /* |
452 | * Make sure kperf is initialized when creating the array for the first |
453 | * time. |
454 | */ |
455 | if (kperf_timerc == 0) { |
456 | int r; |
457 | |
458 | /* main kperf */ |
459 | if ((r = kperf_init())) { |
460 | return r; |
461 | } |
462 | } |
463 | |
464 | /* |
465 | * Shut down any running timers since we will be messing with the timer |
466 | * call structures. |
467 | */ |
468 | kperf_timer_stop(); |
469 | |
470 | /* create a new array */ |
471 | new_timerv = kalloc_tag(count * sizeof(struct kperf_timer), |
472 | VM_KERN_MEMORY_DIAG); |
473 | if (new_timerv == NULL) { |
474 | return ENOMEM; |
475 | } |
476 | old_timerv = kperf_timerv; |
477 | old_count = kperf_timerc; |
478 | |
479 | if (old_timerv != NULL) { |
480 | bcopy(kperf_timerv, new_timerv, |
481 | kperf_timerc * sizeof(struct kperf_timer)); |
482 | } |
483 | |
484 | /* zero the new entries */ |
485 | bzero(&(new_timerv[kperf_timerc]), |
486 | (count - old_count) * sizeof(struct kperf_timer)); |
487 | |
488 | /* (re-)setup the timer call info for all entries */ |
489 | for (unsigned int i = 0; i < count; i++) { |
490 | timer_call_setup(&new_timerv[i].tcall, kperf_timer_handler, &new_timerv[i]); |
491 | } |
492 | |
493 | kperf_timerv = new_timerv; |
494 | kperf_timerc = count; |
495 | |
496 | if (old_timerv != NULL) { |
497 | kfree(old_timerv, old_count * sizeof(struct kperf_timer)); |
498 | } |
499 | |
500 | return 0; |
501 | } |
502 | |