// Copyright (c) 2021-2023 Apple Inc. All rights reserved.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_START@
//
// This file contains Original Code and/or Modifications of Original Code
// as defined in and that are subject to the Apple Public Source License
// Version 2.0 (the 'License'). You may not use this file except in
// compliance with the License. The rights granted to you under the License
// may not be used to create, or enable the creation or redistribution of,
// unlawful or unlicensed copies of an Apple operating system, or to
// circumvent, violate, or enable the circumvention or violation of, any
// terms of an Apple operating system software license agreement.
//
// Please obtain a copy of the License at
// http://www.opensource.apple.com/apsl/ and read it before using this file.
//
// The Original Code and all software distributed under the License are
// distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
// EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
// INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
// Please see the License for the specific language governing rights and
// limitations under the License.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_END@

#ifndef KERN_RECOUNT_H
#define KERN_RECOUNT_H

#include <os/base.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/cdefs.h>
#include <sys/_types/_size_t.h>

#if CONFIG_SPTM
// Track counters in secure execution contexts when the SPTM is available.
#define RECOUNT_SECURE_METRICS 1
#else // CONFIG_SPTM
#define RECOUNT_SECURE_METRICS 0
#endif // !CONFIG_SPTM

#if __arm64__
// Only ARM64 keeps a precise count of user/system time based on thread state.
#define RECOUNT_THREAD_BASED_LEVEL 1
#else // __arm64__
#define RECOUNT_THREAD_BASED_LEVEL 0
#endif // !__arm64__

__BEGIN_DECLS

// Recount maintains counters for resources used by software, like CPU time and cycles.
// These counters are tracked at different levels of granularity depending on the execution bucket they're accumulated in.
// For instance, while threads only differentiate between the broad CPU kinds due to memory constraints,
// the smaller number of tasks is free to use more memory and accumulate counters per-CPU.
//
// At context-switch, the scheduler calls `recount_switch_thread` to update the counters.
// The difference between the current counter values and the per-CPU snapshot is added to the thread switching off-CPU.
// On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel.
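//
// Conceptually, the context-switch update looks like this (an illustrative
// sketch only; the real logic lives in the Recount implementation):
//
//     struct recount_snap now = { 0 };
//     recount_snapshot(&now); // read the time/counter hardware
//     // The delta from the processor's previous snapshot is added to the
//     // off-going thread and task:
//     recount_switch_thread(&now, off_thread, off_task);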

#pragma mark - config

// A domain of the system's CPU topology, used as the granularity when tracking counter values.
__enum_decl(recount_topo_t, unsigned int, {
	// Attribute counts to the entire system, i.e. only a single counter.
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_SYSTEM,
	// Attribute counts to the CPU they accumulated on.
	// Mutual exclusion is not required to update this counter, but preemption must be disabled.
	RCT_TOPO_CPU,
	// Attribute counts to the CPU kind (e.g. P or E).
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_CPU_KIND,
	// The number of different topographies.
	RCT_TOPO_COUNT,
});

// Get the number of elements in an array for per-topography data.
size_t recount_topo_count(recount_topo_t topo);
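// For example, data kept per CPU kind has
// `recount_topo_count(RCT_TOPO_CPU_KIND)` elements, so a walk over a usage
// array might look like this (a sketch; `usage_list` is a hypothetical array
// sized for this topography):
//
//     size_t count = recount_topo_count(RCT_TOPO_CPU_KIND);
//     for (size_t i = 0; i < count; i++) {
//         uint64_t time_mach = recount_usage_time_mach(&usage_list[i]);
//         // ... report `time_mach` for this CPU kind ...
//     }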

// Recount's definition of CPU kinds, in lieu of one provided by the platform layers.
__enum_decl(recount_cpu_kind_t, unsigned int, {
	RCT_CPU_EFFICIENCY,
	RCT_CPU_PERFORMANCE,
	RCT_CPU_KIND_COUNT,
});

// A `recount_plan` structure controls the granularity of counting for a set of tracks and must be consulted when updating their counters.
typedef const struct recount_plan {
	const char *rpl_name;
	recount_topo_t rpl_topo;
} *recount_plan_t;

#define RECOUNT_PLAN_DECLARE(_name) \
	extern const struct recount_plan _name;

#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
		.rpl_name = #_name, \
		.rpl_topo = _topo, \
	}
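// For example, a hypothetical client tracking counters per CPU kind would
// declare a plan in its header and define it in a single translation unit:
//
//     RECOUNT_PLAN_DECLARE(recount_example_plan);
//     RECOUNT_PLAN_DEFINE(recount_example_plan, RCT_TOPO_CPU_KIND);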

// Represents the exception levels during which Recount can track metrics.
__enum_closed_decl(recount_level_t, unsigned int, {
	// Exception level is transitioning from the kernel.
	// Must be first, as this is the initial state.
	RCT_LVL_KERNEL,
	// Exception level is transitioning from user space.
	RCT_LVL_USER,
#if RECOUNT_SECURE_METRICS
	// Exception level is transitioning from secure execution.
	RCT_LVL_SECURE,
#endif // RECOUNT_SECURE_METRICS
	RCT_LVL_COUNT,
});

// The plans for each kind of object that has a resource accounting policy.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);

#pragma mark - generic accounting

// A track is where a single writer updates counter values so that multiple
// readers can observe them atomically.
struct recount_track {
	// Padding for the alignment of the usage structure.
	uint32_t rt_pad;
	// Used to synchronize updates so multiple values appear to be updated
	// atomically.
	uint32_t rt_sync;

	// The CPU usage metrics currently supported by Recount.
	struct recount_usage {
		struct recount_metrics {
			// Time tracking, in Mach timebase units.
			uint64_t rm_time_mach;
#if CONFIG_PERVASIVE_CPI
			// CPU performance counter metrics, when available.
			uint64_t rm_instructions;
			uint64_t rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
		} ru_metrics[RCT_LVL_COUNT];

#if CONFIG_PERVASIVE_ENERGY
		// CPU energy in nanojoules, when available.
		// This is not a "metric" because it is sampled out-of-band by ApplePMGR through CLPC.
		uint64_t ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	} rt_usage;
};
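// A minimal sketch of how a reader might observe `rt_usage` consistently,
// assuming `rt_sync` follows the usual even/odd sequence-counter convention
// (the actual protocol is an implementation detail of recount.c):
//
//     struct recount_usage usage;
//     uint32_t seq;
//     do {
//         seq = os_atomic_load(&track->rt_sync, acquire);
//         usage = track->rt_usage;
//     } while ((seq & 1) != 0 ||
//         os_atomic_load(&track->rt_sync, acquire) != seq);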

// Memory management routines for tracks and usage structures.
struct recount_track *recount_tracks_create(recount_plan_t plan);
void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
struct recount_usage *recount_usage_alloc(recount_topo_t topo);
void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);
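// Typical lifecycle of a set of tracks (a sketch; error handling elided):
//
//     struct recount_track *tracks = recount_tracks_create(&recount_task_plan);
//     // ... counters accumulate into `tracks` ...
//     recount_tracks_destroy(&recount_task_plan, tracks);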

// Aggregate tracks into usage structures so their values can be read by typical high-level interfaces.

// Sum all of a plan's tracks into a single usage structure.
void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// Summarize tracks into a total sum and a separate sum for a particular CPU kind.
void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);
// The same as above, but for usage-only objects, like coalitions.
void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage_list, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);

// Sum the counters for each perf-level, in the order returned by the sysctls.
void recount_sum_perf_levels(recount_plan_t plan,
    struct recount_track *tracks, struct recount_usage *sums);
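// For example, to compute a total along with its P-core-only portion (a
// sketch; assumes `tracks` was created with `recount_task_plan`):
//
//     struct recount_usage sum = { 0 };
//     struct recount_usage p_only = { 0 };
//     recount_sum_and_isolate_cpu_kind(&recount_task_plan, tracks,
//         RCT_CPU_PERFORMANCE, &sum, &p_only);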

#pragma mark - xnu internals

#if XNU_KERNEL_PRIVATE

struct thread;
struct work_interval;
struct task;
struct proc;

// A smaller usage structure for clients that need only times.
struct recount_times_mach {
	uint64_t rtm_user;
	uint64_t rtm_system;
};
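// These times are in Mach timebase units; kernel clients can convert them to
// nanoseconds with `absolutetime_to_nanoseconds` (a sketch; `task` is some
// task reference):
//
//     struct recount_times_mach times = recount_task_times(task);
//     uint64_t user_ns, system_ns;
//     absolutetime_to_nanoseconds(times.rtm_user, &user_ns);
//     absolutetime_to_nanoseconds(times.rtm_system, &system_ns);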

struct recount_times_mach recount_usage_times_mach(struct recount_usage *usage);
uint64_t recount_usage_system_time_mach(struct recount_usage *usage);
uint64_t recount_usage_time_mach(struct recount_usage *usage);
uint64_t recount_usage_cycles(struct recount_usage *usage);
uint64_t recount_usage_instructions(struct recount_usage *usage);

// Access another thread's usage data.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
uint64_t recount_thread_time_mach(struct thread *thread);
struct recount_times_mach recount_thread_times(struct thread *thread);

// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach recount_current_thread_times(void);
void recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void recount_current_thread_perf_level_usage(struct recount_usage
    *usage_levels);
uint64_t recount_current_thread_time_mach(void);
uint64_t recount_current_thread_user_time_mach(void);
uint64_t recount_current_thread_interrupt_time_mach(void);
uint64_t recount_current_thread_energy_nj(void);
void recount_current_task_usage(struct recount_usage *usage);
void recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
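// For example, reading the current thread's usage with the required interrupt
// handling (a sketch):
//
//     struct recount_usage usage = { 0 };
//     boolean_t intr = ml_set_interrupts_enabled(FALSE);
//     recount_current_thread_usage(&usage);
//     (void)ml_set_interrupts_enabled(intr);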

// Access a work interval's usage data.
void recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage);
struct recount_times_mach recount_work_interval_times(struct work_interval *work_interval);
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);

// Access another task's usage data.
void recount_task_usage(struct task *task, struct recount_usage *usage);
struct recount_times_mach recount_task_times(struct task *task);
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
uint64_t recount_task_energy_nj(struct task *task);
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);

// Get the sum of all terminated threads in the task (not including active threads).
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
struct recount_times_mach recount_task_terminated_times(struct task *task);
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);

int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
    size_t usize, int *ret);

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

#include <kern/smp.h>
#include <mach/machine/thread_status.h>
#include <machine/machine_routines.h>

#if __arm64__
static_assert((RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) ==
    (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__

#pragma mark threads

// The per-thread resource accounting structure.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;
	// Time spent by this thread running interrupt handlers.
	uint64_t rth_interrupt_time_mach;
#if RECOUNT_THREAD_BASED_LEVEL
	// The current level this thread is executing in.
	recount_level_t rth_current_level;
#endif // RECOUNT_THREAD_BASED_LEVEL
};
void recount_thread_init(struct recount_thread *th);
void recount_thread_copy(struct recount_thread *dst,
    struct recount_thread *src);
void recount_thread_deinit(struct recount_thread *th);

#pragma mark work_intervals

// The per-work-interval resource accounting structure.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance
	// by threads participating in the work interval, according to
	// `recount_work_interval_plan`.
	struct recount_track *rwi_current_instance;
};
void recount_work_interval_init(struct recount_work_interval *wi);
void recount_work_interval_deinit(struct recount_work_interval *wi);

#pragma mark tasks

// The per-task resource accounting structure.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion for
	// writers.
	struct recount_track *rtk_lifetime;
	// Usage from threads that have terminated or child tasks that have exited,
	// according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};
void recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void recount_task_copy(struct recount_task *dst,
    const struct recount_task *src);
void recount_task_deinit(struct recount_task *tk);

#pragma mark coalitions

// The per-coalition resource accounting structure.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll up their
	// statistics.
	struct recount_usage *rco_exited;
};
void recount_coalition_init(struct recount_coalition *co);
void recount_coalition_deinit(struct recount_coalition *co);

// Get the sum of all exited tasks in the coalition, plus a separate sum for
// the P-cores only.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);

#pragma mark processors

struct processor;

// A snap records counter values at a specific point in time.
struct recount_snap {
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	uint64_t rsn_insns;
	uint64_t rsn_cycles;
#endif // CONFIG_PERVASIVE_CPI
};
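// Update paths generally take a fresh snapshot and attribute the delta from
// the previous one (a conceptual sketch; `pr` is a `struct recount_processor`,
// defined below):
//
//     struct recount_snap now = { 0 };
//     recount_snapshot(&now);
//     uint64_t delta = now.rsn_time_mach - pr->rpr_snap.rsn_time_mach;
//     // ... attribute `delta` to the running thread's track ...
//     pr->rpr_snap = now;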

// The per-processor resource accounting structure.
struct recount_processor {
	struct recount_snap rpr_snap;
	struct recount_track rpr_active;
	struct recount_snap rpr_interrupt_snap;
#if MACH_ASSERT
	recount_level_t rpr_current_level;
#endif // MACH_ASSERT
	uint64_t rpr_interrupt_time_mach;
	uint64_t rpr_idle_time_mach;
	_Atomic uint64_t rpr_state_last_abs_time;
#if __AMP__
	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
	uint8_t rpr_cpu_kind_index;
#endif // __AMP__
};
void recount_processor_init(struct processor *processor);

// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (up to now, if the processor is currently idle).
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);

// Get the current amount of time spent handling interrupts by the current
// processor.
uint64_t recount_current_processor_interrupt_time_mach(void);

#pragma mark updates

// The following interfaces are meant for specific adopters, like the
// scheduler or the platform code responsible for entering and exiting the
// kernel.

// Fill in a snap with the current values from time- and count-keeping hardware.
void recount_snapshot(struct recount_snap *snap);

// During user/kernel transitions, surrounding serializing events provide
// enough ordering around reading the counter values.
void recount_snapshot_speculative(struct recount_snap *snap);

// Called by the scheduler when a context switch occurs.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// Log a kdebug event when a thread switches off-CPU.
void recount_log_switch_thread(const struct recount_snap *snap);
// Log a kdebug event when a thread switches on-CPU.
void recount_log_switch_thread_on(const struct recount_snap *snap);

// This function requires that no writers race with it -- it is only safe in
// debugger context or while running in the context of the track being
// inspected.
void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// For handling precise user/kernel time updates.
void recount_leave_user(void);
void recount_enter_user(void);
// For handling interrupt time updates.
void recount_enter_interrupt(void);
void recount_leave_interrupt(void);
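// These are meant to be called in matched pairs around an interrupt (a sketch
// of a hypothetical machine-dependent interrupt path):
//
//     recount_enter_interrupt();
//     // ... dispatch the interrupt handler ...
//     recount_leave_interrupt();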
#if __x86_64__
// Handle interrupt time-keeping on Intel, where interrupts aren't unified
// with the trap handlers, so whether the user or system timers are updated
// depends on the save-state.
void recount_enter_intel_interrupt(x86_saved_state_t *state);
void recount_leave_intel_interrupt(void);
#endif // __x86_64__

#endif // MACH_KERNEL_PRIVATE

#if XNU_KERNEL_PRIVATE

#if RECOUNT_SECURE_METRICS
// Handle guarded mode updates.
void recount_enter_secure(void);
void recount_leave_secure(void);
#endif // RECOUNT_SECURE_METRICS

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

// Hooks for each processor idling, running, and onlining.
void recount_processor_idle(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_run(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_online(processor_t processor, struct recount_snap *snap);

#pragma mark rollups

// Called by the thread termination queue with the task lock held.
void recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Called by the coalition roll-up statistics functions with the coalition
// lock held.
void recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);

#endif // MACH_KERNEL_PRIVATE

__END_DECLS

#endif // KERN_RECOUNT_H