// Copyright (c) 2021-2023 Apple Inc. All rights reserved.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_START@
//
// This file contains Original Code and/or Modifications of Original Code
// as defined in and that are subject to the Apple Public Source License
// Version 2.0 (the 'License'). You may not use this file except in
// compliance with the License. The rights granted to you under the License
// may not be used to create, or enable the creation or redistribution of,
// unlawful or unlicensed copies of an Apple operating system, or to
// circumvent, violate, or enable the circumvention or violation of, any
// terms of an Apple operating system software license agreement.
//
// Please obtain a copy of the License at
// http://www.opensource.apple.com/apsl/ and read it before using this file.
//
// The Original Code and all software distributed under the License are
// distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
// EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
// INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
// Please see the License for the specific language governing rights and
// limitations under the License.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_END@

#ifndef KERN_RECOUNT_H
#define KERN_RECOUNT_H

#include <os/base.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/cdefs.h>
#include <sys/_types/_size_t.h>

#if CONFIG_SPTM
// Track counters in secure execution contexts when the SPTM is available.
#define RECOUNT_SECURE_METRICS 1
#else // CONFIG_SPTM
#define RECOUNT_SECURE_METRICS 0
#endif // !CONFIG_SPTM

#if __arm64__
// Only ARM64 keeps precise track of user/system based on thread state.
#define RECOUNT_THREAD_BASED_LEVEL 1
#else // __arm64__
#define RECOUNT_THREAD_BASED_LEVEL 0
#endif // !__arm64__

__BEGIN_DECLS

// Recount maintains counters for resources used by software, like CPU time and cycles.
// Counters are tracked at different granularities depending on which execution bucket they belong to.
// For instance, threads only differentiate between the broad CPU kinds due to memory constraints,
// while tasks, which are fewer in number, can afford the memory to accumulate counters per-CPU.
//
// At context-switch, the scheduler calls `recount_switch_thread` to update the counters.
// The difference between the current counter values and the per-CPU snapshots is added to each thread.
// On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel.
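//
// As a rough sketch of that flow (hypothetical variable names; the real logic
// lives in the Recount implementation):
//
//     struct recount_snap now = { 0 };
//     recount_snapshot(&now);                      // read the counters
//     uint64_t delta = now.rsn_time_mach - last_cpu_snap->rsn_time_mach;
//     track->rt_usage.ru_metrics[level].rm_time_mach += delta;
//     *last_cpu_snap = now;                        // re-arm the CPU snapshot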

#pragma mark - config

// A domain of the system's CPU topology, used as granularity when tracking counter values.
__enum_decl(recount_topo_t, unsigned int, {
	// Attribute counts to the entire system, i.e. only a single counter.
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_SYSTEM,
	// Attribute counts to the CPU they accumulated on.
	// Mutual exclusion is not required to update this counter, but preemption must be disabled.
	RCT_TOPO_CPU,
	// Attribute counts to the CPU kind (e.g. P or E).
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_CPU_KIND,
	// The number of different topographies.
	RCT_TOPO_COUNT,
});

// Get the number of elements in an array for per-topography data.
size_t recount_topo_count(recount_topo_t topo);
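
// For example, `recount_topo_count(RCT_TOPO_SYSTEM)` is 1, while
// `recount_topo_count(RCT_TOPO_CPU)` is the number of CPUs; callers size any
// per-topography buffer with it (sketch; `kalloc_type` array form assumed):
//
//     size_t count = recount_topo_count(RCT_TOPO_CPU);
//     struct recount_usage *usages =
//         kalloc_type(struct recount_usage, count, Z_WAITOK | Z_ZERO);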

// Recount's definitions of CPU kinds, in lieu of one from the platform layers.
__enum_decl(recount_cpu_kind_t, unsigned int, {
	RCT_CPU_EFFICIENCY,
	RCT_CPU_PERFORMANCE,
	RCT_CPU_KIND_COUNT,
});

// A `recount_plan` structure controls the granularity of counting for a set
// of tracks and must be consulted when updating their counters.
typedef const struct recount_plan {
	const char *rpl_name;
	recount_topo_t rpl_topo;
} *recount_plan_t;

#define RECOUNT_PLAN_DECLARE(_name) \
	extern const struct recount_plan _name;

#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
	        .rpl_name = #_name, \
	        .rpl_topo = _topo, \
	}
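
// For instance, a hypothetical subsystem counting per CPU kind could declare
// and define its plan as (illustrative only):
//
//     RECOUNT_PLAN_DECLARE(my_subsystem_plan);                    // header
//     RECOUNT_PLAN_DEFINE(my_subsystem_plan, RCT_TOPO_CPU_KIND);  // .c file
//
// The definition expands to a `struct recount_plan` with
// `rpl_name = "my_subsystem_plan"` and `rpl_topo = RCT_TOPO_CPU_KIND`.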

// The exception levels at which Recount can track metrics.
__enum_closed_decl(recount_level_t, unsigned int, {
	// Executing in the kernel.
	// Must be first, as this is the initial state.
	RCT_LVL_KERNEL,
	// Executing in user space.
	RCT_LVL_USER,
#if RECOUNT_SECURE_METRICS
	// Executing in a secure context.
	RCT_LVL_SECURE,
#endif // RECOUNT_SECURE_METRICS
	RCT_LVL_COUNT,
});

// The objects that currently have resource accounting policies.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_work_interval_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);

#pragma mark - generic accounting

// A track is where counter values can be updated atomically for readers by a
// single writer.
struct recount_track {
	// Used to synchronize updates so multiple values appear to be updated
	// atomically.
	uint32_t rt_sync;
	uint32_t rt_pad;

	// The CPU usage metrics currently supported by Recount.
	struct recount_usage {
		struct recount_metrics {
			// Time tracking, in Mach timebase units.
			uint64_t rm_time_mach;
#if CONFIG_PERVASIVE_CPI
			// CPU performance counter metrics, when available.
			uint64_t rm_instructions;
			uint64_t rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
		} ru_metrics[RCT_LVL_COUNT];

#if CONFIG_PERVASIVE_ENERGY
		// CPU energy in nanojoules, when available.
		// This is not a "metric" because it is sampled out-of-band by
		// ApplePMGR through CLPC.
		uint64_t ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	} rt_usage;
};
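
// Readers can treat `rt_sync` as a sequence counter to detect a concurrent
// writer. A minimal sketch, assuming the writer increments `rt_sync` before
// and after an update (so an odd value means an update is in flight):
//
//     struct recount_usage snapshot;
//     uint32_t sync;
//     do {
//         sync = os_atomic_load(&track->rt_sync, acquire);
//         snapshot = track->rt_usage;
//     } while ((sync & 1) != 0 ||
//         os_atomic_load(&track->rt_sync, acquire) != sync);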

// Memory management routines for tracks and usage structures.
struct recount_track *recount_tracks_create(recount_plan_t plan);
void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
struct recount_usage *recount_usage_alloc(recount_topo_t topo);
void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);

// Attribute tracks to usage structures, to read their values for typical
// high-level interfaces.

// Sum all of the tracks into a single usage structure.
void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// Summarize tracks into a total sum and another sum for a particular CPU kind.
void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);
// The same as above, but for usage-only objects, like coalitions.
void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage_list, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);

// Sum the counters for each perf-level, in the order returned by the sysctls.
void recount_sum_perf_levels(recount_plan_t plan,
    struct recount_track *tracks, struct recount_usage *sums);
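
// For example, folding a task's tracks into one total plus a P-core-only view
// (sketch; `tracks` comes from `recount_tracks_create(&recount_task_plan)`):
//
//     struct recount_usage total = { 0 };
//     struct recount_usage p_only = { 0 };
//     recount_sum_and_isolate_cpu_kind(&recount_task_plan, tracks,
//         RCT_CPU_PERFORMANCE, &total, &p_only);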

#pragma mark - xnu internals

#if XNU_KERNEL_PRIVATE

struct thread;
struct work_interval;
struct task;
struct proc;

// A smaller usage structure for clients that only need times.
struct recount_times_mach {
	uint64_t rtm_user;
	uint64_t rtm_system;
};

struct recount_times_mach recount_usage_times_mach(struct recount_usage *usage);
uint64_t recount_usage_system_time_mach(struct recount_usage *usage);
uint64_t recount_usage_time_mach(struct recount_usage *usage);
uint64_t recount_usage_cycles(struct recount_usage *usage);
uint64_t recount_usage_instructions(struct recount_usage *usage);
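
// These accessors hide the per-level layout of `recount_usage`. For example,
// converting the user/system split to nanoseconds (sketch;
// `absolutetime_to_nanoseconds` is the existing Mach conversion routine):
//
//     struct recount_times_mach times = recount_usage_times_mach(&usage);
//     uint64_t user_ns = 0, system_ns = 0;
//     absolutetime_to_nanoseconds(times.rtm_user, &user_ns);
//     absolutetime_to_nanoseconds(times.rtm_system, &system_ns);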

// Access another thread's usage data.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
uint64_t recount_thread_time_mach(struct thread *thread);
struct recount_times_mach recount_thread_times(struct thread *thread);

// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach recount_current_thread_times(void);
void recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void recount_current_thread_perf_level_usage(struct recount_usage
    *usage_levels);
uint64_t recount_current_thread_time_mach(void);
uint64_t recount_current_thread_user_time_mach(void);
uint64_t recount_current_thread_interrupt_time_mach(void);
uint64_t recount_current_thread_energy_nj(void);
void recount_current_task_usage(struct recount_usage *usage);
void recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
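
// Since interrupts must be disabled for the current-thread reads, a typical
// caller looks like (sketch, using the existing `ml_set_interrupts_enabled`):
//
//     struct recount_usage usage = { 0 };
//     boolean_t intrs = ml_set_interrupts_enabled(FALSE);
//     recount_current_thread_usage(&usage);
//     ml_set_interrupts_enabled(intrs);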

// Access a work interval's usage data.
void recount_work_interval_usage(struct work_interval *work_interval,
    struct recount_usage *usage);
struct recount_times_mach recount_work_interval_times(
    struct work_interval *work_interval);
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);

// Access another task's usage data.
void recount_task_usage(struct task *task, struct recount_usage *usage);
struct recount_times_mach recount_task_times(struct task *task);
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
uint64_t recount_task_energy_nj(struct task *task);
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);

// Get the summed usage of all terminated threads in the task; active threads
// are not included.
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
struct recount_times_mach recount_task_terminated_times(struct task *task);
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);

int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
    size_t usize, int *ret);

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

#include <kern/smp.h>
#include <mach/machine/thread_status.h>
#include <machine/machine_routines.h>

#if __arm64__
static_assert((RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) ==
    (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__

#pragma mark threads

// The per-thread resource accounting structure.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;
	// Time spent by this thread running interrupt handlers.
	uint64_t rth_interrupt_time_mach;
#if RECOUNT_THREAD_BASED_LEVEL
	// The current level this thread is executing in.
	recount_level_t rth_current_level;
#endif // RECOUNT_THREAD_BASED_LEVEL
};
void recount_thread_init(struct recount_thread *th);
void recount_thread_copy(struct recount_thread *dst,
    struct recount_thread *src);
void recount_thread_deinit(struct recount_thread *th);

#pragma mark work_intervals

// The per-work-interval resource accounting structure.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance
	// by threads participating in the work interval, according to
	// `recount_work_interval_plan`.
	struct recount_track *rwi_current_instance;
};
void recount_work_interval_init(struct recount_work_interval *wi);
void recount_work_interval_deinit(struct recount_work_interval *wi);

#pragma mark tasks

// The per-task resource accounting structure.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion
	// for writers.
	struct recount_track *rtk_lifetime;
	// Usage from threads that have terminated or child tasks that have
	// exited, according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};
void recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void recount_task_copy(struct recount_task *dst,
    const struct recount_task *src);
void recount_task_deinit(struct recount_task *tk);

#pragma mark coalitions

// The per-coalition resource accounting structure.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll up their
	// statistics.
	struct recount_usage *rco_exited;
};
void recount_coalition_init(struct recount_coalition *co);
void recount_coalition_deinit(struct recount_coalition *co);

// Get the summed usage of all exited tasks in the coalition, plus a separate
// P-only structure.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);

#pragma mark processors

struct processor;

// A snap records counter values at a specific point in time.
struct recount_snap {
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	uint64_t rsn_insns;
	uint64_t rsn_cycles;
#endif // CONFIG_PERVASIVE_CPI
};

// The per-processor resource accounting structure.
struct recount_processor {
	struct recount_snap rpr_snap;
	struct recount_track rpr_active;
	struct recount_snap rpr_interrupt_snap;
#if MACH_ASSERT
	recount_level_t rpr_current_level;
#endif // MACH_ASSERT
	uint64_t rpr_interrupt_time_mach;
	uint64_t rpr_idle_time_mach;
	_Atomic uint64_t rpr_state_last_abs_time;
#if __AMP__
	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
	uint8_t rpr_cpu_kind_index;
#endif // __AMP__
};
void recount_processor_init(struct processor *processor);

// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (to now if the processor is currently idle).
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);

// Get the current amount of time spent handling interrupts by the current
// processor.
uint64_t recount_current_processor_interrupt_time_mach(void);

#pragma mark updates

// The following interfaces are meant for specific adopters, like the
// scheduler or platform code responsible for entering and exiting the kernel.

// Fill in a snap with the current values from time- and count-keeping hardware.
void recount_snapshot(struct recount_snap *snap);

// A variant of `recount_snapshot` that elides explicit serialization: during
// user/kernel transitions, other serializing events provide enough ordering
// around reading the counter values.
void recount_snapshot_speculative(struct recount_snap *snap);
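
// For example, measuring a region of kernel code (sketch):
//
//     struct recount_snap before = { 0 };
//     struct recount_snap after = { 0 };
//     recount_snapshot(&before);
//     // ... region to account for ...
//     recount_snapshot(&after);
//     uint64_t elapsed_mach = after.rsn_time_mach - before.rsn_time_mach;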

// Called by the scheduler when a context switch occurs.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// Log a kdebug event when a thread switches off-CPU.
void recount_log_switch_thread(const struct recount_snap *snap);
// Log a kdebug event when a thread switches on-CPU.
void recount_log_switch_thread_on(const struct recount_snap *snap);
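
// Taken together, a context switch is expected to look roughly like this
// (sketch; the authoritative call sites and exact ordering live in the
// scheduler):
//
//     struct recount_snap snap = { 0 };
//     recount_snapshot(&snap);
//     recount_log_switch_thread(&snap);        // old thread going off-CPU
//     recount_switch_thread(&snap, old_thread, old_task);
//     // ... switch stacks and address spaces ...
//     recount_log_switch_thread_on(&snap);     // new thread now on-CPU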

// This function requires that no writers race with it -- this is only safe in
// debugger context or while running in the context of the track being
// inspected.
void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// For handling precise user/kernel time updates.
void recount_leave_user(void);
void recount_enter_user(void);
// For handling interrupt time updates.
void recount_enter_interrupt(void);
void recount_leave_interrupt(void);
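
// These calls bracket the corresponding execution window. For example, an
// interrupt path pairs them around dispatch (sketch):
//
//     recount_enter_interrupt();
//     // ... run the interrupt handler ...
//     recount_leave_interrupt();
//
// Similarly, kernel entry calls `recount_leave_user` and the return to user
// space calls `recount_enter_user`.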
#if __x86_64__
// Handle interrupt time-keeping on Intel, where the interrupt vectors aren't
// unified with the trap handlers, so whether the user or system timers are
// updated depends on the save-state.
void recount_enter_intel_interrupt(x86_saved_state_t *state);
void recount_leave_intel_interrupt(void);
#endif // __x86_64__

#endif // MACH_KERNEL_PRIVATE

#if XNU_KERNEL_PRIVATE

#if RECOUNT_SECURE_METRICS
// Handle guarded mode updates.
void recount_enter_secure(void);
void recount_leave_secure(void);
#endif // RECOUNT_SECURE_METRICS

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

// Hooks for each processor idling, running, and coming online.
void recount_processor_idle(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_run(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_online(processor_t processor, struct recount_snap *snap);

#pragma mark rollups

// Called by the thread termination queue with the task lock held.
void recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Called by the coalition roll-up statistics functions with the coalition
// lock held.
void recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);

#endif // MACH_KERNEL_PRIVATE

__END_DECLS

#endif // KERN_RECOUNT_H