recount.h source code [xnu/osfmk/kern/recount.h]

1	// Copyright (c) 2021-2023 Apple Inc. All rights reserved.
2	//
3	// @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4	//
5	// This file contains Original Code and/or Modifications of Original Code
6	// as defined in and that are subject to the Apple Public Source License
7	// Version 2.0 (the 'License'). You may not use this file except in
8	// compliance with the License. The rights granted to you under the License
9	// may not be used to create, or enable the creation or redistribution of,
10	// unlawful or unlicensed copies of an Apple operating system, or to
11	// circumvent, violate, or enable the circumvention or violation of, any
12	// terms of an Apple operating system software license agreement.
13	//
14	// Please obtain a copy of the License at
15	// http://www.opensource.apple.com/apsl/ and read it before using this file.
16	//
17	// The Original Code and all software distributed under the License are
18	// distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19	// EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20	// INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21	// FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22	// Please see the License for the specific language governing rights and
23	// limitations under the License.
24	//
25	// @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26
27	#ifndef KERN_RECOUNT_H
28	#define KERN_RECOUNT_H
29
30	#include <os/base.h>
31	#include <stdbool.h>
32	#include <stdint.h>
33	#include <sys/cdefs.h>
34	#include <sys/_types/_size_t.h>
35
// RECOUNT_SECURE_METRICS selects whether counters are also tracked for secure
// execution contexts. It is 1 only when the SPTM is configured in.
#if !CONFIG_SPTM
#define RECOUNT_SECURE_METRICS 0
#else // !CONFIG_SPTM
// Track counters in secure execution contexts when the SPTM is available.
#define RECOUNT_SECURE_METRICS 1
#endif // CONFIG_SPTM
42
// RECOUNT_THREAD_BASED_LEVEL is 1 when the user/system split is tracked
// precisely from thread state, which only ARM64 does.
#if !__arm64__
#define RECOUNT_THREAD_BASED_LEVEL 0
#else // !__arm64__
// Only ARM64 keeps precise track of user/system based on thread state.
#define RECOUNT_THREAD_BASED_LEVEL 1
#endif // __arm64__
49
50	__BEGIN_DECLS;
51
// Recount maintains counters for resources used by software, like CPU time and cycles.
// These counters are tracked at different levels of granularity depending on what execution bucket they're tracked in.
// For instance, while threads only differentiate on the broad CPU kinds due to memory constraints,
// the smaller number of tasks is free to use more memory and accumulate counters per-CPU.
//
// At context-switch, the scheduler calls `recount_switch_thread` to update the counters.
// The difference between the current counter values and the per-CPU snapshots is added to each thread.
// On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel.
60
61	#pragma mark - config
62
63	// A domain of the system's CPU topology, used as granularity when tracking counter values.
64	__enum_decl(recount_topo_t, unsigned int, {
65	// Attribute counts to the entire system, i.e. only a single counter.
66	// Note that mutual exclusion must be provided to update this kind of counter.
67	RCT_TOPO_SYSTEM,
68	// Attribute counts to the CPU they accumulated on.
69	// Mutual exclusion is not required to update this counter, but preemption must be disabled.
70	RCT_TOPO_CPU,
71	// Attribute counts to the CPU kind (e.g. P or E).
72	// Note that mutual exclusion must be provided to update this kind of counter.
73	RCT_TOPO_CPU_KIND,
74	// The number of different topographies.
75	RCT_TOPO_COUNT,
76	});
77
78	// Get the number of elements in an array for per-topography data.
79	size_t recount_topo_count(recount_topo_t topo);
80
81	// Recount's definitions of CPU kinds, in lieu of one from the platform layers.
82	__enum_decl(recount_cpu_kind_t, unsigned int, {
83	RCT_CPU_EFFICIENCY,
84	RCT_CPU_PERFORMANCE,
85	RCT_CPU_KIND_COUNT,
86	});
87
88	// A `recount_plan` structure controls the granularity of counting for a set of tracks and must be consulted when updating their counters.
89	typedef const struct recount_plan {
90	const char *rpl_name;
91	recount_topo_t rpl_topo;
92	} *recount_plan_t;
93
// Declare a plan that is defined in another translation unit.
// NOTE: the expansion already ends in a semicolon.
#define RECOUNT_PLAN_DECLARE(_name) extern const struct recount_plan _name;

// Define a plan called `_name` that counts at topology `_topo`.
// The plan's `rpl_name` is the stringified identifier.
// The expansion has no trailing semicolon; the user supplies it.
#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
	        .rpl_name = #_name, \
	        .rpl_topo = _topo, \
	}
102
103	// Represents exception levels that Recount can track metrics during.
104	__enum_closed_decl(recount_level_t, unsigned int, {
105	// Exception level is transitioning from the kernel.
106	// Must be first, as this is the initial state.
107	RCT_LVL_KERNEL,
108	// Exception level is transitioning from user space.
109	RCT_LVL_USER,
110	#if RECOUNT_SECURE_METRICS
111	// Exception level is transitioning from secure execution.
112	RCT_LVL_SECURE,
113	#endif // RECOUNT_SECURE_METRICS
114	RCT_LVL_COUNT,
115	});
116
// The current objects with resource accounting policies.
//
// Each plan is defined elsewhere with RECOUNT_PLAN_DEFINE. The work interval
// plan is declared here as well because `struct recount_work_interval` is
// documented as being counted according to `recount_work_interval_plan`.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_work_interval_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);
123
124	#pragma mark - generic accounting
125
126	// A track is where counter values can be updated atomically for readers by a
127	// single writer.
128	struct recount_track {
129	// Used to synchronize updates so multiple values appear to be updated atomically.
130	uint32_t rt_pad;
131	uint32_t rt_sync;
132
133	// The CPU usage metrics currently supported by Recount.
134	struct recount_usage {
135	struct recount_metrics {
136	// Time tracking, in Mach timebase units.
137	uint64_t rm_time_mach;
138	#if CONFIG_PERVASIVE_CPI
139	// CPU performance counter metrics, when available.
140	uint64_t rm_instructions;
141	uint64_t rm_cycles;
142	#endif // CONFIG_PERVASIVE_CPI
143	} ru_metrics[RCT_LVL_COUNT];
144
145	#if CONFIG_PERVASIVE_ENERGY
146	// CPU energy in nanojoules, when available.
147	// This is not a "metric" because it is sampled out-of-band by ApplePMGR through CLPC.
148	uint64_t ru_energy_nj;
149	#endif // CONFIG_PERVASIVE_ENERGY
150	} rt_usage;
151	};
152
153	// Memory management routines for tracks and usage structures.
154	struct recount_track *recount_tracks_create(recount_plan_t plan);
155	void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
156	struct recount_usage *recount_usage_alloc(recount_topo_t topo);
157	void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);
158
159	// Attribute tracks to usage structures, to read their values for typical high-level interfaces.
160
161	// Sum any tracks to a single sum.
162	void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
163	struct recount_usage *sum);
164
165	// Summarize tracks into a total sum and another for a particular CPU kind.
166	void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
167	struct recount_track *tracks, recount_cpu_kind_t kind,
168	struct recount_usage sum, struct* recount_usage *only_kind);
169	// The same as above, but for usage-only objects, like coalitions.
170	void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
171	struct recount_usage *usage_list, recount_cpu_kind_t kind,
172	struct recount_usage sum, struct* recount_usage *only_kind);
173
174	// Sum the counters for each perf-level, in the order returned by the sysctls.
175	void recount_sum_perf_levels(recount_plan_t plan,
176	struct recount_track tracks, struct* recount_usage *sums);
177
178	#pragma mark - xnu internals
179
180	#if XNU_KERNEL_PRIVATE
181
// Opaque kernel types that the prototypes below only use through pointers.
struct proc;
struct task;
struct thread;
struct work_interval;
186
// A compact alternative to `struct recount_usage` for clients that only need
// times.
struct recount_times_mach {
	// User time.
	uint64_t rtm_user;
	// System time.
	uint64_t rtm_system;
};
192
// Accessors that read individual values out of a usage structure.
struct recount_times_mach
recount_usage_times_mach(struct recount_usage *usage);
uint64_t
recount_usage_system_time_mach(struct recount_usage *usage);
uint64_t
recount_usage_time_mach(struct recount_usage *usage);
uint64_t
recount_usage_cycles(struct recount_usage *usage);
uint64_t
recount_usage_instructions(struct recount_usage *usage);
198
// Access another thread's usage data.

// Fill in `usage` with the usage of `thread`.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
// Fill in the `usage_levels` array with the usage of `thread`, per perf-level.
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
// Return the time of `thread`, in Mach timebase units.
uint64_t recount_thread_time_mach(struct thread *thread);
// Return the user and system times of `thread`.
struct recount_times_mach recount_thread_times(struct thread *thread);
205
// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void
recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach
recount_current_thread_times(void);
void
recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void
recount_current_thread_perf_level_usage(struct recount_usage *usage_levels);
uint64_t
recount_current_thread_time_mach(void);
uint64_t
recount_current_thread_user_time_mach(void);
uint64_t
recount_current_thread_interrupt_time_mach(void);
uint64_t
recount_current_thread_energy_nj(void);
void
recount_current_task_usage(struct recount_usage *usage);
void
recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
222
// Access a work interval's usage data.

// Fill in `usage` with the usage of `work_interval`.
void recount_work_interval_usage(struct work_interval *work_interval,
    struct recount_usage *usage);
// Return the user and system times of `work_interval`.
struct recount_times_mach recount_work_interval_times(struct work_interval *work_interval);
// Return the energy of `work_interval`, in nanojoules.
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);
227
// Access another task's usage data.

// Fill in `usage` with the usage of `task`.
void recount_task_usage(struct task *task, struct recount_usage *usage);
// Return the user and system times of `task`.
struct recount_times_mach recount_task_times(struct task *task);
// Fill in `sum` with the usage of `task` and `sum_perf_only` with the
// perf-only portion of it.
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
// Fill in `sum` with the times of `task` and `sum_perf_only` with the
// perf-only portion of them.
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
// Return the energy of `task`, in nanojoules.
uint64_t recount_task_energy_nj(struct task *task);
// Fill in the `usage_levels` array with the per-perf-level usage of the
// thread identified by `tid` in `task`.
// NOTE(review): the bool presumably reports whether the thread was found;
// confirm against the implementation.
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);
238
// Get the sum of all terminated threads in the task (not including active threads).

// Fill in `sum` with the terminated-thread usage of `task`.
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
// Return the terminated-thread user and system times of `task`.
struct recount_times_mach recount_task_terminated_times(struct task *task);
// Fill in `sum` with the terminated-thread usage of `task` and `perf_only`
// with the perf-only portion of it.
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);
245
246	int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
247	size_t usize, int *ret);
248
249	#endif // XNU_KERNEL_PRIVATE
250
251	#if MACH_KERNEL_PRIVATE
252
253	#include <kern/smp.h>
254	#include <mach/machine/thread_status.h>
255	#include <machine/machine_routines.h>
256
#if __arm64__
// Recount's CPU kinds must be ordered relative to each other the same way the
// platform's E and P cluster types are.
static_assert(
	(RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) == (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__
261
262	#pragma mark threads
263
// Resource accounting state kept for each thread.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;

	// Time spent by this thread running interrupt handlers.
	uint64_t rth_interrupt_time_mach;

#if RECOUNT_THREAD_BASED_LEVEL
	// The current level this thread is executing in.
	recount_level_t rth_current_level;
#endif // RECOUNT_THREAD_BASED_LEVEL
};

// Lifecycle of the per-thread accounting state.
void
recount_thread_init(struct recount_thread *th);
void
recount_thread_copy(struct recount_thread *dst, struct recount_thread *src);
void
recount_thread_deinit(struct recount_thread *th);
280
281	#pragma mark work_intervals
282
// Resource accounting state kept for each work interval.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance
	// by threads participating in the work interval, according to
	// `recount_work_interval_plan`.
	struct recount_track *rwi_current_instance;
};

// Lifecycle of the per-work-interval accounting state.
void
recount_work_interval_init(struct recount_work_interval *wi);
void
recount_work_interval_deinit(struct recount_work_interval *wi);
291
292	#pragma mark tasks
293
// Resource accounting state kept for each task.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion
	// for writers.
	struct recount_track *rtk_lifetime;

	// Usage from threads that have terminated or child tasks that have
	// exited, according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};

// Lifecycle of the per-task accounting state.
void
recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void
recount_task_copy(struct recount_task *dst, const struct recount_task *src);
void
recount_task_deinit(struct recount_task *tk);
314
315	#pragma mark coalitions
316
// Resource accounting state kept for each coalition.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll-up their
	// statistics.
	struct recount_usage *rco_exited;
};

// Lifecycle of the per-coalition accounting state.
void
recount_coalition_init(struct recount_coalition *co);
void
recount_coalition_deinit(struct recount_coalition *co);
328
// Get the sum of all currently-exited tasks in the coalition, and a separate P-only structure.
//
// coal:          the coalition accounting state to read.
// sum:           out-parameter receiving the total usage.
// sum_perf_only: out-parameter receiving the P-only usage.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);
332
333	#pragma mark processors
334
// Opaque here; only used through pointers.
struct processor;

// Counter values recorded at one specific point in time.
struct recount_snap {
	// Time, in Mach timebase units.
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	// Instruction and cycle counts, only present with CONFIG_PERVASIVE_CPI.
	uint64_t rsn_insns;
	uint64_t rsn_cycles;
#endif // CONFIG_PERVASIVE_CPI
};
345
346	// The per-processor resource accounting structure.
347	struct recount_processor {
348	struct recount_snap rpr_snap;
349	struct recount_track rpr_active;
350	struct recount_snap rpr_interrupt_snap;
351	#if MACH_ASSERT
352	recount_level_t rpr_current_level;
353	#endif // MACH_ASSERT
354	uint64_t rpr_interrupt_time_mach;
355	uint64_t rpr_idle_time_mach;
356	_Atomic uint64_t rpr_state_last_abs_time;
357	#if __AMP__
358	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
359	uint8_t rpr_cpu_kind_index;
360	#endif // __AMP__
361	};
362	void recount_processor_init(struct processor *processor);
363
// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (to now if the processor is currently idle).
//
// pr:             the processor accounting state to read.
// usage:          out-parameter receiving the processor's usage.
// idle_time_mach: out-parameter receiving the idle time, in Mach timebase
//                 units.
//
// Both results are returned through pointers; the function itself returns
// nothing.
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);
368
// Return the amount of time the current processor has spent handling
// interrupts so far.
uint64_t
recount_current_processor_interrupt_time_mach(void);
372
373	#pragma mark updates
374
375	// The following interfaces are meant for specific adopters, like the
376	// scheduler or platform code responsible for entering and exiting the kernel.
377
// Fill in `snap` with the current values from time- and count-keeping
// hardware.
void
recount_snapshot(struct recount_snap *snap);

// Variant for user/kernel transitions, where other serializing events already
// provide enough serialization around reading the counter values.
void
recount_snapshot_speculative(struct recount_snap *snap);
384
// Called by the scheduler when a context switch occurs.
//
// snap:       the counter values captured for the switch.
// off_thread: the thread going off-CPU.
// off_task:   the task going off-CPU.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
//
// off_thread: the thread to charge.
// off_task:   the task to charge.
// energy_nj:  the energy to add, in nanojoules.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// kdebug logging for context switches, given the counter values in `snap`.

// Log the event for a thread switching off-CPU.
void
recount_log_switch_thread(const struct recount_snap *snap);
// Log the event for a thread switching on-CPU.
void
recount_log_switch_thread_on(const struct recount_snap *snap);
395
396	// This function requires that no writers race with it -- this is only safe in
397	// debugger context or while running in the context of the track being
398	// inspected.
399	void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
400	struct recount_usage *sum);
401
// Hooks for precise user/kernel time updates.
void
recount_leave_user(void);
void
recount_enter_user(void);

// Hooks for interrupt time updates.
void
recount_enter_interrupt(void);
void
recount_leave_interrupt(void);
408	#if __x86_64__
409	// Handle interrupt time-keeping on Intel, which aren't unified with the trap
410	// handlers, so whether the user or system timers are updated depends on the
411	// save-state.
412	void recount_enter_intel_interrupt(x86_saved_state_t *state);
413	void recount_leave_intel_interrupt(void);
414	#endif // __x86_64__
415
416	#endif // MACH_KERNEL_PRIVATE
417
418	#if XNU_KERNEL_PRIVATE
419
#if RECOUNT_SECURE_METRICS
// Hooks for guarded mode updates; only declared when secure metrics are
// tracked.
void
recount_enter_secure(void);
void
recount_leave_secure(void);
#endif // RECOUNT_SECURE_METRICS
425
426	#endif // XNU_KERNEL_PRIVATE
427
428	#if MACH_KERNEL_PRIVATE
429
430	// Hooks for each processor idling, running, and onlining.
431	void recount_processor_idle(struct recount_processor *pr,
432	struct recount_snap *snap);
433	void recount_processor_run(struct recount_processor *pr,
434	struct recount_snap *snap);
435	void recount_processor_online(processor_t processor, struct recount_snap *snap);
436
437	#pragma mark rollups
438
// Roll the accounting of thread `th` up into task `tk`.
// Called by the thread termination queue with the task lock held.
void
recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Roll the accounting of task `tk` up into coalition `co`.
// Called by the coalition roll-up statistics functions with coalition lock
// held.
void
recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);
447
448	#endif // MACH_KERNEL_PRIVATE
449
450	__END_DECLS
451
452	#endif // KERN_RECOUNT_H
453

Browse the source code of xnu/osfmk/kern/recount.h