// Copyright (c) 2021 Apple Inc. All rights reserved.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_START@
//
// This file contains Original Code and/or Modifications of Original Code
// as defined in and that are subject to the Apple Public Source License
// Version 2.0 (the 'License'). You may not use this file except in
// compliance with the License. The rights granted to you under the License
// may not be used to create, or enable the creation or redistribution of,
// unlawful or unlicensed copies of an Apple operating system, or to
// circumvent, violate, or enable the circumvention or violation of, any
// terms of an Apple operating system software license agreement.
//
// Please obtain a copy of the License at
// http://www.opensource.apple.com/apsl/ and read it before using this file.
//
// The Original Code and all software distributed under the License are
// distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
// EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
// INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
// Please see the License for the specific language governing rights and
// limitations under the License.
//
// @APPLE_OSREFERENCE_LICENSE_HEADER_END@

#include <kern/assert.h>
#include <kern/kalloc.h>
#include <pexpert/pexpert.h>
#include <sys/kdebug.h>
#include <sys/_types/_size_t.h>
#include <kern/monotonic.h>
#include <kern/percpu.h>
#include <kern/processor.h>
#include <kern/recount.h>
#include <kern/startup.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/work_interval.h>
#include <mach/mach_time.h>
#include <mach/mach_types.h>
#include <machine/config.h>
#include <machine/machine_routines.h>
#include <os/atomic_private.h>
#include <stdbool.h>
#include <stdint.h>

// Recount's machine-independent implementation and interfaces for the kernel
// at-large.

#define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME

// On non-release kernels, allow precise PMC (instructions, cycles) updates to
// be disabled for performance characterization.
#if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
#define PRECISE_USER_KERNEL_PMC_TUNABLE 1

TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
#endif // PRECISE_USER_KERNEL_PMCS
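
// As a usage sketch only (not part of the interface above): on DEVELOPMENT or
// DEBUG kernels the tunable is driven by the "-no-precise-pmcs" boot-arg, so
// booting with, e.g., `nvram boot-args="-no-precise-pmcs"` (or the equivalent
// bootloader setting) keeps precise time accounting while skipping the
// per-transition PMC reads.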

#if !PRECISE_USER_KERNEL_TIME
#define PRECISE_TIME_FATAL_FUNC OS_NORETURN
#define PRECISE_TIME_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_TIME
#define PRECISE_TIME_FATAL_FUNC
#define PRECISE_TIME_ONLY_FUNC
#endif // PRECISE_USER_KERNEL_TIME

#if !PRECISE_USER_KERNEL_PMCS
#define PRECISE_PMCS_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_PMCS
#define PRECISE_PMCS_ONLY_FUNC
#endif // PRECISE_USER_KERNEL_PMCS

#if HAS_CPU_DPE_COUNTER
// Only certain platforms have DPE counters.
#define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY
#else // HAS_CPU_DPE_COUNTER
#define RECOUNT_ENERGY 0
#endif // !HAS_CPU_DPE_COUNTER

// Topography helpers.
size_t recount_topo_count(recount_topo_t topo);
static bool recount_topo_matches_cpu_kind(recount_topo_t topo,
    recount_cpu_kind_t kind, size_t idx);
static size_t recount_topo_index(recount_topo_t topo, processor_t processor);
static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to,
    size_t i);

// Prevent counter updates before the system is ready.
__security_const_late bool _recount_started = false;

// Lookup table that matches CPU numbers (indices) to their track index.
__security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 };

// Allocation metadata and zones.

// Keep static strings for `zone_create`.
static const char *_usage_zone_names[RCT_TOPO_COUNT] = {
    [RCT_TOPO_CPU] = "recount_usage_cpu",
    [RCT_TOPO_CPU_KIND] = "recount_usage_cpu_kind",
};

static const char *_track_zone_names[RCT_TOPO_COUNT] = {
    [RCT_TOPO_CPU] = "recount_track_cpu",
    [RCT_TOPO_CPU_KIND] = "recount_track_cpu_kind",
};

static const bool _topo_allocates[RCT_TOPO_COUNT] = {
    [RCT_TOPO_SYSTEM] = false,
    [RCT_TOPO_CPU] = true,
    [RCT_TOPO_CPU_KIND] = true,
};

// Fixed-size zones for allocations.
__security_const_late zone_t _recount_usage_zones[RCT_TOPO_COUNT] = { };
__security_const_late zone_t _recount_track_zones[RCT_TOPO_COUNT] = { };

__startup_func
static void
recount_startup(void)
{
#if __AMP__
    unsigned int cpu_count = ml_get_cpu_count();
    const ml_topology_info_t *topo_info = ml_get_topology_info();
    for (unsigned int i = 0; i < cpu_count; i++) {
        cluster_type_t type = topo_info->cpus[i].cluster_type;
        uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE :
            RCT_CPU_EFFICIENCY;
        _topo_cpu_kinds[i] = cluster_i;
    }
#endif // __AMP__

    for (unsigned int i = 0; i < RCT_TOPO_COUNT; i++) {
        if (_topo_allocates[i]) {
            const char *usage_name = _usage_zone_names[i];
            assert(usage_name != NULL);
            _recount_usage_zones[i] = zone_create(usage_name,
                sizeof(struct recount_usage) * recount_topo_count(i), 0);

            const char *track_name = _track_zone_names[i];
            assert(track_name != NULL);
            _recount_track_zones[i] = zone_create(track_name,
                sizeof(struct recount_track) * recount_topo_count(i), 0);
        }
    }

    _recount_started = true;
}

STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup);

#pragma mark - tracks

RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM);

OS_ALWAYS_INLINE
static inline uint64_t
recount_timestamp_speculative(void)
{
#if __arm__ || __arm64__
    return ml_get_speculative_timebase();
#else // __arm__ || __arm64__
    return mach_absolute_time();
#endif // !__arm__ && !__arm64__
}

OS_ALWAYS_INLINE
void
recount_snapshot_speculative(struct recount_snap *snap)
{
    snap->rsn_time_mach = recount_timestamp_speculative();
#if CONFIG_PERVASIVE_CPI
    mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns);
#endif // CONFIG_PERVASIVE_CPI
}

void
recount_snapshot(struct recount_snap *snap)
{
#if __arm__ || __arm64__
    __builtin_arm_isb(ISB_SY);
#endif // __arm__ || __arm64__
    recount_snapshot_speculative(snap);
}

static struct recount_snap *
recount_get_snap(processor_t processor)
{
    return &processor->pr_recount.rpr_snap;
}

static struct recount_snap *
recount_get_interrupt_snap(processor_t processor)
{
    return &processor->pr_recount.rpr_interrupt_snap;
}

// A simple sequence lock implementation.

static void
_seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen)
{
    disable_preemption();
    do {
        gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
    } while (__improbable((gen & 1) != 0));
    os_atomic_thread_fence(acquire);
    enable_preemption();
}

static uintptr_t
_seqlock_shared_lock(const uint32_t *lck)
{
    uint32_t gen = os_atomic_load(lck, acquire);
    if (__improbable((gen & 1) != 0)) {
        _seqlock_shared_lock_slowpath(lck, gen);
    }
    return gen;
}

static bool
_seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter)
{
    return os_atomic_load(lck, acquire) == on_enter;
}

static void
_seqlock_excl_lock_relaxed(uint32_t *lck)
{
    __assert_only uintptr_t new = os_atomic_inc(lck, relaxed);
    assert3u((new & 1), ==, 1);
}

static void
_seqlock_excl_commit(void)
{
    os_atomic_thread_fence(release);
}

static void
_seqlock_excl_unlock_relaxed(uint32_t *lck)
{
    __assert_only uint32_t new = os_atomic_inc(lck, relaxed);
    assert3u((new & 1), ==, 0);
}
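
// As an illustration only (the `recount_update_*` and `recount_read_*`
// helpers below are the real users of this sequence lock), a reader of a
// generation-counted value retries until it observes an even, unchanged
// generation:
//
//     uintptr_t gen;
//     struct recount_usage copy;
//     do {
//         gen = _seqlock_shared_lock(&track->rt_sync);  // waits out odd generations
//         copy = track->rt_usage;                       // racy copy, validated below
//     } while (!_seqlock_shared_try_unlock(&track->rt_sync, gen));
//
// Writers bracket their updates with `_seqlock_excl_lock_relaxed`,
// `_seqlock_excl_commit`, and `_seqlock_excl_unlock_relaxed`, which is the
// pattern used by `recount_update_start` and `recount_update_end`.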

static struct recount_track *
recount_update_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
    struct recount_track *track = &tracks[recount_topo_index(topo, processor)];
    _seqlock_excl_lock_relaxed(&track->rt_sync);
    return track;
}

#if RECOUNT_ENERGY

static struct recount_track *
recount_update_single_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
    return &tracks[recount_topo_index(topo, processor)];
}

#endif // RECOUNT_ENERGY

static void
recount_update_commit(void)
{
    _seqlock_excl_commit();
}

static void
recount_update_end(struct recount_track *track)
{
    _seqlock_excl_unlock_relaxed(&track->rt_sync);
}

static const struct recount_usage *
recount_read_start(const struct recount_track *track, uintptr_t *on_enter)
{
    const struct recount_usage *stats = &track->rt_usage;
    *on_enter = _seqlock_shared_lock(&track->rt_sync);
    return stats;
}

static bool
recount_try_read_end(const struct recount_track *track, uintptr_t on_enter)
{
    return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
}

static void
recount_read_track(struct recount_usage *stats,
    const struct recount_track *track)
{
    uintptr_t on_enter = 0;
    do {
        const struct recount_usage *vol_stats =
            recount_read_start(track, &on_enter);
        *stats = *vol_stats;
    } while (!recount_try_read_end(track, on_enter));
}

static void
recount_metrics_add(struct recount_metrics *sum, const struct recount_metrics *to_add)
{
    sum->rm_time_mach += to_add->rm_time_mach;
#if CONFIG_PERVASIVE_CPI
    sum->rm_instructions += to_add->rm_instructions;
    sum->rm_cycles += to_add->rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
}

static void
recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add)
{
    for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
        recount_metrics_add(&sum->ru_metrics[i], &to_add->ru_metrics[i]);
    }
#if CONFIG_PERVASIVE_ENERGY
    sum->ru_energy_nj += to_add->ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
}

OS_ALWAYS_INLINE
static inline void
recount_usage_add_snap(struct recount_usage *usage, recount_level_t level,
    struct recount_snap *snap)
{
    struct recount_metrics *metrics = &usage->ru_metrics[level];

    metrics->rm_time_mach += snap->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
    metrics->rm_cycles += snap->rsn_cycles;
    metrics->rm_instructions += snap->rsn_insns;
#else // CONFIG_PERVASIVE_CPI
#pragma unused(usage)
#endif // !CONFIG_PERVASIVE_CPI
}

static void
recount_rollup(recount_plan_t plan, const struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
    recount_topo_t from_topo = plan->rpl_topo;
    size_t topo_count = recount_topo_count(from_topo);
    struct recount_usage tmp = { 0 };
    for (size_t i = 0; i < topo_count; i++) {
        recount_read_track(&tmp, &tracks[i]);
        size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
        recount_usage_add(&stats[to_i], &tmp);
    }
}

// This function must only be run when the counters cannot increment for the
// track, e.g. when reading the current thread's own counts.
static void
recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
    recount_topo_t from_topo = plan->rpl_topo;
    size_t topo_count = recount_topo_count(from_topo);
    for (size_t i = 0; i < topo_count; i++) {
        size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
        recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
    }
}

void
recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum)
{
    recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
}

void
recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum)
{
    recount_topo_t topo = plan->rpl_topo;
    size_t topo_count = recount_topo_count(topo);
    for (size_t i = 0; i < topo_count; i++) {
        recount_usage_add(sum, &tracks[i].rt_usage);
    }
}

void
recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind)
{
    size_t topo_count = recount_topo_count(plan->rpl_topo);
    struct recount_usage tmp = { 0 };
    for (size_t i = 0; i < topo_count; i++) {
        recount_read_track(&tmp, &tracks[i]);
        recount_usage_add(sum, &tmp);
        if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
            recount_usage_add(only_kind, &tmp);
        }
    }
}

static void
recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages,
    struct recount_usage *sum)
{
    const size_t topo_count = recount_topo_count(plan->rpl_topo);
    for (size_t i = 0; i < topo_count; i++) {
        recount_usage_add(sum, &usages[i]);
    }
}

void
recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind)
{
    const size_t topo_count = recount_topo_count(plan->rpl_topo);
    for (size_t i = 0; i < topo_count; i++) {
        recount_usage_add(sum, &usage[i]);
        if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
            recount_usage_add(only_kind, &usage[i]);
        }
    }
}

void
recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks,
    struct recount_usage *sums)
{
    recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
}

struct recount_times_mach
recount_usage_times_mach(struct recount_usage *usage)
{
    return (struct recount_times_mach){
        .rtm_user = usage->ru_metrics[RCT_LVL_USER].rm_time_mach,
        .rtm_system = recount_usage_system_time_mach(usage),
    };
}

uint64_t
recount_usage_system_time_mach(struct recount_usage *usage)
{
    uint64_t system_time = usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach;
#if RECOUNT_SECURE_METRICS
    system_time += usage->ru_metrics[RCT_LVL_SECURE].rm_time_mach;
#endif // RECOUNT_SECURE_METRICS
    return system_time;
}

uint64_t
recount_usage_time_mach(struct recount_usage *usage)
{
    uint64_t time = 0;
    for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
        time += usage->ru_metrics[i].rm_time_mach;
    }
    return time;
}

uint64_t
recount_usage_cycles(struct recount_usage *usage)
{
    uint64_t cycles = 0;
#if CONFIG_CPU_COUNTERS
    for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
        cycles += usage->ru_metrics[i].rm_cycles;
    }
#else // CONFIG_CPU_COUNTERS
#pragma unused(usage)
#endif // !CONFIG_CPU_COUNTERS
    return cycles;
}

uint64_t
recount_usage_instructions(struct recount_usage *usage)
{
    uint64_t instructions = 0;
#if CONFIG_CPU_COUNTERS
    for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
        instructions += usage->ru_metrics[i].rm_instructions;
    }
#else // CONFIG_CPU_COUNTERS
#pragma unused(usage)
#endif // !CONFIG_CPU_COUNTERS
    return instructions;
}

// Plan-specific helpers.

void
recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk)
{
    recount_rollup(&recount_task_plan, tk->rtk_lifetime,
        recount_coalition_plan.rpl_topo, co->rco_exited);
}

void
recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th)
{
    recount_rollup(&recount_thread_plan, th->rth_lifetime,
        recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
}

#pragma mark - scheduler

// `result = lhs - rhs` for snapshots.
OS_ALWAYS_INLINE
static void
recount_snap_diff(struct recount_snap *result,
    const struct recount_snap *lhs, const struct recount_snap *rhs)
{
    assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach);
    result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
    assert3u(lhs->rsn_insns, >=, rhs->rsn_insns);
    assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles);
    result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles;
    result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns;
#endif // CONFIG_PERVASIVE_CPI
}

static void
_fix_time_precision(struct recount_usage *usage)
{
#if PRECISE_USER_KERNEL_TIME
#pragma unused(usage)
#else // PRECISE_USER_KERNEL_TIME
    // Attribute all time to user, as the system is only acting "on behalf
    // of" user processes -- a bit sketchy.
    usage->ru_metrics[RCT_LVL_USER].rm_time_mach +=
        recount_usage_system_time_mach(usage);
    usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach = 0;
#endif // !PRECISE_USER_KERNEL_TIME
}

void
recount_current_thread_usage(struct recount_usage *usage)
{
    assert(ml_get_interrupts_enabled() == FALSE);
    thread_t thread = current_thread();
    struct recount_snap snap = { 0 };
    recount_snapshot(&snap);
    recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
        usage);
    struct recount_snap *last = recount_get_snap(current_processor());
    struct recount_snap diff = { 0 };
    recount_snap_diff(&diff, &snap, last);
    recount_usage_add_snap(usage, RCT_LVL_KERNEL, &diff);
    _fix_time_precision(usage);
}

void
recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
    struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 };
    recount_current_thread_perf_level_usage(usage_perf_levels);
    recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
    *usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
    _fix_time_precision(usage);
    _fix_time_precision(usage_perf_only);
}

void
recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels)
{
    recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
        RCT_TOPO_CPU_KIND, usage_levels);
    size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
    for (size_t i = 0; i < topo_count; i++) {
        _fix_time_precision(&usage_levels[i]);
    }
}

void
recount_current_thread_perf_level_usage(struct recount_usage *usage_levels)
{
    assert(ml_get_interrupts_enabled() == FALSE);
    processor_t processor = current_processor();
    thread_t thread = current_thread();
    struct recount_snap snap = { 0 };
    recount_snapshot(&snap);
    recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
        RCT_TOPO_CPU_KIND, usage_levels);
    struct recount_snap *last = recount_get_snap(processor);
    struct recount_snap diff = { 0 };
    recount_snap_diff(&diff, &snap, last);
    size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
    struct recount_usage *cur_usage = &usage_levels[cur_i];
    recount_usage_add_snap(cur_usage, RCT_LVL_KERNEL, &diff);
    size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
    for (size_t i = 0; i < topo_count; i++) {
        _fix_time_precision(&usage_levels[i]);
    }
}

uint64_t
recount_current_thread_energy_nj(void)
{
#if RECOUNT_ENERGY
    assert(ml_get_interrupts_enabled() == FALSE);
    thread_t thread = current_thread();
    size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
    uint64_t energy_nj = 0;
    for (size_t i = 0; i < topo_count; i++) {
        energy_nj += thread->th_recount.rth_lifetime[i].rt_usage.ru_energy_nj;
    }
    return energy_nj;
#else // RECOUNT_ENERGY
    return 0;
#endif // !RECOUNT_ENERGY
}

static void
_times_add_usage(struct recount_times_mach *times, struct recount_usage *usage)
{
    times->rtm_user += usage->ru_metrics[RCT_LVL_USER].rm_time_mach;
#if PRECISE_USER_KERNEL_TIME
    times->rtm_system += recount_usage_system_time_mach(usage);
#else // PRECISE_USER_KERNEL_TIME
    times->rtm_user += recount_usage_system_time_mach(usage);
#endif // !PRECISE_USER_KERNEL_TIME
}

struct recount_times_mach
recount_thread_times(struct thread *thread)
{
    size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
    struct recount_times_mach times = { 0 };
    for (size_t i = 0; i < topo_count; i++) {
        _times_add_usage(&times, &thread->th_recount.rth_lifetime[i].rt_usage);
    }
    return times;
}

uint64_t
recount_thread_time_mach(struct thread *thread)
{
    struct recount_times_mach times = recount_thread_times(thread);
    return times.rtm_user + times.rtm_system;
}

static uint64_t
_time_since_last_snapshot(void)
{
    struct recount_snap *last = recount_get_snap(current_processor());
    uint64_t cur_time = mach_absolute_time();
    return cur_time - last->rsn_time_mach;
}

uint64_t
recount_current_thread_time_mach(void)
{
    assert(ml_get_interrupts_enabled() == FALSE);
    uint64_t previous_time = recount_thread_time_mach(current_thread());
    return previous_time + _time_since_last_snapshot();
}

struct recount_times_mach
recount_current_thread_times(void)
{
    assert(ml_get_interrupts_enabled() == FALSE);
    struct recount_times_mach times = recount_thread_times(
        current_thread());
#if PRECISE_USER_KERNEL_TIME
    // This code is executing in the kernel, so the time since the last snapshot
    // (with precise user/kernel time) is since entering the kernel.
    times.rtm_system += _time_since_last_snapshot();
#else // PRECISE_USER_KERNEL_TIME
    times.rtm_user += _time_since_last_snapshot();
#endif // !PRECISE_USER_KERNEL_TIME
    return times;
}

void
recount_thread_usage(thread_t thread, struct recount_usage *usage)
{
    recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
    _fix_time_precision(usage);
}

uint64_t
recount_current_thread_interrupt_time_mach(void)
{
    thread_t thread = current_thread();
    return thread->th_recount.rth_interrupt_time_mach;
}

void
recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage)
{
    recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
    _fix_time_precision(usage);
}

struct recount_times_mach
recount_work_interval_times(struct work_interval *work_interval)
{
    size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
    struct recount_times_mach times = { 0 };
    for (size_t i = 0; i < topo_count; i++) {
        _times_add_usage(&times, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
    }
    return times;
}

uint64_t
recount_work_interval_energy_nj(struct work_interval *work_interval)
{
#if RECOUNT_ENERGY
    size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
    uint64_t energy = 0;
    for (size_t i = 0; i < topo_count; i++) {
        energy += work_interval_get_recount_tracks(work_interval)[i].rt_usage.ru_energy_nj;
    }
    return energy;
#else // RECOUNT_ENERGY
#pragma unused(work_interval)
    return 0;
#endif // !RECOUNT_ENERGY
}

void
recount_current_task_usage(struct recount_usage *usage)
{
    task_t task = current_task();
    struct recount_track *tracks = task->tk_recount.rtk_lifetime;
    recount_sum(&recount_task_plan, tracks, usage);
    _fix_time_precision(usage);
}

void
recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
    task_t task = current_task();
    struct recount_track *tracks = task->tk_recount.rtk_lifetime;
    recount_sum_and_isolate_cpu_kind(&recount_task_plan,
        tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
    _fix_time_precision(usage);
    _fix_time_precision(usage_perf_only);
}

void
recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only)
{
    const recount_topo_t topo = recount_task_plan.rpl_topo;
    const size_t topo_count = recount_topo_count(topo);
    struct recount_track *tracks = task->tk_recount.rtk_lifetime;
    for (size_t i = 0; i < topo_count; i++) {
        struct recount_usage *usage = &tracks[i].rt_usage;
        _times_add_usage(sum, usage);
        if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
            _times_add_usage(sum_perf_only, usage);
        }
    }
}

void
recount_task_terminated_usage(task_t task, struct recount_usage *usage)
{
    recount_sum_usage(&recount_task_terminated_plan,
        task->tk_recount.rtk_terminated, usage);
    _fix_time_precision(usage);
}

struct recount_times_mach
recount_task_terminated_times(struct task *task)
{
    size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
    struct recount_times_mach times = { 0 };
    for (size_t i = 0; i < topo_count; i++) {
        _times_add_usage(&times, &task->tk_recount.rtk_terminated[i]);
    }
    return times;
}

void
recount_task_terminated_usage_perf_only(task_t task,
    struct recount_usage *usage, struct recount_usage *perf_only)
{
    recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
        task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
    _fix_time_precision(usage);
    _fix_time_precision(perf_only);
}

void
recount_task_usage_perf_only(task_t task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only)
{
    recount_sum_and_isolate_cpu_kind(&recount_task_plan,
        task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
    _fix_time_precision(sum);
    _fix_time_precision(sum_perf_only);
}

void
recount_task_usage(task_t task, struct recount_usage *usage)
{
    recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
    _fix_time_precision(usage);
}

struct recount_times_mach
recount_task_times(struct task *task)
{
    size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
    struct recount_times_mach times = { 0 };
    for (size_t i = 0; i < topo_count; i++) {
        _times_add_usage(&times, &task->tk_recount.rtk_lifetime[i].rt_usage);
    }
    return times;
}

uint64_t
recount_task_energy_nj(struct task *task)
{
#if RECOUNT_ENERGY
    size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
    uint64_t energy = 0;
    for (size_t i = 0; i < topo_count; i++) {
        energy += task->tk_recount.rtk_lifetime[i].rt_usage.ru_energy_nj;
    }
    return energy;
#else // RECOUNT_ENERGY
#pragma unused(task)
    return 0;
#endif // !RECOUNT_ENERGY
}

void
recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only)
{
    recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
        coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
    _fix_time_precision(sum);
    _fix_time_precision(sum_perf_only);
}

OS_ALWAYS_INLINE
static void
recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task,
    processor_t processor, recount_level_t level)
{
    // Idle threads do not attribute their usage back to the task or processor,
    // as the time is not spent "running."
    //
    // The processor-level metrics include idle time, instead, as the idle time
    // needs to be read as up-to-date from `recount_processor_usage`.

    const bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0;

    struct recount_track *wi_tracks_array = NULL;
    if (!was_idle) {
        wi_tracks_array = work_interval_get_recount_tracks(
            thread->th_work_interval);
    }
    bool absorb_work_interval = wi_tracks_array != NULL;

    struct recount_track *th_track = recount_update_start(
        thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
        processor);
    struct recount_track *wi_track = NULL;
    if (absorb_work_interval) {
        wi_track = recount_update_start(wi_tracks_array,
            recount_work_interval_plan.rpl_topo, processor);
    }
    struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
        task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo, processor);
    struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
        &processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
        processor);
    recount_update_commit();

    recount_usage_add_snap(&th_track->rt_usage, level, to_add);
    if (!was_idle) {
        if (absorb_work_interval) {
            recount_usage_add_snap(&wi_track->rt_usage, level, to_add);
        }
        recount_usage_add_snap(&tk_track->rt_usage, level, to_add);
        recount_usage_add_snap(&pr_track->rt_usage, level, to_add);
    }

    recount_update_commit();
    recount_update_end(th_track);
    if (!was_idle) {
        if (absorb_work_interval) {
            recount_update_end(wi_track);
        }
        recount_update_end(tk_track);
        recount_update_end(pr_track);
    }
}

void
recount_switch_thread(struct recount_snap *cur, struct thread *off_thread,
    struct task *off_task)
{
    assert(ml_get_interrupts_enabled() == FALSE);

    if (__improbable(!_recount_started)) {
        return;
    }

    processor_t processor = current_processor();

    struct recount_snap *last = recount_get_snap(processor);
    struct recount_snap diff = { 0 };
    recount_snap_diff(&diff, cur, last);
    recount_absorb_snap(&diff, off_thread, off_task, processor,
#if RECOUNT_THREAD_BASED_LEVEL
        off_thread->th_recount.rth_current_level
#else // RECOUNT_THREAD_BASED_LEVEL
        RCT_LVL_KERNEL
#endif // !RECOUNT_THREAD_BASED_LEVEL
        );
    memcpy(last, cur, sizeof(*last));
}

void
recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj)
{
#if RECOUNT_ENERGY
    assert(ml_get_interrupts_enabled() == FALSE);
    if (__improbable(!_recount_started)) {
        return;
    }

    bool was_idle = (off_thread->options & TH_OPT_IDLE_THREAD) != 0;
    struct recount_track *wi_tracks_array = work_interval_get_recount_tracks(off_thread->th_work_interval);
    bool collect_work_interval_telemetry = wi_tracks_array != NULL;
    processor_t processor = current_processor();

    struct recount_track *th_track = recount_update_single_start(
        off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
        processor);
    struct recount_track *wi_track = (was_idle || !collect_work_interval_telemetry) ? NULL :
        recount_update_single_start(wi_tracks_array,
        recount_work_interval_plan.rpl_topo, processor);
    struct recount_track *tk_track = was_idle ? NULL :
        recount_update_single_start(off_task->tk_recount.rtk_lifetime,
        recount_task_plan.rpl_topo, processor);
    struct recount_track *pr_track = was_idle ? NULL :
        recount_update_single_start(&processor->pr_recount.rpr_active,
        recount_processor_plan.rpl_topo, processor);

    th_track->rt_usage.ru_energy_nj += energy_nj;
    if (!was_idle) {
        if (collect_work_interval_telemetry) {
            wi_track->rt_usage.ru_energy_nj += energy_nj;
        }
        tk_track->rt_usage.ru_energy_nj += energy_nj;
        pr_track->rt_usage.ru_energy_nj += energy_nj;
    }
#else // RECOUNT_ENERGY
#pragma unused(off_thread, off_task, energy_nj)
#endif // !RECOUNT_ENERGY
}

#define MT_KDBG_IC_CPU_CSWITCH \
    KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1)

#define MT_KDBG_IC_CPU_CSWITCH_ON \
    KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1)

void
recount_log_switch_thread(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
    if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
        // In Monotonic's event hierarchy for backwards-compatibility.
        KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles);
    }
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}

void
recount_log_switch_thread_on(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
    if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) {
        if (!snap) {
            snap = recount_get_snap(current_processor());
        }
        // In Monotonic's event hierarchy for backwards-compatibility.
        KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, snap->rsn_insns, snap->rsn_cycles);
    }
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}

OS_ALWAYS_INLINE
PRECISE_TIME_ONLY_FUNC
static void
recount_precise_transition_diff(struct recount_snap *diff,
    struct recount_snap *last, struct recount_snap *cur)
{
#if PRECISE_USER_KERNEL_PMCS
#if PRECISE_USER_KERNEL_PMC_TUNABLE
    // With the tunable set, skip the PMC reads that the full
    // `recount_snapshot_speculative` would otherwise perform in this
    // configuration.
    if (__improbable(no_precise_pmcs)) {
        cur->rsn_time_mach = recount_timestamp_speculative();
        diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
    } else
#endif // PRECISE_USER_KERNEL_PMC_TUNABLE
    {
        recount_snapshot_speculative(cur);
        recount_snap_diff(diff, cur, last);
    }
#else // PRECISE_USER_KERNEL_PMCS
    cur->rsn_time_mach = recount_timestamp_speculative();
    diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
#endif // !PRECISE_USER_KERNEL_PMCS
}

#if MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL

PRECISE_TIME_ONLY_FUNC
static void
recount_assert_level(thread_t thread, recount_level_t old)
{
    assert3u(thread->th_recount.rth_current_level, ==, old);
}

#else // MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL

PRECISE_TIME_ONLY_FUNC
static void
recount_assert_level(thread_t __unused thread,
    recount_level_t __unused old)
{
}

#endif // !(MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL)

/// Called when entering or exiting the kernel to maintain system vs. user
/// counts; extremely performance sensitive.
///
/// Must be called with interrupts disabled.
///
/// - Parameter from: What level is being switched from.
/// - Parameter to: What level is being switched to.
///
/// - Returns: The value of Mach time that was sampled inside this function.
PRECISE_TIME_FATAL_FUNC
static uint64_t
recount_transition(recount_level_t from, recount_level_t to)
{
#if PRECISE_USER_KERNEL_TIME
    // Omit interrupts-disabled assertion for performance reasons.
    processor_t processor = current_processor();
    thread_t thread = processor->active_thread;
    if (thread) {
        task_t task = get_thread_ro_unchecked(thread)->tro_task;

        recount_assert_level(thread, from);
#if RECOUNT_THREAD_BASED_LEVEL
        thread->th_recount.rth_current_level = to;
#else // RECOUNT_THREAD_BASED_LEVEL
#pragma unused(to)
#endif // !RECOUNT_THREAD_BASED_LEVEL
        struct recount_snap *last = recount_get_snap(processor);
        struct recount_snap diff = { 0 };
        struct recount_snap cur = { 0 };
        recount_precise_transition_diff(&diff, last, &cur);
        recount_absorb_snap(&diff, thread, task, processor, from);
        memcpy(last, &cur, sizeof(*last));

        return cur.rsn_time_mach;
    } else {
        return 0;
    }
#else // PRECISE_USER_KERNEL_TIME
#pragma unused(from, to)
    panic("recount: kernel transition called with precise time off");
#endif // !PRECISE_USER_KERNEL_TIME
}

PRECISE_TIME_FATAL_FUNC
void
recount_leave_user(void)
{
    recount_transition(RCT_LVL_USER, RCT_LVL_KERNEL);
}

PRECISE_TIME_FATAL_FUNC
void
recount_enter_user(void)
{
    recount_transition(RCT_LVL_KERNEL, RCT_LVL_USER);
}
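
// A rough sketch of how the platform trap code is expected to pair these
// calls (the exact call sites live in machine-dependent trap handlers, not in
// this file):
//
//     // syscall/exception entry from user space:
//     recount_leave_user();   // user -> kernel, charges the elapsed user span
//     // ...handle the trap...
//     // return to user space:
//     recount_enter_user();   // kernel -> user, charges the kernel span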

void
recount_enter_interrupt(void)
{
    processor_t processor = current_processor();
    struct recount_snap *last = recount_get_interrupt_snap(processor);
    recount_snapshot_speculative(last);
}

void
recount_leave_interrupt(void)
{
    processor_t processor = current_processor();
    thread_t thread = processor->active_thread;
    struct recount_snap *last = recount_get_snap(processor);
    uint64_t last_time = last->rsn_time_mach;
    recount_snapshot_speculative(last);
    processor->pr_recount.rpr_interrupt_time_mach +=
        last->rsn_time_mach - last_time;
    thread->th_recount.rth_interrupt_time_mach +=
        last->rsn_time_mach - last_time;
}

#if __x86_64__

void
recount_enter_intel_interrupt(x86_saved_state_t *state)
{
    // The low bits of `%cs` being set indicate that the interrupt was
    // delivered while executing in user space.
    bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs :
        state->ss_32.cs) & 0x03;
    uint64_t timestamp = recount_transition(
        from_user ? RCT_LVL_USER : RCT_LVL_KERNEL, RCT_LVL_KERNEL);
    current_cpu_datap()->cpu_int_event_time = timestamp;
}

void
recount_leave_intel_interrupt(void)
{
    recount_transition(RCT_LVL_KERNEL, RCT_LVL_KERNEL);
    current_cpu_datap()->cpu_int_event_time = 0;
}

#endif // __x86_64__

#if RECOUNT_SECURE_METRICS

PRECISE_TIME_FATAL_FUNC
void
recount_leave_secure(void)
{
    boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
    recount_transition(RCT_LVL_SECURE, RCT_LVL_KERNEL);
    ml_set_interrupts_enabled(intrs_en);
}

PRECISE_TIME_FATAL_FUNC
void
recount_enter_secure(void)
{
    boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
    recount_transition(RCT_LVL_KERNEL, RCT_LVL_SECURE);
    ml_set_interrupts_enabled(intrs_en);
}

#endif // RECOUNT_SECURE_METRICS

// Set on rpr_state_last_abs_time when the processor is idle.
#define RCT_PR_IDLING (0x1ULL << 63)

void
recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap)
{
    __assert_only uint64_t state_time = os_atomic_load_wide(
        &pr->rpr_state_last_abs_time, relaxed);
    assert((state_time & RCT_PR_IDLING) == 0);
    assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
    uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach;
    os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
        relaxed);
}

OS_PURE OS_ALWAYS_INLINE
static inline uint64_t
_state_time(uint64_t state_stamp)
{
    return state_stamp & ~(RCT_PR_IDLING);
}
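
// To illustrate the encoding above (a sketch with made-up numbers): if a
// processor went idle at Mach time 0x1234, `rpr_state_last_abs_time` holds
// `RCT_PR_IDLING | 0x1234`; `_state_time()` strips the flag to recover
// 0x1234, and `recount_processor_usage()` uses the flag alone to decide
// whether to extend the idle time up to the caller's "now".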

void
recount_processor_init(processor_t processor)
{
#if __AMP__
    processor->pr_recount.rpr_cpu_kind_index =
        processor->processor_set->pset_cluster_type == PSET_AMP_P ?
        RCT_CPU_PERFORMANCE : RCT_CPU_EFFICIENCY;
#else // __AMP__
#pragma unused(processor)
#endif // !__AMP__
}

void
recount_processor_run(struct recount_processor *pr, struct recount_snap *snap)
{
    uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed);
    assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING);
    assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
    uint64_t new_state_stamp = snap->rsn_time_mach;
    pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
    os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
        relaxed);
}

void
recount_processor_online(processor_t processor, struct recount_snap *cur)
{
    recount_processor_run(&processor->pr_recount, cur);
    struct recount_snap *pr_snap = recount_get_snap(processor);
    memcpy(pr_snap, cur, sizeof(*pr_snap));
}

void
recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_out)
{
    recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
    _fix_time_precision(usage);

    uint64_t idle_time = pr->rpr_idle_time_mach;
    uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time,
        relaxed);
    bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING;
    if (idle) {
        // Since processors can idle for some time without an update, make sure
        // the idle time is up-to-date with respect to the caller.
        idle_time += mach_absolute_time() - _state_time(idle_stamp);
    }
    *idle_time_out = idle_time;
}

uint64_t
recount_current_processor_interrupt_time_mach(void)
{
    assert(!preemption_enabled());
    return current_processor()->pr_recount.rpr_interrupt_time_mach;
}

bool
recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels)
{
    thread_t thread = task_findtid(task, tid);
    if (thread != THREAD_NULL) {
        if (thread == current_thread()) {
            boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
            recount_current_thread_perf_level_usage(usage_levels);
            ml_set_interrupts_enabled(interrupt_state);
        } else {
            recount_thread_perf_level_usage(thread, usage_levels);
        }
    }
    return thread != THREAD_NULL;
}

#pragma mark - utilities

// For rolling up counts, convert an index from one topography to another.
static size_t
recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i)
{
    if (from == to) {
        return i;
    } else if (to == RCT_TOPO_SYSTEM) {
        return 0;
    } else if (from == RCT_TOPO_CPU) {
        assertf(to == RCT_TOPO_CPU_KIND,
            "recount: cannot convert from CPU topography to %d", to);
        return _topo_cpu_kinds[i];
    } else {
        panic("recount: unexpected rollup request from %d to %d", from, to);
    }
}
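
// A concrete illustration (assuming a hypothetical AMP layout where CPUs 0-1
// are efficiency cores and CPUs 2-5 are performance cores): converting
// CPU-topography index 3 to RCT_TOPO_CPU_KIND returns
// _topo_cpu_kinds[3] == RCT_CPU_PERFORMANCE, while converting any index to
// RCT_TOPO_SYSTEM collapses it to 0.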

// Get the track index of the provided processor and topography.
OS_ALWAYS_INLINE
static size_t
recount_topo_index(recount_topo_t topo, processor_t processor)
{
    switch (topo) {
    case RCT_TOPO_SYSTEM:
        return 0;
    case RCT_TOPO_CPU:
        return processor->cpu_id;
    case RCT_TOPO_CPU_KIND:
#if __AMP__
        return processor->pr_recount.rpr_cpu_kind_index;
#else // __AMP__
        return 0;
#endif // !__AMP__
    default:
        panic("recount: invalid topology %u to index", topo);
    }
}

// Return the number of tracks needed for a given topography.
size_t
recount_topo_count(recount_topo_t topo)
{
    // Allow the compiler to reason about at least the system and CPU kind
    // counts.
    switch (topo) {
    case RCT_TOPO_SYSTEM:
        return 1;

    case RCT_TOPO_CPU_KIND:
#if __AMP__
        return 2;
#else // __AMP__
        return 1;
#endif // !__AMP__

    case RCT_TOPO_CPU:
#if __arm__ || __arm64__
        return ml_get_cpu_count();
#else // __arm__ || __arm64__
        return ml_early_cpu_max_number() + 1;
#endif // !__arm__ && !__arm64__

    default:
        panic("recount: invalid topography %d", topo);
    }
}

static bool
recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind,
    size_t idx)
{
#if !__AMP__
#pragma unused(kind, idx)
#endif // !__AMP__
    switch (topo) {
    case RCT_TOPO_SYSTEM:
        return true;

    case RCT_TOPO_CPU_KIND:
#if __AMP__
        return kind == idx;
#else // __AMP__
        return false;
#endif // !__AMP__

    case RCT_TOPO_CPU: {
#if __AMP__
        return _topo_cpu_kinds[idx] == kind;
#else // __AMP__
        return false;
#endif // !__AMP__
    }

    default:
        panic("recount: unexpected topography %d", topo);
    }
}

struct recount_track *
recount_tracks_create(recount_plan_t plan)
{
    assert(_topo_allocates[plan->rpl_topo]);
    return zalloc_flags(_recount_track_zones[plan->rpl_topo],
        Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
}

static void
recount_tracks_copy(recount_plan_t plan, struct recount_track *dst,
    struct recount_track *src)
{
    size_t topo_count = recount_topo_count(plan->rpl_topo);
    for (size_t i = 0; i < topo_count; i++) {
        recount_read_track(&dst[i].rt_usage, &src[i]);
    }
}

void
recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks)
{
    assert(_topo_allocates[plan->rpl_topo]);
    zfree(_recount_track_zones[plan->rpl_topo], tracks);
}

void
recount_thread_init(struct recount_thread *th)
{
    th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
}

void
recount_thread_copy(struct recount_thread *dst, struct recount_thread *src)
{
    recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
        src->rth_lifetime);
}

void
recount_task_copy(struct recount_task *dst, const struct recount_task *src)
{
    recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
        src->rtk_lifetime);
}

void
recount_thread_deinit(struct recount_thread *th)
{
    recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
}

void
recount_task_init(struct recount_task *tk)
{
    tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
    tk->rtk_terminated = recount_usage_alloc(
        recount_task_terminated_plan.rpl_topo);
}

void
recount_task_deinit(struct recount_task *tk)
{
    recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
    recount_usage_free(recount_task_terminated_plan.rpl_topo,
        tk->rtk_terminated);
}

void
recount_coalition_init(struct recount_coalition *co)
{
    co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
}

void
recount_coalition_deinit(struct recount_coalition *co)
{
    recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
}

void
recount_work_interval_init(struct recount_work_interval *wi)
{
    wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
}

void
recount_work_interval_deinit(struct recount_work_interval *wi)
{
    recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
}

struct recount_usage *
recount_usage_alloc(recount_topo_t topo)
{
    assert(_topo_allocates[topo]);
    return zalloc_flags(_recount_usage_zones[topo],
        Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
}

void
recount_usage_free(recount_topo_t topo, struct recount_usage *usage)
{
    assert(_topo_allocates[topo]);
    zfree(_recount_usage_zones[topo], usage);
}