1 | // Copyright (c) 2021 Apple Inc. All rights reserved. |
2 | // |
3 | // @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
4 | // |
5 | // This file contains Original Code and/or Modifications of Original Code |
6 | // as defined in and that are subject to the Apple Public Source License |
7 | // Version 2.0 (the 'License'). You may not use this file except in |
8 | // compliance with the License. The rights granted to you under the License |
9 | // may not be used to create, or enable the creation or redistribution of, |
10 | // unlawful or unlicensed copies of an Apple operating system, or to |
11 | // circumvent, violate, or enable the circumvention or violation of, any |
12 | // terms of an Apple operating system software license agreement. |
13 | // |
14 | // Please obtain a copy of the License at |
15 | // http://www.opensource.apple.com/apsl/ and read it before using this file. |
16 | // |
17 | // The Original Code and all software distributed under the License are |
18 | // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
19 | // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
20 | // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
21 | // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
22 | // Please see the License for the specific language governing rights and |
23 | // limitations under the License. |
24 | // |
25 | // @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
26 | |
27 | #include <kern/assert.h> |
28 | #include <kern/kalloc.h> |
29 | #include <pexpert/pexpert.h> |
30 | #include <sys/kdebug.h> |
31 | #include <sys/_types/_size_t.h> |
32 | #include <kern/monotonic.h> |
33 | #include <kern/percpu.h> |
34 | #include <kern/processor.h> |
35 | #include <kern/recount.h> |
36 | #include <kern/startup.h> |
37 | #include <kern/task.h> |
38 | #include <kern/thread.h> |
39 | #include <kern/work_interval.h> |
40 | #include <mach/mach_time.h> |
41 | #include <mach/mach_types.h> |
42 | #include <machine/config.h> |
43 | #include <machine/machine_routines.h> |
44 | #include <os/atomic_private.h> |
45 | #include <stdbool.h> |
46 | #include <stdint.h> |
47 | |
48 | // Recount's machine-independent implementation and interfaces for the kernel |
49 | // at-large. |
50 | |
51 | #define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME |
52 | |
53 | // On non-release kernels, allow precise PMC (instructions, cycles) updates to |
54 | // be disabled for performance characterization. |
55 | #if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG) |
56 | #define PRECISE_USER_KERNEL_PMC_TUNABLE 1 |
57 | |
TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
#endif // PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
60 | |
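// When precise user/kernel time is disabled, the user/kernel transition
// functions must never be called (they panic) and several helpers go unused.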
61 | #if !PRECISE_USER_KERNEL_TIME |
62 | #define PRECISE_TIME_FATAL_FUNC OS_NORETURN |
63 | #define PRECISE_TIME_ONLY_FUNC OS_UNUSED |
64 | #else // !PRECISE_USER_KERNEL_TIME |
65 | #define PRECISE_TIME_FATAL_FUNC |
66 | #define PRECISE_TIME_ONLY_FUNC |
67 | #endif // PRECISE_USER_KERNEL_TIME |
68 | |
69 | #if !PRECISE_USER_KERNEL_PMCS |
70 | #define PRECISE_PMCS_ONLY_FUNC OS_UNUSED |
#else // !PRECISE_USER_KERNEL_PMCS
72 | #define PRECISE_PMCS_ONLY_FUNC |
73 | #endif // PRECISE_USER_KERNEL_PMCS |
74 | |
75 | #if HAS_CPU_DPE_COUNTER |
76 | // Only certain platforms have DPE counters. |
77 | #define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY |
78 | #else // HAS_CPU_DPE_COUNTER |
79 | #define RECOUNT_ENERGY 0 |
80 | #endif // !HAS_CPU_DPE_COUNTER |
81 | |
82 | // Topography helpers. |
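//
// A topography describes how a plan's counters are laid out: one track for
// the whole system (RCT_TOPO_SYSTEM), one track per CPU (RCT_TOPO_CPU), or
// one track per kind of CPU, e.g. performance vs. efficiency cores
// (RCT_TOPO_CPU_KIND).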
83 | size_t recount_topo_count(recount_topo_t topo); |
84 | static bool recount_topo_matches_cpu_kind(recount_topo_t topo, |
85 | recount_cpu_kind_t kind, size_t idx); |
86 | static size_t recount_topo_index(recount_topo_t topo, processor_t processor); |
87 | static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to, |
88 | size_t i); |
89 | |
90 | // Prevent counter updates before the system is ready. |
91 | __security_const_late bool _recount_started = false; |
92 | |
// Lookup table mapping CPU numbers (indices) to their CPU kind, used as the
// track index for the CPU-kind topography.
94 | __security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 }; |
95 | |
96 | // Allocation metadata and zones. |
97 | |
98 | // Keep static strings for `zone_create`. |
99 | static const char *_usage_zone_names[RCT_TOPO_COUNT] = { |
	[RCT_TOPO_CPU] = "recount_usage_cpu",
	[RCT_TOPO_CPU_KIND] = "recount_usage_cpu_kind",
102 | }; |
103 | |
104 | static const char *_track_zone_names[RCT_TOPO_COUNT] = { |
	[RCT_TOPO_CPU] = "recount_track_cpu",
	[RCT_TOPO_CPU_KIND] = "recount_track_cpu_kind",
107 | }; |
108 | |
109 | static const bool _topo_allocates[RCT_TOPO_COUNT] = { |
110 | [RCT_TOPO_SYSTEM] = false, |
111 | [RCT_TOPO_CPU] = true, |
112 | [RCT_TOPO_CPU_KIND] = true, |
113 | }; |
114 | |
115 | // Fixed-size zones for allocations. |
116 | __security_const_late zone_t _recount_usage_zones[RCT_TOPO_COUNT] = { }; |
117 | __security_const_late zone_t _recount_track_zones[RCT_TOPO_COUNT] = { }; |
118 | |
119 | __startup_func |
120 | static void |
121 | recount_startup(void) |
122 | { |
123 | #if __AMP__ |
124 | unsigned int cpu_count = ml_get_cpu_count(); |
125 | const ml_topology_info_t *topo_info = ml_get_topology_info(); |
126 | for (unsigned int i = 0; i < cpu_count; i++) { |
127 | cluster_type_t type = topo_info->cpus[i].cluster_type; |
128 | uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE : |
129 | RCT_CPU_EFFICIENCY; |
130 | _topo_cpu_kinds[i] = cluster_i; |
131 | } |
132 | #endif // __AMP__ |
133 | |
134 | for (unsigned int i = 0; i < RCT_TOPO_COUNT; i++) { |
135 | if (_topo_allocates[i]) { |
136 | const char *usage_name = _usage_zone_names[i]; |
137 | assert(usage_name != NULL); |
			_recount_usage_zones[i] = zone_create(usage_name,
			    sizeof(struct recount_usage) * recount_topo_count(i),
			    0);

			const char *track_name = _track_zone_names[i];
			assert(track_name != NULL);
			_recount_track_zones[i] = zone_create(track_name,
			    sizeof(struct recount_track) * recount_topo_count(i),
			    0);
147 | } |
148 | } |
149 | |
150 | _recount_started = true; |
151 | } |
152 | |
153 | STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup); |
154 | |
155 | #pragma mark - tracks |
156 | |
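// Each plan binds a set of counters to the topography its tracks are laid
// out by: threads and the terminated-task/coalition rollups are kept per CPU
// kind, live tasks and work intervals per CPU, and each processor keeps a
// single system-wide track of its own.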
157 | RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND); |
158 | RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU); |
159 | RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU); |
160 | RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND); |
161 | RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND); |
162 | RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM); |
163 | |
164 | OS_ALWAYS_INLINE |
165 | static inline uint64_t |
166 | recount_timestamp_speculative(void) |
167 | { |
168 | #if __arm__ || __arm64__ |
169 | return ml_get_speculative_timebase(); |
170 | #else // __arm__ || __arm64__ |
171 | return mach_absolute_time(); |
172 | #endif // !__arm__ && !__arm64__ |
173 | } |
174 | |
175 | OS_ALWAYS_INLINE |
176 | void |
177 | recount_snapshot_speculative(struct recount_snap *snap) |
178 | { |
179 | snap->rsn_time_mach = recount_timestamp_speculative(); |
180 | #if CONFIG_PERVASIVE_CPI |
181 | mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns); |
182 | #endif // CONFIG_PERVASIVE_CPI |
183 | } |
184 | |
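// A non-speculative snapshot: the barrier keeps the timebase (and PMC) reads
// from being taken speculatively.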
185 | void |
186 | recount_snapshot(struct recount_snap *snap) |
187 | { |
188 | #if __arm__ || __arm64__ |
189 | __builtin_arm_isb(ISB_SY); |
190 | #endif // __arm__ || __arm64__ |
191 | recount_snapshot_speculative(snap); |
192 | } |
193 | |
194 | static struct recount_snap * |
195 | recount_get_snap(processor_t processor) |
196 | { |
197 | return &processor->pr_recount.rpr_snap; |
198 | } |
199 | |
200 | static struct recount_snap * |
201 | recount_get_interrupt_snap(processor_t processor) |
202 | { |
203 | return &processor->pr_recount.rpr_interrupt_snap; |
204 | } |
205 | |
206 | // A simple sequence lock implementation. |
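//
// Writers bump the generation to an odd value before updating a track and
// back to an even value once the update is visible; readers snapshot the
// generation, copy the data, and retry if a writer was active or the
// generation changed underneath them.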
207 | |
208 | static void |
209 | _seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen) |
210 | { |
211 | disable_preemption(); |
212 | do { |
		gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
214 | } while (__improbable((gen & 1) != 0)); |
215 | os_atomic_thread_fence(acquire); |
216 | enable_preemption(); |
217 | } |
218 | |
219 | static uintptr_t |
220 | _seqlock_shared_lock(const uint32_t *lck) |
221 | { |
222 | uint32_t gen = os_atomic_load(lck, acquire); |
223 | if (__improbable((gen & 1) != 0)) { |
224 | _seqlock_shared_lock_slowpath(lck, gen); |
225 | } |
226 | return gen; |
227 | } |
228 | |
229 | static bool |
230 | _seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter) |
231 | { |
232 | return os_atomic_load(lck, acquire) == on_enter; |
233 | } |
234 | |
235 | static void |
236 | _seqlock_excl_lock_relaxed(uint32_t *lck) |
237 | { |
238 | __assert_only uintptr_t new = os_atomic_inc(lck, relaxed); |
239 | assert3u((new & 1), ==, 1); |
240 | } |
241 | |
242 | static void |
243 | _seqlock_excl_commit(void) |
244 | { |
245 | os_atomic_thread_fence(release); |
246 | } |
247 | |
248 | static void |
249 | _seqlock_excl_unlock_relaxed(uint32_t *lck) |
250 | { |
251 | __assert_only uint32_t new = os_atomic_inc(lck, relaxed); |
252 | assert3u((new & 1), ==, 0); |
253 | } |
254 | |
255 | static struct recount_track * |
256 | recount_update_start(struct recount_track *tracks, recount_topo_t topo, |
257 | processor_t processor) |
258 | { |
259 | struct recount_track *track = &tracks[recount_topo_index(topo, processor)]; |
	_seqlock_excl_lock_relaxed(&track->rt_sync);
261 | return track; |
262 | } |
263 | |
264 | #if RECOUNT_ENERGY |
265 | |
266 | static struct recount_track * |
267 | recount_update_single_start(struct recount_track *tracks, recount_topo_t topo, |
268 | processor_t processor) |
269 | { |
270 | return &tracks[recount_topo_index(topo, processor)]; |
271 | } |
272 | |
273 | #endif // RECOUNT_ENERGY |
274 | |
275 | static void |
276 | recount_update_commit(void) |
277 | { |
278 | _seqlock_excl_commit(); |
279 | } |
280 | |
281 | static void |
282 | recount_update_end(struct recount_track *track) |
283 | { |
	_seqlock_excl_unlock_relaxed(&track->rt_sync);
285 | } |
286 | |
287 | static const struct recount_usage * |
288 | recount_read_start(const struct recount_track *track, uintptr_t *on_enter) |
289 | { |
290 | const struct recount_usage *stats = &track->rt_usage; |
	*on_enter = _seqlock_shared_lock(&track->rt_sync);
292 | return stats; |
293 | } |
294 | |
295 | static bool |
296 | recount_try_read_end(const struct recount_track *track, uintptr_t on_enter) |
297 | { |
	return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
299 | } |
300 | |
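// Copy a consistent snapshot of a track's usage, retrying until no update
// raced with the read.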
301 | static void |
302 | recount_read_track(struct recount_usage *stats, |
303 | const struct recount_track *track) |
304 | { |
305 | uintptr_t on_enter = 0; |
306 | do { |
307 | const struct recount_usage *vol_stats = |
		    recount_read_start(track, &on_enter);
309 | *stats = *vol_stats; |
310 | } while (!recount_try_read_end(track, on_enter)); |
311 | } |
312 | |
313 | static void |
314 | recount_metrics_add(struct recount_metrics *sum, const struct recount_metrics *to_add) |
315 | { |
316 | sum->rm_time_mach += to_add->rm_time_mach; |
317 | #if CONFIG_PERVASIVE_CPI |
318 | sum->rm_instructions += to_add->rm_instructions; |
319 | sum->rm_cycles += to_add->rm_cycles; |
320 | #endif // CONFIG_PERVASIVE_CPI |
321 | } |
322 | |
323 | static void |
324 | recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add) |
325 | { |
326 | for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) { |
		recount_metrics_add(&sum->ru_metrics[i], &to_add->ru_metrics[i]);
328 | } |
329 | #if CONFIG_PERVASIVE_ENERGY |
330 | sum->ru_energy_nj += to_add->ru_energy_nj; |
#endif // CONFIG_PERVASIVE_ENERGY
332 | } |
333 | |
334 | OS_ALWAYS_INLINE |
335 | static inline void |
336 | recount_usage_add_snap(struct recount_usage *usage, recount_level_t level, |
337 | struct recount_snap *snap) |
338 | { |
339 | struct recount_metrics *metrics = &usage->ru_metrics[level]; |
340 | |
341 | metrics->rm_time_mach += snap->rsn_time_mach; |
342 | #if CONFIG_PERVASIVE_CPI |
343 | metrics->rm_cycles += snap->rsn_cycles; |
344 | metrics->rm_instructions += snap->rsn_insns; |
345 | #else // CONFIG_PERVASIVE_CPI |
346 | #pragma unused(usage) |
347 | #endif // !CONFIG_PERVASIVE_CPI |
348 | } |
349 | |
350 | static void |
351 | recount_rollup(recount_plan_t plan, const struct recount_track *tracks, |
352 | recount_topo_t to_topo, struct recount_usage *stats) |
353 | { |
354 | recount_topo_t from_topo = plan->rpl_topo; |
	size_t topo_count = recount_topo_count(from_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		recount_read_track(&tmp, &tracks[i]);
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tmp);
361 | } |
362 | } |
363 | |
// Only safe to call when the tracks' counters cannot be updated concurrently,
// such as when operating on the current thread's own tracks.
365 | static void |
366 | recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks, |
367 | recount_topo_t to_topo, struct recount_usage *stats) |
368 | { |
369 | recount_topo_t from_topo = plan->rpl_topo; |
	size_t topo_count = recount_topo_count(from_topo);
	for (size_t i = 0; i < topo_count; i++) {
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
374 | } |
375 | } |
376 | |
377 | void |
378 | recount_sum(recount_plan_t plan, const struct recount_track *tracks, |
379 | struct recount_usage *sum) |
380 | { |
	recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
382 | } |
383 | |
384 | void |
385 | recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks, |
386 | struct recount_usage *sum) |
387 | { |
388 | recount_topo_t topo = plan->rpl_topo; |
389 | size_t topo_count = recount_topo_count(topo); |
390 | for (size_t i = 0; i < topo_count; i++) { |
		recount_usage_add(sum, &tracks[i].rt_usage);
392 | } |
393 | } |
394 | |
395 | void |
396 | recount_sum_and_isolate_cpu_kind(recount_plan_t plan, |
397 | struct recount_track *tracks, recount_cpu_kind_t kind, |
398 | struct recount_usage *sum, struct recount_usage *only_kind) |
399 | { |
	size_t topo_count = recount_topo_count(plan->rpl_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		recount_read_track(&tmp, &tracks[i]);
		recount_usage_add(sum, &tmp);
		if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
			recount_usage_add(only_kind, &tmp);
407 | } |
408 | } |
409 | } |
410 | |
411 | static void |
412 | recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages, |
413 | struct recount_usage *sum) |
414 | { |
	const size_t topo_count = recount_topo_count(plan->rpl_topo);
	for (size_t i = 0; i < topo_count; i++) {
		recount_usage_add(sum, &usages[i]);
418 | } |
419 | } |
420 | |
421 | void |
422 | recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan, |
423 | struct recount_usage *usage, recount_cpu_kind_t kind, |
424 | struct recount_usage *sum, struct recount_usage *only_kind) |
425 | { |
	const size_t topo_count = recount_topo_count(plan->rpl_topo);
	for (size_t i = 0; i < topo_count; i++) {
		recount_usage_add(sum, &usage[i]);
		if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
			recount_usage_add(only_kind, &usage[i]);
431 | } |
432 | } |
433 | } |
434 | |
435 | void |
436 | recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks, |
437 | struct recount_usage *sums) |
438 | { |
	recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
440 | } |
441 | |
442 | struct recount_times_mach |
443 | recount_usage_times_mach(struct recount_usage *usage) |
444 | { |
445 | return (struct recount_times_mach){ |
446 | .rtm_user = usage->ru_metrics[RCT_LVL_USER].rm_time_mach, |
447 | .rtm_system = recount_usage_system_time_mach(usage), |
448 | }; |
449 | } |
450 | |
451 | uint64_t |
452 | recount_usage_system_time_mach(struct recount_usage *usage) |
453 | { |
454 | uint64_t system_time = usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach; |
455 | #if RECOUNT_SECURE_METRICS |
456 | system_time += usage->ru_metrics[RCT_LVL_SECURE].rm_time_mach; |
457 | #endif // RECOUNT_SECURE_METRICS |
458 | return system_time; |
459 | } |
460 | |
461 | uint64_t |
462 | recount_usage_time_mach(struct recount_usage *usage) |
463 | { |
464 | uint64_t time = 0; |
465 | for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) { |
466 | time += usage->ru_metrics[i].rm_time_mach; |
467 | } |
468 | return time; |
469 | } |
470 | |
471 | uint64_t |
472 | recount_usage_cycles(struct recount_usage *usage) |
473 | { |
474 | uint64_t cycles = 0; |
475 | #if CONFIG_CPU_COUNTERS |
476 | for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) { |
477 | cycles += usage->ru_metrics[i].rm_cycles; |
478 | } |
479 | #else // CONFIG_CPU_COUNTERS |
480 | #pragma unused(usage) |
481 | #endif // !CONFIG_CPU_COUNTERS |
482 | return cycles; |
483 | } |
484 | |
485 | uint64_t |
486 | recount_usage_instructions(struct recount_usage *usage) |
487 | { |
488 | uint64_t instructions = 0; |
489 | #if CONFIG_CPU_COUNTERS |
490 | for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) { |
491 | instructions += usage->ru_metrics[i].rm_instructions; |
492 | } |
493 | #else // CONFIG_CPU_COUNTERS |
494 | #pragma unused(usage) |
495 | #endif // !CONFIG_CPU_COUNTERS |
496 | return instructions; |
497 | } |
498 | |
499 | // Plan-specific helpers. |
500 | |
501 | void |
502 | recount_coalition_rollup_task(struct recount_coalition *co, |
503 | struct recount_task *tk) |
504 | { |
	recount_rollup(&recount_task_plan, tk->rtk_lifetime,
	    recount_coalition_plan.rpl_topo, co->rco_exited);
507 | } |
508 | |
509 | void |
510 | recount_task_rollup_thread(struct recount_task *tk, |
511 | const struct recount_thread *th) |
512 | { |
	recount_rollup(&recount_thread_plan, th->rth_lifetime,
	    recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
515 | } |
516 | |
517 | #pragma mark - scheduler |
518 | |
519 | // `result = lhs - rhs` for snapshots. |
520 | OS_ALWAYS_INLINE |
521 | static void |
522 | recount_snap_diff(struct recount_snap *result, |
523 | const struct recount_snap *lhs, const struct recount_snap *rhs) |
524 | { |
525 | assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach); |
526 | result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach; |
527 | #if CONFIG_PERVASIVE_CPI |
528 | assert3u(lhs->rsn_insns, >=, rhs->rsn_insns); |
529 | assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles); |
530 | result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles; |
531 | result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns; |
532 | #endif // CONFIG_PERVASIVE_CPI |
533 | } |
534 | |
535 | static void |
536 | _fix_time_precision(struct recount_usage *usage) |
537 | { |
538 | #if PRECISE_USER_KERNEL_TIME |
539 | #pragma unused(usage) |
540 | #else // PRECISE_USER_KERNEL_TIME |
541 | // Attribute all time to user, as the system is only acting "on behalf |
542 | // of" user processes -- a bit sketchy. |
543 | usage->ru_metrics[RCT_LVL_USER].rm_time_mach += |
544 | recount_usage_system_time_mach(usage); |
545 | usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach = 0; |
546 | #endif // !PRECISE_USER_KERNEL_TIME |
547 | } |
548 | |
549 | void |
550 | recount_current_thread_usage(struct recount_usage *usage) |
551 | { |
552 | assert(ml_get_interrupts_enabled() == FALSE); |
553 | thread_t thread = current_thread(); |
554 | struct recount_snap snap = { 0 }; |
	recount_snapshot(&snap);
	recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    usage);
	struct recount_snap *last = recount_get_snap(current_processor());
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	recount_usage_add_snap(usage, RCT_LVL_KERNEL, &diff);
562 | _fix_time_precision(usage); |
563 | } |
564 | |
565 | void |
566 | recount_current_thread_usage_perf_only(struct recount_usage *usage, |
567 | struct recount_usage *usage_perf_only) |
568 | { |
569 | struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 }; |
	recount_current_thread_perf_level_usage(usage_perf_levels);
	recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
	*usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
575 | } |
576 | |
577 | void |
578 | recount_thread_perf_level_usage(struct thread *thread, |
579 | struct recount_usage *usage_levels) |
580 | { |
	recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
586 | } |
587 | } |
588 | |
589 | void |
590 | recount_current_thread_perf_level_usage(struct recount_usage *usage_levels) |
591 | { |
592 | assert(ml_get_interrupts_enabled() == FALSE); |
593 | processor_t processor = current_processor(); |
594 | thread_t thread = current_thread(); |
595 | struct recount_snap snap = { 0 }; |
	recount_snapshot(&snap);
	recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
	struct recount_usage *cur_usage = &usage_levels[cur_i];
	recount_usage_add_snap(cur_usage, RCT_LVL_KERNEL, &diff);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
608 | } |
609 | } |
610 | |
611 | uint64_t |
612 | recount_current_thread_energy_nj(void) |
613 | { |
614 | #if RECOUNT_ENERGY |
615 | assert(ml_get_interrupts_enabled() == FALSE); |
616 | thread_t thread = current_thread(); |
617 | size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo); |
618 | uint64_t energy_nj = 0; |
619 | for (size_t i = 0; i < topo_count; i++) { |
620 | energy_nj += thread->th_recount.rth_lifetime[i].rt_usage.ru_energy_nj; |
621 | } |
622 | return energy_nj; |
623 | #else // RECOUNT_ENERGY |
624 | return 0; |
625 | #endif // !RECOUNT_ENERGY |
626 | } |
627 | |
628 | static void |
629 | _times_add_usage(struct recount_times_mach *times, struct recount_usage *usage) |
630 | { |
631 | times->rtm_user += usage->ru_metrics[RCT_LVL_USER].rm_time_mach; |
632 | #if PRECISE_USER_KERNEL_TIME |
633 | times->rtm_system += recount_usage_system_time_mach(usage); |
634 | #else // PRECISE_USER_KERNEL_TIME |
635 | times->rtm_user += recount_usage_system_time_mach(usage); |
636 | #endif // !PRECISE_USER_KERNEL_TIME |
637 | } |
638 | |
639 | struct recount_times_mach |
640 | recount_thread_times(struct thread *thread) |
641 | { |
	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &thread->th_recount.rth_lifetime[i].rt_usage);
646 | } |
647 | return times; |
648 | } |
649 | |
650 | uint64_t |
651 | recount_thread_time_mach(struct thread *thread) |
652 | { |
653 | struct recount_times_mach times = recount_thread_times(thread); |
654 | return times.rtm_user + times.rtm_system; |
655 | } |
656 | |
657 | static uint64_t |
658 | _time_since_last_snapshot(void) |
659 | { |
	struct recount_snap *last = recount_get_snap(current_processor());
661 | uint64_t cur_time = mach_absolute_time(); |
662 | return cur_time - last->rsn_time_mach; |
663 | } |
664 | |
665 | uint64_t |
666 | recount_current_thread_time_mach(void) |
667 | { |
668 | assert(ml_get_interrupts_enabled() == FALSE); |
	uint64_t previous_time = recount_thread_time_mach(current_thread());
670 | return previous_time + _time_since_last_snapshot(); |
671 | } |
672 | |
673 | struct recount_times_mach |
674 | recount_current_thread_times(void) |
675 | { |
676 | assert(ml_get_interrupts_enabled() == FALSE); |
	struct recount_times_mach times = recount_thread_times(
	    current_thread());
679 | #if PRECISE_USER_KERNEL_TIME |
680 | // This code is executing in the kernel, so the time since the last snapshot |
681 | // (with precise user/kernel time) is since entering the kernel. |
682 | times.rtm_system += _time_since_last_snapshot(); |
683 | #else // PRECISE_USER_KERNEL_TIME |
684 | times.rtm_user += _time_since_last_snapshot(); |
685 | #endif // !PRECISE_USER_KERNEL_TIME |
686 | return times; |
687 | } |
688 | |
689 | void |
690 | recount_thread_usage(thread_t thread, struct recount_usage *usage) |
691 | { |
	recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
693 | _fix_time_precision(usage); |
694 | } |
695 | |
696 | uint64_t |
697 | recount_current_thread_interrupt_time_mach(void) |
698 | { |
699 | thread_t thread = current_thread(); |
700 | return thread->th_recount.rth_interrupt_time_mach; |
701 | } |
702 | |
703 | void |
704 | recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage) |
705 | { |
	recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
707 | _fix_time_precision(usage); |
708 | } |
709 | |
710 | struct recount_times_mach |
711 | recount_work_interval_times(struct work_interval *work_interval) |
712 | { |
	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
717 | } |
718 | return times; |
719 | } |
720 | |
721 | uint64_t |
722 | recount_work_interval_energy_nj(struct work_interval *work_interval) |
723 | { |
724 | #if RECOUNT_ENERGY |
725 | size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo); |
726 | uint64_t energy = 0; |
727 | for (size_t i = 0; i < topo_count; i++) { |
728 | energy += work_interval_get_recount_tracks(work_interval)[i].rt_usage.ru_energy_nj; |
729 | } |
730 | return energy; |
731 | #else // RECOUNT_ENERGY |
732 | #pragma unused(work_interval) |
733 | return 0; |
734 | #endif // !RECOUNT_ENERGY |
735 | } |
736 | |
737 | void |
738 | recount_current_task_usage(struct recount_usage *usage) |
739 | { |
740 | task_t task = current_task(); |
741 | struct recount_track *tracks = task->tk_recount.rtk_lifetime; |
	recount_sum(&recount_task_plan, tracks, usage);
743 | _fix_time_precision(usage); |
744 | } |
745 | |
746 | void |
747 | recount_current_task_usage_perf_only(struct recount_usage *usage, |
748 | struct recount_usage *usage_perf_only) |
749 | { |
750 | task_t task = current_task(); |
751 | struct recount_track *tracks = task->tk_recount.rtk_lifetime; |
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
756 | } |
757 | |
758 | void |
759 | recount_task_times_perf_only(struct task *task, |
760 | struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only) |
761 | { |
762 | const recount_topo_t topo = recount_task_plan.rpl_topo; |
763 | const size_t topo_count = recount_topo_count(topo); |
764 | struct recount_track *tracks = task->tk_recount.rtk_lifetime; |
765 | for (size_t i = 0; i < topo_count; i++) { |
766 | struct recount_usage *usage = &tracks[i].rt_usage; |
		_times_add_usage(sum, usage);
		if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
			_times_add_usage(sum_perf_only, usage);
770 | } |
771 | } |
772 | } |
773 | |
774 | void |
775 | recount_task_terminated_usage(task_t task, struct recount_usage *usage) |
776 | { |
	recount_sum_usage(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, usage);
779 | _fix_time_precision(usage); |
780 | } |
781 | |
782 | struct recount_times_mach |
783 | recount_task_terminated_times(struct task *task) |
784 | { |
	size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &task->tk_recount.rtk_terminated[i]);
789 | } |
790 | return times; |
791 | } |
792 | |
793 | void |
794 | recount_task_terminated_usage_perf_only(task_t task, |
795 | struct recount_usage *usage, struct recount_usage *perf_only) |
796 | { |
	recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(perf_only);
801 | } |
802 | |
803 | void |
804 | recount_task_usage_perf_only(task_t task, struct recount_usage *sum, |
805 | struct recount_usage *sum_perf_only) |
806 | { |
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
811 | } |
812 | |
813 | void |
814 | recount_task_usage(task_t task, struct recount_usage *usage) |
815 | { |
	recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
817 | _fix_time_precision(usage); |
818 | } |
819 | |
820 | struct recount_times_mach |
821 | recount_task_times(struct task *task) |
822 | { |
	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &task->tk_recount.rtk_lifetime[i].rt_usage);
827 | } |
828 | return times; |
829 | } |
830 | |
831 | uint64_t |
832 | recount_task_energy_nj(struct task *task) |
833 | { |
834 | #if RECOUNT_ENERGY |
835 | size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo); |
836 | uint64_t energy = 0; |
837 | for (size_t i = 0; i < topo_count; i++) { |
838 | energy += task->tk_recount.rtk_lifetime[i].rt_usage.ru_energy_nj; |
839 | } |
840 | return energy; |
841 | #else // RECOUNT_ENERGY |
842 | #pragma unused(task) |
843 | return 0; |
844 | #endif // !RECOUNT_ENERGY |
845 | } |
846 | |
847 | void |
848 | recount_coalition_usage_perf_only(struct recount_coalition *coal, |
849 | struct recount_usage *sum, struct recount_usage *sum_perf_only) |
850 | { |
	recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
	    coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
855 | } |
856 | |
857 | OS_ALWAYS_INLINE |
858 | static void |
859 | recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task, |
860 | processor_t processor, recount_level_t level) |
861 | { |
862 | // Idle threads do not attribute their usage back to the task or processor, |
863 | // as the time is not spent "running." |
864 | // |
865 | // The processor-level metrics include idle time, instead, as the idle time |
866 | // needs to be read as up-to-date from `recount_processor_usage`. |
867 | |
868 | const bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0; |
869 | |
870 | struct recount_track *wi_tracks_array = NULL; |
871 | if (!was_idle) { |
		wi_tracks_array = work_interval_get_recount_tracks(
		    thread->th_work_interval);
874 | } |
875 | bool absorb_work_interval = wi_tracks_array != NULL; |
876 | |
	struct recount_track *th_track = recount_update_start(
	    thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
	    processor);
	struct recount_track *wi_track = NULL;
	if (absorb_work_interval) {
		wi_track = recount_update_start(wi_tracks_array,
		    recount_work_interval_plan.rpl_topo, processor);
	}
	struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
	    task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo, processor);
	struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
	    &processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
	    processor);
	recount_update_commit();

	recount_usage_add_snap(&th_track->rt_usage, level, to_add);
	if (!was_idle) {
		if (absorb_work_interval) {
			recount_usage_add_snap(&wi_track->rt_usage, level, to_add);
		}
		recount_usage_add_snap(&tk_track->rt_usage, level, to_add);
		recount_usage_add_snap(&pr_track->rt_usage, level, to_add);
	}

	recount_update_commit();
	recount_update_end(th_track);
	if (!was_idle) {
		if (absorb_work_interval) {
			recount_update_end(wi_track);
		}
		recount_update_end(tk_track);
		recount_update_end(pr_track);
909 | } |
910 | } |
911 | |
912 | void |
913 | recount_switch_thread(struct recount_snap *cur, struct thread *off_thread, |
914 | struct task *off_task) |
915 | { |
916 | assert(ml_get_interrupts_enabled() == FALSE); |
917 | |
918 | if (__improbable(!_recount_started)) { |
919 | return; |
920 | } |
921 | |
922 | processor_t processor = current_processor(); |
923 | |
924 | struct recount_snap *last = recount_get_snap(processor); |
925 | struct recount_snap diff = { 0 }; |
	recount_snap_diff(&diff, cur, last);
	recount_absorb_snap(&diff, off_thread, off_task, processor,
#if RECOUNT_THREAD_BASED_LEVEL
	    off_thread->th_recount.rth_current_level
#else // RECOUNT_THREAD_BASED_LEVEL
	    RCT_LVL_KERNEL
#endif // !RECOUNT_THREAD_BASED_LEVEL
	    );
	memcpy(last, cur, sizeof(*last));
935 | } |
936 | |
937 | void |
938 | recount_add_energy(struct thread *off_thread, struct task *off_task, |
939 | uint64_t energy_nj) |
940 | { |
941 | #if RECOUNT_ENERGY |
942 | assert(ml_get_interrupts_enabled() == FALSE); |
943 | if (__improbable(!_recount_started)) { |
944 | return; |
945 | } |
946 | |
947 | bool was_idle = (off_thread->options & TH_OPT_IDLE_THREAD) != 0; |
948 | struct recount_track *wi_tracks_array = work_interval_get_recount_tracks(off_thread->th_work_interval); |
949 | bool collect_work_interval_telemetry = wi_tracks_array != NULL; |
950 | processor_t processor = current_processor(); |
951 | |
952 | struct recount_track *th_track = recount_update_single_start( |
953 | off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo, |
954 | processor); |
955 | struct recount_track *wi_track = (was_idle || !collect_work_interval_telemetry) ? NULL : |
956 | recount_update_single_start(wi_tracks_array, |
957 | recount_work_interval_plan.rpl_topo, processor); |
958 | struct recount_track *tk_track = was_idle ? NULL : |
959 | recount_update_single_start(off_task->tk_recount.rtk_lifetime, |
960 | recount_task_plan.rpl_topo, processor); |
961 | struct recount_track *pr_track = was_idle ? NULL : |
962 | recount_update_single_start(&processor->pr_recount.rpr_active, |
963 | recount_processor_plan.rpl_topo, processor); |
964 | |
965 | th_track->rt_usage.ru_energy_nj += energy_nj; |
966 | if (!was_idle) { |
967 | if (collect_work_interval_telemetry) { |
968 | wi_track->rt_usage.ru_energy_nj += energy_nj; |
969 | } |
970 | tk_track->rt_usage.ru_energy_nj += energy_nj; |
971 | pr_track->rt_usage.ru_energy_nj += energy_nj; |
972 | } |
973 | #else // RECOUNT_ENERGY |
974 | #pragma unused(off_thread, off_task, energy_nj) |
975 | #endif // !RECOUNT_ENERGY |
976 | } |
977 | |
978 | #define MT_KDBG_IC_CPU_CSWITCH \ |
979 | KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1) |
980 | |
981 | #define MT_KDBG_IC_CPU_CSWITCH_ON \ |
982 | KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1) |
983 | |
984 | void |
985 | recount_log_switch_thread(const struct recount_snap *snap) |
986 | { |
987 | #if CONFIG_PERVASIVE_CPI |
988 | if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) { |
989 | // In Monotonic's event hierarchy for backwards-compatibility. |
990 | KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles); |
991 | } |
992 | #else // CONFIG_PERVASIVE_CPI |
993 | #pragma unused(snap) |
994 | #endif // CONFIG_PERVASIVE_CPI |
995 | } |
996 | |
997 | void |
998 | recount_log_switch_thread_on(const struct recount_snap *snap) |
999 | { |
1000 | #if CONFIG_PERVASIVE_CPI |
1001 | if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) { |
1002 | if (!snap) { |
1003 | snap = recount_get_snap(current_processor()); |
1004 | } |
1005 | // In Monotonic's event hierarchy for backwards-compatibility. |
1006 | KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, snap->rsn_insns, snap->rsn_cycles); |
1007 | } |
1008 | #else // CONFIG_PERVASIVE_CPI |
1009 | #pragma unused(snap) |
1010 | #endif // CONFIG_PERVASIVE_CPI |
1011 | } |
1012 | |
1013 | OS_ALWAYS_INLINE |
1014 | PRECISE_TIME_ONLY_FUNC |
1015 | static void |
1016 | recount_precise_transition_diff(struct recount_snap *diff, |
1017 | struct recount_snap *last, struct recount_snap *cur) |
1018 | { |
1019 | #if PRECISE_USER_KERNEL_PMCS |
1020 | #if PRECISE_USER_KERNEL_PMC_TUNABLE |
1021 | // The full `recount_snapshot_speculative` shouldn't get PMCs with a tunable |
1022 | // in this configuration. |
1023 | if (__improbable(no_precise_pmcs)) { |
1024 | cur->rsn_time_mach = recount_timestamp_speculative(); |
1025 | diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach; |
1026 | } else |
1027 | #endif // PRECISE_USER_KERNEL_PMC_TUNABLE |
1028 | { |
		recount_snapshot_speculative(cur);
		recount_snap_diff(diff, cur, last);
1031 | } |
1032 | #else // PRECISE_USER_KERNEL_PMCS |
1033 | cur->rsn_time_mach = recount_timestamp_speculative(); |
1034 | diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach; |
1035 | #endif // !PRECISE_USER_KERNEL_PMCS |
1036 | } |
1037 | |
1038 | #if MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL |
1039 | |
1040 | PRECISE_TIME_ONLY_FUNC |
1041 | static void |
1042 | recount_assert_level(thread_t thread, recount_level_t old) |
1043 | { |
1044 | assert3u(thread->th_recount.rth_current_level, ==, old); |
1045 | } |
1046 | |
1047 | #else // MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL |
1048 | |
1049 | PRECISE_TIME_ONLY_FUNC |
1050 | static void |
1051 | recount_assert_level(thread_t __unused thread, |
1052 | recount_level_t __unused old) |
1053 | { |
1054 | } |
1055 | |
1056 | #endif // !(MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL) |
1057 | |
1058 | /// Called when entering or exiting the kernel to maintain system vs. user counts, extremely performance sensitive. |
1059 | /// |
1060 | /// Must be called with interrupts disabled. |
1061 | /// |
1062 | /// - Parameter from: What level is being switched from. |
1063 | /// - Parameter to: What level is being switched to. |
1064 | /// |
1065 | /// - Returns: The value of Mach time that was sampled inside this function. |
1066 | PRECISE_TIME_FATAL_FUNC |
1067 | static uint64_t |
1068 | recount_transition(recount_level_t from, recount_level_t to) |
1069 | { |
1070 | #if PRECISE_USER_KERNEL_TIME |
1071 | // Omit interrupts-disabled assertion for performance reasons. |
1072 | processor_t processor = current_processor(); |
1073 | thread_t thread = processor->active_thread; |
1074 | if (thread) { |
1075 | task_t task = get_thread_ro_unchecked(thread)->tro_task; |
1076 | |
		recount_assert_level(thread, from);
1078 | #if RECOUNT_THREAD_BASED_LEVEL |
1079 | thread->th_recount.rth_current_level = to; |
1080 | #else // RECOUNT_THREAD_BASED_LEVEL |
1081 | #pragma unused(to) |
1082 | #endif // !RECOUNT_THREAD_BASED_LEVEL |
1083 | struct recount_snap *last = recount_get_snap(processor); |
1084 | struct recount_snap diff = { 0 }; |
1085 | struct recount_snap cur = { 0 }; |
		recount_precise_transition_diff(&diff, last, &cur);
		recount_absorb_snap(&diff, thread, task, processor, from);
		memcpy(last, &cur, sizeof(*last));
1089 | |
1090 | return cur.rsn_time_mach; |
1091 | } else { |
1092 | return 0; |
1093 | } |
1094 | #else // PRECISE_USER_KERNEL_TIME |
1095 | #pragma unused(from, to) |
	panic("recount: kernel transition called with precise time off");
1097 | #endif // !PRECISE_USER_KERNEL_TIME |
1098 | } |
1099 | |
1100 | PRECISE_TIME_FATAL_FUNC |
1101 | void |
1102 | recount_leave_user(void) |
1103 | { |
	recount_transition(RCT_LVL_USER, RCT_LVL_KERNEL);
1105 | } |
1106 | |
1107 | PRECISE_TIME_FATAL_FUNC |
1108 | void |
1109 | recount_enter_user(void) |
1110 | { |
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_USER);
1112 | } |
1113 | |
1114 | void |
1115 | recount_enter_interrupt(void) |
1116 | { |
1117 | processor_t processor = current_processor(); |
1118 | struct recount_snap *last = recount_get_interrupt_snap(processor); |
	recount_snapshot_speculative(last);
1120 | } |
1121 | |
1122 | void |
1123 | recount_leave_interrupt(void) |
1124 | { |
1125 | processor_t processor = current_processor(); |
1126 | thread_t thread = processor->active_thread; |
1127 | struct recount_snap *last = recount_get_snap(processor); |
1128 | uint64_t last_time = last->rsn_time_mach; |
	recount_snapshot_speculative(last);
1130 | processor->pr_recount.rpr_interrupt_time_mach += |
1131 | last->rsn_time_mach - last_time; |
1132 | thread->th_recount.rth_interrupt_time_mach += |
1133 | last->rsn_time_mach - last_time; |
1134 | } |
1135 | |
1136 | #if __x86_64__ |
1137 | |
1138 | void |
1139 | recount_enter_intel_interrupt(x86_saved_state_t *state) |
1140 | { |
	// The low bits of `%cs` are set if the interrupt was delivered while
	// executing in user space.
1143 | bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs : |
1144 | state->ss_32.cs) & 0x03; |
1145 | uint64_t timestamp = recount_transition( |
1146 | from_user ? RCT_LVL_USER : RCT_LVL_KERNEL, RCT_LVL_KERNEL); |
1147 | current_cpu_datap()->cpu_int_event_time = timestamp; |
1148 | } |
1149 | |
1150 | void |
1151 | recount_leave_intel_interrupt(void) |
1152 | { |
1153 | recount_transition(RCT_LVL_KERNEL, RCT_LVL_KERNEL); |
1154 | current_cpu_datap()->cpu_int_event_time = 0; |
1155 | } |
1156 | |
1157 | #endif // __x86_64__ |
1158 | |
1159 | #if RECOUNT_SECURE_METRICS |
1160 | |
1161 | PRECISE_TIME_FATAL_FUNC |
1162 | void |
1163 | recount_leave_secure(void) |
1164 | { |
1165 | boolean_t intrs_en = ml_set_interrupts_enabled(FALSE); |
1166 | recount_transition(RCT_LVL_SECURE, RCT_LVL_KERNEL); |
1167 | ml_set_interrupts_enabled(intrs_en); |
1168 | } |
1169 | |
1170 | PRECISE_TIME_FATAL_FUNC |
1171 | void |
1172 | recount_enter_secure(void) |
1173 | { |
1174 | boolean_t intrs_en = ml_set_interrupts_enabled(FALSE); |
1175 | recount_transition(RCT_LVL_KERNEL, RCT_LVL_SECURE); |
1176 | ml_set_interrupts_enabled(intrs_en); |
1177 | } |
1178 | |
1179 | #endif // RECOUNT_SECURE_METRICS |
1180 | |
1181 | // Set on rpr_state_last_abs_time when the processor is idle. |
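// The idle bit and the timestamp of the last state change share one 64-bit
// value so readers can load both atomically and, while the processor is
// idling, extend the reported idle time up to the present.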
1182 | #define RCT_PR_IDLING (0x1ULL << 63) |
1183 | |
1184 | void |
1185 | recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap) |
1186 | { |
1187 | __assert_only uint64_t state_time = os_atomic_load_wide( |
1188 | &pr->rpr_state_last_abs_time, relaxed); |
1189 | assert((state_time & RCT_PR_IDLING) == 0); |
1190 | assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0); |
1191 | uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach; |
1192 | os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp, |
1193 | relaxed); |
1194 | } |
1195 | |
1196 | OS_PURE OS_ALWAYS_INLINE |
1197 | static inline uint64_t |
1198 | _state_time(uint64_t state_stamp) |
1199 | { |
1200 | return state_stamp & ~(RCT_PR_IDLING); |
1201 | } |
1202 | |
1203 | void |
1204 | recount_processor_init(processor_t processor) |
1205 | { |
1206 | #if __AMP__ |
1207 | processor->pr_recount.rpr_cpu_kind_index = |
1208 | processor->processor_set->pset_cluster_type == PSET_AMP_P ? |
1209 | RCT_CPU_PERFORMANCE : RCT_CPU_EFFICIENCY; |
1210 | #else // __AMP__ |
1211 | #pragma unused(processor) |
1212 | #endif // !__AMP__ |
1213 | } |
1214 | |
1215 | void |
1216 | recount_processor_run(struct recount_processor *pr, struct recount_snap *snap) |
1217 | { |
1218 | uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed); |
1219 | assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING); |
1220 | assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0); |
1221 | uint64_t new_state_stamp = snap->rsn_time_mach; |
	pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
1223 | os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp, |
1224 | relaxed); |
1225 | } |
1226 | |
1227 | void |
1228 | recount_processor_online(processor_t processor, struct recount_snap *cur) |
1229 | { |
	recount_processor_run(&processor->pr_recount, cur);
	struct recount_snap *pr_snap = recount_get_snap(processor);
	memcpy(pr_snap, cur, sizeof(*pr_snap));
1233 | } |
1234 | |
1235 | void |
1236 | recount_processor_usage(struct recount_processor *pr, |
1237 | struct recount_usage *usage, uint64_t *idle_time_out) |
1238 | { |
	recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
1240 | _fix_time_precision(usage); |
1241 | |
1242 | uint64_t idle_time = pr->rpr_idle_time_mach; |
1243 | uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time, |
1244 | relaxed); |
1245 | bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING; |
1246 | if (idle) { |
1247 | // Since processors can idle for some time without an update, make sure |
1248 | // the idle time is up-to-date with respect to the caller. |
		idle_time += mach_absolute_time() - _state_time(idle_stamp);
1250 | } |
1251 | *idle_time_out = idle_time; |
1252 | } |
1253 | |
1254 | uint64_t |
1255 | recount_current_processor_interrupt_time_mach(void) |
1256 | { |
1257 | assert(!preemption_enabled()); |
1258 | return current_processor()->pr_recount.rpr_interrupt_time_mach; |
1259 | } |
1260 | |
1261 | bool |
1262 | recount_task_thread_perf_level_usage(struct task *task, uint64_t tid, |
1263 | struct recount_usage *usage_levels) |
1264 | { |
1265 | thread_t thread = task_findtid(task, tid); |
1266 | if (thread != THREAD_NULL) { |
1267 | if (thread == current_thread()) { |
1268 | boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE); |
1269 | recount_current_thread_perf_level_usage(usage_levels); |
			ml_set_interrupts_enabled(interrupt_state);
1271 | } else { |
1272 | recount_thread_perf_level_usage(thread, usage_levels); |
1273 | } |
1274 | } |
1275 | return thread != THREAD_NULL; |
1276 | } |
1277 | |
1278 | #pragma mark - utilities |
1279 | |
1280 | // For rolling up counts, convert an index from one topography to another. |
1281 | static size_t |
1282 | recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i) |
1283 | { |
1284 | if (from == to) { |
1285 | return i; |
1286 | } else if (to == RCT_TOPO_SYSTEM) { |
1287 | return 0; |
1288 | } else if (from == RCT_TOPO_CPU) { |
1289 | assertf(to == RCT_TOPO_CPU_KIND, |
		    "recount: cannot convert from CPU topography to %d", to);
1291 | return _topo_cpu_kinds[i]; |
1292 | } else { |
		panic("recount: unexpected rollup request from %d to %d", from, to);
1294 | } |
1295 | } |
1296 | |
1297 | // Get the track index of the provided processor and topography. |
1298 | OS_ALWAYS_INLINE |
1299 | static size_t |
1300 | recount_topo_index(recount_topo_t topo, processor_t processor) |
1301 | { |
1302 | switch (topo) { |
1303 | case RCT_TOPO_SYSTEM: |
1304 | return 0; |
1305 | case RCT_TOPO_CPU: |
1306 | return processor->cpu_id; |
1307 | case RCT_TOPO_CPU_KIND: |
1308 | #if __AMP__ |
1309 | return processor->pr_recount.rpr_cpu_kind_index; |
1310 | #else // __AMP__ |
1311 | return 0; |
1312 | #endif // !__AMP__ |
1313 | default: |
		panic("recount: invalid topology %u to index", topo);
1315 | } |
1316 | } |
1317 | |
1318 | // Return the number of tracks needed for a given topography. |
1319 | size_t |
1320 | recount_topo_count(recount_topo_t topo) |
1321 | { |
1322 | // Allow the compiler to reason about at least the system and CPU kind |
1323 | // counts. |
1324 | switch (topo) { |
1325 | case RCT_TOPO_SYSTEM: |
1326 | return 1; |
1327 | |
1328 | case RCT_TOPO_CPU_KIND: |
1329 | #if __AMP__ |
1330 | return 2; |
1331 | #else // __AMP__ |
1332 | return 1; |
1333 | #endif // !__AMP__ |
1334 | |
1335 | case RCT_TOPO_CPU: |
1336 | #if __arm__ || __arm64__ |
1337 | return ml_get_cpu_count(); |
1338 | #else // __arm__ || __arm64__ |
1339 | return ml_early_cpu_max_number() + 1; |
1340 | #endif // !__arm__ && !__arm64__ |
1341 | |
1342 | default: |
		panic("recount: invalid topography %d", topo);
1344 | } |
1345 | } |
1346 | |
1347 | static bool |
1348 | recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind, |
1349 | size_t idx) |
1350 | { |
1351 | #if !__AMP__ |
1352 | #pragma unused(kind, idx) |
1353 | #endif // !__AMP__ |
1354 | switch (topo) { |
1355 | case RCT_TOPO_SYSTEM: |
1356 | return true; |
1357 | |
1358 | case RCT_TOPO_CPU_KIND: |
1359 | #if __AMP__ |
1360 | return kind == idx; |
1361 | #else // __AMP__ |
1362 | return false; |
1363 | #endif // !__AMP__ |
1364 | |
1365 | case RCT_TOPO_CPU: { |
1366 | #if __AMP__ |
1367 | return _topo_cpu_kinds[idx] == kind; |
1368 | #else // __AMP__ |
1369 | return false; |
1370 | #endif // !__AMP__ |
1371 | } |
1372 | |
1373 | default: |
		panic("recount: unexpected topography %d", topo);
1375 | } |
1376 | } |
1377 | |
1378 | struct recount_track * |
1379 | recount_tracks_create(recount_plan_t plan) |
1380 | { |
1381 | assert(_topo_allocates[plan->rpl_topo]); |
1382 | return zalloc_flags(_recount_track_zones[plan->rpl_topo], |
1383 | Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT)); |
1384 | } |
1385 | |
1386 | static void |
1387 | recount_tracks_copy(recount_plan_t plan, struct recount_track *dst, |
1388 | struct recount_track *src) |
1389 | { |
	size_t topo_count = recount_topo_count(plan->rpl_topo);
	for (size_t i = 0; i < topo_count; i++) {
		recount_read_track(&dst[i].rt_usage, &src[i]);
1393 | } |
1394 | } |
1395 | |
1396 | void |
1397 | recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks) |
1398 | { |
1399 | assert(_topo_allocates[plan->rpl_topo]); |
1400 | zfree(_recount_track_zones[plan->rpl_topo], tracks); |
1401 | } |
1402 | |
1403 | void |
1404 | recount_thread_init(struct recount_thread *th) |
1405 | { |
	th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
1407 | } |
1408 | |
1409 | void |
1410 | recount_thread_copy(struct recount_thread *dst, struct recount_thread *src) |
1411 | { |
	recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
	    src->rth_lifetime);
1414 | } |
1415 | |
1416 | void |
1417 | recount_task_copy(struct recount_task *dst, const struct recount_task *src) |
1418 | { |
	recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
	    src->rtk_lifetime);
1421 | } |
1422 | |
1423 | void |
1424 | recount_thread_deinit(struct recount_thread *th) |
1425 | { |
	recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
1427 | } |
1428 | |
1429 | void |
1430 | recount_task_init(struct recount_task *tk) |
1431 | { |
	tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
	tk->rtk_terminated = recount_usage_alloc(
	    recount_task_terminated_plan.rpl_topo);
1435 | } |
1436 | |
1437 | void |
1438 | recount_task_deinit(struct recount_task *tk) |
1439 | { |
	recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
	recount_usage_free(recount_task_terminated_plan.rpl_topo,
	    tk->rtk_terminated);
1443 | } |
1444 | |
1445 | void |
1446 | recount_coalition_init(struct recount_coalition *co) |
1447 | { |
	co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
1449 | } |
1450 | |
1451 | void |
1452 | recount_coalition_deinit(struct recount_coalition *co) |
1453 | { |
	recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
1455 | } |
1456 | |
1457 | void |
1458 | recount_work_interval_init(struct recount_work_interval *wi) |
1459 | { |
	wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
1461 | } |
1462 | |
1463 | void |
1464 | recount_work_interval_deinit(struct recount_work_interval *wi) |
1465 | { |
	recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
1467 | } |
1468 | |
1469 | struct recount_usage * |
1470 | recount_usage_alloc(recount_topo_t topo) |
1471 | { |
1472 | assert(_topo_allocates[topo]); |
1473 | return zalloc_flags(_recount_usage_zones[topo], |
1474 | Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT)); |
1475 | } |
1476 | |
1477 | void |
1478 | recount_usage_free(recount_topo_t topo, struct recount_usage *usage) |
1479 | { |
1480 | assert(_topo_allocates[topo]); |
1481 | zfree(_recount_usage_zones[topo], usage); |
1482 | } |
1483 | |