1/*
2 * Copyright (c) 1993-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Timer interrupt callout module.
30 */
31
32#include <mach/mach_types.h>
33
34#include <kern/clock.h>
35#include <kern/smp.h>
36#include <kern/processor.h>
37#include <kern/timer_call.h>
38#include <kern/timer_queue.h>
39#include <kern/call_entry.h>
40#include <kern/thread.h>
41#include <kern/policy_internal.h>
42
43#include <sys/kdebug.h>
44
45#if CONFIG_DTRACE
46#include <mach/sdt.h>
47#endif
48
49
50#if DEBUG
51#define TIMER_ASSERT 1
52#endif
53
54//#define TIMER_ASSERT 1
55//#define TIMER_DBG 1
56
57#if TIMER_DBG
58#define DBG(x...) kprintf("DBG: " x);
59#else
60#define DBG(x...)
61#endif
62
63#if TIMER_TRACE
64#define TIMER_KDEBUG_TRACE KERNEL_DEBUG_CONSTANT_IST
65#else
66#define TIMER_KDEBUG_TRACE(x...)
67#endif
68
69
70lck_grp_t timer_call_lck_grp;
71lck_attr_t timer_call_lck_attr;
72lck_grp_attr_t timer_call_lck_grp_attr;
73
74lck_grp_t timer_longterm_lck_grp;
75lck_attr_t timer_longterm_lck_attr;
76lck_grp_attr_t timer_longterm_lck_grp_attr;
77
78/* Timer queue lock must be acquired with interrupts disabled (under splclock()) */
79#if __SMP__
80#define timer_queue_lock_spin(queue) \
81 lck_mtx_lock_spin_always(&queue->lock_data)
82
83#define timer_queue_unlock(queue) \
84 lck_mtx_unlock_always(&queue->lock_data)
85#else
86#define timer_queue_lock_spin(queue) (void)1
87#define timer_queue_unlock(queue) (void)1
88#endif
89
90#define QUEUE(x) ((queue_t)(x))
91#define MPQUEUE(x) ((mpqueue_head_t *)(x))
92#define TIMER_CALL(x) ((timer_call_t)(x))
93#define TCE(x) (&(x->call_entry))
94/*
95 * The longterm timer object is a global structure holding all timers
96 * beyond the short-term, local timer queue threshold. The boot processor
97 * is responsible for moving each timer to its local timer queue
98 * if and when that timer becomes due within the threshold.
99 */
100
101/* Sentinel for "no time set": */
102#define TIMER_LONGTERM_NONE EndOfAllTime
/* The default threshold is the delta above which a timer is "long-term" */
104#if defined(__x86_64__)
105#define TIMER_LONGTERM_THRESHOLD (1ULL * NSEC_PER_SEC) /* 1 sec */
106#else
107#define TIMER_LONGTERM_THRESHOLD TIMER_LONGTERM_NONE /* disabled */
108#endif
109
110/*
111 * The scan_limit throttles processing of the longterm queue.
112 * If the scan time exceeds this limit, we terminate, unlock
113 * and defer for scan_interval. This prevents unbounded holding of
114 * timer queue locks with interrupts masked.
115 */
116#define TIMER_LONGTERM_SCAN_LIMIT (100ULL * NSEC_PER_USEC) /* 100 us */
117#define TIMER_LONGTERM_SCAN_INTERVAL (100ULL * NSEC_PER_USEC) /* 100 us */
118/* Sentinel for "scan limit exceeded": */
119#define TIMER_LONGTERM_SCAN_AGAIN 0
120
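/*
 * Illustrative sketch (not used by this file): a timer is a longterm
 * candidate only if it is not CPU-local, the longterm mechanism is enabled,
 * and its deadline lies beyond the threshold. This mirrors the test in
 * timer_longterm_enqueue_unlocked() below; "tlp", "callout_flags",
 * "deadline" and "now" name the same quantities used there, and
 * tlp->threshold.interval has already been converted to absolute-time
 * units at init.
 *
 *	boolean_t is_longterm =
 *	    ((callout_flags & TIMER_CALL_LOCAL) == 0) &&
 *	    (tlp->threshold.interval != TIMER_LONGTERM_NONE) &&
 *	    (deadline > now + tlp->threshold.interval);
 */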
121typedef struct {
122 uint64_t interval; /* longterm timer interval */
	uint64_t	margin;		/* fudge factor (10% of interval) */
124 uint64_t deadline; /* first/soonest longterm deadline */
125 uint64_t preempted; /* sooner timer has pre-empted */
126 timer_call_t call; /* first/soonest longterm timer call */
127 uint64_t deadline_set; /* next timer set */
128 timer_call_data_t timer; /* timer used by threshold management */
129 /* Stats: */
130 uint64_t scans; /* num threshold timer scans */
131 uint64_t preempts; /* num threshold reductions */
132 uint64_t latency; /* average threshold latency */
133 uint64_t latency_min; /* minimum threshold latency */
134 uint64_t latency_max; /* maximum threshold latency */
135} threshold_t;
136
137typedef struct {
138 mpqueue_head_t queue; /* longterm timer list */
139 uint64_t enqueues; /* num timers queued */
140 uint64_t dequeues; /* num timers dequeued */
141 uint64_t escalates; /* num timers becoming shortterm */
142 uint64_t scan_time; /* last time the list was scanned */
143 threshold_t threshold; /* longterm timer threshold */
144 uint64_t scan_limit; /* maximum scan time */
145 uint64_t scan_interval; /* interval between LT "escalation" scans */
146 uint64_t scan_pauses; /* num scans exceeding time limit */
147} timer_longterm_t;
148
149timer_longterm_t timer_longterm = {
150 .scan_limit = TIMER_LONGTERM_SCAN_LIMIT,
151 .scan_interval = TIMER_LONGTERM_SCAN_INTERVAL,
152 };
153
154static mpqueue_head_t *timer_longterm_queue = NULL;
155
156static void timer_longterm_init(void);
157static void timer_longterm_callout(
158 timer_call_param_t p0,
159 timer_call_param_t p1);
160extern void timer_longterm_scan(
161 timer_longterm_t *tlp,
162 uint64_t now);
163static void timer_longterm_update(
164 timer_longterm_t *tlp);
165static void timer_longterm_update_locked(
166 timer_longterm_t *tlp);
167static mpqueue_head_t * timer_longterm_enqueue_unlocked(
168 timer_call_t call,
169 uint64_t now,
170 uint64_t deadline,
171 mpqueue_head_t ** old_queue,
172 uint64_t soft_deadline,
173 uint64_t ttd,
174 timer_call_param_t param1,
175 uint32_t callout_flags);
176static void timer_longterm_dequeued_locked(
177 timer_call_t call);
178
179uint64_t past_deadline_timers;
180uint64_t past_deadline_deltas;
181uint64_t past_deadline_longest;
182uint64_t past_deadline_shortest = ~0ULL;
183enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};
184
185uint64_t past_deadline_timer_adjustment;
186
187static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited);
188boolean_t mach_timer_coalescing_enabled = TRUE;
189
190mpqueue_head_t *timer_call_enqueue_deadline_unlocked(
191 timer_call_t call,
192 mpqueue_head_t *queue,
193 uint64_t deadline,
194 uint64_t soft_deadline,
195 uint64_t ttd,
196 timer_call_param_t param1,
197 uint32_t flags);
198
199mpqueue_head_t *timer_call_dequeue_unlocked(
200 timer_call_t call);
201
202timer_coalescing_priority_params_t tcoal_prio_params;
203
204#if TCOAL_PRIO_STATS
205int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
206#define TCOAL_PRIO_STAT(x) (x++)
207#else
208#define TCOAL_PRIO_STAT(x)
209#endif
210
211static void
212timer_call_init_abstime(void)
213{
214 int i;
215 uint64_t result;
216 timer_coalescing_priority_params_ns_t * tcoal_prio_params_init = timer_call_get_priority_params();
217 nanoseconds_to_absolutetime(PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment);
218 nanoseconds_to_absolutetime(tcoal_prio_params_init->idle_entry_timer_processing_hdeadline_threshold_ns, &result);
219 tcoal_prio_params.idle_entry_timer_processing_hdeadline_threshold_abstime = (uint32_t)result;
220 nanoseconds_to_absolutetime(tcoal_prio_params_init->interrupt_timer_coalescing_ilat_threshold_ns, &result);
221 tcoal_prio_params.interrupt_timer_coalescing_ilat_threshold_abstime = (uint32_t)result;
222 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_resort_threshold_ns, &result);
223 tcoal_prio_params.timer_resort_threshold_abstime = (uint32_t)result;
224 tcoal_prio_params.timer_coalesce_rt_shift = tcoal_prio_params_init->timer_coalesce_rt_shift;
225 tcoal_prio_params.timer_coalesce_bg_shift = tcoal_prio_params_init->timer_coalesce_bg_shift;
226 tcoal_prio_params.timer_coalesce_kt_shift = tcoal_prio_params_init->timer_coalesce_kt_shift;
227 tcoal_prio_params.timer_coalesce_fp_shift = tcoal_prio_params_init->timer_coalesce_fp_shift;
228 tcoal_prio_params.timer_coalesce_ts_shift = tcoal_prio_params_init->timer_coalesce_ts_shift;
229
230 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_rt_ns_max,
231 &tcoal_prio_params.timer_coalesce_rt_abstime_max);
232 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_bg_ns_max,
233 &tcoal_prio_params.timer_coalesce_bg_abstime_max);
234 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_kt_ns_max,
235 &tcoal_prio_params.timer_coalesce_kt_abstime_max);
236 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_fp_ns_max,
237 &tcoal_prio_params.timer_coalesce_fp_abstime_max);
238 nanoseconds_to_absolutetime(tcoal_prio_params_init->timer_coalesce_ts_ns_max,
239 &tcoal_prio_params.timer_coalesce_ts_abstime_max);
240
241 for (i = 0; i < NUM_LATENCY_QOS_TIERS; i++) {
242 tcoal_prio_params.latency_qos_scale[i] = tcoal_prio_params_init->latency_qos_scale[i];
243 nanoseconds_to_absolutetime(tcoal_prio_params_init->latency_qos_ns_max[i],
244 &tcoal_prio_params.latency_qos_abstime_max[i]);
245 tcoal_prio_params.latency_tier_rate_limited[i] = tcoal_prio_params_init->latency_tier_rate_limited[i];
246 }
247}
248
249
250void
251timer_call_init(void)
252{
253 lck_attr_setdefault(&timer_call_lck_attr);
254 lck_grp_attr_setdefault(&timer_call_lck_grp_attr);
255 lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr);
256
257 timer_longterm_init();
258 timer_call_init_abstime();
259}
260
261
262void
263timer_call_queue_init(mpqueue_head_t *queue)
264{
265 DBG("timer_call_queue_init(%p)\n", queue);
266 mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr);
267}
268
269
270void
271timer_call_setup(
272 timer_call_t call,
273 timer_call_func_t func,
274 timer_call_param_t param0)
275{
276 DBG("timer_call_setup(%p,%p,%p)\n", call, func, param0);
277 call_entry_setup(TCE(call), func, param0);
278 simple_lock_init(&(call)->lock, 0);
279 call->async_dequeue = FALSE;
280}
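/*
 * Usage sketch (illustrative only; the "example_" names are hypothetical and
 * not part of this file). A client embeds a timer_call_data_t, initializes it
 * once with timer_call_setup(), and then arms/cancels it as needed:
 *
 *	static timer_call_data_t example_tcall;
 *
 *	static void
 *	example_expire(timer_call_param_t p0, timer_call_param_t p1)
 *	{
 *		// Runs in interrupt context once the (coalesced) deadline passes.
 *	}
 *
 *	// One-time setup, then arm for roughly 10 ms from now:
 *	uint64_t delta;
 *	timer_call_setup(&example_tcall, example_expire, NULL);
 *	nanoseconds_to_absolutetime(10 * NSEC_PER_MSEC, &delta);
 *	timer_call_enter(&example_tcall, mach_absolute_time() + delta,
 *	    TIMER_CALL_SYS_NORMAL);
 *
 *	// timer_call_cancel() returns TRUE if the call was still queued:
 *	boolean_t was_queued = timer_call_cancel(&example_tcall);
 */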
281#if TIMER_ASSERT
282static __inline__ mpqueue_head_t *
283timer_call_entry_dequeue(
284 timer_call_t entry)
285{
286 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
287
288 if (!hw_lock_held((hw_lock_t)&entry->lock))
289 panic("_call_entry_dequeue() "
290 "entry %p is not locked\n", entry);
291 /*
292 * XXX The queue lock is actually a mutex in spin mode
293 * but there's no way to test for it being held
294 * so we pretend it's a spinlock!
295 */
296 if (!hw_lock_held((hw_lock_t)&old_queue->lock_data))
297 panic("_call_entry_dequeue() "
298 "queue %p is not locked\n", old_queue);
299
300 call_entry_dequeue(TCE(entry));
301 old_queue->count--;
302
303 return (old_queue);
304}
305
306static __inline__ mpqueue_head_t *
307timer_call_entry_enqueue_deadline(
308 timer_call_t entry,
309 mpqueue_head_t *queue,
310 uint64_t deadline)
311{
312 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
313
314 if (!hw_lock_held((hw_lock_t)&entry->lock))
315 panic("_call_entry_enqueue_deadline() "
316 "entry %p is not locked\n", entry);
317 /* XXX More lock pretense: */
318 if (!hw_lock_held((hw_lock_t)&queue->lock_data))
319 panic("_call_entry_enqueue_deadline() "
320 "queue %p is not locked\n", queue);
321 if (old_queue != NULL && old_queue != queue)
322 panic("_call_entry_enqueue_deadline() "
323 "old_queue %p != queue", old_queue);
324
325 call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
326
327/* For efficiency, track the earliest soft deadline on the queue, so that
328 * fuzzy decisions can be made without lock acquisitions.
329 */
330 timer_call_t thead = (timer_call_t)queue_first(&queue->head);
331
332 queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
333
334 if (old_queue)
335 old_queue->count--;
336 queue->count++;
337
338 return (old_queue);
339}
340
341#else
342
343static __inline__ mpqueue_head_t *
344timer_call_entry_dequeue(
345 timer_call_t entry)
346{
347 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
348
349 call_entry_dequeue(TCE(entry));
350 old_queue->count--;
351
352 return old_queue;
353}
354
355static __inline__ mpqueue_head_t *
356timer_call_entry_enqueue_deadline(
357 timer_call_t entry,
358 mpqueue_head_t *queue,
359 uint64_t deadline)
360{
361 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
362
363 call_entry_enqueue_deadline(TCE(entry), QUEUE(queue), deadline);
364
365 /* For efficiency, track the earliest soft deadline on the queue,
366 * so that fuzzy decisions can be made without lock acquisitions.
367 */
368
369 timer_call_t thead = (timer_call_t)queue_first(&queue->head);
370 queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
371
372 if (old_queue)
373 old_queue->count--;
374 queue->count++;
375
376 return old_queue;
377}
378
379#endif
380
381static __inline__ void
382timer_call_entry_enqueue_tail(
383 timer_call_t entry,
384 mpqueue_head_t *queue)
385{
386 call_entry_enqueue_tail(TCE(entry), QUEUE(queue));
387 queue->count++;
388 return;
389}
390
391/*
 * Remove the timer entry from its queue, but leave the entry's queue
 * back-pointer unchanged; instead, set the async_dequeue flag.
 * This is locking case 2b.
394 */
395static __inline__ void
396timer_call_entry_dequeue_async(
397 timer_call_t entry)
398{
399 mpqueue_head_t *old_queue = MPQUEUE(TCE(entry)->queue);
400 if (old_queue) {
401 old_queue->count--;
402 (void) remque(qe(entry));
403 entry->async_dequeue = TRUE;
404 }
405 return;
406}
407
408#if TIMER_ASSERT
409unsigned timer_call_enqueue_deadline_unlocked_async1;
410unsigned timer_call_enqueue_deadline_unlocked_async2;
411#endif
412/*
413 * Assumes call_entry and queues unlocked, interrupts disabled.
414 */
415__inline__ mpqueue_head_t *
416timer_call_enqueue_deadline_unlocked(
417 timer_call_t call,
418 mpqueue_head_t *queue,
419 uint64_t deadline,
420 uint64_t soft_deadline,
421 uint64_t ttd,
422 timer_call_param_t param1,
423 uint32_t callout_flags)
424{
425 call_entry_t entry = TCE(call);
426 mpqueue_head_t *old_queue;
427
428 DBG("timer_call_enqueue_deadline_unlocked(%p,%p,)\n", call, queue);
429
430 simple_lock(&call->lock);
431
432 old_queue = MPQUEUE(entry->queue);
433
434 if (old_queue != NULL) {
435 timer_queue_lock_spin(old_queue);
436 if (call->async_dequeue) {
437 /* collision (1c): timer already dequeued, clear flag */
438#if TIMER_ASSERT
439 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
440 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
441 VM_KERNEL_UNSLIDE_OR_PERM(call),
442 call->async_dequeue,
443 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
444 0x1c, 0);
445 timer_call_enqueue_deadline_unlocked_async1++;
446#endif
447 call->async_dequeue = FALSE;
448 entry->queue = NULL;
449 } else if (old_queue != queue) {
450 timer_call_entry_dequeue(call);
451#if TIMER_ASSERT
452 timer_call_enqueue_deadline_unlocked_async2++;
453#endif
454 }
455 if (old_queue == timer_longterm_queue)
456 timer_longterm_dequeued_locked(call);
457 if (old_queue != queue) {
458 timer_queue_unlock(old_queue);
459 timer_queue_lock_spin(queue);
460 }
461 } else {
462 timer_queue_lock_spin(queue);
463 }
464
465 call->soft_deadline = soft_deadline;
466 call->flags = callout_flags;
467 TCE(call)->param1 = param1;
468 call->ttd = ttd;
469
470 timer_call_entry_enqueue_deadline(call, queue, deadline);
471 timer_queue_unlock(queue);
472 simple_unlock(&call->lock);
473
474 return (old_queue);
475}
476
477#if TIMER_ASSERT
478unsigned timer_call_dequeue_unlocked_async1;
479unsigned timer_call_dequeue_unlocked_async2;
480#endif
481mpqueue_head_t *
482timer_call_dequeue_unlocked(
483 timer_call_t call)
484{
485 call_entry_t entry = TCE(call);
486 mpqueue_head_t *old_queue;
487
488 DBG("timer_call_dequeue_unlocked(%p)\n", call);
489
490 simple_lock(&call->lock);
491 old_queue = MPQUEUE(entry->queue);
492#if TIMER_ASSERT
493 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
494 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
495 VM_KERNEL_UNSLIDE_OR_PERM(call),
496 call->async_dequeue,
497 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
498 0, 0);
499#endif
500 if (old_queue != NULL) {
501 timer_queue_lock_spin(old_queue);
502 if (call->async_dequeue) {
503 /* collision (1c): timer already dequeued, clear flag */
504#if TIMER_ASSERT
505 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
506 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
507 VM_KERNEL_UNSLIDE_OR_PERM(call),
508 call->async_dequeue,
509 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
510 0x1c, 0);
511 timer_call_dequeue_unlocked_async1++;
512#endif
513 call->async_dequeue = FALSE;
514 entry->queue = NULL;
515 } else {
516 timer_call_entry_dequeue(call);
517 }
518 if (old_queue == timer_longterm_queue)
519 timer_longterm_dequeued_locked(call);
520 timer_queue_unlock(old_queue);
521 }
522 simple_unlock(&call->lock);
523 return (old_queue);
524}
525
526static uint64_t
527past_deadline_timer_handle(uint64_t deadline, uint64_t ctime)
528{
529 uint64_t delta = (ctime - deadline);
530
531 past_deadline_timers++;
532 past_deadline_deltas += delta;
533 if (delta > past_deadline_longest)
		past_deadline_longest = delta;
535 if (delta < past_deadline_shortest)
536 past_deadline_shortest = delta;
537
538 return (ctime + past_deadline_timer_adjustment);
539}
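/*
 * Worked example (illustrative numbers): a caller asking for a deadline that
 * is already 3 us in the past does not lose the timer; it is re-aimed at
 * ctime + PAST_DEADLINE_TIMER_ADJUSTMENT_NS (10 us, pre-converted to
 * absolute-time units in timer_call_init_abstime()), and the 3 us delta is
 * folded into the past_deadline_* statistics above.
 */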
540
541/*
542 * Timer call entry locking model
543 * ==============================
544 *
545 * Timer call entries are linked on per-cpu timer queues which are protected
546 * by the queue lock and the call entry lock. The locking protocol is:
547 *
548 * 0) The canonical locking order is timer call entry followed by queue.
549 *
550 * 1) With only the entry lock held, entry.queue is valid:
551 * 1a) NULL: the entry is not queued, or
552 * 1b) non-NULL: this queue must be locked before the entry is modified.
553 * After locking the queue, the call.async_dequeue flag must be checked:
554 * 1c) TRUE: the entry was removed from the queue by another thread
555 * and we must NULL the entry.queue and reset this flag, or
 *	1d) FALSE: (i.e. queued), the entry can be manipulated.
557 *
558 * 2) If a queue lock is obtained first, the queue is stable:
559 * 2a) If a try-lock of a queued entry succeeds, the call can be operated on
560 * and dequeued.
561 * 2b) If a try-lock fails, it indicates that another thread is attempting
562 * to change the entry and move it to a different position in this queue
 *	    or to a different queue. The entry can be dequeued but it should not be
564 * operated upon since it is being changed. Furthermore, we don't null
565 * the entry.queue pointer (protected by the entry lock we don't own).
566 * Instead, we set the async_dequeue flag -- see (1c).
567 * 2c) Same as 2b but occurring when a longterm timer is matured.
568 * 3) A callout's parameters (deadline, flags, parameters, soft deadline &c.)
569 * should be manipulated with the appropriate timer queue lock held,
 *    to prevent queue traversals from observing inconsistent
571 * updates to an in-flight callout.
572 */
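/*
 * Hedged sketch of case (1) above (condensed from timer_call_dequeue_unlocked()
 * earlier in this file, not new machinery): take the entry lock, then the
 * queue it points at, then re-check async_dequeue before touching the entry.
 *
 *	simple_lock(&call->lock);
 *	old_queue = MPQUEUE(TCE(call)->queue);
 *	if (old_queue != NULL) {
 *		timer_queue_lock_spin(old_queue);
 *		if (call->async_dequeue) {
 *			// case (1c): another thread already unlinked the entry
 *			call->async_dequeue = FALSE;
 *			TCE(call)->queue = NULL;
 *		} else {
 *			// case (1d): still queued; safe to manipulate
 *			timer_call_entry_dequeue(call);
 *		}
 *		timer_queue_unlock(old_queue);
 *	}
 *	simple_unlock(&call->lock);
 */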
573
574/*
575 * Inlines timer_call_entry_dequeue() and timer_call_entry_enqueue_deadline()
576 * cast between pointer types (mpqueue_head_t *) and (queue_t) so that
577 * we can use the call_entry_dequeue() and call_entry_enqueue_deadline()
578 * methods to operate on timer_call structs as if they are call_entry structs.
579 * These structures are identical except for their queue head pointer fields.
580 *
581 * In the debug case, we assert that the timer call locking protocol
582 * is being obeyed.
583 */
584
585static boolean_t
586timer_call_enter_internal(
587 timer_call_t call,
588 timer_call_param_t param1,
589 uint64_t deadline,
590 uint64_t leeway,
591 uint32_t flags,
592 boolean_t ratelimited)
593{
594 mpqueue_head_t *queue = NULL;
595 mpqueue_head_t *old_queue;
596 spl_t s;
597 uint64_t slop;
598 uint32_t urgency;
599 uint64_t sdeadline, ttd;
600
601 assert(call->call_entry.func != NULL);
602 s = splclock();
603
604 sdeadline = deadline;
605 uint64_t ctime = mach_absolute_time();
606
607 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
608 DECR_TIMER_ENTER | DBG_FUNC_START,
609 VM_KERNEL_UNSLIDE_OR_PERM(call),
610 VM_KERNEL_ADDRHIDE(param1), deadline, flags, 0);
611
612 urgency = (flags & TIMER_CALL_URGENCY_MASK);
613
614 boolean_t slop_ratelimited = FALSE;
615 slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);
616
617 if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop)
618 slop = leeway;
619
620 if (UINT64_MAX - deadline <= slop) {
621 deadline = UINT64_MAX;
622 } else {
623 deadline += slop;
624 }
625
626 if (__improbable(deadline < ctime)) {
627 deadline = past_deadline_timer_handle(deadline, ctime);
628 sdeadline = deadline;
629 }
630
631 if (ratelimited || slop_ratelimited) {
632 flags |= TIMER_CALL_RATELIMITED;
633 } else {
634 flags &= ~TIMER_CALL_RATELIMITED;
635 }
636
637 ttd = sdeadline - ctime;
638#if CONFIG_DTRACE
639 DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
640 timer_call_param_t, TCE(call)->param0, uint32_t, flags,
641 (deadline - sdeadline),
642 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
643#endif
644
645 /* Program timer callout parameters under the appropriate per-CPU or
646 * longterm queue lock. The callout may have been previously enqueued
647 * and in-flight on this or another timer queue.
648 */
649 if (!ratelimited && !slop_ratelimited) {
650 queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
651 }
652
653 if (queue == NULL) {
654 queue = timer_queue_assign(deadline);
655 old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
656 }
657
658#if TIMER_TRACE
659 TCE(call)->entry_time = ctime;
660#endif
661
662 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
663 DECR_TIMER_ENTER | DBG_FUNC_END,
664 VM_KERNEL_UNSLIDE_OR_PERM(call),
665 (old_queue != NULL), deadline, queue->count, 0);
666
667 splx(s);
668
669 return (old_queue != NULL);
670}
671
672/*
673 * timer_call_*()
674 * return boolean indicating whether the call was previously queued.
675 */
676boolean_t
677timer_call_enter(
678 timer_call_t call,
679 uint64_t deadline,
680 uint32_t flags)
681{
682 return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE);
683}
684
685boolean_t
686timer_call_enter1(
687 timer_call_t call,
688 timer_call_param_t param1,
689 uint64_t deadline,
690 uint32_t flags)
691{
692 return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE);
693}
694
695boolean_t
696timer_call_enter_with_leeway(
697 timer_call_t call,
698 timer_call_param_t param1,
699 uint64_t deadline,
700 uint64_t leeway,
701 uint32_t flags,
702 boolean_t ratelimited)
703{
704 return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited);
705}
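/*
 * Leeway sketch (illustrative; reuses the hypothetical example_tcall from the
 * sketch after timer_call_setup()): ask for a deadline ~5 ms out but allow the
 * kernel to coalesce it anywhere within an extra 1 ms window. TIMER_CALL_LEEWAY
 * tells timer_call_enter_internal() to honor the explicit leeway whenever it
 * exceeds the computed slop.
 *
 *	uint64_t delta, leeway;
 *	nanoseconds_to_absolutetime(5 * NSEC_PER_MSEC, &delta);
 *	nanoseconds_to_absolutetime(1 * NSEC_PER_MSEC, &leeway);
 *	timer_call_enter_with_leeway(&example_tcall, NULL,
 *	    mach_absolute_time() + delta, leeway,
 *	    TIMER_CALL_USER_NORMAL | TIMER_CALL_LEEWAY, FALSE);
 */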
706
707boolean_t
708timer_call_quantum_timer_enter(
709 timer_call_t call,
710 timer_call_param_t param1,
711 uint64_t deadline,
712 uint64_t ctime)
713{
714 assert(call->call_entry.func != NULL);
715 assert(ml_get_interrupts_enabled() == FALSE);
716
717 uint32_t flags = TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL;
718
719 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_START,
720 VM_KERNEL_UNSLIDE_OR_PERM(call),
721 VM_KERNEL_ADDRHIDE(param1), deadline,
722 flags, 0);
723
724 if (__improbable(deadline < ctime)) {
725 deadline = past_deadline_timer_handle(deadline, ctime);
726 }
727
728 uint64_t ttd = deadline - ctime;
729#if CONFIG_DTRACE
730 DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
731 timer_call_param_t, TCE(call)->param0, uint32_t, flags, 0,
732 (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
733#endif
734
735 quantum_timer_set_deadline(deadline);
736 TCE(call)->deadline = deadline;
737 TCE(call)->param1 = param1;
738 call->ttd = ttd;
739 call->flags = flags;
740
741#if TIMER_TRACE
742 TCE(call)->entry_time = ctime;
743#endif
744
745 TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_ENTER | DBG_FUNC_END,
746 VM_KERNEL_UNSLIDE_OR_PERM(call),
747 1, deadline, 0, 0);
748
749 return true;
750}
751
752
753boolean_t
754timer_call_quantum_timer_cancel(
755 timer_call_t call)
756{
757 assert(ml_get_interrupts_enabled() == FALSE);
758
759 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
760 DECR_TIMER_CANCEL | DBG_FUNC_START,
761 VM_KERNEL_UNSLIDE_OR_PERM(call), TCE(call)->deadline,
762 0, call->flags, 0);
763
764 TCE(call)->deadline = 0;
765 quantum_timer_set_deadline(0);
766
767 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
768 DECR_TIMER_CANCEL | DBG_FUNC_END,
769 VM_KERNEL_UNSLIDE_OR_PERM(call), 0,
770 TCE(call)->deadline - mach_absolute_time(),
771 TCE(call)->deadline - TCE(call)->entry_time, 0);
772
773#if CONFIG_DTRACE
774 DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
775 timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
776 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
777#endif
778
779 return true;
780}
781
782boolean_t
783timer_call_cancel(
784 timer_call_t call)
785{
786 mpqueue_head_t *old_queue;
787 spl_t s;
788
789 s = splclock();
790
791 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
792 DECR_TIMER_CANCEL | DBG_FUNC_START,
793 VM_KERNEL_UNSLIDE_OR_PERM(call),
794 TCE(call)->deadline, call->soft_deadline, call->flags, 0);
795
796 old_queue = timer_call_dequeue_unlocked(call);
797
798 if (old_queue != NULL) {
799 timer_queue_lock_spin(old_queue);
800 if (!queue_empty(&old_queue->head)) {
801 timer_queue_cancel(old_queue, TCE(call)->deadline, CE(queue_first(&old_queue->head))->deadline);
802 timer_call_t thead = (timer_call_t)queue_first(&old_queue->head);
803 old_queue->earliest_soft_deadline = thead->flags & TIMER_CALL_RATELIMITED ? TCE(thead)->deadline : thead->soft_deadline;
804 }
805 else {
806 timer_queue_cancel(old_queue, TCE(call)->deadline, UINT64_MAX);
807 old_queue->earliest_soft_deadline = UINT64_MAX;
808 }
809 timer_queue_unlock(old_queue);
810 }
811 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
812 DECR_TIMER_CANCEL | DBG_FUNC_END,
813 VM_KERNEL_UNSLIDE_OR_PERM(call),
814 VM_KERNEL_UNSLIDE_OR_PERM(old_queue),
815 TCE(call)->deadline - mach_absolute_time(),
816 TCE(call)->deadline - TCE(call)->entry_time, 0);
817 splx(s);
818
819#if CONFIG_DTRACE
820 DTRACE_TMR6(callout__cancel, timer_call_func_t, TCE(call)->func,
821 timer_call_param_t, TCE(call)->param0, uint32_t, call->flags, 0,
822 (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF));
823#endif
824
825 return (old_queue != NULL);
826}
827
828static uint32_t timer_queue_shutdown_lock_skips;
829static uint32_t timer_queue_shutdown_discarded;
830
831void
832timer_queue_shutdown(
833 mpqueue_head_t *queue)
834{
835 timer_call_t call;
836 mpqueue_head_t *new_queue;
837 spl_t s;
838
839
840 DBG("timer_queue_shutdown(%p)\n", queue);
841
842 s = splclock();
843
844 /* Note comma operator in while expression re-locking each iteration */
845 while ((void)timer_queue_lock_spin(queue), !queue_empty(&queue->head)) {
846 call = TIMER_CALL(queue_first(&queue->head));
847
848 if (!simple_lock_try(&call->lock)) {
849 /*
850 * case (2b) lock order inversion, dequeue and skip
851 * Don't change the call_entry queue back-pointer
852 * but set the async_dequeue field.
853 */
854 timer_queue_shutdown_lock_skips++;
855 timer_call_entry_dequeue_async(call);
856#if TIMER_ASSERT
857 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
858 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
859 VM_KERNEL_UNSLIDE_OR_PERM(call),
860 call->async_dequeue,
861 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
862 0x2b, 0);
863#endif
864 timer_queue_unlock(queue);
865 continue;
866 }
867
868 boolean_t call_local = ((call->flags & TIMER_CALL_LOCAL) != 0);
869
870 /* remove entry from old queue */
871 timer_call_entry_dequeue(call);
872 timer_queue_unlock(queue);
873
874 if (call_local == FALSE) {
875 /* and queue it on new, discarding LOCAL timers */
876 new_queue = timer_queue_assign(TCE(call)->deadline);
877 timer_queue_lock_spin(new_queue);
878 timer_call_entry_enqueue_deadline(
879 call, new_queue, TCE(call)->deadline);
880 timer_queue_unlock(new_queue);
881 } else {
882 timer_queue_shutdown_discarded++;
883 }
884
885 assert(call_local == FALSE);
886 simple_unlock(&call->lock);
887 }
888
889 timer_queue_unlock(queue);
890 splx(s);
891}
892
893
894void
895quantum_timer_expire(
896 uint64_t deadline)
897{
898 processor_t processor = current_processor();
899 timer_call_t call = TIMER_CALL(&(processor->quantum_timer));
900
901 if (__improbable(TCE(call)->deadline > deadline))
902 panic("CPU quantum timer deadlin out of sync with timer call deadline");
903
904 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
905 DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
906 VM_KERNEL_UNSLIDE_OR_PERM(call),
907 TCE(call)->deadline,
908 TCE(call)->deadline,
909 TCE(call)->entry_time, 0);
910
911 timer_call_func_t func = TCE(call)->func;
912 timer_call_param_t param0 = TCE(call)->param0;
913 timer_call_param_t param1 = TCE(call)->param1;
914
915 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
916 DECR_TIMER_CALLOUT | DBG_FUNC_START,
917 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
918 VM_KERNEL_ADDRHIDE(param0),
919 VM_KERNEL_ADDRHIDE(param1),
920 0);
921
922#if CONFIG_DTRACE
923 DTRACE_TMR7(callout__start, timer_call_func_t, func,
924 timer_call_param_t, param0, unsigned, call->flags,
925 0, (call->ttd >> 32),
926 (unsigned) (call->ttd & 0xFFFFFFFF), call);
927#endif
928 (*func)(param0, param1);
929
930 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
931 DECR_TIMER_CALLOUT | DBG_FUNC_END,
932 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
933 VM_KERNEL_ADDRHIDE(param0),
934 VM_KERNEL_ADDRHIDE(param1),
935 0);
936}
937
938static uint32_t timer_queue_expire_lock_skips;
939uint64_t
940timer_queue_expire_with_options(
941 mpqueue_head_t *queue,
942 uint64_t deadline,
943 boolean_t rescan)
944{
945 timer_call_t call = NULL;
946 uint32_t tc_iterations = 0;
947 DBG("timer_queue_expire(%p,)\n", queue);
948
949 uint64_t cur_deadline = deadline;
950 timer_queue_lock_spin(queue);
951
952 while (!queue_empty(&queue->head)) {
953 /* Upon processing one or more timer calls, refresh the
954 * deadline to account for time elapsed in the callout
955 */
956 if (++tc_iterations > 1)
957 cur_deadline = mach_absolute_time();
958
959 if (call == NULL)
960 call = TIMER_CALL(queue_first(&queue->head));
961
962 if (call->soft_deadline <= cur_deadline) {
963 timer_call_func_t func;
964 timer_call_param_t param0, param1;
965
966 TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0);
967 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
968 DECR_TIMER_EXPIRE | DBG_FUNC_NONE,
969 VM_KERNEL_UNSLIDE_OR_PERM(call),
970 call->soft_deadline,
971 TCE(call)->deadline,
972 TCE(call)->entry_time, 0);
973
974 if ((call->flags & TIMER_CALL_RATELIMITED) &&
975 (TCE(call)->deadline > cur_deadline)) {
976 if (rescan == FALSE)
977 break;
978 }
979
980 if (!simple_lock_try(&call->lock)) {
981 /* case (2b) lock inversion, dequeue and skip */
982 timer_queue_expire_lock_skips++;
983 timer_call_entry_dequeue_async(call);
984 call = NULL;
985 continue;
986 }
987
988 timer_call_entry_dequeue(call);
989
990 func = TCE(call)->func;
991 param0 = TCE(call)->param0;
992 param1 = TCE(call)->param1;
993
994 simple_unlock(&call->lock);
995 timer_queue_unlock(queue);
996
997 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
998 DECR_TIMER_CALLOUT | DBG_FUNC_START,
999 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
1000 VM_KERNEL_ADDRHIDE(param0),
1001 VM_KERNEL_ADDRHIDE(param1),
1002 0);
1003
1004#if CONFIG_DTRACE
1005 DTRACE_TMR7(callout__start, timer_call_func_t, func,
1006 timer_call_param_t, param0, unsigned, call->flags,
1007 0, (call->ttd >> 32),
1008 (unsigned) (call->ttd & 0xFFFFFFFF), call);
1009#endif
1010 /* Maintain time-to-deadline in per-processor data
1011 * structure for thread wakeup deadline statistics.
1012 */
1013 uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd));
1014 *ttdp = call->ttd;
1015 (*func)(param0, param1);
1016 *ttdp = 0;
1017#if CONFIG_DTRACE
1018 DTRACE_TMR4(callout__end, timer_call_func_t, func,
1019 param0, param1, call);
1020#endif
1021
1022 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1023 DECR_TIMER_CALLOUT | DBG_FUNC_END,
1024 VM_KERNEL_UNSLIDE_OR_PERM(call), VM_KERNEL_UNSLIDE(func),
1025 VM_KERNEL_ADDRHIDE(param0),
1026 VM_KERNEL_ADDRHIDE(param1),
1027 0);
1028 call = NULL;
1029 timer_queue_lock_spin(queue);
1030 } else {
1031 if (__probable(rescan == FALSE)) {
1032 break;
1033 } else {
1034 int64_t skew = TCE(call)->deadline - call->soft_deadline;
1035 assert(TCE(call)->deadline >= call->soft_deadline);
1036
1037 /* DRK: On a latency quality-of-service level change,
1038 * re-sort potentially rate-limited timers. The platform
1039 * layer determines which timers require
1040 * this. In the absence of the per-callout
1041 * synchronization requirement, a global resort could
1042 * be more efficient. The re-sort effectively
1043 * annuls all timer adjustments, i.e. the "soft
1044 * deadline" is the sort key.
1045 */
1046
1047 if (timer_resort_threshold(skew)) {
1048 if (__probable(simple_lock_try(&call->lock))) {
1049 timer_call_entry_dequeue(call);
1050 timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline);
1051 simple_unlock(&call->lock);
1052 call = NULL;
1053 }
1054 }
1055 if (call) {
1056 call = TIMER_CALL(queue_next(qe(call)));
1057 if (queue_end(&queue->head, qe(call)))
1058 break;
1059 }
1060 }
1061 }
1062 }
1063
1064 if (!queue_empty(&queue->head)) {
1065 call = TIMER_CALL(queue_first(&queue->head));
1066 cur_deadline = TCE(call)->deadline;
1067 queue->earliest_soft_deadline = (call->flags & TIMER_CALL_RATELIMITED) ? TCE(call)->deadline: call->soft_deadline;
1068 } else {
1069 queue->earliest_soft_deadline = cur_deadline = UINT64_MAX;
1070 }
1071
1072 timer_queue_unlock(queue);
1073
1074 return (cur_deadline);
1075}
1076
1077uint64_t
1078timer_queue_expire(
1079 mpqueue_head_t *queue,
1080 uint64_t deadline)
1081{
1082 return timer_queue_expire_with_options(queue, deadline, FALSE);
1083}
1084
1085extern int serverperfmode;
1086static uint32_t timer_queue_migrate_lock_skips;
1087/*
1088 * timer_queue_migrate() is called by timer_queue_migrate_cpu()
1089 * to move timer requests from the local processor (queue_from)
 * to a target processor's queue (queue_to).
1091 */
1092int
1093timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to)
1094{
1095 timer_call_t call;
1096 timer_call_t head_to;
1097 int timers_migrated = 0;
1098
1099 DBG("timer_queue_migrate(%p,%p)\n", queue_from, queue_to);
1100
1101 assert(!ml_get_interrupts_enabled());
1102 assert(queue_from != queue_to);
1103
1104 if (serverperfmode) {
1105 /*
1106 * if we're running a high end server
1107 * avoid migrations... they add latency
1108 * and don't save us power under typical
1109 * server workloads
1110 */
1111 return -4;
1112 }
1113
1114 /*
1115 * Take both local (from) and target (to) timer queue locks while
1116 * moving the timers from the local queue to the target processor.
1117 * We assume that the target is always the boot processor.
1118 * But only move if all of the following is true:
1119 * - the target queue is non-empty
1120 * - the local queue is non-empty
1121 * - the local queue's first deadline is later than the target's
 *  - the local queue contains no non-migratable "local" call
1123 * so that we need not have the target resync.
1124 */
1125
1126 timer_queue_lock_spin(queue_to);
1127
1128 head_to = TIMER_CALL(queue_first(&queue_to->head));
1129 if (queue_empty(&queue_to->head)) {
1130 timers_migrated = -1;
1131 goto abort1;
1132 }
1133
1134 timer_queue_lock_spin(queue_from);
1135
1136 if (queue_empty(&queue_from->head)) {
1137 timers_migrated = -2;
1138 goto abort2;
1139 }
1140
1141 call = TIMER_CALL(queue_first(&queue_from->head));
1142 if (TCE(call)->deadline < TCE(head_to)->deadline) {
1143 timers_migrated = 0;
1144 goto abort2;
1145 }
1146
1147 /* perform scan for non-migratable timers */
1148 do {
1149 if (call->flags & TIMER_CALL_LOCAL) {
1150 timers_migrated = -3;
1151 goto abort2;
1152 }
1153 call = TIMER_CALL(queue_next(qe(call)));
1154 } while (!queue_end(&queue_from->head, qe(call)));
1155
1156 /* migration loop itself -- both queues are locked */
1157 while (!queue_empty(&queue_from->head)) {
1158 call = TIMER_CALL(queue_first(&queue_from->head));
1159 if (!simple_lock_try(&call->lock)) {
1160 /* case (2b) lock order inversion, dequeue only */
1161#ifdef TIMER_ASSERT
1162 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1163 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
1164 VM_KERNEL_UNSLIDE_OR_PERM(call),
1165 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
1166 VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
1167 0x2b, 0);
1168#endif
1169 timer_queue_migrate_lock_skips++;
1170 timer_call_entry_dequeue_async(call);
1171 continue;
1172 }
1173 timer_call_entry_dequeue(call);
1174 timer_call_entry_enqueue_deadline(
1175 call, queue_to, TCE(call)->deadline);
1176 timers_migrated++;
1177 simple_unlock(&call->lock);
1178 }
1179 queue_from->earliest_soft_deadline = UINT64_MAX;
1180abort2:
1181 timer_queue_unlock(queue_from);
1182abort1:
1183 timer_queue_unlock(queue_to);
1184
1185 return timers_migrated;
1186}
1187
1188void
1189timer_queue_trace_cpu(int ncpu)
1190{
1191 timer_call_nosync_cpu(
1192 ncpu,
1193 (void(*)(void *))timer_queue_trace,
1194 (void*) timer_queue_cpu(ncpu));
1195}
1196
1197void
1198timer_queue_trace(
1199 mpqueue_head_t *queue)
1200{
1201 timer_call_t call;
1202 spl_t s;
1203
1204 if (!kdebug_enable)
1205 return;
1206
1207 s = splclock();
1208 timer_queue_lock_spin(queue);
1209
1210 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1211 DECR_TIMER_QUEUE | DBG_FUNC_START,
1212 queue->count, mach_absolute_time(), 0, 0, 0);
1213
1214 if (!queue_empty(&queue->head)) {
1215 call = TIMER_CALL(queue_first(&queue->head));
1216 do {
1217 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1218 DECR_TIMER_QUEUE | DBG_FUNC_NONE,
1219 call->soft_deadline,
1220 TCE(call)->deadline,
1221 TCE(call)->entry_time,
1222 VM_KERNEL_UNSLIDE(TCE(call)->func),
1223 0);
1224 call = TIMER_CALL(queue_next(qe(call)));
1225 } while (!queue_end(&queue->head, qe(call)));
1226 }
1227
1228 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1229 DECR_TIMER_QUEUE | DBG_FUNC_END,
1230 queue->count, mach_absolute_time(), 0, 0, 0);
1231
1232 timer_queue_unlock(queue);
1233 splx(s);
1234}
1235
1236void
1237timer_longterm_dequeued_locked(timer_call_t call)
1238{
1239 timer_longterm_t *tlp = &timer_longterm;
1240
1241 tlp->dequeues++;
1242 if (call == tlp->threshold.call)
1243 tlp->threshold.call = NULL;
1244}
1245
1246/*
1247 * Place a timer call in the longterm list
1248 * and adjust the next timer callout deadline if the new timer is first.
1249 */
1250mpqueue_head_t *
1251timer_longterm_enqueue_unlocked(timer_call_t call,
1252 uint64_t now,
1253 uint64_t deadline,
1254 mpqueue_head_t **old_queue,
1255 uint64_t soft_deadline,
1256 uint64_t ttd,
1257 timer_call_param_t param1,
1258 uint32_t callout_flags)
1259{
1260 timer_longterm_t *tlp = &timer_longterm;
1261 boolean_t update_required = FALSE;
1262 uint64_t longterm_threshold;
1263
1264 longterm_threshold = now + tlp->threshold.interval;
1265
1266 /*
1267 * Return NULL without doing anything if:
1268 * - this timer is local, or
1269 * - the longterm mechanism is disabled, or
1270 * - this deadline is too short.
1271 */
1272 if ((callout_flags & TIMER_CALL_LOCAL) != 0 ||
1273 (tlp->threshold.interval == TIMER_LONGTERM_NONE) ||
1274 (deadline <= longterm_threshold))
1275 return NULL;
1276
1277 /*
1278 * Remove timer from its current queue, if any.
1279 */
1280 *old_queue = timer_call_dequeue_unlocked(call);
1281
1282 /*
1283 * Lock the longterm queue, queue timer and determine
1284 * whether an update is necessary.
1285 */
1286 assert(!ml_get_interrupts_enabled());
1287 simple_lock(&call->lock);
1288 timer_queue_lock_spin(timer_longterm_queue);
1289 TCE(call)->deadline = deadline;
1290 TCE(call)->param1 = param1;
1291 call->ttd = ttd;
1292 call->soft_deadline = soft_deadline;
1293 call->flags = callout_flags;
1294 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1295
1296 tlp->enqueues++;
1297
1298 /*
1299 * We'll need to update the currently set threshold timer
1300 * if the new deadline is sooner and no sooner update is in flight.
1301 */
1302 if (deadline < tlp->threshold.deadline &&
1303 deadline < tlp->threshold.preempted) {
1304 tlp->threshold.preempted = deadline;
1305 tlp->threshold.call = call;
1306 update_required = TRUE;
1307 }
1308 timer_queue_unlock(timer_longterm_queue);
1309 simple_unlock(&call->lock);
1310
1311 if (update_required) {
1312 /*
1313 * Note: this call expects that calling the master cpu
1314 * alone does not involve locking the topo lock.
1315 */
1316 timer_call_nosync_cpu(
1317 master_cpu,
1318 (void (*)(void *)) timer_longterm_update,
1319 (void *)tlp);
1320 }
1321
1322 return timer_longterm_queue;
1323}
1324
1325/*
1326 * Scan for timers below the longterm threshold.
1327 * Move these to the local timer queue (of the boot processor on which the
1328 * calling thread is running).
1329 * Both the local (boot) queue and the longterm queue are locked.
1330 * The scan is similar to the timer migrate sequence but is performed by
1331 * successively examining each timer on the longterm queue:
1332 * - if within the short-term threshold
1333 * - enter on the local queue (unless being deleted),
1334 * - otherwise:
1335 * - if sooner, deadline becomes the next threshold deadline.
 * The total scan time is limited to TIMER_LONGTERM_SCAN_LIMIT. Should this be
 * exceeded, we abort and reschedule another scan so that we don't lock others
 * out of the timer queues. Longterm timers firing late is not critical.
1339 */
1340void
1341timer_longterm_scan(timer_longterm_t *tlp,
1342 uint64_t time_start)
1343{
1344 queue_entry_t qe;
1345 timer_call_t call;
1346 uint64_t threshold;
1347 uint64_t deadline;
1348 uint64_t time_limit = time_start + tlp->scan_limit;
1349 mpqueue_head_t *timer_master_queue;
1350
1351 assert(!ml_get_interrupts_enabled());
1352 assert(cpu_number() == master_cpu);
1353
	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
		threshold = time_start + tlp->threshold.interval;
	else
		threshold = TIMER_LONGTERM_NONE;
1356
1357 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1358 tlp->threshold.call = NULL;
1359
1360 if (queue_empty(&timer_longterm_queue->head))
1361 return;
1362
1363 timer_master_queue = timer_queue_cpu(master_cpu);
1364 timer_queue_lock_spin(timer_master_queue);
1365
1366 qe = queue_first(&timer_longterm_queue->head);
1367 while (!queue_end(&timer_longterm_queue->head, qe)) {
1368 call = TIMER_CALL(qe);
1369 deadline = call->soft_deadline;
1370 qe = queue_next(qe);
1371 if (!simple_lock_try(&call->lock)) {
1372 /* case (2c) lock order inversion, dequeue only */
1373#ifdef TIMER_ASSERT
1374 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1375 DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE,
1376 VM_KERNEL_UNSLIDE_OR_PERM(call),
1377 VM_KERNEL_UNSLIDE_OR_PERM(TCE(call)->queue),
1378 VM_KERNEL_UNSLIDE_OR_PERM(call->lock.interlock.lock_data),
1379 0x2c, 0);
1380#endif
1381 timer_call_entry_dequeue_async(call);
1382 continue;
1383 }
1384 if (deadline < threshold) {
1385 /*
1386 * This timer needs moving (escalating)
1387 * to the local (boot) processor's queue.
1388 */
1389#ifdef TIMER_ASSERT
1390 if (deadline < time_start)
1391 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1392 DECR_TIMER_OVERDUE | DBG_FUNC_NONE,
1393 VM_KERNEL_UNSLIDE_OR_PERM(call),
1394 deadline,
1395 time_start,
1396 threshold,
1397 0);
1398#endif
1399 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1400 DECR_TIMER_ESCALATE | DBG_FUNC_NONE,
1401 VM_KERNEL_UNSLIDE_OR_PERM(call),
1402 TCE(call)->deadline,
1403 TCE(call)->entry_time,
1404 VM_KERNEL_UNSLIDE(TCE(call)->func),
1405 0);
1406 tlp->escalates++;
1407 timer_call_entry_dequeue(call);
1408 timer_call_entry_enqueue_deadline(
1409 call, timer_master_queue, TCE(call)->deadline);
1410 /*
1411 * A side-effect of the following call is to update
1412 * the actual hardware deadline if required.
1413 */
1414 (void) timer_queue_assign(deadline);
1415 } else {
1416 if (deadline < tlp->threshold.deadline) {
1417 tlp->threshold.deadline = deadline;
1418 tlp->threshold.call = call;
1419 }
1420 }
1421 simple_unlock(&call->lock);
1422
1423 /* Abort scan if we're taking too long. */
1424 if (mach_absolute_time() > time_limit) {
1425 tlp->threshold.deadline = TIMER_LONGTERM_SCAN_AGAIN;
1426 tlp->scan_pauses++;
1427 DBG("timer_longterm_scan() paused %llu, qlen: %llu\n",
1428 time_limit, tlp->queue.count);
1429 break;
1430 }
1431 }
1432
1433 timer_queue_unlock(timer_master_queue);
1434}
1435
1436void
1437timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
1438{
1439 timer_longterm_t *tlp = (timer_longterm_t *) p0;
1440
1441 timer_longterm_update(tlp);
1442}
1443
1444void
1445timer_longterm_update_locked(timer_longterm_t *tlp)
1446{
1447 uint64_t latency;
1448
1449 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1450 DECR_TIMER_UPDATE | DBG_FUNC_START,
1451 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
1452 tlp->threshold.deadline,
1453 tlp->threshold.preempted,
1454 tlp->queue.count, 0);
1455
1456 tlp->scan_time = mach_absolute_time();
1457 if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) {
1458 tlp->threshold.preempts++;
1459 tlp->threshold.deadline = tlp->threshold.preempted;
1460 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1461 /*
1462 * Note: in the unlikely event that a pre-empted timer has
1463 * itself been cancelled, we'll simply re-scan later at the
1464 * time of the preempted/cancelled timer.
1465 */
1466 } else {
1467 tlp->threshold.scans++;
1468
1469 /*
1470 * Maintain a moving average of our wakeup latency.
1471 * Clamp latency to 0 and ignore above threshold interval.
1472 */
1473 if (tlp->scan_time > tlp->threshold.deadline_set)
1474 latency = tlp->scan_time - tlp->threshold.deadline_set;
1475 else
1476 latency = 0;
1477 if (latency < tlp->threshold.interval) {
1478 tlp->threshold.latency_min =
1479 MIN(tlp->threshold.latency_min, latency);
1480 tlp->threshold.latency_max =
1481 MAX(tlp->threshold.latency_max, latency);
1482 tlp->threshold.latency =
1483 (tlp->threshold.latency*99 + latency) / 100;
1484 }
1485
1486 timer_longterm_scan(tlp, tlp->scan_time);
1487 }
1488
1489 tlp->threshold.deadline_set = tlp->threshold.deadline;
1490 /* The next deadline timer to be set is adjusted */
1491 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE &&
1492 tlp->threshold.deadline != TIMER_LONGTERM_SCAN_AGAIN) {
1493 tlp->threshold.deadline_set -= tlp->threshold.margin;
1494 tlp->threshold.deadline_set -= tlp->threshold.latency;
1495 }
1496
1497 /* Throttle next scan time */
1498 uint64_t scan_clamp = mach_absolute_time() + tlp->scan_interval;
1499 if (tlp->threshold.deadline_set < scan_clamp)
1500 tlp->threshold.deadline_set = scan_clamp;
1501
1502 TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
1503 DECR_TIMER_UPDATE | DBG_FUNC_END,
1504 VM_KERNEL_UNSLIDE_OR_PERM(&tlp->queue),
1505 tlp->threshold.deadline,
1506 tlp->threshold.scans,
1507 tlp->queue.count, 0);
1508}
1509
1510void
1511timer_longterm_update(timer_longterm_t *tlp)
1512{
1513 spl_t s = splclock();
1514
1515 timer_queue_lock_spin(timer_longterm_queue);
1516
1517 if (cpu_number() != master_cpu)
1518 panic("timer_longterm_update_master() on non-boot cpu");
1519
1520 timer_longterm_update_locked(tlp);
1521
1522 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE)
1523 timer_call_enter(
1524 &tlp->threshold.timer,
1525 tlp->threshold.deadline_set,
1526 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1527
1528 timer_queue_unlock(timer_longterm_queue);
1529 splx(s);
1530}
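/*
 * Control-flow summary (hedged restatement of the functions above, no new
 * machinery): the longterm mechanism re-arms itself in a loop --
 *
 *	timer_longterm_callout()                 // threshold timer fires (boot CPU)
 *	  -> timer_longterm_update()             // splclock + longterm queue lock
 *	       -> timer_longterm_update_locked()
 *	            -> timer_longterm_scan()     // escalate due timers; find the
 *	                                         // next soonest longterm deadline
 *	  -> timer_call_enter(&tlp->threshold.timer, tlp->threshold.deadline_set,
 *	         TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL)
 *	                                         // re-arm, margin/latency early
 */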
1531
1532void
1533timer_longterm_init(void)
1534{
1535 uint32_t longterm;
1536 timer_longterm_t *tlp = &timer_longterm;
1537
1538 DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue);
1539
1540 /*
 * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD,
 * or TIMER_LONGTERM_NONE (disabled) in server performance mode;
 * may be overridden with the "longterm" boot-arg (value in milliseconds).
1544 */
1545 tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE
1546 : TIMER_LONGTERM_THRESHOLD;
1547 if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) {
1548 tlp->threshold.interval = (longterm == 0) ?
1549 TIMER_LONGTERM_NONE :
1550 longterm * NSEC_PER_MSEC;
1551 }
1552 if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
1553 printf("Longterm timer threshold: %llu ms\n",
1554 tlp->threshold.interval / NSEC_PER_MSEC);
1555 kprintf("Longterm timer threshold: %llu ms\n",
1556 tlp->threshold.interval / NSEC_PER_MSEC);
1557 nanoseconds_to_absolutetime(tlp->threshold.interval,
1558 &tlp->threshold.interval);
1559 tlp->threshold.margin = tlp->threshold.interval / 10;
1560 tlp->threshold.latency_min = EndOfAllTime;
1561 tlp->threshold.latency_max = 0;
1562 }
1563
1564 tlp->threshold.preempted = TIMER_LONGTERM_NONE;
1565 tlp->threshold.deadline = TIMER_LONGTERM_NONE;
1566
1567 lck_attr_setdefault(&timer_longterm_lck_attr);
1568 lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr);
1569 lck_grp_init(&timer_longterm_lck_grp,
1570 "timer_longterm", &timer_longterm_lck_grp_attr);
1571 mpqueue_init(&tlp->queue,
1572 &timer_longterm_lck_grp, &timer_longterm_lck_attr);
1573
1574 timer_call_setup(&tlp->threshold.timer,
1575 timer_longterm_callout, (timer_call_param_t) tlp);
1576
1577 timer_longterm_queue = &tlp->queue;
1578}
1579
1580enum {
1581 THRESHOLD, QCOUNT,
1582 ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
1583 LATENCY, LATENCY_MIN, LATENCY_MAX, SCAN_LIMIT, SCAN_INTERVAL, PAUSES
1584};
1585uint64_t
1586timer_sysctl_get(int oid)
1587{
1588 timer_longterm_t *tlp = &timer_longterm;
1589
1590 switch (oid) {
1591 case THRESHOLD:
1592 return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
1593 0 : tlp->threshold.interval / NSEC_PER_MSEC;
1594 case QCOUNT:
1595 return tlp->queue.count;
1596 case ENQUEUES:
1597 return tlp->enqueues;
1598 case DEQUEUES:
1599 return tlp->dequeues;
1600 case ESCALATES:
1601 return tlp->escalates;
1602 case SCANS:
1603 return tlp->threshold.scans;
1604 case PREEMPTS:
1605 return tlp->threshold.preempts;
1606 case LATENCY:
1607 return tlp->threshold.latency;
1608 case LATENCY_MIN:
1609 return tlp->threshold.latency_min;
1610 case LATENCY_MAX:
1611 return tlp->threshold.latency_max;
1612 case SCAN_LIMIT:
1613 return tlp->scan_limit;
1614 case SCAN_INTERVAL:
1615 return tlp->scan_interval;
1616 case PAUSES:
1617 return tlp->scan_pauses;
1618 default:
1619 return 0;
1620 }
1621}
1622
1623/*
1624 * timer_master_scan() is the inverse of timer_longterm_scan()
1625 * since it un-escalates timers to the longterm queue.
1626 */
1627static void
1628timer_master_scan(timer_longterm_t *tlp,
1629 uint64_t now)
1630{
1631 queue_entry_t qe;
1632 timer_call_t call;
1633 uint64_t threshold;
1634 uint64_t deadline;
1635 mpqueue_head_t *timer_master_queue;
1636
1637 if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
1638 threshold = now + tlp->threshold.interval;
1639 else
1640 threshold = TIMER_LONGTERM_NONE;
1641
1642 timer_master_queue = timer_queue_cpu(master_cpu);
1643 timer_queue_lock_spin(timer_master_queue);
1644
1645 qe = queue_first(&timer_master_queue->head);
1646 while (!queue_end(&timer_master_queue->head, qe)) {
1647 call = TIMER_CALL(qe);
1648 deadline = TCE(call)->deadline;
1649 qe = queue_next(qe);
1650 if ((call->flags & TIMER_CALL_LOCAL) != 0)
1651 continue;
1652 if (!simple_lock_try(&call->lock)) {
1653 /* case (2c) lock order inversion, dequeue only */
1654 timer_call_entry_dequeue_async(call);
1655 continue;
1656 }
1657 if (deadline > threshold) {
1658 /* move from master to longterm */
1659 timer_call_entry_dequeue(call);
1660 timer_call_entry_enqueue_tail(call, timer_longterm_queue);
1661 if (deadline < tlp->threshold.deadline) {
1662 tlp->threshold.deadline = deadline;
1663 tlp->threshold.call = call;
1664 }
1665 }
1666 simple_unlock(&call->lock);
1667 }
1668 timer_queue_unlock(timer_master_queue);
1669}
1670
1671static void
1672timer_sysctl_set_threshold(uint64_t value)
1673{
1674 timer_longterm_t *tlp = &timer_longterm;
1675 spl_t s = splclock();
1676 boolean_t threshold_increase;
1677
1678 timer_queue_lock_spin(timer_longterm_queue);
1679
1680 timer_call_cancel(&tlp->threshold.timer);
1681
1682 /*
	 * Set the new threshold and note whether it's increasing.
1684 */
1685 if (value == 0) {
1686 tlp->threshold.interval = TIMER_LONGTERM_NONE;
1687 threshold_increase = TRUE;
1688 timer_call_cancel(&tlp->threshold.timer);
1689 } else {
1690 uint64_t old_interval = tlp->threshold.interval;
1691 tlp->threshold.interval = value * NSEC_PER_MSEC;
1692 nanoseconds_to_absolutetime(tlp->threshold.interval,
1693 &tlp->threshold.interval);
1694 tlp->threshold.margin = tlp->threshold.interval / 10;
1695 if (old_interval == TIMER_LONGTERM_NONE)
1696 threshold_increase = FALSE;
1697 else
1698 threshold_increase = (tlp->threshold.interval > old_interval);
1699 }
1700
1701 if (threshold_increase /* or removal */) {
1702 /* Escalate timers from the longterm queue */
1703 timer_longterm_scan(tlp, mach_absolute_time());
1704 } else /* decrease or addition */ {
1705 /*
1706 * We scan the local/master queue for timers now longterm.
1707 * To be strictly correct, we should scan all processor queues
1708 * but timer migration results in most timers gravitating to the
1709 * master processor in any case.
1710 */
1711 timer_master_scan(tlp, mach_absolute_time());
1712 }
1713
1714 /* Set new timer accordingly */
1715 tlp->threshold.deadline_set = tlp->threshold.deadline;
1716 if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) {
1717 tlp->threshold.deadline_set -= tlp->threshold.margin;
1718 tlp->threshold.deadline_set -= tlp->threshold.latency;
1719 timer_call_enter(
1720 &tlp->threshold.timer,
1721 tlp->threshold.deadline_set,
1722 TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL);
1723 }
1724
1725 /* Reset stats */
1726 tlp->enqueues = 0;
1727 tlp->dequeues = 0;
1728 tlp->escalates = 0;
1729 tlp->scan_pauses = 0;
1730 tlp->threshold.scans = 0;
1731 tlp->threshold.preempts = 0;
1732 tlp->threshold.latency = 0;
1733 tlp->threshold.latency_min = EndOfAllTime;
1734 tlp->threshold.latency_max = 0;
1735
1736 timer_queue_unlock(timer_longterm_queue);
1737 splx(s);
1738}
1739
1740int
1741timer_sysctl_set(int oid, uint64_t value)
1742{
1743 switch (oid) {
1744 case THRESHOLD:
1745 timer_call_cpu(
1746 master_cpu,
1747 (void (*)(void *)) timer_sysctl_set_threshold,
1748 (void *) value);
1749 return KERN_SUCCESS;
1750 case SCAN_LIMIT:
1751 timer_longterm.scan_limit = value;
1752 return KERN_SUCCESS;
1753 case SCAN_INTERVAL:
1754 timer_longterm.scan_interval = value;
1755 return KERN_SUCCESS;
1756 default:
1757 return KERN_INVALID_ARGUMENT;
1758 }
1759}
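/*
 * Sketch (illustrative): the BSD sysctl layer is expected to call
 * timer_sysctl_set()/timer_sysctl_get() with one of the oids above; the
 * wiring to specific sysctl names lives outside this file. For example,
 * lowering the longterm threshold to 500 ms and reading the queue depth:
 *
 *	timer_sysctl_set(THRESHOLD, 500);	// milliseconds
 *	uint64_t qlen = timer_sysctl_get(QCOUNT);
 */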
1760
1761
1762/* Select timer coalescing window based on per-task quality-of-service hints */
1763static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) {
1764 uint32_t latency_qos;
1765 boolean_t adjusted = FALSE;
1766 task_t ctask = t->task;
1767
1768 if (ctask) {
1769 latency_qos = proc_get_effective_thread_policy(t, TASK_POLICY_LATENCY_QOS);
1770
1771 assert(latency_qos <= NUM_LATENCY_QOS_TIERS);
1772
1773 if (latency_qos) {
1774 *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1];
1775 *tmax_abstime = tcoal_prio_params.latency_qos_abstime_max[latency_qos - 1];
1776 *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1];
1777 adjusted = TRUE;
1778 }
1779 }
1780 return adjusted;
1781}
1782
1783
1784/* Adjust timer deadlines based on priority of the thread and the
1785 * urgency value provided at timeout establishment. With this mechanism,
1786 * timers are no longer necessarily sorted in order of soft deadline
1787 * on a given timer queue, i.e. they may be differentially skewed.
1788 * In the current scheme, this could lead to fewer pending timers
1789 * processed than is technically possible when the HW deadline arrives.
1790 */
1791static void
1792timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax_abstime, boolean_t *pratelimited) {
1793 int16_t tpri = cthread->sched_pri;
1794 if ((urgency & TIMER_CALL_USER_MASK) != 0) {
1795 if (tpri >= BASEPRI_RTQUEUES ||
1796 urgency == TIMER_CALL_USER_CRITICAL) {
1797 *tshift = tcoal_prio_params.timer_coalesce_rt_shift;
1798 *tmax_abstime = tcoal_prio_params.timer_coalesce_rt_abstime_max;
1799 TCOAL_PRIO_STAT(rt_tcl);
1800 } else if (proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG) ||
1801 (urgency == TIMER_CALL_USER_BACKGROUND)) {
1802 /* Determine if timer should be subjected to a lower QoS */
1803 if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1804 if (*tmax_abstime > tcoal_prio_params.timer_coalesce_bg_abstime_max) {
1805 return;
1806 } else {
1807 *pratelimited = FALSE;
1808 }
1809 }
1810 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1811 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1812 TCOAL_PRIO_STAT(bg_tcl);
1813 } else if (tpri >= MINPRI_KERNEL) {
1814 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1815 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1816 TCOAL_PRIO_STAT(kt_tcl);
1817 } else if (cthread->sched_mode == TH_MODE_FIXED) {
1818 *tshift = tcoal_prio_params.timer_coalesce_fp_shift;
1819 *tmax_abstime = tcoal_prio_params.timer_coalesce_fp_abstime_max;
1820 TCOAL_PRIO_STAT(fp_tcl);
1821 } else if (tcoal_qos_adjust(cthread, tshift, tmax_abstime, pratelimited)) {
1822 TCOAL_PRIO_STAT(qos_tcl);
1823 } else if (cthread->sched_mode == TH_MODE_TIMESHARE) {
1824 *tshift = tcoal_prio_params.timer_coalesce_ts_shift;
1825 *tmax_abstime = tcoal_prio_params.timer_coalesce_ts_abstime_max;
1826 TCOAL_PRIO_STAT(ts_tcl);
1827 } else {
1828 TCOAL_PRIO_STAT(nc_tcl);
1829 }
1830 } else if (urgency == TIMER_CALL_SYS_BACKGROUND) {
1831 *tshift = tcoal_prio_params.timer_coalesce_bg_shift;
1832 *tmax_abstime = tcoal_prio_params.timer_coalesce_bg_abstime_max;
1833 TCOAL_PRIO_STAT(bg_tcl);
1834 } else {
1835 *tshift = tcoal_prio_params.timer_coalesce_kt_shift;
1836 *tmax_abstime = tcoal_prio_params.timer_coalesce_kt_abstime_max;
1837 TCOAL_PRIO_STAT(kt_tcl);
1838 }
1839}
1840
1841
1842int timer_user_idle_level;
1843
1844uint64_t
1845timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited)
1846{
1847 int32_t tcs_shift = 0;
1848 uint64_t tcs_max_abstime = 0;
1849 uint64_t adjval;
1850 uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK);
1851
1852 if (mach_timer_coalescing_enabled &&
1853 (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) {
1854 timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_max_abstime, pratelimited);
1855
1856 if (tcs_shift >= 0)
1857 adjval = MIN((deadline - now) >> tcs_shift, tcs_max_abstime);
1858 else
1859 adjval = MIN((deadline - now) << (-tcs_shift), tcs_max_abstime);
1860 /* Apply adjustments derived from "user idle level" heuristic */
1861 adjval += (adjval * timer_user_idle_level) >> 7;
1862 return adjval;
1863 } else {
1864 return 0;
1865 }
1866}
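/*
 * Worked example (hedged; the actual shifts and maxima come from the
 * platform's timer_call_get_priority_params() data): with a background
 * coalescing shift of 2 and a deadline 8 ms away, the raw slop is
 * (deadline - now) >> 2 = 2 ms, clamped to timer_coalesce_bg_abstime_max.
 * A timer_user_idle_level of 64 then adds (adjval * 64) >> 7, i.e. another
 * 1 ms, for a total leeway of 3 ms.
 */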
1867
1868int
1869timer_get_user_idle_level(void) {
1870 return timer_user_idle_level;
1871}
1872
1873kern_return_t timer_set_user_idle_level(int ilevel) {
1874 boolean_t do_reeval = FALSE;
1875
1876 if ((ilevel < 0) || (ilevel > 128))
1877 return KERN_INVALID_ARGUMENT;
1878
1879 if (ilevel < timer_user_idle_level) {
1880 do_reeval = TRUE;
1881 }
1882
1883 timer_user_idle_level = ilevel;
1884
1885 if (do_reeval)
1886 ml_timer_evaluate();
1887
1888 return KERN_SUCCESS;
1889}
1890