/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <sys/kdebug.h>
#include <mach/thread_act.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>

extern kern_return_t thread_terminate(thread_t);

static void nx_netif_mit_reset_interval(struct nx_netif_mit *);
static void nx_netif_mit_set_start_interval(struct nx_netif_mit *);
static uint32_t nx_netif_mit_update_interval(struct nx_netif_mit *, boolean_t);
static void nx_netif_mit_thread_func(void *, wait_result_t);
static void nx_netif_mit_thread_cont(void *, wait_result_t);
static void nx_netif_mit_s_thread_cont(void *, wait_result_t);
static void nx_netif_mit_stats(struct __kern_channel_ring *, uint64_t,
    uint64_t);

/* mitigation intervals in microseconds */
#define NETIF_BUSY_MIT_DELAY (100)

static uint32_t netif_busy_mit_delay = NETIF_BUSY_MIT_DELAY;

#define MIT_EWMA(old, new, gdecay, sdecay) do {                 \
    uint32_t _avg;                                              \
    if ((_avg = (old)) > 0) {                                   \
        uint32_t _d = ((new) > _avg) ? gdecay : sdecay;         \
        _avg = (((_avg << (_d)) - _avg) + (new)) >> (_d);       \
    } else {                                                    \
        _avg = (new);                                           \
    }                                                           \
    (old) = _avg;                                               \
} while (0)
/*
 * A larger decay factor results in slower reaction. Each value is the ilog2
 * of an EWMA decay rate; one for growth and another for shrink. The two
 * decay factors are chosen such that we react quickly to shrink, and
 * slowly to grow. Growth and shrink refer to the mitigation
 * delay interval.
 */
#define NETIF_AD_MIT_GDECAY 3 /* ilog2(8) */
static uint32_t netif_ad_mit_gdecay = NETIF_AD_MIT_GDECAY;

#define NETIF_AD_MIT_SDECAY 2 /* ilog2(4) */
static uint32_t netif_ad_mit_sdecay = NETIF_AD_MIT_SDECAY;
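
/*
 * Illustrative only (not referenced by the code below): MIT_EWMA() weighs
 * a new sample by 1/2^decay. With the default decay values above, a
 * hypothetical average of 100 pulled up by a new sample of 200 uses gdecay:
 *
 *     uint32_t avg = 100;
 *     MIT_EWMA(avg, 200, netif_ad_mit_gdecay, netif_ad_mit_sdecay);
 *     avg == ((100 << 3) - 100 + 200) >> 3 == 112, i.e. ~1/8 of the gap
 *
 * whereas an average of 200 pulled down by a new sample of 100 uses sdecay:
 *
 *     uint32_t avg = 200;
 *     MIT_EWMA(avg, 100, netif_ad_mit_gdecay, netif_ad_mit_sdecay);
 *     avg == ((200 << 2) - 200 + 100) >> 2 == 175, i.e. 1/4 of the gap
 *
 * so the average shrinks faster than it grows, as described above.
 */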

#define NETIF_MIT_MODE_HOLDTIME_MIN (1ULL * 1000 * 1000)        /* 1 ms */
#define NETIF_MIT_MODE_HOLDTIME (1000ULL * 1000 * 1000)         /* 1 sec */
static uint64_t netif_mit_mode_holdtime = NETIF_MIT_MODE_HOLDTIME;

#define NETIF_MIT_SAMPLETIME_MIN (1ULL * 1000 * 1000)           /* 1 ms */
#define NETIF_MIT_SAMPLETIME (10ULL * 1000 * 1000)              /* 10 ms */
static uint64_t netif_mit_sample_holdtime = NETIF_MIT_SAMPLETIME;

/*
 * These numbers are based on the 10 ms netif_mit_sample_holdtime;
 * changing the hold time will require recomputing them.
 */
#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#endif /* !DEVELOPMENT && !DEBUG */
    { .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
      .cfg_bhiwat = 6000, .cfg_ival = 100 },
    { .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
      .cfg_bhiwat = 300000, .cfg_ival = 300 },
    { .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
      .cfg_bhiwat = 300000, .cfg_ival = 500 },
    { .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
      .cfg_bhiwat = 375000, .cfg_ival = 1000 },
    { .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
      .cfg_bhiwat = 30000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#endif /* !DEVELOPMENT && !DEBUG */
    { .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
      .cfg_bhiwat = 6000, .cfg_ival = 100 },
    { .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
      .cfg_bhiwat = 300000, .cfg_ival = 300 },
    { .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
      .cfg_bhiwat = 300000, .cfg_ival = 500 },
    { .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
      .cfg_bhiwat = 375000, .cfg_ival = 1000 },
    { .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
      .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#endif /* !DEVELOPMENT && !DEBUG */
    { .cfg_plowat = 10, .cfg_phiwat = 40, .cfg_blowat = 4000,
      .cfg_bhiwat = 6000, .cfg_ival = 300 },
    { .cfg_plowat = 20, .cfg_phiwat = 60, .cfg_blowat = 5000,
      .cfg_bhiwat = 150000, .cfg_ival = 500 },
    { .cfg_plowat = 40, .cfg_phiwat = 80, .cfg_blowat = 80000,
      .cfg_bhiwat = 200000, .cfg_ival = 700 },
    { .cfg_plowat = 60, .cfg_phiwat = 250, .cfg_blowat = 150000,
      .cfg_bhiwat = 375000, .cfg_ival = 1500 },
    { .cfg_plowat = 260, .cfg_phiwat = 2000, .cfg_blowat = 450000,
      .cfg_bhiwat = 3000000, .cfg_ival = 400 },
};

#if (DEVELOPMENT || DEBUG)
static int sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS;
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, busy_mit_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_busy_mit_delay,
    NETIF_BUSY_MIT_DELAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_gdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_gdecay, NETIF_AD_MIT_GDECAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_sdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_sdecay, NETIF_AD_MIT_SDECAY, "");
SYSCTL_PROC(_kern_skywalk_netif, OID_AUTO, ad_mit_freeze,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &netif_mit_mode_holdtime,
    NETIF_MIT_MODE_HOLDTIME, sysctl_mit_mode_holdtime, "Q", "");
#endif /* DEVELOPMENT || DEBUG */

void
nx_netif_mit_init(struct nx_netif *nif, const struct ifnet *ifp,
    struct nx_netif_mit *mit, struct __kern_channel_ring *kr,
    boolean_t simple)
{
#pragma unused(nif)
    thread_precedence_policy_data_t info;
    __unused kern_return_t kret;
    char oid_name[24];

    _CASSERT(sizeof(mit_cfg_tbl_native_cellular) <=
        sizeof(((struct nx_netif_mit *)0)->mit_tbl));

    lck_spin_init(&mit->mit_lock, kr->ckr_qlock_group, &channel_lock_attr);

    if (kr->ckr_tx == NR_TX) {
        if (simple) {
            (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                "skywalk_%s_tx_%u", ifp->if_xname, kr->ckr_ring_id);
        } else {
            (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                "skywalk_mit_%s_tx_%u", ifp->if_xname,
                kr->ckr_ring_id);
        }
        (void) snprintf(oid_name, sizeof(oid_name),
            "tx_%u", kr->ckr_ring_id);
    } else {
        if (simple) {
            (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                "skywalk_%s_rx_%u", ifp->if_xname, kr->ckr_ring_id);
        } else {
            (void) snprintf(mit->mit_name, sizeof(mit->mit_name),
                "skywalk_mit_%s_rx_%u", ifp->if_xname,
                kr->ckr_ring_id);
        }
        (void) snprintf(oid_name, sizeof(oid_name),
            "rx_%u", kr->ckr_ring_id);
    }

    mit->mit_ckr = kr;
    mit->mit_ckr->ckr_mit = mit;
    mit->mit_interval = 0;
    mit->mit_netif_ifp = ifp;

    if ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) && (ifp->if_family ==
        IFNET_FAMILY_CELLULAR)) {
        bcopy(mit_cfg_tbl_native_cellular,
            (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
            sizeof(mit_cfg_tbl_native_cellular));
        mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native_cellular) /
            sizeof(*mit->mit_cfg);
    } else if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
        bcopy(mit_cfg_tbl_native,
            (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
            sizeof(mit->mit_tbl));
        mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native) /
            sizeof(*mit->mit_cfg);
    } else {
        bcopy(mit_cfg_tbl_compat,
            (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
            sizeof(mit->mit_tbl));
        mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_compat) /
            sizeof(*mit->mit_cfg);
    }
    VERIFY(mit->mit_cfg_idx_max > 0);
    VERIFY(mit->mit_cfg_idx_max <= NETIF_MIT_CFG_TBL_MAX_CFG);

    if (ifp->if_rx_mit_ival != 0) {
        mit->mit_tbl[0].cfg_ival = ifp->if_rx_mit_ival;
        SK_D("mit interval updated: %s cfg %u ival %u",
            mit->mit_name, 0, mit->mit_tbl[0].cfg_ival);
    }

    net_timerclear(&mit->mit_mode_holdtime);
    net_timerclear(&mit->mit_mode_lasttime);
    net_timerclear(&mit->mit_sample_time);
    net_timerclear(&mit->mit_sample_lasttime);
    net_timerclear(&mit->mit_start_time);

    net_nsectimer(&netif_mit_mode_holdtime, &mit->mit_mode_holdtime);
    net_nsectimer(&netif_mit_sample_holdtime, &mit->mit_sample_time);

    /* initialize mode and params */
    nx_netif_mit_reset_interval(mit);
    VERIFY(mit->mit_cfg != NULL && mit->mit_cfg_idx < mit->mit_cfg_idx_max);
    mit->mit_flags = NETIF_MITF_INITIALIZED;
    if (simple) {
        /*
         * Simple mitigation where we don't collect any statistics
         * at all, and therefore don't want to register the ring's
         * ckr_netif_mit_stats() callback.
         */
        mit->mit_flags |= NETIF_MITF_SIMPLE;
        ASSERT(kr->ckr_netif_mit_stats == NULL);
    } else {
        /*
         * Regular mitigation where we collect stats and use them
         * for determining the delay between wakeups; initialize
         * the ring's ckr_netif_mit_stats() callback.
         */
        kr->ckr_netif_mit_stats = nx_netif_mit_stats;
    }

    if (kernel_thread_start(nx_netif_mit_thread_func, mit,
        &mit->mit_thread) != KERN_SUCCESS) {
        panic_plain("%s: can't create thread", mit->mit_name);
        /* NOTREACHED */
        __builtin_unreachable();
    }
    /* this must not fail */
    VERIFY(mit->mit_thread != NULL);

    /* wait until nx_netif_mit_thread_func() is ready */
    MIT_SPIN_LOCK(mit);
    while (!(mit->mit_flags & NETIF_MITF_READY)) {
        (void) assert_wait(&mit->mit_thread, THREAD_UNINT);
        MIT_SPIN_UNLOCK(mit);
        (void) thread_block(THREAD_CONTINUE_NULL);
        MIT_SPIN_LOCK(mit);
    }
    MIT_SPIN_UNLOCK(mit);

    bzero(&info, sizeof(info));
    info.importance = 0;
    kret = thread_policy_set(mit->mit_thread, THREAD_PRECEDENCE_POLICY,
        (thread_policy_t)&info, THREAD_PRECEDENCE_POLICY_COUNT);
    ASSERT(kret == KERN_SUCCESS);

#if (DEVELOPMENT || DEBUG)
    /* register mit sysctl skoid */
    skoid_create(&mit->mit_skoid, SKOID_DNODE(nif->nif_skoid), oid_name, 0);
    skoid_add_uint(&mit->mit_skoid, "interval", CTLFLAG_RW,
        &mit->mit_interval);
    struct skoid *skoid = &mit->mit_skoid;
    struct mit_cfg_tbl *t;
#define MIT_ADD_SKOID(_i)                                               \
    t = &mit->mit_tbl[_i];                                              \
    skoid_add_uint(skoid, #_i"_plowat", CTLFLAG_RW, &t->cfg_plowat);    \
    skoid_add_uint(skoid, #_i"_phiwat", CTLFLAG_RW, &t->cfg_phiwat);    \
    skoid_add_uint(skoid, #_i"_blowat", CTLFLAG_RW, &t->cfg_blowat);    \
    skoid_add_uint(skoid, #_i"_bhiwat", CTLFLAG_RW, &t->cfg_bhiwat);    \
    skoid_add_uint(skoid, #_i"_ival", CTLFLAG_RW, &t->cfg_ival);
    MIT_ADD_SKOID(0);
    MIT_ADD_SKOID(1);
    MIT_ADD_SKOID(2);
    MIT_ADD_SKOID(3);
    MIT_ADD_SKOID(4);
    _CASSERT(NETIF_MIT_CFG_TBL_MAX_CFG == 5);
#endif /* DEVELOPMENT || DEBUG */
}

__attribute__((always_inline))
static inline void
nx_netif_mit_reset_interval(struct nx_netif_mit *mit)
{
    (void) nx_netif_mit_update_interval(mit, TRUE);
}

__attribute__((always_inline))
static inline void
nx_netif_mit_set_start_interval(struct nx_netif_mit *mit)
{
    nanouptime(&mit->mit_start_time);
}

__attribute__((always_inline))
static inline uint32_t
nx_netif_mit_update_interval(struct nx_netif_mit *mit, boolean_t reset)
{
    struct timespec now, delta;
    uint64_t r;
    uint32_t i;

    nanouptime(&now);
    net_timersub(&now, &mit->mit_sample_lasttime, &delta);

    /* CSTYLED */
    if ((net_timercmp(&delta, &mit->mit_mode_holdtime, >)) || reset) {
        mit_mode_t mode = (mit->mit_flags & NETIF_MITF_SIMPLE) ?
            MIT_MODE_SIMPLE : MIT_MODE_ADVANCED_STATIC;

        /* if we haven't updated stats in a while, reset back to defaults */
        SK_DF(SK_VERB_NETIF_MIT, "%s: resetting [mode %u->%u]",
            mit->mit_name, mit->mit_mode, mode);

        mit->mit_mode = mode;
        mit->mit_cfg_idx = 0;
        mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
        mit->mit_packets_avg = 0;
        mit->mit_bytes_avg = 0;
    }

    /* calculate work duration (since the last work start time) */
    if (net_timerisset(&mit->mit_start_time)) {
        net_timersub(&now, &mit->mit_start_time, &delta);
        net_timerusec(&delta, &r);
    } else {
        r = 0;
    }

    switch (mit->mit_mode) {
    case MIT_MODE_SIMPLE:
        i = 0;
        break;

    case MIT_MODE_ADVANCED_STATIC:
        i = mit->mit_interval;
        break;

    case MIT_MODE_ADVANCED_DYNAMIC:
        i = mit->mit_cfg->cfg_ival;
        break;
    }

    /*
     * The idea here is to return the effective delay interval such that,
     * at a minimum, each work phase begins at the desired cadence.
     */
    if (__probable(r != 0)) {
        if (__probable(i > r)) {
            i -= r;
        } else {
            /* bump up cfg_idx perhaps? */
            i = 0;
        }
    }

    return i;
}
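
/*
 * A hypothetical walk-through of the above, assuming MIT_MODE_ADVANCED_DYNAMIC
 * and a current config row with cfg_ival = 500 (microseconds):
 *
 *     nx_netif_mit_set_start_interval(mit);            stamps mit_start_time
 *     error = nx_netif_common_intr(kr, kernproc, 0, NULL);  takes e.g. 120 us
 *     ival = nx_netif_mit_update_interval(mit, FALSE);      r == 120, i == 500
 *
 * Here the function returns 500 - 120 == 380, so the mitigation thread's
 * delay(380) plus the 120 us of work keeps successive work phases roughly
 * 500 us apart.  Had the work taken 500 us or longer, it would return 0 and
 * no artificial delay would be added.
 */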

void
nx_netif_mit_cleanup(struct nx_netif_mit *mit)
{
    if (mit->mit_thread != THREAD_NULL) {
        ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);

        /* signal thread to begin self-termination */
        MIT_SPIN_LOCK(mit);
        mit->mit_flags |= NETIF_MITF_TERMINATING;
        (void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
            mit->mit_thread);
        MIT_SPIN_UNLOCK(mit);

        /* and wait for thread to terminate */
        MIT_SPIN_LOCK(mit);
        while (!(mit->mit_flags & NETIF_MITF_TERMINATED)) {
            (void) assert_wait(&mit->mit_flags, THREAD_UNINT);
            MIT_SPIN_UNLOCK(mit);
            (void) thread_block(THREAD_CONTINUE_NULL);
            MIT_SPIN_LOCK(mit);
        }
        ASSERT(mit->mit_flags & NETIF_MITF_TERMINATED);
        MIT_SPIN_UNLOCK(mit);
        mit->mit_thread = THREAD_NULL;
    }
    ASSERT(mit->mit_thread == THREAD_NULL);
    lck_spin_destroy(&mit->mit_lock, mit->mit_ckr->ckr_qlock_group);

    mit->mit_ckr->ckr_mit = NULL;
    mit->mit_ckr = NULL;
    mit->mit_netif_ifp = NULL;
    mit->mit_flags &= ~NETIF_MITF_INITIALIZED;

    net_timerclear(&mit->mit_mode_holdtime);
    net_timerclear(&mit->mit_mode_lasttime);
    net_timerclear(&mit->mit_sample_time);
    net_timerclear(&mit->mit_sample_lasttime);
    net_timerclear(&mit->mit_start_time);

#if (DEVELOPMENT || DEBUG)
    skoid_destroy(&mit->mit_skoid);
#endif /* DEVELOPMENT || DEBUG */
}

int
nx_netif_mit_tx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
    struct nexus_netif_adapter *nifna =
        (struct nexus_netif_adapter *)KRNA(kr);
    struct netif_stats *nifs =
        &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

    ASSERT(kr->ckr_tx == NR_TX);
    STATS_INC(nifs, NETIF_STATS_TX_IRQ);

    /*
     * If mitigation is not enabled for this kring, we're done; otherwise,
     * signal the thread that there is work to do, unless it's terminating.
     */
    if (__probable(nifna->nifna_tx_mit == NULL)) {
        (void) nx_netif_common_intr(kr, p, flags, work_done);
    } else {
        struct nx_netif_mit *mit =
            &nifna->nifna_tx_mit[kr->ckr_ring_id];
        ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
        MIT_SPIN_LOCK(mit);
        mit->mit_requests++;
        if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
            NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
            (void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
                mit->mit_thread);
        }
        MIT_SPIN_UNLOCK(mit);
    }

    return 0;
}

int
nx_netif_mit_rx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
    struct nexus_netif_adapter *nifna =
        (struct nexus_netif_adapter *)KRNA(kr);
    struct netif_stats *nifs =
        &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

    KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_START), SK_KVA(kr));

    ASSERT(kr->ckr_tx == NR_RX);
    STATS_INC(nifs, NETIF_STATS_RX_IRQ);

    /*
     * If mitigation is enabled for this kring, signal the thread that there
     * is work to do, unless it's terminating. Otherwise, we're done.
     */
    if (__improbable(nifna->nifna_rx_mit != NULL)) {
        struct nx_netif_mit *mit =
            &nifna->nifna_rx_mit[kr->ckr_ring_id];
        ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
        MIT_SPIN_LOCK(mit);
        mit->mit_requests++;
        if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
            NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
            (void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
                mit->mit_thread);
        }
        MIT_SPIN_UNLOCK(mit);
    } else {
        (void) nx_netif_common_intr(kr, p, flags, work_done);
    }

    KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_END), SK_KVA(kr));

    return 0;
}

__attribute__((noreturn))
static void
nx_netif_mit_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
    struct nx_netif_mit *mit = v;

    ASSERT(mit->mit_thread == current_thread());
    thread_set_thread_name(current_thread(), mit->mit_name);

    MIT_SPIN_LOCK(mit);
    VERIFY(!(mit->mit_flags & (NETIF_MITF_READY | NETIF_MITF_RUNNING)));
    /* tell nx_netif_mit_init() to proceed */
    mit->mit_flags |= NETIF_MITF_READY;
    wakeup((caddr_t)&mit->mit_thread);
    (void) assert_wait(&mit->mit_flags, THREAD_UNINT);
    MIT_SPIN_UNLOCK(mit);
    if (mit->mit_flags & NETIF_MITF_SIMPLE) {
        (void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
    } else {
        (void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
    }
    /* NOTREACHED */
    __builtin_unreachable();
}

/*
 * Simple variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_s_thread_cont(void *v, wait_result_t wres)
{
    struct __kern_channel_ring *kr;
    struct nx_netif_mit *mit = v;
    struct netif_stats *nifs;
    int irq_stat, error;

    ASSERT(mit->mit_flags & NETIF_MITF_SIMPLE);
    kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
    nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
    irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
        NETIF_STATS_RX_IRQ_MIT;

    MIT_SPIN_LOCK(mit);
    if (__improbable(wres == THREAD_INTERRUPTED ||
        (mit->mit_flags & NETIF_MITF_TERMINATING))) {
        goto terminate;
    }

    ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
    mit->mit_flags |= NETIF_MITF_RUNNING;

    /*
     * Keep on servicing the ring until there are no more requests.
     */
    for (;;) {
        uint32_t requests = mit->mit_requests;

        STATS_INC(nifs, irq_stat);
        MIT_SPIN_UNLOCK(mit);

        error = nx_netif_common_intr(kr, kernproc, 0, NULL);

        /*
         * We could get EBUSY here due to netif_inject_rx() holding
         * the kring lock. EBUSY means the RX notify callback (which
         * does the RX syncs, etc.) wasn't called. If we don't retry
         * nx_netif_common_intr(), the driver will eventually stop
         * notifying due to its queues being full.
         */
        if (error == EBUSY) {
            uint32_t ival =
                MAX(netif_busy_mit_delay, NETIF_BUSY_MIT_DELAY);

            MIT_SPIN_LOCK(mit);
            mit->mit_requests++;
            MIT_SPIN_UNLOCK(mit);
            delay(ival);
        }

        MIT_SPIN_LOCK(mit);

        if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
            requests == mit->mit_requests) {
            mit->mit_requests = 0;
            break;
        }
    }

    if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
        uint64_t deadline = TIMEOUT_WAIT_FOREVER;

        MIT_SPIN_LOCK_ASSERT_HELD(mit);

        if (kr->ckr_rate_limited) {
            SK_DF(SK_VERB_NETIF_MIT,
                "%s: posting wait deadline for MIT",
                mit->mit_name);
            clock_interval_to_deadline(1, NSEC_PER_MSEC,
                &deadline);
        }
        mit->mit_flags &= ~NETIF_MITF_RUNNING;
        (void) assert_wait_deadline(&mit->mit_flags,
            THREAD_UNINT, deadline);
        MIT_SPIN_UNLOCK(mit);
        (void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
        /* NOTREACHED */
    } else {
terminate:
        MIT_SPIN_LOCK_ASSERT_HELD(mit);

        VERIFY(mit->mit_thread == current_thread());
        VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
        mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
            NETIF_MITF_TERMINATING);
        mit->mit_flags |= NETIF_MITF_TERMINATED;
        wakeup((caddr_t)&mit->mit_flags);
        MIT_SPIN_UNLOCK(mit);

        /* for the extra refcnt from kernel_thread_start() */
        thread_deallocate(current_thread());
        /* this is the end */
        thread_terminate(current_thread());
        /* NOTREACHED */
    }

    /* must never get here */
    VERIFY(0);
    /* NOTREACHED */
    __builtin_unreachable();
}

/*
 * Advanced variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_thread_cont(void *v, wait_result_t wres)
{
    struct __kern_channel_ring *kr;
    struct nx_netif_mit *mit = v;
    struct netif_stats *nifs;
    int irq_stat;

    ASSERT(!(mit->mit_flags & NETIF_MITF_SIMPLE));
    kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
    nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
    irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
        NETIF_STATS_RX_IRQ_MIT;

    MIT_SPIN_LOCK(mit);
    if (__improbable(wres == THREAD_INTERRUPTED ||
        (mit->mit_flags & NETIF_MITF_TERMINATING))) {
        goto terminate;
    }

    ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
    mit->mit_flags |= NETIF_MITF_RUNNING;

    /*
     * Keep on servicing the ring until there are no more requests.
     */
    for (;;) {
        uint32_t requests = mit->mit_requests;
        uint32_t ival;
        int error = 0;

        STATS_INC(nifs, irq_stat);
        MIT_SPIN_UNLOCK(mit);

        /*
         * Notify the ring and trigger packets fan-out;
         * bracket the call with timestamps to compute
         * our effective mitigation/delay interval below.
         */
        nx_netif_mit_set_start_interval(mit);
        error = nx_netif_common_intr(kr, kernproc, 0, NULL);
        ival = nx_netif_mit_update_interval(mit, FALSE);

        /*
         * If the mitigation interval is non-zero (for TX/RX)
         * then we always introduce an artificial delay
         * for that amount of time. Otherwise, if we get
         * EBUSY, then kr_enter() has another thread that
         * is working on it, and so we should wait a bit.
         */
        if (ival != 0 || error == EBUSY) {
            if (error == EBUSY) {
                ival = MAX(netif_busy_mit_delay,
                    NETIF_BUSY_MIT_DELAY);
                MIT_SPIN_LOCK(mit);
                mit->mit_requests++;
                MIT_SPIN_UNLOCK(mit);
            }
            delay(ival);
        }

        MIT_SPIN_LOCK(mit);

        if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
            requests == mit->mit_requests) {
            mit->mit_requests = 0;
            break;
        }
    }

    if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
        uint64_t deadline = TIMEOUT_WAIT_FOREVER;

        MIT_SPIN_LOCK_ASSERT_HELD(mit);

        if (kr->ckr_rate_limited) {
            SK_DF(SK_VERB_NETIF_MIT,
                "%s: posting wait deadline for MIT",
                mit->mit_name);
            clock_interval_to_deadline(1, NSEC_PER_MSEC,
                &deadline);
        }
        mit->mit_flags &= ~NETIF_MITF_RUNNING;
        (void) assert_wait_deadline(&mit->mit_flags,
            THREAD_UNINT, deadline);
        MIT_SPIN_UNLOCK(mit);
        (void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
        /* NOTREACHED */
    } else {
terminate:
        MIT_SPIN_LOCK_ASSERT_HELD(mit);

        VERIFY(mit->mit_thread == current_thread());
        VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
        mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
            NETIF_MITF_TERMINATING);
        mit->mit_flags |= NETIF_MITF_TERMINATED;
        wakeup((caddr_t)&mit->mit_flags);
        MIT_SPIN_UNLOCK(mit);

        /* for the extra refcnt from kernel_thread_start() */
        thread_deallocate(current_thread());
        /* this is the end */
        thread_terminate(current_thread());
        /* NOTREACHED */
    }

    /* must never get here */
    VERIFY(0);
    /* NOTREACHED */
    __builtin_unreachable();
}

static void
nx_netif_mit_stats(struct __kern_channel_ring *kr, uint64_t pkts,
    uint64_t bytes)
{
    struct nx_netif_mit *mit = kr->ckr_mit;
    struct timespec now, delta;
    mit_mode_t mode;
    uint32_t cfg_idx;

    ASSERT(mit != NULL && !(mit->mit_flags & NETIF_MITF_SIMPLE));

    if ((os_atomic_or_orig(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed) &
        NETIF_MITF_SAMPLING) != 0) {
        return;
    }

    mode = mit->mit_mode;
    cfg_idx = mit->mit_cfg_idx;

    nanouptime(&now);
    if (!net_timerisset(&mit->mit_sample_lasttime)) {
        *(&mit->mit_sample_lasttime) = *(&now);
    }

    net_timersub(&now, &mit->mit_sample_lasttime, &delta);
    if (net_timerisset(&mit->mit_sample_time)) {
        uint32_t ptot, btot;

        /* accumulate statistics for current sampling */
        PKTCNTR_ADD(&mit->mit_sstats, pkts, bytes);

        /* CSTYLED */
        if (net_timercmp(&delta, &mit->mit_sample_time, <)) {
            goto done;
        }

        *(&mit->mit_sample_lasttime) = *(&now);

        /* calculate min/max of bytes */
        btot = (uint32_t)mit->mit_sstats.bytes;
        if (mit->mit_bytes_min == 0 || mit->mit_bytes_min > btot) {
            mit->mit_bytes_min = btot;
        }
        if (btot > mit->mit_bytes_max) {
            mit->mit_bytes_max = btot;
        }

        /* calculate EWMA of bytes */
        MIT_EWMA(mit->mit_bytes_avg, btot,
            netif_ad_mit_gdecay, netif_ad_mit_sdecay);

        /* calculate min/max of packets */
        ptot = (uint32_t)mit->mit_sstats.packets;
        if (mit->mit_packets_min == 0 || mit->mit_packets_min > ptot) {
            mit->mit_packets_min = ptot;
        }
        if (ptot > mit->mit_packets_max) {
            mit->mit_packets_max = ptot;
        }

        /* calculate EWMA of packets */
        MIT_EWMA(mit->mit_packets_avg, ptot,
            netif_ad_mit_gdecay, netif_ad_mit_sdecay);

        /* reset sampling statistics */
        PKTCNTR_CLEAR(&mit->mit_sstats);

        /* Perform mode transition, if necessary */
        if (!net_timerisset(&mit->mit_mode_lasttime)) {
            *(&mit->mit_mode_lasttime) = *(&now);
        }

        net_timersub(&now, &mit->mit_mode_lasttime, &delta);
        /* CSTYLED */
        if (net_timercmp(&delta, &mit->mit_mode_holdtime, <)) {
            goto done;
        }

        SK_RDF(SK_VERB_NETIF_MIT, 2, "%s [%u]: pavg %u bavg %u "
            "delay %llu usec", mit->mit_name, mit->mit_cfg_idx,
            mit->mit_packets_avg, mit->mit_bytes_avg,
            (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
            (mit->mit_tbl[mit->mit_cfg_idx].cfg_ival)));

        if (mit->mit_packets_avg <= mit->mit_cfg->cfg_plowat &&
            mit->mit_bytes_avg <= mit->mit_cfg->cfg_blowat) {
            if (cfg_idx == 0) {
                mode = MIT_MODE_ADVANCED_STATIC;
            } else {
                ASSERT(mode == MIT_MODE_ADVANCED_DYNAMIC);
                --cfg_idx;
            }
        } else if (mit->mit_packets_avg >= mit->mit_cfg->cfg_phiwat &&
            mit->mit_bytes_avg >= mit->mit_cfg->cfg_bhiwat) {
            mode = MIT_MODE_ADVANCED_DYNAMIC;
            if (cfg_idx < (mit->mit_cfg_idx_max - 1)) {
                ++cfg_idx;
            }
        }

        if (mode != mit->mit_mode || cfg_idx != mit->mit_cfg_idx) {
            ASSERT(cfg_idx < mit->mit_cfg_idx_max);

            SK_DF(SK_VERB_NETIF_MIT, "%s [%u->%u]: pavg %u "
                "bavg %u [mode %u->%u, delay %llu->%llu usec]",
                mit->mit_name, mit->mit_cfg_idx, cfg_idx,
                mit->mit_packets_avg, mit->mit_bytes_avg,
                mit->mit_mode, mode,
                (mit->mit_mode == MIT_MODE_ADVANCED_STATIC ? 0 :
                (mit->mit_cfg->cfg_ival)),
                (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
                (mit->mit_tbl[cfg_idx].cfg_ival)));

            mit->mit_mode = mode;
            mit->mit_cfg_idx = cfg_idx;
            mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
            *(&mit->mit_mode_lasttime) = *(&now);
        }
    }

done:
    os_atomic_andnot(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed);
}
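
/*
 * Illustrative example of the transition logic above, assuming the first two
 * rows of mit_cfg_tbl_native: starting at cfg_idx 0 (phiwat 60, bhiwat 6000,
 * ival 100), once at least the 1 second mode hold time has elapsed since the
 * last transition and the packet/byte EWMAs are at or above 60 packets and
 * 6000 bytes per 10 ms sample, the mode becomes MIT_MODE_ADVANCED_DYNAMIC and
 * cfg_idx moves to row 1 (ival 300), raising the mitigation delay.  If the
 * averages later drop to or below row 1's low watermarks (plowat 40,
 * blowat 5000), cfg_idx steps back to 0; a further drop below row 0's low
 * watermarks returns the mode to MIT_MODE_ADVANCED_STATIC.
 */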

#if (DEVELOPMENT || DEBUG)
static int
sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    uint64_t q;
    int err;

    q = netif_mit_mode_holdtime;

    err = sysctl_handle_quad(oidp, &q, 0, req);
    if (err != 0 || req->newptr == USER_ADDR_NULL) {
        return err;
    }

    if (q < NETIF_MIT_MODE_HOLDTIME_MIN) {
        q = NETIF_MIT_MODE_HOLDTIME_MIN;
    }

    netif_mit_mode_holdtime = q;

    return err;
}
#endif /* DEVELOPMENT || DEBUG */