/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <sys/kdebug.h>
#include <mach/thread_act.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>

extern kern_return_t thread_terminate(thread_t);

static void nx_netif_mit_reset_interval(struct nx_netif_mit *);
static void nx_netif_mit_set_start_interval(struct nx_netif_mit *);
static uint32_t nx_netif_mit_update_interval(struct nx_netif_mit *, boolean_t);
static void nx_netif_mit_thread_func(void *, wait_result_t);
static void nx_netif_mit_thread_cont(void *, wait_result_t);
static void nx_netif_mit_s_thread_cont(void *, wait_result_t);
static void nx_netif_mit_stats(struct __kern_channel_ring *, uint64_t,
    uint64_t);
/* mitigation intervals in microseconds */
#define NETIF_BUSY_MIT_DELAY	(100)

static uint32_t netif_busy_mit_delay = NETIF_BUSY_MIT_DELAY;

#define MIT_EWMA(old, new, gdecay, sdecay) do {                         \
	uint32_t _avg;                                                  \
	if ((_avg = (old)) > 0) {                                       \
	        uint32_t _d = ((new) > _avg) ? gdecay : sdecay;         \
	        _avg = (((_avg << (_d)) - _avg) + (new)) >> (_d);       \
	} else {                                                        \
	        _avg = (new);                                           \
	}                                                               \
	(old) = _avg;                                                   \
} while (0)

/*
 * A larger decay factor results in a slower reaction.  Each value is the
 * ilog2 of the EWMA decay rate; one for growth and another for shrink.
 * The two decay factors are chosen such that we react quickly to shrink,
 * and slowly to grow.  Growth and shrink are relative to the mitigation
 * delay interval.
 */
#define NETIF_AD_MIT_GDECAY	3	/* ilog2(8) */
static uint32_t netif_ad_mit_gdecay = NETIF_AD_MIT_GDECAY;

#define NETIF_AD_MIT_SDECAY	2	/* ilog2(4) */
static uint32_t netif_ad_mit_sdecay = NETIF_AD_MIT_SDECAY;
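
/*
 * A minimal sketch of MIT_EWMA in action (hypothetical sample values;
 * the block below is compiled out and exists only for illustration):
 */
#if 0
	uint32_t avg = 0;
	MIT_EWMA(avg, 1000, NETIF_AD_MIT_GDECAY, NETIF_AD_MIT_SDECAY);
	/* avg == 1000; the first nonzero sample seeds the average */
	MIT_EWMA(avg, 2000, NETIF_AD_MIT_GDECAY, NETIF_AD_MIT_SDECAY);
	/* avg == 1125; growth: ((1000 << 3) - 1000 + 2000) >> 3 */
	MIT_EWMA(avg, 0, NETIF_AD_MIT_GDECAY, NETIF_AD_MIT_SDECAY);
	/* avg == 843; shrink: ((1125 << 2) - 1125 + 0) >> 2 */
#endif
/*
 * In other words, with gdecay = 3 the average grows by (new - avg)/8
 * per sample, while with sdecay = 2 it shrinks by (avg - new)/4 per
 * sample, i.e. it ramps down roughly twice as fast as it ramps up.
 */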

#define NETIF_MIT_MODE_HOLDTIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define NETIF_MIT_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
static uint64_t netif_mit_mode_holdtime = NETIF_MIT_MODE_HOLDTIME;

#define NETIF_MIT_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
#define NETIF_MIT_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
static uint64_t netif_mit_sample_holdtime = NETIF_MIT_SAMPLETIME;

/*
 * These numbers are based on a 10ms netif_mit_sample_holdtime;
 * changing the hold time will require recomputing them.
 */
#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 100 },
	{ .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
	  .cfg_bhiwat = 300000, .cfg_ival = 300 },
	{ .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
	  .cfg_bhiwat = 300000, .cfg_ival = 500 },
	{ .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1000 },
	{ .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_compat[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 60, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 100 },
	{ .cfg_plowat = 40, .cfg_phiwat = 100, .cfg_blowat = 5000,
	  .cfg_bhiwat = 300000, .cfg_ival = 300 },
	{ .cfg_plowat = 80, .cfg_phiwat = 200, .cfg_blowat = 100000,
	  .cfg_bhiwat = 300000, .cfg_ival = 500 },
	{ .cfg_plowat = 220, .cfg_phiwat = 240, .cfg_blowat = 330000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1000 },
	{ .cfg_plowat = 250, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 200 },
};

#if (DEVELOPMENT || DEBUG)
static struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#else /* !DEVELOPMENT && !DEBUG */
static const struct mit_cfg_tbl mit_cfg_tbl_native_cellular[] = {
#endif /* !DEVELOPMENT && !DEBUG */
	{ .cfg_plowat = 10, .cfg_phiwat = 40, .cfg_blowat = 4000,
	  .cfg_bhiwat = 6000, .cfg_ival = 300 },
	{ .cfg_plowat = 20, .cfg_phiwat = 60, .cfg_blowat = 5000,
	  .cfg_bhiwat = 150000, .cfg_ival = 500 },
	{ .cfg_plowat = 40, .cfg_phiwat = 80, .cfg_blowat = 80000,
	  .cfg_bhiwat = 200000, .cfg_ival = 700 },
	{ .cfg_plowat = 60, .cfg_phiwat = 250, .cfg_blowat = 150000,
	  .cfg_bhiwat = 375000, .cfg_ival = 1500 },
	{ .cfg_plowat = 260, .cfg_phiwat = 2000, .cfg_blowat = 450000,
	  .cfg_bhiwat = 3000000, .cfg_ival = 400 },
};
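
/*
 * Illustrative walk-through of a config row (using the native table
 * above; the traffic numbers are hypothetical): while a ring sits at
 * config index 1, each service is delayed by that row's cfg_ival of
 * 300 usec.  If the per-sample EWMAs decay to <= 40 packets and
 * <= 5000 bytes (cfg_plowat/cfg_blowat), the ring steps down to index
 * 0; if they grow to >= 100 packets and >= 300000 bytes
 * (cfg_phiwat/cfg_bhiwat), it steps up to index 2.  The transition
 * logic itself lives in nx_netif_mit_stats() below.
 */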

#if (DEVELOPMENT || DEBUG)
static int sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS;
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, busy_mit_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_busy_mit_delay,
    NETIF_BUSY_MIT_DELAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_gdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_gdecay, NETIF_AD_MIT_GDECAY, "");
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, ad_mit_sdecay,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_ad_mit_sdecay, NETIF_AD_MIT_SDECAY, "");
SYSCTL_PROC(_kern_skywalk_netif, OID_AUTO, ad_mit_freeze,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &netif_mit_mode_holdtime,
    NETIF_MIT_MODE_HOLDTIME, sysctl_mit_mode_holdtime, "Q", "");
#endif /* !DEVELOPMENT && !DEBUG */

void
nx_netif_mit_init(struct nx_netif *nif, const struct ifnet *ifp,
    struct nx_netif_mit *mit, struct __kern_channel_ring *kr,
    boolean_t simple)
{
#pragma unused(nif)
	thread_precedence_policy_data_t info;
	__unused kern_return_t kret;
	char oid_name[24];

	_CASSERT(sizeof(mit_cfg_tbl_native_cellular) <=
	    sizeof(((struct nx_netif_mit *)0)->mit_tbl));

	lck_spin_init(&mit->mit_lock, kr->ckr_qlock_group, &channel_lock_attr);

	if (kr->ckr_tx == NR_TX) {
		if (simple) {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_%s_tx_%u", ifp->if_xname, kr->ckr_ring_id);
		} else {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_mit_%s_tx_%u", ifp->if_xname,
			    kr->ckr_ring_id);
		}
		(void) snprintf(oid_name, sizeof(oid_name),
		    "tx_%u", kr->ckr_ring_id);
	} else {
		if (simple) {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_%s_rx_%u", ifp->if_xname, kr->ckr_ring_id);
		} else {
			(void) snprintf(mit->mit_name, sizeof(mit->mit_name),
			    "skywalk_mit_%s_rx_%u", ifp->if_xname,
			    kr->ckr_ring_id);
		}
		(void) snprintf(oid_name, sizeof(oid_name),
		    "rx_%u", kr->ckr_ring_id);
	}

	mit->mit_ckr = kr;
	mit->mit_ckr->ckr_mit = mit;
	mit->mit_interval = 0;
	mit->mit_netif_ifp = ifp;

	if ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) && (ifp->if_family ==
	    IFNET_FAMILY_CELLULAR)) {
		bcopy(mit_cfg_tbl_native_cellular,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit_cfg_tbl_native_cellular));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native_cellular) /
		    sizeof(*mit->mit_cfg);
	} else if (ifp->if_eflags & IFEF_SKYWALK_NATIVE) {
		bcopy(mit_cfg_tbl_native,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit->mit_tbl));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_native) /
		    sizeof(*mit->mit_cfg);
	} else {
		bcopy(mit_cfg_tbl_compat,
		    (void *)__DECONST(struct mit_cfg_tbl *, mit->mit_tbl),
		    sizeof(mit->mit_tbl));
		mit->mit_cfg_idx_max = sizeof(mit_cfg_tbl_compat) /
		    sizeof(*mit->mit_cfg);
	}
	VERIFY(mit->mit_cfg_idx_max > 0);
	VERIFY(mit->mit_cfg_idx_max <= NETIF_MIT_CFG_TBL_MAX_CFG);

	if (ifp->if_rx_mit_ival != 0) {
		mit->mit_tbl[0].cfg_ival = ifp->if_rx_mit_ival;
		SK_D("mit interval updated: %s cfg %u ival %u",
		    mit->mit_name, 0, mit->mit_tbl[0].cfg_ival);
	}

	net_timerclear(&mit->mit_mode_holdtime);
	net_timerclear(&mit->mit_mode_lasttime);
	net_timerclear(&mit->mit_sample_time);
	net_timerclear(&mit->mit_sample_lasttime);
	net_timerclear(&mit->mit_start_time);

	net_nsectimer(&netif_mit_mode_holdtime, &mit->mit_mode_holdtime);
	net_nsectimer(&netif_mit_sample_holdtime, &mit->mit_sample_time);

	/* initialize mode and params */
	nx_netif_mit_reset_interval(mit);
	VERIFY(mit->mit_cfg != NULL && mit->mit_cfg_idx < mit->mit_cfg_idx_max);
	mit->mit_flags = NETIF_MITF_INITIALIZED;
	if (simple) {
		/*
		 * Simple mitigation where we don't collect any statistics
		 * at all, and therefore don't want to register the ring's
		 * ckr_netif_mit_stats() callback.
		 */
		mit->mit_flags |= NETIF_MITF_SIMPLE;
		ASSERT(kr->ckr_netif_mit_stats == NULL);
	} else {
		/*
		 * Regular mitigation where we collect stats and use them
		 * for determining the delay between wakeups; initialize
		 * the ring's ckr_netif_mit_stats() callback.
		 */
		kr->ckr_netif_mit_stats = nx_netif_mit_stats;
	}

	if (kernel_thread_start(nx_netif_mit_thread_func, mit,
	    &mit->mit_thread) != KERN_SUCCESS) {
		panic_plain("%s: can't create thread", mit->mit_name);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	/* this must not fail */
	VERIFY(mit->mit_thread != NULL);

	/* wait until nx_netif_mit_thread_func() is ready */
	MIT_SPIN_LOCK(mit);
	while (!(mit->mit_flags & NETIF_MITF_READY)) {
		(void) assert_wait(&mit->mit_thread, THREAD_UNINT);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block(THREAD_CONTINUE_NULL);
		MIT_SPIN_LOCK(mit);
	}
	MIT_SPIN_UNLOCK(mit);

	bzero(&info, sizeof(info));
	info.importance = 0;
	kret = thread_policy_set(mit->mit_thread, THREAD_PRECEDENCE_POLICY,
	    (thread_policy_t)&info, THREAD_PRECEDENCE_POLICY_COUNT);
	ASSERT(kret == KERN_SUCCESS);

#if (DEVELOPMENT || DEBUG)
	/* register mit sysctl skoid */
	skoid_create(&mit->mit_skoid, SKOID_DNODE(nif->nif_skoid), oid_name, 0);
	skoid_add_uint(&mit->mit_skoid, "interval", CTLFLAG_RW,
	    &mit->mit_interval);
	struct skoid *skoid = &mit->mit_skoid;
	struct mit_cfg_tbl *t;
#define MIT_ADD_SKOID(_i)                                                \
	t = &mit->mit_tbl[_i];                                           \
	skoid_add_uint(skoid, #_i"_plowat", CTLFLAG_RW, &t->cfg_plowat); \
	skoid_add_uint(skoid, #_i"_phiwat", CTLFLAG_RW, &t->cfg_phiwat); \
	skoid_add_uint(skoid, #_i"_blowat", CTLFLAG_RW, &t->cfg_blowat); \
	skoid_add_uint(skoid, #_i"_bhiwat", CTLFLAG_RW, &t->cfg_bhiwat); \
	skoid_add_uint(skoid, #_i"_ival", CTLFLAG_RW, &t->cfg_ival);
	MIT_ADD_SKOID(0);
	MIT_ADD_SKOID(1);
	MIT_ADD_SKOID(2);
	MIT_ADD_SKOID(3);
	MIT_ADD_SKOID(4);
	_CASSERT(NETIF_MIT_CFG_TBL_MAX_CFG == 5);
#endif /* !DEVELOPMENT && !DEBUG */
}

__attribute__((always_inline))
static inline void
nx_netif_mit_reset_interval(struct nx_netif_mit *mit)
{
	(void) nx_netif_mit_update_interval(mit, TRUE);
}

__attribute__((always_inline))
static inline void
nx_netif_mit_set_start_interval(struct nx_netif_mit *mit)
{
	nanouptime(&mit->mit_start_time);
}

__attribute__((always_inline))
static inline uint32_t
nx_netif_mit_update_interval(struct nx_netif_mit *mit, boolean_t reset)
{
	struct timespec now, delta;
	uint64_t r;
	uint32_t i;

	nanouptime(&now);
	net_timersub(&now, &mit->mit_sample_lasttime, &delta);

	/* CSTYLED */
	if ((net_timercmp(&delta, &mit->mit_mode_holdtime, >)) || reset) {
		mit_mode_t mode = (mit->mit_flags & NETIF_MITF_SIMPLE) ?
		    MIT_MODE_SIMPLE : MIT_MODE_ADVANCED_STATIC;

		/* if we haven't updated stats in a while, reset back to defaults */
		SK_DF(SK_VERB_NETIF_MIT, "%s: resetting [mode %u->%u]",
		    mit->mit_name, mit->mit_mode, mode);

		mit->mit_mode = mode;
		mit->mit_cfg_idx = 0;
		mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
		mit->mit_packets_avg = 0;
		mit->mit_bytes_avg = 0;
	}

	/* calculate work duration (since last start work time) */
	if (net_timerisset(&mit->mit_start_time)) {
		net_timersub(&now, &mit->mit_start_time, &delta);
		net_timerusec(&delta, &r);
	} else {
		r = 0;
	}

	switch (mit->mit_mode) {
	case MIT_MODE_SIMPLE:
		i = 0;
		break;

	case MIT_MODE_ADVANCED_STATIC:
		i = mit->mit_interval;
		break;

	case MIT_MODE_ADVANCED_DYNAMIC:
		i = mit->mit_cfg->cfg_ival;
		break;
	}

	/*
	 * The idea here is to return the effective delay interval that
	 * causes each work phase to begin at the desired cadence, at
	 * the minimum.
	 */
	if (__probable(r != 0)) {
		if (__probable(i > r)) {
			i -= r;
		} else {
			/* bump up cfg_idx perhaps? */
			i = 0;
		}
	}

	return i;
}
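
/*
 * Worked example of the effective-delay computation above (hypothetical
 * numbers, not from a trace): in MIT_MODE_ADVANCED_DYNAMIC with
 * cfg_ival = 500 usec, a work phase that measured r = 200 usec since
 * mit_start_time yields:
 *
 *	i = 500 - 200;		// 300 usec of artificial delay
 *
 * so successive work phases begin at most every ~500 usec.  If the work
 * alone already took at least as long as the interval (r >= i), the
 * function returns 0 and no extra delay is introduced.
 */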

void
nx_netif_mit_cleanup(struct nx_netif_mit *mit)
{
	if (mit->mit_thread != THREAD_NULL) {
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);

		/* signal thread to begin self-termination */
		MIT_SPIN_LOCK(mit);
		mit->mit_flags |= NETIF_MITF_TERMINATING;
		(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
		    mit->mit_thread);
		MIT_SPIN_UNLOCK(mit);

		/* and wait for thread to terminate */
		MIT_SPIN_LOCK(mit);
		while (!(mit->mit_flags & NETIF_MITF_TERMINATED)) {
			(void) assert_wait(&mit->mit_flags, THREAD_UNINT);
			MIT_SPIN_UNLOCK(mit);
			(void) thread_block(THREAD_CONTINUE_NULL);
			MIT_SPIN_LOCK(mit);
		}
		ASSERT(mit->mit_flags & NETIF_MITF_TERMINATED);
		MIT_SPIN_UNLOCK(mit);
		mit->mit_thread = THREAD_NULL;
	}
	ASSERT(mit->mit_thread == THREAD_NULL);
	lck_spin_destroy(&mit->mit_lock, mit->mit_ckr->ckr_qlock_group);

	mit->mit_ckr->ckr_mit = NULL;
	mit->mit_ckr = NULL;
	mit->mit_netif_ifp = NULL;
	mit->mit_flags &= ~NETIF_MITF_INITIALIZED;

	net_timerclear(&mit->mit_mode_holdtime);
	net_timerclear(&mit->mit_mode_lasttime);
	net_timerclear(&mit->mit_sample_time);
	net_timerclear(&mit->mit_sample_lasttime);
	net_timerclear(&mit->mit_start_time);

#if (DEVELOPMENT || DEBUG)
	skoid_destroy(&mit->mit_skoid);
#endif /* !DEVELOPMENT && !DEBUG */
}

int
nx_netif_mit_tx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct nexus_netif_adapter *nifna =
	    (struct nexus_netif_adapter *)KRNA(kr);
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

	ASSERT(kr->ckr_tx == NR_TX);
	STATS_INC(nifs, NETIF_STATS_TX_IRQ);

	/*
	 * If mitigation is not enabled for this kring, we're done; otherwise,
	 * signal the thread that there is work to do, unless it's terminating.
	 */
	if (__probable(nifna->nifna_tx_mit == NULL)) {
		(void) nx_netif_common_intr(kr, p, flags, work_done);
	} else {
		struct nx_netif_mit *mit =
		    &nifna->nifna_tx_mit[kr->ckr_ring_id];
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
		MIT_SPIN_LOCK(mit);
		mit->mit_requests++;
		if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
			(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
			    mit->mit_thread);
		}
		MIT_SPIN_UNLOCK(mit);
	}

	return 0;
}

int
nx_netif_mit_rx_intr(struct __kern_channel_ring *kr, struct proc *p,
    uint32_t flags, uint32_t *work_done)
{
	struct nexus_netif_adapter *nifna =
	    (struct nexus_netif_adapter *)KRNA(kr);
	struct netif_stats *nifs =
	    &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;

	KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_START), SK_KVA(kr));

	ASSERT(kr->ckr_tx == NR_RX);
	STATS_INC(nifs, NETIF_STATS_RX_IRQ);

	/*
	 * If mitigation is enabled for this kring, signal the thread that
	 * there is work to do, unless it's terminating.  Otherwise, we're
	 * done.
	 */
	if (__improbable(nifna->nifna_rx_mit != NULL)) {
		struct nx_netif_mit *mit =
		    &nifna->nifna_rx_mit[kr->ckr_ring_id];
		ASSERT(mit->mit_flags & NETIF_MITF_INITIALIZED);
		MIT_SPIN_LOCK(mit);
		mit->mit_requests++;
		if (!(mit->mit_flags & (NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING | NETIF_MITF_TERMINATED))) {
			(void) thread_wakeup_thread((caddr_t)&mit->mit_flags,
			    mit->mit_thread);
		}
		MIT_SPIN_UNLOCK(mit);
	} else {
		(void) nx_netif_common_intr(kr, p, flags, work_done);
	}

	KDBG((SK_KTRACE_NETIF_MIT_RX_INTR | DBG_FUNC_END), SK_KVA(kr));

	return 0;
}

__attribute__((noreturn))
static void
nx_netif_mit_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct nx_netif_mit *mit = v;

	ASSERT(mit->mit_thread == current_thread());
	thread_set_thread_name(current_thread(), mit->mit_name);

	MIT_SPIN_LOCK(mit);
	VERIFY(!(mit->mit_flags & (NETIF_MITF_READY | NETIF_MITF_RUNNING)));
	/* tell nx_netif_mit_init() to proceed */
	mit->mit_flags |= NETIF_MITF_READY;
	wakeup((caddr_t)&mit->mit_thread);
	(void) assert_wait(&mit->mit_flags, THREAD_UNINT);
	MIT_SPIN_UNLOCK(mit);
	if (mit->mit_flags & NETIF_MITF_SIMPLE) {
		(void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
	} else {
		(void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
	}
	/* NOTREACHED */
	__builtin_unreachable();
}

/*
 * Simple variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_s_thread_cont(void *v, wait_result_t wres)
{
	struct __kern_channel_ring *kr;
	struct nx_netif_mit *mit = v;
	struct netif_stats *nifs;
	int irq_stat, error;

	ASSERT(mit->mit_flags & NETIF_MITF_SIMPLE);
	kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
	nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
	irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
	    NETIF_STATS_RX_IRQ_MIT;

	MIT_SPIN_LOCK(mit);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (mit->mit_flags & NETIF_MITF_TERMINATING))) {
		goto terminate;
	}

	ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
	mit->mit_flags |= NETIF_MITF_RUNNING;

	/*
	 * Keep on servicing the ring until there are no more requests.
	 */
	for (;;) {
		uint32_t requests = mit->mit_requests;

		STATS_INC(nifs, irq_stat);
		MIT_SPIN_UNLOCK(mit);

		error = nx_netif_common_intr(kr, kernproc, 0, NULL);

		/*
		 * We could get EBUSY here due to netif_inject_rx() holding
		 * the kring lock.  EBUSY means the RX notify callback (which
		 * does the RX syncs, etc.) wasn't called.  If we don't retry
		 * nx_netif_common_intr(), the driver will eventually stop
		 * notifying due to its queues being full.
		 */
		if (error == EBUSY) {
			uint32_t ival =
			    MAX(netif_busy_mit_delay, NETIF_BUSY_MIT_DELAY);

			MIT_SPIN_LOCK(mit);
			mit->mit_requests++;
			MIT_SPIN_UNLOCK(mit);
			delay(ival);
		}

		MIT_SPIN_LOCK(mit);

		if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
		    requests == mit->mit_requests) {
			mit->mit_requests = 0;
			break;
		}
	}

	if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		if (kr->ckr_rate_limited) {
			SK_DF(SK_VERB_NETIF_MIT,
			    "%s: posting wait deadline for MIT",
			    mit->mit_name);
			clock_interval_to_deadline(1, NSEC_PER_MSEC,
			    &deadline);
		}
		mit->mit_flags &= ~NETIF_MITF_RUNNING;
		(void) assert_wait_deadline(&mit->mit_flags,
		    THREAD_UNINT, deadline);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block_parameter(nx_netif_mit_s_thread_cont, mit);
		/* NOTREACHED */
	} else {
terminate:
		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		VERIFY(mit->mit_thread == current_thread());
		VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
		mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING);
		mit->mit_flags |= NETIF_MITF_TERMINATED;
		wakeup((caddr_t)&mit->mit_flags);
		MIT_SPIN_UNLOCK(mit);

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

/*
 * Advanced variant.
 */
__attribute__((noreturn))
static void
nx_netif_mit_thread_cont(void *v, wait_result_t wres)
{
	struct __kern_channel_ring *kr;
	struct nx_netif_mit *mit = v;
	struct netif_stats *nifs;
	int irq_stat;

	ASSERT(!(mit->mit_flags & NETIF_MITF_SIMPLE));
	kr = __DEVOLATILE(struct __kern_channel_ring *, mit->mit_ckr);
	nifs = &NX_NETIF_PRIVATE(KRNA(kr)->na_nx)->nif_stats;
	irq_stat = (kr->ckr_tx == NR_TX) ? NETIF_STATS_TX_IRQ_MIT :
	    NETIF_STATS_RX_IRQ_MIT;

	MIT_SPIN_LOCK(mit);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (mit->mit_flags & NETIF_MITF_TERMINATING))) {
		goto terminate;
	}

	ASSERT(!(mit->mit_flags & NETIF_MITF_TERMINATED));
	mit->mit_flags |= NETIF_MITF_RUNNING;

	/*
	 * Keep on servicing the ring until there are no more requests.
	 */
	for (;;) {
		uint32_t requests = mit->mit_requests;
		uint32_t ival;
		int error = 0;

		STATS_INC(nifs, irq_stat);
		MIT_SPIN_UNLOCK(mit);

		/*
		 * Notify the ring and trigger packet fan-out;
		 * bracket the call with timestamps to compute
		 * our effective mitigation/delay interval below.
		 */
		nx_netif_mit_set_start_interval(mit);
		error = nx_netif_common_intr(kr, kernproc, 0, NULL);
		ival = nx_netif_mit_update_interval(mit, FALSE);

		/*
		 * If the mitigation interval is non-zero (for TX/RX),
		 * then we always introduce an artificial delay
		 * for that amount of time.  Otherwise, if we get
		 * EBUSY, then kr_enter() has another thread that
		 * is working on it, and so we should wait a bit.
		 */
		if (ival != 0 || error == EBUSY) {
			if (error == EBUSY) {
				ival = MAX(netif_busy_mit_delay,
				    NETIF_BUSY_MIT_DELAY);
				MIT_SPIN_LOCK(mit);
				mit->mit_requests++;
				MIT_SPIN_UNLOCK(mit);
			}
			delay(ival);
		}

		MIT_SPIN_LOCK(mit);

		if ((mit->mit_flags & NETIF_MITF_TERMINATING) != 0 ||
		    requests == mit->mit_requests) {
			mit->mit_requests = 0;
			break;
		}
	}

	if (__probable((mit->mit_flags & NETIF_MITF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		if (kr->ckr_rate_limited) {
			SK_DF(SK_VERB_NETIF_MIT,
			    "%s: posting wait deadline for MIT",
			    mit->mit_name);
			clock_interval_to_deadline(1, NSEC_PER_MSEC,
			    &deadline);
		}
		mit->mit_flags &= ~NETIF_MITF_RUNNING;
		(void) assert_wait_deadline(&mit->mit_flags,
		    THREAD_UNINT, deadline);
		MIT_SPIN_UNLOCK(mit);
		(void) thread_block_parameter(nx_netif_mit_thread_cont, mit);
		/* NOTREACHED */
	} else {
terminate:
		MIT_SPIN_LOCK_ASSERT_HELD(mit);

		VERIFY(mit->mit_thread == current_thread());
		VERIFY((mit->mit_flags & NETIF_MITF_TERMINATING) != 0);
		mit->mit_flags &= ~(NETIF_MITF_READY | NETIF_MITF_RUNNING |
		    NETIF_MITF_TERMINATING);
		mit->mit_flags |= NETIF_MITF_TERMINATED;
		wakeup((caddr_t)&mit->mit_flags);
		MIT_SPIN_UNLOCK(mit);

		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	/* must never get here */
	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

static void
nx_netif_mit_stats(struct __kern_channel_ring *kr, uint64_t pkts,
    uint64_t bytes)
{
	struct nx_netif_mit *mit = kr->ckr_mit;
	struct timespec now, delta;
	mit_mode_t mode;
	uint32_t cfg_idx;

	ASSERT(mit != NULL && !(mit->mit_flags & NETIF_MITF_SIMPLE));

	if ((os_atomic_or_orig(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed) &
	    NETIF_MITF_SAMPLING) != 0) {
		return;
	}

	mode = mit->mit_mode;
	cfg_idx = mit->mit_cfg_idx;

	nanouptime(&now);
	if (!net_timerisset(&mit->mit_sample_lasttime)) {
		*(&mit->mit_sample_lasttime) = *(&now);
	}

	net_timersub(&now, &mit->mit_sample_lasttime, &delta);
	if (net_timerisset(&mit->mit_sample_time)) {
		uint32_t ptot, btot;

		/* accumulate statistics for current sampling */
		PKTCNTR_ADD(&mit->mit_sstats, pkts, bytes);

		/* CSTYLED */
		if (net_timercmp(&delta, &mit->mit_sample_time, <)) {
			goto done;
		}

		*(&mit->mit_sample_lasttime) = *(&now);

		/* calculate min/max of bytes */
		btot = (uint32_t)mit->mit_sstats.bytes;
		if (mit->mit_bytes_min == 0 || mit->mit_bytes_min > btot) {
			mit->mit_bytes_min = btot;
		}
		if (btot > mit->mit_bytes_max) {
			mit->mit_bytes_max = btot;
		}

		/* calculate EWMA of bytes */
		MIT_EWMA(mit->mit_bytes_avg, btot,
		    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

		/* calculate min/max of packets */
		ptot = (uint32_t)mit->mit_sstats.packets;
		if (mit->mit_packets_min == 0 || mit->mit_packets_min > ptot) {
			mit->mit_packets_min = ptot;
		}
		if (ptot > mit->mit_packets_max) {
			mit->mit_packets_max = ptot;
		}

		/* calculate EWMA of packets */
		MIT_EWMA(mit->mit_packets_avg, ptot,
		    netif_ad_mit_gdecay, netif_ad_mit_sdecay);

		/* reset sampling statistics */
		PKTCNTR_CLEAR(&mit->mit_sstats);

		/* Perform mode transition, if necessary */
		if (!net_timerisset(&mit->mit_mode_lasttime)) {
			*(&mit->mit_mode_lasttime) = *(&now);
		}

		net_timersub(&now, &mit->mit_mode_lasttime, &delta);
		/* CSTYLED */
		if (net_timercmp(&delta, &mit->mit_mode_holdtime, <)) {
			goto done;
		}

		SK_RDF(SK_VERB_NETIF_MIT, 2, "%s [%u]: pavg %u bavg %u "
		    "delay %llu usec", mit->mit_name, mit->mit_cfg_idx,
		    mit->mit_packets_avg, mit->mit_bytes_avg,
		    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
		    (mit->mit_tbl[mit->mit_cfg_idx].cfg_ival)));

		if (mit->mit_packets_avg <= mit->mit_cfg->cfg_plowat &&
		    mit->mit_bytes_avg <= mit->mit_cfg->cfg_blowat) {
			if (cfg_idx == 0) {
				mode = MIT_MODE_ADVANCED_STATIC;
			} else {
				ASSERT(mode == MIT_MODE_ADVANCED_DYNAMIC);
				--cfg_idx;
			}
		} else if (mit->mit_packets_avg >= mit->mit_cfg->cfg_phiwat &&
		    mit->mit_bytes_avg >= mit->mit_cfg->cfg_bhiwat) {
			mode = MIT_MODE_ADVANCED_DYNAMIC;
			if (cfg_idx < (mit->mit_cfg_idx_max - 1)) {
				++cfg_idx;
			}
		}

		if (mode != mit->mit_mode || cfg_idx != mit->mit_cfg_idx) {
			ASSERT(cfg_idx < mit->mit_cfg_idx_max);

			SK_DF(SK_VERB_NETIF_MIT, "%s [%u->%u]: pavg %u "
			    "bavg %u [mode %u->%u, delay %llu->%llu usec]",
			    mit->mit_name, mit->mit_cfg_idx, cfg_idx,
			    mit->mit_packets_avg, mit->mit_bytes_avg,
			    mit->mit_mode, mode,
			    (mit->mit_mode == MIT_MODE_ADVANCED_STATIC ? 0 :
			    (mit->mit_cfg->cfg_ival)),
			    (mode == MIT_MODE_ADVANCED_STATIC ? 0 :
			    (mit->mit_tbl[cfg_idx].cfg_ival)));

			mit->mit_mode = mode;
			mit->mit_cfg_idx = cfg_idx;
			mit->mit_cfg = &mit->mit_tbl[mit->mit_cfg_idx];
			*(&mit->mit_mode_lasttime) = *(&now);
		}
	}

done:
	os_atomic_andnot(&mit->mit_flags, NETIF_MITF_SAMPLING, relaxed);
}
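
/*
 * Illustrative mode-transition trace for the logic above (hypothetical
 * traffic, native table): a ring starts in MIT_MODE_ADVANCED_STATIC at
 * index 0 with no table-driven delay.  A sustained burst that drives
 * the EWMAs to pavg >= 60 and bavg >= 6000 (row 0's high watermarks)
 * flips it to MIT_MODE_ADVANCED_DYNAMIC and advances it to index 1,
 * i.e. a 300 usec delay per service.  When traffic subsides and the
 * EWMAs fall to or below the current row's low watermarks, the index
 * steps back one level at a time, and at index 0 the mode reverts to
 * MIT_MODE_ADVANCED_STATIC.  Because transitions are gated on
 * mit_mode_holdtime (1 sec by default), the mode can change at most
 * once per hold period, which keeps it from flapping at the 10 ms
 * sampling granularity.
 */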

#if (DEVELOPMENT || DEBUG)
static int
sysctl_mit_mode_holdtime SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	uint64_t q;
	int err;

	q = netif_mit_mode_holdtime;

	err = sysctl_handle_quad(oidp, &q, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (q < NETIF_MIT_MODE_HOLDTIME_MIN) {
		q = NETIF_MIT_MODE_HOLDTIME_MIN;
	}

	netif_mit_mode_holdtime = q;

	return err;
}
#endif /* !DEVELOPMENT && !DEBUG */