/*
 * Copyright (c) 2019-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <sys/kdebug.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>

extern kern_return_t thread_terminate(thread_t);

#define NETIF_POLL_EWMA(old, new, decay) do {				\
	uint32_t _avg;							\
	if ((_avg = (old)) > 0)						\
		_avg = (((_avg << (decay)) - _avg) + (new)) >> (decay);	\
	else								\
		_avg = (new);						\
	(old) = _avg;							\
} while (0)
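/*
 * Worked example (illustrative numbers): with decay = 5, old = 100 and
 * new = 228, the macro computes ((100 << 5) - 100 + 228) >> 5 = 104,
 * i.e. each new sample moves the running average by roughly 1/32 of the
 * difference between the sample and the current average.
 */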

/* rate limit debug messages */
struct timespec netif_poll_dbgrate = { .tv_sec = 1, .tv_nsec = 0 };

static inline void
nx_netif_rxpoll_set_mode(struct ifnet *ifp, ifnet_model_t mode)
{
	errno_t err;
	uint64_t ival;
	struct timespec ts;
	struct ifnet_model_params p = { .model = mode, .reserved = { 0 } };

	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
		ival = IF_RXPOLL_INTERVALTIME_MIN;
	}

	if ((err = ((*ifp->if_input_ctl)(ifp, IFNET_CTL_SET_INPUT_MODEL,
	    sizeof(p), &p))) != 0) {
		SK_ERR("%s: error setting polling mode to %s (%d)",
		    if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ?
		    "ON" : "OFF", err);
	}

	switch (mode) {
	case IFNET_MODEL_INPUT_POLL_OFF:
		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_rxpoll_offreq++;
		if (err != 0) {
			ifp->if_rxpoll_offerr++;
		}
		break;

	case IFNET_MODEL_INPUT_POLL_ON:
		net_nsectimer(&ival, &ts);
		ifnet_set_poll_cycle(ifp, &ts);
		ifp->if_rxpoll_onreq++;
		if (err != 0) {
			ifp->if_rxpoll_onerr++;
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

/*
 * Updates the input poll statistics and determines the next mode based
 * on the configured thresholds.
 */
static inline void
netif_rxpoll_compat_update_rxpoll_stats(struct ifnet *ifp,
    struct ifnet_stat_increment_param *s)
{
	uint32_t poll_thresh = 0, poll_ival = 0;
	uint32_t m_cnt, m_size, poll_req = 0;
	struct timespec now, delta;
	ifnet_model_t mode;
	uint64_t ival;

	ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_NOTOWNED);

	/* total packets and bytes passed in by driver */
	m_cnt = s->packets_in;
	m_size = s->bytes_in;

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) {
		ival = IF_RXPOLL_INTERVALTIME_MIN;
	}
	/* Link parameters changed? */
	if (ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
		(void) netif_rxpoll_set_params(ifp, NULL, TRUE);
	}

	/* Current operating mode */
	mode = ifp->if_poll_mode;

	nanouptime(&now);
	if (!net_timerisset(&ifp->if_poll_sample_lasttime)) {
		*(&ifp->if_poll_sample_lasttime) = now;
	}

	net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta);
	if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) {
		uint32_t ptot, btot;

		/* Accumulate statistics for current sampling */
		PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size);

		if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) {
			goto skip;
		}
		*(&ifp->if_poll_sample_lasttime) = now;

		/* Calculate min/max of inbound bytes */
		btot = (uint32_t)ifp->if_poll_sstats.bytes;
		if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) {
			ifp->if_rxpoll_bmin = btot;
		}
		if (btot > ifp->if_rxpoll_bmax) {
			ifp->if_rxpoll_bmax = btot;
		}

		/* Calculate EWMA of inbound bytes */
		NETIF_POLL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay);

		/* Calculate min/max of inbound packets */
		ptot = (uint32_t)ifp->if_poll_sstats.packets;
		if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) {
			ifp->if_rxpoll_pmin = ptot;
		}
		if (ptot > ifp->if_rxpoll_pmax) {
			ifp->if_rxpoll_pmax = ptot;
		}

		/* Calculate EWMA of inbound packets */
		NETIF_POLL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay);

		/* Reset sampling statistics */
		PKTCNTR_CLEAR(&ifp->if_poll_sstats);

#if (SK_LOG && (DEVELOPMENT || DEBUG))
		if (__improbable(sk_verbose & SK_VERB_NETIF_POLL)) {
			if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
			}
			net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta);
			if (net_timercmp(&delta, &netif_poll_dbgrate, >=)) {
				*(&ifp->if_poll_dbg_lasttime) = *(&now);
				SK_DF(SK_VERB_NETIF_POLL,
				    "%s: [%s] pkts avg %d max %d "
				    "limits [%d/%d], bytes avg %d "
				    "limits [%d/%d]", if_name(ifp),
				    (ifp->if_poll_mode ==
				    IFNET_MODEL_INPUT_POLL_ON) ?
				    "ON" : "OFF", ifp->if_rxpoll_pavg,
				    ifp->if_rxpoll_pmax,
				    ifp->if_rxpoll_plowat,
				    ifp->if_rxpoll_phiwat,
				    ifp->if_rxpoll_bavg,
				    ifp->if_rxpoll_blowat,
				    ifp->if_rxpoll_bhiwat);
			}
		}
#endif /* (SK_LOG && (DEVELOPMENT || DEBUG)) */

		/* Perform mode transition, if necessary */
		if (!net_timerisset(&ifp->if_poll_mode_lasttime)) {
			*(&ifp->if_poll_mode_lasttime) = *(&now);
		}

		net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta);
		if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) {
			goto skip;
		}

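		/*
		 * Hysteresis between the watermarks: leave polling mode only
		 * when both the packet and byte EWMAs are at or below the
		 * low watermarks, and enter it only when both are at or
		 * above the high watermarks; otherwise the current mode is
		 * retained.
		 */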
		if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat &&
		    ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) {
			mode = IFNET_MODEL_INPUT_POLL_OFF;
		} else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat &&
		    ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat &&
		    ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) {
			mode = IFNET_MODEL_INPUT_POLL_ON;
		}

		if (mode != ifp->if_poll_mode) {
			ifp->if_poll_mode = mode;
			*(&ifp->if_poll_mode_lasttime) = *(&now);
			poll_req++;
		}
	}
skip:
	/* update rxpoll stats */
	if (ifp->if_poll_tstats.packets != 0) {
		ifp->if_poll_pstats.ifi_poll_packets +=
		    ifp->if_poll_tstats.packets;
		ifp->if_poll_tstats.packets = 0;
	}
	if (ifp->if_poll_tstats.bytes != 0) {
		ifp->if_poll_pstats.ifi_poll_bytes +=
		    ifp->if_poll_tstats.bytes;
		ifp->if_poll_tstats.bytes = 0;
	}

	lck_mtx_unlock(&ifp->if_poll_lock);
	/*
	 * If there's a mode change, perform a downcall to the driver
	 * for the new mode. This function is called from the poller thread
	 * which holds a reference on the ifnet.
	 */
	if (poll_req != 0) {
		nx_netif_rxpoll_set_mode(ifp, mode);
	}

	/* Signal the poller thread to do work if required */
	if (mode == IFNET_MODEL_INPUT_POLL_ON && m_cnt > 1 &&
	    (poll_ival = if_rxpoll_interval_pkts) > 0) {
		poll_thresh = m_cnt;
	}
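	/*
	 * Bumping if_poll_req records another pending request; the poller
	 * loop compares the counter against the snapshot it took at the top
	 * of each pass and keeps iterating (rather than blocking) while the
	 * counter keeps changing.
	 */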
	if (poll_thresh != 0 && poll_ival > 0 &&
	    (--poll_thresh % poll_ival) == 0) {
		lck_mtx_lock_spin(&ifp->if_poll_lock);
		ifp->if_poll_req++;
		lck_mtx_unlock(&ifp->if_poll_lock);
	}
}

/*
 * Must be called on an attached ifnet (caller is expected to check.)
 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
 */
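/*
 * Illustrative usage sketch (not taken from this file): a driver that
 * wants fixed values rather than auto-tuning could supply its own
 * parameters, e.g.
 *
 *	struct ifnet_poll_params pp = { .packets_lowat = 8,
 *	    .packets_hiwat = 32, .interval_time = IF_RXPOLL_INTERVALTIME_MIN };
 *	(void) netif_rxpoll_set_params(ifp, &pp, FALSE);
 *
 * (assumes the packets_lowat, packets_hiwat and interval_time fields of
 * struct ifnet_poll_params; the values are placeholders).  Passing NULL,
 * as the poller thread does above, reverts to the auto-tuned parameters.
 */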
errno_t
netif_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
    boolean_t locked)
{
	errno_t err;

	VERIFY(ifp != NULL);
	if ((ifp->if_eflags & IFEF_RXPOLL) == 0) {
		return ENXIO;
	}
	err = dlil_rxpoll_validate_params(p);
	if (err != 0) {
		return err;
	}

	if (!locked) {
		lck_mtx_lock(&ifp->if_poll_lock);
	}
	LCK_MTX_ASSERT(&ifp->if_poll_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Normally, we'd reset the parameters to the auto-tuned values
	 * if the poller thread detects a change in link rate. If the
	 * driver provides its own parameters right after the link rate
	 * changes, but before the input thread gets to run, we want to
	 * make sure to keep the driver's values. Clearing if_poll_update
	 * will achieve that.
	 */
	if (p != NULL && !locked && ifp->if_poll_update != 0) {
		ifp->if_poll_update = 0;
	}
	dlil_rxpoll_update_params(ifp, p);
	if (!locked) {
		lck_mtx_unlock(&ifp->if_poll_lock);
	}
	return 0;
}

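/*
 * Poll-mode handling: ask the driver (via if_input_poll) for a batch of up
 * to m_lim packets, record the batch size in the stats increment, and hand
 * the chain to ifnet_input_poll().  The nanouptime() calls bracket the
 * input pass so the caller can compare the measured duration against the
 * configured poll cycle.
 */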
static inline void
netif_rxpoll_poll_driver(struct ifnet *ifp, uint32_t m_lim,
    struct ifnet_stat_increment_param *s, struct timespec *start_time,
    struct timespec *poll_duration)
{
	struct mbuf *m_head = NULL, *m_tail = NULL;
	uint32_t m_cnt = 0, m_totlen = 0;
	struct timespec now;

	/* invoke the driver's input poll routine */
	((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, &m_cnt,
	    &m_totlen));
	VERIFY((m_cnt > 0) || ((m_head == NULL) && (m_tail == NULL)));

	s->packets_in = m_cnt;
	s->bytes_in = m_totlen;
	/*
	 * Bracket the work done with timestamps to compute the effective
	 * poll interval.
	 */
	nanouptime(start_time);
	(void) ifnet_input_poll(ifp, m_head, m_tail,
	    (m_head != NULL) ? s : NULL);
	nanouptime(&now);
	net_timersub(&now, start_time, poll_duration);

	SK_DF(SK_VERB_NETIF_POLL, "%s: polled %d pkts, pkts avg %d max %d, "
	    "wreq avg %d, bytes avg %d", if_name(ifp), m_cnt,
	    ifp->if_rxpoll_pavg, ifp->if_rxpoll_pmax, ifp->if_rxpoll_wavg,
	    ifp->if_rxpoll_bavg);
}

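/*
 * Interrupt (poll-off) mode handling: snapshot the packet and byte counts
 * sitting in the ring's receive mbuf queue and kick the RX mitigation
 * interrupt path to drain it; the snapshot feeds the sampling logic in
 * netif_rxpoll_compat_update_rxpoll_stats().
 */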
static inline void
netif_rxpoll_process_interrupt(struct ifnet *ifp, proc_t p,
    struct ifnet_stat_increment_param *s, struct nx_mbq *rcvq)
{
	struct nexus_adapter *na = &NA(ifp)->nifna_up;

	nx_mbq_lock_spin(rcvq);
	s->packets_in = nx_mbq_len(rcvq);
	s->bytes_in = (uint32_t)nx_mbq_size(rcvq);
	nx_mbq_unlock(rcvq);
	(void) nx_netif_mit_rx_intr(NAKR(na, NR_RX), p, 0, NULL);
}

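/*
 * Continuation of the compat poller thread: runs the service loop while
 * requests are pending, then either blocks with a deadline (poll mode) or
 * indefinitely (interrupt mode) with itself as the continuation, or tears
 * the thread down once IF_POLLF_TERMINATING has been set.
 */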
__attribute__((noreturn))
static void
netif_rxpoll_compat_thread_cont(void *v, wait_result_t wres)
{
	struct ifnet *ifp = v;
	struct timespec *ts = NULL;
	struct timespec start_time, poll_intvl, poll_duration;
	struct ifnet_stat_increment_param s;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	bzero(&s, sizeof(s));
	net_timerclear(&start_time);

	lck_mtx_lock_spin(&ifp->if_poll_lock);
	if (__improbable(wres == THREAD_INTERRUPTED ||
	    (ifp->if_poll_flags & IF_POLLF_TERMINATING) != 0)) {
		goto terminate;
	}

	ifp->if_poll_flags |= IF_POLLF_RUNNING;
	/*
	 * Keep on servicing until there are no more requests.
	 */
	for (;;) {
		uint16_t req = ifp->if_poll_req;
		struct nexus_adapter *na = &NA(ifp)->nifna_up;
		struct __kern_channel_ring *kring = &na->na_rx_rings[0];
		struct nx_mbq *rxq = &kring->ckr_rx_queue;
		uint32_t m_lim;
		boolean_t poll, poll_again = false;

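		/*
		 * Per-pass packet limit: the driver-configured
		 * if_rxpoll_plim if set, otherwise the larger of the receive
		 * queue limit and four times the packet high watermark.
		 */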
		m_lim = (ifp->if_rxpoll_plim != 0) ? ifp->if_rxpoll_plim :
		    MAX((nx_mbq_limit(rxq)), (ifp->if_rxpoll_phiwat << 2));
		poll = (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON);
		lck_mtx_unlock(&ifp->if_poll_lock);

		net_timerclear(&poll_duration);

		/* If no longer attached, there's nothing to do;
		 * else hold an IO refcnt to prevent the interface
		 * from being detached (will be released below.)
		 */
		if (!ifnet_is_attached(ifp, 1)) {
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			break;
		}

		if (poll) {
			netif_rxpoll_poll_driver(ifp, m_lim, &s, &start_time,
			    &poll_duration);
			/*
			 * If the polled duration is more than the poll
			 * interval, then poll again to catch up.
			 */
			ASSERT(net_timerisset(&ifp->if_poll_cycle));
			if (net_timercmp(&poll_duration, &ifp->if_poll_cycle,
			    >=)) {
				poll_again = true;
			}
		} else {
			netif_rxpoll_process_interrupt(ifp, kernproc, &s, rxq);
			net_timerclear(&start_time);
		}

		netif_rxpoll_compat_update_rxpoll_stats(ifp, &s);
		/* Release the IO ref count */
		ifnet_decr_iorefcnt(ifp);

		lck_mtx_lock_spin(&ifp->if_poll_lock);

		/* if signalled to terminate */
		if (__improbable((ifp->if_poll_flags & IF_POLLF_TERMINATING)
		    != 0)) {
			break;
		}
		/* if there's no pending request, we're done. */
		if (!poll_again && (req == ifp->if_poll_req)) {
			break;
		}
	}

	ifp->if_poll_req = 0;
	ifp->if_poll_flags &= ~IF_POLLF_RUNNING;
	/*
	 * Wake up N ns from now, else sleep indefinitely (ts = NULL)
	 * until ifnet_poll() is called again.
	 */
	/* calculate work duration (since last start work time) */
	if (ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_ON) {
		ASSERT(net_timerisset(&ifp->if_poll_cycle));
		ASSERT(net_timercmp(&poll_duration, &ifp->if_poll_cycle, <));
		net_timersub(&ifp->if_poll_cycle, &poll_duration, &poll_intvl);
		ASSERT(net_timerisset(&poll_intvl));
		ts = &poll_intvl;
	} else {
		ts = NULL;
	}
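	/*
	 * Example (illustrative numbers): with a 1 ms poll cycle and a poll
	 * pass that took 300 us, poll_intvl works out to 700 us, so the
	 * thread sleeps for the remainder of the cycle before polling again.
	 */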

	if (__probable((ifp->if_poll_flags & IF_POLLF_TERMINATING) == 0)) {
		uint64_t deadline = TIMEOUT_WAIT_FOREVER;

		if (ts != NULL) {
			uint64_t interval;

			_CASSERT(IF_RXPOLL_INTERVALTIME_MIN >= (1ULL * 1000));
			net_timerusec(ts, &interval);
			ASSERT(interval <= UINT32_MAX);
			clock_interval_to_deadline((uint32_t)interval,
			    NSEC_PER_USEC, &deadline);
		}

		(void) assert_wait_deadline(&ifp->if_poll_thread,
		    THREAD_UNINT, deadline);
		lck_mtx_unlock(&ifp->if_poll_lock);
		(void) thread_block_parameter(netif_rxpoll_compat_thread_cont,
		    ifp);
		/* NOTREACHED */
	} else {
terminate:
		/* interface is detached (maybe while asleep)? */
		ifnet_set_poll_cycle(ifp, NULL);
		ifp->if_poll_flags &= ~IF_POLLF_READY;

		/* clear if_poll_thread to allow termination to continue */
		ASSERT(ifp->if_poll_thread != THREAD_NULL);
		ifp->if_poll_thread = THREAD_NULL;
		wakeup((caddr_t)&ifp->if_poll_thread);
		lck_mtx_unlock(&ifp->if_poll_lock);
		SK_DF(SK_VERB_NETIF_POLL, "%s: poller thread terminated",
		    if_name(ifp));
		/* for the extra refcnt from kernel_thread_start() */
		thread_deallocate(current_thread());
		/* this is the end */
		thread_terminate(current_thread());
		/* NOTREACHED */
	}

	VERIFY(0);
	/* NOTREACHED */
	__builtin_unreachable();
}

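/*
 * Entry point of the "skywalk_netif_poller_<ifname>" thread: names the
 * thread, signals nx_netif_compat_na_activate() that the poller is ready,
 * and parks in netif_rxpoll_compat_thread_cont() waiting for work.
 */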
__attribute__((noreturn))
void
netif_rxpoll_compat_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	char thread_name[MAXTHREADNAMESIZE];
	struct ifnet *ifp = v;

	VERIFY(ifp->if_eflags & IFEF_RXPOLL);
	VERIFY(current_thread() == ifp->if_poll_thread);

	/* construct the name for this thread, and then apply it */
	bzero(thread_name, sizeof(thread_name));
	(void) snprintf(thread_name, sizeof(thread_name),
	    "skywalk_netif_poller_%s", ifp->if_xname);
	thread_set_thread_name(ifp->if_poll_thread, thread_name);

	lck_mtx_lock(&ifp->if_poll_lock);
	VERIFY(!(ifp->if_poll_flags & (IF_POLLF_READY | IF_POLLF_RUNNING)));
	/* tell nx_netif_compat_na_activate() to proceed */
	ifp->if_poll_flags |= IF_POLLF_READY;
	wakeup((caddr_t)&ifp->if_poll_flags);
	(void) assert_wait(&ifp->if_poll_thread, THREAD_UNINT);
	lck_mtx_unlock(&ifp->if_poll_lock);
	(void) thread_block_parameter(netif_rxpoll_compat_thread_cont, ifp);
	/* NOTREACHED */
	__builtin_unreachable();
}