1/*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 */
53
54#include <skywalk/os_skywalk_private.h>
55#include <skywalk/nexus/netif/nx_netif.h>
56#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
57#include <mach/thread_act.h>
58#include <kern/thread.h>
59#include <kern/sched_prim.h>
60
61static void na_netif_compat_finalize(struct nexus_netif_adapter *,
62 struct ifnet *);
63static errno_t nx_netif_compat_receive(struct ifnet *ifp, struct mbuf *m_head,
64 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
65 boolean_t poll, struct thread *tp);
66static int nx_netif_compat_catch_rx(struct nexus_netif_compat_adapter *na,
67 boolean_t enable);
68static int nx_netif_compat_xmit_frame(struct nexus_adapter *, struct mbuf *,
69 struct __kern_packet *);
70
71static int nx_netif_compat_na_notify_tx(struct __kern_channel_ring *,
72 struct proc *, uint32_t);
73static int nx_netif_compat_na_notify_rx(struct __kern_channel_ring *,
74 struct proc *, uint32_t);
75static int nx_netif_compat_na_activate(struct nexus_adapter *,
76 na_activate_mode_t);
77static int nx_netif_compat_na_txsync(struct __kern_channel_ring *,
78 struct proc *, uint32_t);
79static int nx_netif_compat_na_rxsync(struct __kern_channel_ring *,
80 struct proc *, uint32_t);
81static void nx_netif_compat_na_dtor(struct nexus_adapter *na);
82
83static void nx_netif_compat_tx_intr(struct ifnet *, enum txrx, uint32_t,
84 uint32_t *);
85static inline struct mbuf *nx_netif_compat_ring_alloc(int, int, uint16_t);
86static inline void nx_netif_compat_ring_free(struct mbuf *m);
87static void nx_netif_compat_ringcb(caddr_t cl, uint32_t size, caddr_t arg);
88
89static uint32_t nx_netif_compat_tx_clean(struct netif_stats *nifs,
90 struct __kern_channel_ring *kring);
91static void nx_netif_compat_set_tx_event(struct __kern_channel_ring *kring,
92 slot_idx_t khead);
93
94static struct nexus_netif_compat_adapter *na_netif_compat_alloc(zalloc_flags_t);
95static void na_netif_compat_free(struct nexus_adapter *);
96#if DEBUG || DEVELOPMENT
97static struct mbuf *nx_netif_rx_split(struct mbuf *, uint32_t);
98#endif /* DEBUG || DEVELOPMENT */
99
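/*
 * The compat path reuses the mbuf packet-header flow ID field to carry
 * the TX/RX queue index of the packet.
 */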
100#define MBUF_TXQ(m) ((m)->m_pkthdr.pkt_flowid)
101#define MBUF_RXQ(m) ((m)->m_pkthdr.pkt_flowid)
102
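/*
 * Layout of the 32-bit mbuf cluster property word used by the TX pool:
 * the low 16 bits hold flags (e.g. NMB_PROPF_TX_NOTIFY) and the high
 * 16 bits hold the ring slot index that owns the mbuf.
 */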
103#define NMB_PROPF_TX_NOTIFY 0x1 /* generate transmit event */
104#define NMB_FLAGS_MASK 0x0000ffff
105#define NMB_INDEX_MASK 0xffff0000
106#define NMB_GET_FLAGS(p) (((uint32_t)(p) & NMB_FLAGS_MASK))
107#define NMB_SET_FLAGS(p, f) (((uint32_t)(p) & ~NMB_FLAGS_MASK) | (f))
108#define NMB_GET_INDEX(p) (((uint32_t)(p) & NMB_INDEX_MASK) >> 16)
109#define NMB_SET_INDEX(p, i) (((uint32_t)(p) & ~NMB_INDEX_MASK) | (i << 16))
110
111static SKMEM_TYPE_DEFINE(na_netif_compat_zone, struct nexus_netif_compat_adapter);
112
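/*
 * TX event placement policy used by nx_netif_compat_set_tx_event();
 * 0 requests the notification on the first pending slot, nonzero on
 * a slot in the middle of the pending range.
 */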
113static int netif_tx_event_mode = 0;
114
115#if (DEVELOPMENT || DEBUG)
116SYSCTL_EXTENSIBLE_NODE(_kern_skywalk_netif, OID_AUTO, compat,
117 CTLFLAG_RW | CTLFLAG_LOCKED,
118 0, "Skywalk netif Nexus legacy compatibility support");
119SYSCTL_INT(_kern_skywalk_netif_compat, OID_AUTO, tx_event_mode,
120 CTLFLAG_RW | CTLFLAG_LOCKED, &netif_tx_event_mode, 0, "");
121static uint32_t netif_rx_split = 0;
122SYSCTL_UINT(_kern_skywalk_netif_compat, OID_AUTO, rx_split,
123 CTLFLAG_RW | CTLFLAG_LOCKED, &netif_rx_split, 0, "");
124#endif /* !DEVELOPMENT && !DEBUG */
125
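/*
 * Domain provider callbacks for the netif compat nexus; installed as
 * the default domain provider by nx_netif_compat_init().
 */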
126struct kern_nexus_domain_provider nx_netif_compat_prov_s = {
127 .nxdom_prov_name = NEXUS_PROVIDER_NET_IF_COMPAT,
128 .nxdom_prov_flags = NXDOMPROVF_DEFAULT,
129 .nxdom_prov_cb = {
130 .dp_cb_init = nx_netif_prov_init,
131 .dp_cb_fini = nx_netif_prov_fini,
132 .dp_cb_params = nx_netif_prov_params,
133 /*
134 * We must be using the native netif handlers below,
135 * since we act as the default domain provider; see
136 * kern_nexus_register_domain_provider().
137 */
138 .dp_cb_mem_new = nx_netif_prov_mem_new,
139 .dp_cb_config = nx_netif_prov_config,
140 .dp_cb_nx_ctor = nx_netif_prov_nx_ctor,
141 .dp_cb_nx_dtor = nx_netif_prov_nx_dtor,
142 .dp_cb_nx_mem_info = nx_netif_prov_nx_mem_info,
143 .dp_cb_nx_mib_get = nx_netif_prov_nx_mib_get,
144 .dp_cb_nx_stop = nx_netif_prov_nx_stop,
145 },
146};
147
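/*
 * ifnet-facing callbacks; installed as ifp->if_na_ops by
 * nx_netif_compat_attach().
 */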
148struct nexus_ifnet_ops na_netif_compat_ops = {
149 .ni_finalize = na_netif_compat_finalize,
150 .ni_reap = nx_netif_reap,
151 .ni_dequeue = nx_netif_compat_tx_dequeue,
152 .ni_get_len = nx_netif_compat_tx_get_len,
153};
154
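/*
 * Allocation tags for the per-ring mitigation state and the per-ring
 * TX mbuf pool arrays.
 */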
155#define SKMEM_TAG_NETIF_COMPAT_MIT "com.apple.skywalk.netif.compat.mit"
156static SKMEM_TAG_DEFINE(skmem_tag_netif_compat_mit, SKMEM_TAG_NETIF_COMPAT_MIT);
157
158#define SKMEM_TAG_NETIF_COMPAT_POOL "com.apple.skywalk.netif.compat.pool"
159static SKMEM_TAG_DEFINE(skmem_tag_netif_compat_pool, SKMEM_TAG_NETIF_COMPAT_POOL);
160
161void
162nx_netif_compat_init(struct nxdom *nxdom)
163{
164 _CASSERT(NETIF_COMPAT_MAX_MBUF_DATA_COPY <= NETIF_COMPAT_BUF_SIZE);
165
166 /*
167 * We want nxprov_create() coming from userland to use the
168 * netif_compat domain provider, so install it as default.
169 * This is verified by the caller.
170 */
171 (void) nxdom_prov_add(nxdom, &nx_netif_compat_prov_s);
172}
173
174void
175nx_netif_compat_fini(void)
176{
177 (void) nxdom_prov_del(&nx_netif_compat_prov_s);
178}
179
180static struct nexus_netif_compat_adapter *
181na_netif_compat_alloc(zalloc_flags_t how)
182{
183 struct nexus_netif_compat_adapter *nca;
184
185 _CASSERT(offsetof(struct nexus_netif_compat_adapter, nca_up) == 0);
186
187 nca = zalloc_flags(na_netif_compat_zone, how | Z_ZERO);
188 if (nca) {
189 SK_DF(SK_VERB_MEM, "nca %p ALLOC", SK_KVA(nca));
190 }
191 return nca;
192}
193
194static void
195na_netif_compat_free(struct nexus_adapter *na)
196{
197 struct nexus_netif_compat_adapter *nca =
198 (struct nexus_netif_compat_adapter *)na;
199
200 SK_LOCK_ASSERT_HELD();
201 ASSERT(na->na_refcount == 0);
202
203 SK_DF(SK_VERB_MEM, "nca [dev+host] %p FREE", SK_KVA(nca));
	bzero(nca, sizeof(*nca));
205 zfree(na_netif_compat_zone, nca);
206}
207
208/*
209 * Callback invoked when the device driver frees an mbuf used
210 * by skywalk to transmit a packet. This usually happens when
211 * the NIC notifies the driver that transmission is completed.
212 */
213static void
214nx_netif_compat_ringcb(caddr_t cl, uint32_t size, caddr_t arg)
215{
216#pragma unused(cl, size)
217 struct mbuf *m = (void *)arg;
218 struct ifnet *ifp = NULL;
219 struct netif_stats *nifs = NULL;
220 uintptr_t data; /* not used */
221 uint32_t txq;
222 errno_t err;
223
	err = mbuf_get_tx_compl_data(m, (uintptr_t *)&ifp, &data);
225 ASSERT(err == 0);
226
227 nifs = &NX_NETIF_PRIVATE(NA(ifp)->nifna_up.na_nx)->nif_stats;
228 txq = MBUF_TXQ(m);
229
230 for (;;) {
231 uint32_t p = 0, i, f;
232
		(void) mbuf_cluster_get_prop(m, &p);
234 f = NMB_GET_FLAGS(p);
235 i = NMB_GET_INDEX(p);
236
237 SK_DF(SK_VERB_NETIF, "%s m 0x%llx txq %u i %u f 0x%x",
238 if_name(ifp), SK_KVA(m), MBUF_TXQ(m), i, f);
239
240 if (f & NMB_PROPF_TX_NOTIFY) {
241 uint32_t pn;
242
243 f &= ~NMB_PROPF_TX_NOTIFY;
244 pn = NMB_SET_FLAGS(p, f);
245
			err = mbuf_cluster_set_prop(m, p, pn);
247 if (err != 0) {
248 if (err == EBUSY) { /* try again */
249 continue;
250 }
251 /* TODO: adi@apple.com -- what to do? */
252 SK_ERR("Failed to clear TX_NOTIFY "
253 "m 0x%llx i %u err %d", SK_KVA(m), i, err);
254 } else {
255 nx_netif_compat_tx_intr(ifp, NR_TX, txq, NULL);
256 SK_DF(SK_VERB_NETIF | SK_VERB_INTR | SK_VERB_TX,
257 "%s TX irq m 0x%llx txq %u i %u f 0x%x",
258 if_name(ifp), SK_KVA(m), MBUF_TXQ(m), i, f);
259 STATS_INC(nifs, NETIF_STATS_TX_IRQ);
260 }
261 }
262 break;
263 }
264}
265
266/* Hoisted out of line to reduce kernel stack footprint */
267SK_NO_INLINE_ATTRIBUTE
268static struct mbuf *
269nx_netif_compat_ring_alloc(int how, int len, uint16_t idx)
270{
271 struct mbuf *m = NULL;
272 size_t size = len;
273 uint32_t i;
274
	if (mbuf_ring_cluster_alloc(how, MBUF_TYPE_HEADER, &m,
	    nx_netif_compat_ringcb, &size) != 0) {
277 return NULL;
278 }
279
280 for (;;) {
281 uint32_t p = 0, pn;
282 int err;
283
		(void) mbuf_cluster_get_prop(m, &p);
285 pn = NMB_SET_FLAGS(p, 0);
286 pn = NMB_SET_INDEX(pn, idx);
287
		err = mbuf_cluster_set_prop(m, p, pn);
289 if (err != 0) {
290 if (err == EBUSY) { /* try again */
291 continue;
292 }
293 SK_ERR("Failed to initialize properties m 0x%llx "
294 "err %d", SK_KVA(m), err);
295 m_freem(m);
296 return NULL;
297 }
		(void) mbuf_cluster_get_prop(m, &p);
299 i = NMB_GET_INDEX(p);
300 ASSERT(i == idx);
301 break;
302 }
303
304 SK_DF(SK_VERB_MEM, "alloc m 0x%llx size %u i %u",
305 SK_KVA(m), (uint32_t)size, i);
306
307 return m;
308}
309
310/* Hoisted out of line to reduce kernel stack footprint */
311SK_NO_INLINE_ATTRIBUTE
312static void
313nx_netif_compat_ring_free(struct mbuf *m)
314{
315 if (m == NULL) {
316 return;
317 }
318
319 for (;;) {
320 uint32_t p = 0;
321 int err;
322
		(void) mbuf_cluster_get_prop(m, &p);
		err = mbuf_cluster_set_prop(m, p, 0);
325 if (err != 0) {
326 if (err == EBUSY) { /* try again */
327 continue;
328 }
329 /* TODO: adi@apple.com -- what to do? */
330 SK_ERR("Failed to clear properties m 0x%llx err %d",
331 SK_KVA(m), err);
332 }
333 break;
334 }
335 m_freem(m);
336}
337
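/*
 * Forward a TX completion for queue 'q' to the mitigation layer, after
 * verifying that the adapter is active and the queue index is in range.
 */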
338static void
339nx_netif_compat_tx_intr(struct ifnet *ifp, enum txrx t, uint32_t q,
340 uint32_t *work_done)
341{
342 struct nexus_adapter *na = &NA(ifp)->nifna_up;
343
344 if (__improbable(!NA_IS_ACTIVE(na) || q >= na_get_nrings(na, t))) {
345 if (q >= na_get_nrings(na, t)) {
346 SK_ERR("na \"%s\" (0x%llx) invalid q %u >= %u",
347 na->na_name, SK_KVA(na), q, na_get_nrings(na, t));
348 }
349 } else {
350 (void) nx_netif_mit_tx_intr((NAKR(na, t) + q), kernproc,
351 0, work_done);
352 }
353}
354
355static int
356nx_netif_compat_na_notify_tx(struct __kern_channel_ring *kring,
357 struct proc *p, uint32_t flags)
358{
359 /*
360 * This should never get executed, as nothing should be invoking
361 * the TX ring notify callback. The compat adapter directly
362 * calls nx_netif_compat_tx_intr() for TX completion from within
363 * nx_netif_compat_ringcb().
364 *
365 * If we ever get here, use the original na_notify callback
366 * saved during na_activate().
367 */
368 return kring->ckr_netif_notify(kring, p, flags);
369}
370
371static int
372nx_netif_compat_na_notify_rx(struct __kern_channel_ring *kring,
373 struct proc *p, uint32_t flags)
374{
375 /*
376 * This should never get executed, as nothing should be invoking
377 * the RX ring notify callback. The compat adapter directly
378 * calls nx_netif_mit_rx_intr() for RX completion from within
379 * nx_netif_compat_receive().
380 *
381 * If we ever get here, use the original na_notify callback
382 * saved during na_activate().
383 */
384 return kring->ckr_netif_notify(kring, p, flags);
385}
386
387/* Enable/disable skywalk mode for a compat network interface. */
388static int
389nx_netif_compat_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
390{
391 struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
392 boolean_t tx_mit, rx_mit, tx_mit_simple, rx_mit_simple, rxpoll;
393 uint32_t limit = (uint32_t)sk_netif_compat_rx_mbq_limit;
394 struct nx_netif *nif = nifna->nifna_netif;
395 struct nexus_netif_compat_adapter *nca;
396 ifnet_t ifp = na->na_ifp;
397 uint32_t i, r;
398 int error;
399
400 ASSERT(na->na_type == NA_NETIF_COMPAT_DEV);
401 ASSERT(!(na->na_flags & NAF_HOST_ONLY));
402
403 SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
404 SK_KVA(na), na_activate_mode2str(mode));
405
406 nca = (struct nexus_netif_compat_adapter *)nifna;
407
408 switch (mode) {
409 case NA_ACTIVATE_MODE_ON:
410 ASSERT(SKYWALK_CAPABLE(na->na_ifp));
411
412 nx_netif_mit_config(nifna, &tx_mit, &tx_mit_simple,
413 &rx_mit, &rx_mit_simple);
414
415 /*
416 * Init the mitigation support on all the dev TX rings.
417 */
		if (na_get_nrings(na, NR_TX) != 0 && tx_mit) {
419 nifna->nifna_tx_mit =
420 skn_alloc_type_array(tx_on, struct nx_netif_mit,
421 na_get_nrings(na, NR_TX), Z_WAITOK,
422 skmem_tag_netif_compat_mit);
423 if (nifna->nifna_tx_mit == NULL) {
424 SK_ERR("TX mitigation allocation failed");
425 error = ENOMEM;
426 goto out;
427 }
428 } else {
429 ASSERT(nifna->nifna_tx_mit == NULL);
430 }
431
432 /*
433 * Init either poller or mitigation support on all the
434 * dev RX rings; they're mutually exclusive and poller
435 * takes precedence.
436 */
437 rxpoll = (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
438 if (rxpoll) {
439 int err;
440 __unused kern_return_t kret;
441 thread_precedence_policy_data_t info;
442
443 ASSERT((ifp->if_xflags & IFXF_LEGACY) == 0);
444 ASSERT(ifp->if_input_poll != NULL);
445 ASSERT(ifp->if_input_ctl != NULL);
			if ((err =
			    kernel_thread_start(netif_rxpoll_compat_thread_func,
			    ifp, &ifp->if_poll_thread)) != KERN_SUCCESS) {
449 panic_plain("%s: ifp=%p couldn't get a poll "
450 " thread; err=%d", __func__, ifp, err);
451 /* NOTREACHED */
452 __builtin_unreachable();
453 }
454 VERIFY(ifp->if_poll_thread != NULL);
455
456 /* wait until thread is ready */
			lck_mtx_lock(&ifp->if_poll_lock);
			while (!(ifp->if_poll_flags & IF_POLLF_READY)) {
				(void) assert_wait(&ifp->if_poll_flags,
				    THREAD_UNINT);
				lck_mtx_unlock(&ifp->if_poll_lock);
				(void) thread_block(THREAD_CONTINUE_NULL);
				lck_mtx_lock(&ifp->if_poll_lock);
			}
			lck_mtx_unlock(&ifp->if_poll_lock);
466
			bzero(&info, sizeof(info));
			info.importance = 1;
			kret = thread_policy_set(ifp->if_poll_thread,
			    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
			    THREAD_PRECEDENCE_POLICY_COUNT);
472 ASSERT(kret == KERN_SUCCESS);
473 limit = if_rcvq_maxlen;
474 (void) netif_rxpoll_set_params(ifp, NULL, FALSE);
475 ASSERT(nifna->nifna_rx_mit == NULL);
476 } else if (rx_mit) {
477 nifna->nifna_rx_mit =
478 skn_alloc_type_array(rx_on, struct nx_netif_mit,
479 na_get_nrings(na, NR_RX), Z_WAITOK,
480 skmem_tag_netif_compat_mit);
481 if (nifna->nifna_rx_mit == NULL) {
482 SK_ERR("RX mitigation allocation failed");
483 if (nifna->nifna_tx_mit != NULL) {
484 skn_free_type_array(rx_fail,
485 struct nx_netif_mit,
486 na_get_nrings(na, NR_TX),
487 nifna->nifna_tx_mit);
488 nifna->nifna_tx_mit = NULL;
489 }
490 error = ENOMEM;
491 goto out;
492 }
493 }
494
495 /* intercept na_notify callback on the TX rings */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
497 na->na_tx_rings[r].ckr_netif_notify =
498 na->na_tx_rings[r].ckr_na_notify;
499 na->na_tx_rings[r].ckr_na_notify =
500 nx_netif_compat_na_notify_tx;
501 if (nifna->nifna_tx_mit != NULL) {
502 nx_netif_mit_init(nif, na->na_ifp,
503 &nifna->nifna_tx_mit[r],
504 &na->na_tx_rings[r], tx_mit_simple);
505 }
506 }
507
508 /* intercept na_notify callback on the RX rings */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
510 na->na_rx_rings[r].ckr_netif_notify =
511 na->na_rx_rings[r].ckr_na_notify;
512 na->na_rx_rings[r].ckr_na_notify =
513 nx_netif_compat_na_notify_rx;
514 if (nifna->nifna_rx_mit != NULL) {
515 nx_netif_mit_init(nif, na->na_ifp,
516 &nifna->nifna_rx_mit[r],
517 &na->na_rx_rings[r], rx_mit_simple);
518 }
519 }
520 /*
521 * Initialize the rx queue, as nx_netif_compat_receive() can
522 * be called as soon as nx_netif_compat_catch_rx() returns.
523 */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
525 struct __kern_channel_ring *kr = &na->na_rx_rings[r];
526
			nx_mbq_safe_init(kr, &kr->ckr_rx_queue, limit,
			    &nexus_mbq_lock_group, &nexus_lock_attr);
529 SK_DF(SK_VERB_NETIF,
530 "na \"%s\" (0x%llx) initialized kr \"%s\" "
531 "(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
532 kr->ckr_name, SK_KVA(kr), kr->ckr_flags, CKRF_BITS);
533 }
534
535 /*
536 * Prepare packet buffers for the tx rings; don't preallocate
537 * the mbufs here, leave this to nx_netif_compat_na_txsync().
538 */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
540 na->na_tx_rings[r].ckr_tx_pool = NULL;
541 }
542
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
544 na->na_tx_rings[r].ckr_tx_pool =
545 skn_alloc_type_array(tx_pool_on, struct mbuf *,
546 na_get_nslots(na, NR_TX), Z_WAITOK,
547 skmem_tag_netif_compat_pool);
548 if (na->na_tx_rings[r].ckr_tx_pool == NULL) {
549 SK_ERR("ckr_tx_pool allocation failed");
550 error = ENOMEM;
551 goto free_tx_pools;
552 }
553 }
554
555 /* Prepare to intercept incoming traffic. */
		error = nx_netif_compat_catch_rx(nca, TRUE);
557 if (error != 0) {
558 SK_ERR("RX intercept failed (%d)", error);
559 goto uncatch;
560 }
561 nx_netif_filter_enable(nifna->nifna_netif);
562 nx_netif_flow_enable(nifna->nifna_netif);
563 os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
564 break;
565
566 case NA_ACTIVATE_MODE_DEFUNCT:
567 ASSERT(SKYWALK_CAPABLE(na->na_ifp));
568 break;
569
570 case NA_ACTIVATE_MODE_OFF:
571 /*
572 * Note that here we cannot assert SKYWALK_CAPABLE()
573 * as we're called in the destructor path.
574 */
575 os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
576 nx_netif_flow_disable(nifna->nifna_netif);
577 nx_netif_filter_disable(nifna->nifna_netif);
578
579 /*
580 * Signal the poller thread to terminate itself, and
581 * wait for it to exit.
582 */
583 if (ifp->if_poll_thread != THREAD_NULL) {
584 ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL));
585 ASSERT((ifp->if_xflags & IFXF_LEGACY) == 0);
			lck_mtx_lock_spin(&ifp->if_poll_lock);
			ifp->if_poll_flags |= IF_POLLF_TERMINATING;
			wakeup_one((caddr_t)&ifp->if_poll_thread);
			lck_mtx_unlock(&ifp->if_poll_lock);
590
591 /* wait for poller thread to terminate */
			lck_mtx_lock(&ifp->if_poll_lock);
593 while (ifp->if_poll_thread != THREAD_NULL) {
594 SK_DF(SK_VERB_NETIF_POLL,
595 "%s: waiting for poller thread to terminate",
596 if_name(ifp));
				(void) msleep(&ifp->if_poll_thread,
				    &ifp->if_poll_lock, (PZERO - 1),
				    "netif_poll_thread_exit", NULL);
600 }
			lck_mtx_unlock(&ifp->if_poll_lock);
602 SK_DF(SK_VERB_NETIF_POLL,
603 "%s: poller thread termination complete",
604 if_name(ifp));
605 }
606
607 /* Do not intercept packets on the rx path. */
		(void) nx_netif_compat_catch_rx(nca, FALSE);
609
610 /* Free the mbufs going to the channel rings */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
			nx_mbq_safe_purge(&na->na_rx_rings[r].ckr_rx_queue);
			nx_mbq_safe_destroy(&na->na_rx_rings[r].ckr_rx_queue);
614 }
615
616 /* reset all TX notify callbacks */
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
618 na->na_tx_rings[r].ckr_na_notify =
619 na->na_tx_rings[r].ckr_netif_notify;
620 na->na_tx_rings[r].ckr_netif_notify = NULL;
621 if (nifna->nifna_tx_mit != NULL) {
622 na->na_tx_rings[r].ckr_netif_mit_stats = NULL;
623 nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
624 }
625 }
626
627 if (nifna->nifna_tx_mit != NULL) {
628 skn_free_type_array(tx_off, struct nx_netif_mit,
629 na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
630 nifna->nifna_tx_mit = NULL;
631 }
632
633 /* reset all RX notify callbacks */
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
635 na->na_rx_rings[r].ckr_na_notify =
636 na->na_rx_rings[r].ckr_netif_notify;
637 na->na_rx_rings[r].ckr_netif_notify = NULL;
638 if (nifna->nifna_rx_mit != NULL) {
639 na->na_rx_rings[r].ckr_netif_mit_stats = NULL;
640 nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
641 }
642 }
643 if (nifna->nifna_rx_mit != NULL) {
644 skn_free_type_array(rx_off, struct nx_netif_mit,
645 na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
646 nifna->nifna_rx_mit = NULL;
647 }
648
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
			for (i = 0; i < na_get_nslots(na, NR_TX); i++) {
				nx_netif_compat_ring_free(na->
				    na_tx_rings[r].ckr_tx_pool[i]);
653 na->na_tx_rings[r].ckr_tx_pool[i] = NULL;
654 }
655 skn_free_type_array(tx_pool_off,
656 struct mbuf *, na_get_nslots(na, NR_TX),
657 na->na_tx_rings[r].ckr_tx_pool);
658 }
659 break;
660
661 default:
662 VERIFY(0);
663 /* NOTREACHED */
664 __builtin_unreachable();
665 }
666
667 return 0;
668
669uncatch:
	(void) nx_netif_compat_catch_rx(nca, FALSE);
671
672free_tx_pools:
	for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
674 if (na->na_tx_rings[r].ckr_tx_pool == NULL) {
675 continue;
676 }
		for (i = 0; i < na_get_nslots(na, NR_TX); i++) {
			nx_netif_compat_ring_free(
			    na->na_tx_rings[r].ckr_tx_pool[i]);
680 na->na_tx_rings[r].ckr_tx_pool[i] = NULL;
681 }
682 skn_free_type_array(tx_pool, struct mbuf *,
683 na_get_nslots(na, NR_TX), na->na_tx_rings[r].ckr_tx_pool);
684 na->na_tx_rings[r].ckr_tx_pool = NULL;
685 }
686 if (nifna->nifna_tx_mit != NULL) {
		for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
688 nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]);
689 }
690 skn_free_type_array(tx, struct nx_netif_mit,
691 na_get_nrings(na, NR_TX), nifna->nifna_tx_mit);
692 nifna->nifna_tx_mit = NULL;
693 }
694 if (nifna->nifna_rx_mit != NULL) {
		for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
696 nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]);
697 }
698 skn_free_type_array(rx, struct nx_netif_mit,
699 na_get_nrings(na, NR_RX), nifna->nifna_rx_mit);
700 nifna->nifna_rx_mit = NULL;
701 }
	for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
		nx_mbq_safe_destroy(&na->na_rx_rings[r].ckr_rx_queue);
704 }
705out:
706
707 return error;
708}
709
710/*
711 * Record completed transmissions and update ktail.
712 *
713 * The oldest tx buffer not yet completed is at ckr_ktail + 1,
714 * ckr_khead is the first unsent buffer.
715 */
716/* Hoisted out of line to reduce kernel stack footprint */
717SK_NO_INLINE_ATTRIBUTE
718static uint32_t
719nx_netif_compat_tx_clean(struct netif_stats *nifs,
720 struct __kern_channel_ring *kring)
721{
722 const slot_idx_t lim = kring->ckr_lim;
	slot_idx_t nm_i = SLOT_NEXT(kring->ckr_ktail, lim);
724 slot_idx_t khead = kring->ckr_khead;
725 uint32_t n = 0;
726 struct mbuf **ckr_tx_pool = kring->ckr_tx_pool;
727
728 while (nm_i != khead) { /* buffers not completed */
729 struct mbuf *m = ckr_tx_pool[nm_i];
730
731 if (__improbable(m == NULL)) {
732 /* this is done, try to replenish the entry */
733 VERIFY(nm_i <= UINT16_MAX);
734 ckr_tx_pool[nm_i] = m =
735 nx_netif_compat_ring_alloc(M_WAITOK,
			    kring->ckr_max_pkt_len, (uint16_t)nm_i);
737 if (__improbable(m == NULL)) {
738 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
739 STATS_INC(nifs, NETIF_STATS_DROP);
740 SK_DF(SK_VERB_MEM,
741 "mbuf allocation failed (slot %u)", nm_i);
742 /* XXX how do we proceed ? break ? */
743 return -ENOMEM;
744 }
		} else if (mbuf_ring_cluster_is_active(m)) {
746 break; /* This mbuf is still busy */
747 }
748 n++;
		nm_i = SLOT_NEXT(nm_i, lim);
750 }
	kring->ckr_ktail = SLOT_PREV(nm_i, lim);
752
753 SK_RDF(SK_VERB_NETIF, 10, "kr \"%s\" (0x%llx) tx completed [%u] -> "
754 "kh %u kt %u | rh %u rt %u", kring->ckr_name, SK_KVA(kring),
755 n, kring->ckr_khead, kring->ckr_ktail,
756 kring->ckr_rhead, kring->ckr_rtail);
757
758 return n;
759}
760
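/*
 * Mark one of the mbufs still held by the driver with NMB_PROPF_TX_NOTIFY,
 * so that nx_netif_compat_ringcb() raises a TX interrupt when the driver
 * eventually completes (frees) that mbuf.
 */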
761/* Hoisted out of line to reduce kernel stack footprint */
762SK_NO_INLINE_ATTRIBUTE
763static void
764nx_netif_compat_set_tx_event(struct __kern_channel_ring *kring,
765 slot_idx_t khead)
766{
767 const slot_idx_t lim = kring->ckr_lim;
	slot_idx_t ntc = SLOT_NEXT(kring->ckr_ktail, lim); /* next to clean */
769 struct mbuf *m;
770 slot_idx_t e;
771
772 if (ntc == khead) {
773 return; /* all buffers are free */
774 }
775 /*
	 * We have pending packets in the driver between ckr_ktail+1 and
777 * ckr_khead, and we have to choose one of these slots to generate
778 * a TX notification. There is a race, but this is only called
779 * within TX sync which does a double check.
780 */
781 if (__probable(netif_tx_event_mode == 0)) {
782 /*
783 * Choose the first pending slot, to be safe against drivers
784 * reordering mbuf transmissions.
785 */
786 e = ntc;
787 } else {
788 /*
789 * Choose a slot in the middle, so that we don't risk ending
		 * up in a situation where the client continuously wakes up,
		 * fills one or a few TX slots, and goes to sleep again.
792 */
793 slot_idx_t n = lim + 1;
794
795 if (khead >= ntc) {
796 e = (khead + ntc) >> 1;
797 } else { /* wrap around */
798 e = (khead + n + ntc) >> 1;
799 if (e >= n) {
800 e -= n;
801 }
802 }
803
804 if (__improbable(e >= n)) {
805 SK_ERR("This cannot happen");
806 e = 0;
807 }
808 }
809 m = kring->ckr_tx_pool[e];
810
811 for (;;) {
812 uint32_t p = 0, pn, i, f;
813 int err;
814
		(void) mbuf_cluster_get_prop(m, &p);
816 f = NMB_GET_FLAGS(p);
817 i = NMB_GET_INDEX(p);
818
819 if (f & NMB_PROPF_TX_NOTIFY) {
820 /*
821 * This can happen if there is already an event
822 * on the ring slot 'e': There is nothing to do.
823 */
824 SK_DF(SK_VERB_NETIF | SK_VERB_NOTIFY | SK_VERB_TX,
825 "TX_NOTIFY already set at %u m 0x%llx kc %u ntc %u",
826 e, SK_KVA(m), khead, ntc);
827 return;
828 }
829
830 f |= NMB_PROPF_TX_NOTIFY;
831 pn = NMB_SET_FLAGS(p, f);
832
		err = mbuf_cluster_set_prop(m, p, pn);
834 if (err != 0) {
835 if (err == EBUSY) { /* try again */
836 continue;
837 }
838 /* TODO: adi@apple.com -- what to do? */
839 SK_ERR("Failed to set TX_NOTIFY at %u m 0x%llx kh %u "
840 "ntc %u, err %d", e, SK_KVA(m), khead, ntc, err);
841 } else {
842 SK_DF(SK_VERB_NETIF | SK_VERB_NOTIFY | SK_VERB_TX,
843 "Request TX_NOTIFY at %u m 0x%llx kh %u ntc %u",
844 e, SK_KVA(m), khead, ntc);
845 }
846 break;
847 }
848}
849
850#if SK_LOG
851/* Hoisted out of line to reduce kernel stack footprint */
852SK_LOG_ATTRIBUTE
853static void
854nx_netif_compat_na_txsync_log(struct __kern_channel_ring *kring,
855 struct proc *p, uint32_t flags, slot_idx_t nm_i)
856{
857 SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_TX,
858 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x "
859 "nm_i %u, kh %u kt %u | rh %u rt %u",
860 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
861 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
862 flags, nm_i, kring->ckr_khead, kring->ckr_ktail,
863 kring->ckr_rhead, kring->ckr_rtail);
864}
865#endif /* SK_LOG */
866
867/*
868 * nx_netif_compat_na_txsync() transforms packets into mbufs and passes
869 * them to the device driver.
870 */
871static int
872nx_netif_compat_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
873 uint32_t flags)
874{
875#pragma unused(p)
876 struct nexus_adapter *na = KRNA(kring);
877 struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
878 slot_idx_t nm_i; /* index into the channel ring */ // j
879 const slot_idx_t head = kring->ckr_rhead;
880 uint32_t slot_count = 0;
881 uint32_t byte_count = 0;
882
883 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
884
885 /* update our work timestamp */
886 na->na_work_ts = _net_uptime;
887
888 /*
889 * First part: process new packets to send.
890 */
891 nm_i = kring->ckr_khead;
892 if (nm_i != head) { /* we have new packets to send */
893 while (nm_i != head) {
894 struct __kern_slot_desc *sd = KR_KSD(kring, nm_i);
895
896 /* device-specific */
897 struct mbuf *m;
898 int tx_ret;
899 /*
			 * Take an mbuf from the tx pool (replenishing the pool
901 * entry if necessary) and copy in the user packet.
902 */
903 VERIFY(nm_i <= UINT16_MAX);
904 m = kring->ckr_tx_pool[nm_i];
905 if (__improbable(m == NULL)) {
906 kring->ckr_tx_pool[nm_i] = m =
907 nx_netif_compat_ring_alloc(M_WAITOK,
				    kring->ckr_max_pkt_len, (uint16_t)nm_i);
909 if (__improbable(m == NULL)) {
910 STATS_INC(nifs, NETIF_STATS_DROP);
911 STATS_INC(nifs,
912 NETIF_STATS_DROP_NOMEM_MBUF);
913 SK_DF(SK_VERB_MEM,
914 "%s(%d) kr \"%s\" (0x%llx) "
915 "krflags 0x%b ckr_tx_pool[%u] "
916 "allocation failed",
917 sk_proc_name_address(p),
918 sk_proc_pid(p), kring->ckr_name,
919 SK_KVA(kring), kring->ckr_flags,
920 CKRF_BITS, nm_i);
921 /*
922 * Here we could schedule a timer
923 * which retries to replenish after
924 * a while, and notifies the client
925 * when it manages to replenish some
					 * slot. In any case we break early
927 * to avoid crashes.
928 */
929 break;
930 }
931 STATS_INC(nifs, NETIF_STATS_TX_REPL);
932 }
933
934 byte_count += sd->sd_pkt->pkt_length;
935 slot_count++;
936
937 /*
			 * We should ask for notifications when CS_REPORT is set,
939 * or roughly every half ring. To optimize this,
940 * we set a notification event when the client runs
941 * out of TX ring space, or when transmission fails.
942 * In the latter case we also break early.
943 */
944 tx_ret = nx_netif_compat_xmit_frame(na, m, sd->sd_pkt);
945 if (__improbable(tx_ret)) {
946 SK_RD(5, "start_xmit failed: err %d "
947 "[nm_i %u, h %u, kt %u]",
948 tx_ret, nm_i, head, kring->ckr_ktail);
949 /*
950 * No room for this mbuf in the device driver.
951 * Request a notification FOR A PREVIOUS MBUF,
952 * then call nx_netif_compat_tx_clean(kring) to
953 * do the double check and see if we can free
954 * more buffers. If there is space continue,
955 * else break; NOTE: the double check is
956 * necessary if the problem occurs in the
957 * txsync call after selrecord(). Also, we
958 * need some way to tell the caller that not
959 * all buffers were queued onto the device
960 * (this was not a problem with native skywalk
961 * driver where space is preallocated). The
962 * bridge has a similar problem and we solve
963 * it there by dropping the excess packets.
964 */
				nx_netif_compat_set_tx_event(kring, nm_i);
966 if (nx_netif_compat_tx_clean(nifs, kring)) {
967 /* space now available */
968 continue;
969 } else {
970 break;
971 }
972 }
			nm_i = SLOT_NEXT(nm_i, kring->ckr_lim);
974 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
975 }
976
977 /*
		 * Update khead to the next slot to transmit. Here nm_i
		 * is not necessarily head, since we may have broken out early.
980 */
981 kring->ckr_khead = nm_i;
982
983 kr_update_stats(kring, slot_count, byte_count);
984 }
985
986 /*
	 * Second part: reclaim completed buffers.
988 */
989 if ((flags & NA_SYNCF_FORCE_RECLAIM) || kr_txempty(kring)) {
990 /*
991 * No more available slots? Set a notification event on a
992 * channel slot that will be cleaned in the future. No
993 * doublecheck is performed, since nx_netif_compat_na_txsync()
994 * will be called twice by ch_event().
995 */
		nx_netif_compat_set_tx_event(kring, nm_i);
997 }
998 kring->ckr_pending_intr = 0;
999
1000#if SK_LOG
1001 if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) {
1002 nx_netif_compat_na_txsync_log(kring, p, flags, nm_i);
1003 }
1004#endif /* SK_LOG */
1005
1006 (void) nx_netif_compat_tx_clean(nifs, kring);
1007
1008 return 0;
1009}
1010
1011#if SK_LOG
1012/* Hoisted out of line to reduce kernel stack footprint */
1013SK_LOG_ATTRIBUTE
1014static void
1015nx_netif_compat_receive_log1(const struct __kern_channel_ring *kring,
1016 struct nx_mbq *q)
1017{
1018 SK_RD(10, "kr \"%s\" (0x%llx) krflags 0x%b FULL "
1019 "(qlen %u qsize %llu), kc %u kt %u", kring->ckr_name,
1020 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, nx_mbq_len(q),
1021 nx_mbq_size(q), kring->ckr_khead, kring->ckr_ktail);
1022}
1023
1024/* Hoisted out of line to reduce kernel stack footprint */
1025SK_LOG_ATTRIBUTE
1026static void
1027nx_netif_compat_receive_log2(const struct __kern_channel_ring *kring,
1028 struct nx_mbq *q, const struct ifnet_stat_increment_param *s)
1029{
1030 SK_RDF(SK_VERB_RX, 10, "kr \"%s\" (0x%llx) krflags 0x%b OK, "
1031 "added %u packets %u bytes, now qlen %u qsize %llu",
1032 kring->ckr_name, SK_KVA(kring), kring->ckr_flags, CKRF_BITS,
1033 s->packets_in, s->bytes_in, nx_mbq_len(q), nx_mbq_size(q));
1034}
1035#endif /* SK_LOG */
1036
1037/*
1038 * This is the default RX path for the compat netif nexus. Packets
1039 * are enqueued and later extracted by nx_netif_compat_na_rxsync().
1040 */
1041/* TODO: adi@apple.com -- implement chaining */
1042static errno_t
1043nx_netif_compat_receive(struct ifnet *ifp, struct mbuf *m_head,
1044 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
1045 boolean_t poll, struct thread *tp)
1046{
1047#pragma unused(tp)
1048 boolean_t ifp_rxpoll = ((ifp->if_eflags & IFEF_RXPOLL) && net_rxpoll);
1049 struct nexus_adapter *na = &NA(ifp)->nifna_up;
1050 struct __kern_channel_ring *kring;
1051 struct netif_stats *nifs;
1052 uint32_t r, work_done;
1053 unsigned int qlimit;
1054 struct nx_mbq *q;
1055 errno_t err = 0;
1056
1057 /* update our work timestamp */
1058 na->na_work_ts = _net_uptime;
1059
1060 if (__improbable(m_head == NULL)) {
1061 ASSERT(m_tail == NULL);
1062 ASSERT(poll);
1063 ASSERT(s->bytes_in == 0);
1064 ASSERT(s->packets_in == 0);
1065 }
1066
1067 /* BEGIN CSTYLED */
1068 /*
1069 * TODO: adi@apple.com -- this needs to be revisited once we
1070 * have a clear definition of how multiple RX rings are mapped
1071 * to flows; this would involve the hardware/driver doing some
1072 * kind of classification and RSS-like demuxing.
1073 *
1074 * When we enable that, we'll need to consider sifting thru the
1075 * mbuf chain we get from the caller, and enqueue them across
1076 * per-ring temporary mbuf queue (along with marking the ring
1077 * indicating pending packets.) During second stage processing,
1078 * we'll issue nx_netif_mit_rx_intr() on each marked ring to
1079 * dispatch the packets upstream.
1080 *
1081 * r = MBUF_RXQ(m);
1082 *
1083 * if (r >= na->na_num_rx_rings)
1084 * r = r % na->na_num_rx_rings;
1085 *
1086 * kring = &na->na_rx_rings[r];
1087 * q = &kring->ckr_rx_queue;
1088 *
1089 * For now, target only the first RX ring (ring 0).
1090 */
1091 /* END CSTYLED */
1092 r = 0; /* receive ring number */
1093 kring = &na->na_rx_rings[r];
1094
1095 ASSERT(na->na_type == NA_NETIF_COMPAT_DEV);
1096 nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
1097
1098 if (__improbable((!NA_IS_ACTIVE(na)) || KR_DROP(kring))) {
1099 /* BEGIN CSTYLED */
1100 /*
1101 * If we deal with multiple rings, change above to:
1102 *
1103 * if (!NA_IS_ACTIVE(na) || r >= na_get_nrings(na, NR_RX)))
1104 *
1105 * then here do:
1106 *
1107 * if (r >= na_get_nrings(na, NR_RX)) {
1108 * SK_ERR("na \"%s\" (0x%llx) invalid r %u >= %u",
1109 * na->na_name, SK_KVA(na), r,
1110 * na_get_nrings(na, NR_RX));
1111 * }
1112 */
1113 /* END CSTYLED */
1114 m_freem_list(m_head);
1115 if (!NA_IS_ACTIVE(na)) {
1116 STATS_ADD(nifs, NETIF_STATS_DROP_NA_INACTIVE,
1117 s->packets_in);
1118 } else if (KR_DROP(kring)) {
1119 STATS_ADD(nifs, NETIF_STATS_DROP_KRDROP_MODE,
1120 s->packets_in);
1121 }
1122 STATS_ADD(nifs, NETIF_STATS_DROP, s->packets_in);
1123 err = ENXIO;
1124 goto done;
1125 }
1126 if (__improbable(m_head == NULL)) {
1127 goto send_packets;
1128 }
1129
1130 q = &kring->ckr_rx_queue;
1131 nx_mbq_lock_spin(q);
1132 qlimit = nx_mbq_limit(q);
1133 if (ifp_rxpoll) {
1134 /*
		 * The qlimit of the receive queue is much smaller when the
		 * interface is in opportunistic polling mode.  While such an
		 * interface is operating in interrupt mode, a sudden burst of
		 * input packets can cause the receive queue to build up
		 * quickly, due to the scheduling latency of waking up the
		 * poller thread.  To avoid drops caused by that latency,
		 * allow some leeway on the qlimit.
1142 */
1143 qlimit <<= 5;
1144 }
1145 if (__improbable(nx_mbq_len(q) > qlimit)) {
1146#if SK_LOG
1147 if (__improbable(sk_verbose != 0)) {
1148 nx_netif_compat_receive_log1(kring, q);
1149 }
1150#endif /* SK_LOG */
1151 nx_mbq_unlock(q);
1152 m_freem_list(m_head);
1153 STATS_ADD(nifs, NETIF_STATS_DROP_RXQ_OVFL, s->packets_in);
1154 STATS_ADD(nifs, NETIF_STATS_DROP, s->packets_in);
1155 goto send_packets;
1156 }
	nx_mbq_enq_multi(q, m_head, m_tail, s->packets_in, s->bytes_in);
1158
1159#if SK_LOG
1160 if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) {
1161 nx_netif_compat_receive_log2(kring, q, s);
1162 }
1163#endif /* SK_LOG */
1164
1165 nx_mbq_unlock(q);
1166
	(void) ifnet_stat_increment_in(ifp, s->packets_in, s->bytes_in,
	    s->errors_in);
1169
1170 if (poll) {
1171 /* update incremental poll stats */
1172 PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in);
1173 }
1174
1175send_packets:
1176 /*
	 * If the interface supports opportunistic input polling, input
	 * packet processing is performed in the context of the poller thread.
1179 */
1180 if (!poll && ifp_rxpoll) {
1181 /* wakeup the poller thread */
1182 ifnet_poll(ifp);
1183 } else {
1184 /*
		 * Wake up the mitigation thread if needed to perform input
		 * packet processing.  If the interface supports opportunistic
		 * input polling, the mitigation thread is not created and the
		 * input packet processing happens in the context of the
		 * poller thread.
1190 */
		err = nx_netif_mit_rx_intr((NAKR(na, NR_RX) + r), kernproc, 0,
1192 &work_done);
1193 }
1194done:
1195 return err;
1196}
1197
1198#if SK_LOG
1199/* Hoisted out of line to reduce kernel stack footprint */
1200SK_LOG_ATTRIBUTE
1201static void
1202nx_netif_compat_na_rxsync_log(const struct __kern_channel_ring *kring,
1203 struct proc *p, uint32_t flags, slot_idx_t nm_i)
1204{
1205 SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
1206 "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b "
1207 "ring %u flags 0x%x nm_i %u kt %u", sk_proc_name_address(p),
1208 sk_proc_pid(p), kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1209 CKRF_BITS, kring->ckr_ring_id, flags, nm_i, kring->ckr_ktail);
1210}
1211#endif /* SK_LOG */
1212
1213#if DEBUG || DEVELOPMENT
1214/*
1215 * Split an mbuf chain at offset "split", such that the first mbuf
1216 * is a zero-length M_PKTHDR, followed by the rest of the mbufs.
1217 * Typically, the "split" value is equal to the size of the link
1218 * layer header, e.g. Ethernet header.
1219 */
1220static struct mbuf *
1221nx_netif_rx_split(struct mbuf *m0, uint32_t split)
1222{
1223 struct mbuf *m = m0;
1224
1225 if (split == 0) {
1226 split = MHLEN;
1227 M_PREPEND(m, split, M_DONTWAIT, 0);
1228 } else {
1229 m->m_data -= split;
1230 m->m_len += split;
1231 m_pktlen(m) += split;
1232
1233 ASSERT((uintptr_t)m->m_data >= (uintptr_t)mbuf_datastart(m));
1234 ASSERT((uintptr_t)m->m_data < ((uintptr_t)mbuf_datastart(m) +
1235 mbuf_maxlen(m)));
1236 }
1237 if (m != NULL) {
1238 struct mbuf *n = m_split(m, split, M_DONTWAIT);
1239 if (n == NULL) {
1240 m_freem(m);
1241 return NULL;
1242 }
1243 m0 = m;
1244 ASSERT((uint32_t)m->m_len == split);
1245 m->m_data += split;
1246 m->m_len -= split;
1247 while (m->m_next != NULL) {
1248 m = m->m_next;
1249 }
1250 m->m_next = n;
1251 m = m0;
1252 m_pktlen(m) = m_length2(m, NULL);
1253 }
1254
1255 return m;
1256}
1257#endif /* DEBUG || DEVELOPMENT */
1258
1259/*
1260 * nx_netif_compat_na_rxsync() extracts mbufs from the queue filled by
1261 * nx_netif_compat_receive() and puts their content in the channel
1262 * receive ring.
1263 *
1264 * Accesses to kring are serialized via kring->ckr_rx_queue lock, because
 * the rx handler is asynchronous.
1266 */
1267static int
1268nx_netif_compat_na_rxsync(struct __kern_channel_ring *kring, struct proc *p,
1269 uint32_t flags)
1270{
1271#pragma unused(p)
1272 struct nexus_adapter *na = KRNA(kring);
1273 struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
1274 struct nx_netif *nif = nifna->nifna_netif;
1275 slot_idx_t nm_i; /* index into the channel ring */
1276 struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
1277 uint32_t npkts = 0;
1278 uint32_t byte_count = 0;
1279 const slot_idx_t lim = kring->ckr_lim;
1280 const slot_idx_t head = kring->ckr_rhead;
1281 boolean_t force_update = ((flags & NA_SYNCF_FORCE_READ) ||
1282 kring->ckr_pending_intr != 0);
1283 struct mbuf *m;
1284 uint32_t n;
1285 uint32_t avail; /* in slots */
1286 int err, mlen;
1287 boolean_t attach_mbuf = FALSE;
1288 struct nx_mbq *q, tmpq;
1289 struct kern_pbufpool *pp = kring->ckr_pp;
1290 uint32_t ph_cnt, i = 0;
1291
1292 ASSERT(pp->pp_max_frags == 1);
1293 ASSERT(head <= lim);
1294
1295 /*
1296 * First part: skip past packets that userspace has released.
1297 * This can possibly make room for the second part.
	 * This is equivalent to kr_reclaim().
1299 */
1300 if (kring->ckr_khead != head) {
1301 kring->ckr_khead = head;
1302 /* ensure global visibility */
1303 os_atomic_thread_fence(seq_cst);
1304 }
1305
1306 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1307
1308 /*
1309 * Second part: import newly received packets.
1310 */
1311 if (!force_update) {
1312 return 0;
1313 }
1314
1315 /* update our work timestamp */
1316 na->na_work_ts = _net_uptime;
1317
1318 /* first empty slot in the receive ring */
1319 nm_i = kring->ckr_ktail;
1320
1321 /*
	 * Compute the available space (in slots) in this ring.
	 * The first slot that is not considered is the one
1324 * before ckr_khead.
1325 */
	avail = kr_available_slots_rxring(kring);
1327 if (__improbable(avail == 0)) {
1328 return 0;
1329 }
1330
1331 if (NA_KERNEL_ONLY(na)) {
1332 ASSERT(na->na_ifp != NULL &&
1333 fsw_ifp_to_fsw(na->na_ifp) != NULL);
1334 /*
1335 * We are not supporting attachment to bridge flowswitch
1336 * for now, until we support PKT_F_MBUF_DATA packets
1337 * in bridge flowswitch.
1338 */
1339 attach_mbuf = TRUE;
1340 }
1341
1342 /*
1343 * Quickly move all of ckr_rx_queue to a temporary queue to dequeue
1344 * from. For each mbuf, attach or copy it to the packet attached
1345 * to the slot. Release the lock while we're doing that, to allow
1346 * for the input thread to enqueue.
1347 */
1348 q = &kring->ckr_rx_queue;
	nx_mbq_init(&tmpq, NX_MBQ_NO_LIMIT);
1350 nx_mbq_lock_spin(q);
1351 nx_mbq_concat(&tmpq, q);
1352 nx_mbq_unlock(q);
1353
1354 if (__improbable(nx_mbq_len(&tmpq) == 0)) {
1355 return 0;
1356 }
1357
1358 ph_cnt = MIN(avail, nx_mbq_len(&tmpq));
	err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
	    &ph_cnt);
1361 if (err == ENOMEM) {
1362 SK_DF(SK_VERB_MEM, "%s(%p) failed to alloc %d pkts for kr "
1363 "0x%llu", sk_proc_name_address(p), sk_proc_pid(p), ph_cnt,
1364 SK_KVA(kring));
1365 goto done;
1366 }
1367 ASSERT(ph_cnt != 0);
1368
1369 for (n = 0; (n < ph_cnt) &&
	    ((m = nx_mbq_deq(&tmpq)) != NULL); n++) {
1371 struct __kern_slot_desc *ksd = KR_KSD(kring, nm_i);
1372 struct __kern_packet *pkt;
1373 kern_packet_t ph;
1374 uint8_t hlen;
1375 uint16_t tag;
1376 char *h;
1377
1378 ASSERT(m->m_flags & M_PKTHDR);
1379 mlen = m_pktlen(m);
1380 h = m->m_pkthdr.pkt_hdr;
1381 if (__improbable(mlen == 0 || h == NULL ||
1382 h < (char *)mbuf_datastart(m) || h > (char *)m->m_data)) {
1383 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1384 SK_RD(5, "kr \"%s\" (0x%llx) m 0x%llx len %d"
1385 "bad pkt_hdr", kring->ckr_name,
1386 SK_KVA(kring), SK_KVA(m), mlen);
1387 m_freem(m);
1388 m = NULL;
1389 continue;
1390 }
1391
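		/* link-layer header length: bytes between pkt_hdr and m_data */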
1392 hlen = (uint8_t)(m->m_data - (uintptr_t)h);
1393 mlen += hlen;
1394
1395#if DEBUG || DEVELOPMENT
1396 if (__improbable(netif_rx_split != 0)) {
1397 /* callee frees mbuf upon failure */
1398 if ((m = nx_netif_rx_split(m, hlen)) == NULL) {
1399 continue;
1400 }
1401
1402 ASSERT((uintptr_t)m->m_data >=
1403 (uintptr_t)mbuf_datastart(m));
1404 ASSERT((uintptr_t)m->m_data <
1405 ((uintptr_t)mbuf_datastart(m) +
1406 mbuf_maxlen(m)));
1407 }
1408#endif /* DEBUG || DEVELOPMENT */
1409
1410 ph = kring->ckr_scratch[i];
1411 ASSERT(ph != 0);
1412 kring->ckr_scratch[i] = 0;
1413 pkt = SK_PTR_ADDR_KPKT(ph);
1414 ++i;
1415
1416 /*
1417 * Wind back the data pointer to include any frame headers
1418 * as part of the copy below. The header length is then
1419 * stored in the corresponding metadata area of the buffer.
1420 */
1421 m->m_data -= hlen;
1422 m->m_len += hlen;
1423 m->m_pkthdr.len += hlen;
1424 ASSERT(mlen == m->m_pkthdr.len);
1425
1426 pkt->pkt_link_flags = 0;
1427 if (m->m_flags & M_HASFCS) {
1428 pkt->pkt_link_flags |= PKT_LINKF_ETHFCS;
1429 }
		if (mbuf_get_vlan_tag(m, &tag) == 0) {
1431 (void) kern_packet_set_vlan_tag(SK_PKT2PH(pkt), tag,
1432 FALSE);
1433 }
1434 SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
1435 "kr \"%s\" (0x%llx) m 0x%llx idx %u slot_len %d",
1436 kring->ckr_name, SK_KVA(kring), SK_KVA(m), nm_i, mlen);
1437
1438 if (__probable(attach_mbuf)) {
1439 STATS_INC(nifs, NETIF_STATS_RX_COPY_ATTACH);
			err = __packet_initialize_with_mbuf(pkt, m, 0, hlen);
1441 VERIFY(err == 0);
1442 } else if (__probable(mlen <= (int)PP_BUF_SIZE_DEF(pp))) {
1443 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
1444 /*
1445 * We're sending this up to a user channel opened
1446 * directly to the netif; copy everything.
1447 */
			err = __packet_set_headroom(ph, 0);
			VERIFY(err == 0);
			err = __packet_set_link_header_length(ph, hlen);
1451 VERIFY(err == 0);
1452 nif->nif_pkt_copy_from_mbuf(NR_RX, ph, 0, m, 0,
1453 mlen, FALSE, 0);
1454 /* finalize and attach the packet */
1455 err = __packet_finalize(ph);
1456 VERIFY(err == 0);
1457 m_freem(m);
1458 m = NULL;
1459 } else {
1460 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1461 STATS_INC(nifs, NETIF_STATS_DROP);
1462 m_freem(m);
1463 m = NULL;
			kern_pbufpool_free(pp, ph);
1465 ph = 0;
1466 pkt = NULL;
1467 continue;
1468 }
1469
1470 err = KR_SLOT_ATTACH_METADATA(kring, ksd,
		    (struct __kern_quantum *)pkt);
1472 ASSERT(err == 0);
1473
1474 byte_count += mlen;
1475 ++npkts;
1476 ASSERT(npkts < kring->ckr_num_slots);
		nm_i = SLOT_NEXT(nm_i, lim);
1478 }
1479
1480 if (__improbable(i < ph_cnt)) {
		kern_pbufpool_free_batch(pp, &kring->ckr_scratch[i],
		    (ph_cnt - i));
1483 }
1484
1485 ASSERT(npkts <= ph_cnt);
	kr_update_stats(kring, npkts, byte_count);
1487
1488 if (npkts != 0) {
1489 kring->ckr_ktail = nm_i;
1490 STATS_ADD(nifs, NETIF_STATS_RX_PACKETS, npkts);
1491 }
1492 kring->ckr_pending_intr = 0;
1493
1494#if SK_LOG
1495 if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) {
1496 nx_netif_compat_na_rxsync_log(kring, p, flags, nm_i);
1497 }
1498#endif /* SK_LOG */
1499
1500done:
1501 /*
1502 * If we didn't process all packets in temporary queue,
1503 * move them back to the head of ckr_rx_queue.
1504 */
1505 if (!nx_mbq_empty(&tmpq)) {
1506 nx_mbq_lock_spin(q);
1507 nx_mbq_concat(&tmpq, q);
1508 ASSERT(nx_mbq_empty(q));
1509 nx_mbq_concat(q, &tmpq);
1510 nx_mbq_unlock(q);
1511 }
1512 ASSERT(nx_mbq_empty(&tmpq));
1513
1514 return 0;
1515}
1516
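/*
 * Destructor shared by the compat dev and host adapters; releases the
 * ifnet reference (or the embryonic pointer left in na_private) and
 * drops the refcount on the netif instance.
 */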
1517static void
1518nx_netif_compat_na_dtor(struct nexus_adapter *na)
1519{
1520 struct ifnet *ifp;
1521 struct nexus_netif_compat_adapter *nca =
1522 (struct nexus_netif_compat_adapter *)na;
1523
1524 SK_LOCK_ASSERT_HELD();
1525
1526 SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx)", na->na_name, SK_KVA(na));
1527
1528 /*
1529 * If the finalizer callback hasn't been called for whatever
	 * reason, pick up the embryonic ifnet stored in na_private.
1531 * Otherwise, release the I/O refcnt of a non-NULL na_ifp.
1532 */
1533 if ((ifp = na->na_ifp) == NULL) {
1534 ifp = na->na_private;
1535 na->na_private = NULL;
1536 } else {
1537 ifnet_decr_iorefcnt(ifp);
1538 na->na_ifp = NULL;
1539 }
1540
1541 if (nca->nca_up.nifna_netif != NULL) {
1542 nx_netif_release(nca->nca_up.nifna_netif);
1543 nca->nca_up.nifna_netif = NULL;
1544 }
1545 ASSERT(!SKYWALK_NATIVE(ifp));
1546}
1547
1548/*
1549 * nx_netif_compat_attach() makes it possible to use skywalk on
1550 * a device without native skywalk support.
1551 * This is less performant than native support but potentially
1552 * faster than raw sockets or similar schemes.
1553 */
1554int
1555nx_netif_compat_attach(struct kern_nexus *nx, struct ifnet *ifp)
1556{
1557 struct nx_netif *nif = NX_NETIF_PRIVATE(nx);
1558 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
1559 struct nexus_netif_compat_adapter *devnca = NULL;
1560 struct nexus_netif_compat_adapter *hostnca = NULL;
1561 struct nexus_adapter *devna = NULL;
1562 struct nexus_adapter *hostna = NULL;
1563 boolean_t embryonic = FALSE;
1564 uint32_t tx_rings, tx_slots;
1565 int retval = 0;
1566
1567 SK_LOCK_ASSERT_HELD();
1568 ASSERT(!SKYWALK_NATIVE(ifp));
1569 ASSERT(!SKYWALK_CAPABLE(ifp));
1570 ASSERT(ifp->if_na == NULL);
1571 ASSERT(ifp->if_na_ops == NULL);
1572
	devnca = na_netif_compat_alloc(Z_WAITOK);
	hostnca = na_netif_compat_alloc(Z_WAITOK);
1575
1576 /*
1577 * We can be called for two different interface states:
1578 *
1579 * Fully attached: get an io ref count; upon success, this
1580 * holds a reference to the ifnet for the ifp pointer stored
1581 * in 'na_ifp' down below for both adapters.
1582 *
	 * Embryonic: temporarily hold the ifnet in na_private; upon
	 * a successful ifnet_attach(), it will be moved over to
	 * 'na_ifp' with an I/O ref count held.
1586 *
1587 * The ifnet in 'na_ifp' will be released by na_release_locked().
1588 */
	if (!ifnet_is_attached(ifp, 1)) {
1590 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
1591 ifp = NULL;
1592 retval = ENXIO;
1593 goto err;
1594 }
1595 embryonic = TRUE;
1596 }
1597
1598 /* initialize the (compat) device netif adapter */
1599 devnca->nca_up.nifna_netif = nif;
1600 nx_netif_retain(nif);
1601 devna = &devnca->nca_up.nifna_up;
1602 (void) strncpy(devna->na_name, ifp->if_xname, sizeof(devna->na_name) - 1);
1603 devna->na_name[sizeof(devna->na_name) - 1] = '\0';
	uuid_generate_random(devna->na_uuid);
1605 if (embryonic) {
1606 /*
1607 * We will move this over to na_ifp once
1608 * the interface is fully attached.
1609 */
1610 devna->na_private = ifp;
1611 ASSERT(devna->na_ifp == NULL);
1612 } else {
1613 ASSERT(devna->na_private == NULL);
1614 /* use I/O refcnt from ifnet_is_attached() */
1615 devna->na_ifp = ifp;
1616 }
1617
1618 devna->na_type = NA_NETIF_COMPAT_DEV;
1619 devna->na_free = na_netif_compat_free;
1620 devna->na_activate = nx_netif_compat_na_activate;
1621 devna->na_txsync = nx_netif_compat_na_txsync;
1622 devna->na_rxsync = nx_netif_compat_na_rxsync;
1623 devna->na_dtor = nx_netif_compat_na_dtor;
1624 devna->na_krings_create = nx_netif_dev_krings_create;
1625 devna->na_krings_delete = nx_netif_dev_krings_delete;
1626 devna->na_special = nx_netif_na_special;
1627
1628 *(nexus_stats_type_t *)(uintptr_t)&devna->na_stats_type =
1629 NEXUS_STATS_TYPE_INVALID;
1630
1631 if (skywalk_netif_direct_allowed(ifp->if_xname)) {
1632 tx_rings = nxp->nxp_tx_rings;
1633 tx_slots = nxp->nxp_tx_slots;
1634 } else {
1635 tx_rings = 0;
1636 tx_slots = 0;
1637 }
	na_set_nrings(devna, NR_TX, tx_rings);
	na_set_nrings(devna, NR_RX, nxp->nxp_rx_rings);
	na_set_nslots(devna, NR_TX, tx_slots);
	na_set_nslots(devna, NR_RX, nxp->nxp_rx_slots);
1642 /*
1643 * Verify upper bounds; the parameters must have already been
1644 * validated by nxdom_prov_params() by the time we get here.
1645 */
1646 ASSERT(na_get_nrings(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
1647 ASSERT(na_get_nrings(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
1648 ASSERT(na_get_nslots(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
1649 ASSERT(na_get_nslots(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
1650
1651 na_attach_common(devna, nx, &nx_netif_compat_prov_s);
1652
1653 if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
1654 nx, devna)) != 0) {
1655 ASSERT(devna->na_arena == NULL);
1656 /* we've transferred the refcnt to na_ifp above */
1657 ifp = NULL;
1658 goto err;
1659 }
1660 ASSERT(devna->na_arena != NULL);
1661
1662 *(uint32_t *)(uintptr_t)&devna->na_flowadv_max = nxp->nxp_flowadv_max;
1663 ASSERT(devna->na_flowadv_max == 0 ||
1664 skmem_arena_nexus(devna->na_arena)->arn_flowadv_obj != NULL);
1665
1666 /* setup packet copy routines */
	if (skmem_arena_nexus(devna->na_arena)->arn_rx_pp->pp_max_frags > 1) {
1668 nif->nif_pkt_copy_from_mbuf =
1669 pkt_copy_multi_buflet_from_mbuf;
1670 nif->nif_pkt_copy_to_mbuf =
1671 pkt_copy_multi_buflet_to_mbuf;
1672 } else {
1673 nif->nif_pkt_copy_from_mbuf = pkt_copy_from_mbuf;
1674 nif->nif_pkt_copy_to_mbuf = pkt_copy_to_mbuf;
1675 }
1676
1677 /* initialize the host netif adapter */
1678 hostnca->nca_up.nifna_netif = nif;
1679 nx_netif_retain(nif);
1680 hostna = &hostnca->nca_up.nifna_up;
	(void) snprintf(hostna->na_name, sizeof(hostna->na_name),
1682 "%s^", devna->na_name);
	uuid_generate_random(hostna->na_uuid);
1684 if (embryonic) {
1685 /*
1686 * We will move this over to na_ifp once
1687 * the interface is fully attached.
1688 */
1689 hostna->na_private = ifp;
1690 ASSERT(hostna->na_ifp == NULL);
1691 } else {
1692 ASSERT(hostna->na_private == NULL);
1693 hostna->na_ifp = devna->na_ifp;
1694 ifnet_incr_iorefcnt(hostna->na_ifp);
1695 }
1696 hostna->na_type = NA_NETIF_COMPAT_HOST;
1697 hostna->na_free = na_netif_compat_free;
1698 hostna->na_activate = nx_netif_host_na_activate;
1699 hostna->na_txsync = nx_netif_host_na_txsync;
1700 hostna->na_rxsync = nx_netif_host_na_rxsync;
1701 hostna->na_dtor = nx_netif_compat_na_dtor;
1702 hostna->na_krings_create = nx_netif_host_krings_create;
1703 hostna->na_krings_delete = nx_netif_host_krings_delete;
1704 hostna->na_special = nx_netif_host_na_special;
1705
1706 os_atomic_or(&hostna->na_flags, NAF_HOST_ONLY, relaxed);
1707 *(nexus_stats_type_t *)(uintptr_t)&hostna->na_stats_type =
1708 NEXUS_STATS_TYPE_INVALID;
1709
	na_set_nrings(hostna, NR_TX, 1);
	na_set_nrings(hostna, NR_RX, 0);
	na_set_nslots(hostna, NR_TX, nxp->nxp_tx_slots);
	na_set_nslots(hostna, NR_RX, 0);
1714
1715 na_attach_common(hostna, nx, &nx_netif_prov_s);
1716
1717 if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
1718 nx, hostna)) != 0) {
1719 ASSERT(hostna->na_arena == NULL);
1720 /* we've transferred the refcnt to na_ifp above */
1721 ifp = NULL;
1722 goto err;
1723 }
1724 ASSERT(hostna->na_arena != NULL);
1725
1726 *(uint32_t *)(uintptr_t)&hostna->na_flowadv_max = nxp->nxp_flowadv_max;
1727 ASSERT(hostna->na_flowadv_max == 0 ||
1728 skmem_arena_nexus(hostna->na_arena)->arn_flowadv_obj != NULL);
1729
1730 /* these will be undone by destructor */
1731 ifp->if_na_ops = &na_netif_compat_ops;
1732 ifp->if_na = &devnca->nca_up;
	na_retain_locked(devna);
	na_retain_locked(hostna);
1735
1736 SKYWALK_SET_CAPABLE(ifp);
1737
1738 NETIF_WLOCK(nif);
1739 nif->nif_ifp = ifp;
1740 retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_DEV, NULL, &devna, kernproc);
1741 ASSERT(retval == 0);
1742 retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_HOST, NULL, &hostna, kernproc);
1743 ASSERT(retval == 0);
1744 NETIF_WUNLOCK(nif);
1745
1746#if SK_LOG
1747 uuid_string_t uuidstr;
1748 SK_DF(SK_VERB_NETIF, "na_name: \"%s\"", devna->na_name);
1749 SK_DF(SK_VERB_NETIF, " UUID: %s",
1750 sk_uuid_unparse(devna->na_uuid, uuidstr));
1751 SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
1752 SK_KVA(devna->na_nx), NX_DOM(devna->na_nx)->nxdom_name,
1753 NX_DOM_PROV(devna->na_nx)->nxdom_prov_name);
1754 SK_DF(SK_VERB_NETIF, " flags: 0x%b", devna->na_flags, NAF_BITS);
1755 SK_DF(SK_VERB_NETIF, " flowadv_max: %u", devna->na_flowadv_max);
1756 SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
1757 na_get_nrings(devna, NR_TX), na_get_nrings(devna, NR_RX));
1758 SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
1759 na_get_nslots(devna, NR_TX), na_get_nslots(devna, NR_RX));
1760#if CONFIG_NEXUS_USER_PIPE
1761 SK_DF(SK_VERB_NETIF, " next_pipe: %u", devna->na_next_pipe);
1762 SK_DF(SK_VERB_NETIF, " max_pipes: %u", devna->na_max_pipes);
1763#endif /* CONFIG_NEXUS_USER_PIPE */
1764 SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]",
1765 SK_KVA(ifp), ifp->if_xname, ifp->if_refio);
1766 SK_DF(SK_VERB_NETIF, "hostna: \"%s\"", hostna->na_name);
1767 SK_DF(SK_VERB_NETIF, " UUID: %s",
1768 sk_uuid_unparse(hostna->na_uuid, uuidstr));
1769 SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")",
1770 SK_KVA(hostna->na_nx), NX_DOM(hostna->na_nx)->nxdom_name,
1771 NX_DOM_PROV(hostna->na_nx)->nxdom_prov_name);
1772 SK_DF(SK_VERB_NETIF, " flags: 0x%b",
1773 hostna->na_flags, NAF_BITS);
1774 SK_DF(SK_VERB_NETIF, " flowadv_max: %u", hostna->na_flowadv_max);
1775 SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u",
1776 na_get_nrings(hostna, NR_TX), na_get_nrings(hostna, NR_RX));
1777 SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u",
1778 na_get_nslots(hostna, NR_TX), na_get_nslots(hostna, NR_RX));
1779#if CONFIG_NEXUS_USER_PIPE
1780 SK_DF(SK_VERB_NETIF, " next_pipe: %u", hostna->na_next_pipe);
1781 SK_DF(SK_VERB_NETIF, " max_pipes: %u", hostna->na_max_pipes);
1782#endif /* CONFIG_NEXUS_USER_PIPE */
1783 SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]", SK_KVA(ifp),
1784 ifp->if_xname, ifp->if_refio);
1785#endif /* SK_LOG */
1786
1787err:
1788 if (retval != 0) {
1789 ASSERT(ifp == NULL);
1790 if (devna != NULL) {
1791 if (devna->na_arena != NULL) {
1792 skmem_arena_release(devna->na_arena);
1793 devna->na_arena = NULL;
1794 }
1795 if (devna->na_ifp != NULL) {
1796 ifnet_decr_iorefcnt(devna->na_ifp);
1797 devna->na_ifp = NULL;
1798 }
1799 devna->na_private = NULL;
1800 }
1801 if (hostna != NULL) {
1802 if (hostna->na_arena != NULL) {
1803 skmem_arena_release(hostna->na_arena);
1804 hostna->na_arena = NULL;
1805 }
1806 if (hostna->na_ifp != NULL) {
1807 ifnet_decr_iorefcnt(hostna->na_ifp);
1808 hostna->na_ifp = NULL;
1809 }
1810 hostna->na_private = NULL;
1811 }
1812 if (devnca != NULL) {
1813 if (devnca->nca_up.nifna_netif != NULL) {
1814 nx_netif_release(devnca->nca_up.nifna_netif);
1815 devnca->nca_up.nifna_netif = NULL;
1816 }
			na_netif_compat_free((struct nexus_adapter *)devnca);
1818 }
1819 if (hostnca != NULL) {
1820 if (hostnca->nca_up.nifna_netif != NULL) {
1821 nx_netif_release(hostnca->nca_up.nifna_netif);
1822 hostnca->nca_up.nifna_netif = NULL;
1823 }
			na_netif_compat_free((struct nexus_adapter *)hostnca);
1825 }
1826 }
1827 return retval;
1828}
1829
1830static void
1831na_netif_compat_finalize(struct nexus_netif_adapter *nifna, struct ifnet *ifp)
1832{
1833 na_netif_finalize(nifna, ifp);
1834}
1835
1836/*
1837 * Intercept the rx routine in the standard device driver.
 * Second argument is TRUE to intercept, FALSE to restore the original handler.
1839 */
1840static int
1841nx_netif_compat_catch_rx(struct nexus_netif_compat_adapter *nca,
1842 boolean_t enable)
1843{
1844 struct ifnet *ifp = nca->nca_up.nifna_up.na_ifp;
1845 int err = 0;
1846
1847 ASSERT(!(nca->nca_up.nifna_up.na_flags & NAF_HOST_ONLY));
1848
1849 if (enable) {
		err = dlil_set_input_handler(ifp, nx_netif_compat_receive);
1851 } else {
1852 dlil_reset_input_handler(ifp);
1853 }
1854 return err;
1855}
1856
1857/*
1858 * Transmit routine used by nx_netif_compat_na_txsync(). Returns 0 on success
1859 * and non-zero on error (which may be packet drops or other errors).
 * pkt identifies the channel packet to copy from, and m is the
 * (preallocated) mbuf to use for the transmission.
1862 *
1863 * We should add a reference to the mbuf so the m_freem() at the end
1864 * of the transmission does not consume resources.
1865 *
1866 * On FreeBSD, and on multiqueue cards, we can force the queue using
1867 * if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1868 * i = m->m_pkthdr.flowid % adapter->num_queues;
1869 * else
1870 * i = curcpu % adapter->num_queues;
1871 *
1872 */
1873static int
1874nx_netif_compat_xmit_frame(struct nexus_adapter *na, struct mbuf *m,
1875 struct __kern_packet *pkt)
1876{
1877 struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na;
1878 struct nx_netif *nif = nifna->nifna_netif;
1879 struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
1880 struct ifnet *ifp = na->na_ifp;
1881 kern_packet_t ph = SK_PTR_ENCODE(pkt, METADATA_TYPE(pkt),
1882 METADATA_SUBTYPE(pkt));
1883 uint32_t len;
1884 int ret = 0;
1885
	if ((ret = mbuf_ring_cluster_activate(m)) != 0) {
1887 panic("Failed to activate mbuf ring cluster 0x%llx (%d)",
1888 SK_KVA(m), ret);
1889 /* NOTREACHED */
1890 __builtin_unreachable();
1891 }
1892
1893 len = pkt->pkt_length;
1894
1895 /*
1896 * The mbuf should be a cluster from our special pool,
1897 * so we do not need to do an m_copyback but just copy.
1898 */
1899 if (m->m_ext.ext_size < len) {
1900 SK_RD(5, "size %u < len %u", m->m_ext.ext_size, len);
1901 len = m->m_ext.ext_size;
1902 }
1903
1904 STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
1905 if (PACKET_HAS_PARTIAL_CHECKSUM(pkt)) {
1906 STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
1907 }
1908
1909 nif->nif_pkt_copy_to_mbuf(NR_TX, ph, pkt->pkt_headroom, m, 0, len,
1910 PACKET_HAS_PARTIAL_CHECKSUM(pkt), pkt->pkt_csum_tx_start_off);
1911
1912 /* used for tx notification */
	ret = mbuf_set_tx_compl_data(m, (uintptr_t)ifp, (uintptr_t)NULL);
1914 ASSERT(ret == 0);
1915
1916 ret = dlil_output_handler(ifp, m);
1917 return ret;
1918}
1919