1 | /* |
2 | * Copyright (c) 2015-2022 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. |
31 | * |
32 | * Redistribution and use in source and binary forms, with or without |
33 | * modification, are permitted provided that the following conditions |
34 | * are met: |
35 | * 1. Redistributions of source code must retain the above copyright |
36 | * notice, this list of conditions and the following disclaimer. |
37 | * 2. Redistributions in binary form must reproduce the above copyright |
38 | * notice, this list of conditions and the following disclaimer in the |
39 | * documentation and/or other materials provided with the distribution. |
40 | * |
41 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 | * SUCH DAMAGE. |
52 | */ |
53 | |
54 | #include <skywalk/os_skywalk_private.h> |
55 | #include <skywalk/nexus/netif/nx_netif.h> |
56 | #include <skywalk/nexus/flowswitch/nx_flowswitch.h> |
57 | #include <mach/thread_act.h> |
58 | #include <kern/thread.h> |
59 | #include <kern/sched_prim.h> |
60 | |
61 | static void na_netif_compat_finalize(struct nexus_netif_adapter *, |
62 | struct ifnet *); |
63 | static errno_t nx_netif_compat_receive(struct ifnet *ifp, struct mbuf *m_head, |
64 | struct mbuf *m_tail, const struct ifnet_stat_increment_param *s, |
65 | boolean_t poll, struct thread *tp); |
66 | static int nx_netif_compat_catch_rx(struct nexus_netif_compat_adapter *na, |
67 | boolean_t enable); |
68 | static int nx_netif_compat_xmit_frame(struct nexus_adapter *, struct mbuf *, |
69 | struct __kern_packet *); |
70 | |
71 | static int nx_netif_compat_na_notify_tx(struct __kern_channel_ring *, |
72 | struct proc *, uint32_t); |
73 | static int nx_netif_compat_na_notify_rx(struct __kern_channel_ring *, |
74 | struct proc *, uint32_t); |
75 | static int nx_netif_compat_na_activate(struct nexus_adapter *, |
76 | na_activate_mode_t); |
77 | static int nx_netif_compat_na_txsync(struct __kern_channel_ring *, |
78 | struct proc *, uint32_t); |
79 | static int nx_netif_compat_na_rxsync(struct __kern_channel_ring *, |
80 | struct proc *, uint32_t); |
81 | static void nx_netif_compat_na_dtor(struct nexus_adapter *na); |
82 | |
83 | static void nx_netif_compat_tx_intr(struct ifnet *, enum txrx, uint32_t, |
84 | uint32_t *); |
85 | static inline struct mbuf *nx_netif_compat_ring_alloc(int, int, uint16_t); |
86 | static inline void nx_netif_compat_ring_free(struct mbuf *m); |
87 | static void nx_netif_compat_ringcb(caddr_t cl, uint32_t size, caddr_t arg); |
88 | |
89 | static uint32_t nx_netif_compat_tx_clean(struct netif_stats *nifs, |
90 | struct __kern_channel_ring *kring); |
91 | static void nx_netif_compat_set_tx_event(struct __kern_channel_ring *kring, |
92 | slot_idx_t khead); |
93 | |
94 | static struct nexus_netif_compat_adapter *na_netif_compat_alloc(zalloc_flags_t); |
95 | static void na_netif_compat_free(struct nexus_adapter *); |
96 | #if DEBUG || DEVELOPMENT |
97 | static struct mbuf *nx_netif_rx_split(struct mbuf *, uint32_t); |
98 | #endif /* DEBUG || DEVELOPMENT */ |
99 | |
100 | #define MBUF_TXQ(m) ((m)->m_pkthdr.pkt_flowid) |
101 | #define MBUF_RXQ(m) ((m)->m_pkthdr.pkt_flowid) |
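/*
 * In the compat path the mbuf's flow ID field doubles as the
 * transmit/receive queue index, as captured by the two macros above.
 */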
102 | |
103 | #define NMB_PROPF_TX_NOTIFY 0x1 /* generate transmit event */ |
104 | #define NMB_FLAGS_MASK 0x0000ffff |
105 | #define NMB_INDEX_MASK 0xffff0000 |
106 | #define NMB_GET_FLAGS(p) (((uint32_t)(p) & NMB_FLAGS_MASK)) |
107 | #define NMB_SET_FLAGS(p, f) (((uint32_t)(p) & ~NMB_FLAGS_MASK) | (f)) |
108 | #define NMB_GET_INDEX(p) (((uint32_t)(p) & NMB_INDEX_MASK) >> 16) |
109 | #define NMB_SET_INDEX(p, i) (((uint32_t)(p) & ~NMB_INDEX_MASK) | (i << 16)) |
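/*
 * The cluster property word packs the NMB flags into its low 16 bits
 * and the tx pool slot index into its high 16 bits.  For example,
 * NMB_SET_INDEX(NMB_SET_FLAGS(0, NMB_PROPF_TX_NOTIFY), 5) yields
 * 0x00050001, i.e. slot index 5 with a transmit event requested.
 */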
110 | |
111 | static SKMEM_TYPE_DEFINE(na_netif_compat_zone, struct nexus_netif_compat_adapter); |
112 | |
113 | static int netif_tx_event_mode = 0; |
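/*
 * Mode 0 (the default) makes nx_netif_compat_set_tx_event() request the
 * TX_NOTIFY event on the first pending slot; any non-zero mode requests
 * it on a slot in the middle of the pending range instead.
 */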
114 | |
115 | #if (DEVELOPMENT || DEBUG) |
SYSCTL_EXTENSIBLE_NODE(_kern_skywalk_netif, OID_AUTO, compat,
CTLFLAG_RW | CTLFLAG_LOCKED,
0, "Skywalk netif Nexus legacy compatibility support");
SYSCTL_INT(_kern_skywalk_netif_compat, OID_AUTO, tx_event_mode,
CTLFLAG_RW | CTLFLAG_LOCKED, &netif_tx_event_mode, 0, "");
static uint32_t netif_rx_split = 0;
SYSCTL_UINT(_kern_skywalk_netif_compat, OID_AUTO, rx_split,
CTLFLAG_RW | CTLFLAG_LOCKED, &netif_rx_split, 0, "");
#endif /* DEVELOPMENT || DEBUG */
125 | |
126 | struct kern_nexus_domain_provider nx_netif_compat_prov_s = { |
127 | .nxdom_prov_name = NEXUS_PROVIDER_NET_IF_COMPAT, |
128 | .nxdom_prov_flags = NXDOMPROVF_DEFAULT, |
129 | .nxdom_prov_cb = { |
130 | .dp_cb_init = nx_netif_prov_init, |
131 | .dp_cb_fini = nx_netif_prov_fini, |
132 | .dp_cb_params = nx_netif_prov_params, |
133 | /* |
134 | * We must be using the native netif handlers below, |
135 | * since we act as the default domain provider; see |
136 | * kern_nexus_register_domain_provider(). |
137 | */ |
138 | .dp_cb_mem_new = nx_netif_prov_mem_new, |
139 | .dp_cb_config = nx_netif_prov_config, |
140 | .dp_cb_nx_ctor = nx_netif_prov_nx_ctor, |
141 | .dp_cb_nx_dtor = nx_netif_prov_nx_dtor, |
142 | .dp_cb_nx_mem_info = nx_netif_prov_nx_mem_info, |
143 | .dp_cb_nx_mib_get = nx_netif_prov_nx_mib_get, |
144 | .dp_cb_nx_stop = nx_netif_prov_nx_stop, |
145 | }, |
146 | }; |
147 | |
148 | struct nexus_ifnet_ops na_netif_compat_ops = { |
149 | .ni_finalize = na_netif_compat_finalize, |
150 | .ni_reap = nx_netif_reap, |
151 | .ni_dequeue = nx_netif_compat_tx_dequeue, |
152 | .ni_get_len = nx_netif_compat_tx_get_len, |
153 | }; |
154 | |
155 | #define SKMEM_TAG_NETIF_COMPAT_MIT "com.apple.skywalk.netif.compat.mit" |
156 | static SKMEM_TAG_DEFINE(skmem_tag_netif_compat_mit, SKMEM_TAG_NETIF_COMPAT_MIT); |
157 | |
158 | #define SKMEM_TAG_NETIF_COMPAT_POOL "com.apple.skywalk.netif.compat.pool" |
159 | static SKMEM_TAG_DEFINE(skmem_tag_netif_compat_pool, SKMEM_TAG_NETIF_COMPAT_POOL); |
160 | |
161 | void |
162 | nx_netif_compat_init(struct nxdom *nxdom) |
163 | { |
164 | _CASSERT(NETIF_COMPAT_MAX_MBUF_DATA_COPY <= NETIF_COMPAT_BUF_SIZE); |
165 | |
166 | /* |
167 | * We want nxprov_create() coming from userland to use the |
168 | * netif_compat domain provider, so install it as default. |
169 | * This is verified by the caller. |
170 | */ |
171 | (void) nxdom_prov_add(nxdom, &nx_netif_compat_prov_s); |
172 | } |
173 | |
174 | void |
175 | nx_netif_compat_fini(void) |
176 | { |
177 | (void) nxdom_prov_del(&nx_netif_compat_prov_s); |
178 | } |
179 | |
180 | static struct nexus_netif_compat_adapter * |
181 | na_netif_compat_alloc(zalloc_flags_t how) |
182 | { |
183 | struct nexus_netif_compat_adapter *nca; |
184 | |
185 | _CASSERT(offsetof(struct nexus_netif_compat_adapter, nca_up) == 0); |
186 | |
187 | nca = zalloc_flags(na_netif_compat_zone, how | Z_ZERO); |
188 | if (nca) { |
SK_DF(SK_VERB_MEM, "nca %p ALLOC", SK_KVA(nca));
190 | } |
191 | return nca; |
192 | } |
193 | |
194 | static void |
195 | na_netif_compat_free(struct nexus_adapter *na) |
196 | { |
197 | struct nexus_netif_compat_adapter *nca = |
198 | (struct nexus_netif_compat_adapter *)na; |
199 | |
200 | SK_LOCK_ASSERT_HELD(); |
201 | ASSERT(na->na_refcount == 0); |
202 | |
SK_DF(SK_VERB_MEM, "nca [dev+host] %p FREE", SK_KVA(nca));
bzero(nca, sizeof(*nca));
205 | zfree(na_netif_compat_zone, nca); |
206 | } |
207 | |
208 | /* |
209 | * Callback invoked when the device driver frees an mbuf used |
210 | * by skywalk to transmit a packet. This usually happens when |
211 | * the NIC notifies the driver that transmission is completed. |
212 | */ |
213 | static void |
214 | nx_netif_compat_ringcb(caddr_t cl, uint32_t size, caddr_t arg) |
215 | { |
216 | #pragma unused(cl, size) |
217 | struct mbuf *m = (void *)arg; |
218 | struct ifnet *ifp = NULL; |
219 | struct netif_stats *nifs = NULL; |
220 | uintptr_t data; /* not used */ |
221 | uint32_t txq; |
222 | errno_t err; |
223 | |
err = mbuf_get_tx_compl_data(m, (uintptr_t *)&ifp, &data);
225 | ASSERT(err == 0); |
226 | |
227 | nifs = &NX_NETIF_PRIVATE(NA(ifp)->nifna_up.na_nx)->nif_stats; |
228 | txq = MBUF_TXQ(m); |
229 | |
230 | for (;;) { |
231 | uint32_t p = 0, i, f; |
232 | |
(void) mbuf_cluster_get_prop(m, &p);
234 | f = NMB_GET_FLAGS(p); |
235 | i = NMB_GET_INDEX(p); |
236 | |
SK_DF(SK_VERB_NETIF, "%s m 0x%llx txq %u i %u f 0x%x",
238 | if_name(ifp), SK_KVA(m), MBUF_TXQ(m), i, f); |
239 | |
240 | if (f & NMB_PROPF_TX_NOTIFY) { |
241 | uint32_t pn; |
242 | |
243 | f &= ~NMB_PROPF_TX_NOTIFY; |
244 | pn = NMB_SET_FLAGS(p, f); |
245 | |
err = mbuf_cluster_set_prop(m, p, pn);
247 | if (err != 0) { |
248 | if (err == EBUSY) { /* try again */ |
249 | continue; |
250 | } |
251 | /* TODO: adi@apple.com -- what to do? */ |
SK_ERR("Failed to clear TX_NOTIFY "
"m 0x%llx i %u err %d", SK_KVA(m), i, err);
254 | } else { |
255 | nx_netif_compat_tx_intr(ifp, NR_TX, txq, NULL); |
SK_DF(SK_VERB_NETIF | SK_VERB_INTR | SK_VERB_TX,
"%s TX irq m 0x%llx txq %u i %u f 0x%x",
258 | if_name(ifp), SK_KVA(m), MBUF_TXQ(m), i, f); |
259 | STATS_INC(nifs, NETIF_STATS_TX_IRQ); |
260 | } |
261 | } |
262 | break; |
263 | } |
264 | } |
265 | |
266 | /* Hoisted out of line to reduce kernel stack footprint */ |
267 | SK_NO_INLINE_ATTRIBUTE |
268 | static struct mbuf * |
269 | nx_netif_compat_ring_alloc(int how, int len, uint16_t idx) |
270 | { |
271 | struct mbuf *m = NULL; |
272 | size_t size = len; |
273 | uint32_t i; |
274 | |
if (mbuf_ring_cluster_alloc(how, MBUF_TYPE_HEADER, &m,
nx_netif_compat_ringcb, &size) != 0) {
277 | return NULL; |
278 | } |
279 | |
280 | for (;;) { |
281 | uint32_t p = 0, pn; |
282 | int err; |
283 | |
(void) mbuf_cluster_get_prop(m, &p);
285 | pn = NMB_SET_FLAGS(p, 0); |
286 | pn = NMB_SET_INDEX(pn, idx); |
287 | |
err = mbuf_cluster_set_prop(m, p, pn);
289 | if (err != 0) { |
290 | if (err == EBUSY) { /* try again */ |
291 | continue; |
292 | } |
SK_ERR("Failed to initialize properties m 0x%llx "
"err %d", SK_KVA(m), err);
295 | m_freem(m); |
296 | return NULL; |
297 | } |
(void) mbuf_cluster_get_prop(m, &p);
299 | i = NMB_GET_INDEX(p); |
300 | ASSERT(i == idx); |
301 | break; |
302 | } |
303 | |
SK_DF(SK_VERB_MEM, "alloc m 0x%llx size %u i %u",
305 | SK_KVA(m), (uint32_t)size, i); |
306 | |
307 | return m; |
308 | } |
309 | |
310 | /* Hoisted out of line to reduce kernel stack footprint */ |
311 | SK_NO_INLINE_ATTRIBUTE |
312 | static void |
313 | nx_netif_compat_ring_free(struct mbuf *m) |
314 | { |
315 | if (m == NULL) { |
316 | return; |
317 | } |
318 | |
319 | for (;;) { |
320 | uint32_t p = 0; |
321 | int err; |
322 | |
(void) mbuf_cluster_get_prop(m, &p);
err = mbuf_cluster_set_prop(m, p, 0);
325 | if (err != 0) { |
326 | if (err == EBUSY) { /* try again */ |
327 | continue; |
328 | } |
329 | /* TODO: adi@apple.com -- what to do? */ |
SK_ERR("Failed to clear properties m 0x%llx err %d",
331 | SK_KVA(m), err); |
332 | } |
333 | break; |
334 | } |
335 | m_freem(m); |
336 | } |
337 | |
338 | static void |
339 | nx_netif_compat_tx_intr(struct ifnet *ifp, enum txrx t, uint32_t q, |
340 | uint32_t *work_done) |
341 | { |
342 | struct nexus_adapter *na = &NA(ifp)->nifna_up; |
343 | |
344 | if (__improbable(!NA_IS_ACTIVE(na) || q >= na_get_nrings(na, t))) { |
345 | if (q >= na_get_nrings(na, t)) { |
SK_ERR("na \"%s\" (0x%llx) invalid q %u >= %u",
347 | na->na_name, SK_KVA(na), q, na_get_nrings(na, t)); |
348 | } |
349 | } else { |
350 | (void) nx_netif_mit_tx_intr((NAKR(na, t) + q), kernproc, |
351 | 0, work_done); |
352 | } |
353 | } |
354 | |
355 | static int |
356 | nx_netif_compat_na_notify_tx(struct __kern_channel_ring *kring, |
357 | struct proc *p, uint32_t flags) |
358 | { |
359 | /* |
360 | * This should never get executed, as nothing should be invoking |
361 | * the TX ring notify callback. The compat adapter directly |
362 | * calls nx_netif_compat_tx_intr() for TX completion from within |
363 | * nx_netif_compat_ringcb(). |
364 | * |
365 | * If we ever get here, use the original na_notify callback |
366 | * saved during na_activate(). |
367 | */ |
368 | return kring->ckr_netif_notify(kring, p, flags); |
369 | } |
370 | |
371 | static int |
372 | nx_netif_compat_na_notify_rx(struct __kern_channel_ring *kring, |
373 | struct proc *p, uint32_t flags) |
374 | { |
375 | /* |
376 | * This should never get executed, as nothing should be invoking |
377 | * the RX ring notify callback. The compat adapter directly |
378 | * calls nx_netif_mit_rx_intr() for RX completion from within |
379 | * nx_netif_compat_receive(). |
380 | * |
381 | * If we ever get here, use the original na_notify callback |
382 | * saved during na_activate(). |
383 | */ |
384 | return kring->ckr_netif_notify(kring, p, flags); |
385 | } |
386 | |
387 | /* Enable/disable skywalk mode for a compat network interface. */ |
388 | static int |
389 | nx_netif_compat_na_activate(struct nexus_adapter *na, na_activate_mode_t mode) |
390 | { |
391 | struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na; |
392 | boolean_t tx_mit, rx_mit, tx_mit_simple, rx_mit_simple, rxpoll; |
393 | uint32_t limit = (uint32_t)sk_netif_compat_rx_mbq_limit; |
394 | struct nx_netif *nif = nifna->nifna_netif; |
395 | struct nexus_netif_compat_adapter *nca; |
396 | ifnet_t ifp = na->na_ifp; |
397 | uint32_t i, r; |
398 | int error; |
399 | |
400 | ASSERT(na->na_type == NA_NETIF_COMPAT_DEV); |
401 | ASSERT(!(na->na_flags & NAF_HOST_ONLY)); |
402 | |
SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
404 | SK_KVA(na), na_activate_mode2str(mode)); |
405 | |
406 | nca = (struct nexus_netif_compat_adapter *)nifna; |
407 | |
408 | switch (mode) { |
409 | case NA_ACTIVATE_MODE_ON: |
410 | ASSERT(SKYWALK_CAPABLE(na->na_ifp)); |
411 | |
412 | nx_netif_mit_config(nifna, &tx_mit, &tx_mit_simple, |
413 | &rx_mit, &rx_mit_simple); |
414 | |
415 | /* |
416 | * Init the mitigation support on all the dev TX rings. |
417 | */ |
if (na_get_nrings(na, NR_TX) != 0 && tx_mit) {
419 | nifna->nifna_tx_mit = |
420 | skn_alloc_type_array(tx_on, struct nx_netif_mit, |
421 | na_get_nrings(na, NR_TX), Z_WAITOK, |
422 | skmem_tag_netif_compat_mit); |
423 | if (nifna->nifna_tx_mit == NULL) { |
SK_ERR("TX mitigation allocation failed");
425 | error = ENOMEM; |
426 | goto out; |
427 | } |
428 | } else { |
429 | ASSERT(nifna->nifna_tx_mit == NULL); |
430 | } |
431 | |
432 | /* |
433 | * Init either poller or mitigation support on all the |
434 | * dev RX rings; they're mutually exclusive and poller |
435 | * takes precedence. |
436 | */ |
437 | rxpoll = (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)); |
438 | if (rxpoll) { |
439 | int err; |
440 | __unused kern_return_t kret; |
441 | thread_precedence_policy_data_t info; |
442 | |
443 | ASSERT((ifp->if_xflags & IFXF_LEGACY) == 0); |
444 | ASSERT(ifp->if_input_poll != NULL); |
445 | ASSERT(ifp->if_input_ctl != NULL); |
446 | if ((err = |
kernel_thread_start(netif_rxpoll_compat_thread_func,
ifp, &ifp->if_poll_thread)) != KERN_SUCCESS) {
panic_plain("%s: ifp=%p couldn't get a poll "
" thread; err=%d", __func__, ifp, err);
451 | /* NOTREACHED */ |
452 | __builtin_unreachable(); |
453 | } |
454 | VERIFY(ifp->if_poll_thread != NULL); |
455 | |
456 | /* wait until thread is ready */ |
lck_mtx_lock(&ifp->if_poll_lock);
while (!(ifp->if_poll_flags & IF_POLLF_READY)) {
(void) assert_wait(&ifp->if_poll_flags,
THREAD_UNINT);
lck_mtx_unlock(&ifp->if_poll_lock);
(void) thread_block(THREAD_CONTINUE_NULL);
lck_mtx_lock(&ifp->if_poll_lock);
}
lck_mtx_unlock(&ifp->if_poll_lock);

bzero(&info, sizeof(info));
info.importance = 1;
kret = thread_policy_set(ifp->if_poll_thread,
THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
THREAD_PRECEDENCE_POLICY_COUNT);
472 | ASSERT(kret == KERN_SUCCESS); |
473 | limit = if_rcvq_maxlen; |
474 | (void) netif_rxpoll_set_params(ifp, NULL, FALSE); |
475 | ASSERT(nifna->nifna_rx_mit == NULL); |
476 | } else if (rx_mit) { |
477 | nifna->nifna_rx_mit = |
478 | skn_alloc_type_array(rx_on, struct nx_netif_mit, |
479 | na_get_nrings(na, NR_RX), Z_WAITOK, |
480 | skmem_tag_netif_compat_mit); |
481 | if (nifna->nifna_rx_mit == NULL) { |
SK_ERR("RX mitigation allocation failed");
483 | if (nifna->nifna_tx_mit != NULL) { |
484 | skn_free_type_array(rx_fail, |
485 | struct nx_netif_mit, |
486 | na_get_nrings(na, NR_TX), |
487 | nifna->nifna_tx_mit); |
488 | nifna->nifna_tx_mit = NULL; |
489 | } |
490 | error = ENOMEM; |
491 | goto out; |
492 | } |
493 | } |
494 | |
495 | /* intercept na_notify callback on the TX rings */ |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
497 | na->na_tx_rings[r].ckr_netif_notify = |
498 | na->na_tx_rings[r].ckr_na_notify; |
499 | na->na_tx_rings[r].ckr_na_notify = |
500 | nx_netif_compat_na_notify_tx; |
501 | if (nifna->nifna_tx_mit != NULL) { |
502 | nx_netif_mit_init(nif, na->na_ifp, |
503 | &nifna->nifna_tx_mit[r], |
504 | &na->na_tx_rings[r], tx_mit_simple); |
505 | } |
506 | } |
507 | |
508 | /* intercept na_notify callback on the RX rings */ |
for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
510 | na->na_rx_rings[r].ckr_netif_notify = |
511 | na->na_rx_rings[r].ckr_na_notify; |
512 | na->na_rx_rings[r].ckr_na_notify = |
513 | nx_netif_compat_na_notify_rx; |
514 | if (nifna->nifna_rx_mit != NULL) { |
515 | nx_netif_mit_init(nif, na->na_ifp, |
516 | &nifna->nifna_rx_mit[r], |
517 | &na->na_rx_rings[r], rx_mit_simple); |
518 | } |
519 | } |
520 | /* |
521 | * Initialize the rx queue, as nx_netif_compat_receive() can |
522 | * be called as soon as nx_netif_compat_catch_rx() returns. |
523 | */ |
for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
struct __kern_channel_ring *kr = &na->na_rx_rings[r];

nx_mbq_safe_init(kr, &kr->ckr_rx_queue, limit,
&nexus_mbq_lock_group, &nexus_lock_attr);
SK_DF(SK_VERB_NETIF,
"na \"%s\" (0x%llx) initialized kr \"%s\" "
"(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
532 | kr->ckr_name, SK_KVA(kr), kr->ckr_flags, CKRF_BITS); |
533 | } |
534 | |
535 | /* |
536 | * Prepare packet buffers for the tx rings; don't preallocate |
537 | * the mbufs here, leave this to nx_netif_compat_na_txsync(). |
538 | */ |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
540 | na->na_tx_rings[r].ckr_tx_pool = NULL; |
541 | } |
542 | |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
544 | na->na_tx_rings[r].ckr_tx_pool = |
545 | skn_alloc_type_array(tx_pool_on, struct mbuf *, |
546 | na_get_nslots(na, NR_TX), Z_WAITOK, |
547 | skmem_tag_netif_compat_pool); |
548 | if (na->na_tx_rings[r].ckr_tx_pool == NULL) { |
SK_ERR("ckr_tx_pool allocation failed");
550 | error = ENOMEM; |
551 | goto free_tx_pools; |
552 | } |
553 | } |
554 | |
555 | /* Prepare to intercept incoming traffic. */ |
error = nx_netif_compat_catch_rx(nca, TRUE);
557 | if (error != 0) { |
SK_ERR("RX intercept failed (%d)", error);
559 | goto uncatch; |
560 | } |
561 | nx_netif_filter_enable(nifna->nifna_netif); |
562 | nx_netif_flow_enable(nifna->nifna_netif); |
563 | os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed); |
564 | break; |
565 | |
566 | case NA_ACTIVATE_MODE_DEFUNCT: |
567 | ASSERT(SKYWALK_CAPABLE(na->na_ifp)); |
568 | break; |
569 | |
570 | case NA_ACTIVATE_MODE_OFF: |
571 | /* |
572 | * Note that here we cannot assert SKYWALK_CAPABLE() |
573 | * as we're called in the destructor path. |
574 | */ |
575 | os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed); |
576 | nx_netif_flow_disable(nifna->nifna_netif); |
577 | nx_netif_filter_disable(nifna->nifna_netif); |
578 | |
579 | /* |
580 | * Signal the poller thread to terminate itself, and |
581 | * wait for it to exit. |
582 | */ |
583 | if (ifp->if_poll_thread != THREAD_NULL) { |
584 | ASSERT(net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)); |
585 | ASSERT((ifp->if_xflags & IFXF_LEGACY) == 0); |
lck_mtx_lock_spin(&ifp->if_poll_lock);
ifp->if_poll_flags |= IF_POLLF_TERMINATING;
wakeup_one((caddr_t)&ifp->if_poll_thread);
lck_mtx_unlock(&ifp->if_poll_lock);
590 | |
591 | /* wait for poller thread to terminate */ |
lck_mtx_lock(&ifp->if_poll_lock);
while (ifp->if_poll_thread != THREAD_NULL) {
SK_DF(SK_VERB_NETIF_POLL,
"%s: waiting for poller thread to terminate",
if_name(ifp));
(void) msleep(&ifp->if_poll_thread,
&ifp->if_poll_lock, (PZERO - 1),
"netif_poll_thread_exit", NULL);
}
lck_mtx_unlock(&ifp->if_poll_lock);
SK_DF(SK_VERB_NETIF_POLL,
"%s: poller thread termination complete",
604 | if_name(ifp)); |
605 | } |
606 | |
607 | /* Do not intercept packets on the rx path. */ |
(void) nx_netif_compat_catch_rx(nca, FALSE);
609 | |
610 | /* Free the mbufs going to the channel rings */ |
for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
nx_mbq_safe_purge(&na->na_rx_rings[r].ckr_rx_queue);
nx_mbq_safe_destroy(&na->na_rx_rings[r].ckr_rx_queue);
614 | } |
615 | |
616 | /* reset all TX notify callbacks */ |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
618 | na->na_tx_rings[r].ckr_na_notify = |
619 | na->na_tx_rings[r].ckr_netif_notify; |
620 | na->na_tx_rings[r].ckr_netif_notify = NULL; |
621 | if (nifna->nifna_tx_mit != NULL) { |
622 | na->na_tx_rings[r].ckr_netif_mit_stats = NULL; |
623 | nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]); |
624 | } |
625 | } |
626 | |
627 | if (nifna->nifna_tx_mit != NULL) { |
628 | skn_free_type_array(tx_off, struct nx_netif_mit, |
629 | na_get_nrings(na, NR_TX), nifna->nifna_tx_mit); |
630 | nifna->nifna_tx_mit = NULL; |
631 | } |
632 | |
633 | /* reset all RX notify callbacks */ |
for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
635 | na->na_rx_rings[r].ckr_na_notify = |
636 | na->na_rx_rings[r].ckr_netif_notify; |
637 | na->na_rx_rings[r].ckr_netif_notify = NULL; |
638 | if (nifna->nifna_rx_mit != NULL) { |
639 | na->na_rx_rings[r].ckr_netif_mit_stats = NULL; |
640 | nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]); |
641 | } |
642 | } |
643 | if (nifna->nifna_rx_mit != NULL) { |
644 | skn_free_type_array(rx_off, struct nx_netif_mit, |
645 | na_get_nrings(na, NR_RX), nifna->nifna_rx_mit); |
646 | nifna->nifna_rx_mit = NULL; |
647 | } |
648 | |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
for (i = 0; i < na_get_nslots(na, NR_TX); i++) {
nx_netif_compat_ring_free(na->
652 | na_tx_rings[r].ckr_tx_pool[i]); |
653 | na->na_tx_rings[r].ckr_tx_pool[i] = NULL; |
654 | } |
655 | skn_free_type_array(tx_pool_off, |
656 | struct mbuf *, na_get_nslots(na, NR_TX), |
657 | na->na_tx_rings[r].ckr_tx_pool); |
658 | } |
659 | break; |
660 | |
661 | default: |
662 | VERIFY(0); |
663 | /* NOTREACHED */ |
664 | __builtin_unreachable(); |
665 | } |
666 | |
667 | return 0; |
668 | |
669 | uncatch: |
(void) nx_netif_compat_catch_rx(nca, FALSE);
671 | |
672 | free_tx_pools: |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
674 | if (na->na_tx_rings[r].ckr_tx_pool == NULL) { |
675 | continue; |
676 | } |
for (i = 0; i < na_get_nslots(na, NR_TX); i++) {
nx_netif_compat_ring_free(
na->na_tx_rings[r].ckr_tx_pool[i]);
680 | na->na_tx_rings[r].ckr_tx_pool[i] = NULL; |
681 | } |
682 | skn_free_type_array(tx_pool, struct mbuf *, |
683 | na_get_nslots(na, NR_TX), na->na_tx_rings[r].ckr_tx_pool); |
684 | na->na_tx_rings[r].ckr_tx_pool = NULL; |
685 | } |
686 | if (nifna->nifna_tx_mit != NULL) { |
for (r = 0; r < na_get_nrings(na, NR_TX); r++) {
688 | nx_netif_mit_cleanup(&nifna->nifna_tx_mit[r]); |
689 | } |
690 | skn_free_type_array(tx, struct nx_netif_mit, |
691 | na_get_nrings(na, NR_TX), nifna->nifna_tx_mit); |
692 | nifna->nifna_tx_mit = NULL; |
693 | } |
694 | if (nifna->nifna_rx_mit != NULL) { |
for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
696 | nx_netif_mit_cleanup(&nifna->nifna_rx_mit[r]); |
697 | } |
698 | skn_free_type_array(rx, struct nx_netif_mit, |
699 | na_get_nrings(na, NR_RX), nifna->nifna_rx_mit); |
700 | nifna->nifna_rx_mit = NULL; |
701 | } |
for (r = 0; r < na_get_nrings(na, NR_RX); r++) {
nx_mbq_safe_destroy(&na->na_rx_rings[r].ckr_rx_queue);
704 | } |
705 | out: |
706 | |
707 | return error; |
708 | } |
709 | |
710 | /* |
711 | * Record completed transmissions and update ktail. |
712 | * |
713 | * The oldest tx buffer not yet completed is at ckr_ktail + 1, |
714 | * ckr_khead is the first unsent buffer. |
715 | */ |
716 | /* Hoisted out of line to reduce kernel stack footprint */ |
717 | SK_NO_INLINE_ATTRIBUTE |
718 | static uint32_t |
719 | nx_netif_compat_tx_clean(struct netif_stats *nifs, |
720 | struct __kern_channel_ring *kring) |
721 | { |
722 | const slot_idx_t lim = kring->ckr_lim; |
slot_idx_t nm_i = SLOT_NEXT(kring->ckr_ktail, lim);
724 | slot_idx_t khead = kring->ckr_khead; |
725 | uint32_t n = 0; |
726 | struct mbuf **ckr_tx_pool = kring->ckr_tx_pool; |
727 | |
728 | while (nm_i != khead) { /* buffers not completed */ |
729 | struct mbuf *m = ckr_tx_pool[nm_i]; |
730 | |
731 | if (__improbable(m == NULL)) { |
732 | /* this is done, try to replenish the entry */ |
733 | VERIFY(nm_i <= UINT16_MAX); |
734 | ckr_tx_pool[nm_i] = m = |
735 | nx_netif_compat_ring_alloc(M_WAITOK, |
kring->ckr_max_pkt_len, (uint16_t)nm_i);
737 | if (__improbable(m == NULL)) { |
738 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
739 | STATS_INC(nifs, NETIF_STATS_DROP); |
SK_DF(SK_VERB_MEM,
"mbuf allocation failed (slot %u)", nm_i);
742 | /* XXX how do we proceed ? break ? */ |
743 | return -ENOMEM; |
744 | } |
} else if (mbuf_ring_cluster_is_active(m)) {
746 | break; /* This mbuf is still busy */ |
747 | } |
748 | n++; |
nm_i = SLOT_NEXT(nm_i, lim);
750 | } |
kring->ckr_ktail = SLOT_PREV(nm_i, lim);
752 | |
SK_RDF(SK_VERB_NETIF, 10, "kr \"%s\" (0x%llx) tx completed [%u] -> "
"kh %u kt %u | rh %u rt %u", kring->ckr_name, SK_KVA(kring),
755 | n, kring->ckr_khead, kring->ckr_ktail, |
756 | kring->ckr_rhead, kring->ckr_rtail); |
757 | |
758 | return n; |
759 | } |
760 | |
761 | /* Hoisted out of line to reduce kernel stack footprint */ |
762 | SK_NO_INLINE_ATTRIBUTE |
763 | static void |
764 | nx_netif_compat_set_tx_event(struct __kern_channel_ring *kring, |
765 | slot_idx_t khead) |
766 | { |
767 | const slot_idx_t lim = kring->ckr_lim; |
slot_idx_t ntc = SLOT_NEXT(kring->ckr_ktail, lim); /* next to clean */
769 | struct mbuf *m; |
770 | slot_idx_t e; |
771 | |
772 | if (ntc == khead) { |
773 | return; /* all buffers are free */ |
774 | } |
775 | /* |
* We have pending packets in the driver between ckr_ktail+1 and
777 | * ckr_khead, and we have to choose one of these slots to generate |
778 | * a TX notification. There is a race, but this is only called |
779 | * within TX sync which does a double check. |
780 | */ |
781 | if (__probable(netif_tx_event_mode == 0)) { |
782 | /* |
783 | * Choose the first pending slot, to be safe against drivers |
784 | * reordering mbuf transmissions. |
785 | */ |
786 | e = ntc; |
787 | } else { |
788 | /* |
789 | * Choose a slot in the middle, so that we don't risk ending |
* up in a situation where the client continuously wakes up,
* fills one or a few TX slots and goes to sleep again.
792 | */ |
793 | slot_idx_t n = lim + 1; |
794 | |
795 | if (khead >= ntc) { |
796 | e = (khead + ntc) >> 1; |
797 | } else { /* wrap around */ |
798 | e = (khead + n + ntc) >> 1; |
799 | if (e >= n) { |
800 | e -= n; |
801 | } |
802 | } |
803 | |
804 | if (__improbable(e >= n)) { |
SK_ERR("This cannot happen");
806 | e = 0; |
807 | } |
808 | } |
809 | m = kring->ckr_tx_pool[e]; |
810 | |
811 | for (;;) { |
812 | uint32_t p = 0, pn, i, f; |
813 | int err; |
814 | |
(void) mbuf_cluster_get_prop(m, &p);
816 | f = NMB_GET_FLAGS(p); |
817 | i = NMB_GET_INDEX(p); |
818 | |
819 | if (f & NMB_PROPF_TX_NOTIFY) { |
820 | /* |
821 | * This can happen if there is already an event |
822 | * on the ring slot 'e': There is nothing to do. |
823 | */ |
SK_DF(SK_VERB_NETIF | SK_VERB_NOTIFY | SK_VERB_TX,
"TX_NOTIFY already set at %u m 0x%llx kc %u ntc %u",
826 | e, SK_KVA(m), khead, ntc); |
827 | return; |
828 | } |
829 | |
830 | f |= NMB_PROPF_TX_NOTIFY; |
831 | pn = NMB_SET_FLAGS(p, f); |
832 | |
err = mbuf_cluster_set_prop(m, p, pn);
834 | if (err != 0) { |
835 | if (err == EBUSY) { /* try again */ |
836 | continue; |
837 | } |
838 | /* TODO: adi@apple.com -- what to do? */ |
SK_ERR("Failed to set TX_NOTIFY at %u m 0x%llx kh %u "
"ntc %u, err %d", e, SK_KVA(m), khead, ntc, err);
841 | } else { |
SK_DF(SK_VERB_NETIF | SK_VERB_NOTIFY | SK_VERB_TX,
"Request TX_NOTIFY at %u m 0x%llx kh %u ntc %u",
844 | e, SK_KVA(m), khead, ntc); |
845 | } |
846 | break; |
847 | } |
848 | } |
849 | |
850 | #if SK_LOG |
851 | /* Hoisted out of line to reduce kernel stack footprint */ |
852 | SK_LOG_ATTRIBUTE |
853 | static void |
854 | nx_netif_compat_na_txsync_log(struct __kern_channel_ring *kring, |
855 | struct proc *p, uint32_t flags, slot_idx_t nm_i) |
856 | { |
SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_TX,
"%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x "
"nm_i %u, kh %u kt %u | rh %u rt %u",
860 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
861 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id, |
862 | flags, nm_i, kring->ckr_khead, kring->ckr_ktail, |
863 | kring->ckr_rhead, kring->ckr_rtail); |
864 | } |
865 | #endif /* SK_LOG */ |
866 | |
867 | /* |
868 | * nx_netif_compat_na_txsync() transforms packets into mbufs and passes |
869 | * them to the device driver. |
870 | */ |
871 | static int |
872 | nx_netif_compat_na_txsync(struct __kern_channel_ring *kring, struct proc *p, |
873 | uint32_t flags) |
874 | { |
875 | #pragma unused(p) |
876 | struct nexus_adapter *na = KRNA(kring); |
877 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats; |
878 | slot_idx_t nm_i; /* index into the channel ring */ // j |
879 | const slot_idx_t head = kring->ckr_rhead; |
880 | uint32_t slot_count = 0; |
881 | uint32_t byte_count = 0; |
882 | |
883 | STATS_INC(nifs, NETIF_STATS_TX_SYNC); |
884 | |
885 | /* update our work timestamp */ |
886 | na->na_work_ts = _net_uptime; |
887 | |
888 | /* |
889 | * First part: process new packets to send. |
890 | */ |
891 | nm_i = kring->ckr_khead; |
892 | if (nm_i != head) { /* we have new packets to send */ |
893 | while (nm_i != head) { |
894 | struct __kern_slot_desc *sd = KR_KSD(kring, nm_i); |
895 | |
896 | /* device-specific */ |
897 | struct mbuf *m; |
898 | int tx_ret; |
899 | /* |
900 | * Take a mbuf from the tx pool (replenishing the pool |
901 | * entry if necessary) and copy in the user packet. |
902 | */ |
903 | VERIFY(nm_i <= UINT16_MAX); |
904 | m = kring->ckr_tx_pool[nm_i]; |
905 | if (__improbable(m == NULL)) { |
906 | kring->ckr_tx_pool[nm_i] = m = |
907 | nx_netif_compat_ring_alloc(M_WAITOK, |
kring->ckr_max_pkt_len, (uint16_t)nm_i);
909 | if (__improbable(m == NULL)) { |
910 | STATS_INC(nifs, NETIF_STATS_DROP); |
911 | STATS_INC(nifs, |
912 | NETIF_STATS_DROP_NOMEM_MBUF); |
SK_DF(SK_VERB_MEM,
"%s(%d) kr \"%s\" (0x%llx) "
"krflags 0x%b ckr_tx_pool[%u] "
"allocation failed",
917 | sk_proc_name_address(p), |
918 | sk_proc_pid(p), kring->ckr_name, |
919 | SK_KVA(kring), kring->ckr_flags, |
920 | CKRF_BITS, nm_i); |
921 | /* |
922 | * Here we could schedule a timer |
923 | * which retries to replenish after |
924 | * a while, and notifies the client |
925 | * when it manages to replenish some |
* slot. In any case we break early
927 | * to avoid crashes. |
928 | */ |
929 | break; |
930 | } |
931 | STATS_INC(nifs, NETIF_STATS_TX_REPL); |
932 | } |
933 | |
934 | byte_count += sd->sd_pkt->pkt_length; |
935 | slot_count++; |
936 | |
937 | /* |
938 | * We should ask notifications when CS_REPORT is set, |
939 | * or roughly every half ring. To optimize this, |
940 | * we set a notification event when the client runs |
941 | * out of TX ring space, or when transmission fails. |
942 | * In the latter case we also break early. |
943 | */ |
944 | tx_ret = nx_netif_compat_xmit_frame(na, m, sd->sd_pkt); |
945 | if (__improbable(tx_ret)) { |
SK_RD(5, "start_xmit failed: err %d "
"[nm_i %u, h %u, kt %u]",
948 | tx_ret, nm_i, head, kring->ckr_ktail); |
949 | /* |
950 | * No room for this mbuf in the device driver. |
951 | * Request a notification FOR A PREVIOUS MBUF, |
952 | * then call nx_netif_compat_tx_clean(kring) to |
953 | * do the double check and see if we can free |
954 | * more buffers. If there is space continue, |
955 | * else break; NOTE: the double check is |
956 | * necessary if the problem occurs in the |
957 | * txsync call after selrecord(). Also, we |
958 | * need some way to tell the caller that not |
959 | * all buffers were queued onto the device |
960 | * (this was not a problem with native skywalk |
961 | * driver where space is preallocated). The |
962 | * bridge has a similar problem and we solve |
963 | * it there by dropping the excess packets. |
964 | */ |
nx_netif_compat_set_tx_event(kring, nm_i);
966 | if (nx_netif_compat_tx_clean(nifs, kring)) { |
967 | /* space now available */ |
968 | continue; |
969 | } else { |
970 | break; |
971 | } |
972 | } |
nm_i = SLOT_NEXT(nm_i, kring->ckr_lim);
974 | STATS_INC(nifs, NETIF_STATS_TX_PACKETS); |
975 | } |
976 | |
977 | /* |
978 | * Update khead to the next slot to transmit; Here nm_i |
* is not necessarily head, we could break early.
980 | */ |
981 | kring->ckr_khead = nm_i; |
982 | |
983 | kr_update_stats(kring, slot_count, byte_count); |
984 | } |
985 | |
986 | /* |
987 | * Second, reclaim completed buffers |
988 | */ |
989 | if ((flags & NA_SYNCF_FORCE_RECLAIM) || kr_txempty(kring)) { |
990 | /* |
991 | * No more available slots? Set a notification event on a |
992 | * channel slot that will be cleaned in the future. No |
993 | * doublecheck is performed, since nx_netif_compat_na_txsync() |
994 | * will be called twice by ch_event(). |
995 | */ |
nx_netif_compat_set_tx_event(kring, nm_i);
997 | } |
998 | kring->ckr_pending_intr = 0; |
999 | |
1000 | #if SK_LOG |
1001 | if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) { |
1002 | nx_netif_compat_na_txsync_log(kring, p, flags, nm_i); |
1003 | } |
1004 | #endif /* SK_LOG */ |
1005 | |
1006 | (void) nx_netif_compat_tx_clean(nifs, kring); |
1007 | |
1008 | return 0; |
1009 | } |
1010 | |
1011 | #if SK_LOG |
1012 | /* Hoisted out of line to reduce kernel stack footprint */ |
1013 | SK_LOG_ATTRIBUTE |
1014 | static void |
1015 | nx_netif_compat_receive_log1(const struct __kern_channel_ring *kring, |
1016 | struct nx_mbq *q) |
1017 | { |
SK_RD(10, "kr \"%s\" (0x%llx) krflags 0x%b FULL "
"(qlen %u qsize %llu), kc %u kt %u", kring->ckr_name,
1020 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, nx_mbq_len(q), |
1021 | nx_mbq_size(q), kring->ckr_khead, kring->ckr_ktail); |
1022 | } |
1023 | |
1024 | /* Hoisted out of line to reduce kernel stack footprint */ |
1025 | SK_LOG_ATTRIBUTE |
1026 | static void |
1027 | nx_netif_compat_receive_log2(const struct __kern_channel_ring *kring, |
1028 | struct nx_mbq *q, const struct ifnet_stat_increment_param *s) |
1029 | { |
SK_RDF(SK_VERB_RX, 10, "kr \"%s\" (0x%llx) krflags 0x%b OK, "
"added %u packets %u bytes, now qlen %u qsize %llu",
1032 | kring->ckr_name, SK_KVA(kring), kring->ckr_flags, CKRF_BITS, |
1033 | s->packets_in, s->bytes_in, nx_mbq_len(q), nx_mbq_size(q)); |
1034 | } |
1035 | #endif /* SK_LOG */ |
1036 | |
1037 | /* |
1038 | * This is the default RX path for the compat netif nexus. Packets |
1039 | * are enqueued and later extracted by nx_netif_compat_na_rxsync(). |
1040 | */ |
1041 | /* TODO: adi@apple.com -- implement chaining */ |
1042 | static errno_t |
1043 | nx_netif_compat_receive(struct ifnet *ifp, struct mbuf *m_head, |
1044 | struct mbuf *m_tail, const struct ifnet_stat_increment_param *s, |
1045 | boolean_t poll, struct thread *tp) |
1046 | { |
1047 | #pragma unused(tp) |
1048 | boolean_t ifp_rxpoll = ((ifp->if_eflags & IFEF_RXPOLL) && net_rxpoll); |
1049 | struct nexus_adapter *na = &NA(ifp)->nifna_up; |
1050 | struct __kern_channel_ring *kring; |
1051 | struct netif_stats *nifs; |
1052 | uint32_t r, work_done; |
1053 | unsigned int qlimit; |
1054 | struct nx_mbq *q; |
1055 | errno_t err = 0; |
1056 | |
1057 | /* update our work timestamp */ |
1058 | na->na_work_ts = _net_uptime; |
1059 | |
1060 | if (__improbable(m_head == NULL)) { |
1061 | ASSERT(m_tail == NULL); |
1062 | ASSERT(poll); |
1063 | ASSERT(s->bytes_in == 0); |
1064 | ASSERT(s->packets_in == 0); |
1065 | } |
1066 | |
1067 | /* BEGIN CSTYLED */ |
1068 | /* |
1069 | * TODO: adi@apple.com -- this needs to be revisited once we |
1070 | * have a clear definition of how multiple RX rings are mapped |
1071 | * to flows; this would involve the hardware/driver doing some |
1072 | * kind of classification and RSS-like demuxing. |
1073 | * |
1074 | * When we enable that, we'll need to consider sifting thru the |
1075 | * mbuf chain we get from the caller, and enqueue them across |
1076 | * per-ring temporary mbuf queue (along with marking the ring |
1077 | * indicating pending packets.) During second stage processing, |
1078 | * we'll issue nx_netif_mit_rx_intr() on each marked ring to |
1079 | * dispatch the packets upstream. |
1080 | * |
1081 | * r = MBUF_RXQ(m); |
1082 | * |
1083 | * if (r >= na->na_num_rx_rings) |
1084 | * r = r % na->na_num_rx_rings; |
1085 | * |
1086 | * kring = &na->na_rx_rings[r]; |
1087 | * q = &kring->ckr_rx_queue; |
1088 | * |
1089 | * For now, target only the first RX ring (ring 0). |
1090 | */ |
1091 | /* END CSTYLED */ |
1092 | r = 0; /* receive ring number */ |
1093 | kring = &na->na_rx_rings[r]; |
1094 | |
1095 | ASSERT(na->na_type == NA_NETIF_COMPAT_DEV); |
1096 | nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats; |
1097 | |
1098 | if (__improbable((!NA_IS_ACTIVE(na)) || KR_DROP(kring))) { |
1099 | /* BEGIN CSTYLED */ |
1100 | /* |
1101 | * If we deal with multiple rings, change above to: |
1102 | * |
1103 | * if (!NA_IS_ACTIVE(na) || r >= na_get_nrings(na, NR_RX))) |
1104 | * |
1105 | * then here do: |
1106 | * |
1107 | * if (r >= na_get_nrings(na, NR_RX)) { |
1108 | * SK_ERR("na \"%s\" (0x%llx) invalid r %u >= %u", |
1109 | * na->na_name, SK_KVA(na), r, |
1110 | * na_get_nrings(na, NR_RX)); |
1111 | * } |
1112 | */ |
1113 | /* END CSTYLED */ |
1114 | m_freem_list(m_head); |
1115 | if (!NA_IS_ACTIVE(na)) { |
1116 | STATS_ADD(nifs, NETIF_STATS_DROP_NA_INACTIVE, |
1117 | s->packets_in); |
1118 | } else if (KR_DROP(kring)) { |
1119 | STATS_ADD(nifs, NETIF_STATS_DROP_KRDROP_MODE, |
1120 | s->packets_in); |
1121 | } |
1122 | STATS_ADD(nifs, NETIF_STATS_DROP, s->packets_in); |
1123 | err = ENXIO; |
1124 | goto done; |
1125 | } |
1126 | if (__improbable(m_head == NULL)) { |
1127 | goto send_packets; |
1128 | } |
1129 | |
1130 | q = &kring->ckr_rx_queue; |
1131 | nx_mbq_lock_spin(q); |
1132 | qlimit = nx_mbq_limit(q); |
1133 | if (ifp_rxpoll) { |
1134 | /* |
1135 | * qlimit of the receive queue is much smaller when the |
* interface is in opportunistic polling mode. In this case
* when the interface is operating in interrupt mode,
* a sudden burst of input packets can cause the receive queue
* to quickly build up due to scheduling latency in waking up
1140 | * the poller thread. To avoid drops here due to this latency |
1141 | * we provide a leeway on the qlimit. |
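* The shift below allows for up to 32x the configured limit.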
1142 | */ |
1143 | qlimit <<= 5; |
1144 | } |
1145 | if (__improbable(nx_mbq_len(q) > qlimit)) { |
1146 | #if SK_LOG |
1147 | if (__improbable(sk_verbose != 0)) { |
1148 | nx_netif_compat_receive_log1(kring, q); |
1149 | } |
1150 | #endif /* SK_LOG */ |
1151 | nx_mbq_unlock(q); |
1152 | m_freem_list(m_head); |
1153 | STATS_ADD(nifs, NETIF_STATS_DROP_RXQ_OVFL, s->packets_in); |
1154 | STATS_ADD(nifs, NETIF_STATS_DROP, s->packets_in); |
1155 | goto send_packets; |
1156 | } |
nx_mbq_enq_multi(q, m_head, m_tail, s->packets_in, s->bytes_in);
1158 | |
1159 | #if SK_LOG |
1160 | if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) { |
1161 | nx_netif_compat_receive_log2(kring, q, s); |
1162 | } |
1163 | #endif /* SK_LOG */ |
1164 | |
1165 | nx_mbq_unlock(q); |
1166 | |
(void) ifnet_stat_increment_in(ifp, s->packets_in, s->bytes_in,
s->errors_in);
1169 | |
1170 | if (poll) { |
1171 | /* update incremental poll stats */ |
1172 | PKTCNTR_ADD(&ifp->if_poll_tstats, s->packets_in, s->bytes_in); |
1173 | } |
1174 | |
1175 | send_packets: |
1176 | /* |
* if the interface supports opportunistic input polling, then the
1178 | * input packet processing is performed in context of the poller thread. |
1179 | */ |
1180 | if (!poll && ifp_rxpoll) { |
1181 | /* wakeup the poller thread */ |
1182 | ifnet_poll(ifp); |
1183 | } else { |
1184 | /* |
1185 | * wakeup the mitigation thread if needed to perform input |
1186 | * packet processing. |
* if the interface supports opportunistic input polling, then
1188 | * mitigation thread is not created and the input packet |
1189 | * processing happens in context of the poller thread. |
1190 | */ |
err = nx_netif_mit_rx_intr((NAKR(na, NR_RX) + r), kernproc, 0,
1192 | &work_done); |
1193 | } |
1194 | done: |
1195 | return err; |
1196 | } |
1197 | |
1198 | #if SK_LOG |
1199 | /* Hoisted out of line to reduce kernel stack footprint */ |
1200 | SK_LOG_ATTRIBUTE |
1201 | static void |
1202 | nx_netif_compat_na_rxsync_log(const struct __kern_channel_ring *kring, |
1203 | struct proc *p, uint32_t flags, slot_idx_t nm_i) |
1204 | { |
SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
"%s(%d) kr \"%s\" (0x%llx) krflags 0x%b "
"ring %u flags 0x%x nm_i %u kt %u", sk_proc_name_address(p),
1208 | sk_proc_pid(p), kring->ckr_name, SK_KVA(kring), kring->ckr_flags, |
1209 | CKRF_BITS, kring->ckr_ring_id, flags, nm_i, kring->ckr_ktail); |
1210 | } |
1211 | #endif /* SK_LOG */ |
1212 | |
1213 | #if DEBUG || DEVELOPMENT |
1214 | /* |
1215 | * Split an mbuf chain at offset "split", such that the first mbuf |
1216 | * is a zero-length M_PKTHDR, followed by the rest of the mbufs. |
1217 | * Typically, the "split" value is equal to the size of the link |
1218 | * layer header, e.g. Ethernet header. |
1219 | */ |
1220 | static struct mbuf * |
1221 | nx_netif_rx_split(struct mbuf *m0, uint32_t split) |
1222 | { |
1223 | struct mbuf *m = m0; |
1224 | |
1225 | if (split == 0) { |
1226 | split = MHLEN; |
1227 | M_PREPEND(m, split, M_DONTWAIT, 0); |
1228 | } else { |
1229 | m->m_data -= split; |
1230 | m->m_len += split; |
1231 | m_pktlen(m) += split; |
1232 | |
1233 | ASSERT((uintptr_t)m->m_data >= (uintptr_t)mbuf_datastart(m)); |
1234 | ASSERT((uintptr_t)m->m_data < ((uintptr_t)mbuf_datastart(m) + |
1235 | mbuf_maxlen(m))); |
1236 | } |
1237 | if (m != NULL) { |
1238 | struct mbuf *n = m_split(m, split, M_DONTWAIT); |
1239 | if (n == NULL) { |
1240 | m_freem(m); |
1241 | return NULL; |
1242 | } |
1243 | m0 = m; |
1244 | ASSERT((uint32_t)m->m_len == split); |
1245 | m->m_data += split; |
1246 | m->m_len -= split; |
1247 | while (m->m_next != NULL) { |
1248 | m = m->m_next; |
1249 | } |
1250 | m->m_next = n; |
1251 | m = m0; |
1252 | m_pktlen(m) = m_length2(m, NULL); |
1253 | } |
1254 | |
1255 | return m; |
1256 | } |
1257 | #endif /* DEBUG || DEVELOPMENT */ |
1258 | |
1259 | /* |
1260 | * nx_netif_compat_na_rxsync() extracts mbufs from the queue filled by |
1261 | * nx_netif_compat_receive() and puts their content in the channel |
1262 | * receive ring. |
1263 | * |
1264 | * Accesses to kring are serialized via kring->ckr_rx_queue lock, because |
* the rx handler is asynchronous.
1266 | */ |
1267 | static int |
1268 | nx_netif_compat_na_rxsync(struct __kern_channel_ring *kring, struct proc *p, |
1269 | uint32_t flags) |
1270 | { |
1271 | #pragma unused(p) |
1272 | struct nexus_adapter *na = KRNA(kring); |
1273 | struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na; |
1274 | struct nx_netif *nif = nifna->nifna_netif; |
1275 | slot_idx_t nm_i; /* index into the channel ring */ |
1276 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats; |
1277 | uint32_t npkts = 0; |
1278 | uint32_t byte_count = 0; |
1279 | const slot_idx_t lim = kring->ckr_lim; |
1280 | const slot_idx_t head = kring->ckr_rhead; |
1281 | boolean_t force_update = ((flags & NA_SYNCF_FORCE_READ) || |
1282 | kring->ckr_pending_intr != 0); |
1283 | struct mbuf *m; |
1284 | uint32_t n; |
1285 | uint32_t avail; /* in slots */ |
1286 | int err, mlen; |
1287 | boolean_t attach_mbuf = FALSE; |
1288 | struct nx_mbq *q, tmpq; |
1289 | struct kern_pbufpool *pp = kring->ckr_pp; |
1290 | uint32_t ph_cnt, i = 0; |
1291 | |
1292 | ASSERT(pp->pp_max_frags == 1); |
1293 | ASSERT(head <= lim); |
1294 | |
1295 | /* |
1296 | * First part: skip past packets that userspace has released. |
1297 | * This can possibly make room for the second part. |
1298 | * equivalent to kr_reclaim() |
1299 | */ |
1300 | if (kring->ckr_khead != head) { |
1301 | kring->ckr_khead = head; |
1302 | /* ensure global visibility */ |
1303 | os_atomic_thread_fence(seq_cst); |
1304 | } |
1305 | |
1306 | STATS_INC(nifs, NETIF_STATS_RX_SYNC); |
1307 | |
1308 | /* |
1309 | * Second part: import newly received packets. |
1310 | */ |
1311 | if (!force_update) { |
1312 | return 0; |
1313 | } |
1314 | |
1315 | /* update our work timestamp */ |
1316 | na->na_work_ts = _net_uptime; |
1317 | |
1318 | /* first empty slot in the receive ring */ |
1319 | nm_i = kring->ckr_ktail; |
1320 | |
1321 | /* |
1322 | * Compute the available space (in bytes) in this ring. |
1323 | * The first slot that is not considered in is the one |
1324 | * before ckr_khead. |
1325 | */ |
avail = kr_available_slots_rxring(kring);
1327 | if (__improbable(avail == 0)) { |
1328 | return 0; |
1329 | } |
1330 | |
1331 | if (NA_KERNEL_ONLY(na)) { |
1332 | ASSERT(na->na_ifp != NULL && |
1333 | fsw_ifp_to_fsw(na->na_ifp) != NULL); |
1334 | /* |
1335 | * We are not supporting attachment to bridge flowswitch |
1336 | * for now, until we support PKT_F_MBUF_DATA packets |
1337 | * in bridge flowswitch. |
1338 | */ |
1339 | attach_mbuf = TRUE; |
1340 | } |
1341 | |
1342 | /* |
1343 | * Quickly move all of ckr_rx_queue to a temporary queue to dequeue |
1344 | * from. For each mbuf, attach or copy it to the packet attached |
1345 | * to the slot. Release the lock while we're doing that, to allow |
1346 | * for the input thread to enqueue. |
1347 | */ |
1348 | q = &kring->ckr_rx_queue; |
nx_mbq_init(&tmpq, NX_MBQ_NO_LIMIT);
1350 | nx_mbq_lock_spin(q); |
1351 | nx_mbq_concat(&tmpq, q); |
1352 | nx_mbq_unlock(q); |
1353 | |
1354 | if (__improbable(nx_mbq_len(&tmpq) == 0)) { |
1355 | return 0; |
1356 | } |
1357 | |
1358 | ph_cnt = MIN(avail, nx_mbq_len(&tmpq)); |
err = kern_pbufpool_alloc_batch_nosleep(pp, 1, kring->ckr_scratch,
&ph_cnt);
1361 | if (err == ENOMEM) { |
SK_DF(SK_VERB_MEM, "%s(%p) failed to alloc %d pkts for kr "
"0x%llu", sk_proc_name_address(p), sk_proc_pid(p), ph_cnt,
1364 | SK_KVA(kring)); |
1365 | goto done; |
1366 | } |
1367 | ASSERT(ph_cnt != 0); |
1368 | |
1369 | for (n = 0; (n < ph_cnt) && |
((m = nx_mbq_deq(&tmpq)) != NULL); n++) {
1371 | struct __kern_slot_desc *ksd = KR_KSD(kring, nm_i); |
1372 | struct __kern_packet *pkt; |
1373 | kern_packet_t ph; |
1374 | uint8_t hlen; |
1375 | uint16_t tag; |
1376 | char *h; |
1377 | |
1378 | ASSERT(m->m_flags & M_PKTHDR); |
1379 | mlen = m_pktlen(m); |
1380 | h = m->m_pkthdr.pkt_hdr; |
1381 | if (__improbable(mlen == 0 || h == NULL || |
1382 | h < (char *)mbuf_datastart(m) || h > (char *)m->m_data)) { |
1383 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
SK_RD(5, "kr \"%s\" (0x%llx) m 0x%llx len %d "
"bad pkt_hdr", kring->ckr_name,
1386 | SK_KVA(kring), SK_KVA(m), mlen); |
1387 | m_freem(m); |
1388 | m = NULL; |
1389 | continue; |
1390 | } |
1391 | |
1392 | hlen = (uint8_t)(m->m_data - (uintptr_t)h); |
1393 | mlen += hlen; |
1394 | |
1395 | #if DEBUG || DEVELOPMENT |
1396 | if (__improbable(netif_rx_split != 0)) { |
1397 | /* callee frees mbuf upon failure */ |
1398 | if ((m = nx_netif_rx_split(m, hlen)) == NULL) { |
1399 | continue; |
1400 | } |
1401 | |
1402 | ASSERT((uintptr_t)m->m_data >= |
1403 | (uintptr_t)mbuf_datastart(m)); |
1404 | ASSERT((uintptr_t)m->m_data < |
1405 | ((uintptr_t)mbuf_datastart(m) + |
1406 | mbuf_maxlen(m))); |
1407 | } |
1408 | #endif /* DEBUG || DEVELOPMENT */ |
1409 | |
1410 | ph = kring->ckr_scratch[i]; |
1411 | ASSERT(ph != 0); |
1412 | kring->ckr_scratch[i] = 0; |
1413 | pkt = SK_PTR_ADDR_KPKT(ph); |
1414 | ++i; |
1415 | |
1416 | /* |
1417 | * Wind back the data pointer to include any frame headers |
1418 | * as part of the copy below. The header length is then |
1419 | * stored in the corresponding metadata area of the buffer. |
1420 | */ |
1421 | m->m_data -= hlen; |
1422 | m->m_len += hlen; |
1423 | m->m_pkthdr.len += hlen; |
1424 | ASSERT(mlen == m->m_pkthdr.len); |
1425 | |
1426 | pkt->pkt_link_flags = 0; |
1427 | if (m->m_flags & M_HASFCS) { |
1428 | pkt->pkt_link_flags |= PKT_LINKF_ETHFCS; |
1429 | } |
if (mbuf_get_vlan_tag(m, &tag) == 0) {
1431 | (void) kern_packet_set_vlan_tag(SK_PKT2PH(pkt), tag, |
1432 | FALSE); |
1433 | } |
SK_DF(SK_VERB_NETIF | SK_VERB_SYNC | SK_VERB_RX,
"kr \"%s\" (0x%llx) m 0x%llx idx %u slot_len %d",
1436 | kring->ckr_name, SK_KVA(kring), SK_KVA(m), nm_i, mlen); |
1437 | |
1438 | if (__probable(attach_mbuf)) { |
1439 | STATS_INC(nifs, NETIF_STATS_RX_COPY_ATTACH); |
err = __packet_initialize_with_mbuf(pkt, m, 0, hlen);
1441 | VERIFY(err == 0); |
1442 | } else if (__probable(mlen <= (int)PP_BUF_SIZE_DEF(pp))) { |
1443 | STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT); |
1444 | /* |
1445 | * We're sending this up to a user channel opened |
1446 | * directly to the netif; copy everything. |
1447 | */ |
err = __packet_set_headroom(ph, 0);
1449 | VERIFY(err == 0); |
err = __packet_set_link_header_length(ph, hlen);
1451 | VERIFY(err == 0); |
1452 | nif->nif_pkt_copy_from_mbuf(NR_RX, ph, 0, m, 0, |
1453 | mlen, FALSE, 0); |
1454 | /* finalize and attach the packet */ |
1455 | err = __packet_finalize(ph); |
1456 | VERIFY(err == 0); |
1457 | m_freem(m); |
1458 | m = NULL; |
1459 | } else { |
1460 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1461 | STATS_INC(nifs, NETIF_STATS_DROP); |
1462 | m_freem(m); |
1463 | m = NULL; |
kern_pbufpool_free(pp, ph);
1465 | ph = 0; |
1466 | pkt = NULL; |
1467 | continue; |
1468 | } |
1469 | |
1470 | err = KR_SLOT_ATTACH_METADATA(kring, ksd, |
(struct __kern_quantum *)pkt);
1472 | ASSERT(err == 0); |
1473 | |
1474 | byte_count += mlen; |
1475 | ++npkts; |
1476 | ASSERT(npkts < kring->ckr_num_slots); |
nm_i = SLOT_NEXT(nm_i, lim);
1478 | } |
1479 | |
1480 | if (__improbable(i < ph_cnt)) { |
kern_pbufpool_free_batch(pp, &kring->ckr_scratch[i],
(ph_cnt - i));
1483 | } |
1484 | |
1485 | ASSERT(npkts <= ph_cnt); |
kr_update_stats(kring, npkts, byte_count);
1487 | |
1488 | if (npkts != 0) { |
1489 | kring->ckr_ktail = nm_i; |
1490 | STATS_ADD(nifs, NETIF_STATS_RX_PACKETS, npkts); |
1491 | } |
1492 | kring->ckr_pending_intr = 0; |
1493 | |
1494 | #if SK_LOG |
1495 | if (__improbable((sk_verbose & SK_VERB_NETIF) != 0)) { |
1496 | nx_netif_compat_na_rxsync_log(kring, p, flags, nm_i); |
1497 | } |
1498 | #endif /* SK_LOG */ |
1499 | |
1500 | done: |
1501 | /* |
1502 | * If we didn't process all packets in temporary queue, |
1503 | * move them back to the head of ckr_rx_queue. |
1504 | */ |
1505 | if (!nx_mbq_empty(&tmpq)) { |
1506 | nx_mbq_lock_spin(q); |
1507 | nx_mbq_concat(&tmpq, q); |
1508 | ASSERT(nx_mbq_empty(q)); |
1509 | nx_mbq_concat(q, &tmpq); |
1510 | nx_mbq_unlock(q); |
1511 | } |
1512 | ASSERT(nx_mbq_empty(&tmpq)); |
1513 | |
1514 | return 0; |
1515 | } |
1516 | |
1517 | static void |
1518 | nx_netif_compat_na_dtor(struct nexus_adapter *na) |
1519 | { |
1520 | struct ifnet *ifp; |
1521 | struct nexus_netif_compat_adapter *nca = |
1522 | (struct nexus_netif_compat_adapter *)na; |
1523 | |
1524 | SK_LOCK_ASSERT_HELD(); |
1525 | |
SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx)", na->na_name, SK_KVA(na));
1527 | |
1528 | /* |
1529 | * If the finalizer callback hasn't been called for whatever |
1530 | * reasons, pick up the embryonic ifnet stored in na_private. |
1531 | * Otherwise, release the I/O refcnt of a non-NULL na_ifp. |
1532 | */ |
1533 | if ((ifp = na->na_ifp) == NULL) { |
1534 | ifp = na->na_private; |
1535 | na->na_private = NULL; |
1536 | } else { |
1537 | ifnet_decr_iorefcnt(ifp); |
1538 | na->na_ifp = NULL; |
1539 | } |
1540 | |
1541 | if (nca->nca_up.nifna_netif != NULL) { |
1542 | nx_netif_release(nca->nca_up.nifna_netif); |
1543 | nca->nca_up.nifna_netif = NULL; |
1544 | } |
1545 | ASSERT(!SKYWALK_NATIVE(ifp)); |
1546 | } |
1547 | |
1548 | /* |
1549 | * nx_netif_compat_attach() makes it possible to use skywalk on |
1550 | * a device without native skywalk support. |
1551 | * This is less performant than native support but potentially |
1552 | * faster than raw sockets or similar schemes. |
1553 | */ |
1554 | int |
1555 | nx_netif_compat_attach(struct kern_nexus *nx, struct ifnet *ifp) |
1556 | { |
1557 | struct nx_netif *nif = NX_NETIF_PRIVATE(nx); |
1558 | struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params; |
1559 | struct nexus_netif_compat_adapter *devnca = NULL; |
1560 | struct nexus_netif_compat_adapter *hostnca = NULL; |
1561 | struct nexus_adapter *devna = NULL; |
1562 | struct nexus_adapter *hostna = NULL; |
1563 | boolean_t embryonic = FALSE; |
1564 | uint32_t tx_rings, tx_slots; |
1565 | int retval = 0; |
1566 | |
1567 | SK_LOCK_ASSERT_HELD(); |
1568 | ASSERT(!SKYWALK_NATIVE(ifp)); |
1569 | ASSERT(!SKYWALK_CAPABLE(ifp)); |
1570 | ASSERT(ifp->if_na == NULL); |
1571 | ASSERT(ifp->if_na_ops == NULL); |
1572 | |
devnca = na_netif_compat_alloc(Z_WAITOK);
hostnca = na_netif_compat_alloc(Z_WAITOK);
1575 | |
1576 | /* |
1577 | * We can be called for two different interface states: |
1578 | * |
1579 | * Fully attached: get an io ref count; upon success, this |
1580 | * holds a reference to the ifnet for the ifp pointer stored |
1581 | * in 'na_ifp' down below for both adapters. |
1582 | * |
* Embryonic: temporarily hold the ifnet in na_private, which
1584 | * upon a successful ifnet_attach(), will be moved over to |
1585 | * the 'na_ifp' with an io ref count held. |
1586 | * |
1587 | * The ifnet in 'na_ifp' will be released by na_release_locked(). |
1588 | */ |
if (!ifnet_is_attached(ifp, 1)) {
1590 | if (!(ifp->if_refflags & IFRF_EMBRYONIC)) { |
1591 | ifp = NULL; |
1592 | retval = ENXIO; |
1593 | goto err; |
1594 | } |
1595 | embryonic = TRUE; |
1596 | } |
1597 | |
1598 | /* initialize the (compat) device netif adapter */ |
1599 | devnca->nca_up.nifna_netif = nif; |
1600 | nx_netif_retain(nif); |
1601 | devna = &devnca->nca_up.nifna_up; |
1602 | (void) strncpy(devna->na_name, ifp->if_xname, sizeof(devna->na_name) - 1); |
1603 | devna->na_name[sizeof(devna->na_name) - 1] = '\0'; |
uuid_generate_random(devna->na_uuid);
1605 | if (embryonic) { |
1606 | /* |
1607 | * We will move this over to na_ifp once |
1608 | * the interface is fully attached. |
1609 | */ |
1610 | devna->na_private = ifp; |
1611 | ASSERT(devna->na_ifp == NULL); |
1612 | } else { |
1613 | ASSERT(devna->na_private == NULL); |
1614 | /* use I/O refcnt from ifnet_is_attached() */ |
1615 | devna->na_ifp = ifp; |
1616 | } |
1617 | |
1618 | devna->na_type = NA_NETIF_COMPAT_DEV; |
1619 | devna->na_free = na_netif_compat_free; |
1620 | devna->na_activate = nx_netif_compat_na_activate; |
1621 | devna->na_txsync = nx_netif_compat_na_txsync; |
1622 | devna->na_rxsync = nx_netif_compat_na_rxsync; |
1623 | devna->na_dtor = nx_netif_compat_na_dtor; |
1624 | devna->na_krings_create = nx_netif_dev_krings_create; |
1625 | devna->na_krings_delete = nx_netif_dev_krings_delete; |
1626 | devna->na_special = nx_netif_na_special; |
1627 | |
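/* na_stats_type is read-only after setup; cast away const for this one-time init */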
1628 | *(nexus_stats_type_t *)(uintptr_t)&devna->na_stats_type = |
1629 | NEXUS_STATS_TYPE_INVALID; |
1630 | |
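/* expose dev TX rings/slots only if direct netif access is allowed for this interface */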
1631 | if (skywalk_netif_direct_allowed(ifp->if_xname)) { |
1632 | tx_rings = nxp->nxp_tx_rings; |
1633 | tx_slots = nxp->nxp_tx_slots; |
1634 | } else { |
1635 | tx_rings = 0; |
1636 | tx_slots = 0; |
1637 | } |
na_set_nrings(devna, NR_TX, tx_rings);
na_set_nrings(devna, NR_RX, nxp->nxp_rx_rings);
na_set_nslots(devna, NR_TX, tx_slots);
na_set_nslots(devna, NR_RX, nxp->nxp_rx_slots);
1642 | /* |
1643 | * Verify upper bounds; the parameters must have already been |
1644 | * validated by nxdom_prov_params() by the time we get here. |
1645 | */ |
1646 | ASSERT(na_get_nrings(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max); |
1647 | ASSERT(na_get_nrings(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max); |
1648 | ASSERT(na_get_nslots(devna, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max); |
1649 | ASSERT(na_get_nslots(devna, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max); |
1650 | |
1651 | na_attach_common(devna, nx, &nx_netif_compat_prov_s); |
1652 | |
1653 | if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx), |
1654 | nx, devna)) != 0) { |
1655 | ASSERT(devna->na_arena == NULL); |
1656 | /* we've transferred the refcnt to na_ifp above */ |
1657 | ifp = NULL; |
1658 | goto err; |
1659 | } |
1660 | ASSERT(devna->na_arena != NULL); |
1661 | |
1662 | *(uint32_t *)(uintptr_t)&devna->na_flowadv_max = nxp->nxp_flowadv_max; |
1663 | ASSERT(devna->na_flowadv_max == 0 || |
1664 | skmem_arena_nexus(devna->na_arena)->arn_flowadv_obj != NULL); |
1665 | |
1666 | /* setup packet copy routines */ |
if (skmem_arena_nexus(devna->na_arena)->arn_rx_pp->pp_max_frags > 1) {
1668 | nif->nif_pkt_copy_from_mbuf = |
1669 | pkt_copy_multi_buflet_from_mbuf; |
1670 | nif->nif_pkt_copy_to_mbuf = |
1671 | pkt_copy_multi_buflet_to_mbuf; |
1672 | } else { |
1673 | nif->nif_pkt_copy_from_mbuf = pkt_copy_from_mbuf; |
1674 | nif->nif_pkt_copy_to_mbuf = pkt_copy_to_mbuf; |
1675 | } |
1676 | |
1677 | /* initialize the host netif adapter */ |
1678 | hostnca->nca_up.nifna_netif = nif; |
1679 | nx_netif_retain(nif); |
1680 | hostna = &hostnca->nca_up.nifna_up; |
(void) snprintf(hostna->na_name, sizeof(hostna->na_name),
"%s^", devna->na_name);
uuid_generate_random(hostna->na_uuid);
1684 | if (embryonic) { |
1685 | /* |
1686 | * We will move this over to na_ifp once |
1687 | * the interface is fully attached. |
1688 | */ |
1689 | hostna->na_private = ifp; |
1690 | ASSERT(hostna->na_ifp == NULL); |
1691 | } else { |
1692 | ASSERT(hostna->na_private == NULL); |
1693 | hostna->na_ifp = devna->na_ifp; |
1694 | ifnet_incr_iorefcnt(hostna->na_ifp); |
1695 | } |
1696 | hostna->na_type = NA_NETIF_COMPAT_HOST; |
1697 | hostna->na_free = na_netif_compat_free; |
1698 | hostna->na_activate = nx_netif_host_na_activate; |
1699 | hostna->na_txsync = nx_netif_host_na_txsync; |
1700 | hostna->na_rxsync = nx_netif_host_na_rxsync; |
1701 | hostna->na_dtor = nx_netif_compat_na_dtor; |
1702 | hostna->na_krings_create = nx_netif_host_krings_create; |
1703 | hostna->na_krings_delete = nx_netif_host_krings_delete; |
1704 | hostna->na_special = nx_netif_host_na_special; |
1705 | |
1706 | os_atomic_or(&hostna->na_flags, NAF_HOST_ONLY, relaxed); |
1707 | *(nexus_stats_type_t *)(uintptr_t)&hostna->na_stats_type = |
1708 | NEXUS_STATS_TYPE_INVALID; |
1709 | |
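/* the host adapter uses a single TX ring and no RX rings */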
na_set_nrings(hostna, NR_TX, 1);
na_set_nrings(hostna, NR_RX, 0);
na_set_nslots(hostna, NR_TX, nxp->nxp_tx_slots);
na_set_nslots(hostna, NR_RX, 0);
1714 | |
1715 | na_attach_common(hostna, nx, &nx_netif_prov_s); |
1716 | |
1717 | if ((retval = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx), |
1718 | nx, hostna)) != 0) { |
1719 | ASSERT(hostna->na_arena == NULL); |
1720 | /* we've transferred the refcnt to na_ifp above */ |
1721 | ifp = NULL; |
1722 | goto err; |
1723 | } |
1724 | ASSERT(hostna->na_arena != NULL); |
1725 | |
1726 | *(uint32_t *)(uintptr_t)&hostna->na_flowadv_max = nxp->nxp_flowadv_max; |
1727 | ASSERT(hostna->na_flowadv_max == 0 || |
1728 | skmem_arena_nexus(hostna->na_arena)->arn_flowadv_obj != NULL); |
1729 | |
1730 | /* these will be undone by destructor */ |
1731 | ifp->if_na_ops = &na_netif_compat_ops; |
1732 | ifp->if_na = &devnca->nca_up; |
na_retain_locked(devna);
na_retain_locked(hostna);
1735 | |
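/* mark the interface as skywalk-capable (compat mode) */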
1736 | SKYWALK_SET_CAPABLE(ifp); |
1737 | |
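/* bind the dev and host adapters to their reserved nexus ports */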
1738 | NETIF_WLOCK(nif); |
1739 | nif->nif_ifp = ifp; |
1740 | retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_DEV, NULL, &devna, kernproc); |
1741 | ASSERT(retval == 0); |
1742 | retval = nx_port_alloc(nx, NEXUS_PORT_NET_IF_HOST, NULL, &hostna, kernproc); |
1743 | ASSERT(retval == 0); |
1744 | NETIF_WUNLOCK(nif); |
1745 | |
1746 | #if SK_LOG |
1747 | uuid_string_t uuidstr; |
1748 | SK_DF(SK_VERB_NETIF, "na_name: \"%s\"" , devna->na_name); |
1749 | SK_DF(SK_VERB_NETIF, " UUID: %s" , |
1750 | sk_uuid_unparse(devna->na_uuid, uuidstr)); |
1751 | SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")" , |
1752 | SK_KVA(devna->na_nx), NX_DOM(devna->na_nx)->nxdom_name, |
1753 | NX_DOM_PROV(devna->na_nx)->nxdom_prov_name); |
1754 | SK_DF(SK_VERB_NETIF, " flags: 0x%b" , devna->na_flags, NAF_BITS); |
1755 | SK_DF(SK_VERB_NETIF, " flowadv_max: %u" , devna->na_flowadv_max); |
1756 | SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u" , |
1757 | na_get_nrings(devna, NR_TX), na_get_nrings(devna, NR_RX)); |
1758 | SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u" , |
1759 | na_get_nslots(devna, NR_TX), na_get_nslots(devna, NR_RX)); |
1760 | #if CONFIG_NEXUS_USER_PIPE |
1761 | SK_DF(SK_VERB_NETIF, " next_pipe: %u" , devna->na_next_pipe); |
1762 | SK_DF(SK_VERB_NETIF, " max_pipes: %u" , devna->na_max_pipes); |
1763 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
1764 | SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]" , |
1765 | SK_KVA(ifp), ifp->if_xname, ifp->if_refio); |
1766 | SK_DF(SK_VERB_NETIF, "hostna: \"%s\"" , hostna->na_name); |
1767 | SK_DF(SK_VERB_NETIF, " UUID: %s" , |
1768 | sk_uuid_unparse(hostna->na_uuid, uuidstr)); |
1769 | SK_DF(SK_VERB_NETIF, " nx: 0x%llx (\"%s\":\"%s\")" , |
1770 | SK_KVA(hostna->na_nx), NX_DOM(hostna->na_nx)->nxdom_name, |
1771 | NX_DOM_PROV(hostna->na_nx)->nxdom_prov_name); |
1772 | SK_DF(SK_VERB_NETIF, " flags: 0x%b" , |
1773 | hostna->na_flags, NAF_BITS); |
1774 | SK_DF(SK_VERB_NETIF, " flowadv_max: %u" , hostna->na_flowadv_max); |
1775 | SK_DF(SK_VERB_NETIF, " rings: tx %u rx %u" , |
1776 | na_get_nrings(hostna, NR_TX), na_get_nrings(hostna, NR_RX)); |
1777 | SK_DF(SK_VERB_NETIF, " slots: tx %u rx %u" , |
1778 | na_get_nslots(hostna, NR_TX), na_get_nslots(hostna, NR_RX)); |
1779 | #if CONFIG_NEXUS_USER_PIPE |
1780 | SK_DF(SK_VERB_NETIF, " next_pipe: %u" , hostna->na_next_pipe); |
1781 | SK_DF(SK_VERB_NETIF, " max_pipes: %u" , hostna->na_max_pipes); |
1782 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
1783 | SK_DF(SK_VERB_NETIF, " ifp: 0x%llx %s [ioref %u]" , SK_KVA(ifp), |
1784 | ifp->if_xname, ifp->if_refio); |
1785 | #endif /* SK_LOG */ |
1786 | |
1787 | err: |
1788 | if (retval != 0) { |
1789 | ASSERT(ifp == NULL); |
1790 | if (devna != NULL) { |
1791 | if (devna->na_arena != NULL) { |
1792 | skmem_arena_release(devna->na_arena); |
1793 | devna->na_arena = NULL; |
1794 | } |
1795 | if (devna->na_ifp != NULL) { |
1796 | ifnet_decr_iorefcnt(devna->na_ifp); |
1797 | devna->na_ifp = NULL; |
1798 | } |
1799 | devna->na_private = NULL; |
1800 | } |
1801 | if (hostna != NULL) { |
1802 | if (hostna->na_arena != NULL) { |
1803 | skmem_arena_release(hostna->na_arena); |
1804 | hostna->na_arena = NULL; |
1805 | } |
1806 | if (hostna->na_ifp != NULL) { |
1807 | ifnet_decr_iorefcnt(hostna->na_ifp); |
1808 | hostna->na_ifp = NULL; |
1809 | } |
1810 | hostna->na_private = NULL; |
1811 | } |
1812 | if (devnca != NULL) { |
1813 | if (devnca->nca_up.nifna_netif != NULL) { |
1814 | nx_netif_release(devnca->nca_up.nifna_netif); |
1815 | devnca->nca_up.nifna_netif = NULL; |
1816 | } |
na_netif_compat_free((struct nexus_adapter *)devnca);
1818 | } |
1819 | if (hostnca != NULL) { |
1820 | if (hostnca->nca_up.nifna_netif != NULL) { |
1821 | nx_netif_release(hostnca->nca_up.nifna_netif); |
1822 | hostnca->nca_up.nifna_netif = NULL; |
1823 | } |
na_netif_compat_free((struct nexus_adapter *)hostnca);
1825 | } |
1826 | } |
1827 | return retval; |
1828 | } |
1829 | |
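/*
* Finalizer for the compat adapters; invoked once the ifnet is fully
* attached, handing off to the common na_netif_finalize() which moves
* the embryonic ifnet from na_private over to na_ifp.
*/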
1830 | static void |
1831 | na_netif_compat_finalize(struct nexus_netif_adapter *nifna, struct ifnet *ifp) |
1832 | { |
1833 | na_netif_finalize(nifna, ifp); |
1834 | } |
1835 | |
1836 | /* |
* Intercept the rx routine in the standard device driver.
* The second argument is non-zero to intercept, 0 to restore the
* original input handler.
1839 | */ |
1840 | static int |
1841 | nx_netif_compat_catch_rx(struct nexus_netif_compat_adapter *nca, |
1842 | boolean_t enable) |
1843 | { |
1844 | struct ifnet *ifp = nca->nca_up.nifna_up.na_ifp; |
1845 | int err = 0; |
1846 | |
1847 | ASSERT(!(nca->nca_up.nifna_up.na_flags & NAF_HOST_ONLY)); |
1848 | |
1849 | if (enable) { |
err = dlil_set_input_handler(ifp, nx_netif_compat_receive);
1851 | } else { |
1852 | dlil_reset_input_handler(ifp); |
1853 | } |
1854 | return err; |
1855 | } |
1856 | |
1857 | /* |
1858 | * Transmit routine used by nx_netif_compat_na_txsync(). Returns 0 on success |
1859 | * and non-zero on error (which may be packet drops or other errors). |
* 'pkt' identifies the channel packet to transmit; 'm' is the
* (preallocated) mbuf to use for the transmission.
1862 | * |
1863 | * We should add a reference to the mbuf so the m_freem() at the end |
1864 | * of the transmission does not consume resources. |
1865 | * |
1866 | * On FreeBSD, and on multiqueue cards, we can force the queue using |
1867 | * if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) |
1868 | * i = m->m_pkthdr.flowid % adapter->num_queues; |
1869 | * else |
1870 | * i = curcpu % adapter->num_queues; |
1871 | * |
1872 | */ |
1873 | static int |
1874 | nx_netif_compat_xmit_frame(struct nexus_adapter *na, struct mbuf *m, |
1875 | struct __kern_packet *pkt) |
1876 | { |
1877 | struct nexus_netif_adapter *nifna = (struct nexus_netif_adapter *)na; |
1878 | struct nx_netif *nif = nifna->nifna_netif; |
1879 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats; |
1880 | struct ifnet *ifp = na->na_ifp; |
1881 | kern_packet_t ph = SK_PTR_ENCODE(pkt, METADATA_TYPE(pkt), |
1882 | METADATA_SUBTYPE(pkt)); |
1883 | uint32_t len; |
1884 | int ret = 0; |
1885 | |
if ((ret = mbuf_ring_cluster_activate(m)) != 0) {
panic("Failed to activate mbuf ring cluster 0x%llx (%d)",
SK_KVA(m), ret);
1889 | /* NOTREACHED */ |
1890 | __builtin_unreachable(); |
1891 | } |
1892 | |
1893 | len = pkt->pkt_length; |
1894 | |
1895 | /* |
1896 | * The mbuf should be a cluster from our special pool, |
1897 | * so we do not need to do an m_copyback but just copy. |
1898 | */ |
1899 | if (m->m_ext.ext_size < len) { |
1900 | SK_RD(5, "size %u < len %u" , m->m_ext.ext_size, len); |
1901 | len = m->m_ext.ext_size; |
1902 | } |
1903 | |
1904 | STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF); |
1905 | if (PACKET_HAS_PARTIAL_CHECKSUM(pkt)) { |
1906 | STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM); |
1907 | } |
1908 | |
1909 | nif->nif_pkt_copy_to_mbuf(NR_TX, ph, pkt->pkt_headroom, m, 0, len, |
1910 | PACKET_HAS_PARTIAL_CHECKSUM(pkt), pkt->pkt_csum_tx_start_off); |
1911 | |
1912 | /* used for tx notification */ |
ret = mbuf_set_tx_compl_data(m, (uintptr_t)ifp, (uintptr_t)NULL);
1914 | ASSERT(ret == 0); |
1915 | |
1916 | ret = dlil_output_handler(ifp, m); |
1917 | return ret; |
1918 | } |
1919 | |