/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#define _IP_VHL
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <sys/kdebug.h>
#include <sys/sdt.h>

#define DBG_FUNC_NX_NETIF_HOST_ENQUEUE \
        SKYWALKDBG_CODE(DBG_SKYWALK_NETIF, 2)

static void nx_netif_host_catch_tx(struct nexus_adapter *, bool);
static inline struct __kern_packet*
nx_netif_mbuf_to_kpkt(struct nexus_adapter *, struct mbuf *);

#define SK_IFCAP_CSUM (IFCAP_HWCSUM|IFCAP_CSUM_PARTIAL|IFCAP_CSUM_ZERO_INVERT)

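/*
 * Adjust the interface offload flags when the host adapter is activated
 * or deactivated.  On activation, the driver-advertised checksum/TSO
 * capabilities are saved and replaced with the set provided by the
 * Skywalk layer; on deactivation, the saved driver flags are restored.
 */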
static void
nx_netif_host_adjust_if_capabilities(struct nexus_adapter *na, bool activate)
{
        struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
        struct ifnet *ifp = na->na_ifp;

        ifnet_lock_exclusive(ifp);

        if (activate) {
                /* XXX: adi@apple.com - disable TSO and LRO for now */
                nif->nif_hwassist = ifp->if_hwassist;
                nif->nif_capabilities = ifp->if_capabilities;
                nif->nif_capenable = ifp->if_capenable;
                ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_TSOF);
                ifp->if_capabilities &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
                ifp->if_capenable &= ~(SK_IFCAP_CSUM | IFCAP_TSO);

                /*
                 * Re-enable the capabilities which the Skywalk layer provides:
                 *
                 * Native driver: a copy from packet to mbuf always occurs
                 * for each inbound and outbound packet; if hardware
                 * does not support csum offload, we leverage combined
                 * copy and checksum, and thus advertise IFNET_CSUM_PARTIAL.
                 * We also always enable 16KB jumbo mbuf support.
                 *
                 * Compat driver: inbound and outbound mbufs don't incur a
                 * copy, and so leave the driver advertised flags alone.
                 */
                if (NA_KERNEL_ONLY(na)) {
                        if (na->na_type == NA_NETIF_HOST) { /* native */
                                ifp->if_hwassist |=
                                    IFNET_MULTIPAGES | (nif->nif_hwassist &
                                    (IFNET_CHECKSUMF | IFNET_TSOF));
                                ifp->if_capabilities |=
                                    (nif->nif_capabilities &
                                    (SK_IFCAP_CSUM | IFCAP_TSO));
                                ifp->if_capenable |=
                                    (nif->nif_capenable &
                                    (SK_IFCAP_CSUM | IFCAP_TSO));
                                /*
                                 * If hardware doesn't support IP and TCP/UDP
                                 * csum offload, advertise IFNET_CSUM_PARTIAL.
                                 */
                                if ((ifp->if_hwassist & IFNET_UDP_TCP_TX_CHECKSUMF) !=
                                    IFNET_UDP_TCP_TX_CHECKSUMF) {
                                        ifp->if_hwassist |= IFNET_CSUM_PARTIAL | IFNET_CSUM_ZERO_INVERT;
                                        ifp->if_capabilities |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
                                        ifp->if_capenable |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
                                }
                                if (sk_fsw_tx_agg_tcp != 0) {
                                        ifp->if_hwassist |= IFNET_TSOF;
                                        ifp->if_capabilities |= IFCAP_TSO;
                                        ifp->if_capenable |= IFCAP_TSO;
                                }
                        } else { /* compat */
                                ifp->if_hwassist |=
                                    (nif->nif_hwassist &
                                    (IFNET_CHECKSUMF | IFNET_TSOF));
                                ifp->if_capabilities |=
                                    (nif->nif_capabilities &
                                    (SK_IFCAP_CSUM | IFCAP_TSO));
                                ifp->if_capenable |=
                                    (nif->nif_capenable &
                                    (SK_IFCAP_CSUM | IFCAP_TSO));
                        }
                }
        } else {
                /* Unset any capabilities previously set by Skywalk */
                ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_MULTIPAGES);
                ifp->if_capabilities &= ~SK_IFCAP_CSUM;
                ifp->if_capenable &= ~SK_IFCAP_CSUM;
                if ((sk_fsw_tx_agg_tcp != 0) &&
                    (na->na_type == NA_NETIF_HOST)) {
                        ifp->if_hwassist &= ~IFNET_TSOF;
                        ifp->if_capabilities &= ~IFCAP_TSO;
                        ifp->if_capenable &= ~IFCAP_TSO;
                }
                /* Restore driver original flags */
                ifp->if_hwassist |= (nif->nif_hwassist &
                    (IFNET_CHECKSUMF | IFNET_TSOF | IFNET_MULTIPAGES));
                ifp->if_capabilities |=
                    (nif->nif_capabilities & (SK_IFCAP_CSUM | IFCAP_TSO));
                ifp->if_capenable |=
                    (nif->nif_capenable & (SK_IFCAP_CSUM | IFCAP_TSO));
        }

        ifnet_lock_done(ifp);
}

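/*
 * Returns true if software GSO should be used on the TX path, i.e. for
 * a native netif plumbed under a flowswitch, with no hardware TSO and
 * with TX aggregation enabled.
 */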
static bool
nx_netif_host_is_gso_needed(struct nexus_adapter *na)
{
        struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;

        /*
         * Don't enable for Compat netif.
         */
        if (na->na_type != NA_NETIF_HOST) {
                return false;
        }
        /*
         * Don't enable if netif is not plumbed under a flowswitch.
         */
        if (!NA_KERNEL_ONLY(na)) {
                return false;
        }
        /*
         * Don't enable if HW TSO is enabled.
         */
        if (((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) ||
            ((nif->nif_hwassist & IFNET_TSO_IPV6) != 0)) {
                return false;
        }
        /*
         * Don't enable if TX aggregation is disabled.
         */
        if (sk_fsw_tx_agg_tcp == 0) {
                return false;
        }
        return true;
}

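/* na_activate callback for netif host adapters */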
int
nx_netif_host_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
        struct ifnet *ifp = na->na_ifp;
        int error = 0;

        ASSERT(na->na_type == NA_NETIF_HOST ||
            na->na_type == NA_NETIF_COMPAT_HOST);
        ASSERT(na->na_flags & NAF_HOST_ONLY);

        SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
            SK_KVA(na), na_activate_mode2str(mode));

        switch (mode) {
        case NA_ACTIVATE_MODE_ON:
                VERIFY(SKYWALK_CAPABLE(ifp));

                nx_netif_host_adjust_if_capabilities(na, true);
                /*
                 * Make Skywalk control the packet steering; don't
                 * intercept tx packets if this is a netif compat
                 * adapter attached to a flowswitch.
                 */
                nx_netif_host_catch_tx(na, true);

                os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
                break;

        case NA_ACTIVATE_MODE_DEFUNCT:
                VERIFY(SKYWALK_CAPABLE(ifp));
                break;

        case NA_ACTIVATE_MODE_OFF:
                /* Release packet steering control. */
                nx_netif_host_catch_tx(na, false);

                /*
                 * Note that here we cannot assert SKYWALK_CAPABLE()
                 * as we're called in the destructor path.
                 */
                os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);

                nx_netif_host_adjust_if_capabilities(na, false);
                break;

        default:
                VERIFY(0);
                /* NOTREACHED */
                __builtin_unreachable();
        }

        return error;
}

/* na_krings_create callback for netif host adapters */
int
nx_netif_host_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
        int ret;

        SK_LOCK_ASSERT_HELD();
        ASSERT(na->na_type == NA_NETIF_HOST ||
            na->na_type == NA_NETIF_COMPAT_HOST);
        ASSERT(na->na_flags & NAF_HOST_ONLY);

        ret = na_rings_mem_setup(na, FALSE, ch);
        if (ret == 0) {
                struct __kern_channel_ring *kring;
                uint32_t i;

                /* drop by default until fully bound */
                if (NA_KERNEL_ONLY(na)) {
                        na_kr_drop(na, TRUE);
                }

                for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
                        kring = &NAKR(na, NR_RX)[i];
                        /* initialize the nx_mbq for the sw rx ring */
                        nx_mbq_safe_init(kring, &kring->ckr_rx_queue,
                            NX_MBQ_NO_LIMIT, &nexus_mbq_lock_group,
                            &nexus_lock_attr);
                        SK_DF(SK_VERB_NETIF,
                            "na \"%s\" (0x%llx) initialized host kr \"%s\" "
                            "(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
                            kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
                            CKRF_BITS);
                }
        }
        return ret;
}

/*
 * Destructor for netif host adapters; they also have an mbuf queue
 * on the rings connected to the host, so we need to purge them first.
 */
void
nx_netif_host_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
    boolean_t defunct)
{
        struct __kern_channel_ring *kring;
        uint32_t i;

        SK_LOCK_ASSERT_HELD();
        ASSERT(na->na_type == NA_NETIF_HOST ||
            na->na_type == NA_NETIF_COMPAT_HOST);
        ASSERT(na->na_flags & NAF_HOST_ONLY);

        if (NA_KERNEL_ONLY(na)) {
                na_kr_drop(na, TRUE);
        }

        for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
                struct nx_mbq *q;

                kring = &NAKR(na, NR_RX)[i];
                q = &kring->ckr_rx_queue;
                SK_DF(SK_VERB_NETIF,
                    "na \"%s\" (0x%llx) destroy host kr \"%s\" (0x%llx) "
                    "krflags 0x%b with qlen %u", na->na_name, SK_KVA(na),
                    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
                    CKRF_BITS, nx_mbq_len(q));
                nx_mbq_purge(q);
                if (!defunct) {
                        nx_mbq_safe_destroy(q);
                }
        }

        na_rings_mem_teardown(na, ch, defunct);
}

/* kring->ckr_na_sync callback for the host rx ring */
int
nx_netif_host_na_rxsync(struct __kern_channel_ring *kring,
    struct proc *p, uint32_t flags)
{
#pragma unused(kring, p, flags)
        return 0;
}

/*
 * kring->ckr_na_sync callback for the host tx ring.
 */
int
nx_netif_host_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(kring, p, flags)
        return 0;
}

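/* na_special callback for netif host adapters */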
int
nx_netif_host_na_special(struct nexus_adapter *na, struct kern_channel *ch,
    struct chreq *chr, nxspec_cmd_t spec_cmd)
{
        ASSERT(na->na_type == NA_NETIF_HOST ||
            na->na_type == NA_NETIF_COMPAT_HOST);
        return nx_netif_na_special_common(na, ch, chr, spec_cmd);
}

/*
 * Intercept the packet steering routine in the tx path, so that we
 * can decide which queue is used for an mbuf.  Second argument is
 * TRUE to intercept, FALSE to restore.
 */
static void
nx_netif_host_catch_tx(struct nexus_adapter *na, bool activate)
{
        struct ifnet *ifp = na->na_ifp;
        int err = 0;

        ASSERT(na->na_type == NA_NETIF_HOST ||
            na->na_type == NA_NETIF_COMPAT_HOST);
        ASSERT(na->na_flags & NAF_HOST_ONLY);

        /*
         * The common case is NA_KERNEL_ONLY, i.e. the netif is plumbed
         * below the flowswitch.  For TXSTART compat drivers and legacy,
         * don't intercept the DLIL output handler, since in this model
         * packets from both the BSD stack and the flowswitch are directly
         * enqueued to the classq via ifnet_enqueue().
         *
         * Otherwise, it's the uncommon case where a user channel is
         * opened directly to the netif.  Here we either intercept
         * or restore the DLIL output handler.
         */
        if (activate) {
                if (__improbable(!NA_KERNEL_ONLY(na))) {
                        return;
                }
                /*
                 * For native drivers only, intercept if_output();
                 * for compat, leave it alone since we don't need
                 * to perform any mbuf-pkt conversion.
                 */
                if (na->na_type == NA_NETIF_HOST) {
                        err = ifnet_set_output_handler(ifp,
                            nx_netif_host_is_gso_needed(na) ?
                            netif_gso_dispatch : nx_netif_host_output);
                        VERIFY(err == 0);
                }
        } else {
                if (__improbable(!NA_KERNEL_ONLY(na))) {
                        return;
                }
                /*
                 * Restore original if_output() for native drivers.
                 */
                if (na->na_type == NA_NETIF_HOST) {
                        ifnet_reset_output_handler(ifp);
                }
        }
}

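/*
 * Determine the address family of an mbuf from the IP version field at
 * m_pkthdr.pkt_hdr; returns AF_UNSPEC if the header pointer does not
 * fall within the mbuf chain or the version is neither 4 nor 6.
 */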
static int
get_af_from_mbuf(struct mbuf *m)
{
        uint8_t *pkt_hdr;
        uint8_t ipv;
        struct mbuf *m0;
        int af;

        pkt_hdr = m->m_pkthdr.pkt_hdr;
        for (m0 = m; m0 != NULL; m0 = m0->m_next) {
                if (pkt_hdr >= (uint8_t *)m0->m_data &&
                    pkt_hdr < (uint8_t *)m0->m_data + m0->m_len) {
                        break;
                }
        }
        if (m0 == NULL) {
                DTRACE_SKYWALK1(bad__pkthdr, struct mbuf *, m);
                af = AF_UNSPEC;
                goto done;
        }
        ipv = IP_VHL_V(*pkt_hdr);
        if (ipv == 4) {
                af = AF_INET;
        } else if (ipv == 6) {
                af = AF_INET6;
        } else {
                af = AF_UNSPEC;
        }
done:
        DTRACE_SKYWALK2(mbuf__af, int, af, struct mbuf *, m);
        return af;
}

/*
 * if_output() callback called by dlil_output() to handle mbufs coming out
 * of the host networking stack.  The mbuf will get converted to a packet,
 * and enqueued to the classq of a Skywalk native interface.
 */
int
nx_netif_host_output(struct ifnet *ifp, struct mbuf *m)
{
        struct nx_netif *nif = NA(ifp)->nifna_netif;
        struct kern_nexus *nx = nif->nif_nx;
        struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
        struct nexus_adapter *hostna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
        struct __kern_channel_ring *kring;
        uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));
        struct netif_stats *nifs = &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
        struct __kern_packet *kpkt;
        uint64_t qset_id;
        errno_t error = ENOBUFS;
        boolean_t pkt_drop = FALSE;

        /*
         * nx_netif_host_catch_tx() steers output packets here only for
         * native interfaces; we must never get here for compat.
         */
        ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
        ASSERT(m->m_nextpkt == NULL);
        ASSERT(hostna->na_type == NA_NETIF_HOST);
        ASSERT(sc_idx < KPKT_SC_MAX_CLASSES);

        kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
        KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_START), SK_KVA(kring));
        if (__improbable(!NA_IS_ACTIVE(hwna) || !NA_IS_ACTIVE(hostna))) {
                STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
                SK_ERR("\"%s\" (0x%llx) not in skywalk mode anymore",
                    hwna->na_name, SK_KVA(hwna));
                error = ENXIO;
                pkt_drop = TRUE;
                goto done;
        }
        /*
         * Drop if the kring no longer accepts packets.
         */
        if (__improbable(KR_DROP(&hostna->na_rx_rings[0]) || KR_DROP(kring))) {
                STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
                /* not a serious error, so no need to be chatty here */
                SK_DF(SK_VERB_NETIF,
                    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
                    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
                    CKRF_BITS, ifp->if_xname);
                error = ENXIO;
                pkt_drop = TRUE;
                goto done;
        }
        if (__improbable(((unsigned)m_pktlen(m) + ifp->if_tx_headroom) >
            kring->ckr_max_pkt_len)) { /* too long for us */
                STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
                SK_ERR("\"%s\" (0x%llx) from_host, drop packet size %u > %u",
                    hwna->na_name, SK_KVA(hwna), m_pktlen(m),
                    kring->ckr_max_pkt_len);
                pkt_drop = TRUE;
                goto done;
        }
        /*
         * Convert mbuf to packet and enqueue it.
         */
        kpkt = nx_netif_mbuf_to_kpkt(hwna, m);
        if (__probable(kpkt != NULL)) {
                if ((m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) == 0 &&
                    pktap_total_tap_count != 0) {
                        int af = get_af_from_mbuf(m);

                        if (af != AF_UNSPEC) {
                                nx_netif_pktap_output(ifp, af, kpkt);
                        }
                }
                if (NX_LLINK_PROV(nif->nif_nx) &&
                    ifp->if_traffic_rule_count > 0 &&
                    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
                    kpkt, &qset_id) == 0) {
                        struct netif_qset *qset;

                        /*
                         * This always returns a qset: if the qset id
                         * is invalid, the default qset is returned.
                         */
                        qset = nx_netif_find_qset(nif, qset_id);
                        ASSERT(qset != NULL);
                        kpkt->pkt_qset_idx = qset->nqs_idx;
                        error = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq, kpkt,
                            false, &pkt_drop);
                        nx_netif_qset_release(&qset);
                } else {
                        /* callee consumes packet */
                        error = ifnet_enqueue_pkt(ifp, kpkt, false, &pkt_drop);
                }
                netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
                if (pkt_drop) {
                        STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
                }
        } else {
                error = ENOBUFS;
                pkt_drop = TRUE;
        }
done:
        /* always free mbuf (even in the success case) */
        m_freem(m);
        if (__improbable(pkt_drop)) {
                STATS_INC(nifs, NETIF_STATS_DROP);
        }

        KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(kring),
            error);

        return error;
}

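/*
 * Compute the link-layer header length, i.e. the offset of
 * m_pkthdr.pkt_hdr from the start of the mbuf chain; returns EINVAL if
 * the header pointer is not within the chain or the offset exceeds
 * UINT8_MAX.
 */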
static inline int
get_l2_hlen(struct mbuf *m, uint8_t *l2len)
{
        char *pkt_hdr;
        struct mbuf *m0;
        uint64_t len = 0;

        pkt_hdr = m->m_pkthdr.pkt_hdr;
        for (m0 = m; m0 != NULL; m0 = m0->m_next) {
                if (pkt_hdr >= m_mtod_current(m0) &&
                    pkt_hdr < m_mtod_current(m0) + m0->m_len) {
                        break;
                }
                len += m0->m_len;
        }
        if (m0 == NULL) {
                DTRACE_SKYWALK2(bad__pkthdr, struct mbuf *, m, char *, pkt_hdr);
                return EINVAL;
        }
        len += (pkt_hdr - m_mtod_current(m0));
        if (len > UINT8_MAX) {
                DTRACE_SKYWALK2(bad__l2len, struct mbuf *, m, uint64_t, len);
                return EINVAL;
        }
        *l2len = (uint8_t)len;
        return 0;
}

#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_mbuf_to_kpkt_log(struct __kern_packet *kpkt, uint32_t len,
    uint32_t poff)
{
        uint8_t *baddr;
        MD_BUFLET_ADDR_ABS(kpkt, baddr);
        SK_DF(SK_VERB_HOST | SK_VERB_TX, "mlen %u dplen %u"
            " hr %u l2 %u poff %u", len, kpkt->pkt_length,
            kpkt->pkt_headroom, kpkt->pkt_l2_len, poff);
        SK_DF(SK_VERB_HOST | SK_VERB_TX | SK_VERB_DUMP, "%s",
            sk_dump("buf", baddr, kpkt->pkt_length, 128, NULL, 0));
}
#endif /* SK_LOG */

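/*
 * Convert an mbuf to a kernel packet: allocate a packet from the
 * adapter's TX pool and copy the mbuf contents into it (combining the
 * copy with checksumming when the mbuf requests partial checksum
 * offload), accounting for the interface's TX headroom and recording
 * the link-layer header length.
 */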
static inline struct __kern_packet *
nx_netif_mbuf_to_kpkt(struct nexus_adapter *na, struct mbuf *m)
{
        struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
        struct nexus_netif_adapter *nifna = NIFNA(na);
        struct nx_netif *nif = nifna->nifna_netif;
        uint16_t poff = na->na_ifp->if_tx_headroom;
        uint32_t len;
        struct kern_pbufpool *pp;
        struct __kern_packet *kpkt;
        kern_packet_t ph;
        boolean_t copysum;
        uint8_t l2hlen;
        int err;

        pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
        ASSERT((pp != NULL) && (pp->pp_md_type == NEXUS_META_TYPE_PACKET) &&
            (pp->pp_md_subtype == NEXUS_META_SUBTYPE_RAW));
        ASSERT(!PP_HAS_TRUNCATED_BUF(pp));

        len = m_pktlen(m);
        VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pp) * pp->pp_max_frags));

        /* alloc packet */
        ph = pp_alloc_packet_by_size(pp, poff + len, SKMEM_NOSLEEP);
        if (__improbable(ph == 0)) {
                STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
                SK_DF(SK_VERB_MEM,
                    "%s(%d) pp \"%s\" (0x%llx) has no more "
                    "packet for %s", sk_proc_name_address(current_proc()),
                    sk_proc_pid(current_proc()), pp->pp_name, SK_KVA(pp),
                    if_name(na->na_ifp));
                return NULL;
        }

        copysum = ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID |
            CSUM_PARTIAL)) == (CSUM_DATA_VALID | CSUM_PARTIAL));

        STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
        if (copysum) {
                STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
        }

        kpkt = SK_PTR_ADDR_KPKT(ph);
        kpkt->pkt_link_flags = 0;
        nif->nif_pkt_copy_from_mbuf(NR_TX, ph, poff, m, 0, len,
            copysum, m->m_pkthdr.csum_tx_start);

        kpkt->pkt_headroom = (uint8_t)poff;
        if ((err = get_l2_hlen(m, &l2hlen)) == 0) {
                kpkt->pkt_l2_len = l2hlen;
        } else {
                kpkt->pkt_l2_len = 0;
        }
        /* finalize the packet */
        METADATA_ADJUST_LEN(kpkt, 0, poff);
        err = __packet_finalize(ph);
        VERIFY(err == 0);

#if SK_LOG
        if (__improbable((sk_verbose & SK_VERB_HOST) != 0) && kpkt != NULL) {
                nx_netif_mbuf_to_kpkt_log(kpkt, len, poff);
        }
#endif /* SK_LOG */

        return kpkt;
}