/*
 * Copyright (c) 2020-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>

#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>

#define CSUM_GSO_MASK   0x00300000
#define CSUM_GSO_OFFSET 20
#define CSUM_TO_GSO(x)  ((x & CSUM_GSO_MASK) >> CSUM_GSO_OFFSET)
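
/*
 * CSUM_TSO_IPV4 and CSUM_TSO_IPV6 occupy the two bits covered by
 * CSUM_GSO_MASK, so CSUM_TO_GSO() maps a packet's csum_flags directly to an
 * index into netif_gso_functions[]: no TSO bit -> GSO_NONE,
 * CSUM_TSO_IPV4 -> GSO_TCP4, CSUM_TSO_IPV6 -> GSO_TCP6 (see the _CASSERTs
 * in netif_gso_init()).
 */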

enum netif_gso_type {
	GSO_NONE,
	GSO_TCP4,
	GSO_TCP6,
	GSO_END_OF_TYPE
};

uint32_t netif_chain_enqueue = 1;
#if (DEVELOPMENT || DEBUG)
SYSCTL_UINT(_kern_skywalk_netif, OID_AUTO, chain_enqueue,
    CTLFLAG_RW | CTLFLAG_LOCKED, &netif_chain_enqueue, 0,
    "netif chain enqueue");
#endif /* (DEVELOPMENT || DEBUG) */

/*
 * Array of function pointers that execute GSO depending on packet type
 */
int (*netif_gso_functions[GSO_END_OF_TYPE]) (struct ifnet*, struct mbuf*);

/*
 * Structure that contains the state during the TCP segmentation
 */
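/*
 * update() repositions the IP and TCP header pointers inside each newly
 * built segment, and internal() finalizes those headers (length, IP id,
 * checksums or offload flags) for one segment.  copy_data_sum tells the
 * segmentation loop whether the payload checksum has to be computed in
 * software while copying, or can be left to the hardware.
 */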
struct netif_gso_ip_tcp_state {
	void (*update)(struct netif_gso_ip_tcp_state*,
	    struct __kern_packet *pkt, uint8_t *baddr);
	void (*internal)(struct netif_gso_ip_tcp_state*, uint32_t partial,
	    uint16_t payload_len, uint32_t *csum_flags);
	union {
		struct ip *ip;
		struct ip6_hdr *ip6;
	} hdr;
	int af;
	struct tcphdr *tcp;
	struct kern_pbufpool *pp;
	uint32_t psuedo_hdr_csum;
	uint32_t tcp_seq;
	uint16_t hlen;
	uint16_t mss;
	uint16_t ip_id;
	uint8_t mac_hlen;
	uint8_t ip_hlen;
	uint8_t tcp_hlen;
	boolean_t copy_data_sum;
};

static inline uint8_t
netif_gso_get_frame_header_len(struct mbuf *m, uint8_t *hlen)
{
	uint64_t len;
	char *ph = m->m_pkthdr.pkt_hdr;

	if (__improbable(m_pktlen(m) == 0 || ph == NULL ||
	    ph < (char *)m->m_data)) {
		return ERANGE;
	}
	len = (ph - m_mtod_current(m));
	if (__improbable(len > UINT8_MAX)) {
		return ERANGE;
	}
	*hlen = (uint8_t)len;
	return 0;
}

static inline int
netif_gso_check_netif_active(struct ifnet *ifp, struct mbuf *m,
    struct kern_pbufpool **pp)
{
	struct __kern_channel_ring *kring;
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct kern_nexus *nx = nif->nif_nx;
	struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
	uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));

	if (__improbable(!NA_IS_ACTIVE(hwna))) {
		STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
		SK_DF(SK_VERB_NETIF,
		    "\"%s\" (0x%llx) not in skywalk mode anymore",
		    hwna->na_name, SK_KVA(hwna));
		return ENXIO;
	}

	VERIFY(sc_idx < KPKT_SC_MAX_CLASSES);
	kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
	if (__improbable(KR_DROP(kring))) {
		STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
		SK_DF(SK_VERB_NETIF,
		    "kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
		    kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
		    CKRF_BITS, ifp->if_xname);
		return ENXIO;
	}
	*pp = kring->ckr_pp;
	return 0;
}

static inline boolean_t
netif_chain_enqueue_enabled(struct ifnet *ifp)
{
	return netif_chain_enqueue != 0 && ifp->if_output_netem == NULL &&
	       (ifp->if_eflags & IFEF_ENQUEUE_MULTI) == 0;
}

static inline int
netif_gso_send(struct ifnet *ifp, struct __kern_packet *head,
    struct __kern_packet *tail, uint32_t count, uint32_t bytes)
{
	struct nx_netif *nif = NA(ifp)->nifna_netif;
	struct netif_stats *nifs = &nif->nif_stats;
	struct netif_qset *qset = NULL;
	uint64_t qset_id = 0;
	int error = 0;
	boolean_t dropped;

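	/*
	 * If the nexus has logical-link providers and inet traffic rules are
	 * configured on this interface, steer the chain to the queue set
	 * selected based on the first packet; otherwise the default AQM
	 * queues are used.
	 */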
	if (NX_LLINK_PROV(nif->nif_nx) &&
	    ifp->if_traffic_rule_count > 0 &&
	    nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
	    head, &qset_id) == 0) {
		qset = nx_netif_find_qset(nif, qset_id);
		ASSERT(qset != NULL);
	}
	if (netif_chain_enqueue_enabled(ifp)) {
		dropped = false;
		if (qset != NULL) {
			head->pkt_qset_idx = qset->nqs_idx;
			error = ifnet_enqueue_ifcq_pkt_chain(ifp, qset->nqs_ifcq,
			    head, tail, count, bytes, false, &dropped);
		} else {
			error = ifnet_enqueue_pkt_chain(ifp, head, tail,
			    count, bytes, false, &dropped);
		}
		if (__improbable(dropped)) {
			STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, count);
			STATS_ADD(nifs, NETIF_STATS_DROP, count);
		}
	} else {
		struct __kern_packet *pkt = head, *next;
		uint32_t c = 0, b = 0;

		while (pkt != NULL) {
			int err;

			next = pkt->pkt_nextpkt;
			pkt->pkt_nextpkt = NULL;
			c++;
			b += pkt->pkt_length;

			dropped = false;
			if (qset != NULL) {
				pkt->pkt_qset_idx = qset->nqs_idx;
				err = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq,
				    pkt, false, &dropped);
			} else {
				err = ifnet_enqueue_pkt(ifp, pkt, false, &dropped);
			}
			if (error == 0 && __improbable(err != 0)) {
				error = err;
			}
			if (__improbable(dropped)) {
				STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
				STATS_INC(nifs, NETIF_STATS_DROP);
			}
			pkt = next;
		}
		ASSERT(c == count);
		ASSERT(b == bytes);
	}
	if (qset != NULL) {
		nx_netif_qset_release(&qset);
	}
	netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
	return error;
}

/*
 * Segment and transmit a queue of packets that each fit the given
 * mss + hdr_len.  m points to the mbuf chain to be segmented.
 * This function splits the payload (m->m_pkthdr.len - hdr_len) into
 * segments of at most mss bytes, then copies the first hdr_len bytes
 * of m to the front of each segment.
 */
static inline int
netif_gso_tcp_segment_mbuf(struct mbuf *m, struct ifnet *ifp,
    struct netif_gso_ip_tcp_state *state, struct kern_pbufpool *pp)
{
	uuid_t euuid;
	struct pktq pktq_alloc, pktq_seg;
	uint64_t timestamp = 0;
	uint64_t pflags;
	int error = 0;
	uint32_t policy_id;
	uint32_t skip_policy_id;
	uint32_t svc_class;
	uint32_t n, n_pkts, n_bytes;
	int32_t off = 0, total_len = m->m_pkthdr.len;
	uint8_t tx_headroom = (uint8_t)ifp->if_tx_headroom;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
	uint16_t mss = state->mss;
	bool skip_pktap;

	VERIFY(total_len > state->hlen);
	VERIFY(((tx_headroom + state->mac_hlen) & 0x1) == 0);
	VERIFY((tx_headroom + state->hlen + mss) <= PP_BUF_SIZE_DEF(pp));
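	/*
	 * Each segment is built in a single buffer: headroom plus the
	 * replicated headers plus one mss worth of payload must fit in the
	 * pool's default buflet size, which the VERIFY above guarantees.
	 */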

	KPKTQ_INIT(&pktq_alloc);
	KPKTQ_INIT(&pktq_seg);
	/* batch allocate enough packets */
	n_pkts = (uint32_t)(SK_ROUNDUP((total_len - state->hlen), mss) / mss);
	error = pp_alloc_pktq(pp, 1, &pktq_alloc, n_pkts, NULL,
	    NULL, SKMEM_NOSLEEP);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
		SK_ERR("failed to alloc %u pkts", n_pkts);
		pp_free_pktq(&pktq_alloc);
		error = ENOBUFS;
		goto done;
	}

	ASSERT(m->m_pkthdr.pkt_proto == IPPROTO_TCP);
	ASSERT((m->m_flags & M_BCAST) == 0);
	ASSERT((m->m_flags & M_MCAST) == 0);
	ASSERT(((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) == 0));
	pflags = m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK;
	pflags |= PKTF_START_SEQ;
	(void) mbuf_get_timestamp(m, &timestamp, NULL);
	necp_get_app_uuid_from_packet(m, euuid);
	policy_id = necp_get_policy_id_from_packet(m);
	skip_policy_id = necp_get_skip_policy_id_from_packet(m);
	svc_class = m_get_service_class(m);
	skip_pktap = (m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) != 0 ||
	    pktap_total_tap_count == 0;

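	/*
	 * Main segmentation loop: each iteration dequeues one preallocated
	 * packet, copies the saved headers plus up to mss bytes of payload
	 * into it, stamps it with the metadata captured above, and lets the
	 * address-family specific callbacks fix up the IP/TCP headers before
	 * the packet is finalized and appended to the outgoing chain.
	 */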
	for (n = 1, off = state->hlen; off < total_len; off += mss, n++) {
		uint8_t *baddr, *baddr0;
		uint32_t partial = 0;
		struct __kern_packet *pkt;

		KPKTQ_DEQUEUE(&pktq_alloc, pkt);
		ASSERT(pkt != NULL);

		/* get buffer address from packet */
		MD_BUFLET_ADDR_ABS(pkt, baddr0);
		baddr = baddr0;
		baddr += tx_headroom;

		/*
		 * Copy the link-layer, IP and TCP headers from the
		 * original packet.
		 */
		m_copydata(m, 0, state->hlen, baddr);
		baddr += state->hlen;

		/*
		 * Copy the payload from the original packet and
		 * compute a partial checksum on the payload.
		 */
		if (off + mss > total_len) {
			/* the last segment may be shorter than mss */
			mss = (uint16_t)(total_len - off);
		}
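		/*
		 * When the TCP checksum cannot be offloaded, m_copydata_sum()
		 * accumulates a 16-bit one's complement sum of the payload
		 * while copying it; state->internal() later folds this
		 * partial sum into the final th_sum.
		 */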
		if (state->copy_data_sum) {
			partial = m_copydata_sum(m, off, mss, baddr, 0, NULL);
		} else {
			m_copydata(m, off, mss, baddr);
		}

		/*
		 * Update packet metadata.
		 */
		pkt->pkt_headroom = tx_headroom;
		pkt->pkt_l2_len = state->mac_hlen;
		pkt->pkt_link_flags = 0;
		pkt->pkt_csum_flags = 0;
		pkt->pkt_csum_tx_start_off = 0;
		pkt->pkt_csum_tx_stuff_off = 0;
		uuid_copy(pkt->pkt_policy_euuid, euuid);
		pkt->pkt_policy_id = policy_id;
		pkt->pkt_skip_policy_id = skip_policy_id;
		pkt->pkt_timestamp = timestamp;
		pkt->pkt_svc_class = svc_class;
		pkt->pkt_pflags |= pflags;
		pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
		pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
		pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
		pkt->pkt_flow_ip_proto = IPPROTO_TCP;
		pkt->pkt_transport_protocol = IPPROTO_TCP;
		pkt->pkt_flow_tcp_seq = htonl(state->tcp_seq);

		state->update(state, pkt, baddr0);
		/*
		 * The FIN and PUSH flags, if present, are kept only on the
		 * last segment.
		 */
		if (n != n_pkts) {
			state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
		}
		/*
		 * The CWR flag, if present, is kept only on the first
		 * segment and cleared on the subsequent segments.
		 */
		if (n != 1) {
			state->tcp->th_flags &= ~TH_CWR;
			state->tcp->th_seq = htonl(state->tcp_seq);
		}
		ASSERT(state->tcp->th_seq == pkt->pkt_flow_tcp_seq);
		state->internal(state, partial, mss, &pkt->pkt_csum_flags);
		METADATA_ADJUST_LEN(pkt, state->hlen + mss, tx_headroom);
		VERIFY(__packet_finalize(SK_PKT2PH(pkt)) == 0);
		KPKTQ_ENQUEUE(&pktq_seg, pkt);
		if (!skip_pktap) {
			nx_netif_pktap_output(ifp, state->af, pkt);
		}
	}
	ASSERT(off == total_len);
	STATS_ADD(nifs, NETIF_STATS_GSO_SEG, n_pkts);

	/* ifnet_enqueue_pkt_chain() consumes the packet chain */
	pkt_chain_head = KPKTQ_FIRST(&pktq_seg);
	pkt_chain_tail = KPKTQ_LAST(&pktq_seg);
	KPKTQ_INIT(&pktq_seg);
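	/*
	 * Account for the headers replicated into every segment after the
	 * first: the total byte count is the original packet length plus
	 * (n_pkts - 1) extra copies of the hlen-byte headers.
	 */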
	n_bytes = total_len + (state->hlen * (n_pkts - 1));

	error = netif_gso_send(ifp, pkt_chain_head, pkt_chain_tail,
	    n_pkts, n_bytes);

done:
	KPKTQ_FINI(&pktq_alloc);
	return error;
}

/*
 * Update the pointers to TCP and IPv4 headers
 */
static void
netif_gso_ipv4_tcp_update(struct netif_gso_ip_tcp_state *state,
    struct __kern_packet *pkt, uint8_t *baddr)
{
	state->hdr.ip = (struct ip *)(void *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len);
	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) +
	    state->ip_hlen);
}

/*
 * Finalize the TCP and IPv4 headers
 */
static void
netif_gso_ipv4_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len, uint32_t *csum_flags __unused)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
	    payload_len);
	/*
	 * IP header checksum
	 */
	state->hdr.ip->ip_sum = 0;
	state->hdr.ip->ip_sum = inet_cksum_buffer(state->hdr.ip, 0, 0,
	    state->ip_hlen);
	/*
	 * TCP Checksum
	 */
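	/*
	 * "partial" already covers the payload (accumulated by
	 * m_copydata_sum() while copying).  Add the TCP header, then the
	 * pseudo-header: protocol and TCP length here, source/destination
	 * addresses via the precomputed psuedo_hdr_csum.  ADDCARRY() folds
	 * the carries before the final one's complement.
	 */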
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
	partial += htons(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->psuedo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update TCP sequence number in the GSO state
	 */
	state->tcp_seq += payload_len;
}

static void
netif_gso_ipv4_tcp_internal_nosum(struct netif_gso_ip_tcp_state *state,
    uint32_t partial __unused, uint16_t payload_len __unused,
    uint32_t *csum_flags)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip->ip_id = htons((state->ip_id)++);
	state->hdr.ip->ip_len = htons(state->ip_hlen + state->tcp_hlen +
	    payload_len);
	/*
	 * Update TCP sequence number in the GSO state
	 */
	state->tcp_seq += payload_len;

	/* offload csum to hardware */
	*csum_flags |= PACKET_CSUM_IP | PACKET_CSUM_TCP;
}

/*
 * Update the pointers to TCP and IPv6 headers
 */
static void
netif_gso_ipv6_tcp_update(struct netif_gso_ip_tcp_state *state,
    struct __kern_packet *pkt, uint8_t *baddr)
{
	state->hdr.ip6 = (struct ip6_hdr *)(baddr + pkt->pkt_headroom +
	    pkt->pkt_l2_len);
	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) +
	    state->ip_hlen);
}

/*
 * Finalize the IPv6 header; the TCP checksum is left to hardware offload
 */
static void
netif_gso_ipv6_tcp_internal_nosum(struct netif_gso_ip_tcp_state *state,
    uint32_t partial __unused, uint16_t payload_len __unused,
    uint32_t *csum_flags)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);

	/*
	 * Update TCP sequence number
	 */
	state->tcp_seq += payload_len;

	/* offload csum to hardware */
	*csum_flags |= PACKET_CSUM_TCPIPV6;
}

/*
 * Finalize the TCP and IPv6 headers
 */
static void
netif_gso_ipv6_tcp_internal(struct netif_gso_ip_tcp_state *state,
    uint32_t partial, uint16_t payload_len, uint32_t *csum_flags __unused)
{
	/*
	 * Update IP header
	 */
	state->hdr.ip6->ip6_plen = htons(state->tcp_hlen + payload_len);
	/*
	 * TCP Checksum
	 */
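	/*
	 * Same folding as in the IPv4 case: "partial" covers the payload,
	 * the TCP header is added next, and the pseudo-header length and
	 * protocol plus the precomputed address sum complete it.  htonl()
	 * of a value below 2^16 contributes the same 16-bit quantity as
	 * htons() once ADDCARRY() folds the carries.
	 */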
	state->tcp->th_sum = 0;
	partial = __packet_cksum(state->tcp, state->tcp_hlen, partial);
	partial += htonl(state->tcp_hlen + IPPROTO_TCP + payload_len);
	partial += state->psuedo_hdr_csum;
	ADDCARRY(partial);
	state->tcp->th_sum = ~(uint16_t)partial;
	/*
	 * Update TCP sequence number
	 */
	state->tcp_seq += payload_len;
}

/*
 * Initialize the state used during TCP segmentation
 */
static inline void
netif_gso_ip_tcp_init_state(struct netif_gso_ip_tcp_state *state,
    struct mbuf *m, uint8_t mac_hlen, uint8_t ip_hlen, bool isipv6, ifnet_t ifp)
{
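	/*
	 * Locate the IP and TCP headers in the original mbuf, pick the
	 * address-family specific update/internal callbacks, and choose the
	 * checksum strategy: if the interface advertises the required
	 * hardware assistance in if_hwassist, checksums are offloaded and
	 * the payload is copied without summing; otherwise they are
	 * computed in software while the payload is copied.
	 */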
	if (isipv6) {
		state->af = AF_INET6;
		state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be at least 16-bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip6 & (uintptr_t)0x1) == 0);
		state->tcp = (struct tcphdr *)(void *)((caddr_t)
		    (state->hdr.ip6) + ip_hlen);
		state->update = netif_gso_ipv6_tcp_update;
		if (ifp->if_hwassist & IFNET_CSUM_TCPIPV6) {
			state->internal = netif_gso_ipv6_tcp_internal_nosum;
			state->copy_data_sum = false;
		} else {
			state->internal = netif_gso_ipv6_tcp_internal;
			state->copy_data_sum = true;
		}
		state->psuedo_hdr_csum = in6_pseudo(&state->hdr.ip6->ip6_src,
		    &state->hdr.ip6->ip6_dst, 0);
	} else {
		struct in_addr ip_src, ip_dst;

		state->af = AF_INET;
		state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) +
		    mac_hlen);
		/* should be at least 16-bit aligned */
		VERIFY(((uintptr_t)state->hdr.ip & (uintptr_t)0x1) == 0);
		state->ip_id = ntohs(state->hdr.ip->ip_id);
		state->tcp = (struct tcphdr *)(void *)((caddr_t)
		    (state->hdr.ip) + ip_hlen);
		state->update = netif_gso_ipv4_tcp_update;
		if ((ifp->if_hwassist & (IFNET_CSUM_IP | IFNET_CSUM_TCP)) ==
		    (IFNET_CSUM_IP | IFNET_CSUM_TCP)) {
			state->internal = netif_gso_ipv4_tcp_internal_nosum;
			state->copy_data_sum = false;
		} else {
			state->internal = netif_gso_ipv4_tcp_internal;
			state->copy_data_sum = true;
		}
		bcopy(&state->hdr.ip->ip_src, &ip_src, sizeof(ip_src));
		bcopy(&state->hdr.ip->ip_dst, &ip_dst, sizeof(ip_dst));
		state->psuedo_hdr_csum = in_pseudo(ip_src.s_addr,
		    ip_dst.s_addr, 0);
	}

	state->mac_hlen = mac_hlen;
	state->ip_hlen = ip_hlen;
	state->tcp_hlen = (uint8_t)(state->tcp->th_off << 2);
	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
	VERIFY(m->m_pkthdr.tso_segsz != 0);
	state->mss = (uint16_t)m->m_pkthdr.tso_segsz;
	state->tcp_seq = ntohl(state->tcp->th_seq);
}

/*
 * GSO on TCP/IPv4
 */
static int
netif_gso_ipv4_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip *ip;
	struct kern_pbufpool *pp = NULL;
	struct netif_gso_ip_tcp_state state;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	hlen = mac_hlen + sizeof(struct ip);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip = (struct ip *)(void *)(mtod(m, uint8_t *) + mac_hlen);
	ip_hlen = (uint8_t)(ip->ip_hl << 2);
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, false, ifp);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}

/*
 * GSO on TCP/IPv6
 */
static int
netif_gso_ipv6_tcp(struct ifnet *ifp, struct mbuf *m)
{
	struct ip6_hdr *ip6;
	struct kern_pbufpool *pp = NULL;
	struct netif_gso_ip_tcp_state state;
	int lasthdr_off;
	uint16_t hlen;
	uint8_t ip_hlen;
	uint8_t mac_hlen;
	struct netif_stats *nifs = &NA(ifp)->nifna_netif->nif_stats;
	boolean_t pkt_dropped = false;
	int error;

	STATS_INC(nifs, NETIF_STATS_GSO_PKT);
	if (__improbable(m->m_pkthdr.pkt_proto != IPPROTO_TCP)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NONTCP);
		error = ENOTSUP;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_check_netif_active(ifp, m, &pp);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NA_INACTIVE);
		error = ENXIO;
		pkt_dropped = true;
		goto done;
	}

	error = netif_gso_get_frame_header_len(m, &mac_hlen);
	if (__improbable(error != 0)) {
		STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_BADLEN);
		pkt_dropped = true;
		goto done;
	}

	hlen = mac_hlen + sizeof(struct ip6_hdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + mac_hlen);
	lasthdr_off = ip6_lasthdr(m, mac_hlen, IPPROTO_IPV6, NULL) - mac_hlen;
	VERIFY(lasthdr_off <= UINT8_MAX);
	ip_hlen = (uint8_t)lasthdr_off;
	hlen = mac_hlen + ip_hlen + sizeof(struct tcphdr);
	if (__improbable(m->m_len < hlen)) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			STATS_INC(nifs, NETIF_STATS_GSO_PKT_DROP_NOMEM);
			error = ENOBUFS;
			pkt_dropped = true;
			goto done;
		}
	}
	netif_gso_ip_tcp_init_state(&state, m, mac_hlen, ip_hlen, true, ifp);
	error = netif_gso_tcp_segment_mbuf(m, ifp, &state, pp);
done:
	m_freem(m);
	if (__improbable(pkt_dropped)) {
		STATS_INC(nifs, NETIF_STATS_DROP);
	}
	return error;
}

int
netif_gso_dispatch(struct ifnet *ifp, struct mbuf *m)
{
	int gso_flags;

	ASSERT(m->m_nextpkt == NULL);
	gso_flags = CSUM_TO_GSO(m->m_pkthdr.csum_flags);
	VERIFY(gso_flags < GSO_END_OF_TYPE);
	return netif_gso_functions[gso_flags](ifp, m);
}

void
netif_gso_init(void)
{
	_CASSERT(CSUM_TO_GSO(~(CSUM_TSO_IPV4 | CSUM_TSO_IPV6)) == GSO_NONE);
	_CASSERT(CSUM_TO_GSO(CSUM_TSO_IPV4) == GSO_TCP4);
	_CASSERT(CSUM_TO_GSO(CSUM_TSO_IPV6) == GSO_TCP6);
	netif_gso_functions[GSO_NONE] = nx_netif_host_output;
	netif_gso_functions[GSO_TCP4] = netif_gso_ipv4_tcp;
	netif_gso_functions[GSO_TCP6] = netif_gso_ipv6_tcp;
}

void
netif_gso_fini(void)
{
	netif_gso_functions[GSO_NONE] = NULL;
	netif_gso_functions[GSO_TCP4] = NULL;
	netif_gso_functions[GSO_TCP6] = NULL;
}