1/*
2 * Copyright (c) 2017-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <skywalk/os_skywalk_private.h>
30#include <machine/endian.h>
31#include <net/necp.h>
32
/*
 * Tunable, defaults to 1.  Its sysctl description reads
 * "copy tx time from pkt to mbuf"; the consumers are outside this part
 * of the file.
 */
uint32_t copy_pkt_tx_time = 1;

#if (DEVELOPMENT || DEBUG)
/* for testing trailing bytes */
int pkt_trailers = 0;

/* "packet" node under _kern_skywalk; parent of the two knobs below */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, packet, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Skywalk packet");

SYSCTL_INT(_kern_skywalk_packet, OID_AUTO, trailers,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &pkt_trailers, 0, "");

SYSCTL_UINT(_kern_skywalk_packet, OID_AUTO, copy_pkt_tx_time,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &copy_pkt_tx_time, 0, "copy tx time from pkt to mbuf");
#endif /* DEVELOPMENT || DEBUG */
45
46
/*
 * Copy len bytes from src to dst.
 *
 * When both pointers are 8-byte aligned, the copy is handed to one of
 * the sk_copy64_*() helpers, selected by length: exactly 20 bytes
 * (standard IPv4 header), exactly 40 bytes (IPv6 header), or a multiple
 * of 64, 32, 8 or 4 bytes, tried in that order.  Every other
 * combination of alignment and length goes through bcopy().
 */
__attribute__((always_inline))
static inline void
_pkt_copy(void *src, void *dst, size_t len)
{
	/* either end misaligned: the 64-bit helpers are not used */
	if (__improbable(!(IS_P2ALIGNED(src, 8) && IS_P2ALIGNED(dst, 8)))) {
		bcopy(src, dst, len);
		return;
	}

	if (len == 20) {
		/* standard IPv4 header */
		sk_copy64_20(src, dst);
	} else if (len == 40) {
		/* IPv6 header */
		sk_copy64_40(src, dst);
	} else if (IS_P2ALIGNED(len, 64)) {
		sk_copy64_64x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 32)) {
		sk_copy64_32x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 8)) {
		sk_copy64_8x(src, dst, len);
	} else if (IS_P2ALIGNED(len, 4)) {
		sk_copy64_4x(src, dst, len);
	} else {
		/* aligned pointers, but a length no helper covers */
		bcopy(src, dst, len);
	}
}
80
81/*
82 * This routine is used for copying data across two kernel packets.
83 * Can also optionally compute 16-bit partial inet checksum as the
84 * data is copied.
85 * This routine is used by flowswitch while copying packet from vp
86 * adapter pool to packet in native netif pool and vice-a-versa.
87 *
88 * start/stuff is relative to soff, within [0, len], such that
89 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
90 */
91void
92pkt_copy_from_pkt(const enum txrx t, kern_packet_t dph, const uint16_t doff,
93 kern_packet_t sph, const uint16_t soff, const uint32_t len,
94 const boolean_t copysum, const uint16_t start, const uint16_t stuff,
95 const boolean_t invert)
96{
97 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
98 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
99 uint32_t partial;
100 uint16_t csum = 0;
101 uint8_t *sbaddr, *dbaddr;
102 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
103
104 _CASSERT(sizeof(csum) == sizeof(uint16_t));
105
106 /* get buffer address from packet */
107 MD_BUFLET_ADDR_ABS(spkt, sbaddr);
108 ASSERT(sbaddr != NULL);
109 sbaddr += soff;
110 MD_BUFLET_ADDR_ABS(dpkt, dbaddr);
111 ASSERT(dbaddr != NULL);
112 dbaddr += doff;
113 VERIFY((doff + len) <= PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
114
115 switch (t) {
116 case NR_RX:
117 dpkt->pkt_csum_flags = 0;
118 if (__probable(do_sum)) {
119 /*
120 * Use pkt_copy() to copy the portion up to the
121 * point where we need to start the checksum, and
122 * copy the remainder, checksumming as we go.
123 */
124 if (__probable(start != 0)) {
125 _pkt_copy(src: sbaddr, dst: dbaddr, len: start);
126 }
127 partial = __packet_copy_and_sum(src: (sbaddr + start),
128 dst: (dbaddr + start), len: (len - start), sum0: 0);
129 csum = __packet_fold_sum(sum: partial);
130
131 __packet_set_inet_checksum(ph: dph, PACKET_CSUM_PARTIAL,
132 start, stuff_val: csum, FALSE);
133 } else {
134 _pkt_copy(src: sbaddr, dst: dbaddr, len);
135 dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
136 dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
137 dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
138 }
139
140 SK_DF(SK_VERB_COPY | SK_VERB_RX,
141 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
142 sk_proc_name_address(current_proc()),
143 sk_proc_pid(current_proc()), len,
144 (copysum ? (len - start) : 0), csum, start);
145 SK_DF(SK_VERB_COPY | SK_VERB_RX,
146 " pkt 0x%llx doff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
147 SK_KVA(dpkt), doff, dpkt->pkt_csum_flags,
148 (uint32_t)dpkt->pkt_csum_rx_start_off,
149 (uint32_t)dpkt->pkt_csum_rx_value);
150 break;
151
152 case NR_TX:
153 if (copysum) {
154 /*
155 * Use pkt_copy() to copy the portion up to the
156 * point where we need to start the checksum, and
157 * copy the remainder, checksumming as we go.
158 */
159 if (__probable(start != 0)) {
160 _pkt_copy(src: sbaddr, dst: dbaddr, len: start);
161 }
162 partial = __packet_copy_and_sum(src: (sbaddr + start),
163 dst: (dbaddr + start), len: (len - start), sum0: 0);
164 csum = __packet_fold_sum_final(sum: partial);
165
166 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
167 if (csum == 0 && invert) {
168 csum = 0xffff;
169 }
170
171 /* Insert checksum into packet */
172 ASSERT(stuff <= (len - sizeof(csum)));
173 if (IS_P2ALIGNED(dbaddr + stuff, sizeof(csum))) {
174 *(uint16_t *)(uintptr_t)(dbaddr + stuff) = csum;
175 } else {
176 bcopy(src: (void *)&csum, dst: dbaddr + stuff,
177 n: sizeof(csum));
178 }
179 } else {
180 _pkt_copy(src: sbaddr, dst: dbaddr, len);
181 }
182 dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
183 (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
184 dpkt->pkt_csum_tx_start_off = 0;
185 dpkt->pkt_csum_tx_stuff_off = 0;
186
187 SK_DF(SK_VERB_COPY | SK_VERB_TX,
188 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
189 sk_proc_name_address(current_proc()),
190 sk_proc_pid(current_proc()), len,
191 (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
192 break;
193
194 default:
195 VERIFY(0);
196 /* NOTREACHED */
197 __builtin_unreachable();
198 }
199 METADATA_ADJUST_LEN(dpkt, len, doff);
200
201 SK_DF(SK_VERB_COPY | SK_VERB_DUMP, "%s(%d) %s %s",
202 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
203 (t == NR_RX) ? "RX" : "TX",
204 sk_dump("buf", dbaddr, len, 128, NULL, 0));
205}
206
207/*
208 * NOTE: soff is the offset within the packet
209 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
210 * caller is responsible for further reducing it to 16-bit if needed,
211 * as well as to perform the final 1's complement on it.
212 */
213uint32_t static inline
214_pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
215 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
216{
217 uint8_t odd = 0;
218 uint8_t *sbaddr = NULL;
219 uint32_t sum = initial_sum, partial;
220 uint32_t len0 = len;
221 boolean_t needs_swap, started_on_odd = FALSE;
222 uint16_t sbcnt, off0 = soff;
223 uint32_t clen, sboff, sblen;
224 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
225 kern_buflet_t sbuf = NULL, sbufp = NULL;
226
227 sbcnt = __packet_get_buflet_count(ph: sph);
228
229 if (odd_start) {
230 started_on_odd = *odd_start;
231 }
232
233 /* fastpath (copy+sum, single buflet, even aligned, even length) */
234 if (do_csum && sbcnt == 1 && len != 0) {
235 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
236 ASSERT(sbuf != NULL);
237 sboff = __buflet_get_data_offset(buf: sbuf);
238 sblen = __buflet_get_data_length(buf: sbuf);
239 ASSERT(sboff <= soff);
240 ASSERT(soff < sboff + sblen);
241 sblen -= (soff - sboff);
242 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: sbuf) + soff);
243
244 clen = (uint16_t)MIN(len, sblen);
245
246 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
247 sum = __packet_copy_and_sum(src: sbaddr, dst: dbaddr, len: clen, sum0: sum);
248 return __packet_fold_sum(sum);
249 }
250
251 sbaddr = NULL;
252 sbuf = sbufp = NULL;
253 }
254
255 while (len != 0) {
256 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
257 if (__improbable(sbuf == NULL)) {
258 panic("%s: bad packet, 0x%llx [off %d, len %d]",
259 __func__, SK_KVA(spkt), off0, len0);
260 /* NOTREACHED */
261 __builtin_unreachable();
262 }
263 sbufp = sbuf;
264 sboff = __buflet_get_data_offset(buf: sbuf);
265 sblen = __buflet_get_data_length(buf: sbuf);
266 ASSERT((sboff <= soff) && (soff < sboff + sblen));
267 sblen -= (soff - sboff);
268 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: sbuf) + soff);
269 soff = 0;
270 clen = (uint16_t)MIN(len, sblen);
271 if (__probable(do_csum)) {
272 partial = 0;
273 if (__improbable((uintptr_t)sbaddr & 1)) {
274 /* Align on word boundary */
275 started_on_odd = !started_on_odd;
276#if BYTE_ORDER == LITTLE_ENDIAN
277 partial = (uint8_t)*sbaddr << 8;
278#else /* BYTE_ORDER != LITTLE_ENDIAN */
279 partial = (uint8_t)*sbaddr;
280#endif /* BYTE_ORDER != LITTLE_ENDIAN */
281 *dbaddr++ = *sbaddr++;
282 sblen -= 1;
283 clen -= 1;
284 len -= 1;
285 }
286 needs_swap = started_on_odd;
287
288 odd = clen & 1u;
289 clen -= odd;
290
291 if (clen != 0) {
292 partial = __packet_copy_and_sum(src: sbaddr, dst: dbaddr,
293 len: clen, sum0: partial);
294 }
295
296 if (__improbable(partial & 0xc0000000)) {
297 if (needs_swap) {
298 partial = (partial << 8) +
299 (partial >> 24);
300 }
301 sum += (partial >> 16);
302 sum += (partial & 0xffff);
303 partial = 0;
304 }
305 } else {
306 _pkt_copy(src: sbaddr, dst: dbaddr, len: clen);
307 }
308
309 dbaddr += clen;
310 sbaddr += clen;
311
312 if (__probable(do_csum)) {
313 if (odd != 0) {
314#if BYTE_ORDER == LITTLE_ENDIAN
315 partial += (uint8_t)*sbaddr;
316#else /* BYTE_ORDER != LITTLE_ENDIAN */
317 partial += (uint8_t)*sbaddr << 8;
318#endif /* BYTE_ORDER != LITTLE_ENDIAN */
319 *dbaddr++ = *sbaddr++;
320 started_on_odd = !started_on_odd;
321 }
322
323 if (needs_swap) {
324 partial = (partial << 8) + (partial >> 24);
325 }
326 sum += (partial >> 16) + (partial & 0xffff);
327 /*
328 * Reduce sum to allow potential byte swap
329 * in the next iteration without carry.
330 */
331 sum = (sum >> 16) + (sum & 0xffff);
332 }
333
334 sblen -= clen + odd;
335 len -= clen + odd;
336 ASSERT(sblen == 0 || len == 0);
337 }
338
339 if (odd_start) {
340 *odd_start = started_on_odd;
341 }
342
343 if (__probable(do_csum)) {
344 /* Final fold (reduce 32-bit to 16-bit) */
345 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
346 sum = (sum >> 16) + (sum & 0xffff);
347 }
348 return sum;
349}
350
351/*
352 * NOTE: Caller of this function is responsible to adjust the length and offset
353 * of the first buflet of the destination packet if (doff != 0),
354 * i.e. additional data is being prependend to the packet.
355 * It should also finalize the packet.
356 * To simplify & optimize the routine, we have also assumed that soff & doff
357 * will lie within the first buffer, which is true for the current use cases
358 * where, doff is the offset of the checksum field in the TCP/IP header and
359 * soff is the L3 offset.
360 * The accumulated partial sum (32-bit) is returned to caller in csum_partial;
361 * caller is responsible for further reducing it to 16-bit if needed,
362 * as well as to perform the final 1's complement on it.
363 */
364static inline boolean_t
365_pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
366 uint16_t doff, uint32_t len, uint32_t *csum_partial, boolean_t do_csum)
367{
368 uint8_t odd = 0;
369 uint32_t sum = 0, partial;
370 boolean_t needs_swap, started_on_odd = FALSE;
371 uint8_t *sbaddr = NULL, *dbaddr = NULL;
372 uint16_t sbcnt, dbcnt;
373 uint32_t clen, dlen0, sboff, sblen, dlim;
374 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
375 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
376 kern_buflet_t sbuf = NULL, sbufp = NULL, dbuf = NULL, dbufp = NULL;
377
378 ASSERT(csum_partial != NULL || !do_csum);
379 sbcnt = __packet_get_buflet_count(ph: sph);
380 dbcnt = __packet_get_buflet_count(ph: dph);
381
382 while (len != 0) {
383 ASSERT(sbaddr == NULL || dbaddr == NULL);
384 if (sbaddr == NULL) {
385 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
386 if (__improbable(sbuf == NULL)) {
387 break;
388 }
389 sbufp = sbuf;
390 sblen = __buflet_get_data_length(buf: sbuf);
391 sboff = __buflet_get_data_offset(buf: sbuf);
392 ASSERT(soff >= sboff);
393 ASSERT(sboff + sblen > soff);
394 sblen -= (soff - sboff);
395 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: sbuf) + soff);
396 soff = 0;
397 }
398
399 if (dbaddr == NULL) {
400 if (dbufp != NULL) {
401 __buflet_set_data_length(buf: dbufp, dlen: dlen0);
402 }
403
404 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
405 if (__improbable(dbuf == NULL)) {
406 break;
407 }
408 dbufp = dbuf;
409 dlim = __buflet_get_data_limit(buf: dbuf);
410 ASSERT(dlim > doff);
411 dlim -= doff;
412 if (doff != 0) {
413 VERIFY(__buflet_set_data_offset(dbuf, doff) == 0);
414 }
415 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: dbuf) + doff);
416 dlen0 = dlim;
417 doff = 0;
418 }
419
420 clen = MIN(len, sblen);
421 clen = MIN(clen, dlim);
422
423 if (__probable(do_csum)) {
424 partial = 0;
425 if (__improbable((uintptr_t)sbaddr & 1)) {
426 /* Align on word boundary */
427 started_on_odd = !started_on_odd;
428#if BYTE_ORDER == LITTLE_ENDIAN
429 partial = (uint8_t)*sbaddr << 8;
430#else /* BYTE_ORDER != LITTLE_ENDIAN */
431 partial = (uint8_t)*sbaddr;
432#endif /* BYTE_ORDER != LITTLE_ENDIAN */
433 *dbaddr++ = *sbaddr++;
434 clen -= 1;
435 dlim -= 1;
436 len -= 1;
437 }
438 needs_swap = started_on_odd;
439
440 odd = clen & 1u;
441 clen -= odd;
442
443 if (clen != 0) {
444 partial = __packet_copy_and_sum(src: sbaddr, dst: dbaddr,
445 len: clen, sum0: partial);
446 }
447
448 if (__improbable(partial & 0xc0000000)) {
449 if (needs_swap) {
450 partial = (partial << 8) +
451 (partial >> 24);
452 }
453 sum += (partial >> 16);
454 sum += (partial & 0xffff);
455 partial = 0;
456 }
457 } else {
458 _pkt_copy(src: sbaddr, dst: dbaddr, len: clen);
459 }
460 sbaddr += clen;
461 dbaddr += clen;
462
463 if (__probable(do_csum)) {
464 if (odd != 0) {
465#if BYTE_ORDER == LITTLE_ENDIAN
466 partial += (uint8_t)*sbaddr;
467#else /* BYTE_ORDER != LITTLE_ENDIAN */
468 partial += (uint8_t)*sbaddr << 8;
469#endif /* BYTE_ORDER != LITTLE_ENDIAN */
470 *dbaddr++ = *sbaddr++;
471 started_on_odd = !started_on_odd;
472 }
473
474 if (needs_swap) {
475 partial = (partial << 8) + (partial >> 24);
476 }
477 sum += (partial >> 16) + (partial & 0xffff);
478 /*
479 * Reduce sum to allow potential byte swap
480 * in the next iteration without carry.
481 */
482 sum = (sum >> 16) + (sum & 0xffff);
483 }
484
485 sblen -= clen + odd;
486 dlim -= clen + odd;
487 len -= clen + odd;
488
489 if (sblen == 0) {
490 sbaddr = NULL;
491 }
492
493 if (dlim == 0) {
494 dbaddr = NULL;
495 }
496 }
497
498 if (__probable(dbuf != NULL)) {
499 __buflet_set_data_length(buf: dbuf, dlen: (dlen0 - dlim));
500 }
501 if (__probable(do_csum)) {
502 /* Final fold (reduce 32-bit to 16-bit) */
503 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
504 sum = (sum >> 16) + (sum & 0xffff);
505 *csum_partial = (uint32_t)sum;
506 }
507 return len == 0;
508}
509
510uint32_t
511pkt_sum(kern_packet_t sph, uint16_t soff, uint16_t len)
512{
513 uint8_t odd = 0;
514 uint32_t sum = 0, partial;
515 boolean_t needs_swap, started_on_odd = FALSE;
516 uint8_t *sbaddr = NULL;
517 uint16_t sbcnt;
518 uint32_t clen, sblen, sboff;
519 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
520 kern_buflet_t sbuf = NULL, sbufp = NULL;
521
522 sbcnt = __packet_get_buflet_count(ph: sph);
523
524 /* fastpath (single buflet, even aligned, even length) */
525 if (sbcnt == 1 && len != 0) {
526 PKT_GET_NEXT_BUFLET(spkt, 1, sbufp, sbuf);
527 ASSERT(sbuf != NULL);
528 sblen = __buflet_get_data_length(buf: sbuf);
529 sboff = __buflet_get_data_offset(buf: sbuf);
530 ASSERT(soff >= sboff);
531 ASSERT(sboff + sblen > soff);
532 sblen -= (soff - sboff);
533 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: sbuf) + soff);
534
535 clen = MIN(len, sblen);
536
537 if (((uintptr_t)sbaddr & 1) == 0 && clen && (clen & 1) == 0) {
538 sum = __packet_cksum(data: sbaddr, len: clen, sum0: 0);
539 return __packet_fold_sum(sum);
540 }
541
542 sbaddr = NULL;
543 sbuf = sbufp = NULL;
544 }
545
546 /* slowpath */
547 while (len != 0) {
548 ASSERT(sbaddr == NULL);
549 if (sbaddr == NULL) {
550 PKT_GET_NEXT_BUFLET(spkt, sbcnt, sbufp, sbuf);
551 if (__improbable(sbuf == NULL)) {
552 break;
553 }
554 sbufp = sbuf;
555 sblen = __buflet_get_data_length(buf: sbuf);
556 sboff = __buflet_get_data_offset(buf: sbuf);
557 ASSERT(soff >= sboff);
558 ASSERT(sboff + sblen > soff);
559 sblen -= (soff - sboff);
560 sbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: sbuf) + soff);
561 soff = 0;
562 }
563
564 clen = MIN(len, sblen);
565
566 partial = 0;
567 if (__improbable((uintptr_t)sbaddr & 1)) {
568 /* Align on word boundary */
569 started_on_odd = !started_on_odd;
570#if BYTE_ORDER == LITTLE_ENDIAN
571 partial = (uint8_t)*sbaddr << 8;
572#else /* BYTE_ORDER != LITTLE_ENDIAN */
573 partial = (uint8_t)*sbaddr;
574#endif /* BYTE_ORDER != LITTLE_ENDIAN */
575 clen -= 1;
576 len -= 1;
577 }
578 needs_swap = started_on_odd;
579
580 odd = clen & 1u;
581 clen -= odd;
582
583 if (clen != 0) {
584 partial = __packet_cksum(data: sbaddr,
585 len: clen, sum0: partial);
586 }
587
588 if (__improbable(partial & 0xc0000000)) {
589 if (needs_swap) {
590 partial = (partial << 8) +
591 (partial >> 24);
592 }
593 sum += (partial >> 16);
594 sum += (partial & 0xffff);
595 partial = 0;
596 }
597 sbaddr += clen;
598
599 if (odd != 0) {
600#if BYTE_ORDER == LITTLE_ENDIAN
601 partial += (uint8_t)*sbaddr;
602#else /* BYTE_ORDER != LITTLE_ENDIAN */
603 partial += (uint8_t)*sbaddr << 8;
604#endif /* BYTE_ORDER != LITTLE_ENDIAN */
605 started_on_odd = !started_on_odd;
606 }
607
608 if (needs_swap) {
609 partial = (partial << 8) + (partial >> 24);
610 }
611 sum += (partial >> 16) + (partial & 0xffff);
612 /*
613 * Reduce sum to allow potential byte swap
614 * in the next iteration without carry.
615 */
616 sum = (sum >> 16) + (sum & 0xffff);
617
618 sblen -= clen + odd;
619 len -= clen + odd;
620
621 if (sblen == 0) {
622 sbaddr = NULL;
623 }
624 }
625
626 /* Final fold (reduce 32-bit to 16-bit) */
627 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
628 sum = (sum >> 16) + (sum & 0xffff);
629 return (uint32_t)sum;
630}
631
632
633/*
634 * This is a multi-buflet variant of pkt_copy_from_pkt().
635 *
636 * start/stuff is relative to soff, within [0, len], such that
637 * [ 0 ... soff ... soff + start/stuff ... soff + len ... ]
638 */
639void
640pkt_copy_multi_buflet_from_pkt(const enum txrx t, kern_packet_t dph,
641 const uint16_t doff, kern_packet_t sph, const uint16_t soff,
642 const uint32_t len, const boolean_t copysum, const uint16_t start,
643 const uint16_t stuff, const boolean_t invert)
644{
645 boolean_t rc;
646 uint32_t partial;
647 uint16_t csum = 0;
648 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
649 struct __kern_packet *spkt = SK_PTR_ADDR_KPKT(sph);
650 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(spkt);
651
652 VERIFY((doff + len) <= (PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp) *
653 __packet_get_buflet_count(dph)));
654
655 switch (t) {
656 case NR_RX:
657 dpkt->pkt_csum_flags = 0;
658 if (__probable(do_sum)) {
659 /*
660 * copy the portion up to the point where we need to
661 * start the checksum, and copy the remainder,
662 * checksumming as we go.
663 */
664 if (__probable(start != 0)) {
665 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
666 len: start, NULL, FALSE);
667 ASSERT(rc);
668 }
669 _pkt_copypkt_sum(sph, soff: (soff + start), dph,
670 doff: (doff + start), len: (len - start), csum_partial: &partial, TRUE);
671 csum = __packet_fold_sum(sum: partial);
672 __packet_set_inet_checksum(ph: dph, PACKET_CSUM_PARTIAL,
673 start, stuff_val: csum, FALSE);
674 METADATA_ADJUST_LEN(dpkt, start, doff);
675 } else {
676 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
677 FALSE);
678 ASSERT(rc);
679 dpkt->pkt_csum_rx_start_off = spkt->pkt_csum_rx_start_off;
680 dpkt->pkt_csum_rx_value = spkt->pkt_csum_rx_value;
681 dpkt->pkt_csum_flags |= spkt->pkt_csum_flags & PACKET_CSUM_RX_FLAGS;
682 }
683 break;
684
685 case NR_TX:
686 if (copysum) {
687 uint8_t *baddr;
688 /*
689 * copy the portion up to the point where we need to
690 * start the checksum, and copy the remainder,
691 * checksumming as we go.
692 */
693 if (__probable(start != 0)) {
694 rc = _pkt_copypkt_sum(sph, soff, dph, doff,
695 len: start, NULL, FALSE);
696 ASSERT(rc);
697 }
698 rc = _pkt_copypkt_sum(sph, soff: (soff + start), dph,
699 doff: (doff + start), len: (len - start), csum_partial: &partial, TRUE);
700 ASSERT(rc);
701 csum = __packet_fold_sum_final(sum: partial);
702
703 /* RFC1122 4.1.3.4: Invert 0 to -0 for UDP */
704 if (csum == 0 && invert) {
705 csum = 0xffff;
706 }
707
708 /*
709 * Insert checksum into packet.
710 * Here we assume that checksum will be in the
711 * first buffer.
712 */
713 ASSERT((stuff + doff + sizeof(csum)) <=
714 PP_BUF_SIZE_DEF(dpkt->pkt_qum.qum_pp));
715 ASSERT(stuff <= (len - sizeof(csum)));
716
717 /* get first buflet buffer address from packet */
718 MD_BUFLET_ADDR_ABS(dpkt, baddr);
719 ASSERT(baddr != NULL);
720 baddr += doff;
721 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
722 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
723 } else {
724 bcopy(src: (void *)&csum, dst: baddr + stuff,
725 n: sizeof(csum));
726 }
727 METADATA_ADJUST_LEN(dpkt, start, doff);
728 } else {
729 rc = _pkt_copypkt_sum(sph, soff, dph, doff, len, NULL,
730 FALSE);
731 ASSERT(rc);
732 }
733 dpkt->pkt_csum_flags = spkt->pkt_csum_flags &
734 (PACKET_CSUM_TSO_FLAGS | PACKET_TX_CSUM_OFFLOAD_FLAGS);
735 dpkt->pkt_csum_tx_start_off = 0;
736 dpkt->pkt_csum_tx_stuff_off = 0;
737
738 SK_DF(SK_VERB_COPY | SK_VERB_TX,
739 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u, flags %u",
740 sk_proc_name_address(current_proc()),
741 sk_proc_pid(current_proc()), len,
742 (copysum ? (len - start) : 0), csum, start, dpkt->pkt_csum_flags);
743 break;
744
745 default:
746 VERIFY(0);
747 /* NOTREACHED */
748 __builtin_unreachable();
749 }
750}
751
752static inline uint32_t
753_convert_mbuf_csum_flags(uint32_t mbuf_flags)
754{
755 uint32_t pkt_flags = 0;
756
757 if (mbuf_flags & CSUM_TCP) {
758 pkt_flags |= PACKET_CSUM_TCP;
759 }
760 if (mbuf_flags & CSUM_TCPIPV6) {
761 pkt_flags |= PACKET_CSUM_TCPIPV6;
762 }
763 if (mbuf_flags & CSUM_UDP) {
764 pkt_flags |= PACKET_CSUM_UDP;
765 }
766 if (mbuf_flags & CSUM_UDPIPV6) {
767 pkt_flags |= PACKET_CSUM_UDPIPV6;
768 }
769 if (mbuf_flags & CSUM_IP) {
770 pkt_flags |= PACKET_CSUM_IP;
771 }
772 if (mbuf_flags & CSUM_ZERO_INVERT) {
773 pkt_flags |= PACKET_CSUM_ZERO_INVERT;
774 }
775
776 return pkt_flags;
777}
778
779/*
780 * This routine is used for copying an mbuf which originated in the host
781 * stack destined to a native skywalk interface (NR_TX), as well as for
782 * mbufs originating on compat network interfaces (NR_RX).
783 *
784 * start/stuff is relative to moff, within [0, len], such that
785 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
786 */
787void
788pkt_copy_from_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
789 struct mbuf *m, const uint16_t moff, const uint32_t len,
790 const boolean_t copysum, const uint16_t start)
791{
792 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
793 uint32_t partial;
794 uint16_t csum = 0;
795 uint8_t *baddr;
796
797 _CASSERT(sizeof(csum) == sizeof(uint16_t));
798
799 /* get buffer address from packet */
800 MD_BUFLET_ADDR_ABS(pkt, baddr);
801 ASSERT(baddr != NULL);
802 baddr += poff;
803 VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
804
805 switch (t) {
806 case NR_RX:
807 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
808 pkt->pkt_csum_rx_start_off = 0;
809 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
810 pkt->pkt_svc_class = m_get_service_class(m);
811 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
812 != CSUM_RX_FULL_FLAGS) && copysum)) {
813 /*
814 * Use m_copydata() to copy the portion up to the
815 * point where we need to start the checksum, and
816 * copy the remainder, checksumming as we go.
817 */
818 if (start != 0) {
819 m_copydata(m, moff, start, baddr);
820 }
821 partial = m_copydata_sum(m, off: start, len: (len - start),
822 vp: (baddr + start), initial_sum: 0, NULL);
823 csum = __packet_fold_sum(sum: partial);
824
825 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
826 start, stuff_val: csum, FALSE);
827 } else {
828 m_copydata(m, moff, len, baddr);
829 }
830 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
831 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
832 sk_proc_name_address(current_proc()),
833 sk_proc_pid(current_proc()), len,
834 (copysum ? (len - start) : 0), csum, start);
835 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
836 " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
837 SK_KVA(m), m->m_pkthdr.csum_flags,
838 (uint32_t)m->m_pkthdr.csum_rx_start,
839 (uint32_t)m->m_pkthdr.csum_rx_val);
840 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
841 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
842 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
843 (uint32_t)pkt->pkt_csum_rx_start_off,
844 (uint32_t)pkt->pkt_csum_rx_value);
845 break;
846
847 case NR_TX:
848 if (copysum) {
849 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
850 /*
851 * Use m_copydata() to copy the portion up to the
852 * point where we need to start the checksum, and
853 * copy the remainder, checksumming as we go.
854 */
855 if (start != 0) {
856 m_copydata(m, moff, start, baddr);
857 }
858 partial = m_copydata_sum(m, off: start, len: (len - start),
859 vp: (baddr + start), initial_sum: 0, NULL);
860 csum = __packet_fold_sum_final(sum: partial);
861
862 /*
863 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
864 * ideally we'd only test for CSUM_ZERO_INVERT
865 * here, but catch cases where the originator
866 * did not set it for UDP.
867 */
868 if (csum == 0 && (m->m_pkthdr.csum_flags &
869 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
870 csum = 0xffff;
871 }
872
873 /* Insert checksum into packet */
874 ASSERT(stuff <= (len - sizeof(csum)));
875 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
876 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
877 } else {
878 bcopy(src: (void *)&csum, dst: baddr + stuff,
879 n: sizeof(csum));
880 }
881 } else {
882 m_copydata(m, moff, len, baddr);
883 }
884 pkt->pkt_csum_flags = 0;
885 pkt->pkt_csum_tx_start_off = 0;
886 pkt->pkt_csum_tx_stuff_off = 0;
887
888 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
889 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
890 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
891 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
892 }
893 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
894 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
895 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
896 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
897 }
898 if (!copysum) {
899 pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(mbuf_flags: m->m_pkthdr.csum_flags);
900 }
901
902 /* translate mbuf metadata */
903 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
904 pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
905 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
906 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
907 switch (m->m_pkthdr.pkt_proto) {
908 case IPPROTO_QUIC:
909 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
910 pkt->pkt_transport_protocol = IPPROTO_QUIC;
911 break;
912
913 default:
914 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
915 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
916 break;
917 }
918 (void) mbuf_get_timestamp(mbuf: m, ts: &pkt->pkt_timestamp, NULL);
919 pkt->pkt_svc_class = m_get_service_class(m);
920 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
921 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
922 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
923 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
924 }
925 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
926 pkt->pkt_pflags |= PKT_F_L4S;
927 }
928 necp_get_app_uuid_from_packet(packet: m, app_uuid: pkt->pkt_policy_euuid);
929 pkt->pkt_policy_id =
930 (uint32_t)necp_get_policy_id_from_packet(packet: m);
931 pkt->pkt_skip_policy_id =
932 (uint32_t)necp_get_skip_policy_id_from_packet(packet: m);
933
934 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
935 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
936 __packet_set_tx_completion_data(ph,
937 cb_arg: m->m_pkthdr.drv_tx_compl_arg,
938 cb_data: m->m_pkthdr.drv_tx_compl_data);
939 }
940 pkt->pkt_tx_compl_context =
941 m->m_pkthdr.pkt_compl_context;
942 pkt->pkt_tx_compl_callbacks =
943 m->m_pkthdr.pkt_compl_callbacks;
944 /*
945 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
946 * mbuf can no longer trigger a completion callback.
947 * callback will be invoked when the kernel packet is
948 * completed.
949 */
950 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
951
952 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
953 }
954
955 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
956 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
957 sk_proc_name_address(current_proc()),
958 sk_proc_pid(current_proc()), len,
959 (copysum ? (len - start) : 0), csum, start);
960 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
961 " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
962 SK_KVA(m), m->m_pkthdr.csum_flags,
963 (uint32_t)m->m_pkthdr.csum_tx_start,
964 (uint32_t)m->m_pkthdr.csum_tx_stuff);
965 break;
966
967 default:
968 VERIFY(0);
969 /* NOTREACHED */
970 __builtin_unreachable();
971 }
972 METADATA_ADJUST_LEN(pkt, len, poff);
973
974 if (m->m_flags & M_BCAST) {
975 __packet_set_link_broadcast(ph);
976 } else if (m->m_flags & M_MCAST) {
977 __packet_set_link_multicast(ph);
978 }
979
980 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
981 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
982 (t == NR_RX) ? "RX" : "TX",
983 sk_dump("buf", baddr, len, 128, NULL, 0));
984}
985
986/*
987 * Like m_copydata_sum(), but works on a destination kernel packet.
988 */
989static inline uint32_t
990m_copypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
991 uint32_t len, boolean_t do_cscum)
992{
993 boolean_t needs_swap, started_on_odd = FALSE;
994 int off0 = soff;
995 uint32_t len0 = len;
996 struct mbuf *m0 = m;
997 uint32_t sum = 0, partial;
998 unsigned count0, count, odd, mlen_copied;
999 uint8_t *sbaddr = NULL, *dbaddr = NULL;
1000 uint16_t dbcnt = __packet_get_buflet_count(ph: dph);
1001 uint32_t dlim, dlen0;
1002 struct __kern_packet *dpkt = SK_PTR_ADDR_KPKT(dph);
1003 kern_buflet_t dbuf = NULL, dbufp = NULL;
1004
1005 while (soff > 0) {
1006 if (__improbable(m == NULL)) {
1007 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1008 __func__, m0, off0, len0);
1009 /* NOTREACHED */
1010 __builtin_unreachable();
1011 }
1012 if (soff < m->m_len) {
1013 break;
1014 }
1015 soff -= m->m_len;
1016 m = m->m_next;
1017 }
1018
1019 if (__improbable(m == NULL)) {
1020 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1021 __func__, m0, off0, len0);
1022 /* NOTREACHED */
1023 __builtin_unreachable();
1024 }
1025
1026 sbaddr = mtod(m, uint8_t *) + soff;
1027 count = m->m_len - soff;
1028 mlen_copied = 0;
1029
1030 while (len != 0) {
1031 ASSERT(sbaddr == NULL || dbaddr == NULL);
1032 if (sbaddr == NULL) {
1033 soff = 0;
1034 m = m->m_next;
1035 if (__improbable(m == NULL)) {
1036 panic("%s: invalid mbuf chain %p [off %d, "
1037 "len %d]", __func__, m0, off0, len0);
1038 /* NOTREACHED */
1039 __builtin_unreachable();
1040 }
1041 sbaddr = mtod(m, uint8_t *);
1042 count = m->m_len;
1043 mlen_copied = 0;
1044 }
1045
1046 if (__improbable(count == 0)) {
1047 sbaddr = NULL;
1048 continue;
1049 }
1050
1051 if (dbaddr == NULL) {
1052 if (dbufp != NULL) {
1053 __buflet_set_data_length(buf: dbufp, dlen: dlen0);
1054 }
1055
1056 PKT_GET_NEXT_BUFLET(dpkt, dbcnt, dbufp, dbuf);
1057 if (__improbable(dbuf == NULL)) {
1058 panic("%s: mbuf too large %p [off %d, "
1059 "len %d]", __func__, m0, off0, len0);
1060 /* NOTREACHED */
1061 __builtin_unreachable();
1062 }
1063 dbufp = dbuf;
1064 dlim = __buflet_get_data_limit(buf: dbuf) - doff;
1065 dbaddr = (uint8_t *)((uintptr_t)__buflet_get_data_address(buf: dbuf) + doff);
1066 dlen0 = dlim;
1067 doff = 0;
1068 }
1069
1070 count = MIN(count, (unsigned)len);
1071 count0 = count = MIN(count, dlim);
1072
1073 if (!do_cscum) {
1074 _pkt_copy(src: sbaddr, dst: dbaddr, len: count);
1075 sbaddr += count;
1076 dbaddr += count;
1077 goto skip_csum;
1078 }
1079
1080 partial = 0;
1081 if ((uintptr_t)sbaddr & 1) {
1082 /* Align on word boundary */
1083 started_on_odd = !started_on_odd;
1084#if BYTE_ORDER == LITTLE_ENDIAN
1085 partial = *sbaddr << 8;
1086#else /* BYTE_ORDER != LITTLE_ENDIAN */
1087 partial = *sbaddr;
1088#endif /* BYTE_ORDER != LITTLE_ENDIAN */
1089 *dbaddr++ = *sbaddr++;
1090 count -= 1;
1091 }
1092
1093 needs_swap = started_on_odd;
1094 odd = count & 1u;
1095 count -= odd;
1096
1097 if (count) {
1098 partial = __packet_copy_and_sum(src: sbaddr,
1099 dst: dbaddr, len: count, sum0: partial);
1100 sbaddr += count;
1101 dbaddr += count;
1102 if (__improbable(partial & 0xc0000000)) {
1103 if (needs_swap) {
1104 partial = (partial << 8) +
1105 (partial >> 24);
1106 }
1107 sum += (partial >> 16);
1108 sum += (partial & 0xffff);
1109 partial = 0;
1110 }
1111 }
1112
1113 if (odd) {
1114#if BYTE_ORDER == LITTLE_ENDIAN
1115 partial += *sbaddr;
1116#else /* BYTE_ORDER != LITTLE_ENDIAN */
1117 partial += *sbaddr << 8;
1118#endif /* BYTE_ORDER != LITTLE_ENDIAN */
1119 *dbaddr++ = *sbaddr++;
1120 started_on_odd = !started_on_odd;
1121 }
1122
1123 if (needs_swap) {
1124 partial = (partial << 8) + (partial >> 24);
1125 }
1126 sum += (partial >> 16) + (partial & 0xffff);
1127 /*
1128 * Reduce sum to allow potential byte swap
1129 * in the next iteration without carry.
1130 */
1131 sum = (sum >> 16) + (sum & 0xffff);
1132
1133skip_csum:
1134 dlim -= count0;
1135 len -= count0;
1136 mlen_copied += count0;
1137
1138 if (dlim == 0) {
1139 dbaddr = NULL;
1140 }
1141
1142 count = m->m_len - soff - mlen_copied;
1143 if (count == 0) {
1144 sbaddr = NULL;
1145 }
1146 }
1147
1148 ASSERT(len == 0);
1149 ASSERT(dbuf != NULL);
1150 __buflet_set_data_length(buf: dbuf, dlen: (dlen0 - dlim));
1151
1152 if (!do_cscum) {
1153 return 0;
1154 }
1155
1156 /* Final fold (reduce 32-bit to 16-bit) */
1157 sum = ((sum >> 16) & 0xffff) + (sum & 0xffff);
1158 sum = (sum >> 16) + (sum & 0xffff);
1159 return sum;
1160}
1161
1162/*
1163 * This is a multi-buflet variant of pkt_copy_from_mbuf().
1164 *
1165 * start/stuff is relative to moff, within [0, len], such that
1166 * [ 0 ... moff ... moff + start/stuff ... moff + len ... ]
1167 */
1168void
1169pkt_copy_multi_buflet_from_mbuf(const enum txrx t, kern_packet_t ph,
1170 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1171 const uint32_t len, const boolean_t copysum, const uint16_t start)
1172{
1173 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1174 uint32_t partial;
1175 uint16_t csum = 0;
1176 uint8_t *baddr;
1177
1178 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1179
1180 /* get buffer address from packet */
1181 MD_BUFLET_ADDR_ABS(pkt, baddr);
1182 ASSERT(baddr != NULL);
1183 baddr += poff;
1184 VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1185 __packet_get_buflet_count(ph)));
1186
1187 switch (t) {
1188 case NR_RX:
1189 pkt->pkt_csum_flags = m->m_pkthdr.csum_flags;
1190 pkt->pkt_csum_rx_start_off = 0;
1191 pkt->pkt_csum_rx_value = m->m_pkthdr.csum_rx_val;
1192 pkt->pkt_svc_class = m_get_service_class(m);
1193 if (__probable(((m->m_pkthdr.csum_flags & CSUM_RX_FULL_FLAGS)
1194 != CSUM_RX_FULL_FLAGS) && copysum)) {
1195 /*
1196 * Use m_copydata() to copy the portion up to the
1197 * point where we need to start the checksum, and
1198 * copy the remainder, checksumming as we go.
1199 */
1200 if (start != 0) {
1201 m_copydata(m, moff, start, baddr);
1202 }
1203 partial = m_copypkt_sum(m, soff: start, dph: ph, doff: (poff + start),
1204 len: (len - start), TRUE);
1205 csum = __packet_fold_sum(sum: partial);
1206 __packet_set_inet_checksum(ph, PACKET_CSUM_PARTIAL,
1207 start, stuff_val: csum, FALSE);
1208 METADATA_ADJUST_LEN(pkt, start, poff);
1209 } else {
1210 (void) m_copypkt_sum(m, soff: moff, dph: ph, doff: poff, len, FALSE);
1211 }
1212 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1213 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1214 sk_proc_name_address(current_proc()),
1215 sk_proc_pid(current_proc()), len,
1216 (copysum ? (len - start) : 0), csum, start);
1217 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1218 " mbuf 0x%llx csumf/rxstart/rxval 0x%x/%u/0x%04x",
1219 SK_KVA(m), m->m_pkthdr.csum_flags,
1220 (uint32_t)m->m_pkthdr.csum_rx_start,
1221 (uint32_t)m->m_pkthdr.csum_rx_val);
1222 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1223 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1224 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1225 (uint32_t)pkt->pkt_csum_rx_start_off,
1226 (uint32_t)pkt->pkt_csum_rx_value);
1227 break;
1228
1229 case NR_TX:
1230 if (copysum) {
1231 uint16_t stuff = m->m_pkthdr.csum_tx_stuff;
1232 /*
1233 * Use m_copydata() to copy the portion up to the
1234 * point where we need to start the checksum, and
1235 * copy the remainder, checksumming as we go.
1236 */
1237 if (start != 0) {
1238 m_copydata(m, moff, start, baddr);
1239 }
1240 partial = m_copypkt_sum(m, soff: start, dph: ph, doff: (poff + start),
1241 len: (len - start), TRUE);
1242 csum = __packet_fold_sum_final(sum: partial);
1243
1244 /*
1245 * RFC1122 4.1.3.4: Invert 0 to -0 for UDP;
1246 * ideally we'd only test for CSUM_ZERO_INVERT
1247 * here, but catch cases where the originator
1248 * did not set it for UDP.
1249 */
1250 if (csum == 0 && (m->m_pkthdr.csum_flags &
1251 (CSUM_UDP | CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
1252 csum = 0xffff;
1253 }
1254
1255 /* Insert checksum into packet */
1256 ASSERT(stuff <= (len - sizeof(csum)));
1257 if (IS_P2ALIGNED(baddr + stuff, sizeof(csum))) {
1258 *(uint16_t *)(uintptr_t)(baddr + stuff) = csum;
1259 } else {
1260 bcopy(src: (void *)&csum, dst: baddr + stuff,
1261 n: sizeof(csum));
1262 }
1263 METADATA_ADJUST_LEN(pkt, start, poff);
1264 } else {
1265 m_copypkt_sum(m, soff: moff, dph: ph, doff: poff, len, FALSE);
1266 }
1267 pkt->pkt_csum_flags = 0;
1268 pkt->pkt_csum_tx_start_off = 0;
1269 pkt->pkt_csum_tx_stuff_off = 0;
1270
1271 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) {
1272 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV4;
1273 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1274 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV6) == 0);
1275 }
1276 if (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) {
1277 pkt->pkt_csum_flags |= PACKET_CSUM_TSO_IPV6;
1278 pkt->pkt_proto_seg_sz = (uint16_t)m->m_pkthdr.tso_segsz;
1279 ASSERT((pkt->pkt_csum_flags & PACKET_TSO_IPV4) == 0);
1280 }
1281 if (!copysum) {
1282 pkt->pkt_csum_flags |= _convert_mbuf_csum_flags(mbuf_flags: m->m_pkthdr.csum_flags);
1283 }
1284
1285 /* translate mbuf metadata */
1286 pkt->pkt_flowsrc_type = m->m_pkthdr.pkt_flowsrc;
1287 pkt->pkt_flowsrc_token = m->m_pkthdr.pkt_mpriv_srcid;
1288 pkt->pkt_flow_token = m->m_pkthdr.pkt_flowid;
1289 pkt->pkt_comp_gencnt = m->m_pkthdr.comp_gencnt;
1290 switch (m->m_pkthdr.pkt_proto) {
1291 case IPPROTO_QUIC:
1292 pkt->pkt_flow_ip_proto = IPPROTO_UDP;
1293 pkt->pkt_transport_protocol = IPPROTO_QUIC;
1294 break;
1295
1296 default:
1297 pkt->pkt_flow_ip_proto = m->m_pkthdr.pkt_proto;
1298 pkt->pkt_transport_protocol = m->m_pkthdr.pkt_proto;
1299 break;
1300 }
1301 (void) mbuf_get_timestamp(mbuf: m, ts: &pkt->pkt_timestamp, NULL);
1302 pkt->pkt_svc_class = m_get_service_class(m);
1303 pkt->pkt_pflags &= ~PKT_F_COMMON_MASK;
1304 pkt->pkt_pflags |= (m->m_pkthdr.pkt_flags & PKT_F_COMMON_MASK);
1305 if ((m->m_pkthdr.pkt_flags & PKTF_START_SEQ) != 0) {
1306 pkt->pkt_flow_tcp_seq = htonl(m->m_pkthdr.tx_start_seq);
1307 }
1308 if ((m->m_pkthdr.pkt_ext_flags & PKTF_EXT_L4S) != 0) {
1309 pkt->pkt_pflags |= PKT_F_L4S;
1310 }
1311 necp_get_app_uuid_from_packet(packet: m, app_uuid: pkt->pkt_policy_euuid);
1312 pkt->pkt_policy_id =
1313 (uint32_t)necp_get_policy_id_from_packet(packet: m);
1314 pkt->pkt_skip_policy_id =
1315 (uint32_t)necp_get_skip_policy_id_from_packet(packet: m);
1316
1317 if ((m->m_pkthdr.pkt_flags & PKTF_TX_COMPL_TS_REQ) != 0) {
1318 if ((m->m_pkthdr.pkt_flags & PKTF_DRIVER_MTAG) != 0) {
1319 __packet_set_tx_completion_data(ph,
1320 cb_arg: m->m_pkthdr.drv_tx_compl_arg,
1321 cb_data: m->m_pkthdr.drv_tx_compl_data);
1322 }
1323 pkt->pkt_tx_compl_context =
1324 m->m_pkthdr.pkt_compl_context;
1325 pkt->pkt_tx_compl_callbacks =
1326 m->m_pkthdr.pkt_compl_callbacks;
1327 /*
1328 * Remove PKTF_TX_COMPL_TS_REQ flag so that this
1329 * mbuf can no longer trigger a completion callback.
1330 * callback will be invoked when the kernel packet is
1331 * completed.
1332 */
1333 m->m_pkthdr.pkt_flags &= ~PKTF_TX_COMPL_TS_REQ;
1334
1335 m_add_crumb(m, PKT_CRUMB_SK_PKT_COPY);
1336 }
1337
1338 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1339 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1340 sk_proc_name_address(current_proc()),
1341 sk_proc_pid(current_proc()), len,
1342 (copysum ? (len - start) : 0), csum, start);
1343 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1344 " mbuf 0x%llx csumf/txstart/txstuff 0x%x/%u/%u",
1345 SK_KVA(m), m->m_pkthdr.csum_flags,
1346 (uint32_t)m->m_pkthdr.csum_tx_start,
1347 (uint32_t)m->m_pkthdr.csum_tx_stuff);
1348 break;
1349
1350 default:
1351 VERIFY(0);
1352 /* NOTREACHED */
1353 __builtin_unreachable();
1354 }
1355
1356 if (m->m_flags & M_BCAST) {
1357 __packet_set_link_broadcast(ph);
1358 } else if (m->m_flags & M_MCAST) {
1359 __packet_set_link_multicast(ph);
1360 }
1361
1362 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1363 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1364 (t == NR_RX) ? "RX" : "TX",
1365 sk_dump("buf", baddr, len, 128, NULL, 0));
1366}
1367
1368static inline uint32_t
1369_convert_pkt_csum_flags(uint32_t pkt_flags)
1370{
1371 uint32_t mbuf_flags = 0;
1372 if (pkt_flags & PACKET_CSUM_TCP) {
1373 mbuf_flags |= CSUM_TCP;
1374 }
1375 if (pkt_flags & PACKET_CSUM_TCPIPV6) {
1376 mbuf_flags |= CSUM_TCPIPV6;
1377 }
1378 if (pkt_flags & PACKET_CSUM_UDP) {
1379 mbuf_flags |= CSUM_UDP;
1380 }
1381 if (pkt_flags & PACKET_CSUM_UDPIPV6) {
1382 mbuf_flags |= CSUM_UDPIPV6;
1383 }
1384 if (pkt_flags & PACKET_CSUM_IP) {
1385 mbuf_flags |= CSUM_IP;
1386 }
1387 if (pkt_flags & PACKET_CSUM_ZERO_INVERT) {
1388 mbuf_flags |= CSUM_ZERO_INVERT;
1389 }
1390
1391 return mbuf_flags;
1392}
1393
1394/*
1395 * This routine is used for copying from a packet originating from a native
1396 * skywalk interface to an mbuf destined for the host legacy stack (NR_RX),
1397 * as well as for mbufs destined for the compat network interfaces (NR_TX).
1398 *
1399 * We do adjust the length to reflect the total data span.
1400 *
1401 * This routine supports copying into an mbuf chain for RX but not TX.
1402 *
1403 * start/stuff is relative to poff, within [0, len], such that
1404 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1405 */
1406void
1407pkt_copy_to_mbuf(const enum txrx t, kern_packet_t ph, const uint16_t poff,
1408 struct mbuf *m, const uint16_t moff, const uint32_t len,
1409 const boolean_t copysum, const uint16_t start)
1410{
1411 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1412 struct mbuf *curr_m;
1413 uint32_t partial = 0;
1414 uint32_t remaining_len = len, copied_len = 0;
1415 uint16_t csum = 0;
1416 uint8_t *baddr;
1417 uint8_t *dp;
1418 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1419
1420 ASSERT(len >= start);
1421 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1422
1423 /* get buffer address from packet */
1424 MD_BUFLET_ADDR_ABS(pkt, baddr);
1425 ASSERT(baddr != NULL);
1426 baddr += poff;
1427 VERIFY((poff + len) <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1428
1429 ASSERT((m->m_flags & M_PKTHDR));
1430 m->m_data += moff;
1431
1432 switch (t) {
1433 case NR_RX:
1434 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1435
1436 /*
1437 * Use pkt_copy() to copy the portion up to the
1438 * point where we need to start the checksum, and
1439 * copy the remainder, checksumming as we go.
1440 */
1441 if (__probable(do_sum && start != 0)) {
1442 ASSERT(M_TRAILINGSPACE(m) >= start);
1443 ASSERT(m->m_len == 0);
1444 dp = (uint8_t *)m->m_data;
1445 _pkt_copy(src: baddr, dst: dp, len: start);
1446 remaining_len -= start;
1447 copied_len += start;
1448 m->m_len += start;
1449 m->m_pkthdr.len += start;
1450 }
1451 curr_m = m;
1452 while (curr_m != NULL && remaining_len != 0) {
1453 uint32_t tmp_len = MIN(remaining_len,
1454 (uint32_t)M_TRAILINGSPACE(curr_m));
1455 dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
1456 if (__probable(do_sum)) {
1457 partial = __packet_copy_and_sum(src: (baddr + copied_len),
1458 dst: dp, len: tmp_len, sum0: partial);
1459 } else {
1460 _pkt_copy(src: (baddr + copied_len), dst: dp, len: tmp_len);
1461 }
1462
1463 curr_m->m_len += tmp_len;
1464 m->m_pkthdr.len += tmp_len;
1465 copied_len += tmp_len;
1466 remaining_len -= tmp_len;
1467 curr_m = curr_m->m_next;
1468 }
1469 ASSERT(remaining_len == 0);
1470
1471 if (__probable(do_sum)) {
1472 csum = __packet_fold_sum(sum: partial);
1473
1474 m->m_pkthdr.csum_flags |=
1475 (CSUM_DATA_VALID | CSUM_PARTIAL);
1476 m->m_pkthdr.csum_rx_start = start;
1477 m->m_pkthdr.csum_rx_val = csum;
1478 } else {
1479 m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1480 m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1481 _CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1482 m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1483 if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1484 m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1485 }
1486 }
1487
1488 /* translate packet metadata */
1489 mbuf_set_timestamp(mbuf: m, ts: pkt->pkt_timestamp,
1490 valid: ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1491
1492 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1493 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1494 sk_proc_name_address(current_proc()),
1495 sk_proc_pid(current_proc()), len,
1496 (copysum ? (len - start) : 0), csum, start);
1497 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1498 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1499 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1500 (uint32_t)m->m_pkthdr.csum_rx_start,
1501 (uint32_t)m->m_pkthdr.csum_rx_val);
1502 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1503 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1504 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1505 (uint32_t)pkt->pkt_csum_rx_start_off,
1506 (uint32_t)pkt->pkt_csum_rx_value);
1507 break;
1508
1509 case NR_TX:
1510 dp = (uint8_t *)m->m_data;
1511 ASSERT(m->m_next == NULL);
1512
1513 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1514 (uint32_t)mbuf_maxlen(m));
1515 m->m_len += len;
1516 m->m_pkthdr.len += len;
1517 VERIFY(m->m_len == m->m_pkthdr.len &&
1518 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1519
1520 if (copysum) {
1521 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1522 /*
1523 * Use pkt_copy() to copy the portion up to the
1524 * point where we need to start the checksum, and
1525 * copy the remainder, checksumming as we go.
1526 */
1527 if (__probable(start != 0)) {
1528 _pkt_copy(src: baddr, dst: dp, len: start);
1529 }
1530 partial = __packet_copy_and_sum(src: (baddr + start),
1531 dst: (dp + start), len: (len - start), sum0: 0);
1532 csum = __packet_fold_sum_final(sum: partial);
1533
1534 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1535 if (csum == 0 &&
1536 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1537 csum = 0xffff;
1538 }
1539
1540 /* Insert checksum into packet */
1541 ASSERT(stuff <= (len - sizeof(csum)));
1542 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1543 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1544 } else {
1545 bcopy(src: (void *)&csum, dst: dp + stuff, n: sizeof(csum));
1546 }
1547 } else {
1548 _pkt_copy(src: baddr, dst: dp, len);
1549 }
1550 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1551 m->m_pkthdr.csum_tx_start = 0;
1552 m->m_pkthdr.csum_tx_stuff = 0;
1553 m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt_flags: pkt->pkt_csum_flags);
1554
1555 /* translate packet metadata */
1556 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1557 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1558 m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1559 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1560 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1561 m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1562 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1563 mbuf_set_timestamp(mbuf: m, ts: pkt->pkt_timestamp,
1564 valid: ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1565 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1566 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1567 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1568 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1569 }
1570 if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1571 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1572 }
1573 if (__improbable(copy_pkt_tx_time != 0 &&
1574 (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1575 struct m_tag *tag = NULL;
1576 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1577 sizeof(uint64_t), M_WAITOK, m);
1578 if (tag != NULL) {
1579 m_tag_prepend(m, tag);
1580 *(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1581 }
1582 }
1583 m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1584 m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1585
1586 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1587 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1588 sk_proc_name_address(current_proc()),
1589 sk_proc_pid(current_proc()), len,
1590 (copysum ? (len - start) : 0), csum, start);
1591 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1592 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1593 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1594 (uint32_t)pkt->pkt_csum_tx_start_off,
1595 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1596 break;
1597
1598 default:
1599 VERIFY(0);
1600 /* NOTREACHED */
1601 __builtin_unreachable();
1602 }
1603
1604 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1605 m->m_flags |= M_BCAST;
1606 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1607 m->m_flags |= M_MCAST;
1608 }
1609 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1610 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1611 (t == NR_RX) ? "RX" : "TX",
1612 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1613}
1614
1615/*
1616 * This is a multi-buflet variant of pkt_copy_to_mbuf().
1617 * NOTE: poff is the offset within the packet.
1618 *
1619 * This routine supports copying into an mbuf chain for RX but not TX.
1620 *
1621 * start/stuff is relative to poff, within [0, len], such that
1622 * [ 0 ... poff ... poff + start/stuff ... poff + len ... ]
1623 */
1624void
1625pkt_copy_multi_buflet_to_mbuf(const enum txrx t, kern_packet_t ph,
1626 const uint16_t poff, struct mbuf *m, const uint16_t moff,
1627 const uint32_t len, const boolean_t copysum, const uint16_t start)
1628{
1629 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1630 struct mbuf *curr_m;
1631 uint32_t partial = 0;
1632 uint32_t remaining_len = len, copied_len = 0;
1633 uint16_t csum = 0;
1634 uint8_t *baddr;
1635 uint8_t *dp;
1636 boolean_t do_sum = copysum && !PACKET_HAS_FULL_CHECKSUM_FLAGS(pkt);
1637
1638 ASSERT(len >= start);
1639 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1640
1641 /* get buffer address from packet */
1642 MD_BUFLET_ADDR_ABS(pkt, baddr);
1643 ASSERT(baddr != NULL);
1644 baddr += poff;
1645 VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp) *
1646 __packet_get_buflet_count(ph)));
1647
1648 ASSERT((m->m_flags & M_PKTHDR));
1649 m->m_data += moff;
1650
1651 switch (t) {
1652 case NR_RX:
1653 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
1654 if (__probable(do_sum && start != 0)) {
1655 ASSERT(M_TRAILINGSPACE(m) >= start);
1656 ASSERT(m->m_len == 0);
1657 dp = (uint8_t *)m->m_data;
1658 _pkt_copy(src: baddr, dst: dp, len: start);
1659 remaining_len -= start;
1660 copied_len += start;
1661 m->m_len += start;
1662 m->m_pkthdr.len += start;
1663 }
1664 curr_m = m;
1665 while (curr_m != NULL && remaining_len != 0) {
1666 uint32_t tmp_len = MIN(remaining_len,
1667 (uint32_t)M_TRAILINGSPACE(curr_m));
1668 uint16_t soff = poff + (uint16_t)copied_len;
1669 dp = (uint8_t *)curr_m->m_data + curr_m->m_len;
1670
1671 if (__probable(do_sum)) {
1672 partial = _pkt_copyaddr_sum(sph: ph, soff,
1673 dbaddr: dp, len: tmp_len, TRUE, initial_sum: partial, NULL);
1674 } else {
1675 pkt_copyaddr_sum(sph: ph, soff,
1676 dbaddr: dp, len: tmp_len, FALSE, initial_sum: 0, NULL);
1677 }
1678
1679 curr_m->m_len += tmp_len;
1680 m->m_pkthdr.len += tmp_len;
1681 copied_len += tmp_len;
1682 remaining_len -= tmp_len;
1683 curr_m = curr_m->m_next;
1684 }
1685 ASSERT(remaining_len == 0);
1686
1687 if (__probable(do_sum)) {
1688 csum = __packet_fold_sum(sum: partial);
1689
1690 m->m_pkthdr.csum_flags |=
1691 (CSUM_DATA_VALID | CSUM_PARTIAL);
1692 m->m_pkthdr.csum_rx_start = start;
1693 m->m_pkthdr.csum_rx_val = csum;
1694 } else {
1695 m->m_pkthdr.csum_rx_start = pkt->pkt_csum_rx_start_off;
1696 m->m_pkthdr.csum_rx_val = pkt->pkt_csum_rx_value;
1697 _CASSERT(CSUM_RX_FULL_FLAGS == PACKET_CSUM_RX_FULL_FLAGS);
1698 m->m_pkthdr.csum_flags |= pkt->pkt_csum_flags & PACKET_CSUM_RX_FULL_FLAGS;
1699 if (__improbable((pkt->pkt_csum_flags & PACKET_CSUM_PARTIAL) != 0)) {
1700 m->m_pkthdr.csum_flags |= CSUM_PARTIAL;
1701 }
1702 }
1703
1704 m->m_pkthdr.necp_mtag.necp_policy_id = pkt->pkt_policy_id;
1705 m->m_pkthdr.necp_mtag.necp_skip_policy_id = pkt->pkt_skip_policy_id;
1706
1707 /* translate packet metadata */
1708 mbuf_set_timestamp(mbuf: m, ts: pkt->pkt_timestamp,
1709 valid: ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1710
1711 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1712 "%s(%d) RX len %u, copy+sum %u (csum 0x%04x), start %u",
1713 sk_proc_name_address(current_proc()),
1714 sk_proc_pid(current_proc()), len,
1715 (copysum ? (len - start) : 0), csum, start);
1716 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1717 " mbuf 0x%llx moff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1718 SK_KVA(m), moff, m->m_pkthdr.csum_flags,
1719 (uint32_t)m->m_pkthdr.csum_rx_start,
1720 (uint32_t)m->m_pkthdr.csum_rx_val);
1721 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_RX,
1722 " pkt 0x%llx poff %u csumf/rxstart/rxval 0x%x/%u/0x%04x",
1723 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1724 (uint32_t)pkt->pkt_csum_rx_start_off,
1725 (uint32_t)pkt->pkt_csum_rx_value);
1726 break;
1727 case NR_TX:
1728 dp = (uint8_t *)m->m_data;
1729 ASSERT(m->m_next == NULL);
1730 VERIFY(((intptr_t)dp - (intptr_t)mbuf_datastart(m)) + len <=
1731 (uint32_t)mbuf_maxlen(m));
1732 m->m_len += len;
1733 m->m_pkthdr.len += len;
1734 VERIFY(m->m_len == m->m_pkthdr.len &&
1735 (uint32_t)m->m_len <= (uint32_t)mbuf_maxlen(m));
1736 if (copysum) {
1737 uint16_t stuff = pkt->pkt_csum_tx_stuff_off;
1738 /*
1739 * Use pkt_copy() to copy the portion up to the
1740 * point where we need to start the checksum, and
1741 * copy the remainder, checksumming as we go.
1742 */
1743 if (__probable(start != 0)) {
1744 _pkt_copy(src: baddr, dst: dp, len: start);
1745 }
1746 partial = _pkt_copyaddr_sum(sph: ph, soff: (poff + start),
1747 dbaddr: (dp + start), len: (len - start), TRUE, initial_sum: 0, NULL);
1748 csum = __packet_fold_sum_final(sum: partial);
1749
1750 /* RFC1122 4.1.3.4: Invert 0 to -0 (for UDP) */
1751 if (csum == 0 &&
1752 (pkt->pkt_csum_flags & PACKET_CSUM_ZERO_INVERT)) {
1753 csum = 0xffff;
1754 }
1755
1756 /* Insert checksum into packet */
1757 ASSERT(stuff <= (len - sizeof(csum)));
1758 if (IS_P2ALIGNED(dp + stuff, sizeof(csum))) {
1759 *(uint16_t *)(uintptr_t)(dp + stuff) = csum;
1760 } else {
1761 bcopy(src: (void *)&csum, dst: dp + stuff, n: sizeof(csum));
1762 }
1763 } else {
1764 (void) _pkt_copyaddr_sum(sph: ph, soff: poff, dbaddr: dp, len, FALSE, initial_sum: 0, NULL);
1765 }
1766 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1767 m->m_pkthdr.csum_tx_start = 0;
1768 m->m_pkthdr.csum_tx_stuff = 0;
1769 m->m_pkthdr.csum_flags |= _convert_pkt_csum_flags(pkt_flags: pkt->pkt_csum_flags);
1770
1771 /* translate packet metadata */
1772 m->m_pkthdr.pkt_flowsrc = pkt->pkt_flowsrc_type;
1773 m->m_pkthdr.pkt_svc = pkt->pkt_svc_class;
1774 m->m_pkthdr.pkt_mpriv_srcid = pkt->pkt_flowsrc_token;
1775 m->m_pkthdr.pkt_flowid = pkt->pkt_flow_token;
1776 m->m_pkthdr.comp_gencnt = pkt->pkt_comp_gencnt;
1777 m->m_pkthdr.tso_segsz = pkt->pkt_proto_seg_sz;
1778 m->m_pkthdr.pkt_proto = pkt->pkt_flow->flow_ip_proto;
1779 mbuf_set_timestamp(mbuf: m, ts: pkt->pkt_timestamp,
1780 valid: ((pkt->pkt_pflags & PKT_F_TS_VALID) != 0));
1781 m->m_pkthdr.pkt_flags &= ~PKT_F_COMMON_MASK;
1782 m->m_pkthdr.pkt_flags |= (pkt->pkt_pflags & PKT_F_COMMON_MASK);
1783 if ((pkt->pkt_pflags & PKT_F_START_SEQ) != 0) {
1784 m->m_pkthdr.tx_start_seq = ntohl(pkt->pkt_flow_tcp_seq);
1785 }
1786 if ((pkt->pkt_pflags & PKT_F_L4S) != 0) {
1787 m->m_pkthdr.pkt_ext_flags |= PKTF_EXT_L4S;
1788 }
1789 if (__improbable(copy_pkt_tx_time != 0 &&
1790 (pkt->pkt_pflags & PKT_F_OPT_TX_TIMESTAMP) != 0)) {
1791 struct m_tag *tag = NULL;
1792 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_AQM,
1793 sizeof(uint64_t), M_WAITOK, m);
1794 if (tag != NULL) {
1795 m_tag_prepend(m, tag);
1796 *(uint64_t *)tag->m_tag_data = pkt->pkt_com_opt->__po_pkt_tx_time;
1797 }
1798 }
1799
1800 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1801 "%s(%d) TX len %u, copy+sum %u (csum 0x%04x), start %u",
1802 sk_proc_name_address(current_proc()),
1803 sk_proc_pid(current_proc()), len,
1804 (copysum ? (len - start) : 0), csum, start);
1805 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_TX,
1806 " pkt 0x%llx poff %u csumf/txstart/txstuff 0x%x/%u/%u",
1807 SK_KVA(pkt), poff, pkt->pkt_csum_flags,
1808 (uint32_t)pkt->pkt_csum_tx_start_off,
1809 (uint32_t)pkt->pkt_csum_tx_stuff_off);
1810 break;
1811
1812 default:
1813 VERIFY(0);
1814 /* NOTREACHED */
1815 __builtin_unreachable();
1816 }
1817
1818 if (pkt->pkt_link_flags & PKT_LINKF_BCAST) {
1819 m->m_flags |= M_BCAST;
1820 } else if (pkt->pkt_link_flags & PKT_LINKF_MCAST) {
1821 m->m_flags |= M_MCAST;
1822 }
1823 SK_DF(SK_VERB_COPY_MBUF | SK_VERB_DUMP, "%s(%d) %s %s",
1824 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1825 (t == NR_RX) ? "RX" : "TX",
1826 sk_dump("buf", (uint8_t *)dp, m->m_pkthdr.len, 128, NULL, 0));
1827}
1828
1829/*
1830 * Like m_copydata(), but computes 16-bit sum as the data is copied.
1831 * Caller can provide an initial sum to be folded into the computed
1832 * sum. The accumulated partial sum (32-bit) is returned to caller;
1833 * caller is responsible for further reducing it to 16-bit if needed,
1834 * as well as to perform the final 1's complement on it.
1835 */
1836uint32_t
1837m_copydata_sum(struct mbuf *m, int off, int len, void *vp, uint32_t initial_sum,
1838 boolean_t *odd_start)
1839{
1840 boolean_t needs_swap, started_on_odd = FALSE;
1841 int off0 = off, len0 = len;
1842 struct mbuf *m0 = m;
1843 uint64_t sum, partial;
1844 unsigned count, odd;
1845 char *cp = vp;
1846
1847 if (__improbable(off < 0 || len < 0)) {
1848 panic("%s: invalid offset %d or len %d", __func__, off, len);
1849 /* NOTREACHED */
1850 __builtin_unreachable();
1851 }
1852
1853 while (off > 0) {
1854 if (__improbable(m == NULL)) {
1855 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1856 __func__, m0, off0, len0);
1857 /* NOTREACHED */
1858 __builtin_unreachable();
1859 }
1860 if (off < m->m_len) {
1861 break;
1862 }
1863 off -= m->m_len;
1864 m = m->m_next;
1865 }
1866
1867 if (odd_start) {
1868 started_on_odd = *odd_start;
1869 }
1870 sum = initial_sum;
1871
1872 for (; len > 0; m = m->m_next) {
1873 uint8_t *datap;
1874
1875 if (__improbable(m == NULL)) {
1876 panic("%s: invalid mbuf chain %p [off %d, len %d]",
1877 __func__, m0, off0, len0);
1878 /* NOTREACHED */
1879 __builtin_unreachable();
1880 }
1881
1882 datap = mtod(m, uint8_t *) + off;
1883 count = m->m_len;
1884
1885 if (__improbable(count == 0)) {
1886 continue;
1887 }
1888
1889 count = MIN(count - off, (unsigned)len);
1890 partial = 0;
1891
1892 if ((uintptr_t)datap & 1) {
1893 /* Align on word boundary */
1894 started_on_odd = !started_on_odd;
1895#if BYTE_ORDER == LITTLE_ENDIAN
1896 partial = *datap << 8;
1897#else /* BYTE_ORDER != LITTLE_ENDIAN */
1898 partial = *datap;
1899#endif /* BYTE_ORDER != LITTLE_ENDIAN */
1900 *cp++ = *datap++;
1901 count -= 1;
1902 len -= 1;
1903 }
1904
1905 needs_swap = started_on_odd;
1906 odd = count & 1u;
1907 count -= odd;
1908
1909 if (count) {
1910 partial = __packet_copy_and_sum(src: datap,
1911 dst: cp, len: count, sum0: (uint32_t)partial);
1912 datap += count;
1913 cp += count;
1914 len -= count;
1915 if (__improbable((partial & (3ULL << 62)) != 0)) {
1916 if (needs_swap) {
1917 partial = (partial << 8) +
1918 (partial >> 56);
1919 }
1920 sum += (partial >> 32);
1921 sum += (partial & 0xffffffff);
1922 partial = 0;
1923 }
1924 }
1925
1926 if (odd) {
1927#if BYTE_ORDER == LITTLE_ENDIAN
1928 partial += *datap;
1929#else /* BYTE_ORDER != LITTLE_ENDIAN */
1930 partial += *datap << 8;
1931#endif /* BYTE_ORDER != LITTLE_ENDIAN */
1932 *cp++ = *datap++;
1933 len -= 1;
1934 started_on_odd = !started_on_odd;
1935 }
1936 off = 0;
1937
1938 if (needs_swap) {
1939 partial = (partial << 8) + (partial >> 24);
1940 }
1941 sum += (partial >> 32) + (partial & 0xffffffff);
1942 /*
1943 * Reduce sum to allow potential byte swap
1944 * in the next iteration without carry.
1945 */
1946 sum = (sum >> 32) + (sum & 0xffffffff);
1947 }
1948
1949 if (odd_start) {
1950 *odd_start = started_on_odd;
1951 }
1952
1953 /* Final fold (reduce 64-bit to 32-bit) */
1954 sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */
1955 sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */
1956
1957 /* return 32-bit partial sum to caller */
1958 return (uint32_t)sum;
1959}
1960
1961#if DEBUG || DEVELOPMENT
1962#define TRAILERS_MAX 16 /* max trailing bytes */
1963#define TRAILERS_REGEN (64 * 1024) /* regeneration threshold */
1964static uint8_t tb[TRAILERS_MAX]; /* random trailing bytes */
1965static uint32_t regen = TRAILERS_REGEN; /* regeneration counter */
1966
1967uint32_t
1968pkt_add_trailers(kern_packet_t ph, const uint32_t len, const uint16_t start)
1969{
1970 struct __kern_packet *pkt = SK_PTR_ADDR_KPKT(ph);
1971 uint32_t extra;
1972 uint8_t *baddr;
1973
1974 /* get buffer address from packet */
1975 MD_BUFLET_ADDR_ABS(pkt, baddr);
1976 ASSERT(baddr != NULL);
1977 ASSERT(len <= PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp));
1978
1979 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
1980 if (extra == 0 || extra > sizeof(tb) ||
1981 (len + extra) > PP_BUF_SIZE_DEF(pkt->pkt_qum.qum_pp)) {
1982 return 0;
1983 }
1984
1985 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
1986 if (regen++ == TRAILERS_REGEN) {
1987 read_frandom(&tb[0], sizeof(tb));
1988 regen = 0;
1989 }
1990
1991 bcopy(&tb[0], (baddr + len), extra);
1992
1993 /* recompute partial sum (also to exercise related logic) */
1994 pkt->pkt_csum_flags |= PACKET_CSUM_PARTIAL;
1995 pkt->pkt_csum_rx_value = (uint16_t)__packet_cksum((baddr + start),
1996 ((len + extra) - start), 0);
1997 pkt->pkt_csum_rx_start_off = start;
1998
1999 return extra;
2000}
2001
2002uint32_t
2003pkt_add_trailers_mbuf(struct mbuf *m, const uint16_t start)
2004{
2005 uint32_t extra;
2006
2007 extra = MIN((uint32_t)pkt_trailers, (uint32_t)TRAILERS_MAX);
2008 if (extra == 0 || extra > sizeof(tb)) {
2009 return 0;
2010 }
2011
2012 if (mbuf_copyback(m, m_pktlen(m), extra, &tb[0], M_NOWAIT) != 0) {
2013 return 0;
2014 }
2015
2016 /* generate random bytes once per TRAILERS_REGEN packets (approx.) */
2017 if (regen++ == TRAILERS_REGEN) {
2018 read_frandom(&tb[0], sizeof(tb));
2019 regen = 0;
2020 }
2021
2022 /* recompute partial sum (also to exercise related logic) */
2023 m->m_pkthdr.csum_rx_val = m_sum16(m, start, (m_pktlen(m) - start));
2024 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
2025 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
2026 m->m_pkthdr.csum_rx_start = start;
2027
2028 return extra;
2029}
2030#endif /* DEBUG || DEVELOPMENT */
2031
2032void
2033pkt_copypkt_sum(kern_packet_t sph, uint16_t soff, kern_packet_t dph,
2034 uint16_t doff, uint16_t len, uint32_t *partial, boolean_t do_csum)
2035{
2036 VERIFY(_pkt_copypkt_sum(sph, soff, dph, doff, len, partial, do_csum));
2037}
2038
2039uint32_t
2040pkt_copyaddr_sum(kern_packet_t sph, uint16_t soff, uint8_t *dbaddr,
2041 uint32_t len, boolean_t do_csum, uint32_t initial_sum, boolean_t *odd_start)
2042{
2043 return _pkt_copyaddr_sum(sph, soff, dbaddr, len, do_csum, initial_sum, odd_start);
2044}
2045
2046uint32_t
2047pkt_mcopypkt_sum(mbuf_t m, int soff, kern_packet_t dph, uint16_t doff,
2048 uint16_t len, boolean_t do_cscum)
2049{
2050 return m_copypkt_sum(m, soff, dph, doff, len, do_cscum);
2051}
2052
/*
 * Non-static wrapper around _pkt_copy(): copies len bytes from src to
 * dst.
 */
void
pkt_copy(void *src, void *dst, size_t len)
{
	/* a void function must not return an expression (C11 6.8.6.4) */
	_pkt_copy(src, dst, len);
}
2058