1/*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1988, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
61 */
62
63#include <sys/param.h>
64#include <machine/endian.h>
65#include <sys/mbuf.h>
66#include <kern/debug.h>
67#include <net/dlil.h>
68#include <netinet/in.h>
69#define _IP_VHL
70#include <netinet/ip.h>
71#include <netinet/ip_var.h>
72
73/*
74 * Checksum routine for Internet Protocol family headers (Portable Version).
75 *
76 * This routine is very heavily used in the network
77 * code and should be modified for each CPU to be as fast as possible.
78 */
/*
 * REDUCE16 folds the 64-bit accumulator `sum' down to a 16-bit 1's
 * complement value, in place.
 *
 * The expansion site must have these names in scope:
 *	sum	- integer lvalue holding the accumulator
 *	q_util	- scratch variable of type union q_util
 *	l_util	- scratch variable of type union l_util
 *
 * The four 16-bit lanes of the 64-bit value are added into a 32-bit
 * value, the two 16-bit lanes of that value are added in turn, and
 * ADDCARRY() (defined elsewhere) is applied last to fold in any carry
 * that remains.
 *
 * The body is wrapped in do { } while (0) so that "REDUCE16;" is exactly
 * one statement, and therefore safe inside an unbraced if/else.
 */
#define REDUCE16 do {							\
	q_util.q = sum;							\
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];				\
	ADDCARRY(sum);							\
} while (0)
85
/*
 * Scratch view of one 32-bit value as two 16-bit halves.  REDUCE16
 * stores through `l' and reads the halves back through `s[]'.
 */
union l_util {
	uint16_t s[2];	/* the same 32 bits, as two 16-bit lanes */
	uint32_t l;	/* the whole 32-bit value */
};
90
/*
 * Scratch view of one 64-bit value as 16-bit or 32-bit lanes.  REDUCE16
 * stores the accumulator through `q' and reads the four 16-bit lanes
 * back through `s[]'.
 */
union q_util {
	uint16_t s[4];	/* the same 64 bits, as four 16-bit lanes */
	uint32_t l[2];	/* the same 64 bits, as two 32-bit lanes */
	uint64_t q;	/* the whole 64-bit value */
};
96
/*
 * Declared here, defined outside this file.  b_sum16() below calls it as
 * (buffer, length, 0) and truncates the result to 16 bits.
 * NOTE(review): the third argument is presumably an initial sum to
 * accumulate onto -- confirm against the implementation.
 */
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
98
/*
 * 16-bit 1's complement sum over a contiguous span of memory.
 *
 * buf points at the first byte and len is the number of bytes to sum.
 * The work is handed to os_cpu_in_cksum() with a zero third argument,
 * and whatever it returns is truncated to 16 bits.
 */
uint16_t
b_sum16(const void *buf, int len)
{
	uint32_t span_sum;

	span_sum = os_cpu_in_cksum(buf, len, 0);

	return (uint16_t)span_sum;
}
107
108uint16_t inet_cksum_simple(struct mbuf *, int);
109/*
110 * For the exported _in_cksum symbol in BSDKernel symbol set.
111 */
112uint16_t
113inet_cksum_simple(struct mbuf *m, int len)
114{
115 return inet_cksum(m, 0, 0, len);
116}
117
/*
 * Add two 16-bit words, apply ADDCARRY() to the result, and return the
 * low 16 bits.
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint64_t sum;

	/* Widen before adding; two 16-bit words cannot overflow 64 bits. */
	sum = a;
	sum += b;

	ADDCARRY(sum);

	return (uint16_t)sum;
}
126
127uint16_t
128in_pseudo(uint32_t a, uint32_t b, uint32_t c)
129{
130 uint64_t sum;
131 union q_util q_util;
132 union l_util l_util;
133
134 sum = (uint64_t)a + b + c;
135 REDUCE16;
136 return (uint16_t)sum;
137}
138
139uint16_t
140in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
141{
142 uint64_t sum;
143 union q_util q_util;
144 union l_util l_util;
145
146 sum = a + b + c;
147 REDUCE16;
148 return (uint16_t)sum;
149}
150
151/*
152 * May be used on IP header with options.
153 */
154uint16_t
155in_cksum_hdr_opt(const struct ip *ip)
156{
157 return ~b_sum16(buf: ip, len: (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff;
158}
159
160/*
161 * A wrapper around the simple in_cksum_hdr() and the more complicated
162 * inet_cksum(); the former is chosen if the IP header is simple,
163 * contiguous and 32-bit aligned. Also does some stats accounting.
164 */
165uint16_t
166ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
167{
168 struct ip *ip = mtod(m, struct ip *);
169
170 if (out) {
171 ipstat.ips_snd_swcsum++;
172 ipstat.ips_snd_swcsum_bytes += hlen;
173 } else {
174 ipstat.ips_rcv_swcsum++;
175 ipstat.ips_rcv_swcsum_bytes += hlen;
176 }
177
178 if (hlen == sizeof(*ip) &&
179 m->m_len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
180 return in_cksum_hdr(ip);
181 }
182
183 return inet_cksum(m, 0, 0, hlen);
184}
185
186uint16_t
187ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len,
188 int out)
189{
190 const struct ip *ip = buffer;
191
192 if (out) {
193 ipstat.ips_snd_swcsum++;
194 ipstat.ips_snd_swcsum_bytes += hlen;
195 } else {
196 ipstat.ips_rcv_swcsum++;
197 ipstat.ips_rcv_swcsum_bytes += hlen;
198 }
199
200 if (hlen == sizeof(*ip) &&
201 len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
202 return in_cksum_hdr(ip);
203 }
204
205 return inet_cksum_buffer(buffer, 0, 0, len: hlen);
206}
207
208/*
209 * m MUST contain at least an IP header, if nxt is specified;
210 * nxt is the upper layer protocol number;
211 * off is an offset where TCP/UDP/ICMP header starts;
212 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
213 */
214uint16_t
215inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
216{
217 uint32_t sum;
218
219 sum = m_sum16(m, off, len);
220
221 /* include pseudo header checksum? */
222 if (nxt != 0) {
223 struct ip *ip;
224 unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
225 uint32_t mlen;
226
227 /*
228 * Sanity check
229 *
230 * Use m_length2() instead of m_length(), as we cannot rely on
231 * the caller setting m_pkthdr.len correctly, if the mbuf is
232 * a M_PKTHDR one.
233 */
234 if ((mlen = m_length2(m, NULL)) < sizeof(*ip)) {
235 panic("%s: mbuf %p too short (%d) for IPv4 header",
236 __func__, m, mlen);
237 /* NOTREACHED */
238 }
239
240 /*
241 * In case the IP header is not contiguous, or not 32-bit
242 * aligned, copy it to a local buffer. Note here that we
243 * expect the data pointer to point to the IP header.
244 */
245 if ((sizeof(*ip) > m->m_len) ||
246 !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
247 m_copydata(m, 0, sizeof(*ip), (caddr_t)buf);
248 ip = (struct ip *)(void *)buf;
249 } else {
250 ip = (struct ip *)(void *)(m->m_data);
251 }
252
253 /* add pseudo header checksum */
254 sum += in_pseudo(a: ip->ip_src.s_addr, b: ip->ip_dst.s_addr,
255 htonl(len + nxt));
256
257 /* fold in carry bits */
258 ADDCARRY(sum);
259 }
260
261 return ~sum & 0xffff;
262}
263
264/*
265 * buffer MUST contain at least an IP header, if nxt is specified;
266 * nxt is the upper layer protocol number;
267 * off is an offset where TCP/UDP/ICMP header starts;
268 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
269 */
270uint16_t
271inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off,
272 uint32_t len)
273{
274 uint32_t sum;
275
276 if (off >= len) {
277 panic("%s: off (%d) >= len (%d)", __func__, off, len);
278 }
279
280 sum = b_sum16(buf: &((const uint8_t *)buffer)[off], len);
281
282 /* include pseudo header checksum? */
283 if (nxt != 0) {
284 const struct ip *ip;
285 unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
286
287 /*
288 * In case the IP header is not contiguous, or not 32-bit
289 * aligned, copy it to a local buffer. Note here that we
290 * expect the data pointer to point to the IP header.
291 */
292 if (!IP_HDR_ALIGNED_P(buffer)) {
293 memcpy(dst: buf, src: buffer, n: sizeof(*ip));
294 ip = (const struct ip *)(const void *)buf;
295 } else {
296 ip = (const struct ip *)buffer;
297 }
298
299 /* add pseudo header checksum */
300 sum += in_pseudo(a: ip->ip_src.s_addr, b: ip->ip_dst.s_addr,
301 htonl(len + nxt));
302
303 /* fold in carry bits */
304 ADDCARRY(sum);
305 }
306
307 return ~sum & 0xffff;
308}
309
310#if DEBUG || DEVELOPMENT
311#include <pexpert/pexpert.h>
312
313#define CKSUM_ERR kprintf
314
315/*
316 * The following routines implement the portable, reference implementation
317 * of os_cpu_in_cksum_mbuf(). This is currently used only for validating
318 * the correctness of the platform-specific implementation, at boot time
319 * in dlil_verify_sum16(). It returns the 32-bit accumulator without doing
320 * a 1's complement on it.
321 */
322#if !defined(__LP64__)
/*
 * 32-bit version
 *
 * Sum len bytes of the mbuf chain m, starting off bytes into the chain,
 * using a 32-bit accumulator and 16-bit loads.
 *
 * m		head of the mbuf chain
 * len		number of bytes to sum; must be >= 0
 * off		number of bytes to skip first; must be >= 0
 * initial_sum	value to start from; its two 16-bit halves are added
 *		together to seed the accumulator
 *
 * Returns the sum folded to 16 bits, without the final 1's complement.
 * Returns (uint32_t)-1 after logging through CKSUM_ERR if the chain ends
 * before off and len have been satisfied; a valid result never exceeds
 * 0xffff, so the two cannot be confused.
 */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/*
	 * Skip whole mbufs until the one containing byte `off' is found,
	 * then jump into the body of the summing loop with data/mlen
	 * already adjusted for the offset.
	 */
	for (;;) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		/* Nothing to sum: leave; the loop below will not run. */
		if (len == 0) {
			break;
		}
		m = m->m_next;
	}

	/* One iteration per mbuf, until len bytes have been consumed. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		/* Never read past the number of bytes still requested. */
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * Snapshot taken after the leading-byte adjustment; it
		 * decides whether this mbuf's partial sum is rotated by
		 * 8 bits before it is added to the running sum.
		 */
		needs_swap = started_on_odd;
		/* Main unrolled loop: sixteen 16-bit loads per 32 bytes. */
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/*
			 * Once either of the top two bits is set, flush
			 * partial into sum and restart it from zero, so
			 * that it cannot overflow.
			 */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		/* Fewer than 32 bytes remain; consume them by bit of mlen. */
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		/*
		 * A single trailing byte; it also flips started_on_odd for
		 * the next mbuf.
		 */
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		/* Rotate by one byte when this mbuf needed swapping. */
		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* Fold twice so that any carry from the first fold is absorbed. */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
472
473#else /* __LP64__ */
/*
 * 64-bit version
 *
 * Sum len bytes of the mbuf chain m, starting off bytes into the chain,
 * using a 64-bit accumulator and, where alignment permits, 32-bit loads.
 *
 * m		head of the mbuf chain
 * len		number of bytes to sum; must be >= 0
 * off		number of bytes to skip first; must be >= 0
 * initial_sum	value the accumulator starts from
 *
 * Returns the sum folded to 16 bits, without the final 1's complement.
 * Returns (uint32_t)-1 after logging through CKSUM_ERR if the chain ends
 * before off and len have been satisfied; a valid result never exceeds
 * 0xffff, so the two cannot be confused.
 */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	/*
	 * Skip whole mbufs until the one containing byte `off' is found,
	 * then jump into the body of the summing loop with data/mlen
	 * already adjusted for the offset.
	 */
	for (;;) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		/* Nothing to sum: leave; the loop below will not run. */
		if (len == 0) {
			break;
		}
		m = m->m_next;
	}

	/* One iteration per mbuf, until len bytes have been consumed. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		/* Never read past the number of bytes still requested. */
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * Snapshot taken after the leading-byte adjustment; it
		 * decides whether this mbuf's partial sum is rotated by
		 * 8 bits before it is added to the running sum.
		 */
		needs_swap = started_on_odd;
		/*
		 * Consume one 16-bit word if needed to reach 4-byte
		 * alignment for the 32-bit loads below.  With fewer than
		 * two bytes left, only the single-byte tail can apply.
		 */
		if ((uintptr_t)data & 2) {
			if (mlen < 2) {
				goto trailing_bytes;
			}
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		/* Main unrolled loop: sixteen 32-bit loads per 64 bytes. */
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/*
			 * Once either of the top two bits is set, flush
			 * partial into sum and restart it from zero, so
			 * that it cannot overflow.
			 */
			if (__improbable(partial & (3ULL << 62))) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		/*
		 * A single trailing byte; it also flips started_on_odd for
		 * the next mbuf.
		 */
		if (mlen & 1) {
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		/* Rotate by one byte when this mbuf needed swapping. */
		if (needs_swap) {
			partial = (partial << 8) + (partial >> 56);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/*
	 * Add the four 16-bit lanes of the 64-bit sum, then fold twice so
	 * that any carry from the first fold is absorbed.
	 */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
#endif /* __LP64__ */
639#endif /* DEBUG || DEVELOPMENT */
640