1 | /* |
2 | * Copyright (c) 2012-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /*- |
30 | * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>. |
31 | * All rights reserved. |
32 | * |
33 | * Redistribution and use in source and binary forms, with or without |
34 | * modification, are permitted provided that the following conditions |
35 | * are met: |
36 | * |
37 | * 1. Redistributions of source code must retain the above copyright |
38 | * notice, this list of conditions and the following disclaimer. |
39 | * 2. Redistributions in binary form must reproduce the above copyright |
40 | * notice, this list of conditions and the following disclaimer in |
41 | * the documentation and/or other materials provided with the |
42 | * distribution. |
43 | * |
44 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
45 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
46 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
47 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
48 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
49 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, |
50 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
51 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
52 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
53 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
54 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
55 | * SUCH DAMAGE. |
56 | */ |
57 | |
58 | #ifdef KERNEL |
59 | #include <sys/param.h> |
60 | #include <machine/endian.h> |
61 | #include <sys/mcache.h> |
62 | #include <sys/mbuf.h> |
63 | #include <kern/debug.h> |
64 | #include <libkern/libkern.h> |
65 | #include <mach/boolean.h> |
66 | #include <pexpert/pexpert.h> |
67 | #define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args) |
68 | #else /* !KERNEL */ |
69 | #ifndef LIBSYSCALL_INTERFACE |
70 | #error "LIBSYSCALL_INTERFACE not defined" |
71 | #endif /* !LIBSYSCALL_INTERFACE */ |
72 | #include <stdlib.h> |
73 | #include <stddef.h> |
74 | #include <stdint.h> |
75 | #include <unistd.h> |
76 | #include <strings.h> |
77 | #include <mach/boolean.h> |
78 | #include <skywalk/os_skywalk_private.h> |
79 | #define CKSUM_ERR(fmt, args...) fprintf_stderr(fmt, ## args) |
80 | #endif /* !KERNEL */ |
81 | |
82 | /* compile time assert */ |
83 | #ifndef _CASSERT |
84 | #define _CASSERT(x) _Static_assert(x, "compile-time assertion failed") |
85 | #endif /* !_CASSERT */ |
86 | |
87 | #ifndef VERIFY |
88 | #define VERIFY(EX) ((void)0) |
89 | #endif /* !VERIFY */ |
90 | |
91 | #ifndef CKSUM_ERR |
92 | #define CKSUM_ERR(fmt, args...) ((void)0) |
93 | #endif /* !CKSUM_ERR */ |
94 | |
95 | #define PREDICT_TRUE(x) __builtin_expect(!!((long)(x)), 1L) |
96 | #define PREDICT_FALSE(x) __builtin_expect(!!((long)(x)), 0L) |
97 | |
/*
 * Fake mbuf struct used only for calling os_cpu_in_cksum_mbuf().
 * That routine looks at just three fields — {next,data,len} — so
 * _m_pad exists purely to reproduce the field offsets of the real
 * struct mbuf; the layout is verified by the _CASSERTs inside
 * os_cpu_in_cksum() below.  Do not reorder or resize these fields.
 */
struct _mbuf {
	struct _mbuf *_m_next;  /* next buffer in chain (mirrors m_next) */
	void *_m_pad;           /* padding only; keeps offsets in sync */
	uint8_t *_m_data;       /* start of data (mirrors m_data) */
	int32_t _m_len;         /* amount of data in this buffer (mirrors m_len) */
};

/* prototypes; os_cpu_in_cksum_mbuf() is defined below for Intel only */
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *, int, int, uint32_t);
108 | |
109 | uint32_t |
110 | os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum) |
111 | { |
112 | /* |
113 | * If data is 4-bytes aligned (conditional), length is multiple |
114 | * of 4-bytes (required), and the amount to checksum is small, |
115 | * this would be quicker; this is suitable for IPv4/TCP header. |
116 | */ |
117 | if ( |
118 | #if !defined(__arm64__) && !defined(__x86_64__) |
119 | IS_P2ALIGNED(data, sizeof(uint32_t)) && |
120 | #endif /* !__arm64__ && !__x86_64__ */ |
121 | len <= 64 && (len & 3) == 0) { |
122 | uint8_t *p = __DECONST(uint8_t *, data); |
123 | uint64_t sum = initial_sum; |
124 | |
125 | switch (len) { |
126 | case 20: /* simple IPv4 or TCP header */ |
127 | sum += *(uint32_t *)(void *)p; |
128 | sum += *(uint32_t *)(void *)(p + 4); |
129 | sum += *(uint32_t *)(void *)(p + 8); |
130 | sum += *(uint32_t *)(void *)(p + 12); |
131 | sum += *(uint32_t *)(void *)(p + 16); |
132 | break; |
133 | |
134 | case 32: /* TCP header + timestamp option */ |
135 | sum += *(uint32_t *)(void *)p; |
136 | sum += *(uint32_t *)(void *)(p + 4); |
137 | sum += *(uint32_t *)(void *)(p + 8); |
138 | sum += *(uint32_t *)(void *)(p + 12); |
139 | sum += *(uint32_t *)(void *)(p + 16); |
140 | sum += *(uint32_t *)(void *)(p + 20); |
141 | sum += *(uint32_t *)(void *)(p + 24); |
142 | sum += *(uint32_t *)(void *)(p + 28); |
143 | break; |
144 | |
145 | default: |
146 | while (len) { |
147 | sum += *(uint32_t *)(void *)p; |
148 | p += 4; |
149 | len -= 4; |
150 | } |
151 | break; |
152 | } |
153 | |
154 | /* fold 64-bit to 16-bit (deferred carries) */ |
155 | sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */ |
156 | sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */ |
157 | sum = (sum >> 16) + (sum & 0xffff); /* 16-bit + carry */ |
158 | sum = (sum >> 16) + (sum & 0xffff); /* final carry */ |
159 | |
160 | return sum & 0xffff; |
161 | } |
162 | |
163 | /* |
164 | * Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks |
165 | * at 3 fields: {next,data,len}, and since it doesn't care about |
166 | * the authenticity of the mbuf, we use a fake one here. Make |
167 | * sure the offsets are as expected. |
168 | */ |
169 | #if defined(__LP64__) |
170 | _CASSERT(offsetof(struct _mbuf, _m_next) == 0); |
171 | _CASSERT(offsetof(struct _mbuf, _m_data) == 16); |
172 | _CASSERT(offsetof(struct _mbuf, _m_len) == 24); |
173 | #else /* !__LP64__ */ |
174 | _CASSERT(offsetof(struct _mbuf, _m_next) == 0); |
175 | _CASSERT(offsetof(struct _mbuf, _m_data) == 8); |
176 | _CASSERT(offsetof(struct _mbuf, _m_len) == 12); |
177 | #endif /* !__LP64__ */ |
178 | #ifdef KERNEL |
179 | _CASSERT(offsetof(struct _mbuf, _m_next) == |
180 | offsetof(struct mbuf, m_next)); |
181 | _CASSERT(offsetof(struct _mbuf, _m_data) == |
182 | offsetof(struct mbuf, m_data)); |
183 | _CASSERT(offsetof(struct _mbuf, _m_len) == |
184 | offsetof(struct mbuf, m_len)); |
185 | #endif /* KERNEL */ |
186 | struct _mbuf m = { |
187 | ._m_next = NULL, |
188 | ._m_data = __DECONST(uint8_t *, data), |
189 | ._m_len = len, |
190 | }; |
191 | |
192 | return os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum); |
193 | } |
194 | |
195 | #if defined(__i386__) || defined(__x86_64__) |
196 | |
197 | /* |
198 | * Checksum routine for Internet Protocol family headers (Portable Version). |
199 | * |
200 | * This routine is very heavily used in the network |
201 | * code and should be modified for each CPU to be as fast as possible. |
202 | * |
203 | * A discussion of different implementation techniques can be found in |
204 | * RFC 1071. |
205 | * |
206 | * The default implementation for 32-bit architectures is using |
207 | * a 32-bit accumulator and operating on 16-bit operands. |
208 | * |
209 | * The default implementation for 64-bit architectures is using |
210 | * a 64-bit accumulator and operating on 32-bit operands. |
211 | * |
212 | * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core |
213 | * of the inner loop. After each iteration of the inner loop, a partial |
214 | * reduction is done to avoid carry in long packets. |
215 | */ |
216 | |
217 | #if !defined(__LP64__) |
218 | /* 32-bit version */ |
/*
 * 32-bit variant: checksum `len' bytes of the mbuf chain rooted at `m',
 * starting `off' bytes in, folded with `initial_sum'.  Uses a 32-bit
 * accumulator over 16-bit loads.  Returns the 16-bit one's-complement
 * sum, or (uint32_t)-1 if the chain runs out of data.
 */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* pre-fold the caller's 32-bit seed into a 17-bit quantity */
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/* consume the initial offset, walking the chain as needed */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n" , __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			/* offset lands inside this buffer; enter main loop */
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n" , __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		/*
		 * If this buffer began at an odd byte position within the
		 * overall stream, every 16-bit word below is read shifted
		 * by one byte; `partial' must then be byte-swapped before
		 * being folded into `sum'.
		 */
		needs_swap = started_on_odd;
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/*
			 * Fold early if the top bits are set, so the next
			 * 32-byte round cannot overflow the 32-bit partial
			 * accumulator.
			 */
			if (PREDICT_FALSE(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
			/* odd trailing byte: next buffer starts mid-word */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* rotate to compensate for the odd starting byte */
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* final fold: collapse remaining carries into 16 bits */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
367 | |
368 | #else /* __LP64__ */ |
369 | /* 64-bit version */ |
/*
 * 64-bit variant: checksum `len' bytes of the mbuf chain rooted at `m',
 * starting `off' bytes in, folded with `initial_sum'.  Uses a 64-bit
 * accumulator over 32-bit loads.  Returns the 16-bit one's-complement
 * sum, or (uint32_t)-1 if the chain runs out of data.
 */
uint32_t
os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	sum = initial_sum;

	/* consume the initial offset, walking the chain as needed */
	for (;;) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n" , __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		if (mlen > off) {
			/* offset lands inside this buffer; enter main loop */
			mlen -= off;
			data = m->_m_data + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->_m_next;
	}

	for (; len > 0; m = m->_m_next) {
		if (PREDICT_FALSE(m == NULL)) {
			CKSUM_ERR("%s: out of data\n" , __func__);
			return (uint32_t)-1;
		}
		mlen = m->_m_len;
		data = m->_m_data;
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else
			partial = *data;
#endif
			++data;
			--mlen;
		}
		/*
		 * If this buffer began at an odd byte position within the
		 * overall stream, the words read below are shifted by one
		 * byte; `partial' must then be byte-swapped before being
		 * folded into `sum'.
		 */
		needs_swap = started_on_odd;
		if ((uintptr_t)data & 2) {
			/* consume one 16-bit word to reach 4-byte alignment */
			if (mlen < 2) {
				goto trailing_bytes;
			}
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/*
			 * Fold early if the top two bits are set, so the
			 * next 64-byte round cannot overflow the 64-bit
			 * partial accumulator.
			 */
			if (PREDICT_FALSE(partial & (3ULL << 62))) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
			/* odd trailing byte: next buffer starts mid-word */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else
			partial += *data << 8;
#endif
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			/* rotate to compensate for the odd starting byte */
			partial = (partial << 8) + (partial >> 56);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/* final fold: collapse all four 16-bit lanes plus carries */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
#endif /* __LP64__ */
534 | |
535 | #endif /* __i386__ || __x86_64__ */ |
536 | |