1 | /* |
2 | * Copyright (c) 2012-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /*- |
30 | * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>. |
31 | * All rights reserved. |
32 | * |
33 | * Redistribution and use in source and binary forms, with or without |
34 | * modification, are permitted provided that the following conditions |
35 | * are met: |
36 | * |
37 | * 1. Redistributions of source code must retain the above copyright |
38 | * notice, this list of conditions and the following disclaimer. |
39 | * 2. Redistributions in binary form must reproduce the above copyright |
40 | * notice, this list of conditions and the following disclaimer in |
41 | * the documentation and/or other materials provided with the |
42 | * distribution. |
43 | * |
44 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
45 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
46 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
47 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
48 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
49 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, |
50 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
51 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
52 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
53 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
54 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
55 | * SUCH DAMAGE. |
56 | */ |
57 | |
58 | #ifdef KERNEL |
59 | #include <sys/param.h> |
60 | #include <machine/endian.h> |
61 | #include <sys/mcache.h> |
62 | #include <sys/mbuf.h> |
63 | #include <kern/debug.h> |
64 | #include <libkern/libkern.h> |
65 | #include <mach/boolean.h> |
66 | #include <pexpert/pexpert.h> |
67 | #define CKSUM_ERR(fmt, args...) kprintf(fmt, ## args) |
68 | #else /* !KERNEL */ |
69 | #ifndef LIBSYSCALL_INTERFACE |
70 | #error "LIBSYSCALL_INTERFACE not defined" |
71 | #endif /* !LIBSYSCALL_INTERFACE */ |
72 | #include <stdlib.h> |
73 | #include <stddef.h> |
74 | #include <stdint.h> |
75 | #include <unistd.h> |
76 | #include <strings.h> |
77 | #include <mach/boolean.h> |
78 | #endif /* !KERNEL */ |
79 | |
/*
 * compile time assert
 *
 * Fallback used only when no earlier header supplied _CASSERT; it maps
 * directly onto the C11 _Static_assert keyword.
 */
#ifndef _CASSERT
#define _CASSERT(x) _Static_assert(x, "compile-time assertion failed")
#endif /* !_CASSERT */

/*
 * Fallback VERIFY: expands to a no-op, so the expression argument is
 * not evaluated at all when no earlier header defined VERIFY.
 */
#ifndef VERIFY
#define VERIFY(EX) ((void)0)
#endif /* !VERIFY */

/*
 * Fallback CKSUM_ERR: discards the message.  The KERNEL branch of the
 * include block above defines it as kprintf(), so this no-op only
 * applies to non-kernel builds.
 */
#ifndef CKSUM_ERR
#define CKSUM_ERR(fmt, args...) ((void)0)
#endif /* !CKSUM_ERR */

/*
 * Branch-prediction hints.  The operand is cast to long and normalized
 * to 0/1 with "!!" before being handed to __builtin_expect().
 * NOTE(review): because of the (long) cast, an operand wider than long
 * is truncated before it is tested.
 */
#define PREDICT_TRUE(x) __builtin_expect(!!((long)(x)), 1L)
#define PREDICT_FALSE(x) __builtin_expect(!!((long)(x)), 0L)
95 | |
/*
 * fake mbuf struct used only for calling os_cpu_in_cksum_mbuf()
 *
 * os_cpu_in_cksum_mbuf() reads just _m_next, _m_data and _m_len.  The
 * field offsets are checked with _CASSERT in os_cpu_in_cksum() (and, in
 * kernel builds, compared against the real struct mbuf), so the member
 * order and types here must not change.
 */
struct _mbuf {
	struct _mbuf *_m_next;	/* next buffer in the chain, or NULL */
	void *_m_pad;		/* unused here; keeps _m_data at its asserted offset */
	uint8_t *_m_data;	/* first byte of this buffer's data */
	int32_t _m_len;		/* number of bytes available at _m_data */
};
103 | |
/*
 * Checksum entry points defined in this file.
 *
 * os_cpu_in_cksum():      sum `len' bytes of the flat buffer `data'.
 * os_cpu_in_cksum_mbuf(): sum `len' bytes of the chain `m', starting
 *                         `off' bytes into it.
 *
 * Both are seeded with `initial_sum'.
 */
extern uint32_t os_cpu_in_cksum(const void *data, uint32_t len,
    uint32_t initial_sum);
extern uint32_t os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off,
    uint32_t initial_sum);
106 | |
107 | uint32_t |
108 | os_cpu_in_cksum(const void *data, uint32_t len, uint32_t initial_sum) |
109 | { |
110 | /* |
111 | * If data is 4-bytes aligned, length is multiple of 4-bytes, |
112 | * and the amount to checksum is small, this would be quicker; |
113 | * this is suitable for IPv4 header. |
114 | */ |
115 | if (IS_P2ALIGNED(data, sizeof (uint32_t)) && |
116 | len <= 64 && (len & 3) == 0) { |
117 | uint8_t *p = __DECONST(uint8_t *, data); |
118 | uint64_t sum = initial_sum; |
119 | |
120 | if (PREDICT_TRUE(len == 20)) { /* simple IPv4 header */ |
121 | sum += *(uint32_t *)(void *)p; |
122 | sum += *(uint32_t *)(void *)(p + 4); |
123 | sum += *(uint32_t *)(void *)(p + 8); |
124 | sum += *(uint32_t *)(void *)(p + 12); |
125 | sum += *(uint32_t *)(void *)(p + 16); |
126 | } else { |
127 | while (len) { |
128 | sum += *(uint32_t *)(void *)p; |
129 | p += 4; |
130 | len -= 4; |
131 | } |
132 | } |
133 | |
134 | /* fold 64-bit to 16-bit (deferred carries) */ |
135 | sum = (sum >> 32) + (sum & 0xffffffff); /* 33-bit */ |
136 | sum = (sum >> 16) + (sum & 0xffff); /* 17-bit + carry */ |
137 | sum = (sum >> 16) + (sum & 0xffff); /* 16-bit + carry */ |
138 | sum = (sum >> 16) + (sum & 0xffff); /* final carry */ |
139 | |
140 | return (sum & 0xffff); |
141 | } |
142 | |
143 | /* |
144 | * Otherwise, let os_cpu_in_cksum_mbuf() handle it; it only looks |
145 | * at 3 fields: {next,data,len}, and since it doesn't care about |
146 | * the authenticity of the mbuf, we use a fake one here. Make |
147 | * sure the offsets are as expected. |
148 | */ |
149 | #if defined(__LP64__) |
150 | _CASSERT(offsetof(struct _mbuf, _m_next) == 0); |
151 | _CASSERT(offsetof(struct _mbuf, _m_data) == 16); |
152 | _CASSERT(offsetof(struct _mbuf, _m_len) == 24); |
153 | #else /* !__LP64__ */ |
154 | _CASSERT(offsetof(struct _mbuf, _m_next) == 0); |
155 | _CASSERT(offsetof(struct _mbuf, _m_data) == 8); |
156 | _CASSERT(offsetof(struct _mbuf, _m_len) == 12); |
157 | #endif /* !__LP64__ */ |
158 | #ifdef KERNEL |
159 | _CASSERT(offsetof(struct _mbuf, _m_next) == |
160 | offsetof(struct mbuf, m_next)); |
161 | _CASSERT(offsetof(struct _mbuf, _m_data) == |
162 | offsetof(struct mbuf, m_data)); |
163 | _CASSERT(offsetof(struct _mbuf, _m_len) == |
164 | offsetof(struct mbuf, m_len)); |
165 | #endif /* KERNEL */ |
166 | struct _mbuf m = { |
167 | ._m_next = NULL, |
168 | ._m_data = __DECONST(uint8_t *, data), |
169 | ._m_len = len, |
170 | }; |
171 | |
172 | return (os_cpu_in_cksum_mbuf(&m, len, 0, initial_sum)); |
173 | } |
174 | |
175 | #if defined(__i386__) || defined(__x86_64__) |
176 | |
177 | /* |
178 | * Checksum routine for Internet Protocol family headers (Portable Version). |
179 | * |
180 | * This routine is very heavily used in the network |
181 | * code and should be modified for each CPU to be as fast as possible. |
182 | * |
183 | * A discussion of different implementation techniques can be found in |
184 | * RFC 1071. |
185 | * |
186 | * The default implementation for 32-bit architectures is using |
187 | * a 32-bit accumulator and operating on 16-bit operands. |
188 | * |
189 | * The default implementation for 64-bit architectures is using |
190 | * a 64-bit accumulator and operating on 32-bit operands. |
191 | * |
192 | * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core |
193 | * of the inner loop. After each iteration of the inner loop, a partial |
194 | * reduction is done to avoid carry in long packets. |
195 | */ |
196 | |
197 | #if !defined(__LP64__) |
198 | /* 32-bit version */ |
199 | uint32_t |
200 | os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum) |
201 | { |
202 | int mlen; |
203 | uint32_t sum, partial; |
204 | unsigned int final_acc; |
205 | uint8_t *data; |
206 | boolean_t needs_swap, started_on_odd; |
207 | |
208 | VERIFY(len >= 0); |
209 | VERIFY(off >= 0); |
210 | |
211 | needs_swap = FALSE; |
212 | started_on_odd = FALSE; |
213 | sum = (initial_sum >> 16) + (initial_sum & 0xffff); |
214 | |
215 | for (;;) { |
216 | if (PREDICT_FALSE(m == NULL)) { |
217 | CKSUM_ERR("%s: out of data\n" , __func__); |
218 | return ((uint32_t)-1); |
219 | } |
220 | mlen = m->_m_len; |
221 | if (mlen > off) { |
222 | mlen -= off; |
223 | data = m->_m_data + off; |
224 | goto post_initial_offset; |
225 | } |
226 | off -= mlen; |
227 | if (len == 0) |
228 | break; |
229 | m = m->_m_next; |
230 | } |
231 | |
232 | for (; len > 0; m = m->_m_next) { |
233 | if (PREDICT_FALSE(m == NULL)) { |
234 | CKSUM_ERR("%s: out of data\n" , __func__); |
235 | return ((uint32_t)-1); |
236 | } |
237 | mlen = m->_m_len; |
238 | data = m->_m_data; |
239 | post_initial_offset: |
240 | if (mlen == 0) |
241 | continue; |
242 | if (mlen > len) |
243 | mlen = len; |
244 | len -= mlen; |
245 | |
246 | partial = 0; |
247 | if ((uintptr_t)data & 1) { |
248 | /* Align on word boundary */ |
249 | started_on_odd = !started_on_odd; |
250 | #if BYTE_ORDER == LITTLE_ENDIAN |
251 | partial = *data << 8; |
252 | #else |
253 | partial = *data; |
254 | #endif |
255 | ++data; |
256 | --mlen; |
257 | } |
258 | needs_swap = started_on_odd; |
259 | while (mlen >= 32) { |
260 | __builtin_prefetch(data + 32); |
261 | partial += *(uint16_t *)(void *)data; |
262 | partial += *(uint16_t *)(void *)(data + 2); |
263 | partial += *(uint16_t *)(void *)(data + 4); |
264 | partial += *(uint16_t *)(void *)(data + 6); |
265 | partial += *(uint16_t *)(void *)(data + 8); |
266 | partial += *(uint16_t *)(void *)(data + 10); |
267 | partial += *(uint16_t *)(void *)(data + 12); |
268 | partial += *(uint16_t *)(void *)(data + 14); |
269 | partial += *(uint16_t *)(void *)(data + 16); |
270 | partial += *(uint16_t *)(void *)(data + 18); |
271 | partial += *(uint16_t *)(void *)(data + 20); |
272 | partial += *(uint16_t *)(void *)(data + 22); |
273 | partial += *(uint16_t *)(void *)(data + 24); |
274 | partial += *(uint16_t *)(void *)(data + 26); |
275 | partial += *(uint16_t *)(void *)(data + 28); |
276 | partial += *(uint16_t *)(void *)(data + 30); |
277 | data += 32; |
278 | mlen -= 32; |
279 | if (PREDICT_FALSE(partial & 0xc0000000)) { |
280 | if (needs_swap) |
281 | partial = (partial << 8) + |
282 | (partial >> 24); |
283 | sum += (partial >> 16); |
284 | sum += (partial & 0xffff); |
285 | partial = 0; |
286 | } |
287 | } |
288 | if (mlen & 16) { |
289 | partial += *(uint16_t *)(void *)data; |
290 | partial += *(uint16_t *)(void *)(data + 2); |
291 | partial += *(uint16_t *)(void *)(data + 4); |
292 | partial += *(uint16_t *)(void *)(data + 6); |
293 | partial += *(uint16_t *)(void *)(data + 8); |
294 | partial += *(uint16_t *)(void *)(data + 10); |
295 | partial += *(uint16_t *)(void *)(data + 12); |
296 | partial += *(uint16_t *)(void *)(data + 14); |
297 | data += 16; |
298 | mlen -= 16; |
299 | } |
300 | /* |
301 | * mlen is not updated below as the remaining tests |
302 | * are using bit masks, which are not affected. |
303 | */ |
304 | if (mlen & 8) { |
305 | partial += *(uint16_t *)(void *)data; |
306 | partial += *(uint16_t *)(void *)(data + 2); |
307 | partial += *(uint16_t *)(void *)(data + 4); |
308 | partial += *(uint16_t *)(void *)(data + 6); |
309 | data += 8; |
310 | } |
311 | if (mlen & 4) { |
312 | partial += *(uint16_t *)(void *)data; |
313 | partial += *(uint16_t *)(void *)(data + 2); |
314 | data += 4; |
315 | } |
316 | if (mlen & 2) { |
317 | partial += *(uint16_t *)(void *)data; |
318 | data += 2; |
319 | } |
320 | if (mlen & 1) { |
321 | #if BYTE_ORDER == LITTLE_ENDIAN |
322 | partial += *data; |
323 | #else |
324 | partial += *data << 8; |
325 | #endif |
326 | started_on_odd = !started_on_odd; |
327 | } |
328 | |
329 | if (needs_swap) |
330 | partial = (partial << 8) + (partial >> 24); |
331 | sum += (partial >> 16) + (partial & 0xffff); |
332 | /* |
333 | * Reduce sum to allow potential byte swap |
334 | * in the next iteration without carry. |
335 | */ |
336 | sum = (sum >> 16) + (sum & 0xffff); |
337 | } |
338 | final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff); |
339 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); |
340 | return (final_acc & 0xffff); |
341 | } |
342 | |
343 | #else /* __LP64__ */ |
344 | /* 64-bit version */ |
345 | uint32_t |
346 | os_cpu_in_cksum_mbuf(struct _mbuf *m, int len, int off, uint32_t initial_sum) |
347 | { |
348 | int mlen; |
349 | uint64_t sum, partial; |
350 | unsigned int final_acc; |
351 | uint8_t *data; |
352 | boolean_t needs_swap, started_on_odd; |
353 | |
354 | VERIFY(len >= 0); |
355 | VERIFY(off >= 0); |
356 | |
357 | needs_swap = FALSE; |
358 | started_on_odd = FALSE; |
359 | sum = initial_sum; |
360 | |
361 | for (;;) { |
362 | if (PREDICT_FALSE(m == NULL)) { |
363 | CKSUM_ERR("%s: out of data\n" , __func__); |
364 | return ((uint32_t)-1); |
365 | } |
366 | mlen = m->_m_len; |
367 | if (mlen > off) { |
368 | mlen -= off; |
369 | data = m->_m_data + off; |
370 | goto post_initial_offset; |
371 | } |
372 | off -= mlen; |
373 | if (len == 0) |
374 | break; |
375 | m = m->_m_next; |
376 | } |
377 | |
378 | for (; len > 0; m = m->_m_next) { |
379 | if (PREDICT_FALSE(m == NULL)) { |
380 | CKSUM_ERR("%s: out of data\n" , __func__); |
381 | return ((uint32_t)-1); |
382 | } |
383 | mlen = m->_m_len; |
384 | data = m->_m_data; |
385 | post_initial_offset: |
386 | if (mlen == 0) |
387 | continue; |
388 | if (mlen > len) |
389 | mlen = len; |
390 | len -= mlen; |
391 | |
392 | partial = 0; |
393 | if ((uintptr_t)data & 1) { |
394 | /* Align on word boundary */ |
395 | started_on_odd = !started_on_odd; |
396 | #if BYTE_ORDER == LITTLE_ENDIAN |
397 | partial = *data << 8; |
398 | #else |
399 | partial = *data; |
400 | #endif |
401 | ++data; |
402 | --mlen; |
403 | } |
404 | needs_swap = started_on_odd; |
405 | if ((uintptr_t)data & 2) { |
406 | if (mlen < 2) |
407 | goto trailing_bytes; |
408 | partial += *(uint16_t *)(void *)data; |
409 | data += 2; |
410 | mlen -= 2; |
411 | } |
412 | while (mlen >= 64) { |
413 | __builtin_prefetch(data + 32); |
414 | __builtin_prefetch(data + 64); |
415 | partial += *(uint32_t *)(void *)data; |
416 | partial += *(uint32_t *)(void *)(data + 4); |
417 | partial += *(uint32_t *)(void *)(data + 8); |
418 | partial += *(uint32_t *)(void *)(data + 12); |
419 | partial += *(uint32_t *)(void *)(data + 16); |
420 | partial += *(uint32_t *)(void *)(data + 20); |
421 | partial += *(uint32_t *)(void *)(data + 24); |
422 | partial += *(uint32_t *)(void *)(data + 28); |
423 | partial += *(uint32_t *)(void *)(data + 32); |
424 | partial += *(uint32_t *)(void *)(data + 36); |
425 | partial += *(uint32_t *)(void *)(data + 40); |
426 | partial += *(uint32_t *)(void *)(data + 44); |
427 | partial += *(uint32_t *)(void *)(data + 48); |
428 | partial += *(uint32_t *)(void *)(data + 52); |
429 | partial += *(uint32_t *)(void *)(data + 56); |
430 | partial += *(uint32_t *)(void *)(data + 60); |
431 | data += 64; |
432 | mlen -= 64; |
433 | if (PREDICT_FALSE(partial & (3ULL << 62))) { |
434 | if (needs_swap) |
435 | partial = (partial << 8) + |
436 | (partial >> 56); |
437 | sum += (partial >> 32); |
438 | sum += (partial & 0xffffffff); |
439 | partial = 0; |
440 | } |
441 | } |
442 | /* |
443 | * mlen is not updated below as the remaining tests |
444 | * are using bit masks, which are not affected. |
445 | */ |
446 | if (mlen & 32) { |
447 | partial += *(uint32_t *)(void *)data; |
448 | partial += *(uint32_t *)(void *)(data + 4); |
449 | partial += *(uint32_t *)(void *)(data + 8); |
450 | partial += *(uint32_t *)(void *)(data + 12); |
451 | partial += *(uint32_t *)(void *)(data + 16); |
452 | partial += *(uint32_t *)(void *)(data + 20); |
453 | partial += *(uint32_t *)(void *)(data + 24); |
454 | partial += *(uint32_t *)(void *)(data + 28); |
455 | data += 32; |
456 | } |
457 | if (mlen & 16) { |
458 | partial += *(uint32_t *)(void *)data; |
459 | partial += *(uint32_t *)(void *)(data + 4); |
460 | partial += *(uint32_t *)(void *)(data + 8); |
461 | partial += *(uint32_t *)(void *)(data + 12); |
462 | data += 16; |
463 | } |
464 | if (mlen & 8) { |
465 | partial += *(uint32_t *)(void *)data; |
466 | partial += *(uint32_t *)(void *)(data + 4); |
467 | data += 8; |
468 | } |
469 | if (mlen & 4) { |
470 | partial += *(uint32_t *)(void *)data; |
471 | data += 4; |
472 | } |
473 | if (mlen & 2) { |
474 | partial += *(uint16_t *)(void *)data; |
475 | data += 2; |
476 | } |
477 | trailing_bytes: |
478 | if (mlen & 1) { |
479 | #if BYTE_ORDER == LITTLE_ENDIAN |
480 | partial += *data; |
481 | #else |
482 | partial += *data << 8; |
483 | #endif |
484 | started_on_odd = !started_on_odd; |
485 | } |
486 | |
487 | if (needs_swap) |
488 | partial = (partial << 8) + (partial >> 56); |
489 | sum += (partial >> 32) + (partial & 0xffffffff); |
490 | /* |
491 | * Reduce sum to allow potential byte swap |
492 | * in the next iteration without carry. |
493 | */ |
494 | sum = (sum >> 32) + (sum & 0xffffffff); |
495 | } |
496 | final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) + |
497 | ((sum >> 16) & 0xffff) + (sum & 0xffff); |
498 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); |
499 | final_acc = (final_acc >> 16) + (final_acc & 0xffff); |
500 | return (final_acc & 0xffff); |
501 | } |
#endif /* __LP64__ */
503 | |
504 | #endif /* __i386__ || __x86_64__ */ |
505 | |