1 | /* |
2 | * Copyright (c) 2000-2016 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.1.2.2 2001/07/03 11:01:46 ume Exp $ */ |
29 | /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ |
30 | |
31 | /* |
32 | * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. |
33 | * All rights reserved. |
34 | * |
35 | * Redistribution and use in source and binary forms, with or without |
36 | * modification, are permitted provided that the following conditions |
37 | * are met: |
38 | * 1. Redistributions of source code must retain the above copyright |
39 | * notice, this list of conditions and the following disclaimer. |
40 | * 2. Redistributions in binary form must reproduce the above copyright |
41 | * notice, this list of conditions and the following disclaimer in the |
42 | * documentation and/or other materials provided with the distribution. |
43 | * 3. Neither the name of the project nor the names of its contributors |
44 | * may be used to endorse or promote products derived from this software |
45 | * without specific prior written permission. |
46 | * |
47 | * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND |
48 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
49 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
50 | * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE |
51 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
52 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
53 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
54 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
55 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
56 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
57 | * SUCH DAMAGE. |
58 | */ |
59 | /* |
60 | * My grandfather said that there's a devil inside tunnelling technology... |
61 | * |
62 | * We have surprisingly many protocols that want packets with IP protocol |
63 | * #4 or #41. Here's a list of protocols that want protocol #41: |
64 | * RFC1933 configured tunnel |
65 | * RFC1933 automatic tunnel |
66 | * RFC2401 IPsec tunnel |
67 | * RFC2473 IPv6 generic packet tunnelling |
68 | * RFC2529 6over4 tunnel |
69 | * mobile-ip6 (uses RFC2473) |
70 | * 6to4 tunnel |
 * Here's a list of protocols that want protocol #4:
72 | * RFC1853 IPv4-in-IPv4 tunnelling |
73 | * RFC2003 IPv4 encapsulation within IPv4 |
74 | * RFC2344 reverse tunnelling for mobile-ip4 |
75 | * RFC2401 IPsec tunnel |
 * Well, what can I say.  They impose different en/decapsulation mechanisms
 * from each other, so they need separate protocol handlers.  The only one
 * we can easily determine by protocol # is IPsec, which always has an
 * AH/ESP/IPComp header right after the outer IP header.
80 | * |
81 | * So, clearly good old protosw does not work for protocol #4 and #41. |
82 | * The code will let you match protocol via src/dst address pair. |
83 | */ |
84 | /* XXX is M_NETADDR correct? */ |
85 | |
86 | #include <sys/param.h> |
87 | #include <sys/systm.h> |
88 | #include <sys/socket.h> |
89 | #include <sys/sockio.h> |
90 | #include <sys/mbuf.h> |
91 | #include <sys/mcache.h> |
92 | #include <sys/errno.h> |
93 | #include <sys/domain.h> |
94 | #include <sys/protosw.h> |
95 | #include <sys/queue.h> |
96 | |
97 | #include <net/if.h> |
98 | #include <net/route.h> |
99 | |
100 | #include <netinet/in.h> |
101 | #include <netinet/in_systm.h> |
102 | #include <netinet/ip.h> |
103 | #include <netinet/ip_var.h> |
104 | #include <netinet/ip_encap.h> |
105 | |
106 | #if INET6 |
107 | #include <netinet/ip6.h> |
108 | #include <netinet6/ip6_var.h> |
109 | #include <netinet6/ip6protosw.h> |
110 | #endif |
111 | |
112 | |
113 | #include <net/net_osdep.h> |
114 | |
115 | #ifndef __APPLE__ |
116 | #include <sys/kernel.h> |
117 | #include <sys/malloc.h> |
118 | MALLOC_DEFINE(M_NETADDR, "Export Host" , "Export host address structure" ); |
119 | #endif |
120 | |
/*
 * Forward declarations for the file-local helpers defined further down:
 *   encap_init()    - common init hook behind encap4_init()/encap6_init()
 *   encap_add()     - links a new entry onto the encaptab list
 *   mask_match()    - scores an address pair against an entry's masks
 *   encap_fillarg() - tags an mbuf with the matched entry's argument
 */
static void encap_init(struct protosw *, struct domain *);
static void encap_add(struct encaptab *);
static int mask_match(const struct encaptab *, const struct sockaddr *,
	const struct sockaddr *);
static void encap_fillarg(struct mbuf *, const struct encaptab *);

/*
 * Global list of registered encapsulation entries.  Entries are added by
 * encap_attach()/encap_attach_func(), removed by encap_detach(), and walked
 * by encap4_input()/encap6_input() for every candidate packet.
 *
 * The head is initialized statically (explicitly when the queue macros
 * provide LIST_HEAD_INITIALIZER, otherwise by relying on zeroed BSS)
 * rather than at run time in encap_init().
 */
#ifndef LIST_HEAD_INITIALIZER
/* rely upon BSS initialization */
LIST_HEAD(, encaptab) encaptab;
#else
LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab);
#endif
133 | |
134 | static void |
135 | encap_init(struct protosw *pp, struct domain *dp) |
136 | { |
137 | #pragma unused(dp) |
138 | static int encap_initialized = 0; |
139 | |
140 | VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); |
141 | |
142 | /* This gets called by more than one protocols, so initialize once */ |
143 | if (encap_initialized) |
144 | return; |
145 | encap_initialized = 1; |
146 | #if 0 |
147 | /* |
148 | * we cannot use LIST_INIT() here, since drivers may want to call |
149 | * encap_attach(), on driver attach. encap_init() will be called |
150 | * on AF_INET{,6} initialization, which happens after driver |
151 | * initialization - using LIST_INIT() here can nuke encap_attach() |
152 | * from drivers. |
153 | */ |
154 | LIST_INIT(&encaptab); |
155 | #endif |
156 | } |
157 | |
/*
 * IPv4 protosw init entry point; simply forwards to the shared
 * encap_init().
 */
void
encap4_init(struct protosw *pp, struct domain *dp)
{
	encap_init(pp, dp);
}
163 | |
/*
 * IPv6 protosw init entry point.  The ip6protosw pointer is reinterpreted
 * as a plain protosw pointer and handed to the shared encap_init().
 */
void
encap6_init(struct ip6protosw *pp, struct domain *dp)
{
	struct protosw *generic_pp = (struct protosw *)pp;

	encap_init(generic_pp, dp);
}
169 | |
170 | #if INET |
171 | void |
172 | encap4_input(struct mbuf *m, int off) |
173 | { |
174 | int proto; |
175 | struct ip *ip; |
176 | struct sockaddr_in s, d; |
177 | const struct protosw *psw; |
178 | struct encaptab *ep, *match; |
179 | int prio, matchprio; |
180 | |
181 | #ifndef __APPLE__ |
182 | va_start(ap, m); |
183 | off = va_arg(ap, int); |
184 | proto = va_arg(ap, int); |
185 | va_end(ap); |
186 | #endif |
187 | |
188 | /* Expect 32-bit aligned data pointer on strict-align platforms */ |
189 | MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); |
190 | |
191 | ip = mtod(m, struct ip *); |
192 | #ifdef __APPLE__ |
193 | proto = ip->ip_p; |
194 | #endif |
195 | |
196 | bzero(&s, sizeof(s)); |
197 | s.sin_family = AF_INET; |
198 | s.sin_len = sizeof(struct sockaddr_in); |
199 | s.sin_addr = ip->ip_src; |
200 | bzero(&d, sizeof(d)); |
201 | d.sin_family = AF_INET; |
202 | d.sin_len = sizeof(struct sockaddr_in); |
203 | d.sin_addr = ip->ip_dst; |
204 | |
205 | match = NULL; |
206 | matchprio = 0; |
207 | for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { |
208 | if (ep->af != AF_INET) |
209 | continue; |
210 | if (ep->proto >= 0 && ep->proto != proto) |
211 | continue; |
212 | if (ep->func) |
213 | prio = (*ep->func)(m, off, proto, ep->arg); |
214 | else { |
215 | /* |
216 | * it's inbound traffic, we need to match in reverse |
217 | * order |
218 | */ |
219 | prio = mask_match(ep, (struct sockaddr *)&d, |
220 | (struct sockaddr *)&s); |
221 | } |
222 | |
223 | /* |
224 | * We prioritize the matches by using bit length of the |
225 | * matches. mask_match() and user-supplied matching function |
226 | * should return the bit length of the matches (for example, |
227 | * if both src/dst are matched for IPv4, 64 should be returned). |
228 | * 0 or negative return value means "it did not match". |
229 | * |
230 | * The question is, since we have two "mask" portion, we |
231 | * cannot really define total order between entries. |
232 | * For example, which of these should be preferred? |
233 | * mask_match() returns 48 (32 + 16) for both of them. |
234 | * src=3ffe::/16, dst=3ffe:501::/32 |
235 | * src=3ffe:501::/32, dst=3ffe::/16 |
236 | * |
237 | * We need to loop through all the possible candidates |
238 | * to get the best match - the search takes O(n) for |
239 | * n attachments (i.e. interfaces). |
240 | */ |
241 | if (prio <= 0) |
242 | continue; |
243 | if (prio > matchprio) { |
244 | matchprio = prio; |
245 | match = ep; |
246 | } |
247 | } |
248 | |
249 | if (match) { |
250 | /* found a match, "match" has the best one */ |
251 | psw = (const struct protosw *)match->psw; |
252 | if (psw && psw->pr_input) { |
253 | encap_fillarg(m, match); |
254 | (*psw->pr_input)(m, off); |
255 | } else |
256 | m_freem(m); |
257 | return; |
258 | } |
259 | |
260 | /* last resort: inject to raw socket */ |
261 | rip_input(m, off); |
262 | } |
263 | #endif |
264 | |
265 | #if INET6 |
266 | int |
267 | encap6_input(struct mbuf **mp, int *offp, int proto) |
268 | { |
269 | struct mbuf *m = *mp; |
270 | struct ip6_hdr *ip6; |
271 | struct sockaddr_in6 s, d; |
272 | const struct ip6protosw *psw; |
273 | struct encaptab *ep, *match; |
274 | int prio, matchprio; |
275 | |
276 | /* Expect 32-bit aligned data pointer on strict-align platforms */ |
277 | MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); |
278 | |
279 | ip6 = mtod(m, struct ip6_hdr *); |
280 | bzero(&s, sizeof(s)); |
281 | s.sin6_family = AF_INET6; |
282 | s.sin6_len = sizeof(struct sockaddr_in6); |
283 | s.sin6_addr = ip6->ip6_src; |
284 | bzero(&d, sizeof(d)); |
285 | d.sin6_family = AF_INET6; |
286 | d.sin6_len = sizeof(struct sockaddr_in6); |
287 | d.sin6_addr = ip6->ip6_dst; |
288 | |
289 | match = NULL; |
290 | matchprio = 0; |
291 | for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { |
292 | if (ep->af != AF_INET6) |
293 | continue; |
294 | if (ep->proto >= 0 && ep->proto != proto) |
295 | continue; |
296 | if (ep->func) |
297 | prio = (*ep->func)(m, *offp, proto, ep->arg); |
298 | else { |
299 | /* |
300 | * it's inbound traffic, we need to match in reverse |
301 | * order |
302 | */ |
303 | prio = mask_match(ep, (struct sockaddr *)&d, |
304 | (struct sockaddr *)&s); |
305 | } |
306 | |
307 | /* see encap4_input() for issues here */ |
308 | if (prio <= 0) |
309 | continue; |
310 | if (prio > matchprio) { |
311 | matchprio = prio; |
312 | match = ep; |
313 | } |
314 | } |
315 | |
316 | if (match) { |
317 | /* found a match */ |
318 | psw = (const struct ip6protosw *)match->psw; |
319 | if (psw && psw->pr_input) { |
320 | encap_fillarg(m, match); |
321 | return (*psw->pr_input)(mp, offp, proto); |
322 | } else { |
323 | m_freem(m); |
324 | return IPPROTO_DONE; |
325 | } |
326 | } |
327 | |
328 | /* last resort: inject to raw socket */ |
329 | return rip6_input(mp, offp, proto); |
330 | } |
331 | #endif |
332 | |
/*
 * Link a fully set-up entry onto the global encaptab list.  New entries go
 * to the head, so the most recently attached entry is visited first by the
 * list walkers in this file.
 */
static void
encap_add(struct encaptab *ep)
{
	LIST_INSERT_HEAD(&encaptab, ep, chain);
}
338 | |
339 | /* |
340 | * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. |
341 | * length of mask (sm and dm) is assumed to be same as sp/dp. |
342 | * Return value will be necessary as input (cookie) for encap_detach(). |
343 | */ |
344 | const struct encaptab * |
345 | encap_attach(int af, int proto, const struct sockaddr *sp, |
346 | const struct sockaddr *sm, const struct sockaddr *dp, |
347 | const struct sockaddr *dm, const struct protosw *psw, void *arg) |
348 | { |
349 | struct encaptab *ep; |
350 | int error; |
351 | |
352 | /* sanity check on args */ |
353 | if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) { |
354 | error = EINVAL; |
355 | goto fail; |
356 | } |
357 | if (sp->sa_len != dp->sa_len) { |
358 | error = EINVAL; |
359 | goto fail; |
360 | } |
361 | if (af != sp->sa_family || af != dp->sa_family) { |
362 | error = EINVAL; |
363 | goto fail; |
364 | } |
365 | |
366 | /* check if anyone have already attached with exactly same config */ |
367 | for (ep = LIST_FIRST(&encaptab); ep; ep = LIST_NEXT(ep, chain)) { |
368 | if (ep->af != af) |
369 | continue; |
370 | if (ep->proto != proto) |
371 | continue; |
372 | if (ep->src.ss_len != sp->sa_len || |
373 | bcmp(&ep->src, sp, sp->sa_len) != 0 || |
374 | bcmp(&ep->srcmask, sm, sp->sa_len) != 0) |
375 | continue; |
376 | if (ep->dst.ss_len != dp->sa_len || |
377 | bcmp(&ep->dst, dp, dp->sa_len) != 0 || |
378 | bcmp(&ep->dstmask, dm, dp->sa_len) != 0) |
379 | continue; |
380 | |
381 | error = EEXIST; |
382 | goto fail; |
383 | } |
384 | |
385 | ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */ |
386 | if (ep == NULL) { |
387 | error = ENOBUFS; |
388 | goto fail; |
389 | } |
390 | |
391 | ep->af = af; |
392 | ep->proto = proto; |
393 | bcopy(sp, &ep->src, sp->sa_len); |
394 | bcopy(sm, &ep->srcmask, sp->sa_len); |
395 | bcopy(dp, &ep->dst, dp->sa_len); |
396 | bcopy(dm, &ep->dstmask, dp->sa_len); |
397 | ep->psw = psw; |
398 | ep->arg = arg; |
399 | |
400 | encap_add(ep); |
401 | |
402 | error = 0; |
403 | return ep; |
404 | |
405 | fail: |
406 | return NULL; |
407 | } |
408 | |
409 | const struct encaptab * |
410 | encap_attach_func( int af, int proto, |
411 | int (*func)(const struct mbuf *, int, int, void *), |
412 | const struct protosw *psw, void *arg) |
413 | { |
414 | struct encaptab *ep; |
415 | int error; |
416 | |
417 | /* sanity check on args */ |
418 | if (!func) { |
419 | error = EINVAL; |
420 | goto fail; |
421 | } |
422 | |
423 | ep = _MALLOC(sizeof(*ep), M_NETADDR, M_WAITOK | M_ZERO); /* XXX */ |
424 | if (ep == NULL) { |
425 | error = ENOBUFS; |
426 | goto fail; |
427 | } |
428 | |
429 | ep->af = af; |
430 | ep->proto = proto; |
431 | ep->func = func; |
432 | ep->psw = psw; |
433 | ep->arg = arg; |
434 | |
435 | encap_add(ep); |
436 | |
437 | error = 0; |
438 | return ep; |
439 | |
440 | fail: |
441 | return NULL; |
442 | } |
443 | |
444 | int |
445 | encap_detach(const struct encaptab *cookie) |
446 | { |
447 | const struct encaptab *ep = cookie; |
448 | struct encaptab *p; |
449 | |
450 | for (p = LIST_FIRST(&encaptab); p; p = LIST_NEXT(p, chain)) { |
451 | if (p == ep) { |
452 | LIST_REMOVE(p, chain); |
453 | _FREE(p, M_NETADDR); /*XXX*/ |
454 | return 0; |
455 | } |
456 | } |
457 | |
458 | return EINVAL; |
459 | } |
460 | |
461 | static int |
462 | mask_match(const struct encaptab *ep, const struct sockaddr *sp, |
463 | const struct sockaddr *dp) |
464 | { |
465 | struct sockaddr_storage s; |
466 | struct sockaddr_storage d; |
467 | int i; |
468 | const u_int8_t *p, *q; |
469 | u_int8_t *r; |
470 | int matchlen; |
471 | |
472 | if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) |
473 | return 0; |
474 | if (sp->sa_family != ep->af || dp->sa_family != ep->af) |
475 | return 0; |
476 | if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) |
477 | return 0; |
478 | |
479 | matchlen = 0; |
480 | |
481 | p = (const u_int8_t *)sp; |
482 | q = (const u_int8_t *)&ep->srcmask; |
483 | r = (u_int8_t *)&s; |
484 | for (i = 0 ; i < sp->sa_len; i++) { |
485 | r[i] = p[i] & q[i]; |
486 | /* XXX estimate */ |
487 | matchlen += (q[i] ? 8 : 0); |
488 | } |
489 | |
490 | p = (const u_int8_t *)dp; |
491 | q = (const u_int8_t *)&ep->dstmask; |
492 | r = (u_int8_t *)&d; |
493 | for (i = 0 ; i < dp->sa_len; i++) { |
494 | r[i] = p[i] & q[i]; |
495 | /* XXX rough estimate */ |
496 | matchlen += (q[i] ? 8 : 0); |
497 | } |
498 | |
499 | /* need to overwrite len/family portion as we don't compare them */ |
500 | s.ss_len = sp->sa_len; |
501 | s.ss_family = sp->sa_family; |
502 | d.ss_len = dp->sa_len; |
503 | d.ss_family = dp->sa_family; |
504 | |
505 | if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && |
506 | bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { |
507 | return matchlen; |
508 | } else |
509 | return 0; |
510 | } |
511 | |
/*
 * Payload of the KERNEL_TAG_TYPE_ENCAP mbuf tag: the opaque argument of
 * the encaptab entry that matched the packet.  It is stored by
 * encap_fillarg() and read back by encap_getarg(), both of which treat it
 * as a plain "void *", so it is declared as one.
 */
struct encaptabtag {
	void *arg;
};
515 | |
516 | static void |
517 | encap_fillarg( |
518 | struct mbuf *m, |
519 | const struct encaptab *ep) |
520 | { |
521 | struct m_tag *tag; |
522 | struct encaptabtag *et; |
523 | |
524 | tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, |
525 | sizeof(struct encaptabtag), M_WAITOK, m); |
526 | |
527 | if (tag != NULL) { |
528 | et = (struct encaptabtag*)(tag + 1); |
529 | et->arg = ep->arg; |
530 | m_tag_prepend(m, tag); |
531 | } |
532 | } |
533 | |
534 | void * |
535 | encap_getarg(struct mbuf *m) |
536 | { |
537 | struct m_tag *tag; |
538 | struct encaptabtag *et; |
539 | void *p = NULL; |
540 | |
541 | tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_ENCAP, NULL); |
542 | if (tag) { |
543 | et = (struct encaptabtag*)(tag + 1); |
544 | p = et->arg; |
545 | m_tag_delete(m, tag); |
546 | } |
547 | |
548 | return p; |
549 | } |
550 | |