1/*
2 * Copyright (c) 2015-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25 * THEORY OF OPERATION
26 *
27 * The packet mangler subsystem provides a limited way for user space
28 * applications to apply certain actions on certain flows.
29 *
 * A user space application opens a kernel control socket with the name
 * PACKET_MANGLER_CONTROL_NAME to attach to the packet mangler subsystem.
 * When connected, a "struct packet_mangler" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 * Connect call for packet mangler's kernel control socket also registers
 * IP filters with cookie set to the packet_mangler instance.
36 * The ip filters are removed when control socket is disconnected.
37 */
38#include <sys/types.h>
39#include <sys/kern_control.h>
40#include <sys/domain.h>
41#include <sys/protosw.h>
42#include <sys/syslog.h>
43
44#include <kern/locks.h>
45#include <kern/zalloc.h>
46#include <kern/debug.h>
47
48#include <net/packet_mangler.h>
49
50#include <netinet/tcp.h>
51#include <netinet/tcp_var.h>
52#include <netinet/ip.h>
53#include <netinet/kpi_ipfilter.h>
54#include <string.h>
55#include <libkern/libkern.h>
56
57#define MAX_PACKET_MANGLER 1
58
59#define PKT_MNGLR_FLG_IPFILTER_ATTACHED 0x00000001
60
/*
 * sysctl tree for the packet mangler: a "pktmnglr" node under _net with a
 * single read/write integer, "log", backed by pkt_mnglr_log_level.
 */
SYSCTL_NODE(_net, OID_AUTO, pktmnglr, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "pktmnglr");
SYSCTL_INT(_net_pktmnglr, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&pkt_mnglr_log_level, 0, "");
/*
 * The structure packet_mangler represents a user space packet filter.
 * It's created and associated with a kernel control socket instance
 * (it is handed back to the control socket as its "unitinfo" on connect).
 */
struct packet_mangler {
	kern_ctl_ref			pkt_mnglr_kcref;	/* control reference passed to connect */
	uint32_t			pkt_mnglr_kcunit;	/* control unit (sc_unit) of this instance */
	uint32_t			pkt_mnglr_flags;	/* PKT_MNGLR_FLG_* state bits */
	/* IP filter related params */
	ipfilter_t			pkt_mnglr_ipfref;	/* handle filled in by ipf_addv4() */
	ipfilter_t			pkt_mnglr_ipfrefv6;	/* handle filled in by ipf_addv6() */
	struct ipf_filter		pkt_mnglr_ipfilter;	/* filter descriptor (cookie + callbacks) */

	/* Options (set/read through the control socket's setopt/getopt) */
	uint8_t				activate;	/* filters ignore packets while this is 0 */
	Pkt_Mnglr_Flow			dir;		/* input filter skips OUT, output filter skips IN */
	struct sockaddr_storage		lsaddr;		/* local address to match; ss_family 0 means unset */
	struct sockaddr_storage		rsaddr;		/* remote address to match; ss_family 0 means unset */
	struct sockaddr_storage		swap_lsaddr;	/* not referenced by the code in this file */
	struct sockaddr_storage		swap_rsaddr;	/* not referenced by the code in this file */
	uint32_t			ip_action_mask;	/* stored/returned by set/getopt only */
	uint16_t			lport;		/* compared as-is with tcp th_dport on input; 0 = any */
	uint16_t			rport;		/* compared as-is with tcp th_sport on input; 0 = any */
	uint32_t			proto;		/* IP protocol the input filter acts on */
	uint32_t			proto_action_mask;	/* PKT_MNGLR_TCP_ACT_* bits tested on input */
};
90
/*
 * Array of all the packet mangler instances, indexed by (kcunit - 1).
 * Allocated on first connect with room for MAX_PACKET_MANGLER entries.
 */
struct packet_mangler **packet_manglers = NULL;

uint32_t pkt_mnglr_active_count = 0;	/* Number of active packet filters */
uint32_t pkt_mnglr_close_wait_timeout = 1000; /* in milliseconds */

/* Kernel control reference returned by ctl_register() in pkt_mnglr_init() */
static kern_ctl_ref pkt_mnglr_kctlref = NULL;

/* Lock group/attribute objects allocated in pkt_mnglr_init() */
static lck_grp_attr_t *pkt_mnglr_lck_grp_attr = NULL;
static lck_attr_t *pkt_mnglr_lck_attr = NULL;
static lck_grp_t *pkt_mnglr_lck_grp = NULL;

/* The lock below protects packet_manglers DS, packet_mangler DS */
decl_lck_rw_data(static, pkt_mnglr_lck_rw);

/* Number of return addresses kept in each lock/unlock history ring */
#define	PKT_MNGLR_RW_LCK_MAX	8

/* Ring of the most recent callers that took the lock, and its next slot */
int pkt_mnglr_rw_nxt_lck = 0;
void* pkt_mnglr_rw_lock_history[PKT_MNGLR_RW_LCK_MAX];

/* Ring of the most recent callers that released the lock, and its next slot */
int pkt_mnglr_rw_nxt_unlck = 0;
void* pkt_mnglr_rw_unlock_history[PKT_MNGLR_RW_LCK_MAX];


#define	PACKET_MANGLER_ZONE_NAME	"packet_mangler"
#define	PACKET_MANGLER_ZONE_MAX		10
static struct zone *packet_mangler_zone = NULL; /* zone for packet_mangler */

/*
 * For troubleshooting
 */
int pkt_mnglr_log_level = LOG_ERR;	/* exported as the "log" sysctl */
int pkt_mnglr_debug = 1;
124
/*
 * Forward declaration to appease the compiler
 */

/* Wrappers around the lck_rw_* calls that also record the caller */
static void pkt_mnglr_rw_lock_exclusive(lck_rw_t *);
static void pkt_mnglr_rw_unlock_exclusive(lck_rw_t *);
static void pkt_mnglr_rw_lock_shared(lck_rw_t *);
static void pkt_mnglr_rw_unlock_shared(lck_rw_t *);

/* Callbacks installed in struct ipf_filter by pkt_mnglr_ctl_connect() */
static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data,
	ipf_pktopts_t options);
static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data,
	int offset, u_int8_t protocol);
static void pktmnglr_ipfilter_detach(void *cookie);

/* Recomputes the IPv4 (and TCP) checksums of a packet */
static void chksm_update(mbuf_t data);
140
/* TCP option kind that the input filter treats as Multipath TCP */
#define TCP_OPT_MULTIPATH_TCP 30
/*
 * Offset, from the start of an MPTCP option, of the byte whose upper four
 * bits are read as the subtype by the input filter.
 */
#define MPTCP_SBT_VER_OFFSET 2

/* MPTCP option subtypes (only MPTCP_SUBTYPE_DSS is tested in this file) */
#define MPTCP_SUBTYPE_MPCAPABLE		0x0
#define MPTCP_SUBTYPE_MPJOIN		0x1
#define MPTCP_SUBTYPE_DSS		0x2
#define MPTCP_SUBTYPE_ADD_ADDR		0x3
#define MPTCP_SUBTYPE_REM_ADDR		0x4
#define MPTCP_SUBTYPE_MP_PRIO		0x5
#define MPTCP_SUBTYPE_MP_FAIL		0x6
#define MPTCP_SUBTYPE_MP_FASTCLOSE	0x7
152
153/*
154 * packet filter global read write lock
155 */
156
157static void
158pkt_mnglr_rw_lock_exclusive(lck_rw_t *lck)
159{
160 void *lr_saved;
161
162 lr_saved = __builtin_return_address(0);
163
164 lck_rw_lock_exclusive(lck);
165
166 pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
167 pkt_mnglr_rw_nxt_lck =
168 (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
169}
170
171static void
172pkt_mnglr_rw_unlock_exclusive(lck_rw_t *lck)
173{
174 void *lr_saved;
175
176 lr_saved = __builtin_return_address(0);
177
178 lck_rw_unlock_exclusive(lck);
179
180 pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] =
181 lr_saved;
182 pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
183}
184
185static void
186pkt_mnglr_rw_lock_shared(lck_rw_t *lck)
187{
188 void *lr_saved;
189
190 lr_saved = __builtin_return_address(0);
191
192 lck_rw_lock_shared(lck);
193
194 pkt_mnglr_rw_lock_history[pkt_mnglr_rw_nxt_lck] = lr_saved;
195 pkt_mnglr_rw_nxt_lck = (pkt_mnglr_rw_nxt_lck + 1) % PKT_MNGLR_RW_LCK_MAX;
196}
197
198static void
199pkt_mnglr_rw_unlock_shared(lck_rw_t *lck)
200{
201 void *lr_saved;
202
203 lr_saved = __builtin_return_address(0);
204
205 lck_rw_unlock_shared(lck);
206
207 pkt_mnglr_rw_unlock_history[pkt_mnglr_rw_nxt_unlck] = lr_saved;
208 pkt_mnglr_rw_nxt_unlck = (pkt_mnglr_rw_nxt_unlck + 1) % PKT_MNGLR_RW_LCK_MAX;
209}
210
211/*
212 * Packet Mangler's Kernel control socket callbacks
213 */
214static errno_t
215pkt_mnglr_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
216 void **unitinfo)
217{
218 errno_t error = 0;
219 struct packet_mangler *p_pkt_mnglr = NULL;
220
221 PKT_MNGLR_LOG(LOG_NOTICE, "Connecting packet mangler filter.");
222
223 p_pkt_mnglr = zalloc(packet_mangler_zone);
224 if (p_pkt_mnglr == NULL) {
225 PKT_MNGLR_LOG(LOG_ERR, "zalloc failed");
226 error = ENOMEM;
227 goto done;
228 }
229
230 bzero(p_pkt_mnglr, sizeof(struct packet_mangler));
231
232 pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
233 if (packet_manglers == NULL) {
234 struct packet_mangler **tmp;
235
236 pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
237
238 MALLOC(tmp,
239 struct packet_mangler **,
240 MAX_PACKET_MANGLER * sizeof(struct packet_mangler *),
241 M_TEMP,
242 M_WAITOK | M_ZERO);
243
244 pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
245
246 if (tmp == NULL && packet_manglers == NULL) {
247 error = ENOMEM;
248 pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
249 goto done;
250 }
251 /* Another thread may have won the race */
252 if (packet_manglers != NULL)
253 FREE(tmp, M_TEMP);
254 else
255 packet_manglers = tmp;
256 }
257
258 if (sac->sc_unit == 0 || sac->sc_unit > MAX_PACKET_MANGLER) {
259 PKT_MNGLR_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
260 error = EINVAL;
261 } else if (packet_manglers[sac->sc_unit - 1] != NULL) {
262 PKT_MNGLR_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
263 error = EADDRINUSE;
264 } else {
265 /*
266 * kernel control socket kcunit numbers start at 1
267 */
268 packet_manglers[sac->sc_unit - 1] = p_pkt_mnglr;
269
270 p_pkt_mnglr->pkt_mnglr_kcref = kctlref;
271 p_pkt_mnglr->pkt_mnglr_kcunit = sac->sc_unit;
272
273 *unitinfo = p_pkt_mnglr;
274 pkt_mnglr_active_count++;
275 }
276
277 p_pkt_mnglr->pkt_mnglr_ipfilter.cookie = p_pkt_mnglr;
278 p_pkt_mnglr->pkt_mnglr_ipfilter.name = "com.apple.pktmnglripfilter";
279 p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_input = pktmnglr_ipfilter_input;
280 p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_output = pktmnglr_ipfilter_output;
281 p_pkt_mnglr->pkt_mnglr_ipfilter.ipf_detach = pktmnglr_ipfilter_detach;
282 error = ipf_addv4(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfref));
283 if (error) {
284 PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv4 Filter");
285 goto done;
286 }
287 error = ipf_addv6(&(p_pkt_mnglr->pkt_mnglr_ipfilter), &(p_pkt_mnglr->pkt_mnglr_ipfrefv6));
288 if (error) {
289 ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
290 PKT_MNGLR_LOG(LOG_ERR, "Could not register packet mangler's IPv6 Filter");
291 goto done;
292 }
293
294 PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler's IP Filters");
295 p_pkt_mnglr->pkt_mnglr_flags |= PKT_MNGLR_FLG_IPFILTER_ATTACHED;
296 pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
297
298done:
299 if (error != 0 && p_pkt_mnglr != NULL)
300 zfree(packet_mangler_zone, p_pkt_mnglr);
301
302 PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
303 error, pkt_mnglr_active_count, sac->sc_unit);
304
305 return (error);
306}
307
308static errno_t
309pkt_mnglr_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
310{
311#pragma unused(kctlref)
312 errno_t error = 0;
313 struct packet_mangler *p_pkt_mnglr;
314
315 PKT_MNGLR_LOG(LOG_INFO, "Disconnecting packet mangler kernel control");
316
317 if (packet_manglers == NULL) {
318 PKT_MNGLR_LOG(LOG_ERR, "no packet filter");
319 error = EINVAL;
320 goto done;
321 }
322 if (kcunit > MAX_PACKET_MANGLER) {
323 PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
324 kcunit, MAX_PACKET_MANGLER);
325 error = EINVAL;
326 goto done;
327 }
328
329 p_pkt_mnglr = (struct packet_mangler *)unitinfo;
330 if (p_pkt_mnglr == NULL) {
331 PKT_MNGLR_LOG(LOG_ERR, "Unit info is NULL");
332 goto done;
333 }
334
335 pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
336 if (packet_manglers[kcunit - 1] != p_pkt_mnglr || p_pkt_mnglr->pkt_mnglr_kcunit != kcunit) {
337 PKT_MNGLR_LOG(LOG_ERR, "bad unit info %u)",
338 kcunit);
339 pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
340 goto done;
341 }
342
343 /*
344 * Make filter inactive
345 */
346 packet_manglers[kcunit - 1] = NULL;
347 pkt_mnglr_active_count--;
348 if (p_pkt_mnglr->pkt_mnglr_flags & PKT_MNGLR_FLG_IPFILTER_ATTACHED) {
349 (void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfref);
350 (void) ipf_remove(p_pkt_mnglr->pkt_mnglr_ipfrefv6);
351 }
352 pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
353 zfree(packet_mangler_zone, p_pkt_mnglr);
354done:
355 PKT_MNGLR_LOG(LOG_INFO, "return %d pkt_mnglr_active_count %u kcunit %u",
356 error, pkt_mnglr_active_count, kcunit);
357
358 return (error);
359}
360
361static errno_t
362pkt_mnglr_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
363 int opt, void *data, size_t *len)
364{
365#pragma unused(kctlref, opt)
366 errno_t error = 0;
367 struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
368
369 PKT_MNGLR_LOG(LOG_NOTICE, "");
370
371 pkt_mnglr_rw_lock_shared(&pkt_mnglr_lck_rw);
372
373 if (packet_manglers == NULL) {
374 PKT_MNGLR_LOG(LOG_ERR, "no packet filter");
375 error = EINVAL;
376 goto done;
377 }
378 if (kcunit > MAX_PACKET_MANGLER) {
379 PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
380 kcunit, MAX_PACKET_MANGLER);
381 error = EINVAL;
382 goto done;
383 }
384 if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
385 PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
386 kcunit);
387 error = EINVAL;
388 goto done;
389 }
390 switch (opt) {
391 case PKT_MNGLR_OPT_PROTO_ACT_MASK:
392 if (*len < sizeof(uint32_t)) {
393 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
394 "len too small %lu", *len);
395 error = EINVAL;
396 goto done;
397 }
398
399 if (data != NULL) {
400 *(uint32_t *)data = p_pkt_mnglr->proto_action_mask;
401 }
402 break;
403 case PKT_MNGLR_OPT_IP_ACT_MASK:
404 if (*len < sizeof(uint32_t)) {
405 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
406 "len too small %lu", *len);
407 error = EINVAL;
408 goto done;
409 }
410
411 if (data != NULL) {
412 *(uint32_t *)data = p_pkt_mnglr->ip_action_mask;
413 }
414 break;
415 case PKT_MNGLR_OPT_LOCAL_IP:
416 if (*len < sizeof(struct sockaddr_storage)) {
417 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
418 "len too small %lu", *len);
419 error = EINVAL;
420 goto done;
421 }
422
423 if (data != NULL) {
424 *(struct sockaddr_storage *)data = p_pkt_mnglr->lsaddr;
425 }
426 break;
427 case PKT_MNGLR_OPT_REMOTE_IP:
428 if (*len < sizeof(struct sockaddr_storage)) {
429 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
430 "len too small %lu", *len);
431 error = EINVAL;
432 goto done;
433 }
434
435 if (data != NULL) {
436 *(struct sockaddr_storage *)data = p_pkt_mnglr->rsaddr;
437 }
438 break;
439 case PKT_MNGLR_OPT_LOCAL_PORT:
440 if (*len < sizeof(uint16_t)) {
441 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
442 "len too small %lu", *len);
443 error = EINVAL;
444 goto done;
445 }
446
447 if (data != NULL) {
448 *(uint16_t *)data = p_pkt_mnglr->lport;
449 }
450 break;
451 case PKT_MNGLR_OPT_REMOTE_PORT:
452 if (*len < sizeof(uint16_t)) {
453 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
454 "len too small %lu", *len);
455 error = EINVAL;
456 goto done;
457 }
458
459 if (data != NULL) {
460 *(uint16_t *)data = p_pkt_mnglr->rport;
461 }
462 break;
463 case PKT_MNGLR_OPT_DIRECTION:
464 if (*len < sizeof(uint32_t)) {
465 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
466 "len too small %lu", *len);
467 error = EINVAL;
468 goto done;
469 }
470 if (data != NULL) {
471 *(uint32_t *)data = p_pkt_mnglr->dir;
472 }
473 break;
474 case PKT_MNGLR_OPT_PROTOCOL:
475 if (*len < sizeof(uint32_t)) {
476 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
477 "len too small %lu", *len);
478 error = EINVAL;
479 goto done;
480 }
481 if (data != NULL) {
482 *(uint32_t *)data = p_pkt_mnglr->proto;
483 }
484 break;
485 case PKT_MNGLR_OPT_ACTIVATE:
486 if (*len < sizeof(uint8_t)) {
487 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
488 "len too small %lu", *len);
489 error = EINVAL;
490 goto done;
491 }
492
493 if (data != NULL) {
494 *(uint8_t *)data = p_pkt_mnglr->activate;
495 }
496 break;
497 default:
498 error = ENOPROTOOPT;
499 break;
500 }
501done:
502 pkt_mnglr_rw_unlock_shared(&pkt_mnglr_lck_rw);
503
504 return (error);
505}
506
507static errno_t
508pkt_mnglr_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
509 int opt, void *data, size_t len)
510{
511#pragma unused(kctlref, opt)
512 errno_t error = 0;
513 struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)unitinfo;
514
515 PKT_MNGLR_LOG(LOG_NOTICE, "");
516
517 pkt_mnglr_rw_lock_exclusive(&pkt_mnglr_lck_rw);
518
519 if (packet_manglers == NULL) {
520 PKT_MNGLR_LOG(LOG_ERR, "no packet filter");
521 error = EINVAL;
522 goto done;
523 }
524 if (kcunit > MAX_PACKET_MANGLER) {
525 PKT_MNGLR_LOG(LOG_ERR, "kcunit %u > MAX_PACKET_MANGLER (%d)",
526 kcunit, MAX_PACKET_MANGLER);
527 error = EINVAL;
528 goto done;
529 }
530 if (p_pkt_mnglr != (void *)packet_manglers[kcunit - 1]) {
531 PKT_MNGLR_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
532 kcunit);
533 error = EINVAL;
534 goto done;
535 }
536 switch (opt) {
537 case PKT_MNGLR_OPT_PROTO_ACT_MASK:
538 if (len < sizeof(uint32_t)) {
539 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
540 "len too small %lu", len);
541 error = EINVAL;
542 goto done;
543 }
544 if (p_pkt_mnglr->proto_action_mask != 0) {
545 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTO_ACT_MASK "
546 "already set %u",
547 p_pkt_mnglr->proto_action_mask);
548 error = EINVAL;
549 goto done;
550 }
551 p_pkt_mnglr->proto_action_mask = *(uint32_t *)data;
552 PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr->proto_action_mask set to :%d", p_pkt_mnglr->proto_action_mask);
553 break;
554 case PKT_MNGLR_OPT_IP_ACT_MASK:
555 if (len < sizeof(uint32_t)) {
556 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
557 "len too small %lu", len);
558 error = EINVAL;
559 goto done;
560 }
561 if (p_pkt_mnglr->ip_action_mask != 0) {
562 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_IP_ACT_MASK "
563 "already set %u",
564 p_pkt_mnglr->ip_action_mask);
565 error = EINVAL;
566 goto done;
567 }
568 p_pkt_mnglr->ip_action_mask = *(uint32_t *)data;
569 break;
570 case PKT_MNGLR_OPT_LOCAL_IP:
571 if (len < sizeof(struct sockaddr_storage)) {
572 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
573 "len too small %lu", len);
574 error = EINVAL;
575 goto done;
576 }
577 if (p_pkt_mnglr->lsaddr.ss_family) {
578 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_IP "
579 "already set");
580 error = EINVAL;
581 goto done;
582 }
583 p_pkt_mnglr->lsaddr = *(struct sockaddr_storage *)data;
584 break;
585 case PKT_MNGLR_OPT_REMOTE_IP:
586 if (len < sizeof(struct sockaddr_storage)) {
587 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
588 "len too small %lu", len);
589 error = EINVAL;
590 goto done;
591 }
592 if (p_pkt_mnglr->rsaddr.ss_family) {
593 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_IP "
594 "already set");
595 error = EINVAL;
596 goto done;
597 }
598
599 p_pkt_mnglr->rsaddr = *(struct sockaddr_storage *)data;
600 PKT_MNGLR_LOG(LOG_INFO,
601 "Remote IP registered for address family: %d",
602 p_pkt_mnglr->rsaddr.ss_family);
603 break;
604 case PKT_MNGLR_OPT_LOCAL_PORT:
605 if (len < sizeof(uint16_t)) {
606 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
607 "len too small %lu", len);
608 error = EINVAL;
609 goto done;
610 }
611 if (p_pkt_mnglr->lport != 0) {
612 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_LOCAL_PORT "
613 "already set %d",
614 p_pkt_mnglr->lport);
615 error = EINVAL;
616 goto done;
617 }
618 p_pkt_mnglr->lport = *(uint16_t *)data;
619 break;
620 case PKT_MNGLR_OPT_REMOTE_PORT:
621 if (len < sizeof(uint16_t)) {
622 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
623 "len too small %lu", len);
624 error = EINVAL;
625 goto done;
626 }
627 if (p_pkt_mnglr->rport != 0) {
628 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_REMOTE_PORT "
629 "already set %d",
630 p_pkt_mnglr->rport);
631 error = EINVAL;
632 goto done;
633 }
634 p_pkt_mnglr->rport = *(uint16_t *)data;
635 break;
636 case PKT_MNGLR_OPT_DIRECTION:
637 if (len < sizeof(uint32_t)) {
638 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
639 "len too small %lu", len);
640 error = EINVAL;
641 goto done;
642 }
643 if (p_pkt_mnglr->dir != 0) {
644 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_DIRECTION "
645 "already set %u",
646 p_pkt_mnglr->dir);
647 error = EINVAL;
648 goto done;
649 }
650 p_pkt_mnglr->dir = *(uint32_t *)data;
651 break;
652 case PKT_MNGLR_OPT_PROTOCOL:
653 if (len < sizeof(uint32_t)) {
654 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
655 "len too small %lu", len);
656 error = EINVAL;
657 goto done;
658 }
659 if (p_pkt_mnglr->proto != 0) {
660 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_PROTOCOL "
661 "already set %u",
662 p_pkt_mnglr->proto);
663 error = EINVAL;
664 goto done;
665 }
666 p_pkt_mnglr->proto = *(uint32_t *)data;
667 break;
668 case PKT_MNGLR_OPT_ACTIVATE:
669 if (len < sizeof(uint8_t)) {
670 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
671 "len too small %lu", len);
672 error = EINVAL;
673 goto done;
674 }
675 if (p_pkt_mnglr->activate != 0) {
676 PKT_MNGLR_LOG(LOG_ERR, "PKT_MNGLR_OPT_ACTIVATE "
677 "already set %u",
678 p_pkt_mnglr->activate);
679 error = EINVAL;
680 goto done;
681 }
682 p_pkt_mnglr->activate = *(uint8_t *)data;
683 PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr->activate set to :%d",
684 p_pkt_mnglr->activate);
685 break;
686 default:
687 error = ENOPROTOOPT;
688 break;
689 }
690done:
691 pkt_mnglr_rw_unlock_exclusive(&pkt_mnglr_lck_rw);
692
693 return (error);
694}
695
696void
697pkt_mnglr_init(void)
698{
699 struct kern_ctl_reg kern_ctl;
700 errno_t error = 0;
701 vm_size_t pkt_mnglr_size = 0;
702
703 PKT_MNGLR_LOG(LOG_NOTICE, "");
704
705 /*
706 * Compile time verifications
707 */
708 _CASSERT(PKT_MNGLR_MAX_FILTER_COUNT == MAX_PACKET_MANGLER);
709
710 /*
711 * Zone for packet mangler kernel control sockets
712 */
713 pkt_mnglr_size = sizeof(struct packet_mangler);
714 packet_mangler_zone = zinit(pkt_mnglr_size,
715 PACKET_MANGLER_ZONE_MAX * pkt_mnglr_size,
716 0,
717 PACKET_MANGLER_ZONE_NAME);
718
719 if (packet_mangler_zone == NULL) {
720 panic("%s: zinit(%s) failed", __func__,
721 PACKET_MANGLER_ZONE_NAME);
722 /* NOTREACHED */
723 }
724 zone_change(packet_mangler_zone, Z_CALLERACCT, FALSE);
725 zone_change(packet_mangler_zone, Z_EXPAND, TRUE);
726
727 /*
728 * Allocate locks
729 */
730 pkt_mnglr_lck_grp_attr = lck_grp_attr_alloc_init();
731 if (pkt_mnglr_lck_grp_attr == NULL) {
732 panic("%s: lck_grp_attr_alloc_init failed", __func__);
733 /* NOTREACHED */
734 }
735 pkt_mnglr_lck_grp = lck_grp_alloc_init("packet manglerr",
736 pkt_mnglr_lck_grp_attr);
737 if (pkt_mnglr_lck_grp == NULL) {
738 panic("%s: lck_grp_alloc_init failed", __func__);
739 /* NOTREACHED */
740 }
741 pkt_mnglr_lck_attr = lck_attr_alloc_init();
742 if (pkt_mnglr_lck_attr == NULL) {
743 panic("%s: lck_attr_alloc_init failed", __func__);
744 /* NOTREACHED */
745 }
746 lck_rw_init(&pkt_mnglr_lck_rw, pkt_mnglr_lck_grp, pkt_mnglr_lck_attr);
747
748 /*
749 * Register kernel control
750 */
751 bzero(&kern_ctl, sizeof(kern_ctl));
752 strlcpy(kern_ctl.ctl_name, PACKET_MANGLER_CONTROL_NAME,
753 sizeof(kern_ctl.ctl_name));
754 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
755 kern_ctl.ctl_connect = pkt_mnglr_ctl_connect;
756 kern_ctl.ctl_disconnect = pkt_mnglr_ctl_disconnect;
757 kern_ctl.ctl_getopt = pkt_mnglr_ctl_getopt;
758 kern_ctl.ctl_setopt = pkt_mnglr_ctl_setopt;
759 error = ctl_register(&kern_ctl, &pkt_mnglr_kctlref);
760 if (error != 0) {
761 PKT_MNGLR_LOG(LOG_ERR, "ctl_register failed: %d", error);
762 } else {
763 PKT_MNGLR_LOG(LOG_INFO, "Registered packet mangler kernel control.");
764 }
765}
766
767static errno_t pktmnglr_ipfilter_output(void *cookie, mbuf_t *data, ipf_pktopts_t options)
768{
769 struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
770 struct ip ip;
771 struct tcphdr tcp;
772 int optlen = 0;
773 errno_t error = 0;
774
775#pragma unused(tcp, optlen, options)
776 if (p_pkt_mnglr == NULL) {
777 goto output_done;
778 }
779
780 if (!p_pkt_mnglr->activate) {
781 goto output_done;
782 }
783
784 if (p_pkt_mnglr->dir == IN) {
785 goto output_done;
786 }
787
788 if (data == NULL) {
789 PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
790 goto output_done;
791 }
792
793 /* Check for IP filter options */
794 error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
795 if (error) {
796 PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
797 goto output_done;
798 }
799
800 if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
801 goto output_done;
802 }
803
804 if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
805 goto output_done;
806 }
807
808 if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
809 struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
810 if (ip.ip_src.s_addr != laddr.sin_addr.s_addr) {
811 goto output_done;
812 }
813 }
814
815 if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
816 struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
817 if (ip.ip_dst.s_addr != raddr.sin_addr.s_addr) {
818 goto output_done;
819 }
820 }
821
822 if (ip.ip_v != 4) {
823 PKT_MNGLR_LOG(LOG_INFO,
824 "%s:%d Not handling IP version %d\n",
825 __func__, __LINE__, ip.ip_v);
826 goto output_done;
827 }
828
829output_done:
830 /* Not handling output flow */
831 return 0;
832}
833
834#define TCP_MAX_OPTLEN 40
835
836static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u_int8_t protocol)
837{
838 struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie;
839 struct ip ip;
840 struct tcphdr tcp;
841 int ip_pld_len;
842 errno_t error = 0;
843
844 if (p_pkt_mnglr == NULL) {
845 PKT_MNGLR_LOG(LOG_ERR, "p_pkt_mnglr is NULL");
846 goto input_done;
847 }
848
849 if (p_pkt_mnglr->activate == 0) {
850 PKT_MNGLR_LOG(LOG_INFO, "p_pkt_mnglr not yet activated");
851 goto input_done;
852 }
853
854 if (p_pkt_mnglr->dir == OUT) {
855 goto input_done;
856 }
857
858 if (data == NULL) {
859 PKT_MNGLR_LOG(LOG_ERR, "Data pointer is NULL");
860 goto input_done;
861 }
862
863 /* Check for IP filter options */
864 error = mbuf_copydata(*data, 0, sizeof(ip), &ip);
865 if (error) {
866 PKT_MNGLR_LOG(LOG_ERR, "Could not make local IP header copy");
867 goto input_done;
868 }
869
870 if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET6) && (ip.ip_v == 4)) {
871 PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family of packet is IPv4 but local "
872 "address is set to IPv6");
873 goto input_done;
874 }
875
876 if ((p_pkt_mnglr->lsaddr.ss_family == AF_INET) && (ip.ip_v == 6)) {
877 PKT_MNGLR_LOG(LOG_INFO, "Skipping filtering as address family "
878 "of packet is IPv6 but local address is set to IPv4");
879 goto input_done;
880 }
881
882 if (p_pkt_mnglr->lsaddr.ss_family == AF_INET) {
883 struct sockaddr_in laddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->lsaddr));
884 if (ip.ip_dst.s_addr != laddr.sin_addr.s_addr) {
885 goto input_done;
886 }
887 }
888
889 if (p_pkt_mnglr->rsaddr.ss_family == AF_INET) {
890 struct sockaddr_in raddr = *(struct sockaddr_in *)(&(p_pkt_mnglr->rsaddr));
891 if (ip.ip_src.s_addr != raddr.sin_addr.s_addr) {
892 goto input_done;
893 }
894 PKT_MNGLR_LOG(LOG_INFO, "Remote IP: %x Source IP: %x in input path",
895 raddr.sin_addr.s_addr,
896 ip.ip_src.s_addr);
897 }
898
899 if (ip.ip_v != 4) {
900 goto input_done;
901 }
902
903 ip_pld_len = ntohs(ip.ip_len) - (ip.ip_hl << 2);
904
905 if (protocol != p_pkt_mnglr->proto) {
906 PKT_MNGLR_LOG(LOG_INFO, "Skip: Protocol mismatch");
907 goto input_done;
908 }
909
910 switch (protocol) {
911 case IPPROTO_TCP:
912 if (ip_pld_len < (int) sizeof(tcp)) {
913 PKT_MNGLR_LOG(LOG_ERR, "IP total len not big enough for TCP: %d", ip_pld_len);
914 goto drop_it;
915 }
916
917 error = mbuf_copydata(*data, offset, sizeof(tcp), &tcp);
918 if (error) {
919 PKT_MNGLR_LOG(LOG_ERR, "Could not make local TCP header copy");
920 goto input_done;
921 }
922
923 if (p_pkt_mnglr->lport && (p_pkt_mnglr->lport != tcp.th_dport)) {
924 PKT_MNGLR_LOG(LOG_INFO, "Local port and IP des port do not match");
925 goto input_done;
926 }
927
928 if (p_pkt_mnglr->rport && (p_pkt_mnglr->rport != tcp.th_sport)) {
929 PKT_MNGLR_LOG(LOG_INFO, "Remote port and IP src port do not match");
930 goto input_done;
931 }
932 break;
933 case IPPROTO_UDP:
934 goto input_done;
935 case IPPROTO_ICMP:
936 goto input_done;
937 case IPPROTO_ICMPV6:
938 goto input_done;
939 default:
940 goto input_done;
941 }
942
943 /* XXX Do IP actions here */
944 PKT_MNGLR_LOG(LOG_INFO, "Proceeding with packet mangler actions on the packet");
945
946 /* Protocol actions */
947 switch (protocol) {
948 case IPPROTO_TCP:
949 if (p_pkt_mnglr->proto_action_mask) {
950 char tcp_opt_buf[TCP_MAX_OPTLEN] = {0};
951 int orig_tcp_optlen;
952 int tcp_optlen = 0;
953 int i = 0, off;
954
955 off = (tcp.th_off << 2);
956
957 if (off < (int) sizeof(struct tcphdr) || off > ip_pld_len) {
958 PKT_MNGLR_LOG(LOG_ERR, "TCP header offset is wrong: %d", off);
959 goto drop_it;
960 }
961
962
963 tcp_optlen = off - sizeof(struct tcphdr);
964
965 PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n");
966 PKT_MNGLR_LOG(LOG_INFO, "Optlen: %d\n", tcp_optlen);
967 orig_tcp_optlen = tcp_optlen;
968 if (orig_tcp_optlen) {
969 error = mbuf_copydata(*data, offset+sizeof(struct tcphdr), orig_tcp_optlen, tcp_opt_buf);
970 if (error) {
971 PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options: error %d offset %d optlen %d", error, offset, orig_tcp_optlen);
972 goto input_done;
973 }
974 }
975
976 while (tcp_optlen > 0) {
977 if (tcp_opt_buf[i] == 0x1) {
978 PKT_MNGLR_LOG(LOG_INFO, "Skipping NOP\n");
979 tcp_optlen--;
980 i++;
981 continue;
982 } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) {
983 PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]);
984
985 /* Minimum TCP option size is 2 */
986 if (tcp_opt_buf[i+1] < 2) {
987 PKT_MNGLR_LOG(LOG_ERR, "Received suspicious TCP option");
988 goto drop_it;
989 }
990 tcp_optlen -= tcp_opt_buf[i+1];
991 i += tcp_opt_buf[i+1];
992 continue;
993 } else if (tcp_opt_buf[i] == TCP_OPT_MULTIPATH_TCP) {
994 int j = 0;
995 unsigned char mptcpoptlen = tcp_opt_buf[i+1];
996 uint8_t sbtver = tcp_opt_buf[i+MPTCP_SBT_VER_OFFSET];
997 uint8_t subtype = sbtver >> 4;
998
999 PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
1000 PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP subtype %x\n", subtype);
1001 if (subtype == MPTCP_SUBTYPE_DSS) {
1002 PKT_MNGLR_LOG(LOG_INFO, "Got DSS option\n");
1003 PKT_MNGLR_LOG(LOG_INFO, "Protocol option mask: %d\n", p_pkt_mnglr->proto_action_mask);
1004 if (p_pkt_mnglr->proto_action_mask &
1005 PKT_MNGLR_TCP_ACT_DSS_DROP) {
1006 goto drop_it;
1007 }
1008 }
1009
1010 PKT_MNGLR_LOG(LOG_INFO, "Got MPTCP option %x\n", tcp_opt_buf[i]);
1011 for (; j < mptcpoptlen && j < tcp_optlen; j++) {
1012 if (p_pkt_mnglr->proto_action_mask &
1013 PKT_MNGLR_TCP_ACT_NOP_MPTCP) {
1014 tcp_opt_buf[i+j] = 0x1;
1015 }
1016 }
1017 tcp_optlen -= mptcpoptlen;
1018 i += mptcpoptlen;
1019 } else {
1020 tcp_optlen--;
1021 i++;
1022 }
1023 }
1024
1025 if (orig_tcp_optlen) {
1026 error = mbuf_copyback(*data,
1027 offset+sizeof(struct tcphdr),
1028 orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK);
1029
1030 if (error) {
1031 PKT_MNGLR_LOG(LOG_ERR,
1032 "Failed to copy tcp options back: error %d offset %d optlen %d",
1033 error, offset, orig_tcp_optlen);
1034 goto input_done;
1035 }
1036 }
1037 }
1038 break;
1039 case IPPROTO_UDP:
1040 /* Don't handle UDP */
1041 break;
1042 case IPPROTO_ICMP:
1043 break;
1044 case IPPROTO_ICMPV6:
1045 break;
1046 default:
1047 break;
1048 }
1049 chksm_update(*data);
1050input_done:
1051 return 0;
1052
1053drop_it:
1054 PKT_MNGLR_LOG(LOG_INFO, "Dropping packet\n");
1055 mbuf_freem(*data);
1056 return EJUSTRETURN;
1057}
1058
/*
 * IP filter detach callback.  Intentionally empty: the packet_mangler that
 * serves as the cookie is freed by the control socket's disconnect path.
 */
static void pktmnglr_ipfilter_detach(void *cookie)
{
#pragma unused(cookie)
}
1064
1065/* XXX Still need to modify this to use mbuf_copy* macros */
1066static void chksm_update(mbuf_t data)
1067{
1068 u_int16_t ip_sum;
1069 u_int16_t tsum;
1070 struct tcphdr *tcp;
1071 errno_t err;
1072
1073 unsigned char *ptr = (unsigned char *)mbuf_data(data);
1074 struct ip *ip = (struct ip *)(void *)ptr;
1075 if (ip->ip_v != 4) {
1076 return;
1077 }
1078
1079 ip->ip_sum = 0;
1080 err = mbuf_inet_cksum(data, 0, 0, ip->ip_hl << 2, &ip_sum); // ip sum
1081 if (err == 0)
1082 ip->ip_sum = ip_sum;
1083 switch (ip->ip_p) {
1084 case IPPROTO_TCP:
1085 tcp = (struct tcphdr *)(void *)(ptr + (ip->ip_hl << 2));
1086 tcp->th_sum = 0;
1087 err = mbuf_inet_cksum(data, IPPROTO_TCP, ip->ip_hl << 2,
1088 ntohs(ip->ip_len) - (ip->ip_hl << 2), &tsum);
1089 if (err == 0)
1090 tcp->th_sum = tsum;
1091 break;
1092 case IPPROTO_UDP:
1093 /* Don't handle UDP */
1094 break;
1095 case IPPROTO_ICMP:
1096 break;
1097 case IPPROTO_ICMPV6:
1098 break;
1099 default:
1100 break;
1101 }
1102
1103 mbuf_clear_csum_performed(data);
1104 return;
1105}
1106