1/*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69/*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76#include "bpf.h"
77
78#ifndef __GNUC__
79#define inline
80#else
81#define inline __inline
82#endif
83
84#include <sys/param.h>
85#include <sys/systm.h>
86#include <sys/conf.h>
87#include <sys/malloc.h>
88#include <sys/mbuf.h>
89#include <sys/time.h>
90#include <sys/proc.h>
91#include <sys/signalvar.h>
92#include <sys/filio.h>
93#include <sys/sockio.h>
94#include <sys/ttycom.h>
95#include <sys/filedesc.h>
96#include <sys/uio_internal.h>
97#include <sys/file_internal.h>
98#include <sys/event.h>
99
100#include <sys/poll.h>
101
102#include <sys/socket.h>
103#include <sys/socketvar.h>
104#include <sys/vnode.h>
105
106#include <net/if.h>
107#include <net/bpf.h>
108#include <net/bpfdesc.h>
109
110#include <netinet/in.h>
111#include <netinet/ip.h>
112#include <netinet/ip6.h>
113#include <netinet/in_pcb.h>
114#include <netinet/in_var.h>
115#include <netinet/ip_var.h>
116#include <netinet/tcp.h>
117#include <netinet/tcp_var.h>
118#include <netinet/udp.h>
119#include <netinet/udp_var.h>
120#include <netinet/if_ether.h>
121#include <netinet/isakmp.h>
122#include <netinet6/esp.h>
123#include <sys/kernel.h>
124#include <sys/sysctl.h>
125#include <net/firewire.h>
126
127#include <miscfs/devfs/devfs.h>
128#include <net/dlil.h>
129#include <net/pktap.h>
130
131#include <kern/locks.h>
132#include <kern/thread_call.h>
133#include <libkern/section_keywords.h>
134
135#if CONFIG_MACF_NET
136#include <security/mac_framework.h>
137#endif /* MAC_NET */
138
139#include <os/log.h>
140
141extern int tvtohz(struct timeval *);
142
143#define BPF_BUFSIZE 4096
144#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
145
146#define PRINET 26 /* interruptible */
147
148#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
149#define ESP_HDR_SIZE sizeof(struct newesp)
150
151typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
152
153/*
154 * The default read buffer size is patchable.
155 */
156static unsigned int bpf_bufsize = BPF_BUFSIZE;
157SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
158 &bpf_bufsize, 0, "");
159__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
160SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
161 &bpf_maxbufsize, 0, "");
162static unsigned int bpf_maxdevices = 256;
163SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &bpf_maxdevices, 0, "");
165/*
166 * bpf_wantpktap controls the defaul visibility of DLT_PKTAP
167 * For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP
168 * explicitly to be able to use DLT_PKTAP.
169 */
170#if CONFIG_EMBEDDED
171static unsigned int bpf_wantpktap = 1;
172#else
173static unsigned int bpf_wantpktap = 0;
174#endif
175SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
176 &bpf_wantpktap, 0, "");
177
178static int bpf_debug = 0;
179SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
180 &bpf_debug, 0, "");
181
182/*
183 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
184 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
185 */
186static struct bpf_if *bpf_iflist;
187#ifdef __APPLE__
188/*
189 * BSD now stores the bpf_d in the dev_t which is a struct
190 * on their system. Our dev_t is an int, so we still store
191 * the bpf_d in a separate table indexed by minor device #.
192 *
193 * The value stored in bpf_dtab[n] represent three states:
194 * NULL: device not opened
195 * BPF_DEV_RESERVED: device opening or closing
196 * other: device <n> opened with pointer to storage
197 */
198#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
199static struct bpf_d **bpf_dtab = NULL;
200static unsigned int bpf_dtab_size = 0;
201static unsigned int nbpfilter = 0;
202
203decl_lck_mtx_data(static, bpf_mlock_data);
204static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
205static lck_grp_t *bpf_mlock_grp;
206static lck_grp_attr_t *bpf_mlock_grp_attr;
207static lck_attr_t *bpf_mlock_attr;
208
209#endif /* __APPLE__ */
210
211static int bpf_allocbufs(struct bpf_d *);
212static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
213static int bpf_detachd(struct bpf_d *d, int);
214static void bpf_freed(struct bpf_d *);
215static int bpf_movein(struct uio *, int,
216 struct mbuf **, struct sockaddr *, int *);
217static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
218static void bpf_timed_out(void *, void *);
219static void bpf_wakeup(struct bpf_d *);
220static u_int get_pkt_trunc_len(u_char *, u_int);
221static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
222static void reset_d(struct bpf_d *);
223static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
224static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
225static int bpf_setdlt(struct bpf_d *, u_int);
226static int bpf_set_traffic_class(struct bpf_d *, int);
227static void bpf_set_packet_service_class(struct mbuf *, int);
228
229static void bpf_acquire_d(struct bpf_d *);
230static void bpf_release_d(struct bpf_d *);
231
232static int bpf_devsw_installed;
233
234void bpf_init(void *unused);
235static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
236
237/*
238 * Darwin differs from BSD here, the following are static
239 * on BSD and not static on Darwin.
240 */
241 d_open_t bpfopen;
242 d_close_t bpfclose;
243 d_read_t bpfread;
244 d_write_t bpfwrite;
245 ioctl_fcn_t bpfioctl;
246 select_fcn_t bpfselect;
247
248/* Darwin's cdevsw struct differs slightly from BSDs */
249#define CDEV_MAJOR 23
250static struct cdevsw bpf_cdevsw = {
251 /* open */ bpfopen,
252 /* close */ bpfclose,
253 /* read */ bpfread,
254 /* write */ bpfwrite,
255 /* ioctl */ bpfioctl,
256 /* stop */ eno_stop,
257 /* reset */ eno_reset,
258 /* tty */ NULL,
259 /* select */ bpfselect,
260 /* mmap */ eno_mmap,
261 /* strategy */ eno_strat,
262 /* getc */ eno_getc,
263 /* putc */ eno_putc,
264 /* type */ 0
265};
266
267#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
268
269static int
270bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
271 struct sockaddr *sockp, int *datlen)
272{
273 struct mbuf *m;
274 int error;
275 int len;
276 uint8_t sa_family;
277 int hlen;
278
279 switch (linktype) {
280
281#if SLIP
282 case DLT_SLIP:
283 sa_family = AF_INET;
284 hlen = 0;
285 break;
286#endif /* SLIP */
287
288 case DLT_EN10MB:
289 sa_family = AF_UNSPEC;
290 /* XXX Would MAXLINKHDR be better? */
291 hlen = sizeof(struct ether_header);
292 break;
293
294#if FDDI
295 case DLT_FDDI:
296#if defined(__FreeBSD__) || defined(__bsdi__)
297 sa_family = AF_IMPLINK;
298 hlen = 0;
299#else
300 sa_family = AF_UNSPEC;
301 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
302 hlen = 24;
303#endif
304 break;
305#endif /* FDDI */
306
307 case DLT_RAW:
308 case DLT_NULL:
309 sa_family = AF_UNSPEC;
310 hlen = 0;
311 break;
312
313#ifdef __FreeBSD__
314 case DLT_ATM_RFC1483:
315 /*
316 * en atm driver requires 4-byte atm pseudo header.
317 * though it isn't standard, vpi:vci needs to be
318 * specified anyway.
319 */
320 sa_family = AF_UNSPEC;
321 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
322 break;
323#endif
324
325 case DLT_PPP:
326 sa_family = AF_UNSPEC;
327 hlen = 4; /* This should match PPP_HDRLEN */
328 break;
329
330 case DLT_APPLE_IP_OVER_IEEE1394:
331 sa_family = AF_UNSPEC;
332 hlen = sizeof(struct firewire_header);
333 break;
334
335 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
336 sa_family = AF_IEEE80211;
337 hlen = 0;
338 break;
339
340 case DLT_IEEE802_11_RADIO:
341 sa_family = AF_IEEE80211;
342 hlen = 0;
343 break;
344
345 default:
346 return (EIO);
347 }
348
349 // LP64todo - fix this!
350 len = uio_resid(uio);
351 *datlen = len - hlen;
352 if ((unsigned)len > MCLBYTES)
353 return (EIO);
354
355 if (sockp) {
356 /*
357 * Build a sockaddr based on the data link layer type.
358 * We do this at this level because the ethernet header
359 * is copied directly into the data field of the sockaddr.
360 * In the case of SLIP, there is no header and the packet
361 * is forwarded as is.
362 * Also, we are careful to leave room at the front of the mbuf
363 * for the link level header.
364 */
365 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
366 return (EIO);
367 }
368 sockp->sa_family = sa_family;
369 } else {
370 /*
371 * We're directly sending the packet data supplied by
372 * the user; we don't need to make room for the link
373 * header, and don't need the header length value any
374 * more, so set it to 0.
375 */
376 hlen = 0;
377 }
378
379 MGETHDR(m, M_WAIT, MT_DATA);
380 if (m == 0)
381 return (ENOBUFS);
382 if ((unsigned)len > MHLEN) {
383 MCLGET(m, M_WAIT);
384 if ((m->m_flags & M_EXT) == 0) {
385 error = ENOBUFS;
386 goto bad;
387 }
388 }
389 m->m_pkthdr.len = m->m_len = len;
390 m->m_pkthdr.rcvif = NULL;
391 *mp = m;
392
393 /*
394 * Make room for link header.
395 */
396 if (hlen != 0) {
397 m->m_pkthdr.len -= hlen;
398 m->m_len -= hlen;
399 m->m_data += hlen; /* XXX */
400 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
401 if (error)
402 goto bad;
403 }
404 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
405 if (error)
406 goto bad;
407
408 /* Check for multicast destination */
409 switch (linktype) {
410 case DLT_EN10MB: {
411 struct ether_header *eh;
412
413 eh = mtod(m, struct ether_header *);
414 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
415 if (_ether_cmp(etherbroadcastaddr,
416 eh->ether_dhost) == 0) {
417 m->m_flags |= M_BCAST;
418 } else {
419 m->m_flags |= M_MCAST;
420 }
421 }
422 break;
423 }
424 }
425
426 return (0);
427bad:
428 m_freem(m);
429 return (error);
430}
431
432#ifdef __APPLE__
433
434/*
435 * The dynamic addition of a new device node must block all processes that
436 * are opening the last device so that no process will get an unexpected
437 * ENOENT
438 */
439static void
440bpf_make_dev_t(int maj)
441{
442 static int bpf_growing = 0;
443 unsigned int cur_size = nbpfilter, i;
444
445 if (nbpfilter >= bpf_maxdevices)
446 return;
447
448 while (bpf_growing) {
449 /* Wait until new device has been created */
450 (void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
451 }
452 if (nbpfilter > cur_size) {
453 /* other thread grew it already */
454 return;
455 }
456 bpf_growing = 1;
457
458 /* need to grow bpf_dtab first */
459 if (nbpfilter == bpf_dtab_size) {
460 int new_dtab_size;
461 struct bpf_d **new_dtab = NULL;
462 struct bpf_d **old_dtab = NULL;
463
464 new_dtab_size = bpf_dtab_size + NBPFILTER;
465 new_dtab = (struct bpf_d **)_MALLOC(
466 sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
467 if (new_dtab == 0) {
468 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
469 goto done;
470 }
471 if (bpf_dtab) {
472 bcopy(bpf_dtab, new_dtab,
473 sizeof(struct bpf_d *) * bpf_dtab_size);
474 }
475 bzero(new_dtab + bpf_dtab_size,
476 sizeof(struct bpf_d *) * NBPFILTER);
477 old_dtab = bpf_dtab;
478 bpf_dtab = new_dtab;
479 bpf_dtab_size = new_dtab_size;
480 if (old_dtab != NULL)
481 _FREE(old_dtab, M_DEVBUF);
482 }
483 i = nbpfilter++;
484 (void) devfs_make_node(makedev(maj, i),
485 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
486 "bpf%d", i);
487done:
488 bpf_growing = 0;
489 wakeup((caddr_t)&bpf_growing);
490}
491
492#endif
493
494/*
495 * Attach file to the bpf interface, i.e. make d listen on bp.
496 */
497static errno_t
498bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
499{
500 int first = bp->bif_dlist == NULL;
501 int error = 0;
502
503 /*
504 * Point d at bp, and add d to the interface's list of listeners.
505 * Finally, point the driver's bpf cookie at the interface so
506 * it will divert packets to bpf.
507 */
508 d->bd_bif = bp;
509 d->bd_next = bp->bif_dlist;
510 bp->bif_dlist = d;
511
512 /*
513 * Take a reference on the device even if an error is returned
514 * because we keep the device in the interface's list of listeners
515 */
516 bpf_acquire_d(d);
517
518 if (first) {
519 /* Find the default bpf entry for this ifp */
520 if (bp->bif_ifp->if_bpf == NULL) {
521 struct bpf_if *tmp, *primary = NULL;
522
523 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
524 if (tmp->bif_ifp == bp->bif_ifp) {
525 primary = tmp;
526 break;
527 }
528 }
529 bp->bif_ifp->if_bpf = primary;
530 }
531 /* Only call dlil_set_bpf_tap for primary dlt */
532 if (bp->bif_ifp->if_bpf == bp)
533 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
534 bpf_tap_callback);
535
536 if (bp->bif_tap != NULL)
537 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
538 BPF_TAP_INPUT_OUTPUT);
539 }
540
541 /*
542 * Reset the detach flags in case we previously detached an interface
543 */
544 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
545
546 if (bp->bif_dlt == DLT_PKTAP) {
547 d->bd_flags |= BPF_FINALIZE_PKTAP;
548 } else {
549 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
550 }
551 return (error);
552}
553
554/*
555 * Detach a file from its interface.
556 *
557 * Return 1 if was closed by some thread, 0 otherwise
558 */
559static int
560bpf_detachd(struct bpf_d *d, int closing)
561{
562 struct bpf_d **p;
563 struct bpf_if *bp;
564 struct ifnet *ifp;
565
566 int bpf_closed = d->bd_flags & BPF_CLOSING;
567 /*
568 * Some other thread already detached
569 */
570 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
571 goto done;
572 /*
573 * This thread is doing the detach
574 */
575 d->bd_flags |= BPF_DETACHING;
576
577 ifp = d->bd_bif->bif_ifp;
578 bp = d->bd_bif;
579
580 if (bpf_debug != 0)
581 printf("%s: %llx %s%s\n",
582 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
583 if_name(ifp), closing ? " closing" : "");
584
585 /* Remove d from the interface's descriptor list. */
586 p = &bp->bif_dlist;
587 while (*p != d) {
588 p = &(*p)->bd_next;
589 if (*p == 0)
590 panic("bpf_detachd: descriptor not in list");
591 }
592 *p = (*p)->bd_next;
593 if (bp->bif_dlist == 0) {
594 /*
595 * Let the driver know that there are no more listeners.
596 */
597 /* Only call dlil_set_bpf_tap for primary dlt */
598 if (bp->bif_ifp->if_bpf == bp)
599 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
600 if (bp->bif_tap)
601 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
602
603 for (bp = bpf_iflist; bp; bp = bp->bif_next)
604 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
605 break;
606 if (bp == NULL)
607 ifp->if_bpf = NULL;
608 }
609 d->bd_bif = NULL;
610 /*
611 * Check if this descriptor had requested promiscuous mode.
612 * If so, turn it off.
613 */
614 if (d->bd_promisc) {
615 d->bd_promisc = 0;
616 lck_mtx_unlock(bpf_mlock);
617 if (ifnet_set_promiscuous(ifp, 0)) {
618 /*
619 * Something is really wrong if we were able to put
620 * the driver into promiscuous mode, but can't
621 * take it out.
622 * Most likely the network interface is gone.
623 */
624 printf("%s: ifnet_set_promiscuous failed\n", __func__);
625 }
626 lck_mtx_lock(bpf_mlock);
627 }
628
629 /*
630 * Wake up other thread that are waiting for this thread to finish
631 * detaching
632 */
633 d->bd_flags &= ~BPF_DETACHING;
634 d->bd_flags |= BPF_DETACHED;
635
636 /* Refresh the local variable as d could have been modified */
637 bpf_closed = d->bd_flags & BPF_CLOSING;
638 /*
639 * Note that We've kept the reference because we may have dropped
640 * the lock when turning off promiscuous mode
641 */
642 bpf_release_d(d);
643
644done:
645 /*
646 * When closing makes sure no other thread refer to the bpf_d
647 */
648 if (bpf_debug != 0)
649 printf("%s: %llx done\n",
650 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
651 /*
652 * Let the caller know the bpf_d is closed
653 */
654 if (bpf_closed)
655 return (1);
656 else
657 return (0);
658}
659
660/*
661 * Start asynchronous timer, if necessary.
662 * Must be called with bpf_mlock held.
663 */
664static void
665bpf_start_timer(struct bpf_d *d)
666{
667 uint64_t deadline;
668 struct timeval tv;
669
670 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
671 tv.tv_sec = d->bd_rtout / hz;
672 tv.tv_usec = (d->bd_rtout % hz) * tick;
673
674 clock_interval_to_deadline(
675 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
676 NSEC_PER_USEC, &deadline);
677 /*
678 * The state is BPF_IDLE, so the timer hasn't
679 * been started yet, and hasn't gone off yet;
680 * there is no thread call scheduled, so this
681 * won't change the schedule.
682 *
683 * XXX - what if, by the time it gets entered,
684 * the deadline has already passed?
685 */
686 thread_call_enter_delayed(d->bd_thread_call, deadline);
687 d->bd_state = BPF_WAITING;
688 }
689}
690
691/*
692 * Cancel asynchronous timer.
693 * Must be called with bpf_mlock held.
694 */
695static boolean_t
696bpf_stop_timer(struct bpf_d *d)
697{
698 /*
699 * If the timer has already gone off, this does nothing.
700 * Our caller is expected to set d->bd_state to BPF_IDLE,
701 * with the bpf_mlock, after we are called. bpf_timed_out()
702 * also grabs bpf_mlock, so, if the timer has gone off and
703 * bpf_timed_out() hasn't finished, it's waiting for the
704 * lock; when this thread releases the lock, it will
705 * find the state is BPF_IDLE, and just release the
706 * lock and return.
707 */
708 return (thread_call_cancel(d->bd_thread_call));
709}
710
711void
712bpf_acquire_d(struct bpf_d *d)
713{
714 void *lr_saved = __builtin_return_address(0);
715
716 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
717
718 d->bd_refcnt += 1;
719
720 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
721 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
722}
723
724void
725bpf_release_d(struct bpf_d *d)
726{
727 void *lr_saved = __builtin_return_address(0);
728
729 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
730
731 if (d->bd_refcnt <= 0)
732 panic("%s: %p refcnt <= 0", __func__, d);
733
734 d->bd_refcnt -= 1;
735
736 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
737 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
738
739 if (d->bd_refcnt == 0) {
740 /* Assert the device is detached */
741 if ((d->bd_flags & BPF_DETACHED) == 0)
742 panic("%s: %p BPF_DETACHED not set", __func__, d);
743
744 _FREE(d, M_DEVBUF);
745 }
746}
747
748/*
749 * Open ethernet device. Returns ENXIO for illegal minor device number,
750 * EBUSY if file is open by another process.
751 */
752/* ARGSUSED */
753int
754bpfopen(dev_t dev, int flags, __unused int fmt,
755 struct proc *p)
756{
757 struct bpf_d *d;
758
759 lck_mtx_lock(bpf_mlock);
760 if ((unsigned int) minor(dev) >= nbpfilter) {
761 lck_mtx_unlock(bpf_mlock);
762 return (ENXIO);
763 }
764 /*
765 * New device nodes are created on demand when opening the last one.
766 * The programming model is for processes to loop on the minor starting
767 * at 0 as long as EBUSY is returned. The loop stops when either the
768 * open succeeds or an error other that EBUSY is returned. That means
769 * that bpf_make_dev_t() must block all processes that are opening the
770 * last node. If not all processes are blocked, they could unexpectedly
771 * get ENOENT and abort their opening loop.
772 */
773 if ((unsigned int) minor(dev) == (nbpfilter - 1))
774 bpf_make_dev_t(major(dev));
775
776 /*
777 * Each minor can be opened by only one process. If the requested
778 * minor is in use, return EBUSY.
779 *
780 * Important: bpfopen() and bpfclose() have to check and set the status
781 * of a device in the same lockin context otherwise the device may be
782 * leaked because the vnode use count will be unpextectly greater than 1
783 * when close() is called.
784 */
785 if (bpf_dtab[minor(dev)] == NULL) {
786 /* Reserve while opening */
787 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
788 } else {
789 lck_mtx_unlock(bpf_mlock);
790 return (EBUSY);
791 }
792 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
793 M_WAIT | M_ZERO);
794 if (d == NULL) {
795 /* this really is a catastrophic failure */
796 printf("bpfopen: malloc bpf_d failed\n");
797 bpf_dtab[minor(dev)] = NULL;
798 lck_mtx_unlock(bpf_mlock);
799 return (ENOMEM);
800 }
801
802 /* Mark "in use" and do most initialization. */
803 bpf_acquire_d(d);
804 d->bd_bufsize = bpf_bufsize;
805 d->bd_sig = SIGIO;
806 d->bd_seesent = 1;
807 d->bd_oflags = flags;
808 d->bd_state = BPF_IDLE;
809 d->bd_traffic_class = SO_TC_BE;
810 d->bd_flags |= BPF_DETACHED;
811 if (bpf_wantpktap)
812 d->bd_flags |= BPF_WANT_PKTAP;
813 else
814 d->bd_flags &= ~BPF_WANT_PKTAP;
815 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
816 if (d->bd_thread_call == NULL) {
817 printf("bpfopen: malloc thread call failed\n");
818 bpf_dtab[minor(dev)] = NULL;
819 bpf_release_d(d);
820 lck_mtx_unlock(bpf_mlock);
821
822 return (ENOMEM);
823 }
824 d->bd_opened_by = p;
825 uuid_generate(d->bd_uuid);
826
827#if CONFIG_MACF_NET
828 mac_bpfdesc_label_init(d);
829 mac_bpfdesc_label_associate(kauth_cred_get(), d);
830#endif
831 bpf_dtab[minor(dev)] = d; /* Mark opened */
832 lck_mtx_unlock(bpf_mlock);
833
834 return (0);
835}
836
837/*
838 * Close the descriptor by detaching it from its interface,
839 * deallocating its buffers, and marking it free.
840 */
841/* ARGSUSED */
842int
843bpfclose(dev_t dev, __unused int flags, __unused int fmt,
844 __unused struct proc *p)
845{
846 struct bpf_d *d;
847
848 /* Take BPF lock to ensure no other thread is using the device */
849 lck_mtx_lock(bpf_mlock);
850
851 d = bpf_dtab[minor(dev)];
852 if (d == NULL || d == BPF_DEV_RESERVED) {
853 lck_mtx_unlock(bpf_mlock);
854 return (ENXIO);
855 }
856
857 /*
858 * Other threads may call bpd_detachd() if we drop the bpf_mlock
859 */
860 d->bd_flags |= BPF_CLOSING;
861
862 if (bpf_debug != 0)
863 printf("%s: %llx\n",
864 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
865
866 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
867
868 /*
869 * Deal with any in-progress timeouts.
870 */
871 switch (d->bd_state) {
872 case BPF_IDLE:
873 /*
874 * Not waiting for a timeout, and no timeout happened.
875 */
876 break;
877
878 case BPF_WAITING:
879 /*
880 * Waiting for a timeout.
881 * Cancel any timer that has yet to go off,
882 * and mark the state as "closing".
883 * Then drop the lock to allow any timers that
884 * *have* gone off to run to completion, and wait
885 * for them to finish.
886 */
887 if (!bpf_stop_timer(d)) {
888 /*
889 * There was no pending call, so the call must
890 * have been in progress. Wait for the call to
891 * complete; we have to drop the lock while
892 * waiting. to let the in-progrss call complete
893 */
894 d->bd_state = BPF_DRAINING;
895 while (d->bd_state == BPF_DRAINING)
896 msleep((caddr_t)d, bpf_mlock, PRINET,
897 "bpfdraining", NULL);
898 }
899 d->bd_state = BPF_IDLE;
900 break;
901
902 case BPF_TIMED_OUT:
903 /*
904 * Timer went off, and the timeout routine finished.
905 */
906 d->bd_state = BPF_IDLE;
907 break;
908
909 case BPF_DRAINING:
910 /*
911 * Another thread is blocked on a close waiting for
912 * a timeout to finish.
913 * This "shouldn't happen", as the first thread to enter
914 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
915 * all subsequent threads should see that and fail with
916 * ENXIO.
917 */
918 panic("Two threads blocked in a BPF close");
919 break;
920 }
921
922 if (d->bd_bif)
923 bpf_detachd(d, 1);
924 selthreadclear(&d->bd_sel);
925#if CONFIG_MACF_NET
926 mac_bpfdesc_label_destroy(d);
927#endif
928 thread_call_free(d->bd_thread_call);
929
930 while (d->bd_hbuf_read != 0)
931 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
932
933 bpf_freed(d);
934
935 /* Mark free in same context as bpfopen comes to check */
936 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
937
938 bpf_release_d(d);
939
940 lck_mtx_unlock(bpf_mlock);
941
942 return (0);
943}
944
945#define BPF_SLEEP bpf_sleep
946
947static int
948bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
949{
950 u_int64_t abstime = 0;
951
952 if (timo != 0)
953 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
954
955 return (msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime));
956}
957
958static void
959bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
960{
961 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
962 struct pktap_v2_hdr *pktap_v2_hdr;
963
964 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
965
966 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
967 pktap_v2_finalize_proc_info(pktap_v2_hdr);
968 } else {
969 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
970 pktap_finalize_proc_info(pktaphdr);
971
972 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
973 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
974 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
975 }
976 }
977}
978
979/*
980 * Rotate the packet buffers in descriptor d. Move the store buffer
981 * into the hold slot, and the free buffer into the store slot.
982 * Zero the length of the new store buffer.
983 */
984#define ROTATE_BUFFERS(d) \
985 if (d->bd_hbuf_read != 0) \
986 panic("rotating bpf buffers during read"); \
987 (d)->bd_hbuf = (d)->bd_sbuf; \
988 (d)->bd_hlen = (d)->bd_slen; \
989 (d)->bd_hcnt = (d)->bd_scnt; \
990 (d)->bd_sbuf = (d)->bd_fbuf; \
991 (d)->bd_slen = 0; \
992 (d)->bd_scnt = 0; \
993 (d)->bd_fbuf = NULL;
994/*
995 * bpfread - read next chunk of packets from buffers
996 */
997int
998bpfread(dev_t dev, struct uio *uio, int ioflag)
999{
1000 struct bpf_d *d;
1001 caddr_t hbuf;
1002 int timed_out, hbuf_len;
1003 int error;
1004 int flags;
1005
1006 lck_mtx_lock(bpf_mlock);
1007
1008 d = bpf_dtab[minor(dev)];
1009 if (d == NULL || d == BPF_DEV_RESERVED ||
1010 (d->bd_flags & BPF_CLOSING) != 0) {
1011 lck_mtx_unlock(bpf_mlock);
1012 return (ENXIO);
1013 }
1014
1015 bpf_acquire_d(d);
1016
1017 /*
1018 * Restrict application to use a buffer the same size as
1019 * as kernel buffers.
1020 */
1021 if (uio_resid(uio) != d->bd_bufsize) {
1022 bpf_release_d(d);
1023 lck_mtx_unlock(bpf_mlock);
1024 return (EINVAL);
1025 }
1026
1027 if (d->bd_state == BPF_WAITING)
1028 bpf_stop_timer(d);
1029
1030 timed_out = (d->bd_state == BPF_TIMED_OUT);
1031 d->bd_state = BPF_IDLE;
1032
1033 while (d->bd_hbuf_read != 0)
1034 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1035
1036 if ((d->bd_flags & BPF_CLOSING) != 0) {
1037 bpf_release_d(d);
1038 lck_mtx_unlock(bpf_mlock);
1039 return (ENXIO);
1040 }
1041 /*
1042 * If the hold buffer is empty, then do a timed sleep, which
1043 * ends when the timeout expires or when enough packets
1044 * have arrived to fill the store buffer.
1045 */
1046 while (d->bd_hbuf == 0) {
1047 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1048 d->bd_slen != 0) {
1049 /*
1050 * We're in immediate mode, or are reading
1051 * in non-blocking mode, or a timer was
1052 * started before the read (e.g., by select()
1053 * or poll()) and has expired and a packet(s)
1054 * either arrived since the previous
1055 * read or arrived while we were asleep.
1056 * Rotate the buffers and return what's here.
1057 */
1058 ROTATE_BUFFERS(d);
1059 break;
1060 }
1061
1062 /*
1063 * No data is available, check to see if the bpf device
1064 * is still pointed at a real interface. If not, return
1065 * ENXIO so that the userland process knows to rebind
1066 * it before using it again.
1067 */
1068 if (d->bd_bif == NULL) {
1069 bpf_release_d(d);
1070 lck_mtx_unlock(bpf_mlock);
1071 return (ENXIO);
1072 }
1073 if (ioflag & IO_NDELAY) {
1074 bpf_release_d(d);
1075 lck_mtx_unlock(bpf_mlock);
1076 return (EWOULDBLOCK);
1077 }
1078 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf", d->bd_rtout);
1079 /*
1080 * Make sure device is still opened
1081 */
1082 if ((d->bd_flags & BPF_CLOSING) != 0) {
1083 bpf_release_d(d);
1084 lck_mtx_unlock(bpf_mlock);
1085 return (ENXIO);
1086 }
1087
1088 while (d->bd_hbuf_read != 0)
1089 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1090 NULL);
1091
1092 if ((d->bd_flags & BPF_CLOSING) != 0) {
1093 bpf_release_d(d);
1094 lck_mtx_unlock(bpf_mlock);
1095 return (ENXIO);
1096 }
1097
1098 if (error == EINTR || error == ERESTART) {
1099 if (d->bd_hbuf != NULL) {
1100 /*
1101 * Because we msleep, the hold buffer might
1102 * be filled when we wake up. Avoid rotating
1103 * in this case.
1104 */
1105 break;
1106 }
1107 if (d->bd_slen != 0) {
1108 /*
1109 * Sometimes we may be interrupted often and
1110 * the sleep above will not timeout.
1111 * Regardless, we should rotate the buffers
1112 * if there's any new data pending and
1113 * return it.
1114 */
1115 ROTATE_BUFFERS(d);
1116 break;
1117 }
1118 bpf_release_d(d);
1119 lck_mtx_unlock(bpf_mlock);
1120 if (error == ERESTART) {
1121 printf("%s: %llx ERESTART to EINTR\n",
1122 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1123 error = EINTR;
1124 }
1125 return (error);
1126 }
1127 if (error == EWOULDBLOCK) {
1128 /*
1129 * On a timeout, return what's in the buffer,
1130 * which may be nothing. If there is something
1131 * in the store buffer, we can rotate the buffers.
1132 */
1133 if (d->bd_hbuf)
1134 /*
1135 * We filled up the buffer in between
1136 * getting the timeout and arriving
1137 * here, so we don't need to rotate.
1138 */
1139 break;
1140
1141 if (d->bd_slen == 0) {
1142 bpf_release_d(d);
1143 lck_mtx_unlock(bpf_mlock);
1144 return (0);
1145 }
1146 ROTATE_BUFFERS(d);
1147 break;
1148 }
1149 }
1150 /*
1151 * At this point, we know we have something in the hold slot.
1152 */
1153
1154 /*
1155 * Set the hold buffer read. So we do not
1156 * rotate the buffers until the hold buffer
1157 * read is complete. Also to avoid issues resulting
1158 * from page faults during disk sleep (<rdar://problem/13436396>).
1159 */
1160 d->bd_hbuf_read = 1;
1161 hbuf = d->bd_hbuf;
1162 hbuf_len = d->bd_hlen;
1163 flags = d->bd_flags;
1164 lck_mtx_unlock(bpf_mlock);
1165
1166#ifdef __APPLE__
1167 /*
1168 * Before we move data to userland, we fill out the extended
1169 * header fields.
1170 */
1171 if (flags & BPF_EXTENDED_HDR) {
1172 char *p;
1173
1174 p = hbuf;
1175 while (p < hbuf + hbuf_len) {
1176 struct bpf_hdr_ext *ehp;
1177 uint32_t flowid;
1178 struct so_procinfo soprocinfo;
1179 int found = 0;
1180
1181 ehp = (struct bpf_hdr_ext *)(void *)p;
1182 if ((flowid = ehp->bh_flowid) != 0) {
1183 if (ehp->bh_proto == IPPROTO_TCP)
1184 found = inp_findinpcb_procinfo(&tcbinfo,
1185 flowid, &soprocinfo);
1186 else if (ehp->bh_proto == IPPROTO_UDP)
1187 found = inp_findinpcb_procinfo(&udbinfo,
1188 flowid, &soprocinfo);
1189 if (found == 1) {
1190 ehp->bh_pid = soprocinfo.spi_pid;
1191 proc_name(ehp->bh_pid, ehp->bh_comm,
1192 MAXCOMLEN);
1193 }
1194 ehp->bh_flowid = 0;
1195 }
1196
1197 if (flags & BPF_FINALIZE_PKTAP) {
1198 struct pktap_header *pktaphdr;
1199
1200 pktaphdr = (struct pktap_header *)(void *)
1201 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1202
1203 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1204 pktaphdr);
1205 }
1206 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1207 }
1208 } else if (flags & BPF_FINALIZE_PKTAP) {
1209 char *p;
1210
1211 p = hbuf;
1212 while (p < hbuf + hbuf_len) {
1213 struct bpf_hdr *hp;
1214 struct pktap_header *pktaphdr;
1215
1216 hp = (struct bpf_hdr *)(void *)p;
1217 pktaphdr = (struct pktap_header *)(void *)
1218 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1219
1220 bpf_finalize_pktap(hp, pktaphdr);
1221
1222 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1223 }
1224 }
1225#endif
1226
1227 /*
1228 * Move data from hold buffer into user space.
1229 * We know the entire buffer is transferred since
1230 * we checked above that the read buffer is bpf_bufsize bytes.
1231 */
1232 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1233
1234 lck_mtx_lock(bpf_mlock);
1235 /*
1236 * Make sure device is still opened
1237 */
1238 if ((d->bd_flags & BPF_CLOSING) != 0) {
1239 bpf_release_d(d);
1240 lck_mtx_unlock(bpf_mlock);
1241 return (ENXIO);
1242 }
1243
1244 d->bd_hbuf_read = 0;
1245 d->bd_fbuf = d->bd_hbuf;
1246 d->bd_hbuf = NULL;
1247 d->bd_hlen = 0;
1248 d->bd_hcnt = 0;
1249 wakeup((caddr_t)d);
1250
1251 bpf_release_d(d);
1252 lck_mtx_unlock(bpf_mlock);
1253 return (error);
1254
1255}
1256
1257/*
1258 * If there are processes sleeping on this descriptor, wake them up.
1259 */
1260static void
1261bpf_wakeup(struct bpf_d *d)
1262{
1263 if (d->bd_state == BPF_WAITING) {
1264 bpf_stop_timer(d);
1265 d->bd_state = BPF_IDLE;
1266 }
1267 wakeup((caddr_t)d);
1268 if (d->bd_async && d->bd_sig && d->bd_sigio)
1269 pgsigio(d->bd_sigio, d->bd_sig);
1270
1271 selwakeup(&d->bd_sel);
1272 if ((d->bd_flags & BPF_KNOTE))
1273 KNOTE(&d->bd_sel.si_note, 1);
1274}
1275
1276static void
1277bpf_timed_out(void *arg, __unused void *dummy)
1278{
1279 struct bpf_d *d = (struct bpf_d *)arg;
1280
1281 lck_mtx_lock(bpf_mlock);
1282 if (d->bd_state == BPF_WAITING) {
1283 /*
1284 * There's a select or kqueue waiting for this; if there's
1285 * now stuff to read, wake it up.
1286 */
1287 d->bd_state = BPF_TIMED_OUT;
1288 if (d->bd_slen != 0)
1289 bpf_wakeup(d);
1290 } else if (d->bd_state == BPF_DRAINING) {
1291 /*
1292 * A close is waiting for this to finish.
1293 * Mark it as finished, and wake the close up.
1294 */
1295 d->bd_state = BPF_IDLE;
1296 bpf_wakeup(d);
1297 }
1298 lck_mtx_unlock(bpf_mlock);
1299}
1300
1301/* keep in sync with bpf_movein above: */
1302#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1303
1304int
1305bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1306{
1307 struct bpf_d *d;
1308 struct ifnet *ifp;
1309 struct mbuf *m = NULL;
1310 int error;
1311 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1312 int datlen = 0;
1313 int bif_dlt;
1314 int bd_hdrcmplt;
1315
1316 lck_mtx_lock(bpf_mlock);
1317
1318 d = bpf_dtab[minor(dev)];
1319 if (d == NULL || d == BPF_DEV_RESERVED ||
1320 (d->bd_flags & BPF_CLOSING) != 0) {
1321 lck_mtx_unlock(bpf_mlock);
1322 return (ENXIO);
1323 }
1324
1325 bpf_acquire_d(d);
1326
1327 if (d->bd_bif == 0) {
1328 bpf_release_d(d);
1329 lck_mtx_unlock(bpf_mlock);
1330 return (ENXIO);
1331 }
1332
1333 ifp = d->bd_bif->bif_ifp;
1334
1335 if ((ifp->if_flags & IFF_UP) == 0) {
1336 bpf_release_d(d);
1337 lck_mtx_unlock(bpf_mlock);
1338 return (ENETDOWN);
1339 }
1340 if (uio_resid(uio) == 0) {
1341 bpf_release_d(d);
1342 lck_mtx_unlock(bpf_mlock);
1343 return (0);
1344 }
1345 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1346
1347 /*
1348 * fix for PR-6849527
1349 * geting variables onto stack before dropping lock for bpf_movein()
1350 */
1351 bif_dlt = (int)d->bd_bif->bif_dlt;
1352 bd_hdrcmplt = d->bd_hdrcmplt;
1353
1354 /* bpf_movein allocating mbufs; drop lock */
1355 lck_mtx_unlock(bpf_mlock);
1356
1357 error = bpf_movein(uio, bif_dlt, &m,
1358 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1359 &datlen);
1360
1361 /* take the lock again */
1362 lck_mtx_lock(bpf_mlock);
1363 if (error) {
1364 bpf_release_d(d);
1365 lck_mtx_unlock(bpf_mlock);
1366 return (error);
1367 }
1368
1369 /* verify the device is still open */
1370 if ((d->bd_flags & BPF_CLOSING) != 0) {
1371 bpf_release_d(d);
1372 lck_mtx_unlock(bpf_mlock);
1373 m_freem(m);
1374 return (ENXIO);
1375 }
1376
1377 if (d->bd_bif == NULL) {
1378 bpf_release_d(d);
1379 lck_mtx_unlock(bpf_mlock);
1380 m_free(m);
1381 return (ENXIO);
1382 }
1383
1384 if ((unsigned)datlen > ifp->if_mtu) {
1385 bpf_release_d(d);
1386 lck_mtx_unlock(bpf_mlock);
1387 m_freem(m);
1388 return (EMSGSIZE);
1389 }
1390
1391#if CONFIG_MACF_NET
1392 mac_mbuf_label_associate_bpfdesc(d, m);
1393#endif
1394
1395 bpf_set_packet_service_class(m, d->bd_traffic_class);
1396
1397 lck_mtx_unlock(bpf_mlock);
1398
1399 /*
1400 * The driver frees the mbuf.
1401 */
1402 if (d->bd_hdrcmplt) {
1403 if (d->bd_bif->bif_send)
1404 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1405 else
1406 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1407 } else {
1408 error = dlil_output(ifp, PF_INET, m, NULL,
1409 (struct sockaddr *)dst_buf, 0, NULL);
1410 }
1411
1412 lck_mtx_lock(bpf_mlock);
1413 bpf_release_d(d);
1414 lck_mtx_unlock(bpf_mlock);
1415
1416 return (error);
1417}
1418
1419/*
1420 * Reset a descriptor by flushing its packet buffer and clearing the
1421 * receive and drop counts.
1422 */
1423static void
1424reset_d(struct bpf_d *d)
1425{
1426 if (d->bd_hbuf_read != 0)
1427 panic("resetting buffers during read");
1428
1429 if (d->bd_hbuf) {
1430 /* Free the hold buffer. */
1431 d->bd_fbuf = d->bd_hbuf;
1432 d->bd_hbuf = NULL;
1433 }
1434 d->bd_slen = 0;
1435 d->bd_hlen = 0;
1436 d->bd_scnt = 0;
1437 d->bd_hcnt = 0;
1438 d->bd_rcount = 0;
1439 d->bd_dcount = 0;
1440}
1441
1442static struct bpf_d *
1443bpf_get_device_from_uuid(uuid_t uuid)
1444{
1445 unsigned int i;
1446
1447 for (i = 0; i < nbpfilter; i++) {
1448 struct bpf_d *d = bpf_dtab[i];
1449
1450 if (d == NULL || d == BPF_DEV_RESERVED ||
1451 (d->bd_flags & BPF_CLOSING) != 0)
1452 continue;
1453 if (uuid_compare(uuid, d->bd_uuid) == 0)
1454 return (d);
1455 }
1456
1457 return (NULL);
1458}
1459
1460/*
1461 * The BIOCSETUP command "atomically" attach to the interface and
1462 * copy the buffer from another interface. This minimizes the risk
1463 * of missing packet because this is done while holding
1464 * the BPF global lock
1465 */
1466static int
1467bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1468{
1469 struct bpf_d *d_from;
1470 int error = 0;
1471
1472 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1473
1474 /*
1475 * Sanity checks
1476 */
1477 d_from = bpf_get_device_from_uuid(uuid_from);
1478 if (d_from == NULL) {
1479 error = ENOENT;
1480 os_log_info(OS_LOG_DEFAULT,
1481 "%s: uuids not found error %d",
1482 __func__, error);
1483 return (error);
1484 }
1485 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1486 error = EACCES;
1487 os_log_info(OS_LOG_DEFAULT,
1488 "%s: processes not matching error %d",
1489 __func__, error);
1490 return (error);
1491 }
1492
1493 /*
1494 * Prevent any read while copying
1495 */
1496 while (d_to->bd_hbuf_read != 0)
1497 msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1498 d_to->bd_hbuf_read = 1;
1499
1500 while (d_from->bd_hbuf_read != 0)
1501 msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1502 d_from->bd_hbuf_read = 1;
1503
1504 /*
1505 * Verify the devices have not been closed
1506 */
1507 if (d_to->bd_flags & BPF_CLOSING) {
1508 error = ENXIO;
1509 os_log_info(OS_LOG_DEFAULT,
1510 "%s: d_to is closing error %d",
1511 __func__, error);
1512 goto done;
1513 }
1514 if (d_from->bd_flags & BPF_CLOSING) {
1515 error = ENXIO;
1516 os_log_info(OS_LOG_DEFAULT,
1517 "%s: d_from is closing error %d",
1518 __func__, error);
1519 goto done;
1520 }
1521
1522 /*
1523 * For now require the same buffer size
1524 */
1525 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1526 error = EINVAL;
1527 os_log_info(OS_LOG_DEFAULT,
1528 "%s: bufsizes not matching error %d",
1529 __func__, error);
1530 goto done;
1531 }
1532
1533 /*
1534 * Attach to the interface
1535 */
1536 error = bpf_setif(d_to, ifp, false, true);
1537 if (error != 0) {
1538 os_log_info(OS_LOG_DEFAULT,
1539 "%s: bpf_setif() failed error %d",
1540 __func__, error);
1541 goto done;
1542 }
1543
1544 /*
1545 * Make sure the buffers are setup as expected by bpf_setif()
1546 */
1547 ASSERT(d_to->bd_hbuf == NULL);
1548 ASSERT(d_to->bd_sbuf != NULL);
1549 ASSERT(d_to->bd_fbuf != NULL);
1550
1551 /*
1552 * Copy the buffers and update the pointers and counts
1553 */
1554 memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1555 d_to->bd_slen = d_from->bd_slen;
1556 d_to->bd_scnt = d_from->bd_scnt;
1557
1558 if (d_from->bd_hbuf != NULL) {
1559 d_to->bd_hbuf = d_to->bd_fbuf;
1560 d_to->bd_fbuf = NULL;
1561 memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1562 }
1563 d_to->bd_hlen = d_from->bd_hlen;
1564 d_to->bd_hcnt = d_from->bd_hcnt;
1565
1566 if (bpf_debug > 0) {
1567 os_log_info(OS_LOG_DEFAULT,
1568 "%s: done slen %u scnt %u hlen %u hcnt %u",
1569 __func__, d_to->bd_slen, d_to->bd_scnt,
1570 d_to->bd_hlen, d_to->bd_hcnt);
1571 }
1572done:
1573 d_from->bd_hbuf_read = 0;
1574 wakeup((caddr_t)d_from);
1575
1576 d_to->bd_hbuf_read = 0;
1577 wakeup((caddr_t)d_to);
1578
1579 return (error);
1580}
1581
1582/*
1583 * FIONREAD Check for read packet available.
1584 * SIOCGIFADDR Get interface address - convenient hook to driver.
1585 * BIOCGBLEN Get buffer len [for read()].
1586 * BIOCSETF Set ethernet read filter.
1587 * BIOCFLUSH Flush read packet buffer.
1588 * BIOCPROMISC Put interface into promiscuous mode.
1589 * BIOCGDLT Get link layer type.
1590 * BIOCGETIF Get interface name.
1591 * BIOCSETIF Set interface.
1592 * BIOCSRTIMEOUT Set read timeout.
1593 * BIOCGRTIMEOUT Get read timeout.
1594 * BIOCGSTATS Get packet stats.
1595 * BIOCIMMEDIATE Set immediate mode.
1596 * BIOCVERSION Get filter language version.
1597 * BIOCGHDRCMPLT Get "header already complete" flag
1598 * BIOCSHDRCMPLT Set "header already complete" flag
1599 * BIOCGSEESENT Get "see packets sent" flag
1600 * BIOCSSEESENT Set "see packets sent" flag
1601 * BIOCSETTC Set traffic class.
1602 * BIOCGETTC Get traffic class.
1603 * BIOCSEXTHDR Set "extended header" flag
1604 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1605 * BIOCGHEADDROP Get "head-drop" flag
1606 */
1607/* ARGSUSED */
1608int
1609bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1610 struct proc *p)
1611{
1612 struct bpf_d *d;
1613 int error = 0;
1614 u_int int_arg;
1615 struct ifreq ifr;
1616
1617 lck_mtx_lock(bpf_mlock);
1618
1619 d = bpf_dtab[minor(dev)];
1620 if (d == NULL || d == BPF_DEV_RESERVED ||
1621 (d->bd_flags & BPF_CLOSING) != 0) {
1622 lck_mtx_unlock(bpf_mlock);
1623 return (ENXIO);
1624 }
1625
1626 bpf_acquire_d(d);
1627
1628 if (d->bd_state == BPF_WAITING)
1629 bpf_stop_timer(d);
1630 d->bd_state = BPF_IDLE;
1631
1632 switch (cmd) {
1633
1634 default:
1635 error = EINVAL;
1636 break;
1637
1638 /*
1639 * Check for read packet available.
1640 */
1641 case FIONREAD: /* int */
1642 {
1643 int n;
1644
1645 n = d->bd_slen;
1646 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1647 n += d->bd_hlen;
1648
1649 bcopy(&n, addr, sizeof (n));
1650 break;
1651 }
1652
1653 case SIOCGIFADDR: /* struct ifreq */
1654 {
1655 struct ifnet *ifp;
1656
1657 if (d->bd_bif == 0)
1658 error = EINVAL;
1659 else {
1660 ifp = d->bd_bif->bif_ifp;
1661 error = ifnet_ioctl(ifp, 0, cmd, addr);
1662 }
1663 break;
1664 }
1665
1666 /*
1667 * Get buffer len [for read()].
1668 */
1669 case BIOCGBLEN: /* u_int */
1670 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1671 break;
1672
1673 /*
1674 * Set buffer length.
1675 */
1676 case BIOCSBLEN: { /* u_int */
1677 u_int size;
1678 unsigned int maxbufsize = bpf_maxbufsize;
1679
1680 /*
1681 * Allow larger buffer in head drop mode to with the
1682 * assumption the reading process may be low priority but
1683 * is interested in the most recent traffic
1684 */
1685 if (d->bd_headdrop != 0) {
1686 maxbufsize = 2 * bpf_maxbufsize;
1687 }
1688
1689 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1690 /*
1691 * Interface already attached, unable to change buffers
1692 */
1693 error = EINVAL;
1694 break;
1695 }
1696 bcopy(addr, &size, sizeof (size));
1697
1698 if (size > maxbufsize) {
1699 d->bd_bufsize = maxbufsize;
1700
1701 os_log_info(OS_LOG_DEFAULT,
1702 "%s bufsize capped to %u from %u",
1703 __func__, d->bd_bufsize, size);
1704 } else if (size < BPF_MINBUFSIZE) {
1705 d->bd_bufsize = BPF_MINBUFSIZE;
1706
1707 os_log_info(OS_LOG_DEFAULT,
1708 "%s bufsize bumped to %u from %u",
1709 __func__, d->bd_bufsize, size);
1710 } else {
1711 d->bd_bufsize = size;
1712 }
1713
1714 /* It's a read/write ioctl */
1715 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1716 break;
1717 }
1718 /*
1719 * Set link layer read filter.
1720 */
1721 case BIOCSETF32:
1722 case BIOCSETFNR32: { /* struct bpf_program32 */
1723 struct bpf_program32 prg32;
1724
1725 bcopy(addr, &prg32, sizeof (prg32));
1726 error = bpf_setf(d, prg32.bf_len,
1727 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1728 break;
1729 }
1730
1731 case BIOCSETF64:
1732 case BIOCSETFNR64: { /* struct bpf_program64 */
1733 struct bpf_program64 prg64;
1734
1735 bcopy(addr, &prg64, sizeof (prg64));
1736 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1737 break;
1738 }
1739
1740 /*
1741 * Flush read packet buffer.
1742 */
1743 case BIOCFLUSH:
1744 while (d->bd_hbuf_read != 0) {
1745 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1746 NULL);
1747 }
1748 if ((d->bd_flags & BPF_CLOSING) != 0) {
1749 error = ENXIO;
1750 break;
1751 }
1752 reset_d(d);
1753 break;
1754
1755 /*
1756 * Put interface into promiscuous mode.
1757 */
1758 case BIOCPROMISC:
1759 if (d->bd_bif == 0) {
1760 /*
1761 * No interface attached yet.
1762 */
1763 error = EINVAL;
1764 break;
1765 }
1766 if (d->bd_promisc == 0) {
1767 lck_mtx_unlock(bpf_mlock);
1768 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1769 lck_mtx_lock(bpf_mlock);
1770 if (error == 0)
1771 d->bd_promisc = 1;
1772 }
1773 break;
1774
1775 /*
1776 * Get device parameters.
1777 */
1778 case BIOCGDLT: /* u_int */
1779 if (d->bd_bif == 0)
1780 error = EINVAL;
1781 else
1782 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1783 break;
1784
1785 /*
1786 * Get a list of supported data link types.
1787 */
1788 case BIOCGDLTLIST: /* struct bpf_dltlist */
1789 if (d->bd_bif == NULL) {
1790 error = EINVAL;
1791 } else {
1792 error = bpf_getdltlist(d, addr, p);
1793 }
1794 break;
1795
1796 /*
1797 * Set data link type.
1798 */
1799 case BIOCSDLT: /* u_int */
1800 if (d->bd_bif == NULL) {
1801 error = EINVAL;
1802 } else {
1803 u_int dlt;
1804
1805 bcopy(addr, &dlt, sizeof (dlt));
1806
1807 if (dlt == DLT_PKTAP &&
1808 !(d->bd_flags & BPF_WANT_PKTAP)) {
1809 dlt = DLT_RAW;
1810 }
1811 error = bpf_setdlt(d, dlt);
1812 }
1813 break;
1814
1815 /*
1816 * Get interface name.
1817 */
1818 case BIOCGETIF: /* struct ifreq */
1819 if (d->bd_bif == 0)
1820 error = EINVAL;
1821 else {
1822 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1823
1824 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1825 sizeof (ifr.ifr_name), "%s", if_name(ifp));
1826 }
1827 break;
1828
1829 /*
1830 * Set interface.
1831 */
1832 case BIOCSETIF: { /* struct ifreq */
1833 ifnet_t ifp;
1834
1835 bcopy(addr, &ifr, sizeof (ifr));
1836 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1837 ifp = ifunit(ifr.ifr_name);
1838 if (ifp == NULL)
1839 error = ENXIO;
1840 else
1841 error = bpf_setif(d, ifp, true, false);
1842 break;
1843 }
1844
1845 /*
1846 * Set read timeout.
1847 */
1848 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1849 struct user32_timeval _tv;
1850 struct timeval tv;
1851
1852 bcopy(addr, &_tv, sizeof (_tv));
1853 tv.tv_sec = _tv.tv_sec;
1854 tv.tv_usec = _tv.tv_usec;
1855
1856 /*
1857 * Subtract 1 tick from tvtohz() since this isn't
1858 * a one-shot timer.
1859 */
1860 if ((error = itimerfix(&tv)) == 0)
1861 d->bd_rtout = tvtohz(&tv) - 1;
1862 break;
1863 }
1864
1865 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1866 struct user64_timeval _tv;
1867 struct timeval tv;
1868
1869 bcopy(addr, &_tv, sizeof (_tv));
1870 tv.tv_sec = _tv.tv_sec;
1871 tv.tv_usec = _tv.tv_usec;
1872
1873 /*
1874 * Subtract 1 tick from tvtohz() since this isn't
1875 * a one-shot timer.
1876 */
1877 if ((error = itimerfix(&tv)) == 0)
1878 d->bd_rtout = tvtohz(&tv) - 1;
1879 break;
1880 }
1881
1882 /*
1883 * Get read timeout.
1884 */
1885 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1886 struct user32_timeval tv;
1887
1888 bzero(&tv, sizeof (tv));
1889 tv.tv_sec = d->bd_rtout / hz;
1890 tv.tv_usec = (d->bd_rtout % hz) * tick;
1891 bcopy(&tv, addr, sizeof (tv));
1892 break;
1893 }
1894
1895 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1896 struct user64_timeval tv;
1897
1898 bzero(&tv, sizeof (tv));
1899 tv.tv_sec = d->bd_rtout / hz;
1900 tv.tv_usec = (d->bd_rtout % hz) * tick;
1901 bcopy(&tv, addr, sizeof (tv));
1902 break;
1903 }
1904
1905 /*
1906 * Get packet stats.
1907 */
1908 case BIOCGSTATS: { /* struct bpf_stat */
1909 struct bpf_stat bs;
1910
1911 bzero(&bs, sizeof (bs));
1912 bs.bs_recv = d->bd_rcount;
1913 bs.bs_drop = d->bd_dcount;
1914 bcopy(&bs, addr, sizeof (bs));
1915 break;
1916 }
1917
1918 /*
1919 * Set immediate mode.
1920 */
1921 case BIOCIMMEDIATE: /* u_int */
1922 d->bd_immediate = *(u_int *)(void *)addr;
1923 break;
1924
1925 case BIOCVERSION: { /* struct bpf_version */
1926 struct bpf_version bv;
1927
1928 bzero(&bv, sizeof (bv));
1929 bv.bv_major = BPF_MAJOR_VERSION;
1930 bv.bv_minor = BPF_MINOR_VERSION;
1931 bcopy(&bv, addr, sizeof (bv));
1932 break;
1933 }
1934
1935 /*
1936 * Get "header already complete" flag
1937 */
1938 case BIOCGHDRCMPLT: /* u_int */
1939 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1940 break;
1941
1942 /*
1943 * Set "header already complete" flag
1944 */
1945 case BIOCSHDRCMPLT: /* u_int */
1946 bcopy(addr, &int_arg, sizeof (int_arg));
1947 d->bd_hdrcmplt = int_arg ? 1 : 0;
1948 break;
1949
1950 /*
1951 * Get "see sent packets" flag
1952 */
1953 case BIOCGSEESENT: /* u_int */
1954 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1955 break;
1956
1957 /*
1958 * Set "see sent packets" flag
1959 */
1960 case BIOCSSEESENT: /* u_int */
1961 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1962 break;
1963
1964 /*
1965 * Set traffic service class
1966 */
1967 case BIOCSETTC: { /* int */
1968 int tc;
1969
1970 bcopy(addr, &tc, sizeof (int));
1971 error = bpf_set_traffic_class(d, tc);
1972 break;
1973 }
1974
1975 /*
1976 * Get traffic service class
1977 */
1978 case BIOCGETTC: /* int */
1979 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1980 break;
1981
1982 case FIONBIO: /* Non-blocking I/O; int */
1983 break;
1984
1985 case FIOASYNC: /* Send signal on receive packets; int */
1986 bcopy(addr, &d->bd_async, sizeof (int));
1987 break;
1988#ifndef __APPLE__
1989 case FIOSETOWN:
1990 error = fsetown(*(int *)addr, &d->bd_sigio);
1991 break;
1992
1993 case FIOGETOWN:
1994 *(int *)addr = fgetown(d->bd_sigio);
1995 break;
1996
1997 /* This is deprecated, FIOSETOWN should be used instead. */
1998 case TIOCSPGRP:
1999 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2000 break;
2001
2002 /* This is deprecated, FIOGETOWN should be used instead. */
2003 case TIOCGPGRP:
2004 *(int *)addr = -fgetown(d->bd_sigio);
2005 break;
2006#endif
2007 case BIOCSRSIG: { /* Set receive signal; u_int */
2008 u_int sig;
2009
2010 bcopy(addr, &sig, sizeof (u_int));
2011
2012 if (sig >= NSIG)
2013 error = EINVAL;
2014 else
2015 d->bd_sig = sig;
2016 break;
2017 }
2018 case BIOCGRSIG: /* u_int */
2019 bcopy(&d->bd_sig, addr, sizeof (u_int));
2020 break;
2021#ifdef __APPLE__
2022 case BIOCSEXTHDR: /* u_int */
2023 bcopy(addr, &int_arg, sizeof (int_arg));
2024 if (int_arg)
2025 d->bd_flags |= BPF_EXTENDED_HDR;
2026 else
2027 d->bd_flags &= ~BPF_EXTENDED_HDR;
2028 break;
2029
2030 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2031 ifnet_t ifp;
2032 struct bpf_if *bp;
2033
2034 bcopy(addr, &ifr, sizeof (ifr));
2035 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2036 ifp = ifunit(ifr.ifr_name);
2037 if (ifp == NULL) {
2038 error = ENXIO;
2039 break;
2040 }
2041 ifr.ifr_intval = 0;
2042 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2043 struct bpf_d *bpf_d;
2044
2045 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
2046 continue;
2047 for (bpf_d = bp->bif_dlist; bpf_d;
2048 bpf_d = bpf_d->bd_next) {
2049 ifr.ifr_intval += 1;
2050 }
2051 }
2052 bcopy(&ifr, addr, sizeof (ifr));
2053 break;
2054 }
2055 case BIOCGWANTPKTAP: /* u_int */
2056 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2057 bcopy(&int_arg, addr, sizeof (int_arg));
2058 break;
2059
2060 case BIOCSWANTPKTAP: /* u_int */
2061 bcopy(addr, &int_arg, sizeof (int_arg));
2062 if (int_arg)
2063 d->bd_flags |= BPF_WANT_PKTAP;
2064 else
2065 d->bd_flags &= ~BPF_WANT_PKTAP;
2066 break;
2067#endif
2068
2069 case BIOCSHEADDROP:
2070 bcopy(addr, &int_arg, sizeof (int_arg));
2071 d->bd_headdrop = int_arg ? 1 : 0;
2072 break;
2073
2074 case BIOCGHEADDROP:
2075 bcopy(&d->bd_headdrop, addr, sizeof (int));
2076 break;
2077
2078 case BIOCSTRUNCATE:
2079 bcopy(addr, &int_arg, sizeof(int_arg));
2080 if (int_arg)
2081 d->bd_flags |= BPF_TRUNCATE;
2082 else
2083 d->bd_flags &= ~BPF_TRUNCATE;
2084 break;
2085
2086 case BIOCGETUUID:
2087 bcopy(&d->bd_uuid, addr, sizeof (uuid_t));
2088 break;
2089
2090 case BIOCSETUP: {
2091 struct bpf_setup_args bsa;
2092 ifnet_t ifp;
2093
2094 bcopy(addr, &bsa, sizeof (struct bpf_setup_args));
2095 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2096 ifp = ifunit(bsa.bsa_ifname);
2097 if (ifp == NULL) {
2098 error = ENXIO;
2099 os_log_info(OS_LOG_DEFAULT,
2100 "%s: ifnet not found for %s error %d",
2101 __func__, bsa.bsa_ifname, error);
2102 break;
2103 }
2104
2105 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2106 break;
2107 }
2108 case BIOCSPKTHDRV2:
2109 bcopy(addr, &int_arg, sizeof(int_arg));
2110 if (int_arg != 0)
2111 d->bd_flags |= BPF_PKTHDRV2;
2112 else
2113 d->bd_flags &= ~BPF_PKTHDRV2;
2114 break;
2115
2116 case BIOCGPKTHDRV2:
2117 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2118 bcopy(&int_arg, addr, sizeof (int));
2119 break;
2120 }
2121
2122 bpf_release_d(d);
2123 lck_mtx_unlock(bpf_mlock);
2124
2125 return (error);
2126}
2127
2128/*
2129 * Set d's packet filter program to fp. If this file already has a filter,
2130 * free it and replace it. Returns EINVAL for bogus requests.
2131 */
2132static int
2133bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2134 u_long cmd)
2135{
2136 struct bpf_insn *fcode, *old;
2137 u_int flen, size;
2138
2139 while (d->bd_hbuf_read != 0)
2140 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2141
2142 if ((d->bd_flags & BPF_CLOSING) != 0)
2143 return (ENXIO);
2144
2145 old = d->bd_filter;
2146 if (bf_insns == USER_ADDR_NULL) {
2147 if (bf_len != 0)
2148 return (EINVAL);
2149 d->bd_filter = NULL;
2150 reset_d(d);
2151 if (old != 0)
2152 FREE((caddr_t)old, M_DEVBUF);
2153 return (0);
2154 }
2155 flen = bf_len;
2156 if (flen > BPF_MAXINSNS)
2157 return (EINVAL);
2158
2159 size = flen * sizeof(struct bpf_insn);
2160 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
2161#ifdef __APPLE__
2162 if (fcode == NULL)
2163 return (ENOBUFS);
2164#endif
2165 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2166 bpf_validate(fcode, (int)flen)) {
2167 d->bd_filter = fcode;
2168
2169 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
2170 reset_d(d);
2171
2172 if (old != 0)
2173 FREE((caddr_t)old, M_DEVBUF);
2174
2175 return (0);
2176 }
2177 FREE((caddr_t)fcode, M_DEVBUF);
2178 return (EINVAL);
2179}
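/*
 * Illustrative sketch (not part of this file): a user process typically
 * installs a filter with the BIOCSETF ioctl, which funnels into bpf_setf()
 * above.  The hypothetical program below accepts only IPv4 packets on a
 * DLT_EN10MB interface.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),	// load ethertype
 *		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 1),
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	// accept whole packet
 *		BPF_STMT(BPF_RET + BPF_K, 0),		// reject
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = sizeof(insns) / sizeof(insns[0]),
 *		.bf_insns = insns,
 *	};
 *	if (ioctl(bpf_fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */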
2180
2181/*
2182 * Detach a file from its current interface (if attached at all) and attach
2183 * to the interface specified by 'theywant'.
2184 * Return an errno or 0.
2185 */
2186static int
2187bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2188{
2189 struct bpf_if *bp;
2190 int error;
2191
2192 while (d->bd_hbuf_read != 0 && !has_hbuf_read)
2193 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2194
2195 if ((d->bd_flags & BPF_CLOSING) != 0)
2196 return (ENXIO);
2197
2198 /*
2199 * Look through attached interfaces for the named one.
2200 */
2201 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2202 struct ifnet *ifp = bp->bif_ifp;
2203
2204 if (ifp == 0 || ifp != theywant)
2205 continue;
2206 /*
2207 * Do not use DLT_PKTAP, unless requested explicitly
2208 */
2209 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2210 continue;
2211 /*
2212 * Skip the coprocessor interface
2213 */
2214 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp))
2215 continue;
2216 /*
2217 * We found the requested interface.
2218 * Allocate the packet buffers.
2219 */
2220 error = bpf_allocbufs(d);
2221 if (error != 0)
2222 return (error);
2223 /*
2224 * Detach if attached to something else.
2225 */
2226 if (bp != d->bd_bif) {
2227 if (d->bd_bif != NULL) {
2228 if (bpf_detachd(d, 0) != 0)
2229 return (ENXIO);
2230 }
2231 if (bpf_attachd(d, bp) != 0)
2232 return (ENXIO);
2233 }
2234 if (do_reset) {
2235 reset_d(d);
2236 }
2237 return (0);
2238 }
2239 /* Not found. */
2240 return (ENXIO);
2241}
2242
2243/*
2244 * Get a list of the available data link types for the interface.
2245 */
2246static int
2247bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2248{
2249 u_int n;
2250 int error;
2251 struct ifnet *ifp;
2252 struct bpf_if *bp;
2253 user_addr_t dlist;
2254 struct bpf_dltlist bfl;
2255
2256 bcopy(addr, &bfl, sizeof (bfl));
2257 if (proc_is64bit(p)) {
2258 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2259 } else {
2260 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2261 }
2262
2263 ifp = d->bd_bif->bif_ifp;
2264 n = 0;
2265 error = 0;
2266
2267 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2268 if (bp->bif_ifp != ifp)
2269 continue;
2270 /*
2271 * Do not use DLT_PKTAP, unless requested explicitly
2272 */
2273 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2274 continue;
2275 if (dlist != USER_ADDR_NULL) {
2276 if (n >= bfl.bfl_len) {
2277 return (ENOMEM);
2278 }
2279 error = copyout(&bp->bif_dlt, dlist,
2280 sizeof (bp->bif_dlt));
2281 if (error != 0)
2282 break;
2283 dlist += sizeof (bp->bif_dlt);
2284 }
2285 n++;
2286 }
2287 bfl.bfl_len = n;
2288 bcopy(&bfl, addr, sizeof (bfl));
2289
2290 return (error);
2291}
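/*
 * Illustrative sketch (not part of this file): user space drives
 * bpf_getdltlist() through the BIOCGDLTLIST ioctl.  As handled above, a
 * NULL list pointer returns only the count, so callers commonly issue the
 * ioctl twice.
 *
 *	struct bpf_dltlist dl = { 0 };
 *	if (ioctl(bpf_fd, BIOCGDLTLIST, &dl) == -1)	// first call: count only
 *		err(1, "BIOCGDLTLIST");
 *	dl.bfl_list = calloc(dl.bfl_len, sizeof(u_int32_t));
 *	if (dl.bfl_list == NULL)
 *		err(1, "calloc");
 *	if (ioctl(bpf_fd, BIOCGDLTLIST, &dl) == -1)	// second call: fill list
 *		err(1, "BIOCGDLTLIST");
 */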
2292
2293/*
2294 * Set the data link type of a BPF instance.
2295 */
2296static int
2297bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2298{
2299 int error, opromisc;
2300 struct ifnet *ifp;
2301 struct bpf_if *bp;
2302
2303 if (d->bd_bif->bif_dlt == dlt)
2304 return (0);
2305
2306 while (d->bd_hbuf_read != 0)
2307 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2308
2309 if ((d->bd_flags & BPF_CLOSING) != 0)
2310 return (ENXIO);
2311
2312 ifp = d->bd_bif->bif_ifp;
2313 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2314 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2315 /*
2316 * Do not use DLT_PKTAP, unless requested explicitly
2317 */
2318 if (bp->bif_dlt == DLT_PKTAP &&
2319 !(d->bd_flags & BPF_WANT_PKTAP)) {
2320 continue;
2321 }
2322 break;
2323 }
2324 }
2325 if (bp != NULL) {
2326 opromisc = d->bd_promisc;
2327 if (bpf_detachd(d, 0) != 0)
2328 return (ENXIO);
2329 error = bpf_attachd(d, bp);
2330 if (error) {
2331 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2332 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2333 error);
2334 return (error);
2335 }
2336 reset_d(d);
2337 if (opromisc) {
2338 lck_mtx_unlock(bpf_mlock);
2339 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2340 lck_mtx_lock(bpf_mlock);
2341 if (error) {
2342 printf("%s: ifpromisc %s%d failed (%d)\n",
2343 __func__, ifnet_name(bp->bif_ifp),
2344 ifnet_unit(bp->bif_ifp), error);
2345 } else {
2346 d->bd_promisc = 1;
2347 }
2348 }
2349 }
2350 return (bp == NULL ? EINVAL : 0);
2351}
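/*
 * Illustrative sketch (not part of this file): switching the data link type
 * from user space goes through BIOCSDLT, which lands in bpf_setdlt().
 * Because DLT_PKTAP is skipped unless explicitly requested, a caller that
 * wants the packet tap DLT would first opt in with BIOCSWANTPKTAP.
 *
 *	u_int want = 1;
 *	u_int dlt = DLT_PKTAP;
 *	if (ioctl(bpf_fd, BIOCSWANTPKTAP, &want) == -1)
 *		err(1, "BIOCSWANTPKTAP");
 *	if (ioctl(bpf_fd, BIOCSDLT, &dlt) == -1)
 *		err(1, "BIOCSDLT");
 */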
2352
2353static int
2354bpf_set_traffic_class(struct bpf_d *d, int tc)
2355{
2356 int error = 0;
2357
2358 if (!SO_VALID_TC(tc))
2359 error = EINVAL;
2360 else
2361 d->bd_traffic_class = tc;
2362
2363 return (error);
2364}
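/*
 * Illustrative sketch (not part of this file, assuming the BIOCSETTC ioctl
 * is what reaches this helper): bpf_set_traffic_class() constrains the
 * service class applied to packets written through the descriptor.
 *
 *	int tc = SO_TC_VO;	// hypothetical choice: voice class for writes
 *	if (ioctl(bpf_fd, BIOCSETTC, &tc) == -1)
 *		err(1, "BIOCSETTC");
 */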
2365
2366static void
2367bpf_set_packet_service_class(struct mbuf *m, int tc)
2368{
2369 if (!(m->m_flags & M_PKTHDR))
2370 return;
2371
2372 VERIFY(SO_VALID_TC(tc));
2373 (void) m_set_service_class(m, so_tc2msc(tc));
2374}
2375
2376/*
2377 * Support for select()
2378 *
2379 * Return true iff the specific operation will not block indefinitely.
2380 * Otherwise, return false but make a note that a selwakeup() must be done.
2381 */
2382int
2383bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2384{
2385 struct bpf_d *d;
2386 int ret = 0;
2387
2388 lck_mtx_lock(bpf_mlock);
2389
2390 d = bpf_dtab[minor(dev)];
2391 if (d == NULL || d == BPF_DEV_RESERVED ||
2392 (d->bd_flags & BPF_CLOSING) != 0) {
2393 lck_mtx_unlock(bpf_mlock);
2394 return (ENXIO);
2395 }
2396
2397 bpf_acquire_d(d);
2398
2399 if (d->bd_bif == NULL) {
2400 bpf_release_d(d);
2401 lck_mtx_unlock(bpf_mlock);
2402 return (ENXIO);
2403 }
2404
2405 while (d->bd_hbuf_read != 0)
2406 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2407
2408 if ((d->bd_flags & BPF_CLOSING) != 0) {
2409 bpf_release_d(d);
2410 lck_mtx_unlock(bpf_mlock);
2411 return (ENXIO);
2412 }
2413
2414 switch (which) {
2415 case FREAD:
2416 if (d->bd_hlen != 0 ||
2417 ((d->bd_immediate ||
2418 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0))
2419 ret = 1; /* read has data to return */
2420 else {
2421 /*
2422 * Read has no data to return.
2423 * Make the select wait, and start a timer if
2424 * necessary.
2425 */
2426 selrecord(p, &d->bd_sel, wql);
2427 bpf_start_timer(d);
2428 }
2429 break;
2430
2431 case FWRITE:
2432 /* can't determine whether a write would block */
2433 ret = 1;
2434 break;
2435 }
2436
2437 bpf_release_d(d);
2438 lck_mtx_unlock(bpf_mlock);
2439
2440 return (ret);
2441}
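/*
 * Illustrative sketch (not part of this file): bpfselect() backs select()
 * and poll() on a bpf descriptor.  With immediate mode off, a read becomes
 * ready only when the hold buffer has data or the read timeout fires, so a
 * typical capture loop waits before reading.
 *
 *	char buf[4096];		// hypothetical; should match BIOCGBLEN
 *	fd_set rfds;
 *	FD_ZERO(&rfds);
 *	FD_SET(bpf_fd, &rfds);
 *	if (select(bpf_fd + 1, &rfds, NULL, NULL, NULL) > 0 &&
 *	    FD_ISSET(bpf_fd, &rfds)) {
 *		ssize_t n = read(bpf_fd, buf, sizeof(buf));
 *		// walk the records in buf (see the sketch after catchpacket())
 *	}
 */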
2442
2443/*
2444 * Support for kevent() system call. Register EVFILT_READ filters and
2445 * reject all others.
2446 */
2447int bpfkqfilter(dev_t dev, struct knote *kn);
2448static void filt_bpfdetach(struct knote *);
2449static int filt_bpfread(struct knote *, long);
2450static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2451static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
2452 struct kevent_internal_s *kev);
2453
2454SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2455 .f_isfd = 1,
2456 .f_detach = filt_bpfdetach,
2457 .f_event = filt_bpfread,
2458 .f_touch = filt_bpftouch,
2459 .f_process = filt_bpfprocess,
2460};
2461
2462static int
2463filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2464{
2465 int ready = 0;
2466
2467 if (d->bd_immediate) {
2468 /*
2469 * If there's data in the hold buffer, it's the
2470 * amount of data a read will return.
2471 *
2472 * If there's no data in the hold buffer, but
2473 * there's data in the store buffer, a read will
2474 * immediately rotate the store buffer to the
2475 * hold buffer, the amount of data in the store
2476 * buffer is the amount of data a read will
2477 * return.
2478 *
2479 * If there's no data in either buffer, we're not
2480 * ready to read.
2481 */
2482 kn->kn_data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2483 d->bd_slen : d->bd_hlen);
2484 int64_t lowwat = 1;
2485 if (kn->kn_sfflags & NOTE_LOWAT) {
2486 if (kn->kn_sdata > d->bd_bufsize)
2487 lowwat = d->bd_bufsize;
2488 else if (kn->kn_sdata > lowwat)
2489 lowwat = kn->kn_sdata;
2490 }
2491 ready = (kn->kn_data >= lowwat);
2492 } else {
2493 /*
2494 * If there's data in the hold buffer, it's the
2495 * amount of data a read will return.
2496 *
2497 * If there's no data in the hold buffer, but
2498 * there's data in the store buffer, if the
2499 * timer has expired a read will immediately
2500 * rotate the store buffer to the hold buffer,
2501 * so the amount of data in the store buffer is
2502 * the amount of data a read will return.
2503 *
2504 * If there's no data in either buffer, or there's
2505 * no data in the hold buffer and the timer hasn't
2506 * expired, we're not ready to read.
2507 */
2508 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2509 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2510 ready = (kn->kn_data > 0);
2511 }
2512 if (!ready)
2513 bpf_start_timer(d);
2514
2515 return (ready);
2516}
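/*
 * Illustrative sketch (not part of this file): the low-water logic above is
 * what a kevent() client exercises by registering EVFILT_READ with
 * NOTE_LOWAT; the knote fires only once at least that many bytes are
 * buffered (capped at the descriptor's buffer size).
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 4096, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 *	// later: kevent(kq, NULL, 0, &kev, 1, NULL) returns when readable
 */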
2517
2518int
2519bpfkqfilter(dev_t dev, struct knote *kn)
2520{
2521 struct bpf_d *d;
2522 int res;
2523
2524 /*
2525 * Is this device a bpf?
2526 */
2527 if (major(dev) != CDEV_MAJOR ||
2528 kn->kn_filter != EVFILT_READ) {
2529 kn->kn_flags = EV_ERROR;
2530 kn->kn_data = EINVAL;
2531 return (0);
2532 }
2533
2534 lck_mtx_lock(bpf_mlock);
2535
2536 d = bpf_dtab[minor(dev)];
2537
2538 if (d == NULL || d == BPF_DEV_RESERVED ||
2539 (d->bd_flags & BPF_CLOSING) != 0 ||
2540 d->bd_bif == NULL) {
2541 lck_mtx_unlock(bpf_mlock);
2542 kn->kn_flags = EV_ERROR;
2543 kn->kn_data = ENXIO;
2544 return (0);
2545 }
2546
2547 kn->kn_hook = d;
2548 kn->kn_filtid = EVFILTID_BPFREAD;
2549 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2550 d->bd_flags |= BPF_KNOTE;
2551
2552 /* capture the current state */
2553 res = filt_bpfread_common(kn, d);
2554
2555 lck_mtx_unlock(bpf_mlock);
2556
2557 return (res);
2558}
2559
2560static void
2561filt_bpfdetach(struct knote *kn)
2562{
2563 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2564
2565 lck_mtx_lock(bpf_mlock);
2566 if (d->bd_flags & BPF_KNOTE) {
2567 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2568 d->bd_flags &= ~BPF_KNOTE;
2569 }
2570 lck_mtx_unlock(bpf_mlock);
2571}
2572
2573static int
2574filt_bpfread(struct knote *kn, long hint)
2575{
2576#pragma unused(hint)
2577 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2578
2579 return (filt_bpfread_common(kn, d));
2580}
2581
2582static int
2583filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2584{
2585 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2586 int res;
2587
2588 lck_mtx_lock(bpf_mlock);
2589
2590 /* save off the lowat threshold and flag */
2591 kn->kn_sdata = kev->data;
2592 kn->kn_sfflags = kev->fflags;
2593
2594 /* output data will be re-generated here */
2595 res = filt_bpfread_common(kn, d);
2596
2597 lck_mtx_unlock(bpf_mlock);
2598
2599 return (res);
2600}
2601
2602static int
2603filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
2604 struct kevent_internal_s *kev)
2605{
2606#pragma unused(data)
2607 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2608 int res;
2609
2610 lck_mtx_lock(bpf_mlock);
2611 res = filt_bpfread_common(kn, d);
2612 if (res) {
2613 *kev = kn->kn_kevent;
2614 }
2615 lck_mtx_unlock(bpf_mlock);
2616
2617 return (res);
2618}
2619
2620/*
2621 * Copy data from an mbuf chain into a buffer. This code is derived
2622 * from m_copydata in kern/uipc_mbuf.c.
2623 */
2624static void
2625bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2626{
2627 u_int count;
2628 u_char *dst;
2629
2630 dst = dst_arg;
2631 while (len > 0) {
2632 if (m == 0)
2633 panic("bpf_mcopy");
2634 count = min(m->m_len, len);
2635 bcopy(mbuf_data(m), dst, count);
2636 m = m->m_next;
2637 dst += count;
2638 len -= count;
2639 }
2640}
2641
2642static inline void
2643bpf_tap_imp(
2644 ifnet_t ifp,
2645 u_int32_t dlt,
2646 struct bpf_packet *bpf_pkt,
2647 int outbound)
2648{
2649 struct bpf_d *d;
2650 u_int slen;
2651 struct bpf_if *bp;
2652
2653 /*
2654 * It's possible that we get here after the bpf descriptor has been
2655 * detached from the interface; in such a case we simply return.
2656 * Lock ordering is important since we can be called asynchronously
2657 * (from IOKit) to process an inbound packet; when that happens
2658 * we would have been holding its "gateLock" and will be acquiring
2659 * "bpf_mlock" upon entering this routine. Due to that, we release
2660 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2661 * acquire "gateLock" in IOKit), in order to avoid a deadlock
2662 * when an ifnet_set_promiscuous request simultaneously collides with
2663 * an inbound packet being passed into the tap callback.
2664 */
2665 lck_mtx_lock(bpf_mlock);
2666 if (ifp->if_bpf == NULL) {
2667 lck_mtx_unlock(bpf_mlock);
2668 return;
2669 }
2670 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2671 if (bp->bif_ifp != ifp) {
2672 /* wrong interface */
2673 bp = NULL;
2674 break;
2675 }
2676 if (dlt == 0 || bp->bif_dlt == dlt) {
2677 /* tapping default DLT or DLT matches */
2678 break;
2679 }
2680 }
2681 if (bp == NULL) {
2682 goto done;
2683 }
2684 for (d = bp->bif_dlist; d; d = d->bd_next) {
2685 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2686 struct bpf_packet bpf_pkt_tmp;
2687 struct pktap_header_buffer bpfp_header_tmp;
2688
2689 if (outbound && !d->bd_seesent)
2690 continue;
2691
2692 ++d->bd_rcount;
2693 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2694 bpf_pkt->bpfp_total_length, 0);
2695 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2696 bp->bif_dlt == DLT_PKTAP) {
2697 /*
2698 * Need to copy the bpf_pkt because the conversion
2699 * to v2 pktap header modifies the content of the
2700 * bpfp_header
2701 */
2702 if ((d->bd_flags & BPF_PKTHDRV2) &&
2703 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2704 bpf_pkt_tmp = *bpf_pkt;
2705
2706 bpf_pkt = &bpf_pkt_tmp;
2707
2708 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2709 bpf_pkt->bpfp_header_length);
2710
2711 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2712
2713 convert_to_pktap_header_to_v2(bpf_pkt,
2714 !!(d->bd_flags & BPF_TRUNCATE));
2715 }
2716
2717 if (d->bd_flags & BPF_TRUNCATE) {
2718 slen = min(slen,
2719 get_pkt_trunc_len((u_char *)bpf_pkt,
2720 bpf_pkt->bpfp_total_length));
2721 }
2722 }
2723 if (slen != 0) {
2724#if CONFIG_MACF_NET
2725 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2726 continue;
2727#endif
2728 catchpacket(d, bpf_pkt, slen, outbound);
2729 }
2730 bpf_pkt = bpf_pkt_saved;
2731 }
2732
2733done:
2734 lck_mtx_unlock(bpf_mlock);
2735}
2736
2737static inline void
2738bpf_tap_mbuf(
2739 ifnet_t ifp,
2740 u_int32_t dlt,
2741 mbuf_t m,
2742 void* hdr,
2743 size_t hlen,
2744 int outbound)
2745{
2746 struct bpf_packet bpf_pkt;
2747 struct mbuf *m0;
2748
2749 if (ifp->if_bpf == NULL) {
2750 /* quickly check without taking lock */
2751 return;
2752 }
2753 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2754 bpf_pkt.bpfp_mbuf = m;
2755 bpf_pkt.bpfp_total_length = 0;
2756 for (m0 = m; m0 != NULL; m0 = m0->m_next)
2757 bpf_pkt.bpfp_total_length += m0->m_len;
2758 bpf_pkt.bpfp_header = hdr;
2759 if (hdr != NULL) {
2760 bpf_pkt.bpfp_total_length += hlen;
2761 bpf_pkt.bpfp_header_length = hlen;
2762 } else {
2763 bpf_pkt.bpfp_header_length = 0;
2764 }
2765 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2766}
2767
2768void
2769bpf_tap_out(
2770 ifnet_t ifp,
2771 u_int32_t dlt,
2772 mbuf_t m,
2773 void* hdr,
2774 size_t hlen)
2775{
2776 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2777}
2778
2779void
2780bpf_tap_in(
2781 ifnet_t ifp,
2782 u_int32_t dlt,
2783 mbuf_t m,
2784 void* hdr,
2785 size_t hlen)
2786{
2787 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2788}
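/*
 * Illustrative sketch (not part of this file): a hypothetical network
 * driver that registered itself with bpf_attach() feeds both directions of
 * traffic to bpf through the two wrappers above.
 *
 *	// on transmit, before handing the mbuf to the hardware
 *	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);
 *
 *	// on receive, once the link-layer header is in place
 *	bpf_tap_in(ifp, DLT_EN10MB, m, NULL, 0);
 */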
2789
2790/* Callback registered with Ethernet driver. */
2791static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2792{
2793 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2794
2795 return (0);
2796}
2797
2798
2799static errno_t
2800bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2801{
2802 errno_t err = 0;
2803 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2804 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2805 } else {
2806 err = EINVAL;
2807 }
2808
2809 return (err);
2810}
2811
2812static void
2813copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2814{
2815 /* copy the optional header */
2816 if (pkt->bpfp_header_length != 0) {
2817 size_t count = min(len, pkt->bpfp_header_length);
2818 bcopy(pkt->bpfp_header, dst, count);
2819 len -= count;
2820 dst += count;
2821 }
2822 if (len == 0) {
2823 /* nothing past the header */
2824 return;
2825 }
2826 /* copy the packet */
2827 switch (pkt->bpfp_type) {
2828 case BPF_PACKET_TYPE_MBUF:
2829 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2830 break;
2831 default:
2832 break;
2833 }
2834}
2835
2836static uint16_t
2837get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2838 const uint16_t remaining_caplen)
2839{
2840 /*
2841 * For some reason tcpdump expects to have one byte beyond the ESP header
2842 */
2843 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2844
2845 if (trunc_len > remaining_caplen)
2846 return (remaining_caplen);
2847
2848 return (trunc_len);
2849}
2850
2851static uint16_t
2852get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2853 const uint16_t remaining_caplen)
2854{
2855 /*
2856	 * Include the generic payload header
2857 */
2858 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2859
2860 if (trunc_len > remaining_caplen)
2861 return (remaining_caplen);
2862
2863 return (trunc_len);
2864}
2865
2866static uint16_t
2867get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2868 const uint16_t remaining_caplen)
2869{
2870 int err = 0;
2871 uint16_t trunc_len = 0;
2872 char payload[remaining_caplen];
2873
2874 err = bpf_copydata(pkt, off, remaining_caplen, payload);
2875 if (err != 0)
2876 return (remaining_caplen);
2877 /*
2878	 * There are three cases:
2879	 * - IKE: the payload starts with a 4-byte header set to zero before the ISAKMP header
2880	 * - keepalive: 1 byte payload
2881 * - otherwise it's ESP
2882 */
2883 if (remaining_caplen >= 4 &&
2884 payload[0] == 0 && payload[1] == 0 &&
2885 payload[2] == 0 && payload[3] == 0) {
2886 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2887 } else if (remaining_caplen == 1) {
2888 trunc_len = 1;
2889 } else {
2890 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2891 }
2892
2893 if (trunc_len > remaining_caplen)
2894 return (remaining_caplen);
2895
2896 return (trunc_len);
2897
2898}
2899
2900static uint16_t
2901get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2902{
2903 int err = 0;
2904 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2905
2906 if (trunc_len >= remaining_caplen)
2907 return (remaining_caplen);
2908
2909 struct udphdr udphdr;
2910 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
2911 if (err != 0)
2912 return (remaining_caplen);
2913
2914 u_short sport, dport;
2915
2916 sport = EXTRACT_SHORT(&udphdr.uh_sport);
2917 dport = EXTRACT_SHORT(&udphdr.uh_dport);
2918
2919 if (dport == PORT_DNS || sport == PORT_DNS) {
2920 /*
2921 * Full UDP payload for DNS
2922 */
2923 trunc_len = remaining_caplen;
2924 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
2925 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
2926 /*
2927 * Full UDP payload for BOOTP and DHCP
2928 */
2929 trunc_len = remaining_caplen;
2930 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
2931 /*
2932 * Return the ISAKMP header
2933 */
2934 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
2935 remaining_caplen - sizeof(struct udphdr));
2936 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
2937 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
2938 remaining_caplen - sizeof(struct udphdr));
2939 }
2940 if (trunc_len >= remaining_caplen)
2941 return (remaining_caplen);
2942
2943 return (trunc_len);
2944}
2945
2946static uint16_t
2947get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2948{
2949 int err = 0;
2950 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
2951 if (trunc_len >= remaining_caplen)
2952 return (remaining_caplen);
2953
2954 struct tcphdr tcphdr;
2955 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
2956 if (err != 0)
2957 return (remaining_caplen);
2958
2959 u_short sport, dport;
2960 sport = EXTRACT_SHORT(&tcphdr.th_sport);
2961 dport = EXTRACT_SHORT(&tcphdr.th_dport);
2962
2963 if (dport == PORT_DNS || sport == PORT_DNS) {
2964 /*
2965 * Full TCP payload for DNS
2966 */
2967 trunc_len = remaining_caplen;
2968 } else {
2969 trunc_len = tcphdr.th_off << 2;
2970 }
2971 if (trunc_len >= remaining_caplen)
2972 return (remaining_caplen);
2973
2974 return (trunc_len);
2975}
2976
2977static uint16_t
2978get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2979{
2980 uint16_t trunc_len;
2981
2982 switch (proto) {
2983 case IPPROTO_ICMP: {
2984 /*
2985		 * Full ICMP payload
2986 */
2987 trunc_len = remaining_caplen;
2988 break;
2989 }
2990 case IPPROTO_ICMPV6: {
2991 /*
2992		 * Full ICMPv6 payload
2993 */
2994 trunc_len = remaining_caplen;
2995 break;
2996 }
2997 case IPPROTO_IGMP: {
2998 /*
2999 * Full IGMP payload
3000 */
3001 trunc_len = remaining_caplen;
3002 break;
3003 }
3004 case IPPROTO_UDP: {
3005 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3006 break;
3007 }
3008 case IPPROTO_TCP: {
3009 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3010 break;
3011 }
3012 case IPPROTO_ESP: {
3013 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3014 break;
3015 }
3016 default: {
3017 /*
3018 * By default we only include the IP header
3019 */
3020 trunc_len = 0;
3021 break;
3022 }
3023 }
3024 if (trunc_len >= remaining_caplen)
3025 return (remaining_caplen);
3026
3027 return (trunc_len);
3028}
3029
3030static uint16_t
3031get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3032{
3033 int err = 0;
3034 uint16_t iplen = sizeof(struct ip);
3035 if (iplen >= remaining_caplen)
3036 return (remaining_caplen);
3037
3038 struct ip iphdr;
3039 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3040 if (err != 0)
3041 return (remaining_caplen);
3042
3043 uint8_t proto = 0;
3044
3045 iplen = iphdr.ip_hl << 2;
3046 if (iplen >= remaining_caplen)
3047 return (remaining_caplen);
3048
3049 proto = iphdr.ip_p;
3050 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3051
3052 if (iplen >= remaining_caplen)
3053 return (remaining_caplen);
3054
3055 return (iplen);
3056}
3057
3058static uint16_t
3059get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3060{
3061 int err = 0;
3062 uint16_t iplen = sizeof(struct ip6_hdr);
3063 if (iplen >= remaining_caplen)
3064 return (remaining_caplen);
3065
3066 struct ip6_hdr ip6hdr;
3067 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3068 if (err != 0)
3069 return (remaining_caplen);
3070
3071 uint8_t proto = 0;
3072
3073 /*
3074 * TBD: process the extension headers
3075 */
3076 proto = ip6hdr.ip6_nxt;
3077 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3078
3079 if (iplen >= remaining_caplen)
3080 return (remaining_caplen);
3081
3082 return (iplen);
3083}
3084
3085static uint16_t
3086get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3087{
3088 int err = 0;
3089 uint16_t ethlen = sizeof(struct ether_header);
3090 if (ethlen >= remaining_caplen)
3091 return (remaining_caplen);
3092
3093 struct ether_header eh;
3094 u_short type;
3095 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3096 if (err != 0)
3097 return (remaining_caplen);
3098
3099 type = EXTRACT_SHORT(&eh.ether_type);
3100 /* Include full ARP */
3101 if (type == ETHERTYPE_ARP) {
3102 ethlen = remaining_caplen;
3103 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3104 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3105 } else {
3106 if (type == ETHERTYPE_IP) {
3107 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3108 remaining_caplen);
3109 } else if (type == ETHERTYPE_IPV6) {
3110 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
3111 remaining_caplen);
3112 }
3113 }
3114 return (ethlen);
3115}
3116
3117static uint32_t
3118get_pkt_trunc_len(u_char *p, u_int len)
3119{
3120 struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
3121 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3122 uint32_t out_pkt_len = 0, tlen = 0;
3123 /*
3124 * pktap->pth_frame_pre_length is L2 header length and accounts
3125 * for both pre and pre_adjust.
3126 * pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
3127 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3128 * pre is the offset to the L3 header after the bpfp_header, or length
3129 * of L2 header after bpfp_header, if present.
3130 */
3131 uint32_t pre = pktap->pth_frame_pre_length -
3132 (pkt->bpfp_header_length - pktap->pth_length);
3133
3134 /* Length of the input packet starting from L3 header */
3135 uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
3136 if (pktap->pth_protocol_family == AF_INET ||
3137 pktap->pth_protocol_family == AF_INET6) {
3138 /* Contains L2 header */
3139 if (pre > 0) {
3140 if (pre < sizeof(struct ether_header))
3141 goto too_short;
3142
3143 out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3144 } else if (pre == 0) {
3145 if (pktap->pth_protocol_family == AF_INET) {
3146 out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
3147 } else if (pktap->pth_protocol_family == AF_INET6) {
3148 out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
3149 }
3150 } else {
3151 /* Ideally pre should be >= 0. This is an exception */
3152 out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
3153 }
3154 } else {
3155 if (pktap->pth_iftype == IFT_ETHER) {
3156 if (in_pkt_len < sizeof(struct ether_header)) {
3157 goto too_short;
3158 }
3159 /* At most include the Ethernet header and 16 bytes */
3160 out_pkt_len = MIN(sizeof(struct ether_header) + 16,
3161 in_pkt_len);
3162 } else {
3163 /*
3164 * For unknown protocols include at most 16 bytes
3165 */
3166 out_pkt_len = MIN(16, in_pkt_len);
3167 }
3168 }
3169done:
3170 tlen = pkt->bpfp_header_length + out_pkt_len + pre;
3171 return (tlen);
3172too_short:
3173 out_pkt_len = in_pkt_len;
3174 goto done;
3175}
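/*
 * Worked example (hypothetical numbers): for a pktap-encapsulated IPv4/UDP
 * DNS reply with bpfp_header_length == pth_length (no L2 header recorded),
 * pre is 0 and in_pkt_len = len - bpfp_header_length.  get_ip_trunc_len()
 * keeps the IP header plus, for port 53, the full UDP payload, so
 * out_pkt_len == in_pkt_len and tlen == len.  For an unknown protocol
 * family on a non-Ethernet interface, out_pkt_len is capped at 16 bytes and
 * tlen adds back bpfp_header_length and pre.
 */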
3176
3177/*
3178 * Move the packet data from interface memory (pkt) into the
3179 * store buffer. Wake up any pending reader when the buffer fills up, or
3180 * when immediate mode is set or the read timeout has already expired.
3181 */
3182static void
3183catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3184 u_int snaplen, int outbound)
3185{
3186 struct bpf_hdr *hp;
3187 struct bpf_hdr_ext *ehp;
3188 int totlen, curlen;
3189 int hdrlen, caplen;
3190 int do_wakeup = 0;
3191 u_char *payload;
3192 struct timeval tv;
3193
3194 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3195 d->bd_bif->bif_hdrlen;
3196 /*
3197 * Figure out how many bytes to move. If the packet is
3198 * greater or equal to the snapshot length, transfer that
3199 * much. Otherwise, transfer the whole packet (unless
3200 * we hit the buffer size limit).
3201 */
3202 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
3203 if (totlen > d->bd_bufsize)
3204 totlen = d->bd_bufsize;
3205
3206 if (hdrlen > totlen)
3207 return;
3208
3209 /*
3210 * Round up the end of the previous packet to the next longword.
3211 */
3212 curlen = BPF_WORDALIGN(d->bd_slen);
3213 if (curlen + totlen > d->bd_bufsize) {
3214 /*
3215 * This packet will overflow the storage buffer.
3216 * Rotate the buffers if we can, then wakeup any
3217 * pending reads.
3218 *
3219 * We cannot rotate buffers if a read is in progress
3220 * so drop the packet
3221 */
3222 if (d->bd_hbuf_read != 0) {
3223 ++d->bd_dcount;
3224 return;
3225 }
3226
3227 if (d->bd_fbuf == NULL) {
3228 if (d->bd_headdrop == 0) {
3229 /*
3230 * We haven't completed the previous read yet,
3231 * so drop the packet.
3232 */
3233 ++d->bd_dcount;
3234 return;
3235 }
3236 /*
3237 * Drop the hold buffer as it contains older packets
3238 */
3239 d->bd_dcount += d->bd_hcnt;
3240 d->bd_fbuf = d->bd_hbuf;
3241 ROTATE_BUFFERS(d);
3242 } else {
3243 ROTATE_BUFFERS(d);
3244 }
3245 do_wakeup = 1;
3246 curlen = 0;
3247 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
3248 /*
3249 * Immediate mode is set, or the read timeout has
3250 * already expired during a select call. A packet
3251 * arrived, so the reader should be woken up.
3252 */
3253 do_wakeup = 1;
3254
3255 /*
3256 * Append the bpf header.
3257 */
3258 microtime(&tv);
3259 if (d->bd_flags & BPF_EXTENDED_HDR) {
3260 struct mbuf *m;
3261
3262 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
3263 ? pkt->bpfp_mbuf : NULL;
3264 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3265 memset(ehp, 0, sizeof(*ehp));
3266 ehp->bh_tstamp.tv_sec = tv.tv_sec;
3267 ehp->bh_tstamp.tv_usec = tv.tv_usec;
3268
3269 ehp->bh_datalen = pkt->bpfp_total_length;
3270 ehp->bh_hdrlen = hdrlen;
3271 caplen = ehp->bh_caplen = totlen - hdrlen;
3272 if (m == NULL) {
3273 if (outbound) {
3274 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3275 } else {
3276 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3277 }
3278 } else if (outbound) {
3279 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3280
3281 /* only do lookups on non-raw INPCB */
3282 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
3283 PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
3284 (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
3285 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3286 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3287 ehp->bh_proto = m->m_pkthdr.pkt_proto;
3288 }
3289 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3290 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
3291 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3292 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
3293 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3294 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
3295 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3296 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3297 ehp->bh_unsent_bytes =
3298 m->m_pkthdr.bufstatus_if;
3299 ehp->bh_unsent_snd =
3300 m->m_pkthdr.bufstatus_sndbuf;
3301 }
3302 } else
3303 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3304 payload = (u_char *)ehp + hdrlen;
3305 } else {
3306 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3307 hp->bh_tstamp.tv_sec = tv.tv_sec;
3308 hp->bh_tstamp.tv_usec = tv.tv_usec;
3309 hp->bh_datalen = pkt->bpfp_total_length;
3310 hp->bh_hdrlen = hdrlen;
3311 caplen = hp->bh_caplen = totlen - hdrlen;
3312 payload = (u_char *)hp + hdrlen;
3313 }
3314 /*
3315 * Copy the packet data into the store buffer and update its length.
3316 */
3317 copy_bpf_packet(pkt, payload, caplen);
3318 d->bd_slen = curlen + totlen;
3319 d->bd_scnt += 1;
3320
3321 if (do_wakeup)
3322 bpf_wakeup(d);
3323}
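/*
 * Illustrative sketch (not part of this file): the store buffer written by
 * catchpacket() is what read(2) eventually hands to user space as a
 * sequence of BPF_WORDALIGN'ed records.  Given 'buf' holding 'n' bytes
 * returned by read(2), a reader that did not request the extended header
 * walks it like this:
 *
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		u_char *pkt = (u_char *)p + bh->bh_hdrlen;
 *		handle_packet(pkt, bh->bh_caplen);	// hypothetical consumer
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */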
3324
3325/*
3326 * Allocate the packet buffers for a descriptor and reset its buffer state.
3327 */
3328static int
3329bpf_allocbufs(struct bpf_d *d)
3330{
3331 if (d->bd_sbuf != NULL) {
3332 FREE(d->bd_sbuf, M_DEVBUF);
3333 d->bd_sbuf = NULL;
3334 }
3335 if (d->bd_hbuf != NULL) {
3336 FREE(d->bd_hbuf, M_DEVBUF);
3337 d->bd_hbuf = NULL;
3338 }
3339 if (d->bd_fbuf != NULL) {
3340 FREE(d->bd_fbuf, M_DEVBUF);
3341 d->bd_fbuf = NULL;
3342 }
3343
3344 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3345 if (d->bd_fbuf == NULL)
3346 return (ENOBUFS);
3347
3348 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3349 if (d->bd_sbuf == NULL) {
3350 FREE(d->bd_fbuf, M_DEVBUF);
3351 d->bd_fbuf = NULL;
3352 return (ENOBUFS);
3353 }
3354 d->bd_slen = 0;
3355 d->bd_hlen = 0;
3356 d->bd_scnt = 0;
3357 d->bd_hcnt = 0;
3358 return (0);
3359}
3360
3361/*
3362 * Free buffers currently in use by a descriptor.
3363 * Called on close.
3364 */
3365static void
3366bpf_freed(struct bpf_d *d)
3367{
3368 /*
3369 * We don't need to lock out interrupts since this descriptor has
3370	 * been detached from its interface and has not yet been marked
3371 * free.
3372 */
3373 if (d->bd_hbuf_read != 0)
3374 panic("bpf buffer freed during read");
3375
3376 if (d->bd_sbuf != 0) {
3377 FREE(d->bd_sbuf, M_DEVBUF);
3378 if (d->bd_hbuf != 0)
3379 FREE(d->bd_hbuf, M_DEVBUF);
3380 if (d->bd_fbuf != 0)
3381 FREE(d->bd_fbuf, M_DEVBUF);
3382 }
3383 if (d->bd_filter)
3384 FREE((caddr_t)d->bd_filter, M_DEVBUF);
3385}
3386
3387/*
3388 * Attach an interface to bpf. dlt is the link layer type;
3389 * hdrlen is the fixed size of the link-layer header
3390 * (variable length headers are not yet supported).
3391 */
3392void
3393bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3394{
3395 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3396}
3397
3398errno_t
3399bpf_attach(
3400 ifnet_t ifp,
3401 u_int32_t dlt,
3402 u_int32_t hdrlen,
3403 bpf_send_func send,
3404 bpf_tap_func tap)
3405{
3406 struct bpf_if *bp;
3407 struct bpf_if *bp_new;
3408 struct bpf_if *bp_before_first = NULL;
3409 struct bpf_if *bp_first = NULL;
3410 struct bpf_if *bp_last = NULL;
3411 boolean_t found;
3412
3413 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
3414 M_WAIT | M_ZERO);
3415 if (bp_new == 0)
3416 panic("bpfattach");
3417
3418 lck_mtx_lock(bpf_mlock);
3419
3420 /*
3421 * Check if this interface/dlt is already attached. Remember the
3422 * first and last attachment for this interface, as well as the
3423 * element before the first attachment.
3424 */
3425 found = FALSE;
3426 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3427 if (bp->bif_ifp != ifp) {
3428 if (bp_first != NULL) {
3429 /* no more elements for this interface */
3430 break;
3431 }
3432 bp_before_first = bp;
3433 } else {
3434 if (bp->bif_dlt == dlt) {
3435 found = TRUE;
3436 break;
3437 }
3438 if (bp_first == NULL) {
3439 bp_first = bp;
3440 }
3441 bp_last = bp;
3442 }
3443 }
3444 if (found) {
3445 lck_mtx_unlock(bpf_mlock);
3446 printf("bpfattach - %s with dlt %d is already attached\n",
3447 if_name(ifp), dlt);
3448 FREE(bp_new, M_DEVBUF);
3449 return (EEXIST);
3450 }
3451
3452 bp_new->bif_ifp = ifp;
3453 bp_new->bif_dlt = dlt;
3454 bp_new->bif_send = send;
3455 bp_new->bif_tap = tap;
3456
3457 if (bp_first == NULL) {
3458 /* No other entries for this ifp */
3459 bp_new->bif_next = bpf_iflist;
3460 bpf_iflist = bp_new;
3461 } else {
3462 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3463 /* Make this the first entry for this interface */
3464 if (bp_before_first != NULL) {
3465 /* point the previous to us */
3466 bp_before_first->bif_next = bp_new;
3467 } else {
3468 /* we're the new head */
3469 bpf_iflist = bp_new;
3470 }
3471 bp_new->bif_next = bp_first;
3472 } else {
3473 /* Add this after the last entry for this interface */
3474 bp_new->bif_next = bp_last->bif_next;
3475 bp_last->bif_next = bp_new;
3476 }
3477 }
3478
3479 /*
3480 * Compute the length of the bpf header. This is not necessarily
3481 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3482 * that the network layer header begins on a longword boundary (for
3483 * performance reasons and to alleviate alignment restrictions).
3484 */
3485 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
3486 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3487 sizeof(struct bpf_hdr_ext)) - hdrlen;
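	/*
	 * Worked example (assuming a 4-byte BPF_ALIGNMENT and an 18-byte
	 * SIZEOF_BPF_HDR; the sizes are illustrative, only the rounding
	 * matters): for DLT_EN10MB with hdrlen == 14,
	 * BPF_WORDALIGN(14 + 18) == 32, so bif_hdrlen == 32 - 14 == 18 and
	 * the network-layer header that follows the 14-byte Ethernet header
	 * starts at offset 32, a longword boundary.
	 */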
3488
3489 /* Take a reference on the interface */
3490 ifnet_reference(ifp);
3491
3492 lck_mtx_unlock(bpf_mlock);
3493
3494#ifndef __APPLE__
3495 if (bootverbose)
3496 printf("bpf: %s attached\n", if_name(ifp));
3497#endif
3498
3499 return (0);
3500}
3501
3502/*
3503 * Detach bpf from an interface. This involves detaching each descriptor
3504 * associated with the interface, and leaving bd_bif NULL. Notify each
3505 * descriptor as it's detached so that any sleepers wake up and get
3506 * ENXIO.
3507 */
3508void
3509bpfdetach(struct ifnet *ifp)
3510{
3511 struct bpf_if *bp, *bp_prev, *bp_next;
3512 struct bpf_d *d;
3513
3514 if (bpf_debug != 0)
3515 printf("%s: %s\n", __func__, if_name(ifp));
3516
3517 lck_mtx_lock(bpf_mlock);
3518
3519 /*
3520 * Build the list of devices attached to that interface
3521 * that we need to free while keeping the lock to maintain
3522 * the integrity of the interface list
3523 */
3524 bp_prev = NULL;
3525 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3526 bp_next = bp->bif_next;
3527
3528 if (ifp != bp->bif_ifp) {
3529 bp_prev = bp;
3530 continue;
3531 }
3532 /* Unlink from the interface list */
3533 if (bp_prev)
3534 bp_prev->bif_next = bp->bif_next;
3535 else
3536 bpf_iflist = bp->bif_next;
3537
3538 /* Detach the devices attached to the interface */
3539 while ((d = bp->bif_dlist) != NULL) {
3540 /*
3541 * Take an extra reference to prevent the device
3542 * from being freed when bpf_detachd() releases
3543 * the reference for the interface list
3544 */
3545 bpf_acquire_d(d);
3546 bpf_detachd(d, 0);
3547 bpf_wakeup(d);
3548 bpf_release_d(d);
3549 }
3550 ifnet_release(ifp);
3551 }
3552
3553 lck_mtx_unlock(bpf_mlock);
3554}
3555
3556void
3557bpf_init(__unused void *unused)
3558{
3559#ifdef __APPLE__
3560 int i;
3561 int maj;
3562
3563 if (bpf_devsw_installed == 0) {
3564 bpf_devsw_installed = 1;
3565 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
3566 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
3567 bpf_mlock_attr = lck_attr_alloc_init();
3568 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
3569 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3570 if (maj == -1) {
3571 if (bpf_mlock_attr)
3572 lck_attr_free(bpf_mlock_attr);
3573 if (bpf_mlock_grp)
3574 lck_grp_free(bpf_mlock_grp);
3575 if (bpf_mlock_grp_attr)
3576 lck_grp_attr_free(bpf_mlock_grp_attr);
3577
3578 bpf_mlock = NULL;
3579 bpf_mlock_attr = NULL;
3580 bpf_mlock_grp = NULL;
3581 bpf_mlock_grp_attr = NULL;
3582 bpf_devsw_installed = 0;
3583 printf("bpf_init: failed to allocate a major number\n");
3584 return;
3585 }
3586
3587 for (i = 0; i < NBPFILTER; i++)
3588 bpf_make_dev_t(maj);
3589 }
3590#else
3591 cdevsw_add(&bpf_cdevsw);
3592#endif
3593}
3594
3595#ifndef __APPLE__
3596SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR, bpf_drvinit, NULL)
3597#endif
3598
3599#if CONFIG_MACF_NET
3600struct label *
3601mac_bpfdesc_label_get(struct bpf_d *d)
3602{
3603
3604 return (d->bd_label);
3605}
3606
3607void
3608mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
3609{
3610
3611 d->bd_label = label;
3612}
3613#endif
3614