1/*
2 * Copyright (c) 2000-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69/*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76#include "bpf.h"
77
78#ifndef __GNUC__
79#define inline
80#else
81#define inline __inline
82#endif
83
84#include <sys/param.h>
85#include <sys/systm.h>
86#include <sys/conf.h>
87#include <sys/malloc.h>
88#include <sys/mbuf.h>
89#include <sys/time.h>
90#include <sys/proc.h>
91#include <sys/signalvar.h>
92#include <sys/filio.h>
93#include <sys/sockio.h>
94#include <sys/ttycom.h>
95#include <sys/filedesc.h>
96#include <sys/uio_internal.h>
97#include <sys/file_internal.h>
98#include <sys/event.h>
99
100#include <sys/poll.h>
101
102#include <sys/socket.h>
103#include <sys/socketvar.h>
104#include <sys/vnode.h>
105
106#include <net/if.h>
107#include <net/bpf.h>
108#include <net/bpfdesc.h>
109
110#include <netinet/in.h>
111#include <netinet/ip.h>
112#include <netinet/ip6.h>
113#include <netinet/in_pcb.h>
114#include <netinet/in_var.h>
115#include <netinet/ip_var.h>
116#include <netinet/tcp.h>
117#include <netinet/tcp_var.h>
118#include <netinet/udp.h>
119#include <netinet/udp_var.h>
120#include <netinet/if_ether.h>
121#include <netinet/isakmp.h>
122#include <netinet6/esp.h>
123#include <sys/kernel.h>
124#include <sys/sysctl.h>
125#include <net/firewire.h>
126
127#include <miscfs/devfs/devfs.h>
128#include <net/dlil.h>
129#include <net/pktap.h>
130
131#include <net/sockaddr_utils.h>
132
133#include <kern/assert.h>
134#include <kern/locks.h>
135#include <kern/thread_call.h>
136#include <libkern/section_keywords.h>
137
138#include <os/log.h>
139
140#include <IOKit/IOBSD.h>
141
142
143extern int tvtohz(struct timeval *);
144extern char *proc_name_address(void *p);
145
146#define BPF_BUFSIZE 4096
147
148#define PRINET 26 /* interruptible */
149
150#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
151#define ESP_HDR_SIZE sizeof(struct newesp)
152
153#define BPF_WRITE_LEEWAY 18 /* space for link layer header */
154
155#define BPF_WRITE_MAX 0x1000000 /* 16 MB arbitrary value */
156
157typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
158
159/*
160 * The default read buffer size is patchable.
161 */
162static unsigned int bpf_bufsize = BPF_BUFSIZE;
163SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &bpf_bufsize, 0, "");
165
166__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
167static int sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS;
168SYSCTL_PROC(_debug, OID_AUTO, bpf_maxbufsize, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
169 &bpf_maxbufsize, 0,
170 sysctl_bpf_maxbufsize, "I", "Default BPF max buffer size");
171
172extern const int copysize_limit_panic;
173#define BPF_BUFSIZE_CAP (copysize_limit_panic >> 1)
174static int sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS;
175SYSCTL_PROC(_debug, OID_AUTO, bpf_bufsize_cap, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
176 0, 0,
177 sysctl_bpf_bufsize_cap, "I", "Upper limit on BPF max buffer size");
178
179#define BPF_MAX_DEVICES 256
180static unsigned int bpf_maxdevices = BPF_MAX_DEVICES;
181SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RD | CTLFLAG_LOCKED,
182 &bpf_maxdevices, 0, "");
183
/*
 * bpf_wantpktap controls the default visibility of DLT_PKTAP.
 * On OS X it is off by default, so a process needs to issue the BIOCSWANTPKTAP
 * ioctl explicitly to be able to use DLT_PKTAP.
 */
189#if !XNU_TARGET_OS_OSX
190static unsigned int bpf_wantpktap = 1;
191#else /* XNU_TARGET_OS_OSX */
192static unsigned int bpf_wantpktap = 0;
193#endif /* XNU_TARGET_OS_OSX */
194SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
195 &bpf_wantpktap, 0, "");
196
197static int bpf_debug = 0;
198SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
199 &bpf_debug, 0, "");
200
201static unsigned long bpf_trunc_overflow = 0;
202SYSCTL_ULONG(_debug, OID_AUTO, bpf_trunc_overflow, CTLFLAG_RD | CTLFLAG_LOCKED,
203 &bpf_trunc_overflow, "");
204
205static int bpf_hdr_comp_enable = 1;
206SYSCTL_INT(_debug, OID_AUTO, bpf_hdr_comp_enable, CTLFLAG_RW | CTLFLAG_LOCKED,
207 &bpf_hdr_comp_enable, 1, "");
208
209static int sysctl_bpf_stats SYSCTL_HANDLER_ARGS;
210SYSCTL_PROC(_debug, OID_AUTO, bpf_stats, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
211 0, 0,
212 sysctl_bpf_stats, "S", "BPF statistics");
213
/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet.
 * bpf_dtab holds pointers to the descriptors, indexed by minor device #.
 */
218static struct bpf_if *bpf_iflist;
/*
 * BSD now stores the bpf_d in the dev_t which is a struct
 * on their system. Our dev_t is an int, so we still store
 * the bpf_d in a separate table indexed by minor device #.
 *
 * The value stored in bpf_dtab[n] represents one of three states:
 *   NULL: device not opened
 *   BPF_DEV_RESERVED: device opening or closing
 *   other: device <n> opened with pointer to storage
 */
229#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
230static struct bpf_d **bpf_dtab = NULL;
231static unsigned int bpf_dtab_size = 0;
232static unsigned int nbpfilter = 0;
233static unsigned bpf_bpfd_cnt = 0;
234
235static LCK_GRP_DECLARE(bpf_mlock_grp, "bpf");
236static LCK_MTX_DECLARE(bpf_mlock_data, &bpf_mlock_grp);
237static lck_mtx_t *const bpf_mlock = &bpf_mlock_data;
238
239static int bpf_allocbufs(struct bpf_d *);
240static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
241static int bpf_detachd(struct bpf_d *d);
242static void bpf_freed(struct bpf_d *);
243static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool, bool);
244static void bpf_timed_out(void *, void *);
245static void bpf_wakeup(struct bpf_d *);
246static uint32_t get_pkt_trunc_len(struct bpf_packet *);
247static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
248static void reset_d(struct bpf_d *);
249static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
250static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
251static int bpf_setdlt(struct bpf_d *, u_int);
252static int bpf_set_traffic_class(struct bpf_d *, int);
253static void bpf_set_packet_service_class(struct mbuf *, int);
254
255static void bpf_acquire_d(struct bpf_d *);
256static void bpf_release_d(struct bpf_d *);
257
258static int bpf_devsw_installed;
259
260void bpf_init(void *unused);
261static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
262
/*
 * Darwin differs from BSD here: the following are static
 * on BSD and not static on Darwin.
 */
267d_open_t bpfopen;
268d_close_t bpfclose;
269d_read_t bpfread;
270d_write_t bpfwrite;
271ioctl_fcn_t bpfioctl;
272select_fcn_t bpfselect;
273
/* Darwin's cdevsw struct differs slightly from BSD's */
275#define CDEV_MAJOR 23
276static const struct cdevsw bpf_cdevsw = {
277 .d_open = bpfopen,
278 .d_close = bpfclose,
279 .d_read = bpfread,
280 .d_write = bpfwrite,
281 .d_ioctl = bpfioctl,
282 .d_stop = eno_stop,
283 .d_reset = eno_reset,
284 .d_ttys = NULL,
285 .d_select = bpfselect,
286 .d_mmap = eno_mmap,
287 .d_strategy = eno_strat,
288 .d_reserved_1 = eno_getc,
289 .d_reserved_2 = eno_putc,
290 .d_type = 0
291};
292
293#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
294
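/*
 * Copy user data from the uio into the mbuf chain rooted at "top", filling
 * each mbuf up to its capacity and accounting the copied bytes in the packet
 * header length. Returns 0 on success or the uiomove() error.
 */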
295static int
296bpf_copy_uio_to_mbuf_packet(struct uio *auio, int bytes_to_copy, struct mbuf *top)
297{
298 int error = 0;
299
300 for (struct mbuf *m = top; m != NULL; m = m->m_next) {
301 int mlen;
302
303 if (m->m_flags & M_EXT) {
304 mlen = m->m_ext.ext_size - (int)M_LEADINGSPACE(m);
305 } else if (m->m_flags & M_PKTHDR) {
306 mlen = MHLEN - (int)M_LEADINGSPACE(m);
307 } else {
308 mlen = MLEN - (int)M_LEADINGSPACE(m);
309 }
310 int copy_len = imin(a: (int)mlen, b: bytes_to_copy);
311
312 error = uiomove(mtod(m, caddr_t), n: (int)copy_len, uio: auio);
313 if (error != 0) {
314 os_log(OS_LOG_DEFAULT, "bpf_copy_uio_to_mbuf_packet: len %d error %d",
315 copy_len, error);
316 goto done;
317 }
318 m->m_len = copy_len;
319 top->m_pkthdr.len += copy_len;
320
321 if (bytes_to_copy > copy_len) {
322 bytes_to_copy -= copy_len;
323 } else {
324 break;
325 }
326 }
327done:
328 return error;
329}
330
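/*
 * Tag the mbuf as broadcast or multicast based on the Ethernet destination
 * address of the supplied header.
 */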
331static inline void
332bpf_set_bcast_mcast(mbuf_t m, struct ether_header * eh)
333{
334 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
335 if (_ether_cmp(a: etherbroadcastaddr, b: eh->ether_dhost) == 0) {
336 m->m_flags |= M_BCAST;
337 } else {
338 m->m_flags |= M_MCAST;
339 }
340 }
341}
342
343#if DEBUG | DEVELOPMENT
344static void
345bpf_log_bcast(const char * func, const char * ifname, uint16_t flags,
346 bool hdrcmplt)
347{
348 const char * type;
349
350 if ((flags & M_BCAST) != 0) {
351 type = "broadcast";
352 } else if ((flags & M_MCAST) != 0) {
353 type = "multicast";
354 } else {
355 type = "unicast";
356 }
357 os_log(OS_LOG_DEFAULT, "%s %s %s hdrcmplt=%s", func, ifname, type,
358 hdrcmplt ? "true" : "false");
359}
360#endif /* DEBUG | DEVELOPMENT */
361
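/*
 * bpf_movein - copy one packet worth of user data into a freshly allocated
 * mbuf chain. When sockp is non-NULL, the link-layer header is copied into
 * the sockaddr data area (so that the caller can pass it to dlil_output())
 * rather than into the mbuf. The BPF global lock is dropped while allocating
 * the mbuf and copying the payload.
 */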
362static int
363bpf_movein(struct uio *uio, int copy_len, struct bpf_d *d, struct mbuf **mp,
364 struct sockaddr *sockp)
365{
366 struct mbuf *m = NULL;
367 int error;
368 int len;
369 uint8_t sa_family;
370 int hlen = 0;
371 struct ifnet *ifp = d->bd_bif->bif_ifp;
372 int linktype = (int)d->bd_bif->bif_dlt;
373
374 switch (linktype) {
375#if SLIP
376 case DLT_SLIP:
377 sa_family = AF_INET;
378 hlen = 0;
379 break;
380#endif /* SLIP */
381
382 case DLT_EN10MB:
383 sa_family = AF_UNSPEC;
384 /* XXX Would MAXLINKHDR be better? */
385 hlen = sizeof(struct ether_header);
386 break;
387
388#if FDDI
389 case DLT_FDDI:
390#if defined(__FreeBSD__) || defined(__bsdi__)
391 sa_family = AF_IMPLINK;
392 hlen = 0;
393#else
394 sa_family = AF_UNSPEC;
395 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
396 hlen = 24;
397#endif
398 break;
399#endif /* FDDI */
400
401 case DLT_RAW:
402 case DLT_NULL:
403 sa_family = AF_UNSPEC;
404 hlen = 0;
405 break;
406
407#ifdef __FreeBSD__
408 case DLT_ATM_RFC1483:
409 /*
410 * en atm driver requires 4-byte atm pseudo header.
411 * though it isn't standard, vpi:vci needs to be
412 * specified anyway.
413 */
414 sa_family = AF_UNSPEC;
415 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
416 break;
417#endif
418
419 case DLT_PPP:
420 sa_family = AF_UNSPEC;
421 hlen = 4; /* This should match PPP_HDRLEN */
422 break;
423
424 case DLT_APPLE_IP_OVER_IEEE1394:
425 sa_family = AF_UNSPEC;
426 hlen = sizeof(struct firewire_header);
427 break;
428
429 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
430 sa_family = AF_IEEE80211;
431 hlen = 0;
432 break;
433
434 case DLT_IEEE802_11_RADIO:
435 sa_family = AF_IEEE80211;
436 hlen = 0;
437 break;
438
439 default:
440 return EIO;
441 }
442
443 if (sockp) {
444 /*
445 * Build a sockaddr based on the data link layer type.
446 * We do this at this level because the ethernet header
447 * is copied directly into the data field of the sockaddr.
448 * In the case of SLIP, there is no header and the packet
449 * is forwarded as is.
450 * Also, we are careful to leave room at the front of the mbuf
451 * for the link level header.
452 */
453 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
454 return EIO;
455 }
456 sockp->sa_family = sa_family;
457 } else {
458 /*
459 * We're directly sending the packet data supplied by
460 * the user; we don't need to make room for the link
461 * header, and don't need the header length value any
462 * more, so set it to 0.
463 */
464 hlen = 0;
465 }
466
467 len = (int)uio_resid(a_uio: uio);
468 if (len < copy_len) {
469 os_log(OS_LOG_DEFAULT, "bpfwrite: len %d if %s less than copy_len %d",
470 (unsigned)len, ifp->if_xname, copy_len);
471 return EMSGSIZE;
472 }
473 len = copy_len;
474 if (len < hlen || (unsigned)len > BPF_WRITE_MAX) {
475 os_log(OS_LOG_DEFAULT, "bpfwrite: bad len %d if %s",
476 (unsigned)len, ifp->if_xname);
477 return EMSGSIZE;
478 }
479 if (d->bd_write_size_max != 0) {
480 if ((len - hlen) > (d->bd_write_size_max + BPF_WRITE_LEEWAY)) {
481 os_log(OS_LOG_DEFAULT, "bpfwrite: len %u - hlen %u too big if %s write_size_max %u",
482 (unsigned)len, (unsigned)hlen, ifp->if_xname, d->bd_write_size_max);
483 }
484 } else if ((len - hlen) > (ifp->if_mtu + BPF_WRITE_LEEWAY)) {
485 os_log(OS_LOG_DEFAULT, "bpfwrite: len %u - hlen %u too big if %s mtu %u",
486 (unsigned)len, (unsigned)hlen, ifp->if_xname, ifp->if_mtu);
487 return EMSGSIZE;
488 }
489
490 /* drop lock while allocating mbuf and copying data */
491 lck_mtx_unlock(lck: bpf_mlock);
492
493 error = mbuf_allocpacket(how: MBUF_WAITOK, packetlen: len, NULL, mbuf: &m);
494 if (error != 0) {
495 os_log(OS_LOG_DEFAULT,
496 "bpfwrite mbuf_allocpacket len %d error %d", len, error);
497 goto bad;
498 }
499 /*
500 * Make room for link header -- the packet length is 0 at this stage
501 */
502 if (hlen != 0) {
503 m->m_data += hlen; /* leading space */
504 error = uiomove(cp: (caddr_t)sockp->sa_data, n: hlen, uio);
505 if (error) {
506 os_log(OS_LOG_DEFAULT,
507 "bpfwrite uiomove hlen %d error %d", hlen, error);
508 goto bad;
509 }
510 len -= hlen;
511 if (linktype == DLT_EN10MB) {
512 struct ether_header * eh;
513
514 eh = (struct ether_header *)(void *)sockp->sa_data;
515 bpf_set_bcast_mcast(m, eh);
516#if DEBUG || DEVELOPMENT
517 if (__improbable(bpf_debug != 0)) {
518 bpf_log_bcast(__func__, ifp->if_xname,
519 m->m_flags, false);
520 }
521#endif /* DEBUG || DEVELOPMENT */
522 }
523 }
	/*
	 * bpf_copy_uio_to_mbuf_packet() sets the length of each mbuf and adds it
	 * to the total packet length
	 */
528 error = bpf_copy_uio_to_mbuf_packet(auio: uio, bytes_to_copy: len, top: m);
529 if (error != 0) {
530 os_log(OS_LOG_DEFAULT,
531 "bpfwrite bpf_copy_uio_to_mbuf_packet error %d", error);
532 goto bad;
533 }
534
535 /* Check for multicast destination */
536 if (hlen == 0 && linktype == DLT_EN10MB) {
537 struct ether_header *eh;
538
539 eh = mtod(m, struct ether_header *);
540 bpf_set_bcast_mcast(m, eh);
541#if DEBUG || DEVELOPMENT
542 if (__improbable(bpf_debug != 0)) {
543 bpf_log_bcast(__func__, ifp->if_xname,
544 m->m_flags, true);
545 }
546#endif /* DEBUG || DEVELOPMENT */
547 }
548 *mp = m;
549
550 lck_mtx_lock(lck: bpf_mlock);
551 return 0;
552bad:
553 if (m != NULL) {
554 m_freem(m);
555 }
556 lck_mtx_lock(lck: bpf_mlock);
557 return error;
558}
559
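/*
 * bpf_movein_batch - parse a sequence of bpf_hdr framed packets from the uio
 * and build a chain of packets linked through m_nextpkt (used for
 * BPF_BATCH_WRITE). Each record is expected to be laid out as:
 *
 *	bpf_hdr | packet data | padding up to BPF_WORDALIGN(bh_hdrlen + bh_caplen)
 *
 * with bh_caplen equal to bh_datalen; bh_tstamp is ignored.
 */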
560static int
561bpf_movein_batch(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
562 struct sockaddr *sockp)
563{
564 int error = 0;
565 user_ssize_t resid;
566 int count = 0;
567 struct mbuf *last = NULL;
568
569 *mp = NULL;
570 while ((resid = uio_resid(a_uio: uio)) >= sizeof(struct bpf_hdr)) {
571 struct bpf_hdr bpfhdr = {};
572 int bpf_hdr_min_len = offsetof(struct bpf_hdr, bh_hdrlen) + sizeof(bpfhdr.bh_hdrlen);
573 int padding_len;
574
575 error = uiomove(cp: (caddr_t)&bpfhdr, n: bpf_hdr_min_len, uio);
576 if (error != 0) {
577 os_log(OS_LOG_DEFAULT, "bpf_movein_batch uiomove error %d", error);
578 break;
579 }
580 /*
581 * Buffer validation:
582 * - ignore bh_tstamp
583 * - bh_hdrlen must fit
584 * - bh_caplen and bh_datalen must be equal
585 */
586 if (bpfhdr.bh_hdrlen < bpf_hdr_min_len) {
587 error = EINVAL;
588 os_log(OS_LOG_DEFAULT, "bpf_movein_batch bh_hdrlen %u too small",
589 bpfhdr.bh_hdrlen);
590 break;
591 }
592 if (bpfhdr.bh_caplen != bpfhdr.bh_datalen) {
593 error = EINVAL;
594 os_log(OS_LOG_DEFAULT, "bpf_movein_batch bh_caplen %u != bh_datalen %u",
595 bpfhdr.bh_caplen, bpfhdr.bh_datalen);
596 break;
597 }
598 if (bpfhdr.bh_hdrlen > resid) {
599 error = EINVAL;
600 os_log(OS_LOG_DEFAULT, "bpf_movein_batch bh_hdrlen %u too large",
601 bpfhdr.bh_hdrlen);
602 break;
603 }
604
605 /*
606 * Ignore additional bytes in the header
607 */
608 padding_len = bpfhdr.bh_hdrlen - bpf_hdr_min_len;
609 if (padding_len > 0) {
610 uio_update(a_uio: uio, a_count: padding_len);
611 }
612
613 /* skip empty packets */
614 if (bpfhdr.bh_caplen > 0) {
615 struct mbuf *m;
616
			/*
			 * For the time being assume all packets have the same destination
			 */
620 error = bpf_movein(uio, copy_len: bpfhdr.bh_caplen, d, mp: &m, sockp);
621 if (error != 0) {
622 os_log(OS_LOG_DEFAULT, "bpf_movein_batch bpf_movein error %d",
623 error);
624 break;
625 }
626 count += 1;
627
628 if (last == NULL) {
629 *mp = m;
630 } else {
631 last->m_nextpkt = m;
632 }
633 last = m;
634 }
635
636 /*
637 * Each BPF packet is padded for alignment
638 */
639 padding_len = BPF_WORDALIGN(bpfhdr.bh_hdrlen + bpfhdr.bh_caplen) - (bpfhdr.bh_hdrlen + bpfhdr.bh_caplen);
640 if (padding_len > 0) {
641 uio_update(a_uio: uio, a_count: padding_len);
642 }
643 }
644
645 if (error != 0) {
646 if (*mp != NULL) {
647 m_freem_list(*mp);
648 *mp = NULL;
649 }
650 }
651 return error;
652}
653
654/*
655 * The dynamic addition of a new device node must block all processes that
656 * are opening the last device so that no process will get an unexpected
657 * ENOENT
658 */
659static void
660bpf_make_dev_t(int maj)
661{
662 static int bpf_growing = 0;
663 unsigned int cur_size = nbpfilter, i;
664
665 if (nbpfilter >= BPF_MAX_DEVICES) {
666 return;
667 }
668
669 while (bpf_growing) {
670 /* Wait until new device has been created */
671 (void) tsleep(chan: (caddr_t)&bpf_growing, PZERO, wmesg: "bpf_growing", timo: 0);
672 }
673 if (nbpfilter > cur_size) {
674 /* other thread grew it already */
675 return;
676 }
677 bpf_growing = 1;
678
679 /* need to grow bpf_dtab first */
680 if (nbpfilter == bpf_dtab_size) {
681 unsigned int new_dtab_size;
682 struct bpf_d **new_dtab = NULL;
683
684 new_dtab_size = bpf_dtab_size + NBPFILTER;
685 new_dtab = krealloc_type(struct bpf_d *,
686 bpf_dtab_size, new_dtab_size, bpf_dtab, Z_WAITOK | Z_ZERO);
687 if (new_dtab == 0) {
688 os_log_error(OS_LOG_DEFAULT, "bpf_make_dev_t: malloc bpf_dtab failed");
689 goto done;
690 }
691 bpf_dtab = new_dtab;
692 bpf_dtab_size = new_dtab_size;
693 }
694 i = nbpfilter++;
695 (void) devfs_make_node(makedev(maj, i),
696 DEVFS_CHAR, UID_ROOT, GID_WHEEL, perms: 0600,
697 fmt: "bpf%d", i);
698done:
699 bpf_growing = 0;
700 wakeup(chan: (caddr_t)&bpf_growing);
701}
702
703/*
704 * Attach file to the bpf interface, i.e. make d listen on bp.
705 */
706static errno_t
707bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
708{
709 int first = bp->bif_dlist == NULL;
710 int error = 0;
711
712 /*
713 * Point d at bp, and add d to the interface's list of listeners.
714 * Finally, point the driver's bpf cookie at the interface so
715 * it will divert packets to bpf.
716 */
717 d->bd_bif = bp;
718 d->bd_next = bp->bif_dlist;
719 bp->bif_dlist = d;
720 bpf_bpfd_cnt++;
721
722 /*
723 * Take a reference on the device even if an error is returned
724 * because we keep the device in the interface's list of listeners
725 */
726 bpf_acquire_d(d);
727
728 if (first) {
729 /* Find the default bpf entry for this ifp */
730 if (bp->bif_ifp->if_bpf == NULL) {
731 struct bpf_if *tmp, *primary = NULL;
732
733 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
734 if (tmp->bif_ifp == bp->bif_ifp) {
735 primary = tmp;
736 break;
737 }
738 }
739 bp->bif_ifp->if_bpf = primary;
740 }
741 /* Only call dlil_set_bpf_tap for primary dlt */
742 if (bp->bif_ifp->if_bpf == bp) {
743 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
744 bpf_tap_callback);
745 }
746
747 if (bp->bif_tap != NULL) {
748 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
749 BPF_TAP_INPUT_OUTPUT);
750 }
751 }
752
753 /*
754 * Reset the detach flags in case we previously detached an interface
755 */
756 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
757
758 if (bp->bif_dlt == DLT_PKTAP) {
759 d->bd_flags |= BPF_FINALIZE_PKTAP;
760 } else {
761 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
762 }
763 return error;
764}
765
/*
 * Detach a file from its interface.
 *
 * Return 1 if it was closed by some thread, 0 otherwise
 */
771static int
772bpf_detachd(struct bpf_d *d)
773{
774 struct bpf_d **p;
775 struct bpf_if *bp;
776 struct ifnet *ifp;
777 uint32_t dlt;
778 bpf_tap_func disable_tap;
779 uint8_t bd_promisc;
780
781 int bpf_closed = d->bd_flags & BPF_CLOSING;
782 /*
783 * Some other thread already detached
784 */
785 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0) {
786 goto done;
787 }
788 /*
789 * This thread is doing the detach
790 */
791 d->bd_flags |= BPF_DETACHING;
792
793 ifp = d->bd_bif->bif_ifp;
794 bp = d->bd_bif;
795
796 /* Remove d from the interface's descriptor list. */
797 p = &bp->bif_dlist;
798 while (*p != d) {
799 p = &(*p)->bd_next;
800 if (*p == 0) {
801 panic("bpf_detachd: descriptor not in list");
802 }
803 }
804 *p = (*p)->bd_next;
805 bpf_bpfd_cnt--;
806 disable_tap = NULL;
807 if (bp->bif_dlist == 0) {
808 /*
809 * Let the driver know that there are no more listeners.
810 */
811 /* Only call dlil_set_bpf_tap for primary dlt */
812 if (bp->bif_ifp->if_bpf == bp) {
813 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
814 }
815
816 disable_tap = bp->bif_tap;
817 if (disable_tap) {
818 dlt = bp->bif_dlt;
819 }
820
821 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
822 if (bp->bif_ifp == ifp && bp->bif_dlist != 0) {
823 break;
824 }
825 }
826 if (bp == NULL) {
827 ifp->if_bpf = NULL;
828 }
829 }
830 d->bd_bif = NULL;
831 /*
832 * Check if this descriptor had requested promiscuous mode.
833 * If so, turn it off.
834 */
835 bd_promisc = d->bd_promisc;
836 d->bd_promisc = 0;
837
838 lck_mtx_unlock(lck: bpf_mlock);
839 if (bd_promisc) {
840 if (ifnet_set_promiscuous(interface: ifp, on: 0)) {
841 /*
842 * Something is really wrong if we were able to put
843 * the driver into promiscuous mode, but can't
844 * take it out.
845 * Most likely the network interface is gone.
846 */
847 os_log_error(OS_LOG_DEFAULT,
848 "%s: bpf%d ifnet_set_promiscuous %s failed",
849 __func__, d->bd_dev_minor, if_name(ifp));
850 }
851 }
852
853 if (disable_tap) {
854 disable_tap(ifp, dlt, BPF_TAP_DISABLE);
855 }
856 lck_mtx_lock(lck: bpf_mlock);
857
	/*
	 * Wake up other threads that are waiting for this thread to finish
	 * detaching
	 */
862 d->bd_flags &= ~BPF_DETACHING;
863 d->bd_flags |= BPF_DETACHED;
864
865 /* Refresh the local variable as d could have been modified */
866 bpf_closed = d->bd_flags & BPF_CLOSING;
867
868 os_log(OS_LOG_DEFAULT, "bpf%d%s detached from %s fcount %llu dcount %llu",
869 d->bd_dev_minor, bpf_closed ? " closed and" : "", if_name(ifp),
870 d->bd_fcount, d->bd_dcount);
871
	/*
	 * Note that we've kept the reference because we may have dropped
	 * the lock when turning off promiscuous mode
	 */
876 bpf_release_d(d);
877done:
878 /*
879 * Let the caller know the bpf_d is closed
880 */
881 if (bpf_closed) {
882 return 1;
883 } else {
884 return 0;
885 }
886}
887
888/*
889 * Start asynchronous timer, if necessary.
890 * Must be called with bpf_mlock held.
891 */
892static void
893bpf_start_timer(struct bpf_d *d)
894{
895 uint64_t deadline;
896 struct timeval tv;
897
898 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
899 tv.tv_sec = d->bd_rtout / hz;
900 tv.tv_usec = (d->bd_rtout % hz) * tick;
901
902 clock_interval_to_deadline(
903 interval: (uint32_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
904 NSEC_PER_USEC, result: &deadline);
905 /*
906 * The state is BPF_IDLE, so the timer hasn't
907 * been started yet, and hasn't gone off yet;
908 * there is no thread call scheduled, so this
909 * won't change the schedule.
910 *
911 * XXX - what if, by the time it gets entered,
912 * the deadline has already passed?
913 */
914 thread_call_enter_delayed(call: d->bd_thread_call, deadline);
915 d->bd_state = BPF_WAITING;
916 }
917}
918
919/*
920 * Cancel asynchronous timer.
921 * Must be called with bpf_mlock held.
922 */
923static boolean_t
924bpf_stop_timer(struct bpf_d *d)
925{
926 /*
927 * If the timer has already gone off, this does nothing.
928 * Our caller is expected to set d->bd_state to BPF_IDLE,
929 * with the bpf_mlock, after we are called. bpf_timed_out()
930 * also grabs bpf_mlock, so, if the timer has gone off and
931 * bpf_timed_out() hasn't finished, it's waiting for the
932 * lock; when this thread releases the lock, it will
933 * find the state is BPF_IDLE, and just release the
934 * lock and return.
935 */
936 return thread_call_cancel(call: d->bd_thread_call);
937}
938
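/*
 * Reference counting for a BPF descriptor. Both routines must be called with
 * bpf_mlock held. The caller return addresses are recorded in small ring
 * buffers to help debug reference leaks, and the descriptor is freed when the
 * last reference is released.
 */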
939void
940bpf_acquire_d(struct bpf_d *d)
941{
942 void *lr_saved = __builtin_return_address(0);
943
944 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
945
946 d->bd_refcnt += 1;
947
948 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
949 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
950}
951
952void
953bpf_release_d(struct bpf_d *d)
954{
955 void *lr_saved = __builtin_return_address(0);
956
957 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
958
959 if (d->bd_refcnt <= 0) {
960 panic("%s: %p refcnt <= 0", __func__, d);
961 }
962
963 d->bd_refcnt -= 1;
964
965 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
966 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
967
968 if (d->bd_refcnt == 0) {
969 /* Assert the device is detached */
970 if ((d->bd_flags & BPF_DETACHED) == 0) {
971 panic("%s: %p BPF_DETACHED not set", __func__, d);
972 }
973
974 kfree_type(struct bpf_d, d);
975 }
976}
977
/*
 * Open the bpf device.  Returns ENXIO for illegal minor device number,
 * EBUSY if file is open by another process.
 */
982/* ARGSUSED */
983int
984bpfopen(dev_t dev, int flags, __unused int fmt,
985 struct proc *p)
986{
987 struct bpf_d *d;
988
989 lck_mtx_lock(lck: bpf_mlock);
990 if ((unsigned int) minor(dev) >= nbpfilter) {
991 lck_mtx_unlock(lck: bpf_mlock);
992 return ENXIO;
993 }
	/*
	 * New device nodes are created on demand when opening the last one.
	 * The programming model is for processes to loop on the minor starting
	 * at 0 as long as EBUSY is returned. The loop stops when either the
	 * open succeeds or an error other than EBUSY is returned. That means
	 * that bpf_make_dev_t() must block all processes that are opening the
	 * last node. If not all processes are blocked, they could unexpectedly
	 * get ENOENT and abort their opening loop. An illustrative userland
	 * open loop is sketched below.
	 */
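	/*
	 * Illustrative userland open loop (a sketch only, not part of this
	 * file; the device name pattern and upper bound are assumptions):
	 *
	 *	int fd = -1;
	 *	for (int n = 0; n < 256; n++) {
	 *		char name[16];
	 *		snprintf(name, sizeof(name), "/dev/bpf%d", n);
	 *		fd = open(name, O_RDWR);
	 *		if (fd >= 0 || errno != EBUSY)
	 *			break;
	 *	}
	 */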
1003 if ((unsigned int) minor(dev) == (nbpfilter - 1)) {
1004 bpf_make_dev_t(major(dev));
1005 }
1006
	/*
	 * Each minor can be opened by only one process. If the requested
	 * minor is in use, return EBUSY.
	 *
	 * Important: bpfopen() and bpfclose() have to check and set the status
	 * of a device in the same locking context, otherwise the device may be
	 * leaked because the vnode use count will be unexpectedly greater than 1
	 * when close() is called.
	 */
1016 if (bpf_dtab[minor(dev)] == NULL) {
1017 /* Reserve while opening */
1018 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
1019 } else {
1020 lck_mtx_unlock(lck: bpf_mlock);
1021 return EBUSY;
1022 }
1023 d = kalloc_type(struct bpf_d, Z_WAITOK | Z_ZERO);
1024 if (d == NULL) {
1025 /* this really is a catastrophic failure */
1026 os_log_error(OS_LOG_DEFAULT,
1027 "bpfopen: bpf%d kalloc_type bpf_d failed", minor(dev));
1028 bpf_dtab[minor(dev)] = NULL;
1029 lck_mtx_unlock(lck: bpf_mlock);
1030 return ENOMEM;
1031 }
1032
1033 /* Mark "in use" and do most initialization. */
1034 bpf_acquire_d(d);
1035 d->bd_bufsize = bpf_bufsize;
1036 d->bd_sig = SIGIO;
1037 d->bd_direction = BPF_D_INOUT;
1038 d->bd_oflags = flags;
1039 d->bd_state = BPF_IDLE;
1040 d->bd_traffic_class = SO_TC_BE;
1041 d->bd_flags |= BPF_DETACHED;
1042 if (bpf_wantpktap) {
1043 d->bd_flags |= BPF_WANT_PKTAP;
1044 } else {
1045 d->bd_flags &= ~BPF_WANT_PKTAP;
1046 }
1047
1048 d->bd_thread_call = thread_call_allocate(func: bpf_timed_out, param0: d);
1049 if (d->bd_thread_call == NULL) {
1050 os_log_error(OS_LOG_DEFAULT, "bpfopen: bpf%d malloc thread call failed",
1051 minor(dev));
1052 bpf_dtab[minor(dev)] = NULL;
1053 bpf_release_d(d);
1054 lck_mtx_unlock(lck: bpf_mlock);
1055
1056 return ENOMEM;
1057 }
1058 d->bd_opened_by = p;
1059 uuid_generate(out: d->bd_uuid);
1060 d->bd_pid = proc_pid(p);
1061
1062 d->bd_dev_minor = minor(dev);
1063 bpf_dtab[minor(dev)] = d; /* Mark opened */
1064 lck_mtx_unlock(lck: bpf_mlock);
1065
1066 if (bpf_debug) {
1067 os_log(OS_LOG_DEFAULT, "bpf%u opened by %s.%u",
1068 d->bd_dev_minor, proc_name_address(p), d->bd_pid);
1069 }
1070 return 0;
1071}
1072
1073/*
1074 * Close the descriptor by detaching it from its interface,
1075 * deallocating its buffers, and marking it free.
1076 */
1077/* ARGSUSED */
1078int
1079bpfclose(dev_t dev, __unused int flags, __unused int fmt,
1080 __unused struct proc *p)
1081{
1082 struct bpf_d *d;
1083
1084 /* Take BPF lock to ensure no other thread is using the device */
1085 lck_mtx_lock(lck: bpf_mlock);
1086
1087 d = bpf_dtab[minor(dev)];
1088 if (d == NULL || d == BPF_DEV_RESERVED) {
1089 lck_mtx_unlock(lck: bpf_mlock);
1090 return ENXIO;
1091 }
1092
	/*
	 * Other threads may call bpf_detachd() if we drop the bpf_mlock
	 */
1096 d->bd_flags |= BPF_CLOSING;
1097
1098 if (bpf_debug != 0) {
1099 os_log(OS_LOG_DEFAULT, "%s: bpf%d",
1100 __func__, d->bd_dev_minor);
1101 }
1102
1103 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
1104
1105 /*
1106 * Deal with any in-progress timeouts.
1107 */
1108 switch (d->bd_state) {
1109 case BPF_IDLE:
1110 /*
1111 * Not waiting for a timeout, and no timeout happened.
1112 */
1113 break;
1114
1115 case BPF_WAITING:
1116 /*
1117 * Waiting for a timeout.
1118 * Cancel any timer that has yet to go off,
1119 * and mark the state as "closing".
1120 * Then drop the lock to allow any timers that
1121 * *have* gone off to run to completion, and wait
1122 * for them to finish.
1123 */
1124 if (!bpf_stop_timer(d)) {
		/*
		 * There was no pending call, so the call must
		 * have been in progress. Wait for the call to
		 * complete; we have to drop the lock while
		 * waiting, to let the in-progress call complete
		 */
1131 d->bd_state = BPF_DRAINING;
1132 while (d->bd_state == BPF_DRAINING) {
1133 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET,
1134 wmesg: "bpfdraining", NULL);
1135 }
1136 }
1137 d->bd_state = BPF_IDLE;
1138 break;
1139
1140 case BPF_TIMED_OUT:
1141 /*
1142 * Timer went off, and the timeout routine finished.
1143 */
1144 d->bd_state = BPF_IDLE;
1145 break;
1146
1147 case BPF_DRAINING:
		/*
		 * Another thread is blocked on a close waiting for
		 * a timeout to finish.
		 * This "shouldn't happen", as the first thread to enter
		 * bpfclose() will set bpf_dtab[minor(dev)] to BPF_DEV_RESERVED,
		 * and all subsequent threads should see that and fail with
		 * ENXIO.
		 */
1156 panic("Two threads blocked in a BPF close");
1157 break;
1158 }
1159
1160 if (d->bd_bif) {
1161 bpf_detachd(d);
1162 }
1163 selthreadclear(&d->bd_sel);
1164 thread_call_free(call: d->bd_thread_call);
1165
1166 while (d->bd_hbuf_read || d->bd_hbuf_write) {
1167 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpfclose", NULL);
1168 }
1169
1170 if (bpf_debug) {
1171 os_log(OS_LOG_DEFAULT,
1172 "bpf%u closed by %s.%u dcount %llu fcount %llu ccount %llu",
1173 d->bd_dev_minor, proc_name_address(p), d->bd_pid,
1174 d->bd_dcount, d->bd_fcount, d->bd_bcs.bcs_count_compressed_prefix);
1175 }
1176
1177 bpf_freed(d);
1178
	/* Mark free in the same locking context that bpfopen() uses to check */
1180 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
1181
1182 bpf_release_d(d);
1183
1184 lck_mtx_unlock(lck: bpf_mlock);
1185
1186 return 0;
1187}
1188
1189#define BPF_SLEEP bpf_sleep
1190
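/*
 * Sleep on the descriptor with bpf_mlock released by msleep1(); "timo" is
 * expressed in ticks and converted to an absolute deadline (left at 0 when
 * timo is 0).
 */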
1191static int
1192bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
1193{
1194 u_int64_t abstime = 0;
1195
1196 if (timo != 0) {
1197 clock_interval_to_deadline(interval: timo, NSEC_PER_SEC / hz, result: &abstime);
1198 }
1199
1200 return msleep1(chan: (caddr_t)d, mtx: bpf_mlock, pri, wmesg, timo: abstime);
1201}
1202
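/*
 * Fill in the delayed process information of a pktap header right before the
 * captured data is copied to userland, and propagate the pktap timestamp into
 * the BPF header when one is present (v1 headers only).
 */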
1203static void
1204bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
1205{
1206 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
1207 struct pktap_v2_hdr *pktap_v2_hdr;
1208
1209 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
1210
1211 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1212 pktap_v2_finalize_proc_info(pktap_v2_hdr);
1213 }
1214 } else {
1215 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP) {
1216 pktap_finalize_proc_info(pktaphdr);
1217 }
1218
1219 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
1220 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
1221 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
1222 }
1223 }
1224}
1225
/*
 * Rotate the packet buffers in descriptor d. Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Note: in head drop mode, the hold buffer can be dropped, so the first packet
 * of the store buffer cannot be compressed as it otherwise would refer to
 * deleted data in a dropped hold buffer that the reader process does not
 * know about
 */
1235#define ROTATE_BUFFERS(d) do { \
1236 if (d->bd_hbuf_read) \
1237 panic("rotating bpf buffers during read"); \
1238 (d)->bd_hbuf = (d)->bd_sbuf; \
1239 (d)->bd_hlen = (d)->bd_slen; \
1240 (d)->bd_hcnt = (d)->bd_scnt; \
1241 (d)->bd_sbuf = (d)->bd_fbuf; \
1242 (d)->bd_slen = 0; \
1243 (d)->bd_scnt = 0; \
1244 (d)->bd_fbuf = NULL; \
1245 if ((d)->bd_headdrop != 0) \
1246 (d)->bd_prev_slen = 0; \
1247} while(false)
1248
1249/*
1250 * bpfread - read next chunk of packets from buffers
1251 */
1252int
1253bpfread(dev_t dev, struct uio *uio, int ioflag)
1254{
1255 struct bpf_d *d;
1256 caddr_t hbuf;
1257 int timed_out, hbuf_len;
1258 int error;
1259 int flags;
1260
1261 lck_mtx_lock(lck: bpf_mlock);
1262
1263 d = bpf_dtab[minor(dev)];
1264 if (d == NULL || d == BPF_DEV_RESERVED ||
1265 (d->bd_flags & BPF_CLOSING) != 0) {
1266 lck_mtx_unlock(lck: bpf_mlock);
1267 return ENXIO;
1268 }
1269
1270 bpf_acquire_d(d);
1271
	/*
	 * Restrict the application to use a buffer the same size as
	 * the kernel buffers.
	 */
1276 if (uio_resid(a_uio: uio) != d->bd_bufsize) {
1277 bpf_release_d(d);
1278 lck_mtx_unlock(lck: bpf_mlock);
1279 return EINVAL;
1280 }
1281
1282 if (d->bd_state == BPF_WAITING) {
1283 bpf_stop_timer(d);
1284 }
1285
1286 timed_out = (d->bd_state == BPF_TIMED_OUT);
1287 d->bd_state = BPF_IDLE;
1288
1289 while (d->bd_hbuf_read) {
1290 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpfread", NULL);
1291 }
1292
1293 if ((d->bd_flags & BPF_CLOSING) != 0) {
1294 bpf_release_d(d);
1295 lck_mtx_unlock(lck: bpf_mlock);
1296 return ENXIO;
1297 }
1298 /*
1299 * If the hold buffer is empty, then do a timed sleep, which
1300 * ends when the timeout expires or when enough packets
1301 * have arrived to fill the store buffer.
1302 */
1303 while (d->bd_hbuf == 0) {
1304 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1305 d->bd_slen != 0) {
1306 /*
1307 * We're in immediate mode, or are reading
1308 * in non-blocking mode, or a timer was
1309 * started before the read (e.g., by select()
1310 * or poll()) and has expired and a packet(s)
1311 * either arrived since the previous
1312 * read or arrived while we were asleep.
1313 * Rotate the buffers and return what's here.
1314 */
1315 ROTATE_BUFFERS(d);
1316 break;
1317 }
1318
1319 /*
1320 * No data is available, check to see if the bpf device
1321 * is still pointed at a real interface. If not, return
1322 * ENXIO so that the userland process knows to rebind
1323 * it before using it again.
1324 */
1325 if (d->bd_bif == NULL) {
1326 bpf_release_d(d);
1327 lck_mtx_unlock(lck: bpf_mlock);
1328 return ENXIO;
1329 }
1330 if (ioflag & IO_NDELAY) {
1331 bpf_release_d(d);
1332 lck_mtx_unlock(lck: bpf_mlock);
1333 return EWOULDBLOCK;
1334 }
1335 error = BPF_SLEEP(d, PRINET | PCATCH, wmesg: "bpf", timo: d->bd_rtout);
1336 /*
1337 * Make sure device is still opened
1338 */
1339 if ((d->bd_flags & BPF_CLOSING) != 0) {
1340 bpf_release_d(d);
1341 lck_mtx_unlock(lck: bpf_mlock);
1342 return ENXIO;
1343 }
1344
1345 while (d->bd_hbuf_read) {
1346 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpf_read",
1347 NULL);
1348 }
1349
1350 if ((d->bd_flags & BPF_CLOSING) != 0) {
1351 bpf_release_d(d);
1352 lck_mtx_unlock(lck: bpf_mlock);
1353 return ENXIO;
1354 }
1355
1356 if (error == EINTR || error == ERESTART) {
1357 if (d->bd_hbuf != NULL) {
1358 /*
1359 * Because we msleep, the hold buffer might
1360 * be filled when we wake up. Avoid rotating
1361 * in this case.
1362 */
1363 break;
1364 }
1365 if (d->bd_slen != 0) {
1366 /*
1367 * Sometimes we may be interrupted often and
1368 * the sleep above will not timeout.
1369 * Regardless, we should rotate the buffers
1370 * if there's any new data pending and
1371 * return it.
1372 */
1373 ROTATE_BUFFERS(d);
1374 break;
1375 }
1376 bpf_release_d(d);
1377 lck_mtx_unlock(lck: bpf_mlock);
1378 if (error == ERESTART) {
1379 os_log(OS_LOG_DEFAULT, "%s: bpf%d ERESTART to EINTR",
1380 __func__, d->bd_dev_minor);
1381 error = EINTR;
1382 }
1383 return error;
1384 }
1385 if (error == EWOULDBLOCK) {
1386 /*
1387 * On a timeout, return what's in the buffer,
1388 * which may be nothing. If there is something
1389 * in the store buffer, we can rotate the buffers.
1390 */
1391 if (d->bd_hbuf) {
1392 /*
1393 * We filled up the buffer in between
1394 * getting the timeout and arriving
1395 * here, so we don't need to rotate.
1396 */
1397 break;
1398 }
1399
1400 if (d->bd_slen == 0) {
1401 bpf_release_d(d);
1402 lck_mtx_unlock(lck: bpf_mlock);
1403 return 0;
1404 }
1405 ROTATE_BUFFERS(d);
1406 break;
1407 }
1408 }
1409 /*
1410 * At this point, we know we have something in the hold slot.
1411 */
1412
	/*
	 * Mark the hold buffer as being read so we do not rotate the buffers
	 * until the hold buffer read is complete. This also avoids issues
	 * resulting from page faults during disk sleep (<rdar://problem/13436396>).
	 */
1419 d->bd_hbuf_read = true;
1420 hbuf = d->bd_hbuf;
1421 hbuf_len = d->bd_hlen;
1422 flags = d->bd_flags;
1423 d->bd_bcs.bcs_total_read += d->bd_hcnt;
1424 lck_mtx_unlock(lck: bpf_mlock);
1425
1426 /*
1427 * Before we move data to userland, we fill out the extended
1428 * header fields.
1429 */
1430 if (flags & BPF_EXTENDED_HDR) {
1431 char *p;
1432
1433 p = hbuf;
1434 while (p < hbuf + hbuf_len) {
1435 struct bpf_hdr_ext *ehp;
1436 uint32_t flowid;
1437 struct so_procinfo soprocinfo;
1438 int found = 0;
1439
1440 ehp = (struct bpf_hdr_ext *)(void *)p;
1441 if ((flowid = ehp->bh_flowid) != 0) {
1442 if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) {
1443 ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_TCP;
1444 found = inp_findinpcb_procinfo(&tcbinfo,
1445 flowid, &soprocinfo);
1446 } else if (ehp->bh_flags == BPF_HDR_EXT_FLAGS_UDP) {
1447 ehp->bh_flags &= ~BPF_HDR_EXT_FLAGS_UDP;
1448 found = inp_findinpcb_procinfo(&udbinfo,
1449 flowid, &soprocinfo);
1450 }
1451 if (found == 1) {
1452 ehp->bh_pid = soprocinfo.spi_pid;
1453 strlcpy(dst: &ehp->bh_comm[0], src: &soprocinfo.spi_proc_name[0], n: sizeof(ehp->bh_comm));
1454 }
1455 ehp->bh_flowid = 0;
1456 }
1457
1458 if ((flags & BPF_FINALIZE_PKTAP) != 0 && ehp->bh_complen == 0) {
1459 struct pktap_header *pktaphdr;
1460
1461 pktaphdr = (struct pktap_header *)(void *)
1462 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1463
1464 bpf_finalize_pktap(hp: (struct bpf_hdr *) ehp,
1465 pktaphdr);
1466 }
1467 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1468 }
1469 } else if (flags & BPF_FINALIZE_PKTAP) {
1470 char *p;
1471
1472 p = hbuf;
1473
1474 while (p < hbuf + hbuf_len) {
1475 struct bpf_hdr *hp;
1476 struct pktap_header *pktaphdr;
1477
1478 hp = (struct bpf_hdr *)(void *)p;
1479
1480 /*
1481 * Cannot finalize a compressed pktap header as we may not have
1482 * all the fields present
1483 */
1484 if (d->bd_flags & BPF_COMP_ENABLED) {
1485 struct bpf_comp_hdr *hcp;
1486
1487 hcp = (struct bpf_comp_hdr *)(void *)p;
1488
1489 if (hcp->bh_complen != 0) {
1490 p += BPF_WORDALIGN(hcp->bh_hdrlen + hcp->bh_caplen);
1491 continue;
1492 }
1493 }
1494
1495 pktaphdr = (struct pktap_header *)(void *)
1496 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1497
1498 bpf_finalize_pktap(hp, pktaphdr);
1499
1500 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1501 }
1502 }
1503
	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is d->bd_bufsize bytes.
	 */
1509 error = uiomove(cp: hbuf, n: hbuf_len, uio);
1510
1511 lck_mtx_lock(lck: bpf_mlock);
1512 /*
1513 * Make sure device is still opened
1514 */
1515 if ((d->bd_flags & BPF_CLOSING) != 0) {
1516 bpf_release_d(d);
1517 lck_mtx_unlock(lck: bpf_mlock);
1518 return ENXIO;
1519 }
1520
1521 d->bd_hbuf_read = false;
1522 d->bd_fbuf = d->bd_hbuf;
1523 d->bd_hbuf = NULL;
1524 d->bd_hlen = 0;
1525 d->bd_hcnt = 0;
1526 wakeup(chan: (caddr_t)d);
1527
1528 bpf_release_d(d);
1529 lck_mtx_unlock(lck: bpf_mlock);
1530 return error;
1531}
1532
1533/*
1534 * If there are processes sleeping on this descriptor, wake them up.
1535 */
1536static void
1537bpf_wakeup(struct bpf_d *d)
1538{
1539 if (d->bd_state == BPF_WAITING) {
1540 bpf_stop_timer(d);
1541 d->bd_state = BPF_IDLE;
1542 }
1543 wakeup(chan: (caddr_t)d);
1544 if (d->bd_async && d->bd_sig && d->bd_sigio) {
1545 pgsigio(pgid: d->bd_sigio, signalnum: d->bd_sig);
1546 }
1547
1548 selwakeup(&d->bd_sel);
1549 if ((d->bd_flags & BPF_KNOTE)) {
1550 KNOTE(&d->bd_sel.si_note, 1);
1551 }
1552}
1553
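/*
 * Thread call handler for the read timeout armed by bpf_start_timer().
 */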
1554static void
1555bpf_timed_out(void *arg, __unused void *dummy)
1556{
1557 struct bpf_d *d = (struct bpf_d *)arg;
1558
1559 lck_mtx_lock(lck: bpf_mlock);
1560 if (d->bd_state == BPF_WAITING) {
1561 /*
1562 * There's a select or kqueue waiting for this; if there's
1563 * now stuff to read, wake it up.
1564 */
1565 d->bd_state = BPF_TIMED_OUT;
1566 if (d->bd_slen != 0) {
1567 bpf_wakeup(d);
1568 }
1569 } else if (d->bd_state == BPF_DRAINING) {
1570 /*
1571 * A close is waiting for this to finish.
1572 * Mark it as finished, and wake the close up.
1573 */
1574 d->bd_state = BPF_IDLE;
1575 bpf_wakeup(d);
1576 }
1577 lck_mtx_unlock(lck: bpf_mlock);
1578}
1579
1580/* keep in sync with bpf_movein above: */
1581#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1582
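/*
 * bpfwrite - copy user supplied packet data into mbufs and transmit them on
 * the attached interface, either through the DLT-specific send function or
 * through dlil_output().
 */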
1583int
1584bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1585{
1586 struct bpf_d *d;
1587 struct ifnet *ifp;
1588 struct mbuf *m = NULL;
1589 int error = 0;
1590 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1591 int bif_dlt;
1592 int bd_hdrcmplt;
1593 bpf_send_func bif_send;
1594
1595 lck_mtx_lock(lck: bpf_mlock);
1596
1597 while (true) {
1598 d = bpf_dtab[minor(dev)];
1599 if (d == NULL || d == BPF_DEV_RESERVED ||
1600 (d->bd_flags & BPF_CLOSING) != 0) {
1601 lck_mtx_unlock(lck: bpf_mlock);
1602 return ENXIO;
1603 }
1604
1605 if (d->bd_hbuf_write) {
1606 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpfwrite",
1607 NULL);
1608 } else {
1609 break;
1610 }
1611 }
1612 d->bd_hbuf_write = true;
1613
1614 bpf_acquire_d(d);
1615
1616 ++d->bd_wcount;
1617
1618 if (d->bd_bif == NULL) {
1619 error = ENXIO;
1620 goto done;
1621 }
1622
1623 ifp = d->bd_bif->bif_ifp;
1624
1625 if (IFNET_IS_MANAGEMENT(ifp) &&
1626 IOCurrentTaskHasEntitlement(MANAGEMENT_DATA_ENTITLEMENT) == false) {
1627 ++d->bd_wdcount;
1628 bpf_release_d(d);
1629 lck_mtx_unlock(lck: bpf_mlock);
1630 return ENETDOWN;
1631 }
1632
1633 if ((ifp->if_flags & IFF_UP) == 0) {
1634 error = ENETDOWN;
1635 goto done;
1636 }
1637 int resid = (int)uio_resid(a_uio: uio);
1638 if (resid <= 0) {
1639 error = resid == 0 ? 0 : EINVAL;
1640 os_log(OS_LOG_DEFAULT, "bpfwrite: resid %d error %d", resid, error);
1641 goto done;
1642 }
1643 SA(dst_buf)->sa_len = sizeof(dst_buf);
1644
	/*
	 * getting variables onto the stack before dropping the lock
	 */
1648 bif_dlt = (int)d->bd_bif->bif_dlt;
1649 bd_hdrcmplt = d->bd_hdrcmplt;
1650 bool batch_write = (d->bd_flags & BPF_BATCH_WRITE) ? true : false;
1651
1652 if (batch_write) {
1653 error = bpf_movein_batch(uio, d, mp: &m, sockp: bd_hdrcmplt ? NULL : SA(dst_buf));
1654 if (error != 0) {
1655 goto done;
1656 }
1657 } else {
1658 error = bpf_movein(uio, copy_len: resid, d, mp: &m, sockp: bd_hdrcmplt ? NULL : SA(dst_buf));
1659 if (error != 0) {
1660 goto done;
1661 }
1662 bpf_set_packet_service_class(m, d->bd_traffic_class);
1663 }
1664
1665 /* verify the device is still open */
1666 if ((d->bd_flags & BPF_CLOSING) != 0) {
1667 error = ENXIO;
1668 goto done;
1669 }
1670
1671 if (d->bd_bif == NULL || d->bd_bif->bif_ifp != ifp) {
1672 error = ENXIO;
1673 goto done;
1674 }
1675
1676 bif_send = d->bd_bif->bif_send;
1677
1678 lck_mtx_unlock(lck: bpf_mlock);
1679
1680 if (bd_hdrcmplt) {
1681 if (bif_send) {
			/*
			 * Send one packet at a time; the driver frees the mbuf
			 * but we need to take care of the leftover packets
			 */
1686 while (m != NULL && error == 0) {
1687 struct mbuf *next = m->m_nextpkt;
1688
1689 m->m_nextpkt = NULL;
1690 error = bif_send(ifp, bif_dlt, m);
1691 m = next;
1692 }
1693 } else {
1694 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1695 /* Make sure we do not double free */
1696 m = NULL;
1697 }
1698 } else {
1699 error = dlil_output(ifp, PF_INET, m, NULL,
1700 SA(dst_buf), 0, NULL);
1701 /* Make sure we do not double free */
1702 m = NULL;
1703 }
1704
1705 lck_mtx_lock(lck: bpf_mlock);
1706done:
1707 if (error != 0 && m != NULL) {
1708 ++d->bd_wdcount;
1709 }
1710 if (m != NULL) {
1711 m_freem_list(m);
1712 }
1713 d->bd_hbuf_write = false;
1714 wakeup(chan: (caddr_t)d);
1715 bpf_release_d(d);
1716 lck_mtx_unlock(lck: bpf_mlock);
1717
1718 return error;
1719}
1720
1721/*
1722 * Reset a descriptor by flushing its packet buffer and clearing the
1723 * receive and drop counts.
1724 */
1725static void
1726reset_d(struct bpf_d *d)
1727{
1728 if (d->bd_hbuf_read) {
1729 panic("resetting buffers during read");
1730 }
1731
1732 if (d->bd_hbuf) {
1733 /* Free the hold buffer. */
1734 d->bd_fbuf = d->bd_hbuf;
1735 d->bd_hbuf = NULL;
1736 }
1737 d->bd_slen = 0;
1738 d->bd_hlen = 0;
1739 d->bd_scnt = 0;
1740 d->bd_hcnt = 0;
1741 d->bd_rcount = 0;
1742 d->bd_dcount = 0;
1743 d->bd_fcount = 0;
1744 d->bd_wcount = 0;
1745 d->bd_wdcount = 0;
1746
1747 d->bd_prev_slen = 0;
1748}
1749
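/*
 * Look up an open BPF descriptor by UUID; returns NULL when no open,
 * non-closing descriptor matches.
 */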
1750static struct bpf_d *
1751bpf_get_device_from_uuid(uuid_t uuid)
1752{
1753 unsigned int i;
1754
1755 for (i = 0; i < nbpfilter; i++) {
1756 struct bpf_d *d = bpf_dtab[i];
1757
1758 if (d == NULL || d == BPF_DEV_RESERVED ||
1759 (d->bd_flags & BPF_CLOSING) != 0) {
1760 continue;
1761 }
1762 if (uuid_compare(uu1: uuid, uu2: d->bd_uuid) == 0) {
1763 return d;
1764 }
1765 }
1766
1767 return NULL;
1768}
1769
/*
 * The BIOCSETUP command "atomically" attaches to the interface and
 * copies the buffers from another BPF descriptor. This minimizes the risk
 * of missing packets because this is done while holding
 * the BPF global lock
 */
1776static int
1777bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1778{
1779 struct bpf_d *d_from;
1780 int error = 0;
1781
1782 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1783
1784 /*
1785 * Sanity checks
1786 */
1787 d_from = bpf_get_device_from_uuid(uuid: uuid_from);
1788 if (d_from == NULL) {
1789 error = ENOENT;
1790 os_log_error(OS_LOG_DEFAULT,
1791 "%s: uuids not found error %d",
1792 __func__, error);
1793 return error;
1794 }
1795 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1796 error = EACCES;
1797 os_log_error(OS_LOG_DEFAULT,
1798 "%s: processes not matching error %d",
1799 __func__, error);
1800 return error;
1801 }
1802
1803 /*
1804 * Prevent any read or write while copying
1805 */
1806 while (d_to->bd_hbuf_read || d_to->bd_hbuf_write) {
1807 msleep(chan: (caddr_t)d_to, mtx: bpf_mlock, PRINET, wmesg: __func__, NULL);
1808 }
1809 d_to->bd_hbuf_read = true;
1810 d_to->bd_hbuf_write = true;
1811
1812 while (d_from->bd_hbuf_read || d_from->bd_hbuf_write) {
1813 msleep(chan: (caddr_t)d_from, mtx: bpf_mlock, PRINET, wmesg: __func__, NULL);
1814 }
1815 d_from->bd_hbuf_read = true;
1816 d_from->bd_hbuf_write = true;
1817
1818 /*
1819 * Verify the devices have not been closed
1820 */
1821 if (d_to->bd_flags & BPF_CLOSING) {
1822 error = ENXIO;
1823 os_log_error(OS_LOG_DEFAULT,
1824 "%s: d_to is closing error %d",
1825 __func__, error);
1826 goto done;
1827 }
1828 if (d_from->bd_flags & BPF_CLOSING) {
1829 error = ENXIO;
1830 os_log_error(OS_LOG_DEFAULT,
1831 "%s: d_from is closing error %d",
1832 __func__, error);
1833 goto done;
1834 }
1835
1836 /*
1837 * For now require the same buffer size
1838 */
1839 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1840 error = EINVAL;
1841 os_log_error(OS_LOG_DEFAULT,
1842 "%s: bufsizes not matching error %d",
1843 __func__, error);
1844 goto done;
1845 }
1846
1847 /*
1848 * Copy relevant options and flags
1849 */
1850 d_to->bd_flags = d_from->bd_flags & (BPF_EXTENDED_HDR | BPF_WANT_PKTAP |
1851 BPF_FINALIZE_PKTAP | BPF_TRUNCATE | BPF_PKTHDRV2 |
1852 BPF_COMP_REQ | BPF_COMP_ENABLED);
1853
1854 d_to->bd_headdrop = d_from->bd_headdrop;
1855
1856 /*
1857 * Allocate and copy the buffers
1858 */
1859 error = bpf_allocbufs(d_to);
1860 if (error != 0) {
1861 goto done;
1862 }
1863
	/*
	 * Make sure the buffers are set up as expected by bpf_setif()
	 */
1867 ASSERT(d_to->bd_hbuf == NULL);
1868 ASSERT(d_to->bd_sbuf != NULL);
1869 ASSERT(d_to->bd_fbuf != NULL);
1870
1871 /*
1872 * Copy the buffers and update the pointers and counts
1873 */
1874 memcpy(dst: d_to->bd_sbuf, src: d_from->bd_sbuf, n: d_from->bd_slen);
1875 d_to->bd_slen = d_from->bd_slen;
1876 d_to->bd_scnt = d_from->bd_scnt;
1877
1878 if (d_from->bd_hbuf != NULL) {
1879 d_to->bd_hbuf = d_to->bd_fbuf;
1880 d_to->bd_fbuf = NULL;
1881 memcpy(dst: d_to->bd_hbuf, src: d_from->bd_hbuf, n: d_from->bd_hlen);
1882 }
1883 d_to->bd_hlen = d_from->bd_hlen;
1884 d_to->bd_hcnt = d_from->bd_hcnt;
1885
1886 if (d_to->bd_flags & BPF_COMP_REQ) {
1887 ASSERT(d_to->bd_prev_sbuf != NULL);
1888 ASSERT(d_to->bd_prev_fbuf != NULL);
1889
1890 d_to->bd_prev_slen = d_from->bd_prev_slen;
1891 ASSERT(d_to->bd_prev_slen <= BPF_HDR_COMP_LEN_MAX);
1892 memcpy(dst: d_to->bd_prev_sbuf, src: d_from->bd_prev_sbuf, BPF_HDR_COMP_LEN_MAX);
1893 }
1894
1895 d_to->bd_bcs = d_from->bd_bcs;
1896
1897 /*
1898 * Attach to the interface:
1899 * - don't reset the buffers
1900 * - we already prevent reads and writes
1901 * - the buffers are already allocated
1902 */
1903 error = bpf_setif(d_to, ifp, false, true, true);
1904 if (error != 0) {
1905 os_log_error(OS_LOG_DEFAULT,
1906 "%s: bpf_setif() failed error %d",
1907 __func__, error);
1908 goto done;
1909 }
1910done:
1911 d_from->bd_hbuf_read = false;
1912 d_from->bd_hbuf_write = false;
1913 wakeup(chan: (caddr_t)d_from);
1914
1915 d_to->bd_hbuf_read = false;
1916 d_to->bd_hbuf_write = false;
1917 wakeup(chan: (caddr_t)d_to);
1918
1919 return error;
1920}
1921
1922#if DEVELOPMENT || DEBUG
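/*
 * X-macro list of ioctl names used by log_bpf_ioctl_str() for debug logging.
 */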
1923#define BPF_IOC_LIST \
1924 X(FIONREAD) \
1925 X(SIOCGIFADDR) \
1926 X(BIOCGBLEN) \
1927 X(BIOCSBLEN) \
1928 X(BIOCSETF32) \
1929 X(BIOCSETFNR32) \
1930 X(BIOCSETF64) \
1931 X(BIOCSETFNR64) \
1932 X(BIOCFLUSH) \
1933 X(BIOCPROMISC) \
1934 X(BIOCGDLT) \
1935 X(BIOCGDLTLIST) \
1936 X(BIOCSDLT) \
1937 X(BIOCGETIF) \
1938 X(BIOCSETIF) \
1939 X(BIOCSRTIMEOUT32) \
1940 X(BIOCSRTIMEOUT64) \
1941 X(BIOCGRTIMEOUT32) \
1942 X(BIOCGRTIMEOUT64) \
1943 X(BIOCGSTATS) \
1944 X(BIOCIMMEDIATE) \
1945 X(BIOCVERSION) \
1946 X(BIOCGHDRCMPLT) \
1947 X(BIOCSHDRCMPLT) \
1948 X(BIOCGSEESENT) \
1949 X(BIOCSSEESENT) \
1950 X(BIOCSETTC) \
1951 X(BIOCGETTC) \
1952 X(FIONBIO) \
1953 X(FIOASYNC) \
1954 X(BIOCSRSIG) \
1955 X(BIOCGRSIG) \
1956 X(BIOCSEXTHDR) \
1957 X(BIOCGIFATTACHCOUNT) \
1958 X(BIOCGWANTPKTAP) \
1959 X(BIOCSWANTPKTAP) \
1960 X(BIOCSHEADDROP) \
1961 X(BIOCGHEADDROP) \
1962 X(BIOCSTRUNCATE) \
1963 X(BIOCGETUUID) \
1964 X(BIOCSETUP) \
1965 X(BIOCSPKTHDRV2) \
1966 X(BIOCGHDRCOMP) \
1967 X(BIOCSHDRCOMP) \
1968 X(BIOCGHDRCOMPSTATS) \
1969 X(BIOCGHDRCOMPON) \
1970 X(BIOCGDIRECTION) \
1971 X(BIOCSDIRECTION) \
1972 X(BIOCSWRITEMAX) \
1973 X(BIOCGWRITEMAX) \
1974 X(BIOCGBATCHWRITE) \
1975 X(BIOCSBATCHWRITE)
1976
1977static void
1978log_bpf_ioctl_str(struct bpf_d *d, u_long cmd)
1979{
1980 const char *p = NULL;
1981 char str[32];
1982
1983#define X(x) case x: { p = #x ; printf("%s\n", p); break; }
1984 switch (cmd) {
1985 BPF_IOC_LIST
1986 }
1987#undef X
1988 if (p == NULL) {
1989 snprintf(str, sizeof(str), "0x%08x", (unsigned int)cmd);
1990 p = str;
1991 }
1992 os_log(OS_LOG_DEFAULT, "bpfioctl bpf%u %s",
1993 d->bd_dev_minor, p);
1994}
1995#endif /* DEVELOPMENT || DEBUG */
1996
1997/*
1998 * FIONREAD Check for read packet available.
1999 * SIOCGIFADDR Get interface address - convenient hook to driver.
2000 * BIOCGBLEN Get buffer len [for read()].
2001 * BIOCSETF Set ethernet read filter.
2002 * BIOCFLUSH Flush read packet buffer.
2003 * BIOCPROMISC Put interface into promiscuous mode.
2004 * BIOCGDLT Get link layer type.
2005 * BIOCGETIF Get interface name.
2006 * BIOCSETIF Set interface.
2007 * BIOCSRTIMEOUT Set read timeout.
2008 * BIOCGRTIMEOUT Get read timeout.
2009 * BIOCGSTATS Get packet stats.
2010 * BIOCIMMEDIATE Set immediate mode.
2011 * BIOCVERSION Get filter language version.
2012 * BIOCGHDRCMPLT Get "header already complete" flag
2013 * BIOCSHDRCMPLT Set "header already complete" flag
2014 * BIOCGSEESENT Get "see packets sent" flag
2015 * BIOCSSEESENT Set "see packets sent" flag
2016 * BIOCSETTC Set traffic class.
2017 * BIOCGETTC Get traffic class.
2018 * BIOCSEXTHDR Set "extended header" flag
 * BIOCSHEADDROP	Set "head-drop" flag: drop the oldest buffered packets when the user is not reading
 * BIOCGHEADDROP	Get "head-drop" flag
2021 */
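
/*
 * Illustrative userspace sketch (not kernel code): a minimal capture setup
 * using the ioctls documented above. The device path, buffer size and the
 * helper name below are assumptions chosen for the example; note that
 * BIOCSBLEN must be issued before BIOCSETIF (see the BIOCSBLEN case below).
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <net/bpf.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int
 *	open_bpf_on(const char *ifname)
 *	{
 *		int fd = open("/dev/bpf0", O_RDWR);	// hypothetical free unit
 *		u_int blen = 0x8000;			// assumed buffer size
 *		u_int on = 1;
 *		struct ifreq ifr = {};
 *
 *		if (fd < 0)
 *			return -1;
 *		(void) ioctl(fd, BIOCSBLEN, &blen);	// set before attaching
 *		strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
 *		if (ioctl(fd, BIOCSETIF, &ifr) == -1 ||
 *		    ioctl(fd, BIOCIMMEDIATE, &on) == -1) {
 *			close(fd);
 *			return -1;
 *		}
 *		return fd;	// read(fd, buf, blen) now returns captured packets
 *	}
 */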
2022/* ARGSUSED */
2023int
2024bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
2025 struct proc *p)
2026{
2027 struct bpf_d *d;
2028 int error = 0;
2029 u_int int_arg;
2030 struct ifreq ifr = {};
2031
2032 lck_mtx_lock(lck: bpf_mlock);
2033
2034 d = bpf_dtab[minor(dev)];
2035 if (d == NULL || d == BPF_DEV_RESERVED ||
2036 (d->bd_flags & BPF_CLOSING) != 0) {
2037 lck_mtx_unlock(lck: bpf_mlock);
2038 return ENXIO;
2039 }
2040
2041 bpf_acquire_d(d);
2042
2043 if (d->bd_state == BPF_WAITING) {
2044 bpf_stop_timer(d);
2045 }
2046 d->bd_state = BPF_IDLE;
2047
2048#if DEVELOPMENT || DEBUG
2049 if (bpf_debug > 0) {
2050 log_bpf_ioctl_str(d, cmd);
2051 }
2052#endif /* DEVELOPMENT || DEBUG */
2053
2054 switch (cmd) {
2055 default:
2056 error = EINVAL;
2057 break;
2058
2059 /*
2060 * Check for read packet available.
2061 */
2062 case FIONREAD: /* int */
2063 {
2064 int n;
2065
2066 n = d->bd_slen;
2067 if (d->bd_hbuf && d->bd_hbuf_read) {
2068 n += d->bd_hlen;
2069 }
2070
2071 bcopy(src: &n, dst: addr, n: sizeof(n));
2072 break;
2073 }
2074
2075 case SIOCGIFADDR: /* struct ifreq */
2076 {
2077 struct ifnet *ifp;
2078
2079 if (d->bd_bif == 0) {
2080 error = EINVAL;
2081 } else {
2082 ifp = d->bd_bif->bif_ifp;
2083 error = ifnet_ioctl(interface: ifp, protocol: 0, ioctl_code: cmd, ioctl_arg: addr);
2084 }
2085 break;
2086 }
2087
2088 /*
2089 * Get buffer len [for read()].
2090 */
2091 case BIOCGBLEN: /* u_int */
2092 bcopy(src: &d->bd_bufsize, dst: addr, n: sizeof(u_int));
2093 break;
2094
2095 /*
2096 * Set buffer length.
2097 */
2098 case BIOCSBLEN: { /* u_int */
2099 u_int size;
2100
2101 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
2102 /*
2103 * Interface already attached, unable to change buffers
2104 */
2105 error = EINVAL;
2106 break;
2107 }
2108 bcopy(src: addr, dst: &size, n: sizeof(size));
2109
2110 if (size > BPF_BUFSIZE_CAP) {
2111 d->bd_bufsize = BPF_BUFSIZE_CAP;
2112
2113 os_log_info(OS_LOG_DEFAULT,
2114 "bpf%d BIOCSBLEN capped to %u from %u",
2115 minor(dev), d->bd_bufsize, size);
2116 } else if (size < BPF_MINBUFSIZE) {
2117 d->bd_bufsize = BPF_MINBUFSIZE;
2118
2119 os_log_info(OS_LOG_DEFAULT,
2120 "bpf%d BIOCSBLEN bumped to %u from %u",
2121 minor(dev), d->bd_bufsize, size);
2122 } else {
2123 d->bd_bufsize = size;
2124
2125 os_log_info(OS_LOG_DEFAULT,
2126 "bpf%d BIOCSBLEN %u",
2127 minor(dev), d->bd_bufsize);
2128 }
2129
2130 /* It's a read/write ioctl */
2131 bcopy(src: &d->bd_bufsize, dst: addr, n: sizeof(u_int));
2132 break;
2133 }
2134 /*
2135 * Set link layer read filter.
2136 */
2137 case BIOCSETF32:
2138 case BIOCSETFNR32: { /* struct bpf_program32 */
2139 struct bpf_program32 prg32;
2140
2141 bcopy(src: addr, dst: &prg32, n: sizeof(prg32));
2142 error = bpf_setf(d, prg32.bf_len,
2143 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
2144 break;
2145 }
2146
2147 case BIOCSETF64:
2148 case BIOCSETFNR64: { /* struct bpf_program64 */
2149 struct bpf_program64 prg64;
2150
2151 bcopy(src: addr, dst: &prg64, n: sizeof(prg64));
2152 error = bpf_setf(d, prg64.bf_len, CAST_USER_ADDR_T(prg64.bf_insns), cmd);
2153 break;
2154 }
2155
2156 /*
2157 * Flush read packet buffer.
2158 */
2159 case BIOCFLUSH:
2160 while (d->bd_hbuf_read) {
2161 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "BIOCFLUSH",
2162 NULL);
2163 }
2164 if ((d->bd_flags & BPF_CLOSING) != 0) {
2165 error = ENXIO;
2166 break;
2167 }
2168 reset_d(d);
2169 break;
2170
2171 /*
2172 * Put interface into promiscuous mode.
2173 */
2174 case BIOCPROMISC:
2175 if (d->bd_bif == 0) {
2176 /*
2177 * No interface attached yet.
2178 */
2179 error = EINVAL;
2180 break;
2181 }
2182 if (d->bd_promisc == 0) {
2183 lck_mtx_unlock(lck: bpf_mlock);
2184 error = ifnet_set_promiscuous(interface: d->bd_bif->bif_ifp, on: 1);
2185 lck_mtx_lock(lck: bpf_mlock);
2186 if (error == 0) {
2187 d->bd_promisc = 1;
2188 }
2189 }
2190 break;
2191
2192 /*
2193 * Get device parameters.
2194 */
2195 case BIOCGDLT: /* u_int */
2196 if (d->bd_bif == 0) {
2197 error = EINVAL;
2198 } else {
2199 bcopy(src: &d->bd_bif->bif_dlt, dst: addr, n: sizeof(u_int));
2200 }
2201 break;
2202
2203 /*
2204 * Get a list of supported data link types.
2205 */
2206 case BIOCGDLTLIST: /* struct bpf_dltlist */
2207 if (d->bd_bif == NULL) {
2208 error = EINVAL;
2209 } else {
2210 error = bpf_getdltlist(d, addr, p);
2211 }
2212 break;
2213
2214 /*
2215 * Set data link type.
2216 */
2217 case BIOCSDLT: /* u_int */
2218 if (d->bd_bif == NULL) {
2219 error = EINVAL;
2220 } else {
2221 u_int dlt;
2222
2223 bcopy(src: addr, dst: &dlt, n: sizeof(dlt));
2224
2225 if (dlt == DLT_PKTAP &&
2226 !(d->bd_flags & BPF_WANT_PKTAP)) {
2227 dlt = DLT_RAW;
2228 }
2229 error = bpf_setdlt(d, dlt);
2230 }
2231 break;
2232
2233 /*
2234 * Get interface name.
2235 */
2236 case BIOCGETIF: /* struct ifreq */
2237 if (d->bd_bif == 0) {
2238 error = EINVAL;
2239 } else {
2240 struct ifnet *const ifp = d->bd_bif->bif_ifp;
2241
2242 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
2243 count: sizeof(ifr.ifr_name), "%s", if_name(ifp));
2244 }
2245 break;
2246
2247 /*
2248 * Set interface.
2249 */
2250 case BIOCSETIF: { /* struct ifreq */
2251 ifnet_t ifp;
2252
2253 bcopy(src: addr, dst: &ifr, n: sizeof(ifr));
2254 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2255 ifp = ifunit(ifr.ifr_name);
2256 if (ifp == NULL) {
2257 error = ENXIO;
2258 } else {
2259 error = bpf_setif(d, ifp, true, false, false);
2260 }
2261 break;
2262 }
2263
2264 /*
2265 * Set read timeout.
2266 */
2267 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
2268 struct user32_timeval _tv;
2269 struct timeval tv;
2270
2271 bcopy(src: addr, dst: &_tv, n: sizeof(_tv));
2272 tv.tv_sec = _tv.tv_sec;
2273 tv.tv_usec = _tv.tv_usec;
2274
2275 /*
2276 * Subtract 1 tick from tvtohz() since this isn't
2277 * a one-shot timer.
2278 */
2279 if ((error = itimerfix(tv: &tv)) == 0) {
2280 d->bd_rtout = tvtohz(&tv) - 1;
2281 }
2282 break;
2283 }
2284
2285 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
2286 struct user64_timeval _tv;
2287 struct timeval tv;
2288
2289 bcopy(src: addr, dst: &_tv, n: sizeof(_tv));
2290 tv.tv_sec = (__darwin_time_t)_tv.tv_sec;
2291 tv.tv_usec = _tv.tv_usec;
2292
2293 /*
2294 * Subtract 1 tick from tvtohz() since this isn't
2295 * a one-shot timer.
2296 */
2297 if ((error = itimerfix(tv: &tv)) == 0) {
2298 d->bd_rtout = tvtohz(&tv) - 1;
2299 }
2300 break;
2301 }
2302
2303 /*
2304 * Get read timeout.
2305 */
2306 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
2307 struct user32_timeval tv;
2308
2309 bzero(s: &tv, n: sizeof(tv));
2310 tv.tv_sec = d->bd_rtout / hz;
2311 tv.tv_usec = (d->bd_rtout % hz) * tick;
2312 bcopy(src: &tv, dst: addr, n: sizeof(tv));
2313 break;
2314 }
2315
2316 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
2317 struct user64_timeval tv;
2318
2319 bzero(s: &tv, n: sizeof(tv));
2320 tv.tv_sec = d->bd_rtout / hz;
2321 tv.tv_usec = (d->bd_rtout % hz) * tick;
2322 bcopy(src: &tv, dst: addr, n: sizeof(tv));
2323 break;
2324 }
2325
2326 /*
2327 * Get packet stats.
2328 */
2329 case BIOCGSTATS: { /* struct bpf_stat */
2330 struct bpf_stat bs;
2331
2332 bzero(s: &bs, n: sizeof(bs));
2333 bs.bs_recv = (u_int)d->bd_rcount;
2334 bs.bs_drop = (u_int)d->bd_dcount;
2335 bcopy(src: &bs, dst: addr, n: sizeof(bs));
2336 break;
2337 }
2338
2339 /*
2340 * Set immediate mode.
2341 */
2342 case BIOCIMMEDIATE: /* u_int */
2343 d->bd_immediate = *(u_char *)(void *)addr;
2344 break;
2345
2346 case BIOCVERSION: { /* struct bpf_version */
2347 struct bpf_version bv;
2348
2349 bzero(s: &bv, n: sizeof(bv));
2350 bv.bv_major = BPF_MAJOR_VERSION;
2351 bv.bv_minor = BPF_MINOR_VERSION;
2352 bcopy(src: &bv, dst: addr, n: sizeof(bv));
2353 break;
2354 }
2355
2356 /*
2357 * Get "header already complete" flag
2358 */
2359 case BIOCGHDRCMPLT: /* u_int */
2360 bcopy(src: &d->bd_hdrcmplt, dst: addr, n: sizeof(u_int));
2361 break;
2362
2363 /*
2364 * Set "header already complete" flag
2365 */
2366 case BIOCSHDRCMPLT: /* u_int */
2367 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2368 if (int_arg == 0 && (d->bd_flags & BPF_BATCH_WRITE)) {
2369 os_log(OS_LOG_DEFAULT,
2370 "bpf%u cannot set BIOCSHDRCMPLT when BIOCSBATCHWRITE is set",
2371 d->bd_dev_minor);
2372 error = EINVAL;
2373 break;
2374 }
2375 d->bd_hdrcmplt = int_arg ? 1 : 0;
2376 break;
2377
2378 /*
2379 * Get "see sent packets" flag
2380 */
2381 case BIOCGSEESENT: { /* u_int */
2382 int_arg = 0;
2383
2384 if (d->bd_direction & BPF_D_OUT) {
2385 int_arg = 1;
2386 }
2387 bcopy(src: &int_arg, dst: addr, n: sizeof(u_int));
2388 break;
2389 }
2390 /*
2391 * Set "see sent packets" flag
2392 */
2393 case BIOCSSEESENT: { /* u_int */
2394 bcopy(src: addr, dst: &int_arg, n: sizeof(u_int));
2395
2396 if (int_arg == 0) {
2397 d->bd_direction = BPF_D_IN;
2398 } else {
2399 d->bd_direction = BPF_D_INOUT;
2400 }
2401 break;
2402 }
2403 /*
2404 * Get direction of tapped packets that can be seen for reading
2405 */
2406 case BIOCGDIRECTION: { /* u_int */
2407 int_arg = d->bd_direction;
2408
2409 bcopy(src: &int_arg, dst: addr, n: sizeof(u_int));
2410 break;
2411 }
2412 /*
2413 * Set direction of tapped packets that can be seen for reading
2414 */
2415 case BIOCSDIRECTION: { /* u_int */
2416 bcopy(src: addr, dst: &int_arg, n: sizeof(u_int));
2417
2418 switch (int_arg) {
2419 case BPF_D_NONE:
2420 case BPF_D_IN:
2421 case BPF_D_OUT:
2422 case BPF_D_INOUT:
2423 d->bd_direction = int_arg;
2424 break;
2425 default:
2426 error = EINVAL;
2427 break;
2428 }
2429 break;
2430 }
2431 /*
2432 * Set traffic service class
2433 */
2434 case BIOCSETTC: { /* int */
2435 int tc;
2436
2437 bcopy(src: addr, dst: &tc, n: sizeof(int));
2438 if (tc != 0 && (d->bd_flags & BPF_BATCH_WRITE)) {
2439 os_log(OS_LOG_DEFAULT,
2440 "bpf%u cannot set BIOCSETTC when BIOCSBATCHWRITE is set",
2441 d->bd_dev_minor);
2442 error = EINVAL;
2443 break;
2444 }
2445 error = bpf_set_traffic_class(d, tc);
2446 break;
2447 }
2448
2449 /*
2450 * Get traffic service class
2451 */
2452 case BIOCGETTC: /* int */
2453 bcopy(src: &d->bd_traffic_class, dst: addr, n: sizeof(int));
2454 break;
2455
2456 case FIONBIO: /* Non-blocking I/O; int */
2457 break;
2458
2459 case FIOASYNC: /* Send signal on receive packets; int */
2460 bcopy(src: addr, dst: &d->bd_async, n: sizeof(int));
2461 break;
2462
2463 case BIOCSRSIG: { /* Set receive signal; u_int */
2464 u_int sig;
2465
2466 bcopy(src: addr, dst: &sig, n: sizeof(u_int));
2467
2468 if (sig >= NSIG) {
2469 error = EINVAL;
2470 } else {
2471 d->bd_sig = sig;
2472 }
2473 break;
2474 }
2475 case BIOCGRSIG: /* u_int */
2476 bcopy(src: &d->bd_sig, dst: addr, n: sizeof(u_int));
2477 break;
2478
2479 case BIOCSEXTHDR: /* u_int */
2480 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2481 if (int_arg) {
2482 d->bd_flags |= BPF_EXTENDED_HDR;
2483 } else {
2484 d->bd_flags &= ~BPF_EXTENDED_HDR;
2485 }
2486 break;
2487
2488 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2489 ifnet_t ifp;
2490 struct bpf_if *bp;
2491
2492 bcopy(src: addr, dst: &ifr, n: sizeof(ifr));
2493 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2494 ifp = ifunit(ifr.ifr_name);
2495 if (ifp == NULL) {
2496 error = ENXIO;
2497 break;
2498 }
2499 ifr.ifr_intval = 0;
2500 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2501 struct bpf_d *bpf_d;
2502
2503 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) {
2504 continue;
2505 }
2506 for (bpf_d = bp->bif_dlist; bpf_d;
2507 bpf_d = bpf_d->bd_next) {
2508 ifr.ifr_intval += 1;
2509 }
2510 }
2511 bcopy(src: &ifr, dst: addr, n: sizeof(ifr));
2512 break;
2513 }
2514 case BIOCGWANTPKTAP: /* u_int */
2515 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2516 bcopy(src: &int_arg, dst: addr, n: sizeof(int_arg));
2517 break;
2518
2519 case BIOCSWANTPKTAP: /* u_int */
2520 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2521 if (int_arg) {
2522 d->bd_flags |= BPF_WANT_PKTAP;
2523 } else {
2524 d->bd_flags &= ~BPF_WANT_PKTAP;
2525 }
2526 break;
2527
2528 case BIOCSHEADDROP:
2529 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2530 d->bd_headdrop = int_arg ? 1 : 0;
2531 break;
2532
2533 case BIOCGHEADDROP:
2534 bcopy(src: &d->bd_headdrop, dst: addr, n: sizeof(int));
2535 break;
2536
2537 case BIOCSTRUNCATE:
2538 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2539 if (int_arg) {
2540 d->bd_flags |= BPF_TRUNCATE;
2541 } else {
2542 d->bd_flags &= ~BPF_TRUNCATE;
2543 }
2544 break;
2545
2546 case BIOCGETUUID:
2547 bcopy(src: &d->bd_uuid, dst: addr, n: sizeof(uuid_t));
2548 break;
2549
2550 case BIOCSETUP: {
2551 struct bpf_setup_args bsa;
2552 ifnet_t ifp;
2553
2554 bcopy(src: addr, dst: &bsa, n: sizeof(struct bpf_setup_args));
2555 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2556 ifp = ifunit(bsa.bsa_ifname);
2557 if (ifp == NULL) {
2558 error = ENXIO;
2559 os_log_error(OS_LOG_DEFAULT,
2560 "%s: ifnet not found for %s error %d",
2561 __func__, bsa.bsa_ifname, error);
2562 break;
2563 }
2564
2565 error = bpf_setup(d_to: d, uuid_from: bsa.bsa_uuid, ifp);
2566 break;
2567 }
2568 case BIOCSPKTHDRV2:
2569 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2570 if (int_arg != 0) {
2571 d->bd_flags |= BPF_PKTHDRV2;
2572 } else {
2573 d->bd_flags &= ~BPF_PKTHDRV2;
2574 }
2575 break;
2576
2577 case BIOCGPKTHDRV2:
2578 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2579 bcopy(src: &int_arg, dst: addr, n: sizeof(int_arg));
2580 break;
2581
2582 case BIOCGHDRCOMP:
2583 int_arg = d->bd_flags & BPF_COMP_REQ ? 1 : 0;
2584 bcopy(src: &int_arg, dst: addr, n: sizeof(int_arg));
2585 break;
2586
2587 case BIOCSHDRCOMP:
2588 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
		if (int_arg != 0 && int_arg != 1) {
			error = EINVAL;
			break;
		}
2592 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
			/*
			 * Interface already attached, unable to change the
			 * header compression setting
			 */
2596 error = EINVAL;
2597 break;
2598 }
2599 if (int_arg != 0) {
2600 d->bd_flags |= BPF_COMP_REQ;
2601 if (bpf_hdr_comp_enable != 0) {
2602 d->bd_flags |= BPF_COMP_ENABLED;
2603 }
2604 } else {
2605 d->bd_flags &= ~(BPF_COMP_REQ | BPF_COMP_ENABLED);
2606 }
2607 break;
2608
2609 case BIOCGHDRCOMPON:
2610 int_arg = d->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
2611 bcopy(src: &int_arg, dst: addr, n: sizeof(int_arg));
2612 break;
2613
2614 case BIOCGHDRCOMPSTATS: {
2615 struct bpf_comp_stats bcs = {};
2616
2617 bcs = d->bd_bcs;
2618
2619 bcopy(src: &bcs, dst: addr, n: sizeof(bcs));
2620 break;
2621 }
2622 case BIOCSWRITEMAX:
2623 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
		if (int_arg > BPF_WRITE_MAX) {
			os_log(OS_LOG_DEFAULT, "bpf%u BIOCSWRITEMAX %u too big",
			    d->bd_dev_minor, int_arg);
2627 error = EINVAL;
2628 break;
2629 }
2630 d->bd_write_size_max = int_arg;
2631 break;
2632
2633 case BIOCGWRITEMAX:
2634 int_arg = d->bd_write_size_max;
2635 bcopy(src: &int_arg, dst: addr, n: sizeof(int_arg));
2636 break;
2637
2638 case BIOCGBATCHWRITE: /* int */
2639 int_arg = d->bd_flags & BPF_BATCH_WRITE ? 1 : 0;
2640 bcopy(src: &int_arg, dst: addr, n: sizeof(int_arg));
2641 break;
2642
2643 case BIOCSBATCHWRITE: /* int */
2644 bcopy(src: addr, dst: &int_arg, n: sizeof(int_arg));
2645 if (int_arg != 0) {
2646 if (d->bd_hdrcmplt == 0) {
2647 os_log(OS_LOG_DEFAULT,
2648 "bpf%u cannot set BIOCSBATCHWRITE when BIOCSHDRCMPLT is not set",
2649 d->bd_dev_minor);
2650 error = EINVAL;
2651 break;
2652 }
2653 if (d->bd_traffic_class != 0) {
2654 os_log(OS_LOG_DEFAULT,
2655 "bpf%u cannot set BIOCSBATCHWRITE when BIOCSETTC is set",
2656 d->bd_dev_minor);
2657 error = EINVAL;
2658 break;
2659 }
2660 d->bd_flags |= BPF_BATCH_WRITE;
2661 } else {
2662 d->bd_flags &= ~BPF_BATCH_WRITE;
2663 }
2664 break;
2665 }
2666
2667 bpf_release_d(d);
2668 lck_mtx_unlock(lck: bpf_mlock);
2669
2670 return error;
2671}
2672
/*
 * Set d's packet filter program to the one described by bf_len and bf_insns.
 * If this file already has a filter, free it and replace it.
 * Returns EINVAL for bogus requests.
 */
2677static int
2678bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2679 u_long cmd)
2680{
2681 struct bpf_insn *fcode, *old;
2682 u_int flen, size;
2683
2684 while (d->bd_hbuf_read) {
2685 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpf_setf", NULL);
2686 }
2687
2688 if ((d->bd_flags & BPF_CLOSING) != 0) {
2689 return ENXIO;
2690 }
2691
2692 old = d->bd_filter;
2693 if (bf_insns == USER_ADDR_NULL) {
2694 if (bf_len != 0) {
2695 return EINVAL;
2696 }
2697 d->bd_filter = NULL;
2698 reset_d(d);
2699 if (old != 0) {
2700 kfree_data_addr(old);
2701 }
2702 return 0;
2703 }
2704 flen = bf_len;
2705 if (flen > BPF_MAXINSNS) {
2706 return EINVAL;
2707 }
2708
2709 size = flen * sizeof(struct bpf_insn);
2710 fcode = (struct bpf_insn *) kalloc_data(size, Z_WAITOK | Z_ZERO);
2711 if (fcode == NULL) {
2712 return ENOMEM;
2713 }
2714 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2715 bpf_validate(fcode, (int)flen)) {
2716 d->bd_filter = fcode;
2717
2718 if (cmd == BIOCSETF32 || cmd == BIOCSETF64) {
2719 reset_d(d);
2720 }
2721
2722 if (old != 0) {
2723 kfree_data_addr(old);
2724 }
2725
2726 return 0;
2727 }
2728 kfree_data(fcode, size);
2729 return EINVAL;
2730}
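
/*
 * Illustrative userspace sketch (not kernel code): installing a program
 * through the public BIOCSETF ioctl, which is routed to bpf_setf() above.
 * The single-instruction "accept everything" program is only an example.
 *
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *
 *	static struct bpf_insn accept_all[] = {
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	// capture the full packet
 *	};
 *
 *	static int
 *	install_accept_all(int fd)
 *	{
 *		struct bpf_program prog;
 *
 *		prog.bf_len = sizeof(accept_all) / sizeof(accept_all[0]);
 *		prog.bf_insns = accept_all;
 *		return ioctl(fd, BIOCSETF, &prog);	// BIOCSETF also flushes the buffers
 *	}
 */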
2731
2732/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the specified interface (theywant).
2735 * Return an errno or 0.
2736 */
2737static int
2738bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read_write,
2739 bool has_bufs_allocated)
2740{
2741 struct bpf_if *bp;
2742 int error;
2743
2744 while (!has_hbuf_read_write && (d->bd_hbuf_read || d->bd_hbuf_write)) {
2745 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpf_setif", NULL);
2746 }
2747
2748 if ((d->bd_flags & BPF_CLOSING) != 0) {
2749 return ENXIO;
2750 }
2751
2752 /*
2753 * Look through attached interfaces for the named one.
2754 */
2755 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2756 struct ifnet *ifp = bp->bif_ifp;
2757
2758 if (ifp == 0 || ifp != theywant) {
2759 continue;
2760 }
2761 /*
2762 * Do not use DLT_PKTAP, unless requested explicitly
2763 */
2764 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2765 continue;
2766 }
2767 /*
2768 * Skip the coprocessor interface
2769 */
2770 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp)) {
2771 continue;
2772 }
2773 /*
2774 * We found the requested interface.
2775 * Allocate the packet buffers.
2776 */
2777 if (has_bufs_allocated == false) {
2778 error = bpf_allocbufs(d);
2779 if (error != 0) {
2780 return error;
2781 }
2782 }
2783 /*
2784 * Detach if attached to something else.
2785 */
2786 if (bp != d->bd_bif) {
2787 if (d->bd_bif != NULL) {
2788 if (bpf_detachd(d) != 0) {
2789 return ENXIO;
2790 }
2791 }
2792 if (bpf_attachd(d, bp) != 0) {
2793 return ENXIO;
2794 }
2795 }
2796 if (do_reset) {
2797 reset_d(d);
2798 }
2799 os_log(OS_LOG_DEFAULT, "bpf%u attached to %s",
2800 d->bd_dev_minor, if_name(theywant));
2801 return 0;
2802 }
2803 /* Not found. */
2804 return ENXIO;
2805}
2806
2807/*
 * Get a list of the available data link types for the interface.
2809 */
2810static int
2811bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2812{
2813 u_int n;
2814 int error;
2815 struct ifnet *ifp;
2816 struct bpf_if *bp;
2817 user_addr_t dlist;
2818 struct bpf_dltlist bfl;
2819
2820 bcopy(src: addr, dst: &bfl, n: sizeof(bfl));
2821 if (proc_is64bit(p)) {
2822 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2823 } else {
2824 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2825 }
2826
2827 ifp = d->bd_bif->bif_ifp;
2828 n = 0;
2829 error = 0;
2830
2831 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2832 if (bp->bif_ifp != ifp) {
2833 continue;
2834 }
2835 /*
2836 * Do not use DLT_PKTAP, unless requested explicitly
2837 */
2838 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP)) {
2839 continue;
2840 }
2841 if (dlist != USER_ADDR_NULL) {
2842 if (n >= bfl.bfl_len) {
2843 return ENOMEM;
2844 }
2845 error = copyout(&bp->bif_dlt, dlist,
2846 sizeof(bp->bif_dlt));
2847 if (error != 0) {
2848 break;
2849 }
2850 dlist += sizeof(bp->bif_dlt);
2851 }
2852 n++;
2853 }
2854 bfl.bfl_len = n;
2855 bcopy(src: &bfl, dst: addr, n: sizeof(bfl));
2856
2857 return error;
2858}
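
/*
 * Illustrative userspace sketch (not kernel code): listing the DLTs offered
 * for the attached interface with BIOCGDLTLIST and selecting one with
 * BIOCSDLT. The 32-entry array is an arbitrary example size; when it is too
 * small, bpf_getdltlist() above returns ENOMEM.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *
 *	static int
 *	pick_dlt(int fd, u_int wanted)
 *	{
 *		u_int dlts[32];
 *		struct bpf_dltlist bfl;
 *		u_int i;
 *
 *		bfl.bfl_len = sizeof(dlts) / sizeof(dlts[0]);
 *		bfl.bfl_list = dlts;
 *		if (ioctl(fd, BIOCGDLTLIST, &bfl) == -1)
 *			return -1;
 *		for (i = 0; i < bfl.bfl_len; i++) {
 *			if (dlts[i] == wanted)
 *				return ioctl(fd, BIOCSDLT, &wanted);
 *		}
 *		return -1;
 *	}
 */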
2859
2860/*
2861 * Set the data link type of a BPF instance.
2862 */
2863static int
2864bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2865{
2866 int error, opromisc;
2867 struct ifnet *ifp;
2868 struct bpf_if *bp;
2869
2870 if (d->bd_bif->bif_dlt == dlt) {
2871 return 0;
2872 }
2873
2874 while (d->bd_hbuf_read) {
2875 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpf_setdlt", NULL);
2876 }
2877
2878 if ((d->bd_flags & BPF_CLOSING) != 0) {
2879 return ENXIO;
2880 }
2881
2882 ifp = d->bd_bif->bif_ifp;
2883 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2884 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2885 /*
2886 * Do not use DLT_PKTAP, unless requested explicitly
2887 */
2888 if (bp->bif_dlt == DLT_PKTAP &&
2889 !(d->bd_flags & BPF_WANT_PKTAP)) {
2890 continue;
2891 }
2892 break;
2893 }
2894 }
2895 if (bp != NULL) {
2896 opromisc = d->bd_promisc;
2897 if (bpf_detachd(d) != 0) {
2898 return ENXIO;
2899 }
2900 error = bpf_attachd(d, bp);
2901 if (error != 0) {
2902 os_log_error(OS_LOG_DEFAULT,
2903 "bpf_setdlt: bpf%d bpf_attachd %s error %d",
2904 d->bd_dev_minor, if_name(bp->bif_ifp),
2905 error);
2906 return error;
2907 }
2908 reset_d(d);
2909 if (opromisc) {
2910 lck_mtx_unlock(lck: bpf_mlock);
2911 error = ifnet_set_promiscuous(interface: bp->bif_ifp, on: 1);
2912 lck_mtx_lock(lck: bpf_mlock);
2913 if (error != 0) {
2914 os_log_error(OS_LOG_DEFAULT,
2915 "bpf_setdlt: bpf%d ifpromisc %s error %d",
2916 d->bd_dev_minor, if_name(bp->bif_ifp), error);
2917 } else {
2918 d->bd_promisc = 1;
2919 }
2920 }
2921 }
2922 return bp == NULL ? EINVAL : 0;
2923}
2924
2925static int
2926bpf_set_traffic_class(struct bpf_d *d, int tc)
2927{
2928 int error = 0;
2929
2930 if (!SO_VALID_TC(tc)) {
2931 error = EINVAL;
2932 } else {
2933 d->bd_traffic_class = tc;
2934 }
2935
2936 return error;
2937}
2938
2939static void
2940bpf_set_packet_service_class(struct mbuf *m, int tc)
2941{
2942 if (!(m->m_flags & M_PKTHDR)) {
2943 return;
2944 }
2945
2946 VERIFY(SO_VALID_TC(tc));
2947 (void) m_set_service_class(m, so_tc2msc(tc));
2948}
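
/*
 * Illustrative userspace sketch (not kernel code), assuming the BIOCSETTC
 * ioctl and the SO_TC_* traffic-class constants are visible to the build
 * (on some SDKs they live in private headers). Any value rejected by
 * SO_VALID_TC() above comes back as EINVAL.
 *
 *	#include <sys/socket.h>
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *
 *	static int
 *	set_background_tc(int fd)
 *	{
 *		int tc = SO_TC_BK;	// example value only
 *
 *		return ioctl(fd, BIOCSETTC, &tc);
 *	}
 */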
2949
2950/*
2951 * Support for select()
2952 *
2953 * Return true iff the specific operation will not block indefinitely.
2954 * Otherwise, return false but make a note that a selwakeup() must be done.
2955 */
2956int
2957bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2958{
2959 struct bpf_d *d;
2960 int ret = 0;
2961
2962 lck_mtx_lock(lck: bpf_mlock);
2963
2964 d = bpf_dtab[minor(dev)];
2965 if (d == NULL || d == BPF_DEV_RESERVED ||
2966 (d->bd_flags & BPF_CLOSING) != 0) {
2967 lck_mtx_unlock(lck: bpf_mlock);
2968 return ENXIO;
2969 }
2970
2971 bpf_acquire_d(d);
2972
2973 if (d->bd_bif == NULL) {
2974 bpf_release_d(d);
2975 lck_mtx_unlock(lck: bpf_mlock);
2976 return ENXIO;
2977 }
2978
2979 while (d->bd_hbuf_read) {
2980 msleep(chan: (caddr_t)d, mtx: bpf_mlock, PRINET, wmesg: "bpfselect", NULL);
2981 }
2982
2983 if ((d->bd_flags & BPF_CLOSING) != 0) {
2984 bpf_release_d(d);
2985 lck_mtx_unlock(lck: bpf_mlock);
2986 return ENXIO;
2987 }
2988
2989 switch (which) {
2990 case FREAD:
2991 if (d->bd_hlen != 0 ||
2992 ((d->bd_immediate ||
2993 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0)) {
2994 ret = 1; /* read has data to return */
2995 } else {
2996 /*
2997 * Read has no data to return.
2998 * Make the select wait, and start a timer if
2999 * necessary.
3000 */
3001 selrecord(selector: p, &d->bd_sel, wql);
3002 bpf_start_timer(d);
3003 }
3004 break;
3005
3006 case FWRITE:
3007 /* can't determine whether a write would block */
3008 ret = 1;
3009 break;
3010 }
3011
3012 bpf_release_d(d);
3013 lck_mtx_unlock(lck: bpf_mlock);
3014
3015 return ret;
3016}
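
/*
 * Illustrative userspace sketch (not kernel code): waiting for packets with
 * select(), which lands in bpfselect() above. FREAD becomes ready once the
 * hold buffer has data, or the store buffer does in immediate/timed-out
 * mode. The one-second timeout is an arbitrary example value.
 *
 *	#include <sys/time.h>
 *	#include <sys/select.h>
 *	#include <sys/ioctl.h>
 *	#include <net/bpf.h>
 *	#include <unistd.h>
 *
 *	static ssize_t
 *	wait_and_read(int fd, void *buf, size_t buflen)
 *	{
 *		struct timeval to = { .tv_sec = 1, .tv_usec = 0 };
 *		fd_set rfds;
 *
 *		(void) ioctl(fd, BIOCSRTIMEOUT, &to);	// bound the in-kernel wait
 *		FD_ZERO(&rfds);
 *		FD_SET(fd, &rfds);
 *		if (select(fd + 1, &rfds, NULL, NULL, &to) <= 0)
 *			return 0;	// timed out or failed
 *		return read(fd, buf, buflen);	// buflen should match BIOCGBLEN
 *	}
 */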
3017
3018/*
3019 * Support for kevent() system call. Register EVFILT_READ filters and
3020 * reject all others.
3021 */
3022int bpfkqfilter(dev_t dev, struct knote *kn);
3023static void filt_bpfdetach(struct knote *);
3024static int filt_bpfread(struct knote *, long);
3025static int filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev);
3026static int filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev);
3027
3028SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
3029 .f_isfd = 1,
3030 .f_detach = filt_bpfdetach,
3031 .f_event = filt_bpfread,
3032 .f_touch = filt_bpftouch,
3033 .f_process = filt_bpfprocess,
3034};
3035
3036static int
3037filt_bpfread_common(struct knote *kn, struct kevent_qos_s *kev, struct bpf_d *d)
3038{
3039 int ready = 0;
3040 int64_t data = 0;
3041
3042 if (d->bd_immediate) {
3043 /*
3044 * If there's data in the hold buffer, it's the
3045 * amount of data a read will return.
3046 *
		 * If there's no data in the hold buffer, but
		 * there's data in the store buffer, a read will
		 * immediately rotate the store buffer to the
		 * hold buffer, so the amount of data in the store
		 * buffer is the amount of data a read will
		 * return.
3053 *
3054 * If there's no data in either buffer, we're not
3055 * ready to read.
3056 */
3057 data = (d->bd_hlen == 0 || d->bd_hbuf_read ?
3058 d->bd_slen : d->bd_hlen);
3059 int64_t lowwat = knote_low_watermark(kn);
3060 if (lowwat > d->bd_bufsize) {
3061 lowwat = d->bd_bufsize;
3062 }
3063 ready = (data >= lowwat);
3064 } else {
3065 /*
3066 * If there's data in the hold buffer, it's the
3067 * amount of data a read will return.
3068 *
3069 * If there's no data in the hold buffer, but
3070 * there's data in the store buffer, if the
3071 * timer has expired a read will immediately
3072 * rotate the store buffer to the hold buffer,
3073 * so the amount of data in the store buffer is
3074 * the amount of data a read will return.
3075 *
3076 * If there's no data in either buffer, or there's
3077 * no data in the hold buffer and the timer hasn't
3078 * expired, we're not ready to read.
3079 */
3080 data = ((d->bd_hlen == 0 || d->bd_hbuf_read) &&
3081 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
3082 ready = (data > 0);
3083 }
3084 if (!ready) {
3085 bpf_start_timer(d);
3086 } else if (kev) {
3087 knote_fill_kevent(kn, kev, data);
3088 }
3089
3090 return ready;
3091}
3092
3093int
3094bpfkqfilter(dev_t dev, struct knote *kn)
3095{
3096 struct bpf_d *d;
3097 int res;
3098
3099 /*
3100 * Is this device a bpf?
3101 */
3102 if (major(dev) != CDEV_MAJOR || kn->kn_filter != EVFILT_READ) {
3103 knote_set_error(kn, EINVAL);
3104 return 0;
3105 }
3106
3107 lck_mtx_lock(lck: bpf_mlock);
3108
3109 d = bpf_dtab[minor(dev)];
3110
3111 if (d == NULL || d == BPF_DEV_RESERVED ||
3112 (d->bd_flags & BPF_CLOSING) != 0 ||
3113 d->bd_bif == NULL) {
3114 lck_mtx_unlock(lck: bpf_mlock);
3115 knote_set_error(kn, ENXIO);
3116 return 0;
3117 }
3118
3119 kn->kn_filtid = EVFILTID_BPFREAD;
3120 knote_kn_hook_set_raw(kn, kn_hook: d);
3121 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
3122 d->bd_flags |= BPF_KNOTE;
3123
3124 /* capture the current state */
3125 res = filt_bpfread_common(kn, NULL, d);
3126
3127 lck_mtx_unlock(lck: bpf_mlock);
3128
3129 return res;
3130}
3131
3132static void
3133filt_bpfdetach(struct knote *kn)
3134{
3135 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3136
3137 lck_mtx_lock(lck: bpf_mlock);
3138 if (d->bd_flags & BPF_KNOTE) {
3139 KNOTE_DETACH(&d->bd_sel.si_note, kn);
3140 d->bd_flags &= ~BPF_KNOTE;
3141 }
3142 lck_mtx_unlock(lck: bpf_mlock);
3143}
3144
3145static int
3146filt_bpfread(struct knote *kn, long hint)
3147{
3148#pragma unused(hint)
3149 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3150
3151 return filt_bpfread_common(kn, NULL, d);
3152}
3153
3154static int
3155filt_bpftouch(struct knote *kn, struct kevent_qos_s *kev)
3156{
3157 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3158 int res;
3159
3160 lck_mtx_lock(lck: bpf_mlock);
3161
3162 /* save off the lowat threshold and flag */
3163 kn->kn_sdata = kev->data;
3164 kn->kn_sfflags = kev->fflags;
3165
	/* re-evaluate readiness with the updated parameters */
3167 res = filt_bpfread_common(kn, NULL, d);
3168
3169 lck_mtx_unlock(lck: bpf_mlock);
3170
3171 return res;
3172}
3173
3174static int
3175filt_bpfprocess(struct knote *kn, struct kevent_qos_s *kev)
3176{
3177 struct bpf_d *d = (struct bpf_d *)knote_kn_hook_get_raw(kn);
3178 int res;
3179
3180 lck_mtx_lock(lck: bpf_mlock);
3181 res = filt_bpfread_common(kn, kev, d);
3182 lck_mtx_unlock(lck: bpf_mlock);
3183
3184 return res;
3185}
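
/*
 * Illustrative userspace sketch (not kernel code): registering the
 * EVFILT_READ knote that the filterops above service. In immediate mode a
 * NOTE_LOWAT threshold passed in the kevent data field is honored by
 * filt_bpfread_common(); the low-water value of 1 is an example choice.
 *
 *	#include <sys/event.h>
 *	#include <unistd.h>
 *
 *	static intptr_t
 *	wait_with_kqueue(int bpf_fd)
 *	{
 *		int kq = kqueue();
 *		struct kevent ev, out;
 *		intptr_t nbytes = -1;
 *
 *		if (kq < 0)
 *			return -1;
 *		EV_SET(&ev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 1, NULL);
 *		if (kevent(kq, &ev, 1, &out, 1, NULL) == 1)
 *			nbytes = out.data;	// bytes a read() would return
 *		close(kq);
 *		return nbytes;
 *	}
 */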
3186
3187/*
3188 * Copy data from an mbuf chain into a buffer. This code is derived
3189 * from m_copydata in kern/uipc_mbuf.c.
3190 */
3191static void
3192bpf_mcopy(struct mbuf *m, void *dst_arg, size_t len, size_t offset)
3193{
3194 u_int count;
3195 u_char *dst;
3196
3197 dst = dst_arg;
3198
3199 while (offset >= m->m_len) {
3200 offset -= m->m_len;
3201 m = m->m_next;
3202 if (m == NULL) {
3203 panic("bpf_mcopy");
3204 }
3205 continue;
3206 }
3207
3208 while (len > 0) {
3209 if (m == NULL) {
3210 panic("bpf_mcopy");
3211 }
3212 count = MIN(m->m_len - (u_int)offset, (u_int)len);
3213 bcopy(src: (u_char *)mbuf_data(mbuf: m) + offset, dst, n: count);
3214 m = m->m_next;
3215 dst += count;
3216 len -= count;
3217 offset = 0;
3218 }
3219}
3220
3221static inline void
3222bpf_tap_imp(
3223 ifnet_t ifp,
3224 u_int32_t dlt,
3225 struct bpf_packet *bpf_pkt,
3226 int outbound)
3227{
3228 struct bpf_d *d;
3229 u_int slen;
3230 struct bpf_if *bp;
3231
3232 /*
3233 * It's possible that we get here after the bpf descriptor has been
3234 * detached from the interface; in such a case we simply return.
3235 * Lock ordering is important since we can be called asynchronously
3236 * (from IOKit) to process an inbound packet; when that happens
3237 * we would have been holding its "gateLock" and will be acquiring
3238 * "bpf_mlock" upon entering this routine. Due to that, we release
3239 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
	 * acquire "gateLock" in IOKit), in order to avoid a deadlock
	 * when an ifnet_set_promiscuous request simultaneously collides with
3242 * an inbound packet being passed into the tap callback.
3243 */
3244 lck_mtx_lock(lck: bpf_mlock);
3245 if (ifp->if_bpf == NULL) {
3246 lck_mtx_unlock(lck: bpf_mlock);
3247 return;
3248 }
3249 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
3250 if (bp->bif_ifp != ifp) {
3251 /* wrong interface */
3252 bp = NULL;
3253 break;
3254 }
3255 if (dlt == 0 || bp->bif_dlt == dlt) {
3256 /* tapping default DLT or DLT matches */
3257 break;
3258 }
3259 }
3260 if (bp == NULL) {
3261 goto done;
3262 }
3263 for (d = bp->bif_dlist; d != NULL; d = d->bd_next) {
3264 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
3265 struct bpf_packet bpf_pkt_tmp = {};
3266 struct pktap_header_buffer bpfp_header_tmp = {};
3267
3268 if (outbound && (d->bd_direction & BPF_D_OUT) == 0) {
3269 continue;
3270 }
3271 if (!outbound && (d->bd_direction & BPF_D_IN) == 0) {
3272 continue;
3273 }
3274
3275 ++d->bd_rcount;
3276 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
3277 (u_int)bpf_pkt->bpfp_total_length, 0);
3278
3279 if (slen != 0) {
3280 if (bp->bif_ifp->if_type == IFT_PKTAP &&
3281 bp->bif_dlt == DLT_PKTAP) {
3282 if (d->bd_flags & BPF_TRUNCATE) {
3283 slen = min(a: slen, b: get_pkt_trunc_len(bpf_pkt));
3284 }
3285 /*
3286 * Need to copy the bpf_pkt because the conversion
3287 * to v2 pktap header modifies the content of the
3288 * bpfp_header
3289 */
3290 if ((d->bd_flags & BPF_PKTHDRV2) &&
3291 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
3292 bpf_pkt_tmp = *bpf_pkt;
3293
3294 bpf_pkt = &bpf_pkt_tmp;
3295
3296 memcpy(dst: &bpfp_header_tmp, src: bpf_pkt->bpfp_header,
3297 n: bpf_pkt->bpfp_header_length);
3298
3299 bpf_pkt->bpfp_header = &bpfp_header_tmp;
3300
3301 convert_to_pktap_header_to_v2(bpf_pkt,
3302 truncate: !!(d->bd_flags & BPF_TRUNCATE));
3303 }
3304 }
3305 ++d->bd_fcount;
3306 catchpacket(d, bpf_pkt, slen, outbound);
3307 }
3308 bpf_pkt = bpf_pkt_saved;
3309 }
3310
3311done:
3312 lck_mtx_unlock(lck: bpf_mlock);
3313}
3314
3315static inline void
3316bpf_tap_mbuf(
3317 ifnet_t ifp,
3318 u_int32_t dlt,
3319 mbuf_t m,
3320 void* hdr,
3321 size_t hlen,
3322 int outbound)
3323{
3324 struct bpf_packet bpf_pkt;
3325 struct mbuf *m0;
3326
3327 if (ifp->if_bpf == NULL) {
3328 /* quickly check without taking lock */
3329 return;
3330 }
3331 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3332 bpf_pkt.bpfp_mbuf = m;
3333 bpf_pkt.bpfp_total_length = 0;
3334 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
3335 bpf_pkt.bpfp_total_length += m0->m_len;
3336 }
3337 bpf_pkt.bpfp_header = hdr;
3338 if (hdr != NULL) {
3339 bpf_pkt.bpfp_total_length += hlen;
3340 bpf_pkt.bpfp_header_length = hlen;
3341 } else {
3342 bpf_pkt.bpfp_header_length = 0;
3343 }
3344 bpf_tap_imp(ifp, dlt, bpf_pkt: &bpf_pkt, outbound);
3345}
3346
3347void
3348bpf_tap_out(
3349 ifnet_t ifp,
3350 u_int32_t dlt,
3351 mbuf_t m,
3352 void* hdr,
3353 size_t hlen)
3354{
3355 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, outbound: 1);
3356}
3357
3358void
3359bpf_tap_in(
3360 ifnet_t ifp,
3361 u_int32_t dlt,
3362 mbuf_t m,
3363 void* hdr,
3364 size_t hlen)
3365{
3366 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, outbound: 0);
3367}
3368
3369/* Callback registered with Ethernet driver. */
3370static int
3371bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
3372{
3373 bpf_tap_mbuf(ifp, dlt: 0, m, NULL, hlen: 0, outbound: mbuf_pkthdr_rcvif(mbuf: m) == NULL);
3374
3375 return 0;
3376}
3377
3378#if SKYWALK
3379#include <skywalk/os_skywalk_private.h>
3380
3381static void
3382bpf_pktcopy(kern_packet_t pkt, void *dst_arg, size_t len, size_t offset)
3383{
3384 kern_buflet_t buflet = NULL;
3385 size_t count;
3386 u_char *dst;
3387
3388 dst = dst_arg;
3389 while (len > 0) {
3390 uint8_t *addr;
3391
3392 u_int32_t buflet_length;
3393
3394 buflet = kern_packet_get_next_buflet(pkt, buflet);
3395 VERIFY(buflet != NULL);
3396 addr = kern_buflet_get_data_address(buflet);
3397 VERIFY(addr != NULL);
3398 addr += kern_buflet_get_data_offset(buflet);
3399 buflet_length = kern_buflet_get_data_length(buflet);
3400 if (offset >= buflet_length) {
3401 offset -= buflet_length;
3402 continue;
3403 }
3404 count = MIN(buflet_length - offset, len);
3405 bcopy(src: (void *)(addr + offset), dst: (void *)dst, n: count);
3406 dst += count;
3407 len -= count;
3408 offset = 0;
3409 }
3410}
3411
3412static inline void
3413bpf_tap_packet(
3414 ifnet_t ifp,
3415 u_int32_t dlt,
3416 kern_packet_t pkt,
3417 void* hdr,
3418 size_t hlen,
3419 int outbound)
3420{
3421 struct bpf_packet bpf_pkt;
3422 struct mbuf * m;
3423
3424 if (ifp->if_bpf == NULL) {
3425 /* quickly check without taking lock */
3426 return;
3427 }
3428 m = kern_packet_get_mbuf(pkt);
3429 if (m != NULL) {
3430 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
3431 bpf_pkt.bpfp_mbuf = m;
3432 bpf_pkt.bpfp_total_length = m_length(m);
3433 } else {
3434 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_PKT;
3435 bpf_pkt.bpfp_pkt = pkt;
3436 bpf_pkt.bpfp_total_length = kern_packet_get_data_length(pkt);
3437 }
3438 bpf_pkt.bpfp_header = hdr;
3439 bpf_pkt.bpfp_header_length = hlen;
3440 if (hlen != 0) {
3441 bpf_pkt.bpfp_total_length += hlen;
3442 }
3443 bpf_tap_imp(ifp, dlt, bpf_pkt: &bpf_pkt, outbound);
3444}
3445
3446void
3447bpf_tap_packet_out(
3448 ifnet_t ifp,
3449 u_int32_t dlt,
3450 kern_packet_t pkt,
3451 void* hdr,
3452 size_t hlen)
3453{
3454 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, outbound: 1);
3455}
3456
3457void
3458bpf_tap_packet_in(
3459 ifnet_t ifp,
3460 u_int32_t dlt,
3461 kern_packet_t pkt,
3462 void* hdr,
3463 size_t hlen)
3464{
3465 bpf_tap_packet(ifp, dlt, pkt, hdr, hlen, outbound: 0);
3466}
3467
3468#endif /* SKYWALK */
3469
3470static errno_t
3471bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
3472{
3473 errno_t err = 0;
3474 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
3475 err = mbuf_copydata(mbuf: pkt->bpfp_mbuf, offset: off, length: len, out_data);
3476#if SKYWALK
3477 } else if (pkt->bpfp_type == BPF_PACKET_TYPE_PKT) {
3478 err = kern_packet_copy_bytes(pkt->bpfp_pkt, off, len, out_data);
3479#endif /* SKYWALK */
3480 } else {
3481 err = EINVAL;
3482 }
3483
3484 return err;
3485}
3486
3487static void
3488copy_bpf_packet_offset(struct bpf_packet * pkt, void * dst, size_t len, size_t offset)
3489{
3490 /* copy the optional header */
3491 if (offset < pkt->bpfp_header_length) {
3492 size_t count = MIN(len, pkt->bpfp_header_length - offset);
3493 caddr_t src = (caddr_t)pkt->bpfp_header;
3494 bcopy(src: src + offset, dst, n: count);
3495 len -= count;
3496 dst = (void *)((uintptr_t)dst + count);
3497 offset = 0;
3498 } else {
3499 offset -= pkt->bpfp_header_length;
3500 }
3501
3502 if (len == 0) {
3503 /* nothing past the header */
3504 return;
3505 }
3506 /* copy the packet */
3507 switch (pkt->bpfp_type) {
3508 case BPF_PACKET_TYPE_MBUF:
3509 bpf_mcopy(m: pkt->bpfp_mbuf, dst_arg: dst, len, offset);
3510 break;
3511#if SKYWALK
3512 case BPF_PACKET_TYPE_PKT:
3513 bpf_pktcopy(pkt: pkt->bpfp_pkt, dst_arg: dst, len, offset);
3514 break;
3515#endif /* SKYWALK */
3516 default:
3517 break;
3518 }
3519}
3520
3521static void
3522copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
3523{
3524 copy_bpf_packet_offset(pkt, dst, len, offset: 0);
3525}
3526
3527static uint32_t
3528get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3529 const uint32_t remaining_caplen)
3530{
3531 /*
3532 * For some reason tcpdump expects to have one byte beyond the ESP header
3533 */
3534 uint32_t trunc_len = ESP_HDR_SIZE + 1;
3535
3536 if (trunc_len > remaining_caplen) {
3537 return remaining_caplen;
3538 }
3539
3540 return trunc_len;
3541}
3542
3543static uint32_t
3544get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint32_t off,
3545 const uint32_t remaining_caplen)
3546{
3547 /*
3548 * Include the payload generic header
3549 */
3550 uint32_t trunc_len = ISAKMP_HDR_SIZE;
3551
3552 if (trunc_len > remaining_caplen) {
3553 return remaining_caplen;
3554 }
3555
3556 return trunc_len;
3557}
3558
3559static uint32_t
3560get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint32_t off,
3561 const uint32_t remaining_caplen)
3562{
3563 int err = 0;
3564 uint32_t trunc_len = 0;
3565 char payload[remaining_caplen];
3566
3567 err = bpf_copydata(pkt, off, len: remaining_caplen, out_data: payload);
3568 if (err != 0) {
3569 return remaining_caplen;
3570 }
	/*
	 * There are three cases:
	 * - IKE: the payload starts with a 4-byte header set to zero before the ISAKMP header
	 * - keepalive: 1-byte payload
	 * - otherwise it's ESP
	 */
3577 if (remaining_caplen >= 4 &&
3578 payload[0] == 0 && payload[1] == 0 &&
3579 payload[2] == 0 && payload[3] == 0) {
3580 trunc_len = 4 + get_isakmp_trunc_len(pkt, off: off + 4, remaining_caplen: remaining_caplen - 4);
3581 } else if (remaining_caplen == 1) {
3582 trunc_len = 1;
3583 } else {
3584 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3585 }
3586
3587 if (trunc_len > remaining_caplen) {
3588 return remaining_caplen;
3589 }
3590
3591 return trunc_len;
3592}
3593
3594static uint32_t
3595get_udp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3596{
3597 int err = 0;
3598 uint32_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
3599
3600 if (trunc_len >= remaining_caplen) {
3601 return remaining_caplen;
3602 }
3603
3604 struct udphdr udphdr;
3605 err = bpf_copydata(pkt, off, len: sizeof(struct udphdr), out_data: &udphdr);
3606 if (err != 0) {
3607 return remaining_caplen;
3608 }
3609
3610 u_short sport, dport;
3611
3612 sport = EXTRACT_SHORT(&udphdr.uh_sport);
3613 dport = EXTRACT_SHORT(&udphdr.uh_dport);
3614
3615 if (dport == PORT_DNS || sport == PORT_DNS) {
3616 /*
3617 * Full UDP payload for DNS
3618 */
3619 trunc_len = remaining_caplen;
3620 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
3621 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
3622 /*
3623 * Full UDP payload for BOOTP and DHCP
3624 */
3625 trunc_len = remaining_caplen;
3626 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
3627 /*
3628 * Return the ISAKMP header
3629 */
3630 trunc_len += get_isakmp_trunc_len(pkt, off: off + sizeof(struct udphdr),
3631 remaining_caplen: remaining_caplen - sizeof(struct udphdr));
3632 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
3633 trunc_len += get_isakmp_natt_trunc_len(pkt, off: off + sizeof(struct udphdr),
3634 remaining_caplen: remaining_caplen - sizeof(struct udphdr));
3635 }
3636 if (trunc_len >= remaining_caplen) {
3637 return remaining_caplen;
3638 }
3639
3640 return trunc_len;
3641}
3642
3643static uint32_t
3644get_tcp_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3645{
3646 int err = 0;
3647 uint32_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
3648 if (trunc_len >= remaining_caplen) {
3649 return remaining_caplen;
3650 }
3651
3652 struct tcphdr tcphdr;
3653 err = bpf_copydata(pkt, off, len: sizeof(struct tcphdr), out_data: &tcphdr);
3654 if (err != 0) {
3655 return remaining_caplen;
3656 }
3657
3658 u_short sport, dport;
3659 sport = EXTRACT_SHORT(&tcphdr.th_sport);
3660 dport = EXTRACT_SHORT(&tcphdr.th_dport);
3661
3662 if (dport == PORT_DNS || sport == PORT_DNS) {
3663 /*
3664 * Full TCP payload for DNS
3665 */
3666 trunc_len = remaining_caplen;
3667 } else {
3668 trunc_len = (uint16_t)(tcphdr.th_off << 2);
3669 }
3670 if (trunc_len >= remaining_caplen) {
3671 return remaining_caplen;
3672 }
3673
3674 return trunc_len;
3675}
3676
3677static uint32_t
3678get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3679{
3680 uint32_t trunc_len;
3681
3682 switch (proto) {
3683 case IPPROTO_ICMP: {
3684 /*
		 * Full ICMP payload
3686 */
3687 trunc_len = remaining_caplen;
3688 break;
3689 }
3690 case IPPROTO_ICMPV6: {
3691 /*
		 * Full ICMPv6 payload
3693 */
3694 trunc_len = remaining_caplen;
3695 break;
3696 }
3697 case IPPROTO_IGMP: {
3698 /*
3699 * Full IGMP payload
3700 */
3701 trunc_len = remaining_caplen;
3702 break;
3703 }
3704 case IPPROTO_UDP: {
3705 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3706 break;
3707 }
3708 case IPPROTO_TCP: {
3709 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3710 break;
3711 }
3712 case IPPROTO_ESP: {
3713 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3714 break;
3715 }
3716 default: {
3717 /*
3718 * By default we only include the IP header
3719 */
3720 trunc_len = 0;
3721 break;
3722 }
3723 }
3724 if (trunc_len >= remaining_caplen) {
3725 return remaining_caplen;
3726 }
3727
3728 return trunc_len;
3729}
3730
3731static uint32_t
3732get_ip_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3733{
3734 int err = 0;
3735 uint32_t iplen = sizeof(struct ip);
3736 if (iplen >= remaining_caplen) {
3737 return remaining_caplen;
3738 }
3739
3740 struct ip iphdr;
3741 err = bpf_copydata(pkt, off, len: sizeof(struct ip), out_data: &iphdr);
3742 if (err != 0) {
3743 return remaining_caplen;
3744 }
3745
3746 uint8_t proto = 0;
3747
3748 iplen = (uint16_t)(iphdr.ip_hl << 2);
3749 if (iplen >= remaining_caplen) {
3750 return remaining_caplen;
3751 }
3752
3753 proto = iphdr.ip_p;
3754 iplen += get_proto_trunc_len(proto, pkt, off: off + iplen, remaining_caplen: remaining_caplen - iplen);
3755
3756 if (iplen >= remaining_caplen) {
3757 return remaining_caplen;
3758 }
3759
3760 return iplen;
3761}
3762
3763static uint32_t
3764get_ip6_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3765{
3766 int err = 0;
3767 uint32_t iplen = sizeof(struct ip6_hdr);
3768 if (iplen >= remaining_caplen) {
3769 return remaining_caplen;
3770 }
3771
3772 struct ip6_hdr ip6hdr;
3773 err = bpf_copydata(pkt, off, len: sizeof(struct ip6_hdr), out_data: &ip6hdr);
3774 if (err != 0) {
3775 return remaining_caplen;
3776 }
3777
3778 uint8_t proto = 0;
3779
3780 /*
3781 * TBD: process the extension headers
3782 */
3783 proto = ip6hdr.ip6_nxt;
3784 iplen += get_proto_trunc_len(proto, pkt, off: off + iplen, remaining_caplen: remaining_caplen - iplen);
3785
3786 if (iplen >= remaining_caplen) {
3787 return remaining_caplen;
3788 }
3789
3790 return iplen;
3791}
3792
3793static uint32_t
3794get_ether_trunc_len(struct bpf_packet *pkt, uint32_t off, const uint32_t remaining_caplen)
3795{
3796 int err = 0;
3797 uint32_t ethlen = sizeof(struct ether_header);
3798 if (ethlen >= remaining_caplen) {
3799 return remaining_caplen;
3800 }
3801
3802 struct ether_header eh = {};
3803 err = bpf_copydata(pkt, off, len: sizeof(struct ether_header), out_data: &eh);
3804 if (err != 0) {
3805 return remaining_caplen;
3806 }
3807
3808 u_short type = EXTRACT_SHORT(&eh.ether_type);
3809 /* Include full ARP */
3810 if (type == ETHERTYPE_ARP) {
3811 ethlen = remaining_caplen;
3812 } else if (type == ETHERTYPE_IP) {
3813 ethlen += get_ip_trunc_len(pkt, off: off + sizeof(struct ether_header),
3814 remaining_caplen: remaining_caplen - ethlen);
3815 } else if (type == ETHERTYPE_IPV6) {
3816 ethlen += get_ip6_trunc_len(pkt, off: off + sizeof(struct ether_header),
3817 remaining_caplen: remaining_caplen - ethlen);
3818 } else {
3819 ethlen = MIN(BPF_MIN_PKT_SIZE, remaining_caplen);
3820 }
3821 return ethlen;
3822}
3823
3824static uint32_t
3825get_pkt_trunc_len(struct bpf_packet *pkt)
3826{
3827 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3828 uint32_t in_pkt_len = 0;
3829 uint32_t out_pkt_len = 0;
3830 uint32_t tlen = 0;
3831 uint32_t pre_adjust; // L2 header not in mbuf or kern_packet
3832
3833 // bpfp_total_length must contain the BPF packet header
3834 assert3u(pkt->bpfp_total_length, >=, pkt->bpfp_header_length);
3835
3836 // The BPF packet header must contain the pktap header
3837 assert3u(pkt->bpfp_header_length, >=, pktap->pth_length);
3838
3839 // The pre frame length (L2 header) must be contained in the packet
3840 assert3u(pkt->bpfp_total_length, >=, pktap->pth_length + pktap->pth_frame_pre_length);
3841
3842 /*
	 * pktap->pth_frame_pre_length is the L2 header length and accounts
	 * for both the L2 header in the packet payload and pre_adjust.
3845 *
3846 * pre_adjust represents an adjustment for a pseudo L2 header that is not
3847 * part of packet payload -- not in the mbuf or kern_packet -- and comes
3848 * just after the pktap header.
3849 *
	 * pktap->pth_length is the size of the pktap header (excluding pre_adjust)
3851 *
3852 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3853 */
3854 pre_adjust = (uint32_t)(pkt->bpfp_header_length - pktap->pth_length);
3855
3856 if (pktap->pth_iftype == IFT_ETHER) {
3857 /*
3858 * We need to parse the Ethernet header to find the network layer
3859 * protocol
3860 */
3861 in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pre_adjust);
3862
3863 out_pkt_len = get_ether_trunc_len(pkt, off: 0, remaining_caplen: in_pkt_len);
3864
3865 tlen = pktap->pth_length + pre_adjust + out_pkt_len;
3866 } else {
3867 /*
3868 * For other interface types, we only know to parse IPv4 and IPv6.
3869 *
	 * To get to the beginning of the IPv4 or IPv6 packet, we need to skip
	 * over the L2 header that is part of the actual packet payload (mbuf or kern_packet)
3872 */
3873 uint32_t off; // offset past the L2 header in the actual packet payload
3874
3875 off = pktap->pth_frame_pre_length - pre_adjust;
3876
3877 in_pkt_len = (uint32_t)(pkt->bpfp_total_length - pktap->pth_length - pktap->pth_frame_pre_length);
3878
3879 if (pktap->pth_protocol_family == AF_INET) {
3880 out_pkt_len = get_ip_trunc_len(pkt, off, remaining_caplen: in_pkt_len);
3881 } else if (pktap->pth_protocol_family == AF_INET6) {
3882 out_pkt_len = get_ip6_trunc_len(pkt, off, remaining_caplen: in_pkt_len);
3883 } else {
3884 out_pkt_len = MIN(BPF_MIN_PKT_SIZE, in_pkt_len);
3885 }
3886 tlen = pktap->pth_length + pktap->pth_frame_pre_length + out_pkt_len;
3887 }
3888
3889 // Verify we do not overflow the buffer
3890 if (__improbable(tlen > pkt->bpfp_total_length)) {
3891 bool do_panic = bpf_debug != 0 ? true : false;
3892
3893#if DEBUG
3894 do_panic = true;
3895#endif /* DEBUG */
3896 if (do_panic) {
3897 panic("%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
3898 __func__, __LINE__,
3899 tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
3900 } else {
3901 os_log(OS_LOG_DEFAULT,
3902 "%s:%d tlen %u > bpfp_total_length %lu bpfp_header_length %lu pth_frame_pre_length %u pre_adjust %u in_pkt_len %u out_pkt_len %u",
3903 __func__, __LINE__,
3904 tlen, pkt->bpfp_total_length, pkt->bpfp_header_length, pktap->pth_frame_pre_length, pre_adjust, in_pkt_len, out_pkt_len);
3905 }
3906 bpf_trunc_overflow += 1;
3907 tlen = (uint32_t)pkt->bpfp_total_length;
3908 }
3909
3910 return tlen;
3911}
3912
3913static uint8_t
3914get_common_prefix_size(const void *a, const void *b, uint8_t max_bytes)
3915{
3916 uint8_t max_words = max_bytes >> 2;
3917 const uint32_t *x = (const uint32_t *)a;
3918 const uint32_t *y = (const uint32_t *)b;
3919 uint8_t i;
3920
3921 for (i = 0; i < max_words; i++) {
3922 if (x[i] != y[i]) {
3923 break;
3924 }
3925 }
3926 return (uint8_t)(i << 2);
3927}
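
/*
 * For example (rounding behavior of the helper above): with
 * a = 00 11 22 33 44 55 66 77 and b = 00 11 22 33 44 99 66 77 the buffers
 * first differ inside the second 32-bit word, so get_common_prefix_size()
 * reports 4 -- comparison is done a word at a time and the result is always
 * a multiple of 4, never more than max_bytes rounded down to a word.
 */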
3928
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer, and wake up any waiting reader when the buffer fills
 * up or immediate mode requires it.
 */
3934static void
3935catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3936 u_int snaplen, int outbound)
3937{
3938 struct bpf_hdr *hp;
3939 struct bpf_hdr_ext *ehp;
3940 uint32_t totlen, curlen;
3941 uint32_t hdrlen, caplen;
3942 int do_wakeup = 0;
3943 u_char *payload;
3944 struct timeval tv;
3945
3946 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3947 (d->bd_flags & BPF_COMP_REQ) ? d->bd_bif->bif_comphdrlen:
3948 d->bd_bif->bif_hdrlen;
3949 /*
3950 * Figure out how many bytes to move. If the packet is
	 * greater than or equal to the snapshot length, transfer that
3952 * much. Otherwise, transfer the whole packet (unless
3953 * we hit the buffer size limit).
3954 */
3955 totlen = hdrlen + MIN(snaplen, (int)pkt->bpfp_total_length);
3956 if (totlen > d->bd_bufsize) {
3957 totlen = d->bd_bufsize;
3958 }
3959
3960 if (hdrlen > totlen) {
3961 return;
3962 }
3963
3964 /*
3965 * Round up the end of the previous packet to the next longword.
3966 */
3967 curlen = BPF_WORDALIGN(d->bd_slen);
3968 if (curlen + totlen > d->bd_bufsize) {
3969 /*
3970 * This packet will overflow the storage buffer.
3971 * Rotate the buffers if we can, then wakeup any
3972 * pending reads.
3973 *
3974 * We cannot rotate buffers if a read is in progress
3975 * so drop the packet
3976 */
3977 if (d->bd_hbuf_read) {
3978 ++d->bd_dcount;
3979 return;
3980 }
3981
3982 if (d->bd_fbuf == NULL) {
3983 if (d->bd_headdrop == 0) {
3984 /*
3985 * We haven't completed the previous read yet,
3986 * so drop the packet.
3987 */
3988 ++d->bd_dcount;
3989 return;
3990 }
3991 /*
3992 * Drop the hold buffer as it contains older packets
3993 */
3994 d->bd_dcount += d->bd_hcnt;
3995 d->bd_fbuf = d->bd_hbuf;
3996 ROTATE_BUFFERS(d);
3997 } else {
3998 ROTATE_BUFFERS(d);
3999 }
4000 do_wakeup = 1;
4001 curlen = 0;
4002 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
4003 /*
4004 * Immediate mode is set, or the read timeout has
4005 * already expired during a select call. A packet
4006 * arrived, so the reader should be woken up.
4007 */
4008 do_wakeup = 1;
4009 }
4010
4011 /*
4012 * Append the bpf header.
4013 */
4014 microtime(tv: &tv);
4015 if (d->bd_flags & BPF_EXTENDED_HDR) {
4016 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
4017 memset(s: ehp, c: 0, n: sizeof(*ehp));
4018 ehp->bh_tstamp.tv_sec = (int)tv.tv_sec;
4019 ehp->bh_tstamp.tv_usec = tv.tv_usec;
4020
4021 ehp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
4022 ehp->bh_hdrlen = (u_short)hdrlen;
4023 caplen = ehp->bh_caplen = totlen - hdrlen;
4024 payload = (u_char *)ehp + hdrlen;
4025
4026 if (outbound) {
4027 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
4028 } else {
4029 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
4030 }
4031
4032 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
4033 struct mbuf *m = pkt->bpfp_mbuf;
4034
4035 if (outbound) {
4036 /* only do lookups on non-raw INPCB */
4037 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID |
4038 PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK)) ==
4039 (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC) &&
4040 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
4041 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
4042 if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) {
4043 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP;
4044 } else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) {
4045 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_UDP;
4046 }
4047 }
4048 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
4049 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT) {
4050 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
4051 }
4052 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ) {
4053 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
4054 }
4055 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT) {
4056 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
4057 }
4058 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
4059 ehp->bh_unsent_bytes =
4060 m->m_pkthdr.bufstatus_if;
4061 ehp->bh_unsent_snd =
4062 m->m_pkthdr.bufstatus_sndbuf;
4063 }
4064 } else {
4065 if (m->m_pkthdr.pkt_flags & PKTF_WAKE_PKT) {
4066 ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
4067 }
4068 }
4069#if SKYWALK
4070 } else {
4071 kern_packet_t kern_pkt = pkt->bpfp_pkt;
4072 packet_flowid_t flowid = 0;
4073
4074 if (outbound) {
4075 /*
4076 * Note: pp_init() asserts that kern_packet_svc_class_t is equivalent
4077 * to mbuf_svc_class_t
4078 */
4079 ehp->bh_svc = so_svc2tc((mbuf_svc_class_t)kern_packet_get_service_class(kern_pkt));
4080 if (kern_packet_get_transport_retransmit(kern_pkt)) {
4081 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
4082 }
4083 if (kern_packet_get_transport_last_packet(kern_pkt)) {
4084 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
4085 }
4086 } else {
4087 if (kern_packet_get_wake_flag(kern_pkt)) {
4088 ehp->bh_pktflags |= BPF_PKTFLAGS_WAKE_PKT;
4089 }
4090 }
4091 ehp->bh_trace_tag = kern_packet_get_trace_tag(ph: kern_pkt);
4092 if (kern_packet_get_flowid(kern_pkt, &flowid) == 0) {
4093 ehp->bh_flowid = flowid;
4094 }
4095#endif /* SKYWALK */
4096 }
4097 } else {
4098 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
4099 memset(s: hp, c: 0, BPF_WORDALIGN(sizeof(*hp)));
4100 hp->bh_tstamp.tv_sec = (int)tv.tv_sec;
4101 hp->bh_tstamp.tv_usec = tv.tv_usec;
4102 hp->bh_datalen = (bpf_u_int32)pkt->bpfp_total_length;
4103 hp->bh_hdrlen = (u_short)hdrlen;
4104 caplen = hp->bh_caplen = totlen - hdrlen;
4105 payload = (u_char *)hp + hdrlen;
4106 }
4107 if (d->bd_flags & BPF_COMP_REQ) {
4108 uint8_t common_prefix_size = 0;
		uint8_t copy_len = (uint8_t)MIN(caplen, (uint32_t)BPF_HDR_COMP_LEN_MAX);
4110
4111 copy_bpf_packet(pkt, dst: d->bd_prev_fbuf, len: copy_len);
4112
4113 if (d->bd_prev_slen != 0) {
4114 common_prefix_size = get_common_prefix_size(a: d->bd_prev_fbuf,
4115 b: d->bd_prev_sbuf, MIN(copy_len, d->bd_prev_slen));
4116 }
4117
4118 if (d->bd_flags & BPF_COMP_ENABLED) {
4119 assert3u(caplen, >=, common_prefix_size);
4120 copy_bpf_packet_offset(pkt, dst: payload, len: caplen - common_prefix_size,
4121 offset: common_prefix_size);
4122 d->bd_slen = curlen + totlen - common_prefix_size;
4123 } else {
4124 copy_bpf_packet(pkt, dst: payload, len: caplen);
4125 d->bd_slen = curlen + totlen;
4126 }
4127
4128 /*
4129 * Update the caplen only if compression is enabled -- the caller
4130 * must pay attention to bpf_hdr_comp_enable
4131 */
4132 if (d->bd_flags & BPF_EXTENDED_HDR) {
4133 ehp->bh_complen = common_prefix_size;
4134 if (d->bd_flags & BPF_COMP_ENABLED) {
4135 ehp->bh_caplen -= common_prefix_size;
4136 }
4137 } else {
4138 struct bpf_comp_hdr *hcp;
4139
4140 hcp = (struct bpf_comp_hdr *)(void *)(d->bd_sbuf + curlen);
4141 hcp->bh_complen = common_prefix_size;
4142 if (d->bd_flags & BPF_COMP_ENABLED) {
4143 hcp->bh_caplen -= common_prefix_size;
4144 }
4145 }
4146
4147 if (common_prefix_size > 0) {
4148 d->bd_bcs.bcs_total_compressed_prefix_size += common_prefix_size;
4149 if (common_prefix_size > d->bd_bcs.bcs_max_compressed_prefix_size) {
4150 d->bd_bcs.bcs_max_compressed_prefix_size = common_prefix_size;
4151 }
4152 d->bd_bcs.bcs_count_compressed_prefix += 1;
4153 } else {
4154 d->bd_bcs.bcs_count_no_common_prefix += 1;
4155 }
4156
4157 /* The current compression buffer becomes the previous one */
4158 caddr_t tmp = d->bd_prev_sbuf;
4159 d->bd_prev_sbuf = d->bd_prev_fbuf;
4160 d->bd_prev_slen = copy_len;
4161 d->bd_prev_fbuf = tmp;
4162 } else {
4163 /*
4164 * Copy the packet data into the store buffer and update its length.
4165 */
4166 copy_bpf_packet(pkt, dst: payload, len: caplen);
4167 d->bd_slen = curlen + totlen;
4168 }
4169 d->bd_scnt += 1;
4170 d->bd_bcs.bcs_total_hdr_size += pkt->bpfp_header_length;
4171 d->bd_bcs.bcs_total_size += caplen;
4172
4173 if (do_wakeup) {
4174 bpf_wakeup(d);
4175 }
4176}
4177
4178static void
4179bpf_freebufs(struct bpf_d *d)
4180{
4181 if (d->bd_sbuf != NULL) {
4182 kfree_data_addr(d->bd_sbuf);
4183 }
4184 if (d->bd_hbuf != NULL) {
4185 kfree_data_addr(d->bd_hbuf);
4186 }
4187 if (d->bd_fbuf != NULL) {
4188 kfree_data_addr(d->bd_fbuf);
4189 }
4190
4191 if (d->bd_prev_sbuf != NULL) {
4192 kfree_data_addr(d->bd_prev_sbuf);
4193 }
4194 if (d->bd_prev_fbuf != NULL) {
4195 kfree_data_addr(d->bd_prev_fbuf);
4196 }
4197}

/*
 * Allocate the buffers of a descriptor and reset its capture state.
 */
static int
bpf_allocbufs(struct bpf_d *d)
{
	bpf_freebufs(d);

	d->bd_fbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_fbuf == NULL) {
		goto nobufs;
	}

	d->bd_sbuf = (caddr_t) kalloc_data(d->bd_bufsize, Z_WAITOK | Z_ZERO);
	if (d->bd_sbuf == NULL) {
		goto nobufs;
	}
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_scnt = 0;
	d->bd_hcnt = 0;

	d->bd_prev_slen = 0;
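	/*
	 * When header compression was requested, also allocate the two
	 * BPF_HDR_COMP_LEN_MAX scratch buffers used to compare the leading
	 * bytes of each captured packet against the previous one.
	 */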
	if (d->bd_flags & BPF_COMP_REQ) {
		d->bd_prev_sbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_sbuf == NULL) {
			goto nobufs;
		}
		d->bd_prev_fbuf = (caddr_t) kalloc_data(BPF_HDR_COMP_LEN_MAX, Z_WAITOK | Z_ZERO);
		if (d->bd_prev_fbuf == NULL) {
			goto nobufs;
		}
	}
	return 0;
nobufs:
	bpf_freebufs(d);
	return ENOMEM;
}

/*
 * Free buffers currently in use by a descriptor.
 * Called on close.
 */
static void
bpf_freed(struct bpf_d *d)
{
	/*
	 * We don't need to lock out interrupts since this descriptor has
	 * been detached from its interface and it hasn't yet been marked
	 * free.
	 */
	if (d->bd_hbuf_read || d->bd_hbuf_write) {
		panic("bpf buffer freed during read/write");
	}

	bpf_freebufs(d);

	if (d->bd_filter) {
		kfree_data_addr(d->bd_filter);
	}
}

/*
 * Attach an interface to bpf. dlt is the link layer type;
 * hdrlen is the fixed size of the link header (variable length
 * headers not yet supported).
 */
void
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
}

errno_t
bpf_attach(
	ifnet_t ifp,
	u_int32_t dlt,
	u_int32_t hdrlen,
	bpf_send_func send,
	bpf_tap_func tap)
{
	struct bpf_if *bp;
	struct bpf_if *bp_new;
	struct bpf_if *bp_before_first = NULL;
	struct bpf_if *bp_first = NULL;
	struct bpf_if *bp_last = NULL;
	boolean_t found;

	/*
	 * Z_NOFAIL will cause a panic if the allocation fails
	 */
	bp_new = kalloc_type(struct bpf_if, Z_WAITOK | Z_NOFAIL | Z_ZERO);

	lck_mtx_lock(bpf_mlock);

	/*
	 * Check if this interface/dlt is already attached. Remember the
	 * first and last attachment for this interface, as well as the
	 * element before the first attachment.
	 */
	found = FALSE;
	for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
		if (bp->bif_ifp != ifp) {
			if (bp_first != NULL) {
				/* no more elements for this interface */
				break;
			}
			bp_before_first = bp;
		} else {
			if (bp->bif_dlt == dlt) {
				found = TRUE;
				break;
			}
			if (bp_first == NULL) {
				bp_first = bp;
			}
			bp_last = bp;
		}
	}
	if (found) {
		lck_mtx_unlock(bpf_mlock);
		os_log_error(OS_LOG_DEFAULT,
		    "bpfattach - %s with dlt %d is already attached",
		    if_name(ifp), dlt);
		kfree_type(struct bpf_if, bp_new);
		return EEXIST;
	}

	bp_new->bif_ifp = ifp;
	bp_new->bif_dlt = dlt;
	bp_new->bif_send = send;
	bp_new->bif_tap = tap;

	if (bp_first == NULL) {
		/* No other entries for this ifp */
		bp_new->bif_next = bpf_iflist;
		bpf_iflist = bp_new;
	} else {
		if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
			/* Make this the first entry for this interface */
			if (bp_before_first != NULL) {
				/* point the previous to us */
				bp_before_first->bif_next = bp_new;
			} else {
				/* we're the new head */
				bpf_iflist = bp_new;
			}
			bp_new->bif_next = bp_first;
		} else {
			/* Add this after the last entry for this interface */
			bp_new->bif_next = bp_last->bif_next;
			bp_last->bif_next = bp_new;
		}
	}

	/*
	 * Compute the length of the bpf header. This is not necessarily
	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
	 * that the network layer header begins on a longword boundary (for
	 * performance reasons and to alleviate alignment restrictions).
	 */
	bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
	bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_hdr_ext)) - hdrlen;
	bp_new->bif_comphdrlen = BPF_WORDALIGN(hdrlen +
	    sizeof(struct bpf_comp_hdr)) - hdrlen;
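	/*
	 * For example (assuming SIZEOF_BPF_HDR is 18), a DLT_EN10MB
	 * attachment with a 14-byte link header gets
	 * bif_hdrlen = BPF_WORDALIGN(14 + 18) - 14 = 18, so the captured
	 * link header starts at offset 18 and the network layer header at
	 * offset 32, a longword boundary.
	 */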

	/* Take a reference on the interface */
	ifnet_reference(ifp);

	lck_mtx_unlock(bpf_mlock);

	return 0;
}
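
/*
 * Usage sketch (hypothetical driver callbacks shown for illustration only):
 * a driver that supplies its own send and tap handlers might attach as
 *
 *	errno_t err = bpf_attach(ifp, DLT_RAW, 0, my_bpf_send, my_bpf_tap);
 *
 * where my_bpf_send and my_bpf_tap are of type bpf_send_func and
 * bpf_tap_func. Passing NULL for both, as the bpfattach() wrapper above
 * does, selects the default handling.
 */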

/*
 * Detach bpf from an interface. This involves detaching each descriptor
 * associated with the interface, and leaving bd_bif NULL. Notify each
 * descriptor as it's detached so that any sleepers wake up and get
 * ENXIO.
 */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_prev, *bp_next;
	struct bpf_d *d;

	if (bpf_debug != 0) {
		os_log(OS_LOG_DEFAULT, "%s: %s", __func__, if_name(ifp));
	}

	lck_mtx_lock(bpf_mlock);

	/*
	 * Build the list of devices attached to that interface
	 * that we need to free while keeping the lock to maintain
	 * the integrity of the interface list
	 */
	bp_prev = NULL;
	for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
		bp_next = bp->bif_next;

		if (ifp != bp->bif_ifp) {
			bp_prev = bp;
			continue;
		}
		/* Unlink from the interface list */
		if (bp_prev) {
			bp_prev->bif_next = bp->bif_next;
		} else {
			bpf_iflist = bp->bif_next;
		}

		/* Detach the devices attached to the interface */
		while ((d = bp->bif_dlist) != NULL) {
			/*
			 * Take an extra reference to prevent the device
			 * from being freed when bpf_detachd() releases
			 * the reference for the interface list
			 */
			bpf_acquire_d(d);

			/*
			 * Wait for active reads and writes to complete
			 */
			while (d->bd_hbuf_read || d->bd_hbuf_write) {
				msleep((caddr_t)d, bpf_mlock, PRINET, "bpfdetach", NULL);
			}

			bpf_detachd(d);
			bpf_wakeup(d);
			bpf_release_d(d);
		}
		ifnet_release(ifp);
	}

	lck_mtx_unlock(bpf_mlock);
}

void
bpf_init(__unused void *unused)
{
	int maj;

	/* bpf_comp_hdr is an overlay of bpf_hdr */
	_CASSERT(BPF_WORDALIGN(sizeof(struct bpf_hdr)) ==
	    BPF_WORDALIGN(sizeof(struct bpf_comp_hdr)));

	/* the compressed prefix length must fit in a byte */
	_CASSERT(BPF_HDR_COMP_LEN_MAX <= UCHAR_MAX);

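	/*
	 * The "bpf_hdr_comp" boot-arg, when present, overrides the
	 * compiled-in default of bpf_hdr_comp_enable.
	 */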
	(void) PE_parse_boot_argn("bpf_hdr_comp", &bpf_hdr_comp_enable,
	    sizeof(bpf_hdr_comp_enable));

	if (bpf_devsw_installed == 0) {
		bpf_devsw_installed = 1;
		maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
		if (maj == -1) {
			bpf_devsw_installed = 0;
			os_log_error(OS_LOG_DEFAULT,
			    "bpf_init: failed to allocate a major number");
			return;
		}

		for (int i = 0; i < NBPFILTER; i++) {
			bpf_make_dev_t(maj);
		}
	}
}

static int
sysctl_bpf_maxbufsize SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = bpf_maxbufsize;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	if (i < 0 || i > BPF_BUFSIZE_CAP) {
		i = BPF_BUFSIZE_CAP;
	}

	bpf_maxbufsize = i;
	return err;
}

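/*
 * The hard cap itself is read-only: a new value, if supplied, is parsed
 * but discarded, and BPF_BUFSIZE_CAP is always reported.
 */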
static int
sysctl_bpf_bufsize_cap SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int i, err;

	i = BPF_BUFSIZE_CAP;

	err = sysctl_handle_int(oidp, &i, 0, req);
	if (err != 0 || req->newptr == USER_ADDR_NULL) {
		return err;
	}

	return err;
}

/*
 * Fill filter statistics
 */
static void
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{
	LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);

	d->bd_structsize = sizeof(struct xbpf_d);
	d->bd_promisc = bd->bd_promisc != 0 ? 1 : 0;
	d->bd_immediate = bd->bd_immediate != 0 ? 1 : 0;
	d->bd_hdrcmplt = bd->bd_hdrcmplt != 0 ? 1 : 0;
	d->bd_async = bd->bd_async != 0 ? 1 : 0;
	d->bd_headdrop = bd->bd_headdrop != 0 ? 1 : 0;
	d->bd_direction = (uint8_t)bd->bd_direction;
	d->bh_compreq = bd->bd_flags & BPF_COMP_REQ ? 1 : 0;
	d->bh_compenabled = bd->bd_flags & BPF_COMP_ENABLED ? 1 : 0;
	d->bd_exthdr = bd->bd_flags & BPF_EXTENDED_HDR ? 1 : 0;
	d->bd_trunc = bd->bd_flags & BPF_TRUNCATE ? 1 : 0;
	d->bd_pkthdrv2 = bd->bd_flags & BPF_PKTHDRV2 ? 1 : 0;

	d->bd_dev_minor = (uint8_t)bd->bd_dev_minor;

	d->bd_sig = bd->bd_sig;

	d->bd_rcount = bd->bd_rcount;
	d->bd_dcount = bd->bd_dcount;
	d->bd_fcount = bd->bd_fcount;
	d->bd_wcount = bd->bd_wcount;
	d->bd_wdcount = bd->bd_wdcount;
	d->bd_slen = bd->bd_slen;
	d->bd_hlen = bd->bd_hlen;
	d->bd_bufsize = bd->bd_bufsize;
	d->bd_pid = bd->bd_pid;
	if (bd->bd_bif != NULL && bd->bd_bif->bif_ifp != NULL) {
		strlcpy(d->bd_ifname,
		    bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
	}

	d->bd_comp_count = bd->bd_bcs.bcs_count_compressed_prefix;
	d->bd_comp_size = bd->bd_bcs.bcs_total_compressed_prefix_size;

	d->bd_scnt = bd->bd_scnt;
	d->bd_hcnt = bd->bd_hcnt;

	d->bd_read_count = bd->bd_bcs.bcs_total_read;
	d->bd_fsize = bd->bd_bcs.bcs_total_size;
}

/*
 * Handle `netstat -B' stats request
 */
static int
sysctl_bpf_stats SYSCTL_HANDLER_ARGS
{
	int error;
	struct xbpf_d *xbdbuf;
	unsigned int x_cnt;
	vm_size_t buf_size;

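	/*
	 * With a NULL oldptr the caller is only asking how much space is
	 * needed; report room for every possible device.
	 */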
	if (req->oldptr == USER_ADDR_NULL) {
		return SYSCTL_OUT(req, 0, nbpfilter * sizeof(struct xbpf_d));
	}
	if (nbpfilter == 0) {
		return SYSCTL_OUT(req, 0, 0);
	}
	buf_size = req->oldlen;
	if (buf_size > BPF_MAX_DEVICES * sizeof(struct xbpf_d)) {
		buf_size = BPF_MAX_DEVICES * sizeof(struct xbpf_d);
	}
	xbdbuf = kalloc_data(buf_size, Z_WAITOK | Z_ZERO);

	lck_mtx_lock(bpf_mlock);
	if (buf_size < (nbpfilter * sizeof(struct xbpf_d))) {
		lck_mtx_unlock(bpf_mlock);
		kfree_data(xbdbuf, buf_size);
		return ENOMEM;
	}
	x_cnt = 0;
	unsigned int i;

	for (i = 0; i < nbpfilter; i++) {
		struct bpf_d *bd = bpf_dtab[i];
		struct xbpf_d *xbd;

		if (bd == NULL || bd == BPF_DEV_RESERVED ||
		    (bd->bd_flags & BPF_CLOSING) != 0) {
			continue;
		}
		VERIFY(x_cnt < nbpfilter);

		xbd = &xbdbuf[x_cnt++];
		bpfstats_fill_xbpf(xbd, bd);
	}
	lck_mtx_unlock(bpf_mlock);

	error = SYSCTL_OUT(req, xbdbuf, x_cnt * sizeof(struct xbpf_d));
	kfree_data(xbdbuf, buf_size);
	return error;
}
