1/*
2 * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1990, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from the Stanford/CMU enet packet filter,
33 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
34 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
35 * Berkeley Laboratory.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
66 *
67 * $FreeBSD: src/sys/net/bpf.c,v 1.59.2.5 2001/01/05 04:49:09 jdp Exp $
68 */
69/*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76#include "bpf.h"
77
78#ifndef __GNUC__
79#define inline
80#else
81#define inline __inline
82#endif
83
84#include <sys/param.h>
85#include <sys/systm.h>
86#include <sys/conf.h>
87#include <sys/malloc.h>
88#include <sys/mbuf.h>
89#include <sys/time.h>
90#include <sys/proc.h>
91#include <sys/signalvar.h>
92#include <sys/filio.h>
93#include <sys/sockio.h>
94#include <sys/ttycom.h>
95#include <sys/filedesc.h>
96#include <sys/uio_internal.h>
97#include <sys/file_internal.h>
98#include <sys/event.h>
99
100#include <sys/poll.h>
101
102#include <sys/socket.h>
103#include <sys/socketvar.h>
104#include <sys/vnode.h>
105
106#include <net/if.h>
107#include <net/bpf.h>
108#include <net/bpfdesc.h>
109
110#include <netinet/in.h>
111#include <netinet/ip.h>
112#include <netinet/ip6.h>
113#include <netinet/in_pcb.h>
114#include <netinet/in_var.h>
115#include <netinet/ip_var.h>
116#include <netinet/tcp.h>
117#include <netinet/tcp_var.h>
118#include <netinet/udp.h>
119#include <netinet/udp_var.h>
120#include <netinet/if_ether.h>
121#include <netinet/isakmp.h>
122#include <netinet6/esp.h>
123#include <sys/kernel.h>
124#include <sys/sysctl.h>
125#include <net/firewire.h>
126
127#include <miscfs/devfs/devfs.h>
128#include <net/dlil.h>
129#include <net/pktap.h>
130
131#include <kern/locks.h>
132#include <kern/thread_call.h>
133#include <libkern/section_keywords.h>
134
135#if CONFIG_MACF_NET
136#include <security/mac_framework.h>
137#endif /* MAC_NET */
138
139#include <os/log.h>
140
141extern int tvtohz(struct timeval *);
142
143#define BPF_BUFSIZE 4096
144#define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio)
145
146#define PRINET 26 /* interruptible */
147
148#define ISAKMP_HDR_SIZE (sizeof(struct isakmp) + sizeof(struct isakmp_gen))
149#define ESP_HDR_SIZE sizeof(struct newesp)
150
151typedef void (*pktcopyfunc_t)(const void *, void *, size_t);
152
153/*
154 * The default read buffer size is patchable.
155 */
156static unsigned int bpf_bufsize = BPF_BUFSIZE;
157SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
158 &bpf_bufsize, 0, "");
159__private_extern__ unsigned int bpf_maxbufsize = BPF_MAXBUFSIZE;
160SYSCTL_INT(_debug, OID_AUTO, bpf_maxbufsize, CTLFLAG_RW | CTLFLAG_LOCKED,
161 &bpf_maxbufsize, 0, "");
162static unsigned int bpf_maxdevices = 256;
163SYSCTL_UINT(_debug, OID_AUTO, bpf_maxdevices, CTLFLAG_RW | CTLFLAG_LOCKED,
164 &bpf_maxdevices, 0, "");
165/*
166 * bpf_wantpktap controls the defaul visibility of DLT_PKTAP
167 * For OS X is off by default so process need to use the ioctl BPF_WANT_PKTAP
168 * explicitly to be able to use DLT_PKTAP.
169 */
170#if CONFIG_EMBEDDED
171static unsigned int bpf_wantpktap = 1;
172#else
173static unsigned int bpf_wantpktap = 0;
174#endif
175SYSCTL_UINT(_debug, OID_AUTO, bpf_wantpktap, CTLFLAG_RW | CTLFLAG_LOCKED,
176 &bpf_wantpktap, 0, "");
177
178static int bpf_debug = 0;
179SYSCTL_INT(_debug, OID_AUTO, bpf_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
180 &bpf_debug, 0, "");
181
182/*
183 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
184 * bpf_dtab holds pointer to the descriptors, indexed by minor device #
185 */
186static struct bpf_if *bpf_iflist;
187#ifdef __APPLE__
188/*
189 * BSD now stores the bpf_d in the dev_t which is a struct
190 * on their system. Our dev_t is an int, so we still store
191 * the bpf_d in a separate table indexed by minor device #.
192 *
193 * The value stored in bpf_dtab[n] represent three states:
194 * NULL: device not opened
195 * BPF_DEV_RESERVED: device opening or closing
196 * other: device <n> opened with pointer to storage
197 */
198#define BPF_DEV_RESERVED ((struct bpf_d *)(uintptr_t)1)
199static struct bpf_d **bpf_dtab = NULL;
200static unsigned int bpf_dtab_size = 0;
201static unsigned int nbpfilter = 0;
202
203decl_lck_mtx_data(static, bpf_mlock_data);
204static lck_mtx_t *bpf_mlock = &bpf_mlock_data;
205static lck_grp_t *bpf_mlock_grp;
206static lck_grp_attr_t *bpf_mlock_grp_attr;
207static lck_attr_t *bpf_mlock_attr;
208
209#endif /* __APPLE__ */
210
211static int bpf_allocbufs(struct bpf_d *);
212static errno_t bpf_attachd(struct bpf_d *d, struct bpf_if *bp);
213static int bpf_detachd(struct bpf_d *d, int);
214static void bpf_freed(struct bpf_d *);
215static int bpf_movein(struct uio *, int,
216 struct mbuf **, struct sockaddr *, int *);
217static int bpf_setif(struct bpf_d *, ifnet_t ifp, bool, bool);
218static void bpf_timed_out(void *, void *);
219static void bpf_wakeup(struct bpf_d *);
220static u_int get_pkt_trunc_len(u_char *, u_int);
221static void catchpacket(struct bpf_d *, struct bpf_packet *, u_int, int);
222static void reset_d(struct bpf_d *);
223static int bpf_setf(struct bpf_d *, u_int, user_addr_t, u_long);
224static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *);
225static int bpf_setdlt(struct bpf_d *, u_int);
226static int bpf_set_traffic_class(struct bpf_d *, int);
227static void bpf_set_packet_service_class(struct mbuf *, int);
228
229static void bpf_acquire_d(struct bpf_d *);
230static void bpf_release_d(struct bpf_d *);
231
232static int bpf_devsw_installed;
233
234void bpf_init(void *unused);
235static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m);
236
237/*
238 * Darwin differs from BSD here, the following are static
239 * on BSD and not static on Darwin.
240 */
241 d_open_t bpfopen;
242 d_close_t bpfclose;
243 d_read_t bpfread;
244 d_write_t bpfwrite;
245 ioctl_fcn_t bpfioctl;
246 select_fcn_t bpfselect;
247
248/* Darwin's cdevsw struct differs slightly from BSDs */
249#define CDEV_MAJOR 23
250static struct cdevsw bpf_cdevsw = {
251 /* open */ bpfopen,
252 /* close */ bpfclose,
253 /* read */ bpfread,
254 /* write */ bpfwrite,
255 /* ioctl */ bpfioctl,
256 /* stop */ eno_stop,
257 /* reset */ eno_reset,
258 /* tty */ NULL,
259 /* select */ bpfselect,
260 /* mmap */ eno_mmap,
261 /* strategy */ eno_strat,
262 /* getc */ eno_getc,
263 /* putc */ eno_putc,
264 /* type */ 0
265};
266
267#define SOCKADDR_HDR_LEN offsetof(struct sockaddr, sa_data)
268
269static int
270bpf_movein(struct uio *uio, int linktype, struct mbuf **mp,
271 struct sockaddr *sockp, int *datlen)
272{
273 struct mbuf *m;
274 int error;
275 int len;
276 uint8_t sa_family;
277 int hlen;
278
279 switch (linktype) {
280
281#if SLIP
282 case DLT_SLIP:
283 sa_family = AF_INET;
284 hlen = 0;
285 break;
286#endif /* SLIP */
287
288 case DLT_EN10MB:
289 sa_family = AF_UNSPEC;
290 /* XXX Would MAXLINKHDR be better? */
291 hlen = sizeof(struct ether_header);
292 break;
293
294#if FDDI
295 case DLT_FDDI:
296#if defined(__FreeBSD__) || defined(__bsdi__)
297 sa_family = AF_IMPLINK;
298 hlen = 0;
299#else
300 sa_family = AF_UNSPEC;
301 /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
302 hlen = 24;
303#endif
304 break;
305#endif /* FDDI */
306
307 case DLT_RAW:
308 case DLT_NULL:
309 sa_family = AF_UNSPEC;
310 hlen = 0;
311 break;
312
313#ifdef __FreeBSD__
314 case DLT_ATM_RFC1483:
315 /*
316 * en atm driver requires 4-byte atm pseudo header.
317 * though it isn't standard, vpi:vci needs to be
318 * specified anyway.
319 */
320 sa_family = AF_UNSPEC;
321 hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
322 break;
323#endif
324
325 case DLT_PPP:
326 sa_family = AF_UNSPEC;
327 hlen = 4; /* This should match PPP_HDRLEN */
328 break;
329
330 case DLT_APPLE_IP_OVER_IEEE1394:
331 sa_family = AF_UNSPEC;
332 hlen = sizeof(struct firewire_header);
333 break;
334
335 case DLT_IEEE802_11: /* IEEE 802.11 wireless */
336 sa_family = AF_IEEE80211;
337 hlen = 0;
338 break;
339
340 case DLT_IEEE802_11_RADIO:
341 sa_family = AF_IEEE80211;
342 hlen = 0;
343 break;
344
345 default:
346 return (EIO);
347 }
348
349 // LP64todo - fix this!
350 len = uio_resid(uio);
351 *datlen = len - hlen;
352 if ((unsigned)len > MCLBYTES)
353 return (EIO);
354
355 if (sockp) {
356 /*
357 * Build a sockaddr based on the data link layer type.
358 * We do this at this level because the ethernet header
359 * is copied directly into the data field of the sockaddr.
360 * In the case of SLIP, there is no header and the packet
361 * is forwarded as is.
362 * Also, we are careful to leave room at the front of the mbuf
363 * for the link level header.
364 */
365 if ((hlen + SOCKADDR_HDR_LEN) > sockp->sa_len) {
366 return (EIO);
367 }
368 sockp->sa_family = sa_family;
369 } else {
370 /*
371 * We're directly sending the packet data supplied by
372 * the user; we don't need to make room for the link
373 * header, and don't need the header length value any
374 * more, so set it to 0.
375 */
376 hlen = 0;
377 }
378
379 MGETHDR(m, M_WAIT, MT_DATA);
380 if (m == 0)
381 return (ENOBUFS);
382 if ((unsigned)len > MHLEN) {
383 MCLGET(m, M_WAIT);
384 if ((m->m_flags & M_EXT) == 0) {
385 error = ENOBUFS;
386 goto bad;
387 }
388 }
389 m->m_pkthdr.len = m->m_len = len;
390 m->m_pkthdr.rcvif = NULL;
391 *mp = m;
392
393 /*
394 * Make room for link header.
395 */
396 if (hlen != 0) {
397 m->m_pkthdr.len -= hlen;
398 m->m_len -= hlen;
399 m->m_data += hlen; /* XXX */
400 error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio);
401 if (error)
402 goto bad;
403 }
404 error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio);
405 if (error)
406 goto bad;
407
408 /* Check for multicast destination */
409 switch (linktype) {
410 case DLT_EN10MB: {
411 struct ether_header *eh;
412
413 eh = mtod(m, struct ether_header *);
414 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
415 if (_ether_cmp(etherbroadcastaddr,
416 eh->ether_dhost) == 0) {
417 m->m_flags |= M_BCAST;
418 } else {
419 m->m_flags |= M_MCAST;
420 }
421 }
422 break;
423 }
424 }
425
426 return (0);
427bad:
428 m_freem(m);
429 return (error);
430}
431
432#ifdef __APPLE__
433
434/*
435 * The dynamic addition of a new device node must block all processes that
436 * are opening the last device so that no process will get an unexpected
437 * ENOENT
438 */
439static void
440bpf_make_dev_t(int maj)
441{
442 static int bpf_growing = 0;
443 unsigned int cur_size = nbpfilter, i;
444
445 if (nbpfilter >= bpf_maxdevices)
446 return;
447
448 while (bpf_growing) {
449 /* Wait until new device has been created */
450 (void) tsleep((caddr_t)&bpf_growing, PZERO, "bpf_growing", 0);
451 }
452 if (nbpfilter > cur_size) {
453 /* other thread grew it already */
454 return;
455 }
456 bpf_growing = 1;
457
458 /* need to grow bpf_dtab first */
459 if (nbpfilter == bpf_dtab_size) {
460 int new_dtab_size;
461 struct bpf_d **new_dtab = NULL;
462 struct bpf_d **old_dtab = NULL;
463
464 new_dtab_size = bpf_dtab_size + NBPFILTER;
465 new_dtab = (struct bpf_d **)_MALLOC(
466 sizeof(struct bpf_d *) * new_dtab_size, M_DEVBUF, M_WAIT);
467 if (new_dtab == 0) {
468 printf("bpf_make_dev_t: malloc bpf_dtab failed\n");
469 goto done;
470 }
471 if (bpf_dtab) {
472 bcopy(bpf_dtab, new_dtab,
473 sizeof(struct bpf_d *) * bpf_dtab_size);
474 }
475 bzero(new_dtab + bpf_dtab_size,
476 sizeof(struct bpf_d *) * NBPFILTER);
477 old_dtab = bpf_dtab;
478 bpf_dtab = new_dtab;
479 bpf_dtab_size = new_dtab_size;
480 if (old_dtab != NULL)
481 _FREE(old_dtab, M_DEVBUF);
482 }
483 i = nbpfilter++;
484 (void) devfs_make_node(makedev(maj, i),
485 DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600,
486 "bpf%d", i);
487done:
488 bpf_growing = 0;
489 wakeup((caddr_t)&bpf_growing);
490}
491
492#endif
493
494/*
495 * Attach file to the bpf interface, i.e. make d listen on bp.
496 */
497static errno_t
498bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
499{
500 int first = bp->bif_dlist == NULL;
501 int error = 0;
502
503 /*
504 * Point d at bp, and add d to the interface's list of listeners.
505 * Finally, point the driver's bpf cookie at the interface so
506 * it will divert packets to bpf.
507 */
508 d->bd_bif = bp;
509 d->bd_next = bp->bif_dlist;
510 bp->bif_dlist = d;
511
512 /*
513 * Take a reference on the device even if an error is returned
514 * because we keep the device in the interface's list of listeners
515 */
516 bpf_acquire_d(d);
517
518 if (first) {
519 /* Find the default bpf entry for this ifp */
520 if (bp->bif_ifp->if_bpf == NULL) {
521 struct bpf_if *tmp, *primary = NULL;
522
523 for (tmp = bpf_iflist; tmp; tmp = tmp->bif_next) {
524 if (tmp->bif_ifp == bp->bif_ifp) {
525 primary = tmp;
526 break;
527 }
528 }
529 bp->bif_ifp->if_bpf = primary;
530 }
531 /* Only call dlil_set_bpf_tap for primary dlt */
532 if (bp->bif_ifp->if_bpf == bp)
533 dlil_set_bpf_tap(bp->bif_ifp, BPF_TAP_INPUT_OUTPUT,
534 bpf_tap_callback);
535
536 if (bp->bif_tap != NULL)
537 error = bp->bif_tap(bp->bif_ifp, bp->bif_dlt,
538 BPF_TAP_INPUT_OUTPUT);
539 }
540
541 /*
542 * Reset the detach flags in case we previously detached an interface
543 */
544 d->bd_flags &= ~(BPF_DETACHING | BPF_DETACHED);
545
546 if (bp->bif_dlt == DLT_PKTAP) {
547 d->bd_flags |= BPF_FINALIZE_PKTAP;
548 } else {
549 d->bd_flags &= ~BPF_FINALIZE_PKTAP;
550 }
551 return (error);
552}
553
554/*
555 * Detach a file from its interface.
556 *
557 * Return 1 if was closed by some thread, 0 otherwise
558 */
559static int
560bpf_detachd(struct bpf_d *d, int closing)
561{
562 struct bpf_d **p;
563 struct bpf_if *bp;
564 struct ifnet *ifp;
565
566 int bpf_closed = d->bd_flags & BPF_CLOSING;
567 /*
568 * Some other thread already detached
569 */
570 if ((d->bd_flags & (BPF_DETACHED | BPF_DETACHING)) != 0)
571 goto done;
572 /*
573 * This thread is doing the detach
574 */
575 d->bd_flags |= BPF_DETACHING;
576
577 ifp = d->bd_bif->bif_ifp;
578 bp = d->bd_bif;
579
580 if (bpf_debug != 0)
581 printf("%s: %llx %s%s\n",
582 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d),
583 if_name(ifp), closing ? " closing" : "");
584
585 /* Remove d from the interface's descriptor list. */
586 p = &bp->bif_dlist;
587 while (*p != d) {
588 p = &(*p)->bd_next;
589 if (*p == 0)
590 panic("bpf_detachd: descriptor not in list");
591 }
592 *p = (*p)->bd_next;
593 if (bp->bif_dlist == 0) {
594 /*
595 * Let the driver know that there are no more listeners.
596 */
597 /* Only call dlil_set_bpf_tap for primary dlt */
598 if (bp->bif_ifp->if_bpf == bp)
599 dlil_set_bpf_tap(ifp, BPF_TAP_DISABLE, NULL);
600 if (bp->bif_tap)
601 bp->bif_tap(ifp, bp->bif_dlt, BPF_TAP_DISABLE);
602
603 for (bp = bpf_iflist; bp; bp = bp->bif_next)
604 if (bp->bif_ifp == ifp && bp->bif_dlist != 0)
605 break;
606 if (bp == NULL)
607 ifp->if_bpf = NULL;
608 }
609 d->bd_bif = NULL;
610 /*
611 * Check if this descriptor had requested promiscuous mode.
612 * If so, turn it off.
613 */
614 if (d->bd_promisc) {
615 d->bd_promisc = 0;
616 lck_mtx_unlock(bpf_mlock);
617 if (ifnet_set_promiscuous(ifp, 0)) {
618 /*
619 * Something is really wrong if we were able to put
620 * the driver into promiscuous mode, but can't
621 * take it out.
622 * Most likely the network interface is gone.
623 */
624 printf("%s: ifnet_set_promiscuous failed\n", __func__);
625 }
626 lck_mtx_lock(bpf_mlock);
627 }
628
629 /*
630 * Wake up other thread that are waiting for this thread to finish
631 * detaching
632 */
633 d->bd_flags &= ~BPF_DETACHING;
634 d->bd_flags |= BPF_DETACHED;
635
636 /* Refresh the local variable as d could have been modified */
637 bpf_closed = d->bd_flags & BPF_CLOSING;
638 /*
639 * Note that We've kept the reference because we may have dropped
640 * the lock when turning off promiscuous mode
641 */
642 bpf_release_d(d);
643
644done:
645 /*
646 * When closing makes sure no other thread refer to the bpf_d
647 */
648 if (bpf_debug != 0)
649 printf("%s: %llx done\n",
650 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
651 /*
652 * Let the caller know the bpf_d is closed
653 */
654 if (bpf_closed)
655 return (1);
656 else
657 return (0);
658}
659
660/*
661 * Start asynchronous timer, if necessary.
662 * Must be called with bpf_mlock held.
663 */
664static void
665bpf_start_timer(struct bpf_d *d)
666{
667 uint64_t deadline;
668 struct timeval tv;
669
670 if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
671 tv.tv_sec = d->bd_rtout / hz;
672 tv.tv_usec = (d->bd_rtout % hz) * tick;
673
674 clock_interval_to_deadline(
675 (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec,
676 NSEC_PER_USEC, &deadline);
677 /*
678 * The state is BPF_IDLE, so the timer hasn't
679 * been started yet, and hasn't gone off yet;
680 * there is no thread call scheduled, so this
681 * won't change the schedule.
682 *
683 * XXX - what if, by the time it gets entered,
684 * the deadline has already passed?
685 */
686 thread_call_enter_delayed(d->bd_thread_call, deadline);
687 d->bd_state = BPF_WAITING;
688 }
689}
690
691/*
692 * Cancel asynchronous timer.
693 * Must be called with bpf_mlock held.
694 */
695static boolean_t
696bpf_stop_timer(struct bpf_d *d)
697{
698 /*
699 * If the timer has already gone off, this does nothing.
700 * Our caller is expected to set d->bd_state to BPF_IDLE,
701 * with the bpf_mlock, after we are called. bpf_timed_out()
702 * also grabs bpf_mlock, so, if the timer has gone off and
703 * bpf_timed_out() hasn't finished, it's waiting for the
704 * lock; when this thread releases the lock, it will
705 * find the state is BPF_IDLE, and just release the
706 * lock and return.
707 */
708 return (thread_call_cancel(d->bd_thread_call));
709}
710
711void
712bpf_acquire_d(struct bpf_d *d)
713{
714 void *lr_saved = __builtin_return_address(0);
715
716 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
717
718 d->bd_refcnt += 1;
719
720 d->bd_ref_lr[d->bd_next_ref_lr] = lr_saved;
721 d->bd_next_ref_lr = (d->bd_next_ref_lr + 1) % BPF_REF_HIST;
722}
723
724void
725bpf_release_d(struct bpf_d *d)
726{
727 void *lr_saved = __builtin_return_address(0);
728
729 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
730
731 if (d->bd_refcnt <= 0)
732 panic("%s: %p refcnt <= 0", __func__, d);
733
734 d->bd_refcnt -= 1;
735
736 d->bd_unref_lr[d->bd_next_unref_lr] = lr_saved;
737 d->bd_next_unref_lr = (d->bd_next_unref_lr + 1) % BPF_REF_HIST;
738
739 if (d->bd_refcnt == 0) {
740 /* Assert the device is detached */
741 if ((d->bd_flags & BPF_DETACHED) == 0)
742 panic("%s: %p BPF_DETACHED not set", __func__, d);
743
744 _FREE(d, M_DEVBUF);
745 }
746}
747
748/*
749 * Open ethernet device. Returns ENXIO for illegal minor device number,
750 * EBUSY if file is open by another process.
751 */
752/* ARGSUSED */
753int
754bpfopen(dev_t dev, int flags, __unused int fmt,
755 struct proc *p)
756{
757 struct bpf_d *d;
758
759 lck_mtx_lock(bpf_mlock);
760 if ((unsigned int) minor(dev) >= nbpfilter) {
761 lck_mtx_unlock(bpf_mlock);
762 return (ENXIO);
763 }
764 /*
765 * New device nodes are created on demand when opening the last one.
766 * The programming model is for processes to loop on the minor starting
767 * at 0 as long as EBUSY is returned. The loop stops when either the
768 * open succeeds or an error other that EBUSY is returned. That means
769 * that bpf_make_dev_t() must block all processes that are opening the
770 * last node. If not all processes are blocked, they could unexpectedly
771 * get ENOENT and abort their opening loop.
772 */
773 if ((unsigned int) minor(dev) == (nbpfilter - 1))
774 bpf_make_dev_t(major(dev));
775
776 /*
777 * Each minor can be opened by only one process. If the requested
778 * minor is in use, return EBUSY.
779 *
780 * Important: bpfopen() and bpfclose() have to check and set the status
781 * of a device in the same lockin context otherwise the device may be
782 * leaked because the vnode use count will be unpextectly greater than 1
783 * when close() is called.
784 */
785 if (bpf_dtab[minor(dev)] == NULL) {
786 /* Reserve while opening */
787 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED;
788 } else {
789 lck_mtx_unlock(bpf_mlock);
790 return (EBUSY);
791 }
792 d = (struct bpf_d *)_MALLOC(sizeof(struct bpf_d), M_DEVBUF,
793 M_WAIT | M_ZERO);
794 if (d == NULL) {
795 /* this really is a catastrophic failure */
796 printf("bpfopen: malloc bpf_d failed\n");
797 bpf_dtab[minor(dev)] = NULL;
798 lck_mtx_unlock(bpf_mlock);
799 return (ENOMEM);
800 }
801
802 /* Mark "in use" and do most initialization. */
803 bpf_acquire_d(d);
804 d->bd_bufsize = bpf_bufsize;
805 d->bd_sig = SIGIO;
806 d->bd_seesent = 1;
807 d->bd_oflags = flags;
808 d->bd_state = BPF_IDLE;
809 d->bd_traffic_class = SO_TC_BE;
810 d->bd_flags |= BPF_DETACHED;
811 if (bpf_wantpktap)
812 d->bd_flags |= BPF_WANT_PKTAP;
813 else
814 d->bd_flags &= ~BPF_WANT_PKTAP;
815 d->bd_thread_call = thread_call_allocate(bpf_timed_out, d);
816 if (d->bd_thread_call == NULL) {
817 printf("bpfopen: malloc thread call failed\n");
818 bpf_dtab[minor(dev)] = NULL;
819 bpf_release_d(d);
820 lck_mtx_unlock(bpf_mlock);
821
822 return (ENOMEM);
823 }
824 d->bd_opened_by = p;
825 uuid_generate(d->bd_uuid);
826
827#if CONFIG_MACF_NET
828 mac_bpfdesc_label_init(d);
829 mac_bpfdesc_label_associate(kauth_cred_get(), d);
830#endif
831 bpf_dtab[minor(dev)] = d; /* Mark opened */
832 lck_mtx_unlock(bpf_mlock);
833
834 return (0);
835}
836
837/*
838 * Close the descriptor by detaching it from its interface,
839 * deallocating its buffers, and marking it free.
840 */
841/* ARGSUSED */
842int
843bpfclose(dev_t dev, __unused int flags, __unused int fmt,
844 __unused struct proc *p)
845{
846 struct bpf_d *d;
847
848 /* Take BPF lock to ensure no other thread is using the device */
849 lck_mtx_lock(bpf_mlock);
850
851 d = bpf_dtab[minor(dev)];
852 if (d == NULL || d == BPF_DEV_RESERVED) {
853 lck_mtx_unlock(bpf_mlock);
854 return (ENXIO);
855 }
856
857 /*
858 * Other threads may call bpd_detachd() if we drop the bpf_mlock
859 */
860 d->bd_flags |= BPF_CLOSING;
861
862 if (bpf_debug != 0)
863 printf("%s: %llx\n",
864 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
865
866 bpf_dtab[minor(dev)] = BPF_DEV_RESERVED; /* Reserve while closing */
867
868 /*
869 * Deal with any in-progress timeouts.
870 */
871 switch (d->bd_state) {
872 case BPF_IDLE:
873 /*
874 * Not waiting for a timeout, and no timeout happened.
875 */
876 break;
877
878 case BPF_WAITING:
879 /*
880 * Waiting for a timeout.
881 * Cancel any timer that has yet to go off,
882 * and mark the state as "closing".
883 * Then drop the lock to allow any timers that
884 * *have* gone off to run to completion, and wait
885 * for them to finish.
886 */
887 if (!bpf_stop_timer(d)) {
888 /*
889 * There was no pending call, so the call must
890 * have been in progress. Wait for the call to
891 * complete; we have to drop the lock while
892 * waiting. to let the in-progrss call complete
893 */
894 d->bd_state = BPF_DRAINING;
895 while (d->bd_state == BPF_DRAINING)
896 msleep((caddr_t)d, bpf_mlock, PRINET,
897 "bpfdraining", NULL);
898 }
899 d->bd_state = BPF_IDLE;
900 break;
901
902 case BPF_TIMED_OUT:
903 /*
904 * Timer went off, and the timeout routine finished.
905 */
906 d->bd_state = BPF_IDLE;
907 break;
908
909 case BPF_DRAINING:
910 /*
911 * Another thread is blocked on a close waiting for
912 * a timeout to finish.
913 * This "shouldn't happen", as the first thread to enter
914 * bpfclose() will set bpf_dtab[minor(dev)] to 1, and
915 * all subsequent threads should see that and fail with
916 * ENXIO.
917 */
918 panic("Two threads blocked in a BPF close");
919 break;
920 }
921
922 if (d->bd_bif)
923 bpf_detachd(d, 1);
924 selthreadclear(&d->bd_sel);
925#if CONFIG_MACF_NET
926 mac_bpfdesc_label_destroy(d);
927#endif
928 thread_call_free(d->bd_thread_call);
929
930 while (d->bd_hbuf_read != 0)
931 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
932
933 bpf_freed(d);
934
935 /* Mark free in same context as bpfopen comes to check */
936 bpf_dtab[minor(dev)] = NULL; /* Mark closed */
937
938 bpf_release_d(d);
939
940 lck_mtx_unlock(bpf_mlock);
941
942 return (0);
943}
944
945#define BPF_SLEEP bpf_sleep
946
947static int
948bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo)
949{
950 u_int64_t abstime = 0;
951
952 if (timo != 0)
953 clock_interval_to_deadline(timo, NSEC_PER_SEC / hz, &abstime);
954
955 return (msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime));
956}
957
958static void
959bpf_finalize_pktap(struct bpf_hdr *hp, struct pktap_header *pktaphdr)
960{
961 if (pktaphdr->pth_flags & PTH_FLAG_V2_HDR) {
962 struct pktap_v2_hdr *pktap_v2_hdr;
963
964 pktap_v2_hdr = (struct pktap_v2_hdr *)pktaphdr;
965
966 if (pktap_v2_hdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
967 pktap_v2_finalize_proc_info(pktap_v2_hdr);
968 } else {
969 if (pktaphdr->pth_flags & PTH_FLAG_DELAY_PKTAP)
970 pktap_finalize_proc_info(pktaphdr);
971
972 if (pktaphdr->pth_flags & PTH_FLAG_TSTAMP) {
973 hp->bh_tstamp.tv_sec = pktaphdr->pth_tstamp.tv_sec;
974 hp->bh_tstamp.tv_usec = pktaphdr->pth_tstamp.tv_usec;
975 }
976 }
977}
978
979/*
980 * Rotate the packet buffers in descriptor d. Move the store buffer
981 * into the hold slot, and the free buffer into the store slot.
982 * Zero the length of the new store buffer.
983 */
984#define ROTATE_BUFFERS(d) \
985 if (d->bd_hbuf_read != 0) \
986 panic("rotating bpf buffers during read"); \
987 (d)->bd_hbuf = (d)->bd_sbuf; \
988 (d)->bd_hlen = (d)->bd_slen; \
989 (d)->bd_hcnt = (d)->bd_scnt; \
990 (d)->bd_sbuf = (d)->bd_fbuf; \
991 (d)->bd_slen = 0; \
992 (d)->bd_scnt = 0; \
993 (d)->bd_fbuf = NULL;
994/*
995 * bpfread - read next chunk of packets from buffers
996 */
997int
998bpfread(dev_t dev, struct uio *uio, int ioflag)
999{
1000 struct bpf_d *d;
1001 caddr_t hbuf;
1002 int timed_out, hbuf_len;
1003 int error;
1004 int flags;
1005
1006 lck_mtx_lock(bpf_mlock);
1007
1008 d = bpf_dtab[minor(dev)];
1009 if (d == NULL || d == BPF_DEV_RESERVED ||
1010 (d->bd_flags & BPF_CLOSING) != 0) {
1011 lck_mtx_unlock(bpf_mlock);
1012 return (ENXIO);
1013 }
1014
1015 bpf_acquire_d(d);
1016
1017 /*
1018 * Restrict application to use a buffer the same size as
1019 * as kernel buffers.
1020 */
1021 if (uio_resid(uio) != d->bd_bufsize) {
1022 bpf_release_d(d);
1023 lck_mtx_unlock(bpf_mlock);
1024 return (EINVAL);
1025 }
1026
1027 if (d->bd_state == BPF_WAITING)
1028 bpf_stop_timer(d);
1029
1030 timed_out = (d->bd_state == BPF_TIMED_OUT);
1031 d->bd_state = BPF_IDLE;
1032
1033 while (d->bd_hbuf_read != 0)
1034 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
1035
1036 if ((d->bd_flags & BPF_CLOSING) != 0) {
1037 bpf_release_d(d);
1038 lck_mtx_unlock(bpf_mlock);
1039 return (ENXIO);
1040 }
1041 /*
1042 * If the hold buffer is empty, then do a timed sleep, which
1043 * ends when the timeout expires or when enough packets
1044 * have arrived to fill the store buffer.
1045 */
1046 while (d->bd_hbuf == 0) {
1047 if ((d->bd_immediate || timed_out || (ioflag & IO_NDELAY)) &&
1048 d->bd_slen != 0) {
1049 /*
1050 * We're in immediate mode, or are reading
1051 * in non-blocking mode, or a timer was
1052 * started before the read (e.g., by select()
1053 * or poll()) and has expired and a packet(s)
1054 * either arrived since the previous
1055 * read or arrived while we were asleep.
1056 * Rotate the buffers and return what's here.
1057 */
1058 ROTATE_BUFFERS(d);
1059 break;
1060 }
1061
1062 /*
1063 * No data is available, check to see if the bpf device
1064 * is still pointed at a real interface. If not, return
1065 * ENXIO so that the userland process knows to rebind
1066 * it before using it again.
1067 */
1068 if (d->bd_bif == NULL) {
1069 bpf_release_d(d);
1070 lck_mtx_unlock(bpf_mlock);
1071 return (ENXIO);
1072 }
1073 if (ioflag & IO_NDELAY) {
1074 bpf_release_d(d);
1075 lck_mtx_unlock(bpf_mlock);
1076 return (EWOULDBLOCK);
1077 }
1078 error = BPF_SLEEP(d, PRINET|PCATCH, "bpf", d->bd_rtout);
1079 /*
1080 * Make sure device is still opened
1081 */
1082 if ((d->bd_flags & BPF_CLOSING) != 0) {
1083 bpf_release_d(d);
1084 lck_mtx_unlock(bpf_mlock);
1085 return (ENXIO);
1086 }
1087
1088 while (d->bd_hbuf_read != 0)
1089 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1090 NULL);
1091
1092 if ((d->bd_flags & BPF_CLOSING) != 0) {
1093 bpf_release_d(d);
1094 lck_mtx_unlock(bpf_mlock);
1095 return (ENXIO);
1096 }
1097
1098 if (error == EINTR || error == ERESTART) {
1099 if (d->bd_hbuf != NULL) {
1100 /*
1101 * Because we msleep, the hold buffer might
1102 * be filled when we wake up. Avoid rotating
1103 * in this case.
1104 */
1105 break;
1106 }
1107 if (d->bd_slen != 0) {
1108 /*
1109 * Sometimes we may be interrupted often and
1110 * the sleep above will not timeout.
1111 * Regardless, we should rotate the buffers
1112 * if there's any new data pending and
1113 * return it.
1114 */
1115 ROTATE_BUFFERS(d);
1116 break;
1117 }
1118 bpf_release_d(d);
1119 lck_mtx_unlock(bpf_mlock);
1120 if (error == ERESTART) {
1121 printf("%s: %llx ERESTART to EINTR\n",
1122 __func__, (uint64_t)VM_KERNEL_ADDRPERM(d));
1123 error = EINTR;
1124 }
1125 return (error);
1126 }
1127 if (error == EWOULDBLOCK) {
1128 /*
1129 * On a timeout, return what's in the buffer,
1130 * which may be nothing. If there is something
1131 * in the store buffer, we can rotate the buffers.
1132 */
1133 if (d->bd_hbuf)
1134 /*
1135 * We filled up the buffer in between
1136 * getting the timeout and arriving
1137 * here, so we don't need to rotate.
1138 */
1139 break;
1140
1141 if (d->bd_slen == 0) {
1142 bpf_release_d(d);
1143 lck_mtx_unlock(bpf_mlock);
1144 return (0);
1145 }
1146 ROTATE_BUFFERS(d);
1147 break;
1148 }
1149 }
1150 /*
1151 * At this point, we know we have something in the hold slot.
1152 */
1153
1154 /*
1155 * Set the hold buffer read. So we do not
1156 * rotate the buffers until the hold buffer
1157 * read is complete. Also to avoid issues resulting
1158 * from page faults during disk sleep (<rdar://problem/13436396>).
1159 */
1160 d->bd_hbuf_read = 1;
1161 hbuf = d->bd_hbuf;
1162 hbuf_len = d->bd_hlen;
1163 flags = d->bd_flags;
1164 lck_mtx_unlock(bpf_mlock);
1165
1166#ifdef __APPLE__
1167 /*
1168 * Before we move data to userland, we fill out the extended
1169 * header fields.
1170 */
1171 if (flags & BPF_EXTENDED_HDR) {
1172 char *p;
1173
1174 p = hbuf;
1175 while (p < hbuf + hbuf_len) {
1176 struct bpf_hdr_ext *ehp;
1177 uint32_t flowid;
1178 struct so_procinfo soprocinfo;
1179 int found = 0;
1180
1181 ehp = (struct bpf_hdr_ext *)(void *)p;
1182 if ((flowid = ehp->bh_flowid) != 0) {
1183 if (ehp->bh_proto == IPPROTO_TCP)
1184 found = inp_findinpcb_procinfo(&tcbinfo,
1185 flowid, &soprocinfo);
1186 else if (ehp->bh_proto == IPPROTO_UDP)
1187 found = inp_findinpcb_procinfo(&udbinfo,
1188 flowid, &soprocinfo);
1189 if (found == 1) {
1190 ehp->bh_pid = soprocinfo.spi_pid;
1191 proc_name(ehp->bh_pid, ehp->bh_comm,
1192 MAXCOMLEN);
1193 }
1194 ehp->bh_flowid = 0;
1195 }
1196
1197 if (flags & BPF_FINALIZE_PKTAP) {
1198 struct pktap_header *pktaphdr;
1199
1200 pktaphdr = (struct pktap_header *)(void *)
1201 (p + BPF_WORDALIGN(ehp->bh_hdrlen));
1202
1203 bpf_finalize_pktap((struct bpf_hdr *) ehp,
1204 pktaphdr);
1205 }
1206 p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen);
1207 }
1208 } else if (flags & BPF_FINALIZE_PKTAP) {
1209 char *p;
1210
1211 p = hbuf;
1212 while (p < hbuf + hbuf_len) {
1213 struct bpf_hdr *hp;
1214 struct pktap_header *pktaphdr;
1215
1216 hp = (struct bpf_hdr *)(void *)p;
1217 pktaphdr = (struct pktap_header *)(void *)
1218 (p + BPF_WORDALIGN(hp->bh_hdrlen));
1219
1220 bpf_finalize_pktap(hp, pktaphdr);
1221
1222 p += BPF_WORDALIGN(hp->bh_hdrlen + hp->bh_caplen);
1223 }
1224 }
1225#endif
1226
1227 /*
1228 * Move data from hold buffer into user space.
1229 * We know the entire buffer is transferred since
1230 * we checked above that the read buffer is bpf_bufsize bytes.
1231 */
1232 error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);
1233
1234 lck_mtx_lock(bpf_mlock);
1235 /*
1236 * Make sure device is still opened
1237 */
1238 if ((d->bd_flags & BPF_CLOSING) != 0) {
1239 bpf_release_d(d);
1240 lck_mtx_unlock(bpf_mlock);
1241 return (ENXIO);
1242 }
1243
1244 d->bd_hbuf_read = 0;
1245 d->bd_fbuf = d->bd_hbuf;
1246 d->bd_hbuf = NULL;
1247 d->bd_hlen = 0;
1248 d->bd_hcnt = 0;
1249 wakeup((caddr_t)d);
1250
1251 bpf_release_d(d);
1252 lck_mtx_unlock(bpf_mlock);
1253 return (error);
1254
1255}
1256
1257/*
1258 * If there are processes sleeping on this descriptor, wake them up.
1259 */
1260static void
1261bpf_wakeup(struct bpf_d *d)
1262{
1263 if (d->bd_state == BPF_WAITING) {
1264 bpf_stop_timer(d);
1265 d->bd_state = BPF_IDLE;
1266 }
1267 wakeup((caddr_t)d);
1268 if (d->bd_async && d->bd_sig && d->bd_sigio)
1269 pgsigio(d->bd_sigio, d->bd_sig);
1270
1271 selwakeup(&d->bd_sel);
1272 if ((d->bd_flags & BPF_KNOTE))
1273 KNOTE(&d->bd_sel.si_note, 1);
1274}
1275
1276static void
1277bpf_timed_out(void *arg, __unused void *dummy)
1278{
1279 struct bpf_d *d = (struct bpf_d *)arg;
1280
1281 lck_mtx_lock(bpf_mlock);
1282 if (d->bd_state == BPF_WAITING) {
1283 /*
1284 * There's a select or kqueue waiting for this; if there's
1285 * now stuff to read, wake it up.
1286 */
1287 d->bd_state = BPF_TIMED_OUT;
1288 if (d->bd_slen != 0)
1289 bpf_wakeup(d);
1290 } else if (d->bd_state == BPF_DRAINING) {
1291 /*
1292 * A close is waiting for this to finish.
1293 * Mark it as finished, and wake the close up.
1294 */
1295 d->bd_state = BPF_IDLE;
1296 bpf_wakeup(d);
1297 }
1298 lck_mtx_unlock(bpf_mlock);
1299}
1300
1301/* keep in sync with bpf_movein above: */
1302#define MAX_DATALINK_HDR_LEN (sizeof(struct firewire_header))
1303
1304int
1305bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag)
1306{
1307 struct bpf_d *d;
1308 struct ifnet *ifp;
1309 struct mbuf *m = NULL;
1310 int error;
1311 char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN];
1312 int datlen = 0;
1313 int bif_dlt;
1314 int bd_hdrcmplt;
1315
1316 lck_mtx_lock(bpf_mlock);
1317
1318 d = bpf_dtab[minor(dev)];
1319 if (d == NULL || d == BPF_DEV_RESERVED ||
1320 (d->bd_flags & BPF_CLOSING) != 0) {
1321 lck_mtx_unlock(bpf_mlock);
1322 return (ENXIO);
1323 }
1324
1325 bpf_acquire_d(d);
1326
1327 if (d->bd_bif == 0) {
1328 bpf_release_d(d);
1329 lck_mtx_unlock(bpf_mlock);
1330 return (ENXIO);
1331 }
1332
1333 ifp = d->bd_bif->bif_ifp;
1334
1335 if ((ifp->if_flags & IFF_UP) == 0) {
1336 bpf_release_d(d);
1337 lck_mtx_unlock(bpf_mlock);
1338 return (ENETDOWN);
1339 }
1340 if (uio_resid(uio) == 0) {
1341 bpf_release_d(d);
1342 lck_mtx_unlock(bpf_mlock);
1343 return (0);
1344 }
1345 ((struct sockaddr *)dst_buf)->sa_len = sizeof(dst_buf);
1346
1347 /*
1348 * fix for PR-6849527
1349 * geting variables onto stack before dropping lock for bpf_movein()
1350 */
1351 bif_dlt = (int)d->bd_bif->bif_dlt;
1352 bd_hdrcmplt = d->bd_hdrcmplt;
1353
1354 /* bpf_movein allocating mbufs; drop lock */
1355 lck_mtx_unlock(bpf_mlock);
1356
1357 error = bpf_movein(uio, bif_dlt, &m,
1358 bd_hdrcmplt ? NULL : (struct sockaddr *)dst_buf,
1359 &datlen);
1360
1361 /* take the lock again */
1362 lck_mtx_lock(bpf_mlock);
1363 if (error) {
1364 bpf_release_d(d);
1365 lck_mtx_unlock(bpf_mlock);
1366 return (error);
1367 }
1368
1369 /* verify the device is still open */
1370 if ((d->bd_flags & BPF_CLOSING) != 0) {
1371 bpf_release_d(d);
1372 lck_mtx_unlock(bpf_mlock);
1373 m_freem(m);
1374 return (ENXIO);
1375 }
1376
1377 if (d->bd_bif == NULL) {
1378 bpf_release_d(d);
1379 lck_mtx_unlock(bpf_mlock);
1380 m_free(m);
1381 return (ENXIO);
1382 }
1383
1384 if ((unsigned)datlen > ifp->if_mtu) {
1385 bpf_release_d(d);
1386 lck_mtx_unlock(bpf_mlock);
1387 m_freem(m);
1388 return (EMSGSIZE);
1389 }
1390
1391#if CONFIG_MACF_NET
1392 mac_mbuf_label_associate_bpfdesc(d, m);
1393#endif
1394
1395 bpf_set_packet_service_class(m, d->bd_traffic_class);
1396
1397 lck_mtx_unlock(bpf_mlock);
1398
1399 /*
1400 * The driver frees the mbuf.
1401 */
1402 if (d->bd_hdrcmplt) {
1403 if (d->bd_bif->bif_send)
1404 error = d->bd_bif->bif_send(ifp, d->bd_bif->bif_dlt, m);
1405 else
1406 error = dlil_output(ifp, 0, m, NULL, NULL, 1, NULL);
1407 } else {
1408 error = dlil_output(ifp, PF_INET, m, NULL,
1409 (struct sockaddr *)dst_buf, 0, NULL);
1410 }
1411
1412 lck_mtx_lock(bpf_mlock);
1413 bpf_release_d(d);
1414 lck_mtx_unlock(bpf_mlock);
1415
1416 return (error);
1417}
1418
1419/*
1420 * Reset a descriptor by flushing its packet buffer and clearing the
1421 * receive and drop counts.
1422 */
1423static void
1424reset_d(struct bpf_d *d)
1425{
1426 if (d->bd_hbuf_read != 0)
1427 panic("resetting buffers during read");
1428
1429 if (d->bd_hbuf) {
1430 /* Free the hold buffer. */
1431 d->bd_fbuf = d->bd_hbuf;
1432 d->bd_hbuf = NULL;
1433 }
1434 d->bd_slen = 0;
1435 d->bd_hlen = 0;
1436 d->bd_scnt = 0;
1437 d->bd_hcnt = 0;
1438 d->bd_rcount = 0;
1439 d->bd_dcount = 0;
1440}
1441
1442static struct bpf_d *
1443bpf_get_device_from_uuid(uuid_t uuid)
1444{
1445 unsigned int i;
1446
1447 for (i = 0; i < nbpfilter; i++) {
1448 struct bpf_d *d = bpf_dtab[i];
1449
1450 if (d == NULL || d == BPF_DEV_RESERVED ||
1451 (d->bd_flags & BPF_CLOSING) != 0)
1452 continue;
1453 if (uuid_compare(uuid, d->bd_uuid) == 0)
1454 return (d);
1455 }
1456
1457 return (NULL);
1458}
1459
1460/*
1461 * The BIOCSETUP command "atomically" attach to the interface and
1462 * copy the buffer from another interface. This minimizes the risk
1463 * of missing packet because this is done while holding
1464 * the BPF global lock
1465 */
1466static int
1467bpf_setup(struct bpf_d *d_to, uuid_t uuid_from, ifnet_t ifp)
1468{
1469 struct bpf_d *d_from;
1470 int error = 0;
1471
1472 LCK_MTX_ASSERT(bpf_mlock, LCK_MTX_ASSERT_OWNED);
1473
1474 /*
1475 * Sanity checks
1476 */
1477 d_from = bpf_get_device_from_uuid(uuid_from);
1478 if (d_from == NULL) {
1479 error = ENOENT;
1480 os_log_info(OS_LOG_DEFAULT,
1481 "%s: uuids not found error %d",
1482 __func__, error);
1483 return (error);
1484 }
1485 if (d_from->bd_opened_by != d_to->bd_opened_by) {
1486 error = EACCES;
1487 os_log_info(OS_LOG_DEFAULT,
1488 "%s: processes not matching error %d",
1489 __func__, error);
1490 return (error);
1491 }
1492
1493 /*
1494 * Prevent any read while copying
1495 */
1496 while (d_to->bd_hbuf_read != 0)
1497 msleep((caddr_t)d_to, bpf_mlock, PRINET, __func__, NULL);
1498 d_to->bd_hbuf_read = 1;
1499
1500 while (d_from->bd_hbuf_read != 0)
1501 msleep((caddr_t)d_from, bpf_mlock, PRINET, __func__, NULL);
1502 d_from->bd_hbuf_read = 1;
1503
1504 /*
1505 * Verify the devices have not been closed
1506 */
1507 if (d_to->bd_flags & BPF_CLOSING) {
1508 error = ENXIO;
1509 os_log_info(OS_LOG_DEFAULT,
1510 "%s: d_to is closing error %d",
1511 __func__, error);
1512 goto done;
1513 }
1514 if (d_from->bd_flags & BPF_CLOSING) {
1515 error = ENXIO;
1516 os_log_info(OS_LOG_DEFAULT,
1517 "%s: d_from is closing error %d",
1518 __func__, error);
1519 goto done;
1520 }
1521
1522 /*
1523 * For now require the same buffer size
1524 */
1525 if (d_from->bd_bufsize != d_to->bd_bufsize) {
1526 error = EINVAL;
1527 os_log_info(OS_LOG_DEFAULT,
1528 "%s: bufsizes not matching error %d",
1529 __func__, error);
1530 goto done;
1531 }
1532
1533 /*
1534 * Attach to the interface
1535 */
1536 error = bpf_setif(d_to, ifp, false, true);
1537 if (error != 0) {
1538 os_log_info(OS_LOG_DEFAULT,
1539 "%s: bpf_setif() failed error %d",
1540 __func__, error);
1541 goto done;
1542 }
1543
1544 /*
1545 * Make sure the buffers are setup as expected by bpf_setif()
1546 */
1547 ASSERT(d_to->bd_hbuf == NULL);
1548 ASSERT(d_to->bd_sbuf != NULL);
1549 ASSERT(d_to->bd_fbuf != NULL);
1550
1551 /*
1552 * Copy the buffers and update the pointers and counts
1553 */
1554 memcpy(d_to->bd_sbuf, d_from->bd_sbuf, d_from->bd_slen);
1555 d_to->bd_slen = d_from->bd_slen;
1556 d_to->bd_scnt = d_from->bd_scnt;
1557
1558 if (d_from->bd_hbuf != NULL) {
1559 d_to->bd_hbuf = d_to->bd_fbuf;
1560 d_to->bd_fbuf = NULL;
1561 memcpy(d_to->bd_hbuf, d_from->bd_hbuf, d_from->bd_hlen);
1562 }
1563 d_to->bd_hlen = d_from->bd_hlen;
1564 d_to->bd_hcnt = d_from->bd_hcnt;
1565
1566 if (bpf_debug > 0) {
1567 os_log_info(OS_LOG_DEFAULT,
1568 "%s: done slen %u scnt %u hlen %u hcnt %u",
1569 __func__, d_to->bd_slen, d_to->bd_scnt,
1570 d_to->bd_hlen, d_to->bd_hcnt);
1571 }
1572done:
1573 d_from->bd_hbuf_read = 0;
1574 wakeup((caddr_t)d_from);
1575
1576 d_to->bd_hbuf_read = 0;
1577 wakeup((caddr_t)d_to);
1578
1579 return (error);
1580}
1581
1582/*
1583 * FIONREAD Check for read packet available.
1584 * SIOCGIFADDR Get interface address - convenient hook to driver.
1585 * BIOCGBLEN Get buffer len [for read()].
1586 * BIOCSETF Set ethernet read filter.
1587 * BIOCFLUSH Flush read packet buffer.
1588 * BIOCPROMISC Put interface into promiscuous mode.
1589 * BIOCGDLT Get link layer type.
1590 * BIOCGETIF Get interface name.
1591 * BIOCSETIF Set interface.
1592 * BIOCSRTIMEOUT Set read timeout.
1593 * BIOCGRTIMEOUT Get read timeout.
1594 * BIOCGSTATS Get packet stats.
1595 * BIOCIMMEDIATE Set immediate mode.
1596 * BIOCVERSION Get filter language version.
1597 * BIOCGHDRCMPLT Get "header already complete" flag
1598 * BIOCSHDRCMPLT Set "header already complete" flag
1599 * BIOCGSEESENT Get "see packets sent" flag
1600 * BIOCSSEESENT Set "see packets sent" flag
1601 * BIOCSETTC Set traffic class.
1602 * BIOCGETTC Get traffic class.
1603 * BIOCSEXTHDR Set "extended header" flag
1604 * BIOCSHEADDROP Drop head of the buffer if user is not reading
1605 * BIOCGHEADDROP Get "head-drop" flag
1606 */
1607/* ARGSUSED */
1608int
1609bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags,
1610 struct proc *p)
1611{
1612 struct bpf_d *d;
1613 int error = 0;
1614 u_int int_arg;
1615 struct ifreq ifr;
1616
1617 lck_mtx_lock(bpf_mlock);
1618
1619 d = bpf_dtab[minor(dev)];
1620 if (d == NULL || d == BPF_DEV_RESERVED ||
1621 (d->bd_flags & BPF_CLOSING) != 0) {
1622 lck_mtx_unlock(bpf_mlock);
1623 return (ENXIO);
1624 }
1625
1626 bpf_acquire_d(d);
1627
1628 if (d->bd_state == BPF_WAITING)
1629 bpf_stop_timer(d);
1630 d->bd_state = BPF_IDLE;
1631
1632 switch (cmd) {
1633
1634 default:
1635 error = EINVAL;
1636 break;
1637
1638 /*
1639 * Check for read packet available.
1640 */
1641 case FIONREAD: /* int */
1642 {
1643 int n;
1644
1645 n = d->bd_slen;
1646 if (d->bd_hbuf && d->bd_hbuf_read == 0)
1647 n += d->bd_hlen;
1648
1649 bcopy(&n, addr, sizeof (n));
1650 break;
1651 }
1652
1653 case SIOCGIFADDR: /* struct ifreq */
1654 {
1655 struct ifnet *ifp;
1656
1657 if (d->bd_bif == 0)
1658 error = EINVAL;
1659 else {
1660 ifp = d->bd_bif->bif_ifp;
1661 error = ifnet_ioctl(ifp, 0, cmd, addr);
1662 }
1663 break;
1664 }
1665
1666 /*
1667 * Get buffer len [for read()].
1668 */
1669 case BIOCGBLEN: /* u_int */
1670 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1671 break;
1672
1673 /*
1674 * Set buffer length.
1675 */
1676 case BIOCSBLEN: { /* u_int */
1677 u_int size;
1678 unsigned int maxbufsize = bpf_maxbufsize;
1679
1680 /*
1681 * Allow larger buffer in head drop mode to with the
1682 * assumption the reading process may be low priority but
1683 * is interested in the most recent traffic
1684 */
1685 if (d->bd_headdrop != 0) {
1686 maxbufsize = 2 * bpf_maxbufsize;
1687 }
1688
1689 if (d->bd_bif != 0 || (d->bd_flags & BPF_DETACHING)) {
1690 /*
1691 * Interface already attached, unable to change buffers
1692 */
1693 error = EINVAL;
1694 break;
1695 }
1696 bcopy(addr, &size, sizeof (size));
1697
1698 if (size > maxbufsize) {
1699 d->bd_bufsize = maxbufsize;
1700
1701 os_log_info(OS_LOG_DEFAULT,
1702 "%s bufsize capped to %u from %u",
1703 __func__, d->bd_bufsize, size);
1704 } else if (size < BPF_MINBUFSIZE) {
1705 d->bd_bufsize = BPF_MINBUFSIZE;
1706
1707 os_log_info(OS_LOG_DEFAULT,
1708 "%s bufsize bumped to %u from %u",
1709 __func__, d->bd_bufsize, size);
1710 } else {
1711 d->bd_bufsize = size;
1712 }
1713
1714 /* It's a read/write ioctl */
1715 bcopy(&d->bd_bufsize, addr, sizeof (u_int));
1716 break;
1717 }
1718 /*
1719 * Set link layer read filter.
1720 */
1721 case BIOCSETF32:
1722 case BIOCSETFNR32: { /* struct bpf_program32 */
1723 struct bpf_program32 prg32;
1724
1725 bcopy(addr, &prg32, sizeof (prg32));
1726 error = bpf_setf(d, prg32.bf_len,
1727 CAST_USER_ADDR_T(prg32.bf_insns), cmd);
1728 break;
1729 }
1730
1731 case BIOCSETF64:
1732 case BIOCSETFNR64: { /* struct bpf_program64 */
1733 struct bpf_program64 prg64;
1734
1735 bcopy(addr, &prg64, sizeof (prg64));
1736 error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, cmd);
1737 break;
1738 }
1739
1740 /*
1741 * Flush read packet buffer.
1742 */
1743 case BIOCFLUSH:
1744 while (d->bd_hbuf_read != 0) {
1745 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading",
1746 NULL);
1747 }
1748 if ((d->bd_flags & BPF_CLOSING) != 0) {
1749 error = ENXIO;
1750 break;
1751 }
1752 reset_d(d);
1753 break;
1754
1755 /*
1756 * Put interface into promiscuous mode.
1757 */
1758 case BIOCPROMISC:
1759 if (d->bd_bif == 0) {
1760 /*
1761 * No interface attached yet.
1762 */
1763 error = EINVAL;
1764 break;
1765 }
1766 if (d->bd_promisc == 0) {
1767 lck_mtx_unlock(bpf_mlock);
1768 error = ifnet_set_promiscuous(d->bd_bif->bif_ifp, 1);
1769 lck_mtx_lock(bpf_mlock);
1770 if (error == 0)
1771 d->bd_promisc = 1;
1772 }
1773 break;
1774
1775 /*
1776 * Get device parameters.
1777 */
1778 case BIOCGDLT: /* u_int */
1779 if (d->bd_bif == 0)
1780 error = EINVAL;
1781 else
1782 bcopy(&d->bd_bif->bif_dlt, addr, sizeof (u_int));
1783 break;
1784
1785 /*
1786 * Get a list of supported data link types.
1787 */
1788 case BIOCGDLTLIST: /* struct bpf_dltlist */
1789 if (d->bd_bif == NULL) {
1790 error = EINVAL;
1791 } else {
1792 error = bpf_getdltlist(d, addr, p);
1793 }
1794 break;
1795
1796 /*
1797 * Set data link type.
1798 */
1799 case BIOCSDLT: /* u_int */
1800 if (d->bd_bif == NULL) {
1801 error = EINVAL;
1802 } else {
1803 u_int dlt;
1804
1805 bcopy(addr, &dlt, sizeof (dlt));
1806
1807 if (dlt == DLT_PKTAP &&
1808 !(d->bd_flags & BPF_WANT_PKTAP)) {
1809 dlt = DLT_RAW;
1810 }
1811 error = bpf_setdlt(d, dlt);
1812 }
1813 break;
1814
1815 /*
1816 * Get interface name.
1817 */
1818 case BIOCGETIF: /* struct ifreq */
1819 if (d->bd_bif == 0)
1820 error = EINVAL;
1821 else {
1822 struct ifnet *const ifp = d->bd_bif->bif_ifp;
1823
1824 snprintf(((struct ifreq *)(void *)addr)->ifr_name,
1825 sizeof (ifr.ifr_name), "%s", if_name(ifp));
1826 }
1827 break;
1828
1829 /*
1830 * Set interface.
1831 */
1832 case BIOCSETIF: { /* struct ifreq */
1833 ifnet_t ifp;
1834
1835 bcopy(addr, &ifr, sizeof (ifr));
1836 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
1837 ifp = ifunit(ifr.ifr_name);
1838 if (ifp == NULL)
1839 error = ENXIO;
1840 else
1841 error = bpf_setif(d, ifp, true, false);
1842 break;
1843 }
1844
1845 /*
1846 * Set read timeout.
1847 */
1848 case BIOCSRTIMEOUT32: { /* struct user32_timeval */
1849 struct user32_timeval _tv;
1850 struct timeval tv;
1851
1852 bcopy(addr, &_tv, sizeof (_tv));
1853 tv.tv_sec = _tv.tv_sec;
1854 tv.tv_usec = _tv.tv_usec;
1855
1856 /*
1857 * Subtract 1 tick from tvtohz() since this isn't
1858 * a one-shot timer.
1859 */
1860 if ((error = itimerfix(&tv)) == 0)
1861 d->bd_rtout = tvtohz(&tv) - 1;
1862 break;
1863 }
1864
1865 case BIOCSRTIMEOUT64: { /* struct user64_timeval */
1866 struct user64_timeval _tv;
1867 struct timeval tv;
1868
1869 bcopy(addr, &_tv, sizeof (_tv));
1870 tv.tv_sec = _tv.tv_sec;
1871 tv.tv_usec = _tv.tv_usec;
1872
1873 /*
1874 * Subtract 1 tick from tvtohz() since this isn't
1875 * a one-shot timer.
1876 */
1877 if ((error = itimerfix(&tv)) == 0)
1878 d->bd_rtout = tvtohz(&tv) - 1;
1879 break;
1880 }
1881
1882 /*
1883 * Get read timeout.
1884 */
1885 case BIOCGRTIMEOUT32: { /* struct user32_timeval */
1886 struct user32_timeval tv;
1887
1888 bzero(&tv, sizeof (tv));
1889 tv.tv_sec = d->bd_rtout / hz;
1890 tv.tv_usec = (d->bd_rtout % hz) * tick;
1891 bcopy(&tv, addr, sizeof (tv));
1892 break;
1893 }
1894
1895 case BIOCGRTIMEOUT64: { /* struct user64_timeval */
1896 struct user64_timeval tv;
1897
1898 bzero(&tv, sizeof (tv));
1899 tv.tv_sec = d->bd_rtout / hz;
1900 tv.tv_usec = (d->bd_rtout % hz) * tick;
1901 bcopy(&tv, addr, sizeof (tv));
1902 break;
1903 }
1904
1905 /*
1906 * Get packet stats.
1907 */
1908 case BIOCGSTATS: { /* struct bpf_stat */
1909 struct bpf_stat bs;
1910
1911 bzero(&bs, sizeof (bs));
1912 bs.bs_recv = d->bd_rcount;
1913 bs.bs_drop = d->bd_dcount;
1914 bcopy(&bs, addr, sizeof (bs));
1915 break;
1916 }
1917
1918 /*
1919 * Set immediate mode.
1920 */
1921 case BIOCIMMEDIATE: /* u_int */
1922 d->bd_immediate = *(u_int *)(void *)addr;
1923 break;
1924
1925 case BIOCVERSION: { /* struct bpf_version */
1926 struct bpf_version bv;
1927
1928 bzero(&bv, sizeof (bv));
1929 bv.bv_major = BPF_MAJOR_VERSION;
1930 bv.bv_minor = BPF_MINOR_VERSION;
1931 bcopy(&bv, addr, sizeof (bv));
1932 break;
1933 }
1934
1935 /*
1936 * Get "header already complete" flag
1937 */
1938 case BIOCGHDRCMPLT: /* u_int */
1939 bcopy(&d->bd_hdrcmplt, addr, sizeof (u_int));
1940 break;
1941
1942 /*
1943 * Set "header already complete" flag
1944 */
1945 case BIOCSHDRCMPLT: /* u_int */
1946 bcopy(addr, &int_arg, sizeof (int_arg));
1947 d->bd_hdrcmplt = int_arg ? 1 : 0;
1948 break;
1949
1950 /*
1951 * Get "see sent packets" flag
1952 */
1953 case BIOCGSEESENT: /* u_int */
1954 bcopy(&d->bd_seesent, addr, sizeof (u_int));
1955 break;
1956
1957 /*
1958 * Set "see sent packets" flag
1959 */
1960 case BIOCSSEESENT: /* u_int */
1961 bcopy(addr, &d->bd_seesent, sizeof (u_int));
1962 break;
1963
1964 /*
1965 * Set traffic service class
1966 */
1967 case BIOCSETTC: { /* int */
1968 int tc;
1969
1970 bcopy(addr, &tc, sizeof (int));
1971 error = bpf_set_traffic_class(d, tc);
1972 break;
1973 }
1974
1975 /*
1976 * Get traffic service class
1977 */
1978 case BIOCGETTC: /* int */
1979 bcopy(&d->bd_traffic_class, addr, sizeof (int));
1980 break;
1981
1982 case FIONBIO: /* Non-blocking I/O; int */
1983 break;
1984
1985 case FIOASYNC: /* Send signal on receive packets; int */
1986 bcopy(addr, &d->bd_async, sizeof (int));
1987 break;
1988#ifndef __APPLE__
1989 case FIOSETOWN:
1990 error = fsetown(*(int *)addr, &d->bd_sigio);
1991 break;
1992
1993 case FIOGETOWN:
1994 *(int *)addr = fgetown(d->bd_sigio);
1995 break;
1996
1997 /* This is deprecated, FIOSETOWN should be used instead. */
1998 case TIOCSPGRP:
1999 error = fsetown(-(*(int *)addr), &d->bd_sigio);
2000 break;
2001
2002 /* This is deprecated, FIOGETOWN should be used instead. */
2003 case TIOCGPGRP:
2004 *(int *)addr = -fgetown(d->bd_sigio);
2005 break;
2006#endif
2007 case BIOCSRSIG: { /* Set receive signal; u_int */
2008 u_int sig;
2009
2010 bcopy(addr, &sig, sizeof (u_int));
2011
2012 if (sig >= NSIG)
2013 error = EINVAL;
2014 else
2015 d->bd_sig = sig;
2016 break;
2017 }
2018 case BIOCGRSIG: /* u_int */
2019 bcopy(&d->bd_sig, addr, sizeof (u_int));
2020 break;
2021#ifdef __APPLE__
2022 case BIOCSEXTHDR: /* u_int */
2023 bcopy(addr, &int_arg, sizeof (int_arg));
2024 if (int_arg)
2025 d->bd_flags |= BPF_EXTENDED_HDR;
2026 else
2027 d->bd_flags &= ~BPF_EXTENDED_HDR;
2028 break;
2029
2030 case BIOCGIFATTACHCOUNT: { /* struct ifreq */
2031 ifnet_t ifp;
2032 struct bpf_if *bp;
2033
2034 bcopy(addr, &ifr, sizeof (ifr));
2035 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2036 ifp = ifunit(ifr.ifr_name);
2037 if (ifp == NULL) {
2038 error = ENXIO;
2039 break;
2040 }
2041 ifr.ifr_intval = 0;
2042 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2043 struct bpf_d *bpf_d;
2044
2045 if (bp->bif_ifp == NULL || bp->bif_ifp != ifp)
2046 continue;
2047 for (bpf_d = bp->bif_dlist; bpf_d;
2048 bpf_d = bpf_d->bd_next) {
2049 ifr.ifr_intval += 1;
2050 }
2051 }
2052 bcopy(&ifr, addr, sizeof (ifr));
2053 break;
2054 }
2055 case BIOCGWANTPKTAP: /* u_int */
2056 int_arg = d->bd_flags & BPF_WANT_PKTAP ? 1 : 0;
2057 bcopy(&int_arg, addr, sizeof (int_arg));
2058 break;
2059
2060 case BIOCSWANTPKTAP: /* u_int */
2061 bcopy(addr, &int_arg, sizeof (int_arg));
2062 if (int_arg)
2063 d->bd_flags |= BPF_WANT_PKTAP;
2064 else
2065 d->bd_flags &= ~BPF_WANT_PKTAP;
2066 break;
2067#endif
2068
2069 case BIOCSHEADDROP:
2070 bcopy(addr, &int_arg, sizeof (int_arg));
2071 d->bd_headdrop = int_arg ? 1 : 0;
2072 break;
2073
2074 case BIOCGHEADDROP:
2075 bcopy(&d->bd_headdrop, addr, sizeof (int));
2076 break;
2077
2078 case BIOCSTRUNCATE:
2079 bcopy(addr, &int_arg, sizeof(int_arg));
2080 if (int_arg)
2081 d->bd_flags |= BPF_TRUNCATE;
2082 else
2083 d->bd_flags &= ~BPF_TRUNCATE;
2084 break;
2085
2086 case BIOCGETUUID:
2087 bcopy(&d->bd_uuid, addr, sizeof (uuid_t));
2088 break;
2089
2090 case BIOCSETUP: {
2091 struct bpf_setup_args bsa;
2092 ifnet_t ifp;
2093
2094 bcopy(addr, &bsa, sizeof (struct bpf_setup_args));
2095 bsa.bsa_ifname[IFNAMSIZ - 1] = 0;
2096 ifp = ifunit(bsa.bsa_ifname);
2097 if (ifp == NULL) {
2098 error = ENXIO;
2099 os_log_info(OS_LOG_DEFAULT,
2100 "%s: ifnet not found for %s error %d",
2101 __func__, bsa.bsa_ifname, error);
2102 break;
2103 }
2104
2105 error = bpf_setup(d, bsa.bsa_uuid, ifp);
2106 break;
2107 }
2108 case BIOCSPKTHDRV2:
2109 bcopy(addr, &int_arg, sizeof(int_arg));
2110 if (int_arg != 0)
2111 d->bd_flags |= BPF_PKTHDRV2;
2112 else
2113 d->bd_flags &= ~BPF_PKTHDRV2;
2114 break;
2115
2116 case BIOCGPKTHDRV2:
2117 int_arg = d->bd_flags & BPF_PKTHDRV2 ? 1 : 0;
2118 bcopy(&int_arg, addr, sizeof (int));
2119 break;
2120 }
2121
2122 bpf_release_d(d);
2123 lck_mtx_unlock(bpf_mlock);
2124
2125 return (error);
2126}
2127
2128/*
2129 * Set d's packet filter program to fp. If this file already has a filter,
2130 * free it and replace it. Returns EINVAL for bogus requests.
2131 */
2132static int
2133bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns,
2134 u_long cmd)
2135{
2136 struct bpf_insn *fcode, *old;
2137 u_int flen, size;
2138
2139 while (d->bd_hbuf_read != 0)
2140 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2141
2142 if ((d->bd_flags & BPF_CLOSING) != 0)
2143 return (ENXIO);
2144
2145 old = d->bd_filter;
2146 if (bf_insns == USER_ADDR_NULL) {
2147 if (bf_len != 0)
2148 return (EINVAL);
2149 d->bd_filter = NULL;
2150 reset_d(d);
2151 if (old != 0)
2152 FREE((caddr_t)old, M_DEVBUF);
2153 return (0);
2154 }
2155 flen = bf_len;
2156 if (flen > BPF_MAXINSNS)
2157 return (EINVAL);
2158
2159 size = flen * sizeof(struct bpf_insn);
2160 fcode = (struct bpf_insn *) _MALLOC(size, M_DEVBUF, M_WAIT);
2161#ifdef __APPLE__
2162 if (fcode == NULL)
2163 return (ENOBUFS);
2164#endif
2165 if (copyin(bf_insns, (caddr_t)fcode, size) == 0 &&
2166 bpf_validate(fcode, (int)flen)) {
2167 d->bd_filter = fcode;
2168
2169 if (cmd == BIOCSETF32 || cmd == BIOCSETF64)
2170 reset_d(d);
2171
2172 if (old != 0)
2173 FREE((caddr_t)old, M_DEVBUF);
2174
2175 return (0);
2176 }
2177 FREE((caddr_t)fcode, M_DEVBUF);
2178 return (EINVAL);
2179}
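/*
 * Illustrative sketch (not part of this file): a user process typically
 * installs a filter with the BIOCSETF ioctl, which funnels into bpf_setf()
 * above.  The hypothetical program below accepts only IPv4 packets on a
 * DLT_EN10MB interface.
 *
 *	struct bpf_insn insns[] = {
 *		BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),	// load ethertype
 *		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ETHERTYPE_IP, 0, 1),
 *		BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	// accept whole packet
 *		BPF_STMT(BPF_RET + BPF_K, 0),		// reject
 *	};
 *	struct bpf_program prog = {
 *		.bf_len = sizeof(insns) / sizeof(insns[0]),
 *		.bf_insns = insns,
 *	};
 *	if (ioctl(bpf_fd, BIOCSETF, &prog) == -1)
 *		err(1, "BIOCSETF");
 */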
2180
2181/*
2182 * Detach a file from its current interface (if attached at all) and attach
2183 * to the interface specified by 'theywant'.
2184 * Return an errno or 0.
2185 */
2186static int
2187bpf_setif(struct bpf_d *d, ifnet_t theywant, bool do_reset, bool has_hbuf_read)
2188{
2189 struct bpf_if *bp;
2190 int error;
2191
2192 while (d->bd_hbuf_read != 0 && !has_hbuf_read)
2193 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2194
2195 if ((d->bd_flags & BPF_CLOSING) != 0)
2196 return (ENXIO);
2197
2198 /*
2199 * Look through attached interfaces for the named one.
2200 */
2201 for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) {
2202 struct ifnet *ifp = bp->bif_ifp;
2203
2204 if (ifp == 0 || ifp != theywant)
2205 continue;
2206 /*
2207 * Do not use DLT_PKTAP, unless requested explicitly
2208 */
2209 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2210 continue;
2211 /*
2212 * Skip the coprocessor interface
2213 */
2214 if (!intcoproc_unrestricted && IFNET_IS_INTCOPROC(ifp))
2215 continue;
2216 /*
2217 * We found the requested interface.
2218 * Allocate the packet buffers.
2219 */
2220 error = bpf_allocbufs(d);
2221 if (error != 0)
2222 return (error);
2223 /*
2224 * Detach if attached to something else.
2225 */
2226 if (bp != d->bd_bif) {
2227 if (d->bd_bif != NULL) {
2228 if (bpf_detachd(d, 0) != 0)
2229 return (ENXIO);
2230 }
2231 if (bpf_attachd(d, bp) != 0)
2232 return (ENXIO);
2233 }
2234 if (do_reset) {
2235 reset_d(d);
2236 }
2237 return (0);
2238 }
2239 /* Not found. */
2240 return (ENXIO);
2241}
2242
2243/*
2244 * Get a list of the available data link types for the interface.
2245 */
2246static int
2247bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p)
2248{
2249 u_int n;
2250 int error;
2251 struct ifnet *ifp;
2252 struct bpf_if *bp;
2253 user_addr_t dlist;
2254 struct bpf_dltlist bfl;
2255
2256 bcopy(addr, &bfl, sizeof (bfl));
2257 if (proc_is64bit(p)) {
2258 dlist = (user_addr_t)bfl.bfl_u.bflu_pad;
2259 } else {
2260 dlist = CAST_USER_ADDR_T(bfl.bfl_u.bflu_list);
2261 }
2262
2263 ifp = d->bd_bif->bif_ifp;
2264 n = 0;
2265 error = 0;
2266
2267 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2268 if (bp->bif_ifp != ifp)
2269 continue;
2270 /*
2271 * Do not use DLT_PKTAP, unless requested explicitly
2272 */
2273 if (bp->bif_dlt == DLT_PKTAP && !(d->bd_flags & BPF_WANT_PKTAP))
2274 continue;
2275 if (dlist != USER_ADDR_NULL) {
2276 if (n >= bfl.bfl_len) {
2277 return (ENOMEM);
2278 }
2279 error = copyout(&bp->bif_dlt, dlist,
2280 sizeof (bp->bif_dlt));
2281 if (error != 0)
2282 break;
2283 dlist += sizeof (bp->bif_dlt);
2284 }
2285 n++;
2286 }
2287 bfl.bfl_len = n;
2288 bcopy(&bfl, addr, sizeof (bfl));
2289
2290 return (error);
2291}
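/*
 * Illustrative sketch (not part of this file): user space drives
 * bpf_getdltlist() through the BIOCGDLTLIST ioctl.  As handled above, a
 * NULL list pointer returns only the count, so callers commonly issue the
 * ioctl twice.
 *
 *	struct bpf_dltlist dl = { 0 };
 *	if (ioctl(bpf_fd, BIOCGDLTLIST, &dl) == -1)	// first call: count only
 *		err(1, "BIOCGDLTLIST");
 *	dl.bfl_list = calloc(dl.bfl_len, sizeof(u_int32_t));
 *	if (dl.bfl_list == NULL)
 *		err(1, "calloc");
 *	if (ioctl(bpf_fd, BIOCGDLTLIST, &dl) == -1)	// second call: fill list
 *		err(1, "BIOCGDLTLIST");
 */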
2292
2293/*
2294 * Set the data link type of a BPF instance.
2295 */
2296static int
2297bpf_setdlt(struct bpf_d *d, uint32_t dlt)
2298{
2299 int error, opromisc;
2300 struct ifnet *ifp;
2301 struct bpf_if *bp;
2302
2303 if (d->bd_bif->bif_dlt == dlt)
2304 return (0);
2305
2306 while (d->bd_hbuf_read != 0)
2307 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2308
2309 if ((d->bd_flags & BPF_CLOSING) != 0)
2310 return (ENXIO);
2311
2312 ifp = d->bd_bif->bif_ifp;
2313 for (bp = bpf_iflist; bp; bp = bp->bif_next) {
2314 if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) {
2315 /*
2316 * Do not use DLT_PKTAP, unless requested explicitly
2317 */
2318 if (bp->bif_dlt == DLT_PKTAP &&
2319 !(d->bd_flags & BPF_WANT_PKTAP)) {
2320 continue;
2321 }
2322 break;
2323 }
2324 }
2325 if (bp != NULL) {
2326 opromisc = d->bd_promisc;
2327 if (bpf_detachd(d, 0) != 0)
2328 return (ENXIO);
2329 error = bpf_attachd(d, bp);
2330 if (error) {
2331 printf("bpf_setdlt: bpf_attachd %s%d failed (%d)\n",
2332 ifnet_name(bp->bif_ifp), ifnet_unit(bp->bif_ifp),
2333 error);
2334 return (error);
2335 }
2336 reset_d(d);
2337 if (opromisc) {
2338 lck_mtx_unlock(bpf_mlock);
2339 error = ifnet_set_promiscuous(bp->bif_ifp, 1);
2340 lck_mtx_lock(bpf_mlock);
2341 if (error) {
2342 printf("%s: ifpromisc %s%d failed (%d)\n",
2343 __func__, ifnet_name(bp->bif_ifp),
2344 ifnet_unit(bp->bif_ifp), error);
2345 } else {
2346 d->bd_promisc = 1;
2347 }
2348 }
2349 }
2350 return (bp == NULL ? EINVAL : 0);
2351}
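/*
 * Illustrative sketch (not part of this file): switching the data link type
 * from user space goes through BIOCSDLT, which lands in bpf_setdlt().
 * Because DLT_PKTAP is skipped unless explicitly requested, a caller that
 * wants the packet tap DLT would first opt in with BIOCSWANTPKTAP.
 *
 *	u_int want = 1;
 *	u_int dlt = DLT_PKTAP;
 *	if (ioctl(bpf_fd, BIOCSWANTPKTAP, &want) == -1)
 *		err(1, "BIOCSWANTPKTAP");
 *	if (ioctl(bpf_fd, BIOCSDLT, &dlt) == -1)
 *		err(1, "BIOCSDLT");
 */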
2352
2353static int
2354bpf_set_traffic_class(struct bpf_d *d, int tc)
2355{
2356 int error = 0;
2357
2358 if (!SO_VALID_TC(tc))
2359 error = EINVAL;
2360 else
2361 d->bd_traffic_class = tc;
2362
2363 return (error);
2364}
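/*
 * Illustrative sketch (not part of this file, assuming the BIOCSETTC ioctl
 * is what reaches this helper): bpf_set_traffic_class() constrains the
 * service class applied to packets written through the descriptor.
 *
 *	int tc = SO_TC_VO;	// hypothetical choice: voice class for writes
 *	if (ioctl(bpf_fd, BIOCSETTC, &tc) == -1)
 *		err(1, "BIOCSETTC");
 */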
2365
2366static void
2367bpf_set_packet_service_class(struct mbuf *m, int tc)
2368{
2369 if (!(m->m_flags & M_PKTHDR))
2370 return;
2371
2372 VERIFY(SO_VALID_TC(tc));
2373 (void) m_set_service_class(m, so_tc2msc(tc));
2374}
2375
2376/*
2377 * Support for select()
2378 *
2379 * Return true iff the specific operation will not block indefinitely.
2380 * Otherwise, return false but make a note that a selwakeup() must be done.
2381 */
2382int
2383bpfselect(dev_t dev, int which, void * wql, struct proc *p)
2384{
2385 struct bpf_d *d;
2386 int ret = 0;
2387
2388 lck_mtx_lock(bpf_mlock);
2389
2390 d = bpf_dtab[minor(dev)];
2391 if (d == NULL || d == BPF_DEV_RESERVED ||
2392 (d->bd_flags & BPF_CLOSING) != 0) {
2393 lck_mtx_unlock(bpf_mlock);
2394 return (ENXIO);
2395 }
2396
2397 bpf_acquire_d(d);
2398
2399 if (d->bd_bif == NULL) {
2400 bpf_release_d(d);
2401 lck_mtx_unlock(bpf_mlock);
2402 return (ENXIO);
2403 }
2404
2405 while (d->bd_hbuf_read != 0)
2406 msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);
2407
2408 if ((d->bd_flags & BPF_CLOSING) != 0) {
2409 bpf_release_d(d);
2410 lck_mtx_unlock(bpf_mlock);
2411 return (ENXIO);
2412 }
2413
2414 switch (which) {
2415 case FREAD:
2416 if (d->bd_hlen != 0 ||
2417 ((d->bd_immediate ||
2418 d->bd_state == BPF_TIMED_OUT) && d->bd_slen != 0))
2419 ret = 1; /* read has data to return */
2420 else {
2421 /*
2422 * Read has no data to return.
2423 * Make the select wait, and start a timer if
2424 * necessary.
2425 */
2426 selrecord(p, &d->bd_sel, wql);
2427 bpf_start_timer(d);
2428 }
2429 break;
2430
2431 case FWRITE:
2432 /* can't determine whether a write would block */
2433 ret = 1;
2434 break;
2435 }
2436
2437 bpf_release_d(d);
2438 lck_mtx_unlock(bpf_mlock);
2439
2440 return (ret);
2441}
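/*
 * Illustrative sketch (not part of this file): bpfselect() backs select()
 * and poll() on a bpf descriptor.  With immediate mode off, a read becomes
 * ready only when the hold buffer has data or the read timeout fires, so a
 * typical capture loop waits before reading.
 *
 *	char buf[4096];		// hypothetical; should match BIOCGBLEN
 *	fd_set rfds;
 *	FD_ZERO(&rfds);
 *	FD_SET(bpf_fd, &rfds);
 *	if (select(bpf_fd + 1, &rfds, NULL, NULL, NULL) > 0 &&
 *	    FD_ISSET(bpf_fd, &rfds)) {
 *		ssize_t n = read(bpf_fd, buf, sizeof(buf));
 *		// walk the records in buf (see the sketch after catchpacket())
 *	}
 */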
2442
2443/*
2444 * Support for kevent() system call. Register EVFILT_READ filters and
2445 * reject all others.
2446 */
2447int bpfkqfilter(dev_t dev, struct knote *kn);
2448static void filt_bpfdetach(struct knote *);
2449static int filt_bpfread(struct knote *, long);
2450static int filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev);
2451static int filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
2452 struct kevent_internal_s *kev);
2453
2454SECURITY_READ_ONLY_EARLY(struct filterops) bpfread_filtops = {
2455 .f_isfd = 1,
2456 .f_detach = filt_bpfdetach,
2457 .f_event = filt_bpfread,
2458 .f_touch = filt_bpftouch,
2459 .f_process = filt_bpfprocess,
2460};
2461
2462static int
2463filt_bpfread_common(struct knote *kn, struct bpf_d *d)
2464{
2465 int ready = 0;
2466
2467 if (d->bd_immediate) {
2468 /*
2469 * If there's data in the hold buffer, it's the
2470 * amount of data a read will return.
2471 *
2472 * If there's no data in the hold buffer, but
2473 * there's data in the store buffer, a read will
2474 * immediately rotate the store buffer to the
2475 * hold buffer, the amount of data in the store
2476 * buffer is the amount of data a read will
2477 * return.
2478 *
2479 * If there's no data in either buffer, we're not
2480 * ready to read.
2481 */
2482 kn->kn_data = (d->bd_hlen == 0 || d->bd_hbuf_read != 0 ?
2483 d->bd_slen : d->bd_hlen);
2484 int64_t lowwat = 1;
2485 if (kn->kn_sfflags & NOTE_LOWAT) {
2486 if (kn->kn_sdata > d->bd_bufsize)
2487 lowwat = d->bd_bufsize;
2488 else if (kn->kn_sdata > lowwat)
2489 lowwat = kn->kn_sdata;
2490 }
2491 ready = (kn->kn_data >= lowwat);
2492 } else {
2493 /*
2494 * If there's data in the hold buffer, it's the
2495 * amount of data a read will return.
2496 *
2497 * If there's no data in the hold buffer, but
2498 * there's data in the store buffer, if the
2499 * timer has expired a read will immediately
2500 * rotate the store buffer to the hold buffer,
2501 * so the amount of data in the store buffer is
2502 * the amount of data a read will return.
2503 *
2504 * If there's no data in either buffer, or there's
2505 * no data in the hold buffer and the timer hasn't
2506 * expired, we're not ready to read.
2507 */
2508 kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read != 0) &&
2509 d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen);
2510 ready = (kn->kn_data > 0);
2511 }
2512 if (!ready)
2513 bpf_start_timer(d);
2514
2515 return (ready);
2516}
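/*
 * Illustrative sketch (not part of this file): the low-water logic above is
 * what a kevent() client exercises by registering EVFILT_READ with
 * NOTE_LOWAT; the knote fires only once at least that many bytes are
 * buffered (capped at the descriptor's buffer size).
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, bpf_fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 4096, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 *	// later: kevent(kq, NULL, 0, &kev, 1, NULL) returns when readable
 */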
2517
2518int
2519bpfkqfilter(dev_t dev, struct knote *kn)
2520{
2521 struct bpf_d *d;
2522 int res;
2523
2524 /*
2525 * Is this device a bpf?
2526 */
2527 if (major(dev) != CDEV_MAJOR ||
2528 kn->kn_filter != EVFILT_READ) {
2529 kn->kn_flags = EV_ERROR;
2530 kn->kn_data = EINVAL;
2531 return (0);
2532 }
2533
2534 lck_mtx_lock(bpf_mlock);
2535
2536 d = bpf_dtab[minor(dev)];
2537
2538 if (d == NULL || d == BPF_DEV_RESERVED ||
2539 (d->bd_flags & BPF_CLOSING) != 0 ||
2540 d->bd_bif == NULL) {
2541 lck_mtx_unlock(bpf_mlock);
2542 kn->kn_flags = EV_ERROR;
2543 kn->kn_data = ENXIO;
2544 return (0);
2545 }
2546
2547 kn->kn_hook = d;
2548 kn->kn_filtid = EVFILTID_BPFREAD;
2549 KNOTE_ATTACH(&d->bd_sel.si_note, kn);
2550 d->bd_flags |= BPF_KNOTE;
2551
2552 /* capture the current state */
2553 res = filt_bpfread_common(kn, d);
2554
2555 lck_mtx_unlock(bpf_mlock);
2556
2557 return (res);
2558}
2559
2560static void
2561filt_bpfdetach(struct knote *kn)
2562{
2563 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2564
2565 lck_mtx_lock(bpf_mlock);
2566 if (d->bd_flags & BPF_KNOTE) {
2567 KNOTE_DETACH(&d->bd_sel.si_note, kn);
2568 d->bd_flags &= ~BPF_KNOTE;
2569 }
2570 lck_mtx_unlock(bpf_mlock);
2571}
2572
2573static int
2574filt_bpfread(struct knote *kn, long hint)
2575{
2576#pragma unused(hint)
2577 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2578
2579 return (filt_bpfread_common(kn, d));
2580}
2581
2582static int
2583filt_bpftouch(struct knote *kn, struct kevent_internal_s *kev)
2584{
2585 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2586 int res;
2587
2588 lck_mtx_lock(bpf_mlock);
2589
2590 /* save off the lowat threshold and flag */
2591 kn->kn_sdata = kev->data;
2592 kn->kn_sfflags = kev->fflags;
2593
2594 /* output data will be re-generated here */
2595 res = filt_bpfread_common(kn, d);
2596
2597 lck_mtx_unlock(bpf_mlock);
2598
2599 return (res);
2600}
2601
2602static int
2603filt_bpfprocess(struct knote *kn, struct filt_process_s *data,
2604 struct kevent_internal_s *kev)
2605{
2606#pragma unused(data)
2607 struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
2608 int res;
2609
2610 lck_mtx_lock(bpf_mlock);
2611 res = filt_bpfread_common(kn, d);
2612 if (res) {
2613 *kev = kn->kn_kevent;
2614 }
2615 lck_mtx_unlock(bpf_mlock);
2616
2617 return (res);
2618}
2619
2620/*
2621 * Copy data from an mbuf chain into a buffer. This code is derived
2622 * from m_copydata in kern/uipc_mbuf.c.
2623 */
2624static void
2625bpf_mcopy(struct mbuf * m, void *dst_arg, size_t len)
2626{
2627 u_int count;
2628 u_char *dst;
2629
2630 dst = dst_arg;
2631 while (len > 0) {
2632 if (m == 0)
2633 panic("bpf_mcopy");
2634 count = min(m->m_len, len);
2635 bcopy(mbuf_data(m), dst, count);
2636 m = m->m_next;
2637 dst += count;
2638 len -= count;
2639 }
2640}
2641
2642static inline void
2643bpf_tap_imp(
2644 ifnet_t ifp,
2645 u_int32_t dlt,
2646 struct bpf_packet *bpf_pkt,
2647 int outbound)
2648{
2649 struct bpf_d *d;
2650 u_int slen;
2651 struct bpf_if *bp;
2652
2653 /*
2654 * It's possible that we get here after the bpf descriptor has been
2655 * detached from the interface; in such a case we simply return.
2656 * Lock ordering is important since we can be called asynchronously
2657 * (from IOKit) to process an inbound packet; when that happens
2658 * we would have been holding its "gateLock" and will be acquiring
2659 * "bpf_mlock" upon entering this routine. Due to that, we release
2660 * "bpf_mlock" prior to calling ifnet_set_promiscuous (which will
2661 * acquire "gateLock" in IOKit), in order to avoid a deadlock
2662 * when an ifnet_set_promiscuous request simultaneously collides with
2663 * an inbound packet being passed into the tap callback.
2664 */
2665 lck_mtx_lock(bpf_mlock);
2666 if (ifp->if_bpf == NULL) {
2667 lck_mtx_unlock(bpf_mlock);
2668 return;
2669 }
2670 for (bp = ifp->if_bpf; bp != NULL; bp = bp->bif_next) {
2671 if (bp->bif_ifp != ifp) {
2672 /* wrong interface */
2673 bp = NULL;
2674 break;
2675 }
2676 if (dlt == 0 || bp->bif_dlt == dlt) {
2677 /* tapping default DLT or DLT matches */
2678 break;
2679 }
2680 }
2681 if (bp == NULL) {
2682 goto done;
2683 }
2684 for (d = bp->bif_dlist; d; d = d->bd_next) {
2685 struct bpf_packet *bpf_pkt_saved = bpf_pkt;
2686 struct bpf_packet bpf_pkt_tmp;
2687 struct pktap_header_buffer bpfp_header_tmp;
2688
2689 if (outbound && !d->bd_seesent)
2690 continue;
2691
2692 ++d->bd_rcount;
2693 slen = bpf_filter(d->bd_filter, (u_char *)bpf_pkt,
2694 bpf_pkt->bpfp_total_length, 0);
2695 if (bp->bif_ifp->if_type == IFT_PKTAP &&
2696 bp->bif_dlt == DLT_PKTAP) {
2697 /*
2698 * Need to copy the bpf_pkt because the conversion
2699 * to v2 pktap header modifies the content of the
2700 * bpfp_header
2701 */
2702 if ((d->bd_flags & BPF_PKTHDRV2) &&
2703 bpf_pkt->bpfp_header_length <= sizeof(bpfp_header_tmp)) {
2704 bpf_pkt_tmp = *bpf_pkt;
2705
2706 bpf_pkt = &bpf_pkt_tmp;
2707
2708 memcpy(&bpfp_header_tmp, bpf_pkt->bpfp_header,
2709 bpf_pkt->bpfp_header_length);
2710
2711 bpf_pkt->bpfp_header = &bpfp_header_tmp;
2712
2713 convert_to_pktap_header_to_v2(bpf_pkt,
2714 !!(d->bd_flags & BPF_TRUNCATE));
2715 }
2716
2717 if (d->bd_flags & BPF_TRUNCATE) {
2718 slen = min(slen,
2719 get_pkt_trunc_len((u_char *)bpf_pkt,
2720 bpf_pkt->bpfp_total_length));
2721 }
2722 }
2723 if (slen != 0) {
2724#if CONFIG_MACF_NET
2725 if (mac_bpfdesc_check_receive(d, bp->bif_ifp) != 0)
2726 continue;
2727#endif
2728 catchpacket(d, bpf_pkt, slen, outbound);
2729 }
2730 bpf_pkt = bpf_pkt_saved;
2731 }
2732
2733done:
2734 lck_mtx_unlock(bpf_mlock);
2735}
2736
2737static inline void
2738bpf_tap_mbuf(
2739 ifnet_t ifp,
2740 u_int32_t dlt,
2741 mbuf_t m,
2742 void* hdr,
2743 size_t hlen,
2744 int outbound)
2745{
2746 struct bpf_packet bpf_pkt;
2747 struct mbuf *m0;
2748
2749 if (ifp->if_bpf == NULL) {
2750 /* quickly check without taking lock */
2751 return;
2752 }
2753 bpf_pkt.bpfp_type = BPF_PACKET_TYPE_MBUF;
2754 bpf_pkt.bpfp_mbuf = m;
2755 bpf_pkt.bpfp_total_length = 0;
2756 for (m0 = m; m0 != NULL; m0 = m0->m_next)
2757 bpf_pkt.bpfp_total_length += m0->m_len;
2758 bpf_pkt.bpfp_header = hdr;
2759 if (hdr != NULL) {
2760 bpf_pkt.bpfp_total_length += hlen;
2761 bpf_pkt.bpfp_header_length = hlen;
2762 } else {
2763 bpf_pkt.bpfp_header_length = 0;
2764 }
2765 bpf_tap_imp(ifp, dlt, &bpf_pkt, outbound);
2766}
2767
2768void
2769bpf_tap_out(
2770 ifnet_t ifp,
2771 u_int32_t dlt,
2772 mbuf_t m,
2773 void* hdr,
2774 size_t hlen)
2775{
2776 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 1);
2777}
2778
2779void
2780bpf_tap_in(
2781 ifnet_t ifp,
2782 u_int32_t dlt,
2783 mbuf_t m,
2784 void* hdr,
2785 size_t hlen)
2786{
2787 bpf_tap_mbuf(ifp, dlt, m, hdr, hlen, 0);
2788}
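/*
 * Illustrative sketch (not part of this file): a hypothetical network
 * driver that registered itself with bpf_attach() feeds both directions of
 * traffic to bpf through the two wrappers above.
 *
 *	// on transmit, before handing the mbuf to the hardware
 *	bpf_tap_out(ifp, DLT_EN10MB, m, NULL, 0);
 *
 *	// on receive, once the link-layer header is in place
 *	bpf_tap_in(ifp, DLT_EN10MB, m, NULL, 0);
 */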
2789
2790/* Callback registered with Ethernet driver. */
2791static int bpf_tap_callback(struct ifnet *ifp, struct mbuf *m)
2792{
2793 bpf_tap_mbuf(ifp, 0, m, NULL, 0, mbuf_pkthdr_rcvif(m) == NULL);
2794
2795 return (0);
2796}
2797
2798
2799static errno_t
2800bpf_copydata(struct bpf_packet *pkt, size_t off, size_t len, void* out_data)
2801{
2802 errno_t err = 0;
2803 if (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF) {
2804 err = mbuf_copydata(pkt->bpfp_mbuf, off, len, out_data);
2805 } else {
2806 err = EINVAL;
2807 }
2808
2809 return (err);
2810}
2811
2812static void
2813copy_bpf_packet(struct bpf_packet * pkt, void * dst, size_t len)
2814{
2815 /* copy the optional header */
2816 if (pkt->bpfp_header_length != 0) {
2817 size_t count = min(len, pkt->bpfp_header_length);
2818 bcopy(pkt->bpfp_header, dst, count);
2819 len -= count;
2820 dst += count;
2821 }
2822 if (len == 0) {
2823 /* nothing past the header */
2824 return;
2825 }
2826 /* copy the packet */
2827 switch (pkt->bpfp_type) {
2828 case BPF_PACKET_TYPE_MBUF:
2829 bpf_mcopy(pkt->bpfp_mbuf, dst, len);
2830 break;
2831 default:
2832 break;
2833 }
2834}
2835
2836static uint16_t
2837get_esp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2838 const uint16_t remaining_caplen)
2839{
2840 /*
2841 * For some reason tcpdump expects to have one byte beyond the ESP header
2842 */
2843 uint16_t trunc_len = ESP_HDR_SIZE + 1;
2844
2845 if (trunc_len > remaining_caplen)
2846 return (remaining_caplen);
2847
2848 return (trunc_len);
2849}
2850
2851static uint16_t
2852get_isakmp_trunc_len(__unused struct bpf_packet *pkt, __unused uint16_t off,
2853 const uint16_t remaining_caplen)
2854{
2855 /*
2856	 * Include the generic payload header
2857 */
2858 uint16_t trunc_len = ISAKMP_HDR_SIZE;
2859
2860 if (trunc_len > remaining_caplen)
2861 return (remaining_caplen);
2862
2863 return (trunc_len);
2864}
2865
2866static uint16_t
2867get_isakmp_natt_trunc_len(struct bpf_packet *pkt, uint16_t off,
2868 const uint16_t remaining_caplen)
2869{
2870 int err = 0;
2871 uint16_t trunc_len = 0;
2872 char payload[remaining_caplen];
2873
2874 err = bpf_copydata(pkt, off, remaining_caplen, payload);
2875 if (err != 0)
2876 return (remaining_caplen);
2877 /*
2878	 * There are three cases:
2879	 * - IKE: the payload starts with a 4-byte header set to zero before the ISAKMP header
2880	 * - keepalive: 1 byte payload
2881 * - otherwise it's ESP
2882 */
2883 if (remaining_caplen >= 4 &&
2884 payload[0] == 0 && payload[1] == 0 &&
2885 payload[2] == 0 && payload[3] == 0) {
2886 trunc_len = 4 + get_isakmp_trunc_len(pkt, off + 4, remaining_caplen - 4);
2887 } else if (remaining_caplen == 1) {
2888 trunc_len = 1;
2889 } else {
2890 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
2891 }
2892
2893 if (trunc_len > remaining_caplen)
2894 return (remaining_caplen);
2895
2896 return (trunc_len);
2897
2898}
2899
2900static uint16_t
2901get_udp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2902{
2903 int err = 0;
2904 uint16_t trunc_len = sizeof(struct udphdr); /* By default no UDP payload */
2905
2906 if (trunc_len >= remaining_caplen)
2907 return (remaining_caplen);
2908
2909 struct udphdr udphdr;
2910 err = bpf_copydata(pkt, off, sizeof(struct udphdr), &udphdr);
2911 if (err != 0)
2912 return (remaining_caplen);
2913
2914 u_short sport, dport;
2915
2916 sport = EXTRACT_SHORT(&udphdr.uh_sport);
2917 dport = EXTRACT_SHORT(&udphdr.uh_dport);
2918
2919 if (dport == PORT_DNS || sport == PORT_DNS) {
2920 /*
2921 * Full UDP payload for DNS
2922 */
2923 trunc_len = remaining_caplen;
2924 } else if ((sport == PORT_BOOTPS && dport == PORT_BOOTPC) ||
2925 (sport == PORT_BOOTPC && dport == PORT_BOOTPS)) {
2926 /*
2927 * Full UDP payload for BOOTP and DHCP
2928 */
2929 trunc_len = remaining_caplen;
2930 } else if (dport == PORT_ISAKMP && sport == PORT_ISAKMP) {
2931 /*
2932 * Return the ISAKMP header
2933 */
2934 trunc_len += get_isakmp_trunc_len(pkt, off + sizeof(struct udphdr),
2935 remaining_caplen - sizeof(struct udphdr));
2936 } else if (dport == PORT_ISAKMP_NATT && sport == PORT_ISAKMP_NATT) {
2937 trunc_len += get_isakmp_natt_trunc_len(pkt, off + sizeof(struct udphdr),
2938 remaining_caplen - sizeof(struct udphdr));
2939 }
2940 if (trunc_len >= remaining_caplen)
2941 return (remaining_caplen);
2942
2943 return (trunc_len);
2944}
2945
2946static uint16_t
2947get_tcp_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2948{
2949 int err = 0;
2950 uint16_t trunc_len = sizeof(struct tcphdr); /* By default no TCP payload */
2951 if (trunc_len >= remaining_caplen)
2952 return (remaining_caplen);
2953
2954 struct tcphdr tcphdr;
2955 err = bpf_copydata(pkt, off, sizeof(struct tcphdr), &tcphdr);
2956 if (err != 0)
2957 return (remaining_caplen);
2958
2959 u_short sport, dport;
2960 sport = EXTRACT_SHORT(&tcphdr.th_sport);
2961 dport = EXTRACT_SHORT(&tcphdr.th_dport);
2962
2963 if (dport == PORT_DNS || sport == PORT_DNS) {
2964 /*
2965 * Full TCP payload for DNS
2966 */
2967 trunc_len = remaining_caplen;
2968 } else {
2969 trunc_len = tcphdr.th_off << 2;
2970 }
2971 if (trunc_len >= remaining_caplen)
2972 return (remaining_caplen);
2973
2974 return (trunc_len);
2975}
2976
2977static uint16_t
2978get_proto_trunc_len(uint8_t proto, struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
2979{
2980 uint16_t trunc_len;
2981
2982 switch (proto) {
2983 case IPPROTO_ICMP: {
2984 /*
2985		 * Full ICMP payload
2986 */
2987 trunc_len = remaining_caplen;
2988 break;
2989 }
2990 case IPPROTO_ICMPV6: {
2991 /*
2992		 * Full ICMPv6 payload
2993 */
2994 trunc_len = remaining_caplen;
2995 break;
2996 }
2997 case IPPROTO_IGMP: {
2998 /*
2999 * Full IGMP payload
3000 */
3001 trunc_len = remaining_caplen;
3002 break;
3003 }
3004 case IPPROTO_UDP: {
3005 trunc_len = get_udp_trunc_len(pkt, off, remaining_caplen);
3006 break;
3007 }
3008 case IPPROTO_TCP: {
3009 trunc_len = get_tcp_trunc_len(pkt, off, remaining_caplen);
3010 break;
3011 }
3012 case IPPROTO_ESP: {
3013 trunc_len = get_esp_trunc_len(pkt, off, remaining_caplen);
3014 break;
3015 }
3016 default: {
3017 /*
3018 * By default we only include the IP header
3019 */
3020 trunc_len = 0;
3021 break;
3022 }
3023 }
3024 if (trunc_len >= remaining_caplen)
3025 return (remaining_caplen);
3026
3027 return (trunc_len);
3028}
3029
3030static uint16_t
3031get_ip_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3032{
3033 int err = 0;
3034 uint16_t iplen = sizeof(struct ip);
3035 if (iplen >= remaining_caplen)
3036 return (remaining_caplen);
3037
3038 struct ip iphdr;
3039 err = bpf_copydata(pkt, off, sizeof(struct ip), &iphdr);
3040 if (err != 0)
3041 return (remaining_caplen);
3042
3043 uint8_t proto = 0;
3044
3045 iplen = iphdr.ip_hl << 2;
3046 if (iplen >= remaining_caplen)
3047 return (remaining_caplen);
3048
3049 proto = iphdr.ip_p;
3050 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3051
3052 if (iplen >= remaining_caplen)
3053 return (remaining_caplen);
3054
3055 return (iplen);
3056}
3057
3058static uint16_t
3059get_ip6_trunc_len(struct bpf_packet *pkt, uint16_t off, const uint16_t remaining_caplen)
3060{
3061 int err = 0;
3062 uint16_t iplen = sizeof(struct ip6_hdr);
3063 if (iplen >= remaining_caplen)
3064 return (remaining_caplen);
3065
3066 struct ip6_hdr ip6hdr;
3067 err = bpf_copydata(pkt, off, sizeof(struct ip6_hdr), &ip6hdr);
3068 if (err != 0)
3069 return (remaining_caplen);
3070
3071 uint8_t proto = 0;
3072
3073 /*
3074 * TBD: process the extension headers
3075 */
3076 proto = ip6hdr.ip6_nxt;
3077 iplen += get_proto_trunc_len(proto, pkt, off + iplen, remaining_caplen - iplen);
3078
3079 if (iplen >= remaining_caplen)
3080 return (remaining_caplen);
3081
3082 return (iplen);
3083}
3084
3085static uint16_t
3086get_ether_trunc_len(struct bpf_packet *pkt, int off, const uint16_t remaining_caplen)
3087{
3088 int err = 0;
3089 uint16_t ethlen = sizeof(struct ether_header);
3090 if (ethlen >= remaining_caplen)
3091 return (remaining_caplen);
3092
3093 struct ether_header eh;
3094 u_short type;
3095 err = bpf_copydata(pkt, off, sizeof(struct ether_header), &eh);
3096 if (err != 0)
3097 return (remaining_caplen);
3098
3099 type = EXTRACT_SHORT(&eh.ether_type);
3100 /* Include full ARP */
3101 if (type == ETHERTYPE_ARP) {
3102 ethlen = remaining_caplen;
3103 } else if (type != ETHERTYPE_IP && type != ETHERTYPE_IPV6) {
3104 ethlen = min(BPF_MIN_PKT_SIZE, remaining_caplen);
3105 } else {
3106 if (type == ETHERTYPE_IP) {
3107 ethlen += get_ip_trunc_len(pkt, sizeof(struct ether_header),
3108 remaining_caplen);
3109 } else if (type == ETHERTYPE_IPV6) {
3110 ethlen += get_ip6_trunc_len(pkt, sizeof(struct ether_header),
3111 remaining_caplen);
3112 }
3113 }
3114 return (ethlen);
3115}
3116
3117static uint32_t
3118get_pkt_trunc_len(u_char *p, u_int len)
3119{
3120 struct bpf_packet *pkt = (struct bpf_packet *)(void *) p;
3121 struct pktap_header *pktap = (struct pktap_header *) (pkt->bpfp_header);
3122 uint32_t out_pkt_len = 0, tlen = 0;
3123 /*
3124 * pktap->pth_frame_pre_length is L2 header length and accounts
3125 * for both pre and pre_adjust.
3126 * pktap->pth_length is sizeof(pktap_header) (excl the pre/pre_adjust)
3127 * pkt->bpfp_header_length is (pktap->pth_length + pre_adjust)
3128 * pre is the offset to the L3 header after the bpfp_header, or length
3129 * of L2 header after bpfp_header, if present.
3130 */
3131 uint32_t pre = pktap->pth_frame_pre_length -
3132 (pkt->bpfp_header_length - pktap->pth_length);
3133
3134 /* Length of the input packet starting from L3 header */
3135 uint32_t in_pkt_len = len - pkt->bpfp_header_length - pre;
3136 if (pktap->pth_protocol_family == AF_INET ||
3137 pktap->pth_protocol_family == AF_INET6) {
3138 /* Contains L2 header */
3139 if (pre > 0) {
3140 if (pre < sizeof(struct ether_header))
3141 goto too_short;
3142
3143 out_pkt_len = get_ether_trunc_len(pkt, 0, in_pkt_len);
3144 } else if (pre == 0) {
3145 if (pktap->pth_protocol_family == AF_INET) {
3146 out_pkt_len = get_ip_trunc_len(pkt, pre, in_pkt_len);
3147 } else if (pktap->pth_protocol_family == AF_INET6) {
3148 out_pkt_len = get_ip6_trunc_len(pkt, pre, in_pkt_len);
3149 }
3150 } else {
3151 /* Ideally pre should be >= 0. This is an exception */
3152 out_pkt_len = min(BPF_MIN_PKT_SIZE, in_pkt_len);
3153 }
3154 } else {
3155 if (pktap->pth_iftype == IFT_ETHER) {
3156 if (in_pkt_len < sizeof(struct ether_header)) {
3157 goto too_short;
3158 }
3159 /* At most include the Ethernet header and 16 bytes */
3160 out_pkt_len = MIN(sizeof(struct ether_header) + 16,
3161 in_pkt_len);
3162 } else {
3163 /*
3164 * For unknown protocols include at most 16 bytes
3165 */
3166 out_pkt_len = MIN(16, in_pkt_len);
3167 }
3168 }
3169done:
3170 tlen = pkt->bpfp_header_length + out_pkt_len + pre;
3171 return (tlen);
3172too_short:
3173 out_pkt_len = in_pkt_len;
3174 goto done;
3175}
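/*
 * Worked example (hypothetical numbers): for a pktap-encapsulated IPv4/UDP
 * DNS reply with bpfp_header_length == pth_length (no L2 header recorded),
 * pre is 0 and in_pkt_len = len - bpfp_header_length.  get_ip_trunc_len()
 * keeps the IP header plus, for port 53, the full UDP payload, so
 * out_pkt_len == in_pkt_len and tlen == len.  For an unknown protocol
 * family on a non-Ethernet interface, out_pkt_len is capped at 16 bytes and
 * tlen adds back bpfp_header_length and pre.
 */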
3176
3177/*
3178 * Move the packet data from interface memory (pkt) into the
3179 * store buffer. Wake up any pending reader when the buffer fills up, or
3180 * when immediate mode is set or the read timeout has already expired.
3181 */
3182static void
3183catchpacket(struct bpf_d *d, struct bpf_packet * pkt,
3184 u_int snaplen, int outbound)
3185{
3186 struct bpf_hdr *hp;
3187 struct bpf_hdr_ext *ehp;
3188 int totlen, curlen;
3189 int hdrlen, caplen;
3190 int do_wakeup = 0;
3191 u_char *payload;
3192 struct timeval tv;
3193
3194 hdrlen = (d->bd_flags & BPF_EXTENDED_HDR) ? d->bd_bif->bif_exthdrlen :
3195 d->bd_bif->bif_hdrlen;
3196 /*
3197 * Figure out how many bytes to move. If the packet is
3198 * greater or equal to the snapshot length, transfer that
3199 * much. Otherwise, transfer the whole packet (unless
3200 * we hit the buffer size limit).
3201 */
3202 totlen = hdrlen + min(snaplen, pkt->bpfp_total_length);
3203 if (totlen > d->bd_bufsize)
3204 totlen = d->bd_bufsize;
3205
3206 if (hdrlen > totlen)
3207 return;
3208
3209 /*
3210 * Round up the end of the previous packet to the next longword.
3211 */
3212 curlen = BPF_WORDALIGN(d->bd_slen);
3213 if (curlen + totlen > d->bd_bufsize) {
3214 /*
3215 * This packet will overflow the storage buffer.
3216 * Rotate the buffers if we can, then wakeup any
3217 * pending reads.
3218 *
3219 * We cannot rotate buffers if a read is in progress
3220 * so drop the packet
3221 */
3222 if (d->bd_hbuf_read != 0) {
3223 ++d->bd_dcount;
3224 return;
3225 }
3226
3227 if (d->bd_fbuf == NULL) {
3228 if (d->bd_headdrop == 0) {
3229 /*
3230 * We haven't completed the previous read yet,
3231 * so drop the packet.
3232 */
3233 ++d->bd_dcount;
3234 return;
3235 }
3236 /*
3237 * Drop the hold buffer as it contains older packets
3238 */
3239 d->bd_dcount += d->bd_hcnt;
3240 d->bd_fbuf = d->bd_hbuf;
3241 ROTATE_BUFFERS(d);
3242 } else {
3243 ROTATE_BUFFERS(d);
3244 }
3245 do_wakeup = 1;
3246 curlen = 0;
3247 } else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
3248 /*
3249 * Immediate mode is set, or the read timeout has
3250 * already expired during a select call. A packet
3251 * arrived, so the reader should be woken up.
3252 */
3253 do_wakeup = 1;
3254
3255 /*
3256 * Append the bpf header.
3257 */
3258 microtime(&tv);
3259 if (d->bd_flags & BPF_EXTENDED_HDR) {
3260 struct mbuf *m;
3261
3262 m = (pkt->bpfp_type == BPF_PACKET_TYPE_MBUF)
3263 ? pkt->bpfp_mbuf : NULL;
3264 ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen);
3265 memset(ehp, 0, sizeof(*ehp));
3266 ehp->bh_tstamp.tv_sec = tv.tv_sec;
3267 ehp->bh_tstamp.tv_usec = tv.tv_usec;
3268
3269 ehp->bh_datalen = pkt->bpfp_total_length;
3270 ehp->bh_hdrlen = hdrlen;
3271 caplen = ehp->bh_caplen = totlen - hdrlen;
3272 if (m == NULL) {
3273 if (outbound) {
3274 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3275 } else {
3276 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3277 }
3278 } else if (outbound) {
3279 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT;
3280
3281 /* only do lookups on non-raw INPCB */
3282 if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID|
3283 PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) ==
3284 (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) &&
3285 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3286 ehp->bh_flowid = m->m_pkthdr.pkt_flowid;
3287 ehp->bh_proto = m->m_pkthdr.pkt_proto;
3288 }
3289 ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
3290 if (m->m_pkthdr.pkt_flags & PKTF_TCP_REXMT)
3291 ehp->bh_pktflags |= BPF_PKTFLAGS_TCP_REXMT;
3292 if (m->m_pkthdr.pkt_flags & PKTF_START_SEQ)
3293 ehp->bh_pktflags |= BPF_PKTFLAGS_START_SEQ;
3294 if (m->m_pkthdr.pkt_flags & PKTF_LAST_PKT)
3295 ehp->bh_pktflags |= BPF_PKTFLAGS_LAST_PKT;
3296 if (m->m_pkthdr.pkt_flags & PKTF_VALID_UNSENT_DATA) {
3297 ehp->bh_unsent_bytes =
3298 m->m_pkthdr.bufstatus_if;
3299 ehp->bh_unsent_snd =
3300 m->m_pkthdr.bufstatus_sndbuf;
3301 }
3302 } else
3303 ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN;
3304 payload = (u_char *)ehp + hdrlen;
3305 } else {
3306 hp = (struct bpf_hdr *)(void *)(d->bd_sbuf + curlen);
3307 hp->bh_tstamp.tv_sec = tv.tv_sec;
3308 hp->bh_tstamp.tv_usec = tv.tv_usec;
3309 hp->bh_datalen = pkt->bpfp_total_length;
3310 hp->bh_hdrlen = hdrlen;
3311 caplen = hp->bh_caplen = totlen - hdrlen;
3312 payload = (u_char *)hp + hdrlen;
3313 }
3314 /*
3315 * Copy the packet data into the store buffer and update its length.
3316 */
3317 copy_bpf_packet(pkt, payload, caplen);
3318 d->bd_slen = curlen + totlen;
3319 d->bd_scnt += 1;
3320
3321 if (do_wakeup)
3322 bpf_wakeup(d);
3323}
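/*
 * Illustrative sketch (not part of this file): the store buffer written by
 * catchpacket() is what read(2) eventually hands to user space as a
 * sequence of BPF_WORDALIGN'ed records.  Given 'buf' holding 'n' bytes
 * returned by read(2), a reader that did not request the extended header
 * walks it like this:
 *
 *	char *p = buf;
 *	while (p < buf + n) {
 *		struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
 *		u_char *pkt = (u_char *)p + bh->bh_hdrlen;
 *		handle_packet(pkt, bh->bh_caplen);	// hypothetical consumer
 *		p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
 *	}
 */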
3324
3325/*
3326 * Allocate the packet buffers for a descriptor and reset its buffer state.
3327 */
3328static int
3329bpf_allocbufs(struct bpf_d *d)
3330{
3331 if (d->bd_sbuf != NULL) {
3332 FREE(d->bd_sbuf, M_DEVBUF);
3333 d->bd_sbuf = NULL;
3334 }
3335 if (d->bd_hbuf != NULL) {
3336 FREE(d->bd_hbuf, M_DEVBUF);
3337 d->bd_hbuf = NULL;
3338 }
3339 if (d->bd_fbuf != NULL) {
3340 FREE(d->bd_fbuf, M_DEVBUF);
3341 d->bd_fbuf = NULL;
3342 }
3343
3344 d->bd_fbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3345 if (d->bd_fbuf == NULL)
3346 return (ENOBUFS);
3347
3348 d->bd_sbuf = (caddr_t) _MALLOC(d->bd_bufsize, M_DEVBUF, M_WAIT);
3349 if (d->bd_sbuf == NULL) {
3350 FREE(d->bd_fbuf, M_DEVBUF);
3351 d->bd_fbuf = NULL;
3352 return (ENOBUFS);
3353 }
3354 d->bd_slen = 0;
3355 d->bd_hlen = 0;
3356 d->bd_scnt = 0;
3357 d->bd_hcnt = 0;
3358 return (0);
3359}
3360
3361/*
3362 * Free buffers currently in use by a descriptor.
3363 * Called on close.
3364 */
3365static void
3366bpf_freed(struct bpf_d *d)
3367{
3368 /*
3369 * We don't need to lock out interrupts since this descriptor has
3370	 * been detached from its interface and has not yet been marked
3371 * free.
3372 */
3373 if (d->bd_hbuf_read != 0)
3374 panic("bpf buffer freed during read");
3375
3376 if (d->bd_sbuf != 0) {
3377 FREE(d->bd_sbuf, M_DEVBUF);
3378 if (d->bd_hbuf != 0)
3379 FREE(d->bd_hbuf, M_DEVBUF);
3380 if (d->bd_fbuf != 0)
3381 FREE(d->bd_fbuf, M_DEVBUF);
3382 }
3383 if (d->bd_filter)
3384 FREE((caddr_t)d->bd_filter, M_DEVBUF);
3385}
3386
3387/*
3388 * Attach an interface to bpf. dlt is the link layer type;
3389 * hdrlen is the fixed size of the link-layer header
3390 * (variable length headers are not yet supported).
3391 */
3392void
3393bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
3394{
3395 bpf_attach(ifp, dlt, hdrlen, NULL, NULL);
3396}
3397
3398errno_t
3399bpf_attach(
3400 ifnet_t ifp,
3401 u_int32_t dlt,
3402 u_int32_t hdrlen,
3403 bpf_send_func send,
3404 bpf_tap_func tap)
3405{
3406 struct bpf_if *bp;
3407 struct bpf_if *bp_new;
3408 struct bpf_if *bp_before_first = NULL;
3409 struct bpf_if *bp_first = NULL;
3410 struct bpf_if *bp_last = NULL;
3411 boolean_t found;
3412
3413 bp_new = (struct bpf_if *) _MALLOC(sizeof(*bp_new), M_DEVBUF,
3414 M_WAIT | M_ZERO);
3415 if (bp_new == 0)
3416 panic("bpfattach");
3417
3418 lck_mtx_lock(bpf_mlock);
3419
3420 /*
3421 * Check if this interface/dlt is already attached. Remember the
3422 * first and last attachment for this interface, as well as the
3423 * element before the first attachment.
3424 */
3425 found = FALSE;
3426 for (bp = bpf_iflist; bp != NULL; bp = bp->bif_next) {
3427 if (bp->bif_ifp != ifp) {
3428 if (bp_first != NULL) {
3429 /* no more elements for this interface */
3430 break;
3431 }
3432 bp_before_first = bp;
3433 } else {
3434 if (bp->bif_dlt == dlt) {
3435 found = TRUE;
3436 break;
3437 }
3438 if (bp_first == NULL) {
3439 bp_first = bp;
3440 }
3441 bp_last = bp;
3442 }
3443 }
3444 if (found) {
3445 lck_mtx_unlock(bpf_mlock);
3446 printf("bpfattach - %s with dlt %d is already attached\n",
3447 if_name(ifp), dlt);
3448 FREE(bp_new, M_DEVBUF);
3449 return (EEXIST);
3450 }
3451
3452 bp_new->bif_ifp = ifp;
3453 bp_new->bif_dlt = dlt;
3454 bp_new->bif_send = send;
3455 bp_new->bif_tap = tap;
3456
3457 if (bp_first == NULL) {
3458 /* No other entries for this ifp */
3459 bp_new->bif_next = bpf_iflist;
3460 bpf_iflist = bp_new;
3461 } else {
3462 if (ifnet_type(ifp) == IFT_ETHER && dlt == DLT_EN10MB) {
3463 /* Make this the first entry for this interface */
3464 if (bp_before_first != NULL) {
3465 /* point the previous to us */
3466 bp_before_first->bif_next = bp_new;
3467 } else {
3468 /* we're the new head */
3469 bpf_iflist = bp_new;
3470 }
3471 bp_new->bif_next = bp_first;
3472 } else {
3473 /* Add this after the last entry for this interface */
3474 bp_new->bif_next = bp_last->bif_next;
3475 bp_last->bif_next = bp_new;
3476 }
3477 }
3478
3479 /*
3480 * Compute the length of the bpf header. This is not necessarily
3481 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
3482 * that the network layer header begins on a longword boundary (for
3483 * performance reasons and to alleviate alignment restrictions).
3484 */
3485 bp_new->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
3486 bp_new->bif_exthdrlen = BPF_WORDALIGN(hdrlen +
3487 sizeof(struct bpf_hdr_ext)) - hdrlen;
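	/*
	 * Worked example (assuming a 4-byte BPF_ALIGNMENT and an 18-byte
	 * SIZEOF_BPF_HDR; the sizes are illustrative, only the rounding
	 * matters): for DLT_EN10MB with hdrlen == 14,
	 * BPF_WORDALIGN(14 + 18) == 32, so bif_hdrlen == 32 - 14 == 18 and
	 * the network-layer header that follows the 14-byte Ethernet header
	 * starts at offset 32, a longword boundary.
	 */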
3488
3489 /* Take a reference on the interface */
3490 ifnet_reference(ifp);
3491
3492 lck_mtx_unlock(bpf_mlock);
3493
3494#ifndef __APPLE__
3495 if (bootverbose)
3496 printf("bpf: %s attached\n", if_name(ifp));
3497#endif
3498
3499 return (0);
3500}
3501
3502/*
3503 * Detach bpf from an interface. This involves detaching each descriptor
3504 * associated with the interface, and leaving bd_bif NULL. Notify each
3505 * descriptor as it's detached so that any sleepers wake up and get
3506 * ENXIO.
3507 */
3508void
3509bpfdetach(struct ifnet *ifp)
3510{
3511 struct bpf_if *bp, *bp_prev, *bp_next;
3512 struct bpf_d *d;
3513
3514 if (bpf_debug != 0)
3515 printf("%s: %s\n", __func__, if_name(ifp));
3516
3517 lck_mtx_lock(bpf_mlock);
3518
3519 /*
3520 * Build the list of devices attached to that interface
3521 * that we need to free while keeping the lock to maintain
3522 * the integrity of the interface list
3523 */
3524 bp_prev = NULL;
3525 for (bp = bpf_iflist; bp != NULL; bp = bp_next) {
3526 bp_next = bp->bif_next;
3527
3528 if (ifp != bp->bif_ifp) {
3529 bp_prev = bp;
3530 continue;
3531 }
3532 /* Unlink from the interface list */
3533 if (bp_prev)
3534 bp_prev->bif_next = bp->bif_next;
3535 else
3536 bpf_iflist = bp->bif_next;
3537
3538 /* Detach the devices attached to the interface */
3539 while ((d = bp->bif_dlist) != NULL) {
3540 /*
3541 * Take an extra reference to prevent the device
3542 * from being freed when bpf_detachd() releases
3543 * the reference for the interface list
3544 */
3545 bpf_acquire_d(d);
3546 bpf_detachd(d, 0);
3547 bpf_wakeup(d);
3548 bpf_release_d(d);
3549 }
3550 ifnet_release(ifp);
3551 }
3552
3553 lck_mtx_unlock(bpf_mlock);
3554}
3555
3556void
3557bpf_init(__unused void *unused)
3558{
3559#ifdef __APPLE__
3560 int i;
3561 int maj;
3562
3563 if (bpf_devsw_installed == 0) {
3564 bpf_devsw_installed = 1;
3565 bpf_mlock_grp_attr = lck_grp_attr_alloc_init();
3566 bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr);
3567 bpf_mlock_attr = lck_attr_alloc_init();
3568 lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr);
3569 maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw);
3570 if (maj == -1) {
3571 if (bpf_mlock_attr)
3572 lck_attr_free(bpf_mlock_attr);
3573 if (bpf_mlock_grp)
3574 lck_grp_free(bpf_mlock_grp);
3575 if (bpf_mlock_grp_attr)
3576 lck_grp_attr_free(bpf_mlock_grp_attr);
3577
3578 bpf_mlock = NULL;
3579 bpf_mlock_attr = NULL;
3580 bpf_mlock_grp = NULL;
3581 bpf_mlock_grp_attr = NULL;
3582 bpf_devsw_installed = 0;
3583 printf("bpf_init: failed to allocate a major number\n");
3584 return;
3585 }
3586
3587 for (i = 0; i < NBPFILTER; i++)
3588 bpf_make_dev_t(maj);
3589 }
3590#else
3591 cdevsw_add(&bpf_cdevsw);
3592#endif
3593}
3594
3595#ifndef __APPLE__
3596SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+CDEV_MAJOR, bpf_drvinit, NULL)
3597#endif
3598
3599#if CONFIG_MACF_NET
3600struct label *
3601mac_bpfdesc_label_get(struct bpf_d *d)
3602{
3603
3604 return (d->bd_label);
3605}
3606
3607void
3608mac_bpfdesc_label_set(struct bpf_d *d, struct label *label)
3609{
3610
3611 d->bd_label = label;
3612}
3613#endif
3614