content_filter.c source code [xnu/bsd/net/content_filter.c]

1	/*
2	* Copyright (c) 2013-2022 Apple Inc. All rights reserved.
3	*
4	* @APPLE_LICENSE_HEADER_START@
5	*
6	* This file contains Original Code and/or Modifications of Original Code
7	* as defined in and that are subject to the Apple Public Source License
8	* Version 2.0 (the 'License'). You may not use this file except in
9	* compliance with the License. Please obtain a copy of the License at
10	* http://www.opensource.apple.com/apsl/ and read it before using this
11	* file.
12	*
13	* The Original Code and all software distributed under the License are
14	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18	* Please see the License for the specific language governing rights and
19	* limitations under the License.
20	*
21	* @APPLE_LICENSE_HEADER_END@
22	*/
23
24	/*
25	* THEORY OF OPERATION
26	*
27	* The socket content filter subsystem provides a way for user space agents to
28	* make filtering decisions based on the content of the data being sent and
29	* received by INET/INET6 sockets.
30	*
31	* A content filter user space agents gets a copy of the data and the data is
32	* also kept in kernel buffer until the user space agents makes a pass or drop
33	* decision. This unidirectional flow of content avoids unnecessary data copies
34	* back to the kernel.
35	*
36	* A user space filter agent opens a kernel control socket with the name
37	* CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38	* When connected, a "struct content_filter" is created and set as the
39	* "unitinfo" of the corresponding kernel control socket instance.
40	*
41	* The socket content filter subsystem exchanges messages with the user space
42	* filter agent until an ultimate pass or drop decision is made by the
43	* user space filter agent.
44	*
45	* It should be noted that messages about many INET/INET6 sockets can be multiplexed
46	* over a single kernel control socket.
47	*
48	* Notes:
49	* - The current implementation supports all INET/INET6 sockets (i.e. TCP,
50	* UDP, ICMP, etc).
51	* - The current implementation supports up to two simultaneous content filters
52	* for iOS devices and eight simultaneous content filters for OSX.
53	*
54	*
55	* NECP FILTER CONTROL UNIT
56	*
57	* A user space filter agent uses the Network Extension Control Policy (NECP)
58	* database to specify which INET/INET6 sockets need to be filtered. The NECP
59	* criteria may be based on a variety of properties like user ID or proc UUID.
60	*
61	* The NECP "filter control unit" is used by the socket content filter subsystem
62	* to deliver the relevant INET/INET6 content information to the appropriate
63	* user space filter agent via its kernel control socket instance.
64	* This works as follows:
65	*
66	* 1) The user space filter agent specifies an NECP filter control unit when
67	* in adds its filtering rules to the NECP database.
68	*
69	* 2) The user space filter agent also sets its NECP filter control unit on the
70	* content filter kernel control socket via the socket option
71	* CFIL_OPT_NECP_CONTROL_UNIT.
72	*
73	* 3) The NECP database is consulted to find out if a given INET/INET6 socket
74	* needs to be subjected to content filtering and returns the corresponding
75	* NECP filter control unit -- the NECP filter control unit is actually
76	* stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77	*
78	* 4) The NECP filter control unit is then used to find the corresponding
79	* kernel control socket instance.
80	*
81	* Note: NECP currently supports a single filter control unit per INET/INET6 socket
82	* but this restriction may be soon lifted.
83	*
84	*
85	* THE MESSAGING PROTOCOL
86	*
87	* The socket content filter subsystem and a user space filter agent
88	* communicate over the kernel control socket via an asynchronous
89	* messaging protocol (this is not a request-response protocol).
90	* The socket content filter subsystem sends event messages to the user
91	* space filter agent about the INET/INET6 sockets it is interested to filter.
92	* The user space filter agent sends action messages to either allow
93	* data to pass or to disallow the data flow (and drop the connection).
94	*
95	* All messages over a content filter kernel control socket share the same
96	* common header of type "struct cfil_msg_hdr". The message type tells if
97	* it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
98	* The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99	* For TCP, flows are per-socket. For UDP and other datagrame protocols, there
100	* could be multiple flows per socket.
101	*
102	* Note the message header length field may be padded for alignment and can
103	* be larger than the actual content of the message.
104	* The field "cfm_op" describe the kind of event or action.
105	*
106	* Here are the kinds of content filter events:
107	* - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108	* - CFM_OP_SOCKET_CLOSED: A INET/INET6 socket is closed
109	* - CFM_OP_DATA_OUT: A span of data is being sent on a INET/INET6 socket
110	* - CFM_OP_DATA_IN: A span of data is being or received on a INET/INET6 socket
111	*
112	*
113	* EVENT MESSAGES
114	*
115	* The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
116	* data that is being sent or received. The position of this span of data
117	* in the data flow is described by a set of start and end offsets. These
118	* are absolute 64 bits offsets. The first byte sent (or received) starts
119	* at offset 0 and ends at offset 1. The length of the content data
120	* is given by the difference between the end offset and the start offset.
121	*
122	* After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123	* CFM_OP_DATA_OUT events are not delivered until a CFM_OP_DATA_UPDATE
124	* action message is sent by the user space filter agent.
125	*
126	* Note: absolute 64 bits offsets should be large enough for the foreseeable
127	* future. A 64-bits counter will wrap after 468 years at 10 Gbit/sec:
128	* 2E64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129	*
130	* They are two kinds of primary content filter actions:
131	* - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132	* - CFM_OP_DROP: to shutdown socket and disallow further data flow
133	*
134	* There is also an action to mark a given client flow as already filtered
135	* at a higher level, CFM_OP_BLESS_CLIENT.
136	*
137	*
138	* ACTION MESSAGES
139	*
140	* The CFM_OP_DATA_UPDATE action messages let the user space filter
141	* agent allow data to flow up to the specified pass offset -- there
142	* is a pass offset for outgoing data and a pass offset for incoming data.
143	* When a new INET/INET6 socket is attached to the content filter and a flow is
144	* created, each pass offset is initially set to 0 so no data is allowed to pass by
145	* default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146	* then the data flow becomes unrestricted.
147	*
148	* Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149	* with a pass offset smaller than the pass offset of a previous
150	* CFM_OP_DATA_UPDATE message is silently ignored.
151	*
152	* A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153	* to tell the kernel how much data it wants to see by using the peek offsets.
154	* Just like pass offsets, there is a peek offset for each direction.
155	* When a new INET/INET6 flow is created, each peek offset is initially set to 0
156	* so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157	* until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158	* by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159	* a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160	*
161	* Note that peek offsets cannot be smaller than the corresponding pass offset.
162	* Also a peek offsets cannot be smaller than the corresponding end offset
163	* of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164	* to set a too small peek value is silently ignored.
165	*
166	*
167	* PER FLOW "struct cfil_info"
168	*
169	* As soon as a INET/INET6 socket gets attached to a content filter, a
170	* "struct cfil_info" is created to hold the content filtering state for this
171	* socket. For UDP and other datagram protocols, as soon as traffic is seen for
172	* each new flow identified by its 4-tuple of source address/port and destination
173	* address/port, a "struct cfil_info" is created. Each datagram socket may
174	* have multiple flows maintained in a hash table of "struct cfil_info" entries.
175	*
176	* The content filtering state is made of the following information
177	* for each direction:
178	* - The current pass offset;
179	* - The first and last offsets of the data pending, waiting for a filtering
180	* decision;
181	* - The inject queue for data that passed the filters and that needs
182	* to be re-injected;
183	* - A content filter specific state in a set of "struct cfil_entry"
184	*
185	*
186	* CONTENT FILTER STATE "struct cfil_entry"
187	*
188	* The "struct cfil_entry" maintains the information most relevant to the
189	* message handling over a kernel control socket with a user space filter agent.
190	*
191	* The "struct cfil_entry" holds the NECP filter control unit that corresponds
192	* to the kernel control socket unit it corresponds to and also has a pointer
193	* to the corresponding "struct content_filter".
194	*
195	* For each direction, "struct cfil_entry" maintains the following information:
196	* - The pass offset
197	* - The peek offset
198	* - The offset of the last data peeked at by the filter
199	* - A queue of data that's waiting to be delivered to the user space filter
200	* agent on the kernel control socket
201	* - A queue of data for which event messages have been sent on the kernel
202	* control socket and are pending for a filtering decision.
203	*
204	*
205	* CONTENT FILTER QUEUES
206	*
207	* Data that is being filtered is steered away from the INET/INET6 socket buffer
208	* and instead will sit in one of three content filter queues until the data
209	* can be re-injected into the INET/INET6 socket buffer.
210	*
211	* A content filter queue is represented by "struct cfil_queue" that contains
212	* a list of mbufs and the start and end offset of the data span of
213	* the list of mbufs.
214	*
215	* The data moves into the three content filter queues according to this
216	* sequence:
217	* a) The "cfe_ctl_q" of "struct cfil_entry"
218	* b) The "cfe_pending_q" of "struct cfil_entry"
219	* c) The "cfi_inject_q" of "struct cfil_info"
220	*
221	* Note: The sequence (a),(b) may be repeated several times if there is more
222	* than one content filter attached to the INET/INET6 socket.
223	*
224	* The "cfe_ctl_q" queue holds data than cannot be delivered to the
225	* kernel conntrol socket for two reasons:
226	* - The peek offset is less that the end offset of the mbuf data
227	* - The kernel control socket is flow controlled
228	*
229	* The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230	* CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231	* socket and are waiting for a pass action message fromn the user space
232	* filter agent. An mbuf length must be fully allowed to pass to be removed
233	* from the cfe_pending_q.
234	*
235	* The "cfi_inject_q" queue holds data that has been fully allowed to pass
236	* by the user space filter agent and that needs to be re-injected into the
237	* INET/INET6 socket.
238	*
239	*
240	* IMPACT ON FLOW CONTROL
241	*
242	* An essential aspect of the content filer subsystem is to minimize the
243	* impact on flow control of the INET/INET6 sockets being filtered.
244	*
245	* The processing overhead of the content filtering may have an effect on
246	* flow control by adding noticeable delays and cannot be eliminated --
247	* care must be taken by the user space filter agent to minimize the
248	* processing delays.
249	*
250	* The amount of data being filtered is kept in buffers while waiting for
251	* a decision by the user space filter agent. This amount of data pending
252	* needs to be subtracted from the amount of data available in the
253	* corresponding INET/INET6 socket buffer. This is done by modifying
254	* sbspace() and tcp_sbspace() to account for amount of data pending
255	* in the content filter.
256	*
257	*
258	* LOCKING STRATEGY
259	*
260	* The global state of content filter subsystem is protected by a single
261	* read-write lock "cfil_lck_rw". The data flow can be done with the
262	* cfil read-write lock held as shared so it can be re-entered from multiple
263	* threads.
264	*
265	* The per INET/INET6 socket content filterstate -- "struct cfil_info" -- is
266	* protected by the socket lock.
267	*
268	* A INET/INET6 socket lock cannot be taken while the cfil read-write lock
269	* is held. That's why we have some sequences where we drop the cfil read-write
270	* lock before taking the INET/INET6 lock.
271	*
272	* It is also important to lock the INET/INET6 socket buffer while the content
273	* filter is modifying the amount of pending data. Otherwise the calculations
274	* in sbspace() and tcp_sbspace() could be wrong.
275	*
276	* The "cfil_lck_rw" protects "struct content_filter" and also the fields
277	* "cfe_link" and "cfe_filter" of "struct cfil_entry".
278	*
279	* Actually "cfe_link" and "cfe_filter" are protected by both by
280	* "cfil_lck_rw" and the socket lock: they may be modified only when
281	* "cfil_lck_rw" is exclusive and the socket is locked.
282	*
283	* To read the other fields of "struct content_filter" we have to take
284	* "cfil_lck_rw" in shared mode.
285	*
286	* DATAGRAM SPECIFICS:
287	*
288	* The socket content filter supports all INET/INET6 protocols. However
289	* the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290	* are slightly different.
291	*
292	* Each datagram socket may have multiple flows. Each flow is identified
293	* by the flow's source address/port and destination address/port tuple
294	* and is represented as a "struct cfil_info" entry. For each socket,
295	* a hash table is used to maintain the collection of flows under that socket.
296	*
297	* Each datagram flow is uniquely identified by it's "struct cfil_info" cfi_sock_id.
298	* The highest 32-bits of the cfi_sock_id contains the socket's so_gencnt. This portion
299	* of the cfi_sock_id is used locate the socket during socket lookup. The lowest 32-bits
300	* of the cfi_sock_id contains a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301	* is used as the hash value for the flow hash table lookup within the parent socket.
302	*
303	* Since datagram sockets may not be connected, flow states may not be maintained in the
304	* socket structures and thus have to be saved for each packet. These saved states will be
305	* used for both outgoing and incoming reinjections. For outgoing packets, destination
306	* address/port as well as the current socket states will be saved. During reinjection,
307	* these saved states will be used instead. For incoming packets, control and address
308	* mbufs will be chained to the data. During reinjection, the whole chain will be queued
309	* onto the incoming socket buffer.
310	*
311	* LIMITATIONS
312	*
313	* - Support all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314	*
315	* - Does not support TCP unordered messages
316	*/
317
318	/*
319	* TO DO LIST
320	*
321	* Deal with OOB
322	*
323	*/
324
325	#include <sys/types.h>
326	#include <sys/kern_control.h>
327	#include <sys/queue.h>
328	#include <sys/domain.h>
329	#include <sys/protosw.h>
330	#include <sys/syslog.h>
331	#include <sys/systm.h>
332	#include <sys/param.h>
333	#include <sys/mbuf.h>
334
335	#include <kern/locks.h>
336	#include <kern/zalloc.h>
337	#include <kern/debug.h>
338
339	#include <net/ntstat.h>
340	#include <net/content_filter.h>
341	#include <net/content_filter_crypto.h>
342
343	#define _IP_VHL
344	#include <netinet/ip.h>
345	#include <netinet/in_pcb.h>
346	#include <netinet/tcp.h>
347	#include <netinet/tcp_var.h>
348	#include <netinet/udp.h>
349	#include <netinet/udp_var.h>
350	#include <kern/socket_flows.h>
351
352	#include <string.h>
353	#include <libkern/libkern.h>
354	#include <kern/sched_prim.h>
355	#include <kern/task.h>
356	#include <mach/task_info.h>
357
358	#include <net/sockaddr_utils.h>
359
360	#define MAX_CONTENT_FILTER 8
361
362	extern int tcp_msl;
363	extern struct inpcbinfo ripcbinfo;
364	struct cfil_entry;
365
366	/*
367	* The structure content_filter represents a user space content filter
368	* It's created and associated with a kernel control socket instance
369	*/
370	struct content_filter {
371	kern_ctl_ref cf_kcref;
372	u_int32_t cf_kcunit;
373	u_int32_t cf_flags;
374
375	uint32_t cf_necp_control_unit;
376
377	uint32_t cf_sock_count;
378	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
379
380	cfil_crypto_state_t cf_crypto_state;
381	};
382
383	#define CFF_ACTIVE 0x01
384	#define CFF_DETACHING 0x02
385	#define CFF_FLOW_CONTROLLED 0x04
386	#define CFF_PRESERVE_CONNECTIONS 0x08
387
388	struct content_filter *content_filters[MAX_CONTENT_FILTER];
389	uint32_t cfil_active_count = `0`; / Number of active content filters /
390	uint32_t cfil_sock_attached_count = `0`; / Number of sockets attachements /
391	uint32_t cfil_sock_attached_stats_count = `0`; / Number of sockets requested periodic stats report /
392	uint32_t cfil_close_wait_timeout = `1000`; / in milliseconds /
393
394	static kern_ctl_ref cfil_kctlref = NULL;
395
396	static LCK_GRP_DECLARE(cfil_lck_grp, "content filter");
397	static LCK_RW_DECLARE(cfil_lck_rw, &cfil_lck_grp);
398
399	#define CFIL_RW_LCK_MAX 8
400
401	int cfil_rw_nxt_lck = `0`;
402	void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
403
404	int cfil_rw_nxt_unlck = `0`;
405	void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
406
407	static KALLOC_TYPE_DEFINE(content_filter_zone, struct content_filter, NET_KT_DEFAULT);
408
409	MBUFQ_HEAD(cfil_mqhead);
410
411	struct cfil_queue {
412	uint64_t q_start; / offset of first byte in queue /
413	uint64_t q_end; / offset of last byte in queue /
414	struct cfil_mqhead q_mq;
415	};
416
417	/*
418	* struct cfil_entry
419	*
420	* The is one entry per content filter
421	*/
422	struct cfil_entry {
423	TAILQ_ENTRY(cfil_entry) cfe_link;
424	SLIST_ENTRY(cfil_entry) cfe_order_link;
425	struct content_filter *cfe_filter;
426
427	struct cfil_info *cfe_cfil_info;
428	uint32_t cfe_flags;
429	uint32_t cfe_necp_control_unit;
430	struct timeval cfe_last_event; / To user space /
431	struct timeval cfe_last_action; / From user space /
432	uint64_t cfe_byte_inbound_count_reported; / stats already been reported /
433	uint64_t cfe_byte_outbound_count_reported; / stats already been reported /
434	struct timeval cfe_stats_report_ts; / Timestamp for last stats report /
435	uint32_t cfe_stats_report_frequency; / Interval for stats report in msecs /
436	boolean_t cfe_laddr_sent;
437
438	struct cfe_buf {
439	/*
440	* cfe_pending_q holds data that has been delivered to
441	* the filter and for which we are waiting for an action
442	*/
443	struct cfil_queue cfe_pending_q;
444	/*
445	* This queue is for data that has not be delivered to
446	* the content filter (new data, pass peek or flow control)
447	*/
448	struct cfil_queue cfe_ctl_q;
449
450	uint64_t cfe_pass_offset;
451	uint64_t cfe_peek_offset;
452	uint64_t cfe_peeked;
453	} cfe_snd, cfe_rcv;
454	};
455
456	#define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
457	#define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
458	#define CFEF_DATA_START 0x0004 /* can send data event */
459	#define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
460	#define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
461	#define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
462	#define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
463	#define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
464
465
466	#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
467	struct timeval64 _tdiff; \
468	if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
469	timersub(t1, t0, &_tdiff); \
470	(cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
471	(cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
472	(cfil)->cfi_op_list_ctr ++; \
473	}
474
475	/*
476	* struct cfil_info
477	*
478	* There is a struct cfil_info per socket
479	*/
480	struct cfil_info {
481	TAILQ_ENTRY(cfil_info) cfi_link;
482	TAILQ_ENTRY(cfil_info) cfi_link_stats;
483	struct socket *cfi_so;
484	uint64_t cfi_flags;
485	uint64_t cfi_sock_id;
486	struct timeval64 cfi_first_event;
487	uint32_t cfi_op_list_ctr;
488	uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; / time interval in microseconds since first event /
489	unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
490	union sockaddr_in_4_6 cfi_so_attach_faddr; / faddr at the time of attach /
491	union sockaddr_in_4_6 cfi_so_attach_laddr; / laddr at the time of attach /
492
493	int cfi_dir;
494	uint64_t cfi_byte_inbound_count;
495	uint64_t cfi_byte_outbound_count;
496
497	boolean_t cfi_isSignatureLatest; / Indicates if signature covers latest flow attributes /
498	u_int32_t cfi_filter_control_unit;
499	u_int32_t cfi_debug;
500	struct cfi_buf {
501	/*
502	* cfi_pending_first and cfi_pending_last describe the total
503	* amount of data outstanding for all the filters on
504	* this socket and data in the flow queue
505	* cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
506	*/
507	uint64_t cfi_pending_first;
508	uint64_t cfi_pending_last;
509	uint32_t cfi_pending_mbcnt;
510	uint32_t cfi_pending_mbnum;
511	uint32_t cfi_tail_drop_cnt;
512	/*
513	* cfi_pass_offset is the minimum of all the filters
514	*/
515	uint64_t cfi_pass_offset;
516	/*
517	* cfi_inject_q holds data that needs to be re-injected
518	* into the socket after filtering and that can
519	* be queued because of flow control
520	*/
521	struct cfil_queue cfi_inject_q;
522	} cfi_snd, cfi_rcv;
523
524	struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
525	struct soflow_hash_entry *cfi_hash_entry;
526	SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
527	os_refcnt_t cfi_ref_count;
528	} __attribute__((aligned(`8`)));
529
530	#define CFIF_DROP 0x0001 /* drop action applied */
531	#define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
532	#define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
533	#define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
534	#define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
535	#define CFIF_SHUT_WR 0x0040 /* shutdown write */
536	#define CFIF_SHUT_RD 0x0080 /* shutdown read */
537	#define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
538	#define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
539	#define CFIF_NO_CLOSE_WAIT 0x0400 /* do not wait to close */
540	#define CFIF_SO_DELAYED_DEAD 0x0800 /* Delayed socket DEAD marking */
541	#define CFIF_SO_DELAYED_TCP_TIME_WAIT 0x1000 /* Delayed TCP FIN TIME WAIT */
542
543	#define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
544	#define CFI_SHIFT_GENCNT 32
545	#define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
546	#define CFI_SHIFT_FLOWHASH 0
547
548	#define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
549
550	static KALLOC_TYPE_DEFINE(cfil_info_zone, struct cfil_info, NET_KT_DEFAULT);
551
552	TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
553	TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
554
555	#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
556	#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
557
558	/*
559	* UDP Socket Support
560	*/
561	#define IS_ICMP(so) (so && (SOCK_CHECK_TYPE(so, SOCK_RAW) \|\| SOCK_CHECK_TYPE(so, SOCK_DGRAM)) && \
562	(SOCK_CHECK_PROTO(so, IPPROTO_ICMP) \|\| SOCK_CHECK_PROTO(so, IPPROTO_ICMPV6)))
563	#define IS_RAW(so) (so && SOCK_CHECK_TYPE(so, SOCK_RAW) && SOCK_CHECK_PROTO(so, IPPROTO_RAW))
564
565	#define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
566	#define GET_SO_PROTOCOL(so) (so ? SOCK_PROTO(so) : IPPROTO_IP)
567	#define GET_SO_INP_PROTOCOL(so) ((so && sotoinpcb(so)) ? sotoinpcb(so)->inp_ip_p : IPPROTO_IP)
568	#define GET_SO_PROTO(so) ((GET_SO_PROTOCOL(so) != IPPROTO_IP) ? GET_SO_PROTOCOL(so) : GET_SO_INP_PROTOCOL(so))
569	#define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
570
571	#define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) \|\| \
572	((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
573	#define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
574	cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
575	#define IS_DNS(local, remote) (check_port(local, 53) \|\| check_port(remote, 53) \|\| check_port(local, 5353) \|\| check_port(remote, 5353))
576	#define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
577	#define NULLADDRESS(addr) ((addr.sa.sa_len == 0) \|\| \
578	(addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) \|\| \
579	(addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
580
581	#define SKIP_FILTER_FOR_TCP_SOCKET(so) \
582	(so == NULL \|\| \
583	(!SOCK_CHECK_DOM(so, PF_INET) && !SOCK_CHECK_DOM(so, PF_INET6)) \|\| \
584	!SOCK_CHECK_TYPE(so, SOCK_STREAM) \|\| \
585	!SOCK_CHECK_PROTO(so, IPPROTO_TCP) \|\| \
586	(so->so_flags & SOF_MP_SUBFLOW) != 0 \|\| \
587	(so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
588
589	/*
590	* Special handling for 0.0.0.0-faddr TCP flows. This flows will be changed to loopback addr by TCP and
591	* may result in an immediate TCP RESET and socket close. This leads to CFIL blocking the owner thread for
592	* 1 sec waiting for ack from user-space provider (ack recevied by CFIL but socket already removed from
593	* global socket list). To avoid this, identify these flows and do not perform the close-wait blocking.
594	* These flows are identified as destined to Loopback address and were disconnected shortly after connect
595	* (before initial-verdict received).
596	*/
597	#define IS_LOOPBACK_FADDR(inp) \
598	(inp && ((IS_INP_V6(inp) && IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr)) \|\| (ntohl(inp->inp_faddr.s_addr) == INADDR_LOOPBACK)))
599
600	#define SET_NO_CLOSE_WAIT(inp, cfil_info) \
601	if (inp && cfil_info && !(cfil_info->cfi_flags & CFIF_INITIAL_VERDICT) && IS_LOOPBACK_FADDR(inp)) { \
602	cfil_info->cfi_flags \|= CFIF_NO_CLOSE_WAIT; \
603	}
604
605	#define IS_NO_CLOSE_WAIT(cfil_info) (cfil_info && (cfil_info->cfi_flags & CFIF_NO_CLOSE_WAIT))
606
607	os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
608
609	#define CFIL_INFO_FREE(cfil_info) \
610	if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
611	cfil_info_free(cfil_info); \
612	}
613
614	#define SOCKET_PID(so) ((so->so_flags & SOF_DELEGATED) ? so->e_pid : so->last_pid)
615	#define MATCH_PID(so) (so && (cfil_log_pid == SOCKET_PID(so)))
616	#define MATCH_PORT(inp, local, remote) \
617	((inp && ntohs(inp->inp_lport) == cfil_log_port) \|\| (inp && ntohs(inp->inp_fport) == cfil_log_port) \|\| \
618	check_port(local, cfil_log_port) \|\| check_port(remote, cfil_log_port))
619	#define MATCH_PROTO(so) (GET_SO_PROTO(so) == cfil_log_proto)
620
621	#define DEBUG_FLOW(inp, so, local, remote) \
622	((cfil_log_port && MATCH_PORT(inp, local, remote)) \|\| (cfil_log_pid && MATCH_PID(so)) \|\| (cfil_log_proto && MATCH_PROTO(so)))
623
624	#define SO_DELAYED_DEAD_SET(so, set) \
625	if (so->so_cfil) { \
626	if (set) { \
627	so->so_cfil->cfi_flags \|= CFIF_SO_DELAYED_DEAD; \
628	} else { \
629	so->so_cfil->cfi_flags &= ~CFIF_SO_DELAYED_DEAD; \
630	} \
631	} else if (so->so_flow_db) { \
632	if (set) { \
633	so->so_flow_db->soflow_db_flags \|= SOFLOWF_SO_DELAYED_DEAD; \
634	} else { \
635	so->so_flow_db->soflow_db_flags &= ~SOFLOWF_SO_DELAYED_DEAD; \
636	} \
637	}
638
639	#define SO_DELAYED_DEAD_GET(so) \
640	(so->so_cfil ? (so->so_cfil->cfi_flags & CFIF_SO_DELAYED_DEAD) : \
641	(so->so_flow_db) ? (so->so_flow_db->soflow_db_flags & SOFLOWF_SO_DELAYED_DEAD) : false)
642
643	#define SO_DELAYED_TCP_TIME_WAIT_SET(so, set) \
644	if (so->so_cfil) { \
645	if (set) { \
646	so->so_cfil->cfi_flags \|= CFIF_SO_DELAYED_TCP_TIME_WAIT; \
647	} else { \
648	so->so_cfil->cfi_flags &= ~CFIF_SO_DELAYED_TCP_TIME_WAIT; \
649	} \
650	}
651
652	#define SO_DELAYED_TCP_TIME_WAIT_GET(so) \
653	(so->so_cfil ? (so->so_cfil->cfi_flags & CFIF_SO_DELAYED_TCP_TIME_WAIT) : false)
654
655	/*
656	* Periodic Statistics Report:
657	*/
658	static struct thread *cfil_stats_report_thread;
659	#define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
660	#define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
661	#define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
662
663	/ This buffer must have same layout as struct cfil_msg_stats_report /
664	struct cfil_stats_report_buffer {
665	struct cfil_msg_hdr msghdr;
666	uint32_t count;
667	struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
668	};
669	static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
670	static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
671
672	/*
673	* UDP Garbage Collection:
674	*/
675	#define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
676	#define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
677
678	/*
679	* UDP flow queue thresholds
680	*/
681	#define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
682	#define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
683	#define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
684	/*
685	* UDP flow queue threshold globals:
686	*/
687	static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
688	static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
689
690	/*
691	* CFIL specific mbuf tag:
692	* Save state of socket at the point of data entry into cfil.
693	* Use saved state for reinjection at protocol layer.
694	*/
695	struct cfil_tag {
696	union sockaddr_in_4_6 cfil_faddr;
697	uint32_t cfil_so_state_change_cnt;
698	uint32_t cfil_so_options;
699	int cfil_inp_flags;
700	};
701
702	/*
703	* Global behavior flags:
704	*/
705	#define CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS 0x00000001
706	static uint32_t cfil_behavior_flags = `0`;
707
708	#define DO_PRESERVE_CONNECTIONS (cfil_behavior_flags & CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS)
709
710	/*
711	* Statistics
712	*/
713
714	struct cfil_stats cfil_stats;
715
716	/*
717	* For troubleshooting
718	*/
719	int cfil_log_level = LOG_ERR;
720	int cfil_log_port = `0`;
721	int cfil_log_pid = `0`;
722	int cfil_log_proto = `0`;
723	int cfil_log_data = `0`;
724	int cfil_log_stats = `0`;
725	int cfil_debug = `1`;
726
727	/*
728	* Sysctls for logs and statistics
729	*/
730	static int sysctl_cfil_filter_list(struct sysctl_oid , void* , int*,
731	struct sysctl_req *);
732	static int sysctl_cfil_sock_list(struct sysctl_oid , void* , int*,
733	struct sysctl_req *);
734
735	SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW \| CTLFLAG_LOCKED, `0`, "cfil");
736
737	SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW \| CTLFLAG_LOCKED,
738	&cfil_log_level, `0`, "");
739
740	SYSCTL_INT(_net_cfil, OID_AUTO, log_port, CTLFLAG_RW \| CTLFLAG_LOCKED,
741	&cfil_log_port, `0`, "");
742
743	SYSCTL_INT(_net_cfil, OID_AUTO, log_pid, CTLFLAG_RW \| CTLFLAG_LOCKED,
744	&cfil_log_pid, `0`, "");
745
746	SYSCTL_INT(_net_cfil, OID_AUTO, log_proto, CTLFLAG_RW \| CTLFLAG_LOCKED,
747	&cfil_log_proto, `0`, "");
748
749	SYSCTL_INT(_net_cfil, OID_AUTO, log_data, CTLFLAG_RW \| CTLFLAG_LOCKED,
750	&cfil_log_data, `0`, "");
751
752	SYSCTL_INT(_net_cfil, OID_AUTO, log_stats, CTLFLAG_RW \| CTLFLAG_LOCKED,
753	&cfil_log_stats, `0`, "");
754
755	SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW \| CTLFLAG_LOCKED,
756	&cfil_debug, `0`, "");
757
758	SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD \| CTLFLAG_LOCKED,
759	&cfil_sock_attached_count, `0`, "");
760
761	SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD \| CTLFLAG_LOCKED,
762	&cfil_active_count, `0`, "");
763
764	SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW \| CTLFLAG_LOCKED,
765	&cfil_close_wait_timeout, `0`, "");
766
767	SYSCTL_UINT(_net_cfil, OID_AUTO, behavior_flags, CTLFLAG_RW \| CTLFLAG_LOCKED,
768	&cfil_behavior_flags, `0`, "");
769
770	static int cfil_sbtrim = `1`;
771	SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW \| CTLFLAG_LOCKED,
772	&cfil_sbtrim, `0`, "");
773
774	SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD \| CTLFLAG_LOCKED,
775	`0`, `0`, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
776
777	SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD \| CTLFLAG_LOCKED,
778	`0`, `0`, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
779
780	SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD \| CTLFLAG_LOCKED,
781	&cfil_stats, cfil_stats, "");
782
783	/*
784	* Forward declaration to appease the compiler
785	*/
786	static int cfil_action_data_pass(struct socket , struct* cfil_info , uint32_t, int*,
787	uint64_t, uint64_t);
788	static int cfil_action_drop(struct socket , struct* cfil_info *, uint32_t);
789	static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
790	static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
791	static int cfil_dispatch_closed_event(struct socket , struct* cfil_info , int*);
792	static int cfil_data_common(struct socket , struct* cfil_info , int, struct* sockaddr *,
793	struct mbuf , struct* mbuf *, uint32_t);
794	static int cfil_data_filter(struct socket , struct* cfil_info , uint32_t, int*,
795	struct mbuf *, uint32_t);
796	static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
797	struct in_addr, u_int16_t);
798	static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
799	struct in6_addr *, u_int16_t, uint32_t);
800
801	static int cfil_dispatch_attach_event(struct socket , struct* cfil_info , uint32_t, int*);
802	static void cfil_info_free(struct cfil_info *);
803	static struct cfil_info * cfil_info_alloc(struct socket , struct* soflow_hash_entry *);
804	static int cfil_info_attach_unit(struct socket , uint32_t, struct* cfil_info *);
805	static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
806	static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
807	static int cfil_service_pending_queue(struct socket , struct* cfil_info , uint32_t, int*);
808	static int cfil_data_service_ctl_q(struct socket , struct* cfil_info , uint32_t, int*);
809	static void cfil_info_verify(struct cfil_info *);
810	static int cfil_update_data_offsets(struct socket , struct* cfil_info , uint32_t, int*,
811	uint64_t, uint64_t);
812	static int cfil_acquire_sockbuf(struct socket , struct* cfil_info , int*);
813	static void cfil_release_sockbuf(struct socket , int*);
814	static int cfil_filters_attached(struct socket *);
815
816	static void cfil_rw_lock_exclusive(lck_rw_t *);
817	static void cfil_rw_unlock_exclusive(lck_rw_t *);
818	static void cfil_rw_lock_shared(lck_rw_t *);
819	static void cfil_rw_unlock_shared(lck_rw_t *);
820	static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
821	static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
822
823	static unsigned int cfil_data_length(struct mbuf , int* , int* *);
824	static struct cfil_info cfil_sock_udp_get_info(struct* socket , uint32_t, bool, struct* soflow_hash_entry , struct* sockaddr , struct* sockaddr *);
825	static errno_t cfil_sock_udp_handle_data(bool, struct socket , struct* sockaddr , struct* sockaddr *,
826	struct mbuf , struct* mbuf , uint32_t, struct* soflow_hash_entry *);
827	static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
828	static void cfil_sock_udp_is_closed(struct socket *);
829	static int cfil_sock_udp_notify_shutdown(struct socket , int, int, int*);
830	static int cfil_sock_udp_shutdown(struct socket , int* *);
831	static void cfil_sock_udp_close_wait(struct socket *);
832	static void cfil_sock_udp_buf_update(struct sockbuf *);
833	static int cfil_filters_udp_attached(struct socket *, bool);
834	static void cfil_get_flow_address_v6(struct soflow_hash_entry , struct* inpcb *,
835	struct in6_addr , struct in6_addr ,
836	u_int16_t , u_int16_t );
837	static void cfil_get_flow_address(struct soflow_hash_entry , struct* inpcb *,
838	struct in_addr , struct* in_addr *,
839	u_int16_t , u_int16_t );
840	static void cfil_info_log(int, struct cfil_info , const* char *);
841	void cfil_filter_show(u_int32_t);
842	void cfil_info_show(void);
843	bool cfil_info_action_timed_out(struct cfil_info , int*);
844	bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
845	struct m_tag cfil_dgram_save_socket_state(struct* cfil_info , struct* mbuf *);
846	boolean_t cfil_dgram_peek_socket_state(struct mbuf m, int* *inp_flags);
847	static void cfil_sock_received_verdict(struct socket *so);
848	static void cfil_fill_event_msg_addresses(struct soflow_hash_entry , struct* inpcb *,
849	union sockaddr_in_4_6 , union* sockaddr_in_4_6 *,
850	boolean_t, boolean_t);
851	static void cfil_stats_report_thread_func(void *, wait_result_t);
852	static void cfil_stats_report(void *v, wait_result_t w);
853	static bool cfil_dgram_gc_needed(struct socket , struct* soflow_hash_entry *, u_int64_t);
854	static bool cfil_dgram_gc_perform(struct socket , struct* soflow_hash_entry *);
855	static bool cfil_dgram_detach_entry(struct socket , struct* soflow_hash_entry *);
856	static bool cfil_dgram_detach_db(struct socket , struct* soflow_db *);
857	bool check_port(struct sockaddr *, u_short);
858
859	/*
860	* Content filter global read write lock
861	*/
862
863	static void
864	cfil_rw_lock_exclusive(lck_rw_t *lck)
865	{
866	void *lr_saved;
867
868	lr_saved = __builtin_return_address(`0`);
869
870	lck_rw_lock_exclusive(lck);
871
872	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
873	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + `1`) % CFIL_RW_LCK_MAX;
874	}
875
876	static void
877	cfil_rw_unlock_exclusive(lck_rw_t *lck)
878	{
879	void *lr_saved;
880
881	lr_saved = __builtin_return_address(`0`);
882
883	lck_rw_unlock_exclusive(lck);
884
885	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
886	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + `1`) % CFIL_RW_LCK_MAX;
887	}
888
889	static void
890	cfil_rw_lock_shared(lck_rw_t *lck)
891	{
892	void *lr_saved;
893
894	lr_saved = __builtin_return_address(`0`);
895
896	lck_rw_lock_shared(lck);
897
898	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
899	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + `1`) % CFIL_RW_LCK_MAX;
900	}
901
902	static void
903	cfil_rw_unlock_shared(lck_rw_t *lck)
904	{
905	void *lr_saved;
906
907	lr_saved = __builtin_return_address(`0`);
908
909	lck_rw_unlock_shared(lck);
910
911	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
912	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + `1`) % CFIL_RW_LCK_MAX;
913	}
914
915	static boolean_t
916	cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
917	{
918	void *lr_saved;
919	boolean_t upgraded;
920
921	lr_saved = __builtin_return_address(`0`);
922
923	upgraded = lck_rw_lock_shared_to_exclusive(lck);
924	if (upgraded) {
925	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
926	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + `1`) % CFIL_RW_LCK_MAX;
927	}
928	return upgraded;
929	}
930
931	static void
932	cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
933	{
934	void *lr_saved;
935
936	lr_saved = __builtin_return_address(`0`);
937
938	lck_rw_lock_exclusive_to_shared(lck);
939
940	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
941	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + `1`) % CFIL_RW_LCK_MAX;
942	}
943
944	static void
945	cfil_rw_lock_assert_held(lck_rw_t lck, int* exclusive)
946	{
947	#if !MACH_ASSERT
948	#pragma unused(lck, exclusive)
949	#endif
950	LCK_RW_ASSERT(lck,
951	exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
952	}
953
954	/*
955	* Return the number of bytes in the mbuf chain using the same
956	* method as m_length() or sballoc()
957	*
958	* Returns data len - starting from PKT start
959	* - retmbcnt - optional param to get total mbuf bytes in chain
960	* - retmbnum - optional param to get number of mbufs in chain
961	*/
962	static unsigned int
963	cfil_data_length(struct mbuf m, int* retmbcnt, int* *retmbnum)
964	{
965	struct mbuf *m0;
966	unsigned int pktlen = `0`;
967	int mbcnt;
968	int mbnum;
969
970	// Locate M_PKTHDR and mark as start of data if present
971	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
972	if (m0->m_flags & M_PKTHDR) {
973	m = m0;
974	break;
975	}
976	}
977
978	if (retmbcnt == NULL && retmbnum == NULL) {
979	return m_length(m);
980	}
981
982	pktlen = `0`;
983	mbcnt = `0`;
984	mbnum = `0`;
985	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
986	pktlen += m0->m_len;
987	mbnum++;
988	mbcnt += _MSIZE;
989	if (m0->m_flags & M_EXT) {
990	mbcnt += m0->m_ext.ext_size;
991	}
992	}
993	if (retmbcnt) {
994	*retmbcnt = mbcnt;
995	}
996	if (retmbnum) {
997	*retmbnum = mbnum;
998	}
999	return pktlen;
1000	}
1001
1002	static struct mbuf *
1003	cfil_data_start(struct mbuf *m)
1004	{
1005	struct mbuf *m0;
1006
1007	// Locate M_PKTHDR and use it as start of data if present
1008	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1009	if (m0->m_flags & M_PKTHDR) {
1010	return m0;
1011	}
1012	}
1013	return m;
1014	}
1015
1016	/*
1017	* Common mbuf queue utilities
1018	*/
1019
1020	static inline void
1021	cfil_queue_init(struct cfil_queue *cfq)
1022	{
1023	cfq->q_start = `0`;
1024	cfq->q_end = `0`;
1025	MBUFQ_INIT(&cfq->q_mq);
1026	}
1027
1028	static inline uint64_t
1029	cfil_queue_drain(struct cfil_queue *cfq)
1030	{
1031	uint64_t drained = cfq->q_start - cfq->q_end;
1032	cfq->q_start = `0`;
1033	cfq->q_end = `0`;
1034	MBUFQ_DRAIN(&cfq->q_mq);
1035
1036	return drained;
1037	}
1038
1039	/ Return 1 when empty, 0 otherwise /
1040	static inline int
1041	cfil_queue_empty(struct cfil_queue *cfq)
1042	{
1043	return MBUFQ_EMPTY(&cfq->q_mq);
1044	}
1045
1046	static inline uint64_t
1047	cfil_queue_offset_first(struct cfil_queue *cfq)
1048	{
1049	return cfq->q_start;
1050	}
1051
1052	static inline uint64_t
1053	cfil_queue_offset_last(struct cfil_queue *cfq)
1054	{
1055	return cfq->q_end;
1056	}
1057
1058	static inline uint64_t
1059	cfil_queue_len(struct cfil_queue *cfq)
1060	{
1061	return cfq->q_end - cfq->q_start;
1062	}
1063
1064	/*
1065	* Routines to verify some fundamental assumptions
1066	*/
1067
1068	static void
1069	cfil_queue_verify(struct cfil_queue *cfq)
1070	{
1071	mbuf_t chain;
1072	mbuf_t m;
1073	mbuf_t n;
1074	uint64_t queuesize = `0`;
1075
1076	/ Verify offset are ordered /
1077	VERIFY(cfq->q_start <= cfq->q_end);
1078
1079	/*
1080	* When queue is empty, the offsets are equal otherwise the offsets
1081	* are different
1082	*/
1083	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) \|\|
1084	(!MBUFQ_EMPTY(&cfq->q_mq) &&
1085	cfq->q_start != cfq->q_end));
1086
1087	MBUFQ_FOREACH(chain, &cfq->q_mq) {
1088	size_t chainsize = `0`;
1089	m = chain;
1090	unsigned int mlen = cfil_data_length(m, NULL, NULL);
1091	// skip the addr and control stuff if present
1092	m = cfil_data_start(m);
1093
1094	if (m == NULL \|\|
1095	m == (void *)M_TAG_FREE_PATTERN \|\|
1096	m->m_next == (void *)M_TAG_FREE_PATTERN \|\|
1097	m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1098	panic("%s - mq %p is free at %p", __func__,
1099	&cfq->q_mq, m);
1100	}
1101	for (n = m; n != NULL; n = n->m_next) {
1102	if (!m_has_mtype(m: n, mtype_flags: MTF_DATA \| MTF_HEADER \| MTF_OOBDATA)) {
1103	panic("%s - %p unsupported type %u", __func__,
1104	n, n->m_type);
1105	}
1106	chainsize += n->m_len;
1107	}
1108	if (mlen != chainsize) {
1109	panic("%s - %p m_length() %u != chainsize %lu",
1110	__func__, m, mlen, chainsize);
1111	}
1112	queuesize += chainsize;
1113	}
1114	OS_ANALYZER_SUPPRESS("81031590") if (queuesize != cfq->q_end - cfq->q_start) {
1115	panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1116	m, queuesize, cfq->q_end - cfq->q_start);
1117	}
1118	}
1119
1120	static void
1121	cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1122	{
1123	CFIL_QUEUE_VERIFY(cfq);
1124
1125	MBUFQ_ENQUEUE(&cfq->q_mq, m);
1126	cfq->q_end += len;
1127
1128	CFIL_QUEUE_VERIFY(cfq);
1129	}
1130
1131	static void
1132	cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1133	{
1134	CFIL_QUEUE_VERIFY(cfq);
1135
1136	VERIFY(cfil_data_length(m, NULL, NULL) == len);
1137
1138	MBUFQ_REMOVE(&cfq->q_mq, m);
1139	MBUFQ_NEXT(m) = NULL;
1140	cfq->q_start += len;
1141
1142	CFIL_QUEUE_VERIFY(cfq);
1143	}
1144
1145	static mbuf_t
1146	cfil_queue_first(struct cfil_queue *cfq)
1147	{
1148	return MBUFQ_FIRST(&cfq->q_mq);
1149	}
1150
1151	static mbuf_t
1152	cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1153	{
1154	#pragma unused(cfq)
1155	return MBUFQ_NEXT(m);
1156	}
1157
1158	static void
1159	cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1160	{
1161	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1162	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1163
1164	/ Verify the queues are ordered so that pending is before ctl /
1165	VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1166
1167	/ The peek offset cannot be less than the pass offset /
1168	VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1169
1170	/ Make sure we've updated the offset we peeked at /
1171	VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1172	}
1173
1174	static void
1175	cfil_entry_verify(struct cfil_entry *entry)
1176	{
1177	cfil_entry_buf_verify(cfe_buf: &entry->cfe_snd);
1178	cfil_entry_buf_verify(cfe_buf: &entry->cfe_rcv);
1179	}
1180
1181	static void
1182	cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1183	{
1184	CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1185
1186	VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1187	}
1188
1189	static void
1190	cfil_info_verify(struct cfil_info *cfil_info)
1191	{
1192	int i;
1193
1194	if (cfil_info == NULL) {
1195	return;
1196	}
1197
1198	cfil_info_buf_verify(cfi_buf: &cfil_info->cfi_snd);
1199	cfil_info_buf_verify(cfi_buf: &cfil_info->cfi_rcv);
1200
1201	for (i = `0`; i < MAX_CONTENT_FILTER; i++) {
1202	cfil_entry_verify(entry: &cfil_info->cfi_entries[i]);
1203	}
1204	}
1205
1206	static void
1207	verify_content_filter(struct content_filter *cfc)
1208	{
1209	struct cfil_entry *entry;
1210	uint32_t count = `0`;
1211
1212	VERIFY(cfc->cf_sock_count >= `0`);
1213
1214	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1215	count++;
1216	VERIFY(cfc == entry->cfe_filter);
1217	}
1218	VERIFY(count == cfc->cf_sock_count);
1219	}
1220
1221	/*
1222	* Kernel control socket callbacks
1223	*/
1224	static errno_t
1225	cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1226	void **unitinfo)
1227	{
1228	errno_t error = `0`;
1229	struct content_filter *cfc = NULL;
1230
1231	CFIL_LOG(LOG_NOTICE, "");
1232
1233	cfc = zalloc_flags(content_filter_zone, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
1234
1235	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1236
1237	if (sac->sc_unit == `0` \|\| sac->sc_unit > MAX_CONTENT_FILTER) {
1238	CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1239	error = EINVAL;
1240	} else if (content_filters[sac->sc_unit - `1`] != NULL) {
1241	CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1242	error = EADDRINUSE;
1243	} else {
1244	/*
1245	* kernel control socket kcunit numbers start at 1
1246	*/
1247	content_filters[sac->sc_unit - `1`] = cfc;
1248
1249	cfc->cf_kcref = kctlref;
1250	cfc->cf_kcunit = sac->sc_unit;
1251	TAILQ_INIT(&cfc->cf_sock_entries);
1252
1253	*unitinfo = cfc;
1254	cfil_active_count++;
1255
1256	if (cfil_active_count == `1`) {
1257	soflow_feat_set_functions(cfil_dgram_gc_needed, cfil_dgram_gc_perform,
1258	cfil_dgram_detach_entry, cfil_dgram_detach_db);
1259	}
1260
1261	// Allocate periodic stats buffer for this filter
1262	if (global_cfil_stats_report_buffers[cfc->cf_kcunit - `1`] == NULL) {
1263	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
1264
1265	struct cfil_stats_report_buffer *buf;
1266
1267	buf = kalloc_type(struct cfil_stats_report_buffer,
1268	Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
1269
1270	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1271
1272	/ Another thread may have won the race /
1273	if (global_cfil_stats_report_buffers[cfc->cf_kcunit - `1`] != NULL) {
1274	kfree_type(struct cfil_stats_report_buffer, buf);
1275	} else {
1276	global_cfil_stats_report_buffers[cfc->cf_kcunit - `1`] = buf;
1277	}
1278	}
1279	}
1280	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
1281
1282	if (error != `0` && cfc != NULL) {
1283	zfree(content_filter_zone, cfc);
1284	}
1285
1286	if (error == `0`) {
1287	OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1288	} else {
1289	OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1290	}
1291
1292	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1293	error, cfil_active_count, sac->sc_unit);
1294
1295	return error;
1296	}
1297
1298	static void
1299	cfil_update_behavior_flags(void)
1300	{
1301	struct content_filter *cfc = NULL;
1302
1303	// Update global flag
1304	bool preserve_connections = false;
1305	for (int i = `0`; i < MAX_CONTENT_FILTER; i++) {
1306	cfc = content_filters[i];
1307	if (cfc != NULL) {
1308	if (cfc->cf_flags & CFF_PRESERVE_CONNECTIONS) {
1309	preserve_connections = true;
1310	} else {
1311	preserve_connections = false;
1312	break;
1313	}
1314	}
1315	}
1316	if (preserve_connections == true) {
1317	cfil_behavior_flags \|= CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS;
1318	} else {
1319	cfil_behavior_flags &= ~CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS;
1320	}
1321	CFIL_LOG(LOG_INFO, "CFIL Preserve Connections - %s",
1322	(cfil_behavior_flags & CFIL_BEHAVIOR_FLAG_PRESERVE_CONNECTIONS) ? "On" : "Off");
1323	}
1324
1325	static errno_t
1326	cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1327	{
1328	#pragma unused(kctlref)
1329	errno_t error = `0`;
1330	struct content_filter *cfc;
1331	struct cfil_entry *entry;
1332	uint64_t sock_flow_id = `0`;
1333
1334	CFIL_LOG(LOG_NOTICE, "");
1335
1336	if (kcunit > MAX_CONTENT_FILTER) {
1337	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1338	kcunit, MAX_CONTENT_FILTER);
1339	error = EINVAL;
1340	goto done;
1341	}
1342
1343	cfc = (struct content_filter *)unitinfo;
1344	if (cfc == NULL) {
1345	goto done;
1346	}
1347
1348	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1349	if (content_filters[kcunit - `1`] != cfc \|\| cfc->cf_kcunit != kcunit) {
1350	CFIL_LOG(LOG_ERR, "bad unit info %u)",
1351	kcunit);
1352	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
1353	goto done;
1354	}
1355	cfc->cf_flags \|= CFF_DETACHING;
1356	/*
1357	* Remove all sockets from the filter
1358	*/
1359	while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1360	cfil_rw_lock_assert_held(lck: &cfil_lck_rw, exclusive: `1`);
1361
1362	verify_content_filter(cfc);
1363	/*
1364	* Accept all outstanding data by pushing to next filter
1365	* or back to socket
1366	*
1367	* TBD: Actually we should make sure all data has been pushed
1368	* back to socket
1369	*/
1370	if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1371	struct cfil_info *cfil_info = entry->cfe_cfil_info;
1372	struct socket *so = cfil_info->cfi_so;
1373	sock_flow_id = cfil_info->cfi_sock_id;
1374
1375	/ Need to let data flow immediately /
1376	entry->cfe_flags \|= CFEF_SENT_SOCK_ATTACHED \|
1377	CFEF_DATA_START;
1378
1379	// Before we release global lock, retain the cfil_info -
1380	// We attempt to retain a valid cfil_info to prevent any deallocation until
1381	// we are done. Abort retain if cfil_info has already entered the free code path.
1382	if (cfil_info == NULL \|\| os_ref_retain_try(rc: &cfil_info->cfi_ref_count) == false) {
1383	// Failing to retain cfil_info means detach is in progress already,
1384	// remove entry from filter list and move on.
1385	entry->cfe_filter = NULL;
1386	entry->cfe_necp_control_unit = `0`;
1387	TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1388	cfc->cf_sock_count--;
1389	continue;
1390	}
1391
1392	/*
1393	* Respect locking hierarchy
1394	*/
1395	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
1396
1397	// Search for socket from cfil_info sock_flow_id and lock so
1398	so = cfil_socket_from_sock_id(sock_flow_id, false);
1399	if (so == NULL \|\| so != cfil_info->cfi_so) {
1400	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1401
1402	// Socket has already been disconnected and removed from socket list.
1403	// Remove entry from filter list and move on.
1404	if (entry == TAILQ_FIRST(&cfc->cf_sock_entries)) {
1405	entry->cfe_filter = NULL;
1406	entry->cfe_necp_control_unit = `0`;
1407	TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1408	cfc->cf_sock_count--;
1409	}
1410
1411	goto release_cfil_info;
1412	}
1413
1414	/*
1415	* When cfe_filter is NULL the filter is detached
1416	* and the entry has been removed from cf_sock_entries
1417	*/
1418	if ((so->so_cfil == NULL && so->so_flow_db == NULL) \|\| entry->cfe_filter == NULL) {
1419	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1420	goto release;
1421	}
1422
1423	(void) cfil_action_data_pass(so, cfil_info, kcunit, `1`,
1424	CFM_MAX_OFFSET,
1425	CFM_MAX_OFFSET);
1426
1427	(void) cfil_action_data_pass(so, cfil_info, kcunit, `0`,
1428	CFM_MAX_OFFSET,
1429	CFM_MAX_OFFSET);
1430
1431	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1432
1433	/*
1434	* Check again to make sure if the cfil_info is still valid
1435	* as the socket may have been unlocked when when calling
1436	* cfil_acquire_sockbuf()
1437	*/
1438	if (entry->cfe_filter == NULL \|\|
1439	(so->so_cfil == NULL && soflow_db_get_feature_context(so->so_flow_db, sock_flow_id) == NULL)) {
1440	goto release;
1441	}
1442
1443	/ The filter is now detached /
1444	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
1445
1446	if (cfil_info->cfi_debug) {
1447	cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER DISCONNECTED");
1448	}
1449
1450	CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1451	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1452	if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1453	cfil_filters_attached(so) == `0`) {
1454	CFIL_LOG(LOG_NOTICE, "so %llx waking",
1455	(uint64_t)VM_KERNEL_ADDRPERM(so));
1456	wakeup(chan: (caddr_t)cfil_info);
1457	}
1458
1459	/*
1460	* Remove the filter entry from the content filter
1461	* but leave the rest of the state intact as the queues
1462	* may not be empty yet
1463	*/
1464	entry->cfe_filter = NULL;
1465	entry->cfe_necp_control_unit = `0`;
1466
1467	TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1468	cfc->cf_sock_count--;
1469
1470	// This is the last filter disconnecting, clear the cfil_info
1471	// saved control unit so we will be able to drop this flow if
1472	// a new filter get installed.
1473	if (cfil_active_count == `1`) {
1474	cfil_info->cfi_filter_control_unit = `0`;
1475	}
1476	release:
1477	socket_unlock(so, refcount: `1`);
1478
1479	release_cfil_info:
1480	/*
1481	* Release reference on cfil_info. To avoid double locking,
1482	* temporarily unlock in case it has been detached and we
1483	* end up freeing it which will take the global lock again.
1484	*/
1485	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
1486	CFIL_INFO_FREE(cfil_info);
1487	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
1488	}
1489	}
1490	verify_content_filter(cfc);
1491
1492	/ Free the stats buffer for this filter /
1493	if (global_cfil_stats_report_buffers[cfc->cf_kcunit - `1`] != NULL) {
1494	kfree_type(struct cfil_stats_report_buffer,
1495	global_cfil_stats_report_buffers[cfc->cf_kcunit - `1`]);
1496	global_cfil_stats_report_buffers[cfc->cf_kcunit - `1`] = NULL;
1497	}
1498	VERIFY(cfc->cf_sock_count == `0`);
1499
1500	/*
1501	* Make filter inactive
1502	*/
1503	content_filters[kcunit - `1`] = NULL;
1504	cfil_active_count--;
1505	cfil_update_behavior_flags();
1506	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
1507
1508	if (cfc->cf_crypto_state != NULL) {
1509	cfil_crypto_cleanup_state(state: cfc->cf_crypto_state);
1510	cfc->cf_crypto_state = NULL;
1511	}
1512
1513	zfree(content_filter_zone, cfc);
1514	done:
1515	if (error == `0`) {
1516	OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1517	} else {
1518	OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1519	}
1520
1521	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1522	error, cfil_active_count, kcunit);
1523
1524	return error;
1525	}
1526
1527	/*
1528	* cfil_acquire_sockbuf()
1529	*
1530	* Prevent any other thread from acquiring the sockbuf
1531	* We use sb_cfil_thread as a semaphore to prevent other threads from
1532	* messing with the sockbuf -- see sblock()
1533	* Note: We do not set SB_LOCK here because the thread may check or modify
1534	* SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1535	* sblock(), sbunlock() or sodefunct()
1536	*/
1537	static int
1538	cfil_acquire_sockbuf(struct socket so, struct* cfil_info cfil_info, int* outgoing)
1539	{
1540	thread_t tp = current_thread();
1541	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1542	lck_mtx_t *mutex_held;
1543	int error = `0`;
1544
1545	/*
1546	* Wait until no thread is holding the sockbuf and other content
1547	* filter threads have released the sockbuf
1548	*/
1549	while ((sb->sb_flags & SB_LOCK) \|\|
1550	(sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1551	if (so->so_proto->pr_getlock != NULL) {
1552	mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1553	} else {
1554	mutex_held = so->so_proto->pr_domain->dom_mtx;
1555	}
1556
1557	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1558
1559	sb->sb_wantlock++;
1560	VERIFY(sb->sb_wantlock != `0`);
1561
1562	msleep(chan: &sb->sb_flags, mtx: mutex_held, PSOCK, wmesg: "cfil_acquire_sockbuf",
1563	NULL);
1564
1565	VERIFY(sb->sb_wantlock != `0`);
1566	sb->sb_wantlock--;
1567	}
1568	/*
1569	* Use reference count for repetitive calls on same thread
1570	*/
1571	if (sb->sb_cfil_refs == `0`) {
1572	VERIFY(sb->sb_cfil_thread == NULL);
1573	VERIFY((sb->sb_flags & SB_LOCK) == `0`);
1574
1575	sb->sb_cfil_thread = tp;
1576	sb->sb_flags \|= SB_LOCK;
1577	}
1578	sb->sb_cfil_refs++;
1579
1580	/ We acquire the socket buffer when we need to cleanup /
1581	if (cfil_info == NULL) {
1582	CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1583	(uint64_t)VM_KERNEL_ADDRPERM(so));
1584	error = `0`;
1585	} else if (cfil_info->cfi_flags & CFIF_DROP) {
1586	CFIL_LOG(LOG_ERR, "so %llx drop set",
1587	(uint64_t)VM_KERNEL_ADDRPERM(so));
1588	error = EPIPE;
1589	}
1590
1591	return error;
1592	}
1593
1594	static void
1595	cfil_release_sockbuf(struct socket so, int* outgoing)
1596	{
1597	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1598	thread_t tp = current_thread();
1599
1600	socket_lock_assert_owned(so);
1601
1602	if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1603	panic("%s sb_cfil_thread %p not current %p", __func__,
1604	sb->sb_cfil_thread, tp);
1605	}
1606	/*
1607	* Don't panic if we are defunct because SB_LOCK has
1608	* been cleared by sodefunct()
1609	*/
1610	if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1611	panic("%s SB_LOCK not set on %p", __func__,
1612	sb);
1613	}
1614	/*
1615	* We can unlock when the thread unwinds to the last reference
1616	*/
1617	sb->sb_cfil_refs--;
1618	if (sb->sb_cfil_refs == `0`) {
1619	sb->sb_cfil_thread = NULL;
1620	sb->sb_flags &= ~SB_LOCK;
1621
1622	if (sb->sb_wantlock > `0`) {
1623	wakeup(chan: &sb->sb_flags);
1624	}
1625	}
1626	}
1627
1628	cfil_sock_id_t
1629	cfil_sock_id_from_socket(struct socket *so)
1630	{
1631	if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1632	return so->so_cfil->cfi_sock_id;
1633	} else {
1634	return CFIL_SOCK_ID_NONE;
1635	}
1636	}
1637
1638	/*
1639	* cfil_socket_safe_lock -
1640	* This routine attempts to lock the socket safely.
1641	*
1642	* The passed in pcbinfo is assumed to be locked and must be unlocked once the
1643	* inp state is safeguarded and before we attempt to lock/unlock the socket.
1644	* This is to prevent getting blocked by socket_lock() while holding the pcbinfo
1645	* lock, avoiding potential deadlock with other processes contending for the same
1646	* resources. This is also to avoid double locking the pcbinfo for rip sockets
1647	* since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
1648	* so_usecount is 0.
1649	*/
1650	static bool
1651	cfil_socket_safe_lock(struct inpcb inp, struct* inpcbinfo *pcbinfo)
1652	{
1653	struct socket *so = NULL;
1654
1655	VERIFY(pcbinfo != NULL);
1656
1657	if (in_pcb_checkstate(inp, WNT_ACQUIRE, `0`) != WNT_STOPUSING) {
1658	// Safeguarded the inp state, unlock pcbinfo before locking socket.
1659	lck_rw_done(lck: &pcbinfo->ipi_lock);
1660
1661	so = inp->inp_socket;
1662	socket_lock(so, refcount: `1`);
1663	if (in_pcb_checkstate(inp, WNT_RELEASE, `1`) != WNT_STOPUSING) {
1664	return true;
1665	}
1666	} else {
1667	// Failed to safeguarded the inp state, unlock pcbinfo and abort.
1668	lck_rw_done(lck: &pcbinfo->ipi_lock);
1669	}
1670
1671	if (so) {
1672	socket_unlock(so, refcount: `1`);
1673	}
1674	return false;
1675	}
1676
1677	static struct socket *
1678	cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1679	{
1680	struct socket *so = NULL;
1681	u_int64_t gencnt = cfil_sock_id >> `32`;
1682	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & `0x0ffffffff`);
1683	struct inpcb *inp = NULL;
1684	struct inpcbinfo *pcbinfo = NULL;
1685
1686	if (udp_only) {
1687	goto find_udp;
1688	}
1689
1690	pcbinfo = &tcbinfo;
1691	lck_rw_lock_shared(lck: &pcbinfo->ipi_lock);
1692	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1693	if (inp->inp_state != INPCB_STATE_DEAD &&
1694	inp->inp_socket != NULL &&
1695	inp->inp_flowhash == flowhash &&
1696	(inp->inp_socket->so_gencnt & `0x0ffffffff`) == gencnt &&
1697	inp->inp_socket->so_cfil != NULL) {
1698	if (cfil_socket_safe_lock(inp, pcbinfo)) {
1699	so = inp->inp_socket;
1700	}
1701	/ pcbinfo is already unlocked, we are done. /
1702	goto done;
1703	}
1704	}
1705	lck_rw_done(lck: &pcbinfo->ipi_lock);
1706	if (so != NULL) {
1707	goto done;
1708	}
1709
1710	find_udp:
1711
1712	pcbinfo = &udbinfo;
1713	lck_rw_lock_shared(lck: &pcbinfo->ipi_lock);
1714	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1715	if (inp->inp_state != INPCB_STATE_DEAD &&
1716	inp->inp_socket != NULL &&
1717	inp->inp_socket->so_flow_db != NULL &&
1718	(inp->inp_socket->so_gencnt & `0x0ffffffff`) == gencnt) {
1719	if (cfil_socket_safe_lock(inp, pcbinfo)) {
1720	so = inp->inp_socket;
1721	}
1722	/ pcbinfo is already unlocked, we are done. /
1723	goto done;
1724	}
1725	}
1726	lck_rw_done(lck: &pcbinfo->ipi_lock);
1727	if (so != NULL) {
1728	goto done;
1729	}
1730
1731	pcbinfo = &ripcbinfo;
1732	lck_rw_lock_shared(lck: &pcbinfo->ipi_lock);
1733	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1734	if (inp->inp_state != INPCB_STATE_DEAD &&
1735	inp->inp_socket != NULL &&
1736	inp->inp_socket->so_flow_db != NULL &&
1737	(inp->inp_socket->so_gencnt & `0x0ffffffff`) == gencnt) {
1738	if (cfil_socket_safe_lock(inp, pcbinfo)) {
1739	so = inp->inp_socket;
1740	}
1741	/ pcbinfo is already unlocked, we are done. /
1742	goto done;
1743	}
1744	}
1745	lck_rw_done(lck: &pcbinfo->ipi_lock);
1746
1747	done:
1748	if (so == NULL) {
1749	OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1750	CFIL_LOG(LOG_DEBUG,
1751	"no socket for sock_id %llx gencnt %llx flowhash %x",
1752	cfil_sock_id, gencnt, flowhash);
1753	}
1754
1755	return so;
1756	}
1757
1758	static struct socket *
1759	cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1760	{
1761	struct socket *so = NULL;
1762	struct inpcb *inp = NULL;
1763	struct inpcbinfo *pcbinfo = &tcbinfo;
1764
1765	lck_rw_lock_shared(lck: &pcbinfo->ipi_lock);
1766	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1767	if (inp->inp_state != INPCB_STATE_DEAD &&
1768	inp->inp_socket != NULL &&
1769	uuid_compare(uu1: inp->necp_client_uuid, uu2: necp_client_uuid) == `0`) {
1770	*cfil_attached = (inp->inp_socket->so_cfil != NULL);
1771	if (cfil_socket_safe_lock(inp, pcbinfo)) {
1772	so = inp->inp_socket;
1773	}
1774	/ pcbinfo is already unlocked, we are done. /
1775	goto done;
1776	}
1777	}
1778	lck_rw_done(lck: &pcbinfo->ipi_lock);
1779	if (so != NULL) {
1780	goto done;
1781	}
1782
1783	pcbinfo = &udbinfo;
1784	lck_rw_lock_shared(lck: &pcbinfo->ipi_lock);
1785	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1786	if (inp->inp_state != INPCB_STATE_DEAD &&
1787	inp->inp_socket != NULL &&
1788	uuid_compare(uu1: inp->necp_client_uuid, uu2: necp_client_uuid) == `0`) {
1789	*cfil_attached = (inp->inp_socket->so_flow_db != NULL);
1790	if (cfil_socket_safe_lock(inp, pcbinfo)) {
1791	so = inp->inp_socket;
1792	}
1793	/ pcbinfo is already unlocked, we are done. /
1794	goto done;
1795	}
1796	}
1797	lck_rw_done(lck: &pcbinfo->ipi_lock);
1798
1799	done:
1800	return so;
1801	}
1802
1803	static void
1804	cfil_info_stats_toggle(struct cfil_info cfil_info, struct* cfil_entry *entry, uint32_t report_frequency)
1805	{
1806	struct cfil_info *cfil = NULL;
1807	Boolean found = FALSE;
1808	int kcunit;
1809
1810	if (cfil_info == NULL) {
1811	return;
1812	}
1813
1814	if (report_frequency) {
1815	if (entry == NULL) {
1816	return;
1817	}
1818
1819	// Update stats reporting frequency.
1820	if (entry->cfe_stats_report_frequency != report_frequency) {
1821	entry->cfe_stats_report_frequency = report_frequency;
1822	if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1823	entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1824	}
1825	microuptime(tv: &entry->cfe_stats_report_ts);
1826
1827	// Insert cfil_info into list only if it is not in yet.
1828	TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1829	if (cfil == cfil_info) {
1830	return;
1831	}
1832	}
1833
1834	TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1835
1836	// Wake up stats thread if this is first flow added
1837	if (cfil_sock_attached_stats_count == `0`) {
1838	thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1839	}
1840	cfil_sock_attached_stats_count++;
1841
1842	if (cfil_info->cfi_debug && cfil_log_stats) {
1843	CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu <%llx>> stats frequency %d msecs",
1844	cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : `0`,
1845	cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
1846	entry->cfe_stats_report_frequency);
1847	}
1848	}
1849	} else {
1850	// Turn off stats reporting for this filter.
1851	if (entry != NULL) {
1852	// Already off, no change.
1853	if (entry->cfe_stats_report_frequency == `0`) {
1854	return;
1855	}
1856
1857	entry->cfe_stats_report_frequency = `0`;
1858	// If cfil_info still has filter(s) asking for stats, no need to remove from list.
1859	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1860	if (cfil_info->cfi_entries[kcunit - `1`].cfe_stats_report_frequency > `0`) {
1861	return;
1862	}
1863	}
1864	}
1865
1866	// No more filter asking for stats for this cfil_info, remove from list.
1867	if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1868	found = FALSE;
1869	TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1870	if (cfil == cfil_info) {
1871	found = TRUE;
1872	break;
1873	}
1874	}
1875	if (found) {
1876	cfil_sock_attached_stats_count--;
1877	TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1878	if (cfil_info->cfi_debug && cfil_log_stats) {
1879	CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu <%llx>> stats frequency reset",
1880	cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : `0`,
1881	cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
1882	}
1883	}
1884	}
1885	}
1886	}
1887
1888	static errno_t
1889	cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1890	int flags)
1891	{
1892	#pragma unused(kctlref, flags)
1893	errno_t error = `0`;
1894	struct cfil_msg_hdr *msghdr;
1895	struct content_filter cfc = (struct* content_filter *)unitinfo;
1896	struct socket *so;
1897	struct cfil_msg_action *action_msg;
1898	struct cfil_entry *entry;
1899	struct cfil_info *cfil_info = NULL;
1900	unsigned int data_len = `0`;
1901
1902	CFIL_LOG(LOG_INFO, "");
1903
1904	if (cfc == NULL) {
1905	CFIL_LOG(LOG_ERR, "no unitinfo");
1906	error = EINVAL;
1907	goto done;
1908	}
1909
1910	if (kcunit > MAX_CONTENT_FILTER) {
1911	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1912	kcunit, MAX_CONTENT_FILTER);
1913	error = EINVAL;
1914	goto done;
1915	}
1916	if (m == NULL) {
1917	CFIL_LOG(LOG_ERR, "null mbuf");
1918	error = EINVAL;
1919	goto done;
1920	}
1921	data_len = m_length(m);
1922
1923	if (data_len < sizeof(struct cfil_msg_hdr)) {
1924	CFIL_LOG(LOG_ERR, "too short %u", data_len);
1925	error = EINVAL;
1926	goto done;
1927	}
1928	msghdr = (struct cfil_msg_hdr *)mbuf_data(mbuf: m);
1929	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1930	CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1931	error = EINVAL;
1932	goto done;
1933	}
1934	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1935	CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1936	error = EINVAL;
1937	goto done;
1938	}
1939	if (msghdr->cfm_len > data_len) {
1940	CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1941	error = EINVAL;
1942	goto done;
1943	}
1944
1945	/ Validate action operation /
1946	switch (msghdr->cfm_op) {
1947	case CFM_OP_DATA_UPDATE:
1948	OSIncrementAtomic(
1949	&cfil_stats.cfs_ctl_action_data_update);
1950	break;
1951	case CFM_OP_DROP:
1952	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1953	break;
1954	case CFM_OP_BLESS_CLIENT:
1955	if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1956	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1957	error = EINVAL;
1958	CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1959	msghdr->cfm_len,
1960	msghdr->cfm_op);
1961	goto done;
1962	}
1963	error = cfil_action_bless_client(kcunit, msghdr);
1964	goto done;
1965	case CFM_OP_SET_CRYPTO_KEY:
1966	if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1967	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1968	error = EINVAL;
1969	CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1970	msghdr->cfm_len,
1971	msghdr->cfm_op);
1972	goto done;
1973	}
1974	error = cfil_action_set_crypto_key(kcunit, msghdr);
1975	goto done;
1976	default:
1977	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1978	CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1979	error = EINVAL;
1980	goto done;
1981	}
1982	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1983	OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1984	error = EINVAL;
1985	CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1986	msghdr->cfm_len,
1987	msghdr->cfm_op);
1988	goto done;
1989	}
1990	cfil_rw_lock_shared(lck: &cfil_lck_rw);
1991	if (cfc != (void *)content_filters[kcunit - `1`]) {
1992	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1993	kcunit);
1994	error = EINVAL;
1995	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
1996	goto done;
1997	}
1998	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
1999
2000	// Search for socket (TCP+UDP and lock so)
2001	so = cfil_socket_from_sock_id(cfil_sock_id: msghdr->cfm_sock_id, false);
2002	if (so == NULL) {
2003	CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
2004	msghdr->cfm_sock_id);
2005	error = EINVAL;
2006	goto done;
2007	}
2008
2009	cfil_info = so->so_flow_db != NULL ?
2010	soflow_db_get_feature_context(so->so_flow_db, msghdr->cfm_sock_id) : so->so_cfil;
2011
2012	// We should not obtain global lock here in order to avoid deadlock down the path.
2013	// But we attempt to retain a valid cfil_info to prevent any deallocation until
2014	// we are done. Abort retain if cfil_info has already entered the free code path.
2015	if (cfil_info && os_ref_retain_try(rc: &cfil_info->cfi_ref_count) == false) {
2016	socket_unlock(so, refcount: `1`);
2017	goto done;
2018	}
2019
2020	if (cfil_info == NULL) {
2021	CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
2022	(uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
2023	error = EINVAL;
2024	goto unlock;
2025	} else if (cfil_info->cfi_flags & CFIF_DROP) {
2026	CFIL_LOG(LOG_NOTICE, "so %llx drop set",
2027	(uint64_t)VM_KERNEL_ADDRPERM(so));
2028	error = EINVAL;
2029	goto unlock;
2030	}
2031
2032	if (cfil_info->cfi_debug) {
2033	cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
2034	}
2035
2036	entry = &cfil_info->cfi_entries[kcunit - `1`];
2037	if (entry->cfe_filter == NULL) {
2038	CFIL_LOG(LOG_NOTICE, "so %llx no filter",
2039	(uint64_t)VM_KERNEL_ADDRPERM(so));
2040	error = EINVAL;
2041	goto unlock;
2042	}
2043
2044	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
2045	entry->cfe_flags \|= CFEF_DATA_START;
2046	} else {
2047	CFIL_LOG(LOG_ERR,
2048	"so %llx attached not sent for %u",
2049	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2050	error = EINVAL;
2051	goto unlock;
2052	}
2053
2054	microuptime(tv: &entry->cfe_last_action);
2055	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
2056
2057	action_msg = (struct cfil_msg_action *)msghdr;
2058
2059	switch (msghdr->cfm_op) {
2060	case CFM_OP_DATA_UPDATE:
2061
2062	if (cfil_info->cfi_debug) {
2063	cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2064	CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu <%llx>> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2065	(uint64_t)VM_KERNEL_ADDRPERM(so),
2066	cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
2067	action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2068	action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2069	}
2070
2071	/*
2072	* Received verdict, at this point we know this
2073	* socket connection is allowed. Unblock thread
2074	* immediately before proceeding to process the verdict.
2075	*/
2076	cfil_sock_received_verdict(so);
2077
2078	if (action_msg->cfa_out_peek_offset != `0` \|\|
2079	action_msg->cfa_out_pass_offset != `0`) {
2080	error = cfil_action_data_pass(so, cfil_info, kcunit, `1`,
2081	action_msg->cfa_out_pass_offset,
2082	action_msg->cfa_out_peek_offset);
2083	}
2084	if (error == EJUSTRETURN) {
2085	error = `0`;
2086	}
2087	if (error != `0`) {
2088	break;
2089	}
2090	if (action_msg->cfa_in_peek_offset != `0` \|\|
2091	action_msg->cfa_in_pass_offset != `0`) {
2092	error = cfil_action_data_pass(so, cfil_info, kcunit, `0`,
2093	action_msg->cfa_in_pass_offset,
2094	action_msg->cfa_in_peek_offset);
2095	}
2096	if (error == EJUSTRETURN) {
2097	error = `0`;
2098	}
2099
2100	// Toggle stats reporting according to received verdict.
2101	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
2102	cfil_info_stats_toggle(cfil_info, entry, report_frequency: action_msg->cfa_stats_frequency);
2103	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
2104
2105	break;
2106
2107	case CFM_OP_DROP:
2108	if (cfil_info->cfi_debug) {
2109	cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2110	CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu <%llx>> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2111	(uint64_t)VM_KERNEL_ADDRPERM(so),
2112	cfil_info->cfi_sock_id, cfil_info->cfi_sock_id,
2113	action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2114	action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2115	}
2116
2117	error = cfil_action_drop(so, cfil_info, kcunit);
2118	cfil_sock_received_verdict(so);
2119	break;
2120
2121	default:
2122	error = EINVAL;
2123	break;
2124	}
2125	unlock:
2126	CFIL_INFO_FREE(cfil_info)
2127	socket_unlock(so, refcount: `1`);
2128	done:
2129	mbuf_freem(mbuf: m);
2130
2131	if (error == `0`) {
2132	OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2133	} else {
2134	OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2135	}
2136
2137	return error;
2138	}
2139
2140	static errno_t
2141	cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2142	int opt, void data, size_t len)
2143	{
2144	#pragma unused(kctlref, opt)
2145	struct cfil_info *cfil_info = NULL;
2146	errno_t error = `0`;
2147	struct content_filter cfc = (struct* content_filter *)unitinfo;
2148
2149	CFIL_LOG(LOG_NOTICE, "");
2150
2151	if (cfc == NULL) {
2152	CFIL_LOG(LOG_ERR, "no unitinfo");
2153	return EINVAL;
2154	}
2155
2156	cfil_rw_lock_shared(lck: &cfil_lck_rw);
2157
2158	if (kcunit > MAX_CONTENT_FILTER) {
2159	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2160	kcunit, MAX_CONTENT_FILTER);
2161	error = EINVAL;
2162	goto done;
2163	}
2164	if (cfc != (void *)content_filters[kcunit - `1`]) {
2165	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2166	kcunit);
2167	error = EINVAL;
2168	goto done;
2169	}
2170	switch (opt) {
2171	case CFIL_OPT_NECP_CONTROL_UNIT:
2172	if (len < sizeof*(uint32_t)) {
2173	CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2174	error = EINVAL;
2175	goto done;
2176	}
2177	if (data != NULL) {
2178	(uint32_t )data = cfc->cf_necp_control_unit;
2179	}
2180	break;
2181	case CFIL_OPT_PRESERVE_CONNECTIONS:
2182	if (len < sizeof*(uint32_t)) {
2183	CFIL_LOG(LOG_ERR, "CFIL_OPT_PRESERVE_CONNECTIONS len too small %lu", *len);
2184	error = EINVAL;
2185	goto done;
2186	}
2187	if (data != NULL) {
2188	(uint32_t )data = (cfc->cf_flags & CFF_PRESERVE_CONNECTIONS) ? true : false;
2189	}
2190	break;
2191	case CFIL_OPT_GET_SOCKET_INFO:
2192	if (len != sizeof(struct* cfil_opt_sock_info)) {
2193	CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2194	error = EINVAL;
2195	goto done;
2196	}
2197	if (data == NULL) {
2198	CFIL_LOG(LOG_ERR, "data not passed");
2199	error = EINVAL;
2200	goto done;
2201	}
2202
2203	struct cfil_opt_sock_info *sock_info =
2204	(struct cfil_opt_sock_info *) data;
2205
2206	// Unlock here so that we never hold both cfil_lck_rw and the
2207	// socket_lock at the same time. Otherwise, this can deadlock
2208	// because soclose() takes the socket_lock and then exclusive
2209	// cfil_lck_rw and we require the opposite order.
2210
2211	// WARNING: Be sure to never use anything protected
2212	// by cfil_lck_rw beyond this point.
2213	// WARNING: Be sure to avoid fallthrough and
2214	// goto return_already_unlocked from this branch.
2215	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
2216
2217	// Search (TCP+UDP) and lock socket
2218	struct socket *sock =
2219	cfil_socket_from_sock_id(cfil_sock_id: sock_info->cfs_sock_id, false);
2220	if (sock == NULL) {
2221	CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2222	sock_info->cfs_sock_id);
2223	error = ENOENT;
2224	goto return_already_unlocked;
2225	}
2226
2227	cfil_info = (sock->so_flow_db != NULL) ?
2228	soflow_db_get_feature_context(sock->so_flow_db, sock_info->cfs_sock_id) : sock->so_cfil;
2229
2230	if (cfil_info == NULL) {
2231	CFIL_LOG(LOG_INFO, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2232	(uint64_t)VM_KERNEL_ADDRPERM(sock));
2233	error = EINVAL;
2234	socket_unlock(so: sock, refcount: `1`);
2235	goto return_already_unlocked;
2236	}
2237
2238	if (sock->so_proto == NULL \|\| sock->so_proto->pr_domain == NULL) {
2239	CFIL_LOG(LOG_INFO, "CFIL: GET_SOCKET_INFO failed: so %llx NULL so_proto / pr_domain",
2240	(uint64_t)VM_KERNEL_ADDRPERM(sock));
2241	error = EINVAL;
2242	socket_unlock(so: sock, refcount: `1`);
2243	goto return_already_unlocked;
2244	}
2245
2246	// Fill out family, type, and protocol
2247	sock_info->cfs_sock_family = SOCK_DOM(sock);
2248	sock_info->cfs_sock_type = SOCK_TYPE(sock);
2249	sock_info->cfs_sock_protocol = GET_SO_PROTO(sock);
2250
2251	// Source and destination addresses
2252	struct inpcb *inp = sotoinpcb(sock);
2253	if (inp->inp_vflag & INP_IPV6) {
2254	struct in6_addr laddr = NULL, faddr = NULL;
2255	u_int16_t lport = `0`, fport = `0`;
2256
2257	cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2258	&laddr, &faddr, &lport, &fport);
2259	fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport, inp->inp_lifscope);
2260	fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport, inp->inp_fifscope);
2261	} else if (inp->inp_vflag & INP_IPV4) {
2262	struct in_addr laddr = {.s_addr = `0`}, faddr = {.s_addr = `0`};
2263	u_int16_t lport = `0`, fport = `0`;
2264
2265	cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2266	&laddr, &faddr, &lport, &fport);
2267	fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2268	fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2269	}
2270
2271	// Set the pid info
2272	sock_info->cfs_pid = sock->last_pid;
2273	memcpy(dst: sock_info->cfs_uuid, src: sock->last_uuid, n: sizeof(uuid_t));
2274
2275	if (sock->so_flags & SOF_DELEGATED) {
2276	sock_info->cfs_e_pid = sock->e_pid;
2277	memcpy(dst: sock_info->cfs_e_uuid, src: sock->e_uuid, n: sizeof(uuid_t));
2278	} else {
2279	sock_info->cfs_e_pid = sock->last_pid;
2280	memcpy(dst: sock_info->cfs_e_uuid, src: sock->last_uuid, n: sizeof(uuid_t));
2281	}
2282
2283	socket_unlock(so: sock, refcount: `1`);
2284
2285	goto return_already_unlocked;
2286	default:
2287	error = ENOPROTOOPT;
2288	break;
2289	}
2290	done:
2291	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
2292
2293	return error;
2294
2295	return_already_unlocked:
2296
2297	return error;
2298	}
2299
2300	static errno_t
2301	cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2302	int opt, void *data, size_t len)
2303	{
2304	#pragma unused(kctlref, opt)
2305	errno_t error = `0`;
2306	struct content_filter cfc = (struct* content_filter *)unitinfo;
2307
2308	CFIL_LOG(LOG_NOTICE, "");
2309
2310	if (cfc == NULL) {
2311	CFIL_LOG(LOG_ERR, "no unitinfo");
2312	return EINVAL;
2313	}
2314
2315	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
2316
2317	if (kcunit > MAX_CONTENT_FILTER) {
2318	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2319	kcunit, MAX_CONTENT_FILTER);
2320	error = EINVAL;
2321	goto done;
2322	}
2323	if (cfc != (void *)content_filters[kcunit - `1`]) {
2324	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2325	kcunit);
2326	error = EINVAL;
2327	goto done;
2328	}
2329	switch (opt) {
2330	case CFIL_OPT_NECP_CONTROL_UNIT:
2331	if (len < sizeof(uint32_t)) {
2332	CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2333	"len too small %lu", len);
2334	error = EINVAL;
2335	goto done;
2336	}
2337	if (cfc->cf_necp_control_unit != `0`) {
2338	CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2339	"already set %u",
2340	cfc->cf_necp_control_unit);
2341	error = EINVAL;
2342	goto done;
2343	}
2344	cfc->cf_necp_control_unit = (uint32_t )data;
2345	break;
2346	case CFIL_OPT_PRESERVE_CONNECTIONS:
2347	if (len < sizeof(uint32_t)) {
2348	CFIL_LOG(LOG_ERR, "CFIL_OPT_PRESERVE_CONNECTIONS "
2349	"len too small %lu", len);
2350	error = EINVAL;
2351	goto done;
2352	}
2353	uint32_t preserve_connections = ((uint32_t )data);
2354	CFIL_LOG(LOG_INFO, "CFIL_OPT_PRESERVE_CONNECTIONS got %d (kcunit %d)", preserve_connections, kcunit);
2355	if (preserve_connections) {
2356	cfc->cf_flags \|= CFF_PRESERVE_CONNECTIONS;
2357	} else {
2358	cfc->cf_flags &= ~CFF_PRESERVE_CONNECTIONS;
2359	}
2360
2361	cfil_update_behavior_flags();
2362	break;
2363	default:
2364	error = ENOPROTOOPT;
2365	break;
2366	}
2367	done:
2368	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
2369
2370	return error;
2371	}
2372
2373
2374	static void
2375	cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void unitinfo, int* flags)
2376	{
2377	#pragma unused(kctlref, flags)
2378	struct content_filter cfc = (struct* content_filter *)unitinfo;
2379	struct socket *so = NULL;
2380	int error;
2381	struct cfil_entry *entry;
2382	struct cfil_info *cfil_info = NULL;
2383
2384	CFIL_LOG(LOG_INFO, "");
2385
2386	if (cfc == NULL) {
2387	CFIL_LOG(LOG_ERR, "no unitinfo");
2388	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2389	return;
2390	}
2391
2392	if (kcunit > MAX_CONTENT_FILTER) {
2393	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2394	kcunit, MAX_CONTENT_FILTER);
2395	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2396	return;
2397	}
2398	cfil_rw_lock_shared(lck: &cfil_lck_rw);
2399	if (cfc != (void *)content_filters[kcunit - `1`]) {
2400	CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2401	kcunit);
2402	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2403	goto done;
2404	}
2405	/ Let's assume the flow control is lifted /
2406	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2407	if (!cfil_rw_lock_shared_to_exclusive(lck: &cfil_lck_rw)) {
2408	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
2409	}
2410
2411	cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2412
2413	cfil_rw_lock_exclusive_to_shared(lck: &cfil_lck_rw);
2414	LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2415	}
2416	/*
2417	* Flow control will be raised again as soon as an entry cannot enqueue
2418	* to the kernel control socket
2419	*/
2420	while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == `0`) {
2421	verify_content_filter(cfc);
2422
2423	cfil_rw_lock_assert_held(lck: &cfil_lck_rw, exclusive: `0`);
2424
2425	/ Find an entry that is flow controlled /
2426	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2427	if (entry->cfe_cfil_info == NULL \|\|
2428	entry->cfe_cfil_info->cfi_so == NULL) {
2429	continue;
2430	}
2431	if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == `0`) {
2432	continue;
2433	}
2434	}
2435	if (entry == NULL) {
2436	break;
2437	}
2438
2439	OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2440
2441	cfil_info = entry->cfe_cfil_info;
2442	so = cfil_info->cfi_so;
2443
2444	if (cfil_info == NULL \|\| os_ref_retain_try(rc: &cfil_info->cfi_ref_count) == false) {
2445	break;
2446	}
2447
2448	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
2449	socket_lock(so, refcount: `1`);
2450
2451	do {
2452	error = cfil_acquire_sockbuf(so, cfil_info, outgoing: `1`);
2453	if (error == `0`) {
2454	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, `1`);
2455	}
2456	cfil_release_sockbuf(so, outgoing: `1`);
2457	if (error != `0`) {
2458	break;
2459	}
2460
2461	error = cfil_acquire_sockbuf(so, cfil_info, outgoing: `0`);
2462	if (error == `0`) {
2463	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, `0`);
2464	}
2465	cfil_release_sockbuf(so, outgoing: `0`);
2466	} while (`0`);
2467
2468	CFIL_INFO_FREE(cfil_info);
2469	socket_lock_assert_owned(so);
2470	socket_unlock(so, refcount: `1`);
2471
2472	cfil_rw_lock_shared(lck: &cfil_lck_rw);
2473	}
2474	done:
2475	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
2476	}
2477
2478	struct cflil_tag_container {
2479	struct m_tag cfil_m_tag;
2480	struct cfil_tag cfil_tag;
2481	};
2482
2483	static struct m_tag *
2484	m_tag_kalloc_cfil_udp(u_int32_t id, u_int16_t type, uint16_t len, int wait)
2485	{
2486	struct cflil_tag_container *tag_container;
2487	struct m_tag *tag = NULL;
2488
2489	assert3u(id, ==, KERNEL_MODULE_TAG_ID);
2490	assert3u(type, ==, KERNEL_TAG_TYPE_CFIL_UDP);
2491	assert3u(len, ==, sizeof(struct cfil_tag));
2492
2493	if (len != sizeof(struct cfil_tag)) {
2494	return NULL;
2495	}
2496
2497	tag_container = kalloc_type(struct cflil_tag_container, wait \| M_ZERO);
2498	if (tag_container != NULL) {
2499	tag = &tag_container->cfil_m_tag;
2500
2501	assert3p(tag, ==, tag_container);
2502
2503	M_TAG_INIT(tag, id, type, len, &tag_container->cfil_tag, NULL);
2504	}
2505
2506	return tag;
2507	}
2508
2509	static void
2510	m_tag_kfree_cfil_udp(struct m_tag *tag)
2511	{
2512	struct cflil_tag_container tag_container = (struct* cflil_tag_container *)tag;
2513
2514	kfree_type(struct cflil_tag_container, tag_container);
2515	}
2516
2517	void
2518	cfil_register_m_tag(void)
2519	{
2520	errno_t error = `0`;
2521
2522	error = m_register_internal_tag_type(type: KERNEL_TAG_TYPE_CFIL_UDP, len: sizeof(struct cfil_tag),
2523	alloc_func: m_tag_kalloc_cfil_udp, free_func: m_tag_kfree_cfil_udp);
2524
2525	assert3u(error, ==, `0`);
2526	}
2527
2528	void
2529	cfil_init(void)
2530	{
2531	struct kern_ctl_reg kern_ctl;
2532	errno_t error = `0`;
2533	unsigned int mbuf_limit = `0`;
2534
2535	CFIL_LOG(LOG_NOTICE, "");
2536
2537	/*
2538	* Compile time verifications
2539	*/
2540	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2541	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == `0`);
2542	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == `0`);
2543	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == `0`);
2544
2545	/*
2546	* Runtime time verifications
2547	*/
2548	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2549	sizeof(uint32_t)));
2550	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2551	sizeof(uint32_t)));
2552	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2553	sizeof(uint32_t)));
2554	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2555	sizeof(uint32_t)));
2556
2557	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2558	sizeof(uint32_t)));
2559	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2560	sizeof(uint32_t)));
2561
2562	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2563	sizeof(uint32_t)));
2564	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2565	sizeof(uint32_t)));
2566	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2567	sizeof(uint32_t)));
2568	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2569	sizeof(uint32_t)));
2570
2571	/*
2572	* Allocate locks
2573	*/
2574	TAILQ_INIT(&cfil_sock_head);
2575	TAILQ_INIT(&cfil_sock_head_stats);
2576
2577	/*
2578	* Register kernel control
2579	*/
2580	bzero(s: &kern_ctl, n: sizeof(kern_ctl));
2581	strlcpy(dst: kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2582	n: sizeof(kern_ctl.ctl_name));
2583	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED \| CTL_FLAG_REG_EXTENDED;
2584	kern_ctl.ctl_sendsize = `512` * `1024`; / enough? /
2585	kern_ctl.ctl_recvsize = `512` * `1024`; / enough? /
2586	kern_ctl.ctl_connect = cfil_ctl_connect;
2587	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2588	kern_ctl.ctl_send = cfil_ctl_send;
2589	kern_ctl.ctl_getopt = cfil_ctl_getopt;
2590	kern_ctl.ctl_setopt = cfil_ctl_setopt;
2591	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2592	error = ctl_register(userkctl: &kern_ctl, kctlref: &cfil_kctlref);
2593	if (error != `0`) {
2594	CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2595	return;
2596	}
2597
2598	// Spawn thread for statistics reporting
2599	if (kernel_thread_start(continuation: cfil_stats_report_thread_func, NULL,
2600	new_thread: &cfil_stats_report_thread) != KERN_SUCCESS) {
2601	panic_plain("%s: Can't create statistics report thread", __func__);
2602	/ NOTREACHED /
2603	}
2604	/ this must not fail /
2605	VERIFY(cfil_stats_report_thread != NULL);
2606
2607	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
2608	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2609	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2610	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2611
2612	memset(s: &global_cfil_stats_report_buffers, c: `0`, n: sizeof(global_cfil_stats_report_buffers));
2613	}
2614
2615	struct cfil_info *
2616	cfil_info_alloc(struct socket so, struct* soflow_hash_entry *hash_entry)
2617	{
2618	int kcunit;
2619	struct cfil_info *cfil_info = NULL;
2620	struct inpcb *inp = sotoinpcb(so);
2621
2622	CFIL_LOG(LOG_INFO, "");
2623
2624	socket_lock_assert_owned(so);
2625
2626	cfil_info = zalloc_flags(cfil_info_zone, Z_WAITOK \| Z_ZERO \| Z_NOFAIL);
2627	os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2628
2629	cfil_queue_init(cfq: &cfil_info->cfi_snd.cfi_inject_q);
2630	cfil_queue_init(cfq: &cfil_info->cfi_rcv.cfi_inject_q);
2631
2632	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2633	struct cfil_entry *entry;
2634
2635	entry = &cfil_info->cfi_entries[kcunit - `1`];
2636	entry->cfe_cfil_info = cfil_info;
2637
2638	/ Initialize the filter entry /
2639	entry->cfe_filter = NULL;
2640	entry->cfe_flags = `0`;
2641	entry->cfe_necp_control_unit = `0`;
2642	entry->cfe_snd.cfe_pass_offset = `0`;
2643	entry->cfe_snd.cfe_peek_offset = `0`;
2644	entry->cfe_snd.cfe_peeked = `0`;
2645	entry->cfe_rcv.cfe_pass_offset = `0`;
2646	entry->cfe_rcv.cfe_peek_offset = `0`;
2647	entry->cfe_rcv.cfe_peeked = `0`;
2648	/*
2649	* Timestamp the last action to avoid pre-maturely
2650	* triggering garbage collection
2651	*/
2652	microuptime(tv: &entry->cfe_last_action);
2653
2654	cfil_queue_init(cfq: &entry->cfe_snd.cfe_pending_q);
2655	cfil_queue_init(cfq: &entry->cfe_rcv.cfe_pending_q);
2656	cfil_queue_init(cfq: &entry->cfe_snd.cfe_ctl_q);
2657	cfil_queue_init(cfq: &entry->cfe_rcv.cfe_ctl_q);
2658	}
2659
2660	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
2661
2662	/*
2663	* Create a cfi_sock_id that's not the socket pointer!
2664	*/
2665
2666	if (hash_entry == NULL) {
2667	// This is the TCP case, cfil_info is tracked per socket
2668	if (inp->inp_flowhash == `0`) {
2669	inp_calc_flowhash(inp);
2670	ASSERT(inp->inp_flowhash != `0`);
2671	}
2672
2673	so->so_cfil = cfil_info;
2674	cfil_info->cfi_so = so;
2675	cfil_info->cfi_sock_id =
2676	((so->so_gencnt << `32`) \| inp->inp_flowhash);
2677	} else {
2678	// This is the UDP case, cfil_info is tracked in per-socket hash
2679	cfil_info->cfi_so = so;
2680	cfil_info->cfi_hash_entry = hash_entry;
2681	cfil_info->cfi_sock_id = ((so->so_gencnt << `32`) \| (hash_entry->soflow_flowhash & `0xffffffff`));
2682	}
2683
2684	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2685	SLIST_INIT(&cfil_info->cfi_ordered_entries);
2686
2687	cfil_sock_attached_count++;
2688
2689	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
2690
2691	if (cfil_info != NULL) {
2692	OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2693	} else {
2694	OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2695	}
2696
2697	return cfil_info;
2698	}
2699
2700	int
2701	cfil_info_attach_unit(struct socket so, uint32_t filter_control_unit, struct* cfil_info *cfil_info)
2702	{
2703	int kcunit;
2704	int attached = `0`;
2705
2706	CFIL_LOG(LOG_INFO, "");
2707
2708	socket_lock_assert_owned(so);
2709
2710	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
2711
2712	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2713	struct content_filter *cfc = content_filters[kcunit - `1`];
2714	struct cfil_entry *entry;
2715	struct cfil_entry *iter_entry;
2716	struct cfil_entry *iter_prev;
2717
2718	if (cfc == NULL) {
2719	continue;
2720	}
2721	if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2722	continue;
2723	}
2724
2725	entry = &cfil_info->cfi_entries[kcunit - `1`];
2726
2727	entry->cfe_filter = cfc;
2728	entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2729	TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2730	cfc->cf_sock_count++;
2731
2732	/ Insert the entry into the list ordered by control unit /
2733	iter_prev = NULL;
2734	SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2735	if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2736	break;
2737	}
2738	iter_prev = iter_entry;
2739	}
2740
2741	if (iter_prev == NULL) {
2742	SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2743	} else {
2744	SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2745	}
2746
2747	verify_content_filter(cfc);
2748	attached = `1`;
2749	entry->cfe_flags \|= CFEF_CFIL_ATTACHED;
2750	}
2751
2752	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
2753
2754	return attached;
2755	}
2756
2757	static void
2758	cfil_info_free(struct cfil_info *cfil_info)
2759	{
2760	int kcunit;
2761	uint64_t in_drain = `0`;
2762	uint64_t out_drained = `0`;
2763
2764	if (cfil_info == NULL) {
2765	return;
2766	}
2767
2768	CFIL_LOG(LOG_INFO, "");
2769
2770	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
2771
2772	if (cfil_info->cfi_debug) {
2773	cfil_info_log(LOG_ERR, cfil_info, "CFIL: FREEING CFIL_INFO");
2774	}
2775
2776	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2777	struct cfil_entry *entry;
2778	struct content_filter *cfc;
2779
2780	entry = &cfil_info->cfi_entries[kcunit - `1`];
2781
2782	/ Don't be silly and try to detach twice /
2783	if (entry->cfe_filter == NULL) {
2784	continue;
2785	}
2786
2787	cfc = content_filters[kcunit - `1`];
2788
2789	VERIFY(cfc == entry->cfe_filter);
2790
2791	entry->cfe_filter = NULL;
2792	entry->cfe_necp_control_unit = `0`;
2793	TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2794	cfc->cf_sock_count--;
2795
2796	verify_content_filter(cfc);
2797	}
2798
2799	cfil_sock_attached_count--;
2800	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2801
2802	// Turn off stats reporting for cfil_info.
2803	cfil_info_stats_toggle(cfil_info, NULL, report_frequency: `0`);
2804
2805	out_drained += cfil_queue_drain(cfq: &cfil_info->cfi_snd.cfi_inject_q);
2806	in_drain += cfil_queue_drain(cfq: &cfil_info->cfi_rcv.cfi_inject_q);
2807
2808	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2809	struct cfil_entry *entry;
2810
2811	entry = &cfil_info->cfi_entries[kcunit - `1`];
2812	out_drained += cfil_queue_drain(cfq: &entry->cfe_snd.cfe_pending_q);
2813	in_drain += cfil_queue_drain(cfq: &entry->cfe_rcv.cfe_pending_q);
2814	out_drained += cfil_queue_drain(cfq: &entry->cfe_snd.cfe_ctl_q);
2815	in_drain += cfil_queue_drain(cfq: &entry->cfe_rcv.cfe_ctl_q);
2816	}
2817	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
2818
2819	if (out_drained) {
2820	OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2821	}
2822	if (in_drain) {
2823	OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2824	}
2825
2826	zfree(cfil_info_zone, cfil_info);
2827	}
2828
2829	/*
2830	* Received a verdict from userspace for a socket.
2831	* Perform any delayed operation if needed.
2832	*/
2833	static void
2834	cfil_sock_received_verdict(struct socket *so)
2835	{
2836	if (so == NULL \|\| so->so_cfil == NULL) {
2837	return;
2838	}
2839
2840	so->so_cfil->cfi_flags \|= CFIF_INITIAL_VERDICT;
2841
2842	/*
2843	* If socket has already been connected, trigger
2844	* soisconnected now.
2845	*/
2846	if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2847	so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2848	soisconnected(so);
2849	return;
2850	}
2851	}
2852
2853	/*
2854	* Entry point from Sockets layer
2855	* The socket is locked.
2856	*
2857	* Checks if a connected socket is subject to filter and
2858	* pending the initial verdict.
2859	*/
2860	boolean_t
2861	cfil_sock_connected_pending_verdict(struct socket *so)
2862	{
2863	if (so == NULL \|\| so->so_cfil == NULL) {
2864	return false;
2865	}
2866
2867	if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2868	return false;
2869	} else {
2870	/*
2871	* Remember that this protocol is already connected, so
2872	* we will trigger soisconnected() upon receipt of
2873	* initial verdict later.
2874	*/
2875	so->so_cfil->cfi_flags \|= CFIF_SOCKET_CONNECTED;
2876	return true;
2877	}
2878	}
2879
2880	/*
2881	* Entry point from Flow Divert
2882	* The socket is locked.
2883	*
2884	* Mark socket as DEAD if all CFIL data has been processed by filter(s).
2885	* Otherwise, delay the marking until all data has been processed.
2886	*/
2887	boolean_t
2888	cfil_sock_is_dead(struct socket *so)
2889	{
2890	struct inpcb *inp = NULL;
2891
2892	if (so == NULL) {
2893	return false;
2894	}
2895
2896	socket_lock_assert_owned(so);
2897
2898	if ((so->so_flags & SOF_CONTENT_FILTER) != `0`) {
2899	int32_t pending_snd = cfil_sock_data_pending(sb: &so->so_snd);
2900	int32_t pending_rcv = cfil_sock_data_pending(sb: &so->so_rcv);
2901	if (pending_snd \|\| pending_rcv) {
2902	SO_DELAYED_DEAD_SET(so, true)
2903	return false;
2904	}
2905	}
2906
2907	inp = sotoinpcb(so);
2908	if (inp != NULL) {
2909	inp->inp_state = INPCB_STATE_DEAD;
2910	inpcb_gc_sched(inp->inp_pcbinfo, type: INPCB_TIMER_FAST);
2911	SO_DELAYED_DEAD_SET(so, false)
2912	return true;
2913	}
2914	return false;
2915	}
2916
2917	/*
2918	* Entry point from tcp_timer.c
2919	* The socket is locked.
2920	*
2921	* Perform TCP FIN time wait handling if all CFIL data has been processed by filter(s).
2922	* Otherwise, delay until all data has been processed.
2923	*/
2924	boolean_t
2925	cfil_sock_tcp_add_time_wait(struct socket *so)
2926	{
2927	struct inpcb *inp = NULL;
2928	struct tcpcb *tp = NULL;
2929
2930	// Only handle TCP sockets
2931	if (so == NULL \|\| !IS_TCP(so)) {
2932	return false;
2933	}
2934
2935	socket_lock_assert_owned(so);
2936
2937	if ((so->so_flags & SOF_CONTENT_FILTER) != `0`) {
2938	int32_t pending_snd = cfil_sock_data_pending(sb: &so->so_snd);
2939	int32_t pending_rcv = cfil_sock_data_pending(sb: &so->so_rcv);
2940	if (pending_snd \|\| pending_rcv) {
2941	SO_DELAYED_TCP_TIME_WAIT_SET(so, true)
2942	return false;
2943	}
2944	}
2945
2946	inp = sotoinpcb(so);
2947	tp = inp ? intotcpcb(inp) : NULL;
2948	if (tp != NULL) {
2949	add_to_time_wait_now(tp, delay: `2` * tcp_msl);
2950	SO_DELAYED_TCP_TIME_WAIT_SET(so, false)
2951	return true;
2952	}
2953	return false;
2954	}
2955
2956	boolean_t
2957	cfil_filter_present(void)
2958	{
2959	return cfil_active_count > `0`;
2960	}
2961
2962	/*
2963	* Entry point from Sockets layer
2964	* The socket is locked.
2965	*/
2966	errno_t
2967	cfil_sock_attach(struct socket so, struct* sockaddr local, struct* sockaddr remote, int* dir)
2968	{
2969	errno_t error = `0`;
2970	uint32_t filter_control_unit;
2971	int debug = `0`;
2972
2973	socket_lock_assert_owned(so);
2974
2975	if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2976	/*
2977	* This socket has already been evaluated (and ultimately skipped) by
2978	* flow divert, so it has also already been through content filter if there
2979	* is one.
2980	*/
2981	goto done;
2982	}
2983
2984	/ Limit ourselves to TCP that are not MPTCP subflows /
2985	if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2986	goto done;
2987	}
2988
2989	debug = DEBUG_FLOW(sotoinpcb(so), so, local, remote);
2990	if (debug) {
2991	CFIL_LOG(LOG_INFO, "CFIL: TCP (dir %d) - debug flow with port %d", dir, cfil_log_port);
2992	}
2993
2994	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2995	if (filter_control_unit == `0`) {
2996	goto done;
2997	}
2998
2999	if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
3000	goto done;
3001	}
3002	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != `0`) {
3003	OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
3004	goto done;
3005	}
3006	if (cfil_active_count == `0`) {
3007	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
3008	goto done;
3009	}
3010	if (so->so_cfil != NULL) {
3011	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
3012	CFIL_LOG(LOG_ERR, "already attached");
3013	goto done;
3014	} else {
3015	cfil_info_alloc(so, NULL);
3016	if (so->so_cfil == NULL) {
3017	error = ENOMEM;
3018	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
3019	goto done;
3020	}
3021	so->so_cfil->cfi_dir = dir;
3022	so->so_cfil->cfi_filter_control_unit = filter_control_unit;
3023	so->so_cfil->cfi_debug = debug;
3024	}
3025	if (cfil_info_attach_unit(so, filter_control_unit, cfil_info: so->so_cfil) == `0`) {
3026	CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
3027	filter_control_unit);
3028	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
3029	goto done;
3030	}
3031	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llu <%llx>",
3032	(uint64_t)VM_KERNEL_ADDRPERM(so),
3033	filter_control_unit, so->so_cfil->cfi_sock_id, so->so_cfil->cfi_sock_id);
3034
3035	so->so_flags \|= SOF_CONTENT_FILTER;
3036	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
3037
3038	/ Hold a reference on the socket /
3039	so->so_usecount++;
3040
3041	/*
3042	* Save passed addresses for attach event msg (in case resend
3043	* is needed.
3044	*/
3045	if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
3046	memcpy(dst: &so->so_cfil->cfi_so_attach_faddr, src: remote, n: remote->sa_len);
3047	}
3048	if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
3049	memcpy(dst: &so->so_cfil->cfi_so_attach_laddr, src: local, n: local->sa_len);
3050	}
3051
3052	error = cfil_dispatch_attach_event(so, so->so_cfil, `0`, dir);
3053	/ We can recover from flow control or out of memory errors /
3054	if (error == ENOBUFS \|\| error == ENOMEM) {
3055	error = `0`;
3056	} else if (error != `0`) {
3057	goto done;
3058	}
3059
3060	CFIL_INFO_VERIFY(so->so_cfil);
3061	done:
3062	return error;
3063	}
3064
3065	/*
3066	* Entry point from Sockets layer
3067	* The socket is locked.
3068	*/
3069	errno_t
3070	cfil_sock_detach(struct socket *so)
3071	{
3072	if (NEED_DGRAM_FLOW_TRACKING(so)) {
3073	return `0`;
3074	}
3075
3076	if (so->so_cfil) {
3077	if (so->so_flags & SOF_CONTENT_FILTER) {
3078	so->so_flags &= ~SOF_CONTENT_FILTER;
3079	VERIFY(so->so_usecount > `0`);
3080	so->so_usecount--;
3081	}
3082	CFIL_INFO_FREE(so->so_cfil);
3083	so->so_cfil = NULL;
3084	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
3085	}
3086	return `0`;
3087	}
3088
3089	/*
3090	* Fill in the address info of an event message from either
3091	* the socket or passed in address info.
3092	*/
3093	static void
3094	cfil_fill_event_msg_addresses(struct soflow_hash_entry entry, struct* inpcb *inp,
3095	union sockaddr_in_4_6 sin_src, union* sockaddr_in_4_6 *sin_dst,
3096	boolean_t isIPv4, boolean_t outgoing)
3097	{
3098	if (isIPv4) {
3099	struct in_addr laddr = {`0`}, faddr = {`0`};
3100	u_int16_t lport = `0`, fport = `0`;
3101
3102	cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
3103
3104	if (outgoing) {
3105	fill_ip_sockaddr_4_6(sin_src, laddr, lport);
3106	fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
3107	} else {
3108	fill_ip_sockaddr_4_6(sin_src, faddr, fport);
3109	fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
3110	}
3111	} else {
3112	struct in6_addr laddr = NULL, faddr = NULL;
3113	u_int16_t lport = `0`, fport = `0`;
3114	const u_int32_t lifscope = inp ? inp->inp_lifscope : IFSCOPE_UNKNOWN;
3115	const u_int32_t fifscope = inp ? inp->inp_fifscope : IFSCOPE_UNKNOWN;
3116
3117	cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
3118	if (outgoing) {
3119	fill_ip6_sockaddr_4_6(sin_src, laddr, lport, lifscope);
3120	fill_ip6_sockaddr_4_6(sin_dst, faddr, fport, fifscope);
3121	} else {
3122	fill_ip6_sockaddr_4_6(sin_src, faddr, fport, fifscope);
3123	fill_ip6_sockaddr_4_6(sin_dst, laddr, lport, lifscope);
3124	}
3125	}
3126	}
3127
3128	static boolean_t
3129	cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
3130	struct cfil_info *cfil_info,
3131	struct cfil_msg_sock_attached *msg)
3132	{
3133	struct cfil_crypto_data data = {};
3134	struct iovec extra_data[`1`] = { { NULL, `0` } };
3135
3136	if (crypto_state == NULL \|\| msg == NULL \|\| cfil_info == NULL) {
3137	return false;
3138	}
3139
3140	data.sock_id = msg->cfs_msghdr.cfm_sock_id;
3141	data.direction = msg->cfs_conn_dir;
3142
3143	data.pid = msg->cfs_pid;
3144	data.effective_pid = msg->cfs_e_pid;
3145	uuid_copy(dst: data.uuid, src: msg->cfs_uuid);
3146	uuid_copy(dst: data.effective_uuid, src: msg->cfs_e_uuid);
3147	data.socketProtocol = msg->cfs_sock_protocol;
3148	if (data.direction == CFS_CONNECTION_DIR_OUT) {
3149	data.remote.sin6 = msg->cfs_dst.sin6;
3150	data.local.sin6 = msg->cfs_src.sin6;
3151	} else {
3152	data.remote.sin6 = msg->cfs_src.sin6;
3153	data.local.sin6 = msg->cfs_dst.sin6;
3154	}
3155
3156	if (strlen(s: msg->cfs_remote_domain_name) > `0`) {
3157	extra_data[`0`].iov_base = msg->cfs_remote_domain_name;
3158	extra_data[`0`].iov_len = strlen(s: msg->cfs_remote_domain_name);
3159	}
3160
3161	// At attach, if local address is already present, no need to re-sign subsequent data messages.
3162	if (!NULLADDRESS(data.local)) {
3163	cfil_info->cfi_isSignatureLatest = true;
3164	}
3165
3166	msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3167	if (cfil_crypto_sign_data(state: crypto_state, data: &data, extra_data, extra_data_count: sizeof(extra_data) / sizeof(extra_data[`0`]), signature: msg->cfs_signature, signature_length: &msg->cfs_signature_length) != `0`) {
3168	msg->cfs_signature_length = `0`;
3169	CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu <%llx>>",
3170	msg->cfs_msghdr.cfm_sock_id, msg->cfs_msghdr.cfm_sock_id);
3171	return false;
3172	}
3173
3174	return true;
3175	}
3176
3177	struct cfil_sign_parameters {
3178	cfil_crypto_state_t csp_state;
3179	struct cfil_crypto_data *csp_data;
3180	uint8_t *csp_signature;
3181	uint32_t *csp_signature_size;
3182	};
3183
3184	static void
3185	cfil_sign_with_domain_name(char domain_name, void* *ctx)
3186	{
3187	struct cfil_sign_parameters parameters = (struct* cfil_sign_parameters *)ctx;
3188	struct iovec extra_data[`1`] = { { NULL, `0` } };
3189
3190	if (parameters == NULL) {
3191	return;
3192	}
3193
3194	if (domain_name != NULL) {
3195	extra_data[`0`].iov_base = domain_name;
3196	extra_data[`0`].iov_len = strlen(s: domain_name);
3197	}
3198
3199	(parameters->csp_signature_size) = sizeof*(cfil_crypto_signature);
3200	if (cfil_crypto_sign_data(state: parameters->csp_state, data: parameters->csp_data,
3201	extra_data, extra_data_count: sizeof(extra_data) / sizeof(extra_data[`0`]),
3202	signature: parameters->csp_signature, signature_length: parameters->csp_signature_size) != `0`) {
3203	*(parameters->csp_signature_size) = `0`;
3204	}
3205	}
3206
3207	static boolean_t
3208	cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3209	struct socket so, struct* cfil_info *cfil_info,
3210	struct cfil_msg_data_event *msg)
3211	{
3212	struct cfil_crypto_data data = {};
3213
3214	if (crypto_state == NULL \|\| msg == NULL \|\|
3215	so == NULL \|\| cfil_info == NULL) {
3216	return false;
3217	}
3218
3219	data.sock_id = cfil_info->cfi_sock_id;
3220	data.direction = cfil_info->cfi_dir;
3221	data.pid = so->last_pid;
3222	memcpy(dst: data.uuid, src: so->last_uuid, n: sizeof(uuid_t));
3223	if (so->so_flags & SOF_DELEGATED) {
3224	data.effective_pid = so->e_pid;
3225	memcpy(dst: data.effective_uuid, src: so->e_uuid, n: sizeof(uuid_t));
3226	} else {
3227	data.effective_pid = so->last_pid;
3228	memcpy(dst: data.effective_uuid, src: so->last_uuid, n: sizeof(uuid_t));
3229	}
3230	data.socketProtocol = GET_SO_PROTO(so);
3231
3232	if (data.direction == CFS_CONNECTION_DIR_OUT) {
3233	data.remote.sin6 = msg->cfc_dst.sin6;
3234	data.local.sin6 = msg->cfc_src.sin6;
3235	} else {
3236	data.remote.sin6 = msg->cfc_src.sin6;
3237	data.local.sin6 = msg->cfc_dst.sin6;
3238	}
3239
3240	// At first data, local address may show up for the first time, update address cache and
3241	// no need to re-sign subsequent data messages anymore.
3242	if (!NULLADDRESS(data.local)) {
3243	memcpy(dst: &cfil_info->cfi_so_attach_laddr, src: &data.local, n: data.local.sa.sa_len);
3244	cfil_info->cfi_isSignatureLatest = true;
3245	}
3246
3247	struct cfil_sign_parameters parameters = {
3248	.csp_state = crypto_state,
3249	.csp_data = &data,
3250	.csp_signature = msg->cfd_signature,
3251	.csp_signature_size = &msg->cfd_signature_length,
3252	};
3253	necp_with_inp_domain_name(so, ctx: &parameters, with_func: cfil_sign_with_domain_name);
3254
3255	if (msg->cfd_signature_length == `0`) {
3256	CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu <%llx>>",
3257	msg->cfd_msghdr.cfm_sock_id, msg->cfd_msghdr.cfm_sock_id);
3258	return false;
3259	}
3260
3261	return true;
3262	}
3263
3264	static boolean_t
3265	cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3266	struct socket so, struct* cfil_info *cfil_info,
3267	struct cfil_msg_sock_closed *msg)
3268	{
3269	struct cfil_crypto_data data = {};
3270	struct soflow_hash_entry hash_entry = {};
3271	struct soflow_hash_entry *hash_entry_ptr = NULL;
3272	struct inpcb inp = (struct* inpcb *)so->so_pcb;
3273
3274	if (crypto_state == NULL \|\| msg == NULL \|\|
3275	so == NULL \|\| inp == NULL \|\| cfil_info == NULL) {
3276	return false;
3277	}
3278
3279	data.sock_id = cfil_info->cfi_sock_id;
3280	data.direction = cfil_info->cfi_dir;
3281
3282	data.pid = so->last_pid;
3283	memcpy(dst: data.uuid, src: so->last_uuid, n: sizeof(uuid_t));
3284	if (so->so_flags & SOF_DELEGATED) {
3285	data.effective_pid = so->e_pid;
3286	memcpy(dst: data.effective_uuid, src: so->e_uuid, n: sizeof(uuid_t));
3287	} else {
3288	data.effective_pid = so->last_pid;
3289	memcpy(dst: data.effective_uuid, src: so->last_uuid, n: sizeof(uuid_t));
3290	}
3291	data.socketProtocol = GET_SO_PROTO(so);
3292
3293	/*
3294	* Fill in address info:
3295	* For UDP, use the cfil_info hash entry directly.
3296	* For TCP, compose an hash entry with the saved addresses.
3297	*/
3298	if (cfil_info->cfi_hash_entry != NULL) {
3299	hash_entry_ptr = cfil_info->cfi_hash_entry;
3300	} else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > `0` \|\|
3301	cfil_info->cfi_so_attach_laddr.sa.sa_len > `0`) {
3302	soflow_fill_hash_entry_from_address(&hash_entry, TRUE, SA(&cfil_info->cfi_so_attach_laddr.sa), FALSE);
3303	soflow_fill_hash_entry_from_address(&hash_entry, FALSE, SA(&cfil_info->cfi_so_attach_faddr.sa), FALSE);
3304	hash_entry_ptr = &hash_entry;
3305	}
3306	if (hash_entry_ptr != NULL) {
3307	boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3308	union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3309	union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3310	cfil_fill_event_msg_addresses(entry: hash_entry_ptr, inp, sin_src: src, sin_dst: dst, isIPv4: !IS_INP_V6(inp), outgoing);
3311	}
3312
3313	data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3314	data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3315
3316	struct cfil_sign_parameters parameters = {
3317	.csp_state = crypto_state,
3318	.csp_data = &data,
3319	.csp_signature = msg->cfc_signature,
3320	.csp_signature_size = &msg->cfc_signature_length
3321	};
3322	necp_with_inp_domain_name(so, ctx: &parameters, with_func: cfil_sign_with_domain_name);
3323
3324	if (msg->cfc_signature_length == `0`) {
3325	CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu <%llx>>",
3326	msg->cfc_msghdr.cfm_sock_id, msg->cfc_msghdr.cfm_sock_id);
3327	return false;
3328	}
3329
3330	return true;
3331	}
3332
3333	static void
3334	cfil_populate_attached_msg_domain_name(char domain_name, void* *ctx)
3335	{
3336	struct cfil_msg_sock_attached msg_attached = (struct* cfil_msg_sock_attached *)ctx;
3337
3338	if (msg_attached == NULL) {
3339	return;
3340	}
3341
3342	if (domain_name != NULL) {
3343	strlcpy(dst: msg_attached->cfs_remote_domain_name, src: domain_name, n: sizeof(msg_attached->cfs_remote_domain_name));
3344	}
3345	}
3346
3347	static bool
3348	cfil_copy_audit_token(pid_t pid, audit_token_t *buffer)
3349	{
3350	bool success = false;
3351	proc_t p = proc_find(pid);
3352	if (p != PROC_NULL) {
3353	task_t t = proc_task(p);
3354	if (t != TASK_NULL) {
3355	audit_token_t audit_token = {};
3356	mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3357	if (task_info(task: t, TASK_AUDIT_TOKEN, task_info_out: (task_info_t)&audit_token, task_info_count: &count) == KERN_SUCCESS) {
3358	memcpy(dst: buffer, src: &audit_token, n: sizeof(audit_token_t));
3359	success = true;
3360	}
3361	}
3362	proc_rele(p);
3363	}
3364	return success;
3365	}
3366
3367	static int
3368	cfil_dispatch_attach_event(struct socket so, struct* cfil_info *cfil_info,
3369	uint32_t kcunit, int conn_dir)
3370	{
3371	errno_t error = `0`;
3372	struct cfil_entry *entry = NULL;
3373	struct cfil_msg_sock_attached *msg_attached;
3374	struct content_filter *cfc = NULL;
3375	struct inpcb inp = (struct* inpcb *)so->so_pcb;
3376	struct soflow_hash_entry *hash_entry_ptr = NULL;
3377	struct soflow_hash_entry hash_entry;
3378
3379	memset(s: &hash_entry, c: `0`, n: sizeof(struct soflow_hash_entry));
3380
3381	socket_lock_assert_owned(so);
3382
3383	cfil_rw_lock_shared(lck: &cfil_lck_rw);
3384
3385	if (so->so_proto == NULL \|\| so->so_proto->pr_domain == NULL) {
3386	error = EINVAL;
3387	goto done;
3388	}
3389
3390	if (kcunit == `0`) {
3391	entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3392	} else {
3393	entry = &cfil_info->cfi_entries[kcunit - `1`];
3394	}
3395
3396	if (entry == NULL) {
3397	goto done;
3398	}
3399
3400	cfc = entry->cfe_filter;
3401	if (cfc == NULL) {
3402	goto done;
3403	}
3404
3405	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3406	goto done;
3407	}
3408
3409	if (kcunit == `0`) {
3410	kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3411	}
3412
3413	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3414	(uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3415
3416	/ Would be wasteful to try when flow controlled /
3417	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3418	error = ENOBUFS;
3419	goto done;
3420	}
3421
3422	msg_attached = kalloc_data(sizeof(struct cfil_msg_sock_attached), Z_WAITOK);
3423	if (msg_attached == NULL) {
3424	error = ENOMEM;
3425	goto done;
3426	}
3427
3428	bzero(s: msg_attached, n: sizeof(struct cfil_msg_sock_attached));
3429	msg_attached->cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3430	msg_attached->cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3431	msg_attached->cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3432	msg_attached->cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3433	msg_attached->cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3434
3435	msg_attached->cfs_sock_family = SOCK_DOM(so);
3436	msg_attached->cfs_sock_type = SOCK_TYPE(so);
3437	msg_attached->cfs_sock_protocol = GET_SO_PROTO(so);
3438	msg_attached->cfs_pid = so->last_pid;
3439	memcpy(dst: msg_attached->cfs_uuid, src: so->last_uuid, n: sizeof(uuid_t));
3440	if (so->so_flags & SOF_DELEGATED) {
3441	msg_attached->cfs_e_pid = so->e_pid;
3442	memcpy(dst: msg_attached->cfs_e_uuid, src: so->e_uuid, n: sizeof(uuid_t));
3443	} else {
3444	msg_attached->cfs_e_pid = so->last_pid;
3445	memcpy(dst: msg_attached->cfs_e_uuid, src: so->last_uuid, n: sizeof(uuid_t));
3446	}
3447
3448	/*
3449	* Fill in address info:
3450	* For UDP, use the cfil_info hash entry directly.
3451	* For TCP, compose an hash entry with the saved addresses.
3452	*/
3453	if (cfil_info->cfi_hash_entry != NULL) {
3454	hash_entry_ptr = cfil_info->cfi_hash_entry;
3455	} else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > `0` \|\|
3456	cfil_info->cfi_so_attach_laddr.sa.sa_len > `0`) {
3457	soflow_fill_hash_entry_from_address(&hash_entry, TRUE, SA(&cfil_info->cfi_so_attach_laddr.sa), FALSE);
3458	soflow_fill_hash_entry_from_address(&hash_entry, FALSE, SA(&cfil_info->cfi_so_attach_faddr.sa), FALSE);
3459	hash_entry_ptr = &hash_entry;
3460	}
3461	if (hash_entry_ptr != NULL) {
3462	cfil_fill_event_msg_addresses(entry: hash_entry_ptr, inp,
3463	sin_src: &msg_attached->cfs_src, sin_dst: &msg_attached->cfs_dst,
3464	isIPv4: !IS_INP_V6(inp), outgoing: conn_dir == CFS_CONNECTION_DIR_OUT);
3465	}
3466	msg_attached->cfs_conn_dir = conn_dir;
3467
3468	if (msg_attached->cfs_e_pid != `0`) {
3469	if (!cfil_copy_audit_token(pid: msg_attached->cfs_e_pid, buffer: (audit_token_t *)&msg_attached->cfs_audit_token)) {
3470	CFIL_LOG(LOG_ERR, "CFIL: Failed to get effective audit token for <sockID %llu <%llx>> ",
3471	entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
3472	}
3473	}
3474
3475	if (msg_attached->cfs_pid != `0`) {
3476	if (msg_attached->cfs_pid == msg_attached->cfs_e_pid) {
3477	memcpy(dst: &msg_attached->cfs_real_audit_token, src: &msg_attached->cfs_audit_token, n: sizeof(msg_attached->cfs_real_audit_token));
3478	} else if (!cfil_copy_audit_token(pid: msg_attached->cfs_pid, buffer: (audit_token_t *)&msg_attached->cfs_real_audit_token)) {
3479	CFIL_LOG(LOG_ERR, "CFIL: Failed to get real audit token for <sockID %llu <%llx>> ",
3480	entry->cfe_cfil_info->cfi_sock_id, entry->cfe_cfil_info->cfi_sock_id);
3481	}
3482	}
3483
3484	necp_with_inp_domain_name(so, ctx: msg_attached, with_func: cfil_populate_attached_msg_domain_name);
3485
3486	if (cfil_info->cfi_debug) {
3487	cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3488	}
3489
3490	cfil_dispatch_attach_event_sign(crypto_state: entry->cfe_filter->cf_crypto_state, cfil_info, msg: msg_attached);
3491
3492	error = ctl_enqueuedata(kctlref: entry->cfe_filter->cf_kcref,
3493	unit: entry->cfe_filter->cf_kcunit,
3494	data: msg_attached,
3495	len: sizeof(struct cfil_msg_sock_attached),
3496	CTL_DATA_EOR);
3497
3498	kfree_data(msg_attached, sizeof(struct cfil_msg_sock_attached));
3499
3500	if (error != `0`) {
3501	CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3502	goto done;
3503	}
3504	microuptime(tv: &entry->cfe_last_event);
3505	cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3506	cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3507
3508	entry->cfe_flags \|= CFEF_SENT_SOCK_ATTACHED;
3509	OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3510	done:
3511
3512	/ We can recover from flow control /
3513	if (error == ENOBUFS) {
3514	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
3515	OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3516
3517	if (!cfil_rw_lock_shared_to_exclusive(lck: &cfil_lck_rw)) {
3518	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
3519	}
3520
3521	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
3522
3523	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
3524	} else {
3525	if (error != `0`) {
3526	OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3527	}
3528
3529	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
3530	}
3531	return error;
3532	}
3533
3534	static int
3535	cfil_dispatch_disconnect_event(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing)
3536	{
3537	errno_t error = `0`;
3538	struct mbuf *msg = NULL;
3539	struct cfil_entry *entry;
3540	struct cfe_buf *entrybuf;
3541	struct cfil_msg_hdr msg_disconnected;
3542	struct content_filter *cfc;
3543
3544	socket_lock_assert_owned(so);
3545
3546	cfil_rw_lock_shared(lck: &cfil_lck_rw);
3547
3548	entry = &cfil_info->cfi_entries[kcunit - `1`];
3549	if (outgoing) {
3550	entrybuf = &entry->cfe_snd;
3551	} else {
3552	entrybuf = &entry->cfe_rcv;
3553	}
3554
3555	cfc = entry->cfe_filter;
3556	if (cfc == NULL) {
3557	goto done;
3558	}
3559
3560	// Mark if this flow qualifies for immediate close.
3561	SET_NO_CLOSE_WAIT(sotoinpcb(so), cfil_info);
3562
3563	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3564	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3565
3566	/*
3567	* Send the disconnection event once
3568	*/
3569	if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) \|\|
3570	(!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3571	CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3572	(uint64_t)VM_KERNEL_ADDRPERM(so));
3573	goto done;
3574	}
3575
3576	/*
3577	* We're not disconnected as long as some data is waiting
3578	* to be delivered to the filter
3579	*/
3580	if (outgoing && cfil_queue_empty(cfq: &entrybuf->cfe_ctl_q) == `0`) {
3581	CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3582	(uint64_t)VM_KERNEL_ADDRPERM(so));
3583	error = EBUSY;
3584	goto done;
3585	}
3586	/ Would be wasteful to try when flow controlled /
3587	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3588	error = ENOBUFS;
3589	goto done;
3590	}
3591
3592	if (cfil_info->cfi_debug) {
3593	cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3594	"CFIL: OUT - SENDING DISCONNECT UP":
3595	"CFIL: IN - SENDING DISCONNECT UP");
3596	}
3597
3598	bzero(s: &msg_disconnected, n: sizeof(struct cfil_msg_hdr));
3599	msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3600	msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3601	msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3602	msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3603	CFM_OP_DISCONNECT_IN;
3604	msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3605	error = ctl_enqueuedata(kctlref: entry->cfe_filter->cf_kcref,
3606	unit: entry->cfe_filter->cf_kcunit,
3607	data: &msg_disconnected,
3608	len: sizeof(struct cfil_msg_hdr),
3609	CTL_DATA_EOR);
3610	if (error != `0`) {
3611	CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3612	mbuf_freem(mbuf: msg);
3613	goto done;
3614	}
3615	microuptime(tv: &entry->cfe_last_event);
3616	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3617
3618	/ Remember we have sent the disconnection message /
3619	if (outgoing) {
3620	entry->cfe_flags \|= CFEF_SENT_DISCONNECT_OUT;
3621	OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3622	} else {
3623	entry->cfe_flags \|= CFEF_SENT_DISCONNECT_IN;
3624	OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3625	}
3626	done:
3627	if (error == ENOBUFS) {
3628	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
3629	OSIncrementAtomic(
3630	&cfil_stats.cfs_disconnect_event_flow_control);
3631
3632	if (!cfil_rw_lock_shared_to_exclusive(lck: &cfil_lck_rw)) {
3633	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
3634	}
3635
3636	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
3637
3638	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
3639	} else {
3640	if (error != `0`) {
3641	OSIncrementAtomic(
3642	&cfil_stats.cfs_disconnect_event_fail);
3643	}
3644
3645	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
3646	}
3647	return error;
3648	}
3649
3650	int
3651	cfil_dispatch_closed_event(struct socket so, struct* cfil_info cfil_info, int* kcunit)
3652	{
3653	struct cfil_entry *entry;
3654	struct cfil_msg_sock_closed msg_closed;
3655	errno_t error = `0`;
3656	struct content_filter *cfc;
3657	struct inpcb *inp = NULL;
3658
3659	socket_lock_assert_owned(so);
3660
3661	cfil_rw_lock_shared(lck: &cfil_lck_rw);
3662
3663	entry = &cfil_info->cfi_entries[kcunit - `1`];
3664	cfc = entry->cfe_filter;
3665	if (cfc == NULL) {
3666	goto done;
3667	}
3668
3669	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3670	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3671
3672	/ Would be wasteful to try when flow controlled /
3673	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3674	error = ENOBUFS;
3675	goto done;
3676	}
3677	/*
3678	* Send a single closed message per filter
3679	*/
3680	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != `0`) {
3681	goto done;
3682	}
3683	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`) {
3684	goto done;
3685	}
3686
3687	microuptime(tv: &entry->cfe_last_event);
3688	CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3689
3690	bzero(s: &msg_closed, n: sizeof(struct cfil_msg_sock_closed));
3691	msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3692	msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3693	msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3694	msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3695	msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3696	msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3697	msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3698	memcpy(dst: msg_closed.cfc_op_time, src: cfil_info->cfi_op_time, n: sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3699	memcpy(dst: msg_closed.cfc_op_list, src: cfil_info->cfi_op_list, n: sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3700	msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3701	msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3702	msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3703
3704	if (entry->cfe_laddr_sent == false) {
3705	/ cache it if necessary /
3706	if (cfil_info->cfi_so_attach_laddr.sa.sa_len == `0`) {
3707	inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
3708	if (inp != NULL) {
3709	boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3710	union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
3711	union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
3712	cfil_fill_event_msg_addresses(entry: cfil_info->cfi_hash_entry, inp,
3713	sin_src: src, sin_dst: dst, isIPv4: !IS_INP_V6(inp), outgoing);
3714	}
3715	}
3716
3717	if (cfil_info->cfi_so_attach_laddr.sa.sa_len != `0`) {
3718	msg_closed.cfc_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
3719	entry->cfe_laddr_sent = true;
3720	}
3721	}
3722
3723	cfil_dispatch_closed_event_sign(crypto_state: entry->cfe_filter->cf_crypto_state, so, cfil_info, msg: &msg_closed);
3724
3725	if (cfil_info->cfi_debug) {
3726	cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3727	}
3728
3729	/ for debugging*
3730	* if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3731	* msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3732	* }
3733	* for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3734	* CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3735	* }
3736	*/
3737
3738	error = ctl_enqueuedata(kctlref: entry->cfe_filter->cf_kcref,
3739	unit: entry->cfe_filter->cf_kcunit,
3740	data: &msg_closed,
3741	len: sizeof(struct cfil_msg_sock_closed),
3742	CTL_DATA_EOR);
3743	if (error != `0`) {
3744	CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3745	error);
3746	goto done;
3747	}
3748
3749	entry->cfe_flags \|= CFEF_SENT_SOCK_CLOSED;
3750	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3751	done:
3752	/ We can recover from flow control /
3753	if (error == ENOBUFS) {
3754	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
3755	OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3756
3757	if (!cfil_rw_lock_shared_to_exclusive(lck: &cfil_lck_rw)) {
3758	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
3759	}
3760
3761	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
3762
3763	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
3764	} else {
3765	if (error != `0`) {
3766	OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3767	}
3768
3769	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
3770	}
3771
3772	return error;
3773	}
3774
3775	static void
3776	fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3777	struct in6_addr *ip6, u_int16_t port, uint32_t ifscope)
3778	{
3779	if (sin46 == NULL) {
3780	return;
3781	}
3782
3783	struct sockaddr_in6 *sin6 = &sin46->sin6;
3784
3785	sin6->sin6_family = AF_INET6;
3786	sin6->sin6_len = sizeof(*sin6);
3787	sin6->sin6_port = port;
3788	sin6->sin6_addr = *ip6;
3789	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3790	sin6->sin6_scope_id = ifscope;
3791	if (in6_embedded_scope) {
3792	in6_verify_ifscope(&sin6->sin6_addr, sin6->sin6_scope_id);
3793	if (sin6->sin6_addr.s6_addr16[`1`] != `0`) {
3794	sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[`1`]);
3795	sin6->sin6_addr.s6_addr16[`1`] = `0`;
3796	}
3797	}
3798	}
3799	}
3800
3801	static void
3802	fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3803	struct in_addr ip, u_int16_t port)
3804	{
3805	if (sin46 == NULL) {
3806	return;
3807	}
3808
3809	struct sockaddr_in *sin = &sin46->sin;
3810
3811	sin->sin_family = AF_INET;
3812	sin->sin_len = sizeof(*sin);
3813	sin->sin_port = port;
3814	sin->sin_addr.s_addr = ip.s_addr;
3815	}
3816
3817	static void
3818	cfil_get_flow_address_v6(struct soflow_hash_entry entry, struct* inpcb *inp,
3819	struct in6_addr laddr, struct in6_addr faddr,
3820	u_int16_t lport, u_int16_t fport)
3821	{
3822	if (entry != NULL) {
3823	*laddr = &entry->soflow_laddr.addr6;
3824	*faddr = &entry->soflow_faddr.addr6;
3825	*lport = entry->soflow_lport;
3826	*fport = entry->soflow_fport;
3827	} else {
3828	*laddr = &inp->in6p_laddr;
3829	*faddr = &inp->in6p_faddr;
3830	*lport = inp->inp_lport;
3831	*fport = inp->inp_fport;
3832	}
3833	}
3834
3835	static void
3836	cfil_get_flow_address(struct soflow_hash_entry entry, struct* inpcb *inp,
3837	struct in_addr laddr, struct* in_addr *faddr,
3838	u_int16_t lport, u_int16_t fport)
3839	{
3840	if (entry != NULL) {
3841	*laddr = entry->soflow_laddr.addr46.ia46_addr4;
3842	*faddr = entry->soflow_faddr.addr46.ia46_addr4;
3843	*lport = entry->soflow_lport;
3844	*fport = entry->soflow_fport;
3845	} else {
3846	*laddr = inp->inp_laddr;
3847	*faddr = inp->inp_faddr;
3848	*lport = inp->inp_lport;
3849	*fport = inp->inp_fport;
3850	}
3851	}
3852
3853	static int
3854	cfil_dispatch_data_event(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
3855	struct mbuf data, unsigned* int copyoffset, unsigned int copylen)
3856	{
3857	errno_t error = `0`;
3858	struct mbuf *copy = NULL;
3859	struct mbuf *msg = NULL;
3860	unsigned int one = `1`;
3861	struct cfil_msg_data_event *data_req;
3862	size_t hdrsize;
3863	struct inpcb inp = (struct* inpcb *)so->so_pcb;
3864	struct cfil_entry *entry;
3865	struct cfe_buf *entrybuf;
3866	struct content_filter *cfc;
3867	struct timeval tv;
3868	int inp_flags = `0`;
3869
3870	cfil_rw_lock_shared(lck: &cfil_lck_rw);
3871
3872	entry = &cfil_info->cfi_entries[kcunit - `1`];
3873	if (outgoing) {
3874	entrybuf = &entry->cfe_snd;
3875	} else {
3876	entrybuf = &entry->cfe_rcv;
3877	}
3878
3879	cfc = entry->cfe_filter;
3880	if (cfc == NULL) {
3881	goto done;
3882	}
3883
3884	data = cfil_data_start(m: data);
3885	if (data == NULL) {
3886	CFIL_LOG(LOG_ERR, "No data start");
3887	goto done;
3888	}
3889
3890	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3891	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3892
3893	socket_lock_assert_owned(so);
3894
3895	/ Would be wasteful to try /
3896	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3897	error = ENOBUFS;
3898	goto done;
3899	}
3900
3901	/ Make a copy of the data to pass to kernel control socket /
3902	copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT, NULL, NULL,
3903	M_COPYM_NOOP_HDR);
3904	if (copy == NULL) {
3905	CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3906	error = ENOMEM;
3907	goto done;
3908	}
3909
3910	/ We need an mbuf packet for the message header /
3911	hdrsize = sizeof(struct cfil_msg_data_event);
3912	error = mbuf_allocpacket(how: MBUF_DONTWAIT, packetlen: hdrsize, maxchunks: &one, mbuf: &msg);
3913	if (error != `0`) {
3914	CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3915	m_freem(copy);
3916	/*
3917	* ENOBUFS is to indicate flow control
3918	*/
3919	error = ENOMEM;
3920	goto done;
3921	}
3922	mbuf_setlen(mbuf: msg, len: hdrsize);
3923	mbuf_pkthdr_setlen(mbuf: msg, len: hdrsize + copylen);
3924	msg->m_next = copy;
3925	data_req = (struct cfil_msg_data_event *)mbuf_data(mbuf: msg);
3926	bzero(s: data_req, n: hdrsize);
3927	data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3928	data_req->cfd_msghdr.cfm_version = `1`;
3929	data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3930	data_req->cfd_msghdr.cfm_op =
3931	outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3932	data_req->cfd_msghdr.cfm_sock_id =
3933	entry->cfe_cfil_info->cfi_sock_id;
3934	data_req->cfd_start_offset = entrybuf->cfe_peeked;
3935	data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3936
3937	data_req->cfd_flags = `0`;
3938	if (OPTIONAL_IP_HEADER(so)) {
3939	/*
3940	* For non-UDP/TCP traffic, indicate to filters if optional
3941	* IP header is present:
3942	* outgoing - indicate according to INP_HDRINCL flag
3943	* incoming - For IPv4 only, stripping of IP header is
3944	* optional. But for CFIL, we delay stripping
3945	* at rip_input. So CFIL always expects IP
3946	* frames. IP header will be stripped according
3947	* to INP_STRIPHDR flag later at reinjection.
3948	*/
3949	if ((!outgoing && !IS_INP_V6(inp)) \|\|
3950	(outgoing && cfil_dgram_peek_socket_state(m: data, inp_flags: &inp_flags) && (inp_flags & INP_HDRINCL))) {
3951	data_req->cfd_flags \|= CFD_DATA_FLAG_IP_HEADER;
3952	}
3953	}
3954
3955	/*
3956	* Copy address/port into event msg.
3957	* For non connected sockets need to copy addresses from passed
3958	* parameters
3959	*/
3960	cfil_fill_event_msg_addresses(entry: cfil_info->cfi_hash_entry, inp,
3961	sin_src: &data_req->cfc_src, sin_dst: &data_req->cfc_dst,
3962	isIPv4: !IS_INP_V6(inp), outgoing);
3963
3964	if (cfil_info->cfi_debug && cfil_log_data) {
3965	cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3966	}
3967
3968	if (cfil_info->cfi_isSignatureLatest == false) {
3969	cfil_dispatch_data_event_sign(crypto_state: entry->cfe_filter->cf_crypto_state, so, cfil_info, msg: data_req);
3970	}
3971
3972	microuptime(tv: &tv);
3973	CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3974
3975	/ Pass the message to the content filter /
3976	error = ctl_enqueuembuf(kctlref: entry->cfe_filter->cf_kcref,
3977	unit: entry->cfe_filter->cf_kcunit,
3978	m: msg, CTL_DATA_EOR);
3979	if (error != `0`) {
3980	CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3981	mbuf_freem(mbuf: msg);
3982	goto done;
3983	}
3984	entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3985	OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3986
3987	if (cfil_info->cfi_debug && cfil_log_data) {
3988	CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu <%llx> outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3989	(uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3990	data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3991	}
3992
3993	done:
3994	if (error == ENOBUFS) {
3995	entry->cfe_flags \|= CFEF_FLOW_CONTROLLED;
3996	OSIncrementAtomic(
3997	&cfil_stats.cfs_data_event_flow_control);
3998
3999	if (!cfil_rw_lock_shared_to_exclusive(lck: &cfil_lck_rw)) {
4000	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
4001	}
4002
4003	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
4004
4005	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
4006	} else {
4007	if (error != `0`) {
4008	OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
4009	}
4010
4011	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
4012	}
4013	return error;
4014	}
4015
4016	/*
4017	* Process the queue of data waiting to be delivered to content filter
4018	*/
4019	static int
4020	cfil_data_service_ctl_q(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing)
4021	{
4022	errno_t error = `0`;
4023	struct mbuf data, tmp = NULL;
4024	unsigned int datalen = `0`, copylen = `0`, copyoffset = `0`;
4025	struct cfil_entry *entry;
4026	struct cfe_buf *entrybuf;
4027	uint64_t currentoffset = `0`;
4028
4029	if (cfil_info == NULL) {
4030	return `0`;
4031	}
4032
4033	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4034	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4035
4036	socket_lock_assert_owned(so);
4037
4038	entry = &cfil_info->cfi_entries[kcunit - `1`];
4039	if (outgoing) {
4040	entrybuf = &entry->cfe_snd;
4041	} else {
4042	entrybuf = &entry->cfe_rcv;
4043	}
4044
4045	/ Send attached message if not yet done /
4046	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`) {
4047	error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
4048	conn_dir: cfil_info->cfi_dir);
4049	if (error != `0`) {
4050	/ We can recover from flow control /
4051	if (error == ENOBUFS \|\| error == ENOMEM) {
4052	error = `0`;
4053	}
4054	goto done;
4055	}
4056	} else if ((entry->cfe_flags & CFEF_DATA_START) == `0`) {
4057	OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
4058	goto done;
4059	}
4060
4061	if (cfil_info->cfi_debug && cfil_log_data) {
4062	CFIL_LOG(LOG_ERR, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
4063	entrybuf->cfe_pass_offset,
4064	entrybuf->cfe_peeked,
4065	entrybuf->cfe_peek_offset);
4066	}
4067
4068	/ Move all data that can pass /
4069	while ((data = cfil_queue_first(cfq: &entrybuf->cfe_ctl_q)) != NULL &&
4070	entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
4071	datalen = cfil_data_length(m: data, NULL, NULL);
4072	tmp = data;
4073
4074	if (entrybuf->cfe_ctl_q.q_start + datalen <=
4075	entrybuf->cfe_pass_offset) {
4076	/*
4077	* The first mbuf can fully pass
4078	*/
4079	copylen = datalen;
4080	} else {
4081	/*
4082	* The first mbuf can partially pass
4083	*/
4084	copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
4085	}
4086	VERIFY(copylen <= datalen);
4087
4088	if (cfil_info->cfi_debug && cfil_log_data) {
4089	CFIL_LOG(LOG_ERR,
4090	"CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
4091	"datalen %u copylen %u",
4092	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
4093	entrybuf->cfe_ctl_q.q_start,
4094	entrybuf->cfe_peeked,
4095	entrybuf->cfe_pass_offset,
4096	entrybuf->cfe_peek_offset,
4097	datalen, copylen);
4098	}
4099
4100	/*
4101	* Data that passes has been peeked at explicitly or
4102	* implicitly
4103	*/
4104	if (entrybuf->cfe_ctl_q.q_start + copylen >
4105	entrybuf->cfe_peeked) {
4106	entrybuf->cfe_peeked =
4107	entrybuf->cfe_ctl_q.q_start + copylen;
4108	}
4109	/*
4110	* Stop on partial pass
4111	*/
4112	if (copylen < datalen) {
4113	break;
4114	}
4115
4116	/ All good, move full data from ctl queue to pending queue /
4117	cfil_queue_remove(cfq: &entrybuf->cfe_ctl_q, m: data, len: datalen);
4118
4119	cfil_queue_enqueue(cfq: &entrybuf->cfe_pending_q, m: data, len: datalen);
4120	if (outgoing) {
4121	OSAddAtomic64(datalen,
4122	&cfil_stats.cfs_pending_q_out_enqueued);
4123	} else {
4124	OSAddAtomic64(datalen,
4125	&cfil_stats.cfs_pending_q_in_enqueued);
4126	}
4127	}
4128	CFIL_INFO_VERIFY(cfil_info);
4129	if (tmp != NULL) {
4130	CFIL_LOG(LOG_DEBUG,
4131	"%llx first %llu peeked %llu pass %llu peek %llu"
4132	"datalen %u copylen %u",
4133	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
4134	entrybuf->cfe_ctl_q.q_start,
4135	entrybuf->cfe_peeked,
4136	entrybuf->cfe_pass_offset,
4137	entrybuf->cfe_peek_offset,
4138	datalen, copylen);
4139	}
4140	tmp = NULL;
4141
4142	/ Now deal with remaining data the filter wants to peek at /
4143	for (data = cfil_queue_first(cfq: &entrybuf->cfe_ctl_q),
4144	currentoffset = entrybuf->cfe_ctl_q.q_start;
4145	data != NULL && currentoffset < entrybuf->cfe_peek_offset;
4146	data = cfil_queue_next(cfq: &entrybuf->cfe_ctl_q, m: data),
4147	currentoffset += datalen) {
4148	datalen = cfil_data_length(m: data, NULL, NULL);
4149	tmp = data;
4150
4151	/ We've already peeked at this mbuf /
4152	if (currentoffset + datalen <= entrybuf->cfe_peeked) {
4153	continue;
4154	}
4155	/*
4156	* The data in the first mbuf may have been
4157	* partially peeked at
4158	*/
4159	copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
4160	VERIFY(copyoffset < datalen);
4161	copylen = datalen - copyoffset;
4162	VERIFY(copylen <= datalen);
4163	/*
4164	* Do not copy more than needed
4165	*/
4166	if (currentoffset + copyoffset + copylen >
4167	entrybuf->cfe_peek_offset) {
4168	copylen = (unsigned int)(entrybuf->cfe_peek_offset -
4169	(currentoffset + copyoffset));
4170	}
4171
4172	if (cfil_info->cfi_debug && cfil_log_data) {
4173	CFIL_LOG(LOG_ERR,
4174	"CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
4175	"datalen %u copylen %u copyoffset %u",
4176	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
4177	currentoffset,
4178	entrybuf->cfe_peeked,
4179	entrybuf->cfe_pass_offset,
4180	entrybuf->cfe_peek_offset,
4181	datalen, copylen, copyoffset);
4182	}
4183
4184	/*
4185	* Stop if there is nothing more to peek at
4186	*/
4187	if (copylen == `0`) {
4188	break;
4189	}
4190	/*
4191	* Let the filter get a peek at this span of data
4192	*/
4193	error = cfil_dispatch_data_event(so, cfil_info, kcunit,
4194	outgoing, data, copyoffset, copylen);
4195	if (error != `0`) {
4196	/ On error, leave data in ctl_q /
4197	break;
4198	}
4199	entrybuf->cfe_peeked += copylen;
4200	if (outgoing) {
4201	OSAddAtomic64(copylen,
4202	&cfil_stats.cfs_ctl_q_out_peeked);
4203	} else {
4204	OSAddAtomic64(copylen,
4205	&cfil_stats.cfs_ctl_q_in_peeked);
4206	}
4207
4208	/ Stop when data could not be fully peeked at /
4209	if (copylen + copyoffset < datalen) {
4210	break;
4211	}
4212	}
4213	CFIL_INFO_VERIFY(cfil_info);
4214	if (tmp != NULL) {
4215	CFIL_LOG(LOG_DEBUG,
4216	"%llx first %llu peeked %llu pass %llu peek %llu"
4217	"datalen %u copylen %u copyoffset %u",
4218	(uint64_t)VM_KERNEL_ADDRPERM(tmp),
4219	currentoffset,
4220	entrybuf->cfe_peeked,
4221	entrybuf->cfe_pass_offset,
4222	entrybuf->cfe_peek_offset,
4223	datalen, copylen, copyoffset);
4224	}
4225
4226	/*
4227	* Process data that has passed the filter
4228	*/
4229	error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
4230	if (error != `0`) {
4231	CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
4232	error);
4233	goto done;
4234	}
4235
4236	/*
4237	* Dispatch disconnect events that could not be sent
4238	*/
4239	if (cfil_info == NULL) {
4240	goto done;
4241	} else if (outgoing) {
4242	if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
4243	!(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
4244	cfil_dispatch_disconnect_event(so, cfil_info, kcunit, outgoing: `1`);
4245	}
4246	} else {
4247	if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
4248	!(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
4249	cfil_dispatch_disconnect_event(so, cfil_info, kcunit, outgoing: `0`);
4250	}
4251	}
4252
4253	done:
4254	CFIL_LOG(LOG_DEBUG,
4255	"first %llu peeked %llu pass %llu peek %llu",
4256	entrybuf->cfe_ctl_q.q_start,
4257	entrybuf->cfe_peeked,
4258	entrybuf->cfe_pass_offset,
4259	entrybuf->cfe_peek_offset);
4260
4261	CFIL_INFO_VERIFY(cfil_info);
4262	return error;
4263	}
4264
4265	/*
4266	* cfil_data_filter()
4267	*
4268	* Process data for a content filter installed on a socket
4269	*/
4270	int
4271	cfil_data_filter(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
4272	struct mbuf *data, uint32_t datalen)
4273	{
4274	errno_t error = `0`;
4275	struct cfil_entry *entry;
4276	struct cfe_buf *entrybuf;
4277
4278	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4279	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4280
4281	socket_lock_assert_owned(so);
4282
4283	entry = &cfil_info->cfi_entries[kcunit - `1`];
4284	if (outgoing) {
4285	entrybuf = &entry->cfe_snd;
4286	} else {
4287	entrybuf = &entry->cfe_rcv;
4288	}
4289
4290	/ Are we attached to the filter? /
4291	if (entry->cfe_filter == NULL) {
4292	error = `0`;
4293	goto done;
4294	}
4295
4296	/ Dispatch to filters /
4297	cfil_queue_enqueue(cfq: &entrybuf->cfe_ctl_q, m: data, len: datalen);
4298	if (outgoing) {
4299	OSAddAtomic64(datalen,
4300	&cfil_stats.cfs_ctl_q_out_enqueued);
4301	} else {
4302	OSAddAtomic64(datalen,
4303	&cfil_stats.cfs_ctl_q_in_enqueued);
4304	}
4305
4306	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4307	if (error != `0`) {
4308	CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4309	error);
4310	}
4311	/*
4312	* We have to return EJUSTRETURN in all cases to avoid double free
4313	* by socket layer
4314	*/
4315	error = EJUSTRETURN;
4316	done:
4317	CFIL_INFO_VERIFY(cfil_info);
4318
4319	CFIL_LOG(LOG_INFO, "return %d", error);
4320	return error;
4321	}
4322
4323	/*
4324	* cfil_service_inject_queue() re-inject data that passed the
4325	* content filters
4326	*/
4327	static int
4328	cfil_service_inject_queue(struct socket so, struct* cfil_info cfil_info, int* outgoing)
4329	{
4330	mbuf_t data;
4331	unsigned int datalen;
4332	int mbcnt = `0`;
4333	int mbnum = `0`;
4334	errno_t error = `0`;
4335	struct cfi_buf *cfi_buf;
4336	struct cfil_queue *inject_q;
4337	int need_rwakeup = `0`;
4338	int count = `0`;
4339	struct inpcb *inp = NULL;
4340	struct ip *ip = NULL;
4341	unsigned int hlen;
4342
4343	if (cfil_info == NULL) {
4344	return `0`;
4345	}
4346
4347	socket_lock_assert_owned(so);
4348
4349	if (so->so_state & SS_DEFUNCT) {
4350	return `0`;
4351	}
4352
4353	if (outgoing) {
4354	cfi_buf = &cfil_info->cfi_snd;
4355	cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4356	} else {
4357	cfi_buf = &cfil_info->cfi_rcv;
4358	cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4359	}
4360	inject_q = &cfi_buf->cfi_inject_q;
4361
4362	if (cfil_queue_empty(cfq: inject_q)) {
4363	return `0`;
4364	}
4365
4366	if (cfil_info->cfi_debug && cfil_log_data) {
4367	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4368	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4369	}
4370
4371	while ((data = cfil_queue_first(cfq: inject_q)) != NULL) {
4372	datalen = cfil_data_length(m: data, retmbcnt: &mbcnt, retmbnum: &mbnum);
4373
4374	if (cfil_info->cfi_debug && cfil_log_data) {
4375	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4376	(uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4377	}
4378
4379	/ Remove data from queue and adjust stats /
4380	cfil_queue_remove(cfq: inject_q, m: data, len: datalen);
4381	cfi_buf->cfi_pending_first += datalen;
4382	cfi_buf->cfi_pending_mbcnt -= mbcnt;
4383	cfi_buf->cfi_pending_mbnum -= mbnum;
4384	cfil_info_buf_verify(cfi_buf);
4385
4386	if (outgoing) {
4387	error = sosend_reinject(so, NULL, top: data, NULL, sendflags: `0`);
4388	if (error != `0`) {
4389	cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4390	CFIL_LOG(LOG_ERR, "CFIL: sosend() failed %d", error);
4391	break;
4392	}
4393	// At least one injection succeeded, need to wake up pending threads.
4394	need_rwakeup = `1`;
4395	} else {
4396	data->m_flags \|= M_SKIPCFIL;
4397
4398	/*
4399	* NOTE: We currently only support TCP, UDP, ICMP,
4400	* ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4401	* need to call the appropriate sbappendxxx()
4402	* of fix sock_inject_data_in()
4403	*/
4404	if (NEED_DGRAM_FLOW_TRACKING(so)) {
4405	if (OPTIONAL_IP_HEADER(so)) {
4406	inp = sotoinpcb(so);
4407	if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4408	mbuf_t data_start = cfil_data_start(m: data);
4409	if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4410	ip = mtod(data_start, struct ip *);
4411	hlen = IP_VHL_HL(ip->ip_vhl) << `2`;
4412	data_start->m_len -= hlen;
4413	data_start->m_pkthdr.len -= hlen;
4414	data_start->m_data += hlen;
4415	}
4416	}
4417	}
4418
4419	if (sbappendchain(sb: &so->so_rcv, m: data, space: `0`)) {
4420	need_rwakeup = `1`;
4421	}
4422	} else {
4423	if (sbappendstream(sb: &so->so_rcv, m: data)) {
4424	need_rwakeup = `1`;
4425	}
4426	}
4427	}
4428
4429	if (outgoing) {
4430	OSAddAtomic64(datalen,
4431	&cfil_stats.cfs_inject_q_out_passed);
4432	} else {
4433	OSAddAtomic64(datalen,
4434	&cfil_stats.cfs_inject_q_in_passed);
4435	}
4436
4437	count++;
4438	}
4439
4440	if (cfil_info->cfi_debug && cfil_log_data) {
4441	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4442	(uint64_t)VM_KERNEL_ADDRPERM(so), count);
4443	}
4444
4445	/ A single wakeup is for several packets is more efficient /
4446	if (need_rwakeup) {
4447	if (outgoing == TRUE) {
4448	sowwakeup(so);
4449	} else {
4450	sorwakeup(so);
4451	}
4452	}
4453
4454	if (error != `0` && cfil_info) {
4455	if (error == ENOBUFS) {
4456	OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4457	}
4458	if (error == ENOMEM) {
4459	OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4460	}
4461
4462	if (outgoing) {
4463	cfil_info->cfi_flags \|= CFIF_RETRY_INJECT_OUT;
4464	OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4465	} else {
4466	cfil_info->cfi_flags \|= CFIF_RETRY_INJECT_IN;
4467	OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4468	}
4469	}
4470
4471	/*
4472	* Notify
4473	*/
4474	if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4475	cfil_sock_notify_shutdown(so, SHUT_WR);
4476	if (cfil_sock_data_pending(sb: &so->so_snd) == `0`) {
4477	soshutdownlock_final(so, SHUT_WR);
4478	}
4479	}
4480	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4481	if (cfil_filters_attached(so) == `0`) {
4482	CFIL_LOG(LOG_INFO, "so %llx waking",
4483	(uint64_t)VM_KERNEL_ADDRPERM(so));
4484	wakeup(chan: (caddr_t)cfil_info);
4485	}
4486	}
4487
4488	if (SO_DELAYED_DEAD_GET(so)) {
4489	// Check to see if all data processed for this socket, if so mark it DEAD now.
4490	const bool is_dead = cfil_sock_is_dead(so);
4491	if (is_dead && cfil_info->cfi_debug) {
4492	cfil_info_log(LOG_ERR, cfil_info, "CFIL: Marked previoulsy delayed socket as DEAD");
4493	}
4494	}
4495	if (SO_DELAYED_TCP_TIME_WAIT_GET(so)) {
4496	// Check to see if all data processed for this socket, if so handle the TCP time wait now
4497	const bool is_added = cfil_sock_tcp_add_time_wait(so);
4498	if (is_added && cfil_info->cfi_debug) {
4499	cfil_info_log(LOG_ERR, cfil_info, "CFIL: Handled previously delayed socket for TCP time wait");
4500	}
4501	}
4502
4503	CFIL_INFO_VERIFY(cfil_info);
4504
4505	return error;
4506	}
4507
4508	static int
4509	cfil_service_pending_queue(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing)
4510	{
4511	uint64_t passlen, curlen;
4512	mbuf_t data;
4513	unsigned int datalen;
4514	errno_t error = `0`;
4515	struct cfil_entry *entry;
4516	struct cfe_buf *entrybuf;
4517	struct cfil_queue *pending_q;
4518	struct cfil_entry *iter_entry = NULL;
4519
4520	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4521	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4522
4523	socket_lock_assert_owned(so);
4524
4525	entry = &cfil_info->cfi_entries[kcunit - `1`];
4526	if (outgoing) {
4527	entrybuf = &entry->cfe_snd;
4528	} else {
4529	entrybuf = &entry->cfe_rcv;
4530	}
4531
4532	pending_q = &entrybuf->cfe_pending_q;
4533
4534	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4535
4536	if (cfil_queue_empty(cfq: pending_q)) {
4537	for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4538	iter_entry != NULL;
4539	iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4540	error = cfil_data_service_ctl_q(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing);
4541	/ 0 means passed so we can continue /
4542	if (error != `0`) {
4543	break;
4544	}
4545	}
4546	goto done;
4547	}
4548
4549	/*
4550	* Locate the chunks of data that we can pass to the next filter
4551	* A data chunk must be on mbuf boundaries
4552	*/
4553	curlen = `0`;
4554	while ((data = cfil_queue_first(cfq: pending_q)) != NULL) {
4555	datalen = cfil_data_length(m: data, NULL, NULL);
4556
4557	if (cfil_info->cfi_debug && cfil_log_data) {
4558	CFIL_LOG(LOG_ERR,
4559	"CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4560	(uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4561	passlen, curlen);
4562	}
4563
4564	if (curlen + datalen > passlen) {
4565	break;
4566	}
4567
4568	cfil_queue_remove(cfq: pending_q, m: data, len: datalen);
4569
4570	curlen += datalen;
4571
4572	for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4573	iter_entry != NULL;
4574	iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4575	error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4576	data, datalen);
4577	/ 0 means passed so we can continue /
4578	if (error != `0`) {
4579	break;
4580	}
4581	}
4582	/ When data has passed all filters, re-inject /
4583	if (error == `0`) {
4584	if (outgoing) {
4585	cfil_queue_enqueue(
4586	cfq: &cfil_info->cfi_snd.cfi_inject_q,
4587	m: data, len: datalen);
4588	OSAddAtomic64(datalen,
4589	&cfil_stats.cfs_inject_q_out_enqueued);
4590	} else {
4591	cfil_queue_enqueue(
4592	cfq: &cfil_info->cfi_rcv.cfi_inject_q,
4593	m: data, len: datalen);
4594	OSAddAtomic64(datalen,
4595	&cfil_stats.cfs_inject_q_in_enqueued);
4596	}
4597	}
4598	}
4599
4600	done:
4601	CFIL_INFO_VERIFY(cfil_info);
4602
4603	return error;
4604	}
4605
4606	int
4607	cfil_update_data_offsets(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
4608	uint64_t pass_offset, uint64_t peek_offset)
4609	{
4610	errno_t error = `0`;
4611	struct cfil_entry *entry = NULL;
4612	struct cfe_buf *entrybuf;
4613	int updated = `0`;
4614
4615	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4616
4617	socket_lock_assert_owned(so);
4618
4619	if (cfil_info == NULL) {
4620	CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4621	(uint64_t)VM_KERNEL_ADDRPERM(so));
4622	error = `0`;
4623	goto done;
4624	} else if (cfil_info->cfi_flags & CFIF_DROP) {
4625	CFIL_LOG(LOG_ERR, "so %llx drop set",
4626	(uint64_t)VM_KERNEL_ADDRPERM(so));
4627	error = EPIPE;
4628	goto done;
4629	}
4630
4631	entry = &cfil_info->cfi_entries[kcunit - `1`];
4632	if (outgoing) {
4633	entrybuf = &entry->cfe_snd;
4634	} else {
4635	entrybuf = &entry->cfe_rcv;
4636	}
4637
4638	/ Record updated offsets for this content filter /
4639	if (pass_offset > entrybuf->cfe_pass_offset) {
4640	entrybuf->cfe_pass_offset = pass_offset;
4641
4642	if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4643	entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4644	}
4645	updated = `1`;
4646	} else {
4647	CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4648	pass_offset, entrybuf->cfe_pass_offset);
4649	}
4650	/ Filter does not want or need to see data that's allowed to pass /
4651	if (peek_offset > entrybuf->cfe_pass_offset &&
4652	peek_offset > entrybuf->cfe_peek_offset) {
4653	entrybuf->cfe_peek_offset = peek_offset;
4654	updated = `1`;
4655	}
4656	/ Nothing to do /
4657	if (updated == `0`) {
4658	goto done;
4659	}
4660
4661	/ Move data held in control queue to pending queue if needed /
4662	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4663	if (error != `0`) {
4664	CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4665	error);
4666	goto done;
4667	}
4668	error = EJUSTRETURN;
4669
4670	done:
4671	/*
4672	* The filter is effectively detached when pass all from both sides
4673	* or when the socket is closed and no more data is waiting
4674	* to be delivered to the filter
4675	*/
4676	if (entry != NULL &&
4677	((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4678	entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) \|\|
4679	((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4680	cfil_queue_empty(cfq: &entry->cfe_snd.cfe_ctl_q) &&
4681	cfil_queue_empty(cfq: &entry->cfe_rcv.cfe_ctl_q)))) {
4682	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
4683
4684	if (cfil_info->cfi_debug) {
4685	cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4686	"CFIL: OUT - PASSED ALL - DETACH":
4687	"CFIL: IN - PASSED ALL - DETACH");
4688	}
4689
4690	CFIL_LOG(LOG_INFO, "so %llx detached %u",
4691	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4692	if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4693	cfil_filters_attached(so) == `0`) {
4694	if (cfil_info->cfi_debug) {
4695	cfil_info_log(LOG_ERR, cfil_info, "CFIL: WAKING");
4696	}
4697	CFIL_LOG(LOG_INFO, "so %llx waking",
4698	(uint64_t)VM_KERNEL_ADDRPERM(so));
4699	wakeup(chan: (caddr_t)cfil_info);
4700	}
4701	}
4702	CFIL_INFO_VERIFY(cfil_info);
4703	CFIL_LOG(LOG_INFO, "return %d", error);
4704	return error;
4705	}
4706
4707	/*
4708	* Update pass offset for socket when no data is pending
4709	*/
4710	static int
4711	cfil_set_socket_pass_offset(struct socket so, struct* cfil_info cfil_info, int* outgoing)
4712	{
4713	struct cfi_buf *cfi_buf;
4714	struct cfil_entry *entry;
4715	struct cfe_buf *entrybuf;
4716	uint32_t kcunit;
4717	uint64_t pass_offset = `0`;
4718	boolean_t first = true;
4719
4720	if (cfil_info == NULL) {
4721	return `0`;
4722	}
4723
4724	if (cfil_info->cfi_debug && cfil_log_data) {
4725	CFIL_LOG(LOG_ERR, "so %llx outgoing %d",
4726	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4727	}
4728
4729	socket_lock_assert_owned(so);
4730
4731	if (outgoing) {
4732	cfi_buf = &cfil_info->cfi_snd;
4733	} else {
4734	cfi_buf = &cfil_info->cfi_rcv;
4735	}
4736
4737	if (cfil_info->cfi_debug && cfil_log_data) {
4738	CFIL_LOG(LOG_ERR, "CFIL: <so %llx, sockID %llu <%llx>> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4739	(uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, outgoing,
4740	cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4741	}
4742
4743	if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == `0`) {
4744	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4745	entry = &cfil_info->cfi_entries[kcunit - `1`];
4746
4747	/ Are we attached to a filter? /
4748	if (entry->cfe_filter == NULL) {
4749	continue;
4750	}
4751
4752	if (outgoing) {
4753	entrybuf = &entry->cfe_snd;
4754	} else {
4755	entrybuf = &entry->cfe_rcv;
4756	}
4757
4758	// Keep track of the smallest pass_offset among filters.
4759	if (first == true \|\|
4760	entrybuf->cfe_pass_offset < pass_offset) {
4761	pass_offset = entrybuf->cfe_pass_offset;
4762	first = false;
4763	}
4764	}
4765	cfi_buf->cfi_pass_offset = pass_offset;
4766	}
4767
4768	if (cfil_info->cfi_debug && cfil_log_data) {
4769	CFIL_LOG(LOG_ERR, "CFIL: <so %llx, sockID %llu <%llx>>, cfi_pass_offset %llu",
4770	(uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4771	}
4772
4773	return `0`;
4774	}
4775
4776	int
4777	cfil_action_data_pass(struct socket so, struct* cfil_info cfil_info, uint32_t kcunit, int* outgoing,
4778	uint64_t pass_offset, uint64_t peek_offset)
4779	{
4780	errno_t error = `0`;
4781
4782	CFIL_LOG(LOG_INFO, "");
4783
4784	socket_lock_assert_owned(so);
4785
4786	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4787	if (error != `0`) {
4788	CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4789	(uint64_t)VM_KERNEL_ADDRPERM(so),
4790	outgoing ? "out" : "in");
4791	goto release;
4792	}
4793
4794	error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4795	pass_offset, peek_offset);
4796
4797	cfil_service_inject_queue(so, cfil_info, outgoing);
4798
4799	cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4800	release:
4801	CFIL_INFO_VERIFY(cfil_info);
4802	cfil_release_sockbuf(so, outgoing);
4803
4804	return error;
4805	}
4806
4807
4808	static void
4809	cfil_flush_queues(struct socket so, struct* cfil_info *cfil_info)
4810	{
4811	struct cfil_entry *entry;
4812	int kcunit;
4813	uint64_t drained;
4814
4815	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| cfil_info == NULL) {
4816	goto done;
4817	}
4818
4819	socket_lock_assert_owned(so);
4820
4821	/*
4822	* Flush the output queues and ignore errors as long as
4823	* we are attached
4824	*/
4825	(void) cfil_acquire_sockbuf(so, cfil_info, outgoing: `1`);
4826	if (cfil_info != NULL) {
4827	drained = `0`;
4828	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4829	entry = &cfil_info->cfi_entries[kcunit - `1`];
4830
4831	drained += cfil_queue_drain(cfq: &entry->cfe_snd.cfe_ctl_q);
4832	drained += cfil_queue_drain(cfq: &entry->cfe_snd.cfe_pending_q);
4833	}
4834	drained += cfil_queue_drain(cfq: &cfil_info->cfi_snd.cfi_inject_q);
4835
4836	if (drained) {
4837	if (cfil_info->cfi_flags & CFIF_DROP) {
4838	OSIncrementAtomic(
4839	&cfil_stats.cfs_flush_out_drop);
4840	} else {
4841	OSIncrementAtomic(
4842	&cfil_stats.cfs_flush_out_close);
4843	}
4844	}
4845	}
4846	cfil_release_sockbuf(so, outgoing: `1`);
4847
4848	/*
4849	* Flush the input queues
4850	*/
4851	(void) cfil_acquire_sockbuf(so, cfil_info, outgoing: `0`);
4852	if (cfil_info != NULL) {
4853	drained = `0`;
4854	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4855	entry = &cfil_info->cfi_entries[kcunit - `1`];
4856
4857	drained += cfil_queue_drain(
4858	cfq: &entry->cfe_rcv.cfe_ctl_q);
4859	drained += cfil_queue_drain(
4860	cfq: &entry->cfe_rcv.cfe_pending_q);
4861	}
4862	drained += cfil_queue_drain(cfq: &cfil_info->cfi_rcv.cfi_inject_q);
4863
4864	if (drained) {
4865	if (cfil_info->cfi_flags & CFIF_DROP) {
4866	OSIncrementAtomic(
4867	&cfil_stats.cfs_flush_in_drop);
4868	} else {
4869	OSIncrementAtomic(
4870	&cfil_stats.cfs_flush_in_close);
4871	}
4872	}
4873	}
4874	cfil_release_sockbuf(so, outgoing: `0`);
4875	done:
4876	CFIL_INFO_VERIFY(cfil_info);
4877	}
4878
4879	int
4880	cfil_action_drop(struct socket so, struct* cfil_info *cfil_info, uint32_t kcunit)
4881	{
4882	errno_t error = `0`;
4883	struct cfil_entry *entry;
4884	struct proc *p;
4885
4886	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| cfil_info == NULL) {
4887	goto done;
4888	}
4889
4890	socket_lock_assert_owned(so);
4891
4892	entry = &cfil_info->cfi_entries[kcunit - `1`];
4893
4894	/ Are we attached to the filter? /
4895	if (entry->cfe_filter == NULL) {
4896	goto done;
4897	}
4898
4899	cfil_info->cfi_flags \|= CFIF_DROP;
4900
4901	p = current_proc();
4902
4903	/*
4904	* Force the socket to be marked defunct
4905	* (forcing fixed along with rdar://19391339)
4906	*/
4907	if (so->so_flow_db == NULL) {
4908	error = sosetdefunct(p, so,
4909	SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER \| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4910	FALSE);
4911
4912	/ Flush the socket buffer and disconnect /
4913	if (error == `0`) {
4914	error = sodefunct(p, so,
4915	SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER \| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4916	}
4917	}
4918
4919	/ The filter is done, mark as detached /
4920	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
4921
4922	if (cfil_info->cfi_debug) {
4923	cfil_info_log(LOG_ERR, cfil_info, "CFIL: DROP - DETACH");
4924	}
4925
4926	CFIL_LOG(LOG_INFO, "so %llx detached %u",
4927	(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4928
4929	/ Pending data needs to go /
4930	cfil_flush_queues(so, cfil_info);
4931
4932	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4933	if (cfil_filters_attached(so) == `0`) {
4934	CFIL_LOG(LOG_INFO, "so %llx waking",
4935	(uint64_t)VM_KERNEL_ADDRPERM(so));
4936	wakeup(chan: (caddr_t)cfil_info);
4937	}
4938	}
4939	done:
4940	return error;
4941	}
4942
4943	int
4944	cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4945	{
4946	errno_t error = `0`;
4947	struct cfil_info *cfil_info = NULL;
4948
4949	bool cfil_attached = false;
4950	struct cfil_msg_bless_client blessmsg = (struct* cfil_msg_bless_client *)msghdr;
4951
4952	// Search and lock socket
4953	struct socket *so = cfil_socket_from_client_uuid(necp_client_uuid: blessmsg->cfb_client_uuid, cfil_attached: &cfil_attached);
4954	if (so == NULL) {
4955	error = ENOENT;
4956	} else {
4957	// The client gets a pass automatically
4958	cfil_info = (so->so_flow_db != NULL) ?
4959	soflow_db_get_feature_context(so->so_flow_db, msghdr->cfm_sock_id) : so->so_cfil;
4960
4961	if (cfil_attached) {
4962	if (cfil_info != NULL && cfil_info->cfi_debug) {
4963	cfil_info_log(LOG_ERR, cfil_info, "CFIL: VERDICT RECEIVED: BLESS");
4964	}
4965	cfil_sock_received_verdict(so);
4966	(void)cfil_action_data_pass(so, cfil_info, kcunit, outgoing: `1`, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4967	(void)cfil_action_data_pass(so, cfil_info, kcunit, outgoing: `0`, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4968	} else {
4969	so->so_flags1 \|= SOF1_CONTENT_FILTER_SKIP;
4970	}
4971	socket_unlock(so, refcount: `1`);
4972	}
4973
4974	return error;
4975	}
4976
4977	int
4978	cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4979	{
4980	struct content_filter *cfc = NULL;
4981	cfil_crypto_state_t crypto_state = NULL;
4982	struct cfil_msg_set_crypto_key keymsg = (struct* cfil_msg_set_crypto_key *)msghdr;
4983
4984	CFIL_LOG(LOG_NOTICE, "");
4985
4986	if (kcunit > MAX_CONTENT_FILTER) {
4987	CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4988	kcunit, MAX_CONTENT_FILTER);
4989	return EINVAL;
4990	}
4991	crypto_state = cfil_crypto_init_client(client_key: (uint8_t *)keymsg->crypto_key);
4992	if (crypto_state == NULL) {
4993	CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4994	kcunit);
4995	return EINVAL;
4996	}
4997
4998	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
4999
5000	cfc = content_filters[kcunit - `1`];
5001	if (cfc->cf_kcunit != kcunit) {
5002	CFIL_LOG(LOG_ERR, "bad unit info %u)",
5003	kcunit);
5004	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
5005	cfil_crypto_cleanup_state(state: crypto_state);
5006	return EINVAL;
5007	}
5008	if (cfc->cf_crypto_state != NULL) {
5009	cfil_crypto_cleanup_state(state: cfc->cf_crypto_state);
5010	cfc->cf_crypto_state = NULL;
5011	}
5012	cfc->cf_crypto_state = crypto_state;
5013
5014	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
5015	return `0`;
5016	}
5017
5018	static int
5019	cfil_update_entry_offsets(struct socket so, struct* cfil_info cfil_info, int* outgoing, unsigned int datalen)
5020	{
5021	struct cfil_entry *entry;
5022	struct cfe_buf *entrybuf;
5023	uint32_t kcunit;
5024
5025	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
5026	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
5027
5028	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5029	entry = &cfil_info->cfi_entries[kcunit - `1`];
5030
5031	/ Are we attached to the filter? /
5032	if (entry->cfe_filter == NULL) {
5033	continue;
5034	}
5035
5036	if (outgoing) {
5037	entrybuf = &entry->cfe_snd;
5038	} else {
5039	entrybuf = &entry->cfe_rcv;
5040	}
5041
5042	entrybuf->cfe_ctl_q.q_start += datalen;
5043	if (entrybuf->cfe_pass_offset < entrybuf->cfe_ctl_q.q_start) {
5044	entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
5045	}
5046	entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
5047	if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
5048	entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
5049	}
5050
5051	entrybuf->cfe_ctl_q.q_end += datalen;
5052
5053	entrybuf->cfe_pending_q.q_start += datalen;
5054	entrybuf->cfe_pending_q.q_end += datalen;
5055	}
5056	CFIL_INFO_VERIFY(cfil_info);
5057	return `0`;
5058	}
5059
5060	int
5061	cfil_data_common(struct socket so, struct* cfil_info cfil_info, int* outgoing, struct sockaddr *to,
5062	struct mbuf data, struct* mbuf *control, uint32_t flags)
5063	{
5064	#pragma unused(to, control, flags)
5065	errno_t error = `0`;
5066	unsigned int datalen;
5067	int mbcnt = `0`;
5068	int mbnum = `0`;
5069	int kcunit;
5070	struct cfi_buf *cfi_buf;
5071	struct mbuf *chain = NULL;
5072
5073	if (cfil_info == NULL) {
5074	CFIL_LOG(LOG_ERR, "so %llx cfil detached",
5075	(uint64_t)VM_KERNEL_ADDRPERM(so));
5076	error = `0`;
5077	goto done;
5078	} else if (cfil_info->cfi_flags & CFIF_DROP) {
5079	CFIL_LOG(LOG_ERR, "so %llx drop set",
5080	(uint64_t)VM_KERNEL_ADDRPERM(so));
5081	error = EPIPE;
5082	goto done;
5083	}
5084
5085	datalen = cfil_data_length(m: data, retmbcnt: &mbcnt, retmbnum: &mbnum);
5086
5087	if (datalen == `0`) {
5088	error = `0`;
5089	goto done;
5090	}
5091
5092	if (outgoing) {
5093	cfi_buf = &cfil_info->cfi_snd;
5094	cfil_info->cfi_byte_outbound_count += datalen;
5095	} else {
5096	cfi_buf = &cfil_info->cfi_rcv;
5097	cfil_info->cfi_byte_inbound_count += datalen;
5098	}
5099
5100	cfi_buf->cfi_pending_last += datalen;
5101	cfi_buf->cfi_pending_mbcnt += mbcnt;
5102	cfi_buf->cfi_pending_mbnum += mbnum;
5103
5104	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5105	if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max \|\|
5106	cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
5107	cfi_buf->cfi_tail_drop_cnt++;
5108	cfi_buf->cfi_pending_mbcnt -= mbcnt;
5109	cfi_buf->cfi_pending_mbnum -= mbnum;
5110	return EPIPE;
5111	}
5112	}
5113
5114	cfil_info_buf_verify(cfi_buf);
5115
5116	if (cfil_info->cfi_debug && cfil_log_data) {
5117	CFIL_LOG(LOG_ERR, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
5118	(uint64_t)VM_KERNEL_ADDRPERM(so),
5119	outgoing ? "OUT" : "IN",
5120	(uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
5121	(uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
5122	cfi_buf->cfi_pending_last,
5123	cfi_buf->cfi_pending_mbcnt,
5124	cfi_buf->cfi_pass_offset);
5125	}
5126
5127	/ Fast path when below pass offset /
5128	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
5129	cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
5130	if (cfil_info->cfi_debug && cfil_log_data) {
5131	CFIL_LOG(LOG_ERR, "CFIL: QUEUEING DATA: FAST PATH");
5132	}
5133	} else {
5134	struct cfil_entry *iter_entry;
5135	SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
5136	// Is cfil attached to this filter?
5137	kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
5138	if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
5139	if (NEED_DGRAM_FLOW_TRACKING(so) && chain == NULL) {
5140	/ Datagrams only:*
5141	* Chain addr (incoming only TDB), control (optional) and data into one chain.
5142	* This full chain will be reinjected into socket after recieving verdict.
5143	*/
5144	(void) cfil_dgram_save_socket_state(cfil_info, data);
5145	chain = sbconcat_mbufs(NULL, asa: outgoing ? NULL : to, m0: data, control);
5146	if (chain == NULL) {
5147	return ENOBUFS;
5148	}
5149	data = chain;
5150	}
5151	error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
5152	datalen);
5153	}
5154	/ 0 means passed so continue with next filter /
5155	if (error != `0`) {
5156	break;
5157	}
5158	}
5159	}
5160
5161	/ Move cursor if no filter claimed the data /
5162	if (error == `0`) {
5163	cfi_buf->cfi_pending_first += datalen;
5164	cfi_buf->cfi_pending_mbcnt -= mbcnt;
5165	cfi_buf->cfi_pending_mbnum -= mbnum;
5166	cfil_info_buf_verify(cfi_buf);
5167	}
5168	done:
5169	CFIL_INFO_VERIFY(cfil_info);
5170
5171	return error;
5172	}
5173
5174	/*
5175	* Callback from socket layer sosendxxx()
5176	*/
5177	int
5178	cfil_sock_data_out(struct socket so, struct* sockaddr *to,
5179	struct mbuf data, struct* mbuf control, uint32_t flags, struct* soflow_hash_entry *flow_entry)
5180	{
5181	int error = `0`;
5182	int new_filter_control_unit = `0`;
5183
5184	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5185	return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags, flow_entry);
5186	}
5187
5188	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5189	/ Drop pre-existing TCP sockets if filter is enabled now /
5190	if (!DO_PRESERVE_CONNECTIONS && cfil_active_count > `0` && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5191	new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5192	if (new_filter_control_unit > `0`) {
5193	CFIL_LOG(LOG_NOTICE, "CFIL: TCP(OUT) <so %llx> - filter state changed - dropped pre-existing flow", (uint64_t)VM_KERNEL_ADDRPERM(so));
5194	return EPIPE;
5195	}
5196	}
5197	return `0`;
5198	}
5199
5200	/ Drop pre-existing TCP sockets when filter state changed /
5201	new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5202	if (new_filter_control_unit > `0` && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5203	if (DO_PRESERVE_CONNECTIONS) {
5204	so->so_cfil->cfi_filter_control_unit = new_filter_control_unit;
5205	} else {
5206	CFIL_LOG(LOG_NOTICE, "CFIL: TCP(OUT) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
5207	(uint64_t)VM_KERNEL_ADDRPERM(so),
5208	so->so_cfil->cfi_filter_control_unit, new_filter_control_unit);
5209	return EPIPE;
5210	}
5211	}
5212
5213	/*
5214	* Pass initial data for TFO.
5215	*/
5216	if (IS_INITIAL_TFO_DATA(so)) {
5217	return `0`;
5218	}
5219
5220	socket_lock_assert_owned(so);
5221
5222	if (so->so_cfil->cfi_flags & CFIF_DROP) {
5223	CFIL_LOG(LOG_ERR, "so %llx drop set",
5224	(uint64_t)VM_KERNEL_ADDRPERM(so));
5225	return EPIPE;
5226	}
5227	if (control != NULL) {
5228	CFIL_LOG(LOG_ERR, "so %llx control",
5229	(uint64_t)VM_KERNEL_ADDRPERM(so));
5230	OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
5231	}
5232	if ((flags & MSG_OOB)) {
5233	CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5234	(uint64_t)VM_KERNEL_ADDRPERM(so));
5235	OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
5236	}
5237	/*
5238	* Abort if socket is defunct.
5239	*/
5240	if (so->so_flags & SOF_DEFUNCT) {
5241	return EPIPE;
5242	}
5243	if ((so->so_snd.sb_flags & SB_LOCK) == `0`) {
5244	panic("so %p SB_LOCK not set", so);
5245	}
5246
5247	if (so->so_snd.sb_cfil_thread != NULL) {
5248	panic("%s sb_cfil_thread %p not NULL", __func__,
5249	so->so_snd.sb_cfil_thread);
5250	}
5251
5252	error = cfil_data_common(so, cfil_info: so->so_cfil, outgoing: `1`, to, data, control, flags);
5253
5254	return error;
5255	}
5256
5257	/*
5258	* Callback from socket layer sbappendxxx()
5259	*/
5260	int
5261	cfil_sock_data_in(struct socket so, struct* sockaddr *from,
5262	struct mbuf data, struct* mbuf control, uint32_t flags, struct* soflow_hash_entry *flow_entry)
5263	{
5264	int error = `0`;
5265	int new_filter_control_unit = `0`;
5266
5267	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5268	return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags, flow_entry);
5269	}
5270
5271	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5272	/ Drop pre-existing TCP sockets if filter is enabled now /
5273	if (!DO_PRESERVE_CONNECTIONS && cfil_active_count > `0` && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5274	new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5275	if (new_filter_control_unit > `0`) {
5276	CFIL_LOG(LOG_NOTICE, "CFIL: TCP(IN) <so %llx> - filter state changed - dropped pre-existing flow", (uint64_t)VM_KERNEL_ADDRPERM(so));
5277	return EPIPE;
5278	}
5279	}
5280	return `0`;
5281	}
5282
5283	/ Drop pre-existing TCP sockets when filter state changed /
5284	new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5285	if (new_filter_control_unit > `0` && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5286	if (DO_PRESERVE_CONNECTIONS) {
5287	so->so_cfil->cfi_filter_control_unit = new_filter_control_unit;
5288	} else {
5289	CFIL_LOG(LOG_NOTICE, "CFIL: TCP(IN) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
5290	(uint64_t)VM_KERNEL_ADDRPERM(so),
5291	so->so_cfil->cfi_filter_control_unit, new_filter_control_unit);
5292	return EPIPE;
5293	}
5294	}
5295
5296	/*
5297	* Pass initial data for TFO.
5298	*/
5299	if (IS_INITIAL_TFO_DATA(so)) {
5300	return `0`;
5301	}
5302
5303	socket_lock_assert_owned(so);
5304
5305	if (so->so_cfil->cfi_flags & CFIF_DROP) {
5306	CFIL_LOG(LOG_ERR, "so %llx drop set",
5307	(uint64_t)VM_KERNEL_ADDRPERM(so));
5308	return EPIPE;
5309	}
5310	if (control != NULL) {
5311	CFIL_LOG(LOG_ERR, "so %llx control",
5312	(uint64_t)VM_KERNEL_ADDRPERM(so));
5313	OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5314	}
5315	if (data->m_type == MT_OOBDATA) {
5316	CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5317	(uint64_t)VM_KERNEL_ADDRPERM(so));
5318	OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5319	}
5320	error = cfil_data_common(so, cfil_info: so->so_cfil, outgoing: `0`, to: from, data, control, flags);
5321
5322	return error;
5323	}
5324
5325	/*
5326	* Callback from socket layer soshutdownxxx()
5327	*
5328	* We may delay the shutdown write if there's outgoing data in process.
5329	*
5330	* There is no point in delaying the shutdown read because the process
5331	* indicated that it does not want to read anymore data.
5332	*/
5333	int
5334	cfil_sock_shutdown(struct socket so, int* *how)
5335	{
5336	int error = `0`;
5337
5338	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5339	return cfil_sock_udp_shutdown(so, how);
5340	}
5341
5342	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5343	goto done;
5344	}
5345
5346	socket_lock_assert_owned(so);
5347
5348	CFIL_LOG(LOG_INFO, "so %llx how %d",
5349	(uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5350
5351	/*
5352	* Check the state of the socket before the content filter
5353	*/
5354	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != `0`) {
5355	/ read already shut down /
5356	error = ENOTCONN;
5357	goto done;
5358	}
5359	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != `0`) {
5360	/ write already shut down /
5361	error = ENOTCONN;
5362	goto done;
5363	}
5364
5365	if ((so->so_cfil->cfi_flags & CFIF_DROP) != `0`) {
5366	CFIL_LOG(LOG_ERR, "so %llx drop set",
5367	(uint64_t)VM_KERNEL_ADDRPERM(so));
5368	goto done;
5369	}
5370
5371	/*
5372	* shutdown read: SHUT_RD or SHUT_RDWR
5373	*/
5374	if (*how != SHUT_WR) {
5375	if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5376	error = ENOTCONN;
5377	goto done;
5378	}
5379	so->so_cfil->cfi_flags \|= CFIF_SHUT_RD;
5380	cfil_sock_notify_shutdown(so, SHUT_RD);
5381	}
5382	/*
5383	* shutdown write: SHUT_WR or SHUT_RDWR
5384	*/
5385	if (*how != SHUT_RD) {
5386	if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5387	error = ENOTCONN;
5388	goto done;
5389	}
5390	so->so_cfil->cfi_flags \|= CFIF_SHUT_WR;
5391	cfil_sock_notify_shutdown(so, SHUT_WR);
5392	/*
5393	* When outgoing data is pending, we delay the shutdown at the
5394	* protocol level until the content filters give the final
5395	* verdict on the pending data.
5396	*/
5397	if (cfil_sock_data_pending(sb: &so->so_snd) != `0`) {
5398	/*
5399	* When shutting down the read and write sides at once
5400	* we can proceed to the final shutdown of the read
5401	* side. Otherwise, we just return.
5402	*/
5403	if (*how == SHUT_WR) {
5404	error = EJUSTRETURN;
5405	} else if (*how == SHUT_RDWR) {
5406	*how = SHUT_RD;
5407	}
5408	}
5409	}
5410	done:
5411	return error;
5412	}
5413
5414	/*
5415	* This is called when the socket is closed and there is no more
5416	* opportunity for filtering
5417	*/
5418	void
5419	cfil_sock_is_closed(struct socket *so)
5420	{
5421	errno_t error = `0`;
5422	int kcunit;
5423
5424	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5425	cfil_sock_udp_is_closed(so);
5426	return;
5427	}
5428
5429	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5430	return;
5431	}
5432
5433	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5434
5435	socket_lock_assert_owned(so);
5436
5437	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5438	/ Let the filters know of the closing /
5439	error = cfil_dispatch_closed_event(so, cfil_info: so->so_cfil, kcunit);
5440	}
5441
5442	/ Last chance to push passed data out /
5443	error = cfil_acquire_sockbuf(so, cfil_info: so->so_cfil, outgoing: `1`);
5444	if (error == `0`) {
5445	cfil_service_inject_queue(so, cfil_info: so->so_cfil, outgoing: `1`);
5446	}
5447	cfil_release_sockbuf(so, outgoing: `1`);
5448
5449	if (so->so_cfil != NULL) {
5450	so->so_cfil->cfi_flags \|= CFIF_SOCK_CLOSED;
5451	}
5452
5453	/ Pending data needs to go /
5454	cfil_flush_queues(so, cfil_info: so->so_cfil);
5455
5456	CFIL_INFO_VERIFY(so->so_cfil);
5457	}
5458
5459	/*
5460	* This is called when the socket is disconnected so let the filters
5461	* know about the disconnection and that no more data will come
5462	*
5463	* The how parameter has the same values as soshutown()
5464	*/
5465	void
5466	cfil_sock_notify_shutdown(struct socket so, int* how)
5467	{
5468	errno_t error = `0`;
5469	int kcunit;
5470
5471	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5472	cfil_sock_udp_notify_shutdown(so, how, `0`, `0`);
5473	return;
5474	}
5475
5476	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5477	return;
5478	}
5479
5480	CFIL_LOG(LOG_INFO, "so %llx how %d",
5481	(uint64_t)VM_KERNEL_ADDRPERM(so), how);
5482
5483	socket_lock_assert_owned(so);
5484
5485	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5486	/ Disconnect incoming side /
5487	if (how != SHUT_WR) {
5488	error = cfil_dispatch_disconnect_event(so, cfil_info: so->so_cfil, kcunit, outgoing: `0`);
5489	}
5490	/ Disconnect outgoing side /
5491	if (how != SHUT_RD) {
5492	error = cfil_dispatch_disconnect_event(so, cfil_info: so->so_cfil, kcunit, outgoing: `1`);
5493	}
5494	}
5495	}
5496
5497	static int
5498	cfil_filters_attached(struct socket *so)
5499	{
5500	struct cfil_entry *entry;
5501	uint32_t kcunit;
5502	int attached = `0`;
5503
5504	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5505	return cfil_filters_udp_attached(so, FALSE);
5506	}
5507
5508	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5509	return `0`;
5510	}
5511
5512	socket_lock_assert_owned(so);
5513
5514	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5515	entry = &so->so_cfil->cfi_entries[kcunit - `1`];
5516
5517	/ Are we attached to the filter? /
5518	if (entry->cfe_filter == NULL) {
5519	continue;
5520	}
5521	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`) {
5522	continue;
5523	}
5524	if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != `0`) {
5525	continue;
5526	}
5527	attached = `1`;
5528	break;
5529	}
5530
5531	return attached;
5532	}
5533
5534	/*
5535	* This is called when the socket is closed and we are waiting for
5536	* the filters to gives the final pass or drop
5537	*/
5538	void
5539	cfil_sock_close_wait(struct socket *so)
5540	{
5541	lck_mtx_t *mutex_held;
5542	struct timespec ts;
5543	int error;
5544
5545	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5546	cfil_sock_udp_close_wait(so);
5547	return;
5548	}
5549
5550	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5551	return;
5552	}
5553
5554	// This flow does not need to wait for close ack from user-space
5555	if (IS_NO_CLOSE_WAIT(so->so_cfil)) {
5556	if (so->so_cfil->cfi_debug) {
5557	cfil_info_log(LOG_ERR, so->so_cfil, "CFIL: SKIP CLOSE WAIT");
5558	}
5559	return;
5560	}
5561
5562	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5563
5564	if (so->so_proto->pr_getlock != NULL) {
5565	mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5566	} else {
5567	mutex_held = so->so_proto->pr_domain->dom_mtx;
5568	}
5569	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5570
5571	while (cfil_filters_attached(so)) {
5572	/*
5573	* Notify the filters we are going away so they can detach
5574	*/
5575	cfil_sock_notify_shutdown(so, SHUT_RDWR);
5576
5577	/*
5578	* Make sure we need to wait after the filter are notified
5579	* of the disconnection
5580	*/
5581	if (cfil_filters_attached(so) == `0`) {
5582	break;
5583	}
5584
5585	CFIL_LOG(LOG_INFO, "so %llx waiting",
5586	(uint64_t)VM_KERNEL_ADDRPERM(so));
5587
5588	ts.tv_sec = cfil_close_wait_timeout / `1000`;
5589	ts.tv_nsec = (cfil_close_wait_timeout % `1000`) *
5590	NSEC_PER_USEC * `1000`;
5591
5592	OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5593	so->so_cfil->cfi_flags \|= CFIF_CLOSE_WAIT;
5594	error = msleep(chan: (caddr_t)so->so_cfil, mtx: mutex_held,
5595	PSOCK \| PCATCH, wmesg: "cfil_sock_close_wait", ts: &ts);
5596
5597	// Woke up from sleep, validate if cfil_info is still valid
5598	if (so->so_cfil == NULL) {
5599	// cfil_info is not valid, do not continue
5600	return;
5601	}
5602
5603	so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5604
5605	CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5606	(uint64_t)VM_KERNEL_ADDRPERM(so), (error != `0`));
5607
5608	/*
5609	* Force close in case of timeout
5610	*/
5611	if (error != `0`) {
5612	OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5613	break;
5614	}
5615	}
5616	}
5617
5618	/*
5619	* Returns the size of the data held by the content filter by using
5620	*/
5621	int32_t
5622	cfil_sock_data_pending(struct sockbuf *sb)
5623	{
5624	struct socket *so = sb->sb_so;
5625	uint64_t pending = `0`;
5626
5627	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5628	return cfil_sock_udp_data_pending(sb, FALSE);
5629	}
5630
5631	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil != NULL) {
5632	struct cfi_buf *cfi_buf;
5633
5634	socket_lock_assert_owned(so);
5635
5636	if ((sb->sb_flags & SB_RECV) == `0`) {
5637	cfi_buf = &so->so_cfil->cfi_snd;
5638	} else {
5639	cfi_buf = &so->so_cfil->cfi_rcv;
5640	}
5641
5642	pending = cfi_buf->cfi_pending_last -
5643	cfi_buf->cfi_pending_first;
5644
5645	/*
5646	* If we are limited by the "chars of mbufs used" roughly
5647	* adjust so we won't overcommit
5648	*/
5649	if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5650	pending = cfi_buf->cfi_pending_mbcnt;
5651	}
5652	}
5653
5654	VERIFY(pending < INT32_MAX);
5655
5656	return (int32_t)(pending);
5657	}
5658
5659	/*
5660	* Return the socket buffer space used by data being held by content filters
5661	* so processes won't clog the socket buffer
5662	*/
5663	int32_t
5664	cfil_sock_data_space(struct sockbuf *sb)
5665	{
5666	struct socket *so = sb->sb_so;
5667	uint64_t pending = `0`;
5668
5669	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5670	return cfil_sock_udp_data_pending(sb, TRUE);
5671	}
5672
5673	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_cfil != NULL &&
5674	so->so_snd.sb_cfil_thread != current_thread()) {
5675	struct cfi_buf *cfi_buf;
5676
5677	socket_lock_assert_owned(so);
5678
5679	if ((sb->sb_flags & SB_RECV) == `0`) {
5680	cfi_buf = &so->so_cfil->cfi_snd;
5681	} else {
5682	cfi_buf = &so->so_cfil->cfi_rcv;
5683	}
5684
5685	pending = cfi_buf->cfi_pending_last -
5686	cfi_buf->cfi_pending_first;
5687
5688	/*
5689	* If we are limited by the "chars of mbufs used" roughly
5690	* adjust so we won't overcommit
5691	*/
5692	if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5693	pending = cfi_buf->cfi_pending_mbcnt;
5694	}
5695
5696	VERIFY(pending < INT32_MAX);
5697	}
5698
5699	return (int32_t)(pending);
5700	}
5701
5702	/*
5703	* A callback from the socket and protocol layer when data becomes
5704	* available in the socket buffer to give a chance for the content filter
5705	* to re-inject data that was held back
5706	*/
5707	void
5708	cfil_sock_buf_update(struct sockbuf *sb)
5709	{
5710	int outgoing;
5711	int error;
5712	struct socket *so = sb->sb_so;
5713
5714	if (NEED_DGRAM_FLOW_TRACKING(so)) {
5715	cfil_sock_udp_buf_update(sb);
5716	return;
5717	}
5718
5719	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| so->so_cfil == NULL) {
5720	return;
5721	}
5722
5723	if (!cfil_sbtrim) {
5724	return;
5725	}
5726
5727	socket_lock_assert_owned(so);
5728
5729	if ((sb->sb_flags & SB_RECV) == `0`) {
5730	if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == `0`) {
5731	return;
5732	}
5733	outgoing = `1`;
5734	OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5735	} else {
5736	if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == `0`) {
5737	return;
5738	}
5739	outgoing = `0`;
5740	OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5741	}
5742
5743	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5744	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5745
5746	error = cfil_acquire_sockbuf(so, cfil_info: so->so_cfil, outgoing);
5747	if (error == `0`) {
5748	cfil_service_inject_queue(so, cfil_info: so->so_cfil, outgoing);
5749	}
5750	cfil_release_sockbuf(so, outgoing);
5751	}
5752
5753	int
5754	sysctl_cfil_filter_list(struct sysctl_oid oidp, void* arg1, int* arg2,
5755	struct sysctl_req *req)
5756	{
5757	#pragma unused(oidp, arg1, arg2)
5758	int error = `0`;
5759	size_t len = `0`;
5760	u_int32_t i;
5761
5762	/ Read only /
5763	if (req->newptr != USER_ADDR_NULL) {
5764	return EPERM;
5765	}
5766
5767	cfil_rw_lock_shared(lck: &cfil_lck_rw);
5768
5769	for (i = `0`; i < MAX_CONTENT_FILTER; i++) {
5770	struct cfil_filter_stat filter_stat;
5771	struct content_filter *cfc = content_filters[i];
5772
5773	if (cfc == NULL) {
5774	continue;
5775	}
5776
5777	/ If just asking for the size /
5778	if (req->oldptr == USER_ADDR_NULL) {
5779	len += sizeof(struct cfil_filter_stat);
5780	continue;
5781	}
5782
5783	bzero(s: &filter_stat, n: sizeof(struct cfil_filter_stat));
5784	filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5785	filter_stat.cfs_filter_id = cfc->cf_kcunit;
5786	filter_stat.cfs_flags = cfc->cf_flags;
5787	filter_stat.cfs_sock_count = cfc->cf_sock_count;
5788	filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5789
5790	error = SYSCTL_OUT(req, &filter_stat,
5791	sizeof(struct cfil_filter_stat));
5792	if (error != `0`) {
5793	break;
5794	}
5795	}
5796	/ If just asking for the size /
5797	if (req->oldptr == USER_ADDR_NULL) {
5798	req->oldidx = len;
5799	}
5800
5801	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
5802
5803	if (cfil_log_level >= LOG_DEBUG) {
5804	if (req->oldptr != USER_ADDR_NULL) {
5805	for (i = `1`; i <= MAX_CONTENT_FILTER; i++) {
5806	cfil_filter_show(i);
5807	}
5808	}
5809	}
5810
5811	return error;
5812	}
5813
5814	static int
5815	sysctl_cfil_sock_list(struct sysctl_oid oidp, void* arg1, int* arg2,
5816	struct sysctl_req *req)
5817	{
5818	#pragma unused(oidp, arg1, arg2)
5819	int error = `0`;
5820	u_int32_t i;
5821	struct cfil_info *cfi;
5822
5823	/ Read only /
5824	if (req->newptr != USER_ADDR_NULL) {
5825	return EPERM;
5826	}
5827
5828	cfil_rw_lock_shared(lck: &cfil_lck_rw);
5829
5830	/*
5831	* If just asking for the size,
5832	*/
5833	if (req->oldptr == USER_ADDR_NULL) {
5834	req->oldidx = cfil_sock_attached_count *
5835	sizeof(struct cfil_sock_stat);
5836	/ Bump the length in case new sockets gets attached /
5837	req->oldidx += req->oldidx >> `3`;
5838	goto done;
5839	}
5840
5841	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5842	struct cfil_entry *entry;
5843	struct cfil_sock_stat stat;
5844	struct socket *so = cfi->cfi_so;
5845
5846	bzero(s: &stat, n: sizeof(struct cfil_sock_stat));
5847	stat.cfs_len = sizeof(struct cfil_sock_stat);
5848	stat.cfs_sock_id = cfi->cfi_sock_id;
5849	stat.cfs_flags = cfi->cfi_flags;
5850
5851	if (so != NULL && so->so_proto != NULL && so->so_proto->pr_domain != NULL) {
5852	stat.cfs_pid = so->last_pid;
5853	memcpy(dst: stat.cfs_uuid, src: so->last_uuid,
5854	n: sizeof(uuid_t));
5855	if (so->so_flags & SOF_DELEGATED) {
5856	stat.cfs_e_pid = so->e_pid;
5857	memcpy(dst: stat.cfs_e_uuid, src: so->e_uuid,
5858	n: sizeof(uuid_t));
5859	} else {
5860	stat.cfs_e_pid = so->last_pid;
5861	memcpy(dst: stat.cfs_e_uuid, src: so->last_uuid,
5862	n: sizeof(uuid_t));
5863	}
5864
5865	stat.cfs_sock_family = SOCK_DOM(so);
5866	stat.cfs_sock_type = SOCK_TYPE(so);
5867	stat.cfs_sock_protocol = GET_SO_PROTO(so);
5868	}
5869
5870	stat.cfs_snd.cbs_pending_first =
5871	cfi->cfi_snd.cfi_pending_first;
5872	stat.cfs_snd.cbs_pending_last =
5873	cfi->cfi_snd.cfi_pending_last;
5874	stat.cfs_snd.cbs_inject_q_len =
5875	cfil_queue_len(cfq: &cfi->cfi_snd.cfi_inject_q);
5876	stat.cfs_snd.cbs_pass_offset =
5877	cfi->cfi_snd.cfi_pass_offset;
5878
5879	stat.cfs_rcv.cbs_pending_first =
5880	cfi->cfi_rcv.cfi_pending_first;
5881	stat.cfs_rcv.cbs_pending_last =
5882	cfi->cfi_rcv.cfi_pending_last;
5883	stat.cfs_rcv.cbs_inject_q_len =
5884	cfil_queue_len(cfq: &cfi->cfi_rcv.cfi_inject_q);
5885	stat.cfs_rcv.cbs_pass_offset =
5886	cfi->cfi_rcv.cfi_pass_offset;
5887
5888	for (i = `0`; i < MAX_CONTENT_FILTER; i++) {
5889	struct cfil_entry_stat *estat;
5890	struct cfe_buf *ebuf;
5891	struct cfe_buf_stat *sbuf;
5892
5893	entry = &cfi->cfi_entries[i];
5894
5895	estat = &stat.ces_entries[i];
5896
5897	estat->ces_len = sizeof(struct cfil_entry_stat);
5898	estat->ces_filter_id = entry->cfe_filter ?
5899	entry->cfe_filter->cf_kcunit : `0`;
5900	estat->ces_flags = entry->cfe_flags;
5901	estat->ces_necp_control_unit =
5902	entry->cfe_necp_control_unit;
5903
5904	estat->ces_last_event.tv_sec =
5905	(int64_t)entry->cfe_last_event.tv_sec;
5906	estat->ces_last_event.tv_usec =
5907	(int64_t)entry->cfe_last_event.tv_usec;
5908
5909	estat->ces_last_action.tv_sec =
5910	(int64_t)entry->cfe_last_action.tv_sec;
5911	estat->ces_last_action.tv_usec =
5912	(int64_t)entry->cfe_last_action.tv_usec;
5913
5914	ebuf = &entry->cfe_snd;
5915	sbuf = &estat->ces_snd;
5916	sbuf->cbs_pending_first =
5917	cfil_queue_offset_first(cfq: &ebuf->cfe_pending_q);
5918	sbuf->cbs_pending_last =
5919	cfil_queue_offset_last(cfq: &ebuf->cfe_pending_q);
5920	sbuf->cbs_ctl_first =
5921	cfil_queue_offset_first(cfq: &ebuf->cfe_ctl_q);
5922	sbuf->cbs_ctl_last =
5923	cfil_queue_offset_last(cfq: &ebuf->cfe_ctl_q);
5924	sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5925	sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5926	sbuf->cbs_peeked = ebuf->cfe_peeked;
5927
5928	ebuf = &entry->cfe_rcv;
5929	sbuf = &estat->ces_rcv;
5930	sbuf->cbs_pending_first =
5931	cfil_queue_offset_first(cfq: &ebuf->cfe_pending_q);
5932	sbuf->cbs_pending_last =
5933	cfil_queue_offset_last(cfq: &ebuf->cfe_pending_q);
5934	sbuf->cbs_ctl_first =
5935	cfil_queue_offset_first(cfq: &ebuf->cfe_ctl_q);
5936	sbuf->cbs_ctl_last =
5937	cfil_queue_offset_last(cfq: &ebuf->cfe_ctl_q);
5938	sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5939	sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5940	sbuf->cbs_peeked = ebuf->cfe_peeked;
5941	}
5942	error = SYSCTL_OUT(req, &stat,
5943	sizeof(struct cfil_sock_stat));
5944	if (error != `0`) {
5945	break;
5946	}
5947	}
5948	done:
5949	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
5950
5951	if (cfil_log_level >= LOG_DEBUG) {
5952	if (req->oldptr != USER_ADDR_NULL) {
5953	cfil_info_show();
5954	}
5955	}
5956
5957	return error;
5958	}
5959
5960	/*
5961	* UDP Socket Support
5962	*/
5963	static void
5964	cfil_hash_entry_log(int level, struct socket so, struct* soflow_hash_entry entry, uint64_t sockId, const* char* msg)
5965	{
5966	char local[MAX_IPv6_STR_LEN + `6`];
5967	char remote[MAX_IPv6_STR_LEN + `6`];
5968	const void *addr;
5969
5970	// No sock or not UDP, no-op
5971	if (so == NULL \|\| entry == NULL) {
5972	return;
5973	}
5974
5975	local[`0`] = remote[`0`] = `0x0`;
5976
5977	switch (entry->soflow_family) {
5978	case AF_INET6:
5979	addr = &entry->soflow_laddr.addr6;
5980	inet_ntop(AF_INET6, addr, local, sizeof(local));
5981	addr = &entry->soflow_faddr.addr6;
5982	inet_ntop(AF_INET6, addr, remote, sizeof(local));
5983	break;
5984	case AF_INET:
5985	addr = &entry->soflow_laddr.addr46.ia46_addr4.s_addr;
5986	inet_ntop(AF_INET, addr, local, sizeof(local));
5987	addr = &entry->soflow_faddr.addr46.ia46_addr4.s_addr;
5988	inet_ntop(AF_INET, addr, remote, sizeof(local));
5989	break;
5990	default:
5991	return;
5992	}
5993
5994	CFIL_LOG(level, "<%s>: <%s(%d) so %llx cfil %p, entry %p, sockID %llu <%llx> feat_ctxt_id <%llu> lport %d fport %d laddr %s faddr %s hash %X",
5995	msg,
5996	IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5997	(uint64_t)VM_KERNEL_ADDRPERM(so), entry->soflow_feat_ctxt, entry, sockId, sockId, entry->soflow_feat_ctxt_id,
5998	ntohs(entry->soflow_lport), ntohs(entry->soflow_fport), local, remote,
5999	entry->soflow_flowhash);
6000	}
6001
6002	static void
6003	cfil_inp_log(int level, struct socket so, const* char* msg)
6004	{
6005	struct inpcb *inp = NULL;
6006	char local[MAX_IPv6_STR_LEN + `6`];
6007	char remote[MAX_IPv6_STR_LEN + `6`];
6008	const void *addr;
6009
6010	if (so == NULL) {
6011	return;
6012	}
6013
6014	inp = sotoinpcb(so);
6015	if (inp == NULL) {
6016	return;
6017	}
6018
6019	local[`0`] = remote[`0`] = `0x0`;
6020
6021	if (inp->inp_vflag & INP_IPV6) {
6022	addr = &inp->in6p_laddr.s6_addr32;
6023	inet_ntop(AF_INET6, addr, local, sizeof(local));
6024	addr = &inp->in6p_faddr.s6_addr32;
6025	inet_ntop(AF_INET6, addr, remote, sizeof(local));
6026	} else {
6027	addr = &inp->inp_laddr.s_addr;
6028	inet_ntop(AF_INET, addr, local, sizeof(local));
6029	addr = &inp->inp_faddr.s_addr;
6030	inet_ntop(AF_INET, addr, remote, sizeof(local));
6031	}
6032
6033	if (so->so_cfil != NULL) {
6034	CFIL_LOG(level, "<%s>: <%s so %llx cfil %p - flags 0x%x 0x%x, sockID %llu <%llx>> lport %d fport %d laddr %s faddr %s",
6035	msg, IS_UDP(so) ? "UDP" : "TCP",
6036	(uint64_t)VM_KERNEL_ADDRPERM(so), so->so_cfil, inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id, so->so_cfil->cfi_sock_id,
6037	ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
6038	} else {
6039	CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
6040	msg, IS_UDP(so) ? "UDP" : "TCP",
6041	(uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
6042	ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
6043	}
6044	}
6045
6046	static void
6047	cfil_info_log(int level, struct cfil_info cfil_info, const* char* msg)
6048	{
6049	if (cfil_info == NULL) {
6050	return;
6051	}
6052
6053	if (cfil_info->cfi_hash_entry != NULL) {
6054	cfil_hash_entry_log(level, so: cfil_info->cfi_so, entry: cfil_info->cfi_hash_entry, sockId: cfil_info->cfi_sock_id, msg);
6055	} else {
6056	cfil_inp_log(level, so: cfil_info->cfi_so, msg);
6057	}
6058	}
6059
6060	static void
6061	cfil_sock_udp_unlink_flow(struct socket so, struct* soflow_hash_entry hash_entry, struct* cfil_info *cfil_info)
6062	{
6063	if (so == NULL \|\| hash_entry == NULL \|\| cfil_info == NULL) {
6064	return;
6065	}
6066
6067	if (so->so_flags & SOF_CONTENT_FILTER) {
6068	VERIFY(so->so_usecount > `0`);
6069	so->so_usecount--;
6070	}
6071
6072	// Hold exclusive lock before clearing cfil_info hash entry link
6073	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
6074
6075	cfil_info->cfi_hash_entry = NULL;
6076
6077	if (cfil_info->cfi_debug) {
6078	CFIL_LOG(LOG_ERR, "CFIL <%s>: <so %llx> - use count %d",
6079	IS_UDP(so) ? "UDP" : "TCP", (uint64_t)VM_KERNEL_ADDRPERM(so), so->so_usecount);
6080	}
6081
6082	cfil_rw_unlock_exclusive(lck: &cfil_lck_rw);
6083	}
6084
6085	bool
6086	check_port(struct sockaddr *addr, u_short port)
6087	{
6088	struct sockaddr_in *sin = NULL;
6089	struct sockaddr_in6 *sin6 = NULL;
6090
6091	if (addr == NULL \|\| port == `0`) {
6092	return FALSE;
6093	}
6094
6095	switch (addr->sa_family) {
6096	case AF_INET:
6097	sin = satosin(addr);
6098	if (sin->sin_len < sizeof(*sin)) {
6099	return FALSE;
6100	}
6101	if (port == ntohs(sin->sin_port)) {
6102	return TRUE;
6103	}
6104	break;
6105	case AF_INET6:
6106	sin6 = satosin6(addr);
6107	if (sin6->sin6_len < sizeof(*sin6)) {
6108	return FALSE;
6109	}
6110	if (port == ntohs(sin6->sin6_port)) {
6111	return TRUE;
6112	}
6113	break;
6114	default:
6115	break;
6116	}
6117	return FALSE;
6118	}
6119
6120	cfil_sock_id_t
6121	cfil_sock_id_from_datagram_socket(struct socket so, struct* sockaddr local, struct* sockaddr *remote)
6122	{
6123	socket_lock_assert_owned(so);
6124
6125	if (so->so_flow_db == NULL) {
6126	return CFIL_SOCK_ID_NONE;
6127	}
6128	return (cfil_sock_id_t)soflow_db_get_feature_context_id(so->so_flow_db, local, remote);
6129	}
6130
6131	static struct cfil_info *
6132	cfil_sock_udp_get_info(struct socket so, uint32_t filter_control_unit, bool outgoing, struct* soflow_hash_entry *hash_entry,
6133	struct sockaddr local, struct* sockaddr *remote)
6134	{
6135	int new_filter_control_unit = `0`;
6136	struct cfil_info *cfil_info = NULL;
6137
6138	errno_t error = `0`;
6139	socket_lock_assert_owned(so);
6140
6141	if (hash_entry == NULL \|\| hash_entry->soflow_db == NULL) {
6142	return NULL;
6143	}
6144
6145	if (hash_entry->soflow_feat_ctxt != NULL && hash_entry->soflow_feat_ctxt_id != `0`) {
6146	/ Drop pre-existing UDP flow if filter state changed /
6147	cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
6148	new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6149	if (new_filter_control_unit > `0` &&
6150	new_filter_control_unit != cfil_info->cfi_filter_control_unit) {
6151	if (DO_PRESERVE_CONNECTIONS) {
6152	cfil_info->cfi_filter_control_unit = new_filter_control_unit;
6153	} else {
6154	CFIL_LOG(LOG_NOTICE, "CFIL: UDP(%s) <so %llx> - filter state changed - dropped pre-existing flow (old state 0x%x new state 0x%x)",
6155	outgoing ? "OUT" : "IN", (uint64_t)VM_KERNEL_ADDRPERM(so),
6156	cfil_info->cfi_filter_control_unit, new_filter_control_unit);
6157	return NULL;
6158	}
6159	}
6160	return cfil_info;
6161	}
6162
6163	cfil_info = cfil_info_alloc(so, hash_entry);
6164	if (cfil_info == NULL) {
6165	CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP failed to alloc cfil_info", (uint64_t)VM_KERNEL_ADDRPERM(so));
6166	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6167	return NULL;
6168	}
6169	cfil_info->cfi_filter_control_unit = filter_control_unit;
6170	cfil_info->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6171	cfil_info->cfi_debug = DEBUG_FLOW(sotoinpcb(so), so, local, remote);
6172	if (cfil_info->cfi_debug) {
6173	CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP (outgoing %d) - debug flow with port %d", (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_log_port);
6174	CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP so_gencnt %llx entry flowhash %x cfil %p sockID %llu <%llx>",
6175	(uint64_t)VM_KERNEL_ADDRPERM(so), so->so_gencnt, hash_entry->soflow_flowhash, cfil_info, cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
6176	}
6177
6178	if (cfil_info_attach_unit(so, filter_control_unit, cfil_info) == `0`) {
6179	CFIL_INFO_FREE(cfil_info);
6180	CFIL_LOG(LOG_ERR, "CFIL: <so %llx> UDP cfil_info_attach_unit(%u) failed",
6181	(uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit);
6182	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6183	return NULL;
6184	}
6185
6186	if (cfil_info->cfi_debug) {
6187	CFIL_LOG(LOG_ERR, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu <%llx> attached",
6188	(uint64_t)VM_KERNEL_ADDRPERM(so),
6189	filter_control_unit, cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
6190	}
6191
6192	so->so_flags \|= SOF_CONTENT_FILTER;
6193	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6194
6195	/ Hold a reference on the socket for each flow /
6196	so->so_usecount++;
6197
6198	/ link cfil_info to flow /
6199	hash_entry->soflow_feat_ctxt = cfil_info;
6200	hash_entry->soflow_feat_ctxt_id = cfil_info->cfi_sock_id;
6201
6202	if (cfil_info->cfi_debug) {
6203	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: ADDED");
6204	}
6205
6206	error = cfil_dispatch_attach_event(so, cfil_info, kcunit: `0`,
6207	conn_dir: outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6208	/ We can recover from flow control or out of memory errors /
6209	if (error != `0` && error != ENOBUFS && error != ENOMEM) {
6210	CFIL_LOG(LOG_ERR, "CFIL: UDP <so %llx> cfil_dispatch_attach_event failed <error %d>",
6211	(uint64_t)VM_KERNEL_ADDRPERM(so), error);
6212	return NULL;
6213	}
6214
6215	CFIL_INFO_VERIFY(cfil_info);
6216	return cfil_info;
6217	}
6218
6219	errno_t
6220	cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6221	struct sockaddr local, struct* sockaddr *remote,
6222	struct mbuf data, struct* mbuf *control, uint32_t flags,
6223	struct soflow_hash_entry *hash_entry)
6224	{
6225	#pragma unused(outgoing, so, local, remote, data, control, flags)
6226	errno_t error = `0`;
6227	uint32_t filter_control_unit;
6228	struct cfil_info *cfil_info = NULL;
6229
6230	socket_lock_assert_owned(so);
6231
6232	if (cfil_active_count == `0`) {
6233	CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6234	OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6235	return error;
6236	}
6237
6238	// Socket has been blessed
6239	if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != `0`) {
6240	return error;
6241	}
6242
6243	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6244	if (filter_control_unit == `0`) {
6245	CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6246	return error;
6247	}
6248
6249	if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6250	return error;
6251	}
6252
6253	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != `0`) {
6254	CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6255	OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6256	return error;
6257	}
6258
6259	if (hash_entry == NULL) {
6260	CFIL_LOG(LOG_ERR, "CFIL: <so %llx> NULL soflow_hash_entry", (uint64_t)VM_KERNEL_ADDRPERM(so));
6261	return EPIPE;
6262	}
6263
6264	if (hash_entry->soflow_db == NULL) {
6265	CFIL_LOG(LOG_ERR, "CFIL: <so %llx> NULL soflow_hash_entry db", (uint64_t)VM_KERNEL_ADDRPERM(so));
6266	return EPIPE;
6267	}
6268
6269	cfil_info = cfil_sock_udp_get_info(so, filter_control_unit, outgoing, hash_entry, local, remote);
6270	if (cfil_info == NULL) {
6271	return EPIPE;
6272	}
6273	// Update last used timestamp, this is for flow Idle TO
6274
6275	if (cfil_info->cfi_debug) {
6276	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: Got flow");
6277	}
6278
6279	if (cfil_info->cfi_flags & CFIF_DROP) {
6280	if (cfil_info->cfi_debug) {
6281	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP DROP");
6282	}
6283	return EPIPE;
6284	}
6285	if (control != NULL) {
6286	OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6287	}
6288	if (data->m_type == MT_OOBDATA) {
6289	CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6290	(uint64_t)VM_KERNEL_ADDRPERM(so));
6291	OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6292	}
6293
6294	error = cfil_data_common(so, cfil_info, outgoing, to: remote, data, control, flags);
6295
6296	return error;
6297	}
6298
6299	struct cfil_udp_attached_context {
6300	bool need_wait;
6301	lck_mtx_t *mutex_held;
6302	int attached;
6303	};
6304
6305	static bool
6306	cfil_filters_udp_attached_per_flow(struct socket *so,
6307	struct soflow_hash_entry *hash_entry,
6308	void *context)
6309	{
6310	struct cfil_udp_attached_context *apply_context = NULL;
6311	struct cfil_info *cfil_info = NULL;
6312	struct cfil_entry *entry = NULL;
6313	uint64_t sock_flow_id = `0`;
6314	struct timespec ts;
6315	errno_t error = `0`;
6316	int kcunit;
6317
6318	if (hash_entry->soflow_feat_ctxt == NULL \|\| context == NULL) {
6319	return true;
6320	}
6321
6322	cfil_info = hash_entry->soflow_feat_ctxt;
6323	apply_context = (struct cfil_udp_attached_context *)context;
6324
6325	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6326	entry = &cfil_info->cfi_entries[kcunit - `1`];
6327
6328	/ Are we attached to the filter? /
6329	if (entry->cfe_filter == NULL) {
6330	continue;
6331	}
6332
6333	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == `0`) {
6334	continue;
6335	}
6336	if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != `0`) {
6337	continue;
6338	}
6339
6340	if (apply_context->need_wait == TRUE) {
6341	if (cfil_info->cfi_debug) {
6342	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW WAIT FOR FLOW TO FINISH");
6343	}
6344
6345	ts.tv_sec = cfil_close_wait_timeout / `1000`;
6346	ts.tv_nsec = (cfil_close_wait_timeout % `1000`) * NSEC_PER_USEC * `1000`;
6347
6348	OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6349	cfil_info->cfi_flags \|= CFIF_CLOSE_WAIT;
6350	sock_flow_id = cfil_info->cfi_sock_id;
6351
6352	error = msleep(chan: (caddr_t)cfil_info, mtx: apply_context->mutex_held,
6353	PSOCK \| PCATCH, wmesg: "cfil_filters_udp_attached_per_flow", ts: &ts);
6354
6355	// Woke up from sleep, validate if cfil_info is still valid
6356	if (so->so_flow_db == NULL \|\|
6357	(cfil_info != soflow_db_get_feature_context(so->so_flow_db, sock_flow_id))) {
6358	// cfil_info is not valid, do not continue
6359	return false;
6360	}
6361
6362	cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6363
6364	if (cfil_info->cfi_debug) {
6365	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW WAIT FOR FLOW DONE");
6366	}
6367
6368	/*
6369	* Force close in case of timeout
6370	*/
6371	if (error != `0`) {
6372	OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6373
6374	if (cfil_info->cfi_debug) {
6375	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6376	}
6377
6378	entry->cfe_flags \|= CFEF_CFIL_DETACHED;
6379	return false;
6380	}
6381	}
6382	apply_context->attached = `1`;
6383	return false;
6384	}
6385	return true;
6386	}
6387
6388	/*
6389	* Go through all UDP flows for specified socket and returns TRUE if
6390	* any flow is still attached. If need_wait is TRUE, wait on first
6391	* attached flow.
6392	*/
6393	static int
6394	cfil_filters_udp_attached(struct socket *so, bool need_wait)
6395	{
6396	struct cfil_udp_attached_context apply_context = { `0` };
6397	lck_mtx_t *mutex_held;
6398
6399	socket_lock_assert_owned(so);
6400
6401	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_flow_db != NULL) {
6402	if (so->so_proto->pr_getlock != NULL) {
6403	mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6404	} else {
6405	mutex_held = so->so_proto->pr_domain->dom_mtx;
6406	}
6407	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6408
6409	apply_context.need_wait = need_wait;
6410	apply_context.mutex_held = mutex_held;
6411	soflow_db_apply(so->so_flow_db, cfil_filters_udp_attached_per_flow, context: (void *)&apply_context);
6412	}
6413
6414	return apply_context.attached;
6415	}
6416
6417	struct cfil_udp_data_pending_context {
6418	struct sockbuf *sb;
6419	uint64_t total_pending;
6420	};
6421
6422	static bool
6423	cfil_sock_udp_data_pending_per_flow(struct socket *so,
6424	struct soflow_hash_entry *hash_entry,
6425	void *context)
6426	{
6427	#pragma unused(so)
6428	struct cfil_udp_data_pending_context *apply_context = NULL;
6429	struct cfil_info *cfil_info = NULL;
6430	struct cfi_buf *cfi_buf;
6431
6432	uint64_t pending = `0`;
6433
6434	if (hash_entry->soflow_feat_ctxt == NULL \|\| context == NULL) {
6435	return true;
6436	}
6437
6438	cfil_info = hash_entry->soflow_feat_ctxt;
6439	apply_context = (struct cfil_udp_data_pending_context *)context;
6440
6441	if (apply_context->sb == NULL) {
6442	return true;
6443	}
6444
6445	if ((apply_context->sb->sb_flags & SB_RECV) == `0`) {
6446	cfi_buf = &cfil_info->cfi_snd;
6447	} else {
6448	cfi_buf = &cfil_info->cfi_rcv;
6449	}
6450
6451	pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6452	/*
6453	* If we are limited by the "chars of mbufs used" roughly
6454	* adjust so we won't overcommit
6455	*/
6456	if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6457	pending = cfi_buf->cfi_pending_mbcnt;
6458	}
6459
6460	apply_context->total_pending += pending;
6461	return true;
6462	}
6463
6464	int32_t
6465	cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6466	{
6467	struct cfil_udp_data_pending_context apply_context = { `0` };
6468	struct socket *so = sb->sb_so;
6469
6470	socket_lock_assert_owned(so);
6471
6472	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_flow_db != NULL &&
6473	(check_thread == FALSE \|\| so->so_snd.sb_cfil_thread != current_thread())) {
6474	apply_context.sb = sb;
6475	soflow_db_apply(so->so_flow_db, cfil_sock_udp_data_pending_per_flow, context: (void *)&apply_context);
6476
6477	VERIFY(apply_context.total_pending < INT32_MAX);
6478	}
6479
6480	return (int32_t)(apply_context.total_pending);
6481	}
6482
6483	struct cfil_udp_notify_shutdown_context {
6484	int how;
6485	int drop_flag;
6486	int shut_flag;
6487	int done_count;
6488	};
6489
6490	static bool
6491	cfil_sock_udp_notify_shutdown_per_flow(struct socket *so,
6492	struct soflow_hash_entry *hash_entry,
6493	void *context)
6494	{
6495	struct cfil_udp_notify_shutdown_context *apply_context = NULL;
6496	struct cfil_info *cfil_info = NULL;
6497	errno_t error = `0`;
6498	int kcunit;
6499
6500	if (hash_entry->soflow_feat_ctxt == NULL \|\| context == NULL) {
6501	return true;
6502	}
6503
6504	cfil_info = hash_entry->soflow_feat_ctxt;
6505	apply_context = (struct cfil_udp_notify_shutdown_context *)context;
6506
6507	// This flow is marked as DROP
6508	if (cfil_info->cfi_flags & apply_context->drop_flag) {
6509	apply_context->done_count++;
6510	return true;
6511	}
6512
6513	// This flow has been shut already, skip
6514	if (cfil_info->cfi_flags & apply_context->shut_flag) {
6515	return true;
6516	}
6517	// Mark flow as shut
6518	cfil_info->cfi_flags \|= apply_context->shut_flag;
6519	apply_context->done_count++;
6520
6521	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6522	/ Disconnect incoming side /
6523	if (apply_context->how != SHUT_WR) {
6524	error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, outgoing: `0`);
6525	}
6526	/ Disconnect outgoing side /
6527	if (apply_context->how != SHUT_RD) {
6528	error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, outgoing: `1`);
6529	}
6530	}
6531
6532	if (cfil_info->cfi_debug) {
6533	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW NOTIFY_SHUTDOWN");
6534	}
6535
6536	return true;
6537	}
6538
6539	int
6540	cfil_sock_udp_notify_shutdown(struct socket so, int* how, int drop_flag, int shut_flag)
6541	{
6542	struct cfil_udp_notify_shutdown_context apply_context = { `0` };
6543	errno_t error = `0`;
6544
6545	socket_lock_assert_owned(so);
6546
6547	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_flow_db != NULL) {
6548	apply_context.how = how;
6549	apply_context.drop_flag = drop_flag;
6550	apply_context.shut_flag = shut_flag;
6551
6552	soflow_db_apply(so->so_flow_db, cfil_sock_udp_notify_shutdown_per_flow, context: (void *)&apply_context);
6553	}
6554
6555	if (apply_context.done_count == `0`) {
6556	error = ENOTCONN;
6557	}
6558	return error;
6559	}
6560
6561	int
6562	cfil_sock_udp_shutdown(struct socket so, int* *how)
6563	{
6564	int error = `0`;
6565
6566	if ((so->so_flags & SOF_CONTENT_FILTER) == `0` \|\| (so->so_flow_db == NULL)) {
6567	goto done;
6568	}
6569
6570	socket_lock_assert_owned(so);
6571
6572	CFIL_LOG(LOG_INFO, "so %llx how %d",
6573	(uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6574
6575	/*
6576	* Check the state of the socket before the content filter
6577	*/
6578	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != `0`) {
6579	/ read already shut down /
6580	error = ENOTCONN;
6581	goto done;
6582	}
6583	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != `0`) {
6584	/ write already shut down /
6585	error = ENOTCONN;
6586	goto done;
6587	}
6588
6589	/*
6590	* shutdown read: SHUT_RD or SHUT_RDWR
6591	*/
6592	if (*how != SHUT_WR) {
6593	error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6594	if (error != `0`) {
6595	goto done;
6596	}
6597	}
6598	/*
6599	* shutdown write: SHUT_WR or SHUT_RDWR
6600	*/
6601	if (*how != SHUT_RD) {
6602	error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6603	if (error != `0`) {
6604	goto done;
6605	}
6606
6607	/*
6608	* When outgoing data is pending, we delay the shutdown at the
6609	* protocol level until the content filters give the final
6610	* verdict on the pending data.
6611	*/
6612	if (cfil_sock_data_pending(sb: &so->so_snd) != `0`) {
6613	/*
6614	* When shutting down the read and write sides at once
6615	* we can proceed to the final shutdown of the read
6616	* side. Otherwise, we just return.
6617	*/
6618	if (*how == SHUT_WR) {
6619	error = EJUSTRETURN;
6620	} else if (*how == SHUT_RDWR) {
6621	*how = SHUT_RD;
6622	}
6623	}
6624	}
6625	done:
6626	return error;
6627	}
6628
6629	void
6630	cfil_sock_udp_close_wait(struct socket *so)
6631	{
6632	socket_lock_assert_owned(so);
6633
6634	while (cfil_filters_udp_attached(so, FALSE)) {
6635	/*
6636	* Notify the filters we are going away so they can detach
6637	*/
6638	cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, drop_flag: `0`, shut_flag: `0`);
6639
6640	/*
6641	* Make sure we need to wait after the filter are notified
6642	* of the disconnection
6643	*/
6644	if (cfil_filters_udp_attached(so, TRUE) == `0`) {
6645	break;
6646	}
6647	}
6648	}
6649
6650	static bool
6651	cfil_sock_udp_is_closed_per_flow(struct socket *so,
6652	struct soflow_hash_entry *hash_entry,
6653	void *context)
6654	{
6655	#pragma unused(context)
6656	struct cfil_info *cfil_info = NULL;
6657	errno_t error = `0`;
6658	int kcunit;
6659
6660	if (hash_entry->soflow_feat_ctxt == NULL) {
6661	return true;
6662	}
6663
6664	cfil_info = hash_entry->soflow_feat_ctxt;
6665
6666	for (kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6667	/ Let the filters know of the closing /
6668	error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6669	}
6670
6671	/ Last chance to push passed data out /
6672	error = cfil_acquire_sockbuf(so, cfil_info, outgoing: `1`);
6673	if (error == `0`) {
6674	cfil_service_inject_queue(so, cfil_info, outgoing: `1`);
6675	}
6676	cfil_release_sockbuf(so, outgoing: `1`);
6677
6678	cfil_info->cfi_flags \|= CFIF_SOCK_CLOSED;
6679
6680	/ Pending data needs to go /
6681	cfil_flush_queues(so, cfil_info);
6682
6683	CFIL_INFO_VERIFY(cfil_info);
6684
6685	if (cfil_info->cfi_debug) {
6686	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW IS_CLOSED");
6687	}
6688
6689	return true;
6690	}
6691
6692	void
6693	cfil_sock_udp_is_closed(struct socket *so)
6694	{
6695	socket_lock_assert_owned(so);
6696
6697	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_flow_db != NULL) {
6698	soflow_db_apply(so->so_flow_db, cfil_sock_udp_is_closed_per_flow, NULL);
6699	}
6700	}
6701
6702	static bool
6703	cfil_sock_udp_buf_update_per_flow(struct socket *so,
6704	struct soflow_hash_entry *hash_entry,
6705	void *context)
6706	{
6707	struct cfil_info *cfil_info = NULL;
6708	struct sockbuf *sb = NULL;
6709	errno_t error = `0`;
6710	int outgoing;
6711
6712	if (hash_entry->soflow_feat_ctxt == NULL \|\| context == NULL) {
6713	return true;
6714	}
6715
6716	cfil_info = hash_entry->soflow_feat_ctxt;
6717	sb = (struct sockbuf *) context;
6718
6719	if ((sb->sb_flags & SB_RECV) == `0`) {
6720	if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == `0`) {
6721	return true;
6722	}
6723	outgoing = `1`;
6724	OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6725	} else {
6726	if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == `0`) {
6727	return true;
6728	}
6729	outgoing = `0`;
6730	OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6731	}
6732
6733	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6734	(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6735
6736	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6737	if (error == `0`) {
6738	cfil_service_inject_queue(so, cfil_info, outgoing);
6739	}
6740	cfil_release_sockbuf(so, outgoing);
6741	return true;
6742	}
6743
6744	void
6745	cfil_sock_udp_buf_update(struct sockbuf *sb)
6746	{
6747	struct socket *so = sb->sb_so;
6748
6749	socket_lock_assert_owned(so);
6750
6751	if ((so->so_flags & SOF_CONTENT_FILTER) != `0` && so->so_flow_db != NULL) {
6752	if (!cfil_sbtrim) {
6753	return;
6754	}
6755	soflow_db_apply(so->so_flow_db, cfil_sock_udp_buf_update_per_flow, context: (void *)sb);
6756	}
6757	}
6758
6759	void
6760	cfil_filter_show(u_int32_t kcunit)
6761	{
6762	struct content_filter *cfc = NULL;
6763	struct cfil_entry *entry;
6764	int count = `0`;
6765
6766	if (kcunit > MAX_CONTENT_FILTER) {
6767	return;
6768	}
6769
6770	cfil_rw_lock_shared(lck: &cfil_lck_rw);
6771
6772	if (content_filters[kcunit - `1`] == NULL) {
6773	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
6774	return;
6775	}
6776	cfc = content_filters[kcunit - `1`];
6777
6778	CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6779	kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6780	if (cfc->cf_flags & CFF_DETACHING) {
6781	CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-DETACHING");
6782	}
6783	if (cfc->cf_flags & CFF_ACTIVE) {
6784	CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-ACTIVE");
6785	}
6786	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6787	CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:-FLOW CONTROLLED");
6788	}
6789
6790	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6791	if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6792	struct cfil_info *cfil_info = entry->cfe_cfil_info;
6793
6794	count++;
6795
6796	if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6797	cfil_info_log(LOG_DEBUG, cfil_info, msg: "CFIL: FILTER SHOW:-DETACHED");
6798	} else {
6799	cfil_info_log(LOG_DEBUG, cfil_info, msg: "CFIL: FILTER SHOW:-ATTACHED");
6800	}
6801	}
6802	}
6803
6804	CFIL_LOG(LOG_DEBUG, "CFIL: FILTER SHOW:Filter - total entries shown: %d", count);
6805
6806	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
6807	}
6808
6809	void
6810	cfil_info_show(void)
6811	{
6812	struct cfil_info *cfil_info;
6813	int count = `0`;
6814
6815	cfil_rw_lock_shared(lck: &cfil_lck_rw);
6816
6817	CFIL_LOG(LOG_DEBUG, "CFIL: INFO SHOW:count %d", cfil_sock_attached_count);
6818
6819	TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6820	count++;
6821
6822	cfil_info_log(LOG_DEBUG, cfil_info, msg: "CFIL: INFO SHOW");
6823
6824	if (cfil_info->cfi_flags & CFIF_DROP) {
6825	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - DROP");
6826	}
6827	if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6828	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - CLOSE_WAIT");
6829	}
6830	if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6831	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SOCK_CLOSED");
6832	}
6833	if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6834	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6835	}
6836	if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6837	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6838	}
6839	if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6840	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SHUT_WR");
6841	}
6842	if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6843	CFIL_LOG(LOG_DEBUG, "CFIL: INFO FLAG - SHUT_RD");
6844	}
6845	}
6846
6847	CFIL_LOG(LOG_DEBUG, "CFIL: INFO SHOW:total cfil_info shown: %d", count);
6848
6849	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
6850	}
6851
6852	bool
6853	cfil_info_action_timed_out(struct cfil_info cfil_info, int* timeout)
6854	{
6855	struct cfil_entry *entry;
6856	struct timeval current_tv;
6857	struct timeval diff_time;
6858
6859	if (cfil_info == NULL) {
6860	return false;
6861	}
6862
6863	/*
6864	* If we have queued up more data than passed offset and we haven't received
6865	* an action from user space for a while (the user space filter might have crashed),
6866	* return action timed out.
6867	*/
6868	if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset \|\|
6869	cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6870	microuptime(tv: &current_tv);
6871
6872	for (int kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6873	entry = &cfil_info->cfi_entries[kcunit - `1`];
6874
6875	if (entry->cfe_filter == NULL) {
6876	continue;
6877	}
6878
6879	if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset \|\|
6880	cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6881	// haven't gotten an action from this filter, check timeout
6882	timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6883	if (diff_time.tv_sec >= timeout) {
6884	if (cfil_info->cfi_debug) {
6885	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: flow ACTION timeout expired");
6886	}
6887	return true;
6888	}
6889	}
6890	}
6891	}
6892	return false;
6893	}
6894
6895	bool
6896	cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6897	{
6898	if (cfil_info == NULL) {
6899	return false;
6900	}
6901
6902	/*
6903	* Clean up flow if it exceeded queue thresholds
6904	*/
6905	if (cfil_info->cfi_snd.cfi_tail_drop_cnt \|\|
6906	cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6907	if (cfil_info->cfi_debug) {
6908	CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded:mbuf max < count: %d bytes: %d > tail drop count < OUT: %d IN: %d > ",
6909	cfil_udp_gc_mbuf_num_max,
6910	cfil_udp_gc_mbuf_cnt_max,
6911	cfil_info->cfi_snd.cfi_tail_drop_cnt,
6912	cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6913	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: queue threshold exceeded");
6914	}
6915	return true;
6916	}
6917
6918	return false;
6919	}
6920
6921	static bool
6922	cfil_dgram_gc_needed(struct socket so, struct* soflow_hash_entry *hash_entry, u_int64_t current_time)
6923	{
6924	#pragma unused(current_time)
6925	struct cfil_info *cfil_info = NULL;
6926
6927	if (so == NULL \|\| hash_entry == NULL \|\| hash_entry->soflow_feat_ctxt == NULL) {
6928	return false;
6929	}
6930	cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
6931
6932	cfil_rw_lock_shared(lck: &cfil_lck_rw);
6933
6934	if (cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) \|\|
6935	cfil_info_buffer_threshold_exceeded(cfil_info)) {
6936	if (cfil_info->cfi_debug) {
6937	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW GC NEEDED");
6938	}
6939	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
6940	return true;
6941	}
6942
6943	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
6944	return false;
6945	}
6946
6947	static bool
6948	cfil_dgram_gc_perform(struct socket so, struct* soflow_hash_entry *hash_entry)
6949	{
6950	struct cfil_info *cfil_info = NULL;
6951
6952	if (so == NULL \|\| hash_entry == NULL \|\| hash_entry->soflow_feat_ctxt == NULL) {
6953	return false;
6954	}
6955	cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
6956
6957	if (cfil_info->cfi_debug) {
6958	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: UDP PER-FLOW GC PERFORM");
6959	}
6960
6961	for (int kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6962	/ Let the filters know of the closing /
6963	cfil_dispatch_closed_event(so, cfil_info, kcunit);
6964	}
6965	cfil_sock_udp_unlink_flow(so, hash_entry, cfil_info);
6966	CFIL_INFO_FREE(cfil_info);
6967	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6968	return true;
6969	}
6970
6971	static bool
6972	cfil_dgram_detach_entry(struct socket so, struct* soflow_hash_entry *hash_entry)
6973	{
6974	struct cfil_info *cfil_info = NULL;
6975
6976	if (hash_entry == NULL \|\| hash_entry->soflow_feat_ctxt == NULL) {
6977	return true;
6978	}
6979	cfil_info = (struct cfil_info *) hash_entry->soflow_feat_ctxt;
6980
6981	if (cfil_info->cfi_debug) {
6982	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: DGRAM DETACH ENTRY");
6983	}
6984
6985	cfil_sock_udp_unlink_flow(so, hash_entry, cfil_info);
6986	CFIL_INFO_FREE(cfil_info);
6987	OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6988
6989	return true;
6990	}
6991
6992	static bool
6993	cfil_dgram_detach_db(struct socket so, struct* soflow_db *db)
6994	{
6995	#pragma unused(db)
6996	if (so && so->so_flags & SOF_CONTENT_FILTER) {
6997	so->so_flags &= ~SOF_CONTENT_FILTER;
6998	CFIL_LOG(LOG_DEBUG, "CFIL: DGRAM DETACH DB <so %llx>", (uint64_t)VM_KERNEL_ADDRPERM(so));
6999	}
7000	return true;
7001	}
7002
7003	struct m_tag *
7004	cfil_dgram_save_socket_state(struct cfil_info cfil_info, struct* mbuf *m)
7005	{
7006	struct m_tag *tag = NULL;
7007	struct cfil_tag *ctag = NULL;
7008	struct soflow_hash_entry *hash_entry = NULL;
7009	struct inpcb *inp = NULL;
7010
7011	if (cfil_info == NULL \|\| cfil_info->cfi_so == NULL \|\|
7012	cfil_info->cfi_hash_entry == NULL \|\| m == NULL \|\| !(m->m_flags & M_PKTHDR)) {
7013	return NULL;
7014	}
7015
7016	inp = sotoinpcb(cfil_info->cfi_so);
7017
7018	/ Allocate a tag /
7019	tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7020	sizeof(struct cfil_tag), M_DONTWAIT, m);
7021
7022	if (tag) {
7023	ctag = (struct cfil_tag *)(tag->m_tag_data);
7024	ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7025	ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7026	ctag->cfil_inp_flags = inp ? inp->inp_flags : `0`;
7027
7028	hash_entry = cfil_info->cfi_hash_entry;
7029	if (hash_entry->soflow_family == AF_INET6) {
7030	fill_ip6_sockaddr_4_6(sin46: &ctag->cfil_faddr,
7031	ip6: &hash_entry->soflow_faddr.addr6,
7032	port: hash_entry->soflow_fport, ifscope: hash_entry->soflow_faddr6_ifscope);
7033	} else if (hash_entry->soflow_family == AF_INET) {
7034	fill_ip_sockaddr_4_6(sin46: &ctag->cfil_faddr,
7035	ip: hash_entry->soflow_faddr.addr46.ia46_addr4,
7036	port: hash_entry->soflow_fport);
7037	}
7038	m_tag_prepend(m, tag);
7039	return tag;
7040	}
7041	return NULL;
7042	}
7043
7044	struct m_tag *
7045	cfil_dgram_get_socket_state(struct mbuf m, uint32_t state_change_cnt, uint32_t *options,
7046	struct sockaddr *faddr, int* *inp_flags)
7047	{
7048	struct m_tag *tag = NULL;
7049	struct cfil_tag *ctag = NULL;
7050
7051	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP);
7052	if (tag) {
7053	ctag = (struct cfil_tag *)(tag->m_tag_data);
7054	if (state_change_cnt) {
7055	*state_change_cnt = ctag->cfil_so_state_change_cnt;
7056	}
7057	if (options) {
7058	*options = ctag->cfil_so_options;
7059	}
7060	if (faddr) {
7061	*faddr = SA(&ctag->cfil_faddr);
7062	}
7063	if (inp_flags) {
7064	*inp_flags = ctag->cfil_inp_flags;
7065	}
7066
7067	/*
7068	* Unlink tag and hand it over to caller.
7069	* Note that caller will be responsible to free it.
7070	*/
7071	m_tag_unlink(m, tag);
7072	return tag;
7073	}
7074	return NULL;
7075	}
7076
7077	boolean_t
7078	cfil_dgram_peek_socket_state(struct mbuf m, int* *inp_flags)
7079	{
7080	struct m_tag *tag = NULL;
7081	struct cfil_tag *ctag = NULL;
7082
7083	tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP);
7084	if (tag) {
7085	ctag = (struct cfil_tag *)(tag->m_tag_data);
7086	if (inp_flags) {
7087	*inp_flags = ctag->cfil_inp_flags;
7088	}
7089	return true;
7090	}
7091	return false;
7092	}
7093
7094	static int
7095	cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7096	{
7097	struct content_filter *cfc = NULL;
7098	errno_t error = `0`;
7099	size_t msgsize = `0`;
7100
7101	if (buffer == NULL \|\| stats_count == `0`) {
7102	return error;
7103	}
7104
7105	if (kcunit > MAX_CONTENT_FILTER) {
7106	return error;
7107	}
7108
7109	cfc = content_filters[kcunit - `1`];
7110	if (cfc == NULL) {
7111	return error;
7112	}
7113
7114	/ Would be wasteful to try /
7115	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7116	error = ENOBUFS;
7117	goto done;
7118	}
7119
7120	msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7121	buffer->msghdr.cfm_len = (uint32_t)msgsize;
7122	buffer->msghdr.cfm_version = `1`;
7123	buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7124	buffer->msghdr.cfm_op = CFM_OP_STATS;
7125	buffer->msghdr.cfm_sock_id = `0`;
7126	buffer->count = stats_count;
7127
7128	if (cfil_log_stats) {
7129	CFIL_LOG(LOG_DEBUG, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7130	kcunit,
7131	(unsigned long)msgsize,
7132	(unsigned long)sizeof(struct cfil_msg_stats_report),
7133	(unsigned long)sizeof(struct cfil_msg_sock_stats),
7134	(unsigned long)stats_count);
7135	}
7136
7137	error = ctl_enqueuedata(kctlref: cfc->cf_kcref, unit: cfc->cf_kcunit,
7138	data: buffer,
7139	len: msgsize,
7140	CTL_DATA_EOR);
7141	if (error != `0`) {
7142	CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed:%d", error);
7143	goto done;
7144	}
7145	OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7146
7147	if (cfil_log_stats) {
7148	CFIL_LOG(LOG_DEBUG, "CFIL: STATS REPORT:send msg to %d", kcunit);
7149	}
7150	done:
7151
7152	if (error == ENOBUFS) {
7153	OSIncrementAtomic(
7154	&cfil_stats.cfs_stats_event_flow_control);
7155
7156	if (!cfil_rw_lock_shared_to_exclusive(lck: &cfil_lck_rw)) {
7157	cfil_rw_lock_exclusive(lck: &cfil_lck_rw);
7158	}
7159
7160	cfc->cf_flags \|= CFF_FLOW_CONTROLLED;
7161
7162	cfil_rw_lock_exclusive_to_shared(lck: &cfil_lck_rw);
7163	} else if (error != `0`) {
7164	OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7165	}
7166
7167	return error;
7168	}
7169
7170	static void
7171	cfil_stats_report_thread_sleep(bool forever)
7172	{
7173	if (cfil_log_stats) {
7174	CFIL_LOG(LOG_DEBUG, "CFIL: STATS COLLECTION SLEEP");
7175	}
7176
7177	if (forever) {
7178	(void) assert_wait(event: (event_t) &cfil_sock_attached_stats_count,
7179	THREAD_INTERRUPTIBLE);
7180	} else {
7181	uint64_t deadline = `0`;
7182	nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, result: &deadline);
7183	clock_absolutetime_interval_to_deadline(abstime: deadline, result: &deadline);
7184
7185	(void) assert_wait_deadline(event: &cfil_sock_attached_stats_count,
7186	THREAD_INTERRUPTIBLE, deadline);
7187	}
7188	}
7189
7190	static void
7191	cfil_stats_report_thread_func(void *v, wait_result_t w)
7192	{
7193	#pragma unused(v, w)
7194
7195	ASSERT(cfil_stats_report_thread == current_thread());
7196	thread_set_thread_name(th: current_thread(), name: "CFIL_STATS_REPORT");
7197
7198	// Kick off gc shortly
7199	cfil_stats_report_thread_sleep(false);
7200	thread_block_parameter(continuation: (thread_continue_t) cfil_stats_report, NULL);
7201	/ NOTREACHED /
7202	}
7203
7204	static bool
7205	cfil_stats_collect_flow_stats_for_filter(int kcunit,
7206	struct cfil_info *cfil_info,
7207	struct cfil_entry *entry,
7208	struct timeval current_tv)
7209	{
7210	struct cfil_stats_report_buffer *buffer = NULL;
7211	struct cfil_msg_sock_stats *flow_array = NULL;
7212	struct cfil_msg_sock_stats *stats = NULL;
7213	struct inpcb *inp = NULL;
7214	struct timeval diff_time;
7215	uint64_t diff_time_usecs;
7216	int index = `0`;
7217
7218	if (entry->cfe_stats_report_frequency == `0`) {
7219	return false;
7220	}
7221
7222	buffer = global_cfil_stats_report_buffers[kcunit - `1`];
7223	if (buffer == NULL) {
7224	CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7225	return false;
7226	}
7227
7228	timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7229	diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7230
7231	if (cfil_info->cfi_debug && cfil_log_stats) {
7232	CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu(usecs %llu) @freq %llu usecs sockID %llu <%llx>",
7233	(unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7234	(unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7235	(unsigned long long)current_tv.tv_sec,
7236	(unsigned long long)current_tv.tv_usec,
7237	(unsigned long long)diff_time.tv_sec,
7238	(unsigned long long)diff_time.tv_usec,
7239	(unsigned long long)diff_time_usecs,
7240	(unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7241	cfil_info->cfi_sock_id, cfil_info->cfi_sock_id);
7242	}
7243
7244	// Compare elapsed time in usecs
7245	if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7246	if (cfil_info->cfi_debug && cfil_log_stats) {
7247	CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7248	cfil_info->cfi_byte_inbound_count,
7249	entry->cfe_byte_inbound_count_reported);
7250	CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7251	cfil_info->cfi_byte_outbound_count,
7252	entry->cfe_byte_outbound_count_reported);
7253	}
7254	// Check if flow has new bytes that have not been reported
7255	if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count \|\|
7256	entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7257	flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7258	index = global_cfil_stats_counts[kcunit - `1`];
7259
7260	stats = &flow_array[index];
7261	stats->cfs_sock_id = cfil_info->cfi_sock_id;
7262	stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7263	stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7264
7265	if (entry->cfe_laddr_sent == false) {
7266	/ cache it if necessary /
7267	if (cfil_info->cfi_so_attach_laddr.sa.sa_len == `0`) {
7268	inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7269	if (inp != NULL) {
7270	boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7271	union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7272	union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7273	cfil_fill_event_msg_addresses(entry: cfil_info->cfi_hash_entry, inp,
7274	sin_src: src, sin_dst: dst, isIPv4: !IS_INP_V6(inp), outgoing);
7275	}
7276	}
7277
7278	if (cfil_info->cfi_so_attach_laddr.sa.sa_len != `0`) {
7279	stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7280	entry->cfe_laddr_sent = true;
7281	}
7282	}
7283
7284	global_cfil_stats_counts[kcunit - `1`]++;
7285
7286	entry->cfe_stats_report_ts = current_tv;
7287	entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7288	entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7289	if (cfil_info->cfi_debug && cfil_log_stats) {
7290	cfil_info_log(LOG_ERR, cfil_info, msg: "CFIL: STATS COLLECTED");
7291	}
7292	CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7293	return true;
7294	}
7295	}
7296	return false;
7297	}
7298
7299	static void
7300	cfil_stats_report(void *v, wait_result_t w)
7301	{
7302	#pragma unused(v, w)
7303
7304	struct cfil_info *cfil_info = NULL;
7305	struct cfil_entry *entry = NULL;
7306	struct timeval current_tv;
7307	uint32_t flow_count = `0`;
7308	uint64_t saved_next_sock_id = `0`; // Next sock id to be reported for next loop
7309	bool flow_reported = false;
7310
7311	if (cfil_log_stats) {
7312	CFIL_LOG(LOG_DEBUG, "CFIL: STATS COLLECTION RUNNING");
7313	}
7314
7315	do {
7316	// Collect all sock ids of flows that has new stats
7317	cfil_rw_lock_shared(lck: &cfil_lck_rw);
7318
7319	if (cfil_sock_attached_stats_count == `0`) {
7320	if (cfil_log_stats) {
7321	CFIL_LOG(LOG_DEBUG, "CFIL: STATS: no flow");
7322	}
7323	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
7324	goto go_sleep;
7325	}
7326
7327	for (int kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7328	if (global_cfil_stats_report_buffers[kcunit - `1`] != NULL) {
7329	memset(s: global_cfil_stats_report_buffers[kcunit - `1`], c: `0`, n: sizeof(struct cfil_stats_report_buffer));
7330	}
7331	global_cfil_stats_counts[kcunit - `1`] = `0`;
7332	}
7333
7334	microuptime(tv: &current_tv);
7335	flow_count = `0`;
7336
7337	TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7338	if (saved_next_sock_id != `0` &&
7339	saved_next_sock_id == cfil_info->cfi_sock_id) {
7340	// Here is where we left off previously, start accumulating
7341	saved_next_sock_id = `0`;
7342	}
7343
7344	if (saved_next_sock_id == `0`) {
7345	if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7346	// Examine a fixed number of flows each round. Remember the current flow
7347	// so we can start from here for next loop
7348	saved_next_sock_id = cfil_info->cfi_sock_id;
7349	break;
7350	}
7351
7352	flow_reported = false;
7353	for (int kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7354	entry = &cfil_info->cfi_entries[kcunit - `1`];
7355	if (entry->cfe_filter == NULL) {
7356	if (cfil_info->cfi_debug && cfil_log_stats) {
7357	CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - so %llx no filter",
7358	cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : `0`);
7359	}
7360	continue;
7361	}
7362
7363	if ((entry->cfe_stats_report_frequency > `0`) &&
7364	cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7365	flow_reported = true;
7366	}
7367	}
7368	if (flow_reported == true) {
7369	flow_count++;
7370	}
7371	}
7372	}
7373
7374	if (flow_count > `0`) {
7375	if (cfil_log_stats) {
7376	CFIL_LOG(LOG_DEBUG, "CFIL: STATS reporting for %d flows", flow_count);
7377	}
7378	for (int kcunit = `1`; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7379	if (global_cfil_stats_report_buffers[kcunit - `1`] != NULL &&
7380	global_cfil_stats_counts[kcunit - `1`] > `0`) {
7381	cfil_dispatch_stats_event_locked(kcunit,
7382	buffer: global_cfil_stats_report_buffers[kcunit - `1`],
7383	stats_count: global_cfil_stats_counts[kcunit - `1`]);
7384	}
7385	}
7386	} else {
7387	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
7388	goto go_sleep;
7389	}
7390
7391	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
7392
7393	// Loop again if we haven't finished the whole cfil_info list
7394	} while (saved_next_sock_id != `0`);
7395
7396	go_sleep:
7397
7398	// Sleep forever (until waken up) if no more flow to report
7399	cfil_rw_lock_shared(lck: &cfil_lck_rw);
7400	cfil_stats_report_thread_sleep(forever: cfil_sock_attached_stats_count == `0` ? true : false);
7401	cfil_rw_unlock_shared(lck: &cfil_lck_rw);
7402	thread_block_parameter(continuation: (thread_continue_t) cfil_stats_report, NULL);
7403	/ NOTREACHED /
7404	}
7405

Browse the source code of xnu/bsd/net/content_filter.c