1/*
2 * Copyright (c) 2013-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
31 * A content filter user space agents gets a copy of the data and the data is
32 * also kept in kernel buffer until the user space agents makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to two simultaneous content filters
51 * for the sake of simplicity of the implementation.
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
56 * A user space filter agent uses the Network Extension Control Policy (NECP)
57 * database to specify which TCP/IP sockets need to be filtered. The NECP
58 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
65 * 1) The user space filter agent specifies an NECP filter control unit when
66 * in adds its filtering rules to the NECP database.
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
80 * Note: NECP currently supports a single filter control unit per TCP/IP socket
81 * but this restriction may be soon lifted.
82 *
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested to filter.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells if
96 * it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note the message header length field may be padded for alignment and can
99 * be larger than the actual content of the message.
100 * The field "cfm_op" describe the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
106 * - CFM_OP_DATA_IN: A span of data is being or received on a TCP/IP socket
107 *
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64 bits offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
117 *
118 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
119 * CFM_OP_DATA_OUT events are not delivered until a CFM_OP_DATA_UPDATE
120 * action message is sent by the user space filter agent.
121 *
122 * Note: absolute 64 bits offsets should be large enough for the foreseeable
123 * future. A 64-bits counter will wrap after 468 years at 10 Gbit/sec:
124 * 2E64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125 *
126 * They are two kinds of primary content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shutdown socket and disallow further data flow
129 *
130 * There is also an action to mark a given client flow as already filtered
131 * at a higher level, CFM_OP_BLESS_CLIENT.
132 *
133 *
134 * ACTION MESSAGES
135 *
136 * The CFM_OP_DATA_UPDATE action messages let the user space filter
137 * agent allow data to flow up to the specified pass offset -- there
138 * is a pass offset for outgoing data and a pass offset for incoming data.
139 * When a new TCP/IP socket is attached to the content filter, each pass offset
140 * is initially set to 0 so not data is allowed to pass by default.
141 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
142 * then the data flow becomes unrestricted.
143 *
144 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145 * with a pass offset smaller than the pass offset of a previous
146 * CFM_OP_DATA_UPDATE message is silently ignored.
147 *
148 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149 * to tell the kernel how much data it wants to see by using the peek offsets.
150 * Just like pass offsets, there is a peek offset for each direction.
151 * When a new TCP/IP socket is attached to the content filter, each peek offset
152 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
153 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154 * with a greater than 0 peek offset is sent by the user space filter agent.
155 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
156 * then the flow of update data events becomes unrestricted.
157 *
158 * Note that peek offsets cannot be smaller than the corresponding pass offset.
159 * Also a peek offsets cannot be smaller than the corresponding end offset
160 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161 * to set a too small peek value is silently ignored.
162 *
163 *
164 * PER SOCKET "struct cfil_info"
165 *
166 * As soon as a TCP/IP socket gets attached to a content filter, a
167 * "struct cfil_info" is created to hold the content filtering state for this
168 * socket.
169 *
170 * The content filtering state is made of the following information
171 * for each direction:
172 * - The current pass offset;
173 * - The first and last offsets of the data pending, waiting for a filtering
174 * decision;
175 * - The inject queue for data that passed the filters and that needs
176 * to be re-injected;
177 * - A content filter specific state in a set of "struct cfil_entry"
178 *
179 *
180 * CONTENT FILTER STATE "struct cfil_entry"
181 *
182 * The "struct cfil_entry" maintains the information most relevant to the
183 * message handling over a kernel control socket with a user space filter agent.
184 *
185 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
186 * to the kernel control socket unit it corresponds to and also has a pointer
187 * to the corresponding "struct content_filter".
188 *
189 * For each direction, "struct cfil_entry" maintains the following information:
190 * - The pass offset
191 * - The peek offset
192 * - The offset of the last data peeked at by the filter
193 * - A queue of data that's waiting to be delivered to the user space filter
194 * agent on the kernel control socket
195 * - A queue of data for which event messages have been sent on the kernel
196 * control socket and are pending for a filtering decision.
197 *
198 *
199 * CONTENT FILTER QUEUES
200 *
201 * Data that is being filtered is steered away from the TCP/IP socket buffer
202 * and instead will sit in one of three content filter queues until the data
203 * can be re-injected into the TCP/IP socket buffer.
204 *
205 * A content filter queue is represented by "struct cfil_queue" that contains
206 * a list of mbufs and the start and end offset of the data span of
207 * the list of mbufs.
208 *
209 * The data moves into the three content filter queues according to this
210 * sequence:
211 * a) The "cfe_ctl_q" of "struct cfil_entry"
212 * b) The "cfe_pending_q" of "struct cfil_entry"
213 * c) The "cfi_inject_q" of "struct cfil_info"
214 *
215 * Note: The sequence (a),(b) may be repeated several times if there is more
216 * than one content filter attached to the TCP/IP socket.
217 *
218 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
219 * kernel conntrol socket for two reasons:
220 * - The peek offset is less that the end offset of the mbuf data
221 * - The kernel control socket is flow controlled
222 *
223 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
225 * socket and are waiting for a pass action message fromn the user space
226 * filter agent. An mbuf length must be fully allowed to pass to be removed
227 * from the cfe_pending_q.
228 *
229 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
230 * by the user space filter agent and that needs to be re-injected into the
231 * TCP/IP socket.
232 *
233 *
234 * IMPACT ON FLOW CONTROL
235 *
236 * An essential aspect of the content filer subsystem is to minimize the
237 * impact on flow control of the TCP/IP sockets being filtered.
238 *
239 * The processing overhead of the content filtering may have an effect on
240 * flow control by adding noticeable delays and cannot be eliminated --
241 * care must be taken by the user space filter agent to minimize the
242 * processing delays.
243 *
244 * The amount of data being filtered is kept in buffers while waiting for
245 * a decision by the user space filter agent. This amount of data pending
246 * needs to be subtracted from the amount of data available in the
247 * corresponding TCP/IP socket buffer. This is done by modifying
248 * sbspace() and tcp_sbspace() to account for amount of data pending
249 * in the content filter.
250 *
251 *
252 * LOCKING STRATEGY
253 *
254 * The global state of content filter subsystem is protected by a single
255 * read-write lock "cfil_lck_rw". The data flow can be done with the
256 * cfil read-write lock held as shared so it can be re-entered from multiple
257 * threads.
258 *
259 * The per TCP/IP socket content filterstate -- "struct cfil_info" -- is
260 * protected by the socket lock.
261 *
262 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
263 * is held. That's why we have some sequences where we drop the cfil read-write
264 * lock before taking the TCP/IP lock.
265 *
266 * It is also important to lock the TCP/IP socket buffer while the content
267 * filter is modifying the amount of pending data. Otherwise the calculations
268 * in sbspace() and tcp_sbspace() could be wrong.
269 *
270 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
271 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
272 *
273 * Actually "cfe_link" and "cfe_filter" are protected by both by
274 * "cfil_lck_rw" and the socket lock: they may be modified only when
275 * "cfil_lck_rw" is exclusive and the socket is locked.
276 *
277 * To read the other fields of "struct content_filter" we have to take
278 * "cfil_lck_rw" in shared mode.
279 *
280 *
281 * LIMITATIONS
282 *
283 * - For TCP sockets only
284 *
285 * - Does not support TCP unordered messages
286 */
287
288/*
289 * TO DO LIST
290 *
291 * SOONER:
292 *
293 * Deal with OOB
294 *
295 * LATER:
296 *
297 * If support datagram, enqueue control and address mbufs as well
298 */
299
300#include <sys/types.h>
301#include <sys/kern_control.h>
302#include <sys/queue.h>
303#include <sys/domain.h>
304#include <sys/protosw.h>
305#include <sys/syslog.h>
306#include <sys/systm.h>
307#include <sys/param.h>
308#include <sys/mbuf.h>
309
310#include <kern/locks.h>
311#include <kern/zalloc.h>
312#include <kern/debug.h>
313
314#include <net/content_filter.h>
315
316#include <netinet/in_pcb.h>
317#include <netinet/tcp.h>
318#include <netinet/tcp_var.h>
319#include <netinet/udp.h>
320#include <netinet/udp_var.h>
321
322#include <string.h>
323#include <libkern/libkern.h>
324#include <kern/sched_prim.h>
325
326#define MAX_CONTENT_FILTER 2
327
328struct cfil_entry;
329
330/*
331 * The structure content_filter represents a user space content filter
332 * It's created and associated with a kernel control socket instance
333 */
334struct content_filter {
335 kern_ctl_ref cf_kcref;
336 u_int32_t cf_kcunit;
337 u_int32_t cf_flags;
338
339 uint32_t cf_necp_control_unit;
340
341 uint32_t cf_sock_count;
342 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
343};
344
345#define CFF_ACTIVE 0x01
346#define CFF_DETACHING 0x02
347#define CFF_FLOW_CONTROLLED 0x04
348
349struct content_filter **content_filters = NULL;
350uint32_t cfil_active_count = 0; /* Number of active content filters */
351uint32_t cfil_sock_attached_count = 0; /* Number of sockets attachements */
352uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP sockets attachements */
353uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
354
355static kern_ctl_ref cfil_kctlref = NULL;
356
357static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
358static lck_attr_t *cfil_lck_attr = NULL;
359static lck_grp_t *cfil_lck_grp = NULL;
360decl_lck_rw_data(static, cfil_lck_rw);
361
362#define CFIL_RW_LCK_MAX 8
363
364int cfil_rw_nxt_lck = 0;
365void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
366
367int cfil_rw_nxt_unlck = 0;
368void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
369
370#define CONTENT_FILTER_ZONE_NAME "content_filter"
371#define CONTENT_FILTER_ZONE_MAX 10
372static struct zone *content_filter_zone = NULL; /* zone for content_filter */
373
374
375#define CFIL_INFO_ZONE_NAME "cfil_info"
376#define CFIL_INFO_ZONE_MAX 1024
377static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
378
379MBUFQ_HEAD(cfil_mqhead);
380
381struct cfil_queue {
382 uint64_t q_start; /* offset of first byte in queue */
383 uint64_t q_end; /* offset of last byte in queue */
384 struct cfil_mqhead q_mq;
385};
386
387/*
388 * struct cfil_entry
389 *
390 * The is one entry per content filter
391 */
392struct cfil_entry {
393 TAILQ_ENTRY(cfil_entry) cfe_link;
394 struct content_filter *cfe_filter;
395
396 struct cfil_info *cfe_cfil_info;
397 uint32_t cfe_flags;
398 uint32_t cfe_necp_control_unit;
399 struct timeval cfe_last_event; /* To user space */
400 struct timeval cfe_last_action; /* From user space */
401
402 struct cfe_buf {
403 /*
404 * cfe_pending_q holds data that has been delivered to
405 * the filter and for which we are waiting for an action
406 */
407 struct cfil_queue cfe_pending_q;
408 /*
409 * This queue is for data that has not be delivered to
410 * the content filter (new data, pass peek or flow control)
411 */
412 struct cfil_queue cfe_ctl_q;
413
414 uint64_t cfe_pass_offset;
415 uint64_t cfe_peek_offset;
416 uint64_t cfe_peeked;
417 } cfe_snd, cfe_rcv;
418};
419
420#define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
421#define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
422#define CFEF_DATA_START 0x0004 /* can send data event */
423#define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
424#define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
425#define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
426#define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
427#define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
428
429
430#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
431 struct timeval _tdiff; \
432 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
433 timersub(t1, t0, &_tdiff); \
434 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
435 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
436 (cfil)->cfi_op_list_ctr ++; \
437 }
438
439struct cfil_hash_entry;
440
441/*
442 * struct cfil_info
443 *
444 * There is a struct cfil_info per socket
445 */
446struct cfil_info {
447 TAILQ_ENTRY(cfil_info) cfi_link;
448 struct socket *cfi_so;
449 uint64_t cfi_flags;
450 uint64_t cfi_sock_id;
451 struct timeval64 cfi_first_event;
452 uint32_t cfi_op_list_ctr;
453 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
454 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
455
456 struct cfi_buf {
457 /*
458 * cfi_pending_first and cfi_pending_last describe the total
459 * amount of data outstanding for all the filters on
460 * this socket and data in the flow queue
461 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
462 */
463 uint64_t cfi_pending_first;
464 uint64_t cfi_pending_last;
465 uint32_t cfi_pending_mbcnt;
466 uint32_t cfi_pending_mbnum;
467 uint32_t cfi_tail_drop_cnt;
468 /*
469 * cfi_pass_offset is the minimum of all the filters
470 */
471 uint64_t cfi_pass_offset;
472 /*
473 * cfi_inject_q holds data that needs to be re-injected
474 * into the socket after filtering and that can
475 * be queued because of flow control
476 */
477 struct cfil_queue cfi_inject_q;
478 } cfi_snd, cfi_rcv;
479
480 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
481 struct cfil_hash_entry *cfi_hash_entry;
482} __attribute__((aligned(8)));
483
484#define CFIF_DROP 0x0001 /* drop action applied */
485#define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
486#define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
487#define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
488#define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
489#define CFIF_SHUT_WR 0x0040 /* shutdown write */
490#define CFIF_SHUT_RD 0x0080 /* shutdown read */
491
492#define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
493#define CFI_SHIFT_GENCNT 32
494#define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
495#define CFI_SHIFT_FLOWHASH 0
496
497TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
498
499#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
500#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
501
502/*
503 * UDP Socket Support
504 */
505LIST_HEAD(cfilhashhead, cfil_hash_entry);
506#define CFILHASHSIZE 16
507#define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
508#define IS_UDP(so) (so && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
509#define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
510 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
511#define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
512 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
513#define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
514
515/*
516 * UDP Garbage Collection:
517 */
518static struct thread *cfil_udp_gc_thread;
519#define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
520#define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
521#define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
522#define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
523
524/*
525 * UDP flow queue thresholds
526 */
527#define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
528#define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
529#define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
530/*
531 * UDP flow queue threshold globals:
532 */
533static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
534static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
535
536/*
537 * struct cfil_hash_entry
538 *
539 * Hash entry for cfil_info
540 */
541struct cfil_hash_entry {
542 LIST_ENTRY(cfil_hash_entry) cfentry_link;
543 struct cfil_info *cfentry_cfil;
544 u_short cfentry_fport;
545 u_short cfentry_lport;
546 sa_family_t cfentry_family;
547 u_int32_t cfentry_flowhash;
548 u_int32_t cfentry_lastused;
549 union {
550 /* foreign host table entry */
551 struct in_addr_4in6 addr46;
552 struct in6_addr addr6;
553 } cfentry_faddr;
554 union {
555 /* local host table entry */
556 struct in_addr_4in6 addr46;
557 struct in6_addr addr6;
558 } cfentry_laddr;
559};
560
561/*
562 * struct cfil_db
563 *
564 * For each UDP socket, this is a hash table maintaining all cfil_info structs
565 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
566 */
567struct cfil_db {
568 struct socket *cfdb_so;
569 uint32_t cfdb_count; /* Number of total content filters */
570 struct cfilhashhead *cfdb_hashbase;
571 u_long cfdb_hashmask;
572 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
573};
574
575/*
576 * CFIL specific mbuf tag:
577 * Save state of socket at the point of data entry into cfil.
578 * Use saved state for reinjection at protocol layer.
579 */
580struct cfil_tag {
581 union sockaddr_in_4_6 cfil_faddr;
582 uint32_t cfil_so_state_change_cnt;
583 short cfil_so_options;
584};
585
586#define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
587#define CFIL_HASH_ENTRY_ZONE_MAX 1024
588static struct zone *cfil_hash_entry_zone = NULL;
589
590#define CFIL_DB_ZONE_NAME "cfil_db"
591#define CFIL_DB_ZONE_MAX 1024
592static struct zone *cfil_db_zone = NULL;
593
594/*
595 * Statistics
596 */
597
598struct cfil_stats cfil_stats;
599
600/*
601 * For troubleshooting
602 */
603int cfil_log_level = LOG_ERR;
604int cfil_debug = 1;
605
606// Debug controls added for selective debugging.
607// Disabled for production. If enabled,
608// these will have performance impact
609#define LIFECYCLE_DEBUG 0
610#define VERDICT_DEBUG 0
611#define DATA_DEBUG 0
612#define SHOW_DEBUG 0
613#define GC_DEBUG 0
614
615/*
616 * Sysctls for logs and statistics
617 */
618static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
619 struct sysctl_req *);
620static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
621 struct sysctl_req *);
622
623SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");
624
625SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
626 &cfil_log_level, 0, "");
627
628SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
629 &cfil_debug, 0, "");
630
631SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
632 &cfil_sock_attached_count, 0, "");
633
634SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
635 &cfil_active_count, 0, "");
636
637SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
638 &cfil_close_wait_timeout, 0, "");
639
640static int cfil_sbtrim = 1;
641SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
642 &cfil_sbtrim, 0, "");
643
644SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
645 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
646
647SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
648 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
649
650SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
651 &cfil_stats, cfil_stats, "");
652
653/*
654 * Forward declaration to appease the compiler
655 */
656static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
657 uint64_t, uint64_t);
658static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
659static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
660static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
661static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
662 struct mbuf *, struct mbuf *, uint32_t);
663static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
664 struct mbuf *, uint64_t);
665static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
666 struct in_addr, u_int16_t);
667static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
668 struct in6_addr *, u_int16_t);
669;
670static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t);
671static void cfil_info_free(struct cfil_info *);
672static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
673static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
674static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
675static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
676static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
677static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
678static void cfil_info_verify(struct cfil_info *);
679static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
680 uint64_t, uint64_t);
681static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
682static void cfil_release_sockbuf(struct socket *, int);
683static int cfil_filters_attached(struct socket *);
684
685static void cfil_rw_lock_exclusive(lck_rw_t *);
686static void cfil_rw_unlock_exclusive(lck_rw_t *);
687static void cfil_rw_lock_shared(lck_rw_t *);
688static void cfil_rw_unlock_shared(lck_rw_t *);
689static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
690static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
691
692static unsigned int cfil_data_length(struct mbuf *, int *, int *);
693static errno_t cfil_db_init(struct socket *);
694static void cfil_db_free(struct socket *so);
695struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
696struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
697struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
698void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
699struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
700struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
701static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
702 struct mbuf *, struct mbuf *, uint32_t);
703static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
704static void cfil_sock_udp_is_closed(struct socket *);
705static int cfil_sock_udp_notify_shutdown(struct socket *, int , int, int);
706static int cfil_sock_udp_shutdown(struct socket *, int *);
707static void cfil_sock_udp_close_wait(struct socket *);
708static void cfil_sock_udp_buf_update(struct sockbuf *);
709static int cfil_filters_udp_attached(struct socket *, bool);
710static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
711 struct in6_addr **, struct in6_addr **,
712 u_int16_t *, u_int16_t *);
713static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
714 struct in_addr *, struct in_addr *,
715 u_int16_t *, u_int16_t *);
716static void cfil_info_log(int, struct cfil_info *, const char *);
717void cfil_filter_show(u_int32_t);
718void cfil_info_show(void);
719bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
720bool cfil_info_action_timed_out(struct cfil_info *, int);
721bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
722struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
723static void cfil_udp_gc_thread_func(void *, wait_result_t);
724static void cfil_info_udp_expire(void *, wait_result_t);
725
726bool check_port(struct sockaddr *, u_short);
727
728/*
729 * Content filter global read write lock
730 */
731
732static void
733cfil_rw_lock_exclusive(lck_rw_t *lck)
734{
735 void *lr_saved;
736
737 lr_saved = __builtin_return_address(0);
738
739 lck_rw_lock_exclusive(lck);
740
741 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
742 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
743}
744
745static void
746cfil_rw_unlock_exclusive(lck_rw_t *lck)
747{
748 void *lr_saved;
749
750 lr_saved = __builtin_return_address(0);
751
752 lck_rw_unlock_exclusive(lck);
753
754 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
755 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
756}
757
758static void
759cfil_rw_lock_shared(lck_rw_t *lck)
760{
761 void *lr_saved;
762
763 lr_saved = __builtin_return_address(0);
764
765 lck_rw_lock_shared(lck);
766
767 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
768 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
769}
770
771static void
772cfil_rw_unlock_shared(lck_rw_t *lck)
773{
774 void *lr_saved;
775
776 lr_saved = __builtin_return_address(0);
777
778 lck_rw_unlock_shared(lck);
779
780 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
781 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
782}
783
784static boolean_t
785cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
786{
787 void *lr_saved;
788 boolean_t upgraded;
789
790 lr_saved = __builtin_return_address(0);
791
792 upgraded = lck_rw_lock_shared_to_exclusive(lck);
793 if (upgraded) {
794 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
795 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
796 }
797 return (upgraded);
798}
799
800static void
801cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
802{
803 void *lr_saved;
804
805 lr_saved = __builtin_return_address(0);
806
807 lck_rw_lock_exclusive_to_shared(lck);
808
809 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
810 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
811}
812
813static void
814cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
815{
816#if !MACH_ASSERT
817#pragma unused(lck, exclusive)
818#endif
819 LCK_RW_ASSERT(lck,
820 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
821}
822
823/*
824 * Return the number of bytes in the mbuf chain using the same
825 * method as m_length() or sballoc()
826 *
827 * Returns data len - starting from PKT start
828 * - retmbcnt - optional param to get total mbuf bytes in chain
829 * - retmbnum - optional param to get number of mbufs in chain
830 */
831static unsigned int
832cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
833{
834 struct mbuf *m0;
835 unsigned int pktlen = 0;
836 int mbcnt;
837 int mbnum;
838
839 // Locate the start of data
840 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
841 if (m0->m_flags & M_PKTHDR)
842 break;
843 }
844 if (m0 == NULL) {
845 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
846 return (0);
847 }
848 m = m0;
849
850 if (retmbcnt == NULL && retmbnum == NULL)
851 return (m_length(m));
852
853 pktlen = 0;
854 mbcnt = 0;
855 mbnum = 0;
856 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
857 pktlen += m0->m_len;
858 mbnum++;
859 mbcnt += MSIZE;
860 if (m0->m_flags & M_EXT)
861 mbcnt += m0->m_ext.ext_size;
862 }
863 if (retmbcnt) {
864 *retmbcnt = mbcnt;
865 }
866 if (retmbnum) {
867 *retmbnum = mbnum;
868 }
869 return (pktlen);
870}
871
872static struct mbuf *
873cfil_data_start(struct mbuf *m)
874{
875 struct mbuf *m0;
876
877 // Locate the start of data
878 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
879 if (m0->m_flags & M_PKTHDR)
880 break;
881 }
882 return m0;
883}
884
885/*
886 * Common mbuf queue utilities
887 */
888
889static inline void
890cfil_queue_init(struct cfil_queue *cfq)
891{
892 cfq->q_start = 0;
893 cfq->q_end = 0;
894 MBUFQ_INIT(&cfq->q_mq);
895}
896
897static inline uint64_t
898cfil_queue_drain(struct cfil_queue *cfq)
899{
900 uint64_t drained = cfq->q_start - cfq->q_end;
901 cfq->q_start = 0;
902 cfq->q_end = 0;
903 MBUFQ_DRAIN(&cfq->q_mq);
904
905 return (drained);
906}
907
908/* Return 1 when empty, 0 otherwise */
909static inline int
910cfil_queue_empty(struct cfil_queue *cfq)
911{
912 return (MBUFQ_EMPTY(&cfq->q_mq));
913}
914
915static inline uint64_t
916cfil_queue_offset_first(struct cfil_queue *cfq)
917{
918 return (cfq->q_start);
919}
920
921static inline uint64_t
922cfil_queue_offset_last(struct cfil_queue *cfq)
923{
924 return (cfq->q_end);
925}
926
927static inline uint64_t
928cfil_queue_len(struct cfil_queue *cfq)
929{
930 return (cfq->q_end - cfq->q_start);
931}
932
933/*
934 * Routines to verify some fundamental assumptions
935 */
936
937static void
938cfil_queue_verify(struct cfil_queue *cfq)
939{
940 mbuf_t chain;
941 mbuf_t m;
942 mbuf_t n;
943 uint64_t queuesize = 0;
944
945 /* Verify offset are ordered */
946 VERIFY(cfq->q_start <= cfq->q_end);
947
948 /*
949 * When queue is empty, the offsets are equal otherwise the offsets
950 * are different
951 */
952 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
953 (!MBUFQ_EMPTY(&cfq->q_mq) &&
954 cfq->q_start != cfq->q_end));
955
956 MBUFQ_FOREACH(chain, &cfq->q_mq) {
957 size_t chainsize = 0;
958 m = chain;
959 unsigned int mlen = cfil_data_length(m, NULL, NULL);
960 // skip the addr and control stuff if present
961 m = cfil_data_start(m);
962
963 if (m == NULL ||
964 m == (void *)M_TAG_FREE_PATTERN ||
965 m->m_next == (void *)M_TAG_FREE_PATTERN ||
966 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
967 panic("%s - mq %p is free at %p", __func__,
968 &cfq->q_mq, m);
969 for (n = m; n != NULL; n = n->m_next) {
970 if (n->m_type != MT_DATA &&
971 n->m_type != MT_HEADER &&
972 n->m_type != MT_OOBDATA)
973 panic("%s - %p unsupported type %u", __func__,
974 n, n->m_type);
975 chainsize += n->m_len;
976 }
977 if (mlen != chainsize)
978 panic("%s - %p m_length() %u != chainsize %lu",
979 __func__, m, mlen, chainsize);
980 queuesize += chainsize;
981 }
982 if (queuesize != cfq->q_end - cfq->q_start)
983 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
984 m, queuesize, cfq->q_end - cfq->q_start);
985}
986
987static void
988cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
989{
990 CFIL_QUEUE_VERIFY(cfq);
991
992 MBUFQ_ENQUEUE(&cfq->q_mq, m);
993 cfq->q_end += len;
994
995 CFIL_QUEUE_VERIFY(cfq);
996}
997
998static void
999cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1000{
1001 CFIL_QUEUE_VERIFY(cfq);
1002
1003 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1004
1005 MBUFQ_REMOVE(&cfq->q_mq, m);
1006 MBUFQ_NEXT(m) = NULL;
1007 cfq->q_start += len;
1008
1009 CFIL_QUEUE_VERIFY(cfq);
1010}
1011
1012static mbuf_t
1013cfil_queue_first(struct cfil_queue *cfq)
1014{
1015 return (MBUFQ_FIRST(&cfq->q_mq));
1016}
1017
1018static mbuf_t
1019cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1020{
1021#pragma unused(cfq)
1022 return (MBUFQ_NEXT(m));
1023}
1024
1025static void
1026cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1027{
1028 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1029 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1030
1031 /* Verify the queues are ordered so that pending is before ctl */
1032 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1033
1034 /* The peek offset cannot be less than the pass offset */
1035 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1036
1037 /* Make sure we've updated the offset we peeked at */
1038 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1039}
1040
1041static void
1042cfil_entry_verify(struct cfil_entry *entry)
1043{
1044 cfil_entry_buf_verify(&entry->cfe_snd);
1045 cfil_entry_buf_verify(&entry->cfe_rcv);
1046}
1047
1048static void
1049cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1050{
1051 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1052
1053 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1054 VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
1055}
1056
1057static void
1058cfil_info_verify(struct cfil_info *cfil_info)
1059{
1060 int i;
1061
1062 if (cfil_info == NULL)
1063 return;
1064
1065 cfil_info_buf_verify(&cfil_info->cfi_snd);
1066 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1067
1068 for (i = 0; i < MAX_CONTENT_FILTER; i++)
1069 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1070}
1071
1072static void
1073verify_content_filter(struct content_filter *cfc)
1074{
1075 struct cfil_entry *entry;
1076 uint32_t count = 0;
1077
1078 VERIFY(cfc->cf_sock_count >= 0);
1079
1080 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1081 count++;
1082 VERIFY(cfc == entry->cfe_filter);
1083 }
1084 VERIFY(count == cfc->cf_sock_count);
1085}
1086
1087/*
1088 * Kernel control socket callbacks
1089 */
1090static errno_t
1091cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1092 void **unitinfo)
1093{
1094 errno_t error = 0;
1095 struct content_filter *cfc = NULL;
1096
1097 CFIL_LOG(LOG_NOTICE, "");
1098
1099 cfc = zalloc(content_filter_zone);
1100 if (cfc == NULL) {
1101 CFIL_LOG(LOG_ERR, "zalloc failed");
1102 error = ENOMEM;
1103 goto done;
1104 }
1105 bzero(cfc, sizeof(struct content_filter));
1106
1107 cfil_rw_lock_exclusive(&cfil_lck_rw);
1108 if (content_filters == NULL) {
1109 struct content_filter **tmp;
1110
1111 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1112
1113 MALLOC(tmp,
1114 struct content_filter **,
1115 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1116 M_TEMP,
1117 M_WAITOK | M_ZERO);
1118
1119 cfil_rw_lock_exclusive(&cfil_lck_rw);
1120
1121 if (tmp == NULL && content_filters == NULL) {
1122 error = ENOMEM;
1123 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1124 goto done;
1125 }
1126 /* Another thread may have won the race */
1127 if (content_filters != NULL)
1128 FREE(tmp, M_TEMP);
1129 else
1130 content_filters = tmp;
1131 }
1132
1133 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1134 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1135 error = EINVAL;
1136 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1137 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1138 error = EADDRINUSE;
1139 } else {
1140 /*
1141 * kernel control socket kcunit numbers start at 1
1142 */
1143 content_filters[sac->sc_unit - 1] = cfc;
1144
1145 cfc->cf_kcref = kctlref;
1146 cfc->cf_kcunit = sac->sc_unit;
1147 TAILQ_INIT(&cfc->cf_sock_entries);
1148
1149 *unitinfo = cfc;
1150 cfil_active_count++;
1151 }
1152 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1153done:
1154 if (error != 0 && cfc != NULL)
1155 zfree(content_filter_zone, cfc);
1156
1157 if (error == 0)
1158 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1159 else
1160 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1161
1162 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1163 error, cfil_active_count, sac->sc_unit);
1164
1165 return (error);
1166}
1167
1168static errno_t
1169cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1170{
1171#pragma unused(kctlref)
1172 errno_t error = 0;
1173 struct content_filter *cfc;
1174 struct cfil_entry *entry;
1175 uint64_t sock_flow_id = 0;
1176
1177 CFIL_LOG(LOG_NOTICE, "");
1178
1179 if (content_filters == NULL) {
1180 CFIL_LOG(LOG_ERR, "no content filter");
1181 error = EINVAL;
1182 goto done;
1183 }
1184 if (kcunit > MAX_CONTENT_FILTER) {
1185 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1186 kcunit, MAX_CONTENT_FILTER);
1187 error = EINVAL;
1188 goto done;
1189 }
1190
1191 cfc = (struct content_filter *)unitinfo;
1192 if (cfc == NULL)
1193 goto done;
1194
1195 cfil_rw_lock_exclusive(&cfil_lck_rw);
1196 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1197 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1198 kcunit);
1199 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1200 goto done;
1201 }
1202 cfc->cf_flags |= CFF_DETACHING;
1203 /*
1204 * Remove all sockets from the filter
1205 */
1206 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1207 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1208
1209 verify_content_filter(cfc);
1210 /*
1211 * Accept all outstanding data by pushing to next filter
1212 * or back to socket
1213 *
1214 * TBD: Actually we should make sure all data has been pushed
1215 * back to socket
1216 */
1217 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1218 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1219 struct socket *so = cfil_info->cfi_so;
1220 sock_flow_id = cfil_info->cfi_sock_id;
1221
1222 /* Need to let data flow immediately */
1223 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1224 CFEF_DATA_START;
1225
1226 /*
1227 * Respect locking hierarchy
1228 */
1229 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1230
1231 socket_lock(so, 1);
1232
1233 /*
1234 * When cfe_filter is NULL the filter is detached
1235 * and the entry has been removed from cf_sock_entries
1236 */
1237 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1238 cfil_rw_lock_exclusive(&cfil_lck_rw);
1239 goto release;
1240 }
1241
1242 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1243 CFM_MAX_OFFSET,
1244 CFM_MAX_OFFSET);
1245
1246 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1247 CFM_MAX_OFFSET,
1248 CFM_MAX_OFFSET);
1249
1250 cfil_rw_lock_exclusive(&cfil_lck_rw);
1251
1252 /*
1253 * Check again to make sure if the cfil_info is still valid
1254 * as the socket may have been unlocked when when calling
1255 * cfil_acquire_sockbuf()
1256 */
1257 if (entry->cfe_filter == NULL ||
1258 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1259 goto release;
1260 }
1261
1262 /* The filter is now detached */
1263 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1264#if LIFECYCLE_DEBUG
1265 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1266#endif
1267 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1268 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1269 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1270 cfil_filters_attached(so) == 0) {
1271 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1272 (uint64_t)VM_KERNEL_ADDRPERM(so));
1273 wakeup((caddr_t)cfil_info);
1274 }
1275
1276 /*
1277 * Remove the filter entry from the content filter
1278 * but leave the rest of the state intact as the queues
1279 * may not be empty yet
1280 */
1281 entry->cfe_filter = NULL;
1282 entry->cfe_necp_control_unit = 0;
1283
1284 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1285 cfc->cf_sock_count--;
1286release:
1287 socket_unlock(so, 1);
1288 }
1289 }
1290 verify_content_filter(cfc);
1291
1292 VERIFY(cfc->cf_sock_count == 0);
1293
1294 /*
1295 * Make filter inactive
1296 */
1297 content_filters[kcunit - 1] = NULL;
1298 cfil_active_count--;
1299 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1300
1301 zfree(content_filter_zone, cfc);
1302done:
1303 if (error == 0)
1304 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1305 else
1306 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1307
1308 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1309 error, cfil_active_count, kcunit);
1310
1311 return (error);
1312}
1313
1314/*
1315 * cfil_acquire_sockbuf()
1316 *
1317 * Prevent any other thread from acquiring the sockbuf
1318 * We use sb_cfil_thread as a semaphore to prevent other threads from
1319 * messing with the sockbuf -- see sblock()
1320 * Note: We do not set SB_LOCK here because the thread may check or modify
1321 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1322 * sblock(), sbunlock() or sodefunct()
1323 */
1324static int
1325cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1326{
1327 thread_t tp = current_thread();
1328 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1329 lck_mtx_t *mutex_held;
1330 int error = 0;
1331
1332 /*
1333 * Wait until no thread is holding the sockbuf and other content
1334 * filter threads have released the sockbuf
1335 */
1336 while ((sb->sb_flags & SB_LOCK) ||
1337 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1338 if (so->so_proto->pr_getlock != NULL)
1339 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1340 else
1341 mutex_held = so->so_proto->pr_domain->dom_mtx;
1342
1343 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1344
1345 sb->sb_wantlock++;
1346 VERIFY(sb->sb_wantlock != 0);
1347
1348 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1349 NULL);
1350
1351 VERIFY(sb->sb_wantlock != 0);
1352 sb->sb_wantlock--;
1353 }
1354 /*
1355 * Use reference count for repetitive calls on same thread
1356 */
1357 if (sb->sb_cfil_refs == 0) {
1358 VERIFY(sb->sb_cfil_thread == NULL);
1359 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1360
1361 sb->sb_cfil_thread = tp;
1362 sb->sb_flags |= SB_LOCK;
1363 }
1364 sb->sb_cfil_refs++;
1365
1366 /* We acquire the socket buffer when we need to cleanup */
1367 if (cfil_info == NULL) {
1368 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1369 (uint64_t)VM_KERNEL_ADDRPERM(so));
1370 error = 0;
1371 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1372 CFIL_LOG(LOG_ERR, "so %llx drop set",
1373 (uint64_t)VM_KERNEL_ADDRPERM(so));
1374 error = EPIPE;
1375 }
1376
1377 return (error);
1378}
1379
1380static void
1381cfil_release_sockbuf(struct socket *so, int outgoing)
1382{
1383 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1384 thread_t tp = current_thread();
1385
1386 socket_lock_assert_owned(so);
1387
1388 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
1389 panic("%s sb_cfil_thread %p not current %p", __func__,
1390 sb->sb_cfil_thread, tp);
1391 /*
1392 * Don't panic if we are defunct because SB_LOCK has
1393 * been cleared by sodefunct()
1394 */
1395 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
1396 panic("%s SB_LOCK not set on %p", __func__,
1397 sb);
1398 /*
1399 * We can unlock when the thread unwinds to the last reference
1400 */
1401 sb->sb_cfil_refs--;
1402 if (sb->sb_cfil_refs == 0) {
1403 sb->sb_cfil_thread = NULL;
1404 sb->sb_flags &= ~SB_LOCK;
1405
1406 if (sb->sb_wantlock > 0)
1407 wakeup(&sb->sb_flags);
1408 }
1409}
1410
1411cfil_sock_id_t
1412cfil_sock_id_from_socket(struct socket *so)
1413{
1414 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
1415 return (so->so_cfil->cfi_sock_id);
1416 else
1417 return (CFIL_SOCK_ID_NONE);
1418}
1419
1420static bool
1421cfil_socket_safe_lock(struct inpcb *inp)
1422{
1423 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1424 socket_lock(inp->inp_socket, 1);
1425 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1426 return true;
1427 }
1428 socket_unlock(inp->inp_socket, 1);
1429 }
1430 return false;
1431}
1432
1433static struct socket *
1434cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1435{
1436 struct socket *so = NULL;
1437 u_int64_t gencnt = cfil_sock_id >> 32;
1438 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1439 struct inpcb *inp = NULL;
1440 struct inpcbinfo *pcbinfo = NULL;
1441
1442#if VERDICT_DEBUG
1443 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1444#endif
1445
1446 if (udp_only)
1447 goto find_udp;
1448
1449 pcbinfo = &tcbinfo;
1450 lck_rw_lock_shared(pcbinfo->ipi_lock);
1451 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1452 if (inp->inp_state != INPCB_STATE_DEAD &&
1453 inp->inp_socket != NULL &&
1454 inp->inp_flowhash == flowhash &&
1455 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1456 inp->inp_socket->so_cfil != NULL) {
1457 if (cfil_socket_safe_lock(inp))
1458 so = inp->inp_socket;
1459 break;
1460 }
1461 }
1462 lck_rw_done(pcbinfo->ipi_lock);
1463 if (so != NULL) {
1464 goto done;
1465 }
1466
1467find_udp:
1468
1469 pcbinfo = &udbinfo;
1470 lck_rw_lock_shared(pcbinfo->ipi_lock);
1471 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1472 if (inp->inp_state != INPCB_STATE_DEAD &&
1473 inp->inp_socket != NULL &&
1474 inp->inp_socket->so_cfil_db != NULL &&
1475 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1476 if (cfil_socket_safe_lock(inp))
1477 so = inp->inp_socket;
1478 break;
1479 }
1480 }
1481 lck_rw_done(pcbinfo->ipi_lock);
1482
1483done:
1484 if (so == NULL) {
1485 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1486 CFIL_LOG(LOG_DEBUG,
1487 "no socket for sock_id %llx gencnt %llx flowhash %x",
1488 cfil_sock_id, gencnt, flowhash);
1489 }
1490
1491 return (so);
1492}
1493
1494static struct socket *
1495cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1496{
1497 struct socket *so = NULL;
1498 struct inpcb *inp = NULL;
1499 struct inpcbinfo *pcbinfo = &tcbinfo;
1500
1501 lck_rw_lock_shared(pcbinfo->ipi_lock);
1502 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1503 if (inp->inp_state != INPCB_STATE_DEAD &&
1504 inp->inp_socket != NULL &&
1505 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1506 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1507 if (cfil_socket_safe_lock(inp))
1508 so = inp->inp_socket;
1509 break;
1510 }
1511 }
1512 lck_rw_done(pcbinfo->ipi_lock);
1513 if (so != NULL) {
1514 goto done;
1515 }
1516
1517 pcbinfo = &udbinfo;
1518 lck_rw_lock_shared(pcbinfo->ipi_lock);
1519 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1520 if (inp->inp_state != INPCB_STATE_DEAD &&
1521 inp->inp_socket != NULL &&
1522 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1523 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1524 if (cfil_socket_safe_lock(inp))
1525 so = inp->inp_socket;
1526 break;
1527 }
1528 }
1529 lck_rw_done(pcbinfo->ipi_lock);
1530
1531done:
1532 return (so);
1533}
1534
1535static errno_t
1536cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1537 int flags)
1538{
1539#pragma unused(kctlref, flags)
1540 errno_t error = 0;
1541 struct cfil_msg_hdr *msghdr;
1542 struct content_filter *cfc = (struct content_filter *)unitinfo;
1543 struct socket *so;
1544 struct cfil_msg_action *action_msg;
1545 struct cfil_entry *entry;
1546 struct cfil_info *cfil_info = NULL;
1547
1548 CFIL_LOG(LOG_INFO, "");
1549
1550 if (content_filters == NULL) {
1551 CFIL_LOG(LOG_ERR, "no content filter");
1552 error = EINVAL;
1553 goto done;
1554 }
1555 if (kcunit > MAX_CONTENT_FILTER) {
1556 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1557 kcunit, MAX_CONTENT_FILTER);
1558 error = EINVAL;
1559 goto done;
1560 }
1561
1562 if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
1563 CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
1564 error = EINVAL;
1565 goto done;
1566 }
1567 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1568 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1569 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1570 error = EINVAL;
1571 goto done;
1572 }
1573 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1574 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1575 error = EINVAL;
1576 goto done;
1577 }
1578 /* Validate action operation */
1579 switch (msghdr->cfm_op) {
1580 case CFM_OP_DATA_UPDATE:
1581 OSIncrementAtomic(
1582 &cfil_stats.cfs_ctl_action_data_update);
1583 break;
1584 case CFM_OP_DROP:
1585 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1586 break;
1587 case CFM_OP_BLESS_CLIENT:
1588 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1589 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1590 error = EINVAL;
1591 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1592 msghdr->cfm_len,
1593 msghdr->cfm_op);
1594 goto done;
1595 }
1596 error = cfil_action_bless_client(kcunit, msghdr);
1597 goto done;
1598 default:
1599 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1600 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1601 error = EINVAL;
1602 goto done;
1603 }
1604 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1605 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1606 error = EINVAL;
1607 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1608 msghdr->cfm_len,
1609 msghdr->cfm_op);
1610 goto done;
1611 }
1612 cfil_rw_lock_shared(&cfil_lck_rw);
1613 if (cfc != (void *)content_filters[kcunit - 1]) {
1614 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1615 kcunit);
1616 error = EINVAL;
1617 cfil_rw_unlock_shared(&cfil_lck_rw);
1618 goto done;
1619 }
1620 cfil_rw_unlock_shared(&cfil_lck_rw);
1621
1622 // Search for socket (TCP+UDP and lock so)
1623 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1624 if (so == NULL) {
1625 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1626 msghdr->cfm_sock_id);
1627 error = EINVAL;
1628 goto done;
1629 }
1630
1631 cfil_info = so->so_cfil_db != NULL ?
1632 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1633
1634 if (cfil_info == NULL) {
1635 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1636 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1637 error = EINVAL;
1638 goto unlock;
1639 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1640 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1641 (uint64_t)VM_KERNEL_ADDRPERM(so));
1642 error = EINVAL;
1643 goto unlock;
1644 }
1645 entry = &cfil_info->cfi_entries[kcunit - 1];
1646 if (entry->cfe_filter == NULL) {
1647 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1648 (uint64_t)VM_KERNEL_ADDRPERM(so));
1649 error = EINVAL;
1650 goto unlock;
1651 }
1652
1653 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
1654 entry->cfe_flags |= CFEF_DATA_START;
1655 else {
1656 CFIL_LOG(LOG_ERR,
1657 "so %llx attached not sent for %u",
1658 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1659 error = EINVAL;
1660 goto unlock;
1661 }
1662
1663 microuptime(&entry->cfe_last_action);
1664 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1665
1666 action_msg = (struct cfil_msg_action *)msghdr;
1667
1668 switch (msghdr->cfm_op) {
1669 case CFM_OP_DATA_UPDATE:
1670#if VERDICT_DEBUG
1671 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1672 (uint64_t)VM_KERNEL_ADDRPERM(so),
1673 cfil_info->cfi_sock_id,
1674 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1675 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1676#endif
1677 if (action_msg->cfa_out_peek_offset != 0 ||
1678 action_msg->cfa_out_pass_offset != 0)
1679 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1680 action_msg->cfa_out_pass_offset,
1681 action_msg->cfa_out_peek_offset);
1682 if (error == EJUSTRETURN)
1683 error = 0;
1684 if (error != 0)
1685 break;
1686 if (action_msg->cfa_in_peek_offset != 0 ||
1687 action_msg->cfa_in_pass_offset != 0)
1688 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
1689 action_msg->cfa_in_pass_offset,
1690 action_msg->cfa_in_peek_offset);
1691 if (error == EJUSTRETURN)
1692 error = 0;
1693 break;
1694
1695 case CFM_OP_DROP:
1696 error = cfil_action_drop(so, cfil_info, kcunit);
1697 break;
1698
1699 default:
1700 error = EINVAL;
1701 break;
1702 }
1703unlock:
1704 socket_unlock(so, 1);
1705done:
1706 mbuf_freem(m);
1707
1708 if (error == 0)
1709 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1710 else
1711 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1712
1713 return (error);
1714}
1715
1716static errno_t
1717cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1718 int opt, void *data, size_t *len)
1719{
1720#pragma unused(kctlref, opt)
1721 struct cfil_info *cfil_info = NULL;
1722 errno_t error = 0;
1723 struct content_filter *cfc = (struct content_filter *)unitinfo;
1724
1725 CFIL_LOG(LOG_NOTICE, "");
1726
1727 cfil_rw_lock_shared(&cfil_lck_rw);
1728
1729 if (content_filters == NULL) {
1730 CFIL_LOG(LOG_ERR, "no content filter");
1731 error = EINVAL;
1732 goto done;
1733 }
1734 if (kcunit > MAX_CONTENT_FILTER) {
1735 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1736 kcunit, MAX_CONTENT_FILTER);
1737 error = EINVAL;
1738 goto done;
1739 }
1740 if (cfc != (void *)content_filters[kcunit - 1]) {
1741 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1742 kcunit);
1743 error = EINVAL;
1744 goto done;
1745 }
1746 switch (opt) {
1747 case CFIL_OPT_NECP_CONTROL_UNIT:
1748 if (*len < sizeof(uint32_t)) {
1749 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1750 error = EINVAL;
1751 goto done;
1752 }
1753 if (data != NULL) {
1754 *(uint32_t *)data = cfc->cf_necp_control_unit;
1755 }
1756 break;
1757 case CFIL_OPT_GET_SOCKET_INFO:
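/*
 * Illustrative only (not part of this file): a user space agent would
 * typically query this with getsockopt() on its connected kernel control
 * socket, e.g.
 *     struct cfil_opt_sock_info info = { .cfs_sock_id = sock_id };
 *     socklen_t len = sizeof(info);
 *     getsockopt(ctl_fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO, &info, &len);
 * where ctl_fd stands for the agent's kernel control socket descriptor.
 */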
1758 if (*len != sizeof(struct cfil_opt_sock_info)) {
1759 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
1760 error = EINVAL;
1761 goto done;
1762 }
1763 if (data == NULL) {
1764 CFIL_LOG(LOG_ERR, "data not passed");
1765 error = EINVAL;
1766 goto done;
1767 }
1768
1769 struct cfil_opt_sock_info *sock_info =
1770 (struct cfil_opt_sock_info *) data;
1771
1772 // Unlock here so that we never hold both cfil_lck_rw and the
1773 // socket_lock at the same time. Otherwise, this can deadlock
1774 // because soclose() takes the socket_lock and then exclusive
1775 // cfil_lck_rw and we require the opposite order.
1776
1777 // WARNING: Be sure to never use anything protected
1778 // by cfil_lck_rw beyond this point.
1779 // WARNING: Be sure to avoid fallthrough and
1780 // goto return_already_unlocked from this branch.
1781 cfil_rw_unlock_shared(&cfil_lck_rw);
1782
1783 // Search for the socket (TCP or UDP) and lock it
1784 struct socket *sock =
1785 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
1786 if (sock == NULL) {
1787#if LIFECYCLE_DEBUG
1788 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1789 sock_info->cfs_sock_id);
1790#endif
1791 error = ENOENT;
1792 goto return_already_unlocked;
1793 }
1794
1795 cfil_info = (sock->so_cfil_db != NULL) ?
1796 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
1797
1798 if (cfil_info == NULL) {
1799#if LIFECYCLE_DEBUG
1800 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1801 (uint64_t)VM_KERNEL_ADDRPERM(sock));
1802#endif
1803 error = EINVAL;
1804 socket_unlock(sock, 1);
1805 goto return_already_unlocked;
1806 }
1807
1808 // Fill out family, type, and protocol
1809 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
1810 sock_info->cfs_sock_type = sock->so_proto->pr_type;
1811 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
1812
1813 // Source and destination addresses
1814 struct inpcb *inp = sotoinpcb(sock);
1815 if (inp->inp_vflag & INP_IPV6) {
1816 struct in6_addr *laddr = NULL, *faddr = NULL;
1817 u_int16_t lport = 0, fport = 0;
1818
1819 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
1820 &laddr, &faddr, &lport, &fport);
1821 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1822 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1823 } else if (inp->inp_vflag & INP_IPV4) {
1824 struct in_addr laddr = {0}, faddr = {0};
1825 u_int16_t lport = 0, fport = 0;
1826
1827 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
1828 &laddr, &faddr, &lport, &fport);
1829 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1830 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1831 }
1832
1833 // Set the pid info
1834 sock_info->cfs_pid = sock->last_pid;
1835 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
1836
1837 if (sock->so_flags & SOF_DELEGATED) {
1838 sock_info->cfs_e_pid = sock->e_pid;
1839 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
1840 } else {
1841 sock_info->cfs_e_pid = sock->last_pid;
1842 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
1843 }
1844
1845 socket_unlock(sock, 1);
1846
1847 goto return_already_unlocked;
1848 default:
1849 error = ENOPROTOOPT;
1850 break;
1851 }
1852done:
1853 cfil_rw_unlock_shared(&cfil_lck_rw);
1854
1855 return (error);
1856
1857return_already_unlocked:
1858
1859 return (error);
1860}
1861
1862static errno_t
1863cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1864 int opt, void *data, size_t len)
1865{
1866#pragma unused(kctlref, opt)
1867 errno_t error = 0;
1868 struct content_filter *cfc = (struct content_filter *)unitinfo;
1869
1870 CFIL_LOG(LOG_NOTICE, "");
1871
1872 cfil_rw_lock_exclusive(&cfil_lck_rw);
1873
1874 if (content_filters == NULL) {
1875 CFIL_LOG(LOG_ERR, "no content filter");
1876 error = EINVAL;
1877 goto done;
1878 }
1879 if (kcunit > MAX_CONTENT_FILTER) {
1880 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1881 kcunit, MAX_CONTENT_FILTER);
1882 error = EINVAL;
1883 goto done;
1884 }
1885 if (cfc != (void *)content_filters[kcunit - 1]) {
1886 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1887 kcunit);
1888 error = EINVAL;
1889 goto done;
1890 }
1891 switch (opt) {
1892 case CFIL_OPT_NECP_CONTROL_UNIT:
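/*
 * Illustrative only: the user space agent would typically set this once,
 * right after connecting, with something like
 *     uint32_t unit = necp_filter_control_unit;
 *     setsockopt(ctl_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT, &unit, sizeof(unit));
 * where ctl_fd is the agent's kernel control socket descriptor and unit is
 * its NECP filter control unit.
 */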
1893 if (len < sizeof(uint32_t)) {
1894 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1895 "len too small %lu", len);
1896 error = EINVAL;
1897 goto done;
1898 }
1899 if (cfc->cf_necp_control_unit != 0) {
1900 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1901 "already set %u",
1902 cfc->cf_necp_control_unit);
1903 error = EINVAL;
1904 goto done;
1905 }
1906 cfc->cf_necp_control_unit = *(uint32_t *)data;
1907 break;
1908 default:
1909 error = ENOPROTOOPT;
1910 break;
1911 }
1912done:
1913 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1914
1915 return (error);
1916}
1917
1918
1919static void
1920cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
1921{
1922#pragma unused(kctlref, flags)
1923 struct content_filter *cfc = (struct content_filter *)unitinfo;
1924 struct socket *so = NULL;
1925 int error;
1926 struct cfil_entry *entry;
1927 struct cfil_info *cfil_info = NULL;
1928
1929 CFIL_LOG(LOG_INFO, "");
1930
1931 if (content_filters == NULL) {
1932 CFIL_LOG(LOG_ERR, "no content filter");
1933 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1934 return;
1935 }
1936 if (kcunit > MAX_CONTENT_FILTER) {
1937 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1938 kcunit, MAX_CONTENT_FILTER);
1939 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1940 return;
1941 }
1942 cfil_rw_lock_shared(&cfil_lck_rw);
1943 if (cfc != (void *)content_filters[kcunit - 1]) {
1944 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1945 kcunit);
1946 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1947 goto done;
1948 }
1949 /* Let's assume the flow control is lifted */
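/*
 * cfil_rw_lock_shared_to_exclusive() drops the shared lock when the
 * upgrade fails, so in that case the exclusive lock is taken from scratch.
 */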
1950 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
1951 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
1952 cfil_rw_lock_exclusive(&cfil_lck_rw);
1953
1954 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
1955
1956 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
1957 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
1958 }
1959 /*
1960 * Flow control will be raised again as soon as an entry cannot enqueue
1961 * to the kernel control socket
1962 */
1963 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
1964 verify_content_filter(cfc);
1965
1966 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
1967
1968 /* Find an entry that is flow controlled */
1969 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1970 if (entry->cfe_cfil_info == NULL ||
1971 entry->cfe_cfil_info->cfi_so == NULL)
1972 continue;
1973 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0)
1974 continue;
1975 }
1976 if (entry == NULL)
1977 break;
1978
1979 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
1980
1981 cfil_info = entry->cfe_cfil_info;
1982 so = cfil_info->cfi_so;
1983
1984 cfil_rw_unlock_shared(&cfil_lck_rw);
1985 socket_lock(so, 1);
1986
1987 do {
1988 error = cfil_acquire_sockbuf(so, cfil_info, 1);
1989 if (error == 0)
1990 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
1991 cfil_release_sockbuf(so, 1);
1992 if (error != 0)
1993 break;
1994
1995 error = cfil_acquire_sockbuf(so, cfil_info, 0);
1996 if (error == 0)
1997 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
1998 cfil_release_sockbuf(so, 0);
1999 } while (0);
2000
2001 socket_lock_assert_owned(so);
2002 socket_unlock(so, 1);
2003
2004 cfil_rw_lock_shared(&cfil_lck_rw);
2005 }
2006done:
2007 cfil_rw_unlock_shared(&cfil_lck_rw);
2008}
2009
2010void
2011cfil_init(void)
2012{
2013 struct kern_ctl_reg kern_ctl;
2014 errno_t error = 0;
2015 vm_size_t content_filter_size = 0; /* size of content_filter */
2016 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2017 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2018 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2019 unsigned int mbuf_limit = 0;
2020
2021 CFIL_LOG(LOG_NOTICE, "");
2022
2023 /*
2024 * Compile time verifications
2025 */
2026 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2027 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2028 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2029 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2030
2031 /*
2032 * Run time verifications
2033 */
2034 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2035 sizeof(uint32_t)));
2036 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2037 sizeof(uint32_t)));
2038 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2039 sizeof(uint32_t)));
2040 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2041 sizeof(uint32_t)));
2042
2043 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2044 sizeof(uint32_t)));
2045 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2046 sizeof(uint32_t)));
2047
2048 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2049 sizeof(uint32_t)));
2050 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2051 sizeof(uint32_t)));
2052 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2053 sizeof(uint32_t)));
2054 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2055 sizeof(uint32_t)));
2056
2057 /*
2058 * Zone for content filter kernel control sockets
2059 */
2060 content_filter_size = sizeof(struct content_filter);
2061 content_filter_zone = zinit(content_filter_size,
2062 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2063 0,
2064 CONTENT_FILTER_ZONE_NAME);
2065 if (content_filter_zone == NULL) {
2066 panic("%s: zinit(%s) failed", __func__,
2067 CONTENT_FILTER_ZONE_NAME);
2068 /* NOTREACHED */
2069 }
2070 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2071 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2072
2073 /*
2074 * Zone for per socket content filters
2075 */
2076 cfil_info_size = sizeof(struct cfil_info);
2077 cfil_info_zone = zinit(cfil_info_size,
2078 CFIL_INFO_ZONE_MAX * cfil_info_size,
2079 0,
2080 CFIL_INFO_ZONE_NAME);
2081 if (cfil_info_zone == NULL) {
2082 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2083 /* NOTREACHED */
2084 }
2085 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2086 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2087
2088 /*
2089 * Zones for the content filter hash entries and db
2090 */
2091 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2092 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2093 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2094 0,
2095 CFIL_HASH_ENTRY_ZONE_NAME);
2096 if (cfil_hash_entry_zone == NULL) {
2097 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2098 /* NOTREACHED */
2099 }
2100 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2101 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2102
2103 cfil_db_size = sizeof(struct cfil_db);
2104 cfil_db_zone = zinit(cfil_db_size,
2105 CFIL_DB_ZONE_MAX * cfil_db_size,
2106 0,
2107 CFIL_DB_ZONE_NAME);
2108 if (cfil_db_zone == NULL) {
2109 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2110 /* NOTREACHED */
2111 }
2112 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2113 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2114
2115 /*
2116 * Allocate locks
2117 */
2118 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2119 if (cfil_lck_grp_attr == NULL) {
2120 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2121 /* NOTREACHED */
2122 }
2123 cfil_lck_grp = lck_grp_alloc_init("content filter",
2124 cfil_lck_grp_attr);
2125 if (cfil_lck_grp == NULL) {
2126 panic("%s: lck_grp_alloc_init failed", __func__);
2127 /* NOTREACHED */
2128 }
2129 cfil_lck_attr = lck_attr_alloc_init();
2130 if (cfil_lck_attr == NULL) {
2131 panic("%s: lck_attr_alloc_init failed", __func__);
2132 /* NOTREACHED */
2133 }
2134 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2135
2136 TAILQ_INIT(&cfil_sock_head);
2137
2138 /*
2139 * Register kernel control
2140 */
2141 bzero(&kern_ctl, sizeof(kern_ctl));
2142 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2143 sizeof(kern_ctl.ctl_name));
2144 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2145 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2146 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2147 kern_ctl.ctl_connect = cfil_ctl_connect;
2148 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2149 kern_ctl.ctl_send = cfil_ctl_send;
2150 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2151 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2152 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2153 error = ctl_register(&kern_ctl, &cfil_kctlref);
2154 if (error != 0) {
2155 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2156 return;
2157 }
2158
2159 // Spawn a thread for garbage collection
2160 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2161 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2162 panic_plain("%s: Can't create UDP GC thread", __func__);
2163 /* NOTREACHED */
2164 }
2165 /* this must not fail */
2166 VERIFY(cfil_udp_gc_thread != NULL);
2167
2168 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2169 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
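/*
 * The same threshold is kept in two forms: cfil_udp_gc_mbuf_cnt_max in bytes
 * and cfil_udp_gc_mbuf_num_max in mbuf clusters (the byte limit shifted down
 * by MCLSHIFT).
 */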
2170 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2171 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2172}
2173
2174struct cfil_info *
2175cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2176{
2177 int kcunit;
2178 struct cfil_info *cfil_info = NULL;
2179 struct inpcb *inp = sotoinpcb(so);
2180
2181 CFIL_LOG(LOG_INFO, "");
2182
2183 socket_lock_assert_owned(so);
2184
2185 cfil_info = zalloc(cfil_info_zone);
2186 if (cfil_info == NULL)
2187 goto done;
2188 bzero(cfil_info, sizeof(struct cfil_info));
2189
2190 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2191 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2192
2193 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2194 struct cfil_entry *entry;
2195
2196 entry = &cfil_info->cfi_entries[kcunit - 1];
2197 entry->cfe_cfil_info = cfil_info;
2198
2199 /* Initialize the filter entry */
2200 entry->cfe_filter = NULL;
2201 entry->cfe_flags = 0;
2202 entry->cfe_necp_control_unit = 0;
2203 entry->cfe_snd.cfe_pass_offset = 0;
2204 entry->cfe_snd.cfe_peek_offset = 0;
2205 entry->cfe_snd.cfe_peeked = 0;
2206 entry->cfe_rcv.cfe_pass_offset = 0;
2207 entry->cfe_rcv.cfe_peek_offset = 0;
2208 entry->cfe_rcv.cfe_peeked = 0;
2209 /*
2210 * Timestamp the last action to avoid prematurely
2211 * triggering garbage collection
2212 */
2213 microuptime(&entry->cfe_last_action);
2214
2215 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2216 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2217 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2218 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2219 }
2220
2221 cfil_rw_lock_exclusive(&cfil_lck_rw);
2222
2223 /*
2224 * Create a cfi_sock_id that's not the socket pointer!
2225 */
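/*
 * For TCP the ID packs the socket generation count in the upper 32 bits and
 * the inpcb flow hash in the lower 32 bits; for UDP the flow hash of the
 * per-socket hash entry is used instead, so every flow in the database gets
 * its own ID.
 */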
2226
2227 if (hash_entry == NULL) {
2228 // This is the TCP case, cfil_info is tracked per socket
2229 if (inp->inp_flowhash == 0)
2230 inp->inp_flowhash = inp_calc_flowhash(inp);
2231
2232 so->so_cfil = cfil_info;
2233 cfil_info->cfi_so = so;
2234 cfil_info->cfi_sock_id =
2235 ((so->so_gencnt << 32) | inp->inp_flowhash);
2236 } else {
2237 // This is the UDP case, cfil_info is tracked in per-socket hash
2238 cfil_info->cfi_so = so;
2239 hash_entry->cfentry_cfil = cfil_info;
2240 cfil_info->cfi_hash_entry = hash_entry;
2241 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2242 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2243 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2244
2245 // Wake up gc thread if this is first flow added
2246 if (cfil_sock_udp_attached_count == 0) {
2247 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2248 }
2249
2250 cfil_sock_udp_attached_count++;
2251 }
2252
2253 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2254
2255 cfil_sock_attached_count++;
2256
2257 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2258
2259done:
2260 if (cfil_info != NULL)
2261 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2262 else
2263 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2264
2265 return (cfil_info);
2266}
2267
2268int
2269cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2270{
2271 int kcunit;
2272 int attached = 0;
2273
2274 CFIL_LOG(LOG_INFO, "");
2275
2276 socket_lock_assert_owned(so);
2277
2278 cfil_rw_lock_exclusive(&cfil_lck_rw);
2279
2280 for (kcunit = 1;
2281 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2282 kcunit++) {
2283 struct content_filter *cfc = content_filters[kcunit - 1];
2284 struct cfil_entry *entry;
2285
2286 if (cfc == NULL)
2287 continue;
2288 if (cfc->cf_necp_control_unit != filter_control_unit)
2289 continue;
2290
2291 entry = &cfil_info->cfi_entries[kcunit - 1];
2292
2293 entry->cfe_filter = cfc;
2294 entry->cfe_necp_control_unit = filter_control_unit;
2295 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2296 cfc->cf_sock_count++;
2297 verify_content_filter(cfc);
2298 attached = 1;
2299 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2300 break;
2301 }
2302
2303 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2304
2305 return (attached);
2306}
2307
2308static void
2309cfil_info_free(struct cfil_info *cfil_info)
2310{
2311 int kcunit;
2312 uint64_t in_drain = 0;
2313 uint64_t out_drained = 0;
2314
2315 if (cfil_info == NULL)
2316 return;
2317
2318 CFIL_LOG(LOG_INFO, "");
2319
2320 cfil_rw_lock_exclusive(&cfil_lck_rw);
2321
2322 for (kcunit = 1;
2323 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2324 kcunit++) {
2325 struct cfil_entry *entry;
2326 struct content_filter *cfc;
2327
2328 entry = &cfil_info->cfi_entries[kcunit - 1];
2329
2330 /* Don't be silly and try to detach twice */
2331 if (entry->cfe_filter == NULL)
2332 continue;
2333
2334 cfc = content_filters[kcunit - 1];
2335
2336 VERIFY(cfc == entry->cfe_filter);
2337
2338 entry->cfe_filter = NULL;
2339 entry->cfe_necp_control_unit = 0;
2340 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2341 cfc->cf_sock_count--;
2342
2343 verify_content_filter(cfc);
2344 }
2345 if (cfil_info->cfi_hash_entry != NULL)
2346 cfil_sock_udp_attached_count--;
2347 cfil_sock_attached_count--;
2348 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2349
2350 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2351 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2352
2353 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2354 struct cfil_entry *entry;
2355
2356 entry = &cfil_info->cfi_entries[kcunit - 1];
2357 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2358 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2359 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2360 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2361 }
2362 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2363
2364 if (out_drained)
2365 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2366 if (in_drain)
2367 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2368
2369 zfree(cfil_info_zone, cfil_info);
2370}
2371
2372/*
2373 * Entry point from Sockets layer
2374 * The socket is locked.
2375 */
2376errno_t
2377cfil_sock_attach(struct socket *so)
2378{
2379 errno_t error = 0;
2380 uint32_t filter_control_unit;
2381
2382 socket_lock_assert_owned(so);
2383
2384 /* Limit ourselves to TCP that are not MPTCP subflows */
2385 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2386 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2387 so->so_proto->pr_type != SOCK_STREAM ||
2388 so->so_proto->pr_protocol != IPPROTO_TCP ||
2389 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2390 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
2391 goto done;
2392
2393 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2394 if (filter_control_unit == 0)
2395 goto done;
2396
2397 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2398 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2399 goto done;
2400 }
2401 if (cfil_active_count == 0) {
2402 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2403 goto done;
2404 }
2405 if (so->so_cfil != NULL) {
2406 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2407 CFIL_LOG(LOG_ERR, "already attached");
2408 } else {
2409 cfil_info_alloc(so, NULL);
2410 if (so->so_cfil == NULL) {
2411 error = ENOMEM;
2412 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2413 goto done;
2414 }
2415 }
2416 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2417 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2418 filter_control_unit);
2419 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2420 goto done;
2421 }
2422 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2423 (uint64_t)VM_KERNEL_ADDRPERM(so),
2424 filter_control_unit, so->so_cfil->cfi_sock_id);
2425
2426 so->so_flags |= SOF_CONTENT_FILTER;
2427 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2428
2429 /* Hold a reference on the socket */
2430 so->so_usecount++;
2431
2432 error = cfil_dispatch_attach_event(so, so->so_cfil, filter_control_unit);
2433 /* We can recover from flow control or out of memory errors */
2434 if (error == ENOBUFS || error == ENOMEM)
2435 error = 0;
2436 else if (error != 0)
2437 goto done;
2438
2439 CFIL_INFO_VERIFY(so->so_cfil);
2440done:
2441 return (error);
2442}
2443
2444/*
2445 * Entry point from Sockets layer
2446 * The socket is locked.
2447 */
2448errno_t
2449cfil_sock_detach(struct socket *so)
2450{
2451 if (IS_UDP(so)) {
2452 cfil_db_free(so);
2453 return (0);
2454 }
2455
2456 if (so->so_cfil) {
2457 if (so->so_flags & SOF_CONTENT_FILTER) {
2458 so->so_flags &= ~SOF_CONTENT_FILTER;
2459 VERIFY(so->so_usecount > 0);
2460 so->so_usecount--;
2461 }
2462 cfil_info_free(so->so_cfil);
2463 so->so_cfil = NULL;
2464 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2465 }
2466 return (0);
2467}
2468
2469static int
2470cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info, uint32_t filter_control_unit)
2471{
2472 errno_t error = 0;
2473 struct cfil_entry *entry = NULL;
2474 struct cfil_msg_sock_attached msg_attached;
2475 uint32_t kcunit;
2476 struct content_filter *cfc = NULL;
2477
2478 socket_lock_assert_owned(so);
2479
2480 cfil_rw_lock_shared(&cfil_lck_rw);
2481
2482 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
2483 error = EINVAL;
2484 goto done;
2485 }
2486 /*
2487 * Find the matching filter unit
2488 */
2489 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2490 cfc = content_filters[kcunit - 1];
2491
2492 if (cfc == NULL)
2493 continue;
2494 if (cfc->cf_necp_control_unit != filter_control_unit)
2495 continue;
2496 entry = &cfil_info->cfi_entries[kcunit - 1];
2497 if (entry->cfe_filter == NULL)
2498 continue;
2499
2500 VERIFY(cfc == entry->cfe_filter);
2501
2502 break;
2503 }
2504
2505 if (entry == NULL || entry->cfe_filter == NULL)
2506 goto done;
2507
2508 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED))
2509 goto done;
2510
2511 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
2512 (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);
2513
2514 /* Would be wasteful to try when flow controlled */
2515 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2516 error = ENOBUFS;
2517 goto done;
2518 }
2519
2520 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
2521 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
2522 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
2523 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
2524 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
2525 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2526
2527 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
2528 msg_attached.cfs_sock_type = so->so_proto->pr_type;
2529 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
2530 msg_attached.cfs_pid = so->last_pid;
2531 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
2532 if (so->so_flags & SOF_DELEGATED) {
2533 msg_attached.cfs_e_pid = so->e_pid;
2534 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
2535 } else {
2536 msg_attached.cfs_e_pid = so->last_pid;
2537 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
2538 }
2539
2540#if LIFECYCLE_DEBUG
2541 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2542 entry->cfe_cfil_info->cfi_sock_id);
2543#endif
2544
2545 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2546 entry->cfe_filter->cf_kcunit,
2547 &msg_attached,
2548 sizeof(struct cfil_msg_sock_attached),
2549 CTL_DATA_EOR);
2550 if (error != 0) {
2551 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2552 goto done;
2553 }
2554 microuptime(&entry->cfe_last_event);
2555 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
2556 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
2557
2558 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
2559 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
2560done:
2561
2562 /* We can recover from flow control */
2563 if (error == ENOBUFS) {
2564 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2565 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
2566
2567 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2568 cfil_rw_lock_exclusive(&cfil_lck_rw);
2569
2570 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2571
2572 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2573 } else {
2574 if (error != 0)
2575 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
2576
2577 cfil_rw_unlock_shared(&cfil_lck_rw);
2578 }
2579 return (error);
2580}
2581
2582static int
2583cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
2584{
2585 errno_t error = 0;
2586 struct mbuf *msg = NULL;
2587 struct cfil_entry *entry;
2588 struct cfe_buf *entrybuf;
2589 struct cfil_msg_hdr msg_disconnected;
2590 struct content_filter *cfc;
2591
2592 socket_lock_assert_owned(so);
2593
2594 cfil_rw_lock_shared(&cfil_lck_rw);
2595
2596 entry = &cfil_info->cfi_entries[kcunit - 1];
2597 if (outgoing)
2598 entrybuf = &entry->cfe_snd;
2599 else
2600 entrybuf = &entry->cfe_rcv;
2601
2602 cfc = entry->cfe_filter;
2603 if (cfc == NULL)
2604 goto done;
2605
2606 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2607 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2608
2609 /*
2610 * Send the disconnection event once
2611 */
2612 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
2613 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
2614 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
2615 (uint64_t)VM_KERNEL_ADDRPERM(so));
2616 goto done;
2617 }
2618
2619 /*
2620 * We're not disconnected as long as some data is waiting
2621 * to be delivered to the filter
2622 */
2623 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
2624 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
2625 (uint64_t)VM_KERNEL_ADDRPERM(so));
2626 error = EBUSY;
2627 goto done;
2628 }
2629 /* Would be wasteful to try when flow controlled */
2630 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2631 error = ENOBUFS;
2632 goto done;
2633 }
2634
2635#if LIFECYCLE_DEBUG
2636 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
2637 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
2638 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
2639#endif
2640
2641 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
2642 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
2643 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
2644 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
2645 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
2646 CFM_OP_DISCONNECT_IN;
2647 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2648 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2649 entry->cfe_filter->cf_kcunit,
2650 &msg_disconnected,
2651 sizeof(struct cfil_msg_hdr),
2652 CTL_DATA_EOR);
2653 if (error != 0) {
2654 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2655 mbuf_freem(msg);
2656 goto done;
2657 }
2658 microuptime(&entry->cfe_last_event);
2659 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
2660
2661 /* Remember we have sent the disconnection message */
2662 if (outgoing) {
2663 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
2664 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
2665 } else {
2666 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
2667 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
2668 }
2669done:
2670 if (error == ENOBUFS) {
2671 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2672 OSIncrementAtomic(
2673 &cfil_stats.cfs_disconnect_event_flow_control);
2674
2675 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2676 cfil_rw_lock_exclusive(&cfil_lck_rw);
2677
2678 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2679
2680 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2681 } else {
2682 if (error != 0)
2683 OSIncrementAtomic(
2684 &cfil_stats.cfs_disconnect_event_fail);
2685
2686 cfil_rw_unlock_shared(&cfil_lck_rw);
2687 }
2688 return (error);
2689}
2690
2691int
2692cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
2693{
2694 struct cfil_entry *entry;
2695 struct cfil_msg_sock_closed msg_closed;
2696 errno_t error = 0;
2697 struct content_filter *cfc;
2698
2699 socket_lock_assert_owned(so);
2700
2701 cfil_rw_lock_shared(&cfil_lck_rw);
2702
2703 entry = &cfil_info->cfi_entries[kcunit - 1];
2704 cfc = entry->cfe_filter;
2705 if (cfc == NULL)
2706 goto done;
2707
2708 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
2709 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2710
2711 /* Would be wasteful to try when flow controlled */
2712 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2713 error = ENOBUFS;
2714 goto done;
2715 }
2716 /*
2717 * Send a single closed message per filter
2718 */
2719 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
2720 goto done;
2721 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
2722 goto done;
2723
2724 microuptime(&entry->cfe_last_event);
2725 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
2726
2727 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
2728 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
2729 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
2730 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
2731 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
2732 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2733 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
2734 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
2735 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY);
2736 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY);
2737 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
2738
2739#if LIFECYCLE_DEBUG
2740 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
2741#endif
2742 /* for debugging
2743 if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2744 msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2745 }
2746 for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2747 CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2748 }
2749 */
2750
2751 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2752 entry->cfe_filter->cf_kcunit,
2753 &msg_closed,
2754 sizeof(struct cfil_msg_sock_closed),
2755 CTL_DATA_EOR);
2756 if (error != 0) {
2757 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
2758 error);
2759 goto done;
2760 }
2761
2762 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
2763 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
2764done:
2765 /* We can recover from flow control */
2766 if (error == ENOBUFS) {
2767 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2768 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
2769
2770 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2771 cfil_rw_lock_exclusive(&cfil_lck_rw);
2772
2773 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2774
2775 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2776 } else {
2777 if (error != 0)
2778 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
2779
2780 cfil_rw_unlock_shared(&cfil_lck_rw);
2781 }
2782
2783 return (error);
2784}
2785
2786static void
2787fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2788 struct in6_addr *ip6, u_int16_t port)
2789{
2790 struct sockaddr_in6 *sin6 = &sin46->sin6;
2791
2792 sin6->sin6_family = AF_INET6;
2793 sin6->sin6_len = sizeof(*sin6);
2794 sin6->sin6_port = port;
2795 sin6->sin6_addr = *ip6;
2796 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
2797 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
2798 sin6->sin6_addr.s6_addr16[1] = 0;
2799 }
2800}
2801
2802static void
2803fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2804 struct in_addr ip, u_int16_t port)
2805{
2806 struct sockaddr_in *sin = &sin46->sin;
2807
2808 sin->sin_family = AF_INET;
2809 sin->sin_len = sizeof(*sin);
2810 sin->sin_port = port;
2811 sin->sin_addr.s_addr = ip.s_addr;
2812}
2813
2814static void
2815cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
2816 struct in6_addr **laddr, struct in6_addr **faddr,
2817 u_int16_t *lport, u_int16_t *fport)
2818{
2819 if (entry != NULL) {
2820 *laddr = &entry->cfentry_laddr.addr6;
2821 *faddr = &entry->cfentry_faddr.addr6;
2822 *lport = entry->cfentry_lport;
2823 *fport = entry->cfentry_fport;
2824 } else {
2825 *laddr = &inp->in6p_laddr;
2826 *faddr = &inp->in6p_faddr;
2827 *lport = inp->inp_lport;
2828 *fport = inp->inp_fport;
2829 }
2830}
2831
2832static void
2833cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
2834 struct in_addr *laddr, struct in_addr *faddr,
2835 u_int16_t *lport, u_int16_t *fport)
2836{
2837 if (entry != NULL) {
2838 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
2839 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
2840 *lport = entry->cfentry_lport;
2841 *fport = entry->cfentry_fport;
2842 } else {
2843 *laddr = inp->inp_laddr;
2844 *faddr = inp->inp_faddr;
2845 *lport = inp->inp_lport;
2846 *fport = inp->inp_fport;
2847 }
2848}
2849
2850static int
2851cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
2852 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
2853{
2854 errno_t error = 0;
2855 struct mbuf *copy = NULL;
2856 struct mbuf *msg = NULL;
2857 unsigned int one = 1;
2858 struct cfil_msg_data_event *data_req;
2859 size_t hdrsize;
2860 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2861 struct cfil_entry *entry;
2862 struct cfe_buf *entrybuf;
2863 struct content_filter *cfc;
2864 struct timeval tv;
2865
2866 cfil_rw_lock_shared(&cfil_lck_rw);
2867
2868 entry = &cfil_info->cfi_entries[kcunit - 1];
2869 if (outgoing)
2870 entrybuf = &entry->cfe_snd;
2871 else
2872 entrybuf = &entry->cfe_rcv;
2873
2874 cfc = entry->cfe_filter;
2875 if (cfc == NULL)
2876 goto done;
2877
2878 data = cfil_data_start(data);
2879 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
2880 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
2881 goto done;
2882 }
2883
2884 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2885 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2886
2887 socket_lock_assert_owned(so);
2888
2889 /* Would be wasteful to try */
2890 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2891 error = ENOBUFS;
2892 goto done;
2893 }
2894
2895 /* Make a copy of the data to pass to kernel control socket */
2896 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
2897 M_COPYM_NOOP_HDR);
2898 if (copy == NULL) {
2899 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2900 error = ENOMEM;
2901 goto done;
2902 }
2903
2904 /* We need an mbuf packet for the message header */
2905 hdrsize = sizeof(struct cfil_msg_data_event);
2906 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
2907 if (error != 0) {
2908 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
2909 m_freem(copy);
2910 /*
2911 * Use ENOMEM here; ENOBUFS is reserved to indicate flow control
2912 */
2913 error = ENOMEM;
2914 goto done;
2915 }
2916 mbuf_setlen(msg, hdrsize);
2917 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
2918 msg->m_next = copy;
2919 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
2920 bzero(data_req, hdrsize);
2921 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
2922 data_req->cfd_msghdr.cfm_version = 1;
2923 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
2924 data_req->cfd_msghdr.cfm_op =
2925 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
2926 data_req->cfd_msghdr.cfm_sock_id =
2927 entry->cfe_cfil_info->cfi_sock_id;
2928 data_req->cfd_start_offset = entrybuf->cfe_peeked;
2929 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
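/*
 * cfd_start_offset/cfd_end_offset are absolute byte offsets in this
 * direction's stream; the pass/peek offsets sent back by the filter are
 * expressed in the same space.
 */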
2930
2931 /*
2932 * TBD:
2933 * For non-connected sockets we need to copy the addresses
2934 * from the passed-in parameters
2935 */
2936 if (inp->inp_vflag & INP_IPV6) {
2937 struct in6_addr *laddr = NULL, *faddr = NULL;
2938 u_int16_t lport = 0, fport = 0;
2939
2940 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2941 &laddr, &faddr, &lport, &fport);
2942 if (outgoing) {
2943 fill_ip6_sockaddr_4_6(&data_req->cfc_src, laddr, lport);
2944 fill_ip6_sockaddr_4_6(&data_req->cfc_dst, faddr, fport);
2945 } else {
2946 fill_ip6_sockaddr_4_6(&data_req->cfc_src, faddr, fport);
2947 fill_ip6_sockaddr_4_6(&data_req->cfc_dst, laddr, lport);
2948 }
2949 } else if (inp->inp_vflag & INP_IPV4) {
2950 struct in_addr laddr = {0}, faddr = {0};
2951 u_int16_t lport = 0, fport = 0;
2952
2953 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2954 &laddr, &faddr, &lport, &fport);
2955
2956 if (outgoing) {
2957 fill_ip_sockaddr_4_6(&data_req->cfc_src, laddr, lport);
2958 fill_ip_sockaddr_4_6(&data_req->cfc_dst, faddr, fport);
2959 } else {
2960 fill_ip_sockaddr_4_6(&data_req->cfc_src, faddr, fport);
2961 fill_ip_sockaddr_4_6(&data_req->cfc_dst, laddr, lport);
2962 }
2963 }
2964
2965 microuptime(&tv);
2966 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
2967
2968 /* Pass the message to the content filter */
2969 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
2970 entry->cfe_filter->cf_kcunit,
2971 msg, CTL_DATA_EOR);
2972 if (error != 0) {
2973 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
2974 mbuf_freem(msg);
2975 goto done;
2976 }
2977 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
2978 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
2979
2980#if VERDICT_DEBUG
2981 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
2982 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
2983#endif
2984
2985done:
2986 if (error == ENOBUFS) {
2987 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2988 OSIncrementAtomic(
2989 &cfil_stats.cfs_data_event_flow_control);
2990
2991 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2992 cfil_rw_lock_exclusive(&cfil_lck_rw);
2993
2994 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2995
2996 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2997 } else {
2998 if (error != 0)
2999 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3000
3001 cfil_rw_unlock_shared(&cfil_lck_rw);
3002 }
3003 return (error);
3004}
3005
3006/*
3007 * Process the queue of data waiting to be delivered to the content filter
3008 */
3009static int
3010cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3011{
3012 errno_t error = 0;
3013 struct mbuf *data, *tmp = NULL;
3014 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3015 struct cfil_entry *entry;
3016 struct cfe_buf *entrybuf;
3017 uint64_t currentoffset = 0;
3018
3019 if (cfil_info == NULL)
3020 return (0);
3021
3022 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3023 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3024
3025 socket_lock_assert_owned(so);
3026
3027 entry = &cfil_info->cfi_entries[kcunit - 1];
3028 if (outgoing)
3029 entrybuf = &entry->cfe_snd;
3030 else
3031 entrybuf = &entry->cfe_rcv;
3032
3033 /* Send attached message if not yet done */
3034 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3035 error = cfil_dispatch_attach_event(so, cfil_info, entry->cfe_necp_control_unit);
3036 if (error != 0) {
3037 /* We can recover from flow control */
3038 if (error == ENOBUFS || error == ENOMEM)
3039 error = 0;
3040 goto done;
3041 }
3042 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3043 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3044 goto done;
3045 }
3046
3047#if DATA_DEBUG
3048 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3049 entrybuf->cfe_pass_offset,
3050 entrybuf->cfe_peeked,
3051 entrybuf->cfe_peek_offset);
3052#endif
3053
3054 /* Move all data that can pass */
3055 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3056 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3057 datalen = cfil_data_length(data, NULL, NULL);
3058 tmp = data;
3059
3060 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3061 entrybuf->cfe_pass_offset) {
3062 /*
3063 * The first mbuf can fully pass
3064 */
3065 copylen = datalen;
3066 } else {
3067 /*
3068 * The first mbuf can partially pass
3069 */
3070 copylen = entrybuf->cfe_pass_offset -
3071 entrybuf->cfe_ctl_q.q_start;
3072 }
3073 VERIFY(copylen <= datalen);
3074
3075#if DATA_DEBUG
3076 CFIL_LOG(LOG_DEBUG,
3077 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3078 " datalen %u copylen %u",
3079 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3080 entrybuf->cfe_ctl_q.q_start,
3081 entrybuf->cfe_peeked,
3082 entrybuf->cfe_pass_offset,
3083 entrybuf->cfe_peek_offset,
3084 datalen, copylen);
3085#endif
3086
3087 /*
3088 * Data that passes has been peeked at explicitly or
3089 * implicitly
3090 */
3091 if (entrybuf->cfe_ctl_q.q_start + copylen >
3092 entrybuf->cfe_peeked)
3093 entrybuf->cfe_peeked =
3094 entrybuf->cfe_ctl_q.q_start + copylen;
3095 /*
3096 * Stop on partial pass
3097 */
3098 if (copylen < datalen)
3099 break;
3100
3101 /* All good, move full data from ctl queue to pending queue */
3102 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3103
3104 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3105 if (outgoing)
3106 OSAddAtomic64(datalen,
3107 &cfil_stats.cfs_pending_q_out_enqueued);
3108 else
3109 OSAddAtomic64(datalen,
3110 &cfil_stats.cfs_pending_q_in_enqueued);
3111 }
3112 CFIL_INFO_VERIFY(cfil_info);
3113 if (tmp != NULL)
3114 CFIL_LOG(LOG_DEBUG,
3115 "%llx first %llu peeked %llu pass %llu peek %llu"
3116 " datalen %u copylen %u",
3117 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3118 entrybuf->cfe_ctl_q.q_start,
3119 entrybuf->cfe_peeked,
3120 entrybuf->cfe_pass_offset,
3121 entrybuf->cfe_peek_offset,
3122 datalen, copylen);
3123 tmp = NULL;
3124
3125 /* Now deal with remaining data the filter wants to peek at */
3126 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3127 currentoffset = entrybuf->cfe_ctl_q.q_start;
3128 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3129 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3130 currentoffset += datalen) {
3131 datalen = cfil_data_length(data, NULL, NULL);
3132 tmp = data;
3133
3134 /* We've already peeked at this mbuf */
3135 if (currentoffset + datalen <= entrybuf->cfe_peeked)
3136 continue;
3137 /*
3138 * The data in the first mbuf may have been
3139 * partially peeked at
3140 */
3141 copyoffset = entrybuf->cfe_peeked - currentoffset;
3142 VERIFY(copyoffset < datalen);
3143 copylen = datalen - copyoffset;
3144 VERIFY(copylen <= datalen);
3145 /*
3146 * Do not copy more than needed
3147 */
3148 if (currentoffset + copyoffset + copylen >
3149 entrybuf->cfe_peek_offset) {
3150 copylen = entrybuf->cfe_peek_offset -
3151 (currentoffset + copyoffset);
3152 }
3153
3154#if DATA_DEBUG
3155 CFIL_LOG(LOG_DEBUG,
3156 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3157 "datalen %u copylen %u copyoffset %u",
3158 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3159 currentoffset,
3160 entrybuf->cfe_peeked,
3161 entrybuf->cfe_pass_offset,
3162 entrybuf->cfe_peek_offset,
3163 datalen, copylen, copyoffset);
3164#endif
3165
3166 /*
3167 * Stop if there is nothing more to peek at
3168 */
3169 if (copylen == 0)
3170 break;
3171 /*
3172 * Let the filter get a peek at this span of data
3173 */
3174 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3175 outgoing, data, copyoffset, copylen);
3176 if (error != 0) {
3177 /* On error, leave data in ctl_q */
3178 break;
3179 }
3180 entrybuf->cfe_peeked += copylen;
3181 if (outgoing)
3182 OSAddAtomic64(copylen,
3183 &cfil_stats.cfs_ctl_q_out_peeked);
3184 else
3185 OSAddAtomic64(copylen,
3186 &cfil_stats.cfs_ctl_q_in_peeked);
3187
3188 /* Stop when data could not be fully peeked at */
3189 if (copylen + copyoffset < datalen)
3190 break;
3191 }
3192 CFIL_INFO_VERIFY(cfil_info);
3193 if (tmp != NULL)
3194 CFIL_LOG(LOG_DEBUG,
3195 "%llx first %llu peeked %llu pass %llu peek %llu"
3196 " datalen %u copylen %u copyoffset %u",
3197 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3198 currentoffset,
3199 entrybuf->cfe_peeked,
3200 entrybuf->cfe_pass_offset,
3201 entrybuf->cfe_peek_offset,
3202 datalen, copylen, copyoffset);
3203
3204 /*
3205 * Process data that has passed the filter
3206 */
3207 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3208 if (error != 0) {
3209 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3210 error);
3211 goto done;
3212 }
3213
3214 /*
3215 * Dispatch disconnect events that could not be sent
3216 */
3217 if (cfil_info == NULL)
3218 goto done;
3219 else if (outgoing) {
3220 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3221 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
3222 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3223 } else {
3224 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3225 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
3226 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3227 }
3228
3229done:
3230 CFIL_LOG(LOG_DEBUG,
3231 "first %llu peeked %llu pass %llu peek %llu",
3232 entrybuf->cfe_ctl_q.q_start,
3233 entrybuf->cfe_peeked,
3234 entrybuf->cfe_pass_offset,
3235 entrybuf->cfe_peek_offset);
3236
3237 CFIL_INFO_VERIFY(cfil_info);
3238 return (error);
3239}
3240
3241/*
3242 * cfil_data_filter()
3243 *
3244 * Process data for a content filter installed on a socket
3245 */
3246int
3247cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3248 struct mbuf *data, uint64_t datalen)
3249{
3250 errno_t error = 0;
3251 struct cfil_entry *entry;
3252 struct cfe_buf *entrybuf;
3253
3254 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3255 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3256
3257 socket_lock_assert_owned(so);
3258
3259 entry = &cfil_info->cfi_entries[kcunit - 1];
3260 if (outgoing)
3261 entrybuf = &entry->cfe_snd;
3262 else
3263 entrybuf = &entry->cfe_rcv;
3264
3265 /* Are we attached to the filter? */
3266 if (entry->cfe_filter == NULL) {
3267 error = 0;
3268 goto done;
3269 }
3270
3271 /* Dispatch to filters */
3272 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
3273 if (outgoing)
3274 OSAddAtomic64(datalen,
3275 &cfil_stats.cfs_ctl_q_out_enqueued);
3276 else
3277 OSAddAtomic64(datalen,
3278 &cfil_stats.cfs_ctl_q_in_enqueued);
3279
3280 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3281 if (error != 0) {
3282 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3283 error);
3284 }
3285 /*
3286 * We have to return EJUSTRETURN in all cases to avoid double free
3287 * by socket layer
3288 */
3289 error = EJUSTRETURN;
3290done:
3291 CFIL_INFO_VERIFY(cfil_info);
3292
3293 CFIL_LOG(LOG_INFO, "return %d", error);
3294 return (error);
3295}
3296
3297/*
3298 * cfil_service_inject_queue() re-injects data that passed the
3299 * content filters
3300 */
3301static int
3302cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
3303{
3304 mbuf_t data;
3305 unsigned int datalen;
3306 int mbcnt = 0;
3307 int mbnum = 0;
3308 errno_t error = 0;
3309 struct cfi_buf *cfi_buf;
3310 struct cfil_queue *inject_q;
3311 int need_rwakeup = 0;
3312 int count = 0;
3313
3314 if (cfil_info == NULL)
3315 return (0);
3316
3317 socket_lock_assert_owned(so);
3318
3319 if (outgoing) {
3320 cfi_buf = &cfil_info->cfi_snd;
3321 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
3322 } else {
3323 cfi_buf = &cfil_info->cfi_rcv;
3324 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
3325 }
3326 inject_q = &cfi_buf->cfi_inject_q;
3327
3328 if (cfil_queue_empty(inject_q))
3329 return (0);
3330
3331#if DATA_DEBUG | VERDICT_DEBUG
3332 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3333 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
3334#endif
3335
3336 while ((data = cfil_queue_first(inject_q)) != NULL) {
3337 datalen = cfil_data_length(data, &mbcnt, &mbnum);
3338
3339#if DATA_DEBUG
3340 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
3341 (uint64_t)VM_KERNEL_ADDRPERM(so),
3342 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
3343#endif
3344
3345 /* Remove data from queue and adjust stats */
3346 cfil_queue_remove(inject_q, data, datalen);
3347 cfi_buf->cfi_pending_first += datalen;
3348 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3349 cfi_buf->cfi_pending_mbnum -= mbnum;
3350 cfil_info_buf_verify(cfi_buf);
3351
3352 if (outgoing) {
3353 error = sosend_reinject(so, NULL, data, NULL, 0);
3354 if (error != 0) {
3355#if DATA_DEBUG
3356 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
3357 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
3358#endif
3359 break;
3360 }
3361 // At least one injection succeeded, need to wake up pending threads.
3362 need_rwakeup = 1;
3363 } else {
3364 data->m_flags |= M_SKIPCFIL;
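/*
 * M_SKIPCFIL prevents the re-injected data from being run through
 * the content filter again when it is appended to the receive buffer.
 */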
3365
3366 /*
3367 * NOTE: We currently only support TCP and UDP.
3368 * For RAWIP, MPTCP and message TCP we'll
3369 * need to call the appropriate sbappendxxx()
3370 * or fix sock_inject_data_in()
3371 */
3372 if (IS_UDP(so) == TRUE) {
3373 if (sbappendchain(&so->so_rcv, data, 0))
3374 need_rwakeup = 1;
3375 } else {
3376 if (sbappendstream(&so->so_rcv, data))
3377 need_rwakeup = 1;
3378 }
3379 }
3380
3381 if (outgoing)
3382 OSAddAtomic64(datalen,
3383 &cfil_stats.cfs_inject_q_out_passed);
3384 else
3385 OSAddAtomic64(datalen,
3386 &cfil_stats.cfs_inject_q_in_passed);
3387
3388 count++;
3389 }
3390
3391#if DATA_DEBUG | VERDICT_DEBUG
3392 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3393 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
3394#endif
3395
3396 /* A single wakeup for several packets is more efficient */
3397 if (need_rwakeup) {
3398 if (outgoing == TRUE)
3399 sowwakeup(so);
3400 else
3401 sorwakeup(so);
3402 }
3403
3404 if (error != 0 && cfil_info) {
3405 if (error == ENOBUFS)
3406 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
3407 if (error == ENOMEM)
3408 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
3409
3410 if (outgoing) {
3411 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
3412 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
3413 } else {
3414 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
3415 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
3416 }
3417 }
3418
3419 /*
3420 * Notify
3421 */
3422 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
3423 cfil_sock_notify_shutdown(so, SHUT_WR);
3424 if (cfil_sock_data_pending(&so->so_snd) == 0)
3425 soshutdownlock_final(so, SHUT_WR);
3426 }
3427 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
3428 if (cfil_filters_attached(so) == 0) {
3429 CFIL_LOG(LOG_INFO, "so %llx waking",
3430 (uint64_t)VM_KERNEL_ADDRPERM(so));
3431 wakeup((caddr_t)cfil_info);
3432 }
3433 }
3434
3435 CFIL_INFO_VERIFY(cfil_info);
3436
3437 return (error);
3438}
3439
3440static int
3441cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3442{
3443 uint64_t passlen, curlen;
3444 mbuf_t data;
3445 unsigned int datalen;
3446 errno_t error = 0;
3447 struct cfil_entry *entry;
3448 struct cfe_buf *entrybuf;
3449 struct cfil_queue *pending_q;
3450
3451 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3452 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3453
3454 socket_lock_assert_owned(so);
3455
3456 entry = &cfil_info->cfi_entries[kcunit - 1];
3457 if (outgoing)
3458 entrybuf = &entry->cfe_snd;
3459 else
3460 entrybuf = &entry->cfe_rcv;
3461
3462 pending_q = &entrybuf->cfe_pending_q;
3463
3464 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
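/*
 * passlen is the number of bytes at the head of the pending queue that
 * this filter has already allowed to pass.
 */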
3465
3466 /*
3467 * Locate the chunks of data that we can pass to the next filter
3468 * A data chunk must be on mbuf boundaries
3469 */
3470 curlen = 0;
3471 while ((data = cfil_queue_first(pending_q)) != NULL) {
3472 datalen = cfil_data_length(data, NULL, NULL);
3473
3474#if DATA_DEBUG
3475 CFIL_LOG(LOG_DEBUG,
3476 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3477 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
3478 passlen, curlen);
3479#endif
3480
3481 if (curlen + datalen > passlen)
3482 break;
3483
3484 cfil_queue_remove(pending_q, data, datalen);
3485
3486 curlen += datalen;
3487
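/*
 * Hand the chunk to the filters after this one: an attached later filter
 * enqueues the data on its own control queue and returns EJUSTRETURN,
 * while 0 from all of them means no other filter claimed the data and it
 * can be re-injected below.
 */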
3488 for (kcunit += 1;
3489 kcunit <= MAX_CONTENT_FILTER;
3490 kcunit++) {
3491 error = cfil_data_filter(so, cfil_info, kcunit, outgoing,
3492 data, datalen);
3493 /* 0 means passed so we can continue */
3494 if (error != 0)
3495 break;
3496 }
3497 /* When data has passed all filters, re-inject */
3498 if (error == 0) {
3499 if (outgoing) {
3500 cfil_queue_enqueue(
3501 &cfil_info->cfi_snd.cfi_inject_q,
3502 data, datalen);
3503 OSAddAtomic64(datalen,
3504 &cfil_stats.cfs_inject_q_out_enqueued);
3505 } else {
3506 cfil_queue_enqueue(
3507 &cfil_info->cfi_rcv.cfi_inject_q,
3508 data, datalen);
3509 OSAddAtomic64(datalen,
3510 &cfil_stats.cfs_inject_q_in_enqueued);
3511 }
3512 }
3513 }
3514
3515 CFIL_INFO_VERIFY(cfil_info);
3516
3517 return (error);
3518}
3519
3520int
3521cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3522 uint64_t pass_offset, uint64_t peek_offset)
3523{
3524 errno_t error = 0;
3525 struct cfil_entry *entry = NULL;
3526 struct cfe_buf *entrybuf;
3527 int updated = 0;
3528
3529 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
3530
3531 socket_lock_assert_owned(so);
3532
3533 if (cfil_info == NULL) {
3534 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
3535 (uint64_t)VM_KERNEL_ADDRPERM(so));
3536 error = 0;
3537 goto done;
3538 } else if (cfil_info->cfi_flags & CFIF_DROP) {
3539 CFIL_LOG(LOG_ERR, "so %llx drop set",
3540 (uint64_t)VM_KERNEL_ADDRPERM(so));
3541 error = EPIPE;
3542 goto done;
3543 }
3544
3545 entry = &cfil_info->cfi_entries[kcunit - 1];
3546 if (outgoing)
3547 entrybuf = &entry->cfe_snd;
3548 else
3549 entrybuf = &entry->cfe_rcv;
3550
3551 /* Record updated offsets for this content filter */
3552 if (pass_offset > entrybuf->cfe_pass_offset) {
3553 entrybuf->cfe_pass_offset = pass_offset;
3554
3555 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3556 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3557 updated = 1;
3558 } else {
3559 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
3560 pass_offset, entrybuf->cfe_pass_offset);
3561 }
3562 /* Filter does not want or need to see data that's allowed to pass */
3563 if (peek_offset > entrybuf->cfe_pass_offset &&
3564 peek_offset > entrybuf->cfe_peek_offset) {
3565 entrybuf->cfe_peek_offset = peek_offset;
3566 updated = 1;
3567 }
3568 /* Nothing to do */
3569 if (updated == 0)
3570 goto done;
3571
3572 /* Move data held in control queue to pending queue if needed */
3573 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3574 if (error != 0) {
3575 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3576 error);
3577 goto done;
3578 }
3579 error = EJUSTRETURN;
3580
3581done:
3582 /*
	 * The filter is effectively detached when it has passed all data
	 * from both sides, or when the socket is closed and no more data
	 * is waiting to be delivered to the filter
3586 */
3587 if (entry != NULL &&
3588 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
3589 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
3590 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
3591 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
3592 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
3593 entry->cfe_flags |= CFEF_CFIL_DETACHED;
3594#if LIFECYCLE_DEBUG
3595 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3596 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
3597 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
3598#endif
3599 CFIL_LOG(LOG_INFO, "so %llx detached %u",
3600 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3601 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
3602 cfil_filters_attached(so) == 0) {
3603#if LIFECYCLE_DEBUG
3604 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
3605#endif
3606 CFIL_LOG(LOG_INFO, "so %llx waking",
3607 (uint64_t)VM_KERNEL_ADDRPERM(so));
3608 wakeup((caddr_t)cfil_info);
3609 }
3610 }
3611 CFIL_INFO_VERIFY(cfil_info);
3612 CFIL_LOG(LOG_INFO, "return %d", error);
3613 return (error);
3614}
3615
3616/*
3617 * Update pass offset for socket when no data is pending
3618 */
3619static int
3620cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
3621{
3622 struct cfi_buf *cfi_buf;
3623 struct cfil_entry *entry;
3624 struct cfe_buf *entrybuf;
3625 uint32_t kcunit;
3626 uint64_t pass_offset = 0;
3627
3628 if (cfil_info == NULL)
3629 return (0);
3630
3631 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
3632 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3633
3634 socket_lock_assert_owned(so);
3635
3636 if (outgoing)
3637 cfi_buf = &cfil_info->cfi_snd;
3638 else
3639 cfi_buf = &cfil_info->cfi_rcv;
3640
3641 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
3642 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
3643 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
3644
3645 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
3646 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3647 entry = &cfil_info->cfi_entries[kcunit - 1];
3648
3649 /* Are we attached to a filter? */
3650 if (entry->cfe_filter == NULL)
3651 continue;
3652
3653 if (outgoing)
3654 entrybuf = &entry->cfe_snd;
3655 else
3656 entrybuf = &entry->cfe_rcv;
3657
3658 if (pass_offset == 0 ||
3659 entrybuf->cfe_pass_offset < pass_offset)
3660 pass_offset = entrybuf->cfe_pass_offset;
3661 }
3662 cfi_buf->cfi_pass_offset = pass_offset;
3663 }
3664
3665 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
3666 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
3667
3668 return (0);
3669}
3670
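/*
 * Process a pass action from a filter agent: update the pass and peek
 * offsets of the given filter entry, re-inject any data that is now
 * allowed to flow and recompute the socket level pass offset.
 */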
3671int
3672cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3673 uint64_t pass_offset, uint64_t peek_offset)
3674{
3675 errno_t error = 0;
3676
3677 CFIL_LOG(LOG_INFO, "");
3678
3679 socket_lock_assert_owned(so);
3680
3681 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
3682 if (error != 0) {
3683 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
3684 (uint64_t)VM_KERNEL_ADDRPERM(so),
3685 outgoing ? "out" : "in");
3686 goto release;
3687 }
3688
3689 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
3690 pass_offset, peek_offset);
3691
3692 cfil_service_inject_queue(so, cfil_info, outgoing);
3693
3694 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
3695release:
3696 CFIL_INFO_VERIFY(cfil_info);
3697 cfil_release_sockbuf(so, outgoing);
3698
3699 return (error);
3700}
3701
3702
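/*
 * Drain all data still held on behalf of the content filters, in both
 * directions: the per-entry control and pending queues as well as the
 * inject queues. Statistics distinguish flushes caused by a drop
 * verdict from flushes caused by a normal close.
 */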
3703static void
3704cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
3705{
3706 struct cfil_entry *entry;
3707 int kcunit;
3708 uint64_t drained;
3709
3710 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL)
3711 goto done;
3712
3713 socket_lock_assert_owned(so);
3714
3715 /*
3716 * Flush the output queues and ignore errors as long as
3717 * we are attached
3718 */
3719 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
3720 if (cfil_info != NULL) {
3721 drained = 0;
3722 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3723 entry = &cfil_info->cfi_entries[kcunit - 1];
3724
3725 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
3726 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
3727 }
3728 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
3729
3730 if (drained) {
3731 if (cfil_info->cfi_flags & CFIF_DROP)
3732 OSIncrementAtomic(
3733 &cfil_stats.cfs_flush_out_drop);
3734 else
3735 OSIncrementAtomic(
3736 &cfil_stats.cfs_flush_out_close);
3737 }
3738 }
3739 cfil_release_sockbuf(so, 1);
3740
3741 /*
3742 * Flush the input queues
3743 */
3744 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
3745 if (cfil_info != NULL) {
3746 drained = 0;
3747 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3748 entry = &cfil_info->cfi_entries[kcunit - 1];
3749
3750 drained += cfil_queue_drain(
3751 &entry->cfe_rcv.cfe_ctl_q);
3752 drained += cfil_queue_drain(
3753 &entry->cfe_rcv.cfe_pending_q);
3754 }
3755 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
3756
3757 if (drained) {
3758 if (cfil_info->cfi_flags & CFIF_DROP)
3759 OSIncrementAtomic(
3760 &cfil_stats.cfs_flush_in_drop);
3761 else
3762 OSIncrementAtomic(
3763 &cfil_stats.cfs_flush_in_close);
3764 }
3765 }
3766 cfil_release_sockbuf(so, 0);
3767done:
3768 CFIL_INFO_VERIFY(cfil_info);
3769}
3770
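/*
 * Process a drop action from a filter agent: mark the flow as dropped,
 * defunct the socket (TCP only; UDP flows are tracked in the cfil
 * database), mark the filter entry as detached and flush the data
 * still held by the content filter.
 */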
3771int
3772cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
3773{
3774 errno_t error = 0;
3775 struct cfil_entry *entry;
3776 struct proc *p;
3777
3778 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL)
3779 goto done;
3780
3781 socket_lock_assert_owned(so);
3782
3783 entry = &cfil_info->cfi_entries[kcunit - 1];
3784
3785 /* Are we attached to the filter? */
3786 if (entry->cfe_filter == NULL)
3787 goto done;
3788
3789 cfil_info->cfi_flags |= CFIF_DROP;
3790
3791 p = current_proc();
3792
3793 /*
3794 * Force the socket to be marked defunct
3795 * (forcing fixed along with rdar://19391339)
3796 */
3797 if (so->so_cfil_db == NULL) {
3798 error = sosetdefunct(p, so,
3799 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
3800 FALSE);
3801
3802 /* Flush the socket buffer and disconnect */
3803 if (error == 0)
3804 error = sodefunct(p, so,
3805 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
3806 }
3807
3808 /* The filter is done, mark as detached */
3809 entry->cfe_flags |= CFEF_CFIL_DETACHED;
3810#if LIFECYCLE_DEBUG
3811 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
3812#endif
3813 CFIL_LOG(LOG_INFO, "so %llx detached %u",
3814 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3815
3816 /* Pending data needs to go */
3817 cfil_flush_queues(so, cfil_info);
3818
3819 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
3820 if (cfil_filters_attached(so) == 0) {
3821 CFIL_LOG(LOG_INFO, "so %llx waking",
3822 (uint64_t)VM_KERNEL_ADDRPERM(so));
3823 wakeup((caddr_t)cfil_info);
3824 }
3825 }
3826done:
3827 return (error);
3828}
3829
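/*
 * Handle a "bless client" message from a filter agent: look up the
 * socket by client UUID and, when a filter is attached, pass all data
 * in both directions; otherwise mark the socket so that content
 * filtering is skipped altogether.
 */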
3830int
3831cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
3832{
3833 errno_t error = 0;
3834 struct cfil_info *cfil_info = NULL;
3835
3836 bool cfil_attached = false;
3837 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
3838
3839 // Search and lock socket
3840 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
3841 if (so == NULL) {
3842 error = ENOENT;
3843 } else {
3844 // The client gets a pass automatically
3845 cfil_info = (so->so_cfil_db != NULL) ?
3846 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
3847
3848 if (cfil_attached) {
3849#if VERDICT_DEBUG
3850 if (cfil_info != NULL) {
3851 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
3852 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
3853 (uint64_t)VM_KERNEL_ADDRPERM(so),
3854 cfil_info->cfi_sock_id);
3855 }
3856#endif
3857 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
3858 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
3859 } else {
3860 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
3861 }
3862 socket_unlock(so, 1);
3863 }
3864
3865 return (error);
3866}
3867
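/*
 * Fast path bookkeeping when the data is already below the pass offset:
 * advance the control and pending queue offsets of every attached entry
 * by datalen and move the pass, peeked and peek offsets along with
 * them, so the data never has to be queued for the filters.
 */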
3868static int
3869cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
3870{
3871 struct cfil_entry *entry;
3872 struct cfe_buf *entrybuf;
3873 uint32_t kcunit;
3874
3875 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
3876 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
3877
3878 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3879 entry = &cfil_info->cfi_entries[kcunit - 1];
3880
3881 /* Are we attached to the filter? */
3882 if (entry->cfe_filter == NULL)
3883 continue;
3884
3885 if (outgoing)
3886 entrybuf = &entry->cfe_snd;
3887 else
3888 entrybuf = &entry->cfe_rcv;
3889
3890 entrybuf->cfe_ctl_q.q_start += datalen;
3891 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
3892 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
3893 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3894 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3895
3896 entrybuf->cfe_ctl_q.q_end += datalen;
3897
3898 entrybuf->cfe_pending_q.q_start += datalen;
3899 entrybuf->cfe_pending_q.q_end += datalen;
3900 }
3901 CFIL_INFO_VERIFY(cfil_info);
3902 return (0);
3903}
3904
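/*
 * Common data path for both directions: account for the new data, take
 * the fast path when it is entirely below the pass offset, otherwise
 * submit it to each attached filter. For UDP, the address, control and
 * data mbufs are first chained together so the whole chain can be
 * re-injected once the verdict is received.
 */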
3905int
3906cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
3907 struct mbuf *data, struct mbuf *control, uint32_t flags)
3908{
3909#pragma unused(to, control, flags)
3910 errno_t error = 0;
3911 unsigned int datalen;
3912 int mbcnt = 0;
3913 int mbnum = 0;
3914 int kcunit;
3915 struct cfi_buf *cfi_buf;
3916 struct mbuf *chain = NULL;
3917
3918 if (cfil_info == NULL) {
3919 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
3920 (uint64_t)VM_KERNEL_ADDRPERM(so));
3921 error = 0;
3922 goto done;
3923 } else if (cfil_info->cfi_flags & CFIF_DROP) {
3924 CFIL_LOG(LOG_ERR, "so %llx drop set",
3925 (uint64_t)VM_KERNEL_ADDRPERM(so));
3926 error = EPIPE;
3927 goto done;
3928 }
3929
3930 datalen = cfil_data_length(data, &mbcnt, &mbnum);
3931
3932 if (outgoing)
3933 cfi_buf = &cfil_info->cfi_snd;
3934 else
3935 cfi_buf = &cfil_info->cfi_rcv;
3936
3937 cfi_buf->cfi_pending_last += datalen;
3938 cfi_buf->cfi_pending_mbcnt += mbcnt;
3939 cfi_buf->cfi_pending_mbnum += mbnum;
3940
3941 if (IS_UDP(so)) {
3942 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
3943 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
3944 cfi_buf->cfi_tail_drop_cnt++;
3945 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3946 cfi_buf->cfi_pending_mbnum -= mbnum;
3947 return (EPIPE);
3948 }
3949 }
3950
3951 cfil_info_buf_verify(cfi_buf);
3952
3953#if DATA_DEBUG
3954 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
3955 (uint64_t)VM_KERNEL_ADDRPERM(so),
3956 outgoing ? "OUT" : "IN",
3957 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
3958 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
3959 cfi_buf->cfi_pending_last,
3960 cfi_buf->cfi_pending_mbcnt,
3961 cfi_buf->cfi_pass_offset);
3962#endif
3963
3964 /* Fast path when below pass offset */
3965 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
3966 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
3967#if DATA_DEBUG
3968 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
3969#endif
3970 } else {
3971 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3972 // Is cfil attached to this filter?
3973 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
3974 if (IS_UDP(so)) {
				/*
				 * UDP only:
				 * Chain addr (incoming only, TBD), control (optional) and data into one chain.
				 * The full chain is reinjected into the socket after receiving the verdict.
				 */
3979 (void) cfil_udp_save_socket_state(cfil_info, data);
3980 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
3981 if (chain == NULL) {
3982 return (ENOBUFS);
3983 }
3984 data = chain;
3985 }
3986 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
3987 datalen);
3988 }
3989 /* 0 means passed so continue with next filter */
3990 if (error != 0)
3991 break;
3992 }
3993 }
3994
3995 /* Move cursor if no filter claimed the data */
3996 if (error == 0) {
3997 cfi_buf->cfi_pending_first += datalen;
3998 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3999 cfi_buf->cfi_pending_mbnum -= mbnum;
4000 cfil_info_buf_verify(cfi_buf);
4001 }
4002done:
4003 CFIL_INFO_VERIFY(cfil_info);
4004
4005 return (error);
4006}
4007
4008/*
4009 * Callback from socket layer sosendxxx()
4010 */
4011int
4012cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4013 struct mbuf *data, struct mbuf *control, uint32_t flags)
4014{
4015 int error = 0;
4016
4017 if (IS_UDP(so)) {
4018 return (cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags));
4019 }
4020
4021 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4022 return (0);
4023
4024 socket_lock_assert_owned(so);
4025
4026 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4027 CFIL_LOG(LOG_ERR, "so %llx drop set",
4028 (uint64_t)VM_KERNEL_ADDRPERM(so));
4029 return (EPIPE);
4030 }
4031 if (control != NULL) {
4032 CFIL_LOG(LOG_ERR, "so %llx control",
4033 (uint64_t)VM_KERNEL_ADDRPERM(so));
4034 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4035 }
4036 if ((flags & MSG_OOB)) {
4037 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4038 (uint64_t)VM_KERNEL_ADDRPERM(so));
4039 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4040 }
4041 if ((so->so_snd.sb_flags & SB_LOCK) == 0)
4042 panic("so %p SB_LOCK not set", so);
4043
4044 if (so->so_snd.sb_cfil_thread != NULL)
4045 panic("%s sb_cfil_thread %p not NULL", __func__,
4046 so->so_snd.sb_cfil_thread);
4047
4048 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4049
4050 return (error);
4051}
4052
4053/*
4054 * Callback from socket layer sbappendxxx()
4055 */
4056int
4057cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4058 struct mbuf *data, struct mbuf *control, uint32_t flags)
4059{
4060 int error = 0;
4061
4062 if (IS_UDP(so)) {
4063 return (cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags));
4064 }
4065
4066 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4067 return (0);
4068
4069 socket_lock_assert_owned(so);
4070
4071 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4072 CFIL_LOG(LOG_ERR, "so %llx drop set",
4073 (uint64_t)VM_KERNEL_ADDRPERM(so));
4074 return (EPIPE);
4075 }
4076 if (control != NULL) {
4077 CFIL_LOG(LOG_ERR, "so %llx control",
4078 (uint64_t)VM_KERNEL_ADDRPERM(so));
4079 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4080 }
4081 if (data->m_type == MT_OOBDATA) {
4082 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4083 (uint64_t)VM_KERNEL_ADDRPERM(so));
4084 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4085 }
4086 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4087
4088 return (error);
4089}
4090
4091/*
4092 * Callback from socket layer soshutdownxxx()
4093 *
 * We may delay the shutdown write when outgoing data is still being processed.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read any more data.
4098 */
4099int
4100cfil_sock_shutdown(struct socket *so, int *how)
4101{
4102 int error = 0;
4103
4104 if (IS_UDP(so)) {
4105 return (cfil_sock_udp_shutdown(so, how));
4106 }
4107
4108 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4109 goto done;
4110
4111 socket_lock_assert_owned(so);
4112
4113 CFIL_LOG(LOG_INFO, "so %llx how %d",
4114 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
4115
4116 /*
4117 * Check the state of the socket before the content filter
4118 */
4119 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
4120 /* read already shut down */
4121 error = ENOTCONN;
4122 goto done;
4123 }
4124 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
4125 /* write already shut down */
4126 error = ENOTCONN;
4127 goto done;
4128 }
4129
4130 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
4131 CFIL_LOG(LOG_ERR, "so %llx drop set",
4132 (uint64_t)VM_KERNEL_ADDRPERM(so));
4133 goto done;
4134 }
4135
4136 /*
4137 * shutdown read: SHUT_RD or SHUT_RDWR
4138 */
4139 if (*how != SHUT_WR) {
4140 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
4141 error = ENOTCONN;
4142 goto done;
4143 }
4144 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
4145 cfil_sock_notify_shutdown(so, SHUT_RD);
4146 }
4147 /*
4148 * shutdown write: SHUT_WR or SHUT_RDWR
4149 */
4150 if (*how != SHUT_RD) {
4151 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
4152 error = ENOTCONN;
4153 goto done;
4154 }
4155 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
4156 cfil_sock_notify_shutdown(so, SHUT_WR);
4157 /*
4158 * When outgoing data is pending, we delay the shutdown at the
4159 * protocol level until the content filters give the final
4160 * verdict on the pending data.
4161 */
4162 if (cfil_sock_data_pending(&so->so_snd) != 0) {
4163 /*
4164 * When shutting down the read and write sides at once
4165 * we can proceed to the final shutdown of the read
4166 * side. Otherwise, we just return.
4167 */
4168 if (*how == SHUT_WR) {
4169 error = EJUSTRETURN;
4170 } else if (*how == SHUT_RDWR) {
4171 *how = SHUT_RD;
4172 }
4173 }
4174 }
4175done:
4176 return (error);
4177}
4178
4179/*
4180 * This is called when the socket is closed and there is no more
4181 * opportunity for filtering
4182 */
4183void
4184cfil_sock_is_closed(struct socket *so)
4185{
4186 errno_t error = 0;
4187 int kcunit;
4188
4189 if (IS_UDP(so)) {
4190 cfil_sock_udp_is_closed(so);
4191 return;
4192 }
4193
4194 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4195 return;
4196
4197 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4198
4199 socket_lock_assert_owned(so);
4200
4201 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4202 /* Let the filters know of the closing */
4203 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
4204 }
4205
4206 /* Last chance to push passed data out */
4207 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
4208 if (error == 0)
4209 cfil_service_inject_queue(so, so->so_cfil, 1);
4210 cfil_release_sockbuf(so, 1);
4211
4212 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
4213
4214 /* Pending data needs to go */
4215 cfil_flush_queues(so, so->so_cfil);
4216
4217 CFIL_INFO_VERIFY(so->so_cfil);
4218}
4219
4220/*
4221 * This is called when the socket is disconnected so let the filters
4222 * know about the disconnection and that no more data will come
4223 *
 * The how parameter has the same values as soshutdown()
4225 */
4226void
4227cfil_sock_notify_shutdown(struct socket *so, int how)
4228{
4229 errno_t error = 0;
4230 int kcunit;
4231
4232 if (IS_UDP(so)) {
4233 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
4234 return;
4235 }
4236
4237 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4238 return;
4239
4240 CFIL_LOG(LOG_INFO, "so %llx how %d",
4241 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
4242
4243 socket_lock_assert_owned(so);
4244
4245 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4246 /* Disconnect incoming side */
4247 if (how != SHUT_WR)
4248 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
4249 /* Disconnect outgoing side */
4250 if (how != SHUT_RD)
4251 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
4252 }
4253}
4254
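/*
 * Returns non-zero when at least one content filter is still attached
 * to the socket, i.e. an entry that has sent its attach event and has
 * not been detached yet. UDP sockets are handled by
 * cfil_filters_udp_attached().
 */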
4255static int
4256cfil_filters_attached(struct socket *so)
4257{
4258 struct cfil_entry *entry;
4259 uint32_t kcunit;
4260 int attached = 0;
4261
4262 if (IS_UDP(so)) {
4263 return cfil_filters_udp_attached(so, FALSE);
4264 }
4265
4266 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4267 return (0);
4268
4269 socket_lock_assert_owned(so);
4270
4271 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4272 entry = &so->so_cfil->cfi_entries[kcunit - 1];
4273
4274 /* Are we attached to the filter? */
4275 if (entry->cfe_filter == NULL)
4276 continue;
4277 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
4278 continue;
4279 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
4280 continue;
4281 attached = 1;
4282 break;
4283 }
4284
4285 return (attached);
4286}
4287
4288/*
4289 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop verdict
4291 */
4292void
4293cfil_sock_close_wait(struct socket *so)
4294{
4295 lck_mtx_t *mutex_held;
4296 struct timespec ts;
4297 int error;
4298
4299 if (IS_UDP(so)) {
4300 cfil_sock_udp_close_wait(so);
4301 return;
4302 }
4303
4304 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4305 return;
4306
4307 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4308
4309 if (so->so_proto->pr_getlock != NULL)
4310 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
4311 else
4312 mutex_held = so->so_proto->pr_domain->dom_mtx;
4313 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4314
4315 while (cfil_filters_attached(so)) {
4316 /*
4317 * Notify the filters we are going away so they can detach
4318 */
4319 cfil_sock_notify_shutdown(so, SHUT_RDWR);
4320
4321 /*
		 * Make sure we still need to wait after the filters are notified
		 * of the disconnection
4324 */
4325 if (cfil_filters_attached(so) == 0)
4326 break;
4327
4328 CFIL_LOG(LOG_INFO, "so %llx waiting",
4329 (uint64_t)VM_KERNEL_ADDRPERM(so));
4330
4331 ts.tv_sec = cfil_close_wait_timeout / 1000;
4332 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
4333 NSEC_PER_USEC * 1000;
4334
4335 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
4336 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
4337 error = msleep((caddr_t)so->so_cfil, mutex_held,
4338 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
4339 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
4340
4341 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
4342 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
4343
4344 /*
4345 * Force close in case of timeout
4346 */
4347 if (error != 0) {
4348 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
4349 break;
4350 }
4351 }
4352
4353}
4354
4355/*
 * Returns the size of the data held by the content filter for this socket buffer
4357 */
4358int32_t
4359cfil_sock_data_pending(struct sockbuf *sb)
4360{
4361 struct socket *so = sb->sb_so;
4362 uint64_t pending = 0;
4363
4364 if (IS_UDP(so)) {
4365 return (cfil_sock_udp_data_pending(sb, FALSE));
4366 }
4367
4368 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
4369 struct cfi_buf *cfi_buf;
4370
4371 socket_lock_assert_owned(so);
4372
4373 if ((sb->sb_flags & SB_RECV) == 0)
4374 cfi_buf = &so->so_cfil->cfi_snd;
4375 else
4376 cfi_buf = &so->so_cfil->cfi_rcv;
4377
4378 pending = cfi_buf->cfi_pending_last -
4379 cfi_buf->cfi_pending_first;
4380
4381 /*
4382 * If we are limited by the "chars of mbufs used" roughly
4383 * adjust so we won't overcommit
4384 */
4385 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
4386 pending = cfi_buf->cfi_pending_mbcnt;
4387 }
4388
4389 VERIFY(pending < INT32_MAX);
4390
4391 return (int32_t)(pending);
4392}
4393
4394/*
4395 * Return the socket buffer space used by data being held by content filters
4396 * so processes won't clog the socket buffer
4397 */
4398int32_t
4399cfil_sock_data_space(struct sockbuf *sb)
4400{
4401 struct socket *so = sb->sb_so;
4402 uint64_t pending = 0;
4403
4404 if (IS_UDP(so)) {
4405 return (cfil_sock_udp_data_pending(sb, TRUE));
4406 }
4407
4408 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
4409 so->so_snd.sb_cfil_thread != current_thread()) {
4410 struct cfi_buf *cfi_buf;
4411
4412 socket_lock_assert_owned(so);
4413
4414 if ((sb->sb_flags & SB_RECV) == 0)
4415 cfi_buf = &so->so_cfil->cfi_snd;
4416 else
4417 cfi_buf = &so->so_cfil->cfi_rcv;
4418
4419 pending = cfi_buf->cfi_pending_last -
4420 cfi_buf->cfi_pending_first;
4421
4422 /*
4423 * If we are limited by the "chars of mbufs used" roughly
4424 * adjust so we won't overcommit
4425 */
4426 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
4427 pending = cfi_buf->cfi_pending_mbcnt;
4428 }
4429
4430 VERIFY(pending < INT32_MAX);
4431
4432 return (int32_t)(pending);
4433}
4434
4435/*
4436 * A callback from the socket and protocol layer when data becomes
4437 * available in the socket buffer to give a chance for the content filter
4438 * to re-inject data that was held back
4439 */
4440void
4441cfil_sock_buf_update(struct sockbuf *sb)
4442{
4443 int outgoing;
4444 int error;
4445 struct socket *so = sb->sb_so;
4446
4447 if (IS_UDP(so)) {
4448 cfil_sock_udp_buf_update(sb);
4449 return;
4450 }
4451
4452 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
4453 return;
4454
4455 if (!cfil_sbtrim)
4456 return;
4457
4458 socket_lock_assert_owned(so);
4459
4460 if ((sb->sb_flags & SB_RECV) == 0) {
4461 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
4462 return;
4463 outgoing = 1;
4464 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
4465 } else {
4466 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
4467 return;
4468 outgoing = 0;
4469 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
4470 }
4471
4472 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
4473 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4474
4475 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
4476 if (error == 0)
4477 cfil_service_inject_queue(so, so->so_cfil, outgoing);
4478 cfil_release_sockbuf(so, outgoing);
4479}
4480
4481int
4482sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
4483 struct sysctl_req *req)
4484{
4485#pragma unused(oidp, arg1, arg2)
4486 int error = 0;
4487 size_t len = 0;
4488 u_int32_t i;
4489
4490 /* Read only */
4491 if (req->newptr != USER_ADDR_NULL)
4492 return (EPERM);
4493
4494 cfil_rw_lock_shared(&cfil_lck_rw);
4495
4496 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
4497 struct cfil_filter_stat filter_stat;
4498 struct content_filter *cfc = content_filters[i];
4499
4500 if (cfc == NULL)
4501 continue;
4502
4503 /* If just asking for the size */
4504 if (req->oldptr == USER_ADDR_NULL) {
4505 len += sizeof(struct cfil_filter_stat);
4506 continue;
4507 }
4508
4509 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
4510 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
4511 filter_stat.cfs_filter_id = cfc->cf_kcunit;
4512 filter_stat.cfs_flags = cfc->cf_flags;
4513 filter_stat.cfs_sock_count = cfc->cf_sock_count;
4514 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
4515
4516 error = SYSCTL_OUT(req, &filter_stat,
4517 sizeof (struct cfil_filter_stat));
4518 if (error != 0)
4519 break;
4520 }
4521 /* If just asking for the size */
4522 if (req->oldptr == USER_ADDR_NULL)
4523 req->oldidx = len;
4524
4525 cfil_rw_unlock_shared(&cfil_lck_rw);
4526
4527#if SHOW_DEBUG
4528 if (req->oldptr != USER_ADDR_NULL) {
4529 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
4530 cfil_filter_show(i);
4531 }
4532 }
4533#endif
4534
4535 return (error);
4536}
4537
4538static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
4539 struct sysctl_req *req)
4540{
4541#pragma unused(oidp, arg1, arg2)
4542 int error = 0;
4543 u_int32_t i;
4544 struct cfil_info *cfi;
4545
4546 /* Read only */
4547 if (req->newptr != USER_ADDR_NULL)
4548 return (EPERM);
4549
4550 cfil_rw_lock_shared(&cfil_lck_rw);
4551
4552 /*
4553 * If just asking for the size,
4554 */
4555 if (req->oldptr == USER_ADDR_NULL) {
4556 req->oldidx = cfil_sock_attached_count *
4557 sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets get attached */
4559 req->oldidx += req->oldidx >> 3;
4560 goto done;
4561 }
4562
4563 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
4564 struct cfil_entry *entry;
4565 struct cfil_sock_stat stat;
4566 struct socket *so = cfi->cfi_so;
4567
4568 bzero(&stat, sizeof(struct cfil_sock_stat));
4569 stat.cfs_len = sizeof(struct cfil_sock_stat);
4570 stat.cfs_sock_id = cfi->cfi_sock_id;
4571 stat.cfs_flags = cfi->cfi_flags;
4572
4573 if (so != NULL) {
4574 stat.cfs_pid = so->last_pid;
4575 memcpy(stat.cfs_uuid, so->last_uuid,
4576 sizeof(uuid_t));
4577 if (so->so_flags & SOF_DELEGATED) {
4578 stat.cfs_e_pid = so->e_pid;
4579 memcpy(stat.cfs_e_uuid, so->e_uuid,
4580 sizeof(uuid_t));
4581 } else {
4582 stat.cfs_e_pid = so->last_pid;
4583 memcpy(stat.cfs_e_uuid, so->last_uuid,
4584 sizeof(uuid_t));
4585 }
4586
4587 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
4588 stat.cfs_sock_type = so->so_proto->pr_type;
4589 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
4590 }
4591
4592 stat.cfs_snd.cbs_pending_first =
4593 cfi->cfi_snd.cfi_pending_first;
4594 stat.cfs_snd.cbs_pending_last =
4595 cfi->cfi_snd.cfi_pending_last;
4596 stat.cfs_snd.cbs_inject_q_len =
4597 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
4598 stat.cfs_snd.cbs_pass_offset =
4599 cfi->cfi_snd.cfi_pass_offset;
4600
4601 stat.cfs_rcv.cbs_pending_first =
4602 cfi->cfi_rcv.cfi_pending_first;
4603 stat.cfs_rcv.cbs_pending_last =
4604 cfi->cfi_rcv.cfi_pending_last;
4605 stat.cfs_rcv.cbs_inject_q_len =
4606 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
4607 stat.cfs_rcv.cbs_pass_offset =
4608 cfi->cfi_rcv.cfi_pass_offset;
4609
4610 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
4611 struct cfil_entry_stat *estat;
4612 struct cfe_buf *ebuf;
4613 struct cfe_buf_stat *sbuf;
4614
4615 entry = &cfi->cfi_entries[i];
4616
4617 estat = &stat.ces_entries[i];
4618
4619 estat->ces_len = sizeof(struct cfil_entry_stat);
4620 estat->ces_filter_id = entry->cfe_filter ?
4621 entry->cfe_filter->cf_kcunit : 0;
4622 estat->ces_flags = entry->cfe_flags;
4623 estat->ces_necp_control_unit =
4624 entry->cfe_necp_control_unit;
4625
4626 estat->ces_last_event.tv_sec =
4627 (int64_t)entry->cfe_last_event.tv_sec;
4628 estat->ces_last_event.tv_usec =
4629 (int64_t)entry->cfe_last_event.tv_usec;
4630
4631 estat->ces_last_action.tv_sec =
4632 (int64_t)entry->cfe_last_action.tv_sec;
4633 estat->ces_last_action.tv_usec =
4634 (int64_t)entry->cfe_last_action.tv_usec;
4635
4636 ebuf = &entry->cfe_snd;
4637 sbuf = &estat->ces_snd;
4638 sbuf->cbs_pending_first =
4639 cfil_queue_offset_first(&ebuf->cfe_pending_q);
4640 sbuf->cbs_pending_last =
4641 cfil_queue_offset_last(&ebuf->cfe_pending_q);
4642 sbuf->cbs_ctl_first =
4643 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
4644 sbuf->cbs_ctl_last =
4645 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
4646 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
4647 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
4648 sbuf->cbs_peeked = ebuf->cfe_peeked;
4649
4650 ebuf = &entry->cfe_rcv;
4651 sbuf = &estat->ces_rcv;
4652 sbuf->cbs_pending_first =
4653 cfil_queue_offset_first(&ebuf->cfe_pending_q);
4654 sbuf->cbs_pending_last =
4655 cfil_queue_offset_last(&ebuf->cfe_pending_q);
4656 sbuf->cbs_ctl_first =
4657 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
4658 sbuf->cbs_ctl_last =
4659 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
4660 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
4661 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
4662 sbuf->cbs_peeked = ebuf->cfe_peeked;
4663 }
4664 error = SYSCTL_OUT(req, &stat,
4665 sizeof (struct cfil_sock_stat));
4666 if (error != 0)
4667 break;
4668 }
4669done:
4670 cfil_rw_unlock_shared(&cfil_lck_rw);
4671
4672#if SHOW_DEBUG
4673 if (req->oldptr != USER_ADDR_NULL) {
4674 cfil_info_show();
4675 }
4676#endif
4677
4678 return (error);
4679}
4680
4681/*
4682 * UDP Socket Support
4683 */
4684static void
4685cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
4686{
4687 char local[MAX_IPv6_STR_LEN+6];
4688 char remote[MAX_IPv6_STR_LEN+6];
4689 const void *addr;
4690
	// Nothing to log without a socket and a hash entry
4692 if (so == NULL || entry == NULL) {
4693 return;
4694 }
4695
4696 local[0] = remote[0] = 0x0;
4697
4698 switch (entry->cfentry_family) {
4699 case AF_INET6:
4700 addr = &entry->cfentry_laddr.addr6;
4701 inet_ntop(AF_INET6, addr, local, sizeof(local));
4702 addr = &entry->cfentry_faddr.addr6;
		inet_ntop(AF_INET6, addr, remote, sizeof(remote));
4704 break;
4705 case AF_INET:
4706 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
4707 inet_ntop(AF_INET, addr, local, sizeof(local));
4708 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
		inet_ntop(AF_INET, addr, remote, sizeof(remote));
4710 break;
4711 default:
4712 return;
4713 }
4714
4715 CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
4716 msg,
4717 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
4718 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
4719}
4720
4721static void
4722cfil_inp_log(int level, struct socket *so, const char* msg)
4723{
4724 struct inpcb *inp = NULL;
4725 char local[MAX_IPv6_STR_LEN+6];
4726 char remote[MAX_IPv6_STR_LEN+6];
4727 const void *addr;
4728
4729 if (so == NULL) {
4730 return;
4731 }
4732
4733 inp = sotoinpcb(so);
4734 if (inp == NULL) {
4735 return;
4736 }
4737
4738 local[0] = remote[0] = 0x0;
4739
4740#if INET6
4741 if (inp->inp_vflag & INP_IPV6) {
4742 addr = &inp->in6p_laddr.s6_addr32;
4743 inet_ntop(AF_INET6, addr, local, sizeof(local));
4744 addr = &inp->in6p_faddr.s6_addr32;
		inet_ntop(AF_INET6, addr, remote, sizeof(remote));
4746 } else
4747#endif /* INET6 */
4748 {
4749 addr = &inp->inp_laddr.s_addr;
4750 inet_ntop(AF_INET, addr, local, sizeof(local));
4751 addr = &inp->inp_faddr.s_addr;
		inet_ntop(AF_INET, addr, remote, sizeof(remote));
4753 }
4754
4755 if (so->so_cfil != NULL)
4756 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
4757 msg, IS_UDP(so) ? "UDP" : "TCP",
4758 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
4759 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
4760 else
4761 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
4762 msg, IS_UDP(so) ? "UDP" : "TCP",
4763 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
4764 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
4765}
4766
4767static void
4768cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
4769{
4770 if (cfil_info == NULL)
4771 return;
4772
4773 if (cfil_info->cfi_hash_entry != NULL)
4774 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
4775 else
4776 cfil_inp_log(level, cfil_info->cfi_so, msg);
4777}
4778
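/*
 * Allocate and initialize the per-socket flow database used for UDP:
 * a hash table of cfil_hash_entry structures keyed on the local and
 * remote addresses and ports of each flow.
 */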
4779errno_t
4780cfil_db_init(struct socket *so)
4781{
4782 errno_t error = 0;
4783 struct cfil_db *db = NULL;
4784
4785 CFIL_LOG(LOG_INFO, "");
4786
4787 db = zalloc(cfil_db_zone);
4788 if (db == NULL) {
4789 error = ENOMEM;
4790 goto done;
4791 }
4792 bzero(db, sizeof(struct cfil_db));
4793 db->cfdb_so = so;
4794 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
4795 if (db->cfdb_hashbase == NULL) {
4796 zfree(cfil_db_zone, db);
4797 db = NULL;
4798 error = ENOMEM;
4799 goto done;
4800 }
4801
4802 so->so_cfil_db = db;
4803
4804done:
4805 return (error);
4806}
4807
4808void
4809cfil_db_free(struct socket *so)
4810{
4811 struct cfil_hash_entry *entry = NULL;
4812 struct cfil_hash_entry *temp_entry = NULL;
4813 struct cfilhashhead *cfilhash = NULL;
4814 struct cfil_db *db = NULL;
4815
4816 CFIL_LOG(LOG_INFO, "");
4817
4818 if (so == NULL || so->so_cfil_db == NULL) {
4819 return;
4820 }
4821 db = so->so_cfil_db;
4822
4823#if LIFECYCLE_DEBUG
4824 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
4825 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
4826#endif
4827
4828 for (int i = 0; i < CFILHASHSIZE; i++) {
4829 cfilhash = &db->cfdb_hashbase[i];
4830 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
4831 if (entry->cfentry_cfil != NULL) {
4832#if LIFECYCLE_DEBUG
4833 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
4834#endif
4835 cfil_info_free(entry->cfentry_cfil);
4836 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
4837 entry->cfentry_cfil = NULL;
4838 }
4839
4840 cfil_db_delete_entry(db, entry);
4841 if (so->so_flags & SOF_CONTENT_FILTER) {
4842 if (db->cfdb_count == 0)
4843 so->so_flags &= ~SOF_CONTENT_FILTER;
4844 VERIFY(so->so_usecount > 0);
4845 so->so_usecount--;
4846 }
4847 }
4848 }
4849
4850 // Make sure all entries are cleaned up!
4851 VERIFY(db->cfdb_count == 0);
4852#if LIFECYCLE_DEBUG
4853 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
4854#endif
4855
4856 FREE(db->cfdb_hashbase, M_CFIL);
4857 zfree(cfil_db_zone, db);
4858 so->so_cfil_db = NULL;
4859}
4860
4861static bool
4862fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
4863{
4864 struct sockaddr_in *sin = NULL;
4865 struct sockaddr_in6 *sin6 = NULL;
4866
4867 if (entry == NULL || addr == NULL) {
4868 return FALSE;
4869 }
4870
4871 switch (addr->sa_family) {
4872 case AF_INET:
4873 sin = satosin(addr);
4874 if (sin->sin_len != sizeof(*sin)) {
4875 return FALSE;
4876 }
4877 if (isLocal == TRUE) {
4878 entry->cfentry_lport = sin->sin_port;
4879 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
4880 } else {
4881 entry->cfentry_fport = sin->sin_port;
4882 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
4883 }
4884 entry->cfentry_family = AF_INET;
4885 return TRUE;
4886 case AF_INET6:
4887 sin6 = satosin6(addr);
4888 if (sin6->sin6_len != sizeof(*sin6)) {
4889 return FALSE;
4890 }
4891 if (isLocal == TRUE) {
4892 entry->cfentry_lport = sin6->sin6_port;
4893 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
4894 } else {
4895 entry->cfentry_fport = sin6->sin6_port;
4896 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
4897 }
4898 entry->cfentry_family = AF_INET6;
4899 return TRUE;
4900 default:
4901 return FALSE;
4902 }
4903}
4904
4905static bool
4906fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
4907{
4908 if (entry == NULL || inp == NULL) {
4909 return FALSE;
4910 }
4911
4912 if (inp->inp_vflag & INP_IPV4) {
4913 if (isLocal == TRUE) {
4914 entry->cfentry_lport = inp->inp_lport;
4915 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
4916 } else {
4917 entry->cfentry_fport = inp->inp_fport;
4918 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
4919 }
4920 entry->cfentry_family = AF_INET;
4921 return TRUE;
4922 } else if (inp->inp_vflag & INP_IPV6) {
4923 if (isLocal == TRUE) {
4924 entry->cfentry_lport = inp->inp_lport;
4925 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
4926 } else {
4927 entry->cfentry_fport = inp->inp_fport;
4928 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
4929 }
4930 entry->cfentry_family = AF_INET6;
4931 return TRUE;
4932 }
4933 return FALSE;
4934}
4935
4936bool
4937check_port(struct sockaddr *addr, u_short port)
4938{
4939 struct sockaddr_in *sin = NULL;
4940 struct sockaddr_in6 *sin6 = NULL;
4941
4942 if (addr == NULL || port == 0) {
4943 return FALSE;
4944 }
4945
4946 switch (addr->sa_family) {
4947 case AF_INET:
4948 sin = satosin(addr);
4949 if (sin->sin_len != sizeof(*sin)) {
4950 return FALSE;
4951 }
4952 if (port == ntohs(sin->sin_port)) {
4953 return TRUE;
4954 }
4955 break;
4956 case AF_INET6:
4957 sin6 = satosin6(addr);
4958 if (sin6->sin6_len != sizeof(*sin6)) {
4959 return FALSE;
4960 }
4961 if (port == ntohs(sin6->sin6_port)) {
4962 return TRUE;
4963 }
4964 break;
4965 default:
4966 break;
4967 }
4968 return FALSE;
4969}
4970
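/*
 * Look up a flow by cfil sock id: the low 32 bits of the sock id are
 * the flow hash, which selects the bucket to search for an entry whose
 * cfil_info carries the matching sock id.
 */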
4971struct cfil_hash_entry *
4972cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
4973{
4974 struct cfilhashhead *cfilhash = NULL;
4975 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
4976 struct cfil_hash_entry *nextentry;
4977
4978 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
4979 return NULL;
4980 }
4981
4982 flowhash &= db->cfdb_hashmask;
4983 cfilhash = &db->cfdb_hashbase[flowhash];
4984
4985 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
4986 if (nextentry->cfentry_cfil != NULL &&
4987 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
4988 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
4989 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
4990 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
4991 return nextentry;
4992 }
4993 }
4994
4995 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
4996 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
4997 return NULL;
4998}
4999
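/*
 * Look up a flow by local and remote addresses; missing addresses are
 * filled in from the inpcb. The 4-tuple is hashed to select the bucket,
 * which is then searched for an exact match.
 */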
5000struct cfil_hash_entry *
5001cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5002{
5003 struct cfil_hash_entry matchentry;
5004 struct cfil_hash_entry *nextentry = NULL;
5005 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5006 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5007 int inp_hash_element = 0;
5008 struct cfilhashhead *cfilhash = NULL;
5009
5010 CFIL_LOG(LOG_INFO, "");
5011
5012 if (inp == NULL) {
5013 goto done;
5014 }
5015
5016 if (local != NULL) {
5017 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5018 } else {
5019 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5020 }
5021 if (remote != NULL) {
5022 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5023 } else {
5024 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5025 }
5026
5027#if INET6
5028 if (inp->inp_vflag & INP_IPV6) {
5029 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5030 hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
5031 } else
5032#endif /* INET6 */
5033 {
5034 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5035 hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
5036 }
5037
5038 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5039 matchentry.cfentry_lport, matchentry.cfentry_fport);
5040 inp_hash_element &= db->cfdb_hashmask;
5041
5042 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5043
5044 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5045
5046#if INET6
5047 if ((inp->inp_vflag & INP_IPV6) &&
5048 nextentry->cfentry_lport == matchentry.cfentry_lport &&
5049 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5050 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
5051 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5052#if DATA_DEBUG
5053 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5054#endif
5055 return nextentry;
5056 } else
5057#endif /* INET6 */
5058 if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
5059 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5060 nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
5061 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5062#if DATA_DEBUG
5063 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5064#endif
5065 return nextentry;
5066 }
5067 }
5068
5069done:
5070#if DATA_DEBUG
5071 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
5072#endif
5073 return NULL;
5074}
5075
5076void
5077cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
5078{
5079 if (hash_entry == NULL)
5080 return;
5081
5082 LIST_REMOVE(hash_entry, cfentry_link);
5083 zfree(cfil_hash_entry_zone, hash_entry);
5084 db->cfdb_count--;
5085 if (db->cfdb_only_entry == hash_entry)
5086 db->cfdb_only_entry = NULL;
5087}
5088
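/*
 * Allocate a new flow entry, fill in the local and remote addresses
 * (falling back to the inpcb when an address is not supplied), compute
 * the flow hash and insert the entry into its hash bucket.
 */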
5089struct cfil_hash_entry *
5090cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5091{
5092 struct cfil_hash_entry *entry = NULL;
5093 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5094 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5095 int inp_hash_element = 0;
5096 struct cfilhashhead *cfilhash = NULL;
5097
5098 CFIL_LOG(LOG_INFO, "");
5099
5100 if (inp == NULL) {
5101 goto done;
5102 }
5103
5104 entry = zalloc(cfil_hash_entry_zone);
5105 if (entry == NULL) {
5106 goto done;
5107 }
5108 bzero(entry, sizeof(struct cfil_hash_entry));
5109
5110 if (local != NULL) {
5111 fill_cfil_hash_entry_from_address(entry, TRUE, local);
5112 } else {
5113 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
5114 }
5115 if (remote != NULL) {
5116 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
5117 } else {
5118 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
5119 }
5120 entry->cfentry_lastused = net_uptime();
5121
5122#if INET6
5123 if (inp->inp_vflag & INP_IPV6) {
5124 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
5125 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
5126 } else
5127#endif /* INET6 */
5128 {
5129 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5130 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5131 }
5132 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5133 entry->cfentry_lport, entry->cfentry_fport);
5134 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
5135
5136 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5137
5138 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
5139 db->cfdb_count++;
5140 db->cfdb_only_entry = entry;
5141 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
5142
5143done:
5144 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
5145 return entry;
5146}
5147
5148struct cfil_info *
5149cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
5150{
5151 struct cfil_hash_entry *hash_entry = NULL;
5152
5153 CFIL_LOG(LOG_INFO, "");
5154
5155 if (db == NULL || id == 0) {
5156 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
5157 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), id);
5158 return NULL;
5159 }
5160
	// Optimization for a connected UDP socket, which has only one flow:
	// no need to do the hash lookup.
5163 if (db->cfdb_count == 1) {
5164 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
5165 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
5166 return (db->cfdb_only_entry->cfentry_cfil);
5167 }
5168 }
5169
5170 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
5171 return (hash_entry != NULL ? hash_entry->cfentry_cfil : NULL);
5172}
5173
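/*
 * Find or create the flow entry for a UDP datagram: allocate the flow
 * database on first use, look up the flow by addresses and, when it
 * does not exist yet, add it, attach a cfil_info for the filter control
 * unit, take a socket reference and dispatch the attach event.
 */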
5174struct cfil_hash_entry *
5175cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
5176{
5177#pragma unused(so, filter_control_unit, outgoing, local, remote)
5178 struct cfil_hash_entry *hash_entry = NULL;
5179
5180 errno_t error = 0;
5181 socket_lock_assert_owned(so);
5182
5183 // If new socket, allocate cfil db
5184 if (so->so_cfil_db == NULL) {
5185 if (cfil_db_init(so) != 0) {
5186 return (NULL);
5187 }
5188 }
5189
5190 // See if flow already exists.
5191 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
5192 if (hash_entry != NULL) {
5193 return (hash_entry);
5194 }
5195
5196 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
5197 if (hash_entry == NULL) {
5198 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5199 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
5200 return (NULL);
5201 }
5202
5203 if (cfil_info_alloc(so, hash_entry) == NULL ||
5204 hash_entry->cfentry_cfil == NULL) {
5205 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5206 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
5207 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5208 return (NULL);
5209 }
5210
5211#if LIFECYCLE_DEBUG
5212 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
5213#endif
5214
5215 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
5216 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
5217 filter_control_unit);
5218 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
5219 return (NULL);
5220 }
5221 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
5222 (uint64_t)VM_KERNEL_ADDRPERM(so),
5223 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
5224
5225 so->so_flags |= SOF_CONTENT_FILTER;
5226 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
5227
5228 /* Hold a reference on the socket for each flow */
5229 so->so_usecount++;
5230
5231 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, filter_control_unit);
5232 /* We can recover from flow control or out of memory errors */
5233 if (error != 0 && error != ENOBUFS && error != ENOMEM)
5234 return (NULL);
5235
5236 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
5237 return (hash_entry);
5238}
5239
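/*
 * UDP counterpart of cfil_sock_data_out()/cfil_sock_data_in(): bail out
 * when no filter is active or the socket is handled in user space only,
 * find or create the flow for this datagram and hand the data to
 * cfil_data_common() for filtering.
 */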
5240errno_t
5241cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
5242 struct sockaddr *local, struct sockaddr *remote,
5243 struct mbuf *data, struct mbuf *control, uint32_t flags)
5244{
5245#pragma unused(outgoing, so, local, remote, data, control, flags)
5246 errno_t error = 0;
5247 uint32_t filter_control_unit;
5248 struct cfil_hash_entry *hash_entry = NULL;
5249 struct cfil_info *cfil_info = NULL;
5250
5251 socket_lock_assert_owned(so);
5252
5253 if (cfil_active_count == 0) {
5254 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
5255 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
5256 return (error);
5257 }
5258
5259 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5260 if (filter_control_unit == 0) {
5261 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
5262 return (error);
5263 }
5264
5265 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
5266 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
5267 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
5268 return (error);
5269 }
5270
5271 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
5272 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
5274 return (EPIPE);
5275 }
	// Update the last-used timestamp; this drives the flow idle timeout
5277 hash_entry->cfentry_lastused = net_uptime();
5278 cfil_info = hash_entry->cfentry_cfil;
5279
5280 if (cfil_info->cfi_flags & CFIF_DROP) {
5281#if DATA_DEBUG
5282 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
5283#endif
5284 return (EPIPE);
5285 }
5286 if (control != NULL) {
5287 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5288 }
5289 if (data->m_type == MT_OOBDATA) {
5290 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5291 (uint64_t)VM_KERNEL_ADDRPERM(so));
5292 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5293 }
5294
5295 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
5296
5297 return (error);
5298}
5299
5300/*
 * Go through all UDP flows for the specified socket and return TRUE if
 * any flow is still attached. If need_wait is TRUE, wait on the first
 * attached flow.
5304 */
5305static int
5306cfil_filters_udp_attached(struct socket *so, bool need_wait)
5307{
5308 struct timespec ts;
5309 lck_mtx_t *mutex_held;
5310 struct cfilhashhead *cfilhash = NULL;
5311 struct cfil_db *db = NULL;
5312 struct cfil_hash_entry *hash_entry = NULL;
5313 struct cfil_hash_entry *temp_hash_entry = NULL;
5314 struct cfil_info *cfil_info = NULL;
5315 struct cfil_entry *entry = NULL;
5316 errno_t error = 0;
5317 int kcunit;
5318 int attached = 0;
5319
5320 socket_lock_assert_owned(so);
5321
5322 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
5323
5324 if (so->so_proto->pr_getlock != NULL)
5325 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5326 else
5327 mutex_held = so->so_proto->pr_domain->dom_mtx;
5328 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5329
5330 db = so->so_cfil_db;
5331
5332 for (int i = 0; i < CFILHASHSIZE; i++) {
5333 cfilhash = &db->cfdb_hashbase[i];
5334
5335 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5336
5337 if (hash_entry->cfentry_cfil != NULL) {
5338
5339 cfil_info = hash_entry->cfentry_cfil;
5340 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5341 entry = &cfil_info->cfi_entries[kcunit - 1];
5342
5343 /* Are we attached to the filter? */
5344 if (entry->cfe_filter == NULL) {
5345 continue;
5346 }
5347
5348 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
5349 continue;
5350 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
5351 continue;
5352
5353 attached = 1;
5354
5355 if (need_wait == TRUE) {
5356#if LIFECYCLE_DEBUG
5357 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
5358#endif
5359
5360 ts.tv_sec = cfil_close_wait_timeout / 1000;
5361 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5362 NSEC_PER_USEC * 1000;
5363
5364 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5365 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
5366 error = msleep((caddr_t)cfil_info, mutex_held,
5367 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
5368 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
5369
5370#if LIFECYCLE_DEBUG
5371 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
5372#endif
5373
5374 /*
5375 * Force close in case of timeout
5376 */
5377 if (error != 0) {
5378 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5379#if LIFECYCLE_DEBUG
5380 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
5381#endif
5382 entry->cfe_flags |= CFEF_CFIL_DETACHED;
5383 break;
5384 }
5385 }
5386 goto done;
5387 }
5388 }
5389 }
5390 }
5391 }
5392
5393done:
5394 return (attached);
5395}
5396
5397int32_t
5398cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
5399{
5400 struct socket *so = sb->sb_so;
5401 struct cfi_buf *cfi_buf;
5402 uint64_t pending = 0;
5403 uint64_t total_pending = 0;
5404 struct cfilhashhead *cfilhash = NULL;
5405 struct cfil_db *db = NULL;
5406 struct cfil_hash_entry *hash_entry = NULL;
5407 struct cfil_hash_entry *temp_hash_entry = NULL;
5408
5409 socket_lock_assert_owned(so);
5410
5411 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
5412 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
5413
5414 db = so->so_cfil_db;
5415
5416 for (int i = 0; i < CFILHASHSIZE; i++) {
5417 cfilhash = &db->cfdb_hashbase[i];
5418
5419 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5420
5421 if (hash_entry->cfentry_cfil != NULL) {
5422 if ((sb->sb_flags & SB_RECV) == 0)
5423 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
5424 else
5425 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
5426
5427 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
5428 /*
5429 * If we are limited by the "chars of mbufs used" roughly
5430 * adjust so we won't overcommit
5431 */
5432 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
5433 pending = cfi_buf->cfi_pending_mbcnt;
5434
5435 total_pending += pending;
5436 }
5437 }
5438 }
5439
5440 VERIFY(total_pending < INT32_MAX);
5441#if DATA_DEBUG
5442 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
5443 (uint64_t)VM_KERNEL_ADDRPERM(so),
5444 total_pending, check_thread);
5445#endif
5446 }
5447
5448 return (int32_t)(total_pending);
5449}
5450
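/*
 * Dispatch disconnect events to the attached filters for each UDP flow of
 * the socket, in the direction(s) implied by "how", skipping flows that are
 * already dropped or already marked with "shut_flag".
 * Returns ENOTCONN when no flow was dropped or newly marked as shut.
 */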
5451int
5452cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
5453{
5454 struct cfil_info *cfil_info = NULL;
5455 struct cfilhashhead *cfilhash = NULL;
5456 struct cfil_db *db = NULL;
5457 struct cfil_hash_entry *hash_entry = NULL;
5458 struct cfil_hash_entry *temp_hash_entry = NULL;
5459 errno_t error = 0;
5460 int done_count = 0;
5461 int kcunit;
5462
5463 socket_lock_assert_owned(so);
5464
5465 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
5466
5467 db = so->so_cfil_db;
5468
5469 for (int i = 0; i < CFILHASHSIZE; i++) {
5470 cfilhash = &db->cfdb_hashbase[i];
5471
5472 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5473
5474 if (hash_entry->cfentry_cfil != NULL) {
5475 cfil_info = hash_entry->cfentry_cfil;
5476
5477 // This flow is marked as DROP
5478 if (cfil_info->cfi_flags & drop_flag) {
5479 done_count++;
5480 continue;
5481 }
5482
5483 // This flow has been shut already, skip
5484 if (cfil_info->cfi_flags & shut_flag) {
5485 continue;
5486 }
5487 // Mark flow as shut
5488 cfil_info->cfi_flags |= shut_flag;
5489 done_count++;
5490
5491 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5492 /* Disconnect incoming side */
5493 if (how != SHUT_WR) {
5494 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
5495 }
5496 /* Disconnect outgoing side */
5497 if (how != SHUT_RD) {
5498 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
5499 }
5500 }
5501 }
5502 }
5503 }
5504 }
5505
5506 if (done_count == 0) {
5507 error = ENOTCONN;
5508 }
5509 return (error);
5510}
5511
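/*
 * Notify the attached filters of a shutdown on the UDP socket. When outgoing
 * data is still pending a filter verdict, the protocol level shutdown is
 * deferred: EJUSTRETURN is returned for SHUT_WR and SHUT_RDWR is downgraded
 * to SHUT_RD via "*how".
 */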
5512int
5513cfil_sock_udp_shutdown(struct socket *so, int *how)
5514{
5515 int error = 0;
5516
5517 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL))
5518 goto done;
5519
5520 socket_lock_assert_owned(so);
5521
5522 CFIL_LOG(LOG_INFO, "so %llx how %d",
5523 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5524
5525 /*
5526 * Check the state of the socket before the content filter
5527 */
5528 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5529 /* read already shut down */
5530 error = ENOTCONN;
5531 goto done;
5532 }
5533 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5534 /* write already shut down */
5535 error = ENOTCONN;
5536 goto done;
5537 }
5538
5539 /*
5540 * shutdown read: SHUT_RD or SHUT_RDWR
5541 */
5542 if (*how != SHUT_WR) {
5543 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
5544 if (error != 0)
5545 goto done;
5546 }
5547 /*
5548 * shutdown write: SHUT_WR or SHUT_RDWR
5549 */
5550 if (*how != SHUT_RD) {
5551 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
5552 if (error != 0)
5553 goto done;
5554
5555 /*
5556 * When outgoing data is pending, we delay the shutdown at the
5557 * protocol level until the content filters give the final
5558 * verdict on the pending data.
5559 */
5560 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5561 /*
5562 * When shutting down the read and write sides at once
5563 * we can proceed to the final shutdown of the read
5564 * side. Otherwise, we just return.
5565 */
5566 if (*how == SHUT_WR) {
5567 error = EJUSTRETURN;
5568 } else if (*how == SHUT_RDWR) {
5569 *how = SHUT_RD;
5570 }
5571 }
5572 }
5573done:
5574 return (error);
5575}
5576
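/*
 * The socket is being closed: notify the attached filters of the
 * disconnection and wait until they are done with the UDP flows, or until
 * the close-wait timeout forces a detach.
 */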
5577void
5578cfil_sock_udp_close_wait(struct socket *so)
5579{
5580 socket_lock_assert_owned(so);
5581
5582 while (cfil_filters_udp_attached(so, FALSE)) {
5583 /*
5584 * Notify the filters we are going away so they can detach
5585 */
5586 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
5587
5588		/*
5589		 * Make sure we still need to wait after the filters are
5590		 * notified of the disconnection
5591		 */
5592 if (cfil_filters_udp_attached(so, TRUE) == 0)
5593 break;
5594 }
5595}
5596
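/*
 * The socket is closed: send a closed event to the attached filters for
 * every UDP flow, inject any data that was already passed, then flush
 * whatever is left in the filter queues.
 */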
5597void
5598cfil_sock_udp_is_closed(struct socket *so)
5599{
5600 struct cfil_info *cfil_info = NULL;
5601 struct cfilhashhead *cfilhash = NULL;
5602 struct cfil_db *db = NULL;
5603 struct cfil_hash_entry *hash_entry = NULL;
5604 struct cfil_hash_entry *temp_hash_entry = NULL;
5605 errno_t error = 0;
5606 int kcunit;
5607
5608 socket_lock_assert_owned(so);
5609
5610 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
5611
5612 db = so->so_cfil_db;
5613
5614 for (int i = 0; i < CFILHASHSIZE; i++) {
5615 cfilhash = &db->cfdb_hashbase[i];
5616
5617 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5618 if (hash_entry->cfentry_cfil != NULL) {
5619
5620 cfil_info = hash_entry->cfentry_cfil;
5621
5622 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5623 /* Let the filters know of the closing */
5624 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
5625 }
5626
5627 /* Last chance to push passed data out */
5628 error = cfil_acquire_sockbuf(so, cfil_info, 1);
5629 if (error == 0)
5630 cfil_service_inject_queue(so, cfil_info, 1);
5631 cfil_release_sockbuf(so, 1);
5632
5633 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
5634
5635 /* Pending data needs to go */
5636 cfil_flush_queues(so, cfil_info);
5637
5638 CFIL_INFO_VERIFY(cfil_info);
5639 }
5640 }
5641 }
5642 }
5643}
5644
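/*
 * Space is available again in the socket buffer: retry the injection of
 * data that previously could not be queued (CFIF_RETRY_INJECT_IN/OUT) for
 * the UDP flows of the socket.
 */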
5645void
5646cfil_sock_udp_buf_update(struct sockbuf *sb)
5647{
5648 struct cfil_info *cfil_info = NULL;
5649 struct cfilhashhead *cfilhash = NULL;
5650 struct cfil_db *db = NULL;
5651 struct cfil_hash_entry *hash_entry = NULL;
5652 struct cfil_hash_entry *temp_hash_entry = NULL;
5653 errno_t error = 0;
5654 int outgoing;
5655 struct socket *so = sb->sb_so;
5656
5657 socket_lock_assert_owned(so);
5658
5659 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
5660
5661 if (!cfil_sbtrim)
5662 return;
5663
5664 db = so->so_cfil_db;
5665
5666 for (int i = 0; i < CFILHASHSIZE; i++) {
5667 cfilhash = &db->cfdb_hashbase[i];
5668
5669 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5670 if (hash_entry->cfentry_cfil != NULL) {
5671
5672 cfil_info = hash_entry->cfentry_cfil;
5673
5674 if ((sb->sb_flags & SB_RECV) == 0) {
5675 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
5676 return;
5677 outgoing = 1;
5678 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5679 } else {
5680 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
5681 return;
5682 outgoing = 0;
5683 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5684 }
5685
5686 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5687 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5688
5689 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
5690 if (error == 0)
5691 cfil_service_inject_queue(so, cfil_info, outgoing);
5692 cfil_release_sockbuf(so, outgoing);
5693 }
5694 }
5695 }
5696 }
5697}
5698
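/*
 * Debug helper: log the state of the content filter attached to the given
 * kernel control unit and of all its socket entries.
 */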
5699void
5700cfil_filter_show(u_int32_t kcunit)
5701{
5702 struct content_filter *cfc = NULL;
5703 struct cfil_entry *entry;
5704 int count = 0;
5705
5706 if (content_filters == NULL) {
5707 return;
5708 }
5709 if (kcunit > MAX_CONTENT_FILTER) {
5710 return;
5711 }
5712
5713 cfil_rw_lock_shared(&cfil_lck_rw);
5714
5715 if (content_filters[kcunit - 1] == NULL) {
5716 cfil_rw_unlock_shared(&cfil_lck_rw);
5717 return;
5718 }
5719 cfc = content_filters[kcunit - 1];
5720
5721 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
5722 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
5723 if (cfc->cf_flags & CFF_DETACHING)
5724 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
5725 if (cfc->cf_flags & CFF_ACTIVE)
5726 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
5727 if (cfc->cf_flags & CFF_FLOW_CONTROLLED)
5728 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
5729
5730 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
5731
5732 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
5733 struct cfil_info *cfil_info = entry->cfe_cfil_info;
5734
5735 count++;
5736
5737 if (entry->cfe_flags & CFEF_CFIL_DETACHED)
5738 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
5739 else
5740 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
5741 }
5742 }
5743
5744 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
5745
5746 cfil_rw_unlock_shared(&cfil_lck_rw);
5747
5748}
5749
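/*
 * Debug helper: log every attached cfil_info along with its flags.
 */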
5750void
5751cfil_info_show(void)
5752{
5753 struct cfil_info *cfil_info;
5754 int count = 0;
5755
5756 cfil_rw_lock_shared(&cfil_lck_rw);
5757
5758 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
5759
5760 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
5761
5762 count++;
5763
5764 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
5765
5766 if (cfil_info->cfi_flags & CFIF_DROP)
5767 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
5768 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)
5769 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
5770 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED)
5771 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
5772 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN)
5773 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
5774 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT)
5775 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
5776 if (cfil_info->cfi_flags & CFIF_SHUT_WR)
5777 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
5778 if (cfil_info->cfi_flags & CFIF_SHUT_RD)
5779 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
5780 }
5781
5782 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
5783
5784 cfil_rw_unlock_shared(&cfil_lck_rw);
5785}
5786
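/*
 * A UDP flow is considered idle when it has not been used for more than
 * "timeout" seconds.
 */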
5787bool
5788cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
5789{
5790 if (cfil_info && cfil_info->cfi_hash_entry &&
5791 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
5792#if GC_DEBUG
5793 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
5794#endif
5795 return true;
5796 }
5797 return false;
5798}
5799
5800bool
5801cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
5802{
5803 struct cfil_entry *entry;
5804 struct timeval current_tv;
5805 struct timeval diff_time;
5806
5807 if (cfil_info == NULL)
5808 return false;
5809
5810	/*
5811	 * If we have queued up more data than the pass offset and we haven't
5812	 * received an action from user space for a while (the user space filter
5813	 * might have crashed), report that the action timed out.
5814	 */
5815 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
5816 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
5817
5818 microuptime(&current_tv);
5819
5820 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5821 entry = &cfil_info->cfi_entries[kcunit - 1];
5822
5823 if (entry->cfe_filter == NULL)
5824 continue;
5825
5826 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
5827 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
5828 // haven't gotten an action from this filter, check timeout
5829 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
5830 if (diff_time.tv_sec >= timeout) {
5831#if GC_DEBUG
5832 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
5833#endif
5834 return true;
5835 }
5836 }
5837 }
5838 }
5839 return false;
5840}
5841
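/*
 * A flow has exceeded its buffering threshold when data had to be dropped
 * at the tail of its send or receive queue.
 */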
5842bool
5843cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
5844{
5845 if (cfil_info == NULL)
5846 return false;
5847
5848 /*
5849 * Clean up flow if it exceeded queue thresholds
5850 */
5851 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
5852 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
5853#if GC_DEBUG
5854 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
5855 cfil_udp_gc_mbuf_num_max,
5856 cfil_udp_gc_mbuf_cnt_max,
5857 cfil_info->cfi_snd.cfi_tail_drop_cnt,
5858 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
5859 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
5860#endif
5861 return true;
5862 }
5863
5864 return false;
5865}
5866
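/*
 * Put the UDP garbage collection thread to sleep: indefinitely when no UDP
 * flow is attached, otherwise until the next GC run interval.
 */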
5867static void
5868cfil_udp_gc_thread_sleep(bool forever)
5869{
5870 if (forever) {
5871 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
5872 THREAD_INTERRUPTIBLE);
5873 } else {
5874 uint64_t deadline = 0;
5875 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
5876 clock_absolutetime_interval_to_deadline(deadline, &deadline);
5877
5878 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
5879 THREAD_INTERRUPTIBLE, deadline);
5880 }
5881}
5882
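/*
 * Startup routine of the UDP garbage collection thread: schedule the first
 * GC pass and continue in cfil_info_udp_expire().
 */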
5883static void
5884cfil_udp_gc_thread_func(void *v, wait_result_t w)
5885{
5886#pragma unused(v, w)
5887
5888 ASSERT(cfil_udp_gc_thread == current_thread());
5889	thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
5890
5891 // Kick off gc shortly
5892 cfil_udp_gc_thread_sleep(false);
5893 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
5894 /* NOTREACHED */
5895}
5896
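/*
 * Garbage collect UDP flows that are idle, have timed out waiting for a
 * filter action, or have exceeded their buffering threshold. The expired
 * sock ids are collected under the shared lock, then each flow is freed
 * with its socket locked.
 */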
5897static void
5898cfil_info_udp_expire(void *v, wait_result_t w)
5899{
5900#pragma unused(v, w)
5901
5902 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
5903 static uint32_t expired_count = 0;
5904
5905 struct cfil_info *cfil_info;
5906 struct cfil_hash_entry *hash_entry;
5907 struct cfil_db *db;
5908 struct socket *so;
5909 u_int32_t current_time = 0;
5910
5911 current_time = net_uptime();
5912
5913 // Get all expired UDP flow ids
5914 cfil_rw_lock_shared(&cfil_lck_rw);
5915
5916 if (cfil_sock_udp_attached_count == 0) {
5917 cfil_rw_unlock_shared(&cfil_lck_rw);
5918 goto go_sleep;
5919 }
5920
5921 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
5922 if (expired_count >= UDP_FLOW_GC_MAX_COUNT)
5923 break;
5924
5925 if (IS_UDP(cfil_info->cfi_so)) {
5926 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
5927 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
5928 cfil_info_buffer_threshold_exceeded(cfil_info)) {
5929 expired_array[expired_count] = cfil_info->cfi_sock_id;
5930 expired_count++;
5931 }
5932 }
5933 }
5934 cfil_rw_unlock_shared(&cfil_lck_rw);
5935
5936 if (expired_count == 0)
5937 goto go_sleep;
5938
5939 for (uint32_t i = 0; i < expired_count; i++) {
5940
5941 // Search for socket (UDP only and lock so)
5942 so = cfil_socket_from_sock_id(expired_array[i], true);
5943 if (so == NULL) {
5944 continue;
5945 }
5946
5947 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
5948 if (cfil_info == NULL) {
5949 goto unlock;
5950 }
5951
5952 db = so->so_cfil_db;
5953 hash_entry = cfil_info->cfi_hash_entry;
5954
5955 if (db == NULL || hash_entry == NULL) {
5956 goto unlock;
5957 }
5958
5959#if GC_DEBUG || LIFECYCLE_DEBUG
5960 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
5961#endif
5962
5963 cfil_db_delete_entry(db, hash_entry);
5964 cfil_info_free(cfil_info);
5965 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5966
5967 if (so->so_flags & SOF_CONTENT_FILTER) {
5968 if (db->cfdb_count == 0)
5969 so->so_flags &= ~SOF_CONTENT_FILTER;
5970 VERIFY(so->so_usecount > 0);
5971 so->so_usecount--;
5972 }
5973unlock:
5974 socket_unlock(so, 1);
5975 }
5976
5977#if GC_DEBUG
5978 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
5979#endif
5980 expired_count = 0;
5981
5982go_sleep:
5983
5984	// Sleep forever (until woken up) if there are no more UDP flows to clean
5985 cfil_rw_lock_shared(&cfil_lck_rw);
5986	cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0);
5987 cfil_rw_unlock_shared(&cfil_lck_rw);
5988 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
5989 /* NOTREACHED */
5990}
5991
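/*
 * Tag the mbuf with the current socket state (state change count, socket
 * options and the faddr of the flow) so that it can be retrieved later with
 * cfil_udp_get_socket_state().
 */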
5992struct m_tag *
5993cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
5994{
5995 struct m_tag *tag = NULL;
5996 struct cfil_tag *ctag = NULL;
5997 struct cfil_hash_entry *hash_entry = NULL;
5998
5999 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
6000 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
6001 return NULL;
6002 }
6003
6004 /* Allocate a tag */
6005 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
6006 sizeof(struct cfil_tag), M_DONTWAIT, m);
6007
6008 if (tag) {
6009 ctag = (struct cfil_tag*)(tag + 1);
6010 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
6011 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
6012
6013 hash_entry = cfil_info->cfi_hash_entry;
6014 if (hash_entry->cfentry_family == AF_INET6) {
6015 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
6016 &hash_entry->cfentry_faddr.addr6,
6017 hash_entry->cfentry_fport);
6018 } else if (hash_entry->cfentry_family == AF_INET) {
6019 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
6020 hash_entry->cfentry_faddr.addr46.ia46_addr4,
6021 hash_entry->cfentry_fport);
6022 }
6023 m_tag_prepend(m, tag);
6024 return (tag);
6025 }
6026 return NULL;
6027}
6028
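/*
 * Retrieve the socket state saved by cfil_udp_save_socket_state(). The tag
 * is unlinked from the mbuf and returned; the caller must free it.
 */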
6029struct m_tag *
6030cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
6031 struct sockaddr **faddr)
6032{
6033 struct m_tag *tag = NULL;
6034 struct cfil_tag *ctag = NULL;
6035
6036 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
6037 if (tag) {
6038 ctag = (struct cfil_tag *)(tag + 1);
6039 if (state_change_cnt)
6040 *state_change_cnt = ctag->cfil_so_state_change_cnt;
6041 if (options)
6042 *options = ctag->cfil_so_options;
6043 if (faddr)
6044 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
6045
6046		/*
6047		 * Unlink the tag and hand it over to the caller.
6048		 * Note that the caller is responsible for freeing it.
6049		 */
6050 m_tag_unlink(m, tag);
6051 return tag;
6052 }
6053 return NULL;
6054}
6055
6056
6057