1 | /* |
2 | * Copyright (c) 2012-2024 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | |
30 | #include <sys/systm.h> |
31 | #include <sys/kern_control.h> |
32 | #include <net/kpi_protocol.h> |
33 | #include <net/kpi_interface.h> |
34 | #include <sys/socket.h> |
35 | #include <sys/socketvar.h> |
36 | #include <net/if.h> |
37 | #include <net/if_types.h> |
38 | #include <net/bpf.h> |
39 | #include <net/if_ipsec.h> |
40 | #include <sys/mbuf.h> |
41 | #include <sys/sockio.h> |
42 | #include <netinet/in.h> |
43 | #include <netinet/ip6.h> |
44 | #include <netinet6/in6_var.h> |
45 | #include <netinet6/ip6_var.h> |
46 | #include <sys/kauth.h> |
47 | #include <netinet6/ipsec.h> |
48 | #include <netinet6/ipsec6.h> |
49 | #include <netinet6/esp.h> |
50 | #include <netinet6/esp6.h> |
51 | #include <netinet/ip.h> |
52 | #include <net/flowadv.h> |
53 | #include <net/necp.h> |
54 | #include <netkey/key.h> |
55 | #include <net/pktap.h> |
56 | #include <kern/zalloc.h> |
57 | #include <os/log.h> |
58 | |
59 | #if SKYWALK |
60 | #include <skywalk/os_skywalk_private.h> |
61 | #include <skywalk/nexus/flowswitch/nx_flowswitch.h> |
62 | #include <skywalk/nexus/netif/nx_netif.h> |
63 | #define IPSEC_NEXUS 1 |
64 | #else // SKYWALK |
65 | #define IPSEC_NEXUS 0 |
66 | #endif // SKYWALK |
67 | |
68 | extern int net_qos_policy_restricted; |
69 | extern int net_qos_policy_restrict_avapps; |
70 | |
71 | /* Kernel Control functions */ |
72 | static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo); |
73 | static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, |
74 | void **unitinfo); |
75 | static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, |
76 | void **unitinfo); |
77 | static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, |
78 | void *unitinfo); |
79 | static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, |
80 | void *unitinfo, mbuf_t m, int flags); |
81 | static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, |
82 | int opt, void *data, size_t *len); |
83 | static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, |
84 | int opt, void *data, size_t len); |
85 | |
86 | /* Network Interface functions */ |
87 | static void ipsec_start(ifnet_t interface); |
88 | static errno_t ipsec_output(ifnet_t interface, mbuf_t data); |
89 | static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *, |
90 | protocol_family_t *protocol); |
91 | static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol, |
92 | const struct ifnet_demux_desc *demux_array, |
93 | u_int32_t demux_count); |
94 | static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol); |
95 | static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data); |
96 | static void ipsec_detached(ifnet_t interface); |
97 | |
98 | /* Protocol handlers */ |
99 | static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto); |
100 | static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol, |
101 | mbuf_t m, char *); |
102 | static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol, |
103 | mbuf_t *packet, const struct sockaddr *dest, void *route, |
104 | char *frame_type, char *link_layer_dest); |
105 | |
106 | static kern_ctl_ref ipsec_kctlref; |
107 | static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0); |
108 | static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec" ); |
109 | static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr); |
110 | |
111 | #if IPSEC_NEXUS |
112 | |
113 | SYSCTL_DECL(_net_ipsec); |
114 | SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec" ); |
115 | static int if_ipsec_verify_interface_creation = 0; |
116 | SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "" ); |
117 | |
118 | #define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); } |
119 | |
120 | #define IPSEC_IF_DEFAULT_SLOT_SIZE 2048 |
121 | #define IPSEC_IF_DEFAULT_RING_SIZE 64 |
122 | #define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64 |
123 | #define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128 |
124 | #define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size |
125 | |
126 | #define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES |
127 | #define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT |
128 | #define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT |
129 | #define IPSEC_NETIF_WMM_RX_RING_COUNT 1 |
130 | #define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT |
131 | #define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT |
132 | |
133 | #define IPSEC_IF_MIN_RING_SIZE 8 |
134 | #define IPSEC_IF_MAX_RING_SIZE 1024 |
135 | |
136 | #define IPSEC_IF_MIN_SLOT_SIZE 1024 |
137 | #define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024) |
138 | |
139 | #define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512 |
140 | |
141 | #define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01 |
142 | |
143 | static uint32_t ipsec_kpipe_mbuf; |
144 | |
145 | static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT; |
146 | |
147 | static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS; |
148 | static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS; |
149 | static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS; |
150 | |
151 | static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE; |
152 | static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE; |
153 | static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE; |
154 | |
155 | SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "" ); |
156 | SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, |
157 | &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I" , "" ); |
158 | SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, |
159 | &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I" , "" ); |
160 | SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW, |
161 | &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I" , "" ); |
162 | |
163 | static int if_ipsec_debug = 0; |
164 | SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "" ); |
165 | |
166 | static errno_t |
167 | ipsec_register_nexus(void); |
168 | |
/*
 * Set of UUIDs kept per interface (embedded in struct ipsec_pcb as ipsec_nx).
 * NOTE(review): the code that fills these in is outside this view; the
 * per-field notes below are inferred from the field names only -- confirm.
 */
typedef struct ipsec_nx {
	uuid_t if_provider;     /* presumably the netif nexus provider */
	uuid_t if_instance;     /* presumably the netif nexus instance */
	uuid_t fsw_provider;    /* presumably the flowswitch nexus provider */
	uuid_t fsw_instance;    /* presumably the flowswitch nexus instance */
	uuid_t fsw_device;      /* presumably the flowswitch device attachment */
	uuid_t fsw_agent;       /* presumably the flowswitch netagent */
} *ipsec_nx_t;
177 | |
178 | static nexus_controller_t ipsec_ncd; |
179 | static int ipsec_ncd_refcount; |
180 | static uuid_t ipsec_kpipe_uuid; |
181 | |
182 | #endif // IPSEC_NEXUS |
183 | |
/*
 * Control block allocated for each kernel control connection.
 *
 * The same structure is what ifnet_softc() and kern_nexus_get_context()
 * return to the interface and nexus callbacks in this file.
 */
struct ipsec_pcb {
	TAILQ_ENTRY(ipsec_pcb)  ipsec_chain;
	kern_ctl_ref            ipsec_ctlref;
	ifnet_t                 ipsec_ifp;
	// Reset to 0 when the control disconnects; ipsec_interface_isvalid()
	// treats a zero unit as "not valid".
	u_int32_t               ipsec_unit;
	u_int32_t               ipsec_unique_id;
	// These external flags can be set with IPSEC_OPT_FLAGS
	u_int32_t               ipsec_external_flags;
	// These internal flags are only used within this driver
	// (IPSEC_FLAGS_*, accessed through ipsec_flag_set/clr/isset)
	u_int32_t               ipsec_internal_flags;
	u_int32_t               ipsec_input_frag_size;
	bool                    ipsec_frag_size_set;
	int                     ipsec_ext_ifdata_stats;
	mbuf_svc_class_t        ipsec_output_service_class;
	char                    ipsec_if_xname[IFXNAMSIZ];
	char                    ipsec_unique_name[IFXNAMSIZ];
	// PCB lock protects state fields, like ipsec_kpipe_count
	decl_lck_rw_data(, ipsec_pcb_lock);
	// lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
	decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
	u_int32_t               ipsec_pcb_data_move; /* number of data moving contexts */
	u_int32_t               ipsec_pcb_drainers; /* number of threads waiting to drain */
	u_int32_t               ipsec_pcb_data_path_state; /* internal state of interface data path (IPSEC_PCB_DATA_PATH_* bits) */
	ipsec_dscp_mapping_t    ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
	// Input chain lock protects the list of input mbufs
	// The input chain lock must be taken AFTER the PCB lock if both are held
	lck_mtx_t               ipsec_input_chain_lock;
	lck_mtx_t               ipsec_kpipe_encrypt_lock;
	lck_mtx_t               ipsec_kpipe_decrypt_lock;
	struct mbuf *           ipsec_input_chain;
	struct mbuf *           ipsec_input_chain_last;
	u_int32_t               ipsec_input_chain_count;
	struct ipsec_nx         ipsec_nx;
	// Number of entries in use in the ipsec_kpipe_* arrays below
	u_int32_t               ipsec_kpipe_count;
	pid_t                   ipsec_kpipe_pid;
	uuid_t                  ipsec_kpipe_proc_uuid;
	// Matched against the channel's nexus UUID in ipsec_kpipe_ring_init()
	uuid_t                  ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
	// Recorded by ipsec_kpipe_ring_init(), cleared by ipsec_kpipe_ring_fini()
	void *                  ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
	void *                  ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
	kern_pbufpool_t         ipsec_kpipe_pp;
	u_int32_t               ipsec_kpipe_tx_ring_size;
	u_int32_t               ipsec_kpipe_rx_ring_size;

	// Set in ipsec_netif_prepare(), cleared in ipsec_nexus_disconnected()
	kern_nexus_t            ipsec_netif_nexus;
	kern_pbufpool_t         ipsec_netif_pp;
	void *                  ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
	void *                  ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
	uint64_t                ipsec_netif_txring_size;

	u_int32_t               ipsec_slot_size;
	u_int32_t               ipsec_netif_ring_size;
	u_int32_t               ipsec_tx_fsw_ring_size;
	u_int32_t               ipsec_rx_fsw_ring_size;
	bool                    ipsec_use_netif;
	// Reported to callers by ipsec_interface_needs_netagent()
	bool                    ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
244 | |
245 | /* These are internal flags not exposed outside this file */ |
246 | #define IPSEC_FLAGS_KPIPE_ALLOCATED 1 |
247 | |
248 | /* data movement refcounting functions */ |
249 | static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb); |
250 | static void ipsec_data_move_end(struct ipsec_pcb *pcb); |
251 | static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb); |
252 | |
253 | /* Data path states */ |
254 | #define IPSEC_PCB_DATA_PATH_READY 0x1 |
255 | |
256 | /* Macros to set/clear/test data path states */ |
257 | #define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY) |
258 | #define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY) |
259 | #define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0) |
260 | |
261 | #if IPSEC_NEXUS |
/* Inline helpers to set/clear/test the driver-internal flags. */
263 | static inline void |
264 | ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag) |
265 | { |
266 | pcb->ipsec_internal_flags |= flag; |
267 | } |
268 | static inline void |
269 | ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag) |
270 | { |
271 | pcb->ipsec_internal_flags &= ~flag; |
272 | } |
273 | |
274 | static inline bool |
275 | ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag) |
276 | { |
277 | return !!(pcb->ipsec_internal_flags & flag); |
278 | } |
279 | #endif // IPSEC_NEXUS |
280 | |
281 | TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head; |
282 | |
283 | static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT); |
284 | |
285 | #define IPSECQ_MAXLEN 256 |
286 | |
287 | #if IPSEC_NEXUS |
288 | static int |
289 | sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS |
290 | { |
291 | #pragma unused(arg1, arg2) |
292 | int value = if_ipsec_ring_size; |
293 | |
294 | int error = sysctl_handle_int(oidp, arg1: &value, arg2: 0, req); |
295 | if (error || !req->newptr) { |
296 | return error; |
297 | } |
298 | |
299 | if (value < IPSEC_IF_MIN_RING_SIZE || |
300 | value > IPSEC_IF_MAX_RING_SIZE) { |
301 | return EINVAL; |
302 | } |
303 | |
304 | if_ipsec_ring_size = value; |
305 | |
306 | return 0; |
307 | } |
308 | |
309 | static int |
310 | sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS |
311 | { |
312 | #pragma unused(arg1, arg2) |
313 | int value = if_ipsec_tx_fsw_ring_size; |
314 | |
315 | int error = sysctl_handle_int(oidp, arg1: &value, arg2: 0, req); |
316 | if (error || !req->newptr) { |
317 | return error; |
318 | } |
319 | |
320 | if (value < IPSEC_IF_MIN_RING_SIZE || |
321 | value > IPSEC_IF_MAX_RING_SIZE) { |
322 | return EINVAL; |
323 | } |
324 | |
325 | if_ipsec_tx_fsw_ring_size = value; |
326 | |
327 | return 0; |
328 | } |
329 | |
330 | static int |
331 | sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS |
332 | { |
333 | #pragma unused(arg1, arg2) |
334 | int value = if_ipsec_rx_fsw_ring_size; |
335 | |
336 | int error = sysctl_handle_int(oidp, arg1: &value, arg2: 0, req); |
337 | if (error || !req->newptr) { |
338 | return error; |
339 | } |
340 | |
341 | if (value < IPSEC_IF_MIN_RING_SIZE || |
342 | value > IPSEC_IF_MAX_RING_SIZE) { |
343 | return EINVAL; |
344 | } |
345 | |
346 | if_ipsec_rx_fsw_ring_size = value; |
347 | |
348 | return 0; |
349 | } |
350 | |
351 | |
352 | static inline bool |
353 | ipsec_in_wmm_mode(struct ipsec_pcb *pcb) |
354 | { |
355 | return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT; |
356 | } |
357 | |
358 | #endif // IPSEC_NEXUS |
359 | |
360 | errno_t |
361 | ipsec_register_control(void) |
362 | { |
363 | struct kern_ctl_reg kern_ctl; |
364 | errno_t result = 0; |
365 | |
366 | #if (DEVELOPMENT || DEBUG) |
367 | (void)PE_parse_boot_argn("ipsec_kpipe_mbuf" , &ipsec_kpipe_mbuf, |
368 | sizeof(ipsec_kpipe_mbuf)); |
369 | #endif /* DEVELOPMENT || DEBUG */ |
370 | |
371 | #if IPSEC_NEXUS |
372 | ipsec_register_nexus(); |
373 | #endif // IPSEC_NEXUS |
374 | |
375 | TAILQ_INIT(&ipsec_head); |
376 | |
377 | bzero(s: &kern_ctl, n: sizeof(kern_ctl)); |
378 | strlcpy(dst: kern_ctl.ctl_name, IPSEC_CONTROL_NAME, n: sizeof(kern_ctl.ctl_name)); |
379 | kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; |
380 | kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */ |
381 | kern_ctl.ctl_sendsize = 64 * 1024; |
382 | kern_ctl.ctl_recvsize = 64 * 1024; |
383 | kern_ctl.ctl_setup = ipsec_ctl_setup; |
384 | kern_ctl.ctl_bind = ipsec_ctl_bind; |
385 | kern_ctl.ctl_connect = ipsec_ctl_connect; |
386 | kern_ctl.ctl_disconnect = ipsec_ctl_disconnect; |
387 | kern_ctl.ctl_send = ipsec_ctl_send; |
388 | kern_ctl.ctl_setopt = ipsec_ctl_setopt; |
389 | kern_ctl.ctl_getopt = ipsec_ctl_getopt; |
390 | |
391 | result = ctl_register(userkctl: &kern_ctl, kctlref: &ipsec_kctlref); |
392 | if (result != 0) { |
393 | os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n" , result); |
394 | return result; |
395 | } |
396 | |
397 | /* Register the protocol plumbers */ |
398 | if ((result = proto_register_plumber(PF_INET, if_fam: IFNET_FAMILY_IPSEC, |
399 | plumb: ipsec_attach_proto, NULL)) != 0) { |
400 | os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n" , |
401 | result); |
402 | ctl_deregister(kctlref: ipsec_kctlref); |
403 | return result; |
404 | } |
405 | |
406 | /* Register the protocol plumbers */ |
407 | if ((result = proto_register_plumber(PF_INET6, if_fam: IFNET_FAMILY_IPSEC, |
408 | plumb: ipsec_attach_proto, NULL)) != 0) { |
409 | proto_unregister_plumber(PF_INET, if_fam: IFNET_FAMILY_IPSEC); |
410 | ctl_deregister(kctlref: ipsec_kctlref); |
411 | os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n" , |
412 | result); |
413 | return result; |
414 | } |
415 | |
416 | return 0; |
417 | } |
418 | |
419 | /* Helpers */ |
420 | int |
421 | ipsec_interface_isvalid(ifnet_t interface) |
422 | { |
423 | struct ipsec_pcb *pcb = NULL; |
424 | |
425 | if (interface == NULL) { |
426 | return 0; |
427 | } |
428 | |
429 | pcb = ifnet_softc(interface); |
430 | |
431 | if (pcb == NULL) { |
432 | return 0; |
433 | } |
434 | |
435 | /* When ctl disconnects, ipsec_unit is set to 0 */ |
436 | if (pcb->ipsec_unit == 0) { |
437 | return 0; |
438 | } |
439 | |
440 | return 1; |
441 | } |
442 | |
443 | #if IPSEC_NEXUS |
444 | boolean_t |
445 | ipsec_interface_needs_netagent(ifnet_t interface) |
446 | { |
447 | struct ipsec_pcb *pcb = NULL; |
448 | |
449 | if (interface == NULL) { |
450 | return FALSE; |
451 | } |
452 | |
453 | pcb = ifnet_softc(interface); |
454 | |
455 | if (pcb == NULL) { |
456 | return FALSE; |
457 | } |
458 | |
459 | return pcb->ipsec_needs_netagent == true; |
460 | } |
461 | #endif // IPSEC_NEXUS |
462 | |
463 | static errno_t |
464 | ipsec_ifnet_set_attrs(ifnet_t ifp) |
465 | { |
466 | /* Set flags and additional information. */ |
467 | ifnet_set_mtu(interface: ifp, mtu: 1500); |
468 | ifnet_set_flags(interface: ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, mask: 0xffff); |
469 | |
470 | /* The interface must generate its own IPv6 LinkLocal address, |
471 | * if possible following the recommendation of RFC2472 to the 64bit interface ID |
472 | */ |
473 | ifnet_set_eflags(interface: ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL); |
474 | |
475 | #if !IPSEC_NEXUS |
476 | /* Reset the stats in case as the interface may have been recycled */ |
477 | struct ifnet_stats_param stats; |
478 | bzero(&stats, sizeof(struct ifnet_stats_param)); |
479 | ifnet_set_stat(ifp, &stats); |
480 | #endif // !IPSEC_NEXUS |
481 | |
482 | return 0; |
483 | } |
484 | |
485 | #if IPSEC_NEXUS |
486 | |
487 | static uuid_t ipsec_nx_dom_prov; |
488 | |
489 | static errno_t |
490 | ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov) |
491 | { |
492 | return 0; |
493 | } |
494 | |
495 | static void |
496 | ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov) |
497 | { |
498 | // Ignore |
499 | } |
500 | |
501 | static errno_t |
502 | ipsec_register_nexus(void) |
503 | { |
504 | const struct kern_nexus_domain_provider_init dp_init = { |
505 | .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION, |
506 | .nxdpi_flags = 0, |
507 | .nxdpi_init = ipsec_nxdp_init, |
508 | .nxdpi_fini = ipsec_nxdp_fini |
509 | }; |
510 | errno_t err = 0; |
511 | |
512 | /* ipsec_nxdp_init() is called before this function returns */ |
513 | err = kern_nexus_register_domain_provider(type: NEXUS_TYPE_NET_IF, |
514 | name: (const uint8_t *) "com.apple.ipsec" , |
515 | init: &dp_init, init_len: sizeof(dp_init), |
516 | dom_prov_uuid: &ipsec_nx_dom_prov); |
517 | if (err != 0) { |
518 | os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n" , __func__); |
519 | return err; |
520 | } |
521 | return 0; |
522 | } |
523 | |
524 | static errno_t |
525 | ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp) |
526 | { |
527 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
528 | pcb->ipsec_netif_nexus = nexus; |
529 | return ipsec_ifnet_set_attrs(ifp); |
530 | } |
531 | |
532 | static errno_t |
533 | ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov, |
534 | proc_t p, kern_nexus_t nexus, |
535 | nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx) |
536 | { |
537 | #pragma unused(nxprov, p) |
538 | #pragma unused(nexus, nexus_port, channel, ch_ctx) |
539 | return 0; |
540 | } |
541 | |
542 | static errno_t |
543 | ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
544 | kern_channel_t channel) |
545 | { |
546 | #pragma unused(nxprov, channel) |
547 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
548 | boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, refio: 1); |
549 | /* Mark the data path as ready */ |
550 | if (ok) { |
551 | lck_mtx_lock(lck: &pcb->ipsec_pcb_data_move_lock); |
552 | IPSEC_SET_DATA_PATH_READY(pcb); |
553 | lck_mtx_unlock(lck: &pcb->ipsec_pcb_data_move_lock); |
554 | } |
555 | return ok ? 0 : ENXIO; |
556 | } |
557 | |
558 | static void |
559 | ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
560 | kern_channel_t channel) |
561 | { |
562 | #pragma unused(nxprov, channel) |
563 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
564 | |
565 | VERIFY(pcb->ipsec_kpipe_count != 0); |
566 | |
567 | /* Wait until all threads in the data paths are done. */ |
568 | ipsec_wait_data_move_drain(pcb); |
569 | } |
570 | |
571 | static void |
572 | ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
573 | kern_channel_t channel) |
574 | { |
575 | #pragma unused(nxprov, channel) |
576 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
577 | |
578 | /* Wait until all threads in the data paths are done. */ |
579 | ipsec_wait_data_move_drain(pcb); |
580 | } |
581 | |
582 | static void |
583 | ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
584 | kern_channel_t channel) |
585 | { |
586 | #pragma unused(nxprov, channel) |
587 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
588 | if (pcb->ipsec_netif_nexus == nexus) { |
589 | pcb->ipsec_netif_nexus = NULL; |
590 | } |
591 | ifnet_decr_iorefcnt(pcb->ipsec_ifp); |
592 | } |
593 | |
594 | static errno_t |
595 | ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
596 | kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring, |
597 | void **ring_ctx) |
598 | { |
599 | #pragma unused(nxprov) |
600 | #pragma unused(channel) |
601 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
602 | uint8_t ring_idx; |
603 | |
604 | for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) { |
605 | if (!uuid_compare(uu1: channel->ch_info->cinfo_nx_uuid, uu2: pcb->ipsec_kpipe_uuid[ring_idx])) { |
606 | break; |
607 | } |
608 | } |
609 | |
610 | if (ring_idx == pcb->ipsec_kpipe_count) { |
611 | uuid_string_t uuidstr; |
612 | uuid_unparse(uu: channel->ch_info->cinfo_nx_uuid, out: uuidstr); |
613 | os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n" , __func__, pcb->ipsec_if_xname, uuidstr); |
614 | return ENOENT; |
615 | } |
616 | |
617 | *ring_ctx = (void *)(uintptr_t)ring_idx; |
618 | |
619 | if (!is_tx_ring) { |
620 | VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL); |
621 | pcb->ipsec_kpipe_rxring[ring_idx] = ring; |
622 | } else { |
623 | VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL); |
624 | pcb->ipsec_kpipe_txring[ring_idx] = ring; |
625 | } |
626 | return 0; |
627 | } |
628 | |
629 | static void |
630 | ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
631 | kern_channel_ring_t ring) |
632 | { |
633 | #pragma unused(nxprov) |
634 | bool found = false; |
635 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
636 | |
637 | for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) { |
638 | if (pcb->ipsec_kpipe_rxring[i] == ring) { |
639 | pcb->ipsec_kpipe_rxring[i] = NULL; |
640 | found = true; |
641 | } else if (pcb->ipsec_kpipe_txring[i] == ring) { |
642 | pcb->ipsec_kpipe_txring[i] = NULL; |
643 | found = true; |
644 | } |
645 | } |
646 | VERIFY(found); |
647 | } |
648 | |
649 | static errno_t |
650 | ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
651 | kern_channel_ring_t tx_ring, uint32_t flags) |
652 | { |
653 | #pragma unused(nxprov) |
654 | #pragma unused(flags) |
655 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
656 | |
657 | if (!ipsec_data_move_begin(pcb)) { |
658 | os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
659 | return 0; |
660 | } |
661 | |
662 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
663 | |
664 | if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
665 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
666 | ipsec_data_move_end(pcb); |
667 | return 0; |
668 | } |
669 | |
670 | VERIFY(pcb->ipsec_kpipe_count); |
671 | |
672 | kern_channel_slot_t tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
673 | if (tx_slot == NULL) { |
674 | // Nothing to write, bail |
675 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
676 | ipsec_data_move_end(pcb); |
677 | return 0; |
678 | } |
679 | |
680 | // Signal the netif ring to read |
681 | kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0]; |
682 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
683 | |
684 | if (rx_ring != NULL) { |
685 | kern_channel_notify(rx_ring, flags: 0); |
686 | } |
687 | |
688 | ipsec_data_move_end(pcb); |
689 | return 0; |
690 | } |
691 | |
692 | static mbuf_t |
693 | ipsec_encrypt_mbuf(ifnet_t interface, |
694 | mbuf_t data) |
695 | { |
696 | struct ipsec_output_state ipsec_state; |
697 | int error = 0; |
698 | uint32_t af; |
699 | |
700 | // Make sure this packet isn't looping through the interface |
701 | if (necp_get_last_interface_index_from_packet(packet: data) == interface->if_index) { |
702 | error = -1; |
703 | goto ipsec_output_err; |
704 | } |
705 | |
706 | // Mark the interface so NECP can evaluate tunnel policy |
707 | necp_mark_packet_from_interface(packet: data, interface); |
708 | |
709 | struct ip *ip = mtod(data, struct ip *); |
710 | u_int ip_version = ip->ip_v; |
711 | |
712 | switch (ip_version) { |
713 | case 4: { |
714 | af = AF_INET; |
715 | |
716 | memset(s: &ipsec_state, c: 0, n: sizeof(ipsec_state)); |
717 | ipsec_state.m = data; |
718 | ipsec_state.dst = (struct sockaddr *)&ip->ip_dst; |
719 | memset(s: &ipsec_state.ro, c: 0, n: sizeof(ipsec_state.ro)); |
720 | |
721 | error = ipsec4_interface_output(state: &ipsec_state, interface); |
722 | if (error == 0 && ipsec_state.tunneled == 6) { |
723 | // Tunneled in IPv6 - packet is gone |
724 | // TODO: Don't lose mbuf |
725 | data = NULL; |
726 | goto done; |
727 | } |
728 | |
729 | data = ipsec_state.m; |
730 | if (error || data == NULL) { |
731 | if (error) { |
732 | os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n" , error); |
733 | } |
734 | goto ipsec_output_err; |
735 | } |
736 | goto done; |
737 | } |
738 | case 6: { |
739 | af = AF_INET6; |
740 | |
741 | data = ipsec6_splithdr(data); |
742 | if (data == NULL) { |
743 | os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n" ); |
744 | goto ipsec_output_err; |
745 | } |
746 | |
747 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
748 | |
749 | memset(s: &ipsec_state, c: 0, n: sizeof(ipsec_state)); |
750 | ipsec_state.m = data; |
751 | ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst; |
752 | memset(s: &ipsec_state.ro, c: 0, n: sizeof(ipsec_state.ro)); |
753 | |
754 | error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m); |
755 | if (error == 0 && ipsec_state.tunneled == 4) { |
756 | // Tunneled in IPv4 - packet is gone |
757 | // TODO: Don't lose mbuf |
758 | data = NULL; |
759 | goto done; |
760 | } |
761 | data = ipsec_state.m; |
762 | if (error || data == NULL) { |
763 | if (error) { |
764 | os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n" , error); |
765 | } |
766 | goto ipsec_output_err; |
767 | } |
768 | goto done; |
769 | } |
770 | default: { |
771 | os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n" , ip_version); |
772 | error = -1; |
773 | goto ipsec_output_err; |
774 | } |
775 | } |
776 | |
777 | done: |
778 | return data; |
779 | |
780 | ipsec_output_err: |
781 | if (data) { |
782 | mbuf_freem(mbuf: data); |
783 | } |
784 | return NULL; |
785 | } |
786 | |
787 | static errno_t |
788 | ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
789 | kern_channel_ring_t rx_ring, uint32_t flags) |
790 | { |
791 | #pragma unused(nxprov) |
792 | #pragma unused(flags) |
793 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
794 | struct kern_channel_ring_stat_increment rx_ring_stats; |
795 | uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(ring: rx_ring); |
796 | |
797 | if (!ipsec_data_move_begin(pcb)) { |
798 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
799 | return 0; |
800 | } |
801 | |
802 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
803 | |
804 | if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
805 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
806 | ipsec_data_move_end(pcb); |
807 | return 0; |
808 | } |
809 | |
810 | VERIFY(pcb->ipsec_kpipe_count); |
811 | VERIFY(ring_idx <= pcb->ipsec_kpipe_count); |
812 | |
813 | // Reclaim user-released slots |
814 | (void) kern_channel_reclaim(rx_ring); |
815 | |
816 | uint32_t avail = kern_channel_available_slot_count(ring: rx_ring); |
817 | if (avail == 0) { |
818 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
819 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n" , __func__, |
820 | pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx); |
821 | ipsec_data_move_end(pcb); |
822 | return 0; |
823 | } |
824 | |
825 | kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx]; |
826 | if (tx_ring == NULL) { |
827 | // Net-If TX ring not set up yet, nothing to read |
828 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
829 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n" , __func__, |
830 | pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx); |
831 | ipsec_data_move_end(pcb); |
832 | return 0; |
833 | } |
834 | |
835 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats; |
836 | |
837 | // Unlock ipsec before entering ring |
838 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
839 | |
840 | (void)kr_enter(tx_ring, TRUE); |
841 | |
842 | // Lock again after entering and validate |
843 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
844 | if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) { |
845 | // Ring no longer valid |
846 | // Unlock first, then exit ring |
847 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
848 | kr_exit(tx_ring); |
849 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n" , __func__, |
850 | pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx); |
851 | ipsec_data_move_end(pcb); |
852 | return 0; |
853 | } |
854 | |
855 | struct kern_channel_ring_stat_increment tx_ring_stats; |
856 | bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats)); |
857 | kern_channel_slot_t tx_pslot = NULL; |
858 | kern_channel_slot_t tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
859 | if (tx_slot == NULL) { |
860 | // Nothing to read, don't bother signalling |
861 | // Unlock first, then exit ring |
862 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
863 | kr_exit(tx_ring); |
864 | ipsec_data_move_end(pcb); |
865 | return 0; |
866 | } |
867 | |
868 | struct kern_pbufpool *rx_pp = rx_ring->ckr_pp; |
869 | VERIFY(rx_pp != NULL); |
870 | struct kern_pbufpool *tx_pp = tx_ring->ckr_pp; |
871 | VERIFY(tx_pp != NULL); |
872 | bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats)); |
873 | kern_channel_slot_t rx_pslot = NULL; |
874 | kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL); |
875 | kern_packet_t tx_chain_ph = 0; |
876 | |
877 | while (rx_slot != NULL && tx_slot != NULL) { |
878 | size_t length = 0; |
879 | mbuf_t data = NULL; |
880 | errno_t error = 0; |
881 | |
882 | // Allocate rx packet |
883 | kern_packet_t rx_ph = 0; |
884 | error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
885 | if (__improbable(error != 0)) { |
886 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n" , |
887 | pcb->ipsec_ifp->if_xname); |
888 | break; |
889 | } |
890 | |
891 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
892 | |
893 | if (tx_ph == 0) { |
894 | // Advance TX ring |
895 | tx_pslot = tx_slot; |
896 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
897 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
898 | continue; |
899 | } |
900 | (void) kern_channel_slot_detach_packet(ring: tx_ring, slot: tx_slot, packet: tx_ph); |
901 | if (tx_chain_ph != 0) { |
902 | kern_packet_append(tx_ph, tx_chain_ph); |
903 | } |
904 | tx_chain_ph = tx_ph; |
905 | |
906 | // Advance TX ring |
907 | tx_pslot = tx_slot; |
908 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
909 | |
910 | kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL); |
911 | VERIFY(tx_buf != NULL); |
912 | uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf); |
913 | VERIFY(tx_baddr != NULL); |
914 | tx_baddr += kern_buflet_get_data_offset(tx_buf); |
915 | |
916 | bpf_tap_packet_out(interface: pcb->ipsec_ifp, DLT_RAW, packet: tx_ph, NULL, header_len: 0); |
917 | |
918 | length = MIN(kern_packet_get_data_length(tx_ph), |
919 | pcb->ipsec_slot_size); |
920 | |
921 | // Increment TX stats |
922 | tx_ring_stats.kcrsi_slots_transferred++; |
923 | tx_ring_stats.kcrsi_bytes_transferred += length; |
924 | |
925 | if (length > 0) { |
926 | error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data); |
927 | if (error == 0) { |
928 | error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT); |
929 | if (error == 0) { |
930 | // Encrypt and send packet |
931 | lck_mtx_lock(lck: &pcb->ipsec_kpipe_encrypt_lock); |
932 | data = ipsec_encrypt_mbuf(interface: pcb->ipsec_ifp, data); |
933 | lck_mtx_unlock(lck: &pcb->ipsec_kpipe_encrypt_lock); |
934 | } else { |
935 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n" , pcb->ipsec_ifp->if_xname, length, error); |
936 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
937 | STATS_INC(nifs, NETIF_STATS_DROP); |
938 | mbuf_freem(mbuf: data); |
939 | data = NULL; |
940 | } |
941 | } else { |
942 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n" , pcb->ipsec_ifp->if_xname, error); |
943 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
944 | STATS_INC(nifs, NETIF_STATS_DROP); |
945 | } |
946 | } else { |
947 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n" , pcb->ipsec_ifp->if_xname); |
948 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
949 | STATS_INC(nifs, NETIF_STATS_DROP); |
950 | } |
951 | |
952 | if (data == NULL) { |
953 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n" , pcb->ipsec_ifp->if_xname); |
954 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
955 | break; |
956 | } |
957 | |
958 | length = mbuf_pkthdr_len(mbuf: data); |
959 | if (length > PP_BUF_SIZE_DEF(rx_pp)) { |
960 | // Flush data |
961 | mbuf_freem(mbuf: data); |
962 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
963 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n" , |
964 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
965 | continue; |
966 | } |
967 | |
968 | // Fillout rx packet |
969 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
970 | VERIFY(rx_buf != NULL); |
971 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
972 | VERIFY(rx_baddr != NULL); |
973 | |
974 | // Copy-in data from mbuf to buflet |
975 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
976 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
977 | |
978 | // Finalize and attach the packet |
979 | error = kern_buflet_set_data_offset(rx_buf, 0); |
980 | VERIFY(error == 0); |
981 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
982 | VERIFY(error == 0); |
983 | error = kern_packet_finalize(rx_ph); |
984 | VERIFY(error == 0); |
985 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
986 | VERIFY(error == 0); |
987 | |
988 | STATS_INC(nifs, NETIF_STATS_TX_PACKETS); |
989 | STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT); |
990 | |
991 | rx_ring_stats.kcrsi_slots_transferred++; |
992 | rx_ring_stats.kcrsi_bytes_transferred += length; |
993 | |
994 | if (!pcb->ipsec_ext_ifdata_stats) { |
995 | ifnet_stat_increment_out(interface: pcb->ipsec_ifp, packets_out: 1, bytes_out: (uint16_t)length, errors_out: 0); |
996 | } |
997 | |
998 | mbuf_freem(mbuf: data); |
999 | |
1000 | rx_pslot = rx_slot; |
1001 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
1002 | } |
1003 | |
1004 | if (rx_pslot) { |
1005 | kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot); |
1006 | kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats); |
1007 | } |
1008 | |
1009 | if (tx_chain_ph != 0) { |
1010 | kern_pbufpool_free_chain(pbufpool: tx_pp, chain: tx_chain_ph); |
1011 | } |
1012 | |
1013 | if (tx_pslot) { |
1014 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
1015 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
1016 | (void)kern_channel_reclaim(tx_ring); |
1017 | } |
1018 | |
1019 | /* always reenable output */ |
1020 | errno_t error = ifnet_enable_output(interface: pcb->ipsec_ifp); |
1021 | if (error != 0) { |
1022 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n" , error); |
1023 | } |
1024 | |
1025 | // Unlock first, then exit ring |
1026 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1027 | |
1028 | if (tx_pslot != NULL) { |
1029 | kern_channel_notify(tx_ring, flags: 0); |
1030 | } |
1031 | kr_exit(tx_ring); |
1032 | |
1033 | ipsec_data_move_end(pcb); |
1034 | return 0; |
1035 | } |
1036 | |
1037 | static errno_t |
1038 | ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph, |
1039 | kern_packet_t dph) |
1040 | { |
1041 | uint8_t *sbaddr = NULL; |
1042 | int err = 0; |
1043 | uint32_t slen = 0; |
1044 | |
1045 | VERIFY(interface != NULL); |
1046 | VERIFY(sph != 0); |
1047 | VERIFY(dph != 0); |
1048 | |
1049 | kern_buflet_t sbuf = __packet_get_next_buflet(ph: sph, NULL); |
1050 | VERIFY(sbuf != NULL); |
1051 | slen = __buflet_get_data_length(buf: sbuf); |
1052 | |
1053 | if (__improbable(slen < sizeof(struct ip))) { |
1054 | os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source " |
1055 | "buffer shorter than ip header, %u\n" , slen); |
1056 | return EINVAL; |
1057 | } |
1058 | |
1059 | MD_BUFLET_ADDR(SK_PTR_ADDR_KPKT(sph), sbaddr); |
1060 | struct ip *ip = (struct ip *)(void *)sbaddr; |
1061 | ASSERT(IP_HDR_ALIGNED_P(ip)); |
1062 | |
1063 | u_int ip_vers = ip->ip_v; |
1064 | switch (ip_vers) { |
1065 | case IPVERSION: { |
1066 | err = ipsec4_interface_kpipe_output(interface, sph, dph); |
1067 | if (__improbable(err != 0)) { |
1068 | os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe " |
1069 | "output error %d\n" , err); |
1070 | return err; |
1071 | } |
1072 | break; |
1073 | } |
1074 | case 6: { |
1075 | err = ipsec6_interface_kpipe_output(interface, sph, dph); |
1076 | if (__improbable(err != 0)) { |
1077 | os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe " |
1078 | "output error %d\n" , err); |
1079 | return err; |
1080 | } |
1081 | break; |
1082 | } |
1083 | default: { |
1084 | os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n" , |
1085 | ip_vers); |
1086 | return EINVAL; |
1087 | } |
1088 | } |
1089 | |
1090 | return err; |
1091 | } |
1092 | |
1093 | static errno_t |
1094 | ipsec_kpipe_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1095 | kern_channel_ring_t rx_ring, uint32_t flags) |
1096 | { |
1097 | #pragma unused(nxprov) |
1098 | #pragma unused(flags) |
1099 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1100 | struct kern_channel_ring_stat_increment rx_ring_stats; |
1101 | uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(ring: rx_ring); |
1102 | |
1103 | if (!ipsec_data_move_begin(pcb)) { |
1104 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1105 | return 0; |
1106 | } |
1107 | |
1108 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1109 | |
1110 | if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
1111 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1112 | ipsec_data_move_end(pcb); |
1113 | return 0; |
1114 | } |
1115 | |
1116 | VERIFY(pcb->ipsec_kpipe_count); |
1117 | VERIFY(ring_idx <= pcb->ipsec_kpipe_count); |
1118 | |
1119 | // Reclaim user-released slots |
1120 | (void) kern_channel_reclaim(rx_ring); |
1121 | |
1122 | uint32_t avail = kern_channel_available_slot_count(ring: rx_ring); |
1123 | if (avail == 0) { |
1124 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1125 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n" , __func__, |
1126 | pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx); |
1127 | ipsec_data_move_end(pcb); |
1128 | return 0; |
1129 | } |
1130 | |
1131 | kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx]; |
1132 | if (tx_ring == NULL) { |
1133 | // Net-If TX ring not set up yet, nothing to read |
1134 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1135 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n" , __func__, |
1136 | pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx); |
1137 | ipsec_data_move_end(pcb); |
1138 | return 0; |
1139 | } |
1140 | |
1141 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats; |
1142 | |
1143 | // Unlock ipsec before entering ring |
1144 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1145 | |
1146 | (void)kr_enter(tx_ring, TRUE); |
1147 | |
1148 | // Lock again after entering and validate |
1149 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1150 | if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) { |
1151 | // Ring no longer valid |
1152 | // Unlock first, then exit ring |
1153 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1154 | kr_exit(tx_ring); |
1155 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n" , __func__, |
1156 | pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx); |
1157 | ipsec_data_move_end(pcb); |
1158 | return 0; |
1159 | } |
1160 | |
1161 | struct kern_channel_ring_stat_increment tx_ring_stats; |
1162 | bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats)); |
1163 | kern_channel_slot_t tx_pslot = NULL; |
1164 | kern_channel_slot_t tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
1165 | if (tx_slot == NULL) { |
1166 | // Nothing to read, don't bother signalling |
1167 | // Unlock first, then exit ring |
1168 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1169 | kr_exit(tx_ring); |
1170 | ipsec_data_move_end(pcb); |
1171 | return 0; |
1172 | } |
1173 | |
1174 | struct kern_pbufpool *rx_pp = rx_ring->ckr_pp; |
1175 | VERIFY(rx_pp != NULL); |
1176 | struct kern_pbufpool *tx_pp = tx_ring->ckr_pp; |
1177 | VERIFY(tx_pp != NULL); |
1178 | bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats)); |
1179 | kern_channel_slot_t rx_pslot = NULL; |
1180 | kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL); |
1181 | kern_packet_t tx_chain_ph = 0; |
1182 | |
1183 | while (rx_slot != NULL && tx_slot != NULL) { |
1184 | size_t tx_pkt_length = 0; |
1185 | errno_t error = 0; |
1186 | |
1187 | // Allocate rx packet |
1188 | kern_packet_t rx_ph = 0; |
1189 | error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
1190 | if (__improbable(error != 0)) { |
1191 | os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: " |
1192 | "failed to allocate packet\n" , pcb->ipsec_ifp->if_xname); |
1193 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
1194 | STATS_INC(nifs, NETIF_STATS_DROP); |
1195 | break; |
1196 | } |
1197 | |
1198 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
1199 | if (__improbable(tx_ph == 0)) { |
1200 | // Advance TX ring |
1201 | tx_pslot = tx_slot; |
1202 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1203 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1204 | continue; |
1205 | } |
1206 | |
1207 | (void) kern_channel_slot_detach_packet(ring: tx_ring, slot: tx_slot, packet: tx_ph); |
1208 | if (tx_chain_ph != 0) { |
1209 | kern_packet_append(tx_ph, tx_chain_ph); |
1210 | } |
1211 | tx_chain_ph = tx_ph; |
1212 | |
1213 | // Advance TX ring |
1214 | tx_pslot = tx_slot; |
1215 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1216 | |
1217 | bpf_tap_packet_out(interface: pcb->ipsec_ifp, DLT_RAW, packet: tx_ph, NULL, header_len: 0); |
1218 | |
1219 | tx_pkt_length = kern_packet_get_data_length(tx_ph); |
1220 | if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) { |
1221 | os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: " |
1222 | "packet length %zu" , pcb->ipsec_ifp->if_xname, |
1223 | tx_pkt_length); |
1224 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1225 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1226 | STATS_INC(nifs, NETIF_STATS_DROP); |
1227 | continue; |
1228 | } |
1229 | |
1230 | // Increment TX stats |
1231 | tx_ring_stats.kcrsi_slots_transferred++; |
1232 | tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length; |
1233 | |
1234 | // Encrypt packet |
1235 | lck_mtx_lock(lck: &pcb->ipsec_kpipe_encrypt_lock); |
1236 | error = ipsec_encrypt_kpipe_pkt(interface: pcb->ipsec_ifp, sph: tx_ph, dph: rx_ph); |
1237 | lck_mtx_unlock(lck: &pcb->ipsec_kpipe_encrypt_lock); |
1238 | if (__improbable(error != 0)) { |
1239 | os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: " |
1240 | "failed to encrypt packet" , pcb->ipsec_ifp->if_xname); |
1241 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1242 | STATS_INC(nifs, NETIF_STATS_DROP); |
1243 | continue; |
1244 | } |
1245 | |
1246 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
1247 | // Finalize and attach the packet |
1248 | kern_buflet_t rx_buf = __packet_get_next_buflet(ph: rx_ph, NULL); |
1249 | error = kern_buflet_set_data_offset(rx_buf, 0); |
1250 | VERIFY(error == 0); |
1251 | error = kern_packet_finalize(rx_ph); |
1252 | VERIFY(error == 0); |
1253 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
1254 | VERIFY(error == 0); |
1255 | |
1256 | STATS_INC(nifs, NETIF_STATS_TX_PACKETS); |
1257 | STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT); |
1258 | |
1259 | rx_ring_stats.kcrsi_slots_transferred++; |
1260 | rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph); |
1261 | |
1262 | if (!pcb->ipsec_ext_ifdata_stats) { |
1263 | ifnet_stat_increment_out(interface: pcb->ipsec_ifp, packets_out: 1, |
1264 | bytes_out: kern_packet_get_data_length(rx_ph), errors_out: 0); |
1265 | } |
1266 | |
1267 | rx_pslot = rx_slot; |
1268 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
1269 | } |
1270 | |
1271 | if (rx_pslot) { |
1272 | kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot); |
1273 | kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats); |
1274 | } |
1275 | |
1276 | if (tx_chain_ph != 0) { |
1277 | kern_pbufpool_free_chain(pbufpool: tx_pp, chain: tx_chain_ph); |
1278 | } |
1279 | |
1280 | if (tx_pslot) { |
1281 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
1282 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
1283 | (void)kern_channel_reclaim(tx_ring); |
1284 | } |
1285 | |
1286 | /* always reenable output */ |
1287 | errno_t error = ifnet_enable_output(interface: pcb->ipsec_ifp); |
1288 | if (error != 0) { |
1289 | os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n" , error); |
1290 | } |
1291 | |
1292 | // Unlock first, then exit ring |
1293 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1294 | |
1295 | if (tx_pslot != NULL) { |
1296 | kern_channel_notify(tx_ring, flags: 0); |
1297 | } |
1298 | kr_exit(tx_ring); |
1299 | |
1300 | ipsec_data_move_end(pcb); |
1301 | return 0; |
1302 | } |
1303 | |
1304 | static errno_t |
1305 | ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1306 | kern_channel_ring_t rx_ring, uint32_t flags) |
1307 | { |
1308 | if (__improbable(ipsec_kpipe_mbuf == 1)) { |
1309 | return ipsec_kpipe_sync_rx_mbuf(nxprov, nexus, rx_ring, flags); |
1310 | } else { |
1311 | return ipsec_kpipe_sync_rx_packet(nxprov, nexus, rx_ring, flags); |
1312 | } |
1313 | } |
1314 | |
1315 | static uint8_t |
1316 | ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class) |
1317 | { |
1318 | switch (svc_class) { |
1319 | case KPKT_SC_VO: { |
1320 | return 0; |
1321 | } |
1322 | case KPKT_SC_VI: { |
1323 | return 1; |
1324 | } |
1325 | case KPKT_SC_BE: { |
1326 | return 2; |
1327 | } |
1328 | case KPKT_SC_BK: { |
1329 | return 3; |
1330 | } |
1331 | default: { |
1332 | VERIFY(0); |
1333 | return 0; |
1334 | } |
1335 | } |
1336 | } |
1337 | |
1338 | static errno_t |
1339 | ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1340 | kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring, |
1341 | void **ring_ctx) |
1342 | { |
1343 | #pragma unused(nxprov) |
1344 | #pragma unused(channel) |
1345 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1346 | |
1347 | if (!is_tx_ring) { |
1348 | VERIFY(pcb->ipsec_netif_rxring[0] == NULL); |
1349 | pcb->ipsec_netif_rxring[0] = ring; |
1350 | } else { |
1351 | uint8_t ring_idx = 0; |
1352 | if (ipsec_in_wmm_mode(pcb)) { |
1353 | int err; |
1354 | kern_packet_svc_class_t svc_class; |
1355 | err = kern_channel_get_service_class(ring, svc: &svc_class); |
1356 | VERIFY(err == 0); |
1357 | ring_idx = ipsec_find_tx_ring_by_svc(svc_class); |
1358 | VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT); |
1359 | } |
1360 | |
1361 | *ring_ctx = (void *)(uintptr_t)ring_idx; |
1362 | |
1363 | VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL); |
1364 | pcb->ipsec_netif_txring[ring_idx] = ring; |
1365 | } |
1366 | return 0; |
1367 | } |
1368 | |
1369 | static void |
1370 | ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1371 | kern_channel_ring_t ring) |
1372 | { |
1373 | #pragma unused(nxprov) |
1374 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1375 | bool found = false; |
1376 | |
1377 | for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) { |
1378 | if (pcb->ipsec_netif_rxring[i] == ring) { |
1379 | pcb->ipsec_netif_rxring[i] = NULL; |
1380 | VERIFY(!found); |
1381 | found = true; |
1382 | } |
1383 | } |
1384 | for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) { |
1385 | if (pcb->ipsec_netif_txring[i] == ring) { |
1386 | pcb->ipsec_netif_txring[i] = NULL; |
1387 | VERIFY(!found); |
1388 | found = true; |
1389 | } |
1390 | } |
1391 | VERIFY(found); |
1392 | } |
1393 | |
1394 | static bool |
1395 | ipsec_netif_check_policy(ifnet_t interface, mbuf_t data) |
1396 | { |
1397 | necp_kernel_policy_result necp_result = 0; |
1398 | necp_kernel_policy_result_parameter necp_result_parameter = {}; |
1399 | uint32_t necp_matched_policy_id = 0; |
1400 | struct ip_out_args args4 = { }; |
1401 | struct ip6_out_args args6 = { }; |
1402 | |
1403 | // This packet has been marked with IP level policy, do not mark again. |
1404 | if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) { |
1405 | return true; |
1406 | } |
1407 | |
1408 | size_t length = mbuf_pkthdr_len(mbuf: data); |
1409 | if (length < sizeof(struct ip)) { |
1410 | return false; |
1411 | } |
1412 | |
1413 | struct ip *ip = mtod(data, struct ip *); |
1414 | u_int ip_version = ip->ip_v; |
1415 | switch (ip_version) { |
1416 | case 4: { |
1417 | if (interface != NULL) { |
1418 | args4.ipoa_flags |= IPOAF_BOUND_IF; |
1419 | args4.ipoa_boundif = interface->if_index; |
1420 | } |
1421 | necp_matched_policy_id = necp_ip_output_find_policy_match(packet: data, IP_OUTARGS, ipoa: &args4, NULL, |
1422 | result: &necp_result, result_parameter: &necp_result_parameter); |
1423 | break; |
1424 | } |
1425 | case 6: { |
1426 | if (interface != NULL) { |
1427 | args6.ip6oa_flags |= IP6OAF_BOUND_IF; |
1428 | args6.ip6oa_boundif = interface->if_index; |
1429 | } |
1430 | necp_matched_policy_id = necp_ip6_output_find_policy_match(packet: data, IPV6_OUTARGS, ip6oa: &args6, NULL, |
1431 | result: &necp_result, result_parameter: &necp_result_parameter); |
1432 | break; |
1433 | } |
1434 | default: { |
1435 | return false; |
1436 | } |
1437 | } |
1438 | |
1439 | if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP || |
1440 | necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) { |
1441 | /* Drop and flow divert packets should be blocked at the IP layer */ |
1442 | return false; |
1443 | } |
1444 | |
1445 | necp_mark_packet_from_ip(packet: data, policy_id: necp_matched_policy_id); |
1446 | return true; |
1447 | } |
1448 | |
1449 | static errno_t |
1450 | ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1451 | kern_channel_ring_t tx_ring, uint32_t flags) |
1452 | { |
1453 | #pragma unused(nxprov) |
1454 | #pragma unused(flags) |
1455 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1456 | |
1457 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats; |
1458 | |
1459 | if (!ipsec_data_move_begin(pcb)) { |
1460 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1461 | return 0; |
1462 | } |
1463 | |
1464 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1465 | |
1466 | struct kern_channel_ring_stat_increment tx_ring_stats; |
1467 | bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats)); |
1468 | kern_channel_slot_t tx_pslot = NULL; |
1469 | kern_channel_slot_t tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
1470 | kern_packet_t tx_chain_ph = 0; |
1471 | |
1472 | STATS_INC(nifs, NETIF_STATS_TX_SYNC); |
1473 | |
1474 | if (tx_slot == NULL) { |
1475 | // Nothing to write, don't bother signalling |
1476 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1477 | ipsec_data_move_end(pcb); |
1478 | return 0; |
1479 | } |
1480 | |
1481 | if (pcb->ipsec_kpipe_count && |
1482 | ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
1483 | // Select the corresponding kpipe rx ring |
1484 | uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(ring: tx_ring); |
1485 | VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT); |
1486 | kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx]; |
1487 | |
1488 | // Unlock while calling notify |
1489 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1490 | |
1491 | // Signal the kernel pipe ring to read |
1492 | if (rx_ring != NULL) { |
1493 | kern_channel_notify(rx_ring, flags: 0); |
1494 | } |
1495 | |
1496 | ipsec_data_move_end(pcb); |
1497 | return 0; |
1498 | } |
1499 | |
1500 | // If we're here, we're injecting into the BSD stack |
1501 | while (tx_slot != NULL) { |
1502 | size_t length = 0; |
1503 | mbuf_t data = NULL; |
1504 | |
1505 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
1506 | |
1507 | if (tx_ph == 0) { |
1508 | // Advance TX ring |
1509 | tx_pslot = tx_slot; |
1510 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1511 | continue; |
1512 | } |
1513 | (void) kern_channel_slot_detach_packet(ring: tx_ring, slot: tx_slot, packet: tx_ph); |
1514 | if (tx_chain_ph != 0) { |
1515 | kern_packet_append(tx_ph, tx_chain_ph); |
1516 | } |
1517 | tx_chain_ph = tx_ph; |
1518 | |
1519 | // Advance TX ring |
1520 | tx_pslot = tx_slot; |
1521 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1522 | |
1523 | kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL); |
1524 | VERIFY(tx_buf != NULL); |
1525 | uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf); |
1526 | VERIFY(tx_baddr != 0); |
1527 | tx_baddr += kern_buflet_get_data_offset(tx_buf); |
1528 | |
1529 | bpf_tap_packet_out(interface: pcb->ipsec_ifp, DLT_RAW, packet: tx_ph, NULL, header_len: 0); |
1530 | |
1531 | length = MIN(kern_packet_get_data_length(tx_ph), |
1532 | pcb->ipsec_slot_size); |
1533 | |
1534 | if (length > 0) { |
1535 | errno_t error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data); |
1536 | if (error == 0) { |
1537 | error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT); |
1538 | if (error == 0) { |
1539 | // Mark packet from policy |
1540 | uint32_t policy_id = kern_packet_get_policy_id(tx_ph); |
1541 | uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph); |
1542 | necp_mark_packet_from_ip_with_skip(packet: data, policy_id, skip_policy_id); |
1543 | |
1544 | // Check policy with NECP |
1545 | if (!ipsec_netif_check_policy(interface: pcb->ipsec_ifp, data)) { |
1546 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n" , pcb->ipsec_ifp->if_xname); |
1547 | STATS_INC(nifs, NETIF_STATS_DROP); |
1548 | mbuf_freem(mbuf: data); |
1549 | data = NULL; |
1550 | } else { |
1551 | // Send through encryption |
1552 | error = ipsec_output(interface: pcb->ipsec_ifp, data); |
1553 | if (error != 0) { |
1554 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n" , pcb->ipsec_ifp->if_xname, error); |
1555 | } |
1556 | } |
1557 | } else { |
1558 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n" , pcb->ipsec_ifp->if_xname, length, error); |
1559 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
1560 | STATS_INC(nifs, NETIF_STATS_DROP); |
1561 | mbuf_freem(mbuf: data); |
1562 | data = NULL; |
1563 | } |
1564 | } else { |
1565 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n" , pcb->ipsec_ifp->if_xname, error); |
1566 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
1567 | STATS_INC(nifs, NETIF_STATS_DROP); |
1568 | } |
1569 | } else { |
1570 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n" , pcb->ipsec_ifp->if_xname); |
1571 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1572 | STATS_INC(nifs, NETIF_STATS_DROP); |
1573 | } |
1574 | |
1575 | if (data == NULL) { |
1576 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n" , pcb->ipsec_ifp->if_xname); |
1577 | break; |
1578 | } |
1579 | |
1580 | STATS_INC(nifs, NETIF_STATS_TX_PACKETS); |
1581 | STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF); |
1582 | |
1583 | tx_ring_stats.kcrsi_slots_transferred++; |
1584 | tx_ring_stats.kcrsi_bytes_transferred += length; |
1585 | } |
1586 | |
1587 | if (tx_chain_ph != 0) { |
1588 | kern_pbufpool_free_chain(pbufpool: tx_ring->ckr_pp, chain: tx_chain_ph); |
1589 | } |
1590 | |
1591 | if (tx_pslot) { |
1592 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
1593 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
1594 | (void)kern_channel_reclaim(tx_ring); |
1595 | } |
1596 | |
1597 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1598 | ipsec_data_move_end(pcb); |
1599 | |
1600 | return 0; |
1601 | } |
1602 | |
1603 | static errno_t |
1604 | ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1605 | kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx) |
1606 | { |
1607 | #pragma unused(nxprov) |
1608 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1609 | boolean_t more = false; |
1610 | errno_t rc = 0; |
1611 | |
1612 | VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0); |
1613 | |
1614 | /* |
1615 | * Refill and sync the ring; we may be racing against another thread doing |
1616 | * an RX sync that also wants to do kr_enter(), and so use the blocking |
1617 | * variant here. |
1618 | */ |
1619 | rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more); |
1620 | if (rc != 0 && rc != EAGAIN && rc != EBUSY) { |
1621 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n" , __func__, |
1622 | pcb->ipsec_if_xname, ring->ckr_name, rc); |
1623 | } |
1624 | |
1625 | (void) kr_enter(ring, TRUE); |
1626 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1627 | if (ring != pcb->ipsec_netif_txring[ring_idx]) { |
1628 | // ring no longer valid |
1629 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1630 | kr_exit(ring); |
1631 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n" , __func__, |
1632 | pcb->ipsec_if_xname, ring->ckr_name, ring_idx); |
1633 | return ENXIO; |
1634 | } |
1635 | |
1636 | if (pcb->ipsec_kpipe_count) { |
1637 | uint32_t tx_available = kern_channel_available_slot_count(ring); |
1638 | if (pcb->ipsec_netif_txring_size > 0 && |
1639 | tx_available >= pcb->ipsec_netif_txring_size - 1) { |
1640 | // No room left in tx ring, disable output for now |
1641 | errno_t error = ifnet_disable_output(interface: pcb->ipsec_ifp); |
1642 | if (error != 0) { |
1643 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n" , error); |
1644 | } |
1645 | } |
1646 | } |
1647 | |
1648 | if (pcb->ipsec_kpipe_count) { |
1649 | kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx]; |
1650 | |
1651 | // Unlock while calling notify |
1652 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1653 | // Signal the kernel pipe ring to read |
1654 | if (rx_ring != NULL) { |
1655 | kern_channel_notify(rx_ring, flags: 0); |
1656 | } |
1657 | } else { |
1658 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1659 | } |
1660 | |
1661 | kr_exit(ring); |
1662 | |
1663 | return 0; |
1664 | } |
1665 | |
1666 | static errno_t |
1667 | ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1668 | kern_channel_ring_t ring, __unused uint32_t flags) |
1669 | { |
1670 | errno_t ret = 0; |
1671 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1672 | |
1673 | if (!ipsec_data_move_begin(pcb)) { |
1674 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1675 | return 0; |
1676 | } |
1677 | |
1678 | if (ipsec_in_wmm_mode(pcb)) { |
1679 | for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) { |
1680 | kern_channel_ring_t nring = pcb->ipsec_netif_txring[i]; |
1681 | ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring: nring, flags, ring_idx: i); |
1682 | if (ret) { |
1683 | break; |
1684 | } |
1685 | } |
1686 | } else { |
1687 | ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, ring_idx: 0); |
1688 | } |
1689 | |
1690 | ipsec_data_move_end(pcb); |
1691 | return ret; |
1692 | } |
1693 | |
1694 | static errno_t |
1695 | ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1696 | kern_channel_ring_t rx_ring, uint32_t flags) |
1697 | { |
1698 | #pragma unused(nxprov) |
1699 | #pragma unused(flags) |
1700 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1701 | struct kern_channel_ring_stat_increment rx_ring_stats; |
1702 | |
1703 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats; |
1704 | |
1705 | if (!ipsec_data_move_begin(pcb)) { |
1706 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1707 | return 0; |
1708 | } |
1709 | |
1710 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1711 | |
1712 | // Reclaim user-released slots |
1713 | (void) kern_channel_reclaim(rx_ring); |
1714 | |
1715 | STATS_INC(nifs, NETIF_STATS_RX_SYNC); |
1716 | |
1717 | uint32_t avail = kern_channel_available_slot_count(ring: rx_ring); |
1718 | if (avail == 0) { |
1719 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1720 | ipsec_data_move_end(pcb); |
1721 | return 0; |
1722 | } |
1723 | |
1724 | struct kern_pbufpool *rx_pp = rx_ring->ckr_pp; |
1725 | VERIFY(rx_pp != NULL); |
1726 | bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats)); |
1727 | kern_channel_slot_t rx_pslot = NULL; |
1728 | kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL); |
1729 | |
1730 | while (rx_slot != NULL) { |
1731 | // Check for a waiting packet |
1732 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
1733 | mbuf_t data = pcb->ipsec_input_chain; |
1734 | if (data == NULL) { |
1735 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1736 | break; |
1737 | } |
1738 | |
1739 | // Allocate rx packet |
1740 | kern_packet_t rx_ph = 0; |
1741 | errno_t error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
1742 | if (__improbable(error != 0)) { |
1743 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
1744 | STATS_INC(nifs, NETIF_STATS_DROP); |
1745 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1746 | break; |
1747 | } |
1748 | |
1749 | // Advance waiting packets |
1750 | if (pcb->ipsec_input_chain_count > 0) { |
1751 | pcb->ipsec_input_chain_count--; |
1752 | } |
1753 | pcb->ipsec_input_chain = data->m_nextpkt; |
1754 | data->m_nextpkt = NULL; |
1755 | if (pcb->ipsec_input_chain == NULL) { |
1756 | pcb->ipsec_input_chain_last = NULL; |
1757 | } |
1758 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1759 | |
1760 | size_t length = mbuf_pkthdr_len(mbuf: data); |
1761 | |
1762 | if (length < sizeof(struct ip)) { |
1763 | // Flush data |
1764 | mbuf_freem(mbuf: data); |
1765 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1766 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1767 | STATS_INC(nifs, NETIF_STATS_DROP); |
1768 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n" , |
1769 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip)); |
1770 | continue; |
1771 | } |
1772 | |
1773 | uint32_t af = 0; |
1774 | struct ip *ip = mtod(data, struct ip *); |
1775 | u_int ip_version = ip->ip_v; |
1776 | switch (ip_version) { |
1777 | case 4: { |
1778 | af = AF_INET; |
1779 | break; |
1780 | } |
1781 | case 6: { |
1782 | af = AF_INET6; |
1783 | break; |
1784 | } |
1785 | default: { |
1786 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n" , |
1787 | pcb->ipsec_ifp->if_xname, ip_version); |
1788 | break; |
1789 | } |
1790 | } |
1791 | |
1792 | if (length > PP_BUF_SIZE_DEF(rx_pp) || |
1793 | (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) { |
1794 | // We need to fragment to send up into the netif |
1795 | |
1796 | u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp); |
1797 | if (pcb->ipsec_frag_size_set && |
1798 | pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) { |
1799 | fragment_mtu = pcb->ipsec_input_frag_size; |
1800 | } |
1801 | |
1802 | mbuf_t fragment_chain = NULL; |
1803 | switch (af) { |
1804 | case AF_INET: { |
1805 | // ip_fragment expects the length in host order |
1806 | ip->ip_len = ntohs(ip->ip_len); |
1807 | |
1808 | // ip_fragment will modify the original data, don't free |
1809 | int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE); |
1810 | if (fragment_error == 0 && data != NULL) { |
1811 | fragment_chain = data; |
1812 | } else { |
1813 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1814 | STATS_INC(nifs, NETIF_STATS_DROP); |
1815 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n" , |
1816 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
1817 | } |
1818 | break; |
1819 | } |
1820 | case AF_INET6: { |
1821 | if (length < sizeof(struct ip6_hdr)) { |
1822 | mbuf_freem(mbuf: data); |
1823 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1824 | STATS_INC(nifs, NETIF_STATS_DROP); |
1825 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n" , |
1826 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr)); |
1827 | } else { |
1828 | // ip6_do_fragmentation will free the original data on success only |
1829 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
1830 | |
1831 | int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr), |
1832 | ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data))); |
1833 | if (fragment_error == 0 && data != NULL) { |
1834 | fragment_chain = data; |
1835 | } else { |
1836 | mbuf_freem(mbuf: data); |
1837 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1838 | STATS_INC(nifs, NETIF_STATS_DROP); |
1839 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n" , |
1840 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
1841 | } |
1842 | } |
1843 | break; |
1844 | } |
1845 | default: { |
1846 | // Cannot fragment unknown families |
1847 | mbuf_freem(mbuf: data); |
1848 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1849 | STATS_INC(nifs, NETIF_STATS_DROP); |
1850 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n" , |
1851 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
1852 | break; |
1853 | } |
1854 | } |
1855 | |
1856 | if (fragment_chain != NULL) { |
1857 | // Add fragments to chain before continuing |
1858 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
1859 | if (pcb->ipsec_input_chain != NULL) { |
1860 | pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain; |
1861 | } else { |
1862 | pcb->ipsec_input_chain = fragment_chain; |
1863 | } |
1864 | pcb->ipsec_input_chain_count++; |
1865 | while (fragment_chain->m_nextpkt) { |
1866 | VERIFY(fragment_chain != fragment_chain->m_nextpkt); |
1867 | fragment_chain = fragment_chain->m_nextpkt; |
1868 | pcb->ipsec_input_chain_count++; |
1869 | } |
1870 | pcb->ipsec_input_chain_last = fragment_chain; |
1871 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1872 | } |
1873 | |
1874 | // Make sure to free unused rx packet |
1875 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1876 | |
1877 | continue; |
1878 | } |
1879 | |
1880 | mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp); |
1881 | |
1882 | // Fillout rx packet |
1883 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
1884 | VERIFY(rx_buf != NULL); |
1885 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
1886 | VERIFY(rx_baddr != NULL); |
1887 | |
1888 | // Copy-in data from mbuf to buflet |
1889 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
1890 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
1891 | |
1892 | // Finalize and attach the packet |
1893 | error = kern_buflet_set_data_offset(rx_buf, 0); |
1894 | VERIFY(error == 0); |
1895 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
1896 | VERIFY(error == 0); |
1897 | error = kern_packet_set_headroom(rx_ph, 0); |
1898 | VERIFY(error == 0); |
1899 | error = kern_packet_finalize(rx_ph); |
1900 | VERIFY(error == 0); |
1901 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
1902 | VERIFY(error == 0); |
1903 | |
1904 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
1905 | STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF); |
1906 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
1907 | |
1908 | rx_ring_stats.kcrsi_slots_transferred++; |
1909 | rx_ring_stats.kcrsi_bytes_transferred += length; |
1910 | |
1911 | if (!pcb->ipsec_ext_ifdata_stats) { |
1912 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0); |
1913 | } |
1914 | |
1915 | mbuf_freem(mbuf: data); |
1916 | |
1917 | // Advance ring |
1918 | rx_pslot = rx_slot; |
1919 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
1920 | } |
1921 | |
1922 | for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) { |
1923 | struct kern_channel_ring_stat_increment tx_ring_stats; |
1924 | bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats)); |
1925 | kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx]; |
1926 | kern_channel_slot_t tx_pslot = NULL; |
1927 | kern_channel_slot_t tx_slot = NULL; |
1928 | if (tx_ring == NULL) { |
1929 | // Net-If TX ring not set up yet, nothing to read |
1930 | goto done; |
1931 | } |
1932 | // Unlock ipsec before entering ring |
1933 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1934 | |
1935 | (void)kr_enter(tx_ring, TRUE); |
1936 | |
1937 | // Lock again after entering and validate |
1938 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1939 | |
1940 | if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) { |
1941 | goto done; |
1942 | } |
1943 | |
1944 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
1945 | if (tx_slot == NULL) { |
1946 | // Nothing to read, don't bother signalling |
1947 | goto done; |
1948 | } |
1949 | |
1950 | while (rx_slot != NULL && tx_slot != NULL) { |
1951 | size_t length = 0; |
1952 | mbuf_t data = NULL; |
1953 | errno_t error = 0; |
1954 | uint32_t af; |
1955 | |
1956 | // Allocate rx packet |
1957 | kern_packet_t rx_ph = 0; |
1958 | error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
1959 | if (__improbable(error != 0)) { |
1960 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
1961 | STATS_INC(nifs, NETIF_STATS_DROP); |
1962 | break; |
1963 | } |
1964 | |
1965 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
1966 | |
1967 | // Advance TX ring |
1968 | tx_pslot = tx_slot; |
1969 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1970 | |
1971 | if (tx_ph == 0) { |
1972 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1973 | continue; |
1974 | } |
1975 | |
1976 | kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL); |
1977 | VERIFY(tx_buf != NULL); |
1978 | uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf); |
1979 | VERIFY(tx_baddr != 0); |
1980 | tx_baddr += kern_buflet_get_data_offset(tx_buf); |
1981 | |
1982 | length = MIN(kern_packet_get_data_length(tx_ph), |
1983 | pcb->ipsec_slot_size); |
1984 | |
1985 | // Increment TX stats |
1986 | tx_ring_stats.kcrsi_slots_transferred++; |
1987 | tx_ring_stats.kcrsi_bytes_transferred += length; |
1988 | |
1989 | if (length >= sizeof(struct ip)) { |
1990 | error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data); |
1991 | if (error == 0) { |
1992 | error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT); |
1993 | if (error == 0) { |
1994 | // Check for wake packet flag |
1995 | uuid_t flow_uuid; |
1996 | kern_packet_get_flow_uuid(tx_ph, &flow_uuid); |
1997 | u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid; |
1998 | if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) { |
1999 | os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n" , |
2000 | pcb->ipsec_ifp->if_xname); |
2001 | data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT; |
2002 | } |
2003 | |
2004 | lck_mtx_lock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2005 | struct ip *ip = mtod(data, struct ip *); |
2006 | u_int ip_version = ip->ip_v; |
2007 | switch (ip_version) { |
2008 | case 4: { |
2009 | af = AF_INET; |
2010 | ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip); |
2011 | ip->ip_off = ntohs(ip->ip_off); |
2012 | |
2013 | if (length < ip->ip_len) { |
2014 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n" , |
2015 | pcb->ipsec_ifp->if_xname, length, ip->ip_len); |
2016 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2017 | STATS_INC(nifs, NETIF_STATS_DROP); |
2018 | mbuf_freem(mbuf: data); |
2019 | data = NULL; |
2020 | } else { |
2021 | data = esp4_input_extended(data, off: sizeof(struct ip), interface: pcb->ipsec_ifp); |
2022 | } |
2023 | break; |
2024 | } |
2025 | case 6: { |
2026 | if (length < sizeof(struct ip6_hdr)) { |
2027 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n" , |
2028 | pcb->ipsec_ifp->if_xname, length); |
2029 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2030 | STATS_INC(nifs, NETIF_STATS_DROP); |
2031 | mbuf_freem(mbuf: data); |
2032 | data = NULL; |
2033 | } else { |
2034 | af = AF_INET6; |
2035 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
2036 | const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen); |
2037 | if (length < ip6_len) { |
2038 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n" , |
2039 | pcb->ipsec_ifp->if_xname, length, ip6_len); |
2040 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2041 | STATS_INC(nifs, NETIF_STATS_DROP); |
2042 | mbuf_freem(mbuf: data); |
2043 | data = NULL; |
2044 | } else { |
2045 | int offset = sizeof(struct ip6_hdr); |
2046 | esp6_input_extended(mp: &data, offp: &offset, proto: ip6->ip6_nxt, interface: pcb->ipsec_ifp); |
2047 | } |
2048 | } |
2049 | break; |
2050 | } |
2051 | default: { |
2052 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n" , |
2053 | pcb->ipsec_ifp->if_xname, ip_version); |
2054 | STATS_INC(nifs, NETIF_STATS_DROP); |
2055 | mbuf_freem(mbuf: data); |
2056 | data = NULL; |
2057 | break; |
2058 | } |
2059 | } |
2060 | lck_mtx_unlock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2061 | } else { |
2062 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n" , pcb->ipsec_ifp->if_xname, length, error); |
2063 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
2064 | STATS_INC(nifs, NETIF_STATS_DROP); |
2065 | mbuf_freem(mbuf: data); |
2066 | data = NULL; |
2067 | } |
2068 | } else { |
2069 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n" , pcb->ipsec_ifp->if_xname, error); |
2070 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
2071 | STATS_INC(nifs, NETIF_STATS_DROP); |
2072 | } |
2073 | } else { |
2074 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n" , pcb->ipsec_ifp->if_xname, length); |
2075 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2076 | STATS_INC(nifs, NETIF_STATS_DROP); |
2077 | } |
2078 | |
2079 | if (data == NULL) { |
2080 | // Failed to get decrypted data data |
2081 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2082 | continue; |
2083 | } |
2084 | |
2085 | length = mbuf_pkthdr_len(mbuf: data); |
2086 | if (length > PP_BUF_SIZE_DEF(rx_pp)) { |
2087 | // Flush data |
2088 | mbuf_freem(mbuf: data); |
2089 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2090 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2091 | STATS_INC(nifs, NETIF_STATS_DROP); |
2092 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n" , |
2093 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
2094 | continue; |
2095 | } |
2096 | |
2097 | mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp); |
2098 | |
2099 | // Fillout rx packet |
2100 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
2101 | VERIFY(rx_buf != NULL); |
2102 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
2103 | VERIFY(rx_baddr != NULL); |
2104 | |
2105 | // Copy-in data from mbuf to buflet |
2106 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
2107 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
2108 | |
2109 | // Finalize and attach the packet |
2110 | error = kern_buflet_set_data_offset(rx_buf, 0); |
2111 | VERIFY(error == 0); |
2112 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
2113 | VERIFY(error == 0); |
2114 | error = kern_packet_set_link_header_offset(rx_ph, 0); |
2115 | VERIFY(error == 0); |
2116 | error = kern_packet_set_network_header_offset(rx_ph, 0); |
2117 | VERIFY(error == 0); |
2118 | error = kern_packet_finalize(rx_ph); |
2119 | VERIFY(error == 0); |
2120 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
2121 | VERIFY(error == 0); |
2122 | |
2123 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
2124 | STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT); |
2125 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
2126 | |
2127 | rx_ring_stats.kcrsi_slots_transferred++; |
2128 | rx_ring_stats.kcrsi_bytes_transferred += length; |
2129 | |
2130 | if (!pcb->ipsec_ext_ifdata_stats) { |
2131 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0); |
2132 | } |
2133 | |
2134 | mbuf_freem(mbuf: data); |
2135 | |
2136 | rx_pslot = rx_slot; |
2137 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
2138 | } |
2139 | |
2140 | done: |
2141 | if (tx_pslot) { |
2142 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
2143 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
2144 | (void)kern_channel_reclaim(tx_ring); |
2145 | } |
2146 | |
2147 | // Unlock first, then exit ring |
2148 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2149 | if (tx_ring != NULL) { |
2150 | if (tx_pslot != NULL) { |
2151 | kern_channel_notify(tx_ring, flags: 0); |
2152 | } |
2153 | kr_exit(tx_ring); |
2154 | } |
2155 | |
2156 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2157 | } |
2158 | |
2159 | if (rx_pslot) { |
2160 | kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot); |
2161 | kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats); |
2162 | } |
2163 | |
2164 | |
2165 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2166 | |
2167 | ipsec_data_move_end(pcb); |
2168 | return 0; |
2169 | } |
2170 | |
2171 | static errno_t |
2172 | ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb, |
2173 | struct kern_channel_ring_stat_increment *tx_ring_stats, |
2174 | struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph) |
2175 | { |
2176 | kern_buflet_t kpipe_buf = NULL, netif_buf = NULL; |
2177 | uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL; |
2178 | uuid_t flow_uuid; |
2179 | size_t iphlen = 0; |
2180 | uint32_t kpipe_buf_len = 0, netif_buf_lim = 0; |
2181 | int err = 0; |
2182 | |
2183 | VERIFY(kpipe_ph != 0); |
2184 | VERIFY(netif_ph != 0); |
2185 | VERIFY(pcb != NULL); |
2186 | VERIFY(tx_ring_stats != NULL); |
2187 | VERIFY(nifs != NULL); |
2188 | |
2189 | kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL); |
2190 | VERIFY(kpipe_buf != NULL); |
2191 | kpipe_baddr = kern_buflet_get_data_address(kpipe_buf); |
2192 | VERIFY(kpipe_baddr != NULL); |
2193 | kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf); |
2194 | kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf); |
2195 | |
2196 | netif_buf = kern_packet_get_next_buflet(netif_ph, NULL); |
2197 | VERIFY(netif_buf != NULL); |
2198 | netif_baddr = kern_buflet_get_data_address(netif_buf); |
2199 | VERIFY(netif_baddr != NULL); |
2200 | netif_baddr += kern_buflet_get_data_offset(netif_buf); |
2201 | netif_buf_lim = __buflet_get_data_limit(buf: netif_buf); |
2202 | netif_buf_lim -= __buflet_get_data_offset(buf: netif_buf); |
2203 | |
2204 | if (kpipe_buf_len > pcb->ipsec_slot_size) { |
2205 | os_log_info(OS_LOG_DEFAULT, |
2206 | "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length " |
2207 | "%u > pcb ipsec slot size %u" , pcb->ipsec_ifp->if_xname, |
2208 | kpipe_buf_len, pcb->ipsec_slot_size); |
2209 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2210 | err = EMSGSIZE; |
2211 | goto bad; |
2212 | } |
2213 | |
2214 | tx_ring_stats->kcrsi_slots_transferred++; |
2215 | tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len; |
2216 | |
2217 | if (__improbable(kpipe_buf_len < sizeof(struct ip))) { |
2218 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad " |
2219 | "packet length %u\n" , pcb->ipsec_ifp->if_xname, kpipe_buf_len); |
2220 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2221 | err = EBADMSG; |
2222 | goto bad; |
2223 | } |
2224 | |
2225 | struct ip *ip = (struct ip *)(void *)kpipe_baddr; |
2226 | ASSERT(IP_HDR_ALIGNED_P(ip)); |
2227 | |
2228 | u_int ip_vers = ip->ip_v; |
2229 | switch (ip_vers) { |
2230 | case IPVERSION: { |
2231 | #ifdef _IP_VHL |
2232 | iphlen = IP_VHL_HL(ip->ip_vhl) << 2; |
2233 | #else /* _IP_VHL */ |
2234 | iphlen = ip->ip_hl << 2; |
2235 | #endif /* _IP_VHL */ |
2236 | break; |
2237 | } |
2238 | case 6: { |
2239 | iphlen = sizeof(struct ip6_hdr); |
2240 | break; |
2241 | } |
2242 | default: { |
2243 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad " |
2244 | "ip version %u\n" , pcb->ipsec_ifp->if_xname, ip_vers); |
2245 | err = EBADMSG; |
2246 | goto bad; |
2247 | } |
2248 | } |
2249 | |
2250 | if (__improbable(kpipe_buf_len < iphlen)) { |
2251 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad " |
2252 | "packet length %u\n" , pcb->ipsec_ifp->if_xname, kpipe_buf_len); |
2253 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2254 | err = EBADMSG; |
2255 | goto bad; |
2256 | } |
2257 | |
2258 | if (__improbable(netif_buf_lim < iphlen)) { |
2259 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif " |
2260 | "buffer length %u too short\n" , pcb->ipsec_ifp->if_xname, netif_buf_lim); |
2261 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2262 | err = EBADMSG; |
2263 | goto bad; |
2264 | } |
2265 | |
2266 | memcpy(dst: netif_baddr, src: kpipe_baddr, n: iphlen); |
2267 | __buflet_set_data_length(buf: netif_buf, dlen: (uint16_t)iphlen); |
2268 | |
2269 | lck_mtx_lock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2270 | err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph); |
2271 | lck_mtx_unlock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2272 | |
2273 | if (__improbable((err != 0))) { |
2274 | goto bad; |
2275 | } |
2276 | |
2277 | kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid); |
2278 | uint8_t *id_8 = (uint8_t *)(uintptr_t)flow_uuid; |
2279 | if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) { |
2280 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet " |
2281 | "flag is set\n" , pcb->ipsec_ifp->if_xname); |
2282 | __packet_set_wake_flag(ph: netif_ph); |
2283 | } |
2284 | |
2285 | kern_packet_clear_flow_uuid(netif_ph); |
2286 | err = kern_buflet_set_data_offset(netif_buf, 0); |
2287 | VERIFY(err == 0); |
2288 | err = kern_packet_set_link_header_offset(netif_ph, 0); |
2289 | VERIFY(err == 0); |
2290 | err = kern_packet_set_network_header_offset(netif_ph, 0); |
2291 | VERIFY(err == 0); |
2292 | err = kern_packet_finalize(netif_ph); |
2293 | VERIFY(err == 0); |
2294 | |
2295 | return 0; |
2296 | bad: |
2297 | STATS_INC(nifs, NETIF_STATS_DROP); |
2298 | return err; |
2299 | } |
2300 | |
2301 | |
2302 | static errno_t |
2303 | ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
2304 | kern_channel_ring_t rx_ring, uint32_t flags) |
2305 | { |
2306 | #pragma unused(nxprov) |
2307 | #pragma unused(flags) |
2308 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
2309 | struct kern_channel_ring_stat_increment rx_ring_stats; |
2310 | |
2311 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats; |
2312 | |
2313 | if (!ipsec_data_move_begin(pcb)) { |
2314 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
2315 | return 0; |
2316 | } |
2317 | |
2318 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2319 | |
2320 | // Reclaim user-released slots |
2321 | (void) kern_channel_reclaim(rx_ring); |
2322 | |
2323 | STATS_INC(nifs, NETIF_STATS_RX_SYNC); |
2324 | |
2325 | uint32_t avail = kern_channel_available_slot_count(ring: rx_ring); |
2326 | if (avail == 0) { |
2327 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2328 | ipsec_data_move_end(pcb); |
2329 | return 0; |
2330 | } |
2331 | |
2332 | struct kern_pbufpool *rx_pp = rx_ring->ckr_pp; |
2333 | VERIFY(rx_pp != NULL); |
2334 | bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats)); |
2335 | kern_channel_slot_t rx_pslot = NULL; |
2336 | kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL); |
2337 | |
2338 | while (rx_slot != NULL) { |
2339 | // Check for a waiting packet |
2340 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
2341 | mbuf_t data = pcb->ipsec_input_chain; |
2342 | if (data == NULL) { |
2343 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2344 | break; |
2345 | } |
2346 | |
2347 | // Allocate rx packet |
2348 | kern_packet_t rx_ph = 0; |
2349 | errno_t error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
2350 | if (__improbable(error != 0)) { |
2351 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
2352 | STATS_INC(nifs, NETIF_STATS_DROP); |
2353 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2354 | break; |
2355 | } |
2356 | |
2357 | // Advance waiting packets |
2358 | if (pcb->ipsec_input_chain_count > 0) { |
2359 | pcb->ipsec_input_chain_count--; |
2360 | } |
2361 | pcb->ipsec_input_chain = data->m_nextpkt; |
2362 | data->m_nextpkt = NULL; |
2363 | if (pcb->ipsec_input_chain == NULL) { |
2364 | pcb->ipsec_input_chain_last = NULL; |
2365 | } |
2366 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2367 | |
2368 | size_t length = mbuf_pkthdr_len(mbuf: data); |
2369 | |
2370 | if (length < sizeof(struct ip)) { |
2371 | // Flush data |
2372 | mbuf_freem(mbuf: data); |
2373 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2374 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2375 | STATS_INC(nifs, NETIF_STATS_DROP); |
2376 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length cannot hold IP %zu < %zu\n" , |
2377 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip)); |
2378 | continue; |
2379 | } |
2380 | |
2381 | uint32_t af = 0; |
2382 | struct ip *ip = mtod(data, struct ip *); |
2383 | u_int ip_version = ip->ip_v; |
2384 | switch (ip_version) { |
2385 | case 4: { |
2386 | af = AF_INET; |
2387 | break; |
2388 | } |
2389 | case 6: { |
2390 | af = AF_INET6; |
2391 | break; |
2392 | } |
2393 | default: { |
2394 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n" , |
2395 | pcb->ipsec_ifp->if_xname, ip_version); |
2396 | break; |
2397 | } |
2398 | } |
2399 | |
2400 | if (length > PP_BUF_SIZE_DEF(rx_pp) || |
2401 | (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) { |
2402 | // We need to fragment to send up into the netif |
2403 | |
2404 | u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp); |
2405 | if (pcb->ipsec_frag_size_set && |
2406 | pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) { |
2407 | fragment_mtu = pcb->ipsec_input_frag_size; |
2408 | } |
2409 | |
2410 | mbuf_t fragment_chain = NULL; |
2411 | switch (af) { |
2412 | case AF_INET: { |
2413 | // ip_fragment expects the length in host order |
2414 | ip->ip_len = ntohs(ip->ip_len); |
2415 | |
2416 | // ip_fragment will modify the original data, don't free |
2417 | int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE); |
2418 | if (fragment_error == 0 && data != NULL) { |
2419 | fragment_chain = data; |
2420 | } else { |
2421 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2422 | STATS_INC(nifs, NETIF_STATS_DROP); |
2423 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n" , |
2424 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
2425 | } |
2426 | break; |
2427 | } |
2428 | case AF_INET6: { |
2429 | if (length < sizeof(struct ip6_hdr)) { |
2430 | mbuf_freem(mbuf: data); |
2431 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2432 | STATS_INC(nifs, NETIF_STATS_DROP); |
2433 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n" , |
2434 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr)); |
2435 | } else { |
2436 | // ip6_do_fragmentation will free the original data on success only |
2437 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
2438 | |
2439 | int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr), |
2440 | ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data))); |
2441 | if (fragment_error == 0 && data != NULL) { |
2442 | fragment_chain = data; |
2443 | } else { |
2444 | mbuf_freem(mbuf: data); |
2445 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2446 | STATS_INC(nifs, NETIF_STATS_DROP); |
2447 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n" , |
2448 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
2449 | } |
2450 | } |
2451 | break; |
2452 | } |
2453 | default: { |
2454 | // Cannot fragment unknown families |
2455 | mbuf_freem(mbuf: data); |
2456 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2457 | STATS_INC(nifs, NETIF_STATS_DROP); |
2458 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: uknown legacy decrypted packet length %zu > %u\n" , |
2459 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
2460 | break; |
2461 | } |
2462 | } |
2463 | |
2464 | if (fragment_chain != NULL) { |
2465 | // Add fragments to chain before continuing |
2466 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
2467 | if (pcb->ipsec_input_chain != NULL) { |
2468 | pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain; |
2469 | } else { |
2470 | pcb->ipsec_input_chain = fragment_chain; |
2471 | } |
2472 | pcb->ipsec_input_chain_count++; |
2473 | while (fragment_chain->m_nextpkt) { |
2474 | VERIFY(fragment_chain != fragment_chain->m_nextpkt); |
2475 | fragment_chain = fragment_chain->m_nextpkt; |
2476 | pcb->ipsec_input_chain_count++; |
2477 | } |
2478 | pcb->ipsec_input_chain_last = fragment_chain; |
2479 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2480 | } |
2481 | |
2482 | // Make sure to free unused rx packet |
2483 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2484 | |
2485 | continue; |
2486 | } |
2487 | |
2488 | mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp); |
2489 | |
2490 | // Fillout rx packet |
2491 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
2492 | VERIFY(rx_buf != NULL); |
2493 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
2494 | VERIFY(rx_baddr != NULL); |
2495 | |
2496 | // Copy-in data from mbuf to buflet |
2497 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
2498 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
2499 | |
2500 | // Finalize and attach the packet |
2501 | error = kern_buflet_set_data_offset(rx_buf, 0); |
2502 | VERIFY(error == 0); |
2503 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
2504 | VERIFY(error == 0); |
2505 | error = kern_packet_set_headroom(rx_ph, 0); |
2506 | VERIFY(error == 0); |
2507 | error = kern_packet_finalize(rx_ph); |
2508 | VERIFY(error == 0); |
2509 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
2510 | VERIFY(error == 0); |
2511 | |
2512 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
2513 | STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF); |
2514 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
2515 | |
2516 | rx_ring_stats.kcrsi_slots_transferred++; |
2517 | rx_ring_stats.kcrsi_bytes_transferred += length; |
2518 | |
2519 | if (!pcb->ipsec_ext_ifdata_stats) { |
2520 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0); |
2521 | } |
2522 | |
2523 | mbuf_freem(mbuf: data); |
2524 | |
2525 | // Advance ring |
2526 | rx_pslot = rx_slot; |
2527 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
2528 | } |
2529 | |
2530 | for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) { |
2531 | struct kern_channel_ring_stat_increment tx_ring_stats = {}; |
2532 | kern_channel_slot_t tx_pslot = NULL; |
2533 | kern_channel_slot_t tx_slot = NULL; |
2534 | |
2535 | kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx]; |
2536 | if (tx_ring == NULL) { |
2537 | // Net-If TX ring not set up yet, nothing to read |
2538 | goto done; |
2539 | } |
2540 | |
2541 | // Unlock ipsec before entering ring |
2542 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2543 | |
2544 | (void)kr_enter(tx_ring, TRUE); |
2545 | |
2546 | // Lock again after entering and validate |
2547 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2548 | |
2549 | if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) { |
2550 | goto done; |
2551 | } |
2552 | |
2553 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
2554 | if (tx_slot == NULL) { |
2555 | // Nothing to read, don't bother signalling |
2556 | goto done; |
2557 | } |
2558 | |
2559 | while (rx_slot != NULL && tx_slot != NULL) { |
2560 | errno_t error = 0; |
2561 | |
2562 | // Allocate rx packet |
2563 | kern_packet_t rx_ph = 0; |
2564 | error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
2565 | if (__improbable(error != 0)) { |
2566 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
2567 | STATS_INC(nifs, NETIF_STATS_DROP); |
2568 | break; |
2569 | } |
2570 | |
2571 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
2572 | tx_pslot = tx_slot; |
2573 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
2574 | if (tx_ph == 0) { |
2575 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2576 | continue; |
2577 | } |
2578 | |
2579 | error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb, |
2580 | tx_ring_stats: &tx_ring_stats, nifs, kpipe_ph: tx_ph, netif_ph: rx_ph); |
2581 | if (error != 0) { |
2582 | // Failed to get decrypted packet |
2583 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2584 | continue; |
2585 | } |
2586 | |
2587 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
2588 | VERIFY(error == 0); |
2589 | |
2590 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
2591 | STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT); |
2592 | |
2593 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
2594 | |
2595 | rx_ring_stats.kcrsi_slots_transferred++; |
2596 | rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph); |
2597 | |
2598 | if (!pcb->ipsec_ext_ifdata_stats) { |
2599 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, |
2600 | bytes_in: kern_packet_get_data_length(rx_ph), errors_in: 0); |
2601 | } |
2602 | |
2603 | rx_pslot = rx_slot; |
2604 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
2605 | } |
2606 | |
2607 | done: |
2608 | if (tx_pslot) { |
2609 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
2610 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
2611 | (void)kern_channel_reclaim(tx_ring); |
2612 | } |
2613 | |
2614 | // Unlock first, then exit ring |
2615 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2616 | if (tx_ring != NULL) { |
2617 | if (tx_pslot != NULL) { |
2618 | kern_channel_notify(tx_ring, flags: 0); |
2619 | } |
2620 | kr_exit(tx_ring); |
2621 | } |
2622 | |
2623 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2624 | } |
2625 | |
2626 | if (rx_pslot) { |
2627 | kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot); |
2628 | kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats); |
2629 | } |
2630 | |
2631 | |
2632 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2633 | |
2634 | ipsec_data_move_end(pcb); |
2635 | return 0; |
2636 | } |
2637 | |
2638 | static errno_t |
2639 | ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
2640 | kern_channel_ring_t rx_ring, uint32_t flags) |
2641 | { |
2642 | if (__improbable(ipsec_kpipe_mbuf == 1)) { |
2643 | return ipsec_netif_sync_rx_mbuf(nxprov, nexus, rx_ring, flags); |
2644 | } else { |
2645 | return ipsec_netif_sync_rx_packet(nxprov, nexus, rx_ring, flags); |
2646 | } |
2647 | } |
2648 | |
2649 | static errno_t |
2650 | ipsec_nexus_ifattach(struct ipsec_pcb *pcb, |
2651 | struct ifnet_init_eparams *init_params, |
2652 | struct ifnet **ifp) |
2653 | { |
2654 | errno_t err; |
2655 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2656 | struct kern_nexus_net_init net_init; |
2657 | struct kern_pbufpool_init pp_init; |
2658 | |
2659 | nexus_name_t provider_name; |
2660 | snprintf((char *)provider_name, count: sizeof(provider_name), |
2661 | "com.apple.netif.%s" , pcb->ipsec_if_xname); |
2662 | |
2663 | struct kern_nexus_provider_init prov_init = { |
2664 | .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION, |
2665 | .nxpi_flags = NXPIF_VIRTUAL_DEVICE, |
2666 | .nxpi_pre_connect = ipsec_nexus_pre_connect, |
2667 | .nxpi_connected = ipsec_nexus_connected, |
2668 | .nxpi_pre_disconnect = ipsec_netif_pre_disconnect, |
2669 | .nxpi_disconnected = ipsec_nexus_disconnected, |
2670 | .nxpi_ring_init = ipsec_netif_ring_init, |
2671 | .nxpi_ring_fini = ipsec_netif_ring_fini, |
2672 | .nxpi_slot_init = NULL, |
2673 | .nxpi_slot_fini = NULL, |
2674 | .nxpi_sync_tx = ipsec_netif_sync_tx, |
2675 | .nxpi_sync_rx = ipsec_netif_sync_rx, |
2676 | .nxpi_tx_doorbell = ipsec_netif_tx_doorbell, |
2677 | }; |
2678 | |
2679 | nexus_attr_t nxa = NULL; |
2680 | err = kern_nexus_attr_create(&nxa); |
2681 | IPSEC_IF_VERIFY(err == 0); |
2682 | if (err != 0) { |
2683 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n" , |
2684 | __func__, err); |
2685 | goto failed; |
2686 | } |
2687 | |
2688 | uint64_t slot_buffer_size = pcb->ipsec_slot_size; |
2689 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size); |
2690 | VERIFY(err == 0); |
2691 | |
2692 | // Reset ring size for netif nexus to limit memory usage |
2693 | uint64_t ring_size = pcb->ipsec_netif_ring_size; |
2694 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_SLOTS, value: ring_size); |
2695 | VERIFY(err == 0); |
2696 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_SLOTS, value: ring_size); |
2697 | VERIFY(err == 0); |
2698 | |
2699 | assert(err == 0); |
2700 | |
2701 | if (ipsec_in_wmm_mode(pcb)) { |
2702 | os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n" , |
2703 | __func__, pcb->ipsec_if_xname); |
2704 | |
2705 | init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED; |
2706 | |
2707 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_RINGS, |
2708 | IPSEC_NETIF_WMM_TX_RING_COUNT); |
2709 | VERIFY(err == 0); |
2710 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_RINGS, |
2711 | IPSEC_NETIF_WMM_RX_RING_COUNT); |
2712 | VERIFY(err == 0); |
2713 | |
2714 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_QMAP, value: NEXUS_QMAP_TYPE_WMM); |
2715 | VERIFY(err == 0); |
2716 | } |
2717 | |
2718 | pcb->ipsec_netif_txring_size = ring_size; |
2719 | |
2720 | bzero(s: &pp_init, n: sizeof(pp_init)); |
2721 | pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION; |
2722 | pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE; |
2723 | // Note: we need more packets than can be held in the tx and rx rings because |
2724 | // packets can also be in the AQM queue(s) |
2725 | pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1); |
2726 | pp_init.kbi_bufsize = pcb->ipsec_slot_size; |
2727 | pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE; |
2728 | pp_init.kbi_max_frags = 1; |
2729 | (void) snprintf((char *)pp_init.kbi_name, count: sizeof(pp_init.kbi_name), |
2730 | "%s" , provider_name); |
2731 | pp_init.kbi_ctx = NULL; |
2732 | pp_init.kbi_ctx_retain = NULL; |
2733 | pp_init.kbi_ctx_release = NULL; |
2734 | |
2735 | err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL); |
2736 | if (err != 0) { |
2737 | os_log_error(OS_LOG_DEFAULT, "%s pbufbool create failed, error %d\n" , __func__, err); |
2738 | goto failed; |
2739 | } |
2740 | |
2741 | err = kern_nexus_controller_register_provider(ctl: controller, |
2742 | dom_prov_uuid: ipsec_nx_dom_prov, |
2743 | provider_name, |
2744 | init: &prov_init, |
2745 | init_len: sizeof(prov_init), |
2746 | nxa, |
2747 | nx_prov_uuid: &pcb->ipsec_nx.if_provider); |
2748 | IPSEC_IF_VERIFY(err == 0); |
2749 | if (err != 0) { |
2750 | os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n" , |
2751 | __func__, err); |
2752 | goto failed; |
2753 | } |
2754 | |
2755 | bzero(s: &net_init, n: sizeof(net_init)); |
2756 | net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION; |
2757 | net_init.nxneti_flags = 0; |
2758 | net_init.nxneti_eparams = init_params; |
2759 | net_init.nxneti_lladdr = NULL; |
2760 | net_init.nxneti_prepare = ipsec_netif_prepare; |
2761 | net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp; |
2762 | net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp; |
2763 | err = kern_nexus_controller_alloc_net_provider_instance(ctl: controller, |
2764 | nx_prov_uuid: pcb->ipsec_nx.if_provider, |
2765 | nexus_context: pcb, |
2766 | NULL, |
2767 | nx_uuid: &pcb->ipsec_nx.if_instance, |
2768 | init: &net_init, |
2769 | ifp); |
2770 | IPSEC_IF_VERIFY(err == 0); |
2771 | if (err != 0) { |
2772 | os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n" , |
2773 | __func__, err); |
2774 | kern_nexus_controller_deregister_provider(ctl: controller, |
2775 | nx_prov_uuid: pcb->ipsec_nx.if_provider); |
2776 | uuid_clear(uu: pcb->ipsec_nx.if_provider); |
2777 | goto failed; |
2778 | } |
2779 | |
2780 | failed: |
2781 | if (nxa) { |
2782 | kern_nexus_attr_destroy(attr: nxa); |
2783 | } |
2784 | if (err && pcb->ipsec_netif_pp != NULL) { |
2785 | kern_pbufpool_destroy(pcb->ipsec_netif_pp); |
2786 | pcb->ipsec_netif_pp = NULL; |
2787 | } |
2788 | return err; |
2789 | } |
2790 | |
2791 | static void |
2792 | ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance) |
2793 | { |
2794 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2795 | errno_t err; |
2796 | |
2797 | if (!uuid_is_null(uu: instance)) { |
2798 | err = kern_nexus_controller_free_provider_instance(ctl: controller, |
2799 | nx_uuid: instance); |
2800 | if (err != 0) { |
2801 | os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n" , |
2802 | __func__, err); |
2803 | } |
2804 | uuid_clear(uu: instance); |
2805 | } |
2806 | if (!uuid_is_null(uu: provider)) { |
2807 | err = kern_nexus_controller_deregister_provider(ctl: controller, |
2808 | nx_prov_uuid: provider); |
2809 | if (err != 0) { |
2810 | os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n" , __func__, err); |
2811 | } |
2812 | uuid_clear(uu: provider); |
2813 | } |
2814 | return; |
2815 | } |
2816 | |
2817 | static void |
2818 | ipsec_nexus_detach(struct ipsec_pcb *pcb) |
2819 | { |
2820 | ipsec_nx_t nx = &pcb->ipsec_nx; |
2821 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2822 | errno_t err; |
2823 | |
2824 | if (!uuid_is_null(uu: nx->fsw_device)) { |
2825 | err = kern_nexus_ifdetach(ctl: controller, |
2826 | nx_uuid: nx->fsw_instance, |
2827 | nx_if_uuid: nx->fsw_device); |
2828 | if (err != 0) { |
2829 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n" , |
2830 | __func__, err); |
2831 | } |
2832 | } |
2833 | |
2834 | ipsec_detach_provider_and_instance(provider: nx->fsw_provider, |
2835 | instance: nx->fsw_instance); |
2836 | ipsec_detach_provider_and_instance(provider: nx->if_provider, |
2837 | instance: nx->if_instance); |
2838 | |
2839 | if (pcb->ipsec_netif_pp != NULL) { |
2840 | kern_pbufpool_destroy(pcb->ipsec_netif_pp); |
2841 | pcb->ipsec_netif_pp = NULL; |
2842 | } |
2843 | memset(s: nx, c: 0, n: sizeof(*nx)); |
2844 | } |
2845 | |
2846 | static errno_t |
2847 | ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb, |
2848 | const char *type_name, |
2849 | const char *ifname, |
2850 | uuid_t *provider, uuid_t *instance) |
2851 | { |
2852 | nexus_attr_t attr = NULL; |
2853 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2854 | uuid_t dom_prov; |
2855 | errno_t err; |
2856 | struct kern_nexus_init init; |
2857 | nexus_name_t provider_name; |
2858 | |
2859 | err = kern_nexus_get_default_domain_provider(type: NEXUS_TYPE_FLOW_SWITCH, |
2860 | dom_prov_uuid: &dom_prov); |
2861 | IPSEC_IF_VERIFY(err == 0); |
2862 | if (err != 0) { |
2863 | os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n" , |
2864 | __func__, type_name, err); |
2865 | goto failed; |
2866 | } |
2867 | |
2868 | err = kern_nexus_attr_create(&attr); |
2869 | IPSEC_IF_VERIFY(err == 0); |
2870 | if (err != 0) { |
2871 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n" , |
2872 | __func__, err); |
2873 | goto failed; |
2874 | } |
2875 | |
2876 | uint64_t slot_buffer_size = pcb->ipsec_slot_size; |
2877 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size); |
2878 | VERIFY(err == 0); |
2879 | |
2880 | // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif. |
2881 | uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size; |
2882 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_TX_SLOTS, value: tx_ring_size); |
2883 | VERIFY(err == 0); |
2884 | uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size; |
2885 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_RX_SLOTS, value: rx_ring_size); |
2886 | VERIFY(err == 0); |
2887 | /* |
2888 | * Configure flowswitch to use super-packet (multi-buflet). |
2889 | * This allows flowswitch to perform intra-stack packet aggregation. |
2890 | */ |
2891 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_MAX_FRAGS, |
2892 | NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1); |
2893 | VERIFY(err == 0); |
2894 | |
2895 | snprintf((char *)provider_name, count: sizeof(provider_name), |
2896 | "com.apple.%s.%s" , type_name, ifname); |
2897 | err = kern_nexus_controller_register_provider(ctl: controller, |
2898 | dom_prov_uuid: dom_prov, |
2899 | provider_name, |
2900 | NULL, |
2901 | init_len: 0, |
2902 | nxa: attr, |
2903 | nx_prov_uuid: provider); |
2904 | kern_nexus_attr_destroy(attr); |
2905 | attr = NULL; |
2906 | IPSEC_IF_VERIFY(err == 0); |
2907 | if (err != 0) { |
2908 | os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n" , |
2909 | __func__, type_name, err); |
2910 | goto failed; |
2911 | } |
2912 | bzero(s: &init, n: sizeof(init)); |
2913 | init.nxi_version = KERN_NEXUS_CURRENT_VERSION; |
2914 | err = kern_nexus_controller_alloc_provider_instance(ctl: controller, |
2915 | nx_prov_uuid: *provider, |
2916 | NULL, NULL, |
2917 | nx_uuid: instance, init: &init); |
2918 | IPSEC_IF_VERIFY(err == 0); |
2919 | if (err != 0) { |
2920 | os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n" , |
2921 | __func__, type_name, err); |
2922 | kern_nexus_controller_deregister_provider(ctl: controller, |
2923 | nx_prov_uuid: *provider); |
2924 | uuid_clear(uu: *provider); |
2925 | } |
2926 | failed: |
2927 | return err; |
2928 | } |
2929 | |
2930 | static errno_t |
2931 | ipsec_flowswitch_attach(struct ipsec_pcb *pcb) |
2932 | { |
2933 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2934 | errno_t err = 0; |
2935 | ipsec_nx_t nx = &pcb->ipsec_nx; |
2936 | |
2937 | // Allocate flowswitch |
2938 | err = ipsec_create_fs_provider_and_instance(pcb, |
2939 | type_name: "flowswitch" , |
2940 | ifname: pcb->ipsec_ifp->if_xname, |
2941 | provider: &nx->fsw_provider, |
2942 | instance: &nx->fsw_instance); |
2943 | if (err != 0) { |
2944 | os_log_error(OS_LOG_DEFAULT, "%s: failed to create bridge provider and instance\n" , |
2945 | __func__); |
2946 | goto failed; |
2947 | } |
2948 | |
2949 | // Attach flowswitch to device port |
2950 | err = kern_nexus_ifattach(controller, nx_uuid: nx->fsw_instance, |
2951 | NULL, nx_attachee: nx->if_instance, |
2952 | FALSE, nx_if_uuid: &nx->fsw_device); |
2953 | if (err != 0) { |
2954 | os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n" , __func__, err); |
2955 | goto failed; |
2956 | } |
2957 | |
2958 | // Extract the agent UUID and save for later |
2959 | struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false); |
2960 | if (flowswitch_nx != NULL) { |
2961 | struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx); |
2962 | if (flowswitch != NULL) { |
2963 | FSW_RLOCK(flowswitch); |
2964 | uuid_copy(dst: nx->fsw_agent, src: flowswitch->fsw_agent_uuid); |
2965 | FSW_UNLOCK(flowswitch); |
2966 | } else { |
2967 | os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n" ); |
2968 | } |
2969 | nx_release(flowswitch_nx); |
2970 | } else { |
2971 | os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n" ); |
2972 | } |
2973 | |
2974 | return 0; |
2975 | |
2976 | failed: |
2977 | ipsec_nexus_detach(pcb); |
2978 | |
2979 | errno_t detach_error = 0; |
2980 | if ((detach_error = ifnet_detach(interface: pcb->ipsec_ifp)) != 0) { |
2981 | panic("ipsec_flowswitch_attach - ifnet_detach failed: %d" , detach_error); |
2982 | /* NOT REACHED */ |
2983 | } |
2984 | |
2985 | return err; |
2986 | } |
2987 | |
2988 | #pragma mark Kernel Pipe Nexus |
2989 | |
2990 | static errno_t |
2991 | ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb) |
2992 | { |
2993 | nexus_attr_t nxa = NULL; |
2994 | errno_t result; |
2995 | |
2996 | lck_mtx_lock(lck: &ipsec_lock); |
2997 | if (ipsec_ncd_refcount++) { |
2998 | lck_mtx_unlock(lck: &ipsec_lock); |
2999 | return 0; |
3000 | } |
3001 | |
3002 | result = kern_nexus_controller_create(ctl: &ipsec_ncd); |
3003 | if (result) { |
3004 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n" , |
3005 | __FUNCTION__, result); |
3006 | goto done; |
3007 | } |
3008 | |
3009 | uuid_t dom_prov; |
3010 | result = kern_nexus_get_default_domain_provider( |
3011 | type: NEXUS_TYPE_KERNEL_PIPE, dom_prov_uuid: &dom_prov); |
3012 | if (result) { |
3013 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n" , |
3014 | __FUNCTION__, result); |
3015 | goto done; |
3016 | } |
3017 | |
3018 | struct kern_nexus_provider_init prov_init = { |
3019 | .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION, |
3020 | .nxpi_flags = NXPIF_VIRTUAL_DEVICE, |
3021 | .nxpi_pre_connect = ipsec_nexus_pre_connect, |
3022 | .nxpi_connected = ipsec_nexus_connected, |
3023 | .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect, |
3024 | .nxpi_disconnected = ipsec_nexus_disconnected, |
3025 | .nxpi_ring_init = ipsec_kpipe_ring_init, |
3026 | .nxpi_ring_fini = ipsec_kpipe_ring_fini, |
3027 | .nxpi_slot_init = NULL, |
3028 | .nxpi_slot_fini = NULL, |
3029 | .nxpi_sync_tx = ipsec_kpipe_sync_tx, |
3030 | .nxpi_sync_rx = ipsec_kpipe_sync_rx, |
3031 | .nxpi_tx_doorbell = NULL, |
3032 | }; |
3033 | |
3034 | result = kern_nexus_attr_create(&nxa); |
3035 | if (result) { |
3036 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n" , |
3037 | __FUNCTION__, result); |
3038 | goto done; |
3039 | } |
3040 | |
3041 | uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE; |
3042 | result = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size); |
3043 | VERIFY(result == 0); |
3044 | |
3045 | // Reset ring size for kernel pipe nexus to limit memory usage |
3046 | // Note: It's better to have less on slots on the kpipe TX ring than the netif |
3047 | // so back pressure is applied at the AQM layer |
3048 | uint64_t ring_size = |
3049 | pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size : |
3050 | pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size : |
3051 | if_ipsec_ring_size; |
3052 | result = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_SLOTS, value: ring_size); |
3053 | VERIFY(result == 0); |
3054 | |
3055 | ring_size = |
3056 | pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size : |
3057 | pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size : |
3058 | if_ipsec_ring_size; |
3059 | result = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_SLOTS, value: ring_size); |
3060 | VERIFY(result == 0); |
3061 | |
3062 | result = kern_nexus_controller_register_provider(ctl: ipsec_ncd, |
3063 | dom_prov_uuid: dom_prov, |
3064 | (const uint8_t *)"com.apple.nexus.ipsec.kpipe" , |
3065 | init: &prov_init, |
3066 | init_len: sizeof(prov_init), |
3067 | nxa, |
3068 | nx_prov_uuid: &ipsec_kpipe_uuid); |
3069 | if (result) { |
3070 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n" , |
3071 | __FUNCTION__, result); |
3072 | goto done; |
3073 | } |
3074 | |
3075 | done: |
3076 | if (nxa) { |
3077 | kern_nexus_attr_destroy(attr: nxa); |
3078 | } |
3079 | |
3080 | if (result) { |
3081 | if (ipsec_ncd) { |
3082 | kern_nexus_controller_destroy(ctl: ipsec_ncd); |
3083 | ipsec_ncd = NULL; |
3084 | } |
3085 | ipsec_ncd_refcount = 0; |
3086 | } |
3087 | |
3088 | lck_mtx_unlock(lck: &ipsec_lock); |
3089 | |
3090 | return result; |
3091 | } |
3092 | |
3093 | static void |
3094 | ipsec_unregister_kernel_pipe_nexus(void) |
3095 | { |
3096 | lck_mtx_lock(lck: &ipsec_lock); |
3097 | |
3098 | VERIFY(ipsec_ncd_refcount > 0); |
3099 | |
3100 | if (--ipsec_ncd_refcount == 0) { |
3101 | kern_nexus_controller_destroy(ctl: ipsec_ncd); |
3102 | ipsec_ncd = NULL; |
3103 | } |
3104 | |
3105 | lck_mtx_unlock(lck: &ipsec_lock); |
3106 | } |
3107 | |
3108 | /* This structure only holds onto kpipe channels that need to be |
3109 | * freed in the future, but are cleared from the pcb under lock |
3110 | */ |
3111 | struct ipsec_detached_channels { |
3112 | int count; |
3113 | kern_pbufpool_t pp; |
3114 | uuid_t uuids[IPSEC_IF_MAX_RING_COUNT]; |
3115 | }; |
3116 | |
3117 | static void |
3118 | ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc) |
3119 | { |
3120 | LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE); |
3121 | |
3122 | if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
3123 | for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) { |
3124 | VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i])); |
3125 | } |
3126 | dc->count = 0; |
3127 | return; |
3128 | } |
3129 | |
3130 | dc->count = pcb->ipsec_kpipe_count; |
3131 | |
3132 | VERIFY(dc->count >= 0); |
3133 | VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT); |
3134 | |
3135 | for (int i = 0; i < dc->count; i++) { |
3136 | VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i])); |
3137 | uuid_copy(dst: dc->uuids[i], src: pcb->ipsec_kpipe_uuid[i]); |
3138 | uuid_clear(uu: pcb->ipsec_kpipe_uuid[i]); |
3139 | } |
3140 | for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) { |
3141 | VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i])); |
3142 | } |
3143 | |
3144 | if (dc->count) { |
3145 | VERIFY(pcb->ipsec_kpipe_pp); |
3146 | } else { |
3147 | VERIFY(!pcb->ipsec_kpipe_pp); |
3148 | } |
3149 | |
3150 | dc->pp = pcb->ipsec_kpipe_pp; |
3151 | |
3152 | pcb->ipsec_kpipe_pp = NULL; |
3153 | |
3154 | ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED); |
3155 | } |
3156 | |
3157 | static void |
3158 | ipsec_free_channels(struct ipsec_detached_channels *dc) |
3159 | { |
3160 | if (!dc->count) { |
3161 | return; |
3162 | } |
3163 | |
3164 | for (int i = 0; i < dc->count; i++) { |
3165 | errno_t result; |
3166 | result = kern_nexus_controller_free_provider_instance(ctl: ipsec_ncd, nx_uuid: dc->uuids[i]); |
3167 | VERIFY(!result); |
3168 | } |
3169 | |
3170 | VERIFY(dc->pp); |
3171 | kern_pbufpool_destroy(dc->pp); |
3172 | |
3173 | ipsec_unregister_kernel_pipe_nexus(); |
3174 | |
3175 | memset(s: dc, c: 0, n: sizeof(*dc)); |
3176 | } |
3177 | |
3178 | static errno_t |
3179 | ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc) |
3180 | { |
3181 | struct kern_nexus_init init; |
3182 | struct kern_pbufpool_init pp_init; |
3183 | errno_t result; |
3184 | |
3185 | kauth_cred_t cred = kauth_cred_get(); |
3186 | result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, flags: 0); |
3187 | if (result) { |
3188 | return result; |
3189 | } |
3190 | |
3191 | VERIFY(pcb->ipsec_kpipe_count); |
3192 | VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)); |
3193 | |
3194 | result = ipsec_register_kernel_pipe_nexus(pcb); |
3195 | |
3196 | lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock); |
3197 | |
3198 | if (result) { |
3199 | os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n" , |
3200 | __func__, pcb->ipsec_if_xname); |
3201 | goto done; |
3202 | } |
3203 | |
3204 | VERIFY(ipsec_ncd); |
3205 | |
3206 | bzero(s: &pp_init, n: sizeof(pp_init)); |
3207 | pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION; |
3208 | pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE; |
3209 | // Note: We only needs are many packets as can be held in the tx and rx rings |
3210 | pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count; |
3211 | pp_init.kbi_bufsize = pcb->ipsec_slot_size; |
3212 | pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE; |
3213 | pp_init.kbi_max_frags = 1; |
3214 | pp_init.kbi_flags |= KBIF_QUANTUM; |
3215 | (void) snprintf((char *)pp_init.kbi_name, count: sizeof(pp_init.kbi_name), |
3216 | "com.apple.kpipe.%s" , pcb->ipsec_if_xname); |
3217 | pp_init.kbi_ctx = NULL; |
3218 | pp_init.kbi_ctx_retain = NULL; |
3219 | pp_init.kbi_ctx_release = NULL; |
3220 | |
3221 | result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp, |
3222 | NULL); |
3223 | if (result != 0) { |
3224 | os_log_error(OS_LOG_DEFAULT, "%s: %s pbufbool create failed, error %d\n" , |
3225 | __func__, pcb->ipsec_if_xname, result); |
3226 | goto done; |
3227 | } |
3228 | |
3229 | bzero(s: &init, n: sizeof(init)); |
3230 | init.nxi_version = KERN_NEXUS_CURRENT_VERSION; |
3231 | init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp; |
3232 | |
3233 | for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) { |
3234 | VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i])); |
3235 | result = kern_nexus_controller_alloc_provider_instance(ctl: ipsec_ncd, |
3236 | nx_prov_uuid: ipsec_kpipe_uuid, nexus_context: pcb, NULL, nx_uuid: &pcb->ipsec_kpipe_uuid[i], init: &init); |
3237 | |
3238 | if (result == 0) { |
3239 | nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT; |
3240 | const bool has_proc_uuid = !uuid_is_null(uu: pcb->ipsec_kpipe_proc_uuid); |
3241 | pid_t pid = pcb->ipsec_kpipe_pid; |
3242 | if (!pid && !has_proc_uuid) { |
3243 | pid = proc_pid(proc); |
3244 | } |
3245 | result = kern_nexus_controller_bind_provider_instance(ctl: ipsec_ncd, |
3246 | nx_uuid: pcb->ipsec_kpipe_uuid[i], port: &port, |
3247 | pid, exec_uuid: has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL, |
3248 | key_len: 0, bind_flags: has_proc_uuid ? NEXUS_BIND_EXEC_UUID:NEXUS_BIND_PID); |
3249 | } |
3250 | |
3251 | if (result) { |
3252 | /* Unwind all of them on error */ |
3253 | for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) { |
3254 | if (!uuid_is_null(uu: pcb->ipsec_kpipe_uuid[j])) { |
3255 | kern_nexus_controller_free_provider_instance(ctl: ipsec_ncd, |
3256 | nx_uuid: pcb->ipsec_kpipe_uuid[j]); |
3257 | uuid_clear(uu: pcb->ipsec_kpipe_uuid[j]); |
3258 | } |
3259 | } |
3260 | goto done; |
3261 | } |
3262 | } |
3263 | |
3264 | done: |
3265 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
3266 | |
3267 | if (result) { |
3268 | if (pcb->ipsec_kpipe_pp != NULL) { |
3269 | kern_pbufpool_destroy(pcb->ipsec_kpipe_pp); |
3270 | pcb->ipsec_kpipe_pp = NULL; |
3271 | } |
3272 | ipsec_unregister_kernel_pipe_nexus(); |
3273 | } else { |
3274 | ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED); |
3275 | } |
3276 | |
3277 | return result; |
3278 | } |
3279 | |
3280 | #endif // IPSEC_NEXUS |
3281 | |
3282 | |
3283 | /* Kernel control functions */ |
3284 | |
3285 | static inline int |
3286 | ipsec_find_by_unit(u_int32_t unit) |
3287 | { |
3288 | struct ipsec_pcb *next_pcb = NULL; |
3289 | int found = 0; |
3290 | |
3291 | TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) { |
3292 | if (next_pcb->ipsec_unit == unit) { |
3293 | found = 1; |
3294 | break; |
3295 | } |
3296 | } |
3297 | |
3298 | return found; |
3299 | } |
3300 | |
3301 | static inline void |
3302 | ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked) |
3303 | { |
3304 | #if IPSEC_NEXUS |
3305 | mbuf_freem_list(mbuf: pcb->ipsec_input_chain); |
3306 | pcb->ipsec_input_chain_count = 0; |
3307 | lck_mtx_destroy(lck: &pcb->ipsec_input_chain_lock, grp: &ipsec_lck_grp); |
3308 | lck_mtx_destroy(lck: &pcb->ipsec_kpipe_encrypt_lock, grp: &ipsec_lck_grp); |
3309 | lck_mtx_destroy(lck: &pcb->ipsec_kpipe_decrypt_lock, grp: &ipsec_lck_grp); |
3310 | #endif // IPSEC_NEXUS |
3311 | lck_mtx_destroy(lck: &pcb->ipsec_pcb_data_move_lock, grp: &ipsec_lck_grp); |
3312 | lck_rw_destroy(lck: &pcb->ipsec_pcb_lock, grp: &ipsec_lck_grp); |
3313 | if (!locked) { |
3314 | lck_mtx_lock(lck: &ipsec_lock); |
3315 | } |
3316 | TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain); |
3317 | if (!locked) { |
3318 | lck_mtx_unlock(lck: &ipsec_lock); |
3319 | } |
3320 | zfree(ipsec_pcb_zone, pcb); |
3321 | } |
3322 | |
3323 | static errno_t |
3324 | ipsec_ctl_setup(u_int32_t *unit, void **unitinfo) |
3325 | { |
3326 | if (unit == NULL || unitinfo == NULL) { |
3327 | return EINVAL; |
3328 | } |
3329 | |
3330 | lck_mtx_lock(lck: &ipsec_lock); |
3331 | |
3332 | /* Find next available unit */ |
3333 | if (*unit == 0) { |
3334 | *unit = 1; |
3335 | while (*unit != ctl_maxunit) { |
3336 | if (ipsec_find_by_unit(unit: *unit)) { |
3337 | (*unit)++; |
3338 | } else { |
3339 | break; |
3340 | } |
3341 | } |
3342 | if (*unit == ctl_maxunit) { |
3343 | lck_mtx_unlock(lck: &ipsec_lock); |
3344 | return EBUSY; |
3345 | } |
3346 | } else if (ipsec_find_by_unit(unit: *unit)) { |
3347 | lck_mtx_unlock(lck: &ipsec_lock); |
3348 | return EBUSY; |
3349 | } |
3350 | |
3351 | /* Find some open interface id */ |
3352 | u_int32_t chosen_unique_id = 1; |
3353 | struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list); |
3354 | if (next_pcb != NULL) { |
3355 | /* List was not empty, add one to the last item */ |
3356 | chosen_unique_id = next_pcb->ipsec_unique_id + 1; |
3357 | next_pcb = NULL; |
3358 | |
3359 | /* |
3360 | * If this wrapped the id number, start looking at |
3361 | * the front of the list for an unused id. |
3362 | */ |
3363 | if (chosen_unique_id == 0) { |
3364 | /* Find the next unused ID */ |
3365 | chosen_unique_id = 1; |
3366 | TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) { |
3367 | if (next_pcb->ipsec_unique_id > chosen_unique_id) { |
3368 | /* We found a gap */ |
3369 | break; |
3370 | } |
3371 | |
3372 | chosen_unique_id = next_pcb->ipsec_unique_id + 1; |
3373 | } |
3374 | } |
3375 | } |
3376 | |
3377 | struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO); |
3378 | |
3379 | *unitinfo = pcb; |
3380 | pcb->ipsec_unit = *unit; |
3381 | pcb->ipsec_unique_id = chosen_unique_id; |
3382 | |
3383 | if (next_pcb != NULL) { |
3384 | TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain); |
3385 | } else { |
3386 | TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain); |
3387 | } |
3388 | |
3389 | lck_mtx_unlock(lck: &ipsec_lock); |
3390 | |
3391 | return 0; |
3392 | } |
3393 | |
3394 | static errno_t |
3395 | ipsec_ctl_bind(kern_ctl_ref kctlref, |
3396 | struct sockaddr_ctl *sac, |
3397 | void **unitinfo) |
3398 | { |
3399 | if (*unitinfo == NULL) { |
3400 | u_int32_t unit = 0; |
3401 | (void)ipsec_ctl_setup(unit: &unit, unitinfo); |
3402 | } |
3403 | |
3404 | struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo; |
3405 | if (pcb == NULL) { |
3406 | return EINVAL; |
3407 | } |
3408 | |
3409 | if (pcb->ipsec_ctlref != NULL) { |
3410 | // Return if bind was already called |
3411 | return EINVAL; |
3412 | } |
3413 | |
3414 | /* Setup the protocol control block */ |
3415 | pcb->ipsec_ctlref = kctlref; |
3416 | pcb->ipsec_unit = sac->sc_unit; |
3417 | pcb->ipsec_output_service_class = MBUF_SC_OAM; |
3418 | |
3419 | #if IPSEC_NEXUS |
3420 | pcb->ipsec_use_netif = false; |
3421 | pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE; |
3422 | pcb->ipsec_netif_ring_size = if_ipsec_ring_size; |
3423 | pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size; |
3424 | pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size; |
3425 | #endif // IPSEC_NEXUS |
3426 | |
3427 | lck_rw_init(lck: &pcb->ipsec_pcb_lock, grp: &ipsec_lck_grp, attr: &ipsec_lck_attr); |
3428 | lck_mtx_init(lck: &pcb->ipsec_pcb_data_move_lock, grp: &ipsec_lck_grp, attr: &ipsec_lck_attr); |
3429 | #if IPSEC_NEXUS |
3430 | pcb->ipsec_input_chain_count = 0; |
3431 | lck_mtx_init(lck: &pcb->ipsec_input_chain_lock, grp: &ipsec_lck_grp, attr: &ipsec_lck_attr); |
3432 | lck_mtx_init(lck: &pcb->ipsec_kpipe_encrypt_lock, grp: &ipsec_lck_grp, attr: &ipsec_lck_attr); |
3433 | lck_mtx_init(lck: &pcb->ipsec_kpipe_decrypt_lock, grp: &ipsec_lck_grp, attr: &ipsec_lck_attr); |
3434 | #endif // IPSEC_NEXUS |
3435 | |
3436 | return 0; |
3437 | } |
3438 | |
3439 | static errno_t |
3440 | ipsec_ctl_connect(kern_ctl_ref kctlref, |
3441 | struct sockaddr_ctl *sac, |
3442 | void **unitinfo) |
3443 | { |
3444 | struct ifnet_init_eparams ipsec_init = {}; |
3445 | errno_t result = 0; |
3446 | |
3447 | if (*unitinfo == NULL) { |
3448 | (void)ipsec_ctl_bind(kctlref, sac, unitinfo); |
3449 | } |
3450 | |
3451 | struct ipsec_pcb *pcb = *unitinfo; |
3452 | if (pcb == NULL) { |
3453 | return EINVAL; |
3454 | } |
3455 | |
3456 | /* Handle case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */ |
3457 | if (pcb->ipsec_ctlref == NULL) { |
3458 | (void)ipsec_ctl_bind(kctlref, sac, unitinfo); |
3459 | } |
3460 | |
3461 | snprintf(pcb->ipsec_if_xname, count: sizeof(pcb->ipsec_if_xname), "ipsec%d" , pcb->ipsec_unit - 1); |
3462 | snprintf(pcb->ipsec_unique_name, count: sizeof(pcb->ipsec_unique_name), "ipsecid%d" , pcb->ipsec_unique_id - 1); |
3463 | os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n" , pcb->ipsec_if_xname, pcb->ipsec_unique_name); |
3464 | |
3465 | /* Create the interface */ |
3466 | bzero(s: &ipsec_init, n: sizeof(ipsec_init)); |
3467 | ipsec_init.ver = IFNET_INIT_CURRENT_VERSION; |
3468 | ipsec_init.len = sizeof(ipsec_init); |
3469 | |
3470 | #if IPSEC_NEXUS |
3471 | if (pcb->ipsec_use_netif) { |
3472 | ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO); |
3473 | } else |
3474 | #endif // IPSEC_NEXUS |
3475 | { |
3476 | ipsec_init.flags = IFNET_INIT_NX_NOAUTO; |
3477 | ipsec_init.start = ipsec_start; |
3478 | } |
3479 | ipsec_init.name = "ipsec" ; |
3480 | ipsec_init.unit = pcb->ipsec_unit - 1; |
3481 | ipsec_init.uniqueid = pcb->ipsec_unique_name; |
3482 | ipsec_init.uniqueid_len = (uint32_t)strlen(s: pcb->ipsec_unique_name); |
3483 | ipsec_init.family = IFNET_FAMILY_IPSEC; |
3484 | ipsec_init.type = IFT_OTHER; |
3485 | ipsec_init.demux = ipsec_demux; |
3486 | ipsec_init.add_proto = ipsec_add_proto; |
3487 | ipsec_init.del_proto = ipsec_del_proto; |
3488 | ipsec_init.softc = pcb; |
3489 | ipsec_init.ioctl = ipsec_ioctl; |
3490 | ipsec_init.free = ipsec_detached; |
3491 | |
3492 | #if IPSEC_NEXUS |
3493 | /* We don't support kpipes without a netif */ |
3494 | if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) { |
3495 | result = ENOTSUP; |
3496 | os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n" , result); |
3497 | ipsec_free_pcb(pcb, false); |
3498 | *unitinfo = NULL; |
3499 | return result; |
3500 | } |
3501 | |
3502 | if (if_ipsec_debug != 0) { |
3503 | printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u " |
3504 | "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n" , |
3505 | __func__, |
3506 | ipsec_init.name, ipsec_init.unit, |
3507 | pcb->ipsec_use_netif, |
3508 | pcb->ipsec_kpipe_count, |
3509 | pcb->ipsec_slot_size, |
3510 | pcb->ipsec_netif_ring_size, |
3511 | pcb->ipsec_kpipe_tx_ring_size, |
3512 | pcb->ipsec_kpipe_rx_ring_size); |
3513 | } |
3514 | if (pcb->ipsec_use_netif) { |
3515 | if (pcb->ipsec_kpipe_count) { |
3516 | result = ipsec_enable_channel(pcb, proc: current_proc()); |
3517 | if (result) { |
3518 | os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n" , |
3519 | __func__, pcb->ipsec_if_xname); |
3520 | ipsec_free_pcb(pcb, false); |
3521 | *unitinfo = NULL; |
3522 | return result; |
3523 | } |
3524 | } |
3525 | |
3526 | result = ipsec_nexus_ifattach(pcb, init_params: &ipsec_init, ifp: &pcb->ipsec_ifp); |
3527 | if (result != 0) { |
3528 | os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n" , result); |
3529 | ipsec_free_pcb(pcb, false); |
3530 | *unitinfo = NULL; |
3531 | return result; |
3532 | } |
3533 | |
3534 | result = ipsec_flowswitch_attach(pcb); |
3535 | if (result != 0) { |
3536 | os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n" , result); |
3537 | // Do not call ipsec_free_pcb(). We will be attached already, and will be freed later |
3538 | // in ipsec_detached(). |
3539 | *unitinfo = NULL; |
3540 | return result; |
3541 | } |
3542 | |
3543 | /* Attach to bpf */ |
3544 | bpfattach(interface: pcb->ipsec_ifp, DLT_RAW, header_length: 0); |
3545 | } else |
3546 | #endif // IPSEC_NEXUS |
3547 | { |
3548 | result = ifnet_allocate_extended(init: &ipsec_init, interface: &pcb->ipsec_ifp); |
3549 | if (result != 0) { |
3550 | os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n" , result); |
3551 | ipsec_free_pcb(pcb, false); |
3552 | *unitinfo = NULL; |
3553 | return result; |
3554 | } |
3555 | ipsec_ifnet_set_attrs(ifp: pcb->ipsec_ifp); |
3556 | |
3557 | /* Attach the interface */ |
3558 | result = ifnet_attach(interface: pcb->ipsec_ifp, NULL); |
3559 | if (result != 0) { |
3560 | os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n" , result); |
3561 | ifnet_release(interface: pcb->ipsec_ifp); |
3562 | ipsec_free_pcb(pcb, false); |
3563 | *unitinfo = NULL; |
3564 | return result; |
3565 | } |
3566 | |
3567 | /* Attach to bpf */ |
3568 | bpfattach(interface: pcb->ipsec_ifp, DLT_NULL, header_length: 0); |
3569 | } |
3570 | |
3571 | #if IPSEC_NEXUS |
3572 | /* |
3573 | * Mark the data path as ready. |
3574 | * If kpipe nexus is being used then the data path is marked ready only when a kpipe channel is connected. |
3575 | */ |
3576 | if (pcb->ipsec_kpipe_count == 0) { |
3577 | lck_mtx_lock(lck: &pcb->ipsec_pcb_data_move_lock); |
3578 | IPSEC_SET_DATA_PATH_READY(pcb); |
3579 | lck_mtx_unlock(lck: &pcb->ipsec_pcb_data_move_lock); |
3580 | } |
3581 | #endif |
3582 | |
3583 | /* The interfaces resoures allocated, mark it as running */ |
3584 | ifnet_set_flags(interface: pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING); |
3585 | |
3586 | return 0; |
3587 | } |
3588 | |
3589 | static errno_t |
3590 | ipsec_detach_ip(ifnet_t interface, |
3591 | protocol_family_t protocol, |
3592 | socket_t pf_socket) |
3593 | { |
3594 | errno_t result = EPROTONOSUPPORT; |
3595 | |
3596 | /* Attempt a detach */ |
3597 | if (protocol == PF_INET) { |
3598 | struct ifreq ifr; |
3599 | |
3600 | bzero(s: &ifr, n: sizeof(ifr)); |
3601 | snprintf(ifr.ifr_name, count: sizeof(ifr.ifr_name), "%s%d" , |
3602 | ifnet_name(interface), ifnet_unit(interface)); |
3603 | |
3604 | result = sock_ioctl(so: pf_socket, SIOCPROTODETACH, argp: &ifr); |
3605 | } else if (protocol == PF_INET6) { |
3606 | struct in6_ifreq ifr6; |
3607 | |
3608 | bzero(s: &ifr6, n: sizeof(ifr6)); |
3609 | snprintf(ifr6.ifr_name, count: sizeof(ifr6.ifr_name), "%s%d" , |
3610 | ifnet_name(interface), ifnet_unit(interface)); |
3611 | |
3612 | result = sock_ioctl(so: pf_socket, SIOCPROTODETACH_IN6, argp: &ifr6); |
3613 | } |
3614 | |
3615 | return result; |
3616 | } |
3617 | |
3618 | static void |
3619 | ipsec_remove_address(ifnet_t interface, |
3620 | protocol_family_t protocol, |
3621 | ifaddr_t address, |
3622 | socket_t pf_socket) |
3623 | { |
3624 | errno_t result = 0; |
3625 | |
3626 | /* Attempt a detach */ |
3627 | if (protocol == PF_INET) { |
3628 | struct ifreq ifr; |
3629 | |
3630 | bzero(s: &ifr, n: sizeof(ifr)); |
3631 | snprintf(ifr.ifr_name, count: sizeof(ifr.ifr_name), "%s%d" , |
3632 | ifnet_name(interface), ifnet_unit(interface)); |
3633 | result = ifaddr_address(ifaddr: address, out_addr: &ifr.ifr_addr, addr_size: sizeof(ifr.ifr_addr)); |
3634 | if (result != 0) { |
3635 | os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d" , result); |
3636 | } else { |
3637 | result = sock_ioctl(so: pf_socket, SIOCDIFADDR, argp: &ifr); |
3638 | if (result != 0) { |
3639 | os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d" , result); |
3640 | } |
3641 | } |
3642 | } else if (protocol == PF_INET6) { |
3643 | struct in6_ifreq ifr6; |
3644 | |
3645 | bzero(s: &ifr6, n: sizeof(ifr6)); |
3646 | snprintf(ifr6.ifr_name, count: sizeof(ifr6.ifr_name), "%s%d" , |
3647 | ifnet_name(interface), ifnet_unit(interface)); |
3648 | result = ifaddr_address(ifaddr: address, out_addr: (struct sockaddr*)&ifr6.ifr_addr, |
3649 | addr_size: sizeof(ifr6.ifr_addr)); |
3650 | if (result != 0) { |
3651 | os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d" , |
3652 | result); |
3653 | } else { |
3654 | result = sock_ioctl(so: pf_socket, SIOCDIFADDR_IN6, argp: &ifr6); |
3655 | if (result != 0) { |
3656 | os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d" , |
3657 | result); |
3658 | } |
3659 | } |
3660 | } |
3661 | } |
3662 | |
3663 | static void |
3664 | ipsec_cleanup_family(ifnet_t interface, |
3665 | protocol_family_t protocol) |
3666 | { |
3667 | errno_t result = 0; |
3668 | socket_t pf_socket = NULL; |
3669 | ifaddr_t *addresses = NULL; |
3670 | int i; |
3671 | |
3672 | if (protocol != PF_INET && protocol != PF_INET6) { |
3673 | os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n" , protocol); |
3674 | return; |
3675 | } |
3676 | |
3677 | /* Create a socket for removing addresses and detaching the protocol */ |
3678 | result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket); |
3679 | if (result != 0) { |
3680 | if (result != EAFNOSUPPORT) { |
3681 | os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n" , |
3682 | protocol == PF_INET ? "IP" : "IPv6" , result); |
3683 | } |
3684 | goto cleanup; |
3685 | } |
3686 | |
3687 | /* always set SS_PRIV, we want to close and detach regardless */ |
3688 | sock_setpriv(so: pf_socket, on: 1); |
3689 | |
3690 | result = ipsec_detach_ip(interface, protocol, pf_socket); |
3691 | if (result == 0 || result == ENXIO) { |
3692 | /* We are done! We either detached or weren't attached. */ |
3693 | goto cleanup; |
3694 | } else if (result != EBUSY) { |
3695 | /* Uh, not really sure what happened here... */ |
3696 | os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n" , result); |
3697 | goto cleanup; |
3698 | } |
3699 | |
3700 | /* |
3701 | * At this point, we received an EBUSY error. This means there are |
3702 | * addresses attached. We should detach them and then try again. |
3703 | */ |
3704 | result = ifnet_get_address_list_family(interface, addresses: &addresses, family: (sa_family_t)protocol); |
3705 | if (result != 0) { |
3706 | os_log_error(OS_LOG_DEFAULT, "fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n" , |
3707 | ifnet_name(interface), ifnet_unit(interface), |
3708 | protocol == PF_INET ? "PF_INET" : "PF_INET6" , result); |
3709 | goto cleanup; |
3710 | } |
3711 | |
3712 | for (i = 0; addresses[i] != 0; i++) { |
3713 | ipsec_remove_address(interface, protocol, address: addresses[i], pf_socket); |
3714 | } |
3715 | ifnet_free_address_list(addresses); |
3716 | addresses = NULL; |
3717 | |
3718 | /* |
3719 | * The addresses should be gone, we should try the remove again. |
3720 | */ |
3721 | result = ipsec_detach_ip(interface, protocol, pf_socket); |
3722 | if (result != 0 && result != ENXIO) { |
3723 | os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n" , result); |
3724 | } |
3725 | |
3726 | cleanup: |
3727 | if (pf_socket != NULL) { |
3728 | sock_close(so: pf_socket); |
3729 | } |
3730 | |
3731 | if (addresses != NULL) { |
3732 | ifnet_free_address_list(addresses); |
3733 | } |
3734 | } |
3735 | |
3736 | static errno_t |
3737 | ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref, |
3738 | __unused u_int32_t unit, |
3739 | void *unitinfo) |
3740 | { |
3741 | struct ipsec_pcb *pcb = unitinfo; |
3742 | ifnet_t ifp = NULL; |
3743 | errno_t result = 0; |
3744 | |
3745 | if (pcb == NULL) { |
3746 | return EINVAL; |
3747 | } |
3748 | |
3749 | /* Wait until all threads in the data paths are done. */ |
3750 | ipsec_wait_data_move_drain(pcb); |
3751 | |
3752 | #if IPSEC_NEXUS |
3753 | // Tell the nexus to stop all rings |
3754 | if (pcb->ipsec_netif_nexus != NULL) { |
3755 | kern_nexus_stop(nx: pcb->ipsec_netif_nexus); |
3756 | } |
3757 | #endif // IPSEC_NEXUS |
3758 | |
3759 | lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock); |
3760 | |
3761 | #if IPSEC_NEXUS |
3762 | if (if_ipsec_debug != 0) { |
3763 | printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n" , |
3764 | pcb->ipsec_if_xname, pcb->ipsec_unique_name); |
3765 | } |
3766 | |
3767 | struct ipsec_detached_channels dc; |
3768 | ipsec_detach_channels(pcb, dc: &dc); |
3769 | #endif // IPSEC_NEXUS |
3770 | |
3771 | pcb->ipsec_ctlref = NULL; |
3772 | |
3773 | ifp = pcb->ipsec_ifp; |
3774 | if (ifp != NULL) { |
3775 | #if IPSEC_NEXUS |
3776 | if (pcb->ipsec_netif_nexus != NULL) { |
3777 | /* |
3778 | * Quiesce the interface and flush any pending outbound packets. |
3779 | */ |
3780 | if_down(ifp); |
3781 | |
3782 | /* |
3783 | * Suspend data movement and wait for IO threads to exit. |
3784 | * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to |
3785 | * do this because ipsec nexuses are attached/detached separately. |
3786 | */ |
3787 | ifnet_datamov_suspend_and_drain(ifp); |
3788 | if ((result = ifnet_detach(interface: ifp)) != 0) { |
3789 | panic("ipsec_ctl_disconnect - ifnet_detach failed: %d" , result); |
3790 | /* NOT REACHED */ |
3791 | } |
3792 | |
3793 | /* |
3794 | * We want to do everything in our power to ensure that the interface |
3795 | * really goes away when the socket is closed. We must remove IP/IPv6 |
3796 | * addresses and detach the protocols. Finally, we can remove and |
3797 | * release the interface. |
3798 | */ |
3799 | key_delsp_for_ipsec_if(ipsec_if: ifp); |
3800 | |
3801 | ipsec_cleanup_family(interface: ifp, AF_INET); |
3802 | ipsec_cleanup_family(interface: ifp, AF_INET6); |
3803 | |
3804 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
3805 | |
3806 | ipsec_free_channels(dc: &dc); |
3807 | |
3808 | ipsec_nexus_detach(pcb); |
3809 | |
3810 | /* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */ |
3811 | ifnet_datamov_resume(ifp); |
3812 | } else |
3813 | #endif // IPSEC_NEXUS |
3814 | { |
3815 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
3816 | |
3817 | #if IPSEC_NEXUS |
3818 | ipsec_free_channels(dc: &dc); |
3819 | #endif // IPSEC_NEXUS |
3820 | |
3821 | /* |
3822 | * We want to do everything in our power to ensure that the interface |
3823 | * really goes away when the socket is closed. We must remove IP/IPv6 |
3824 | * addresses and detach the protocols. Finally, we can remove and |
3825 | * release the interface. |
3826 | */ |
3827 | key_delsp_for_ipsec_if(ipsec_if: ifp); |
3828 | |
3829 | ipsec_cleanup_family(interface: ifp, AF_INET); |
3830 | ipsec_cleanup_family(interface: ifp, AF_INET6); |
3831 | |
3832 | /* |
3833 | * Detach now; ipsec_detach() will be called asynchronously once |
3834 | * the I/O reference count drops to 0. There we will invoke |
3835 | * ifnet_release(). |
3836 | */ |
3837 | if ((result = ifnet_detach(interface: ifp)) != 0) { |
3838 | os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n" , result); |
3839 | } |
3840 | } |
3841 | } else { |
3842 | // Bound, but not connected |
3843 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
3844 | ipsec_free_pcb(pcb, false); |
3845 | } |
3846 | |
3847 | return 0; |
3848 | } |
3849 | |
3850 | static errno_t |
3851 | ipsec_ctl_send(__unused kern_ctl_ref kctlref, |
3852 | __unused u_int32_t unit, |
3853 | __unused void *unitinfo, |
3854 | mbuf_t m, |
3855 | __unused int flags) |
3856 | { |
3857 | /* Receive messages from the control socket. Currently unused. */ |
3858 | mbuf_freem(mbuf: m); |
3859 | return 0; |
3860 | } |
3861 | |
3862 | static errno_t |
3863 | ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, |
3864 | __unused u_int32_t unit, |
3865 | void *unitinfo, |
3866 | int opt, |
3867 | void *data, |
3868 | size_t len) |
3869 | { |
3870 | errno_t result = 0; |
3871 | struct ipsec_pcb *pcb = unitinfo; |
3872 | if (pcb == NULL) { |
3873 | return EINVAL; |
3874 | } |
3875 | |
3876 | /* check for privileges for privileged options */ |
3877 | switch (opt) { |
3878 | case IPSEC_OPT_FLAGS: |
3879 | case IPSEC_OPT_EXT_IFDATA_STATS: |
3880 | case IPSEC_OPT_SET_DELEGATE_INTERFACE: |
3881 | case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: |
3882 | case IPSEC_OPT_OUTPUT_DSCP_MAPPING: |
3883 | if (kauth_cred_issuser(cred: kauth_cred_get()) == 0) { |
3884 | return EPERM; |
3885 | } |
3886 | break; |
3887 | } |
3888 | |
3889 | switch (opt) { |
3890 | case IPSEC_OPT_FLAGS: { |
3891 | if (len != sizeof(u_int32_t)) { |
3892 | result = EMSGSIZE; |
3893 | } else { |
3894 | pcb->ipsec_external_flags = *(u_int32_t *)data; |
3895 | } |
3896 | break; |
3897 | } |
3898 | |
3899 | case IPSEC_OPT_EXT_IFDATA_STATS: { |
3900 | if (len != sizeof(int)) { |
3901 | result = EMSGSIZE; |
3902 | break; |
3903 | } |
3904 | if (pcb->ipsec_ifp == NULL) { |
3905 | // Only can set after connecting |
3906 | result = EINVAL; |
3907 | break; |
3908 | } |
3909 | pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0; |
3910 | break; |
3911 | } |
3912 | |
3913 | case IPSEC_OPT_INC_IFDATA_STATS_IN: |
3914 | case IPSEC_OPT_INC_IFDATA_STATS_OUT: { |
3915 | struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data; |
3916 | |
3917 | if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) { |
3918 | result = EINVAL; |
3919 | break; |
3920 | } |
3921 | if (pcb->ipsec_ifp == NULL) { |
3922 | // Only can set after connecting |
3923 | result = EINVAL; |
3924 | break; |
3925 | } |
3926 | if (!pcb->ipsec_ext_ifdata_stats) { |
3927 | result = EINVAL; |
3928 | break; |
3929 | } |
3930 | if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) { |
3931 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: (uint32_t)utsp->utsp_packets, |
3932 | bytes_in: (uint32_t)utsp->utsp_bytes, errors_in: (uint32_t)utsp->utsp_errors); |
3933 | } else { |
3934 | ifnet_stat_increment_out(interface: pcb->ipsec_ifp, packets_out: (uint32_t)utsp->utsp_packets, |
3935 | bytes_out: (uint32_t)utsp->utsp_bytes, errors_out: (uint32_t)utsp->utsp_errors); |
3936 | } |
3937 | break; |
3938 | } |
3939 | |
3940 | case IPSEC_OPT_SET_DELEGATE_INTERFACE: { |
3941 | ifnet_t del_ifp = NULL; |
3942 | char name[IFNAMSIZ]; |
3943 | |
3944 | if (len > IFNAMSIZ - 1) { |
3945 | result = EMSGSIZE; |
3946 | break; |
3947 | } |
3948 | if (pcb->ipsec_ifp == NULL) { |
3949 | // Only can set after connecting |
3950 | result = EINVAL; |
3951 | break; |
3952 | } |
3953 | if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */ |
3954 | bcopy(src: data, dst: name, n: len); |
3955 | name[len] = 0; |
3956 | result = ifnet_find_by_name(ifname: name, interface: &del_ifp); |
3957 | } |
3958 | if (result == 0) { |
3959 | os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n" , |
3960 | __func__, pcb->ipsec_ifp->if_xname, |
3961 | del_ifp ? del_ifp->if_xname : "NULL" ); |
3962 | |
3963 | result = ifnet_set_delegate(ifp: pcb->ipsec_ifp, delegated_ifp: del_ifp); |
3964 | if (del_ifp) { |
3965 | ifnet_release(interface: del_ifp); |
3966 | } |
3967 | } |
3968 | break; |
3969 | } |
3970 | |
3971 | case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: { |
3972 | if (len != sizeof(int)) { |
3973 | result = EMSGSIZE; |
3974 | break; |
3975 | } |
3976 | if (pcb->ipsec_ifp == NULL) { |
3977 | // Only can set after connecting |
3978 | result = EINVAL; |
3979 | break; |
3980 | } |
3981 | mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data); |
3982 | if (output_service_class == MBUF_SC_UNSPEC) { |
3983 | pcb->ipsec_output_service_class = MBUF_SC_OAM; |
3984 | } else { |
3985 | pcb->ipsec_output_service_class = output_service_class; |
3986 | } |
3987 | os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n" , |
3988 | __func__, pcb->ipsec_ifp->if_xname, |
3989 | pcb->ipsec_output_service_class); |
3990 | break; |
3991 | } |
3992 | |
3993 | #if IPSEC_NEXUS |
3994 | case IPSEC_OPT_ENABLE_CHANNEL: { |
3995 | if (len != sizeof(int)) { |
3996 | result = EMSGSIZE; |
3997 | break; |
3998 | } |
3999 | if (pcb->ipsec_ifp != NULL) { |
4000 | // Only can set before connecting |
4001 | result = EINVAL; |
4002 | break; |
4003 | } |
4004 | if ((*(int *)data) != 0 && |
4005 | (*(int *)data) != 1 && |
4006 | (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) { |
4007 | result = EINVAL; |
4008 | break; |
4009 | } |
4010 | lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4011 | pcb->ipsec_kpipe_count = *(int *)data; |
4012 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4013 | break; |
4014 | } |
4015 | |
4016 | case IPSEC_OPT_CHANNEL_BIND_PID: { |
4017 | if (len != sizeof(pid_t)) { |
4018 | result = EMSGSIZE; |
4019 | break; |
4020 | } |
4021 | if (pcb->ipsec_ifp != NULL) { |
4022 | // Only can set before connecting |
4023 | result = EINVAL; |
4024 | break; |
4025 | } |
4026 | lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4027 | pcb->ipsec_kpipe_pid = *(pid_t *)data; |
4028 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4029 | break; |
4030 | } |
4031 | |
4032 | case IPSEC_OPT_CHANNEL_BIND_UUID: { |
4033 | if (len != sizeof(uuid_t)) { |
4034 | result = EMSGSIZE; |
4035 | break; |
4036 | } |
4037 | if (pcb->ipsec_ifp != NULL) { |
4038 | // Only can set before connecting |
4039 | result = EINVAL; |
4040 | break; |
4041 | } |
4042 | lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4043 | uuid_copy(dst: pcb->ipsec_kpipe_proc_uuid, src: *((uuid_t *)data)); |
4044 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4045 | break; |
4046 | } |
4047 | |
4048 | case IPSEC_OPT_ENABLE_FLOWSWITCH: { |
4049 | if (len != sizeof(int)) { |
4050 | result = EMSGSIZE; |
4051 | break; |
4052 | } |
4053 | if (pcb->ipsec_ifp == NULL) { |
4054 | // Only can set after connecting |
4055 | result = EINVAL; |
4056 | break; |
4057 | } |
4058 | if (!if_is_fsw_transport_netagent_enabled()) { |
4059 | result = ENOTSUP; |
4060 | break; |
4061 | } |
4062 | if (uuid_is_null(uu: pcb->ipsec_nx.fsw_agent)) { |
4063 | result = ENOENT; |
4064 | break; |
4065 | } |
4066 | |
4067 | uint32_t flags = netagent_get_flags(uuid: pcb->ipsec_nx.fsw_agent); |
4068 | |
4069 | if (*(int *)data) { |
4070 | flags |= (NETAGENT_FLAG_NEXUS_PROVIDER | |
4071 | NETAGENT_FLAG_NEXUS_LISTENER); |
4072 | result = netagent_set_flags(uuid: pcb->ipsec_nx.fsw_agent, flags); |
4073 | pcb->ipsec_needs_netagent = true; |
4074 | } else { |
4075 | pcb->ipsec_needs_netagent = false; |
4076 | flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER | |
4077 | NETAGENT_FLAG_NEXUS_LISTENER); |
4078 | result = netagent_set_flags(uuid: pcb->ipsec_nx.fsw_agent, flags); |
4079 | } |
4080 | break; |
4081 | } |
4082 | |
4083 | case IPSEC_OPT_INPUT_FRAG_SIZE: { |
4084 | if (len != sizeof(u_int32_t)) { |
4085 | result = EMSGSIZE; |
4086 | break; |
4087 | } |
4088 | u_int32_t input_frag_size = *(u_int32_t *)data; |
4089 | if (input_frag_size <= sizeof(struct ip6_hdr)) { |
4090 | pcb->ipsec_frag_size_set = FALSE; |
4091 | pcb->ipsec_input_frag_size = 0; |
4092 | } else { |
4093 | pcb->ipsec_frag_size_set = TRUE; |
4094 | pcb->ipsec_input_frag_size = input_frag_size; |
4095 | } |
4096 | break; |
4097 | } |
4098 | case IPSEC_OPT_ENABLE_NETIF: { |
4099 | if (len != sizeof(int)) { |
4100 | result = EMSGSIZE; |
4101 | break; |
4102 | } |
4103 | if (pcb->ipsec_ifp != NULL) { |
4104 | // Only can set before connecting |
4105 | result = EINVAL; |
4106 | break; |
4107 | } |
4108 | lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4109 | pcb->ipsec_use_netif = !!(*(int *)data); |
4110 | lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock); |
4111 | break; |
4112 | } |
4113 | case IPSEC_OPT_SLOT_SIZE: { |
4114 | if (len != sizeof(u_int32_t)) { |
4115 | result = EMSGSIZE; |
4116 | break; |
4117 | } |
4118 | if (pcb->ipsec_ifp != NULL) { |
4119 | // Only can set before connecting |
4120 | result = EINVAL; |
4121 | break; |
4122 | } |
4123 | u_int32_t slot_size = *(u_int32_t *)data; |
4124 | if (slot_size < IPSEC_IF_MIN_SLOT_SIZE || |
4125 | slot_size > IPSEC_IF_MAX_SLOT_SIZE) { |
4126 | return EINVAL; |
4127 | } |
4128 | pcb->ipsec_slot_size = slot_size; |
4129 | if (if_ipsec_debug != 0) { |
4130 | printf("%s: IPSEC_OPT_SLOT_SIZE %u\n" , __func__, slot_size); |
4131 | } |
4132 | break; |
4133 | } |
4134 | case IPSEC_OPT_NETIF_RING_SIZE: { |
4135 | if (len != sizeof(u_int32_t)) { |
4136 | result = EMSGSIZE; |
4137 | break; |
4138 | } |
4139 | if (pcb->ipsec_ifp != NULL) { |
4140 | // Only can set before connecting |
4141 | result = EINVAL; |
4142 | break; |
4143 | } |
4144 | u_int32_t ring_size = *(u_int32_t *)data; |
4145 | if (ring_size < IPSEC_IF_MIN_RING_SIZE || |
4146 | ring_size > IPSEC_IF_MAX_RING_SIZE) { |
4147 | return EINVAL; |
4148 | } |
4149 | pcb->ipsec_netif_ring_size = ring_size; |
4150 | if (if_ipsec_debug != 0) { |
4151 | printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n" , __func__, ring_size); |
4152 | } |
4153 | break; |
4154 | } |
4155 | case IPSEC_OPT_TX_FSW_RING_SIZE: { |
4156 | if (len != sizeof(u_int32_t)) { |
4157 | result = EMSGSIZE; |
4158 | break; |
4159 | } |
4160 | if (pcb->ipsec_ifp != NULL) { |
4161 | // Only can set before connecting |
4162 | result = EINVAL; |
4163 | break; |
4164 | } |
4165 | u_int32_t ring_size = *(u_int32_t *)data; |
4166 | if (ring_size < IPSEC_IF_MIN_RING_SIZE || |
4167 | ring_size > IPSEC_IF_MAX_RING_SIZE) { |
4168 | return EINVAL; |
4169 | } |
4170 | pcb->ipsec_tx_fsw_ring_size = ring_size; |
4171 | if (if_ipsec_debug != 0) { |
4172 | printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n" , __func__, ring_size); |
4173 | } |
4174 | break; |
4175 | } |
4176 | case IPSEC_OPT_RX_FSW_RING_SIZE: { |
4177 | if (len != sizeof(u_int32_t)) { |
4178 | result = EMSGSIZE; |
4179 | break; |
4180 | } |
4181 | if (pcb->ipsec_ifp != NULL) { |
4182 | // Only can set before connecting |
4183 | result = EINVAL; |
4184 | break; |
4185 | } |
4186 | u_int32_t ring_size = *(u_int32_t *)data; |
4187 | if (ring_size < IPSEC_IF_MIN_RING_SIZE || |
4188 | ring_size > IPSEC_IF_MAX_RING_SIZE) { |
4189 | return EINVAL; |
4190 | } |
4191 | pcb->ipsec_rx_fsw_ring_size = ring_size; |
4192 | if (if_ipsec_debug != 0) { |
4193 | printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n" , __func__, ring_size); |
4194 | } |
4195 | break; |
4196 | } |
4197 | case IPSEC_OPT_KPIPE_TX_RING_SIZE: { |
4198 | if (len != sizeof(u_int32_t)) { |
4199 | result = EMSGSIZE; |
4200 | break; |
4201 | } |
4202 | if (pcb->ipsec_ifp != NULL) { |
4203 | // Only can set before connecting |
4204 | result = EINVAL; |
4205 | break; |
4206 | } |
4207 | u_int32_t ring_size = *(u_int32_t *)data; |
4208 | if (ring_size < IPSEC_IF_MIN_RING_SIZE || |
4209 | ring_size > IPSEC_IF_MAX_RING_SIZE) { |
4210 | return EINVAL; |
4211 | } |
4212 | pcb->ipsec_kpipe_tx_ring_size = ring_size; |
4213 | if (if_ipsec_debug != 0) { |
4214 | printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n" , __func__, ring_size); |
4215 | } |
4216 | break; |
4217 | } |
4218 | case IPSEC_OPT_KPIPE_RX_RING_SIZE: { |
4219 | if (len != sizeof(u_int32_t)) { |
4220 | result = EMSGSIZE; |
4221 | break; |
4222 | } |
4223 | if (pcb->ipsec_ifp != NULL) { |
4224 | // Only can set before connecting |
4225 | result = EINVAL; |
4226 | break; |
4227 | } |
4228 | u_int32_t ring_size = *(u_int32_t *)data; |
4229 | if (ring_size < IPSEC_IF_MIN_RING_SIZE || |
4230 | ring_size > IPSEC_IF_MAX_RING_SIZE) { |
4231 | return EINVAL; |
4232 | } |
4233 | pcb->ipsec_kpipe_rx_ring_size = ring_size; |
4234 | if (if_ipsec_debug != 0) { |
4235 | printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n" , __func__, ring_size); |
4236 | } |
4237 | break; |
4238 | } |
4239 | case IPSEC_OPT_OUTPUT_DSCP_MAPPING: { |
4240 | if (len != sizeof(int)) { |
4241 | result = EMSGSIZE; |
4242 | break; |
4243 | } |
4244 | if (pcb->ipsec_ifp == NULL) { |
4245 | // Only can set after connecting |
4246 | result = EINVAL; |
4247 | break; |
4248 | } |
4249 | |
4250 | ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data); |
4251 | if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) { |
4252 | return EINVAL; |
4253 | } |
4254 | |
4255 | pcb->ipsec_output_dscp_mapping = output_dscp_mapping; |
4256 | |
4257 | os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n" , |
4258 | __func__, pcb->ipsec_ifp->if_xname, |
4259 | pcb->ipsec_output_dscp_mapping); |
4260 | break; |
4261 | } |
4262 | |
4263 | #endif // IPSEC_NEXUS |
4264 | |
4265 | default: { |
4266 | result = ENOPROTOOPT; |
4267 | break; |
4268 | } |
4269 | } |
4270 | |
4271 | return result; |
4272 | } |
4273 | |
4274 | static errno_t |
4275 | ipsec_ctl_getopt(__unused kern_ctl_ref kctlref, |
4276 | __unused u_int32_t unit, |
4277 | void *unitinfo, |
4278 | int opt, |
4279 | void *data, |
4280 | size_t *len) |
4281 | { |
4282 | errno_t result = 0; |
4283 | struct ipsec_pcb *pcb = unitinfo; |
4284 | if (pcb == NULL) { |
4285 | return EINVAL; |
4286 | } |
4287 | |
4288 | switch (opt) { |
4289 | case IPSEC_OPT_FLAGS: { |
4290 | if (*len != sizeof(u_int32_t)) { |
4291 | result = EMSGSIZE; |
4292 | } else { |
4293 | *(u_int32_t *)data = pcb->ipsec_external_flags; |
4294 | } |
4295 | break; |
4296 | } |
4297 | |
4298 | case IPSEC_OPT_EXT_IFDATA_STATS: { |
4299 | if (*len != sizeof(int)) { |
4300 | result = EMSGSIZE; |
4301 | } else { |
4302 | *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0; |
4303 | } |
4304 | break; |
4305 | } |
4306 | |
4307 | case IPSEC_OPT_IFNAME: { |
4308 | if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) { |
4309 | result = EMSGSIZE; |
4310 | } else { |
4311 | if (pcb->ipsec_ifp == NULL) { |
4312 | // Only can get after connecting |
4313 | result = EINVAL; |
4314 | break; |
4315 | } |
4316 | *len = scnprintf(data, count: *len, "%s" , pcb->ipsec_if_xname) + 1; |
4317 | } |
4318 | break; |
4319 | } |
4320 | |
4321 | case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: { |
4322 | if (*len != sizeof(int)) { |
4323 | result = EMSGSIZE; |
4324 | } else { |
4325 | *(int *)data = so_svc2tc(pcb->ipsec_output_service_class); |
4326 | } |
4327 | break; |
4328 | } |
4329 | |
4330 | #if IPSEC_NEXUS |
4331 | |
4332 | case IPSEC_OPT_ENABLE_CHANNEL: { |
4333 | if (*len != sizeof(int)) { |
4334 | result = EMSGSIZE; |
4335 | } else { |
4336 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
4337 | *(int *)data = pcb->ipsec_kpipe_count; |
4338 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4339 | } |
4340 | break; |
4341 | } |
4342 | |
4343 | case IPSEC_OPT_CHANNEL_BIND_PID: { |
4344 | if (*len != sizeof(pid_t)) { |
4345 | result = EMSGSIZE; |
4346 | } else { |
4347 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
4348 | *(pid_t *)data = pcb->ipsec_kpipe_pid; |
4349 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4350 | } |
4351 | break; |
4352 | } |
4353 | |
4354 | case IPSEC_OPT_CHANNEL_BIND_UUID: { |
4355 | if (*len != sizeof(uuid_t)) { |
4356 | result = EMSGSIZE; |
4357 | } else { |
4358 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
4359 | uuid_copy(dst: *((uuid_t *)data), src: pcb->ipsec_kpipe_proc_uuid); |
4360 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4361 | } |
4362 | break; |
4363 | } |
4364 | |
4365 | case IPSEC_OPT_ENABLE_FLOWSWITCH: { |
4366 | if (*len != sizeof(int)) { |
4367 | result = EMSGSIZE; |
4368 | } else { |
4369 | *(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent); |
4370 | } |
4371 | break; |
4372 | } |
4373 | |
4374 | case IPSEC_OPT_ENABLE_NETIF: { |
4375 | if (*len != sizeof(int)) { |
4376 | result = EMSGSIZE; |
4377 | } else { |
4378 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
4379 | *(int *)data = !!pcb->ipsec_use_netif; |
4380 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4381 | } |
4382 | break; |
4383 | } |
4384 | |
4385 | case IPSEC_OPT_GET_CHANNEL_UUID: { |
4386 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
4387 | if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
4388 | result = ENXIO; |
4389 | } else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) { |
4390 | result = EMSGSIZE; |
4391 | } else { |
4392 | for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) { |
4393 | uuid_copy(dst: ((uuid_t *)data)[i], src: pcb->ipsec_kpipe_uuid[i]); |
4394 | } |
4395 | } |
4396 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4397 | break; |
4398 | } |
4399 | |
4400 | case IPSEC_OPT_INPUT_FRAG_SIZE: { |
4401 | if (*len != sizeof(u_int32_t)) { |
4402 | result = EMSGSIZE; |
4403 | } else { |
4404 | *(u_int32_t *)data = pcb->ipsec_input_frag_size; |
4405 | } |
4406 | break; |
4407 | } |
4408 | case IPSEC_OPT_SLOT_SIZE: { |
4409 | if (*len != sizeof(u_int32_t)) { |
4410 | result = EMSGSIZE; |
4411 | } else { |
4412 | *(u_int32_t *)data = pcb->ipsec_slot_size; |
4413 | } |
4414 | break; |
4415 | } |
4416 | case IPSEC_OPT_NETIF_RING_SIZE: { |
4417 | if (*len != sizeof(u_int32_t)) { |
4418 | result = EMSGSIZE; |
4419 | } else { |
4420 | *(u_int32_t *)data = pcb->ipsec_netif_ring_size; |
4421 | } |
4422 | break; |
4423 | } |
4424 | case IPSEC_OPT_TX_FSW_RING_SIZE: { |
4425 | if (*len != sizeof(u_int32_t)) { |
4426 | result = EMSGSIZE; |
4427 | } else { |
4428 | *(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size; |
4429 | } |
4430 | break; |
4431 | } |
4432 | case IPSEC_OPT_RX_FSW_RING_SIZE: { |
4433 | if (*len != sizeof(u_int32_t)) { |
4434 | result = EMSGSIZE; |
4435 | } else { |
4436 | *(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size; |
4437 | } |
4438 | break; |
4439 | } |
4440 | case IPSEC_OPT_KPIPE_TX_RING_SIZE: { |
4441 | if (*len != sizeof(u_int32_t)) { |
4442 | result = EMSGSIZE; |
4443 | } else { |
4444 | *(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size; |
4445 | } |
4446 | break; |
4447 | } |
4448 | case IPSEC_OPT_KPIPE_RX_RING_SIZE: { |
4449 | if (*len != sizeof(u_int32_t)) { |
4450 | result = EMSGSIZE; |
4451 | } else { |
4452 | *(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size; |
4453 | } |
4454 | break; |
4455 | } |
4456 | |
4457 | #endif // IPSEC_NEXUS |
4458 | |
4459 | default: { |
4460 | result = ENOPROTOOPT; |
4461 | break; |
4462 | } |
4463 | } |
4464 | |
4465 | return result; |
4466 | } |
4467 | |
4468 | /* Network Interface functions */ |
4469 | static errno_t |
4470 | ipsec_output(ifnet_t interface, |
4471 | mbuf_t data) |
4472 | { |
4473 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4474 | struct ipsec_output_state ipsec_state; |
4475 | struct route ro; |
4476 | struct route_in6 ro6; |
4477 | size_t length; |
4478 | struct ip *ip = NULL; |
4479 | struct ip6_hdr *ip6 = NULL; |
4480 | struct ip_out_args ipoa; |
4481 | struct ip6_out_args ip6oa; |
4482 | int error = 0; |
4483 | u_int ip_version = 0; |
4484 | int flags = 0; |
4485 | struct flowadv *adv = NULL; |
4486 | |
4487 | // Make sure this packet isn't looping through the interface |
4488 | if (necp_get_last_interface_index_from_packet(packet: data) == interface->if_index) { |
4489 | error = EINVAL; |
4490 | goto ipsec_output_err; |
4491 | } |
4492 | |
4493 | // Mark the interface so NECP can evaluate tunnel policy |
4494 | necp_mark_packet_from_interface(packet: data, interface); |
4495 | |
4496 | if (data->m_len < sizeof(*ip)) { |
4497 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n" , data->m_len); |
4498 | IPSEC_STAT_INCREMENT(ipsecstat.out_inval); |
4499 | error = EINVAL; |
4500 | goto ipsec_output_err; |
4501 | } |
4502 | |
4503 | ip = mtod(data, struct ip *); |
4504 | ip_version = ip->ip_v; |
4505 | |
4506 | switch (ip_version) { |
4507 | case 4: { |
4508 | u_int8_t ip_hlen = 0; |
4509 | #ifdef _IP_VHL |
4510 | ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2; |
4511 | #else |
4512 | ip_hlen = (uint8_t)(ip->ip_hl << 2); |
4513 | #endif |
4514 | if (ip_hlen < sizeof(*ip)) { |
4515 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n" , ip_hlen); |
4516 | IPSEC_STAT_INCREMENT(ipsecstat.out_inval); |
4517 | error = EINVAL; |
4518 | goto ipsec_output_err; |
4519 | } |
4520 | #if IPSEC_NEXUS |
4521 | if (!pcb->ipsec_use_netif) |
4522 | #endif // IPSEC_NEXUS |
4523 | { |
4524 | int af = AF_INET; |
4525 | bpf_tap_out(interface: pcb->ipsec_ifp, DLT_NULL, packet: data, header: &af, header_len: sizeof(af)); |
4526 | } |
4527 | |
4528 | /* Apply encryption */ |
4529 | memset(s: &ipsec_state, c: 0, n: sizeof(ipsec_state)); |
4530 | ipsec_state.m = data; |
4531 | ipsec_state.dst = (struct sockaddr *)&ip->ip_dst; |
4532 | memset(s: &ipsec_state.ro, c: 0, n: sizeof(ipsec_state.ro)); |
4533 | ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping; |
4534 | |
4535 | error = ipsec4_interface_output(state: &ipsec_state, interface); |
4536 | /* Tunneled in IPv6 - packet is gone */ |
4537 | if (error == 0 && ipsec_state.tunneled == 6) { |
4538 | goto done; |
4539 | } |
4540 | |
4541 | data = ipsec_state.m; |
4542 | if (error || data == NULL) { |
4543 | if (error) { |
4544 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n" , error); |
4545 | } |
4546 | goto ipsec_output_err; |
4547 | } |
4548 | |
4549 | /* Set traffic class, set flow */ |
4550 | m_set_service_class(data, pcb->ipsec_output_service_class); |
4551 | data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; |
4552 | #if SKYWALK |
4553 | data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash; |
4554 | #else /* !SKYWALK */ |
4555 | data->m_pkthdr.pkt_flowid = interface->if_flowhash; |
4556 | #endif /* !SKYWALK */ |
4557 | data->m_pkthdr.pkt_proto = ip->ip_p; |
4558 | data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); |
4559 | |
4560 | /* Flip endian-ness for ip_output */ |
4561 | ip = mtod(data, struct ip *); |
4562 | NTOHS(ip->ip_len); |
4563 | NTOHS(ip->ip_off); |
4564 | |
4565 | /* Increment statistics */ |
4566 | length = mbuf_pkthdr_len(mbuf: data); |
4567 | ifnet_stat_increment_out(interface, packets_out: 1, bytes_out: (uint16_t)length, errors_out: 0); |
4568 | |
4569 | /* Send to ip_output */ |
4570 | memset(s: &ro, c: 0, n: sizeof(ro)); |
4571 | |
4572 | flags = (IP_OUTARGS | /* Passing out args to specify interface */ |
4573 | IP_NOIPSEC); /* To ensure the packet doesn't go through ipsec twice */ |
4574 | |
4575 | memset(s: &ipoa, c: 0, n: sizeof(ipoa)); |
4576 | ipoa.ipoa_flowadv.code = 0; |
4577 | ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR; |
4578 | if (ipsec_state.outgoing_if) { |
4579 | ipoa.ipoa_boundif = ipsec_state.outgoing_if; |
4580 | ipoa.ipoa_flags |= IPOAF_BOUND_IF; |
4581 | } |
4582 | ipsec_set_ipoa_for_interface(interface: pcb->ipsec_ifp, ipoa: &ipoa); |
4583 | |
4584 | adv = &ipoa.ipoa_flowadv; |
4585 | |
4586 | (void)ip_output(data, NULL, &ro, flags, NULL, &ipoa); |
4587 | data = NULL; |
4588 | |
4589 | if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) { |
4590 | error = ENOBUFS; |
4591 | ifnet_disable_output(interface); |
4592 | } |
4593 | |
4594 | goto done; |
4595 | } |
4596 | case 6: { |
4597 | if (data->m_len < sizeof(*ip6)) { |
4598 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n" , data->m_len); |
4599 | IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); |
4600 | error = EINVAL; |
4601 | goto ipsec_output_err; |
4602 | } |
4603 | #if IPSEC_NEXUS |
4604 | if (!pcb->ipsec_use_netif) |
4605 | #endif // IPSEC_NEXUS |
4606 | { |
4607 | int af = AF_INET6; |
4608 | bpf_tap_out(interface: pcb->ipsec_ifp, DLT_NULL, packet: data, header: &af, header_len: sizeof(af)); |
4609 | } |
4610 | |
4611 | data = ipsec6_splithdr(data); |
4612 | if (data == NULL) { |
4613 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n" ); |
4614 | goto ipsec_output_err; |
4615 | } |
4616 | |
4617 | ip6 = mtod(data, struct ip6_hdr *); |
4618 | |
4619 | memset(s: &ipsec_state, c: 0, n: sizeof(ipsec_state)); |
4620 | ipsec_state.m = data; |
4621 | ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst; |
4622 | memset(s: &ipsec_state.ro, c: 0, n: sizeof(ipsec_state.ro)); |
4623 | ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping; |
4624 | |
4625 | error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m); |
4626 | if (error == 0 && ipsec_state.tunneled == 4) { /* tunneled in IPv4 - packet is gone */ |
4627 | goto done; |
4628 | } |
4629 | data = ipsec_state.m; |
4630 | if (error || data == NULL) { |
4631 | if (error) { |
4632 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n" , error); |
4633 | } |
4634 | goto ipsec_output_err; |
4635 | } |
4636 | |
4637 | /* Set traffic class, set flow */ |
4638 | m_set_service_class(data, pcb->ipsec_output_service_class); |
4639 | data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; |
4640 | #if SKYWALK |
4641 | data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash; |
4642 | #else /* !SKYWALK */ |
4643 | data->m_pkthdr.pkt_flowid = interface->if_flowhash; |
4644 | #endif /* !SKYWALK */ |
4645 | data->m_pkthdr.pkt_proto = ip6->ip6_nxt; |
4646 | data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); |
4647 | |
4648 | /* Increment statistics */ |
4649 | length = mbuf_pkthdr_len(mbuf: data); |
4650 | ifnet_stat_increment_out(interface, packets_out: 1, bytes_out: (uint16_t)length, errors_out: 0); |
4651 | |
4652 | /* Send to ip6_output */ |
4653 | memset(s: &ro6, c: 0, n: sizeof(ro6)); |
4654 | |
4655 | flags = IPV6_OUTARGS; |
4656 | |
4657 | memset(s: &ip6oa, c: 0, n: sizeof(ip6oa)); |
4658 | ip6oa.ip6oa_flowadv.code = 0; |
4659 | ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR; |
4660 | if (ipsec_state.outgoing_if) { |
4661 | ip6oa.ip6oa_boundif = ipsec_state.outgoing_if; |
4662 | ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; |
4663 | ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL); |
4664 | ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL); |
4665 | } else { |
4666 | ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL); |
4667 | ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL); |
4668 | } |
4669 | ipsec_set_ip6oa_for_interface(interface: pcb->ipsec_ifp, ip6oa: &ip6oa); |
4670 | |
4671 | adv = &ip6oa.ip6oa_flowadv; |
4672 | |
4673 | (void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa); |
4674 | data = NULL; |
4675 | |
4676 | if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) { |
4677 | error = ENOBUFS; |
4678 | ifnet_disable_output(interface); |
4679 | } |
4680 | |
4681 | goto done; |
4682 | } |
4683 | default: { |
4684 | os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n" , ip_version); |
4685 | error = EINVAL; |
4686 | goto ipsec_output_err; |
4687 | } |
4688 | } |
4689 | |
4690 | done: |
4691 | return error; |
4692 | |
4693 | ipsec_output_err: |
4694 | if (data) { |
4695 | mbuf_freem(mbuf: data); |
4696 | } |
4697 | goto done; |
4698 | } |
4699 | |
4700 | static void |
4701 | ipsec_start(ifnet_t interface) |
4702 | { |
4703 | mbuf_t data; |
4704 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4705 | |
4706 | VERIFY(pcb != NULL); |
4707 | for (;;) { |
4708 | if (ifnet_dequeue(interface, packet: &data) != 0) { |
4709 | break; |
4710 | } |
4711 | if (ipsec_output(interface, data) != 0) { |
4712 | break; |
4713 | } |
4714 | } |
4715 | } |
4716 | |
4717 | /* Network Interface functions */ |
4718 | static errno_t |
4719 | ipsec_demux(__unused ifnet_t interface, |
4720 | mbuf_t data, |
4721 | __unused char *, |
4722 | protocol_family_t *protocol) |
4723 | { |
4724 | struct ip *ip; |
4725 | u_int ip_version; |
4726 | |
4727 | while (data != NULL && mbuf_len(mbuf: data) < 1) { |
4728 | data = mbuf_next(mbuf: data); |
4729 | } |
4730 | |
4731 | if (data == NULL) { |
4732 | return ENOENT; |
4733 | } |
4734 | |
4735 | ip = mtod(data, struct ip *); |
4736 | ip_version = ip->ip_v; |
4737 | |
4738 | switch (ip_version) { |
4739 | case 4: |
4740 | *protocol = PF_INET; |
4741 | return 0; |
4742 | case 6: |
4743 | *protocol = PF_INET6; |
4744 | return 0; |
4745 | default: |
4746 | *protocol = PF_UNSPEC; |
4747 | break; |
4748 | } |
4749 | |
4750 | return 0; |
4751 | } |
4752 | |
4753 | static errno_t |
4754 | ipsec_add_proto(__unused ifnet_t interface, |
4755 | protocol_family_t protocol, |
4756 | __unused const struct ifnet_demux_desc *demux_array, |
4757 | __unused u_int32_t demux_count) |
4758 | { |
4759 | switch (protocol) { |
4760 | case PF_INET: |
4761 | return 0; |
4762 | case PF_INET6: |
4763 | return 0; |
4764 | default: |
4765 | break; |
4766 | } |
4767 | |
4768 | return ENOPROTOOPT; |
4769 | } |
4770 | |
4771 | static errno_t |
4772 | ipsec_del_proto(__unused ifnet_t interface, |
4773 | __unused protocol_family_t protocol) |
4774 | { |
4775 | return 0; |
4776 | } |
4777 | |
4778 | static errno_t |
4779 | ipsec_ioctl(ifnet_t interface, |
4780 | u_long command, |
4781 | void *data) |
4782 | { |
4783 | #if IPSEC_NEXUS |
4784 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4785 | #endif |
4786 | errno_t result = 0; |
4787 | |
4788 | switch (command) { |
4789 | case SIOCSIFMTU: { |
4790 | #if IPSEC_NEXUS |
4791 | if (pcb->ipsec_use_netif) { |
4792 | // Make sure we can fit packets in the channel buffers |
4793 | if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) { |
4794 | result = EINVAL; |
4795 | } else { |
4796 | ifnet_set_mtu(interface, mtu: (uint32_t)((struct ifreq*)data)->ifr_mtu); |
4797 | } |
4798 | } else |
4799 | #endif // IPSEC_NEXUS |
4800 | { |
4801 | ifnet_set_mtu(interface, mtu: ((struct ifreq*)data)->ifr_mtu); |
4802 | } |
4803 | break; |
4804 | } |
4805 | |
4806 | case SIOCSIFFLAGS: |
4807 | /* ifioctl() takes care of it */ |
4808 | break; |
4809 | |
4810 | case SIOCSIFSUBFAMILY: { |
4811 | uint32_t subfamily; |
4812 | |
4813 | subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily; |
4814 | switch (subfamily) { |
4815 | case IFRTYPE_SUBFAMILY_BLUETOOTH: |
4816 | interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH; |
4817 | break; |
4818 | case IFRTYPE_SUBFAMILY_WIFI: |
4819 | interface->if_subfamily = IFNET_SUBFAMILY_WIFI; |
4820 | break; |
4821 | case IFRTYPE_SUBFAMILY_QUICKRELAY: |
4822 | interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY; |
4823 | break; |
4824 | case IFRTYPE_SUBFAMILY_DEFAULT: |
4825 | interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT; |
4826 | break; |
4827 | default: |
4828 | result = EINVAL; |
4829 | break; |
4830 | } |
4831 | break; |
4832 | } |
4833 | |
4834 | default: |
4835 | result = EOPNOTSUPP; |
4836 | } |
4837 | |
4838 | return result; |
4839 | } |
4840 | |
4841 | static void |
4842 | ipsec_detached(ifnet_t interface) |
4843 | { |
4844 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4845 | |
4846 | (void)ifnet_release(interface); |
4847 | lck_mtx_lock(lck: &ipsec_lock); |
4848 | ipsec_free_pcb(pcb, true); |
4849 | (void)ifnet_dispose(interface); |
4850 | lck_mtx_unlock(lck: &ipsec_lock); |
4851 | } |
4852 | |
4853 | /* Protocol Handlers */ |
4854 | |
4855 | static errno_t |
4856 | ipsec_proto_input(ifnet_t interface, |
4857 | protocol_family_t protocol, |
4858 | mbuf_t m, |
4859 | __unused char *) |
4860 | { |
4861 | mbuf_pkthdr_setrcvif(mbuf: m, ifp: interface); |
4862 | |
4863 | #if IPSEC_NEXUS |
4864 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4865 | if (!pcb->ipsec_use_netif) |
4866 | #endif // IPSEC_NEXUS |
4867 | { |
4868 | uint32_t af = 0; |
4869 | struct ip *ip = mtod(m, struct ip *); |
4870 | if (ip->ip_v == 4) { |
4871 | af = AF_INET; |
4872 | } else if (ip->ip_v == 6) { |
4873 | af = AF_INET6; |
4874 | } |
4875 | bpf_tap_in(interface, DLT_NULL, packet: m, header: &af, header_len: sizeof(af)); |
4876 | pktap_input(interface, protocol, m, NULL); |
4877 | } |
4878 | |
4879 | int32_t pktlen = m->m_pkthdr.len; |
4880 | if (proto_input(protocol, packet: m) != 0) { |
4881 | ifnet_stat_increment_in(interface, packets_in: 0, bytes_in: 0, errors_in: 1); |
4882 | m_freem(m); |
4883 | } else { |
4884 | ifnet_stat_increment_in(interface, packets_in: 1, bytes_in: pktlen, errors_in: 0); |
4885 | } |
4886 | |
4887 | return 0; |
4888 | } |
4889 | |
4890 | static errno_t |
4891 | ipsec_proto_pre_output(__unused ifnet_t interface, |
4892 | protocol_family_t protocol, |
4893 | __unused mbuf_t *packet, |
4894 | __unused const struct sockaddr *dest, |
4895 | __unused void *route, |
4896 | __unused char *frame_type, |
4897 | __unused char *link_layer_dest) |
4898 | { |
4899 | *(protocol_family_t *)(void *)frame_type = protocol; |
4900 | return 0; |
4901 | } |
4902 | |
4903 | static errno_t |
4904 | ipsec_attach_proto(ifnet_t interface, |
4905 | protocol_family_t protocol) |
4906 | { |
4907 | struct ifnet_attach_proto_param proto; |
4908 | errno_t result; |
4909 | |
4910 | bzero(s: &proto, n: sizeof(proto)); |
4911 | proto.input = ipsec_proto_input; |
4912 | proto.pre_output = ipsec_proto_pre_output; |
4913 | |
4914 | result = ifnet_attach_protocol(interface, protocol_family: protocol, proto_details: &proto); |
4915 | if (result != 0 && result != EEXIST) { |
4916 | os_log_error(OS_LOG_DEFAULT, "ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n" , |
4917 | protocol, result); |
4918 | } |
4919 | |
4920 | return result; |
4921 | } |
4922 | |
4923 | errno_t |
4924 | ipsec_inject_inbound_packet(ifnet_t interface, |
4925 | mbuf_t packet) |
4926 | { |
4927 | #if IPSEC_NEXUS |
4928 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4929 | |
4930 | if (pcb->ipsec_use_netif) { |
4931 | if (!ipsec_data_move_begin(pcb)) { |
4932 | os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, |
4933 | if_name(pcb->ipsec_ifp)); |
4934 | return ENXIO; |
4935 | } |
4936 | |
4937 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
4938 | |
4939 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
4940 | |
4941 | if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) { |
4942 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
4943 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4944 | ipsec_data_move_end(pcb); |
4945 | return ENOSPC; |
4946 | } |
4947 | |
4948 | if (pcb->ipsec_input_chain != NULL) { |
4949 | pcb->ipsec_input_chain_last->m_nextpkt = packet; |
4950 | } else { |
4951 | pcb->ipsec_input_chain = packet; |
4952 | } |
4953 | pcb->ipsec_input_chain_count++; |
4954 | while (packet->m_nextpkt) { |
4955 | VERIFY(packet != packet->m_nextpkt); |
4956 | packet = packet->m_nextpkt; |
4957 | pcb->ipsec_input_chain_count++; |
4958 | } |
4959 | pcb->ipsec_input_chain_last = packet; |
4960 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
4961 | |
4962 | kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0]; |
4963 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
4964 | |
4965 | if (rx_ring != NULL) { |
4966 | kern_channel_notify(rx_ring, flags: 0); |
4967 | } |
4968 | |
4969 | ipsec_data_move_end(pcb); |
4970 | return 0; |
4971 | } else |
4972 | #endif // IPSEC_NEXUS |
4973 | { |
4974 | errno_t error; |
4975 | protocol_family_t protocol; |
4976 | if ((error = ipsec_demux(interface, data: packet, NULL, protocol: &protocol)) != 0) { |
4977 | return error; |
4978 | } |
4979 | |
4980 | return ipsec_proto_input(interface, protocol, m: packet, NULL); |
4981 | } |
4982 | } |
4983 | |
4984 | void |
4985 | ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family, |
4986 | uint32_t flowid) |
4987 | { |
4988 | #pragma unused (flowid) |
4989 | if (packet != NULL && interface != NULL) { |
4990 | struct ipsec_pcb *pcb = ifnet_softc(interface); |
4991 | if (pcb != NULL) { |
4992 | /* Set traffic class, set flow */ |
4993 | m_set_service_class(packet, pcb->ipsec_output_service_class); |
4994 | packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; |
4995 | #if SKYWALK |
4996 | packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash; |
4997 | packet->m_pkthdr.pkt_flowid = flowid; |
4998 | #else /* !SKYWALK */ |
4999 | packet->m_pkthdr.pkt_flowid = interface->if_flowhash; |
5000 | #endif /* !SKYWALK */ |
5001 | if (family == AF_INET) { |
5002 | struct ip *ip = mtod(packet, struct ip *); |
5003 | packet->m_pkthdr.pkt_proto = ip->ip_p; |
5004 | } else if (family == AF_INET6) { |
5005 | struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *); |
5006 | packet->m_pkthdr.pkt_proto = ip6->ip6_nxt; |
5007 | } |
5008 | packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); |
5009 | } |
5010 | } |
5011 | } |
5012 | |
5013 | void |
5014 | ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa) |
5015 | { |
5016 | struct ipsec_pcb *pcb; |
5017 | |
5018 | if (interface == NULL || ipoa == NULL) { |
5019 | return; |
5020 | } |
5021 | pcb = ifnet_softc(interface); |
5022 | |
5023 | if (net_qos_policy_restricted == 0) { |
5024 | ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED; |
5025 | ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class); |
5026 | } else if (pcb->ipsec_output_service_class != MBUF_SC_VO || |
5027 | net_qos_policy_restrict_avapps != 0) { |
5028 | ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED; |
5029 | } else { |
5030 | ipoa->ipoa_flags |= IP6OAF_QOSMARKING_ALLOWED; |
5031 | ipoa->ipoa_sotc = SO_TC_VO; |
5032 | } |
5033 | } |
5034 | |
5035 | void |
5036 | ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa) |
5037 | { |
5038 | struct ipsec_pcb *pcb; |
5039 | |
5040 | if (interface == NULL || ip6oa == NULL) { |
5041 | return; |
5042 | } |
5043 | pcb = ifnet_softc(interface); |
5044 | |
5045 | if (net_qos_policy_restricted == 0) { |
5046 | ip6oa->ip6oa_flags |= IPOAF_QOSMARKING_ALLOWED; |
5047 | ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class); |
5048 | } else if (pcb->ipsec_output_service_class != MBUF_SC_VO || |
5049 | net_qos_policy_restrict_avapps != 0) { |
5050 | ip6oa->ip6oa_flags &= ~IPOAF_QOSMARKING_ALLOWED; |
5051 | } else { |
5052 | ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED; |
5053 | ip6oa->ip6oa_sotc = SO_TC_VO; |
5054 | } |
5055 | } |
5056 | |
5057 | static boolean_t |
5058 | ipsec_data_move_begin(struct ipsec_pcb *pcb) |
5059 | { |
5060 | boolean_t ret = 0; |
5061 | |
5062 | lck_mtx_lock_spin(lck: &pcb->ipsec_pcb_data_move_lock); |
5063 | if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) { |
5064 | pcb->ipsec_pcb_data_move++; |
5065 | } |
5066 | lck_mtx_unlock(lck: &pcb->ipsec_pcb_data_move_lock); |
5067 | |
5068 | return ret; |
5069 | } |
5070 | |
5071 | static void |
5072 | ipsec_data_move_end(struct ipsec_pcb *pcb) |
5073 | { |
5074 | lck_mtx_lock_spin(lck: &pcb->ipsec_pcb_data_move_lock); |
5075 | VERIFY(pcb->ipsec_pcb_data_move > 0); |
5076 | /* |
5077 | * if there's no more thread moving data, wakeup any |
5078 | * drainers that's blocked waiting for this. |
5079 | */ |
5080 | if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) { |
5081 | wakeup(chan: &(pcb->ipsec_pcb_data_move)); |
5082 | } |
5083 | lck_mtx_unlock(lck: &pcb->ipsec_pcb_data_move_lock); |
5084 | } |
5085 | |
5086 | static void |
5087 | ipsec_data_move_drain(struct ipsec_pcb *pcb) |
5088 | { |
5089 | lck_mtx_lock(lck: &pcb->ipsec_pcb_data_move_lock); |
5090 | /* data path must already be marked as not ready */ |
5091 | VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb)); |
5092 | pcb->ipsec_pcb_drainers++; |
5093 | while (pcb->ipsec_pcb_data_move != 0) { |
5094 | (void)msleep(chan: &(pcb->ipsec_pcb_data_move), mtx: &pcb->ipsec_pcb_data_move_lock, |
5095 | pri: (PZERO - 1), wmesg: __func__, NULL); |
5096 | } |
5097 | VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb)); |
5098 | VERIFY(pcb->ipsec_pcb_drainers > 0); |
5099 | pcb->ipsec_pcb_drainers--; |
5100 | lck_mtx_unlock(lck: &pcb->ipsec_pcb_data_move_lock); |
5101 | } |
5102 | |
5103 | static void |
5104 | ipsec_wait_data_move_drain(struct ipsec_pcb *pcb) |
5105 | { |
5106 | /* |
5107 | * Mark the data path as not usable. |
5108 | */ |
5109 | lck_mtx_lock(lck: &pcb->ipsec_pcb_data_move_lock); |
5110 | IPSEC_CLR_DATA_PATH_READY(pcb); |
5111 | lck_mtx_unlock(lck: &pcb->ipsec_pcb_data_move_lock); |
5112 | |
5113 | /* Wait until all threads in the data paths are done. */ |
5114 | ipsec_data_move_drain(pcb); |
5115 | } |
5116 | |