/*
 * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/systm.h>
#include <sys/kern_control.h>
#include <net/kpi_protocol.h>
#include <net/kpi_interface.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/if_ipsec.h>
#include <sys/mbuf.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <sys/kauth.h>
#include <netinet6/ipsec.h>
#include <netinet6/ipsec6.h>
#include <netinet6/esp.h>
#include <netinet6/esp6.h>
#include <netinet/ip.h>
#include <net/flowadv.h>
#include <net/necp.h>
#include <netkey/key.h>
#include <net/pktap.h>
#include <kern/zalloc.h>
#include <os/log.h>

#if SKYWALK
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#define IPSEC_NEXUS 1
#else // SKYWALK
#define IPSEC_NEXUS 0
#endif // SKYWALK
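/*
 * Note: with SKYWALK builds the ipsec interface runs on the skywalk data
 * path (kernel pipes plus a netif nexus); without it, only the classic
 * mbuf/kernel-control path is compiled. IPSEC_NEXUS gates all of the
 * nexus-specific code in this file.
 */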

extern int net_qos_policy_restricted;
extern int net_qos_policy_restrict_avapps;

/* Kernel Control functions */
static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo);
static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo, mbuf_t m, int flags);
static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t *len);
static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t len);

/* Network Interface functions */
static void ipsec_start(ifnet_t interface);
static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *,
    protocol_family_t *protocol);
static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
    const struct ifnet_demux_desc *demux_array,
    u_int32_t demux_count);
static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
static void ipsec_detached(ifnet_t interface);

/* Protocol handlers */
static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
    mbuf_t m, char *);
static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    char *frame_type, char *link_layer_dest);

static kern_ctl_ref ipsec_kctlref;
static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);

#if IPSEC_NEXUS

SYSCTL_DECL(_net_ipsec);
SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
static int if_ipsec_verify_interface_creation = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");

#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }

#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
#define IPSEC_IF_DEFAULT_RING_SIZE 64
#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size

#define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
#define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_RX_RING_COUNT 1
#define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
#define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT

#define IPSEC_IF_MIN_RING_SIZE 8
#define IPSEC_IF_MAX_RING_SIZE 1024

#define IPSEC_IF_MIN_SLOT_SIZE 1024
#define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)

#define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512

#define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01

static uint32_t ipsec_kpipe_mbuf;

static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;

static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");

static int if_ipsec_debug = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");

static errno_t
ipsec_register_nexus(void);

typedef struct ipsec_nx {
    uuid_t if_provider;
    uuid_t if_instance;
    uuid_t fsw_provider;
    uuid_t fsw_instance;
    uuid_t fsw_device;
    uuid_t fsw_agent;
} *ipsec_nx_t;

static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;

#endif // IPSEC_NEXUS

/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
    TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
    kern_ctl_ref ipsec_ctlref;
    ifnet_t ipsec_ifp;
    u_int32_t ipsec_unit;
    u_int32_t ipsec_unique_id;
    // These external flags can be set with IPSEC_OPT_FLAGS
    u_int32_t ipsec_external_flags;
    // These internal flags are only used within this driver
    u_int32_t ipsec_internal_flags;
    u_int32_t ipsec_input_frag_size;
    bool ipsec_frag_size_set;
    int ipsec_ext_ifdata_stats;
    mbuf_svc_class_t ipsec_output_service_class;
    char ipsec_if_xname[IFXNAMSIZ];
    char ipsec_unique_name[IFXNAMSIZ];
    // PCB lock protects state fields, like ipsec_kpipe_count
    decl_lck_rw_data(, ipsec_pcb_lock);
    // lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
    decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
    u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
    u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
    u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
    ipsec_dscp_mapping_t ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
    lck_mtx_t ipsec_input_chain_lock;
    lck_mtx_t ipsec_kpipe_encrypt_lock;
    lck_mtx_t ipsec_kpipe_decrypt_lock;
    struct mbuf *ipsec_input_chain;
    struct mbuf *ipsec_input_chain_last;
    u_int32_t ipsec_input_chain_count;
    // Input chain lock protects the list of input mbufs
    // The input chain lock must be taken AFTER the PCB lock if both are held
    struct ipsec_nx ipsec_nx;
    u_int32_t ipsec_kpipe_count;
    pid_t ipsec_kpipe_pid;
    uuid_t ipsec_kpipe_proc_uuid;
    uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
    void *ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
    void *ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
    kern_pbufpool_t ipsec_kpipe_pp;
    u_int32_t ipsec_kpipe_tx_ring_size;
    u_int32_t ipsec_kpipe_rx_ring_size;

    kern_nexus_t ipsec_netif_nexus;
    kern_pbufpool_t ipsec_netif_pp;
    void *ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
    void *ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
    uint64_t ipsec_netif_txring_size;

    u_int32_t ipsec_slot_size;
    u_int32_t ipsec_netif_ring_size;
    u_int32_t ipsec_tx_fsw_ring_size;
    u_int32_t ipsec_rx_fsw_ring_size;
    bool ipsec_use_netif;
    bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};

/* These are internal flags not exposed outside this file */
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1

/* data movement refcounting functions */
static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
static void ipsec_data_move_end(struct ipsec_pcb *pcb);
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);

/* Data path states */
#define IPSEC_PCB_DATA_PATH_READY 0x1

/* Macros to set/clear/test data path states */
#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
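
/*
 * Typical data-path usage of the refcounting helpers above (this is the
 * pattern used by the sync routines later in this file, e.g.
 * ipsec_kpipe_sync_tx()):
 *
 *	if (!ipsec_data_move_begin(pcb)) {
 *		return 0;	// data path has been stopped
 *	}
 *	// ... touch rings / move packets ...
 *	ipsec_data_move_end(pcb);
 */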

#if IPSEC_NEXUS
/* Inline helpers to set/clear/test internal flags. */
static inline void
ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
{
    pcb->ipsec_internal_flags |= flag;
}
static inline void
ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
{
    pcb->ipsec_internal_flags &= ~flag;
}

static inline bool
ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
{
    return !!(pcb->ipsec_internal_flags & flag);
}
#endif // IPSEC_NEXUS

TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);

#define IPSECQ_MAXLEN 256

#if IPSEC_NEXUS
static int
sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_ring_size = value;

    return 0;
}
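
/*
 * The two handlers below follow the same pattern as the one above: read the
 * current value, let sysctl_handle_int() apply any new value from user
 * space, and range-check before committing. For example, from user space
 * (assuming the standard sysctl(8) CLI):
 *
 *	sysctl -w net.ipsec.ring_size=128
 *
 * succeeds, while values outside [IPSEC_IF_MIN_RING_SIZE,
 * IPSEC_IF_MAX_RING_SIZE] (8..1024) are rejected with EINVAL.
 */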

static int
sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_tx_fsw_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_tx_fsw_ring_size = value;

    return 0;
}

static int
sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_rx_fsw_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_rx_fsw_ring_size = value;

    return 0;
}


static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
{
    return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
}
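
/*
 * WMM mode: the interface is considered to be in WMM mode when the client
 * has opened one kernel pipe per WMM access category (IPSEC_IF_WMM_RING_COUNT,
 * i.e. NEXUS_NUM_WMM_QUEUES). In that mode TX traffic is split across
 * per-service-class rings; see ipsec_find_tx_ring_by_svc() below for the
 * service-class-to-ring mapping.
 */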

#endif // IPSEC_NEXUS

errno_t
ipsec_register_control(void)
{
    struct kern_ctl_reg kern_ctl;
    errno_t result = 0;

#if (DEVELOPMENT || DEBUG)
    (void)PE_parse_boot_argn("ipsec_kpipe_mbuf", &ipsec_kpipe_mbuf,
        sizeof(ipsec_kpipe_mbuf));
#endif /* DEVELOPMENT || DEBUG */

#if IPSEC_NEXUS
    ipsec_register_nexus();
#endif // IPSEC_NEXUS

    TAILQ_INIT(&ipsec_head);

    bzero(&kern_ctl, sizeof(kern_ctl));
    strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
    kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
    kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
    kern_ctl.ctl_sendsize = 64 * 1024;
    kern_ctl.ctl_recvsize = 64 * 1024;
    kern_ctl.ctl_setup = ipsec_ctl_setup;
    kern_ctl.ctl_bind = ipsec_ctl_bind;
    kern_ctl.ctl_connect = ipsec_ctl_connect;
    kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
    kern_ctl.ctl_send = ipsec_ctl_send;
    kern_ctl.ctl_setopt = ipsec_ctl_setopt;
    kern_ctl.ctl_getopt = ipsec_ctl_getopt;

    result = ctl_register(&kern_ctl, &ipsec_kctlref);
    if (result != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
        return result;
    }

    /* Register the protocol plumbers */
    if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
        ipsec_attach_proto, NULL)) != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
            result);
        ctl_deregister(ipsec_kctlref);
        return result;
    }

    /* Register the protocol plumbers */
    if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
        ipsec_attach_proto, NULL)) != 0) {
        proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
        ctl_deregister(ipsec_kctlref);
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
            result);
        return result;
    }

    return 0;
}

/* Helpers */
int
ipsec_interface_isvalid(ifnet_t interface)
{
    struct ipsec_pcb *pcb = NULL;

    if (interface == NULL) {
        return 0;
    }

    pcb = ifnet_softc(interface);

    if (pcb == NULL) {
        return 0;
    }

    /* When ctl disconnects, ipsec_unit is set to 0 */
    if (pcb->ipsec_unit == 0) {
        return 0;
    }

    return 1;
}

#if IPSEC_NEXUS
boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)
{
    struct ipsec_pcb *pcb = NULL;

    if (interface == NULL) {
        return FALSE;
    }

    pcb = ifnet_softc(interface);

    if (pcb == NULL) {
        return FALSE;
    }

    return pcb->ipsec_needs_netagent == true;
}
#endif // IPSEC_NEXUS

static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
    /* Set flags and additional information. */
    ifnet_set_mtu(ifp, 1500);
    ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

    /* The interface must generate its own IPv6 link-local address,
     * if possible following the recommendation of RFC 2472 to use the
     * 64-bit interface ID.
     */
    ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
    /* Reset the stats, as the interface may have been recycled */
    struct ifnet_stats_param stats;
    bzero(&stats, sizeof(struct ifnet_stats_param));
    ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

    return 0;
}

#if IPSEC_NEXUS

static uuid_t ipsec_nx_dom_prov;

static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
    return 0;
}

static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
    // Ignore
}

static errno_t
ipsec_register_nexus(void)
{
    const struct kern_nexus_domain_provider_init dp_init = {
        .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
        .nxdpi_flags = 0,
        .nxdpi_init = ipsec_nxdp_init,
        .nxdpi_fini = ipsec_nxdp_fini
    };
    errno_t err = 0;

    /* ipsec_nxdp_init() is called before this function returns */
    err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
        (const uint8_t *)"com.apple.ipsec",
        &dp_init, sizeof(dp_init),
        &ipsec_nx_dom_prov);
    if (err != 0) {
        os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
        return err;
    }
    return 0;
}

static errno_t
ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    pcb->ipsec_netif_nexus = nexus;
    return ipsec_ifnet_set_attrs(ifp);
}

static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
    return 0;
}

static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
    /* Mark the data path as ready */
    if (ok) {
        lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
        IPSEC_SET_DATA_PATH_READY(pcb);
        lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
    }
    return ok ? 0 : ENXIO;
}

static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    VERIFY(pcb->ipsec_kpipe_count != 0);

    /* Wait until all threads in the data paths are done. */
    ipsec_wait_data_move_drain(pcb);
}

static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    /* Wait until all threads in the data paths are done. */
    ipsec_wait_data_move_drain(pcb);
}

static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    if (pcb->ipsec_netif_nexus == nexus) {
        pcb->ipsec_netif_nexus = NULL;
    }
    ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}

static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    uint8_t ring_idx;

    for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
        if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
            break;
        }
    }

    if (ring_idx == pcb->ipsec_kpipe_count) {
        uuid_string_t uuidstr;
        uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
        os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
        return ENOENT;
    }

    *ring_ctx = (void *)(uintptr_t)ring_idx;

    if (!is_tx_ring) {
        VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
        pcb->ipsec_kpipe_rxring[ring_idx] = ring;
    } else {
        VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
        pcb->ipsec_kpipe_txring[ring_idx] = ring;
    }
    return 0;
}
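
/*
 * The ring index stored in *ring_ctx above is how the sync routines later
 * identify which kernel pipe a ring belongs to: they read it back with
 * kern_channel_ring_get_context() (see ipsec_kpipe_sync_rx_mbuf() and
 * ipsec_kpipe_sync_rx_packet() below).
 */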

static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov)
    bool found = false;
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
        if (pcb->ipsec_kpipe_rxring[i] == ring) {
            pcb->ipsec_kpipe_rxring[i] = NULL;
            found = true;
        } else if (pcb->ipsec_kpipe_txring[i] == ring) {
            pcb->ipsec_kpipe_txring[i] = NULL;
            found = true;
        }
    }
    VERIFY(found);
}

static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);

    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to write, bail
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    // Signal the netif ring to read
    kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (rx_ring != NULL) {
        kern_channel_notify(rx_ring, 0);
    }

    ipsec_data_move_end(pcb);
    return 0;
}
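
/*
 * Note that ipsec_kpipe_sync_tx() above moves no data itself: if the
 * channel client has queued slots on its TX ring, the routine simply kicks
 * the netif RX ring and lets the netif sync path pull the packets in. The
 * outbound (encrypt) transfers for the kpipe are done in the sync_rx
 * routines below, which drain the corresponding netif TX ring.
 */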

static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
    struct ipsec_output_state ipsec_state;
    int error = 0;
    uint32_t af;

    // Make sure this packet isn't looping through the interface
    if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
        error = -1;
        goto ipsec_output_err;
    }

    // Mark the interface so NECP can evaluate tunnel policy
    necp_mark_packet_from_interface(data, interface);

    struct ip *ip = mtod(data, struct ip *);
    u_int ip_version = ip->ip_v;

    switch (ip_version) {
    case 4: {
        af = AF_INET;

        memset(&ipsec_state, 0, sizeof(ipsec_state));
        ipsec_state.m = data;
        ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
        memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

        error = ipsec4_interface_output(&ipsec_state, interface);
        if (error == 0 && ipsec_state.tunneled == 6) {
            // Tunneled in IPv6 - packet is gone
            // TODO: Don't lose mbuf
            data = NULL;
            goto done;
        }

        data = ipsec_state.m;
        if (error || data == NULL) {
            if (error) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
            }
            goto ipsec_output_err;
        }
        goto done;
    }
    case 6: {
        af = AF_INET6;

        data = ipsec6_splithdr(data);
        if (data == NULL) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
            goto ipsec_output_err;
        }

        struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

        memset(&ipsec_state, 0, sizeof(ipsec_state));
        ipsec_state.m = data;
        ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
        memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

        error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
        if (error == 0 && ipsec_state.tunneled == 4) {
            // Tunneled in IPv4 - packet is gone
            // TODO: Don't lose mbuf
            data = NULL;
            goto done;
        }
        data = ipsec_state.m;
        if (error || data == NULL) {
            if (error) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
            }
            goto ipsec_output_err;
        }
        goto done;
    }
    default: {
        os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
        error = -1;
        goto ipsec_output_err;
    }
    }

done:
    return data;

ipsec_output_err:
    if (data) {
        mbuf_freem(data);
    }
    return NULL;
}
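
/*
 * Contract for ipsec_encrypt_mbuf(): on success it returns the encrypted
 * mbuf chain; on any failure it frees the input mbuf and returns NULL, so
 * callers must not touch the original chain afterwards. The cross-family
 * tunnel cases (IPv4-in-IPv6 and IPv6-in-IPv4) also return NULL, because
 * the inner output routine has already consumed the packet.
 */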

static errno_t
ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    struct kern_channel_ring_stat_increment rx_ring_stats;
    uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);
    VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

    // Reclaim user-released slots
    (void) kern_channel_reclaim(rx_ring);

    uint32_t avail = kern_channel_available_slot_count(rx_ring);
    if (avail == 0) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
    if (tx_ring == NULL) {
        // Net-If TX ring not set up yet, nothing to read
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

    // Unlock ipsec before entering ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    (void)kr_enter(tx_ring, TRUE);

    // Lock again after entering and validate
    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
    if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
        // Ring no longer valid
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_channel_ring_stat_increment tx_ring_stats;
    bzero(&tx_ring_stats, sizeof(tx_ring_stats));
    kern_channel_slot_t tx_pslot = NULL;
    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to read, don't bother signalling
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
    VERIFY(rx_pp != NULL);
    struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
    VERIFY(tx_pp != NULL);
    bzero(&rx_ring_stats, sizeof(rx_ring_stats));
    kern_channel_slot_t rx_pslot = NULL;
    kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
    kern_packet_t tx_chain_ph = 0;

    while (rx_slot != NULL && tx_slot != NULL) {
        size_t length = 0;
        mbuf_t data = NULL;
        errno_t error = 0;

        // Allocate rx packet
        kern_packet_t rx_ph = 0;
        error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
        if (__improbable(error != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
                pcb->ipsec_ifp->if_xname);
            break;
        }

        kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

        if (tx_ph == 0) {
            // Advance TX ring
            tx_pslot = tx_slot;
            tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
            kern_pbufpool_free(rx_pp, rx_ph);
            continue;
        }
        (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
        if (tx_chain_ph != 0) {
            kern_packet_append(tx_ph, tx_chain_ph);
        }
        tx_chain_ph = tx_ph;

        // Advance TX ring
        tx_pslot = tx_slot;
        tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

        kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
        VERIFY(tx_buf != NULL);
        uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
        VERIFY(tx_baddr != NULL);
        tx_baddr += kern_buflet_get_data_offset(tx_buf);

        bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

        length = MIN(kern_packet_get_data_length(tx_ph),
            pcb->ipsec_slot_size);

        // Increment TX stats
        tx_ring_stats.kcrsi_slots_transferred++;
        tx_ring_stats.kcrsi_bytes_transferred += length;

        if (length > 0) {
            error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
            if (error == 0) {
                error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
                if (error == 0) {
                    // Encrypt and send packet
                    lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
                    data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
                    lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
                } else {
                    os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
                    STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
                    STATS_INC(nifs, NETIF_STATS_DROP);
                    mbuf_freem(data);
                    data = NULL;
                }
            } else {
                os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
                STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
                STATS_INC(nifs, NETIF_STATS_DROP);
            }
        } else {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
            STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
            STATS_INC(nifs, NETIF_STATS_DROP);
        }

        if (data == NULL) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
            kern_pbufpool_free(rx_pp, rx_ph);
            break;
        }

        length = mbuf_pkthdr_len(data);
        if (length > PP_BUF_SIZE_DEF(rx_pp)) {
            // Flush data
            mbuf_freem(data);
            kern_pbufpool_free(rx_pp, rx_ph);
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
                pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
            continue;
        }

        // Fillout rx packet
        kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
        VERIFY(rx_buf != NULL);
        void *rx_baddr = kern_buflet_get_data_address(rx_buf);
        VERIFY(rx_baddr != NULL);

        // Copy-in data from mbuf to buflet
        mbuf_copydata(data, 0, length, (void *)rx_baddr);
        kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

        // Finalize and attach the packet
        error = kern_buflet_set_data_offset(rx_buf, 0);
        VERIFY(error == 0);
        error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
        VERIFY(error == 0);
        error = kern_packet_finalize(rx_ph);
        VERIFY(error == 0);
        error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
        VERIFY(error == 0);

        STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
        STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

        rx_ring_stats.kcrsi_slots_transferred++;
        rx_ring_stats.kcrsi_bytes_transferred += length;

        if (!pcb->ipsec_ext_ifdata_stats) {
            ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
        }

        mbuf_freem(data);

        rx_pslot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

    if (rx_pslot) {
        kern_channel_advance_slot(rx_ring, rx_pslot);
        kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
    }

    if (tx_chain_ph != 0) {
        kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
    }

    if (tx_pslot) {
        kern_channel_advance_slot(tx_ring, tx_pslot);
        kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
        (void)kern_channel_reclaim(tx_ring);
    }

    /* always reenable output */
    errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
    if (error != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
    }

    // Unlock first, then exit ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (tx_pslot != NULL) {
        kern_channel_notify(tx_ring, 0);
    }
    kr_exit(tx_ring);

    ipsec_data_move_end(pcb);
    return 0;
}

static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
    kern_packet_t dph)
{
    uint8_t *sbaddr = NULL;
    int err = 0;
    uint32_t slen = 0;

    VERIFY(interface != NULL);
    VERIFY(sph != 0);
    VERIFY(dph != 0);

    kern_buflet_t sbuf = __packet_get_next_buflet(sph, NULL);
    VERIFY(sbuf != NULL);
    slen = __buflet_get_data_length(sbuf);

    if (__improbable(slen < sizeof(struct ip))) {
        os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
            "buffer shorter than ip header, %u\n", slen);
        return EINVAL;
    }

    MD_BUFLET_ADDR(SK_PTR_ADDR_KPKT(sph), sbaddr);
    struct ip *ip = (struct ip *)(void *)sbaddr;
    ASSERT(IP_HDR_ALIGNED_P(ip));

    u_int ip_vers = ip->ip_v;
    switch (ip_vers) {
    case IPVERSION: {
        err = ipsec4_interface_kpipe_output(interface, sph, dph);
        if (__improbable(err != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
                "output error %d\n", err);
            return err;
        }
        break;
    }
    case 6: {
        err = ipsec6_interface_kpipe_output(interface, sph, dph);
        if (__improbable(err != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
                "output error %d\n", err);
            return err;
        }
        break;
    }
    default: {
        os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
            ip_vers);
        return EINVAL;
    }
    }

    return err;
}

static errno_t
ipsec_kpipe_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    struct kern_channel_ring_stat_increment rx_ring_stats;
    uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);
    VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

    // Reclaim user-released slots
    (void) kern_channel_reclaim(rx_ring);

    uint32_t avail = kern_channel_available_slot_count(rx_ring);
    if (avail == 0) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
    if (tx_ring == NULL) {
        // Net-If TX ring not set up yet, nothing to read
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

    // Unlock ipsec before entering ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    (void)kr_enter(tx_ring, TRUE);

    // Lock again after entering and validate
    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
    if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
        // Ring no longer valid
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_channel_ring_stat_increment tx_ring_stats;
    bzero(&tx_ring_stats, sizeof(tx_ring_stats));
    kern_channel_slot_t tx_pslot = NULL;
    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to read, don't bother signalling
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
    VERIFY(rx_pp != NULL);
    struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
    VERIFY(tx_pp != NULL);
    bzero(&rx_ring_stats, sizeof(rx_ring_stats));
    kern_channel_slot_t rx_pslot = NULL;
    kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
    kern_packet_t tx_chain_ph = 0;

    while (rx_slot != NULL && tx_slot != NULL) {
        size_t tx_pkt_length = 0;
        errno_t error = 0;

        // Allocate rx packet
        kern_packet_t rx_ph = 0;
        error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
        if (__improbable(error != 0)) {
            os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
                "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
            STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
            STATS_INC(nifs, NETIF_STATS_DROP);
            break;
        }

        kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
        if (__improbable(tx_ph == 0)) {
            // Advance TX ring
            tx_pslot = tx_slot;
            tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
            kern_pbufpool_free(rx_pp, rx_ph);
            continue;
        }

        (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
        if (tx_chain_ph != 0) {
            kern_packet_append(tx_ph, tx_chain_ph);
        }
        tx_chain_ph = tx_ph;

        // Advance TX ring
        tx_pslot = tx_slot;
        tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

        bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

        tx_pkt_length = kern_packet_get_data_length(tx_ph);
        if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
            os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
                "packet length %zu", pcb->ipsec_ifp->if_xname,
                tx_pkt_length);
            kern_pbufpool_free(rx_pp, rx_ph);
            STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
            STATS_INC(nifs, NETIF_STATS_DROP);
            continue;
        }

        // Increment TX stats
        tx_ring_stats.kcrsi_slots_transferred++;
        tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;

        // Encrypt packet
        lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
        error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
        lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
        if (__improbable(error != 0)) {
            os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
                "failed to encrypt packet", pcb->ipsec_ifp->if_xname);
            kern_pbufpool_free(rx_pp, rx_ph);
            STATS_INC(nifs, NETIF_STATS_DROP);
            continue;
        }

        kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
        // Finalize and attach the packet
        kern_buflet_t rx_buf = __packet_get_next_buflet(rx_ph, NULL);
        error = kern_buflet_set_data_offset(rx_buf, 0);
        VERIFY(error == 0);
        error = kern_packet_finalize(rx_ph);
        VERIFY(error == 0);
        error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
        VERIFY(error == 0);

        STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
        STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

        rx_ring_stats.kcrsi_slots_transferred++;
        rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);

        if (!pcb->ipsec_ext_ifdata_stats) {
            ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
                kern_packet_get_data_length(rx_ph), 0);
        }

        rx_pslot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

    if (rx_pslot) {
        kern_channel_advance_slot(rx_ring, rx_pslot);
        kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
    }

    if (tx_chain_ph != 0) {
        kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
    }

    if (tx_pslot) {
        kern_channel_advance_slot(tx_ring, tx_pslot);
        kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
        (void)kern_channel_reclaim(tx_ring);
    }

    /* always reenable output */
    errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
    if (error != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
    }

    // Unlock first, then exit ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (tx_pslot != NULL) {
        kern_channel_notify(tx_ring, 0);
    }
    kr_exit(tx_ring);

    ipsec_data_move_end(pcb);
    return 0;
}

static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
    if (__improbable(ipsec_kpipe_mbuf == 1)) {
        return ipsec_kpipe_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
    } else {
        return ipsec_kpipe_sync_rx_packet(nxprov, nexus, rx_ring, flags);
    }
}
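
/*
 * ipsec_kpipe_mbuf is a DEVELOPMENT/DEBUG-only boot-arg (parsed in
 * ipsec_register_control() above). When set, sync_rx takes the legacy path
 * that bounces every packet through an mbuf for encryption; otherwise
 * packets are encrypted buflet-to-buflet via ipsec_encrypt_kpipe_pkt()
 * without touching an mbuf at all.
 */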

static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
{
    switch (svc_class) {
    case KPKT_SC_VO: {
        return 0;
    }
    case KPKT_SC_VI: {
        return 1;
    }
    case KPKT_SC_BE: {
        return 2;
    }
    case KPKT_SC_BK: {
        return 3;
    }
    default: {
        VERIFY(0);
        return 0;
    }
    }
}
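
/*
 * TX ring assignment in WMM mode, highest priority first:
 *
 *	ring 0: KPKT_SC_VO (voice)
 *	ring 1: KPKT_SC_VI (video)
 *	ring 2: KPKT_SC_BE (best effort)
 *	ring 3: KPKT_SC_BK (background)
 *
 * Any other service class is a programming error (hence the VERIFY(0)).
 */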

static errno_t
ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    if (!is_tx_ring) {
        VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
        pcb->ipsec_netif_rxring[0] = ring;
    } else {
        uint8_t ring_idx = 0;
        if (ipsec_in_wmm_mode(pcb)) {
            int err;
            kern_packet_svc_class_t svc_class;
            err = kern_channel_get_service_class(ring, &svc_class);
            VERIFY(err == 0);
            ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
            VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
        }

        *ring_ctx = (void *)(uintptr_t)ring_idx;

        VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
        pcb->ipsec_netif_txring[ring_idx] = ring;
    }
    return 0;
}

static void
ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    bool found = false;

    for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
        if (pcb->ipsec_netif_rxring[i] == ring) {
            pcb->ipsec_netif_rxring[i] = NULL;
            VERIFY(!found);
            found = true;
        }
    }
    for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
        if (pcb->ipsec_netif_txring[i] == ring) {
            pcb->ipsec_netif_txring[i] = NULL;
            VERIFY(!found);
            found = true;
        }
    }
    VERIFY(found);
}

static bool
ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
{
    necp_kernel_policy_result necp_result = 0;
    necp_kernel_policy_result_parameter necp_result_parameter = {};
    uint32_t necp_matched_policy_id = 0;
    struct ip_out_args args4 = { };
    struct ip6_out_args args6 = { };

    // This packet has been marked with IP level policy, do not mark again.
    if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
        return true;
    }

    size_t length = mbuf_pkthdr_len(data);
    if (length < sizeof(struct ip)) {
        return false;
    }

    struct ip *ip = mtod(data, struct ip *);
    u_int ip_version = ip->ip_v;
    switch (ip_version) {
    case 4: {
        if (interface != NULL) {
            args4.ipoa_flags |= IPOAF_BOUND_IF;
            args4.ipoa_boundif = interface->if_index;
        }
        necp_matched_policy_id = necp_ip_output_find_policy_match(data, IP_OUTARGS, &args4, NULL,
            &necp_result, &necp_result_parameter);
        break;
    }
    case 6: {
        if (interface != NULL) {
            args6.ip6oa_flags |= IP6OAF_BOUND_IF;
            args6.ip6oa_boundif = interface->if_index;
        }
        necp_matched_policy_id = necp_ip6_output_find_policy_match(data, IPV6_OUTARGS, &args6, NULL,
            &necp_result, &necp_result_parameter);
        break;
    }
    default: {
        return false;
    }
    }

    if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
        necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
        /* Drop and flow divert packets should be blocked at the IP layer */
        return false;
    }

    necp_mark_packet_from_ip(data, necp_matched_policy_id);
    return true;
}
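
/*
 * ipsec_netif_check_policy() returns true when a packet may proceed:
 * either it already carries an IP-level NECP policy tag, or the freshly
 * matched policy is neither a drop nor a flow-divert result. Approved
 * packets are tagged with the matched policy id so the IP layer does not
 * re-evaluate them.
 */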
1448 | |
1449 | static errno_t |
1450 | ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1451 | kern_channel_ring_t tx_ring, uint32_t flags) |
1452 | { |
1453 | #pragma unused(nxprov) |
1454 | #pragma unused(flags) |
1455 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1456 | |
1457 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats; |
1458 | |
1459 | if (!ipsec_data_move_begin(pcb)) { |
1460 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1461 | return 0; |
1462 | } |
1463 | |
1464 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1465 | |
1466 | struct kern_channel_ring_stat_increment tx_ring_stats; |
1467 | bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats)); |
1468 | kern_channel_slot_t tx_pslot = NULL; |
1469 | kern_channel_slot_t tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
1470 | kern_packet_t tx_chain_ph = 0; |
1471 | |
1472 | STATS_INC(nifs, NETIF_STATS_TX_SYNC); |
1473 | |
1474 | if (tx_slot == NULL) { |
1475 | // Nothing to write, don't bother signalling |
1476 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1477 | ipsec_data_move_end(pcb); |
1478 | return 0; |
1479 | } |
1480 | |
1481 | if (pcb->ipsec_kpipe_count && |
1482 | ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) { |
1483 | // Select the corresponding kpipe rx ring |
1484 | uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(ring: tx_ring); |
1485 | VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT); |
1486 | kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx]; |
1487 | |
1488 | // Unlock while calling notify |
1489 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1490 | |
1491 | // Signal the kernel pipe ring to read |
1492 | if (rx_ring != NULL) { |
1493 | kern_channel_notify(rx_ring, flags: 0); |
1494 | } |
1495 | |
1496 | ipsec_data_move_end(pcb); |
1497 | return 0; |
1498 | } |
1499 | |
1500 | // If we're here, we're injecting into the BSD stack |
1501 | while (tx_slot != NULL) { |
1502 | size_t length = 0; |
1503 | mbuf_t data = NULL; |
1504 | |
1505 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
1506 | |
1507 | if (tx_ph == 0) { |
1508 | // Advance TX ring |
1509 | tx_pslot = tx_slot; |
1510 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1511 | continue; |
1512 | } |
1513 | (void) kern_channel_slot_detach_packet(ring: tx_ring, slot: tx_slot, packet: tx_ph); |
1514 | if (tx_chain_ph != 0) { |
1515 | kern_packet_append(tx_ph, tx_chain_ph); |
1516 | } |
1517 | tx_chain_ph = tx_ph; |
1518 | |
1519 | // Advance TX ring |
1520 | tx_pslot = tx_slot; |
1521 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1522 | |
1523 | kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL); |
1524 | VERIFY(tx_buf != NULL); |
1525 | uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf); |
1526 | VERIFY(tx_baddr != 0); |
1527 | tx_baddr += kern_buflet_get_data_offset(tx_buf); |
1528 | |
1529 | bpf_tap_packet_out(interface: pcb->ipsec_ifp, DLT_RAW, packet: tx_ph, NULL, header_len: 0); |
1530 | |
1531 | length = MIN(kern_packet_get_data_length(tx_ph), |
1532 | pcb->ipsec_slot_size); |
1533 | |
1534 | if (length > 0) { |
1535 | errno_t error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data); |
1536 | if (error == 0) { |
1537 | error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT); |
1538 | if (error == 0) { |
1539 | // Mark packet from policy |
1540 | uint32_t policy_id = kern_packet_get_policy_id(tx_ph); |
1541 | uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph); |
1542 | necp_mark_packet_from_ip_with_skip(packet: data, policy_id, skip_policy_id); |
1543 | |
1544 | // Check policy with NECP |
1545 | if (!ipsec_netif_check_policy(interface: pcb->ipsec_ifp, data)) { |
1546 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n" , pcb->ipsec_ifp->if_xname); |
1547 | STATS_INC(nifs, NETIF_STATS_DROP); |
1548 | mbuf_freem(mbuf: data); |
1549 | data = NULL; |
1550 | } else { |
1551 | // Send through encryption |
1552 | error = ipsec_output(interface: pcb->ipsec_ifp, data); |
1553 | if (error != 0) { |
1554 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n" , pcb->ipsec_ifp->if_xname, error); |
1555 | } |
1556 | } |
1557 | } else { |
1558 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n" , pcb->ipsec_ifp->if_xname, length, error); |
1559 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
1560 | STATS_INC(nifs, NETIF_STATS_DROP); |
1561 | mbuf_freem(mbuf: data); |
1562 | data = NULL; |
1563 | } |
1564 | } else { |
1565 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n" , pcb->ipsec_ifp->if_xname, error); |
1566 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
1567 | STATS_INC(nifs, NETIF_STATS_DROP); |
1568 | } |
1569 | } else { |
1570 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n" , pcb->ipsec_ifp->if_xname); |
1571 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1572 | STATS_INC(nifs, NETIF_STATS_DROP); |
1573 | } |
1574 | |
1575 | if (data == NULL) { |
1576 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n" , pcb->ipsec_ifp->if_xname); |
1577 | break; |
1578 | } |
1579 | |
1580 | STATS_INC(nifs, NETIF_STATS_TX_PACKETS); |
1581 | STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF); |
1582 | |
1583 | tx_ring_stats.kcrsi_slots_transferred++; |
1584 | tx_ring_stats.kcrsi_bytes_transferred += length; |
1585 | } |
1586 | |
1587 | if (tx_chain_ph != 0) { |
1588 | kern_pbufpool_free_chain(pbufpool: tx_ring->ckr_pp, chain: tx_chain_ph); |
1589 | } |
1590 | |
1591 | if (tx_pslot) { |
1592 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
1593 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
1594 | (void)kern_channel_reclaim(tx_ring); |
1595 | } |
1596 | |
1597 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1598 | ipsec_data_move_end(pcb); |
1599 | |
1600 | return 0; |
1601 | } |
1602 | |
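/*
 * Doorbell handler for a single netif TX ring: refills the ring (blocking
 * variant, since an RX sync may hold kr_enter()), validates that the ring
 * is still the one registered for this index, disables ifnet output when
 * the ring has no room left, and notifies the matching kpipe RX ring if a
 * kernel pipe is attached.
 */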
1603 | static errno_t |
1604 | ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1605 | kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx) |
1606 | { |
1607 | #pragma unused(nxprov) |
1608 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1609 | boolean_t more = false; |
1610 | errno_t rc = 0; |
1611 | |
1612 | VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0); |
1613 | |
1614 | /* |
1615 | * Refill and sync the ring; we may be racing against another thread doing |
1616 | * an RX sync that also wants to do kr_enter(), and so use the blocking |
1617 | * variant here. |
1618 | */ |
1619 | rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more); |
1620 | if (rc != 0 && rc != EAGAIN && rc != EBUSY) { |
1621 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n" , __func__, |
1622 | pcb->ipsec_if_xname, ring->ckr_name, rc); |
1623 | } |
1624 | |
1625 | (void) kr_enter(ring, TRUE); |
1626 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1627 | if (ring != pcb->ipsec_netif_txring[ring_idx]) { |
1628 | // ring no longer valid |
1629 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1630 | kr_exit(ring); |
1631 | os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n" , __func__, |
1632 | pcb->ipsec_if_xname, ring->ckr_name, ring_idx); |
1633 | return ENXIO; |
1634 | } |
1635 | |
1636 | if (pcb->ipsec_kpipe_count) { |
1637 | uint32_t tx_available = kern_channel_available_slot_count(ring); |
1638 | if (pcb->ipsec_netif_txring_size > 0 && |
1639 | tx_available >= pcb->ipsec_netif_txring_size - 1) { |
1640 | // No room left in tx ring, disable output for now |
1641 | errno_t error = ifnet_disable_output(interface: pcb->ipsec_ifp); |
1642 | if (error != 0) { |
1643 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n" , error); |
1644 | } |
1645 | } |
1646 | } |
1647 | |
1648 | if (pcb->ipsec_kpipe_count) { |
1649 | kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx]; |
1650 | |
1651 | // Unlock while calling notify |
1652 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1653 | // Signal the kernel pipe ring to read |
1654 | if (rx_ring != NULL) { |
1655 | kern_channel_notify(rx_ring, flags: 0); |
1656 | } |
1657 | } else { |
1658 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1659 | } |
1660 | |
1661 | kr_exit(ring); |
1662 | |
1663 | return 0; |
1664 | } |
1665 | |
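/*
 * Doorbell entry point. In WMM (driver-managed) mode every TX ring gets a
 * doorbell pass; otherwise only ring 0 is serviced.
 */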
1666 | static errno_t |
1667 | ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1668 | kern_channel_ring_t ring, __unused uint32_t flags) |
1669 | { |
1670 | errno_t ret = 0; |
1671 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1672 | |
1673 | if (!ipsec_data_move_begin(pcb)) { |
1674 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1675 | return 0; |
1676 | } |
1677 | |
1678 | if (ipsec_in_wmm_mode(pcb)) { |
1679 | for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) { |
1680 | kern_channel_ring_t nring = pcb->ipsec_netif_txring[i]; |
1681 | ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring: nring, flags, ring_idx: i); |
1682 | if (ret) { |
1683 | break; |
1684 | } |
1685 | } |
1686 | } else { |
1687 | ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, ring_idx: 0); |
1688 | } |
1689 | |
1690 | ipsec_data_move_end(pcb); |
1691 | return ret; |
1692 | } |
1693 | |
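/*
 * Netif RX sync, mbuf flavor. Phase one drains the legacy input chain of
 * already-decrypted mbufs into the RX ring, fragmenting any packet larger
 * than the pool buffer or the configured input fragment size and requeuing
 * the fragments. Phase two walks the kpipe TX rings, copies each encrypted
 * packet into an mbuf, decrypts it via esp4/esp6 input under the decrypt
 * lock, and attaches the plaintext to the RX ring.
 */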
1694 | static errno_t |
1695 | ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
1696 | kern_channel_ring_t rx_ring, uint32_t flags) |
1697 | { |
1698 | #pragma unused(nxprov) |
1699 | #pragma unused(flags) |
1700 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
1701 | struct kern_channel_ring_stat_increment rx_ring_stats; |
1702 | |
1703 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats; |
1704 | |
1705 | if (!ipsec_data_move_begin(pcb)) { |
1706 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
1707 | return 0; |
1708 | } |
1709 | |
1710 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1711 | |
1712 | // Reclaim user-released slots |
1713 | (void) kern_channel_reclaim(rx_ring); |
1714 | |
1715 | STATS_INC(nifs, NETIF_STATS_RX_SYNC); |
1716 | |
1717 | uint32_t avail = kern_channel_available_slot_count(ring: rx_ring); |
1718 | if (avail == 0) { |
1719 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1720 | ipsec_data_move_end(pcb); |
1721 | return 0; |
1722 | } |
1723 | |
1724 | struct kern_pbufpool *rx_pp = rx_ring->ckr_pp; |
1725 | VERIFY(rx_pp != NULL); |
1726 | bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats)); |
1727 | kern_channel_slot_t rx_pslot = NULL; |
1728 | kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL); |
1729 | |
1730 | while (rx_slot != NULL) { |
1731 | // Check for a waiting packet |
1732 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
1733 | mbuf_t data = pcb->ipsec_input_chain; |
1734 | if (data == NULL) { |
1735 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1736 | break; |
1737 | } |
1738 | |
1739 | // Allocate rx packet |
1740 | kern_packet_t rx_ph = 0; |
1741 | errno_t error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
1742 | if (__improbable(error != 0)) { |
1743 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
1744 | STATS_INC(nifs, NETIF_STATS_DROP); |
1745 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1746 | break; |
1747 | } |
1748 | |
1749 | // Advance waiting packets |
1750 | if (pcb->ipsec_input_chain_count > 0) { |
1751 | pcb->ipsec_input_chain_count--; |
1752 | } |
1753 | pcb->ipsec_input_chain = data->m_nextpkt; |
1754 | data->m_nextpkt = NULL; |
1755 | if (pcb->ipsec_input_chain == NULL) { |
1756 | pcb->ipsec_input_chain_last = NULL; |
1757 | } |
1758 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1759 | |
1760 | size_t length = mbuf_pkthdr_len(mbuf: data); |
1761 | |
1762 | if (length < sizeof(struct ip)) { |
1763 | // Flush data |
1764 | mbuf_freem(mbuf: data); |
1765 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1766 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1767 | STATS_INC(nifs, NETIF_STATS_DROP); |
os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet too short to hold an IP header %zu < %zu\n" ,
1769 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip)); |
1770 | continue; |
1771 | } |
1772 | |
1773 | uint32_t af = 0; |
1774 | struct ip *ip = mtod(data, struct ip *); |
1775 | u_int ip_version = ip->ip_v; |
1776 | switch (ip_version) { |
1777 | case 4: { |
1778 | af = AF_INET; |
1779 | break; |
1780 | } |
1781 | case 6: { |
1782 | af = AF_INET6; |
1783 | break; |
1784 | } |
1785 | default: { |
1786 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n" , |
1787 | pcb->ipsec_ifp->if_xname, ip_version); |
1788 | break; |
1789 | } |
1790 | } |
1791 | |
1792 | if (length > PP_BUF_SIZE_DEF(rx_pp) || |
1793 | (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) { |
1794 | // We need to fragment to send up into the netif |
1795 | |
1796 | u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp); |
1797 | if (pcb->ipsec_frag_size_set && |
1798 | pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) { |
1799 | fragment_mtu = pcb->ipsec_input_frag_size; |
1800 | } |
1801 | |
1802 | mbuf_t fragment_chain = NULL; |
1803 | switch (af) { |
1804 | case AF_INET: { |
1805 | // ip_fragment expects the length in host order |
1806 | ip->ip_len = ntohs(ip->ip_len); |
1807 | |
1808 | // ip_fragment will modify the original data, don't free |
1809 | int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE); |
1810 | if (fragment_error == 0 && data != NULL) { |
1811 | fragment_chain = data; |
1812 | } else { |
1813 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1814 | STATS_INC(nifs, NETIF_STATS_DROP); |
1815 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n" , |
1816 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
1817 | } |
1818 | break; |
1819 | } |
1820 | case AF_INET6: { |
1821 | if (length < sizeof(struct ip6_hdr)) { |
1822 | mbuf_freem(mbuf: data); |
1823 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1824 | STATS_INC(nifs, NETIF_STATS_DROP); |
1825 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n" , |
1826 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr)); |
1827 | } else { |
1828 | // ip6_do_fragmentation will free the original data on success only |
1829 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
1830 | |
1831 | int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr), |
1832 | ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data))); |
1833 | if (fragment_error == 0 && data != NULL) { |
1834 | fragment_chain = data; |
1835 | } else { |
1836 | mbuf_freem(mbuf: data); |
1837 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1838 | STATS_INC(nifs, NETIF_STATS_DROP); |
1839 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n" , |
1840 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
1841 | } |
1842 | } |
1843 | break; |
1844 | } |
1845 | default: { |
1846 | // Cannot fragment unknown families |
1847 | mbuf_freem(mbuf: data); |
1848 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
1849 | STATS_INC(nifs, NETIF_STATS_DROP); |
os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown address family for legacy decrypted packet of length %zu > %u\n" ,
1851 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
1852 | break; |
1853 | } |
1854 | } |
1855 | |
1856 | if (fragment_chain != NULL) { |
1857 | // Add fragments to chain before continuing |
1858 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
1859 | if (pcb->ipsec_input_chain != NULL) { |
1860 | pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain; |
1861 | } else { |
1862 | pcb->ipsec_input_chain = fragment_chain; |
1863 | } |
1864 | pcb->ipsec_input_chain_count++; |
1865 | while (fragment_chain->m_nextpkt) { |
1866 | VERIFY(fragment_chain != fragment_chain->m_nextpkt); |
1867 | fragment_chain = fragment_chain->m_nextpkt; |
1868 | pcb->ipsec_input_chain_count++; |
1869 | } |
1870 | pcb->ipsec_input_chain_last = fragment_chain; |
1871 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
1872 | } |
1873 | |
1874 | // Make sure to free unused rx packet |
1875 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1876 | |
1877 | continue; |
1878 | } |
1879 | |
1880 | mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp); |
1881 | |
1882 | // Fillout rx packet |
1883 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
1884 | VERIFY(rx_buf != NULL); |
1885 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
1886 | VERIFY(rx_baddr != NULL); |
1887 | |
1888 | // Copy-in data from mbuf to buflet |
1889 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
1890 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
1891 | |
1892 | // Finalize and attach the packet |
1893 | error = kern_buflet_set_data_offset(rx_buf, 0); |
1894 | VERIFY(error == 0); |
1895 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
1896 | VERIFY(error == 0); |
1897 | error = kern_packet_set_headroom(rx_ph, 0); |
1898 | VERIFY(error == 0); |
1899 | error = kern_packet_finalize(rx_ph); |
1900 | VERIFY(error == 0); |
1901 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
1902 | VERIFY(error == 0); |
1903 | |
1904 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
1905 | STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF); |
1906 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
1907 | |
1908 | rx_ring_stats.kcrsi_slots_transferred++; |
1909 | rx_ring_stats.kcrsi_bytes_transferred += length; |
1910 | |
1911 | if (!pcb->ipsec_ext_ifdata_stats) { |
1912 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0); |
1913 | } |
1914 | |
1915 | mbuf_freem(mbuf: data); |
1916 | |
1917 | // Advance ring |
1918 | rx_pslot = rx_slot; |
1919 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
1920 | } |
1921 | |
1922 | for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) { |
1923 | struct kern_channel_ring_stat_increment tx_ring_stats; |
1924 | bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats)); |
1925 | kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx]; |
1926 | kern_channel_slot_t tx_pslot = NULL; |
1927 | kern_channel_slot_t tx_slot = NULL; |
1928 | if (tx_ring == NULL) { |
// Kernel pipe TX ring not set up yet, nothing to read
1930 | goto done; |
1931 | } |
1932 | // Unlock ipsec before entering ring |
1933 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
1934 | |
1935 | (void)kr_enter(tx_ring, TRUE); |
1936 | |
1937 | // Lock again after entering and validate |
1938 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
1939 | |
1940 | if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) { |
1941 | goto done; |
1942 | } |
1943 | |
1944 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
1945 | if (tx_slot == NULL) { |
1946 | // Nothing to read, don't bother signalling |
1947 | goto done; |
1948 | } |
1949 | |
1950 | while (rx_slot != NULL && tx_slot != NULL) { |
1951 | size_t length = 0; |
1952 | mbuf_t data = NULL; |
1953 | errno_t error = 0; |
1954 | uint32_t af; |
1955 | |
1956 | // Allocate rx packet |
1957 | kern_packet_t rx_ph = 0; |
1958 | error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
1959 | if (__improbable(error != 0)) { |
1960 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
1961 | STATS_INC(nifs, NETIF_STATS_DROP); |
1962 | break; |
1963 | } |
1964 | |
1965 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
1966 | |
1967 | // Advance TX ring |
1968 | tx_pslot = tx_slot; |
1969 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
1970 | |
1971 | if (tx_ph == 0) { |
1972 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
1973 | continue; |
1974 | } |
1975 | |
1976 | kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL); |
1977 | VERIFY(tx_buf != NULL); |
1978 | uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf); |
1979 | VERIFY(tx_baddr != 0); |
1980 | tx_baddr += kern_buflet_get_data_offset(tx_buf); |
1981 | |
1982 | length = MIN(kern_packet_get_data_length(tx_ph), |
1983 | pcb->ipsec_slot_size); |
1984 | |
1985 | // Increment TX stats |
1986 | tx_ring_stats.kcrsi_slots_transferred++; |
1987 | tx_ring_stats.kcrsi_bytes_transferred += length; |
1988 | |
1989 | if (length >= sizeof(struct ip)) { |
1990 | error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data); |
1991 | if (error == 0) { |
1992 | error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT); |
1993 | if (error == 0) { |
1994 | // Check for wake packet flag |
1995 | uuid_t flow_uuid; |
1996 | kern_packet_get_flow_uuid(tx_ph, &flow_uuid); |
1997 | u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid; |
1998 | if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) { |
1999 | os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n" , |
2000 | pcb->ipsec_ifp->if_xname); |
2001 | data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT; |
2002 | } |
2003 | |
2004 | lck_mtx_lock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2005 | struct ip *ip = mtod(data, struct ip *); |
2006 | u_int ip_version = ip->ip_v; |
2007 | switch (ip_version) { |
2008 | case 4: { |
2009 | af = AF_INET; |
2010 | ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip); |
2011 | ip->ip_off = ntohs(ip->ip_off); |
2012 | |
2013 | if (length < ip->ip_len) { |
2014 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n" , |
2015 | pcb->ipsec_ifp->if_xname, length, ip->ip_len); |
2016 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2017 | STATS_INC(nifs, NETIF_STATS_DROP); |
2018 | mbuf_freem(mbuf: data); |
2019 | data = NULL; |
2020 | } else { |
2021 | data = esp4_input_extended(data, off: sizeof(struct ip), interface: pcb->ipsec_ifp); |
2022 | } |
2023 | break; |
2024 | } |
2025 | case 6: { |
2026 | if (length < sizeof(struct ip6_hdr)) { |
2027 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n" , |
2028 | pcb->ipsec_ifp->if_xname, length); |
2029 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2030 | STATS_INC(nifs, NETIF_STATS_DROP); |
2031 | mbuf_freem(mbuf: data); |
2032 | data = NULL; |
2033 | } else { |
2034 | af = AF_INET6; |
2035 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
2036 | const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen); |
2037 | if (length < ip6_len) { |
2038 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n" , |
2039 | pcb->ipsec_ifp->if_xname, length, ip6_len); |
2040 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2041 | STATS_INC(nifs, NETIF_STATS_DROP); |
2042 | mbuf_freem(mbuf: data); |
2043 | data = NULL; |
2044 | } else { |
2045 | int offset = sizeof(struct ip6_hdr); |
2046 | esp6_input_extended(mp: &data, offp: &offset, proto: ip6->ip6_nxt, interface: pcb->ipsec_ifp); |
2047 | } |
2048 | } |
2049 | break; |
2050 | } |
2051 | default: { |
2052 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n" , |
2053 | pcb->ipsec_ifp->if_xname, ip_version); |
2054 | STATS_INC(nifs, NETIF_STATS_DROP); |
2055 | mbuf_freem(mbuf: data); |
2056 | data = NULL; |
2057 | break; |
2058 | } |
2059 | } |
2060 | lck_mtx_unlock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2061 | } else { |
2062 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n" , pcb->ipsec_ifp->if_xname, length, error); |
2063 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
2064 | STATS_INC(nifs, NETIF_STATS_DROP); |
2065 | mbuf_freem(mbuf: data); |
2066 | data = NULL; |
2067 | } |
2068 | } else { |
2069 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n" , pcb->ipsec_ifp->if_xname, error); |
2070 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF); |
2071 | STATS_INC(nifs, NETIF_STATS_DROP); |
2072 | } |
2073 | } else { |
2074 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n" , pcb->ipsec_ifp->if_xname, length); |
2075 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2076 | STATS_INC(nifs, NETIF_STATS_DROP); |
2077 | } |
2078 | |
2079 | if (data == NULL) { |
// Failed to get decrypted data
2081 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2082 | continue; |
2083 | } |
2084 | |
2085 | length = mbuf_pkthdr_len(mbuf: data); |
2086 | if (length > PP_BUF_SIZE_DEF(rx_pp)) { |
2087 | // Flush data |
2088 | mbuf_freem(mbuf: data); |
2089 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2090 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2091 | STATS_INC(nifs, NETIF_STATS_DROP); |
2092 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n" , |
2093 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
2094 | continue; |
2095 | } |
2096 | |
2097 | mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp); |
2098 | |
2099 | // Fillout rx packet |
2100 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
2101 | VERIFY(rx_buf != NULL); |
2102 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
2103 | VERIFY(rx_baddr != NULL); |
2104 | |
2105 | // Copy-in data from mbuf to buflet |
2106 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
2107 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
2108 | |
2109 | // Finalize and attach the packet |
2110 | error = kern_buflet_set_data_offset(rx_buf, 0); |
2111 | VERIFY(error == 0); |
2112 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
2113 | VERIFY(error == 0); |
2114 | error = kern_packet_set_link_header_offset(rx_ph, 0); |
2115 | VERIFY(error == 0); |
2116 | error = kern_packet_set_network_header_offset(rx_ph, 0); |
2117 | VERIFY(error == 0); |
2118 | error = kern_packet_finalize(rx_ph); |
2119 | VERIFY(error == 0); |
2120 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
2121 | VERIFY(error == 0); |
2122 | |
2123 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
2124 | STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT); |
2125 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
2126 | |
2127 | rx_ring_stats.kcrsi_slots_transferred++; |
2128 | rx_ring_stats.kcrsi_bytes_transferred += length; |
2129 | |
2130 | if (!pcb->ipsec_ext_ifdata_stats) { |
2131 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0); |
2132 | } |
2133 | |
2134 | mbuf_freem(mbuf: data); |
2135 | |
2136 | rx_pslot = rx_slot; |
2137 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
2138 | } |
2139 | |
2140 | done: |
2141 | if (tx_pslot) { |
2142 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
2143 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
2144 | (void)kern_channel_reclaim(tx_ring); |
2145 | } |
2146 | |
2147 | // Unlock first, then exit ring |
2148 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2149 | if (tx_ring != NULL) { |
2150 | if (tx_pslot != NULL) { |
2151 | kern_channel_notify(tx_ring, flags: 0); |
2152 | } |
2153 | kr_exit(tx_ring); |
2154 | } |
2155 | |
2156 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2157 | } |
2158 | |
2159 | if (rx_pslot) { |
2160 | kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot); |
2161 | kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats); |
2162 | } |
2163 | |
2165 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2166 | |
2167 | ipsec_data_move_end(pcb); |
2168 | return 0; |
2169 | } |
2170 | |
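/*
 * Transforms one encrypted kpipe packet into a decrypted netif packet
 * without an intermediate mbuf: validates lengths, copies the IP header
 * into the netif buflet, and has esp_kpipe_input() decrypt the payload in
 * place. The wake-packet flag is carried over before the packet is
 * finalized; malformed input returns EMSGSIZE/EBADMSG and counts a drop.
 */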
2171 | static errno_t |
2172 | ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb, |
2173 | struct kern_channel_ring_stat_increment *tx_ring_stats, |
2174 | struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph) |
2175 | { |
2176 | kern_buflet_t kpipe_buf = NULL, netif_buf = NULL; |
2177 | uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL; |
2178 | uuid_t flow_uuid; |
2179 | size_t iphlen = 0; |
2180 | uint32_t kpipe_buf_len = 0, netif_buf_lim = 0; |
2181 | int err = 0; |
2182 | |
2183 | VERIFY(kpipe_ph != 0); |
2184 | VERIFY(netif_ph != 0); |
2185 | VERIFY(pcb != NULL); |
2186 | VERIFY(tx_ring_stats != NULL); |
2187 | VERIFY(nifs != NULL); |
2188 | |
2189 | kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL); |
2190 | VERIFY(kpipe_buf != NULL); |
2191 | kpipe_baddr = kern_buflet_get_data_address(kpipe_buf); |
2192 | VERIFY(kpipe_baddr != NULL); |
2193 | kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf); |
2194 | kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf); |
2195 | |
2196 | netif_buf = kern_packet_get_next_buflet(netif_ph, NULL); |
2197 | VERIFY(netif_buf != NULL); |
2198 | netif_baddr = kern_buflet_get_data_address(netif_buf); |
2199 | VERIFY(netif_baddr != NULL); |
2200 | netif_baddr += kern_buflet_get_data_offset(netif_buf); |
2201 | netif_buf_lim = __buflet_get_data_limit(buf: netif_buf); |
2202 | netif_buf_lim -= __buflet_get_data_offset(buf: netif_buf); |
2203 | |
2204 | if (kpipe_buf_len > pcb->ipsec_slot_size) { |
2205 | os_log_info(OS_LOG_DEFAULT, |
2206 | "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length " |
2207 | "%u > pcb ipsec slot size %u" , pcb->ipsec_ifp->if_xname, |
2208 | kpipe_buf_len, pcb->ipsec_slot_size); |
2209 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2210 | err = EMSGSIZE; |
2211 | goto bad; |
2212 | } |
2213 | |
2214 | tx_ring_stats->kcrsi_slots_transferred++; |
2215 | tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len; |
2216 | |
2217 | if (__improbable(kpipe_buf_len < sizeof(struct ip))) { |
2218 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad " |
2219 | "packet length %u\n" , pcb->ipsec_ifp->if_xname, kpipe_buf_len); |
2220 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2221 | err = EBADMSG; |
2222 | goto bad; |
2223 | } |
2224 | |
2225 | struct ip *ip = (struct ip *)(void *)kpipe_baddr; |
2226 | ASSERT(IP_HDR_ALIGNED_P(ip)); |
2227 | |
2228 | u_int ip_vers = ip->ip_v; |
2229 | switch (ip_vers) { |
2230 | case IPVERSION: { |
2231 | #ifdef _IP_VHL |
2232 | iphlen = IP_VHL_HL(ip->ip_vhl) << 2; |
2233 | #else /* _IP_VHL */ |
2234 | iphlen = ip->ip_hl << 2; |
2235 | #endif /* _IP_VHL */ |
2236 | break; |
2237 | } |
2238 | case 6: { |
2239 | iphlen = sizeof(struct ip6_hdr); |
2240 | break; |
2241 | } |
2242 | default: { |
2243 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad " |
2244 | "ip version %u\n" , pcb->ipsec_ifp->if_xname, ip_vers); |
2245 | err = EBADMSG; |
2246 | goto bad; |
2247 | } |
2248 | } |
2249 | |
2250 | if (__improbable(kpipe_buf_len < iphlen)) { |
2251 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad " |
2252 | "packet length %u\n" , pcb->ipsec_ifp->if_xname, kpipe_buf_len); |
2253 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2254 | err = EBADMSG; |
2255 | goto bad; |
2256 | } |
2257 | |
2258 | if (__improbable(netif_buf_lim < iphlen)) { |
2259 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif " |
2260 | "buffer length %u too short\n" , pcb->ipsec_ifp->if_xname, netif_buf_lim); |
2261 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2262 | err = EBADMSG; |
2263 | goto bad; |
2264 | } |
2265 | |
2266 | memcpy(dst: netif_baddr, src: kpipe_baddr, n: iphlen); |
2267 | __buflet_set_data_length(buf: netif_buf, dlen: (uint16_t)iphlen); |
2268 | |
2269 | lck_mtx_lock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2270 | err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph); |
2271 | lck_mtx_unlock(lck: &pcb->ipsec_kpipe_decrypt_lock); |
2272 | |
2273 | if (__improbable((err != 0))) { |
2274 | goto bad; |
2275 | } |
2276 | |
2277 | kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid); |
2278 | uint8_t *id_8 = (uint8_t *)(uintptr_t)flow_uuid; |
2279 | if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) { |
2280 | os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet " |
2281 | "flag is set\n" , pcb->ipsec_ifp->if_xname); |
2282 | __packet_set_wake_flag(ph: netif_ph); |
2283 | } |
2284 | |
2285 | kern_packet_clear_flow_uuid(netif_ph); |
2286 | err = kern_buflet_set_data_offset(netif_buf, 0); |
2287 | VERIFY(err == 0); |
2288 | err = kern_packet_set_link_header_offset(netif_ph, 0); |
2289 | VERIFY(err == 0); |
2290 | err = kern_packet_set_network_header_offset(netif_ph, 0); |
2291 | VERIFY(err == 0); |
2292 | err = kern_packet_finalize(netif_ph); |
2293 | VERIFY(err == 0); |
2294 | |
2295 | return 0; |
2296 | bad: |
2297 | STATS_INC(nifs, NETIF_STATS_DROP); |
2298 | return err; |
2299 | } |
2300 | |
2301 | |
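/*
 * Netif RX sync, packet flavor. The legacy input chain is handled exactly
 * as in the mbuf flavor, but encrypted kpipe packets are converted
 * buffer-to-buffer by ipsec_transform_kpipe_pkt_to_netif_pkt() instead of
 * taking a round trip through an mbuf copy.
 */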
2302 | static errno_t |
2303 | ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
2304 | kern_channel_ring_t rx_ring, uint32_t flags) |
2305 | { |
2306 | #pragma unused(nxprov) |
2307 | #pragma unused(flags) |
2308 | struct ipsec_pcb *pcb = kern_nexus_get_context(nexus); |
2309 | struct kern_channel_ring_stat_increment rx_ring_stats; |
2310 | |
2311 | struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats; |
2312 | |
2313 | if (!ipsec_data_move_begin(pcb)) { |
2314 | os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n" , __func__, if_name(pcb->ipsec_ifp)); |
2315 | return 0; |
2316 | } |
2317 | |
2318 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2319 | |
2320 | // Reclaim user-released slots |
2321 | (void) kern_channel_reclaim(rx_ring); |
2322 | |
2323 | STATS_INC(nifs, NETIF_STATS_RX_SYNC); |
2324 | |
2325 | uint32_t avail = kern_channel_available_slot_count(ring: rx_ring); |
2326 | if (avail == 0) { |
2327 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2328 | ipsec_data_move_end(pcb); |
2329 | return 0; |
2330 | } |
2331 | |
2332 | struct kern_pbufpool *rx_pp = rx_ring->ckr_pp; |
2333 | VERIFY(rx_pp != NULL); |
2334 | bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats)); |
2335 | kern_channel_slot_t rx_pslot = NULL; |
2336 | kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL); |
2337 | |
2338 | while (rx_slot != NULL) { |
2339 | // Check for a waiting packet |
2340 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
2341 | mbuf_t data = pcb->ipsec_input_chain; |
2342 | if (data == NULL) { |
2343 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2344 | break; |
2345 | } |
2346 | |
2347 | // Allocate rx packet |
2348 | kern_packet_t rx_ph = 0; |
2349 | errno_t error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
2350 | if (__improbable(error != 0)) { |
2351 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
2352 | STATS_INC(nifs, NETIF_STATS_DROP); |
2353 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2354 | break; |
2355 | } |
2356 | |
2357 | // Advance waiting packets |
2358 | if (pcb->ipsec_input_chain_count > 0) { |
2359 | pcb->ipsec_input_chain_count--; |
2360 | } |
2361 | pcb->ipsec_input_chain = data->m_nextpkt; |
2362 | data->m_nextpkt = NULL; |
2363 | if (pcb->ipsec_input_chain == NULL) { |
2364 | pcb->ipsec_input_chain_last = NULL; |
2365 | } |
2366 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2367 | |
2368 | size_t length = mbuf_pkthdr_len(mbuf: data); |
2369 | |
2370 | if (length < sizeof(struct ip)) { |
2371 | // Flush data |
2372 | mbuf_freem(mbuf: data); |
2373 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2374 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2375 | STATS_INC(nifs, NETIF_STATS_DROP); |
os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet too short to hold an IP header %zu < %zu\n" ,
2377 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip)); |
2378 | continue; |
2379 | } |
2380 | |
2381 | uint32_t af = 0; |
2382 | struct ip *ip = mtod(data, struct ip *); |
2383 | u_int ip_version = ip->ip_v; |
2384 | switch (ip_version) { |
2385 | case 4: { |
2386 | af = AF_INET; |
2387 | break; |
2388 | } |
2389 | case 6: { |
2390 | af = AF_INET6; |
2391 | break; |
2392 | } |
2393 | default: { |
2394 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n" , |
2395 | pcb->ipsec_ifp->if_xname, ip_version); |
2396 | break; |
2397 | } |
2398 | } |
2399 | |
2400 | if (length > PP_BUF_SIZE_DEF(rx_pp) || |
2401 | (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) { |
2402 | // We need to fragment to send up into the netif |
2403 | |
2404 | u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp); |
2405 | if (pcb->ipsec_frag_size_set && |
2406 | pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) { |
2407 | fragment_mtu = pcb->ipsec_input_frag_size; |
2408 | } |
2409 | |
2410 | mbuf_t fragment_chain = NULL; |
2411 | switch (af) { |
2412 | case AF_INET: { |
2413 | // ip_fragment expects the length in host order |
2414 | ip->ip_len = ntohs(ip->ip_len); |
2415 | |
2416 | // ip_fragment will modify the original data, don't free |
2417 | int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE); |
2418 | if (fragment_error == 0 && data != NULL) { |
2419 | fragment_chain = data; |
2420 | } else { |
2421 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2422 | STATS_INC(nifs, NETIF_STATS_DROP); |
2423 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n" , |
2424 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
2425 | } |
2426 | break; |
2427 | } |
2428 | case AF_INET6: { |
2429 | if (length < sizeof(struct ip6_hdr)) { |
2430 | mbuf_freem(mbuf: data); |
2431 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2432 | STATS_INC(nifs, NETIF_STATS_DROP); |
2433 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n" , |
2434 | pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr)); |
2435 | } else { |
2436 | // ip6_do_fragmentation will free the original data on success only |
2437 | struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *); |
2438 | |
2439 | int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr), |
2440 | ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data))); |
2441 | if (fragment_error == 0 && data != NULL) { |
2442 | fragment_chain = data; |
2443 | } else { |
2444 | mbuf_freem(mbuf: data); |
2445 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2446 | STATS_INC(nifs, NETIF_STATS_DROP); |
2447 | os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n" , |
2448 | pcb->ipsec_ifp->if_xname, length, fragment_error); |
2449 | } |
2450 | } |
2451 | break; |
2452 | } |
2453 | default: { |
2454 | // Cannot fragment unknown families |
2455 | mbuf_freem(mbuf: data); |
2456 | STATS_INC(nifs, NETIF_STATS_DROP_BADLEN); |
2457 | STATS_INC(nifs, NETIF_STATS_DROP); |
os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown address family for legacy decrypted packet of length %zu > %u\n" ,
2459 | pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp)); |
2460 | break; |
2461 | } |
2462 | } |
2463 | |
2464 | if (fragment_chain != NULL) { |
2465 | // Add fragments to chain before continuing |
2466 | lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock); |
2467 | if (pcb->ipsec_input_chain != NULL) { |
2468 | pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain; |
2469 | } else { |
2470 | pcb->ipsec_input_chain = fragment_chain; |
2471 | } |
2472 | pcb->ipsec_input_chain_count++; |
2473 | while (fragment_chain->m_nextpkt) { |
2474 | VERIFY(fragment_chain != fragment_chain->m_nextpkt); |
2475 | fragment_chain = fragment_chain->m_nextpkt; |
2476 | pcb->ipsec_input_chain_count++; |
2477 | } |
2478 | pcb->ipsec_input_chain_last = fragment_chain; |
2479 | lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock); |
2480 | } |
2481 | |
2482 | // Make sure to free unused rx packet |
2483 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2484 | |
2485 | continue; |
2486 | } |
2487 | |
2488 | mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp); |
2489 | |
2490 | // Fillout rx packet |
2491 | kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL); |
2492 | VERIFY(rx_buf != NULL); |
2493 | void *rx_baddr = kern_buflet_get_data_address(rx_buf); |
2494 | VERIFY(rx_baddr != NULL); |
2495 | |
2496 | // Copy-in data from mbuf to buflet |
2497 | mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr); |
2498 | kern_packet_clear_flow_uuid(rx_ph); // Zero flow id |
2499 | |
2500 | // Finalize and attach the packet |
2501 | error = kern_buflet_set_data_offset(rx_buf, 0); |
2502 | VERIFY(error == 0); |
2503 | error = kern_buflet_set_data_length(rx_buf, (uint16_t)length); |
2504 | VERIFY(error == 0); |
2505 | error = kern_packet_set_headroom(rx_ph, 0); |
2506 | VERIFY(error == 0); |
2507 | error = kern_packet_finalize(rx_ph); |
2508 | VERIFY(error == 0); |
2509 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
2510 | VERIFY(error == 0); |
2511 | |
2512 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
2513 | STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF); |
2514 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
2515 | |
2516 | rx_ring_stats.kcrsi_slots_transferred++; |
2517 | rx_ring_stats.kcrsi_bytes_transferred += length; |
2518 | |
2519 | if (!pcb->ipsec_ext_ifdata_stats) { |
2520 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0); |
2521 | } |
2522 | |
2523 | mbuf_freem(mbuf: data); |
2524 | |
2525 | // Advance ring |
2526 | rx_pslot = rx_slot; |
2527 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
2528 | } |
2529 | |
2530 | for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) { |
2531 | struct kern_channel_ring_stat_increment tx_ring_stats = {}; |
2532 | kern_channel_slot_t tx_pslot = NULL; |
2533 | kern_channel_slot_t tx_slot = NULL; |
2534 | |
2535 | kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx]; |
2536 | if (tx_ring == NULL) { |
// Kernel pipe TX ring not set up yet, nothing to read
2538 | goto done; |
2539 | } |
2540 | |
2541 | // Unlock ipsec before entering ring |
2542 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2543 | |
2544 | (void)kr_enter(tx_ring, TRUE); |
2545 | |
2546 | // Lock again after entering and validate |
2547 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2548 | |
2549 | if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) { |
2550 | goto done; |
2551 | } |
2552 | |
2553 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL); |
2554 | if (tx_slot == NULL) { |
2555 | // Nothing to read, don't bother signalling |
2556 | goto done; |
2557 | } |
2558 | |
2559 | while (rx_slot != NULL && tx_slot != NULL) { |
2560 | errno_t error = 0; |
2561 | |
2562 | // Allocate rx packet |
2563 | kern_packet_t rx_ph = 0; |
2564 | error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph); |
2565 | if (__improbable(error != 0)) { |
2566 | STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT); |
2567 | STATS_INC(nifs, NETIF_STATS_DROP); |
2568 | break; |
2569 | } |
2570 | |
2571 | kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot); |
2572 | tx_pslot = tx_slot; |
2573 | tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL); |
2574 | if (tx_ph == 0) { |
2575 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2576 | continue; |
2577 | } |
2578 | |
2579 | error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb, |
2580 | tx_ring_stats: &tx_ring_stats, nifs, kpipe_ph: tx_ph, netif_ph: rx_ph); |
2581 | if (error != 0) { |
2582 | // Failed to get decrypted packet |
2583 | kern_pbufpool_free(pbufpool: rx_pp, rx_ph); |
2584 | continue; |
2585 | } |
2586 | |
2587 | error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph); |
2588 | VERIFY(error == 0); |
2589 | |
2590 | STATS_INC(nifs, NETIF_STATS_RX_PACKETS); |
2591 | STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT); |
2592 | |
2593 | bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0); |
2594 | |
2595 | rx_ring_stats.kcrsi_slots_transferred++; |
2596 | rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph); |
2597 | |
2598 | if (!pcb->ipsec_ext_ifdata_stats) { |
2599 | ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, |
2600 | bytes_in: kern_packet_get_data_length(rx_ph), errors_in: 0); |
2601 | } |
2602 | |
2603 | rx_pslot = rx_slot; |
2604 | rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL); |
2605 | } |
2606 | |
2607 | done: |
2608 | if (tx_pslot) { |
2609 | kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot); |
2610 | kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats); |
2611 | (void)kern_channel_reclaim(tx_ring); |
2612 | } |
2613 | |
2614 | // Unlock first, then exit ring |
2615 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2616 | if (tx_ring != NULL) { |
2617 | if (tx_pslot != NULL) { |
2618 | kern_channel_notify(tx_ring, flags: 0); |
2619 | } |
2620 | kr_exit(tx_ring); |
2621 | } |
2622 | |
2623 | lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock); |
2624 | } |
2625 | |
2626 | if (rx_pslot) { |
2627 | kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot); |
2628 | kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats); |
2629 | } |
2630 | |
2632 | lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock); |
2633 | |
2634 | ipsec_data_move_end(pcb); |
2635 | return 0; |
2636 | } |
2637 | |
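/*
 * RX sync dispatcher: selects the mbuf copy path or the direct packet path
 * based on the ipsec_kpipe_mbuf setting.
 */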
2638 | static errno_t |
2639 | ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus, |
2640 | kern_channel_ring_t rx_ring, uint32_t flags) |
2641 | { |
2642 | if (__improbable(ipsec_kpipe_mbuf == 1)) { |
2643 | return ipsec_netif_sync_rx_mbuf(nxprov, nexus, rx_ring, flags); |
2644 | } else { |
2645 | return ipsec_netif_sync_rx_packet(nxprov, nexus, rx_ring, flags); |
2646 | } |
2647 | } |
2648 | |
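/*
 * Creates and registers the netif nexus provider for this pcb: configures
 * slot and ring attributes (switching to the WMM ring layout when enabled),
 * sizes a shared TX/RX packet pool for the rings plus the AQM queues, and
 * allocates the net provider instance that brings up the ifnet. On failure
 * any partially created state is released.
 */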
2649 | static errno_t |
2650 | ipsec_nexus_ifattach(struct ipsec_pcb *pcb, |
2651 | struct ifnet_init_eparams *init_params, |
2652 | struct ifnet **ifp) |
2653 | { |
2654 | errno_t err; |
2655 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2656 | struct kern_nexus_net_init net_init; |
2657 | struct kern_pbufpool_init pp_init; |
2658 | |
2659 | nexus_name_t provider_name; |
2660 | snprintf((char *)provider_name, count: sizeof(provider_name), |
2661 | "com.apple.netif.%s" , pcb->ipsec_if_xname); |
2662 | |
2663 | struct kern_nexus_provider_init prov_init = { |
2664 | .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION, |
2665 | .nxpi_flags = NXPIF_VIRTUAL_DEVICE, |
2666 | .nxpi_pre_connect = ipsec_nexus_pre_connect, |
2667 | .nxpi_connected = ipsec_nexus_connected, |
2668 | .nxpi_pre_disconnect = ipsec_netif_pre_disconnect, |
2669 | .nxpi_disconnected = ipsec_nexus_disconnected, |
2670 | .nxpi_ring_init = ipsec_netif_ring_init, |
2671 | .nxpi_ring_fini = ipsec_netif_ring_fini, |
2672 | .nxpi_slot_init = NULL, |
2673 | .nxpi_slot_fini = NULL, |
2674 | .nxpi_sync_tx = ipsec_netif_sync_tx, |
2675 | .nxpi_sync_rx = ipsec_netif_sync_rx, |
2676 | .nxpi_tx_doorbell = ipsec_netif_tx_doorbell, |
2677 | }; |
2678 | |
2679 | nexus_attr_t nxa = NULL; |
2680 | err = kern_nexus_attr_create(&nxa); |
2681 | IPSEC_IF_VERIFY(err == 0); |
2682 | if (err != 0) { |
2683 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n" , |
2684 | __func__, err); |
2685 | goto failed; |
2686 | } |
2687 | |
2688 | uint64_t slot_buffer_size = pcb->ipsec_slot_size; |
2689 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size); |
2690 | VERIFY(err == 0); |
2691 | |
2692 | // Reset ring size for netif nexus to limit memory usage |
2693 | uint64_t ring_size = pcb->ipsec_netif_ring_size; |
2694 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_SLOTS, value: ring_size); |
2695 | VERIFY(err == 0); |
2696 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_SLOTS, value: ring_size); |
2697 | VERIFY(err == 0); |
2698 | |
2699 | assert(err == 0); |
2700 | |
2701 | if (ipsec_in_wmm_mode(pcb)) { |
2702 | os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n" , |
2703 | __func__, pcb->ipsec_if_xname); |
2704 | |
2705 | init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED; |
2706 | |
2707 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_RINGS, |
2708 | IPSEC_NETIF_WMM_TX_RING_COUNT); |
2709 | VERIFY(err == 0); |
2710 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_RINGS, |
2711 | IPSEC_NETIF_WMM_RX_RING_COUNT); |
2712 | VERIFY(err == 0); |
2713 | |
2714 | err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_QMAP, value: NEXUS_QMAP_TYPE_WMM); |
2715 | VERIFY(err == 0); |
2716 | } |
2717 | |
2718 | pcb->ipsec_netif_txring_size = ring_size; |
2719 | |
2720 | bzero(s: &pp_init, n: sizeof(pp_init)); |
2721 | pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION; |
2722 | pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE; |
2723 | // Note: we need more packets than can be held in the tx and rx rings because |
2724 | // packets can also be in the AQM queue(s) |
2725 | pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1); |
2726 | pp_init.kbi_bufsize = pcb->ipsec_slot_size; |
2727 | pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE; |
2728 | pp_init.kbi_max_frags = 1; |
2729 | (void) snprintf((char *)pp_init.kbi_name, count: sizeof(pp_init.kbi_name), |
2730 | "%s" , provider_name); |
2731 | pp_init.kbi_ctx = NULL; |
2732 | pp_init.kbi_ctx_retain = NULL; |
2733 | pp_init.kbi_ctx_release = NULL; |
2734 | |
2735 | err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL); |
2736 | if (err != 0) { |
os_log_error(OS_LOG_DEFAULT, "%s pbufpool create failed, error %d\n" , __func__, err);
2738 | goto failed; |
2739 | } |
2740 | |
2741 | err = kern_nexus_controller_register_provider(ctl: controller, |
2742 | dom_prov_uuid: ipsec_nx_dom_prov, |
2743 | provider_name, |
2744 | init: &prov_init, |
2745 | init_len: sizeof(prov_init), |
2746 | nxa, |
2747 | nx_prov_uuid: &pcb->ipsec_nx.if_provider); |
2748 | IPSEC_IF_VERIFY(err == 0); |
2749 | if (err != 0) { |
2750 | os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n" , |
2751 | __func__, err); |
2752 | goto failed; |
2753 | } |
2754 | |
2755 | bzero(s: &net_init, n: sizeof(net_init)); |
2756 | net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION; |
2757 | net_init.nxneti_flags = 0; |
2758 | net_init.nxneti_eparams = init_params; |
2759 | net_init.nxneti_lladdr = NULL; |
2760 | net_init.nxneti_prepare = ipsec_netif_prepare; |
2761 | net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp; |
2762 | net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp; |
2763 | err = kern_nexus_controller_alloc_net_provider_instance(ctl: controller, |
2764 | nx_prov_uuid: pcb->ipsec_nx.if_provider, |
2765 | nexus_context: pcb, |
2766 | NULL, |
2767 | nx_uuid: &pcb->ipsec_nx.if_instance, |
2768 | init: &net_init, |
2769 | ifp); |
2770 | IPSEC_IF_VERIFY(err == 0); |
2771 | if (err != 0) { |
2772 | os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n" , |
2773 | __func__, err); |
2774 | kern_nexus_controller_deregister_provider(ctl: controller, |
2775 | nx_prov_uuid: pcb->ipsec_nx.if_provider); |
2776 | uuid_clear(uu: pcb->ipsec_nx.if_provider); |
2777 | goto failed; |
2778 | } |
2779 | |
2780 | failed: |
2781 | if (nxa) { |
2782 | kern_nexus_attr_destroy(attr: nxa); |
2783 | } |
2784 | if (err && pcb->ipsec_netif_pp != NULL) { |
2785 | kern_pbufpool_destroy(pcb->ipsec_netif_pp); |
2786 | pcb->ipsec_netif_pp = NULL; |
2787 | } |
2788 | return err; |
2789 | } |
2790 | |
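/*
 * Frees a provider instance and deregisters its provider, clearing both
 * UUIDs; either step is skipped when the corresponding UUID is null.
 */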
2791 | static void |
2792 | ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance) |
2793 | { |
2794 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2795 | errno_t err; |
2796 | |
2797 | if (!uuid_is_null(uu: instance)) { |
2798 | err = kern_nexus_controller_free_provider_instance(ctl: controller, |
2799 | nx_uuid: instance); |
2800 | if (err != 0) { |
2801 | os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n" , |
2802 | __func__, err); |
2803 | } |
2804 | uuid_clear(uu: instance); |
2805 | } |
2806 | if (!uuid_is_null(uu: provider)) { |
2807 | err = kern_nexus_controller_deregister_provider(ctl: controller, |
2808 | nx_prov_uuid: provider); |
2809 | if (err != 0) { |
2810 | os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n" , __func__, err); |
2811 | } |
2812 | uuid_clear(uu: provider); |
2813 | } |
2814 | return; |
2815 | } |
2816 | |
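/*
 * Reverses ipsec_nexus_ifattach and ipsec_flowswitch_attach: detaches the
 * flowswitch from the device port, releases the flowswitch and netif
 * providers and instances, destroys the netif packet pool, and clears the
 * nexus state.
 */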
2817 | static void |
2818 | ipsec_nexus_detach(struct ipsec_pcb *pcb) |
2819 | { |
2820 | ipsec_nx_t nx = &pcb->ipsec_nx; |
2821 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2822 | errno_t err; |
2823 | |
2824 | if (!uuid_is_null(uu: nx->fsw_device)) { |
2825 | err = kern_nexus_ifdetach(ctl: controller, |
2826 | nx_uuid: nx->fsw_instance, |
2827 | nx_if_uuid: nx->fsw_device); |
2828 | if (err != 0) { |
os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach flowswitch device failed %d\n" ,
2830 | __func__, err); |
2831 | } |
2832 | } |
2833 | |
2834 | ipsec_detach_provider_and_instance(provider: nx->fsw_provider, |
2835 | instance: nx->fsw_instance); |
2836 | ipsec_detach_provider_and_instance(provider: nx->if_provider, |
2837 | instance: nx->if_instance); |
2838 | |
2839 | if (pcb->ipsec_netif_pp != NULL) { |
2840 | kern_pbufpool_destroy(pcb->ipsec_netif_pp); |
2841 | pcb->ipsec_netif_pp = NULL; |
2842 | } |
2843 | memset(s: nx, c: 0, n: sizeof(*nx)); |
2844 | } |
2845 | |
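/*
 * Registers a flowswitch provider named com.apple.<type_name>.<ifname>
 * using the pcb's slot and ring sizes (with multi-buflet packets when TCP
 * RX aggregation is enabled) and allocates an instance of it, returning
 * both UUIDs to the caller.
 */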
2846 | static errno_t |
2847 | ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb, |
2848 | const char *type_name, |
2849 | const char *ifname, |
2850 | uuid_t *provider, uuid_t *instance) |
2851 | { |
2852 | nexus_attr_t attr = NULL; |
2853 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2854 | uuid_t dom_prov; |
2855 | errno_t err; |
2856 | struct kern_nexus_init init; |
2857 | nexus_name_t provider_name; |
2858 | |
2859 | err = kern_nexus_get_default_domain_provider(type: NEXUS_TYPE_FLOW_SWITCH, |
2860 | dom_prov_uuid: &dom_prov); |
2861 | IPSEC_IF_VERIFY(err == 0); |
2862 | if (err != 0) { |
2863 | os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n" , |
2864 | __func__, type_name, err); |
2865 | goto failed; |
2866 | } |
2867 | |
2868 | err = kern_nexus_attr_create(&attr); |
2869 | IPSEC_IF_VERIFY(err == 0); |
2870 | if (err != 0) { |
2871 | os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n" , |
2872 | __func__, err); |
2873 | goto failed; |
2874 | } |
2875 | |
2876 | uint64_t slot_buffer_size = pcb->ipsec_slot_size; |
2877 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size); |
2878 | VERIFY(err == 0); |
2879 | |
2880 | // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif. |
2881 | uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size; |
2882 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_TX_SLOTS, value: tx_ring_size); |
2883 | VERIFY(err == 0); |
2884 | uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size; |
2885 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_RX_SLOTS, value: rx_ring_size); |
2886 | VERIFY(err == 0); |
2887 | /* |
2888 | * Configure flowswitch to use super-packet (multi-buflet). |
2889 | * This allows flowswitch to perform intra-stack packet aggregation. |
2890 | */ |
2891 | err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_MAX_FRAGS, |
2892 | NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1); |
2893 | VERIFY(err == 0); |
2894 | |
2895 | snprintf((char *)provider_name, count: sizeof(provider_name), |
2896 | "com.apple.%s.%s" , type_name, ifname); |
2897 | err = kern_nexus_controller_register_provider(ctl: controller, |
2898 | dom_prov_uuid: dom_prov, |
2899 | provider_name, |
2900 | NULL, |
2901 | init_len: 0, |
2902 | nxa: attr, |
2903 | nx_prov_uuid: provider); |
2904 | kern_nexus_attr_destroy(attr); |
2905 | attr = NULL; |
2906 | IPSEC_IF_VERIFY(err == 0); |
2907 | if (err != 0) { |
2908 | os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n" , |
2909 | __func__, type_name, err); |
2910 | goto failed; |
2911 | } |
2912 | bzero(s: &init, n: sizeof(init)); |
2913 | init.nxi_version = KERN_NEXUS_CURRENT_VERSION; |
2914 | err = kern_nexus_controller_alloc_provider_instance(ctl: controller, |
2915 | nx_prov_uuid: *provider, |
2916 | NULL, NULL, |
2917 | nx_uuid: instance, init: &init); |
2918 | IPSEC_IF_VERIFY(err == 0); |
2919 | if (err != 0) { |
2920 | os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n" , |
2921 | __func__, type_name, err); |
2922 | kern_nexus_controller_deregister_provider(ctl: controller, |
2923 | nx_prov_uuid: *provider); |
2924 | uuid_clear(uu: *provider); |
2925 | } |
2926 | failed: |
2927 | return err; |
2928 | } |
2929 | |
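/*
 * Creates the flowswitch for this interface, attaches it to the netif
 * device port, and caches the flowswitch agent UUID for later use. On
 * failure the nexus state is torn down and the ifnet is detached (a
 * failed ifnet_detach panics).
 */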
2930 | static errno_t |
2931 | ipsec_flowswitch_attach(struct ipsec_pcb *pcb) |
2932 | { |
2933 | nexus_controller_t controller = kern_nexus_shared_controller(); |
2934 | errno_t err = 0; |
2935 | ipsec_nx_t nx = &pcb->ipsec_nx; |
2936 | |
2937 | // Allocate flowswitch |
2938 | err = ipsec_create_fs_provider_and_instance(pcb, |
2939 | type_name: "flowswitch" , |
2940 | ifname: pcb->ipsec_ifp->if_xname, |
2941 | provider: &nx->fsw_provider, |
2942 | instance: &nx->fsw_instance); |
2943 | if (err != 0) { |
os_log_error(OS_LOG_DEFAULT, "%s: failed to create flowswitch provider and instance\n" ,
2945 | __func__); |
2946 | goto failed; |
2947 | } |
2948 | |
2949 | // Attach flowswitch to device port |
2950 | err = kern_nexus_ifattach(controller, nx_uuid: nx->fsw_instance, |
2951 | NULL, nx_attachee: nx->if_instance, |
2952 | FALSE, nx_if_uuid: &nx->fsw_device); |
2953 | if (err != 0) { |
os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach flowswitch device %d\n" , __func__, err);
2955 | goto failed; |
2956 | } |
2957 | |
2958 | // Extract the agent UUID and save for later |
2959 | struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false); |
2960 | if (flowswitch_nx != NULL) { |
2961 | struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx); |
2962 | if (flowswitch != NULL) { |
2963 | FSW_RLOCK(flowswitch); |
2964 | uuid_copy(dst: nx->fsw_agent, src: flowswitch->fsw_agent_uuid); |
2965 | FSW_UNLOCK(flowswitch); |
2966 | } else { |
2967 | os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n" ); |
2968 | } |
2969 | nx_release(flowswitch_nx); |
2970 | } else { |
2971 | os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n" ); |
2972 | } |
2973 | |
2974 | return 0; |
2975 | |
2976 | failed: |
2977 | ipsec_nexus_detach(pcb); |
2978 | |
2979 | errno_t detach_error = 0; |
2980 | if ((detach_error = ifnet_detach(interface: pcb->ipsec_ifp)) != 0) { |
2981 | panic("ipsec_flowswitch_attach - ifnet_detach failed: %d" , detach_error); |
2982 | /* NOT REACHED */ |
2983 | } |
2984 | |
2985 | return err; |
2986 | } |
2987 | |
2988 | #pragma mark Kernel Pipe Nexus |
2989 | |
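/*
 * Register the shared kernel-pipe nexus provider used by ipsec
 * interfaces. The provider is refcounted: only the first caller
 * creates the controller and registers the provider; later callers
 * just take a reference. TX/RX slot counts fall back from the
 * per-pcb kpipe ring sizes to the netif ring size to the global
 * if_ipsec_ring_size default.
 */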
static errno_t
ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
{
    nexus_attr_t nxa = NULL;
    errno_t result;

    lck_mtx_lock(&ipsec_lock);
    if (ipsec_ncd_refcount++) {
        lck_mtx_unlock(&ipsec_lock);
        return 0;
    }

    result = kern_nexus_controller_create(&ipsec_ncd);
    if (result) {
        os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
            __FUNCTION__, result);
        goto done;
    }

    uuid_t dom_prov;
    result = kern_nexus_get_default_domain_provider(
        NEXUS_TYPE_KERNEL_PIPE, &dom_prov);
    if (result) {
        os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
            __FUNCTION__, result);
        goto done;
    }

    struct kern_nexus_provider_init prov_init = {
        .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
        .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
        .nxpi_pre_connect = ipsec_nexus_pre_connect,
        .nxpi_connected = ipsec_nexus_connected,
        .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
        .nxpi_disconnected = ipsec_nexus_disconnected,
        .nxpi_ring_init = ipsec_kpipe_ring_init,
        .nxpi_ring_fini = ipsec_kpipe_ring_fini,
        .nxpi_slot_init = NULL,
        .nxpi_slot_fini = NULL,
        .nxpi_sync_tx = ipsec_kpipe_sync_tx,
        .nxpi_sync_rx = ipsec_kpipe_sync_rx,
        .nxpi_tx_doorbell = NULL,
    };

    result = kern_nexus_attr_create(&nxa);
    if (result) {
        os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
            __FUNCTION__, result);
        goto done;
    }

    uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
    result = kern_nexus_attr_set(nxa, NEXUS_ATTR_SLOT_BUF_SIZE, slot_buffer_size);
    VERIFY(result == 0);

    // Reset ring size for kernel pipe nexus to limit memory usage.
    // Note: it's better to have fewer slots on the kpipe TX ring than on
    // the netif ring, so back pressure is applied at the AQM layer.
    uint64_t ring_size =
        pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
        pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
        if_ipsec_ring_size;
    result = kern_nexus_attr_set(nxa, NEXUS_ATTR_TX_SLOTS, ring_size);
    VERIFY(result == 0);

    ring_size =
        pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
        pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
        if_ipsec_ring_size;
    result = kern_nexus_attr_set(nxa, NEXUS_ATTR_RX_SLOTS, ring_size);
    VERIFY(result == 0);

    result = kern_nexus_controller_register_provider(ipsec_ncd,
        dom_prov,
        (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
        &prov_init,
        sizeof(prov_init),
        nxa,
        &ipsec_kpipe_uuid);
    if (result) {
        os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
            __FUNCTION__, result);
        goto done;
    }

done:
    if (nxa) {
        kern_nexus_attr_destroy(nxa);
    }

    if (result) {
        if (ipsec_ncd) {
            kern_nexus_controller_destroy(ipsec_ncd);
            ipsec_ncd = NULL;
        }
        ipsec_ncd_refcount = 0;
    }

    lck_mtx_unlock(&ipsec_lock);

    return result;
}

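/*
 * Drop a reference on the shared kernel-pipe nexus; the last
 * reference destroys the controller.
 */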
static void
ipsec_unregister_kernel_pipe_nexus(void)
{
    lck_mtx_lock(&ipsec_lock);

    VERIFY(ipsec_ncd_refcount > 0);

    if (--ipsec_ncd_refcount == 0) {
        kern_nexus_controller_destroy(ipsec_ncd);
        ipsec_ncd = NULL;
    }

    lck_mtx_unlock(&ipsec_lock);
}

/*
 * This structure holds onto kpipe channels that still need to be
 * freed later; they are cleared from the pcb while the pcb lock
 * is held.
 */
struct ipsec_detached_channels {
    int count;
    kern_pbufpool_t pp;
    uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];
};

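/*
 * Move the pcb's kpipe channel UUIDs and packet buffer pool into
 * `dc' while the pcb lock is held exclusively, clearing them from
 * the pcb. The actual teardown happens later, in
 * ipsec_free_channels(), outside the lock.
 */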
static void
ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
{
    LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
            VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
        }
        dc->count = 0;
        return;
    }

    dc->count = pcb->ipsec_kpipe_count;

    VERIFY(dc->count >= 0);
    VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);

    for (int i = 0; i < dc->count; i++) {
        VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
        uuid_copy(dc->uuids[i], pcb->ipsec_kpipe_uuid[i]);
        uuid_clear(pcb->ipsec_kpipe_uuid[i]);
    }
    for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
        VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
    }

    if (dc->count) {
        VERIFY(pcb->ipsec_kpipe_pp);
    } else {
        VERIFY(!pcb->ipsec_kpipe_pp);
    }

    dc->pp = pcb->ipsec_kpipe_pp;

    pcb->ipsec_kpipe_pp = NULL;

    ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
}

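/*
 * Free the channel instances and packet buffer pool captured by
 * ipsec_detach_channels(), then release the kernel-pipe nexus
 * reference. Called without holding the pcb lock.
 */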
static void
ipsec_free_channels(struct ipsec_detached_channels *dc)
{
    if (!dc->count) {
        return;
    }

    for (int i = 0; i < dc->count; i++) {
        errno_t result;
        result = kern_nexus_controller_free_provider_instance(ipsec_ncd, dc->uuids[i]);
        VERIFY(!result);
    }

    VERIFY(dc->pp);
    kern_pbufpool_destroy(dc->pp);

    ipsec_unregister_kernel_pipe_nexus();

    memset(dc, 0, sizeof(*dc));
}

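/*
 * Allocate and bind the kernel-pipe channels for this pcb. The calling
 * process must hold the PRIV_SKYWALK_REGISTER_KERNEL_PIPE privilege.
 * Each kpipe instance is bound to an explicit pid, to an executable
 * UUID, or to the calling process by default; all instances are
 * unwound if any allocation or bind fails.
 */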
static errno_t
ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
{
    struct kern_nexus_init init;
    struct kern_pbufpool_init pp_init;
    errno_t result;

    kauth_cred_t cred = kauth_cred_get();
    result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, 0);
    if (result) {
        return result;
    }

    VERIFY(pcb->ipsec_kpipe_count);
    VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));

    result = ipsec_register_kernel_pipe_nexus(pcb);

    lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

    if (result) {
        os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
            __func__, pcb->ipsec_if_xname);
        goto done;
    }

    VERIFY(ipsec_ncd);

    bzero(&pp_init, sizeof(pp_init));
    pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
    pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
    // Note: we only need as many packets as can be held in the tx and rx rings
    pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
    pp_init.kbi_bufsize = pcb->ipsec_slot_size;
    pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
    pp_init.kbi_max_frags = 1;
    pp_init.kbi_flags |= KBIF_QUANTUM;
    (void) snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
        "com.apple.kpipe.%s", pcb->ipsec_if_xname);
    pp_init.kbi_ctx = NULL;
    pp_init.kbi_ctx_retain = NULL;
    pp_init.kbi_ctx_release = NULL;

    result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
        NULL);
    if (result != 0) {
        os_log_error(OS_LOG_DEFAULT, "%s: %s pbufpool create failed, error %d\n",
            __func__, pcb->ipsec_if_xname, result);
        goto done;
    }

    bzero(&init, sizeof(init));
    init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
    init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;

    for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
        VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
        result = kern_nexus_controller_alloc_provider_instance(ipsec_ncd,
            ipsec_kpipe_uuid, pcb, NULL, &pcb->ipsec_kpipe_uuid[i], &init);

        if (result == 0) {
            nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
            const bool has_proc_uuid = !uuid_is_null(pcb->ipsec_kpipe_proc_uuid);
            pid_t pid = pcb->ipsec_kpipe_pid;
            if (!pid && !has_proc_uuid) {
                pid = proc_pid(proc);
            }
            result = kern_nexus_controller_bind_provider_instance(ipsec_ncd,
                pcb->ipsec_kpipe_uuid[i], &port,
                pid, has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
                0, has_proc_uuid ? NEXUS_BIND_EXEC_UUID : NEXUS_BIND_PID);
        }

        if (result) {
            /* Unwind all of them on error */
            for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
                if (!uuid_is_null(pcb->ipsec_kpipe_uuid[j])) {
                    kern_nexus_controller_free_provider_instance(ipsec_ncd,
                        pcb->ipsec_kpipe_uuid[j]);
                    uuid_clear(pcb->ipsec_kpipe_uuid[j]);
                }
            }
            goto done;
        }
    }

done:
    lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

    if (result) {
        if (pcb->ipsec_kpipe_pp != NULL) {
            kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
            pcb->ipsec_kpipe_pp = NULL;
        }
        ipsec_unregister_kernel_pipe_nexus();
    } else {
        ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
    }

    return result;
}

#endif // IPSEC_NEXUS


/* Kernel control functions */

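/*
 * Return 1 if an ipsec pcb with the given kernel-control unit already
 * exists. Called with ipsec_lock held.
 */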
static inline int
ipsec_find_by_unit(u_int32_t unit)
{
    struct ipsec_pcb *next_pcb = NULL;
    int found = 0;

    TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
        if (next_pcb->ipsec_unit == unit) {
            found = 1;
            break;
        }
    }

    return found;
}

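/*
 * Destroy the pcb's locks, unlink it from the global list (taking
 * ipsec_lock unless the caller already holds it), and return the pcb
 * to its zone.
 */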
static inline void
ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
{
#if IPSEC_NEXUS
    mbuf_freem_list(pcb->ipsec_input_chain);
    pcb->ipsec_input_chain_count = 0;
    lck_mtx_destroy(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp);
    lck_mtx_destroy(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp);
    lck_mtx_destroy(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp);
#endif // IPSEC_NEXUS
    lck_mtx_destroy(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp);
    lck_rw_destroy(&pcb->ipsec_pcb_lock, &ipsec_lck_grp);
    if (!locked) {
        lck_mtx_lock(&ipsec_lock);
    }
    TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
    if (!locked) {
        lck_mtx_unlock(&ipsec_lock);
    }
    zfree(ipsec_pcb_zone, pcb);
}

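/*
 * Reserve a kernel-control unit and a unique interface id, then
 * allocate the pcb and insert it into the global list, which is kept
 * sorted by unique id. A *unit of 0 requests the lowest free unit.
 */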
static errno_t
ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
{
    if (unit == NULL || unitinfo == NULL) {
        return EINVAL;
    }

    lck_mtx_lock(&ipsec_lock);

    /* Find next available unit */
    if (*unit == 0) {
        *unit = 1;
        while (*unit != ctl_maxunit) {
            if (ipsec_find_by_unit(*unit)) {
                (*unit)++;
            } else {
                break;
            }
        }
        if (*unit == ctl_maxunit) {
            lck_mtx_unlock(&ipsec_lock);
            return EBUSY;
        }
    } else if (ipsec_find_by_unit(*unit)) {
        lck_mtx_unlock(&ipsec_lock);
        return EBUSY;
    }

    /* Find some open interface id */
    u_int32_t chosen_unique_id = 1;
    struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
    if (next_pcb != NULL) {
        /* List was not empty, add one to the last item */
        chosen_unique_id = next_pcb->ipsec_unique_id + 1;
        next_pcb = NULL;

        /*
         * If this wrapped the id number, start looking at
         * the front of the list for an unused id.
         */
        if (chosen_unique_id == 0) {
            /* Find the next unused ID */
            chosen_unique_id = 1;
            TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
                if (next_pcb->ipsec_unique_id > chosen_unique_id) {
                    /* We found a gap */
                    break;
                }

                chosen_unique_id = next_pcb->ipsec_unique_id + 1;
            }
        }
    }

    struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);

    *unitinfo = pcb;
    pcb->ipsec_unit = *unit;
    pcb->ipsec_unique_id = chosen_unique_id;

    if (next_pcb != NULL) {
        TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
    } else {
        TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
    }

    lck_mtx_unlock(&ipsec_lock);

    return 0;
}

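/*
 * Bind a kernel-control socket to the pcb, recording the control
 * reference and unit, setting default ring/slot sizes, and
 * initializing the pcb locks. Allocates the pcb via ipsec_ctl_setup()
 * if the socket has not been set up; a second bind fails with EINVAL.
 */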
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
    if (*unitinfo == NULL) {
        u_int32_t unit = 0;
        (void)ipsec_ctl_setup(&unit, unitinfo);
    }

    struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
    if (pcb == NULL) {
        return EINVAL;
    }

    if (pcb->ipsec_ctlref != NULL) {
        // Return if bind was already called
        return EINVAL;
    }

    /* Setup the protocol control block */
    pcb->ipsec_ctlref = kctlref;
    pcb->ipsec_unit = sac->sc_unit;
    pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
    pcb->ipsec_use_netif = false;
    pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
    pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
    pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
    pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

    lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
    lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
    pcb->ipsec_input_chain_count = 0;
    lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
    lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
    lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

    return 0;
}

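/*
 * Handle a client connect on the kernel-control socket: fall back to
 * ipsec_ctl_bind() setup if the socket was never bound, then build the
 * ifnet init parameters used to create the interface.
 */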
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
    struct ifnet_init_eparams ipsec_init = {};
    errno_t result = 0;

    if (*unitinfo == NULL) {