/*
 * Copyright (c) 2012-2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */


#include <sys/systm.h>
#include <sys/kern_control.h>
#include <net/kpi_protocol.h>
#include <net/kpi_interface.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/if_ipsec.h>
#include <sys/mbuf.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <sys/kauth.h>
#include <netinet6/ipsec.h>
#include <netinet6/ipsec6.h>
#include <netinet6/esp.h>
#include <netinet6/esp6.h>
#include <netinet/ip.h>
#include <net/flowadv.h>
#include <net/necp.h>
#include <netkey/key.h>
#include <net/pktap.h>
#include <kern/zalloc.h>
#include <os/log.h>

#if SKYWALK
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/netif/nx_netif.h>
#define IPSEC_NEXUS 1
#else // SKYWALK
#define IPSEC_NEXUS 0
#endif // SKYWALK
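
/*
 * Build note: with Skywalk present, this driver uses the nexus/channel data
 * path gated by IPSEC_NEXUS; without it, only the legacy mbuf data path is
 * compiled in.
 */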

extern int net_qos_policy_restricted;
extern int net_qos_policy_restrict_avapps;

/* Kernel Control functions */
static errno_t ipsec_ctl_setup(u_int32_t *unit, void **unitinfo);
static errno_t ipsec_ctl_bind(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo);
static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo);
static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
    void *unitinfo, mbuf_t m, int flags);
static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t *len);
static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo,
    int opt, void *data, size_t len);

/* Network Interface functions */
static void ipsec_start(ifnet_t interface);
static errno_t ipsec_output(ifnet_t interface, mbuf_t data);
static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header,
    protocol_family_t *protocol);
static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol,
    const struct ifnet_demux_desc *demux_array,
    u_int32_t demux_count);
static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol);
static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data);
static void ipsec_detached(ifnet_t interface);

/* Protocol handlers */
static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto);
static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol,
    mbuf_t m, char *frame_header);
static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol,
    mbuf_t *packet, const struct sockaddr *dest, void *route,
    char *frame_type, char *link_layer_dest);

static kern_ctl_ref ipsec_kctlref;
static LCK_ATTR_DECLARE(ipsec_lck_attr, 0, 0);
static LCK_GRP_DECLARE(ipsec_lck_grp, "ipsec");
static LCK_MTX_DECLARE_ATTR(ipsec_lock, &ipsec_lck_grp, &ipsec_lck_attr);

#if IPSEC_NEXUS

SYSCTL_DECL(_net_ipsec);
SYSCTL_NODE(_net, OID_AUTO, ipsec, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "IPsec");
static int if_ipsec_verify_interface_creation = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, verify_interface_creation, CTLFLAG_RW | CTLFLAG_LOCKED, &if_ipsec_verify_interface_creation, 0, "");

#define IPSEC_IF_VERIFY(_e) if (__improbable(if_ipsec_verify_interface_creation)) { VERIFY(_e); }

#define IPSEC_IF_DEFAULT_SLOT_SIZE 2048
#define IPSEC_IF_DEFAULT_RING_SIZE 64
#define IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE 64
#define IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE 128
#define IPSEC_IF_DEFAULT_BUF_SEG_SIZE skmem_usr_buf_seg_size

#define IPSEC_IF_WMM_RING_COUNT NEXUS_NUM_WMM_QUEUES
#define IPSEC_IF_MAX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_TX_RING_COUNT IPSEC_IF_WMM_RING_COUNT
#define IPSEC_NETIF_WMM_RX_RING_COUNT 1
#define IPSEC_NETIF_MAX_TX_RING_COUNT IPSEC_NETIF_WMM_TX_RING_COUNT
#define IPSEC_NETIF_MAX_RX_RING_COUNT IPSEC_NETIF_WMM_RX_RING_COUNT

#define IPSEC_IF_MIN_RING_SIZE 8
#define IPSEC_IF_MAX_RING_SIZE 1024

#define IPSEC_IF_MIN_SLOT_SIZE 1024
#define IPSEC_IF_MAX_SLOT_SIZE (16 * 1024)

#define IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT 512

#define IPSEC_KPIPE_FLAG_WAKE_PKT 0x01

static uint32_t ipsec_kpipe_mbuf;

static int if_ipsec_max_pending_input = IPSEC_DEFAULT_MAX_PENDING_INPUT_COUNT;

static int sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS;
static int sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS;

static int if_ipsec_ring_size = IPSEC_IF_DEFAULT_RING_SIZE;
static int if_ipsec_tx_fsw_ring_size = IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE;
static int if_ipsec_rx_fsw_ring_size = IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE;

SYSCTL_INT(_net_ipsec, OID_AUTO, max_pending_input, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_max_pending_input, 0, "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_ring_size, IPSEC_IF_DEFAULT_RING_SIZE, &sysctl_if_ipsec_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, tx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_tx_fsw_ring_size, IPSEC_IF_DEFAULT_TX_FSW_RING_SIZE, &sysctl_if_ipsec_tx_fsw_ring_size, "I", "");
SYSCTL_PROC(_net_ipsec, OID_AUTO, rx_fsw_ring_size, CTLTYPE_INT | CTLFLAG_LOCKED | CTLFLAG_RW,
    &if_ipsec_rx_fsw_ring_size, IPSEC_IF_DEFAULT_RX_FSW_RING_SIZE, &sysctl_if_ipsec_rx_fsw_ring_size, "I", "");

static int if_ipsec_debug = 0;
SYSCTL_INT(_net_ipsec, OID_AUTO, debug, CTLFLAG_LOCKED | CTLFLAG_RW, &if_ipsec_debug, 0, "");
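
/*
 * Tuning sketch (assuming the standard sysctl CLI): these knobs surface under
 * the "net.ipsec" namespace, e.g. `sysctl net.ipsec.ring_size=128` from a
 * root shell. The PROC handlers below reject values outside
 * [IPSEC_IF_MIN_RING_SIZE, IPSEC_IF_MAX_RING_SIZE].
 */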

static errno_t
ipsec_register_nexus(void);

typedef struct ipsec_nx {
    uuid_t if_provider;
    uuid_t if_instance;
    uuid_t fsw_provider;
    uuid_t fsw_instance;
    uuid_t fsw_device;
    uuid_t fsw_agent;
} *ipsec_nx_t;

static nexus_controller_t ipsec_ncd;
static int ipsec_ncd_refcount;
static uuid_t ipsec_kpipe_uuid;

#endif // IPSEC_NEXUS

/* Control block allocated for each kernel control connection */
struct ipsec_pcb {
    TAILQ_ENTRY(ipsec_pcb) ipsec_chain;
    kern_ctl_ref ipsec_ctlref;
    ifnet_t ipsec_ifp;
    u_int32_t ipsec_unit;
    u_int32_t ipsec_unique_id;
    // These external flags can be set with IPSEC_OPT_FLAGS
    u_int32_t ipsec_external_flags;
    // These internal flags are only used within this driver
    u_int32_t ipsec_internal_flags;
    u_int32_t ipsec_input_frag_size;
    bool ipsec_frag_size_set;
    int ipsec_ext_ifdata_stats;
    mbuf_svc_class_t ipsec_output_service_class;
    char ipsec_if_xname[IFXNAMSIZ];
    char ipsec_unique_name[IFXNAMSIZ];
    // PCB lock protects state fields, like ipsec_kpipe_count
    decl_lck_rw_data(, ipsec_pcb_lock);
    // lock to protect ipsec_pcb_data_move & ipsec_pcb_drainers
    decl_lck_mtx_data(, ipsec_pcb_data_move_lock);
    u_int32_t ipsec_pcb_data_move; /* number of data moving contexts */
    u_int32_t ipsec_pcb_drainers; /* number of threads waiting to drain */
    u_int32_t ipsec_pcb_data_path_state; /* internal state of interface data path */
    ipsec_dscp_mapping_t ipsec_output_dscp_mapping;

#if IPSEC_NEXUS
    lck_mtx_t ipsec_input_chain_lock;
    lck_mtx_t ipsec_kpipe_encrypt_lock;
    lck_mtx_t ipsec_kpipe_decrypt_lock;
    struct mbuf *ipsec_input_chain;
    struct mbuf *ipsec_input_chain_last;
    u_int32_t ipsec_input_chain_count;
    // Input chain lock protects the list of input mbufs
    // The input chain lock must be taken AFTER the PCB lock if both are held
    struct ipsec_nx ipsec_nx;
    u_int32_t ipsec_kpipe_count;
    pid_t ipsec_kpipe_pid;
    uuid_t ipsec_kpipe_proc_uuid;
    uuid_t ipsec_kpipe_uuid[IPSEC_IF_MAX_RING_COUNT];
    void *ipsec_kpipe_rxring[IPSEC_IF_MAX_RING_COUNT];
    void *ipsec_kpipe_txring[IPSEC_IF_MAX_RING_COUNT];
    kern_pbufpool_t ipsec_kpipe_pp;
    u_int32_t ipsec_kpipe_tx_ring_size;
    u_int32_t ipsec_kpipe_rx_ring_size;

    kern_nexus_t ipsec_netif_nexus;
    kern_pbufpool_t ipsec_netif_pp;
    void *ipsec_netif_rxring[IPSEC_NETIF_MAX_RX_RING_COUNT];
    void *ipsec_netif_txring[IPSEC_NETIF_MAX_TX_RING_COUNT];
    uint64_t ipsec_netif_txring_size;

    u_int32_t ipsec_slot_size;
    u_int32_t ipsec_netif_ring_size;
    u_int32_t ipsec_tx_fsw_ring_size;
    u_int32_t ipsec_rx_fsw_ring_size;
    bool ipsec_use_netif;
    bool ipsec_needs_netagent;
#endif // IPSEC_NEXUS
};
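
/*
 * Lock ordering summary for the fields above: take the PCB lock first; the
 * input chain lock, when also needed, must be taken AFTER the PCB lock.
 */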

/* These are internal flags not exposed outside this file */
#define IPSEC_FLAGS_KPIPE_ALLOCATED 1

/* data movement refcounting functions */
static boolean_t ipsec_data_move_begin(struct ipsec_pcb *pcb);
static void ipsec_data_move_end(struct ipsec_pcb *pcb);
static void ipsec_wait_data_move_drain(struct ipsec_pcb *pcb);

/* Data path states */
#define IPSEC_PCB_DATA_PATH_READY 0x1

/* Macros to set/clear/test data path states */
#define IPSEC_SET_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state |= IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_CLR_DATA_PATH_READY(_pcb) ((_pcb)->ipsec_pcb_data_path_state &= ~IPSEC_PCB_DATA_PATH_READY)
#define IPSEC_IS_DATA_PATH_READY(_pcb) (((_pcb)->ipsec_pcb_data_path_state & IPSEC_PCB_DATA_PATH_READY) != 0)
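
/*
 * Usage pattern (as in the sync/doorbell handlers below): every data-path
 * entry point brackets its work with the data-move refcount so that teardown
 * can drain in ipsec_wait_data_move_drain():
 *
 *     if (!ipsec_data_move_begin(pcb)) {
 *         return 0;    // data path already stopped
 *     }
 *     // ... work under the PCB lock ...
 *     ipsec_data_move_end(pcb);
 */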

#if IPSEC_NEXUS
/* Inline helpers to set/clear/test internal flags. */
static inline void
ipsec_flag_set(struct ipsec_pcb *pcb, uint32_t flag)
{
    pcb->ipsec_internal_flags |= flag;
}

static inline void
ipsec_flag_clr(struct ipsec_pcb *pcb, uint32_t flag)
{
    pcb->ipsec_internal_flags &= ~flag;
}

static inline bool
ipsec_flag_isset(struct ipsec_pcb *pcb, uint32_t flag)
{
    return !!(pcb->ipsec_internal_flags & flag);
}
#endif // IPSEC_NEXUS

TAILQ_HEAD(ipsec_list, ipsec_pcb) ipsec_head;

static KALLOC_TYPE_DEFINE(ipsec_pcb_zone, struct ipsec_pcb, NET_KT_DEFAULT);

#define IPSECQ_MAXLEN 256

#if IPSEC_NEXUS
static int
sysctl_if_ipsec_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_ring_size = value;

    return 0;
}

static int
sysctl_if_ipsec_tx_fsw_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_tx_fsw_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_tx_fsw_ring_size = value;

    return 0;
}

static int
sysctl_if_ipsec_rx_fsw_ring_size SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int value = if_ipsec_rx_fsw_ring_size;

    int error = sysctl_handle_int(oidp, &value, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    if (value < IPSEC_IF_MIN_RING_SIZE ||
        value > IPSEC_IF_MAX_RING_SIZE) {
        return EINVAL;
    }

    if_ipsec_rx_fsw_ring_size = value;

    return 0;
}


static inline bool
ipsec_in_wmm_mode(struct ipsec_pcb *pcb)
{
    return pcb->ipsec_kpipe_count == IPSEC_IF_WMM_RING_COUNT;
}

#endif // IPSEC_NEXUS

errno_t
ipsec_register_control(void)
{
    struct kern_ctl_reg kern_ctl;
    errno_t result = 0;

#if (DEVELOPMENT || DEBUG)
    (void)PE_parse_boot_argn("ipsec_kpipe_mbuf", &ipsec_kpipe_mbuf,
        sizeof(ipsec_kpipe_mbuf));
#endif /* DEVELOPMENT || DEBUG */

#if IPSEC_NEXUS
    ipsec_register_nexus();
#endif // IPSEC_NEXUS

    TAILQ_INIT(&ipsec_head);

    bzero(&kern_ctl, sizeof(kern_ctl));
    strlcpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
    kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
    kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_SETUP; /* Require root */
    kern_ctl.ctl_sendsize = 64 * 1024;
    kern_ctl.ctl_recvsize = 64 * 1024;
    kern_ctl.ctl_setup = ipsec_ctl_setup;
    kern_ctl.ctl_bind = ipsec_ctl_bind;
    kern_ctl.ctl_connect = ipsec_ctl_connect;
    kern_ctl.ctl_disconnect = ipsec_ctl_disconnect;
    kern_ctl.ctl_send = ipsec_ctl_send;
    kern_ctl.ctl_setopt = ipsec_ctl_setopt;
    kern_ctl.ctl_getopt = ipsec_ctl_getopt;

    result = ctl_register(&kern_ctl, &ipsec_kctlref);
    if (result != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - ctl_register failed: %d\n", result);
        return result;
    }

    /* Register the IPv4 protocol plumber */
    if ((result = proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC,
        ipsec_attach_proto, NULL)) != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET, IFNET_FAMILY_IPSEC) failed: %d\n",
            result);
        ctl_deregister(ipsec_kctlref);
        return result;
    }

    /* Register the IPv6 protocol plumber */
    if ((result = proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC,
        ipsec_attach_proto, NULL)) != 0) {
        proto_unregister_plumber(PF_INET, IFNET_FAMILY_IPSEC);
        ctl_deregister(ipsec_kctlref);
        os_log_error(OS_LOG_DEFAULT, "ipsec_register_control - proto_register_plumber(PF_INET6, IFNET_FAMILY_IPSEC) failed: %d\n",
            result);
        return result;
    }

    return 0;
}
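
/*
 * Userland sketch (assumptions: the standard PF_SYSTEM kernel-control API;
 * this snippet is illustrative, not part of this file): a client resolves
 * IPSEC_CONTROL_NAME and connects to this control, which allocates one
 * ipsec_pcb per connection:
 *
 *     int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *     struct ctl_info info;
 *     bzero(&info, sizeof(info));
 *     strlcpy(info.ctl_name, IPSEC_CONTROL_NAME, sizeof(info.ctl_name));
 *     ioctl(fd, CTLIOCGINFO, &info);          // look up ctl_id by name
 *     struct sockaddr_ctl sc = {
 *         .sc_len = sizeof(sc), .sc_family = AF_SYSTEM,
 *         .ss_sysaddr = AF_SYS_CONTROL, .sc_id = info.ctl_id, .sc_unit = 0,
 *     };
 *     connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *
 * Because the control is CTL_FLAG_PRIVILEGED, only root may connect.
 */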

/* Helpers */
int
ipsec_interface_isvalid(ifnet_t interface)
{
    struct ipsec_pcb *pcb = NULL;

    if (interface == NULL) {
        return 0;
    }

    pcb = ifnet_softc(interface);

    if (pcb == NULL) {
        return 0;
    }

    /* When ctl disconnects, ipsec_unit is set to 0 */
    if (pcb->ipsec_unit == 0) {
        return 0;
    }

    return 1;
}

#if IPSEC_NEXUS
boolean_t
ipsec_interface_needs_netagent(ifnet_t interface)
{
    struct ipsec_pcb *pcb = NULL;

    if (interface == NULL) {
        return FALSE;
    }

    pcb = ifnet_softc(interface);

    if (pcb == NULL) {
        return FALSE;
    }

    return pcb->ipsec_needs_netagent == true;
}
#endif // IPSEC_NEXUS

static errno_t
ipsec_ifnet_set_attrs(ifnet_t ifp)
{
    /* Set flags and additional information. */
    ifnet_set_mtu(ifp, 1500);
    ifnet_set_flags(ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);

    /*
     * The interface must generate its own IPv6 link-local address,
     * if possible following RFC 2472's recommendation to derive it
     * from the 64-bit interface ID.
     */
    ifnet_set_eflags(ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL);

#if !IPSEC_NEXUS
    /* Reset the stats in case the interface has been recycled */
    struct ifnet_stats_param stats;
    bzero(&stats, sizeof(struct ifnet_stats_param));
    ifnet_set_stat(ifp, &stats);
#endif // !IPSEC_NEXUS

    return 0;
}

#if IPSEC_NEXUS

static uuid_t ipsec_nx_dom_prov;

static errno_t
ipsec_nxdp_init(__unused kern_nexus_domain_provider_t domprov)
{
    return 0;
}

static void
ipsec_nxdp_fini(__unused kern_nexus_domain_provider_t domprov)
{
    // Ignore
}

static errno_t
ipsec_register_nexus(void)
{
    const struct kern_nexus_domain_provider_init dp_init = {
        .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
        .nxdpi_flags = 0,
        .nxdpi_init = ipsec_nxdp_init,
        .nxdpi_fini = ipsec_nxdp_fini
    };
    errno_t err = 0;

    /* ipsec_nxdp_init() is called before this function returns */
    err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
        (const uint8_t *)"com.apple.ipsec",
        &dp_init, sizeof(dp_init),
        &ipsec_nx_dom_prov);
    if (err != 0) {
        os_log_error(OS_LOG_DEFAULT, "%s: failed to register domain provider\n", __func__);
        return err;
    }
    return 0;
}

static errno_t
ipsec_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    pcb->ipsec_netif_nexus = nexus;
    return ipsec_ifnet_set_attrs(ifp);
}

static errno_t
ipsec_nexus_pre_connect(kern_nexus_provider_t nxprov,
    proc_t p, kern_nexus_t nexus,
    nexus_port_t nexus_port, kern_channel_t channel, void **ch_ctx)
{
#pragma unused(nxprov, p)
#pragma unused(nexus, nexus_port, channel, ch_ctx)
    return 0;
}

static errno_t
ipsec_nexus_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    boolean_t ok = ifnet_is_attached(pcb->ipsec_ifp, 1);
    /* Mark the data path as ready */
    if (ok) {
        lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
        IPSEC_SET_DATA_PATH_READY(pcb);
        lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
    }
    return ok ? 0 : ENXIO;
}

static void
ipsec_nexus_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    VERIFY(pcb->ipsec_kpipe_count != 0);

    /* Wait until all threads in the data paths are done. */
    ipsec_wait_data_move_drain(pcb);
}

static void
ipsec_netif_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    /* Wait until all threads in the data paths are done. */
    ipsec_wait_data_move_drain(pcb);
}

static void
ipsec_nexus_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel)
{
#pragma unused(nxprov, channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    if (pcb->ipsec_netif_nexus == nexus) {
        pcb->ipsec_netif_nexus = NULL;
    }
    ifnet_decr_iorefcnt(pcb->ipsec_ifp);
}

static errno_t
ipsec_kpipe_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
    void **ring_ctx)
{
#pragma unused(nxprov)
#pragma unused(channel)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    uint8_t ring_idx;

    for (ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
        if (!uuid_compare(channel->ch_info->cinfo_nx_uuid, pcb->ipsec_kpipe_uuid[ring_idx])) {
            break;
        }
    }

    if (ring_idx == pcb->ipsec_kpipe_count) {
        uuid_string_t uuidstr;
        uuid_unparse(channel->ch_info->cinfo_nx_uuid, uuidstr);
        os_log_error(OS_LOG_DEFAULT, "%s: %s cannot find channel %s\n", __func__, pcb->ipsec_if_xname, uuidstr);
        return ENOENT;
    }

    *ring_ctx = (void *)(uintptr_t)ring_idx;

    if (!is_tx_ring) {
        VERIFY(pcb->ipsec_kpipe_rxring[ring_idx] == NULL);
        pcb->ipsec_kpipe_rxring[ring_idx] = ring;
    } else {
        VERIFY(pcb->ipsec_kpipe_txring[ring_idx] == NULL);
        pcb->ipsec_kpipe_txring[ring_idx] = ring;
    }
    return 0;
}

static void
ipsec_kpipe_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t ring)
{
#pragma unused(nxprov)
    bool found = false;
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
        if (pcb->ipsec_kpipe_rxring[i] == ring) {
            pcb->ipsec_kpipe_rxring[i] = NULL;
            found = true;
        } else if (pcb->ipsec_kpipe_txring[i] == ring) {
            pcb->ipsec_kpipe_txring[i] = NULL;
            found = true;
        }
    }
    VERIFY(found);
}

static errno_t
ipsec_kpipe_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);

    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to write, bail
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    // Signal the netif ring to read
    kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (rx_ring != NULL) {
        kern_channel_notify(rx_ring, 0);
    }

    ipsec_data_move_end(pcb);
    return 0;
}

static mbuf_t
ipsec_encrypt_mbuf(ifnet_t interface,
    mbuf_t data)
{
    struct ipsec_output_state ipsec_state;
    int error = 0;
    uint32_t af;

    // Make sure this packet isn't looping through the interface
    if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
        error = -1;
        goto ipsec_output_err;
    }

    // Mark the interface so NECP can evaluate tunnel policy
    necp_mark_packet_from_interface(data, interface);

    struct ip *ip = mtod(data, struct ip *);
    u_int ip_version = ip->ip_v;

    switch (ip_version) {
    case 4: {
        af = AF_INET;

        memset(&ipsec_state, 0, sizeof(ipsec_state));
        ipsec_state.m = data;
        ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
        memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

        error = ipsec4_interface_output(&ipsec_state, interface);
        if (error == 0 && ipsec_state.tunneled == 6) {
            // Tunneled in IPv6 - packet is gone
            // TODO: Don't lose mbuf
            data = NULL;
            goto done;
        }

        data = ipsec_state.m;
        if (error || data == NULL) {
            if (error) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec4_output error %d\n", error);
            }
            goto ipsec_output_err;
        }
        goto done;
    }
    case 6: {
        af = AF_INET6;

        data = ipsec6_splithdr(data);
        if (data == NULL) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_splithdr returned NULL\n");
            goto ipsec_output_err;
        }

        struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);

        memset(&ipsec_state, 0, sizeof(ipsec_state));
        ipsec_state.m = data;
        ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
        memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));

        error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
        if (error == 0 && ipsec_state.tunneled == 4) {
            // Tunneled in IPv4 - packet is gone
            // TODO: Don't lose mbuf
            data = NULL;
            goto done;
        }
        data = ipsec_state.m;
        if (error || data == NULL) {
            if (error) {
                os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: ipsec6_output error %d\n", error);
            }
            goto ipsec_output_err;
        }
        goto done;
    }
    default: {
        os_log_error(OS_LOG_DEFAULT, "ipsec_encrypt_mbuf: Received unknown packet version %d\n", ip_version);
        error = -1;
        goto ipsec_output_err;
    }
    }

done:
    return data;

ipsec_output_err:
    if (data) {
        mbuf_freem(data);
    }
    return NULL;
}
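
/*
 * Ownership note: ipsec_encrypt_mbuf() returns the encrypted mbuf on success,
 * returns NULL after the packet was consumed by cross-family tunneling (see
 * the TODOs above), and frees the mbuf itself on error before returning NULL,
 * so callers never need to free the original mbuf on failure.
 */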

static errno_t
ipsec_kpipe_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    struct kern_channel_ring_stat_increment rx_ring_stats;
    uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);
    VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

    // Reclaim user-released slots
    (void) kern_channel_reclaim(rx_ring);

    uint32_t avail = kern_channel_available_slot_count(rx_ring);
    if (avail == 0) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
    if (tx_ring == NULL) {
        // Net-If TX ring not set up yet, nothing to read
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

    // Unlock ipsec before entering ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    (void)kr_enter(tx_ring, TRUE);

    // Lock again after entering and validate
    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
    if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
        // Ring no longer valid
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_channel_ring_stat_increment tx_ring_stats;
    bzero(&tx_ring_stats, sizeof(tx_ring_stats));
    kern_channel_slot_t tx_pslot = NULL;
    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to read, don't bother signalling
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
    VERIFY(rx_pp != NULL);
    struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
    VERIFY(tx_pp != NULL);
    bzero(&rx_ring_stats, sizeof(rx_ring_stats));
    kern_channel_slot_t rx_pslot = NULL;
    kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
    kern_packet_t tx_chain_ph = 0;

    while (rx_slot != NULL && tx_slot != NULL) {
        size_t length = 0;
        mbuf_t data = NULL;
        errno_t error = 0;

        // Allocate rx packet
        kern_packet_t rx_ph = 0;
        error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
        if (__improbable(error != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: failed to allocate packet\n",
                pcb->ipsec_ifp->if_xname);
            break;
        }

        kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);

        if (tx_ph == 0) {
            // Advance TX ring
            tx_pslot = tx_slot;
            tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
            kern_pbufpool_free(rx_pp, rx_ph);
            continue;
        }
        (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
        if (tx_chain_ph != 0) {
            kern_packet_append(tx_ph, tx_chain_ph);
        }
        tx_chain_ph = tx_ph;

        // Advance TX ring
        tx_pslot = tx_slot;
        tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

        kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
        VERIFY(tx_buf != NULL);
        uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
        VERIFY(tx_baddr != NULL);
        tx_baddr += kern_buflet_get_data_offset(tx_buf);

        bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

        length = MIN(kern_packet_get_data_length(tx_ph),
            pcb->ipsec_slot_size);

        // Increment TX stats
        tx_ring_stats.kcrsi_slots_transferred++;
        tx_ring_stats.kcrsi_bytes_transferred += length;

        if (length > 0) {
            error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &data);
            if (error == 0) {
                error = mbuf_copyback(data, 0, length, tx_baddr, MBUF_DONTWAIT);
                if (error == 0) {
                    // Encrypt and send packet
                    lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
                    data = ipsec_encrypt_mbuf(pcb->ipsec_ifp, data);
                    lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
                } else {
                    os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
                    STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
                    STATS_INC(nifs, NETIF_STATS_DROP);
                    mbuf_freem(data);
                    data = NULL;
                }
            } else {
                os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
                STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
                STATS_INC(nifs, NETIF_STATS_DROP);
            }
        } else {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
            STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
            STATS_INC(nifs, NETIF_STATS_DROP);
        }

        if (data == NULL) {
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
            kern_pbufpool_free(rx_pp, rx_ph);
            break;
        }

        length = mbuf_pkthdr_len(data);
        if (length > PP_BUF_SIZE_DEF(rx_pp)) {
            // Flush data
            mbuf_freem(data);
            kern_pbufpool_free(rx_pp, rx_ph);
            os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: encrypted packet length %zu > %u\n",
                pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
            continue;
        }

        // Fillout rx packet
        kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
        VERIFY(rx_buf != NULL);
        void *rx_baddr = kern_buflet_get_data_address(rx_buf);
        VERIFY(rx_baddr != NULL);

        // Copy-in data from mbuf to buflet
        mbuf_copydata(data, 0, length, (void *)rx_baddr);
        kern_packet_clear_flow_uuid(rx_ph); // Zero flow id

        // Finalize and attach the packet
        error = kern_buflet_set_data_offset(rx_buf, 0);
        VERIFY(error == 0);
        error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
        VERIFY(error == 0);
        error = kern_packet_finalize(rx_ph);
        VERIFY(error == 0);
        error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
        VERIFY(error == 0);

        STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
        STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

        rx_ring_stats.kcrsi_slots_transferred++;
        rx_ring_stats.kcrsi_bytes_transferred += length;

        if (!pcb->ipsec_ext_ifdata_stats) {
            ifnet_stat_increment_out(pcb->ipsec_ifp, 1, (uint16_t)length, 0);
        }

        mbuf_freem(data);

        rx_pslot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

    if (rx_pslot) {
        kern_channel_advance_slot(rx_ring, rx_pslot);
        kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
    }

    if (tx_chain_ph != 0) {
        kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
    }

    if (tx_pslot) {
        kern_channel_advance_slot(tx_ring, tx_pslot);
        kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
        (void)kern_channel_reclaim(tx_ring);
    }

    /* always reenable output */
    errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
    if (error != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
    }

    // Unlock first, then exit ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (tx_pslot != NULL) {
        kern_channel_notify(tx_ring, 0);
    }
    kr_exit(tx_ring);

    ipsec_data_move_end(pcb);
    return 0;
}

static errno_t
ipsec_encrypt_kpipe_pkt(ifnet_t interface, kern_packet_t sph,
    kern_packet_t dph)
{
    uint8_t *sbaddr = NULL;
    int err = 0;
    uint32_t slen = 0;

    VERIFY(interface != NULL);
    VERIFY(sph != 0);
    VERIFY(dph != 0);

    kern_buflet_t sbuf = __packet_get_next_buflet(sph, NULL);
    VERIFY(sbuf != NULL);
    slen = __buflet_get_data_length(sbuf);

    if (__improbable(slen < sizeof(struct ip))) {
        os_log_error(OS_LOG_DEFAULT, "ipsec encrypt kpipe pkt: source "
            "buffer shorter than ip header, %u\n", slen);
        return EINVAL;
    }

    MD_BUFLET_ADDR(SK_PTR_ADDR_KPKT(sph), sbaddr);
    struct ip *ip = (struct ip *)(void *)sbaddr;
    ASSERT(IP_HDR_ALIGNED_P(ip));

    u_int ip_vers = ip->ip_v;
    switch (ip_vers) {
    case IPVERSION: {
        err = ipsec4_interface_kpipe_output(interface, sph, dph);
        if (__improbable(err != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec4 interface kpipe "
                "output error %d\n", err);
            return err;
        }
        break;
    }
    case 6: {
        err = ipsec6_interface_kpipe_output(interface, sph, dph);
        if (__improbable(err != 0)) {
            os_log_error(OS_LOG_DEFAULT, "ipsec6 interface kpipe "
                "output error %d\n", err);
            return err;
        }
        break;
    }
    default: {
        os_log_error(OS_LOG_DEFAULT, "received unknown packet version: %d\n",
            ip_vers);
        return EINVAL;
    }
    }

    return err;
}

static errno_t
ipsec_kpipe_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
#pragma unused(nxprov)
#pragma unused(flags)
    struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
    struct kern_channel_ring_stat_increment rx_ring_stats;
    uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(rx_ring);

    if (!ipsec_data_move_begin(pcb)) {
        os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
        return 0;
    }

    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

    if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        ipsec_data_move_end(pcb);
        return 0;
    }

    VERIFY(pcb->ipsec_kpipe_count);
    VERIFY(ring_idx <= pcb->ipsec_kpipe_count);

    // Reclaim user-released slots
    (void) kern_channel_reclaim(rx_ring);

    uint32_t avail = kern_channel_available_slot_count(rx_ring);
    if (avail == 0) {
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d no room in rx_ring\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    kern_channel_ring_t tx_ring = pcb->ipsec_netif_txring[ring_idx];
    if (tx_ring == NULL) {
        // Net-If TX ring not set up yet, nothing to read
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 1\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct netif_stats *nifs = &NX_NETIF_PRIVATE(pcb->ipsec_netif_nexus)->nif_stats;

    // Unlock ipsec before entering ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    (void)kr_enter(tx_ring, TRUE);

    // Lock again after entering and validate
    lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
    if (tx_ring != pcb->ipsec_netif_txring[ring_idx]) {
        // Ring no longer valid
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 2\n", __func__,
            pcb->ipsec_if_xname, rx_ring->ckr_name, ring_idx);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_channel_ring_stat_increment tx_ring_stats;
    bzero(&tx_ring_stats, sizeof(tx_ring_stats));
    kern_channel_slot_t tx_pslot = NULL;
    kern_channel_slot_t tx_slot = kern_channel_get_next_slot(tx_ring, NULL, NULL);
    if (tx_slot == NULL) {
        // Nothing to read, don't bother signalling
        // Unlock first, then exit ring
        lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
        kr_exit(tx_ring);
        ipsec_data_move_end(pcb);
        return 0;
    }

    struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
    VERIFY(rx_pp != NULL);
    struct kern_pbufpool *tx_pp = tx_ring->ckr_pp;
    VERIFY(tx_pp != NULL);
    bzero(&rx_ring_stats, sizeof(rx_ring_stats));
    kern_channel_slot_t rx_pslot = NULL;
    kern_channel_slot_t rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);
    kern_packet_t tx_chain_ph = 0;

    while (rx_slot != NULL && tx_slot != NULL) {
        size_t tx_pkt_length = 0;
        errno_t error = 0;

        // Allocate rx packet
        kern_packet_t rx_ph = 0;
        error = kern_pbufpool_alloc_nosleep(rx_pp, 1, &rx_ph);
        if (__improbable(error != 0)) {
            os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
                "failed to allocate packet\n", pcb->ipsec_ifp->if_xname);
            STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
            STATS_INC(nifs, NETIF_STATS_DROP);
            break;
        }

        kern_packet_t tx_ph = kern_channel_slot_get_packet(tx_ring, tx_slot);
        if (__improbable(tx_ph == 0)) {
            // Advance TX ring
            tx_pslot = tx_slot;
            tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);
            kern_pbufpool_free(rx_pp, rx_ph);
            continue;
        }

        (void) kern_channel_slot_detach_packet(tx_ring, tx_slot, tx_ph);
        if (tx_chain_ph != 0) {
            kern_packet_append(tx_ph, tx_chain_ph);
        }
        tx_chain_ph = tx_ph;

        // Advance TX ring
        tx_pslot = tx_slot;
        tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL);

        bpf_tap_packet_out(pcb->ipsec_ifp, DLT_RAW, tx_ph, NULL, 0);

        tx_pkt_length = kern_packet_get_data_length(tx_ph);
        if (tx_pkt_length == 0 || tx_pkt_length > pcb->ipsec_slot_size) {
            os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
                "invalid packet length %zu\n", pcb->ipsec_ifp->if_xname,
                tx_pkt_length);
            kern_pbufpool_free(rx_pp, rx_ph);
            STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
            STATS_INC(nifs, NETIF_STATS_DROP);
            continue;
        }

        // Increment TX stats
        tx_ring_stats.kcrsi_slots_transferred++;
        tx_ring_stats.kcrsi_bytes_transferred += tx_pkt_length;

        // Encrypt packet
        lck_mtx_lock(&pcb->ipsec_kpipe_encrypt_lock);
        error = ipsec_encrypt_kpipe_pkt(pcb->ipsec_ifp, tx_ph, rx_ph);
        lck_mtx_unlock(&pcb->ipsec_kpipe_encrypt_lock);
        if (__improbable(error != 0)) {
            os_log_info(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx %s: "
                "failed to encrypt packet\n", pcb->ipsec_ifp->if_xname);
            kern_pbufpool_free(rx_pp, rx_ph);
            STATS_INC(nifs, NETIF_STATS_DROP);
            continue;
        }

        kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
        // Finalize and attach the packet
        kern_buflet_t rx_buf = __packet_get_next_buflet(rx_ph, NULL);
        error = kern_buflet_set_data_offset(rx_buf, 0);
        VERIFY(error == 0);
        error = kern_packet_finalize(rx_ph);
        VERIFY(error == 0);
        error = kern_channel_slot_attach_packet(rx_ring, rx_slot, rx_ph);
        VERIFY(error == 0);

        STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
        STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);

        rx_ring_stats.kcrsi_slots_transferred++;
        rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);

        if (!pcb->ipsec_ext_ifdata_stats) {
            ifnet_stat_increment_out(pcb->ipsec_ifp, 1,
                kern_packet_get_data_length(rx_ph), 0);
        }

        rx_pslot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

    if (rx_pslot) {
        kern_channel_advance_slot(rx_ring, rx_pslot);
        kern_channel_increment_ring_net_stats(rx_ring, pcb->ipsec_ifp, &rx_ring_stats);
    }

    if (tx_chain_ph != 0) {
        kern_pbufpool_free_chain(tx_pp, tx_chain_ph);
    }

    if (tx_pslot) {
        kern_channel_advance_slot(tx_ring, tx_pslot);
        kern_channel_increment_ring_net_stats(tx_ring, pcb->ipsec_ifp, &tx_ring_stats);
        (void)kern_channel_reclaim(tx_ring);
    }

    /* always reenable output */
    errno_t error = ifnet_enable_output(pcb->ipsec_ifp);
    if (error != 0) {
        os_log_error(OS_LOG_DEFAULT, "ipsec_kpipe_sync_rx: ifnet_enable_output returned error %d\n", error);
    }

    // Unlock first, then exit ring
    lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

    if (tx_pslot != NULL) {
        kern_channel_notify(tx_ring, 0);
    }
    kr_exit(tx_ring);

    ipsec_data_move_end(pcb);
    return 0;
}

static errno_t
ipsec_kpipe_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
    kern_channel_ring_t rx_ring, uint32_t flags)
{
    if (__improbable(ipsec_kpipe_mbuf == 1)) {
        return ipsec_kpipe_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
    } else {
        return ipsec_kpipe_sync_rx_packet(nxprov, nexus, rx_ring, flags);
    }
}
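
/*
 * ipsec_kpipe_mbuf can only be set via the "ipsec_kpipe_mbuf" boot-arg on
 * DEVELOPMENT/DEBUG kernels (see ipsec_register_control()), so release
 * kernels always take the zero-copy packet path above.
 */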

static uint8_t
ipsec_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
{
    switch (svc_class) {
    case KPKT_SC_VO: {
        return 0;
    }
    case KPKT_SC_VI: {
        return 1;
    }
    case KPKT_SC_BE: {
        return 2;
    }
    case KPKT_SC_BK: {
        return 3;
    }
    default: {
        VERIFY(0);
        return 0;
    }
    }
}
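
/*
 * The mapping above follows the WMM access categories, highest priority
 * first: VO -> ring 0, VI -> ring 1, BE -> ring 2, BK -> ring 3, one ring
 * per IPSEC_IF_WMM_RING_COUNT queue.
 */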
1337
1338static errno_t
1339ipsec_netif_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1340 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1341 void **ring_ctx)
1342{
1343#pragma unused(nxprov)
1344#pragma unused(channel)
1345 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1346
1347 if (!is_tx_ring) {
1348 VERIFY(pcb->ipsec_netif_rxring[0] == NULL);
1349 pcb->ipsec_netif_rxring[0] = ring;
1350 } else {
1351 uint8_t ring_idx = 0;
1352 if (ipsec_in_wmm_mode(pcb)) {
1353 int err;
1354 kern_packet_svc_class_t svc_class;
1355 err = kern_channel_get_service_class(ring, svc: &svc_class);
1356 VERIFY(err == 0);
1357 ring_idx = ipsec_find_tx_ring_by_svc(svc_class);
1358 VERIFY(ring_idx < IPSEC_IF_WMM_RING_COUNT);
1359 }
1360
1361 *ring_ctx = (void *)(uintptr_t)ring_idx;
1362
1363 VERIFY(pcb->ipsec_netif_txring[ring_idx] == NULL);
1364 pcb->ipsec_netif_txring[ring_idx] = ring;
1365 }
1366 return 0;
1367}
1368
1369static void
1370ipsec_netif_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1371 kern_channel_ring_t ring)
1372{
1373#pragma unused(nxprov)
1374 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1375 bool found = false;
1376
1377 for (int i = 0; i < IPSEC_NETIF_MAX_RX_RING_COUNT; i++) {
1378 if (pcb->ipsec_netif_rxring[i] == ring) {
1379 pcb->ipsec_netif_rxring[i] = NULL;
1380 VERIFY(!found);
1381 found = true;
1382 }
1383 }
1384 for (int i = 0; i < IPSEC_NETIF_MAX_TX_RING_COUNT; i++) {
1385 if (pcb->ipsec_netif_txring[i] == ring) {
1386 pcb->ipsec_netif_txring[i] = NULL;
1387 VERIFY(!found);
1388 found = true;
1389 }
1390 }
1391 VERIFY(found);
1392}
1393
1394static bool
1395ipsec_netif_check_policy(ifnet_t interface, mbuf_t data)
1396{
1397 necp_kernel_policy_result necp_result = 0;
1398 necp_kernel_policy_result_parameter necp_result_parameter = {};
1399 uint32_t necp_matched_policy_id = 0;
1400 struct ip_out_args args4 = { };
1401 struct ip6_out_args args6 = { };
1402
1403 // This packet has been marked with IP level policy, do not mark again.
1404 if (data && data->m_pkthdr.necp_mtag.necp_policy_id >= NECP_KERNEL_POLICY_ID_FIRST_VALID_IP) {
1405 return true;
1406 }
1407
1408 size_t length = mbuf_pkthdr_len(mbuf: data);
1409 if (length < sizeof(struct ip)) {
1410 return false;
1411 }
1412
1413 struct ip *ip = mtod(data, struct ip *);
1414 u_int ip_version = ip->ip_v;
1415 switch (ip_version) {
1416 case 4: {
1417 if (interface != NULL) {
1418 args4.ipoa_flags |= IPOAF_BOUND_IF;
1419 args4.ipoa_boundif = interface->if_index;
1420 }
1421 necp_matched_policy_id = necp_ip_output_find_policy_match(packet: data, IP_OUTARGS, ipoa: &args4, NULL,
1422 result: &necp_result, result_parameter: &necp_result_parameter);
1423 break;
1424 }
1425 case 6: {
1426 if (interface != NULL) {
1427 args6.ip6oa_flags |= IP6OAF_BOUND_IF;
1428 args6.ip6oa_boundif = interface->if_index;
1429 }
1430 necp_matched_policy_id = necp_ip6_output_find_policy_match(packet: data, IPV6_OUTARGS, ip6oa: &args6, NULL,
1431 result: &necp_result, result_parameter: &necp_result_parameter);
1432 break;
1433 }
1434 default: {
1435 return false;
1436 }
1437 }
1438
1439 if (necp_result == NECP_KERNEL_POLICY_RESULT_DROP ||
1440 necp_result == NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT) {
1441 /* Drop and flow divert packets should be blocked at the IP layer */
1442 return false;
1443 }
1444
1445 necp_mark_packet_from_ip(packet: data, policy_id: necp_matched_policy_id);
1446 return true;
1447}
1448
1449static errno_t
1450ipsec_netif_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1451 kern_channel_ring_t tx_ring, uint32_t flags)
1452{
1453#pragma unused(nxprov)
1454#pragma unused(flags)
1455 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1456
1457 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1458
1459 if (!ipsec_data_move_begin(pcb)) {
1460 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1461 return 0;
1462 }
1463
1464 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
1465
1466 struct kern_channel_ring_stat_increment tx_ring_stats;
1467 bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats));
1468 kern_channel_slot_t tx_pslot = NULL;
1469 kern_channel_slot_t tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL);
1470 kern_packet_t tx_chain_ph = 0;
1471
1472 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
1473
1474 if (tx_slot == NULL) {
1475 // Nothing to write, don't bother signalling
1476 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1477 ipsec_data_move_end(pcb);
1478 return 0;
1479 }
1480
1481 if (pcb->ipsec_kpipe_count &&
1482 ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
1483 // Select the corresponding kpipe rx ring
1484 uint8_t ring_idx = (uint8_t)(uintptr_t)kern_channel_ring_get_context(ring: tx_ring);
1485 VERIFY(ring_idx < IPSEC_IF_MAX_RING_COUNT);
1486 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1487
1488 // Unlock while calling notify
1489 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1490
1491 // Signal the kernel pipe ring to read
1492 if (rx_ring != NULL) {
1493 kern_channel_notify(rx_ring, flags: 0);
1494 }
1495
1496 ipsec_data_move_end(pcb);
1497 return 0;
1498 }
1499
1500 // If we're here, we're injecting into the BSD stack
1501 while (tx_slot != NULL) {
1502 size_t length = 0;
1503 mbuf_t data = NULL;
1504
1505 kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot);
1506
1507 if (tx_ph == 0) {
1508 // Advance TX ring
1509 tx_pslot = tx_slot;
1510 tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL);
1511 continue;
1512 }
1513 (void) kern_channel_slot_detach_packet(ring: tx_ring, slot: tx_slot, packet: tx_ph);
1514 if (tx_chain_ph != 0) {
1515 kern_packet_append(tx_ph, tx_chain_ph);
1516 }
1517 tx_chain_ph = tx_ph;
1518
1519 // Advance TX ring
1520 tx_pslot = tx_slot;
1521 tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL);
1522
1523 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1524 VERIFY(tx_buf != NULL);
1525 uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1526 VERIFY(tx_baddr != 0);
1527 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1528
1529 bpf_tap_packet_out(interface: pcb->ipsec_ifp, DLT_RAW, packet: tx_ph, NULL, header_len: 0);
1530
1531 length = MIN(kern_packet_get_data_length(tx_ph),
1532 pcb->ipsec_slot_size);
1533
1534 if (length > 0) {
1535 errno_t error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data);
1536 if (error == 0) {
1537 error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT);
1538 if (error == 0) {
1539 // Mark packet from policy
1540 uint32_t policy_id = kern_packet_get_policy_id(tx_ph);
1541 uint32_t skip_policy_id = kern_packet_get_skip_policy_id(tx_ph);
1542 necp_mark_packet_from_ip_with_skip(packet: data, policy_id, skip_policy_id);
1543
1544 // Check policy with NECP
1545 if (!ipsec_netif_check_policy(interface: pcb->ipsec_ifp, data)) {
1546 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - failed policy check\n", pcb->ipsec_ifp->if_xname);
1547 STATS_INC(nifs, NETIF_STATS_DROP);
1548 mbuf_freem(mbuf: data);
1549 data = NULL;
1550 } else {
1551 // Send through encryption
1552 error = ipsec_output(interface: pcb->ipsec_ifp, data);
1553 if (error != 0) {
1554 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - ipsec_output error %d\n", pcb->ipsec_ifp->if_xname, error);
1555 }
1556 }
1557 } else {
1558 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
1559 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1560 STATS_INC(nifs, NETIF_STATS_DROP);
1561 mbuf_freem(mbuf: data);
1562 data = NULL;
1563 }
1564 } else {
1565 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
1566 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
1567 STATS_INC(nifs, NETIF_STATS_DROP);
1568 }
1569 } else {
1570 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s - 0 length packet\n", pcb->ipsec_ifp->if_xname);
1571 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1572 STATS_INC(nifs, NETIF_STATS_DROP);
1573 }
1574
1575 if (data == NULL) {
1576 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_tx %s: no encrypted packet to send\n", pcb->ipsec_ifp->if_xname);
1577 break;
1578 }
1579
1580 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
1581 STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
1582
1583 tx_ring_stats.kcrsi_slots_transferred++;
1584 tx_ring_stats.kcrsi_bytes_transferred += length;
1585 }
1586
1587 if (tx_chain_ph != 0) {
1588 kern_pbufpool_free_chain(pbufpool: tx_ring->ckr_pp, chain: tx_chain_ph);
1589 }
1590
1591 if (tx_pslot) {
1592 kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot);
1593 kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats);
1594 (void)kern_channel_reclaim(tx_ring);
1595 }
1596
1597 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1598 ipsec_data_move_end(pcb);
1599
1600 return 0;
1601}
1602
1603static errno_t
1604ipsec_netif_tx_doorbell_one(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1605 kern_channel_ring_t ring, uint32_t flags, uint8_t ring_idx)
1606{
1607#pragma unused(nxprov)
1608 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1609 boolean_t more = false;
1610 errno_t rc = 0;
1611
1612 VERIFY((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0);
1613
1614 /*
1615 * Refill and sync the ring; we may be racing against another thread doing
1616 * an RX sync that also wants to do kr_enter(), and so use the blocking
1617 * variant here.
1618 */
1619 rc = kern_channel_tx_refill_canblock(ring, UINT32_MAX, UINT32_MAX, true, &more);
1620 if (rc != 0 && rc != EAGAIN && rc != EBUSY) {
1621 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s tx refill failed %d\n", __func__,
1622 pcb->ipsec_if_xname, ring->ckr_name, rc);
1623 }
1624
1625 (void) kr_enter(ring, TRUE);
1626 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
1627 if (ring != pcb->ipsec_netif_txring[ring_idx]) {
1628 // ring no longer valid
1629 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1630 kr_exit(ring);
1631 os_log_error(OS_LOG_DEFAULT, "%s: %s ring %s index %d bad netif_txring 3\n", __func__,
1632 pcb->ipsec_if_xname, ring->ckr_name, ring_idx);
1633 return ENXIO;
1634 }
1635
1636 if (pcb->ipsec_kpipe_count) {
1637 uint32_t tx_available = kern_channel_available_slot_count(ring);
1638 if (pcb->ipsec_netif_txring_size > 0 &&
1639 tx_available >= pcb->ipsec_netif_txring_size - 1) {
1640 // No room left in tx ring, disable output for now
1641 errno_t error = ifnet_disable_output(interface: pcb->ipsec_ifp);
1642 if (error != 0) {
1643 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_tx_doorbell: ifnet_disable_output returned error %d\n", error);
1644 }
1645 }
1646 }
1647
1648 if (pcb->ipsec_kpipe_count) {
1649 kern_channel_ring_t rx_ring = pcb->ipsec_kpipe_rxring[ring_idx];
1650
1651 // Unlock while calling notify
1652 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1653 // Signal the kernel pipe ring to read
1654 if (rx_ring != NULL) {
1655 kern_channel_notify(rx_ring, flags: 0);
1656 }
1657 } else {
1658 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1659 }
1660
1661 kr_exit(ring);
1662
1663 return 0;
1664}
1665
1666static errno_t
1667ipsec_netif_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1668 kern_channel_ring_t ring, __unused uint32_t flags)
1669{
1670 errno_t ret = 0;
1671 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1672
1673 if (!ipsec_data_move_begin(pcb)) {
1674 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1675 return 0;
1676 }
1677
1678 if (ipsec_in_wmm_mode(pcb)) {
1679 for (uint8_t i = 0; i < IPSEC_IF_WMM_RING_COUNT; i++) {
1680 kern_channel_ring_t nring = pcb->ipsec_netif_txring[i];
1681 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring: nring, flags, ring_idx: i);
1682 if (ret) {
1683 break;
1684 }
1685 }
1686 } else {
1687 ret = ipsec_netif_tx_doorbell_one(nxprov, nexus, ring, flags, ring_idx: 0);
1688 }
1689
1690 ipsec_data_move_end(pcb);
1691 return ret;
1692}
1693
1694static errno_t
1695ipsec_netif_sync_rx_mbuf(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1696 kern_channel_ring_t rx_ring, uint32_t flags)
1697{
1698#pragma unused(nxprov)
1699#pragma unused(flags)
1700 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
1701 struct kern_channel_ring_stat_increment rx_ring_stats;
1702
1703 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1704
1705 if (!ipsec_data_move_begin(pcb)) {
1706 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
1707 return 0;
1708 }
1709
1710 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
1711
1712 // Reclaim user-released slots
1713 (void) kern_channel_reclaim(rx_ring);
1714
1715 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
1716
1717 uint32_t avail = kern_channel_available_slot_count(ring: rx_ring);
1718 if (avail == 0) {
1719 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1720 ipsec_data_move_end(pcb);
1721 return 0;
1722 }
1723
1724 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
1725 VERIFY(rx_pp != NULL);
1726 bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats));
1727 kern_channel_slot_t rx_pslot = NULL;
1728 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL);
1729
1730 while (rx_slot != NULL) {
1731 // Check for a waiting packet
1732 lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock);
1733 mbuf_t data = pcb->ipsec_input_chain;
1734 if (data == NULL) {
1735 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
1736 break;
1737 }
1738
1739 // Allocate rx packet
1740 kern_packet_t rx_ph = 0;
1741 errno_t error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph);
1742 if (__improbable(error != 0)) {
1743 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1744 STATS_INC(nifs, NETIF_STATS_DROP);
1745 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
1746 break;
1747 }
1748
1749 // Advance waiting packets
1750 if (pcb->ipsec_input_chain_count > 0) {
1751 pcb->ipsec_input_chain_count--;
1752 }
1753 pcb->ipsec_input_chain = data->m_nextpkt;
1754 data->m_nextpkt = NULL;
1755 if (pcb->ipsec_input_chain == NULL) {
1756 pcb->ipsec_input_chain_last = NULL;
1757 }
1758 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
1759
1760 size_t length = mbuf_pkthdr_len(mbuf: data);
1761
1762 if (length < sizeof(struct ip)) {
1763 // Flush data
1764 mbuf_freem(mbuf: data);
1765 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
1766 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1767 STATS_INC(nifs, NETIF_STATS_DROP);
1768			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length %zu too short to hold an IP header (%zu)\n",
1769			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
1770 continue;
1771 }
1772
1773 uint32_t af = 0;
1774 struct ip *ip = mtod(data, struct ip *);
1775 u_int ip_version = ip->ip_v;
1776 switch (ip_version) {
1777 case 4: {
1778 af = AF_INET;
1779 break;
1780 }
1781 case 6: {
1782 af = AF_INET6;
1783 break;
1784 }
1785 default: {
1786 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
1787 pcb->ipsec_ifp->if_xname, ip_version);
1788 break;
1789 }
1790 }
1791
1792 if (length > PP_BUF_SIZE_DEF(rx_pp) ||
1793 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
1794 // We need to fragment to send up into the netif
1795
1796 u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
1797 if (pcb->ipsec_frag_size_set &&
1798 pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
1799 fragment_mtu = pcb->ipsec_input_frag_size;
1800 }
1801
1802 mbuf_t fragment_chain = NULL;
1803 switch (af) {
1804 case AF_INET: {
1805 // ip_fragment expects the length in host order
1806 ip->ip_len = ntohs(ip->ip_len);
1807
1808 // ip_fragment will modify the original data, don't free
1809 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
1810 if (fragment_error == 0 && data != NULL) {
1811 fragment_chain = data;
1812 } else {
1813 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1814 STATS_INC(nifs, NETIF_STATS_DROP);
1815 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
1816 pcb->ipsec_ifp->if_xname, length, fragment_error);
1817 }
1818 break;
1819 }
1820 case AF_INET6: {
1821 if (length < sizeof(struct ip6_hdr)) {
1822 mbuf_freem(mbuf: data);
1823 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1824 STATS_INC(nifs, NETIF_STATS_DROP);
1825 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
1826 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
1827 } else {
1828 // ip6_do_fragmentation will free the original data on success only
1829 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
1830
1831 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
1832 ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
1833 if (fragment_error == 0 && data != NULL) {
1834 fragment_chain = data;
1835 } else {
1836 mbuf_freem(mbuf: data);
1837 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1838 STATS_INC(nifs, NETIF_STATS_DROP);
1839 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
1840 pcb->ipsec_ifp->if_xname, length, fragment_error);
1841 }
1842 }
1843 break;
1844 }
1845 default: {
1846 // Cannot fragment unknown families
1847 mbuf_freem(mbuf: data);
1848 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
1849 STATS_INC(nifs, NETIF_STATS_DROP);
1850				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: cannot fragment legacy decrypted packet of unknown family, length %zu > %u\n",
1851				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
1852 break;
1853 }
1854 }
1855
1856 if (fragment_chain != NULL) {
1857 // Add fragments to chain before continuing
1858 lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock);
1859 if (pcb->ipsec_input_chain != NULL) {
1860 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
1861 } else {
1862 pcb->ipsec_input_chain = fragment_chain;
1863 }
1864 pcb->ipsec_input_chain_count++;
1865 while (fragment_chain->m_nextpkt) {
1866 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
1867 fragment_chain = fragment_chain->m_nextpkt;
1868 pcb->ipsec_input_chain_count++;
1869 }
1870 pcb->ipsec_input_chain_last = fragment_chain;
1871 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
1872 }
1873
1874 // Make sure to free unused rx packet
1875 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
1876
1877 continue;
1878 }
1879
1880 mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp);
1881
1882 // Fillout rx packet
1883 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
1884 VERIFY(rx_buf != NULL);
1885 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
1886 VERIFY(rx_baddr != NULL);
1887
1888 // Copy-in data from mbuf to buflet
1889 mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr);
1890 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
1891
1892 // Finalize and attach the packet
1893 error = kern_buflet_set_data_offset(rx_buf, 0);
1894 VERIFY(error == 0);
1895 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
1896 VERIFY(error == 0);
1897 error = kern_packet_set_headroom(rx_ph, 0);
1898 VERIFY(error == 0);
1899 error = kern_packet_finalize(rx_ph);
1900 VERIFY(error == 0);
1901 error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph);
1902 VERIFY(error == 0);
1903
1904 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
1905 STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
1906 bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0);
1907
1908 rx_ring_stats.kcrsi_slots_transferred++;
1909 rx_ring_stats.kcrsi_bytes_transferred += length;
1910
1911 if (!pcb->ipsec_ext_ifdata_stats) {
1912 ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0);
1913 }
1914
1915 mbuf_freem(mbuf: data);
1916
1917 // Advance ring
1918 rx_pslot = rx_slot;
1919 rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL);
1920 }
1921
1922 for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
1923 struct kern_channel_ring_stat_increment tx_ring_stats;
1924 bzero(s: &tx_ring_stats, n: sizeof(tx_ring_stats));
1925 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
1926 kern_channel_slot_t tx_pslot = NULL;
1927 kern_channel_slot_t tx_slot = NULL;
1928 if (tx_ring == NULL) {
1929			// Kernel pipe TX ring not set up yet, nothing to read
1930 goto done;
1931 }
1932 // Unlock ipsec before entering ring
1933 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
1934
1935 (void)kr_enter(tx_ring, TRUE);
1936
1937 // Lock again after entering and validate
1938 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
1939
1940 if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
1941 goto done;
1942 }
1943
1944 tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL);
1945 if (tx_slot == NULL) {
1946 // Nothing to read, don't bother signalling
1947 goto done;
1948 }
1949
1950 while (rx_slot != NULL && tx_slot != NULL) {
1951 size_t length = 0;
1952 mbuf_t data = NULL;
1953 errno_t error = 0;
1954 uint32_t af;
1955
1956 // Allocate rx packet
1957 kern_packet_t rx_ph = 0;
1958 error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph);
1959 if (__improbable(error != 0)) {
1960 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
1961 STATS_INC(nifs, NETIF_STATS_DROP);
1962 break;
1963 }
1964
1965 kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot);
1966
1967 // Advance TX ring
1968 tx_pslot = tx_slot;
1969 tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL);
1970
1971 if (tx_ph == 0) {
1972 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
1973 continue;
1974 }
1975
1976 kern_buflet_t tx_buf = kern_packet_get_next_buflet(tx_ph, NULL);
1977 VERIFY(tx_buf != NULL);
1978 uint8_t *tx_baddr = kern_buflet_get_data_address(tx_buf);
1979 VERIFY(tx_baddr != 0);
1980 tx_baddr += kern_buflet_get_data_offset(tx_buf);
1981
1982 length = MIN(kern_packet_get_data_length(tx_ph),
1983 pcb->ipsec_slot_size);
1984
1985 // Increment TX stats
1986 tx_ring_stats.kcrsi_slots_transferred++;
1987 tx_ring_stats.kcrsi_bytes_transferred += length;
1988
1989 if (length >= sizeof(struct ip)) {
1990 error = mbuf_gethdr(how: MBUF_DONTWAIT, type: MBUF_TYPE_HEADER, mbuf: &data);
1991 if (error == 0) {
1992 error = mbuf_copyback(mbuf: data, offset: 0, length, data: tx_baddr, how: MBUF_DONTWAIT);
1993 if (error == 0) {
1994 // Check for wake packet flag
1995 uuid_t flow_uuid;
1996 kern_packet_get_flow_uuid(tx_ph, &flow_uuid);
1997 u_int8_t *id_8 = (u_int8_t *)(uintptr_t)flow_uuid;
1998 if ((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT) {
1999 os_log_info(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: wake packet flag is set\n",
2000 pcb->ipsec_ifp->if_xname);
2001 data->m_pkthdr.pkt_flags |= PKTF_WAKE_PKT;
2002 }
2003
2004 lck_mtx_lock(lck: &pcb->ipsec_kpipe_decrypt_lock);
2005 struct ip *ip = mtod(data, struct ip *);
2006 u_int ip_version = ip->ip_v;
2007 switch (ip_version) {
2008 case 4: {
2009 af = AF_INET;
2010 ip->ip_len = ntohs(ip->ip_len) - sizeof(struct ip);
2011 ip->ip_off = ntohs(ip->ip_off);
2012
2013 if (length < ip->ip_len) {
2014 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv4 packet length too short (%zu < %u)\n",
2015 pcb->ipsec_ifp->if_xname, length, ip->ip_len);
2016 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2017 STATS_INC(nifs, NETIF_STATS_DROP);
2018 mbuf_freem(mbuf: data);
2019 data = NULL;
2020 } else {
2021 data = esp4_input_extended(data, off: sizeof(struct ip), interface: pcb->ipsec_ifp);
2022 }
2023 break;
2024 }
2025 case 6: {
2026 if (length < sizeof(struct ip6_hdr)) {
2027 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short for header %zu\n",
2028 pcb->ipsec_ifp->if_xname, length);
2029 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2030 STATS_INC(nifs, NETIF_STATS_DROP);
2031 mbuf_freem(mbuf: data);
2032 data = NULL;
2033 } else {
2034 af = AF_INET6;
2035 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
2036 const size_t ip6_len = sizeof(*ip6) + ntohs(ip6->ip6_plen);
2037 if (length < ip6_len) {
2038 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: IPv6 packet length too short (%zu < %zu)\n",
2039 pcb->ipsec_ifp->if_xname, length, ip6_len);
2040 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2041 STATS_INC(nifs, NETIF_STATS_DROP);
2042 mbuf_freem(mbuf: data);
2043 data = NULL;
2044 } else {
2045 int offset = sizeof(struct ip6_hdr);
2046 esp6_input_extended(mp: &data, offp: &offset, proto: ip6->ip6_nxt, interface: pcb->ipsec_ifp);
2047 }
2048 }
2049 break;
2050 }
2051 default: {
2052 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: unknown ip version %u\n",
2053 pcb->ipsec_ifp->if_xname, ip_version);
2054 STATS_INC(nifs, NETIF_STATS_DROP);
2055 mbuf_freem(mbuf: data);
2056 data = NULL;
2057 break;
2058 }
2059 }
2060 lck_mtx_unlock(lck: &pcb->ipsec_kpipe_decrypt_lock);
2061 } else {
2062 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_copyback(%zu) error %d\n", pcb->ipsec_ifp->if_xname, length, error);
2063 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
2064 STATS_INC(nifs, NETIF_STATS_DROP);
2065 mbuf_freem(mbuf: data);
2066 data = NULL;
2067 }
2068 } else {
2069 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - mbuf_gethdr error %d\n", pcb->ipsec_ifp->if_xname, error);
2070 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_MBUF);
2071 STATS_INC(nifs, NETIF_STATS_DROP);
2072 }
2073 } else {
2074 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s - bad packet length %zu\n", pcb->ipsec_ifp->if_xname, length);
2075 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2076 STATS_INC(nifs, NETIF_STATS_DROP);
2077 }
2078
2079 if (data == NULL) {
2080				// Failed to get decrypted data
2081 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
2082 continue;
2083 }
2084
2085 length = mbuf_pkthdr_len(mbuf: data);
2086 if (length > PP_BUF_SIZE_DEF(rx_pp)) {
2087 // Flush data
2088 mbuf_freem(mbuf: data);
2089 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
2090 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2091 STATS_INC(nifs, NETIF_STATS_DROP);
2092 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: decrypted packet length %zu > %u\n",
2093 pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
2094 continue;
2095 }
2096
2097 mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp);
2098
2099 // Fillout rx packet
2100 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
2101 VERIFY(rx_buf != NULL);
2102 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
2103 VERIFY(rx_baddr != NULL);
2104
2105 // Copy-in data from mbuf to buflet
2106 mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr);
2107 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
2108
2109 // Finalize and attach the packet
2110 error = kern_buflet_set_data_offset(rx_buf, 0);
2111 VERIFY(error == 0);
2112 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
2113 VERIFY(error == 0);
2114 error = kern_packet_set_link_header_offset(rx_ph, 0);
2115 VERIFY(error == 0);
2116 error = kern_packet_set_network_header_offset(rx_ph, 0);
2117 VERIFY(error == 0);
2118 error = kern_packet_finalize(rx_ph);
2119 VERIFY(error == 0);
2120 error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph);
2121 VERIFY(error == 0);
2122
2123 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2124 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
2125 bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0);
2126
2127 rx_ring_stats.kcrsi_slots_transferred++;
2128 rx_ring_stats.kcrsi_bytes_transferred += length;
2129
2130 if (!pcb->ipsec_ext_ifdata_stats) {
2131 ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0);
2132 }
2133
2134 mbuf_freem(mbuf: data);
2135
2136 rx_pslot = rx_slot;
2137 rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL);
2138 }
2139
2140done:
2141 if (tx_pslot) {
2142 kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot);
2143 kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats);
2144 (void)kern_channel_reclaim(tx_ring);
2145 }
2146
2147 // Unlock first, then exit ring
2148 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
2149 if (tx_ring != NULL) {
2150 if (tx_pslot != NULL) {
2151 kern_channel_notify(tx_ring, flags: 0);
2152 }
2153 kr_exit(tx_ring);
2154 }
2155
2156 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
2157 }
2158
2159 if (rx_pslot) {
2160 kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot);
2161 kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats);
2162 }
2163
2164
2165 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
2166
2167 ipsec_data_move_end(pcb);
2168 return 0;
2169}
2170
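/*
 * Decrypt a single kpipe packet directly into a netif packet. Copies the
 * IP header into the netif buflet, hands both packets to esp_kpipe_input()
 * under the decrypt lock, propagates the wake-packet flag, and finalizes
 * the netif packet. Returns 0 on success or an errno; drop statistics are
 * incremented on failure.
 */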
2171static errno_t
2172ipsec_transform_kpipe_pkt_to_netif_pkt(struct ipsec_pcb *pcb,
2173 struct kern_channel_ring_stat_increment *tx_ring_stats,
2174 struct netif_stats *nifs, kern_packet_t kpipe_ph, kern_packet_t netif_ph)
2175{
2176 kern_buflet_t kpipe_buf = NULL, netif_buf = NULL;
2177 uint8_t *kpipe_baddr = NULL, *netif_baddr = NULL;
2178 uuid_t flow_uuid;
2179 size_t iphlen = 0;
2180 uint32_t kpipe_buf_len = 0, netif_buf_lim = 0;
2181 int err = 0;
2182
2183 VERIFY(kpipe_ph != 0);
2184 VERIFY(netif_ph != 0);
2185 VERIFY(pcb != NULL);
2186 VERIFY(tx_ring_stats != NULL);
2187 VERIFY(nifs != NULL);
2188
2189 kpipe_buf = kern_packet_get_next_buflet(kpipe_ph, NULL);
2190 VERIFY(kpipe_buf != NULL);
2191 kpipe_baddr = kern_buflet_get_data_address(kpipe_buf);
2192 VERIFY(kpipe_baddr != NULL);
2193 kpipe_baddr += kern_buflet_get_data_offset(kpipe_buf);
2194 kpipe_buf_len = kern_buflet_get_data_length(kpipe_buf);
2195
2196 netif_buf = kern_packet_get_next_buflet(netif_ph, NULL);
2197 VERIFY(netif_buf != NULL);
2198 netif_baddr = kern_buflet_get_data_address(netif_buf);
2199 VERIFY(netif_baddr != NULL);
2200 netif_baddr += kern_buflet_get_data_offset(netif_buf);
2201 netif_buf_lim = __buflet_get_data_limit(buf: netif_buf);
2202 netif_buf_lim -= __buflet_get_data_offset(buf: netif_buf);
2203
2204 if (kpipe_buf_len > pcb->ipsec_slot_size) {
2205 os_log_info(OS_LOG_DEFAULT,
2206 "ipsec_transform_kpipe_pkt_to_netif_pkt %s: kpipe buffer length "
2207 "%u > pcb ipsec slot size %u", pcb->ipsec_ifp->if_xname,
2208 kpipe_buf_len, pcb->ipsec_slot_size);
2209 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2210 err = EMSGSIZE;
2211 goto bad;
2212 }
2213
2214 tx_ring_stats->kcrsi_slots_transferred++;
2215 tx_ring_stats->kcrsi_bytes_transferred += kpipe_buf_len;
2216
2217 if (__improbable(kpipe_buf_len < sizeof(struct ip))) {
2218 os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
2219 "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
2220 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2221 err = EBADMSG;
2222 goto bad;
2223 }
2224
2225 struct ip *ip = (struct ip *)(void *)kpipe_baddr;
2226 ASSERT(IP_HDR_ALIGNED_P(ip));
2227
2228 u_int ip_vers = ip->ip_v;
2229 switch (ip_vers) {
2230 case IPVERSION: {
2231#ifdef _IP_VHL
2232 iphlen = IP_VHL_HL(ip->ip_vhl) << 2;
2233#else /* _IP_VHL */
2234 iphlen = ip->ip_hl << 2;
2235#endif /* _IP_VHL */
2236 break;
2237 }
2238 case 6: {
2239 iphlen = sizeof(struct ip6_hdr);
2240 break;
2241 }
2242 default: {
2243 os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
2244 "ip version %u\n", pcb->ipsec_ifp->if_xname, ip_vers);
2245 err = EBADMSG;
2246 goto bad;
2247 }
2248 }
2249
2250 if (__improbable(kpipe_buf_len < iphlen)) {
2251 os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - bad "
2252 "packet length %u\n", pcb->ipsec_ifp->if_xname, kpipe_buf_len);
2253 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2254 err = EBADMSG;
2255 goto bad;
2256 }
2257
2258 if (__improbable(netif_buf_lim < iphlen)) {
2259 os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s - netif "
2260 "buffer length %u too short\n", pcb->ipsec_ifp->if_xname, netif_buf_lim);
2261 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2262 err = EBADMSG;
2263 goto bad;
2264 }
2265
2266 memcpy(dst: netif_baddr, src: kpipe_baddr, n: iphlen);
2267 __buflet_set_data_length(buf: netif_buf, dlen: (uint16_t)iphlen);
2268
2269 lck_mtx_lock(lck: &pcb->ipsec_kpipe_decrypt_lock);
2270 err = esp_kpipe_input(pcb->ipsec_ifp, kpipe_ph, netif_ph);
2271 lck_mtx_unlock(lck: &pcb->ipsec_kpipe_decrypt_lock);
2272
2273 if (__improbable((err != 0))) {
2274 goto bad;
2275 }
2276
2277 kern_packet_get_flow_uuid(kpipe_ph, &flow_uuid);
2278 uint8_t *id_8 = (uint8_t *)(uintptr_t)flow_uuid;
2279 if (__improbable((id_8[0] & IPSEC_KPIPE_FLAG_WAKE_PKT) == IPSEC_KPIPE_FLAG_WAKE_PKT)) {
2280 os_log_info(OS_LOG_DEFAULT, "ipsec_transform_kpipe_pkt_to_netif_pkt %s: wake packet "
2281 "flag is set\n", pcb->ipsec_ifp->if_xname);
2282 __packet_set_wake_flag(ph: netif_ph);
2283 }
2284
2285 kern_packet_clear_flow_uuid(netif_ph);
2286 err = kern_buflet_set_data_offset(netif_buf, 0);
2287 VERIFY(err == 0);
2288 err = kern_packet_set_link_header_offset(netif_ph, 0);
2289 VERIFY(err == 0);
2290 err = kern_packet_set_network_header_offset(netif_ph, 0);
2291 VERIFY(err == 0);
2292 err = kern_packet_finalize(netif_ph);
2293 VERIFY(err == 0);
2294
2295 return 0;
2296bad:
2297 STATS_INC(nifs, NETIF_STATS_DROP);
2298 return err;
2299}
2300
2301
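/*
 * RX sync for the packet (non-mbuf) kpipe mode. The legacy mbuf input
 * chain is drained exactly as in ipsec_netif_sync_rx_mbuf(); packets
 * arriving on the kpipe TX rings are then decrypted in place with
 * ipsec_transform_kpipe_pkt_to_netif_pkt() instead of bouncing through
 * an intermediate mbuf copy.
 */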
2302static errno_t
2303ipsec_netif_sync_rx_packet(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2304 kern_channel_ring_t rx_ring, uint32_t flags)
2305{
2306#pragma unused(nxprov)
2307#pragma unused(flags)
2308 struct ipsec_pcb *pcb = kern_nexus_get_context(nexus);
2309 struct kern_channel_ring_stat_increment rx_ring_stats;
2310
2311 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2312
2313 if (!ipsec_data_move_begin(pcb)) {
2314 os_log_error(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__, if_name(pcb->ipsec_ifp));
2315 return 0;
2316 }
2317
2318 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
2319
2320 // Reclaim user-released slots
2321 (void) kern_channel_reclaim(rx_ring);
2322
2323 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2324
2325 uint32_t avail = kern_channel_available_slot_count(ring: rx_ring);
2326 if (avail == 0) {
2327 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
2328 ipsec_data_move_end(pcb);
2329 return 0;
2330 }
2331
2332 struct kern_pbufpool *rx_pp = rx_ring->ckr_pp;
2333 VERIFY(rx_pp != NULL);
2334 bzero(s: &rx_ring_stats, n: sizeof(rx_ring_stats));
2335 kern_channel_slot_t rx_pslot = NULL;
2336 kern_channel_slot_t rx_slot = kern_channel_get_next_slot(kring: rx_ring, NULL, NULL);
2337
2338 while (rx_slot != NULL) {
2339 // Check for a waiting packet
2340 lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock);
2341 mbuf_t data = pcb->ipsec_input_chain;
2342 if (data == NULL) {
2343 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
2344 break;
2345 }
2346
2347 // Allocate rx packet
2348 kern_packet_t rx_ph = 0;
2349 errno_t error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph);
2350 if (__improbable(error != 0)) {
2351 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
2352 STATS_INC(nifs, NETIF_STATS_DROP);
2353 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
2354 break;
2355 }
2356
2357 // Advance waiting packets
2358 if (pcb->ipsec_input_chain_count > 0) {
2359 pcb->ipsec_input_chain_count--;
2360 }
2361 pcb->ipsec_input_chain = data->m_nextpkt;
2362 data->m_nextpkt = NULL;
2363 if (pcb->ipsec_input_chain == NULL) {
2364 pcb->ipsec_input_chain_last = NULL;
2365 }
2366 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
2367
2368 size_t length = mbuf_pkthdr_len(mbuf: data);
2369
2370 if (length < sizeof(struct ip)) {
2371 // Flush data
2372 mbuf_freem(mbuf: data);
2373 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
2374 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2375 STATS_INC(nifs, NETIF_STATS_DROP);
2376			os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy decrypted packet length %zu too short to hold an IP header (%zu)\n",
2377			    pcb->ipsec_ifp->if_xname, length, sizeof(struct ip));
2378 continue;
2379 }
2380
2381 uint32_t af = 0;
2382 struct ip *ip = mtod(data, struct ip *);
2383 u_int ip_version = ip->ip_v;
2384 switch (ip_version) {
2385 case 4: {
2386 af = AF_INET;
2387 break;
2388 }
2389 case 6: {
2390 af = AF_INET6;
2391 break;
2392 }
2393 default: {
2394 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: legacy unknown ip version %u\n",
2395 pcb->ipsec_ifp->if_xname, ip_version);
2396 break;
2397 }
2398 }
2399
2400 if (length > PP_BUF_SIZE_DEF(rx_pp) ||
2401 (pcb->ipsec_frag_size_set && length > pcb->ipsec_input_frag_size)) {
2402 // We need to fragment to send up into the netif
2403
2404 u_int32_t fragment_mtu = PP_BUF_SIZE_DEF(rx_pp);
2405 if (pcb->ipsec_frag_size_set &&
2406 pcb->ipsec_input_frag_size < PP_BUF_SIZE_DEF(rx_pp)) {
2407 fragment_mtu = pcb->ipsec_input_frag_size;
2408 }
2409
2410 mbuf_t fragment_chain = NULL;
2411 switch (af) {
2412 case AF_INET: {
2413 // ip_fragment expects the length in host order
2414 ip->ip_len = ntohs(ip->ip_len);
2415
2416 // ip_fragment will modify the original data, don't free
2417 int fragment_error = ip_fragment(data, pcb->ipsec_ifp, fragment_mtu, TRUE);
2418 if (fragment_error == 0 && data != NULL) {
2419 fragment_chain = data;
2420 } else {
2421 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2422 STATS_INC(nifs, NETIF_STATS_DROP);
2423 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv4 packet of length %zu (%d)\n",
2424 pcb->ipsec_ifp->if_xname, length, fragment_error);
2425 }
2426 break;
2427 }
2428 case AF_INET6: {
2429 if (length < sizeof(struct ip6_hdr)) {
2430 mbuf_freem(mbuf: data);
2431 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2432 STATS_INC(nifs, NETIF_STATS_DROP);
2433 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu < %zu\n",
2434 pcb->ipsec_ifp->if_xname, length, sizeof(struct ip6_hdr));
2435 } else {
2436 // ip6_do_fragmentation will free the original data on success only
2437 struct ip6_hdr *ip6 = mtod(data, struct ip6_hdr *);
2438
2439 int fragment_error = ip6_do_fragmentation(&data, 0, pcb->ipsec_ifp, sizeof(struct ip6_hdr),
2440 ip6, NULL, fragment_mtu, ip6->ip6_nxt, htonl(ip6_randomid((uint64_t)data)));
2441 if (fragment_error == 0 && data != NULL) {
2442 fragment_chain = data;
2443 } else {
2444 mbuf_freem(mbuf: data);
2445 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2446 STATS_INC(nifs, NETIF_STATS_DROP);
2447 os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: failed to fragment IPv6 packet of length %zu (%d)\n",
2448 pcb->ipsec_ifp->if_xname, length, fragment_error);
2449 }
2450 }
2451 break;
2452 }
2453 default: {
2454 // Cannot fragment unknown families
2455 mbuf_freem(mbuf: data);
2456 STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
2457 STATS_INC(nifs, NETIF_STATS_DROP);
2458				os_log_error(OS_LOG_DEFAULT, "ipsec_netif_sync_rx %s: cannot fragment legacy decrypted packet of unknown family, length %zu > %u\n",
2459				    pcb->ipsec_ifp->if_xname, length, PP_BUF_SIZE_DEF(rx_pp));
2460 break;
2461 }
2462 }
2463
2464 if (fragment_chain != NULL) {
2465 // Add fragments to chain before continuing
2466 lck_mtx_lock(lck: &pcb->ipsec_input_chain_lock);
2467 if (pcb->ipsec_input_chain != NULL) {
2468 pcb->ipsec_input_chain_last->m_nextpkt = fragment_chain;
2469 } else {
2470 pcb->ipsec_input_chain = fragment_chain;
2471 }
2472 pcb->ipsec_input_chain_count++;
2473 while (fragment_chain->m_nextpkt) {
2474 VERIFY(fragment_chain != fragment_chain->m_nextpkt);
2475 fragment_chain = fragment_chain->m_nextpkt;
2476 pcb->ipsec_input_chain_count++;
2477 }
2478 pcb->ipsec_input_chain_last = fragment_chain;
2479 lck_mtx_unlock(lck: &pcb->ipsec_input_chain_lock);
2480 }
2481
2482 // Make sure to free unused rx packet
2483 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
2484
2485 continue;
2486 }
2487
2488 mbuf_pkthdr_setrcvif(mbuf: data, ifp: pcb->ipsec_ifp);
2489
2490 // Fillout rx packet
2491 kern_buflet_t rx_buf = kern_packet_get_next_buflet(rx_ph, NULL);
2492 VERIFY(rx_buf != NULL);
2493 void *rx_baddr = kern_buflet_get_data_address(rx_buf);
2494 VERIFY(rx_baddr != NULL);
2495
2496 // Copy-in data from mbuf to buflet
2497 mbuf_copydata(mbuf: data, offset: 0, length, out_data: (void *)rx_baddr);
2498 kern_packet_clear_flow_uuid(rx_ph); // Zero flow id
2499
2500 // Finalize and attach the packet
2501 error = kern_buflet_set_data_offset(rx_buf, 0);
2502 VERIFY(error == 0);
2503 error = kern_buflet_set_data_length(rx_buf, (uint16_t)length);
2504 VERIFY(error == 0);
2505 error = kern_packet_set_headroom(rx_ph, 0);
2506 VERIFY(error == 0);
2507 error = kern_packet_finalize(rx_ph);
2508 VERIFY(error == 0);
2509 error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph);
2510 VERIFY(error == 0);
2511
2512 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2513 STATS_INC(nifs, NETIF_STATS_RX_COPY_MBUF);
2514 bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0);
2515
2516 rx_ring_stats.kcrsi_slots_transferred++;
2517 rx_ring_stats.kcrsi_bytes_transferred += length;
2518
2519 if (!pcb->ipsec_ext_ifdata_stats) {
2520 ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1, bytes_in: (uint16_t)length, errors_in: 0);
2521 }
2522
2523 mbuf_freem(mbuf: data);
2524
2525 // Advance ring
2526 rx_pslot = rx_slot;
2527 rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL);
2528 }
2529
2530 for (uint8_t ring_idx = 0; ring_idx < pcb->ipsec_kpipe_count; ring_idx++) {
2531 struct kern_channel_ring_stat_increment tx_ring_stats = {};
2532 kern_channel_slot_t tx_pslot = NULL;
2533 kern_channel_slot_t tx_slot = NULL;
2534
2535 kern_channel_ring_t tx_ring = pcb->ipsec_kpipe_txring[ring_idx];
2536 if (tx_ring == NULL) {
2537			// Kernel pipe TX ring not set up yet, nothing to read
2538 goto done;
2539 }
2540
2541 // Unlock ipsec before entering ring
2542 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
2543
2544 (void)kr_enter(tx_ring, TRUE);
2545
2546 // Lock again after entering and validate
2547 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
2548
2549 if (tx_ring != pcb->ipsec_kpipe_txring[ring_idx]) {
2550 goto done;
2551 }
2552
2553 tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL);
2554 if (tx_slot == NULL) {
2555 // Nothing to read, don't bother signalling
2556 goto done;
2557 }
2558
2559 while (rx_slot != NULL && tx_slot != NULL) {
2560 errno_t error = 0;
2561
2562 // Allocate rx packet
2563 kern_packet_t rx_ph = 0;
2564 error = kern_pbufpool_alloc_nosleep(pbufpool: rx_pp, bufcnt: 1, packet: &rx_ph);
2565 if (__improbable(error != 0)) {
2566 STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
2567 STATS_INC(nifs, NETIF_STATS_DROP);
2568 break;
2569 }
2570
2571 kern_packet_t tx_ph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot);
2572 tx_pslot = tx_slot;
2573 tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL);
2574 if (tx_ph == 0) {
2575 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
2576 continue;
2577 }
2578
2579 error = ipsec_transform_kpipe_pkt_to_netif_pkt(pcb,
2580 tx_ring_stats: &tx_ring_stats, nifs, kpipe_ph: tx_ph, netif_ph: rx_ph);
2581 if (error != 0) {
2582 // Failed to get decrypted packet
2583 kern_pbufpool_free(pbufpool: rx_pp, rx_ph);
2584 continue;
2585 }
2586
2587 error = kern_channel_slot_attach_packet(ring: rx_ring, slot: rx_slot, packet: rx_ph);
2588 VERIFY(error == 0);
2589
2590 STATS_INC(nifs, NETIF_STATS_RX_PACKETS);
2591 STATS_INC(nifs, NETIF_STATS_RX_COPY_DIRECT);
2592
2593 bpf_tap_packet_in(interface: pcb->ipsec_ifp, DLT_RAW, packet: rx_ph, NULL, header_len: 0);
2594
2595 rx_ring_stats.kcrsi_slots_transferred++;
2596 rx_ring_stats.kcrsi_bytes_transferred += kern_packet_get_data_length(rx_ph);
2597
2598 if (!pcb->ipsec_ext_ifdata_stats) {
2599 ifnet_stat_increment_in(interface: pcb->ipsec_ifp, packets_in: 1,
2600 bytes_in: kern_packet_get_data_length(rx_ph), errors_in: 0);
2601 }
2602
2603 rx_pslot = rx_slot;
2604 rx_slot = kern_channel_get_next_slot(kring: rx_ring, slot: rx_slot, NULL);
2605 }
2606
2607done:
2608 if (tx_pslot) {
2609 kern_channel_advance_slot(kring: tx_ring, slot: tx_pslot);
2610 kern_channel_increment_ring_net_stats(ring: tx_ring, pcb->ipsec_ifp, stats: &tx_ring_stats);
2611 (void)kern_channel_reclaim(tx_ring);
2612 }
2613
2614 // Unlock first, then exit ring
2615 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
2616 if (tx_ring != NULL) {
2617 if (tx_pslot != NULL) {
2618 kern_channel_notify(tx_ring, flags: 0);
2619 }
2620 kr_exit(tx_ring);
2621 }
2622
2623 lck_rw_lock_shared(lck: &pcb->ipsec_pcb_lock);
2624 }
2625
2626 if (rx_pslot) {
2627 kern_channel_advance_slot(kring: rx_ring, slot: rx_pslot);
2628 kern_channel_increment_ring_net_stats(ring: rx_ring, pcb->ipsec_ifp, stats: &rx_ring_stats);
2629 }
2630
2631
2632 lck_rw_unlock_shared(lck: &pcb->ipsec_pcb_lock);
2633
2634 ipsec_data_move_end(pcb);
2635 return 0;
2636}
2637
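/*
 * Dispatch RX sync to the mbuf or packet implementation based on the
 * ipsec_kpipe_mbuf mode selector.
 */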
2638static errno_t
2639ipsec_netif_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2640 kern_channel_ring_t rx_ring, uint32_t flags)
2641{
2642 if (__improbable(ipsec_kpipe_mbuf == 1)) {
2643 return ipsec_netif_sync_rx_mbuf(nxprov, nexus, rx_ring, flags);
2644 } else {
2645 return ipsec_netif_sync_rx_packet(nxprov, nexus, rx_ring, flags);
2646 }
2647}
2648
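/*
 * Create the netif nexus provider and instance for this interface: size
 * the nexus attributes and packet pool from the pcb configuration,
 * register the provider, and allocate the net provider instance that
 * attaches the ifnet. On failure, everything allocated here is torn down
 * before returning.
 */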
2649static errno_t
2650ipsec_nexus_ifattach(struct ipsec_pcb *pcb,
2651 struct ifnet_init_eparams *init_params,
2652 struct ifnet **ifp)
2653{
2654 errno_t err;
2655 nexus_controller_t controller = kern_nexus_shared_controller();
2656 struct kern_nexus_net_init net_init;
2657 struct kern_pbufpool_init pp_init;
2658
2659 nexus_name_t provider_name;
2660 snprintf((char *)provider_name, count: sizeof(provider_name),
2661 "com.apple.netif.%s", pcb->ipsec_if_xname);
2662
2663 struct kern_nexus_provider_init prov_init = {
2664 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
2665 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
2666 .nxpi_pre_connect = ipsec_nexus_pre_connect,
2667 .nxpi_connected = ipsec_nexus_connected,
2668 .nxpi_pre_disconnect = ipsec_netif_pre_disconnect,
2669 .nxpi_disconnected = ipsec_nexus_disconnected,
2670 .nxpi_ring_init = ipsec_netif_ring_init,
2671 .nxpi_ring_fini = ipsec_netif_ring_fini,
2672 .nxpi_slot_init = NULL,
2673 .nxpi_slot_fini = NULL,
2674 .nxpi_sync_tx = ipsec_netif_sync_tx,
2675 .nxpi_sync_rx = ipsec_netif_sync_rx,
2676 .nxpi_tx_doorbell = ipsec_netif_tx_doorbell,
2677 };
2678
2679 nexus_attr_t nxa = NULL;
2680 err = kern_nexus_attr_create(&nxa);
2681 IPSEC_IF_VERIFY(err == 0);
2682 if (err != 0) {
2683 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
2684 __func__, err);
2685 goto failed;
2686 }
2687
2688 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
2689 err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size);
2690 VERIFY(err == 0);
2691
2692 // Reset ring size for netif nexus to limit memory usage
2693 uint64_t ring_size = pcb->ipsec_netif_ring_size;
2694 err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_SLOTS, value: ring_size);
2695 VERIFY(err == 0);
2696 err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_SLOTS, value: ring_size);
2697 VERIFY(err == 0);
2698
2699 assert(err == 0);
2700
2701 if (ipsec_in_wmm_mode(pcb)) {
2702 os_log(OS_LOG_DEFAULT, "%s: %s enabling wmm mode\n",
2703 __func__, pcb->ipsec_if_xname);
2704
2705 init_params->output_sched_model = IFNET_SCHED_MODEL_DRIVER_MANAGED;
2706
2707 err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_RINGS,
2708 IPSEC_NETIF_WMM_TX_RING_COUNT);
2709 VERIFY(err == 0);
2710 err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_RINGS,
2711 IPSEC_NETIF_WMM_RX_RING_COUNT);
2712 VERIFY(err == 0);
2713
2714 err = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_QMAP, value: NEXUS_QMAP_TYPE_WMM);
2715 VERIFY(err == 0);
2716 }
2717
2718 pcb->ipsec_netif_txring_size = ring_size;
2719
2720 bzero(s: &pp_init, n: sizeof(pp_init));
2721 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
2722 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
2723 // Note: we need more packets than can be held in the tx and rx rings because
2724 // packets can also be in the AQM queue(s)
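	// (e.g. a netif ring size of 64 with one kpipe enabled sizes the pool at 64 * (2*1 + 1) = 192 packets)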
2725 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * (2 * pcb->ipsec_kpipe_count + 1);
2726 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
2727 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
2728 pp_init.kbi_max_frags = 1;
2729 (void) snprintf((char *)pp_init.kbi_name, count: sizeof(pp_init.kbi_name),
2730 "%s", provider_name);
2731 pp_init.kbi_ctx = NULL;
2732 pp_init.kbi_ctx_retain = NULL;
2733 pp_init.kbi_ctx_release = NULL;
2734
2735 err = kern_pbufpool_create(&pp_init, &pcb->ipsec_netif_pp, NULL);
2736 if (err != 0) {
2737		os_log_error(OS_LOG_DEFAULT, "%s pbufpool create failed, error %d\n", __func__, err);
2738 goto failed;
2739 }
2740
2741 err = kern_nexus_controller_register_provider(ctl: controller,
2742 dom_prov_uuid: ipsec_nx_dom_prov,
2743 provider_name,
2744 init: &prov_init,
2745 init_len: sizeof(prov_init),
2746 nxa,
2747 nx_prov_uuid: &pcb->ipsec_nx.if_provider);
2748 IPSEC_IF_VERIFY(err == 0);
2749 if (err != 0) {
2750 os_log_error(OS_LOG_DEFAULT, "%s register provider failed, error %d\n",
2751 __func__, err);
2752 goto failed;
2753 }
2754
2755 bzero(s: &net_init, n: sizeof(net_init));
2756 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2757 net_init.nxneti_flags = 0;
2758 net_init.nxneti_eparams = init_params;
2759 net_init.nxneti_lladdr = NULL;
2760 net_init.nxneti_prepare = ipsec_netif_prepare;
2761 net_init.nxneti_rx_pbufpool = pcb->ipsec_netif_pp;
2762 net_init.nxneti_tx_pbufpool = pcb->ipsec_netif_pp;
2763 err = kern_nexus_controller_alloc_net_provider_instance(ctl: controller,
2764 nx_prov_uuid: pcb->ipsec_nx.if_provider,
2765 nexus_context: pcb,
2766 NULL,
2767 nx_uuid: &pcb->ipsec_nx.if_instance,
2768 init: &net_init,
2769 ifp);
2770 IPSEC_IF_VERIFY(err == 0);
2771 if (err != 0) {
2772 os_log_error(OS_LOG_DEFAULT, "%s alloc_net_provider_instance failed, %d\n",
2773 __func__, err);
2774 kern_nexus_controller_deregister_provider(ctl: controller,
2775 nx_prov_uuid: pcb->ipsec_nx.if_provider);
2776 uuid_clear(uu: pcb->ipsec_nx.if_provider);
2777 goto failed;
2778 }
2779
2780failed:
2781 if (nxa) {
2782 kern_nexus_attr_destroy(attr: nxa);
2783 }
2784 if (err && pcb->ipsec_netif_pp != NULL) {
2785 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2786 pcb->ipsec_netif_pp = NULL;
2787 }
2788 return err;
2789}
2790
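/*
 * Free a nexus provider instance and deregister its provider, clearing
 * both UUIDs. Either UUID may be null, in which case that step is skipped.
 */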
2791static void
2792ipsec_detach_provider_and_instance(uuid_t provider, uuid_t instance)
2793{
2794 nexus_controller_t controller = kern_nexus_shared_controller();
2795 errno_t err;
2796
2797 if (!uuid_is_null(uu: instance)) {
2798 err = kern_nexus_controller_free_provider_instance(ctl: controller,
2799 nx_uuid: instance);
2800 if (err != 0) {
2801 os_log_error(OS_LOG_DEFAULT, "%s free_provider_instance failed %d\n",
2802 __func__, err);
2803 }
2804 uuid_clear(uu: instance);
2805 }
2806 if (!uuid_is_null(uu: provider)) {
2807 err = kern_nexus_controller_deregister_provider(ctl: controller,
2808 nx_prov_uuid: provider);
2809 if (err != 0) {
2810 os_log_error(OS_LOG_DEFAULT, "%s deregister_provider %d\n", __func__, err);
2811 }
2812 uuid_clear(uu: provider);
2813 }
2814 return;
2815}
2816
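/*
 * Tear down the full nexus stack for this interface: detach the
 * flowswitch from the device port, free the flowswitch and netif
 * providers and instances, and destroy the netif packet pool.
 */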
2817static void
2818ipsec_nexus_detach(struct ipsec_pcb *pcb)
2819{
2820 ipsec_nx_t nx = &pcb->ipsec_nx;
2821 nexus_controller_t controller = kern_nexus_shared_controller();
2822 errno_t err;
2823
2824 if (!uuid_is_null(uu: nx->fsw_device)) {
2825 err = kern_nexus_ifdetach(ctl: controller,
2826 nx_uuid: nx->fsw_instance,
2827 nx_if_uuid: nx->fsw_device);
2828 if (err != 0) {
2829 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_ifdetach ms device failed %d\n",
2830 __func__, err);
2831 }
2832 }
2833
2834 ipsec_detach_provider_and_instance(provider: nx->fsw_provider,
2835 instance: nx->fsw_instance);
2836 ipsec_detach_provider_and_instance(provider: nx->if_provider,
2837 instance: nx->if_instance);
2838
2839 if (pcb->ipsec_netif_pp != NULL) {
2840 kern_pbufpool_destroy(pcb->ipsec_netif_pp);
2841 pcb->ipsec_netif_pp = NULL;
2842 }
2843 memset(s: nx, c: 0, n: sizeof(*nx));
2844}
2845
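/*
 * Register a flowswitch nexus provider named
 * "com.apple.<type_name>.<ifname>" and allocate an instance of it, sized
 * from the pcb's slot size and flowswitch ring configuration.
 */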
2846static errno_t
2847ipsec_create_fs_provider_and_instance(struct ipsec_pcb *pcb,
2848 const char *type_name,
2849 const char *ifname,
2850 uuid_t *provider, uuid_t *instance)
2851{
2852 nexus_attr_t attr = NULL;
2853 nexus_controller_t controller = kern_nexus_shared_controller();
2854 uuid_t dom_prov;
2855 errno_t err;
2856 struct kern_nexus_init init;
2857 nexus_name_t provider_name;
2858
2859 err = kern_nexus_get_default_domain_provider(type: NEXUS_TYPE_FLOW_SWITCH,
2860 dom_prov_uuid: &dom_prov);
2861 IPSEC_IF_VERIFY(err == 0);
2862 if (err != 0) {
2863 os_log_error(OS_LOG_DEFAULT, "%s can't get %s provider, error %d\n",
2864 __func__, type_name, err);
2865 goto failed;
2866 }
2867
2868 err = kern_nexus_attr_create(&attr);
2869 IPSEC_IF_VERIFY(err == 0);
2870 if (err != 0) {
2871 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
2872 __func__, err);
2873 goto failed;
2874 }
2875
2876 uint64_t slot_buffer_size = pcb->ipsec_slot_size;
2877 err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size);
2878 VERIFY(err == 0);
2879
2880 // Reset ring size for flowswitch nexus to limit memory usage. Larger RX than netif.
2881 uint64_t tx_ring_size = pcb->ipsec_tx_fsw_ring_size;
2882 err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_TX_SLOTS, value: tx_ring_size);
2883 VERIFY(err == 0);
2884 uint64_t rx_ring_size = pcb->ipsec_rx_fsw_ring_size;
2885 err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_RX_SLOTS, value: rx_ring_size);
2886 VERIFY(err == 0);
2887 /*
2888 * Configure flowswitch to use super-packet (multi-buflet).
2889 * This allows flowswitch to perform intra-stack packet aggregation.
2890 */
2891 err = kern_nexus_attr_set(attr, type: NEXUS_ATTR_MAX_FRAGS,
2892 NX_FSW_TCP_RX_AGG_ENABLED() ? NX_PBUF_FRAGS_MAX : 1);
2893 VERIFY(err == 0);
2894
2895 snprintf((char *)provider_name, count: sizeof(provider_name),
2896 "com.apple.%s.%s", type_name, ifname);
2897 err = kern_nexus_controller_register_provider(ctl: controller,
2898 dom_prov_uuid: dom_prov,
2899 provider_name,
2900 NULL,
2901 init_len: 0,
2902 nxa: attr,
2903 nx_prov_uuid: provider);
2904 kern_nexus_attr_destroy(attr);
2905 attr = NULL;
2906 IPSEC_IF_VERIFY(err == 0);
2907 if (err != 0) {
2908 os_log_error(OS_LOG_DEFAULT, "%s register %s provider failed, error %d\n",
2909 __func__, type_name, err);
2910 goto failed;
2911 }
2912 bzero(s: &init, n: sizeof(init));
2913 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
2914 err = kern_nexus_controller_alloc_provider_instance(ctl: controller,
2915 nx_prov_uuid: *provider,
2916 NULL, NULL,
2917 nx_uuid: instance, init: &init);
2918 IPSEC_IF_VERIFY(err == 0);
2919 if (err != 0) {
2920 os_log_error(OS_LOG_DEFAULT, "%s alloc_provider_instance %s failed, %d\n",
2921 __func__, type_name, err);
2922 kern_nexus_controller_deregister_provider(ctl: controller,
2923 nx_prov_uuid: *provider);
2924 uuid_clear(uu: *provider);
2925 }
2926failed:
2927 return err;
2928}
2929
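/*
 * Attach a flowswitch to the interface's netif device port and save the
 * flowswitch agent UUID for later use. On failure the nexus stack is
 * detached and the ifnet is destroyed.
 */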
2930static errno_t
2931ipsec_flowswitch_attach(struct ipsec_pcb *pcb)
2932{
2933 nexus_controller_t controller = kern_nexus_shared_controller();
2934 errno_t err = 0;
2935 ipsec_nx_t nx = &pcb->ipsec_nx;
2936
2937 // Allocate flowswitch
2938 err = ipsec_create_fs_provider_and_instance(pcb,
2939 type_name: "flowswitch",
2940 ifname: pcb->ipsec_ifp->if_xname,
2941 provider: &nx->fsw_provider,
2942 instance: &nx->fsw_instance);
2943 if (err != 0) {
2944		os_log_error(OS_LOG_DEFAULT, "%s: failed to create flowswitch provider and instance\n",
2945		    __func__);
2946 goto failed;
2947 }
2948
2949 // Attach flowswitch to device port
2950 err = kern_nexus_ifattach(controller, nx_uuid: nx->fsw_instance,
2951 NULL, nx_attachee: nx->if_instance,
2952 FALSE, nx_if_uuid: &nx->fsw_device);
2953 if (err != 0) {
2954 os_log_error(OS_LOG_DEFAULT, "%s kern_nexus_ifattach ms device %d\n", __func__, err);
2955 goto failed;
2956 }
2957
2958 // Extract the agent UUID and save for later
2959 struct kern_nexus *flowswitch_nx = nx_find(nx->fsw_instance, false);
2960 if (flowswitch_nx != NULL) {
2961 struct nx_flowswitch *flowswitch = NX_FSW_PRIVATE(flowswitch_nx);
2962 if (flowswitch != NULL) {
2963 FSW_RLOCK(flowswitch);
2964 uuid_copy(dst: nx->fsw_agent, src: flowswitch->fsw_agent_uuid);
2965 FSW_UNLOCK(flowswitch);
2966 } else {
2967 os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - flowswitch is NULL\n");
2968 }
2969 nx_release(flowswitch_nx);
2970 } else {
2971 os_log_error(OS_LOG_DEFAULT, "ipsec_flowswitch_attach - unable to find flowswitch nexus\n");
2972 }
2973
2974 return 0;
2975
2976failed:
2977 ipsec_nexus_detach(pcb);
2978
2979 errno_t detach_error = 0;
2980 if ((detach_error = ifnet_detach(interface: pcb->ipsec_ifp)) != 0) {
2981 panic("ipsec_flowswitch_attach - ifnet_detach failed: %d", detach_error);
2982 /* NOT REACHED */
2983 }
2984
2985 return err;
2986}
2987
2988#pragma mark Kernel Pipe Nexus
2989
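/*
 * Register the shared kernel pipe nexus provider, refcounted across all
 * ipsec interfaces: the first caller creates the controller and provider,
 * later callers just take a reference.
 */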
2990static errno_t
2991ipsec_register_kernel_pipe_nexus(struct ipsec_pcb *pcb)
2992{
2993 nexus_attr_t nxa = NULL;
2994 errno_t result;
2995
2996 lck_mtx_lock(lck: &ipsec_lock);
2997 if (ipsec_ncd_refcount++) {
2998 lck_mtx_unlock(lck: &ipsec_lock);
2999 return 0;
3000 }
3001
3002 result = kern_nexus_controller_create(ctl: &ipsec_ncd);
3003 if (result) {
3004 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_create failed: %d\n",
3005 __FUNCTION__, result);
3006 goto done;
3007 }
3008
3009 uuid_t dom_prov;
3010 result = kern_nexus_get_default_domain_provider(
3011 type: NEXUS_TYPE_KERNEL_PIPE, dom_prov_uuid: &dom_prov);
3012 if (result) {
3013 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_get_default_domain_provider failed: %d\n",
3014 __FUNCTION__, result);
3015 goto done;
3016 }
3017
3018 struct kern_nexus_provider_init prov_init = {
3019 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
3020 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
3021 .nxpi_pre_connect = ipsec_nexus_pre_connect,
3022 .nxpi_connected = ipsec_nexus_connected,
3023 .nxpi_pre_disconnect = ipsec_nexus_pre_disconnect,
3024 .nxpi_disconnected = ipsec_nexus_disconnected,
3025 .nxpi_ring_init = ipsec_kpipe_ring_init,
3026 .nxpi_ring_fini = ipsec_kpipe_ring_fini,
3027 .nxpi_slot_init = NULL,
3028 .nxpi_slot_fini = NULL,
3029 .nxpi_sync_tx = ipsec_kpipe_sync_tx,
3030 .nxpi_sync_rx = ipsec_kpipe_sync_rx,
3031 .nxpi_tx_doorbell = NULL,
3032 };
3033
3034 result = kern_nexus_attr_create(&nxa);
3035 if (result) {
3036 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_attr_create failed: %d\n",
3037 __FUNCTION__, result);
3038 goto done;
3039 }
3040
3041 uint64_t slot_buffer_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
3042 result = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_SLOT_BUF_SIZE, value: slot_buffer_size);
3043 VERIFY(result == 0);
3044
3045 // Reset ring size for kernel pipe nexus to limit memory usage
3046	// Note: it's better to have fewer slots on the kpipe TX ring than the netif
3047 // so back pressure is applied at the AQM layer
3048 uint64_t ring_size =
3049 pcb->ipsec_kpipe_tx_ring_size != 0 ? pcb->ipsec_kpipe_tx_ring_size :
3050 pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
3051 if_ipsec_ring_size;
3052 result = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_TX_SLOTS, value: ring_size);
3053 VERIFY(result == 0);
3054
3055 ring_size =
3056 pcb->ipsec_kpipe_rx_ring_size != 0 ? pcb->ipsec_kpipe_rx_ring_size :
3057 pcb->ipsec_netif_ring_size != 0 ? pcb->ipsec_netif_ring_size :
3058 if_ipsec_ring_size;
3059 result = kern_nexus_attr_set(attr: nxa, type: NEXUS_ATTR_RX_SLOTS, value: ring_size);
3060 VERIFY(result == 0);
3061
3062 result = kern_nexus_controller_register_provider(ctl: ipsec_ncd,
3063 dom_prov_uuid: dom_prov,
3064 (const uint8_t *)"com.apple.nexus.ipsec.kpipe",
3065 init: &prov_init,
3066 init_len: sizeof(prov_init),
3067 nxa,
3068 nx_prov_uuid: &ipsec_kpipe_uuid);
3069 if (result) {
3070 os_log_error(OS_LOG_DEFAULT, "%s: kern_nexus_controller_register_provider failed: %d\n",
3071 __FUNCTION__, result);
3072 goto done;
3073 }
3074
3075done:
3076 if (nxa) {
3077 kern_nexus_attr_destroy(attr: nxa);
3078 }
3079
3080 if (result) {
3081 if (ipsec_ncd) {
3082 kern_nexus_controller_destroy(ctl: ipsec_ncd);
3083 ipsec_ncd = NULL;
3084 }
3085 ipsec_ncd_refcount = 0;
3086 }
3087
3088 lck_mtx_unlock(lck: &ipsec_lock);
3089
3090 return result;
3091}
3092
3093static void
3094ipsec_unregister_kernel_pipe_nexus(void)
3095{
3096 lck_mtx_lock(lck: &ipsec_lock);
3097
3098 VERIFY(ipsec_ncd_refcount > 0);
3099
3100 if (--ipsec_ncd_refcount == 0) {
3101 kern_nexus_controller_destroy(ctl: ipsec_ncd);
3102 ipsec_ncd = NULL;
3103 }
3104
3105 lck_mtx_unlock(lck: &ipsec_lock);
3106}
3107
3108/* This structure only holds onto kpipe channels that still need to be
3109 * freed later; they are cleared from the pcb while the lock is held.
3110 */
3111struct ipsec_detached_channels {
3112 int count;
3113 kern_pbufpool_t pp;
3114 uuid_t uuids[IPSEC_IF_MAX_RING_COUNT];
3115};
3116
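/*
 * Move the pcb's kpipe channel UUIDs and packet pool into a detached-
 * channels holder while the pcb lock is held exclusively, so that the
 * instances can be freed later without the lock (see ipsec_free_channels()).
 */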
3117static void
3118ipsec_detach_channels(struct ipsec_pcb *pcb, struct ipsec_detached_channels *dc)
3119{
3120 LCK_RW_ASSERT(&pcb->ipsec_pcb_lock, LCK_RW_TYPE_EXCLUSIVE);
3121
3122 if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
3123 for (int i = 0; i < IPSEC_IF_MAX_RING_COUNT; i++) {
3124 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3125 }
3126 dc->count = 0;
3127 return;
3128 }
3129
3130 dc->count = pcb->ipsec_kpipe_count;
3131
3132 VERIFY(dc->count >= 0);
3133 VERIFY(dc->count <= IPSEC_IF_MAX_RING_COUNT);
3134
3135 for (int i = 0; i < dc->count; i++) {
3136 VERIFY(!uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3137 uuid_copy(dst: dc->uuids[i], src: pcb->ipsec_kpipe_uuid[i]);
3138 uuid_clear(uu: pcb->ipsec_kpipe_uuid[i]);
3139 }
3140 for (int i = dc->count; i < IPSEC_IF_MAX_RING_COUNT; i++) {
3141 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3142 }
3143
3144 if (dc->count) {
3145 VERIFY(pcb->ipsec_kpipe_pp);
3146 } else {
3147 VERIFY(!pcb->ipsec_kpipe_pp);
3148 }
3149
3150 dc->pp = pcb->ipsec_kpipe_pp;
3151
3152 pcb->ipsec_kpipe_pp = NULL;
3153
3154 ipsec_flag_clr(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
3155}
3156
3157static void
3158ipsec_free_channels(struct ipsec_detached_channels *dc)
3159{
3160 if (!dc->count) {
3161 return;
3162 }
3163
3164 for (int i = 0; i < dc->count; i++) {
3165 errno_t result;
3166 result = kern_nexus_controller_free_provider_instance(ctl: ipsec_ncd, nx_uuid: dc->uuids[i]);
3167 VERIFY(!result);
3168 }
3169
3170 VERIFY(dc->pp);
3171 kern_pbufpool_destroy(dc->pp);
3172
3173 ipsec_unregister_kernel_pipe_nexus();
3174
3175 memset(s: dc, c: 0, n: sizeof(*dc));
3176}
3177
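/*
 * Allocate and bind the kernel pipe channels for this pcb: create the
 * kpipe packet pool, then one provider instance per configured kpipe,
 * each bound either to a pid or to an executable UUID. Requires the
 * PRIV_SKYWALK_REGISTER_KERNEL_PIPE privilege.
 */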
3178static errno_t
3179ipsec_enable_channel(struct ipsec_pcb *pcb, struct proc *proc)
3180{
3181 struct kern_nexus_init init;
3182 struct kern_pbufpool_init pp_init;
3183 errno_t result;
3184
3185 kauth_cred_t cred = kauth_cred_get();
3186 result = priv_check_cred(cred, PRIV_SKYWALK_REGISTER_KERNEL_PIPE, flags: 0);
3187 if (result) {
3188 return result;
3189 }
3190
3191 VERIFY(pcb->ipsec_kpipe_count);
3192 VERIFY(!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED));
3193
3194 result = ipsec_register_kernel_pipe_nexus(pcb);
3195
3196 lck_rw_lock_exclusive(lck: &pcb->ipsec_pcb_lock);
3197
3198 if (result) {
3199 os_log_error(OS_LOG_DEFAULT, "%s: %s failed to register kernel pipe nexus\n",
3200 __func__, pcb->ipsec_if_xname);
3201 goto done;
3202 }
3203
3204 VERIFY(ipsec_ncd);
3205
3206 bzero(s: &pp_init, n: sizeof(pp_init));
3207 pp_init.kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
3208 pp_init.kbi_flags |= KBIF_VIRTUAL_DEVICE;
3209	// Note: we only need as many packets as can be held in the tx and rx rings
3210 pp_init.kbi_packets = pcb->ipsec_netif_ring_size * 2 * pcb->ipsec_kpipe_count;
3211 pp_init.kbi_bufsize = pcb->ipsec_slot_size;
3212 pp_init.kbi_buf_seg_size = IPSEC_IF_DEFAULT_BUF_SEG_SIZE;
3213 pp_init.kbi_max_frags = 1;
3214 pp_init.kbi_flags |= KBIF_QUANTUM;
3215 (void) snprintf((char *)pp_init.kbi_name, count: sizeof(pp_init.kbi_name),
3216 "com.apple.kpipe.%s", pcb->ipsec_if_xname);
3217 pp_init.kbi_ctx = NULL;
3218 pp_init.kbi_ctx_retain = NULL;
3219 pp_init.kbi_ctx_release = NULL;
3220
3221 result = kern_pbufpool_create(&pp_init, &pcb->ipsec_kpipe_pp,
3222 NULL);
3223 if (result != 0) {
3224		os_log_error(OS_LOG_DEFAULT, "%s: %s pbufpool create failed, error %d\n",
3225 __func__, pcb->ipsec_if_xname, result);
3226 goto done;
3227 }
3228
3229 bzero(s: &init, n: sizeof(init));
3230 init.nxi_version = KERN_NEXUS_CURRENT_VERSION;
3231 init.nxi_tx_pbufpool = pcb->ipsec_kpipe_pp;
3232
3233 for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
3234 VERIFY(uuid_is_null(pcb->ipsec_kpipe_uuid[i]));
3235 result = kern_nexus_controller_alloc_provider_instance(ctl: ipsec_ncd,
3236 nx_prov_uuid: ipsec_kpipe_uuid, nexus_context: pcb, NULL, nx_uuid: &pcb->ipsec_kpipe_uuid[i], init: &init);
3237
3238 if (result == 0) {
3239 nexus_port_t port = NEXUS_PORT_KERNEL_PIPE_CLIENT;
3240 const bool has_proc_uuid = !uuid_is_null(uu: pcb->ipsec_kpipe_proc_uuid);
3241 pid_t pid = pcb->ipsec_kpipe_pid;
3242 if (!pid && !has_proc_uuid) {
3243 pid = proc_pid(proc);
3244 }
3245 result = kern_nexus_controller_bind_provider_instance(ctl: ipsec_ncd,
3246 nx_uuid: pcb->ipsec_kpipe_uuid[i], port: &port,
3247 pid, exec_uuid: has_proc_uuid ? pcb->ipsec_kpipe_proc_uuid : NULL, NULL,
3248			    key_len: 0, bind_flags: has_proc_uuid ? NEXUS_BIND_EXEC_UUID : NEXUS_BIND_PID);
3249 }
3250
3251 if (result) {
3252 /* Unwind all of them on error */
3253 for (int j = 0; j < IPSEC_IF_MAX_RING_COUNT; j++) {
3254 if (!uuid_is_null(uu: pcb->ipsec_kpipe_uuid[j])) {
3255 kern_nexus_controller_free_provider_instance(ctl: ipsec_ncd,
3256 nx_uuid: pcb->ipsec_kpipe_uuid[j]);
3257 uuid_clear(uu: pcb->ipsec_kpipe_uuid[j]);
3258 }
3259 }
3260 goto done;
3261 }
3262 }
3263
3264done:
3265 lck_rw_unlock_exclusive(lck: &pcb->ipsec_pcb_lock);
3266
3267 if (result) {
3268 if (pcb->ipsec_kpipe_pp != NULL) {
3269 kern_pbufpool_destroy(pcb->ipsec_kpipe_pp);
3270 pcb->ipsec_kpipe_pp = NULL;
3271 }
3272 ipsec_unregister_kernel_pipe_nexus();
3273 } else {
3274 ipsec_flag_set(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED);
3275 }
3276
3277 return result;
3278}
3279
3280#endif // IPSEC_NEXUS
3281
3282
3283/* Kernel control functions */
3284
3285static inline int
3286ipsec_find_by_unit(u_int32_t unit)
3287{
3288 struct ipsec_pcb *next_pcb = NULL;
3289 int found = 0;
3290
3291 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
3292 if (next_pcb->ipsec_unit == unit) {
3293 found = 1;
3294 break;
3295 }
3296 }
3297
3298 return found;
3299}
3300
3301static inline void
3302ipsec_free_pcb(struct ipsec_pcb *pcb, bool locked)
3303{
3304#if IPSEC_NEXUS
3305 mbuf_freem_list(mbuf: pcb->ipsec_input_chain);
3306 pcb->ipsec_input_chain_count = 0;
3307 lck_mtx_destroy(lck: &pcb->ipsec_input_chain_lock, grp: &ipsec_lck_grp);
3308 lck_mtx_destroy(lck: &pcb->ipsec_kpipe_encrypt_lock, grp: &ipsec_lck_grp);
3309 lck_mtx_destroy(lck: &pcb->ipsec_kpipe_decrypt_lock, grp: &ipsec_lck_grp);
3310#endif // IPSEC_NEXUS
3311 lck_mtx_destroy(lck: &pcb->ipsec_pcb_data_move_lock, grp: &ipsec_lck_grp);
3312 lck_rw_destroy(lck: &pcb->ipsec_pcb_lock, grp: &ipsec_lck_grp);
3313 if (!locked) {
3314 lck_mtx_lock(lck: &ipsec_lock);
3315 }
3316 TAILQ_REMOVE(&ipsec_head, pcb, ipsec_chain);
3317 if (!locked) {
3318 lck_mtx_unlock(lck: &ipsec_lock);
3319 }
3320 zfree(ipsec_pcb_zone, pcb);
3321}
3322
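/*
 * Reserve a control unit and allocate the pcb. Unique ids are taken from
 * one past the last pcb in the list; if that wraps to 0, the list is
 * rescanned from the front for the first gap (e.g. with ids 1, 2, 4 in
 * use, the scan settles on 3 and the new pcb is inserted before the pcb
 * holding id 4).
 */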
3323static errno_t
3324ipsec_ctl_setup(u_int32_t *unit, void **unitinfo)
3325{
3326 if (unit == NULL || unitinfo == NULL) {
3327 return EINVAL;
3328 }
3329
3330 lck_mtx_lock(lck: &ipsec_lock);
3331
3332 /* Find next available unit */
3333 if (*unit == 0) {
3334 *unit = 1;
3335 while (*unit != ctl_maxunit) {
3336 if (ipsec_find_by_unit(unit: *unit)) {
3337 (*unit)++;
3338 } else {
3339 break;
3340 }
3341 }
3342 if (*unit == ctl_maxunit) {
3343 lck_mtx_unlock(lck: &ipsec_lock);
3344 return EBUSY;
3345 }
3346 } else if (ipsec_find_by_unit(unit: *unit)) {
3347 lck_mtx_unlock(lck: &ipsec_lock);
3348 return EBUSY;
3349 }
3350
3351 /* Find some open interface id */
3352 u_int32_t chosen_unique_id = 1;
3353 struct ipsec_pcb *next_pcb = TAILQ_LAST(&ipsec_head, ipsec_list);
3354 if (next_pcb != NULL) {
3355 /* List was not empty, add one to the last item */
3356 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
3357 next_pcb = NULL;
3358
3359 /*
3360 * If this wrapped the id number, start looking at
3361 * the front of the list for an unused id.
3362 */
3363 if (chosen_unique_id == 0) {
3364 /* Find the next unused ID */
3365 chosen_unique_id = 1;
3366 TAILQ_FOREACH(next_pcb, &ipsec_head, ipsec_chain) {
3367 if (next_pcb->ipsec_unique_id > chosen_unique_id) {
3368 /* We found a gap */
3369 break;
3370 }
3371
3372 chosen_unique_id = next_pcb->ipsec_unique_id + 1;
3373 }
3374 }
3375 }
3376
3377 struct ipsec_pcb *pcb = zalloc_flags(ipsec_pcb_zone, Z_WAITOK | Z_ZERO);
3378
3379 *unitinfo = pcb;
3380 pcb->ipsec_unit = *unit;
3381 pcb->ipsec_unique_id = chosen_unique_id;
3382
3383 if (next_pcb != NULL) {
3384 TAILQ_INSERT_BEFORE(next_pcb, pcb, ipsec_chain);
3385 } else {
3386 TAILQ_INSERT_TAIL(&ipsec_head, pcb, ipsec_chain);
3387 }
3388
3389 lck_mtx_unlock(lck: &ipsec_lock);
3390
3391 return 0;
3392}
3393
static errno_t
ipsec_ctl_bind(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	if (*unitinfo == NULL) {
		u_int32_t unit = 0;
		(void)ipsec_ctl_setup(&unit, unitinfo);
	}

	struct ipsec_pcb *pcb = (struct ipsec_pcb *)*unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	if (pcb->ipsec_ctlref != NULL) {
		// Return if bind was already called
		return EINVAL;
	}

	/* Setup the protocol control block */
	pcb->ipsec_ctlref = kctlref;
	pcb->ipsec_unit = sac->sc_unit;
	pcb->ipsec_output_service_class = MBUF_SC_OAM;

#if IPSEC_NEXUS
	pcb->ipsec_use_netif = false;
	pcb->ipsec_slot_size = IPSEC_IF_DEFAULT_SLOT_SIZE;
	pcb->ipsec_netif_ring_size = if_ipsec_ring_size;
	pcb->ipsec_tx_fsw_ring_size = if_ipsec_tx_fsw_ring_size;
	pcb->ipsec_rx_fsw_ring_size = if_ipsec_rx_fsw_ring_size;
#endif // IPSEC_NEXUS

	lck_rw_init(&pcb->ipsec_pcb_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_pcb_data_move_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#if IPSEC_NEXUS
	pcb->ipsec_input_chain_count = 0;
	lck_mtx_init(&pcb->ipsec_input_chain_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_encrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
	lck_mtx_init(&pcb->ipsec_kpipe_decrypt_lock, &ipsec_lck_grp, &ipsec_lck_attr);
#endif // IPSEC_NEXUS

	return 0;
}

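/*
 * Kernel control connect: create and attach the ifnet. With IPSEC_NEXUS,
 * this attaches a Skywalk-native netif (plus optional kpipe channels and
 * a flowswitch); otherwise it falls back to a classic ifnet driven by
 * ipsec_start().
 */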
static errno_t
ipsec_ctl_connect(kern_ctl_ref kctlref,
    struct sockaddr_ctl *sac,
    void **unitinfo)
{
	struct ifnet_init_eparams ipsec_init = {};
	errno_t result = 0;

	if (*unitinfo == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	struct ipsec_pcb *pcb = *unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* Handle the case where ipsec_ctl_setup() was called, but ipsec_ctl_bind() was not */
	if (pcb->ipsec_ctlref == NULL) {
		(void)ipsec_ctl_bind(kctlref, sac, unitinfo);
	}

	snprintf(pcb->ipsec_if_xname, sizeof(pcb->ipsec_if_xname), "ipsec%d", pcb->ipsec_unit - 1);
	snprintf(pcb->ipsec_unique_name, sizeof(pcb->ipsec_unique_name), "ipsecid%d", pcb->ipsec_unique_id - 1);
	os_log(OS_LOG_DEFAULT, "ipsec_ctl_connect: creating interface %s (id %s)\n", pcb->ipsec_if_xname, pcb->ipsec_unique_name);

	/* Create the interface */
	bzero(&ipsec_init, sizeof(ipsec_init));
	ipsec_init.ver = IFNET_INIT_CURRENT_VERSION;
	ipsec_init.len = sizeof(ipsec_init);

#if IPSEC_NEXUS
	if (pcb->ipsec_use_netif) {
		ipsec_init.flags = (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_NX_NOAUTO);
	} else
#endif // IPSEC_NEXUS
	{
		ipsec_init.flags = IFNET_INIT_NX_NOAUTO;
		ipsec_init.start = ipsec_start;
	}
	ipsec_init.name = "ipsec";
	ipsec_init.unit = pcb->ipsec_unit - 1;
	ipsec_init.uniqueid = pcb->ipsec_unique_name;
	ipsec_init.uniqueid_len = (uint32_t)strlen(pcb->ipsec_unique_name);
	ipsec_init.family = IFNET_FAMILY_IPSEC;
	ipsec_init.type = IFT_OTHER;
	ipsec_init.demux = ipsec_demux;
	ipsec_init.add_proto = ipsec_add_proto;
	ipsec_init.del_proto = ipsec_del_proto;
	ipsec_init.softc = pcb;
	ipsec_init.ioctl = ipsec_ioctl;
	ipsec_init.free = ipsec_detached;

#if IPSEC_NEXUS
	/* We don't support kpipes without a netif */
	if (pcb->ipsec_kpipe_count && !pcb->ipsec_use_netif) {
		result = ENOTSUP;
		os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - kpipe requires netif: failed %d\n", result);
		ipsec_free_pcb(pcb, false);
		*unitinfo = NULL;
		return result;
	}

	if (if_ipsec_debug != 0) {
		printf("%s: %s%d use_netif %d kpipe_count %d slot_size %u ring_size %u "
		    "kpipe_tx_ring_size %u kpipe_rx_ring_size %u\n",
		    __func__,
		    ipsec_init.name, ipsec_init.unit,
		    pcb->ipsec_use_netif,
		    pcb->ipsec_kpipe_count,
		    pcb->ipsec_slot_size,
		    pcb->ipsec_netif_ring_size,
		    pcb->ipsec_kpipe_tx_ring_size,
		    pcb->ipsec_kpipe_rx_ring_size);
	}
	if (pcb->ipsec_use_netif) {
		if (pcb->ipsec_kpipe_count) {
			result = ipsec_enable_channel(pcb, current_proc());
			if (result) {
				os_log_error(OS_LOG_DEFAULT, "%s: %s failed to enable channels\n",
				    __func__, pcb->ipsec_if_xname);
				ipsec_free_pcb(pcb, false);
				*unitinfo = NULL;
				return result;
			}
		}

		result = ipsec_nexus_ifattach(pcb, &ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_nexus_ifattach failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		result = ipsec_flowswitch_attach(pcb);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ipsec_flowswitch_attach failed: %d\n", result);
			// Do not call ipsec_free_pcb(). We will be attached already, and will be freed later
			// in ipsec_detached().
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_RAW, 0);
	} else
#endif // IPSEC_NEXUS
	{
		result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_allocate failed: %d\n", result);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}
		ipsec_ifnet_set_attrs(pcb->ipsec_ifp);

		/* Attach the interface */
		result = ifnet_attach(pcb->ipsec_ifp, NULL);
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_connect - ifnet_attach failed: %d\n", result);
			ifnet_release(pcb->ipsec_ifp);
			ipsec_free_pcb(pcb, false);
			*unitinfo = NULL;
			return result;
		}

		/* Attach to bpf */
		bpfattach(pcb->ipsec_ifp, DLT_NULL, 0);
	}

#if IPSEC_NEXUS
	/*
	 * Mark the data path as ready.
	 * If a kpipe nexus is being used, the data path is marked ready
	 * only once a kpipe channel is connected.
	 */
	if (pcb->ipsec_kpipe_count == 0) {
		lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
		IPSEC_SET_DATA_PATH_READY(pcb);
		lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
	}
#endif

	/* The interface's resources are allocated; mark it as running */
	ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING);

	return 0;
}

static errno_t
ipsec_detach_ip(ifnet_t interface,
    protocol_family_t protocol,
    socket_t pf_socket)
{
	errno_t result = EPROTONOSUPPORT;

	/* Attempt a detach */
	if (protocol == PF_INET) {
		struct ifreq ifr;

		bzero(&ifr, sizeof(ifr));
		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
		    ifnet_name(interface), ifnet_unit(interface));

		result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
	} else if (protocol == PF_INET6) {
		struct in6_ifreq ifr6;

		bzero(&ifr6, sizeof(ifr6));
		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
		    ifnet_name(interface), ifnet_unit(interface));

		result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
	}

	return result;
}

static void
ipsec_remove_address(ifnet_t interface,
    protocol_family_t protocol,
    ifaddr_t address,
    socket_t pf_socket)
{
	errno_t result = 0;

	/* Attempt to remove the address */
	if (protocol == PF_INET) {
		struct ifreq ifr;

		bzero(&ifr, sizeof(ifr));
		snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
		    ifnet_name(interface), ifnet_unit(interface));
		result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed: %d", result);
		} else {
			result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
			if (result != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR failed: %d", result);
			}
		}
	} else if (protocol == PF_INET6) {
		struct in6_ifreq ifr6;

		bzero(&ifr6, sizeof(ifr6));
		snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
		    ifnet_name(interface), ifnet_unit(interface));
		result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
		    sizeof(ifr6.ifr_addr));
		if (result != 0) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - ifaddr_address failed (v6): %d",
			    result);
		} else {
			result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
			if (result != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d",
				    result);
			}
		}
	}
}

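/*
 * Best-effort teardown of one address family on the interface: try to
 * detach the protocol, and on EBUSY remove every address of that family
 * and retry the detach.
 */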
static void
ipsec_cleanup_family(ifnet_t interface,
    protocol_family_t protocol)
{
	errno_t result = 0;
	socket_t pf_socket = NULL;
	ifaddr_t *addresses = NULL;
	int i;

	if (protocol != PF_INET && protocol != PF_INET6) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - invalid protocol family %d\n", protocol);
		return;
	}

	/* Create a socket for removing addresses and detaching the protocol */
	result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
	if (result != 0) {
		if (result != EAFNOSUPPORT) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - failed to create %s socket: %d\n",
			    protocol == PF_INET ? "IP" : "IPv6", result);
		}
		goto cleanup;
	}

	/* always set SS_PRIV, we want to close and detach regardless */
	sock_setpriv(pf_socket, 1);

	result = ipsec_detach_ip(interface, protocol, pf_socket);
	if (result == 0 || result == ENXIO) {
		/* We are done! We either detached or weren't attached. */
		goto cleanup;
	} else if (result != EBUSY) {
		/* Unexpected error; log it and bail out. */
		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
		goto cleanup;
	}

	/*
	 * At this point, we received an EBUSY error. This means there are
	 * addresses attached. We should detach them and then try again.
	 */
	result = ifnet_get_address_list_family(interface, &addresses, (sa_family_t)protocol);
	if (result != 0) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ifnet_get_address_list_family(%s%d, %s) failed: %d\n",
		    ifnet_name(interface), ifnet_unit(interface),
		    protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
		goto cleanup;
	}

	for (i = 0; addresses[i] != 0; i++) {
		ipsec_remove_address(interface, protocol, addresses[i], pf_socket);
	}
	ifnet_free_address_list(addresses);
	addresses = NULL;

	/*
	 * The addresses should be gone; try the detach again.
	 */
	result = ipsec_detach_ip(interface, protocol, pf_socket);
	if (result != 0 && result != ENXIO) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result);
	}

cleanup:
	if (pf_socket != NULL) {
		sock_close(pf_socket);
	}

	if (addresses != NULL) {
		ifnet_free_address_list(addresses);
	}
}

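/*
 * Kernel control disconnect: drain the data path, detach any kpipe
 * channels, scrub security policies and addresses, and detach the ifnet.
 * Final teardown of the pcb happens in ipsec_detached() once the
 * interface's I/O references drain.
 */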
static errno_t
ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo)
{
	struct ipsec_pcb *pcb = unitinfo;
	ifnet_t ifp = NULL;
	errno_t result = 0;

	if (pcb == NULL) {
		return EINVAL;
	}

	/* Wait until all threads in the data paths are done. */
	ipsec_wait_data_move_drain(pcb);

#if IPSEC_NEXUS
	// Tell the nexus to stop all rings
	if (pcb->ipsec_netif_nexus != NULL) {
		kern_nexus_stop(pcb->ipsec_netif_nexus);
	}
#endif // IPSEC_NEXUS

	lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
	if (if_ipsec_debug != 0) {
		printf("ipsec_ctl_disconnect: detaching interface %s (id %s)\n",
		    pcb->ipsec_if_xname, pcb->ipsec_unique_name);
	}

	struct ipsec_detached_channels dc;
	ipsec_detach_channels(pcb, &dc);
#endif // IPSEC_NEXUS

	pcb->ipsec_ctlref = NULL;

	ifp = pcb->ipsec_ifp;
	if (ifp != NULL) {
#if IPSEC_NEXUS
		if (pcb->ipsec_netif_nexus != NULL) {
			/*
			 * Quiesce the interface and flush any pending outbound packets.
			 */
			if_down(ifp);

			/*
			 * Suspend data movement and wait for IO threads to exit.
			 * We can't rely on the logic in dlil_quiesce_and_detach_nexuses() to
			 * do this because ipsec nexuses are attached/detached separately.
			 */
			ifnet_datamov_suspend_and_drain(ifp);
			if ((result = ifnet_detach(ifp)) != 0) {
				panic("ipsec_ctl_disconnect - ifnet_detach failed: %d", result);
				/* NOT REACHED */
			}

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

			ipsec_free_channels(&dc);

			ipsec_nexus_detach(pcb);

			/* Decrement refcnt added by ifnet_datamov_suspend_and_drain(). */
			ifnet_datamov_resume(ifp);
		} else
#endif // IPSEC_NEXUS
		{
			lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);

#if IPSEC_NEXUS
			ipsec_free_channels(&dc);
#endif // IPSEC_NEXUS

			/*
			 * We want to do everything in our power to ensure that the interface
			 * really goes away when the socket is closed. We must remove IP/IPv6
			 * addresses and detach the protocols. Finally, we can remove and
			 * release the interface.
			 */
			key_delsp_for_ipsec_if(ifp);

			ipsec_cleanup_family(ifp, AF_INET);
			ipsec_cleanup_family(ifp, AF_INET6);

			/*
			 * Detach now; ipsec_detached() will be called asynchronously once
			 * the I/O reference count drops to 0. There we will invoke
			 * ifnet_release().
			 */
			if ((result = ifnet_detach(ifp)) != 0) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result);
			}
		}
	} else {
		// Bound, but not connected
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		ipsec_free_pcb(pcb, false);
	}

	return 0;
}

static errno_t
ipsec_ctl_send(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    __unused void *unitinfo,
    mbuf_t m,
    __unused int flags)
{
	/* Receive messages from the control socket. Currently unused. */
	mbuf_freem(m);
	return 0;
}

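/*
 * Socket options drive nearly all configuration of an ipsec interface.
 * A userspace client talks to this handler through a kernel control
 * socket; pre-connect options (e.g. IPSEC_OPT_ENABLE_NETIF) must be set
 * after bind() but before connect(). A rough sketch of such a client,
 * with error handling omitted (the control name and option constants
 * come from if_ipsec.h; the exact call sequence is illustrative, not a
 * guaranteed contract):
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info = {};
 *	strlcpy(info.ctl_name, IPSEC_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);              // resolve the control id
 *	struct sockaddr_ctl sc = { .sc_len = sizeof(sc),
 *	    .sc_family = AF_SYSTEM, .ss_sysaddr = AF_SYS_CONTROL,
 *	    .sc_id = info.ctl_id, .sc_unit = 0 };   // 0 = any free unit
 *	bind(fd, (struct sockaddr *)&sc, sizeof(sc));    // ipsec_ctl_bind()
 *	int one = 1;
 *	setsockopt(fd, SYSPROTO_CONTROL, IPSEC_OPT_ENABLE_NETIF,
 *	    &one, sizeof(one));                     // lands in ipsec_ctl_setopt()
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc)); // creates the ifnet
 */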
static errno_t
ipsec_ctl_setopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *data,
    size_t len)
{
	errno_t result = 0;
	struct ipsec_pcb *pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	/* check for privileges for privileged options */
	switch (opt) {
	case IPSEC_OPT_FLAGS:
	case IPSEC_OPT_EXT_IFDATA_STATS:
	case IPSEC_OPT_SET_DELEGATE_INTERFACE:
	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS:
	case IPSEC_OPT_OUTPUT_DSCP_MAPPING:
		if (kauth_cred_issuser(kauth_cred_get()) == 0) {
			return EPERM;
		}
		break;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			pcb->ipsec_external_flags = *(u_int32_t *)data;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (len != sizeof(int)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp == NULL) {
			// Can only be set after connecting
			result = EINVAL;
			break;
		}
		pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 1 : 0;
		break;
	}

	case IPSEC_OPT_INC_IFDATA_STATS_IN:
	case IPSEC_OPT_INC_IFDATA_STATS_OUT: {
		struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data;

		if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) {
			result = EINVAL;
			break;
		}
		if (pcb->ipsec_ifp == NULL) {
			// Can only be set after connecting
			result = EINVAL;
			break;
		}
		if (!pcb->ipsec_ext_ifdata_stats) {
			result = EINVAL;
			break;
		}
		if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) {
			ifnet_stat_increment_in(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
		} else {
			ifnet_stat_increment_out(pcb->ipsec_ifp, (uint32_t)utsp->utsp_packets,
			    (uint32_t)utsp->utsp_bytes, (uint32_t)utsp->utsp_errors);
		}
		break;
	}

	case IPSEC_OPT_SET_DELEGATE_INTERFACE: {
		ifnet_t del_ifp = NULL;
		char name[IFNAMSIZ];

		if (len > IFNAMSIZ - 1) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp == NULL) {
			// Can only be set after connecting
			result = EINVAL;
			break;
		}
		if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */
			bcopy(data, name, len);
			name[len] = 0;
			result = ifnet_find_by_name(name, &del_ifp);
		}
		if (result == 0) {
			os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_SET_DELEGATE_INTERFACE %s to %s\n",
			    __func__, pcb->ipsec_ifp->if_xname,
			    del_ifp ? del_ifp->if_xname : "NULL");

			result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp);
			if (del_ifp) {
				ifnet_release(del_ifp);
			}
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (len != sizeof(int)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp == NULL) {
			// Can only be set after connecting
			result = EINVAL;
			break;
		}
		mbuf_svc_class_t output_service_class = so_tc2msc(*(int *)data);
		if (output_service_class == MBUF_SC_UNSPEC) {
			pcb->ipsec_output_service_class = MBUF_SC_OAM;
		} else {
			pcb->ipsec_output_service_class = output_service_class;
		}
		os_log_error(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_TRAFFIC_CLASS %s svc %d\n",
		    __func__, pcb->ipsec_ifp->if_xname,
		    pcb->ipsec_output_service_class);
		break;
	}

#if IPSEC_NEXUS
	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (len != sizeof(int)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		if ((*(int *)data) != 0 &&
		    (*(int *)data) != 1 &&
		    (*(int *)data) != IPSEC_IF_WMM_RING_COUNT) {
			result = EINVAL;
			break;
		}
		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
		pcb->ipsec_kpipe_count = *(int *)data;
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (len != sizeof(pid_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
		pcb->ipsec_kpipe_pid = *(pid_t *)data;
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (len != sizeof(uuid_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
		uuid_copy(pcb->ipsec_kpipe_proc_uuid, *((uuid_t *)data));
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (len != sizeof(int)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp == NULL) {
			// Can only be set after connecting
			result = EINVAL;
			break;
		}
		if (!if_is_fsw_transport_netagent_enabled()) {
			result = ENOTSUP;
			break;
		}
		if (uuid_is_null(pcb->ipsec_nx.fsw_agent)) {
			result = ENOENT;
			break;
		}

		uint32_t flags = netagent_get_flags(pcb->ipsec_nx.fsw_agent);

		if (*(int *)data) {
			flags |= (NETAGENT_FLAG_NEXUS_PROVIDER |
			    NETAGENT_FLAG_NEXUS_LISTENER);
			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
			pcb->ipsec_needs_netagent = true;
		} else {
			pcb->ipsec_needs_netagent = false;
			flags &= ~(NETAGENT_FLAG_NEXUS_PROVIDER |
			    NETAGENT_FLAG_NEXUS_LISTENER);
			result = netagent_set_flags(pcb->ipsec_nx.fsw_agent, flags);
		}
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		u_int32_t input_frag_size = *(u_int32_t *)data;
		if (input_frag_size <= sizeof(struct ip6_hdr)) {
			pcb->ipsec_frag_size_set = FALSE;
			pcb->ipsec_input_frag_size = 0;
		} else {
			pcb->ipsec_frag_size_set = TRUE;
			pcb->ipsec_input_frag_size = input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_ENABLE_NETIF: {
		if (len != sizeof(int)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		lck_rw_lock_exclusive(&pcb->ipsec_pcb_lock);
		pcb->ipsec_use_netif = !!(*(int *)data);
		lck_rw_unlock_exclusive(&pcb->ipsec_pcb_lock);
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		u_int32_t slot_size = *(u_int32_t *)data;
		if (slot_size < IPSEC_IF_MIN_SLOT_SIZE ||
		    slot_size > IPSEC_IF_MAX_SLOT_SIZE) {
			return EINVAL;
		}
		pcb->ipsec_slot_size = slot_size;
		if (if_ipsec_debug != 0) {
			printf("%s: IPSEC_OPT_SLOT_SIZE %u\n", __func__, slot_size);
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		u_int32_t ring_size = *(u_int32_t *)data;
		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
			return EINVAL;
		}
		pcb->ipsec_netif_ring_size = ring_size;
		if (if_ipsec_debug != 0) {
			printf("%s: IPSEC_OPT_NETIF_RING_SIZE %u\n", __func__, ring_size);
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		u_int32_t ring_size = *(u_int32_t *)data;
		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
			return EINVAL;
		}
		pcb->ipsec_tx_fsw_ring_size = ring_size;
		if (if_ipsec_debug != 0) {
			printf("%s: IPSEC_OPT_TX_FSW_RING_SIZE %u\n", __func__, ring_size);
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		u_int32_t ring_size = *(u_int32_t *)data;
		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
			return EINVAL;
		}
		pcb->ipsec_rx_fsw_ring_size = ring_size;
		if (if_ipsec_debug != 0) {
			printf("%s: IPSEC_OPT_RX_FSW_RING_SIZE %u\n", __func__, ring_size);
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		u_int32_t ring_size = *(u_int32_t *)data;
		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
			return EINVAL;
		}
		pcb->ipsec_kpipe_tx_ring_size = ring_size;
		if (if_ipsec_debug != 0) {
			printf("%s: IPSEC_OPT_KPIPE_TX_RING_SIZE %u\n", __func__, ring_size);
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp != NULL) {
			// Can only be set before connecting
			result = EINVAL;
			break;
		}
		u_int32_t ring_size = *(u_int32_t *)data;
		if (ring_size < IPSEC_IF_MIN_RING_SIZE ||
		    ring_size > IPSEC_IF_MAX_RING_SIZE) {
			return EINVAL;
		}
		pcb->ipsec_kpipe_rx_ring_size = ring_size;
		if (if_ipsec_debug != 0) {
			printf("%s: IPSEC_OPT_KPIPE_RX_RING_SIZE %u\n", __func__, ring_size);
		}
		break;
	}
	case IPSEC_OPT_OUTPUT_DSCP_MAPPING: {
		if (len != sizeof(int)) {
			result = EMSGSIZE;
			break;
		}
		if (pcb->ipsec_ifp == NULL) {
			// Can only be set after connecting
			result = EINVAL;
			break;
		}

		ipsec_dscp_mapping_t output_dscp_mapping = (ipsec_dscp_mapping_t)(*(int *)data);
		if (output_dscp_mapping > IPSEC_DSCP_MAPPING_LEGACY) {
			return EINVAL;
		}

		pcb->ipsec_output_dscp_mapping = output_dscp_mapping;

		os_log(OS_LOG_DEFAULT, "%s IPSEC_OPT_OUTPUT_DSCP_MAPPING %s DSCP %d\n",
		    __func__, pcb->ipsec_ifp->if_xname,
		    pcb->ipsec_output_dscp_mapping);
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}

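/*
 * Read-side counterpart of ipsec_ctl_setopt(): most cases copy the
 * current pcb field out, taking the pcb lock shared where needed.
 */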
static errno_t
ipsec_ctl_getopt(__unused kern_ctl_ref kctlref,
    __unused u_int32_t unit,
    void *unitinfo,
    int opt,
    void *data,
    size_t *len)
{
	errno_t result = 0;
	struct ipsec_pcb *pcb = unitinfo;
	if (pcb == NULL) {
		return EINVAL;
	}

	switch (opt) {
	case IPSEC_OPT_FLAGS: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_external_flags;
		}
		break;
	}

	case IPSEC_OPT_EXT_IFDATA_STATS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0;
		}
		break;
	}

	case IPSEC_OPT_IFNAME: {
		if (*len < MIN(strlen(pcb->ipsec_if_xname) + 1, sizeof(pcb->ipsec_if_xname))) {
			result = EMSGSIZE;
		} else {
			if (pcb->ipsec_ifp == NULL) {
				// Can only be retrieved after connecting
				result = EINVAL;
				break;
			}
			*len = scnprintf(data, *len, "%s", pcb->ipsec_if_xname) + 1;
		}
		break;
	}

	case IPSEC_OPT_OUTPUT_TRAFFIC_CLASS: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = so_svc2tc(pcb->ipsec_output_service_class);
		}
		break;
	}

#if IPSEC_NEXUS

	case IPSEC_OPT_ENABLE_CHANNEL: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = pcb->ipsec_kpipe_count;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_PID: {
		if (*len != sizeof(pid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(pid_t *)data = pcb->ipsec_kpipe_pid;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_CHANNEL_BIND_UUID: {
		if (*len != sizeof(uuid_t)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			uuid_copy(*((uuid_t *)data), pcb->ipsec_kpipe_proc_uuid);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_FLOWSWITCH: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			*(int *)data = if_check_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.fsw_agent);
		}
		break;
	}

	case IPSEC_OPT_ENABLE_NETIF: {
		if (*len != sizeof(int)) {
			result = EMSGSIZE;
		} else {
			lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
			*(int *)data = !!pcb->ipsec_use_netif;
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		}
		break;
	}

	case IPSEC_OPT_GET_CHANNEL_UUID: {
		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);
		if (!ipsec_flag_isset(pcb, IPSEC_FLAGS_KPIPE_ALLOCATED)) {
			result = ENXIO;
		} else if (*len != sizeof(uuid_t) * pcb->ipsec_kpipe_count) {
			result = EMSGSIZE;
		} else {
			for (unsigned int i = 0; i < pcb->ipsec_kpipe_count; i++) {
				uuid_copy(((uuid_t *)data)[i], pcb->ipsec_kpipe_uuid[i]);
			}
		}
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
		break;
	}

	case IPSEC_OPT_INPUT_FRAG_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_input_frag_size;
		}
		break;
	}
	case IPSEC_OPT_SLOT_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_slot_size;
		}
		break;
	}
	case IPSEC_OPT_NETIF_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_netif_ring_size;
		}
		break;
	}
	case IPSEC_OPT_TX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_tx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_RX_FSW_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_rx_fsw_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_TX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_tx_ring_size;
		}
		break;
	}
	case IPSEC_OPT_KPIPE_RX_RING_SIZE: {
		if (*len != sizeof(u_int32_t)) {
			result = EMSGSIZE;
		} else {
			*(u_int32_t *)data = pcb->ipsec_kpipe_rx_ring_size;
		}
		break;
	}

#endif // IPSEC_NEXUS

	default: {
		result = ENOPROTOOPT;
		break;
	}
	}

	return result;
}

/* Network Interface functions */
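/*
 * Legacy (non-netif) transmit path. Each dequeued packet is encrypted
 * in place via ipsec4/ipsec6_interface_output() and then re-enqueued to
 * ip_output()/ip6_output(); a flow-advisory pushback from the lower
 * layer disables further output on the interface.
 */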
static errno_t
ipsec_output(ifnet_t interface,
    mbuf_t data)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	struct ipsec_output_state ipsec_state;
	struct route ro;
	struct route_in6 ro6;
	size_t length;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6 = NULL;
	struct ip_out_args ipoa;
	struct ip6_out_args ip6oa;
	int error = 0;
	u_int ip_version = 0;
	int flags = 0;
	struct flowadv *adv = NULL;

	// Make sure this packet isn't looping through the interface
	if (necp_get_last_interface_index_from_packet(data) == interface->if_index) {
		error = EINVAL;
		goto ipsec_output_err;
	}

	// Mark the interface so NECP can evaluate tunnel policy
	necp_mark_packet_from_interface(data, interface);

	if (data->m_len < sizeof(*ip)) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IP header length: %d.\n", data->m_len);
		IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
		error = EINVAL;
		goto ipsec_output_err;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4: {
		u_int8_t ip_hlen = 0;
#ifdef _IP_VHL
		ip_hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
#else
		ip_hlen = (uint8_t)(ip->ip_hl << 2);
#endif
		if (ip_hlen < sizeof(*ip)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: Bad ip header length %d.\n", ip_hlen);
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		/* Apply encryption */
		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip->ip_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec4_interface_output(&ipsec_state, interface);
		/* Tunneled in IPv6 - packet is gone */
		if (error == 0 && ipsec_state.tunneled == 6) {
			goto done;
		}

		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec4_output error %d.\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip->ip_p;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Flip endian-ness for ip_output */
		ip = mtod(data, struct ip *);
		NTOHS(ip->ip_len);
		NTOHS(ip->ip_off);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip_output */
		memset(&ro, 0, sizeof(ro));

		flags = (IP_OUTARGS |   /* Passing out args to specify interface */
		    IP_NOIPSEC);        /* To ensure the packet doesn't go through ipsec twice */

		memset(&ipoa, 0, sizeof(ipoa));
		ipoa.ipoa_flowadv.code = 0;
		ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ipoa.ipoa_boundif = ipsec_state.outgoing_if;
			ipoa.ipoa_flags |= IPOAF_BOUND_IF;
		}
		ipsec_set_ipoa_for_interface(pcb->ipsec_ifp, &ipoa);

		adv = &ipoa.ipoa_flowadv;

		(void)ip_output(data, NULL, &ro, flags, NULL, &ipoa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	case 6: {
		if (data->m_len < sizeof(*ip6)) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: first mbuf length shorter than IPv6 header length: %d.\n", data->m_len);
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			error = EINVAL;
			goto ipsec_output_err;
		}
#if IPSEC_NEXUS
		if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
		{
			int af = AF_INET6;
			bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af));
		}

		data = ipsec6_splithdr(data);
		if (data == NULL) {
			os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_splithdr returned NULL\n");
			goto ipsec_output_err;
		}

		ip6 = mtod(data, struct ip6_hdr *);

		memset(&ipsec_state, 0, sizeof(ipsec_state));
		ipsec_state.m = data;
		ipsec_state.dst = (struct sockaddr *)&ip6->ip6_dst;
		memset(&ipsec_state.ro, 0, sizeof(ipsec_state.ro));
		ipsec_state.dscp_mapping = pcb->ipsec_output_dscp_mapping;

		error = ipsec6_interface_output(&ipsec_state, interface, &ip6->ip6_nxt, ipsec_state.m);
		if (error == 0 && ipsec_state.tunneled == 4) {  /* tunneled in IPv4 - packet is gone */
			goto done;
		}
		data = ipsec_state.m;
		if (error || data == NULL) {
			if (error) {
				os_log_error(OS_LOG_DEFAULT, "ipsec_output: ipsec6_output error %d\n", error);
			}
			goto ipsec_output_err;
		}

		/* Set traffic class, set flow */
		m_set_service_class(data, pcb->ipsec_output_service_class);
		data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
		data->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
#else /* !SKYWALK */
		data->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
		data->m_pkthdr.pkt_proto = ip6->ip6_nxt;
		data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);

		/* Increment statistics */
		length = mbuf_pkthdr_len(data);
		ifnet_stat_increment_out(interface, 1, (uint16_t)length, 0);

		/* Send to ip6_output */
		memset(&ro6, 0, sizeof(ro6));

		flags = IPV6_OUTARGS;

		memset(&ip6oa, 0, sizeof(ip6oa));
		ip6oa.ip6oa_flowadv.code = 0;
		ip6oa.ip6oa_flags = IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR;
		if (ipsec_state.outgoing_if) {
			ip6oa.ip6oa_boundif = ipsec_state.outgoing_if;
			ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
			ip6_output_setsrcifscope(data, ipsec_state.outgoing_if, NULL);
			ip6_output_setdstifscope(data, ipsec_state.outgoing_if, NULL);
		} else {
			ip6_output_setsrcifscope(data, IFSCOPE_UNKNOWN, NULL);
			ip6_output_setdstifscope(data, IFSCOPE_UNKNOWN, NULL);
		}
		ipsec_set_ip6oa_for_interface(pcb->ipsec_ifp, &ip6oa);

		adv = &ip6oa.ip6oa_flowadv;

		(void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa);
		data = NULL;

		if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) {
			error = ENOBUFS;
			ifnet_disable_output(interface);
		}

		goto done;
	}
	default: {
		os_log_error(OS_LOG_DEFAULT, "ipsec_output: Received unknown packet version %d.\n", ip_version);
		error = EINVAL;
		goto ipsec_output_err;
	}
	}

done:
	return error;

ipsec_output_err:
	if (data) {
		mbuf_freem(data);
	}
	goto done;
}

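/*
 * ifnet start callback for the legacy path: drain the interface output
 * queue, pushing each packet through ipsec_output() until the queue is
 * empty or an error stops the loop.
 */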
static void
ipsec_start(ifnet_t interface)
{
	mbuf_t data;
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	VERIFY(pcb != NULL);
	for (;;) {
		if (ifnet_dequeue(interface, &data) != 0) {
			break;
		}
		if (ipsec_output(interface, data) != 0) {
			break;
		}
	}
}

/* Network Interface functions */
static errno_t
ipsec_demux(__unused ifnet_t interface,
    mbuf_t data,
    __unused char *frame_header,
    protocol_family_t *protocol)
{
	struct ip *ip;
	u_int ip_version;

	while (data != NULL && mbuf_len(data) < 1) {
		data = mbuf_next(data);
	}

	if (data == NULL) {
		return ENOENT;
	}

	ip = mtod(data, struct ip *);
	ip_version = ip->ip_v;

	switch (ip_version) {
	case 4:
		*protocol = PF_INET;
		return 0;
	case 6:
		*protocol = PF_INET6;
		return 0;
	default:
		*protocol = PF_UNSPEC;
		break;
	}

	return 0;
}

static errno_t
ipsec_add_proto(__unused ifnet_t interface,
    protocol_family_t protocol,
    __unused const struct ifnet_demux_desc *demux_array,
    __unused u_int32_t demux_count)
{
	switch (protocol) {
	case PF_INET:
		return 0;
	case PF_INET6:
		return 0;
	default:
		break;
	}

	return ENOPROTOOPT;
}

static errno_t
ipsec_del_proto(__unused ifnet_t interface,
    __unused protocol_family_t protocol)
{
	return 0;
}

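/*
 * Interface ioctl handler. On the netif data path the MTU is capped at
 * the configured slot size, since every packet must fit in a single
 * channel slot.
 */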
static errno_t
ipsec_ioctl(ifnet_t interface,
    u_long command,
    void *data)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
#endif
	errno_t result = 0;

	switch (command) {
	case SIOCSIFMTU: {
#if IPSEC_NEXUS
		if (pcb->ipsec_use_netif) {
			// Make sure we can fit packets in the channel buffers
			if (((uint64_t)((struct ifreq*)data)->ifr_mtu) > pcb->ipsec_slot_size) {
				result = EINVAL;
			} else {
				ifnet_set_mtu(interface, (uint32_t)((struct ifreq*)data)->ifr_mtu);
			}
		} else
#endif // IPSEC_NEXUS
		{
			ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
		}
		break;
	}

	case SIOCSIFFLAGS:
		/* ifioctl() takes care of it */
		break;

	case SIOCSIFSUBFAMILY: {
		uint32_t subfamily;

		subfamily = ((struct ifreq*)data)->ifr_type.ift_subfamily;
		switch (subfamily) {
		case IFRTYPE_SUBFAMILY_BLUETOOTH:
			interface->if_subfamily = IFNET_SUBFAMILY_BLUETOOTH;
			break;
		case IFRTYPE_SUBFAMILY_WIFI:
			interface->if_subfamily = IFNET_SUBFAMILY_WIFI;
			break;
		case IFRTYPE_SUBFAMILY_QUICKRELAY:
			interface->if_subfamily = IFNET_SUBFAMILY_QUICKRELAY;
			break;
		case IFRTYPE_SUBFAMILY_DEFAULT:
			interface->if_subfamily = IFNET_SUBFAMILY_DEFAULT;
			break;
		default:
			result = EINVAL;
			break;
		}
		break;
	}

	default:
		result = EOPNOTSUPP;
	}

	return result;
}

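/*
 * ifnet free callback, invoked once the interface's I/O references have
 * drained after ifnet_detach(); the pcb is finally freed here.
 */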
static void
ipsec_detached(ifnet_t interface)
{
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	(void)ifnet_release(interface);
	lck_mtx_lock(&ipsec_lock);
	ipsec_free_pcb(pcb, true);
	(void)ifnet_dispose(interface);
	lck_mtx_unlock(&ipsec_lock);
}

/* Protocol Handlers */

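/*
 * Inbound delivery from the protocol attachment: tap bpf/pktap on the
 * legacy path, then hand the packet to the protocol layer, accounting
 * the result in the interface input stats.
 */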
static errno_t
ipsec_proto_input(ifnet_t interface,
    protocol_family_t protocol,
    mbuf_t m,
    __unused char *frame_header)
{
	mbuf_pkthdr_setrcvif(m, interface);

#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);
	if (!pcb->ipsec_use_netif)
#endif // IPSEC_NEXUS
	{
		uint32_t af = 0;
		struct ip *ip = mtod(m, struct ip *);
		if (ip->ip_v == 4) {
			af = AF_INET;
		} else if (ip->ip_v == 6) {
			af = AF_INET6;
		}
		bpf_tap_in(interface, DLT_NULL, m, &af, sizeof(af));
		pktap_input(interface, protocol, m, NULL);
	}

	int32_t pktlen = m->m_pkthdr.len;
	if (proto_input(protocol, m) != 0) {
		ifnet_stat_increment_in(interface, 0, 0, 1);
		m_freem(m);
	} else {
		ifnet_stat_increment_in(interface, 1, pktlen, 0);
	}

	return 0;
}

static errno_t
ipsec_proto_pre_output(__unused ifnet_t interface,
    protocol_family_t protocol,
    __unused mbuf_t *packet,
    __unused const struct sockaddr *dest,
    __unused void *route,
    __unused char *frame_type,
    __unused char *link_layer_dest)
{
	*(protocol_family_t *)(void *)frame_type = protocol;
	return 0;
}

static errno_t
ipsec_attach_proto(ifnet_t interface,
    protocol_family_t protocol)
{
	struct ifnet_attach_proto_param proto;
	errno_t result;

	bzero(&proto, sizeof(proto));
	proto.input = ipsec_proto_input;
	proto.pre_output = ipsec_proto_pre_output;

	result = ifnet_attach_protocol(interface, protocol, &proto);
	if (result != 0 && result != EEXIST) {
		os_log_error(OS_LOG_DEFAULT, "ipsec_attach_proto - ifnet_attach_protocol %d failed: %d\n",
		    protocol, result);
	}

	return result;
}

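/*
 * Inject a received packet (or packet chain) inbound. On the netif path
 * the chain is appended to the pcb's input chain, bounded by
 * if_ipsec_max_pending_input, and the RX ring is notified; on the legacy
 * path the packet is demuxed and delivered directly.
 */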
errno_t
ipsec_inject_inbound_packet(ifnet_t interface,
    mbuf_t packet)
{
#if IPSEC_NEXUS
	struct ipsec_pcb *pcb = ifnet_softc(interface);

	if (pcb->ipsec_use_netif) {
		if (!ipsec_data_move_begin(pcb)) {
			os_log_info(OS_LOG_DEFAULT, "%s: data path stopped for %s\n", __func__,
			    if_name(pcb->ipsec_ifp));
			return ENXIO;
		}

		lck_rw_lock_shared(&pcb->ipsec_pcb_lock);

		lck_mtx_lock(&pcb->ipsec_input_chain_lock);

		if (pcb->ipsec_input_chain_count > (u_int32_t)if_ipsec_max_pending_input) {
			lck_mtx_unlock(&pcb->ipsec_input_chain_lock);
			lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);
			ipsec_data_move_end(pcb);
			return ENOSPC;
		}

		if (pcb->ipsec_input_chain != NULL) {
			pcb->ipsec_input_chain_last->m_nextpkt = packet;
		} else {
			pcb->ipsec_input_chain = packet;
		}
		pcb->ipsec_input_chain_count++;
		while (packet->m_nextpkt) {
			VERIFY(packet != packet->m_nextpkt);
			packet = packet->m_nextpkt;
			pcb->ipsec_input_chain_count++;
		}
		pcb->ipsec_input_chain_last = packet;
		lck_mtx_unlock(&pcb->ipsec_input_chain_lock);

		kern_channel_ring_t rx_ring = pcb->ipsec_netif_rxring[0];
		lck_rw_unlock_shared(&pcb->ipsec_pcb_lock);

		if (rx_ring != NULL) {
			kern_channel_notify(rx_ring, 0);
		}

		ipsec_data_move_end(pcb);
		return 0;
	} else
#endif // IPSEC_NEXUS
	{
		errno_t error;
		protocol_family_t protocol;
		if ((error = ipsec_demux(interface, packet, NULL, &protocol)) != 0) {
			return error;
		}

		return ipsec_proto_input(interface, protocol, packet, NULL);
	}
}

void
ipsec_set_pkthdr_for_interface(ifnet_t interface, mbuf_t packet, int family,
    uint32_t flowid)
{
#pragma unused (flowid)
	if (packet != NULL && interface != NULL) {
		struct ipsec_pcb *pcb = ifnet_softc(interface);
		if (pcb != NULL) {
			/* Set traffic class, set flow */
			m_set_service_class(packet, pcb->ipsec_output_service_class);
			packet->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
#if SKYWALK
			packet->m_pkthdr.pkt_mpriv_srcid = interface->if_flowhash;
			packet->m_pkthdr.pkt_flowid = flowid;
#else /* !SKYWALK */
			packet->m_pkthdr.pkt_flowid = interface->if_flowhash;
#endif /* !SKYWALK */
			if (family == AF_INET) {
				struct ip *ip = mtod(packet, struct ip *);
				packet->m_pkthdr.pkt_proto = ip->ip_p;
			} else if (family == AF_INET6) {
				struct ip6_hdr *ip6 = mtod(packet, struct ip6_hdr *);
				packet->m_pkthdr.pkt_proto = ip6->ip6_nxt;
			}
			packet->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC);
		}
	}
}

void
ipsec_set_ipoa_for_interface(ifnet_t interface, struct ip_out_args *ipoa)
{
	struct ipsec_pcb *pcb;

	if (interface == NULL || ipoa == NULL) {
		return;
	}
	pcb = ifnet_softc(interface);

	if (net_qos_policy_restricted == 0) {
		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
		ipoa->ipoa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
	    net_qos_policy_restrict_avapps != 0) {
		ipoa->ipoa_flags &= ~IPOAF_QOSMARKING_ALLOWED;
	} else {
		ipoa->ipoa_flags |= IPOAF_QOSMARKING_ALLOWED;
		ipoa->ipoa_sotc = SO_TC_VO;
	}
}

void
ipsec_set_ip6oa_for_interface(ifnet_t interface, struct ip6_out_args *ip6oa)
{
	struct ipsec_pcb *pcb;

	if (interface == NULL || ip6oa == NULL) {
		return;
	}
	pcb = ifnet_softc(interface);

	if (net_qos_policy_restricted == 0) {
		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
		ip6oa->ip6oa_sotc = so_svc2tc(pcb->ipsec_output_service_class);
	} else if (pcb->ipsec_output_service_class != MBUF_SC_VO ||
	    net_qos_policy_restrict_avapps != 0) {
		ip6oa->ip6oa_flags &= ~IP6OAF_QOSMARKING_ALLOWED;
	} else {
		ip6oa->ip6oa_flags |= IP6OAF_QOSMARKING_ALLOWED;
		ip6oa->ip6oa_sotc = SO_TC_VO;
	}
}

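/*
 * Data-move accounting: every data-path entry point brackets its work in
 * ipsec_data_move_begin()/ipsec_data_move_end(). Teardown first clears
 * the ready bit, so new entries fail, then drains by sleeping until the
 * in-flight count reaches zero; after that no thread can be left in the
 * data path.
 */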
static boolean_t
ipsec_data_move_begin(struct ipsec_pcb *pcb)
{
	boolean_t ret = 0;

	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
	if ((ret = IPSEC_IS_DATA_PATH_READY(pcb))) {
		pcb->ipsec_pcb_data_move++;
	}
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	return ret;
}

static void
ipsec_data_move_end(struct ipsec_pcb *pcb)
{
	lck_mtx_lock_spin(&pcb->ipsec_pcb_data_move_lock);
	VERIFY(pcb->ipsec_pcb_data_move > 0);
	/*
	 * If no more threads are moving data, wake up any
	 * drainers blocked waiting for this.
	 */
	if (--pcb->ipsec_pcb_data_move == 0 && pcb->ipsec_pcb_drainers > 0) {
		wakeup(&(pcb->ipsec_pcb_data_move));
	}
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}

static void
ipsec_data_move_drain(struct ipsec_pcb *pcb)
{
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	/* data path must already be marked as not ready */
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	pcb->ipsec_pcb_drainers++;
	while (pcb->ipsec_pcb_data_move != 0) {
		(void)msleep(&(pcb->ipsec_pcb_data_move), &pcb->ipsec_pcb_data_move_lock,
		    (PZERO - 1), __func__, NULL);
	}
	VERIFY(!IPSEC_IS_DATA_PATH_READY(pcb));
	VERIFY(pcb->ipsec_pcb_drainers > 0);
	pcb->ipsec_pcb_drainers--;
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);
}

static void
ipsec_wait_data_move_drain(struct ipsec_pcb *pcb)
{
	/*
	 * Mark the data path as not usable.
	 */
	lck_mtx_lock(&pcb->ipsec_pcb_data_move_lock);
	IPSEC_CLR_DATA_PATH_READY(pcb);
	lck_mtx_unlock(&pcb->ipsec_pcb_data_move_lock);

	/* Wait until all threads in the data paths are done. */
	ipsec_data_move_drain(pcb);
}
