1/*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_
30#define _SKYWALK_OS_NEXUS_PRIVATE_H_
31
32#if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33#include <stdbool.h>
34#include <sys/guarded.h>
35#include <skywalk/os_channel.h>
36#include <skywalk/os_nexus.h>
37#include <netinet/in.h>
38#include <netinet/in_private.h>
39#include <netinet/tcp.h>
40#include <netinet/tcp_private.h>
41#include <net/ethernet.h>
42
43/*
44 * Ephemeral port, for NEXUSDOMCAPF_EPHEMERAL capable nexus.
45 */
46#define NEXUS_PORT_ANY ((nexus_port_t)-1)
47#define NEXUS_PORT_MAX ((nexus_port_t)-1)
48
49typedef nexus_port_t nexus_port_size_t;
50
51#define NEXUSCTL_INIT_VERSION_1 1
52#define NEXUSCTL_INIT_CURRENT_VERSION NEXUSCTL_INIT_VERSION_1
53
54/*
55 * Nexus controller init parameters.
56 */
57struct nxctl_init {
58 uint32_t ni_version; /* in: NEXUSCTL_INIT_CURRENT_VERSION */
59 uint32_t __ni_align; /* reserved */
60 guardid_t ni_guard; /* out: guard ID */
61};
62
/*
 * Nexus metadata type.
 *
 * Packets are referenced through tagged pointers, and this type is
 * encoded in the tag together with the subtype.  Object addresses are
 * required to be aligned on a 64-byte boundary at the minimum, which
 * leaves a total of 4 bits: 2 for the type and 2 for the subtype.  The
 * largest usable enum value is therefore 3.
 */
typedef enum {
	NEXUS_META_TYPE_INVALID  = 0,   /* invalid type */
	NEXUS_META_TYPE_QUANTUM  = 1,   /* struct __quantum */
	NEXUS_META_TYPE_PACKET   = 2,   /* struct __packet */
	NEXUS_META_TYPE_RESERVED = 3,   /* for future */
	NEXUS_META_TYPE_MAX      = NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
79
/*
 * Nexus metadata subtype; values are limited to the range 0..3.
 */
typedef enum {
	NEXUS_META_SUBTYPE_INVALID  = 0,        /* invalid subtype */
	NEXUS_META_SUBTYPE_PAYLOAD  = 1,        /* normal payload mode */
	NEXUS_META_SUBTYPE_RAW      = 2,        /* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RESERVED = 3,        /* for future */
	NEXUS_META_SUBTYPE_MAX      = NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
87
88/*
89 * Nexus provider parameters.
90 */
91struct nxprov_params {
92 nexus_name_t nxp_name; /* name */
93 uint32_t nxp_namelen; /* length of name */
94 nexus_type_t nxp_type; /* NEXUS_TYPE_* */
95 nexus_meta_type_t nxp_md_type; /* NEXUS_META_TYPE_* */
96 nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */
97 uint32_t nxp_flags; /* NXPF_* */
98 uint32_t nxp_format; /* provider-defined */
99 uint32_t nxp_tx_rings; /* # of channel transmit rings */
100 uint32_t nxp_rx_rings; /* # of channel receive rings */
101 uint32_t nxp_tx_slots; /* # of slots per channel TX ring */
102 uint32_t nxp_rx_slots; /* # of slots per channel RX ring */
103 uint32_t nxp_buf_size; /* size of each buffer */
104 uint32_t nxp_meta_size; /* size of metadata per slot */
105 uint32_t nxp_stats_size; /* size of statistics region */
106 uint32_t nxp_pipes; /* number of pipes */
107 nexus_extension_t nxp_extensions; /* extension specific parameter(s) */
108 uint32_t nxp_mhints; /* memory usage hints */
109 uint32_t nxp_ifindex; /* network interface index */
110 uint32_t nxp_flowadv_max; /* max flow advisory entries */
111 nexus_qmap_type_t nxp_qmap; /* queue mapping type */
112 uint32_t nxp_capabilities; /* nexus capabilities */
113 uint32_t nxp_nexusadv_size; /* nexus advisory region size */
114 uint32_t nxp_max_frags; /* max fragments per packet */
115 /*
116 * reject channel operations if the peer has closed the channel.
117 * Only valid for user-pipe nexus.
118 */
119 boolean_t nxp_reject_on_close;
120 uint32_t nxp_large_buf_size; /* size of large buffer */
121} __attribute__((aligned(64)));
122
/* valid values for nxp_flags */
#define NXPF_ANONYMOUS          0x1     /* allow anonymous channel clients */
#define NXPF_USER_CHANNEL       0x2     /* allow user channel open */
#define NXPF_NETIF_LLINK        0x4     /* use netif logical link */
#ifdef KERNEL
#define NXPF_MASK       (NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * Bit-name string for NXPF_* flags: a leading base byte (\020) followed
 * by one "<1-based bit number><name>" entry per flag.  Every flag defined
 * above has an entry, including NXPF_NETIF_LLINK (bit 3).
 */
#define NXPF_BITS \
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"
133
/* capability bits carried in nxp_capabilities */
#define NXPCAP_CHECKSUM_PARTIAL 0x00000001      /* partial checksum */
#define NXPCAP_USER_PACKET_POOL 0x00000002      /* user packet pool */
#define NXPCAP_USER_CHANNEL     0x00000004      /* allow user channel access */

/* bit-name string matching the NXPCAP_* flags above */
#define NXPCAP_BITS \
	"\020" "\01CHECKSUM_PARTIAL" "\02USER_PKT_POOL" "\03USER_CHANNEL"
141
142#define NXPROV_REG_VERSION_1 1
143#define NXPROV_REG_CURRENT_VERSION NXPROV_REG_VERSION_1
144
145/*
146 * Nexus provider registration parameters.
147 */
148struct nxprov_reg {
149 uint32_t nxpreg_version; /* NXPROV_REG_CURRENT_VERSION */
150 uint32_t nxpreg_requested; /* customized attributes */
151 struct nxprov_params nxpreg_params; /* Nexus provider parameters */
152};
153
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS         (1U << 0)       /* 0x00000001 */
#define NXPREQ_RX_RINGS         (1U << 1)       /* 0x00000002 */
#define NXPREQ_TX_SLOTS         (1U << 2)       /* 0x00000004 */
#define NXPREQ_RX_SLOTS         (1U << 3)       /* 0x00000008 */
#define NXPREQ_BUF_SIZE         (1U << 4)       /* 0x00000010 */
#define NXPREQ_META_SIZE        (1U << 5)       /* 0x00000020 */
#define NXPREQ_STATS_SIZE       (1U << 6)       /* 0x00000040 */
#define NXPREQ_ANONYMOUS        (1U << 7)       /* 0x00000080 */
#define NXPREQ_PIPES            (1U << 8)       /* 0x00000100 */
#define NXPREQ_EXTENSIONS       (1U << 9)       /* 0x00000200 */
#define NXPREQ_MHINTS           (1U << 10)      /* 0x00000400 */
#define NXPREQ_FLOWADV_MAX      (1U << 11)      /* 0x00000800 */
#define NXPREQ_QMAP             (1U << 12)      /* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD (1U << 13)      /* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL (1U << 14)      /* 0x00004000 */
#define NXPREQ_CAPABILITIES     (1U << 15)      /* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE    (1U << 16)      /* 0x00010000 */
#define NXPREQ_IFINDEX          (1U << 17)      /* 0x00020000 */
#define NXPREQ_USER_CHANNEL     (1U << 18)      /* 0x00040000 */
#define NXPREQ_MAX_FRAGS        (1U << 19)      /* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE  (1U << 20)      /* 0x00100000 */
#define NXPREQ_LARGE_BUF_SIZE   (1U << 21)      /* 0x00200000 */

/*
 * Bit-name string for the NXPREQ_* flags: a leading base byte (\020)
 * followed by "<1-based bit number, in octal><name>" entries.  The entry
 * for flag (1U << n) therefore carries bit number n + 1; e.g.
 * NXPREQ_PIPES (1U << 8) is entry \011.  Keep the numbering in step
 * with the definitions above whenever a flag is added or removed.
 */
#define NXPREQ_BITS \
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"   \
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES"               \
	"\012EXTENSIONS\013MHINTS\014FLOWADV_MAX\015QMAP"               \
	"\016CKSUM_OFFLOAD\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE" \
	"\022IFINDEX\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE\026LBUF_SIZE"
188
189/*
190 * Nexus provider registration entry. Also argument for NXOPT_NEXUS_PROV_ENTRY.
191 */
192struct nxprov_reg_ent {
193 uuid_t npre_prov_uuid; /* Nexus provider UUID */
194 struct nxprov_params npre_prov_params; /* Nexus provider parameters */
195};
196
/*
 * Nexus option identifiers; "(get)" options retrieve data, "(set)"
 * options apply a change.
 */
#define NXOPT_NEXUS_PROV_LIST   1       /* (get) list all provider UUIDs */
#define NXOPT_NEXUS_PROV_ENTRY  2       /* (get) get params of a provider */
#define NXOPT_NEXUS_LIST        20      /* (get) list all nexus instances */
#define NXOPT_NEXUS_BIND        21      /* (set) bind a nexus port */
#define NXOPT_NEXUS_UNBIND      22      /* (set) unbind a nexus port */
#define NXOPT_CHANNEL_LIST      30      /* (get) list all channel instances */
#define NXOPT_NEXUS_CONFIG      40      /* (set) nexus specific config */
207
208/*
209 * Argument structure for NXOPT_NEXUS_PROV_LIST.
210 */
211struct nxprov_list_req {
212 uint32_t nrl_num_regs; /* array count */
213 uint32_t __nrl_align; /* reserved */
214 user_addr_t nrl_regs; /* array of nexus_reg_ent */
215};
216
217/*
218 * Argument structure for NXOPT_NEXUS_LIST.
219 */
220struct nx_list_req {
221 uuid_t nl_prov_uuid; /* nexus provider UUID */
222 uint32_t nl_num_nx_uuids; /* array count */
223 uint32_t __nl_align; /* reserved */
224 user_addr_t nl_nx_uuids; /* array of nexus UUIDs */
225};
226
227/*
228 * Argument structure for NXOPT_NEXUS_BIND.
229 */
230struct nx_bind_req {
231 uuid_t nb_nx_uuid; /* nexus instance UUID */
232 nexus_port_t nb_port; /* nexus instance port */
233 uint32_t nb_flags; /* NBR_* match flags */
234 uuid_t nb_exec_uuid; /* executable UUID */
235 user_addr_t nb_key; /* key blob */
236 uint32_t nb_key_len; /* key blob length */
237 pid_t nb_pid; /* client PID */
238};
239
240#define NBR_MATCH_PID 0x1 /* match against PID */
241#define NBR_MATCH_EXEC_UUID 0x2 /* match executable's UUID */
242#define NBR_MATCH_KEY 0x4 /* match key blob */
243#ifdef KERNEL
244#define NBR_MATCH_MASK \
245 (NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY)
246#endif /* KERNEL */
247
248/*
249 * Argument structure for NXOPT_NEXUS_UNBIND.
250 */
251struct nx_unbind_req {
252 uuid_t nu_nx_uuid; /* nexus instance UUID */
253 nexus_port_t nu_port; /* nexus instance port */
254};
255
256/*
257 * Argument structure for NXOPT_CHANNEL_LIST.
258 */
259struct ch_list_req {
260 uuid_t cl_nx_uuid; /* nexus instance UUID */
261 uint32_t cl_num_ch_uuids; /* array count */
262 uint32_t __cl_align; /* reserved */
263 user_addr_t cl_ch_uuids; /* array of channel UUIDs */
264};
265
/*
 * Skywalk Nexus MIB
 *
 * "MIB" here refers to the things exposed to the outside world for
 * management and telemetry purposes.
 *
 * The guiding rule for these structures is simplicity: avoid variable
 * length fields and hierarchical representations wherever possible.
 * A simple retrieval returns either a single object (a simple type or a
 * fixed length compound type) or an array of objects of the same type,
 * which makes the retrieved information much easier to parse.
 *
 * For now the MIB is exposed through sysctl.  Additional interfaces
 * could be a syscall (e.g. via a nexus controller), etc.
 */

/* MIB types (32-bit) */
#define NXMIB_NETIF_STATS       ((uint32_t)1 << 1)
#define NXMIB_FSW_STATS         ((uint32_t)1 << 2)
#define NXMIB_FLOW              ((uint32_t)1 << 3)
#define NXMIB_FLOW_ADV          ((uint32_t)1 << 4)
#define NXMIB_FLOW_OWNER        ((uint32_t)1 << 5)
#define NXMIB_FLOW_ROUTE        ((uint32_t)1 << 6)
#define NXMIB_LLINK_LIST        ((uint32_t)1 << 7)
#define NXMIB_NETIF_QUEUE_STATS ((uint32_t)1 << 8)

#define NXMIB_QUIC_STATS        ((uint32_t)1 << 27)
#define NXMIB_UDP_STATS         ((uint32_t)1 << 28)
#define NXMIB_TCP_STATS         ((uint32_t)1 << 29)
#define NXMIB_IP6_STATS         ((uint32_t)1 << 30)
#define NXMIB_IP_STATS          ((uint32_t)1 << 31)

/* union of the IP, IPv6, TCP, UDP and QUIC statistics types */
#define NXMIB_USERSTACK_STATS \
	(NXMIB_IP_STATS | NXMIB_IP6_STATS | NXMIB_TCP_STATS | \
	NXMIB_UDP_STATS | NXMIB_QUIC_STATS)

/* filter bitmap bits (64-bit) */
#define NXMIB_FILTER_NX_UUID    ((uint64_t)1 << 0)
#define NXMIB_FILTER_FLOW_ID    ((uint64_t)1 << 1)
#define NXMIB_FILTER_PID        ((uint64_t)1 << 2)
#define NXMIB_FILTER_INFO_TUPLE ((uint64_t)1 << 3)
304
305/*
306 * Nexus MIB filter: used to retrieve only those matching the filter value.
307 */
308struct nexus_mib_filter {
309 uint32_t nmf_type; /* MIB type */
310 uint64_t nmf_bitmap; /* bitmap of following fields */
311
312 uuid_t nmf_nx_uuid; /* nexus instance uuid */
313 uuid_t nmf_flow_id; /* flow rule id */
314 pid_t nmf_pid; /* owner pid */
315 struct info_tuple nmf_info_tuple; /* flow tuple */
316};
317
/*
 * Commands for nexus-specific configuration.
 */
typedef enum {
	/* object attachment */
	NXCFG_CMD_ATTACH         = 0,   /* attach an object to a nexus */
	NXCFG_CMD_DETACH         = 1,   /* detach an object from a nexus */
	/* flow management */
	NXCFG_CMD_FLOW_ADD       = 20,  /* add a flow to a nexus */
	NXCFG_CMD_FLOW_DEL       = 21,  /* delete a flow from nexus */
	NXCFG_CMD_FLOW_CONFIG    = 22,  /* configure a flow in nexus */
	/* miscellaneous */
	NXCFG_CMD_NETEM          = 30,  /* config packet scheduler */
	NXCFG_CMD_GET_LLINK_INFO = 40,  /* collect llink info */
} nxcfg_cmd_t;
330
331#define NX_SPEC_IF_NAMELEN 64
332
333/*
334 * Argument struture for NXOPT_NEXUS_CONFIG.
335 */
336struct nx_cfg_req {
337 uuid_t nc_nx_uuid; /* nexus instance UUID */
338 nxcfg_cmd_t nc_cmd; /* NXCFG_CMD_* */
339 uint32_t nc_req_len; /* size of request struct */
340 user_addr_t nc_req; /* address of request struct */
341};
342
343/*
344 * Argument structure for NXCFG_CMD_{ATTACH,DETACH}
345 */
346struct nx_spec_req {
347 union {
348 char nsru_name[NX_SPEC_IF_NAMELEN];
349 uuid_t nsru_uuid;
350#ifdef KERNEL
351 struct ifnet *nsru_ifp;
352#endif /* KERNEL */
353 } nsr_u __attribute__((aligned(sizeof(uint64_t)))); /* in */
354 uint32_t nsr_flags; /* in */
355 uuid_t nsr_if_uuid; /* attach: out, detach: in */
356};
357#define nsr_name nsr_u.nsru_name
358#define nsr_uuid nsr_u.nsru_uuid
359#ifdef KERNEL
360#define nsr_ifp nsr_u.nsru_ifp
361#endif /* KERNEL */
362
363#define NXSPECREQ_UUID 0x1 /* nsr_name is uuid_t else ifname */
364#define NXSPECREQ_HOST 0x2 /* attach to host port */
365#ifdef KERNEL
366/* mask off userland-settable bits */
367#define NXSPECREQ_MASK (NXSPECREQ_UUID | NXSPECREQ_HOST)
368#define NXSPECREQ_IFP 0x1000 /* (embryonic) ifnet */
369#endif /* KERNEL */
370
/*
 * Pattern used to demux flows between parent and child flows.  The
 * mask and value buffers each hold up to FLOW_DEMUX_MAX_LEN bytes.
 */
#define FLOW_DEMUX_MAX_LEN      32
struct flow_demux_pattern {
	uint16_t        fdp_offset;
	uint16_t        fdp_len;
	uint8_t         fdp_mask[FLOW_DEMUX_MAX_LEN];
	uint8_t         fdp_value[FLOW_DEMUX_MAX_LEN];
};

/* capacity of the demux pattern array carried in a flow request */
#define MAX_FLOW_DEMUX_PATTERN  4
383
/*
 * Argument structure for the NXCFG_CMD_FLOW_* commands
 * (e.g. NXCFG_CMD_FLOW_ADD and NXCFG_CMD_FLOW_DEL).
 */
387struct nx_flow_req {
388 nexus_port_t nfr_nx_port;
389 uint16_t nfr_ethertype;
390 ether_addr_t nfr_etheraddr;
391 union sockaddr_in_4_6 nfr_saddr;
392 union sockaddr_in_4_6 nfr_daddr;
393 uint8_t nfr_ip_protocol;
394 uint8_t nfr_transport_protocol;
395 uint16_t nfr_flags;
396 uuid_t nfr_flow_uuid;
397 packet_svc_class_t nfr_svc_class;
398 uuid_t nfr_euuid;
399 uint32_t nfr_policy_id;
400 uint32_t nfr_skip_policy_id;
401 pid_t nfr_epid;
402 flowadv_idx_t nfr_flowadv_idx;
403 uuid_t nfr_bind_key;
404 uint64_t nfr_qset_id;
405 uuid_t nfr_parent_flow_uuid;
406 uint8_t nfr_flow_demux_count;
407 struct flow_demux_pattern nfr_flow_demux_patterns[MAX_FLOW_DEMUX_PATTERN];
408 // below is reserved kernel-only fields
409 union {
410#ifdef KERNEL
411 struct {
412 char _nfr_kernel_field_start[0];
413 void *nfr_context;
414 struct proc *nfr_proc;
415 struct ifnet *nfr_ifp;
416 struct flow_route *nfr_route;
417 struct ns_token *nfr_port_reservation;
418 struct protons_token *nfr_proto_reservation;
419 struct flow_stats *nfr_flow_stats;
420 pid_t nfr_pid;
421 uint32_t nfr_saddr_gencnt;
422 void *nfr_ipsec_reservation;
423 uint32_t nfr_inp_flowhash;
424#if defined(__LP64__)
425 uint8_t _nfr_kernel_pad[4];
426#else /* !__LP64__ */
427 uint8_t _nfr_kernel_pad[36];
428#endif /* !__LP64__ */
429 char _nfr_kernel_field_end[0];
430 };
431#endif /* KERNEL */
432 struct {
433 uint8_t _nfr_opaque[80];
434 /* should be at the same offset as _nfr_kernel_field_end above */
435 char _nfr_common_field_end[0];
436 };
437 };
438};
439
/* flag bits carried in nfr_flags (16-bit field) */
#define NXFLOWREQF_TRACK                        0x0001  /* enable state tracking */
#define NXFLOWREQF_QOS_MARKING                  0x0002  /* allow qos marking */
#define NXFLOWREQF_FILTER                       0x0004  /* interpose filter */
#define NXFLOWREQF_CUSTOM_ETHER                 0x0008  /* custom ethertype */
#define NXFLOWREQF_IPV6_ULA                     0x0010  /* ipv6 ula */
#define NXFLOWREQF_LISTENER                     0x0020  /* listener */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION   0x0040  /* override system address selection */
#define NXFLOWREQF_USE_STABLE_ADDRESS           0x0080  /* if override local, use stable address */
#define NXFLOWREQF_FLOWADV                      0x0100  /* allocate flow advisory */
#define NXFLOWREQF_ASIS                         0x0200  /* create flow as is in nfr */
#define NXFLOWREQF_LOW_LATENCY                  0x0400  /* low latency flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP              0x0800  /* don't wake for traffic to this flow */
/* NOTE(review): presumably permits port reuse; semantics not visible here -- confirm */
#define NXFLOWREQF_REUSEPORT                    0x1000
#define NXFLOWREQF_PARENT                       0x4000  /* parent flow */

/* bit-name string for the NXFLOWREQF_* flags above */
#define NXFLOWREQF_BITS                                                 \
	"\020\01TRACK\02QOS_MARKING\03FILTER\04CUSTOM_ETHER\05IPV6_ULA"         \
	"\06LISTENER\07OVERRIDE_ADDRESS_SELECTION\010USE_STABLE_ADDRESS"        \
	"\011ALLOC_FLOWADV\012ASIS\013LOW_LATENCY\014NOWAKEUPFROMSLEEP"         \
	"\015REUSEPORT\017PARENT"
461
462struct flow_ip_addr {
463 union {
464 struct in_addr _v4;
465 struct in6_addr _v6;
466 uint8_t _addr8[16];
467 uint16_t _addr16[8];
468 uint32_t _addr32[4];
469 uint64_t _addr64[2];
470 };
471};
472
473struct flow_key {
474 uint16_t fk_mask;
475 uint8_t fk_ipver;
476 uint8_t fk_proto;
477 uint16_t fk_sport;
478 uint16_t fk_dport;
479 struct flow_ip_addr fk_src;
480 struct flow_ip_addr fk_dst;
481 uint64_t fk_pad[1]; /* pad to 48 bytes */
482} __attribute__((__aligned__(16)));
483
484#define fk_src4 fk_src._v4
485#define fk_dst4 fk_dst._v4
486#define fk_src6 fk_src._v6
487#define fk_dst6 fk_dst._v6
488
#define FLOW_KEY_LEN    sizeof(struct flow_key)
#define FK_HASH_SEED    0xabcd

/* individual flow_key fields selectable through fk_mask */
#define FKMASK_IPVER    ((uint16_t)1 << 0)
#define FKMASK_PROTO    ((uint16_t)1 << 1)
#define FKMASK_SRC      ((uint16_t)1 << 2)
#define FKMASK_SPORT    ((uint16_t)1 << 3)
#define FKMASK_DST      ((uint16_t)1 << 4)
#define FKMASK_DPORT    ((uint16_t)1 << 5)

/* tuple masks; each one extends the previous with more fields */
#define FKMASK_2TUPLE   (FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE   (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE   (FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE   (FKMASK_4TUPLE | FKMASK_DST)
/* port-less (IP flow) masks */
#define FKMASK_IPFLOW1  FKMASK_PROTO
#define FKMASK_IPFLOW2  (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3  (FKMASK_IPFLOW2 | FKMASK_DST)
#define FKMASK_IDX_MAX  7
507
/* constant flow keys, one per FKMASK_* tuple / IP flow mask; defined elsewhere */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;
515
/*
 * Clear a flow key via sk_zero_48().  The compile-time assertions pin
 * the key size to the 48 bytes that helper is named for.
 */
#define FLOW_KEY_CLEAR(_fk) do {                                        \
	_CASSERT(FLOW_KEY_LEN == 48);                                   \
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));              \
	sk_zero_48((_fk));                                              \
} while (0)
521
522#ifdef KERNEL
/* mask off userland-settable bits */
#define NXFLOWREQF_MASK \
	(NXFLOWREQF_TRACK | NXFLOWREQF_QOS_MARKING | NXFLOWREQF_FILTER | \
	NXFLOWREQF_CUSTOM_ETHER | NXFLOWREQF_IPV6_ULA | NXFLOWREQF_LISTENER | \
	NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION | NXFLOWREQF_USE_STABLE_ADDRESS | \
	NXFLOWREQF_FLOWADV | NXFLOWREQF_LOW_LATENCY | NXFLOWREQF_NOWAKEFROMSLEEP | \
	NXFLOWREQF_REUSEPORT | NXFLOWREQF_PARENT)

/*
 * Kernel-only flag bits.  These must not overlap any bit contained in
 * NXFLOWREQF_MASK, otherwise masking nfr_flags with NXFLOWREQF_MASK
 * cannot strip them.  NXFLOWREQF_EXT_PORT_RSV uses 0x8000 because
 * 0x1000 is taken by the userland-settable NXFLOWREQF_REUSEPORT.
 * Both values fit in the 16-bit nfr_flags field.
 */
#define NXFLOWREQF_EXT_PORT_RSV         0x8000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV        0x2000  /* external proto reservation */
533
534static inline void
535nx_flow_req_internalize(struct nx_flow_req *req)
536{
537 _CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) ==
538 offsetof(struct nx_flow_req, _nfr_common_field_end));
539
540 /* init kernel only fields */
541 bzero(s: &req->_nfr_opaque, n: sizeof(req->_nfr_opaque));
542 req->nfr_flags &= NXFLOWREQF_MASK;
543 req->nfr_context = NULL;
544 req->nfr_flow_stats = NULL;
545 req->nfr_port_reservation = NULL;
546}
547
548static inline void
549nx_flow_req_externalize(struct nx_flow_req *req)
550{
551 /* neutralize kernel only fields */
552 bzero(s: &req->_nfr_opaque, n: sizeof(req->_nfr_opaque));
553 req->nfr_flags &= NXFLOWREQF_MASK;
554}
555#endif /* KERNEL */
556
/*
 * Per-queue-set information: identifier, flags and RX/TX queue counts.
 */
struct nx_qset_info {
	uint64_t        nqi_id;
	uint16_t        nqi_flags;
	uint8_t         nqi_num_rx_queues;
	uint8_t         nqi_num_tx_queues;
};
563
564#define NETIF_LLINK_MAX_QSETS 256
565struct nx_llink_info {
566 uuid_t nli_netif_uuid; /* nexus netif instance uuid */
567 uint64_t nli_link_id;
568 uint16_t nli_link_id_internal;
569 uint8_t nli_state;
570 uint8_t nli_flags;
571 uint16_t nli_qset_cnt;
572 struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS];
573};
574
575#define NETIF_LLINK_INFO_VERSION 0x01
576struct nx_llink_info_req {
577 uint16_t nlir_version;
578 uint16_t nlir_llink_cnt;
579 struct nx_llink_info nlir_llink[0];
580};
581
582/*
583 * Nexus controller descriptor.
584 */
585struct nexus_controller {
586#ifndef KERNEL
587 int ncd_fd;
588 guardid_t ncd_guard;
589#else /* KERNEL */
590 struct nxctl *ncd_nxctl;
591#endif /* KERNEL */
592};
593
594/* For nexus ops without having to create a nexus controller */
595#define __OS_NEXUS_SHARED_USER_CONTROLLER_FD (-1)
596
/*
 * Nexus attributes; every attribute is stored as a 64-bit value.
 */
struct nexus_attr {
	uint64_t        nxa_requested;          /* NXA_REQ_*: customized attributes */
	uint64_t        nxa_tx_rings;           /* channel transmit ring count */
	uint64_t        nxa_rx_rings;           /* channel receive ring count */
	uint64_t        nxa_tx_slots;           /* slots per channel TX ring */
	uint64_t        nxa_rx_slots;           /* slots per channel RX ring */
	uint64_t        nxa_buf_size;           /* size of each buffer */
	uint64_t        nxa_meta_size;          /* metadata size per buffer */
	uint64_t        nxa_stats_size;         /* statistics region size */
	uint64_t        nxa_anonymous;          /* bool: allow anonymous clients */
	uint64_t        nxa_pipes;              /* number of pipes */
	uint64_t        nxa_extensions;         /* extension-specific attribute */
	uint64_t        nxa_mhints;             /* memory usage hints */
	uint64_t        nxa_ifindex;            /* network interface index */
	uint64_t        nxa_flowadv_max;        /* max flow advisory entries */
	uint64_t        nxa_qmap;               /* queue mapping type */
	uint64_t        nxa_checksum_offload;   /* partial checksum offload */
	uint64_t        nxa_user_packet_pool;   /* user packet pool */
	uint64_t        nxa_nexusadv_size;      /* size of advisory region */
	uint64_t        nxa_user_channel;       /* user channel open allowed */
	uint64_t        nxa_max_frags;          /* max fragments per packet */
	/*
	 * When set, channel operations are rejected once the nexus peer
	 * has closed the channel.  Valid only for user-pipe nexus.
	 */
	uint64_t        nxa_reject_on_close;
	uint64_t        nxa_large_buf_size;     /* size of large buffer */
};
628
/*
 * Flags for nxa_requested; keep in sync with NXPREQ_* flags.
 * Note that these are 64-bit, whereas nxpreg_requested is
 * 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS                0x0000000000000001ULL   /* bit 0 */
#define NXA_REQ_RX_RINGS                0x0000000000000002ULL   /* bit 1 */
#define NXA_REQ_TX_SLOTS                0x0000000000000004ULL   /* bit 2 */
#define NXA_REQ_RX_SLOTS                0x0000000000000008ULL   /* bit 3 */
#define NXA_REQ_BUF_SIZE                0x0000000000000010ULL   /* bit 4 */
#define NXA_REQ_META_SIZE               0x0000000000000020ULL   /* bit 5 */
#define NXA_REQ_STATS_SIZE              0x0000000000000040ULL   /* bit 6 */
#define NXA_REQ_ANONYMOUS               0x0000000000000080ULL   /* bit 7 */
#define NXA_REQ_PIPES                   0x0000000000000100ULL   /* bit 8 */
#define NXA_REQ_EXTENSIONS              0x0000000000000200ULL   /* bit 9 */
#define NXA_REQ_MHINTS                  0x0000000000000400ULL   /* bit 10 */
#define NXA_REQ_FLOWADV_MAX             0x0000000000000800ULL   /* bit 11 */
#define NXA_REQ_QMAP                    0x0000000000001000ULL   /* bit 12 */
#define NXA_REQ_CHECKSUM_OFFLOAD        0x0000000000002000ULL   /* bit 13 */
#define NXA_REQ_USER_PACKET_POOL        0x0000000000004000ULL   /* bit 14 */
#define NXA_REQ_CAPABILITIES            0x0000000000008000ULL   /* bit 15 */
#define NXA_REQ_NEXUSADV_SIZE           0x0000000000010000ULL   /* bit 16 */
#define NXA_REQ_IFINDEX                 0x0000000000020000ULL   /* bit 17 */
#define NXA_REQ_USER_CHANNEL            0x0000000000040000ULL   /* bit 18 */
#define NXA_REQ_MAX_FRAGS               0x0000000000080000ULL   /* bit 19 */
#define NXA_REQ_REJECT_ON_CLOSE         0x0000000000100000ULL   /* bit 20 */
#define NXA_REQ_LARGE_BUF_SIZE          0x0000000000200000ULL   /* bit 21 */
656
657#ifndef KERNEL
658#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
659__BEGIN_DECLS
660/* system calls */
661extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len);
662extern int __nexus_register(int ctl, struct nxprov_reg *reg,
663 const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len);
664extern int __nexus_deregister(int ctl, const uuid_t prov_uuid,
665 const uint32_t prov_uuid_len);
666extern int __nexus_create(int ctl, const uuid_t prov_uuid,
667 const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len);
668extern int __nexus_destroy(int ctl, const uuid_t nx_uuid,
669 const uint32_t nx_uuid_len);
670extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval,
671 uint32_t *aoptlen);
672extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval,
673 const uint32_t optlen);
674
675/* private nexus controller APIs */
676extern int __os_nexus_ifattach(const nexus_controller_t ctl,
677 const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid,
678 boolean_t host, uuid_t *nx_if_uuid);
679extern int __os_nexus_ifdetach(const nexus_controller_t ctl,
680 const uuid_t nx_uuid, const uuid_t nx_if_uuid);
681
682/* private flow APIs */
683extern int __os_nexus_flow_add(const nexus_controller_t ncd,
684 const uuid_t nx_uuid, const struct nx_flow_req *nfr);
685extern int __os_nexus_flow_del(const nexus_controller_t ncd,
686 const uuid_t nx_uuid, const struct nx_flow_req *nfr);
687extern int __os_nexus_get_llink_info(const nexus_controller_t ncd,
688 const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len);
689extern int os_nexus_flow_set_wake_from_sleep(const uuid_t nx_uuid,
690 const uuid_t flow_uuid, bool enable);
691
692__END_DECLS
693#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
694#endif /* !KERNEL */
695#if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
696#include <skywalk/nexus_common.h>
697#include <skywalk/nexus_ioctl.h>
698#endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
699#endif /* PRIVATE || BSD_KERNEL_PRIVATE */
700#endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */
701