1 | /* |
2 | * Copyright (c) 2015-2022 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_ |
30 | #define _SKYWALK_OS_NEXUS_PRIVATE_H_ |
31 | |
32 | #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE) |
33 | #include <stdbool.h> |
34 | #include <sys/guarded.h> |
35 | #include <skywalk/os_channel.h> |
36 | #include <skywalk/os_nexus.h> |
37 | #include <netinet/in.h> |
38 | #include <netinet/in_private.h> |
39 | #include <netinet/tcp.h> |
40 | #include <netinet/tcp_private.h> |
41 | #include <net/ethernet.h> |
42 | |
43 | /* |
44 | * Ephemeral port, for NEXUSDOMCAPF_EPHEMERAL capable nexus. |
45 | */ |
46 | #define NEXUS_PORT_ANY ((nexus_port_t)-1) |
47 | #define NEXUS_PORT_MAX ((nexus_port_t)-1) |
48 | |
49 | typedef nexus_port_t nexus_port_size_t; |
50 | |
51 | #define NEXUSCTL_INIT_VERSION_1 1 |
52 | #define NEXUSCTL_INIT_CURRENT_VERSION NEXUSCTL_INIT_VERSION_1 |
53 | |
54 | /* |
55 | * Nexus controller init parameters. |
56 | */ |
57 | struct nxctl_init { |
58 | uint32_t ni_version; /* in: NEXUSCTL_INIT_CURRENT_VERSION */ |
59 | uint32_t __ni_align; /* reserved */ |
60 | guardid_t ni_guard; /* out: guard ID */ |
61 | }; |
62 | |
/*
 * Nexus metadata type.
 *
 * Packets use tagged pointers, so this type is encoded into the pointer
 * together with the subtype.  Object addresses are required to be aligned
 * on a 64-byte boundary at the minimum, which leaves a total of 4 bits:
 * 2 for the type and 2 for the subtype.  The largest usable enum value is
 * therefore 3.
 */
typedef enum {
	/* invalid type */
	NEXUS_META_TYPE_INVALID		= 0,
	/* struct __quantum */
	NEXUS_META_TYPE_QUANTUM		= 1,
	/* struct __packet */
	NEXUS_META_TYPE_PACKET		= 2,
	/* for future */
	NEXUS_META_TYPE_RESERVED	= 3,
	NEXUS_META_TYPE_MAX		= NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
79 | |
/*
 * Nexus metadata subtype; values run from 0 to NEXUS_META_SUBTYPE_MAX (3).
 */
typedef enum {
	/* invalid subtype */
	NEXUS_META_SUBTYPE_INVALID	= 0,
	/* normal payload mode */
	NEXUS_META_SUBTYPE_PAYLOAD	= 1,
	/* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RAW		= 2,
	/* for future */
	NEXUS_META_SUBTYPE_RESERVED	= 3,
	NEXUS_META_SUBTYPE_MAX		= NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
87 | |
88 | /* |
89 | * Nexus provider parameters. |
90 | */ |
91 | struct nxprov_params { |
92 | nexus_name_t nxp_name; /* name */ |
93 | uint32_t nxp_namelen; /* length of name */ |
94 | nexus_type_t nxp_type; /* NEXUS_TYPE_* */ |
95 | nexus_meta_type_t nxp_md_type; /* NEXUS_META_TYPE_* */ |
96 | nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */ |
97 | uint32_t nxp_flags; /* NXPF_* */ |
98 | uint32_t nxp_format; /* provider-defined */ |
99 | uint32_t nxp_tx_rings; /* # of channel transmit rings */ |
100 | uint32_t nxp_rx_rings; /* # of channel receive rings */ |
101 | uint32_t nxp_tx_slots; /* # of slots per channel TX ring */ |
102 | uint32_t nxp_rx_slots; /* # of slots per channel RX ring */ |
103 | uint32_t nxp_buf_size; /* size of each buffer */ |
104 | uint32_t nxp_meta_size; /* size of metadata per slot */ |
105 | uint32_t nxp_stats_size; /* size of statistics region */ |
106 | uint32_t nxp_pipes; /* number of pipes */ |
107 | nexus_extension_t nxp_extensions; /* extension specific parameter(s) */ |
108 | uint32_t nxp_mhints; /* memory usage hints */ |
109 | uint32_t nxp_ifindex; /* network interface index */ |
110 | uint32_t nxp_flowadv_max; /* max flow advisory entries */ |
111 | nexus_qmap_type_t nxp_qmap; /* queue mapping type */ |
112 | uint32_t nxp_capabilities; /* nexus capabilities */ |
113 | uint32_t nxp_nexusadv_size; /* nexus advisory region size */ |
114 | uint32_t nxp_max_frags; /* max fragments per packet */ |
115 | /* |
116 | * reject channel operations if the peer has closed the channel. |
117 | * Only valid for user-pipe nexus. |
118 | */ |
119 | boolean_t nxp_reject_on_close; |
120 | uint32_t nxp_large_buf_size; /* size of large buffer */ |
121 | } __attribute__((aligned(64))); |
122 | |
/* valid values for nxp_flags */
#define NXPF_ANONYMOUS		0x1	/* allow anonymous channel clients */
#define NXPF_USER_CHANNEL	0x2	/* allow user channel open */
#define NXPF_NETIF_LLINK	0x4	/* use netif logical link */
#ifdef KERNEL
#define NXPF_MASK	(NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * Bit-name string for nxp_flags.  The layout appears to follow the BSD
 * "%b" convention (leading \020 selects hex output, then each entry is a
 * 1-origin bit number followed by its label) -- confirm against the
 * consumer.  Every NXPF_* flag above must have an entry here.
 */
#define NXPF_BITS	\
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"
/*
 * Valid values for nxp_capabilities.
 */
/* partial checksum */
#define NXPCAP_CHECKSUM_PARTIAL		0x01
/* user packet pool */
#define NXPCAP_USER_PACKET_POOL		0x02
/* allow user channel access */
#define NXPCAP_USER_CHANNEL		0x04

/* Bit-name string matching the NXPCAP_* values above. */
#define NXPCAP_BITS	\
	"\020"	\
	"\01CHECKSUM_PARTIAL"	\
	"\02USER_PKT_POOL"	\
	"\03USER_CHANNEL"
141 | |
142 | #define NXPROV_REG_VERSION_1 1 |
143 | #define NXPROV_REG_CURRENT_VERSION NXPROV_REG_VERSION_1 |
144 | |
145 | /* |
146 | * Nexus provider registration parameters. |
147 | */ |
148 | struct nxprov_reg { |
149 | uint32_t nxpreg_version; /* NXPROV_REG_CURRENT_VERSION */ |
150 | uint32_t nxpreg_requested; /* customized attributes */ |
151 | struct nxprov_params nxpreg_params; /* Nexus provider parameters */ |
152 | }; |
153 | |
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS		(1U << 0)	/* 0x00000001 */
#define NXPREQ_RX_RINGS		(1U << 1)	/* 0x00000002 */
#define NXPREQ_TX_SLOTS		(1U << 2)	/* 0x00000004 */
#define NXPREQ_RX_SLOTS		(1U << 3)	/* 0x00000008 */
#define NXPREQ_BUF_SIZE		(1U << 4)	/* 0x00000010 */
#define NXPREQ_META_SIZE	(1U << 5)	/* 0x00000020 */
#define NXPREQ_STATS_SIZE	(1U << 6)	/* 0x00000040 */
#define NXPREQ_ANONYMOUS	(1U << 7)	/* 0x00000080 */
#define NXPREQ_PIPES		(1U << 8)	/* 0x00000100 */
#define NXPREQ_EXTENSIONS	(1U << 9)	/* 0x00000200 */
#define NXPREQ_MHINTS		(1U << 10)	/* 0x00000400 */
#define NXPREQ_FLOWADV_MAX	(1U << 11)	/* 0x00000800 */
#define NXPREQ_QMAP		(1U << 12)	/* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD	(1U << 13)	/* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL	(1U << 14)	/* 0x00004000 */
#define NXPREQ_CAPABILITIES	(1U << 15)	/* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE	(1U << 16)	/* 0x00010000 */
#define NXPREQ_IFINDEX		(1U << 17)	/* 0x00020000 */
#define NXPREQ_USER_CHANNEL	(1U << 18)	/* 0x00040000 */
#define NXPREQ_MAX_FRAGS	(1U << 19)	/* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE	(1U << 20)	/* 0x00100000 */
#define NXPREQ_LARGE_BUF_SIZE	(1U << 21)	/* 0x00200000 */

/*
 * Bit-name string for the NXPREQ_* flags.  Each entry is the 1-origin bit
 * number (in octal) followed by its label, so the label of flag (1U << n)
 * carries the number n + 1.  There is exactly one entry per flag above;
 * keep the numbering contiguous when adding or removing flags, otherwise
 * every later label is attributed to the wrong bit.
 */
#define NXPREQ_BITS	\
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"	\
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES"		\
	"\012EXTENSIONS\013MHINTS\014FLOWADV_MAX\015QMAP"		\
	"\016CKSUM_OFFLOAD\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE"	\
	"\022IFINDEX\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE\026LBUF_SIZE"
188 | |
189 | /* |
190 | * Nexus provider registration entry. Also argument for NXOPT_NEXUS_PROV_ENTRY. |
191 | */ |
192 | struct nxprov_reg_ent { |
193 | uuid_t npre_prov_uuid; /* Nexus provider UUID */ |
194 | struct nxprov_params npre_prov_params; /* Nexus provider parameters */ |
195 | }; |
196 | |
/*
 * Nexus options.  "(get)" options are read, "(set)" options are written.
 */
/* (get) list all provider UUIDS */
#define NXOPT_NEXUS_PROV_LIST	1
/* (get) get params of a provider */
#define NXOPT_NEXUS_PROV_ENTRY	2
/* (get) list all Nexus instances */
#define NXOPT_NEXUS_LIST	20
/* (set) bind a Nexus port */
#define NXOPT_NEXUS_BIND	21
/* (set) unbind a Nexus port */
#define NXOPT_NEXUS_UNBIND	22
/* (get) list all Channel instances */
#define NXOPT_CHANNEL_LIST	30
/* (set) nexus specific config */
#define NXOPT_NEXUS_CONFIG	40
207 | |
208 | /* |
209 | * Argument structure for NXOPT_NEXUS_PROV_LIST. |
210 | */ |
211 | struct nxprov_list_req { |
212 | uint32_t nrl_num_regs; /* array count */ |
213 | uint32_t __nrl_align; /* reserved */ |
214 | user_addr_t nrl_regs; /* array of nexus_reg_ent */ |
215 | }; |
216 | |
217 | /* |
218 | * Argument structure for NXOPT_NEXUS_LIST. |
219 | */ |
220 | struct nx_list_req { |
221 | uuid_t nl_prov_uuid; /* nexus provider UUID */ |
222 | uint32_t nl_num_nx_uuids; /* array count */ |
223 | uint32_t __nl_align; /* reserved */ |
224 | user_addr_t nl_nx_uuids; /* array of nexus UUIDs */ |
225 | }; |
226 | |
227 | /* |
228 | * Argument structure for NXOPT_NEXUS_BIND. |
229 | */ |
230 | struct nx_bind_req { |
231 | uuid_t nb_nx_uuid; /* nexus instance UUID */ |
232 | nexus_port_t nb_port; /* nexus instance port */ |
233 | uint32_t nb_flags; /* NBR_* match flags */ |
234 | uuid_t nb_exec_uuid; /* executable UUID */ |
235 | user_addr_t nb_key; /* key blob */ |
236 | uint32_t nb_key_len; /* key blob length */ |
237 | pid_t nb_pid; /* client PID */ |
238 | }; |
239 | |
240 | #define NBR_MATCH_PID 0x1 /* match against PID */ |
241 | #define NBR_MATCH_EXEC_UUID 0x2 /* match executable's UUID */ |
242 | #define NBR_MATCH_KEY 0x4 /* match key blob */ |
243 | #ifdef KERNEL |
244 | #define NBR_MATCH_MASK \ |
245 | (NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY) |
246 | #endif /* KERNEL */ |
247 | |
248 | /* |
249 | * Argument structure for NXOPT_NEXUS_UNBIND. |
250 | */ |
251 | struct nx_unbind_req { |
252 | uuid_t nu_nx_uuid; /* nexus instance UUID */ |
253 | nexus_port_t nu_port; /* nexus instance port */ |
254 | }; |
255 | |
256 | /* |
257 | * Argument structure for NXOPT_CHANNEL_LIST. |
258 | */ |
259 | struct ch_list_req { |
260 | uuid_t cl_nx_uuid; /* nexus instance UUID */ |
261 | uint32_t cl_num_ch_uuids; /* array count */ |
262 | uint32_t __cl_align; /* reserved */ |
263 | user_addr_t cl_ch_uuids; /* array of channel UUIDs */ |
264 | }; |
265 | |
/*
 * Skywalk Nexus MIB
 *
 * "MIB" here refers to the things we expose to the outside world for
 * management/telemetry purposes.
 *
 * The general rule of thumb for this MIB structure is to keep it simple:
 * avoid variable-length fields and hierarchical representations wherever
 * possible.  A simple retrieval returns either a single object (a simple
 * type or a fixed-length compound type) or an array of objects of the same
 * type, which makes parsing the retrieved information a lot easier.
 *
 * For now the MIB interface is exposed through sysctl.  Additional
 * interfaces could be a syscall (e.g. via a nexus controller), etc.
 */

/* MIB types (32-bit bit values) */
#define NXMIB_NETIF_STATS	((uint32_t)1 << 1)
#define NXMIB_FSW_STATS		((uint32_t)1 << 2)
#define NXMIB_FLOW		((uint32_t)1 << 3)
#define NXMIB_FLOW_ADV		((uint32_t)1 << 4)
#define NXMIB_FLOW_OWNER	((uint32_t)1 << 5)
#define NXMIB_FLOW_ROUTE	((uint32_t)1 << 6)
#define NXMIB_LLINK_LIST	((uint32_t)1 << 7)
#define NXMIB_NETIF_QUEUE_STATS	((uint32_t)1 << 8)

#define NXMIB_QUIC_STATS	((uint32_t)1 << 27)
#define NXMIB_UDP_STATS		((uint32_t)1 << 28)
#define NXMIB_TCP_STATS		((uint32_t)1 << 29)
#define NXMIB_IP6_STATS		((uint32_t)1 << 30)
#define NXMIB_IP_STATS		((uint32_t)1 << 31)

/* union of the IP, IPv6, TCP, UDP and QUIC stats types */
#define NXMIB_USERSTACK_STATS	\
	(NXMIB_IP_STATS | NXMIB_IP6_STATS | NXMIB_TCP_STATS |	\
	NXMIB_UDP_STATS | NXMIB_QUIC_STATS)

/* filter selector bits (64-bit) */
#define NXMIB_FILTER_NX_UUID	((uint64_t)1 << 0)
#define NXMIB_FILTER_FLOW_ID	((uint64_t)1 << 1)
#define NXMIB_FILTER_PID	((uint64_t)1 << 2)
#define NXMIB_FILTER_INFO_TUPLE	((uint64_t)1 << 3)
304 | |
305 | /* |
306 | * Nexus MIB filter: used to retrieve only those matching the filter value. |
307 | */ |
308 | struct nexus_mib_filter { |
309 | uint32_t nmf_type; /* MIB type */ |
310 | uint64_t nmf_bitmap; /* bitmap of following fields */ |
311 | |
312 | uuid_t nmf_nx_uuid; /* nexus instance uuid */ |
313 | uuid_t nmf_flow_id; /* flow rule id */ |
314 | pid_t nmf_pid; /* owner pid */ |
315 | struct info_tuple nmf_info_tuple; /* flow tuple */ |
316 | }; |
317 | |
/*
 * Nexus-specific config commands.
 */
typedef enum {
	/* attach an object to a nexus */
	NXCFG_CMD_ATTACH		= 0,
	/* detach an object from a nexus */
	NXCFG_CMD_DETACH		= 1,
	/* add a flow to a nexus */
	NXCFG_CMD_FLOW_ADD		= 20,
	/* delete a flow from nexus */
	NXCFG_CMD_FLOW_DEL		= 21,
	/* configure a flow in nexus */
	NXCFG_CMD_FLOW_CONFIG		= 22,
	/* config packet scheduler */
	NXCFG_CMD_NETEM			= 30,
	/* collect llink info */
	NXCFG_CMD_GET_LLINK_INFO	= 40,
} nxcfg_cmd_t;
330 | |
331 | #define NX_SPEC_IF_NAMELEN 64 |
332 | |
333 | /* |
334 | * Argument struture for NXOPT_NEXUS_CONFIG. |
335 | */ |
336 | struct nx_cfg_req { |
337 | uuid_t nc_nx_uuid; /* nexus instance UUID */ |
338 | nxcfg_cmd_t nc_cmd; /* NXCFG_CMD_* */ |
339 | uint32_t nc_req_len; /* size of request struct */ |
340 | user_addr_t nc_req; /* address of request struct */ |
341 | }; |
342 | |
343 | /* |
344 | * Argument structure for NXCFG_CMD_{ATTACH,DETACH} |
345 | */ |
346 | struct nx_spec_req { |
347 | union { |
348 | char nsru_name[NX_SPEC_IF_NAMELEN]; |
349 | uuid_t nsru_uuid; |
350 | #ifdef KERNEL |
351 | struct ifnet *nsru_ifp; |
352 | #endif /* KERNEL */ |
353 | } nsr_u __attribute__((aligned(sizeof(uint64_t)))); /* in */ |
354 | uint32_t nsr_flags; /* in */ |
355 | uuid_t nsr_if_uuid; /* attach: out, detach: in */ |
356 | }; |
357 | #define nsr_name nsr_u.nsru_name |
358 | #define nsr_uuid nsr_u.nsru_uuid |
359 | #ifdef KERNEL |
360 | #define nsr_ifp nsr_u.nsru_ifp |
361 | #endif /* KERNEL */ |
362 | |
363 | #define NXSPECREQ_UUID 0x1 /* nsr_name is uuid_t else ifname */ |
364 | #define NXSPECREQ_HOST 0x2 /* attach to host port */ |
365 | #ifdef KERNEL |
366 | /* mask off userland-settable bits */ |
367 | #define NXSPECREQ_MASK (NXSPECREQ_UUID | NXSPECREQ_HOST) |
368 | #define NXSPECREQ_IFP 0x1000 /* (embryonic) ifnet */ |
369 | #endif /* KERNEL */ |
370 | |
/*
 * Structure for flow demuxing for parent/child flows
 */
#define FLOW_DEMUX_MAX_LEN	32
struct flow_demux_pattern {
	uint16_t	fdp_offset;
	uint16_t	fdp_len;
	/* mask and value buffers, FLOW_DEMUX_MAX_LEN bytes each */
	uint8_t		fdp_mask[FLOW_DEMUX_MAX_LEN];
	uint8_t		fdp_value[FLOW_DEMUX_MAX_LEN];
};

/* capacity of nx_flow_req.nfr_flow_demux_patterns[] */
#define MAX_FLOW_DEMUX_PATTERN	4
383 | |
384 | /* |
385 | * Argument structure for NXCFG_CMD_FLOW_{BIND,UNBIND} |
386 | */ |
387 | struct nx_flow_req { |
388 | nexus_port_t nfr_nx_port; |
389 | uint16_t nfr_ethertype; |
390 | ether_addr_t nfr_etheraddr; |
391 | union sockaddr_in_4_6 nfr_saddr; |
392 | union sockaddr_in_4_6 nfr_daddr; |
393 | uint8_t nfr_ip_protocol; |
394 | uint8_t nfr_transport_protocol; |
395 | uint16_t nfr_flags; |
396 | uuid_t nfr_flow_uuid; |
397 | packet_svc_class_t nfr_svc_class; |
398 | uuid_t nfr_euuid; |
399 | uint32_t nfr_policy_id; |
400 | uint32_t nfr_skip_policy_id; |
401 | pid_t nfr_epid; |
402 | flowadv_idx_t nfr_flowadv_idx; |
403 | uuid_t nfr_bind_key; |
404 | uint64_t nfr_qset_id; |
405 | uuid_t nfr_parent_flow_uuid; |
406 | uint8_t nfr_flow_demux_count; |
407 | struct flow_demux_pattern nfr_flow_demux_patterns[MAX_FLOW_DEMUX_PATTERN]; |
408 | // below is reserved kernel-only fields |
409 | union { |
410 | #ifdef KERNEL |
411 | struct { |
412 | char _nfr_kernel_field_start[0]; |
413 | void *nfr_context; |
414 | struct proc *nfr_proc; |
415 | struct ifnet *nfr_ifp; |
416 | struct flow_route *nfr_route; |
417 | struct ns_token *nfr_port_reservation; |
418 | struct protons_token *nfr_proto_reservation; |
419 | struct flow_stats *nfr_flow_stats; |
420 | pid_t nfr_pid; |
421 | uint32_t nfr_saddr_gencnt; |
422 | void *nfr_ipsec_reservation; |
423 | uint32_t nfr_inp_flowhash; |
424 | #if defined(__LP64__) |
425 | uint8_t _nfr_kernel_pad[4]; |
426 | #else /* !__LP64__ */ |
427 | uint8_t _nfr_kernel_pad[36]; |
428 | #endif /* !__LP64__ */ |
429 | char _nfr_kernel_field_end[0]; |
430 | }; |
431 | #endif /* KERNEL */ |
432 | struct { |
433 | uint8_t _nfr_opaque[80]; |
434 | /* should be at the same offset as _nfr_kernel_field_end above */ |
435 | char _nfr_common_field_end[0]; |
436 | }; |
437 | }; |
438 | }; |
439 | |
/*
 * Valid flags for nfr_flags.
 */
/* enable state tracking */
#define NXFLOWREQF_TRACK			0x0001
/* allow qos marking */
#define NXFLOWREQF_QOS_MARKING			0x0002
/* interpose filter */
#define NXFLOWREQF_FILTER			0x0004
/* custom ethertype */
#define NXFLOWREQF_CUSTOM_ETHER			0x0008
/* ipv6 ula */
#define NXFLOWREQF_IPV6_ULA			0x0010
/* listener */
#define NXFLOWREQF_LISTENER			0x0020
/* override system address selection */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION	0x0040
/* if override local, use stable address */
#define NXFLOWREQF_USE_STABLE_ADDRESS		0x0080
/* allocate flow advisory */
#define NXFLOWREQF_FLOWADV			0x0100
/* create flow as is in nfr */
#define NXFLOWREQF_ASIS				0x0200
/* low latency flow */
#define NXFLOWREQF_LOW_LATENCY			0x0400
/* Don't wake for traffic to this flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP		0x0800
/* reuse port (presumably allows sharing the local port -- confirm) */
#define NXFLOWREQF_REUSEPORT			0x1000
/* Parent flow */
#define NXFLOWREQF_PARENT			0x4000

/* Bit-name string for the flags above (1-origin bit numbers, in octal). */
#define NXFLOWREQF_BITS	\
	"\020"								\
	"\01TRACK\02QOS_MARKING\03FILTER\04CUSTOM_ETHER"		\
	"\05IPV6_ULA\06LISTENER\07OVERRIDE_ADDRESS_SELECTION"		\
	"\010USE_STABLE_ADDRESS\011ALLOC_FLOWADV\012ASIS"		\
	"\013LOW_LATENCY\014NOWAKEUPFROMSLEEP\015REUSEPORT"		\
	"\017PARENT"
461 | |
/*
 * 16-byte IP address storage, viewable as an IPv4 address, an IPv6
 * address, or as arrays of 8/16/32/64-bit words.
 */
struct flow_ip_addr {
	union {
		struct in_addr	_v4;
		struct in6_addr	_v6;
		uint8_t		_addr8[16];
		uint16_t	_addr16[8];
		uint32_t	_addr32[4];
		uint64_t	_addr64[2];
	};
};

/*
 * Flow lookup key; 16-byte aligned and padded to 48 bytes.
 */
struct flow_key {
	uint16_t		fk_mask;	/* FKMASK_* */
	uint8_t			fk_ipver;
	uint8_t			fk_proto;
	uint16_t		fk_sport;
	uint16_t		fk_dport;
	struct flow_ip_addr	fk_src;
	struct flow_ip_addr	fk_dst;
	/* pad to 48 bytes */
	uint64_t		fk_pad[1];
} __attribute__((__aligned__(16)));
483 | |
/* shorthand accessors for the IPv4/IPv6 views of fk_src and fk_dst */
#define fk_src4		fk_src._v4
#define fk_src6		fk_src._v6
#define fk_dst4		fk_dst._v4
#define fk_dst6		fk_dst._v6

#define FK_HASH_SEED	0xabcd
#define FLOW_KEY_LEN	sizeof(struct flow_key)

/* individual bits for fk_mask, one per flow_key field */
#define FKMASK_IPVER	((uint16_t)1 << 0)
#define FKMASK_PROTO	((uint16_t)1 << 1)
#define FKMASK_SRC	((uint16_t)1 << 2)
#define FKMASK_SPORT	((uint16_t)1 << 3)
#define FKMASK_DST	((uint16_t)1 << 4)
#define FKMASK_DPORT	((uint16_t)1 << 5)

/* tuple masks; each one extends the previous */
#define FKMASK_2TUPLE	(FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE	(FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE	(FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE	(FKMASK_4TUPLE | FKMASK_DST)

/* IP flow masks (no ports); each one extends the previous */
#define FKMASK_IPFLOW1	FKMASK_PROTO
#define FKMASK_IPFLOW2	(FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3	(FKMASK_IPFLOW2 | FKMASK_DST)

#define FKMASK_IDX_MAX	7
507 | |
/* constant flow keys, one per FKMASK_* tuple / IP flow mask; defined elsewhere */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;

/*
 * Zero the flow key pointed to by _fk using sk_zero_48(); the compile-time
 * assertions guard the assumption that a flow key is exactly 48 bytes.
 */
#define FLOW_KEY_CLEAR(_fk) do {					\
	_CASSERT(FLOW_KEY_LEN == 48);					\
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));		\
	sk_zero_48(_fk);						\
} while (0)
521 | |
#ifdef KERNEL
/* mask off userland-settable bits */
#define NXFLOWREQF_MASK \
    (NXFLOWREQF_TRACK | NXFLOWREQF_QOS_MARKING | NXFLOWREQF_FILTER | \
    NXFLOWREQF_CUSTOM_ETHER | NXFLOWREQF_IPV6_ULA | NXFLOWREQF_LISTENER | \
    NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION | NXFLOWREQF_USE_STABLE_ADDRESS | \
    NXFLOWREQF_FLOWADV | NXFLOWREQF_LOW_LATENCY | NXFLOWREQF_NOWAKEFROMSLEEP | \
    NXFLOWREQF_REUSEPORT | NXFLOWREQF_PARENT)

/*
 * Kernel-only nfr_flags values.
 * NOTE(review): NXFLOWREQF_EXT_PORT_RSV has the same value (0x1000) as
 * NXFLOWREQF_REUSEPORT, which is part of NXFLOWREQF_MASK -- confirm that
 * this overlap is intended.
 */
#define NXFLOWREQF_EXT_PORT_RSV   0x1000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV  0x2000  /* external proto reservation */

/*
 * Prepare a flow request for use inside the kernel: zero the whole reserved
 * (kernel-only) region and strip every flag outside NXFLOWREQF_MASK.
 */
static inline void
nx_flow_req_internalize(struct nx_flow_req *req)
{
	/* the kernel view and the opaque view must end at the same offset */
	_CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) ==
	    offsetof(struct nx_flow_req, _nfr_common_field_end));

	/* init kernel only fields */
	bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
	req->nfr_flags &= NXFLOWREQF_MASK;
	req->nfr_context = NULL;
	req->nfr_flow_stats = NULL;
	req->nfr_port_reservation = NULL;
}

/*
 * Scrub a flow request: zero the whole reserved (kernel-only) region and
 * strip every flag outside NXFLOWREQF_MASK.
 */
static inline void
nx_flow_req_externalize(struct nx_flow_req *req)
{
	/* neutralize kernel only fields */
	bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
	req->nfr_flags &= NXFLOWREQF_MASK;
}
#endif /* KERNEL */
556 | |
/*
 * Per-qset entry of nx_llink_info.nli_qset[].
 */
struct nx_qset_info {
	uint64_t	nqi_id;
	uint16_t	nqi_flags;
	/* queue counts, one byte each */
	uint8_t		nqi_num_rx_queues;
	uint8_t		nqi_num_tx_queues;
};
563 | |
564 | #define NETIF_LLINK_MAX_QSETS 256 |
565 | struct nx_llink_info { |
566 | uuid_t nli_netif_uuid; /* nexus netif instance uuid */ |
567 | uint64_t nli_link_id; |
568 | uint16_t nli_link_id_internal; |
569 | uint8_t nli_state; |
570 | uint8_t nli_flags; |
571 | uint16_t nli_qset_cnt; |
572 | struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS]; |
573 | }; |
574 | |
575 | #define NETIF_LLINK_INFO_VERSION 0x01 |
576 | struct nx_llink_info_req { |
577 | uint16_t nlir_version; |
578 | uint16_t nlir_llink_cnt; |
579 | struct nx_llink_info nlir_llink[0]; |
580 | }; |
581 | |
582 | /* |
583 | * Nexus controller descriptor. |
584 | */ |
585 | struct nexus_controller { |
586 | #ifndef KERNEL |
587 | int ncd_fd; |
588 | guardid_t ncd_guard; |
589 | #else /* KERNEL */ |
590 | struct nxctl *ncd_nxctl; |
591 | #endif /* KERNEL */ |
592 | }; |
593 | |
594 | /* For nexus ops without having to create a nexus controller */ |
595 | #define __OS_NEXUS_SHARED_USER_CONTROLLER_FD (-1) |
596 | |
/*
 * Nexus attributes.  Every attribute is stored as a 64-bit value.
 */
struct nexus_attr {
	/* customized attributes */
	uint64_t	nxa_requested;

	/* # of channel transmit / receive rings */
	uint64_t	nxa_tx_rings;
	uint64_t	nxa_rx_rings;

	/* # of slots per channel TX / RX ring */
	uint64_t	nxa_tx_slots;
	uint64_t	nxa_rx_slots;

	/* size of each buffer */
	uint64_t	nxa_buf_size;
	/* size of metadata per buffer */
	uint64_t	nxa_meta_size;
	/* size of statistics region */
	uint64_t	nxa_stats_size;
	/* bool: allow anonymous clients */
	uint64_t	nxa_anonymous;
	/* number of pipes */
	uint64_t	nxa_pipes;
	/* extension-specific attribute */
	uint64_t	nxa_extensions;
	/* memory usage hints */
	uint64_t	nxa_mhints;
	/* network interface index */
	uint64_t	nxa_ifindex;
	/* max flow advisory entries */
	uint64_t	nxa_flowadv_max;
	/* queue mapping type */
	uint64_t	nxa_qmap;
	/* partial checksum offload */
	uint64_t	nxa_checksum_offload;
	/* user packet pool */
	uint64_t	nxa_user_packet_pool;
	/* size of advisory region */
	uint64_t	nxa_nexusadv_size;
	/* user channel open allowed */
	uint64_t	nxa_user_channel;
	/* max fragments per packet */
	uint64_t	nxa_max_frags;
	/*
	 * Reject channel operations if the nexus peer has closed the
	 * channel.  Valid only for user-pipe nexus.
	 */
	uint64_t	nxa_reject_on_close;
	/* size of large buffer */
	uint64_t	nxa_large_buf_size;
};
628 | |
/*
 * Flags for nxa_requested; keep in sync with NXPREQ_* flags.
 * Note that these are 64-bit, whereas nxpreg_requested is
 * 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS	0x0000000000000001ULL	/* 1ULL << 0 */
#define NXA_REQ_RX_RINGS	0x0000000000000002ULL	/* 1ULL << 1 */
#define NXA_REQ_TX_SLOTS	0x0000000000000004ULL	/* 1ULL << 2 */
#define NXA_REQ_RX_SLOTS	0x0000000000000008ULL	/* 1ULL << 3 */
#define NXA_REQ_BUF_SIZE	0x0000000000000010ULL	/* 1ULL << 4 */
#define NXA_REQ_META_SIZE	0x0000000000000020ULL	/* 1ULL << 5 */
#define NXA_REQ_STATS_SIZE	0x0000000000000040ULL	/* 1ULL << 6 */
#define NXA_REQ_ANONYMOUS	0x0000000000000080ULL	/* 1ULL << 7 */
#define NXA_REQ_PIPES		0x0000000000000100ULL	/* 1ULL << 8 */
#define NXA_REQ_EXTENSIONS	0x0000000000000200ULL	/* 1ULL << 9 */
#define NXA_REQ_MHINTS		0x0000000000000400ULL	/* 1ULL << 10 */
#define NXA_REQ_FLOWADV_MAX	0x0000000000000800ULL	/* 1ULL << 11 */
#define NXA_REQ_QMAP		0x0000000000001000ULL	/* 1ULL << 12 */
#define NXA_REQ_CHECKSUM_OFFLOAD 0x0000000000002000ULL	/* 1ULL << 13 */
#define NXA_REQ_USER_PACKET_POOL 0x0000000000004000ULL	/* 1ULL << 14 */
#define NXA_REQ_CAPABILITIES	0x0000000000008000ULL	/* 1ULL << 15 */
#define NXA_REQ_NEXUSADV_SIZE	0x0000000000010000ULL	/* 1ULL << 16 */
#define NXA_REQ_IFINDEX		0x0000000000020000ULL	/* 1ULL << 17 */
#define NXA_REQ_USER_CHANNEL	0x0000000000040000ULL	/* 1ULL << 18 */
#define NXA_REQ_MAX_FRAGS	0x0000000000080000ULL	/* 1ULL << 19 */
#define NXA_REQ_REJECT_ON_CLOSE	0x0000000000100000ULL	/* 1ULL << 20 */
#define NXA_REQ_LARGE_BUF_SIZE	0x0000000000200000ULL	/* 1ULL << 21 */
656 | |
657 | #ifndef KERNEL |
658 | #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) |
659 | __BEGIN_DECLS |
660 | /* system calls */ |
661 | extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len); |
662 | extern int __nexus_register(int ctl, struct nxprov_reg *reg, |
663 | const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len); |
664 | extern int __nexus_deregister(int ctl, const uuid_t prov_uuid, |
665 | const uint32_t prov_uuid_len); |
666 | extern int __nexus_create(int ctl, const uuid_t prov_uuid, |
667 | const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len); |
668 | extern int __nexus_destroy(int ctl, const uuid_t nx_uuid, |
669 | const uint32_t nx_uuid_len); |
670 | extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval, |
671 | uint32_t *aoptlen); |
672 | extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval, |
673 | const uint32_t optlen); |
674 | |
675 | /* private nexus controller APIs */ |
676 | extern int __os_nexus_ifattach(const nexus_controller_t ctl, |
677 | const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid, |
678 | boolean_t host, uuid_t *nx_if_uuid); |
679 | extern int __os_nexus_ifdetach(const nexus_controller_t ctl, |
680 | const uuid_t nx_uuid, const uuid_t nx_if_uuid); |
681 | |
682 | /* private flow APIs */ |
683 | extern int __os_nexus_flow_add(const nexus_controller_t ncd, |
684 | const uuid_t nx_uuid, const struct nx_flow_req *nfr); |
685 | extern int __os_nexus_flow_del(const nexus_controller_t ncd, |
686 | const uuid_t nx_uuid, const struct nx_flow_req *nfr); |
687 | extern int __os_nexus_get_llink_info(const nexus_controller_t ncd, |
688 | const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len); |
689 | extern int os_nexus_flow_set_wake_from_sleep(const uuid_t nx_uuid, |
690 | const uuid_t flow_uuid, bool enable); |
691 | |
692 | __END_DECLS |
693 | #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ |
694 | #endif /* !KERNEL */ |
695 | #if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE) |
696 | #include <skywalk/nexus_common.h> |
697 | #include <skywalk/nexus_ioctl.h> |
698 | #endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */ |
699 | #endif /* PRIVATE || BSD_KERNEL_PRIVATE */ |
700 | #endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */ |
701 | |