| 1 | /* |
| 2 | * Copyright (c) 2015-2022 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_ |
| 30 | #define _SKYWALK_OS_NEXUS_PRIVATE_H_ |
| 31 | |
| 32 | #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE) |
| 33 | #include <stdbool.h> |
| 34 | #include <sys/guarded.h> |
| 35 | #include <skywalk/os_channel.h> |
| 36 | #include <skywalk/os_nexus.h> |
| 37 | #include <netinet/in.h> |
| 38 | #include <netinet/in_private.h> |
| 39 | #include <netinet/tcp.h> |
| 40 | #include <netinet/tcp_private.h> |
| 41 | #include <net/ethernet.h> |
| 42 | |
| 43 | /* |
| 44 | * Ephemeral port, for NEXUSDOMCAPF_EPHEMERAL capable nexus. |
| 45 | */ |
| 46 | #define NEXUS_PORT_ANY ((nexus_port_t)-1) |
| 47 | #define NEXUS_PORT_MAX ((nexus_port_t)-1) |
| 48 | |
| 49 | typedef nexus_port_t nexus_port_size_t; |
| 50 | |
| 51 | #define NEXUSCTL_INIT_VERSION_1 1 |
| 52 | #define NEXUSCTL_INIT_CURRENT_VERSION NEXUSCTL_INIT_VERSION_1 |
| 53 | |
| 54 | /* |
| 55 | * Nexus controller init parameters. |
| 56 | */ |
| 57 | struct nxctl_init { |
| 58 | uint32_t ni_version; /* in: NEXUSCTL_INIT_CURRENT_VERSION */ |
| 59 | uint32_t __ni_align; /* reserved */ |
| 60 | guardid_t ni_guard; /* out: guard ID */ |
| 61 | }; |
| 62 | |
/*
 * Nexus metadata type.
 *
 * Packets are referenced through tagged pointers, and this type is
 * encoded in the tag together with the subtype.  Objects are required
 * to be aligned on a 64-byte boundary at the minimum; of the bits made
 * available, 4 are used here: 2 for the type and 2 for the subtype.
 * The largest permissible enum value is therefore 3.
 */
typedef enum {
	NEXUS_META_TYPE_INVALID  = 0,   /* invalid type */
	NEXUS_META_TYPE_QUANTUM  = 1,   /* struct __quantum */
	NEXUS_META_TYPE_PACKET   = 2,   /* struct __packet */
	NEXUS_META_TYPE_RESERVED = 3,   /* for future */
	NEXUS_META_TYPE_MAX      = NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
| 79 | |
/*
 * Nexus metadata subtype.  Values are listed explicitly; the largest
 * one is 3 (NEXUS_META_SUBTYPE_RESERVED).
 */
typedef enum {
	NEXUS_META_SUBTYPE_INVALID  = 0,        /* invalid subtype */
	NEXUS_META_SUBTYPE_PAYLOAD  = 1,        /* normal payload mode */
	NEXUS_META_SUBTYPE_RAW      = 2,        /* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RESERVED = 3,        /* for future */
	NEXUS_META_SUBTYPE_MAX      = NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
| 87 | |
| 88 | /* |
| 89 | * Nexus provider parameters. |
| 90 | */ |
| 91 | struct nxprov_params { |
| 92 | nexus_name_t nxp_name; /* name */ |
| 93 | uint32_t nxp_namelen; /* length of name */ |
| 94 | nexus_type_t nxp_type; /* NEXUS_TYPE_* */ |
| 95 | nexus_meta_type_t nxp_md_type; /* NEXUS_META_TYPE_* */ |
| 96 | nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */ |
| 97 | uint32_t nxp_flags; /* NXPF_* */ |
| 98 | uint32_t nxp_format; /* provider-defined */ |
| 99 | uint32_t nxp_tx_rings; /* # of channel transmit rings */ |
| 100 | uint32_t nxp_rx_rings; /* # of channel receive rings */ |
| 101 | uint32_t nxp_tx_slots; /* # of slots per channel TX ring */ |
| 102 | uint32_t nxp_rx_slots; /* # of slots per channel RX ring */ |
| 103 | uint32_t nxp_buf_size; /* size of each buffer */ |
| 104 | uint32_t nxp_meta_size; /* size of metadata per slot */ |
| 105 | uint32_t nxp_stats_size; /* size of statistics region */ |
| 106 | uint32_t nxp_pipes; /* number of pipes */ |
| 107 | nexus_extension_t nxp_extensions; /* extension specific parameter(s) */ |
| 108 | uint32_t nxp_mhints; /* memory usage hints */ |
| 109 | uint32_t nxp_ifindex; /* network interface index */ |
| 110 | uint32_t nxp_flowadv_max; /* max flow advisory entries */ |
| 111 | nexus_qmap_type_t nxp_qmap; /* queue mapping type */ |
| 112 | uint32_t nxp_capabilities; /* nexus capabilities */ |
| 113 | uint32_t nxp_nexusadv_size; /* nexus advisory region size */ |
| 114 | uint32_t nxp_max_frags; /* max fragments per packet */ |
| 115 | /* |
| 116 | * reject channel operations if the peer has closed the channel. |
| 117 | * Only valid for user-pipe nexus. |
| 118 | */ |
| 119 | boolean_t nxp_reject_on_close; |
| 120 | uint32_t nxp_large_buf_size; /* size of large buffer */ |
| 121 | } __attribute__((aligned(64))); |
| 122 | |
/* valid values for nxp_flags */
#define NXPF_ANONYMOUS          0x1     /* allow anonymous channel clients */
#define NXPF_USER_CHANNEL       0x2     /* allow user channel open */
#define NXPF_NETIF_LLINK        0x4     /* use netif logical link */
#ifdef KERNEL
/* every NXPF_* bit defined above */
#define NXPF_MASK    (NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * Bit-name table for nxp_flags.  The layout appears to follow the BSD
 * kernel printf "%b" convention: the first byte is the numeric base
 * (\020 == 16), and each following entry is a 1-based bit number
 * followed by that bit's name.  Every NXPF_* flag above needs an entry,
 * including NXPF_NETIF_LLINK (0x4 == bit 3).
 */
#define NXPF_BITS \
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"
| 133 | |
/*
 * Capability bits carried in nxprov_params::nxp_capabilities.
 */
#define NXPCAP_CHECKSUM_PARTIAL 0x1     /* partial checksum */
#define NXPCAP_USER_PACKET_POOL 0x2     /* user packet pool */
#define NXPCAP_USER_CHANNEL     0x4     /* allow user channel access */

/* Bit-name table for the NXPCAP_* flags: base byte, then <bit#><name> pairs. */
#define NXPCAP_BITS                                     \
	"\020"                                          \
	"\01CHECKSUM_PARTIAL"                           \
	"\02USER_PKT_POOL"                              \
	"\03USER_CHANNEL"
| 141 | |
| 142 | #define NXPROV_REG_VERSION_1 1 |
| 143 | #define NXPROV_REG_CURRENT_VERSION NXPROV_REG_VERSION_1 |
| 144 | |
| 145 | /* |
| 146 | * Nexus provider registration parameters. |
| 147 | */ |
| 148 | struct nxprov_reg { |
| 149 | uint32_t nxpreg_version; /* NXPROV_REG_CURRENT_VERSION */ |
| 150 | uint32_t nxpreg_requested; /* customized attributes */ |
| 151 | struct nxprov_params nxpreg_params; /* Nexus provider parameters */ |
| 152 | }; |
| 153 | |
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS         (1U << 0)       /* 0x00000001 */
#define NXPREQ_RX_RINGS         (1U << 1)       /* 0x00000002 */
#define NXPREQ_TX_SLOTS         (1U << 2)       /* 0x00000004 */
#define NXPREQ_RX_SLOTS         (1U << 3)       /* 0x00000008 */
#define NXPREQ_BUF_SIZE         (1U << 4)       /* 0x00000010 */
#define NXPREQ_META_SIZE        (1U << 5)       /* 0x00000020 */
#define NXPREQ_STATS_SIZE       (1U << 6)       /* 0x00000040 */
#define NXPREQ_ANONYMOUS        (1U << 7)       /* 0x00000080 */
#define NXPREQ_PIPES            (1U << 8)       /* 0x00000100 */
#define NXPREQ_EXTENSIONS       (1U << 9)       /* 0x00000200 */
#define NXPREQ_MHINTS           (1U << 10)      /* 0x00000400 */
#define NXPREQ_FLOWADV_MAX      (1U << 11)      /* 0x00000800 */
#define NXPREQ_QMAP             (1U << 12)      /* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD (1U << 13)      /* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL (1U << 14)      /* 0x00004000 */
#define NXPREQ_CAPABILITIES     (1U << 15)      /* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE    (1U << 16)      /* 0x00010000 */
#define NXPREQ_IFINDEX          (1U << 17)      /* 0x00020000 */
#define NXPREQ_USER_CHANNEL     (1U << 18)      /* 0x00040000 */
#define NXPREQ_MAX_FRAGS        (1U << 19)      /* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE  (1U << 20)      /* 0x00100000 */
#define NXPREQ_LARGE_BUF_SIZE   (1U << 21)      /* 0x00200000 */

/*
 * Bit-name table for the NXPREQ_* flags.  The first byte is the numeric
 * base (\020 == 16); each following entry is a 1-based bit number (in
 * octal) followed by the flag's name, i.e. flag (1U << n) is entry n + 1.
 *
 * There is no EXTRA_BUFS flag: bit 9 is NXPREQ_PIPES.  A leftover
 * "\011EXTRA_BUFS" entry used to push every name from PIPES onward one
 * bit too high, so the table below is numbered directly from the
 * definitions above.  Keep the two in lock-step when adding a flag.
 */
#define NXPREQ_BITS \
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"      \
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES"                  \
	"\012EXTENSIONS\013MHINTS\014FLOWADV_MAX\015QMAP"                  \
	"\016CKSUM_OFFLOAD\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE"    \
	"\022IFINDEX\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE\026LBUF_SIZE"
| 188 | |
| 189 | /* |
| 190 | * Nexus provider registration entry. Also argument for NXOPT_NEXUS_PROV_ENTRY. |
| 191 | */ |
| 192 | struct nxprov_reg_ent { |
| 193 | uuid_t npre_prov_uuid; /* Nexus provider UUID */ |
| 194 | struct nxprov_params npre_prov_params; /* Nexus provider parameters */ |
| 195 | }; |
| 196 | |
/*
 * Nexus options.  Each is marked (get) or (set) according to the
 * direction in which it is used.
 */
/* providers */
#define NXOPT_NEXUS_PROV_LIST   1       /* (get) list all provider UUIDS */
#define NXOPT_NEXUS_PROV_ENTRY  2       /* (get) get params of a provider */
/* nexus instances */
#define NXOPT_NEXUS_LIST        20      /* (get) list all Nexus instances */
#define NXOPT_NEXUS_BIND        21      /* (set) bind a Nexus port */
#define NXOPT_NEXUS_UNBIND      22      /* (set) unbind a Nexus port */
/* channels */
#define NXOPT_CHANNEL_LIST      30      /* (get) list all Channel instances */
/* configuration */
#define NXOPT_NEXUS_CONFIG      40      /* (set) nexus specific config */
| 207 | |
| 208 | /* |
| 209 | * Argument structure for NXOPT_NEXUS_PROV_LIST. |
| 210 | */ |
| 211 | struct nxprov_list_req { |
| 212 | uint32_t nrl_num_regs; /* array count */ |
| 213 | uint32_t __nrl_align; /* reserved */ |
| 214 | user_addr_t nrl_regs; /* array of nexus_reg_ent */ |
| 215 | }; |
| 216 | |
| 217 | /* |
| 218 | * Argument structure for NXOPT_NEXUS_LIST. |
| 219 | */ |
| 220 | struct nx_list_req { |
| 221 | uuid_t nl_prov_uuid; /* nexus provider UUID */ |
| 222 | uint32_t nl_num_nx_uuids; /* array count */ |
| 223 | uint32_t __nl_align; /* reserved */ |
| 224 | user_addr_t nl_nx_uuids; /* array of nexus UUIDs */ |
| 225 | }; |
| 226 | |
| 227 | /* |
| 228 | * Argument structure for NXOPT_NEXUS_BIND. |
| 229 | */ |
| 230 | struct nx_bind_req { |
| 231 | uuid_t nb_nx_uuid; /* nexus instance UUID */ |
| 232 | nexus_port_t nb_port; /* nexus instance port */ |
| 233 | uint32_t nb_flags; /* NBR_* match flags */ |
| 234 | uuid_t nb_exec_uuid; /* executable UUID */ |
| 235 | user_addr_t nb_key; /* key blob */ |
| 236 | uint32_t nb_key_len; /* key blob length */ |
| 237 | pid_t nb_pid; /* client PID */ |
| 238 | }; |
| 239 | |
/* Match flags for nx_bind_req::nb_flags. */
#define NBR_MATCH_PID           0x1     /* match against PID */
#define NBR_MATCH_EXEC_UUID     0x2     /* match executable's UUID */
#define NBR_MATCH_KEY           0x4     /* match key blob */
#ifdef KERNEL
/* union of every NBR_MATCH_* bit above */
#define NBR_MATCH_MASK                                  \
	(NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY)
#endif /* KERNEL */
| 247 | |
| 248 | /* |
| 249 | * Argument structure for NXOPT_NEXUS_UNBIND. |
| 250 | */ |
| 251 | struct nx_unbind_req { |
| 252 | uuid_t nu_nx_uuid; /* nexus instance UUID */ |
| 253 | nexus_port_t nu_port; /* nexus instance port */ |
| 254 | }; |
| 255 | |
| 256 | /* |
| 257 | * Argument structure for NXOPT_CHANNEL_LIST. |
| 258 | */ |
| 259 | struct ch_list_req { |
| 260 | uuid_t cl_nx_uuid; /* nexus instance UUID */ |
| 261 | uint32_t cl_num_ch_uuids; /* array count */ |
| 262 | uint32_t __cl_align; /* reserved */ |
| 263 | user_addr_t cl_ch_uuids; /* array of channel UUIDs */ |
| 264 | }; |
| 265 | |
/*
 * Skywalk Nexus MIB
 *
 * "MIB" is the name used here for anything exposed to the outside world
 * for management/telemetry purposes.
 *
 * The guiding rule for MIB structures is simplicity: avoid variable
 * length fields and hierarchical representations wherever possible.
 * A simple retrieval returns either one object (a simple type or a
 * fixed length compound type) or an array of objects of the same type,
 * which keeps parsing of the retrieved information easy.
 *
 * For now the MIB interface is exposed through sysctl.  Additional
 * interfaces could be a syscall (e.g. via a nexus controller), etc.
 */

/* MIB types (32-bit), low bits */
#define NXMIB_NETIF_STATS       ((uint32_t)1 << 1)
#define NXMIB_FSW_STATS         ((uint32_t)1 << 2)
#define NXMIB_FLOW              ((uint32_t)1 << 3)
#define NXMIB_FLOW_ADV          ((uint32_t)1 << 4)
#define NXMIB_FLOW_OWNER        ((uint32_t)1 << 5)
#define NXMIB_FLOW_ROUTE        ((uint32_t)1 << 6)
#define NXMIB_LLINK_LIST        ((uint32_t)1 << 7)
#define NXMIB_NETIF_QUEUE_STATS ((uint32_t)1 << 8)

/* MIB types (32-bit), high bits: the members of NXMIB_USERSTACK_STATS */
#define NXMIB_QUIC_STATS        ((uint32_t)1 << 27)
#define NXMIB_UDP_STATS         ((uint32_t)1 << 28)
#define NXMIB_TCP_STATS         ((uint32_t)1 << 29)
#define NXMIB_IP6_STATS         ((uint32_t)1 << 30)
#define NXMIB_IP_STATS          ((uint32_t)1 << 31)

#define NXMIB_USERSTACK_STATS                           \
	(NXMIB_IP_STATS | NXMIB_IP6_STATS |             \
	NXMIB_TCP_STATS | NXMIB_UDP_STATS |             \
	NXMIB_QUIC_STATS)

/* filter selection bits (64-bit) */
#define NXMIB_FILTER_NX_UUID    ((uint64_t)1 << 0)
#define NXMIB_FILTER_FLOW_ID    ((uint64_t)1 << 1)
#define NXMIB_FILTER_PID        ((uint64_t)1 << 2)
#define NXMIB_FILTER_INFO_TUPLE ((uint64_t)1 << 3)
| 304 | |
| 305 | /* |
| 306 | * Nexus MIB filter: used to retrieve only those matching the filter value. |
| 307 | */ |
| 308 | struct nexus_mib_filter { |
| 309 | uint32_t nmf_type; /* MIB type */ |
| 310 | uint64_t nmf_bitmap; /* bitmap of following fields */ |
| 311 | |
| 312 | uuid_t nmf_nx_uuid; /* nexus instance uuid */ |
| 313 | uuid_t nmf_flow_id; /* flow rule id */ |
| 314 | pid_t nmf_pid; /* owner pid */ |
| 315 | struct info_tuple nmf_info_tuple; /* flow tuple */ |
| 316 | }; |
| 317 | |
/*
 * Nexus-specific config commands, carried in nx_cfg_req::nc_cmd.
 * Values are grouped in decades by subject.
 */
typedef enum {
	/* object attachment */
	NXCFG_CMD_ATTACH         = 0,   /* attach an object to a nexus */
	NXCFG_CMD_DETACH         = 1,   /* detach an object from a nexus */
	/* flows */
	NXCFG_CMD_FLOW_ADD       = 20,  /* add a flow to a nexus */
	NXCFG_CMD_FLOW_DEL       = 21,  /* delete a flow from nexus */
	NXCFG_CMD_FLOW_CONFIG    = 22,  /* configure a flow in nexus */
	/* packet scheduler */
	NXCFG_CMD_NETEM          = 30,  /* config packet scheduler */
	/* logical links */
	NXCFG_CMD_GET_LLINK_INFO = 40,  /* collect llink info */
} nxcfg_cmd_t;
| 330 | |
| 331 | #define NX_SPEC_IF_NAMELEN 64 |
| 332 | |
| 333 | /* |
| 334 | * Argument struture for NXOPT_NEXUS_CONFIG. |
| 335 | */ |
| 336 | struct nx_cfg_req { |
| 337 | uuid_t nc_nx_uuid; /* nexus instance UUID */ |
| 338 | nxcfg_cmd_t nc_cmd; /* NXCFG_CMD_* */ |
| 339 | uint32_t nc_req_len; /* size of request struct */ |
| 340 | user_addr_t nc_req; /* address of request struct */ |
| 341 | }; |
| 342 | |
| 343 | /* |
| 344 | * Argument structure for NXCFG_CMD_{ATTACH,DETACH} |
| 345 | */ |
| 346 | struct nx_spec_req { |
| 347 | union { |
| 348 | char nsru_name[NX_SPEC_IF_NAMELEN]; |
| 349 | uuid_t nsru_uuid; |
| 350 | #ifdef KERNEL |
| 351 | struct ifnet *nsru_ifp; |
| 352 | #endif /* KERNEL */ |
| 353 | } nsr_u __attribute__((aligned(sizeof(uint64_t)))); /* in */ |
| 354 | uint32_t nsr_flags; /* in */ |
| 355 | uuid_t nsr_if_uuid; /* attach: out, detach: in */ |
| 356 | }; |
| 357 | #define nsr_name nsr_u.nsru_name |
| 358 | #define nsr_uuid nsr_u.nsru_uuid |
| 359 | #ifdef KERNEL |
| 360 | #define nsr_ifp nsr_u.nsru_ifp |
| 361 | #endif /* KERNEL */ |
| 362 | |
| 363 | #define NXSPECREQ_UUID 0x1 /* nsr_name is uuid_t else ifname */ |
| 364 | #define NXSPECREQ_HOST 0x2 /* attach to host port */ |
| 365 | #ifdef KERNEL |
| 366 | /* mask off userland-settable bits */ |
| 367 | #define NXSPECREQ_MASK (NXSPECREQ_UUID | NXSPECREQ_HOST) |
| 368 | #define NXSPECREQ_IFP 0x1000 /* (embryonic) ifnet */ |
| 369 | #endif /* KERNEL */ |
| 370 | |
/*
 * Structure for flow demuxing for parent/child flows.
 *
 * A pattern is an offset and length plus a mask and a value buffer,
 * each FLOW_DEMUX_MAX_LEN bytes in size.
 */
#define FLOW_DEMUX_MAX_LEN      32
struct flow_demux_pattern {
	uint16_t        fdp_offset;
	uint16_t        fdp_len;
	uint8_t         fdp_mask[FLOW_DEMUX_MAX_LEN];
	uint8_t         fdp_value[FLOW_DEMUX_MAX_LEN];
};

/* Capacity of nx_flow_req::nfr_flow_demux_patterns. */
#define MAX_FLOW_DEMUX_PATTERN  4
| 383 | |
| 384 | /* |
| 385 | * Argument structure for NXCFG_CMD_FLOW_{BIND,UNBIND} |
| 386 | */ |
| 387 | struct nx_flow_req { |
| 388 | nexus_port_t nfr_nx_port; |
| 389 | uint16_t nfr_ethertype; |
| 390 | ether_addr_t nfr_etheraddr; |
| 391 | union sockaddr_in_4_6 nfr_saddr; |
| 392 | union sockaddr_in_4_6 nfr_daddr; |
| 393 | uint8_t nfr_ip_protocol; |
| 394 | uint8_t nfr_transport_protocol; |
| 395 | uint16_t nfr_flags; |
| 396 | uuid_t nfr_flow_uuid; |
| 397 | packet_svc_class_t nfr_svc_class; |
| 398 | uuid_t nfr_euuid; |
| 399 | uint32_t nfr_policy_id; |
| 400 | uint32_t nfr_skip_policy_id; |
| 401 | pid_t nfr_epid; |
| 402 | flowadv_idx_t nfr_flowadv_idx; |
| 403 | uuid_t nfr_bind_key; |
| 404 | uint64_t nfr_qset_id; |
| 405 | uuid_t nfr_parent_flow_uuid; |
| 406 | uint8_t nfr_flow_demux_count; |
| 407 | struct flow_demux_pattern nfr_flow_demux_patterns[MAX_FLOW_DEMUX_PATTERN]; |
| 408 | // below is reserved kernel-only fields |
| 409 | union { |
| 410 | #ifdef KERNEL |
| 411 | struct { |
| 412 | char _nfr_kernel_field_start[0]; |
| 413 | void *nfr_context; |
| 414 | struct proc *nfr_proc; |
| 415 | struct ifnet *nfr_ifp; |
| 416 | struct flow_route *nfr_route; |
| 417 | struct ns_token *nfr_port_reservation; |
| 418 | struct protons_token *nfr_proto_reservation; |
| 419 | struct flow_stats *nfr_flow_stats; |
| 420 | pid_t nfr_pid; |
| 421 | uint32_t nfr_saddr_gencnt; |
| 422 | void *nfr_ipsec_reservation; |
| 423 | uint32_t nfr_inp_flowhash; |
| 424 | #if defined(__LP64__) |
| 425 | uint8_t _nfr_kernel_pad[4]; |
| 426 | #else /* !__LP64__ */ |
| 427 | uint8_t _nfr_kernel_pad[36]; |
| 428 | #endif /* !__LP64__ */ |
| 429 | char _nfr_kernel_field_end[0]; |
| 430 | }; |
| 431 | #endif /* KERNEL */ |
| 432 | struct { |
| 433 | uint8_t _nfr_opaque[80]; |
| 434 | /* should be at the same offset as _nfr_kernel_field_end above */ |
| 435 | char _nfr_common_field_end[0]; |
| 436 | }; |
| 437 | }; |
| 438 | }; |
| 439 | |
/*
 * Valid flags for nx_flow_req::nfr_flags (a 16-bit field).
 * 0x2000 and 0x8000 are not assigned in this list.
 */
#define NXFLOWREQF_TRACK          0x0001  /* enable state tracking */
#define NXFLOWREQF_QOS_MARKING    0x0002  /* allow qos marking */
#define NXFLOWREQF_FILTER         0x0004  /* interpose filter */
#define NXFLOWREQF_CUSTOM_ETHER   0x0008  /* custom ethertype */
#define NXFLOWREQF_IPV6_ULA       0x0010  /* ipv6 ula */
#define NXFLOWREQF_LISTENER       0x0020  /* listener */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION 0x0040    /* override system address selection */
#define NXFLOWREQF_USE_STABLE_ADDRESS     0x0080  /* if override local, use stable address */
#define NXFLOWREQF_FLOWADV        0x0100  /* allocate flow advisory */
#define NXFLOWREQF_ASIS           0x0200  /* create flow as is in nfr */
#define NXFLOWREQF_LOW_LATENCY    0x0400  /* low latency flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP        0x0800  /* Don't wake for traffic to this flow */
/*
 * NOTE(review): this flag used to carry a copy of the NOWAKEFROMSLEEP
 * comment.  Going by the name it requests port reuse; confirm the exact
 * semantics with the flow code.
 */
#define NXFLOWREQF_REUSEPORT      0x1000  /* reuse port */
#define NXFLOWREQF_PARENT         0x4000  /* Parent flow */

/*
 * Bit-name table for the flags above: a base byte (\020 == 16) and then
 * one <1-based bit number><name> entry per flag.
 */
#define NXFLOWREQF_BITS                                                 \
	"\020"                                                          \
	"\01TRACK\02QOS_MARKING\03FILTER\04CUSTOM_ETHER"                \
	"\05IPV6_ULA\06LISTENER\07OVERRIDE_ADDRESS_SELECTION"           \
	"\010USE_STABLE_ADDRESS\011ALLOC_FLOWADV\012ASIS"               \
	"\013LOW_LATENCY\014NOWAKEUPFROMSLEEP"                          \
	"\015REUSEPORT\017PARENT"
| 461 | |
/*
 * An IP address held in an anonymous union, so that the same storage
 * can be read as an IPv4 address, an IPv6 address, or as arrays of
 * 8-, 16-, 32- or 64-bit words.
 */
struct flow_ip_addr {
	union {
		struct in_addr  _v4;
		struct in6_addr _v6;
		uint8_t         _addr8[16];
		uint16_t        _addr16[8];
		uint32_t        _addr32[4];
		uint64_t        _addr64[2];
	};
};
| 472 | |
| 473 | struct flow_key { |
| 474 | uint16_t fk_mask; |
| 475 | uint8_t fk_ipver; |
| 476 | uint8_t fk_proto; |
| 477 | uint16_t fk_sport; |
| 478 | uint16_t fk_dport; |
| 479 | struct flow_ip_addr fk_src; |
| 480 | struct flow_ip_addr fk_dst; |
| 481 | uint64_t fk_pad[1]; /* pad to 48 bytes */ |
| 482 | } __attribute__((__aligned__(16))); |
| 483 | |
| 484 | #define fk_src4 fk_src._v4 |
| 485 | #define fk_dst4 fk_dst._v4 |
| 486 | #define fk_src6 fk_src._v6 |
| 487 | #define fk_dst6 fk_dst._v6 |
| 488 | |
| 489 | #define FLOW_KEY_LEN sizeof(struct flow_key) |
| 490 | #define FK_HASH_SEED 0xabcd |
| 491 | |
/* Individual field-selection bits for flow_key::fk_mask. */
#define FKMASK_IPVER    ((uint16_t)1 << 0)
#define FKMASK_PROTO    ((uint16_t)1 << 1)
#define FKMASK_SRC      ((uint16_t)1 << 2)
#define FKMASK_SPORT    ((uint16_t)1 << 3)
#define FKMASK_DST      ((uint16_t)1 << 4)
#define FKMASK_DPORT    ((uint16_t)1 << 5)

/*
 * Composite masks.  Each n-tuple mask is built on the previous one:
 * 2-tuple is proto + source port, 3-tuple adds IP version + source
 * address, 4-tuple adds destination port, 5-tuple adds destination
 * address.  The IPFLOW masks follow the same scheme without ports.
 */
#define FKMASK_2TUPLE   (FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE   (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE   (FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE   (FKMASK_4TUPLE | FKMASK_DST)
#define FKMASK_IPFLOW1  FKMASK_PROTO
#define FKMASK_IPFLOW2  (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3  (FKMASK_IPFLOW2 | FKMASK_DST)
/* equals the number of composite masks defined above */
#define FKMASK_IDX_MAX  7
| 507 | |
/*
 * Constant flow keys, one per composite FKMASK_* mask.  They are only
 * declared here; the definitions live elsewhere.
 */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;

/*
 * Clear the flow key at _fk by handing it to sk_zero_48().  The
 * compile-time assertions pin FLOW_KEY_LEN to the 48 bytes that
 * helper's name refers to.
 */
#define FLOW_KEY_CLEAR(_fk) do {                                        \
	_CASSERT(FLOW_KEY_LEN == 48);                                   \
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));              \
	sk_zero_48(_fk);                                                \
} while (0)
| 521 | |
| 522 | #ifdef KERNEL |
/*
 * mask off userland-settable bits
 *
 * NXFLOWREQF_ASIS is not part of this mask.
 */
#define NXFLOWREQF_MASK                                                 \
	(NXFLOWREQF_TRACK |                                             \
	NXFLOWREQF_QOS_MARKING |                                        \
	NXFLOWREQF_FILTER |                                             \
	NXFLOWREQF_CUSTOM_ETHER |                                       \
	NXFLOWREQF_IPV6_ULA |                                           \
	NXFLOWREQF_LISTENER |                                           \
	NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION |                         \
	NXFLOWREQF_USE_STABLE_ADDRESS |                                 \
	NXFLOWREQF_FLOWADV |                                            \
	NXFLOWREQF_LOW_LATENCY |                                        \
	NXFLOWREQF_NOWAKEFROMSLEEP |                                    \
	NXFLOWREQF_REUSEPORT |                                          \
	NXFLOWREQF_PARENT)

/*
 * Kernel-only nfr_flags bits.
 *
 * NOTE(review): NXFLOWREQF_EXT_PORT_RSV has the same value (0x1000) as
 * NXFLOWREQF_REUSEPORT, which is included in NXFLOWREQF_MASK above.
 * As a result, masking nfr_flags with NXFLOWREQF_MASK does not strip
 * EXT_PORT_RSV, and a REUSEPORT bit set by a caller is indistinguishable
 * from it.  The values are left unchanged here because their users are
 * not visible from this header; confirm whether the overlap is
 * intentional before relying on either flag.
 */
#define NXFLOWREQF_EXT_PORT_RSV   0x1000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV  0x2000  /* external proto reservation */
| 533 | |
| 534 | static inline void |
| 535 | nx_flow_req_internalize(struct nx_flow_req *req) |
| 536 | { |
| 537 | _CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) == |
| 538 | offsetof(struct nx_flow_req, _nfr_common_field_end)); |
| 539 | |
| 540 | /* init kernel only fields */ |
| 541 | bzero(s: &req->_nfr_opaque, n: sizeof(req->_nfr_opaque)); |
| 542 | req->nfr_flags &= NXFLOWREQF_MASK; |
| 543 | req->nfr_context = NULL; |
| 544 | req->nfr_flow_stats = NULL; |
| 545 | req->nfr_port_reservation = NULL; |
| 546 | } |
| 547 | |
| 548 | static inline void |
| 549 | nx_flow_req_externalize(struct nx_flow_req *req) |
| 550 | { |
| 551 | /* neutralize kernel only fields */ |
| 552 | bzero(s: &req->_nfr_opaque, n: sizeof(req->_nfr_opaque)); |
| 553 | req->nfr_flags &= NXFLOWREQF_MASK; |
| 554 | } |
| 555 | #endif /* KERNEL */ |
| 556 | |
/*
 * Description of one queue set: its identifier, flags, and the number
 * of RX and TX queues it has.  Embedded as an array in
 * struct nx_llink_info.
 */
struct nx_qset_info {
	uint64_t        nqi_id;
	uint16_t        nqi_flags;
	uint8_t         nqi_num_rx_queues;
	uint8_t         nqi_num_tx_queues;
};
| 563 | |
| 564 | #define NETIF_LLINK_MAX_QSETS 256 |
| 565 | struct nx_llink_info { |
| 566 | uuid_t nli_netif_uuid; /* nexus netif instance uuid */ |
| 567 | uint64_t nli_link_id; |
| 568 | uint16_t nli_link_id_internal; |
| 569 | uint8_t nli_state; |
| 570 | uint8_t nli_flags; |
| 571 | uint16_t nli_qset_cnt; |
| 572 | struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS]; |
| 573 | }; |
| 574 | |
| 575 | #define NETIF_LLINK_INFO_VERSION 0x01 |
| 576 | struct nx_llink_info_req { |
| 577 | uint16_t nlir_version; |
| 578 | uint16_t nlir_llink_cnt; |
| 579 | struct nx_llink_info nlir_llink[0]; |
| 580 | }; |
| 581 | |
| 582 | /* |
| 583 | * Nexus controller descriptor. |
| 584 | */ |
| 585 | struct nexus_controller { |
| 586 | #ifndef KERNEL |
| 587 | int ncd_fd; |
| 588 | guardid_t ncd_guard; |
| 589 | #else /* KERNEL */ |
| 590 | struct nxctl *ncd_nxctl; |
| 591 | #endif /* KERNEL */ |
| 592 | }; |
| 593 | |
| 594 | /* For nexus ops without having to create a nexus controller */ |
| 595 | #define __OS_NEXUS_SHARED_USER_CONTROLLER_FD (-1) |
| 596 | |
/*
 * Nexus attributes.
 *
 * Every member is 64 bits wide.  nxa_requested is a bitmap of NXA_REQ_*
 * flags marking which attributes have been customized.
 */
struct nexus_attr {
	uint64_t        nxa_requested;          /* customized attributes */

	/* ring and slot geometry */
	uint64_t        nxa_tx_rings;           /* # of channel transmit rings */
	uint64_t        nxa_rx_rings;           /* # of channel receive rings */
	uint64_t        nxa_tx_slots;           /* # of slots per channel TX ring */
	uint64_t        nxa_rx_slots;           /* # of slots per channel RX ring */

	/* region sizes */
	uint64_t        nxa_buf_size;           /* size of each buffer */
	uint64_t        nxa_meta_size;          /* size of metadata per buffer */
	uint64_t        nxa_stats_size;         /* size of statistics region */

	uint64_t        nxa_anonymous;          /* bool: allow anonymous clients */
	uint64_t        nxa_pipes;              /* number of pipes */
	uint64_t        nxa_extensions;         /* extension-specific attribute */
	uint64_t        nxa_mhints;             /* memory usage hints */
	uint64_t        nxa_ifindex;            /* network interface index */
	uint64_t        nxa_flowadv_max;        /* max flow advisory entries */
	uint64_t        nxa_qmap;               /* queue mapping type */
	uint64_t        nxa_checksum_offload;   /* partial checksum offload */
	uint64_t        nxa_user_packet_pool;   /* user packet pool */
	uint64_t        nxa_nexusadv_size;      /* size of advisory region */
	uint64_t        nxa_user_channel;       /* user channel open allowed */
	uint64_t        nxa_max_frags;          /* max fragments per packet */

	/*
	 * When set, channel operations are rejected once the nexus peer
	 * has closed the channel.  Valid only for user-pipe nexus.
	 */
	uint64_t        nxa_reject_on_close;
	uint64_t        nxa_large_buf_size;     /* size of large buffer */
};
| 628 | |
/*
 * Flags for nxa_requested; keep in sync with NXPREQ_* flags.
 * Note that these are 64-bit, whereas nxpreg_requested is
 * 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS         (1ULL << 0)    /* 0x0000000000000001 */
#define NXA_REQ_RX_RINGS         (1ULL << 1)    /* 0x0000000000000002 */
#define NXA_REQ_TX_SLOTS         (1ULL << 2)    /* 0x0000000000000004 */
#define NXA_REQ_RX_SLOTS         (1ULL << 3)    /* 0x0000000000000008 */
#define NXA_REQ_BUF_SIZE         (1ULL << 4)    /* 0x0000000000000010 */
#define NXA_REQ_META_SIZE        (1ULL << 5)    /* 0x0000000000000020 */
#define NXA_REQ_STATS_SIZE       (1ULL << 6)    /* 0x0000000000000040 */
#define NXA_REQ_ANONYMOUS        (1ULL << 7)    /* 0x0000000000000080 */
#define NXA_REQ_PIPES            (1ULL << 8)    /* 0x0000000000000100 */
#define NXA_REQ_EXTENSIONS       (1ULL << 9)    /* 0x0000000000000200 */
#define NXA_REQ_MHINTS           (1ULL << 10)   /* 0x0000000000000400 */
#define NXA_REQ_FLOWADV_MAX      (1ULL << 11)   /* 0x0000000000000800 */
#define NXA_REQ_QMAP             (1ULL << 12)   /* 0x0000000000001000 */
#define NXA_REQ_CHECKSUM_OFFLOAD (1ULL << 13)   /* 0x0000000000002000 */
#define NXA_REQ_USER_PACKET_POOL (1ULL << 14)   /* 0x0000000000004000 */
#define NXA_REQ_CAPABILITIES     (1ULL << 15)   /* 0x0000000000008000 */
#define NXA_REQ_NEXUSADV_SIZE    (1ULL << 16)   /* 0x0000000000010000 */
#define NXA_REQ_IFINDEX          (1ULL << 17)   /* 0x0000000000020000 */
#define NXA_REQ_USER_CHANNEL     (1ULL << 18)   /* 0x0000000000040000 */
#define NXA_REQ_MAX_FRAGS        (1ULL << 19)   /* 0x0000000000080000 */
#define NXA_REQ_REJECT_ON_CLOSE  (1ULL << 20)   /* 0x0000000000100000 */
#define NXA_REQ_LARGE_BUF_SIZE   (1ULL << 21)   /* 0x0000000000200000 */
| 656 | |
| 657 | #ifndef KERNEL |
| 658 | #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) |
| 659 | __BEGIN_DECLS |
| 660 | /* system calls */ |
| 661 | extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len); |
| 662 | extern int __nexus_register(int ctl, struct nxprov_reg *reg, |
| 663 | const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len); |
| 664 | extern int __nexus_deregister(int ctl, const uuid_t prov_uuid, |
| 665 | const uint32_t prov_uuid_len); |
| 666 | extern int __nexus_create(int ctl, const uuid_t prov_uuid, |
| 667 | const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len); |
| 668 | extern int __nexus_destroy(int ctl, const uuid_t nx_uuid, |
| 669 | const uint32_t nx_uuid_len); |
| 670 | extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval, |
| 671 | uint32_t *aoptlen); |
| 672 | extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval, |
| 673 | const uint32_t optlen); |
| 674 | |
| 675 | /* private nexus controller APIs */ |
| 676 | extern int __os_nexus_ifattach(const nexus_controller_t ctl, |
| 677 | const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid, |
| 678 | boolean_t host, uuid_t *nx_if_uuid); |
| 679 | extern int __os_nexus_ifdetach(const nexus_controller_t ctl, |
| 680 | const uuid_t nx_uuid, const uuid_t nx_if_uuid); |
| 681 | |
| 682 | /* private flow APIs */ |
| 683 | extern int __os_nexus_flow_add(const nexus_controller_t ncd, |
| 684 | const uuid_t nx_uuid, const struct nx_flow_req *nfr); |
| 685 | extern int __os_nexus_flow_del(const nexus_controller_t ncd, |
| 686 | const uuid_t nx_uuid, const struct nx_flow_req *nfr); |
| 687 | extern int __os_nexus_get_llink_info(const nexus_controller_t ncd, |
| 688 | const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len); |
| 689 | extern int os_nexus_flow_set_wake_from_sleep(const uuid_t nx_uuid, |
| 690 | const uuid_t flow_uuid, bool enable); |
| 691 | |
| 692 | __END_DECLS |
| 693 | #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ |
| 694 | #endif /* !KERNEL */ |
| 695 | #if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE) |
| 696 | #include <skywalk/nexus_common.h> |
| 697 | #include <skywalk/nexus_ioctl.h> |
| 698 | #endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */ |
| 699 | #endif /* PRIVATE || BSD_KERNEL_PRIVATE */ |
| 700 | #endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */ |
| 701 | |