/*
 * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
 * All rights reserved.
 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef _SKYWALK_VAR_H_
#define _SKYWALK_VAR_H_

#ifdef BSD_KERNEL_PRIVATE
#include <stdint.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/cdefs.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/tree.h>
#include <sys/sysctl.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/random.h>
#include <sys/kernel.h>
#include <sys/guarded.h>
#include <uuid/uuid.h>
#include <kern/bits.h>
#include <kern/locks.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <mach/boolean.h>
#include <machine/atomic.h>
#include <machine/endian.h>
#include <netinet/ip.h>
#include <net/dlil.h>
#include <net/necp.h>
#include <libkern/libkern.h>
#include <libkern/OSAtomic.h>
#include <libkern/OSByteOrder.h>
#include <IOKit/skywalk/IOSkywalkSupport.h>
#include <skywalk/os_nexus_private.h>
#include <skywalk/os_channel_private.h>
#include <skywalk/namespace/netns.h>
#include <skywalk/namespace/protons.h>
#include <skywalk/namespace/flowidns.h>
#include <vm/vm_kern.h>
#include <san/kasan.h>

/*
 * General byte order swapping functions.
 */
#define bswap16(x)      OSSwapInt16(x)
#define bswap32(x)      OSSwapInt32(x)
#define bswap64(x)      OSSwapInt64(x)

/*
 * Atomic operations.
 */
#define SK_ATOMIC_TEST_AND_SET(p)       (!os_atomic_cmpxchg((p), 0, 1, acq_rel))
#define SK_ATOMIC_CLEAR(p)              os_atomic_store((p), 0, release)
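
/*
 * A minimal usage sketch (illustrative only; `busy_flag' and the caller
 * are hypothetical, not part of this header): SK_ATOMIC_TEST_AND_SET()
 * evaluates to FALSE only for the caller that wins the 0 -> 1 transition,
 * so it can serve as a simple try-lock; SK_ATOMIC_CLEAR() releases it
 * with store-release semantics.
 *
 *	static uint32_t busy_flag;		// hypothetical single-owner flag
 *
 *	if (!SK_ATOMIC_TEST_AND_SET(&busy_flag)) {
 *		// we won the 0 -> 1 transition; do the exclusive work
 *		SK_ATOMIC_CLEAR(&busy_flag);
 *	} else {
 *		// someone else already holds the flag
 *	}
 */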

extern uint32_t sk_debug;

/*
 * feature bits defined in os_skywalk_private.h
 */
extern uint64_t sk_features;

SYSCTL_DECL(_kern_skywalk);
SYSCTL_DECL(_kern_skywalk_stats);

#define SK_LOCK()                       \
	lck_mtx_lock(&sk_lock)
#define SK_LOCK_TRY()                   \
	lck_mtx_try_lock(&sk_lock)
#define SK_LOCK_ASSERT_HELD()           \
	LCK_MTX_ASSERT(&sk_lock, LCK_MTX_ASSERT_OWNED)
#define SK_LOCK_ASSERT_NOTHELD()        \
	LCK_MTX_ASSERT(&sk_lock, LCK_MTX_ASSERT_NOTOWNED)
#define SK_UNLOCK()                     \
	lck_mtx_unlock(&sk_lock)

decl_lck_mtx_data(extern, sk_lock);
extern lck_grp_t sk_lock_group;
extern lck_attr_t sk_lock_attr;
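
/*
 * Typical usage sketch (illustrative; the surrounding routine is
 * hypothetical): take the global Skywalk lock around shared state, and
 * document lock-held paths with SK_LOCK_ASSERT_HELD().
 *
 *	SK_LOCK();
 *	SK_LOCK_ASSERT_HELD();
 *	// ... mutate globally shared Skywalk state ...
 *	SK_UNLOCK();
 */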

/*
 * Ring Types.
 */
enum txrx {
	NR_RX = 0,              /* RX only */
	NR_TX = 1,              /* TX only */
	NR_TXRX,                /* RX+TX (alias) */
	NR_A = NR_TXRX,         /* alloc only */
	NR_F,                   /* free only */
	NR_TXRXAF,              /* alloc+free (alias) */
	NR_EV = NR_TXRXAF,      /* event only */
	NR_LBA,                 /* large buf alloc */
	NR_ALL                  /* all of the above */
};

__attribute__((always_inline))
static inline const char *
sk_ring2str(enum txrx t)
{
	switch (t) {
	case NR_TX:
		return "TX";
	case NR_RX:
		return "RX";
	case NR_A:
		return "ALLOC";
	case NR_F:
		return "FREE";
	case NR_EV:
		return "EVENT";
	case NR_LBA:
		return "LARGE ALLOC";
	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
}

__attribute__((always_inline))
static inline enum txrx
sk_txrx_swap(enum txrx t)
{
	return t == NR_RX ? NR_TX : NR_RX;
}

#define for_rx_tx(t)    for ((t) = 0; (t) < NR_TXRX; (t)++)
#define for_a_f(t)      for ((t) = NR_A; (t) <= NR_F; (t)++)
#define for_all_rings(t) for ((t) = 0; (t) < NR_ALL; (t)++)
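
/*
 * Iteration sketch (illustrative): for_rx_tx() walks just the RX/TX pair,
 * which is why NR_TXRX doubles as its bound, while for_all_rings() also
 * covers the alloc/free/event/large-alloc ring types.
 *
 *	enum txrx t;
 *
 *	for_rx_tx(t) {
 *		printf("setting up %s ring\n", sk_ring2str(t));
 *	}
 */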

/* return the next index, with wraparound */
__attribute__((always_inline))
static inline uint32_t
SLOT_NEXT(uint32_t i, uint32_t lim)
{
	return __improbable(i == lim) ? 0 : i + 1;
}

/* return the previous index, with wraparound */
__attribute__((always_inline))
static inline uint32_t
SLOT_PREV(uint32_t i, uint32_t lim)
{
	return __improbable(i == 0) ? lim : i - 1;
}

/* return the incremented index, with wraparound */
static inline uint32_t
SLOT_INCREMENT(uint32_t i, uint32_t n, uint32_t lim)
{
	i += n;
	return __improbable(i > lim) ? i - lim - 1 : i;
}
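
/*
 * Worked example (illustrative): with `lim' being the last valid slot
 * index of a ring (e.g. lim == 7 for an 8-slot ring), the helpers wrap
 * as follows, assuming n <= lim + 1 for SLOT_INCREMENT():
 *
 *	SLOT_NEXT(7, 7);            // -> 0
 *	SLOT_PREV(0, 7);            // -> 7
 *	SLOT_INCREMENT(6, 3, 7);    // 6 + 3 = 9 > 7, so 9 - 7 - 1 = 1
 */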

/*
 * Nexus metadata.
 */
#define NX_METADATA_QUANTUM_SZ          \
	(MAX(sizeof (struct __user_quantum), sizeof (struct __kern_quantum)))
#define NX_METADATA_PACKET_SZ(_n)       \
	(MAX(_USER_PACKET_SIZE(_n), _KERN_PACKET_SIZE(_n)))

/* {min,max} internal user metadata object size */
#define NX_METADATA_OBJ_MIN_SZ          \
	(METADATA_PREAMBLE_SZ + NX_METADATA_QUANTUM_SZ)
#define NX_METADATA_OBJ_MAX_SZ          512

/* {min,max} client metadata size */
#define NX_METADATA_USR_MIN_SZ          0
#define NX_METADATA_USR_MAX_SZ          \
	(NX_METADATA_OBJ_MAX_SZ - NX_METADATA_OBJ_MIN_SZ)

/*
 * User-visible statistics.
 */
#define NX_STATS_MIN_SZ         0
#define NX_STATS_MAX_SZ         (16 * 1024)

/*
 * Flow advisory entries.
 */
#define NX_FLOWADV_DEFAULT      512
#define NX_FLOWADV_MAX          (64 * 1024)
#define FO_FLOWADV_CHUNK        64

/*
 * Nexus advisory.
 */
#define NX_NEXUSADV_MAX_SZ      (16 * 1024)

/* {min,max} number of ring pairs in a nexus */
#define NX_MIN_NUM_RING_PAIR    1
#define NX_MAX_NUM_RING_PAIR    8       /* xxx unclear how many */
#define NX_MIN_NUM_RING         (NX_MIN_NUM_RING_PAIR * 2)
#define NX_MAX_NUM_RING         (NX_MAX_NUM_RING_PAIR * 2)

#define NX_MIN_NUM_SLOT_PER_RING        2
#define NX_MAX_NUM_SLOT_PER_RING        (16 * 1024)

#define NX_MIN_BUF_OBJ_SIZE             64
#define NX_MAX_BUF_OBJ_SIZE             (64 * 1024)

#define NX_PBUF_FRAGS_MIN               1
#define NX_PBUF_FRAGS_DEFAULT           NX_PBUF_FRAGS_MIN
#define NX_PBUF_FRAGS_MAX               64

#define NX_MAX_AGGR_PKT_SIZE    IP_MAXPACKET    /* max aggregated pkt size */

/*
 * Compat netif transmit models.
 */
/* uses default parameters as set by driver */
#define NETIF_COMPAT_TXMODEL_DEFAULT            0
/* override driver parameters and force IFEF_ENQUEUE_MULTI */
#define NETIF_COMPAT_TXMODEL_ENQUEUE_MULTI      1

/*
 * Native netif transmit models.
 */
/* uses default parameters as set by driver */
#define NETIF_NATIVE_TXMODEL_DEFAULT            0
/* override driver parameters and force IFEF_ENQUEUE_MULTI */
#define NETIF_NATIVE_TXMODEL_ENQUEUE_MULTI      1

#define _timerisset(tvp)        ((tvp)->tv_sec || (tvp)->tv_nsec)
#define _timersub(tvp, uvp, vvp) do {                                   \
	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;                  \
	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;               \
	if ((vvp)->tv_nsec < 0) {                                       \
	        (vvp)->tv_sec--;                                        \
	        (vvp)->tv_nsec += NSEC_PER_SEC;                         \
	}                                                               \
} while (0)
#define _timernsec(tvp, nsp) do {                                       \
	*(nsp) = (tvp)->tv_nsec;                                        \
	if ((tvp)->tv_sec > 0)                                          \
	        *(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);               \
} while (0)
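
/*
 * Usage sketch (illustrative; the timespec variables are hypothetical):
 * compute the elapsed time between two struct timespec samples and
 * express the difference in nanoseconds.
 *
 *	struct timespec start, now, delta;
 *	uint64_t elapsed_ns = 0;
 *
 *	// ... fill in start and now ...
 *	_timersub(&now, &start, &delta);
 *	_timernsec(&delta, &elapsed_ns);	// delta as nanoseconds
 *	if (_timerisset(&delta)) {
 *		// non-zero elapsed time
 *	}
 */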

struct nexus_adapter;
struct kern_pbufpool;

extern uint32_t sk_opp_defunct;
extern uint32_t sk_cksum_tx;
extern uint32_t sk_cksum_rx;
extern uint32_t sk_guard;
extern uint32_t sk_headguard_sz;
extern uint32_t sk_tailguard_sz;

#if (DEVELOPMENT || DEBUG)
extern uint32_t sk_txring_sz;
extern uint32_t sk_rxring_sz;
extern uint32_t sk_net_txring_sz;
extern uint32_t sk_net_rxring_sz;
#endif /* !DEVELOPMENT && !DEBUG */

extern uint32_t sk_max_flows;
extern uint32_t sk_fadv_nchunks;
extern uint32_t sk_netif_compat_txmodel;
extern uint32_t sk_netif_native_txmodel;
extern uint16_t sk_tx_delay_qlen;
extern uint16_t sk_tx_delay_timeout;
extern uint32_t sk_netif_compat_aux_cell_tx_ring_sz;
extern uint32_t sk_netif_compat_aux_cell_rx_ring_sz;
extern uint32_t sk_netif_compat_wap_tx_ring_sz;
extern uint32_t sk_netif_compat_wap_rx_ring_sz;
extern uint32_t sk_netif_compat_awdl_tx_ring_sz;
extern uint32_t sk_netif_compat_awdl_rx_ring_sz;
extern uint32_t sk_netif_compat_wif_tx_ring_sz;
extern uint32_t sk_netif_compat_wif_rx_ring_sz;
extern uint32_t sk_netif_compat_usb_eth_tx_ring_sz;
extern uint32_t sk_netif_compat_usb_eth_rx_ring_sz;
extern int sk_netif_compat_rx_mbq_limit;
extern char sk_ll_prefix[IFNAMSIZ];
extern uint32_t sk_fsw_rx_agg_tcp;
extern uint32_t sk_fsw_tx_agg_tcp;
extern uint32_t sk_fsw_gso_mtu;

typedef enum fsw_rx_agg_tcp_host {
	SK_FSW_RX_AGG_TCP_HOST_OFF = 0,
	SK_FSW_RX_AGG_TCP_HOST_ON = 1,
	SK_FSW_RX_AGG_TCP_HOST_AUTO
} fsw_rx_agg_tcp_host_t;
extern uint32_t sk_fsw_rx_agg_tcp_host;
extern uint32_t sk_fsw_max_bufs;

typedef enum netif_mit_cfg {
	SK_NETIF_MIT_FORCE_OFF = 0,     /* force mitigation OFF */
	SK_NETIF_MIT_FORCE_SIMPLE,      /* force mitigation ON (simple) */
	SK_NETIF_MIT_FORCE_ADVANCED,    /* force mitigation ON (advanced) */
	SK_NETIF_MIT_AUTO,              /* automatic (default) */
	SK_NETIF_MIT_MAX = SK_NETIF_MIT_AUTO,
} netif_mit_cfg_t;
extern uint32_t sk_netif_tx_mit;
extern uint32_t sk_netif_rx_mit;
extern uint32_t sk_rx_sync_packets;
extern uint32_t sk_channel_buflet_alloc;
extern uint32_t sk_min_pool_size;

struct sk_protect;
typedef const struct sk_protect *__single sk_protect_t;

__attribute__((always_inline))
static inline boolean_t
sk_is_sync_protected(void)
{
	return net_thread_is_marked(NET_THREAD_CHANNEL_SYNC) != 0;
}

__attribute__((always_inline))
static inline sk_protect_t
sk_sync_protect(void)
{
	return (sk_protect_t)(const void *)
	       net_thread_marks_push(NET_THREAD_CHANNEL_SYNC);
}

__attribute__((always_inline))
static inline boolean_t
sk_is_rx_notify_protected(void)
{
	return net_thread_is_marked(NET_THREAD_RX_NOTIFY) != 0;
}

__attribute__((always_inline))
static inline sk_protect_t
sk_rx_notify_protect(void)
{
	return (sk_protect_t)(const void *)
	       net_thread_marks_push(NET_THREAD_RX_NOTIFY);
}

__attribute__((always_inline))
static inline sk_protect_t
sk_tx_notify_protect(void)
{
	return (sk_protect_t)(const void *)
	       net_thread_marks_push(NET_THREAD_TX_NOTIFY);
}

__attribute__((always_inline))
static inline boolean_t
sk_is_tx_notify_protected(void)
{
	return net_thread_is_marked(NET_THREAD_TX_NOTIFY) != 0;
}

__attribute__((always_inline))
static inline boolean_t
sk_is_cache_update_protected(void)
{
	return net_thread_is_marked(NET_THREAD_CACHE_UPDATE) != 0;
}

__attribute__((always_inline))
static inline sk_protect_t
sk_cache_update_protect(void)
{
	return (sk_protect_t)(const void *)
	       net_thread_marks_push(NET_THREAD_CACHE_UPDATE);
}

__attribute__((always_inline))
static inline boolean_t
sk_is_region_update_protected(void)
{
	return net_thread_is_marked(NET_THREAD_REGION_UPDATE) != 0;
}

__attribute__((always_inline))
static inline sk_protect_t
sk_region_update_protect(void)
{
	return (sk_protect_t)(const void *)
	       net_thread_marks_push(NET_THREAD_REGION_UPDATE);
}

__attribute__((always_inline))
static inline boolean_t
sk_is_async_transmit_protected(void)
{
	return net_thread_is_marked(NET_THREAD_AYSYNC_TX) != 0;
}

__attribute__((always_inline))
static inline sk_protect_t
sk_async_transmit_protect(void)
{
	return (sk_protect_t)(const void *)
	       net_thread_marks_push(NET_THREAD_AYSYNC_TX);
}

#define sk_sync_unprotect               sk_unprotect
#define sk_cache_update_unprotect       sk_unprotect
#define sk_region_update_unprotect      sk_unprotect
#define sk_tx_notify_unprotect          sk_unprotect
#define sk_async_transmit_unprotect     sk_unprotect

__attribute__((always_inline))
static inline void
sk_unprotect(sk_protect_t protect)
{
	net_thread_marks_pop((net_thread_marks_t)(const void *)protect);
}
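
/*
 * Pairing sketch (illustrative; the surrounding sync routine is
 * hypothetical): each *_protect() pushes a thread mark and returns a
 * token that must be handed back to the matching *_unprotect() on the
 * same thread, bracketing the code that the sk_is_*_protected()
 * assertions elsewhere expect to run under that mark.
 *
 *	sk_protect_t protect;
 *
 *	protect = sk_sync_protect();
 *	ASSERT(sk_is_sync_protected());
 *	// ... perform the channel sync work ...
 *	sk_sync_unprotect(protect);
 */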

/*
 * For sysctls that allocate a buffer to fill then copyout at completion,
 * set an upper bound on the size of the buffer we'll allocate.
 */
#define SK_SYSCTL_ALLOC_MAX     ((size_t)(100 * 1024 * 1024))

#if (DEVELOPMENT || DEBUG)
typedef void (*_null_func_t)(void);
#define null_func       ((_null_func_t)NULL)

extern uint32_t sk_inject_error_rmask;
#define _SK_INJECT_ERROR(_ie, _en, _ev, _ec, _ej, _f, ...) do {         \
	if (__improbable(((_ie) & (1ULL << (_en))) != 0)) {             \
	        if ((random() & sk_inject_error_rmask) !=               \
	            sk_inject_error_rmask)                              \
	                break;                                          \
	        if ((_ej) != NULL) (*(_ej))++;                          \
	        SK_DF(SK_VERB_ERROR_INJECT, "injecting error %d", (_en));\
	        if ((_f) != NULL)                                       \
	                (_f)(__VA_ARGS__);                              \
	        (_ev) = (_ec);                                          \
	}                                                               \
} while (0)
#else
#define _SK_INJECT_ERROR(_ie, _en, _ev, _ec, _ej, _f, ...)
#endif /* DEVELOPMENT || DEBUG */
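
/*
 * Usage sketch (illustrative; the inject-error bitmap, counter and error
 * variable below are hypothetical): callers typically wrap
 * _SK_INJECT_ERROR() in a per-subsystem macro that supplies its own
 * bitmap and statistics counter, so release builds compile the call away.
 *
 *	int err = 0;
 *	uint64_t injected_cnt = 0;
 *
 *	// inject ENOMEM when bit 3 of sk_example_inject_mask is set
 *	_SK_INJECT_ERROR(sk_example_inject_mask, 3, err, ENOMEM,
 *	    &injected_cnt, null_func);
 *	if (err != 0) {
 *		// take the error path as if the real operation had failed
 *	}
 */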

__BEGIN_DECLS
extern int skywalk_init(void);
extern int skywalk_priv_check_cred(proc_t, kauth_cred_t, int);
extern int skywalk_priv_check_proc_cred(proc_t, int);
#if CONFIG_MACF
extern int skywalk_mac_system_check_proc_cred(proc_t, const char *);
#endif /* CONFIG_MACF */
extern int skywalk_nxctl_check_privileges(proc_t, kauth_cred_t);
extern boolean_t skywalk_check_platform_binary(proc_t);
extern boolean_t skywalk_netif_direct_allowed(const char *);
extern boolean_t skywalk_netif_direct_enabled(void);
extern void sk_gen_guard_id(boolean_t, const uuid_t, guardid_t *);
extern const char *sk_uuid_unparse(const uuid_t, uuid_string_t);
#if SK_LOG
extern const char *sk_dump(const char *label, const void *__sized_by(len) obj,
    int len, int dumplen, char *__counted_by(lim) dst, int lim);
extern const char *sk_proc_name_address(struct proc *);
extern int sk_proc_pid(struct proc *);
extern const char *sk_sa_ntop(struct sockaddr *, char *, size_t);
extern const char *sk_memstatus2str(uint32_t);
#endif /* SK_LOG */

extern bool sk_sa_has_addr(struct sockaddr *sa);
extern bool sk_sa_has_port(struct sockaddr *sa);
extern uint16_t sk_sa_get_port(struct sockaddr *sa);

extern void skywalk_kill_process(struct proc *, uint64_t);

enum skywalk_kill_reason {
	SKYWALK_KILL_REASON_GENERIC = 0,
	SKYWALK_KILL_REASON_HEAD_OOB,
	SKYWALK_KILL_REASON_HEAD_OOB_WRAPPED,
	SKYWALK_KILL_REASON_CUR_OOB,
	SKYWALK_KILL_REASON_CUR_OOB_WRAPPED_1,
	SKYWALK_KILL_REASON_CUR_OOB_WRAPPED_2,
	SKYWALK_KILL_REASON_TAIL_MISMATCH,
	SKYWALK_KILL_REASON_BASIC_SANITY,
	SKYWALK_KILL_REASON_UNALLOCATED_PKT,
	SKYWALK_KILL_REASON_SLOT_NOT_DETACHED,
	SKYWALK_KILL_REASON_QUM_IDX_MISMATCH,
	SKYWALK_KILL_REASON_SYNC_FAILED,
	SKYWALK_KILL_REASON_INCONSISTENT_READY_BYTES,
	SKYWALK_KILL_REASON_BAD_BUFLET_CHAIN,
	SKYWALK_KILL_REASON_INTERNALIZE_FAILED,
};

#define SKYWALK_KILL_REASON_TX_SYNC             0x0000000000000000ULL
#define SKYWALK_KILL_REASON_EVENT_SYNC          0x1000000000000000ULL
#define SKYWALK_KILL_REASON_FREE_SYNC           0x2000000000000000ULL
#define SKYWALK_KILL_REASON_ALLOC_SYNC          0x4000000000000000ULL
#define SKYWALK_KILL_REASON_RX_SYNC             0x8000000000000000ULL
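
/*
 * Composition sketch (illustrative): the low-order bits carry an
 * enum skywalk_kill_reason value and the top nibble tags the sync path
 * that detected the violation, so a caller reporting a failed RX sync
 * might pass something along the lines of:
 *
 *	skywalk_kill_process(p,
 *	    SKYWALK_KILL_REASON_RX_SYNC | SKYWALK_KILL_REASON_SYNC_FAILED);
 */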

/* for convenience */
extern char *proc_name_address(void *p);

/*
 * skoid is the glue that holds the Skywalk struct model and sysctl
 * properties together. It is meant to be embedded in other Skywalk
 * structs, for instance channel, nexus, etc. A skoid can hold a variable
 * number of properties, which are automatically made available to the
 * sysctl interface under the parent skoid sysctl node.
 *
 * The embedding struct should call skoid_create(), which initializes and
 * registers the associated sysctl_oid under the parent node. All
 * first-level dynamic skoid nodes must hang under static sysctl nodes
 * defined with the traditional SYSCTL_NODE macro in the linker set:
 *     skoid_create(1st_level_skoid, SKOID_SNODE(_linker_sysctl), name, kind)
 *
 * The fields of the embedding struct can be expressed as properties of
 * the skoid, or as separate skoids, depending on the model. If a field is
 * of a primitive type, a property should be used. If a field is of a
 * compound type (e.g. a struct), another layer of skoid may be created
 * under the parent.
 *
 * To add properties to the skoid, call one of the skoid_add_* functions:
 *     skoid_add_int(&skoid, name, flags, int_ptr)
 * To add another skoid as a child of a skoid, allocate it and call
 * skoid_create() with SKOID_DNODE(parent_skoid) as the parent argument:
 *     skoid_create(2+_level_skoid, SKOID_DNODE(parent_skoid), name, kind)
 *
 * About life cycle: the embedding struct must outlive its skoid. The
 * skoid itself stores a cached copy of its name, so there is no
 * restriction on the life cycle of the name buffer. A property name,
 * however, should be a const string, or a string that outlives the
 * skoid. Most often the skoid has a variable name reflecting the Skywalk
 * struct instance (e.g. "ms.en0"), while its properties have fixed names
 * matching the struct member variable names.
 *
 * Please use caution regarding access control of skoid properties.
 */
#define SKOID_SNODE(static_parent)      (&(sysctl_##static_parent##_children))
#define SKOID_DNODE(dynamic_parent)     (&(dynamic_parent.sko_oid_list))
#define SKOID_NAME_SIZE                 32

struct skoid {
	struct sysctl_oid_list  sko_oid_list;   /* self sko_oid & properties */
	struct sysctl_oid       sko_oid;        /* self sysctl oid storage */
	char    sko_name[SKOID_NAME_SIZE];      /* skoid name */
};

extern void skoid_init(void);
extern void skoid_create(struct skoid *skoid, struct sysctl_oid_list *parent,
    const char *name, int kind);
extern void skoid_add_int(struct skoid *skoid, const char *name, int flags,
    int *ptr);
extern void skoid_add_uint(struct skoid *skoid, const char *name, int flags,
    unsigned int *ptr);
extern void skoid_add_handler(struct skoid *skoid, const char *name, int kind,
    int (*handler)SYSCTL_HANDLER_ARGS, void *arg1, int arg2);
extern void skoid_destroy(struct skoid *skoid);
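
/*
 * Creation sketch (illustrative; `_kern_skywalk_example' and the
 * embedding struct are hypothetical, and the first-level parent must be
 * a static SYSCTL_NODE as described above):
 *
 *	struct example_obj {
 *		unsigned int eo_flags;
 *		struct skoid eo_skoid;
 *	} *eo;
 *
 *	skoid_create(&eo->eo_skoid, SKOID_SNODE(_kern_skywalk_example),
 *	    "en0", CTLFLAG_RW);
 *	skoid_add_uint(&eo->eo_skoid, "flags", CTLFLAG_RD, &eo->eo_flags);
 *	// ...
 *	skoid_destroy(&eo->eo_skoid);
 */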

/*
 * To avoid accidentally invoking a skoid proc handler via the `sysctl'
 * tool, use this macro as a guard so the proc only runs when a new value
 * is supplied, e.g.
 *     sysctl <skoid_proc_name>=1
 */
#define SKOID_PROC_CALL_GUARD do {                                      \
	if (req->newptr == USER_ADDR_NULL)                              \
	        return (0);                                             \
} while (0)
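
/*
 * Handler sketch (illustrative; the handler itself is hypothetical): a
 * proc handler registered via skoid_add_handler() would typically start
 * with the guard so a plain read does not trigger its side effect.
 *
 *	static int
 *	example_action_sysctl SYSCTL_HANDLER_ARGS
 *	{
 *	#pragma unused(arg1, arg2)
 *		SKOID_PROC_CALL_GUARD;
 *
 *		// ... perform the action requested by the write ...
 *		return (0);
 *	}
 */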

extern kern_allocation_name_t skmem_tag_oid;
extern kern_allocation_name_t skmem_tag_sysctl_buf;

__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* _SKYWALK_VAR_H_ */
| 619 | |