| 1 | /* |
| 2 | * Copyright (c) 2021 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | /* |
| 30 | * The flowidns (Flow ID namespace) module provides functionality to allocate |
| 31 | * globally unique identifier for a flow. |
| 32 | * Currently we have four modules (flowswitch, inpcb, PF & IPSec driver) in our |
| 33 | * stack which need to generate flow identifiers. These modules stamp every |
| 34 | * outgoing packet with a flowID. This flowID can be used by other upstream |
| 35 | * components in the device for flow classification purpose. For example, the |
| 36 | * FQ-Codel algorithm relies on this per packet flowID to avoid parsing every |
| 37 | * packet header for flow classification. A globally unique flowID can also be |
| 38 | * used by the networking feature offload engines operating at link layer to |
| 39 | * avoid flow classification operations. |
| 40 | * For performance reasons we use the concept of a flow domain and the |
| 41 | * data structures used by the flowidns module have per domain instance. |
| 42 | * These domains represent the above mentioned four modules generating the |
| 43 | * flowID. This allows us to avoid global lock being used while allocating & |
| 44 | * releasing flowID. FlowID is a 32-bit unsigned integer and the 2 most |
| 45 | * significant bits of flowID are used to encode the domain ID. This |
| 46 | * encoding also means that the flowID generator only needs to ensure |
| 47 | * uniqueness of identifier within a domain. |
| 48 | */ |
| 49 | |
| 50 | #include <skywalk/os_skywalk.h> |
| 51 | #include <skywalk/os_skywalk_private.h> |
| 52 | #include <skywalk/namespace/flowidns.h> |
| 53 | #include <dev/random/randomdev.h> |
| 54 | #include <sys/sdt.h> |
| 55 | |
| 56 | /* maximum number of flowID generation retries in case of collision */ |
| 57 | #define FLOWIDNS_MAX_FLOWID_GEN_RETRY 5 |
| 58 | |
| 59 | /* 2 most significant bits of the flowID are used to encode the flow domain */ |
| 60 | #define FLOWIDNS_FLOWID_DOMAIN_SHIFT 30 |
| 61 | #define FLOWIDNS_FLOWID_DOMAIN_MASK (0x03 << FLOWIDNS_FLOWID_DOMAIN_SHIFT) |
| 62 | |
| 63 | #define FLOWIDNS_FLOWID_SET_DOMAIN(_dom, _fid) do { \ |
| 64 | (_fid) &= ~FLOWIDNS_FLOWID_DOMAIN_MASK; \ |
| 65 | (_fid) |= ((_dom) << FLOWIDNS_FLOWID_DOMAIN_SHIFT); \ |
| 66 | } while (0) |
| 67 | |
| 68 | #define FLOWIDNS_FLOWID_GET_DOMAIN(_dom, _fid) do { \ |
| 69 | (_dom) = (_fid) >> FLOWIDNS_FLOWID_DOMAIN_SHIFT; \ |
| 70 | } while (0) |
| 71 | |
| 72 | #define FLOWIDNS_DOM_LOCK(_dom) \ |
| 73 | lck_mtx_lock(&(flowidns_domain_array[(_dom)].fd_mtx)) |
| 74 | #define FLOWIDNS_DOM_UNLOCK(_dom) \ |
| 75 | lck_mtx_unlock(&(flowidns_domain_array[(_dom)].fd_mtx)) |
| 76 | |
| 77 | struct flowidns_flowid_tree_node { |
| 78 | RB_ENTRY(flowidns_flowid_tree_node) fftn_link; |
| 79 | struct flowidns_flow_key fftn_flowkey; |
| 80 | flowidns_flowid_t fftn_flowid; |
| 81 | }; |
| 82 | |
| 83 | static LCK_GRP_DECLARE(flowidns_lock_group, "flowidns_lock" ); |
| 84 | static int __flowidns_inited = 0; |
| 85 | |
| 86 | static SKMEM_TYPE_DEFINE(flowidns_fftn_zone, struct flowidns_flowid_tree_node); |
| 87 | |
| 88 | __attribute__((always_inline)) |
| 89 | static inline int |
| 90 | fftn_cmp(const struct flowidns_flowid_tree_node *fftn1, |
| 91 | const struct flowidns_flowid_tree_node *fftn2) |
| 92 | { |
| 93 | return (signed)(fftn1->fftn_flowid - fftn2->fftn_flowid); |
| 94 | } |
| 95 | |
| 96 | RB_HEAD(flowidns_flowid_tree, flowidns_flowid_tree_node); |
| 97 | RB_PROTOTYPE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link, |
| 98 | fftn_cmp); |
| 99 | RB_GENERATE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link, |
| 100 | fftn_cmp); |
| 101 | |
| 102 | struct flowidns_domain { |
| 103 | decl_lck_mtx_data(, fd_mtx); |
| 104 | struct flowidns_flowid_tree fd_flowid_tree; |
| 105 | uint32_t fd_id; |
| 106 | uint64_t fd_nallocs; |
| 107 | uint64_t fd_nreleases; |
| 108 | uint64_t fd_ncollisions; |
| 109 | }; |
| 110 | |
| 111 | static struct flowidns_domain flowidns_domain_array[FLOWIDNS_DOMAIN_MAX + 1]; |
| 112 | |
| 113 | static struct flowidns_flowid_tree_node * |
| 114 | flowidns_fftn_alloc(bool can_block) |
| 115 | { |
| 116 | struct flowidns_flowid_tree_node *fftn = NULL; |
| 117 | zalloc_flags_t zflags; |
| 118 | |
| 119 | zflags = can_block ? Z_WAITOK_ZERO : Z_NOWAIT_ZERO; |
| 120 | fftn = zalloc_flags(flowidns_fftn_zone, zflags); |
| 121 | return fftn; |
| 122 | } |
| 123 | |
| 124 | static void |
| 125 | flowidns_fftn_free(struct flowidns_flowid_tree_node *fftn) |
| 126 | { |
| 127 | zfree(flowidns_fftn_zone, fftn); |
| 128 | } |
| 129 | |
| 130 | static struct flowidns_flowid_tree_node * |
| 131 | flowidns_find_fftn(flowidns_flowid_t flowid, flowidns_domain_id_t domain) |
| 132 | { |
| 133 | struct flowidns_flowid_tree_node find = { .fftn_flowid = flowid }; |
| 134 | |
| 135 | return RB_FIND(flowidns_flowid_tree, |
| 136 | &(flowidns_domain_array[domain].fd_flowid_tree), &find); |
| 137 | } |
| 138 | |
| 139 | void |
| 140 | flowidns_allocate_flowid(flowidns_domain_id_t domain, |
| 141 | struct flowidns_flow_key *pflow_key, flowidns_flowid_t *pflowid) |
| 142 | { |
| 143 | struct flowidns_flowid_tree_node *fftn = NULL, *dup = NULL; |
| 144 | uint32_t flowid = 0; |
| 145 | int retry_cnt = 0; |
| 146 | |
| 147 | VERIFY(__flowidns_inited == 1); |
| 148 | VERIFY(pflowid != NULL); |
| 149 | VERIFY(pflow_key != NULL); |
| 150 | VERIFY(domain >= FLOWIDNS_DOMAIN_MIN && |
| 151 | domain <= FLOWIDNS_DOMAIN_MAX); |
| 152 | |
| 153 | FLOWIDNS_DOM_LOCK(domain); |
| 154 | |
| 155 | fftn = flowidns_fftn_alloc(true); |
| 156 | if (__improbable(fftn == NULL)) { |
| 157 | panic_plain("failed to allocate flowid node\n" ); |
| 158 | } |
| 159 | retry: |
| 160 | /* try to get a non-zero flow identifier */ |
| 161 | do { |
| 162 | read_frandom(buffer: &flowid, numBytes: sizeof(flowid)); |
| 163 | } while (__improbable(flowid == 0)); |
| 164 | |
| 165 | FLOWIDNS_FLOWID_SET_DOMAIN(domain, flowid); |
| 166 | |
| 167 | fftn->fftn_flowid = flowid; |
| 168 | fftn->fftn_flowkey = *pflow_key; |
| 169 | dup = RB_INSERT(flowidns_flowid_tree, |
| 170 | &(flowidns_domain_array[domain].fd_flowid_tree), fftn); |
| 171 | |
| 172 | /* try to get a unique flow identifier */ |
| 173 | if (dup != NULL) { |
| 174 | retry_cnt++; |
| 175 | flowidns_domain_array[domain].fd_ncollisions++; |
| 176 | SK_ERR("duplicate flowid 0x%x generated, retrying %d" , |
| 177 | flowid, retry_cnt); |
| 178 | /* |
| 179 | * safeguard to check if we need a better hash strategy. |
| 180 | */ |
| 181 | VERIFY(retry_cnt <= FLOWIDNS_MAX_FLOWID_GEN_RETRY); |
| 182 | goto retry; |
| 183 | } |
| 184 | *pflowid = flowid; |
| 185 | flowidns_domain_array[domain].fd_nallocs++; |
| 186 | VERIFY(flowidns_domain_array[domain].fd_nallocs != 0); |
| 187 | |
| 188 | FLOWIDNS_DOM_UNLOCK(domain); |
| 189 | |
| 190 | DTRACE_SKYWALK2(fidalloc, uint32_t, domain, uint32_t, flowid); |
| 191 | } |
| 192 | |
| 193 | void |
| 194 | flowidns_release_flowid(flowidns_flowid_t flowid) |
| 195 | { |
| 196 | struct flowidns_flowid_tree_node *fftn; |
| 197 | flowidns_domain_id_t domain; |
| 198 | |
| 199 | VERIFY(__flowidns_inited == 1); |
| 200 | VERIFY(flowid != 0); |
| 201 | |
| 202 | FLOWIDNS_FLOWID_GET_DOMAIN(domain, flowid); |
| 203 | VERIFY(domain >= FLOWIDNS_DOMAIN_MIN && |
| 204 | domain <= FLOWIDNS_DOMAIN_MAX); |
| 205 | |
| 206 | DTRACE_SKYWALK2(fidrel, uint32_t, domain, uint32_t, flowid); |
| 207 | |
| 208 | FLOWIDNS_DOM_LOCK(domain); |
| 209 | |
| 210 | fftn = flowidns_find_fftn(flowid, domain); |
| 211 | if (fftn == NULL) { |
| 212 | panic_plain("flowid 0x%x not found in domain %d\n" , flowid, |
| 213 | domain); |
| 214 | } |
| 215 | RB_REMOVE(flowidns_flowid_tree, |
| 216 | &(flowidns_domain_array[domain].fd_flowid_tree), fftn); |
| 217 | ASSERT(fftn->fftn_flowid == flowid); |
| 218 | flowidns_fftn_free(fftn); |
| 219 | flowidns_domain_array[domain].fd_nreleases++; |
| 220 | VERIFY(flowidns_domain_array[domain].fd_nreleases != 0); |
| 221 | |
| 222 | FLOWIDNS_DOM_UNLOCK(domain); |
| 223 | } |
| 224 | |
| 225 | int |
| 226 | flowidns_init() |
| 227 | { |
| 228 | flowidns_domain_id_t domain; |
| 229 | |
| 230 | VERIFY(__flowidns_inited == 0); |
| 231 | _CASSERT(SFH_DOMAIN_IPSEC == FLOWIDNS_DOMAIN_IPSEC); |
| 232 | _CASSERT(SFH_DOMAIN_FLOWSWITCH == FLOWIDNS_DOMAIN_FLOWSWITCH); |
| 233 | _CASSERT(SFH_DOMAIN_INPCB == FLOWIDNS_DOMAIN_INPCB); |
| 234 | _CASSERT(SFH_DOMAIN_PF == FLOWIDNS_DOMAIN_PF); |
| 235 | _CASSERT(FLOWIDNS_DOMAIN_MIN == 0); |
| 236 | /* |
| 237 | * FLOWIDNS_FLOWID_DOMAIN_{MASK, SHIFT} macros are based on below |
| 238 | * assumption. |
| 239 | */ |
| 240 | _CASSERT(FLOWIDNS_DOMAIN_MAX == 3); |
| 241 | |
| 242 | for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX; |
| 243 | domain++) { |
| 244 | bzero(s: &flowidns_domain_array[domain], |
| 245 | n: sizeof(struct flowidns_domain)); |
| 246 | flowidns_domain_array[domain].fd_id = domain; |
| 247 | lck_mtx_init(lck: &(flowidns_domain_array[domain].fd_mtx), |
| 248 | grp: &flowidns_lock_group, NULL); |
| 249 | RB_INIT(&(flowidns_domain_array[domain].fd_flowid_tree)); |
| 250 | } |
| 251 | |
| 252 | __flowidns_inited = 1; |
| 253 | SK_D("initialized flow ID namespace" ); |
| 254 | return 0; |
| 255 | } |
| 256 | |
| 257 | void |
| 258 | flowidns_fini(void) |
| 259 | { |
| 260 | flowidns_domain_id_t domain; |
| 261 | struct flowidns_flowid_tree_node *fftn, *fftn_tmp; |
| 262 | |
| 263 | VERIFY(__flowidns_inited == 1); |
| 264 | |
| 265 | for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX; |
| 266 | domain++) { |
| 267 | FLOWIDNS_DOM_LOCK(domain); |
| 268 | |
| 269 | RB_FOREACH_SAFE(fftn, flowidns_flowid_tree, |
| 270 | &(flowidns_domain_array[domain].fd_flowid_tree), |
| 271 | fftn_tmp) { |
| 272 | RB_REMOVE(flowidns_flowid_tree, |
| 273 | &(flowidns_domain_array[domain].fd_flowid_tree), |
| 274 | fftn); |
| 275 | flowidns_fftn_free(fftn); |
| 276 | } |
| 277 | |
| 278 | FLOWIDNS_DOM_UNLOCK(domain); |
| 279 | |
| 280 | lck_mtx_destroy(lck: &(flowidns_domain_array[domain].fd_mtx), |
| 281 | grp: &flowidns_lock_group); |
| 282 | } |
| 283 | |
| 284 | __flowidns_inited = 0; |
| 285 | } |
| 286 | |
| 287 | static int flowidns_stats_sysctl SYSCTL_HANDLER_ARGS; |
| 288 | SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flowidns, |
| 289 | CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, |
| 290 | 0, 0, flowidns_stats_sysctl, "-" , |
| 291 | "flowid allocations (struct sk_stats_flowidns_header, " |
| 292 | "skywalk/os_stats_private.h)" ); |
| 293 | |
| 294 | static int |
| 295 | flowidns_dump_domain(struct sysctl_req *req, struct flowidns_domain *domain) |
| 296 | { |
| 297 | struct flowidns_flowid_tree_node *fftn; |
| 298 | struct sk_stats_flowidns_header ; |
| 299 | struct sk_stats_flowidns_record record; |
| 300 | uint64_t n_records; |
| 301 | int err; |
| 302 | |
| 303 | /* Fill out header */ |
| 304 | memset(s: &header, c: 0, n: sizeof(header)); |
| 305 | header.sfh_domain = domain->fd_id; |
| 306 | header.sfh_nallocs = domain->fd_nallocs; |
| 307 | header.sfh_nreleases = domain->fd_nreleases; |
| 308 | header.sfh_ncollisions = domain->fd_ncollisions; |
| 309 | n_records = domain->fd_nallocs - domain->fd_nreleases; |
| 310 | VERIFY(n_records <= UINT32_MAX); |
| 311 | header.sfh_nrecords = (uint32_t)n_records; |
| 312 | |
| 313 | err = SYSCTL_OUT(req, &header, sizeof(header)); |
| 314 | if (err) { |
| 315 | return err; |
| 316 | } |
| 317 | |
| 318 | /* Fill out records */ |
| 319 | RB_FOREACH(fftn, flowidns_flowid_tree, &domain->fd_flowid_tree) { |
| 320 | VERIFY(n_records > 0); |
| 321 | n_records--; |
| 322 | bzero(s: &record, n: sizeof(record)); |
| 323 | record.sfr_flowid = fftn->fftn_flowid; |
| 324 | record.sfr_af = fftn->fftn_flowkey.ffk_af; |
| 325 | record.sfr_ipproto = fftn->fftn_flowkey.ffk_proto; |
| 326 | record.sfr_protoid = fftn->fftn_flowkey.ffk_protoid; |
| 327 | _CASSERT(sizeof(fftn->fftn_flowkey.ffk_laddr) == |
| 328 | sizeof(record.sfr_laddr)); |
| 329 | _CASSERT(sizeof(fftn->fftn_flowkey.ffk_raddr) == |
| 330 | sizeof(record.sfr_raddr)); |
| 331 | bcopy(src: &(fftn->fftn_flowkey.ffk_laddr), dst: &record.sfr_laddr, |
| 332 | n: sizeof(record.sfr_laddr)); |
| 333 | bcopy(src: &(fftn->fftn_flowkey.ffk_raddr), dst: &record.sfr_raddr, |
| 334 | n: sizeof(record.sfr_raddr)); |
| 335 | |
| 336 | err = SYSCTL_OUT(req, &record, sizeof(record)); |
| 337 | if (err) { |
| 338 | return err; |
| 339 | } |
| 340 | } |
| 341 | VERIFY(n_records == 0); |
| 342 | return 0; |
| 343 | } |
| 344 | |
| 345 | static int |
| 346 | flowidns_stats_sysctl SYSCTL_HANDLER_ARGS |
| 347 | { |
| 348 | #pragma unused(oidp, arg1, arg2) |
| 349 | flowidns_domain_id_t domain; |
| 350 | int err = 0; |
| 351 | |
| 352 | if (!kauth_cred_issuser(cred: kauth_cred_get())) { |
| 353 | return EPERM; |
| 354 | } |
| 355 | |
| 356 | if (__flowidns_inited == 0) { |
| 357 | return ENOTSUP; |
| 358 | } |
| 359 | |
| 360 | net_update_uptime(); |
| 361 | |
| 362 | for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX; |
| 363 | domain++) { |
| 364 | FLOWIDNS_DOM_LOCK(domain); |
| 365 | err = flowidns_dump_domain(req, domain: &flowidns_domain_array[domain]); |
| 366 | FLOWIDNS_DOM_UNLOCK(domain); |
| 367 | if (err != 0) { |
| 368 | return err; |
| 369 | } |
| 370 | } |
| 371 | /* |
| 372 | * If this is just a request for length, add slop because |
| 373 | * this is dynamically changing data |
| 374 | */ |
| 375 | if (req->oldptr == USER_ADDR_NULL) { |
| 376 | req->oldidx += 20 * sizeof(struct sk_stats_flowidns_record); |
| 377 | } |
| 378 | return err; |
| 379 | } |
| 380 | |