/*
 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>

#include <dev/random/randomdev.h>
#include <net/flowhash.h>
#include <netkey/key.h>

#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <skywalk/nexus/flowswitch/flow/flow_var.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/namespace/flowidns.h>

struct flow_entry *fe_alloc(boolean_t);
static void fe_free(struct flow_entry *);
static int fe_id_cmp(const struct flow_entry *, const struct flow_entry *);
static void fe_stats_init(struct flow_entry *);
static void fe_stats_update(struct flow_entry *);

RB_GENERATE_PREV(flow_entry_id_tree, flow_entry, fe_id_link, fe_id_cmp);

os_refgrp_decl(static, flow_entry_refgrp, "flow_entry", NULL);

KALLOC_TYPE_DECLARE(sk_fed_zone);

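/*
 * Pre-computed flow key mask templates, one per supported FKMASK_*
 * tuple.  An all-ones field participates in lookups for that tuple and
 * a zero field is ignored; the 16-byte alignment allows the masks to be
 * applied with 16-byte vector loads when masking a packet's flow key.
 */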
const struct flow_key fk_mask_2tuple
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_2TUPLE,
	.fk_ipver = 0,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0,
	.fk_src._addr64[1] = 0,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_3tuple
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_3TUPLE,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_4tuple
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_4TUPLE,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0xffff,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_5tuple
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_5TUPLE,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0xffff,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0xffffffffffffffffULL,
	.fk_dst._addr64[1] = 0xffffffffffffffffULL,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow1
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_IPFLOW1,
	.fk_ipver = 0,
	.fk_proto = 0xff,
	.fk_sport = 0,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0,
	.fk_src._addr64[1] = 0,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow2
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_IPFLOW2,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow3
    __sk_aligned(16) =
{
	.fk_mask = FKMASK_IPFLOW3,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0xffffffffffffffffULL,
	.fk_dst._addr64[1] = 0xffffffffffffffffULL,
	.fk_pad[0] = 0,
};

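/*
 * Lookup helpers: flow owners are keyed by <context, pid, low_latency>
 * in the bucket's red-black tree, and flow entries are keyed by flow
 * UUID in the owner's id tree.  A successful entry lookup returns a
 * retained reference that the caller must release.
 */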
struct flow_owner *
flow_owner_find_by_pid(struct flow_owner_bucket *fob, pid_t pid, void *context,
    bool low_latency)
{
	struct flow_owner find = { .fo_context = context, .fo_pid = pid,
	    .fo_low_latency = low_latency };

	ASSERT(low_latency == true || low_latency == false);
	FOB_LOCK_ASSERT_HELD(fob);
	return RB_FIND(flow_owner_tree, &fob->fob_owner_head, &find);
}

struct flow_entry *
flow_entry_find_by_uuid(struct flow_owner *fo, uuid_t uuid)
{
	struct flow_entry find, *fe = NULL;
	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	uuid_copy(find.fe_uuid, uuid);
	fe = RB_FIND(flow_entry_id_tree, &fo->fo_flow_entry_id_head, &find);
	if (fe != NULL) {
		flow_entry_retain(fe);
	}

	return fe;
}

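/*
 * Derive a flowswitch-domain flow ID for this entry from its flow key
 * (addresses, ports, address family and protocol) by registering the
 * key with the flow ID namespace.
 */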
static uint32_t
flow_entry_calc_flowid(struct flow_entry *fe)
{
	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));
	_CASSERT(sizeof(fe->fe_key.fk_src) == sizeof(fk.ffk_laddr));
	_CASSERT(sizeof(fe->fe_key.fk_dst) == sizeof(fk.ffk_raddr));
	bcopy(&fe->fe_key.fk_src, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&fe->fe_key.fk_dst, &fk.ffk_raddr, sizeof(fk.ffk_raddr));

	fk.ffk_lport = fe->fe_key.fk_sport;
	fk.ffk_rport = fe->fe_key.fk_dport;
	fk.ffk_af = (fe->fe_key.fk_ipver == 4) ? AF_INET : AF_INET6;
	fk.ffk_proto = fe->fe_key.fk_proto;

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_FLOWSWITCH, &fk, &flowid);
	return flowid;
}

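/*
 * Link a child flow entry onto its parent's child list, taking a
 * reference on the child.  Fails if the parent has gone nonviable or if
 * a child with the same UUID is already on the list; nonviable children
 * encountered during the walk are unlinked and released opportunistically.
 */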
static bool
flow_entry_add_child(struct flow_entry *parent_fe, struct flow_entry *child_fe)
{
	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
	ASSERT(parent_fe->fe_flags & FLOWENTF_PARENT);

	lck_rw_lock_exclusive(&parent_fe->fe_child_list_lock);

	if (parent_fe->fe_flags & FLOWENTF_NONVIABLE) {
		SK_ERR("child entry add failed, parent fe \"%s\" nonviable 0x%llx "
		    "flags 0x%b %s(%d)", fe_as_string(parent_fe,
		    dbgbuf, sizeof(dbgbuf)), SK_KVA(parent_fe), parent_fe->fe_flags,
		    FLOWENTF_BITS, parent_fe->fe_proc_name,
		    parent_fe->fe_pid);
		lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);
		return false;
	}

	struct flow_entry *fe, *tfe;
	TAILQ_FOREACH_SAFE(fe, &parent_fe->fe_child_list, fe_child_link, tfe) {
		if (!fe_id_cmp(fe, child_fe)) {
			lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);
			SK_ERR("child entry \"%s\" already exists at fe 0x%llx "
			    "flags 0x%b %s(%d)", fe_as_string(fe,
			    dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
			    FLOWENTF_BITS, fe->fe_proc_name,
			    fe->fe_pid);
			return false;
		}

		if (fe->fe_flags & FLOWENTF_NONVIABLE) {
			TAILQ_REMOVE(&parent_fe->fe_child_list, fe, fe_child_link);
			/* keep the count update outside ASSERT; asserts vanish in release builds */
			ASSERT(parent_fe->fe_child_count > 0);
			parent_fe->fe_child_count--;
			flow_entry_release(&fe);
		}
	}

	flow_entry_retain(child_fe);
	TAILQ_INSERT_TAIL(&parent_fe->fe_child_list, child_fe, fe_child_link);
	parent_fe->fe_child_count++;
	ASSERT(parent_fe->fe_child_count > 0);

	lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);

	return true;
}

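/*
 * Unlink and release every child of a parent flow entry.  Children that
 * are still viable are flagged for nonviability first so the reaper
 * thread will tear down their state; the fsw_pending_nonviable counter
 * is raised before the CAS on fe_want_nonviable, and the loser of the
 * race decrements it (see the comment in the loop).
 */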
static void
flow_entry_remove_all_children(struct flow_entry *parent_fe, struct nx_flowswitch *fsw)
{
	bool sched_reaper_thread = false;

	ASSERT(parent_fe->fe_flags & FLOWENTF_PARENT);

	lck_rw_lock_exclusive(&parent_fe->fe_child_list_lock);

	struct flow_entry *fe, *tfe;
	TAILQ_FOREACH_SAFE(fe, &parent_fe->fe_child_list, fe_child_link, tfe) {
		if (!(fe->fe_flags & FLOWENTF_NONVIABLE)) {
			/*
			 * fsw_pending_nonviable is a hint for the reaper
			 * thread; because setting fe_want_nonviable and
			 * incrementing the fsw_pending_nonviable counter
			 * is not atomic, let the increment happen first,
			 * and have the thread losing the CAS do the
			 * decrement.
			 */
			os_atomic_inc(&fsw->fsw_pending_nonviable, relaxed);
			if (os_atomic_cmpxchg(&fe->fe_want_nonviable, 0, 1, acq_rel)) {
				sched_reaper_thread = true;
			} else {
				os_atomic_dec(&fsw->fsw_pending_nonviable, relaxed);
			}
		}

		TAILQ_REMOVE(&parent_fe->fe_child_list, fe, fe_child_link);
		/* keep the count update outside ASSERT; asserts vanish in release builds */
		ASSERT(parent_fe->fe_child_count > 0);
		parent_fe->fe_child_count--;
		flow_entry_release(&fe);
	}

	lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);

	if (sched_reaper_thread) {
		fsw_reap_sched(fsw);
	}
}

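/*
 * Copy the caller-supplied demux patterns into a child flow entry and
 * select a masked-compare routine matching each pattern's length (16-
 * or 32-byte masks).  Patterns longer than 32 bytes are unsupported and
 * trip the VERIFY.
 */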
static void
flow_entry_set_demux_patterns(struct flow_entry *fe, struct nx_flow_req *req)
{
	ASSERT(fe->fe_flags & FLOWENTF_CHILD);
	ASSERT(req->nfr_flow_demux_count > 0);

	fe->fe_demux_patterns = sk_alloc_type_array(struct kern_flow_demux_pattern,
	    req->nfr_flow_demux_count, Z_WAITOK | Z_NOFAIL, skmem_tag_flow_demux);

	for (int i = 0; i < req->nfr_flow_demux_count; i++) {
		bcopy(&req->nfr_flow_demux_patterns[i],
		    &fe->fe_demux_patterns[i].fdp_demux_pattern,
		    sizeof(struct flow_demux_pattern));

		fe->fe_demux_patterns[i].fdp_memcmp_mask = NULL;
		if (req->nfr_flow_demux_patterns[i].fdp_len == 16) {
			fe->fe_demux_patterns[i].fdp_memcmp_mask = sk_memcmp_mask_16B;
		} else if (req->nfr_flow_demux_patterns[i].fdp_len == 32) {
			fe->fe_demux_patterns[i].fdp_memcmp_mask = sk_memcmp_mask_32B;
		} else if (req->nfr_flow_demux_patterns[i].fdp_len > 32) {
			VERIFY(0);
		}
	}

	fe->fe_demux_pattern_count = req->nfr_flow_demux_count;
}

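/*
 * Translate a flow key into an inet traffic descriptor, copying over
 * only the fields present in the key's mask; used below to match the
 * flow against interface traffic rules when selecting a queue set.
 */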
static int
convert_flowkey_to_inet_td(struct flow_key *key,
    struct ifnet_traffic_descriptor_inet *td)
{
	/* start from a clean descriptor; fields are OR-ed in selectively below */
	bzero(td, sizeof(*td));

	if ((key->fk_mask & FKMASK_IPVER) != 0) {
		td->inet_ipver = key->fk_ipver;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_IPVER;
	}
	if ((key->fk_mask & FKMASK_PROTO) != 0) {
		td->inet_proto = key->fk_proto;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_PROTO;
	}
	if ((key->fk_mask & FKMASK_SRC) != 0) {
		if (td->inet_ipver == IPVERSION) {
			bcopy(&key->fk_src4, &td->inet_laddr.iia_v4addr,
			    sizeof(key->fk_src4));
		} else {
			bcopy(&key->fk_src6, &td->inet_laddr,
			    sizeof(key->fk_src6));
		}
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_LADDR;
	}
	if ((key->fk_mask & FKMASK_DST) != 0) {
		if (td->inet_ipver == IPVERSION) {
			bcopy(&key->fk_dst4, &td->inet_raddr.iia_v4addr,
			    sizeof(key->fk_dst4));
		} else {
			bcopy(&key->fk_dst6, &td->inet_raddr,
			    sizeof(key->fk_dst6));
		}
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_RADDR;
	}
	if ((key->fk_mask & FKMASK_SPORT) != 0) {
		td->inet_lport = key->fk_sport;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_LPORT;
	}
	if ((key->fk_mask & FKMASK_DPORT) != 0) {
		td->inet_rport = key->fk_dport;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_RPORT;
	}
	td->inet_common.itd_type = IFNET_TRAFFIC_DESCRIPTOR_TYPE_INET;
	td->inet_common.itd_len = sizeof(*td);
	td->inet_common.itd_flags = IFNET_TRAFFIC_DESCRIPTOR_FLAG_INBOUND |
	    IFNET_TRAFFIC_DESCRIPTOR_FLAG_OUTBOUND;
	return 0;
}

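/*
 * Re-evaluate the dynamic queue set selection for a flow whenever the
 * interface's traffic rules change.  Any previously selected qset is
 * dropped first; if a rule matches the flow's traffic descriptor, the
 * corresponding qset on the device netif is looked up and cached in
 * fe_qset.
 */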
void
flow_qset_select_dynamic(struct nx_flowswitch *fsw, struct flow_entry *fe,
    boolean_t skip_if_no_change)
{
	struct ifnet_traffic_descriptor_inet td;
	struct ifnet *ifp;
	uint64_t qset_id;
	struct nx_netif *nif;
	boolean_t changed;
	int err;

	ifp = fsw->fsw_ifp;
	changed = ifnet_sync_traffic_rule_genid(ifp, &fe->fe_tr_genid);
	if (!changed && skip_if_no_change) {
		return;
	}
	if (fe->fe_qset != NULL) {
		nx_netif_qset_release(&fe->fe_qset);
		ASSERT(fe->fe_qset == NULL);
	}
	if (ifp->if_traffic_rule_count == 0) {
		DTRACE_SKYWALK2(no__rules, struct nx_flowswitch *, fsw,
		    struct flow_entry *, fe);
		return;
	}
	err = convert_flowkey_to_inet_td(&fe->fe_key, &td);
	ASSERT(err == 0);
	err = nxctl_inet_traffic_rule_find_qset_id(ifp->if_xname, &td, &qset_id);
	if (err != 0) {
		DTRACE_SKYWALK3(qset__id__not__found,
		    struct nx_flowswitch *, fsw,
		    struct flow_entry *, fe,
		    struct ifnet_traffic_descriptor_inet *, &td);
		return;
	}
	DTRACE_SKYWALK4(qset__id__found, struct nx_flowswitch *, fsw,
	    struct flow_entry *, fe, struct ifnet_traffic_descriptor_inet *,
	    &td, uint64_t, qset_id);
	nif = NX_NETIF_PRIVATE(fsw->fsw_dev_ch->ch_na->na_nx);
	ASSERT(fe->fe_qset == NULL);
	fe->fe_qset = nx_netif_find_qset(nif, qset_id);
}

/*
 * The owner bucket's writer lock must be held across these flow entry
 * memory management functions.
 */
struct flow_entry *
flow_entry_alloc(struct flow_owner *fo, struct nx_flow_req *req, int *perr)
{
	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
	nexus_port_t nx_port = req->nfr_nx_port;
	struct flow_entry *fe = NULL;
	struct flow_entry *parent_fe = NULL;
	flowadv_idx_t fadv_idx = FLOWADV_IDX_NONE;
	struct nexus_adapter *dev_na;
	struct nx_netif *nif;
	int err;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT(!fo->fo_nx_port_destroyed);

	*perr = 0;

	struct flow_key key __sk_aligned(16);
	err = flow_req2key(req, &key);
	if (__improbable(err != 0)) {
		SK_ERR("invalid request (err %d)", err);
		goto done;
	}

	struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
	fe = flow_mgr_find_conflicting_fe(fm, &key);
	if (fe != NULL) {
		if ((fe->fe_flags & FLOWENTF_PARENT) &&
		    uuid_compare(fe->fe_uuid, req->nfr_parent_flow_uuid) == 0) {
			parent_fe = fe;
			fe = NULL;
		} else {
			SK_ERR("entry \"%s\" already exists at fe 0x%llx "
			    "flags 0x%b %s(%d)", fe_as_string(fe,
			    dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
			    FLOWENTF_BITS, fe->fe_proc_name,
			    fe->fe_pid);
			/* don't return it */
			flow_entry_release(&fe);
			err = EEXIST;
			goto done;
		}
	} else if (!uuid_is_null(req->nfr_parent_flow_uuid)) {
		uuid_string_t uuid_str;
		sk_uuid_unparse(req->nfr_parent_flow_uuid, uuid_str);
		SK_ERR("parent entry \"%s\" does not exist", uuid_str);
		err = ENOENT;
		goto done;
	}

	if ((req->nfr_flags & NXFLOWREQF_FLOWADV) &&
	    (flow_owner_flowadv_index_alloc(fo, &fadv_idx) != 0)) {
		SK_ERR("failed to alloc flowadv index for flow %s",
		    sk_uuid_unparse(req->nfr_flow_uuid, dbgbuf));
		/* XXX: what is the most appropriate error code? */
		err = ENOSPC;
		goto done;
	}

	fe = fe_alloc(TRUE);
	if (__improbable(fe == NULL)) {
		err = ENOMEM;
		goto done;
	}

	fe->fe_key = key;
	if (req->nfr_route != NULL) {
		fe->fe_laddr_gencnt = req->nfr_route->fr_laddr_gencnt;
	} else {
		fe->fe_laddr_gencnt = req->nfr_saddr_gencnt;
	}

	if (__improbable(req->nfr_flags & NXFLOWREQF_LISTENER)) {
		/* mark this as listener mode */
		os_atomic_or(&fe->fe_flags, FLOWENTF_LISTENER, relaxed);
	} else {
		ASSERT((fe->fe_key.fk_ipver == IPVERSION &&
		    fe->fe_key.fk_src4.s_addr != INADDR_ANY) ||
		    (fe->fe_key.fk_ipver == IPV6_VERSION &&
		    !IN6_IS_ADDR_UNSPECIFIED(&fe->fe_key.fk_src6)));

		/* mark this as connected mode */
		os_atomic_or(&fe->fe_flags, FLOWENTF_CONNECTED, relaxed);
	}

	if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
		fe->fe_flags |= FLOWENTF_NOWAKEFROMSLEEP;
	}
	fe->fe_port_reservation = req->nfr_port_reservation;
	req->nfr_port_reservation = NULL;
	if (req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV) {
		fe->fe_flags |= FLOWENTF_EXTRL_PORT;
	}
	fe->fe_proto_reservation = req->nfr_proto_reservation;
	req->nfr_proto_reservation = NULL;
	if (req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV) {
		fe->fe_flags |= FLOWENTF_EXTRL_PROTO;
	}
	fe->fe_ipsec_reservation = req->nfr_ipsec_reservation;
	req->nfr_ipsec_reservation = NULL;

	fe->fe_tx_process = dp_flow_tx_process;
	fe->fe_rx_process = dp_flow_rx_process;

	dev_na = fo->fo_fsw->fsw_dev_ch->ch_na;
	nif = NX_NETIF_PRIVATE(dev_na->na_nx);
	if (NX_LLINK_PROV(nif->nif_nx) &&
	    (fe->fe_key.fk_mask & (FKMASK_IPVER | FKMASK_PROTO | FKMASK_DST)) ==
	    (FKMASK_IPVER | FKMASK_PROTO | FKMASK_DST)) {
		if (req->nfr_qset_id != 0) {
			fe->fe_qset_select = FE_QSET_SELECT_FIXED;
			fe->fe_qset_id = req->nfr_qset_id;
			fe->fe_qset = nx_netif_find_qset(nif, req->nfr_qset_id);
		} else {
			fe->fe_qset_select = FE_QSET_SELECT_DYNAMIC;
			fe->fe_qset_id = 0;
			flow_qset_select_dynamic(fo->fo_fsw, fe, FALSE);
		}
	} else {
		fe->fe_qset_select = FE_QSET_SELECT_NONE;
	}
	if (req->nfr_flags & NXFLOWREQF_LOW_LATENCY) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_LOW_LATENCY, relaxed);
	}

	fe->fe_transport_protocol = req->nfr_transport_protocol;
	if (NX_FSW_TCP_RX_AGG_ENABLED() &&
	    (fo->fo_fsw->fsw_nx->nx_prov->nxprov_params->nxp_max_frags > 1) &&
	    (fe->fe_key.fk_proto == IPPROTO_TCP) &&
	    (fe->fe_key.fk_mask == FKMASK_5TUPLE)) {
		fe->fe_rx_process = flow_rx_agg_tcp;
	}
	uuid_copy(fe->fe_uuid, req->nfr_flow_uuid);
	if ((req->nfr_flags & NXFLOWREQF_LISTENER) == 0 &&
	    (req->nfr_flags & NXFLOWREQF_TRACK) != 0) {
		switch (req->nfr_ip_protocol) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			os_atomic_or(&fe->fe_flags, FLOWENTF_TRACK, relaxed);
			break;
		default:
			break;
		}
	}

	if (req->nfr_flags & NXFLOWREQF_QOS_MARKING) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_QOS_MARKING, relaxed);
	}

	if (req->nfr_flags & NXFLOWREQF_PARENT) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_PARENT, relaxed);
		TAILQ_INIT(&fe->fe_child_list);
		lck_rw_init(&fe->fe_child_list_lock, &nexus_lock_group,
		    &nexus_lock_attr);
	}

	if (req->nfr_route != NULL) {
		fe->fe_route = req->nfr_route;
		req->nfr_route = NULL;
	}

	fe->fe_nx_port = nx_port;
	fe->fe_adv_idx = fadv_idx;

	if (req->nfr_inp_flowhash != 0) {
		/*
		 * BSD flow; use the inpcb flow hash value.
		 */
		fe->fe_flowid = req->nfr_inp_flowhash;
		fe->fe_flags |= FLOWENTF_EXTRL_FLOWID;
	} else {
		fe->fe_flowid = flow_entry_calc_flowid(fe);
	}

	if (fe->fe_adv_idx != FLOWADV_IDX_NONE && fo->fo_nx_port_na != NULL) {
		na_flowadv_entry_alloc(fo->fo_nx_port_na, fe->fe_uuid,
		    fe->fe_adv_idx, fe->fe_flowid);
	}

	if (KPKT_VALID_SVC(req->nfr_svc_class)) {
		fe->fe_svc_class = (kern_packet_svc_class_t)req->nfr_svc_class;
	} else {
		fe->fe_svc_class = KPKT_SC_BE;
	}

	uuid_copy(fe->fe_eproc_uuid, req->nfr_euuid);
	fe->fe_policy_id = req->nfr_policy_id;
	fe->fe_skip_policy_id = req->nfr_skip_policy_id;

	err = flow_mgr_flow_hash_mask_add(fm, fe->fe_key.fk_mask);
	ASSERT(err == 0);

	if (parent_fe != NULL) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_CHILD, relaxed);
		flow_entry_set_demux_patterns(fe, req);
		fe->fe_demux_pkt_data = sk_alloc_data(FLOW_DEMUX_MAX_LEN,
		    Z_WAITOK | Z_NOFAIL, skmem_tag_flow_demux);
		if (!flow_entry_add_child(parent_fe, fe)) {
			/* without an error, the done path would treat this as success */
			err = ENOENT;
			goto done;
		}
	} else {
		fe->fe_key_hash = flow_key_hash(&fe->fe_key);
		err = cuckoo_hashtable_add_with_hash(fm->fm_flow_table,
		    &fe->fe_cnode, fe->fe_key_hash);
		if (err != 0) {
			SK_ERR("flow table add failed (err %d)", err);
			flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
			goto done;
		}
	}

	RB_INSERT(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
	flow_entry_retain(fe);	/* one refcnt in id_tree */

	*(struct nx_flowswitch **)(uintptr_t)&fe->fe_fsw = fo->fo_fsw;
	fe->fe_pid = fo->fo_pid;
	if (req->nfr_epid != -1 && req->nfr_epid != fo->fo_pid) {
		fe->fe_epid = req->nfr_epid;
		proc_name(fe->fe_epid, fe->fe_eproc_name,
		    sizeof(fe->fe_eproc_name));
	} else {
		fe->fe_epid = -1;
	}

	(void) snprintf(fe->fe_proc_name, sizeof(fe->fe_proc_name), "%s",
	    fo->fo_name);

	fe_stats_init(fe);
	flow_stats_retain(fe->fe_stats);
	req->nfr_flow_stats = fe->fe_stats;

#if SK_LOG
	SK_DF(SK_VERB_FLOW, "allocated entry \"%s\" fe 0x%llx flags 0x%b "
	    "[fo 0x%llx]", fe_as_string(fe, dbgbuf,
	    sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS,
	    SK_KVA(fo));
#endif /* SK_LOG */

done:
	if (parent_fe != NULL) {
		flow_entry_release(&parent_fe);
	}
	if (err != 0) {
		if (fadv_idx != FLOWADV_IDX_NONE) {
			flow_owner_flowadv_index_free(fo, fadv_idx);
		}
		if (fe != NULL) {
			flow_entry_release(&fe);
		}
	}
	*perr = err;
	return fe;
}

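/*
 * Quiesce a flow entry: resolve any pending nonviability request,
 * withdraw the local port namespace reservation so the port is no
 * longer eligible for offload, free the flow advisory slot, and mark
 * all child flows nonviable.  May be called more than once on the same
 * entry; FLOWENTF_TORN_DOWN guards the one-shot work.
 */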
void
flow_entry_teardown(struct flow_owner *fo, struct flow_entry *fe)
{
#if SK_LOG
	char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
	SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b [fo 0x%llx] "
	    "non_via %d withdrawn %d", fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
	    SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS, SK_KVA(fo),
	    fe->fe_want_nonviable, fe->fe_want_withdraw);
#endif /* SK_LOG */
	struct nx_flowswitch *fsw = fo->fo_fsw;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
	ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
	ASSERT(fsw != NULL);

	if (os_atomic_cmpxchg(&fe->fe_want_nonviable, 1, 0, acq_rel)) {
		ASSERT(fsw->fsw_pending_nonviable != 0);
		os_atomic_dec(&fsw->fsw_pending_nonviable, relaxed);
		os_atomic_or(&fe->fe_flags, FLOWENTF_NONVIABLE, relaxed);
	}

	/* always withdraw the namespace during teardown */
	if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
	    !(fe->fe_flags & FLOWENTF_WITHDRAWN)) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_WITHDRAWN, relaxed);
		os_atomic_store(&fe->fe_want_withdraw, 0, release);
		/* local port is now inactive; not eligible for offload */
		flow_namespace_withdraw(&fe->fe_port_reservation);
	}

	/* we may get here multiple times, so check */
	if (!(fe->fe_flags & FLOWENTF_TORN_DOWN)) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_TORN_DOWN, relaxed);
		if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
			if (fo->fo_nx_port_na != NULL) {
				na_flowadv_entry_free(fo->fo_nx_port_na,
				    fe->fe_uuid, fe->fe_adv_idx, fe->fe_flowid);
			}
			flow_owner_flowadv_index_free(fo, fe->fe_adv_idx);
			fe->fe_adv_idx = FLOWADV_IDX_NONE;
		}
	}
	ASSERT(fe->fe_adv_idx == FLOWADV_IDX_NONE);
	ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);

	/* mark all child flows as nonviable */
	if (fe->fe_flags & FLOWENTF_PARENT) {
		flow_entry_remove_all_children(fe, fsw);
	}
}

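/*
 * Final unlink of a flow entry: tear it down, drop it from the flow
 * table (regular/parent flows only) and from the owner's id tree, then
 * either release the reference held here or, for flows that want a
 * graceful close, park the entry on the flowswitch linger list.
 */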
void
flow_entry_destroy(struct flow_owner *fo, struct flow_entry *fe, bool nolinger,
    void *close_params)
{
	struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
	int err;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	/*
	 * regular flow: one ref in flow_table, one in id_tree, one here
	 * child flow: one ref in id_tree, one here
	 */
	ASSERT(flow_entry_refcnt(fe) > 2 ||
	    ((fe->fe_flags & FLOWENTF_CHILD) && flow_entry_refcnt(fe) > 1));

	flow_entry_teardown(fo, fe);

	err = flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
	ASSERT(err == 0);

	/* only regular or parent flows have entries in the flow_table */
	if (__probable(!(fe->fe_flags & FLOWENTF_CHILD))) {
		uint32_t hash;
		hash = flow_key_hash(&fe->fe_key);
		cuckoo_hashtable_del(fm->fm_flow_table, &fe->fe_cnode, hash);
	}

	RB_REMOVE(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
	struct flow_entry *tfe = fe;
	flow_entry_release(&tfe);

	ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
	os_atomic_or(&fe->fe_flags, FLOWENTF_DESTROYED, relaxed);

	if (fe->fe_transport_protocol == IPPROTO_QUIC) {
		if (!nolinger && close_params != NULL) {
			flow_track_abort_quic(fe, close_params);
		}
		flow_entry_release(&fe);
	} else if (nolinger || !(fe->fe_flags & FLOWENTF_WAIT_CLOSE)) {
		flow_entry_release(&fe);
	} else {
		fsw_linger_insert(fe);
	}
}


uint32_t
flow_entry_refcnt(struct flow_entry *fe)
{
	return os_ref_get_count(&fe->fe_refcnt);
}

void
flow_entry_retain(struct flow_entry *fe)
{
	os_ref_retain(&fe->fe_refcnt);
}

void
flow_entry_release(struct flow_entry **pfe)
{
	struct flow_entry *fe = *pfe;
	ASSERT(fe != NULL);
	*pfe = NULL;	/* caller loses its reference */
#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
		SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b",
		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
		    fe->fe_flags, FLOWENTF_BITS);
	}
#endif /* SK_LOG */

	if (__improbable(os_ref_release(&fe->fe_refcnt) == 0)) {
		fe->fe_nx_port = NEXUS_PORT_ANY;
		if (fe->fe_route != NULL) {
			flow_route_release(fe->fe_route);
			fe->fe_route = NULL;
		}
		if (fe->fe_qset != NULL) {
			nx_netif_qset_release(&fe->fe_qset);
			ASSERT(fe->fe_qset == NULL);
		}
		if (fe->fe_demux_patterns != NULL) {
			sk_free_type_array(struct kern_flow_demux_pattern,
			    fe->fe_demux_pattern_count, fe->fe_demux_patterns);
			fe->fe_demux_patterns = NULL;
			fe->fe_demux_pattern_count = 0;
		}
		if (fe->fe_demux_pkt_data != NULL) {
			sk_free_data(fe->fe_demux_pkt_data, FLOW_DEMUX_MAX_LEN);
			fe->fe_demux_pkt_data = NULL;
		}
		fe_free(fe);
	}
}

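/*
 * flow_entry_dead objects are small bookkeeping records for flows that
 * have gone away; they come from the dedicated sk_fed_zone and are
 * zeroed on allocation.
 */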
struct flow_entry_dead *
flow_entry_dead_alloc(zalloc_flags_t how)
{
	struct flow_entry_dead *fed;

	fed = zalloc_flags(sk_fed_zone, how | Z_ZERO);
	if (fed != NULL) {
		SK_DF(SK_VERB_MEM, "fed 0x%llx ALLOC", SK_KVA(fed));
	}
	return fed;
}

void
flow_entry_dead_free(struct flow_entry_dead *fed)
{
	SK_DF(SK_VERB_MEM, "fed 0x%llx FREE", SK_KVA(fed));
	zfree(sk_fed_zone, fed);
}

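/*
 * Seed the per-flow sk_stats_flow record from the entry's identity
 * (nexus/flow UUIDs, interface, pids and process names, key, protocol,
 * service class), then fold in the current flag state via
 * fe_stats_update().
 */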
static void
fe_stats_init(struct flow_entry *fe)
{
	struct nx_flowswitch *fsw = fe->fe_fsw;
	struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;

	ASSERT(fe->fe_stats != NULL);
	ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);

	bzero(sf, sizeof(*sf));
	uuid_copy(sf->sf_nx_uuid, fsw->fsw_nx->nx_uuid);
	uuid_copy(sf->sf_uuid, fe->fe_uuid);
	(void) strlcpy(sf->sf_if_name, fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
	sf->sf_if_index = fsw->fsw_ifp->if_index;
	sf->sf_pid = fe->fe_pid;
	sf->sf_epid = fe->fe_epid;
	(void) snprintf(sf->sf_proc_name, sizeof(sf->sf_proc_name), "%s",
	    fe->fe_proc_name);
	(void) snprintf(sf->sf_eproc_name, sizeof(sf->sf_eproc_name), "%s",
	    fe->fe_eproc_name);

	sf->sf_nx_port = fe->fe_nx_port;
	sf->sf_key = fe->fe_key;
	sf->sf_protocol = fe->fe_transport_protocol;
	sf->sf_svc_class = (packet_svc_class_t)fe->fe_svc_class;
	sf->sf_adv_idx = fe->fe_adv_idx;

	if (fe->fe_flags & FLOWENTF_TRACK) {
		sf->sf_flags |= SFLOWF_TRACK;
	}
	if (fe->fe_flags & FLOWENTF_LISTENER) {
		sf->sf_flags |= SFLOWF_LISTENER;
	}
	if (fe->fe_route != NULL && (fe->fe_route->fr_flags & FLOWRTF_ONLINK)) {
		sf->sf_flags |= SFLOWF_ONLINK;
	}

	fe_stats_update(fe);
}

static void
fe_stats_update(struct flow_entry *fe)
{
	struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;

	ASSERT(fe->fe_stats != NULL);
	ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);

	if (fe->fe_flags & FLOWENTF_CONNECTED) {
		sf->sf_flags |= SFLOWF_CONNECTED;
	}
	if (fe->fe_flags & FLOWENTF_QOS_MARKING) {
		sf->sf_flags |= SFLOWF_QOS_MARKING;
	}
	if (fe->fe_flags & FLOWENTF_WAIT_CLOSE) {
		sf->sf_flags |= SFLOWF_WAIT_CLOSE;
	}
	if (fe->fe_flags & FLOWENTF_CLOSE_NOTIFY) {
		sf->sf_flags |= SFLOWF_CLOSE_NOTIFY;
	}
	if (fe->fe_flags & FLOWENTF_ABORTED) {
		sf->sf_flags |= SFLOWF_ABORTED;
	}
	if (fe->fe_flags & FLOWENTF_NONVIABLE) {
		sf->sf_flags |= SFLOWF_NONVIABLE;
	}
	if (fe->fe_flags & FLOWENTF_WITHDRAWN) {
		sf->sf_flags |= SFLOWF_WITHDRAWN;
	}
	if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
		sf->sf_flags |= SFLOWF_TORN_DOWN;
	}
	if (fe->fe_flags & FLOWENTF_DESTROYED) {
		sf->sf_flags |= SFLOWF_DESTROYED;
	}
	if (fe->fe_flags & FLOWENTF_LINGERING) {
		sf->sf_flags |= SFLOWF_LINGERING;
	}
	if (fe->fe_flags & FLOWENTF_LOW_LATENCY) {
		sf->sf_flags |= SFLOWF_LOW_LATENCY;
	}
	if (fe->fe_flags & FLOWENTF_PARENT) {
		sf->sf_flags |= SFLOWF_PARENT;
	}
	if (fe->fe_flags & FLOWENTF_CHILD) {
		sf->sf_flags |= SFLOWF_CHILD;
	}
	if (fe->fe_flags & FLOWENTF_NOWAKEFROMSLEEP) {
		sf->sf_flags |= SFLOWF_NOWAKEFROMSLEEP;
	} else {
		sf->sf_flags &= ~SFLOWF_NOWAKEFROMSLEEP;
	}

	sf->sf_bucket_idx = SFLOW_BUCKET_NONE;

	sf->sf_ltrack.sft_state = fe->fe_ltrack.fse_state;
	sf->sf_ltrack.sft_seq = fe->fe_ltrack.fse_seqlo;
	sf->sf_ltrack.sft_max_win = fe->fe_ltrack.fse_max_win;
	sf->sf_ltrack.sft_wscale = fe->fe_ltrack.fse_wscale;
	sf->sf_rtrack.sft_state = fe->fe_rtrack.fse_state;
	sf->sf_rtrack.sft_seq = fe->fe_rtrack.fse_seqlo;
	sf->sf_rtrack.sft_max_win = fe->fe_rtrack.fse_max_win;
}

void
flow_entry_stats_get(struct flow_entry *fe, struct sk_stats_flow *sf)
{
	_CASSERT(sizeof(fe->fe_stats->fs_stats) == sizeof(*sf));

	fe_stats_update(fe);
	bcopy(&fe->fe_stats->fs_stats, sf, sizeof(*sf));
}

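/*
 * Allocate a flow entry from the sk_fe_cache slab, optionally blocking.
 * The entry is zeroed, gets its stats buffer and initial reference, and
 * has its Rx/Tx packet queues initialized.
 */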
struct flow_entry *
fe_alloc(boolean_t can_block)
{
	struct flow_entry *fe;

	_CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);

	fe = skmem_cache_alloc(sk_fe_cache,
	    can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
	if (fe == NULL) {
		return NULL;
	}

	/*
	 * fe_key is 16-byte aligned, which requires fe to begin on a
	 * 16-byte boundary as well.  This alignment is specified at
	 * sk_fe_cache creation time, and we assert it here.
	 */
	ASSERT(IS_P2ALIGNED(fe, 16));
	bzero(fe, sk_fe_size);

	fe->fe_stats = flow_stats_alloc(can_block);
	if (fe->fe_stats == NULL) {
		skmem_cache_free(sk_fe_cache, fe);
		return NULL;
	}

	SK_DF(SK_VERB_MEM, "fe 0x%llx ALLOC", SK_KVA(fe));

	os_ref_init(&fe->fe_refcnt, &flow_entry_refgrp);

	KPKTQ_INIT(&fe->fe_rx_pktq);
	KPKTQ_INIT(&fe->fe_tx_pktq);

	return fe;
}

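/*
 * Last-reference destructor: returns the stats buffer, releases any
 * port/protocol/ipsec namespace reservations still held (unless they
 * are externally owned), gives back the flow ID, and frees the entry
 * to its cache.
 */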
static void
fe_free(struct flow_entry *fe)
{
	ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
	ASSERT(fe->fe_flags & FLOWENTF_DESTROYED);
	ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
	ASSERT(fe->fe_route == NULL);

	ASSERT(fe->fe_stats != NULL);
	flow_stats_release(fe->fe_stats);
	fe->fe_stats = NULL;

	/* only the very last reference to the flow releases the namespace reservation */
	if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
	    NETNS_TOKEN_VALID(&fe->fe_port_reservation)) {
		flow_namespace_destroy(&fe->fe_port_reservation);
		ASSERT(!NETNS_TOKEN_VALID(&fe->fe_port_reservation));
	}
	fe->fe_port_reservation = NULL;

	if (!(fe->fe_flags & FLOWENTF_EXTRL_PROTO) &&
	    protons_token_is_valid(fe->fe_proto_reservation)) {
		protons_release(&fe->fe_proto_reservation);
	}
	fe->fe_proto_reservation = NULL;

	if (key_custom_ipsec_token_is_valid(fe->fe_ipsec_reservation)) {
		key_release_custom_ipsec(&fe->fe_ipsec_reservation);
	}
	fe->fe_ipsec_reservation = NULL;

	if (!(fe->fe_flags & FLOWENTF_EXTRL_FLOWID) && (fe->fe_flowid != 0)) {
		flowidns_release_flowid(fe->fe_flowid);
		fe->fe_flowid = 0;
	}

	skmem_cache_free(sk_fe_cache, fe);
}

static __inline__ int
fe_id_cmp(const struct flow_entry *a, const struct flow_entry *b)
{
	return uuid_compare(a->fe_uuid, b->fe_uuid);
}

#if SK_LOG
SK_NO_INLINE_ATTRIBUTE
char *
fk_as_string(const struct flow_key *fk, char *dst, size_t dsz)
{
	int af;
	char src_s[MAX_IPv6_STR_LEN];
	char dst_s[MAX_IPv6_STR_LEN];

	af = (fk->fk_ipver == 4) ? AF_INET : AF_INET6;

	(void) inet_ntop(af, &fk->fk_src, src_s, sizeof(src_s));
	(void) inet_ntop(af, &fk->fk_dst, dst_s, sizeof(dst_s));
	(void) snprintf(dst, dsz,
	    "ipver=%u,src=%s,dst=%s,proto=0x%02x,sport=%u,dport=%u "
	    "mask=%08x,hash=%08x",
	    fk->fk_ipver, src_s, dst_s, fk->fk_proto, ntohs(fk->fk_sport),
	    ntohs(fk->fk_dport), fk->fk_mask, flow_key_hash(fk));

	return dst;
}

SK_NO_INLINE_ATTRIBUTE
char *
fe_as_string(const struct flow_entry *fe, char *dst, size_t dsz)
{
	char keybuf[FLOWKEY_DBGBUF_SIZE];	/* just for debug messages */
	uuid_string_t uuidstr;

	fk_as_string(&fe->fe_key, keybuf, sizeof(keybuf));

	(void) snprintf(dst, dsz,
	    "fe 0x%llx proc %s nx_port %d flow_uuid %s %s tp_proto=0x%02x",
	    SK_KVA(fe), fe->fe_proc_name, (int)fe->fe_nx_port,
	    sk_uuid_unparse(fe->fe_uuid, uuidstr),
	    keybuf, fe->fe_transport_protocol);

	return dst;
}
#endif /* SK_LOG */