/*
 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>

#include <dev/random/randomdev.h>
#include <net/flowhash.h>
#include <netkey/key.h>

#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <skywalk/nexus/flowswitch/flow/flow_var.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/namespace/flowidns.h>

struct flow_entry *fe_alloc(boolean_t);
static void fe_free(struct flow_entry *);
static int fe_id_cmp(const struct flow_entry *, const struct flow_entry *);
static void fe_stats_init(struct flow_entry *);
static void fe_stats_update(struct flow_entry *);

RB_GENERATE_PREV(flow_entry_id_tree, flow_entry, fe_id_link, fe_id_cmp);

os_refgrp_decl(static, flow_entry_refgrp, "flow_entry", NULL);

KALLOC_TYPE_DECLARE(sk_fed_zone);

const struct flow_key fk_mask_2tuple
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_2TUPLE,
        .fk_ipver = 0,
        .fk_proto = 0xff,
        .fk_sport = 0xffff,
        .fk_dport = 0,
        .fk_src._addr64[0] = 0,
        .fk_src._addr64[1] = 0,
        .fk_dst._addr64[0] = 0,
        .fk_dst._addr64[1] = 0,
        .fk_pad[0] = 0,
};

const struct flow_key fk_mask_3tuple
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_3TUPLE,
        .fk_ipver = 0xff,
        .fk_proto = 0xff,
        .fk_sport = 0xffff,
        .fk_dport = 0,
        .fk_src._addr64[0] = 0xffffffffffffffffULL,
        .fk_src._addr64[1] = 0xffffffffffffffffULL,
        .fk_dst._addr64[0] = 0,
        .fk_dst._addr64[1] = 0,
        .fk_pad[0] = 0,
};

const struct flow_key fk_mask_4tuple
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_4TUPLE,
        .fk_ipver = 0xff,
        .fk_proto = 0xff,
        .fk_sport = 0xffff,
        .fk_dport = 0xffff,
        .fk_src._addr64[0] = 0xffffffffffffffffULL,
        .fk_src._addr64[1] = 0xffffffffffffffffULL,
        .fk_dst._addr64[0] = 0,
        .fk_dst._addr64[1] = 0,
        .fk_pad[0] = 0,
};

const struct flow_key fk_mask_5tuple
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_5TUPLE,
        .fk_ipver = 0xff,
        .fk_proto = 0xff,
        .fk_sport = 0xffff,
        .fk_dport = 0xffff,
        .fk_src._addr64[0] = 0xffffffffffffffffULL,
        .fk_src._addr64[1] = 0xffffffffffffffffULL,
        .fk_dst._addr64[0] = 0xffffffffffffffffULL,
        .fk_dst._addr64[1] = 0xffffffffffffffffULL,
        .fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow1
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_IPFLOW1,
        .fk_ipver = 0,
        .fk_proto = 0xff,
        .fk_sport = 0,
        .fk_dport = 0,
        .fk_src._addr64[0] = 0,
        .fk_src._addr64[1] = 0,
        .fk_dst._addr64[0] = 0,
        .fk_dst._addr64[1] = 0,
        .fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow2
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_IPFLOW2,
        .fk_ipver = 0xff,
        .fk_proto = 0xff,
        .fk_sport = 0,
        .fk_dport = 0,
        .fk_src._addr64[0] = 0xffffffffffffffffULL,
        .fk_src._addr64[1] = 0xffffffffffffffffULL,
        .fk_dst._addr64[0] = 0,
        .fk_dst._addr64[1] = 0,
        .fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow3
    __sk_aligned(16) =
{
        .fk_mask = FKMASK_IPFLOW3,
        .fk_ipver = 0xff,
        .fk_proto = 0xff,
        .fk_sport = 0,
        .fk_dport = 0,
        .fk_src._addr64[0] = 0xffffffffffffffffULL,
        .fk_src._addr64[1] = 0xffffffffffffffffULL,
        .fk_dst._addr64[0] = 0xffffffffffffffffULL,
        .fk_dst._addr64[1] = 0xffffffffffffffffULL,
        .fk_pad[0] = 0,
};

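/*
 * Find the flow owner in a bucket for a given process; lookup is keyed
 * on pid, owner context and the low-latency attribute.  The bucket lock
 * must be held.  Returns NULL if no matching owner exists.
 */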
struct flow_owner *
flow_owner_find_by_pid(struct flow_owner_bucket *fob, pid_t pid, void *context,
    bool low_latency)
{
        struct flow_owner find = { .fo_context = context, .fo_pid = pid,
            .fo_low_latency = low_latency };

        ASSERT(low_latency == true || low_latency == false);
        FOB_LOCK_ASSERT_HELD(fob);
        return RB_FIND(flow_owner_tree, &fob->fob_owner_head, &find);
}

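/*
 * Find a flow entry by UUID in the owner's flow entry id tree.  On
 * success the entry is returned with an additional reference held;
 * the caller must drop it with flow_entry_release().
 */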
struct flow_entry *
flow_entry_find_by_uuid(struct flow_owner *fo, uuid_t uuid)
{
        struct flow_entry find, *fe = NULL;
        FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

        uuid_copy(find.fe_uuid, uuid);
        fe = RB_FIND(flow_entry_id_tree, &fo->fo_flow_entry_id_head, &find);
        if (fe != NULL) {
                flow_entry_retain(fe);
        }

        return fe;
}

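/*
 * Allocate a flow ID for the entry by registering its address, port,
 * protocol and address-family tuple with the flow ID namespace under
 * the flowswitch domain.
 */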
static uint32_t
flow_entry_calc_flowid(struct flow_entry *fe)
{
        uint32_t flowid;
        struct flowidns_flow_key fk;

        bzero(&fk, sizeof(fk));
        _CASSERT(sizeof(fe->fe_key.fk_src) == sizeof(fk.ffk_laddr));
        _CASSERT(sizeof(fe->fe_key.fk_dst) == sizeof(fk.ffk_raddr));
        bcopy(&fe->fe_key.fk_src, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
        bcopy(&fe->fe_key.fk_dst, &fk.ffk_raddr, sizeof(fk.ffk_raddr));

        fk.ffk_lport = fe->fe_key.fk_sport;
        fk.ffk_rport = fe->fe_key.fk_dport;
        fk.ffk_af = (fe->fe_key.fk_ipver == 4) ? AF_INET : AF_INET6;
        fk.ffk_proto = fe->fe_key.fk_proto;

        flowidns_allocate_flowid(FLOWIDNS_DOMAIN_FLOWSWITCH, &fk, &flowid);
        return flowid;
}

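/*
 * Link a child flow entry to its parent, taking a reference on the
 * child.  Fails if the parent has gone non-viable or if a child with
 * the same UUID is already present; non-viable children encountered
 * during the scan are pruned along the way.
 */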
static bool
flow_entry_add_child(struct flow_entry *parent_fe, struct flow_entry *child_fe)
{
        SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
        ASSERT(parent_fe->fe_flags & FLOWENTF_PARENT);

        lck_rw_lock_exclusive(&parent_fe->fe_child_list_lock);

        if (parent_fe->fe_flags & FLOWENTF_NONVIABLE) {
                SK_ERR("child entry add failed, parent fe \"%s\" non-viable 0x%llx "
                    "flags 0x%b %s(%d)", fe_as_string(parent_fe,
                    dbgbuf, sizeof(dbgbuf)), SK_KVA(parent_fe), parent_fe->fe_flags,
                    FLOWENTF_BITS, parent_fe->fe_proc_name,
                    parent_fe->fe_pid);
                lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);
                return false;
        }

        struct flow_entry *fe, *tfe;
        TAILQ_FOREACH_SAFE(fe, &parent_fe->fe_child_list, fe_child_link, tfe) {
                if (!fe_id_cmp(fe, child_fe)) {
                        lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);
                        SK_ERR("child entry \"%s\" already exists at fe 0x%llx "
                            "flags 0x%b %s(%d)", fe_as_string(fe,
                            dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
                            FLOWENTF_BITS, fe->fe_proc_name,
                            fe->fe_pid);
                        return false;
                }

                /* opportunistically prune children that went non-viable */
                if (fe->fe_flags & FLOWENTF_NONVIABLE) {
                        TAILQ_REMOVE(&parent_fe->fe_child_list, fe, fe_child_link);
                        ASSERT(parent_fe->fe_child_count > 0);
                        parent_fe->fe_child_count--;
                        flow_entry_release(&fe);
                }
        }

        flow_entry_retain(child_fe);
        TAILQ_INSERT_TAIL(&parent_fe->fe_child_list, child_fe, fe_child_link);
        parent_fe->fe_child_count++;
        ASSERT(parent_fe->fe_child_count > 0);

        lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);

        return true;
}

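/*
 * Detach all children from a parent flow entry.  Children that are
 * still viable are flagged as wanting to go non-viable, and the
 * flowswitch reaper thread is scheduled if any were flagged.
 */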
static void
flow_entry_remove_all_children(struct flow_entry *parent_fe,
    struct nx_flowswitch *fsw)
{
        bool sched_reaper_thread = false;

        ASSERT(parent_fe->fe_flags & FLOWENTF_PARENT);

        lck_rw_lock_exclusive(&parent_fe->fe_child_list_lock);

        struct flow_entry *fe, *tfe;
        TAILQ_FOREACH_SAFE(fe, &parent_fe->fe_child_list, fe_child_link, tfe) {
                if (!(fe->fe_flags & FLOWENTF_NONVIABLE)) {
                        /*
                         * fsw_pending_nonviable is a hint for the reaper
                         * thread; because setting fe_want_nonviable and
                         * incrementing the fsw_pending_nonviable counter
                         * is not atomic, let the increment happen first,
                         * and have the thread that loses the CAS do the
                         * decrement.
                         */
                        os_atomic_inc(&fsw->fsw_pending_nonviable, relaxed);
                        if (os_atomic_cmpxchg(&fe->fe_want_nonviable, 0, 1, acq_rel)) {
                                sched_reaper_thread = true;
                        } else {
                                os_atomic_dec(&fsw->fsw_pending_nonviable, relaxed);
                        }
                }

                TAILQ_REMOVE(&parent_fe->fe_child_list, fe, fe_child_link);
                ASSERT(parent_fe->fe_child_count > 0);
                parent_fe->fe_child_count--;
                flow_entry_release(&fe);
        }

        lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);

        if (sched_reaper_thread) {
                fsw_reap_sched(fsw);
        }
}

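/*
 * Copy the flow demux patterns from the request into a child flow
 * entry, selecting an optimized memory-compare routine for patterns
 * of 16 or 32 bytes; patterns longer than 32 bytes are not supported.
 */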
static void
flow_entry_set_demux_patterns(struct flow_entry *fe, struct nx_flow_req *req)
{
        ASSERT(fe->fe_flags & FLOWENTF_CHILD);
        ASSERT(req->nfr_flow_demux_count > 0);

        fe->fe_demux_patterns = sk_alloc_type_array(struct kern_flow_demux_pattern,
            req->nfr_flow_demux_count, Z_WAITOK | Z_NOFAIL, skmem_tag_flow_demux);

        for (int i = 0; i < req->nfr_flow_demux_count; i++) {
                bcopy(&req->nfr_flow_demux_patterns[i],
                    &fe->fe_demux_patterns[i].fdp_demux_pattern,
                    sizeof(struct flow_demux_pattern));

                fe->fe_demux_patterns[i].fdp_memcmp_mask = NULL;
                if (req->nfr_flow_demux_patterns[i].fdp_len == 16) {
                        fe->fe_demux_patterns[i].fdp_memcmp_mask = sk_memcmp_mask_16B;
                } else if (req->nfr_flow_demux_patterns[i].fdp_len == 32) {
                        fe->fe_demux_patterns[i].fdp_memcmp_mask = sk_memcmp_mask_32B;
                } else if (req->nfr_flow_demux_patterns[i].fdp_len > 32) {
                        VERIFY(0);
                }
        }

        fe->fe_demux_pattern_count = req->nfr_flow_demux_count;
}

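/*
 * Convert a flow key into an inet traffic descriptor, populating only
 * those fields covered by the key's mask.  The descriptor is used to
 * match the flow against interface traffic rules.
 */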
static int
convert_flowkey_to_inet_td(struct flow_key *key,
    struct ifnet_traffic_descriptor_inet *td)
{
        if ((key->fk_mask & FKMASK_IPVER) != 0) {
                td->inet_ipver = key->fk_ipver;
                td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_IPVER;
        }
        if ((key->fk_mask & FKMASK_PROTO) != 0) {
                td->inet_proto = key->fk_proto;
                td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_PROTO;
        }
        if ((key->fk_mask & FKMASK_SRC) != 0) {
                if (td->inet_ipver == IPVERSION) {
                        bcopy(&key->fk_src4, &td->inet_laddr.iia_v4addr,
                            sizeof(key->fk_src4));
                } else {
                        bcopy(&key->fk_src6, &td->inet_laddr,
                            sizeof(key->fk_src6));
                }
                td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_LADDR;
        }
        if ((key->fk_mask & FKMASK_DST) != 0) {
                if (td->inet_ipver == IPVERSION) {
                        bcopy(&key->fk_dst4, &td->inet_raddr.iia_v4addr,
                            sizeof(key->fk_dst4));
                } else {
                        bcopy(&key->fk_dst6, &td->inet_raddr,
                            sizeof(key->fk_dst6));
                }
                td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_RADDR;
        }
        if ((key->fk_mask & FKMASK_SPORT) != 0) {
                td->inet_lport = key->fk_sport;
                td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_LPORT;
        }
        if ((key->fk_mask & FKMASK_DPORT) != 0) {
                td->inet_rport = key->fk_dport;
                td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_RPORT;
        }
        td->inet_common.itd_type = IFNET_TRAFFIC_DESCRIPTOR_TYPE_INET;
        td->inet_common.itd_len = sizeof(*td);
        td->inet_common.itd_flags = IFNET_TRAFFIC_DESCRIPTOR_FLAG_INBOUND |
            IFNET_TRAFFIC_DESCRIPTOR_FLAG_OUTBOUND;
        return 0;
}

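/*
 * (Re)select the netif queue set for a flow based on the interface's
 * current traffic rules.  When skip_if_no_change is set, this is a
 * no-op unless the interface rule generation count has changed.
 */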
void
flow_qset_select_dynamic(struct nx_flowswitch *fsw, struct flow_entry *fe,
    boolean_t skip_if_no_change)
{
        struct ifnet_traffic_descriptor_inet td;
        struct ifnet *ifp;
        uint64_t qset_id;
        struct nx_netif *nif;
        boolean_t changed;
        int err;

        ifp = fsw->fsw_ifp;
        changed = ifnet_sync_traffic_rule_genid(ifp, &fe->fe_tr_genid);
        if (!changed && skip_if_no_change) {
                return;
        }
        if (fe->fe_qset != NULL) {
                nx_netif_qset_release(&fe->fe_qset);
                ASSERT(fe->fe_qset == NULL);
        }
        if (ifp->if_traffic_rule_count == 0) {
                DTRACE_SKYWALK2(no__rules, struct nx_flowswitch *, fsw,
                    struct flow_entry *, fe);
                return;
        }
        err = convert_flowkey_to_inet_td(&fe->fe_key, &td);
        ASSERT(err == 0);
        err = nxctl_inet_traffic_rule_find_qset_id(ifp->if_xname, &td, &qset_id);
        if (err != 0) {
                DTRACE_SKYWALK3(qset__id__not__found,
                    struct nx_flowswitch *, fsw,
                    struct flow_entry *, fe,
                    struct ifnet_traffic_descriptor_inet *, &td);
                return;
        }
        DTRACE_SKYWALK4(qset__id__found, struct nx_flowswitch *, fsw,
            struct flow_entry *, fe, struct ifnet_traffic_descriptor_inet *,
            &td, uint64_t, qset_id);
        nif = NX_NETIF_PRIVATE(fsw->fsw_dev_ch->ch_na->na_nx);
        ASSERT(fe->fe_qset == NULL);
        fe->fe_qset = nx_netif_find_qset(nif, qset_id);
}

/* writer-lock must be held for memory management functions */
struct flow_entry *
flow_entry_alloc(struct flow_owner *fo, struct nx_flow_req *req, int *perr)
{
        SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
        nexus_port_t nx_port = req->nfr_nx_port;
        struct flow_entry *fe = NULL;
        struct flow_entry *parent_fe = NULL;
        flowadv_idx_t fadv_idx = FLOWADV_IDX_NONE;
        struct nexus_adapter *dev_na;
        struct nx_netif *nif;
        int err;

        FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
        ASSERT(nx_port != NEXUS_PORT_ANY);
        ASSERT(!fo->fo_nx_port_destroyed);

        *perr = 0;

        struct flow_key key __sk_aligned(16);
        err = flow_req2key(req, &key);
        if (__improbable(err != 0)) {
                SK_ERR("invalid request (err %d)", err);
                goto done;
        }

        struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
        fe = flow_mgr_find_conflicting_fe(fm, &key);
        if (fe != NULL) {
                if ((fe->fe_flags & FLOWENTF_PARENT) &&
                    uuid_compare(fe->fe_uuid, req->nfr_parent_flow_uuid) == 0) {
                        parent_fe = fe;
                        fe = NULL;
                } else {
                        SK_ERR("entry \"%s\" already exists at fe 0x%llx "
                            "flags 0x%b %s(%d)", fe_as_string(fe,
                            dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
                            FLOWENTF_BITS, fe->fe_proc_name,
                            fe->fe_pid);
                        /* don't return it */
                        flow_entry_release(&fe);
                        err = EEXIST;
                        goto done;
                }
        } else if (!uuid_is_null(req->nfr_parent_flow_uuid)) {
                uuid_string_t uuid_str;
                sk_uuid_unparse(req->nfr_parent_flow_uuid, uuid_str);
                SK_ERR("parent entry \"%s\" does not exist", uuid_str);
                err = ENOENT;
                goto done;
        }

        if ((req->nfr_flags & NXFLOWREQF_FLOWADV) &&
            (flow_owner_flowadv_index_alloc(fo, &fadv_idx) != 0)) {
                SK_ERR("failed to alloc flowadv index for flow %s",
                    sk_uuid_unparse(req->nfr_flow_uuid, dbgbuf));
                /* XXX: what is the most appropriate error code? */
                err = ENOSPC;
                goto done;
        }

        fe = fe_alloc(TRUE);
        if (__improbable(fe == NULL)) {
                err = ENOMEM;
                goto done;
        }

        fe->fe_key = key;
        if (req->nfr_route != NULL) {
                fe->fe_laddr_gencnt = req->nfr_route->fr_laddr_gencnt;
        } else {
                fe->fe_laddr_gencnt = req->nfr_saddr_gencnt;
        }

        if (__improbable(req->nfr_flags & NXFLOWREQF_LISTENER)) {
                /* mark this as listener mode */
                os_atomic_or(&fe->fe_flags, FLOWENTF_LISTENER, relaxed);
        } else {
                ASSERT((fe->fe_key.fk_ipver == IPVERSION &&
                    fe->fe_key.fk_src4.s_addr != INADDR_ANY) ||
                    (fe->fe_key.fk_ipver == IPV6_VERSION &&
                    !IN6_IS_ADDR_UNSPECIFIED(&fe->fe_key.fk_src6)));

                /* mark this as connected mode */
                os_atomic_or(&fe->fe_flags, FLOWENTF_CONNECTED, relaxed);
        }

        if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
                fe->fe_flags |= FLOWENTF_NOWAKEFROMSLEEP;
        }
        fe->fe_port_reservation = req->nfr_port_reservation;
        req->nfr_port_reservation = NULL;
        if (req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV) {
                fe->fe_flags |= FLOWENTF_EXTRL_PORT;
        }
        fe->fe_proto_reservation = req->nfr_proto_reservation;
        req->nfr_proto_reservation = NULL;
        if (req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV) {
                fe->fe_flags |= FLOWENTF_EXTRL_PROTO;
        }
        fe->fe_ipsec_reservation = req->nfr_ipsec_reservation;
        req->nfr_ipsec_reservation = NULL;

        fe->fe_tx_process = dp_flow_tx_process;
        fe->fe_rx_process = dp_flow_rx_process;

        dev_na = fo->fo_fsw->fsw_dev_ch->ch_na;
        nif = NX_NETIF_PRIVATE(dev_na->na_nx);
        if (NX_LLINK_PROV(nif->nif_nx) &&
            (fe->fe_key.fk_mask & (FKMASK_IPVER | FKMASK_PROTO | FKMASK_DST)) ==
            (FKMASK_IPVER | FKMASK_PROTO | FKMASK_DST)) {
                if (req->nfr_qset_id != 0) {
                        fe->fe_qset_select = FE_QSET_SELECT_FIXED;
                        fe->fe_qset_id = req->nfr_qset_id;
                        fe->fe_qset = nx_netif_find_qset(nif, req->nfr_qset_id);
                } else {
                        fe->fe_qset_select = FE_QSET_SELECT_DYNAMIC;
                        fe->fe_qset_id = 0;
                        flow_qset_select_dynamic(fo->fo_fsw, fe, FALSE);
                }
        } else {
                fe->fe_qset_select = FE_QSET_SELECT_NONE;
        }
        if (req->nfr_flags & NXFLOWREQF_LOW_LATENCY) {
                os_atomic_or(&fe->fe_flags, FLOWENTF_LOW_LATENCY, relaxed);
        }

        fe->fe_transport_protocol = req->nfr_transport_protocol;
        if (NX_FSW_TCP_RX_AGG_ENABLED() &&
            (fo->fo_fsw->fsw_nx->nx_prov->nxprov_params->nxp_max_frags > 1) &&
            (fe->fe_key.fk_proto == IPPROTO_TCP) &&
            (fe->fe_key.fk_mask == FKMASK_5TUPLE)) {
                fe->fe_rx_process = flow_rx_agg_tcp;
        }
        uuid_copy(fe->fe_uuid, req->nfr_flow_uuid);
        if ((req->nfr_flags & NXFLOWREQF_LISTENER) == 0 &&
            (req->nfr_flags & NXFLOWREQF_TRACK) != 0) {
                switch (req->nfr_ip_protocol) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        os_atomic_or(&fe->fe_flags, FLOWENTF_TRACK, relaxed);
                        break;
                default:
                        break;
                }
        }

        if (req->nfr_flags & NXFLOWREQF_QOS_MARKING) {
                os_atomic_or(&fe->fe_flags, FLOWENTF_QOS_MARKING, relaxed);
        }

        if (req->nfr_flags & NXFLOWREQF_PARENT) {
                os_atomic_or(&fe->fe_flags, FLOWENTF_PARENT, relaxed);
                TAILQ_INIT(&fe->fe_child_list);
                lck_rw_init(&fe->fe_child_list_lock, &nexus_lock_group,
                    &nexus_lock_attr);
        }

        if (req->nfr_route != NULL) {
                fe->fe_route = req->nfr_route;
                req->nfr_route = NULL;
        }

        fe->fe_nx_port = nx_port;
        fe->fe_adv_idx = fadv_idx;

        if (req->nfr_inp_flowhash != 0) {
                /*
                 * BSD flow; use the inpcb flow hash value.
                 */
                fe->fe_flowid = req->nfr_inp_flowhash;
                fe->fe_flags |= FLOWENTF_EXTRL_FLOWID;
        } else {
                fe->fe_flowid = flow_entry_calc_flowid(fe);
        }

        if (fe->fe_adv_idx != FLOWADV_IDX_NONE && fo->fo_nx_port_na != NULL) {
                na_flowadv_entry_alloc(fo->fo_nx_port_na, fe->fe_uuid,
                    fe->fe_adv_idx, fe->fe_flowid);
        }

        if (KPKT_VALID_SVC(req->nfr_svc_class)) {
                fe->fe_svc_class = (kern_packet_svc_class_t)req->nfr_svc_class;
        } else {
                fe->fe_svc_class = KPKT_SC_BE;
        }

        uuid_copy(fe->fe_eproc_uuid, req->nfr_euuid);
        fe->fe_policy_id = req->nfr_policy_id;
        fe->fe_skip_policy_id = req->nfr_skip_policy_id;

        err = flow_mgr_flow_hash_mask_add(fm, fe->fe_key.fk_mask);
        ASSERT(err == 0);

        if (parent_fe != NULL) {
                os_atomic_or(&fe->fe_flags, FLOWENTF_CHILD, relaxed);
                flow_entry_set_demux_patterns(fe, req);
                fe->fe_demux_pkt_data = sk_alloc_data(FLOW_DEMUX_MAX_LEN,
                    Z_WAITOK | Z_NOFAIL, skmem_tag_flow_demux);
                if (!flow_entry_add_child(parent_fe, fe)) {
                        /*
                         * Parent went non-viable or a duplicate child
                         * exists; undo the hash mask addition and fail.
                         */
                        flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
                        err = ENOENT;
                        goto done;
                }
        } else {
                fe->fe_key_hash = flow_key_hash(&fe->fe_key);
                err = cuckoo_hashtable_add_with_hash(fm->fm_flow_table,
                    &fe->fe_cnode, fe->fe_key_hash);
                if (err != 0) {
                        SK_ERR("flow table add failed (err %d)", err);
                        flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
                        goto done;
                }
        }

        RB_INSERT(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
        flow_entry_retain(fe); /* one refcnt held by the id tree */

        *(struct nx_flowswitch **)(uintptr_t)&fe->fe_fsw = fo->fo_fsw;
        fe->fe_pid = fo->fo_pid;
        if (req->nfr_epid != -1 && req->nfr_epid != fo->fo_pid) {
                fe->fe_epid = req->nfr_epid;
                proc_name(fe->fe_epid, fe->fe_eproc_name,
                    sizeof(fe->fe_eproc_name));
        } else {
                fe->fe_epid = -1;
        }

        (void) snprintf(fe->fe_proc_name, sizeof(fe->fe_proc_name), "%s",
            fo->fo_name);

        fe_stats_init(fe);
        flow_stats_retain(fe->fe_stats);
        req->nfr_flow_stats = fe->fe_stats;

#if SK_LOG
        SK_DF(SK_VERB_FLOW, "allocated entry \"%s\" fe 0x%llx flags 0x%b "
            "[fo 0x%llx]", fe_as_string(fe, dbgbuf,
            sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS,
            SK_KVA(fo));
#endif /* SK_LOG */

done:
        if (parent_fe != NULL) {
                flow_entry_release(&parent_fe);
        }
        if (err != 0) {
                if (fadv_idx != FLOWADV_IDX_NONE) {
                        flow_owner_flowadv_index_free(fo, fadv_idx);
                }
                if (fe != NULL) {
                        flow_entry_release(&fe);
                }
        }
        *perr = err;
        return fe;
}

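/*
 * Quiesce a flow entry: resolve any pending non-viability request,
 * withdraw the local port namespace reservation, free the flow
 * advisory slot, and detach any child flows.  This function may be
 * called more than once on the same entry.
 */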
void
flow_entry_teardown(struct flow_owner *fo, struct flow_entry *fe)
{
#if SK_LOG
        char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
        SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b [fo 0x%llx] "
            "non_via %d withdrawn %d", fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
            SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS, SK_KVA(fo),
            fe->fe_want_nonviable, fe->fe_want_withdraw);
#endif /* SK_LOG */
        struct nx_flowswitch *fsw = fo->fo_fsw;

        FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

        ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
        ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
        ASSERT(fsw != NULL);

        if (os_atomic_cmpxchg(&fe->fe_want_nonviable, 1, 0, acq_rel)) {
                ASSERT(fsw->fsw_pending_nonviable != 0);
                os_atomic_dec(&fsw->fsw_pending_nonviable, relaxed);
                os_atomic_or(&fe->fe_flags, FLOWENTF_NONVIABLE, relaxed);
        }

        /* always withdraw the namespace during teardown */
        if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
            !(fe->fe_flags & FLOWENTF_WITHDRAWN)) {
                os_atomic_or(&fe->fe_flags, FLOWENTF_WITHDRAWN, relaxed);
                os_atomic_store(&fe->fe_want_withdraw, 0, release);
                /* local port is now inactive; not eligible for offload */
                flow_namespace_withdraw(&fe->fe_port_reservation);
        }

        /* we may get here multiple times, so check */
        if (!(fe->fe_flags & FLOWENTF_TORN_DOWN)) {
                os_atomic_or(&fe->fe_flags, FLOWENTF_TORN_DOWN, relaxed);
                if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
                        if (fo->fo_nx_port_na != NULL) {
                                na_flowadv_entry_free(fo->fo_nx_port_na,
                                    fe->fe_uuid, fe->fe_adv_idx, fe->fe_flowid);
                        }
                        flow_owner_flowadv_index_free(fo, fe->fe_adv_idx);
                        fe->fe_adv_idx = FLOWADV_IDX_NONE;
                }
        }
        ASSERT(fe->fe_adv_idx == FLOWADV_IDX_NONE);
        ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);

        /* mark all child flows as non-viable and detach them */
        if (fe->fe_flags & FLOWENTF_PARENT) {
                flow_entry_remove_all_children(fe, fsw);
        }
}

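/*
 * Unlink a flow entry from the flow table and the owner's id tree,
 * then drop the corresponding references.  Depending on protocol and
 * flags, the final reference is either dropped here or handed to the
 * linger list for deferred close processing.
 */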
void
flow_entry_destroy(struct flow_owner *fo, struct flow_entry *fe, bool nolinger,
    void *close_params)
{
        struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
        int err;

        FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

        /*
         * Expected references on entry:
         * regular flow: one in flow_table, one in id_tree, one here
         * child flow: one in id_tree, one here
         */
        ASSERT(flow_entry_refcnt(fe) > 2 ||
            ((fe->fe_flags & FLOWENTF_CHILD) && flow_entry_refcnt(fe) > 1));

        flow_entry_teardown(fo, fe);

        err = flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
        ASSERT(err == 0);

        /* only regular or parent flows have entries in the flow_table */
        if (__probable(!(fe->fe_flags & FLOWENTF_CHILD))) {
                uint32_t hash;
                hash = flow_key_hash(&fe->fe_key);
                cuckoo_hashtable_del(fm->fm_flow_table, &fe->fe_cnode, hash);
        }

        RB_REMOVE(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
        struct flow_entry *tfe = fe;
        flow_entry_release(&tfe); /* drop the id_tree reference */

        ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
        os_atomic_or(&fe->fe_flags, FLOWENTF_DESTROYED, relaxed);

        if (fe->fe_transport_protocol == IPPROTO_QUIC) {
                if (!nolinger && close_params != NULL) {
                        flow_track_abort_quic(fe, close_params);
                }
                flow_entry_release(&fe);
        } else if (nolinger || !(fe->fe_flags & FLOWENTF_WAIT_CLOSE)) {
                flow_entry_release(&fe);
        } else {
                fsw_linger_insert(fe);
        }
}

uint32_t
flow_entry_refcnt(struct flow_entry *fe)
{
        return os_ref_get_count(&fe->fe_refcnt);
}

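/*
 * Reference counting for flow entries; an entry is torn down and
 * freed via fe_free() when its last reference is released.
 */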
void
flow_entry_retain(struct flow_entry *fe)
{
        os_ref_retain(&fe->fe_refcnt);
}

void
flow_entry_release(struct flow_entry **pfe)
{
        struct flow_entry *fe = *pfe;
        ASSERT(fe != NULL);
        *pfe = NULL; /* caller loses its reference */
#if SK_LOG
        if (__improbable(sk_verbose != 0)) {
                char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
                SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b",
                    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
                    fe->fe_flags, FLOWENTF_BITS);
        }
#endif /* SK_LOG */

        if (__improbable(os_ref_release(&fe->fe_refcnt) == 0)) {
                fe->fe_nx_port = NEXUS_PORT_ANY;
                if (fe->fe_route != NULL) {
                        flow_route_release(fe->fe_route);
                        fe->fe_route = NULL;
                }
                if (fe->fe_qset != NULL) {
                        nx_netif_qset_release(&fe->fe_qset);
                        ASSERT(fe->fe_qset == NULL);
                }
                if (fe->fe_demux_patterns != NULL) {
                        sk_free_type_array(struct kern_flow_demux_pattern,
                            fe->fe_demux_pattern_count, fe->fe_demux_patterns);
                        fe->fe_demux_patterns = NULL;
                        fe->fe_demux_pattern_count = 0;
                }
                if (fe->fe_demux_pkt_data != NULL) {
                        sk_free_data(fe->fe_demux_pkt_data, FLOW_DEMUX_MAX_LEN);
                        fe->fe_demux_pkt_data = NULL;
                }
                fe_free(fe);
        }
}

struct flow_entry_dead *
flow_entry_dead_alloc(zalloc_flags_t how)
{
        struct flow_entry_dead *fed;

        fed = zalloc_flags(sk_fed_zone, how | Z_ZERO);
        if (fed != NULL) {
                SK_DF(SK_VERB_MEM, "fed 0x%llx ALLOC", SK_KVA(fed));
        }
        return fed;
}

void
flow_entry_dead_free(struct flow_entry_dead *fed)
{
        SK_DF(SK_VERB_MEM, "fed 0x%llx FREE", SK_KVA(fed));
        zfree(sk_fed_zone, fed);
}

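/*
 * Initialize the externally visible per-flow statistics from the
 * entry's identity (UUIDs, names, key, service class), then sync the
 * dynamic state via fe_stats_update().
 */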
static void
fe_stats_init(struct flow_entry *fe)
{
        struct nx_flowswitch *fsw = fe->fe_fsw;
        struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;

        ASSERT(fe->fe_stats != NULL);
        ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);

        bzero(sf, sizeof(*sf));
        uuid_copy(sf->sf_nx_uuid, fsw->fsw_nx->nx_uuid);
        uuid_copy(sf->sf_uuid, fe->fe_uuid);
        (void) strlcpy(sf->sf_if_name, fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
        sf->sf_if_index = fsw->fsw_ifp->if_index;
        sf->sf_pid = fe->fe_pid;
        sf->sf_epid = fe->fe_epid;
        (void) snprintf(sf->sf_proc_name, sizeof(sf->sf_proc_name), "%s",
            fe->fe_proc_name);
        (void) snprintf(sf->sf_eproc_name, sizeof(sf->sf_eproc_name), "%s",
            fe->fe_eproc_name);

        sf->sf_nx_port = fe->fe_nx_port;
        sf->sf_key = fe->fe_key;
        sf->sf_protocol = fe->fe_transport_protocol;
        sf->sf_svc_class = (packet_svc_class_t)fe->fe_svc_class;
        sf->sf_adv_idx = fe->fe_adv_idx;

        if (fe->fe_flags & FLOWENTF_TRACK) {
                sf->sf_flags |= SFLOWF_TRACK;
        }
        if (fe->fe_flags & FLOWENTF_LISTENER) {
                sf->sf_flags |= SFLOWF_LISTENER;
        }
        if (fe->fe_route != NULL && (fe->fe_route->fr_flags & FLOWRTF_ONLINK)) {
                sf->sf_flags |= SFLOWF_ONLINK;
        }

        fe_stats_update(fe);
}

static void
fe_stats_update(struct flow_entry *fe)
{
        struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;

        ASSERT(fe->fe_stats != NULL);
        ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);

        if (fe->fe_flags & FLOWENTF_CONNECTED) {
                sf->sf_flags |= SFLOWF_CONNECTED;
        }
        if (fe->fe_flags & FLOWENTF_QOS_MARKING) {
                sf->sf_flags |= SFLOWF_QOS_MARKING;
        }
        if (fe->fe_flags & FLOWENTF_WAIT_CLOSE) {
                sf->sf_flags |= SFLOWF_WAIT_CLOSE;
        }
        if (fe->fe_flags & FLOWENTF_CLOSE_NOTIFY) {
                sf->sf_flags |= SFLOWF_CLOSE_NOTIFY;
        }
        if (fe->fe_flags & FLOWENTF_ABORTED) {
                sf->sf_flags |= SFLOWF_ABORTED;
        }
        if (fe->fe_flags & FLOWENTF_NONVIABLE) {
                sf->sf_flags |= SFLOWF_NONVIABLE;
        }
        if (fe->fe_flags & FLOWENTF_WITHDRAWN) {
                sf->sf_flags |= SFLOWF_WITHDRAWN;
        }
        if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
                sf->sf_flags |= SFLOWF_TORN_DOWN;
        }
        if (fe->fe_flags & FLOWENTF_DESTROYED) {
                sf->sf_flags |= SFLOWF_DESTROYED;
        }
        if (fe->fe_flags & FLOWENTF_LINGERING) {
                sf->sf_flags |= SFLOWF_LINGERING;
        }
        if (fe->fe_flags & FLOWENTF_LOW_LATENCY) {
                sf->sf_flags |= SFLOWF_LOW_LATENCY;
        }
        if (fe->fe_flags & FLOWENTF_PARENT) {
                sf->sf_flags |= SFLOWF_PARENT;
        }
        if (fe->fe_flags & FLOWENTF_CHILD) {
                sf->sf_flags |= SFLOWF_CHILD;
        }
        if (fe->fe_flags & FLOWENTF_NOWAKEFROMSLEEP) {
                sf->sf_flags |= SFLOWF_NOWAKEFROMSLEEP;
        } else {
                sf->sf_flags &= ~SFLOWF_NOWAKEFROMSLEEP;
        }

        sf->sf_bucket_idx = SFLOW_BUCKET_NONE;

        sf->sf_ltrack.sft_state = fe->fe_ltrack.fse_state;
        sf->sf_ltrack.sft_seq = fe->fe_ltrack.fse_seqlo;
        sf->sf_ltrack.sft_max_win = fe->fe_ltrack.fse_max_win;
        sf->sf_ltrack.sft_wscale = fe->fe_ltrack.fse_wscale;
        sf->sf_rtrack.sft_state = fe->fe_rtrack.fse_state;
        sf->sf_rtrack.sft_seq = fe->fe_rtrack.fse_seqlo;
        sf->sf_rtrack.sft_max_win = fe->fe_rtrack.fse_max_win;
        /* mirror the local side; remote wscale was previously omitted */
        sf->sf_rtrack.sft_wscale = fe->fe_rtrack.fse_wscale;
}

void
flow_entry_stats_get(struct flow_entry *fe, struct sk_stats_flow *sf)
{
        _CASSERT(sizeof(fe->fe_stats->fs_stats) == sizeof(*sf));

        fe_stats_update(fe);
        bcopy(&fe->fe_stats->fs_stats, sf, sizeof(*sf));
}

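/*
 * Allocate a flow entry and its stats block from their caches; may
 * sleep if can_block is TRUE.  Returns NULL on allocation failure.
 */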
struct flow_entry *
fe_alloc(boolean_t can_block)
{
        struct flow_entry *fe;

        _CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);

        fe = skmem_cache_alloc(sk_fe_cache,
            can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
        if (fe == NULL) {
                return NULL;
        }

        /*
         * fe_key is 16-byte aligned, which requires fe to begin on a
         * 16-byte boundary as well.  This alignment is specified at
         * sk_fe_cache creation time, and we assert it here.
         */
        ASSERT(IS_P2ALIGNED(fe, 16));
        bzero(fe, sk_fe_size);

        fe->fe_stats = flow_stats_alloc(can_block);
        if (fe->fe_stats == NULL) {
                skmem_cache_free(sk_fe_cache, fe);
                return NULL;
        }

        SK_DF(SK_VERB_MEM, "fe 0x%llx ALLOC", SK_KVA(fe));

        os_ref_init(&fe->fe_refcnt, &flow_entry_refgrp);

        KPKTQ_INIT(&fe->fe_rx_pktq);
        KPKTQ_INIT(&fe->fe_tx_pktq);

        return fe;
}

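/*
 * Last-reference teardown of a flow entry: release the stats block
 * and any port, protocol, IPsec and flow ID reservations that are
 * not externally owned, then return the entry to its cache.
 */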
static void
fe_free(struct flow_entry *fe)
{
        ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
        ASSERT(fe->fe_flags & FLOWENTF_DESTROYED);
        ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
        ASSERT(fe->fe_route == NULL);

        ASSERT(fe->fe_stats != NULL);
        flow_stats_release(fe->fe_stats);
        fe->fe_stats = NULL;

        /*
         * The namespace reservation is released only when the very
         * last reference to the flow goes away.
         */
        if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
            NETNS_TOKEN_VALID(&fe->fe_port_reservation)) {
                flow_namespace_destroy(&fe->fe_port_reservation);
                ASSERT(!NETNS_TOKEN_VALID(&fe->fe_port_reservation));
        }
        fe->fe_port_reservation = NULL;

        if (!(fe->fe_flags & FLOWENTF_EXTRL_PROTO) &&
            protons_token_is_valid(fe->fe_proto_reservation)) {
                protons_release(&fe->fe_proto_reservation);
        }
        fe->fe_proto_reservation = NULL;

        if (key_custom_ipsec_token_is_valid(fe->fe_ipsec_reservation)) {
                key_release_custom_ipsec(&fe->fe_ipsec_reservation);
        }
        fe->fe_ipsec_reservation = NULL;

        if (!(fe->fe_flags & FLOWENTF_EXTRL_FLOWID) && (fe->fe_flowid != 0)) {
                flowidns_release_flowid(fe->fe_flowid);
                fe->fe_flowid = 0;
        }

        skmem_cache_free(sk_fe_cache, fe);
}

static __inline__ int
fe_id_cmp(const struct flow_entry *a, const struct flow_entry *b)
{
        return uuid_compare(a->fe_uuid, b->fe_uuid);
}

#if SK_LOG
SK_NO_INLINE_ATTRIBUTE
char *
fk_as_string(const struct flow_key *fk, char *dst, size_t dsz)
{
        int af;
        char src_s[MAX_IPv6_STR_LEN];
        char dst_s[MAX_IPv6_STR_LEN];

        af = (fk->fk_ipver == 4) ? AF_INET : AF_INET6;

        (void) inet_ntop(af, &fk->fk_src, src_s, sizeof(src_s));
        (void) inet_ntop(af, &fk->fk_dst, dst_s, sizeof(dst_s));
        (void) snprintf(dst, dsz,
            "ipver=%u,src=%s,dst=%s,proto=0x%02x,sport=%u,dport=%u "
            "mask=%08x,hash=%08x",
            fk->fk_ipver, src_s, dst_s, fk->fk_proto, ntohs(fk->fk_sport),
            ntohs(fk->fk_dport), fk->fk_mask, flow_key_hash(fk));

        return dst;
}

SK_NO_INLINE_ATTRIBUTE
char *
fe_as_string(const struct flow_entry *fe, char *dst, size_t dsz)
{
        char keybuf[FLOWKEY_DBGBUF_SIZE]; /* just for debug messages */
        uuid_string_t uuidstr;

        fk_as_string(&fe->fe_key, keybuf, sizeof(keybuf));

        (void) snprintf(dst, dsz,
            "fe 0x%llx proc %s nx_port %d flow_uuid %s %s tp_proto=0x%02x",
            SK_KVA(fe), fe->fe_proc_name, (int)fe->fe_nx_port,
            sk_uuid_unparse(fe->fe_uuid, uuidstr),
            keybuf, fe->fe_transport_protocol);

        return dst;
}
#endif /* SK_LOG */