1 | /* |
2 | * Copyright (c) 2016-2023 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * Once a packet is classified, it goes through checks to see if there |
31 | * is a matching flow entry in the flow table. The key used to search |
 * the entry is composed of the fields contained in struct flow_key.
33 | * |
 * Flow entry insertion into and deletion from the flow table, on
 * behalf of the owning client process, require the use of the rule
 * ID (UUID) as the search key.
37 | * |
38 | * Because of the above, each flow entry simultaneously exists in two |
39 | * respective trees: flow_entry_tree and flow_entry_id_tree. |
40 | * |
 * Using a single RW lock to protect the two trees is simple, but it
 * hurts data path performance during flow insertion and deletion,
 * especially as the number of client processes and flows grows.
44 | * |
45 | * To solve that, we deploy the following scheme: |
46 | * |
47 | * Given that the flow_entry_tree is searched on a per-packet basis, |
48 | * we break it down into a series of trees, each one contained within |
 * a flow_bucket structure. The hash of the flow_key determines the
 * index of the flow_bucket whose flow_entry_tree is searched.
51 | * |
52 | * The flow_entry_id_tree is searched on each flow insertion and |
53 | * deletion, and similarly we break it down into a series of trees, |
54 | * each contained within a flow_owner_bucket structure. We use the |
55 | * client process ID (pid_t) to determine the bucket index. |
56 | * |
 * Each flow_bucket and flow_owner_bucket structure is dynamically
 * allocated and aligned on a CPU cache-line boundary. The number of
 * buckets is determined by the client module at the time the flow
 * manager context is initialized. This layout avoids false sharing,
 * especially given that each bucket has its own RW lock.
62 | */ |
63 | |
#ifndef _SKYWALK_NEXUS_FLOWSWITCH_FLOW_FLOWVAR_H_
#define _SKYWALK_NEXUS_FLOWSWITCH_FLOW_FLOWVAR_H_
66 | |
67 | #ifdef BSD_KERNEL_PRIVATE |
68 | #include <skywalk/core/skywalk_var.h> |
69 | #include <skywalk/lib/cuckoo_hashtable.h> |
70 | #include <skywalk/namespace/netns.h> |
71 | #include <skywalk/namespace/protons.h> |
72 | #include <skywalk/packet/packet_var.h> |
73 | #include <net/flowhash.h> |
74 | #include <netinet/ip.h> |
75 | #include <netinet/in_stat.h> |
76 | #include <netinet/ip6.h> |
77 | #include <sys/eventhandler.h> |
78 | |
79 | RB_HEAD(flow_owner_tree, flow_owner); |
80 | |
81 | struct flow_owner_bucket { |
82 | decl_lck_mtx_data(, fob_lock); |
83 | struct flow_owner_tree fob_owner_head; |
84 | uint16_t fob_busy_flags; |
85 | uint16_t fob_open_waiters; |
86 | uint16_t fob_close_waiters; |
87 | uint16_t fob_dtor_waiters; |
88 | const size_t fob_idx; |
89 | }; |
90 | |
91 | #define FOBF_OPEN_BUSY 0x1 /* flow open monitor */ |
92 | #define FOBF_CLOSE_BUSY 0x2 /* flow close monitor */ |
93 | #define FOBF_DEAD 0x4 /* no longer usable */ |
94 | |
95 | #define FOB_LOCK(_fob) \ |
96 | lck_mtx_lock(&(_fob)->fob_lock) |
97 | #define FOB_LOCK_SPIN(_fob) \ |
98 | lck_mtx_lock_spin(&(_fob)->fob_lock) |
99 | #define FOB_LOCK_CONVERT(_fob) \ |
100 | lck_mtx_convert_spin(&(_fob)->fob_lock) |
101 | #define FOB_TRY_LOCK(_fob) \ |
102 | lck_mtx_try_lock(&(_fob)->fob_lock) |
103 | #define FOB_LOCK_ASSERT_HELD(_fob) \ |
104 | LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_OWNED) |
105 | #define FOB_LOCK_ASSERT_NOTHELD(_fob) \ |
106 | LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_NOTOWNED) |
107 | #define FOB_UNLOCK(_fob) \ |
108 | lck_mtx_unlock(&(_fob)->fob_lock) |
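
/*
 * Typical locking pattern for a bucket (a sketch, not lifted from the
 * callers): take the mutex in spin mode for a short lookup, and
 * convert it to a full mutex before any operation that may block:
 *
 *	FOB_LOCK_SPIN(fob);
 *	fo = RB_FIND(flow_owner_tree, &fob->fob_owner_head, &find);
 *	if (fo != NULL) {
 *		FOB_LOCK_CONVERT(fob);	// may block from here on
 *		...
 *	}
 *	FOB_UNLOCK(fob);
 */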
109 | |
110 | RB_HEAD(flow_entry_id_tree, flow_entry); |
111 | |
112 | #define FLOW_PROCESS_NAME_LENGTH 24 |
113 | |
114 | struct flow_owner { |
115 | RB_ENTRY(flow_owner) fo_link; |
116 | struct flow_entry_id_tree fo_flow_entry_id_head; |
117 | const struct flow_owner_bucket *fo_bucket; |
118 | void *fo_context; |
119 | pid_t fo_pid; |
120 | bool fo_nx_port_pid_bound; |
121 | bool fo_nx_port_destroyed; |
122 | bool fo_low_latency; |
123 | nexus_port_t fo_nx_port; |
124 | uuid_t fo_key; |
125 | |
126 | struct nexus_adapter * const fo_nx_port_na; |
127 | struct nx_flowswitch * const fo_fsw; |
128 | |
129 | /* |
130 | * Array of bitmaps to manage the flow advisory table indices. |
131 | * Currently we are restricting a flow owner to a single nexus |
132 | * port, so this structure is effectively managing the flow advisory |
133 | * indices for a port. |
134 | */ |
135 | bitmap_t *fo_flowadv_bmap; |
136 | uint32_t fo_flowadv_max; |
137 | uint32_t fo_num_flowadv; |
138 | |
139 | /* for debugging */ |
140 | char fo_name[FLOW_PROCESS_NAME_LENGTH]; |
141 | }; |
142 | |
143 | #define FO_BUCKET(_fo) \ |
144 | __DECONST(struct flow_owner_bucket *, (_fo)->fo_bucket) |
145 | |
146 | RB_PROTOTYPE_SC_PREV(__private_extern__, flow_owner_tree, flow_owner, |
147 | fo_link, fo_cmp); |
148 | RB_PROTOTYPE_SC_PREV(__private_extern__, flow_entry_id_tree, flow_entry, |
149 | fe_id_link, fe_id_cmp); |
150 | |
151 | typedef enum { |
152 | /* |
153 | * TCP states. |
154 | */ |
155 | FT_STATE_CLOSED = 0, /* closed */ |
156 | FT_STATE_LISTEN, /* listening for connection */ |
157 | FT_STATE_SYN_SENT, /* active, have sent SYN */ |
158 | FT_STATE_SYN_RECEIVED, /* have sent and rcvd SYN */ |
159 | FT_STATE_ESTABLISHED, /* established */ |
160 | FT_STATE_CLOSE_WAIT, /* rcvd FIN, waiting close */ |
161 | FT_STATE_FIN_WAIT_1, /* have sent FIN */ |
162 | FT_STATE_CLOSING, /* exchanged FINs, waiting FIN|ACK */ |
163 | FT_STATE_LAST_ACK, /* rcvd FIN, closed, waiting FIN|ACK */ |
164 | FT_STATE_FIN_WAIT_2, /* closed, FIN is ACK'd */ |
165 | FT_STATE_TIME_WAIT, /* quiet wait after close */ |
166 | |
167 | /* |
168 | * UDP states. |
169 | */ |
170 | FT_STATE_NO_TRAFFIC = 20, /* no packet observed */ |
171 | FT_STATE_SINGLE, /* single packet */ |
172 | FT_STATE_MULTIPLE, /* multiple packets */ |
173 | |
174 | FT_STATE_MAX = 255 |
175 | } flow_track_state_t; |
176 | |
177 | struct flow_track_rtt { |
178 | uint64_t frtt_timestamp; /* tracked segment timestamp */ |
179 | uint64_t frtt_last; /* previous net_uptime(rate limiting) */ |
180 | uint32_t frtt_seg_begin; /* tracked segment begin SEQ */ |
181 | uint32_t frtt_seg_end; /* tracked segment end SEQ */ |
182 | uint32_t frtt_usec; /* avg RTT in usec */ |
183 | }; |
184 | |
185 | #define FLOWTRACK_RTT_SAMPLE_INTERVAL 2 /* sample ACK RTT every 2 sec */ |
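
/*
 * A sketch of how the fields above cooperate (the authoritative logic
 * lives in the flow tracking code, not here; "smooth" is a stand-in
 * for whatever averaging is applied): at most one segment is tracked
 * per sample interval, and the ACK covering it yields one RTT sample.
 *
 *	uint64_t now = net_uptime();
 *	if (now - frtt->frtt_last >= FLOWTRACK_RTT_SAMPLE_INTERVAL) {
 *		frtt->frtt_last = now;
 *		frtt->frtt_seg_begin = seq;		// track this segment
 *		frtt->frtt_seg_end = seq + len;
 *		frtt->frtt_timestamp = now_usec;	// send-side timestamp
 *	}
 *	...
 *	if (SEQ_GEQ(ack, frtt->frtt_seg_end))		// fully ACK'd
 *		frtt->frtt_usec = smooth(frtt->frtt_usec,
 *		    now_usec - frtt->frtt_timestamp);
 */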
186 | |
187 | struct flow_track { |
188 | /* |
189 | * TCP specific tracking info. |
190 | */ |
191 | uint32_t fse_seqlo; /* max sequence number sent */ |
192 | uint32_t fse_seqhi; /* max the other end ACKd + win */ |
193 | uint32_t fse_seqlast; /* last sequence number (FIN) */ |
194 | uint16_t fse_max_win; /* largest window (pre scaling) */ |
195 | uint16_t fse_mss; /* maximum segment size option */ |
196 | uint8_t fse_state; /* active state level (FT_STATE_*) */ |
197 | uint8_t fse_wscale; /* window scaling factor */ |
198 | uint16_t fse_flags; /* FLOWSTATEF_* */ |
199 | uint32_t fse_syn_ts; /* SYN timestamp */ |
200 | uint32_t fse_syn_cnt; /* # of SYNs per second */ |
201 | |
202 | struct flow_track_rtt fse_rtt; /* ACK RTT tracking */ |
203 | #define fse_rtt_usec fse_rtt.frtt_usec |
204 | } __sk_aligned(8); |
205 | |
206 | /* valid values for fse_flags */ |
207 | #define FLOWSTATEF_WSCALE 0x1 /* fse_wscale is valid */ |
208 | |
209 | struct flow_llhdr { |
210 | uint32_t flh_gencnt; /* link-layer address gencnt */ |
211 | |
212 | const uint8_t flh_off; |
213 | const uint8_t flh_len; |
214 | uint16_t flh_pad; /* for future */ |
215 | |
216 | union _flh_u { |
217 | uint64_t _buf[2]; |
218 | struct { |
219 | uint16_t _eth_pad; |
220 | struct ether_header _eth; |
221 | } _eth_padded; |
222 | } __sk_aligned(8) _flh; |
223 | #define flh_eth_padded _flh._eth_padded |
224 | #define flh_eth _flh._eth_padded._eth |
225 | }; |
226 | |
227 | typedef enum { |
228 | FE_QSET_SELECT_NONE, |
229 | FE_QSET_SELECT_FIXED, |
230 | FE_QSET_SELECT_DYNAMIC |
231 | } flow_qset_select_t; |
232 | |
233 | extern kern_allocation_name_t skmem_tag_flow_demux; |
234 | typedef int (*flow_demux_memcmp_mask_t)(const uint8_t *src1, const uint8_t *src2, |
235 | const uint8_t *byte_mask); |
236 | |
237 | struct kern_flow_demux_pattern { |
238 | struct flow_demux_pattern fdp_demux_pattern; |
239 | flow_demux_memcmp_mask_t fdp_memcmp_mask; |
240 | }; |
241 | |
242 | #define MAX_PKT_DEMUX_LIMIT 1000 |
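
/*
 * A flow_demux_memcmp_mask_t implementation is expected to return zero
 * iff the two buffers are equal under the byte mask. Implementations
 * are typically fixed-length (cf. sk_memcmp_mask_48B used elsewhere in
 * this header); a portable sketch for some fixed LEN would be:
 *
 *	static int
 *	demux_memcmp_mask(const uint8_t *src1, const uint8_t *src2,
 *	    const uint8_t *byte_mask)
 *	{
 *		for (size_t i = 0; i < LEN; i++) {
 *			if ((src1[i] & byte_mask[i]) !=
 *			    (src2[i] & byte_mask[i])) {
 *				return 1;
 *			}
 *		}
 *		return 0;
 *	}
 */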
243 | |
244 | TAILQ_HEAD(flow_entry_list, flow_entry); |
245 | |
246 | #define FLOW_PROC_FLAG_GSO 0x0001 |
247 | typedef void (*flow_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe, |
248 | uint32_t flags); |
249 | |
250 | struct flow_entry { |
251 | /**** Common Group ****/ |
252 | os_refcnt_t fe_refcnt; |
253 | struct flow_key fe_key; |
254 | uint32_t fe_flags; |
255 | uint32_t fe_key_hash; |
256 | struct cuckoo_node fe_cnode; |
257 | |
258 | uuid_t fe_uuid __sk_aligned(8); |
259 | nexus_port_t fe_nx_port; |
260 | uint32_t fe_laddr_gencnt; |
261 | uint32_t fe_want_nonviable; |
262 | uint32_t fe_want_withdraw; |
263 | uint8_t fe_transport_protocol; |
264 | |
265 | /**** Rx Group ****/ |
266 | uint16_t fe_rx_frag_count; |
267 | uint32_t fe_rx_pktq_bytes; |
268 | struct pktq fe_rx_pktq; |
269 | TAILQ_ENTRY(flow_entry) fe_rx_link; |
270 | flow_action_t fe_rx_process; |
271 | |
272 | /* |
273 | * largest allocated packet size. |
274 | * used by: |
275 | * - mbuf batch allocation logic during RX aggregtion and netif copy. |
276 | * - packet allocation logic during RX aggregation. |
277 | */ |
278 | uint32_t fe_rx_largest_size; |
279 | |
280 | /**** Tx Group ****/ |
281 | bool fe_tx_is_cont_frag; |
282 | uint32_t fe_tx_frag_id; |
283 | struct pktq fe_tx_pktq; |
284 | TAILQ_ENTRY(flow_entry) fe_tx_link; |
285 | flow_action_t fe_tx_process; |
286 | |
287 | uuid_t fe_eproc_uuid __sk_aligned(8); |
288 | flowadv_idx_t fe_adv_idx; |
289 | kern_packet_svc_class_t fe_svc_class; |
290 | uint32_t fe_policy_id; /* policy id matched to flow */ |
291 | uint32_t fe_skip_policy_id; /* skip policy id matched to flow */ |
292 | |
293 | /**** Misc Group ****/ |
294 | struct nx_flowswitch * const fe_fsw; |
295 | struct ns_token *fe_port_reservation; |
296 | struct protons_token *fe_proto_reservation; |
297 | void *fe_ipsec_reservation; |
298 | |
299 | struct flow_track fe_ltrack; /* local endpoint state */ |
300 | struct flow_track fe_rtrack; /* remote endpoint state */ |
301 | |
302 | /* |
303 | * Flow stats are kept externally stand-alone, refcnt'ed by various |
304 | * users (e.g. flow_entry, necp_client_flow, etc.) |
305 | */ |
306 | struct flow_stats *fe_stats; |
307 | struct flow_route *fe_route; |
308 | |
309 | RB_ENTRY(flow_entry) fe_id_link; |
310 | |
311 | TAILQ_ENTRY(flow_entry) fe_linger_link; |
312 | uint64_t fe_linger_expire; /* expiration deadline */ |
313 | uint32_t fe_linger_wait; /* linger time (seconds) */ |
314 | |
315 | pid_t fe_pid; |
316 | pid_t fe_epid; |
317 | char fe_proc_name[FLOW_PROCESS_NAME_LENGTH]; |
318 | char fe_eproc_name[FLOW_PROCESS_NAME_LENGTH]; |
319 | |
320 | uint32_t fe_flowid; /* globally unique flow ID */ |
321 | |
322 | /* Logical link related information */ |
323 | struct netif_qset *fe_qset; |
324 | uint64_t fe_qset_id; |
325 | flow_qset_select_t fe_qset_select; |
326 | uint32_t fe_tr_genid; |
327 | |
	/* Parent/child flow information */
329 | decl_lck_rw_data(, fe_child_list_lock); |
330 | struct flow_entry_list fe_child_list; |
331 | TAILQ_ENTRY(flow_entry) fe_child_link; |
332 | #if DEVELOPMENT || DEBUG |
333 | int16_t fe_child_count; |
334 | #endif // DEVELOPMENT || DEBUG |
335 | uint8_t fe_demux_pattern_count; |
336 | struct kern_flow_demux_pattern *fe_demux_patterns; |
337 | uint8_t *fe_demux_pkt_data; |
338 | }; |
339 | |
340 | /* valid values for fe_flags */ |
341 | #define FLOWENTF_INITED 0x00000001 /* {src,dst} states initialized */ |
342 | #define FLOWENTF_TRACK 0x00000010 /* enable state tracking */ |
343 | #define FLOWENTF_CONNECTED 0x00000020 /* connected mode */ |
344 | #define FLOWENTF_LISTENER 0x00000040 /* listener mode */ |
345 | #define FLOWENTF_QOS_MARKING 0x00000100 /* flow can have qos marking */ |
346 | #define FLOWENTF_LOW_LATENCY 0x00000200 /* low latency flow */ |
347 | #define FLOWENTF_WAIT_CLOSE 0x00001000 /* defer free after close */ |
348 | #define FLOWENTF_CLOSE_NOTIFY 0x00002000 /* notify NECP upon tear down */ |
349 | #define FLOWENTF_EXTRL_PORT 0x00004000 /* port reservation is held externally */ |
350 | #define FLOWENTF_EXTRL_PROTO 0x00008000 /* proto reservation is held externally */ |
351 | #define FLOWENTF_EXTRL_FLOWID 0x00010000 /* flowid reservation is held externally */ |
352 | #define FLOWENTF_CHILD 0x00020000 /* child flow */ |
353 | #define FLOWENTF_PARENT 0x00040000 /* parent flow */ |
354 | #define FLOWENTF_NOWAKEFROMSLEEP 0x00080000 /* don't wake for this flow */ |
355 | #define FLOWENTF_ABORTED 0x01000000 /* has sent RST to peer */ |
356 | #define FLOWENTF_NONVIABLE 0x02000000 /* disabled; awaiting tear down */ |
357 | #define FLOWENTF_WITHDRAWN 0x04000000 /* flow has been withdrawn */ |
358 | #define FLOWENTF_TORN_DOWN 0x08000000 /* torn down and awaiting destroy */ |
359 | #define FLOWENTF_HALF_CLOSED 0x10000000 /* flow is half closed */ |
360 | #define FLOWENTF_DESTROYED 0x40000000 /* not in RB trees anymore */ |
361 | #define FLOWENTF_LINGERING 0x80000000 /* destroyed and in linger list */ |
362 | |
#define FLOWENTF_BITS \
	"\020\01INITED\05TRACK\06CONNECTED\07LISTENER\011QOS_MARKING" \
	"\012LOW_LATENCY\015WAIT_CLOSE\016CLOSE_NOTIFY\017EXT_PORT" \
	"\020EXT_PROTO\021EXT_FLOWID\022CHILD\023PARENT\024NOWAKEFROMSLEEP" \
	"\031ABORTED\032NONVIABLE\033WITHDRAWN\034TORN_DOWN\035HALF_CLOSED" \
	"\037DESTROYED\040LINGERING"
368 | |
369 | TAILQ_HEAD(flow_entry_linger_head, flow_entry); |
370 | |
371 | struct flow_entry_dead { |
372 | LIST_ENTRY(flow_entry_dead) fed_link; |
373 | |
374 | boolean_t fed_want_nonviable; |
375 | boolean_t fed_want_clonotify; |
376 | |
377 | /* rule (flow) UUID */ |
378 | union { |
379 | uint64_t fed_uuid_64[2]; |
380 | uint32_t fed_uuid_32[4]; |
381 | uuid_t fed_uuid; |
382 | } __sk_aligned(8); |
383 | }; |
384 | |
385 | /* |
386 | * Minimum refcnt for a flow route entry to be considered as idle. |
387 | */ |
388 | #define FLOW_ROUTE_MINREF 2 /* for the 2 RB trees */ |
389 | |
390 | struct flow_route { |
391 | RB_ENTRY(flow_route) fr_link; |
392 | RB_ENTRY(flow_route) fr_id_link; |
393 | |
394 | /* |
395 | * fr_laddr represents the local address that the system chooses |
396 | * for the foreign destination in fr_faddr. The flow entry that |
397 | * is referring to this flow route object may choose a different |
398 | * local address if it wishes. |
399 | * |
400 | * fr_gaddr represents the gateway address to reach the final |
401 | * foreign destination fr_faddr, valid only if the destination is |
402 | * not directly attached (FLOWRTF_GATEWAY is set). |
403 | * |
	 * The use of sockaddr for storage is for convenience; the port
	 * value is not applicable to this object, since it is shared
	 * among flow entries.
407 | */ |
408 | union sockaddr_in_4_6 fr_laddr; /* local IP address */ |
409 | union sockaddr_in_4_6 fr_faddr; /* remote IP address */ |
410 | #define fr_af fr_faddr.sa.sa_family |
411 | union sockaddr_in_4_6 fr_gaddr; /* gateway IP address */ |
412 | |
413 | struct flow_llhdr fr_llhdr; |
414 | #define fr_eth_padded fr_llhdr.flh_eth_padded |
415 | #define fr_eth fr_llhdr.flh_eth |
416 | |
417 | /* |
418 | * In flow_route_tree, we use the destination address as key. |
419 | * To speed up searches, we initialize fr_addr_key to the address |
420 | * portion of fr_faddr depending on the address family. |
421 | */ |
422 | void *fr_addr_key; |
423 | |
424 | /* flow route UUID */ |
425 | uuid_t fr_uuid __sk_aligned(8); |
426 | |
427 | /* |
	 * fr_usecnt is updated atomically; it is incremented when a flow
	 * entry takes a reference on this object and decremented when the
	 * reference is released. Periodically,
430 | * the flowswitch instance garbage collects flow_route objects |
431 | * that aren't being referred to by any flow entries. |
432 | * |
433 | * fr_expire is set when fr_usecnt reaches its minimum count, and |
434 | * is cleared when it goes above the minimum count. |
435 | * |
436 | * The spin lock fr_reflock is used to serialize both. |
437 | */ |
438 | decl_lck_spin_data(, fr_reflock); |
439 | uint64_t fr_expire; |
440 | volatile uint32_t fr_usecnt; |
441 | |
442 | uint32_t fr_flags; |
443 | uint32_t fr_laddr_gencnt; /* local IP gencnt */ |
444 | uint32_t fr_addr_len; /* sizeof {in,in6}_addr */ |
445 | |
446 | volatile uint32_t fr_want_configure; |
447 | volatile uint32_t fr_want_probe; |
448 | |
449 | /* lock to serialize resolver */ |
450 | decl_lck_mtx_data(, fr_lock); |
451 | |
452 | /* |
	 * fr_rt_dst is the route to the final destination; together with
	 * fr_rt_evhdlr_tag, it is used for route event registration.
455 | * |
456 | * fr_rt_gw is valid only if FLOWRTF_GATEWAY is set. |
457 | */ |
458 | eventhandler_tag fr_rt_evhdlr_tag; |
459 | struct rtentry *fr_rt_dst; |
460 | struct rtentry *fr_rt_gw; |
461 | |
462 | /* nexus UUID */ |
463 | uuid_t fr_nx_uuid __sk_aligned(8); |
464 | |
465 | const struct flow_mgr *fr_mgr; |
466 | const struct flow_route_bucket *fr_frb; |
467 | const struct flow_route_id_bucket *fr_frib; |
468 | }; |
469 | |
470 | /* valid values for fr_flags */ |
471 | #define FLOWRTF_ATTACHED 0x00000001 /* attached to RB trees */ |
472 | #define FLOWRTF_ONLINK 0x00000010 /* dst directly on the link */ |
473 | #define FLOWRTF_GATEWAY 0x00000020 /* gw IP address is valid */ |
474 | #define FLOWRTF_RESOLVED 0x00000040 /* flow route is resolved */ |
475 | #define FLOWRTF_HAS_LLINFO 0x00000080 /* has dst link-layer address */ |
476 | #define FLOWRTF_DELETED 0x00000100 /* route has been deleted */ |
477 | #define FLOWRTF_DST_LL_MCAST 0x00000200 /* dst is link layer multicast */ |
478 | #define FLOWRTF_DST_LL_BCAST 0x00000400 /* dst is link layer broadcast */ |
479 | #define FLOWRTF_STABLE_ADDR 0x00000800 /* local address prefers stable */ |
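
/*
 * A sketch of the idle bookkeeping described above (the authoritative
 * logic lives in the flow route code; the expiration interval name is
 * illustrative): dropping back to the minimum refcnt arms fr_expire,
 * and going above it clears the deadline again; the periodic GC frees
 * routes whose deadline has passed.
 *
 *	lck_spin_lock(&fr->fr_reflock);
 *	if (fr->fr_usecnt == FLOW_ROUTE_MINREF)
 *		fr->fr_expire = net_uptime() + expire_interval;
 *	else
 *		fr->fr_expire = 0;
 *	lck_spin_unlock(&fr->fr_reflock);
 */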
480 | |
481 | #define FR_LOCK(_fr) \ |
482 | lck_mtx_lock(&(_fr)->fr_lock) |
483 | #define FR_TRY_LOCK(_fr) \ |
484 | lck_mtx_try_lock(&(_fr)->fr_lock) |
485 | #define FR_LOCK_ASSERT_HELD(_fr) \ |
486 | LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_OWNED) |
487 | #define FR_LOCK_ASSERT_NOTHELD(_fr) \ |
488 | LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_NOTOWNED) |
489 | #define FR_UNLOCK(_fr) \ |
490 | lck_mtx_unlock(&(_fr)->fr_lock) |
491 | |
492 | #define FLOWRT_UPD_ETH_DST(_fr, _addr) do { \ |
493 | bcopy((_addr), (_fr)->fr_eth.ether_dhost, ETHER_ADDR_LEN); \ |
494 | (_fr)->fr_flags &= ~(FLOWRTF_DST_LL_MCAST|FLOWRTF_DST_LL_BCAST);\ |
495 | if (ETHER_IS_MULTICAST(_addr)) { \ |
496 | if (_ether_cmp(etherbroadcastaddr, (_addr)) == 0) \ |
497 | (_fr)->fr_flags |= FLOWRTF_DST_LL_BCAST; \ |
498 | else \ |
499 | (_fr)->fr_flags |= FLOWRTF_DST_LL_MCAST; \ |
500 | } \ |
501 | } while (0) |
502 | |
503 | RB_HEAD(flow_route_tree, flow_route); |
504 | RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_tree, flow_route, |
505 | fr_link, fr_cmp); |
506 | |
507 | struct flow_route_bucket { |
508 | decl_lck_rw_data(, frb_lock); |
509 | struct flow_route_tree frb_head; |
510 | const uint32_t frb_idx; |
511 | }; |
512 | |
513 | #define FRB_WLOCK(_frb) \ |
514 | lck_rw_lock_exclusive(&(_frb)->frb_lock) |
515 | #define FRB_WLOCKTORLOCK(_frb) \ |
516 | lck_rw_lock_exclusive_to_shared(&(_frb)->frb_lock) |
517 | #define FRB_WTRYLOCK(_frb) \ |
518 | lck_rw_try_lock_exclusive(&(_frb)->frb_lock) |
519 | #define FRB_WUNLOCK(_frb) \ |
520 | lck_rw_unlock_exclusive(&(_frb)->frb_lock) |
521 | #define FRB_RLOCK(_frb) \ |
522 | lck_rw_lock_shared(&(_frb)->frb_lock) |
523 | #define FRB_RLOCKTOWLOCK(_frb) \ |
524 | lck_rw_lock_shared_to_exclusive(&(_frb)->frb_lock) |
525 | #define FRB_RTRYLOCK(_frb) \ |
526 | lck_rw_try_lock_shared(&(_frb)->frb_lock) |
527 | #define FRB_RUNLOCK(_frb) \ |
528 | lck_rw_unlock_shared(&(_frb)->frb_lock) |
529 | #define FRB_UNLOCK(_frb) \ |
530 | lck_rw_done(&(_frb)->frb_lock) |
531 | #define FRB_WLOCK_ASSERT_HELD(_frb) \ |
532 | LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_EXCLUSIVE) |
533 | #define FRB_RLOCK_ASSERT_HELD(_frb) \ |
534 | LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_SHARED) |
535 | #define FRB_LOCK_ASSERT_HELD(_frb) \ |
536 | LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_HELD) |
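
/*
 * Note that FRB_RLOCKTOWLOCK() maps to lck_rw_lock_shared_to_exclusive(),
 * which returns FALSE *and drops the lock* when the upgrade fails; a
 * typical lookup-then-insert caller therefore looks like (a sketch):
 *
 *	FRB_RLOCK(frb);
 *	fr = RB_FIND(flow_route_tree, &frb->frb_head, &find);
 *	if (fr == NULL) {
 *		if (!FRB_RLOCKTOWLOCK(frb))
 *			FRB_WLOCK(frb);	// lock was dropped; retake it
 *		// re-lookup and insert under the exclusive lock
 *	}
 */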
537 | |
538 | RB_HEAD(flow_route_id_tree, flow_route); |
539 | RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_id_tree, flow_route, |
540 | fr_id_link, fr_id_cmp); |
541 | |
542 | struct flow_route_id_bucket { |
543 | decl_lck_rw_data(, frib_lock); |
544 | struct flow_route_id_tree frib_head; |
545 | const uint32_t frib_idx; |
546 | }; |
547 | |
548 | #define FRIB_WLOCK(_frib) \ |
549 | lck_rw_lock_exclusive(&(_frib)->frib_lock) |
550 | #define FRIB_WLOCKTORLOCK(_frib) \ |
551 | lck_rw_lock_exclusive_to_shared(&(_frib)->frib_lock) |
552 | #define FRIB_WTRYLOCK(_frib) \ |
553 | lck_rw_try_lock_exclusive(&(_frib)->frib_lock) |
554 | #define FRIB_WUNLOCK(_frib) \ |
555 | lck_rw_unlock_exclusive(&(_frib)->frib_lock) |
556 | #define FRIB_RLOCK(_frib) \ |
557 | lck_rw_lock_shared(&(_frib)->frib_lock) |
558 | #define FRIB_RLOCKTOWLOCK(_frib) \ |
559 | lck_rw_lock_shared_to_exclusive(&(_frib)->frib_lock) |
560 | #define FRIB_RTRYLOCK(_frib) \ |
561 | lck_rw_try_lock_shared(&(_frib)->frib_lock) |
562 | #define FRIB_RUNLOCK(_frib) \ |
563 | lck_rw_unlock_shared(&(_frib)->frib_lock) |
564 | #define FRIB_UNLOCK(_frib) \ |
565 | lck_rw_done(&(_frib)->frib_lock) |
566 | #define FRIB_WLOCK_ASSERT_HELD(_frib) \ |
567 | LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_EXCLUSIVE) |
568 | #define FRIB_RLOCK_ASSERT_HELD(_frib) \ |
569 | LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_SHARED) |
570 | #define FRIB_LOCK_ASSERT_HELD(_frib) \ |
571 | LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_HELD) |
572 | |
573 | struct flow_mgr { |
574 | char fm_name[IFNAMSIZ]; |
575 | uuid_t fm_uuid; |
576 | RB_ENTRY(flow_mgr) fm_link; |
577 | |
578 | struct cuckoo_hashtable *fm_flow_table; |
579 | size_t fm_flow_hash_count[FKMASK_IDX_MAX]; /* # of flows with mask */ |
580 | uint16_t fm_flow_hash_masks[FKMASK_IDX_MAX]; |
581 | |
582 | void *fm_owner_buckets __sized_by(fm_owner_bucket_tot_sz); /* cache-aligned fob */ |
	const size_t fm_owner_buckets_cnt; /* total # of fobs */
	const size_t fm_owner_bucket_sz; /* size of each fob */
	const size_t fm_owner_bucket_tot_sz; /* total allocated size of fobs */
586 | |
587 | void *fm_route_buckets __sized_by(fm_route_bucket_tot_sz); /* cache-aligned frb */ |
588 | const size_t fm_route_buckets_cnt; /* total # of frb */ |
589 | const size_t fm_route_bucket_sz; /* size of each frb */ |
	const size_t fm_route_bucket_tot_sz; /* total allocated size of frbs */
591 | |
592 | void *fm_route_id_buckets __sized_by(fm_route_id_bucket_tot_sz); /* cache-aligned frib */ |
593 | const size_t fm_route_id_buckets_cnt; /* total # of frib */ |
594 | const size_t fm_route_id_bucket_sz; /* size of each frib */ |
	const size_t fm_route_id_bucket_tot_sz; /* total allocated size of fribs */
596 | }; |
597 | |
598 | /* |
599 | * this func compare match with key; |
600 | * return values: |
601 | * 0 as long as @key(exact) matches what @match(wildcard) wants to match on. |
602 | * 1 when it doesn't match |
603 | */ |
604 | static inline int |
605 | flow_key_cmp(const struct flow_key *match, const struct flow_key *key) |
606 | { |
607 | #define FK_CMP(field, mask) \ |
608 | if ((match->fk_mask & mask) != 0) { \ |
609 | if ((key->fk_mask & mask) == 0) { \ |
610 | return 1; \ |
611 | } \ |
612 | int d = memcmp(&match->field, &key->field, sizeof(match->field)); \ |
613 | if (d != 0) { \ |
614 | return d; \ |
615 | } \ |
616 | } |
617 | |
618 | FK_CMP(fk_ipver, FKMASK_IPVER); |
619 | FK_CMP(fk_proto, FKMASK_PROTO); |
620 | FK_CMP(fk_src, FKMASK_SRC); |
621 | FK_CMP(fk_dst, FKMASK_DST); |
622 | FK_CMP(fk_sport, FKMASK_SPORT); |
623 | FK_CMP(fk_dport, FKMASK_DPORT); |
624 | |
625 | return 0; |
626 | } |
627 | |
628 | /* |
629 | * Similar to flow_key_cmp() except using memory compare with mask, |
630 | * done with SIMD instructions, if available for the platform. |
631 | */ |
632 | static inline int |
633 | flow_key_cmp_mask(const struct flow_key *match, |
634 | const struct flow_key *key, const struct flow_key *mask) |
635 | { |
636 | _CASSERT(FLOW_KEY_LEN == 48); |
637 | _CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key)); |
638 | _CASSERT((sizeof(struct flow_entry) % 16) == 0); |
639 | _CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0); |
640 | |
641 | /* local variables are __bidi_indexable with -fbounds-safety */ |
642 | const struct flow_key *match_idx = match; |
643 | const struct flow_key *key_idx = key; |
644 | const struct flow_key *mask_idx = mask; |
645 | |
	return sk_memcmp_mask_48B((const uint8_t *)match_idx,
	    (const uint8_t *)key_idx, (const uint8_t *)mask_idx);
648 | } |
649 | |
650 | static inline uint32_t |
651 | flow_key_hash(const struct flow_key *key) |
652 | { |
653 | uint32_t hash = FK_HASH_SEED; |
654 | #define FK_HASH(field, mask) \ |
655 | if ((key->fk_mask & mask) != 0) { \ |
656 | hash = net_flowhash(&key->field, sizeof(key->field), hash); \ |
657 | } |
658 | |
659 | FK_HASH(fk_ipver, FKMASK_IPVER); |
660 | FK_HASH(fk_proto, FKMASK_PROTO); |
661 | FK_HASH(fk_src, FKMASK_SRC); |
662 | FK_HASH(fk_dst, FKMASK_DST); |
663 | FK_HASH(fk_sport, FKMASK_SPORT); |
664 | FK_HASH(fk_dport, FKMASK_DPORT); |
665 | |
666 | return hash; |
667 | } |
668 | |
669 | __attribute__((always_inline)) |
670 | static inline void |
671 | flow_key_unpack(const struct flow_key *key, union sockaddr_in_4_6 *laddr, |
672 | union sockaddr_in_4_6 *faddr, uint8_t *protocol) |
673 | { |
674 | *protocol = key->fk_proto; |
675 | if (key->fk_ipver == IPVERSION) { |
676 | laddr->sa.sa_family = AF_INET; |
677 | laddr->sin.sin_addr = key->fk_src4; |
678 | laddr->sin.sin_port = key->fk_sport; |
679 | faddr->sa.sa_family = AF_INET; |
680 | faddr->sin.sin_addr = key->fk_dst4; |
681 | faddr->sin.sin_port = key->fk_dport; |
682 | } else if (key->fk_ipver == IPV6_VERSION) { |
683 | laddr->sa.sa_family = AF_INET6; |
684 | laddr->sin6.sin6_addr = key->fk_src6; |
685 | laddr->sin6.sin6_port = key->fk_sport; |
686 | faddr->sa.sa_family = AF_INET6; |
687 | faddr->sin6.sin6_addr = key->fk_dst6; |
688 | faddr->sin6.sin6_port = key->fk_dport; |
689 | } |
690 | } |
691 | |
692 | __attribute__((always_inline)) |
693 | static inline int |
694 | flow_req2key(struct nx_flow_req *req, struct flow_key *key) |
695 | { |
696 | FLOW_KEY_CLEAR(key); |
697 | |
698 | if (req->nfr_saddr.sa.sa_family == AF_INET) { |
699 | key->fk_ipver = IPVERSION; |
700 | key->fk_proto = req->nfr_ip_protocol; |
701 | key->fk_mask |= FKMASK_PROTO; |
702 | if (sk_sa_has_addr(SA(&req->nfr_saddr))) { |
703 | key->fk_src4 = req->nfr_saddr.sin.sin_addr; |
704 | key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC); |
705 | } |
706 | if (sk_sa_has_addr(SA(&req->nfr_daddr))) { |
707 | key->fk_dst4 = req->nfr_daddr.sin.sin_addr; |
708 | key->fk_mask |= (FKMASK_IPVER | FKMASK_DST); |
709 | } |
710 | if (sk_sa_has_port(SA(&req->nfr_saddr))) { |
711 | key->fk_sport = req->nfr_saddr.sin.sin_port; |
712 | key->fk_mask |= FKMASK_SPORT; |
713 | } |
714 | if (sk_sa_has_port(SA(&req->nfr_daddr))) { |
715 | key->fk_dport = req->nfr_daddr.sin.sin_port; |
716 | key->fk_mask |= FKMASK_DPORT; |
717 | } |
718 | } else if (req->nfr_saddr.sa.sa_family == AF_INET6) { |
719 | key->fk_ipver = IPV6_VERSION; |
720 | key->fk_proto = req->nfr_ip_protocol; |
721 | key->fk_mask |= FKMASK_PROTO; |
722 | if (sk_sa_has_addr(SA(&req->nfr_saddr))) { |
723 | key->fk_src6 = req->nfr_saddr.sin6.sin6_addr; |
724 | key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC); |
725 | } |
726 | if (sk_sa_has_addr(SA(&req->nfr_daddr))) { |
727 | key->fk_dst6 = req->nfr_daddr.sin6.sin6_addr; |
728 | key->fk_mask |= (FKMASK_IPVER | FKMASK_DST); |
729 | } |
730 | if (sk_sa_has_port(SA(&req->nfr_saddr))) { |
731 | key->fk_sport = req->nfr_saddr.sin6.sin6_port; |
732 | key->fk_mask |= FKMASK_SPORT; |
733 | } |
734 | if (sk_sa_has_port(SA(&req->nfr_daddr))) { |
735 | key->fk_dport = req->nfr_daddr.sin6.sin6_port; |
736 | key->fk_mask |= FKMASK_DPORT; |
737 | } |
738 | } else { |
		SK_ERR("unknown AF %d", req->nfr_saddr.sa.sa_family);
740 | return ENOTSUP; |
741 | } |
742 | |
743 | switch (key->fk_mask) { |
744 | case FKMASK_5TUPLE: |
745 | case FKMASK_4TUPLE: |
746 | case FKMASK_3TUPLE: |
747 | case FKMASK_2TUPLE: |
748 | case FKMASK_IPFLOW3: |
749 | case FKMASK_IPFLOW2: |
750 | case FKMASK_IPFLOW1: |
751 | break; |
752 | default: |
		SK_ERR("unknown flow key mask 0x%04x", key->fk_mask);
754 | return ENOTSUP; |
755 | } |
756 | |
757 | return 0; |
758 | } |
759 | |
760 | __attribute__((always_inline)) |
761 | static inline void |
762 | flow_pkt2key(struct __kern_packet *pkt, boolean_t input, |
763 | struct flow_key *key) |
764 | { |
765 | struct __flow *flow = pkt->pkt_flow; |
766 | |
767 | FLOW_KEY_CLEAR(key); |
768 | |
769 | if (__improbable((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0)) { |
770 | return; |
771 | } |
772 | |
773 | ASSERT(flow->flow_l3._l3_ip_ver != 0); |
774 | |
775 | key->fk_ipver = flow->flow_l3._l3_ip_ver; |
776 | key->fk_proto = flow->flow_ip_proto; |
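	/*
	 * For inbound packets, the classifier fields are swapped below so
	 * that the key is always in local-endpoint order: fk_src/fk_sport
	 * hold the local address/port, fk_dst/fk_dport the remote ones.
	 */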
777 | if (input) { |
778 | if (flow->flow_ip_ver == IPVERSION) { |
779 | key->fk_src4 = flow->flow_ipv4_dst; |
780 | key->fk_sport = flow->flow_tcp_dst; |
781 | key->fk_dst4 = flow->flow_ipv4_src; |
782 | key->fk_dport = flow->flow_tcp_src; |
783 | } else { |
784 | key->fk_src6 = flow->flow_ipv6_dst; |
785 | key->fk_sport = flow->flow_tcp_dst; |
786 | key->fk_dst6 = flow->flow_ipv6_src; |
787 | key->fk_dport = flow->flow_tcp_src; |
788 | } |
789 | } else { |
790 | if (flow->flow_ip_ver == IPVERSION) { |
791 | key->fk_src4 = flow->flow_ipv4_src; |
792 | key->fk_sport = flow->flow_tcp_src; |
793 | key->fk_dst4 = flow->flow_ipv4_dst; |
794 | key->fk_dport = flow->flow_tcp_dst; |
795 | } else { |
796 | key->fk_src6 = flow->flow_ipv6_src; |
797 | key->fk_sport = flow->flow_tcp_src; |
798 | key->fk_dst6 = flow->flow_ipv6_dst; |
799 | key->fk_dport = flow->flow_tcp_dst; |
800 | } |
801 | } |
802 | } |
803 | |
804 | __attribute__((always_inline)) |
805 | static inline int |
806 | flow_ip_cmp(const void *a0, const void *b0, size_t alen) |
807 | { |
808 | struct flow_ip_addr *a = __DECONST(struct flow_ip_addr *, a0), |
809 | *b = __DECONST(struct flow_ip_addr *, b0); |
810 | |
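	/*
	 * The word-wise comparison below yields a host-endian ordering
	 * rather than a numeric network-byte-order one; that is fine,
	 * since callers only need a consistent total order for the trees.
	 */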
811 | switch (alen) { |
812 | case sizeof(struct in_addr): |
813 | if (a->_addr32[0] > b->_addr32[0]) { |
814 | return 1; |
815 | } |
816 | if (a->_addr32[0] < b->_addr32[0]) { |
817 | return -1; |
818 | } |
819 | break; |
820 | |
821 | case sizeof(struct in6_addr): |
822 | if (a->_addr64[1] > b->_addr64[1]) { |
823 | return 1; |
824 | } |
825 | if (a->_addr64[1] < b->_addr64[1]) { |
826 | return -1; |
827 | } |
828 | if (a->_addr64[0] > b->_addr64[0]) { |
829 | return 1; |
830 | } |
831 | if (a->_addr64[0] < b->_addr64[0]) { |
832 | return -1; |
833 | } |
834 | break; |
835 | |
836 | default: |
837 | VERIFY(0); |
838 | /* NOTREACHED */ |
839 | __builtin_unreachable(); |
840 | } |
841 | return 0; |
842 | } |
843 | |
844 | __attribute__((always_inline)) |
845 | static inline struct flow_owner_bucket * |
846 | flow_mgr_get_fob_at_idx(struct flow_mgr *fm, uint32_t idx) |
847 | { |
848 | char *buckets = fm->fm_owner_buckets; |
849 | void *bucket = buckets + (idx * fm->fm_owner_bucket_sz); |
850 | return bucket; |
851 | } |
852 | |
853 | __attribute__((always_inline)) |
854 | static inline struct flow_route_bucket * |
855 | flow_mgr_get_frb_at_idx(struct flow_mgr *fm, uint32_t idx) |
856 | { |
857 | char *buckets = fm->fm_route_buckets; |
858 | void *bucket = buckets + (idx * fm->fm_route_bucket_sz); |
859 | return bucket; |
860 | } |
861 | |
862 | __attribute__((always_inline)) |
863 | static inline struct flow_route_id_bucket * |
864 | flow_mgr_get_frib_at_idx(struct flow_mgr *fm, uint32_t idx) |
865 | { |
866 | char *buckets = fm->fm_route_id_buckets; |
867 | void *bucket = buckets + (idx * fm->fm_route_id_bucket_sz); |
868 | return bucket; |
869 | } |
870 | |
871 | __attribute__((always_inline)) |
872 | static inline uint32_t |
873 | flow_mgr_get_fob_idx(struct flow_mgr *fm, |
874 | struct flow_owner_bucket *bkt) |
875 | { |
876 | ASSERT(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) % |
877 | fm->fm_owner_bucket_sz == 0); |
878 | return (uint32_t)(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) / |
879 | fm->fm_owner_bucket_sz); |
880 | } |
881 | |
882 | __attribute__((always_inline)) |
883 | static inline size_t |
884 | flow_mgr_get_num_flows(struct flow_mgr *mgr) |
885 | { |
886 | ASSERT(mgr->fm_flow_table != NULL); |
	return cuckoo_hashtable_entries(mgr->fm_flow_table);
888 | } |
889 | |
890 | extern unsigned int sk_fo_size; |
891 | extern struct skmem_cache *sk_fo_cache; |
892 | |
893 | extern unsigned int sk_fe_size; |
894 | extern struct skmem_cache *sk_fe_cache; |
895 | |
896 | extern unsigned int sk_fab_size; |
897 | extern struct skmem_cache *sk_fab_cache; |
898 | |
899 | extern uint32_t flow_seed; |
900 | |
901 | extern struct skmem_cache *flow_route_cache; |
902 | extern struct skmem_cache *flow_stats_cache; |
903 | |
904 | __BEGIN_DECLS |
905 | |
906 | typedef void (*flow_route_ctor_fn_t)(void *arg, struct flow_route *); |
907 | typedef int (*flow_route_resolve_fn_t)(void *arg, struct flow_route *, |
908 | struct __kern_packet *); |
909 | |
910 | extern int flow_init(void); |
911 | extern void flow_fini(void); |
912 | |
913 | extern void flow_mgr_init(void); |
914 | extern void flow_mgr_fini(void); |
915 | extern struct flow_mgr *flow_mgr_find_lock(uuid_t); |
916 | extern void flow_mgr_unlock(void); |
917 | extern struct flow_mgr * flow_mgr_create(size_t, size_t, size_t, size_t); |
918 | extern void flow_mgr_destroy(struct flow_mgr *); |
919 | extern void flow_mgr_terminate(struct flow_mgr *); |
920 | extern int flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm, |
921 | struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req, |
922 | flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve, void *fr_arg); |
923 | extern struct flow_owner_bucket *flow_mgr_get_fob_by_pid( |
924 | struct flow_mgr *, pid_t); |
925 | extern struct flow_entry *flow_mgr_get_fe_by_uuid_rlock( |
926 | struct flow_mgr *, uuid_t); |
927 | extern struct flow_route_bucket *flow_mgr_get_frb_by_addr( |
928 | struct flow_mgr *, union sockaddr_in_4_6 *); |
929 | extern struct flow_route_id_bucket *flow_mgr_get_frib_by_uuid( |
930 | struct flow_mgr *, uuid_t); |
931 | extern int flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask); |
932 | extern int flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask); |
933 | |
934 | extern struct flow_entry * fe_alloc(boolean_t can_block); |
935 | |
936 | extern int flow_namespace_create(union sockaddr_in_4_6 *, uint8_t protocol, |
937 | netns_token *, uint16_t, struct ns_flow_info *); |
938 | extern void flow_namespace_half_close(netns_token *token); |
939 | extern void flow_namespace_withdraw(netns_token *); |
940 | extern void flow_namespace_destroy(netns_token *); |
941 | |
942 | extern struct flow_owner_bucket *flow_owner_buckets_alloc(size_t, size_t *, size_t *); |
943 | extern void flow_owner_buckets_free(struct flow_owner_bucket *, size_t); |
944 | extern void flow_owner_bucket_init(struct flow_owner_bucket *); |
945 | extern void flow_owner_bucket_destroy(struct flow_owner_bucket *); |
946 | extern void flow_owner_bucket_purge_all(struct flow_owner_bucket *); |
947 | extern void flow_owner_attach_nexus_port(struct flow_mgr *, boolean_t, |
948 | pid_t, nexus_port_t); |
949 | extern uint32_t flow_owner_detach_nexus_port(struct flow_mgr *, |
950 | boolean_t, pid_t, nexus_port_t, boolean_t); |
951 | extern struct flow_owner *flow_owner_alloc(struct flow_owner_bucket *, |
952 | struct proc *, nexus_port_t, bool, bool, struct nx_flowswitch*, |
953 | struct nexus_adapter *, void *, bool); |
954 | extern void flow_owner_free(struct flow_owner_bucket *, struct flow_owner *); |
955 | extern struct flow_entry *flow_owner_create_entry(struct flow_owner *, |
956 | struct nx_flow_req *, boolean_t, uint32_t, boolean_t, |
957 | struct flow_route *, int *); |
958 | extern int flow_owner_destroy_entry(struct flow_owner *, uuid_t, bool, void *); |
959 | extern struct flow_owner *flow_owner_find_by_pid(struct flow_owner_bucket *, |
960 | pid_t, void *, bool); |
961 | extern int flow_owner_flowadv_index_alloc(struct flow_owner *, flowadv_idx_t *); |
962 | extern void flow_owner_flowadv_index_free(struct flow_owner *, flowadv_idx_t); |
963 | extern uint32_t flow_owner_activate_nexus_port(struct flow_mgr *, |
964 | boolean_t, pid_t, nexus_port_t, struct nexus_adapter *, |
965 | na_activate_mode_t); |
966 | |
967 | extern struct flow_entry *flow_mgr_find_fe_by_key(struct flow_mgr *, |
968 | struct flow_key *); |
969 | extern struct flow_entry * flow_mgr_find_conflicting_fe(struct flow_mgr *fm, |
970 | struct flow_key *fe_key); |
971 | extern void flow_mgr_foreach_flow(struct flow_mgr *fm, |
972 | void (^flow_handler)(struct flow_entry *fe)); |
973 | extern struct flow_entry *flow_entry_find_by_uuid(struct flow_owner *, |
974 | uuid_t); |
975 | extern struct flow_entry * flow_entry_alloc(struct flow_owner *fo, |
976 | struct nx_flow_req *req, int *perr); |
977 | extern void flow_entry_teardown(struct flow_owner *, struct flow_entry *); |
978 | extern void flow_entry_destroy(struct flow_owner *, struct flow_entry *, bool, |
979 | void *); |
980 | extern void flow_entry_retain(struct flow_entry *fe); |
981 | extern void flow_entry_release(struct flow_entry **pfe); |
982 | extern uint32_t flow_entry_refcnt(struct flow_entry *fe); |
983 | extern bool rx_flow_demux_match(struct nx_flowswitch *, struct flow_entry *, struct __kern_packet *); |
984 | extern struct flow_entry *rx_lookup_child_flow(struct nx_flowswitch *fsw, |
985 | struct flow_entry *, struct __kern_packet *); |
986 | extern struct flow_entry *tx_lookup_child_flow(struct flow_entry *, uuid_t); |
987 | |
988 | extern struct flow_entry_dead *flow_entry_dead_alloc(zalloc_flags_t); |
989 | extern void flow_entry_dead_free(struct flow_entry_dead *); |
990 | |
991 | extern void flow_entry_stats_get(struct flow_entry *, struct sk_stats_flow *); |
992 | |
993 | extern int flow_pkt_classify(struct __kern_packet *pkt, struct ifnet *ifp, |
994 | sa_family_t af, bool input); |
995 | |
996 | extern void flow_track_stats(struct flow_entry *, uint64_t, uint64_t, |
997 | bool, bool); |
998 | extern int flow_pkt_track(struct flow_entry *, struct __kern_packet *, bool); |
999 | extern boolean_t flow_track_tcp_want_abort(struct flow_entry *); |
extern void flow_track_abort_tcp(struct flow_entry *fe,
    struct __kern_packet *in_pkt, struct __kern_packet *rst_pkt);
1002 | extern void flow_track_abort_quic(struct flow_entry *fe, uint8_t *token); |
1003 | |
1004 | extern void fsw_host_rx(struct nx_flowswitch *, struct pktq *); |
1005 | extern void fsw_host_sendup(struct ifnet *, struct mbuf *, struct mbuf *, |
1006 | uint32_t, uint32_t); |
1007 | |
1008 | extern void flow_rx_agg_tcp(struct nx_flowswitch *fsw, struct flow_entry *fe, |
1009 | uint32_t flags); |
1010 | |
1011 | extern void flow_route_init(void); |
1012 | extern void flow_route_fini(void); |
1013 | extern struct flow_route_bucket *flow_route_buckets_alloc(size_t, size_t *, size_t *); |
1014 | extern void flow_route_buckets_free(struct flow_route_bucket *, size_t); |
1015 | extern void flow_route_bucket_init(struct flow_route_bucket *); |
1016 | extern void flow_route_bucket_destroy(struct flow_route_bucket *); |
1017 | extern void flow_route_bucket_purge_all(struct flow_route_bucket *); |
1018 | extern struct flow_route_id_bucket *flow_route_id_buckets_alloc(size_t, |
1019 | size_t *, size_t *); |
1020 | extern void flow_route_id_buckets_free(struct flow_route_id_bucket *, size_t); |
1021 | extern void flow_route_id_bucket_init(struct flow_route_id_bucket *); |
1022 | extern void flow_route_id_bucket_destroy(struct flow_route_id_bucket *); |
1023 | |
1024 | extern int flow_route_select_laddr(union sockaddr_in_4_6 *, |
1025 | union sockaddr_in_4_6 *, struct ifnet *, struct rtentry *, uint32_t *, int); |
1026 | extern int flow_route_find(struct kern_nexus *, struct flow_mgr *, |
1027 | struct ifnet *, struct nx_flow_req *, flow_route_ctor_fn_t, |
1028 | flow_route_resolve_fn_t, void *, struct flow_route **); |
1029 | extern int flow_route_configure(struct flow_route *, struct ifnet *, struct nx_flow_req *); |
1030 | extern void flow_route_retain(struct flow_route *); |
1031 | extern void flow_route_release(struct flow_route *); |
1032 | extern uint32_t flow_route_prune(struct flow_mgr *, struct ifnet *, |
1033 | uint32_t *); |
1034 | extern void flow_route_cleanup(struct flow_route *); |
1035 | extern boolean_t flow_route_laddr_validate(union sockaddr_in_4_6 *, |
1036 | struct ifnet *, uint32_t *); |
1037 | extern boolean_t flow_route_key_validate(struct flow_key *, struct ifnet *, |
1038 | uint32_t *); |
1039 | extern void flow_qset_select_dynamic(struct nx_flowswitch *, |
1040 | struct flow_entry *, boolean_t); |
1041 | extern void flow_stats_init(void); |
1042 | extern void flow_stats_fini(void); |
1043 | extern struct flow_stats *flow_stats_alloc(boolean_t cansleep); |
1044 | |
1045 | #if SK_LOG |
1046 | #define FLOWKEY_DBGBUF_SIZE 256 |
1047 | #define FLOWENTRY_DBGBUF_SIZE 512 |
1048 | extern char *fk_as_string(const struct flow_key *fk, char *, size_t); |
1049 | extern char *fe_as_string(const struct flow_entry *fe, char *, size_t); |
1050 | #endif /* SK_LOG */ |
1051 | __END_DECLS |
1052 | #endif /* BSD_KERNEL_PRIVATE */ |
#endif /* !_SKYWALK_NEXUS_FLOWSWITCH_FLOW_FLOWVAR_H_ */
1054 | |