1 | /* |
2 | * Copyright (c) 2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * The flowidns (Flow ID namespace) module provides functionality to allocate |
31 | * globally unique identifier for a flow. |
32 | * Currently we have four modules (flowswitch, inpcb, PF & IPSec driver) in our |
33 | * stack which need to generate flow identifiers. These modules stamp every |
34 | * outgoing packet with a flowID. This flowID can be used by other upstream |
35 | * components in the device for flow classification purpose. For example, the |
36 | * FQ-Codel algorithm relies on this per packet flowID to avoid parsing every |
37 | * packet header for flow classification. A globally unique flowID can also be |
38 | * used by the networking feature offload engines operating at link layer to |
39 | * avoid flow classification operations. |
40 | * For performance reasons we use the concept of a flow domain and the |
41 | * data structures used by the flowidns module have per domain instance. |
42 | * These domains represent the above mentioned four modules generating the |
43 | * flowID. This allows us to avoid global lock being used while allocating & |
44 | * releasing flowID. FlowID is a 32-bit unsigned integer and the 2 most |
45 | * significant bits of flowID are used to encode the domain ID. This |
46 | * encoding also means that the flowID generator only needs to ensure |
47 | * uniqueness of identifier within a domain. |
48 | */ |
49 | |
50 | #include <skywalk/os_skywalk.h> |
51 | #include <skywalk/os_skywalk_private.h> |
52 | #include <skywalk/namespace/flowidns.h> |
53 | #include <dev/random/randomdev.h> |
54 | #include <sys/sdt.h> |
55 | |
56 | /* maximum number of flowID generation retries in case of collision */ |
57 | #define FLOWIDNS_MAX_FLOWID_GEN_RETRY 5 |
58 | |
59 | /* 2 most significant bits of the flowID are used to encode the flow domain */ |
60 | #define FLOWIDNS_FLOWID_DOMAIN_SHIFT 30 |
61 | #define FLOWIDNS_FLOWID_DOMAIN_MASK (0x03 << FLOWIDNS_FLOWID_DOMAIN_SHIFT) |
62 | |
63 | #define FLOWIDNS_FLOWID_SET_DOMAIN(_dom, _fid) do { \ |
64 | (_fid) &= ~FLOWIDNS_FLOWID_DOMAIN_MASK; \ |
65 | (_fid) |= ((_dom) << FLOWIDNS_FLOWID_DOMAIN_SHIFT); \ |
66 | } while (0) |
67 | |
68 | #define FLOWIDNS_FLOWID_GET_DOMAIN(_dom, _fid) do { \ |
69 | (_dom) = (_fid) >> FLOWIDNS_FLOWID_DOMAIN_SHIFT; \ |
70 | } while (0) |
71 | |
72 | #define FLOWIDNS_DOM_LOCK(_dom) \ |
73 | lck_mtx_lock(&(flowidns_domain_array[(_dom)].fd_mtx)) |
74 | #define FLOWIDNS_DOM_UNLOCK(_dom) \ |
75 | lck_mtx_unlock(&(flowidns_domain_array[(_dom)].fd_mtx)) |
76 | |
77 | struct flowidns_flowid_tree_node { |
78 | RB_ENTRY(flowidns_flowid_tree_node) fftn_link; |
79 | struct flowidns_flow_key fftn_flowkey; |
80 | flowidns_flowid_t fftn_flowid; |
81 | }; |
82 | |
83 | static LCK_GRP_DECLARE(flowidns_lock_group, "flowidns_lock" ); |
84 | static int __flowidns_inited = 0; |
85 | |
86 | static SKMEM_TYPE_DEFINE(flowidns_fftn_zone, struct flowidns_flowid_tree_node); |
87 | |
88 | __attribute__((always_inline)) |
89 | static inline int |
90 | fftn_cmp(const struct flowidns_flowid_tree_node *fftn1, |
91 | const struct flowidns_flowid_tree_node *fftn2) |
92 | { |
93 | return (signed)(fftn1->fftn_flowid - fftn2->fftn_flowid); |
94 | } |
95 | |
96 | RB_HEAD(flowidns_flowid_tree, flowidns_flowid_tree_node); |
97 | RB_PROTOTYPE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link, |
98 | fftn_cmp); |
99 | RB_GENERATE(flowidns_flowid_tree, flowidns_flowid_tree_node, fftn_link, |
100 | fftn_cmp); |
101 | |
102 | struct flowidns_domain { |
103 | decl_lck_mtx_data(, fd_mtx); |
104 | struct flowidns_flowid_tree fd_flowid_tree; |
105 | uint32_t fd_id; |
106 | uint64_t fd_nallocs; |
107 | uint64_t fd_nreleases; |
108 | uint64_t fd_ncollisions; |
109 | }; |
110 | |
111 | static struct flowidns_domain flowidns_domain_array[FLOWIDNS_DOMAIN_MAX + 1]; |
112 | |
113 | static struct flowidns_flowid_tree_node * |
114 | flowidns_fftn_alloc(bool can_block) |
115 | { |
116 | struct flowidns_flowid_tree_node *fftn = NULL; |
117 | zalloc_flags_t zflags; |
118 | |
119 | zflags = can_block ? Z_WAITOK_ZERO : Z_NOWAIT_ZERO; |
120 | fftn = zalloc_flags(flowidns_fftn_zone, zflags); |
121 | return fftn; |
122 | } |
123 | |
124 | static void |
125 | flowidns_fftn_free(struct flowidns_flowid_tree_node *fftn) |
126 | { |
127 | zfree(flowidns_fftn_zone, fftn); |
128 | } |
129 | |
130 | static struct flowidns_flowid_tree_node * |
131 | flowidns_find_fftn(flowidns_flowid_t flowid, flowidns_domain_id_t domain) |
132 | { |
133 | struct flowidns_flowid_tree_node find = { .fftn_flowid = flowid }; |
134 | |
135 | return RB_FIND(flowidns_flowid_tree, |
136 | &(flowidns_domain_array[domain].fd_flowid_tree), &find); |
137 | } |
138 | |
139 | void |
140 | flowidns_allocate_flowid(flowidns_domain_id_t domain, |
141 | struct flowidns_flow_key *pflow_key, flowidns_flowid_t *pflowid) |
142 | { |
143 | struct flowidns_flowid_tree_node *fftn = NULL, *dup = NULL; |
144 | uint32_t flowid = 0; |
145 | int retry_cnt = 0; |
146 | |
147 | VERIFY(__flowidns_inited == 1); |
148 | VERIFY(pflowid != NULL); |
149 | VERIFY(pflow_key != NULL); |
150 | VERIFY(domain >= FLOWIDNS_DOMAIN_MIN && |
151 | domain <= FLOWIDNS_DOMAIN_MAX); |
152 | |
153 | FLOWIDNS_DOM_LOCK(domain); |
154 | |
155 | fftn = flowidns_fftn_alloc(true); |
156 | if (__improbable(fftn == NULL)) { |
157 | panic_plain("failed to allocate flowid node\n" ); |
158 | } |
159 | retry: |
160 | /* try to get a non-zero flow identifier */ |
161 | do { |
162 | read_frandom(buffer: &flowid, numBytes: sizeof(flowid)); |
163 | } while (__improbable(flowid == 0)); |
164 | |
165 | FLOWIDNS_FLOWID_SET_DOMAIN(domain, flowid); |
166 | |
167 | fftn->fftn_flowid = flowid; |
168 | fftn->fftn_flowkey = *pflow_key; |
169 | dup = RB_INSERT(flowidns_flowid_tree, |
170 | &(flowidns_domain_array[domain].fd_flowid_tree), fftn); |
171 | |
172 | /* try to get a unique flow identifier */ |
173 | if (dup != NULL) { |
174 | retry_cnt++; |
175 | flowidns_domain_array[domain].fd_ncollisions++; |
176 | SK_ERR("duplicate flowid 0x%x generated, retrying %d" , |
177 | flowid, retry_cnt); |
178 | /* |
179 | * safeguard to check if we need a better hash strategy. |
180 | */ |
181 | VERIFY(retry_cnt <= FLOWIDNS_MAX_FLOWID_GEN_RETRY); |
182 | goto retry; |
183 | } |
184 | *pflowid = flowid; |
185 | flowidns_domain_array[domain].fd_nallocs++; |
186 | VERIFY(flowidns_domain_array[domain].fd_nallocs != 0); |
187 | |
188 | FLOWIDNS_DOM_UNLOCK(domain); |
189 | |
190 | DTRACE_SKYWALK2(fidalloc, uint32_t, domain, uint32_t, flowid); |
191 | } |
192 | |
193 | void |
194 | flowidns_release_flowid(flowidns_flowid_t flowid) |
195 | { |
196 | struct flowidns_flowid_tree_node *fftn; |
197 | flowidns_domain_id_t domain; |
198 | |
199 | VERIFY(__flowidns_inited == 1); |
200 | VERIFY(flowid != 0); |
201 | |
202 | FLOWIDNS_FLOWID_GET_DOMAIN(domain, flowid); |
203 | VERIFY(domain >= FLOWIDNS_DOMAIN_MIN && |
204 | domain <= FLOWIDNS_DOMAIN_MAX); |
205 | |
206 | DTRACE_SKYWALK2(fidrel, uint32_t, domain, uint32_t, flowid); |
207 | |
208 | FLOWIDNS_DOM_LOCK(domain); |
209 | |
210 | fftn = flowidns_find_fftn(flowid, domain); |
211 | if (fftn == NULL) { |
212 | panic_plain("flowid 0x%x not found in domain %d\n" , flowid, |
213 | domain); |
214 | } |
215 | RB_REMOVE(flowidns_flowid_tree, |
216 | &(flowidns_domain_array[domain].fd_flowid_tree), fftn); |
217 | ASSERT(fftn->fftn_flowid == flowid); |
218 | flowidns_fftn_free(fftn); |
219 | flowidns_domain_array[domain].fd_nreleases++; |
220 | VERIFY(flowidns_domain_array[domain].fd_nreleases != 0); |
221 | |
222 | FLOWIDNS_DOM_UNLOCK(domain); |
223 | } |
224 | |
225 | int |
226 | flowidns_init() |
227 | { |
228 | flowidns_domain_id_t domain; |
229 | |
230 | VERIFY(__flowidns_inited == 0); |
231 | _CASSERT(SFH_DOMAIN_IPSEC == FLOWIDNS_DOMAIN_IPSEC); |
232 | _CASSERT(SFH_DOMAIN_FLOWSWITCH == FLOWIDNS_DOMAIN_FLOWSWITCH); |
233 | _CASSERT(SFH_DOMAIN_INPCB == FLOWIDNS_DOMAIN_INPCB); |
234 | _CASSERT(SFH_DOMAIN_PF == FLOWIDNS_DOMAIN_PF); |
235 | _CASSERT(FLOWIDNS_DOMAIN_MIN == 0); |
236 | /* |
237 | * FLOWIDNS_FLOWID_DOMAIN_{MASK, SHIFT} macros are based on below |
238 | * assumption. |
239 | */ |
240 | _CASSERT(FLOWIDNS_DOMAIN_MAX == 3); |
241 | |
242 | for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX; |
243 | domain++) { |
244 | bzero(s: &flowidns_domain_array[domain], |
245 | n: sizeof(struct flowidns_domain)); |
246 | flowidns_domain_array[domain].fd_id = domain; |
247 | lck_mtx_init(lck: &(flowidns_domain_array[domain].fd_mtx), |
248 | grp: &flowidns_lock_group, NULL); |
249 | RB_INIT(&(flowidns_domain_array[domain].fd_flowid_tree)); |
250 | } |
251 | |
252 | __flowidns_inited = 1; |
253 | SK_D("initialized flow ID namespace" ); |
254 | return 0; |
255 | } |
256 | |
257 | void |
258 | flowidns_fini(void) |
259 | { |
260 | flowidns_domain_id_t domain; |
261 | struct flowidns_flowid_tree_node *fftn, *fftn_tmp; |
262 | |
263 | VERIFY(__flowidns_inited == 1); |
264 | |
265 | for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX; |
266 | domain++) { |
267 | FLOWIDNS_DOM_LOCK(domain); |
268 | |
269 | RB_FOREACH_SAFE(fftn, flowidns_flowid_tree, |
270 | &(flowidns_domain_array[domain].fd_flowid_tree), |
271 | fftn_tmp) { |
272 | RB_REMOVE(flowidns_flowid_tree, |
273 | &(flowidns_domain_array[domain].fd_flowid_tree), |
274 | fftn); |
275 | flowidns_fftn_free(fftn); |
276 | } |
277 | |
278 | FLOWIDNS_DOM_UNLOCK(domain); |
279 | |
280 | lck_mtx_destroy(lck: &(flowidns_domain_array[domain].fd_mtx), |
281 | grp: &flowidns_lock_group); |
282 | } |
283 | |
284 | __flowidns_inited = 0; |
285 | } |
286 | |
287 | static int flowidns_stats_sysctl SYSCTL_HANDLER_ARGS; |
288 | SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, flowidns, |
289 | CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, |
290 | 0, 0, flowidns_stats_sysctl, "-" , |
291 | "flowid allocations (struct sk_stats_flowidns_header, " |
292 | "skywalk/os_stats_private.h)" ); |
293 | |
294 | static int |
295 | flowidns_dump_domain(struct sysctl_req *req, struct flowidns_domain *domain) |
296 | { |
297 | struct flowidns_flowid_tree_node *fftn; |
298 | struct sk_stats_flowidns_header ; |
299 | struct sk_stats_flowidns_record record; |
300 | uint64_t n_records; |
301 | int err; |
302 | |
303 | /* Fill out header */ |
304 | memset(s: &header, c: 0, n: sizeof(header)); |
305 | header.sfh_domain = domain->fd_id; |
306 | header.sfh_nallocs = domain->fd_nallocs; |
307 | header.sfh_nreleases = domain->fd_nreleases; |
308 | header.sfh_ncollisions = domain->fd_ncollisions; |
309 | n_records = domain->fd_nallocs - domain->fd_nreleases; |
310 | VERIFY(n_records <= UINT32_MAX); |
311 | header.sfh_nrecords = (uint32_t)n_records; |
312 | |
313 | err = SYSCTL_OUT(req, &header, sizeof(header)); |
314 | if (err) { |
315 | return err; |
316 | } |
317 | |
318 | /* Fill out records */ |
319 | RB_FOREACH(fftn, flowidns_flowid_tree, &domain->fd_flowid_tree) { |
320 | VERIFY(n_records > 0); |
321 | n_records--; |
322 | bzero(s: &record, n: sizeof(record)); |
323 | record.sfr_flowid = fftn->fftn_flowid; |
324 | record.sfr_af = fftn->fftn_flowkey.ffk_af; |
325 | record.sfr_ipproto = fftn->fftn_flowkey.ffk_proto; |
326 | record.sfr_protoid = fftn->fftn_flowkey.ffk_protoid; |
327 | _CASSERT(sizeof(fftn->fftn_flowkey.ffk_laddr) == |
328 | sizeof(record.sfr_laddr)); |
329 | _CASSERT(sizeof(fftn->fftn_flowkey.ffk_raddr) == |
330 | sizeof(record.sfr_raddr)); |
331 | bcopy(src: &(fftn->fftn_flowkey.ffk_laddr), dst: &record.sfr_laddr, |
332 | n: sizeof(record.sfr_laddr)); |
333 | bcopy(src: &(fftn->fftn_flowkey.ffk_raddr), dst: &record.sfr_raddr, |
334 | n: sizeof(record.sfr_raddr)); |
335 | |
336 | err = SYSCTL_OUT(req, &record, sizeof(record)); |
337 | if (err) { |
338 | return err; |
339 | } |
340 | } |
341 | VERIFY(n_records == 0); |
342 | return 0; |
343 | } |
344 | |
345 | static int |
346 | flowidns_stats_sysctl SYSCTL_HANDLER_ARGS |
347 | { |
348 | #pragma unused(oidp, arg1, arg2) |
349 | flowidns_domain_id_t domain; |
350 | int err = 0; |
351 | |
352 | if (!kauth_cred_issuser(cred: kauth_cred_get())) { |
353 | return EPERM; |
354 | } |
355 | |
356 | if (__flowidns_inited == 0) { |
357 | return ENOTSUP; |
358 | } |
359 | |
360 | net_update_uptime(); |
361 | |
362 | for (domain = FLOWIDNS_DOMAIN_MIN; domain <= FLOWIDNS_DOMAIN_MAX; |
363 | domain++) { |
364 | FLOWIDNS_DOM_LOCK(domain); |
365 | err = flowidns_dump_domain(req, domain: &flowidns_domain_array[domain]); |
366 | FLOWIDNS_DOM_UNLOCK(domain); |
367 | if (err != 0) { |
368 | return err; |
369 | } |
370 | } |
371 | /* |
372 | * If this is just a request for length, add slop because |
373 | * this is dynamically changing data |
374 | */ |
375 | if (req->oldptr == USER_ADDR_NULL) { |
376 | req->oldidx += 20 * sizeof(struct sk_stats_flowidns_record); |
377 | } |
378 | return err; |
379 | } |
380 | |