/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <skywalk/nexus/flowswitch/flow/flow_var.h>

static uint32_t flow_owner_bucket_purge_common(struct flow_owner_bucket *,
    nexus_port_t, boolean_t);
static int fo_cmp(const struct flow_owner *, const struct flow_owner *);
static struct flow_owner *fo_alloc(boolean_t);
static void fo_free(struct flow_owner *);

static LCK_GRP_DECLARE(flow_owner_lock_group, "sk_flow_owner_lock");
static LCK_ATTR_DECLARE(flow_owner_lock_attr, 0, 0);

RB_GENERATE_PREV(flow_owner_tree, flow_owner, fo_link, fo_cmp);

KALLOC_TYPE_VAR_DEFINE(KT_SK_FOB, struct flow_owner_bucket, KT_DEFAULT);

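/*
 * Allocate an array of fob_cnt flow owner buckets, each padded to a CPU
 * cache line boundary.  The per-bucket stride is returned in *fob_sz and
 * the total allocation size in *tot_sz; the latter is what the caller
 * passes back to flow_owner_buckets_free().
 */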
struct flow_owner_bucket *
flow_owner_buckets_alloc(size_t fob_cnt, size_t *fob_sz, size_t *tot_sz)
{
	size_t cache_sz = skmem_cpu_cache_line_size();
	struct flow_owner_bucket *fob;
	size_t fob_tot_sz;

	/* each bucket is CPU cache-aligned */
	*fob_sz = P2ROUNDUP(sizeof(*fob), cache_sz);
	*tot_sz = fob_tot_sz = fob_cnt * (*fob_sz);
	fob = sk_alloc_type_hash(KT_SK_FOB, fob_tot_sz, Z_WAITOK,
	    skmem_tag_fsw_fob_hash);
	if (__improbable(fob == NULL)) {
		return NULL;
	}

#if !KASAN_CLASSIC
	/*
	 * Except in KASAN_CLASSIC mode, kalloc always maintains cacheline
	 * size alignment if the requested size is a multiple of a cacheline
	 * size (this is true for any size that is a power of two from 16 to
	 * PAGE_SIZE).
	 *
	 * Because this is an optimization only, it is OK for KASAN_CLASSIC
	 * not to respect it.
	 */
	ASSERT(IS_P2ALIGNED(fob, cache_sz));
#endif /* !KASAN_CLASSIC */

	SK_DF(SK_VERB_MEM, "fob 0x%llx fob_cnt %zu fob_sz %zu "
	    "(total %zu bytes) ALLOC", SK_KVA(fob), fob_cnt,
	    *fob_sz, fob_tot_sz);

	return fob;
}

void
flow_owner_buckets_free(struct flow_owner_bucket *fob, size_t tot_sz)
{
	SK_DF(SK_VERB_MEM, "fob 0x%llx FREE", SK_KVA(fob));
	sk_free_type_hash(KT_SK_FOB, tot_sz, fob);
}

void
flow_owner_bucket_init(struct flow_owner_bucket *fob)
{
#if !KASAN_CLASSIC
	ASSERT(IS_P2ALIGNED(fob, skmem_cpu_cache_line_size()));
#endif /* !KASAN_CLASSIC */
	lck_mtx_init(&fob->fob_lock, &flow_owner_lock_group,
	    &flow_owner_lock_attr);
	RB_INIT(&fob->fob_owner_head);
}

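/*
 * Tear down a flow owner bucket.  The waiter counters below are bumped to
 * record a sleeping thread; on wraparound to zero they are bumped once
 * more so that a sleeper is never represented by a zero count (e.g.
 * flow_owner_free() only issues a wakeup when fob_dtor_waiters is
 * non-zero).
 */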
void
flow_owner_bucket_destroy(struct flow_owner_bucket *fob)
{
	/*
	 * In the event we are called as part of the nexus destructor,
	 * we need to wait until all threads have exited the flow close
	 * critical section, and until the flow_owner_bucket is empty.
	 * By the time we get here, the module initiating the request
	 * (e.g. NECP) has been quiesced, so any flow open requests would
	 * have been rejected.
	 */
	FOB_LOCK(fob);
	while (!RB_EMPTY(&fob->fob_owner_head)) {
		SK_ERR("waiting for fob 0x%llx to go idle", SK_KVA(fob));
		if (++(fob->fob_dtor_waiters) == 0) {   /* wraparound */
			fob->fob_dtor_waiters++;
		}
		(void) msleep(&fob->fob_dtor_waiters, &fob->fob_lock,
		    (PZERO - 1), __FUNCTION__, NULL);
	}
	while (fob->fob_busy_flags & FOBF_CLOSE_BUSY) {
		if (++(fob->fob_close_waiters) == 0) {  /* wraparound */
			fob->fob_close_waiters++;
		}
		(void) msleep(&fob->fob_close_waiters, &fob->fob_lock,
		    (PZERO - 1), __FUNCTION__, NULL);
	}
	ASSERT(RB_EMPTY(&fob->fob_owner_head));
	ASSERT(!(fob->fob_busy_flags & FOBF_OPEN_BUSY));
	ASSERT(!(fob->fob_busy_flags & FOBF_CLOSE_BUSY));
	FOB_UNLOCK(fob);
	lck_mtx_destroy(&fob->fob_lock, &flow_owner_lock_group);
}

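/*
 * Purge flow owners in this bucket.  When nx_port is NEXUS_PORT_ANY (the
 * flow_owner_bucket_purge_all() case) the bucket lock is expected to be
 * held by the caller and every owner is purged; otherwise the lock is
 * taken here and only owners bound to nx_port are considered.  With
 * if_idle set for a specific port, existing flow entries are left intact
 * and only owners without entries are freed.  Returns the number of flow
 * owners freed.
 */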
static uint32_t
flow_owner_bucket_purge_common(struct flow_owner_bucket *fob,
    nexus_port_t nx_port, boolean_t if_idle)
{
	/* called by flow_owner_bucket_purge_all()? */
	boolean_t locked = (nx_port == NEXUS_PORT_ANY);
	struct flow_owner *fo, *tfo;
	struct flow_entry *fe, *tfe;
	uint32_t cnt = 0;

	if (!locked) {
		FOB_LOCK(fob);
	}
	FOB_LOCK_ASSERT_HELD(fob);

	RB_FOREACH_SAFE(fo, flow_owner_tree, &fob->fob_owner_head, tfo) {
		if (fo->fo_nx_port != nx_port && nx_port != NEXUS_PORT_ANY) {
			continue;
		}

		if (!if_idle || nx_port == NEXUS_PORT_ANY) {
			RB_FOREACH_SAFE(fe, flow_entry_id_tree,
			    &fo->fo_flow_entry_id_head, tfe) {
				ASSERT(fe->fe_nx_port == fo->fo_nx_port);
				flow_entry_retain(fe);
				flow_entry_destroy(fo, fe, FALSE, NULL);
			}
		}

		ASSERT(nx_port != NEXUS_PORT_ANY ||
		    RB_EMPTY(&fo->fo_flow_entry_id_head));

		if (RB_EMPTY(&fo->fo_flow_entry_id_head)) {
			flow_owner_free(fob, fo);
			++cnt;
		} else if (nx_port != NEXUS_PORT_ANY) {
			/* let ms_flow_unbind() know this port is gone */
			fo->fo_nx_port_destroyed = TRUE;
			VERIFY(fo->fo_nx_port_na == NULL);
		}
	}

	if (!locked) {
		FOB_UNLOCK(fob);
	}

	return cnt;
}

void
flow_owner_bucket_purge_all(struct flow_owner_bucket *fob)
{
	(void) flow_owner_bucket_purge_common(fob, NEXUS_PORT_ANY, TRUE);
}

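/*
 * Activate or deactivate nx_port for the flow owners in this bucket: on
 * NA_ACTIVATE_MODE_ON the nexus adapter pointer is attached to each
 * matching owner and a flow advisory entry is allocated for every live
 * flow that uses one; on other modes the advisory entries are freed and
 * the adapter pointer is cleared.  Returns the number of owners touched.
 */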
static uint32_t
flow_owner_bucket_activate_nx_port_common(struct flow_owner_bucket *fob,
    nexus_port_t nx_port, struct nexus_adapter *nx_port_na,
    na_activate_mode_t mode)
{
	struct flow_owner *fo;
	struct flow_entry *fe;
	uint32_t cnt = 0;

	VERIFY(nx_port != NEXUS_PORT_ANY);
	FOB_LOCK(fob);

	RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
		if (fo->fo_nx_port_destroyed || (fo->fo_nx_port != nx_port)) {
			continue;
		}

		if (mode == NA_ACTIVATE_MODE_ON) {
			VERIFY(fo->fo_nx_port_na == NULL);
			*(struct nexus_adapter **)(uintptr_t)&fo->fo_nx_port_na = nx_port_na;
		}

		RB_FOREACH(fe, flow_entry_id_tree,
		    &fo->fo_flow_entry_id_head) {
			if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
				continue;
			}
			VERIFY(fe->fe_nx_port == fo->fo_nx_port);
			if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
				if (mode == NA_ACTIVATE_MODE_ON) {
					na_flowadv_entry_alloc(
						fo->fo_nx_port_na, fe->fe_uuid,
						fe->fe_adv_idx, fe->fe_flowid);
				} else if (fo->fo_nx_port_na != NULL) {
					na_flowadv_entry_free(fo->fo_nx_port_na,
					    fe->fe_uuid, fe->fe_adv_idx,
					    fe->fe_flowid);
				}
			}
		}

		if (mode != NA_ACTIVATE_MODE_ON && fo->fo_nx_port_na != NULL) {
			*(struct nexus_adapter **)(uintptr_t)&fo->fo_nx_port_na = NULL;
		}

		++cnt;
	}

	FOB_UNLOCK(fob);
	return cnt;
}

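/*
 * Activate or deactivate the adapter for the flow owner bound to nx_port.
 * A pid-bound port hashes directly to its owner bucket; otherwise every
 * bucket has to be scanned.  At most one flow owner may match.
 */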
uint32_t
flow_owner_activate_nexus_port(struct flow_mgr *fm,
    boolean_t pid_bound, pid_t pid, nexus_port_t nx_port,
    struct nexus_adapter *nx_port_na, na_activate_mode_t mode)
{
	struct flow_owner_bucket *fob;
	uint32_t fo_cnt = 0;

	VERIFY(nx_port != NEXUS_PORT_ANY);
	VERIFY(nx_port_na != NULL);

	if (pid_bound) {
		fob = flow_mgr_get_fob_by_pid(fm, pid);
		fo_cnt = flow_owner_bucket_activate_nx_port_common(fob, nx_port,
		    nx_port_na, mode);
	} else {
		uint32_t i;
		/*
		 * Otherwise, this can get expensive since we need to search
		 * thru all proc-mapping buckets to find the flows that are
		 * related to this nexus port.
		 */
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			fob = flow_mgr_get_fob_at_idx(fm, i);
			fo_cnt += flow_owner_bucket_activate_nx_port_common(fob,
			    nx_port, nx_port_na, mode);
		}
	}
	/* there shouldn't be more than one flow owner on a nexus port */
	VERIFY(fo_cnt <= 1);
	return fo_cnt;
}

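/*
 * Re-attach nx_port to the owners in this bucket by clearing the
 * fo_nx_port_destroyed marker that a prior detach left behind.
 */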
static void
flow_owner_bucket_attach_common(struct flow_owner_bucket *fob,
    nexus_port_t nx_port)
{
	struct flow_owner *fo;

	VERIFY(nx_port != NEXUS_PORT_ANY);
	FOB_LOCK(fob);

	RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
		if (fo->fo_nx_port_destroyed && (fo->fo_nx_port == nx_port)) {
			fo->fo_nx_port_destroyed = FALSE;
		}
	}

	FOB_UNLOCK(fob);
}

void
flow_owner_attach_nexus_port(struct flow_mgr *fm, boolean_t pid_bound,
    pid_t pid, nexus_port_t nx_port)
{
	struct flow_owner_bucket *fob;
	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (pid_bound) {
		fob = flow_mgr_get_fob_by_pid(fm, pid);
		flow_owner_bucket_attach_common(fob, nx_port);
	} else {
		uint32_t i;
		/*
		 * Otherwise, this can get expensive since we need to search
		 * thru all proc-mapping buckets to find the flows that are
		 * related to this nexus port.
		 */
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			fob = flow_mgr_get_fob_at_idx(fm, i);
			flow_owner_bucket_attach_common(fob, nx_port);
		}
	}
}

uint32_t
flow_owner_detach_nexus_port(struct flow_mgr *fm, boolean_t pid_bound,
    pid_t pid, nexus_port_t nx_port, boolean_t if_idle)
{
	struct flow_owner_bucket *fob;
	uint32_t purged = 0;
	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (pid_bound) {
		fob = flow_mgr_get_fob_by_pid(fm, pid);
		purged = flow_owner_bucket_purge_common(fob, nx_port, if_idle);
	} else {
		uint32_t i;
		/*
		 * Otherwise, this can get expensive since we need to search
		 * thru all proc-mapping buckets to find the flows that are
		 * related to this nexus port.
		 */
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			fob = flow_mgr_get_fob_at_idx(fm, i);
			purged += flow_owner_bucket_purge_common(fob,
			    nx_port, if_idle);
		}
	}
	return purged;
}

/* 64-bit mask with range */
#define FO_BMASK64(_beg, _end)                                  \
	((((uint64_t)0xffffffffffffffff) >>                     \
	(63 - (_end))) & ~((1ULL << (_beg)) - 1))
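/*
 * FO_BMASK64(_beg, _end) sets bits _beg through _end inclusive, e.g.
 * FO_BMASK64(0, 3) == 0x000000000000000f and FO_BMASK64(4, 7) == 0xf0;
 * FO_BMASK64(0, 63) covers the whole word.
 */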

struct flow_owner *
flow_owner_alloc(struct flow_owner_bucket *fob, struct proc *p,
    nexus_port_t nx_port, bool nx_port_pid_bound, bool flowadv,
    struct nx_flowswitch *fsw, struct nexus_adapter *nx_port_na,
    void *context, bool low_latency)
{
	struct flow_owner *fo;
	const pid_t pid = proc_pid(p);

	_CASSERT(true == 1);
	_CASSERT(false == 0);
	ASSERT(low_latency == true || low_latency == false);
	ASSERT(nx_port != NEXUS_PORT_ANY);
	FOB_LOCK_ASSERT_HELD(fob);

#if DEBUG
	ASSERT(flow_owner_find_by_pid(fob, pid, context, low_latency) == NULL);
	RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
		if (!fo->fo_nx_port_destroyed && (fo->fo_nx_port == nx_port)) {
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
#endif /* DEBUG */

	fo = fo_alloc(TRUE);
	if (fo != NULL) {
		if (flowadv) {
			uint32_t i;

			if ((fo->fo_flowadv_bmap =
			    skmem_cache_alloc(sk_fab_cache, SKMEM_SLEEP)) == NULL) {
				SK_ERR("failed to alloc flow advisory bitmap");
				fo_free(fo);
				return NULL;
			}
			bzero(fo->fo_flowadv_bmap, sk_fab_size);
			fo->fo_flowadv_max = sk_max_flows;

			/* set the bits for free indices */
			for (i = 0; i < sk_fadv_nchunks; i++) {
				uint32_t end = 63;

				if (i == (sk_fadv_nchunks - 1)) {
					end = ((sk_max_flows - 1) %
					    FO_FLOWADV_CHUNK);
				}

				fo->fo_flowadv_bmap[i] = FO_BMASK64(0, end);
			}
		}
		RB_INIT(&fo->fo_flow_entry_id_head);
		/* const override */
		*(struct flow_owner_bucket **)(uintptr_t)&fo->fo_bucket = fob;
		fo->fo_context = context;
		fo->fo_pid = pid;
		(void) snprintf(fo->fo_name, sizeof(fo->fo_name), "%s",
		    proc_name_address(p));
		fo->fo_nx_port_pid_bound = nx_port_pid_bound;
		fo->fo_low_latency = low_latency;
		fo->fo_nx_port = nx_port;
		*(struct nexus_adapter **)(uintptr_t)&fo->fo_nx_port_na = nx_port_na;
		*(struct nx_flowswitch **)(uintptr_t)&fo->fo_fsw = fsw;
		RB_INSERT(flow_owner_tree, &fob->fob_owner_head, fo);

		SK_DF(SK_VERB_FLOW, "%s(%d) fob 0x%llx added fo 0x%llx "
		    "nx_port %d nx_port_pid_bound %d ll %d nx_port_na 0x%llx",
		    fo->fo_name, fo->fo_pid, SK_KVA(fob), SK_KVA(fo),
		    (int)nx_port, nx_port_pid_bound, fo->fo_low_latency,
		    SK_KVA(nx_port_na));
	}

	return fo;
}

void
flow_owner_free(struct flow_owner_bucket *fob, struct flow_owner *fo)
{
	FOB_LOCK_ASSERT_HELD(fob);

	ASSERT(fo->fo_bucket == fob);
	*(struct flow_owner_bucket **)(uintptr_t)&fo->fo_bucket = NULL;
	RB_REMOVE(flow_owner_tree, &fob->fob_owner_head, fo);

	ASSERT(fo->fo_num_flowadv == 0);
	if (fo->fo_flowadv_bmap != NULL) {
		/* only allocated when the owner was created with flowadv */
		skmem_cache_free(sk_fab_cache, fo->fo_flowadv_bmap);
		fo->fo_flowadv_bmap = NULL;
	}

	/* wake up any thread blocked in flow_owner_bucket_destroy() */
	if (RB_EMPTY(&fob->fob_owner_head) && fob->fob_dtor_waiters > 0) {
		fob->fob_dtor_waiters = 0;
		wakeup(&fob->fob_dtor_waiters);
	}

	SK_DF(SK_VERB_FLOW, "%s(%d) fob 0x%llx removed fo 0x%llx nx_port %d",
	    fo->fo_name, fo->fo_pid, SK_KVA(fob), SK_KVA(fo),
	    (int)fo->fo_nx_port);

	fo_free(fo);
}

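/*
 * Allocate a flow advisory index for this owner.  The bitmap is kept in
 * "free" polarity: a set bit means the index is available, so allocation
 * scans for the first set bit with ffsll() and clears it, and
 * flow_owner_flowadv_index_free() sets it again.  Returns ENOSPC once all
 * fo_flowadv_max indices are in use.
 */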
int
flow_owner_flowadv_index_alloc(struct flow_owner *fo, flowadv_idx_t *fadv_idx)
{
	bitmap_t *bmap = fo->fo_flowadv_bmap;
	size_t nchunks, i, j, idx = FLOWADV_IDX_NONE;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
	ASSERT(fo->fo_flowadv_max != 0);

	nchunks = P2ROUNDUP(fo->fo_flowadv_max, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	for (i = 0; i < nchunks; i++) {
		j = ffsll(bmap[i]);
		if (j == 0) {
			/* all indices in this chunk are in use */
			continue;
		}
		--j;
		/* mark the index as in use */
		bit_clear(bmap[i], j);
		idx = (i * FO_FLOWADV_CHUNK) + j;
		break;
	}

	if (idx == FLOWADV_IDX_NONE) {
		SK_ERR("%s(%d) flow advisory table full: num %u max %u",
		    fo->fo_name, fo->fo_pid, fo->fo_num_flowadv,
		    fo->fo_flowadv_max);
		VERIFY(fo->fo_num_flowadv == fo->fo_flowadv_max);
		*fadv_idx = FLOWADV_IDX_NONE;
		return ENOSPC;
	}

	fo->fo_num_flowadv++;
	ASSERT(idx < ((flowadv_idx_t)-1));
	*fadv_idx = (flowadv_idx_t)idx;
	ASSERT(*fadv_idx < fo->fo_flowadv_max);
	return 0;
}

void
flow_owner_flowadv_index_free(struct flow_owner *fo, flowadv_idx_t fadv_idx)
{
	uint32_t chunk_idx, bit_pos;
	bitmap_t *bmap = fo->fo_flowadv_bmap;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
	ASSERT(fo->fo_num_flowadv != 0);
	ASSERT((fo->fo_flowadv_max != 0) && (fadv_idx < fo->fo_flowadv_max));

	chunk_idx = fadv_idx / FO_FLOWADV_CHUNK;
	bit_pos = fadv_idx % FO_FLOWADV_CHUNK;
	ASSERT(!bit_test(bmap[chunk_idx], bit_pos));
	/* mark the index as free */
	bit_set(bmap[chunk_idx], bit_pos);
	fo->fo_num_flowadv--;
}

int
flow_owner_destroy_entry(struct flow_owner *fo, uuid_t uuid,
    bool nolinger, void *close_params)
{
	struct flow_entry *fe = NULL;
	int err = 0;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	/* look up the flow for this process */
	fe = flow_entry_find_by_uuid(fo, uuid);
	if (fe == NULL) {
		err = ENOENT;
	} else {
		/* free flow entry (OK to linger if caller asked) */
		flow_entry_destroy(fo, fe, nolinger, close_params);
	}

	return err;
}

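/*
 * RB tree comparator: flow owners are ordered by pid, then by the opaque
 * context pointer, then by the low-latency flag, matching the lookup keys
 * used by flow_owner_find_by_pid().
 */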
static inline int
fo_cmp(const struct flow_owner *a, const struct flow_owner *b)
{
	if (a->fo_pid > b->fo_pid) {
		return 1;
	}
	if (a->fo_pid < b->fo_pid) {
		return -1;
	}
	if ((intptr_t)a->fo_context > (intptr_t)b->fo_context) {
		return 1;
	} else if ((intptr_t)a->fo_context < (intptr_t)b->fo_context) {
		return -1;
	}
	if (a->fo_low_latency != b->fo_low_latency) {
		if (a->fo_low_latency) {
			return 1;
		} else {
			return -1;
		}
	}
	return 0;
}

static struct flow_owner *
fo_alloc(boolean_t can_block)
{
	struct flow_owner *fo;

	fo = skmem_cache_alloc(sk_fo_cache,
	    can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
	if (fo == NULL) {
		return NULL;
	}

	bzero(fo, sk_fo_size);

	SK_DF(SK_VERB_MEM, "fo 0x%llx ALLOC", SK_KVA(fo));

	return fo;
}

static void
fo_free(struct flow_owner *fo)
{
	ASSERT(fo->fo_bucket == NULL);
	ASSERT(RB_EMPTY(&fo->fo_flow_entry_id_head));
	ASSERT(fo->fo_flowadv_bmap == NULL);

	SK_DF(SK_VERB_MEM, "fo 0x%llx FREE", SK_KVA(fo));

	skmem_cache_free(sk_fo_cache, fo);
}