/*
 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <skywalk/nexus/flowswitch/flow/flow_var.h>

static uint32_t flow_owner_bucket_purge_common(struct flow_owner_bucket *,
    nexus_port_t, boolean_t);
static int fo_cmp(const struct flow_owner *, const struct flow_owner *);
static struct flow_owner *fo_alloc(boolean_t);
static void fo_free(struct flow_owner *);

static LCK_GRP_DECLARE(flow_owner_lock_group, "sk_flow_owner_lock");
static LCK_ATTR_DECLARE(flow_owner_lock_attr, 0, 0);

RB_GENERATE_PREV(flow_owner_tree, flow_owner, fo_link, fo_cmp);

KALLOC_TYPE_VAR_DEFINE(KT_SK_FOB, struct flow_owner_bucket, KT_DEFAULT);

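/*
 * Allocate an array of fob_cnt flow owner buckets, each padded to a CPU
 * cache line boundary.  The per-bucket stride is returned in *fob_sz and
 * the total allocation size in *tot_sz; the latter is what the caller
 * passes back to flow_owner_buckets_free().
 */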
struct flow_owner_bucket *
flow_owner_buckets_alloc(size_t fob_cnt, size_t *fob_sz, size_t *tot_sz)
{
	size_t cache_sz = skmem_cpu_cache_line_size();
	struct flow_owner_bucket *fob;
	size_t fob_tot_sz;

	/* each bucket is CPU cache-aligned */
	*fob_sz = P2ROUNDUP(sizeof(*fob), cache_sz);
	*tot_sz = fob_tot_sz = fob_cnt * (*fob_sz);
	fob = sk_alloc_type_hash(KT_SK_FOB, fob_tot_sz, Z_WAITOK,
	    skmem_tag_fsw_fob_hash);
	if (__improbable(fob == NULL)) {
		return NULL;
	}

#if !KASAN_CLASSIC
	/*
	 * Except in KASAN_CLASSIC mode, kalloc always maintains cacheline
	 * size alignment if the requested size is a multiple of a cacheline
	 * size (this is true for any size that is a power of two from 16 to
	 * PAGE_SIZE).
	 *
	 * Because this is an optimization only, it is OK for KASAN_CLASSIC
	 * not to respect it.
	 */
	ASSERT(IS_P2ALIGNED(fob, cache_sz));
#endif /* !KASAN_CLASSIC */

	SK_DF(SK_VERB_MEM, "fob 0x%llx fob_cnt %zu fob_sz %zu "
	    "(total %zu bytes) ALLOC", SK_KVA(fob), fob_cnt,
	    *fob_sz, fob_tot_sz);

	return fob;
}

void
flow_owner_buckets_free(struct flow_owner_bucket *fob, size_t tot_sz)
{
	SK_DF(SK_VERB_MEM, "fob 0x%llx FREE", SK_KVA(fob));
	sk_free_type_hash(KT_SK_FOB, tot_sz, fob);
}

void
flow_owner_bucket_init(struct flow_owner_bucket *fob)
{
#if !KASAN_CLASSIC
	ASSERT(IS_P2ALIGNED(fob, skmem_cpu_cache_line_size()));
#endif /* !KASAN_CLASSIC */
	lck_mtx_init(&fob->fob_lock, &flow_owner_lock_group,
	    &flow_owner_lock_attr);
	RB_INIT(&fob->fob_owner_head);
}

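/*
 * Tear down a flow owner bucket.  The waiter counters below are bumped to
 * record a sleeping thread; on wraparound to zero they are bumped once
 * more so that a sleeper is never represented by a zero count (e.g.
 * flow_owner_free() only issues a wakeup when fob_dtor_waiters is
 * non-zero).
 */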
void
flow_owner_bucket_destroy(struct flow_owner_bucket *fob)
{
	/*
	 * In the event we are called as part of the nexus destructor,
	 * we need to wait until all threads have exited the flow close
	 * critical section, and until the flow_owner_bucket is empty.
	 * By the time we get here, the module initiating the request
	 * (e.g. NECP) has been quiesced, so any flow open requests would
	 * have been rejected.
	 */
	FOB_LOCK(fob);
	while (!RB_EMPTY(&fob->fob_owner_head)) {
		SK_ERR("waiting for fob 0x%llx to go idle", SK_KVA(fob));
		if (++(fob->fob_dtor_waiters) == 0) {   /* wraparound */
			fob->fob_dtor_waiters++;
		}
		(void) msleep(&fob->fob_dtor_waiters, &fob->fob_lock,
		    (PZERO - 1), __FUNCTION__, NULL);
	}
	while (fob->fob_busy_flags & FOBF_CLOSE_BUSY) {
		if (++(fob->fob_close_waiters) == 0) {  /* wraparound */
			fob->fob_close_waiters++;
		}
		(void) msleep(&fob->fob_close_waiters, &fob->fob_lock,
		    (PZERO - 1), __FUNCTION__, NULL);
	}
	ASSERT(RB_EMPTY(&fob->fob_owner_head));
	ASSERT(!(fob->fob_busy_flags & FOBF_OPEN_BUSY));
	ASSERT(!(fob->fob_busy_flags & FOBF_CLOSE_BUSY));
	FOB_UNLOCK(fob);
	lck_mtx_destroy(&fob->fob_lock, &flow_owner_lock_group);
}

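/*
 * Purge flow owners in this bucket.  When nx_port is NEXUS_PORT_ANY (the
 * flow_owner_bucket_purge_all() case) the bucket lock is expected to be
 * held by the caller and every owner is purged; otherwise the lock is
 * taken here and only owners bound to nx_port are considered.  With
 * if_idle set for a specific port, existing flow entries are left intact
 * and only owners without entries are freed.  Returns the number of flow
 * owners freed.
 */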
static uint32_t
flow_owner_bucket_purge_common(struct flow_owner_bucket *fob,
    nexus_port_t nx_port, boolean_t if_idle)
{
	/* called by flow_owner_bucket_purge_all()? */
	boolean_t locked = (nx_port == NEXUS_PORT_ANY);
	struct flow_owner *fo, *tfo;
	struct flow_entry *fe, *tfe;
	uint32_t cnt = 0;

	if (!locked) {
		FOB_LOCK(fob);
	}
	FOB_LOCK_ASSERT_HELD(fob);

	RB_FOREACH_SAFE(fo, flow_owner_tree, &fob->fob_owner_head, tfo) {
		if (fo->fo_nx_port != nx_port && nx_port != NEXUS_PORT_ANY) {
			continue;
		}

		if (!if_idle || nx_port == NEXUS_PORT_ANY) {
			RB_FOREACH_SAFE(fe, flow_entry_id_tree,
			    &fo->fo_flow_entry_id_head, tfe) {
				ASSERT(fe->fe_nx_port == fo->fo_nx_port);
				flow_entry_retain(fe);
				flow_entry_destroy(fo, fe, FALSE, NULL);
			}
		}

		ASSERT(nx_port != NEXUS_PORT_ANY ||
		    RB_EMPTY(&fo->fo_flow_entry_id_head));

		if (RB_EMPTY(&fo->fo_flow_entry_id_head)) {
			flow_owner_free(fob, fo);
			++cnt;
		} else if (nx_port != NEXUS_PORT_ANY) {
			/* let ms_flow_unbind() know this port is gone */
			fo->fo_nx_port_destroyed = TRUE;
			VERIFY(fo->fo_nx_port_na == NULL);
		}
	}

	if (!locked) {
		FOB_UNLOCK(fob);
	}

	return cnt;
}

void
flow_owner_bucket_purge_all(struct flow_owner_bucket *fob)
{
	(void) flow_owner_bucket_purge_common(fob, NEXUS_PORT_ANY, TRUE);
}

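/*
 * Activate or deactivate nx_port for the flow owners in this bucket: on
 * NA_ACTIVATE_MODE_ON the nexus adapter pointer is attached to each
 * matching owner and a flow advisory entry is allocated for every live
 * flow that uses one; on other modes the advisory entries are freed and
 * the adapter pointer is cleared.  Returns the number of owners touched.
 */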
static uint32_t
flow_owner_bucket_activate_nx_port_common(struct flow_owner_bucket *fob,
    nexus_port_t nx_port, struct nexus_adapter *nx_port_na,
    na_activate_mode_t mode)
{
	struct flow_owner *fo;
	struct flow_entry *fe;
	uint32_t cnt = 0;

	VERIFY(nx_port != NEXUS_PORT_ANY);
	FOB_LOCK(fob);

	RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
		if (fo->fo_nx_port_destroyed || (fo->fo_nx_port != nx_port)) {
			continue;
		}

		if (mode == NA_ACTIVATE_MODE_ON) {
			VERIFY(fo->fo_nx_port_na == NULL);
			*(struct nexus_adapter **)(uintptr_t)&fo->fo_nx_port_na = nx_port_na;
		}

		RB_FOREACH(fe, flow_entry_id_tree,
		    &fo->fo_flow_entry_id_head) {
			if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
				continue;
			}
			VERIFY(fe->fe_nx_port == fo->fo_nx_port);
			if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
				if (mode == NA_ACTIVATE_MODE_ON) {
					na_flowadv_entry_alloc(
						fo->fo_nx_port_na, fe->fe_uuid,
						fe->fe_adv_idx, fe->fe_flowid);
				} else if (fo->fo_nx_port_na != NULL) {
					na_flowadv_entry_free(fo->fo_nx_port_na,
					    fe->fe_uuid, fe->fe_adv_idx,
					    fe->fe_flowid);
				}
			}
		}

		if (mode != NA_ACTIVATE_MODE_ON && fo->fo_nx_port_na != NULL) {
			*(struct nexus_adapter **)(uintptr_t)&fo->fo_nx_port_na = NULL;
		}

		++cnt;
	}

	FOB_UNLOCK(fob);
	return cnt;
}

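/*
 * Activate or deactivate the adapter for the flow owner bound to nx_port.
 * A pid-bound port hashes directly to its owner bucket; otherwise every
 * bucket has to be scanned.  At most one flow owner may match.
 */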
uint32_t
flow_owner_activate_nexus_port(struct flow_mgr *fm,
    boolean_t pid_bound, pid_t pid, nexus_port_t nx_port,
    struct nexus_adapter *nx_port_na, na_activate_mode_t mode)
{
	struct flow_owner_bucket *fob;
	uint32_t fo_cnt = 0;

	VERIFY(nx_port != NEXUS_PORT_ANY);
	VERIFY(nx_port_na != NULL);

	if (pid_bound) {
		fob = flow_mgr_get_fob_by_pid(fm, pid);
		fo_cnt = flow_owner_bucket_activate_nx_port_common(fob, nx_port,
		    nx_port_na, mode);
	} else {
		uint32_t i;
		/*
		 * Otherwise, this can get expensive since we need to search
		 * thru all proc-mapping buckets to find the flows that are
		 * related to this nexus port.
		 */
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			fob = flow_mgr_get_fob_at_idx(fm, i);
			fo_cnt += flow_owner_bucket_activate_nx_port_common(fob,
			    nx_port, nx_port_na, mode);
		}
	}
	/* there shouldn't be more than one flow owner on a nexus port */
	VERIFY(fo_cnt <= 1);
	return fo_cnt;
}

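/*
 * Re-attach nx_port to the owners in this bucket by clearing the
 * fo_nx_port_destroyed marker that a prior detach left behind.
 */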
static void
flow_owner_bucket_attach_common(struct flow_owner_bucket *fob,
    nexus_port_t nx_port)
{
	struct flow_owner *fo;

	VERIFY(nx_port != NEXUS_PORT_ANY);
	FOB_LOCK(fob);

	RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
		if (fo->fo_nx_port_destroyed && (fo->fo_nx_port == nx_port)) {
			fo->fo_nx_port_destroyed = FALSE;
		}
	}

	FOB_UNLOCK(fob);
}

void
flow_owner_attach_nexus_port(struct flow_mgr *fm, boolean_t pid_bound,
    pid_t pid, nexus_port_t nx_port)
{
	struct flow_owner_bucket *fob;
	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (pid_bound) {
		fob = flow_mgr_get_fob_by_pid(fm, pid);
		flow_owner_bucket_attach_common(fob, nx_port);
	} else {
		uint32_t i;
		/*
		 * Otherwise, this can get expensive since we need to search
		 * thru all proc-mapping buckets to find the flows that are
		 * related to this nexus port.
		 */
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			fob = flow_mgr_get_fob_at_idx(fm, i);
			flow_owner_bucket_attach_common(fob, nx_port);
		}
	}
}

uint32_t
flow_owner_detach_nexus_port(struct flow_mgr *fm, boolean_t pid_bound,
    pid_t pid, nexus_port_t nx_port, boolean_t if_idle)
{
	struct flow_owner_bucket *fob;
	uint32_t purged = 0;
	ASSERT(nx_port != NEXUS_PORT_ANY);

	if (pid_bound) {
		fob = flow_mgr_get_fob_by_pid(fm, pid);
		purged = flow_owner_bucket_purge_common(fob, nx_port, if_idle);
	} else {
		uint32_t i;
		/*
		 * Otherwise, this can get expensive since we need to search
		 * thru all proc-mapping buckets to find the flows that are
		 * related to this nexus port.
		 */
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			fob = flow_mgr_get_fob_at_idx(fm, i);
			purged += flow_owner_bucket_purge_common(fob,
			    nx_port, if_idle);
		}
	}
	return purged;
}

/* 64-bit mask with range */
#define FO_BMASK64(_beg, _end)                                  \
	((((uint64_t)0xffffffffffffffff) >>                     \
	(63 - (_end))) & ~((1ULL << (_beg)) - 1))
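/*
 * FO_BMASK64(_beg, _end) sets bits _beg through _end inclusive, e.g.
 * FO_BMASK64(0, 3) == 0x000000000000000f and FO_BMASK64(4, 7) == 0xf0;
 * FO_BMASK64(0, 63) covers the whole word.
 */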

struct flow_owner *
flow_owner_alloc(struct flow_owner_bucket *fob, struct proc *p,
    nexus_port_t nx_port, bool nx_port_pid_bound, bool flowadv,
    struct nx_flowswitch *fsw, struct nexus_adapter *nx_port_na,
    void *context, bool low_latency)
{
	struct flow_owner *fo;
	const pid_t pid = proc_pid(p);

	_CASSERT(true == 1);
	_CASSERT(false == 0);
	ASSERT(low_latency == true || low_latency == false);
	ASSERT(nx_port != NEXUS_PORT_ANY);
	FOB_LOCK_ASSERT_HELD(fob);

#if DEBUG
	ASSERT(flow_owner_find_by_pid(fob, pid, context, low_latency) == NULL);
	RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
		if (!fo->fo_nx_port_destroyed && (fo->fo_nx_port == nx_port)) {
			VERIFY(0);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}
#endif /* DEBUG */

	fo = fo_alloc(TRUE);
	if (fo != NULL) {
		if (flowadv) {
			uint32_t i;

			if ((fo->fo_flowadv_bmap =
			    skmem_cache_alloc(sk_fab_cache, SKMEM_SLEEP)) == NULL) {
				SK_ERR("failed to alloc flow advisory bitmap");
				fo_free(fo);
				return NULL;
			}
			bzero(fo->fo_flowadv_bmap, sk_fab_size);
			fo->fo_flowadv_max = sk_max_flows;

			/* set the bits for free indices */
			for (i = 0; i < sk_fadv_nchunks; i++) {
				uint32_t end = 63;

				if (i == (sk_fadv_nchunks - 1)) {
					end = ((sk_max_flows - 1) %
					    FO_FLOWADV_CHUNK);
				}

				fo->fo_flowadv_bmap[i] = FO_BMASK64(0, end);
			}
		}
		RB_INIT(&fo->fo_flow_entry_id_head);
		/* const override */
		*(struct flow_owner_bucket **)(uintptr_t)&fo->fo_bucket = fob;
		fo->fo_context = context;
		fo->fo_pid = pid;
		(void) snprintf(fo->fo_name, sizeof(fo->fo_name), "%s",
		    proc_name_address(p));
		fo->fo_nx_port_pid_bound = nx_port_pid_bound;
		fo->fo_low_latency = low_latency;
		fo->fo_nx_port = nx_port;
		*(struct nexus_adapter **)(uintptr_t)&fo->fo_nx_port_na = nx_port_na;
		*(struct nx_flowswitch **)(uintptr_t)&fo->fo_fsw = fsw;
		RB_INSERT(flow_owner_tree, &fob->fob_owner_head, fo);

		SK_DF(SK_VERB_FLOW, "%s(%d) fob 0x%llx added fo 0x%llx "
		    "nx_port %d nx_port_pid_bound %d ll %d nx_port_na 0x%llx",
		    fo->fo_name, fo->fo_pid, SK_KVA(fob), SK_KVA(fo),
		    (int)nx_port, nx_port_pid_bound, fo->fo_low_latency,
		    SK_KVA(nx_port_na));
	}

	return fo;
}

void
flow_owner_free(struct flow_owner_bucket *fob, struct flow_owner *fo)
{
	FOB_LOCK_ASSERT_HELD(fob);

	ASSERT(fo->fo_bucket == fob);
	*(struct flow_owner_bucket **)(uintptr_t)&fo->fo_bucket = NULL;
	RB_REMOVE(flow_owner_tree, &fob->fob_owner_head, fo);

	ASSERT(fo->fo_num_flowadv == 0);
	if (fo->fo_flowadv_bmap != NULL) {
		/* only allocated when the owner was created with flowadv */
		skmem_cache_free(sk_fab_cache, fo->fo_flowadv_bmap);
		fo->fo_flowadv_bmap = NULL;
	}

	/* wake up any thread blocked in flow_owner_bucket_destroy() */
	if (RB_EMPTY(&fob->fob_owner_head) && fob->fob_dtor_waiters > 0) {
		fob->fob_dtor_waiters = 0;
		wakeup(&fob->fob_dtor_waiters);
	}

	SK_DF(SK_VERB_FLOW, "%s(%d) fob 0x%llx removed fo 0x%llx nx_port %d",
	    fo->fo_name, fo->fo_pid, SK_KVA(fob), SK_KVA(fo),
	    (int)fo->fo_nx_port);

	fo_free(fo);
}

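/*
 * Allocate a flow advisory index for this owner.  The bitmap is kept in
 * "free" polarity: a set bit means the index is available, so allocation
 * scans for the first set bit with ffsll() and clears it, and
 * flow_owner_flowadv_index_free() sets it again.  Returns ENOSPC once all
 * fo_flowadv_max indices are in use.
 */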
int
flow_owner_flowadv_index_alloc(struct flow_owner *fo, flowadv_idx_t *fadv_idx)
{
	bitmap_t *bmap = fo->fo_flowadv_bmap;
	size_t nchunks, i, j, idx = FLOWADV_IDX_NONE;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
	ASSERT(fo->fo_flowadv_max != 0);

	nchunks = P2ROUNDUP(fo->fo_flowadv_max, FO_FLOWADV_CHUNK) /
	    FO_FLOWADV_CHUNK;

	for (i = 0; i < nchunks; i++) {
		j = ffsll(bmap[i]);
		if (j == 0) {
			/* all indices in this chunk are in use */
			continue;
		}
		--j;
		/* mark the index as in use */
		bit_clear(bmap[i], j);
		idx = (i * FO_FLOWADV_CHUNK) + j;
		break;
	}

	if (idx == FLOWADV_IDX_NONE) {
		SK_ERR("%s(%d) flow advisory table full: num %u max %u",
		    fo->fo_name, fo->fo_pid, fo->fo_num_flowadv,
		    fo->fo_flowadv_max);
		VERIFY(fo->fo_num_flowadv == fo->fo_flowadv_max);
		*fadv_idx = FLOWADV_IDX_NONE;
		return ENOSPC;
	}

	fo->fo_num_flowadv++;
	ASSERT(idx < ((flowadv_idx_t)-1));
	*fadv_idx = (flowadv_idx_t)idx;
	ASSERT(*fadv_idx < fo->fo_flowadv_max);
	return 0;
}

void
flow_owner_flowadv_index_free(struct flow_owner *fo, flowadv_idx_t fadv_idx)
{
	uint32_t chunk_idx, bit_pos;
	bitmap_t *bmap = fo->fo_flowadv_bmap;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
	ASSERT(fo->fo_num_flowadv != 0);
	ASSERT((fo->fo_flowadv_max != 0) && (fadv_idx < fo->fo_flowadv_max));

	chunk_idx = fadv_idx / FO_FLOWADV_CHUNK;
	bit_pos = fadv_idx % FO_FLOWADV_CHUNK;
	ASSERT(!bit_test(bmap[chunk_idx], bit_pos));
	/* mark the index as free */
	bit_set(bmap[chunk_idx], bit_pos);
	fo->fo_num_flowadv--;
}

int
flow_owner_destroy_entry(struct flow_owner *fo, uuid_t uuid,
    bool nolinger, void *close_params)
{
	struct flow_entry *fe = NULL;
	int err = 0;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	/* look up the flow for this process */
	fe = flow_entry_find_by_uuid(fo, uuid);
	if (fe == NULL) {
		err = ENOENT;
	} else {
		/* free flow entry (OK to linger if caller asked) */
		flow_entry_destroy(fo, fe, nolinger, close_params);
	}

	return err;
}

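/*
 * RB tree comparator: flow owners are ordered by pid, then by the opaque
 * context pointer, then by the low-latency flag, matching the lookup keys
 * used by flow_owner_find_by_pid().
 */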
static inline int
fo_cmp(const struct flow_owner *a, const struct flow_owner *b)
{
	if (a->fo_pid > b->fo_pid) {
		return 1;
	}
	if (a->fo_pid < b->fo_pid) {
		return -1;
	}
	if ((intptr_t)a->fo_context > (intptr_t)b->fo_context) {
		return 1;
	} else if ((intptr_t)a->fo_context < (intptr_t)b->fo_context) {
		return -1;
	}
	if (a->fo_low_latency != b->fo_low_latency) {
		if (a->fo_low_latency) {
			return 1;
		} else {
			return -1;
		}
	}
	return 0;
}

static struct flow_owner *
fo_alloc(boolean_t can_block)
{
	struct flow_owner *fo;

	fo = skmem_cache_alloc(sk_fo_cache,
	    can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
	if (fo == NULL) {
		return NULL;
	}

	bzero(fo, sk_fo_size);

	SK_DF(SK_VERB_MEM, "fo 0x%llx ALLOC", SK_KVA(fo));

	return fo;
}

static void
fo_free(struct flow_owner *fo)
{
	ASSERT(fo->fo_bucket == NULL);
	ASSERT(RB_EMPTY(&fo->fo_flow_entry_id_head));
	ASSERT(fo->fo_flowadv_bmap == NULL);

	SK_DF(SK_VERB_MEM, "fo 0x%llx FREE", SK_KVA(fo));

	skmem_cache_free(sk_fo_cache, fo);
}