1/*
2 * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31 * All rights reserved.
32 * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55#include <sys/systm.h>
56#include <skywalk/os_skywalk_private.h>
57#include <skywalk/nexus/monitor/nx_monitor.h>
58#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
59#include <skywalk/nexus/netif/nx_netif.h>
60#include <skywalk/nexus/upipe/nx_user_pipe.h>
61#include <skywalk/nexus/kpipe/nx_kernel_pipe.h>
62#include <kern/thread.h>
63
64static int na_krings_use(struct kern_channel *);
65static void na_krings_unuse(struct kern_channel *);
66static void na_krings_verify(struct nexus_adapter *);
67static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t);
68static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t);
69static void na_set_all_rings(struct nexus_adapter *, uint32_t);
70static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t);
71static void na_unset_ringid(struct kern_channel *);
72static void na_teardown(struct nexus_adapter *, struct kern_channel *,
73 boolean_t);
74
75static int na_kr_create(struct nexus_adapter *, boolean_t);
76static void na_kr_delete(struct nexus_adapter *);
77static int na_kr_setup(struct nexus_adapter *, struct kern_channel *);
78static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *,
79 boolean_t);
80static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *,
81 boolean_t, struct proc *);
82static int na_kr_populate_slots(struct __kern_channel_ring *);
83static void na_kr_depopulate_slots(struct __kern_channel_ring *,
84 struct kern_channel *, boolean_t defunct);
85
86static int na_schema_alloc(struct kern_channel *);
87
88static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t);
89static void na_pseudo_free(struct nexus_adapter *);
90static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *,
91 uint32_t);
92static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *,
93 uint32_t);
94static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t);
95static void na_pseudo_dtor(struct nexus_adapter *);
96static int na_pseudo_krings_create(struct nexus_adapter *,
97 struct kern_channel *);
98static void na_pseudo_krings_delete(struct nexus_adapter *,
99 struct kern_channel *, boolean_t);
100static int na_packet_pool_alloc_sync(struct __kern_channel_ring *,
101 struct proc *, uint32_t);
102static int na_packet_pool_alloc_large_sync(struct __kern_channel_ring *,
103 struct proc *, uint32_t);
104static int na_packet_pool_free_sync(struct __kern_channel_ring *,
105 struct proc *, uint32_t);
106static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *,
107 struct proc *, uint32_t);
108static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *,
109 struct proc *, uint32_t);
110
111#define NA_KRING_IDLE_TIMEOUT (NSEC_PER_SEC * 30) /* 30 seconds */
112
113static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter);
114
115static int __na_inited = 0;
116
117#define NA_NUM_WMM_CLASSES 4
118#define NAKR_WMM_SC2RINGID(_s) PKT_SC2TC(_s)
119#define NAKR_SET_SVC_LUT(_n, _s) \
120 (_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s)
121#define NAKR_SET_KR_SVC(_n, _s) \
122 NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s)
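/*
 * NAKR_SET_SVC_LUT() records, for a given service class, which TX ring
 * index packets of that class are steered to, while NAKR_SET_KR_SVC()
 * stamps the corresponding TX kring with the service class it carries.
 * Both are keyed off the WMM access category derived via PKT_SC2TC().
 */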
123
124#define NA_UPP_ALLOC_LOWAT 8
125static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT;
126
127#define NA_UPP_REAP_INTERVAL 10 /* seconds */
128static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL;
129
130#define NA_UPP_WS_HOLD_TIME 2 /* seconds */
131static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME;
132
133#define NA_UPP_REAP_MIN_PKTS 0
134static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS;
135
136#define NA_UPP_ALLOC_BUF_LOWAT 64
137static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT;
138
139#if (DEVELOPMENT || DEBUG)
140static uint64_t _na_inject_error = 0;
141#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \
142 _SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__)
143
144SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
145 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
146 NA_UPP_WS_HOLD_TIME, "");
147SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
148 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
149 NA_UPP_REAP_INTERVAL, "");
150SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
151 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
152 NA_UPP_REAP_MIN_PKTS, "");
153SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
154 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
155 NA_UPP_ALLOC_LOWAT, "");
156SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
157 CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
158 NA_UPP_ALLOC_BUF_LOWAT, "");
159SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
160 CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
161#else
162#define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0)
163#endif /* !DEVELOPMENT && !DEBUG */
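/*
 * On DEVELOPMENT/DEBUG kernels the tunables above are exported as
 * read-write sysctls under the kern.skywalk node, e.g. (illustrative):
 *
 *	sysctl kern.skywalk.na_upp_alloc_lowat
 *	sysctl -w kern.skywalk.na_upp_reap_interval=5
 */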
164
165#define SKMEM_TAG_NX_RINGS "com.apple.skywalk.nexus.rings"
166static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS);
167
168#define SKMEM_TAG_NX_CONTEXTS "com.apple.skywalk.nexus.contexts"
169static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS);
170
171#define SKMEM_TAG_NX_SCRATCH "com.apple.skywalk.nexus.scratch"
172static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH);
173
174#if !XNU_TARGET_OS_OSX
175/* see KLDBootstrap::readPrelinkedExtensions() for details */
176extern uuid_t kernelcache_uuid;
177#else /* XNU_TARGET_OS_OSX */
178/* see panic_init() for details */
179extern unsigned char *kernel_uuid;
180#endif /* XNU_TARGET_OS_OSX */
181
182void
183na_init(void)
184{
185 /*
 * Changing the size of the nexus_mdata structure won't break the ABI,
 * but we need to be mindful of memory consumption; thus we add a
 * compile-time check here to make sure the size is within the
 * expected limit and that it's properly aligned. This check may be
 * adjusted in the future as needed.
191 */
192 _CASSERT(sizeof(struct nexus_mdata) <= 32 &&
193 IS_P2ALIGNED(sizeof(struct nexus_mdata), 8));
194 _CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum));
195
196 /* see comments on nexus_meta_type_t */
197 _CASSERT(NEXUS_META_TYPE_MAX == 3);
198 _CASSERT(NEXUS_META_SUBTYPE_MAX == 3);
199
200 ASSERT(!__na_inited);
201
202 __na_inited = 1;
203}
204
205void
206na_fini(void)
207{
208 if (__na_inited) {
209 __na_inited = 0;
210 }
211}
212
213/*
 * Interpret the ringid of a chreq by translating it into a pair
215 * of intervals of ring indices:
216 *
217 * [txfirst, txlast) and [rxfirst, rxlast)
218 */
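/*
 * For example (illustrative), on an adapter with 4 TX and 4 RX rings:
 * RING_SET_ALL with CHANNEL_RING_ID_ANY yields tx [0,4) rx [0,4),
 * while RING_SET_ALL with ring_id 2 yields tx [2,3) rx [2,3).
 */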
219int
220na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id,
221 ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX])
222{
223 enum txrx t;
224
225 switch (ring_set) {
226 case RING_SET_ALL:
227 /*
228 * Ring pair eligibility: all ring(s).
229 */
230 if (ring_id != CHANNEL_RING_ID_ANY &&
 ring_id >= na_get_nrings(na, NR_TX) &&
 ring_id >= na_get_nrings(na, NR_RX)) {
233 SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
234 na->na_name, (int)ring_id, ring_set);
235 return EINVAL;
236 }
237 for_rx_tx(t) {
238 if (ring_id == CHANNEL_RING_ID_ANY) {
239 first[t] = 0;
240 last[t] = na_get_nrings(na, t);
241 } else {
242 first[t] = ring_id;
243 last[t] = ring_id + 1;
244 }
245 }
246 break;
247
248 default:
249 SK_ERR("\"%s\": invalid ring_set %u", na->na_name, ring_set);
250 return EINVAL;
251 }
252
253 SK_DF(SK_VERB_NA | SK_VERB_RING,
254 "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)",
255 na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX],
256 first[NR_RX], last[NR_RX]);
257
258 return 0;
259}
260
261/*
262 * Set the ring ID. For devices with a single queue, a request
263 * for all rings is the same as a single ring.
264 */
265static int
266na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id)
267{
268 struct nexus_adapter *na = ch->ch_na;
269 int error;
270 enum txrx t;
271 uint32_t n_alloc_rings;
272
273 if ((error = na_interp_ringid(na, ring_id, ring_set,
 ch->ch_first, ch->ch_last)) != 0) {
275 return error;
276 }
277
 n_alloc_rings = na_get_nrings(na, NR_A);
279 if (n_alloc_rings != 0) {
280 uint32_t n_large_alloc_rings;
281
282 ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0;
283 ch->ch_last[NR_A] = ch->ch_last[NR_F] =
284 ch->ch_first[NR_A] + n_alloc_rings;
285
 n_large_alloc_rings = na_get_nrings(na, NR_LBA);
287 ch->ch_first[NR_LBA] = 0;
288 ch->ch_last[NR_LBA] = ch->ch_first[NR_LBA] + n_large_alloc_rings;
289 } else {
290 ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0;
291 ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0;
292 ch->ch_first[NR_LBA] = ch->ch_last[NR_LBA] = 0;
293 }
294 ch->ch_first[NR_EV] = 0;
 ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
296
297 /* XXX: should we initialize na_si_users for event ring ? */
298
299 /*
300 * Optimization: count the users registered for more than
301 * one ring, which are the ones sleeping on the global queue.
302 * The default na_notify() callback will then avoid signaling
303 * the global queue if nobody is using it
304 */
305 for_rx_tx(t) {
306 if (ch_is_multiplex(ch, t)) {
307 na->na_si_users[t]++;
308 ASSERT(na->na_si_users[t] != 0);
309 }
310 }
311 return 0;
312}
313
314static void
315na_unset_ringid(struct kern_channel *ch)
316{
317 struct nexus_adapter *na = ch->ch_na;
318 enum txrx t;
319
320 for_rx_tx(t) {
321 if (ch_is_multiplex(ch, t)) {
322 ASSERT(na->na_si_users[t] != 0);
323 na->na_si_users[t]--;
324 }
325 ch->ch_first[t] = ch->ch_last[t] = 0;
326 }
327}
328
329/*
330 * Check that the rings we want to bind are not exclusively owned by a previous
331 * bind. If exclusive ownership has been requested, we also mark the rings.
332 */
333/* Hoisted out of line to reduce kernel stack footprint */
334SK_NO_INLINE_ATTRIBUTE
335static int
336na_krings_use(struct kern_channel *ch)
337{
338 struct nexus_adapter *na = ch->ch_na;
339 struct __kern_channel_ring *kring;
340 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
341 enum txrx t;
342 uint32_t i;
343
344 SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)",
345 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
346 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
347
348 /*
349 * First round: check that all the requested rings
 * are not already exclusively owned, and that we are not
 * requesting exclusive ownership of rings already in use
352 */
353 for_all_rings(t) {
354 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
355 kring = &NAKR(na, t)[i];
356 if ((kring->ckr_flags & CKRF_EXCLUSIVE) ||
357 (kring->ckr_users && excl)) {
358 SK_DF(SK_VERB_NA | SK_VERB_RING,
359 "kr \"%s\" (0x%llx) krflags 0x%b is busy",
360 kring->ckr_name, SK_KVA(kring),
361 kring->ckr_flags, CKRF_BITS);
362 return EBUSY;
363 }
364 }
365 }
366
367 /*
368 * Second round: increment usage count and possibly
369 * mark as exclusive
370 */
371
372 for_all_rings(t) {
373 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
374 kring = &NAKR(na, t)[i];
375 kring->ckr_users++;
376 if (excl) {
377 kring->ckr_flags |= CKRF_EXCLUSIVE;
378 }
379 }
380 }
381
382 return 0;
383}
384
385/* Hoisted out of line to reduce kernel stack footprint */
386SK_NO_INLINE_ATTRIBUTE
387static void
388na_krings_unuse(struct kern_channel *ch)
389{
390 struct nexus_adapter *na = ch->ch_na;
391 struct __kern_channel_ring *kring;
392 boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE);
393 enum txrx t;
394 uint32_t i;
395
396 SK_DF(SK_VERB_NA | SK_VERB_RING,
397 "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)",
398 na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX],
399 ch->ch_first[NR_RX], ch->ch_last[NR_RX]);
400
401 for_all_rings(t) {
402 for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) {
403 kring = &NAKR(na, t)[i];
404 if (excl) {
405 kring->ckr_flags &= ~CKRF_EXCLUSIVE;
406 }
407 kring->ckr_users--;
408 }
409 }
410}
411
412/* Hoisted out of line to reduce kernel stack footprint */
413SK_NO_INLINE_ATTRIBUTE
414static void
415na_krings_verify(struct nexus_adapter *na)
416{
417 struct __kern_channel_ring *kring;
418 enum txrx t;
419 uint32_t i;
420
421 for_all_rings(t) {
422 for (i = 0; i < na_get_nrings(na, t); i++) {
423 kring = &NAKR(na, t)[i];
424 /* na_kr_create() validations */
425 ASSERT(kring->ckr_num_slots > 0);
426 ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1));
427 ASSERT(kring->ckr_pp != NULL);
428
429 if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) {
430 continue;
431 }
432 /* na_kr_setup() validations */
433 if (KR_KERNEL_ONLY(kring)) {
434 ASSERT(kring->ckr_ring == NULL);
435 } else {
436 ASSERT(kring->ckr_ring != NULL);
437 }
438 ASSERT(kring->ckr_ksds_last ==
439 &kring->ckr_ksds[kring->ckr_lim]);
440 }
441 }
442}
443
444int
445na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch,
446 struct chreq *chr)
447{
 struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
 struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
450 uint32_t ch_mode = chr->cr_mode;
451 int err = 0;
452
453 SK_LOCK_ASSERT_HELD();
454 ASSERT(ch->ch_schema == NULL);
455 ASSERT(ch->ch_na == NULL);
456
457 /* ring configuration may have changed, fetch from the card */
458 na_update_config(na);
459 ch->ch_na = na; /* store the reference */
 err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
461 if (err != 0) {
462 goto err;
463 }
464
465 os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE |
466 CHANF_USER_PACKET_POOL | CHANF_EVENT_RING), relaxed);
467 if (ch_mode & CHMODE_EXCLUSIVE) {
468 os_atomic_or(&ch->ch_flags, CHANF_EXCLUSIVE, relaxed);
469 }
470 /*
471 * Disallow automatic sync for monitor mode, since TX
472 * direction is disabled.
473 */
474 if (ch_mode & CHMODE_MONITOR) {
475 os_atomic_or(&ch->ch_flags, CHANF_RXONLY, relaxed);
476 }
477
478 if (!!(na->na_flags & NAF_USER_PKT_POOL) ^
479 !!(ch_mode & CHMODE_USER_PACKET_POOL)) {
480 SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)",
481 ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS);
482 err = EINVAL;
483 goto err;
484 }
485
486 if (na->na_arena->ar_flags & ARF_DEFUNCT) {
487 err = ENXIO;
488 goto err;
489 }
490
491 if (ch_mode & CHMODE_USER_PACKET_POOL) {
492 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
493 ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]);
494 ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]);
495 os_atomic_or(&ch->ch_flags, CHANF_USER_PACKET_POOL, relaxed);
496 }
497
498 if (ch_mode & CHMODE_EVENT_RING) {
499 ASSERT(na->na_flags & NAF_USER_PKT_POOL);
500 ASSERT(na->na_flags & NAF_EVENT_RING);
501 ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]);
502 os_atomic_or(&ch->ch_flags, CHANF_EVENT_RING, relaxed);
503 }
504
505 /*
506 * If this is the first channel of the adapter, create
507 * the rings and their in-kernel view, the krings.
508 */
509 if (na->na_channels == 0) {
510 err = na->na_krings_create(na, ch);
511 if (err != 0) {
512 goto err;
513 }
514
515 /*
516 * Sanity check; this is already done in na_kr_create(),
517 * but we do it here as well to validate na_kr_setup().
518 */
519 na_krings_verify(na);
520 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
522 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
 skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
524 }
525
526 /*
527 * Validate ownership and usability of the krings; take into account
528 * whether some previous bind has exclusive ownership on them.
529 */
530 err = na_krings_use(ch);
531 if (err != 0) {
532 goto err_del_rings;
533 }
534
535 /* for user-facing channel, create a new channel schema */
536 if (!(ch->ch_flags & CHANF_KERNEL)) {
537 err = na_schema_alloc(ch);
538 if (err != 0) {
539 goto err_rel_excl;
540 }
541
542 ASSERT(ch->ch_schema != NULL);
543 ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1);
544 } else {
545 ASSERT(ch->ch_schema == NULL);
546 ch->ch_schema_offset = (mach_vm_offset_t)-1;
547 }
548
549 /* update our work timestamp */
550 na->na_work_ts = net_uptime();
551
552 na->na_channels++;
553
554 /*
555 * If user packet pool is desired, initialize the allocated
 * object hash table in the pool, if not already done. This also
 * retains a refcnt on the pool, which the caller must release.
558 */
559 ASSERT(ch->ch_pp == NULL);
560 if (ch_mode & CHMODE_USER_PACKET_POOL) {
561#pragma unused(tx_pp)
562 ASSERT(rx_pp == tx_pp);
563 err = pp_init_upp(rx_pp, TRUE);
564 if (err != 0) {
565 goto err_free_schema;
566 }
567 ch->ch_pp = rx_pp;
568 }
569
570 if (!NA_IS_ACTIVE(na)) {
571 err = na->na_activate(na, NA_ACTIVATE_MODE_ON);
572 if (err != 0) {
573 goto err_release_pp;
574 }
575
576 SK_D("activated \"%s\" adapter 0x%llx", na->na_name,
577 SK_KVA(na));
578 SK_D(" na_md_type: %u", na->na_md_type);
579 SK_D(" na_md_subtype: %u", na->na_md_subtype);
580 }
581
582 SK_D("ch 0x%llx", SK_KVA(ch));
583 SK_D(" ch_flags: 0x%b", ch->ch_flags, CHANF_BITS);
584 if (ch->ch_schema != NULL) {
585 SK_D(" ch_schema: 0x%llx", SK_KVA(ch->ch_schema));
586 }
587 SK_D(" ch_na: 0x%llx (chcnt %u)", SK_KVA(ch->ch_na),
588 ch->ch_na->na_channels);
589 SK_D(" ch_tx_rings: [%u,%u)", ch->ch_first[NR_TX],
590 ch->ch_last[NR_TX]);
591 SK_D(" ch_rx_rings: [%u,%u)", ch->ch_first[NR_RX],
592 ch->ch_last[NR_RX]);
593 SK_D(" ch_alloc_rings: [%u,%u)", ch->ch_first[NR_A],
594 ch->ch_last[NR_A]);
595 SK_D(" ch_free_rings: [%u,%u)", ch->ch_first[NR_F],
596 ch->ch_last[NR_F]);
597 SK_D(" ch_ev_rings: [%u,%u)", ch->ch_first[NR_EV],
598 ch->ch_last[NR_EV]);
599
600 return 0;
601
602err_release_pp:
603 if (ch_mode & CHMODE_USER_PACKET_POOL) {
604 ASSERT(ch->ch_pp != NULL);
605 pp_release(rx_pp);
606 ch->ch_pp = NULL;
607 }
608err_free_schema:
609 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
610 NEXUS_META_TYPE_INVALID;
611 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
612 NEXUS_META_SUBTYPE_INVALID;
613 ASSERT(na->na_channels != 0);
614 na->na_channels--;
615 if (ch->ch_schema != NULL) {
616 skmem_cache_free(
 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
618 ch->ch_schema);
619 ch->ch_schema = NULL;
620 ch->ch_schema_offset = (mach_vm_offset_t)-1;
621 }
622err_rel_excl:
623 na_krings_unuse(ch);
624err_del_rings:
625 if (na->na_channels == 0) {
626 na->na_krings_delete(na, ch, FALSE);
627 }
628err:
629 ch->ch_na = NULL;
630 ASSERT(err != 0);
631
632 return err;
633}
634
635/*
636 * Undo everything that was done in na_bind_channel().
637 */
638/* call with SK_LOCK held */
639void
640na_unbind_channel(struct kern_channel *ch)
641{
642 struct nexus_adapter *na = ch->ch_na;
643
644 SK_LOCK_ASSERT_HELD();
645
646 ASSERT(na->na_channels != 0);
647 na->na_channels--;
648
649 /* release exclusive use if it was requested at bind time */
650 na_krings_unuse(ch);
651
652 if (na->na_channels == 0) { /* last instance */
653 SK_D("%s(%d): deleting last channel instance for %s",
654 ch->ch_name, ch->ch_pid, na->na_name);
655
656 /*
657 * Free any remaining allocated packets attached to
658 * the slots, followed by a teardown of the arena.
659 */
660 na_teardown(na, ch, FALSE);
661
662 *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
663 NEXUS_META_TYPE_INVALID;
664 *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
665 NEXUS_META_SUBTYPE_INVALID;
666 } else {
667 SK_D("%s(%d): %s has %u remaining channel instance(s)",
668 ch->ch_name, ch->ch_pid, na->na_name, na->na_channels);
669 }
670
671 /*
672 * Free any allocated packets (for the process) attached to the slots;
 * note that na_teardown() may already have done this.
674 */
675 if (ch->ch_pp != NULL) {
676 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
677 pp_purge_upp(ch->ch_pp, ch->ch_pid);
678 pp_release(ch->ch_pp);
679 ch->ch_pp = NULL;
680 }
681
 /* possibly decrement the counter of tx_si/rx_si users */
683 na_unset_ringid(ch);
684
685 /* reap the caches now (purge if adapter is idle) */
686 skmem_arena_reap(na->na_arena, (na->na_channels == 0));
687
688 /* delete the csm */
689 if (ch->ch_schema != NULL) {
690 skmem_cache_free(
 skmem_arena_nexus(na->na_arena)->arn_schema_cache,
692 ch->ch_schema);
693 ch->ch_schema = NULL;
694 ch->ch_schema_offset = (mach_vm_offset_t)-1;
695 }
696
697 /* destroy the memory map */
698 skmem_arena_munmap_channel(na->na_arena, ch);
699
700 /* mark the channel as unbound */
701 os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE), relaxed);
702 ch->ch_na = NULL;
703
704 /* and finally release the nexus adapter; this might free it */
705 (void) na_release_locked(na);
706}
707
708static void
709na_teardown(struct nexus_adapter *na, struct kern_channel *ch,
710 boolean_t defunct)
711{
712 SK_LOCK_ASSERT_HELD();
713 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
714
715#if CONFIG_NEXUS_MONITOR
716 /*
717 * Walk through all the rings and tell any monitor
718 * that the port is going to exit Skywalk mode
719 */
720 nx_mon_stop(na);
721#endif /* CONFIG_NEXUS_MONITOR */
722
723 /*
 * Deactivate the adapter.
725 */
726 (void) na->na_activate(na,
727 (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF));
728
729 /*
730 * Free any remaining allocated packets for this process.
731 */
732 if (ch->ch_pp != NULL) {
733 ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL);
734 pp_purge_upp(ch->ch_pp, ch->ch_pid);
735 if (!defunct) {
736 pp_release(ch->ch_pp);
737 ch->ch_pp = NULL;
738 }
739 }
740
741 /*
742 * Delete rings and buffers.
743 */
744 na->na_krings_delete(na, ch, defunct);
745}
746
747/* call with SK_LOCK held */
748/*
749 * Allocate the per-fd structure __user_channel_schema.
750 */
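/*
 * The schema object is filled in with the schema version, kernel
 * version string and UUID, the ring counts visible to this channel,
 * per-ring offsets (relative to the schema) and, when present, the
 * offsets of the stats, flow advisory and nexus advisory regions.
 */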
751static int
752na_schema_alloc(struct kern_channel *ch)
753{
754 struct nexus_adapter *na = ch->ch_na;
755 struct skmem_arena *ar = na->na_arena;
756 struct skmem_arena_nexus *arn;
757 mach_vm_offset_t roff[SKMEM_REGIONS];
758 struct __kern_channel_ring *kr;
759 struct __user_channel_schema *csm;
760 struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi;
761 mach_vm_offset_t base;
762 uint32_t i, j, k, n[NR_ALL];
763 enum txrx t;
764
765 /* see comments for struct __user_channel_schema */
766 _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0);
767 _CASSERT(offsetof(struct __user_channel_schema, csm_flags) ==
768 sizeof(csm->csm_ver));
769 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) ==
770 sizeof(csm->csm_ver) + sizeof(csm->csm_flags));
771 _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) ==
772 sizeof(csm->csm_ver) + sizeof(csm->csm_flags) +
773 sizeof(csm->csm_kern_name));
774
775 SK_LOCK_ASSERT_HELD();
776
777 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
778 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
779 arn = skmem_arena_nexus(ar);
780 ASSERT(arn != NULL);
781 for_all_rings(t) {
782 n[t] = 0;
783 }
784
785 csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP);
786 if (csm == NULL) {
787 return ENOMEM;
788 }
789
790 skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL);
 bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));
792
793 *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION;
794
795 /* kernel version and executable UUID */
796 _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN);
797 (void) strncpy((char *)(uintptr_t)csm->csm_kern_name,
798 version, sizeof(csm->csm_kern_name) - 1);
799#if !XNU_TARGET_OS_OSX
800 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
801 kernelcache_uuid, sizeof(csm->csm_kern_uuid));
802#else /* XNU_TARGET_OS_OSX */
803 if (kernel_uuid != NULL) {
 (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
 kernel_uuid, sizeof(csm->csm_kern_uuid));
806 }
807#endif /* XNU_TARGET_OS_OSX */
808
809 for_rx_tx(t) {
810 ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0));
811 n[t] = ch->ch_last[t] - ch->ch_first[t];
812 ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t));
813 }
814
815 /* return total number of tx and rx rings for this channel */
816 *(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX];
817 *(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX];
818
819 if (ch->ch_flags & CHANF_USER_PACKET_POOL) {
820 *(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs =
821 na->na_num_allocator_ring_pairs;
822 n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs;
823 ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A));
824 ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A]));
825 ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F]));
826
827 n[NR_LBA] = na->na_num_large_buf_alloc_rings;
828 if (n[NR_LBA] != 0) {
829 *(uint32_t *)(uintptr_t)&csm->csm_large_buf_alloc_rings = n[NR_LBA];
830 ASSERT(n[NR_LBA] == (ch->ch_last[NR_LBA] - ch->ch_first[NR_LBA]));
831 }
832 }
833
834 if (ch->ch_flags & CHANF_EVENT_RING) {
835 n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV];
836 ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV));
837 *(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV];
838 }
839
 bzero(&roff, sizeof(roff));
841 for (i = 0; i < SKMEM_REGIONS; i++) {
842 if (ar->ar_regions[i] == NULL) {
843 ASSERT(i == SKMEM_REGION_GUARD_HEAD ||
844 i == SKMEM_REGION_SCHEMA ||
845 i == SKMEM_REGION_BUF_LARGE ||
846 i == SKMEM_REGION_RXBUF_DEF ||
847 i == SKMEM_REGION_RXBUF_LARGE ||
848 i == SKMEM_REGION_TXBUF_DEF ||
849 i == SKMEM_REGION_TXBUF_LARGE ||
850 i == SKMEM_REGION_RXKMD ||
851 i == SKMEM_REGION_TXKMD ||
852 i == SKMEM_REGION_UMD ||
853 i == SKMEM_REGION_UBFT ||
854 i == SKMEM_REGION_KBFT ||
855 i == SKMEM_REGION_RXKBFT ||
856 i == SKMEM_REGION_TXKBFT ||
857 i == SKMEM_REGION_TXAUSD ||
858 i == SKMEM_REGION_RXFUSD ||
859 i == SKMEM_REGION_USTATS ||
860 i == SKMEM_REGION_KSTATS ||
861 i == SKMEM_REGION_INTRINSIC ||
862 i == SKMEM_REGION_FLOWADV ||
863 i == SKMEM_REGION_NEXUSADV ||
864 i == SKMEM_REGION_SYSCTLS ||
865 i == SKMEM_REGION_GUARD_TAIL);
866 continue;
867 }
868
869 /* not for nexus */
870 ASSERT(i != SKMEM_REGION_SYSCTLS);
871
872 /*
873 * Get region offsets from base of mmap span; the arena
874 * doesn't need to be mmap'd at this point, since we
875 * simply compute the relative offset.
876 */
877 roff[i] = skmem_arena_get_region_offset(ar, i);
878 }
879
880 /*
881 * The schema is made up of the descriptor followed inline by an array
882 * of offsets to the tx, rx, allocator and event rings in the mmap span.
 * Each entry holds the offset of its ring relative to the schema, so
 * the information is usable in userspace to reach the ring from
 * the schema.
886 */
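 /*
  * The csm_ring_ofs[] entries are laid out in the order: TX rings,
  * RX rings, allocator rings, free rings, event rings and finally
  * the large-buffer allocator rings, matching the loops below.
  */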
887 base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi);
888
889 /* initialize schema with tx ring info */
890 for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) {
891 kr = &na->na_tx_rings[j];
892 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
893 continue;
894 }
895
896 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
897 skmem_cache_get_obj_info(arn->arn_ring_cache,
898 kr->ckr_ring, &ring_oi, NULL);
899 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off =
900 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
901
902 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
903 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
904 kr->ckr_ksds, &ksd_oi, &usd_oi);
905
906 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off =
907 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
908 base;
909 }
910 /* initialize schema with rx ring info */
911 for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) {
912 kr = &na->na_rx_rings[j];
913 if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */
914 continue;
915 }
916
917 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
918 skmem_cache_get_obj_info(arn->arn_ring_cache,
919 kr->ckr_ring, &ring_oi, NULL);
920 *(mach_vm_offset_t *)
921 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off =
922 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
923
924 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
925 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
926 kr->ckr_ksds, &ksd_oi, &usd_oi);
927
928 *(mach_vm_offset_t *)
929 (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off =
930 (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
931 base;
932 }
933 /* initialize schema with allocator ring info */
934 for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX];
935 i < n[NR_A]; i++, j++) {
936 mach_vm_offset_t usd_roff;
937
938 usd_roff = roff[SKMEM_REGION_TXAUSD];
939 kr = &na->na_alloc_rings[j];
940 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
941 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
942
943 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
944 &ring_oi, NULL);
945 *(mach_vm_offset_t *)
946 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
947 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
948
949 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
950 &ksd_oi, &usd_oi);
951 *(mach_vm_offset_t *)
952 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
953 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
954 }
955 /* initialize schema with free ring info */
956 for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A];
957 i < n[NR_F]; i++, j++) {
958 mach_vm_offset_t usd_roff;
959
960 usd_roff = roff[SKMEM_REGION_RXFUSD];
961 kr = &na->na_free_rings[j];
962 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
963 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
964
965 skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring,
966 &ring_oi, NULL);
967 *(mach_vm_offset_t *)
968 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
969 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
970
971 skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds,
972 &ksd_oi, &usd_oi);
973 *(mach_vm_offset_t *)
974 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
975 (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base;
976 }
977 /* initialize schema with event ring info */
978 for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] +
979 n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) {
980 ASSERT(csm->csm_num_event_rings != 0);
981 kr = &na->na_event_rings[j];
982 ASSERT(!KR_KERNEL_ONLY(kr));
983 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
984 skmem_cache_get_obj_info(arn->arn_ring_cache,
985 kr->ckr_ring, &ring_oi, NULL);
986 *(mach_vm_offset_t *)
987 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
988 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
989
990 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
991 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
992 kr->ckr_ksds, &ksd_oi, &usd_oi);
993
994 *(mach_vm_offset_t *)
995 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
996 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
997 base;
998 }
999 /* initialize schema with large buf alloc ring info */
1000 for (i = 0, j = ch->ch_first[NR_LBA], k = n[NR_TX] + n[NR_RX] +
1001 n[NR_A] + n[NR_F] + n[NR_EV]; i < n[NR_LBA]; i++, j++) {
1002 ASSERT(csm->csm_large_buf_alloc_rings != 0);
1003 kr = &na->na_large_buf_alloc_rings[j];
1004 ASSERT(!KR_KERNEL_ONLY(kr));
1005 ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED);
1006 skmem_cache_get_obj_info(arn->arn_ring_cache,
1007 kr->ckr_ring, &ring_oi, NULL);
1008 *(mach_vm_offset_t *)
1009 (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off =
1010 (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base;
1011
1012 ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED);
1013 skmem_cache_get_obj_info(kr->ckr_ksds_cache,
1014 kr->ckr_ksds, &ksd_oi, &usd_oi);
1015
1016 *(mach_vm_offset_t *)
1017 (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off =
1018 (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) -
1019 base;
1020 }
1021
1022 *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie =
1023 __ch_umd_redzone_cookie;
1024 *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type;
1025 *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype =
1026 na->na_md_subtype;
1027
1028 if (arn->arn_stats_obj != NULL) {
1029 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL);
1030 ASSERT(roff[SKMEM_REGION_USTATS] != 0);
1031 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs =
1032 roff[SKMEM_REGION_USTATS];
1033 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1034 na->na_stats_type;
1035 } else {
1036 ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL);
1037 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0;
1038 *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type =
1039 NEXUS_STATS_TYPE_INVALID;
1040 }
1041
1042 if (arn->arn_flowadv_obj != NULL) {
1043 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL);
1044 ASSERT(roff[SKMEM_REGION_FLOWADV] != 0);
1045 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs =
1046 roff[SKMEM_REGION_FLOWADV];
1047 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max =
1048 na->na_flowadv_max;
1049 } else {
1050 ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL);
1051 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0;
1052 *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0;
1053 }
1054
1055 if (arn->arn_nexusadv_obj != NULL) {
1056 struct __kern_nexus_adv_metadata *adv_md;
1057
1058 adv_md = arn->arn_nexusadv_obj;
1059 ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION);
1060 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL);
1061 ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0);
1062 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs =
1063 roff[SKMEM_REGION_NEXUSADV];
1064 } else {
1065 ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL);
1066 *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0;
1067 }
1068
1069 ch->ch_schema = csm;
1070 ch->ch_schema_offset = base;
1071
1072 return 0;
1073}
1074
1075/*
1076 * Called by all routines that create nexus_adapters.
1077 * Attach na to the ifp (if any) and provide defaults
1078 * for optional callbacks. Defaults assume that we
 * are creating a hardware nexus_adapter.
1080 */
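/*
 * Note that the only callback defaulted here is na_notify(); the
 * krings create/delete callbacks are expected to have been set by
 * the caller (see the assertions below).
 */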
1081void
1082na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx,
1083 struct kern_nexus_domain_provider *nxdom_prov)
1084{
1085 SK_LOCK_ASSERT_HELD();
1086
1087 ASSERT(nx != NULL);
1088 ASSERT(nxdom_prov != NULL);
1089 ASSERT(na->na_krings_create != NULL);
1090 ASSERT(na->na_krings_delete != NULL);
1091 if (na->na_type != NA_NETIF_COMPAT_DEV) {
1092 ASSERT(na_get_nrings(na, NR_TX) != 0);
1093 }
1094 if (na->na_type != NA_NETIF_COMPAT_HOST) {
1095 ASSERT(na_get_nrings(na, NR_RX) != 0);
1096 }
1097 ASSERT(na->na_channels == 0);
1098
1099 if (na->na_notify == NULL) {
1100 na->na_notify = na_notify;
1101 }
1102
1103 na->na_nx = nx;
1104 na->na_nxdom_prov = nxdom_prov;
1105
1106 SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx",
1107 SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type,
1108 SK_KVA(na->na_arena));
1109}
1110
1111void
1112na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay,
1113 boolean_t within_kevent, boolean_t selwake, uint32_t hint)
1114{
1115 struct nexus_adapter *na = KRNA(kring);
1116 enum txrx t = kring->ckr_tx;
1117
1118 SK_DF(SK_VERB_EVENTS,
1119 "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b",
1120 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
1121 na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake,
1122 hint, CHAN_FILT_HINT_BITS);
1123
1124 csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint);
1125 /*
1126 * optimization: avoid a wake up on the global
1127 * queue if nobody has registered for more
1128 * than one ring
1129 */
1130 if (na->na_si_users[t] > 0) {
1131 csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint);
1132 }
1133}
1134
1135/* default notify callback */
1136static int
1137na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags)
1138{
1139#pragma unused(p)
1140 SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ?
1141 SK_VERB_TX : SK_VERB_RX),
1142 "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b "
1143 "flags 0x%x, kh %u kt %u | h %u t %u",
1144 sk_proc_name_address(p), sk_proc_pid(p),
1145 (kring->ckr_tx == NR_TX) ? "W" : "R", KRNA(kring)->na_name,
1146 SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring),
1147 kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead,
1148 kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail);
1149
 na_post_event(kring, (flags & NA_NOTEF_PUSH),
 (flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1152
1153 return 0;
1154}
1155
1156/*
1157 * Fetch configuration from the device, to cope with dynamic
1158 * reconfigurations after loading the module.
1159 */
1160/* call with SK_LOCK held */
1161int
1162na_update_config(struct nexus_adapter *na)
1163{
1164 uint32_t txr, txd, rxr, rxd;
1165
1166 SK_LOCK_ASSERT_HELD();
1167
1168 txr = txd = rxr = rxd = 0;
1169 if (na->na_config == NULL ||
1170 na->na_config(na, &txr, &txd, &rxr, &rxd)) {
1171 /* take whatever we had at init time */
 txr = na_get_nrings(na, NR_TX);
 txd = na_get_nslots(na, NR_TX);
 rxr = na_get_nrings(na, NR_RX);
 rxd = na_get_nslots(na, NR_RX);
1176 }
1177
 if (na_get_nrings(na, NR_TX) == txr &&
 na_get_nslots(na, NR_TX) == txd &&
 na_get_nrings(na, NR_RX) == rxr &&
 na_get_nslots(na, NR_RX) == rxd) {
1182 return 0; /* nothing changed */
1183 }
1184 SK_D("stored config %s: txring %u x %u, rxring %u x %u",
1185 na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX),
1186 na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX));
1187 SK_D("new config %s: txring %u x %u, rxring %u x %u",
1188 na->na_name, txr, txd, rxr, rxd);
1189
1190 if (na->na_channels == 0) {
1191 SK_D("configuration changed (but fine)");
 na_set_nrings(na, NR_TX, txr);
 na_set_nslots(na, NR_TX, txd);
 na_set_nrings(na, NR_RX, rxr);
 na_set_nslots(na, NR_RX, rxd);
1196 return 0;
1197 }
1198 SK_ERR("configuration changed while active, this is bad...");
1199 return 1;
1200}
1201
1202static void
1203na_kr_setup_netif_svc_map(struct nexus_adapter *na)
1204{
1205 uint32_t i;
1206 uint32_t num_tx_rings;
1207
1208 ASSERT(na->na_type == NA_NETIF_DEV);
 num_tx_rings = na_get_nrings(na, NR_TX);
1210
1211 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) ==
1212 NAKR_WMM_SC2RINGID(KPKT_SC_BK));
1213 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1214 NAKR_WMM_SC2RINGID(KPKT_SC_RD));
1215 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) ==
1216 NAKR_WMM_SC2RINGID(KPKT_SC_OAM));
1217 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1218 NAKR_WMM_SC2RINGID(KPKT_SC_RV));
1219 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) ==
1220 NAKR_WMM_SC2RINGID(KPKT_SC_VI));
1221 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) ==
1222 NAKR_WMM_SC2RINGID(KPKT_SC_CTL));
1223
1224 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES);
1225 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES);
1226 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES);
1227 _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES);
1228
1229 _CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES);
1230 _CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES);
1231 _CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES);
1232 _CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES);
1233 _CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES);
1234 _CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES);
1235 _CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES);
1236 _CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES);
1237 _CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES);
1238 _CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES);
1239 _CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES);
1240
1241 /*
1242 * we support the following 2 configurations:
 * 1. packets from all 10 service classes map to one ring.
1244 * 2. a 10:4 mapping between service classes and the rings. These 4
1245 * rings map to the 4 WMM access categories.
1246 */
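 /*
  * In the WMM configuration the mapping (per the compile-time
  * assertions above) groups BK_SYS/BK, BE/RD/OAM, AV/RV/VI and
  * VO/CTL onto the four rings corresponding to the WMM access
  * categories.
  */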
1247 if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) {
1248 ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES);
1249 /* setup the adapter's service class LUT */
1250 NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS);
1251 NAKR_SET_SVC_LUT(na, KPKT_SC_BK);
1252 NAKR_SET_SVC_LUT(na, KPKT_SC_BE);
1253 NAKR_SET_SVC_LUT(na, KPKT_SC_RD);
1254 NAKR_SET_SVC_LUT(na, KPKT_SC_OAM);
1255 NAKR_SET_SVC_LUT(na, KPKT_SC_AV);
1256 NAKR_SET_SVC_LUT(na, KPKT_SC_RV);
1257 NAKR_SET_SVC_LUT(na, KPKT_SC_VI);
1258 NAKR_SET_SVC_LUT(na, KPKT_SC_SIG);
1259 NAKR_SET_SVC_LUT(na, KPKT_SC_VO);
1260 NAKR_SET_SVC_LUT(na, KPKT_SC_CTL);
1261
 /* Initialize the service class for each of the 4 rings */
1263 NAKR_SET_KR_SVC(na, KPKT_SC_BK);
1264 NAKR_SET_KR_SVC(na, KPKT_SC_BE);
1265 NAKR_SET_KR_SVC(na, KPKT_SC_VI);
1266 NAKR_SET_KR_SVC(na, KPKT_SC_VO);
1267 } else {
1268 ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap ==
1269 NEXUS_QMAP_TYPE_DEFAULT);
 /* 10:1 mapping */
1271 for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) {
1272 na->na_kring_svc_lut[i] = 0;
1273 }
1274 for (i = 0; i < num_tx_rings; i++) {
 NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
1276 }
1277 }
1278}
1279
1280static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
1281static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
1282static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
1283static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
1284static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
1285static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
1286static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1287
1288static lck_grp_t *
1289na_kr_q_lck_grp(enum txrx t)
1290{
1291 switch (t) {
1292 case NR_TX:
1293 return &channel_txq_lock_group;
1294 case NR_RX:
1295 return &channel_rxq_lock_group;
1296 case NR_A:
1297 case NR_F:
1298 case NR_LBA:
1299 return &channel_alloc_lock_group;
1300 case NR_EV:
1301 return &channel_evq_lock_group;
1302 default:
1303 VERIFY(0);
1304 /* NOTREACHED */
1305 __builtin_unreachable();
1306 }
1307}
1308
1309static lck_grp_t *
1310na_kr_s_lck_grp(enum txrx t)
1311{
1312 switch (t) {
1313 case NR_TX:
1314 return &channel_txs_lock_group;
1315 case NR_RX:
1316 return &channel_rxs_lock_group;
1317 case NR_A:
1318 case NR_F:
1319 case NR_LBA:
1320 return &channel_alloc_lock_group;
1321 case NR_EV:
1322 return &channel_evs_lock_group;
1323 default:
1324 VERIFY(0);
1325 /* NOTREACHED */
1326 __builtin_unreachable();
1327 }
1328}
1329
1330static void
1331kr_init_tbr(struct __kern_channel_ring *r)
1332{
1333 r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID;
1334 r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID;
1335 r->ckr_tbr_last = 0;
1336}
1337
1338struct kern_pbufpool *
1339na_kr_get_pp(struct nexus_adapter *na, enum txrx t)
1340{
1341 struct kern_pbufpool *pp = NULL;
1342 switch (t) {
1343 case NR_RX:
1344 case NR_F:
1345 case NR_EV:
 pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1347 break;
1348 case NR_TX:
1349 case NR_A:
1350 case NR_LBA:
 pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1352 break;
1353 default:
1354 VERIFY(0);
1355 /* NOTREACHED */
1356 __builtin_unreachable();
1357 }
1358
1359 return pp;
1360}
1361
1362/*
1363 * Create the krings array and initialize the fields common to all adapters.
1364 * The array layout is this:
1365 *
1366 * +----------+
1367 * na->na_tx_rings -----> | | \
1368 * | | } na->na_num_tx_rings
1369 * | | /
1370 * na->na_rx_rings ----> +----------+
1371 * | | \
1372 * | | } na->na_num_rx_rings
1373 * | | /
1374 * na->na_alloc_rings -> +----------+
1375 * | | \
1376 * na->na_free_rings --> +----------+ } na->na_num_allocator_ring_pairs
1377 * | | /
1378 * na->na_event_rings -> +----------+
1379 * | | \
1380 * | | } na->na_num_event_rings
1381 * | | /
1382 * na->na_large_buf_alloc_rings -> +----------+
1383 * | | \
1384 * | | } na->na_num_large_buf_alloc_rings
1385 * | | /
1386 * na->na_tail -----> +----------+
1387 */
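/*
 * All of the sections above come from a single contiguous allocation
 * anchored at na_tx_rings; the other pointers are derived from it and
 * na_tail marks one past the last ring (see na_kr_create() below).
 */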
1388/* call with SK_LOCK held */
1389static int
1390na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx)
1391{
1392 lck_grp_t *q_lck_grp, *s_lck_grp;
1393 uint32_t i, count, ndesc;
1394 struct kern_pbufpool *pp = NULL;
1395 struct __kern_channel_ring *kring;
1396 uint32_t n[NR_ALL];
1397 int c, tot_slots, err = 0;
1398 enum txrx t;
1399
1400 SK_LOCK_ASSERT_HELD();
1401
 n[NR_TX] = na_get_nrings(na, NR_TX);
 n[NR_RX] = na_get_nrings(na, NR_RX);
 n[NR_A] = na_get_nrings(na, NR_A);
 n[NR_F] = na_get_nrings(na, NR_F);
 n[NR_EV] = na_get_nrings(na, NR_EV);
 n[NR_LBA] = na_get_nrings(na, NR_LBA);
1408
1409 count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA];
1410
1411 na->na_tx_rings = sk_alloc_type_array(struct __kern_channel_ring, count,
1412 Z_WAITOK, skmem_tag_nx_rings);
1413 if (__improbable(na->na_tx_rings == NULL)) {
1414 SK_ERR("Cannot allocate krings");
1415 err = ENOMEM;
1416 goto error;
1417 }
1418
1419 na->na_rx_rings = na->na_tx_rings + n[NR_TX];
1420 if (n[NR_A] != 0) {
1421 na->na_alloc_rings = na->na_rx_rings + n[NR_RX];
1422 na->na_free_rings = na->na_alloc_rings + n[NR_A];
1423 } else {
1424 na->na_alloc_rings = na->na_free_rings = NULL;
1425 }
1426 if (n[NR_EV] != 0) {
1427 if (na->na_free_rings != NULL) {
1428 na->na_event_rings = na->na_free_rings + n[NR_F];
1429 } else {
1430 na->na_event_rings = na->na_rx_rings + n[NR_RX];
1431 }
1432 }
1433 if (n[NR_LBA] != 0) {
1434 ASSERT(n[NR_A] != 0);
1435 if (na->na_event_rings != NULL) {
1436 na->na_large_buf_alloc_rings = na->na_event_rings + n[NR_EV];
1437 } else {
1438 /* alloc/free rings must also be present */
1439 ASSERT(na->na_free_rings != NULL);
1440 na->na_large_buf_alloc_rings = na->na_free_rings + n[NR_F];
1441 }
1442 }
1443
1444 /* total number of slots for TX/RX adapter rings */
 c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
 (n[NR_RX] * na_get_nslots(na, NR_RX));
1447
1448 /* for scratch space on alloc and free rings */
1449 if (n[NR_A] != 0) {
 tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
 tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
 tot_slots += n[NR_LBA] * na_get_nslots(na, NR_LBA);
1453 c = tot_slots;
1454 }
1455 na->na_total_slots = tot_slots;
1456
1457 /* slot context (optional) for all TX/RX ring slots of this adapter */
1458 if (alloc_ctx) {
1459 na->na_slot_ctxs =
1460 skn_alloc_type_array(slot_ctxs, struct slot_ctx,
1461 na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts);
1462 if (na->na_slot_ctxs == NULL) {
1463 SK_ERR("Cannot allocate slot contexts");
1464 err = ENOMEM;
1465 goto error;
1466 }
1467 os_atomic_or(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
1468 }
1469
1470 /*
1471 * packet handle array storage for all TX/RX ring slots of this
1472 * adapter.
1473 */
1474 na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t,
1475 na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch);
1476 if (na->na_scratch == NULL) {
 SK_ERR("Cannot allocate scratch space");
1478 err = ENOMEM;
1479 goto error;
1480 }
1481
1482 /*
 * All fields in the krings are zeroed except the ones initialized
 * below, but it's better to be explicit about important kring fields.
1485 */
1486 for_all_rings(t) {
1487 ndesc = na_get_nslots(na, t);
1488 pp = na_kr_get_pp(na, t);
1489 for (i = 0; i < n[t]; i++) {
1490 kring = &NAKR(na, t)[i];
 bzero(kring, sizeof(*kring));
1492 kring->ckr_na = na;
1493 kring->ckr_pp = pp;
1494 kring->ckr_max_pkt_len =
1495 (t == NR_LBA ? PP_BUF_SIZE_LARGE(pp) :
1496 PP_BUF_SIZE_DEF(pp)) *
1497 pp->pp_max_frags;
1498 kring->ckr_ring_id = i;
1499 kring->ckr_tx = t;
1500 kr_init_to_mhints(kring, ndesc);
 kr_init_tbr(kring);
1502 if (NA_KERNEL_ONLY(na)) {
1503 kring->ckr_flags |= CKRF_KERNEL_ONLY;
1504 }
1505 if (na->na_flags & NAF_HOST_ONLY) {
1506 kring->ckr_flags |= CKRF_HOST;
1507 }
1508 ASSERT((t >= NR_TXRX) || (c > 0));
1509 if ((t < NR_TXRX) &&
1510 (na->na_flags & NAF_SLOT_CONTEXT)) {
1511 ASSERT(na->na_slot_ctxs != NULL);
1512 kring->ckr_flags |= CKRF_SLOT_CONTEXT;
1513 kring->ckr_slot_ctxs =
1514 na->na_slot_ctxs + (tot_slots - c);
1515 }
1516 ASSERT(na->na_scratch != NULL);
1517 if (t < NR_TXRXAF || t == NR_LBA) {
1518 kring->ckr_scratch =
1519 na->na_scratch + (tot_slots - c);
1520 }
1521 if (t < NR_TXRXAF || t == NR_LBA) {
1522 c -= ndesc;
1523 }
1524 switch (t) {
1525 case NR_A:
1526 if (i == 0) {
1527 kring->ckr_na_sync =
1528 na_packet_pool_alloc_sync;
1529 kring->ckr_alloc_ws =
1530 na_upp_alloc_lowat;
1531 } else {
1532 ASSERT(i == 1);
1533 kring->ckr_na_sync =
1534 na_packet_pool_alloc_buf_sync;
1535 kring->ckr_alloc_ws =
1536 na_upp_alloc_buf_lowat;
1537 }
1538 break;
1539 case NR_F:
1540 if (i == 0) {
1541 kring->ckr_na_sync =
1542 na_packet_pool_free_sync;
1543 } else {
1544 ASSERT(i == 1);
1545 kring->ckr_na_sync =
1546 na_packet_pool_free_buf_sync;
1547 }
1548 break;
1549 case NR_TX:
1550 kring->ckr_na_sync = na->na_txsync;
1551 if (na->na_flags & NAF_TX_MITIGATION) {
1552 kring->ckr_flags |= CKRF_MITIGATION;
1553 }
1554 switch (na->na_type) {
1555#if CONFIG_NEXUS_USER_PIPE
1556 case NA_USER_PIPE:
1557 ASSERT(!(na->na_flags &
1558 NAF_USER_PKT_POOL));
1559 kring->ckr_prologue = kr_txprologue;
1560 kring->ckr_finalize = NULL;
1561 break;
1562#endif /* CONFIG_NEXUS_USER_PIPE */
1563#if CONFIG_NEXUS_MONITOR
1564 case NA_MONITOR:
1565 ASSERT(!(na->na_flags &
1566 NAF_USER_PKT_POOL));
1567 kring->ckr_prologue = kr_txprologue;
1568 kring->ckr_finalize = NULL;
1569 break;
1570#endif /* CONFIG_NEXUS_MONITOR */
1571 default:
1572 if (na->na_flags & NAF_USER_PKT_POOL) {
1573 kring->ckr_prologue =
1574 kr_txprologue_upp;
1575 kring->ckr_finalize =
1576 kr_txfinalize_upp;
1577 } else {
1578 kring->ckr_prologue =
1579 kr_txprologue;
1580 kring->ckr_finalize =
1581 kr_txfinalize;
1582 }
1583 break;
1584 }
1585 break;
1586 case NR_RX:
1587 kring->ckr_na_sync = na->na_rxsync;
1588 if (na->na_flags & NAF_RX_MITIGATION) {
1589 kring->ckr_flags |= CKRF_MITIGATION;
1590 }
1591 switch (na->na_type) {
1592#if CONFIG_NEXUS_USER_PIPE
1593 case NA_USER_PIPE:
1594 ASSERT(!(na->na_flags &
1595 NAF_USER_PKT_POOL));
1596 kring->ckr_prologue =
1597 kr_rxprologue_nodetach;
1598 kring->ckr_finalize = kr_rxfinalize;
1599 break;
1600#endif /* CONFIG_NEXUS_USER_PIPE */
1601#if CONFIG_NEXUS_MONITOR
1602 case NA_MONITOR:
1603 ASSERT(!(na->na_flags &
1604 NAF_USER_PKT_POOL));
1605 kring->ckr_prologue =
1606 kr_rxprologue_nodetach;
1607 kring->ckr_finalize = kr_rxfinalize;
1608 break;
1609#endif /* CONFIG_NEXUS_MONITOR */
1610 default:
1611 if (na->na_flags & NAF_USER_PKT_POOL) {
1612 kring->ckr_prologue =
1613 kr_rxprologue_upp;
1614 kring->ckr_finalize =
1615 kr_rxfinalize_upp;
1616 } else {
1617 kring->ckr_prologue =
1618 kr_rxprologue;
1619 kring->ckr_finalize =
1620 kr_rxfinalize;
1621 }
1622 break;
1623 }
1624 break;
1625 case NR_EV:
1626 kring->ckr_na_sync = kern_channel_event_sync;
1627 break;
1628 case NR_LBA:
1629 kring->ckr_na_sync = na_packet_pool_alloc_large_sync;
1630 kring->ckr_alloc_ws = na_upp_alloc_lowat;
1631 break;
1632 default:
1633 VERIFY(0);
1634 /* NOTREACHED */
1635 __builtin_unreachable();
1636 }
1637 if (t != NR_EV) {
1638 kring->ckr_na_notify = na->na_notify;
1639 } else {
1640 kring->ckr_na_notify = NULL;
1641 }
1642 (void) snprintf(kring->ckr_name,
 sizeof(kring->ckr_name) - 1,
1644 "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1645 ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1646 SK_DF(SK_VERB_NA | SK_VERB_RING,
1647 "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u",
1648 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
1649 CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail);
1650 kring->ckr_state = KR_READY;
1651 q_lck_grp = na_kr_q_lck_grp(t);
1652 s_lck_grp = na_kr_s_lck_grp(t);
1653 kring->ckr_qlock_group = q_lck_grp;
 lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
 &channel_lock_attr);
 kring->ckr_slock_group = s_lck_grp;
 lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
 &channel_lock_attr);
1659 csi_init(&kring->ckr_si,
1660 (kring->ckr_flags & CKRF_MITIGATION),
1661 na->na_ch_mit_ival);
1662 }
1663 csi_init(&na->na_si[t],
1664 (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)),
1665 na->na_ch_mit_ival);
1666 }
1667 ASSERT(c == 0);
1668 na->na_tail = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F] +
1669 n[NR_EV] + n[NR_LBA];
1670
1671 if (na->na_type == NA_NETIF_DEV) {
1672 na_kr_setup_netif_svc_map(na);
1673 }
1674
1675 /* validate now for cases where we create only krings */
1676 na_krings_verify(na);
1677 return 0;
1678
1679error:
1680 ASSERT(err != 0);
 if (na->na_tx_rings != NULL) {
 /* na_tail isn't set yet on this path; free all "count" krings */
 sk_free_type_array(struct __kern_channel_ring, count,
 na->na_tx_rings);
 }
1685 if (na->na_slot_ctxs != NULL) {
1686 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1687 skn_free_type_array(slot_ctxs,
1688 struct slot_ctx, na->na_total_slots,
1689 na->na_slot_ctxs);
1690 na->na_slot_ctxs = NULL;
1691 }
1692 if (na->na_scratch != NULL) {
1693 skn_free_type_array(scratch,
1694 kern_packet_t, na->na_total_slots,
1695 na->na_scratch);
1696 na->na_scratch = NULL;
1697 }
1698 return err;
1699}
1700
1701/* undo the actions performed by na_kr_create() */
1702/* call with SK_LOCK held */
1703static void
1704na_kr_delete(struct nexus_adapter *na)
1705{
1706 struct __kern_channel_ring *kring = na->na_tx_rings;
1707 enum txrx t;
1708
1709 ASSERT((kring != NULL) && (na->na_tail != NULL));
1710 SK_LOCK_ASSERT_HELD();
1711
1712 for_all_rings(t) {
1713 csi_destroy(&na->na_si[t]);
1714 }
1715 /* we rely on the krings layout described above */
1716 for (; kring != na->na_tail; kring++) {
 lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
 lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1719 csi_destroy(&kring->ckr_si);
1720 if (kring->ckr_flags & CKRF_SLOT_CONTEXT) {
1721 kring->ckr_flags &= ~CKRF_SLOT_CONTEXT;
1722 ASSERT(kring->ckr_slot_ctxs != NULL);
1723 kring->ckr_slot_ctxs = NULL;
1724 }
1725 }
1726 if (na->na_slot_ctxs != NULL) {
1727 ASSERT(na->na_flags & NAF_SLOT_CONTEXT);
1728 os_atomic_andnot(&na->na_flags, NAF_SLOT_CONTEXT, relaxed);
1729 skn_free_type_array(slot_ctxs,
1730 struct slot_ctx, na->na_total_slots,
1731 na->na_slot_ctxs);
1732 na->na_slot_ctxs = NULL;
1733 }
1734 if (na->na_scratch != NULL) {
1735 skn_free_type_array(scratch,
1736 kern_packet_t, na->na_total_slots,
1737 na->na_scratch);
1738 na->na_scratch = NULL;
1739 }
1740 ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT));
1741 sk_free_type_array(struct __kern_channel_ring,
1742 na->na_tail - na->na_tx_rings, na->na_tx_rings);
1743 na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings =
1744 na->na_free_rings = na->na_event_rings = na->na_tail = NULL;
1745}
1746
1747static void
1748na_kr_slot_desc_init(struct __slot_desc *ksds,
1749 boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc)
1750{
1751 size_t i;
1752
1753 bzero(s: ksds, n: ndesc * SLOT_DESC_SZ);
1754 if (usds != NULL) {
1755 ASSERT(!kernel_only);
1756 bzero(s: usds, n: ndesc * SLOT_DESC_SZ);
1757 } else {
1758 ASSERT(kernel_only);
1759 }
1760
1761 for (i = 0; i < ndesc; i++) {
1762 KSD_INIT(SLOT_DESC_KSD(&ksds[i]));
1763 if (!kernel_only) {
1764 USD_INIT(SLOT_DESC_USD(&usds[i]));
1765 }
1766 }
1767}
1768
1769/* call with SK_LOCK held */
1770static int
1771na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch)
1772{
1773 struct skmem_arena *ar = na->na_arena;
1774 struct skmem_arena_nexus *arn;
1775 mach_vm_offset_t roff[SKMEM_REGIONS];
1776 enum txrx t;
1777 uint32_t i;
1778
1779 SK_LOCK_ASSERT_HELD();
1780 ASSERT(!(na->na_flags & NAF_MEM_NO_INIT));
1781 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
1782 arn = skmem_arena_nexus(ar);
1783 ASSERT(arn != NULL);
1784
1785 bzero(s: &roff, n: sizeof(roff));
1786 for (i = 0; i < SKMEM_REGIONS; i++) {
1787 if (ar->ar_regions[i] == NULL) {
1788 continue;
1789 }
1790
1791 /* not for nexus */
1792 ASSERT(i != SKMEM_REGION_SYSCTLS);
1793
1794 /*
1795 * Get region offsets from base of mmap span; the arena
1796 * doesn't need to be mmap'd at this point, since we
1797 * simply compute the relative offset.
1798 */
1799 roff[i] = skmem_arena_get_region_offset(ar, i);
1800 }
1801
1802 for_all_rings(t) {
1803 for (i = 0; i < na_get_nrings(na, t); i++) {
1804 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
1805 struct __user_channel_ring *ring = kring->ckr_ring;
1806 mach_vm_offset_t ring_off, usd_roff;
1807 struct skmem_obj_info oi, oim;
1808 uint32_t ndesc;
1809
1810 if (ring != NULL) {
1811 SK_DF(SK_VERB_NA | SK_VERB_RING,
1812 "kr 0x%llx (\"%s\") is already "
1813 "initialized", SK_KVA(kring),
1814 kring->ckr_name);
1815 continue; /* already created by somebody else */
1816 }
1817
1818 if (!KR_KERNEL_ONLY(kring) &&
1819 (ring = skmem_cache_alloc(arn->arn_ring_cache,
1820 SKMEM_NOSLEEP)) == NULL) {
1821 SK_ERR("Cannot allocate %s_ring for kr "
1822 "0x%llx (\"%s\")", sk_ring2str(t),
1823 SK_KVA(kring), kring->ckr_name);
1824 goto cleanup;
1825 }
1826 kring->ckr_flags |= CKRF_MEM_RING_INITED;
1827 kring->ckr_ring = ring;
1828 ndesc = kring->ckr_num_slots;
1829
1830 if (ring == NULL) {
1831 goto skip_user_ring_setup;
1832 }
1833
1834 *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc;
1835
1836 /* offset of current ring in mmap span */
1837 skmem_cache_get_obj_info(arn->arn_ring_cache,
1838 ring, &oi, NULL);
1839 ring_off = (roff[SKMEM_REGION_RING] +
1840 SKMEM_OBJ_ROFF(&oi));
1841
1842 /*
1843 * ring_{buf,md,sd}_ofs offsets are relative to the
1844 * current ring, and not to the base of mmap span.
1845 */
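			/*
			 * Illustrative arithmetic only (not code from this
			 * file): a consumer that has this ring mapped at some
			 * user address can recover the start of the default
			 * buffer region as
			 *
			 *	(char *)ring + ring->ring_def_buf_base
			 *
			 * because the stored offset is relative to the ring
			 * object itself rather than to the mmap base.
			 */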
1846 *(mach_vm_offset_t *)(uintptr_t)
1847 &ring->ring_def_buf_base =
1848 (roff[SKMEM_REGION_BUF_DEF] - ring_off);
1849 *(mach_vm_offset_t *)(uintptr_t)
1850 &ring->ring_large_buf_base =
1851 (roff[SKMEM_REGION_BUF_LARGE] - ring_off);
1852 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base =
1853 (roff[SKMEM_REGION_UMD] - ring_off);
1854 _CASSERT(sizeof(uint16_t) ==
1855 sizeof(ring->ring_bft_size));
1856 if (roff[SKMEM_REGION_UBFT] != 0) {
1857 ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] !=
1858 NULL);
1859 *(mach_vm_offset_t *)(uintptr_t)
1860 &ring->ring_bft_base =
1861 (roff[SKMEM_REGION_UBFT] - ring_off);
1862 *(uint16_t *)(uintptr_t)&ring->ring_bft_size =
1863 (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]->
1864 skr_c_obj_size;
1865 ASSERT(ring->ring_bft_size ==
1866 ar->ar_regions[SKMEM_REGION_KBFT]->
1867 skr_c_obj_size);
1868 } else {
1869 *(mach_vm_offset_t *)(uintptr_t)
1870 &ring->ring_bft_base = 0;
				*(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1872 }
1873
1874 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1875 usd_roff = roff[SKMEM_REGION_TXAUSD];
1876 } else {
1877 ASSERT(t == NR_RX || t == NR_F);
1878 usd_roff = roff[SKMEM_REGION_RXFUSD];
1879 }
1880 *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base =
1881 (usd_roff - ring_off);
1882
1883 /* copy values from kring */
1884 ring->ring_head = kring->ckr_rhead;
1885 *(slot_idx_t *)(uintptr_t)&ring->ring_khead =
1886 kring->ckr_khead;
1887 *(slot_idx_t *)(uintptr_t)&ring->ring_tail =
1888 kring->ckr_rtail;
1889
1890 _CASSERT(sizeof(uint32_t) ==
1891 sizeof(ring->ring_def_buf_size));
1892 _CASSERT(sizeof(uint32_t) ==
1893 sizeof(ring->ring_large_buf_size));
1894 _CASSERT(sizeof(uint16_t) ==
1895 sizeof(ring->ring_md_size));
1896 *(uint32_t *)(uintptr_t)&ring->ring_def_buf_size =
1897 ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size;
1898 if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) {
1899 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size =
1900 ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size;
1901 } else {
1902 *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0;
1903 }
1904 if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) {
1905 *(uint16_t *)(uintptr_t)&ring->ring_md_size =
1906 (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]->
1907 skr_c_obj_size;
1908 ASSERT(ring->ring_md_size ==
1909 ar->ar_regions[SKMEM_REGION_KMD]->
1910 skr_c_obj_size);
1911 } else {
1912 *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0;
1913 ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp));
1914 ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp));
1915 }
1916
1917 /* ring info */
1918 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id));
1919 _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind));
1920 *(uint16_t *)(uintptr_t)&ring->ring_id =
1921 (uint16_t)kring->ckr_ring_id;
1922 *(uint16_t *)(uintptr_t)&ring->ring_kind =
1923 (uint16_t)kring->ckr_tx;
1924
1925 SK_DF(SK_VERB_NA | SK_VERB_RING,
1926 "%s_ring at 0x%llx kr 0x%llx (\"%s\")",
1927 sk_ring2str(t), SK_KVA(ring), SK_KVA(kring),
1928 kring->ckr_name);
1929 SK_DF(SK_VERB_NA | SK_VERB_RING,
1930 " num_slots: %u", ring->ring_num_slots);
1931 SK_DF(SK_VERB_NA | SK_VERB_RING,
1932 " def_buf_base: 0x%llx",
1933 (uint64_t)ring->ring_def_buf_base);
1934 SK_DF(SK_VERB_NA | SK_VERB_RING,
1935 " large_buf_base: 0x%llx",
1936 (uint64_t)ring->ring_large_buf_base);
1937 SK_DF(SK_VERB_NA | SK_VERB_RING,
1938 " md_base: 0x%llx",
1939 (uint64_t)ring->ring_md_base);
1940 SK_DF(SK_VERB_NA | SK_VERB_RING,
1941 " sd_base: 0x%llx",
1942 (uint64_t)ring->ring_sd_base);
1943 SK_DF(SK_VERB_NA | SK_VERB_RING,
			    "  h, t: %u, %u", ring->ring_head,
			    ring->ring_tail);
			SK_DF(SK_VERB_NA | SK_VERB_RING,
			    "  md_size: %u",
			    ring->ring_md_size);
1949
1950 /* make sure they're in synch */
1951 _CASSERT(NR_RX == CR_KIND_RX);
1952 _CASSERT(NR_TX == CR_KIND_TX);
1953 _CASSERT(NR_A == CR_KIND_ALLOC);
1954 _CASSERT(NR_F == CR_KIND_FREE);
1955 _CASSERT(NR_EV == CR_KIND_EVENT);
1956 _CASSERT(NR_LBA == CR_KIND_LARGE_BUF_ALLOC);
1957
1958skip_user_ring_setup:
1959 /*
1960 * This flag tells na_kr_teardown_all() that it should
1961 * go thru the checks to free up the slot maps.
1962 */
1963 kring->ckr_flags |= CKRF_MEM_SD_INITED;
1964 if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) {
1965 kring->ckr_ksds_cache = arn->arn_txaksd_cache;
1966 } else {
1967 ASSERT(t == NR_RX || t == NR_F);
1968 kring->ckr_ksds_cache = arn->arn_rxfksd_cache;
1969 }
1970 kring->ckr_ksds =
1971 skmem_cache_alloc(kring->ckr_ksds_cache,
1972 SKMEM_NOSLEEP);
1973 if (kring->ckr_ksds == NULL) {
1974 SK_ERR("Cannot allocate %s_ksds for kr "
1975 "0x%llx (\"%s\")", sk_ring2str(t),
1976 SK_KVA(kring), kring->ckr_name);
1977 goto cleanup;
1978 }
1979 if (!KR_KERNEL_ONLY(kring)) {
1980 skmem_cache_get_obj_info(kring->ckr_ksds_cache,
1981 kring->ckr_ksds, &oi, &oim);
1982 kring->ckr_usds = SKMEM_OBJ_ADDR(&oim);
1983 }
1984 na_kr_slot_desc_init(ksds: kring->ckr_ksds,
1985 KR_KERNEL_ONLY(kring), usds: kring->ckr_usds, ndesc);
1986
1987 /* cache last slot descriptor address */
1988 ASSERT(kring->ckr_lim == (ndesc - 1));
1989 kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim];
1990
1991 if ((t < NR_TXRX) &&
1992 !(na->na_flags & NAF_USER_PKT_POOL) &&
1993 na_kr_populate_slots(kring) != 0) {
1994 SK_ERR("Cannot allocate buffers for kr "
1995 "0x%llx (\"%s\")", SK_KVA(kring),
1996 kring->ckr_name);
1997 goto cleanup;
1998 }
1999 }
2000 }
2001
2002 return 0;
2003
2004cleanup:
2005 na_kr_teardown_all(na, ch, FALSE);
2006
2007 return ENOMEM;
2008}
2009
2010static void
2011na_kr_teardown_common(struct nexus_adapter *na,
2012 struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch,
2013 boolean_t defunct)
2014{
2015 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar: na->na_arena);
2016 struct __user_channel_ring *ckr_ring;
2017 boolean_t sd_idle, sd_inited;
2018
2019 ASSERT(arn != NULL);
2020 kr_enter(kring, TRUE);
2021 /*
2022 * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED
2023 * to make sure that the freeing needs to happen (else just
2024 * nullify the values).
2025 * If this adapter owns the memory for the slot descriptors,
2026 * check if the region is marked as busy (sd_idle is false)
2027 * and leave the kring's slot descriptor fields alone if so,
2028 * at defunct time. At final teardown time, sd_idle must be
2029 * true else we assert; this indicates a missing call to
2030 * skmem_arena_nexus_sd_set_noidle().
2031 */
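	/*
	 * Case summary of the logic below (descriptive only):
	 *
	 *	sd_inited && sd_idle	depopulate, then free ksds/usds
	 *	sd_inited && !sd_idle	depopulate only (defunct path)
	 *	!sd_inited		nothing to depopulate or free
	 */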
2032 sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0);
2033 if (sd_inited) {
2034 /* callee will do KR_KSD(), so check */
2035 if (((t < NR_TXRX) || (t == NR_EV)) &&
2036 (kring->ckr_ksds != NULL)) {
2037 na_kr_depopulate_slots(kring, ch, defunct);
2038 }
2039 /* leave CKRF_MEM_SD_INITED flag alone until idle */
2040 sd_idle = skmem_arena_nexus_sd_idle(arn);
2041 VERIFY(sd_idle || defunct);
2042 } else {
2043 sd_idle = TRUE;
2044 }
2045
2046 if (sd_idle) {
2047 kring->ckr_flags &= ~CKRF_MEM_SD_INITED;
2048 if (kring->ckr_ksds != NULL) {
2049 if (sd_inited) {
2050 skmem_cache_free(kring->ckr_ksds_cache,
2051 kring->ckr_ksds);
2052 }
2053 kring->ckr_ksds = NULL;
2054 kring->ckr_ksds_last = NULL;
2055 kring->ckr_usds = NULL;
2056 }
2057 ASSERT(kring->ckr_ksds_last == NULL);
2058 ASSERT(kring->ckr_usds == NULL);
2059 }
2060
2061 if ((ckr_ring = kring->ckr_ring) != NULL) {
2062 kring->ckr_ring = NULL;
2063 }
2064
2065 if (kring->ckr_flags & CKRF_MEM_RING_INITED) {
2066 ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring));
2067 if (ckr_ring != NULL) {
2068 skmem_cache_free(arn->arn_ring_cache, ckr_ring);
2069 }
2070 kring->ckr_flags &= ~CKRF_MEM_RING_INITED;
2071 }
2072
2073 if (defunct) {
2074 /* if defunct, drop everything; see KR_DROP() */
2075 kring->ckr_flags |= CKRF_DEFUNCT;
2076 }
2077 kr_exit(kring);
2078}
2079
2080/*
 * Teardown ALL rings of a nexus adapter; this includes
 * {tx,rx,alloc,free,event,large buf alloc}
2082 */
2083static void
2084na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch,
2085 boolean_t defunct)
2086{
2087 enum txrx t;
2088
2089 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2090
2091 /* skip if this adapter has no allocated rings */
2092 if (na->na_tx_rings == NULL) {
2093 return;
2094 }
2095
2096 for_all_rings(t) {
2097 for (uint32_t i = 0; i < na_get_nrings(na, t); i++) {
2098 na_kr_teardown_common(na, kring: &NAKR(na, t)[i],
2099 t, ch, defunct);
2100 }
2101 }
2102}
2103
2104/*
2105 * Teardown only {tx,rx} rings assigned to the channel.
2106 */
2107static void
2108na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch,
2109 boolean_t defunct, struct proc *p)
2110{
2111 enum txrx t;
2112
2113 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2114
2115 for_rx_tx(t) {
2116 ring_id_t qfirst = ch->ch_first[t];
2117 ring_id_t qlast = ch->ch_last[t];
2118 uint32_t i;
2119
2120 for (i = qfirst; i < qlast; i++) {
2121 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2122 na_kr_teardown_common(na, kring, t, ch, defunct);
2123
2124 /*
2125 * Issue a notify to wake up anyone sleeping in kqueue
2126 * so that they notice the newly defuncted channels and
2127 * return an error
2128 */
2129 kring->ckr_na_notify(kring, p, 0);
2130 }
2131 }
2132}
2133
2134static int
2135na_kr_populate_slots(struct __kern_channel_ring *kring)
2136{
2137 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2138 struct nexus_adapter *na = KRNA(kring);
2139 kern_pbufpool_t pp = kring->ckr_pp;
2140 uint32_t nslots = kring->ckr_num_slots;
2141 uint32_t start_idx, i;
2142 uint32_t sidx = 0; /* slot counter */
2143 struct __kern_slot_desc *ksd;
2144 struct __user_slot_desc *usd;
2145 struct __kern_quantum *kqum;
2146 nexus_type_t nexus_type;
2147 int err = 0;
2148
2149 ASSERT(kring->ckr_tx < NR_TXRX);
2150 ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL));
2151 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2152 ASSERT(pp != NULL);
2153
2154 /*
2155 * xxx_ppool: remove this special case
2156 */
2157 nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type;
2158
2159 switch (nexus_type) {
2160 case NEXUS_TYPE_FLOW_SWITCH:
2161 case NEXUS_TYPE_KERNEL_PIPE:
2162 /*
2163 * xxx_ppool: This is temporary code until we come up with a
2164 * scheme for user space to alloc & attach packets to tx ring.
2165 */
2166 if (kernel_only || kring->ckr_tx == NR_RX) {
2167 return 0;
2168 }
2169 break;
2170
2171 case NEXUS_TYPE_NET_IF:
2172 if (((na->na_type == NA_NETIF_DEV) ||
2173 (na->na_type == NA_NETIF_HOST)) &&
2174 (kernel_only || (kring->ckr_tx == NR_RX))) {
2175 return 0;
2176 }
2177
2178 ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) ||
2179 (na->na_type == NA_NETIF_COMPAT_HOST) ||
2180 (na->na_type == NA_NETIF_DEV) ||
2181 (na->na_type == NA_NETIF_VP));
2182
2183 if (!kernel_only) {
2184 if (kring->ckr_tx == NR_RX) {
2185 return 0;
2186 } else {
2187 break;
2188 }
2189 }
2190
2191 ASSERT(kernel_only);
2192
2193 if ((na->na_type == NA_NETIF_COMPAT_DEV) ||
2194 (na->na_type == NA_NETIF_COMPAT_HOST)) {
2195 return 0;
2196 }
2197 VERIFY(0);
2198 /* NOTREACHED */
2199 __builtin_unreachable();
2200
2201 case NEXUS_TYPE_USER_PIPE:
2202 case NEXUS_TYPE_MONITOR:
2203 break;
2204
2205 default:
2206 VERIFY(0);
2207 /* NOTREACHED */
2208 __builtin_unreachable();
2209 }
2210
2211 /* Fill the ring with packets */
2212 sidx = start_idx = 0;
2213 for (i = 0; i < nslots; i++) {
2214 kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags,
2215 SKMEM_NOSLEEP));
2216 if (kqum == NULL) {
2217 err = ENOMEM;
2218 SK_ERR("ar 0x%llx (\"%s\") no more buffers "
2219 "after %u of %u, err %d", SK_KVA(na->na_arena),
2220 na->na_arena->ar_name, i, nslots, err);
2221 goto cleanup;
2222 }
2223 ksd = KR_KSD(kring, i);
2224 usd = (kernel_only ? NULL : KR_USD(kring, i));
2225
2226 /* attach packet to slot */
2227 kqum->qum_ksd = ksd;
2228 ASSERT(!KSD_VALID_METADATA(ksd));
2229 KSD_ATTACH_METADATA(ksd, kqum);
2230 if (usd != NULL) {
2231 USD_ATTACH_METADATA(usd, METADATA_IDX(kqum));
2232 kr_externalize_metadata(kring, pp->pp_max_frags,
2233 kqum, current_proc());
2234 }
2235
2236 SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2237 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd), METADATA_IDX(kqum),
2238 SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2239 SK_KVA(&kqum->qum_buf[0]));
2240 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2241 SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] "
2242 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2243 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2244 SK_KVA(usd), METADATA_IDX(kqum),
2245 SK_KVA(kqum->qum_user),
2246 kqum->qum_user->qum_buf[0].buf_idx,
2247 SK_KVA(&kqum->qum_user->qum_buf[0]));
2248 }
2249
2250 sidx = SLOT_NEXT(i: sidx, lim: kring->ckr_lim);
2251 }
2252
2253 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2254 SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx);
2255
2256cleanup:
2257 if (err != 0) {
2258 sidx = start_idx;
2259 while (i-- > 0) {
2260 ksd = KR_KSD(kring, i);
2261 usd = (kernel_only ? NULL : KR_USD(kring, i));
2262 kqum = ksd->sd_qum;
2263
2264 ASSERT(ksd == kqum->qum_ksd);
2265 KSD_RESET(ksd);
2266 if (usd != NULL) {
2267 USD_RESET(usd);
2268 }
2269 /* detach packet from slot */
2270 kqum->qum_ksd = NULL;
2271 pp_free_packet(pp, SK_PTR_ADDR(kqum));
2272
2273 sidx = SLOT_NEXT(i: sidx, lim: kring->ckr_lim);
2274 }
2275 }
2276 return err;
2277}
2278
2279static void
2280na_kr_depopulate_slots(struct __kern_channel_ring *kring,
2281 struct kern_channel *ch, boolean_t defunct)
2282{
2283#pragma unused(ch)
2284 const boolean_t kernel_only = KR_KERNEL_ONLY(kring);
2285 uint32_t i, j, n = kring->ckr_num_slots;
2286 struct nexus_adapter *na = KRNA(kring);
2287 struct kern_pbufpool *pp = kring->ckr_pp;
2288 boolean_t upp = FALSE;
2289 obj_idx_t midx;
2290
2291 ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV));
2292 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2293
2294 ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS);
2295
2296 if (((na->na_flags & NAF_USER_PKT_POOL) != 0) &&
2297 (kring->ckr_tx != NR_EV)) {
2298 upp = TRUE;
2299 }
2300 for (i = 0, j = 0; i < n; i++) {
2301 struct __kern_slot_desc *ksd = KR_KSD(kring, i);
2302 struct __user_slot_desc *usd;
2303 struct __kern_quantum *qum, *kqum;
2304 boolean_t free_packet = FALSE;
2305 int err;
2306
2307 if (!KSD_VALID_METADATA(ksd)) {
2308 continue;
2309 }
2310
2311 kqum = ksd->sd_qum;
2312 usd = (kernel_only ? NULL : KR_USD(kring, i));
2313 midx = METADATA_IDX(kqum);
2314
2315 /*
2316 * if the packet is internalized it should not be in the
2317 * hash table of packets loaned to user space.
2318 */
2319 if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) {
2320 if ((qum = pp_find_upp(pp, midx)) != NULL) {
2321 panic("internalized packet 0x%llx in htbl",
2322 SK_KVA(qum));
2323 /* NOTREACHED */
2324 __builtin_unreachable();
2325 }
2326 free_packet = TRUE;
2327 } else if (upp) {
2328 /*
2329 * if the packet is not internalized check if it is
2330 * in the list of packets loaned to user-space.
2331 * Remove from the list before freeing.
2332 */
2333 ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED));
2334 qum = pp_remove_upp(pp, midx, &err);
2335 if (err != 0) {
				SK_ERR("un-allocated packet or buflet %d 0x%llx",
2337 midx, SK_KVA(qum));
2338 if (qum != NULL) {
2339 free_packet = TRUE;
2340 }
2341 }
2342 } else {
2343 free_packet = TRUE;
2344 }
2345
2346 /*
2347 * Clear the user and kernel slot descriptors. Note that
2348 * if we are depopulating the slots due to defunct (and not
2349 * due to normal deallocation/teardown), we leave the user
2350 * slot descriptor alone. At that point the process may
2351 * be suspended, and later when it resumes it would just
2352 * pick up the original contents and move forward with
2353 * whatever it was doing.
2354 */
2355 KSD_RESET(ksd);
2356 if (usd != NULL && !defunct) {
2357 USD_RESET(usd);
2358 }
2359
2360 /* detach packet from slot */
2361 kqum->qum_ksd = NULL;
2362
2363 SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] "
2364 " kbuf[%-3u, 0x%llx]", i, SK_KVA(ksd),
2365 METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx,
2366 SK_KVA(&kqum->qum_buf[0]));
2367 if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) {
2368 SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] "
2369 "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]",
2370 (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE),
2371 SK_KVA(usd), METADATA_IDX(kqum),
2372 SK_KVA(kqum->qum_user),
2373 kqum->qum_user->qum_buf[0].buf_idx,
2374 SK_KVA(&kqum->qum_user->qum_buf[0]));
2375 }
2376
2377 if (free_packet) {
2378 pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j;
2379 }
2380 }
2381
2382 SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2383 SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name,
2384 j, n);
2385}
2386
2387int
2388na_rings_mem_setup(struct nexus_adapter *na,
2389 boolean_t alloc_ctx, struct kern_channel *ch)
2390{
2391 boolean_t kronly;
2392 int err;
2393
2394 SK_LOCK_ASSERT_HELD();
2395 ASSERT(na->na_channels == 0);
2396 /*
2397 * If NAF_MEM_NO_INIT is set, then only create the krings and not
2398 * the backing memory regions for the adapter.
2399 */
2400 kronly = (na->na_flags & NAF_MEM_NO_INIT);
2401 ASSERT(!kronly || NA_KERNEL_ONLY(na));
2402
2403 /*
	 * Create and initialize the common fields of the krings array,
	 * using the information that must already be available in the na.
2406 */
2407 if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) {
2408 err = na_kr_setup(na, ch);
2409 if (err != 0) {
2410 na_kr_delete(na);
2411 }
2412 }
2413
2414 return err;
2415}
2416
2417void
2418na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch,
2419 boolean_t defunct)
2420{
2421 SK_LOCK_ASSERT_HELD();
2422 ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT));
2423
2424 /*
2425 * Deletes the kring and ring array of the adapter. They
2426 * must have been created using na_rings_mem_setup().
2427 *
2428 * XXX: adi@apple.com -- the parameter "ch" should not be
2429 * needed here; however na_kr_depopulate_slots() needs to
2430 * go thru the channel's user packet pool hash, and so for
2431 * now we leave it here.
2432 */
2433 na_kr_teardown_all(na, ch, defunct);
2434 if (!defunct) {
2435 na_kr_delete(na);
2436 }
2437}
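
/*
 * Sketch of the expected setup/teardown pairing (illustrative only; see
 * na_pseudo_krings_create() and na_pseudo_krings_delete() below for a
 * real caller):
 *
 *	SK_LOCK_ASSERT_HELD();
 *	if (na_rings_mem_setup(na, FALSE, ch) == 0) {
 *		...
 *		na_rings_mem_teardown(na, ch, FALSE);
 *	}
 */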
2438
2439void
2440na_ch_rings_defunct(struct kern_channel *ch, struct proc *p)
2441{
2442 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2443
2444 /*
2445 * Depopulate slots on the TX and RX rings of this channel,
2446 * but don't touch other rings owned by other channels if
2447 * this adapter is being shared.
2448 */
2449 na_kr_teardown_txrx(na: ch->ch_na, ch, TRUE, p);
2450}
2451
2452void
2453na_kr_drop(struct nexus_adapter *na, boolean_t drop)
2454{
2455 enum txrx t;
2456 uint32_t i;
2457
2458 for_rx_tx(t) {
2459 for (i = 0; i < na_get_nrings(na, t); i++) {
2460 struct __kern_channel_ring *kring = &NAKR(na, t)[i];
2461 int error;
2462 error = kr_enter(kring, TRUE);
2463 if (drop) {
2464 kring->ckr_flags |= CKRF_DROP;
2465 } else {
2466 kring->ckr_flags &= ~CKRF_DROP;
2467 }
2468
2469 if (error != 0) {
2470 SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2471 "kr_enter failed %d",
2472 na->na_name, SK_KVA(na),
2473 kring->ckr_name, SK_KVA(kring),
2474 error);
2475 } else {
2476 kr_exit(kring);
2477 }
2478 SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) "
2479 "krflags 0x%b", na->na_name, SK_KVA(na),
2480 kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
2481 CKRF_BITS);
2482 }
2483 }
2484}
2485
2486/*
 * Set the stopped/enabled status of a ring. When stopping, we also wait
 * for all current activity on the ring to terminate. The status change
 * is then notified using the adapter's na_notify callback.
2490 */
2491static void
2492na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t,
2493 uint32_t state)
2494{
2495 struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id];
2496
2497 /*
2498 * Mark the ring as stopped/enabled, and run through the
2499 * locks to make sure other users get to see it.
2500 */
2501 if (state == KR_READY) {
2502 kr_start(kr);
2503 } else {
2504 kr_stop(kr, state);
2505 }
2506}
2507
2508
2509/* stop or enable all the rings of na */
2510static void
2511na_set_all_rings(struct nexus_adapter *na, uint32_t state)
2512{
2513 uint32_t i;
2514 enum txrx t;
2515
2516 SK_LOCK_ASSERT_HELD();
2517
2518 if (!NA_IS_ACTIVE(na)) {
2519 return;
2520 }
2521
2522 for_rx_tx(t) {
2523 for (i = 0; i < na_get_nrings(na, t); i++) {
2524 na_set_ring(na, ring_id: i, t, state);
2525 }
2526 }
2527}
2528
2529/*
2530 * Convenience function used in drivers. Waits for current txsync()s/rxsync()s
2531 * to finish and prevents any new one from starting. Call this before turning
 * Skywalk mode off, or before removing the hardware rings (e.g., on module
 * unload). As a rule of thumb for Linux drivers, this should be placed near
2534 * each napi_disable().
2535 */
2536void
2537na_disable_all_rings(struct nexus_adapter *na)
2538{
2539 na_set_all_rings(na, state: KR_STOPPED);
2540}
2541
2542/*
 * adapter's rings. In Linux drivers, this should be placed near each
2544 * adapter's rings In linux drivers, this should be placed near each
2545 * napi_enable().
2546 */
2547void
2548na_enable_all_rings(struct nexus_adapter *na)
2549{
2550 na_set_all_rings(na, state: KR_READY /* enabled */);
2551}
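
/*
 * Illustrative driver-side bracketing, mirroring the napi_disable()/
 * napi_enable() guidance above; reconfigure_hw_rings() stands in for a
 * hypothetical driver routine and is not part of this file:
 *
 *	na_disable_all_rings(na);
 *	reconfigure_hw_rings(sc);
 *	na_enable_all_rings(na);
 */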
2552
2553void
2554na_lock_all_rings(struct nexus_adapter *na)
2555{
2556 na_set_all_rings(na, state: KR_LOCKED);
2557}
2558
2559void
2560na_unlock_all_rings(struct nexus_adapter *na)
2561{
2562 na_enable_all_rings(na);
2563}
2564
2565int
2566na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr,
2567 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p)
2568{
2569 struct nexus_adapter *na = NULL;
2570 mach_vm_size_t memsize = 0;
2571 int err = 0;
2572 enum txrx t;
2573
2574 ASSERT(!(chr->cr_mode & CHMODE_KERNEL));
2575 ASSERT(!(ch->ch_flags & CHANF_KERNEL));
2576
2577 SK_LOCK_ASSERT_HELD();
2578
2579 /* find the nexus adapter and return the reference */
2580 err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */);
2581 if (err != 0) {
2582 ASSERT(na == NULL);
2583 goto done;
2584 }
2585
2586 if (NA_KERNEL_ONLY(na)) {
2587 err = EBUSY;
2588 goto done;
2589 }
2590
	/* reject if the adapter is defunct or non-permissive */
2592 if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) {
2593 err = ENXIO;
2594 goto done;
2595 }
2596
2597 err = na_bind_channel(na, ch, chr);
2598 if (err != 0) {
2599 goto done;
2600 }
2601
2602 ASSERT(ch->ch_schema != NULL);
2603 ASSERT(na == ch->ch_na);
2604
2605 for_all_rings(t) {
2606 if (na_get_nrings(na, t) == 0) {
2607 ch->ch_si[t] = NULL;
2608 continue;
2609 }
2610 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2611 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2612 }
2613
2614 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2615
2616 if (!(skmem_arena_nexus(ar: na->na_arena)->arn_mode &
2617 AR_NEXUS_MODE_EXTERNAL_PPOOL)) {
2618 os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_PRIV_MEM, relaxed);
2619 }
2620
2621 err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap);
2622 if (err != 0) {
2623 goto done;
2624 }
2625
2626 os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_ACTIVE, relaxed);
2627 chr->cr_memsize = memsize;
2628 chr->cr_memoffset = ch->ch_schema_offset;
2629
2630 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2631 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2632 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2633 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2634 na->na_flags, NAF_BITS);
2635
2636done:
2637 if (err != 0) {
2638 if (ch->ch_schema != NULL || na != NULL) {
2639 if (ch->ch_schema != NULL) {
2640 ASSERT(na == ch->ch_na);
2641 /*
2642 * Callee will unmap memory region if needed,
2643 * as well as release reference held on 'na'.
2644 */
2645 na_disconnect(nx, ch);
2646 na = NULL;
2647 }
2648 if (na != NULL) {
2649 (void) na_release_locked(na);
2650 na = NULL;
2651 }
2652 }
2653 }
2654
2655 return err;
2656}
2657
2658void
2659na_disconnect(struct kern_nexus *nx, struct kern_channel *ch)
2660{
2661#pragma unused(nx)
2662 enum txrx t;
2663
2664 SK_LOCK_ASSERT_HELD();
2665
2666 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2667 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2668 ch->ch_na->na_name, ch->ch_info->cinfo_nx_port,
2669 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na),
2670 ch->ch_na->na_flags, NAF_BITS);
2671
2672 /* destroy mapping and release references */
2673 na_unbind_channel(ch);
2674 ASSERT(ch->ch_na == NULL);
2675 ASSERT(ch->ch_schema == NULL);
2676 for_all_rings(t) {
2677 ch->ch_si[t] = NULL;
2678 }
2679}
2680
2681void
2682na_defunct(struct kern_nexus *nx, struct kern_channel *ch,
2683 struct nexus_adapter *na, boolean_t locked)
2684{
2685#pragma unused(nx)
2686 SK_LOCK_ASSERT_HELD();
2687 if (!locked) {
2688 lck_mtx_lock(lck: &ch->ch_lock);
2689 }
2690
2691 LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED);
2692
2693 if (!(na->na_flags & NAF_DEFUNCT)) {
2694 /*
2695 * Mark this adapter as defunct to inform nexus-specific
2696 * teardown handler called by na_teardown() below.
2697 */
2698 os_atomic_or(&na->na_flags, NAF_DEFUNCT, relaxed);
2699
2700 /*
2701 * Depopulate slots.
2702 */
2703 na_teardown(na, ch, TRUE);
2704
2705 /*
2706 * And finally destroy any already-defunct memory regions.
2707 * Do this only if the nexus adapter owns the arena, i.e.
2708 * NAF_MEM_LOANED is not set. Otherwise, we'd expect
2709 * that this routine be called again for the real owner.
2710 */
2711 if (!(na->na_flags & NAF_MEM_LOANED)) {
2712 skmem_arena_defunct(na->na_arena);
2713 }
2714 }
2715
2716 SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) "
2717 "na 0x%llx naflags %b", ch->ch_name, ch->ch_pid,
2718 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2719 na->na_name, ch->ch_info->cinfo_nx_port,
2720 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2721 na->na_flags, NAF_BITS);
2722
2723 if (!locked) {
2724 lck_mtx_unlock(lck: &ch->ch_lock);
2725 }
2726}
2727
2728/*
2729 * TODO: adi@apple.com -- merge this into na_connect()
2730 */
2731int
2732na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch,
2733 struct chreq *chr, struct proc *p)
2734{
2735#pragma unused(p)
2736 struct nexus_adapter *na = NULL;
2737 mach_vm_size_t memsize = 0;
2738 int error = 0;
2739 enum txrx t;
2740
2741 ASSERT(chr->cr_mode & CHMODE_KERNEL);
2742 ASSERT(ch->ch_flags & CHANF_KERNEL);
2743 ASSERT(ch->ch_na == NULL);
2744 ASSERT(ch->ch_schema == NULL);
2745
2746 SK_LOCK_ASSERT_HELD();
2747
2748 error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE);
2749 if (error != 0) {
2750 goto done;
2751 }
2752
2753 if (na == NULL) {
2754 error = EINVAL;
2755 goto done;
2756 }
2757
2758 if (na->na_channels > 0) {
2759 error = EBUSY;
2760 goto done;
2761 }
2762
2763 if (na->na_flags & NAF_DEFUNCT) {
2764 error = ENXIO;
2765 goto done;
2766 }
2767
2768 /*
2769 * Special connect requires the nexus adapter to handle its
2770 * own channel binding and unbinding via na_special(); bail
2771 * if this adapter doesn't support it.
2772 */
2773 if (na->na_special == NULL) {
2774 error = ENOTSUP;
2775 goto done;
2776 }
2777
2778 /* upon success, "ch->ch_na" will point to "na" */
2779 error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT);
2780 if (error != 0) {
2781 ASSERT(ch->ch_na == NULL);
2782 goto done;
2783 }
2784
2785 ASSERT(na->na_flags & NAF_SPEC_INIT);
2786 ASSERT(na == ch->ch_na);
2787 /* make sure this is still the case */
2788 ASSERT(ch->ch_schema == NULL);
2789
2790 for_rx_tx(t) {
2791 ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] :
2792 &NAKR(na, t)[ch->ch_first[t]].ckr_si;
2793 }
2794
2795 skmem_arena_get_stats(na->na_arena, &memsize, NULL);
2796 chr->cr_memsize = memsize;
2797
2798 SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx "
2799 "naflags %b", sk_proc_name_address(p), sk_proc_pid(p),
2800 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2801 na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na),
2802 na->na_flags, NAF_BITS);
2803
2804done:
2805 if (error != 0) {
2806 if (ch->ch_na != NULL || na != NULL) {
2807 if (ch->ch_na != NULL) {
2808 ASSERT(na == ch->ch_na);
2809 /* callee will release reference on 'na' */
2810 na_disconnect_spec(nx, ch);
2811 na = NULL;
2812 }
2813 if (na != NULL) {
2814 (void) na_release_locked(na);
2815 na = NULL;
2816 }
2817 }
2818 }
2819
2820 return error;
2821}
2822
2823/*
2824 * TODO: adi@apple.com -- merge this into na_disconnect()
2825 */
2826void
2827na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch)
2828{
2829#pragma unused(nx)
2830 struct nexus_adapter *na = ch->ch_na;
2831 enum txrx t;
2832 int error;
2833
2834 SK_LOCK_ASSERT_HELD();
2835 ASSERT(na != NULL);
2836 ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */
2837
2838 SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b",
2839 SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name,
2840 na->na_name, ch->ch_info->cinfo_nx_port,
2841 (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na),
2842 na->na_flags, NAF_BITS);
2843
2844 /* take a reference for this routine */
2845 na_retain_locked(na);
2846
2847 ASSERT(ch->ch_flags & CHANF_KERNEL);
2848 ASSERT(ch->ch_schema == NULL);
2849 ASSERT(na->na_special != NULL);
2850 /* unbind this channel */
2851 error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT);
2852 ASSERT(error == 0);
2853 ASSERT(!(na->na_flags & NAF_SPEC_INIT));
2854
2855 /* now release our reference; this may be the last */
2856 na_release_locked(na);
2857 na = NULL;
2858
2859 ASSERT(ch->ch_na == NULL);
2860 for_rx_tx(t) {
2861 ch->ch_si[t] = NULL;
2862 }
2863}
2864
2865void
2866na_start_spec(struct kern_nexus *nx, struct kern_channel *ch)
2867{
2868#pragma unused(nx)
2869 struct nexus_adapter *na = ch->ch_na;
2870
2871 SK_LOCK_ASSERT_HELD();
2872
2873 ASSERT(ch->ch_flags & CHANF_KERNEL);
2874 ASSERT(NA_KERNEL_ONLY(na));
2875 ASSERT(na->na_special != NULL);
2876
2877 na->na_special(na, ch, NULL, NXSPEC_CMD_START);
2878}
2879
2880void
2881na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch)
2882{
2883#pragma unused(nx)
2884 struct nexus_adapter *na = ch->ch_na;
2885
2886 SK_LOCK_ASSERT_HELD();
2887
2888 ASSERT(ch->ch_flags & CHANF_KERNEL);
2889 ASSERT(NA_KERNEL_ONLY(na));
2890 ASSERT(na->na_special != NULL);
2891
2892 na->na_special(na, ch, NULL, NXSPEC_CMD_STOP);
2893}
2894
2895/*
2896 * MUST BE CALLED UNDER SK_LOCK()
2897 *
2898 * Get a refcounted reference to a nexus adapter attached
2899 * to the interface specified by chr.
2900 * This is always called in the execution of an ioctl().
2901 *
2902 * Return ENXIO if the interface specified by the request does
2903 * not exist, ENOTSUP if Skywalk is not supported by the interface,
2904 * EINVAL if parameters are invalid, ENOMEM if needed resources
2905 * could not be allocated.
 * If successful, a reference to the nexus adapter is held for the caller.
2907 *
2908 * No reference is kept on the real interface, which may then
2909 * disappear at any time.
2910 */
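/*
 * Typical call pattern (sketch only, condensed from na_connect() above):
 *
 *	struct nexus_adapter *na = NULL;
 *	int err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE);
 *	if (err != 0) {
 *		ASSERT(na == NULL);
 *		return err;
 *	}
 *	...
 *	(void) na_release_locked(na);
 */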
2911int
2912na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr,
2913 struct kern_channel *ch0, struct nxbind *nxb, struct proc *p,
2914 struct nexus_adapter **na, boolean_t create)
2915{
2916 int error = 0;
2917
2918 _CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name));
2919
2920 *na = NULL; /* default return value */
2921
2922 SK_LOCK_ASSERT_HELD();
2923
2924 /*
	 * We cascade through all possible types of nexus adapter.
2926 * All nx_*_na_find() functions return an error and an na,
2927 * with the following combinations:
2928 *
2929 * error na
2930 * 0 NULL type doesn't match
2931 * !0 NULL type matches, but na creation/lookup failed
2932 * 0 !NULL type matches and na created/found
2933 * !0 !NULL impossible
2934 */
2935
2936#if CONFIG_NEXUS_MONITOR
2937 /* try to see if this is a monitor port */
2938 error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create);
2939 if (error != 0 || *na != NULL) {
2940 return error;
2941 }
2942#endif /* CONFIG_NEXUS_MONITOR */
2943#if CONFIG_NEXUS_USER_PIPE
2944 /* try to see if this is a pipe port */
2945 error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create);
2946 if (error != 0 || *na != NULL) {
2947 return error;
2948 }
2949#endif /* CONFIG_NEXUS_USER_PIPE */
2950#if CONFIG_NEXUS_KERNEL_PIPE
2951 /* try to see if this is a kernel pipe port */
2952 error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create);
2953 if (error != 0 || *na != NULL) {
2954 return error;
2955 }
2956#endif /* CONFIG_NEXUS_KERNEL_PIPE */
2957#if CONFIG_NEXUS_FLOWSWITCH
2958 /* try to see if this is a flowswitch port */
2959 error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create);
2960 if (error != 0 || *na != NULL) {
2961 return error;
2962 }
2963#endif /* CONFIG_NEXUS_FLOWSWITCH */
2964#if CONFIG_NEXUS_NETIF
2965 error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create);
2966 if (error != 0 || *na != NULL) {
2967 return error;
2968 }
2969#endif /* CONFIG_NEXUS_NETIF */
2970
2971 ASSERT(*na == NULL);
2972 return ENXIO;
2973}
2974
2975void
2976na_retain_locked(struct nexus_adapter *na)
2977{
2978 SK_LOCK_ASSERT_HELD();
2979
2980 if (na != NULL) {
2981#if SK_LOG
2982 uint32_t oref = os_atomic_inc_orig(&na->na_refcount, relaxed);
2983 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2984 na->na_name, SK_KVA(na), oref + 1, na->na_channels);
2985#else /* !SK_LOG */
2986 os_atomic_inc(&na->na_refcount, relaxed);
2987#endif /* !SK_LOG */
2988 }
2989}
2990
2991/* returns 1 iff the nexus_adapter is destroyed */
2992int
2993na_release_locked(struct nexus_adapter *na)
2994{
2995 uint32_t oref;
2996
2997 SK_LOCK_ASSERT_HELD();
2998
2999 ASSERT(na->na_refcount > 0);
3000 oref = os_atomic_dec_orig(&na->na_refcount, relaxed);
3001 if (oref > 1) {
3002 SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
3003 na->na_name, SK_KVA(na), oref - 1, na->na_channels);
3004 return 0;
3005 }
3006 ASSERT(na->na_channels == 0);
3007
3008 if (na->na_dtor != NULL) {
3009 na->na_dtor(na);
3010 }
3011
3012 ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL);
3013 ASSERT(na->na_slot_ctxs == NULL);
3014 ASSERT(na->na_scratch == NULL);
3015
3016#if CONFIG_NEXUS_USER_PIPE
3017 nx_upipe_na_dealloc(na);
3018#endif /* CONFIG_NEXUS_USER_PIPE */
3019 if (na->na_arena != NULL) {
3020 skmem_arena_release(na->na_arena);
3021 na->na_arena = NULL;
3022 }
3023
3024 SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
3025 na->na_name, SK_KVA(na));
3026
3027 NA_FREE(na);
3028 return 1;
3029}
3030
3031static struct nexus_adapter *
3032na_pseudo_alloc(zalloc_flags_t how)
3033{
3034 struct nexus_adapter *na;
3035
3036 na = zalloc_flags(na_pseudo_zone, how | Z_ZERO);
3037 if (na) {
3038 na->na_type = NA_PSEUDO;
3039 na->na_free = na_pseudo_free;
3040 }
3041 return na;
3042}
3043
3044static void
3045na_pseudo_free(struct nexus_adapter *na)
3046{
3047 ASSERT(na->na_refcount == 0);
3048 SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3049 bzero(s: na, n: sizeof(*na));
3050 zfree(na_pseudo_zone, na);
3051}
3052
3053static int
3054na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p,
3055 uint32_t flags)
3056{
3057#pragma unused(kring, p, flags)
3058 SK_DF(SK_VERB_SYNC | SK_VERB_TX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
3060 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3061 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3062 flags);
3063
3064 return 0;
3065}
3066
3067static int
3068na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p,
3069 uint32_t flags)
3070{
3071#pragma unused(kring, p, flags)
3072 SK_DF(SK_VERB_SYNC | SK_VERB_RX,
	    "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0x%x",
3074 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3075 SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id,
3076 flags);
3077
3078 ASSERT(kring->ckr_rhead <= kring->ckr_lim);
3079
3080 return 0;
3081}
3082
3083static int
3084na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode)
3085{
3086 SK_D("na \"%s\" (0x%llx) %s", na->na_name,
3087 SK_KVA(na), na_activate_mode2str(mode));
3088
3089 switch (mode) {
3090 case NA_ACTIVATE_MODE_ON:
3091 os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
3092 break;
3093
3094 case NA_ACTIVATE_MODE_DEFUNCT:
3095 break;
3096
3097 case NA_ACTIVATE_MODE_OFF:
3098 os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
3099 break;
3100
3101 default:
3102 VERIFY(0);
3103 /* NOTREACHED */
3104 __builtin_unreachable();
3105 }
3106
3107 return 0;
3108}
3109
3110static void
3111na_pseudo_dtor(struct nexus_adapter *na)
3112{
3113#pragma unused(na)
3114}
3115
3116static int
3117na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
3118{
3119 return na_rings_mem_setup(na, FALSE, ch);
3120}
3121
3122static void
3123na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
3124 boolean_t defunct)
3125{
3126 na_rings_mem_teardown(na, ch, defunct);
3127}
3128
3129/*
3130 * Pseudo nexus adapter; typically used as a generic parent adapter.
3131 */
3132int
3133na_pseudo_create(struct kern_nexus *nx, struct chreq *chr,
3134 struct nexus_adapter **ret)
3135{
3136 struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params;
3137 struct nexus_adapter *na;
3138 int error;
3139
3140 SK_LOCK_ASSERT_HELD();
3141 *ret = NULL;
3142
3143 na = na_pseudo_alloc(how: Z_WAITOK);
3144
3145 ASSERT(na->na_type == NA_PSEUDO);
3146 ASSERT(na->na_free == na_pseudo_free);
3147
3148 (void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1);
3149 na->na_name[sizeof(na->na_name) - 1] = '\0';
3150 uuid_generate_random(out: na->na_uuid);
3151
3152 /*
3153 * Verify upper bounds; for all cases including user pipe nexus,
3154 * the parameters must have already been validated by corresponding
3155 * nxdom_prov_params() function defined by each domain.
3156 */
3157 na_set_nrings(na, t: NR_TX, v: nxp->nxp_tx_rings);
3158 na_set_nrings(na, t: NR_RX, v: nxp->nxp_rx_rings);
3159 na_set_nslots(na, t: NR_TX, v: nxp->nxp_tx_slots);
3160 na_set_nslots(na, t: NR_RX, v: nxp->nxp_rx_slots);
3161 ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max);
3162 ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max);
3163 ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max);
3164 ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max);
3165
3166 na->na_txsync = na_pseudo_txsync;
3167 na->na_rxsync = na_pseudo_rxsync;
3168 na->na_activate = na_pseudo_activate;
3169 na->na_dtor = na_pseudo_dtor;
3170 na->na_krings_create = na_pseudo_krings_create;
3171 na->na_krings_delete = na_pseudo_krings_delete;
3172
3173 *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type =
3174 NEXUS_STATS_TYPE_INVALID;
3175
3176 /* other fields are set in the common routine */
3177 na_attach_common(na, nx, NX_DOM_PROV(nx));
3178
3179 if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx),
3180 nx, na)) != 0) {
3181 ASSERT(na->na_arena == NULL);
3182 goto err;
3183 }
3184 ASSERT(na->na_arena != NULL);
3185
3186 *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max;
3187 ASSERT(na->na_flowadv_max == 0 ||
3188 skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL);
3189
3190#if SK_LOG
3191 uuid_string_t uuidstr;
3192 SK_D("na_name: \"%s\"", na->na_name);
3193 SK_D(" UUID: %s", sk_uuid_unparse(na->na_uuid, uuidstr));
3194 SK_D(" nx: 0x%llx (\"%s\":\"%s\")",
3195 SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name,
3196 NX_DOM_PROV(na->na_nx)->nxdom_prov_name);
3197 SK_D(" flags: %b", na->na_flags, NAF_BITS);
3198 SK_D(" flowadv_max: %u", na->na_flowadv_max);
3199 SK_D(" rings: tx %u rx %u",
3200 na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX));
3201 SK_D(" slots: tx %u rx %u",
3202 na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX));
3203#if CONFIG_NEXUS_USER_PIPE
3204 SK_D(" next_pipe: %u", na->na_next_pipe);
3205 SK_D(" max_pipes: %u", na->na_max_pipes);
3206#endif /* CONFIG_NEXUS_USER_PIPE */
3207#endif /* SK_LOG */
3208
3209 *ret = na;
3210 na_retain_locked(na);
3211
3212 return 0;
3213
3214err:
3215 if (na != NULL) {
3216 if (na->na_arena != NULL) {
3217 skmem_arena_release(na->na_arena);
3218 na->na_arena = NULL;
3219 }
3220 NA_FREE(na);
3221 }
3222 return error;
3223}
3224
3225void
3226na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id,
3227 const flowadv_idx_t fe_idx, const uint32_t flowid)
3228{
3229 struct skmem_arena *ar = na->na_arena;
3230 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar: na->na_arena);
3231 struct __flowadv_entry *fae;
3232
3233 ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0);
3234 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3235
3236 AR_LOCK(ar);
3237
3238 /* we must not get here if arena is defunct; this must be valid */
3239 ASSERT(arn->arn_flowadv_obj != NULL);
3240
3241 VERIFY(fe_idx < na->na_flowadv_max);
3242 fae = &arn->arn_flowadv_obj[fe_idx];
3243 uuid_copy(dst: fae->fae_id, src: fae_id);
3244 fae->fae_flowid = flowid;
3245 fae->fae_flags = FLOWADVF_VALID;
3246
3247 AR_UNLOCK(ar);
3248}
3249
3250void
3251na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id,
3252 const flowadv_idx_t fe_idx, const uint32_t flowid)
3253{
3254#pragma unused(fae_id)
3255 struct skmem_arena *ar = na->na_arena;
3256 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3257
3258 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3259 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3260
3261 AR_LOCK(ar);
3262
3263 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3264 if (arn->arn_flowadv_obj != NULL) {
3265 struct __flowadv_entry *fae;
3266
3267 VERIFY(fe_idx < na->na_flowadv_max);
3268 fae = &arn->arn_flowadv_obj[fe_idx];
3269 ASSERT(uuid_compare(fae->fae_id, fae_id) == 0);
3270 uuid_clear(uu: fae->fae_id);
3271 VERIFY(fae->fae_flowid == flowid);
3272 fae->fae_flowid = 0;
3273 fae->fae_flags = 0;
3274 }
3275
3276 AR_UNLOCK(ar);
3277}
3278
3279bool
3280na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx,
3281 const flowadv_token_t flow_token)
3282{
3283 struct skmem_arena *ar = na->na_arena;
3284 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3285 bool suspend;
3286
3287 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3288 ASSERT(fe_idx < na->na_flowadv_max);
3289 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3290
3291 AR_LOCK(ar);
3292
3293 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3294
3295 if (arn->arn_flowadv_obj != NULL) {
3296 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3297
3298 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3299 /*
3300 * We cannot guarantee that the flow is still around by now,
3301 * so check if that's the case and let the caller know.
3302 */
3303 if ((suspend = (fae->fae_token == flow_token))) {
3304 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3305 fae->fae_flags |= FLOWADVF_SUSPENDED;
3306 }
3307 } else {
3308 suspend = false;
3309 }
3310 if (suspend) {
		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3312 "SUSPEND", sk_proc_name_address(current_proc()),
3313 sk_proc_pid(current_proc()), flow_token, fe_idx);
3314 } else {
		SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around",
3316 sk_proc_name_address(current_proc()),
3317 sk_proc_pid(current_proc()), flow_token, fe_idx);
3318 }
3319
3320 AR_UNLOCK(ar);
3321
3322 return suspend;
3323}
3324
3325int
3326na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3327 const flowadv_token_t flow_token)
3328{
3329 struct nexus_adapter *na = ch->ch_na;
3330 struct skmem_arena *ar = na->na_arena;
3331 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3332 boolean_t resume;
3333
3334 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3335 ASSERT(fe_idx < na->na_flowadv_max);
3336 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3337
3338 AR_LOCK(ar);
3339
3340 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3341
3342 if (arn->arn_flowadv_obj != NULL) {
3343 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3344
3345 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3346 /*
3347 * We cannot guarantee that the flow is still around by now,
3348 * so check if that's the case and let the caller know.
3349 */
3350 if ((resume = (fae->fae_token == flow_token))) {
3351 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3352 fae->fae_flags &= ~FLOWADVF_SUSPENDED;
3353 }
3354 } else {
3355 resume = FALSE;
3356 }
3357 if (resume) {
3358 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3359 "fidx %u RESUME", ch->ch_name, ch->ch_pid, flow_token,
3360 fe_idx);
3361 } else {
3362 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3363 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3364 }
3365
3366 AR_UNLOCK(ar);
3367
3368 return resume;
3369}
3370
3371int
3372na_flowadv_report_ce_event(const struct kern_channel *ch, const flowadv_idx_t fe_idx,
3373 const flowadv_token_t flow_token, uint32_t ce_cnt, uint32_t total_pkt_cnt)
3374{
3375 struct nexus_adapter *na = ch->ch_na;
3376 struct skmem_arena *ar = na->na_arena;
3377 struct skmem_arena_nexus *arn = skmem_arena_nexus(ar);
3378 boolean_t added;
3379
3380 ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0));
3381 ASSERT(fe_idx < na->na_flowadv_max);
3382 ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS);
3383
3384 AR_LOCK(ar);
3385
3386 ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT));
3387
3388 if (arn->arn_flowadv_obj != NULL) {
3389 struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx];
3390
3391 _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token));
3392 /*
3393 * We cannot guarantee that the flow is still around by now,
3394 * so check if that's the case and let the caller know.
3395 */
3396 if ((added = (fae->fae_token == flow_token))) {
3397 ASSERT(fae->fae_flags & FLOWADVF_VALID);
3398 fae->fae_ce_cnt += ce_cnt;
3399 fae->fae_pkt_cnt += total_pkt_cnt;
3400 }
3401 } else {
3402 added = FALSE;
3403 }
3404 if (added) {
3405 SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x "
3406 "fidx %u ce cnt incremented", ch->ch_name,
3407 ch->ch_pid, flow_token, fe_idx);
3408 } else {
3409 SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around",
3410 ch->ch_name, ch->ch_pid, flow_token, fe_idx);
3411 }
3412
3413 AR_UNLOCK(ar);
3414
3415 return added;
3416}
3417
3418void
3419na_flowadv_event(struct __kern_channel_ring *kring)
3420{
3421 ASSERT(kring->ckr_tx == NR_TX);
3422
3423 SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx",
3424 sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()),
3425 KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring));
3426
3427 na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD);
3428}
3429
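/*
 * ckr_na_sync handler for the user packet pool "free" ring: every slot
 * between ckr_khead and ckr_rhead names a packet that user space is
 * returning. Each is removed from the pool's loaned-packet table,
 * detached from its slot and released back to the pool in one batch.
 */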
3430static int
3431na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p,
3432 uint32_t flags)
3433{
3434#pragma unused(flags, p)
3435 int n, ret = 0;
3436 slot_idx_t j;
3437 struct __kern_slot_desc *ksd;
3438 struct __user_slot_desc *usd;
3439 struct __kern_quantum *kqum;
3440 struct kern_pbufpool *pp = kring->ckr_pp;
3441 uint32_t nfree = 0;
3442
3443 /* packet pool list is protected by channel lock */
3444 ASSERT(!KR_KERNEL_ONLY(kring));
3445
3446 /* # of new slots */
3447 n = kring->ckr_rhead - kring->ckr_khead;
3448 if (n < 0) {
3449 n += kring->ckr_num_slots;
3450 }
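	/*
	 * e.g. with 8 slots, khead == 6 and rhead == 2 gives n == -4,
	 * which wraps to 4 returned slots (illustrative numbers only).
	 */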
3451
3452 /* nothing to free */
3453 if (__improbable(n == 0)) {
3454 SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
3455 sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
3456 "nothing to free");
3457 goto done;
3458 }
3459
3460 j = kring->ckr_khead;
3461 PP_LOCK(pp);
3462 while (n--) {
3463 int err;
3464
3465 ksd = KR_KSD(kring, j);
3466 usd = KR_USD(kring, j);
3467
3468 if (__improbable(!SD_VALID_METADATA(usd))) {
3469 SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
3470 ret = EINVAL;
3471 break;
3472 }
3473
3474 kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err);
3475 if (__improbable(err != 0)) {
			SK_ERR("un-allocated packet or buflet %d 0x%llx",
3477 usd->sd_md_idx, SK_KVA(kqum));
3478 ret = EINVAL;
3479 break;
3480 }
3481
3482 /* detach and free the packet */
3483 kqum->qum_qflags &= ~QUM_F_FINALIZED;
3484 kqum->qum_ksd = NULL;
3485 ASSERT(!KSD_VALID_METADATA(ksd));
3486 USD_DETACH_METADATA(usd);
3487 ASSERT(pp == kqum->qum_pp);
3488 ASSERT(nfree < kring->ckr_num_slots);
3489 kring->ckr_scratch[nfree++] = (uint64_t)kqum;
3490 j = SLOT_NEXT(i: j, lim: kring->ckr_lim);
3491 }
3492 PP_UNLOCK(pp);
3493
3494 if (__probable(nfree > 0)) {
3495 pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree);
3496 }
3497
3498 kring->ckr_khead = j;
3499 kring->ckr_ktail = SLOT_PREV(i: j, lim: kring->ckr_lim);
3500
3501done:
3502 return ret;
3503}
3504
3505#define MAX_BUFLETS 64
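/*
 * Allocate up to *ph_cnt packet handles from pp into buf_arr. For the
 * default pool case this is a straight batch allocation. When "large"
 * is set, packets are first allocated without buffers and a large buflet
 * is then attached to each, MAX_BUFLETS at a time; packets that cannot
 * be paired with a buflet are freed and *ph_cnt is trimmed accordingly
 * (0 for a full allocation, EAGAIN for a partial one, ENOMEM for none).
 */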
3506static int
3507alloc_packets(kern_pbufpool_t pp, uint64_t *buf_arr, bool large, uint32_t *ph_cnt)
3508{
3509 int err;
3510 uint32_t need, need_orig, remain, alloced, i;
3511 uint64_t buflets[MAX_BUFLETS];
3512 uint64_t *pkts;
3513
3514 need_orig = *ph_cnt;
3515 err = kern_pbufpool_alloc_batch_nosleep(pbufpool: pp, bufcnt: large ? 0 : 1, array: buf_arr, size: ph_cnt);
3516 if (!large) {
3517 return err;
3518 }
3519 if (*ph_cnt == 0) {
3520 SK_ERR("failed to alloc %d packets for alloc ring: err %d",
3521 need_orig, err);
3522 DTRACE_SKYWALK2(alloc__pkts__fail, uint32_t, need_orig, int, err);
3523 return err;
3524 }
3525 need = remain = *ph_cnt;
3526 alloced = 0;
3527 pkts = buf_arr;
3528 while (remain > 0) {
3529 uint32_t cnt, cnt_orig;
3530
3531 cnt = MIN(remain, MAX_BUFLETS);
3532 cnt_orig = cnt;
3533 err = pp_alloc_buflet_batch(pp, array: buflets, size: &cnt, SKMEM_NOSLEEP, true);
3534 if (cnt == 0) {
3535 SK_ERR("failed to alloc %d buflets for alloc ring: "
3536 "remain %d, err %d", cnt_orig, remain, err);
3537 DTRACE_SKYWALK3(alloc__bufs__fail, uint32_t, cnt_orig,
3538 uint32_t, remain, int, err);
3539 break;
3540 }
3541 for (i = 0; i < cnt; i++) {
3542 kern_packet_t ph = (kern_packet_t)pkts[i];
3543 kern_buflet_t buf = (kern_buflet_t)buflets[i];
3544 kern_buflet_t pbuf = kern_packet_get_next_buflet(ph, NULL);
3545 VERIFY(kern_packet_add_buflet(ph, pbuf, buf) == 0);
3546 buflets[i] = 0;
3547 }
3548 DTRACE_SKYWALK3(alloc__bufs, uint32_t, remain, uint32_t, cnt,
3549 uint32_t, cnt_orig);
3550 pkts += cnt;
3551 alloced += cnt;
3552 remain -= cnt;
3553 }
3554 /* free packets without attached buffers */
3555 if (remain > 0) {
3556 DTRACE_SKYWALK1(remaining__pkts, uint32_t, remain);
3557 ASSERT(remain + alloced == need);
3558 pp_free_packet_batch(pp, pkts, remain);
3559
3560 /* pp_free_packet_batch() should clear the pkts array */
3561 for (i = 0; i < remain; i++) {
3562 ASSERT(pkts[i] == 0);
3563 }
3564 }
3565 *ph_cnt = alloced;
3566 if (*ph_cnt == 0) {
3567 err = ENOMEM;
3568 } else if (*ph_cnt < need_orig) {
3569 err = EAGAIN;
3570 } else {
3571 err = 0;
3572 }
3573 DTRACE_SKYWALK3(alloc__packets, uint32_t, need_orig, uint32_t, *ph_cnt, int, err);
3574 return err;
3575}
3576
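/*
 * ckr_na_sync handler for the user packet pool "alloc" rings. Completed
 * slots are reclaimed up to ckr_rhead, the per-ring working set is grown
 * or shrunk based on recent demand, and freshly allocated packets are
 * recorded in the pool's loaned-packet table, attached to free slots and
 * externalized for user space before ckr_ktail is advanced.
 */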
3577static int
3578na_packet_pool_alloc_sync_common(struct __kern_channel_ring *kring, struct proc *p,
3579 uint32_t flags, bool large)
3580{
3581 int b, err;
3582 uint32_t n = 0;
3583 slot_idx_t j;
3584 uint64_t now;
3585 uint32_t curr_ws, ph_needed, ph_cnt;
3586 struct __kern_slot_desc *ksd;
3587 struct __user_slot_desc *usd;
3588 struct __kern_quantum *kqum;
3589 kern_pbufpool_t pp = kring->ckr_pp;
3590 pid_t pid = proc_pid(p);
3591
3592 /* packet pool list is protected by channel lock */
3593 ASSERT(!KR_KERNEL_ONLY(kring));
3594 ASSERT(!PP_KERNEL_ONLY(pp));
3595
3596 now = _net_uptime;
3597 if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
3598 if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
3599 kring->ckr_alloc_ws = na_upp_reap_min_pkts;
3600 }
3601 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3602 "%s: purged curr_ws(%d)", kring->ckr_name,
3603 kring->ckr_alloc_ws);
3604 return 0;
3605 }
3606 /* reclaim the completed slots */
3607 kring->ckr_khead = kring->ckr_rhead;
3608
3609 /* # of busy (unclaimed) slots */
3610 b = kring->ckr_ktail - kring->ckr_khead;
3611 if (b < 0) {
3612 b += kring->ckr_num_slots;
3613 }
3614
3615 curr_ws = kring->ckr_alloc_ws;
3616 if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
3617 /* increment the working set by 50% */
3618 curr_ws += (curr_ws >> 1);
3619 curr_ws = MIN(curr_ws, kring->ckr_lim);
3620 } else {
3621 if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
3622 (uint32_t)b >= (curr_ws >> 2)) {
3623 /* decrease the working set by 25% */
3624 curr_ws -= (curr_ws >> 2);
3625 }
3626 }
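	/*
	 * Illustrative numbers: with curr_ws == 128, a forced sync grows
	 * the working set to 192 (capped at ckr_lim), while an aged ring
	 * holding at least 32 busy slots shrinks it to 96; the result is
	 * then clamped below by na_upp_alloc_lowat.
	 */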
3627 curr_ws = MAX(curr_ws, na_upp_alloc_lowat);
3628 if (curr_ws > (uint32_t)b) {
3629 n = curr_ws - b;
3630 }
3631 kring->ckr_alloc_ws = curr_ws;
3632 kring->ckr_sync_time = now;
3633
3634 /* min with # of avail free slots (subtract busy from max) */
3635 n = ph_needed = MIN(n, kring->ckr_lim - b);
3636 j = kring->ckr_ktail;
3637 SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
3638 "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);
3639
3640 if ((ph_cnt = ph_needed) == 0) {
3641 goto done;
3642 }
3643
3644 err = alloc_packets(pp, buf_arr: kring->ckr_scratch,
3645 PP_HAS_BUFFER_ON_DEMAND(pp) && large, ph_cnt: &ph_cnt);
3646 if (__improbable(ph_cnt == 0)) {
		SK_ERR("kr 0x%llx failed to alloc %u packets (err %d)",
3648 SK_KVA(kring), ph_needed, err);
3649 kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed;
3650 } else {
3651 /*
3652 * Add packets to the allocated list of user packet pool.
3653 */
3654 pp_insert_upp_batch(pp, pid, array: kring->ckr_scratch, num: ph_cnt);
3655 }
3656
	for (n = 0; n < ph_cnt; n++) {
		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]);
		kring->ckr_scratch[n] = 0;
		ASSERT(kqum != NULL);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/*
		 * Since this packet is freshly allocated and we need to
		 * have the flag set for the attach to succeed, just set
		 * it here rather than calling __packet_finalize().
		 */
		kqum->qum_qflags |= QUM_F_FINALIZED;

		/* Attach packet to slot */
		KR_SLOT_ATTACH_METADATA(kring, ksd, kqum);
		/*
		 * externalize the packet as it is being transferred to
		 * user space.
		 */
		kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}

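/*
 * Thin wrappers: the two alloc-sync callbacks differ only in whether the
 * freshly allocated packets are backed by the pool's regular or large
 * buffers (the latter only when buffer-on-demand is enabled).
 */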
static int
na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	return na_packet_pool_alloc_sync_common(kring, p, flags, false);
}

static int
na_packet_pool_alloc_large_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	return na_packet_pool_alloc_sync_common(kring, p, flags, true);
}

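/*
 * Free-buflet ring sync for the user packet pool: walks the slots the user
 * has advanced rhead over, validates each one, removes the referenced
 * buflet from the pool's allocated list and frees it back to the pool.
 */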
static int
na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
#pragma unused(flags, p)
	int n, ret = 0;
	slot_idx_t j;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct kern_pbufpool *pp = kring->ckr_pp;

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));

	/* # of new slots */
	n = kring->ckr_rhead - kring->ckr_khead;
	if (n < 0) {
		n += kring->ckr_num_slots;
	}

	/* nothing to free */
	if (__improbable(n == 0)) {
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s",
		    sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name,
		    "nothing to free");
		goto done;
	}

	j = kring->ckr_khead;
	while (n--) {
		int err;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		if (__improbable(!SD_VALID_METADATA(usd))) {
			SK_ERR("bad slot %d 0x%llx", j, SK_KVA(ksd));
			ret = EINVAL;
			break;
		}

		kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err);
		if (__improbable(err != 0)) {
			SK_ERR("un-allocated buflet %d %p", usd->sd_md_idx,
			    SK_KVA(kbft));
			ret = EINVAL;
			break;
		}

		/* detach and free the buflet */
		ASSERT(!KSD_VALID_METADATA(ksd));
		USD_DETACH_METADATA(usd);
		pp_free_buflet(pp, kbft);
		j = SLOT_NEXT(j, kring->ckr_lim);
	}
	kring->ckr_khead = j;
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);

done:
	return ret;
}

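/*
 * Buflet counterpart of na_packet_pool_alloc_sync_common(): uses the same
 * adaptive working-set logic, but refills the ring with bare buflets
 * rather than packets.
 */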
static int
na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p,
    uint32_t flags)
{
	int b, err;
	uint32_t n = 0;
	slot_idx_t j;
	uint64_t now;
	uint32_t curr_ws, bh_needed, bh_cnt;
	struct __kern_slot_desc *ksd;
	struct __user_slot_desc *usd;
	struct __kern_buflet *kbft;
	struct __kern_buflet_ext *kbe;
	kern_pbufpool_t pp = kring->ckr_pp;
	pid_t pid = proc_pid(p);

	/* packet pool list is protected by channel lock */
	ASSERT(!KR_KERNEL_ONLY(kring));
	ASSERT(!PP_KERNEL_ONLY(pp));

	now = _net_uptime;
	if ((flags & NA_SYNCF_UPP_PURGE) != 0) {
		if (now - kring->ckr_sync_time >= na_upp_reap_interval) {
			kring->ckr_alloc_ws = na_upp_reap_min_pkts;
		}
		SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
		    "%s: purged curr_ws(%d)", kring->ckr_name,
		    kring->ckr_alloc_ws);
		return 0;
	}
	/* reclaim the completed slots */
	kring->ckr_khead = kring->ckr_rhead;

	/* # of busy (unclaimed) slots */
	b = kring->ckr_ktail - kring->ckr_khead;
	if (b < 0) {
		b += kring->ckr_num_slots;
	}

	curr_ws = kring->ckr_alloc_ws;
	if (flags & NA_SYNCF_FORCE_UPP_SYNC) {
		/* increment the working set by 50% */
		curr_ws += (curr_ws >> 1);
		curr_ws = MIN(curr_ws, kring->ckr_lim);
	} else {
		if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) &&
		    (uint32_t)b >= (curr_ws >> 2)) {
			/* decrease the working set by 25% */
			curr_ws -= (curr_ws >> 2);
		}
	}
	curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat);
	if (curr_ws > (uint32_t)b) {
		n = curr_ws - b;
	}
	kring->ckr_alloc_ws = curr_ws;
	kring->ckr_sync_time = now;

	/* min with # of avail free slots (subtract busy from max) */
	n = bh_needed = MIN(n, kring->ckr_lim - b);
	j = kring->ckr_ktail;
	SK_DF(SK_VERB_MEM | SK_VERB_SYNC,
	    "%s: curr_ws(%d), n(%d)", kring->ckr_name, curr_ws, n);

	if ((bh_cnt = bh_needed) == 0) {
		goto done;
	}

	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
	    SKMEM_NOSLEEP, false);

	if (bh_cnt == 0) {
		SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)",
		    SK_KVA(kring), bh_needed, err);
		kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed;
	}

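	/*
	 * For each allocated buflet: add it to the pool's allocated list
	 * for this process, externalize it for user space, then bind it to
	 * the next free slot at ktail.
	 */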
	for (n = 0; n < bh_cnt; n++) {
		struct __user_buflet *ubft;

		ksd = KR_KSD(kring, j);
		usd = KR_USD(kring, j);

		kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]);
		kbe = (struct __kern_buflet_ext *)kbft;
		kring->ckr_scratch[n] = 0;
		ASSERT(kbft != NULL);

		/*
		 * Add buflet to the allocated list of user packet pool.
		 */
		pp_insert_upp_bft(pp, kbft, pid);

		/*
		 * externalize the buflet as it is being transferred to
		 * user space.
		 */
		ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user);
		KBUF_EXTERNALIZE(kbft, ubft, pp);

		/* cleanup any stale slot mapping */
		KSD_RESET(ksd);
		ASSERT(usd != NULL);
		USD_RESET(usd);

		/* Attach buflet to slot */
		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);

		j = SLOT_NEXT(j, kring->ckr_lim);
	}
done:
	ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail);
	kring->ckr_ktail = j;
	return 0;
}

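/*
 * Prune or purge the caches in the adapter's memory arena.  NAF_DRAINING
 * is set at most once per drain cycle and is cleared again on the next
 * channel sync.
 */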
/* The caller needs to ensure that the NA stays intact */
void
na_drain(struct nexus_adapter *na, boolean_t purge)
{
	/* will be cleared on next channel sync */
	if (!(os_atomic_or_orig(&na->na_flags, NAF_DRAINING, relaxed) &
	    NAF_DRAINING) && NA_IS_ACTIVE(na)) {
		SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b",
		    na->na_name, (purge ? "purging" : "pruning"),
		    SK_KVA(na), na->na_flags, NAF_BITS);

		/* reap (purge/prune) caches in the arena */
		skmem_arena_reap(na->na_arena, purge);
	}
}
