1 | /* |
2 | * Copyright (c) 2015-2023 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. |
31 | * All rights reserved. |
32 | * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. |
33 | * |
34 | * Redistribution and use in source and binary forms, with or without |
35 | * modification, are permitted provided that the following conditions |
36 | * are met: |
37 | * 1. Redistributions of source code must retain the above copyright |
38 | * notice, this list of conditions and the following disclaimer. |
39 | * 2. Redistributions in binary form must reproduce the above copyright |
40 | * notice, this list of conditions and the following disclaimer in the |
41 | * documentation and/or other materials provided with the distribution. |
42 | * |
43 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
44 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
45 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
46 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
47 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
48 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
49 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
50 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
51 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
52 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
53 | * SUCH DAMAGE. |
54 | */ |
55 | #include <sys/systm.h> |
56 | #include <skywalk/os_skywalk_private.h> |
57 | #include <skywalk/nexus/monitor/nx_monitor.h> |
58 | #include <skywalk/nexus/flowswitch/nx_flowswitch.h> |
59 | #include <skywalk/nexus/netif/nx_netif.h> |
60 | #include <skywalk/nexus/upipe/nx_user_pipe.h> |
61 | #include <skywalk/nexus/kpipe/nx_kernel_pipe.h> |
62 | #include <kern/thread.h> |
63 | |
64 | static int na_krings_use(struct kern_channel *); |
65 | static void na_krings_unuse(struct kern_channel *); |
66 | static void na_krings_verify(struct nexus_adapter *); |
67 | static int na_notify(struct __kern_channel_ring *, struct proc *, uint32_t); |
68 | static void na_set_ring(struct nexus_adapter *, uint32_t, enum txrx, uint32_t); |
69 | static void na_set_all_rings(struct nexus_adapter *, uint32_t); |
70 | static int na_set_ringid(struct kern_channel *, ring_set_t, ring_id_t); |
71 | static void na_unset_ringid(struct kern_channel *); |
72 | static void na_teardown(struct nexus_adapter *, struct kern_channel *, |
73 | boolean_t); |
74 | |
75 | static int na_kr_create(struct nexus_adapter *, boolean_t); |
76 | static void na_kr_delete(struct nexus_adapter *); |
77 | static int na_kr_setup(struct nexus_adapter *, struct kern_channel *); |
78 | static void na_kr_teardown_all(struct nexus_adapter *, struct kern_channel *, |
79 | boolean_t); |
80 | static void na_kr_teardown_txrx(struct nexus_adapter *, struct kern_channel *, |
81 | boolean_t, struct proc *); |
82 | static int na_kr_populate_slots(struct __kern_channel_ring *); |
83 | static void na_kr_depopulate_slots(struct __kern_channel_ring *, |
84 | struct kern_channel *, boolean_t defunct); |
85 | |
86 | static int na_schema_alloc(struct kern_channel *); |
87 | |
88 | static struct nexus_adapter *na_pseudo_alloc(zalloc_flags_t); |
89 | static void na_pseudo_free(struct nexus_adapter *); |
90 | static int na_pseudo_txsync(struct __kern_channel_ring *, struct proc *, |
91 | uint32_t); |
92 | static int na_pseudo_rxsync(struct __kern_channel_ring *, struct proc *, |
93 | uint32_t); |
94 | static int na_pseudo_activate(struct nexus_adapter *, na_activate_mode_t); |
95 | static void na_pseudo_dtor(struct nexus_adapter *); |
96 | static int na_pseudo_krings_create(struct nexus_adapter *, |
97 | struct kern_channel *); |
98 | static void na_pseudo_krings_delete(struct nexus_adapter *, |
99 | struct kern_channel *, boolean_t); |
100 | static int na_packet_pool_alloc_sync(struct __kern_channel_ring *, |
101 | struct proc *, uint32_t); |
102 | static int na_packet_pool_alloc_large_sync(struct __kern_channel_ring *, |
103 | struct proc *, uint32_t); |
104 | static int na_packet_pool_free_sync(struct __kern_channel_ring *, |
105 | struct proc *, uint32_t); |
106 | static int na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *, |
107 | struct proc *, uint32_t); |
108 | static int na_packet_pool_free_buf_sync(struct __kern_channel_ring *, |
109 | struct proc *, uint32_t); |
110 | |
111 | #define NA_KRING_IDLE_TIMEOUT (NSEC_PER_SEC * 30) /* 30 seconds */ |
112 | |
113 | static SKMEM_TYPE_DEFINE(na_pseudo_zone, struct nexus_adapter); |
114 | |
115 | static int __na_inited = 0; |
116 | |
117 | #define NA_NUM_WMM_CLASSES 4 |
118 | #define NAKR_WMM_SC2RINGID(_s) PKT_SC2TC(_s) |
119 | #define NAKR_SET_SVC_LUT(_n, _s) \ |
120 | (_n)->na_kring_svc_lut[MBUF_SCIDX(_s)] = NAKR_WMM_SC2RINGID(_s) |
121 | #define NAKR_SET_KR_SVC(_n, _s) \ |
122 | NAKR((_n), NR_TX)[NAKR_WMM_SC2RINGID(_s)].ckr_svc = (_s) |
123 | |
124 | #define NA_UPP_ALLOC_LOWAT 8 |
125 | static uint32_t na_upp_alloc_lowat = NA_UPP_ALLOC_LOWAT; |
126 | |
127 | #define NA_UPP_REAP_INTERVAL 10 /* seconds */ |
128 | static uint32_t na_upp_reap_interval = NA_UPP_REAP_INTERVAL; |
129 | |
130 | #define NA_UPP_WS_HOLD_TIME 2 /* seconds */ |
131 | static uint32_t na_upp_ws_hold_time = NA_UPP_WS_HOLD_TIME; |
132 | |
133 | #define NA_UPP_REAP_MIN_PKTS 0 |
134 | static uint32_t na_upp_reap_min_pkts = NA_UPP_REAP_MIN_PKTS; |
135 | |
136 | #define NA_UPP_ALLOC_BUF_LOWAT 64 |
137 | static uint32_t na_upp_alloc_buf_lowat = NA_UPP_ALLOC_BUF_LOWAT; |
138 | |
139 | #if (DEVELOPMENT || DEBUG) |
140 | static uint64_t _na_inject_error = 0; |
141 | #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) \ |
142 | _SK_INJECT_ERROR(_na_inject_error, _en, _ev, _ec, NULL, _f, __VA_ARGS__) |
143 | |
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_ws_hold_time,
CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_ws_hold_time,
NA_UPP_WS_HOLD_TIME, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_interval,
CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_interval,
NA_UPP_REAP_INTERVAL, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_reap_min_pkts,
CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_reap_min_pkts,
NA_UPP_REAP_MIN_PKTS, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_lowat,
CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_lowat,
NA_UPP_ALLOC_LOWAT, "");
SYSCTL_UINT(_kern_skywalk, OID_AUTO, na_upp_alloc_buf_lowat,
CTLFLAG_RW | CTLFLAG_LOCKED, &na_upp_alloc_buf_lowat,
NA_UPP_ALLOC_BUF_LOWAT, "");
SYSCTL_QUAD(_kern_skywalk, OID_AUTO, na_inject_error,
CTLFLAG_RW | CTLFLAG_LOCKED, &_na_inject_error, "");
161 | #else |
162 | #define _NA_INJECT_ERROR(_en, _ev, _ec, _f, ...) do { } while (0) |
163 | #endif /* !DEVELOPMENT && !DEBUG */ |
164 | |
165 | #define SKMEM_TAG_NX_RINGS "com.apple.skywalk.nexus.rings" |
166 | static SKMEM_TAG_DEFINE(skmem_tag_nx_rings, SKMEM_TAG_NX_RINGS); |
167 | |
168 | #define SKMEM_TAG_NX_CONTEXTS "com.apple.skywalk.nexus.contexts" |
169 | static SKMEM_TAG_DEFINE(skmem_tag_nx_contexts, SKMEM_TAG_NX_CONTEXTS); |
170 | |
171 | #define SKMEM_TAG_NX_SCRATCH "com.apple.skywalk.nexus.scratch" |
172 | static SKMEM_TAG_DEFINE(skmem_tag_nx_scratch, SKMEM_TAG_NX_SCRATCH); |
173 | |
174 | #if !XNU_TARGET_OS_OSX |
175 | /* see KLDBootstrap::readPrelinkedExtensions() for details */ |
176 | extern uuid_t kernelcache_uuid; |
177 | #else /* XNU_TARGET_OS_OSX */ |
178 | /* see panic_init() for details */ |
179 | extern unsigned char *kernel_uuid; |
180 | #endif /* XNU_TARGET_OS_OSX */ |
181 | |
182 | void |
183 | na_init(void) |
184 | { |
185 | /* |
186 | * Changing the size of nexus_mdata structure won't break ABI, |
* but we need to be mindful of memory consumption; thus here
188 | * we add a compile-time check to make sure the size is within |
189 | * the expected limit and that it's properly aligned. This |
190 | * check may be adjusted in future as needed. |
191 | */ |
192 | _CASSERT(sizeof(struct nexus_mdata) <= 32 && |
193 | IS_P2ALIGNED(sizeof(struct nexus_mdata), 8)); |
194 | _CASSERT(sizeof(struct nexus_mdata) <= sizeof(struct __user_quantum)); |
195 | |
196 | /* see comments on nexus_meta_type_t */ |
197 | _CASSERT(NEXUS_META_TYPE_MAX == 3); |
198 | _CASSERT(NEXUS_META_SUBTYPE_MAX == 3); |
199 | |
200 | ASSERT(!__na_inited); |
201 | |
202 | __na_inited = 1; |
203 | } |
204 | |
205 | void |
206 | na_fini(void) |
207 | { |
208 | if (__na_inited) { |
209 | __na_inited = 0; |
210 | } |
211 | } |
212 | |
213 | /* |
* Interpret the ringid of a chreq by translating it into a pair
215 | * of intervals of ring indices: |
216 | * |
217 | * [txfirst, txlast) and [rxfirst, rxlast) |
218 | */ |
219 | int |
220 | na_interp_ringid(struct nexus_adapter *na, ring_id_t ring_id, |
221 | ring_set_t ring_set, uint32_t first[NR_TXRX], uint32_t last[NR_TXRX]) |
222 | { |
223 | enum txrx t; |
224 | |
225 | switch (ring_set) { |
226 | case RING_SET_ALL: |
227 | /* |
228 | * Ring pair eligibility: all ring(s). |
229 | */ |
230 | if (ring_id != CHANNEL_RING_ID_ANY && |
ring_id >= na_get_nrings(na, NR_TX) &&
ring_id >= na_get_nrings(na, NR_RX)) {
SK_ERR("\"%s\": invalid ring_id %d for ring_set %u",
234 | na->na_name, (int)ring_id, ring_set); |
235 | return EINVAL; |
236 | } |
237 | for_rx_tx(t) { |
238 | if (ring_id == CHANNEL_RING_ID_ANY) { |
239 | first[t] = 0; |
240 | last[t] = na_get_nrings(na, t); |
241 | } else { |
242 | first[t] = ring_id; |
243 | last[t] = ring_id + 1; |
244 | } |
245 | } |
246 | break; |
247 | |
248 | default: |
249 | SK_ERR("\"%s\": invalid ring_set %u" , na->na_name, ring_set); |
250 | return EINVAL; |
251 | } |
252 | |
253 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
254 | "\"%s\": ring_id %d, ring_set %u tx [%u,%u) rx [%u,%u)" , |
255 | na->na_name, (int)ring_id, ring_set, first[NR_TX], last[NR_TX], |
256 | first[NR_RX], last[NR_RX]); |
257 | |
258 | return 0; |
259 | } |
260 | |
261 | /* |
262 | * Set the ring ID. For devices with a single queue, a request |
263 | * for all rings is the same as a single ring. |
264 | */ |
265 | static int |
266 | na_set_ringid(struct kern_channel *ch, ring_set_t ring_set, ring_id_t ring_id) |
267 | { |
268 | struct nexus_adapter *na = ch->ch_na; |
269 | int error; |
270 | enum txrx t; |
271 | uint32_t n_alloc_rings; |
272 | |
if ((error = na_interp_ringid(na, ring_id, ring_set,
ch->ch_first, ch->ch_last)) != 0) {
275 | return error; |
276 | } |
277 | |
n_alloc_rings = na_get_nrings(na, NR_A);
279 | if (n_alloc_rings != 0) { |
280 | uint32_t n_large_alloc_rings; |
281 | |
282 | ch->ch_first[NR_A] = ch->ch_first[NR_F] = 0; |
283 | ch->ch_last[NR_A] = ch->ch_last[NR_F] = |
284 | ch->ch_first[NR_A] + n_alloc_rings; |
285 | |
n_large_alloc_rings = na_get_nrings(na, NR_LBA);
287 | ch->ch_first[NR_LBA] = 0; |
288 | ch->ch_last[NR_LBA] = ch->ch_first[NR_LBA] + n_large_alloc_rings; |
289 | } else { |
290 | ch->ch_first[NR_A] = ch->ch_last[NR_A] = 0; |
291 | ch->ch_first[NR_F] = ch->ch_last[NR_F] = 0; |
292 | ch->ch_first[NR_LBA] = ch->ch_last[NR_LBA] = 0; |
293 | } |
294 | ch->ch_first[NR_EV] = 0; |
ch->ch_last[NR_EV] = ch->ch_first[NR_EV] + na_get_nrings(na, NR_EV);
296 | |
297 | /* XXX: should we initialize na_si_users for event ring ? */ |
298 | |
299 | /* |
300 | * Optimization: count the users registered for more than |
301 | * one ring, which are the ones sleeping on the global queue. |
302 | * The default na_notify() callback will then avoid signaling |
303 | * the global queue if nobody is using it |
304 | */ |
305 | for_rx_tx(t) { |
306 | if (ch_is_multiplex(ch, t)) { |
307 | na->na_si_users[t]++; |
308 | ASSERT(na->na_si_users[t] != 0); |
309 | } |
310 | } |
311 | return 0; |
312 | } |
313 | |
314 | static void |
315 | na_unset_ringid(struct kern_channel *ch) |
316 | { |
317 | struct nexus_adapter *na = ch->ch_na; |
318 | enum txrx t; |
319 | |
320 | for_rx_tx(t) { |
321 | if (ch_is_multiplex(ch, t)) { |
322 | ASSERT(na->na_si_users[t] != 0); |
323 | na->na_si_users[t]--; |
324 | } |
325 | ch->ch_first[t] = ch->ch_last[t] = 0; |
326 | } |
327 | } |
328 | |
329 | /* |
330 | * Check that the rings we want to bind are not exclusively owned by a previous |
331 | * bind. If exclusive ownership has been requested, we also mark the rings. |
332 | */ |
333 | /* Hoisted out of line to reduce kernel stack footprint */ |
334 | SK_NO_INLINE_ATTRIBUTE |
335 | static int |
336 | na_krings_use(struct kern_channel *ch) |
337 | { |
338 | struct nexus_adapter *na = ch->ch_na; |
339 | struct __kern_channel_ring *kring; |
340 | boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE); |
341 | enum txrx t; |
342 | uint32_t i; |
343 | |
344 | SK_DF(SK_VERB_NA | SK_VERB_RING, "na \"%s\" (0x%llx) grabbing tx [%u,%u) rx [%u,%u)" , |
345 | na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX], |
346 | ch->ch_first[NR_RX], ch->ch_last[NR_RX]); |
347 | |
348 | /* |
* First round: check that none of the requested rings
* is already exclusively owned, and that we are not asking
* for exclusive ownership of rings that are already in use.
352 | */ |
353 | for_all_rings(t) { |
354 | for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) { |
355 | kring = &NAKR(na, t)[i]; |
356 | if ((kring->ckr_flags & CKRF_EXCLUSIVE) || |
357 | (kring->ckr_users && excl)) { |
358 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
359 | "kr \"%s\" (0x%llx) krflags 0x%b is busy" , |
360 | kring->ckr_name, SK_KVA(kring), |
361 | kring->ckr_flags, CKRF_BITS); |
362 | return EBUSY; |
363 | } |
364 | } |
365 | } |
366 | |
367 | /* |
368 | * Second round: increment usage count and possibly |
369 | * mark as exclusive |
370 | */ |
371 | |
372 | for_all_rings(t) { |
373 | for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) { |
374 | kring = &NAKR(na, t)[i]; |
375 | kring->ckr_users++; |
376 | if (excl) { |
377 | kring->ckr_flags |= CKRF_EXCLUSIVE; |
378 | } |
379 | } |
380 | } |
381 | |
382 | return 0; |
383 | } |
384 | |
385 | /* Hoisted out of line to reduce kernel stack footprint */ |
386 | SK_NO_INLINE_ATTRIBUTE |
387 | static void |
388 | na_krings_unuse(struct kern_channel *ch) |
389 | { |
390 | struct nexus_adapter *na = ch->ch_na; |
391 | struct __kern_channel_ring *kring; |
392 | boolean_t excl = !!(ch->ch_flags & CHANF_EXCLUSIVE); |
393 | enum txrx t; |
394 | uint32_t i; |
395 | |
396 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
397 | "na \"%s\" (0x%llx) releasing tx [%u, %u) rx [%u, %u)" , |
398 | na->na_name, SK_KVA(na), ch->ch_first[NR_TX], ch->ch_last[NR_TX], |
399 | ch->ch_first[NR_RX], ch->ch_last[NR_RX]); |
400 | |
401 | for_all_rings(t) { |
402 | for (i = ch->ch_first[t]; i < ch->ch_last[t]; i++) { |
403 | kring = &NAKR(na, t)[i]; |
404 | if (excl) { |
405 | kring->ckr_flags &= ~CKRF_EXCLUSIVE; |
406 | } |
407 | kring->ckr_users--; |
408 | } |
409 | } |
410 | } |
411 | |
412 | /* Hoisted out of line to reduce kernel stack footprint */ |
413 | SK_NO_INLINE_ATTRIBUTE |
414 | static void |
415 | na_krings_verify(struct nexus_adapter *na) |
416 | { |
417 | struct __kern_channel_ring *kring; |
418 | enum txrx t; |
419 | uint32_t i; |
420 | |
421 | for_all_rings(t) { |
422 | for (i = 0; i < na_get_nrings(na, t); i++) { |
423 | kring = &NAKR(na, t)[i]; |
424 | /* na_kr_create() validations */ |
425 | ASSERT(kring->ckr_num_slots > 0); |
426 | ASSERT(kring->ckr_lim == (kring->ckr_num_slots - 1)); |
427 | ASSERT(kring->ckr_pp != NULL); |
428 | |
429 | if (!(kring->ckr_flags & CKRF_MEM_RING_INITED)) { |
430 | continue; |
431 | } |
432 | /* na_kr_setup() validations */ |
433 | if (KR_KERNEL_ONLY(kring)) { |
434 | ASSERT(kring->ckr_ring == NULL); |
435 | } else { |
436 | ASSERT(kring->ckr_ring != NULL); |
437 | } |
438 | ASSERT(kring->ckr_ksds_last == |
439 | &kring->ckr_ksds[kring->ckr_lim]); |
440 | } |
441 | } |
442 | } |
443 | |
444 | int |
445 | na_bind_channel(struct nexus_adapter *na, struct kern_channel *ch, |
446 | struct chreq *chr) |
447 | { |
struct kern_pbufpool *rx_pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
struct kern_pbufpool *tx_pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
450 | uint32_t ch_mode = chr->cr_mode; |
451 | int err = 0; |
452 | |
453 | SK_LOCK_ASSERT_HELD(); |
454 | ASSERT(ch->ch_schema == NULL); |
455 | ASSERT(ch->ch_na == NULL); |
456 | |
457 | /* ring configuration may have changed, fetch from the card */ |
458 | na_update_config(na); |
459 | ch->ch_na = na; /* store the reference */ |
err = na_set_ringid(ch, chr->cr_ring_set, chr->cr_ring_id);
461 | if (err != 0) { |
462 | goto err; |
463 | } |
464 | |
465 | os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE | |
466 | CHANF_USER_PACKET_POOL | CHANF_EVENT_RING), relaxed); |
467 | if (ch_mode & CHMODE_EXCLUSIVE) { |
468 | os_atomic_or(&ch->ch_flags, CHANF_EXCLUSIVE, relaxed); |
469 | } |
470 | /* |
471 | * Disallow automatic sync for monitor mode, since TX |
472 | * direction is disabled. |
473 | */ |
474 | if (ch_mode & CHMODE_MONITOR) { |
475 | os_atomic_or(&ch->ch_flags, CHANF_RXONLY, relaxed); |
476 | } |
477 | |
478 | if (!!(na->na_flags & NAF_USER_PKT_POOL) ^ |
479 | !!(ch_mode & CHMODE_USER_PACKET_POOL)) { |
480 | SK_ERR("incompatible channel mode (0x%b), na_flags (0x%b)" , |
481 | ch_mode, CHMODE_BITS, na->na_flags, NAF_BITS); |
482 | err = EINVAL; |
483 | goto err; |
484 | } |
485 | |
486 | if (na->na_arena->ar_flags & ARF_DEFUNCT) { |
487 | err = ENXIO; |
488 | goto err; |
489 | } |
490 | |
491 | if (ch_mode & CHMODE_USER_PACKET_POOL) { |
492 | ASSERT(na->na_flags & NAF_USER_PKT_POOL); |
493 | ASSERT(ch->ch_first[NR_A] != ch->ch_last[NR_A]); |
494 | ASSERT(ch->ch_first[NR_F] != ch->ch_last[NR_F]); |
495 | os_atomic_or(&ch->ch_flags, CHANF_USER_PACKET_POOL, relaxed); |
496 | } |
497 | |
498 | if (ch_mode & CHMODE_EVENT_RING) { |
499 | ASSERT(na->na_flags & NAF_USER_PKT_POOL); |
500 | ASSERT(na->na_flags & NAF_EVENT_RING); |
501 | ASSERT(ch->ch_first[NR_EV] != ch->ch_last[NR_EV]); |
502 | os_atomic_or(&ch->ch_flags, CHANF_EVENT_RING, relaxed); |
503 | } |
504 | |
505 | /* |
506 | * If this is the first channel of the adapter, create |
507 | * the rings and their in-kernel view, the krings. |
508 | */ |
509 | if (na->na_channels == 0) { |
510 | err = na->na_krings_create(na, ch); |
511 | if (err != 0) { |
512 | goto err; |
513 | } |
514 | |
515 | /* |
516 | * Sanity check; this is already done in na_kr_create(), |
517 | * but we do it here as well to validate na_kr_setup(). |
518 | */ |
519 | na_krings_verify(na); |
*(nexus_meta_type_t *)(uintptr_t)&na->na_md_type =
skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_type;
*(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype =
skmem_arena_nexus(na->na_arena)->arn_rx_pp->pp_md_subtype;
524 | } |
525 | |
526 | /* |
527 | * Validate ownership and usability of the krings; take into account |
528 | * whether some previous bind has exclusive ownership on them. |
529 | */ |
530 | err = na_krings_use(ch); |
531 | if (err != 0) { |
532 | goto err_del_rings; |
533 | } |
534 | |
535 | /* for user-facing channel, create a new channel schema */ |
536 | if (!(ch->ch_flags & CHANF_KERNEL)) { |
537 | err = na_schema_alloc(ch); |
538 | if (err != 0) { |
539 | goto err_rel_excl; |
540 | } |
541 | |
542 | ASSERT(ch->ch_schema != NULL); |
543 | ASSERT(ch->ch_schema_offset != (mach_vm_offset_t)-1); |
544 | } else { |
545 | ASSERT(ch->ch_schema == NULL); |
546 | ch->ch_schema_offset = (mach_vm_offset_t)-1; |
547 | } |
548 | |
549 | /* update our work timestamp */ |
550 | na->na_work_ts = net_uptime(); |
551 | |
552 | na->na_channels++; |
553 | |
554 | /* |
555 | * If user packet pool is desired, initialize the allocated |
556 | * object hash table in the pool, if not already. This also |
557 | * retains a refcnt on the pool which the caller must release. |
558 | */ |
559 | ASSERT(ch->ch_pp == NULL); |
560 | if (ch_mode & CHMODE_USER_PACKET_POOL) { |
561 | #pragma unused(tx_pp) |
562 | ASSERT(rx_pp == tx_pp); |
563 | err = pp_init_upp(rx_pp, TRUE); |
564 | if (err != 0) { |
565 | goto err_free_schema; |
566 | } |
567 | ch->ch_pp = rx_pp; |
568 | } |
569 | |
570 | if (!NA_IS_ACTIVE(na)) { |
571 | err = na->na_activate(na, NA_ACTIVATE_MODE_ON); |
572 | if (err != 0) { |
573 | goto err_release_pp; |
574 | } |
575 | |
576 | SK_D("activated \"%s\" adapter 0x%llx" , na->na_name, |
577 | SK_KVA(na)); |
578 | SK_D(" na_md_type: %u" , na->na_md_type); |
579 | SK_D(" na_md_subtype: %u" , na->na_md_subtype); |
580 | } |
581 | |
582 | SK_D("ch 0x%llx" , SK_KVA(ch)); |
583 | SK_D(" ch_flags: 0x%b" , ch->ch_flags, CHANF_BITS); |
584 | if (ch->ch_schema != NULL) { |
585 | SK_D(" ch_schema: 0x%llx" , SK_KVA(ch->ch_schema)); |
586 | } |
587 | SK_D(" ch_na: 0x%llx (chcnt %u)" , SK_KVA(ch->ch_na), |
588 | ch->ch_na->na_channels); |
589 | SK_D(" ch_tx_rings: [%u,%u)" , ch->ch_first[NR_TX], |
590 | ch->ch_last[NR_TX]); |
591 | SK_D(" ch_rx_rings: [%u,%u)" , ch->ch_first[NR_RX], |
592 | ch->ch_last[NR_RX]); |
593 | SK_D(" ch_alloc_rings: [%u,%u)" , ch->ch_first[NR_A], |
594 | ch->ch_last[NR_A]); |
595 | SK_D(" ch_free_rings: [%u,%u)" , ch->ch_first[NR_F], |
596 | ch->ch_last[NR_F]); |
597 | SK_D(" ch_ev_rings: [%u,%u)" , ch->ch_first[NR_EV], |
598 | ch->ch_last[NR_EV]); |
599 | |
600 | return 0; |
601 | |
602 | err_release_pp: |
603 | if (ch_mode & CHMODE_USER_PACKET_POOL) { |
604 | ASSERT(ch->ch_pp != NULL); |
605 | pp_release(rx_pp); |
606 | ch->ch_pp = NULL; |
607 | } |
608 | err_free_schema: |
609 | *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type = |
610 | NEXUS_META_TYPE_INVALID; |
611 | *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype = |
612 | NEXUS_META_SUBTYPE_INVALID; |
613 | ASSERT(na->na_channels != 0); |
614 | na->na_channels--; |
615 | if (ch->ch_schema != NULL) { |
616 | skmem_cache_free( |
skmem_arena_nexus(na->na_arena)->arn_schema_cache,
618 | ch->ch_schema); |
619 | ch->ch_schema = NULL; |
620 | ch->ch_schema_offset = (mach_vm_offset_t)-1; |
621 | } |
622 | err_rel_excl: |
623 | na_krings_unuse(ch); |
624 | err_del_rings: |
625 | if (na->na_channels == 0) { |
626 | na->na_krings_delete(na, ch, FALSE); |
627 | } |
628 | err: |
629 | ch->ch_na = NULL; |
630 | ASSERT(err != 0); |
631 | |
632 | return err; |
633 | } |
634 | |
635 | /* |
636 | * Undo everything that was done in na_bind_channel(). |
637 | */ |
638 | /* call with SK_LOCK held */ |
639 | void |
640 | na_unbind_channel(struct kern_channel *ch) |
641 | { |
642 | struct nexus_adapter *na = ch->ch_na; |
643 | |
644 | SK_LOCK_ASSERT_HELD(); |
645 | |
646 | ASSERT(na->na_channels != 0); |
647 | na->na_channels--; |
648 | |
649 | /* release exclusive use if it was requested at bind time */ |
650 | na_krings_unuse(ch); |
651 | |
652 | if (na->na_channels == 0) { /* last instance */ |
653 | SK_D("%s(%d): deleting last channel instance for %s" , |
654 | ch->ch_name, ch->ch_pid, na->na_name); |
655 | |
656 | /* |
657 | * Free any remaining allocated packets attached to |
658 | * the slots, followed by a teardown of the arena. |
659 | */ |
660 | na_teardown(na, ch, FALSE); |
661 | |
662 | *(nexus_meta_type_t *)(uintptr_t)&na->na_md_type = |
663 | NEXUS_META_TYPE_INVALID; |
664 | *(nexus_meta_subtype_t *)(uintptr_t)&na->na_md_subtype = |
665 | NEXUS_META_SUBTYPE_INVALID; |
666 | } else { |
667 | SK_D("%s(%d): %s has %u remaining channel instance(s)" , |
668 | ch->ch_name, ch->ch_pid, na->na_name, na->na_channels); |
669 | } |
670 | |
671 | /* |
672 | * Free any allocated packets (for the process) attached to the slots; |
673 | * note that na_teardown() could have done this there as well. |
674 | */ |
675 | if (ch->ch_pp != NULL) { |
676 | ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL); |
677 | pp_purge_upp(ch->ch_pp, ch->ch_pid); |
678 | pp_release(ch->ch_pp); |
679 | ch->ch_pp = NULL; |
680 | } |
681 | |
/* possibly decrement counter of tx_si/rx_si users */
683 | na_unset_ringid(ch); |
684 | |
685 | /* reap the caches now (purge if adapter is idle) */ |
686 | skmem_arena_reap(na->na_arena, (na->na_channels == 0)); |
687 | |
688 | /* delete the csm */ |
689 | if (ch->ch_schema != NULL) { |
690 | skmem_cache_free( |
skmem_arena_nexus(na->na_arena)->arn_schema_cache,
692 | ch->ch_schema); |
693 | ch->ch_schema = NULL; |
694 | ch->ch_schema_offset = (mach_vm_offset_t)-1; |
695 | } |
696 | |
697 | /* destroy the memory map */ |
698 | skmem_arena_munmap_channel(na->na_arena, ch); |
699 | |
700 | /* mark the channel as unbound */ |
701 | os_atomic_andnot(&ch->ch_flags, (CHANF_RXONLY | CHANF_EXCLUSIVE), relaxed); |
702 | ch->ch_na = NULL; |
703 | |
704 | /* and finally release the nexus adapter; this might free it */ |
705 | (void) na_release_locked(na); |
706 | } |
707 | |
708 | static void |
709 | na_teardown(struct nexus_adapter *na, struct kern_channel *ch, |
710 | boolean_t defunct) |
711 | { |
712 | SK_LOCK_ASSERT_HELD(); |
713 | LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); |
714 | |
715 | #if CONFIG_NEXUS_MONITOR |
716 | /* |
717 | * Walk through all the rings and tell any monitor |
718 | * that the port is going to exit Skywalk mode |
719 | */ |
720 | nx_mon_stop(na); |
721 | #endif /* CONFIG_NEXUS_MONITOR */ |
722 | |
723 | /* |
* Deactivate the adapter.
725 | */ |
726 | (void) na->na_activate(na, |
727 | (defunct ? NA_ACTIVATE_MODE_DEFUNCT : NA_ACTIVATE_MODE_OFF)); |
728 | |
729 | /* |
730 | * Free any remaining allocated packets for this process. |
731 | */ |
732 | if (ch->ch_pp != NULL) { |
733 | ASSERT(ch->ch_flags & CHANF_USER_PACKET_POOL); |
734 | pp_purge_upp(ch->ch_pp, ch->ch_pid); |
735 | if (!defunct) { |
736 | pp_release(ch->ch_pp); |
737 | ch->ch_pp = NULL; |
738 | } |
739 | } |
740 | |
741 | /* |
742 | * Delete rings and buffers. |
743 | */ |
744 | na->na_krings_delete(na, ch, defunct); |
745 | } |
746 | |
747 | /* call with SK_LOCK held */ |
748 | /* |
749 | * Allocate the per-fd structure __user_channel_schema. |
750 | */ |
751 | static int |
752 | na_schema_alloc(struct kern_channel *ch) |
753 | { |
754 | struct nexus_adapter *na = ch->ch_na; |
755 | struct skmem_arena *ar = na->na_arena; |
756 | struct skmem_arena_nexus *arn; |
757 | mach_vm_offset_t roff[SKMEM_REGIONS]; |
758 | struct __kern_channel_ring *kr; |
759 | struct __user_channel_schema *csm; |
760 | struct skmem_obj_info csm_oi, ring_oi, ksd_oi, usd_oi; |
761 | mach_vm_offset_t base; |
762 | uint32_t i, j, k, n[NR_ALL]; |
763 | enum txrx t; |
764 | |
765 | /* see comments for struct __user_channel_schema */ |
766 | _CASSERT(offsetof(struct __user_channel_schema, csm_ver) == 0); |
767 | _CASSERT(offsetof(struct __user_channel_schema, csm_flags) == |
768 | sizeof(csm->csm_ver)); |
769 | _CASSERT(offsetof(struct __user_channel_schema, csm_kern_name) == |
770 | sizeof(csm->csm_ver) + sizeof(csm->csm_flags)); |
771 | _CASSERT(offsetof(struct __user_channel_schema, csm_kern_uuid) == |
772 | sizeof(csm->csm_ver) + sizeof(csm->csm_flags) + |
773 | sizeof(csm->csm_kern_name)); |
774 | |
775 | SK_LOCK_ASSERT_HELD(); |
776 | |
777 | ASSERT(!(ch->ch_flags & CHANF_KERNEL)); |
778 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
779 | arn = skmem_arena_nexus(ar); |
780 | ASSERT(arn != NULL); |
781 | for_all_rings(t) { |
782 | n[t] = 0; |
783 | } |
784 | |
785 | csm = skmem_cache_alloc(arn->arn_schema_cache, SKMEM_NOSLEEP); |
786 | if (csm == NULL) { |
787 | return ENOMEM; |
788 | } |
789 | |
790 | skmem_cache_get_obj_info(arn->arn_schema_cache, csm, &csm_oi, NULL); |
bzero(csm, SKMEM_OBJ_SIZE(&csm_oi));
792 | |
793 | *(uint32_t *)(uintptr_t)&csm->csm_ver = CSM_CURRENT_VERSION; |
794 | |
795 | /* kernel version and executable UUID */ |
796 | _CASSERT(sizeof(csm->csm_kern_name) == _SYS_NAMELEN); |
797 | (void) strncpy((char *)(uintptr_t)csm->csm_kern_name, |
798 | version, sizeof(csm->csm_kern_name) - 1); |
799 | #if !XNU_TARGET_OS_OSX |
800 | (void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid, |
801 | kernelcache_uuid, sizeof(csm->csm_kern_uuid)); |
802 | #else /* XNU_TARGET_OS_OSX */ |
803 | if (kernel_uuid != NULL) { |
(void) memcpy((void *)(uintptr_t)csm->csm_kern_uuid,
kernel_uuid, sizeof(csm->csm_kern_uuid));
806 | } |
807 | #endif /* XNU_TARGET_OS_OSX */ |
808 | |
809 | for_rx_tx(t) { |
810 | ASSERT((ch->ch_last[t] > 0) || (ch->ch_first[t] == 0)); |
811 | n[t] = ch->ch_last[t] - ch->ch_first[t]; |
812 | ASSERT(n[t] == 0 || n[t] <= na_get_nrings(na, t)); |
813 | } |
814 | |
815 | /* return total number of tx and rx rings for this channel */ |
816 | *(uint32_t *)(uintptr_t)&csm->csm_tx_rings = n[NR_TX]; |
817 | *(uint32_t *)(uintptr_t)&csm->csm_rx_rings = n[NR_RX]; |
818 | |
819 | if (ch->ch_flags & CHANF_USER_PACKET_POOL) { |
820 | *(uint32_t *)(uintptr_t)&csm->csm_allocator_ring_pairs = |
821 | na->na_num_allocator_ring_pairs; |
822 | n[NR_A] = n[NR_F] = na->na_num_allocator_ring_pairs; |
823 | ASSERT(n[NR_A] != 0 && n[NR_A] <= na_get_nrings(na, NR_A)); |
824 | ASSERT(n[NR_A] == (ch->ch_last[NR_A] - ch->ch_first[NR_A])); |
825 | ASSERT(n[NR_F] == (ch->ch_last[NR_F] - ch->ch_first[NR_F])); |
826 | |
827 | n[NR_LBA] = na->na_num_large_buf_alloc_rings; |
828 | if (n[NR_LBA] != 0) { |
829 | *(uint32_t *)(uintptr_t)&csm->csm_large_buf_alloc_rings = n[NR_LBA]; |
830 | ASSERT(n[NR_LBA] == (ch->ch_last[NR_LBA] - ch->ch_first[NR_LBA])); |
831 | } |
832 | } |
833 | |
834 | if (ch->ch_flags & CHANF_EVENT_RING) { |
835 | n[NR_EV] = ch->ch_last[NR_EV] - ch->ch_first[NR_EV]; |
836 | ASSERT(n[NR_EV] != 0 && n[NR_EV] <= na_get_nrings(na, NR_EV)); |
837 | *(uint32_t *)(uintptr_t)&csm->csm_num_event_rings = n[NR_EV]; |
838 | } |
839 | |
bzero(&roff, sizeof(roff));
841 | for (i = 0; i < SKMEM_REGIONS; i++) { |
842 | if (ar->ar_regions[i] == NULL) { |
843 | ASSERT(i == SKMEM_REGION_GUARD_HEAD || |
844 | i == SKMEM_REGION_SCHEMA || |
845 | i == SKMEM_REGION_BUF_LARGE || |
846 | i == SKMEM_REGION_RXBUF_DEF || |
847 | i == SKMEM_REGION_RXBUF_LARGE || |
848 | i == SKMEM_REGION_TXBUF_DEF || |
849 | i == SKMEM_REGION_TXBUF_LARGE || |
850 | i == SKMEM_REGION_RXKMD || |
851 | i == SKMEM_REGION_TXKMD || |
852 | i == SKMEM_REGION_UMD || |
853 | i == SKMEM_REGION_UBFT || |
854 | i == SKMEM_REGION_KBFT || |
855 | i == SKMEM_REGION_RXKBFT || |
856 | i == SKMEM_REGION_TXKBFT || |
857 | i == SKMEM_REGION_TXAUSD || |
858 | i == SKMEM_REGION_RXFUSD || |
859 | i == SKMEM_REGION_USTATS || |
860 | i == SKMEM_REGION_KSTATS || |
861 | i == SKMEM_REGION_INTRINSIC || |
862 | i == SKMEM_REGION_FLOWADV || |
863 | i == SKMEM_REGION_NEXUSADV || |
864 | i == SKMEM_REGION_SYSCTLS || |
865 | i == SKMEM_REGION_GUARD_TAIL); |
866 | continue; |
867 | } |
868 | |
869 | /* not for nexus */ |
870 | ASSERT(i != SKMEM_REGION_SYSCTLS); |
871 | |
872 | /* |
873 | * Get region offsets from base of mmap span; the arena |
874 | * doesn't need to be mmap'd at this point, since we |
875 | * simply compute the relative offset. |
876 | */ |
877 | roff[i] = skmem_arena_get_region_offset(ar, i); |
878 | } |
879 | |
880 | /* |
881 | * The schema is made up of the descriptor followed inline by an array |
882 | * of offsets to the tx, rx, allocator and event rings in the mmap span. |
883 | * They contain the offset between the ring and schema, so the |
884 | * information is usable in userspace to reach the ring from |
885 | * the schema. |
886 | */ |
887 | base = roff[SKMEM_REGION_SCHEMA] + SKMEM_OBJ_ROFF(&csm_oi); |
888 | |
889 | /* initialize schema with tx ring info */ |
890 | for (i = 0, j = ch->ch_first[NR_TX]; i < n[NR_TX]; i++, j++) { |
891 | kr = &na->na_tx_rings[j]; |
892 | if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */ |
893 | continue; |
894 | } |
895 | |
896 | ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED); |
897 | skmem_cache_get_obj_info(arn->arn_ring_cache, |
898 | kr->ckr_ring, &ring_oi, NULL); |
899 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].ring_off = |
900 | (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base; |
901 | |
902 | ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED); |
903 | skmem_cache_get_obj_info(kr->ckr_ksds_cache, |
904 | kr->ckr_ksds, &ksd_oi, &usd_oi); |
905 | |
906 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_ring_ofs[i].sd_off = |
907 | (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) - |
908 | base; |
909 | } |
910 | /* initialize schema with rx ring info */ |
911 | for (i = 0, j = ch->ch_first[NR_RX]; i < n[NR_RX]; i++, j++) { |
912 | kr = &na->na_rx_rings[j]; |
913 | if (KR_KERNEL_ONLY(kr)) { /* skip kernel-only rings */ |
914 | continue; |
915 | } |
916 | |
917 | ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED); |
918 | skmem_cache_get_obj_info(arn->arn_ring_cache, |
919 | kr->ckr_ring, &ring_oi, NULL); |
920 | *(mach_vm_offset_t *) |
921 | (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].ring_off = |
922 | (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base; |
923 | |
924 | ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED); |
925 | skmem_cache_get_obj_info(kr->ckr_ksds_cache, |
926 | kr->ckr_ksds, &ksd_oi, &usd_oi); |
927 | |
928 | *(mach_vm_offset_t *) |
929 | (uintptr_t)&csm->csm_ring_ofs[i + n[NR_TX]].sd_off = |
930 | (roff[SKMEM_REGION_RXFUSD] + SKMEM_OBJ_ROFF(&usd_oi)) - |
931 | base; |
932 | } |
933 | /* initialize schema with allocator ring info */ |
934 | for (i = 0, j = ch->ch_first[NR_A], k = n[NR_TX] + n[NR_RX]; |
935 | i < n[NR_A]; i++, j++) { |
936 | mach_vm_offset_t usd_roff; |
937 | |
938 | usd_roff = roff[SKMEM_REGION_TXAUSD]; |
939 | kr = &na->na_alloc_rings[j]; |
940 | ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED); |
941 | ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED); |
942 | |
943 | skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring, |
944 | &ring_oi, NULL); |
945 | *(mach_vm_offset_t *) |
946 | (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off = |
947 | (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base; |
948 | |
949 | skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds, |
950 | &ksd_oi, &usd_oi); |
951 | *(mach_vm_offset_t *) |
952 | (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off = |
953 | (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base; |
954 | } |
955 | /* initialize schema with free ring info */ |
956 | for (i = 0, j = ch->ch_first[NR_F], k = n[NR_TX] + n[NR_RX] + n[NR_A]; |
957 | i < n[NR_F]; i++, j++) { |
958 | mach_vm_offset_t usd_roff; |
959 | |
960 | usd_roff = roff[SKMEM_REGION_RXFUSD]; |
961 | kr = &na->na_free_rings[j]; |
962 | ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED); |
963 | ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED); |
964 | |
965 | skmem_cache_get_obj_info(arn->arn_ring_cache, kr->ckr_ring, |
966 | &ring_oi, NULL); |
967 | *(mach_vm_offset_t *) |
968 | (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off = |
969 | (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base; |
970 | |
971 | skmem_cache_get_obj_info(kr->ckr_ksds_cache, kr->ckr_ksds, |
972 | &ksd_oi, &usd_oi); |
973 | *(mach_vm_offset_t *) |
974 | (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off = |
975 | (usd_roff + SKMEM_OBJ_ROFF(&usd_oi)) - base; |
976 | } |
977 | /* initialize schema with event ring info */ |
978 | for (i = 0, j = ch->ch_first[NR_EV], k = n[NR_TX] + n[NR_RX] + |
979 | n[NR_A] + n[NR_F]; i < n[NR_EV]; i++, j++) { |
980 | ASSERT(csm->csm_num_event_rings != 0); |
981 | kr = &na->na_event_rings[j]; |
982 | ASSERT(!KR_KERNEL_ONLY(kr)); |
983 | ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED); |
984 | skmem_cache_get_obj_info(arn->arn_ring_cache, |
985 | kr->ckr_ring, &ring_oi, NULL); |
986 | *(mach_vm_offset_t *) |
987 | (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off = |
988 | (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base; |
989 | |
990 | ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED); |
991 | skmem_cache_get_obj_info(kr->ckr_ksds_cache, |
992 | kr->ckr_ksds, &ksd_oi, &usd_oi); |
993 | |
994 | *(mach_vm_offset_t *) |
995 | (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off = |
996 | (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) - |
997 | base; |
998 | } |
999 | /* initialize schema with large buf alloc ring info */ |
1000 | for (i = 0, j = ch->ch_first[NR_LBA], k = n[NR_TX] + n[NR_RX] + |
1001 | n[NR_A] + n[NR_F] + n[NR_EV]; i < n[NR_LBA]; i++, j++) { |
1002 | ASSERT(csm->csm_large_buf_alloc_rings != 0); |
1003 | kr = &na->na_large_buf_alloc_rings[j]; |
1004 | ASSERT(!KR_KERNEL_ONLY(kr)); |
1005 | ASSERT(kr->ckr_flags & CKRF_MEM_RING_INITED); |
1006 | skmem_cache_get_obj_info(arn->arn_ring_cache, |
1007 | kr->ckr_ring, &ring_oi, NULL); |
1008 | *(mach_vm_offset_t *) |
1009 | (uintptr_t)&csm->csm_ring_ofs[i + k].ring_off = |
1010 | (roff[SKMEM_REGION_RING] + SKMEM_OBJ_ROFF(&ring_oi)) - base; |
1011 | |
1012 | ASSERT(kr->ckr_flags & CKRF_MEM_SD_INITED); |
1013 | skmem_cache_get_obj_info(kr->ckr_ksds_cache, |
1014 | kr->ckr_ksds, &ksd_oi, &usd_oi); |
1015 | |
1016 | *(mach_vm_offset_t *) |
1017 | (uintptr_t)&csm->csm_ring_ofs[i + k].sd_off = |
1018 | (roff[SKMEM_REGION_TXAUSD] + SKMEM_OBJ_ROFF(&usd_oi)) - |
1019 | base; |
1020 | } |
1021 | |
1022 | *(uint64_t *)(uintptr_t)&csm->csm_md_redzone_cookie = |
1023 | __ch_umd_redzone_cookie; |
1024 | *(nexus_meta_type_t *)(uintptr_t)&csm->csm_md_type = na->na_md_type; |
1025 | *(nexus_meta_subtype_t *)(uintptr_t)&csm->csm_md_subtype = |
1026 | na->na_md_subtype; |
1027 | |
1028 | if (arn->arn_stats_obj != NULL) { |
1029 | ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] != NULL); |
1030 | ASSERT(roff[SKMEM_REGION_USTATS] != 0); |
1031 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = |
1032 | roff[SKMEM_REGION_USTATS]; |
1033 | *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type = |
1034 | na->na_stats_type; |
1035 | } else { |
1036 | ASSERT(ar->ar_regions[SKMEM_REGION_USTATS] == NULL); |
1037 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_stats_ofs = 0; |
1038 | *(nexus_stats_type_t *)(uintptr_t)&csm->csm_stats_type = |
1039 | NEXUS_STATS_TYPE_INVALID; |
1040 | } |
1041 | |
1042 | if (arn->arn_flowadv_obj != NULL) { |
1043 | ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] != NULL); |
1044 | ASSERT(roff[SKMEM_REGION_FLOWADV] != 0); |
1045 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = |
1046 | roff[SKMEM_REGION_FLOWADV]; |
1047 | *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = |
1048 | na->na_flowadv_max; |
1049 | } else { |
1050 | ASSERT(ar->ar_regions[SKMEM_REGION_FLOWADV] == NULL); |
1051 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_flowadv_ofs = 0; |
1052 | *(uint32_t *)(uintptr_t)&csm->csm_flowadv_max = 0; |
1053 | } |
1054 | |
1055 | if (arn->arn_nexusadv_obj != NULL) { |
1056 | struct __kern_nexus_adv_metadata *adv_md; |
1057 | |
1058 | adv_md = arn->arn_nexusadv_obj; |
1059 | ASSERT(adv_md->knam_version == NX_ADVISORY_MD_CURRENT_VERSION); |
1060 | ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] != NULL); |
1061 | ASSERT(roff[SKMEM_REGION_NEXUSADV] != 0); |
1062 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = |
1063 | roff[SKMEM_REGION_NEXUSADV]; |
1064 | } else { |
1065 | ASSERT(ar->ar_regions[SKMEM_REGION_NEXUSADV] == NULL); |
1066 | *(mach_vm_offset_t *)(uintptr_t)&csm->csm_nexusadv_ofs = 0; |
1067 | } |
1068 | |
1069 | ch->ch_schema = csm; |
1070 | ch->ch_schema_offset = base; |
1071 | |
1072 | return 0; |
1073 | } |
1074 | |
1075 | /* |
1076 | * Called by all routines that create nexus_adapters. |
1077 | * Attach na to the ifp (if any) and provide defaults |
1078 | * for optional callbacks. Defaults assume that we |
* are creating a hardware nexus_adapter.
1080 | */ |
1081 | void |
1082 | na_attach_common(struct nexus_adapter *na, struct kern_nexus *nx, |
1083 | struct kern_nexus_domain_provider *nxdom_prov) |
1084 | { |
1085 | SK_LOCK_ASSERT_HELD(); |
1086 | |
1087 | ASSERT(nx != NULL); |
1088 | ASSERT(nxdom_prov != NULL); |
1089 | ASSERT(na->na_krings_create != NULL); |
1090 | ASSERT(na->na_krings_delete != NULL); |
1091 | if (na->na_type != NA_NETIF_COMPAT_DEV) { |
1092 | ASSERT(na_get_nrings(na, NR_TX) != 0); |
1093 | } |
1094 | if (na->na_type != NA_NETIF_COMPAT_HOST) { |
1095 | ASSERT(na_get_nrings(na, NR_RX) != 0); |
1096 | } |
1097 | ASSERT(na->na_channels == 0); |
1098 | |
1099 | if (na->na_notify == NULL) { |
1100 | na->na_notify = na_notify; |
1101 | } |
1102 | |
1103 | na->na_nx = nx; |
1104 | na->na_nxdom_prov = nxdom_prov; |
1105 | |
1106 | SK_D("na 0x%llx nx 0x%llx nxtype %u ar 0x%llx" , |
1107 | SK_KVA(na), SK_KVA(nx), nxdom_prov->nxdom_prov_dom->nxdom_type, |
1108 | SK_KVA(na->na_arena)); |
1109 | } |
1110 | |
1111 | void |
1112 | na_post_event(struct __kern_channel_ring *kring, boolean_t nodelay, |
1113 | boolean_t within_kevent, boolean_t selwake, uint32_t hint) |
1114 | { |
1115 | struct nexus_adapter *na = KRNA(kring); |
1116 | enum txrx t = kring->ckr_tx; |
1117 | |
1118 | SK_DF(SK_VERB_EVENTS, |
1119 | "%s(%d) na \"%s\" (0x%llx) kr 0x%llx kev %u sel %u hint 0x%b" , |
1120 | sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()), |
1121 | na->na_name, SK_KVA(na), SK_KVA(kring), within_kevent, selwake, |
1122 | hint, CHAN_FILT_HINT_BITS); |
1123 | |
1124 | csi_selwakeup_one(kring, nodelay, within_kevent, selwake, hint); |
1125 | /* |
1126 | * optimization: avoid a wake up on the global |
1127 | * queue if nobody has registered for more |
1128 | * than one ring |
1129 | */ |
1130 | if (na->na_si_users[t] > 0) { |
1131 | csi_selwakeup_all(na, t, nodelay, within_kevent, selwake, hint); |
1132 | } |
1133 | } |
1134 | |
1135 | /* default notify callback */ |
1136 | static int |
1137 | na_notify(struct __kern_channel_ring *kring, struct proc *p, uint32_t flags) |
1138 | { |
1139 | #pragma unused(p) |
1140 | SK_DF(SK_VERB_NOTIFY | ((kring->ckr_tx == NR_TX) ? |
1141 | SK_VERB_TX : SK_VERB_RX), |
1142 | "%s(%d) [%s] na \"%s\" (0x%llx) kr \"%s\" (0x%llx) krflags 0x%b " |
1143 | "flags 0x%x, kh %u kt %u | h %u t %u" , |
1144 | sk_proc_name_address(p), sk_proc_pid(p), |
1145 | (kring->ckr_tx == NR_TX) ? "W" : "R" , KRNA(kring)->na_name, |
1146 | SK_KVA(KRNA(kring)), kring->ckr_name, SK_KVA(kring), |
1147 | kring->ckr_flags, CKRF_BITS, flags, kring->ckr_khead, |
1148 | kring->ckr_ktail, kring->ckr_rhead, kring->ckr_rtail); |
1149 | |
na_post_event(kring, (flags & NA_NOTEF_PUSH),
(flags & NA_NOTEF_IN_KEVENT), TRUE, 0);
1152 | |
1153 | return 0; |
1154 | } |
1155 | |
1156 | /* |
1157 | * Fetch configuration from the device, to cope with dynamic |
1158 | * reconfigurations after loading the module. |
1159 | */ |
1160 | /* call with SK_LOCK held */ |
1161 | int |
1162 | na_update_config(struct nexus_adapter *na) |
1163 | { |
1164 | uint32_t txr, txd, rxr, rxd; |
1165 | |
1166 | SK_LOCK_ASSERT_HELD(); |
1167 | |
1168 | txr = txd = rxr = rxd = 0; |
1169 | if (na->na_config == NULL || |
1170 | na->na_config(na, &txr, &txd, &rxr, &rxd)) { |
1171 | /* take whatever we had at init time */ |
txr = na_get_nrings(na, NR_TX);
txd = na_get_nslots(na, NR_TX);
rxr = na_get_nrings(na, NR_RX);
rxd = na_get_nslots(na, NR_RX);
1176 | } |
1177 | |
if (na_get_nrings(na, NR_TX) == txr &&
na_get_nslots(na, NR_TX) == txd &&
na_get_nrings(na, NR_RX) == rxr &&
na_get_nslots(na, NR_RX) == rxd) {
1182 | return 0; /* nothing changed */ |
1183 | } |
1184 | SK_D("stored config %s: txring %u x %u, rxring %u x %u" , |
1185 | na->na_name, na_get_nrings(na, NR_TX), na_get_nslots(na, NR_TX), |
1186 | na_get_nrings(na, NR_RX), na_get_nslots(na, NR_RX)); |
1187 | SK_D("new config %s: txring %u x %u, rxring %u x %u" , |
1188 | na->na_name, txr, txd, rxr, rxd); |
1189 | |
1190 | if (na->na_channels == 0) { |
1191 | SK_D("configuration changed (but fine)" ); |
1192 | na_set_nrings(na, t: NR_TX, v: txr); |
1193 | na_set_nslots(na, t: NR_TX, v: txd); |
1194 | na_set_nrings(na, t: NR_RX, v: rxr); |
1195 | na_set_nslots(na, t: NR_RX, v: rxd); |
1196 | return 0; |
1197 | } |
1198 | SK_ERR("configuration changed while active, this is bad..." ); |
1199 | return 1; |
1200 | } |
1201 | |
1202 | static void |
1203 | na_kr_setup_netif_svc_map(struct nexus_adapter *na) |
1204 | { |
1205 | uint32_t i; |
1206 | uint32_t num_tx_rings; |
1207 | |
1208 | ASSERT(na->na_type == NA_NETIF_DEV); |
num_tx_rings = na_get_nrings(na, NR_TX);
1210 | |
1211 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK_SYS) == |
1212 | NAKR_WMM_SC2RINGID(KPKT_SC_BK)); |
1213 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) == |
1214 | NAKR_WMM_SC2RINGID(KPKT_SC_RD)); |
1215 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) == |
1216 | NAKR_WMM_SC2RINGID(KPKT_SC_OAM)); |
1217 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) == |
1218 | NAKR_WMM_SC2RINGID(KPKT_SC_RV)); |
1219 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_AV) == |
1220 | NAKR_WMM_SC2RINGID(KPKT_SC_VI)); |
1221 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) == |
1222 | NAKR_WMM_SC2RINGID(KPKT_SC_CTL)); |
1223 | |
1224 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BK) < NA_NUM_WMM_CLASSES); |
1225 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_BE) < NA_NUM_WMM_CLASSES); |
1226 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VI) < NA_NUM_WMM_CLASSES); |
1227 | _CASSERT(NAKR_WMM_SC2RINGID(KPKT_SC_VO) < NA_NUM_WMM_CLASSES); |
1228 | |
1229 | _CASSERT(MBUF_SCIDX(KPKT_SC_BK_SYS) < KPKT_SC_MAX_CLASSES); |
1230 | _CASSERT(MBUF_SCIDX(KPKT_SC_BK) < KPKT_SC_MAX_CLASSES); |
1231 | _CASSERT(MBUF_SCIDX(KPKT_SC_BE) < KPKT_SC_MAX_CLASSES); |
1232 | _CASSERT(MBUF_SCIDX(KPKT_SC_RD) < KPKT_SC_MAX_CLASSES); |
1233 | _CASSERT(MBUF_SCIDX(KPKT_SC_OAM) < KPKT_SC_MAX_CLASSES); |
1234 | _CASSERT(MBUF_SCIDX(KPKT_SC_AV) < KPKT_SC_MAX_CLASSES); |
1235 | _CASSERT(MBUF_SCIDX(KPKT_SC_RV) < KPKT_SC_MAX_CLASSES); |
1236 | _CASSERT(MBUF_SCIDX(KPKT_SC_VI) < KPKT_SC_MAX_CLASSES); |
1237 | _CASSERT(MBUF_SCIDX(KPKT_SC_SIG) < KPKT_SC_MAX_CLASSES); |
1238 | _CASSERT(MBUF_SCIDX(KPKT_SC_VO) < KPKT_SC_MAX_CLASSES); |
1239 | _CASSERT(MBUF_SCIDX(KPKT_SC_CTL) < KPKT_SC_MAX_CLASSES); |
1240 | |
1241 | /* |
1242 | * we support the following 2 configurations: |
* 1. packets from all 10 service classes map to one ring.
1244 | * 2. a 10:4 mapping between service classes and the rings. These 4 |
1245 | * rings map to the 4 WMM access categories. |
1246 | */ |
1247 | if (na->na_nx->nx_prov->nxprov_params->nxp_qmap == NEXUS_QMAP_TYPE_WMM) { |
1248 | ASSERT(num_tx_rings == NEXUS_NUM_WMM_QUEUES); |
1249 | /* setup the adapter's service class LUT */ |
1250 | NAKR_SET_SVC_LUT(na, KPKT_SC_BK_SYS); |
1251 | NAKR_SET_SVC_LUT(na, KPKT_SC_BK); |
1252 | NAKR_SET_SVC_LUT(na, KPKT_SC_BE); |
1253 | NAKR_SET_SVC_LUT(na, KPKT_SC_RD); |
1254 | NAKR_SET_SVC_LUT(na, KPKT_SC_OAM); |
1255 | NAKR_SET_SVC_LUT(na, KPKT_SC_AV); |
1256 | NAKR_SET_SVC_LUT(na, KPKT_SC_RV); |
1257 | NAKR_SET_SVC_LUT(na, KPKT_SC_VI); |
1258 | NAKR_SET_SVC_LUT(na, KPKT_SC_SIG); |
1259 | NAKR_SET_SVC_LUT(na, KPKT_SC_VO); |
1260 | NAKR_SET_SVC_LUT(na, KPKT_SC_CTL); |
1261 | |
/* Initialize the service class for each of the 4 rings */
1263 | NAKR_SET_KR_SVC(na, KPKT_SC_BK); |
1264 | NAKR_SET_KR_SVC(na, KPKT_SC_BE); |
1265 | NAKR_SET_KR_SVC(na, KPKT_SC_VI); |
1266 | NAKR_SET_KR_SVC(na, KPKT_SC_VO); |
1267 | } else { |
1268 | ASSERT(na->na_nx->nx_prov->nxprov_params->nxp_qmap == |
1269 | NEXUS_QMAP_TYPE_DEFAULT); |
/* 10:1 mapping */
1271 | for (i = 0; i < KPKT_SC_MAX_CLASSES; i++) { |
1272 | na->na_kring_svc_lut[i] = 0; |
1273 | } |
1274 | for (i = 0; i < num_tx_rings; i++) { |
NAKR(na, NR_TX)[i].ckr_svc = KPKT_SC_UNSPEC;
1276 | } |
1277 | } |
1278 | } |
1279 | |
static LCK_GRP_DECLARE(channel_txq_lock_group, "sk_ch_txq_lock");
static LCK_GRP_DECLARE(channel_rxq_lock_group, "sk_ch_rxq_lock");
static LCK_GRP_DECLARE(channel_txs_lock_group, "sk_ch_txs_lock");
static LCK_GRP_DECLARE(channel_rxs_lock_group, "sk_ch_rxs_lock");
static LCK_GRP_DECLARE(channel_alloc_lock_group, "sk_ch_alloc_lock");
static LCK_GRP_DECLARE(channel_evq_lock_group, "sk_ch_evq_lock");
static LCK_GRP_DECLARE(channel_evs_lock_group, "sk_ch_evs_lock");
1287 | |
1288 | static lck_grp_t * |
1289 | na_kr_q_lck_grp(enum txrx t) |
1290 | { |
1291 | switch (t) { |
1292 | case NR_TX: |
1293 | return &channel_txq_lock_group; |
1294 | case NR_RX: |
1295 | return &channel_rxq_lock_group; |
1296 | case NR_A: |
1297 | case NR_F: |
1298 | case NR_LBA: |
1299 | return &channel_alloc_lock_group; |
1300 | case NR_EV: |
1301 | return &channel_evq_lock_group; |
1302 | default: |
1303 | VERIFY(0); |
1304 | /* NOTREACHED */ |
1305 | __builtin_unreachable(); |
1306 | } |
1307 | } |
1308 | |
1309 | static lck_grp_t * |
1310 | na_kr_s_lck_grp(enum txrx t) |
1311 | { |
1312 | switch (t) { |
1313 | case NR_TX: |
1314 | return &channel_txs_lock_group; |
1315 | case NR_RX: |
1316 | return &channel_rxs_lock_group; |
1317 | case NR_A: |
1318 | case NR_F: |
1319 | case NR_LBA: |
1320 | return &channel_alloc_lock_group; |
1321 | case NR_EV: |
1322 | return &channel_evs_lock_group; |
1323 | default: |
1324 | VERIFY(0); |
1325 | /* NOTREACHED */ |
1326 | __builtin_unreachable(); |
1327 | } |
1328 | } |
1329 | |
1330 | static void |
1331 | kr_init_tbr(struct __kern_channel_ring *r) |
1332 | { |
1333 | r->ckr_tbr_depth = CKR_TBR_TOKEN_INVALID; |
1334 | r->ckr_tbr_token = CKR_TBR_TOKEN_INVALID; |
1335 | r->ckr_tbr_last = 0; |
1336 | } |
1337 | |
1338 | struct kern_pbufpool * |
1339 | na_kr_get_pp(struct nexus_adapter *na, enum txrx t) |
1340 | { |
1341 | struct kern_pbufpool *pp = NULL; |
1342 | switch (t) { |
1343 | case NR_RX: |
1344 | case NR_F: |
1345 | case NR_EV: |
pp = skmem_arena_nexus(na->na_arena)->arn_rx_pp;
1347 | break; |
1348 | case NR_TX: |
1349 | case NR_A: |
1350 | case NR_LBA: |
pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
1352 | break; |
1353 | default: |
1354 | VERIFY(0); |
1355 | /* NOTREACHED */ |
1356 | __builtin_unreachable(); |
1357 | } |
1358 | |
1359 | return pp; |
1360 | } |
1361 | |
1362 | /* |
1363 | * Create the krings array and initialize the fields common to all adapters. |
1364 | * The array layout is this: |
1365 | * |
1366 | * +----------+ |
1367 | * na->na_tx_rings -----> | | \ |
1368 | * | | } na->na_num_tx_rings |
1369 | * | | / |
1370 | * na->na_rx_rings ----> +----------+ |
1371 | * | | \ |
1372 | * | | } na->na_num_rx_rings |
1373 | * | | / |
1374 | * na->na_alloc_rings -> +----------+ |
1375 | * | | \ |
1376 | * na->na_free_rings --> +----------+ } na->na_num_allocator_ring_pairs |
1377 | * | | / |
1378 | * na->na_event_rings -> +----------+ |
1379 | * | | \ |
1380 | * | | } na->na_num_event_rings |
1381 | * | | / |
1382 | * na->na_large_buf_alloc_rings -> +----------+ |
1383 | * | | \ |
1384 | * | | } na->na_num_large_buf_alloc_rings |
1385 | * | | / |
1386 | * na->na_tail -----> +----------+ |
1387 | */ |
1388 | /* call with SK_LOCK held */ |
1389 | static int |
1390 | na_kr_create(struct nexus_adapter *na, boolean_t alloc_ctx) |
1391 | { |
1392 | lck_grp_t *q_lck_grp, *s_lck_grp; |
1393 | uint32_t i, count, ndesc; |
1394 | struct kern_pbufpool *pp = NULL; |
1395 | struct __kern_channel_ring *kring; |
1396 | uint32_t n[NR_ALL]; |
1397 | int c, tot_slots, err = 0; |
1398 | enum txrx t; |
1399 | |
1400 | SK_LOCK_ASSERT_HELD(); |
1401 | |
n[NR_TX] = na_get_nrings(na, NR_TX);
n[NR_RX] = na_get_nrings(na, NR_RX);
n[NR_A] = na_get_nrings(na, NR_A);
n[NR_F] = na_get_nrings(na, NR_F);
n[NR_EV] = na_get_nrings(na, NR_EV);
n[NR_LBA] = na_get_nrings(na, NR_LBA);
1408 | |
1409 | count = n[NR_TX] + n[NR_RX] + n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA]; |
1410 | |
1411 | na->na_tx_rings = sk_alloc_type_array(struct __kern_channel_ring, count, |
1412 | Z_WAITOK, skmem_tag_nx_rings); |
1413 | if (__improbable(na->na_tx_rings == NULL)) { |
1414 | SK_ERR("Cannot allocate krings" ); |
1415 | err = ENOMEM; |
1416 | goto error; |
1417 | } |
1418 | |
1419 | na->na_rx_rings = na->na_tx_rings + n[NR_TX]; |
1420 | if (n[NR_A] != 0) { |
1421 | na->na_alloc_rings = na->na_rx_rings + n[NR_RX]; |
1422 | na->na_free_rings = na->na_alloc_rings + n[NR_A]; |
1423 | } else { |
1424 | na->na_alloc_rings = na->na_free_rings = NULL; |
1425 | } |
1426 | if (n[NR_EV] != 0) { |
1427 | if (na->na_free_rings != NULL) { |
1428 | na->na_event_rings = na->na_free_rings + n[NR_F]; |
1429 | } else { |
1430 | na->na_event_rings = na->na_rx_rings + n[NR_RX]; |
1431 | } |
1432 | } |
1433 | if (n[NR_LBA] != 0) { |
1434 | ASSERT(n[NR_A] != 0); |
1435 | if (na->na_event_rings != NULL) { |
1436 | na->na_large_buf_alloc_rings = na->na_event_rings + n[NR_EV]; |
1437 | } else { |
1438 | /* alloc/free rings must also be present */ |
1439 | ASSERT(na->na_free_rings != NULL); |
1440 | na->na_large_buf_alloc_rings = na->na_free_rings + n[NR_F]; |
1441 | } |
1442 | } |
1443 | |
1444 | /* total number of slots for TX/RX adapter rings */ |
c = tot_slots = (n[NR_TX] * na_get_nslots(na, NR_TX)) +
(n[NR_RX] * na_get_nslots(na, NR_RX));
1447 | |
1448 | /* for scratch space on alloc and free rings */ |
1449 | if (n[NR_A] != 0) { |
tot_slots += n[NR_A] * na_get_nslots(na, NR_A);
tot_slots += n[NR_F] * na_get_nslots(na, NR_F);
tot_slots += n[NR_LBA] * na_get_nslots(na, NR_LBA);
1453 | c = tot_slots; |
1454 | } |
1455 | na->na_total_slots = tot_slots; |
1456 | |
1457 | /* slot context (optional) for all TX/RX ring slots of this adapter */ |
1458 | if (alloc_ctx) { |
1459 | na->na_slot_ctxs = |
1460 | skn_alloc_type_array(slot_ctxs, struct slot_ctx, |
1461 | na->na_total_slots, Z_WAITOK, skmem_tag_nx_contexts); |
1462 | if (na->na_slot_ctxs == NULL) { |
1463 | SK_ERR("Cannot allocate slot contexts" ); |
1464 | err = ENOMEM; |
1465 | goto error; |
1466 | } |
1467 | os_atomic_or(&na->na_flags, NAF_SLOT_CONTEXT, relaxed); |
1468 | } |
1469 | |
1470 | /* |
1471 | * packet handle array storage for all TX/RX ring slots of this |
1472 | * adapter. |
1473 | */ |
1474 | na->na_scratch = skn_alloc_type_array(scratch, kern_packet_t, |
1475 | na->na_total_slots, Z_WAITOK, skmem_tag_nx_scratch); |
1476 | if (na->na_scratch == NULL) { |
1477 | SK_ERR("Cannot allocate slot contexts" ); |
1478 | err = ENOMEM; |
1479 | goto error; |
1480 | } |
1481 | |
1482 | /* |
1483 | * All fields in krings are 0 except the one initialized below. |
1484 | * but better be explicit on important kring fields. |
1485 | */ |
1486 | for_all_rings(t) { |
1487 | ndesc = na_get_nslots(na, t); |
1488 | pp = na_kr_get_pp(na, t); |
1489 | for (i = 0; i < n[t]; i++) { |
1490 | kring = &NAKR(na, t)[i]; |
bzero(kring, sizeof(*kring));
1492 | kring->ckr_na = na; |
1493 | kring->ckr_pp = pp; |
1494 | kring->ckr_max_pkt_len = |
1495 | (t == NR_LBA ? PP_BUF_SIZE_LARGE(pp) : |
1496 | PP_BUF_SIZE_DEF(pp)) * |
1497 | pp->pp_max_frags; |
1498 | kring->ckr_ring_id = i; |
1499 | kring->ckr_tx = t; |
1500 | kr_init_to_mhints(kring, ndesc); |
kr_init_tbr(kring);
1502 | if (NA_KERNEL_ONLY(na)) { |
1503 | kring->ckr_flags |= CKRF_KERNEL_ONLY; |
1504 | } |
1505 | if (na->na_flags & NAF_HOST_ONLY) { |
1506 | kring->ckr_flags |= CKRF_HOST; |
1507 | } |
1508 | ASSERT((t >= NR_TXRX) || (c > 0)); |
1509 | if ((t < NR_TXRX) && |
1510 | (na->na_flags & NAF_SLOT_CONTEXT)) { |
1511 | ASSERT(na->na_slot_ctxs != NULL); |
1512 | kring->ckr_flags |= CKRF_SLOT_CONTEXT; |
1513 | kring->ckr_slot_ctxs = |
1514 | na->na_slot_ctxs + (tot_slots - c); |
1515 | } |
1516 | ASSERT(na->na_scratch != NULL); |
1517 | if (t < NR_TXRXAF || t == NR_LBA) { |
1518 | kring->ckr_scratch = |
1519 | na->na_scratch + (tot_slots - c); |
1520 | } |
1521 | if (t < NR_TXRXAF || t == NR_LBA) { |
1522 | c -= ndesc; |
1523 | } |
1524 | switch (t) { |
1525 | case NR_A: |
1526 | if (i == 0) { |
1527 | kring->ckr_na_sync = |
1528 | na_packet_pool_alloc_sync; |
1529 | kring->ckr_alloc_ws = |
1530 | na_upp_alloc_lowat; |
1531 | } else { |
1532 | ASSERT(i == 1); |
1533 | kring->ckr_na_sync = |
1534 | na_packet_pool_alloc_buf_sync; |
1535 | kring->ckr_alloc_ws = |
1536 | na_upp_alloc_buf_lowat; |
1537 | } |
1538 | break; |
1539 | case NR_F: |
1540 | if (i == 0) { |
1541 | kring->ckr_na_sync = |
1542 | na_packet_pool_free_sync; |
1543 | } else { |
1544 | ASSERT(i == 1); |
1545 | kring->ckr_na_sync = |
1546 | na_packet_pool_free_buf_sync; |
1547 | } |
1548 | break; |
1549 | case NR_TX: |
1550 | kring->ckr_na_sync = na->na_txsync; |
1551 | if (na->na_flags & NAF_TX_MITIGATION) { |
1552 | kring->ckr_flags |= CKRF_MITIGATION; |
1553 | } |
1554 | switch (na->na_type) { |
1555 | #if CONFIG_NEXUS_USER_PIPE |
1556 | case NA_USER_PIPE: |
1557 | ASSERT(!(na->na_flags & |
1558 | NAF_USER_PKT_POOL)); |
1559 | kring->ckr_prologue = kr_txprologue; |
1560 | kring->ckr_finalize = NULL; |
1561 | break; |
1562 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
1563 | #if CONFIG_NEXUS_MONITOR |
1564 | case NA_MONITOR: |
1565 | ASSERT(!(na->na_flags & |
1566 | NAF_USER_PKT_POOL)); |
1567 | kring->ckr_prologue = kr_txprologue; |
1568 | kring->ckr_finalize = NULL; |
1569 | break; |
1570 | #endif /* CONFIG_NEXUS_MONITOR */ |
1571 | default: |
1572 | if (na->na_flags & NAF_USER_PKT_POOL) { |
1573 | kring->ckr_prologue = |
1574 | kr_txprologue_upp; |
1575 | kring->ckr_finalize = |
1576 | kr_txfinalize_upp; |
1577 | } else { |
1578 | kring->ckr_prologue = |
1579 | kr_txprologue; |
1580 | kring->ckr_finalize = |
1581 | kr_txfinalize; |
1582 | } |
1583 | break; |
1584 | } |
1585 | break; |
1586 | case NR_RX: |
1587 | kring->ckr_na_sync = na->na_rxsync; |
1588 | if (na->na_flags & NAF_RX_MITIGATION) { |
1589 | kring->ckr_flags |= CKRF_MITIGATION; |
1590 | } |
1591 | switch (na->na_type) { |
1592 | #if CONFIG_NEXUS_USER_PIPE |
1593 | case NA_USER_PIPE: |
1594 | ASSERT(!(na->na_flags & |
1595 | NAF_USER_PKT_POOL)); |
1596 | kring->ckr_prologue = |
1597 | kr_rxprologue_nodetach; |
1598 | kring->ckr_finalize = kr_rxfinalize; |
1599 | break; |
1600 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
1601 | #if CONFIG_NEXUS_MONITOR |
1602 | case NA_MONITOR: |
1603 | ASSERT(!(na->na_flags & |
1604 | NAF_USER_PKT_POOL)); |
1605 | kring->ckr_prologue = |
1606 | kr_rxprologue_nodetach; |
1607 | kring->ckr_finalize = kr_rxfinalize; |
1608 | break; |
1609 | #endif /* CONFIG_NEXUS_MONITOR */ |
1610 | default: |
1611 | if (na->na_flags & NAF_USER_PKT_POOL) { |
1612 | kring->ckr_prologue = |
1613 | kr_rxprologue_upp; |
1614 | kring->ckr_finalize = |
1615 | kr_rxfinalize_upp; |
1616 | } else { |
1617 | kring->ckr_prologue = |
1618 | kr_rxprologue; |
1619 | kring->ckr_finalize = |
1620 | kr_rxfinalize; |
1621 | } |
1622 | break; |
1623 | } |
1624 | break; |
1625 | case NR_EV: |
1626 | kring->ckr_na_sync = kern_channel_event_sync; |
1627 | break; |
1628 | case NR_LBA: |
1629 | kring->ckr_na_sync = na_packet_pool_alloc_large_sync; |
1630 | kring->ckr_alloc_ws = na_upp_alloc_lowat; |
1631 | break; |
1632 | default: |
1633 | VERIFY(0); |
1634 | /* NOTREACHED */ |
1635 | __builtin_unreachable(); |
1636 | } |
1637 | if (t != NR_EV) { |
1638 | kring->ckr_na_notify = na->na_notify; |
1639 | } else { |
1640 | kring->ckr_na_notify = NULL; |
1641 | } |
1642 | (void) snprintf(kring->ckr_name,
1643 | sizeof(kring->ckr_name) - 1,
1644 | "%s %s%u%s", na->na_name, sk_ring2str(t), i,
1645 | ((kring->ckr_flags & CKRF_HOST) ? "^" : ""));
1646 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1647 | "kr \"%s\" (0x%llx) krflags 0x%b rh %u rt %u" , |
1648 | kring->ckr_name, SK_KVA(kring), kring->ckr_flags, |
1649 | CKRF_BITS, kring->ckr_rhead, kring->ckr_rtail); |
1650 | kring->ckr_state = KR_READY; |
1651 | q_lck_grp = na_kr_q_lck_grp(t); |
1652 | s_lck_grp = na_kr_s_lck_grp(t); |
1653 | kring->ckr_qlock_group = q_lck_grp; |
1654 | lck_mtx_init(&kring->ckr_qlock, kring->ckr_qlock_group,
1655 | &channel_lock_attr);
1656 | kring->ckr_slock_group = s_lck_grp;
1657 | lck_spin_init(&kring->ckr_slock, kring->ckr_slock_group,
1658 | &channel_lock_attr);
1659 | csi_init(&kring->ckr_si, |
1660 | (kring->ckr_flags & CKRF_MITIGATION), |
1661 | na->na_ch_mit_ival); |
1662 | } |
1663 | csi_init(&na->na_si[t], |
1664 | (na->na_flags & (NAF_TX_MITIGATION | NAF_RX_MITIGATION)), |
1665 | na->na_ch_mit_ival); |
1666 | } |
1667 | ASSERT(c == 0); |
1668 | na->na_tail = na->na_rx_rings + n[NR_RX] + n[NR_A] + n[NR_F] + |
1669 | n[NR_EV] + n[NR_LBA]; |
1670 | |
1671 | if (na->na_type == NA_NETIF_DEV) { |
1672 | na_kr_setup_netif_svc_map(na); |
1673 | } |
1674 | |
1675 | /* validate now for cases where we create only krings */ |
1676 | na_krings_verify(na); |
1677 | return 0; |
1678 | |
1679 | error: |
1680 | ASSERT(err != 0); |
1681 | if (na->na_tx_rings != NULL) {
1682 | /* na_tail is not set yet on this error path; use the ring counts */
1683 | sk_free_type_array(struct __kern_channel_ring, n[NR_TX] + n[NR_RX] +
1684 | n[NR_A] + n[NR_F] + n[NR_EV] + n[NR_LBA], na->na_tx_rings);
}
1685 | if (na->na_slot_ctxs != NULL) { |
1686 | ASSERT(na->na_flags & NAF_SLOT_CONTEXT); |
1687 | skn_free_type_array(slot_ctxs, |
1688 | struct slot_ctx, na->na_total_slots, |
1689 | na->na_slot_ctxs); |
1690 | na->na_slot_ctxs = NULL; |
1691 | } |
1692 | if (na->na_scratch != NULL) { |
1693 | skn_free_type_array(scratch, |
1694 | kern_packet_t, na->na_total_slots, |
1695 | na->na_scratch); |
1696 | na->na_scratch = NULL; |
1697 | } |
1698 | return err; |
1699 | } |
1700 | |
1701 | /* undo the actions performed by na_kr_create() */ |
1702 | /* call with SK_LOCK held */ |
1703 | static void |
1704 | na_kr_delete(struct nexus_adapter *na) |
1705 | { |
1706 | struct __kern_channel_ring *kring = na->na_tx_rings; |
1707 | enum txrx t; |
1708 | |
1709 | ASSERT((kring != NULL) && (na->na_tail != NULL)); |
1710 | SK_LOCK_ASSERT_HELD(); |
1711 | |
1712 | for_all_rings(t) { |
1713 | csi_destroy(&na->na_si[t]); |
1714 | } |
1715 | /* we rely on the krings layout described above */ |
1716 | for (; kring != na->na_tail; kring++) { |
1717 | lck_mtx_destroy(&kring->ckr_qlock, kring->ckr_qlock_group);
1718 | lck_spin_destroy(&kring->ckr_slock, kring->ckr_slock_group);
1719 | csi_destroy(&kring->ckr_si); |
1720 | if (kring->ckr_flags & CKRF_SLOT_CONTEXT) { |
1721 | kring->ckr_flags &= ~CKRF_SLOT_CONTEXT; |
1722 | ASSERT(kring->ckr_slot_ctxs != NULL); |
1723 | kring->ckr_slot_ctxs = NULL; |
1724 | } |
1725 | } |
1726 | if (na->na_slot_ctxs != NULL) { |
1727 | ASSERT(na->na_flags & NAF_SLOT_CONTEXT); |
1728 | os_atomic_andnot(&na->na_flags, NAF_SLOT_CONTEXT, relaxed); |
1729 | skn_free_type_array(slot_ctxs, |
1730 | struct slot_ctx, na->na_total_slots, |
1731 | na->na_slot_ctxs); |
1732 | na->na_slot_ctxs = NULL; |
1733 | } |
1734 | if (na->na_scratch != NULL) { |
1735 | skn_free_type_array(scratch, |
1736 | kern_packet_t, na->na_total_slots, |
1737 | na->na_scratch); |
1738 | na->na_scratch = NULL; |
1739 | } |
1740 | ASSERT(!(na->na_flags & NAF_SLOT_CONTEXT)); |
1741 | sk_free_type_array(struct __kern_channel_ring, |
1742 | na->na_tail - na->na_tx_rings, na->na_tx_rings); |
1743 | na->na_tx_rings = na->na_rx_rings = na->na_alloc_rings = na->na_free_rings =
1744 | na->na_event_rings = na->na_large_buf_alloc_rings = na->na_tail = NULL;
1745 | } |
1746 | |
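/*
 * Zero and initialize the kernel slot descriptors (and, unless the
 * ring is kernel-only, the matching user slot descriptors) for a
 * freshly allocated ksds/usds array of ndesc entries.
 */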
1747 | static void |
1748 | na_kr_slot_desc_init(struct __slot_desc *ksds, |
1749 | boolean_t kernel_only, struct __slot_desc *usds, size_t ndesc) |
1750 | { |
1751 | size_t i; |
1752 | |
1753 | bzero(ksds, ndesc * SLOT_DESC_SZ);
1754 | if (usds != NULL) {
1755 | ASSERT(!kernel_only);
1756 | bzero(usds, ndesc * SLOT_DESC_SZ);
1757 | } else { |
1758 | ASSERT(kernel_only); |
1759 | } |
1760 | |
1761 | for (i = 0; i < ndesc; i++) { |
1762 | KSD_INIT(SLOT_DESC_KSD(&ksds[i])); |
1763 | if (!kernel_only) { |
1764 | USD_INIT(SLOT_DESC_USD(&usds[i])); |
1765 | } |
1766 | } |
1767 | } |
1768 | |
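/*
 * Allocate the memory-backed state for each kring created above: the
 * user-visible __user_channel_ring (unless the kring is kernel-only),
 * the kernel/user slot descriptor arrays, and -- for TX/RX rings of
 * adapters without a user packet pool -- an initial packet attached
 * to every slot.  Offsets published in the user ring are relative to
 * the ring object itself rather than to the base of the mmap span.
 */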
1769 | /* call with SK_LOCK held */ |
1770 | static int |
1771 | na_kr_setup(struct nexus_adapter *na, struct kern_channel *ch) |
1772 | { |
1773 | struct skmem_arena *ar = na->na_arena; |
1774 | struct skmem_arena_nexus *arn; |
1775 | mach_vm_offset_t roff[SKMEM_REGIONS]; |
1776 | enum txrx t; |
1777 | uint32_t i; |
1778 | |
1779 | SK_LOCK_ASSERT_HELD(); |
1780 | ASSERT(!(na->na_flags & NAF_MEM_NO_INIT)); |
1781 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
1782 | arn = skmem_arena_nexus(ar); |
1783 | ASSERT(arn != NULL); |
1784 | |
1785 | bzero(&roff, sizeof(roff));
1786 | for (i = 0; i < SKMEM_REGIONS; i++) { |
1787 | if (ar->ar_regions[i] == NULL) { |
1788 | continue; |
1789 | } |
1790 | |
1791 | /* not for nexus */ |
1792 | ASSERT(i != SKMEM_REGION_SYSCTLS); |
1793 | |
1794 | /* |
1795 | * Get region offsets from base of mmap span; the arena |
1796 | * doesn't need to be mmap'd at this point, since we |
1797 | * simply compute the relative offset. |
1798 | */ |
1799 | roff[i] = skmem_arena_get_region_offset(ar, i); |
1800 | } |
1801 | |
1802 | for_all_rings(t) { |
1803 | for (i = 0; i < na_get_nrings(na, t); i++) { |
1804 | struct __kern_channel_ring *kring = &NAKR(na, t)[i]; |
1805 | struct __user_channel_ring *ring = kring->ckr_ring; |
1806 | mach_vm_offset_t ring_off, usd_roff; |
1807 | struct skmem_obj_info oi, oim; |
1808 | uint32_t ndesc; |
1809 | |
1810 | if (ring != NULL) { |
1811 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1812 | "kr 0x%llx (\"%s\") is already " |
1813 | "initialized" , SK_KVA(kring), |
1814 | kring->ckr_name); |
1815 | continue; /* already created by somebody else */ |
1816 | } |
1817 | |
1818 | if (!KR_KERNEL_ONLY(kring) && |
1819 | (ring = skmem_cache_alloc(arn->arn_ring_cache, |
1820 | SKMEM_NOSLEEP)) == NULL) { |
1821 | SK_ERR("Cannot allocate %s_ring for kr " |
1822 | "0x%llx (\"%s\")" , sk_ring2str(t), |
1823 | SK_KVA(kring), kring->ckr_name); |
1824 | goto cleanup; |
1825 | } |
1826 | kring->ckr_flags |= CKRF_MEM_RING_INITED; |
1827 | kring->ckr_ring = ring; |
1828 | ndesc = kring->ckr_num_slots; |
1829 | |
1830 | if (ring == NULL) { |
1831 | goto skip_user_ring_setup; |
1832 | } |
1833 | |
1834 | *(uint32_t *)(uintptr_t)&ring->ring_num_slots = ndesc; |
1835 | |
1836 | /* offset of current ring in mmap span */ |
1837 | skmem_cache_get_obj_info(arn->arn_ring_cache, |
1838 | ring, &oi, NULL); |
1839 | ring_off = (roff[SKMEM_REGION_RING] + |
1840 | SKMEM_OBJ_ROFF(&oi)); |
1841 | |
1842 | /* |
1843 | * ring_{buf,md,sd}_ofs offsets are relative to the |
1844 | * current ring, and not to the base of mmap span. |
1845 | */ |
1846 | *(mach_vm_offset_t *)(uintptr_t) |
1847 | &ring->ring_def_buf_base = |
1848 | (roff[SKMEM_REGION_BUF_DEF] - ring_off); |
1849 | *(mach_vm_offset_t *)(uintptr_t) |
1850 | &ring->ring_large_buf_base = |
1851 | (roff[SKMEM_REGION_BUF_LARGE] - ring_off); |
1852 | *(mach_vm_offset_t *)(uintptr_t)&ring->ring_md_base = |
1853 | (roff[SKMEM_REGION_UMD] - ring_off); |
1854 | _CASSERT(sizeof(uint16_t) == |
1855 | sizeof(ring->ring_bft_size)); |
1856 | if (roff[SKMEM_REGION_UBFT] != 0) { |
1857 | ASSERT(ar->ar_regions[SKMEM_REGION_UBFT] != |
1858 | NULL); |
1859 | *(mach_vm_offset_t *)(uintptr_t) |
1860 | &ring->ring_bft_base = |
1861 | (roff[SKMEM_REGION_UBFT] - ring_off); |
1862 | *(uint16_t *)(uintptr_t)&ring->ring_bft_size = |
1863 | (uint16_t)ar->ar_regions[SKMEM_REGION_UBFT]-> |
1864 | skr_c_obj_size; |
1865 | ASSERT(ring->ring_bft_size == |
1866 | ar->ar_regions[SKMEM_REGION_KBFT]-> |
1867 | skr_c_obj_size); |
1868 | } else { |
1869 | *(mach_vm_offset_t *)(uintptr_t) |
1870 | &ring->ring_bft_base = 0; |
1871 | *(uint16_t *)(uintptr_t)&ring->ring_bft_size = 0;
1872 | } |
1873 | |
1874 | if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) { |
1875 | usd_roff = roff[SKMEM_REGION_TXAUSD]; |
1876 | } else { |
1877 | ASSERT(t == NR_RX || t == NR_F); |
1878 | usd_roff = roff[SKMEM_REGION_RXFUSD]; |
1879 | } |
1880 | *(mach_vm_offset_t *)(uintptr_t)&ring->ring_sd_base = |
1881 | (usd_roff - ring_off); |
1882 | |
1883 | /* copy values from kring */ |
1884 | ring->ring_head = kring->ckr_rhead; |
1885 | *(slot_idx_t *)(uintptr_t)&ring->ring_khead = |
1886 | kring->ckr_khead; |
1887 | *(slot_idx_t *)(uintptr_t)&ring->ring_tail = |
1888 | kring->ckr_rtail; |
1889 | |
1890 | _CASSERT(sizeof(uint32_t) == |
1891 | sizeof(ring->ring_def_buf_size)); |
1892 | _CASSERT(sizeof(uint32_t) == |
1893 | sizeof(ring->ring_large_buf_size)); |
1894 | _CASSERT(sizeof(uint16_t) == |
1895 | sizeof(ring->ring_md_size)); |
1896 | *(uint32_t *)(uintptr_t)&ring->ring_def_buf_size = |
1897 | ar->ar_regions[SKMEM_REGION_BUF_DEF]->skr_c_obj_size; |
1898 | if (ar->ar_regions[SKMEM_REGION_BUF_LARGE] != NULL) { |
1899 | *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = |
1900 | ar->ar_regions[SKMEM_REGION_BUF_LARGE]->skr_c_obj_size; |
1901 | } else { |
1902 | *(uint32_t *)(uintptr_t)&ring->ring_large_buf_size = 0; |
1903 | } |
1904 | if (ar->ar_regions[SKMEM_REGION_UMD] != NULL) { |
1905 | *(uint16_t *)(uintptr_t)&ring->ring_md_size = |
1906 | (uint16_t)ar->ar_regions[SKMEM_REGION_UMD]-> |
1907 | skr_c_obj_size; |
1908 | ASSERT(ring->ring_md_size == |
1909 | ar->ar_regions[SKMEM_REGION_KMD]-> |
1910 | skr_c_obj_size); |
1911 | } else { |
1912 | *(uint16_t *)(uintptr_t)&ring->ring_md_size = 0; |
1913 | ASSERT(PP_KERNEL_ONLY(arn->arn_rx_pp)); |
1914 | ASSERT(PP_KERNEL_ONLY(arn->arn_tx_pp)); |
1915 | } |
1916 | |
1917 | /* ring info */ |
1918 | _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_id)); |
1919 | _CASSERT(sizeof(uint16_t) == sizeof(ring->ring_kind)); |
1920 | *(uint16_t *)(uintptr_t)&ring->ring_id = |
1921 | (uint16_t)kring->ckr_ring_id; |
1922 | *(uint16_t *)(uintptr_t)&ring->ring_kind = |
1923 | (uint16_t)kring->ckr_tx; |
1924 | |
1925 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1926 | "%s_ring at 0x%llx kr 0x%llx (\"%s\")" , |
1927 | sk_ring2str(t), SK_KVA(ring), SK_KVA(kring), |
1928 | kring->ckr_name); |
1929 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1930 | " num_slots: %u" , ring->ring_num_slots); |
1931 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1932 | " def_buf_base: 0x%llx" , |
1933 | (uint64_t)ring->ring_def_buf_base); |
1934 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1935 | " large_buf_base: 0x%llx" , |
1936 | (uint64_t)ring->ring_large_buf_base); |
1937 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1938 | " md_base: 0x%llx" , |
1939 | (uint64_t)ring->ring_md_base); |
1940 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1941 | " sd_base: 0x%llx" , |
1942 | (uint64_t)ring->ring_sd_base); |
1943 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1944 | " h, t: %u, %u, %u" , ring->ring_head, |
1945 | ring->ring_tail); |
1946 | SK_DF(SK_VERB_NA | SK_VERB_RING, |
1947 | " md_size: %d" , |
1948 | (uint64_t)ring->ring_md_size); |
1949 | |
1950 | /* make sure they're in synch */ |
1951 | _CASSERT(NR_RX == CR_KIND_RX); |
1952 | _CASSERT(NR_TX == CR_KIND_TX); |
1953 | _CASSERT(NR_A == CR_KIND_ALLOC); |
1954 | _CASSERT(NR_F == CR_KIND_FREE); |
1955 | _CASSERT(NR_EV == CR_KIND_EVENT); |
1956 | _CASSERT(NR_LBA == CR_KIND_LARGE_BUF_ALLOC); |
1957 | |
1958 | skip_user_ring_setup: |
1959 | /* |
1960 | * This flag tells na_kr_teardown_all() that it should |
1961 | * go thru the checks to free up the slot maps. |
1962 | */ |
1963 | kring->ckr_flags |= CKRF_MEM_SD_INITED; |
1964 | if (t == NR_TX || t == NR_A || t == NR_EV || t == NR_LBA) { |
1965 | kring->ckr_ksds_cache = arn->arn_txaksd_cache; |
1966 | } else { |
1967 | ASSERT(t == NR_RX || t == NR_F); |
1968 | kring->ckr_ksds_cache = arn->arn_rxfksd_cache; |
1969 | } |
1970 | kring->ckr_ksds = |
1971 | skmem_cache_alloc(kring->ckr_ksds_cache, |
1972 | SKMEM_NOSLEEP); |
1973 | if (kring->ckr_ksds == NULL) { |
1974 | SK_ERR("Cannot allocate %s_ksds for kr " |
1975 | "0x%llx (\"%s\")" , sk_ring2str(t), |
1976 | SK_KVA(kring), kring->ckr_name); |
1977 | goto cleanup; |
1978 | } |
1979 | if (!KR_KERNEL_ONLY(kring)) { |
1980 | skmem_cache_get_obj_info(kring->ckr_ksds_cache, |
1981 | kring->ckr_ksds, &oi, &oim); |
1982 | kring->ckr_usds = SKMEM_OBJ_ADDR(&oim); |
1983 | } |
1984 | na_kr_slot_desc_init(kring->ckr_ksds,
1985 | KR_KERNEL_ONLY(kring), kring->ckr_usds, ndesc);
1986 | |
1987 | /* cache last slot descriptor address */ |
1988 | ASSERT(kring->ckr_lim == (ndesc - 1)); |
1989 | kring->ckr_ksds_last = &kring->ckr_ksds[kring->ckr_lim]; |
1990 | |
1991 | if ((t < NR_TXRX) && |
1992 | !(na->na_flags & NAF_USER_PKT_POOL) && |
1993 | na_kr_populate_slots(kring) != 0) { |
1994 | SK_ERR("Cannot allocate buffers for kr " |
1995 | "0x%llx (\"%s\")" , SK_KVA(kring), |
1996 | kring->ckr_name); |
1997 | goto cleanup; |
1998 | } |
1999 | } |
2000 | } |
2001 | |
2002 | return 0; |
2003 | |
2004 | cleanup: |
2005 | na_kr_teardown_all(na, ch, FALSE); |
2006 | |
2007 | return ENOMEM; |
2008 | } |
2009 | |
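/*
 * Release the per-kring memory allocated by na_kr_setup(): depopulate
 * any TX/RX/event slots, free the slot descriptor arrays once the
 * arena reports them idle, and free the user ring object.  When
 * tearing down for defunct, the kring is also marked CKRF_DEFUNCT so
 * that subsequent syncs drop.
 */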
2010 | static void |
2011 | na_kr_teardown_common(struct nexus_adapter *na, |
2012 | struct __kern_channel_ring *kring, enum txrx t, struct kern_channel *ch, |
2013 | boolean_t defunct) |
2014 | { |
2015 | struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
2016 | struct __user_channel_ring *ckr_ring; |
2017 | boolean_t sd_idle, sd_inited; |
2018 | |
2019 | ASSERT(arn != NULL); |
2020 | kr_enter(kring, TRUE); |
2021 | /* |
2022 | * Check for CKRF_MEM_SD_INITED and CKRF_MEM_RING_INITED |
2023 | * to make sure that the freeing needs to happen (else just |
2024 | * nullify the values). |
2025 | * If this adapter owns the memory for the slot descriptors, |
2026 | * check if the region is marked as busy (sd_idle is false) |
2027 | * and leave the kring's slot descriptor fields alone if so, |
2028 | * at defunct time. At final teardown time, sd_idle must be |
2029 | * true else we assert; this indicates a missing call to |
2030 | * skmem_arena_nexus_sd_set_noidle(). |
2031 | */ |
2032 | sd_inited = ((kring->ckr_flags & CKRF_MEM_SD_INITED) != 0); |
2033 | if (sd_inited) { |
2034 | /* callee will do KR_KSD(), so check */ |
2035 | if (((t < NR_TXRX) || (t == NR_EV)) && |
2036 | (kring->ckr_ksds != NULL)) { |
2037 | na_kr_depopulate_slots(kring, ch, defunct); |
2038 | } |
2039 | /* leave CKRF_MEM_SD_INITED flag alone until idle */ |
2040 | sd_idle = skmem_arena_nexus_sd_idle(arn); |
2041 | VERIFY(sd_idle || defunct); |
2042 | } else { |
2043 | sd_idle = TRUE; |
2044 | } |
2045 | |
2046 | if (sd_idle) { |
2047 | kring->ckr_flags &= ~CKRF_MEM_SD_INITED; |
2048 | if (kring->ckr_ksds != NULL) { |
2049 | if (sd_inited) { |
2050 | skmem_cache_free(kring->ckr_ksds_cache, |
2051 | kring->ckr_ksds); |
2052 | } |
2053 | kring->ckr_ksds = NULL; |
2054 | kring->ckr_ksds_last = NULL; |
2055 | kring->ckr_usds = NULL; |
2056 | } |
2057 | ASSERT(kring->ckr_ksds_last == NULL); |
2058 | ASSERT(kring->ckr_usds == NULL); |
2059 | } |
2060 | |
2061 | if ((ckr_ring = kring->ckr_ring) != NULL) { |
2062 | kring->ckr_ring = NULL; |
2063 | } |
2064 | |
2065 | if (kring->ckr_flags & CKRF_MEM_RING_INITED) { |
2066 | ASSERT(ckr_ring != NULL || KR_KERNEL_ONLY(kring)); |
2067 | if (ckr_ring != NULL) { |
2068 | skmem_cache_free(arn->arn_ring_cache, ckr_ring); |
2069 | } |
2070 | kring->ckr_flags &= ~CKRF_MEM_RING_INITED; |
2071 | } |
2072 | |
2073 | if (defunct) { |
2074 | /* if defunct, drop everything; see KR_DROP() */ |
2075 | kring->ckr_flags |= CKRF_DEFUNCT; |
2076 | } |
2077 | kr_exit(kring); |
2078 | } |
2079 | |
2080 | /* |
2081 | * Teardown ALL rings of a nexus adapter; this includes {tx,rx,alloc,free,event,large-buf-alloc}
2082 | */ |
2083 | static void |
2084 | na_kr_teardown_all(struct nexus_adapter *na, struct kern_channel *ch, |
2085 | boolean_t defunct) |
2086 | { |
2087 | enum txrx t; |
2088 | |
2089 | ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
2090 | |
2091 | /* skip if this adapter has no allocated rings */ |
2092 | if (na->na_tx_rings == NULL) { |
2093 | return; |
2094 | } |
2095 | |
2096 | for_all_rings(t) { |
2097 | for (uint32_t i = 0; i < na_get_nrings(na, t); i++) { |
2098 | na_kr_teardown_common(na, &NAKR(na, t)[i],
2099 | t, ch, defunct); |
2100 | } |
2101 | } |
2102 | } |
2103 | |
2104 | /* |
2105 | * Teardown only {tx,rx} rings assigned to the channel. |
2106 | */ |
2107 | static void |
2108 | na_kr_teardown_txrx(struct nexus_adapter *na, struct kern_channel *ch, |
2109 | boolean_t defunct, struct proc *p) |
2110 | { |
2111 | enum txrx t; |
2112 | |
2113 | ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
2114 | |
2115 | for_rx_tx(t) { |
2116 | ring_id_t qfirst = ch->ch_first[t]; |
2117 | ring_id_t qlast = ch->ch_last[t]; |
2118 | uint32_t i; |
2119 | |
2120 | for (i = qfirst; i < qlast; i++) { |
2121 | struct __kern_channel_ring *kring = &NAKR(na, t)[i]; |
2122 | na_kr_teardown_common(na, kring, t, ch, defunct); |
2123 | |
2124 | /* |
2125 | * Issue a notify to wake up anyone sleeping in kqueue |
2126 | * so that they notice the newly defuncted channels and |
2127 | * return an error.
2128 | */ |
2129 | kring->ckr_na_notify(kring, p, 0); |
2130 | } |
2131 | } |
2132 | } |
2133 | |
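/*
 * Attach a freshly allocated packet from the kring's packet pool to
 * every slot of a TX/RX ring.  For the flow switch, kernel pipe and
 * netif cases, kernel-only and RX rings are intentionally left empty
 * (see the xxx_ppool comments below); on allocation failure, every
 * slot populated so far is detached and its packet freed.
 */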
2134 | static int |
2135 | na_kr_populate_slots(struct __kern_channel_ring *kring) |
2136 | { |
2137 | const boolean_t kernel_only = KR_KERNEL_ONLY(kring); |
2138 | struct nexus_adapter *na = KRNA(kring); |
2139 | kern_pbufpool_t pp = kring->ckr_pp; |
2140 | uint32_t nslots = kring->ckr_num_slots; |
2141 | uint32_t start_idx, i; |
2142 | uint32_t sidx = 0; /* slot counter */ |
2143 | struct __kern_slot_desc *ksd; |
2144 | struct __user_slot_desc *usd; |
2145 | struct __kern_quantum *kqum; |
2146 | nexus_type_t nexus_type; |
2147 | int err = 0; |
2148 | |
2149 | ASSERT(kring->ckr_tx < NR_TXRX); |
2150 | ASSERT(!(KRNA(kring)->na_flags & NAF_USER_PKT_POOL)); |
2151 | ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
2152 | ASSERT(pp != NULL); |
2153 | |
2154 | /* |
2155 | * xxx_ppool: remove this special case |
2156 | */ |
2157 | nexus_type = na->na_nxdom_prov->nxdom_prov_dom->nxdom_type; |
2158 | |
2159 | switch (nexus_type) { |
2160 | case NEXUS_TYPE_FLOW_SWITCH: |
2161 | case NEXUS_TYPE_KERNEL_PIPE: |
2162 | /* |
2163 | * xxx_ppool: This is temporary code until we come up with a |
2164 | * scheme for user space to alloc & attach packets to tx ring. |
2165 | */ |
2166 | if (kernel_only || kring->ckr_tx == NR_RX) { |
2167 | return 0; |
2168 | } |
2169 | break; |
2170 | |
2171 | case NEXUS_TYPE_NET_IF: |
2172 | if (((na->na_type == NA_NETIF_DEV) || |
2173 | (na->na_type == NA_NETIF_HOST)) && |
2174 | (kernel_only || (kring->ckr_tx == NR_RX))) { |
2175 | return 0; |
2176 | } |
2177 | |
2178 | ASSERT((na->na_type == NA_NETIF_COMPAT_DEV) || |
2179 | (na->na_type == NA_NETIF_COMPAT_HOST) || |
2180 | (na->na_type == NA_NETIF_DEV) || |
2181 | (na->na_type == NA_NETIF_VP)); |
2182 | |
2183 | if (!kernel_only) { |
2184 | if (kring->ckr_tx == NR_RX) { |
2185 | return 0; |
2186 | } else { |
2187 | break; |
2188 | } |
2189 | } |
2190 | |
2191 | ASSERT(kernel_only); |
2192 | |
2193 | if ((na->na_type == NA_NETIF_COMPAT_DEV) || |
2194 | (na->na_type == NA_NETIF_COMPAT_HOST)) { |
2195 | return 0; |
2196 | } |
2197 | VERIFY(0); |
2198 | /* NOTREACHED */ |
2199 | __builtin_unreachable(); |
2200 | |
2201 | case NEXUS_TYPE_USER_PIPE: |
2202 | case NEXUS_TYPE_MONITOR: |
2203 | break; |
2204 | |
2205 | default: |
2206 | VERIFY(0); |
2207 | /* NOTREACHED */ |
2208 | __builtin_unreachable(); |
2209 | } |
2210 | |
2211 | /* Fill the ring with packets */ |
2212 | sidx = start_idx = 0; |
2213 | for (i = 0; i < nslots; i++) { |
2214 | kqum = SK_PTR_ADDR_KQUM(pp_alloc_packet(pp, pp->pp_max_frags, |
2215 | SKMEM_NOSLEEP)); |
2216 | if (kqum == NULL) { |
2217 | err = ENOMEM; |
2218 | SK_ERR("ar 0x%llx (\"%s\") no more buffers " |
2219 | "after %u of %u, err %d" , SK_KVA(na->na_arena), |
2220 | na->na_arena->ar_name, i, nslots, err); |
2221 | goto cleanup; |
2222 | } |
2223 | ksd = KR_KSD(kring, i); |
2224 | usd = (kernel_only ? NULL : KR_USD(kring, i)); |
2225 | |
2226 | /* attach packet to slot */ |
2227 | kqum->qum_ksd = ksd; |
2228 | ASSERT(!KSD_VALID_METADATA(ksd)); |
2229 | KSD_ATTACH_METADATA(ksd, kqum); |
2230 | if (usd != NULL) { |
2231 | USD_ATTACH_METADATA(usd, METADATA_IDX(kqum)); |
2232 | kr_externalize_metadata(kring, pp->pp_max_frags, |
2233 | kqum, current_proc()); |
2234 | } |
2235 | |
2236 | SK_DF(SK_VERB_MEM, " C ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] " |
2237 | " kbuf[%-3u, 0x%llx]" , i, SK_KVA(ksd), METADATA_IDX(kqum), |
2238 | SK_KVA(kqum), kqum->qum_buf[0].buf_idx, |
2239 | SK_KVA(&kqum->qum_buf[0])); |
2240 | if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) { |
2241 | SK_DF(SK_VERB_MEM, " C usd [%-3d, 0x%llx] " |
2242 | "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]" , |
2243 | (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE), |
2244 | SK_KVA(usd), METADATA_IDX(kqum), |
2245 | SK_KVA(kqum->qum_user), |
2246 | kqum->qum_user->qum_buf[0].buf_idx, |
2247 | SK_KVA(&kqum->qum_user->qum_buf[0])); |
2248 | } |
2249 | |
2250 | sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2251 | } |
2252 | |
2253 | SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") populated %u slots from idx %u",
2254 | SK_KVA(na->na_arena), na->na_arena->ar_name, nslots, start_idx); |
2255 | |
2256 | cleanup: |
2257 | if (err != 0) { |
2258 | sidx = start_idx; |
2259 | while (i-- > 0) { |
2260 | ksd = KR_KSD(kring, i); |
2261 | usd = (kernel_only ? NULL : KR_USD(kring, i)); |
2262 | kqum = ksd->sd_qum; |
2263 | |
2264 | ASSERT(ksd == kqum->qum_ksd); |
2265 | KSD_RESET(ksd); |
2266 | if (usd != NULL) { |
2267 | USD_RESET(usd); |
2268 | } |
2269 | /* detach packet from slot */ |
2270 | kqum->qum_ksd = NULL; |
2271 | pp_free_packet(pp, SK_PTR_ADDR(kqum)); |
2272 | |
2273 | sidx = SLOT_NEXT(sidx, kring->ckr_lim);
2274 | } |
2275 | } |
2276 | return err; |
2277 | } |
2278 | |
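/*
 * Detach and free the packets currently attached to a ring's slots.
 * For channels using a user packet pool, packets that were loaned to
 * user space are first removed from the pool's loaned-packet hash;
 * internalized packets must not appear there.  On defunct, the user
 * slot descriptors are left untouched so that a suspended process can
 * resume with its original view of the ring.
 */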
2279 | static void |
2280 | na_kr_depopulate_slots(struct __kern_channel_ring *kring, |
2281 | struct kern_channel *ch, boolean_t defunct) |
2282 | { |
2283 | #pragma unused(ch) |
2284 | const boolean_t kernel_only = KR_KERNEL_ONLY(kring); |
2285 | uint32_t i, j, n = kring->ckr_num_slots; |
2286 | struct nexus_adapter *na = KRNA(kring); |
2287 | struct kern_pbufpool *pp = kring->ckr_pp; |
2288 | boolean_t upp = FALSE; |
2289 | obj_idx_t midx; |
2290 | |
2291 | ASSERT((kring->ckr_tx < NR_TXRX) || (kring->ckr_tx == NR_EV)); |
2292 | LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); |
2293 | |
2294 | ASSERT(na->na_arena->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
2295 | |
2296 | if (((na->na_flags & NAF_USER_PKT_POOL) != 0) && |
2297 | (kring->ckr_tx != NR_EV)) { |
2298 | upp = TRUE; |
2299 | } |
2300 | for (i = 0, j = 0; i < n; i++) { |
2301 | struct __kern_slot_desc *ksd = KR_KSD(kring, i); |
2302 | struct __user_slot_desc *usd; |
2303 | struct __kern_quantum *qum, *kqum; |
2304 | boolean_t free_packet = FALSE; |
2305 | int err; |
2306 | |
2307 | if (!KSD_VALID_METADATA(ksd)) { |
2308 | continue; |
2309 | } |
2310 | |
2311 | kqum = ksd->sd_qum; |
2312 | usd = (kernel_only ? NULL : KR_USD(kring, i)); |
2313 | midx = METADATA_IDX(kqum); |
2314 | |
2315 | /* |
2316 | * if the packet is internalized it should not be in the |
2317 | * hash table of packets loaned to user space. |
2318 | */ |
2319 | if (upp && (kqum->qum_qflags & QUM_F_INTERNALIZED)) { |
2320 | if ((qum = pp_find_upp(pp, midx)) != NULL) { |
2321 | panic("internalized packet 0x%llx in htbl",
2322 | SK_KVA(qum)); |
2323 | /* NOTREACHED */ |
2324 | __builtin_unreachable(); |
2325 | } |
2326 | free_packet = TRUE; |
2327 | } else if (upp) { |
2328 | /* |
2329 | * if the packet is not internalized check if it is |
2330 | * in the list of packets loaned to user-space. |
2331 | * Remove from the list before freeing. |
2332 | */ |
2333 | ASSERT(!(kqum->qum_qflags & QUM_F_INTERNALIZED)); |
2334 | qum = pp_remove_upp(pp, midx, &err); |
2335 | if (err != 0) { |
2336 | SK_ERR("un-allocated packet or buflet %d %p",
2337 | midx, SK_KVA(qum)); |
2338 | if (qum != NULL) { |
2339 | free_packet = TRUE; |
2340 | } |
2341 | } |
2342 | } else { |
2343 | free_packet = TRUE; |
2344 | } |
2345 | |
2346 | /* |
2347 | * Clear the user and kernel slot descriptors. Note that |
2348 | * if we are depopulating the slots due to defunct (and not |
2349 | * due to normal deallocation/teardown), we leave the user |
2350 | * slot descriptor alone. At that point the process may |
2351 | * be suspended, and later when it resumes it would just |
2352 | * pick up the original contents and move forward with |
2353 | * whatever it was doing. |
2354 | */ |
2355 | KSD_RESET(ksd); |
2356 | if (usd != NULL && !defunct) { |
2357 | USD_RESET(usd); |
2358 | } |
2359 | |
2360 | /* detach packet from slot */ |
2361 | kqum->qum_ksd = NULL; |
2362 | |
2363 | SK_DF(SK_VERB_MEM, " D ksd [%-3d, 0x%llx] kqum [%-3u, 0x%llx] " |
2364 | " kbuf[%-3u, 0x%llx]" , i, SK_KVA(ksd), |
2365 | METADATA_IDX(kqum), SK_KVA(kqum), kqum->qum_buf[0].buf_idx, |
2366 | SK_KVA(&kqum->qum_buf[0])); |
2367 | if (!(kqum->qum_qflags & QUM_F_KERNEL_ONLY)) { |
2368 | SK_DF(SK_VERB_MEM, " D usd [%-3u, 0x%llx] " |
2369 | "uqum [%-3u, 0x%llx] ubuf[%-3u, 0x%llx]" , |
2370 | (int)(usd ? usd->sd_md_idx : OBJ_IDX_NONE), |
2371 | SK_KVA(usd), METADATA_IDX(kqum), |
2372 | SK_KVA(kqum->qum_user), |
2373 | kqum->qum_user->qum_buf[0].buf_idx, |
2374 | SK_KVA(&kqum->qum_user->qum_buf[0])); |
2375 | } |
2376 | |
2377 | if (free_packet) { |
2378 | pp_free_packet(pp, SK_PTR_ADDR(kqum)); ++j; |
2379 | } |
2380 | } |
2381 | |
2382 | SK_DF(SK_VERB_NA | SK_VERB_RING, "ar 0x%llx (\"%s\") depopulated %u of %u slots",
2383 | SK_KVA(KRNA(kring)->na_arena), KRNA(kring)->na_arena->ar_name, |
2384 | j, n); |
2385 | } |
2386 | |
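/*
 * Create the krings array and, unless the adapter is marked
 * NAF_MEM_NO_INIT, the memory-backed ring/slot state behind it.
 * Nexus adapters typically point their na_krings_create and
 * na_krings_delete callbacks at this pair of routines, as the pseudo
 * adapter below does.  A minimal sketch (the my_na_* names are
 * illustrative, not existing symbols):
 *
 *	static int
 *	my_na_krings_create(struct nexus_adapter *na,
 *	    struct kern_channel *ch)
 *	{
 *		// no per-slot context needed for this adapter
 *		return na_rings_mem_setup(na, FALSE, ch);
 *	}
 *
 *	static void
 *	my_na_krings_delete(struct nexus_adapter *na,
 *	    struct kern_channel *ch, boolean_t defunct)
 *	{
 *		na_rings_mem_teardown(na, ch, defunct);
 *	}
 */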
2387 | int |
2388 | na_rings_mem_setup(struct nexus_adapter *na, |
2389 | boolean_t alloc_ctx, struct kern_channel *ch) |
2390 | { |
2391 | boolean_t kronly; |
2392 | int err; |
2393 | |
2394 | SK_LOCK_ASSERT_HELD(); |
2395 | ASSERT(na->na_channels == 0); |
2396 | /* |
2397 | * If NAF_MEM_NO_INIT is set, then only create the krings and not |
2398 | * the backing memory regions for the adapter. |
2399 | */ |
2400 | kronly = (na->na_flags & NAF_MEM_NO_INIT); |
2401 | ASSERT(!kronly || NA_KERNEL_ONLY(na)); |
2402 | |
2403 | /* |
2404 | * Create and initialize the common fields of the krings array,
2405 | * using the information that must already be available in the na.
2406 | */ |
2407 | if ((err = na_kr_create(na, alloc_ctx)) == 0 && !kronly) { |
2408 | err = na_kr_setup(na, ch); |
2409 | if (err != 0) { |
2410 | na_kr_delete(na); |
2411 | } |
2412 | } |
2413 | |
2414 | return err; |
2415 | } |
2416 | |
2417 | void |
2418 | na_rings_mem_teardown(struct nexus_adapter *na, struct kern_channel *ch, |
2419 | boolean_t defunct) |
2420 | { |
2421 | SK_LOCK_ASSERT_HELD(); |
2422 | ASSERT(na->na_channels == 0 || (na->na_flags & NAF_DEFUNCT)); |
2423 | |
2424 | /* |
2425 | * Deletes the kring and ring array of the adapter. They |
2426 | * must have been created using na_rings_mem_setup(). |
2427 | * |
2428 | * XXX: adi@apple.com -- the parameter "ch" should not be |
2429 | * needed here; however na_kr_depopulate_slots() needs to |
2430 | * go thru the channel's user packet pool hash, and so for |
2431 | * now we leave it here. |
2432 | */ |
2433 | na_kr_teardown_all(na, ch, defunct); |
2434 | if (!defunct) { |
2435 | na_kr_delete(na); |
2436 | } |
2437 | } |
2438 | |
2439 | void |
2440 | na_ch_rings_defunct(struct kern_channel *ch, struct proc *p) |
2441 | { |
2442 | LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); |
2443 | |
2444 | /* |
2445 | * Depopulate slots on the TX and RX rings of this channel, |
2446 | * but don't touch other rings owned by other channels if |
2447 | * this adapter is being shared. |
2448 | */ |
2449 | na_kr_teardown_txrx(ch->ch_na, ch, TRUE, p);
2450 | } |
2451 | |
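/*
 * Set or clear CKRF_DROP on every TX/RX kring of the adapter, taking
 * kr_enter()/kr_exit() around the update when possible so that any
 * sync in progress completes before the new state takes effect.
 */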
2452 | void |
2453 | na_kr_drop(struct nexus_adapter *na, boolean_t drop) |
2454 | { |
2455 | enum txrx t; |
2456 | uint32_t i; |
2457 | |
2458 | for_rx_tx(t) { |
2459 | for (i = 0; i < na_get_nrings(na, t); i++) { |
2460 | struct __kern_channel_ring *kring = &NAKR(na, t)[i]; |
2461 | int error; |
2462 | error = kr_enter(kring, TRUE); |
2463 | if (drop) { |
2464 | kring->ckr_flags |= CKRF_DROP; |
2465 | } else { |
2466 | kring->ckr_flags &= ~CKRF_DROP; |
2467 | } |
2468 | |
2469 | if (error != 0) { |
2470 | SK_ERR("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) " |
2471 | "kr_enter failed %d" , |
2472 | na->na_name, SK_KVA(na), |
2473 | kring->ckr_name, SK_KVA(kring), |
2474 | error); |
2475 | } else { |
2476 | kr_exit(kring); |
2477 | } |
2478 | SK_D("na \"%s\" (0x%llx) kr \"%s\" (0x%llx) " |
2479 | "krflags 0x%b" , na->na_name, SK_KVA(na), |
2480 | kring->ckr_name, SK_KVA(kring), kring->ckr_flags, |
2481 | CKRF_BITS); |
2482 | } |
2483 | } |
2484 | } |
2485 | |
2486 | /* |
2487 | * Set the stopped/enabled status of a ring. When stopping, we also wait
2488 | * for all current activity on the ring to terminate. The status change
2489 | * is then notified using the na_notify callback of the na.
2490 | */ |
2491 | static void |
2492 | na_set_ring(struct nexus_adapter *na, uint32_t ring_id, enum txrx t, |
2493 | uint32_t state) |
2494 | { |
2495 | struct __kern_channel_ring *kr = &NAKR(na, t)[ring_id]; |
2496 | |
2497 | /* |
2498 | * Mark the ring as stopped/enabled, and run through the |
2499 | * locks to make sure other users get to see it. |
2500 | */ |
2501 | if (state == KR_READY) { |
2502 | kr_start(kr); |
2503 | } else { |
2504 | kr_stop(kr, state); |
2505 | } |
2506 | } |
2507 | |
2508 | |
2509 | /* stop or enable all the rings of na */ |
2510 | static void |
2511 | na_set_all_rings(struct nexus_adapter *na, uint32_t state) |
2512 | { |
2513 | uint32_t i; |
2514 | enum txrx t; |
2515 | |
2516 | SK_LOCK_ASSERT_HELD(); |
2517 | |
2518 | if (!NA_IS_ACTIVE(na)) { |
2519 | return; |
2520 | } |
2521 | |
2522 | for_rx_tx(t) { |
2523 | for (i = 0; i < na_get_nrings(na, t); i++) { |
2524 | na_set_ring(na, i, t, state);
2525 | } |
2526 | } |
2527 | } |
2528 | |
2529 | /* |
2530 | * Convenience function used in drivers. Waits for current txsync()s/rxsync()s |
2531 | * to finish and prevents any new one from starting. Call this before turning |
2532 | * Skywalk mode off, or before removing the harware rings (e.g., on module |
2533 | * onload). As a rule of thumb for linux drivers, this should be placed near |
2534 | * each napi_disable(). |
2535 | */ |
2536 | void |
2537 | na_disable_all_rings(struct nexus_adapter *na) |
2538 | { |
2539 | na_set_all_rings(na, KR_STOPPED);
2540 | } |
2541 | |
2542 | /* |
2543 | * Convenience function used in drivers. Re-enables rxsync and txsync on the |
2544 | * adapter's rings. In Linux drivers, this should be placed near each
2545 | * napi_enable(). |
2546 | */ |
2547 | void |
2548 | na_enable_all_rings(struct nexus_adapter *na) |
2549 | { |
2550 | na_set_all_rings(na, KR_READY /* enabled */);
2551 | } |
2552 | |
2553 | void |
2554 | na_lock_all_rings(struct nexus_adapter *na) |
2555 | { |
2556 | na_set_all_rings(na, KR_LOCKED);
2557 | } |
2558 | |
2559 | void |
2560 | na_unlock_all_rings(struct nexus_adapter *na) |
2561 | { |
2562 | na_enable_all_rings(na); |
2563 | } |
2564 | |
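/*
 * Bind a user channel to a nexus adapter: look up (or create) the
 * adapter, bind the channel to its ring range, mmap the arena into
 * the calling process, and publish the channel schema flags.  On any
 * failure the partially constructed state is torn down via
 * na_disconnect() or by dropping the adapter reference.
 */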
2565 | int |
2566 | na_connect(struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr, |
2567 | struct kern_channel *ch0, struct nxbind *nxb, struct proc *p) |
2568 | { |
2569 | struct nexus_adapter *na = NULL; |
2570 | mach_vm_size_t memsize = 0; |
2571 | int err = 0; |
2572 | enum txrx t; |
2573 | |
2574 | ASSERT(!(chr->cr_mode & CHMODE_KERNEL)); |
2575 | ASSERT(!(ch->ch_flags & CHANF_KERNEL)); |
2576 | |
2577 | SK_LOCK_ASSERT_HELD(); |
2578 | |
2579 | /* find the nexus adapter and return the reference */ |
2580 | err = na_find(ch, nx, chr, ch0, nxb, p, &na, TRUE /* create */); |
2581 | if (err != 0) { |
2582 | ASSERT(na == NULL); |
2583 | goto done; |
2584 | } |
2585 | |
2586 | if (NA_KERNEL_ONLY(na)) { |
2587 | err = EBUSY; |
2588 | goto done; |
2589 | } |
2590 | |
2591 | /* reject if the adapter is defunct or non-permissive */
2592 | if ((na->na_flags & NAF_DEFUNCT) || na_reject_channel(ch, na)) { |
2593 | err = ENXIO; |
2594 | goto done; |
2595 | } |
2596 | |
2597 | err = na_bind_channel(na, ch, chr); |
2598 | if (err != 0) { |
2599 | goto done; |
2600 | } |
2601 | |
2602 | ASSERT(ch->ch_schema != NULL); |
2603 | ASSERT(na == ch->ch_na); |
2604 | |
2605 | for_all_rings(t) { |
2606 | if (na_get_nrings(na, t) == 0) { |
2607 | ch->ch_si[t] = NULL; |
2608 | continue; |
2609 | } |
2610 | ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] : |
2611 | &NAKR(na, t)[ch->ch_first[t]].ckr_si; |
2612 | } |
2613 | |
2614 | skmem_arena_get_stats(na->na_arena, &memsize, NULL); |
2615 | |
2616 | if (!(skmem_arena_nexus(na->na_arena)->arn_mode &
2617 | AR_NEXUS_MODE_EXTERNAL_PPOOL)) { |
2618 | os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_PRIV_MEM, relaxed); |
2619 | } |
2620 | |
2621 | err = skmem_arena_mmap(na->na_arena, p, &ch->ch_mmap); |
2622 | if (err != 0) { |
2623 | goto done; |
2624 | } |
2625 | |
2626 | os_atomic_or(__DECONST(uint32_t *, &ch->ch_schema->csm_flags), CSM_ACTIVE, relaxed); |
2627 | chr->cr_memsize = memsize; |
2628 | chr->cr_memoffset = ch->ch_schema_offset; |
2629 | |
2630 | SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx " |
2631 | "naflags %b" , sk_proc_name_address(p), sk_proc_pid(p), |
2632 | SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, |
2633 | na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na), |
2634 | na->na_flags, NAF_BITS); |
2635 | |
2636 | done: |
2637 | if (err != 0) { |
2638 | if (ch->ch_schema != NULL || na != NULL) { |
2639 | if (ch->ch_schema != NULL) { |
2640 | ASSERT(na == ch->ch_na); |
2641 | /* |
2642 | * Callee will unmap memory region if needed, |
2643 | * as well as release reference held on 'na'. |
2644 | */ |
2645 | na_disconnect(nx, ch); |
2646 | na = NULL; |
2647 | } |
2648 | if (na != NULL) { |
2649 | (void) na_release_locked(na); |
2650 | na = NULL; |
2651 | } |
2652 | } |
2653 | } |
2654 | |
2655 | return err; |
2656 | } |
2657 | |
2658 | void |
2659 | na_disconnect(struct kern_nexus *nx, struct kern_channel *ch) |
2660 | { |
2661 | #pragma unused(nx) |
2662 | enum txrx t; |
2663 | |
2664 | SK_LOCK_ASSERT_HELD(); |
2665 | |
2666 | SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b" , |
2667 | SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, |
2668 | ch->ch_na->na_name, ch->ch_info->cinfo_nx_port, |
2669 | (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(ch->ch_na), |
2670 | ch->ch_na->na_flags, NAF_BITS); |
2671 | |
2672 | /* destroy mapping and release references */ |
2673 | na_unbind_channel(ch); |
2674 | ASSERT(ch->ch_na == NULL); |
2675 | ASSERT(ch->ch_schema == NULL); |
2676 | for_all_rings(t) { |
2677 | ch->ch_si[t] = NULL; |
2678 | } |
2679 | } |
2680 | |
2681 | void |
2682 | na_defunct(struct kern_nexus *nx, struct kern_channel *ch, |
2683 | struct nexus_adapter *na, boolean_t locked) |
2684 | { |
2685 | #pragma unused(nx) |
2686 | SK_LOCK_ASSERT_HELD(); |
2687 | if (!locked) { |
2688 | lck_mtx_lock(&ch->ch_lock);
2689 | } |
2690 | |
2691 | LCK_MTX_ASSERT(&ch->ch_lock, LCK_MTX_ASSERT_OWNED); |
2692 | |
2693 | if (!(na->na_flags & NAF_DEFUNCT)) { |
2694 | /* |
2695 | * Mark this adapter as defunct to inform nexus-specific |
2696 | * teardown handler called by na_teardown() below. |
2697 | */ |
2698 | os_atomic_or(&na->na_flags, NAF_DEFUNCT, relaxed); |
2699 | |
2700 | /* |
2701 | * Depopulate slots. |
2702 | */ |
2703 | na_teardown(na, ch, TRUE); |
2704 | |
2705 | /* |
2706 | * And finally destroy any already-defunct memory regions. |
2707 | * Do this only if the nexus adapter owns the arena, i.e. |
2708 | * NAF_MEM_LOANED is not set. Otherwise, we'd expect |
2709 | * that this routine be called again for the real owner. |
2710 | */ |
2711 | if (!(na->na_flags & NAF_MEM_LOANED)) { |
2712 | skmem_arena_defunct(na->na_arena); |
2713 | } |
2714 | } |
2715 | |
2716 | SK_D("%s(%d): ch 0x%llx -/- nx 0x%llx (%s:\"%s\":%u:%d) " |
2717 | "na 0x%llx naflags %b" , ch->ch_name, ch->ch_pid, |
2718 | SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, |
2719 | na->na_name, ch->ch_info->cinfo_nx_port, |
2720 | (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na), |
2721 | na->na_flags, NAF_BITS); |
2722 | |
2723 | if (!locked) { |
2724 | lck_mtx_unlock(&ch->ch_lock);
2725 | } |
2726 | } |
2727 | |
2728 | /* |
2729 | * TODO: adi@apple.com -- merge this into na_connect() |
2730 | */ |
2731 | int |
2732 | na_connect_spec(struct kern_nexus *nx, struct kern_channel *ch, |
2733 | struct chreq *chr, struct proc *p) |
2734 | { |
2735 | #pragma unused(p) |
2736 | struct nexus_adapter *na = NULL; |
2737 | mach_vm_size_t memsize = 0; |
2738 | int error = 0; |
2739 | enum txrx t; |
2740 | |
2741 | ASSERT(chr->cr_mode & CHMODE_KERNEL); |
2742 | ASSERT(ch->ch_flags & CHANF_KERNEL); |
2743 | ASSERT(ch->ch_na == NULL); |
2744 | ASSERT(ch->ch_schema == NULL); |
2745 | |
2746 | SK_LOCK_ASSERT_HELD(); |
2747 | |
2748 | error = na_find(ch, nx, chr, NULL, NULL, kernproc, &na, TRUE); |
2749 | if (error != 0) { |
2750 | goto done; |
2751 | } |
2752 | |
2753 | if (na == NULL) { |
2754 | error = EINVAL; |
2755 | goto done; |
2756 | } |
2757 | |
2758 | if (na->na_channels > 0) { |
2759 | error = EBUSY; |
2760 | goto done; |
2761 | } |
2762 | |
2763 | if (na->na_flags & NAF_DEFUNCT) { |
2764 | error = ENXIO; |
2765 | goto done; |
2766 | } |
2767 | |
2768 | /* |
2769 | * Special connect requires the nexus adapter to handle its |
2770 | * own channel binding and unbinding via na_special(); bail |
2771 | * if this adapter doesn't support it. |
2772 | */ |
2773 | if (na->na_special == NULL) { |
2774 | error = ENOTSUP; |
2775 | goto done; |
2776 | } |
2777 | |
2778 | /* upon success, "ch->ch_na" will point to "na" */ |
2779 | error = na->na_special(na, ch, chr, NXSPEC_CMD_CONNECT); |
2780 | if (error != 0) { |
2781 | ASSERT(ch->ch_na == NULL); |
2782 | goto done; |
2783 | } |
2784 | |
2785 | ASSERT(na->na_flags & NAF_SPEC_INIT); |
2786 | ASSERT(na == ch->ch_na); |
2787 | /* make sure this is still the case */ |
2788 | ASSERT(ch->ch_schema == NULL); |
2789 | |
2790 | for_rx_tx(t) { |
2791 | ch->ch_si[t] = ch_is_multiplex(ch, t) ? &na->na_si[t] : |
2792 | &NAKR(na, t)[ch->ch_first[t]].ckr_si; |
2793 | } |
2794 | |
2795 | skmem_arena_get_stats(na->na_arena, &memsize, NULL); |
2796 | chr->cr_memsize = memsize; |
2797 | |
2798 | SK_D("%s(%d) ch 0x%llx <-> nx 0x%llx (%s:\"%s\":%d:%d) na 0x%llx " |
2799 | "naflags %b" , sk_proc_name_address(p), sk_proc_pid(p), |
2800 | SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, |
2801 | na->na_name, (int)chr->cr_port, (int)chr->cr_ring_id, SK_KVA(na), |
2802 | na->na_flags, NAF_BITS); |
2803 | |
2804 | done: |
2805 | if (error != 0) { |
2806 | if (ch->ch_na != NULL || na != NULL) { |
2807 | if (ch->ch_na != NULL) { |
2808 | ASSERT(na == ch->ch_na); |
2809 | /* callee will release reference on 'na' */ |
2810 | na_disconnect_spec(nx, ch); |
2811 | na = NULL; |
2812 | } |
2813 | if (na != NULL) { |
2814 | (void) na_release_locked(na); |
2815 | na = NULL; |
2816 | } |
2817 | } |
2818 | } |
2819 | |
2820 | return error; |
2821 | } |
2822 | |
2823 | /* |
2824 | * TODO: adi@apple.com -- merge this into na_disconnect() |
2825 | */ |
2826 | void |
2827 | na_disconnect_spec(struct kern_nexus *nx, struct kern_channel *ch) |
2828 | { |
2829 | #pragma unused(nx) |
2830 | struct nexus_adapter *na = ch->ch_na; |
2831 | enum txrx t; |
2832 | int error; |
2833 | |
2834 | SK_LOCK_ASSERT_HELD(); |
2835 | ASSERT(na != NULL); |
2836 | ASSERT(na->na_flags & NAF_SPEC_INIT); /* has been bound */ |
2837 | |
2838 | SK_D("ch 0x%llx -!- nx 0x%llx (%s:\"%s\":%u:%d) na 0x%llx naflags %b" , |
2839 | SK_KVA(ch), SK_KVA(nx), NX_DOM_PROV(nx)->nxdom_prov_name, |
2840 | na->na_name, ch->ch_info->cinfo_nx_port, |
2841 | (int)ch->ch_info->cinfo_ch_ring_id, SK_KVA(na), |
2842 | na->na_flags, NAF_BITS); |
2843 | |
2844 | /* take a reference for this routine */ |
2845 | na_retain_locked(na); |
2846 | |
2847 | ASSERT(ch->ch_flags & CHANF_KERNEL); |
2848 | ASSERT(ch->ch_schema == NULL); |
2849 | ASSERT(na->na_special != NULL); |
2850 | /* unbind this channel */ |
2851 | error = na->na_special(na, ch, NULL, NXSPEC_CMD_DISCONNECT); |
2852 | ASSERT(error == 0); |
2853 | ASSERT(!(na->na_flags & NAF_SPEC_INIT)); |
2854 | |
2855 | /* now release our reference; this may be the last */ |
2856 | na_release_locked(na); |
2857 | na = NULL; |
2858 | |
2859 | ASSERT(ch->ch_na == NULL); |
2860 | for_rx_tx(t) { |
2861 | ch->ch_si[t] = NULL; |
2862 | } |
2863 | } |
2864 | |
2865 | void |
2866 | na_start_spec(struct kern_nexus *nx, struct kern_channel *ch) |
2867 | { |
2868 | #pragma unused(nx) |
2869 | struct nexus_adapter *na = ch->ch_na; |
2870 | |
2871 | SK_LOCK_ASSERT_HELD(); |
2872 | |
2873 | ASSERT(ch->ch_flags & CHANF_KERNEL); |
2874 | ASSERT(NA_KERNEL_ONLY(na)); |
2875 | ASSERT(na->na_special != NULL); |
2876 | |
2877 | na->na_special(na, ch, NULL, NXSPEC_CMD_START); |
2878 | } |
2879 | |
2880 | void |
2881 | na_stop_spec(struct kern_nexus *nx, struct kern_channel *ch) |
2882 | { |
2883 | #pragma unused(nx) |
2884 | struct nexus_adapter *na = ch->ch_na; |
2885 | |
2886 | SK_LOCK_ASSERT_HELD(); |
2887 | |
2888 | ASSERT(ch->ch_flags & CHANF_KERNEL); |
2889 | ASSERT(NA_KERNEL_ONLY(na)); |
2890 | ASSERT(na->na_special != NULL); |
2891 | |
2892 | na->na_special(na, ch, NULL, NXSPEC_CMD_STOP); |
2893 | } |
2894 | |
2895 | /* |
2896 | * MUST BE CALLED UNDER SK_LOCK() |
2897 | * |
2898 | * Get a refcounted reference to a nexus adapter attached |
2899 | * to the interface specified by chr. |
2900 | * This is always called in the execution of an ioctl(). |
2901 | * |
2902 | * Return ENXIO if the interface specified by the request does |
2903 | * not exist, ENOTSUP if Skywalk is not supported by the interface, |
2904 | * EINVAL if parameters are invalid, ENOMEM if needed resources |
2905 | * could not be allocated. |
2906 | * If successful, hold a reference to the nexus adapter. |
2907 | * |
2908 | * No reference is kept on the real interface, which may then |
2909 | * disappear at any time. |
2910 | */ |
2911 | int |
2912 | na_find(struct kern_channel *ch, struct kern_nexus *nx, struct chreq *chr, |
2913 | struct kern_channel *ch0, struct nxbind *nxb, struct proc *p, |
2914 | struct nexus_adapter **na, boolean_t create) |
2915 | { |
2916 | int error = 0; |
2917 | |
2918 | _CASSERT(sizeof(chr->cr_name) == sizeof((*na)->na_name)); |
2919 | |
2920 | *na = NULL; /* default return value */ |
2921 | |
2922 | SK_LOCK_ASSERT_HELD(); |
2923 | |
2924 | /* |
2925 | * We cascade through all possible types of nexus adapter.
2926 | * All nx_*_na_find() functions return an error and an na, |
2927 | * with the following combinations: |
2928 | * |
2929 | * error na |
2930 | * 0 NULL type doesn't match |
2931 | * !0 NULL type matches, but na creation/lookup failed |
2932 | * 0 !NULL type matches and na created/found |
2933 | * !0 !NULL impossible |
2934 | */ |
2935 | |
2936 | #if CONFIG_NEXUS_MONITOR |
2937 | /* try to see if this is a monitor port */ |
2938 | error = nx_monitor_na_find(nx, ch, chr, ch0, nxb, p, na, create); |
2939 | if (error != 0 || *na != NULL) { |
2940 | return error; |
2941 | } |
2942 | #endif /* CONFIG_NEXUS_MONITOR */ |
2943 | #if CONFIG_NEXUS_USER_PIPE |
2944 | /* try to see if this is a pipe port */ |
2945 | error = nx_upipe_na_find(nx, ch, chr, nxb, p, na, create); |
2946 | if (error != 0 || *na != NULL) { |
2947 | return error; |
2948 | } |
2949 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
2950 | #if CONFIG_NEXUS_KERNEL_PIPE |
2951 | /* try to see if this is a kernel pipe port */ |
2952 | error = nx_kpipe_na_find(nx, ch, chr, nxb, p, na, create); |
2953 | if (error != 0 || *na != NULL) { |
2954 | return error; |
2955 | } |
2956 | #endif /* CONFIG_NEXUS_KERNEL_PIPE */ |
2957 | #if CONFIG_NEXUS_FLOWSWITCH |
2958 | /* try to see if this is a flowswitch port */ |
2959 | error = nx_fsw_na_find(nx, ch, chr, nxb, p, na, create); |
2960 | if (error != 0 || *na != NULL) { |
2961 | return error; |
2962 | } |
2963 | #endif /* CONFIG_NEXUS_FLOWSWITCH */ |
2964 | #if CONFIG_NEXUS_NETIF |
2965 | error = nx_netif_na_find(nx, ch, chr, nxb, p, na, create); |
2966 | if (error != 0 || *na != NULL) { |
2967 | return error; |
2968 | } |
2969 | #endif /* CONFIG_NEXUS_NETIF */ |
2970 | |
2971 | ASSERT(*na == NULL); |
2972 | return ENXIO; |
2973 | } |
2974 | |
2975 | void |
2976 | na_retain_locked(struct nexus_adapter *na) |
2977 | { |
2978 | SK_LOCK_ASSERT_HELD(); |
2979 | |
2980 | if (na != NULL) { |
2981 | #if SK_LOG |
2982 | uint32_t oref = os_atomic_inc_orig(&na->na_refcount, relaxed); |
2983 | SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
2984 | na->na_name, SK_KVA(na), oref + 1, na->na_channels); |
2985 | #else /* !SK_LOG */ |
2986 | os_atomic_inc(&na->na_refcount, relaxed); |
2987 | #endif /* !SK_LOG */ |
2988 | } |
2989 | } |
2990 | |
2991 | /* returns 1 iff the nexus_adapter is destroyed */ |
2992 | int |
2993 | na_release_locked(struct nexus_adapter *na) |
2994 | { |
2995 | uint32_t oref; |
2996 | |
2997 | SK_LOCK_ASSERT_HELD(); |
2998 | |
2999 | ASSERT(na->na_refcount > 0); |
3000 | oref = os_atomic_dec_orig(&na->na_refcount, relaxed); |
3001 | if (oref > 1) { |
3002 | SK_DF(SK_VERB_REFCNT, "na \"%s\" (0x%llx) refcnt %u chcnt %u",
3003 | na->na_name, SK_KVA(na), oref - 1, na->na_channels); |
3004 | return 0; |
3005 | } |
3006 | ASSERT(na->na_channels == 0); |
3007 | |
3008 | if (na->na_dtor != NULL) { |
3009 | na->na_dtor(na); |
3010 | } |
3011 | |
3012 | ASSERT(na->na_tx_rings == NULL && na->na_rx_rings == NULL); |
3013 | ASSERT(na->na_slot_ctxs == NULL); |
3014 | ASSERT(na->na_scratch == NULL); |
3015 | |
3016 | #if CONFIG_NEXUS_USER_PIPE |
3017 | nx_upipe_na_dealloc(na); |
3018 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
3019 | if (na->na_arena != NULL) { |
3020 | skmem_arena_release(na->na_arena); |
3021 | na->na_arena = NULL; |
3022 | } |
3023 | |
3024 | SK_DF(SK_VERB_MEM, "na \"%s\" (0x%llx) being freed",
3025 | na->na_name, SK_KVA(na)); |
3026 | |
3027 | NA_FREE(na); |
3028 | return 1; |
3029 | } |
3030 | |
3031 | static struct nexus_adapter * |
3032 | na_pseudo_alloc(zalloc_flags_t how) |
3033 | { |
3034 | struct nexus_adapter *na; |
3035 | |
3036 | na = zalloc_flags(na_pseudo_zone, how | Z_ZERO); |
3037 | if (na) { |
3038 | na->na_type = NA_PSEUDO; |
3039 | na->na_free = na_pseudo_free; |
3040 | } |
3041 | return na; |
3042 | } |
3043 | |
3044 | static void |
3045 | na_pseudo_free(struct nexus_adapter *na) |
3046 | { |
3047 | ASSERT(na->na_refcount == 0); |
3048 | SK_DF(SK_VERB_MEM, "na 0x%llx FREE", SK_KVA(na));
3049 | bzero(na, sizeof(*na));
3050 | zfree(na_pseudo_zone, na); |
3051 | } |
3052 | |
3053 | static int |
3054 | na_pseudo_txsync(struct __kern_channel_ring *kring, struct proc *p, |
3055 | uint32_t flags) |
3056 | { |
3057 | #pragma unused(kring, p, flags) |
3058 | SK_DF(SK_VERB_SYNC | SK_VERB_TX, |
3059 | "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x" , |
3060 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
3061 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id, |
3062 | flags); |
3063 | |
3064 | return 0; |
3065 | } |
3066 | |
3067 | static int |
3068 | na_pseudo_rxsync(struct __kern_channel_ring *kring, struct proc *p, |
3069 | uint32_t flags) |
3070 | { |
3071 | #pragma unused(kring, p, flags) |
3072 | SK_DF(SK_VERB_SYNC | SK_VERB_RX, |
3073 | "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x" , |
3074 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
3075 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id, |
3076 | flags); |
3077 | |
3078 | ASSERT(kring->ckr_rhead <= kring->ckr_lim); |
3079 | |
3080 | return 0; |
3081 | } |
3082 | |
3083 | static int |
3084 | na_pseudo_activate(struct nexus_adapter *na, na_activate_mode_t mode) |
3085 | { |
3086 | SK_D("na \"%s\" (0x%llx) %s" , na->na_name, |
3087 | SK_KVA(na), na_activate_mode2str(mode)); |
3088 | |
3089 | switch (mode) { |
3090 | case NA_ACTIVATE_MODE_ON: |
3091 | os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed); |
3092 | break; |
3093 | |
3094 | case NA_ACTIVATE_MODE_DEFUNCT: |
3095 | break; |
3096 | |
3097 | case NA_ACTIVATE_MODE_OFF: |
3098 | os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed); |
3099 | break; |
3100 | |
3101 | default: |
3102 | VERIFY(0); |
3103 | /* NOTREACHED */ |
3104 | __builtin_unreachable(); |
3105 | } |
3106 | |
3107 | return 0; |
3108 | } |
3109 | |
3110 | static void |
3111 | na_pseudo_dtor(struct nexus_adapter *na) |
3112 | { |
3113 | #pragma unused(na) |
3114 | } |
3115 | |
3116 | static int |
3117 | na_pseudo_krings_create(struct nexus_adapter *na, struct kern_channel *ch) |
3118 | { |
3119 | return na_rings_mem_setup(na, FALSE, ch); |
3120 | } |
3121 | |
3122 | static void |
3123 | na_pseudo_krings_delete(struct nexus_adapter *na, struct kern_channel *ch, |
3124 | boolean_t defunct) |
3125 | { |
3126 | na_rings_mem_teardown(na, ch, defunct); |
3127 | } |
3128 | |
3129 | /* |
3130 | * Pseudo nexus adapter; typically used as a generic parent adapter. |
3131 | */ |
3132 | int |
3133 | na_pseudo_create(struct kern_nexus *nx, struct chreq *chr, |
3134 | struct nexus_adapter **ret) |
3135 | { |
3136 | struct nxprov_params *nxp = NX_PROV(nx)->nxprov_params; |
3137 | struct nexus_adapter *na; |
3138 | int error; |
3139 | |
3140 | SK_LOCK_ASSERT_HELD(); |
3141 | *ret = NULL; |
3142 | |
3143 | na = na_pseudo_alloc(Z_WAITOK);
3144 | |
3145 | ASSERT(na->na_type == NA_PSEUDO); |
3146 | ASSERT(na->na_free == na_pseudo_free); |
3147 | |
3148 | (void) strncpy(na->na_name, chr->cr_name, sizeof(na->na_name) - 1); |
3149 | na->na_name[sizeof(na->na_name) - 1] = '\0'; |
3150 | uuid_generate_random(na->na_uuid);
3151 | |
3152 | /* |
3153 | * Verify upper bounds; for all cases including user pipe nexus, |
3154 | * the parameters must have already been validated by corresponding |
3155 | * nxdom_prov_params() function defined by each domain. |
3156 | */ |
3157 | na_set_nrings(na, NR_TX, nxp->nxp_tx_rings);
3158 | na_set_nrings(na, NR_RX, nxp->nxp_rx_rings);
3159 | na_set_nslots(na, NR_TX, nxp->nxp_tx_slots);
3160 | na_set_nslots(na, NR_RX, nxp->nxp_rx_slots);
3161 | ASSERT(na_get_nrings(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_rings.nb_max); |
3162 | ASSERT(na_get_nrings(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_rings.nb_max); |
3163 | ASSERT(na_get_nslots(na, NR_TX) <= NX_DOM(nx)->nxdom_tx_slots.nb_max); |
3164 | ASSERT(na_get_nslots(na, NR_RX) <= NX_DOM(nx)->nxdom_rx_slots.nb_max); |
3165 | |
3166 | na->na_txsync = na_pseudo_txsync; |
3167 | na->na_rxsync = na_pseudo_rxsync; |
3168 | na->na_activate = na_pseudo_activate; |
3169 | na->na_dtor = na_pseudo_dtor; |
3170 | na->na_krings_create = na_pseudo_krings_create; |
3171 | na->na_krings_delete = na_pseudo_krings_delete; |
3172 | |
3173 | *(nexus_stats_type_t *)(uintptr_t)&na->na_stats_type = |
3174 | NEXUS_STATS_TYPE_INVALID; |
3175 | |
3176 | /* other fields are set in the common routine */ |
3177 | na_attach_common(na, nx, NX_DOM_PROV(nx)); |
3178 | |
3179 | if ((error = NX_DOM_PROV(nx)->nxdom_prov_mem_new(NX_DOM_PROV(nx), |
3180 | nx, na)) != 0) { |
3181 | ASSERT(na->na_arena == NULL); |
3182 | goto err; |
3183 | } |
3184 | ASSERT(na->na_arena != NULL); |
3185 | |
3186 | *(uint32_t *)(uintptr_t)&na->na_flowadv_max = nxp->nxp_flowadv_max; |
3187 | ASSERT(na->na_flowadv_max == 0 || |
3188 | skmem_arena_nexus(na->na_arena)->arn_flowadv_obj != NULL); |
3189 | |
3190 | #if SK_LOG |
3191 | uuid_string_t uuidstr; |
3192 | SK_D("na_name: \"%s\"" , na->na_name); |
3193 | SK_D(" UUID: %s" , sk_uuid_unparse(na->na_uuid, uuidstr)); |
3194 | SK_D(" nx: 0x%llx (\"%s\":\"%s\")" , |
3195 | SK_KVA(na->na_nx), NX_DOM(na->na_nx)->nxdom_name, |
3196 | NX_DOM_PROV(na->na_nx)->nxdom_prov_name); |
3197 | SK_D(" flags: %b" , na->na_flags, NAF_BITS); |
3198 | SK_D(" flowadv_max: %u" , na->na_flowadv_max); |
3199 | SK_D(" rings: tx %u rx %u" , |
3200 | na_get_nrings(na, NR_TX), na_get_nrings(na, NR_RX)); |
3201 | SK_D(" slots: tx %u rx %u" , |
3202 | na_get_nslots(na, NR_TX), na_get_nslots(na, NR_RX)); |
3203 | #if CONFIG_NEXUS_USER_PIPE |
3204 | SK_D(" next_pipe: %u" , na->na_next_pipe); |
3205 | SK_D(" max_pipes: %u" , na->na_max_pipes); |
3206 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
3207 | #endif /* SK_LOG */ |
3208 | |
3209 | *ret = na; |
3210 | na_retain_locked(na); |
3211 | |
3212 | return 0; |
3213 | |
3214 | err: |
3215 | if (na != NULL) { |
3216 | if (na->na_arena != NULL) { |
3217 | skmem_arena_release(na->na_arena); |
3218 | na->na_arena = NULL; |
3219 | } |
3220 | NA_FREE(na); |
3221 | } |
3222 | return error; |
3223 | } |
3224 | |
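/*
 * Bind the flow advisory table entry at fe_idx to the given flow:
 * record the flow UUID and flow ID, and mark the entry valid.  The
 * caller guarantees that the adapter is active, that it has a flow
 * advisory region (na_flowadv_max != 0), and that fe_idx is in range;
 * the entry lives in the arena's arn_flowadv_obj table.
 */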
3225 | void |
3226 | na_flowadv_entry_alloc(const struct nexus_adapter *na, uuid_t fae_id, |
3227 | const flowadv_idx_t fe_idx, const uint32_t flowid) |
3228 | { |
3229 | struct skmem_arena *ar = na->na_arena; |
	struct skmem_arena_nexus *arn = skmem_arena_nexus(na->na_arena);
3231 | struct __flowadv_entry *fae; |
3232 | |
3233 | ASSERT(NA_IS_ACTIVE(na) && na->na_flowadv_max != 0); |
3234 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
3235 | |
3236 | AR_LOCK(ar); |
3237 | |
3238 | /* we must not get here if arena is defunct; this must be valid */ |
3239 | ASSERT(arn->arn_flowadv_obj != NULL); |
3240 | |
3241 | VERIFY(fe_idx < na->na_flowadv_max); |
3242 | fae = &arn->arn_flowadv_obj[fe_idx]; |
	uuid_copy(fae->fae_id, fae_id);
3244 | fae->fae_flowid = flowid; |
3245 | fae->fae_flags = FLOWADVF_VALID; |
3246 | |
3247 | AR_UNLOCK(ar); |
3248 | } |
3249 | |
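/*
 * Release the flow advisory entry at fe_idx.  Unlike the alloc path,
 * the arena may already be defunct here, in which case arn_flowadv_obj
 * is gone and there is nothing to clear; otherwise the entry's UUID,
 * flow ID and flags are reset under the arena lock.
 */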
3250 | void |
3251 | na_flowadv_entry_free(const struct nexus_adapter *na, uuid_t fae_id, |
3252 | const flowadv_idx_t fe_idx, const uint32_t flowid) |
3253 | { |
3254 | #pragma unused(fae_id) |
3255 | struct skmem_arena *ar = na->na_arena; |
3256 | struct skmem_arena_nexus *arn = skmem_arena_nexus(ar); |
3257 | |
3258 | ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0)); |
3259 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
3260 | |
3261 | AR_LOCK(ar); |
3262 | |
3263 | ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT)); |
3264 | if (arn->arn_flowadv_obj != NULL) { |
3265 | struct __flowadv_entry *fae; |
3266 | |
3267 | VERIFY(fe_idx < na->na_flowadv_max); |
3268 | fae = &arn->arn_flowadv_obj[fe_idx]; |
3269 | ASSERT(uuid_compare(fae->fae_id, fae_id) == 0); |
		uuid_clear(fae->fae_id);
3271 | VERIFY(fae->fae_flowid == flowid); |
3272 | fae->fae_flowid = 0; |
3273 | fae->fae_flags = 0; |
3274 | } |
3275 | |
3276 | AR_UNLOCK(ar); |
3277 | } |
3278 | |
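/*
 * Mark the flow advisory entry at fe_idx as suspended, but only if the
 * caller's token still matches the entry (the flow may have been torn
 * down in the meantime).  Returns true if the entry was found and
 * suspended, false otherwise.
 */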
3279 | bool |
3280 | na_flowadv_set(const struct nexus_adapter *na, const flowadv_idx_t fe_idx, |
3281 | const flowadv_token_t flow_token) |
3282 | { |
3283 | struct skmem_arena *ar = na->na_arena; |
3284 | struct skmem_arena_nexus *arn = skmem_arena_nexus(ar); |
3285 | bool suspend; |
3286 | |
3287 | ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0)); |
3288 | ASSERT(fe_idx < na->na_flowadv_max); |
3289 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
3290 | |
3291 | AR_LOCK(ar); |
3292 | |
3293 | ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT)); |
3294 | |
3295 | if (arn->arn_flowadv_obj != NULL) { |
3296 | struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx]; |
3297 | |
3298 | _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token)); |
3299 | /* |
3300 | * We cannot guarantee that the flow is still around by now, |
3301 | * so check if that's the case and let the caller know. |
3302 | */ |
3303 | if ((suspend = (fae->fae_token == flow_token))) { |
3304 | ASSERT(fae->fae_flags & FLOWADVF_VALID); |
3305 | fae->fae_flags |= FLOWADVF_SUSPENDED; |
3306 | } |
3307 | } else { |
3308 | suspend = false; |
3309 | } |
3310 | if (suspend) { |
		SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d) flow token 0x%x fidx %u "
3312 | "SUSPEND" , sk_proc_name_address(current_proc()), |
3313 | sk_proc_pid(current_proc()), flow_token, fe_idx); |
3314 | } else { |
		SK_ERR("%s(%d) flow token 0x%x fidx %u no longer around" ,
3316 | sk_proc_name_address(current_proc()), |
3317 | sk_proc_pid(current_proc()), flow_token, fe_idx); |
3318 | } |
3319 | |
3320 | AR_UNLOCK(ar); |
3321 | |
3322 | return suspend; |
3323 | } |
3324 | |
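/*
 * Clear the suspended state of the flow advisory entry at fe_idx if
 * the caller's token still matches.  Returns non-zero if the entry was
 * resumed, zero if the flow is no longer around.
 */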
3325 | int |
3326 | na_flowadv_clear(const struct kern_channel *ch, const flowadv_idx_t fe_idx, |
3327 | const flowadv_token_t flow_token) |
3328 | { |
3329 | struct nexus_adapter *na = ch->ch_na; |
3330 | struct skmem_arena *ar = na->na_arena; |
3331 | struct skmem_arena_nexus *arn = skmem_arena_nexus(ar); |
3332 | boolean_t resume; |
3333 | |
3334 | ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0)); |
3335 | ASSERT(fe_idx < na->na_flowadv_max); |
3336 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
3337 | |
3338 | AR_LOCK(ar); |
3339 | |
3340 | ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT)); |
3341 | |
3342 | if (arn->arn_flowadv_obj != NULL) { |
3343 | struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx]; |
3344 | |
3345 | _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token)); |
3346 | /* |
3347 | * We cannot guarantee that the flow is still around by now, |
3348 | * so check if that's the case and let the caller know. |
3349 | */ |
3350 | if ((resume = (fae->fae_token == flow_token))) { |
3351 | ASSERT(fae->fae_flags & FLOWADVF_VALID); |
3352 | fae->fae_flags &= ~FLOWADVF_SUSPENDED; |
3353 | } |
3354 | } else { |
3355 | resume = FALSE; |
3356 | } |
3357 | if (resume) { |
3358 | SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x " |
3359 | "fidx %u RESUME" , ch->ch_name, ch->ch_pid, flow_token, |
3360 | fe_idx); |
3361 | } else { |
3362 | SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around" , |
3363 | ch->ch_name, ch->ch_pid, flow_token, fe_idx); |
3364 | } |
3365 | |
3366 | AR_UNLOCK(ar); |
3367 | |
3368 | return resume; |
3369 | } |
3370 | |
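/*
 * Record congestion-experienced (CE) events against the flow advisory
 * entry at fe_idx: ce_cnt and total_pkt_cnt are added to the entry's
 * CE and packet counters, provided the caller's token still matches
 * the entry (i.e. the flow is still around).
 */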
3371 | int |
3372 | na_flowadv_report_ce_event(const struct kern_channel *ch, const flowadv_idx_t fe_idx, |
3373 | const flowadv_token_t flow_token, uint32_t ce_cnt, uint32_t total_pkt_cnt) |
3374 | { |
3375 | struct nexus_adapter *na = ch->ch_na; |
3376 | struct skmem_arena *ar = na->na_arena; |
3377 | struct skmem_arena_nexus *arn = skmem_arena_nexus(ar); |
3378 | boolean_t added; |
3379 | |
3380 | ASSERT(NA_IS_ACTIVE(na) && (na->na_flowadv_max != 0)); |
3381 | ASSERT(fe_idx < na->na_flowadv_max); |
3382 | ASSERT(ar->ar_type == SKMEM_ARENA_TYPE_NEXUS); |
3383 | |
3384 | AR_LOCK(ar); |
3385 | |
3386 | ASSERT(arn->arn_flowadv_obj != NULL || (ar->ar_flags & ARF_DEFUNCT)); |
3387 | |
3388 | if (arn->arn_flowadv_obj != NULL) { |
3389 | struct __flowadv_entry *fae = &arn->arn_flowadv_obj[fe_idx]; |
3390 | |
3391 | _CASSERT(sizeof(fae->fae_token) == sizeof(flow_token)); |
3392 | /* |
3393 | * We cannot guarantee that the flow is still around by now, |
3394 | * so check if that's the case and let the caller know. |
3395 | */ |
3396 | if ((added = (fae->fae_token == flow_token))) { |
3397 | ASSERT(fae->fae_flags & FLOWADVF_VALID); |
3398 | fae->fae_ce_cnt += ce_cnt; |
3399 | fae->fae_pkt_cnt += total_pkt_cnt; |
3400 | } |
3401 | } else { |
3402 | added = FALSE; |
3403 | } |
3404 | if (added) { |
3405 | SK_DF(SK_VERB_FLOW_ADVISORY, "%s(%d): flow token 0x%x " |
3406 | "fidx %u ce cnt incremented" , ch->ch_name, |
3407 | ch->ch_pid, flow_token, fe_idx); |
3408 | } else { |
3409 | SK_ERR("%s(%d): flow token 0x%x fidx %u no longer around" , |
3410 | ch->ch_name, ch->ch_pid, flow_token, fe_idx); |
3411 | } |
3412 | |
3413 | AR_UNLOCK(ar); |
3414 | |
3415 | return added; |
3416 | } |
3417 | |
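/*
 * Notify channel watchers that flow advisories on this TX ring have
 * been updated; the event is posted with CHAN_FILT_HINT_FLOW_ADV_UPD.
 */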
3418 | void |
3419 | na_flowadv_event(struct __kern_channel_ring *kring) |
3420 | { |
3421 | ASSERT(kring->ckr_tx == NR_TX); |
3422 | |
3423 | SK_DF(SK_VERB_EVENTS, "%s(%d) na \"%s\" (0x%llx) kr 0x%llx" , |
3424 | sk_proc_name_address(current_proc()), sk_proc_pid(current_proc()), |
3425 | KRNA(kring)->na_name, SK_KVA(KRNA(kring)), SK_KVA(kring)); |
3426 | |
3427 | na_post_event(kring, TRUE, FALSE, FALSE, CHAN_FILT_HINT_FLOW_ADV_UPD); |
3428 | } |
3429 | |
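/*
 * Sync callback for the "packet free" ring of a user packet pool
 * channel.  Each slot between ckr_khead and ckr_rhead carries a packet
 * that user space is returning: the packet is detached from its slot,
 * removed from the pool's allocated-by-user list, and freed back to
 * the pool in a single batch.
 */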
3430 | static int |
3431 | na_packet_pool_free_sync(struct __kern_channel_ring *kring, struct proc *p, |
3432 | uint32_t flags) |
3433 | { |
3434 | #pragma unused(flags, p) |
3435 | int n, ret = 0; |
3436 | slot_idx_t j; |
3437 | struct __kern_slot_desc *ksd; |
3438 | struct __user_slot_desc *usd; |
3439 | struct __kern_quantum *kqum; |
3440 | struct kern_pbufpool *pp = kring->ckr_pp; |
3441 | uint32_t nfree = 0; |
3442 | |
3443 | /* packet pool list is protected by channel lock */ |
3444 | ASSERT(!KR_KERNEL_ONLY(kring)); |
3445 | |
3446 | /* # of new slots */ |
3447 | n = kring->ckr_rhead - kring->ckr_khead; |
3448 | if (n < 0) { |
3449 | n += kring->ckr_num_slots; |
3450 | } |
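	/*
	 * Example (illustrative numbers): with 128 slots, khead == 120
	 * and rhead == 5, the difference is -115, which wraps to 13
	 * returned slots after adding ckr_num_slots.
	 */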
3451 | |
3452 | /* nothing to free */ |
3453 | if (__improbable(n == 0)) { |
3454 | SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s" , |
3455 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
3456 | "nothing to free" ); |
3457 | goto done; |
3458 | } |
3459 | |
3460 | j = kring->ckr_khead; |
3461 | PP_LOCK(pp); |
3462 | while (n--) { |
3463 | int err; |
3464 | |
3465 | ksd = KR_KSD(kring, j); |
3466 | usd = KR_USD(kring, j); |
3467 | |
3468 | if (__improbable(!SD_VALID_METADATA(usd))) { |
3469 | SK_ERR("bad slot %d 0x%llx" , j, SK_KVA(ksd)); |
3470 | ret = EINVAL; |
3471 | break; |
3472 | } |
3473 | |
3474 | kqum = pp_remove_upp_locked(pp, usd->sd_md_idx, &err); |
3475 | if (__improbable(err != 0)) { |
3476 | SK_ERR("un-allocated packet or buflet %d %p" , |
3477 | usd->sd_md_idx, SK_KVA(kqum)); |
3478 | ret = EINVAL; |
3479 | break; |
3480 | } |
3481 | |
3482 | /* detach and free the packet */ |
3483 | kqum->qum_qflags &= ~QUM_F_FINALIZED; |
3484 | kqum->qum_ksd = NULL; |
3485 | ASSERT(!KSD_VALID_METADATA(ksd)); |
3486 | USD_DETACH_METADATA(usd); |
3487 | ASSERT(pp == kqum->qum_pp); |
3488 | ASSERT(nfree < kring->ckr_num_slots); |
3489 | kring->ckr_scratch[nfree++] = (uint64_t)kqum; |
		j = SLOT_NEXT(j, kring->ckr_lim);
3491 | } |
3492 | PP_UNLOCK(pp); |
3493 | |
3494 | if (__probable(nfree > 0)) { |
3495 | pp_free_packet_batch(pp, &kring->ckr_scratch[0], nfree); |
3496 | } |
3497 | |
3498 | kring->ckr_khead = j; |
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3500 | |
3501 | done: |
3502 | return ret; |
3503 | } |
3504 | |
3505 | #define MAX_BUFLETS 64 |
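/*
 * Allocate up to *ph_cnt packets from the pool for the alloc ring.
 * For the non-large case, the pool's batch allocator result is
 * returned as is.  For the "large" case, packets are first allocated
 * without buffers and buflets are then attached in batches of at most
 * MAX_BUFLETS; any packets left without a buffer are freed again, and
 * ENOMEM/EAGAIN report a complete or partial failure.  On return,
 * *ph_cnt is the number of fully constructed packets.
 *
 * A minimal usage sketch (hypothetical caller and counts, assuming a
 * pool with buffer-on-demand enabled):
 *
 *	uint32_t cnt = 32;
 *	uint64_t phs[32];
 *	int err = alloc_packets(pp, phs, true, &cnt);
 *	// on return, cnt holds how many handles in phs carry buffers
 */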
3506 | static int |
3507 | alloc_packets(kern_pbufpool_t pp, uint64_t *buf_arr, bool large, uint32_t *ph_cnt) |
3508 | { |
3509 | int err; |
3510 | uint32_t need, need_orig, remain, alloced, i; |
3511 | uint64_t buflets[MAX_BUFLETS]; |
3512 | uint64_t *pkts; |
3513 | |
3514 | need_orig = *ph_cnt; |
	err = kern_pbufpool_alloc_batch_nosleep(pp, large ? 0 : 1, buf_arr, ph_cnt);
3516 | if (!large) { |
3517 | return err; |
3518 | } |
3519 | if (*ph_cnt == 0) { |
3520 | SK_ERR("failed to alloc %d packets for alloc ring: err %d" , |
3521 | need_orig, err); |
3522 | DTRACE_SKYWALK2(alloc__pkts__fail, uint32_t, need_orig, int, err); |
3523 | return err; |
3524 | } |
3525 | need = remain = *ph_cnt; |
3526 | alloced = 0; |
3527 | pkts = buf_arr; |
3528 | while (remain > 0) { |
3529 | uint32_t cnt, cnt_orig; |
3530 | |
3531 | cnt = MIN(remain, MAX_BUFLETS); |
3532 | cnt_orig = cnt; |
		err = pp_alloc_buflet_batch(pp, buflets, &cnt, SKMEM_NOSLEEP, true);
3534 | if (cnt == 0) { |
3535 | SK_ERR("failed to alloc %d buflets for alloc ring: " |
3536 | "remain %d, err %d" , cnt_orig, remain, err); |
3537 | DTRACE_SKYWALK3(alloc__bufs__fail, uint32_t, cnt_orig, |
3538 | uint32_t, remain, int, err); |
3539 | break; |
3540 | } |
3541 | for (i = 0; i < cnt; i++) { |
3542 | kern_packet_t ph = (kern_packet_t)pkts[i]; |
3543 | kern_buflet_t buf = (kern_buflet_t)buflets[i]; |
3544 | kern_buflet_t pbuf = kern_packet_get_next_buflet(ph, NULL); |
3545 | VERIFY(kern_packet_add_buflet(ph, pbuf, buf) == 0); |
3546 | buflets[i] = 0; |
3547 | } |
3548 | DTRACE_SKYWALK3(alloc__bufs, uint32_t, remain, uint32_t, cnt, |
3549 | uint32_t, cnt_orig); |
3550 | pkts += cnt; |
3551 | alloced += cnt; |
3552 | remain -= cnt; |
3553 | } |
3554 | /* free packets without attached buffers */ |
3555 | if (remain > 0) { |
3556 | DTRACE_SKYWALK1(remaining__pkts, uint32_t, remain); |
3557 | ASSERT(remain + alloced == need); |
3558 | pp_free_packet_batch(pp, pkts, remain); |
3559 | |
3560 | /* pp_free_packet_batch() should clear the pkts array */ |
3561 | for (i = 0; i < remain; i++) { |
3562 | ASSERT(pkts[i] == 0); |
3563 | } |
3564 | } |
3565 | *ph_cnt = alloced; |
3566 | if (*ph_cnt == 0) { |
3567 | err = ENOMEM; |
3568 | } else if (*ph_cnt < need_orig) { |
3569 | err = EAGAIN; |
3570 | } else { |
3571 | err = 0; |
3572 | } |
3573 | DTRACE_SKYWALK3(alloc__packets, uint32_t, need_orig, uint32_t, *ph_cnt, int, err); |
3574 | return err; |
3575 | } |
3576 | |
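/*
 * Sync callback for the user packet pool "alloc" ring.  The routine
 * reclaims completed slots, recomputes the allocation working set,
 * allocates that many packets (optionally with large buffers), inserts
 * them into the pool's allocated-by-user list and attaches them to the
 * ring slots after externalizing the metadata for user space.
 */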
3577 | static int |
3578 | na_packet_pool_alloc_sync_common(struct __kern_channel_ring *kring, struct proc *p, |
3579 | uint32_t flags, bool large) |
3580 | { |
3581 | int b, err; |
3582 | uint32_t n = 0; |
3583 | slot_idx_t j; |
3584 | uint64_t now; |
3585 | uint32_t curr_ws, ph_needed, ph_cnt; |
3586 | struct __kern_slot_desc *ksd; |
3587 | struct __user_slot_desc *usd; |
3588 | struct __kern_quantum *kqum; |
3589 | kern_pbufpool_t pp = kring->ckr_pp; |
3590 | pid_t pid = proc_pid(p); |
3591 | |
3592 | /* packet pool list is protected by channel lock */ |
3593 | ASSERT(!KR_KERNEL_ONLY(kring)); |
3594 | ASSERT(!PP_KERNEL_ONLY(pp)); |
3595 | |
3596 | now = _net_uptime; |
3597 | if ((flags & NA_SYNCF_UPP_PURGE) != 0) { |
3598 | if (now - kring->ckr_sync_time >= na_upp_reap_interval) { |
3599 | kring->ckr_alloc_ws = na_upp_reap_min_pkts; |
3600 | } |
3601 | SK_DF(SK_VERB_MEM | SK_VERB_SYNC, |
3602 | "%s: purged curr_ws(%d)" , kring->ckr_name, |
3603 | kring->ckr_alloc_ws); |
3604 | return 0; |
3605 | } |
3606 | /* reclaim the completed slots */ |
3607 | kring->ckr_khead = kring->ckr_rhead; |
3608 | |
3609 | /* # of busy (unclaimed) slots */ |
3610 | b = kring->ckr_ktail - kring->ckr_khead; |
3611 | if (b < 0) { |
3612 | b += kring->ckr_num_slots; |
3613 | } |
3614 | |
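	/*
	 * Working-set sizing, with illustrative numbers: if curr_ws is 64
	 * and a forced sync is requested, the target grows by 50% to 96
	 * (capped at ckr_lim).  Otherwise, if the ring has been quiet for
	 * at least na_upp_ws_hold_time and 16 or more slots (a quarter of
	 * the working set) are still unclaimed, the target shrinks by 25%
	 * to 48.  The result is never allowed below na_upp_alloc_lowat,
	 * and only the shortfall relative to the b busy slots is
	 * allocated below.
	 */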
3615 | curr_ws = kring->ckr_alloc_ws; |
3616 | if (flags & NA_SYNCF_FORCE_UPP_SYNC) { |
3617 | /* increment the working set by 50% */ |
3618 | curr_ws += (curr_ws >> 1); |
3619 | curr_ws = MIN(curr_ws, kring->ckr_lim); |
3620 | } else { |
3621 | if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) && |
3622 | (uint32_t)b >= (curr_ws >> 2)) { |
3623 | /* decrease the working set by 25% */ |
3624 | curr_ws -= (curr_ws >> 2); |
3625 | } |
3626 | } |
3627 | curr_ws = MAX(curr_ws, na_upp_alloc_lowat); |
3628 | if (curr_ws > (uint32_t)b) { |
3629 | n = curr_ws - b; |
3630 | } |
3631 | kring->ckr_alloc_ws = curr_ws; |
3632 | kring->ckr_sync_time = now; |
3633 | |
3634 | /* min with # of avail free slots (subtract busy from max) */ |
3635 | n = ph_needed = MIN(n, kring->ckr_lim - b); |
3636 | j = kring->ckr_ktail; |
3637 | SK_DF(SK_VERB_MEM | SK_VERB_SYNC, |
3638 | "%s: curr_ws(%d), n(%d)" , kring->ckr_name, curr_ws, n); |
3639 | |
3640 | if ((ph_cnt = ph_needed) == 0) { |
3641 | goto done; |
3642 | } |
3643 | |
	err = alloc_packets(pp, kring->ckr_scratch,
	    PP_HAS_BUFFER_ON_DEMAND(pp) && large, &ph_cnt);
3646 | if (__improbable(ph_cnt == 0)) { |
		SK_ERR("kr 0x%llx failed to alloc %u packets(%d)" ,
3648 | SK_KVA(kring), ph_needed, err); |
3649 | kring->ckr_err_stats.cres_pkt_alloc_failures += ph_needed; |
3650 | } else { |
3651 | /* |
3652 | * Add packets to the allocated list of user packet pool. |
3653 | */ |
		pp_insert_upp_batch(pp, pid, kring->ckr_scratch, ph_cnt);
3655 | } |
3656 | |
3657 | for (n = 0; n < ph_cnt; n++) { |
3658 | ksd = KR_KSD(kring, j); |
3659 | usd = KR_USD(kring, j); |
3660 | |
3661 | kqum = SK_PTR_ADDR_KQUM(kring->ckr_scratch[n]); |
3662 | kring->ckr_scratch[n] = 0; |
3663 | ASSERT(kqum != NULL); |
3664 | |
3665 | /* cleanup any stale slot mapping */ |
3666 | KSD_RESET(ksd); |
3667 | ASSERT(usd != NULL); |
3668 | USD_RESET(usd); |
3669 | |
3670 | /* |
3671 | * Since this packet is freshly allocated and we need to |
3672 | * have the flag set for the attach to succeed, just set |
3673 | * it here rather than calling __packet_finalize(). |
3674 | */ |
3675 | kqum->qum_qflags |= QUM_F_FINALIZED; |
3676 | |
3677 | /* Attach packet to slot */ |
3678 | KR_SLOT_ATTACH_METADATA(kring, ksd, kqum); |
3679 | /* |
3680 | * externalize the packet as it is being transferred to |
3681 | * user space. |
3682 | */ |
3683 | kr_externalize_metadata(kring, pp->pp_max_frags, kqum, p); |
3684 | |
		j = SLOT_NEXT(j, kring->ckr_lim);
3686 | } |
3687 | done: |
3688 | ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail); |
3689 | kring->ckr_ktail = j; |
3690 | return 0; |
3691 | } |
3692 | |
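/*
 * The alloc-ring sync comes in two flavors; both funnel into
 * na_packet_pool_alloc_sync_common(), with the "large" variant
 * appearing to request packets backed by large buffers when the pool
 * supports buffer-on-demand.
 */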
3693 | static int |
3694 | na_packet_pool_alloc_sync(struct __kern_channel_ring *kring, struct proc *p, |
3695 | uint32_t flags) |
3696 | { |
3697 | return na_packet_pool_alloc_sync_common(kring, p, flags, false); |
3698 | } |
3699 | |
3700 | static int |
3701 | na_packet_pool_alloc_large_sync(struct __kern_channel_ring *kring, struct proc *p, |
3702 | uint32_t flags) |
3703 | { |
3704 | return na_packet_pool_alloc_sync_common(kring, p, flags, true); |
3705 | } |
3706 | |
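/*
 * Sync callback for the user buflet pool "free" ring: analogous to
 * na_packet_pool_free_sync(), but each returned slot references a
 * standalone buflet rather than a packet, so buflets are removed from
 * the allocated-by-user list and freed one at a time.
 */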
3707 | static int |
3708 | na_packet_pool_free_buf_sync(struct __kern_channel_ring *kring, struct proc *p, |
3709 | uint32_t flags) |
3710 | { |
3711 | #pragma unused(flags, p) |
3712 | int n, ret = 0; |
3713 | slot_idx_t j; |
3714 | struct __kern_slot_desc *ksd; |
3715 | struct __user_slot_desc *usd; |
3716 | struct __kern_buflet *kbft; |
3717 | struct kern_pbufpool *pp = kring->ckr_pp; |
3718 | |
3719 | /* packet pool list is protected by channel lock */ |
3720 | ASSERT(!KR_KERNEL_ONLY(kring)); |
3721 | |
3722 | /* # of new slots */ |
3723 | n = kring->ckr_rhead - kring->ckr_khead; |
3724 | if (n < 0) { |
3725 | n += kring->ckr_num_slots; |
3726 | } |
3727 | |
3728 | /* nothing to free */ |
3729 | if (__improbable(n == 0)) { |
3730 | SK_DF(SK_VERB_MEM | SK_VERB_SYNC, "%s(%d) kr \"%s\" %s" , |
3731 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
3732 | "nothing to free" ); |
3733 | goto done; |
3734 | } |
3735 | |
3736 | j = kring->ckr_khead; |
3737 | while (n--) { |
3738 | int err; |
3739 | |
3740 | ksd = KR_KSD(kring, j); |
3741 | usd = KR_USD(kring, j); |
3742 | |
3743 | if (__improbable(!SD_VALID_METADATA(usd))) { |
3744 | SK_ERR("bad slot %d 0x%llx" , j, SK_KVA(ksd)); |
3745 | ret = EINVAL; |
3746 | break; |
3747 | } |
3748 | |
3749 | kbft = pp_remove_upp_bft(pp, usd->sd_md_idx, &err); |
3750 | if (__improbable(err != 0)) { |
3751 | SK_ERR("un-allocated buflet %d %p" , usd->sd_md_idx, |
3752 | SK_KVA(kbft)); |
3753 | ret = EINVAL; |
3754 | break; |
3755 | } |
3756 | |
3757 | /* detach and free the packet */ |
3758 | ASSERT(!KSD_VALID_METADATA(ksd)); |
3759 | USD_DETACH_METADATA(usd); |
3760 | pp_free_buflet(pp, kbft); |
		j = SLOT_NEXT(j, kring->ckr_lim);
3762 | } |
3763 | kring->ckr_khead = j; |
	kring->ckr_ktail = SLOT_PREV(j, kring->ckr_lim);
3765 | |
3766 | done: |
3767 | return ret; |
3768 | } |
3769 | |
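/*
 * Sync callback for the user buflet pool "alloc" ring: same working-set
 * logic as na_packet_pool_alloc_sync_common(), but it hands standalone
 * buflets to user space, externalizing each one and attaching it to a
 * slot via KR_SLOT_ATTACH_BUF_METADATA().
 */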
3770 | static int |
3771 | na_packet_pool_alloc_buf_sync(struct __kern_channel_ring *kring, struct proc *p, |
3772 | uint32_t flags) |
3773 | { |
3774 | int b, err; |
3775 | uint32_t n = 0; |
3776 | slot_idx_t j; |
3777 | uint64_t now; |
3778 | uint32_t curr_ws, bh_needed, bh_cnt; |
3779 | struct __kern_slot_desc *ksd; |
3780 | struct __user_slot_desc *usd; |
3781 | struct __kern_buflet *kbft; |
3782 | struct __kern_buflet_ext *kbe; |
3783 | kern_pbufpool_t pp = kring->ckr_pp; |
3784 | pid_t pid = proc_pid(p); |
3785 | |
3786 | /* packet pool list is protected by channel lock */ |
3787 | ASSERT(!KR_KERNEL_ONLY(kring)); |
3788 | ASSERT(!PP_KERNEL_ONLY(pp)); |
3789 | |
3790 | now = _net_uptime; |
3791 | if ((flags & NA_SYNCF_UPP_PURGE) != 0) { |
3792 | if (now - kring->ckr_sync_time >= na_upp_reap_interval) { |
3793 | kring->ckr_alloc_ws = na_upp_reap_min_pkts; |
3794 | } |
3795 | SK_DF(SK_VERB_MEM | SK_VERB_SYNC, |
3796 | "%s: purged curr_ws(%d)" , kring->ckr_name, |
3797 | kring->ckr_alloc_ws); |
3798 | return 0; |
3799 | } |
3800 | /* reclaim the completed slots */ |
3801 | kring->ckr_khead = kring->ckr_rhead; |
3802 | |
3803 | /* # of busy (unclaimed) slots */ |
3804 | b = kring->ckr_ktail - kring->ckr_khead; |
3805 | if (b < 0) { |
3806 | b += kring->ckr_num_slots; |
3807 | } |
3808 | |
3809 | curr_ws = kring->ckr_alloc_ws; |
3810 | if (flags & NA_SYNCF_FORCE_UPP_SYNC) { |
3811 | /* increment the working set by 50% */ |
3812 | curr_ws += (curr_ws >> 1); |
3813 | curr_ws = MIN(curr_ws, kring->ckr_lim); |
3814 | } else { |
3815 | if ((now - kring->ckr_sync_time >= na_upp_ws_hold_time) && |
3816 | (uint32_t)b >= (curr_ws >> 2)) { |
3817 | /* decrease the working set by 25% */ |
3818 | curr_ws -= (curr_ws >> 2); |
3819 | } |
3820 | } |
3821 | curr_ws = MAX(curr_ws, na_upp_alloc_buf_lowat); |
3822 | if (curr_ws > (uint32_t)b) { |
3823 | n = curr_ws - b; |
3824 | } |
3825 | kring->ckr_alloc_ws = curr_ws; |
3826 | kring->ckr_sync_time = now; |
3827 | |
3828 | /* min with # of avail free slots (subtract busy from max) */ |
3829 | n = bh_needed = MIN(n, kring->ckr_lim - b); |
3830 | j = kring->ckr_ktail; |
3831 | SK_DF(SK_VERB_MEM | SK_VERB_SYNC, |
3832 | "%s: curr_ws(%d), n(%d)" , kring->ckr_name, curr_ws, n); |
3833 | |
3834 | if ((bh_cnt = bh_needed) == 0) { |
3835 | goto done; |
3836 | } |
3837 | |
	err = pp_alloc_buflet_batch(pp, kring->ckr_scratch, &bh_cnt,
	    SKMEM_NOSLEEP, false);
3840 | |
3841 | if (bh_cnt == 0) { |
3842 | SK_ERR("kr 0x%llx failed to alloc %u buflets(%d)" , |
3843 | SK_KVA(kring), bh_needed, err); |
3844 | kring->ckr_err_stats.cres_pkt_alloc_failures += bh_needed; |
3845 | } |
3846 | |
3847 | for (n = 0; n < bh_cnt; n++) { |
3848 | struct __user_buflet *ubft; |
3849 | |
3850 | ksd = KR_KSD(kring, j); |
3851 | usd = KR_USD(kring, j); |
3852 | |
3853 | kbft = (struct __kern_buflet *)(kring->ckr_scratch[n]); |
3854 | kbe = (struct __kern_buflet_ext *)kbft; |
3855 | kring->ckr_scratch[n] = 0; |
3856 | ASSERT(kbft != NULL); |
3857 | |
3858 | /* |
3859 | * Add buflet to the allocated list of user packet pool. |
3860 | */ |
3861 | pp_insert_upp_bft(pp, kbft, pid); |
3862 | |
3863 | /* |
3864 | * externalize the buflet as it is being transferred to |
3865 | * user space. |
3866 | */ |
3867 | ubft = __DECONST(struct __user_buflet *, kbe->kbe_buf_user); |
3868 | KBUF_EXTERNALIZE(kbft, ubft, pp); |
3869 | |
3870 | /* cleanup any stale slot mapping */ |
3871 | KSD_RESET(ksd); |
3872 | ASSERT(usd != NULL); |
3873 | USD_RESET(usd); |
3874 | |
3875 | /* Attach buflet to slot */ |
		KR_SLOT_ATTACH_BUF_METADATA(kring, ksd, kbft);
3877 | |
		j = SLOT_NEXT(j, kring->ckr_lim);
3879 | } |
3880 | done: |
3881 | ASSERT(j != kring->ckr_khead || j == kring->ckr_ktail); |
3882 | kring->ckr_ktail = j; |
3883 | return 0; |
3884 | } |
3885 | |
3886 | /* The caller needs to ensure that the NA stays intact */ |
3887 | void |
3888 | na_drain(struct nexus_adapter *na, boolean_t purge) |
3889 | { |
3890 | /* will be cleared on next channel sync */ |
3891 | if (!(os_atomic_or_orig(&na->na_flags, NAF_DRAINING, relaxed) & |
3892 | NAF_DRAINING) && NA_IS_ACTIVE(na)) { |
3893 | SK_DF(SK_VERB_NA, "%s: %s na 0x%llx flags %b" , |
3894 | na->na_name, (purge ? "purging" : "pruning" ), |
3895 | SK_KVA(na), na->na_flags, NAF_BITS); |
3896 | |
3897 | /* reap (purge/prune) caches in the arena */ |
3898 | skmem_arena_reap(na->na_arena, purge); |
3899 | } |
3900 | } |
3901 | |