1 | /* |
2 | * Copyright (c) 2015-2021 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | /* |
30 | * Copyright (C) 2014 Giuseppe Lettieri. All rights reserved. |
31 | * |
32 | * Redistribution and use in source and binary forms, with or without |
33 | * modification, are permitted provided that the following conditions |
34 | * are met: |
35 | * 1. Redistributions of source code must retain the above copyright |
36 | * notice, this list of conditions and the following disclaimer. |
37 | * 2. Redistributions in binary form must reproduce the above copyright |
38 | * notice, this list of conditions and the following disclaimer in the |
39 | * documentation and/or other materials provided with the distribution. |
40 | * |
41 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
42 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
43 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
44 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
45 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
46 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
47 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
48 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
49 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
50 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
51 | * SUCH DAMAGE. |
52 | */ |
53 | |
54 | /* |
55 | * $FreeBSD$ |
56 | * |
57 | * Monitors |
58 | * |
 * netmap monitors can be used to monitor network traffic on another
 * adapter, when the latter adapter is working in netmap mode.
61 | * |
62 | * Monitors offer to userspace the same interface as any other netmap port, |
63 | * with as many pairs of netmap rings as the monitored adapter. |
64 | * However, only the rx rings are actually used. Each monitor rx ring receives |
65 | * the traffic transiting on both the tx and rx corresponding rings in the |
66 | * monitored adapter. During registration, the user can choose if she wants |
67 | * to intercept tx only, rx only, or both tx and rx traffic. |
68 | * |
69 | * If the monitor is not able to cope with the stream of frames, excess traffic |
70 | * will be dropped. |
71 | * |
72 | * If the monitored adapter leaves netmap mode, the monitor has to be restarted. |
73 | * |
74 | * Monitors can be either zero-copy or copy-based. |
75 | * |
76 | * Copy monitors see the frames before they are consumed: |
77 | * |
78 | * - For tx traffic, this is when the application sends them, before they are |
79 | * passed down to the adapter. |
80 | * |
81 | * - For rx traffic, this is when they are received by the adapter, before |
82 | * they are sent up to the application, if any (note that, if no |
83 | * application is reading from a monitored ring, the ring will eventually |
84 | * fill up and traffic will stop). |
85 | * |
86 | * Zero-copy monitors only see the frames after they have been consumed: |
87 | * |
88 | * - For tx traffic, this is after the slots containing the frames have been |
 *   marked as free. Note that this may happen considerably later than the
 *   actual frame transmission, since freeing of slots is often done lazily.
91 | * |
92 | * - For rx traffic, this is after the consumer on the monitored adapter |
93 | * has released them. In most cases, the consumer is a userspace |
94 | * application which may have modified the frame contents. |
95 | * |
96 | * Several copy monitors may be active on any ring. Zero-copy monitors, |
97 | * instead, need exclusive access to each of the monitored rings. This may |
98 | * change in the future, if we implement zero-copy monitor chaining. |
99 | * |
100 | */ |
101 | |
102 | #include <skywalk/os_skywalk_private.h> |
103 | #include <skywalk/nexus/monitor/nx_monitor.h> |
104 | |
105 | static int nx_mon_na_txsync(struct __kern_channel_ring *, struct proc *, |
106 | uint32_t); |
107 | static int nx_mon_na_rxsync(struct __kern_channel_ring *, struct proc *, |
108 | uint32_t); |
109 | static int nx_mon_na_krings_create(struct nexus_adapter *, |
110 | struct kern_channel *); |
111 | static void nx_mon_na_krings_delete(struct nexus_adapter *, |
112 | struct kern_channel *, boolean_t); |
113 | static uint32_t nx_mon_txrx2chmode(enum txrx); |
114 | static int nx_mon_kr_alloc(struct __kern_channel_ring *, uint32_t); |
115 | static void nx_mon_kr_dealloc(struct __kern_channel_ring *); |
116 | static int nx_mon_na_krings_locks(struct nexus_adapter *, |
117 | uint32_t[NR_TXRX], uint32_t[NR_TXRX]); |
118 | static void nx_mon_na_krings_unlock(struct nexus_adapter *, |
119 | const uint32_t[NR_TXRX], const uint32_t[NR_TXRX]); |
static int nx_mon_enable(struct nexus_adapter *, boolean_t);
121 | static void nx_mon_disable(struct nexus_adapter *); |
122 | static int nx_mon_add(struct __kern_channel_ring *, |
123 | struct __kern_channel_ring *, boolean_t); |
124 | static void nx_mon_del(struct __kern_channel_ring *, |
125 | struct __kern_channel_ring *, boolean_t); |
126 | static int nx_mon_na_activate_common(struct nexus_adapter *, |
127 | na_activate_mode_t, boolean_t); |
128 | static pkt_copy_from_pkt_t nx_mon_quantum_copy_64x; |
129 | |
130 | static int nx_mon_zcopy_parent_sync(struct __kern_channel_ring *, |
131 | struct proc *, uint32_t, enum txrx); |
132 | static int nx_mon_zcopy_na_activate(struct nexus_adapter *, na_activate_mode_t); |
133 | static void nx_mon_zcopy_na_dtor(struct nexus_adapter *); |
134 | |
135 | static void nx_mon_parent_sync(struct __kern_channel_ring *, struct proc *, |
136 | slot_idx_t, int); |
137 | static int nx_mon_na_activate(struct nexus_adapter *, na_activate_mode_t); |
138 | static void nx_mon_na_dtor(struct nexus_adapter *); |
139 | |
140 | /* |
141 | * monitors work by replacing the nm_sync() and possibly the |
142 | * nm_notify() callbacks in the monitored rings. |
143 | */ |
144 | static int nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *, |
145 | struct proc *, uint32_t); |
146 | static int nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *, |
147 | struct proc *, uint32_t); |
148 | static int nx_mon_parent_txsync(struct __kern_channel_ring *, |
149 | struct proc *, uint32_t); |
150 | static int nx_mon_parent_rxsync(struct __kern_channel_ring *, |
151 | struct proc *, uint32_t); |
152 | static int nx_mon_parent_notify(struct __kern_channel_ring *, |
153 | struct proc *, uint32_t); |
154 | |
155 | static void nx_mon_dom_init(struct nxdom *); |
156 | static void nx_mon_dom_terminate(struct nxdom *); |
157 | static void nx_mon_dom_fini(struct nxdom *); |
158 | static int nx_mon_dom_bind_port(struct kern_nexus *, nexus_port_t *, |
159 | struct nxbind *, void *); |
160 | static int nx_mon_dom_unbind_port(struct kern_nexus *, nexus_port_t); |
161 | static int nx_mon_dom_connect(struct kern_nexus_domain_provider *, |
162 | struct kern_nexus *, struct kern_channel *, struct chreq *, |
163 | struct kern_channel *, struct nxbind *, struct proc *); |
164 | static void nx_mon_dom_disconnect(struct kern_nexus_domain_provider *, |
165 | struct kern_nexus *, struct kern_channel *); |
166 | static void nx_mon_dom_defunct(struct kern_nexus_domain_provider *, |
167 | struct kern_nexus *, struct kern_channel *, struct proc *); |
168 | static void nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *, |
169 | struct kern_nexus *, struct kern_channel *, boolean_t); |
170 | |
171 | static int nx_mon_prov_init(struct kern_nexus_domain_provider *); |
172 | static int nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *, |
173 | const struct nxprov_params *, struct nxprov_adjusted_params *); |
174 | static int nx_mon_prov_params(struct kern_nexus_domain_provider *, |
175 | const uint32_t, const struct nxprov_params *, struct nxprov_params *, |
176 | struct skmem_region_params[SKMEM_REGIONS], uint32_t); |
177 | static int nx_mon_prov_mem_new(struct kern_nexus_domain_provider *, |
178 | struct kern_nexus *, struct nexus_adapter *); |
179 | static void nx_mon_prov_fini(struct kern_nexus_domain_provider *); |
180 | |
181 | static struct nexus_monitor_adapter *na_mon_alloc(zalloc_flags_t); |
182 | static void na_mon_free(struct nexus_adapter *); |
183 | |
184 | struct nxdom nx_monitor_dom_s = { |
185 | .nxdom_prov_head = |
186 | STAILQ_HEAD_INITIALIZER(nx_monitor_dom_s.nxdom_prov_head), |
187 | .nxdom_type = NEXUS_TYPE_MONITOR, |
188 | .nxdom_md_type = NEXUS_META_TYPE_QUANTUM, |
189 | .nxdom_md_subtype = NEXUS_META_SUBTYPE_PAYLOAD, |
	.nxdom_name = "monitor",
191 | /* |
192 | * The following values don't really matter much, as a monitor |
193 | * isn't usable on its own; we just define them as non-zeroes. |
194 | */ |
195 | .nxdom_ports = { |
196 | .nb_def = 1, |
197 | .nb_min = 1, |
198 | .nb_max = 1, |
199 | }, |
200 | .nxdom_tx_rings = { |
201 | .nb_def = 1, |
202 | .nb_min = 1, |
203 | .nb_max = 1, |
204 | }, |
205 | .nxdom_rx_rings = { |
206 | .nb_def = 1, |
207 | .nb_min = 1, |
208 | .nb_max = 1, |
209 | }, |
210 | .nxdom_tx_slots = { |
211 | .nb_def = 1, |
212 | .nb_min = 1, |
213 | .nb_max = 1, |
214 | }, |
215 | .nxdom_rx_slots = { |
216 | .nb_def = 1, |
217 | .nb_min = 1, |
218 | .nb_max = 1, |
219 | }, |
220 | .nxdom_buf_size = { |
221 | .nb_def = 64, |
222 | .nb_min = 64, |
223 | .nb_max = 64, |
224 | }, |
225 | .nxdom_large_buf_size = { |
226 | .nb_def = 0, |
227 | .nb_min = 0, |
228 | .nb_max = 0, |
229 | }, |
230 | .nxdom_meta_size = { |
231 | .nb_def = NX_METADATA_OBJ_MIN_SZ, |
232 | .nb_min = NX_METADATA_OBJ_MIN_SZ, |
233 | .nb_max = NX_METADATA_USR_MAX_SZ, |
234 | }, |
235 | .nxdom_stats_size = { |
236 | .nb_def = 0, |
237 | .nb_min = 0, |
238 | .nb_max = NX_STATS_MAX_SZ, |
239 | }, |
240 | .nxdom_pipes = { |
241 | .nb_def = 0, |
242 | .nb_min = 0, |
243 | .nb_max = 0, |
244 | }, |
245 | .nxdom_flowadv_max = { |
246 | .nb_def = 0, |
247 | .nb_min = 0, |
248 | .nb_max = NX_FLOWADV_MAX, |
249 | }, |
250 | .nxdom_nexusadv_size = { |
251 | .nb_def = 0, |
252 | .nb_min = 0, |
253 | .nb_max = NX_NEXUSADV_MAX_SZ, |
254 | }, |
255 | .nxdom_capabilities = { |
256 | .nb_def = NXPCAP_USER_CHANNEL, |
257 | .nb_min = NXPCAP_USER_CHANNEL, |
258 | .nb_max = NXPCAP_USER_CHANNEL, |
259 | }, |
260 | .nxdom_qmap = { |
261 | .nb_def = NEXUS_QMAP_TYPE_INVALID, |
262 | .nb_min = NEXUS_QMAP_TYPE_INVALID, |
263 | .nb_max = NEXUS_QMAP_TYPE_INVALID, |
264 | }, |
265 | .nxdom_max_frags = { |
266 | .nb_def = NX_PBUF_FRAGS_DEFAULT, |
267 | .nb_min = NX_PBUF_FRAGS_MIN, |
268 | .nb_max = NX_PBUF_FRAGS_DEFAULT, |
269 | }, |
270 | .nxdom_init = nx_mon_dom_init, |
271 | .nxdom_terminate = nx_mon_dom_terminate, |
272 | .nxdom_fini = nx_mon_dom_fini, |
273 | .nxdom_find_port = NULL, |
274 | .nxdom_port_is_reserved = NULL, |
275 | .nxdom_bind_port = nx_mon_dom_bind_port, |
276 | .nxdom_unbind_port = nx_mon_dom_unbind_port, |
277 | .nxdom_connect = nx_mon_dom_connect, |
278 | .nxdom_disconnect = nx_mon_dom_disconnect, |
279 | .nxdom_defunct = nx_mon_dom_defunct, |
280 | .nxdom_defunct_finalize = nx_mon_dom_defunct_finalize, |
281 | }; |
282 | |
283 | static struct kern_nexus_domain_provider nx_monitor_prov_s = { |
284 | .nxdom_prov_name = NEXUS_PROVIDER_MONITOR, |
285 | .nxdom_prov_flags = NXDOMPROVF_DEFAULT, |
286 | .nxdom_prov_cb = { |
287 | .dp_cb_init = nx_mon_prov_init, |
288 | .dp_cb_fini = nx_mon_prov_fini, |
289 | .dp_cb_params = nx_mon_prov_params, |
290 | .dp_cb_mem_new = nx_mon_prov_mem_new, |
291 | .dp_cb_config = NULL, |
292 | .dp_cb_nx_ctor = NULL, |
293 | .dp_cb_nx_dtor = NULL, |
294 | .dp_cb_nx_mem_info = NULL, /* not supported */ |
295 | .dp_cb_nx_mib_get = NULL, |
296 | }, |
297 | }; |
298 | |
299 | static SKMEM_TYPE_DEFINE(na_mon_zone, struct nexus_monitor_adapter); |
300 | |
301 | #define SKMEM_TAG_MONITORS "com.apple.skywalk.monitors" |
302 | static SKMEM_TAG_DEFINE(skmem_tag_monitors, SKMEM_TAG_MONITORS); |
303 | |
304 | static void |
305 | nx_mon_dom_init(struct nxdom *nxdom) |
306 | { |
307 | SK_LOCK_ASSERT_HELD(); |
308 | ASSERT(!(nxdom->nxdom_flags & NEXUSDOMF_INITIALIZED)); |
309 | |
310 | (void) nxdom_prov_add(nxdom, &nx_monitor_prov_s); |
311 | } |
312 | |
313 | static void |
314 | nx_mon_dom_terminate(struct nxdom *nxdom) |
315 | { |
316 | struct kern_nexus_domain_provider *nxdom_prov, *tnxdp; |
317 | |
318 | STAILQ_FOREACH_SAFE(nxdom_prov, &nxdom->nxdom_prov_head, |
319 | nxdom_prov_link, tnxdp) { |
320 | (void) nxdom_prov_del(nxdom_prov); |
321 | } |
322 | } |
323 | |
324 | static void |
325 | nx_mon_dom_fini(struct nxdom *nxdom) |
326 | { |
327 | #pragma unused(nxdom) |
328 | } |
329 | |
330 | __attribute__((noreturn)) |
331 | static int |
332 | nx_mon_dom_bind_port(struct kern_nexus *nx, nexus_port_t *nx_port, |
333 | struct nxbind *nxb, void *info) |
334 | { |
335 | #pragma unused(nx, nx_port, nxb, info) |
336 | VERIFY(0); |
337 | /* NOTREACHED */ |
338 | __builtin_unreachable(); |
339 | } |
340 | |
341 | __attribute__((noreturn)) |
342 | static int |
343 | nx_mon_dom_unbind_port(struct kern_nexus *nx, nexus_port_t nx_port) |
344 | { |
345 | #pragma unused(nx, nx_port) |
346 | VERIFY(0); |
347 | /* NOTREACHED */ |
348 | __builtin_unreachable(); |
349 | } |
350 | |
351 | __attribute__((noreturn)) |
352 | static int |
353 | nx_mon_dom_connect(struct kern_nexus_domain_provider *nxdom_prov, |
354 | struct kern_nexus *nx, struct kern_channel *ch, struct chreq *chr, |
355 | struct kern_channel *ch0, struct nxbind *nxb, struct proc *p) |
356 | { |
357 | #pragma unused(nxdom_prov, nx, ch, chr, ch0, nxb, p) |
358 | VERIFY(0); |
359 | /* NOTREACHED */ |
360 | __builtin_unreachable(); |
361 | } |
362 | |
363 | __attribute__((noreturn)) |
364 | static void |
365 | nx_mon_dom_disconnect(struct kern_nexus_domain_provider *nxdom_prov, |
366 | struct kern_nexus *nx, struct kern_channel *ch) |
367 | { |
368 | #pragma unused(nxdom_prov, nx, ch) |
369 | VERIFY(0); |
370 | /* NOTREACHED */ |
371 | __builtin_unreachable(); |
372 | } |
373 | |
374 | static void |
375 | nx_mon_dom_defunct(struct kern_nexus_domain_provider *nxdom_prov, |
376 | struct kern_nexus *nx, struct kern_channel *ch, struct proc *p) |
377 | { |
378 | #pragma unused(nxdom_prov, nx, ch, p) |
379 | } |
380 | |
381 | static void |
382 | nx_mon_dom_defunct_finalize(struct kern_nexus_domain_provider *nxdom_prov, |
383 | struct kern_nexus *nx, struct kern_channel *ch, boolean_t locked) |
384 | { |
385 | #pragma unused(nxdom_prov, nx, ch, locked) |
386 | } |
387 | |
388 | static int |
389 | nx_mon_prov_init(struct kern_nexus_domain_provider *nxdom_prov) |
390 | { |
391 | #pragma unused(nxdom_prov) |
392 | SK_D("initializing %s" , nxdom_prov->nxdom_prov_name); |
393 | return 0; |
394 | } |
395 | |
396 | static int |
397 | nx_mon_prov_params_adjust(const struct kern_nexus_domain_provider *nxdom_prov, |
398 | const struct nxprov_params *nxp, struct nxprov_adjusted_params *adj) |
399 | { |
400 | #pragma unused(nxdom_prov, nxp, adj) |
401 | |
402 | return 0; |
403 | } |
404 | |
405 | static int |
406 | nx_mon_prov_params(struct kern_nexus_domain_provider *nxdom_prov, |
407 | const uint32_t req, const struct nxprov_params *nxp0, |
408 | struct nxprov_params *nxp, struct skmem_region_params srp[SKMEM_REGIONS], |
409 | uint32_t pp_region_config_flags) |
410 | { |
411 | struct nxdom *nxdom = nxdom_prov->nxdom_prov_dom; |
412 | |
413 | return nxprov_params_adjust(nxdom_prov, req, nxp0, nxp, srp, |
414 | nxdom, nxdom, nxdom, pp_region_config_flags, |
	    nx_mon_prov_params_adjust);
416 | } |
417 | |
418 | static int |
419 | nx_mon_prov_mem_new(struct kern_nexus_domain_provider *nxdom_prov, |
420 | struct kern_nexus *nx, struct nexus_adapter *na) |
421 | { |
422 | #pragma unused(nxdom_prov) |
423 | int err = 0; |
424 | |
425 | SK_DF(SK_VERB_MONITOR, |
426 | "nx 0x%llx (\"%s\":\"%s\") na \"%s\" (0x%llx)" , SK_KVA(nx), |
427 | NX_DOM(nx)->nxdom_name, nxdom_prov->nxdom_prov_name, na->na_name, |
428 | SK_KVA(na)); |
429 | |
430 | ASSERT(na->na_arena == NULL); |
431 | ASSERT(NX_USER_CHANNEL_PROV(nx)); |
432 | /* |
433 | * The underlying nexus adapter uses the same memory allocator |
434 | * as the monitored adapter; don't store the pp in the nexus. |
435 | * |
436 | * This means that clients calling kern_nexus_get_pbufpool() |
437 | * will get NULL, but this is fine since we don't expose the |
438 | * monitor to external kernel clients. |
439 | */ |
440 | na->na_arena = skmem_arena_create_for_nexus(na, |
441 | NX_PROV(nx)->nxprov_region_params, NULL, NULL, FALSE, |
442 | FALSE, NULL, &err); |
443 | ASSERT(na->na_arena != NULL || err != 0); |
444 | |
445 | return err; |
446 | } |
447 | |
448 | static void |
449 | nx_mon_prov_fini(struct kern_nexus_domain_provider *nxdom_prov) |
450 | { |
451 | #pragma unused(nxdom_prov) |
452 | SK_D("destroying %s" , nxdom_prov->nxdom_prov_name); |
453 | } |
454 | |
455 | static struct nexus_monitor_adapter * |
456 | na_mon_alloc(zalloc_flags_t how) |
457 | { |
458 | struct nexus_monitor_adapter *mna; |
459 | |
460 | _CASSERT(offsetof(struct nexus_monitor_adapter, mna_up) == 0); |
461 | |
462 | mna = zalloc_flags(na_mon_zone, how | Z_ZERO); |
463 | if (mna) { |
464 | mna->mna_up.na_type = NA_MONITOR; |
465 | mna->mna_up.na_free = na_mon_free; |
466 | } |
467 | return mna; |
468 | } |
469 | |
470 | static void |
471 | na_mon_free(struct nexus_adapter *na) |
472 | { |
473 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
474 | |
475 | ASSERT(mna->mna_up.na_refcount == 0); |
	SK_DF(SK_VERB_MEM, "mna 0x%llx FREE", SK_KVA(mna));
	bzero(mna, sizeof(*mna));
478 | zfree(na_mon_zone, mna); |
479 | } |
480 | |
481 | /* |
 * Functions common to both kinds of monitors.
483 | */ |
484 | |
485 | /* |
486 | * nm_sync callback for the monitor's own tx rings. |
 * This makes no sense and always returns an error.
488 | */ |
489 | static int |
490 | nx_mon_na_txsync(struct __kern_channel_ring *kring, struct proc *p, |
491 | uint32_t flags) |
492 | { |
493 | #pragma unused(kring, p, flags) |
494 | SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_TX, |
495 | "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x" , |
496 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
497 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id, |
498 | flags); |
499 | return EIO; |
500 | } |
501 | |
502 | /* |
503 | * nm_sync callback for the monitor's own rx rings. |
504 | * Note that the lock in nx_mon_zcopy_parent_sync only protects |
505 | * writers among themselves. Synchronization between writers |
506 | * (i.e., nx_mon_zcopy_parent_txsync and nx_mon_zcopy_parent_rxsync) |
 * and readers (i.e., nx_mon_na_rxsync) relies on memory barriers.
508 | */ |
509 | static int |
510 | nx_mon_na_rxsync(struct __kern_channel_ring *kring, struct proc *p, |
511 | uint32_t flags) |
512 | { |
513 | #pragma unused(p, flags) |
514 | SK_DF(SK_VERB_MONITOR | SK_VERB_SYNC | SK_VERB_RX, |
515 | "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b ring %u flags 0%x" , |
516 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
517 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, kring->ckr_ring_id, |
518 | flags); |
519 | kring->ckr_khead = kring->ckr_rhead; |
520 | os_atomic_thread_fence(seq_cst); |
521 | return 0; |
522 | } |
523 | |
524 | /* |
525 | * na_krings_create callbacks for monitors. |
526 | * We could use the default netmap_hw_krings_zmon, but |
527 | * we don't need the nx_mbq. |
528 | */ |
529 | static int |
530 | nx_mon_na_krings_create(struct nexus_adapter *na, struct kern_channel *ch) |
531 | { |
532 | ASSERT(na->na_type == NA_MONITOR); |
533 | return na_rings_mem_setup(na, FALSE, ch); |
534 | } |
535 | |
536 | /* na_krings_delete callback for monitors */ |
537 | static void |
538 | nx_mon_na_krings_delete(struct nexus_adapter *na, struct kern_channel *ch, |
539 | boolean_t defunct) |
540 | { |
541 | ASSERT(na->na_type == NA_MONITOR); |
542 | na_rings_mem_teardown(na, ch, defunct); |
543 | } |
544 | |
545 | __attribute__((always_inline)) |
546 | static inline uint32_t |
547 | nx_mon_txrx2chmode(enum txrx t) |
548 | { |
549 | return t == NR_RX ? CHMODE_MONITOR_RX : CHMODE_MONITOR_TX; |
550 | } |
551 | |
552 | /* allocate the monitors array in the monitored kring */ |
553 | static int |
554 | nx_mon_kr_alloc(struct __kern_channel_ring *kring, uint32_t n) |
555 | { |
556 | struct __kern_channel_ring **nm; |
557 | |
558 | if (n <= kring->ckr_max_monitors) { |
		/* we already have more entries than requested */
560 | return 0; |
561 | } |
562 | |
563 | nm = sk_realloc_type_array(struct __kern_channel_ring *, |
564 | kring->ckr_max_monitors, n, kring->ckr_monitors, |
565 | Z_WAITOK, skmem_tag_monitors); |
566 | if (nm == NULL) { |
567 | return ENOMEM; |
568 | } |
569 | |
570 | kring->ckr_monitors = nm; |
571 | kring->ckr_max_monitors = n; |
572 | |
573 | return 0; |
574 | } |
575 | |
/* deallocate the monitor array in the monitored kring */
577 | static void |
578 | nx_mon_kr_dealloc(struct __kern_channel_ring *kring) |
579 | { |
580 | if (kring->ckr_monitors != NULL) { |
581 | if (kring->ckr_n_monitors > 0) { |
582 | SK_ERR("freeing not empty monitor array for \"%s\" " |
583 | "(%u dangling monitors)!" , kring->ckr_name, |
584 | kring->ckr_n_monitors); |
585 | } |
586 | sk_free_type_array(struct __kern_channel_ring *, |
587 | kring->ckr_max_monitors, kring->ckr_monitors); |
588 | kring->ckr_monitors = NULL; |
589 | kring->ckr_max_monitors = 0; |
590 | kring->ckr_n_monitors = 0; |
591 | } |
592 | } |
593 | |
594 | static int |
595 | nx_mon_na_krings_locks(struct nexus_adapter *na, |
596 | uint32_t qfirst[NR_TXRX], uint32_t qlast[NR_TXRX]) |
597 | { |
598 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
599 | struct nexus_adapter *pna = mna->mna_pna; |
600 | enum txrx t; |
601 | int err = 0; |
602 | |
603 | for_rx_tx(t) { |
604 | uint32_t i; |
605 | |
606 | if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) { |
607 | continue; |
608 | } |
609 | |
610 | qfirst[t] = qlast[t] = mna->mna_first[t]; |
611 | |
612 | /* synchronize with concurrently running nm_sync()s */ |
613 | for (i = mna->mna_first[t]; i < mna->mna_last[t]; i++) { |
614 | struct __kern_channel_ring *kring; |
615 | |
616 | /* the parent adapter's kring */ |
617 | kring = &NAKR(na: pna, t)[i]; |
618 | kr_stop(kr: kring, state: KR_LOCKED); |
619 | qlast[t] = i + 1; |
620 | } |
621 | if (err != 0) { |
622 | break; |
623 | } |
624 | } |
625 | |
626 | return err; |
627 | } |
628 | |
629 | static void |
630 | nx_mon_na_krings_unlock(struct nexus_adapter *na, |
631 | const uint32_t qfirst[NR_TXRX], const uint32_t qlast[NR_TXRX]) |
632 | { |
633 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
634 | struct nexus_adapter *pna = mna->mna_pna; |
635 | enum txrx t; |
636 | |
637 | for_rx_tx(t) { |
638 | uint32_t i; |
639 | |
640 | if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) { |
641 | continue; |
642 | } |
643 | |
644 | /* synchronize with concurrently running nm_sync()s */ |
645 | for (i = qfirst[t]; i < qlast[t]; i++) { |
646 | struct __kern_channel_ring *kring; |
647 | |
648 | /* the parent adapter's kring */ |
649 | kring = &NAKR(na: pna, t)[i]; |
650 | kr_start(kring); |
651 | } |
652 | } |
653 | } |
654 | |
655 | static int |
656 | nx_mon_enable(struct nexus_adapter *na, boolean_t zcopy) |
657 | { |
658 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
659 | struct nexus_adapter *pna = mna->mna_pna; |
	struct skmem_arena_nexus *na_arena = skmem_arena_nexus(pna->na_arena);
661 | uint32_t qfirst[NR_TXRX], qlast[NR_TXRX]; |
662 | enum txrx t; |
663 | int err = 0; |
664 | uint32_t i; |
665 | |
666 | ASSERT(!(na->na_flags & NAF_ACTIVE)); |
667 | |
668 | bzero(s: &qfirst, n: sizeof(qfirst)); |
669 | bzero(s: &qlast, n: sizeof(qlast)); |
670 | |
671 | /* |
672 | * Acquire the target kring(s). q{first,last}0 represent the |
673 | * target ring set. q{first,last} represent the ones that have |
674 | * been successfully acquired. In the event the acquisition |
675 | * fails, we must release any previously-acquired rings. |
676 | */ |
677 | if ((err = nx_mon_na_krings_locks(na, qfirst, qlast)) != 0) { |
678 | goto unlock; |
679 | } |
680 | |
681 | ASSERT(na_arena->arn_rx_pp == na_arena->arn_tx_pp); |
682 | if (na_arena->arn_rx_pp->pp_max_frags > 1) { |
683 | VERIFY(na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET); |
684 | mna->mna_pkt_copy_from_pkt = pkt_copy_multi_buflet_from_pkt; |
685 | } else { |
686 | if (na_arena->arn_rx_pp->pp_md_type == NEXUS_META_TYPE_PACKET) { |
687 | mna->mna_pkt_copy_from_pkt = pkt_copy_from_pkt; |
688 | } else { |
689 | mna->mna_pkt_copy_from_pkt = nx_mon_quantum_copy_64x; |
690 | } |
691 | } |
692 | |
693 | for_rx_tx(t) { |
694 | if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) { |
695 | continue; |
696 | } |
697 | |
698 | for (i = qfirst[t]; i < qlast[t]; i++) { |
699 | struct __kern_channel_ring *kring, *mkring; |
700 | |
701 | /* the parent adapter's kring */ |
702 | kring = &NAKR(na: pna, t)[i]; |
703 | mkring = &na->na_rx_rings[i]; |
704 | err = nx_mon_add(mkring, kring, zcopy); |
705 | if (err != 0) { |
706 | break; |
707 | } |
708 | } |
709 | if (err != 0) { |
710 | break; |
711 | } |
712 | } |
713 | |
714 | if (err == 0) { |
715 | os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed); |
716 | goto unlock; |
717 | } |
718 | |
719 | for_rx_tx(t) { |
720 | if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) { |
721 | continue; |
722 | } |
723 | |
724 | for (i = qfirst[t]; i < qlast[t]; i++) { |
725 | struct __kern_channel_ring *kring, *mkring; |
726 | |
727 | /* the parent adapter's kring */ |
728 | kring = &NAKR(na: pna, t)[i]; |
729 | mkring = &na->na_rx_rings[i]; |
730 | nx_mon_del(mkring, kring, FALSE); |
731 | } |
732 | } |
733 | ASSERT(!(na->na_flags & NAF_ACTIVE)); |
734 | |
735 | unlock: |
736 | nx_mon_na_krings_unlock(na, qfirst, qlast); |
737 | |
738 | SK_DF(err ? SK_VERB_ERROR : SK_VERB_MONITOR, |
739 | "%s (0x%llx): mode 0x%x txrings[%u,%u], rxrings[%u,%u] err %d" , |
740 | na->na_name, SK_KVA(na), mna->mna_mode, qfirst[NR_TX], qlast[NR_TX], |
741 | qfirst[NR_RX], qlast[NR_RX], err); |
742 | |
743 | return err; |
744 | } |
745 | |
746 | static void |
747 | nx_mon_disable(struct nexus_adapter *na) |
748 | { |
749 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
750 | struct nexus_adapter *pna = mna->mna_pna; |
751 | uint32_t qfirst[NR_TXRX], qlast[NR_TXRX]; |
752 | enum txrx t; |
753 | int err; |
754 | uint32_t i; |
755 | |
756 | ASSERT(na->na_flags & NAF_ACTIVE); |
757 | |
758 | bzero(s: &qfirst, n: sizeof(qfirst)); |
759 | bzero(s: &qlast, n: sizeof(qlast)); |
760 | |
761 | /* blocking kring(s) acquisition; must not fail */ |
762 | err = nx_mon_na_krings_locks(na, qfirst, qlast); |
763 | ASSERT(err == 0); |
764 | mna->mna_pkt_copy_from_pkt = NULL; |
765 | for_rx_tx(t) { |
766 | if (!(mna->mna_mode & nx_mon_txrx2chmode(t))) { |
767 | continue; |
768 | } |
769 | |
770 | for (i = qfirst[t]; i < qlast[t]; i++) { |
771 | struct __kern_channel_ring *kring, *mkring; |
772 | |
773 | kring = &NAKR(na: pna, t)[i]; |
774 | mkring = &na->na_rx_rings[i]; |
775 | nx_mon_del(mkring, kring, FALSE); |
776 | } |
777 | } |
778 | os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed); |
779 | |
780 | nx_mon_na_krings_unlock(na, qfirst, qlast); |
781 | } |
782 | |
783 | /* |
784 | * Add the monitor mkring to the list of monitors of kring. |
785 | * If this is the first monitor, intercept the callbacks |
786 | */ |
787 | static int |
788 | nx_mon_add(struct __kern_channel_ring *mkring, |
789 | struct __kern_channel_ring *kring, boolean_t zcopy) |
790 | { |
791 | int error; |
792 | |
793 | /* make sure the monitor array exists and is big enough */ |
	error = nx_mon_kr_alloc(kring, kring->ckr_n_monitors + 1);
795 | if (error != 0) { |
796 | return error; |
797 | } |
798 | |
799 | kring->ckr_monitors[kring->ckr_n_monitors] = mkring; |
800 | mkring->ckr_mon_pos = kring->ckr_n_monitors; |
801 | kring->ckr_n_monitors++; |
802 | if (kring->ckr_n_monitors == 1) { |
803 | /* this is the first monitor, intercept callbacks */ |
804 | SK_DF(SK_VERB_MONITOR, |
805 | "mkr \"%s\" (0x%llx) krflags 0x%b intercept callbacks " |
806 | "on kr \"%s\" (0x%llx) krflags 0x%b" , mkring->ckr_name, |
807 | SK_KVA(mkring), mkring->ckr_flags, CKRF_BITS, |
808 | kring->ckr_name, SK_KVA(kring), kring->ckr_flags, |
809 | CKRF_BITS); |
810 | kring->ckr_mon_sync = kring->ckr_na_sync; |
811 | /* |
812 | * zcopy monitors do not override nm_notify(), but |
813 | * we save the original one regardless, so that |
814 | * nx_mon_del() does not need to know the |
815 | * monitor type |
816 | */ |
817 | kring->ckr_mon_notify = kring->ckr_na_notify; |
818 | if (kring->ckr_tx == NR_TX) { |
819 | kring->ckr_na_sync = |
820 | (zcopy ? nx_mon_zcopy_parent_txsync : |
821 | nx_mon_parent_txsync); |
822 | } else { |
823 | kring->ckr_na_sync = |
824 | (zcopy ? nx_mon_zcopy_parent_rxsync : |
825 | nx_mon_parent_rxsync); |
826 | if (!zcopy) { |
827 | /* also intercept notify */ |
828 | kring->ckr_na_notify = nx_mon_parent_notify; |
829 | kring->ckr_mon_tail = kring->ckr_ktail; |
830 | } |
831 | } |
832 | } else { |
833 | SK_DF(SK_VERB_MONITOR, |
834 | "mkr \"%s\" (0x%llx) krflags 0x%b already intercept " |
835 | "callbacks on kr \"%s\" (0x%llx) krflags 0x%b, " |
836 | "%u monitors" , mkring->ckr_name, SK_KVA(mkring), |
837 | mkring->ckr_flags, CKRF_BITS, kring->ckr_name, |
838 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, |
839 | kring->ckr_n_monitors); |
840 | } |
841 | return 0; |
842 | } |
843 | |
844 | /* |
845 | * Remove the monitor mkring from the list of monitors of kring. |
846 | * If this is the last monitor, restore the original callbacks |
847 | */ |
848 | static void |
849 | nx_mon_del(struct __kern_channel_ring *mkring, |
850 | struct __kern_channel_ring *kring, boolean_t all) |
851 | { |
852 | ASSERT(kring->ckr_n_monitors != 0); |
853 | if (all) { |
854 | kring->ckr_n_monitors = 0; |
855 | } else { |
856 | kring->ckr_n_monitors--; |
857 | if (mkring->ckr_mon_pos != kring->ckr_n_monitors) { |
858 | kring->ckr_monitors[mkring->ckr_mon_pos] = |
859 | kring->ckr_monitors[kring->ckr_n_monitors]; |
860 | kring->ckr_monitors[mkring->ckr_mon_pos]->ckr_mon_pos = |
861 | mkring->ckr_mon_pos; |
862 | } |
863 | kring->ckr_monitors[kring->ckr_n_monitors] = NULL; |
864 | } |
865 | if (kring->ckr_n_monitors == 0) { |
866 | /* |
867 | * This was the last monitor, restore callbacks |
868 | * and delete monitor array. |
869 | */ |
870 | SK_DF(SK_VERB_MONITOR, |
871 | "restoring sync callback on kr \"%s\" (0x%llx) " |
872 | "krflags 0x%b" , kring->ckr_name, SK_KVA(kring), |
873 | kring->ckr_flags, CKRF_BITS); |
874 | kring->ckr_na_sync = kring->ckr_mon_sync; |
875 | kring->ckr_mon_sync = NULL; |
876 | if (kring->ckr_tx == NR_RX) { |
877 | SK_DF(SK_VERB_MONITOR, |
878 | "restoring notify callback on kr \"%s\" (0x%llx) " |
879 | "krflags 0x%b" , kring->ckr_name, SK_KVA(kring), |
880 | kring->ckr_flags, CKRF_BITS); |
881 | kring->ckr_na_notify = kring->ckr_mon_notify; |
882 | kring->ckr_mon_notify = NULL; |
883 | } |
884 | nx_mon_kr_dealloc(kring); |
885 | } else { |
886 | SK_DF(SK_VERB_MONITOR, |
887 | "NOT restoring callbacks on kr \"%s\" (0x%llx) " |
888 | "krflags 0x%b, %u monitors left" , kring->ckr_name, |
889 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, |
890 | kring->ckr_n_monitors); |
891 | } |
892 | } |
893 | |
894 | /* |
895 | * This is called when the monitored adapter leaves skywalk mode (see |
896 | * na_unbind_channel). We need to notify the monitors that the monitored |
897 | * rings are gone. We do this by setting their mna->mna_pna to NULL. |
898 | * Note that the rings must be stopped when this happens, so no monitor |
899 | * ring callback can be active. |
900 | */ |
901 | void |
902 | nx_mon_stop(struct nexus_adapter *na) |
903 | { |
904 | enum txrx t; |
905 | |
906 | SK_LOCK_ASSERT_HELD(); |
907 | |
908 | /* skip if this adapter has no allocated rings */ |
909 | if (na->na_tx_rings == NULL) { |
910 | return; |
911 | } |
912 | |
913 | na_disable_all_rings(na); |
914 | |
915 | for_rx_tx(t) { |
916 | uint32_t i; |
917 | |
918 | for (i = 0; i < na_get_nrings(na, t); i++) { |
919 | struct __kern_channel_ring *kring = &NAKR(na, t)[i]; |
920 | uint32_t j; |
921 | |
922 | for (j = 0; j < kring->ckr_n_monitors; j++) { |
923 | struct __kern_channel_ring *mkring = |
924 | kring->ckr_monitors[j]; |
925 | struct nexus_monitor_adapter *mna = |
926 | (struct nexus_monitor_adapter *) |
927 | KRNA(mkring); |
928 | |
929 | /* forget about this adapter */ |
930 | if (mna->mna_pna != NULL) { |
931 | ASSERT(na == mna->mna_pna); |
					(void) na_release_locked(mna->mna_pna);
933 | mna->mna_pna = NULL; |
934 | } |
935 | } |
936 | |
937 | /* |
938 | * Remove all monitors and restore callbacks; |
939 | * this is important for nexus adapters that |
940 | * are linked to one another, e.g. pipe, since |
941 | * the callback changes on one adapter affects |
942 | * its peer during sync times. |
943 | */ |
944 | if (kring->ckr_n_monitors > 0) { |
945 | nx_mon_del(NULL, kring, TRUE); |
946 | } |
947 | |
948 | ASSERT(kring->ckr_monitors == NULL); |
949 | ASSERT(kring->ckr_max_monitors == 0); |
950 | ASSERT(kring->ckr_n_monitors == 0); |
951 | } |
952 | } |
953 | |
954 | na_enable_all_rings(na); |
955 | } |
956 | |
957 | /* |
 * Common functions for the na_activate() callbacks of both kinds of
959 | * monitors. |
960 | */ |
961 | static int |
962 | nx_mon_na_activate_common(struct nexus_adapter *na, na_activate_mode_t mode, |
963 | boolean_t zcopy) |
964 | { |
965 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
966 | struct nexus_adapter *pna = mna->mna_pna; |
967 | int err = 0; |
968 | |
969 | ASSERT(na->na_type == NA_MONITOR); |
970 | |
971 | SK_DF(SK_VERB_MONITOR, "na \"%s\" (0x%llx) %s zcopy %u" , na->na_name, |
972 | SK_KVA(na), na_activate_mode2str(mode), zcopy); |
973 | |
974 | switch (mode) { |
975 | case NA_ACTIVATE_MODE_ON: |
976 | if (pna == NULL) { |
977 | /* parent left skywalk mode, fatal */ |
978 | SK_ERR("%s: internal error" , na->na_name); |
979 | err = ENXIO; |
980 | } else { |
981 | err = nx_mon_enable(na, zcopy); |
982 | } |
983 | break; |
984 | |
985 | case NA_ACTIVATE_MODE_DEFUNCT: |
986 | break; |
987 | |
988 | case NA_ACTIVATE_MODE_OFF: |
989 | if (pna == NULL) { |
990 | SK_DF(SK_VERB_MONITOR, "%s: parent left skywalk mode, " |
991 | "nothing to restore" , na->na_name); |
992 | } else { |
993 | nx_mon_disable(na); |
994 | } |
995 | break; |
996 | |
997 | default: |
998 | VERIFY(0); |
999 | /* NOTREACHED */ |
1000 | __builtin_unreachable(); |
1001 | } |
1002 | |
1003 | return err; |
1004 | } |
1005 | |
1006 | /* |
1007 | * Functions specific for zero-copy monitors. |
1008 | */ |
1009 | |
1010 | /* |
1011 | * Common function for both zero-copy tx and rx nm_sync() |
1012 | * callbacks |
1013 | */ |
1014 | static int |
1015 | nx_mon_zcopy_parent_sync(struct __kern_channel_ring *kring, struct proc *p, |
1016 | uint32_t flags, enum txrx tx) |
1017 | { |
1018 | struct __kern_channel_ring *mkring = kring->ckr_monitors[0]; |
1019 | int rel_slots, free_slots, busy, sent = 0; |
1020 | slot_idx_t beg, end, i; |
1021 | const slot_idx_t lim = kring->ckr_lim; |
1022 | const slot_idx_t mlim; |
1023 | int error = 0; |
1024 | |
1025 | if (mkring == NULL) { |
1026 | SK_RD(5, "NULL monitor on kr \"%s\" (0x%llx) krflags 0x%b" , |
1027 | kring->ckr_name, SK_KVA(kring), kring->ckr_flags, |
1028 | CKRF_BITS); |
1029 | return 0; |
1030 | } |
1031 | |
1032 | ASSERT(!KR_KERNEL_ONLY(kring)); |
1033 | ASSERT(!KR_KERNEL_ONLY(mkring)); |
1034 | |
1035 | /* deconst */ |
1036 | *(slot_idx_t *)(uintptr_t)&mlim = mkring->ckr_lim; |
1037 | |
	/* get the released slots (rel_slots) */
1039 | if (tx == NR_TX) { |
1040 | beg = kring->ckr_ktail; |
1041 | error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags); |
1042 | if (error) { |
1043 | return error; |
1044 | } |
1045 | end = kring->ckr_ktail; |
1046 | } else { /* NR_RX */ |
1047 | beg = kring->ckr_khead; |
1048 | end = kring->ckr_rhead; |
1049 | } |
1050 | |
1051 | rel_slots = end - beg; |
1052 | if (rel_slots < 0) { |
1053 | rel_slots += kring->ckr_num_slots; |
1054 | } |
1055 | |
1056 | if (!rel_slots) { |
1057 | /* |
1058 | * No released slots, but we still need |
1059 | * to call rxsync if this is a rx ring |
1060 | */ |
1061 | goto out_rxsync; |
1062 | } |
1063 | |
1064 | /* |
1065 | * We need to lock the monitor receive ring, since it |
	 * is the target of both tx and rx traffic from the monitored
	 * adapter.
1068 | */ |
1069 | KR_LOCK(mkring); |
1070 | /* get the free slots available on the monitor ring */ |
1071 | i = mkring->ckr_ktail; |
1072 | busy = i - mkring->ckr_khead; |
1073 | if (busy < 0) { |
1074 | busy += mkring->ckr_num_slots; |
1075 | } |
1076 | free_slots = mlim - busy; |
1077 | |
1078 | if (!free_slots) { |
1079 | goto out; |
1080 | } |
1081 | |
1082 | /* swap min(free_slots, rel_slots) slots */ |
1083 | if (free_slots < rel_slots) { |
1084 | beg += (rel_slots - free_slots); |
1085 | if (beg >= kring->ckr_num_slots) { |
1086 | beg -= kring->ckr_num_slots; |
1087 | } |
1088 | rel_slots = free_slots; |
1089 | } |
1090 | |
1091 | sent = rel_slots; |
1092 | for (; rel_slots; rel_slots--) { |
1093 | /* |
1094 | * Swap the slots. |
1095 | * |
1096 | * XXX: adi@apple.com -- this bypasses the slot attach/detach |
1097 | * interface, and needs to be changed when monitor adopts the |
1098 | * packet APIs. SD_SWAP() will perform a block copy of the |
1099 | * swap, and will readjust the kernel slot descriptor's sd_user |
1100 | * accordingly. |
1101 | */ |
1102 | SD_SWAP(KR_KSD(mkring, i), KR_USD(mkring, i), |
1103 | KR_KSD(kring, beg), KR_USD(kring, beg)); |
1104 | |
1105 | SK_RD(5, "beg %u buf_idx %u" , beg, |
1106 | METADATA_IDX(KR_KSD(kring, beg)->sd_qum)); |
1107 | |
		beg = SLOT_NEXT(beg, lim);
		i = SLOT_NEXT(i, mlim);
1110 | } |
1111 | os_atomic_thread_fence(seq_cst); |
1112 | mkring->ckr_ktail = i; |
1113 | |
1114 | out: |
1115 | KR_UNLOCK(mkring); |
1116 | |
1117 | if (sent) { |
1118 | /* notify the new frames to the monitor */ |
1119 | (void) mkring->ckr_na_notify(mkring, p, 0); |
1120 | } |
1121 | |
1122 | out_rxsync: |
1123 | if (tx == NR_RX) { |
1124 | error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags); |
1125 | } |
1126 | |
1127 | return error; |
1128 | } |
1129 | |
1130 | /* |
1131 | * Callback used to replace the ckr_na_sync callback in the monitored tx rings. |
1132 | */ |
1133 | static int |
1134 | nx_mon_zcopy_parent_txsync(struct __kern_channel_ring *kring, struct proc *p, |
1135 | uint32_t flags) |
1136 | { |
1137 | SK_DF(SK_VERB_MONITOR, |
1138 | "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x" , |
1139 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
1140 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags); |
	return nx_mon_zcopy_parent_sync(kring, p, flags, NR_TX);
1142 | } |
1143 | |
1144 | /* callback used to replace the nm_sync callback in the monitored rx rings */ |
1145 | static int |
1146 | nx_mon_zcopy_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p, |
1147 | uint32_t flags) |
1148 | { |
1149 | SK_DF(SK_VERB_MONITOR, |
1150 | "%s(%d) kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x" , |
1151 | sk_proc_name_address(p), sk_proc_pid(p), kring->ckr_name, |
1152 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags); |
	return nx_mon_zcopy_parent_sync(kring, p, flags, NR_RX);
1154 | } |
1155 | |
1156 | static int |
1157 | nx_mon_zcopy_na_activate(struct nexus_adapter *na, na_activate_mode_t mode) |
1158 | { |
1159 | return nx_mon_na_activate_common(na, mode, TRUE /* zcopy */); |
1160 | } |
1161 | |
1162 | /* na_dtor callback for monitors */ |
1163 | static void |
1164 | nx_mon_zcopy_na_dtor(struct nexus_adapter *na) |
1165 | { |
1166 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
1167 | struct nexus_adapter *pna = mna->mna_pna; |
1168 | |
1169 | SK_LOCK_ASSERT_HELD(); |
1170 | ASSERT(na->na_type == NA_MONITOR); |
1171 | |
1172 | if (pna != NULL) { |
		(void) na_release_locked(pna);
1174 | mna->mna_pna = NULL; |
1175 | } |
1176 | } |
1177 | |
1178 | /* |
1179 | * Functions specific for copy monitors. |
1180 | */ |
1181 | |
1182 | static void |
1183 | nx_mon_parent_sync(struct __kern_channel_ring *kring, struct proc *p, |
1184 | slot_idx_t first_new, int new_slots) |
1185 | { |
1186 | nexus_meta_type_t md_type = KRNA(kring)->na_md_type; |
1187 | uint32_t j; |
1188 | |
1189 | for (j = 0; j < kring->ckr_n_monitors; j++) { |
1190 | struct __kern_channel_ring *mkring = kring->ckr_monitors[j]; |
1191 | slot_idx_t i, mlim, beg; |
1192 | int free_slots, busy, sent = 0, m; |
1193 | const slot_idx_t lim = kring->ckr_lim; |
1194 | struct nexus_adapter *dst_na = KRNA(mkring); |
1195 | struct nexus_monitor_adapter *mna = |
1196 | (struct nexus_monitor_adapter *)dst_na; |
1197 | uint32_t max_len = mkring->ckr_pp->pp_max_frags * |
1198 | PP_BUF_SIZE_DEF(mkring->ckr_pp); |
1199 | |
1200 | /* |
1201 | * src and dst adapters must share the same nexus; |
1202 | * this test is done in nx_monitor_na_find(). This |
1203 | * covers both buffer and metadata sizes. |
1204 | */ |
1205 | |
1206 | mlim = mkring->ckr_lim; |
1207 | |
1208 | /* |
1209 | * We need to lock the monitor receive ring, since it |
		 * is the target of both tx and rx traffic from the
1211 | * monitored adapter. |
1212 | */ |
1213 | KR_LOCK(mkring); |
1214 | /* get the free slots available on the monitor ring */ |
1215 | i = mkring->ckr_ktail; |
1216 | busy = i - mkring->ckr_khead; |
1217 | if (busy < 0) { |
1218 | busy += mkring->ckr_num_slots; |
1219 | } |
1220 | free_slots = mlim - busy; |
1221 | |
1222 | if (!free_slots) { |
1223 | goto out; |
1224 | } |
1225 | |
1226 | /* copy min(free_slots, new_slots) slots */ |
1227 | m = new_slots; |
1228 | beg = first_new; |
1229 | if (free_slots < m) { |
1230 | beg += (m - free_slots); |
1231 | if (beg >= kring->ckr_num_slots) { |
1232 | beg -= kring->ckr_num_slots; |
1233 | } |
1234 | m = free_slots; |
1235 | } |
1236 | |
1237 | ASSERT(KRNA(mkring)->na_md_type == md_type); |
1238 | |
1239 | for (; m; m--) { |
1240 | struct __kern_slot_desc *src_sd = KR_KSD(kring, beg); |
1241 | struct __kern_slot_desc *dst_sd = KR_KSD(mkring, i); |
1242 | struct __kern_packet *spkt, *dpkt; |
1243 | kern_packet_t sph, dph; |
1244 | uint32_t copy_len; |
1245 | |
1246 | if (!KSD_VALID_METADATA(src_sd)) { |
1247 | goto skip; |
1248 | } |
1249 | |
			/* retrieve packet handles from slot */
1251 | spkt = src_sd->sd_pkt; |
1252 | sph = SK_PTR_ENCODE(spkt, METADATA_TYPE(spkt), |
1253 | METADATA_SUBTYPE(spkt)); |
1254 | dpkt = dst_sd->sd_pkt; |
1255 | dph = SK_PTR_ENCODE(dpkt, METADATA_TYPE(dpkt), |
1256 | METADATA_SUBTYPE(dpkt)); |
1257 | |
1258 | ASSERT(METADATA_TYPE(spkt) == METADATA_TYPE(dpkt)); |
1259 | |
1260 | ASSERT(spkt->pkt_qum.qum_len <= (UINT32_MAX - 63)); |
1261 | copy_len = spkt->pkt_qum.qum_len; |
1262 | |
1263 | /* round to a multiple of 64 */ |
1264 | copy_len = (copy_len + 63) & ~63; |
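			/*
			 * e.g. a 65-byte frame copies 128 bytes;
			 * nx_mon_quantum_copy_64x() below VERIFYs
			 * that the length is a multiple of 64.
			 */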
1265 | |
1266 | if (__improbable(copy_len > max_len)) { |
1267 | SK_RD(5, "kr \"%s\" -> mkr \"%s\": " |
1268 | "truncating %u to %u" , |
1269 | kring->ckr_name, mkring->ckr_name, |
1270 | (uint32_t)copy_len, max_len); |
1271 | copy_len = max_len; |
1272 | } |
1273 | |
1274 | /* copy buffers */ |
1275 | mna->mna_pkt_copy_from_pkt(kring->ckr_tx, dph, 0, sph, |
1276 | 0, copy_len, FALSE, 0, 0, FALSE); |
1277 | |
1278 | /* copy the associated meta data */ |
1279 | _QUM_COPY(&(spkt)->pkt_qum, &(dpkt)->pkt_qum); |
1280 | if (md_type == NEXUS_META_TYPE_PACKET) { |
1281 | _PKT_COPY(spkt, dpkt); |
1282 | ASSERT(dpkt->pkt_mbuf == NULL); |
1283 | } |
1284 | |
1285 | ASSERT(!(dpkt->pkt_qum.qum_qflags & QUM_F_KERNEL_ONLY) || |
1286 | PP_KERNEL_ONLY(dpkt->pkt_qum.qum_pp)); |
1287 | |
1288 | sent++; |
			i = SLOT_NEXT(i, mlim);
1290 | skip: |
			beg = SLOT_NEXT(beg, lim);
1292 | } |
1293 | os_atomic_thread_fence(seq_cst); |
1294 | mkring->ckr_ktail = i; |
1295 | out: |
1296 | KR_UNLOCK(mkring); |
1297 | |
1298 | if (sent) { |
1299 | /* notify the new frames to the monitor */ |
1300 | (void) mkring->ckr_na_notify(mkring, p, 0); |
1301 | } |
1302 | } |
1303 | } |
1304 | |
1305 | /* callback used to replace the nm_sync callback in the monitored tx rings */ |
1306 | static int |
1307 | nx_mon_parent_txsync(struct __kern_channel_ring *kring, struct proc *p, |
1308 | uint32_t flags) |
1309 | { |
1310 | slot_idx_t first_new; |
1311 | int new_slots; |
1312 | nexus_type_t nx_type = |
1313 | kring->ckr_na->na_nxdom_prov->nxdom_prov_dom->nxdom_type; |
1314 | |
1315 | /* |
	 * For user pipe nexus, txsync can also be initiated from RX process
1317 | * context, hence user pipe tx ring should be accessed holding |
1318 | * ckr_qlock. |
1319 | */ |
1320 | if (nx_type == NEXUS_TYPE_USER_PIPE) { |
1321 | KR_LOCK(kring); |
1322 | } |
1323 | |
1324 | /* get the new slots */ |
1325 | first_new = kring->ckr_khead; |
1326 | new_slots = kring->ckr_rhead - first_new; |
1327 | if (new_slots < 0) { |
1328 | new_slots += kring->ckr_num_slots; |
1329 | } |
1330 | if (new_slots) { |
1331 | nx_mon_parent_sync(kring, p, first_new, new_slots); |
1332 | } |
1333 | |
1334 | if (nx_type == NEXUS_TYPE_USER_PIPE) { |
1335 | KR_UNLOCK(kring); |
1336 | } |
1337 | |
1338 | return kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags); |
1339 | } |
1340 | |
1341 | /* callback used to replace the nm_sync callback in the monitored rx rings */ |
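/*
 * New-frame window (illustration): after the parent rxsync below, the
 * frames received since the previous pass are [ckr_mon_tail, ckr_ktail);
 * ckr_mon_tail is then advanced to ckr_ktail.
 */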
1342 | static int |
1343 | nx_mon_parent_rxsync(struct __kern_channel_ring *kring, struct proc *p, |
1344 | uint32_t flags) |
1345 | { |
1346 | slot_idx_t first_new; |
1347 | int new_slots, error; |
1348 | |
1349 | /* get the new slots */ |
1350 | error = kring->ckr_mon_sync(kring, p, NA_SYNCF_MONITOR | flags); |
1351 | if (error) { |
1352 | return error; |
1353 | } |
1354 | first_new = kring->ckr_mon_tail; |
1355 | new_slots = kring->ckr_ktail - first_new; |
1356 | if (new_slots < 0) { |
1357 | new_slots += kring->ckr_num_slots; |
1358 | } |
1359 | if (new_slots) { |
1360 | nx_mon_parent_sync(kring, p, first_new, new_slots); |
1361 | } |
1362 | kring->ckr_mon_tail = kring->ckr_ktail; |
1363 | return 0; |
1364 | } |
1365 | |
1366 | /* |
1367 | * Callback used to replace the nm_notify() callback in the monitored rx rings |
1368 | */ |
1369 | static int |
1370 | nx_mon_parent_notify(struct __kern_channel_ring *kring, struct proc *p, |
1371 | uint32_t flags) |
1372 | { |
1373 | int err = 0; |
1374 | sk_protect_t protect = NULL; |
1375 | |
1376 | SK_DF(SK_VERB_MONITOR | SK_VERB_NOTIFY | |
1377 | ((kring->ckr_tx == NR_TX) ? SK_VERB_TX : SK_VERB_RX), |
1378 | "kr \"%s\" (0x%llx) krflags 0x%b flags 0x%x" , kring->ckr_name, |
1379 | SK_KVA(kring), kring->ckr_flags, CKRF_BITS, flags); |
1380 | /* |
1381 | * ?xsync callbacks have tryget called by their callers, |
1382 | * but here we have to call it by ourself. If we can't |
1383 | * acquire the exclusive sync right, skip the sync. |
1384 | */ |
1385 | if ((err = kr_enter(kring, FALSE)) == 0) { |
1386 | protect = sk_sync_protect(); |
1387 | nx_mon_parent_rxsync(kring, p, NA_SYNCF_FORCE_READ); |
1388 | sk_sync_unprotect(protect); |
1389 | kr_exit(kring); |
1390 | } |
1391 | /* in all cases (even error), we must invoke notify */ |
1392 | kring->ckr_mon_notify(kring, p, (NA_NOTEF_MONITOR | flags)); |
1393 | return err; |
1394 | } |
1395 | |
1396 | static int |
1397 | nx_mon_na_activate(struct nexus_adapter *na, na_activate_mode_t mode) |
1398 | { |
1399 | return nx_mon_na_activate_common(na, mode, FALSE /* no zcopy */); |
1400 | } |
1401 | |
1402 | static void |
1403 | nx_mon_na_dtor(struct nexus_adapter *na) |
1404 | { |
1405 | struct nexus_monitor_adapter *mna = (struct nexus_monitor_adapter *)na; |
1406 | struct nexus_adapter *pna = mna->mna_pna; |
1407 | |
1408 | SK_LOCK_ASSERT_HELD(); |
1409 | ASSERT(na->na_type == NA_MONITOR); |
1410 | |
1411 | if (pna != NULL) { |
		(void) na_release_locked(pna);
1413 | mna->mna_pna = NULL; |
1414 | } |
1415 | } |
1416 | |
1417 | /* check if chr is a request for a monitor adapter that we can satisfy */ |
1418 | int |
1419 | nx_monitor_na_find(struct kern_nexus *nx, struct kern_channel *ch, |
1420 | struct chreq *chr, struct kern_channel *ch0, struct nxbind *nxb, |
1421 | struct proc *p, struct nexus_adapter **na, boolean_t create) |
1422 | { |
1423 | #pragma unused(ch) |
1424 | boolean_t zcopy = !!(chr->cr_mode & CHMODE_MONITOR_NO_COPY); |
1425 | struct nexus_adapter *pna = NULL; /* parent adapter */ |
1426 | struct nexus_monitor_adapter *mna = NULL; |
	char monsuff[10] = "";
1428 | struct chreq pchr; |
1429 | uint32_t i; |
1430 | int error; |
1431 | enum txrx t; |
1432 | |
1433 | SK_LOCK_ASSERT_HELD(); |
1434 | *na = NULL; |
1435 | |
1436 | #if SK_LOG |
1437 | uuid_string_t uuidstr; |
1438 | SK_D("name \"%s\" spec_uuid \"%s\" port %d mode 0x%b pipe_id %u " |
1439 | "ring_id %d ring_set %u ep_type %u:%u ch0 0x%llx create %u%s" , |
1440 | chr->cr_name, sk_uuid_unparse(chr->cr_spec_uuid, uuidstr), |
1441 | (int)chr->cr_port, chr->cr_mode, CHMODE_BITS, |
1442 | chr->cr_pipe_id, (int)chr->cr_ring_id, chr->cr_ring_set, |
1443 | chr->cr_real_endpoint, chr->cr_endpoint, SK_KVA(ch0), create, |
	    !(chr->cr_mode & CHMODE_MONITOR) ? " (skipped)" : "");
1445 | #endif /* SK_LOG */ |
1446 | |
1447 | if (!(chr->cr_mode & CHMODE_MONITOR)) { |
1448 | return 0; |
1449 | } |
1450 | |
1451 | /* XXX: Don't allow user packet pool mode in monitor for now */ |
1452 | if (chr->cr_mode & CHMODE_USER_PACKET_POOL) { |
1453 | SK_ERR("User Packet pool mode not supported for monitor" ); |
1454 | return ENOTSUP; |
1455 | } |
1456 | |
	mna = na_mon_alloc(Z_WAITOK);
1458 | |
1459 | ASSERT(mna->mna_up.na_type == NA_MONITOR); |
1460 | ASSERT(mna->mna_up.na_free == na_mon_free); |
1461 | |
1462 | /* override the ring set since we're monitoring */ |
1463 | chr->cr_ring_set = RING_SET_ALL; |
1464 | |
1465 | if (ch0 != NULL) { |
1466 | /* |
1467 | * We've been given the owning channel from ch_open(); |
1468 | * use this as shortcut since otherwise we'd have to |
1469 | * find it ourselves. |
1470 | */ |
1471 | #if (DEBUG || DEVELOPMENT) |
1472 | ASSERT(!(ch0->ch_info->cinfo_ch_mode & CHMODE_MONITOR)); |
1473 | ASSERT(ch0->ch_info->cinfo_nx_port == chr->cr_port); |
1474 | #endif /* DEBUG || DEVELOPMENT */ |
1475 | pna = ch0->ch_na; |
		na_retain_locked(pna);
1477 | } else { |
1478 | /* |
1479 | * First, try to find the adapter that we want to monitor |
1480 | * We use the same chr, after we have turned off the monitor |
1481 | * flags. In this way we can potentially monitor everything |
1482 | * skywalk understands, except other monitors. |
1483 | */ |
		memcpy(&pchr, chr, sizeof(pchr));
1485 | pchr.cr_mode &= ~CHMODE_MONITOR; |
1486 | error = na_find(ch, nx, &pchr, ch0, nxb, p, &pna, create); |
1487 | if (error != 0) { |
1488 | SK_ERR("parent lookup failed: %d" , error); |
1489 | return error; |
1490 | } |
1491 | } |
1492 | ASSERT(pna != NULL); |
1493 | SK_DF(SK_VERB_MONITOR, |
1494 | "found parent: \"%s\" (0x%llx)" , pna->na_name, SK_KVA(pna)); |
1495 | |
1496 | if (!NA_IS_ACTIVE(pna)) { |
1497 | /* parent not in skywalk mode */ |
1498 | /* |
1499 | * XXX we can wait for the parent to enter skywalk mode, |
1500 | * by intercepting its na_activate() callback (2014-03-16) |
1501 | */ |
1502 | SK_ERR("parent \"%s\" (0x%llx) not in skywalk mode" , |
1503 | pna->na_name, SK_KVA(pna)); |
1504 | error = ENXIO; |
1505 | goto put_out; |
1506 | } else if (zcopy && NA_KERNEL_ONLY(pna)) { |
1507 | /* |
1508 | * Zero-copy mode requires the parent adapter to be |
1509 | * created in a non-kernel-only mode. |
1510 | */ |
1511 | SK_ERR("parent \"%s\" (0x%llx) is in kernel-only mode" , |
1512 | pna->na_name, SK_KVA(pna)); |
1513 | error = ENODEV; |
1514 | goto put_out; |
1515 | } |
1516 | |
1517 | /* grab all the rings we need in the parent */ |
1518 | mna->mna_pna = pna; |
1519 | error = na_interp_ringid(pna, chr->cr_ring_id, chr->cr_ring_set, |
1520 | mna->mna_first, mna->mna_last); |
1521 | if (error != 0) { |
1522 | SK_ERR("ring_mode %u ring_id %d error %d" , chr->cr_ring_set, |
1523 | (int)chr->cr_ring_id, error); |
1524 | goto put_out; |
1525 | } |
1526 | if (mna->mna_last[NR_TX] - mna->mna_first[NR_TX] == 1) { |
		(void) snprintf(monsuff, 10, "-%u", mna->mna_first[NR_TX]);
1528 | } |
	(void) snprintf(mna->mna_up.na_name, sizeof(mna->mna_up.na_name),
	    "%s%s/%s%s%s", pna->na_name, monsuff, zcopy ? "z" : "",
	    (chr->cr_mode & CHMODE_MONITOR_RX) ? "r" : "",
	    (chr->cr_mode & CHMODE_MONITOR_TX) ? "t" : "");
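	/*
	 * e.g. monitoring ring 2 of adapter "foo" in zero-copy rx-only
	 * mode yields the monitor adapter name "foo-2/zr" (the "foo"
	 * name is hypothetical).
	 */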
	uuid_generate_random(mna->mna_up.na_uuid);
1534 | |
1535 | /* these don't apply to the monitor adapter */ |
1536 | *(nexus_stats_type_t *)(uintptr_t)&mna->mna_up.na_stats_type = |
1537 | NEXUS_STATS_TYPE_INVALID; |
1538 | *(uint32_t *)(uintptr_t)&mna->mna_up.na_flowadv_max = 0; |
1539 | |
1540 | if (zcopy) { |
1541 | /* |
1542 | * Zero copy monitors need exclusive access |
1543 | * to the monitored rings. |
1544 | */ |
1545 | for_rx_tx(t) { |
1546 | if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) { |
1547 | continue; |
1548 | } |
1549 | for (i = mna->mna_first[t]; |
1550 | i < mna->mna_last[t]; i++) { |
1551 | struct __kern_channel_ring *kring = |
1552 | &NAKR(na: pna, t)[i]; |
1553 | if (kring->ckr_n_monitors > 0) { |
1554 | error = EBUSY; |
1555 | SK_ERR("kr \"%s\" already monitored " |
1556 | "by \"%s\"" , kring->ckr_name, |
1557 | kring->ckr_monitors[0]->ckr_name); |
1558 | goto put_out; |
1559 | } |
1560 | } |
1561 | } |
1562 | mna->mna_up.na_activate = nx_mon_zcopy_na_activate; |
1563 | mna->mna_up.na_dtor = nx_mon_zcopy_na_dtor; |
1564 | /* |
1565 | * To have zero copy, we need to use the same memory allocator |
1566 | * as the monitored port. |
1567 | */ |
1568 | mna->mna_up.na_arena = pna->na_arena; |
1569 | skmem_arena_retain((&mna->mna_up)->na_arena); |
1570 | os_atomic_or(&mna->mna_up.na_flags, NAF_MEM_LOANED, relaxed); |
1571 | } else { |
1572 | /* normal monitors are incompatible with zero copy ones */ |
1573 | for_rx_tx(t) { |
1574 | if (!(chr->cr_mode & nx_mon_txrx2chmode(t))) { |
1575 | continue; |
1576 | } |
1577 | for (i = mna->mna_first[t]; |
1578 | i < mna->mna_last[t]; i++) { |
1579 | struct __kern_channel_ring *kring = |
1580 | &NAKR(na: pna, t)[i]; |
1581 | if (kring->ckr_n_monitors > 0 && |
1582 | KRNA(kring->ckr_monitors[0])-> |
1583 | na_activate == nx_mon_zcopy_na_activate) { |
1584 | error = EBUSY; |
1585 | SK_ERR("kr \"%s\" is busy (zcopy)" , |
1586 | kring->ckr_name); |
1587 | goto put_out; |
1588 | } |
1589 | } |
1590 | } |
1591 | mna->mna_up.na_activate = nx_mon_na_activate; |
1592 | mna->mna_up.na_dtor = nx_mon_na_dtor; |
1593 | /* |
1594 | * allocate a new (private) allocator instance using the |
1595 | * parent nexus configuration. |
1596 | */ |
1597 | if ((error = nx_monitor_prov_s.nxdom_prov_mem_new( |
1598 | NX_DOM_PROV(nx), nx, &mna->mna_up)) != 0) { |
1599 | ASSERT(mna->mna_up.na_arena == NULL); |
1600 | goto put_out; |
1601 | } |
1602 | ASSERT(mna->mna_up.na_arena != NULL); |
1603 | mna->mna_up.na_rxsync = nx_mon_na_rxsync; |
1604 | } |
1605 | *(nexus_meta_type_t *)(uintptr_t)&mna->mna_up.na_md_type = |
1606 | pna->na_md_type; |
1607 | *(nexus_meta_subtype_t *)(uintptr_t)&mna->mna_up.na_md_subtype = |
1608 | pna->na_md_subtype; |
1609 | |
1610 | /* a do-nothing txsync: monitors cannot be used to inject packets */ |
1611 | mna->mna_up.na_txsync = nx_mon_na_txsync; |
1612 | mna->mna_up.na_rxsync = nx_mon_na_rxsync; |
1613 | mna->mna_up.na_krings_create = nx_mon_na_krings_create; |
1614 | mna->mna_up.na_krings_delete = nx_mon_na_krings_delete; |
1615 | |
1616 | /* |
1617 | * We set the number of our na_rx_rings to be |
1618 | * max(na_num_tx_rings, na_num_rx_rings) in the parent |
1619 | */ |
1620 | na_set_nrings(na: &mna->mna_up, t: NR_TX, v: na_get_nrings(na: pna, t: NR_TX)); |
1621 | na_set_nrings(na: &mna->mna_up, t: NR_RX, v: na_get_nrings(na: pna, t: NR_RX)); |
1622 | if (na_get_nrings(na: pna, t: NR_TX) > na_get_nrings(na: pna, t: NR_RX)) { |
1623 | na_set_nrings(na: &mna->mna_up, t: NR_RX, v: na_get_nrings(na: pna, t: NR_TX)); |
1624 | } |
1625 | na_set_nslots(na: &mna->mna_up, t: NR_TX, v: na_get_nslots(na: pna, t: NR_TX)); |
1626 | na_set_nslots(na: &mna->mna_up, t: NR_RX, v: na_get_nslots(na: pna, t: NR_RX)); |
1627 | |
1628 | na_attach_common(&mna->mna_up, nx, &nx_monitor_prov_s); |
1629 | |
1630 | /* remember the traffic directions we have to monitor */ |
1631 | mna->mna_mode = (chr->cr_mode & CHMODE_MONITOR); |
1632 | |
1633 | /* keep the reference to the parent */ |
1634 | *na = &mna->mna_up; |
	na_retain_locked(*na);
1636 | |
1637 | /* sanity check: monitor and monitored adapters must share the nexus */ |
1638 | ASSERT((*na)->na_nx == pna->na_nx); |
1639 | |
1640 | #if SK_LOG |
1641 | SK_DF(SK_VERB_MONITOR, "created monitor adapter 0x%llx" , SK_KVA(mna)); |
1642 | SK_DF(SK_VERB_MONITOR, "na_name: \"%s\"" , mna->mna_up.na_name); |
1643 | SK_DF(SK_VERB_MONITOR, " UUID: %s" , |
1644 | sk_uuid_unparse(mna->mna_up.na_uuid, uuidstr)); |
1645 | SK_DF(SK_VERB_MONITOR, " nx: 0x%llx (\"%s\":\"%s\")" , |
1646 | SK_KVA(mna->mna_up.na_nx), NX_DOM(mna->mna_up.na_nx)->nxdom_name, |
1647 | NX_DOM_PROV(mna->mna_up.na_nx)->nxdom_prov_name); |
1648 | SK_DF(SK_VERB_MONITOR, " flags: 0x%b" , |
1649 | mna->mna_up.na_flags, NAF_BITS); |
1650 | SK_DF(SK_VERB_MONITOR, " rings: tx %u rx %u" , |
1651 | na_get_nrings(&mna->mna_up, NR_TX), |
1652 | na_get_nrings(&mna->mna_up, NR_RX)); |
1653 | SK_DF(SK_VERB_MONITOR, " slots: tx %u rx %u" , |
1654 | na_get_nslots(&mna->mna_up, NR_TX), |
1655 | na_get_nslots(&mna->mna_up, NR_RX)); |
1656 | #if CONFIG_NEXUS_USER_PIPE |
1657 | SK_DF(SK_VERB_MONITOR, " next_pipe: %u" , mna->mna_up.na_next_pipe); |
1658 | SK_DF(SK_VERB_MONITOR, " max_pipes: %u" , mna->mna_up.na_max_pipes); |
1659 | #endif /* CONFIG_NEXUS_USER_PIPE */ |
1660 | SK_DF(SK_VERB_MONITOR, " mna_tx_rings: [%u,%u)" , mna->mna_first[NR_TX], |
1661 | mna->mna_last[NR_TX]); |
1662 | SK_DF(SK_VERB_MONITOR, " mna_rx_rings: [%u,%u)" , mna->mna_first[NR_RX], |
1663 | mna->mna_last[NR_RX]); |
1664 | SK_DF(SK_VERB_MONITOR, " mna_mode: %u" , mna->mna_mode); |
1665 | #endif /* SK_LOG */ |
1666 | |
1667 | return 0; |
1668 | |
1669 | put_out: |
1670 | if (pna != NULL) { |
		(void) na_release_locked(pna);
1672 | pna = NULL; |
1673 | } |
1674 | NA_FREE(&mna->mna_up); |
1675 | return error; |
1676 | } |
1677 | |
1678 | static void |
1679 | nx_mon_quantum_copy_64x(const enum txrx t, kern_packet_t dph, |
1680 | const uint16_t doff, kern_packet_t sph, const uint16_t soff, |
1681 | const uint32_t len, const boolean_t unused_arg1, |
1682 | const uint16_t unused_arg2, const uint16_t unused_arg3, |
1683 | const boolean_t unused_arg4) |
1684 | { |
1685 | /* for function prototype parity with pkt_copy_from_pkt_t */ |
1686 | #pragma unused(unused_arg1, unused_arg2, unused_arg3, unused_arg4) |
1687 | #pragma unused(t, doff, soff) |
1688 | struct __kern_quantum *dqum = SK_PTR_ADDR_KQUM(dph); |
1689 | struct __kern_quantum *squm = SK_PTR_ADDR_KQUM(sph); |
1690 | uint8_t *sbuf, *dbuf; |
1691 | |
1692 | ASSERT(METADATA_TYPE(squm) == NEXUS_META_TYPE_QUANTUM); |
1693 | ASSERT(METADATA_TYPE(squm) == METADATA_TYPE(dqum)); |
1694 | VERIFY(IS_P2ALIGNED(len, 64)); |
1695 | |
1696 | MD_BUFLET_ADDR(squm, sbuf); |
1697 | MD_BUFLET_ADDR(dqum, dbuf); |
1698 | VERIFY(IS_P2ALIGNED(dbuf, sizeof(uint64_t))); |
1699 | |
1700 | if (__probable(IS_P2ALIGNED(sbuf, sizeof(uint64_t)))) { |
		sk_copy64_64x((uint64_t *)(void *)sbuf,
		    (uint64_t *)(void *)dbuf, len);
	} else {
		bcopy(sbuf, dbuf, len);
1705 | } |
1706 | /* |
1707 | * This copy routine only copies to/from a buflet, so the length |
 * is guaranteed to be <= the size of a buflet.
1709 | */ |
1710 | VERIFY(len <= UINT16_MAX); |
1711 | METADATA_SET_LEN(dqum, (uint16_t)len, 0); |
1712 | } |
1713 | |