/*
 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * if_fake.c
 * - fake network interface used for testing
 * - "feth" (e.g. "feth0", "feth1") is a virtual ethernet interface that allows
 *   two instances to have their output/input paths "crossed-over" so that
 *   output on one is input on the other
 */

/*
 * Modification History:
 *
 * September 9, 2015	Dieter Siegmund (dieter@apple.com)
 * - created
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/kern_event.h>
#include <sys/mcache.h>
#include <sys/syslog.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
#include <net/if_fake_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_ether.h>
#include <net/if_types.h>
#include <libkern/OSAtomic.h>

#include <net/dlil.h>

#include <net/kpi_interface.h>
#include <net/kpi_protocol.h>

#include <kern/locks.h>
#include <kern/zalloc.h>

#include <mach/mach_time.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/if_ether.h>
#endif

#include <net/if_media.h>
#include <net/ether_if_module.h>
#if SKYWALK
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/channel/channel_var.h>
#endif /* SKYWALK */

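/*
 * Note: this helper also returns TRUE for val == 0; the sysctl handlers below
 * enforce a non-zero lower bound (e.g. FETH_BUFLET_SIZE_MIN) before relying
 * on it.
 */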
static boolean_t
is_power_of_two(unsigned int val)
{
    return (val & (val - 1)) == 0;
}

#define FAKE_ETHER_NAME "feth"

SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, fake, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Fake interface");

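/*
 * All of the tunables below live under the "net.link.fake" sysctl node
 * declared above and can be set from user space, e.g. (values are only
 * examples):
 *
 *	sysctl net.link.fake.debug=1
 *	sysctl net.link.fake.bsd_mode=0
 */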
static int if_fake_txstart = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_txstart, 0, "Fake interface TXSTART mode");

static int if_fake_hwcsum = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, hwcsum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_hwcsum, 0, "Fake interface simulate hardware checksum");

static int if_fake_nxattach = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, nxattach, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_nxattach, 0, "Fake interface auto-attach nexus");

static int if_fake_bsd_mode = 1;
SYSCTL_INT(_net_link_fake, OID_AUTO, bsd_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_bsd_mode, 0, "Fake interface attach as BSD interface");

static int if_fake_debug = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_debug, 0, "Fake interface debug logs");

#define FETH_DPRINTF(fmt, ...) \
    { if (if_fake_debug != 0) printf("%s " fmt, __func__, ## __VA_ARGS__); }

static int if_fake_wmm_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, wmm_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_wmm_mode, 0, "Fake interface in 802.11 WMM mode");

static int if_fake_multibuflet = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, multibuflet, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_multibuflet, 0, "Fake interface using multi-buflet packets");

static int if_fake_low_latency = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, low_latency, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_low_latency, 0, "Fake interface with a low latency qset");

static int if_fake_switch_combined_mode = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_combined_mode,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_combined_mode, 0,
    "Switch a qset between combined and separate mode during dequeues");

static int if_fake_switch_mode_frequency = 10;
SYSCTL_INT(_net_link_fake, OID_AUTO, switch_mode_frequency,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_fake_switch_mode_frequency, 0,
    "The number of dequeues before we switch between combined and separate mode");

static int if_fake_tso_support = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, tso_support, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_tso_support, 0, "Fake interface with support for TSO offload");

#define DEFAULT_EXPIRATION_THRESHOLD 500 /* usec */
static int if_fake_expiration_threshold_us = DEFAULT_EXPIRATION_THRESHOLD;
SYSCTL_INT(_net_link_fake, OID_AUTO, expiration_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_expiration_threshold_us, DEFAULT_EXPIRATION_THRESHOLD,
    "Expiration threshold (usec) for expiration testing");

static int if_fake_lro = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_lro, 0, "Fake interface report LRO capability");

typedef enum {
    IFF_PP_MODE_GLOBAL = 0,        /* share a global pool */
    IFF_PP_MODE_PRIVATE = 1,       /* creates its own rx/tx pool */
    IFF_PP_MODE_PRIVATE_SPLIT = 2, /* creates its own split rx & tx pool */
} iff_pktpool_mode_t;
static iff_pktpool_mode_t if_fake_pktpool_mode = IFF_PP_MODE_GLOBAL;
SYSCTL_INT(_net_link_fake, OID_AUTO, pktpool_mode, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_pktpool_mode, IFF_PP_MODE_GLOBAL,
    "Fake interface packet pool mode (0 global, 1 private, 2 private split)");

#define FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX 512
#define FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF 96
static int if_fake_link_layer_aggregation_factor =
    FETH_LINK_LAYER_AGGRETATION_FACTOR_DEF;
static int
feth_link_layer_aggregation_factor_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_link_layer_aggregation_factor,
        sizeof(if_fake_link_layer_aggregation_factor), &new_value,
        &changed);
    if (error == 0 && changed != 0) {
        if (new_value <= 0 ||
            new_value > FETH_LINK_LAYER_AGGRETATION_FACTOR_MAX) {
            return EINVAL;
        }
        if_fake_link_layer_aggregation_factor = new_value;
    }
    return error;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, link_layer_aggregation_factor,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_link_layer_aggregation_factor_sysctl, "IU",
    "Fake interface link layer aggregation factor");

#define FETH_TX_HEADROOM_MAX 32
static unsigned int if_fake_tx_headroom = FETH_TX_HEADROOM_MAX;
static int
feth_tx_headroom_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_tx_headroom,
        sizeof(if_fake_tx_headroom), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value > FETH_TX_HEADROOM_MAX ||
            (new_value % 8) != 0) {
            return EINVAL;
        }
        if_fake_tx_headroom = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_headroom,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_tx_headroom_sysctl, "IU", "Fake ethernet Tx headroom");

static int if_fake_fcs = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, fcs, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_fcs, 0, "Fake interface using frame check sequence");

#define FETH_TRAILER_LENGTH_MAX 28
char feth_trailer[FETH_TRAILER_LENGTH_MAX + 1] = "trailertrailertrailertrailer";
static unsigned int if_fake_trailer_length = 0;
static int
feth_trailer_length_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_trailer_length,
        sizeof(if_fake_trailer_length), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value > FETH_TRAILER_LENGTH_MAX) {
            return EINVAL;
        }
        if_fake_trailer_length = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, trailer_length,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_trailer_length_sysctl, "IU", "Fake interface frame trailer length");

/* sysctl net.link.fake.max_mtu */
#define FETH_MAX_MTU_DEFAULT 2048
#define FETH_MAX_MTU_MAX ((16 * 1024) - ETHER_HDR_LEN)

static unsigned int if_fake_max_mtu = FETH_MAX_MTU_DEFAULT;

/* sysctl net.link.fake.buflet_size */
#define FETH_BUFLET_SIZE_MIN 512
#define FETH_BUFLET_SIZE_MAX (32 * 1024)
#define FETH_TSO_BUFLET_SIZE (16 * 1024)

static unsigned int if_fake_buflet_size = FETH_BUFLET_SIZE_MIN;
static unsigned int if_fake_tso_buffer_size = FETH_TSO_BUFLET_SIZE;

static int
feth_tso_buffer_size_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_tso_buffer_size,
        sizeof(if_fake_tso_buffer_size), &new_value, &changed);
    if (error == 0 && changed != 0) {
        /* must be a power of 2 between min and max */
        if (new_value > FETH_BUFLET_SIZE_MAX ||
            new_value < FETH_BUFLET_SIZE_MIN ||
            !is_power_of_two(new_value)) {
            return EINVAL;
        }
        if_fake_tso_buffer_size = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, tso_buf_size,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_tso_buffer_size_sysctl, "IU", "Fake interface TSO buffer size");

static int
feth_max_mtu_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_max_mtu,
        sizeof(if_fake_max_mtu), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value > FETH_MAX_MTU_MAX ||
            new_value < ETHERMTU ||
            new_value <= if_fake_buflet_size) {
            return EINVAL;
        }
        if_fake_max_mtu = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, max_mtu,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_max_mtu_sysctl, "IU", "Fake interface maximum MTU");

static int
feth_buflet_size_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_buflet_size,
        sizeof(if_fake_buflet_size), &new_value, &changed);
    if (error == 0 && changed != 0) {
        /* must be a power of 2 between min and max */
        if (new_value > FETH_BUFLET_SIZE_MAX ||
            new_value < FETH_BUFLET_SIZE_MIN ||
            !is_power_of_two(new_value) ||
            new_value >= if_fake_max_mtu) {
            return EINVAL;
        }
        if_fake_buflet_size = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, buflet_size,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_buflet_size_sysctl, "IU", "Fake interface buflet size");

static unsigned int if_fake_user_access = 0;

static int
feth_user_access_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_user_access,
        sizeof(if_fake_user_access), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value != 0 && new_value != 1) {
            return EINVAL;
        }
        if_fake_user_access = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, user_access,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, feth_user_access_sysctl, "IU", "Fake interface user access");

/* sysctl net.link.fake.if_adv_intvl (unit: millisecond) */
#define FETH_IF_ADV_INTVL_MIN 10
#define FETH_IF_ADV_INTVL_MAX INT_MAX

static int if_fake_if_adv_interval = 0; /* no interface advisory */
static int
feth_if_adv_interval_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_if_adv_interval,
        sizeof(if_fake_if_adv_interval), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if ((new_value != 0) && (new_value > FETH_IF_ADV_INTVL_MAX ||
            new_value < FETH_IF_ADV_INTVL_MIN)) {
            return EINVAL;
        }
        if_fake_if_adv_interval = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, if_adv_intvl,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_if_adv_interval_sysctl, "IU",
    "Fake interface will generate interface advisory reports at the specified interval in ms");

/* sysctl net.link.fake.tx_drops */
/*
 * Fake ethernet will drop packets on the transmit path at the specified
 * rate, i.e., drop one in every if_fake_tx_drops packets.
 */
#define FETH_TX_DROPS_MIN 0
#define FETH_TX_DROPS_MAX INT_MAX
static int if_fake_tx_drops = 0; /* no packets are dropped */
static int
feth_fake_tx_drops_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_tx_drops,
        sizeof(if_fake_tx_drops), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value > FETH_TX_DROPS_MAX ||
            new_value < FETH_TX_DROPS_MIN) {
            return EINVAL;
        }
        if_fake_tx_drops = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_drops,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_tx_drops_sysctl, "IU",
    "Fake interface will intermittently drop packets on Tx path");

/* sysctl net.link.fake.tx_exp_policy */

typedef enum {
    IFF_TX_EXP_POLICY_DISABLED = 0,        /* Expiry notification disabled */
    IFF_TX_EXP_POLICY_DROP_AND_NOTIFY = 1, /* Expiry notification enabled; drop + notify mode */
    IFF_TX_EXP_POLICY_NOTIFY_ONLY = 2,     /* Expiry notification enabled; notify only mode */
    IFF_TX_EXP_POLICY_METADATA = 3,        /* Expiry notification enabled; use packet metadata */
} iff_tx_exp_policy_t;
static iff_tx_exp_policy_t if_fake_tx_exp_policy = IFF_TX_EXP_POLICY_DISABLED;

static int
feth_fake_tx_exp_policy_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_tx_exp_policy,
        sizeof(if_fake_tx_exp_policy), &new_value, &changed);
    FETH_DPRINTF("if_fake_tx_exp_policy: %u -> %u (%d)\n",
        if_fake_tx_exp_policy, new_value, changed);
    if (error == 0 && changed != 0) {
        if (new_value > IFF_TX_EXP_POLICY_METADATA) {
            return EINVAL;
        }
        if_fake_tx_exp_policy = new_value;
    }
    return 0;
}
SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_exp_policy,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_tx_exp_policy_sysctl, "IU",
    "Fake interface handling policy for expired TX attempts "
    "(0 disabled, 1 drop and notify, 2 notify only, 3 packet metadata)");

/* sysctl net.link.fake.tx_completion_mode */
typedef enum {
    IFF_TX_COMPL_MODE_SYNC = 0,
    IFF_TX_COMPL_MODE_ASYNC = 1,
} iff_tx_completion_mode_t;
static iff_tx_completion_mode_t if_tx_completion_mode = IFF_TX_COMPL_MODE_SYNC;
static int
feth_fake_tx_completion_mode_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_tx_completion_mode,
        sizeof(if_tx_completion_mode), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value > IFF_TX_COMPL_MODE_ASYNC) {
            return EINVAL;
        }
        if_tx_completion_mode = new_value;
    }
    return 0;
}
SYSCTL_PROC(_net_link_fake, OID_AUTO, tx_completion_mode,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_tx_completion_mode_sysctl, "IU",
    "Fake interface tx completion mode (0 synchronous, 1 asynchronous)");

/* sysctl net.link.fake.llink_cnt */

/* The maximum number of logical links (including the default link) */
#define FETH_MAX_LLINKS 16
/*
 * The default number of logical links (including the default link).
 * Zero means logical link mode is disabled.
 */
#define FETH_DEF_LLINKS 0

static uint32_t if_fake_llink_cnt = FETH_DEF_LLINKS;
static int
feth_fake_llink_cnt_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_llink_cnt,
        sizeof(if_fake_llink_cnt), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value > FETH_MAX_LLINKS) {
            return EINVAL;
        }
        if_fake_llink_cnt = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, llink_cnt,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_llink_cnt_sysctl, "IU",
    "Fake interface logical link count");

/* sysctl net.link.fake.qset_cnt */

/* The maximum number of qsets for each logical link */
#define FETH_MAX_QSETS 16
/* The default number of qsets for each logical link */
#define FETH_DEF_QSETS 4

static uint32_t if_fake_qset_cnt = FETH_DEF_QSETS;
static int
feth_fake_qset_cnt_sysctl SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    unsigned int new_value;
    int changed;
    int error;

    error = sysctl_io_number(req, if_fake_qset_cnt,
        sizeof(if_fake_qset_cnt), &new_value, &changed);
    if (error == 0 && changed != 0) {
        if (new_value == 0 ||
            new_value > FETH_MAX_QSETS) {
            return EINVAL;
        }
        if_fake_qset_cnt = new_value;
    }
    return 0;
}

SYSCTL_PROC(_net_link_fake, OID_AUTO, qset_cnt,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
    feth_fake_qset_cnt_sysctl, "IU",
    "Fake interface queue set count");

/**
** virtual ethernet structures, types
**/

#define IFF_NUM_TX_RINGS_WMM_MODE 4
#define IFF_NUM_RX_RINGS_WMM_MODE 1
#define IFF_MAX_TX_RINGS IFF_NUM_TX_RINGS_WMM_MODE
#define IFF_MAX_RX_RINGS IFF_NUM_RX_RINGS_WMM_MODE
#define IFF_NUM_TX_QUEUES_WMM_MODE 4
#define IFF_NUM_RX_QUEUES_WMM_MODE 1
#define IFF_MAX_TX_QUEUES IFF_NUM_TX_QUEUES_WMM_MODE
#define IFF_MAX_RX_QUEUES IFF_NUM_RX_QUEUES_WMM_MODE

#define IFF_MAX_BATCH_SIZE 32

typedef uint16_t iff_flags_t;
#define IFF_FLAGS_HWCSUM        0x0001
#define IFF_FLAGS_BSD_MODE      0x0002
#define IFF_FLAGS_DETACHING     0x0004
#define IFF_FLAGS_WMM_MODE      0x0008
#define IFF_FLAGS_MULTIBUFLETS  0x0010
#define IFF_FLAGS_TSO_SUPPORT   0x0020
#define IFF_FLAGS_LRO           0x0040

#if SKYWALK

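/*
 * Logical-link mode (net.link.fake.llink_cnt > 0) models a hierarchy: a feth
 * instance owns up to FETH_MAX_LLINKS logical links (fake_llink), each with up
 * to FETH_MAX_QSETS queue sets (fake_qset), and each qset with its RX and TX
 * netif queues (fake_queue). The structures below mirror that hierarchy.
 */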
typedef struct {
    uuid_t fnx_provider;
    uuid_t fnx_instance;
} fake_nx, *fake_nx_t;

typedef struct {
    kern_netif_queue_t fq_queue;
} fake_queue;

typedef struct {
    kern_netif_qset_t fqs_qset; /* provided by xnu */
    fake_queue fqs_rx_queue[IFF_MAX_RX_QUEUES];
    fake_queue fqs_tx_queue[IFF_MAX_TX_QUEUES];
    uint32_t fqs_rx_queue_cnt;
    uint32_t fqs_tx_queue_cnt;
    uint32_t fqs_llink_idx;
    uint32_t fqs_idx;
    uint32_t fqs_dequeue_cnt;
    uint64_t fqs_id;
    boolean_t fqs_combined_mode;
} fake_qset;

typedef struct {
    uint64_t fl_id;
    uint32_t fl_idx;
    uint32_t fl_qset_cnt;
    fake_qset fl_qset[FETH_MAX_QSETS];
} fake_llink;

static kern_pbufpool_t S_pp;

#define IFF_TT_OUTPUT 0x01 /* generate trace_tag on output */
#define IFF_TT_INPUT  0x02 /* generate trace_tag on input */
static int if_fake_trace_tag_flags = 0;
SYSCTL_INT(_net_link_fake, OID_AUTO, trace_tag, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_fake_trace_tag_flags, 0, "Fake interface generate trace_tag");
static packet_trace_tag_t if_fake_trace_tag_current = 1;

#endif /* SKYWALK */

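/*
 * Per-interface soft state. The structure is reference counted via
 * iff_retain_count and released through feth_release(); feth_free() runs once
 * the last reference is dropped.
 */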
struct if_fake {
    char iff_name[IFNAMSIZ]; /* our unique id */
    ifnet_t iff_ifp;
    iff_flags_t iff_flags;
    uint32_t iff_retain_count;
    ifnet_t iff_peer; /* the other end */
    int iff_media_current;
    int iff_media_active;
    uint32_t iff_media_count;
    int iff_media_list[IF_FAKE_MEDIA_LIST_MAX];
    struct mbuf * iff_pending_tx_packet;
    boolean_t iff_start_busy;
    unsigned int iff_max_mtu;
    uint32_t iff_fcs;
    uint32_t iff_trailer_length;
#if SKYWALK
    fake_nx iff_nx;
    struct netif_stats *iff_nifs;
    uint32_t iff_nifs_ref;
    uint32_t iff_llink_cnt;
    kern_channel_ring_t iff_rx_ring[IFF_MAX_RX_RINGS];
    kern_channel_ring_t iff_tx_ring[IFF_MAX_TX_RINGS];
    fake_llink *iff_llink __counted_by(FETH_MAX_LLINKS);
    thread_call_t iff_doorbell_tcall;
    thread_call_t iff_if_adv_tcall;
    boolean_t iff_doorbell_tcall_active;
    boolean_t iff_waiting_for_tcall;
    boolean_t iff_channel_connected;
    iff_pktpool_mode_t iff_pp_mode;
    kern_pbufpool_t iff_rx_pp;
    kern_pbufpool_t iff_tx_pp;
    uint32_t iff_tx_headroom;
    unsigned int iff_adv_interval;
    uint32_t iff_tx_drop_rate;
    uint32_t iff_tx_pkts_count;
    iff_tx_completion_mode_t iff_tx_completion_mode;
    bool iff_intf_adv_enabled;
    void *iff_intf_adv_kern_ctx;
    kern_nexus_capab_interface_advisory_notify_fn_t iff_intf_adv_notify;
    iff_tx_exp_policy_t iff_tx_exp_policy;
#endif /* SKYWALK */
};

typedef struct if_fake * if_fake_ref;

static if_fake_ref
ifnet_get_if_fake(ifnet_t ifp);

static inline boolean_t
feth_in_bsd_mode(if_fake_ref fakeif)
{
    return (fakeif->iff_flags & IFF_FLAGS_BSD_MODE) != 0;
}

static inline void
feth_set_detaching(if_fake_ref fakeif)
{
    fakeif->iff_flags |= IFF_FLAGS_DETACHING;
}

static inline boolean_t
feth_is_detaching(if_fake_ref fakeif)
{
    return (fakeif->iff_flags & IFF_FLAGS_DETACHING) != 0;
}

static int
feth_enable_dequeue_stall(ifnet_t ifp, uint32_t enable)
{
    int error;

    if (enable != 0) {
        error = ifnet_disable_output(ifp);
    } else {
        error = ifnet_enable_output(ifp);
    }

    return error;
}

#if SKYWALK
static inline boolean_t
feth_in_wmm_mode(if_fake_ref fakeif)
{
    return (fakeif->iff_flags & IFF_FLAGS_WMM_MODE) != 0;
}

static inline boolean_t
feth_using_multibuflets(if_fake_ref fakeif)
{
    return (fakeif->iff_flags & IFF_FLAGS_MULTIBUFLETS) != 0;
}

static void feth_detach_netif_nexus(if_fake_ref fakeif);

static inline boolean_t
feth_has_intf_advisory_configured(if_fake_ref fakeif)
{
    return fakeif->iff_adv_interval > 0;
}

static inline bool
feth_supports_tso(if_fake_ref fakeif)
{
    return (fakeif->iff_flags & IFF_FLAGS_TSO_SUPPORT) != 0;
}
#endif /* SKYWALK */

#define FETH_MAXUNIT IF_MAXUNIT
#define FETH_ZONE_MAX_ELEM MIN(IFNETS_MAX, FETH_MAXUNIT)

static int feth_clone_create(struct if_clone *, u_int32_t, void *);
static int feth_clone_destroy(ifnet_t);
static int feth_output(ifnet_t ifp, struct mbuf *m);
static void feth_start(ifnet_t ifp);
static int feth_ioctl(ifnet_t ifp, u_long cmd, void * addr);
static int feth_config(ifnet_t ifp, ifnet_t peer);
static void feth_if_free(ifnet_t ifp);
static void feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp);
static void feth_free(if_fake_ref fakeif);

static struct if_clone
    feth_cloner = IF_CLONE_INITIALIZER(FAKE_ETHER_NAME,
        feth_clone_create,
        feth_clone_destroy,
        0,
        FETH_MAXUNIT);
static void interface_link_event(ifnet_t ifp, u_int32_t event_code);

/* some media words to pretend to be ethernet */
static int default_media_words[] = {
    IFM_MAKEWORD(IFM_ETHER, 0, 0, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_10G_T, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_2500_T, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_5000_T, IFM_FDX, 0),

    IFM_MAKEWORD(IFM_ETHER, IFM_10G_KX4, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_20G_KR2, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_2500_SX, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_25G_KR, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_40G_SR4, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_50G_CR2, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_56G_R4, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_100G_CR4, IFM_FDX, 0),
    IFM_MAKEWORD(IFM_ETHER, IFM_400G_AUI8, IFM_FDX, 0),
};
#define default_media_words_count (sizeof(default_media_words) \
    / sizeof(default_media_words[0]))

/**
** veth locks
**/

static LCK_GRP_DECLARE(feth_lck_grp, "fake");
static LCK_MTX_DECLARE(feth_lck_mtx, &feth_lck_grp);

static inline void
feth_lock(void)
{
    lck_mtx_lock(&feth_lck_mtx);
}

static inline void
feth_unlock(void)
{
    lck_mtx_unlock(&feth_lck_mtx);
}

static inline int
get_max_mtu(int bsd_mode, unsigned int max_mtu)
{
    unsigned int mtu;

    if (bsd_mode != 0) {
        mtu = (njcl > 0) ? (M16KCLBYTES - ETHER_HDR_LEN)
            : (MBIGCLBYTES - ETHER_HDR_LEN);
        if (mtu > max_mtu) {
            mtu = max_mtu;
        }
    } else {
        mtu = max_mtu;
    }
    return mtu;
}

static inline unsigned int
feth_max_mtu(ifnet_t ifp)
{
    if_fake_ref fakeif;
    unsigned int max_mtu = ETHERMTU;

    feth_lock();
    fakeif = ifnet_get_if_fake(ifp);
    if (fakeif != NULL) {
        max_mtu = fakeif->iff_max_mtu;
    }
    feth_unlock();
    return max_mtu;
}

static void
feth_free(if_fake_ref fakeif)
{
    VERIFY(fakeif->iff_retain_count == 0);
    if (feth_in_bsd_mode(fakeif)) {
        if (fakeif->iff_pending_tx_packet) {
            m_freem(fakeif->iff_pending_tx_packet);
        }
    }
#if SKYWALK
    else {
        if (fakeif->iff_pp_mode == IFF_PP_MODE_GLOBAL) {
            VERIFY(fakeif->iff_rx_pp == S_pp);
            VERIFY(fakeif->iff_tx_pp == S_pp);
            pp_release(fakeif->iff_rx_pp);
            fakeif->iff_rx_pp = NULL;
            pp_release(fakeif->iff_tx_pp);
            fakeif->iff_tx_pp = NULL;
            feth_lock();
            if (S_pp->pp_refcnt == 1) {
                pp_release(S_pp);
                S_pp = NULL;
            }
            feth_unlock();
        } else {
            if (fakeif->iff_rx_pp != NULL) {
                pp_release(fakeif->iff_rx_pp);
                fakeif->iff_rx_pp = NULL;
            }
            if (fakeif->iff_tx_pp != NULL) {
                pp_release(fakeif->iff_tx_pp);
                fakeif->iff_tx_pp = NULL;
            }
        }
    }
#endif /* SKYWALK */

    FETH_DPRINTF("%s\n", fakeif->iff_name);
    kfree_type(fake_llink, FETH_MAX_LLINKS, fakeif->iff_llink);
    kfree_type(struct if_fake, fakeif);
}

static void
feth_release(if_fake_ref fakeif)
{
    u_int32_t old_retain_count;

    old_retain_count = OSDecrementAtomic(&fakeif->iff_retain_count);
    switch (old_retain_count) {
    case 0:
        VERIFY(old_retain_count != 0);
        break;
    case 1:
        feth_free(fakeif);
        break;
    default:
        break;
    }
    return;
}

#if SKYWALK

static void
feth_retain(if_fake_ref fakeif)
{
    OSIncrementAtomic(&fakeif->iff_retain_count);
}

static void
feth_packet_pool_init_prepare(if_fake_ref fakeif,
    struct kern_pbufpool_init *pp_init)
{
    uint32_t max_mtu = fakeif->iff_max_mtu;
    uint32_t buflet_size = if_fake_buflet_size;

    bzero(pp_init, sizeof(*pp_init));
    pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
    pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
    pp_init->kbi_packets = 1024; /* TBD configurable */
    if (feth_supports_tso(fakeif)) {
        buflet_size = if_fake_tso_buffer_size;
    }
    if (feth_using_multibuflets(fakeif)) {
        pp_init->kbi_bufsize = buflet_size;
        pp_init->kbi_max_frags = howmany(max_mtu, buflet_size);
        pp_init->kbi_buflets = pp_init->kbi_packets *
            pp_init->kbi_max_frags;
        pp_init->kbi_flags |= KBIF_BUFFER_ON_DEMAND;
    } else {
        pp_init->kbi_bufsize = max(max_mtu, buflet_size);
        pp_init->kbi_max_frags = 1;
        pp_init->kbi_buflets = pp_init->kbi_packets;
    }
    pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
    if (if_fake_user_access != 0) {
        pp_init->kbi_flags |= KBIF_USER_ACCESS;
    }
    pp_init->kbi_ctx = NULL;
    pp_init->kbi_ctx_retain = NULL;
    pp_init->kbi_ctx_release = NULL;
}

static errno_t
feth_packet_pool_make(if_fake_ref fakeif)
{
    struct kern_pbufpool_init pp_init;
    errno_t err;

    feth_packet_pool_init_prepare(fakeif, &pp_init);

    switch (fakeif->iff_pp_mode) {
    case IFF_PP_MODE_GLOBAL:
        feth_lock();
        if (S_pp == NULL) {
            (void)snprintf((char *)pp_init.kbi_name,
                sizeof(pp_init.kbi_name), "%s", "feth shared pp");
            err = kern_pbufpool_create(&pp_init, &S_pp, NULL);
            if (err != 0) {
                feth_unlock();
                return err;
            }
        }
        pp_retain(S_pp);
        feth_unlock();
        fakeif->iff_rx_pp = S_pp;
        pp_retain(S_pp);
        fakeif->iff_tx_pp = S_pp;
        break;
    case IFF_PP_MODE_PRIVATE:
        (void)snprintf((char *)pp_init.kbi_name,
            sizeof(pp_init.kbi_name), "%s pp", fakeif->iff_name);
        err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
        if (err != 0) {
            return err;
        }
        pp_retain(fakeif->iff_rx_pp);
        fakeif->iff_tx_pp = fakeif->iff_rx_pp;
        break;
    case IFF_PP_MODE_PRIVATE_SPLIT:
        (void)snprintf((char *)pp_init.kbi_name,
            sizeof(pp_init.kbi_name), "%s rx pp", fakeif->iff_name);
        pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
            KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
        pp_init.kbi_flags |= (KBIF_IODIR_IN | KBIF_BUFFER_ON_DEMAND);
        pp_init.kbi_packets = 1024;
        pp_init.kbi_bufsize = if_fake_link_layer_aggregation_factor * 1024;
        err = kern_pbufpool_create(&pp_init, &fakeif->iff_rx_pp, NULL);
        if (err != 0) {
            printf("%s: rx pp create failed %d\n", __func__, err);
            return err;
        }
        pp_init.kbi_flags &= ~(KBIF_IODIR_IN | KBIF_IODIR_OUT |
            KBIF_BUFFER_ON_DEMAND | KBIF_KERNEL_READONLY);
        pp_init.kbi_flags |= KBIF_IODIR_OUT;
        pp_init.kbi_packets = 1024; /* TBD configurable */
        pp_init.kbi_bufsize = fakeif->iff_max_mtu;
        (void)snprintf((char *)pp_init.kbi_name,
            sizeof(pp_init.kbi_name), "%s tx pp", fakeif->iff_name);
        err = kern_pbufpool_create(&pp_init, &fakeif->iff_tx_pp, NULL);
        if (err != 0) {
            printf("%s: tx pp create failed %d\n", __func__, err);
            pp_release(fakeif->iff_rx_pp);
            return err;
        }
        break;
    default:
        VERIFY(0);
        __builtin_unreachable();
    }

    return 0;
}

static void
feth_packet_set_trace_tag(kern_packet_t ph, int flag)
{
    if (if_fake_trace_tag_flags & flag) {
        if (++if_fake_trace_tag_current == 0) {
            if_fake_trace_tag_current = 1;
        }
        kern_packet_set_trace_tag(ph, if_fake_trace_tag_current);
    }
}

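/*
 * In the private-split pool mode, received frames are packed back-to-back into
 * large RX buffers: if the previous destination buffer still has room past its
 * data limit, the new frame is copied there and a lightweight clone
 * (KPKT_COPY_LIGHT) of the previous packet is used to describe it; otherwise a
 * fresh buffer is allocated from the RX pool.
 */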
static errno_t
feth_clone_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
    errno_t err = 0;
    kern_pbufpool_t pp = dif->iff_rx_pp;
    kern_packet_t dph = 0, dph0 = 0;
    kern_buflet_t sbuf, dbuf0 = NULL, dbuf;
    void *saddr, *daddr;
    uint32_t soff, doff;
    uint32_t slen, dlen;
    uint32_t dlim0, dlim;

    sbuf = kern_packet_get_next_buflet(sph, NULL);
    saddr = kern_buflet_get_data_address(sbuf);
    doff = soff = kern_buflet_get_data_offset(sbuf);
    dlen = slen = kern_buflet_get_data_length(sbuf);

    /* packet clone is only supported for single-buflet packets */
    ASSERT(kern_packet_get_buflet_count(sph) == 1);
    ASSERT(soff == kern_packet_get_headroom(sph));
    ASSERT(slen == kern_packet_get_data_length(sph));

    dph0 = *pdph;
    if (dph0 == 0) {
        dlim0 = 0;
    } else {
        dbuf0 = kern_packet_get_next_buflet(dph0, NULL);
        ASSERT(kern_buflet_get_object_limit(dbuf0) ==
            PP_BUF_OBJ_SIZE_DEF(pp));
        ASSERT(kern_buflet_get_data_limit(dbuf0) % 16 == 0);
        dlim0 = ((uintptr_t)kern_buflet_get_object_address(dbuf0) +
            kern_buflet_get_object_limit(dbuf0)) -
            ((uintptr_t)kern_buflet_get_data_address(dbuf0) +
            kern_buflet_get_data_limit(dbuf0));
    }

    if (doff + dlen > dlim0) {
        err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
        if (err != 0) {
            STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
            STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
            return err;
        }
        dbuf = kern_packet_get_next_buflet(dph, NULL);
        ASSERT(kern_buflet_get_data_address(dbuf) ==
            kern_buflet_get_object_address(dbuf));
        daddr = kern_buflet_get_data_address(dbuf);
        dlim = kern_buflet_get_object_limit(dbuf);
        ASSERT(dlim == PP_BUF_OBJ_SIZE_DEF(pp));
    } else {
        err = kern_packet_clone_nosleep(dph0, &dph, KPKT_COPY_LIGHT);
        if (err != 0) {
            printf("%s: packet clone err %d\n", __func__, err);
            return err;
        }
        dbuf = kern_packet_get_next_buflet(dph, NULL);
        ASSERT(kern_buflet_get_object_address(dbuf) ==
            kern_buflet_get_object_address(dbuf0));
        daddr = (void *)((uintptr_t)kern_buflet_get_data_address(dbuf0) +
            kern_buflet_get_data_limit(dbuf0));
        dlim = dlim0;
    }

    ASSERT(doff + dlen <= dlim);

    ASSERT((uintptr_t)daddr % 16 == 0);

    bcopy((const void *)((uintptr_t)saddr + soff),
        (void *)((uintptr_t)daddr + doff), slen);

    dlim = MIN(dlim, P2ROUNDUP(doff + dlen, 16));
    err = kern_buflet_set_data_address(dbuf, daddr);
    VERIFY(err == 0);
    err = kern_buflet_set_data_limit(dbuf, dlim);
    VERIFY(err == 0);
    err = kern_buflet_set_data_length(dbuf, dlen);
    VERIFY(err == 0);
    err = kern_buflet_set_data_offset(dbuf, doff);
    VERIFY(err == 0);
    err = kern_packet_set_headroom(dph, doff);
    VERIFY(err == 0);
    err = kern_packet_set_link_header_length(dph,
        kern_packet_get_link_header_length(sph));
    VERIFY(err == 0);
    err = kern_packet_set_service_class(dph,
        kern_packet_get_service_class(sph));
    VERIFY(err == 0);
    err = kern_packet_finalize(dph);
    VERIFY(err == 0);
    *pdph = dph;

    return err;
}

static inline void
feth_copy_buflet(kern_buflet_t sbuf, kern_buflet_t dbuf)
{
    errno_t err;
    uint32_t off, len;
    uint8_t *saddr, *daddr;

    saddr = kern_buflet_get_data_address(sbuf);
    off = kern_buflet_get_data_offset(sbuf);
    len = kern_buflet_get_data_length(sbuf);
    daddr = kern_buflet_get_data_address(dbuf);
    bcopy((saddr + off), (daddr + off), len);
    err = kern_buflet_set_data_offset(dbuf, off);
    VERIFY(err == 0);
    err = kern_buflet_set_data_length(dbuf, len);
    VERIFY(err == 0);
}

static int
feth_add_packet_trailer(kern_packet_t ph, void *trailer, size_t trailer_len)
{
    errno_t err = 0;

    ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);

    kern_buflet_t buf = NULL, iter = NULL;
    while ((iter = kern_packet_get_next_buflet(ph, iter)) != NULL) {
        buf = iter;
    }
    ASSERT(buf != NULL);

    uint32_t dlim = kern_buflet_get_data_limit(buf);
    uint32_t doff = kern_buflet_get_data_offset(buf);
    uint32_t dlen = kern_buflet_get_data_length(buf);

    size_t trailer_room = dlim - doff - dlen;

    if (trailer_room < trailer_len) {
        printf("%s: not enough room for trailer\n", __func__);
        return ERANGE;
    }

    void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) +
        doff + dlen);
    memcpy(data, trailer, trailer_len);

    err = kern_buflet_set_data_length(buf, dlen + trailer_len);
    VERIFY(err == 0);

    err = kern_packet_finalize(ph);
    VERIFY(err == 0);

    FETH_DPRINTF("%zuB trailer added\n", trailer_len);

    return 0;
}

static int
feth_add_packet_fcs(kern_packet_t ph)
{
    uint32_t crc = 0;
    int err;

    ASSERT(sizeof(crc) == ETHER_CRC_LEN);

    kern_buflet_t buf = NULL;
    while ((buf = kern_packet_get_next_buflet(ph, buf)) != NULL) {
        uint32_t doff = kern_buflet_get_data_offset(buf);
        uint32_t dlen = kern_buflet_get_data_length(buf);
        void *data = (void *)((uintptr_t)kern_buflet_get_data_address(buf) +
            doff);
        crc = crc32(crc, data, dlen);
    }

    err = feth_add_packet_trailer(ph, &crc, ETHER_CRC_LEN);
    if (err != 0) {
        return err;
    }

    err = kern_packet_set_link_ethfcs(ph);
    VERIFY(err == 0);

    return 0;
}

static errno_t
feth_copy_packet(if_fake_ref dif, kern_packet_t sph, kern_packet_t *pdph)
{
    errno_t err = 0;
    uint16_t i, bufcnt;
    mach_vm_address_t baddr;
    kern_buflet_t sbuf = NULL, dbuf = NULL;
    kern_pbufpool_t pp = dif->iff_rx_pp;
    kern_packet_t dph;
    boolean_t multi_buflet = feth_using_multibuflets(dif);

    bufcnt = kern_packet_get_buflet_count(sph);
    ASSERT((bufcnt == 1) || multi_buflet);
    *pdph = 0;

    err = kern_pbufpool_alloc_nosleep(pp, 1, &dph);
    if (err != 0) {
        STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
        STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_PKT);
        return err;
    }

    /* pre-constructed single buflet packet copy */
    sbuf = kern_packet_get_next_buflet(sph, NULL);
    dbuf = kern_packet_get_next_buflet(dph, NULL);
    feth_copy_buflet(sbuf, dbuf);

    if (!multi_buflet) {
        goto done;
    }

    /* un-constructed multi-buflet packet copy */
    for (i = 1; i < bufcnt; i++) {
        kern_buflet_t dbuf_next = NULL;

        sbuf = kern_packet_get_next_buflet(sph, sbuf);
        VERIFY(sbuf != NULL);
        err = kern_pbufpool_alloc_buflet_nosleep(pp, &dbuf_next);
        if (err != 0) {
            STATS_INC(dif->iff_nifs, NETIF_STATS_DROP);
            STATS_INC(dif->iff_nifs, NETIF_STATS_DROP_NOMEM_BUF);
            break;
        }
        ASSERT(dbuf_next != NULL);
        feth_copy_buflet(sbuf, dbuf_next);
        err = kern_packet_add_buflet(dph, dbuf, dbuf_next);
        VERIFY(err == 0);
        dbuf = dbuf_next;
    }
    if (__improbable(err != 0)) {
        dbuf = NULL;
        while (i-- != 0) {
            dbuf = kern_packet_get_next_buflet(dph, dbuf);
            VERIFY(dbuf != NULL);
            baddr = (mach_vm_address_t)
                kern_buflet_get_data_address(dbuf);
            VERIFY(baddr != 0);
        }
        kern_pbufpool_free(pp, dph);
        dph = 0;
    }

done:
    if (__probable(err == 0)) {
        err = kern_packet_set_headroom(dph,
            kern_packet_get_headroom(sph));
        VERIFY(err == 0);
        err = kern_packet_set_link_header_length(dph,
            kern_packet_get_link_header_length(sph));
        VERIFY(err == 0);
        err = kern_packet_set_service_class(dph,
            kern_packet_get_service_class(sph));
        VERIFY(err == 0);
        err = kern_packet_finalize(dph);
        VERIFY(err == 0);
        VERIFY(bufcnt == kern_packet_get_buflet_count(dph));
        *pdph = dph;
    }
    return err;
}

static inline void
feth_update_pkt_tso_metadata_for_rx(kern_packet_t ph)
{
    /*
     * Nothing to do if this is not a TSO offloaded packet.
     */
    uint16_t seg_sz = 0;
    seg_sz = kern_packet_get_protocol_segment_size(ph);
    if (seg_sz == 0) {
        return;
    }
    /*
     * For RX, make the packet appear as a fully validated LRO packet.
     */
    packet_csum_flags_t csum_flags = PACKET_CSUM_IP_CHECKED |
        PACKET_CSUM_IP_VALID | PACKET_CSUM_DATA_VALID |
        PACKET_CSUM_PSEUDO_HDR;
    (void) kern_packet_set_inet_checksum(ph, csum_flags, 0, 0xFFFF, FALSE);
    return;
}

static void
feth_rx_submit(if_fake_ref sif, if_fake_ref dif, kern_packet_t sphs[],
    uint32_t n_pkts)
{
    errno_t err = 0;
    struct kern_channel_ring_stat_increment stats;
    kern_channel_ring_t rx_ring = NULL;
    kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
    kern_packet_t sph = 0, dph = 0;

    memset(&stats, 0, sizeof(stats));

    rx_ring = dif->iff_rx_ring[0];
    if (rx_ring == NULL) {
        return;
    }

    kr_enter(rx_ring, TRUE);
    kern_channel_reclaim(rx_ring);
    rx_slot = kern_channel_get_next_slot(rx_ring, NULL, NULL);

    for (uint32_t i = 0; i < n_pkts && rx_slot != NULL; i++) {
        sph = sphs[i];

        switch (dif->iff_pp_mode) {
        case IFF_PP_MODE_GLOBAL:
            sphs[i] = 0;
            dph = sph;
            feth_update_pkt_tso_metadata_for_rx(dph);
            err = kern_packet_finalize(dph);
            VERIFY(err == 0);
            break;
        case IFF_PP_MODE_PRIVATE:
            err = feth_copy_packet(dif, sph, &dph);
            break;
        case IFF_PP_MODE_PRIVATE_SPLIT:
            err = feth_clone_packet(dif, sph, &dph);
            break;
        default:
            VERIFY(0);
            __builtin_unreachable();
        }
        if (__improbable(err != 0)) {
            continue;
        }

        if (sif->iff_trailer_length != 0) {
            feth_add_packet_trailer(dph, feth_trailer,
                sif->iff_trailer_length);
        }
        if (sif->iff_fcs != 0) {
            feth_add_packet_fcs(dph);
        }
        feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
        bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);
        stats.kcrsi_slots_transferred++;
        stats.kcrsi_bytes_transferred
            += kern_packet_get_data_length(dph);

        /* attach the packet to the RX ring */
        err = kern_channel_slot_attach_packet(rx_ring, rx_slot, dph);
        VERIFY(err == 0);
        last_rx_slot = rx_slot;
        rx_slot = kern_channel_get_next_slot(rx_ring, rx_slot, NULL);
    }

    if (last_rx_slot != NULL) {
        kern_channel_advance_slot(rx_ring, last_rx_slot);
        kern_channel_increment_ring_net_stats(rx_ring, dif->iff_ifp,
            &stats);
    }

    if (rx_ring != NULL) {
        kr_exit(rx_ring);
        kern_channel_notify(rx_ring, 0);
    }
}

static void
feth_rx_queue_submit(if_fake_ref sif, if_fake_ref dif, uint32_t llink_idx,
    uint32_t qset_idx, kern_packet_t sphs[], uint32_t n_pkts)
{
    errno_t err = 0;
    kern_netif_queue_t queue;
    kern_packet_t sph = 0, dph = 0;
    fake_llink *llink;
    fake_qset *qset;

    if (llink_idx >= dif->iff_llink_cnt) {
        printf("%s: invalid llink_idx %d (max %d) on peer %s\n",
            __func__, llink_idx, dif->iff_llink_cnt, dif->iff_name);
        return;
    }
    llink = &dif->iff_llink[llink_idx];
    if (qset_idx >= llink->fl_qset_cnt) {
        printf("%s: invalid qset_idx %d (max %d) on peer %s\n",
            __func__, qset_idx, llink->fl_qset_cnt, dif->iff_name);
        return;
    }
    qset = &dif->iff_llink[llink_idx].fl_qset[qset_idx];
    queue = qset->fqs_rx_queue[0].fq_queue;
    if (queue == NULL) {
        printf("%s: NULL default queue (llink_idx %d, qset_idx %d) "
            "on peer %s\n", __func__, llink_idx, qset_idx,
            dif->iff_name);
        return;
    }
    for (uint32_t i = 0; i < n_pkts; i++) {
        uint32_t flags;

        sph = sphs[i];

        switch (dif->iff_pp_mode) {
        case IFF_PP_MODE_GLOBAL:
            sphs[i] = 0;
            dph = sph;
            feth_update_pkt_tso_metadata_for_rx(dph);
            break;
        case IFF_PP_MODE_PRIVATE:
            err = feth_copy_packet(dif, sph, &dph);
            break;
        case IFF_PP_MODE_PRIVATE_SPLIT:
            err = feth_clone_packet(dif, sph, &dph);
            break;
        default:
            VERIFY(0);
            __builtin_unreachable();
        }
        if (__improbable(err != 0)) {
            continue;
        }

        if (sif->iff_trailer_length != 0) {
            feth_add_packet_trailer(dph, feth_trailer,
                sif->iff_trailer_length);
        }
        if (sif->iff_fcs != 0) {
            feth_add_packet_fcs(dph);
        }
        feth_packet_set_trace_tag(dph, IFF_TT_INPUT);
        bpf_tap_packet_in(dif->iff_ifp, DLT_EN10MB, dph, NULL, 0);

        flags = (i == n_pkts - 1) ?
            KERN_NETIF_QUEUE_RX_ENQUEUE_FLAG_FLUSH : 0;
        kern_netif_queue_rx_enqueue(queue, dph, 1, flags);
    }
}

static void
feth_tx_complete(if_fake_ref fakeif, kern_packet_t phs[], uint32_t nphs)
{
    for (uint32_t i = 0; i < nphs; i++) {
        kern_packet_t ph = phs[i];
        if (ph == 0) {
            continue;
        }
        int err = kern_packet_set_tx_completion_status(ph, 0);
        VERIFY(err == 0);
        kern_packet_tx_completion(ph, fakeif->iff_ifp);
        kern_pbufpool_free(fakeif->iff_tx_pp, phs[i]);
        phs[i] = 0;
    }
}

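/*
 * TX expiration support: feth_packet_has_expired() compares a packet's expire
 * time against mach_absolute_time(), feth_get_effective_expn_action() maps the
 * net.link.fake.tx_exp_policy setting (or per-packet metadata) to an expiry
 * action, and feth_tx_expired_error() reports the result to the channel via
 * kern_channel_event_transmit_expired().
 */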
1461#define NSEC_PER_USEC 1000ull
1462/*
1463 * Calculate the time delta that passed from `since' to `until'.
1464 * If `until' happens before `since', returns negative value.
1465 */
1466static bool
1467feth_packet_has_expired(if_fake_ref __unused fakeif, kern_packet_t ph,
1468 uint64_t *out_deadline)
1469{
1470 uint64_t now;
1471 uint64_t packet_expire_time_mach;
1472 int64_t time_until_expiration;
1473 errno_t err;
1474 bool expired = false;
1475
1476 static mach_timebase_info_data_t clock_timebase = {0, 0};
1477
1478 if (clock_timebase.denom == 0) {
1479 clock_timebase_info(info: &clock_timebase);
1480 VERIFY(clock_timebase.denom != 0);
1481 }
1482
1483 err = kern_packet_get_expire_time(ph, &packet_expire_time_mach);
1484 if (err) {
1485 goto out;
1486 }
1487
1488 now = mach_absolute_time();
1489 time_until_expiration = packet_expire_time_mach - now;
1490 if (time_until_expiration < 0) {
1491 /* The packet had expired */
1492 expired = true;
1493 goto out;
1494 }
1495
1496 /* Convert the time_delta from mach ticks to nanoseconds */
1497 time_until_expiration *= clock_timebase.numer;
1498 time_until_expiration /= clock_timebase.denom;
1499 /* convert from nanoseconds to microseconds */
1500 time_until_expiration /= 1000ull;
1501
1502 if (if_fake_expiration_threshold_us < time_until_expiration) {
1503 /* packet has some life ahead of it */
1504 FETH_DPRINTF("Packet has %llu usec until expiration", time_until_expiration);
1505 goto out;
1506 }
1507
1508out:
1509 if (expired && out_deadline) {
1510 *out_deadline = packet_expire_time_mach;
1511 }
1512
1513 return expired;
1514}
1515
1516static errno_t
1517feth_get_packet_notification_details(if_fake_ref fakeif, kern_packet_t ph,
1518 packet_id_t *pkt_id, uint32_t *nx_port_id)
1519{
1520 errno_t err = 0;
1521
1522 err = kern_packet_get_packetid(ph, pkt_id);
1523 if (err != 0) {
1524 FETH_DPRINTF("%s err=%d getting packetid", fakeif->iff_name, err);
1525 return err;
1526 }
1527
1528 err = kern_packet_get_tx_nexus_port_id(ph, nx_port_id);
1529 if (err != 0) {
1530 FETH_DPRINTF("%s err=%d getting nx_port_id", fakeif->iff_name, err);
1531 return err;
1532 }
1533
1534 return 0;
1535}
1536
1537static packet_expiry_action_t
1538feth_get_effective_expn_action(if_fake_ref fakeif, kern_packet_t ph)
1539{
1540 errno_t err;
1541 packet_expiry_action_t expiry_action;
1542
1543 switch (fakeif->iff_tx_exp_policy) {
1544 case IFF_TX_EXP_POLICY_DISABLED:
1545 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1546 break;
1547 case IFF_TX_EXP_POLICY_NOTIFY_ONLY:
1548 expiry_action = PACKET_EXPIRY_ACTION_NOTIFY;
1549 break;
1550 case IFF_TX_EXP_POLICY_DROP_AND_NOTIFY:
1551 expiry_action = PACKET_EXPIRY_ACTION_DROP;
1552 break;
1553 case IFF_TX_EXP_POLICY_METADATA:
1554 err = kern_packet_get_expiry_action(ph, &expiry_action);
1555 if (err != 0) {
1556 if (err != ENOENT) {
1557 FETH_DPRINTF("Error %d when getting expiry action", err);
1558 }
1559 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1560 }
1561 break;
1562 default:
1563 FETH_DPRINTF("Unrecognized value %d for \"net.link.fake.tx_exp_policy\"",
1564 fakeif->iff_tx_exp_policy);
1565 expiry_action = PACKET_EXPIRY_ACTION_NONE;
1566 }
1567
1568 return expiry_action;
1569}
1570
1571/* returns true if the packet is selected for epxiration and should be dropped */
1572static bool
1573feth_tx_expired_error(if_fake_ref fakeif, kern_packet_t ph)
1574{
1575 int err = 0;
1576 uint32_t nx_port_id = 0;
1577 os_channel_event_packet_transmit_expired_t expn = {0};
1578 packet_expiry_action_t expiry_action = PACKET_EXPIRY_ACTION_NONE;
1579
1580 FETH_DPRINTF("%s\n", fakeif->iff_name);
1581
1582 if (feth_packet_has_expired(fakeif, ph, out_deadline: &expn.packet_tx_expiration_deadline)) {
1583 expiry_action = feth_get_effective_expn_action(fakeif, ph);
1584 }
1585
1586 bool drop_packet = (expiry_action == PACKET_EXPIRY_ACTION_DROP);
1587 if (expiry_action != PACKET_EXPIRY_ACTION_NONE) {
1588 /* set the expiration status code */
1589 expn.packet_tx_expiration_status = drop_packet ?
1590 CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_DROPPED :
1591 CHANNEL_EVENT_PKT_TRANSMIT_EXPIRED_ERR_EXPIRED_NOT_DROPPED;
1592
1593 /* Mark the expiration timestamp */
1594 expn.packet_tx_expiration_timestamp = mach_absolute_time();
1595
1596 err = feth_get_packet_notification_details(fakeif, ph,
1597 pkt_id: &expn.packet_id, nx_port_id: &nx_port_id);
1598
1599 if (err == 0) {
1600 err = kern_channel_event_transmit_expired(
1601 fakeif->iff_ifp, &expn, nx_port_id);
1602 FETH_DPRINTF("%s sent epxiry notification on nexus port %u notif code %u\n",
1603 fakeif->iff_name, nx_port_id, expn.packet_tx_expiration_status);
1604 }
1605 if (err != 0) {
1606 FETH_DPRINTF("%s err=%d, nx_port_id: 0x%x\n",
1607 fakeif->iff_name, err, nx_port_id);
1608 }
1609 }
1610
1611 return drop_packet;
1612}
1613
1614/* returns true if the packet is selected for TX error & dropped */
1615static bool
1616feth_tx_complete_error(if_fake_ref fakeif, kern_packet_t ph)
1617{
1618 int err;
1619
1620 if (fakeif->iff_tx_drop_rate == 0 ||
1621 fakeif->iff_tx_pkts_count != fakeif->iff_tx_drop_rate) {
1622 return false;
1623 }
1624 /* simulate TX completion error on the packet */
1625 if (fakeif->iff_tx_completion_mode == IFF_TX_COMPL_MODE_SYNC) {
1626 err = kern_packet_set_tx_completion_status(ph,
1627 CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED);
1628 VERIFY(err == 0);
1629 kern_packet_tx_completion(ph, fakeif->iff_ifp);
1630 } else {
1631 uint32_t nx_port_id = 0;
1632 os_channel_event_packet_transmit_status_t pkt_tx_status = {0};
1633
1634 pkt_tx_status.packet_status =
1635 CHANNEL_EVENT_PKT_TRANSMIT_STATUS_ERR_RETRY_FAILED;
1636 err = feth_get_packet_notification_details(fakeif, ph,
1637 pkt_id: &pkt_tx_status.packet_id, nx_port_id: &nx_port_id);
1638 if (err == 0) {
1639 err = kern_channel_event_transmit_status(
1640 fakeif->iff_ifp, &pkt_tx_status, nx_port_id);
1641 }
1642 if (err != 0) {
1643 FETH_DPRINTF("%s err=%d, nx_port_id: 0x%x\n",
1644 fakeif->iff_name, err, nx_port_id);
1645 }
1646 }
1647
1648 return true;
1649}
1650
1651static void
1652feth_if_adv(thread_call_param_t arg0, thread_call_param_t arg1)
1653{
1654#pragma unused(arg1)
1655 errno_t error;
1656 if_fake_ref fakeif = (if_fake_ref)arg0;
1657 struct ifnet_interface_advisory if_adv;
1658 struct ifnet_stats_param if_stat;
1659
1660 feth_lock();
1661 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
1662 feth_unlock();
1663 return;
1664 }
1665 feth_unlock();
1666
1667 if (!fakeif->iff_intf_adv_enabled) {
1668 goto done;
1669 }
1670
1671 error = ifnet_stat(interface: fakeif->iff_ifp, out_stats: &if_stat);
1672 if (error != 0) {
1673 FETH_DPRINTF("%s: ifnet_stat() failed %d\n",
1674 fakeif->iff_name, error);
1675 goto done;
1676 }
1677 if_adv.header.version = IF_INTERFACE_ADVISORY_VERSION_CURRENT;
1678 if_adv.header.direction = IF_INTERFACE_ADVISORY_DIRECTION_TX;
1679 if_adv.header.interface_type =
1680 IF_INTERFACE_ADVISORY_INTERFACE_TYPE_WIFI;
1681 if_adv.capacity.timestamp = mach_absolute_time();
1682 if_adv.capacity.rate_trend_suggestion =
1683 IF_INTERFACE_ADVISORY_RATE_SUGGESTION_RAMP_NEUTRAL;
1684 if_adv.capacity.max_bandwidth = 1000 * 1000 * 1000; /* 1Gbps */
1685 if_adv.capacity.total_byte_count = if_stat.packets_out;
1686 if_adv.capacity.average_throughput = 1000 * 1000 * 1000; /* 1Gbps */
1687 if_adv.capacity.flushable_queue_size = UINT32_MAX;
1688 if_adv.capacity.non_flushable_queue_size = UINT32_MAX;
1689 if_adv.capacity.average_delay = 1; /* ms */
1690
1691 error = fakeif->iff_intf_adv_notify(fakeif->iff_intf_adv_kern_ctx,
1692 &if_adv);
1693 if (error != 0) {
1694 FETH_DPRINTF("%s: interface advisory report failed %d\n",
1695 fakeif->iff_name, error);
1696 }
1697
1698done:
1699 feth_lock();
1700 if (!feth_is_detaching(fakeif) && fakeif->iff_channel_connected) {
1701 uint64_t deadline;
1702 clock_interval_to_deadline(interval: fakeif->iff_adv_interval,
1703 NSEC_PER_MSEC, result: &deadline);
1704 thread_call_enter_delayed(call: fakeif->iff_if_adv_tcall, deadline);
1705 }
1706 feth_unlock();
1707}
1708
1709static int
1710feth_if_adv_tcall_create(if_fake_ref fakeif)
1711{
1712 uint64_t deadline;
1713
1714 feth_lock();
1715 ASSERT(fakeif->iff_if_adv_tcall == NULL);
1716 ASSERT(fakeif->iff_adv_interval > 0);
1717 ASSERT(fakeif->iff_channel_connected);
1718 fakeif->iff_if_adv_tcall =
1719 thread_call_allocate_with_options(func: feth_if_adv,
1720 param0: (thread_call_param_t)fakeif, pri: THREAD_CALL_PRIORITY_KERNEL,
1721 options: THREAD_CALL_OPTIONS_ONCE);
1722 if (fakeif->iff_if_adv_tcall == NULL) {
1723 printf("%s: %s if_adv tcall alloc failed\n", __func__,
1724 fakeif->iff_name);
1725 return ENXIO;
1726 }
1727 /* retain for the interface advisory thread call */
1728 feth_retain(fakeif);
1729 clock_interval_to_deadline(interval: fakeif->iff_adv_interval,
1730 NSEC_PER_MSEC, result: &deadline);
1731 thread_call_enter_delayed(call: fakeif->iff_if_adv_tcall, deadline);
1732 feth_unlock();
1733 return 0;
1734}
1735
1736static void
1737feth_if_adv_tcall_destroy(if_fake_ref fakeif)
1738{
1739 thread_call_t tcall;
1740
1741 feth_lock();
1742 ASSERT(fakeif->iff_if_adv_tcall != NULL);
1743 tcall = fakeif->iff_if_adv_tcall;
1744 feth_unlock();
1745 (void) thread_call_cancel_wait(call: tcall);
1746 if (!thread_call_free(call: tcall)) {
1747 boolean_t freed;
1748 (void) thread_call_cancel_wait(call: tcall);
1749 freed = thread_call_free(call: tcall);
1750 VERIFY(freed);
1751 }
1752 feth_lock();
1753 fakeif->iff_if_adv_tcall = NULL;
1754 feth_unlock();
1755 /* release for the interface advisory thread call */
1756 feth_release(fakeif);
1757}
1758
1759
1760/**
1761** nexus netif domain provider
1762**/
1763static errno_t
1764feth_nxdp_init(kern_nexus_domain_provider_t domprov)
1765{
1766#pragma unused(domprov)
1767 return 0;
1768}
1769
1770static void
1771feth_nxdp_fini(kern_nexus_domain_provider_t domprov)
1772{
1773#pragma unused(domprov)
1774}
1775
1776static uuid_t feth_nx_dom_prov;
1777
1778static errno_t
1779feth_register_nexus_domain_provider(void)
1780{
1781 const struct kern_nexus_domain_provider_init dp_init = {
1782 .nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
1783 .nxdpi_flags = 0,
1784 .nxdpi_init = feth_nxdp_init,
1785 .nxdpi_fini = feth_nxdp_fini
1786 };
1787 errno_t err = 0;
1788
1789 /* feth_nxdp_init() is called before this function returns */
1790 err = kern_nexus_register_domain_provider(type: NEXUS_TYPE_NET_IF,
1791 name: (const uint8_t *)
1792 "com.apple.feth",
1793 init: &dp_init, init_len: sizeof(dp_init),
1794 dom_prov_uuid: &feth_nx_dom_prov);
1795 if (err != 0) {
1796 printf("%s: failed to register domain provider\n", __func__);
1797 return err;
1798 }
1799 return 0;
1800}
1801
1802/**
1803** netif nexus routines
1804**/
1805static if_fake_ref
1806feth_nexus_context(kern_nexus_t nexus)
1807{
1808 if_fake_ref fakeif;
1809
1810 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
1811 assert(fakeif != NULL);
1812 return fakeif;
1813}
1814
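/*
 * Map a packet service class to its WMM-mode TX ring index:
 * voice (VO) -> 0, video (VI) -> 1, best effort (BE) -> 2,
 * background (BK) -> 3.
 */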
1815static uint8_t
1816feth_find_tx_ring_by_svc(kern_packet_svc_class_t svc_class)
1817{
1818 switch (svc_class) {
1819 case KPKT_SC_VO:
1820 return 0;
1821 case KPKT_SC_VI:
1822 return 1;
1823 case KPKT_SC_BE:
1824 return 2;
1825 case KPKT_SC_BK:
1826 return 3;
1827 default:
1828 VERIFY(0);
1829 return 0;
1830 }
1831}
1832
1833static errno_t
1834feth_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1835 kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
1836 void **ring_ctx)
1837{
1838 if_fake_ref fakeif;
1839 int err;
1840#pragma unused(nxprov, channel, ring_ctx)
1841 feth_lock();
1842 fakeif = feth_nexus_context(nexus);
1843 if (feth_is_detaching(fakeif)) {
1844 feth_unlock();
1845 return 0;
1846 }
1847 if (is_tx_ring) {
1848 if (feth_in_wmm_mode(fakeif)) {
1849 kern_packet_svc_class_t svc_class;
1850 uint8_t ring_idx;
1851
1852 err = kern_channel_get_service_class(ring, svc: &svc_class);
1853 VERIFY(err == 0);
1854 ring_idx = feth_find_tx_ring_by_svc(svc_class);
1855 VERIFY(ring_idx < IFF_NUM_TX_RINGS_WMM_MODE);
1856 VERIFY(fakeif->iff_tx_ring[ring_idx] == NULL);
1857 fakeif->iff_tx_ring[ring_idx] = ring;
1858 } else {
1859 VERIFY(fakeif->iff_tx_ring[0] == NULL);
1860 fakeif->iff_tx_ring[0] = ring;
1861 }
1862 } else {
1863 VERIFY(fakeif->iff_rx_ring[0] == NULL);
1864 fakeif->iff_rx_ring[0] = ring;
1865 }
1866 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
1867 feth_unlock();
1868 FETH_DPRINTF("%s: %s ring init\n",
1869 fakeif->iff_name, is_tx_ring ? "TX" : "RX");
1870 return 0;
1871}
1872
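/*
 * Tear down the ring state.  When the last TX ring goes away, the
 * async doorbell thread call (if any) is cancelled and freed outside
 * of the feth lock; if the call is currently executing, wait for it
 * to signal completion before freeing it.
 */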
1873static void
1874feth_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
1875 kern_channel_ring_t ring)
1876{
1877#pragma unused(nxprov, ring)
1878 if_fake_ref fakeif;
1879 thread_call_t tcall = NULL;
1880
1881 feth_lock();
1882 fakeif = feth_nexus_context(nexus);
1883 if (fakeif->iff_rx_ring[0] == ring) {
1884 fakeif->iff_rx_ring[0] = NULL;
1885 FETH_DPRINTF("%s: RX ring fini\n", fakeif->iff_name);
1886 } else if (feth_in_wmm_mode(fakeif)) {
1887 int i;
1888 for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
1889 if (fakeif->iff_tx_ring[i] == ring) {
1890 fakeif->iff_tx_ring[i] = NULL;
1891 break;
1892 }
1893 }
1894 for (i = 0; i < IFF_MAX_TX_RINGS; i++) {
1895 if (fakeif->iff_tx_ring[i] != NULL) {
1896 break;
1897 }
1898 }
1899 if (i == IFF_MAX_TX_RINGS) {
1900 tcall = fakeif->iff_doorbell_tcall;
1901 fakeif->iff_doorbell_tcall = NULL;
1902 }
1903 FETH_DPRINTF("%s: TX ring fini\n", fakeif->iff_name);
1904 } else if (fakeif->iff_tx_ring[0] == ring) {
1905 tcall = fakeif->iff_doorbell_tcall;
1906 fakeif->iff_doorbell_tcall = NULL;
1907 fakeif->iff_tx_ring[0] = NULL;
1908 }
1909 fakeif->iff_nifs = NULL;
1910 feth_unlock();
1911 if (tcall != NULL) {
1912 boolean_t success;
1913
1914 success = thread_call_cancel_wait(call: tcall);
1915 FETH_DPRINTF("%s: thread_call_cancel %s\n",
1916 fakeif->iff_name,
1917 success ? "SUCCESS" : "FAILURE");
1918 if (!success) {
1919 feth_lock();
1920 if (fakeif->iff_doorbell_tcall_active) {
1921 fakeif->iff_waiting_for_tcall = TRUE;
1922 FETH_DPRINTF("%s: *waiting for threadcall\n",
1923 fakeif->iff_name);
1924 do {
1925 msleep(chan: fakeif, mtx: &feth_lck_mtx,
1926 PZERO, wmesg: "feth threadcall", ts: 0);
1927 } while (fakeif->iff_doorbell_tcall_active);
1928 FETH_DPRINTF("%s: ^threadcall done\n",
1929 fakeif->iff_name);
1930 fakeif->iff_waiting_for_tcall = FALSE;
1931 }
1932 feth_unlock();
1933 }
1934 success = thread_call_free(call: tcall);
1935 FETH_DPRINTF("%s: thread_call_free %s\n",
1936 fakeif->iff_name,
1937 success ? "SUCCESS" : "FAILURE");
1938 feth_release(fakeif);
1939 VERIFY(success == TRUE);
1940 }
1941}
1942
1943static errno_t
1944feth_nx_pre_connect(kern_nexus_provider_t nxprov,
1945 proc_t proc, kern_nexus_t nexus, nexus_port_t port, kern_channel_t channel,
1946 void **channel_context)
1947{
1948#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
1949 return 0;
1950}
1951
1952static errno_t
1953feth_nx_connected(kern_nexus_provider_t nxprov,
1954 kern_nexus_t nexus, kern_channel_t channel)
1955{
1956#pragma unused(nxprov, channel)
1957 int err;
1958 if_fake_ref fakeif;
1959
1960 fakeif = feth_nexus_context(nexus);
1961 feth_lock();
1962 if (feth_is_detaching(fakeif)) {
1963 feth_unlock();
1964 return EBUSY;
1965 }
1966 feth_retain(fakeif);
1967 fakeif->iff_channel_connected = TRUE;
1968 feth_unlock();
1969 if (feth_has_intf_advisory_configured(fakeif)) {
1970 err = feth_if_adv_tcall_create(fakeif);
1971 if (err != 0) {
1972 return err;
1973 }
1974 }
1975 FETH_DPRINTF("%s: connected channel %p\n",
1976 fakeif->iff_name, channel);
1977 return 0;
1978}
1979
1980static void
1981feth_nx_pre_disconnect(kern_nexus_provider_t nxprov,
1982 kern_nexus_t nexus, kern_channel_t channel)
1983{
1984#pragma unused(nxprov, channel)
1985 if_fake_ref fakeif;
1986
1987 fakeif = feth_nexus_context(nexus);
1988 FETH_DPRINTF("%s: pre-disconnect channel %p\n",
1989 fakeif->iff_name, channel);
1990 /* Quiesce the interface and flush any pending outbound packets. */
1991 if_down(fakeif->iff_ifp);
1992 feth_lock();
1993 fakeif->iff_channel_connected = FALSE;
1994 feth_unlock();
1995 if (fakeif->iff_if_adv_tcall != NULL) {
1996 feth_if_adv_tcall_destroy(fakeif);
1997 }
1998}
1999
2000static void
2001feth_nx_disconnected(kern_nexus_provider_t nxprov,
2002 kern_nexus_t nexus, kern_channel_t channel)
2003{
2004#pragma unused(nxprov, channel)
2005 if_fake_ref fakeif;
2006
2007 fakeif = feth_nexus_context(nexus);
2008 FETH_DPRINTF("%s: disconnected channel %p\n",
2009 fakeif->iff_name, channel);
2010 feth_release(fakeif);
2011}
2012
2013static errno_t
2014feth_nx_slot_init(kern_nexus_provider_t nxprov,
2015 kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2016 uint32_t slot_index, struct kern_slot_prop **slot_prop_addr,
2017 void **slot_context)
2018{
2019#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
2020 return 0;
2021}
2022
2023static void
2024feth_nx_slot_fini(kern_nexus_provider_t nxprov,
2025 kern_nexus_t nexus, kern_channel_ring_t ring, kern_channel_slot_t slot,
2026 uint32_t slot_index)
2027{
2028#pragma unused(nxprov, nexus, ring, slot, slot_index)
2029}
2030
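/*
 * TX sync: detach each packet from the TX ring, tap it to BPF, apply
 * any configured drop/expiration policy, and hand the survivors to the
 * peer's RX path in batches of at most IFF_MAX_BATCH_SIZE, completing
 * the TX packets back to this interface as each batch is submitted.
 */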
2031static errno_t
2032feth_nx_sync_tx(kern_nexus_provider_t nxprov,
2033 kern_nexus_t nexus, kern_channel_ring_t tx_ring, uint32_t flags)
2034{
2035#pragma unused(nxprov)
2036 if_fake_ref fakeif;
2037 ifnet_t ifp;
2038 kern_channel_slot_t last_tx_slot = NULL;
2039 ifnet_t peer_ifp;
2040 if_fake_ref peer_fakeif = NULL;
2041 struct kern_channel_ring_stat_increment stats;
2042 kern_channel_slot_t tx_slot;
2043 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2044 kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
2045 uint32_t n_pkts = 0;
2046
2047 memset(s: &stats, c: 0, n: sizeof(stats));
2048
2049 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
2050 fakeif = feth_nexus_context(nexus);
2051 FETH_DPRINTF("%s ring %d flags 0x%x\n", fakeif->iff_name,
2052 tx_ring->ckr_ring_id, flags);
2053 (void)flags;
2054 feth_lock();
2055 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2056 feth_unlock();
2057 return 0;
2058 }
2059 ifp = fakeif->iff_ifp;
2060 peer_ifp = fakeif->iff_peer;
2061 if (peer_ifp != NULL) {
2062 peer_fakeif = ifnet_get_if_fake(ifp: peer_ifp);
2063 if (peer_fakeif != NULL) {
2064 if (feth_is_detaching(fakeif: peer_fakeif)) {
2065 FETH_DPRINTF("%s peer fakeif %s is detaching\n",
2066 fakeif->iff_name, peer_fakeif->iff_name);
2067 goto done;
2068 }
2069 if (!peer_fakeif->iff_channel_connected) {
2070 if (fakeif->iff_tx_exp_policy ==
2071 IFF_TX_EXP_POLICY_DISABLED) {
2072 FETH_DPRINTF("%s peer fakeif %s channel not connected, expn: %d\n",
2073 fakeif->iff_name, peer_fakeif->iff_name,
2074 fakeif->iff_tx_exp_policy);
2075 goto done;
2076 }
2077 }
2078 } else {
2079 FETH_DPRINTF("%s no peer fakeif (peer %p)\n", fakeif->iff_name, peer_ifp);
2080 goto done;
2081 }
2082 } else {
2083 FETH_DPRINTF("%s no peer\n", fakeif->iff_name);
2084 goto done;
2085 }
2086 tx_slot = kern_channel_get_next_slot(kring: tx_ring, NULL, NULL);
2087 while (tx_slot != NULL) {
2088 uint16_t off;
2089 kern_packet_t sph;
2090
2091 /* detach the packet from the TX ring */
2092 sph = kern_channel_slot_get_packet(ring: tx_ring, slot: tx_slot);
2093 VERIFY(sph != 0);
2094 kern_channel_slot_detach_packet(ring: tx_ring, slot: tx_slot, packet: sph);
2095
2096 /* bpf tap output */
2097 off = kern_packet_get_headroom(sph);
2098 VERIFY(off >= fakeif->iff_tx_headroom);
2099 kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
2100 feth_packet_set_trace_tag(ph: sph, IFF_TT_OUTPUT);
2101 bpf_tap_packet_out(interface: ifp, DLT_EN10MB, packet: sph, NULL, header_len: 0);
2102
2103 /* drop packets, if requested */
2104 fakeif->iff_tx_pkts_count++;
2105 if (feth_tx_expired_error(fakeif, ph: sph) ||
2106 feth_tx_complete_error(fakeif, ph: sph) ||
2107 !peer_fakeif->iff_channel_connected) {
2108 fakeif->iff_tx_pkts_count = 0;
2109 kern_pbufpool_free(pbufpool: fakeif->iff_tx_pp, sph);
2110 STATS_INC(nifs, NETIF_STATS_DROP);
2111 goto next_tx_slot;
2112 }
2113
2114 ASSERT(sph != 0);
2115 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
2116 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
2117
2118 stats.kcrsi_slots_transferred++;
2119 stats.kcrsi_bytes_transferred
2120 += kern_packet_get_data_length(sph);
2121
2122 /* prepare batch for receiver */
2123 pkts[n_pkts++] = sph;
2124 if (n_pkts == IFF_MAX_BATCH_SIZE) {
2125 feth_rx_submit(sif: fakeif, dif: peer_fakeif, sphs: pkts, n_pkts);
2126 feth_tx_complete(fakeif, phs: pkts, nphs: n_pkts);
2127 n_pkts = 0;
2128 }
2129
2130next_tx_slot:
2131 last_tx_slot = tx_slot;
2132 tx_slot = kern_channel_get_next_slot(kring: tx_ring, slot: tx_slot, NULL);
2133 }
2134
2135 /* catch last batch for receiver */
2136 if (n_pkts != 0) {
2137 feth_rx_submit(sif: fakeif, dif: peer_fakeif, sphs: pkts, n_pkts);
2138 feth_tx_complete(fakeif, phs: pkts, nphs: n_pkts);
2139 n_pkts = 0;
2140 }
2141
2142 if (last_tx_slot != NULL) {
2143 kern_channel_advance_slot(kring: tx_ring, slot: last_tx_slot);
2144 kern_channel_increment_ring_net_stats(ring: tx_ring, ifp, stats: &stats);
2145 }
2146done:
2147 feth_unlock();
2148 return 0;
2149}
2150
2151static errno_t
2152feth_nx_sync_rx(kern_nexus_provider_t nxprov,
2153 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2154{
2155#pragma unused(nxprov, ring, flags)
2156 if_fake_ref fakeif;
2157 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2158
2159 STATS_INC(nifs, NETIF_STATS_RX_SYNC);
2160 fakeif = feth_nexus_context(nexus);
2161 FETH_DPRINTF("%s:\n", fakeif->iff_name);
2162 return 0;
2163}
2164
2165static errno_t
2166feth_nx_tx_dequeue_driver_managed(if_fake_ref fakeif, boolean_t doorbell_ctxt)
2167{
2168 int i;
2169 errno_t error = 0;
2170 boolean_t more;
2171
2172 for (i = 0; i < IFF_NUM_TX_RINGS_WMM_MODE; i++) {
		kern_channel_ring_t ring = fakeif->iff_tx_ring[i];

		/* skip rings that were never initialized */
		if (ring == NULL) {
			continue;
		}
		error = kern_channel_tx_refill(ring, UINT32_MAX,
		    UINT32_MAX, doorbell_ctxt, &more);
2178 if (error != 0) {
2179 FETH_DPRINTF("%s: TX refill ring %d (%s) %d\n",
2180 fakeif->iff_name, ring->ckr_ring_id,
2181 doorbell_ctxt ? "sync" : "async", error);
2182 if (!((error == EAGAIN) || (error == EBUSY))) {
2183 break;
2184 }
2185 } else {
2186 FETH_DPRINTF("%s: TX refilled ring %d (%s)\n",
2187 fakeif->iff_name, ring->ckr_ring_id,
2188 doorbell_ctxt ? "sync" : "async");
2189 }
2190 }
2191 return error;
2192}
2193
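/*
 * Thread-call handler for the asynchronous TX doorbell: refill the TX
 * ring(s) outside of the doorbell context and wake up anyone waiting
 * in feth_nx_ring_fini() for the thread call to drain.
 */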
2194static void
2195feth_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
2196{
2197#pragma unused(arg1)
2198 errno_t error;
2199 if_fake_ref fakeif = (if_fake_ref)arg0;
2200 kern_channel_ring_t ring;
2201 boolean_t more;
2202
2203 feth_lock();
2204 ring = fakeif->iff_tx_ring[0];
2205 if (feth_is_detaching(fakeif) ||
2206 !fakeif->iff_channel_connected ||
2207 ring == NULL) {
2208 goto done;
2209 }
2210 fakeif->iff_doorbell_tcall_active = TRUE;
2211 feth_unlock();
2212 if (feth_in_wmm_mode(fakeif)) {
2213 error = feth_nx_tx_dequeue_driver_managed(fakeif, FALSE);
2214 } else {
2215 error = kern_channel_tx_refill(ring, UINT32_MAX,
2216 UINT32_MAX, FALSE, pkts_pending: &more);
2217 }
2218 if (error != 0) {
2219 FETH_DPRINTF("%s: TX refill failed %d\n",
2220 fakeif->iff_name, error);
2221 } else {
2222 FETH_DPRINTF("%s: TX refilled\n", fakeif->iff_name);
2223 }
2224
2225 feth_lock();
2226done:
2227 fakeif->iff_doorbell_tcall_active = FALSE;
2228 if (fakeif->iff_waiting_for_tcall) {
2229 FETH_DPRINTF("%s: threadcall waking up waiter\n",
2230 fakeif->iff_name);
2231 wakeup(chan: (caddr_t)fakeif);
2232 }
2233 feth_unlock();
2234}
2235
2236static void
2237feth_schedule_async_doorbell(if_fake_ref fakeif)
2238{
2239 thread_call_t tcall;
2240
2241 feth_lock();
2242 if (feth_is_detaching(fakeif) || !fakeif->iff_channel_connected) {
2243 feth_unlock();
2244 return;
2245 }
2246 tcall = fakeif->iff_doorbell_tcall;
2247 if (tcall != NULL) {
2248 thread_call_enter(call: tcall);
2249 } else {
2250 tcall = thread_call_allocate_with_options(func: feth_async_doorbell,
2251 param0: (thread_call_param_t)fakeif,
2252 pri: THREAD_CALL_PRIORITY_KERNEL,
2253 options: THREAD_CALL_OPTIONS_ONCE);
2254 if (tcall == NULL) {
2255 printf("%s: %s tcall alloc failed\n",
2256 __func__, fakeif->iff_name);
2257 } else {
2258 fakeif->iff_doorbell_tcall = tcall;
2259 feth_retain(fakeif);
2260 thread_call_enter(call: tcall);
2261 }
2262 }
2263 feth_unlock();
2264}
2265
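/*
 * TX doorbell: refill synchronously unless the caller asked for an
 * asynchronous refill, in which case the work is pushed to the
 * doorbell thread call.
 */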
2266static errno_t
2267feth_nx_tx_doorbell(kern_nexus_provider_t nxprov,
2268 kern_nexus_t nexus, kern_channel_ring_t ring, uint32_t flags)
2269{
2270#pragma unused(nxprov, ring, flags)
2271 errno_t error;
2272 if_fake_ref fakeif;
2273
2274 fakeif = feth_nexus_context(nexus);
2275 FETH_DPRINTF("%s\n", fakeif->iff_name);
2276
2277 if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
2278 boolean_t more;
2279 /* synchronous tx refill */
2280 if (feth_in_wmm_mode(fakeif)) {
2281 error = feth_nx_tx_dequeue_driver_managed(fakeif, TRUE);
2282 } else {
2283 error = kern_channel_tx_refill(ring, UINT32_MAX,
2284 UINT32_MAX, TRUE, pkts_pending: &more);
2285 }
2286 if (error != 0) {
2287 FETH_DPRINTF("%s: TX refill (sync) %d\n",
2288 fakeif->iff_name, error);
2289 } else {
2290 FETH_DPRINTF("%s: TX refilled (sync)\n",
2291 fakeif->iff_name);
2292 }
2293 } else {
2294 FETH_DPRINTF("%s: schedule async refill\n", fakeif->iff_name);
2295 feth_schedule_async_doorbell(fakeif);
2296 }
2297 return 0;
2298}
2299
2300static errno_t
2301feth_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
2302{
2303 if_fake_ref fakeif;
2304
2305 fakeif = (if_fake_ref)kern_nexus_get_context(nexus);
2306 feth_ifnet_set_attrs(fakeif, ifp);
2307 return 0;
2308}
2309
2310static errno_t
2311feth_nx_intf_adv_config(void *prov_ctx, bool enable)
2312{
2313 if_fake_ref fakeif = prov_ctx;
2314
2315 feth_lock();
2316 fakeif->iff_intf_adv_enabled = enable;
2317 feth_unlock();
2318 FETH_DPRINTF("%s enable %d\n", fakeif->iff_name, enable);
2319 return 0;
2320}
2321
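/*
 * Fill in the interface advisory capability: validate the caller's
 * structure, record the kernel notification callback and context, and
 * hand back our own provider context and enable/disable hook.
 */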
2322static errno_t
2323fill_capab_interface_advisory(if_fake_ref fakeif, void *contents, uint32_t *len)
2324{
2325 struct kern_nexus_capab_interface_advisory *capab = contents;
2326
2327 if (*len != sizeof(*capab)) {
2328 return EINVAL;
2329 }
2330 if (capab->kncia_version !=
2331 KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
2332 return EINVAL;
2333 }
2334 if (!feth_has_intf_advisory_configured(fakeif)) {
2335 return ENOTSUP;
2336 }
2337 VERIFY(capab->kncia_notify != NULL);
2338 fakeif->iff_intf_adv_kern_ctx = capab->kncia_kern_context;
2339 fakeif->iff_intf_adv_notify = capab->kncia_notify;
2340 capab->kncia_provider_context = fakeif;
2341 capab->kncia_config = feth_nx_intf_adv_config;
2342 return 0;
2343}
2344
2345static errno_t
2346feth_notify_steering_info(void *prov_ctx, void *qset_ctx,
2347 struct ifnet_traffic_descriptor_common *td, bool add)
2348{
2349#pragma unused(td)
2350 if_fake_ref fakeif = prov_ctx;
2351 fake_qset *qset = qset_ctx;
2352
2353 FETH_DPRINTF("%s: notify_steering_info: qset_id 0x%llx, %s\n",
2354 fakeif->iff_name, qset->fqs_id, add ? "add" : "remove");
2355 return 0;
2356}
2357
2358static errno_t
2359fill_capab_qset_extensions(if_fake_ref fakeif, void *contents, uint32_t *len)
2360{
2361 struct kern_nexus_capab_qset_extensions *capab = contents;
2362
2363 if (*len != sizeof(*capab)) {
2364 return EINVAL;
2365 }
2366 if (capab->cqe_version !=
2367 KERN_NEXUS_CAPAB_QSET_EXTENSIONS_VERSION_1) {
2368 return EINVAL;
2369 }
2370 capab->cqe_prov_ctx = fakeif;
2371 capab->cqe_notify_steering_info = feth_notify_steering_info;
2372 return 0;
2373}
2374
2375static errno_t
2376feth_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
2377 kern_nexus_capab_t capab, void *contents, uint32_t *len)
2378{
2379#pragma unused(nxprov)
2380 errno_t error;
2381 if_fake_ref fakeif;
2382
2383 fakeif = feth_nexus_context(nexus: nx);
2384 FETH_DPRINTF("%s\n", fakeif->iff_name);
2385
2386 switch (capab) {
2387 case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
2388 error = fill_capab_interface_advisory(fakeif, contents, len);
2389 break;
2390 case KERN_NEXUS_CAPAB_QSET_EXTENSIONS:
2391 error = fill_capab_qset_extensions(fakeif, contents, len);
2392 break;
2393 default:
2394 error = ENOTSUP;
2395 break;
2396 }
2397 return error;
2398}
2399
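/*
 * Advertise TSO for IPv4 and IPv6 and set the TSO MTU for both
 * address families to the configured TSO buffer size.
 */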
2400static int
2401feth_set_tso(ifnet_t ifp)
2402{
2403 ifnet_offload_t offload;
2404 uint32_t tso_v4_mtu, tso_v6_mtu;
2405 int error;
2406
2407 offload = IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2408 tso_v4_mtu = if_fake_tso_buffer_size;
2409 tso_v6_mtu = if_fake_tso_buffer_size;
2410 error = ifnet_set_offload(interface: ifp, offload);
2411 if (error != 0) {
2412 printf("%s: set TSO offload failed on %s, err %d\n", __func__,
2413 if_name(ifp), error);
2414 return error;
2415 }
2416 error = ifnet_set_tso_mtu(interface: ifp, AF_INET, mtuLen: tso_v4_mtu);
2417 if (error != 0) {
2418 printf("%s: set TSO MTU IPv4 failed on %s, err %d\n", __func__,
2419 if_name(ifp), error);
2420 return error;
2421 }
2422 error = ifnet_set_tso_mtu(interface: ifp, AF_INET6, mtuLen: tso_v6_mtu);
2423 if (error != 0) {
2424 printf("%s: set TSO MTU IPv6 failed on %s, err %d\n", __func__,
2425 if_name(ifp), error);
2426 return error;
2427 }
2428 return 0;
2429}
2430
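/*
 * Register a netif nexus provider for this interface and allocate a
 * provider instance, which also creates the ifnet.  In WMM mode the
 * nexus is configured with multiple TX rings and a WMM queue mapping.
 */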
2431static errno_t
2432create_netif_provider_and_instance(if_fake_ref fakeif,
2433 struct ifnet_init_eparams * init_params, ifnet_t *ifp,
2434 uuid_t * provider, uuid_t * instance)
2435{
2436 errno_t err;
2437 nexus_controller_t controller = kern_nexus_shared_controller();
2438 struct kern_nexus_net_init net_init;
2439 nexus_name_t provider_name;
2440 nexus_attr_t nexus_attr = NULL;
2441 struct kern_nexus_provider_init prov_init = {
2442 .nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
2443 .nxpi_flags = NXPIF_VIRTUAL_DEVICE,
2444 .nxpi_pre_connect = feth_nx_pre_connect,
2445 .nxpi_connected = feth_nx_connected,
2446 .nxpi_pre_disconnect = feth_nx_pre_disconnect,
2447 .nxpi_disconnected = feth_nx_disconnected,
2448 .nxpi_ring_init = feth_nx_ring_init,
2449 .nxpi_ring_fini = feth_nx_ring_fini,
2450 .nxpi_slot_init = feth_nx_slot_init,
2451 .nxpi_slot_fini = feth_nx_slot_fini,
2452 .nxpi_sync_tx = feth_nx_sync_tx,
2453 .nxpi_sync_rx = feth_nx_sync_rx,
2454 .nxpi_tx_doorbell = feth_nx_tx_doorbell,
2455 .nxpi_config_capab = feth_nx_capab_config,
2456 };
2457
2458 _CASSERT(IFF_MAX_RX_RINGS == 1);
2459 err = kern_nexus_attr_create(&nexus_attr);
2460 if (err != 0) {
2461 printf("%s nexus attribute creation failed, error %d\n",
2462 __func__, err);
2463 goto failed;
2464 }
2465 if (feth_in_wmm_mode(fakeif)) {
2466 err = kern_nexus_attr_set(attr: nexus_attr, type: NEXUS_ATTR_TX_RINGS,
2467 IFF_NUM_TX_RINGS_WMM_MODE);
2468 VERIFY(err == 0);
2469 err = kern_nexus_attr_set(attr: nexus_attr, type: NEXUS_ATTR_RX_RINGS,
2470 IFF_NUM_RX_RINGS_WMM_MODE);
2471 VERIFY(err == 0);
2472 err = kern_nexus_attr_set(attr: nexus_attr, type: NEXUS_ATTR_QMAP,
2473 value: NEXUS_QMAP_TYPE_WMM);
2474 VERIFY(err == 0);
2475 }
2476
2477 err = kern_nexus_attr_set(attr: nexus_attr, type: NEXUS_ATTR_ANONYMOUS, value: 1);
2478 VERIFY(err == 0);
2479 snprintf((char *)provider_name, count: sizeof(provider_name),
2480 "com.apple.netif.%s", fakeif->iff_name);
2481 err = kern_nexus_controller_register_provider(ctl: controller,
2482 dom_prov_uuid: feth_nx_dom_prov,
2483 provider_name,
2484 init: &prov_init,
2485 init_len: sizeof(prov_init),
2486 nxa: nexus_attr,
2487 nx_prov_uuid: provider);
2488 if (err != 0) {
2489 printf("%s register provider failed, error %d\n",
2490 __func__, err);
2491 goto failed;
2492 }
2493 bzero(s: &net_init, n: sizeof(net_init));
2494 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2495 net_init.nxneti_flags = 0;
2496 net_init.nxneti_eparams = init_params;
2497 net_init.nxneti_lladdr = NULL;
2498 net_init.nxneti_prepare = feth_netif_prepare;
2499 net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
2500 net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
2501 err = kern_nexus_controller_alloc_net_provider_instance(ctl: controller,
2502 nx_prov_uuid: *provider,
2503 nexus_context: fakeif,
2504 NULL,
2505 nx_uuid: instance,
2506 init: &net_init,
2507 ifp);
2508 if (err != 0) {
2509 printf("%s alloc_net_provider_instance failed, %d\n",
2510 __func__, err);
2511 kern_nexus_controller_deregister_provider(ctl: controller,
2512 nx_prov_uuid: *provider);
2513 uuid_clear(uu: *provider);
2514 goto failed;
2515 }
2516 if (feth_supports_tso(fakeif)) {
2517 if ((err = feth_set_tso(ifp: *ifp)) != 0) {
2518 goto failed;
2519 }
2520 }
2521
2522failed:
2523 if (nexus_attr != NULL) {
2524 kern_nexus_attr_destroy(attr: nexus_attr);
2525 }
2526 return err;
2527}
2528
/*
 * The nif_stats pointer is reference counted because we don't want it
 * set to NULL until the last llink is removed.
 */
2533static void
2534get_nexus_stats(if_fake_ref fakeif, kern_nexus_t nexus)
2535{
2536 if (++fakeif->iff_nifs_ref == 1) {
2537 ASSERT(fakeif->iff_nifs == NULL);
2538 fakeif->iff_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2539 }
2540}
2541
2542static void
2543clear_nexus_stats(if_fake_ref fakeif)
2544{
2545 if (--fakeif->iff_nifs_ref == 0) {
2546 ASSERT(fakeif->iff_nifs != NULL);
2547 fakeif->iff_nifs = NULL;
2548 }
2549}
2550
2551static errno_t
2552feth_nx_qset_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2553 void *llink_ctx, uint8_t qset_idx, uint64_t qset_id, kern_netif_qset_t qset,
2554 void **qset_ctx)
2555{
2556#pragma unused(nxprov)
2557 if_fake_ref fakeif;
2558 fake_llink *fl = llink_ctx;
2559 fake_qset *fqs;
2560
2561 feth_lock();
2562 fakeif = feth_nexus_context(nexus);
2563 if (feth_is_detaching(fakeif)) {
2564 feth_unlock();
2565 printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2566 return ENXIO;
2567 }
2568 if (qset_idx >= fl->fl_qset_cnt) {
2569 feth_unlock();
2570 printf("%s: %s: invalid qset_idx %d\n", __func__,
2571 fakeif->iff_name, qset_idx);
2572 return EINVAL;
2573 }
2574 fqs = &fl->fl_qset[qset_idx];
2575 ASSERT(fqs->fqs_qset == NULL);
2576 fqs->fqs_qset = qset;
2577 fqs->fqs_id = qset_id;
2578 *qset_ctx = fqs;
2579
2580 /* XXX This should really be done during registration */
2581 get_nexus_stats(fakeif, nexus);
2582 feth_unlock();
2583 return 0;
2584}
2585
2586static void
2587feth_nx_qset_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2588 void *qset_ctx)
2589{
2590#pragma unused(nxprov)
2591 if_fake_ref fakeif;
2592 fake_qset *fqs = qset_ctx;
2593
2594 feth_lock();
2595 fakeif = feth_nexus_context(nexus);
2596 clear_nexus_stats(fakeif);
2597 ASSERT(fqs->fqs_qset != NULL);
2598 fqs->fqs_qset = NULL;
2599 fqs->fqs_id = 0;
2600 feth_unlock();
2601}
2602
2603static errno_t
2604feth_nx_queue_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2605 void *qset_ctx, uint8_t qidx, bool tx, kern_netif_queue_t queue,
2606 void **queue_ctx)
2607{
2608#pragma unused(nxprov)
2609 if_fake_ref fakeif;
2610 fake_qset *fqs = qset_ctx;
2611 fake_queue *fq;
2612
2613 feth_lock();
2614 fakeif = feth_nexus_context(nexus);
2615 if (feth_is_detaching(fakeif)) {
2616 printf("%s: %s: detaching\n", __func__, fakeif->iff_name);
2617 feth_unlock();
2618 return ENXIO;
2619 }
2620 if (tx) {
2621 if (qidx >= fqs->fqs_tx_queue_cnt) {
2622 printf("%s: %s: invalid tx qidx %d\n", __func__,
2623 fakeif->iff_name, qidx);
2624 feth_unlock();
2625 return EINVAL;
2626 }
2627 fq = &fqs->fqs_tx_queue[qidx];
2628 } else {
2629 if (qidx >= fqs->fqs_rx_queue_cnt) {
2630 printf("%s: %s: invalid rx qidx %d\n", __func__,
2631 fakeif->iff_name, qidx);
2632 feth_unlock();
2633 return EINVAL;
2634 }
2635 fq = &fqs->fqs_rx_queue[qidx];
2636 }
2637 ASSERT(fq->fq_queue == NULL);
2638 fq->fq_queue = queue;
2639 *queue_ctx = fq;
2640 feth_unlock();
2641 return 0;
2642}
2643
2644static void
2645feth_nx_queue_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2646 void *queue_ctx)
2647{
2648#pragma unused(nxprov, nexus)
2649 fake_queue *fq = queue_ctx;
2650
2651 feth_lock();
2652 ASSERT(fq->fq_queue != NULL);
2653 fq->fq_queue = NULL;
2654 feth_unlock();
2655}
2656
2657static void
2658feth_nx_tx_queue_deliver_pkt_chain(if_fake_ref fakeif, kern_packet_t sph,
2659 struct netif_stats *nifs, if_fake_ref peer_fakeif,
2660 uint32_t llink_idx, uint32_t qset_idx)
2661{
2662 kern_packet_t pkts[IFF_MAX_BATCH_SIZE];
2663 uint32_t n_pkts = 0;
2664
2665 FETH_DPRINTF("%s -> %s\n", fakeif->iff_name, peer_fakeif->iff_name);
2666
2667 while (sph != 0) {
2668 uint16_t off;
2669 kern_packet_t next;
2670
2671 next = kern_packet_get_next(sph);
2672 kern_packet_set_next(sph, 0);
2673
2674 /* bpf tap output */
2675 off = kern_packet_get_headroom(sph);
2676 VERIFY(off >= fakeif->iff_tx_headroom);
2677 kern_packet_set_link_header_length(sph, ETHER_HDR_LEN);
2678 feth_packet_set_trace_tag(ph: sph, IFF_TT_OUTPUT);
2679 bpf_tap_packet_out(interface: fakeif->iff_ifp, DLT_EN10MB, packet: sph, NULL, header_len: 0);
2680
2681 /* drop packets, if requested */
2682 fakeif->iff_tx_pkts_count++;
2683 if (feth_tx_expired_error(fakeif, ph: sph) ||
2684 feth_tx_complete_error(fakeif, ph: sph)) {
2685 fakeif->iff_tx_pkts_count = 0;
2686 kern_pbufpool_free(pbufpool: fakeif->iff_tx_pp, sph);
2687 STATS_INC(nifs, NETIF_STATS_DROP);
2688 goto next_pkt;
2689 }
2690 ASSERT(sph != 0);
2691 STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
2692 STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
2693
2694 /* prepare batch for receiver */
2695 pkts[n_pkts++] = sph;
2696 if (n_pkts == IFF_MAX_BATCH_SIZE) {
2697 feth_rx_queue_submit(sif: fakeif, dif: peer_fakeif, llink_idx,
2698 qset_idx, sphs: pkts, n_pkts);
2699 feth_tx_complete(fakeif, phs: pkts, nphs: n_pkts);
2700 n_pkts = 0;
2701 }
2702next_pkt:
2703 sph = next;
2704 }
2705 /* catch last batch for receiver */
2706 if (n_pkts != 0) {
2707 feth_rx_queue_submit(sif: fakeif, dif: peer_fakeif, llink_idx, qset_idx,
2708 sphs: pkts, n_pkts);
2709 feth_tx_complete(fakeif, phs: pkts, nphs: n_pkts);
2710 n_pkts = 0;
2711 }
2712}
2713
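/*
 * Logical-link TX notify: dequeue packet chains from every TX queue of
 * the qset and deliver them to the peer, optionally toggling the qset
 * between combined and separate mode to exercise both paths.
 */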
2714static errno_t
2715feth_nx_tx_qset_notify(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
2716 void *qset_ctx, uint32_t flags)
2717{
2718#pragma unused(nxprov)
2719 if_fake_ref fakeif;
2720 ifnet_t ifp;
2721 ifnet_t peer_ifp;
2722 if_fake_ref peer_fakeif = NULL;
2723 struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
2724 fake_qset *qset = qset_ctx;
2725 boolean_t detaching, connected;
2726 uint32_t i;
2727 errno_t err;
2728
2729 STATS_INC(nifs, NETIF_STATS_TX_SYNC);
2730 fakeif = feth_nexus_context(nexus);
2731 FETH_DPRINTF("%s qset %p, idx %d, flags 0x%x\n", fakeif->iff_name, qset,
2732 qset->fqs_idx, flags);
2733
2734 feth_lock();
2735 detaching = feth_is_detaching(fakeif);
2736 connected = fakeif->iff_channel_connected;
2737 if (detaching || !connected) {
2738 FETH_DPRINTF("%s: %s: detaching %s, channel connected %s\n",
2739 __func__, fakeif->iff_name,
2740 (detaching ? "true" : "false"),
2741 (connected ? "true" : "false"));
2742 feth_unlock();
2743 return 0;
2744 }
2745 ifp = fakeif->iff_ifp;
2746 peer_ifp = fakeif->iff_peer;
2747 if (peer_ifp != NULL) {
2748 peer_fakeif = ifnet_get_if_fake(ifp: peer_ifp);
2749 if (peer_fakeif != NULL) {
2750 detaching = feth_is_detaching(fakeif: peer_fakeif);
2751 connected = peer_fakeif->iff_channel_connected;
2752 if (detaching || !connected) {
2753 FETH_DPRINTF("%s: peer %s: detaching %s, "
2754 "channel connected %s\n",
2755 __func__, peer_fakeif->iff_name,
2756 (detaching ? "true" : "false"),
2757 (connected ? "true" : "false"));
2758 goto done;
2759 }
2760 } else {
2761 FETH_DPRINTF("%s: peer_fakeif is NULL\n", __func__);
2762 goto done;
2763 }
2764 } else {
2765 printf("%s: peer_ifp is NULL\n", __func__);
2766 goto done;
2767 }
2768
2769 if (if_fake_switch_combined_mode &&
2770 qset->fqs_dequeue_cnt >= if_fake_switch_mode_frequency) {
2771 if (qset->fqs_combined_mode) {
2772 kern_netif_set_qset_separate(qset: qset->fqs_qset);
2773 } else {
2774 kern_netif_set_qset_combined(qset: qset->fqs_qset);
2775 }
2776 qset->fqs_combined_mode = !qset->fqs_combined_mode;
2777 qset->fqs_dequeue_cnt = 0;
2778 }
2779
2780 for (i = 0; i < qset->fqs_tx_queue_cnt; i++) {
2781 kern_packet_t sph = 0;
2782 kern_netif_queue_t queue = qset->fqs_tx_queue[i].fq_queue;
2783 boolean_t more = FALSE;
2784
2785 err = kern_netif_queue_tx_dequeue(queue, UINT32_MAX, UINT32_MAX,
2786 &more, &sph);
2787 if (err != 0 && err != EAGAIN) {
2788 FETH_DPRINTF("%s queue %p dequeue failed: err "
2789 "%d\n", fakeif->iff_name, queue, err);
2790 }
2791 feth_nx_tx_queue_deliver_pkt_chain(fakeif, sph, nifs,
2792 peer_fakeif, llink_idx: qset->fqs_llink_idx, qset_idx: qset->fqs_idx);
2793 }
2794
2795done:
2796 feth_unlock();
2797 return 0;
2798}
2799
2800static void
2801fill_qset_info_and_params(if_fake_ref fakeif, fake_llink *llink_info,
2802 uint32_t qset_idx, struct kern_nexus_netif_llink_qset_init *qset_init,
2803 bool is_def, bool is_low_latency)
2804{
2805 fake_qset *qset_info = &llink_info->fl_qset[qset_idx];
2806
2807 qset_init->nlqi_flags =
2808 (is_def ? KERN_NEXUS_NET_LLINK_QSET_DEFAULT : 0) |
2809 (is_low_latency ? KERN_NEXUS_NET_LLINK_QSET_LOW_LATENCY : 0) |
2810 KERN_NEXUS_NET_LLINK_QSET_AQM;
2811
2812 if (feth_in_wmm_mode(fakeif)) {
2813 qset_init->nlqi_flags |= KERN_NEXUS_NET_LLINK_QSET_WMM_MODE;
2814 qset_init->nlqi_num_txqs = IFF_NUM_TX_QUEUES_WMM_MODE;
2815 qset_init->nlqi_num_rxqs = IFF_NUM_RX_QUEUES_WMM_MODE;
2816 } else {
2817 qset_init->nlqi_num_txqs = 1;
2818 qset_init->nlqi_num_rxqs = 1;
2819 }
2820 qset_info->fqs_tx_queue_cnt = qset_init->nlqi_num_txqs;
2821 qset_info->fqs_rx_queue_cnt = qset_init->nlqi_num_rxqs;
2822
2823 /* These are needed for locating the peer qset */
2824 qset_info->fqs_llink_idx = llink_info->fl_idx;
2825 qset_info->fqs_idx = qset_idx;
2826}
2827
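/*
 * Populate both the driver-side llink bookkeeping and the
 * kern_nexus_netif_llink_init parameters for one logical link,
 * including all of its qsets.
 */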
2828static void
2829fill_llink_info_and_params(if_fake_ref fakeif, uint32_t llink_idx,
2830 struct kern_nexus_netif_llink_init *llink_init, uint32_t llink_id,
2831 struct kern_nexus_netif_llink_qset_init *qset_init, uint32_t qset_cnt,
2832 uint32_t flags)
2833{
2834 fake_llink *llink_info = &fakeif->iff_llink[llink_idx];
2835 uint32_t i;
2836 bool create_ll_qset = if_fake_low_latency && (llink_idx != 0);
2837
2838 for (i = 0; i < qset_cnt; i++) {
2839 fill_qset_info_and_params(fakeif, llink_info, qset_idx: i,
2840 qset_init: &qset_init[i], is_def: i == 0, is_low_latency: create_ll_qset && i == 1);
2841 }
2842 llink_info->fl_idx = llink_idx;
2843
2844 /* This doesn't have to be the same as llink_idx */
2845 llink_info->fl_id = llink_id;
2846 llink_info->fl_qset_cnt = qset_cnt;
2847
2848 llink_init->nli_link_id = llink_id;
2849 llink_init->nli_num_qsets = qset_cnt;
2850 llink_init->nli_qsets = qset_init;
2851 llink_init->nli_flags = flags;
2852 llink_init->nli_ctx = llink_info;
2853}
2854
2855static errno_t
2856create_non_default_llinks(if_fake_ref fakeif)
2857{
2858 struct kern_nexus *nx;
2859 fake_nx_t fnx = &fakeif->iff_nx;
2860 struct kern_nexus_netif_llink_init llink_init;
2861 struct kern_nexus_netif_llink_qset_init qset_init[FETH_MAX_QSETS];
2862 errno_t err;
2863 uint64_t llink_id;
2864 uint32_t i;
2865
2866 nx = nx_find(fnx->fnx_instance, FALSE);
2867 if (nx == NULL) {
2868 printf("%s: %s: nx not found\n", __func__, fakeif->iff_name);
2869 return ENXIO;
2870 }
2871 /* Default llink starts at index 0 */
2872 for (i = 1; i < if_fake_llink_cnt; i++) {
2873 llink_id = (uint64_t)i;
2874
2875 /*
2876 * The llink_init and qset_init structures are reused for
2877 * each llink creation.
2878 */
2879 fill_llink_info_and_params(fakeif, llink_idx: i, llink_init: &llink_init,
2880 llink_id, qset_init, qset_cnt: if_fake_qset_cnt, flags: 0);
2881 err = kern_nexus_netif_llink_add(nx, &llink_init);
2882 if (err != 0) {
2883 printf("%s: %s: llink add failed, error %d\n",
2884 __func__, fakeif->iff_name, err);
2885 goto fail;
2886 }
2887 fakeif->iff_llink_cnt++;
2888 }
2889 nx_release(nx);
2890 return 0;
2891
2892fail:
2893 for (i = 0; i < fakeif->iff_llink_cnt; i++) {
2894 int e;
2895
2896 e = kern_nexus_netif_llink_remove(nx, fakeif->
2897 iff_llink[i].fl_id);
2898 if (e != 0) {
2899 printf("%s: %s: llink remove failed, llink_id 0x%llx, "
2900 "error %d\n", __func__, fakeif->iff_name,
2901 fakeif->iff_llink[i].fl_id, e);
2902 }
2903 fakeif->iff_llink[i].fl_id = 0;
2904 }
2905 fakeif->iff_llink_cnt = 0;
2906 nx_release(nx);
2907 return err;
2908}
2909
2910static errno_t
2911create_netif_llink_provider_and_instance(if_fake_ref fakeif,
2912 struct ifnet_init_eparams * init_params, ifnet_t *ifp,
2913 uuid_t * provider, uuid_t * instance)
2914{
2915 errno_t err;
2916 nexus_controller_t controller = kern_nexus_shared_controller();
2917 struct kern_nexus_net_init net_init;
2918 struct kern_nexus_netif_llink_init llink_init;
2919 struct kern_nexus_netif_llink_qset_init qsets[FETH_MAX_QSETS];
2920
2921 nexus_name_t provider_name;
2922 nexus_attr_t nexus_attr = NULL;
2923 struct kern_nexus_netif_provider_init prov_init = {
2924 .nxnpi_version = KERN_NEXUS_DOMAIN_PROVIDER_NETIF,
2925 .nxnpi_flags = NXPIF_VIRTUAL_DEVICE,
2926 .nxnpi_pre_connect = feth_nx_pre_connect,
2927 .nxnpi_connected = feth_nx_connected,
2928 .nxnpi_pre_disconnect = feth_nx_pre_disconnect,
2929 .nxnpi_disconnected = feth_nx_disconnected,
2930 .nxnpi_qset_init = feth_nx_qset_init,
2931 .nxnpi_qset_fini = feth_nx_qset_fini,
2932 .nxnpi_queue_init = feth_nx_queue_init,
2933 .nxnpi_queue_fini = feth_nx_queue_fini,
2934 .nxnpi_tx_qset_notify = feth_nx_tx_qset_notify,
2935 .nxnpi_config_capab = feth_nx_capab_config,
2936 };
2937
2938 err = kern_nexus_attr_create(&nexus_attr);
2939 if (err != 0) {
2940 printf("%s nexus attribute creation failed, error %d\n",
2941 __func__, err);
2942 goto failed;
2943 }
2944
2945 err = kern_nexus_attr_set(attr: nexus_attr, type: NEXUS_ATTR_ANONYMOUS, value: 1);
2946 VERIFY(err == 0);
2947
2948 snprintf((char *)provider_name, count: sizeof(provider_name),
2949 "com.apple.netif.%s", fakeif->iff_name);
2950 err = kern_nexus_controller_register_provider(ctl: controller,
2951 dom_prov_uuid: feth_nx_dom_prov,
2952 provider_name,
2953 init: (struct kern_nexus_provider_init *)&prov_init,
2954 init_len: sizeof(prov_init),
2955 nxa: nexus_attr,
2956 nx_prov_uuid: provider);
2957 if (err != 0) {
2958 printf("%s register provider failed, error %d\n",
2959 __func__, err);
2960 goto failed;
2961 }
2962 bzero(s: &net_init, n: sizeof(net_init));
2963 net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
2964 net_init.nxneti_flags = 0;
2965 net_init.nxneti_eparams = init_params;
2966 net_init.nxneti_lladdr = NULL;
2967 net_init.nxneti_prepare = feth_netif_prepare;
2968 net_init.nxneti_rx_pbufpool = fakeif->iff_rx_pp;
2969 net_init.nxneti_tx_pbufpool = fakeif->iff_tx_pp;
2970
	/*
	 * For if_fake, the llink id is assumed to be the same as the llink
	 * index; other drivers are not required to follow this convention.
	 */
2975 _CASSERT(NETIF_LLINK_ID_DEFAULT == 0);
2976 fill_llink_info_and_params(fakeif, llink_idx: 0, llink_init: &llink_init,
2977 NETIF_LLINK_ID_DEFAULT, qset_init: qsets, qset_cnt: if_fake_qset_cnt,
2978 KERN_NEXUS_NET_LLINK_DEFAULT);
2979
2980 net_init.nxneti_llink = &llink_init;
2981
2982 err = kern_nexus_controller_alloc_net_provider_instance(ctl: controller,
2983 nx_prov_uuid: *provider, nexus_context: fakeif, NULL, nx_uuid: instance, init: &net_init, ifp);
2984 if (err != 0) {
2985 printf("%s alloc_net_provider_instance failed, %d\n",
2986 __func__, err);
2987 kern_nexus_controller_deregister_provider(ctl: controller,
2988 nx_prov_uuid: *provider);
2989 uuid_clear(uu: *provider);
2990 goto failed;
2991 }
2992 fakeif->iff_llink_cnt++;
2993
2994 if (if_fake_llink_cnt > 1) {
2995 err = create_non_default_llinks(fakeif);
2996 if (err != 0) {
2997 printf("%s create_non_default_llinks failed, %d\n",
2998 __func__, err);
2999 feth_detach_netif_nexus(fakeif);
3000 goto failed;
3001 }
3002 }
3003 if (feth_supports_tso(fakeif)) {
3004 if ((err = feth_set_tso(ifp: *ifp)) != 0) {
3005 goto failed;
3006 }
3007 }
3008failed:
3009 if (nexus_attr != NULL) {
3010 kern_nexus_attr_destroy(attr: nexus_attr);
3011 }
3012 return err;
3013}
3014
3015static errno_t
3016feth_attach_netif_nexus(if_fake_ref fakeif,
3017 struct ifnet_init_eparams * init_params, ifnet_t *ifp)
3018{
3019 errno_t error;
3020 fake_nx_t nx = &fakeif->iff_nx;
3021
3022 error = feth_packet_pool_make(fakeif);
3023 if (error != 0) {
3024 return error;
3025 }
3026 if (if_fake_llink_cnt == 0) {
3027 return create_netif_provider_and_instance(fakeif, init_params,
3028 ifp, provider: &nx->fnx_provider, instance: &nx->fnx_instance);
3029 } else {
3030 return create_netif_llink_provider_and_instance(fakeif,
3031 init_params, ifp, provider: &nx->fnx_provider,
3032 instance: &nx->fnx_instance);
3033 }
3034}
3035
3036static void
3037remove_non_default_llinks(if_fake_ref fakeif)
3038{
3039 struct kern_nexus *nx;
3040 fake_nx_t fnx = &fakeif->iff_nx;
3041 uint32_t i;
3042
3043 if (fakeif->iff_llink_cnt <= 1) {
3044 return;
3045 }
3046 nx = nx_find(fnx->fnx_instance, FALSE);
3047 if (nx == NULL) {
3048 printf("%s: %s: nx not found\n", __func__,
3049 fakeif->iff_name);
3050 return;
3051 }
3052 /* Default llink (at index 0) is freed separately */
3053 for (i = 1; i < fakeif->iff_llink_cnt; i++) {
3054 int err;
3055
3056 err = kern_nexus_netif_llink_remove(nx, fakeif->
3057 iff_llink[i].fl_id);
3058 if (err != 0) {
3059 printf("%s: %s: llink remove failed, llink_id 0x%llx, "
3060 "error %d\n", __func__, fakeif->iff_name,
3061 fakeif->iff_llink[i].fl_id, err);
3062 }
3063 fakeif->iff_llink[i].fl_id = 0;
3064 }
3065 fakeif->iff_llink_cnt = 0;
3066 nx_release(nx);
3067}
3068
3069static void
3070detach_provider_and_instance(uuid_t provider, uuid_t instance)
3071{
3072 nexus_controller_t controller = kern_nexus_shared_controller();
3073 errno_t err;
3074
3075 if (!uuid_is_null(uu: instance)) {
3076 err = kern_nexus_controller_free_provider_instance(ctl: controller,
3077 nx_uuid: instance);
3078 if (err != 0) {
3079 printf("%s free_provider_instance failed %d\n",
3080 __func__, err);
3081 }
3082 uuid_clear(uu: instance);
3083 }
3084 if (!uuid_is_null(uu: provider)) {
3085 err = kern_nexus_controller_deregister_provider(ctl: controller,
3086 nx_prov_uuid: provider);
3087 if (err != 0) {
3088 printf("%s deregister_provider %d\n", __func__, err);
3089 }
3090 uuid_clear(uu: provider);
3091 }
3092 return;
3093}
3094
3095static void
3096feth_detach_netif_nexus(if_fake_ref fakeif)
3097{
3098 fake_nx_t fnx = &fakeif->iff_nx;
3099
3100 remove_non_default_llinks(fakeif);
3101 detach_provider_and_instance(provider: fnx->fnx_provider, instance: fnx->fnx_instance);
3102}
3103
3104#endif /* SKYWALK */
3105
3106/**
3107** feth interface routines
3108**/
3109static void
3110feth_ifnet_set_attrs(if_fake_ref fakeif, ifnet_t ifp)
3111{
3112 uint32_t cap;
3113
3114 cap = ((fakeif->iff_flags & IFF_FLAGS_LRO) != 0) ? IFCAP_LRO : 0;
3115 if (cap != 0) {
3116 errno_t error;
3117
3118 error = ifnet_set_capabilities_supported(interface: ifp, new_caps: cap, IFCAP_VALID);
3119 if (error != 0) {
3120 printf("%s: failed to enable LRO, %d\n",
3121 ifp->if_xname, error);
3122 }
3123 }
3124 (void)ifnet_set_capabilities_enabled(interface: ifp, new_caps: cap, IFCAP_VALID);
3125 ifnet_set_addrlen(interface: ifp, ETHER_ADDR_LEN);
3126 ifnet_set_baudrate(interface: ifp, baudrate: 0);
3127 ifnet_set_mtu(interface: ifp, ETHERMTU);
3128 ifnet_set_flags(interface: ifp,
3129 IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX,
3130 mask: 0xffff);
3131 ifnet_set_hdrlen(interface: ifp, hdrlen: sizeof(struct ether_header));
3132 if ((fakeif->iff_flags & IFF_FLAGS_HWCSUM) != 0) {
3133 ifnet_set_offload(interface: ifp,
3134 offload: IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
3135 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6);
3136 } else {
3137 ifnet_set_offload(interface: ifp, offload: 0);
3138 }
3139}
3140
3141static void
3142interface_link_event(ifnet_t ifp, u_int32_t event_code)
3143{
3144 struct event {
3145 u_int32_t ifnet_family;
3146 u_int32_t unit;
3147 char if_name[IFNAMSIZ];
3148 };
3149 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3150 struct kern_event_msg *header = (struct kern_event_msg*)message;
3151 struct event *data = (struct event *)(header + 1);
3152
3153 header->total_size = sizeof(message);
3154 header->vendor_code = KEV_VENDOR_APPLE;
3155 header->kev_class = KEV_NETWORK_CLASS;
3156 header->kev_subclass = KEV_DL_SUBCLASS;
3157 header->event_code = event_code;
3158 data->ifnet_family = ifnet_family(interface: ifp);
3159 data->unit = (u_int32_t)ifnet_unit(interface: ifp);
3160 strlcpy(dst: data->if_name, src: ifnet_name(interface: ifp), IFNAMSIZ);
3161 ifnet_event(interface: ifp, event_ptr: header);
3162}
3163
3164static if_fake_ref
3165ifnet_get_if_fake(ifnet_t ifp)
3166{
3167 return (if_fake_ref)ifnet_softc(interface: ifp);
3168}
3169
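/*
 * Clone handler: allocate the if_fake softc, derive a MAC address from
 * the fixed "feth" prefix and the unit number, and attach either as a
 * BSD-style interface or as a Skywalk native netif, depending on the
 * configured mode.
 */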
3170static int
3171feth_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
3172{
3173 int error;
3174 if_fake_ref fakeif;
3175 struct ifnet_init_eparams feth_init;
3176 ifnet_t ifp;
3177 uint8_t mac_address[ETHER_ADDR_LEN];
3178 fake_llink *iff_llink;
3179
3180 iff_llink = kalloc_type(fake_llink, FETH_MAX_LLINKS, Z_WAITOK_ZERO);
3181 if (iff_llink == NULL) {
3182 return ENOBUFS;
3183 }
3184 fakeif = kalloc_type(struct if_fake, Z_WAITOK_ZERO_NOFAIL);
3185 fakeif->iff_llink = iff_llink;
3186 fakeif->iff_retain_count = 1;
3187#define FAKE_ETHER_NAME_LEN (sizeof(FAKE_ETHER_NAME) - 1)
3188 _CASSERT(FAKE_ETHER_NAME_LEN == 4);
3189 bcopy(FAKE_ETHER_NAME, dst: mac_address, FAKE_ETHER_NAME_LEN);
3190 mac_address[ETHER_ADDR_LEN - 2] = (unit & 0xff00) >> 8;
3191 mac_address[ETHER_ADDR_LEN - 1] = unit & 0xff;
3192 if (if_fake_bsd_mode != 0) {
3193 fakeif->iff_flags |= IFF_FLAGS_BSD_MODE;
3194 }
3195 if (if_fake_hwcsum != 0) {
3196 fakeif->iff_flags |= IFF_FLAGS_HWCSUM;
3197 }
3198 if (if_fake_lro != 0) {
3199 fakeif->iff_flags |= IFF_FLAGS_LRO;
3200 }
3201 fakeif->iff_max_mtu = get_max_mtu(bsd_mode: if_fake_bsd_mode, max_mtu: if_fake_max_mtu);
3202 fakeif->iff_fcs = if_fake_fcs;
3203 fakeif->iff_trailer_length = if_fake_trailer_length;
3204
3205 /* use the interface name as the unique id for ifp recycle */
3206 if ((unsigned int)
3207 snprintf(fakeif->iff_name, count: sizeof(fakeif->iff_name), "%s%d",
3208 ifc->ifc_name, unit) >= sizeof(fakeif->iff_name)) {
3209 feth_release(fakeif);
3210 return EINVAL;
3211 }
3212 bzero(s: &feth_init, n: sizeof(feth_init));
3213 feth_init.ver = IFNET_INIT_CURRENT_VERSION;
3214 feth_init.len = sizeof(feth_init);
3215 if (feth_in_bsd_mode(fakeif)) {
3216 if (if_fake_txstart != 0) {
3217 feth_init.start = feth_start;
3218 } else {
3219 feth_init.flags |= IFNET_INIT_LEGACY;
3220 feth_init.output = feth_output;
3221 }
3222 }
3223#if SKYWALK
3224 else {
3225 feth_init.flags |= IFNET_INIT_SKYWALK_NATIVE;
		/*
		 * Currently we support WMM mode only for Skywalk native
		 * interfaces.
		 */
3230 if (if_fake_wmm_mode != 0) {
3231 fakeif->iff_flags |= IFF_FLAGS_WMM_MODE;
3232 }
3233
3234 if (if_fake_multibuflet != 0) {
3235 fakeif->iff_flags |= IFF_FLAGS_MULTIBUFLETS;
3236 }
3237
3238 if (if_fake_multibuflet != 0 &&
3239 if_fake_pktpool_mode == IFF_PP_MODE_PRIVATE_SPLIT) {
			printf("%s: multi-buflet not supported for split rx &"
			    " tx pool\n", __func__);
3242 feth_release(fakeif);
3243 return EINVAL;
3244 }
3245
3246 fakeif->iff_pp_mode = if_fake_pktpool_mode;
3247 if (if_fake_tso_support != 0) {
3248 if (fakeif->iff_pp_mode != IFF_PP_MODE_GLOBAL) {
3249 printf("%s: TSO mode requires global packet"
3250 " pool mode\n", __func__);
				feth_release(fakeif);
				return EINVAL;
3252 }
3253 fakeif->iff_flags |= IFF_FLAGS_TSO_SUPPORT;
3254 }
3255
3256 fakeif->iff_tx_headroom = if_fake_tx_headroom;
3257 fakeif->iff_adv_interval = if_fake_if_adv_interval;
3258 if (fakeif->iff_adv_interval > 0) {
3259 feth_init.flags |= IFNET_INIT_IF_ADV;
3260 }
3261 fakeif->iff_tx_drop_rate = if_fake_tx_drops;
3262 fakeif->iff_tx_completion_mode = if_tx_completion_mode;
3263 fakeif->iff_tx_exp_policy = if_fake_tx_exp_policy;
3264 }
3265 feth_init.tx_headroom = fakeif->iff_tx_headroom;
3266#endif /* SKYWALK */
3267 if (if_fake_nxattach == 0) {
3268 feth_init.flags |= IFNET_INIT_NX_NOAUTO;
3269 }
3270 feth_init.uniqueid = fakeif->iff_name;
3271 feth_init.uniqueid_len = strlen(s: fakeif->iff_name);
3272 feth_init.name = ifc->ifc_name;
3273 feth_init.unit = unit;
3274 feth_init.family = IFNET_FAMILY_ETHERNET;
3275 feth_init.type = IFT_ETHER;
3276 feth_init.demux = ether_demux;
3277 feth_init.add_proto = ether_add_proto;
3278 feth_init.del_proto = ether_del_proto;
3279 feth_init.check_multi = ether_check_multi;
3280 feth_init.framer_extended = ether_frameout_extended;
3281 feth_init.softc = fakeif;
3282 feth_init.ioctl = feth_ioctl;
3283 feth_init.set_bpf_tap = NULL;
3284 feth_init.detach = feth_if_free;
3285 feth_init.broadcast_addr = etherbroadcastaddr;
3286 feth_init.broadcast_len = ETHER_ADDR_LEN;
3287 if (feth_in_bsd_mode(fakeif)) {
3288 error = ifnet_allocate_extended(init: &feth_init, interface: &ifp);
3289 if (error) {
3290 feth_release(fakeif);
3291 return error;
3292 }
3293 feth_ifnet_set_attrs(fakeif, ifp);
3294 }
3295#if SKYWALK
3296 else {
3297 if (feth_in_wmm_mode(fakeif)) {
3298 feth_init.output_sched_model =
3299 IFNET_SCHED_MODEL_DRIVER_MANAGED;
3300 }
3301 error = feth_attach_netif_nexus(fakeif, init_params: &feth_init, ifp: &ifp);
3302 if (error != 0) {
3303 feth_release(fakeif);
3304 return error;
3305 }
3306 /* take an additional reference to ensure that it doesn't go away */
3307 feth_retain(fakeif);
3308 fakeif->iff_ifp = ifp;
3309 }
3310#endif /* SKYWALK */
3311 fakeif->iff_media_count = MIN(default_media_words_count, IF_FAKE_MEDIA_LIST_MAX);
3312 bcopy(src: default_media_words, dst: fakeif->iff_media_list,
3313 n: fakeif->iff_media_count * sizeof(fakeif->iff_media_list[0]));
3314 if (feth_in_bsd_mode(fakeif)) {
3315 error = ifnet_attach(interface: ifp, NULL);
3316 if (error) {
3317 ifnet_release(interface: ifp);
3318 feth_release(fakeif);
3319 return error;
3320 }
3321 fakeif->iff_ifp = ifp;
3322 }
3323
3324 ifnet_set_lladdr(interface: ifp, lladdr: mac_address, lladdr_len: sizeof(mac_address));
3325
3326 /* attach as ethernet */
3327 bpfattach(interface: ifp, DLT_EN10MB, header_length: sizeof(struct ether_header));
3328 return 0;
3329}
3330
3331static int
3332feth_clone_destroy(ifnet_t ifp)
3333{
3334 if_fake_ref fakeif;
3335#if SKYWALK
3336 boolean_t nx_attached = FALSE;
3337#endif /* SKYWALK */
3338
3339 feth_lock();
3340 fakeif = ifnet_get_if_fake(ifp);
3341 if (fakeif == NULL || feth_is_detaching(fakeif)) {
3342 feth_unlock();
3343 return 0;
3344 }
3345 feth_set_detaching(fakeif);
3346#if SKYWALK
3347 nx_attached = !feth_in_bsd_mode(fakeif);
3348#endif /* SKYWALK */
3349 feth_unlock();
3350
3351#if SKYWALK
3352 if (nx_attached) {
3353 feth_detach_netif_nexus(fakeif);
3354 feth_release(fakeif);
3355 }
3356#endif /* SKYWALK */
3357 feth_config(ifp, NULL);
3358 ifnet_detach(interface: ifp);
3359 return 0;
3360}
3361
3362static void
3363feth_enqueue_input(ifnet_t ifp, struct mbuf * m)
3364{
3365 struct ifnet_stat_increment_param stats = {};
3366
3367 stats.packets_in = 1;
3368 stats.bytes_in = (uint32_t)mbuf_pkthdr_len(mbuf: m) + ETHER_HDR_LEN;
3369 ifnet_input(interface: ifp, first_packet: m, stats: &stats);
3370}
3371
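/*
 * Copy an mbuf chain into a single contiguous mbuf (cluster-backed if
 * the packet doesn't fit in an mbuf header), preserving the packet
 * length and service class.
 */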
3372static struct mbuf *
3373copy_mbuf(struct mbuf *m)
3374{
3375 struct mbuf * copy_m;
3376 uint32_t pkt_len;
3377 uint32_t offset;
3378
3379 if ((m->m_flags & M_PKTHDR) == 0) {
3380 return NULL;
3381 }
3382 pkt_len = m->m_pkthdr.len;
3383 MGETHDR(copy_m, M_DONTWAIT, MT_DATA);
3384 if (copy_m == NULL) {
3385 goto failed;
3386 }
3387 if (pkt_len > MHLEN) {
3388 if (pkt_len <= MCLBYTES) {
3389 MCLGET(copy_m, M_DONTWAIT);
3390 } else if (pkt_len <= MBIGCLBYTES) {
3391 copy_m = m_mbigget(copy_m, M_DONTWAIT);
3392 } else if (pkt_len <= M16KCLBYTES && njcl > 0) {
3393 copy_m = m_m16kget(copy_m, M_DONTWAIT);
3394 } else {
3395 printf("if_fake: copy_mbuf(): packet too large %d\n",
3396 pkt_len);
3397 goto failed;
3398 }
3399 if (copy_m == NULL || (copy_m->m_flags & M_EXT) == 0) {
3400 goto failed;
3401 }
3402 }
3403 mbuf_setlen(mbuf: copy_m, len: pkt_len);
3404 copy_m->m_pkthdr.len = pkt_len;
3405 copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc;
3406 offset = 0;
3407 while (m != NULL && offset < pkt_len) {
3408 uint32_t frag_len;
3409
3410 frag_len = m->m_len;
3411 if (frag_len > (pkt_len - offset)) {
			printf("if_fake: Large mbuf fragment %d > %d\n",
3413 frag_len, (pkt_len - offset));
3414 goto failed;
3415 }
3416 m_copydata(m, 0, frag_len, mtodo(copy_m, offset));
3417 offset += frag_len;
3418 m = m->m_next;
3419 }
3420 return copy_m;
3421
3422failed:
3423 if (copy_m != NULL) {
3424 m_freem(copy_m);
3425 }
3426 return NULL;
3427}
3428
3429static int
3430feth_add_mbuf_trailer(struct mbuf *m, void *trailer, size_t trailer_len)
3431{
3432 int ret;
3433 ASSERT(trailer_len <= FETH_TRAILER_LENGTH_MAX);
3434
3435 ret = m_append(m, trailer_len, (caddr_t)trailer);
3436 if (ret == 1) {
3437 FETH_DPRINTF("%s %zuB trailer added\n", __func__, trailer_len);
3438 return 0;
3439 }
3440 printf("%s m_append failed\n", __func__);
3441 return ENOTSUP;
3442}
3443
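/*
 * Compute a CRC32 over the packet contents, append it as an Ethernet
 * FCS trailer and mark the mbuf accordingly.
 */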
3444static int
3445feth_add_mbuf_fcs(struct mbuf *m)
3446{
3447 uint32_t pkt_len, offset = 0;
3448 uint32_t crc = 0;
3449 int err = 0;
3450
3451 ASSERT(sizeof(crc) == ETHER_CRC_LEN);
3452
3453 pkt_len = m->m_pkthdr.len;
3454 struct mbuf *iter = m;
3455 while (iter != NULL && offset < pkt_len) {
3456 uint32_t frag_len = iter->m_len;
3457 ASSERT(frag_len <= (pkt_len - offset));
3458 crc = crc32(crc, mtod(iter, void *), len: frag_len);
3459 offset += frag_len;
		iter = iter->m_next;
3461 }
3462
3463 err = feth_add_mbuf_trailer(m, trailer: &crc, ETHER_CRC_LEN);
3464 if (err != 0) {
3465 return err;
3466 }
3467
3468 m->m_flags |= M_HASFCS;
3469
3470 return 0;
3471}
3472
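/*
 * Common output path for both the legacy output and start routines:
 * fake up hardware checksum results if enabled, tap the packet, append
 * any configured trailer/FCS, then re-target the mbuf at the peer and
 * feed it to the peer's input path.
 */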
3473static void
3474feth_output_common(ifnet_t ifp, struct mbuf * m, ifnet_t peer,
3475 iff_flags_t flags, bool fcs, void *trailer, size_t trailer_len)
3476{
3477 void * frame_header;
3478
3479 frame_header = mbuf_data(mbuf: m);
3480 if ((flags & IFF_FLAGS_HWCSUM) != 0) {
3481 m->m_pkthdr.csum_data = 0xffff;
3482 m->m_pkthdr.csum_flags =
3483 CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
3484 CSUM_IP_CHECKED | CSUM_IP_VALID;
3485 }
3486
3487 (void)ifnet_stat_increment_out(interface: ifp, packets_out: 1, bytes_out: m->m_pkthdr.len, errors_out: 0);
3488 bpf_tap_out(interface: ifp, DLT_EN10MB, packet: m, NULL, header_len: 0);
3489
3490 if (trailer != 0) {
3491 feth_add_mbuf_trailer(m, trailer, trailer_len);
3492 }
3493 if (fcs) {
3494 feth_add_mbuf_fcs(m);
3495 }
3496
3497 (void)mbuf_pkthdr_setrcvif(mbuf: m, ifp: peer);
3498 mbuf_pkthdr_setheader(mbuf: m, header: frame_header);
3499 mbuf_pkthdr_adjustlen(mbuf: m, amount: -ETHER_HDR_LEN);
3500 (void)mbuf_setdata(mbuf: m, data: (char *)mbuf_data(mbuf: m) + ETHER_HDR_LEN,
3501 len: mbuf_len(mbuf: m) - ETHER_HDR_LEN);
3502 bpf_tap_in(interface: peer, DLT_EN10MB, packet: m, header: frame_header,
3503 header_len: sizeof(struct ether_header));
3504 feth_enqueue_input(ifp: peer, m);
3505}
3506
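/*
 * TXSTART model: drain the interface send queue, copying each packet
 * and handing the copy to the peer.  If a copy fails, the original is
 * stashed as the pending TX packet and retried on the next start.
 */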
3507static void
3508feth_start(ifnet_t ifp)
3509{
3510 struct mbuf * copy_m = NULL;
3511 if_fake_ref fakeif;
3512 iff_flags_t flags = 0;
3513 bool fcs;
3514 size_t trailer_len;
3515 ifnet_t peer = NULL;
3516 struct mbuf * m;
3517 struct mbuf * save_m;
3518
3519 feth_lock();
3520 fakeif = ifnet_get_if_fake(ifp);
3521 if (fakeif == NULL) {
3522 feth_unlock();
3523 return;
3524 }
3525
3526 if (fakeif->iff_start_busy) {
3527 feth_unlock();
3528 printf("if_fake: start is busy\n");
3529 return;
3530 }
3531
3532 peer = fakeif->iff_peer;
3533 flags = fakeif->iff_flags;
3534 fcs = fakeif->iff_fcs;
3535 trailer_len = fakeif->iff_trailer_length;
3536
3537 /* check for pending TX */
3538 m = fakeif->iff_pending_tx_packet;
3539 if (m != NULL) {
3540 if (peer != NULL) {
3541 copy_m = copy_mbuf(m);
3542 if (copy_m == NULL) {
3543 feth_unlock();
3544 return;
3545 }
3546 }
3547 fakeif->iff_pending_tx_packet = NULL;
3548 m_freem(m);
3549 m = NULL;
3550 }
3551 fakeif->iff_start_busy = TRUE;
3552 feth_unlock();
3553 save_m = NULL;
3554 for (;;) {
3555 if (copy_m != NULL) {
3556 VERIFY(peer != NULL);
3557 feth_output_common(ifp, m: copy_m, peer, flags, fcs,
3558 trailer: feth_trailer, trailer_len);
3559 copy_m = NULL;
3560 }
3561 if (ifnet_dequeue(interface: ifp, packet: &m) != 0) {
3562 break;
3563 }
3564 if (peer == NULL) {
3565 m_freem(m);
3566 } else {
3567 copy_m = copy_mbuf(m);
3568 if (copy_m == NULL) {
3569 save_m = m;
3570 break;
3571 }
3572 m_freem(m);
3573 }
3574 }
3575 peer = NULL;
3576 feth_lock();
3577 fakeif = ifnet_get_if_fake(ifp);
3578 if (fakeif != NULL) {
3579 fakeif->iff_start_busy = FALSE;
3580 if (save_m != NULL && fakeif->iff_peer != NULL) {
3581 /* save it for next time */
3582 fakeif->iff_pending_tx_packet = save_m;
3583 save_m = NULL;
3584 }
3585 }
3586 feth_unlock();
3587 if (save_m != NULL) {
3588 /* didn't save packet, so free it */
3589 m_freem(save_m);
3590 }
3591}
3592
3593static int
3594feth_output(ifnet_t ifp, struct mbuf * m)
3595{
3596 struct mbuf * copy_m;
3597 if_fake_ref fakeif;
3598 iff_flags_t flags;
3599 bool fcs;
3600 size_t trailer_len;
3601 ifnet_t peer = NULL;
3602
3603 if (m == NULL) {
3604 return 0;
3605 }
3606 copy_m = copy_mbuf(m);
3607 m_freem(m);
3608 m = NULL;
3609 if (copy_m == NULL) {
3610 /* count this as an output error */
3611 ifnet_stat_increment_out(interface: ifp, packets_out: 0, bytes_out: 0, errors_out: 1);
3612 return 0;
3613 }
3614 feth_lock();
3615 fakeif = ifnet_get_if_fake(ifp);
3616 if (fakeif != NULL) {
3617 peer = fakeif->iff_peer;
3618 flags = fakeif->iff_flags;
3619 fcs = fakeif->iff_fcs;
3620 trailer_len = fakeif->iff_trailer_length;
3621 }
3622 feth_unlock();
3623 if (peer == NULL) {
3624 m_freem(copy_m);
3625 ifnet_stat_increment_out(interface: ifp, packets_out: 0, bytes_out: 0, errors_out: 1);
3626 return 0;
3627 }
3628 feth_output_common(ifp, m: copy_m, peer, flags, fcs, trailer: feth_trailer,
3629 trailer_len);
3630 return 0;
3631}
3632
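/*
 * Connect this interface to, or disconnect it from, a peer feth
 * interface under the feth lock, then post the corresponding link
 * up/down events.
 */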
3633static int
3634feth_config(ifnet_t ifp, ifnet_t peer)
3635{
3636 int connected = FALSE;
3637 int disconnected = FALSE;
3638 int error = 0;
3639 if_fake_ref fakeif = NULL;
3640
3641 feth_lock();
3642 fakeif = ifnet_get_if_fake(ifp);
3643 if (fakeif == NULL) {
3644 error = EINVAL;
3645 goto done;
3646 }
3647 if (peer != NULL) {
3648 /* connect to peer */
3649 if_fake_ref peer_fakeif;
3650
3651 peer_fakeif = ifnet_get_if_fake(ifp: peer);
3652 if (peer_fakeif == NULL) {
3653 error = EINVAL;
3654 goto done;
3655 }
3656 if (feth_is_detaching(fakeif) ||
3657 feth_is_detaching(fakeif: peer_fakeif) ||
3658 peer_fakeif->iff_peer != NULL ||
3659 fakeif->iff_peer != NULL) {
3660 error = EBUSY;
3661 goto done;
3662 }
3663#if SKYWALK
3664 if (fakeif->iff_pp_mode !=
3665 peer_fakeif->iff_pp_mode) {
3666 error = EINVAL;
3667 goto done;
3668 }
3669#endif /* SKYWALK */
3670 fakeif->iff_peer = peer;
3671 peer_fakeif->iff_peer = ifp;
3672 connected = TRUE;
3673 } else if (fakeif->iff_peer != NULL) {
3674 /* disconnect from peer */
3675 if_fake_ref peer_fakeif;
3676
3677 peer = fakeif->iff_peer;
3678 peer_fakeif = ifnet_get_if_fake(ifp: peer);
3679 if (peer_fakeif == NULL) {
3680 /* should not happen */
3681 error = EINVAL;
3682 goto done;
3683 }
3684 fakeif->iff_peer = NULL;
3685 peer_fakeif->iff_peer = NULL;
3686 disconnected = TRUE;
3687 }
3688
3689done:
3690 feth_unlock();
3691
3692 /* generate link status event if we connect or disconnect */
3693 if (connected) {
3694 interface_link_event(ifp, KEV_DL_LINK_ON);
3695 interface_link_event(ifp: peer, KEV_DL_LINK_ON);
3696 } else if (disconnected) {
3697 interface_link_event(ifp, KEV_DL_LINK_OFF);
3698 interface_link_event(ifp: peer, KEV_DL_LINK_OFF);
3699 }
3700 return error;
3701}

static int
feth_set_media(ifnet_t ifp, struct if_fake_request * iffr)
{
	if_fake_ref fakeif;
	int error;

	if (iffr->iffr_media.iffm_count > IF_FAKE_MEDIA_LIST_MAX) {
		/* list is too long */
		return EINVAL;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		error = EINVAL;
		goto done;
	}
	fakeif->iff_media_count = iffr->iffr_media.iffm_count;
	bcopy(iffr->iffr_media.iffm_list, fakeif->iff_media_list,
	    iffr->iffr_media.iffm_count * sizeof(fakeif->iff_media_list[0]));
#if 0
	/* XXX: "auto-negotiate" active with peer? */
	/* generate link status event? */
	fakeif->iff_media_current = iffr->iffr_media.iffm_current;
#endif
	error = 0;
done:
	feth_unlock();
	return error;
}

static int
if_fake_request_copyin(user_addr_t user_addr,
    struct if_fake_request *iffr, u_int32_t len)
{
	int error;

	if (user_addr == USER_ADDR_NULL || len < sizeof(*iffr)) {
		error = EINVAL;
		goto done;
	}
	error = copyin(user_addr, iffr, sizeof(*iffr));
	if (error != 0) {
		goto done;
	}
	/* reserved fields must be zero */
	if (iffr->iffr_reserved[0] != 0 || iffr->iffr_reserved[1] != 0 ||
	    iffr->iffr_reserved[2] != 0 || iffr->iffr_reserved[3] != 0) {
		error = EINVAL;
		goto done;
	}
done:
	return error;
}

static int
feth_set_drvspec(ifnet_t ifp, uint32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int error;
	struct if_fake_request iffr;
	ifnet_t peer;

	switch (cmd) {
	case IF_FAKE_S_CMD_SET_PEER:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		if (iffr.iffr_peer_name[0] == '\0') {
			error = feth_config(ifp, NULL);
			break;
		}

		/* ensure nul termination */
		iffr.iffr_peer_name[IFNAMSIZ - 1] = '\0';
		peer = ifunit(iffr.iffr_peer_name);
		if (peer == NULL) {
			error = ENXIO;
			break;
		}
		if (ifnet_type(peer) != IFT_ETHER) {
			error = EINVAL;
			break;
		}
		if (strcmp(ifnet_name(peer), FAKE_ETHER_NAME) != 0) {
			error = EINVAL;
			break;
		}
		error = feth_config(ifp, peer);
		break;
	case IF_FAKE_S_CMD_SET_MEDIA:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_set_media(ifp, &iffr);
		break;
	case IF_FAKE_S_CMD_SET_DEQUEUE_STALL:
		error = if_fake_request_copyin(user_addr, &iffr, len);
		if (error != 0) {
			break;
		}
		error = feth_enable_dequeue_stall(ifp,
		    iffr.iffr_dequeue_stall);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
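
/*
 * Illustrative user-space sketch (not taken from this file) of driving
 * IF_FAKE_S_CMD_SET_PEER through SIOCSDRVSPEC.  It assumes the
 * struct if_fake_request definitions from net/if_fake_var.h are visible
 * to the caller and that the caller is privileged (feth_ioctl() applies
 * proc_suser() to the set path); error handling is omitted:
 *
 *	struct if_fake_request iffr;
 *	struct ifdrv ifd;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	bzero(&iffr, sizeof(iffr));
 *	strlcpy(iffr.iffr_peer_name, "feth1", sizeof(iffr.iffr_peer_name));
 *	bzero(&ifd, sizeof(ifd));
 *	strlcpy(ifd.ifd_name, "feth0", sizeof(ifd.ifd_name));
 *	ifd.ifd_cmd = IF_FAKE_S_CMD_SET_PEER;
 *	ifd.ifd_len = sizeof(iffr);
 *	ifd.ifd_data = &iffr;
 *	ioctl(s, SIOCSDRVSPEC, &ifd);
 *
 * Passing an empty iffr_peer_name requests a disconnect (feth_config()
 * is then called with a NULL peer).
 */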

static int
feth_get_drvspec(ifnet_t ifp, u_int32_t cmd, u_int32_t len,
    user_addr_t user_addr)
{
	int error = EOPNOTSUPP;
	if_fake_ref fakeif;
	struct if_fake_request iffr;
	ifnet_t peer;

	switch (cmd) {
	case IF_FAKE_G_CMD_GET_PEER:
		if (len < sizeof(iffr)) {
			error = EINVAL;
			break;
		}
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			error = EOPNOTSUPP;
			break;
		}
		peer = fakeif->iff_peer;
		feth_unlock();
		bzero(&iffr, sizeof(iffr));
		if (peer != NULL) {
			strlcpy(iffr.iffr_peer_name,
			    if_name(peer),
			    sizeof(iffr.iffr_peer_name));
		}
		error = copyout(&iffr, user_addr, sizeof(iffr));
		break;
	default:
		break;
	}
	return error;
}
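
/*
 * The get side mirrors the sketch above: an illustrative caller fills in
 * the same struct ifdrv with ifd_cmd = IF_FAKE_G_CMD_GET_PEER and issues
 * SIOCGDRVSPEC; on success iffr.iffr_peer_name contains the peer's
 * interface name, or an empty string if the interface is unpaired.
 */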

/*
 * SIOCSDRVSPEC/SIOCGDRVSPEC carry either a 32-bit or a 64-bit layout of
 * struct ifdrv; this union lets feth_ioctl() view the same request
 * buffer through whichever layout matches the command.
 */
union ifdrvu {
	struct ifdrv32 *ifdrvu_32;
	struct ifdrv64 *ifdrvu_64;
	void *ifdrvu_p;
};

static int
feth_ioctl(ifnet_t ifp, u_long cmd, void * data)
{
	unsigned int count;
	struct ifdevmtu * devmtu_p;
	union ifdrvu drv;
	uint32_t drv_cmd;
	uint32_t drv_len;
	boolean_t drv_set_command = FALSE;
	int error = 0;
	struct ifmediareq * ifmr;
	struct ifreq * ifr;
	if_fake_ref fakeif;
	int status;
	user_addr_t user_addr;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCSIFADDR:
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64:
		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		status = (fakeif->iff_peer != NULL)
		    ? (IFM_AVALID | IFM_ACTIVE) : IFM_AVALID;
		ifmr = (struct ifmediareq *)data;
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
		count = ifmr->ifm_count;
		ifmr->ifm_active = IFM_ETHER;
		ifmr->ifm_current = IFM_ETHER;
		ifmr->ifm_mask = 0;
		ifmr->ifm_status = status;
		if (user_addr == USER_ADDR_NULL) {
			ifmr->ifm_count = fakeif->iff_media_count;
		} else if (count > 0) {
			if (count > fakeif->iff_media_count) {
				count = fakeif->iff_media_count;
			}
			ifmr->ifm_count = count;
			error = copyout(&fakeif->iff_media_list, user_addr,
			    count * sizeof(int));
		}
		feth_unlock();
		break;

	case SIOCGIFDEVMTU:
		devmtu_p = &ifr->ifr_devmtu;
		devmtu_p->ifdm_current = ifnet_mtu(ifp);
		devmtu_p->ifdm_max = feth_max_mtu(ifp);
		devmtu_p->ifdm_min = IF_MINMTU;
		break;

	case SIOCSIFMTU:
		if ((unsigned int)ifr->ifr_mtu > feth_max_mtu(ifp) ||
		    ifr->ifr_mtu < IF_MINMTU) {
			error = EINVAL;
		} else {
			error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
		}
		break;

	case SIOCSDRVSPEC32:
	case SIOCSDRVSPEC64:
		error = proc_suser(current_proc());
		if (error != 0) {
			break;
		}
		drv_set_command = TRUE;
		OS_FALLTHROUGH;
	case SIOCGDRVSPEC32:
	case SIOCGDRVSPEC64:
		drv.ifdrvu_p = data;
		if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
			drv_cmd = drv.ifdrvu_32->ifd_cmd;
			drv_len = drv.ifdrvu_32->ifd_len;
			user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
		} else {
			drv_cmd = drv.ifdrvu_64->ifd_cmd;
			drv_len = drv.ifdrvu_64->ifd_len;
			user_addr = drv.ifdrvu_64->ifd_data;
		}
		if (drv_set_command) {
			error = feth_set_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		} else {
			error = feth_get_drvspec(ifp, drv_cmd, drv_len,
			    user_addr);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		break;

	case SIOCSIFFLAGS:
		if ((ifp->if_flags & IFF_UP) != 0) {
			/* marked up, set running if not already set */
			if ((ifp->if_flags & IFF_RUNNING) == 0) {
				/* set running */
				error = ifnet_set_flags(ifp, IFF_RUNNING,
				    IFF_RUNNING);
			}
		} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
			/* marked down, clear running */
			error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = 0;
		break;
	case SIOCSIFCAP: {
		uint32_t cap;

		feth_lock();
		fakeif = ifnet_get_if_fake(ifp);
		if (fakeif == NULL ||
		    (fakeif->iff_flags & IFF_FLAGS_LRO) == 0) {
			feth_unlock();
			return EOPNOTSUPP;
		}
		feth_unlock();
		cap = (ifr->ifr_reqcap & IFCAP_LRO) != 0 ? IFCAP_LRO : 0;
		error = ifnet_set_capabilities_enabled(ifp, cap, IFCAP_LRO);
		break;
	}
	default:
		error = EOPNOTSUPP;
		break;
	}
	return error;
}
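
/*
 * Summary of the ioctl surface above: basic address/flags/MTU plumbing
 * (SIOCSIFADDR, SIOCSIFFLAGS, SIOCSIFMTU, SIOCGIFDEVMTU, SIOCSIFLLADDR),
 * synthetic media reporting (SIOCGIFMEDIA32/64, reported active only
 * while a peer is attached), LRO capability toggling (SIOCSIFCAP), and
 * the driver-private commands dispatched via SIOCSDRVSPEC/SIOCGDRVSPEC.
 */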

static void
feth_if_free(ifnet_t ifp)
{
	if_fake_ref fakeif;

	if (ifp == NULL) {
		return;
	}
	feth_lock();
	fakeif = ifnet_get_if_fake(ifp);
	if (fakeif == NULL) {
		feth_unlock();
		return;
	}
	ifp->if_softc = NULL;
#if SKYWALK
	VERIFY(fakeif->iff_doorbell_tcall == NULL);
#endif /* SKYWALK */
	feth_unlock();
	feth_release(fakeif);
	ifnet_release(ifp);
	return;
}

__private_extern__ void
if_fake_init(void)
{
	int error;

#if SKYWALK
	(void)feth_register_nexus_domain_provider();
#endif /* SKYWALK */
	error = if_clone_attach(&feth_cloner);
	if (error != 0) {
		return;
	}
	return;
}

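/*
 * Once if_fake_init() has attached the cloner, feth instances can be
 * created and destroyed from user space with the standard clone
 * requests, e.g. (illustrative):
 *
 *	ifconfig feth0 create
 *	ifconfig feth1 create
 *	ifconfig feth0 destroy
 *
 * Pairing two instances is then done with the SIOCSDRVSPEC sketch shown
 * earlier (IF_FAKE_S_CMD_SET_PEER).
 */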