1/*
2 * Copyright (c) 1999-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
30 * support for mandatory and extensible security protections. This notice
31 * is included in support of clause 2.2 (b) of the Apple Public License,
32 * Version 2.0.
33 */
34#include <stddef.h>
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/malloc.h>
40#include <sys/mbuf.h>
41#include <sys/socket.h>
42#include <sys/domain.h>
43#include <sys/user.h>
44#include <sys/random.h>
45#include <sys/socketvar.h>
46#include <net/if_dl.h>
47#include <net/if.h>
48#include <net/route.h>
49#include <net/if_var.h>
50#include <net/dlil.h>
51#include <net/if_arp.h>
52#include <net/iptap.h>
53#include <net/pktap.h>
54#include <sys/kern_event.h>
55#include <sys/kdebug.h>
56#include <sys/mcache.h>
57#include <sys/syslog.h>
58#include <sys/protosw.h>
59#include <sys/priv.h>
60
61#include <kern/assert.h>
62#include <kern/task.h>
63#include <kern/thread.h>
64#include <kern/sched_prim.h>
65#include <kern/locks.h>
66#include <kern/zalloc.h>
67
68#include <net/kpi_protocol.h>
69#include <net/if_types.h>
70#include <net/if_ipsec.h>
71#include <net/if_llreach.h>
72#include <net/if_utun.h>
73#include <net/kpi_interfacefilter.h>
74#include <net/classq/classq.h>
75#include <net/classq/classq_sfb.h>
76#include <net/flowhash.h>
77#include <net/ntstat.h>
78#include <net/if_llatbl.h>
79#include <net/net_api_stats.h>
80#include <net/if_ports_used.h>
81#include <netinet/in.h>
82#if INET
83#include <netinet/in_var.h>
84#include <netinet/igmp_var.h>
85#include <netinet/ip_var.h>
86#include <netinet/tcp.h>
87#include <netinet/tcp_var.h>
88#include <netinet/udp.h>
89#include <netinet/udp_var.h>
90#include <netinet/if_ether.h>
91#include <netinet/in_pcb.h>
92#include <netinet/in_tclass.h>
93#include <netinet/ip.h>
94#include <netinet/ip_icmp.h>
95#include <netinet/icmp_var.h>
96#endif /* INET */
97
98#if INET6
99#include <net/nat464_utils.h>
100#include <netinet6/in6_var.h>
101#include <netinet6/nd6.h>
102#include <netinet6/mld6_var.h>
103#include <netinet6/scope6_var.h>
104#include <netinet/ip6.h>
105#include <netinet/icmp6.h>
106#endif /* INET6 */
107#include <net/pf_pbuf.h>
108#include <libkern/OSAtomic.h>
109#include <libkern/tree.h>
110
111#include <dev/random/randomdev.h>
112#include <machine/machine_routines.h>
113
114#include <mach/thread_act.h>
115#include <mach/sdt.h>
116
117#if CONFIG_MACF
118#include <sys/kauth.h>
119#include <security/mac_framework.h>
120#include <net/ethernet.h>
121#include <net/firewire.h>
122#endif
123
124#if PF
125#include <net/pfvar.h>
126#endif /* PF */
127#include <net/pktsched/pktsched.h>
128
129#if NECP
130#include <net/necp.h>
131#endif /* NECP */
132
133
134#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0)
135#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2)
136#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8))
137#define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
138#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
139
140#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */
141#define MAX_LINKADDR 4 /* LONGWORDS */
142#define M_NKE M_IFADDR
143
144#if 1
145#define DLIL_PRINTF printf
146#else
147#define DLIL_PRINTF kprintf
148#endif
149
150#define IF_DATA_REQUIRE_ALIGNED_64(f) \
151 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t)))
152
153#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \
154 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t)))
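
/*
 * Both macros above expand to compile-time assertions (via _CASSERT)
 * that the named counter lives at a 64-bit aligned offset; dlil_init()
 * below instantiates them for the counters that are updated atomically.
 */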
155
156enum {
157 kProtoKPI_v1 = 1,
158 kProtoKPI_v2 = 2
159};
160
/*
 * The list of if_proto structures in if_proto_hash[] is protected by
 * the ifnet lock.  The remaining fields are initialized at protocol
 * attach time and never change afterwards, so no lock is required to
 * access them as long as a reference to the if_proto is held via
 * if_proto_ref().
 */
167struct if_proto {
168 SLIST_ENTRY(if_proto) next_hash;
169 u_int32_t refcount;
170 u_int32_t detached;
171 struct ifnet *ifp;
172 protocol_family_t protocol_family;
173 int proto_kpi;
174 union {
175 struct {
176 proto_media_input input;
177 proto_media_preout pre_output;
178 proto_media_event event;
179 proto_media_ioctl ioctl;
180 proto_media_detached detached;
181 proto_media_resolve_multi resolve_multi;
182 proto_media_send_arp send_arp;
183 } v1;
184 struct {
185 proto_media_input_v2 input;
186 proto_media_preout pre_output;
187 proto_media_event event;
188 proto_media_ioctl ioctl;
189 proto_media_detached detached;
190 proto_media_resolve_multi resolve_multi;
191 proto_media_send_arp send_arp;
192 } v2;
193 } kpi;
194};
195
196SLIST_HEAD(proto_hash_entry, if_proto);
197
198#define DLIL_SDLDATALEN \
199 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0]))
200
201struct dlil_ifnet {
202 struct ifnet dl_if; /* public ifnet */
203 /*
204 * DLIL private fields, protected by dl_if_lock
205 */
206 decl_lck_mtx_data(, dl_if_lock);
207 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */
208 u_int32_t dl_if_flags; /* flags (below) */
209 u_int32_t dl_if_refcnt; /* refcnt */
210 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */
211 void *dl_if_uniqueid; /* unique interface id */
212 size_t dl_if_uniqueid_len; /* length of the unique id */
213 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
214 char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
215 struct {
216 struct ifaddr ifa; /* lladdr ifa */
217 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */
218 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */
219 } dl_if_lladdr;
220 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */
221 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */
222 ctrace_t dl_if_attach; /* attach PC stacktrace */
223 ctrace_t dl_if_detach; /* detach PC stacktrace */
224};
225
226/* Values for dl_if_flags (private to DLIL) */
#define	DLIF_INUSE	0x1	/* DLIL ifnet recycler: ifnet is in use */
#define	DLIF_REUSE	0x2	/* DLIL ifnet recycler: ifnet is recycled, not new */
229#define DLIF_DEBUG 0x4 /* has debugging info */
230
231#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */
232
233/* For gdb */
234__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE;
235
236struct dlil_ifnet_dbg {
237 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */
238 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */
239 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */
240 /*
241 * Circular lists of ifnet_{reference,release} callers.
242 */
243 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE];
244 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE];
245};
246
247#define DLIL_TO_IFP(s) (&s->dl_if)
248#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s)
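
/*
 * Note: the IFP_TO_DLIL() cast above is only valid because dl_if is the
 * first member of struct dlil_ifnet, so a dlil_ifnet and its embedded
 * public ifnet share the same starting address.
 */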
249
250struct ifnet_filter {
251 TAILQ_ENTRY(ifnet_filter) filt_next;
252 u_int32_t filt_skip;
253 u_int32_t filt_flags;
254 ifnet_t filt_ifp;
255 const char *filt_name;
256 void *filt_cookie;
257 protocol_family_t filt_protocol;
258 iff_input_func filt_input;
259 iff_output_func filt_output;
260 iff_event_func filt_event;
261 iff_ioctl_func filt_ioctl;
262 iff_detached_func filt_detached;
263};
264
265struct proto_input_entry;
266
267static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head;
268static lck_grp_t *dlil_lock_group;
269lck_grp_t *ifnet_lock_group;
270static lck_grp_t *ifnet_head_lock_group;
271static lck_grp_t *ifnet_snd_lock_group;
272static lck_grp_t *ifnet_rcv_lock_group;
273lck_attr_t *ifnet_lock_attr;
274decl_lck_rw_data(static, ifnet_head_lock);
275decl_lck_mtx_data(static, dlil_ifnet_lock);
276u_int32_t dlil_filter_disable_tso_count = 0;
277
278#if DEBUG
279static unsigned int ifnet_debug = 1; /* debugging (enabled) */
280#else
281static unsigned int ifnet_debug; /* debugging (disabled) */
282#endif /* !DEBUG */
283static unsigned int dlif_size; /* size of dlil_ifnet to allocate */
static unsigned int dlif_bufsize;	/* dlif_size plus allocation headroom */
285static struct zone *dlif_zone; /* zone for dlil_ifnet */
286
287#define DLIF_ZONE_MAX IFNETS_MAX /* maximum elements in zone */
288#define DLIF_ZONE_NAME "ifnet" /* zone name */
289
290static unsigned int dlif_filt_size; /* size of ifnet_filter */
291static struct zone *dlif_filt_zone; /* zone for ifnet_filter */
292
293#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */
294#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */
295
296static unsigned int dlif_phash_size; /* size of ifnet proto hash table */
297static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */
298
299#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */
300#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */
301
302static unsigned int dlif_proto_size; /* size of if_proto */
303static struct zone *dlif_proto_zone; /* zone for if_proto */
304
305#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */
306#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */
307
308static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */
static unsigned int dlif_tcpstat_bufsize; /* dlif_tcpstat_size plus allocation headroom */
310static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */
311
312#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */
313#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */
314
315static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */
static unsigned int dlif_udpstat_bufsize; /* dlif_udpstat_size plus allocation headroom */
317static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */
318
319#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */
320#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */
321
322static u_int32_t net_rtref;
323
324static struct dlil_main_threading_info dlil_main_input_thread_info;
325__private_extern__ struct dlil_threading_info *dlil_main_input_thread =
326 (struct dlil_threading_info *)&dlil_main_input_thread_info;
327
328static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation);
329static int dlil_detach_filter_internal(interface_filter_t filter, int detached);
330static void dlil_if_trace(struct dlil_ifnet *, int);
331static void if_proto_ref(struct if_proto *);
332static void if_proto_free(struct if_proto *);
333static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t);
334static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
335 u_int32_t list_count);
336static void if_flt_monitor_busy(struct ifnet *);
337static void if_flt_monitor_unbusy(struct ifnet *);
338static void if_flt_monitor_enter(struct ifnet *);
339static void if_flt_monitor_leave(struct ifnet *);
340static int dlil_interface_filters_input(struct ifnet *, struct mbuf **,
341 char **, protocol_family_t);
342static int dlil_interface_filters_output(struct ifnet *, struct mbuf **,
343 protocol_family_t);
344static struct ifaddr *dlil_alloc_lladdr(struct ifnet *,
345 const struct sockaddr_dl *);
346static int ifnet_lookup(struct ifnet *);
347static void if_purgeaddrs(struct ifnet *);
348
349static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t,
350 struct mbuf *, char *);
351static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t,
352 struct mbuf *);
353static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t,
354 mbuf_t *, const struct sockaddr *, void *, char *, char *);
355static void ifproto_media_event(struct ifnet *, protocol_family_t,
356 const struct kev_msg *);
357static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t,
358 unsigned long, void *);
359static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *,
360 struct sockaddr_dl *, size_t);
361static errno_t ifproto_media_send_arp(struct ifnet *, u_short,
362 const struct sockaddr_dl *, const struct sockaddr *,
363 const struct sockaddr_dl *, const struct sockaddr *);
364
365static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
366 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
367 boolean_t poll, struct thread *tp);
368static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t,
369 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
370static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *);
371static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *,
372 protocol_family_t *);
373static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
374 const struct ifnet_demux_desc *, u_int32_t);
375static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
376static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
377#if CONFIG_EMBEDDED
378static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
379 const struct sockaddr *, const char *, const char *,
380 u_int32_t *, u_int32_t *);
381#else
382static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
383 const struct sockaddr *, const char *, const char *);
384#endif /* CONFIG_EMBEDDED */
385static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
386 const struct sockaddr *, const char *, const char *,
387 u_int32_t *, u_int32_t *);
388static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
389static void ifp_if_free(struct ifnet *);
390static void ifp_if_event(struct ifnet *, const struct kev_msg *);
391static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *);
392static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
393
394static void dlil_main_input_thread_func(void *, wait_result_t);
395static void dlil_input_thread_func(void *, wait_result_t);
396static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
397static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
398static void dlil_terminate_input_thread(struct dlil_threading_info *);
399static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
400 struct dlil_threading_info *, boolean_t);
401static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *);
402static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
403 u_int32_t, ifnet_model_t, boolean_t);
404static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
405 const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
407static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *);
408static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *);
409#if DEBUG || DEVELOPMENT
410static void dlil_verify_sum16(void);
411#endif /* DEBUG || DEVELOPMENT */
412static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
413 protocol_family_t);
414static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
415 protocol_family_t);
416
417static void ifnet_detacher_thread_func(void *, wait_result_t);
418static int ifnet_detacher_thread_cont(int);
419static void ifnet_detach_final(struct ifnet *);
420static void ifnet_detaching_enqueue(struct ifnet *);
421static struct ifnet *ifnet_detaching_dequeue(void);
422
423static void ifnet_start_thread_fn(void *, wait_result_t);
424static void ifnet_poll_thread_fn(void *, wait_result_t);
425static void ifnet_poll(struct ifnet *);
426static errno_t ifnet_enqueue_common(struct ifnet *, void *,
427 classq_pkt_type_t, boolean_t, boolean_t *);
428
429static void ifp_src_route_copyout(struct ifnet *, struct route *);
430static void ifp_src_route_copyin(struct ifnet *, struct route *);
431#if INET6
432static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *);
433static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
434#endif /* INET6 */
435
436static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
437static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
438static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
439static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
440static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
441static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
442static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
443static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
444static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
445static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
446static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
447
448struct chain_len_stats tx_chain_len_stats;
449static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS;
450
451#if TEST_INPUT_THREAD_TERMINATION
452static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS;
453#endif /* TEST_INPUT_THREAD_TERMINATION */
454
455/* The following are protected by dlil_ifnet_lock */
456static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
457static u_int32_t ifnet_detaching_cnt;
458static void *ifnet_delayed_run; /* wait channel for detaching thread */
459
460decl_lck_mtx_data(static, ifnet_fc_lock);
461
462static uint32_t ifnet_flowhash_seed;
463
464struct ifnet_flowhash_key {
465 char ifk_name[IFNAMSIZ];
466 uint32_t ifk_unit;
467 uint32_t ifk_flags;
468 uint32_t ifk_eflags;
469 uint32_t ifk_capabilities;
470 uint32_t ifk_capenable;
471 uint32_t ifk_output_sched_model;
472 uint32_t ifk_rand1;
473 uint32_t ifk_rand2;
474};
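
/*
 * The key above is presumably populated from the corresponding ifnet
 * fields and hashed (together with ifnet_flowhash_seed) by
 * ifnet_calc_flowhash() below to produce ifce_flowhash; the ifk_rand
 * fields add randomness to the key.
 */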
475
476/* Flow control entry per interface */
477struct ifnet_fc_entry {
478 RB_ENTRY(ifnet_fc_entry) ifce_entry;
479 u_int32_t ifce_flowhash;
480 struct ifnet *ifce_ifp;
481};
482
483static uint32_t ifnet_calc_flowhash(struct ifnet *);
484static int ifce_cmp(const struct ifnet_fc_entry *,
485 const struct ifnet_fc_entry *);
486static int ifnet_fc_add(struct ifnet *);
487static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
488static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
489
490/* protected by ifnet_fc_lock */
491RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
492RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
493RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
494
495static unsigned int ifnet_fc_zone_size; /* sizeof ifnet_fc_entry */
496static struct zone *ifnet_fc_zone; /* ifnet_fc_entry zone */
497
498#define IFNET_FC_ZONE_NAME "ifnet_fc_zone"
499#define IFNET_FC_ZONE_MAX 32
500
501extern void bpfdetach(struct ifnet *);
502extern void proto_input_run(void);
503
504extern uint32_t udp_count_opportunistic(unsigned int ifindex,
505 u_int32_t flags);
506extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
507 u_int32_t flags);
508
509__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
510
511#if CONFIG_MACF
512#ifdef CONFIG_EMBEDDED
513int dlil_lladdr_ckreq = 1;
514#else
515int dlil_lladdr_ckreq = 0;
516#endif
517#endif
518
519#if DEBUG
520int dlil_verbose = 1;
521#else
522int dlil_verbose = 0;
523#endif /* DEBUG */
524#if IFNET_INPUT_SANITY_CHK
525/* sanity checking of input packet lists received */
526static u_int32_t dlil_input_sanity_check = 0;
527#endif /* IFNET_INPUT_SANITY_CHK */
528/* rate limit debug messages */
529struct timespec dlil_dbgrate = { 1, 0 };
530
531SYSCTL_DECL(_net_link_generic_system);
532
533SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
534 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
535
536#define IF_SNDQ_MINLEN 32
537u_int32_t if_sndq_maxlen = IFQ_MAXLEN;
538SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen,
539 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN,
540 sysctl_sndq_maxlen, "I", "Default transmit queue max length");
541
542#define IF_RCVQ_MINLEN 32
543#define IF_RCVQ_MAXLEN 256
544u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN;
545SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
546 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
547 sysctl_rcvq_maxlen, "I", "Default receive queue max length");
548
549#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */
550static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
551SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
552 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
553 "ilog2 of EWMA decay rate of avg inbound packets");
554
555#define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */
556#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */
557static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
558SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
559 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
560 IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
561 "Q", "input poll mode freeze time");
562
563#define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */
564#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */
565static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
566SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
567 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
568 IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
569 "Q", "input poll sampling time");
570
571#define IF_RXPOLL_INTERVALTIME_MIN (1ULL * 1000) /* 1 us */
572#define IF_RXPOLL_INTERVALTIME (1ULL * 1000 * 1000) /* 1 ms */
573static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
574SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
575 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
576 IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
577 "Q", "input poll interval (time)");
578
579#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */
580static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
581SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
582 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
583 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
584
585#define IF_RXPOLL_WLOWAT 10
586static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
587SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
588 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
589 IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
590 "I", "input poll wakeup low watermark");
591
592#define IF_RXPOLL_WHIWAT 100
593static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
594SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
595 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
596 IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
597 "I", "input poll wakeup high watermark");
598
599static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */
600SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
601 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0,
602 "max packets per poll call");
603
604static u_int32_t if_rxpoll = 1;
605SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll,
606 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0,
607 sysctl_rxpoll, "I", "enable opportunistic input polling");
608
609#if TEST_INPUT_THREAD_TERMINATION
610static u_int32_t if_input_thread_termination_spin = 0;
611SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin,
612 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
613 &if_input_thread_termination_spin, 0,
614 sysctl_input_thread_termination_spin,
615 "I", "input thread termination spin limit");
616#endif /* TEST_INPUT_THREAD_TERMINATION */
617
618static u_int32_t cur_dlil_input_threads = 0;
619SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads,
620 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0,
621 "Current number of DLIL input threads");
622
623#if IFNET_INPUT_SANITY_CHK
624SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check,
625 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0,
626 "Turn on sanity checking in DLIL input");
627#endif /* IFNET_INPUT_SANITY_CHK */
628
629static u_int32_t if_flowadv = 1;
630SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
631 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
632 "enable flow-advisory mechanism");
633
634static u_int32_t if_delaybased_queue = 1;
635SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue,
636 CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1,
637 "enable delay based dynamic queue sizing");
638
639static uint64_t hwcksum_in_invalidated = 0;
640SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
641 hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
642 &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
643
644uint32_t hwcksum_dbg = 0;
645SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
646 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
647 "enable hardware cksum debugging");
648
649u_int32_t ifnet_start_delayed = 0;
650SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed,
651 CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0,
652 "number of times start was delayed");
653
u_int32_t ifnet_delay_start_disabled = 0;
SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled,
    CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0,
    "disable the delayed start mechanism");
658
659#define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */
660#define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */
661#define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */
662#define HWCKSUM_DBG_MASK \
663 (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \
664 HWCKSUM_DBG_FINALIZE_FORCED)
665
666static uint32_t hwcksum_dbg_mode = 0;
667SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
668 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
669 0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
670
671static uint64_t hwcksum_dbg_partial_forced = 0;
672SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
673 hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
674 &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
675
676static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
677SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
678 hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
679 &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
680
681static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
682SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
683 hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
684 &hwcksum_dbg_partial_rxoff_forced, 0,
685 sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
686 "forced partial cksum rx offset");
687
688static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
689SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
690 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
691 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
692 "adjusted partial cksum rx offset");
693
694static uint64_t hwcksum_dbg_verified = 0;
695SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
696 hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
697 &hwcksum_dbg_verified, "packets verified for having good checksum");
698
699static uint64_t hwcksum_dbg_bad_cksum = 0;
700SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
701 hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
702 &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
703
704static uint64_t hwcksum_dbg_bad_rxoff = 0;
705SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
706 hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
707 &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
708
709static uint64_t hwcksum_dbg_adjusted = 0;
710SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
711 hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
712 &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
713
714static uint64_t hwcksum_dbg_finalized_hdr = 0;
715SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
716 hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
717 &hwcksum_dbg_finalized_hdr, "finalized headers");
718
719static uint64_t hwcksum_dbg_finalized_data = 0;
720SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
721 hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
722 &hwcksum_dbg_finalized_data, "finalized payloads");
723
724uint32_t hwcksum_tx = 1;
725SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
726 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
727 "enable transmit hardware checksum offload");
728
729uint32_t hwcksum_rx = 1;
730SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
731 CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
732 "enable receive hardware checksum offload");
733
734SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats,
735 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9,
736 sysctl_tx_chain_len_stats, "S", "");
737
738uint32_t tx_chain_len_count = 0;
739SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count,
740 CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "");
741
742static uint32_t threshold_notify = 1; /* enable/disable */
743SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify,
744 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "");
745
746static uint32_t threshold_interval = 2; /* in seconds */
747SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval,
748 CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "");
749
750#if (DEVELOPMENT || DEBUG)
751static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS;
752SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames,
753 CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
754#endif /* DEVELOPMENT || DEBUG */
755
756struct net_api_stats net_api_stats;
757SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD|CTLFLAG_LOCKED,
758 &net_api_stats, net_api_stats, "");
759
760
761unsigned int net_rxpoll = 1;
762unsigned int net_affinity = 1;
763static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
764
765extern u_int32_t inject_buckets;
766
767static lck_grp_attr_t *dlil_grp_attributes = NULL;
768static lck_attr_t *dlil_lck_attributes = NULL;
769
770/* DLIL data threshold thread call */
771static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t);
772
773static void dlil_mit_tcall_fn(thread_call_param_t, thread_call_param_t);
774
775uint32_t dlil_rcv_mit_pkts_min = 5;
776uint32_t dlil_rcv_mit_pkts_max = 64;
777uint32_t dlil_rcv_mit_interval = (500 * 1000);
778
779#if (DEVELOPMENT || DEBUG)
780SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_min,
781 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_min, 0, "");
782SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_pkts_max,
783 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_pkts_max, 0, "");
784SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rcv_mit_interval,
785 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_rcv_mit_interval, 0, "");
786#endif /* DEVELOPMENT || DEBUG */
787
788
789#define DLIL_INPUT_CHECK(m, ifp) { \
790 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \
791 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \
792 !(mbuf_flags(m) & MBUF_PKTHDR)) { \
793 panic_plain("%s: invalid mbuf %p\n", __func__, m); \
794 /* NOTREACHED */ \
795 } \
796}
797
798#define DLIL_EWMA(old, new, decay) do { \
799 u_int32_t _avg; \
800 if ((_avg = (old)) > 0) \
801 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \
802 else \
803 _avg = (new); \
804 (old) = _avg; \
805} while (0)
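
/*
 * Worked example of DLIL_EWMA() above: with decay == 2 the new sample
 * gets a weight of 1/4, so old == 12 and new == 4 yield
 * ((12 << 2) - 12 + 4) >> 2 == 10.
 */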
806
807#define MBPS (1ULL * 1000 * 1000)
808#define GBPS (MBPS * 1000)
809
810struct rxpoll_time_tbl {
811 u_int64_t speed; /* downlink speed */
812 u_int32_t plowat; /* packets low watermark */
813 u_int32_t phiwat; /* packets high watermark */
814 u_int32_t blowat; /* bytes low watermark */
815 u_int32_t bhiwat; /* bytes high watermark */
816};
817
818static struct rxpoll_time_tbl rxpoll_tbl[] = {
819 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) },
820 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) },
821 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
822 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
823 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) },
824 { 0, 0, 0, 0, 0 }
825};
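
/*
 * The all-zero row above terminates the table; the per-speed watermarks
 * are presumably selected elsewhere in this file by matching an
 * interface's downlink rate against the speed column.
 */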
826
827int
828proto_hash_value(u_int32_t protocol_family)
829{
830 /*
831 * dlil_proto_unplumb_all() depends on the mapping between
832 * the hash bucket index and the protocol family defined
833 * here; future changes must be applied there as well.
834 */
835 switch (protocol_family) {
836 case PF_INET:
837 return (0);
838 case PF_INET6:
839 return (1);
840 case PF_VLAN:
841 return (2);
842 case PF_UNSPEC:
843 default:
844 return (3);
845 }
846}
847
848/*
849 * Caller must already be holding ifnet lock.
850 */
851static struct if_proto *
852find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
853{
854 struct if_proto *proto = NULL;
855 u_int32_t i = proto_hash_value(protocol_family);
856
857 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
858
859 if (ifp->if_proto_hash != NULL)
860 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
861
862 while (proto != NULL && proto->protocol_family != protocol_family)
863 proto = SLIST_NEXT(proto, next_hash);
864
865 if (proto != NULL)
866 if_proto_ref(proto);
867
868 return (proto);
869}
870
871static void
872if_proto_ref(struct if_proto *proto)
873{
874 atomic_add_32(&proto->refcount, 1);
875}
876
877extern void if_rtproto_del(struct ifnet *ifp, int protocol);
878
879static void
880if_proto_free(struct if_proto *proto)
881{
882 u_int32_t oldval;
883 struct ifnet *ifp = proto->ifp;
884 u_int32_t proto_family = proto->protocol_family;
885 struct kev_dl_proto_data ev_pr_data;
886
887 oldval = atomic_add_32_ov(&proto->refcount, -1);
888 if (oldval > 1)
889 return;
890
	/* No more references on this; the protocol must have been detached */
892 VERIFY(proto->detached);
893
894 if (proto->proto_kpi == kProtoKPI_v1) {
895 if (proto->kpi.v1.detached)
896 proto->kpi.v1.detached(ifp, proto->protocol_family);
897 }
898 if (proto->proto_kpi == kProtoKPI_v2) {
899 if (proto->kpi.v2.detached)
900 proto->kpi.v2.detached(ifp, proto->protocol_family);
901 }
902
903 /*
904 * Cleanup routes that may still be in the routing table for that
905 * interface/protocol pair.
906 */
907 if_rtproto_del(ifp, proto_family);
908
	/*
	 * The reserved field carries the number of protocols still
	 * attached (subject to change).
	 */
913 ifnet_lock_shared(ifp);
914 ev_pr_data.proto_family = proto_family;
915 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
916 ifnet_lock_done(ifp);
917
918 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
919 (struct net_event_data *)&ev_pr_data,
920 sizeof (struct kev_dl_proto_data));
921
922 if (ev_pr_data.proto_remaining_count == 0) {
923 /*
924 * The protocol count has gone to zero, mark the interface down.
925 * This used to be done by configd.KernelEventMonitor, but that
926 * is inherently prone to races (rdar://problem/30810208).
927 */
928 (void) ifnet_set_flags(ifp, 0, IFF_UP);
929 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
930 dlil_post_sifflags_msg(ifp);
931 }
932
933 zfree(dlif_proto_zone, proto);
934}
935
936__private_extern__ void
937ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
938{
939#if !MACH_ASSERT
940#pragma unused(ifp)
941#endif
942 unsigned int type = 0;
943 int ass = 1;
944
945 switch (what) {
946 case IFNET_LCK_ASSERT_EXCLUSIVE:
947 type = LCK_RW_ASSERT_EXCLUSIVE;
948 break;
949
950 case IFNET_LCK_ASSERT_SHARED:
951 type = LCK_RW_ASSERT_SHARED;
952 break;
953
954 case IFNET_LCK_ASSERT_OWNED:
955 type = LCK_RW_ASSERT_HELD;
956 break;
957
958 case IFNET_LCK_ASSERT_NOTOWNED:
959 /* nothing to do here for RW lock; bypass assert */
960 ass = 0;
961 break;
962
963 default:
964 panic("bad ifnet assert type: %d", what);
965 /* NOTREACHED */
966 }
967 if (ass)
968 LCK_RW_ASSERT(&ifp->if_lock, type);
969}
970
971__private_extern__ void
972ifnet_lock_shared(struct ifnet *ifp)
973{
974 lck_rw_lock_shared(&ifp->if_lock);
975}
976
977__private_extern__ void
978ifnet_lock_exclusive(struct ifnet *ifp)
979{
980 lck_rw_lock_exclusive(&ifp->if_lock);
981}
982
983__private_extern__ void
984ifnet_lock_done(struct ifnet *ifp)
985{
986 lck_rw_done(&ifp->if_lock);
987}
988
989#if INET
990__private_extern__ void
991if_inetdata_lock_shared(struct ifnet *ifp)
992{
993 lck_rw_lock_shared(&ifp->if_inetdata_lock);
994}
995
996__private_extern__ void
997if_inetdata_lock_exclusive(struct ifnet *ifp)
998{
999 lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
1000}
1001
1002__private_extern__ void
1003if_inetdata_lock_done(struct ifnet *ifp)
1004{
1005 lck_rw_done(&ifp->if_inetdata_lock);
1006}
1007#endif
1008
1009#if INET6
1010__private_extern__ void
1011if_inet6data_lock_shared(struct ifnet *ifp)
1012{
1013 lck_rw_lock_shared(&ifp->if_inet6data_lock);
1014}
1015
1016__private_extern__ void
1017if_inet6data_lock_exclusive(struct ifnet *ifp)
1018{
1019 lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
1020}
1021
1022__private_extern__ void
1023if_inet6data_lock_done(struct ifnet *ifp)
1024{
1025 lck_rw_done(&ifp->if_inet6data_lock);
1026}
1027#endif
1028
1029__private_extern__ void
1030ifnet_head_lock_shared(void)
1031{
1032 lck_rw_lock_shared(&ifnet_head_lock);
1033}
1034
1035__private_extern__ void
1036ifnet_head_lock_exclusive(void)
1037{
1038 lck_rw_lock_exclusive(&ifnet_head_lock);
1039}
1040
1041__private_extern__ void
1042ifnet_head_done(void)
1043{
1044 lck_rw_done(&ifnet_head_lock);
1045}
1046
1047__private_extern__ void
1048ifnet_head_assert_exclusive(void)
1049{
1050 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
1051}
1052
1053/*
1054 * dlil_ifp_protolist
1055 * - get the list of protocols attached to the interface, or just the number
1056 * of attached protocols
1057 * - if the number returned is greater than 'list_count', truncation occurred
1058 *
1059 * Note:
1060 * - caller must already be holding ifnet lock.
1061 */
1062static u_int32_t
1063dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list,
1064 u_int32_t list_count)
1065{
1066 u_int32_t count = 0;
1067 int i;
1068
1069 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
1070
1071 if (ifp->if_proto_hash == NULL)
1072 goto done;
1073
1074 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
1075 struct if_proto *proto;
1076 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
1077 if (list != NULL && count < list_count) {
1078 list[count] = proto->protocol_family;
1079 }
1080 count++;
1081 }
1082 }
1083done:
1084 return (count);
1085}
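
/*
 * Illustrative usage: passing a NULL list with a list_count of 0, as
 * if_proto_free() does above, returns only the number of protocols
 * currently attached:
 *
 *	count = dlil_ifp_protolist(ifp, NULL, 0);
 */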
1086
1087__private_extern__ u_int32_t
1088if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count)
1089{
1090 ifnet_lock_shared(ifp);
1091 count = dlil_ifp_protolist(ifp, protolist, count);
1092 ifnet_lock_done(ifp);
1093 return (count);
1094}
1095
1096__private_extern__ void
1097if_free_protolist(u_int32_t *list)
1098{
1099 _FREE(list, M_TEMP);
1100}
1101
1102__private_extern__ void
1103dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass,
1104 u_int32_t event_code, struct net_event_data *event_data,
1105 u_int32_t event_data_len)
1106{
1107 struct net_event_data ev_data;
1108 struct kev_msg ev_msg;
1109
1110 bzero(&ev_msg, sizeof (ev_msg));
1111 bzero(&ev_data, sizeof (ev_data));
	/*
	 * A net event always starts with a net_event_data structure;
	 * the caller can either generate a simple net event or provide
	 * a longer event structure to post.
	 */
1117 ev_msg.vendor_code = KEV_VENDOR_APPLE;
1118 ev_msg.kev_class = KEV_NETWORK_CLASS;
1119 ev_msg.kev_subclass = event_subclass;
1120 ev_msg.event_code = event_code;
1121
1122 if (event_data == NULL) {
1123 event_data = &ev_data;
1124 event_data_len = sizeof (struct net_event_data);
1125 }
1126
1127 strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
1128 event_data->if_family = ifp->if_family;
1129 event_data->if_unit = (u_int32_t)ifp->if_unit;
1130
1131 ev_msg.dv[0].data_length = event_data_len;
1132 ev_msg.dv[0].data_ptr = event_data;
1133 ev_msg.dv[1].data_length = 0;
1134
	/* Don't update interface generation for quality and RRC state changes */
1136 bool update_generation = (event_subclass != KEV_DL_SUBCLASS ||
1137 (event_code != KEV_DL_LINK_QUALITY_METRIC_CHANGED &&
1138 event_code != KEV_DL_RRC_STATE_CHANGED));
1139
1140 dlil_event_internal(ifp, &ev_msg, update_generation);
1141}
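
/*
 * Minimal usage sketch (assuming no event-specific payload is needed):
 * passing a NULL event_data posts a bare net_event_data stamped with the
 * interface name, family and unit, e.g.
 *
 *	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_ON, NULL, 0);
 */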
1142
1143__private_extern__ int
1144dlil_alloc_local_stats(struct ifnet *ifp)
1145{
1146 int ret = EINVAL;
1147 void *buf, *base, **pbuf;
1148
1149 if (ifp == NULL)
1150 goto end;
1151
1152 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
1153 /* allocate tcpstat_local structure */
1154 buf = zalloc(dlif_tcpstat_zone);
1155 if (buf == NULL) {
1156 ret = ENOMEM;
1157 goto end;
1158 }
1159 bzero(buf, dlif_tcpstat_bufsize);
1160
1161 /* Get the 64-bit aligned base address for this object */
1162 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1163 sizeof (u_int64_t));
1164 VERIFY(((intptr_t)base + dlif_tcpstat_size) <=
1165 ((intptr_t)buf + dlif_tcpstat_bufsize));
1166
1167 /*
1168 * Wind back a pointer size from the aligned base and
1169 * save the original address so we can free it later.
1170 */
1171 pbuf = (void **)((intptr_t)base - sizeof (void *));
1172 *pbuf = buf;
1173 ifp->if_tcp_stat = base;
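		/*
		 * Concrete example of the scheme above, assuming LP64
		 * (sizeof (void *) == 8): if zalloc() returns 0x...1004,
		 * base rounds up to 0x...1010 and the original pointer is
		 * stashed at 0x...1008, which is what the cleanup code at
		 * the end of this function reads back before zfree().
		 */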
1174
1175 /* allocate udpstat_local structure */
1176 buf = zalloc(dlif_udpstat_zone);
1177 if (buf == NULL) {
1178 ret = ENOMEM;
1179 goto end;
1180 }
1181 bzero(buf, dlif_udpstat_bufsize);
1182
1183 /* Get the 64-bit aligned base address for this object */
1184 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
1185 sizeof (u_int64_t));
1186 VERIFY(((intptr_t)base + dlif_udpstat_size) <=
1187 ((intptr_t)buf + dlif_udpstat_bufsize));
1188
1189 /*
1190 * Wind back a pointer size from the aligned base and
1191 * save the original address so we can free it later.
1192 */
1193 pbuf = (void **)((intptr_t)base - sizeof (void *));
1194 *pbuf = buf;
1195 ifp->if_udp_stat = base;
1196
1197 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) &&
1198 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t)));
1199
1200 ret = 0;
1201 }
1202
1203 if (ifp->if_ipv4_stat == NULL) {
1204 MALLOC(ifp->if_ipv4_stat, struct if_tcp_ecn_stat *,
1205 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1206 if (ifp->if_ipv4_stat == NULL) {
1207 ret = ENOMEM;
1208 goto end;
1209 }
1210 }
1211
1212 if (ifp->if_ipv6_stat == NULL) {
1213 MALLOC(ifp->if_ipv6_stat, struct if_tcp_ecn_stat *,
1214 sizeof (struct if_tcp_ecn_stat), M_TEMP, M_WAITOK|M_ZERO);
1215 if (ifp->if_ipv6_stat == NULL) {
1216 ret = ENOMEM;
1217 goto end;
1218 }
1219 }
1220end:
1221 if (ret != 0) {
1222 if (ifp->if_tcp_stat != NULL) {
1223 pbuf = (void **)
1224 ((intptr_t)ifp->if_tcp_stat - sizeof (void *));
1225 zfree(dlif_tcpstat_zone, *pbuf);
1226 ifp->if_tcp_stat = NULL;
1227 }
1228 if (ifp->if_udp_stat != NULL) {
1229 pbuf = (void **)
1230 ((intptr_t)ifp->if_udp_stat - sizeof (void *));
1231 zfree(dlif_udpstat_zone, *pbuf);
1232 ifp->if_udp_stat = NULL;
1233 }
1234 if (ifp->if_ipv4_stat != NULL) {
1235 FREE(ifp->if_ipv4_stat, M_TEMP);
1236 ifp->if_ipv4_stat = NULL;
1237 }
1238 if (ifp->if_ipv6_stat != NULL) {
1239 FREE(ifp->if_ipv6_stat, M_TEMP);
1240 ifp->if_ipv6_stat = NULL;
1241 }
1242 }
1243
1244 return (ret);
1245}
1246
1247static int
1248dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
1249{
1250 thread_continue_t func;
1251 u_int32_t limit;
1252 int error;
1253
1254 /* NULL ifp indicates the main input thread, called at dlil_init time */
1255 if (ifp == NULL) {
1256 func = dlil_main_input_thread_func;
1257 VERIFY(inp == dlil_main_input_thread);
1258 (void) strlcat(inp->input_name,
1259 "main_input", DLIL_THREADNAME_LEN);
1260 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1261 func = dlil_rxpoll_input_thread_func;
1262 VERIFY(inp != dlil_main_input_thread);
1263 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1264 "%s_input_poll", if_name(ifp));
1265 } else {
1266 func = dlil_input_thread_func;
1267 VERIFY(inp != dlil_main_input_thread);
1268 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
1269 "%s_input", if_name(ifp));
1270 }
1271 VERIFY(inp->input_thr == THREAD_NULL);
1272
1273 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes);
1274 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes);
1275
1276 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1277 inp->ifp = ifp; /* NULL for main input thread */
1278
1279 net_timerclear(&inp->mode_holdtime);
1280 net_timerclear(&inp->mode_lasttime);
1281 net_timerclear(&inp->sample_holdtime);
1282 net_timerclear(&inp->sample_lasttime);
1283 net_timerclear(&inp->dbg_lasttime);
1284
1285 /*
1286 * For interfaces that support opportunistic polling, set the
1287 * low and high watermarks for outstanding inbound packets/bytes.
1288 * Also define freeze times for transitioning between modes
1289 * and updating the average.
1290 */
1291 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
1292 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
1293 (void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
1294 } else {
1295 limit = (u_int32_t)-1;
1296 }
1297
1298 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1299 if (inp == dlil_main_input_thread) {
1300 struct dlil_main_threading_info *inpm =
1301 (struct dlil_main_threading_info *)inp;
1302 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF);
1303 }
1304
1305 error = kernel_thread_start(func, inp, &inp->input_thr);
1306 if (error == KERN_SUCCESS) {
1307 ml_thread_policy(inp->input_thr, MACHINE_GROUP,
1308 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR));
1309 /*
1310 * We create an affinity set so that the matching workloop
1311 * thread or the starter thread (for loopback) can be
1312 * scheduled on the same processor set as the input thread.
1313 */
1314 if (net_affinity) {
1315 struct thread *tp = inp->input_thr;
1316 u_int32_t tag;
1317 /*
1318 * Randomize to reduce the probability
1319 * of affinity tag namespace collision.
1320 */
1321 read_frandom(&tag, sizeof (tag));
1322 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
1323 thread_reference(tp);
1324 inp->tag = tag;
1325 inp->net_affinity = TRUE;
1326 }
1327 }
1328 } else if (inp == dlil_main_input_thread) {
1329 panic_plain("%s: couldn't create main input thread", __func__);
1330 /* NOTREACHED */
1331 } else {
1332 panic_plain("%s: couldn't create %s input thread", __func__,
1333 if_name(ifp));
1334 /* NOTREACHED */
1335 }
1336 OSAddAtomic(1, &cur_dlil_input_threads);
1337
1338 return (error);
1339}
1340
1341#if TEST_INPUT_THREAD_TERMINATION
1342static int
1343sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS
1344{
1345#pragma unused(arg1, arg2)
1346 uint32_t i;
1347 int err;
1348
1349 i = if_input_thread_termination_spin;
1350
1351 err = sysctl_handle_int(oidp, &i, 0, req);
1352 if (err != 0 || req->newptr == USER_ADDR_NULL)
1353 return (err);
1354
1355 if (net_rxpoll == 0)
1356 return (ENXIO);
1357
1358 if_input_thread_termination_spin = i;
1359 return (err);
1360}
1361#endif /* TEST_INPUT_THREAD_TERMINATION */
1362
1363static void
1364dlil_clean_threading_info(struct dlil_threading_info *inp)
1365{
1366 lck_mtx_destroy(&inp->input_lck, inp->lck_grp);
1367 lck_grp_free(inp->lck_grp);
1368
1369 inp->input_waiting = 0;
1370 inp->wtot = 0;
1371 bzero(inp->input_name, sizeof (inp->input_name));
1372 inp->ifp = NULL;
1373 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts));
1374 qlimit(&inp->rcvq_pkts) = 0;
1375 bzero(&inp->stats, sizeof (inp->stats));
1376
1377 VERIFY(!inp->net_affinity);
1378 inp->input_thr = THREAD_NULL;
1379 VERIFY(inp->wloop_thr == THREAD_NULL);
1380 VERIFY(inp->poll_thr == THREAD_NULL);
1381 VERIFY(inp->tag == 0);
1382
1383 inp->mode = IFNET_MODEL_INPUT_POLL_OFF;
1384 bzero(&inp->tstats, sizeof (inp->tstats));
1385 bzero(&inp->pstats, sizeof (inp->pstats));
1386 bzero(&inp->sstats, sizeof (inp->sstats));
1387
1388 net_timerclear(&inp->mode_holdtime);
1389 net_timerclear(&inp->mode_lasttime);
1390 net_timerclear(&inp->sample_holdtime);
1391 net_timerclear(&inp->sample_lasttime);
1392 net_timerclear(&inp->dbg_lasttime);
1393
1394#if IFNET_INPUT_SANITY_CHK
1395 inp->input_mbuf_cnt = 0;
1396#endif /* IFNET_INPUT_SANITY_CHK */
1397}
1398
1399static void
1400dlil_terminate_input_thread(struct dlil_threading_info *inp)
1401{
1402 struct ifnet *ifp = inp->ifp;
1403
1404 VERIFY(current_thread() == inp->input_thr);
1405 VERIFY(inp != dlil_main_input_thread);
1406
1407 OSAddAtomic(-1, &cur_dlil_input_threads);
1408
1409#if TEST_INPUT_THREAD_TERMINATION
1410 { /* do something useless that won't get optimized away */
1411 uint32_t v = 1;
1412 for (uint32_t i = 0;
1413 i < if_input_thread_termination_spin;
1414 i++) {
1415 v = (i + 1) * v;
1416 }
1417 printf("the value is %d\n", v);
1418 }
1419#endif /* TEST_INPUT_THREAD_TERMINATION */
1420
1421 lck_mtx_lock_spin(&inp->input_lck);
1422 VERIFY((inp->input_waiting & DLIL_INPUT_TERMINATE) != 0);
1423 inp->input_waiting |= DLIL_INPUT_TERMINATE_COMPLETE;
1424 wakeup_one((caddr_t)&inp->input_waiting);
1425 lck_mtx_unlock(&inp->input_lck);
1426
1427 /* for the extra refcnt from kernel_thread_start() */
1428 thread_deallocate(current_thread());
1429
1430 if (dlil_verbose) {
1431 printf("%s: input thread terminated\n",
1432 if_name(ifp));
1433 }
1434
1435 /* this is the end */
1436 thread_terminate(current_thread());
1437 /* NOTREACHED */
1438}
1439
1440static kern_return_t
1441dlil_affinity_set(struct thread *tp, u_int32_t tag)
1442{
1443 thread_affinity_policy_data_t policy;
1444
1445 bzero(&policy, sizeof (policy));
1446 policy.affinity_tag = tag;
1447 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY,
1448 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT));
1449}
1450
1451void
1452dlil_init(void)
1453{
1454 thread_t thread = THREAD_NULL;
1455
1456 /*
1457 * The following fields must be 64-bit aligned for atomic operations.
1458 */
1459 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1460 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1461 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1462 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1463 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1464 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1465 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1466 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1467 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1468 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1469 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1470 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1471 IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1472 IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1473 IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1474
1475 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
1476 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors);
1477 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets);
1478 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors);
1479 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions);
1480 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes);
1481 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
1482 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts);
1483 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts);
1484 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
1485 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
1486 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
1487 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
1488 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
1489 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
1490
1491 /*
1492 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
1493 */
1494 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP);
1495 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP);
1496 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
1497 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
1498 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
1499 _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
1500 _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
1501 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
1502 _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
1503 _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT);
1504 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
1505 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
1506 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
1507 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
1508
1509 /*
1510 * ... as well as the mbuf checksum flags counterparts.
1511 */
1512 _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
1513 _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
1514 _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
1515 _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
1516 _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
1517 _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
1518 _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
1519 _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
1520 _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
1521 _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT);
1522 _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
1523
1524 /*
1525 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
1526 */
1527 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
1528 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
1529
1530 _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
1531 _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
1532 _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
1533 _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
1534
1535 _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
1536 _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
1537 _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
1538
1539 _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
1540 _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
1541 _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
1542 _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
1543 _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
1544 _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
1545 _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
1546 _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
1547 _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
1548 _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
1549 _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
1550 _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
1551 _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
1552 _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
1553 _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
1554 _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
1555
1556 _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
1557 _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
1558 _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
1559 _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
1560 _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
1561 _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED);
1562 _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC);
1563
1564 _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
1565 _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
1566
1567 PE_parse_boot_argn("net_affinity", &net_affinity,
1568 sizeof (net_affinity));
1569
1570 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll));
1571
1572 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref));
1573
1574 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug));
1575
1576 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) :
1577 sizeof (struct dlil_ifnet_dbg);
1578 /* Enforce 64-bit alignment for dlil_ifnet structure */
1579 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t);
1580 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t));
1581 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize,
1582 0, DLIF_ZONE_NAME);
1583 if (dlif_zone == NULL) {
1584 panic_plain("%s: failed allocating %s", __func__,
1585 DLIF_ZONE_NAME);
1586 /* NOTREACHED */
1587 }
1588 zone_change(dlif_zone, Z_EXPAND, TRUE);
1589 zone_change(dlif_zone, Z_CALLERACCT, FALSE);
1590
1591 dlif_filt_size = sizeof (struct ifnet_filter);
1592 dlif_filt_zone = zinit(dlif_filt_size,
1593 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME);
1594 if (dlif_filt_zone == NULL) {
1595 panic_plain("%s: failed allocating %s", __func__,
1596 DLIF_FILT_ZONE_NAME);
1597 /* NOTREACHED */
1598 }
1599 zone_change(dlif_filt_zone, Z_EXPAND, TRUE);
1600 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE);
1601
1602 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS;
1603 dlif_phash_zone = zinit(dlif_phash_size,
1604 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME);
1605 if (dlif_phash_zone == NULL) {
1606 panic_plain("%s: failed allocating %s", __func__,
1607 DLIF_PHASH_ZONE_NAME);
1608 /* NOTREACHED */
1609 }
1610 zone_change(dlif_phash_zone, Z_EXPAND, TRUE);
1611 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE);
1612
1613 dlif_proto_size = sizeof (struct if_proto);
1614 dlif_proto_zone = zinit(dlif_proto_size,
1615 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME);
1616 if (dlif_proto_zone == NULL) {
1617 panic_plain("%s: failed allocating %s", __func__,
1618 DLIF_PROTO_ZONE_NAME);
1619 /* NOTREACHED */
1620 }
1621 zone_change(dlif_proto_zone, Z_EXPAND, TRUE);
1622 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE);
1623
1624 dlif_tcpstat_size = sizeof (struct tcpstat_local);
1625 /* Enforce 64-bit alignment for tcpstat_local structure */
1626 dlif_tcpstat_bufsize =
1627 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t);
1628 dlif_tcpstat_bufsize =
1629 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t));
1630 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize,
1631 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0,
1632 DLIF_TCPSTAT_ZONE_NAME);
1633 if (dlif_tcpstat_zone == NULL) {
1634 panic_plain("%s: failed allocating %s", __func__,
1635 DLIF_TCPSTAT_ZONE_NAME);
1636 /* NOTREACHED */
1637 }
1638 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE);
1639 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE);
1640
1641 dlif_udpstat_size = sizeof (struct udpstat_local);
1642 /* Enforce 64-bit alignment for udpstat_local structure */
1643 dlif_udpstat_bufsize =
1644 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t);
1645 dlif_udpstat_bufsize =
1646 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t));
1647 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize,
1648	    DLIF_UDPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0,
1649 DLIF_UDPSTAT_ZONE_NAME);
1650 if (dlif_udpstat_zone == NULL) {
1651 panic_plain("%s: failed allocating %s", __func__,
1652 DLIF_UDPSTAT_ZONE_NAME);
1653 /* NOTREACHED */
1654 }
1655 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE);
1656 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE);
1657
1658 ifnet_llreach_init();
1659 eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
1660
1661 TAILQ_INIT(&dlil_ifnet_head);
1662 TAILQ_INIT(&ifnet_head);
1663 TAILQ_INIT(&ifnet_detaching_head);
1664 TAILQ_INIT(&ifnet_ordered_head);
1665
1666 /* Setup the lock groups we will use */
1667 dlil_grp_attributes = lck_grp_attr_alloc_init();
1668
1669 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks",
1670 dlil_grp_attributes);
1671 ifnet_lock_group = lck_grp_alloc_init("ifnet locks",
1672 dlil_grp_attributes);
1673 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock",
1674 dlil_grp_attributes);
1675 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks",
1676 dlil_grp_attributes);
1677 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks",
1678 dlil_grp_attributes);
1679
1680 /* Setup the lock attributes we will use */
1681 dlil_lck_attributes = lck_attr_alloc_init();
1682
1683 ifnet_lock_attr = lck_attr_alloc_init();
1684
1685 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group,
1686 dlil_lck_attributes);
1687 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
1688
1689 /* Setup interface flow control related items */
1690 lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
1691
1692 ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
1693 ifnet_fc_zone = zinit(ifnet_fc_zone_size,
1694 IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
1695 if (ifnet_fc_zone == NULL) {
1696 panic_plain("%s: failed allocating %s", __func__,
1697 IFNET_FC_ZONE_NAME);
1698 /* NOTREACHED */
1699 }
1700 zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
1701 zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
1702
1703 /* Initialize interface address subsystem */
1704 ifa_init();
1705
1706#if PF
1707 /* Initialize the packet filter */
1708 pfinit();
1709#endif /* PF */
1710
1711 /* Initialize queue algorithms */
1712 classq_init();
1713
1714 /* Initialize packet schedulers */
1715 pktsched_init();
1716
1717 /* Initialize flow advisory subsystem */
1718 flowadv_init();
1719
1720 /* Initialize the pktap virtual interface */
1721 pktap_init();
1722
1723 /* Initialize the service class to dscp map */
1724 net_qos_map_init();
1725
1726 /* Initialize the interface port list */
1727 if_ports_used_init();
1728
1729 /* Initialize the interface low power mode event handler */
1730 if_low_power_evhdlr_init();
1731
1732#if DEBUG || DEVELOPMENT
1733 /* Run self-tests */
1734 dlil_verify_sum16();
1735#endif /* DEBUG || DEVELOPMENT */
1736
1737 /* Initialize link layer table */
1738 lltable_glbl_init();
1739
1740 /*
1741 * Create and start up the main DLIL input thread and the interface
1742 * detacher threads once everything is initialized.
1743 */
1744 dlil_create_input_thread(NULL, dlil_main_input_thread);
1745
1746 if (kernel_thread_start(ifnet_detacher_thread_func,
1747 NULL, &thread) != KERN_SUCCESS) {
1748 panic_plain("%s: couldn't create detacher thread", __func__);
1749 /* NOTREACHED */
1750 }
1751 thread_deallocate(thread);
1752
1753}
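
/*
 * A minimal sketch, for illustration only (not built): the six zones created
 * above all follow the same zinit()/panic/zone_change() sequence.  The helper
 * below shows that pattern in one place; the name dlil_zone_setup() is an
 * assumption made for this example and does not exist elsewhere in the code.
 */
#if 0
static zone_t
dlil_zone_setup(vm_size_t elem_size, unsigned int max_elems, const char *name)
{
	zone_t zone;

	/* back the zone with room for at most max_elems elements */
	zone = zinit(elem_size, max_elems * elem_size, 0, name);
	if (zone == NULL) {
		panic_plain("%s: failed allocating %s", __func__, name);
		/* NOTREACHED */
	}
	/* let the zone grow on demand; don't bill allocations to the caller */
	zone_change(zone, Z_EXPAND, TRUE);
	zone_change(zone, Z_CALLERACCT, FALSE);

	return (zone);
}
#endif /* 0 (illustrative only) */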
1754
1755static void
1756if_flt_monitor_busy(struct ifnet *ifp)
1757{
1758 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1759
1760 ++ifp->if_flt_busy;
1761 VERIFY(ifp->if_flt_busy != 0);
1762}
1763
1764static void
1765if_flt_monitor_unbusy(struct ifnet *ifp)
1766{
1767 if_flt_monitor_leave(ifp);
1768}
1769
1770static void
1771if_flt_monitor_enter(struct ifnet *ifp)
1772{
1773 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1774
1775 while (ifp->if_flt_busy) {
1776 ++ifp->if_flt_waiters;
1777 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
1778 (PZERO - 1), "if_flt_monitor", NULL);
1779 }
1780 if_flt_monitor_busy(ifp);
1781}
1782
1783static void
1784if_flt_monitor_leave(struct ifnet *ifp)
1785{
1786 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1787
1788 VERIFY(ifp->if_flt_busy != 0);
1789 --ifp->if_flt_busy;
1790
1791 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
1792 ifp->if_flt_waiters = 0;
1793 wakeup(&ifp->if_flt_head);
1794 }
1795}
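
/*
 * Usage sketch, for illustration only (not built): the busy/enter/leave
 * routines above form a small monitor that serializes walkers of
 * ifp->if_flt_head.  A caller brackets the list manipulation as the filter
 * attach/detach paths below do; example_walk_filters() is a hypothetical
 * name used only for this sketch.
 */
#if 0
static void
example_walk_filters(struct ifnet *ifp)
{
	struct ifnet_filter *filter;

	lck_mtx_lock(&ifp->if_flt_lock);
	if_flt_monitor_enter(ifp);	/* blocks until no other walker is busy */

	TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
		/* ... inspect or update the filter entry ... */
	}

	if_flt_monitor_leave(ifp);	/* wakes up any waiters */
	lck_mtx_unlock(&ifp->if_flt_lock);
}
#endif /* 0 (illustrative only) */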
1796
1797__private_extern__ int
1798dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
1799 interface_filter_t *filter_ref, u_int32_t flags)
1800{
1801 int retval = 0;
1802 struct ifnet_filter *filter = NULL;
1803
1804 ifnet_head_lock_shared();
1805 /* Check that the interface is in the global list */
1806 if (!ifnet_lookup(ifp)) {
1807 retval = ENXIO;
1808 goto done;
1809 }
1810
1811 filter = zalloc(dlif_filt_zone);
1812 if (filter == NULL) {
1813 retval = ENOMEM;
1814 goto done;
1815 }
1816 bzero(filter, dlif_filt_size);
1817
1818 /* refcnt held above during lookup */
1819 filter->filt_flags = flags;
1820 filter->filt_ifp = ifp;
1821 filter->filt_cookie = if_filter->iff_cookie;
1822 filter->filt_name = if_filter->iff_name;
1823 filter->filt_protocol = if_filter->iff_protocol;
1824 /*
1825 * Do not install filter callbacks for internal coproc interface
1826 */
1827 if (!IFNET_IS_INTCOPROC(ifp)) {
1828 filter->filt_input = if_filter->iff_input;
1829 filter->filt_output = if_filter->iff_output;
1830 filter->filt_event = if_filter->iff_event;
1831 filter->filt_ioctl = if_filter->iff_ioctl;
1832 }
1833 filter->filt_detached = if_filter->iff_detached;
1834
1835 lck_mtx_lock(&ifp->if_flt_lock);
1836 if_flt_monitor_enter(ifp);
1837
1838 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
1839 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next);
1840
1841 if_flt_monitor_leave(ifp);
1842 lck_mtx_unlock(&ifp->if_flt_lock);
1843
1844 *filter_ref = filter;
1845
1846 /*
1847 * Bump filter count and route_generation ID to let TCP
1848 * know it shouldn't do TSO on this connection
1849 */
1850 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1851 OSAddAtomic(1, &dlil_filter_disable_tso_count);
1852 routegenid_update();
1853 }
1854 OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count);
1855 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total);
1856 if ((filter->filt_flags & DLIL_IFF_INTERNAL)) {
1857 INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total);
1858 }
1859 if (dlil_verbose) {
1860 printf("%s: %s filter attached\n", if_name(ifp),
1861 if_filter->iff_name);
1862 }
1863done:
1864 ifnet_head_done();
1865 if (retval != 0 && ifp != NULL) {
1866 DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
1867 if_name(ifp), if_filter->iff_name, retval);
1868 }
1869 if (retval != 0 && filter != NULL)
1870 zfree(dlif_filt_zone, filter);
1871
1872 return (retval);
1873}
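
/*
 * Caller-side sketch, for illustration only (not built): a kernel extension
 * normally reaches dlil_attach_filter() through the iflt_attach() KPI
 * declared in net/kpi_interfacefilter.h.  The callback bodies, cookie and
 * bundle-id string below are hypothetical.
 */
#if 0
static errno_t
example_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(cookie, ifp, protocol, data, frame_ptr)
	return (0);		/* 0 lets the packet continue up the stack */
}

static void
example_iff_detached(void *cookie, ifnet_t ifp)
{
#pragma unused(cookie, ifp)
	/* last chance to release per-filter state */
}

static errno_t
example_attach_filter(ifnet_t ifp, interface_filter_t *filt_ref)
{
	struct iff_filter flt;

	bzero(&flt, sizeof (flt));
	flt.iff_cookie = NULL;
	flt.iff_name = "com.example.filter";	/* hypothetical identifier */
	flt.iff_protocol = 0;			/* 0 matches all protocols */
	flt.iff_input = example_iff_input;
	flt.iff_detached = example_iff_detached;

	return (iflt_attach(ifp, &flt, filt_ref));
}
#endif /* 0 (illustrative only) */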
1874
1875static int
1876dlil_detach_filter_internal(interface_filter_t filter, int detached)
1877{
1878 int retval = 0;
1879
1880 if (detached == 0) {
1881 ifnet_t ifp = NULL;
1882
1883 ifnet_head_lock_shared();
1884 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
1885 interface_filter_t entry = NULL;
1886
1887 lck_mtx_lock(&ifp->if_flt_lock);
1888 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
1889 if (entry != filter || entry->filt_skip)
1890 continue;
1891 /*
1892 * We've found a match; since it's possible
1893 * that the thread gets blocked in the monitor,
1894 * we do the lock dance. Interface should
1895 * not be detached since we still have a use
1896 * count held during filter attach.
1897 */
1898 entry->filt_skip = 1; /* skip input/output */
1899 lck_mtx_unlock(&ifp->if_flt_lock);
1900 ifnet_head_done();
1901
1902 lck_mtx_lock(&ifp->if_flt_lock);
1903 if_flt_monitor_enter(ifp);
1904 LCK_MTX_ASSERT(&ifp->if_flt_lock,
1905 LCK_MTX_ASSERT_OWNED);
1906
1907 /* Remove the filter from the list */
1908 TAILQ_REMOVE(&ifp->if_flt_head, filter,
1909 filt_next);
1910
1911 if_flt_monitor_leave(ifp);
1912 lck_mtx_unlock(&ifp->if_flt_lock);
1913 if (dlil_verbose) {
1914 printf("%s: %s filter detached\n",
1915 if_name(ifp), filter->filt_name);
1916 }
1917 goto destroy;
1918 }
1919 lck_mtx_unlock(&ifp->if_flt_lock);
1920 }
1921 ifnet_head_done();
1922
1923 /* filter parameter is not a valid filter ref */
1924 retval = EINVAL;
1925 goto done;
1926 }
1927
1928 if (dlil_verbose)
1929 printf("%s filter detached\n", filter->filt_name);
1930
1931destroy:
1932
1933 /* Call the detached function if there is one */
1934 if (filter->filt_detached)
1935 filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
1936
1937 /*
1938	 * Decrease filter count and bump route_generation ID to let TCP
1939	 * know it should reevaluate doing TSO or not
1940 */
1941 if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
1942 OSAddAtomic(-1, &dlil_filter_disable_tso_count);
1943 routegenid_update();
1944 }
1945
1946 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0);
1947
1948 /* Free the filter */
1949 zfree(dlif_filt_zone, filter);
1950 filter = NULL;
1951done:
1952 if (retval != 0 && filter != NULL) {
1953 DLIL_PRINTF("failed to detach %s filter (err=%d)\n",
1954 filter->filt_name, retval);
1955 }
1956
1957 return (retval);
1958}
1959
1960__private_extern__ void
1961dlil_detach_filter(interface_filter_t filter)
1962{
1963 if (filter == NULL)
1964 return;
1965 dlil_detach_filter_internal(filter, 0);
1966}
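
/*
 * Matching caller-side note, for illustration only (not built): a kext
 * releases its filter with the iflt_detach() KPI, passing the reference it
 * obtained from iflt_attach(); that call is expected to end up in
 * dlil_detach_filter() above.
 */
#if 0
	iflt_detach(filt_ref);	/* filt_ref came from a prior iflt_attach() */
#endif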
1967
1968/*
1969 * Main input thread:
1970 *
1971 * a) handles all inbound packets for lo0
1972 * b) handles all inbound packets for interfaces with no dedicated
1973 * input thread (e.g. anything but Ethernet/PDP or those that support
1974 * opportunistic polling.)
1975 * c) protocol registrations
1976 * d) packet injections
1977 */
1978__attribute__((noreturn))
1979static void
1980dlil_main_input_thread_func(void *v, wait_result_t w)
1981{
1982#pragma unused(w)
1983 struct dlil_main_threading_info *inpm = v;
1984 struct dlil_threading_info *inp = v;
1985
1986 VERIFY(inp == dlil_main_input_thread);
1987 VERIFY(inp->ifp == NULL);
1988 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
1989
1990 while (1) {
1991 struct mbuf *m = NULL, *m_loop = NULL;
1992 u_int32_t m_cnt, m_cnt_loop;
1993 boolean_t proto_req;
1994
1995 lck_mtx_lock_spin(&inp->input_lck);
1996
1997 /* Wait until there is work to be done */
1998 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
1999 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2000 (void) msleep(&inp->input_waiting, &inp->input_lck,
2001 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2002 }
2003
2004 inp->input_waiting |= DLIL_INPUT_RUNNING;
2005 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2006
2007 /* Main input thread cannot be terminated */
2008 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE));
2009
2010 proto_req = (inp->input_waiting &
2011 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER));
2012
2013 /* Packets for non-dedicated interfaces other than lo0 */
2014 m_cnt = qlen(&inp->rcvq_pkts);
2015 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2016
2017 /* Packets exclusive to lo0 */
2018 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts);
2019 m_loop = _getq_all(&inpm->lo_rcvq_pkts, NULL, NULL, NULL);
2020
2021 inp->wtot = 0;
2022
2023 lck_mtx_unlock(&inp->input_lck);
2024
2025 /*
2026 * NOTE warning %%% attention !!!!
2027		 * We should think about adding some thread starvation
2028 * safeguards if we deal with long chains of packets.
2029 */
2030 if (m_loop != NULL)
2031 dlil_input_packet_list_extended(lo_ifp, m_loop,
2032 m_cnt_loop, inp->mode);
2033
2034 if (m != NULL)
2035 dlil_input_packet_list_extended(NULL, m,
2036 m_cnt, inp->mode);
2037
2038 if (proto_req)
2039 proto_input_run();
2040 }
2041
2042 /* NOTREACHED */
2043 VERIFY(0); /* we should never get here */
2044}
2045
2046/*
2047 * Input thread for interfaces with legacy input model.
2048 */
2049static void
2050dlil_input_thread_func(void *v, wait_result_t w)
2051{
2052#pragma unused(w)
2053 char thread_name[MAXTHREADNAMESIZE];
2054 struct dlil_threading_info *inp = v;
2055 struct ifnet *ifp = inp->ifp;
2056
2057 /* Construct the name for this thread, and then apply it. */
2058 bzero(thread_name, sizeof(thread_name));
2059 snprintf(thread_name, sizeof(thread_name), "dlil_input_%s", ifp->if_xname);
2060 thread_set_thread_name(inp->input_thr, thread_name);
2061
2062 VERIFY(inp != dlil_main_input_thread);
2063 VERIFY(ifp != NULL);
2064 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll);
2065 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
2066
2067 while (1) {
2068 struct mbuf *m = NULL;
2069 u_int32_t m_cnt;
2070
2071 lck_mtx_lock_spin(&inp->input_lck);
2072
2073 /* Wait until there is work to be done */
2074 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2075 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2076 (void) msleep(&inp->input_waiting, &inp->input_lck,
2077 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2078 }
2079
2080 inp->input_waiting |= DLIL_INPUT_RUNNING;
2081 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2082
2083 /*
2084 * Protocol registration and injection must always use
2085 * the main input thread; in theory the latter can utilize
2086		 * the input thread on which the packet arrived, but that
2087		 * requires our knowing the interface in advance (and the
2088		 * benefits might not be worth the trouble.)
2089 */
2090 VERIFY(!(inp->input_waiting &
2091 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2092
2093 /* Packets for this interface */
2094 m_cnt = qlen(&inp->rcvq_pkts);
2095 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2096
2097 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2098 lck_mtx_unlock(&inp->input_lck);
2099
2100 /* Free up pending packets */
2101 if (m != NULL)
2102 mbuf_freem_list(m);
2103
2104 dlil_terminate_input_thread(inp);
2105 /* NOTREACHED */
2106 return;
2107 }
2108
2109 inp->wtot = 0;
2110
2111 dlil_input_stats_sync(ifp, inp);
2112
2113 lck_mtx_unlock(&inp->input_lck);
2114
2115 /*
2116 * NOTE warning %%% attention !!!!
2117		 * We should think about adding some thread starvation
2118 * safeguards if we deal with long chains of packets.
2119 */
2120 if (m != NULL)
2121 dlil_input_packet_list_extended(NULL, m,
2122 m_cnt, inp->mode);
2123 }
2124
2125 /* NOTREACHED */
2126 VERIFY(0); /* we should never get here */
2127}
2128
2129/*
2130 * Input thread for interfaces with opportunistic polling input model.
2131 */
2132static void
2133dlil_rxpoll_input_thread_func(void *v, wait_result_t w)
2134{
2135#pragma unused(w)
2136 struct dlil_threading_info *inp = v;
2137 struct ifnet *ifp = inp->ifp;
2138 struct timespec ts;
2139
2140 VERIFY(inp != dlil_main_input_thread);
2141 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
2142
2143 while (1) {
2144 struct mbuf *m = NULL;
2145 u_int32_t m_cnt, m_size, poll_req = 0;
2146 ifnet_model_t mode;
2147 struct timespec now, delta;
2148 u_int64_t ival;
2149
2150 lck_mtx_lock_spin(&inp->input_lck);
2151
2152 if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN)
2153 ival = IF_RXPOLL_INTERVALTIME_MIN;
2154
2155 /* Link parameters changed? */
2156 if (ifp->if_poll_update != 0) {
2157 ifp->if_poll_update = 0;
2158 (void) dlil_rxpoll_set_params(ifp, NULL, TRUE);
2159 }
2160
2161 /* Current operating mode */
2162 mode = inp->mode;
2163
2164 /* Wait until there is work to be done */
2165 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) {
2166 inp->input_waiting &= ~DLIL_INPUT_RUNNING;
2167 (void) msleep(&inp->input_waiting, &inp->input_lck,
2168 (PZERO - 1) | PSPIN, inp->input_name, NULL);
2169 }
2170
2171 inp->input_waiting |= DLIL_INPUT_RUNNING;
2172 inp->input_waiting &= ~DLIL_INPUT_WAITING;
2173
2174 /*
2175 * Protocol registration and injection must always use
2176 * the main input thread; in theory the latter can utilize
2177		 * the input thread on which the packet arrived, but that
2178		 * requires our knowing the interface in advance (and the
2179		 * benefits might not be worth the trouble.)
2180 */
2181 VERIFY(!(inp->input_waiting &
2182 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER)));
2183
2184 if (inp->input_waiting & DLIL_INPUT_TERMINATE) {
2185 /* Free up pending packets */
2186 lck_mtx_convert_spin(&inp->input_lck);
2187 _flushq(&inp->rcvq_pkts);
2188 if (inp->input_mit_tcall != NULL) {
2189 if (thread_call_isactive(inp->input_mit_tcall))
2190 thread_call_cancel(inp->input_mit_tcall);
2191 }
2192 lck_mtx_unlock(&inp->input_lck);
2193
2194 dlil_terminate_input_thread(inp);
2195 /* NOTREACHED */
2196 return;
2197 }
2198
2199 /* Total count of all packets */
2200 m_cnt = qlen(&inp->rcvq_pkts);
2201
2202 /* Total bytes of all packets */
2203 m_size = qsize(&inp->rcvq_pkts);
2204
2205 /* Packets for this interface */
2206 m = _getq_all(&inp->rcvq_pkts, NULL, NULL, NULL);
2207 VERIFY(m != NULL || m_cnt == 0);
2208
2209 nanouptime(&now);
2210 if (!net_timerisset(&inp->sample_lasttime))
2211 *(&inp->sample_lasttime) = *(&now);
2212
2213 net_timersub(&now, &inp->sample_lasttime, &delta);
2214 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) {
2215 u_int32_t ptot, btot;
2216
2217 /* Accumulate statistics for current sampling */
2218 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size);
2219
2220 if (net_timercmp(&delta, &inp->sample_holdtime, <))
2221 goto skip;
2222
2223 *(&inp->sample_lasttime) = *(&now);
2224
2225 /* Calculate min/max of inbound bytes */
2226 btot = (u_int32_t)inp->sstats.bytes;
2227 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot)
2228 inp->rxpoll_bmin = btot;
2229 if (btot > inp->rxpoll_bmax)
2230 inp->rxpoll_bmax = btot;
2231
2232 /* Calculate EWMA of inbound bytes */
2233 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay);
2234
2235 /* Calculate min/max of inbound packets */
2236 ptot = (u_int32_t)inp->sstats.packets;
2237 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot)
2238 inp->rxpoll_pmin = ptot;
2239 if (ptot > inp->rxpoll_pmax)
2240 inp->rxpoll_pmax = ptot;
2241
2242 /* Calculate EWMA of inbound packets */
2243 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay);
2244
2245 /* Reset sampling statistics */
2246 PKTCNTR_CLEAR(&inp->sstats);
2247
2248 /* Calculate EWMA of wakeup requests */
2249 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay);
2250 inp->wtot = 0;
2251
2252 if (dlil_verbose) {
2253 if (!net_timerisset(&inp->dbg_lasttime))
2254 *(&inp->dbg_lasttime) = *(&now);
2255 net_timersub(&now, &inp->dbg_lasttime, &delta);
2256 if (net_timercmp(&delta, &dlil_dbgrate, >=)) {
2257 *(&inp->dbg_lasttime) = *(&now);
2258 printf("%s: [%s] pkts avg %d max %d "
2259 "limits [%d/%d], wreq avg %d "
2260 "limits [%d/%d], bytes avg %d "
2261 "limits [%d/%d]\n", if_name(ifp),
2262 (inp->mode ==
2263 IFNET_MODEL_INPUT_POLL_ON) ?
2264 "ON" : "OFF", inp->rxpoll_pavg,
2265 inp->rxpoll_pmax,
2266 inp->rxpoll_plowat,
2267 inp->rxpoll_phiwat,
2268 inp->rxpoll_wavg,
2269 inp->rxpoll_wlowat,
2270 inp->rxpoll_whiwat,
2271 inp->rxpoll_bavg,
2272 inp->rxpoll_blowat,
2273 inp->rxpoll_bhiwat);
2274 }
2275 }
2276
2277 /* Perform mode transition, if necessary */
2278 if (!net_timerisset(&inp->mode_lasttime))
2279 *(&inp->mode_lasttime) = *(&now);
2280
2281 net_timersub(&now, &inp->mode_lasttime, &delta);
2282 if (net_timercmp(&delta, &inp->mode_holdtime, <))
2283 goto skip;
2284
2285 if (inp->rxpoll_pavg <= inp->rxpoll_plowat &&
2286 inp->rxpoll_bavg <= inp->rxpoll_blowat &&
2287 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) {
2288 mode = IFNET_MODEL_INPUT_POLL_OFF;
2289 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat &&
2290 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat ||
2291 inp->rxpoll_wavg >= inp->rxpoll_whiwat) &&
2292 inp->mode != IFNET_MODEL_INPUT_POLL_ON) {
2293 mode = IFNET_MODEL_INPUT_POLL_ON;
2294 }
2295
2296 if (mode != inp->mode) {
2297 inp->mode = mode;
2298 *(&inp->mode_lasttime) = *(&now);
2299 poll_req++;
2300 }
2301 }
2302skip:
2303 dlil_input_stats_sync(ifp, inp);
2304
2305 lck_mtx_unlock(&inp->input_lck);
2306
2307 /*
2308 * If there's a mode change and interface is still attached,
2309 * perform a downcall to the driver for the new mode. Also
2310 * hold an IO refcnt on the interface to prevent it from
2311		 * being detached (will be released below.)
2312 */
2313 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) {
2314 struct ifnet_model_params p = { mode, { 0 } };
2315 errno_t err;
2316
2317 if (dlil_verbose) {
2318 printf("%s: polling is now %s, "
2319 "pkts avg %d max %d limits [%d/%d], "
2320 "wreq avg %d limits [%d/%d], "
2321 "bytes avg %d limits [%d/%d]\n",
2322 if_name(ifp),
2323 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2324 "ON" : "OFF", inp->rxpoll_pavg,
2325 inp->rxpoll_pmax, inp->rxpoll_plowat,
2326 inp->rxpoll_phiwat, inp->rxpoll_wavg,
2327 inp->rxpoll_wlowat, inp->rxpoll_whiwat,
2328 inp->rxpoll_bavg, inp->rxpoll_blowat,
2329 inp->rxpoll_bhiwat);
2330 }
2331
2332 if ((err = ((*ifp->if_input_ctl)(ifp,
2333 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) {
2334 printf("%s: error setting polling mode "
2335 "to %s (%d)\n", if_name(ifp),
2336 (mode == IFNET_MODEL_INPUT_POLL_ON) ?
2337 "ON" : "OFF", err);
2338 }
2339
2340 switch (mode) {
2341 case IFNET_MODEL_INPUT_POLL_OFF:
2342 ifnet_set_poll_cycle(ifp, NULL);
2343 inp->rxpoll_offreq++;
2344 if (err != 0)
2345 inp->rxpoll_offerr++;
2346 break;
2347
2348 case IFNET_MODEL_INPUT_POLL_ON:
2349 net_nsectimer(&ival, &ts);
2350 ifnet_set_poll_cycle(ifp, &ts);
2351 ifnet_poll(ifp);
2352 inp->rxpoll_onreq++;
2353 if (err != 0)
2354 inp->rxpoll_onerr++;
2355 break;
2356
2357 default:
2358 VERIFY(0);
2359 /* NOTREACHED */
2360 }
2361
2362 /* Release the IO refcnt */
2363 ifnet_decr_iorefcnt(ifp);
2364 }
2365
2366 /*
2367 * NOTE warning %%% attention !!!!
2368		 * We should think about adding some thread starvation
2369 * safeguards if we deal with long chains of packets.
2370 */
2371 if (m != NULL)
2372 dlil_input_packet_list_extended(NULL, m, m_cnt, mode);
2373 }
2374
2375 /* NOTREACHED */
2376 VERIFY(0); /* we should never get here */
2377}
2378
2379/*
2380 * Must be called on an attached ifnet (caller is expected to check.)
2381 * Caller may pass NULL for poll parameters to indicate "auto-tuning."
2382 */
2383errno_t
2384dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
2385 boolean_t locked)
2386{
2387 struct dlil_threading_info *inp;
2388 u_int64_t sample_holdtime, inbw;
2389
2390 VERIFY(ifp != NULL);
2391 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2392 return (ENXIO);
2393
2394 if (p != NULL) {
2395 if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
2396 (p->packets_lowat != 0 && p->packets_hiwat == 0))
2397 return (EINVAL);
2398 if (p->packets_lowat != 0 && /* hiwat must be non-zero */
2399 p->packets_lowat >= p->packets_hiwat)
2400 return (EINVAL);
2401 if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
2402 (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
2403 return (EINVAL);
2404 if (p->bytes_lowat != 0 && /* hiwat must be non-zero */
2405 p->bytes_lowat >= p->bytes_hiwat)
2406 return (EINVAL);
2407 if (p->interval_time != 0 &&
2408 p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
2409 p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
2410 }
2411
2412 if (!locked)
2413 lck_mtx_lock(&inp->input_lck);
2414
2415 LCK_MTX_ASSERT(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
2416
2417 /*
2418 * Normally, we'd reset the parameters to the auto-tuned values
2419	 * if the input thread detects a change in link rate. If the
2420 * driver provides its own parameters right after a link rate
2421 * changes, but before the input thread gets to run, we want to
2422 * make sure to keep the driver's values. Clearing if_poll_update
2423 * will achieve that.
2424 */
2425 if (p != NULL && !locked && ifp->if_poll_update != 0)
2426 ifp->if_poll_update = 0;
2427
2428 if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
2429 sample_holdtime = 0; /* polling is disabled */
2430 inp->rxpoll_wlowat = inp->rxpoll_plowat =
2431 inp->rxpoll_blowat = 0;
2432 inp->rxpoll_whiwat = inp->rxpoll_phiwat =
2433 inp->rxpoll_bhiwat = (u_int32_t)-1;
2434 inp->rxpoll_plim = 0;
2435 inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
2436 } else {
2437 u_int32_t plowat, phiwat, blowat, bhiwat, plim;
2438 u_int64_t ival;
2439 unsigned int n, i;
2440
2441 for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
2442 if (inbw < rxpoll_tbl[i].speed)
2443 break;
2444 n = i;
2445 }
2446 /* auto-tune if caller didn't specify a value */
2447 plowat = ((p == NULL || p->packets_lowat == 0) ?
2448 rxpoll_tbl[n].plowat : p->packets_lowat);
2449 phiwat = ((p == NULL || p->packets_hiwat == 0) ?
2450 rxpoll_tbl[n].phiwat : p->packets_hiwat);
2451 blowat = ((p == NULL || p->bytes_lowat == 0) ?
2452 rxpoll_tbl[n].blowat : p->bytes_lowat);
2453 bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
2454 rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
2455 plim = ((p == NULL || p->packets_limit == 0) ?
2456 if_rxpoll_max : p->packets_limit);
2457 ival = ((p == NULL || p->interval_time == 0) ?
2458 if_rxpoll_interval_time : p->interval_time);
2459
2460 VERIFY(plowat != 0 && phiwat != 0);
2461 VERIFY(blowat != 0 && bhiwat != 0);
2462 VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN);
2463
2464 sample_holdtime = if_rxpoll_sample_holdtime;
2465 inp->rxpoll_wlowat = if_rxpoll_wlowat;
2466 inp->rxpoll_whiwat = if_rxpoll_whiwat;
2467 inp->rxpoll_plowat = plowat;
2468 inp->rxpoll_phiwat = phiwat;
2469 inp->rxpoll_blowat = blowat;
2470 inp->rxpoll_bhiwat = bhiwat;
2471 inp->rxpoll_plim = plim;
2472 inp->rxpoll_ival = ival;
2473 }
2474
2475 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime);
2476 net_nsectimer(&sample_holdtime, &inp->sample_holdtime);
2477
2478 if (dlil_verbose) {
2479 printf("%s: speed %llu bps, sample per %llu nsec, "
2480 "poll interval %llu nsec, pkts per poll %u, "
2481 "pkt limits [%u/%u], wreq limits [%u/%u], "
2482 "bytes limits [%u/%u]\n", if_name(ifp),
2483 inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim,
2484 inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat,
2485 inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat);
2486 }
2487
2488 if (!locked)
2489 lck_mtx_unlock(&inp->input_lck);
2490
2491 return (0);
2492}
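
/*
 * Caller-side sketch, for illustration only (not built): a driver that wants
 * to pin its own polling thresholds instead of relying on the rxpoll_tbl
 * auto-tuning above can fill in an ifnet_poll_params and hand it to this
 * routine (drivers would normally go through the corresponding KPI wrapper
 * rather than call this internal routine directly).  The threshold values
 * below are made up for this example.
 */
#if 0
static errno_t
example_set_poll_params(struct ifnet *ifp)
{
	struct ifnet_poll_params p;
	errno_t err;

	bzero(&p, sizeof (p));
	p.packets_lowat = 8;		/* leave poll mode below this */
	p.packets_hiwat = 64;		/* enter poll mode above this */
	p.interval_time = 1000 * 1000;	/* 1 ms poll interval, in nsec */

	/* hold an IO refcnt across the call, as the routine expects */
	if (!ifnet_is_attached(ifp, 1))
		return (ENXIO);
	err = dlil_rxpoll_set_params(ifp, &p, FALSE);
	ifnet_decr_iorefcnt(ifp);

	return (err);
}
#endif /* 0 (illustrative only) */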
2493
2494/*
2495 * Must be called on an attached ifnet (caller is expected to check.)
2496 */
2497errno_t
2498dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p)
2499{
2500 struct dlil_threading_info *inp;
2501
2502 VERIFY(ifp != NULL && p != NULL);
2503 if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
2504 return (ENXIO);
2505
2506 bzero(p, sizeof (*p));
2507
2508 lck_mtx_lock(&inp->input_lck);
2509 p->packets_limit = inp->rxpoll_plim;
2510 p->packets_lowat = inp->rxpoll_plowat;
2511 p->packets_hiwat = inp->rxpoll_phiwat;
2512 p->bytes_lowat = inp->rxpoll_blowat;
2513 p->bytes_hiwat = inp->rxpoll_bhiwat;
2514 p->interval_time = inp->rxpoll_ival;
2515 lck_mtx_unlock(&inp->input_lck);
2516
2517 return (0);
2518}
2519
2520errno_t
2521ifnet_input(struct ifnet *ifp, struct mbuf *m_head,
2522 const struct ifnet_stat_increment_param *s)
2523{
2524 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE));
2525}
2526
2527errno_t
2528ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head,
2529 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s)
2530{
2531 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE));
2532}
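
/*
 * Driver-side sketch, for illustration only (not built): a typical receive
 * path builds an mbuf chain, fills in the per-call stat increments, and hands
 * the chain up through ifnet_input_extended().  The helper name and the way
 * the chain is produced are hypothetical.
 */
#if 0
static void
example_driver_rx(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
    u_int32_t cnt, u_int32_t bytes)
{
	struct ifnet_stat_increment_param s;

	bzero(&s, sizeof (s));
	s.packets_in = cnt;	/* must match the number of packets in chain */
	s.bytes_in = bytes;	/* approximate; may include link-layer headers */

	/* DLIL takes ownership of the chain, even on error */
	(void) ifnet_input_extended(ifp, m_head, m_tail, &s);
}
#endif /* 0 (illustrative only) */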
2533
2534static errno_t
2535ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail,
2536 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll)
2537{
2538 dlil_input_func input_func;
2539 struct ifnet_stat_increment_param _s;
2540 u_int32_t m_cnt = 0, m_size = 0;
2541 struct mbuf *last;
2542 errno_t err = 0;
2543
2544 if ((m_head == NULL && !poll) || (s == NULL && ext)) {
2545 if (m_head != NULL)
2546 mbuf_freem_list(m_head);
2547 return (EINVAL);
2548 }
2549
2550 VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll));
2551 VERIFY(m_tail == NULL || ext);
2552 VERIFY(s != NULL || !ext);
2553
2554 /*
2555 * Drop the packet(s) if the parameters are invalid, or if the
2556 * interface is no longer attached; else hold an IO refcnt to
2557 * prevent it from being detached (will be released below.)
2558 */
2559 if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) {
2560 if (m_head != NULL)
2561 mbuf_freem_list(m_head);
2562 return (EINVAL);
2563 }
2564
2565 input_func = ifp->if_input_dlil;
2566 VERIFY(input_func != NULL);
2567
2568 if (m_tail == NULL) {
2569 last = m_head;
2570 while (m_head != NULL) {
2571#if IFNET_INPUT_SANITY_CHK
2572 if (dlil_input_sanity_check != 0)
2573 DLIL_INPUT_CHECK(last, ifp);
2574#endif /* IFNET_INPUT_SANITY_CHK */
2575 m_cnt++;
2576 m_size += m_length(last);
2577 if (mbuf_nextpkt(last) == NULL)
2578 break;
2579 last = mbuf_nextpkt(last);
2580 }
2581 m_tail = last;
2582 } else {
2583#if IFNET_INPUT_SANITY_CHK
2584 if (dlil_input_sanity_check != 0) {
2585 last = m_head;
2586 while (1) {
2587 DLIL_INPUT_CHECK(last, ifp);
2588 m_cnt++;
2589 m_size += m_length(last);
2590 if (mbuf_nextpkt(last) == NULL)
2591 break;
2592 last = mbuf_nextpkt(last);
2593 }
2594 } else {
2595 m_cnt = s->packets_in;
2596 m_size = s->bytes_in;
2597 last = m_tail;
2598 }
2599#else
2600 m_cnt = s->packets_in;
2601 m_size = s->bytes_in;
2602 last = m_tail;
2603#endif /* IFNET_INPUT_SANITY_CHK */
2604 }
2605
2606 if (last != m_tail) {
2607 panic_plain("%s: invalid input packet chain for %s, "
2608 "tail mbuf %p instead of %p\n", __func__, if_name(ifp),
2609 m_tail, last);
2610 }
2611
2612 /*
2613 * Assert packet count only for the extended variant, for backwards
2614 * compatibility, since this came directly from the device driver.
2615 * Relax this assertion for input bytes, as the driver may have
2616 * included the link-layer headers in the computation; hence
2617 * m_size is just an approximation.
2618 */
2619 if (ext && s->packets_in != m_cnt) {
2620 panic_plain("%s: input packet count mismatch for %s, "
2621 "%d instead of %d\n", __func__, if_name(ifp),
2622 s->packets_in, m_cnt);
2623 }
2624
2625 if (s == NULL) {
2626 bzero(&_s, sizeof (_s));
2627 s = &_s;
2628 } else {
2629 _s = *s;
2630 }
2631 _s.packets_in = m_cnt;
2632 _s.bytes_in = m_size;
2633
2634 err = (*input_func)(ifp, m_head, m_tail, s, poll, current_thread());
2635
2636 if (ifp != lo_ifp) {
2637 /* Release the IO refcnt */
2638 ifnet_decr_iorefcnt(ifp);
2639 }
2640
2641 return (err);
2642}
2643
2644
2645errno_t
2646dlil_output_handler(struct ifnet *ifp, struct mbuf *m)
2647{
2648 return (ifp->if_output(ifp, m));
2649}
2650
2651errno_t
2652dlil_input_handler(struct ifnet *ifp, struct mbuf *m_head,
2653 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
2654 boolean_t poll, struct thread *tp)
2655{
2656 struct dlil_threading_info *inp;
2657 u_int32_t m_cnt = s->packets_in;
2658 u_int32_t m_size = s->bytes_in;
2659
2660 if ((inp = ifp->if_inp) == NULL)
2661 inp = dlil_main_input_thread;
2662
2663 /*
2664 * If there is a matching DLIL input thread associated with an
2665 * affinity set, associate this thread with the same set. We
2666 * will only do this once.
2667 */
2668 lck_mtx_lock_spin(&inp->input_lck);
2669 if (inp != dlil_main_input_thread && inp->net_affinity && tp != NULL &&
2670 ((!poll && inp->wloop_thr == THREAD_NULL) ||
2671 (poll && inp->poll_thr == THREAD_NULL))) {
2672 u_int32_t tag = inp->tag;
2673
2674 if (poll) {
2675 VERIFY(inp->poll_thr == THREAD_NULL);
2676 inp->poll_thr = tp;
2677 } else {
2678 VERIFY(inp->wloop_thr == THREAD_NULL);
2679 inp->wloop_thr = tp;
2680 }
2681 lck_mtx_unlock(&inp->input_lck);
2682
2683 /* Associate the current thread with the new affinity tag */
2684 (void) dlil_affinity_set(tp, tag);
2685
2686 /*
2687 * Take a reference on the current thread; during detach,
2688 * we will need to refer to it in order to tear down its
2689 * affinity.
2690 */
2691 thread_reference(tp);
2692 lck_mtx_lock_spin(&inp->input_lck);
2693 }
2694
2695 VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0));
2696
2697 /*
2698 * Because of loopbacked multicast we cannot stuff the ifp in
2699 * the rcvif of the packet header: loopback (lo0) packets use a
2700 * dedicated list so that we can later associate them with lo_ifp
2701 * on their way up the stack. Packets for other interfaces without
2702 * dedicated input threads go to the regular list.
2703 */
2704 if (m_head != NULL) {
2705 if (inp == dlil_main_input_thread && ifp == lo_ifp) {
2706 struct dlil_main_threading_info *inpm =
2707 (struct dlil_main_threading_info *)inp;
2708 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail,
2709 m_cnt, m_size);
2710 } else {
2711 _addq_multi(&inp->rcvq_pkts, m_head, m_tail,
2712 m_cnt, m_size);
2713 }
2714 }
2715
2716#if IFNET_INPUT_SANITY_CHK
2717 if (dlil_input_sanity_check != 0) {
2718 u_int32_t count;
2719 struct mbuf *m0;
2720
2721 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0))
2722 count++;
2723
2724 if (count != m_cnt) {
2725 panic_plain("%s: invalid packet count %d "
2726 "(expected %d)\n", if_name(ifp),
2727 count, m_cnt);
2728 /* NOTREACHED */
2729 }
2730
2731 inp->input_mbuf_cnt += m_cnt;
2732 }
2733#endif /* IFNET_INPUT_SANITY_CHK */
2734
2735 dlil_input_stats_add(s, inp, poll);
2736 /*
2737 * If we're using the main input thread, synchronize the
2738 * stats now since we have the interface context. All
2739 * other cases involving dedicated input threads will
2740 * have their stats synchronized there.
2741 */
2742 if (inp == dlil_main_input_thread)
2743 dlil_input_stats_sync(ifp, inp);
2744
2745 if (inp->input_mit_tcall &&
2746 qlen(&inp->rcvq_pkts) >= dlil_rcv_mit_pkts_min &&
2747 qlen(&inp->rcvq_pkts) < dlil_rcv_mit_pkts_max &&
2748 (ifp->if_family == IFNET_FAMILY_ETHERNET ||
2749 ifp->if_type == IFT_CELLULAR)
2750 ) {
2751 if (!thread_call_isactive(inp->input_mit_tcall)) {
2752 uint64_t deadline;
2753 clock_interval_to_deadline(dlil_rcv_mit_interval,
2754 1, &deadline);
2755 (void) thread_call_enter_delayed(
2756 inp->input_mit_tcall, deadline);
2757 }
2758 } else {
2759 inp->input_waiting |= DLIL_INPUT_WAITING;
2760 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
2761 inp->wtot++;
2762 wakeup_one((caddr_t)&inp->input_waiting);
2763 }
2764 }
2765 lck_mtx_unlock(&inp->input_lck);
2766
2767 return (0);
2768}
2769
2770
2771static void
2772ifnet_start_common(struct ifnet *ifp, boolean_t resetfc)
2773{
2774 if (!(ifp->if_eflags & IFEF_TXSTART))
2775 return;
2776 /*
2777 * If the starter thread is inactive, signal it to do work,
2778 * unless the interface is being flow controlled from below,
2779 * e.g. a virtual interface being flow controlled by a real
2780 * network interface beneath it, or it's been disabled via
2781 * a call to ifnet_disable_output().
2782 */
2783 lck_mtx_lock_spin(&ifp->if_start_lock);
2784 if (resetfc) {
2785 ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED;
2786 } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) {
2787 lck_mtx_unlock(&ifp->if_start_lock);
2788 return;
2789 }
2790 ifp->if_start_req++;
2791 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL &&
2792 (resetfc || !(ifp->if_eflags & IFEF_ENQUEUE_MULTI) ||
2793 IFCQ_LEN(&ifp->if_snd) >= ifp->if_start_delay_qlen ||
2794 ifp->if_start_delayed == 0)) {
2795 (void) thread_wakeup_thread((caddr_t)&ifp->if_start_thread,
2796 ifp->if_start_thread);
2797 }
2798 lck_mtx_unlock(&ifp->if_start_lock);
2799}
2800
2801void
2802ifnet_start(struct ifnet *ifp)
2803{
2804 ifnet_start_common(ifp, FALSE);
2805}
2806
2807static void
2808ifnet_start_thread_fn(void *v, wait_result_t w)
2809{
2810#pragma unused(w)
2811 struct ifnet *ifp = v;
2812 char ifname[IFNAMSIZ + 1];
2813 char thread_name[MAXTHREADNAMESIZE];
2814 struct timespec *ts = NULL;
2815 struct ifclassq *ifq = &ifp->if_snd;
2816 struct timespec delay_start_ts;
2817
2818 /* Construct the name for this thread, and then apply it. */
2819 bzero(thread_name, sizeof(thread_name));
2820 (void) snprintf(thread_name, sizeof (thread_name),
2821 "ifnet_start_%s", ifp->if_xname);
2822 thread_set_thread_name(ifp->if_start_thread, thread_name);
2823
2824 /*
2825 * Treat the dedicated starter thread for lo0 as equivalent to
2826 * the driver workloop thread; if net_affinity is enabled for
2827 * the main input thread, associate this starter thread to it
2828 * by binding them with the same affinity tag. This is done
2829 * only once (as we only have one lo_ifp which never goes away.)
2830 */
2831 if (ifp == lo_ifp) {
2832 struct dlil_threading_info *inp = dlil_main_input_thread;
2833 struct thread *tp = current_thread();
2834
2835 lck_mtx_lock(&inp->input_lck);
2836 if (inp->net_affinity) {
2837 u_int32_t tag = inp->tag;
2838
2839 VERIFY(inp->wloop_thr == THREAD_NULL);
2840 VERIFY(inp->poll_thr == THREAD_NULL);
2841 inp->wloop_thr = tp;
2842 lck_mtx_unlock(&inp->input_lck);
2843
2844 /* Associate this thread with the affinity tag */
2845 (void) dlil_affinity_set(tp, tag);
2846 } else {
2847 lck_mtx_unlock(&inp->input_lck);
2848 }
2849 }
2850
2851 (void) snprintf(ifname, sizeof (ifname), "%s_starter", if_name(ifp));
2852
2853 lck_mtx_lock_spin(&ifp->if_start_lock);
2854
2855 for (;;) {
2856 if (ifp->if_start_thread != NULL) {
2857 (void) msleep(&ifp->if_start_thread,
2858 &ifp->if_start_lock,
2859 (PZERO - 1) | PSPIN, ifname, ts);
2860 }
2861 /* interface is detached? */
2862 if (ifp->if_start_thread == THREAD_NULL) {
2863 ifnet_set_start_cycle(ifp, NULL);
2864 lck_mtx_unlock(&ifp->if_start_lock);
2865 ifnet_purge(ifp);
2866
2867 if (dlil_verbose) {
2868 printf("%s: starter thread terminated\n",
2869 if_name(ifp));
2870 }
2871
2872 /* for the extra refcnt from kernel_thread_start() */
2873 thread_deallocate(current_thread());
2874 /* this is the end */
2875 thread_terminate(current_thread());
2876 /* NOTREACHED */
2877 return;
2878 }
2879
2880 ifp->if_start_active = 1;
2881
2882 for (;;) {
2883 u_int32_t req = ifp->if_start_req;
2884 if (!IFCQ_IS_EMPTY(ifq) &&
2885 (ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
2886 ifp->if_start_delayed == 0 &&
2887 IFCQ_LEN(ifq) < ifp->if_start_delay_qlen &&
2888 (ifp->if_eflags & IFEF_DELAY_START)) {
2889 ifp->if_start_delayed = 1;
2890 ifnet_start_delayed++;
2891 break;
2892 } else {
2893 ifp->if_start_delayed = 0;
2894 }
2895 lck_mtx_unlock(&ifp->if_start_lock);
2896
2897 /*
2898 * If no longer attached, don't call start because ifp
2899 * is being destroyed; else hold an IO refcnt to
2900 * prevent the interface from being detached (will be
2901 * released below.)
2902 */
2903 if (!ifnet_is_attached(ifp, 1)) {
2904 lck_mtx_lock_spin(&ifp->if_start_lock);
2905 break;
2906 }
2907
2908 /* invoke the driver's start routine */
2909 ((*ifp->if_start)(ifp));
2910
2911 /*
2912 * Release the io ref count taken by ifnet_is_attached.
2913 */
2914 ifnet_decr_iorefcnt(ifp);
2915
2916 lck_mtx_lock_spin(&ifp->if_start_lock);
2917
2918 /*
2919 * If there's no pending request or if the
2920 * interface has been disabled, we're done.
2921 */
2922 if (req == ifp->if_start_req ||
2923 (ifp->if_start_flags & IFSF_FLOW_CONTROLLED)) {
2924 break;
2925 }
2926 }
2927
2928 ifp->if_start_req = 0;
2929 ifp->if_start_active = 0;
2930
2931 /*
2932 * Wakeup N ns from now if rate-controlled by TBR, and if
2933 * there are still packets in the send queue which haven't
2934 * been dequeued so far; else sleep indefinitely (ts = NULL)
2935 * until ifnet_start() is called again.
2936 */
2937 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ?
2938 &ifp->if_start_cycle : NULL);
2939
2940 if (ts == NULL && ifp->if_start_delayed == 1) {
2941 delay_start_ts.tv_sec = 0;
2942 delay_start_ts.tv_nsec = ifp->if_start_delay_timeout;
2943 ts = &delay_start_ts;
2944 }
2945
2946 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0)
2947 ts = NULL;
2948 }
2949
2950 /* NOTREACHED */
2951}
2952
2953void
2954ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts)
2955{
2956 if (ts == NULL)
2957 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle));
2958 else
2959 *(&ifp->if_start_cycle) = *ts;
2960
2961 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
2962 printf("%s: restart interval set to %lu nsec\n",
2963 if_name(ifp), ts->tv_nsec);
2964}
2965
2966static void
2967ifnet_poll(struct ifnet *ifp)
2968{
2969 /*
2970 * If the poller thread is inactive, signal it to do work.
2971 */
2972 lck_mtx_lock_spin(&ifp->if_poll_lock);
2973 ifp->if_poll_req++;
2974 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) {
2975 wakeup_one((caddr_t)&ifp->if_poll_thread);
2976 }
2977 lck_mtx_unlock(&ifp->if_poll_lock);
2978}
2979
2980static void
2981ifnet_poll_thread_fn(void *v, wait_result_t w)
2982{
2983#pragma unused(w)
2984 struct dlil_threading_info *inp;
2985 struct ifnet *ifp = v;
2986 char ifname[IFNAMSIZ + 1];
2987 struct timespec *ts = NULL;
2988 struct ifnet_stat_increment_param s;
2989
2990 snprintf(ifname, sizeof (ifname), "%s_poller",
2991 if_name(ifp));
2992 bzero(&s, sizeof (s));
2993
2994 lck_mtx_lock_spin(&ifp->if_poll_lock);
2995
2996 inp = ifp->if_inp;
2997 VERIFY(inp != NULL);
2998
2999 for (;;) {
3000 if (ifp->if_poll_thread != THREAD_NULL) {
3001 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock,
3002 (PZERO - 1) | PSPIN, ifname, ts);
3003 }
3004
3005 /* interface is detached (maybe while asleep)? */
3006 if (ifp->if_poll_thread == THREAD_NULL) {
3007 ifnet_set_poll_cycle(ifp, NULL);
3008 lck_mtx_unlock(&ifp->if_poll_lock);
3009
3010 if (dlil_verbose) {
3011 printf("%s: poller thread terminated\n",
3012 if_name(ifp));
3013 }
3014
3015 /* for the extra refcnt from kernel_thread_start() */
3016 thread_deallocate(current_thread());
3017 /* this is the end */
3018 thread_terminate(current_thread());
3019 /* NOTREACHED */
3020 return;
3021 }
3022
3023 ifp->if_poll_active = 1;
3024 for (;;) {
3025 struct mbuf *m_head, *m_tail;
3026 u_int32_t m_lim, m_cnt, m_totlen;
3027 u_int16_t req = ifp->if_poll_req;
3028
3029 lck_mtx_unlock(&ifp->if_poll_lock);
3030
3031 /*
3032 * If no longer attached, there's nothing to do;
3033 * else hold an IO refcnt to prevent the interface
3034 * from being detached (will be released below.)
3035 */
3036 if (!ifnet_is_attached(ifp, 1)) {
3037 lck_mtx_lock_spin(&ifp->if_poll_lock);
3038 break;
3039 }
3040
3041 m_lim = (inp->rxpoll_plim != 0) ? inp->rxpoll_plim :
3042 MAX((qlimit(&inp->rcvq_pkts)),
3043 (inp->rxpoll_phiwat << 2));
3044
3045 if (dlil_verbose > 1) {
3046 printf("%s: polling up to %d pkts, "
3047 "pkts avg %d max %d, wreq avg %d, "
3048 "bytes avg %d\n",
3049 if_name(ifp), m_lim,
3050 inp->rxpoll_pavg, inp->rxpoll_pmax,
3051 inp->rxpoll_wavg, inp->rxpoll_bavg);
3052 }
3053
3054 /* invoke the driver's input poll routine */
3055 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail,
3056 &m_cnt, &m_totlen));
3057
3058 if (m_head != NULL) {
3059 VERIFY(m_tail != NULL && m_cnt > 0);
3060
3061 if (dlil_verbose > 1) {
3062 printf("%s: polled %d pkts, "
3063 "pkts avg %d max %d, wreq avg %d, "
3064 "bytes avg %d\n",
3065 if_name(ifp), m_cnt,
3066 inp->rxpoll_pavg, inp->rxpoll_pmax,
3067 inp->rxpoll_wavg, inp->rxpoll_bavg);
3068 }
3069
3070 /* stats are required for extended variant */
3071 s.packets_in = m_cnt;
3072 s.bytes_in = m_totlen;
3073
3074 (void) ifnet_input_common(ifp, m_head, m_tail,
3075 &s, TRUE, TRUE);
3076 } else {
3077 if (dlil_verbose > 1) {
3078 printf("%s: no packets, "
3079 "pkts avg %d max %d, wreq avg %d, "
3080 "bytes avg %d\n",
3081 if_name(ifp), inp->rxpoll_pavg,
3082 inp->rxpoll_pmax, inp->rxpoll_wavg,
3083 inp->rxpoll_bavg);
3084 }
3085
3086 (void) ifnet_input_common(ifp, NULL, NULL,
3087 NULL, FALSE, TRUE);
3088 }
3089
3090 /* Release the io ref count */
3091 ifnet_decr_iorefcnt(ifp);
3092
3093 lck_mtx_lock_spin(&ifp->if_poll_lock);
3094
3095 /* if there's no pending request, we're done */
3096 if (req == ifp->if_poll_req) {
3097 break;
3098 }
3099 }
3100 ifp->if_poll_req = 0;
3101 ifp->if_poll_active = 0;
3102
3103 /*
3104 * Wakeup N ns from now, else sleep indefinitely (ts = NULL)
3105 * until ifnet_poll() is called again.
3106 */
3107 ts = &ifp->if_poll_cycle;
3108 if (ts->tv_sec == 0 && ts->tv_nsec == 0)
3109 ts = NULL;
3110 }
3111
3112 /* NOTREACHED */
3113}
3114
3115void
3116ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts)
3117{
3118 if (ts == NULL)
3119 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle));
3120 else
3121 *(&ifp->if_poll_cycle) = *ts;
3122
3123 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose)
3124 printf("%s: poll interval set to %lu nsec\n",
3125 if_name(ifp), ts->tv_nsec);
3126}
3127
3128void
3129ifnet_purge(struct ifnet *ifp)
3130{
3131 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART))
3132 if_qflush(ifp, 0);
3133}
3134
3135void
3136ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev)
3137{
3138 IFCQ_LOCK_ASSERT_HELD(ifq);
3139
3140 if (!(IFCQ_IS_READY(ifq)))
3141 return;
3142
3143 if (IFCQ_TBR_IS_ENABLED(ifq)) {
3144 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw,
3145 ifq->ifcq_tbr.tbr_percent, 0 };
3146 (void) ifclassq_tbr_set(ifq, &tb, FALSE);
3147 }
3148
3149 ifclassq_update(ifq, ev);
3150}
3151
3152void
3153ifnet_update_rcv(struct ifnet *ifp, cqev_t ev)
3154{
3155 switch (ev) {
3156 case CLASSQ_EV_LINK_BANDWIDTH:
3157 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL))
3158 ifp->if_poll_update++;
3159 break;
3160
3161 default:
3162 break;
3163 }
3164}
3165
3166errno_t
3167ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model)
3168{
3169 struct ifclassq *ifq;
3170 u_int32_t omodel;
3171 errno_t err;
3172
3173 if (ifp == NULL || model >= IFNET_SCHED_MODEL_MAX)
3174 return (EINVAL);
3175 else if (!(ifp->if_eflags & IFEF_TXSTART))
3176 return (ENXIO);
3177
3178 ifq = &ifp->if_snd;
3179 IFCQ_LOCK(ifq);
3180 omodel = ifp->if_output_sched_model;
3181 ifp->if_output_sched_model = model;
3182 if ((err = ifclassq_pktsched_setup(ifq)) != 0)
3183 ifp->if_output_sched_model = omodel;
3184 IFCQ_UNLOCK(ifq);
3185
3186 return (err);
3187}
3188
3189errno_t
3190ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3191{
3192 if (ifp == NULL)
3193 return (EINVAL);
3194 else if (!(ifp->if_eflags & IFEF_TXSTART))
3195 return (ENXIO);
3196
3197 ifclassq_set_maxlen(&ifp->if_snd, maxqlen);
3198
3199 return (0);
3200}
3201
3202errno_t
3203ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3204{
3205 if (ifp == NULL || maxqlen == NULL)
3206 return (EINVAL);
3207 else if (!(ifp->if_eflags & IFEF_TXSTART))
3208 return (ENXIO);
3209
3210 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd);
3211
3212 return (0);
3213}
3214
3215errno_t
3216ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts)
3217{
3218 errno_t err;
3219
3220 if (ifp == NULL || pkts == NULL)
3221 err = EINVAL;
3222 else if (!(ifp->if_eflags & IFEF_TXSTART))
3223 err = ENXIO;
3224 else
3225 err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC,
3226 pkts, NULL);
3227
3228 return (err);
3229}
3230
3231errno_t
3232ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc,
3233 u_int32_t *pkts, u_int32_t *bytes)
3234{
3235 errno_t err;
3236
3237 if (ifp == NULL || !MBUF_VALID_SC(sc) ||
3238 (pkts == NULL && bytes == NULL))
3239 err = EINVAL;
3240 else if (!(ifp->if_eflags & IFEF_TXSTART))
3241 err = ENXIO;
3242 else
3243 err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes);
3244
3245 return (err);
3246}
3247
3248errno_t
3249ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen)
3250{
3251 struct dlil_threading_info *inp;
3252
3253 if (ifp == NULL)
3254 return (EINVAL);
3255 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3256 return (ENXIO);
3257
3258 if (maxqlen == 0)
3259 maxqlen = if_rcvq_maxlen;
3260 else if (maxqlen < IF_RCVQ_MINLEN)
3261 maxqlen = IF_RCVQ_MINLEN;
3262
3263 inp = ifp->if_inp;
3264 lck_mtx_lock(&inp->input_lck);
3265 qlimit(&inp->rcvq_pkts) = maxqlen;
3266 lck_mtx_unlock(&inp->input_lck);
3267
3268 return (0);
3269}
3270
3271errno_t
3272ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen)
3273{
3274 struct dlil_threading_info *inp;
3275
3276 if (ifp == NULL || maxqlen == NULL)
3277 return (EINVAL);
3278 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL)
3279 return (ENXIO);
3280
3281 inp = ifp->if_inp;
3282 lck_mtx_lock(&inp->input_lck);
3283 *maxqlen = qlimit(&inp->rcvq_pkts);
3284 lck_mtx_unlock(&inp->input_lck);
3285 return (0);
3286}
3287
3288void
3289ifnet_enqueue_multi_setup(struct ifnet *ifp, uint16_t delay_qlen,
3290 uint16_t delay_timeout)
3291{
3292 if (delay_qlen > 0 && delay_timeout > 0) {
3293 ifp->if_eflags |= IFEF_ENQUEUE_MULTI;
3294 ifp->if_start_delay_qlen = min(100, delay_qlen);
3295 ifp->if_start_delay_timeout = min(20000, delay_timeout);
3296 /* convert timeout to nanoseconds */
3297 ifp->if_start_delay_timeout *= 1000;
3298 kprintf("%s: forced IFEF_ENQUEUE_MULTI qlen %u timeout %u\n",
3299 ifp->if_xname, (uint32_t)delay_qlen,
3300 (uint32_t)delay_timeout);
3301 } else {
3302 ifp->if_eflags &= ~IFEF_ENQUEUE_MULTI;
3303 }
3304}
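
/*
 * Usage sketch, for illustration only (not built): an interface that benefits
 * from batched start callbacks can opt in at attach time.  The values below
 * are hypothetical; qlen is clamped to 100 packets and the timeout to
 * 20000 usec by the routine above.
 */
#if 0
static void
example_attach_time_setup(struct ifnet *ifp)
{
	/* delay the start callback until 16 packets or ~10 ms of traffic */
	ifnet_enqueue_multi_setup(ifp, 16, 10000);	/* qlen, usec */
}
#endif /* 0 (illustrative only) */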
3305
3306static inline errno_t
3307ifnet_enqueue_common(struct ifnet *ifp, void *p, classq_pkt_type_t ptype,
3308 boolean_t flush, boolean_t *pdrop)
3309{
3310 volatile uint64_t *fg_ts = NULL;
3311 volatile uint64_t *rt_ts = NULL;
3312 struct mbuf *m = p;
3313 struct timespec now;
3314 u_int64_t now_nsec = 0;
3315 int error = 0;
3316
3317 ASSERT(ifp->if_eflags & IFEF_TXSTART);
3318
3319 /*
3320	 * If the packet already carries a timestamp, either from dlil_output()
3321	 * or from the flowswitch, use it here. Otherwise, record a timestamp.
3322 * PKTF_TS_VALID is always cleared prior to entering classq, i.e.
3323 * the timestamp value is used internally there.
3324 */
3325 switch (ptype) {
3326 case QP_MBUF:
3327 ASSERT(m->m_flags & M_PKTHDR);
3328 ASSERT(m->m_nextpkt == NULL);
3329
3330 if (!(m->m_pkthdr.pkt_flags & PKTF_TS_VALID) ||
3331 m->m_pkthdr.pkt_timestamp == 0) {
3332 nanouptime(&now);
3333 net_timernsec(&now, &now_nsec);
3334 m->m_pkthdr.pkt_timestamp = now_nsec;
3335 }
3336 m->m_pkthdr.pkt_flags &= ~PKTF_TS_VALID;
3337 /*
3338 * If the packet service class is not background,
3339 * update the timestamp to indicate recent activity
3340 * on a foreground socket.
3341 */
3342 if ((m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) &&
3343 m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) {
3344 if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND)) {
3345 ifp->if_fg_sendts = _net_uptime;
3346 if (fg_ts != NULL)
3347 *fg_ts = _net_uptime;
3348 }
3349 if (m->m_pkthdr.pkt_flags & PKTF_SO_REALTIME) {
3350 ifp->if_rt_sendts = _net_uptime;
3351 if (rt_ts != NULL)
3352 *rt_ts = _net_uptime;
3353 }
3354 }
3355 break;
3356
3357
3358 default:
3359 VERIFY(0);
3360 /* NOTREACHED */
3361 }
3362
3363 if (ifp->if_eflags & IFEF_ENQUEUE_MULTI) {
3364 if (now_nsec == 0) {
3365 nanouptime(&now);
3366 net_timernsec(&now, &now_nsec);
3367 }
3368 /*
3369		 * If the driver chose to delay the start callback for
3370		 * coalescing multiple packets, then use the following
3371		 * heuristics to make sure that the start callback is
3372		 * delayed only when bulk data transfer is detected:
3373		 * 1. The number of packets enqueued in (delay_win * 2) is
3374		 *    greater than or equal to the delay qlen.
3375		 * 2. If delay_start is enabled it will stay enabled for
3376		 *    another 10 idle windows. This is to take into account
3377		 *    variable RTT and burst traffic.
3378		 * 3. If the time elapsed since the last enqueue is more
3379		 *    than 200 ms, we disable delaying the start callback.
3380		 *    This is to take idle time into account.
3381 */
3382 u_int64_t dwin = (ifp->if_start_delay_timeout << 1);
3383 if (ifp->if_start_delay_swin > 0) {
3384 if ((ifp->if_start_delay_swin + dwin) > now_nsec) {
3385 ifp->if_start_delay_cnt++;
3386 } else if ((now_nsec - ifp->if_start_delay_swin)
3387 >= (200 * 1000 * 1000)) {
3388 ifp->if_start_delay_swin = now_nsec;
3389 ifp->if_start_delay_cnt = 1;
3390 ifp->if_start_delay_idle = 0;
3391 if (ifp->if_eflags & IFEF_DELAY_START) {
3392 ifp->if_eflags &=
3393 ~(IFEF_DELAY_START);
3394 ifnet_delay_start_disabled++;
3395 }
3396 } else {
3397 if (ifp->if_start_delay_cnt >=
3398 ifp->if_start_delay_qlen) {
3399 ifp->if_eflags |= IFEF_DELAY_START;
3400 ifp->if_start_delay_idle = 0;
3401 } else {
3402 if (ifp->if_start_delay_idle >= 10) {
3403 ifp->if_eflags &= ~(IFEF_DELAY_START);
3404 ifnet_delay_start_disabled++;
3405 } else {
3406 ifp->if_start_delay_idle++;
3407 }
3408 }
3409 ifp->if_start_delay_swin = now_nsec;
3410 ifp->if_start_delay_cnt = 1;
3411 }
3412 } else {
3413 ifp->if_start_delay_swin = now_nsec;
3414 ifp->if_start_delay_cnt = 1;
3415 ifp->if_start_delay_idle = 0;
3416 ifp->if_eflags &= ~(IFEF_DELAY_START);
3417 }
3418 } else {
3419 ifp->if_eflags &= ~(IFEF_DELAY_START);
3420 }
3421
3422 switch (ptype) {
3423 case QP_MBUF:
3424 /* enqueue the packet (caller consumes object) */
3425 error = ifclassq_enqueue(&ifp->if_snd, m, QP_MBUF, pdrop);
3426 m = NULL;
3427 break;
3428
3429
3430 default:
3431 break;
3432 }
3433
3434 /*
3435 * Tell the driver to start dequeueing; do this even when the queue
3436 * for the packet is suspended (EQSUSPENDED), as the driver could still
3437 * be dequeueing from other unsuspended queues.
3438 */
3439 if (!(ifp->if_eflags & IFEF_ENQUEUE_MULTI) &&
3440 ((error == 0 && flush) || error == EQFULL || error == EQSUSPENDED))
3441 ifnet_start(ifp);
3442
3443 return (error);
3444}
3445
3446errno_t
3447ifnet_enqueue(struct ifnet *ifp, struct mbuf *m)
3448{
3449 boolean_t pdrop;
3450 return (ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop));
3451}
3452
3453errno_t
3454ifnet_enqueue_mbuf(struct ifnet *ifp, struct mbuf *m, boolean_t flush,
3455 boolean_t *pdrop)
3456{
3457 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) ||
3458 m->m_nextpkt != NULL) {
3459 if (m != NULL) {
3460 m_freem_list(m);
3461 *pdrop = TRUE;
3462 }
3463 return (EINVAL);
3464 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3465 !IF_FULLY_ATTACHED(ifp)) {
3466 /* flag tested without lock for performance */
3467 m_freem(m);
3468 *pdrop = TRUE;
3469 return (ENXIO);
3470 } else if (!(ifp->if_flags & IFF_UP)) {
3471 m_freem(m);
3472 *pdrop = TRUE;
3473 return (ENETDOWN);
3474 }
3475
3476 return (ifnet_enqueue_common(ifp, m, QP_MBUF, flush, pdrop));
3477}
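
/*
 * Caller-side sketch, for illustration only (not built): with the
 * IFEF_TXSTART model, an output path hands a single packet to the
 * interface's send queue and the starter thread later invokes the driver's
 * if_start callback.  The wrapper below shows only the enqueue side; the
 * drop counter is a hypothetical stand-in for real accounting.
 */
#if 0
static uint64_t example_drop_count;

static errno_t
example_output(struct ifnet *ifp, struct mbuf *m)
{
	boolean_t pdrop;
	errno_t err;

	/* the classq takes ownership of m; pdrop reports whether it was dropped */
	err = ifnet_enqueue_mbuf(ifp, m, TRUE, &pdrop);
	if (err != 0 && pdrop) {
		/* m was freed by the enqueue path; account for the drop */
		example_drop_count++;
	}

	return (err);
}
#endif /* 0 (illustrative only) */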
3478
3479
3480errno_t
3481ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp)
3482{
3483 errno_t rc;
3484 classq_pkt_type_t ptype;
3485 if (ifp == NULL || mp == NULL)
3486 return (EINVAL);
3487 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3488 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3489 return (ENXIO);
3490 if (!ifnet_is_attached(ifp, 1))
3491 return (ENXIO);
3492
3493 rc = ifclassq_dequeue(&ifp->if_snd, 1, CLASSQ_DEQUEUE_MAX_BYTE_LIMIT,
3494 (void **)mp, NULL, NULL, NULL, &ptype);
3495 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3496 ifnet_decr_iorefcnt(ifp);
3497
3498 return (rc);
3499}
3500
3501errno_t
3502ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc,
3503 struct mbuf **mp)
3504{
3505 errno_t rc;
3506 classq_pkt_type_t ptype;
3507 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc))
3508 return (EINVAL);
3509 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3510 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3511 return (ENXIO);
3512 if (!ifnet_is_attached(ifp, 1))
3513 return (ENXIO);
3514
3515 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, 1,
3516 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)mp, NULL, NULL,
3517 NULL, &ptype);
3518 VERIFY((*mp == NULL) || (ptype == QP_MBUF));
3519 ifnet_decr_iorefcnt(ifp);
3520 return (rc);
3521}
3522
3523errno_t
3524ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t pkt_limit,
3525 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3526{
3527 errno_t rc;
3528 classq_pkt_type_t ptype;
3529 if (ifp == NULL || head == NULL || pkt_limit < 1)
3530 return (EINVAL);
3531 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3532 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3533 return (ENXIO);
3534 if (!ifnet_is_attached(ifp, 1))
3535 return (ENXIO);
3536
3537 rc = ifclassq_dequeue(&ifp->if_snd, pkt_limit,
3538 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head, (void **)tail, cnt,
3539 len, &ptype);
3540 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3541 ifnet_decr_iorefcnt(ifp);
3542 return (rc);
3543}
3544
3545errno_t
3546ifnet_dequeue_multi_bytes(struct ifnet *ifp, u_int32_t byte_limit,
3547 struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, u_int32_t *len)
3548{
3549 errno_t rc;
3550 classq_pkt_type_t ptype;
3551 if (ifp == NULL || head == NULL || byte_limit < 1)
3552 return (EINVAL);
3553 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3554 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3555 return (ENXIO);
3556 if (!ifnet_is_attached(ifp, 1))
3557 return (ENXIO);
3558
3559 rc = ifclassq_dequeue(&ifp->if_snd, CLASSQ_DEQUEUE_MAX_PKT_LIMIT,
3560 byte_limit, (void **)head, (void **)tail, cnt, len, &ptype);
3561 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3562 ifnet_decr_iorefcnt(ifp);
3563 return (rc);
3564}
3565
3566errno_t
3567ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc,
3568 u_int32_t pkt_limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt,
3569 u_int32_t *len)
3570{
3571 errno_t rc;
3572 classq_pkt_type_t ptype;
3573 if (ifp == NULL || head == NULL || pkt_limit < 1 ||
3574 !MBUF_VALID_SC(sc))
3575 return (EINVAL);
3576 else if (!(ifp->if_eflags & IFEF_TXSTART) ||
3577 ifp->if_output_sched_model >= IFNET_SCHED_MODEL_MAX)
3578 return (ENXIO);
3579 if (!ifnet_is_attached(ifp, 1))
3580 return (ENXIO);
3581
3582 rc = ifclassq_dequeue_sc(&ifp->if_snd, sc, pkt_limit,
3583 CLASSQ_DEQUEUE_MAX_BYTE_LIMIT, (void **)head,
3584 (void **)tail, cnt, len, &ptype);
3585 VERIFY((*head == NULL) || (ptype == QP_MBUF));
3586 ifnet_decr_iorefcnt(ifp);
3587 return (rc);
3588}
3589
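/*
 * ifnet_framer_stub adapts a legacy framer callback (which does not report
 * how many bytes it prepends or appends) to the extended framer interface
 * by reporting zero for both the prepended and appended lengths.
 */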
3590#if !CONFIG_EMBEDDED
3591errno_t
3592ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m,
3593 const struct sockaddr *dest, const char *dest_linkaddr,
3594 const char *frame_type, u_int32_t *pre, u_int32_t *post)
3595{
3596 if (pre != NULL)
3597 *pre = 0;
3598 if (post != NULL)
3599 *post = 0;
3600
3601 return (ifp->if_framer_legacy(ifp, m, dest, dest_linkaddr, frame_type));
3602}
3603#endif /* !CONFIG_EMBEDDED */
3604
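/*
 * Walk the interface filter list and give each matching filter a chance to
 * inspect or modify the inbound packet.  The filter lock is dropped around
 * each callback; if_flt_monitor_busy() keeps the list from being torn down
 * while it is unlocked.  A non-zero return from a filter stops the walk.
 */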
3605static int
3606dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p,
3607 char **frame_header_p, protocol_family_t protocol_family)
3608{
3609 struct ifnet_filter *filter;
3610
3611 /*
3612 * Pass the inbound packet to the interface filters
3613 */
3614 lck_mtx_lock_spin(&ifp->if_flt_lock);
3615 /* prevent filter list from changing in case we drop the lock */
3616 if_flt_monitor_busy(ifp);
3617 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3618 int result;
3619
3620 if (!filter->filt_skip && filter->filt_input != NULL &&
3621 (filter->filt_protocol == 0 ||
3622 filter->filt_protocol == protocol_family)) {
3623 lck_mtx_unlock(&ifp->if_flt_lock);
3624
3625 result = (*filter->filt_input)(filter->filt_cookie,
3626 ifp, protocol_family, m_p, frame_header_p);
3627
3628 lck_mtx_lock_spin(&ifp->if_flt_lock);
3629 if (result != 0) {
3630 /* we're done with the filter list */
3631 if_flt_monitor_unbusy(ifp);
3632 lck_mtx_unlock(&ifp->if_flt_lock);
3633 return (result);
3634 }
3635 }
3636 }
3637 /* we're done with the filter list */
3638 if_flt_monitor_unbusy(ifp);
3639 lck_mtx_unlock(&ifp->if_flt_lock);
3640
3641 /*
3642	 * Strip away the M_PROTO1 bit prior to sending the packet up the stack,
3643	 * as it is meant to be local to a subsystem (e.g. if_bridge uses M_PROTO1).
3644 */
3645 if (*m_p != NULL)
3646 (*m_p)->m_flags &= ~M_PROTO1;
3647
3648 return (0);
3649}
3650
3651static int
3652dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p,
3653 protocol_family_t protocol_family)
3654{
3655 struct ifnet_filter *filter;
3656
3657 /*
3658 * Pass the outbound packet to the interface filters
3659 */
3660 lck_mtx_lock_spin(&ifp->if_flt_lock);
3661 /* prevent filter list from changing in case we drop the lock */
3662 if_flt_monitor_busy(ifp);
3663 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
3664 int result;
3665
3666 if (!filter->filt_skip && filter->filt_output != NULL &&
3667 (filter->filt_protocol == 0 ||
3668 filter->filt_protocol == protocol_family)) {
3669 lck_mtx_unlock(&ifp->if_flt_lock);
3670
3671 result = filter->filt_output(filter->filt_cookie, ifp,
3672 protocol_family, m_p);
3673
3674 lck_mtx_lock_spin(&ifp->if_flt_lock);
3675 if (result != 0) {
3676 /* we're done with the filter list */
3677 if_flt_monitor_unbusy(ifp);
3678 lck_mtx_unlock(&ifp->if_flt_lock);
3679 return (result);
3680 }
3681 }
3682 }
3683 /* we're done with the filter list */
3684 if_flt_monitor_unbusy(ifp);
3685 lck_mtx_unlock(&ifp->if_flt_lock);
3686
3687 return (0);
3688}
3689
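/*
 * Hand a packet (or packet chain) to the protocol attachment.  Version 1
 * protocol input handlers take one packet at a time along with its frame
 * header, while version 2 handlers accept the whole m_nextpkt chain.
 */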
3690static void
3691dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m)
3692{
3693 int error;
3694
3695 if (ifproto->proto_kpi == kProtoKPI_v1) {
3696 /* Version 1 protocols get one packet at a time */
3697 while (m != NULL) {
3698 char * frame_header;
3699 mbuf_t next_packet;
3700
3701 next_packet = m->m_nextpkt;
3702 m->m_nextpkt = NULL;
3703 frame_header = m->m_pkthdr.pkt_hdr;
3704 m->m_pkthdr.pkt_hdr = NULL;
3705 error = (*ifproto->kpi.v1.input)(ifproto->ifp,
3706 ifproto->protocol_family, m, frame_header);
3707 if (error != 0 && error != EJUSTRETURN)
3708 m_freem(m);
3709 m = next_packet;
3710 }
3711 } else if (ifproto->proto_kpi == kProtoKPI_v2) {
3712 /* Version 2 protocols support packet lists */
3713 error = (*ifproto->kpi.v2.input)(ifproto->ifp,
3714 ifproto->protocol_family, m);
3715 if (error != 0 && error != EJUSTRETURN)
3716 m_freem_list(m);
3717 }
3718}
3719
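/*
 * Accumulate per-packet statistics into the input thread's local counters;
 * they are folded into the ifnet counters later by dlil_input_stats_sync().
 */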
3720static void
3721dlil_input_stats_add(const struct ifnet_stat_increment_param *s,
3722 struct dlil_threading_info *inp, boolean_t poll)
3723{
3724 struct ifnet_stat_increment_param *d = &inp->stats;
3725
3726 if (s->packets_in != 0)
3727 d->packets_in += s->packets_in;
3728 if (s->bytes_in != 0)
3729 d->bytes_in += s->bytes_in;
3730 if (s->errors_in != 0)
3731 d->errors_in += s->errors_in;
3732
3733 if (s->packets_out != 0)
3734 d->packets_out += s->packets_out;
3735 if (s->bytes_out != 0)
3736 d->bytes_out += s->bytes_out;
3737 if (s->errors_out != 0)
3738 d->errors_out += s->errors_out;
3739
3740 if (s->collisions != 0)
3741 d->collisions += s->collisions;
3742 if (s->dropped != 0)
3743 d->dropped += s->dropped;
3744
3745 if (poll)
3746 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in);
3747}
3748
3749static void
3750dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp)
3751{
3752 struct ifnet_stat_increment_param *s = &inp->stats;
3753
3754 /*
3755 * Use of atomic operations is unavoidable here because
3756 * these stats may also be incremented elsewhere via KPIs.
3757 */
3758 if (s->packets_in != 0) {
3759 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in);
3760 s->packets_in = 0;
3761 }
3762 if (s->bytes_in != 0) {
3763 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in);
3764 s->bytes_in = 0;
3765 }
3766 if (s->errors_in != 0) {
3767 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in);
3768 s->errors_in = 0;
3769 }
3770
3771 if (s->packets_out != 0) {
3772 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out);
3773 s->packets_out = 0;
3774 }
3775 if (s->bytes_out != 0) {
3776 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out);
3777 s->bytes_out = 0;
3778 }
3779 if (s->errors_out != 0) {
3780 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out);
3781 s->errors_out = 0;
3782 }
3783
3784 if (s->collisions != 0) {
3785 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions);
3786 s->collisions = 0;
3787 }
3788 if (s->dropped != 0) {
3789 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped);
3790 s->dropped = 0;
3791 }
3792
3793 if (ifp->if_data_threshold != 0) {
3794 lck_mtx_convert_spin(&inp->input_lck);
3795 ifnet_notify_data_threshold(ifp);
3796 }
3797
3798 /*
3799 * No need for atomic operations as they are modified here
3800 * only from within the DLIL input thread context.
3801 */
3802 if (inp->tstats.packets != 0) {
3803 inp->pstats.ifi_poll_packets += inp->tstats.packets;
3804 inp->tstats.packets = 0;
3805 }
3806 if (inp->tstats.bytes != 0) {
3807 inp->pstats.ifi_poll_bytes += inp->tstats.bytes;
3808 inp->tstats.bytes = 0;
3809 }
3810}
3811
3812__private_extern__ void
3813dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m)
3814{
3815 return (dlil_input_packet_list_common(ifp, m, 0,
3816 IFNET_MODEL_INPUT_POLL_OFF, FALSE));
3817}
3818
3819__private_extern__ void
3820dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m,
3821 u_int32_t cnt, ifnet_model_t mode)
3822{
3823 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE));
3824}
3825
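/*
 * Common inbound path: walk the packet chain, demux each packet to a
 * protocol family, run interface filters and CLAT translation as needed,
 * then batch consecutive packets destined for the same protocol attachment
 * and deliver each batch via dlil_ifproto_input().
 */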
3826static void
3827dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m,
3828 u_int32_t cnt, ifnet_model_t mode, boolean_t ext)
3829{
3830 int error = 0;
3831 protocol_family_t protocol_family;
3832 mbuf_t next_packet;
3833 ifnet_t ifp = ifp_param;
3834 char *frame_header = NULL;
3835 struct if_proto *last_ifproto = NULL;
3836 mbuf_t pkt_first = NULL;
3837 mbuf_t *pkt_next = NULL;
3838 u_int32_t poll_thresh = 0, poll_ival = 0;
3839
3840 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
3841
3842 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 &&
3843 (poll_ival = if_rxpoll_interval_pkts) > 0)
3844 poll_thresh = cnt;
3845
3846 while (m != NULL) {
3847 struct if_proto *ifproto = NULL;
3848 int iorefcnt = 0;
3849 uint32_t pktf_mask; /* pkt flags to preserve */
3850
3851 if (ifp_param == NULL)
3852 ifp = m->m_pkthdr.rcvif;
3853
3854 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 &&
3855 poll_ival > 0 && (--poll_thresh % poll_ival) == 0)
3856 ifnet_poll(ifp);
3857
3858 /* Check if this mbuf looks valid */
3859 MBUF_INPUT_CHECK(m, ifp);
3860
3861 next_packet = m->m_nextpkt;
3862 m->m_nextpkt = NULL;
3863 frame_header = m->m_pkthdr.pkt_hdr;
3864 m->m_pkthdr.pkt_hdr = NULL;
3865
3866 /*
3867 * Get an IO reference count if the interface is not
3868 * loopback (lo0) and it is attached; lo0 never goes
3869 * away, so optimize for that.
3870 */
3871 if (ifp != lo_ifp) {
3872 if (!ifnet_is_attached(ifp, 1)) {
3873 m_freem(m);
3874 goto next;
3875 }
3876 iorefcnt = 1;
3877 /*
3878 * Preserve the time stamp if it was set.
3879 */
3880 pktf_mask = PKTF_TS_VALID;
3881 } else {
3882 /*
3883 * If this arrived on lo0, preserve interface addr
3884 * info to allow for connectivity between loopback
3885 * and local interface addresses.
3886 */
3887 pktf_mask = (PKTF_LOOP|PKTF_IFAINFO);
3888 }
3889
3890 /* make sure packet comes in clean */
3891 m_classifier_init(m, pktf_mask);
3892
3893 ifp_inc_traffic_class_in(ifp, m);
3894
3895 /* find which protocol family this packet is for */
3896 ifnet_lock_shared(ifp);
3897 error = (*ifp->if_demux)(ifp, m, frame_header,
3898 &protocol_family);
3899 ifnet_lock_done(ifp);
3900 if (error != 0) {
3901 if (error == EJUSTRETURN)
3902 goto next;
3903 protocol_family = 0;
3904 }
3905
3906 pktap_input(ifp, protocol_family, m, frame_header);
3907
3908 /* Drop v4 packets received on CLAT46 enabled interface */
3909 if (protocol_family == PF_INET && IS_INTF_CLAT46(ifp)) {
3910 m_freem(m);
3911 ip6stat.ip6s_clat464_in_v4_drop++;
3912 goto next;
3913 }
3914
3915 /* Translate the packet if it is received on CLAT interface */
3916 if (protocol_family == PF_INET6 && IS_INTF_CLAT46(ifp)
3917 && dlil_is_clat_needed(protocol_family, m)) {
3918 char *data = NULL;
3919 struct ether_header eh;
3920 struct ether_header *ehp = NULL;
3921
3922 if (ifp->if_type == IFT_ETHER) {
3923 ehp = (struct ether_header *)(void *)frame_header;
3924 /* Skip RX Ethernet packets if they are not IPV6 */
3925 if (ntohs(ehp->ether_type) != ETHERTYPE_IPV6)
3926 goto skip_clat;
3927
3928 /* Keep a copy of frame_header for Ethernet packets */
3929 bcopy(frame_header, (caddr_t)&eh, ETHER_HDR_LEN);
3930 }
3931 error = dlil_clat64(ifp, &protocol_family, &m);
3932 data = (char *) mbuf_data(m);
3933 if (error != 0) {
3934 m_freem(m);
3935 ip6stat.ip6s_clat464_in_drop++;
3936 goto next;
3937 }
3938 /* Native v6 should be No-op */
3939 if (protocol_family != PF_INET)
3940 goto skip_clat;
3941
3942 /* Do this only for translated v4 packets. */
3943 switch (ifp->if_type) {
3944 case IFT_CELLULAR:
3945 frame_header = data;
3946 break;
3947 case IFT_ETHER:
3948 /*
3949 * Drop if the mbuf doesn't have enough
3950 * space for Ethernet header
3951 */
3952 if (M_LEADINGSPACE(m) < ETHER_HDR_LEN) {
3953 m_free(m);
3954 ip6stat.ip6s_clat464_in_drop++;
3955 goto next;
3956 }
3957 /*
3958 * Set the frame_header ETHER_HDR_LEN bytes
3959				 * preceding the data pointer. Change
3960 * the ether_type too.
3961 */
3962 frame_header = data - ETHER_HDR_LEN;
3963 eh.ether_type = htons(ETHERTYPE_IP);
3964 bcopy((caddr_t)&eh, frame_header, ETHER_HDR_LEN);
3965 break;
3966 }
3967 }
3968skip_clat:
3969 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) &&
3970 !(m->m_pkthdr.pkt_flags & PKTF_LOOP))
3971 dlil_input_cksum_dbg(ifp, m, frame_header,
3972 protocol_family);
3973
3974 /*
3975 * For partial checksum offload, we expect the driver to
3976 * set the start offset indicating the start of the span
3977 * that is covered by the hardware-computed checksum;
3978 * adjust this start offset accordingly because the data
3979 * pointer has been advanced beyond the link-layer header.
3980 *
3981 * Don't adjust if the interface is a bridge member, as
3982 * the adjustment will occur from the context of the
3983 * bridge interface during input.
3984 */
3985 if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags &
3986 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
3987 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
3988 int adj;
3989 if (frame_header == NULL ||
3990 frame_header < (char *)mbuf_datastart(m) ||
3991 frame_header > (char *)m->m_data ||
3992 (adj = (m->m_data - frame_header)) >
3993 m->m_pkthdr.csum_rx_start) {
3994 m->m_pkthdr.csum_data = 0;
3995 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
3996 hwcksum_in_invalidated++;
3997 } else {
3998 m->m_pkthdr.csum_rx_start -= adj;
3999 }
4000 }
4001
4002 if (clat_debug)
4003 pktap_input(ifp, protocol_family, m, frame_header);
4004
4005 if (m->m_flags & (M_BCAST|M_MCAST))
4006 atomic_add_64(&ifp->if_imcasts, 1);
4007
4008 /* run interface filters, exclude VLAN packets PR-3586856 */
4009 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4010 error = dlil_interface_filters_input(ifp, &m,
4011 &frame_header, protocol_family);
4012 if (error != 0) {
4013 if (error != EJUSTRETURN)
4014 m_freem(m);
4015 goto next;
4016 }
4017 }
4018 if (error != 0 || ((m->m_flags & M_PROMISC) != 0)) {
4019 m_freem(m);
4020 goto next;
4021 }
4022
4023 /* Lookup the protocol attachment to this interface */
4024 if (protocol_family == 0) {
4025 ifproto = NULL;
4026 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp &&
4027 (last_ifproto->protocol_family == protocol_family)) {
4028 VERIFY(ifproto == NULL);
4029 ifproto = last_ifproto;
4030 if_proto_ref(last_ifproto);
4031 } else {
4032 VERIFY(ifproto == NULL);
4033 ifnet_lock_shared(ifp);
4034 /* callee holds a proto refcnt upon success */
4035 ifproto = find_attached_proto(ifp, protocol_family);
4036 ifnet_lock_done(ifp);
4037 }
4038 if (ifproto == NULL) {
4039 /* no protocol for this packet, discard */
4040 m_freem(m);
4041 goto next;
4042 }
4043 if (ifproto != last_ifproto) {
4044 if (last_ifproto != NULL) {
4045 /* pass up the list for the previous protocol */
4046 dlil_ifproto_input(last_ifproto, pkt_first);
4047 pkt_first = NULL;
4048 if_proto_free(last_ifproto);
4049 }
4050 last_ifproto = ifproto;
4051 if_proto_ref(ifproto);
4052 }
4053 /* extend the list */
4054 m->m_pkthdr.pkt_hdr = frame_header;
4055 if (pkt_first == NULL) {
4056 pkt_first = m;
4057 } else {
4058 *pkt_next = m;
4059 }
4060 pkt_next = &m->m_nextpkt;
4061
4062next:
4063 if (next_packet == NULL && last_ifproto != NULL) {
4064 /* pass up the last list of packets */
4065 dlil_ifproto_input(last_ifproto, pkt_first);
4066 if_proto_free(last_ifproto);
4067 last_ifproto = NULL;
4068 }
4069 if (ifproto != NULL) {
4070 if_proto_free(ifproto);
4071 ifproto = NULL;
4072 }
4073
4074 m = next_packet;
4075
4076 /* update the driver's multicast filter, if needed */
4077 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4078 ifp->if_updatemcasts = 0;
4079 if (iorefcnt == 1)
4080 ifnet_decr_iorefcnt(ifp);
4081 }
4082
4083 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4084}
4085
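/*
 * Ask the driver (via SIOCADDMULTI) to reprogram its link-layer multicast
 * filter so that suspended memberships are restored.  The outcome is only
 * logged; the function deliberately returns success.
 */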
4086errno_t
4087if_mcasts_update(struct ifnet *ifp)
4088{
4089 errno_t err;
4090
4091 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
4092 if (err == EAFNOSUPPORT)
4093 err = 0;
4094 printf("%s: %s %d suspended link-layer multicast membership(s) "
4095 "(err=%d)\n", if_name(ifp),
4096 (err == 0 ? "successfully restored" : "failed to restore"),
4097 ifp->if_updatemcasts, err);
4098
4099 /* just return success */
4100 return (0);
4101}
4102
4103/* If ifp is set, we will increment the generation for the interface */
4104int
4105dlil_post_complete_msg(struct ifnet *ifp, struct kev_msg *event)
4106{
4107 if (ifp != NULL) {
4108 ifnet_increment_generation(ifp);
4109 }
4110
4111#if NECP
4112 necp_update_all_clients();
4113#endif /* NECP */
4114
4115 return (kev_post_msg(event));
4116}
4117
4118__private_extern__ void
4119dlil_post_sifflags_msg(struct ifnet * ifp)
4120{
4121 struct kev_msg ev_msg;
4122 struct net_event_data ev_data;
4123
4124 bzero(&ev_data, sizeof (ev_data));
4125 bzero(&ev_msg, sizeof (ev_msg));
4126 ev_msg.vendor_code = KEV_VENDOR_APPLE;
4127 ev_msg.kev_class = KEV_NETWORK_CLASS;
4128 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
4129 ev_msg.event_code = KEV_DL_SIFFLAGS;
4130 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
4131 ev_data.if_family = ifp->if_family;
4132 ev_data.if_unit = (u_int32_t) ifp->if_unit;
4133 ev_msg.dv[0].data_length = sizeof(struct net_event_data);
4134 ev_msg.dv[0].data_ptr = &ev_data;
4135 ev_msg.dv[1].data_length = 0;
4136 dlil_post_complete_msg(ifp, &ev_msg);
4137}
4138
4139#define TMP_IF_PROTO_ARR_SIZE 10
4140static int
4141dlil_event_internal(struct ifnet *ifp, struct kev_msg *event, bool update_generation)
4142{
4143 struct ifnet_filter *filter = NULL;
4144 struct if_proto *proto = NULL;
4145 int if_proto_count = 0;
4146 struct if_proto **tmp_ifproto_arr = NULL;
4147 struct if_proto *tmp_ifproto_stack_arr[TMP_IF_PROTO_ARR_SIZE] = {NULL};
4148 int tmp_ifproto_arr_idx = 0;
4149 bool tmp_malloc = false;
4150
4151 /*
4152 * Pass the event to the interface filters
4153 */
4154 lck_mtx_lock_spin(&ifp->if_flt_lock);
4155 /* prevent filter list from changing in case we drop the lock */
4156 if_flt_monitor_busy(ifp);
4157 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
4158 if (filter->filt_event != NULL) {
4159 lck_mtx_unlock(&ifp->if_flt_lock);
4160
4161 filter->filt_event(filter->filt_cookie, ifp,
4162 filter->filt_protocol, event);
4163
4164 lck_mtx_lock_spin(&ifp->if_flt_lock);
4165 }
4166 }
4167 /* we're done with the filter list */
4168 if_flt_monitor_unbusy(ifp);
4169 lck_mtx_unlock(&ifp->if_flt_lock);
4170
4171 /* Get an io ref count if the interface is attached */
4172 if (!ifnet_is_attached(ifp, 1))
4173 goto done;
4174
4175 /*
4176	 * An embedded tmp_list_entry in if_proto may still get
4177	 * overwritten by another thread after the ifnet lock is given up,
4178	 * so we avoid embedded pointers here.
4179 */
4180 ifnet_lock_shared(ifp);
4181 if_proto_count = dlil_ifp_protolist(ifp, NULL, 0);
4182 if (if_proto_count) {
4183 int i;
4184 VERIFY(ifp->if_proto_hash != NULL);
4185 if (if_proto_count <= TMP_IF_PROTO_ARR_SIZE) {
4186 tmp_ifproto_arr = tmp_ifproto_stack_arr;
4187 } else {
4188 MALLOC(tmp_ifproto_arr, struct if_proto **,
4189 sizeof (*tmp_ifproto_arr) * if_proto_count,
4190 M_TEMP, M_ZERO);
4191 if (tmp_ifproto_arr == NULL) {
4192 ifnet_lock_done(ifp);
4193 goto cleanup;
4194 }
4195 tmp_malloc = true;
4196 }
4197
4198 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
4199 SLIST_FOREACH(proto, &ifp->if_proto_hash[i],
4200 next_hash) {
4201 if_proto_ref(proto);
4202 tmp_ifproto_arr[tmp_ifproto_arr_idx] = proto;
4203 tmp_ifproto_arr_idx++;
4204 }
4205 }
4206 VERIFY(if_proto_count == tmp_ifproto_arr_idx);
4207 }
4208 ifnet_lock_done(ifp);
4209
4210 for (tmp_ifproto_arr_idx = 0; tmp_ifproto_arr_idx < if_proto_count;
4211 tmp_ifproto_arr_idx++) {
4212 proto = tmp_ifproto_arr[tmp_ifproto_arr_idx];
4213 VERIFY(proto != NULL);
4214 proto_media_event eventp =
4215 (proto->proto_kpi == kProtoKPI_v1 ?
4216 proto->kpi.v1.event :
4217 proto->kpi.v2.event);
4218
4219 if (eventp != NULL) {
4220 eventp(ifp, proto->protocol_family,
4221 event);
4222 }
4223 if_proto_free(proto);
4224 }
4225
4226cleanup:
4227 if (tmp_malloc) {
4228 FREE(tmp_ifproto_arr, M_TEMP);
4229 }
4230
4231 /* Pass the event to the interface */
4232 if (ifp->if_event != NULL)
4233 ifp->if_event(ifp, event);
4234
4235 /* Release the io ref count */
4236 ifnet_decr_iorefcnt(ifp);
4237done:
4238 return (dlil_post_complete_msg(update_generation ? ifp : NULL, event));
4239}
4240
4241errno_t
4242ifnet_event(ifnet_t ifp, struct kern_event_msg *event)
4243{
4244 struct kev_msg kev_msg;
4245 int result = 0;
4246
4247 if (ifp == NULL || event == NULL)
4248 return (EINVAL);
4249
4250 bzero(&kev_msg, sizeof (kev_msg));
4251 kev_msg.vendor_code = event->vendor_code;
4252 kev_msg.kev_class = event->kev_class;
4253 kev_msg.kev_subclass = event->kev_subclass;
4254 kev_msg.event_code = event->event_code;
4255 kev_msg.dv[0].data_ptr = &event->event_data[0];
4256 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE;
4257 kev_msg.dv[1].data_length = 0;
4258
4259 result = dlil_event_internal(ifp, &kev_msg, TRUE);
4260
4261 return (result);
4262}
4263
4264#if CONFIG_MACF_NET
4265#include <netinet/ip6.h>
4266#include <netinet/ip.h>
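/*
 * Best-effort guess at the socket type for MAC framework checks: look at
 * the transport protocol in the IPv4/IPv6 header (TCP -> SOCK_STREAM,
 * UDP -> SOCK_DGRAM), defaulting to SOCK_RAW.
 */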
4267static int
4268dlil_get_socket_type(struct mbuf **mp, int family, int raw)
4269{
4270 struct mbuf *m;
4271 struct ip *ip;
4272 struct ip6_hdr *ip6;
4273 int type = SOCK_RAW;
4274
4275 if (!raw) {
4276 switch (family) {
4277 case PF_INET:
4278 m = m_pullup(*mp, sizeof(struct ip));
4279 if (m == NULL)
4280 break;
4281 *mp = m;
4282 ip = mtod(m, struct ip *);
4283 if (ip->ip_p == IPPROTO_TCP)
4284 type = SOCK_STREAM;
4285 else if (ip->ip_p == IPPROTO_UDP)
4286 type = SOCK_DGRAM;
4287 break;
4288 case PF_INET6:
4289 m = m_pullup(*mp, sizeof(struct ip6_hdr));
4290 if (m == NULL)
4291 break;
4292 *mp = m;
4293 ip6 = mtod(m, struct ip6_hdr *);
4294 if (ip6->ip6_nxt == IPPROTO_TCP)
4295 type = SOCK_STREAM;
4296 else if (ip6->ip6_nxt == IPPROTO_UDP)
4297 type = SOCK_DGRAM;
4298 break;
4299 }
4300 }
4301
4302 return (type);
4303}
4304#endif
4305
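/*
 * Record the length of an outbound mbuf chain into the transmit chain
 * length histogram (one bucket each for 1 through 4 mbufs, plus a shared
 * bucket for 5 or more).
 */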
4306static void
4307dlil_count_chain_len(mbuf_t m, struct chain_len_stats *cls)
4308{
4309 mbuf_t n = m;
4310 int chainlen = 0;
4311
4312 while (n != NULL) {
4313 chainlen++;
4314 n = n->m_next;
4315 }
4316 switch (chainlen) {
4317 case 0:
4318 break;
4319 case 1:
4320 atomic_add_64(&cls->cls_one, 1);
4321 break;
4322 case 2:
4323 atomic_add_64(&cls->cls_two, 1);
4324 break;
4325 case 3:
4326 atomic_add_64(&cls->cls_three, 1);
4327 break;
4328 case 4:
4329 atomic_add_64(&cls->cls_four, 1);
4330 break;
4331 case 5:
4332 default:
4333 atomic_add_64(&cls->cls_five_or_more, 1);
4334 break;
4335 }
4336}
4337
4338/*
4339 * dlil_output
4340 *
4341 * Caller should have a lock on the protocol domain if the protocol
4342 * doesn't support finer grained locking. In most cases, the lock
4343 * will be held from the socket layer and won't be released until
4344 * we return back to the socket layer.
4345 *
4346 * This does mean that we must take a protocol lock before we take
4347 * an interface lock if we're going to take both. This makes sense
4348 * because a protocol is likely to interact with an ifp while it
4349 * is under the protocol lock.
4350 *
4351 * An advisory code will be returned if adv is not null. This
4352 * can be used to provide feedback about interface queues to the
4353 * application.
4354 */
4355errno_t
4356dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist,
4357 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv)
4358{
4359 char *frame_type = NULL;
4360 char *dst_linkaddr = NULL;
4361 int retval = 0;
4362 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4];
4363 char dst_linkaddr_buffer[MAX_LINKADDR * 4];
4364 struct if_proto *proto = NULL;
4365 mbuf_t m = NULL;
4366 mbuf_t send_head = NULL;
4367 mbuf_t *send_tail = &send_head;
4368 int iorefcnt = 0;
4369 u_int32_t pre = 0, post = 0;
4370 u_int32_t fpkts = 0, fbytes = 0;
4371 int32_t flen = 0;
4372 struct timespec now;
4373 u_int64_t now_nsec;
4374 boolean_t did_clat46 = FALSE;
4375 protocol_family_t old_proto_family = proto_family;
4376 struct rtentry *rt = NULL;
4377
4378 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0);
4379
4380 /*
4381 * Get an io refcnt if the interface is attached to prevent ifnet_detach
4382 * from happening while this operation is in progress
4383 */
4384 if (!ifnet_is_attached(ifp, 1)) {
4385 retval = ENXIO;
4386 goto cleanup;
4387 }
4388 iorefcnt = 1;
4389
4390 VERIFY(ifp->if_output_dlil != NULL);
4391
4392 /* update the driver's multicast filter, if needed */
4393 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0)
4394 ifp->if_updatemcasts = 0;
4395
4396 frame_type = frame_type_buffer;
4397 dst_linkaddr = dst_linkaddr_buffer;
4398
4399 if (raw == 0) {
4400 ifnet_lock_shared(ifp);
4401 /* callee holds a proto refcnt upon success */
4402 proto = find_attached_proto(ifp, proto_family);
4403 if (proto == NULL) {
4404 ifnet_lock_done(ifp);
4405 retval = ENXIO;
4406 goto cleanup;
4407 }
4408 ifnet_lock_done(ifp);
4409 }
4410
4411preout_again:
4412 if (packetlist == NULL)
4413 goto cleanup;
4414
4415 m = packetlist;
4416 packetlist = packetlist->m_nextpkt;
4417 m->m_nextpkt = NULL;
4418
4419 /*
4420 * Perform address family translation for the first
4421 * packet outside the loop in order to perform address
4422 * lookup for the translated proto family.
4423 */
4424 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
4425 (ifp->if_type == IFT_CELLULAR ||
4426 dlil_is_clat_needed(proto_family, m))) {
4427 retval = dlil_clat46(ifp, &proto_family, &m);
4428 /*
4429 * Go to the next packet if translation fails
4430 */
4431 if (retval != 0) {
4432 m_freem(m);
4433 m = NULL;
4434 ip6stat.ip6s_clat464_out_drop++;
4435 /* Make sure that the proto family is PF_INET */
4436 ASSERT(proto_family == PF_INET);
4437 goto preout_again;
4438 }
4439 /*
4440		 * Free the old proto and make it point to the IPv6 proto structure.
4441		 *
4442		 * proto is changed only the first time address family translation
4443		 * has been performed successfully.
4444 */
4445 if (!did_clat46 && proto_family == PF_INET6) {
4446 struct sockaddr_in6 dest6;
4447 did_clat46 = TRUE;
4448
4449 if (proto != NULL)
4450 if_proto_free(proto);
4451 ifnet_lock_shared(ifp);
4452 /* callee holds a proto refcnt upon success */
4453 proto = find_attached_proto(ifp, proto_family);
4454 if (proto == NULL) {
4455 ifnet_lock_done(ifp);
4456 retval = ENXIO;
4457 m_freem(m);
4458 m = NULL;
4459 goto cleanup;
4460 }
4461 ifnet_lock_done(ifp);
4462 if (ifp->if_type == IFT_ETHER) {
4463 /* Update the dest to translated v6 address */
4464 dest6.sin6_len = sizeof(struct sockaddr_in6);
4465 dest6.sin6_family = AF_INET6;
4466 dest6.sin6_addr = (mtod(m, struct ip6_hdr *))->ip6_dst;
4467 dest = (const struct sockaddr *)&dest6;
4468
4469 /*
4470 * Lookup route to the translated destination
4471 * Free this route ref during cleanup
4472 */
4473 rt = rtalloc1_scoped((struct sockaddr *)&dest6,
4474 0, 0, ifp->if_index);
4475
4476 route = rt;
4477 }
4478 }
4479 }
4480
4481 /*
4482	 * This path handles a packet chain going to the same destination.
4483	 * The pre-output routine is used to either trigger resolution of
4484	 * the next hop or retrieve the next hop's link-layer addressing.
4485	 * For example: the ether_inet(6)_pre_output routine.
4486	 *
4487	 * If the routine returns EJUSTRETURN, it implies that the packet has
4488	 * been queued, and therefore we have to call preout_again for the
4489	 * following packet in the chain.
4490	 *
4491	 * For errors other than EJUSTRETURN, the current packet is freed
4492	 * and the rest of the chain (pointed to by packetlist) is freed as
4493	 * part of cleanup.
4494	 *
4495	 * Otherwise, if there is no error, the retrieved information is used
4496	 * for all the packets in the chain.
4497 */
4498 if (raw == 0) {
4499 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ?
4500 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output);
4501 retval = 0;
4502 if (preoutp != NULL) {
4503 retval = preoutp(ifp, proto_family, &m, dest, route,
4504 frame_type, dst_linkaddr);
4505
4506 if (retval != 0) {
4507 if (retval == EJUSTRETURN)
4508 goto preout_again;
4509 m_freem(m);
4510 m = NULL;
4511 goto cleanup;
4512 }
4513 }
4514 }
4515
4516#if CONFIG_MACF_NET
4517 retval = mac_ifnet_check_transmit(ifp, m, proto_family,
4518 dlil_get_socket_type(&m, proto_family, raw));
4519 if (retval != 0) {
4520 m_freem(m);
4521 goto cleanup;
4522 }
4523#endif
4524
4525 do {
4526 /*
4527 * Perform address family translation if needed.
4528 * For now we only support stateless 4 to 6 translation
4529 * on the out path.
4530 *
4531 * The routine below translates IP header, updates protocol
4532 * checksum and also translates ICMP.
4533 *
4534 * We skip the first packet as it is already translated and
4535 * the proto family is set to PF_INET6.
4536 */
4537 if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
4538 (ifp->if_type == IFT_CELLULAR ||
4539 dlil_is_clat_needed(proto_family, m))) {
4540 retval = dlil_clat46(ifp, &proto_family, &m);
4541 /* Goto the next packet if the translation fails */
4542 if (retval != 0) {
4543 m_freem(m);
4544 m = NULL;
4545 ip6stat.ip6s_clat464_out_drop++;
4546 goto next;
4547 }
4548 }
4549
4550#if CONFIG_DTRACE
4551 if (!raw && proto_family == PF_INET) {
4552 struct ip *ip = mtod(m, struct ip *);
4553 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4554 struct ip *, ip, struct ifnet *, ifp,
4555 struct ip *, ip, struct ip6_hdr *, NULL);
4556
4557 } else if (!raw && proto_family == PF_INET6) {
4558 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
4559 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
4560 struct ip6_hdr *, ip6, struct ifnet *, ifp,
4561 struct ip *, NULL, struct ip6_hdr *, ip6);
4562 }
4563#endif /* CONFIG_DTRACE */
4564
4565 if (raw == 0 && ifp->if_framer != NULL) {
4566 int rcvif_set = 0;
4567
4568 /*
4569 * If this is a broadcast packet that needs to be
4570 * looped back into the system, set the inbound ifp
4571 * to that of the outbound ifp. This will allow
4572 * us to determine that it is a legitimate packet
4573 * for the system. Only set the ifp if it's not
4574 * already set, just to be safe.
4575 */
4576 if ((m->m_flags & (M_BCAST | M_LOOP)) &&
4577 m->m_pkthdr.rcvif == NULL) {
4578 m->m_pkthdr.rcvif = ifp;
4579 rcvif_set = 1;
4580 }
4581
4582 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr,
4583 frame_type, &pre, &post);
4584 if (retval != 0) {
4585 if (retval != EJUSTRETURN)
4586 m_freem(m);
4587 goto next;
4588 }
4589
4590 /*
4591 * For partial checksum offload, adjust the start
4592 * and stuff offsets based on the prepended header.
4593 */
4594 if ((m->m_pkthdr.csum_flags &
4595 (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
4596 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
4597 m->m_pkthdr.csum_tx_stuff += pre;
4598 m->m_pkthdr.csum_tx_start += pre;
4599 }
4600
4601 if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK))
4602 dlil_output_cksum_dbg(ifp, m, pre,
4603 proto_family);
4604
4605 /*
4606 * Clear the ifp if it was set above, and to be
4607 * safe, only if it is still the same as the
4608 * outbound ifp we have in context. If it was
4609 * looped back, then a copy of it was sent to the
4610 * loopback interface with the rcvif set, and we
4611 * are clearing the one that will go down to the
4612 * layer below.
4613 */
4614 if (rcvif_set && m->m_pkthdr.rcvif == ifp)
4615 m->m_pkthdr.rcvif = NULL;
4616 }
4617
4618 /*
4619 * Let interface filters (if any) do their thing ...
4620 */
4621 /* Do not pass VLAN tagged packets to filters PR-3586856 */
4622 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) {
4623 retval = dlil_interface_filters_output(ifp,
4624 &m, proto_family);
4625 if (retval != 0) {
4626 if (retval != EJUSTRETURN)
4627 m_freem(m);
4628 goto next;
4629 }
4630 }
4631 /*
4632 * Strip away M_PROTO1 bit prior to sending packet
4633 * to the driver as this field may be used by the driver
4634 */
4635 m->m_flags &= ~M_PROTO1;
4636
4637 /*
4638 * If the underlying interface is not capable of handling a
4639 * packet whose data portion spans across physically disjoint
4640 * pages, we need to "normalize" the packet so that we pass
4641		 * down a chain of mbufs where each mbuf points to a span that
4642		 * resides within a single system page.  If the packet does
4643		 * not cross page boundaries, the following is a no-op.
4644 */
4645 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) {
4646 if ((m = m_normalize(m)) == NULL)
4647 goto next;
4648 }
4649
4650 /*
4651 * If this is a TSO packet, make sure the interface still
4652		 * advertises TSO capability.
4653 */
4654 if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) {
4655 retval = EMSGSIZE;
4656 m_freem(m);
4657 goto cleanup;
4658 }
4659
4660 ifp_inc_traffic_class_out(ifp, m);
4661 pktap_output(ifp, proto_family, m, pre, post);
4662
4663 /*
4664 * Count the number of elements in the mbuf chain
4665 */
4666 if (tx_chain_len_count) {
4667 dlil_count_chain_len(m, &tx_chain_len_stats);
4668 }
4669
4670 /*
4671 * Record timestamp; ifnet_enqueue() will use this info
4672 * rather than redoing the work. An optimization could
4673 * involve doing this just once at the top, if there are
4674 * no interface filters attached, but that's probably
4675 * not a big deal.
4676 */
4677 nanouptime(&now);
4678 net_timernsec(&now, &now_nsec);
4679 (void) mbuf_set_timestamp(m, now_nsec, TRUE);
4680
4681 /*
4682 * Discard partial sum information if this packet originated
4683 * from another interface; the packet would already have the
4684 * final checksum and we shouldn't recompute it.
4685 */
4686 if ((m->m_pkthdr.pkt_flags & PKTF_FORWARDED) &&
4687 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID|CSUM_PARTIAL)) ==
4688 (CSUM_DATA_VALID|CSUM_PARTIAL)) {
4689 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4690 m->m_pkthdr.csum_data = 0;
4691 }
4692
4693 /*
4694 * Finally, call the driver.
4695 */
4696 if (ifp->if_eflags & (IFEF_SENDLIST | IFEF_ENQUEUE_MULTI)) {
4697 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4698 flen += (m_pktlen(m) - (pre + post));
4699 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4700 }
4701 *send_tail = m;
4702 send_tail = &m->m_nextpkt;
4703 } else {
4704 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
4705 flen = (m_pktlen(m) - (pre + post));
4706 m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED;
4707 } else {
4708 flen = 0;
4709 }
4710 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4711 0, 0, 0, 0, 0);
4712 retval = (*ifp->if_output_dlil)(ifp, m);
4713 if (retval == EQFULL || retval == EQSUSPENDED) {
4714 if (adv != NULL && adv->code == FADV_SUCCESS) {
4715 adv->code = (retval == EQFULL ?
4716 FADV_FLOW_CONTROLLED :
4717 FADV_SUSPENDED);
4718 }
4719 retval = 0;
4720 }
4721 if (retval == 0 && flen > 0) {
4722 fbytes += flen;
4723 fpkts++;
4724 }
4725 if (retval != 0 && dlil_verbose) {
4726 printf("%s: output error on %s retval = %d\n",
4727 __func__, if_name(ifp),
4728 retval);
4729 }
4730 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END,
4731 0, 0, 0, 0, 0);
4732 }
4733 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4734
4735next:
4736 m = packetlist;
4737 if (m != NULL) {
4738 packetlist = packetlist->m_nextpkt;
4739 m->m_nextpkt = NULL;
4740 }
4741 /* Reset the proto family to old proto family for CLAT */
4742 if (did_clat46)
4743 proto_family = old_proto_family;
4744 } while (m != NULL);
4745
4746 if (send_head != NULL) {
4747 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START,
4748 0, 0, 0, 0, 0);
4749 if (ifp->if_eflags & IFEF_SENDLIST) {
4750 retval = (*ifp->if_output_dlil)(ifp, send_head);
4751 if (retval == EQFULL || retval == EQSUSPENDED) {
4752 if (adv != NULL) {
4753 adv->code = (retval == EQFULL ?
4754 FADV_FLOW_CONTROLLED :
4755 FADV_SUSPENDED);
4756 }
4757 retval = 0;
4758 }
4759 if (retval == 0 && flen > 0) {
4760 fbytes += flen;
4761 fpkts++;
4762 }
4763 if (retval != 0 && dlil_verbose) {
4764 printf("%s: output error on %s retval = %d\n",
4765 __func__, if_name(ifp), retval);
4766 }
4767 } else {
4768 struct mbuf *send_m;
4769 int enq_cnt = 0;
4770 VERIFY(ifp->if_eflags & IFEF_ENQUEUE_MULTI);
4771 while (send_head != NULL) {
4772 send_m = send_head;
4773 send_head = send_m->m_nextpkt;
4774 send_m->m_nextpkt = NULL;
4775 retval = (*ifp->if_output_dlil)(ifp, send_m);
4776 if (retval == EQFULL || retval == EQSUSPENDED) {
4777 if (adv != NULL) {
4778 adv->code = (retval == EQFULL ?
4779 FADV_FLOW_CONTROLLED :
4780 FADV_SUSPENDED);
4781 }
4782 retval = 0;
4783 }
4784 if (retval == 0) {
4785 enq_cnt++;
4786 if (flen > 0)
4787 fpkts++;
4788 }
4789 if (retval != 0 && dlil_verbose) {
4790 printf("%s: output error on %s "
4791 "retval = %d\n",
4792 __func__, if_name(ifp), retval);
4793 }
4794 }
4795 if (enq_cnt > 0) {
4796 fbytes += flen;
4797 ifnet_start(ifp);
4798 }
4799 }
4800 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4801 }
4802
4803 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0);
4804
4805cleanup:
4806 if (fbytes > 0)
4807 ifp->if_fbytes += fbytes;
4808 if (fpkts > 0)
4809 ifp->if_fpackets += fpkts;
4810 if (proto != NULL)
4811 if_proto_free(proto);
4812 if (packetlist) /* if any packets are left, clean up */
4813 mbuf_freem_list(packetlist);
4814 if (retval == EJUSTRETURN)
4815 retval = 0;
4816 if (iorefcnt == 1)
4817 ifnet_decr_iorefcnt(ifp);
4818 if (rt != NULL) {
4819 rtfree(rt);
4820 rt = NULL;
4821 }
4822
4823 return (retval);
4824}
4825
4826/*
4827 * This routine checks whether the destination address is a candidate for
4828 * CLAT translation, i.e. it is not a loopback, link-local, multicast or
4829 * broadcast address.
4829 */
4830static int
4831dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
4832{
4833 int ret = 0;
4834	switch (proto_family) {
4835 case PF_INET: {
4836 struct ip *iph = mtod(m, struct ip *);
4837 if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr)))
4838 ret = 1;
4839 break;
4840 }
4841 case PF_INET6: {
4842 struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
4843 if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
4844 CLAT64_NEEDED(&ip6h->ip6_dst))
4845 ret = 1;
4846 break;
4847 }
4848 }
4849
4850 return (ret);
4851}
4852/*
4853 * @brief This routine translates an IPv4 packet to an IPv6 packet,
4854 * updates the protocol checksum and also translates ICMP type/code
4855 * along with the inner header.
4856 *
4857 * @param ifp Pointer to the interface
4858 * @param proto_family pointer to protocol family. It is updated if function
4859 * performs the translation successfully.
4860 * @param m Pointer to the pointer pointing to the packet. Needed because this
4861 * routine can end up changing the mbuf to a different one.
4862 *
4863 * @return 0 on success or else a negative value.
4864 */
4865static errno_t
4866dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
4867{
4868 VERIFY(*proto_family == PF_INET);
4869 VERIFY(IS_INTF_CLAT46(ifp));
4870
4871 pbuf_t pbuf_store, *pbuf = NULL;
4872 struct ip *iph = NULL;
4873 struct in_addr osrc, odst;
4874 uint8_t proto = 0;
4875 struct in6_ifaddr *ia6_clat_src = NULL;
4876 struct in6_addr *src = NULL;
4877 struct in6_addr dst;
4878 int error = 0;
4879 uint32_t off = 0;
4880 uint64_t tot_len = 0;
4881 uint16_t ip_id_val = 0;
4882 uint16_t ip_frag_off = 0;
4883
4884 boolean_t is_frag = FALSE;
4885 boolean_t is_first_frag = TRUE;
4886 boolean_t is_last_frag = TRUE;
4887
4888 pbuf_init_mbuf(&pbuf_store, *m, ifp);
4889 pbuf = &pbuf_store;
4890 iph = pbuf->pb_data;
4891
4892 osrc = iph->ip_src;
4893 odst = iph->ip_dst;
4894 proto = iph->ip_p;
4895 off = iph->ip_hl << 2;
4896 ip_id_val = iph->ip_id;
4897 ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
4898
4899 tot_len = ntohs(iph->ip_len);
4900
4901 /*
4902	 * For packets that are not first fragments
4903	 * we only need to adjust the checksum.
4904	 * For 4-to-6 translation, the fragmentation header
4905	 * gets appended after protocol translation.
4906 */
4907 if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
4908 is_frag = TRUE;
4909
4910 /* If the offset is not zero, it is not first frag */
4911 if (ip_frag_off != 0)
4912 is_first_frag = FALSE;
4913
4914 /* If IP_MF is set, then it is not last frag */
4915 if (ntohs(iph->ip_off) & IP_MF)
4916 is_last_frag = FALSE;
4917 }
4918
4919 /*
4920	 * Retrieve the local IPv6 CLAT46 address reserved for stateless
4921 * translation.
4922 */
4923 ia6_clat_src = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
4924 if (ia6_clat_src == NULL) {
4925 ip6stat.ip6s_clat464_out_nov6addr_drop++;
4926 error = -1;
4927 goto cleanup;
4928 }
4929
4930 src = &ia6_clat_src->ia_addr.sin6_addr;
4931
4932 /*
4933 * Translate IPv4 destination to IPv6 destination by using the
4934 * prefixes learned through prior PLAT discovery.
4935 */
4936 if ((error = nat464_synthesize_ipv6(ifp, &odst, &dst)) != 0) {
4937 ip6stat.ip6s_clat464_out_v6synthfail_drop++;
4938 goto cleanup;
4939 }
4940
4941 /* Translate the IP header part first */
4942 error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
4943 iph->ip_ttl, *src, dst, tot_len) == NT_NAT64) ? 0 : -1;
4944
4945 iph = NULL; /* Invalidate iph as pbuf has been modified */
4946
4947 if (error != 0) {
4948 ip6stat.ip6s_clat464_out_46transfail_drop++;
4949 goto cleanup;
4950 }
4951
4952 /*
4953 * Translate protocol header, update checksum, checksum flags
4954 * and related fields.
4955 */
4956 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
4957 proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
4958
4959 if (error != 0) {
4960 ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
4961 goto cleanup;
4962 }
4963
4964 /* Now insert the IPv6 fragment header */
4965 if (is_frag) {
4966 error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
4967
4968 if (error != 0) {
4969 ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
4970 goto cleanup;
4971 }
4972 }
4973
4974cleanup:
4975 if (ia6_clat_src != NULL)
4976 IFA_REMREF(&ia6_clat_src->ia_ifa);
4977
4978 if (pbuf_is_valid(pbuf)) {
4979 *m = pbuf->pb_mbuf;
4980 pbuf->pb_mbuf = NULL;
4981 pbuf_destroy(pbuf);
4982 } else {
4983 error = -1;
4984 ip6stat.ip6s_clat464_out_invalpbuf_drop++;
4985 }
4986
4987 if (error == 0) {
4988 *proto_family = PF_INET6;
4989 ip6stat.ip6s_clat464_out_success++;
4990 }
4991
4992 return (error);
4993}
4994
4995/*
4996 * @brief This routine translates incoming IPv6 to IPv4 packet,
4997 * updates protocol checksum and also translates ICMPv6 outer
4998 * and inner headers
4999 *
5000 * @return 0 on success or else a negative value.
5001 */
5002static errno_t
5003dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
5004{
5005 VERIFY(*proto_family == PF_INET6);
5006 VERIFY(IS_INTF_CLAT46(ifp));
5007
5008 struct ip6_hdr *ip6h = NULL;
5009 struct in6_addr osrc, odst;
5010 uint8_t proto = 0;
5011 struct in6_ifaddr *ia6_clat_dst = NULL;
5012 struct in_ifaddr *ia4_clat_dst = NULL;
5013 struct in_addr *dst = NULL;
5014 struct in_addr src;
5015 int error = 0;
5016 uint32_t off = 0;
5017 u_int64_t tot_len = 0;
5018 uint8_t tos = 0;
5019 boolean_t is_first_frag = TRUE;
5020
5021 /* Incoming mbuf does not contain valid IP6 header */
5022 if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
5023 ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
5024 (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
5025 ip6stat.ip6s_clat464_in_tooshort_drop++;
5026 return (-1);
5027 }
5028
5029 ip6h = mtod(*m, struct ip6_hdr *);
5030 /* Validate that mbuf contains IP payload equal to ip6_plen */
5031 if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
5032 ip6stat.ip6s_clat464_in_tooshort_drop++;
5033 return (-1);
5034 }
5035
5036 osrc = ip6h->ip6_src;
5037 odst = ip6h->ip6_dst;
5038
5039 /*
5040 * Retrieve the local CLAT46 reserved IPv6 address.
5041 * Let the packet pass if we don't find one, as the flag
5042 * may get set before IPv6 configuration has taken place.
5043 */
5044 ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
5045 if (ia6_clat_dst == NULL)
5046 goto done;
5047
5048 /*
5049	 * Check if the original dest in the packet is the same as the reserved
5050 * CLAT46 IPv6 address
5051 */
5052 if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
5053 pbuf_t pbuf_store, *pbuf = NULL;
5054 pbuf_init_mbuf(&pbuf_store, *m, ifp);
5055 pbuf = &pbuf_store;
5056
5057 /*
5058		 * Retrieve the local CLAT46 IPv4 address reserved for stateless
5059 * translation.
5060 */
5061 ia4_clat_dst = inifa_ifpclatv4(ifp);
5062 if (ia4_clat_dst == NULL) {
5063 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5064 ip6stat.ip6s_clat464_in_nov4addr_drop++;
5065 error = -1;
5066 goto cleanup;
5067 }
5068 IFA_REMREF(&ia6_clat_dst->ia_ifa);
5069
5070 /* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
5071 dst = &ia4_clat_dst->ia_addr.sin_addr;
5072 if ((error = nat464_synthesize_ipv4(ifp, &osrc, &src)) != 0) {
5073 ip6stat.ip6s_clat464_in_v4synthfail_drop++;
5074 error = -1;
5075 goto cleanup;
5076 }
5077
5078 ip6h = pbuf->pb_data;
5079 off = sizeof(struct ip6_hdr);
5080 proto = ip6h->ip6_nxt;
5081 tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
5082 tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
5083
5084 /*
5085 * Translate the IP header and update the fragmentation
5086 * header if needed
5087 */
5088 error = (nat464_translate_64(pbuf, off, tos, &proto,
5089 ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
5090 0 : -1;
5091
5092 ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
5093
5094 if (error != 0) {
5095 ip6stat.ip6s_clat464_in_64transfail_drop++;
5096 goto cleanup;
5097 }
5098
5099 /*
5100 * Translate protocol header, update checksum, checksum flags
5101 * and related fields.
5102 */
5103 error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
5104 (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
5105 NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
5106
5107 if (error != 0) {
5108 ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
5109 goto cleanup;
5110 }
5111
5112cleanup:
5113 if (ia4_clat_dst != NULL)
5114 IFA_REMREF(&ia4_clat_dst->ia_ifa);
5115
5116 if (pbuf_is_valid(pbuf)) {
5117 *m = pbuf->pb_mbuf;
5118 pbuf->pb_mbuf = NULL;
5119 pbuf_destroy(pbuf);
5120 } else {
5121 error = -1;
5122 ip6stat.ip6s_clat464_in_invalpbuf_drop++;
5123 }
5124
5125 if (error == 0) {
5126 *proto_family = PF_INET;
5127 ip6stat.ip6s_clat464_in_success++;
5128 }
5129 } /* CLAT traffic */
5130
5131done:
5132 return (error);
5133}
5134
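/*
 * Dispatch an ioctl to the interface: attached filters get the first chance
 * to handle (or intercept) it, then the matching protocol attachment, and
 * finally the driver's own if_ioctl.  EOPNOTSUPP from an earlier stage is
 * ignored so that a later stage may still handle the request; EJUSTRETURN
 * is mapped to success.
 */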
5135errno_t
5136ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code,
5137 void *ioctl_arg)
5138{
5139 struct ifnet_filter *filter;
5140 int retval = EOPNOTSUPP;
5141 int result = 0;
5142
5143 if (ifp == NULL || ioctl_code == 0)
5144 return (EINVAL);
5145
5146 /* Get an io ref count if the interface is attached */
5147 if (!ifnet_is_attached(ifp, 1))
5148 return (EOPNOTSUPP);
5149
5150 /*
5151 * Run the interface filters first.
5152 * We want to run all filters before calling the protocol,
5153 * interface family, or interface.
5154 */
5155 lck_mtx_lock_spin(&ifp->if_flt_lock);
5156 /* prevent filter list from changing in case we drop the lock */
5157 if_flt_monitor_busy(ifp);
5158 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) {
5159 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 ||
5160 filter->filt_protocol == proto_fam)) {
5161 lck_mtx_unlock(&ifp->if_flt_lock);
5162
5163 result = filter->filt_ioctl(filter->filt_cookie, ifp,
5164 proto_fam, ioctl_code, ioctl_arg);
5165
5166 lck_mtx_lock_spin(&ifp->if_flt_lock);
5167
5168 /* Only update retval if no one has handled the ioctl */
5169 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5170 if (result == ENOTSUP)
5171 result = EOPNOTSUPP;
5172 retval = result;
5173 if (retval != 0 && retval != EOPNOTSUPP) {
5174 /* we're done with the filter list */
5175 if_flt_monitor_unbusy(ifp);
5176 lck_mtx_unlock(&ifp->if_flt_lock);
5177 goto cleanup;
5178 }
5179 }
5180 }
5181 }
5182 /* we're done with the filter list */
5183 if_flt_monitor_unbusy(ifp);
5184 lck_mtx_unlock(&ifp->if_flt_lock);
5185
5186 /* Allow the protocol to handle the ioctl */
5187 if (proto_fam != 0) {
5188 struct if_proto *proto;
5189
5190 /* callee holds a proto refcnt upon success */
5191 ifnet_lock_shared(ifp);
5192 proto = find_attached_proto(ifp, proto_fam);
5193 ifnet_lock_done(ifp);
5194 if (proto != NULL) {
5195 proto_media_ioctl ioctlp =
5196 (proto->proto_kpi == kProtoKPI_v1 ?
5197 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl);
5198 result = EOPNOTSUPP;
5199 if (ioctlp != NULL)
5200 result = ioctlp(ifp, proto_fam, ioctl_code,
5201 ioctl_arg);
5202 if_proto_free(proto);
5203
5204 /* Only update retval if no one has handled the ioctl */
5205 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5206 if (result == ENOTSUP)
5207 result = EOPNOTSUPP;
5208 retval = result;
5209 if (retval && retval != EOPNOTSUPP)
5210 goto cleanup;
5211 }
5212 }
5213 }
5214
5215 /* retval is either 0 or EOPNOTSUPP */
5216
5217 /*
5218 * Let the interface handle this ioctl.
5219 * If it returns EOPNOTSUPP, ignore that, we may have
5220 * already handled this in the protocol or family.
5221 */
5222 if (ifp->if_ioctl)
5223 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg);
5224
5225 /* Only update retval if no one has handled the ioctl */
5226 if (retval == EOPNOTSUPP || result == EJUSTRETURN) {
5227 if (result == ENOTSUP)
5228 result = EOPNOTSUPP;
5229 retval = result;
5230 if (retval && retval != EOPNOTSUPP) {
5231 goto cleanup;
5232 }
5233 }
5234
5235cleanup:
5236 if (retval == EJUSTRETURN)
5237 retval = 0;
5238
5239 ifnet_decr_iorefcnt(ifp);
5240
5241 return (retval);
5242}
5243
5244__private_extern__ errno_t
5245dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback)
5246{
5247 errno_t error = 0;
5248
5249
5250 if (ifp->if_set_bpf_tap) {
5251 /* Get an io reference on the interface if it is attached */
5252 if (!ifnet_is_attached(ifp, 1))
5253 return (ENXIO);
5254 error = ifp->if_set_bpf_tap(ifp, mode, callback);
5255 ifnet_decr_iorefcnt(ifp);
5256 }
5257 return (error);
5258}
5259
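/*
 * Resolve a multicast protocol address to a link-layer address.  The
 * protocol's resolve_multi handler is consulted first; the interface is
 * then asked to verify the resulting link-layer address (or the original
 * protocol address, if the protocol could not resolve it) via its
 * if_check_multi callback.
 */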
5260errno_t
5261dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr,
5262 struct sockaddr *ll_addr, size_t ll_len)
5263{
5264 errno_t result = EOPNOTSUPP;
5265 struct if_proto *proto;
5266 const struct sockaddr *verify;
5267 proto_media_resolve_multi resolvep;
5268
5269 if (!ifnet_is_attached(ifp, 1))
5270 return (result);
5271
5272 bzero(ll_addr, ll_len);
5273
5274 /* Call the protocol first; callee holds a proto refcnt upon success */
5275 ifnet_lock_shared(ifp);
5276 proto = find_attached_proto(ifp, proto_addr->sa_family);
5277 ifnet_lock_done(ifp);
5278 if (proto != NULL) {
5279 resolvep = (proto->proto_kpi == kProtoKPI_v1 ?
5280 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi);
5281 if (resolvep != NULL)
5282 result = resolvep(ifp, proto_addr,
5283 (struct sockaddr_dl *)(void *)ll_addr, ll_len);
5284 if_proto_free(proto);
5285 }
5286
5287 /* Let the interface verify the multicast address */
5288 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) {
5289 if (result == 0)
5290 verify = ll_addr;
5291 else
5292 verify = proto_addr;
5293 result = ifp->if_check_multi(ifp, verify);
5294 }
5295
5296 ifnet_decr_iorefcnt(ifp);
5297 return (result);
5298}
5299
5300__private_extern__ errno_t
5301dlil_send_arp_internal(ifnet_t ifp, u_short arpop,
5302 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
5303 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
5304{
5305 struct if_proto *proto;
5306 errno_t result = 0;
5307
5308 /* callee holds a proto refcnt upon success */
5309 ifnet_lock_shared(ifp);
5310 proto = find_attached_proto(ifp, target_proto->sa_family);
5311 ifnet_lock_done(ifp);
5312 if (proto == NULL) {
5313 result = ENOTSUP;
5314 } else {
5315 proto_media_send_arp arpp;
5316 arpp = (proto->proto_kpi == kProtoKPI_v1 ?
5317 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp);
5318 if (arpp == NULL) {
5319 result = ENOTSUP;
5320 } else {
5321 switch (arpop) {
5322 case ARPOP_REQUEST:
5323 arpstat.txrequests++;
5324 if (target_hw != NULL)
5325 arpstat.txurequests++;
5326 break;
5327 case ARPOP_REPLY:
5328 arpstat.txreplies++;
5329 break;
5330 }
5331 result = arpp(ifp, arpop, sender_hw, sender_proto,
5332 target_hw, target_proto);
5333 }
5334 if_proto_free(proto);
5335 }
5336
5337 return (result);
5338}
5339
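/*
 * Per-thread network marks.  A mark token is encoded as a pointer offset
 * from net_thread_marks_base: push returns the set of bits it actually
 * turned on in the current uthread's uu_network_marks, and pop clears
 * exactly those bits (the "unmark" variants do the inverse, temporarily
 * clearing bits and later restoring them).  This lets nested push/pop
 * pairs compose without any per-call allocation.  Typical usage (sketch
 * only, using NET_THREAD_HELD_PF as an example mark bit):
 *
 *	net_thread_marks_t marks;
 *
 *	marks = net_thread_marks_push(NET_THREAD_HELD_PF);
 *	... section that must be recognizable as marked ...
 *	net_thread_marks_pop(marks);
 */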
5340struct net_thread_marks { };
5341static const struct net_thread_marks net_thread_marks_base = { };
5342
5343__private_extern__ const net_thread_marks_t net_thread_marks_none =
5344 &net_thread_marks_base;
5345
5346__private_extern__ net_thread_marks_t
5347net_thread_marks_push(u_int32_t push)
5348{
5349 static const char *const base = (const void*)&net_thread_marks_base;
5350 u_int32_t pop = 0;
5351
5352 if (push != 0) {
5353 struct uthread *uth = get_bsdthread_info(current_thread());
5354
5355 pop = push & ~uth->uu_network_marks;
5356 if (pop != 0)
5357 uth->uu_network_marks |= pop;
5358 }
5359
5360 return ((net_thread_marks_t)&base[pop]);
5361}
5362
5363__private_extern__ net_thread_marks_t
5364net_thread_unmarks_push(u_int32_t unpush)
5365{
5366 static const char *const base = (const void*)&net_thread_marks_base;
5367 u_int32_t unpop = 0;
5368
5369 if (unpush != 0) {
5370 struct uthread *uth = get_bsdthread_info(current_thread());
5371
5372 unpop = unpush & uth->uu_network_marks;
5373 if (unpop != 0)
5374 uth->uu_network_marks &= ~unpop;
5375 }
5376
5377 return ((net_thread_marks_t)&base[unpop]);
5378}
5379
5380__private_extern__ void
5381net_thread_marks_pop(net_thread_marks_t popx)
5382{
5383 static const char *const base = (const void*)&net_thread_marks_base;
5384 const ptrdiff_t pop = (const char *)popx - (const char *)base;
5385
5386 if (pop != 0) {
5387 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
5388 struct uthread *uth = get_bsdthread_info(current_thread());
5389
5390 VERIFY((pop & ones) == pop);
5391 VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop);
5392 uth->uu_network_marks &= ~pop;
5393 }
5394}
5395
5396__private_extern__ void
5397net_thread_unmarks_pop(net_thread_marks_t unpopx)
5398{
5399 static const char *const base = (const void*)&net_thread_marks_base;
5400 ptrdiff_t unpop = (const char *)unpopx - (const char *)base;
5401
5402 if (unpop != 0) {
5403 static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U;
5404 struct uthread *uth = get_bsdthread_info(current_thread());
5405
5406 VERIFY((unpop & ones) == unpop);
5407 VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0);
5408 uth->uu_network_marks |= unpop;
5409 }
5410}
5411
5412__private_extern__ u_int32_t
5413net_thread_is_marked(u_int32_t check)
5414{
5415 if (check != 0) {
5416 struct uthread *uth = get_bsdthread_info(current_thread());
5417 return (uth->uu_network_marks & check);
5418 }
5419 else
5420 return (0);
5421}
5422
5423__private_extern__ u_int32_t
5424net_thread_is_unmarked(u_int32_t check)
5425{
5426 if (check != 0) {
5427 struct uthread *uth = get_bsdthread_info(current_thread());
5428 return (~uth->uu_network_marks & check);
	} else {
		return (0);
	}
5432}
5433
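/*
 * An ARP "announcement" (gratuitous ARP) carries the same protocol address
 * in both the sender and target fields; such a request must only go out on
 * the specific interface and is never flooded to all interfaces.
 */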
5434static __inline__ int
5435_is_announcement(const struct sockaddr_in * sender_sin,
5436 const struct sockaddr_in * target_sin)
5437{
	if (sender_sin == NULL || target_sin == NULL) {
5439 return (FALSE);
5440 }
5441 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr);
5442}
5443
5444__private_extern__ errno_t
5445dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw,
5446 const struct sockaddr *sender_proto, const struct sockaddr_dl *target_hw,
5447 const struct sockaddr *target_proto0, u_int32_t rtflags)
5448{
5449 errno_t result = 0;
5450 const struct sockaddr_in * sender_sin;
5451 const struct sockaddr_in * target_sin;
5452 struct sockaddr_inarp target_proto_sinarp;
5453 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0;
5454
5455 if (target_proto == NULL || (sender_proto != NULL &&
5456 sender_proto->sa_family != target_proto->sa_family))
5457 return (EINVAL);
5458
5459 /*
5460 * If the target is a (default) router, provide that
5461 * information to the send_arp callback routine.
5462 */
5463 if (rtflags & RTF_ROUTER) {
5464 bcopy(target_proto, &target_proto_sinarp,
5465 sizeof (struct sockaddr_in));
5466 target_proto_sinarp.sin_other |= SIN_ROUTER;
5467 target_proto = (struct sockaddr *)&target_proto_sinarp;
5468 }
5469
5470 /*
5471 * If this is an ARP request and the target IP is IPv4LL,
5472 * send the request on all interfaces. The exception is
5473 * an announcement, which must only appear on the specific
5474 * interface.
5475 */
5476 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto;
5477 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto;
5478 if (target_proto->sa_family == AF_INET &&
5479 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) &&
5480 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST &&
	    !_is_announcement(sender_sin, target_sin)) {
5482 ifnet_t *ifp_list;
5483 u_int32_t count;
5484 u_int32_t ifp_on;
5485
5486 result = ENOTSUP;
5487
5488 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) {
5489 for (ifp_on = 0; ifp_on < count; ifp_on++) {
5490 errno_t new_result;
5491 ifaddr_t source_hw = NULL;
5492 ifaddr_t source_ip = NULL;
5493 struct sockaddr_in source_ip_copy;
5494 struct ifnet *cur_ifp = ifp_list[ifp_on];
5495
5496 /*
5497 * Only arp on interfaces marked for IPv4LL
5498 * ARPing. This may mean that we don't ARP on
5499 * the interface the subnet route points to.
5500 */
5501 if (!(cur_ifp->if_eflags & IFEF_ARPLL))
5502 continue;
5503
5504 /* Find the source IP address */
5505 ifnet_lock_shared(cur_ifp);
5506 source_hw = cur_ifp->if_lladdr;
5507 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead,
5508 ifa_link) {
5509 IFA_LOCK(source_ip);
5510 if (source_ip->ifa_addr != NULL &&
5511 source_ip->ifa_addr->sa_family ==
5512 AF_INET) {
5513 /* Copy the source IP address */
5514 source_ip_copy =
5515 *(struct sockaddr_in *)
5516 (void *)source_ip->ifa_addr;
5517 IFA_UNLOCK(source_ip);
5518 break;
5519 }
5520 IFA_UNLOCK(source_ip);
5521 }
5522
				/* No IP source address found; don't ARP */
5524 if (source_ip == NULL) {
5525 ifnet_lock_done(cur_ifp);
5526 continue;
5527 }
5528
5529 IFA_ADDREF(source_hw);
5530 ifnet_lock_done(cur_ifp);
5531
5532 /* Send the ARP */
5533 new_result = dlil_send_arp_internal(cur_ifp,
5534 arpop, (struct sockaddr_dl *)(void *)
5535 source_hw->ifa_addr,
5536 (struct sockaddr *)&source_ip_copy, NULL,
5537 target_proto);
5538
5539 IFA_REMREF(source_hw);
5540 if (result == ENOTSUP) {
5541 result = new_result;
5542 }
5543 }
5544 ifnet_list_free(ifp_list);
5545 }
5546 } else {
5547 result = dlil_send_arp_internal(ifp, arpop, sender_hw,
5548 sender_proto, target_hw, target_proto);
5549 }
5550
5551 return (result);
5552}
5553
5554/*
5555 * Caller must hold ifnet head lock.
5556 */
5557static int
5558ifnet_lookup(struct ifnet *ifp)
5559{
5560 struct ifnet *_ifp;
5561
5562 LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_HELD);
5563 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) {
5564 if (_ifp == ifp)
5565 break;
5566 }
5567 return (_ifp != NULL);
5568}
5569
5570/*
 * Caller has to pass a non-zero refio argument to get an IO
 * reference count.  This will prevent ifnet_detach from being
 * called when there are outstanding IO reference counts.
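 *
 * A typical caller pattern (sketch):
 *
 *	if (ifnet_is_attached(ifp, 1)) {
 *		... safe to use ifp here ...
 *		ifnet_decr_iorefcnt(ifp);
 *	}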
5574 */
5575int
5576ifnet_is_attached(struct ifnet *ifp, int refio)
5577{
5578 int ret;
5579
5580 lck_mtx_lock_spin(&ifp->if_ref_lock);
5581 if ((ret = IF_FULLY_ATTACHED(ifp))) {
5582 if (refio > 0)
5583 ifp->if_refio++;
5584 }
5585 lck_mtx_unlock(&ifp->if_ref_lock);
5586
5587 return (ret);
5588}
5589
5590/*
5591 * Caller must ensure the interface is attached; the assumption is that
 * there is at least one outstanding IO reference held already.
5593 * Most callers would call ifnet_is_attached() instead.
5594 */
5595void
5596ifnet_incr_iorefcnt(struct ifnet *ifp)
5597{
5598 lck_mtx_lock_spin(&ifp->if_ref_lock);
5599 VERIFY(IF_FULLY_ATTACHED(ifp));
5600 VERIFY(ifp->if_refio > 0);
5601 ifp->if_refio++;
5602 lck_mtx_unlock(&ifp->if_ref_lock);
5603}
5604
5605void
5606ifnet_decr_iorefcnt(struct ifnet *ifp)
5607{
5608 lck_mtx_lock_spin(&ifp->if_ref_lock);
5609 VERIFY(ifp->if_refio > 0);
5610 VERIFY(ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING));
5611 ifp->if_refio--;
5612
5613 /*
	 * If there are no more outstanding IO references, wake up the
	 * ifnet_detach thread if the detaching flag is set.
5616 */
5617 if (ifp->if_refio == 0 && (ifp->if_refflags & IFRF_DETACHING))
5618 wakeup(&(ifp->if_refio));
5619
5620 lck_mtx_unlock(&ifp->if_ref_lock);
5621}
5622
5623static void
5624dlil_if_trace(struct dlil_ifnet *dl_if, int refhold)
5625{
5626 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if;
5627 ctrace_t *tr;
5628 u_int32_t idx;
5629 u_int16_t *cnt;
5630
5631 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) {
5632 panic("%s: dl_if %p has no debug structure", __func__, dl_if);
5633 /* NOTREACHED */
5634 }
5635
5636 if (refhold) {
5637 cnt = &dl_if_dbg->dldbg_if_refhold_cnt;
5638 tr = dl_if_dbg->dldbg_if_refhold;
5639 } else {
5640 cnt = &dl_if_dbg->dldbg_if_refrele_cnt;
5641 tr = dl_if_dbg->dldbg_if_refrele;
5642 }
5643
5644 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE;
5645 ctrace_record(&tr[idx]);
5646}
5647
5648errno_t
5649dlil_if_ref(struct ifnet *ifp)
5650{
5651 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5652
5653 if (dl_if == NULL)
5654 return (EINVAL);
5655
5656 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5657 ++dl_if->dl_if_refcnt;
5658 if (dl_if->dl_if_refcnt == 0) {
5659 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
5660 /* NOTREACHED */
5661 }
5662 if (dl_if->dl_if_trace != NULL)
5663 (*dl_if->dl_if_trace)(dl_if, TRUE);
5664 lck_mtx_unlock(&dl_if->dl_if_lock);
5665
5666 return (0);
5667}
5668
5669errno_t
5670dlil_if_free(struct ifnet *ifp)
5671{
5672 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
5673 bool need_release = FALSE;
5674
5675 if (dl_if == NULL)
5676 return (EINVAL);
5677
5678 lck_mtx_lock_spin(&dl_if->dl_if_lock);
5679 switch (dl_if->dl_if_refcnt) {
5680 case 0:
5681 panic("%s: negative refcnt for ifp=%p", __func__, ifp);
5682 /* NOTREACHED */
5683 break;
5684 case 1:
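		/*
		 * The last reference is going away while the ifnet is still
		 * embryonic (it never completed attach); hand it back to
		 * dlil_if_release() once the lock is dropped below.
		 */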
5685 if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
5686 need_release = TRUE;
5687 }
5688 break;
5689 default:
5690 break;
5691 }
5692 --dl_if->dl_if_refcnt;
5693 if (dl_if->dl_if_trace != NULL)
5694 (*dl_if->dl_if_trace)(dl_if, FALSE);
5695 lck_mtx_unlock(&dl_if->dl_if_lock);
5696 if (need_release) {
5697 dlil_if_release(ifp);
5698 }
5699 return (0);
5700}
5701
5702static errno_t
5703dlil_attach_protocol_internal(struct if_proto *proto,
5704 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count,
5705 uint32_t * proto_count)
5706{
5707 struct kev_dl_proto_data ev_pr_data;
5708 struct ifnet *ifp = proto->ifp;
5709 int retval = 0;
5710 u_int32_t hash_value = proto_hash_value(proto->protocol_family);
5711 struct if_proto *prev_proto;
5712 struct if_proto *_proto;
5713
5714 /* callee holds a proto refcnt upon success */
5715 ifnet_lock_exclusive(ifp);
5716 _proto = find_attached_proto(ifp, proto->protocol_family);
5717 if (_proto != NULL) {
5718 ifnet_lock_done(ifp);
5719 if_proto_free(_proto);
5720 return (EEXIST);
5721 }
5722
5723 /*
5724 * Call family module add_proto routine so it can refine the
5725 * demux descriptors as it wishes.
5726 */
5727 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list,
5728 demux_count);
5729 if (retval) {
5730 ifnet_lock_done(ifp);
5731 return (retval);
5732 }
5733
5734 /*
5735 * Insert the protocol in the hash
5736 */
5737 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]);
5738 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL)
5739 prev_proto = SLIST_NEXT(prev_proto, next_hash);
5740 if (prev_proto)
5741 SLIST_INSERT_AFTER(prev_proto, proto, next_hash);
5742 else
5743 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value],
5744 proto, next_hash);
5745
5746 /* hold a proto refcnt for attach */
5747 if_proto_ref(proto);
5748
5749 /*
	 * The reserved field carries the number of protocols still attached
	 * (subject to change).
5752 */
5753 ev_pr_data.proto_family = proto->protocol_family;
5754 ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
5755
5756 ifnet_lock_done(ifp);
5757
5758 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED,
5759 (struct net_event_data *)&ev_pr_data,
5760 sizeof (struct kev_dl_proto_data));
5761 if (proto_count != NULL) {
5762 *proto_count = ev_pr_data.proto_remaining_count;
5763 }
5764 return (retval);
5765}
5766
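/*
 * Sketch of a typical caller; the protocol family and handler names below
 * are illustrative only:
 *
 *	struct ifnet_attach_proto_param reg;
 *	errno_t error;
 *
 *	bzero(&reg, sizeof (reg));
 *	reg.input = my_proto_input;
 *	reg.pre_output = my_proto_pre_output;
 *	error = ifnet_attach_protocol(ifp, MY_PROTO_FAMILY, &reg);
 */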
5767errno_t
5768ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol,
5769 const struct ifnet_attach_proto_param *proto_details)
5770{
5771 int retval = 0;
5772 struct if_proto *ifproto = NULL;
5773 uint32_t proto_count = 0;
5774
5775 ifnet_head_lock_shared();
5776 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5777 retval = EINVAL;
5778 goto end;
5779 }
5780 /* Check that the interface is in the global list */
5781 if (!ifnet_lookup(ifp)) {
5782 retval = ENXIO;
5783 goto end;
5784 }
5785
5786 ifproto = zalloc(dlif_proto_zone);
5787 if (ifproto == NULL) {
5788 retval = ENOMEM;
5789 goto end;
5790 }
5791 bzero(ifproto, dlif_proto_size);
5792
5793 /* refcnt held above during lookup */
5794 ifproto->ifp = ifp;
5795 ifproto->protocol_family = protocol;
5796 ifproto->proto_kpi = kProtoKPI_v1;
5797 ifproto->kpi.v1.input = proto_details->input;
5798 ifproto->kpi.v1.pre_output = proto_details->pre_output;
5799 ifproto->kpi.v1.event = proto_details->event;
5800 ifproto->kpi.v1.ioctl = proto_details->ioctl;
5801 ifproto->kpi.v1.detached = proto_details->detached;
5802 ifproto->kpi.v1.resolve_multi = proto_details->resolve;
5803 ifproto->kpi.v1.send_arp = proto_details->send_arp;
5804
5805 retval = dlil_attach_protocol_internal(ifproto,
5806 proto_details->demux_list, proto_details->demux_count,
5807 &proto_count);
5808
5809end:
5810 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5811 DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n",
5812 if_name(ifp), protocol, retval);
5813 } else {
5814 if (dlil_verbose) {
5815 printf("%s: attached v1 protocol %d (count = %d)\n",
5816 if_name(ifp),
5817 protocol, proto_count);
5818 }
5819 }
5820 ifnet_head_done();
5821 if (retval == 0) {
5822 /*
		 * A protocol has been attached; mark the interface up.
5824 * This used to be done by configd.KernelEventMonitor, but that
5825 * is inherently prone to races (rdar://problem/30810208).
5826 */
5827 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5828 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5829 dlil_post_sifflags_msg(ifp);
5830 } else if (ifproto != NULL) {
5831 zfree(dlif_proto_zone, ifproto);
5832 }
5833 return (retval);
5834}
5835
5836errno_t
5837ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol,
5838 const struct ifnet_attach_proto_param_v2 *proto_details)
5839{
5840 int retval = 0;
5841 struct if_proto *ifproto = NULL;
5842 uint32_t proto_count = 0;
5843
5844 ifnet_head_lock_shared();
5845 if (ifp == NULL || protocol == 0 || proto_details == NULL) {
5846 retval = EINVAL;
5847 goto end;
5848 }
5849 /* Check that the interface is in the global list */
5850 if (!ifnet_lookup(ifp)) {
5851 retval = ENXIO;
5852 goto end;
5853 }
5854
5855 ifproto = zalloc(dlif_proto_zone);
5856 if (ifproto == NULL) {
5857 retval = ENOMEM;
5858 goto end;
5859 }
5860 bzero(ifproto, sizeof(*ifproto));
5861
5862 /* refcnt held above during lookup */
5863 ifproto->ifp = ifp;
5864 ifproto->protocol_family = protocol;
5865 ifproto->proto_kpi = kProtoKPI_v2;
5866 ifproto->kpi.v2.input = proto_details->input;
5867 ifproto->kpi.v2.pre_output = proto_details->pre_output;
5868 ifproto->kpi.v2.event = proto_details->event;
5869 ifproto->kpi.v2.ioctl = proto_details->ioctl;
5870 ifproto->kpi.v2.detached = proto_details->detached;
5871 ifproto->kpi.v2.resolve_multi = proto_details->resolve;
5872 ifproto->kpi.v2.send_arp = proto_details->send_arp;
5873
5874 retval = dlil_attach_protocol_internal(ifproto,
5875 proto_details->demux_list, proto_details->demux_count,
5876 &proto_count);
5877
5878end:
5879 if (retval != 0 && retval != EEXIST && ifp != NULL) {
5880 DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n",
5881 if_name(ifp), protocol, retval);
5882 } else {
5883 if (dlil_verbose) {
5884 printf("%s: attached v2 protocol %d (count = %d)\n",
5885 if_name(ifp),
5886 protocol, proto_count);
5887 }
5888 }
5889 ifnet_head_done();
5890 if (retval == 0) {
5891 /*
		 * A protocol has been attached; mark the interface up.
5893 * This used to be done by configd.KernelEventMonitor, but that
5894 * is inherently prone to races (rdar://problem/30810208).
5895 */
5896 (void) ifnet_set_flags(ifp, IFF_UP, IFF_UP);
5897 (void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
5898 dlil_post_sifflags_msg(ifp);
5899 } else if (ifproto != NULL) {
5900 zfree(dlif_proto_zone, ifproto);
5901 }
5902 return (retval);
5903}
5904
5905errno_t
5906ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family)
5907{
5908 struct if_proto *proto = NULL;
5909 int retval = 0;
5910
5911 if (ifp == NULL || proto_family == 0) {
5912 retval = EINVAL;
5913 goto end;
5914 }
5915
5916 ifnet_lock_exclusive(ifp);
5917 /* callee holds a proto refcnt upon success */
5918 proto = find_attached_proto(ifp, proto_family);
5919 if (proto == NULL) {
5920 retval = ENXIO;
5921 ifnet_lock_done(ifp);
5922 goto end;
5923 }
5924
5925 /* call family module del_proto */
5926 if (ifp->if_del_proto)
5927 ifp->if_del_proto(ifp, proto->protocol_family);
5928
5929 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)],
5930 proto, if_proto, next_hash);
5931
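	/*
	 * Point the protocol's callbacks at harmless local stubs so that a
	 * thread still holding a reference cannot call back into the
	 * protocol module that is being detached.
	 */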
5932 if (proto->proto_kpi == kProtoKPI_v1) {
5933 proto->kpi.v1.input = ifproto_media_input_v1;
5934 proto->kpi.v1.pre_output = ifproto_media_preout;
5935 proto->kpi.v1.event = ifproto_media_event;
5936 proto->kpi.v1.ioctl = ifproto_media_ioctl;
5937 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi;
5938 proto->kpi.v1.send_arp = ifproto_media_send_arp;
5939 } else {
5940 proto->kpi.v2.input = ifproto_media_input_v2;
5941 proto->kpi.v2.pre_output = ifproto_media_preout;
5942 proto->kpi.v2.event = ifproto_media_event;
5943 proto->kpi.v2.ioctl = ifproto_media_ioctl;
5944 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi;
5945 proto->kpi.v2.send_arp = ifproto_media_send_arp;
5946 }
5947 proto->detached = 1;
5948 ifnet_lock_done(ifp);
5949
5950 if (dlil_verbose) {
5951 printf("%s: detached %s protocol %d\n", if_name(ifp),
5952 (proto->proto_kpi == kProtoKPI_v1) ?
5953 "v1" : "v2", proto_family);
5954 }
5955
5956 /* release proto refcnt held during protocol attach */
5957 if_proto_free(proto);
5958
5959 /*
5960 * Release proto refcnt held during lookup; the rest of
5961 * protocol detach steps will happen when the last proto
5962 * reference is released.
5963 */
5964 if_proto_free(proto);
5965
5966end:
5967 return (retval);
5968}
5969
5970
5971static errno_t
5972ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol,
5973 struct mbuf *packet, char *header)
5974{
5975#pragma unused(ifp, protocol, packet, header)
5976 return (ENXIO);
5977}
5978
5979static errno_t
5980ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol,
5981 struct mbuf *packet)
5982{
5983#pragma unused(ifp, protocol, packet)
	return (ENXIO);
}
5987
5988static errno_t
5989ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol,
5990 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type,
5991 char *link_layer_dest)
5992{
5993#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest)
	return (ENXIO);
}
5997
5998static void
5999ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol,
6000 const struct kev_msg *event)
6001{
6002#pragma unused(ifp, protocol, event)
6003}
6004
6005static errno_t
6006ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol,
6007 unsigned long command, void *argument)
6008{
6009#pragma unused(ifp, protocol, command, argument)
6010 return (ENXIO);
6011}
6012
6013static errno_t
6014ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr,
6015 struct sockaddr_dl *out_ll, size_t ll_len)
6016{
6017#pragma unused(ifp, proto_addr, out_ll, ll_len)
6018 return (ENXIO);
6019}
6020
6021static errno_t
6022ifproto_media_send_arp(struct ifnet *ifp, u_short arpop,
6023 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto,
6024 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto)
6025{
6026#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto)
6027 return (ENXIO);
6028}
6029
6030extern int if_next_index(void);
6031extern int tcp_ecn_outbound;
6032
6033errno_t
6034ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr)
6035{
6036 struct ifnet *tmp_if;
6037 struct ifaddr *ifa;
6038 struct if_data_internal if_data_saved;
6039 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6040 struct dlil_threading_info *dl_inp;
6041 u_int32_t sflags = 0;
6042 int err;
6043
6044 if (ifp == NULL)
6045 return (EINVAL);
6046
6047 /*
6048 * Serialize ifnet attach using dlil_ifnet_lock, in order to
6049 * prevent the interface from being configured while it is
6050 * embryonic, as ifnet_head_lock is dropped and reacquired
6051 * below prior to marking the ifnet with IFRF_ATTACHED.
6052 */
6053 dlil_if_lock();
6054 ifnet_head_lock_exclusive();
6055 /* Verify we aren't already on the list */
6056 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) {
6057 if (tmp_if == ifp) {
6058 ifnet_head_done();
6059 dlil_if_unlock();
6060 return (EEXIST);
6061 }
6062 }
6063
6064 lck_mtx_lock_spin(&ifp->if_ref_lock);
6065 if (!(ifp->if_refflags & IFRF_EMBRYONIC)) {
6066 panic_plain("%s: flags mismatch (embryonic not set) ifp=%p",
6067 __func__, ifp);
6068 /* NOTREACHED */
6069 }
6070 lck_mtx_unlock(&ifp->if_ref_lock);
6071
6072 ifnet_lock_exclusive(ifp);
6073
6074 /* Sanity check */
6075 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6076 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6077
6078 if (ll_addr != NULL) {
6079 if (ifp->if_addrlen == 0) {
6080 ifp->if_addrlen = ll_addr->sdl_alen;
6081 } else if (ll_addr->sdl_alen != ifp->if_addrlen) {
6082 ifnet_lock_done(ifp);
6083 ifnet_head_done();
6084 dlil_if_unlock();
6085 return (EINVAL);
6086 }
6087 }
6088
6089 /*
6090 * Allow interfaces without protocol families to attach
6091 * only if they have the necessary fields filled out.
6092 */
6093 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) {
6094 DLIL_PRINTF("%s: Attempt to attach interface without "
6095 "family module - %d\n", __func__, ifp->if_family);
6096 ifnet_lock_done(ifp);
6097 ifnet_head_done();
6098 dlil_if_unlock();
6099 return (ENODEV);
6100 }
6101
6102 /* Allocate protocol hash table */
6103 VERIFY(ifp->if_proto_hash == NULL);
6104 ifp->if_proto_hash = zalloc(dlif_phash_zone);
6105 if (ifp->if_proto_hash == NULL) {
6106 ifnet_lock_done(ifp);
6107 ifnet_head_done();
6108 dlil_if_unlock();
6109 return (ENOBUFS);
6110 }
6111 bzero(ifp->if_proto_hash, dlif_phash_size);
6112
6113 lck_mtx_lock_spin(&ifp->if_flt_lock);
6114 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
6115 TAILQ_INIT(&ifp->if_flt_head);
6116 VERIFY(ifp->if_flt_busy == 0);
6117 VERIFY(ifp->if_flt_waiters == 0);
6118 lck_mtx_unlock(&ifp->if_flt_lock);
6119
6120 if (!(dl_if->dl_if_flags & DLIF_REUSE)) {
6121 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs));
6122 LIST_INIT(&ifp->if_multiaddrs);
6123 }
6124
6125 VERIFY(ifp->if_allhostsinm == NULL);
6126 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6127 TAILQ_INIT(&ifp->if_addrhead);
6128
6129 if (ifp->if_index == 0) {
6130 int idx = if_next_index();
6131
6132 if (idx == -1) {
6133 ifp->if_index = 0;
6134 ifnet_lock_done(ifp);
6135 ifnet_head_done();
6136 dlil_if_unlock();
6137 return (ENOBUFS);
6138 }
6139 ifp->if_index = idx;
6140 }
6141 /* There should not be anything occupying this slot */
6142 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6143
6144 /* allocate (if needed) and initialize a link address */
6145 ifa = dlil_alloc_lladdr(ifp, ll_addr);
6146 if (ifa == NULL) {
6147 ifnet_lock_done(ifp);
6148 ifnet_head_done();
6149 dlil_if_unlock();
6150 return (ENOBUFS);
6151 }
6152
6153 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL);
6154 ifnet_addrs[ifp->if_index - 1] = ifa;
6155
6156 /* make this address the first on the list */
6157 IFA_LOCK(ifa);
6158 /* hold a reference for ifnet_addrs[] */
6159 IFA_ADDREF_LOCKED(ifa);
6160 /* if_attach_link_ifa() holds a reference for ifa_link */
6161 if_attach_link_ifa(ifp, ifa);
6162 IFA_UNLOCK(ifa);
6163
6164#if CONFIG_MACF_NET
6165 mac_ifnet_label_associate(ifp);
6166#endif
6167
6168 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link);
6169 ifindex2ifnet[ifp->if_index] = ifp;
6170
6171 /* Hold a reference to the underlying dlil_ifnet */
6172 ifnet_reference(ifp);
6173
	/* Clear stats (save and restore other fields that we care about) */
6175 if_data_saved = ifp->if_data;
6176 bzero(&ifp->if_data, sizeof (ifp->if_data));
6177 ifp->if_data.ifi_type = if_data_saved.ifi_type;
6178 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen;
6179 ifp->if_data.ifi_physical = if_data_saved.ifi_physical;
6180 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen;
6181 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen;
6182 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu;
6183 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate;
6184 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist;
6185 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu;
6186 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu;
6187 ifnet_touch_lastchange(ifp);
6188
6189 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL ||
6190 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED ||
6191 ifp->if_output_sched_model == IFNET_SCHED_MODEL_FQ_CODEL);
6192
6193 /* By default, use SFB and enable flow advisory */
6194 sflags = PKTSCHEDF_QALG_SFB;
6195 if (if_flowadv)
6196 sflags |= PKTSCHEDF_QALG_FLOWCTL;
6197
6198 if (if_delaybased_queue)
6199 sflags |= PKTSCHEDF_QALG_DELAYBASED;
6200
6201 if (ifp->if_output_sched_model ==
6202 IFNET_SCHED_MODEL_DRIVER_MANAGED)
6203 sflags |= PKTSCHEDF_QALG_DRIVER_MANAGED;
6204
6205 /* Initialize transmit queue(s) */
6206 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE));
6207 if (err != 0) {
6208 panic_plain("%s: ifp=%p couldn't initialize transmit queue; "
6209 "err=%d", __func__, ifp, err);
6210 /* NOTREACHED */
6211 }
6212
6213 /* Sanity checks on the input thread storage */
6214 dl_inp = &dl_if->dl_if_inpstorage;
6215 bzero(&dl_inp->stats, sizeof (dl_inp->stats));
6216 VERIFY(dl_inp->input_waiting == 0);
6217 VERIFY(dl_inp->wtot == 0);
6218 VERIFY(dl_inp->ifp == NULL);
6219 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts));
6220 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0);
6221 VERIFY(!dl_inp->net_affinity);
6222 VERIFY(ifp->if_inp == NULL);
6223 VERIFY(dl_inp->input_thr == THREAD_NULL);
6224 VERIFY(dl_inp->wloop_thr == THREAD_NULL);
6225 VERIFY(dl_inp->poll_thr == THREAD_NULL);
6226 VERIFY(dl_inp->tag == 0);
6227 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF);
6228 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats));
6229 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats));
6230 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats));
6231#if IFNET_INPUT_SANITY_CHK
6232 VERIFY(dl_inp->input_mbuf_cnt == 0);
6233#endif /* IFNET_INPUT_SANITY_CHK */
6234
6235 /*
6236 * A specific DLIL input thread is created per Ethernet/cellular
6237 * interface or for an interface which supports opportunistic
6238 * input polling. Pseudo interfaces or other types of interfaces
6239 * use the main input thread instead.
6240 */
6241 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ||
6242 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) {
6243 ifp->if_inp = dl_inp;
6244 err = dlil_create_input_thread(ifp, ifp->if_inp);
6245 if (err != 0) {
6246 panic_plain("%s: ifp=%p couldn't get an input thread; "
6247 "err=%d", __func__, ifp, err);
6248 /* NOTREACHED */
6249 }
6250 }
6251
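	/*
	 * Set up the thread call used by the input path to defer waking the
	 * input thread so received packets can be batched (input mitigation),
	 * if it has not been allocated already.
	 */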
6252 if (ifp->if_inp != NULL && ifp->if_inp->input_mit_tcall == NULL) {
6253 ifp->if_inp->input_mit_tcall =
6254 thread_call_allocate_with_priority(dlil_mit_tcall_fn,
6255 ifp, THREAD_CALL_PRIORITY_KERNEL);
6256 }
6257
6258 /*
6259 * If the driver supports the new transmit model, calculate flow hash
6260 * and create a workloop starter thread to invoke the if_start callback
6261 * where the packets may be dequeued and transmitted.
6262 */
6263 if (ifp->if_eflags & IFEF_TXSTART) {
6264 ifp->if_flowhash = ifnet_calc_flowhash(ifp);
6265 VERIFY(ifp->if_flowhash != 0);
6266 VERIFY(ifp->if_start_thread == THREAD_NULL);
6267
6268 ifnet_set_start_cycle(ifp, NULL);
6269 ifp->if_start_active = 0;
6270 ifp->if_start_req = 0;
6271 ifp->if_start_flags = 0;
6272 VERIFY(ifp->if_start != NULL);
6273 if ((err = kernel_thread_start(ifnet_start_thread_fn,
6274 ifp, &ifp->if_start_thread)) != KERN_SUCCESS) {
6275 panic_plain("%s: "
6276 "ifp=%p couldn't get a start thread; "
6277 "err=%d", __func__, ifp, err);
6278 /* NOTREACHED */
6279 }
6280 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP,
6281 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
6282 } else {
6283 ifp->if_flowhash = 0;
6284 }
6285
6286 /*
6287 * If the driver supports the new receive model, create a poller
6288 * thread to invoke if_input_poll callback where the packets may
6289 * be dequeued from the driver and processed for reception.
6290 */
6291 if (ifp->if_eflags & IFEF_RXPOLL) {
6292 VERIFY(ifp->if_input_poll != NULL);
6293 VERIFY(ifp->if_input_ctl != NULL);
6294 VERIFY(ifp->if_poll_thread == THREAD_NULL);
6295
6296 ifnet_set_poll_cycle(ifp, NULL);
6297 ifp->if_poll_update = 0;
6298 ifp->if_poll_active = 0;
6299 ifp->if_poll_req = 0;
6300 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp,
6301 &ifp->if_poll_thread)) != KERN_SUCCESS) {
6302 panic_plain("%s: ifp=%p couldn't get a poll thread; "
6303 "err=%d", __func__, ifp, err);
6304 /* NOTREACHED */
6305 }
6306 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP,
6307 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP));
6308 }
6309
6310 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
6311 VERIFY(ifp->if_desc.ifd_len == 0);
6312 VERIFY(ifp->if_desc.ifd_desc != NULL);
6313
6314 /* Record attach PC stacktrace */
6315 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach);
6316
6317 ifp->if_updatemcasts = 0;
6318 if (!LIST_EMPTY(&ifp->if_multiaddrs)) {
6319 struct ifmultiaddr *ifma;
6320 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
6321 IFMA_LOCK(ifma);
6322 if (ifma->ifma_addr->sa_family == AF_LINK ||
6323 ifma->ifma_addr->sa_family == AF_UNSPEC)
6324 ifp->if_updatemcasts++;
6325 IFMA_UNLOCK(ifma);
6326 }
6327
6328 printf("%s: attached with %d suspended link-layer multicast "
6329 "membership(s)\n", if_name(ifp),
6330 ifp->if_updatemcasts);
6331 }
6332
6333 /* Clear logging parameters */
6334 bzero(&ifp->if_log, sizeof (ifp->if_log));
6335
6336 /* Clear foreground/realtime activity timestamps */
6337 ifp->if_fg_sendts = 0;
6338 ifp->if_rt_sendts = 0;
6339
6340 VERIFY(ifp->if_delegated.ifp == NULL);
6341 VERIFY(ifp->if_delegated.type == 0);
6342 VERIFY(ifp->if_delegated.family == 0);
6343 VERIFY(ifp->if_delegated.subfamily == 0);
6344 VERIFY(ifp->if_delegated.expensive == 0);
6345
6346 VERIFY(ifp->if_agentids == NULL);
6347 VERIFY(ifp->if_agentcount == 0);
6348
6349 /* Reset interface state */
6350 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6351 ifp->if_interface_state.valid_bitmask |=
6352 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
6353 ifp->if_interface_state.interface_availability =
6354 IF_INTERFACE_STATE_INTERFACE_AVAILABLE;
6355
6356 /* Initialize Link Quality Metric (loopback [lo0] is always good) */
6357 if (ifp == lo_ifp) {
6358 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_GOOD;
6359 ifp->if_interface_state.valid_bitmask |=
6360 IF_INTERFACE_STATE_LQM_STATE_VALID;
6361 } else {
6362 ifp->if_interface_state.lqm_state = IFNET_LQM_THRESH_UNKNOWN;
6363 }
6364
6365 /*
6366 * Enable ECN capability on this interface depending on the
	 * value of the ECN global setting.
6368 */
6369 if (tcp_ecn_outbound == 2 && !IFNET_IS_CELLULAR(ifp)) {
6370 ifp->if_eflags |= IFEF_ECN_ENABLE;
6371 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6372 }
6373
6374 /*
6375 * Built-in Cyclops always on policy for WiFi infra
6376 */
6377 if (IFNET_IS_WIFI_INFRA(ifp) && net_qos_policy_wifi_enabled != 0) {
6378 errno_t error;
6379
6380 error = if_set_qosmarking_mode(ifp,
6381 IFRTYPE_QOSMARKING_FASTLANE);
6382 if (error != 0) {
6383 printf("%s if_set_qosmarking_mode(%s) error %d\n",
6384 __func__, ifp->if_xname, error);
6385 } else {
6386 ifp->if_eflags |= IFEF_QOSMARKING_ENABLED;
6387#if (DEVELOPMENT || DEBUG)
6388 printf("%s fastlane enabled on %s\n",
6389 __func__, ifp->if_xname);
6390#endif /* (DEVELOPMENT || DEBUG) */
6391 }
6392 }
6393
6394 ifnet_lock_done(ifp);
6395 ifnet_head_done();
6396
6397
6398 lck_mtx_lock(&ifp->if_cached_route_lock);
6399 /* Enable forwarding cached route */
6400 ifp->if_fwd_cacheok = 1;
6401 /* Clean up any existing cached routes */
6402 ROUTE_RELEASE(&ifp->if_fwd_route);
6403 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
6404 ROUTE_RELEASE(&ifp->if_src_route);
6405 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
6406 ROUTE_RELEASE(&ifp->if_src_route6);
6407 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
6408 lck_mtx_unlock(&ifp->if_cached_route_lock);
6409
6410 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE));
6411
6412 /*
	 * Allocate and attach IGMPv3/MLDv2 interface-specific variables
	 * and trees; do this before the ifnet is marked as attached.
	 * The ifnet keeps the reference to the info structures even after
	 * it is detached, since network-layer records may still refer to
	 * them.  This also makes it possible for them to keep functioning
	 * after the ifnet is recycled or reattached.
6420 */
6421#if INET
6422 if (IGMP_IFINFO(ifp) == NULL) {
6423 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK);
6424 VERIFY(IGMP_IFINFO(ifp) != NULL);
6425 } else {
6426 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp);
6427 igmp_domifreattach(IGMP_IFINFO(ifp));
6428 }
6429#endif /* INET */
6430#if INET6
6431 if (MLD_IFINFO(ifp) == NULL) {
6432 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK);
6433 VERIFY(MLD_IFINFO(ifp) != NULL);
6434 } else {
6435 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp);
6436 mld_domifreattach(MLD_IFINFO(ifp));
6437 }
6438#endif /* INET6 */
6439
6440 VERIFY(ifp->if_data_threshold == 0);
6441 VERIFY(ifp->if_dt_tcall != NULL);
6442
6443 /*
6444 * Finally, mark this ifnet as attached.
6445 */
6446 lck_mtx_lock(rnh_lock);
6447 ifnet_lock_exclusive(ifp);
6448 lck_mtx_lock_spin(&ifp->if_ref_lock);
6449 ifp->if_refflags = IFRF_ATTACHED; /* clears embryonic */
6450 lck_mtx_unlock(&ifp->if_ref_lock);
6451 if (net_rtref) {
6452 /* boot-args override; enable idle notification */
6453 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY,
6454 IFRF_IDLE_NOTIFY);
6455 } else {
6456 /* apply previous request(s) to set the idle flags, if any */
6457 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags,
6458 ifp->if_idle_new_flags_mask);
6459
6460 }
6461 ifnet_lock_done(ifp);
6462 lck_mtx_unlock(rnh_lock);
6463 dlil_if_unlock();
6464
6465#if PF
6466 /*
6467 * Attach packet filter to this interface, if enabled.
6468 */
6469 pf_ifnet_hook(ifp, 1);
6470#endif /* PF */
6471
6472 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0);
6473
6474 if (dlil_verbose) {
6475 printf("%s: attached%s\n", if_name(ifp),
6476 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : "");
6477 }
6478
6479 return (0);
6480}
6481
6482/*
 * Prepare the storage for the first/permanent link address, which
 * must have the same lifetime as the ifnet itself.  Although the link
6485 * address gets removed from if_addrhead and ifnet_addrs[] at detach time,
6486 * its location in memory must never change as it may still be referred
6487 * to by some parts of the system afterwards (unfortunate implementation
6488 * artifacts inherited from BSD.)
6489 *
6490 * Caller must hold ifnet lock as writer.
6491 */
6492static struct ifaddr *
6493dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
6494{
6495 struct ifaddr *ifa, *oifa;
6496 struct sockaddr_dl *asdl, *msdl;
6497 char workbuf[IFNAMSIZ*2];
6498 int namelen, masklen, socksize;
6499 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
6500
6501 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
6502 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
6503
6504 namelen = snprintf(workbuf, sizeof (workbuf), "%s",
6505 if_name(ifp));
6506 masklen = offsetof(struct sockaddr_dl, sdl_data[0])
6507 + ((namelen > 0) ? namelen : 0);
6508 socksize = masklen + ifp->if_addrlen;
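	/*
	 * ROUNDUP() below rounds up to the next multiple of sizeof (u_int32_t),
	 * e.g. ROUNDUP(13) == 16 and ROUNDUP(16) == 16.
	 */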
6509#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
6510 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl))
6511 socksize = sizeof(struct sockaddr_dl);
6512 socksize = ROUNDUP(socksize);
6513#undef ROUNDUP
6514
6515 ifa = ifp->if_lladdr;
6516 if (socksize > DLIL_SDLMAXLEN ||
6517 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
6518 /*
6519 * Rare, but in the event that the link address requires
6520 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storage for address and mask, such
6522 * that we can reuse the same space when if_addrlen grows.
6523 * This same space will be used when if_addrlen shrinks.
6524 */
6525 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
6526 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN;
6527 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
6528 if (ifa == NULL)
6529 return (NULL);
6530 ifa_lock_init(ifa);
6531 /* Don't set IFD_ALLOC, as this is permanent */
6532 ifa->ifa_debug = IFD_LINK;
6533 }
6534 IFA_LOCK(ifa);
6535 /* address and mask sockaddr_dl locations */
6536 asdl = (struct sockaddr_dl *)(ifa + 1);
6537 bzero(asdl, SOCK_MAXADDRLEN);
6538 msdl = (struct sockaddr_dl *)(void *)
6539 ((char *)asdl + SOCK_MAXADDRLEN);
6540 bzero(msdl, SOCK_MAXADDRLEN);
6541 } else {
6542 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
6543 /*
6544 * Use the storage areas for address and mask within the
6545 * dlil_ifnet structure. This is the most common case.
6546 */
6547 if (ifa == NULL) {
6548 ifa = &dl_if->dl_if_lladdr.ifa;
6549 ifa_lock_init(ifa);
6550 /* Don't set IFD_ALLOC, as this is permanent */
6551 ifa->ifa_debug = IFD_LINK;
6552 }
6553 IFA_LOCK(ifa);
6554 /* address and mask sockaddr_dl locations */
6555 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl;
6556 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl));
6557 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl;
6558 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl));
6559 }
6560
6561 /* hold a permanent reference for the ifnet itself */
6562 IFA_ADDREF_LOCKED(ifa);
6563 oifa = ifp->if_lladdr;
6564 ifp->if_lladdr = ifa;
6565
6566 VERIFY(ifa->ifa_debug == IFD_LINK);
6567 ifa->ifa_ifp = ifp;
6568 ifa->ifa_rtrequest = link_rtrequest;
6569 ifa->ifa_addr = (struct sockaddr *)asdl;
6570 asdl->sdl_len = socksize;
6571 asdl->sdl_family = AF_LINK;
6572 if (namelen > 0) {
6573 bcopy(workbuf, asdl->sdl_data, min(namelen,
6574 sizeof (asdl->sdl_data)));
6575 asdl->sdl_nlen = namelen;
6576 } else {
6577 asdl->sdl_nlen = 0;
6578 }
6579 asdl->sdl_index = ifp->if_index;
6580 asdl->sdl_type = ifp->if_type;
6581 if (ll_addr != NULL) {
6582 asdl->sdl_alen = ll_addr->sdl_alen;
6583 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen);
6584 } else {
6585 asdl->sdl_alen = 0;
6586 }
6587 ifa->ifa_netmask = (struct sockaddr *)msdl;
6588 msdl->sdl_len = masklen;
6589 while (namelen > 0)
6590 msdl->sdl_data[--namelen] = 0xff;
6591 IFA_UNLOCK(ifa);
6592
6593 if (oifa != NULL)
6594 IFA_REMREF(oifa);
6595
6596 return (ifa);
6597}
6598
6599static void
6600if_purgeaddrs(struct ifnet *ifp)
6601{
6602#if INET
6603 in_purgeaddrs(ifp);
6604#endif /* INET */
6605#if INET6
6606 in6_purgeaddrs(ifp);
6607#endif /* INET6 */
6608}
6609
6610errno_t
6611ifnet_detach(ifnet_t ifp)
6612{
6613 struct ifnet *delegated_ifp;
6614 struct nd_ifinfo *ndi = NULL;
6615
6616 if (ifp == NULL)
6617 return (EINVAL);
6618
6619 ndi = ND_IFINFO(ifp);
6620 if (NULL != ndi)
6621 ndi->cga_initialized = FALSE;
6622
6623 lck_mtx_lock(rnh_lock);
6624 ifnet_head_lock_exclusive();
6625 ifnet_lock_exclusive(ifp);
6626
6627 /*
6628 * Check to see if this interface has previously triggered
6629 * aggressive protocol draining; if so, decrement the global
	 * refcnt and clear PR_AGGDRAIN on the route domain if there
	 * are no more such interfaces around.
6632 */
6633 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0);
6634
6635 lck_mtx_lock_spin(&ifp->if_ref_lock);
6636 if (!(ifp->if_refflags & IFRF_ATTACHED)) {
6637 lck_mtx_unlock(&ifp->if_ref_lock);
6638 ifnet_lock_done(ifp);
6639 ifnet_head_done();
6640 lck_mtx_unlock(rnh_lock);
6641 return (EINVAL);
6642 } else if (ifp->if_refflags & IFRF_DETACHING) {
6643 /* Interface has already been detached */
6644 lck_mtx_unlock(&ifp->if_ref_lock);
6645 ifnet_lock_done(ifp);
6646 ifnet_head_done();
6647 lck_mtx_unlock(rnh_lock);
6648 return (ENXIO);
6649 }
6650 VERIFY(!(ifp->if_refflags & IFRF_EMBRYONIC));
6651 /* Indicate this interface is being detached */
6652 ifp->if_refflags &= ~IFRF_ATTACHED;
6653 ifp->if_refflags |= IFRF_DETACHING;
6654 lck_mtx_unlock(&ifp->if_ref_lock);
6655
6656 if (dlil_verbose) {
6657 printf("%s: detaching\n", if_name(ifp));
6658 }
6659
6660 /* clean up flow control entry object if there's any */
6661 if (ifp->if_eflags & IFEF_TXSTART) {
6662 ifnet_flowadv(ifp->if_flowhash);
6663 }
6664
6665 /* Reset ECN enable/disable flags */
6666 ifp->if_eflags &= ~IFEF_ECN_DISABLE;
6667 ifp->if_eflags &= ~IFEF_ECN_ENABLE;
6668
6669 /* Reset CLAT46 flag */
6670 ifp->if_eflags &= ~IFEF_CLAT46;
6671
6672 /*
6673 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will
6674 * no longer be visible during lookups from this point.
6675 */
6676 VERIFY(ifindex2ifnet[ifp->if_index] == ifp);
6677 TAILQ_REMOVE(&ifnet_head, ifp, if_link);
6678 ifp->if_link.tqe_next = NULL;
6679 ifp->if_link.tqe_prev = NULL;
6680 if (ifp->if_ordered_link.tqe_next != NULL ||
6681 ifp->if_ordered_link.tqe_prev != NULL) {
6682 ifnet_remove_from_ordered_list(ifp);
6683 }
6684 ifindex2ifnet[ifp->if_index] = NULL;
6685
6686 /* 18717626 - reset IFEF_IPV4_ROUTER and IFEF_IPV6_ROUTER */
6687 ifp->if_eflags &= ~(IFEF_IPV4_ROUTER | IFEF_IPV6_ROUTER);
6688
6689 /* Record detach PC stacktrace */
6690 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach);
6691
6692 /* Clear logging parameters */
6693 bzero(&ifp->if_log, sizeof (ifp->if_log));
6694
6695 /* Clear delegated interface info (reference released below) */
6696 delegated_ifp = ifp->if_delegated.ifp;
6697 bzero(&ifp->if_delegated, sizeof (ifp->if_delegated));
6698
6699 /* Reset interface state */
6700 bzero(&ifp->if_interface_state, sizeof(ifp->if_interface_state));
6701
6702 ifnet_lock_done(ifp);
6703 ifnet_head_done();
6704 lck_mtx_unlock(rnh_lock);
6705
6706
6707 /* Release reference held on the delegated interface */
6708 if (delegated_ifp != NULL)
6709 ifnet_release(delegated_ifp);
6710
6711 /* Reset Link Quality Metric (unless loopback [lo0]) */
6712 if (ifp != lo_ifp)
6713 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF, 0);
6714
6715 /* Reset TCP local statistics */
6716 if (ifp->if_tcp_stat != NULL)
6717 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat));
6718
6719 /* Reset UDP local statistics */
6720 if (ifp->if_udp_stat != NULL)
6721 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat));
6722
6723 /* Reset ifnet IPv4 stats */
6724 if (ifp->if_ipv4_stat != NULL)
6725 bzero(ifp->if_ipv4_stat, sizeof(*ifp->if_ipv4_stat));
6726
6727 /* Reset ifnet IPv6 stats */
6728 if (ifp->if_ipv6_stat != NULL)
6729 bzero(ifp->if_ipv6_stat, sizeof(*ifp->if_ipv6_stat));
6730
6731 /* Release memory held for interface link status report */
6732 if (ifp->if_link_status != NULL) {
6733 FREE(ifp->if_link_status, M_TEMP);
6734 ifp->if_link_status = NULL;
6735 }
6736
6737 /* Clear agent IDs */
6738 if (ifp->if_agentids != NULL) {
6739 FREE(ifp->if_agentids, M_NETAGENT);
6740 ifp->if_agentids = NULL;
6741 }
6742 ifp->if_agentcount = 0;
6743
6744
6745 /* Let BPF know we're detaching */
6746 bpfdetach(ifp);
6747
6748 /* Mark the interface as DOWN */
6749 if_down(ifp);
6750
6751 /* Disable forwarding cached route */
6752 lck_mtx_lock(&ifp->if_cached_route_lock);
6753 ifp->if_fwd_cacheok = 0;
6754 lck_mtx_unlock(&ifp->if_cached_route_lock);
6755
6756 /* Disable data threshold and wait for any pending event posting */
6757 ifp->if_data_threshold = 0;
6758 VERIFY(ifp->if_dt_tcall != NULL);
6759 (void) thread_call_cancel_wait(ifp->if_dt_tcall);
6760
6761 /*
6762 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the
6763 * references to the info structures and leave them attached to
6764 * this ifnet.
6765 */
6766#if INET
6767 igmp_domifdetach(ifp);
6768#endif /* INET */
6769#if INET6
6770 mld_domifdetach(ifp);
6771#endif /* INET6 */
6772
6773 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0);
6774
6775 /* Let worker thread take care of the rest, to avoid reentrancy */
6776 dlil_if_lock();
6777 ifnet_detaching_enqueue(ifp);
6778 dlil_if_unlock();
6779
6780 return (0);
6781}
6782
6783static void
6784ifnet_detaching_enqueue(struct ifnet *ifp)
6785{
6786 dlil_if_lock_assert();
6787
6788 ++ifnet_detaching_cnt;
6789 VERIFY(ifnet_detaching_cnt != 0);
6790 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link);
6791 wakeup((caddr_t)&ifnet_delayed_run);
6792}
6793
6794static struct ifnet *
6795ifnet_detaching_dequeue(void)
6796{
6797 struct ifnet *ifp;
6798
6799 dlil_if_lock_assert();
6800
6801 ifp = TAILQ_FIRST(&ifnet_detaching_head);
6802 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL);
6803 if (ifp != NULL) {
6804 VERIFY(ifnet_detaching_cnt != 0);
6805 --ifnet_detaching_cnt;
6806 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link);
6807 ifp->if_detaching_link.tqe_next = NULL;
6808 ifp->if_detaching_link.tqe_prev = NULL;
6809 }
6810 return (ifp);
6811}
6812
6813static int
6814ifnet_detacher_thread_cont(int err)
6815{
6816#pragma unused(err)
6817 struct ifnet *ifp;
6818
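	/*
	 * Note: msleep0() with a continuation function does not return to
	 * its caller; when the wakeup fires, the thread restarts in the
	 * continuation instead, hence the NOTREACHED annotation below.
	 */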
6819 for (;;) {
6820 dlil_if_lock_assert();
6821 while (ifnet_detaching_cnt == 0) {
6822 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6823 (PZERO - 1), "ifnet_detacher_cont", 0,
6824 ifnet_detacher_thread_cont);
6825 /* NOTREACHED */
6826 }
6827
6828 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL);
6829
6830 /* Take care of detaching ifnet */
6831 ifp = ifnet_detaching_dequeue();
6832 if (ifp != NULL) {
6833 dlil_if_unlock();
6834 ifnet_detach_final(ifp);
6835 dlil_if_lock();
6836 }
6837 }
6838}
6839
6840static void
6841ifnet_detacher_thread_func(void *v, wait_result_t w)
6842{
6843#pragma unused(v, w)
6844 dlil_if_lock();
6845 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock,
6846 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont);
6847 /*
6848 * msleep0() shouldn't have returned as PCATCH was not set;
6849 * therefore assert in this case.
6850 */
6851 dlil_if_unlock();
6852 VERIFY(0);
6853}
6854
6855static void
6856ifnet_detach_final(struct ifnet *ifp)
6857{
6858 struct ifnet_filter *filter, *filter_next;
6859 struct ifnet_filter_head fhead;
6860 struct dlil_threading_info *inp;
6861 struct ifaddr *ifa;
6862 ifnet_detached_func if_free;
6863 int i;
6864
6865 lck_mtx_lock(&ifp->if_ref_lock);
6866 if (!(ifp->if_refflags & IFRF_DETACHING)) {
6867 panic("%s: flags mismatch (detaching not set) ifp=%p",
6868 __func__, ifp);
6869 /* NOTREACHED */
6870 }
6871
6872 /*
6873 * Wait until the existing IO references get released
6874 * before we proceed with ifnet_detach. This is not a
6875 * common case, so block without using a continuation.
6876 */
6877 while (ifp->if_refio > 0) {
6878 printf("%s: Waiting for IO references on %s interface "
6879 "to be released\n", __func__, if_name(ifp));
6880 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock,
6881 (PZERO - 1), "ifnet_ioref_wait", NULL);
6882 }
6883 lck_mtx_unlock(&ifp->if_ref_lock);
6884
6885 /* Drain and destroy send queue */
6886 ifclassq_teardown(ifp);
6887
6888 /* Detach interface filters */
6889 lck_mtx_lock(&ifp->if_flt_lock);
6890 if_flt_monitor_enter(ifp);
6891
6892 LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
6893 fhead = ifp->if_flt_head;
6894 TAILQ_INIT(&ifp->if_flt_head);
6895
6896 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) {
6897 filter_next = TAILQ_NEXT(filter, filt_next);
6898 lck_mtx_unlock(&ifp->if_flt_lock);
6899
6900 dlil_detach_filter_internal(filter, 1);
6901 lck_mtx_lock(&ifp->if_flt_lock);
6902 }
6903 if_flt_monitor_leave(ifp);
6904 lck_mtx_unlock(&ifp->if_flt_lock);
6905
6906 /* Tell upper layers to drop their network addresses */
6907 if_purgeaddrs(ifp);
6908
6909 ifnet_lock_exclusive(ifp);
6910
	/* Unplumb all protocols */
6912 for (i = 0; i < PROTO_HASH_SLOTS; i++) {
6913 struct if_proto *proto;
6914
6915 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6916 while (proto != NULL) {
6917 protocol_family_t family = proto->protocol_family;
6918 ifnet_lock_done(ifp);
6919 proto_unplumb(family, ifp);
6920 ifnet_lock_exclusive(ifp);
6921 proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
6922 }
6923 /* There should not be any protocols left */
6924 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i]));
6925 }
6926 zfree(dlif_phash_zone, ifp->if_proto_hash);
6927 ifp->if_proto_hash = NULL;
6928
6929 /* Detach (permanent) link address from if_addrhead */
6930 ifa = TAILQ_FIRST(&ifp->if_addrhead);
6931 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa);
6932 IFA_LOCK(ifa);
6933 if_detach_link_ifa(ifp, ifa);
6934 IFA_UNLOCK(ifa);
6935
6936 /* Remove (permanent) link address from ifnet_addrs[] */
6937 IFA_REMREF(ifa);
6938 ifnet_addrs[ifp->if_index - 1] = NULL;
6939
6940 /* This interface should not be on {ifnet_head,detaching} */
6941 VERIFY(ifp->if_link.tqe_next == NULL);
6942 VERIFY(ifp->if_link.tqe_prev == NULL);
6943 VERIFY(ifp->if_detaching_link.tqe_next == NULL);
6944 VERIFY(ifp->if_detaching_link.tqe_prev == NULL);
6945 VERIFY(ifp->if_ordered_link.tqe_next == NULL);
6946 VERIFY(ifp->if_ordered_link.tqe_prev == NULL);
6947
6948 /* The slot should have been emptied */
6949 VERIFY(ifindex2ifnet[ifp->if_index] == NULL);
6950
6951 /* There should not be any addresses left */
6952 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead));
6953
6954 /*
6955 * Signal the starter thread to terminate itself.
6956 */
6957 if (ifp->if_start_thread != THREAD_NULL) {
6958 lck_mtx_lock_spin(&ifp->if_start_lock);
6959 ifp->if_start_flags = 0;
6960 ifp->if_start_thread = THREAD_NULL;
6961 wakeup_one((caddr_t)&ifp->if_start_thread);
6962 lck_mtx_unlock(&ifp->if_start_lock);
6963 }
6964
6965 /*
6966 * Signal the poller thread to terminate itself.
6967 */
6968 if (ifp->if_poll_thread != THREAD_NULL) {
6969 lck_mtx_lock_spin(&ifp->if_poll_lock);
6970 ifp->if_poll_thread = THREAD_NULL;
6971 wakeup_one((caddr_t)&ifp->if_poll_thread);
6972 lck_mtx_unlock(&ifp->if_poll_lock);
6973 }
6974
6975 /*
6976 * If thread affinity was set for the workloop thread, we will need
6977 * to tear down the affinity and release the extra reference count
6978 * taken at attach time. Does not apply to lo0 or other interfaces
6979 * without dedicated input threads.
6980 */
6981 if ((inp = ifp->if_inp) != NULL) {
6982 VERIFY(inp != dlil_main_input_thread);
6983
6984 if (inp->net_affinity) {
6985 struct thread *tp, *wtp, *ptp;
6986
6987 lck_mtx_lock_spin(&inp->input_lck);
6988 wtp = inp->wloop_thr;
6989 inp->wloop_thr = THREAD_NULL;
6990 ptp = inp->poll_thr;
6991 inp->poll_thr = THREAD_NULL;
6992 tp = inp->input_thr; /* don't nullify now */
6993 inp->tag = 0;
6994 inp->net_affinity = FALSE;
6995 lck_mtx_unlock(&inp->input_lck);
6996
6997 /* Tear down poll thread affinity */
6998 if (ptp != NULL) {
6999 VERIFY(ifp->if_eflags & IFEF_RXPOLL);
7000 (void) dlil_affinity_set(ptp,
7001 THREAD_AFFINITY_TAG_NULL);
7002 thread_deallocate(ptp);
7003 }
7004
7005 /* Tear down workloop thread affinity */
7006 if (wtp != NULL) {
7007 (void) dlil_affinity_set(wtp,
7008 THREAD_AFFINITY_TAG_NULL);
7009 thread_deallocate(wtp);
7010 }
7011
7012 /* Tear down DLIL input thread affinity */
7013 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL);
7014 thread_deallocate(tp);
7015 }
7016
7017 /* disassociate ifp DLIL input thread */
7018 ifp->if_inp = NULL;
7019
7020 /* tell the input thread to terminate */
7021 lck_mtx_lock_spin(&inp->input_lck);
7022 inp->input_waiting |= DLIL_INPUT_TERMINATE;
7023 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) {
7024 wakeup_one((caddr_t)&inp->input_waiting);
7025 }
7026 lck_mtx_unlock(&inp->input_lck);
7027 ifnet_lock_done(ifp);
7028
7029 /* wait for the input thread to terminate */
7030 lck_mtx_lock_spin(&inp->input_lck);
7031 while ((inp->input_waiting & DLIL_INPUT_TERMINATE_COMPLETE)
7032 == 0) {
7033 (void) msleep(&inp->input_waiting, &inp->input_lck,
7034 (PZERO - 1) | PSPIN, inp->input_name, NULL);
7035 }
7036 lck_mtx_unlock(&inp->input_lck);
7037 ifnet_lock_exclusive(ifp);
7038
7039 /* clean-up input thread state */
7040 dlil_clean_threading_info(inp);
7041
7042 }
7043
7044 /* The driver might unload, so point these to ourselves */
7045 if_free = ifp->if_free;
7046 ifp->if_output_dlil = ifp_if_output;
7047 ifp->if_output = ifp_if_output;
7048 ifp->if_pre_enqueue = ifp_if_output;
7049 ifp->if_start = ifp_if_start;
7050 ifp->if_output_ctl = ifp_if_ctl;
7051 ifp->if_input_dlil = ifp_if_input;
7052 ifp->if_input_poll = ifp_if_input_poll;
7053 ifp->if_input_ctl = ifp_if_ctl;
7054 ifp->if_ioctl = ifp_if_ioctl;
7055 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap;
7056 ifp->if_free = ifp_if_free;
7057 ifp->if_demux = ifp_if_demux;
7058 ifp->if_event = ifp_if_event;
7059 ifp->if_framer_legacy = ifp_if_framer;
7060 ifp->if_framer = ifp_if_framer_extended;
7061 ifp->if_add_proto = ifp_if_add_proto;
7062 ifp->if_del_proto = ifp_if_del_proto;
7063 ifp->if_check_multi = ifp_if_check_multi;
7064
7065 /* wipe out interface description */
7066 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE);
7067 ifp->if_desc.ifd_len = 0;
7068 VERIFY(ifp->if_desc.ifd_desc != NULL);
7069 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE);
7070
7071 /* there shouldn't be any delegation by now */
7072 VERIFY(ifp->if_delegated.ifp == NULL);
7073 VERIFY(ifp->if_delegated.type == 0);
7074 VERIFY(ifp->if_delegated.family == 0);
7075 VERIFY(ifp->if_delegated.subfamily == 0);
7076 VERIFY(ifp->if_delegated.expensive == 0);
7077
	/* QoS marking gets cleared */
7079 ifp->if_eflags &= ~IFEF_QOSMARKING_ENABLED;
7080 if_set_qosmarking_mode(ifp, IFRTYPE_QOSMARKING_MODE_NONE);
7081
7082
7083 ifnet_lock_done(ifp);
7084
7085#if PF
7086 /*
7087 * Detach this interface from packet filter, if enabled.
7088 */
7089 pf_ifnet_hook(ifp, 0);
7090#endif /* PF */
7091
7092 /* Filter list should be empty */
7093 lck_mtx_lock_spin(&ifp->if_flt_lock);
7094 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head));
7095 VERIFY(ifp->if_flt_busy == 0);
7096 VERIFY(ifp->if_flt_waiters == 0);
7097 lck_mtx_unlock(&ifp->if_flt_lock);
7098
7099 /* Last chance to drain send queue */
7100 if_qflush(ifp, 0);
7101
7102 /* Last chance to cleanup any cached route */
7103 lck_mtx_lock(&ifp->if_cached_route_lock);
7104 VERIFY(!ifp->if_fwd_cacheok);
7105 ROUTE_RELEASE(&ifp->if_fwd_route);
7106 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route));
7107 ROUTE_RELEASE(&ifp->if_src_route);
7108 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route));
7109 ROUTE_RELEASE(&ifp->if_src_route6);
7110 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6));
7111 lck_mtx_unlock(&ifp->if_cached_route_lock);
7112
7113 VERIFY(ifp->if_data_threshold == 0);
7114 VERIFY(ifp->if_dt_tcall != NULL);
7115 VERIFY(!thread_call_isactive(ifp->if_dt_tcall));
7116
7117 ifnet_llreach_ifdetach(ifp);
7118
7119 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0);
7120
7121 /*
7122 * Finally, mark this ifnet as detached.
7123 */
7124 lck_mtx_lock_spin(&ifp->if_ref_lock);
7125 if (!(ifp->if_refflags & IFRF_DETACHING)) {
7126 panic("%s: flags mismatch (detaching not set) ifp=%p",
7127 __func__, ifp);
7128 /* NOTREACHED */
7129 }
7130 ifp->if_refflags &= ~IFRF_DETACHING;
7131 lck_mtx_unlock(&ifp->if_ref_lock);
7132 if (if_free != NULL)
7133 if_free(ifp);
7134
7135 if (dlil_verbose)
7136 printf("%s: detached\n", if_name(ifp));
7137
7138 /* Release reference held during ifnet attach */
7139 ifnet_release(ifp);
7140}
7141
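/*
 * The ifp_if_* routines below are the "detached" handlers that
 * ifnet_detach_final() installs in place of the driver's callbacks; they
 * simply drop traffic or return an error so that straggling callers never
 * reach into a driver that may already have unloaded.
 */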
7142errno_t
7143ifp_if_output(struct ifnet *ifp, struct mbuf *m)
7144{
7145#pragma unused(ifp)
7146 m_freem_list(m);
7147 return (0);
7148}
7149
7150void
7151ifp_if_start(struct ifnet *ifp)
7152{
7153 ifnet_purge(ifp);
7154}
7155
7156static errno_t
7157ifp_if_input(struct ifnet *ifp, struct mbuf *m_head,
7158 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s,
7159 boolean_t poll, struct thread *tp)
7160{
7161#pragma unused(ifp, m_tail, s, poll, tp)
7162 m_freem_list(m_head);
7163 return (ENXIO);
7164}
7165
7166static void
7167ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt,
7168 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len)
7169{
7170#pragma unused(ifp, flags, max_cnt)
7171 if (m_head != NULL)
7172 *m_head = NULL;
7173 if (m_tail != NULL)
7174 *m_tail = NULL;
7175 if (cnt != NULL)
7176 *cnt = 0;
7177 if (len != NULL)
7178 *len = 0;
7179}
7180
7181static errno_t
7182ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
7183{
7184#pragma unused(ifp, cmd, arglen, arg)
7185 return (EOPNOTSUPP);
7186}
7187
7188static errno_t
7189ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf)
7190{
7191#pragma unused(ifp, fh, pf)
7192 m_freem(m);
7193 return (EJUSTRETURN);
7194}
7195
7196static errno_t
7197ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf,
7198 const struct ifnet_demux_desc *da, u_int32_t dc)
7199{
7200#pragma unused(ifp, pf, da, dc)
7201 return (EINVAL);
7202}
7203
7204static errno_t
7205ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf)
7206{
7207#pragma unused(ifp, pf)
7208 return (EINVAL);
7209}
7210
7211static errno_t
7212ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa)
7213{
7214#pragma unused(ifp, sa)
7215 return (EOPNOTSUPP);
7216}
7217
7218#if CONFIG_EMBEDDED
7219static errno_t
7220ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
7221 const struct sockaddr *sa, const char *ll, const char *t,
7222 u_int32_t *pre, u_int32_t *post)
7223#else
7224static errno_t
7225ifp_if_framer(struct ifnet *ifp, struct mbuf **m,
7226 const struct sockaddr *sa, const char *ll, const char *t)
7227#endif /* !CONFIG_EMBEDDED */
7228{
7229#pragma unused(ifp, m, sa, ll, t)
7230#if CONFIG_EMBEDDED
7231 return (ifp_if_framer_extended(ifp, m, sa, ll, t, pre, post));
7232#else
7233 return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL));
7234#endif /* !CONFIG_EMBEDDED */
7235}
7236
7237static errno_t
7238ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m,
7239 const struct sockaddr *sa, const char *ll, const char *t,
7240 u_int32_t *pre, u_int32_t *post)
7241{
7242#pragma unused(ifp, sa, ll, t)
7243 m_freem(*m);
7244 *m = NULL;
7245
7246 if (pre != NULL)
7247 *pre = 0;
7248 if (post != NULL)
7249 *post = 0;
7250
7251 return (EJUSTRETURN);
7252}
7253
7254errno_t
7255ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg)
7256{
7257#pragma unused(ifp, cmd, arg)
7258 return (EOPNOTSUPP);
7259}
7260
7261static errno_t
7262ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f)
7263{
7264#pragma unused(ifp, tm, f)
7265 /* XXX not sure what to do here */
7266 return (0);
7267}
7268
7269static void
7270ifp_if_free(struct ifnet *ifp)
7271{
7272#pragma unused(ifp)
7273}
7274
7275static void
7276ifp_if_event(struct ifnet *ifp, const struct kev_msg *e)
7277{
7278#pragma unused(ifp, e)
7279}
7280
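/*
 * Find a dlil_ifnet of the given family that can be recycled (matching
 * unique id), or allocate a fresh one.  Returns EBUSY if an in-use
 * interface already has the same extended name or unique id.
 */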
7281int dlil_if_acquire(u_int32_t family, const void *uniqueid,
7282 size_t uniqueid_len, const char *ifxname, struct ifnet **ifp)
7283{
7284 struct ifnet *ifp1 = NULL;
7285 struct dlil_ifnet *dlifp1 = NULL;
7286 void *buf, *base, **pbuf;
7287 int ret = 0;
7288
7289 VERIFY(*ifp == NULL);
7290 dlil_if_lock();
7291 /*
7292 * We absolutely can't have an interface with the same name
7293	 * in the in-use state.
7294	 * To make sure of that, the list has to be traversed completely.
7295 */
7296 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) {
7297 ifp1 = (struct ifnet *)dlifp1;
7298
7299 if (ifp1->if_family != family)
7300 continue;
7301
7302 /*
7303		 * If the interface is in use, return EBUSY if either the
7304		 * unique id or the interface extended name is the same.
7305 */
7306 lck_mtx_lock(&dlifp1->dl_if_lock);
7307 if (strncmp(ifxname, ifp1->if_xname, IFXNAMSIZ) == 0) {
7308 if (dlifp1->dl_if_flags & DLIF_INUSE) {
7309 lck_mtx_unlock(&dlifp1->dl_if_lock);
7310 ret = EBUSY;
7311 goto end;
7312 }
7313 }
7314
7315 if (uniqueid_len) {
7316 if (uniqueid_len == dlifp1->dl_if_uniqueid_len &&
7317 bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len) == 0) {
7318 if (dlifp1->dl_if_flags & DLIF_INUSE) {
7319 lck_mtx_unlock(&dlifp1->dl_if_lock);
7320 ret = EBUSY;
7321 goto end;
7322 } else {
7323 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE);
7324 /* Cache the first interface that can be recycled */
7325 if (*ifp == NULL)
7326 *ifp = ifp1;
7327 /*
7328 * XXX Do not break or jump to end as we have to traverse
7329 * the whole list to ensure there are no name collisions
7330 */
7331 }
7332 }
7333 }
7334 lck_mtx_unlock(&dlifp1->dl_if_lock);
7335 }
7336
7337 /* If there's an interface that can be recycled, use that */
7338 if (*ifp != NULL)
7339 goto end;
7340
7341 /* no interface found, allocate a new one */
7342 buf = zalloc(dlif_zone);
7343 if (buf == NULL) {
7344 ret = ENOMEM;
7345 goto end;
7346 }
7347 bzero(buf, dlif_bufsize);
7348
7349 /* Get the 64-bit aligned base address for this object */
7350 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t),
7351 sizeof (u_int64_t));
7352 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize));
7353
7354 /*
7355 * Wind back a pointer size from the aligned base and
7356 * save the original address so we can free it later.
7357 */
7358 pbuf = (void **)((intptr_t)base - sizeof (void *));
7359 *pbuf = buf;
7360 dlifp1 = base;
7361
7362 if (uniqueid_len) {
7363 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len,
7364 M_NKE, M_WAITOK);
7365 if (dlifp1->dl_if_uniqueid == NULL) {
7366 zfree(dlif_zone, buf);
7367 ret = ENOMEM;
7368 goto end;
7369 }
7370 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len);
7371 dlifp1->dl_if_uniqueid_len = uniqueid_len;
7372 }
7373
7374 ifp1 = (struct ifnet *)dlifp1;
7375 dlifp1->dl_if_flags = DLIF_INUSE;
7376 if (ifnet_debug) {
7377 dlifp1->dl_if_flags |= DLIF_DEBUG;
7378 dlifp1->dl_if_trace = dlil_if_trace;
7379 }
7380 ifp1->if_name = dlifp1->dl_if_namestorage;
7381 ifp1->if_xname = dlifp1->dl_if_xnamestorage;
7382
7383 /* initialize interface description */
7384 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE;
7385 ifp1->if_desc.ifd_len = 0;
7386 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage;
7387
7388
7389#if CONFIG_MACF_NET
7390 mac_ifnet_label_init(ifp1);
7391#endif
7392
7393 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) {
7394 DLIL_PRINTF("%s: failed to allocate if local stats, "
7395 "error: %d\n", __func__, ret);
7396 /* This probably shouldn't be fatal */
7397 ret = 0;
7398 }
7399
7400 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr);
7401 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr);
7402 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr);
7403 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr);
7404 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group,
7405 ifnet_lock_attr);
7406 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr);
7407#if INET
7408 lck_rw_init(&ifp1->if_inetdata_lock, ifnet_lock_group,
7409 ifnet_lock_attr);
7410 ifp1->if_inetdata = NULL;
7411#endif
7412#if INET6
7413 lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group,
7414 ifnet_lock_attr);
7415 ifp1->if_inet6data = NULL;
7416#endif
7417 lck_rw_init(&ifp1->if_link_status_lock, ifnet_lock_group,
7418 ifnet_lock_attr);
7419 ifp1->if_link_status = NULL;
7420
7421 /* for send data paths */
7422 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group,
7423 ifnet_lock_attr);
7424 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group,
7425 ifnet_lock_attr);
7426 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group,
7427 ifnet_lock_attr);
7428
7429 /* for receive data paths */
7430 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group,
7431 ifnet_lock_attr);
7432
7433 /* thread call allocation is done with sleeping zalloc */
7434 ifp1->if_dt_tcall = thread_call_allocate_with_options(dlil_dt_tcall_fn,
7435 ifp1, THREAD_CALL_PRIORITY_KERNEL, THREAD_CALL_OPTIONS_ONCE);
7436 if (ifp1->if_dt_tcall == NULL) {
7437 panic_plain("%s: couldn't create if_dt_tcall", __func__);
7438 /* NOTREACHED */
7439 }
7440
7441 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link);
7442
7443 *ifp = ifp1;
7444
7445end:
7446 dlil_if_unlock();
7447
7448 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) &&
7449 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t))));
7450
7451 return (ret);
7452}
7453
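/*
 * Mark the underlying dlil_ifnet as no longer in use so that
 * dlil_if_acquire() can recycle it; the storage itself is retained.
 */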
7454__private_extern__ void
7455dlil_if_release(ifnet_t ifp)
7456{
7457 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
7458
7459 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
7460 if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
7461 VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
7462 }
7463
7464 ifnet_lock_exclusive(ifp);
7465 lck_mtx_lock(&dlifp->dl_if_lock);
7466 dlifp->dl_if_flags &= ~DLIF_INUSE;
7467 strlcpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ);
7468 ifp->if_name = dlifp->dl_if_namestorage;
7469 /* Reset external name (name + unit) */
7470 ifp->if_xname = dlifp->dl_if_xnamestorage;
7471 snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ,
7472 "%s?", ifp->if_name);
7473 lck_mtx_unlock(&dlifp->dl_if_lock);
7474#if CONFIG_MACF_NET
7475 /*
7476 * We can either recycle the MAC label here or in dlil_if_acquire().
7477 * It seems logical to do it here but this means that anything that
7478 * still has a handle on ifp will now see it as unlabeled.
7479 * Since the interface is "dead" that may be OK. Revisit later.
7480 */
7481 mac_ifnet_label_recycle(ifp);
7482#endif
7483 ifnet_lock_done(ifp);
7484}
7485
7486__private_extern__ void
7487dlil_if_lock(void)
7488{
7489 lck_mtx_lock(&dlil_ifnet_lock);
7490}
7491
7492__private_extern__ void
7493dlil_if_unlock(void)
7494{
7495 lck_mtx_unlock(&dlil_ifnet_lock);
7496}
7497
7498__private_extern__ void
7499dlil_if_lock_assert(void)
7500{
7501 LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
7502}
7503
7504__private_extern__ void
7505dlil_proto_unplumb_all(struct ifnet *ifp)
7506{
7507 /*
7508 * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where
7509 * each bucket contains exactly one entry; PF_VLAN does not need an
7510 * explicit unplumb.
7511 *
7512 * if_proto_hash[3] is for other protocols; we expect anything
7513 * in this bucket to respond to the DETACHING event (which would
7514 * have happened by now) and do the unplumb then.
7515 */
7516 (void) proto_unplumb(PF_INET, ifp);
7517#if INET6
7518 (void) proto_unplumb(PF_INET6, ifp);
7519#endif /* INET6 */
7520}
7521
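/*
 * Copy the interface's cached IPv4 route (if_src_route) out to, or back
 * in from, a caller-provided route, under if_cached_route_lock.
 */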
7522static void
7523ifp_src_route_copyout(struct ifnet *ifp, struct route *dst)
7524{
7525 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7526 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7527
7528 route_copyout(dst, &ifp->if_src_route, sizeof (*dst));
7529
7530 lck_mtx_unlock(&ifp->if_cached_route_lock);
7531}
7532
7533static void
7534ifp_src_route_copyin(struct ifnet *ifp, struct route *src)
7535{
7536 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7537 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7538
7539 if (ifp->if_fwd_cacheok) {
7540 route_copyin(src, &ifp->if_src_route, sizeof (*src));
7541 } else {
7542 ROUTE_RELEASE(src);
7543 }
7544 lck_mtx_unlock(&ifp->if_cached_route_lock);
7545}
7546
7547#if INET6
7548static void
7549ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst)
7550{
7551 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7552 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7553
7554 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6,
7555 sizeof (*dst));
7556
7557 lck_mtx_unlock(&ifp->if_cached_route_lock);
7558}
7559
7560static void
7561ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src)
7562{
7563 lck_mtx_lock_spin(&ifp->if_cached_route_lock);
7564 lck_mtx_convert_spin(&ifp->if_cached_route_lock);
7565
7566 if (ifp->if_fwd_cacheok) {
7567 route_copyin((struct route *)src,
7568 (struct route *)&ifp->if_src_route6, sizeof (*src));
7569 } else {
7570 ROUTE_RELEASE(src);
7571 }
7572 lck_mtx_unlock(&ifp->if_cached_route_lock);
7573}
7574#endif /* INET6 */
7575
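/*
 * Return a route to src_ip scoped to this interface, reusing the cached
 * per-interface route when it still matches; otherwise perform a fresh
 * lookup and update the cache.
 */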
7576struct rtentry *
7577ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip)
7578{
7579 struct route src_rt;
7580 struct sockaddr_in *dst;
7581
7582 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst);
7583
7584 ifp_src_route_copyout(ifp, &src_rt);
7585
7586 if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) {
7587 ROUTE_RELEASE(&src_rt);
7588 if (dst->sin_family != AF_INET) {
7589 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7590 dst->sin_len = sizeof (src_rt.ro_dst);
7591 dst->sin_family = AF_INET;
7592 }
7593 dst->sin_addr = src_ip;
7594
7595 VERIFY(src_rt.ro_rt == NULL);
7596 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst,
7597 0, 0, ifp->if_index);
7598
7599 if (src_rt.ro_rt != NULL) {
7600 /* retain a ref, copyin consumes one */
7601 struct rtentry *rte = src_rt.ro_rt;
7602 RT_ADDREF(rte);
7603 ifp_src_route_copyin(ifp, &src_rt);
7604 src_rt.ro_rt = rte;
7605 }
7606 }
7607
7608 return (src_rt.ro_rt);
7609}
7610
7611#if INET6
7612struct rtentry *
7613ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6)
7614{
7615 struct route_in6 src_rt;
7616
7617 ifp_src_route6_copyout(ifp, &src_rt);
7618
7619 if (ROUTE_UNUSABLE(&src_rt) ||
7620 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) {
7621 ROUTE_RELEASE(&src_rt);
7622 if (src_rt.ro_dst.sin6_family != AF_INET6) {
7623 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst));
7624 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst);
7625 src_rt.ro_dst.sin6_family = AF_INET6;
7626 }
7627 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6);
7628 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr,
7629 sizeof (src_rt.ro_dst.sin6_addr));
7630
7631 if (src_rt.ro_rt == NULL) {
7632 src_rt.ro_rt = rtalloc1_scoped(
7633 (struct sockaddr *)&src_rt.ro_dst, 0, 0,
7634 ifp->if_index);
7635
7636 if (src_rt.ro_rt != NULL) {
7637 /* retain a ref, copyin consumes one */
7638 struct rtentry *rte = src_rt.ro_rt;
7639 RT_ADDREF(rte);
7640 ifp_src_route6_copyin(ifp, &src_rt);
7641 src_rt.ro_rt = rte;
7642 }
7643 }
7644 }
7645
7646 return (src_rt.ro_rt);
7647}
7648#endif /* INET6 */
7649
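/*
 * Update the interface's link quality metric: normalize the raw value to
 * one of the defined thresholds, record it if it changed, and post a
 * KEV_DL_LINK_QUALITY_METRIC_CHANGED event.
 */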
7650void
7651if_lqm_update(struct ifnet *ifp, int lqm, int locked)
7652{
7653 struct kev_dl_link_quality_metric_data ev_lqm_data;
7654
7655 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX);
7656
7657 /* Normalize to edge */
7658 if (lqm >= 0 && lqm <= IFNET_LQM_THRESH_ABORT) {
7659 lqm = IFNET_LQM_THRESH_ABORT;
7660 atomic_bitset_32(&tcbinfo.ipi_flags,
7661 INPCBINFO_HANDLE_LQM_ABORT);
7662 inpcb_timer_sched(&tcbinfo, INPCB_TIMER_FAST);
7663 } else if (lqm > IFNET_LQM_THRESH_ABORT &&
7664 lqm <= IFNET_LQM_THRESH_MINIMALLY_VIABLE) {
7665 lqm = IFNET_LQM_THRESH_MINIMALLY_VIABLE;
7666 } else if (lqm > IFNET_LQM_THRESH_MINIMALLY_VIABLE &&
7667 lqm <= IFNET_LQM_THRESH_POOR) {
7668 lqm = IFNET_LQM_THRESH_POOR;
7669 } else if (lqm > IFNET_LQM_THRESH_POOR &&
7670 lqm <= IFNET_LQM_THRESH_GOOD) {
7671 lqm = IFNET_LQM_THRESH_GOOD;
7672 }
7673
7674 /*
7675 * Take the lock if needed
7676 */
7677 if (!locked)
7678 ifnet_lock_exclusive(ifp);
7679
7680 if (lqm == ifp->if_interface_state.lqm_state &&
7681 (ifp->if_interface_state.valid_bitmask &
7682 IF_INTERFACE_STATE_LQM_STATE_VALID)) {
7683 /*
7684		 * Release the lock if it was not held by the caller
7685 */
7686 if (!locked)
7687 ifnet_lock_done(ifp);
7688 return; /* nothing to update */
7689 }
7690 ifp->if_interface_state.valid_bitmask |=
7691 IF_INTERFACE_STATE_LQM_STATE_VALID;
7692 ifp->if_interface_state.lqm_state = lqm;
7693
7694 /*
7695 * Don't want to hold the lock when issuing kernel events
7696 */
7697 ifnet_lock_done(ifp);
7698
7699 bzero(&ev_lqm_data, sizeof (ev_lqm_data));
7700 ev_lqm_data.link_quality_metric = lqm;
7701
7702 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED,
7703 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data));
7704
7705 /*
7706 * Reacquire the lock for the caller
7707 */
7708 if (locked)
7709 ifnet_lock_exclusive(ifp);
7710}
7711
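/*
 * Record a new RRC state and post KEV_DL_RRC_STATE_CHANGED.  Called with
 * the ifnet lock held exclusive; the lock is dropped and re-taken around
 * the event posting.
 */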
7712static void
7713if_rrc_state_update(struct ifnet *ifp, unsigned int rrc_state)
7714{
7715 struct kev_dl_rrc_state kev;
7716
7717 if (rrc_state == ifp->if_interface_state.rrc_state &&
7718 (ifp->if_interface_state.valid_bitmask &
7719 IF_INTERFACE_STATE_RRC_STATE_VALID))
7720 return;
7721
7722 ifp->if_interface_state.valid_bitmask |=
7723 IF_INTERFACE_STATE_RRC_STATE_VALID;
7724
7725 ifp->if_interface_state.rrc_state = rrc_state;
7726
7727 /*
7728 * Don't want to hold the lock when issuing kernel events
7729 */
7730 ifnet_lock_done(ifp);
7731
7732 bzero(&kev, sizeof(struct kev_dl_rrc_state));
7733 kev.rrc_state = rrc_state;
7734
7735 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_RRC_STATE_CHANGED,
7736 (struct net_event_data *)&kev, sizeof(struct kev_dl_rrc_state));
7737
7738 ifnet_lock_exclusive(ifp);
7739}
7740
7741errno_t
7742if_state_update(struct ifnet *ifp,
7743 struct if_interface_state *if_interface_state)
7744{
7745 u_short if_index_available = 0;
7746
7747 ifnet_lock_exclusive(ifp);
7748
7749 if ((ifp->if_type != IFT_CELLULAR) &&
7750 (if_interface_state->valid_bitmask &
7751 IF_INTERFACE_STATE_RRC_STATE_VALID)) {
7752 ifnet_lock_done(ifp);
7753 return (ENOTSUP);
7754 }
7755 if ((if_interface_state->valid_bitmask &
7756 IF_INTERFACE_STATE_LQM_STATE_VALID) &&
7757 (if_interface_state->lqm_state < IFNET_LQM_MIN ||
7758 if_interface_state->lqm_state > IFNET_LQM_MAX)) {
7759 ifnet_lock_done(ifp);
7760 return (EINVAL);
7761 }
7762 if ((if_interface_state->valid_bitmask &
7763 IF_INTERFACE_STATE_RRC_STATE_VALID) &&
7764 if_interface_state->rrc_state !=
7765 IF_INTERFACE_STATE_RRC_STATE_IDLE &&
7766 if_interface_state->rrc_state !=
7767 IF_INTERFACE_STATE_RRC_STATE_CONNECTED) {
7768 ifnet_lock_done(ifp);
7769 return (EINVAL);
7770 }
7771
7772 if (if_interface_state->valid_bitmask &
7773 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7774 if_lqm_update(ifp, if_interface_state->lqm_state, 1);
7775 }
7776 if (if_interface_state->valid_bitmask &
7777 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7778 if_rrc_state_update(ifp, if_interface_state->rrc_state);
7779 }
7780 if (if_interface_state->valid_bitmask &
7781 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7782 ifp->if_interface_state.valid_bitmask |=
7783 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7784 ifp->if_interface_state.interface_availability =
7785 if_interface_state->interface_availability;
7786
7787 if (ifp->if_interface_state.interface_availability ==
7788 IF_INTERFACE_STATE_INTERFACE_AVAILABLE) {
7789 if_index_available = ifp->if_index;
7790 }
7791 }
7792 ifnet_lock_done(ifp);
7793
7794 /*
7795	 * Check if the TCP connections going over this interface should
7796	 * be forced to send probe packets instead of waiting for TCP
7797	 * timers to fire. This is done when there is an explicit
7798	 * notification that the interface became available.
7799 */
7800 if (if_index_available > 0)
7801 tcp_interface_send_probe(if_index_available);
7802
7803 return (0);
7804}
7805
7806void
7807if_get_state(struct ifnet *ifp,
7808 struct if_interface_state *if_interface_state)
7809{
7810 ifnet_lock_shared(ifp);
7811
7812 if_interface_state->valid_bitmask = 0;
7813
7814 if (ifp->if_interface_state.valid_bitmask &
7815 IF_INTERFACE_STATE_RRC_STATE_VALID) {
7816 if_interface_state->valid_bitmask |=
7817 IF_INTERFACE_STATE_RRC_STATE_VALID;
7818 if_interface_state->rrc_state =
7819 ifp->if_interface_state.rrc_state;
7820 }
7821 if (ifp->if_interface_state.valid_bitmask &
7822 IF_INTERFACE_STATE_LQM_STATE_VALID) {
7823 if_interface_state->valid_bitmask |=
7824 IF_INTERFACE_STATE_LQM_STATE_VALID;
7825 if_interface_state->lqm_state =
7826 ifp->if_interface_state.lqm_state;
7827 }
7828 if (ifp->if_interface_state.valid_bitmask &
7829 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID) {
7830 if_interface_state->valid_bitmask |=
7831 IF_INTERFACE_STATE_INTERFACE_AVAILABILITY_VALID;
7832 if_interface_state->interface_availability =
7833 ifp->if_interface_state.interface_availability;
7834 }
7835
7836 ifnet_lock_done(ifp);
7837}
7838
7839errno_t
7840if_probe_connectivity(struct ifnet *ifp, u_int32_t conn_probe)
7841{
7842 ifnet_lock_exclusive(ifp);
7843 if (conn_probe > 1) {
7844 ifnet_lock_done(ifp);
7845 return (EINVAL);
7846 }
7847 if (conn_probe == 0)
7848 ifp->if_eflags &= ~IFEF_PROBE_CONNECTIVITY;
7849 else
7850 ifp->if_eflags |= IFEF_PROBE_CONNECTIVITY;
7851 ifnet_lock_done(ifp);
7852
7853#if NECP
7854 necp_update_all_clients();
7855#endif /* NECP */
7856
7857 tcp_probe_connectivity(ifp, conn_probe);
7858 return (0);
7859}
7860
7861/* for uuid.c */
7862int
7863uuid_get_ethernet(u_int8_t *node)
7864{
7865 struct ifnet *ifp;
7866 struct sockaddr_dl *sdl;
7867
7868 ifnet_head_lock_shared();
7869 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
7870 ifnet_lock_shared(ifp);
7871 IFA_LOCK_SPIN(ifp->if_lladdr);
7872 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr;
7873 if (sdl->sdl_type == IFT_ETHER) {
7874 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN);
7875 IFA_UNLOCK(ifp->if_lladdr);
7876 ifnet_lock_done(ifp);
7877 ifnet_head_done();
7878 return (0);
7879 }
7880 IFA_UNLOCK(ifp->if_lladdr);
7881 ifnet_lock_done(ifp);
7882 }
7883 ifnet_head_done();
7884
7885 return (-1);
7886}
7887
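/*
 * Handlers for the receive-polling and queue-length sysctls below; each
 * one validates the requested value, or clamps it to its minimum, before
 * applying it.
 */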
7888static int
7889sysctl_rxpoll SYSCTL_HANDLER_ARGS
7890{
7891#pragma unused(arg1, arg2)
7892 uint32_t i;
7893 int err;
7894
7895 i = if_rxpoll;
7896
7897 err = sysctl_handle_int(oidp, &i, 0, req);
7898 if (err != 0 || req->newptr == USER_ADDR_NULL)
7899 return (err);
7900
7901 if (net_rxpoll == 0)
7902 return (ENXIO);
7903
7904 if_rxpoll = i;
7905 return (err);
7906}
7907
7908static int
7909sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS
7910{
7911#pragma unused(arg1, arg2)
7912 uint64_t q;
7913 int err;
7914
7915 q = if_rxpoll_mode_holdtime;
7916
7917 err = sysctl_handle_quad(oidp, &q, 0, req);
7918 if (err != 0 || req->newptr == USER_ADDR_NULL)
7919 return (err);
7920
7921 if (q < IF_RXPOLL_MODE_HOLDTIME_MIN)
7922 q = IF_RXPOLL_MODE_HOLDTIME_MIN;
7923
7924 if_rxpoll_mode_holdtime = q;
7925
7926 return (err);
7927}
7928
7929static int
7930sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS
7931{
7932#pragma unused(arg1, arg2)
7933 uint64_t q;
7934 int err;
7935
7936 q = if_rxpoll_sample_holdtime;
7937
7938 err = sysctl_handle_quad(oidp, &q, 0, req);
7939 if (err != 0 || req->newptr == USER_ADDR_NULL)
7940 return (err);
7941
7942 if (q < IF_RXPOLL_SAMPLETIME_MIN)
7943 q = IF_RXPOLL_SAMPLETIME_MIN;
7944
7945 if_rxpoll_sample_holdtime = q;
7946
7947 return (err);
7948}
7949
7950static int
7951sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS
7952{
7953#pragma unused(arg1, arg2)
7954 uint64_t q;
7955 int err;
7956
7957 q = if_rxpoll_interval_time;
7958
7959 err = sysctl_handle_quad(oidp, &q, 0, req);
7960 if (err != 0 || req->newptr == USER_ADDR_NULL)
7961 return (err);
7962
7963 if (q < IF_RXPOLL_INTERVALTIME_MIN)
7964 q = IF_RXPOLL_INTERVALTIME_MIN;
7965
7966 if_rxpoll_interval_time = q;
7967
7968 return (err);
7969}
7970
7971static int
7972sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS
7973{
7974#pragma unused(arg1, arg2)
7975 uint32_t i;
7976 int err;
7977
7978 i = if_rxpoll_wlowat;
7979
7980 err = sysctl_handle_int(oidp, &i, 0, req);
7981 if (err != 0 || req->newptr == USER_ADDR_NULL)
7982 return (err);
7983
7984 if (i == 0 || i >= if_rxpoll_whiwat)
7985 return (EINVAL);
7986
7987 if_rxpoll_wlowat = i;
7988 return (err);
7989}
7990
7991static int
7992sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS
7993{
7994#pragma unused(arg1, arg2)
7995 uint32_t i;
7996 int err;
7997
7998 i = if_rxpoll_whiwat;
7999
8000 err = sysctl_handle_int(oidp, &i, 0, req);
8001 if (err != 0 || req->newptr == USER_ADDR_NULL)
8002 return (err);
8003
8004 if (i <= if_rxpoll_wlowat)
8005 return (EINVAL);
8006
8007 if_rxpoll_whiwat = i;
8008 return (err);
8009}
8010
8011static int
8012sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS
8013{
8014#pragma unused(arg1, arg2)
8015 int i, err;
8016
8017 i = if_sndq_maxlen;
8018
8019 err = sysctl_handle_int(oidp, &i, 0, req);
8020 if (err != 0 || req->newptr == USER_ADDR_NULL)
8021 return (err);
8022
8023 if (i < IF_SNDQ_MINLEN)
8024 i = IF_SNDQ_MINLEN;
8025
8026 if_sndq_maxlen = i;
8027 return (err);
8028}
8029
8030static int
8031sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS
8032{
8033#pragma unused(arg1, arg2)
8034 int i, err;
8035
8036 i = if_rcvq_maxlen;
8037
8038 err = sysctl_handle_int(oidp, &i, 0, req);
8039 if (err != 0 || req->newptr == USER_ADDR_NULL)
8040 return (err);
8041
8042 if (i < IF_RCVQ_MINLEN)
8043 i = IF_RCVQ_MINLEN;
8044
8045 if_rcvq_maxlen = i;
8046 return (err);
8047}
8048
8049void
8050dlil_node_present(struct ifnet *ifp, struct sockaddr *sa,
8051 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48])
8052{
8053 struct kev_dl_node_presence kev;
8054 struct sockaddr_dl *sdl;
8055 struct sockaddr_in6 *sin6;
8056
8057 VERIFY(ifp);
8058 VERIFY(sa);
8059 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8060
8061 bzero(&kev, sizeof (kev));
8062 sin6 = &kev.sin6_node_address;
8063 sdl = &kev.sdl_node_address;
8064 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8065 kev.rssi = rssi;
8066 kev.link_quality_metric = lqm;
8067 kev.node_proximity_metric = npm;
8068 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info));
8069
8070 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm);
8071 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE,
8072 &kev.link_data, sizeof (kev));
8073}
8074
8075void
8076dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa)
8077{
8078 struct kev_dl_node_absence kev;
8079 struct sockaddr_in6 *sin6;
8080 struct sockaddr_dl *sdl;
8081
8082 VERIFY(ifp);
8083 VERIFY(sa);
8084 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6);
8085
8086 bzero(&kev, sizeof (kev));
8087 sin6 = &kev.sin6_node_address;
8088 sdl = &kev.sdl_node_address;
8089 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6);
8090
8091 nd6_alt_node_absent(ifp, sin6);
8092 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE,
8093 &kev.link_data, sizeof (kev));
8094}
8095
8096const void *
8097dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep,
8098 kauth_cred_t *credp)
8099{
8100 const u_int8_t *bytes;
8101 size_t size;
8102
8103 bytes = CONST_LLADDR(sdl);
8104 size = sdl->sdl_alen;
8105
8106#if CONFIG_MACF
8107 if (dlil_lladdr_ckreq) {
8108 switch (sdl->sdl_type) {
8109 case IFT_ETHER:
8110 case IFT_IEEE1394:
8111 break;
8112 default:
8113 credp = NULL;
8114 break;
8115		}
8116
8117 if (credp && mac_system_check_info(*credp, "net.link.addr")) {
8118 static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = {
8119 [0] = 2
8120 };
8121
8122 bytes = unspec;
8123 }
8124 }
8125#else
8126#pragma unused(credp)
8127#endif
8128
8129 if (sizep != NULL) *sizep = size;
8130 return (bytes);
8131}
8132
8133void
8134dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN],
8135 u_int8_t info[DLIL_MODARGLEN])
8136{
8137 struct kev_dl_issues kev;
8138 struct timeval tv;
8139
8140 VERIFY(ifp != NULL);
8141 VERIFY(modid != NULL);
8142 _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN);
8143 _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN);
8144
8145 bzero(&kev, sizeof (kev));
8146
8147 microtime(&tv);
8148 kev.timestamp = tv.tv_sec;
8149 bcopy(modid, &kev.modid, DLIL_MODIDLEN);
8150 if (info != NULL)
8151 bcopy(info, &kev.info, DLIL_MODARGLEN);
8152
8153 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES,
8154 &kev.link_data, sizeof (kev));
8155}
8156
8157errno_t
8158ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
8159 struct proc *p)
8160{
8161 u_int32_t level = IFNET_THROTTLE_OFF;
8162 errno_t result = 0;
8163
8164 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC);
8165
8166 if (cmd == SIOCSIFOPPORTUNISTIC) {
8167 /*
8168 * XXX: Use priv_check_cred() instead of root check?
8169 */
8170 if ((result = proc_suser(p)) != 0)
8171 return (result);
8172
8173 if (ifr->ifr_opportunistic.ifo_flags ==
8174 IFRIFOF_BLOCK_OPPORTUNISTIC)
8175 level = IFNET_THROTTLE_OPPORTUNISTIC;
8176 else if (ifr->ifr_opportunistic.ifo_flags == 0)
8177 level = IFNET_THROTTLE_OFF;
8178 else
8179 result = EINVAL;
8180
8181 if (result == 0)
8182 result = ifnet_set_throttle(ifp, level);
8183 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) {
8184 ifr->ifr_opportunistic.ifo_flags = 0;
8185 if (level == IFNET_THROTTLE_OPPORTUNISTIC) {
8186 ifr->ifr_opportunistic.ifo_flags |=
8187 IFRIFOF_BLOCK_OPPORTUNISTIC;
8188 }
8189 }
8190
8191 /*
8192 * Return the count of current opportunistic connections
8193 * over the interface.
8194 */
8195 if (result == 0) {
8196 uint32_t flags = 0;
8197 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ?
8198 INPCB_OPPORTUNISTIC_SETCMD : 0;
8199 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ?
8200 INPCB_OPPORTUNISTIC_THROTTLEON : 0;
8201 ifr->ifr_opportunistic.ifo_inuse =
8202 udp_count_opportunistic(ifp->if_index, flags) +
8203 tcp_count_opportunistic(ifp->if_index, flags);
8204 }
8205
8206 if (result == EALREADY)
8207 result = 0;
8208
8209 return (result);
8210}
8211
8212int
8213ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level)
8214{
8215 struct ifclassq *ifq;
8216 int err = 0;
8217
8218 if (!(ifp->if_eflags & IFEF_TXSTART))
8219 return (ENXIO);
8220
8221 *level = IFNET_THROTTLE_OFF;
8222
8223 ifq = &ifp->if_snd;
8224 IFCQ_LOCK(ifq);
8225 /* Throttling works only for IFCQ, not ALTQ instances */
8226 if (IFCQ_IS_ENABLED(ifq))
8227 IFCQ_GET_THROTTLE(ifq, *level, err);
8228 IFCQ_UNLOCK(ifq);
8229
8230 return (err);
8231}
8232
8233int
8234ifnet_set_throttle(struct ifnet *ifp, u_int32_t level)
8235{
8236 struct ifclassq *ifq;
8237 int err = 0;
8238
8239 if (!(ifp->if_eflags & IFEF_TXSTART))
8240 return (ENXIO);
8241
8242 ifq = &ifp->if_snd;
8243
8244 switch (level) {
8245 case IFNET_THROTTLE_OFF:
8246 case IFNET_THROTTLE_OPPORTUNISTIC:
8247 break;
8248 default:
8249 return (EINVAL);
8250 }
8251
8252 IFCQ_LOCK(ifq);
8253 if (IFCQ_IS_ENABLED(ifq))
8254 IFCQ_SET_THROTTLE(ifq, level, err);
8255 IFCQ_UNLOCK(ifq);
8256
8257 if (err == 0) {
8258 printf("%s: throttling level set to %d\n", if_name(ifp),
8259 level);
8260 if (level == IFNET_THROTTLE_OFF)
8261 ifnet_start(ifp);
8262 }
8263
8264 return (err);
8265}
8266
8267errno_t
8268ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr,
8269 struct proc *p)
8270{
8271#pragma unused(p)
8272 errno_t result = 0;
8273 uint32_t flags;
8274 int level, category, subcategory;
8275
8276 VERIFY(cmd == SIOCSIFLOG || cmd == SIOCGIFLOG);
8277
8278 if (cmd == SIOCSIFLOG) {
8279 if ((result = priv_check_cred(kauth_cred_get(),
8280 PRIV_NET_INTERFACE_CONTROL, 0)) != 0)
8281 return (result);
8282
8283 level = ifr->ifr_log.ifl_level;
8284 if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX)
8285 result = EINVAL;
8286
8287 flags = ifr->ifr_log.ifl_flags;
8288 if ((flags &= IFNET_LOGF_MASK) == 0)
8289 result = EINVAL;
8290
8291 category = ifr->ifr_log.ifl_category;
8292 subcategory = ifr->ifr_log.ifl_subcategory;
8293
8294 if (result == 0)
8295 result = ifnet_set_log(ifp, level, flags,
8296 category, subcategory);
8297 } else {
8298 result = ifnet_get_log(ifp, &level, &flags, &category,
8299 &subcategory);
8300 if (result == 0) {
8301 ifr->ifr_log.ifl_level = level;
8302 ifr->ifr_log.ifl_flags = flags;
8303 ifr->ifr_log.ifl_category = category;
8304 ifr->ifr_log.ifl_subcategory = subcategory;
8305 }
8306 }
8307
8308 return (result);
8309}
8310
8311int
8312ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags,
8313 int32_t category, int32_t subcategory)
8314{
8315 int err = 0;
8316
8317 VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX);
8318 VERIFY(flags & IFNET_LOGF_MASK);
8319
8320 /*
8321 * The logging level applies to all facilities; make sure to
8322 * update them all with the most current level.
8323 */
8324 flags |= ifp->if_log.flags;
8325
8326 if (ifp->if_output_ctl != NULL) {
8327 struct ifnet_log_params l;
8328
8329 bzero(&l, sizeof (l));
8330 l.level = level;
8331 l.flags = flags;
8332 l.flags &= ~IFNET_LOGF_DLIL;
8333 l.category = category;
8334 l.subcategory = subcategory;
8335
8336 /* Send this request to lower layers */
8337 if (l.flags != 0) {
8338 err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG,
8339 sizeof (l), &l);
8340 }
8341 } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) {
8342 /*
8343 * If targeted to the lower layers without an output
8344 * control callback registered on the interface, just
8345 * silently ignore facilities other than ours.
8346 */
8347 flags &= IFNET_LOGF_DLIL;
8348 if (flags == 0 && (!(ifp->if_log.flags & IFNET_LOGF_DLIL)))
8349 level = 0;
8350 }
8351
8352 if (err == 0) {
8353 if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT)
8354 ifp->if_log.flags = 0;
8355 else
8356 ifp->if_log.flags |= flags;
8357
8358 log(LOG_INFO, "%s: logging level set to %d flags=%b "
8359 "arg=%b, category=%d subcategory=%d\n", if_name(ifp),
8360 ifp->if_log.level, ifp->if_log.flags,
8361 IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS,
8362 category, subcategory);
8363 }
8364
8365 return (err);
8366}
8367
8368int
8369ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags,
8370 int32_t *category, int32_t *subcategory)
8371{
8372 if (level != NULL)
8373 *level = ifp->if_log.level;
8374 if (flags != NULL)
8375 *flags = ifp->if_log.flags;
8376 if (category != NULL)
8377 *category = ifp->if_log.category;
8378 if (subcategory != NULL)
8379 *subcategory = ifp->if_log.subcategory;
8380
8381 return (0);
8382}
8383
8384int
8385ifnet_notify_address(struct ifnet *ifp, int af)
8386{
8387 struct ifnet_notify_address_params na;
8388
8389#if PF
8390 (void) pf_ifaddr_hook(ifp);
8391#endif /* PF */
8392
8393 if (ifp->if_output_ctl == NULL)
8394 return (EOPNOTSUPP);
8395
8396 bzero(&na, sizeof (na));
8397 na.address_family = af;
8398
8399 return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS,
8400 sizeof (na), &na));
8401}
8402
8403errno_t
8404ifnet_flowid(struct ifnet *ifp, uint32_t *flowid)
8405{
8406 if (ifp == NULL || flowid == NULL) {
8407 return (EINVAL);
8408 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
8409 !IF_FULLY_ATTACHED(ifp)) {
8410 return (ENXIO);
8411 }
8412
8413 *flowid = ifp->if_flowhash;
8414
8415 return (0);
8416}
8417
8418errno_t
8419ifnet_disable_output(struct ifnet *ifp)
8420{
8421 int err;
8422
8423 if (ifp == NULL) {
8424 return (EINVAL);
8425 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
8426 !IF_FULLY_ATTACHED(ifp)) {
8427 return (ENXIO);
8428 }
8429
8430 if ((err = ifnet_fc_add(ifp)) == 0) {
8431 lck_mtx_lock_spin(&ifp->if_start_lock);
8432 ifp->if_start_flags |= IFSF_FLOW_CONTROLLED;
8433 lck_mtx_unlock(&ifp->if_start_lock);
8434 }
8435 return (err);
8436}
8437
8438errno_t
8439ifnet_enable_output(struct ifnet *ifp)
8440{
8441 if (ifp == NULL) {
8442 return (EINVAL);
8443 } else if (!(ifp->if_eflags & IFEF_TXSTART) ||
8444 !IF_FULLY_ATTACHED(ifp)) {
8445 return (ENXIO);
8446 }
8447
8448 ifnet_start_common(ifp, TRUE);
8449 return (0);
8450}
8451
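/*
 * Flow advisory: given a flow hash registered via ifnet_disable_output(),
 * re-enable output on the corresponding interface if it is still attached.
 */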
8452void
8453ifnet_flowadv(uint32_t flowhash)
8454{
8455 struct ifnet_fc_entry *ifce;
8456 struct ifnet *ifp;
8457
8458 ifce = ifnet_fc_get(flowhash);
8459 if (ifce == NULL)
8460 return;
8461
8462 VERIFY(ifce->ifce_ifp != NULL);
8463 ifp = ifce->ifce_ifp;
8464
8465 /* flow hash gets recalculated per attach, so check */
8466 if (ifnet_is_attached(ifp, 1)) {
8467 if (ifp->if_flowhash == flowhash)
8468 (void) ifnet_enable_output(ifp);
8469 ifnet_decr_iorefcnt(ifp);
8470 }
8471 ifnet_fc_entry_free(ifce);
8472}
8473
8474/*
8475 * Function to compare ifnet_fc_entries in ifnet flow control tree
8476 */
8477static inline int
8478ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2)
8479{
8480 return (fc1->ifce_flowhash - fc2->ifce_flowhash);
8481}
8482
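/*
 * Insert an entry for this interface into the flow-control tree, keyed by
 * its flow hash; called when output on the interface is disabled due to
 * flow control.
 */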
8483static int
8484ifnet_fc_add(struct ifnet *ifp)
8485{
8486 struct ifnet_fc_entry keyfc, *ifce;
8487 uint32_t flowhash;
8488
8489 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART));
8490 VERIFY(ifp->if_flowhash != 0);
8491 flowhash = ifp->if_flowhash;
8492
8493 bzero(&keyfc, sizeof (keyfc));
8494 keyfc.ifce_flowhash = flowhash;
8495
8496 lck_mtx_lock_spin(&ifnet_fc_lock);
8497 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
8498 if (ifce != NULL && ifce->ifce_ifp == ifp) {
8499 /* Entry is already in ifnet_fc_tree, return */
8500 lck_mtx_unlock(&ifnet_fc_lock);
8501 return (0);
8502 }
8503
8504 if (ifce != NULL) {
8505 /*
8506 * There is a different fc entry with the same flow hash
8507 * but different ifp pointer. There can be a collision
8508 * on flow hash but the probability is low. Let's just
8509 * avoid adding a second one when there is a collision.
8510 */
8511 lck_mtx_unlock(&ifnet_fc_lock);
8512 return (EAGAIN);
8513 }
8514
8515 /* become regular mutex */
8516 lck_mtx_convert_spin(&ifnet_fc_lock);
8517
8518 ifce = zalloc(ifnet_fc_zone);
8519 if (ifce == NULL) {
8520 /* memory allocation failed */
8521 lck_mtx_unlock(&ifnet_fc_lock);
8522 return (ENOMEM);
8523 }
8524 bzero(ifce, ifnet_fc_zone_size);
8525
8526 ifce->ifce_flowhash = flowhash;
8527 ifce->ifce_ifp = ifp;
8528
8529 RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8530 lck_mtx_unlock(&ifnet_fc_lock);
8531 return (0);
8532}
8533
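/*
 * Look up and remove the flow-control entry matching the given flow hash;
 * returns NULL if no entry exists or the interface is no longer attached.
 */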
8534static struct ifnet_fc_entry *
8535ifnet_fc_get(uint32_t flowhash)
8536{
8537 struct ifnet_fc_entry keyfc, *ifce;
8538 struct ifnet *ifp;
8539
8540 bzero(&keyfc, sizeof (keyfc));
8541 keyfc.ifce_flowhash = flowhash;
8542
8543 lck_mtx_lock_spin(&ifnet_fc_lock);
8544 ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc);
8545 if (ifce == NULL) {
8546 /* Entry is not present in ifnet_fc_tree, return */
8547 lck_mtx_unlock(&ifnet_fc_lock);
8548 return (NULL);
8549 }
8550
8551 RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce);
8552
8553 VERIFY(ifce->ifce_ifp != NULL);
8554 ifp = ifce->ifce_ifp;
8555
8556 /* become regular mutex */
8557 lck_mtx_convert_spin(&ifnet_fc_lock);
8558
8559 if (!ifnet_is_attached(ifp, 0)) {
8560 /*
8561 * This ifp is not attached or in the process of being
8562 * detached; just don't process it.
8563 */
8564 ifnet_fc_entry_free(ifce);
8565 ifce = NULL;
8566 }
8567 lck_mtx_unlock(&ifnet_fc_lock);
8568
8569 return (ifce);
8570}
8571
8572static void
8573ifnet_fc_entry_free(struct ifnet_fc_entry *ifce)
8574{
8575 zfree(ifnet_fc_zone, ifce);
8576}
8577
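/*
 * Compute a non-zero flow hash for the interface from its name, unit,
 * flags and capabilities, mixed with random values and a global seed.
 */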
8578static uint32_t
8579ifnet_calc_flowhash(struct ifnet *ifp)
8580{
8581 struct ifnet_flowhash_key fh __attribute__((aligned(8)));
8582 uint32_t flowhash = 0;
8583
8584 if (ifnet_flowhash_seed == 0)
8585 ifnet_flowhash_seed = RandomULong();
8586
8587 bzero(&fh, sizeof (fh));
8588
8589 (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name);
8590 fh.ifk_unit = ifp->if_unit;
8591 fh.ifk_flags = ifp->if_flags;
8592 fh.ifk_eflags = ifp->if_eflags;
8593 fh.ifk_capabilities = ifp->if_capabilities;
8594 fh.ifk_capenable = ifp->if_capenable;
8595 fh.ifk_output_sched_model = ifp->if_output_sched_model;
8596 fh.ifk_rand1 = RandomULong();
8597 fh.ifk_rand2 = RandomULong();
8598
8599try_again:
8600 flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed);
8601 if (flowhash == 0) {
8602 /* try to get a non-zero flowhash */
8603 ifnet_flowhash_seed = RandomULong();
8604 goto try_again;
8605 }
8606
8607 return (flowhash);
8608}
8609
8610int
8611ifnet_set_netsignature(struct ifnet *ifp, uint8_t family, uint8_t len,
8612 uint16_t flags, uint8_t *data)
8613{
8614#pragma unused(flags)
8615 int error = 0;
8616
8617 switch (family) {
8618 case AF_INET:
8619 if_inetdata_lock_exclusive(ifp);
8620 if (IN_IFEXTRA(ifp) != NULL) {
8621 if (len == 0) {
8622 /* Allow clearing the signature */
8623 IN_IFEXTRA(ifp)->netsig_len = 0;
8624 bzero(IN_IFEXTRA(ifp)->netsig,
8625 sizeof (IN_IFEXTRA(ifp)->netsig));
8626 if_inetdata_lock_done(ifp);
8627 break;
8628 } else if (len > sizeof (IN_IFEXTRA(ifp)->netsig)) {
8629 error = EINVAL;
8630 if_inetdata_lock_done(ifp);
8631 break;
8632 }
8633 IN_IFEXTRA(ifp)->netsig_len = len;
8634 bcopy(data, IN_IFEXTRA(ifp)->netsig, len);
8635 } else {
8636 error = ENOMEM;
8637 }
8638 if_inetdata_lock_done(ifp);
8639 break;
8640
8641 case AF_INET6:
8642 if_inet6data_lock_exclusive(ifp);
8643 if (IN6_IFEXTRA(ifp) != NULL) {
8644 if (len == 0) {
8645 /* Allow clearing the signature */
8646 IN6_IFEXTRA(ifp)->netsig_len = 0;
8647 bzero(IN6_IFEXTRA(ifp)->netsig,
8648 sizeof (IN6_IFEXTRA(ifp)->netsig));
8649 if_inet6data_lock_done(ifp);
8650 break;
8651 } else if (len > sizeof (IN6_IFEXTRA(ifp)->netsig)) {
8652 error = EINVAL;
8653 if_inet6data_lock_done(ifp);
8654 break;
8655 }
8656 IN6_IFEXTRA(ifp)->netsig_len = len;
8657 bcopy(data, IN6_IFEXTRA(ifp)->netsig, len);
8658 } else {
8659 error = ENOMEM;
8660 }
8661 if_inet6data_lock_done(ifp);
8662 break;
8663
8664 default:
8665 error = EINVAL;
8666 break;
8667 }
8668
8669 return (error);
8670}
8671
8672int
8673ifnet_get_netsignature(struct ifnet *ifp, uint8_t family, uint8_t *len,
8674 uint16_t *flags, uint8_t *data)
8675{
8676 int error = 0;
8677
8678 if (ifp == NULL || len == NULL || data == NULL)
8679 return (EINVAL);
8680
8681 switch (family) {
8682 case AF_INET:
8683 if_inetdata_lock_shared(ifp);
8684 if (IN_IFEXTRA(ifp) != NULL) {
8685 if (*len == 0 || *len < IN_IFEXTRA(ifp)->netsig_len) {
8686 error = EINVAL;
8687 if_inetdata_lock_done(ifp);
8688 break;
8689 }
8690 if ((*len = IN_IFEXTRA(ifp)->netsig_len) > 0)
8691 bcopy(IN_IFEXTRA(ifp)->netsig, data, *len);
8692 else
8693 error = ENOENT;
8694 } else {
8695 error = ENOMEM;
8696 }
8697 if_inetdata_lock_done(ifp);
8698 break;
8699
8700 case AF_INET6:
8701 if_inet6data_lock_shared(ifp);
8702 if (IN6_IFEXTRA(ifp) != NULL) {
8703 if (*len == 0 || *len < IN6_IFEXTRA(ifp)->netsig_len) {
8704 error = EINVAL;
8705 if_inet6data_lock_done(ifp);
8706 break;
8707 }
8708 if ((*len = IN6_IFEXTRA(ifp)->netsig_len) > 0)
8709 bcopy(IN6_IFEXTRA(ifp)->netsig, data, *len);
8710 else
8711 error = ENOENT;
8712 } else {
8713 error = ENOMEM;
8714 }
8715 if_inet6data_lock_done(ifp);
8716 break;
8717
8718 default:
8719 error = EINVAL;
8720 break;
8721 }
8722
8723 if (error == 0 && flags != NULL)
8724 *flags = 0;
8725
8726 return (error);
8727}
8728
8729#if INET6
8730int
8731ifnet_set_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8732{
8733 int i, error = 0, one_set = 0;
8734
8735 if_inet6data_lock_exclusive(ifp);
8736
8737 if (IN6_IFEXTRA(ifp) == NULL) {
8738 error = ENOMEM;
8739 goto out;
8740 }
8741
8742 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8743 uint32_t prefix_len =
8744 prefixes[i].prefix_len;
8745 struct in6_addr *prefix =
8746 &prefixes[i].ipv6_prefix;
8747
8748 if (prefix_len == 0) {
8749 clat_log0((LOG_DEBUG,
8750 "NAT64 prefixes purged from Interface %s\n",
8751 if_name(ifp)));
8752			/* Allow clearing the prefix */
8753 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = 0;
8754 bzero(&IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8755 sizeof(struct in6_addr));
8756
8757 continue;
8758 } else if (prefix_len != NAT64_PREFIX_LEN_32 &&
8759 prefix_len != NAT64_PREFIX_LEN_40 &&
8760 prefix_len != NAT64_PREFIX_LEN_48 &&
8761 prefix_len != NAT64_PREFIX_LEN_56 &&
8762 prefix_len != NAT64_PREFIX_LEN_64 &&
8763 prefix_len != NAT64_PREFIX_LEN_96) {
8764 clat_log0((LOG_DEBUG,
8765 "NAT64 prefixlen is incorrect %d\n", prefix_len));
8766 error = EINVAL;
8767 goto out;
8768 }
8769
8770 if (IN6_IS_SCOPE_EMBED(prefix)) {
8771 clat_log0((LOG_DEBUG,
8772 "NAT64 prefix has interface/link local scope.\n"));
8773 error = EINVAL;
8774 goto out;
8775 }
8776
8777 IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len = prefix_len;
8778 bcopy(prefix, &IN6_IFEXTRA(ifp)->nat64_prefixes[i].ipv6_prefix,
8779 sizeof(struct in6_addr));
8780 clat_log0((LOG_DEBUG,
8781 "NAT64 prefix set to %s with prefixlen: %d\n",
8782 ip6_sprintf(prefix), prefix_len));
8783 one_set = 1;
8784 }
8785
8786out:
8787 if_inet6data_lock_done(ifp);
8788
8789 if (error == 0 && one_set != 0)
8790 necp_update_all_clients();
8791
8792 return (error);
8793}
8794
8795int
8796ifnet_get_nat64prefix(struct ifnet *ifp, struct ipv6_prefix *prefixes)
8797{
8798 int i, found_one = 0, error = 0;
8799
8800 if (ifp == NULL)
8801 return (EINVAL);
8802
8803 if_inet6data_lock_shared(ifp);
8804
8805 if (IN6_IFEXTRA(ifp) == NULL) {
8806 error = ENOMEM;
8807 goto out;
8808 }
8809
8810 for (i = 0; i < NAT64_MAX_NUM_PREFIXES; i++) {
8811 if (IN6_IFEXTRA(ifp)->nat64_prefixes[i].prefix_len != 0)
8812 found_one = 1;
8813 }
8814
8815 if (found_one == 0) {
8816 error = ENOENT;
8817 goto out;
8818 }
8819
8820 if (prefixes)
8821 bcopy(IN6_IFEXTRA(ifp)->nat64_prefixes, prefixes,
8822 sizeof(IN6_IFEXTRA(ifp)->nat64_prefixes));
8823
8824out:
8825 if_inet6data_lock_done(ifp);
8826
8827 return (error);
8828}
8829#endif
8830
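/*
 * Checksum debugging hook on the output path: when finalize-forced mode
 * is enabled, compute the IP and/or transport checksums in software and
 * count what had to be finalized.
 */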
8831static void
8832dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
8833 protocol_family_t pf)
8834{
8835#pragma unused(ifp)
8836 uint32_t did_sw;
8837
8838 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
8839 (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
8840 return;
8841
8842 switch (pf) {
8843 case PF_INET:
8844 did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
8845 if (did_sw & CSUM_DELAY_IP)
8846 hwcksum_dbg_finalized_hdr++;
8847 if (did_sw & CSUM_DELAY_DATA)
8848 hwcksum_dbg_finalized_data++;
8849 break;
8850#if INET6
8851 case PF_INET6:
8852 /*
8853 * Checksum offload should not have been enabled when
8854 * extension headers exist; that also means that we
8855 * cannot force-finalize packets with extension headers.
8856		 * Indicate to the callee that it should skip such cases
8857		 * by setting optlen to -1.
8858 */
8859 did_sw = in6_finalize_cksum(m, hoff, -1, -1,
8860 m->m_pkthdr.csum_flags);
8861 if (did_sw & CSUM_DELAY_IPV6_DATA)
8862 hwcksum_dbg_finalized_data++;
8863 break;
8864#endif /* INET6 */
8865 default:
8866 return;
8867 }
8868}
8869
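/*
 * Checksum debugging hook on the input path: optionally force partial
 * checksum offload on received packets, and/or verify (and adjust) the
 * partial checksum value reported by the driver.
 */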
8870static void
8871dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
8872 protocol_family_t pf)
8873{
8874 uint16_t sum = 0;
8875 uint32_t hlen;
8876
8877 if (frame_header == NULL ||
8878 frame_header < (char *)mbuf_datastart(m) ||
8879 frame_header > (char *)m->m_data) {
8880 printf("%s: frame header pointer 0x%llx out of range "
8881 "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
8882 (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
8883 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
8884 (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
8885 (uint64_t)VM_KERNEL_ADDRPERM(m));
8886 return;
8887 }
8888 hlen = (m->m_data - frame_header);
8889
8890 switch (pf) {
8891 case PF_INET:
8892#if INET6
8893 case PF_INET6:
8894#endif /* INET6 */
8895 break;
8896 default:
8897 return;
8898 }
8899
8900 /*
8901 * Force partial checksum offload; useful to simulate cases
8902 * where the hardware does not support partial checksum offload,
8903 * in order to validate correctness throughout the layers above.
8904 */
8905 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
8906 uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
8907
8908 if (foff > (uint32_t)m->m_pkthdr.len)
8909 return;
8910
8911 m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
8912
8913 /* Compute 16-bit 1's complement sum from forced offset */
8914 sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
8915
8916 m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
8917 m->m_pkthdr.csum_rx_val = sum;
8918 m->m_pkthdr.csum_rx_start = (foff + hlen);
8919
8920 hwcksum_dbg_partial_forced++;
8921 hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
8922 }
8923
8924 /*
8925 * Partial checksum offload verification (and adjustment);
8926 * useful to validate and test cases where the hardware
8927 * supports partial checksum offload.
8928 */
8929 if ((m->m_pkthdr.csum_flags &
8930 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
8931 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
8932 uint32_t rxoff;
8933
8934 /* Start offset must begin after frame header */
8935 rxoff = m->m_pkthdr.csum_rx_start;
8936 if (hlen > rxoff) {
8937 hwcksum_dbg_bad_rxoff++;
8938 if (dlil_verbose) {
8939 printf("%s: partial cksum start offset %d "
8940 "is less than frame header length %d for "
8941 "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
8942 (uint64_t)VM_KERNEL_ADDRPERM(m));
8943 }
8944 return;
8945 }
8946 rxoff -= hlen;
8947
8948 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
8949 /*
8950 * Compute the expected 16-bit 1's complement sum;
8951 * skip this if we've already computed it above
8952 * when partial checksum offload is forced.
8953 */
8954 sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
8955
8956 /* Hardware or driver is buggy */
8957 if (sum != m->m_pkthdr.csum_rx_val) {
8958 hwcksum_dbg_bad_cksum++;
8959 if (dlil_verbose) {
8960 printf("%s: bad partial cksum value "
8961 "0x%x (expected 0x%x) for mbuf "
8962 "0x%llx [rx_start %d]\n",
8963 if_name(ifp),
8964 m->m_pkthdr.csum_rx_val, sum,
8965 (uint64_t)VM_KERNEL_ADDRPERM(m),
8966 m->m_pkthdr.csum_rx_start);
8967 }
8968 return;
8969 }
8970 }
8971 hwcksum_dbg_verified++;
8972
8973 /*
8974		 * This code allows us to emulate various hardware that
8975		 * performs the 16-bit 1's complement sum beginning at
8976		 * various start offset values.
8977 */
8978 if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) {
8979 uint32_t aoff = hwcksum_dbg_partial_rxoff_adj;
8980
8981 if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len)
8982 return;
8983
8984 sum = m_adj_sum16(m, rxoff, aoff,
8985 m_pktlen(m) - aoff, sum);
8986
8987 m->m_pkthdr.csum_rx_val = sum;
8988 m->m_pkthdr.csum_rx_start = (aoff + hlen);
8989
8990 hwcksum_dbg_adjusted++;
8991 }
8992 }
8993}
8994
8995static int
8996sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS
8997{
8998#pragma unused(arg1, arg2)
8999 u_int32_t i;
9000 int err;
9001
9002 i = hwcksum_dbg_mode;
9003
9004 err = sysctl_handle_int(oidp, &i, 0, req);
9005 if (err != 0 || req->newptr == USER_ADDR_NULL)
9006 return (err);
9007
9008 if (hwcksum_dbg == 0)
9009 return (ENODEV);
9010
9011 if ((i & ~HWCKSUM_DBG_MASK) != 0)
9012 return (EINVAL);
9013
9014 hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK);
9015
9016 return (err);
9017}
9018
9019static int
9020sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS
9021{
9022#pragma unused(arg1, arg2)
9023 u_int32_t i;
9024 int err;
9025
9026 i = hwcksum_dbg_partial_rxoff_forced;
9027
9028 err = sysctl_handle_int(oidp, &i, 0, req);
9029 if (err != 0 || req->newptr == USER_ADDR_NULL)
9030 return (err);
9031
9032 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED))
9033 return (ENODEV);
9034
9035 hwcksum_dbg_partial_rxoff_forced = i;
9036
9037 return (err);
9038}
9039
9040static int
9041sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS
9042{
9043#pragma unused(arg1, arg2)
9044 u_int32_t i;
9045 int err;
9046
9047 i = hwcksum_dbg_partial_rxoff_adj;
9048
9049 err = sysctl_handle_int(oidp, &i, 0, req);
9050 if (err != 0 || req->newptr == USER_ADDR_NULL)
9051 return (err);
9052
9053 if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ))
9054 return (ENODEV);
9055
9056 hwcksum_dbg_partial_rxoff_adj = i;
9057
9058 return (err);
9059}
9060
9061static int
9062sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS
9063{
9064#pragma unused(oidp, arg1, arg2)
9065 int err;
9066
9067	if (req->oldptr == USER_ADDR_NULL) {
9068		/* size-only request; SYSCTL_OUT() below reports the needed length */
9069	}
9070 if (req->newptr != USER_ADDR_NULL) {
9071 return (EPERM);
9072 }
9073 err = SYSCTL_OUT(req, &tx_chain_len_stats,
9074 sizeof(struct chain_len_stats));
9075
9076 return (err);
9077}
9078
9079
9080#if DEBUG || DEVELOPMENT
9081/* Blob for sum16 verification */
9082static uint8_t sumdata[] = {
9083 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03,
9084 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45,
9085 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00,
9086 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71,
9087 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93,
9088 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c,
9089 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93,
9090 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75,
9091 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09,
9092 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc,
9093 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42,
9094 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49,
9095 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90,
9096 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85,
9097 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd,
9098 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad,
9099 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc,
9100 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2,
9101 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c,
9102 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea,
9103 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87,
9104 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54,
9105 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e,
9106 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e,
9107 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf,
9108 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75,
9109 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6,
9110 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab,
9111 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17,
9112 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95,
9113 0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
9114 0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
9115 0xc8, 0x28, 0x02, 0x00, 0x00
9116};
9117
9118/* Precomputed 16-bit 1's complement sums for various spans of the above data */
9119static struct {
9120 boolean_t init;
9121 uint16_t len;
9122 uint16_t sumr; /* reference */
9123 uint16_t sumrp; /* reference, precomputed */
9124} sumtbl[] = {
9125 { FALSE, 0, 0, 0x0000 },
9126 { FALSE, 1, 0, 0x001f },
9127 { FALSE, 2, 0, 0x8b1f },
9128 { FALSE, 3, 0, 0x8b27 },
9129 { FALSE, 7, 0, 0x790e },
9130 { FALSE, 11, 0, 0xcb6d },
9131 { FALSE, 20, 0, 0x20dd },
9132 { FALSE, 27, 0, 0xbabd },
9133 { FALSE, 32, 0, 0xf3e8 },
9134 { FALSE, 37, 0, 0x197d },
9135 { FALSE, 43, 0, 0x9eae },
9136 { FALSE, 64, 0, 0x4678 },
9137 { FALSE, 127, 0, 0x9399 },
9138 { FALSE, 256, 0, 0xd147 },
9139 { FALSE, 325, 0, 0x0358 },
9140};
9141#define SUMTBL_MAX ((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
9142
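/*
 * Self-test for m_sum16() (and b_sum16() on INET) against reference
 * checksums, across a range of lengths and alignments; panics on mismatch.
 */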
9143static void
9144dlil_verify_sum16(void)
9145{
9146 struct mbuf *m;
9147 uint8_t *buf;
9148 int n;
9149
9150 /* Make sure test data plus extra room for alignment fits in cluster */
9151 _CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
9152
9153 kprintf("DLIL: running SUM16 self-tests ... ");
9154
9155 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
9156 m_align(m, sizeof(sumdata) + (sizeof (uint64_t) * 2));
9157
9158 buf = mtod(m, uint8_t *); /* base address */
9159
9160 for (n = 0; n < SUMTBL_MAX; n++) {
9161 uint16_t len = sumtbl[n].len;
9162 int i;
9163
9164 /* Verify for all possible alignments */
9165 for (i = 0; i < (int)sizeof (uint64_t); i++) {
9166 uint16_t sum, sumr;
9167 uint8_t *c;
9168
9169 /* Copy over test data to mbuf */
9170 VERIFY(len <= sizeof (sumdata));
9171 c = buf + i;
9172 bcopy(sumdata, c, len);
9173
9174 /* Zero-offset test (align by data pointer) */
9175 m->m_data = (caddr_t)c;
9176 m->m_len = len;
9177 sum = m_sum16(m, 0, len);
9178
9179 if (!sumtbl[n].init) {
9180 sumr = in_cksum_mbuf_ref(m, len, 0, 0);
9181 sumtbl[n].sumr = sumr;
9182 sumtbl[n].init = TRUE;
9183 } else {
9184 sumr = sumtbl[n].sumr;
9185 }
9186
9187 /* Something is horribly broken; stop now */
9188 if (sumr != sumtbl[n].sumrp) {
9189 panic_plain("\n%s: broken in_cksum_mbuf_ref() "
9190 "for len=%d align=%d sum=0x%04x "
9191 "[expected=0x%04x]\n", __func__,
9192 len, i, sum, sumr);
9193 /* NOTREACHED */
9194 } else if (sum != sumr) {
9195 panic_plain("\n%s: broken m_sum16() for len=%d "
9196 "align=%d sum=0x%04x [expected=0x%04x]\n",
9197 __func__, len, i, sum, sumr);
9198 /* NOTREACHED */
9199 }
9200
9201 /* Alignment test by offset (fixed data pointer) */
9202 m->m_data = (caddr_t)buf;
9203 m->m_len = i + len;
9204 sum = m_sum16(m, i, len);
9205
9206 /* Something is horribly broken; stop now */
9207 if (sum != sumr) {
9208 panic_plain("\n%s: broken m_sum16() for len=%d "
9209 "offset=%d sum=0x%04x [expected=0x%04x]\n",
9210 __func__, len, i, sum, sumr);
9211 /* NOTREACHED */
9212 }
9213#if INET
9214			/* Simple sum16 contiguous buffer test by alignment */
9215 sum = b_sum16(c, len);
9216
9217 /* Something is horribly broken; stop now */
9218 if (sum != sumr) {
9219 panic_plain("\n%s: broken b_sum16() for len=%d "
9220 "align=%d sum=0x%04x [expected=0x%04x]\n",
9221 __func__, len, i, sum, sumr);
9222 /* NOTREACHED */
9223 }
9224#endif /* INET */
9225 }
9226 }
9227 m_freem(m);
9228
9229 kprintf("PASSED\n");
9230}
9231#endif /* DEBUG || DEVELOPMENT */
9232
9233#define CASE_STRINGIFY(x) case x: return #x
9234
9235__private_extern__ const char *
9236dlil_kev_dl_code_str(u_int32_t event_code)
9237{
9238 switch (event_code) {
9239 CASE_STRINGIFY(KEV_DL_SIFFLAGS);
9240 CASE_STRINGIFY(KEV_DL_SIFMETRICS);
9241 CASE_STRINGIFY(KEV_DL_SIFMTU);
9242 CASE_STRINGIFY(KEV_DL_SIFPHYS);
9243 CASE_STRINGIFY(KEV_DL_SIFMEDIA);
9244 CASE_STRINGIFY(KEV_DL_SIFGENERIC);
9245 CASE_STRINGIFY(KEV_DL_ADDMULTI);
9246 CASE_STRINGIFY(KEV_DL_DELMULTI);
9247 CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
9248 CASE_STRINGIFY(KEV_DL_IF_DETACHING);
9249 CASE_STRINGIFY(KEV_DL_IF_DETACHED);
9250 CASE_STRINGIFY(KEV_DL_LINK_OFF);
9251 CASE_STRINGIFY(KEV_DL_LINK_ON);
9252 CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
9253 CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
9254 CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
9255 CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
9256 CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
9257 CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
9258 CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
9259 CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
9260 CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
9261 CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
9262 CASE_STRINGIFY(KEV_DL_ISSUES);
9263 CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
9264 default:
9265 break;
9266 }
9267 return ("");
9268}
9269
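/*
 * Thread call fired when an interface crosses its data threshold;
 * notifies the network statistics subsystem if the interface is still
 * attached.
 */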
9270static void
9271dlil_dt_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
9272{
9273#pragma unused(arg1)
9274 struct ifnet *ifp = arg0;
9275
9276 if (ifnet_is_attached(ifp, 1)) {
9277 nstat_ifnet_threshold_reached(ifp->if_index);
9278 ifnet_decr_iorefcnt(ifp);
9279 }
9280}
9281
9282void
9283ifnet_notify_data_threshold(struct ifnet *ifp)
9284{
9285 uint64_t bytes = (ifp->if_ibytes + ifp->if_obytes);
9286 uint64_t oldbytes = ifp->if_dt_bytes;
9287
9288 ASSERT(ifp->if_dt_tcall != NULL);
9289
9290 /*
9291 * If we went over the threshold, notify NetworkStatistics.
9292 * We rate-limit it based on the threshold interval value.
9293 */
9294 if (threshold_notify && (bytes - oldbytes) > ifp->if_data_threshold &&
9295 OSCompareAndSwap64(oldbytes, bytes, &ifp->if_dt_bytes) &&
9296 !thread_call_isactive(ifp->if_dt_tcall)) {
9297 uint64_t tival = (threshold_interval * NSEC_PER_SEC);
9298 uint64_t now = mach_absolute_time(), deadline = now;
9299 uint64_t ival;
9300
9301 if (tival != 0) {
9302 nanoseconds_to_absolutetime(tival, &ival);
9303 clock_deadline_for_periodic_event(ival, now, &deadline);
9304 (void) thread_call_enter_delayed(ifp->if_dt_tcall,
9305 deadline);
9306 } else {
9307 (void) thread_call_enter(ifp->if_dt_tcall);
9308 }
9309 }
9310}
9311
9312#if (DEVELOPMENT || DEBUG)
9313/*
9314 * The sysctl variable name contains the input parameters of
9315 * ifnet_get_keepalive_offload_frames()
9316 * ifp (interface index): name[0]
9317 * frames_array_count: name[1]
9318 * frame_data_offset: name[2]
9319 * The return length gives used_frames_count
9320 */
9321static int
9322sysctl_get_kao_frames SYSCTL_HANDLER_ARGS
9323{
9324#pragma unused(oidp)
9325 int *name = (int *)arg1;
9326 u_int namelen = arg2;
9327 int idx;
9328 ifnet_t ifp = NULL;
9329 u_int32_t frames_array_count;
9330 size_t frame_data_offset;
9331 u_int32_t used_frames_count;
9332 struct ifnet_keepalive_offload_frame *frames_array = NULL;
9333 int error = 0;
9334 u_int32_t i;
9335
9336 /*
9337	 * Only root is allowed to look at other people's TCP frames
9338 */
9339 error = proc_suser(current_proc());
9340 if (error != 0)
9341 goto done;
9342 /*
9343 * Validate the input parameters
9344 */
9345 if (req->newptr != USER_ADDR_NULL) {
9346 error = EPERM;
9347 goto done;
9348 }
9349 if (namelen != 3) {
9350 error = EINVAL;
9351 goto done;
9352 }
9353 if (req->oldptr == USER_ADDR_NULL) {
9354 error = EINVAL;
9355 goto done;
9356 }
9357 if (req->oldlen == 0) {
9358 error = EINVAL;
9359 goto done;
9360 }
9361 idx = name[0];
9362 frames_array_count = name[1];
9363 frame_data_offset = name[2];
9364
9365 /* Make sure the passed buffer is large enough */
9366 if (frames_array_count * sizeof(struct ifnet_keepalive_offload_frame) >
9367 req->oldlen) {
9368 error = ENOMEM;
9369 goto done;
9370 }
9371
9372 ifnet_head_lock_shared();
9373 if (!IF_INDEX_IN_RANGE(idx)) {
9374 ifnet_head_done();
9375 error = ENOENT;
9376 goto done;
9377 }
9378 ifp = ifindex2ifnet[idx];
9379 ifnet_head_done();
9380
9381 frames_array = _MALLOC(frames_array_count *
9382 sizeof(struct ifnet_keepalive_offload_frame), M_TEMP, M_WAITOK);
9383 if (frames_array == NULL) {
9384 error = ENOMEM;
9385 goto done;
9386 }
9387
9388 error = ifnet_get_keepalive_offload_frames(ifp, frames_array,
9389 frames_array_count, frame_data_offset, &used_frames_count);
9390 if (error != 0) {
9391 printf("%s: ifnet_get_keepalive_offload_frames error %d\n",
9392 __func__, error);
9393 goto done;
9394 }
9395
9396 for (i = 0; i < used_frames_count; i++) {
9397 error = SYSCTL_OUT(req, frames_array + i,
9398 sizeof(struct ifnet_keepalive_offload_frame));
9399 if (error != 0) {
9400 goto done;
9401 }
9402 }
9403done:
9404 if (frames_array != NULL)
9405 _FREE(frames_array, M_TEMP);
9406 return (error);
9407}
9408#endif /* DEVELOPMENT || DEBUG */
9409
9410void
9411ifnet_update_stats_per_flow(struct ifnet_stats_per_flow *ifs,
9412 struct ifnet *ifp)
9413{
9414 tcp_update_stats_per_flow(ifs, ifp);
9415}
9416
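/*
 * Thread call used for input mitigation: wakes up the interface's input
 * thread so that any pending packets are drained.
 */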
9417static void
9418dlil_mit_tcall_fn(thread_call_param_t arg0, thread_call_param_t arg1)
9419{
9420#pragma unused(arg1)
9421 struct ifnet *ifp = (struct ifnet *)arg0;
9422 struct dlil_threading_info *inp = ifp->if_inp;
9423
9424 ifnet_lock_shared(ifp);
9425 if (!IF_FULLY_ATTACHED(ifp) || inp == NULL) {
9426 ifnet_lock_done(ifp);
9427 return;
9428 }
9429
9430 lck_mtx_lock_spin(&inp->input_lck);
9431 inp->input_waiting |= DLIL_INPUT_WAITING;
9432 if (!(inp->input_waiting & DLIL_INPUT_RUNNING) ||
9433 !qempty(&inp->rcvq_pkts)) {
9434 inp->wtot++;
9435 wakeup_one((caddr_t)&inp->input_waiting);
9436 }
9437 lck_mtx_unlock(&inp->input_lck);
9438 ifnet_lock_done(ifp);
9439}
9440