1 | /* |
2 | * Copyright (c) 1999-2024 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | /* |
29 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce |
30 | * support for mandatory and extensible security protections. This notice |
31 | * is included in support of clause 2.2 (b) of the Apple Public License, |
32 | * Version 2.0. |
33 | */ |
34 | #include "kpi_interface.h" |
35 | #include <stddef.h> |
36 | #include <ptrauth.h> |
37 | |
38 | #include <sys/param.h> |
39 | #include <sys/systm.h> |
40 | #include <sys/kernel.h> |
41 | #include <sys/malloc.h> |
42 | #include <sys/mbuf.h> |
43 | #include <sys/socket.h> |
44 | #include <sys/domain.h> |
45 | #include <sys/user.h> |
46 | #include <sys/random.h> |
47 | #include <sys/socketvar.h> |
48 | #include <net/if_dl.h> |
49 | #include <net/if.h> |
50 | #include <net/route.h> |
51 | #include <net/if_var.h> |
52 | #include <net/dlil.h> |
53 | #include <net/if_arp.h> |
54 | #include <net/iptap.h> |
55 | #include <net/pktap.h> |
56 | #include <net/nwk_wq.h> |
57 | #include <sys/kern_event.h> |
58 | #include <sys/kdebug.h> |
59 | #include <sys/mcache.h> |
60 | #include <sys/syslog.h> |
61 | #include <sys/protosw.h> |
62 | #include <sys/priv.h> |
63 | |
64 | #include <kern/assert.h> |
65 | #include <kern/task.h> |
66 | #include <kern/thread.h> |
67 | #include <kern/sched_prim.h> |
68 | #include <kern/locks.h> |
69 | #include <kern/zalloc.h> |
70 | |
71 | #include <net/kpi_protocol.h> |
72 | #include <net/if_types.h> |
73 | #include <net/if_ipsec.h> |
74 | #include <net/if_llreach.h> |
75 | #include <net/if_utun.h> |
76 | #include <net/kpi_interfacefilter.h> |
77 | #include <net/classq/classq.h> |
78 | #include <net/classq/classq_sfb.h> |
79 | #include <net/flowhash.h> |
80 | #include <net/ntstat.h> |
81 | #if SKYWALK && defined(XNU_TARGET_OS_OSX) |
82 | #include <skywalk/lib/net_filter_event.h> |
83 | #endif /* SKYWALK && XNU_TARGET_OS_OSX */ |
84 | #include <net/net_api_stats.h> |
85 | #include <net/if_ports_used.h> |
86 | #include <net/if_vlan_var.h> |
87 | #include <netinet/in.h> |
88 | #if INET |
89 | #include <netinet/in_var.h> |
90 | #include <netinet/igmp_var.h> |
91 | #include <netinet/ip_var.h> |
92 | #include <netinet/tcp.h> |
93 | #include <netinet/tcp_var.h> |
94 | #include <netinet/udp.h> |
95 | #include <netinet/udp_var.h> |
96 | #include <netinet/if_ether.h> |
97 | #include <netinet/in_pcb.h> |
98 | #include <netinet/in_tclass.h> |
99 | #include <netinet/ip.h> |
100 | #include <netinet/ip_icmp.h> |
101 | #include <netinet/icmp_var.h> |
102 | #endif /* INET */ |
103 | |
104 | #include <net/nat464_utils.h> |
105 | #include <netinet6/in6_var.h> |
106 | #include <netinet6/nd6.h> |
107 | #include <netinet6/mld6_var.h> |
108 | #include <netinet6/scope6_var.h> |
109 | #include <netinet/ip6.h> |
110 | #include <netinet/icmp6.h> |
111 | #include <net/pf_pbuf.h> |
112 | #include <libkern/OSAtomic.h> |
113 | #include <libkern/tree.h> |
114 | |
115 | #include <dev/random/randomdev.h> |
116 | #include <machine/machine_routines.h> |
117 | |
118 | #include <mach/thread_act.h> |
119 | #include <mach/sdt.h> |
120 | |
121 | #if CONFIG_MACF |
122 | #include <sys/kauth.h> |
123 | #include <security/mac_framework.h> |
124 | #include <net/ethernet.h> |
125 | #include <net/firewire.h> |
126 | #endif |
127 | |
128 | #if PF |
129 | #include <net/pfvar.h> |
130 | #endif /* PF */ |
131 | #include <net/pktsched/pktsched.h> |
132 | #include <net/pktsched/pktsched_netem.h> |
133 | |
134 | #if NECP |
135 | #include <net/necp.h> |
136 | #endif /* NECP */ |
137 | |
138 | #if SKYWALK |
139 | #include <skywalk/packet/packet_queue.h> |
140 | #include <skywalk/nexus/netif/nx_netif.h> |
141 | #include <skywalk/nexus/flowswitch/nx_flowswitch.h> |
142 | #endif /* SKYWALK */ |
143 | |
144 | #include <net/sockaddr_utils.h> |
145 | |
146 | #include <os/log.h> |
147 | |
148 | #define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0) |
149 | #define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2) |
150 | #define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8)) |
151 | #define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8)) |
152 | #define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8)) |
153 | |
154 | #define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */ |
155 | #define MAX_LINKADDR 4 /* LONGWORDS */ |
156 | |
157 | #if 1 |
158 | #define DLIL_PRINTF printf |
159 | #else |
160 | #define DLIL_PRINTF kprintf |
161 | #endif |
162 | |
163 | #define IF_DATA_REQUIRE_ALIGNED_64(f) \ |
164 | _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t))) |
165 | |
166 | #define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \ |
167 | _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t))) |
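/*
 * Example usage (illustrative; the real checks are performed in bulk
 * at initialization time): assert that a 64-bit counter is naturally
 * aligned so that it can be updated atomically, e.g.
 *
 *	IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
 *	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes);
 */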
168 | |
169 | enum { |
170 | kProtoKPI_v1 = 1, |
171 | kProtoKPI_v2 = 2 |
172 | }; |
173 | |
174 | uint64_t if_creation_generation_count = 0; |
175 | |
176 | /* |
177 | * List of if_proto structures in if_proto_hash[] is protected by |
178 | * the ifnet lock. The rest of the fields are initialized at protocol |
179 | * attach time and never change, thus no lock required as long as |
180 | * a reference to it is valid, via if_proto_ref(). |
181 | */ |
182 | struct if_proto { |
183 | SLIST_ENTRY(if_proto) next_hash; |
184 | u_int32_t refcount; |
185 | u_int32_t detached; |
186 | struct ifnet *ifp; |
187 | protocol_family_t protocol_family; |
188 | int proto_kpi; |
189 | union { |
190 | struct { |
191 | proto_media_input input; |
192 | proto_media_preout pre_output; |
193 | proto_media_event event; |
194 | proto_media_ioctl ioctl; |
195 | proto_media_detached detached; |
196 | proto_media_resolve_multi resolve_multi; |
197 | proto_media_send_arp send_arp; |
198 | } v1; |
199 | struct { |
200 | proto_media_input_v2 input; |
201 | proto_media_preout pre_output; |
202 | proto_media_event event; |
203 | proto_media_ioctl ioctl; |
204 | proto_media_detached detached; |
205 | proto_media_resolve_multi resolve_multi; |
206 | proto_media_send_arp send_arp; |
207 | } v2; |
208 | } kpi; |
209 | }; |
210 | |
211 | SLIST_HEAD(proto_hash_entry, if_proto); |
212 | |
213 | #define DLIL_SDLDATALEN \ |
214 | (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0])) |
215 | |
216 | /* |
217 | * In the common case, the LL address is stored in the |
218 | * `dl_if_lladdr' member of the `dlil_ifnet'. This is sufficient |
219 | * for LL addresses that do not exceed the `DLIL_SDLMAXLEN' constant. |
220 | */ |
221 | struct dl_if_lladdr_std { |
222 | struct ifaddr ifa; |
223 | u_int8_t addr_sdl_bytes[DLIL_SDLMAXLEN]; |
224 | u_int8_t mask_sdl_bytes[DLIL_SDLMAXLEN]; |
225 | }; |
226 | |
227 | /* |
228 | * However, in some rare cases we encounter LL addresses which |
229 | * would not fit in the `DLIL_SDLMAXLEN' limitation. In such cases |
230 | * we allocate the storage in the permanent arena, using this memory layout. |
231 | */ |
232 | struct dl_if_lladdr_xtra_space { |
233 | struct ifaddr ifa; |
234 | u_int8_t addr_sdl_bytes[SOCK_MAXADDRLEN]; |
235 | u_int8_t mask_sdl_bytes[SOCK_MAXADDRLEN]; |
236 | }; |
237 | |
238 | struct dlil_ifnet { |
239 | struct ifnet dl_if; /* public ifnet */ |
240 | /* |
241 | * DLIL private fields, protected by dl_if_lock |
242 | */ |
243 | decl_lck_mtx_data(, dl_if_lock); |
244 | TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */ |
245 | u_int32_t dl_if_flags; /* flags (below) */ |
246 | u_int32_t dl_if_refcnt; /* refcnt */ |
247 | void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */ |
248 | void *dl_if_uniqueid; /* unique interface id */ |
249 | size_t dl_if_uniqueid_len; /* length of the unique id */ |
250 | char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */ |
251 | char dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */ |
252 | struct dl_if_lladdr_std dl_if_lladdr; /* link-level address storage*/ |
253 | u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */ |
254 | u_int8_t dl_if_permanent_ether[ETHER_ADDR_LEN]; /* permanent address */ |
255 | u_int8_t dl_if_permanent_ether_is_set; |
256 | u_int8_t dl_if_unused; |
257 | struct dlil_threading_info dl_if_inpstorage; /* input thread storage */ |
258 | ctrace_t dl_if_attach; /* attach PC stacktrace */ |
259 | ctrace_t dl_if_detach; /* detach PC stacktrace */ |
260 | }; |
261 | |
262 | /* Values for dl_if_flags (private to DLIL) */ |
263 | #define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */ |
264 | #define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */ |
265 | #define DLIF_DEBUG 0x4 /* has debugging info */ |
266 | |
267 | #define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */ |
268 | |
269 | /* For gdb */ |
270 | __private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE; |
271 | |
272 | struct dlil_ifnet_dbg { |
273 | struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */ |
274 | u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */ |
275 | u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */ |
276 | /* |
277 | * Circular lists of ifnet_{reference,release} callers. |
278 | */ |
279 | ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE]; |
280 | ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE]; |
281 | }; |
282 | |
283 | #define DLIL_TO_IFP(s) (&s->dl_if) |
284 | #define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s) |
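/*
 * The casts above are valid only because `dl_if' is the first member
 * of `struct dlil_ifnet'; this compile-time check (an illustrative
 * addition) makes that assumption explicit.
 */
_CASSERT(offsetof(struct dlil_ifnet, dl_if) == 0);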
285 | |
286 | struct ifnet_filter { |
287 | TAILQ_ENTRY(ifnet_filter) filt_next; |
288 | u_int32_t filt_skip; |
289 | u_int32_t filt_flags; |
290 | ifnet_t filt_ifp; |
291 | const char *filt_name; |
292 | void *filt_cookie; |
293 | protocol_family_t filt_protocol; |
294 | iff_input_func filt_input; |
295 | iff_output_func filt_output; |
296 | iff_event_func filt_event; |
297 | iff_ioctl_func filt_ioctl; |
298 | iff_detached_func filt_detached; |
299 | }; |
300 | |
/* Mbuf queue used for freeing excess mbufs */
302 | typedef MBUFQ_HEAD(dlil_freeq) dlil_freeq_t; |
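/*
 * Sketch of how such a free queue gets drained (illustrative only;
 * the actual trim path is dlil_trim_overcomitted_queue_locked(),
 * defined later in this file): dequeue each mbuf and return it to
 * the cache it was allocated from.
 */
static inline void
dlil_freeq_drain_example(dlil_freeq_t *freeq)
{
	struct mbuf *m;

	for (;;) {
		MBUFQ_DEQUEUE(freeq, m);
		if (m == NULL) {
			break;
		}
		m_freem(m);
	}
}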
303 | |
304 | struct proto_input_entry; |
305 | |
306 | static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head; |
307 | |
308 | static LCK_ATTR_DECLARE(dlil_lck_attributes, 0, 0); |
309 | |
static LCK_GRP_DECLARE(dlil_lock_group, "DLIL internal locks");
LCK_GRP_DECLARE(ifnet_lock_group, "ifnet locks");
static LCK_GRP_DECLARE(ifnet_head_lock_group, "ifnet head lock");
static LCK_GRP_DECLARE(ifnet_snd_lock_group, "ifnet snd locks");
static LCK_GRP_DECLARE(ifnet_rcv_lock_group, "ifnet rcv locks");
315 | |
316 | LCK_ATTR_DECLARE(ifnet_lock_attr, 0, 0); |
317 | static LCK_RW_DECLARE_ATTR(ifnet_head_lock, &ifnet_head_lock_group, |
318 | &dlil_lck_attributes); |
319 | static LCK_MTX_DECLARE_ATTR(dlil_ifnet_lock, &dlil_lock_group, |
320 | &dlil_lck_attributes); |
321 | |
322 | #if DEBUG |
323 | static unsigned int ifnet_debug = 1; /* debugging (enabled) */ |
324 | #else |
325 | static unsigned int ifnet_debug; /* debugging (disabled) */ |
326 | #endif /* !DEBUG */ |
327 | static unsigned int dlif_size; /* size of dlil_ifnet to allocate */ |
328 | static unsigned int dlif_bufsize; /* size of dlif_size + headroom */ |
329 | static struct zone *dlif_zone; /* zone for dlil_ifnet */ |
330 | #define DLIF_ZONE_NAME "ifnet" /* zone name */ |
331 | |
332 | static KALLOC_TYPE_DEFINE(dlif_filt_zone, struct ifnet_filter, NET_KT_DEFAULT); |
333 | |
334 | static KALLOC_TYPE_DEFINE(dlif_proto_zone, struct if_proto, NET_KT_DEFAULT); |
335 | |
336 | static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */ |
337 | static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */ |
338 | static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */ |
339 | #define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */ |
340 | |
341 | static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */ |
342 | static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */ |
343 | static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */ |
344 | #define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */ |
345 | |
346 | static u_int32_t net_rtref; |
347 | |
348 | static struct dlil_main_threading_info dlil_main_input_thread_info; |
349 | __private_extern__ struct dlil_threading_info *dlil_main_input_thread = |
350 | (struct dlil_threading_info *)&dlil_main_input_thread_info; |
351 | |
352 | static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg, bool update_generation); |
353 | static int dlil_detach_filter_internal(interface_filter_t filter, int detached); |
354 | static void dlil_if_trace(struct dlil_ifnet *, int); |
355 | static void if_proto_ref(struct if_proto *); |
356 | static void if_proto_free(struct if_proto *); |
357 | static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t); |
358 | static u_int32_t dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list, |
359 | u_int32_t list_count); |
360 | static void _dlil_if_release(ifnet_t ifp, bool clear_in_use); |
361 | static void if_flt_monitor_busy(struct ifnet *); |
362 | static void if_flt_monitor_unbusy(struct ifnet *); |
363 | static void if_flt_monitor_enter(struct ifnet *); |
364 | static void if_flt_monitor_leave(struct ifnet *); |
365 | static int dlil_interface_filters_input(struct ifnet *, struct mbuf **, |
366 | char **, protocol_family_t); |
367 | static int dlil_interface_filters_output(struct ifnet *, struct mbuf **, |
368 | protocol_family_t); |
369 | static struct ifaddr *dlil_alloc_lladdr(struct ifnet *, |
370 | const struct sockaddr_dl *); |
371 | static int ifnet_lookup(struct ifnet *); |
372 | static void if_purgeaddrs(struct ifnet *); |
373 | |
374 | static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t, |
375 | struct mbuf *, char *); |
376 | static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t, |
377 | struct mbuf *); |
378 | static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t, |
379 | mbuf_t *, const struct sockaddr *, void *, char *, char *); |
380 | static void ifproto_media_event(struct ifnet *, protocol_family_t, |
381 | const struct kev_msg *); |
382 | static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t, |
383 | unsigned long, void *); |
384 | static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *, |
385 | struct sockaddr_dl *, size_t); |
386 | static errno_t ifproto_media_send_arp(struct ifnet *, u_short, |
387 | const struct sockaddr_dl *, const struct sockaddr *, |
388 | const struct sockaddr_dl *, const struct sockaddr *); |
389 | |
390 | static errno_t ifp_if_input(struct ifnet *ifp, struct mbuf *m_head, |
391 | struct mbuf *m_tail, const struct ifnet_stat_increment_param *s, |
392 | boolean_t poll, struct thread *tp); |
393 | static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t, |
394 | struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *); |
395 | static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *); |
396 | static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *, |
397 | protocol_family_t *); |
398 | static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t, |
399 | const struct ifnet_demux_desc *, u_int32_t); |
400 | static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t); |
401 | static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *); |
402 | #if !XNU_TARGET_OS_OSX |
403 | static errno_t ifp_if_framer(struct ifnet *, struct mbuf **, |
404 | const struct sockaddr *, const char *, const char *, |
405 | u_int32_t *, u_int32_t *); |
406 | #else /* XNU_TARGET_OS_OSX */ |
407 | static errno_t ifp_if_framer(struct ifnet *, struct mbuf **, |
408 | const struct sockaddr *, const char *, const char *); |
409 | #endif /* XNU_TARGET_OS_OSX */ |
410 | static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **, |
411 | const struct sockaddr *, const char *, const char *, |
412 | u_int32_t *, u_int32_t *); |
413 | static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func); |
414 | static void ifp_if_free(struct ifnet *); |
415 | static void ifp_if_event(struct ifnet *, const struct kev_msg *); |
416 | static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *); |
417 | static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *); |
418 | |
419 | static uint32_t dlil_trim_overcomitted_queue_locked(class_queue_t *, |
420 | dlil_freeq_t *, struct ifnet_stat_increment_param *); |
421 | |
422 | static errno_t dlil_input_async(struct dlil_threading_info *, struct ifnet *, |
423 | struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *, |
424 | boolean_t, struct thread *); |
425 | static errno_t dlil_input_sync(struct dlil_threading_info *, struct ifnet *, |
426 | struct mbuf *, struct mbuf *, const struct ifnet_stat_increment_param *, |
427 | boolean_t, struct thread *); |
428 | |
429 | static void dlil_main_input_thread_func(void *, wait_result_t); |
430 | static void dlil_main_input_thread_cont(void *, wait_result_t); |
431 | |
432 | static void dlil_input_thread_func(void *, wait_result_t); |
433 | static void dlil_input_thread_cont(void *, wait_result_t); |
434 | |
435 | static void dlil_rxpoll_input_thread_func(void *, wait_result_t); |
436 | static void dlil_rxpoll_input_thread_cont(void *, wait_result_t); |
437 | |
438 | static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *, |
439 | thread_continue_t *); |
440 | static void dlil_terminate_input_thread(struct dlil_threading_info *); |
441 | static void dlil_input_stats_add(const struct ifnet_stat_increment_param *, |
442 | struct dlil_threading_info *, struct ifnet *, boolean_t); |
443 | static boolean_t dlil_input_stats_sync(struct ifnet *, |
444 | struct dlil_threading_info *); |
445 | static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *, |
446 | u_int32_t, ifnet_model_t, boolean_t); |
447 | static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *, |
448 | const struct ifnet_stat_increment_param *, boolean_t, boolean_t); |
static int dlil_is_clat_needed(protocol_family_t, mbuf_t);
450 | static errno_t dlil_clat46(ifnet_t, protocol_family_t *, mbuf_t *); |
451 | static errno_t dlil_clat64(ifnet_t, protocol_family_t *, mbuf_t *); |
452 | #if DEBUG || DEVELOPMENT |
453 | static void dlil_verify_sum16(void); |
454 | #endif /* DEBUG || DEVELOPMENT */ |
455 | static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t, |
456 | protocol_family_t); |
457 | static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *, |
458 | protocol_family_t); |
459 | |
460 | static void dlil_incr_pending_thread_count(void); |
461 | static void dlil_decr_pending_thread_count(void); |
462 | |
463 | static void ifnet_detacher_thread_func(void *, wait_result_t); |
464 | static void ifnet_detacher_thread_cont(void *, wait_result_t); |
465 | static void ifnet_detach_final(struct ifnet *); |
466 | static void ifnet_detaching_enqueue(struct ifnet *); |
467 | static struct ifnet *ifnet_detaching_dequeue(void); |
468 | |
469 | static void ifnet_start_thread_func(void *, wait_result_t); |
470 | static void ifnet_start_thread_cont(void *, wait_result_t); |
471 | |
472 | static void ifnet_poll_thread_func(void *, wait_result_t); |
473 | static void ifnet_poll_thread_cont(void *, wait_result_t); |
474 | |
475 | static errno_t ifnet_enqueue_common(struct ifnet *, struct ifclassq *, |
476 | classq_pkt_t *, boolean_t, boolean_t *); |
477 | |
478 | static void ifp_src_route_copyout(struct ifnet *, struct route *); |
479 | static void ifp_src_route_copyin(struct ifnet *, struct route *); |
480 | static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *); |
481 | static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *); |
482 | |
483 | static errno_t if_mcasts_update_async(struct ifnet *); |
484 | |
485 | static int sysctl_rxpoll SYSCTL_HANDLER_ARGS; |
486 | static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS; |
487 | static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS; |
488 | static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS; |
489 | static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS; |
490 | static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS; |
491 | static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS; |
492 | static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS; |
493 | static int sysctl_rcvq_burst_limit SYSCTL_HANDLER_ARGS; |
494 | static int sysctl_rcvq_trim_pct SYSCTL_HANDLER_ARGS; |
495 | static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS; |
496 | static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS; |
497 | static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS; |
498 | |
499 | struct chain_len_stats tx_chain_len_stats; |
500 | static int sysctl_tx_chain_len_stats SYSCTL_HANDLER_ARGS; |
501 | |
502 | #if TEST_INPUT_THREAD_TERMINATION |
503 | static int sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS; |
504 | #endif /* TEST_INPUT_THREAD_TERMINATION */ |
505 | |
506 | /* The following are protected by dlil_ifnet_lock */ |
507 | static TAILQ_HEAD(, ifnet) ifnet_detaching_head; |
508 | static u_int32_t ifnet_detaching_cnt; |
509 | static boolean_t ifnet_detaching_embryonic; |
510 | static void *ifnet_delayed_run; /* wait channel for detaching thread */ |
511 | |
512 | static LCK_MTX_DECLARE_ATTR(ifnet_fc_lock, &dlil_lock_group, |
513 | &dlil_lck_attributes); |
514 | |
515 | static uint32_t ifnet_flowhash_seed; |
516 | |
517 | struct ifnet_flowhash_key { |
518 | char ifk_name[IFNAMSIZ]; |
519 | uint32_t ifk_unit; |
520 | uint32_t ifk_flags; |
521 | uint32_t ifk_eflags; |
522 | uint32_t ifk_capabilities; |
523 | uint32_t ifk_capenable; |
524 | uint32_t ifk_output_sched_model; |
525 | uint32_t ifk_rand1; |
526 | uint32_t ifk_rand2; |
527 | }; |
528 | |
529 | /* Flow control entry per interface */ |
530 | struct ifnet_fc_entry { |
531 | RB_ENTRY(ifnet_fc_entry) ifce_entry; |
532 | u_int32_t ifce_flowhash; |
533 | struct ifnet *ifce_ifp; |
534 | }; |
535 | |
536 | static uint32_t ifnet_calc_flowhash(struct ifnet *); |
537 | static int ifce_cmp(const struct ifnet_fc_entry *, |
538 | const struct ifnet_fc_entry *); |
539 | static int ifnet_fc_add(struct ifnet *); |
540 | static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t); |
541 | static void ifnet_fc_entry_free(struct ifnet_fc_entry *); |
542 | |
543 | /* protected by ifnet_fc_lock */ |
544 | RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree; |
545 | RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp); |
546 | RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp); |
547 | |
548 | static KALLOC_TYPE_DEFINE(ifnet_fc_zone, struct ifnet_fc_entry, NET_KT_DEFAULT); |
549 | |
550 | extern void bpfdetach(struct ifnet *); |
551 | extern void proto_input_run(void); |
552 | |
553 | extern uint32_t udp_count_opportunistic(unsigned int ifindex, |
554 | u_int32_t flags); |
555 | extern uint32_t tcp_count_opportunistic(unsigned int ifindex, |
556 | u_int32_t flags); |
557 | |
558 | __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); |
559 | |
560 | #if CONFIG_MACF |
561 | #if !XNU_TARGET_OS_OSX |
562 | int dlil_lladdr_ckreq = 1; |
563 | #else /* XNU_TARGET_OS_OSX */ |
564 | int dlil_lladdr_ckreq = 0; |
565 | #endif /* XNU_TARGET_OS_OSX */ |
566 | #endif /* CONFIG_MACF */ |
567 | |
568 | #if DEBUG |
569 | int dlil_verbose = 1; |
570 | #else |
571 | int dlil_verbose = 0; |
572 | #endif /* DEBUG */ |
573 | #if IFNET_INPUT_SANITY_CHK |
574 | /* sanity checking of input packet lists received */ |
575 | static u_int32_t dlil_input_sanity_check = 0; |
576 | #endif /* IFNET_INPUT_SANITY_CHK */ |
577 | /* rate limit debug messages */ |
578 | struct timespec dlil_dbgrate = { .tv_sec = 1, .tv_nsec = 0 }; |
579 | |
580 | SYSCTL_DECL(_net_link_generic_system); |
581 | |
582 | SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, |
    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
584 | |
585 | #define IF_SNDQ_MINLEN 32 |
586 | u_int32_t if_sndq_maxlen = IFQ_MAXLEN; |
587 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen, |
588 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN, |
    sysctl_sndq_maxlen, "I", "Default transmit queue max length");
590 | |
591 | #define IF_RCVQ_MINLEN 32 |
592 | #define IF_RCVQ_MAXLEN 256 |
593 | u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN; |
594 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen, |
595 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN, |
    sysctl_rcvq_maxlen, "I", "Default receive queue max length");
597 | |
598 | /* |
599 | * Protect against possible memory starvation that may happen |
600 | * when the driver is pushing data faster than the AP can process. |
601 | * |
602 | * If at any point during DLIL input phase any of the input queues |
603 | * exceeds the burst limit, DLIL will start to trim the queue, |
604 | * by returning mbufs in the input queue to the cache from which |
605 | * the mbufs were originally allocated, starting from the oldest |
606 | * mbuf and continuing until the new limit (see below) is reached. |
607 | * |
608 | * In order to avoid a steplocked equilibrium, the trimming |
609 | * will continue PAST the burst limit, until the corresponding |
610 | * input queue is reduced to `if_rcvq_trim_pct' %. |
611 | * |
612 | * For example, if the input queue limit is 1024 packets, |
613 | * and the trim percentage (`if_rcvq_trim_pct') is 80 %, |
614 | * the trimming will continue until the queue contains 819 packets |
615 | * (1024 * 80 / 100 == 819). |
616 | * |
617 | * Setting the burst limit too low can hurt the throughput, |
618 | * while setting the burst limit too high can defeat the purpose. |
619 | */ |
620 | #define IF_RCVQ_BURST_LIMIT_MIN 1024 |
621 | #define IF_RCVQ_BURST_LIMIT_DEFAULT 8192 |
622 | #define IF_RCVQ_BURST_LIMIT_MAX 32768 |
623 | uint32_t if_rcvq_burst_limit = IF_RCVQ_BURST_LIMIT_DEFAULT; |
624 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_burst_limit, |
625 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_burst_limit, IF_RCVQ_BURST_LIMIT_DEFAULT, |
    sysctl_rcvq_burst_limit, "I", "Upper memory limit for inbound data");
627 | |
628 | #define IF_RCVQ_TRIM_PCT_MIN 20 |
629 | #define IF_RCVQ_TRIM_PCT_DEFAULT 80 |
630 | #define IF_RCVQ_TRIM_PCT_MAX 100 |
631 | uint32_t if_rcvq_trim_pct = IF_RCVQ_TRIM_PCT_DEFAULT; |
632 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_trim_pct, |
633 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_trim_pct, IF_RCVQ_TRIM_PCT_DEFAULT, |
    sysctl_rcvq_trim_pct, "I",
    "Percentage (0 - 100) of the queue limit to keep after detecting an overflow burst");
636 | |
637 | #define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */ |
638 | u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY; |
639 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay, |
640 | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY, |
641 | "ilog2 of EWMA decay rate of avg inbound packets" ); |
642 | |
643 | #define IF_RXPOLL_MODE_HOLDTIME_MIN (10ULL * 1000 * 1000) /* 10 ms */ |
644 | #define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */ |
645 | static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME; |
646 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time, |
647 | CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime, |
648 | IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime, |
649 | "Q" , "input poll mode freeze time" ); |
650 | |
651 | #define IF_RXPOLL_SAMPLETIME_MIN (1ULL * 1000 * 1000) /* 1 ms */ |
652 | #define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */ |
653 | static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME; |
654 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time, |
655 | CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime, |
656 | IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime, |
657 | "Q" , "input poll sampling time" ); |
658 | |
659 | static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME; |
660 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time, |
661 | CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time, |
662 | IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time, |
663 | "Q" , "input poll interval (time)" ); |
664 | |
665 | #define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */ |
666 | u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS; |
667 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts, |
668 | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts, |
669 | IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)" ); |
670 | |
671 | #define IF_RXPOLL_WLOWAT 10 |
672 | static u_int32_t if_sysctl_rxpoll_wlowat = IF_RXPOLL_WLOWAT; |
673 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat, |
674 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_wlowat, |
675 | IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat, |
676 | "I" , "input poll wakeup low watermark" ); |
677 | |
678 | #define IF_RXPOLL_WHIWAT 100 |
679 | static u_int32_t if_sysctl_rxpoll_whiwat = IF_RXPOLL_WHIWAT; |
680 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat, |
681 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sysctl_rxpoll_whiwat, |
682 | IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat, |
683 | "I" , "input poll wakeup high watermark" ); |
684 | |
685 | static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */ |
686 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max, |
687 | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0, |
688 | "max packets per poll call" ); |
689 | |
690 | u_int32_t if_rxpoll = 1; |
691 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll, |
692 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0, |
693 | sysctl_rxpoll, "I" , "enable opportunistic input polling" ); |
694 | |
695 | #if TEST_INPUT_THREAD_TERMINATION |
696 | static u_int32_t if_input_thread_termination_spin = 0; |
697 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, input_thread_termination_spin, |
698 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, |
699 | &if_input_thread_termination_spin, 0, |
700 | sysctl_input_thread_termination_spin, |
701 | "I" , "input thread termination spin limit" ); |
702 | #endif /* TEST_INPUT_THREAD_TERMINATION */ |
703 | |
704 | static u_int32_t cur_dlil_input_threads = 0; |
705 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads, |
706 | CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads, 0, |
707 | "Current number of DLIL input threads" ); |
708 | |
709 | #if IFNET_INPUT_SANITY_CHK |
710 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check, |
711 | CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check, 0, |
712 | "Turn on sanity checking in DLIL input" ); |
713 | #endif /* IFNET_INPUT_SANITY_CHK */ |
714 | |
715 | static u_int32_t if_flowadv = 1; |
716 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory, |
717 | CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1, |
718 | "enable flow-advisory mechanism" ); |
719 | |
720 | static u_int32_t if_delaybased_queue = 1; |
721 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, delaybased_queue, |
722 | CTLFLAG_RW | CTLFLAG_LOCKED, &if_delaybased_queue, 1, |
723 | "enable delay based dynamic queue sizing" ); |
724 | |
725 | static uint64_t hwcksum_in_invalidated = 0; |
726 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
727 | hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED, |
728 | &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum" ); |
729 | |
730 | uint32_t hwcksum_dbg = 0; |
731 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg, |
732 | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0, |
733 | "enable hardware cksum debugging" ); |
734 | |
735 | u_int32_t ifnet_start_delayed = 0; |
736 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delayed, |
737 | CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_start_delayed, 0, |
738 | "number of times start was delayed" ); |
739 | |
740 | u_int32_t ifnet_delay_start_disabled = 0; |
741 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, start_delay_disabled, |
742 | CTLFLAG_RW | CTLFLAG_LOCKED, &ifnet_delay_start_disabled, 0, |
743 | "number of times start was delayed" ); |
744 | |
745 | static inline void |
746 | ifnet_delay_start_disabled_increment(void) |
747 | { |
748 | OSIncrementAtomic(&ifnet_delay_start_disabled); |
749 | } |
750 | |
751 | #define HWCKSUM_DBG_PARTIAL_FORCED 0x1 /* forced partial checksum */ |
752 | #define HWCKSUM_DBG_PARTIAL_RXOFF_ADJ 0x2 /* adjust start offset */ |
753 | #define HWCKSUM_DBG_FINALIZE_FORCED 0x10 /* forced finalize */ |
754 | #define HWCKSUM_DBG_MASK \ |
755 | (HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ | \ |
756 | HWCKSUM_DBG_FINALIZE_FORCED) |
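/*
 * Example (illustrative): forcing partial checksums with receive
 * offset adjustment combines the first two bits, e.g. from user
 * space:
 *
 *	sysctl net.link.generic.system.hwcksum_dbg_mode=0x3
 *
 * i.e. HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ;
 * values with bits outside HWCKSUM_DBG_MASK are expected to be
 * rejected by sysctl_hwcksum_dbg_mode().
 */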
757 | |
758 | static uint32_t hwcksum_dbg_mode = 0; |
759 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode, |
760 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode, |
    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
762 | |
763 | static uint64_t hwcksum_dbg_partial_forced = 0; |
764 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
765 | hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED, |
766 | &hwcksum_dbg_partial_forced, "packets forced using partial cksum" ); |
767 | |
768 | static uint64_t hwcksum_dbg_partial_forced_bytes = 0; |
769 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
770 | hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, |
771 | &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum" ); |
772 | |
773 | static uint32_t hwcksum_dbg_partial_rxoff_forced = 0; |
774 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, |
775 | hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, |
776 | &hwcksum_dbg_partial_rxoff_forced, 0, |
    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
    "forced partial cksum rx offset");
779 | |
780 | static uint32_t hwcksum_dbg_partial_rxoff_adj = 0; |
781 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj, |
782 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj, |
783 | 0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I" , |
784 | "adjusted partial cksum rx offset" ); |
785 | |
786 | static uint64_t hwcksum_dbg_verified = 0; |
787 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
788 | hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED, |
789 | &hwcksum_dbg_verified, "packets verified for having good checksum" ); |
790 | |
791 | static uint64_t hwcksum_dbg_bad_cksum = 0; |
792 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
793 | hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, |
794 | &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum" ); |
795 | |
796 | static uint64_t hwcksum_dbg_bad_rxoff = 0; |
797 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
798 | hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED, |
799 | &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff" ); |
800 | |
801 | static uint64_t hwcksum_dbg_adjusted = 0; |
802 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
803 | hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED, |
804 | &hwcksum_dbg_adjusted, "packets with rxoff adjusted" ); |
805 | |
806 | static uint64_t hwcksum_dbg_finalized_hdr = 0; |
807 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
808 | hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED, |
809 | &hwcksum_dbg_finalized_hdr, "finalized headers" ); |
810 | |
811 | static uint64_t hwcksum_dbg_finalized_data = 0; |
812 | SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, |
813 | hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED, |
814 | &hwcksum_dbg_finalized_data, "finalized payloads" ); |
815 | |
816 | uint32_t hwcksum_tx = 1; |
817 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx, |
818 | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0, |
819 | "enable transmit hardware checksum offload" ); |
820 | |
821 | uint32_t hwcksum_rx = 1; |
822 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx, |
823 | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0, |
824 | "enable receive hardware checksum offload" ); |
825 | |
826 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, tx_chain_len_stats, |
827 | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 9, |
828 | sysctl_tx_chain_len_stats, "S" , "" ); |
829 | |
830 | uint32_t tx_chain_len_count = 0; |
831 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, tx_chain_len_count, |
832 | CTLFLAG_RW | CTLFLAG_LOCKED, &tx_chain_len_count, 0, "" ); |
833 | |
834 | static uint32_t threshold_notify = 1; /* enable/disable */ |
835 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_notify, |
836 | CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_notify, 0, "" ); |
837 | |
838 | static uint32_t threshold_interval = 2; /* in seconds */ |
839 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, threshold_interval, |
840 | CTLFLAG_RW | CTLFLAG_LOCKED, &threshold_interval, 0, "" ); |
841 | |
842 | #if (DEVELOPMENT || DEBUG) |
843 | static int sysctl_get_kao_frames SYSCTL_HANDLER_ARGS; |
844 | SYSCTL_NODE(_net_link_generic_system, OID_AUTO, get_kao_frames, |
    CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_get_kao_frames, "");
846 | #endif /* DEVELOPMENT || DEBUG */ |
847 | |
848 | struct net_api_stats net_api_stats; |
849 | SYSCTL_STRUCT(_net, OID_AUTO, api_stats, CTLFLAG_RD | CTLFLAG_LOCKED, |
850 | &net_api_stats, net_api_stats, "" ); |
851 | |
852 | uint32_t net_wake_pkt_debug = 0; |
853 | SYSCTL_UINT(_net_link_generic_system, OID_AUTO, wake_pkt_debug, |
854 | CTLFLAG_RW | CTLFLAG_LOCKED, &net_wake_pkt_debug, 0, "" ); |
855 | |
856 | static void log_hexdump(void *data, size_t len); |
857 | |
858 | unsigned int net_rxpoll = 1; |
859 | unsigned int net_affinity = 1; |
860 | unsigned int net_async = 1; /* 0: synchronous, 1: asynchronous */ |
861 | |
862 | static kern_return_t dlil_affinity_set(struct thread *, u_int32_t); |
863 | |
864 | extern u_int32_t inject_buckets; |
865 | |
866 | /* DLIL data threshold thread call */ |
867 | static void dlil_dt_tcall_fn(thread_call_param_t, thread_call_param_t); |
868 | |
869 | void |
870 | ifnet_filter_update_tso(struct ifnet *ifp, boolean_t filter_enable) |
871 | { |
872 | /* |
873 | * update filter count and route_generation ID to let TCP |
874 | * know it should reevalute doing TSO or not |
875 | */ |
876 | if (filter_enable) { |
877 | OSAddAtomic(1, &ifp->if_flt_no_tso_count); |
878 | } else { |
879 | VERIFY(ifp->if_flt_no_tso_count != 0); |
880 | OSAddAtomic(-1, &ifp->if_flt_no_tso_count); |
881 | } |
882 | routegenid_update(); |
883 | } |
884 | |
885 | #if SKYWALK |
886 | |
887 | #if defined(XNU_TARGET_OS_OSX) |
888 | static bool net_check_compatible_if_filter(struct ifnet *ifp); |
889 | #endif /* XNU_TARGET_OS_OSX */ |
890 | |
891 | /* if_attach_nx flags defined in os_skywalk_private.h */ |
892 | static unsigned int if_attach_nx = IF_ATTACH_NX_DEFAULT; |
893 | unsigned int if_enable_fsw_ip_netagent = |
894 | ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0); |
895 | unsigned int if_enable_fsw_transport_netagent = |
896 | ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0); |
897 | |
898 | unsigned int if_netif_all = |
899 | ((IF_ATTACH_NX_DEFAULT & IF_ATTACH_NX_NETIF_ALL) != 0); |
900 | |
901 | /* Configure flowswitch to use max mtu sized buffer */ |
902 | static bool fsw_use_max_mtu_buffer = false; |
903 | |
904 | #if (DEVELOPMENT || DEBUG) |
905 | static int |
906 | if_attach_nx_sysctl SYSCTL_HANDLER_ARGS |
907 | { |
908 | #pragma unused(oidp, arg1, arg2) |
909 | unsigned int new_value; |
910 | int changed; |
911 | int error = sysctl_io_number(req, if_attach_nx, sizeof(if_attach_nx), |
912 | &new_value, &changed); |
913 | if (error) { |
914 | return error; |
915 | } |
916 | if (changed) { |
917 | if ((new_value & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != |
918 | (if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT)) { |
919 | return ENOTSUP; |
920 | } |
921 | if_attach_nx = new_value; |
922 | } |
923 | return 0; |
924 | } |
925 | |
926 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, if_attach_nx, |
927 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, |
928 | 0, 0, &if_attach_nx_sysctl, "IU" , "attach nexus" ); |
929 | |
930 | #endif /* DEVELOPMENT || DEBUG */ |
931 | |
932 | static int |
933 | if_enable_fsw_transport_netagent_sysctl SYSCTL_HANDLER_ARGS |
934 | { |
935 | #pragma unused(oidp, arg1, arg2) |
936 | unsigned int new_value; |
937 | int changed; |
938 | int error; |
939 | |
	error = sysctl_io_number(req, if_enable_fsw_transport_netagent,
	    sizeof(if_enable_fsw_transport_netagent),
	    &new_value, &changed);
943 | if (error == 0 && changed != 0) { |
944 | if (new_value != 0 && new_value != 1) { |
945 | /* only allow 0 or 1 */ |
946 | error = EINVAL; |
947 | } else if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) { |
948 | /* netagent can be enabled/disabled */ |
949 | if_enable_fsw_transport_netagent = new_value; |
950 | if (new_value == 0) { |
951 | kern_nexus_deregister_netagents(); |
952 | } else { |
953 | kern_nexus_register_netagents(); |
954 | } |
955 | } else { |
956 | /* netagent can't be enabled */ |
957 | error = ENOTSUP; |
958 | } |
959 | } |
960 | return error; |
961 | } |
962 | |
963 | SYSCTL_PROC(_net_link_generic_system, OID_AUTO, enable_netagent, |
964 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, |
965 | 0, 0, &if_enable_fsw_transport_netagent_sysctl, "IU" , |
966 | "enable flowswitch netagent" ); |
967 | |
968 | static void dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw); |
969 | |
970 | #include <skywalk/os_skywalk_private.h> |
971 | |
972 | boolean_t |
973 | ifnet_nx_noauto(ifnet_t ifp) |
974 | { |
975 | return (ifp->if_xflags & IFXF_NX_NOAUTO) != 0; |
976 | } |
977 | |
978 | boolean_t |
979 | ifnet_nx_noauto_flowswitch(ifnet_t ifp) |
980 | { |
981 | return ifnet_is_low_latency(ifp); |
982 | } |
983 | |
984 | boolean_t |
985 | ifnet_is_low_latency(ifnet_t ifp) |
986 | { |
987 | return (ifp->if_xflags & IFXF_LOW_LATENCY) != 0; |
988 | } |
989 | |
990 | boolean_t |
991 | ifnet_needs_compat(ifnet_t ifp) |
992 | { |
993 | if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) { |
994 | return FALSE; |
995 | } |
996 | #if !XNU_TARGET_OS_OSX |
997 | /* |
998 | * To conserve memory, we plumb in the compat layer selectively; this |
999 | * can be overridden via if_attach_nx flag IF_ATTACH_NX_NETIF_ALL. |
1000 | * In particular, we check for Wi-Fi Access Point. |
1001 | */ |
1002 | if (IFNET_IS_WIFI(ifp)) { |
1003 | /* Wi-Fi Access Point */ |
1004 | if (ifp->if_name[0] == 'a' && ifp->if_name[1] == 'p' && |
1005 | ifp->if_name[2] == '\0') { |
1006 | return if_netif_all; |
1007 | } |
1008 | } |
1009 | #else /* XNU_TARGET_OS_OSX */ |
1010 | #pragma unused(ifp) |
1011 | #endif /* XNU_TARGET_OS_OSX */ |
1012 | return TRUE; |
1013 | } |
1014 | |
1015 | boolean_t |
1016 | ifnet_needs_fsw_transport_netagent(ifnet_t ifp) |
1017 | { |
1018 | if (if_is_fsw_transport_netagent_enabled()) { |
1019 | /* check if netagent has been manually enabled for ipsec/utun */ |
1020 | if (ifp->if_family == IFNET_FAMILY_IPSEC) { |
			return ipsec_interface_needs_netagent(ifp);
1022 | } else if (ifp->if_family == IFNET_FAMILY_UTUN) { |
			return utun_interface_needs_netagent(ifp);
1024 | } |
1025 | |
1026 | /* check ifnet no auto nexus override */ |
1027 | if (ifnet_nx_noauto(ifp)) { |
1028 | return FALSE; |
1029 | } |
1030 | |
1031 | /* check global if_attach_nx configuration */ |
1032 | switch (ifp->if_family) { |
1033 | case IFNET_FAMILY_CELLULAR: |
1034 | case IFNET_FAMILY_ETHERNET: |
1035 | if ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) { |
1036 | return TRUE; |
1037 | } |
1038 | break; |
1039 | default: |
1040 | break; |
1041 | } |
1042 | } |
1043 | return FALSE; |
1044 | } |
1045 | |
1046 | boolean_t |
1047 | ifnet_needs_fsw_ip_netagent(ifnet_t ifp) |
1048 | { |
1049 | #pragma unused(ifp) |
1050 | if ((if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0) { |
1051 | return TRUE; |
1052 | } |
1053 | return FALSE; |
1054 | } |
1055 | |
1056 | boolean_t |
1057 | ifnet_needs_netif_netagent(ifnet_t ifp) |
1058 | { |
1059 | #pragma unused(ifp) |
1060 | return (if_attach_nx & IF_ATTACH_NX_NETIF_NETAGENT) != 0; |
1061 | } |
1062 | |
1063 | static boolean_t |
1064 | dlil_detach_nexus_instance(nexus_controller_t controller, |
1065 | const char *func_str, uuid_t instance, uuid_t device) |
1066 | { |
1067 | errno_t err; |
1068 | |
	if (instance == NULL || uuid_is_null(instance)) {
1070 | return FALSE; |
1071 | } |
1072 | |
1073 | /* followed by the device port */ |
1074 | if (device != NULL && !uuid_is_null(uu: device)) { |
1075 | err = kern_nexus_ifdetach(ctl: controller, nx_uuid: instance, nx_if_uuid: device); |
1076 | if (err != 0) { |
1077 | DLIL_PRINTF("%s kern_nexus_ifdetach device failed %d\n" , |
1078 | func_str, err); |
1079 | } |
1080 | } |
	err = kern_nexus_controller_free_provider_instance(controller,
	    instance);
1083 | if (err != 0) { |
1084 | DLIL_PRINTF("%s free_provider_instance failed %d\n" , |
1085 | func_str, err); |
1086 | } |
1087 | return TRUE; |
1088 | } |
1089 | |
1090 | static boolean_t |
1091 | dlil_detach_nexus(const char *func_str, uuid_t provider, uuid_t instance, |
1092 | uuid_t device) |
1093 | { |
1094 | boolean_t detached = FALSE; |
1095 | nexus_controller_t controller = kern_nexus_shared_controller(); |
1096 | int err; |
1097 | |
1098 | if (dlil_detach_nexus_instance(controller, func_str, instance, |
1099 | device)) { |
1100 | detached = TRUE; |
1101 | } |
1102 | if (provider != NULL && !uuid_is_null(uu: provider)) { |
1103 | detached = TRUE; |
		err = kern_nexus_controller_deregister_provider(controller,
		    provider);
1106 | if (err != 0) { |
1107 | DLIL_PRINTF("%s deregister_provider %d\n" , |
1108 | func_str, err); |
1109 | } |
1110 | } |
1111 | return detached; |
1112 | } |
1113 | |
1114 | static errno_t |
1115 | dlil_create_provider_and_instance(nexus_controller_t controller, |
1116 | nexus_type_t type, ifnet_t ifp, uuid_t *provider, uuid_t *instance, |
1117 | nexus_attr_t attr) |
1118 | { |
1119 | uuid_t dom_prov; |
1120 | errno_t err; |
1121 | nexus_name_t provider_name; |
1122 | const char *type_name = |
1123 | (type == NEXUS_TYPE_NET_IF) ? "netif" : "flowswitch" ; |
1124 | struct kern_nexus_init init; |
1125 | |
	err = kern_nexus_get_default_domain_provider(type, &dom_prov);
1127 | if (err != 0) { |
1128 | DLIL_PRINTF("%s can't get %s provider, error %d\n" , |
1129 | __func__, type_name, err); |
1130 | goto failed; |
1131 | } |
1132 | |
	snprintf((char *)provider_name, sizeof(provider_name),
	    "com.apple.%s.%s", type_name, if_name(ifp));
	err = kern_nexus_controller_register_provider(controller,
	    dom_prov,
	    provider_name,
	    NULL,
	    0,
	    attr,
	    provider);
1142 | if (err != 0) { |
1143 | DLIL_PRINTF("%s register %s provider failed, error %d\n" , |
1144 | __func__, type_name, err); |
1145 | goto failed; |
1146 | } |
1147 | bzero(s: &init, n: sizeof(init)); |
1148 | init.nxi_version = KERN_NEXUS_CURRENT_VERSION; |
	err = kern_nexus_controller_alloc_provider_instance(controller,
	    *provider,
	    NULL, NULL,
	    instance, &init);
1153 | if (err != 0) { |
1154 | DLIL_PRINTF("%s alloc_provider_instance %s failed, %d\n" , |
1155 | __func__, type_name, err); |
		kern_nexus_controller_deregister_provider(controller,
		    *provider);
1158 | goto failed; |
1159 | } |
1160 | failed: |
1161 | return err; |
1162 | } |
1163 | |
1164 | static boolean_t |
1165 | dlil_attach_netif_nexus_common(ifnet_t ifp, if_nexus_netif_t netif_nx) |
1166 | { |
1167 | nexus_attr_t attr = NULL; |
1168 | nexus_controller_t controller; |
1169 | errno_t err; |
1170 | |
1171 | if ((ifp->if_capabilities & IFCAP_SKYWALK) != 0) { |
1172 | /* it's already attached */ |
1173 | if (dlil_verbose) { |
1174 | DLIL_PRINTF("%s: %s already has nexus attached\n" , |
1175 | __func__, if_name(ifp)); |
1177 | } |
1178 | goto failed; |
1179 | } |
1180 | |
1181 | err = kern_nexus_attr_create(&attr); |
1182 | if (err != 0) { |
1183 | DLIL_PRINTF("%s: nexus attr create for %s\n" , __func__, |
1184 | if_name(ifp)); |
1185 | goto failed; |
1186 | } |
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_IFINDEX, ifp->if_index);
1188 | VERIFY(err == 0); |
1189 | |
1190 | controller = kern_nexus_shared_controller(); |
1191 | |
1192 | /* create the netif provider and instance */ |
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_NET_IF, ifp, &netif_nx->if_nif_provider,
	    &netif_nx->if_nif_instance, attr);
1196 | if (err != 0) { |
1197 | goto failed; |
1198 | } |
	err = kern_nexus_ifattach(controller, netif_nx->if_nif_instance,
	    ifp, NULL, FALSE, &netif_nx->if_nif_attach);
1201 | if (err != 0) { |
1202 | DLIL_PRINTF("%s kern_nexus_ifattach %d\n" , |
1203 | __func__, err); |
1204 | /* cleanup provider and instance */ |
1205 | dlil_detach_nexus(func_str: __func__, provider: netif_nx->if_nif_provider, |
1206 | instance: netif_nx->if_nif_instance, NULL); |
1207 | goto failed; |
1208 | } |
1209 | return TRUE; |
1210 | |
1211 | failed: |
1212 | if (attr != NULL) { |
1213 | kern_nexus_attr_destroy(attr); |
1214 | } |
1215 | return FALSE; |
1216 | } |
1217 | |
1218 | static boolean_t |
1219 | dlil_attach_netif_compat_nexus(ifnet_t ifp, if_nexus_netif_t netif_nx) |
1220 | { |
1221 | if (ifnet_nx_noauto(ifp) || IFNET_IS_INTCOPROC(ifp) || |
1222 | IFNET_IS_MANAGEMENT(ifp) || IFNET_IS_VMNET(ifp)) { |
1223 | goto failed; |
1224 | } |
1225 | switch (ifp->if_type) { |
1226 | case IFT_CELLULAR: |
1227 | case IFT_ETHER: |
1228 | if ((if_attach_nx & IF_ATTACH_NX_NETIF_COMPAT) == 0) { |
1229 | /* don't auto-attach */ |
1230 | goto failed; |
1231 | } |
1232 | break; |
1233 | default: |
1234 | /* don't auto-attach */ |
1235 | goto failed; |
1236 | } |
1237 | return dlil_attach_netif_nexus_common(ifp, netif_nx); |
1238 | |
1239 | failed: |
1240 | return FALSE; |
1241 | } |
1242 | |
1243 | static boolean_t |
1244 | dlil_is_native_netif_nexus(ifnet_t ifp) |
1245 | { |
1246 | return (ifp->if_eflags & IFEF_SKYWALK_NATIVE) && ifp->if_na != NULL; |
1247 | } |
1248 | |
1249 | __attribute__((noinline)) |
1250 | static void |
1251 | dlil_detach_netif_nexus(if_nexus_netif_t nexus_netif) |
1252 | { |
1253 | dlil_detach_nexus(func_str: __func__, provider: nexus_netif->if_nif_provider, |
1254 | instance: nexus_netif->if_nif_instance, device: nexus_netif->if_nif_attach); |
1255 | } |
1256 | |
1257 | static inline int |
1258 | dlil_siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p) |
1259 | { |
1260 | struct ifreq ifr; |
1261 | int error; |
1262 | |
1263 | bzero(s: &ifr, n: sizeof(ifr)); |
1264 | error = ifnet_ioctl(interface: ifp, protocol: 0, SIOCGIFDEVMTU, ioctl_arg: &ifr); |
1265 | if (error == 0) { |
1266 | *ifdm_p = ifr.ifr_devmtu; |
1267 | } |
1268 | return error; |
1269 | } |
1270 | |
1271 | static inline void |
1272 | _dlil_adjust_large_buf_size_for_tso(ifnet_t ifp, uint32_t *large_buf_size) |
1273 | { |
1274 | #ifdef XNU_TARGET_OS_OSX |
1275 | uint32_t tso_v4_mtu = 0; |
1276 | uint32_t tso_v6_mtu = 0; |
1277 | |
1278 | if (!dlil_is_native_netif_nexus(ifp)) { |
1279 | return; |
1280 | } |
1281 | /* |
1282 | * Note that we are reading the real hwassist flags set by the driver |
1283 | * and not the adjusted ones because nx_netif_host_adjust_if_capabilities() |
1284 | * hasn't been called yet. |
1285 | */ |
1286 | if ((ifp->if_hwassist & IFNET_TSO_IPV4) != 0) { |
1287 | tso_v4_mtu = ifp->if_tso_v4_mtu; |
1288 | } |
1289 | if ((ifp->if_hwassist & IFNET_TSO_IPV6) != 0) { |
1290 | tso_v6_mtu = ifp->if_tso_v6_mtu; |
1291 | } |
1292 | /* |
1293 | * If the hardware supports TSO, adjust the large buf size to match the |
1294 | * supported TSO MTU size. |
1295 | */ |
1296 | if (tso_v4_mtu != 0 || tso_v6_mtu != 0) { |
1297 | *large_buf_size = MAX(tso_v4_mtu, tso_v6_mtu); |
1298 | } else { |
1299 | *large_buf_size = MAX(*large_buf_size, sk_fsw_gso_mtu); |
1300 | } |
1301 | *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, *large_buf_size); |
1302 | #else |
1303 | #pragma unused(ifp, large_buf_size) |
1304 | #endif /* XNU_TARGET_OS_OSX */ |
1305 | } |
1306 | |
1307 | static inline int |
1308 | _dlil_get_flowswitch_buffer_size(ifnet_t ifp, uuid_t netif, uint32_t *buf_size, |
1309 | bool *use_multi_buflet, uint32_t *large_buf_size) |
1310 | { |
1311 | struct kern_pbufpool_memory_info rx_pp_info; |
1312 | struct kern_pbufpool_memory_info tx_pp_info; |
1313 | uint32_t if_max_mtu = 0; |
1314 | uint32_t drv_buf_size; |
1315 | struct ifdevmtu ifdm; |
1316 | int err; |
1317 | |
1318 | /* |
1319 | * To perform intra-stack RX aggregation flowswitch needs to use |
1320 | * multi-buflet packet. |
1321 | */ |
1322 | *use_multi_buflet = NX_FSW_TCP_RX_AGG_ENABLED(); |
1323 | |
1324 | *large_buf_size = *use_multi_buflet ? NX_FSW_DEF_LARGE_BUFSIZE : 0; |
1325 | /* |
1326 | * IP over Thunderbolt interface can deliver the largest IP packet, |
1327 | * but the driver advertises the MAX MTU as only 9K. |
1328 | */ |
1329 | if (IFNET_IS_THUNDERBOLT_IP(ifp)) { |
1330 | if_max_mtu = IP_MAXPACKET; |
1331 | goto skip_mtu_ioctl; |
1332 | } |
1333 | |
1334 | /* determine max mtu */ |
1335 | bzero(s: &ifdm, n: sizeof(ifdm)); |
1336 | err = dlil_siocgifdevmtu(ifp, ifdm_p: &ifdm); |
1337 | if (__improbable(err != 0)) { |
1338 | DLIL_PRINTF("%s: SIOCGIFDEVMTU failed for %s\n" , |
1339 | __func__, if_name(ifp)); |
1340 | /* use default flowswitch buffer size */ |
1341 | if_max_mtu = NX_FSW_BUFSIZE; |
1342 | } else { |
1343 | DLIL_PRINTF("%s: %s %d %d\n" , __func__, if_name(ifp), |
1344 | ifdm.ifdm_max, ifdm.ifdm_current); |
1345 | /* rdar://problem/44589731 */ |
1346 | if_max_mtu = MAX(ifdm.ifdm_max, ifdm.ifdm_current); |
1347 | } |
1348 | |
1349 | skip_mtu_ioctl: |
1350 | if (if_max_mtu == 0) { |
1351 | DLIL_PRINTF("%s: can't determine MAX MTU for %s\n" , |
1352 | __func__, if_name(ifp)); |
1353 | return EINVAL; |
1354 | } |
1355 | if ((if_max_mtu > NX_FSW_MAXBUFSIZE) && fsw_use_max_mtu_buffer) { |
1356 | DLIL_PRINTF("%s: interace (%s) has MAX MTU (%u) > flowswitch " |
1357 | "max bufsize(%d)\n" , __func__, |
1358 | if_name(ifp), if_max_mtu, NX_FSW_MAXBUFSIZE); |
1359 | return EINVAL; |
1360 | } |
1361 | |
1362 | /* |
1363 | * for skywalk native driver, consult the driver packet pool also. |
1364 | */ |
1365 | if (dlil_is_native_netif_nexus(ifp)) { |
		err = kern_nexus_get_pbufpool_info(netif, &rx_pp_info,
		    &tx_pp_info);
1368 | if (err != 0) { |
1369 | DLIL_PRINTF("%s: can't get pbufpool info for %s\n" , |
1370 | __func__, if_name(ifp)); |
1371 | return ENXIO; |
1372 | } |
1373 | drv_buf_size = tx_pp_info.kpm_bufsize * |
1374 | tx_pp_info.kpm_max_frags; |
1375 | if (if_max_mtu > drv_buf_size) { |
1376 | DLIL_PRINTF("%s: interface %s packet pool (rx %d * %d, " |
1377 | "tx %d * %d) can't support max mtu(%d)\n" , __func__, |
1378 | if_name(ifp), rx_pp_info.kpm_bufsize, |
1379 | rx_pp_info.kpm_max_frags, tx_pp_info.kpm_bufsize, |
1380 | tx_pp_info.kpm_max_frags, if_max_mtu); |
1381 | return EINVAL; |
1382 | } |
1383 | } else { |
1384 | drv_buf_size = if_max_mtu; |
1385 | } |
1386 | |
1387 | if ((drv_buf_size > NX_FSW_BUFSIZE) && (!fsw_use_max_mtu_buffer)) { |
1388 | _CASSERT((NX_FSW_BUFSIZE * NX_PBUF_FRAGS_MAX) >= IP_MAXPACKET); |
1389 | *use_multi_buflet = true; |
1390 | /* default flowswitch buffer size */ |
1391 | *buf_size = NX_FSW_BUFSIZE; |
1392 | *large_buf_size = MIN(NX_FSW_MAX_LARGE_BUFSIZE, drv_buf_size); |
1393 | } else { |
1394 | *buf_size = MAX(drv_buf_size, NX_FSW_BUFSIZE); |
1395 | } |
1396 | _dlil_adjust_large_buf_size_for_tso(ifp, large_buf_size); |
1397 | ASSERT(*buf_size <= NX_FSW_MAXBUFSIZE); |
1398 | if (*buf_size >= *large_buf_size) { |
1399 | *large_buf_size = 0; |
1400 | } |
1401 | return 0; |
1402 | } |
1403 | |
1404 | static boolean_t |
1405 | _dlil_attach_flowswitch_nexus(ifnet_t ifp, if_nexus_flowswitch_t nexus_fsw) |
1406 | { |
1407 | nexus_attr_t attr = NULL; |
1408 | nexus_controller_t controller; |
1409 | errno_t err = 0; |
1410 | uuid_t netif; |
1411 | uint32_t buf_size = 0; |
1412 | uint32_t large_buf_size = 0; |
1413 | bool multi_buflet; |
1414 | |
1415 | if (ifnet_nx_noauto(ifp) || ifnet_nx_noauto_flowswitch(ifp) || |
1416 | IFNET_IS_VMNET(ifp)) { |
1417 | goto failed; |
1418 | } |
1419 | |
1420 | if ((ifp->if_capabilities & IFCAP_SKYWALK) == 0) { |
1421 | /* not possible to attach (netif native/compat not plumbed) */ |
1422 | goto failed; |
1423 | } |
1424 | |
1425 | if ((if_attach_nx & IF_ATTACH_NX_FLOWSWITCH) == 0) { |
1426 | /* don't auto-attach */ |
1427 | goto failed; |
1428 | } |
1429 | |
1430 | /* get the netif instance from the ifp */ |
	err = kern_nexus_get_netif_instance(ifp, netif);
	if (err != 0) {
		DLIL_PRINTF("%s: can't find netif for %s\n",
1434 | if_name(ifp)); |
1435 | goto failed; |
1436 | } |
1437 | |
1438 | err = kern_nexus_attr_create(&attr); |
1439 | if (err != 0) { |
1440 | DLIL_PRINTF("%s: nexus attr create for %s\n" , __func__, |
1441 | if_name(ifp)); |
1442 | goto failed; |
1443 | } |
1444 | |
	err = _dlil_get_flowswitch_buffer_size(ifp, netif, &buf_size,
	    &multi_buflet, &large_buf_size);
1447 | if (err != 0) { |
1448 | goto failed; |
1449 | } |
1450 | ASSERT((buf_size >= NX_FSW_BUFSIZE) && (buf_size <= NX_FSW_MAXBUFSIZE)); |
1451 | ASSERT(large_buf_size <= NX_FSW_MAX_LARGE_BUFSIZE); |
1452 | |
1453 | /* Configure flowswitch buffer size */ |
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_SLOT_BUF_SIZE, buf_size);
	VERIFY(err == 0);
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_LARGE_BUF_SIZE,
	    large_buf_size);
1458 | VERIFY(err == 0); |
1459 | |
1460 | /* |
1461 | * Configure flowswitch to use super-packet (multi-buflet). |
1462 | */ |
	err = kern_nexus_attr_set(attr, NEXUS_ATTR_MAX_FRAGS,
	    multi_buflet ? NX_PBUF_FRAGS_MAX : 1);
1465 | VERIFY(err == 0); |
1466 | |
1467 | /* create the flowswitch provider and instance */ |
1468 | controller = kern_nexus_shared_controller(); |
	err = dlil_create_provider_and_instance(controller,
	    NEXUS_TYPE_FLOW_SWITCH, ifp, &nexus_fsw->if_fsw_provider,
	    &nexus_fsw->if_fsw_instance, attr);
1472 | if (err != 0) { |
1473 | goto failed; |
1474 | } |
1475 | |
1476 | /* attach the device port */ |
	err = kern_nexus_ifattach(controller, nexus_fsw->if_fsw_instance,
	    NULL, netif, FALSE, &nexus_fsw->if_fsw_device);
	if (err != 0) {
		DLIL_PRINTF("%s: kern_nexus_ifattach device failed %d %s\n",
		    __func__, err, if_name(ifp));
		/* cleanup provider and instance */
		dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
		    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
1485 | goto failed; |
1486 | } |
1487 | return TRUE; |
1488 | |
1489 | failed: |
1490 | if (err != 0) { |
1491 | DLIL_PRINTF("%s: failed to attach flowswitch to %s, error %d\n" , |
1492 | __func__, if_name(ifp), err); |
1493 | } else { |
1494 | DLIL_PRINTF("%s: not attaching flowswitch to %s\n" , |
1495 | __func__, if_name(ifp)); |
1496 | } |
1497 | if (attr != NULL) { |
1498 | kern_nexus_attr_destroy(attr); |
1499 | } |
1500 | return FALSE; |
1501 | } |
1502 | |
1503 | static boolean_t |
1504 | dlil_attach_flowswitch_nexus(ifnet_t ifp) |
1505 | { |
1506 | boolean_t attached; |
1507 | if_nexus_flowswitch nexus_fsw; |
1508 | |
1509 | #if (DEVELOPMENT || DEBUG) |
1510 | if (skywalk_netif_direct_allowed(if_name(ifp))) { |
1511 | DLIL_PRINTF("skip attaching fsw to %s" , if_name(ifp)); |
1512 | return FALSE; |
1513 | } |
1514 | #endif /* (DEVELOPMENT || DEBUG) */ |
1515 | |
1516 | /* |
1517 | * flowswitch attachment is not supported for interface using the |
1518 | * legacy model (IFNET_INIT_LEGACY) |
1519 | */ |
1520 | if ((ifp->if_eflags & IFEF_TXSTART) == 0) { |
1521 | DLIL_PRINTF("skip attaching fsw to %s using legacy TX model" , |
1522 | if_name(ifp)); |
1523 | return FALSE; |
1524 | } |
1525 | |
	if (uuid_is_null(ifp->if_nx_flowswitch.if_fsw_instance) == 0) {
1527 | /* it's already attached */ |
1528 | return FALSE; |
1529 | } |
	bzero(&nexus_fsw, sizeof(nexus_fsw));
	attached = _dlil_attach_flowswitch_nexus(ifp, &nexus_fsw);
1532 | if (attached) { |
1533 | ifnet_lock_exclusive(ifp); |
1534 | if (!IF_FULLY_ATTACHED(ifp)) { |
1535 | /* interface is going away */ |
1536 | attached = FALSE; |
1537 | } else { |
1538 | ifp->if_nx_flowswitch = nexus_fsw; |
1539 | } |
1540 | ifnet_lock_done(ifp); |
1541 | if (!attached) { |
1542 | /* clean up flowswitch nexus */ |
			dlil_detach_flowswitch_nexus(&nexus_fsw);
1544 | } |
1545 | } |
1546 | return attached; |
1547 | } |
1548 | |
1549 | __attribute__((noinline)) |
1550 | static void |
1551 | dlil_detach_flowswitch_nexus(if_nexus_flowswitch_t nexus_fsw) |
1552 | { |
	dlil_detach_nexus(__func__, nexus_fsw->if_fsw_provider,
	    nexus_fsw->if_fsw_instance, nexus_fsw->if_fsw_device);
1555 | } |
1556 | |
1557 | __attribute__((noinline)) |
1558 | static void |
1559 | dlil_netif_detach_notify(ifnet_t ifp) |
1560 | { |
1561 | ifnet_detach_notify_cb_t notify = NULL; |
1562 | void *arg = NULL; |
1563 | |
	ifnet_get_detach_notify(ifp, &notify, &arg);
1565 | if (notify == NULL) { |
1566 | DTRACE_SKYWALK1(no__notify, ifnet_t, ifp); |
1567 | return; |
1568 | } |
1569 | (*notify)(arg); |
1570 | } |
1571 | |
1572 | __attribute__((noinline)) |
1573 | static void |
1574 | dlil_quiesce_and_detach_nexuses(ifnet_t ifp) |
1575 | { |
1576 | if_nexus_flowswitch *nx_fsw = &ifp->if_nx_flowswitch; |
1577 | if_nexus_netif *nx_netif = &ifp->if_nx_netif; |
1578 | |
1579 | ifnet_datamov_suspend_and_drain(ifp); |
	if (!uuid_is_null(nx_fsw->if_fsw_device)) {
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_provider));
		ASSERT(!uuid_is_null(nx_fsw->if_fsw_instance));
		dlil_detach_flowswitch_nexus(nx_fsw);
		bzero(nx_fsw, sizeof(*nx_fsw));
1585 | } else { |
1586 | ASSERT(uuid_is_null(nx_fsw->if_fsw_provider)); |
1587 | ASSERT(uuid_is_null(nx_fsw->if_fsw_instance)); |
1588 | DTRACE_IP1(fsw__not__attached, ifnet_t, ifp); |
1589 | } |
1590 | |
	if (!uuid_is_null(nx_netif->if_nif_attach)) {
		ASSERT(!uuid_is_null(nx_netif->if_nif_provider));
		ASSERT(!uuid_is_null(nx_netif->if_nif_instance));
		dlil_detach_netif_nexus(nx_netif);
		bzero(nx_netif, sizeof(*nx_netif));
1596 | } else { |
1597 | ASSERT(uuid_is_null(nx_netif->if_nif_provider)); |
1598 | ASSERT(uuid_is_null(nx_netif->if_nif_instance)); |
1599 | DTRACE_IP1(netif__not__attached, ifnet_t, ifp); |
1600 | } |
1601 | ifnet_datamov_resume(ifp); |
1602 | } |
1603 | |
1604 | boolean_t |
1605 | ifnet_add_netagent(ifnet_t ifp) |
1606 | { |
1607 | int error; |
1608 | |
1609 | error = kern_nexus_interface_add_netagent(ifp); |
1610 | os_log(OS_LOG_DEFAULT, |
1611 | "kern_nexus_interface_add_netagent(%s) returned %d" , |
1612 | ifp->if_xname, error); |
1613 | return error == 0; |
1614 | } |
1615 | |
1616 | boolean_t |
1617 | ifnet_remove_netagent(ifnet_t ifp) |
1618 | { |
1619 | int error; |
1620 | |
1621 | error = kern_nexus_interface_remove_netagent(ifp); |
1622 | os_log(OS_LOG_DEFAULT, |
1623 | "kern_nexus_interface_remove_netagent(%s) returned %d" , |
1624 | ifp->if_xname, error); |
1625 | return error == 0; |
1626 | } |
1627 | |
1628 | boolean_t |
1629 | ifnet_attach_flowswitch_nexus(ifnet_t ifp) |
1630 | { |
1631 | if (!IF_FULLY_ATTACHED(ifp)) { |
1632 | return FALSE; |
1633 | } |
1634 | return dlil_attach_flowswitch_nexus(ifp); |
1635 | } |
1636 | |
1637 | boolean_t |
1638 | ifnet_detach_flowswitch_nexus(ifnet_t ifp) |
1639 | { |
1640 | if_nexus_flowswitch nexus_fsw; |
1641 | |
1642 | ifnet_lock_exclusive(ifp); |
1643 | nexus_fsw = ifp->if_nx_flowswitch; |
	bzero(&ifp->if_nx_flowswitch, sizeof(ifp->if_nx_flowswitch));
1645 | ifnet_lock_done(ifp); |
	return dlil_detach_nexus(__func__, nexus_fsw.if_fsw_provider,
	    nexus_fsw.if_fsw_instance, nexus_fsw.if_fsw_device);
1648 | } |
1649 | |
1650 | boolean_t |
1651 | ifnet_attach_netif_nexus(ifnet_t ifp) |
1652 | { |
1653 | boolean_t nexus_attached; |
1654 | if_nexus_netif nexus_netif; |
1655 | |
1656 | if (!IF_FULLY_ATTACHED(ifp)) { |
1657 | return FALSE; |
1658 | } |
	nexus_attached = dlil_attach_netif_nexus_common(ifp, &nexus_netif);
1660 | if (nexus_attached) { |
1661 | ifnet_lock_exclusive(ifp); |
1662 | ifp->if_nx_netif = nexus_netif; |
1663 | ifnet_lock_done(ifp); |
1664 | } |
1665 | return nexus_attached; |
1666 | } |
1667 | |
1668 | boolean_t |
1669 | ifnet_detach_netif_nexus(ifnet_t ifp) |
1670 | { |
1671 | if_nexus_netif nexus_netif; |
1672 | |
1673 | ifnet_lock_exclusive(ifp); |
1674 | nexus_netif = ifp->if_nx_netif; |
	bzero(&ifp->if_nx_netif, sizeof(ifp->if_nx_netif));
1676 | ifnet_lock_done(ifp); |
1677 | |
	return dlil_detach_nexus(__func__, nexus_netif.if_nif_provider,
	    nexus_netif.if_nif_instance, nexus_netif.if_nif_attach);
1680 | } |
1681 | |
1682 | void |
1683 | ifnet_attach_native_flowswitch(ifnet_t ifp) |
1684 | { |
1685 | if (!dlil_is_native_netif_nexus(ifp)) { |
1686 | /* not a native netif */ |
1687 | return; |
1688 | } |
1689 | ifnet_attach_flowswitch_nexus(ifp); |
1690 | } |
1691 | |
1692 | int |
1693 | ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg) |
1694 | { |
	lck_mtx_lock(&ifp->if_delegate_lock);
	while (ifp->if_fsw_rx_cb_ref > 0) {
		DTRACE_SKYWALK1(wait__fsw, ifnet_t, ifp);
		(void) msleep(&ifp->if_fsw_rx_cb_ref, &ifp->if_delegate_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		DTRACE_SKYWALK1(wake__fsw, ifnet_t, ifp);
	}
	ifp->if_fsw_rx_cb = cb;
	ifp->if_fsw_rx_cb_arg = arg;
	lck_mtx_unlock(&ifp->if_delegate_lock);
1705 | return 0; |
1706 | } |
1707 | |
1708 | int |
1709 | ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp) |
1710 | { |
1711 | /* |
1712 | * This is for avoiding the unnecessary lock acquire for interfaces |
1713 | * not used by a redirect interface. |
1714 | */ |
1715 | if (ifp->if_fsw_rx_cb == NULL) { |
1716 | return ENOENT; |
1717 | } |
	lck_mtx_lock(&ifp->if_delegate_lock);
	if (ifp->if_fsw_rx_cb == NULL) {
		lck_mtx_unlock(&ifp->if_delegate_lock);
		return ENOENT;
	}
	*cbp = ifp->if_fsw_rx_cb;
	*argp = ifp->if_fsw_rx_cb_arg;
	ifp->if_fsw_rx_cb_ref++;
	lck_mtx_unlock(&ifp->if_delegate_lock);
1727 | return 0; |
1728 | } |
1729 | |
1730 | void |
1731 | ifnet_release_flowswitch_rx_callback(ifnet_t ifp) |
1732 | { |
	lck_mtx_lock(&ifp->if_delegate_lock);
	if (--ifp->if_fsw_rx_cb_ref == 0) {
		wakeup(&ifp->if_fsw_rx_cb_ref);
	}
	lck_mtx_unlock(&ifp->if_delegate_lock);
1738 | } |
1739 | |
1740 | int |
1741 | ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent) |
1742 | { |
	lck_mtx_lock(&difp->if_delegate_lock);
	while (difp->if_delegate_parent_ref > 0) {
		DTRACE_SKYWALK1(wait__parent, ifnet_t, difp);
		(void) msleep(&difp->if_delegate_parent_ref, &difp->if_delegate_lock,
		    (PZERO + 1), __FUNCTION__, NULL);
		DTRACE_SKYWALK1(wake__parent, ifnet_t, difp);
	}
	difp->if_delegate_parent = parent;
	lck_mtx_unlock(&difp->if_delegate_lock);
1752 | return 0; |
1753 | } |
1754 | |
1755 | int |
1756 | ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parentp) |
1757 | { |
	lck_mtx_lock(&difp->if_delegate_lock);
	if (difp->if_delegate_parent == NULL) {
		lck_mtx_unlock(&difp->if_delegate_lock);
		return ENOENT;
	}
	*parentp = difp->if_delegate_parent;
	difp->if_delegate_parent_ref++;
	lck_mtx_unlock(&difp->if_delegate_lock);
1766 | return 0; |
1767 | } |
1768 | |
1769 | void |
1770 | ifnet_release_delegate_parent(ifnet_t difp) |
1771 | { |
	lck_mtx_lock(&difp->if_delegate_lock);
	if (--difp->if_delegate_parent_ref == 0) {
		wakeup(&difp->if_delegate_parent_ref);
	}
	lck_mtx_unlock(&difp->if_delegate_lock);
1777 | } |
1778 | |
1779 | __attribute__((noinline)) |
1780 | void |
1781 | ifnet_set_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg) |
1782 | { |
1783 | ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE); |
1784 | ifp->if_detach_notify = notify; |
1785 | ifp->if_detach_notify_arg = arg; |
1786 | } |
1787 | |
1788 | __attribute__((noinline)) |
1789 | void |
1790 | ifnet_get_detach_notify_locked(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp) |
1791 | { |
1792 | ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE); |
1793 | *notifyp = ifp->if_detach_notify; |
1794 | *argp = ifp->if_detach_notify_arg; |
1795 | } |
1796 | |
1797 | __attribute__((noinline)) |
1798 | void |
1799 | ifnet_set_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t notify, void *arg) |
1800 | { |
1801 | ifnet_lock_exclusive(ifp); |
1802 | ifnet_set_detach_notify_locked(ifp, notify, arg); |
1803 | ifnet_lock_done(ifp); |
1804 | } |
1805 | |
1806 | __attribute__((noinline)) |
1807 | void |
1808 | ifnet_get_detach_notify(ifnet_t ifp, ifnet_detach_notify_cb_t *notifyp, void **argp) |
1809 | { |
1810 | ifnet_lock_exclusive(ifp); |
1811 | ifnet_get_detach_notify_locked(ifp, notifyp, argp); |
1812 | ifnet_lock_done(ifp); |
1813 | } |
1814 | #endif /* SKYWALK */ |
1815 | |
1816 | #define DLIL_INPUT_CHECK(m, ifp) { \ |
1817 | struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \ |
1818 | if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \ |
1819 | !(mbuf_flags(m) & MBUF_PKTHDR)) { \ |
1820 | panic_plain("%s: invalid mbuf %p\n", __func__, m); \ |
1821 | /* NOTREACHED */ \ |
1822 | } \ |
1823 | } |
1824 | |
1825 | #define DLIL_EWMA(old, new, decay) do { \ |
1826 | u_int32_t _avg; \ |
1827 | if ((_avg = (old)) > 0) \ |
1828 | _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \ |
1829 | else \ |
1830 | _avg = (new); \ |
1831 | (old) = _avg; \ |
1832 | } while (0) |
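
/*
 * Worked example (hedged): DLIL_EWMA with decay = 3 computes
 * avg' = (avg * 8 - avg + new) / 8, i.e. a 7/8 old + 1/8 new blend.
 * With old = 100 and new = 36: ((100 << 3) - 100 + 36) >> 3 = 736 >> 3 = 92.
 */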
1833 | |
1834 | #define MBPS (1ULL * 1000 * 1000) |
1835 | #define GBPS (MBPS * 1000) |
1836 | |
1837 | struct rxpoll_time_tbl { |
1838 | u_int64_t speed; /* downlink speed */ |
1839 | u_int32_t plowat; /* packets low watermark */ |
1840 | u_int32_t phiwat; /* packets high watermark */ |
1841 | u_int32_t blowat; /* bytes low watermark */ |
1842 | u_int32_t bhiwat; /* bytes high watermark */ |
1843 | }; |
1844 | |
1845 | static struct rxpoll_time_tbl rxpoll_tbl[] = { |
1846 | { .speed = 10 * MBPS, .plowat = 2, .phiwat = 8, .blowat = (1 * 1024), .bhiwat = (6 * 1024) }, |
1847 | { .speed = 100 * MBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, |
1848 | { .speed = 1 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, |
1849 | { .speed = 10 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, |
1850 | { .speed = 100 * GBPS, .plowat = 10, .phiwat = 40, .blowat = (4 * 1024), .bhiwat = (64 * 1024) }, |
1851 | { .speed = 0, .plowat = 0, .phiwat = 0, .blowat = 0, .bhiwat = 0 } |
1852 | }; |
1853 | |
1854 | static LCK_MTX_DECLARE_ATTR(dlil_thread_sync_lock, &dlil_lock_group, |
1855 | &dlil_lck_attributes); |
1856 | static uint32_t dlil_pending_thread_cnt = 0; |
1857 | |
1858 | static void |
1859 | dlil_incr_pending_thread_count(void) |
1860 | { |
1861 | LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED); |
	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt++;
	lck_mtx_unlock(&dlil_thread_sync_lock);
1865 | } |
1866 | |
1867 | static void |
1868 | dlil_decr_pending_thread_count(void) |
1869 | { |
1870 | LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED); |
	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	dlil_pending_thread_cnt--;
	if (dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
1878 | } |
1879 | |
1880 | int |
1881 | proto_hash_value(u_int32_t protocol_family) |
1882 | { |
1883 | /* |
1884 | * dlil_proto_unplumb_all() depends on the mapping between |
1885 | * the hash bucket index and the protocol family defined |
1886 | * here; future changes must be applied there as well. |
1887 | */ |
1888 | switch (protocol_family) { |
1889 | case PF_INET: |
1890 | return 0; |
1891 | case PF_INET6: |
1892 | return 1; |
1893 | case PF_VLAN: |
1894 | return 2; |
1895 | case PF_UNSPEC: |
1896 | default: |
1897 | return 3; |
1898 | } |
1899 | } |
1900 | |
1901 | /* |
1902 | * Caller must already be holding ifnet lock. |
1903 | */ |
1904 | static struct if_proto * |
1905 | find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family) |
1906 | { |
1907 | struct if_proto *proto = NULL; |
1908 | u_int32_t i = proto_hash_value(protocol_family); |
1909 | |
1910 | ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); |
1911 | |
1912 | if (ifp->if_proto_hash != NULL) { |
1913 | proto = SLIST_FIRST(&ifp->if_proto_hash[i]); |
1914 | } |
1915 | |
1916 | while (proto != NULL && proto->protocol_family != protocol_family) { |
1917 | proto = SLIST_NEXT(proto, next_hash); |
1918 | } |
1919 | |
1920 | if (proto != NULL) { |
1921 | if_proto_ref(proto); |
1922 | } |
1923 | |
1924 | return proto; |
1925 | } |
1926 | |
1927 | static void |
1928 | if_proto_ref(struct if_proto *proto) |
1929 | { |
1930 | os_atomic_inc(&proto->refcount, relaxed); |
1931 | } |
1932 | |
1933 | extern void if_rtproto_del(struct ifnet *ifp, int protocol); |
1934 | |
1935 | static void |
1936 | if_proto_free(struct if_proto *proto) |
1937 | { |
1938 | u_int32_t oldval; |
1939 | struct ifnet *ifp = proto->ifp; |
1940 | u_int32_t proto_family = proto->protocol_family; |
1941 | struct kev_dl_proto_data ev_pr_data; |
1942 | |
1943 | oldval = os_atomic_dec_orig(&proto->refcount, relaxed); |
1944 | if (oldval > 1) { |
1945 | return; |
1946 | } |
1947 | |
1948 | if (proto->proto_kpi == kProtoKPI_v1) { |
1949 | if (proto->kpi.v1.detached) { |
1950 | proto->kpi.v1.detached(ifp, proto->protocol_family); |
1951 | } |
1952 | } |
1953 | if (proto->proto_kpi == kProtoKPI_v2) { |
1954 | if (proto->kpi.v2.detached) { |
1955 | proto->kpi.v2.detached(ifp, proto->protocol_family); |
1956 | } |
1957 | } |
1958 | |
1959 | /* |
1960 | * Cleanup routes that may still be in the routing table for that |
1961 | * interface/protocol pair. |
1962 | */ |
	if_rtproto_del(ifp, proto_family);
1964 | |
1965 | ifnet_lock_shared(ifp); |
1966 | |
	/* No more references on this; the protocol must have been detached */
1968 | VERIFY(proto->detached); |
1969 | |
1970 | /* |
1971 | * The reserved field carries the number of protocol still attached |
1972 | * (subject to change) |
1973 | */ |
1974 | ev_pr_data.proto_family = proto_family; |
1975 | ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, list_count: 0); |
1976 | |
1977 | ifnet_lock_done(ifp); |
1978 | |
1979 | dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED, |
1980 | (struct net_event_data *)&ev_pr_data, |
1981 | sizeof(struct kev_dl_proto_data), FALSE); |
1982 | |
1983 | if (ev_pr_data.proto_remaining_count == 0) { |
1984 | /* |
1985 | * The protocol count has gone to zero, mark the interface down. |
1986 | * This used to be done by configd.KernelEventMonitor, but that |
1987 | * is inherently prone to races (rdar://problem/30810208). |
1988 | */ |
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
1991 | dlil_post_sifflags_msg(ifp); |
1992 | } |
1993 | |
1994 | zfree(dlif_proto_zone, proto); |
1995 | } |
1996 | |
1997 | __private_extern__ void |
1998 | ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what) |
1999 | { |
2000 | #if !MACH_ASSERT |
2001 | #pragma unused(ifp) |
2002 | #endif |
2003 | unsigned int type = 0; |
2004 | int ass = 1; |
2005 | |
2006 | switch (what) { |
2007 | case IFNET_LCK_ASSERT_EXCLUSIVE: |
2008 | type = LCK_RW_ASSERT_EXCLUSIVE; |
2009 | break; |
2010 | |
2011 | case IFNET_LCK_ASSERT_SHARED: |
2012 | type = LCK_RW_ASSERT_SHARED; |
2013 | break; |
2014 | |
2015 | case IFNET_LCK_ASSERT_OWNED: |
2016 | type = LCK_RW_ASSERT_HELD; |
2017 | break; |
2018 | |
2019 | case IFNET_LCK_ASSERT_NOTOWNED: |
2020 | /* nothing to do here for RW lock; bypass assert */ |
2021 | ass = 0; |
2022 | break; |
2023 | |
2024 | default: |
2025 | panic("bad ifnet assert type: %d" , what); |
2026 | /* NOTREACHED */ |
2027 | } |
2028 | if (ass) { |
2029 | LCK_RW_ASSERT(&ifp->if_lock, type); |
2030 | } |
2031 | } |
2032 | |
2033 | __private_extern__ void |
2034 | ifnet_lock_shared(struct ifnet *ifp) |
2035 | { |
	lck_rw_lock_shared(&ifp->if_lock);
2037 | } |
2038 | |
2039 | __private_extern__ void |
2040 | ifnet_lock_exclusive(struct ifnet *ifp) |
2041 | { |
	lck_rw_lock_exclusive(&ifp->if_lock);
2043 | } |
2044 | |
2045 | __private_extern__ void |
2046 | ifnet_lock_done(struct ifnet *ifp) |
2047 | { |
	lck_rw_done(&ifp->if_lock);
2049 | } |
2050 | |
2051 | #if INET |
2052 | __private_extern__ void |
2053 | if_inetdata_lock_shared(struct ifnet *ifp) |
2054 | { |
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
2056 | } |
2057 | |
2058 | __private_extern__ void |
2059 | if_inetdata_lock_exclusive(struct ifnet *ifp) |
2060 | { |
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
2062 | } |
2063 | |
2064 | __private_extern__ void |
2065 | if_inetdata_lock_done(struct ifnet *ifp) |
2066 | { |
	lck_rw_done(&ifp->if_inetdata_lock);
2068 | } |
2069 | #endif |
2070 | |
2071 | __private_extern__ void |
2072 | if_inet6data_lock_shared(struct ifnet *ifp) |
2073 | { |
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
2075 | } |
2076 | |
2077 | __private_extern__ void |
2078 | if_inet6data_lock_exclusive(struct ifnet *ifp) |
2079 | { |
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
2081 | } |
2082 | |
2083 | __private_extern__ void |
2084 | if_inet6data_lock_done(struct ifnet *ifp) |
2085 | { |
	lck_rw_done(&ifp->if_inet6data_lock);
2087 | } |
2088 | |
2089 | __private_extern__ void |
2090 | ifnet_head_lock_shared(void) |
2091 | { |
	lck_rw_lock_shared(&ifnet_head_lock);
2093 | } |
2094 | |
2095 | __private_extern__ void |
2096 | ifnet_head_lock_exclusive(void) |
2097 | { |
	lck_rw_lock_exclusive(&ifnet_head_lock);
2099 | } |
2100 | |
2101 | __private_extern__ void |
2102 | ifnet_head_done(void) |
2103 | { |
	lck_rw_done(&ifnet_head_lock);
2105 | } |
2106 | |
2107 | __private_extern__ void |
2108 | ifnet_head_assert_exclusive(void) |
2109 | { |
2110 | LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE); |
2111 | } |
2112 | |
2113 | /* |
2114 | * dlil_ifp_protolist |
2115 | * - get the list of protocols attached to the interface, or just the number |
2116 | * of attached protocols |
2117 | * - if the number returned is greater than 'list_count', truncation occurred |
2118 | * |
2119 | * Note: |
2120 | * - caller must already be holding ifnet lock. |
2121 | */ |
2122 | static u_int32_t |
2123 | dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list, |
2124 | u_int32_t list_count) |
2125 | { |
2126 | u_int32_t count = 0; |
2127 | int i; |
2128 | |
	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
2130 | |
2131 | if (ifp->if_proto_hash == NULL) { |
2132 | goto done; |
2133 | } |
2134 | |
2135 | for (i = 0; i < PROTO_HASH_SLOTS; i++) { |
2136 | struct if_proto *proto; |
2137 | SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) { |
2138 | if (list != NULL && count < list_count) { |
2139 | list[count] = proto->protocol_family; |
2140 | } |
2141 | count++; |
2142 | } |
2143 | } |
2144 | done: |
2145 | return count; |
2146 | } |
2147 | |
2148 | __private_extern__ u_int32_t |
2149 | if_get_protolist(struct ifnet * ifp, u_int32_t *protolist, u_int32_t count) |
2150 | { |
2151 | ifnet_lock_shared(ifp); |
	count = dlil_ifp_protolist(ifp, protolist, count);
2153 | ifnet_lock_done(ifp); |
2154 | return count; |
2155 | } |
2156 | |
2157 | __private_extern__ void |
2158 | if_free_protolist(u_int32_t *list) |
2159 | { |
2160 | kfree_data_addr(list); |
2161 | } |
2162 | |
2163 | __private_extern__ int |
2164 | dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, |
2165 | u_int32_t event_code, struct net_event_data *event_data, |
2166 | u_int32_t event_data_len, boolean_t suppress_generation) |
2167 | { |
2168 | struct net_event_data ev_data; |
2169 | struct kev_msg ev_msg; |
2170 | |
	bzero(&ev_msg, sizeof(ev_msg));
	bzero(&ev_data, sizeof(ev_data));
	/*
	 * A net event always starts with a net_event_data structure, but
	 * the caller can either generate a simple net event or provide a
	 * longer event structure to post.
	 */
2178 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
2179 | ev_msg.kev_class = KEV_NETWORK_CLASS; |
2180 | ev_msg.kev_subclass = event_subclass; |
2181 | ev_msg.event_code = event_code; |
2182 | |
2183 | if (event_data == NULL) { |
2184 | event_data = &ev_data; |
2185 | event_data_len = sizeof(struct net_event_data); |
2186 | } |
2187 | |
	strlcpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ);
2189 | event_data->if_family = ifp->if_family; |
2190 | event_data->if_unit = (u_int32_t)ifp->if_unit; |
2191 | |
2192 | ev_msg.dv[0].data_length = event_data_len; |
2193 | ev_msg.dv[0].data_ptr = event_data; |
2194 | ev_msg.dv[1].data_length = 0; |
2195 | |
2196 | bool update_generation = true; |
2197 | if (event_subclass == KEV_DL_SUBCLASS) { |
2198 | /* Don't update interface generation for frequent link quality and state changes */ |
2199 | switch (event_code) { |
2200 | case KEV_DL_LINK_QUALITY_METRIC_CHANGED: |
2201 | case KEV_DL_RRC_STATE_CHANGED: |
2202 | case KEV_DL_PRIMARY_ELECTED: |
2203 | update_generation = false; |
2204 | break; |
2205 | default: |
2206 | break; |
2207 | } |
2208 | } |
2209 | |
2210 | /* |
2211 | * Some events that update generation counts might |
2212 | * want to suppress generation count. |
2213 | * One example is node presence/absence where we still |
2214 | * issue kernel event for the invocation but want to avoid |
2215 | * expensive operation of updating generation which triggers |
2216 | * NECP client updates. |
2217 | */ |
2218 | if (suppress_generation) { |
2219 | update_generation = false; |
2220 | } |
2221 | |
	return dlil_event_internal(ifp, &ev_msg, update_generation);
2223 | } |
2224 | |
2225 | __private_extern__ int |
2226 | dlil_alloc_local_stats(struct ifnet *ifp) |
2227 | { |
2228 | int ret = EINVAL; |
2229 | void *buf, *base, **pbuf; |
2230 | |
2231 | if (ifp == NULL) { |
2232 | goto end; |
2233 | } |
2234 | |
2235 | if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) { |
2236 | /* allocate tcpstat_local structure */ |
2237 | buf = zalloc_flags(dlif_tcpstat_zone, |
2238 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
2239 | |
2240 | /* Get the 64-bit aligned base address for this object */ |
2241 | base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t), |
2242 | sizeof(u_int64_t)); |
2243 | VERIFY(((intptr_t)base + dlif_tcpstat_size) <= |
2244 | ((intptr_t)buf + dlif_tcpstat_bufsize)); |
2245 | |
2246 | /* |
2247 | * Wind back a pointer size from the aligned base and |
2248 | * save the original address so we can free it later. |
2249 | */ |
2250 | pbuf = (void **)((intptr_t)base - sizeof(void *)); |
2251 | *pbuf = buf; |
2252 | ifp->if_tcp_stat = base; |
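		/*
		 * Resulting layout (hedged sketch; exact padding depends on
		 * the address the zone hands back):
		 *
		 *   buf                     base (64-bit aligned)
		 *    |                       |
		 *    v                       v
		 *    +------- ... ---+-------+--------------------------+
		 *    |    padding    | *pbuf | tcpstat_local structure  |
		 *    +------- ... ---+-------+--------------------------+
		 *                      ^ original zone address stashed
		 *                        at base - sizeof(void *)
		 */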
2253 | |
2254 | /* allocate udpstat_local structure */ |
2255 | buf = zalloc_flags(dlif_udpstat_zone, |
2256 | Z_WAITOK | Z_ZERO | Z_NOFAIL); |
2257 | |
2258 | /* Get the 64-bit aligned base address for this object */ |
2259 | base = (void *)P2ROUNDUP((intptr_t)buf + sizeof(u_int64_t), |
2260 | sizeof(u_int64_t)); |
2261 | VERIFY(((intptr_t)base + dlif_udpstat_size) <= |
2262 | ((intptr_t)buf + dlif_udpstat_bufsize)); |
2263 | |
2264 | /* |
2265 | * Wind back a pointer size from the aligned base and |
2266 | * save the original address so we can free it later. |
2267 | */ |
2268 | pbuf = (void **)((intptr_t)base - sizeof(void *)); |
2269 | *pbuf = buf; |
2270 | ifp->if_udp_stat = base; |
2271 | |
2272 | VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) && |
2273 | IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t))); |
2274 | |
2275 | ret = 0; |
2276 | } |
2277 | |
2278 | if (ifp->if_ipv4_stat == NULL) { |
2279 | ifp->if_ipv4_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO); |
2280 | } |
2281 | |
2282 | if (ifp->if_ipv6_stat == NULL) { |
2283 | ifp->if_ipv6_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO); |
2284 | } |
2285 | end: |
2286 | if (ifp != NULL && ret != 0) { |
2287 | if (ifp->if_tcp_stat != NULL) { |
2288 | pbuf = (void **) |
2289 | ((intptr_t)ifp->if_tcp_stat - sizeof(void *)); |
2290 | zfree(dlif_tcpstat_zone, *pbuf); |
2291 | ifp->if_tcp_stat = NULL; |
2292 | } |
2293 | if (ifp->if_udp_stat != NULL) { |
2294 | pbuf = (void **) |
2295 | ((intptr_t)ifp->if_udp_stat - sizeof(void *)); |
2296 | zfree(dlif_udpstat_zone, *pbuf); |
2297 | ifp->if_udp_stat = NULL; |
2298 | } |
2299 | /* The macro kfree_type sets the passed pointer to NULL */ |
2300 | if (ifp->if_ipv4_stat != NULL) { |
2301 | kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv4_stat); |
2302 | } |
2303 | if (ifp->if_ipv6_stat != NULL) { |
2304 | kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv6_stat); |
2305 | } |
2306 | } |
2307 | |
2308 | return ret; |
2309 | } |
2310 | |
2311 | static void |
2312 | dlil_reset_rxpoll_params(ifnet_t ifp) |
2313 | { |
2314 | ASSERT(ifp != NULL); |
2315 | ifnet_set_poll_cycle(ifp, NULL); |
2316 | ifp->if_poll_update = 0; |
2317 | ifp->if_poll_flags = 0; |
2318 | ifp->if_poll_req = 0; |
2319 | ifp->if_poll_mode = IFNET_MODEL_INPUT_POLL_OFF; |
	bzero(&ifp->if_poll_tstats, sizeof(ifp->if_poll_tstats));
	bzero(&ifp->if_poll_pstats, sizeof(ifp->if_poll_pstats));
	bzero(&ifp->if_poll_sstats, sizeof(ifp->if_poll_sstats));
2323 | net_timerclear(&ifp->if_poll_mode_holdtime); |
2324 | net_timerclear(&ifp->if_poll_mode_lasttime); |
2325 | net_timerclear(&ifp->if_poll_sample_holdtime); |
2326 | net_timerclear(&ifp->if_poll_sample_lasttime); |
2327 | net_timerclear(&ifp->if_poll_dbg_lasttime); |
2328 | } |
2329 | |
2330 | static int |
2331 | dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp, |
2332 | thread_continue_t *thfunc) |
2333 | { |
2334 | boolean_t dlil_rxpoll_input; |
2335 | thread_continue_t func = NULL; |
2336 | u_int32_t limit; |
2337 | int error = 0; |
2338 | |
2339 | dlil_rxpoll_input = (ifp != NULL && net_rxpoll && |
2340 | (ifp->if_eflags & IFEF_RXPOLL) && (ifp->if_xflags & IFXF_LEGACY)); |
2341 | |
2342 | /* default strategy utilizes the DLIL worker thread */ |
2343 | inp->dlth_strategy = dlil_input_async; |
2344 | |
2345 | /* NULL ifp indicates the main input thread, called at dlil_init time */ |
2346 | if (ifp == NULL) { |
2347 | /* |
2348 | * Main input thread only. |
2349 | */ |
2350 | func = dlil_main_input_thread_func; |
2351 | VERIFY(inp == dlil_main_input_thread); |
		(void) strlcat(inp->dlth_name,
		    "main_input", DLIL_THREADNAME_LEN);
2354 | } else if (dlil_rxpoll_input) { |
2355 | /* |
2356 | * Legacy (non-netif) hybrid polling. |
2357 | */ |
2358 | func = dlil_rxpoll_input_thread_func; |
2359 | VERIFY(inp != dlil_main_input_thread); |
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input_poll", if_name(ifp));
2362 | } else if (net_async || (ifp->if_xflags & IFXF_LEGACY)) { |
2363 | /* |
2364 | * Asynchronous strategy. |
2365 | */ |
2366 | func = dlil_input_thread_func; |
2367 | VERIFY(inp != dlil_main_input_thread); |
		(void) snprintf(inp->dlth_name, DLIL_THREADNAME_LEN,
		    "%s_input", if_name(ifp));
2370 | } else { |
2371 | /* |
2372 | * Synchronous strategy if there's a netif below and |
2373 | * the device isn't capable of hybrid polling. |
2374 | */ |
2375 | ASSERT(func == NULL); |
2376 | ASSERT(!(ifp->if_xflags & IFXF_LEGACY)); |
2377 | VERIFY(inp != dlil_main_input_thread); |
2378 | ASSERT(!inp->dlth_affinity); |
2379 | inp->dlth_strategy = dlil_input_sync; |
2380 | } |
2381 | VERIFY(inp->dlth_thread == THREAD_NULL); |
2382 | |
2383 | /* let caller know */ |
2384 | if (thfunc != NULL) { |
2385 | *thfunc = func; |
2386 | } |
2387 | |
	inp->dlth_lock_grp = lck_grp_alloc_init(inp->dlth_name, LCK_GRP_ATTR_NULL);
	lck_mtx_init(&inp->dlth_lock, inp->dlth_lock_grp, &dlil_lck_attributes);
2390 | |
2391 | inp->dlth_ifp = ifp; /* NULL for main input thread */ |
2392 | |
2393 | /* |
2394 | * For interfaces that support opportunistic polling, set the |
2395 | * low and high watermarks for outstanding inbound packets/bytes. |
2396 | * Also define freeze times for transitioning between modes |
2397 | * and updating the average. |
2398 | */ |
2399 | if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { |
2400 | limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN); |
2401 | if (ifp->if_xflags & IFXF_LEGACY) { |
2402 | (void) dlil_rxpoll_set_params(ifp, NULL, FALSE); |
2403 | } |
2404 | } else { |
2405 | /* |
2406 | * For interfaces that don't support opportunistic |
2407 | * polling, set the burst limit to prevent memory exhaustion. |
2408 | * The values of `if_rcvq_burst_limit' are safeguarded |
2409 | * on customer builds by `sysctl_rcvq_burst_limit'. |
2410 | */ |
2411 | limit = if_rcvq_burst_limit; |
2412 | } |
2413 | |
2414 | _qinit(&inp->dlth_pkts, Q_DROPTAIL, limit, QP_MBUF); |
2415 | if (inp == dlil_main_input_thread) { |
2416 | struct dlil_main_threading_info *inpm = |
2417 | (struct dlil_main_threading_info *)inp; |
2418 | _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit, QP_MBUF); |
2419 | } |
2420 | |
2421 | if (func == NULL) { |
2422 | ASSERT(!(ifp->if_xflags & IFXF_LEGACY)); |
2423 | ASSERT(error == 0); |
2424 | error = ENODEV; |
2425 | goto done; |
2426 | } |
2427 | |
	error = kernel_thread_start(func, inp, &inp->dlth_thread);
2429 | if (error == KERN_SUCCESS) { |
2430 | thread_precedence_policy_data_t info; |
2431 | __unused kern_return_t kret; |
2432 | |
		bzero(&info, sizeof(info));
		info.importance = 0;
		kret = thread_policy_set(inp->dlth_thread,
		    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&info,
		    THREAD_PRECEDENCE_POLICY_COUNT);
2438 | ASSERT(kret == KERN_SUCCESS); |
2439 | /* |
2440 | * We create an affinity set so that the matching workloop |
2441 | * thread or the starter thread (for loopback) can be |
2442 | * scheduled on the same processor set as the input thread. |
2443 | */ |
2444 | if (net_affinity) { |
2445 | struct thread *tp = inp->dlth_thread; |
2446 | u_int32_t tag; |
2447 | /* |
2448 | * Randomize to reduce the probability |
2449 | * of affinity tag namespace collision. |
2450 | */ |
			read_frandom(&tag, sizeof(tag));
			if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) {
				thread_reference(tp);
2454 | inp->dlth_affinity_tag = tag; |
2455 | inp->dlth_affinity = TRUE; |
2456 | } |
2457 | } |
2458 | } else if (inp == dlil_main_input_thread) { |
2459 | panic_plain("%s: couldn't create main input thread" , __func__); |
2460 | /* NOTREACHED */ |
2461 | } else { |
2462 | panic_plain("%s: couldn't create %s input thread" , __func__, |
2463 | if_name(ifp)); |
2464 | /* NOTREACHED */ |
2465 | } |
2466 | OSAddAtomic(1, &cur_dlil_input_threads); |
2467 | |
2468 | done: |
2469 | return error; |
2470 | } |
2471 | |
2472 | #if TEST_INPUT_THREAD_TERMINATION |
2473 | static int |
2474 | sysctl_input_thread_termination_spin SYSCTL_HANDLER_ARGS |
2475 | { |
2476 | #pragma unused(arg1, arg2) |
2477 | uint32_t i; |
2478 | int err; |
2479 | |
2480 | i = if_input_thread_termination_spin; |
2481 | |
2482 | err = sysctl_handle_int(oidp, &i, 0, req); |
2483 | if (err != 0 || req->newptr == USER_ADDR_NULL) { |
2484 | return err; |
2485 | } |
2486 | |
2487 | if (net_rxpoll == 0) { |
2488 | return ENXIO; |
2489 | } |
2490 | |
2491 | if_input_thread_termination_spin = i; |
2492 | return err; |
2493 | } |
2494 | #endif /* TEST_INPUT_THREAD_TERMINATION */ |
2495 | |
2496 | static void |
2497 | dlil_clean_threading_info(struct dlil_threading_info *inp) |
2498 | { |
	lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
	lck_grp_free(inp->dlth_lock_grp);
2501 | inp->dlth_lock_grp = NULL; |
2502 | |
2503 | inp->dlth_flags = 0; |
2504 | inp->dlth_wtot = 0; |
	bzero(inp->dlth_name, sizeof(inp->dlth_name));
2506 | inp->dlth_ifp = NULL; |
2507 | VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts)); |
2508 | qlimit(&inp->dlth_pkts) = 0; |
	bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
2510 | |
2511 | VERIFY(!inp->dlth_affinity); |
2512 | inp->dlth_thread = THREAD_NULL; |
2513 | inp->dlth_strategy = NULL; |
2514 | VERIFY(inp->dlth_driver_thread == THREAD_NULL); |
2515 | VERIFY(inp->dlth_poller_thread == THREAD_NULL); |
2516 | VERIFY(inp->dlth_affinity_tag == 0); |
2517 | #if IFNET_INPUT_SANITY_CHK |
2518 | inp->dlth_pkts_cnt = 0; |
2519 | #endif /* IFNET_INPUT_SANITY_CHK */ |
2520 | } |
2521 | |
2522 | static void |
2523 | dlil_terminate_input_thread(struct dlil_threading_info *inp) |
2524 | { |
2525 | struct ifnet *ifp = inp->dlth_ifp; |
2526 | classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); |
2527 | |
2528 | VERIFY(current_thread() == inp->dlth_thread); |
2529 | VERIFY(inp != dlil_main_input_thread); |
2530 | |
2531 | OSAddAtomic(-1, &cur_dlil_input_threads); |
2532 | |
2533 | #if TEST_INPUT_THREAD_TERMINATION |
2534 | { /* do something useless that won't get optimized away */ |
2535 | uint32_t v = 1; |
2536 | for (uint32_t i = 0; |
2537 | i < if_input_thread_termination_spin; |
2538 | i++) { |
2539 | v = (i + 1) * v; |
2540 | } |
2541 | DLIL_PRINTF("the value is %d\n" , v); |
2542 | } |
2543 | #endif /* TEST_INPUT_THREAD_TERMINATION */ |
2544 | |
	lck_mtx_lock_spin(&inp->dlth_lock);
	_getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL);
	VERIFY((inp->dlth_flags & DLIL_INPUT_TERMINATE) != 0);
	inp->dlth_flags |= DLIL_INPUT_TERMINATE_COMPLETE;
	wakeup_one((caddr_t)&inp->dlth_flags);
	lck_mtx_unlock(&inp->dlth_lock);
2551 | |
2552 | /* free up pending packets */ |
2553 | if (pkt.cp_mbuf != NULL) { |
		mbuf_freem_list(pkt.cp_mbuf);
2555 | } |
2556 | |
2557 | /* for the extra refcnt from kernel_thread_start() */ |
	thread_deallocate(current_thread());
2559 | |
2560 | if (dlil_verbose) { |
2561 | DLIL_PRINTF("%s: input thread terminated\n" , |
2562 | if_name(ifp)); |
2563 | } |
2564 | |
2565 | /* this is the end */ |
	thread_terminate(current_thread());
2567 | /* NOTREACHED */ |
2568 | } |
2569 | |
2570 | static kern_return_t |
2571 | dlil_affinity_set(struct thread *tp, u_int32_t tag) |
2572 | { |
2573 | thread_affinity_policy_data_t policy; |
2574 | |
	bzero(&policy, sizeof(policy));
	policy.affinity_tag = tag;
	return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
2579 | } |
2580 | |
2581 | #if SKYWALK && defined(XNU_TARGET_OS_OSX) |
2582 | static void |
2583 | dlil_filter_event(struct eventhandler_entry_arg arg __unused, |
2584 | enum net_filter_event_subsystems state) |
2585 | { |
2586 | bool old_if_enable_fsw_transport_netagent = if_enable_fsw_transport_netagent; |
2587 | if ((state & ~NET_FILTER_EVENT_PF_PRIVATE_PROXY) == 0) { |
2588 | if_enable_fsw_transport_netagent = 1; |
2589 | } else { |
2590 | if_enable_fsw_transport_netagent = 0; |
2591 | } |
2592 | if (old_if_enable_fsw_transport_netagent != if_enable_fsw_transport_netagent) { |
2593 | kern_nexus_update_netagents(); |
2594 | } else if (!if_enable_fsw_transport_netagent) { |
2595 | necp_update_all_clients(); |
2596 | } |
2597 | } |
2598 | #endif /* SKYWALK && XNU_TARGET_OS_OSX */ |
2599 | |
2600 | void |
2601 | dlil_init(void) |
2602 | { |
2603 | thread_t thread = THREAD_NULL; |
2604 | |
2605 | /* |
2606 | * The following fields must be 64-bit aligned for atomic operations. |
2607 | */ |
2608 | IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets); |
2609 | IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors); |
2610 | IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets); |
2611 | IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors); |
2612 | IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions); |
2613 | IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes); |
2614 | IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes); |
2615 | IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts); |
2616 | IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts); |
2617 | IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops); |
2618 | IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto); |
2619 | IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs); |
2620 | IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes); |
2621 | IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets); |
2622 | IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes); |
2623 | |
2624 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets); |
2625 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors); |
2626 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets); |
2627 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors); |
2628 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions); |
2629 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes); |
2630 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes); |
2631 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts); |
2632 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts); |
2633 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops); |
2634 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto); |
2635 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs); |
2636 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes); |
2637 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets); |
2638 | IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes); |
2639 | |
2640 | /* |
2641 | * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts. |
2642 | */ |
2643 | _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP); |
2644 | _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP); |
2645 | _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP); |
2646 | _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT); |
2647 | _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT); |
2648 | _CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6); |
2649 | _CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6); |
2650 | _CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT); |
2651 | _CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL); |
2652 | _CASSERT(IF_HWASSIST_CSUM_ZERO_INVERT == IFNET_CSUM_ZERO_INVERT); |
2653 | _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING); |
2654 | _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU); |
2655 | _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4); |
2656 | _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6); |
2657 | |
2658 | /* |
2659 | * ... as well as the mbuf checksum flags counterparts. |
2660 | */ |
2661 | _CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP); |
2662 | _CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP); |
2663 | _CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP); |
2664 | _CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS); |
2665 | _CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT); |
2666 | _CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6); |
2667 | _CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6); |
2668 | _CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6); |
2669 | _CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL); |
2670 | _CASSERT(CSUM_ZERO_INVERT == IF_HWASSIST_CSUM_ZERO_INVERT); |
2671 | _CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING); |
2672 | |
2673 | /* |
2674 | * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info. |
2675 | */ |
2676 | _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN); |
2677 | _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN); |
2678 | |
2679 | _CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL); |
2680 | _CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY); |
2681 | _CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER); |
2682 | _CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE); |
2683 | |
2684 | _CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY); |
2685 | _CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY); |
2686 | _CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE); |
2687 | |
2688 | _CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY); |
2689 | _CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK); |
2690 | _CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET); |
2691 | _CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP); |
2692 | _CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN); |
2693 | _CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN); |
2694 | _CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP); |
2695 | _CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC); |
2696 | _CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC); |
2697 | _CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP); |
2698 | _CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF); |
2699 | _CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH); |
2700 | _CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF); |
2701 | _CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE); |
2702 | _CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND); |
2703 | _CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR); |
2704 | _CASSERT(IFRTYPE_FAMILY_UTUN == IFNET_FAMILY_UTUN); |
2705 | _CASSERT(IFRTYPE_FAMILY_IPSEC == IFNET_FAMILY_IPSEC); |
2706 | |
2707 | _CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY); |
2708 | _CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB); |
2709 | _CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH); |
2710 | _CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI); |
2711 | _CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT); |
2712 | _CASSERT(IFRTYPE_SUBFAMILY_RESERVED == IFNET_SUBFAMILY_RESERVED); |
2713 | _CASSERT(IFRTYPE_SUBFAMILY_INTCOPROC == IFNET_SUBFAMILY_INTCOPROC); |
2714 | _CASSERT(IFRTYPE_SUBFAMILY_QUICKRELAY == IFNET_SUBFAMILY_QUICKRELAY); |
2715 | _CASSERT(IFRTYPE_SUBFAMILY_VMNET == IFNET_SUBFAMILY_VMNET); |
2716 | _CASSERT(IFRTYPE_SUBFAMILY_SIMCELL == IFNET_SUBFAMILY_SIMCELL); |
2717 | _CASSERT(IFRTYPE_SUBFAMILY_MANAGEMENT == IFNET_SUBFAMILY_MANAGEMENT); |
2718 | |
2719 | _CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN); |
2720 | _CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN); |
2721 | |
2722 | PE_parse_boot_argn(arg_string: "net_affinity" , arg_ptr: &net_affinity, |
2723 | max_arg: sizeof(net_affinity)); |
2724 | |
2725 | PE_parse_boot_argn(arg_string: "net_rxpoll" , arg_ptr: &net_rxpoll, max_arg: sizeof(net_rxpoll)); |
2726 | |
2727 | PE_parse_boot_argn(arg_string: "net_rtref" , arg_ptr: &net_rtref, max_arg: sizeof(net_rtref)); |
2728 | |
2729 | PE_parse_boot_argn(arg_string: "net_async" , arg_ptr: &net_async, max_arg: sizeof(net_async)); |
2730 | |
2731 | PE_parse_boot_argn(arg_string: "ifnet_debug" , arg_ptr: &ifnet_debug, max_arg: sizeof(ifnet_debug)); |
2732 | |
2733 | VERIFY(dlil_pending_thread_cnt == 0); |
2734 | #if SKYWALK |
2735 | boolean_t pe_enable_fsw_transport_netagent = FALSE; |
2736 | boolean_t pe_disable_fsw_transport_netagent = FALSE; |
2737 | boolean_t enable_fsw_netagent = |
2738 | (((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0) || |
2739 | (if_attach_nx & IF_ATTACH_NX_FSW_IP_NETAGENT) != 0); |
2740 | |
2741 | /* |
2742 | * Check the device tree to see if Skywalk netagent has been explicitly |
2743 | * enabled or disabled. This can be overridden via if_attach_nx below. |
2744 | * Note that the property is a 0-length key, and so checking for the |
2745 | * presence itself is enough (no need to check for the actual value of |
2746 | * the retrieved variable.) |
2747 | */ |
	pe_enable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_enable",
	    &pe_enable_fsw_transport_netagent,
	    sizeof(pe_enable_fsw_transport_netagent));
	pe_disable_fsw_transport_netagent =
	    PE_get_default("kern.skywalk_netagent_disable",
	    &pe_disable_fsw_transport_netagent,
	    sizeof(pe_disable_fsw_transport_netagent));
2756 | |
2757 | /* |
2758 | * These two are mutually exclusive, i.e. they both can be absent, |
2759 | * but only one can be present at a time, and so we assert to make |
2760 | * sure it is correct. |
2761 | */ |
2762 | VERIFY((!pe_enable_fsw_transport_netagent && |
2763 | !pe_disable_fsw_transport_netagent) || |
2764 | (pe_enable_fsw_transport_netagent ^ |
2765 | pe_disable_fsw_transport_netagent)); |
2766 | |
2767 | if (pe_enable_fsw_transport_netagent) { |
2768 | kprintf(fmt: "SK: netagent is enabled via an override for " |
2769 | "this platform\n" ); |
2770 | if_attach_nx = SKYWALK_NETWORKING_ENABLED; |
2771 | } else if (pe_disable_fsw_transport_netagent) { |
2772 | kprintf(fmt: "SK: netagent is disabled via an override for " |
2773 | "this platform\n" ); |
2774 | if_attach_nx = SKYWALK_NETWORKING_DISABLED; |
2775 | } else { |
2776 | kprintf(fmt: "SK: netagent is %s by default for this platform\n" , |
2777 | (enable_fsw_netagent ? "enabled" : "disabled" )); |
2778 | if_attach_nx = IF_ATTACH_NX_DEFAULT; |
2779 | } |
2780 | |
2781 | /* |
2782 | * Now see if there's a boot-arg override. |
2783 | */ |
2784 | (void) PE_parse_boot_argn(arg_string: "if_attach_nx" , arg_ptr: &if_attach_nx, |
2785 | max_arg: sizeof(if_attach_nx)); |
2786 | if_enable_fsw_transport_netagent = |
2787 | ((if_attach_nx & IF_ATTACH_NX_FSW_TRANSPORT_NETAGENT) != 0); |
2788 | |
2789 | if_netif_all = ((if_attach_nx & IF_ATTACH_NX_NETIF_ALL) != 0); |
2790 | |
2791 | if (pe_disable_fsw_transport_netagent && |
2792 | if_enable_fsw_transport_netagent) { |
2793 | kprintf(fmt: "SK: netagent is force-enabled\n" ); |
2794 | } else if (!pe_disable_fsw_transport_netagent && |
2795 | !if_enable_fsw_transport_netagent) { |
2796 | kprintf(fmt: "SK: netagent is force-disabled\n" ); |
2797 | } |
2798 | #ifdef XNU_TARGET_OS_OSX |
2799 | if (if_enable_fsw_transport_netagent) { |
		net_filter_event_register(dlil_filter_event);
2801 | } |
2802 | #endif /* XNU_TARGET_OS_OSX */ |
2803 | |
2804 | #if (DEVELOPMENT || DEBUG) |
2805 | (void) PE_parse_boot_argn("fsw_use_max_mtu_buffer" , |
2806 | &fsw_use_max_mtu_buffer, sizeof(fsw_use_max_mtu_buffer)); |
2807 | #endif /* (DEVELOPMENT || DEBUG) */ |
2808 | |
2809 | #endif /* SKYWALK */ |
2810 | dlif_size = (ifnet_debug == 0) ? sizeof(struct dlil_ifnet) : |
2811 | sizeof(struct dlil_ifnet_dbg); |
2812 | /* Enforce 64-bit alignment for dlil_ifnet structure */ |
2813 | dlif_bufsize = dlif_size + sizeof(void *) + sizeof(u_int64_t); |
2814 | dlif_bufsize = (uint32_t)P2ROUNDUP(dlif_bufsize, sizeof(u_int64_t)); |
	dlif_zone = zone_create(DLIF_ZONE_NAME, dlif_bufsize, ZC_ZFREE_CLEARMEM);
2816 | |
2817 | dlif_tcpstat_size = sizeof(struct tcpstat_local); |
2818 | /* Enforce 64-bit alignment for tcpstat_local structure */ |
2819 | dlif_tcpstat_bufsize = |
2820 | dlif_tcpstat_size + sizeof(void *) + sizeof(u_int64_t); |
2821 | dlif_tcpstat_bufsize = (uint32_t) |
2822 | P2ROUNDUP(dlif_tcpstat_bufsize, sizeof(u_int64_t)); |
	dlif_tcpstat_zone = zone_create(DLIF_TCPSTAT_ZONE_NAME,
	    dlif_tcpstat_bufsize, ZC_ZFREE_CLEARMEM);
2825 | |
2826 | dlif_udpstat_size = sizeof(struct udpstat_local); |
2827 | /* Enforce 64-bit alignment for udpstat_local structure */ |
2828 | dlif_udpstat_bufsize = |
2829 | dlif_udpstat_size + sizeof(void *) + sizeof(u_int64_t); |
2830 | dlif_udpstat_bufsize = (uint32_t) |
2831 | P2ROUNDUP(dlif_udpstat_bufsize, sizeof(u_int64_t)); |
	dlif_udpstat_zone = zone_create(DLIF_UDPSTAT_ZONE_NAME,
	    dlif_udpstat_bufsize, ZC_ZFREE_CLEARMEM);
2834 | |
	eventhandler_lists_ctxt_init(&ifnet_evhdlr_ctxt);
2836 | |
2837 | TAILQ_INIT(&dlil_ifnet_head); |
2838 | TAILQ_INIT(&ifnet_head); |
2839 | TAILQ_INIT(&ifnet_detaching_head); |
2840 | TAILQ_INIT(&ifnet_ordered_head); |
2841 | |
2842 | /* Initialize interface address subsystem */ |
2843 | ifa_init(); |
2844 | |
2845 | #if PF |
2846 | /* Initialize the packet filter */ |
2847 | pfinit(); |
2848 | #endif /* PF */ |
2849 | |
2850 | /* Initialize queue algorithms */ |
2851 | classq_init(); |
2852 | |
2853 | /* Initialize packet schedulers */ |
2854 | pktsched_init(); |
2855 | |
2856 | /* Initialize flow advisory subsystem */ |
2857 | flowadv_init(); |
2858 | |
2859 | /* Initialize the pktap virtual interface */ |
2860 | pktap_init(); |
2861 | |
2862 | /* Initialize the service class to dscp map */ |
2863 | net_qos_map_init(); |
2864 | |
2865 | /* Initialize the interface low power mode event handler */ |
2866 | if_low_power_evhdlr_init(); |
2867 | |
2868 | /* Initialize the interface offload port list subsystem */ |
2869 | if_ports_used_init(); |
2870 | |
2871 | #if DEBUG || DEVELOPMENT |
2872 | /* Run self-tests */ |
2873 | dlil_verify_sum16(); |
2874 | #endif /* DEBUG || DEVELOPMENT */ |
2875 | |
2876 | /* |
2877 | * Create and start up the main DLIL input thread and the interface |
2878 | * detacher threads once everything is initialized. |
2879 | */ |
2880 | dlil_incr_pending_thread_count(); |
	(void) dlil_create_input_thread(NULL, dlil_main_input_thread, NULL);
2882 | |
2883 | /* |
2884 | * Create ifnet detacher thread. |
2885 | * When an interface gets detached, part of the detach processing |
2886 | * is delayed. The interface is added to delayed detach list |
2887 | * and this thread is woken up to call ifnet_detach_final |
2888 | * on these interfaces. |
2889 | */ |
2890 | dlil_incr_pending_thread_count(); |
	if (kernel_thread_start(ifnet_detacher_thread_func,
	    NULL, &thread) != KERN_SUCCESS) {
		panic_plain("%s: couldn't create detacher thread", __func__);
2894 | /* NOTREACHED */ |
2895 | } |
2896 | thread_deallocate(thread); |
2897 | |
2898 | /* |
2899 | * Wait for the created kernel threads for dlil to get |
2900 | * scheduled and run at least once before we proceed |
2901 | */ |
	lck_mtx_lock(&dlil_thread_sync_lock);
	while (dlil_pending_thread_cnt != 0) {
		DLIL_PRINTF("%s: Waiting for all the created dlil kernel "
		    "threads to get scheduled at least once.\n", __func__);
		(void) msleep(&dlil_pending_thread_cnt, &dlil_thread_sync_lock,
		    (PZERO - 1), __func__, NULL);
		LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_ASSERT_OWNED);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
	DLIL_PRINTF("%s: All the created dlil kernel threads have been "
	    "scheduled at least once. Proceeding.\n", __func__);
2913 | } |
2914 | |
2915 | static void |
2916 | if_flt_monitor_busy(struct ifnet *ifp) |
2917 | { |
2918 | LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); |
2919 | |
2920 | ++ifp->if_flt_busy; |
2921 | VERIFY(ifp->if_flt_busy != 0); |
2922 | } |
2923 | |
2924 | static void |
2925 | if_flt_monitor_unbusy(struct ifnet *ifp) |
2926 | { |
2927 | if_flt_monitor_leave(ifp); |
2928 | } |
2929 | |
2930 | static void |
2931 | if_flt_monitor_enter(struct ifnet *ifp) |
2932 | { |
2933 | LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); |
2934 | |
2935 | while (ifp->if_flt_busy) { |
2936 | ++ifp->if_flt_waiters; |
2937 | 		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock, |
2938 | 		    (PZERO - 1), "if_flt_monitor", NULL); |
2939 | } |
2940 | if_flt_monitor_busy(ifp); |
2941 | } |
2942 | |
2943 | static void |
2944 | if_flt_monitor_leave(struct ifnet *ifp) |
2945 | { |
2946 | LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); |
2947 | |
2948 | VERIFY(ifp->if_flt_busy != 0); |
2949 | --ifp->if_flt_busy; |
2950 | |
2951 | if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) { |
2952 | ifp->if_flt_waiters = 0; |
2953 | 		wakeup(&ifp->if_flt_head); |
2954 | } |
2955 | } |
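/*
 * Usage pattern for the monitor above, as in dlil_attach_filter() and
 * dlil_detach_filter_internal() below: a writer enters the monitor to
 * drain any busy walker before mutating if_flt_head, then leaves it to
 * wake up anyone blocked in if_flt_monitor_enter().  Condensed:
 *
 *	lck_mtx_lock(&ifp->if_flt_lock);
 *	if_flt_monitor_enter(ifp);	(may msleep, dropping the mutex)
 *	... mutate ifp->if_flt_head ...
 *	if_flt_monitor_leave(ifp);
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 */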
2956 | |
2957 | __private_extern__ int |
2958 | dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, |
2959 | interface_filter_t *filter_ref, u_int32_t flags) |
2960 | { |
2961 | int retval = 0; |
2962 | struct ifnet_filter *filter = NULL; |
2963 | |
2964 | ifnet_head_lock_shared(); |
2965 | |
2966 | /* Check that the interface is in the global list */ |
2967 | if (!ifnet_lookup(ifp)) { |
2968 | retval = ENXIO; |
2969 | goto done; |
2970 | } |
2971 | 	if (!ifnet_is_attached(ifp, 1)) { |
2972 | 		os_log(OS_LOG_DEFAULT, "%s: %s is no longer attached", |
2973 | 		    __func__, if_name(ifp)); |
2974 | retval = ENXIO; |
2975 | goto done; |
2976 | } |
2977 | |
2978 | filter = zalloc_flags(dlif_filt_zone, Z_WAITOK | Z_ZERO | Z_NOFAIL); |
2979 | |
2980 | /* refcnt held above during lookup */ |
2981 | filter->filt_flags = flags; |
2982 | filter->filt_ifp = ifp; |
2983 | filter->filt_cookie = if_filter->iff_cookie; |
2984 | filter->filt_name = if_filter->iff_name; |
2985 | filter->filt_protocol = if_filter->iff_protocol; |
2986 | /* |
2987 | * Do not install filter callbacks for internal coproc interface |
2988 | * and for management interfaces |
2989 | */ |
2990 | if (!IFNET_IS_INTCOPROC(ifp) && !IFNET_IS_MANAGEMENT(ifp)) { |
2991 | filter->filt_input = if_filter->iff_input; |
2992 | filter->filt_output = if_filter->iff_output; |
2993 | filter->filt_event = if_filter->iff_event; |
2994 | filter->filt_ioctl = if_filter->iff_ioctl; |
2995 | } |
2996 | filter->filt_detached = if_filter->iff_detached; |
2997 | |
2998 | 	lck_mtx_lock(&ifp->if_flt_lock); |
2999 | if_flt_monitor_enter(ifp); |
3000 | |
3001 | LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); |
3002 | TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next); |
3003 | |
3004 | *filter_ref = filter; |
3005 | |
3006 | /* |
3007 | * Bump filter count and route_generation ID to let TCP |
3008 | * know it shouldn't do TSO on this connection |
3009 | */ |
3010 | if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { |
3011 | ifnet_filter_update_tso(ifp, TRUE); |
3012 | } |
3013 | 	OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_count); |
3014 | 	INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_total); |
3015 | 	if (filter->filt_flags & DLIL_IFF_INTERNAL) { |
3016 | 		OSIncrementAtomic64(&net_api_stats.nas_iflt_attach_os_count); |
3017 | INC_ATOMIC_INT64_LIM(net_api_stats.nas_iflt_attach_os_total); |
3018 | } else { |
3019 | OSAddAtomic(1, &ifp->if_flt_non_os_count); |
3020 | } |
3021 | if_flt_monitor_leave(ifp); |
3022 | 	lck_mtx_unlock(&ifp->if_flt_lock); |
3023 | |
3024 | #if SKYWALK && defined(XNU_TARGET_OS_OSX) |
3025 | 	net_filter_event_mark(NET_FILTER_EVENT_INTERFACE, |
3026 | 	    net_check_compatible_if_filter(NULL)); |
3027 | #endif /* SKYWALK && XNU_TARGET_OS_OSX */ |
3028 | |
3029 | if (dlil_verbose) { |
3030 | 		DLIL_PRINTF("%s: %s filter attached\n", if_name(ifp), |
3031 | if_filter->iff_name); |
3032 | } |
3033 | ifnet_decr_iorefcnt(ifp); |
3034 | |
3035 | done: |
3036 | ifnet_head_done(); |
3037 | if (retval != 0 && ifp != NULL) { |
3038 | 		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n", |
3039 | if_name(ifp), if_filter->iff_name, retval); |
3040 | } |
3041 | if (retval != 0 && filter != NULL) { |
3042 | zfree(dlif_filt_zone, filter); |
3043 | } |
3044 | |
3045 | return retval; |
3046 | } |
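/*
 * For reference, third parties reach this routine through the
 * iflt_attach() KPI declared in <net/kpi_interfacefilter.h>.  A hedged
 * sketch of a caller; my_input, my_detached and my_state are
 * hypothetical, and returning 0 from the input callback lets the
 * packet continue up the stack:
 *
 *	static errno_t
 *	my_input(void *cookie, ifnet_t ifp, protocol_family_t proto,
 *	    mbuf_t *data, char **frame_ptr)
 *	{
 *		return 0;
 *	}
 *
 *	struct iff_filter flt = {
 *		.iff_cookie   = &my_state,
 *		.iff_name     = "com.example.filter",
 *		.iff_protocol = 0,		(0 means all protocols)
 *		.iff_input    = my_input,
 *		.iff_detached = my_detached,
 *	};
 *	interface_filter_t ref;
 *	errno_t err = iflt_attach(ifp, &flt, &ref);
 */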
3047 | |
3048 | static int |
3049 | dlil_detach_filter_internal(interface_filter_t filter, int detached) |
3050 | { |
3051 | int retval = 0; |
3052 | |
3053 | if (detached == 0) { |
3054 | ifnet_t ifp = NULL; |
3055 | |
3056 | ifnet_head_lock_shared(); |
3057 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { |
3058 | interface_filter_t entry = NULL; |
3059 | |
3060 | 			lck_mtx_lock(&ifp->if_flt_lock); |
3061 | TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) { |
3062 | if (entry != filter || entry->filt_skip) { |
3063 | continue; |
3064 | } |
3065 | /* |
3066 | * We've found a match; since it's possible |
3067 | * that the thread gets blocked in the monitor, |
3068 | * we do the lock dance. Interface should |
3069 | * not be detached since we still have a use |
3070 | * count held during filter attach. |
3071 | */ |
3072 | entry->filt_skip = 1; /* skip input/output */ |
3073 | 				lck_mtx_unlock(&ifp->if_flt_lock); |
3074 | 				ifnet_head_done(); |
3075 | |
3076 | 				lck_mtx_lock(&ifp->if_flt_lock); |
3077 | if_flt_monitor_enter(ifp); |
3078 | LCK_MTX_ASSERT(&ifp->if_flt_lock, |
3079 | LCK_MTX_ASSERT_OWNED); |
3080 | |
3081 | /* Remove the filter from the list */ |
3082 | TAILQ_REMOVE(&ifp->if_flt_head, filter, |
3083 | filt_next); |
3084 | |
3085 | if (dlil_verbose) { |
3086 | 					DLIL_PRINTF("%s: %s filter detached\n", |
3087 | if_name(ifp), filter->filt_name); |
3088 | } |
3089 | if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) { |
3090 | VERIFY(ifp->if_flt_non_os_count != 0); |
3091 | OSAddAtomic(-1, &ifp->if_flt_non_os_count); |
3092 | } |
3093 | /* |
3094 | * Decrease filter count and route_generation |
3095 | 				 * ID to let TCP know it should reevaluate |
3096 | 				 * whether to do TSO. |
3097 | */ |
3098 | if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { |
3099 | ifnet_filter_update_tso(ifp, FALSE); |
3100 | } |
3101 | if_flt_monitor_leave(ifp); |
3102 | 				lck_mtx_unlock(&ifp->if_flt_lock); |
3103 | goto destroy; |
3104 | } |
3105 | 			lck_mtx_unlock(&ifp->if_flt_lock); |
3106 | } |
3107 | ifnet_head_done(); |
3108 | |
3109 | /* filter parameter is not a valid filter ref */ |
3110 | retval = EINVAL; |
3111 | goto done; |
3112 | } else { |
3113 | struct ifnet *ifp = filter->filt_ifp; |
3114 | /* |
3115 | * Here we are called from ifnet_detach_final(); the |
3116 | * caller had emptied if_flt_head and we're doing an |
3117 | * implicit filter detach because the interface is |
3118 | * about to go away. Make sure to adjust the counters |
3119 | * in this case. We don't need the protection of the |
3120 | * filter monitor since we're called as part of the |
3121 | * final detach in the context of the detacher thread. |
3122 | */ |
3123 | if (!(filter->filt_flags & DLIL_IFF_INTERNAL)) { |
3124 | VERIFY(ifp->if_flt_non_os_count != 0); |
3125 | OSAddAtomic(-1, &ifp->if_flt_non_os_count); |
3126 | } |
3127 | /* |
3128 | * Decrease filter count and route_generation |
3129 | 		 * ID to let TCP know it should reevaluate |
3130 | 		 * whether to do TSO. |
3131 | */ |
3132 | if ((filter->filt_flags & DLIL_IFF_TSO) == 0) { |
3133 | ifnet_filter_update_tso(ifp, FALSE); |
3134 | } |
3135 | } |
3136 | |
3137 | if (dlil_verbose) { |
3138 | 		DLIL_PRINTF("%s filter detached\n", filter->filt_name); |
3139 | } |
3140 | |
3141 | destroy: |
3142 | |
3143 | /* Call the detached function if there is one */ |
3144 | if (filter->filt_detached) { |
3145 | filter->filt_detached(filter->filt_cookie, filter->filt_ifp); |
3146 | } |
3147 | |
3148 | VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_count) > 0); |
3149 | if (filter->filt_flags & DLIL_IFF_INTERNAL) { |
3150 | VERIFY(OSDecrementAtomic64(&net_api_stats.nas_iflt_attach_os_count) > 0); |
3151 | } |
3152 | #if SKYWALK && defined(XNU_TARGET_OS_OSX) |
3153 | 	net_filter_event_mark(NET_FILTER_EVENT_INTERFACE, |
3154 | 	    net_check_compatible_if_filter(NULL)); |
3155 | #endif /* SKYWALK && XNU_TARGET_OS_OSX */ |
3156 | |
3157 | /* Free the filter */ |
3158 | zfree(dlif_filt_zone, filter); |
3159 | filter = NULL; |
3160 | done: |
3161 | if (retval != 0 && filter != NULL) { |
3162 | 		DLIL_PRINTF("failed to detach %s filter (err=%d)\n", |
3163 | filter->filt_name, retval); |
3164 | } |
3165 | |
3166 | return retval; |
3167 | } |
3168 | |
3169 | __private_extern__ void |
3170 | dlil_detach_filter(interface_filter_t filter) |
3171 | { |
3172 | if (filter == NULL) { |
3173 | return; |
3174 | } |
3175 | 	dlil_detach_filter_internal(filter, 0); |
3176 | } |
3177 | |
3178 | __private_extern__ boolean_t |
3179 | dlil_has_ip_filter(void) |
3180 | { |
3181 | boolean_t has_filter = ((net_api_stats.nas_ipf_add_count - net_api_stats.nas_ipf_add_os_count) > 0); |
3182 | |
3183 | VERIFY(net_api_stats.nas_ipf_add_count >= net_api_stats.nas_ipf_add_os_count); |
3184 | |
3185 | DTRACE_IP1(dlil_has_ip_filter, boolean_t, has_filter); |
3186 | return has_filter; |
3187 | } |
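/*
 * nas_ipf_add_count covers every attached IP filter while
 * nas_ipf_add_os_count covers only the OS-internal ones, so the
 * difference above is the number of third-party IP filters; e.g.
 * counts of 3 and 2 leave one external filter, and has_filter is TRUE.
 */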
3188 | |
3189 | __private_extern__ boolean_t |
3190 | dlil_has_if_filter(struct ifnet *ifp) |
3191 | { |
3192 | boolean_t has_filter = !TAILQ_EMPTY(&ifp->if_flt_head); |
3193 | DTRACE_IP1(dlil_has_if_filter, boolean_t, has_filter); |
3194 | return has_filter; |
3195 | } |
3196 | |
3197 | static inline void |
3198 | dlil_input_wakeup(struct dlil_threading_info *inp) |
3199 | { |
3200 | LCK_MTX_ASSERT(&inp->dlth_lock, LCK_MTX_ASSERT_OWNED); |
3201 | |
3202 | inp->dlth_flags |= DLIL_INPUT_WAITING; |
3203 | if (!(inp->dlth_flags & DLIL_INPUT_RUNNING)) { |
3204 | inp->dlth_wtot++; |
3205 | 		wakeup_one((caddr_t)&inp->dlth_flags); |
3206 | } |
3207 | } |
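/*
 * The producer side is expected to hold dlth_lock across the enqueue
 * and the kick, roughly as follows (the exact enqueue call depends on
 * the caller):
 *
 *	lck_mtx_lock_spin(&inp->dlth_lock);
 *	... enqueue mbuf chain onto inp->dlth_pkts ...
 *	dlil_input_wakeup(inp);		(sets DLIL_INPUT_WAITING)
 *	lck_mtx_unlock(&inp->dlth_lock);
 */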
3208 | |
3209 | __attribute__((noreturn)) |
3210 | static void |
3211 | dlil_main_input_thread_func(void *v, wait_result_t w) |
3212 | { |
3213 | #pragma unused(w) |
3214 | struct dlil_threading_info *inp = v; |
3215 | |
3216 | VERIFY(inp == dlil_main_input_thread); |
3217 | VERIFY(inp->dlth_ifp == NULL); |
3218 | VERIFY(current_thread() == inp->dlth_thread); |
3219 | |
3220 | 	lck_mtx_lock(&inp->dlth_lock); |
3221 | 	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING))); |
3222 | 	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT); |
3223 | 	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC; |
3224 | 	/* wake up once to get out of embryonic state */ |
3225 | 	dlil_input_wakeup(inp); |
3226 | 	lck_mtx_unlock(&inp->dlth_lock); |
3227 | 	(void) thread_block_parameter(dlil_main_input_thread_cont, inp); |
3228 | /* NOTREACHED */ |
3229 | __builtin_unreachable(); |
3230 | } |
3231 | |
3232 | /* |
3233 | * Main input thread: |
3234 | * |
3235 | * a) handles all inbound packets for lo0 |
3236 | * b) handles all inbound packets for interfaces with no dedicated |
3237 |  *    input thread (e.g. anything but Ethernet/PDP, or those that |
3238 |  *    support opportunistic polling) |
3239 | * c) protocol registrations |
3240 | * d) packet injections |
3241 | */ |
3242 | __attribute__((noreturn)) |
3243 | static void |
3244 | dlil_main_input_thread_cont(void *v, wait_result_t wres) |
3245 | { |
3246 | struct dlil_main_threading_info *inpm = v; |
3247 | struct dlil_threading_info *inp = v; |
3248 | |
3249 | /* main input thread is uninterruptible */ |
3250 | VERIFY(wres != THREAD_INTERRUPTED); |
3251 | 	lck_mtx_lock_spin(&inp->dlth_lock); |
3252 | VERIFY(!(inp->dlth_flags & (DLIL_INPUT_TERMINATE | |
3253 | DLIL_INPUT_RUNNING))); |
3254 | inp->dlth_flags |= DLIL_INPUT_RUNNING; |
3255 | |
3256 | while (1) { |
3257 | struct mbuf *m = NULL, *m_loop = NULL; |
3258 | u_int32_t m_cnt, m_cnt_loop; |
3259 | classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); |
3260 | boolean_t proto_req; |
3261 | boolean_t embryonic; |
3262 | |
3263 | inp->dlth_flags &= ~DLIL_INPUT_WAITING; |
3264 | |
3265 | if (__improbable(embryonic = |
3266 | (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) { |
3267 | inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC; |
3268 | } |
3269 | |
3270 | proto_req = (inp->dlth_flags & |
3271 | (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)); |
3272 | |
3273 | /* Packets for non-dedicated interfaces other than lo0 */ |
3274 | m_cnt = qlen(&inp->dlth_pkts); |
3275 | _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL); |
3276 | m = pkt.cp_mbuf; |
3277 | |
3278 | /* Packets exclusive to lo0 */ |
3279 | m_cnt_loop = qlen(&inpm->lo_rcvq_pkts); |
3280 | _getq_all(&inpm->lo_rcvq_pkts, &pkt, NULL, NULL, NULL); |
3281 | m_loop = pkt.cp_mbuf; |
3282 | |
3283 | inp->dlth_wtot = 0; |
3284 | |
3285 | 		lck_mtx_unlock(&inp->dlth_lock); |
3286 | |
3287 | if (__improbable(embryonic)) { |
3288 | dlil_decr_pending_thread_count(); |
3289 | } |
3290 | |
3291 | /* |
3292 | * NOTE warning %%% attention !!!! |
3293 | * We should think about putting some thread starvation |
3294 | * safeguards if we deal with long chains of packets. |
3295 | */ |
3296 | if (__probable(m_loop != NULL)) { |
3297 | dlil_input_packet_list_extended(lo_ifp, m_loop, |
3298 | m_cnt_loop, IFNET_MODEL_INPUT_POLL_OFF); |
3299 | } |
3300 | |
3301 | if (__probable(m != NULL)) { |
3302 | dlil_input_packet_list_extended(NULL, m, |
3303 | m_cnt, IFNET_MODEL_INPUT_POLL_OFF); |
3304 | } |
3305 | |
3306 | if (__improbable(proto_req)) { |
3307 | proto_input_run(); |
3308 | } |
3309 | |
3310 | 		lck_mtx_lock_spin(&inp->dlth_lock); |
3311 | VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING); |
3312 | /* main input thread cannot be terminated */ |
3313 | VERIFY(!(inp->dlth_flags & DLIL_INPUT_TERMINATE)); |
3314 | if (!(inp->dlth_flags & ~DLIL_INPUT_RUNNING)) { |
3315 | break; |
3316 | } |
3317 | } |
3318 | |
3319 | inp->dlth_flags &= ~DLIL_INPUT_RUNNING; |
3320 | 	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT); |
3321 | 	lck_mtx_unlock(&inp->dlth_lock); |
3322 | 	(void) thread_block_parameter(dlil_main_input_thread_cont, inp); |
3323 | |
3324 | VERIFY(0); /* we should never get here */ |
3325 | /* NOTREACHED */ |
3326 | __builtin_unreachable(); |
3327 | } |
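/*
 * The assert_wait()/thread_block_parameter() pairing above is the
 * stackless continuation idiom: the thread surrenders its kernel stack
 * while blocked and resumes in the continuation function on wakeup,
 * which is why the main loop lives in the _cont() routine rather than
 * after the block call.  The skeleton shared by all three input
 * threads in this file:
 *
 *	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT);
 *	lck_mtx_unlock(&inp->dlth_lock);
 *	(void) thread_block_parameter(the_cont_func, inp);
 *	(never returns here)
 */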
3328 | |
3329 | /* |
3330 | * Input thread for interfaces with legacy input model. |
3331 | */ |
3332 | __attribute__((noreturn)) |
3333 | static void |
3334 | dlil_input_thread_func(void *v, wait_result_t w) |
3335 | { |
3336 | #pragma unused(w) |
3337 | char thread_name[MAXTHREADNAMESIZE]; |
3338 | struct dlil_threading_info *inp = v; |
3339 | struct ifnet *ifp = inp->dlth_ifp; |
3340 | |
3341 | VERIFY(inp != dlil_main_input_thread); |
3342 | VERIFY(ifp != NULL); |
3343 | VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll || |
3344 | !(ifp->if_xflags & IFXF_LEGACY)); |
3345 | VERIFY(ifp->if_poll_mode == IFNET_MODEL_INPUT_POLL_OFF || |
3346 | !(ifp->if_xflags & IFXF_LEGACY)); |
3347 | VERIFY(current_thread() == inp->dlth_thread); |
3348 | |
3349 | /* construct the name for this thread, and then apply it */ |
3350 | 	bzero(thread_name, sizeof(thread_name)); |
3351 | 	(void) snprintf(thread_name, sizeof(thread_name), |
3352 | 	    "dlil_input_%s", ifp->if_xname); |
3353 | 	thread_set_thread_name(inp->dlth_thread, thread_name); |
3354 | |
3355 | 	lck_mtx_lock(&inp->dlth_lock); |
3356 | 	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING))); |
3357 | 	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT); |
3358 | 	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC; |
3359 | 	/* wake up once to get out of embryonic state */ |
3360 | 	dlil_input_wakeup(inp); |
3361 | 	lck_mtx_unlock(&inp->dlth_lock); |
3362 | 	(void) thread_block_parameter(dlil_input_thread_cont, inp); |
3363 | /* NOTREACHED */ |
3364 | __builtin_unreachable(); |
3365 | } |
3366 | |
3367 | __attribute__((noreturn)) |
3368 | static void |
3369 | dlil_input_thread_cont(void *v, wait_result_t wres) |
3370 | { |
3371 | struct dlil_threading_info *inp = v; |
3372 | struct ifnet *ifp = inp->dlth_ifp; |
3373 | |
3374 | 	lck_mtx_lock_spin(&inp->dlth_lock); |
3375 | if (__improbable(wres == THREAD_INTERRUPTED || |
3376 | (inp->dlth_flags & DLIL_INPUT_TERMINATE))) { |
3377 | goto terminate; |
3378 | } |
3379 | |
3380 | VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING)); |
3381 | inp->dlth_flags |= DLIL_INPUT_RUNNING; |
3382 | |
3383 | while (1) { |
3384 | struct mbuf *m = NULL; |
3385 | classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); |
3386 | boolean_t notify = FALSE; |
3387 | boolean_t embryonic; |
3388 | u_int32_t m_cnt; |
3389 | |
3390 | inp->dlth_flags &= ~DLIL_INPUT_WAITING; |
3391 | |
3392 | if (__improbable(embryonic = |
3393 | (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) { |
3394 | inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC; |
3395 | } |
3396 | |
3397 | /* |
3398 | * Protocol registration and injection must always use |
3399 | * the main input thread; in theory the latter can utilize |
3400 | * the corresponding input thread where the packet arrived |
3401 | * on, but that requires our knowing the interface in advance |
3402 | 		 * (and the benefits might not be worth the trouble). |
3403 | */ |
3404 | VERIFY(!(inp->dlth_flags & |
3405 | (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER))); |
3406 | |
3407 | /* Packets for this interface */ |
3408 | m_cnt = qlen(&inp->dlth_pkts); |
3409 | _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL); |
3410 | m = pkt.cp_mbuf; |
3411 | |
3412 | inp->dlth_wtot = 0; |
3413 | |
3414 | #if SKYWALK |
3415 | /* |
3416 | * If this interface is attached to a netif nexus, |
3417 | * the stats are already incremented there; otherwise |
3418 | * do it here. |
3419 | */ |
3420 | if (!(ifp->if_capabilities & IFCAP_SKYWALK)) |
3421 | #endif /* SKYWALK */ |
3422 | notify = dlil_input_stats_sync(ifp, inp); |
3423 | |
3424 | 		lck_mtx_unlock(&inp->dlth_lock); |
3425 | |
3426 | if (__improbable(embryonic)) { |
3427 | ifnet_decr_pending_thread_count(ifp); |
3428 | } |
3429 | |
3430 | if (__improbable(notify)) { |
3431 | ifnet_notify_data_threshold(ifp); |
3432 | } |
3433 | |
3434 | /* |
3435 | * NOTE warning %%% attention !!!! |
3436 | * We should think about putting some thread starvation |
3437 | * safeguards if we deal with long chains of packets. |
3438 | */ |
3439 | if (__probable(m != NULL)) { |
3440 | dlil_input_packet_list_extended(NULL, m, |
3441 | m_cnt, ifp->if_poll_mode); |
3442 | } |
3443 | |
3444 | 		lck_mtx_lock_spin(&inp->dlth_lock); |
3445 | VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING); |
3446 | if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING | |
3447 | DLIL_INPUT_TERMINATE))) { |
3448 | break; |
3449 | } |
3450 | } |
3451 | |
3452 | inp->dlth_flags &= ~DLIL_INPUT_RUNNING; |
3453 | |
3454 | if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) { |
3455 | terminate: |
3456 | 		lck_mtx_unlock(&inp->dlth_lock); |
3457 | 		dlil_terminate_input_thread(inp); |
3458 | 		/* NOTREACHED */ |
3459 | 	} else { |
3460 | 		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT); |
3461 | 		lck_mtx_unlock(&inp->dlth_lock); |
3462 | 		(void) thread_block_parameter(dlil_input_thread_cont, inp); |
3463 | /* NOTREACHED */ |
3464 | } |
3465 | |
3466 | VERIFY(0); /* we should never get here */ |
3467 | /* NOTREACHED */ |
3468 | __builtin_unreachable(); |
3469 | } |
3470 | |
3471 | /* |
3472 | * Input thread for interfaces with opportunistic polling input model. |
3473 | */ |
3474 | __attribute__((noreturn)) |
3475 | static void |
3476 | dlil_rxpoll_input_thread_func(void *v, wait_result_t w) |
3477 | { |
3478 | #pragma unused(w) |
3479 | char thread_name[MAXTHREADNAMESIZE]; |
3480 | struct dlil_threading_info *inp = v; |
3481 | struct ifnet *ifp = inp->dlth_ifp; |
3482 | |
3483 | VERIFY(inp != dlil_main_input_thread); |
3484 | VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL) && |
3485 | (ifp->if_xflags & IFXF_LEGACY)); |
3486 | VERIFY(current_thread() == inp->dlth_thread); |
3487 | |
3488 | /* construct the name for this thread, and then apply it */ |
3489 | 	bzero(thread_name, sizeof(thread_name)); |
3490 | 	(void) snprintf(thread_name, sizeof(thread_name), |
3491 | 	    "dlil_input_poll_%s", ifp->if_xname); |
3492 | 	thread_set_thread_name(inp->dlth_thread, thread_name); |
3493 | |
3494 | 	lck_mtx_lock(&inp->dlth_lock); |
3495 | 	VERIFY(!(inp->dlth_flags & (DLIL_INPUT_EMBRYONIC | DLIL_INPUT_RUNNING))); |
3496 | 	(void) assert_wait(&inp->dlth_flags, THREAD_UNINT); |
3497 | 	inp->dlth_flags |= DLIL_INPUT_EMBRYONIC; |
3498 | 	/* wake up once to get out of embryonic state */ |
3499 | 	dlil_input_wakeup(inp); |
3500 | 	lck_mtx_unlock(&inp->dlth_lock); |
3501 | 	(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, inp); |
3502 | /* NOTREACHED */ |
3503 | __builtin_unreachable(); |
3504 | } |
3505 | |
3506 | __attribute__((noreturn)) |
3507 | static void |
3508 | dlil_rxpoll_input_thread_cont(void *v, wait_result_t wres) |
3509 | { |
3510 | struct dlil_threading_info *inp = v; |
3511 | struct ifnet *ifp = inp->dlth_ifp; |
3512 | struct timespec ts; |
3513 | |
3514 | 	lck_mtx_lock_spin(&inp->dlth_lock); |
3515 | if (__improbable(wres == THREAD_INTERRUPTED || |
3516 | (inp->dlth_flags & DLIL_INPUT_TERMINATE))) { |
3517 | goto terminate; |
3518 | } |
3519 | |
3520 | VERIFY(!(inp->dlth_flags & DLIL_INPUT_RUNNING)); |
3521 | inp->dlth_flags |= DLIL_INPUT_RUNNING; |
3522 | |
3523 | while (1) { |
3524 | struct mbuf *m = NULL; |
3525 | uint32_t m_cnt, poll_req = 0; |
3526 | uint64_t m_size = 0; |
3527 | ifnet_model_t mode; |
3528 | struct timespec now, delta; |
3529 | classq_pkt_t pkt = CLASSQ_PKT_INITIALIZER(pkt); |
3530 | boolean_t notify; |
3531 | boolean_t embryonic; |
3532 | uint64_t ival; |
3533 | |
3534 | inp->dlth_flags &= ~DLIL_INPUT_WAITING; |
3535 | |
3536 | if (__improbable(embryonic = |
3537 | (inp->dlth_flags & DLIL_INPUT_EMBRYONIC))) { |
3538 | inp->dlth_flags &= ~DLIL_INPUT_EMBRYONIC; |
3539 | goto skip; |
3540 | } |
3541 | |
3542 | if ((ival = ifp->if_rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) { |
3543 | ival = IF_RXPOLL_INTERVALTIME_MIN; |
3544 | } |
3545 | |
3546 | /* Link parameters changed? */ |
3547 | if (ifp->if_poll_update != 0) { |
3548 | ifp->if_poll_update = 0; |
3549 | (void) dlil_rxpoll_set_params(ifp, NULL, TRUE); |
3550 | } |
3551 | |
3552 | /* Current operating mode */ |
3553 | mode = ifp->if_poll_mode; |
3554 | |
3555 | /* |
3556 | * Protocol registration and injection must always use |
3557 | * the main input thread; in theory the latter can utilize |
3558 | * the corresponding input thread where the packet arrived |
3559 | * on, but that requires our knowing the interface in advance |
3560 | 		 * (and the benefits might not be worth the trouble). |
3561 | */ |
3562 | VERIFY(!(inp->dlth_flags & |
3563 | (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER))); |
3564 | |
3565 | /* Total count of all packets */ |
3566 | m_cnt = qlen(&inp->dlth_pkts); |
3567 | |
3568 | /* Total bytes of all packets */ |
3569 | m_size = qsize(&inp->dlth_pkts); |
3570 | |
3571 | /* Packets for this interface */ |
3572 | _getq_all(&inp->dlth_pkts, &pkt, NULL, NULL, NULL); |
3573 | m = pkt.cp_mbuf; |
3574 | VERIFY(m != NULL || m_cnt == 0); |
3575 | |
3576 | 		nanouptime(&now); |
3577 | if (!net_timerisset(&ifp->if_poll_sample_lasttime)) { |
3578 | *(&ifp->if_poll_sample_lasttime) = *(&now); |
3579 | } |
3580 | |
3581 | net_timersub(&now, &ifp->if_poll_sample_lasttime, &delta); |
3582 | if (if_rxpoll && net_timerisset(&ifp->if_poll_sample_holdtime)) { |
3583 | u_int32_t ptot, btot; |
3584 | |
3585 | /* Accumulate statistics for current sampling */ |
3586 | PKTCNTR_ADD(&ifp->if_poll_sstats, m_cnt, m_size); |
3587 | |
3588 | if (net_timercmp(&delta, &ifp->if_poll_sample_holdtime, <)) { |
3589 | goto skip; |
3590 | } |
3591 | |
3592 | *(&ifp->if_poll_sample_lasttime) = *(&now); |
3593 | |
3594 | /* Calculate min/max of inbound bytes */ |
3595 | btot = (u_int32_t)ifp->if_poll_sstats.bytes; |
3596 | if (ifp->if_rxpoll_bmin == 0 || ifp->if_rxpoll_bmin > btot) { |
3597 | ifp->if_rxpoll_bmin = btot; |
3598 | } |
3599 | if (btot > ifp->if_rxpoll_bmax) { |
3600 | ifp->if_rxpoll_bmax = btot; |
3601 | } |
3602 | |
3603 | /* Calculate EWMA of inbound bytes */ |
3604 | DLIL_EWMA(ifp->if_rxpoll_bavg, btot, if_rxpoll_decay); |
3605 | |
3606 | /* Calculate min/max of inbound packets */ |
3607 | ptot = (u_int32_t)ifp->if_poll_sstats.packets; |
3608 | if (ifp->if_rxpoll_pmin == 0 || ifp->if_rxpoll_pmin > ptot) { |
3609 | ifp->if_rxpoll_pmin = ptot; |
3610 | } |
3611 | if (ptot > ifp->if_rxpoll_pmax) { |
3612 | ifp->if_rxpoll_pmax = ptot; |
3613 | } |
3614 | |
3615 | /* Calculate EWMA of inbound packets */ |
3616 | DLIL_EWMA(ifp->if_rxpoll_pavg, ptot, if_rxpoll_decay); |
3617 | |
3618 | /* Reset sampling statistics */ |
3619 | PKTCNTR_CLEAR(&ifp->if_poll_sstats); |
3620 | |
3621 | /* Calculate EWMA of wakeup requests */ |
3622 | DLIL_EWMA(ifp->if_rxpoll_wavg, inp->dlth_wtot, |
3623 | if_rxpoll_decay); |
3624 | inp->dlth_wtot = 0; |
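			/*
			 * DLIL_EWMA presumably implements the standard
			 * exponentially weighted moving average with weight
			 * 1/2^if_rxpoll_decay, i.e. roughly
			 * avg += (sample - avg) >> decay.  For example, with
			 * decay 5 (alpha = 1/32), avg 320 and sample 640,
			 * the new average is 320 + (640 - 320) / 32 = 330.
			 */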
3625 | |
3626 | if (dlil_verbose) { |
3627 | if (!net_timerisset(&ifp->if_poll_dbg_lasttime)) { |
3628 | *(&ifp->if_poll_dbg_lasttime) = *(&now); |
3629 | } |
3630 | net_timersub(&now, &ifp->if_poll_dbg_lasttime, &delta); |
3631 | if (net_timercmp(&delta, &dlil_dbgrate, >=)) { |
3632 | *(&ifp->if_poll_dbg_lasttime) = *(&now); |
3633 | DLIL_PRINTF("%s: [%s] pkts avg %d max %d " |
3634 | "limits [%d/%d], wreq avg %d " |
3635 | "limits [%d/%d], bytes avg %d " |
3636 | 					    "limits [%d/%d]\n", if_name(ifp), |
3637 | 					    (ifp->if_poll_mode == |
3638 | 					    IFNET_MODEL_INPUT_POLL_ON) ? |
3639 | 					    "ON" : "OFF", ifp->if_rxpoll_pavg, |
3640 | ifp->if_rxpoll_pmax, |
3641 | ifp->if_rxpoll_plowat, |
3642 | ifp->if_rxpoll_phiwat, |
3643 | ifp->if_rxpoll_wavg, |
3644 | ifp->if_rxpoll_wlowat, |
3645 | ifp->if_rxpoll_whiwat, |
3646 | ifp->if_rxpoll_bavg, |
3647 | ifp->if_rxpoll_blowat, |
3648 | ifp->if_rxpoll_bhiwat); |
3649 | } |
3650 | } |
3651 | |
3652 | /* Perform mode transition, if necessary */ |
3653 | if (!net_timerisset(&ifp->if_poll_mode_lasttime)) { |
3654 | *(&ifp->if_poll_mode_lasttime) = *(&now); |
3655 | } |
3656 | |
3657 | net_timersub(&now, &ifp->if_poll_mode_lasttime, &delta); |
3658 | if (net_timercmp(&delta, &ifp->if_poll_mode_holdtime, <)) { |
3659 | goto skip; |
3660 | } |
3661 | |
3662 | if (ifp->if_rxpoll_pavg <= ifp->if_rxpoll_plowat && |
3663 | ifp->if_rxpoll_bavg <= ifp->if_rxpoll_blowat && |
3664 | ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_OFF) { |
3665 | mode = IFNET_MODEL_INPUT_POLL_OFF; |
3666 | } else if (ifp->if_rxpoll_pavg >= ifp->if_rxpoll_phiwat && |
3667 | (ifp->if_rxpoll_bavg >= ifp->if_rxpoll_bhiwat || |
3668 | ifp->if_rxpoll_wavg >= ifp->if_rxpoll_whiwat) && |
3669 | ifp->if_poll_mode != IFNET_MODEL_INPUT_POLL_ON) { |
3670 | mode = IFNET_MODEL_INPUT_POLL_ON; |
3671 | } |
3672 | |
3673 | if (mode != ifp->if_poll_mode) { |
3674 | ifp->if_poll_mode = mode; |
3675 | *(&ifp->if_poll_mode_lasttime) = *(&now); |
3676 | poll_req++; |
3677 | } |
3678 | } |
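		/*
		 * Worked example of the hysteresis above, with hypothetical
		 * watermarks plowat/phiwat = 10/100 packets: a burst that
		 * drives the packet EWMA to >= 100 (with the byte or wakeup
		 * EWMA also at its high watermark) flips the mode to
		 * POLL_ON; it drops back to POLL_OFF only once both the
		 * packet and byte EWMAs decay to their low watermarks
		 * (<= 10 packets here), and each transition is additionally
		 * rate-limited by if_poll_mode_holdtime.
		 */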
3679 | skip: |
3680 | notify = dlil_input_stats_sync(ifp, inp); |
3681 | |
3682 | 		lck_mtx_unlock(&inp->dlth_lock); |
3683 | |
3684 | if (__improbable(embryonic)) { |
3685 | ifnet_decr_pending_thread_count(ifp); |
3686 | } |
3687 | |
3688 | if (__improbable(notify)) { |
3689 | ifnet_notify_data_threshold(ifp); |
3690 | } |
3691 | |
3692 | /* |
3693 | * If there's a mode change and interface is still attached, |
3694 | * perform a downcall to the driver for the new mode. Also |
3695 | * hold an IO refcnt on the interface to prevent it from |
3696 | 		 * being detached (will be released below). |
3697 | */ |
3698 | 		if (poll_req != 0 && ifnet_is_attached(ifp, 1)) { |
3699 | struct ifnet_model_params p = { |
3700 | .model = mode, .reserved = { 0 } |
3701 | }; |
3702 | errno_t err; |
3703 | |
3704 | if (dlil_verbose) { |
3705 | DLIL_PRINTF("%s: polling is now %s, " |
3706 | "pkts avg %d max %d limits [%d/%d], " |
3707 | "wreq avg %d limits [%d/%d], " |
3708 | 				    "bytes avg %d limits [%d/%d]\n", |
3709 | 				    if_name(ifp), |
3710 | 				    (mode == IFNET_MODEL_INPUT_POLL_ON) ? |
3711 | 				    "ON" : "OFF", ifp->if_rxpoll_pavg, |
3712 | ifp->if_rxpoll_pmax, ifp->if_rxpoll_plowat, |
3713 | ifp->if_rxpoll_phiwat, ifp->if_rxpoll_wavg, |
3714 | ifp->if_rxpoll_wlowat, ifp->if_rxpoll_whiwat, |
3715 | ifp->if_rxpoll_bavg, ifp->if_rxpoll_blowat, |
3716 | ifp->if_rxpoll_bhiwat); |
3717 | } |
3718 | |
3719 | if ((err = ((*ifp->if_input_ctl)(ifp, |
3720 | IFNET_CTL_SET_INPUT_MODEL, sizeof(p), &p))) != 0) { |
3721 | DLIL_PRINTF("%s: error setting polling mode " |
3722 | 				    "to %s (%d)\n", if_name(ifp), |
3723 | 				    (mode == IFNET_MODEL_INPUT_POLL_ON) ? |
3724 | 				    "ON" : "OFF", err); |
3725 | } |
3726 | |
3727 | switch (mode) { |
3728 | case IFNET_MODEL_INPUT_POLL_OFF: |
3729 | ifnet_set_poll_cycle(ifp, NULL); |
3730 | ifp->if_rxpoll_offreq++; |
3731 | if (err != 0) { |
3732 | ifp->if_rxpoll_offerr++; |
3733 | } |
3734 | break; |
3735 | |
3736 | case IFNET_MODEL_INPUT_POLL_ON: |
3737 | net_nsectimer(&ival, &ts); |
3738 | ifnet_set_poll_cycle(ifp, &ts); |
3739 | ifnet_poll(ifp); |
3740 | ifp->if_rxpoll_onreq++; |
3741 | if (err != 0) { |
3742 | ifp->if_rxpoll_onerr++; |
3743 | } |
3744 | break; |
3745 | |
3746 | default: |
3747 | VERIFY(0); |
3748 | /* NOTREACHED */ |
3749 | } |
3750 | |
3751 | /* Release the IO refcnt */ |
3752 | ifnet_decr_iorefcnt(ifp); |
3753 | } |
3754 | |
3755 | /* |
3756 | * NOTE warning %%% attention !!!! |
3757 | * We should think about putting some thread starvation |
3758 | * safeguards if we deal with long chains of packets. |
3759 | */ |
3760 | if (__probable(m != NULL)) { |
3761 | dlil_input_packet_list_extended(NULL, m, m_cnt, mode); |
3762 | } |
3763 | |
3764 | 		lck_mtx_lock_spin(&inp->dlth_lock); |
3765 | VERIFY(inp->dlth_flags & DLIL_INPUT_RUNNING); |
3766 | if (!(inp->dlth_flags & ~(DLIL_INPUT_RUNNING | |
3767 | DLIL_INPUT_TERMINATE))) { |
3768 | break; |
3769 | } |
3770 | } |
3771 | |
3772 | inp->dlth_flags &= ~DLIL_INPUT_RUNNING; |
3773 | |
3774 | if (__improbable(inp->dlth_flags & DLIL_INPUT_TERMINATE)) { |
3775 | terminate: |
3776 | 		lck_mtx_unlock(&inp->dlth_lock); |
3777 | dlil_terminate_input_thread(inp); |
3778 | /* NOTREACHED */ |
3779 | } else { |
3780 | 		(void) assert_wait(&inp->dlth_flags, THREAD_UNINT); |
3781 | 		lck_mtx_unlock(&inp->dlth_lock); |
3782 | 		(void) thread_block_parameter(dlil_rxpoll_input_thread_cont, |
3783 | 		    inp); |
3784 | /* NOTREACHED */ |
3785 | } |
3786 | |
3787 | VERIFY(0); /* we should never get here */ |
3788 | /* NOTREACHED */ |
3789 | __builtin_unreachable(); |
3790 | } |
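/*
 * The IFNET_CTL_SET_INPUT_MODEL downcall above lands in the driver's
 * if_input_ctl callback (an ifnet_ctl_func_t registered at attach
 * time).  A hedged sketch of the driver side; my_input_ctl and
 * my_set_rx_intr are hypothetical:
 *
 *	static errno_t
 *	my_input_ctl(ifnet_t ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen,
 *	    void *arg)
 *	{
 *		if (cmd == IFNET_CTL_SET_INPUT_MODEL &&
 *		    arglen == sizeof(struct ifnet_model_params)) {
 *			struct ifnet_model_params *p = arg;
 *			boolean_t poll_on =
 *			    (p->model == IFNET_MODEL_INPUT_POLL_ON);
 *			my_set_rx_intr(ifp, !poll_on);
 *			return 0;
 *		}
 *		return ENOTSUP;
 *	}
 */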
3791 | |
3792 | errno_t |
3793 | dlil_rxpoll_validate_params(struct ifnet_poll_params *p) |
3794 | { |
3795 | if (p != NULL) { |
3796 | if ((p->packets_lowat == 0 && p->packets_hiwat != 0) || |
3797 | (p->packets_lowat != 0 && p->packets_hiwat == 0)) { |
3798 | return EINVAL; |
3799 | } |
3800 | if (p->packets_lowat != 0 && /* hiwat must be non-zero */ |
3801 | p->packets_lowat >= p->packets_hiwat) { |
3802 | return EINVAL; |
3803 | } |
3804 | if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) || |
3805 | (p->bytes_lowat != 0 && p->bytes_hiwat == 0)) { |
3806 | return EINVAL; |
3807 | } |
3808 | if (p->bytes_lowat != 0 && /* hiwat must be non-zero */ |
3809 | p->bytes_lowat >= p->bytes_hiwat) { |
3810 | return EINVAL; |
3811 | } |
3812 | if (p->interval_time != 0 && |
3813 | p->interval_time < IF_RXPOLL_INTERVALTIME_MIN) { |
3814 | p->interval_time = IF_RXPOLL_INTERVALTIME_MIN; |
3815 | } |
3816 | } |
3817 | return 0; |
3818 | } |
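/*
 * Callers typically reach this validator via ifnet_set_poll_params().
 * A minimal sketch of a parameter block that passes the rules above:
 * each lowat/hiwat pair is either both zero (auto-tune) or both
 * non-zero with lowat < hiwat; the values themselves are illustrative:
 *
 *	struct ifnet_poll_params p = {
 *		.packets_lowat = 8,
 *		.packets_hiwat = 64,	(must exceed packets_lowat)
 *		.bytes_lowat   = 0,	(0/0 pair requests auto-tuning)
 *		.bytes_hiwat   = 0,
 *		.interval_time = 1000 * 1000,	(raised to the minimum if low)
 *	};
 *	errno_t err = ifnet_set_poll_params(ifp, &p);
 */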
3819 | |
3820 | void |
3821 | dlil_rxpoll_update_params(struct ifnet *ifp, struct ifnet_poll_params *p) |
3822 | { |
3823 | u_int64_t sample_holdtime, inbw; |
3824 | |
3825 | if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) { |
3826 | sample_holdtime = 0; /* polling is disabled */ |
3827 | ifp->if_rxpoll_wlowat = ifp->if_rxpoll_plowat = |
3828 | ifp->if_rxpoll_blowat = 0; |
3829 | ifp->if_rxpoll_whiwat = ifp->if_rxpoll_phiwat = |
3830 | ifp->if_rxpoll_bhiwat = (u_int32_t)-1; |
3831 | ifp->if_rxpoll_plim = 0; |
3832 | ifp->if_rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN; |
3833 | } else { |
3834 | u_int32_t plowat, phiwat, blowat, bhiwat, plim; |
3835 | u_int64_t ival; |
3836 | unsigned int n, i; |
3837 | |
3838 | for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) { |
3839 | if (inbw < rxpoll_tbl[i].speed) { |
3840 | break; |
3841 | } |
3842 | n = i; |
3843 | } |
3844 | /* auto-tune if caller didn't specify a value */ |
3845 | plowat = ((p == NULL || p->packets_lowat == 0) ? |
3846 | rxpoll_tbl[n].plowat : p->packets_lowat); |
3847 | phiwat = ((p == NULL || p->packets_hiwat == 0) ? |
3848 | rxpoll_tbl[n].phiwat : p->packets_hiwat); |
3849 | blowat = ((p == NULL || p->bytes_lowat == 0) ? |
3850 | rxpoll_tbl[n].blowat : p->bytes_lowat); |
3851 | bhiwat = ((p == NULL || p->bytes_hiwat == 0) ? |
3852 | rxpoll_tbl[n].bhiwat : p->bytes_hiwat); |
3853 | plim = ((p == NULL || p->packets_limit == 0 || |
3854 | if_rxpoll_max != 0) ? if_rxpoll_max : p->packets_limit); |
3855 | ival = ((p == NULL || p->interval_time == 0 || |
3856 | if_rxpoll_interval_time != IF_RXPOLL_INTERVALTIME) ? |
3857 | if_rxpoll_interval_time : p->interval_time); |
3858 | |
3859 | VERIFY(plowat != 0 && phiwat != 0); |
3860 | VERIFY(blowat != 0 && bhiwat != 0); |
3861 | VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN); |
3862 | |
3863 | sample_holdtime = if_rxpoll_sample_holdtime; |
3864 | ifp->if_rxpoll_wlowat = if_sysctl_rxpoll_wlowat; |
3865 | ifp->if_rxpoll_whiwat = if_sysctl_rxpoll_whiwat; |
3866 | ifp->if_rxpoll_plowat = plowat; |
3867 | ifp->if_rxpoll_phiwat = phiwat; |
3868 | ifp->if_rxpoll_blowat = blowat; |
3869 | ifp->if_rxpoll_bhiwat = bhiwat; |
3870 | ifp->if_rxpoll_plim = plim; |
3871 | ifp->if_rxpoll_ival = ival; |
3872 | } |
3873 | |
3874 | net_nsectimer(&if_rxpoll_mode_holdtime, &ifp->if_poll_mode_holdtime); |
3875 | net_nsectimer(&sample_holdtime, &ifp->if_poll_sample_holdtime); |
3876 | |
3877 | if (dlil_verbose) { |
3878 | DLIL_PRINTF("% |