1/*
2 * Copyright (c) 1999-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#ifndef DLIL_H
29#define DLIL_H
30#ifdef KERNEL
31
32#include <sys/kernel_types.h>
33#include <net/kpi_interface.h>
34
/*
 * BPF tap mode constants.  The values are consecutive, starting at zero.
 */
enum {
	BPF_TAP_DISABLE      = 0,
	BPF_TAP_INPUT        = 1,
	BPF_TAP_OUTPUT       = 2,
	BPF_TAP_INPUT_OUTPUT = 3
};
41
42/*
43 * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
44 * variants.native_type_length must be set to 2
45 * DLIL_DESC_SAP - native_type must point to 3 byte SAP protocol
46 * variants.native_type_length must be set to 3
47 * DLIL_DESC_SNAP - native_type must point to 5 byte SNAP protocol
48 * variants.native_type_length must be set to 5
49 *
50 * All protocols must be in Network byte order.
51 *
52 * Future interface families may define more protocol types they know about.
53 * The type implies the offset and context of the protocol data at native_type.
54 * The length of the protocol data specified at native_type must be set in
55 * variants.native_type_length.
56 */
/*
 * Ethernet specific types.  Each one fixes the number of bytes found at
 * native_type, which must also be stored in variants.native_type_length.
 */
#define DLIL_DESC_ETYPE2        4       /* 2 byte ethernet raw protocol */
#define DLIL_DESC_SAP           5       /* 3 byte SAP protocol */
#define DLIL_DESC_SNAP          6       /* 5 byte SNAP protocol */
61
62#ifdef KERNEL_PRIVATE
63#include <net/if.h>
64#include <net/if_var.h>
65#include <net/classq/classq.h>
66#include <net/flowadv.h>
67#include <sys/kern_event.h>
68#include <kern/thread.h>
69#include <kern/locks.h>
70
71#ifdef BSD_KERNEL_PRIVATE
72/* Operations on timespecs. */
/*
 * net_timerclear(tvp): set both tv_sec and tv_nsec of the timespec at tvp
 * to zero.
 *
 * The expansion is wrapped in parentheses so that it stays one expression
 * (with value 0) when the macro is used inside a larger expression; without
 * them, neighbouring operators would bind to parts of the assignment.
 * Note that tvp is evaluated twice.
 */
#define net_timerclear(tvp)     ((tvp)->tv_sec = (tvp)->tv_nsec = 0)
74
/*
 * net_timerisset(tvp): evaluates to 1 when either field of the timespec at
 * tvp is non-zero, and to 0 when both are zero.
 */
#define net_timerisset(tvp)     ((tvp)->tv_sec != 0 || (tvp)->tv_nsec != 0)
76
/*
 * net_timercmp(tvp, uvp, cmp): compare two timespecs using the relational
 * operator token cmp.  When the seconds differ they decide the result;
 * when they are equal the nanoseconds are compared instead.
 */
#define net_timercmp(tvp, uvp, cmp)                                     \
	(((tvp)->tv_sec != (uvp)->tv_sec) ?                             \
	    ((tvp)->tv_sec cmp (uvp)->tv_sec) :                         \
	    ((tvp)->tv_nsec cmp (uvp)->tv_nsec))
81
/*
 * net_timeradd(tvp, uvp, vvp): store the sum of the timespecs at tvp and
 * uvp into the timespec at vvp.  A nanosecond sum of NSEC_PER_SEC or more
 * is folded back by carrying one second.
 */
#define net_timeradd(tvp, uvp, vvp) do {                                \
	(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;                  \
	(vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;               \
	if ((long)NSEC_PER_SEC <= (vvp)->tv_nsec) {                     \
	        (vvp)->tv_sec += 1;                                     \
	        (vvp)->tv_nsec = (vvp)->tv_nsec - NSEC_PER_SEC;         \
	}                                                               \
} while (0)
90
/*
 * net_timersub(tvp, uvp, vvp): store the difference (*tvp - *uvp) into the
 * timespec at vvp.  A negative nanosecond difference is fixed up by
 * borrowing one second.
 */
#define net_timersub(tvp, uvp, vvp) do {                                \
	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;                  \
	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;               \
	if (0 > (vvp)->tv_nsec) {                                       \
	        (vvp)->tv_sec -= 1;                                     \
	        (vvp)->tv_nsec = (vvp)->tv_nsec + NSEC_PER_SEC;         \
	}                                                               \
} while (0)
99
/*
 * net_timerusec(tvp, nsp): convert the timespec at tvp and store the result
 * in *nsp.  The nanosecond field is divided by NSEC_PER_USEC and, only when
 * tv_sec is positive, tv_sec * USEC_PER_SEC is added on top.  A zero or
 * negative tv_sec contributes nothing.
 */
#define net_timerusec(tvp, nsp) do {                                    \
	*(nsp) = (tvp)->tv_nsec / NSEC_PER_USEC;                        \
	if (0 < (tvp)->tv_sec) {                                        \
	        *(nsp) += ((tvp)->tv_sec * USEC_PER_SEC);               \
	}                                                               \
} while (0)
105
/*
 * net_timernsec(tvp, nsp): flatten the timespec at tvp into *nsp.  The
 * result starts as tv_nsec and, only when tv_sec is positive, has
 * tv_sec * NSEC_PER_SEC added to it.  A zero or negative tv_sec
 * contributes nothing.
 */
#define net_timernsec(tvp, nsp) do {                                    \
	*(nsp) = (tvp)->tv_nsec;                                        \
	if (0 < (tvp)->tv_sec) {                                        \
	        *(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);               \
	}                                                               \
} while (0)
111
/*
 * net_nsectimer(nsp, tvp): split the 64-bit nanosecond count read from *nsp
 * into whole seconds, stored in (tvp)->tv_sec, and the remaining
 * nanoseconds, stored in (tvp)->tv_nsec.  Two expansions are provided,
 * selected by target architecture.  Both declare locals, so the macro can
 * only be used as a statement.
 */
#if defined(__x86_64__) || defined(__arm64__)
/* x86_64 and arm64: one 64-bit division, remainder by multiply-subtract. */
#define net_nsectimer(nsp, tvp) do {                                    \
	u_int64_t __nsp = *(nsp);                                       \
	net_timerclear(tvp);                                            \
	uint64_t __sec = __nsp / NSEC_PER_SEC;                          \
	(tvp)->tv_sec = (__darwin_time_t)__sec;                         \
	(tvp)->tv_nsec = (long)(__nsp - __sec * NSEC_PER_SEC);          \
} while (0)
#else /* 32 bit */
/*
 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
 * because __darwin_time_t is 32 bit on 32-bit platforms.  This bound
 * is < 2^61.  We get a first approximation to convert into seconds using
 * the following values.
 * a = floor(NSEC / 2^29)
 * inv = floor(2^61 / 10^9)
 *
 * The product a * inv, shifted right by 32, approximates NSEC / 10^9
 * because 2^29 * 2^32 = 2^61; inv is the constant 0x89705F41.
 *
 * The approximation of seconds is correct or too low by 1 unit.
 * So we fix it by computing the remainder: when the remainder is still
 * at least NSEC_PER_SEC, one more second is added and NSEC_PER_SEC is
 * taken off the nanosecond part.
 */
#define net_nsectimer(nsp, tvp) do {                                    \
	u_int64_t __nsp = *(nsp);                                       \
	net_timerclear(tvp);                                            \
	uint32_t __a = (uint32_t)(__nsp >> 29);                         \
	const uint32_t __inv = 0x89705F41;                              \
	uint32_t __sec = (uint32_t)(((uint64_t)__a * __inv) >> 32);     \
	uint32_t __rem = (uint32_t)(__nsp - __sec * NSEC_PER_SEC);      \
	__sec += ((__rem >= NSEC_PER_SEC) ? 1 : 0);                     \
	(tvp)->tv_sec = (__darwin_time_t)__sec;                         \
	(tvp)->tv_nsec =                                                \
	    (long)((__rem >= NSEC_PER_SEC) ? (__rem - NSEC_PER_SEC) : __rem);   \
} while(0)
#endif /* 32 bit */
145
/* Forward declarations of structure tags; no members are defined here. */
struct ifnet;
struct mbuf;
struct ether_header;
struct sockaddr_dl;
struct iff_filter;

/* Size, in bytes, of the dlth_name[] buffer in struct dlil_threading_info. */
#define DLIL_THREADNAME_LEN 32
153
/*
 * DLIL threading info
 *
 * Groups a packet queue, the interface it belongs to, pending statistics,
 * flag bits, a strategy callback and the thread handles associated with it.
 */
struct dlil_threading_info {
	/* NOTE(review): presumably guards the fields below -- confirm */
	decl_lck_mtx_data(, dlth_lock);
	class_queue_t   dlth_pkts;      /* queue of pkts */
	struct ifnet    *dlth_ifp;      /* pointer to interface */
	struct ifnet_stat_increment_param dlth_stats; /* incremental stats */
	uint32_t        dlth_flags;     /* thread flags (DLIL_INPUT_* and DLIL_PROTO_* bits) */
	uint32_t        dlth_wtot;      /* # of wakeup requests */

	/*
	 * strategy (sync or async)
	 *
	 * Takes this structure followed by the same parameter types as
	 * dlil_input_handler().
	 */
	errno_t (*dlth_strategy)(struct dlil_threading_info *,
	    struct ifnet *, struct mbuf *, struct mbuf *,
	    const struct ifnet_stat_increment_param *, boolean_t,
	    struct thread *);

	/*
	 * Thread affinity (workloop and DLIL threads).
	 */
	boolean_t       dlth_affinity;          /* affinity set is available */
	uint32_t        dlth_affinity_tag;      /* affinity tag */
	struct thread   *dlth_thread;           /* DLIL worker thread */
	struct thread   *dlth_driver_thread;    /* driver/workloop thread */
	struct thread   *dlth_poller_thread;    /* poll thread */

	lck_grp_t       *dlth_lock_grp; /* lock group (for lock stats) */
	char            dlth_name[DLIL_THREADNAME_LEN]; /* name storage */

	/* Accounting for trimming of input queues that exceed their limits */
	uint32_t        dlth_trim_cnt;  /* # of trim events */
	uint32_t        dlth_trim_pkts_dropped; /* # of packets dropped
	                                         * when trimming */
#if IFNET_INPUT_SANITY_CHK
	/*
	 * For debugging.  Present only when IFNET_INPUT_SANITY_CHK is
	 * non-zero, so the structure size depends on that setting.
	 */
	uint64_t        dlth_pkts_cnt;  /* total # of packets */
#endif
};
194
/*
 * DLIL input thread info (for main/loopback input thread)
 *
 * Embeds the common struct dlil_threading_info as its first member and
 * adds a second queue for lo0 packets.
 */
struct dlil_main_threading_info {
	struct dlil_threading_info      inp;    /* common threading info (first member) */
	class_queue_t   lo_rcvq_pkts;           /* queue of lo0 pkts */
};
202
/*
 * Valid values for dlth_flags.
 *
 * The following are shared with kpi_protocol.c so that it may wakeup
 * the input thread to run through packets queued for protocol input.
 *
 * Each value is a single bit of the 32-bit dlth_flags word: the first six
 * occupy bits 31 down to 26, and DLIL_INPUT_EMBRYONIC is bit 0.
 */
#define DLIL_INPUT_RUNNING              0x80000000
#define DLIL_INPUT_WAITING              0x40000000
#define DLIL_PROTO_REGISTER             0x20000000
#define DLIL_PROTO_WAITING              0x10000000
#define DLIL_INPUT_TERMINATE            0x08000000
#define DLIL_INPUT_TERMINATE_COMPLETE   0x04000000
#define DLIL_INPUT_EMBRYONIC            0x00000001

/*
 * Flags for dlil_attach_filter()
 *
 * NOTE(review): presumably passed as its trailing u_int32_t argument --
 * confirm against the definition.
 */
#define DLIL_IFF_TSO            0x01    /* Interface filter supports TSO */
#define DLIL_IFF_INTERNAL       0x02    /* Apple internal -- do not count towards stats */

/* Input poll interval definitions, expressed in nanoseconds */
#define IF_RXPOLL_INTERVALTIME_MIN      (1ULL * 1000)           /* 1 us */
#define IF_RXPOLL_INTERVALTIME          (1ULL * 1000 * 1000)    /* 1 ms */
/* Global variables; declared here, defined outside this header. */
extern int dlil_verbose;
extern uint32_t hwcksum_dbg;
extern uint32_t hwcksum_tx;
extern uint32_t hwcksum_rx;
extern struct dlil_threading_info *dlil_main_input_thread;
extern unsigned int net_rxpoll;
extern uint32_t if_rxpoll;
extern uint32_t if_rxpoll_decay;
extern uint32_t if_rxpoll_interval_pkts;
extern uint32_t if_rcvq_maxlen;

/* DLIL initialization routine; takes no arguments and returns nothing. */
extern void dlil_init(void);

/*
 * ioctl, output and start routines operating on a struct ifnet.
 * NOTE(review): the ifp_if_ prefix suggests these are default handlers
 * installed on an interface -- confirm against the definitions.
 */
extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
extern errno_t ifp_if_output(struct ifnet *, struct mbuf *);
extern void ifp_if_start(struct ifnet *);

/* Takes an interface, a BPF tap mode and a BPF packet callback. */
extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);

/*
 * Send arp internal bypasses the check for IPv4LL.
 *
 * The parameter list matches dlil_send_arp() without its trailing
 * u_int32_t argument.
 */
extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
252
/*
 * The following constants are used with the net_thread_mark_apply and
 * net_thread_is_unmarked functions to control the bits in the uu_network_marks
 * field of the uthread structure.
 *
 * The first three marks use bits 0 through 2; the marks that exist only
 * when SKYWALK is enabled use bits 16 through 22.
 */
#define NET_THREAD_HELD_PF      0x1     /* thread is holding PF lock */
#define NET_THREAD_HELD_DOMAIN  0x2     /* thread is holding domain_proto_mtx */
#define NET_THREAD_CKREQ_LLADDR 0x4     /* thread reqs MACF check for LLADDR */
#if SKYWALK
#define NET_THREAD_CHANNEL_SYNC 0x10000 /* thread is doing channel sync */
#define NET_THREAD_CACHE_UPDATE 0x20000 /* thread is doing cache update */
#define NET_THREAD_REGION_UPDATE 0x40000 /* thread is doing region update */
#define NET_THREAD_RX_NOTIFY    0x80000 /* thread is doing RX notify */
#define NET_THREAD_TX_NOTIFY    0x100000 /* thread is doing TX notify */
/*
 * NOTE(review): "AYSYNC" looks like a misspelling of "ASYNC"; the name is
 * kept as is because code outside this header may reference it.
 */
#define NET_THREAD_AYSYNC_TX    0x200000 /* require use of starter thread */
#define NET_THREAD_SYNC_RX      0x400000 /* request synchronous Rx handler */
#endif /* SKYWALK */
270
271/*
272 * net_thread_marks_t is a pointer to a phantom structure type used for
273 * manipulating the uthread:uu_network_marks field. As an example...
274 *
275 * static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
276 * struct uthread *uth = current_uthread();
277 *
278 * net_thread_marks_t marks = net_thread_marks_push(bits);
279 * VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
280 * net_thread_marks_pop(marks);
281 *
282 * The net_thread_marks_push() function returns an encoding of the bits
283 * that were changed from zero to one in the uu_network_marks field. When
284 * the net_thread_marks_pop() function later processes that value, it
285 * resets the bits to their previous value.
286 *
287 * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
288 * are similar to net_thread_marks_push() and net_thread_marks_pop() except
289 * they clear the marks bits in the guarded section rather than set them.
290 *
291 * The net_thread_is_marked() and net_thread_is_unmarked() functions return
292 * the subset of the bits that are currently set or cleared (respectively)
293 * in the uthread:uu_network_marks field.
294 *
295 * Finally, the value of the net_thread_marks_none constant is provided for
296 * comparing for equality with the value returned when no bits in the marks
297 * field are changed by the push.
298 *
299 * It is not significant that a value of type net_thread_marks_t may
300 * compare as equal to the NULL pointer.
301 */
/*
 * Phantom structure type: only the tag is declared here, and values are
 * handled exclusively through the pointer typedef.
 */
struct net_thread_marks;
typedef const struct net_thread_marks *net_thread_marks_t;

/* Value returned by a push that changed no bits; compare for equality. */
extern const net_thread_marks_t net_thread_marks_none;

/* Set the given bits; returns an encoding of the bits that went 0 -> 1. */
extern net_thread_marks_t net_thread_marks_push(u_int32_t);
/* Counterpart of net_thread_marks_push() that clears the given bits. */
extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
/* Undo a prior push: reset the encoded bits to their previous value. */
extern void net_thread_marks_pop(net_thread_marks_t);
extern void net_thread_unmarks_pop(net_thread_marks_t);
/* Return the subset of the given bits that is currently set. */
extern u_int32_t net_thread_is_marked(u_int32_t);
/* Return the subset of the given bits that is currently cleared. */
extern u_int32_t net_thread_is_unmarked(u_int32_t);
313
/*
 * Output routine.  Takes an interface, a protocol family, an mbuf, an
 * opaque pointer, a socket address, an int and a struct flowadv pointer.
 * NOTE(review): the role of the void * and int arguments is not visible
 * from this header -- confirm against the definition.
 */
extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
    const struct sockaddr *, int, struct flowadv *);

/* Input routines taking an interface and an mbuf (list). */
extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t);

/*
 * NOTE(review): presumably resolves the first sockaddr into the second,
 * whose buffer size is the size_t argument -- confirm.
 */
extern errno_t dlil_resolve_multi(struct ifnet *,
    const struct sockaddr *, struct sockaddr *, size_t);

/*
 * Same leading parameters as dlil_send_arp_internal(), plus a trailing
 * u_int32_t.
 */
extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
    const struct sockaddr *, const struct sockaddr_dl *,
    const struct sockaddr *, u_int32_t);

/*
 * Interface filter attach/detach.  The attach call receives a pointer to
 * an interface_filter_t, the same type dlil_detach_filter() takes.
 */
extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
    interface_filter_t *, u_int32_t);
extern void dlil_detach_filter(interface_filter_t);
/* Boolean queries about installed filters. */
extern boolean_t dlil_has_ip_filter(void);
extern boolean_t dlil_has_if_filter(struct ifnet *);

extern void dlil_proto_unplumb_all(ifnet_t);

/* Event/message posting routines. */
extern int dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
    struct net_event_data *, u_int32_t, boolean_t);

extern void dlil_post_sifflags_msg(struct ifnet *);

extern int dlil_post_complete_msg(struct ifnet *, struct kev_msg *);

extern int dlil_alloc_local_stats(struct ifnet *);

extern void ifnet_filter_update_tso(struct ifnet *, boolean_t filter_enable);
/* Receive-poll parameter validation/update and polling entry points. */
extern errno_t dlil_rxpoll_validate_params(struct ifnet_poll_params *);
extern void dlil_rxpoll_update_params(struct ifnet *,
    struct ifnet_poll_params *);
extern void ifnet_poll(struct ifnet *);
extern errno_t ifnet_input_poll(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *);
352
/* Declarations available only when SKYWALK is enabled. */
#if SKYWALK
/* Boolean per-interface queries. */
extern boolean_t ifnet_needs_fsw_transport_netagent(ifnet_t ifp);
extern boolean_t ifnet_needs_fsw_ip_netagent(ifnet_t ifp);
extern boolean_t ifnet_needs_netif_netagent(ifnet_t ifp);
extern boolean_t ifnet_needs_compat(ifnet_t ifp);
extern boolean_t ifnet_nx_noauto(ifnet_t ifp);
extern boolean_t ifnet_nx_noauto_flowswitch(ifnet_t ifp);
extern boolean_t ifnet_is_low_latency(ifnet_t ifp);
/*
 * Nexus and netagent attach/detach, add/remove.
 * NOTE(review): the boolean_t result presumably reports success -- confirm.
 */
extern boolean_t ifnet_attach_flowswitch_nexus(ifnet_t ifp);
extern boolean_t ifnet_detach_flowswitch_nexus(ifnet_t ifp);
extern boolean_t ifnet_attach_netif_nexus(ifnet_t ifp);
extern boolean_t ifnet_detach_netif_nexus(ifnet_t ifp);
extern boolean_t ifnet_add_netagent(ifnet_t ifp);
extern boolean_t ifnet_remove_netagent(ifnet_t ifp);
extern void ifnet_attach_native_flowswitch(ifnet_t ifp);
extern void ifnet_start_ignore_delay(ifnet_t interface);
/* Flowswitch rx callback: set, get (through out-pointers) and release. */
extern int ifnet_set_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t cb, void *arg);
extern int ifnet_get_flowswitch_rx_callback(ifnet_t ifp, ifnet_fsw_rx_cb_t *cbp, void **argp);
extern void ifnet_release_flowswitch_rx_callback(ifnet_t ifp);
/* Delegate parent: set, get (through an out-pointer) and release. */
extern int ifnet_set_delegate_parent(ifnet_t difp, ifnet_t parent);
extern int ifnet_get_delegate_parent(ifnet_t difp, ifnet_t *parent);
extern void ifnet_release_delegate_parent(ifnet_t difp);
/*
 * Detach-notify callback setters/getters.
 * NOTE(review): the _locked variants presumably expect the caller to
 * already hold the relevant lock -- confirm which one.
 */
extern void ifnet_set_detach_notify_locked(ifnet_t ifp,
    ifnet_detach_notify_cb_t cb, void *arg);
extern void ifnet_get_detach_notify_locked(ifnet_t ifp,
    ifnet_detach_notify_cb_t *cbp, void **argp);
extern void ifnet_set_detach_notify(ifnet_t ifp,
    ifnet_detach_notify_cb_t cb, void *arg);
extern void ifnet_get_detach_notify(ifnet_t ifp,
    ifnet_detach_notify_cb_t *cbp, void **argp);

#endif /* SKYWALK */
385
/*
 * dlil_if_acquire is obsolete. Use ifnet_allocate.
 */
extern int dlil_if_acquire(u_int32_t, const void *, size_t, const char *, struct ifnet **);
/*
 * dlil_if_release is obsolete. The equivalent is called automatically when
 * an interface is detached.
 */
extern void dlil_if_release(struct ifnet *ifp);

/*
 * NOTE(review): presumably take and drop a reference on the interface --
 * confirm against the definitions.
 */
extern errno_t dlil_if_ref(struct ifnet *);
extern errno_t dlil_if_free(struct ifnet *);

/*
 * Node presence notifications.  As a function parameter, u_int8_t[48] is
 * adjusted to a pointer to u_int8_t; the 48 only documents the expected
 * array length and is not enforced by the compiler.
 */
extern int dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
    int, u_int8_t[48]);
extern void dlil_node_absent(struct ifnet *, struct sockaddr *);
/* Like dlil_node_present() with an additional struct sockaddr_dl pointer. */
extern int dlil_node_present_v2(struct ifnet *, struct sockaddr *, struct sockaddr_dl *, int32_t, int,
    int, u_int8_t[48]);

/*
 * Returns a const pointer annotated __indexable.
 * NOTE(review): the size_t out-parameter presumably receives the number of
 * bytes available at the returned pointer -- confirm.
 */
extern const void *__indexable dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *,
    kauth_cred_t *);

/*
 * The two array parameters are adjusted to pointers to u_int8_t; their
 * bounds document the expected lengths, DLIL_MODIDLEN and DLIL_MODARGLEN.
 */
extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN],
    u_int8_t[DLIL_MODARGLEN]);
410
/*
 * NOTE(review): presumably the number of buckets that proto_hash_value()
 * maps into -- confirm against its definition.
 */
#define PROTO_HASH_SLOTS        4

extern int proto_hash_value(u_int32_t);

/* Returns a string for a u_int32_t code; the string must not be modified. */
extern const char *dlil_kev_dl_code_str(u_int32_t);

/* Receive-poll parameter setter/getter for an interface. */
extern errno_t dlil_rxpoll_set_params(struct ifnet *,
    struct ifnet_poll_params *, boolean_t);
extern errno_t dlil_rxpoll_get_params(struct ifnet *,
    struct ifnet_poll_params *);

/*
 * Output and input handlers.  dlil_input_handler() takes the same
 * parameter types as the dlth_strategy callback in struct
 * dlil_threading_info, minus that callback's leading structure pointer.
 */
extern errno_t dlil_output_handler(struct ifnet *, struct mbuf *);
extern errno_t dlil_input_handler(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);
extern void dlil_ifclassq_setup(struct ifnet *, struct ifclassq *);

#if SKYWALK
/* Install or reset the per-interface input/output handler functions. */
extern errno_t dlil_set_input_handler(struct ifnet *ifp, dlil_input_func fn);
extern errno_t dlil_set_output_handler(struct ifnet *ifp, dlil_output_func fn);
extern void dlil_reset_input_handler(struct ifnet *ifp);
extern void dlil_reset_output_handler(struct ifnet *ifp);
#endif /* SKYWALK */
434
435/*
436 * This is mostly called from the context of the DLIL input thread;
437 * because of that there is no need for atomic operations.
438 */
439__attribute__((always_inline))
440static inline void
441ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
442{
443 if (!(m->m_flags & M_PKTHDR)) {
444 return;
445 }
446
447 switch (m_get_traffic_class(m)) {
448 case MBUF_TC_BE:
449 ifp->if_tc.ifi_ibepackets++;
450 ifp->if_tc.ifi_ibebytes += (u_int64_t)m->m_pkthdr.len;
451 break;
452 case MBUF_TC_BK:
453 ifp->if_tc.ifi_ibkpackets++;
454 ifp->if_tc.ifi_ibkbytes += (u_int64_t)m->m_pkthdr.len;
455 break;
456 case MBUF_TC_VI:
457 ifp->if_tc.ifi_ivipackets++;
458 ifp->if_tc.ifi_ivibytes += (u_int64_t)m->m_pkthdr.len;
459 break;
460 case MBUF_TC_VO:
461 ifp->if_tc.ifi_ivopackets++;
462 ifp->if_tc.ifi_ivobytes += (u_int64_t)m->m_pkthdr.len;
463 break;
464 default:
465 break;
466 }
467
468 if (mbuf_is_traffic_class_privileged(mbuf: m)) {
469 ifp->if_tc.ifi_ipvpackets++;
470 ifp->if_tc.ifi_ipvbytes += (u_int64_t)m->m_pkthdr.len;
471 }
472}
473
474/*
475 * This is called from DLIL output, hence multiple threads could end
476 * up modifying the statistics. We trade off acccuracy for performance
477 * by not using atomic operations here.
478 */
479__attribute__((always_inline))
480static inline void
481ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
482{
483 if (!(m->m_flags & M_PKTHDR)) {
484 return;
485 }
486
487 switch (m_get_traffic_class(m)) {
488 case MBUF_TC_BE:
489 ifp->if_tc.ifi_obepackets++;
490 ifp->if_tc.ifi_obebytes += (u_int64_t)m->m_pkthdr.len;
491 break;
492 case MBUF_TC_BK:
493 ifp->if_tc.ifi_obkpackets++;
494 ifp->if_tc.ifi_obkbytes += (u_int64_t)m->m_pkthdr.len;
495 break;
496 case MBUF_TC_VI:
497 ifp->if_tc.ifi_ovipackets++;
498 ifp->if_tc.ifi_ovibytes += (u_int64_t)m->m_pkthdr.len;
499 break;
500 case MBUF_TC_VO:
501 ifp->if_tc.ifi_ovopackets++;
502 ifp->if_tc.ifi_ovobytes += (u_int64_t)m->m_pkthdr.len;
503 break;
504 default:
505 break;
506 }
507
508 if (mbuf_is_traffic_class_privileged(mbuf: m)) {
509 ifp->if_tc.ifi_opvpackets++;
510 ifp->if_tc.ifi_opvbytes += (u_int64_t)m->m_pkthdr.len;
511 }
512}
513
/*
 * Declared here, defined elsewhere; takes an interface and a u_long.
 * NOTE(review): the u_long is presumably an ioctl command code, and the
 * name suggests the work is deferred rather than done inline -- confirm.
 */
extern void ifnet_ioctl_async(struct ifnet *, u_long);
515#endif /* BSD_KERNEL_PRIVATE */
516#endif /* KERNEL_PRIVATE */
517#endif /* KERNEL */
518#endif /* DLIL_H */
519