1/*
2 * Copyright (c) 1999-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28#ifndef DLIL_H
29#define DLIL_H
30#ifdef KERNEL
31
32#include <sys/kernel_types.h>
33#include <net/kpi_interface.h>
34
/*
 * BPF tap modes.  The enumerators take the values 0 through 3, so
 * BPF_TAP_INPUT_OUTPUT is numerically BPF_TAP_INPUT | BPF_TAP_OUTPUT.
 */
enum {
	BPF_TAP_DISABLE		= 0,
	BPF_TAP_INPUT		= 1,
	BPF_TAP_OUTPUT		= 2,
	BPF_TAP_INPUT_OUTPUT	= 3
};
41
42/*
43 * DLIL_DESC_ETYPE2 - native_type must point to 2 byte ethernet raw protocol,
44 * variants.native_type_length must be set to 2
45 * DLIL_DESC_SAP - native_type must point to 3 byte SAP protocol
46 * variants.native_type_length must be set to 3
47 * DLIL_DESC_SNAP - native_type must point to 5 byte SNAP protocol
48 * variants.native_type_length must be set to 5
49 *
50 * All protocols must be in Network byte order.
51 *
52 * Future interface families may define more protocol types they know about.
53 * The type implies the offset and context of the protocol data at native_type.
54 * The length of the protocol data specified at native_type must be set in
55 * variants.native_type_length.
56 */
/*
 * Ethernet specific types.  The trailing notes give the length, in bytes,
 * of the protocol data that native_type must point to for each type.
 */
#define DLIL_DESC_ETYPE2	4	/* 2 byte ethernet raw protocol */
#define DLIL_DESC_SAP		5	/* 3 byte SAP protocol */
#define DLIL_DESC_SNAP		6	/* 5 byte SNAP protocol */
61
62#ifdef KERNEL_PRIVATE
63#include <net/if.h>
64#include <net/if_var.h>
65#include <net/classq/classq.h>
66#include <net/flowadv.h>
67#include <sys/kern_event.h>
68#include <kern/thread.h>
69#include <kern/locks.h>
70
71#ifdef BSD_KERNEL_PRIVATE
72/* Operations on timespecs. */
/*
 * net_timerclear(tvp): zero both tv_sec and tv_nsec of the timespec that
 * tvp points to.
 *
 * The expansion is a single parenthesized assignment expression whose
 * value is 0, so it can be used as a statement or embedded in a larger
 * expression without the surrounding operators binding into the
 * assignment.  tvp is expanded twice and must be free of side effects.
 */
#define net_timerclear(tvp)	((tvp)->tv_sec = (tvp)->tv_nsec = 0)
74
/*
 * net_timerisset(tvp): evaluates to 1 when either field of the timespec
 * pointed to by tvp is non-zero, and to 0 when both are zero.
 */
#define net_timerisset(tvp)	\
	((tvp)->tv_sec != 0 || (tvp)->tv_nsec != 0)
76
/*
 * net_timercmp(tvp, uvp, cmp): compare two timespecs with the relational
 * operator token cmp.  When the seconds differ, the result is the
 * comparison of the seconds; otherwise it is the comparison of the
 * nanoseconds.
 */
#define net_timercmp(tvp, uvp, cmp)					\
	(((tvp)->tv_sec != (uvp)->tv_sec) ?				\
	    ((tvp)->tv_sec cmp (uvp)->tv_sec) :				\
	    ((tvp)->tv_nsec cmp (uvp)->tv_nsec))
81
/*
 * net_timeradd(tvp, uvp, vvp): store *tvp + *uvp into *vvp.  The fields
 * are summed separately; if the nanosecond sum reaches NSEC_PER_SEC, one
 * second is carried into tv_sec.
 */
#define net_timeradd(tvp, uvp, vvp) do {				\
	(vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;		\
	(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;			\
	if ((vvp)->tv_nsec >= (long)NSEC_PER_SEC) {			\
		(vvp)->tv_nsec -= NSEC_PER_SEC;				\
		(vvp)->tv_sec++;					\
	}								\
} while (0)
90
/*
 * net_timersub(tvp, uvp, vvp): store *tvp - *uvp into *vvp.  The fields
 * are subtracted separately; if the nanosecond difference is negative,
 * one second is borrowed from tv_sec.
 */
#define net_timersub(tvp, uvp, vvp) do {				\
	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;		\
	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;			\
	if ((vvp)->tv_nsec < 0) {					\
		(vvp)->tv_nsec += NSEC_PER_SEC;				\
		(vvp)->tv_sec--;					\
	}								\
} while (0)
99
/*
 * net_timernsec(tvp, nsp): convert the timespec at tvp into nanoseconds
 * and store the result in *nsp.  *nsp first receives tv_nsec; the
 * seconds are scaled by NSEC_PER_SEC and added only when tv_sec is
 * greater than zero.
 */
#define net_timernsec(tvp, nsp) do {					\
	*(nsp) = (tvp)->tv_nsec;					\
	if ((tvp)->tv_sec > 0) {					\
		*(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);		\
	}								\
} while (0)
105
/*
 * net_nsectimer(nsp, tvp): split the 64-bit nanosecond count *nsp into
 * whole seconds and left-over nanoseconds, and store them in tv_sec and
 * tv_nsec of the timespec at tvp.  tvp is expanded several times and
 * must be free of side effects.
 */
#if defined(__x86_64__) || defined(__arm64__)
/* 64-bit targets: one 64-bit division, remainder by subtraction. */
#define net_nsectimer(nsp, tvp) do {					\
	u_int64_t __nst_total = *(nsp);					\
	net_timerclear(tvp);						\
	uint64_t __nst_sec = __nst_total / NSEC_PER_SEC;		\
	(tvp)->tv_sec = (__darwin_time_t)__nst_sec;			\
	(tvp)->tv_nsec =						\
	    (long)(__nst_total - __nst_sec * NSEC_PER_SEC);		\
} while (0)
#else /* 32 bit */
/*
 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
 * because __darwin_time_t is 32 bit on 32-bit platforms.  This bound
 * is < 2^61.  A first approximation of the seconds is obtained without a
 * 64-bit division from the following values:
 *	a   = floor(NSEC / 2^29)
 *	inv = floor(2^61 / 10^9)
 *
 * The approximation of seconds is correct or too low by 1 unit, so the
 * remainder is computed and, when it is still at least one second, the
 * result is corrected by one.
 */
#define net_nsectimer(nsp, tvp) do {					\
	u_int64_t __nst_total = *(nsp);					\
	net_timerclear(tvp);						\
	uint32_t __nst_a = (uint32_t)(__nst_total >> 29);		\
	const uint32_t __nst_inv = 0x89705F41;				\
	uint32_t __nst_sec =						\
	    (uint32_t)(((uint64_t)__nst_a * __nst_inv) >> 32);		\
	uint32_t __nst_rem =						\
	    (uint32_t)(__nst_total - __nst_sec * NSEC_PER_SEC);		\
	if (__nst_rem >= NSEC_PER_SEC) {				\
		__nst_sec++;						\
		__nst_rem -= NSEC_PER_SEC;				\
	}								\
	(tvp)->tv_sec = (__darwin_time_t)__nst_sec;			\
	(tvp)->tv_nsec = (long)__nst_rem;				\
} while (0)
#endif /* 32 bit */
139
140struct ifnet;
141struct mbuf;
142struct ether_header;
143struct sockaddr_dl;
144struct iff_filter;
145
/* Size, in bytes, of the input_name[] storage in struct dlil_threading_info. */
#define DLIL_THREADNAME_LEN	32
147
148/*
149 * DLIL input thread info
150 */
151struct dlil_threading_info {
152 decl_lck_mtx_data(, input_lck);
153 lck_grp_t *lck_grp; /* lock group (for lock stats) */
154 u_int32_t input_waiting; /* DLIL condition of thread */
155 u_int32_t wtot; /* # of wakeup requests */
156 char input_name[DLIL_THREADNAME_LEN]; /* name storage */
157 struct ifnet *ifp; /* pointer to interface */
158 class_queue_t rcvq_pkts; /* queue of pkts */
159 struct ifnet_stat_increment_param stats; /* incremental statistics */
160 /*
161 * Thread affinity (workloop and DLIL threads).
162 */
163 boolean_t net_affinity; /* affinity set is available */
164 struct thread *input_thr; /* input thread */
165 struct thread *wloop_thr; /* workloop thread */
166 struct thread *poll_thr; /* poll thread */
167 u_int32_t tag; /* affinity tag */
168 /*
169 * Opportunistic polling.
170 */
171 ifnet_model_t mode; /* current mode */
172 struct pktcntr tstats; /* incremental polling statistics */
173 struct if_rxpoll_stats pstats; /* polling statistics */
174#define rxpoll_offreq pstats.ifi_poll_off_req
175#define rxpoll_offerr pstats.ifi_poll_off_err
176#define rxpoll_onreq pstats.ifi_poll_on_req
177#define rxpoll_onerr pstats.ifi_poll_on_err
178#define rxpoll_wavg pstats.ifi_poll_wakeups_avg
179#define rxpoll_wlowat pstats.ifi_poll_wakeups_lowat
180#define rxpoll_whiwat pstats.ifi_poll_wakeups_hiwat
181#define rxpoll_pavg pstats.ifi_poll_packets_avg
182#define rxpoll_pmin pstats.ifi_poll_packets_min
183#define rxpoll_pmax pstats.ifi_poll_packets_max
184#define rxpoll_plowat pstats.ifi_poll_packets_lowat
185#define rxpoll_phiwat pstats.ifi_poll_packets_hiwat
186#define rxpoll_bavg pstats.ifi_poll_bytes_avg
187#define rxpoll_bmin pstats.ifi_poll_bytes_min
188#define rxpoll_bmax pstats.ifi_poll_bytes_max
189#define rxpoll_blowat pstats.ifi_poll_bytes_lowat
190#define rxpoll_bhiwat pstats.ifi_poll_bytes_hiwat
191#define rxpoll_plim pstats.ifi_poll_packets_limit
192#define rxpoll_ival pstats.ifi_poll_interval_time
193 struct pktcntr sstats; /* packets and bytes per sampling */
194 struct timespec mode_holdtime; /* mode holdtime in nsec */
195 struct timespec mode_lasttime; /* last mode change time in nsec */
196 struct timespec sample_holdtime; /* sampling holdtime in nsec */
197 struct timespec sample_lasttime; /* last sampling time in nsec */
198 struct timespec dbg_lasttime; /* last debug message time in nsec */
199#if IFNET_INPUT_SANITY_CHK
200 /*
201 * For debugging.
202 */
203 u_int64_t input_mbuf_cnt; /* total # of packets processed */
204#endif
205 thread_call_t input_mit_tcall; /* coalescing input processing */
206};
207
208/*
209 * DLIL input thread info (for main/loopback input thread)
210 */
211struct dlil_main_threading_info {
212 struct dlil_threading_info inp;
213 class_queue_t lo_rcvq_pkts; /* queue of lo0 pkts */
214};
215
/*
 * The following are shared with kpi_protocol.c so that it may wake up
 * the input thread to run through packets queued for protocol input.
 *
 * Each flag is a distinct single bit; together they occupy the six most
 * significant bits of a 32-bit word.
 */
#define DLIL_INPUT_RUNNING		0x80000000
#define DLIL_INPUT_WAITING		0x40000000
#define DLIL_PROTO_REGISTER		0x20000000
#define DLIL_PROTO_WAITING		0x10000000
#define DLIL_INPUT_TERMINATE		0x08000000
#define DLIL_INPUT_TERMINATE_COMPLETE	0x04000000
226
/*
 * Flags for dlil_attach_filter().  The values are distinct bits.
 */
/* Interface filter supports TSO */
#define DLIL_IFF_TSO		0x01
/* Apple internal -- do not count towards stats */
#define DLIL_IFF_INTERNAL	0x02
232
233extern int dlil_verbose;
234extern uint32_t hwcksum_dbg;
235extern uint32_t hwcksum_tx;
236extern uint32_t hwcksum_rx;
237extern struct dlil_threading_info *dlil_main_input_thread;
238
239extern void dlil_init(void);
240
241extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
242extern errno_t ifp_if_output(struct ifnet *, struct mbuf *);
243extern void ifp_if_start(struct ifnet *);
244
245extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
246
247/*
 * dlil_send_arp_internal() bypasses the check for IPv4LL (IPv4 link-local).
249 */
250extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
251 const struct sockaddr_dl *, const struct sockaddr *,
252 const struct sockaddr_dl *, const struct sockaddr *);
253
/*
 * Bit values for the uu_network_marks field of the uthread structure.
 * They are used with the net_thread_mark_apply and net_thread_is_unmarked
 * functions to control the bits in that field.
 */
#define NET_THREAD_HELD_PF	0x1	/* thread is holding PF lock */
#define NET_THREAD_HELD_DOMAIN	0x2	/* thread is holding domain_proto_mtx */
#define NET_THREAD_CKREQ_LLADDR	0x4	/* thread reqs MACF check for LLADDR */
262
263/*
264 * net_thread_marks_t is a pointer to a phantom structure type used for
265 * manipulating the uthread:uu_network_marks field. As an example...
266 *
267 * static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
268 * struct uthread *uth = get_bsdthread_info(current_thread());
269 *
270 * net_thread_marks_t marks = net_thread_marks_push(bits);
271 * VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
272 * net_thread_marks_pop(marks);
273 *
274 * The net_thread_marks_push() function returns an encoding of the bits
275 * that were changed from zero to one in the uu_network_marks field. When
276 * the net_thread_marks_pop() function later processes that value, it
277 * resets the bits to their previous value.
278 *
279 * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
280 * are similar to net_thread_marks_push() and net_thread_marks_pop() except
281 * they clear the marks bits in the guarded section rather than set them.
282 *
283 * The net_thread_is_marked() and net_thread_is_unmarked() functions return
284 * the subset of the bits that are currently set or cleared (respectively)
285 * in the uthread:uu_network_marks field.
286 *
287 * Finally, the value of the net_thread_marks_none constant is provided for
288 * comparing for equality with the value returned when no bits in the marks
289 * field are changed by the push.
290 *
291 * It is not significant that a value of type net_thread_marks_t may
292 * compare as equal to the NULL pointer.
293 */
294struct net_thread_marks;
295typedef const struct net_thread_marks *net_thread_marks_t;
296
297extern const net_thread_marks_t net_thread_marks_none;
298
299extern net_thread_marks_t net_thread_marks_push(u_int32_t);
300extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
301extern void net_thread_marks_pop(net_thread_marks_t);
302extern void net_thread_unmarks_pop(net_thread_marks_t);
303extern u_int32_t net_thread_is_marked(u_int32_t);
304extern u_int32_t net_thread_is_unmarked(u_int32_t);
305
306extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
307 const struct sockaddr *, int, struct flowadv *);
308
309extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
310extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
311 u_int32_t, ifnet_model_t);
312
313extern errno_t dlil_resolve_multi(struct ifnet *,
314 const struct sockaddr *, struct sockaddr *, size_t);
315
316extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
317 const struct sockaddr *, const struct sockaddr_dl *,
318 const struct sockaddr *, u_int32_t);
319
320extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
321 interface_filter_t *, u_int32_t);
322extern void dlil_detach_filter(interface_filter_t);
323
324extern void dlil_proto_unplumb_all(ifnet_t);
325
326extern void dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
327 struct net_event_data *, u_int32_t);
328
329extern void dlil_post_sifflags_msg(struct ifnet *);
330
331extern int dlil_post_complete_msg(struct ifnet *, struct kev_msg *);
332
333extern int dlil_alloc_local_stats(struct ifnet *);
334
335
336/*
337 * dlil_if_acquire is obsolete. Use ifnet_allocate.
338 */
339extern int dlil_if_acquire(u_int32_t, const void *, size_t, const char *, struct ifnet **);
340/*
341 * dlil_if_release is obsolete. The equivalent is called automatically when
342 * an interface is detached.
343 */
344extern void dlil_if_release(struct ifnet *ifp);
345
346extern errno_t dlil_if_ref(struct ifnet *);
347extern errno_t dlil_if_free(struct ifnet *);
348
349extern void dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
350 int, u_int8_t[48]);
351extern void dlil_node_absent(struct ifnet *, struct sockaddr *);
352
353extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *,
354 kauth_cred_t *);
355
356extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN],
357 u_int8_t[DLIL_MODARGLEN]);
358
/*
 * Number of protocol hash slots.
 * NOTE(review): presumably the range of proto_hash_value() -- confirm.
 */
#define PROTO_HASH_SLOTS	4
360
361extern int proto_hash_value(u_int32_t);
362
363extern const char *dlil_kev_dl_code_str(u_int32_t);
364
365extern errno_t dlil_rxpoll_set_params(struct ifnet *,
366 struct ifnet_poll_params *, boolean_t);
367extern errno_t dlil_rxpoll_get_params(struct ifnet *,
368 struct ifnet_poll_params *);
369
370extern errno_t dlil_output_handler(struct ifnet *, struct mbuf *);
371extern errno_t dlil_input_handler(struct ifnet *, struct mbuf *,
372 struct mbuf *, const struct ifnet_stat_increment_param *,
373 boolean_t, struct thread *);
374
375
376/*
377 * This is mostly called from the context of the DLIL input thread;
378 * because of that there is no need for atomic operations.
379 */
380__attribute__((always_inline))
381static inline void
382ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m)
383{
384 if (!(m->m_flags & M_PKTHDR))
385 return;
386
387 switch (m_get_traffic_class(m)) {
388 case MBUF_TC_BE:
389 ifp->if_tc.ifi_ibepackets++;
390 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len;
391 break;
392 case MBUF_TC_BK:
393 ifp->if_tc.ifi_ibkpackets++;
394 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len;
395 break;
396 case MBUF_TC_VI:
397 ifp->if_tc.ifi_ivipackets++;
398 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len;
399 break;
400 case MBUF_TC_VO:
401 ifp->if_tc.ifi_ivopackets++;
402 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len;
403 break;
404 default:
405 break;
406 }
407
408 if (mbuf_is_traffic_class_privileged(m)) {
409 ifp->if_tc.ifi_ipvpackets++;
410 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len;
411 }
412}
413
414/*
415 * This is called from DLIL output, hence multiple threads could end
416 * up modifying the statistics. We trade off acccuracy for performance
417 * by not using atomic operations here.
418 */
419__attribute__((always_inline))
420static inline void
421ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m)
422{
423 if (!(m->m_flags & M_PKTHDR))
424 return;
425
426 switch (m_get_traffic_class(m)) {
427 case MBUF_TC_BE:
428 ifp->if_tc.ifi_obepackets++;
429 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len;
430 break;
431 case MBUF_TC_BK:
432 ifp->if_tc.ifi_obkpackets++;
433 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len;
434 break;
435 case MBUF_TC_VI:
436 ifp->if_tc.ifi_ovipackets++;
437 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len;
438 break;
439 case MBUF_TC_VO:
440 ifp->if_tc.ifi_ovopackets++;
441 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len;
442 break;
443 default:
444 break;
445 }
446
447 if (mbuf_is_traffic_class_privileged(m)) {
448 ifp->if_tc.ifi_opvpackets++;
449 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len;
450 }
451}
452#endif /* BSD_KERNEL_PRIVATE */
453#endif /* KERNEL_PRIVATE */
454#endif /* KERNEL */
455#endif /* DLIL_H */
456