1 | /* |
2 | * Copyright (c) 1999-2017 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | #ifndef DLIL_H |
29 | #define DLIL_H |
30 | #ifdef KERNEL |
31 | |
32 | #include <sys/kernel_types.h> |
33 | #include <net/kpi_interface.h> |
34 | |
/*
 * BPF tap mode selectors.  Values are sequential starting at zero, so
 * BPF_TAP_INPUT_OUTPUT (3) equals BPF_TAP_INPUT | BPF_TAP_OUTPUT.
 */
enum {
	BPF_TAP_DISABLE = 0,
	BPF_TAP_INPUT = 1,
	BPF_TAP_OUTPUT = 2,
	BPF_TAP_INPUT_OUTPUT = 3
};
41 | |
/*
 * Protocol descriptor types.
 *
 * Each type fixes what native_type must point to and the value that
 * variants.native_type_length must carry:
 *
 *	DLIL_DESC_ETYPE2  2 byte ethernet raw protocol  (length 2)
 *	DLIL_DESC_SAP     3 byte SAP protocol           (length 3)
 *	DLIL_DESC_SNAP    5 byte SNAP protocol          (length 5)
 *
 * All protocols must be in network byte order.
 *
 * Future interface families may define more protocol types they know
 * about.  The type implies the offset and context of the protocol data at
 * native_type, and the length of that data must always be set in
 * variants.native_type_length.
 */

/* Ethernet specific types */
#define	DLIL_DESC_ETYPE2	4
#define	DLIL_DESC_SAP		5
#define	DLIL_DESC_SNAP		6
61 | |
62 | #ifdef KERNEL_PRIVATE |
63 | #include <net/if.h> |
64 | #include <net/if_var.h> |
65 | #include <net/classq/classq.h> |
66 | #include <net/flowadv.h> |
67 | #include <sys/kern_event.h> |
68 | #include <kern/thread.h> |
69 | #include <kern/locks.h> |
70 | |
71 | #ifdef BSD_KERNEL_PRIVATE |
/* Operations on timespecs. */

/*
 * Zero both tv_sec and tv_nsec of the timespec pointed to by tvp.
 *
 * The expansion is wrapped in parentheses so that it is a single
 * expression (with value 0) no matter what surrounds the invocation;
 * without them an operator next to the macro would bind to the trailing
 * "0" and change the value being stored.  tvp is evaluated twice.
 */
#define	net_timerclear(tvp)	((tvp)->tv_sec = (tvp)->tv_nsec = 0)
74 | |
/*
 * Evaluates to 1 when either field of the timespec pointed to by tvp is
 * non-zero, and to 0 when both are zero.  tvp may be evaluated twice.
 */
#define	net_timerisset(tvp)						\
	((tvp)->tv_sec != 0 || (tvp)->tv_nsec != 0)
76 | |
/*
 * Compare two timespecs with the relational operator cmp (e.g. <, ==, >=).
 * The seconds decide whenever they differ; only when they are equal are
 * the nanoseconds compared.
 */
#define	net_timercmp(tvp, uvp, cmp)					\
	(((tvp)->tv_sec != (uvp)->tv_sec) ?				\
	    ((tvp)->tv_sec cmp (uvp)->tv_sec) :				\
	    ((tvp)->tv_nsec cmp (uvp)->tv_nsec))
81 | |
/*
 * *vvp = *tvp + *uvp.  The fields are summed independently, then a single
 * carry is moved from tv_nsec into tv_sec when the nanosecond sum reaches
 * NSEC_PER_SEC.  Arguments are evaluated more than once.
 */
#define	net_timeradd(tvp, uvp, vvp) do {				\
	(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;			\
	(vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec;		\
	if (!((vvp)->tv_nsec < (long)NSEC_PER_SEC)) {			\
		(vvp)->tv_nsec -= NSEC_PER_SEC;				\
		(vvp)->tv_sec++;					\
	}								\
} while (0)
90 | |
/*
 * *vvp = *tvp - *uvp.  The fields are subtracted independently, then a
 * single borrow is taken from tv_sec when the nanosecond difference is
 * negative.  Arguments are evaluated more than once.
 */
#define	net_timersub(tvp, uvp, vvp) do {				\
	(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;			\
	(vvp)->tv_nsec = (tvp)->tv_nsec - (uvp)->tv_nsec;		\
	if (!((vvp)->tv_nsec >= 0)) {					\
		(vvp)->tv_nsec += NSEC_PER_SEC;				\
		(vvp)->tv_sec--;					\
	}								\
} while (0)
99 | |
/*
 * Store the timespec pointed to by tvp into *nsp as a nanosecond count.
 * tv_nsec is always stored; the seconds are scaled by NSEC_PER_SEC and
 * added only when tv_sec is positive.
 */
#define	net_timernsec(tvp, nsp) do {					\
	*(nsp) = (tvp)->tv_nsec;					\
	if ((tvp)->tv_sec > 0) {					\
		*(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC);		\
	}								\
} while (0)
105 | |
/*
 * net_nsectimer(nsp, tvp): split the 64-bit nanosecond count *nsp into the
 * tv_sec / tv_nsec fields of the timespec pointed to by tvp.
 */
#if defined(__x86_64__) || defined(__arm64__)
#define	net_nsectimer(nsp, tvp) do {					\
	u_int64_t __total = *(nsp);					\
	net_timerclear(tvp);						\
	uint64_t __whole = __total / NSEC_PER_SEC;			\
	(tvp)->tv_sec = (__darwin_time_t)__whole;			\
	(tvp)->tv_nsec = (long)(__total % NSEC_PER_SEC);		\
} while (0)
#else /* 32 bit */
/*
 * NSEC needs to be < 2^31*10^9 to be representable in a struct timespec
 * because __darwin_time_t is 32 bit on 32-bit platforms.  This bound
 * is < 2^61.  A first approximation of the seconds is obtained with a
 * multiply and shift instead of a 64-bit division, using:
 *	a   = floor(NSEC / 2^29)
 *	inv = floor(2^61 / 10^9)	(0x89705F41)
 *
 * The approximation of seconds is correct or too low by 1 unit, so the
 * remainder is computed and at most one second is carried out of it.
 */
#define	net_nsectimer(nsp, tvp) do {					\
	u_int64_t __total = *(nsp);					\
	net_timerclear(tvp);						\
	uint32_t __hi = (uint32_t)(__total >> 29);			\
	const uint32_t __recip = 0x89705F41;				\
	uint32_t __whole = (uint32_t)(((uint64_t)__hi * __recip) >> 32); \
	uint32_t __left = (uint32_t)(__total - __whole * NSEC_PER_SEC);	\
	/* 1 when the estimate was one second short, else 0 */		\
	uint32_t __carry = (__left >= NSEC_PER_SEC) ? 1 : 0;		\
	__whole += __carry;						\
	(tvp)->tv_sec = (__darwin_time_t)__whole;			\
	(tvp)->tv_nsec =						\
	    (long)(__carry ? (__left - NSEC_PER_SEC) : __left);		\
} while (0)
#endif /* 32 bit */
139 | |
/*
 * Forward declarations of structure tags, so that declarations in this
 * header can refer to pointers to them without the full definitions.
 */
struct ifnet;
struct mbuf;
/*
 * NOTE(review): this declaration had lost its tag ("struct ;"), which is
 * a syntax error.  Restored as ether_header, the Ethernet header type --
 * confirm against the files that include this header.
 */
struct ether_header;
struct sockaddr_dl;
struct iff_filter;
145 | |
146 | #define DLIL_THREADNAME_LEN 32 |
147 | |
148 | /* |
149 | * DLIL input thread info |
150 | */ |
151 | struct dlil_threading_info { |
152 | decl_lck_mtx_data(, input_lck); |
153 | lck_grp_t *lck_grp; /* lock group (for lock stats) */ |
154 | u_int32_t input_waiting; /* DLIL condition of thread */ |
155 | u_int32_t wtot; /* # of wakeup requests */ |
156 | char input_name[DLIL_THREADNAME_LEN]; /* name storage */ |
157 | struct ifnet *ifp; /* pointer to interface */ |
158 | class_queue_t rcvq_pkts; /* queue of pkts */ |
159 | struct ifnet_stat_increment_param stats; /* incremental statistics */ |
160 | /* |
161 | * Thread affinity (workloop and DLIL threads). |
162 | */ |
163 | boolean_t net_affinity; /* affinity set is available */ |
164 | struct thread *input_thr; /* input thread */ |
165 | struct thread *wloop_thr; /* workloop thread */ |
166 | struct thread *poll_thr; /* poll thread */ |
167 | u_int32_t tag; /* affinity tag */ |
168 | /* |
169 | * Opportunistic polling. |
170 | */ |
171 | ifnet_model_t mode; /* current mode */ |
172 | struct pktcntr tstats; /* incremental polling statistics */ |
173 | struct if_rxpoll_stats pstats; /* polling statistics */ |
174 | #define rxpoll_offreq pstats.ifi_poll_off_req |
175 | #define rxpoll_offerr pstats.ifi_poll_off_err |
176 | #define rxpoll_onreq pstats.ifi_poll_on_req |
177 | #define rxpoll_onerr pstats.ifi_poll_on_err |
178 | #define rxpoll_wavg pstats.ifi_poll_wakeups_avg |
179 | #define rxpoll_wlowat pstats.ifi_poll_wakeups_lowat |
180 | #define rxpoll_whiwat pstats.ifi_poll_wakeups_hiwat |
181 | #define rxpoll_pavg pstats.ifi_poll_packets_avg |
182 | #define rxpoll_pmin pstats.ifi_poll_packets_min |
183 | #define rxpoll_pmax pstats.ifi_poll_packets_max |
184 | #define rxpoll_plowat pstats.ifi_poll_packets_lowat |
185 | #define rxpoll_phiwat pstats.ifi_poll_packets_hiwat |
186 | #define rxpoll_bavg pstats.ifi_poll_bytes_avg |
187 | #define rxpoll_bmin pstats.ifi_poll_bytes_min |
188 | #define rxpoll_bmax pstats.ifi_poll_bytes_max |
189 | #define rxpoll_blowat pstats.ifi_poll_bytes_lowat |
190 | #define rxpoll_bhiwat pstats.ifi_poll_bytes_hiwat |
191 | #define rxpoll_plim pstats.ifi_poll_packets_limit |
192 | #define rxpoll_ival pstats.ifi_poll_interval_time |
193 | struct pktcntr sstats; /* packets and bytes per sampling */ |
194 | struct timespec mode_holdtime; /* mode holdtime in nsec */ |
195 | struct timespec mode_lasttime; /* last mode change time in nsec */ |
196 | struct timespec sample_holdtime; /* sampling holdtime in nsec */ |
197 | struct timespec sample_lasttime; /* last sampling time in nsec */ |
198 | struct timespec dbg_lasttime; /* last debug message time in nsec */ |
199 | #if IFNET_INPUT_SANITY_CHK |
200 | /* |
201 | * For debugging. |
202 | */ |
203 | u_int64_t input_mbuf_cnt; /* total # of packets processed */ |
204 | #endif |
205 | thread_call_t input_mit_tcall; /* coalescing input processing */ |
206 | }; |
207 | |
208 | /* |
209 | * DLIL input thread info (for main/loopback input thread) |
210 | */ |
211 | struct dlil_main_threading_info { |
212 | struct dlil_threading_info inp; |
213 | class_queue_t lo_rcvq_pkts; /* queue of lo0 pkts */ |
214 | }; |
215 | |
/*
 * Input thread state bits.  They are shared with kpi_protocol.c so that
 * it may wake up the input thread to run through packets queued for
 * protocol input.  Each constant is a distinct single bit.
 */
#define	DLIL_INPUT_RUNNING		0x80000000
#define	DLIL_INPUT_WAITING		0x40000000
#define	DLIL_PROTO_REGISTER		0x20000000
#define	DLIL_PROTO_WAITING		0x10000000
#define	DLIL_INPUT_TERMINATE		0x08000000
#define	DLIL_INPUT_TERMINATE_COMPLETE	0x04000000

/*
 * Flags for dlil_attach_filter()
 */
/* Interface filter supports TSO */
#define	DLIL_IFF_TSO		0x01
/* Apple internal -- do not count towards stats */
#define	DLIL_IFF_INTERNAL	0x02
232 | |
/*
 * Global variables; these are declarations only, the storage is defined
 * elsewhere.
 */
extern int dlil_verbose;
extern uint32_t hwcksum_dbg;
extern uint32_t hwcksum_tx;
extern uint32_t hwcksum_rx;
/*
 * Typed as the common dlil_threading_info; presumably refers to the inp
 * member of a struct dlil_main_threading_info -- confirm at the definition.
 */
extern struct dlil_threading_info *dlil_main_input_thread;

extern void dlil_init(void);

/* ifp_if_* routines; all take the interface as their first argument. */
extern errno_t ifp_if_ioctl(struct ifnet *, unsigned long, void *);
extern errno_t ifp_if_output(struct ifnet *, struct mbuf *);
extern void ifp_if_start(struct ifnet *);

extern errno_t dlil_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);

/*
 * Send arp internal bypasses the check for IPv4LL.
 * It takes the same arguments as dlil_send_arp() except for that
 * function's trailing u_int32_t parameter.
 */
extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t,
    const struct sockaddr_dl *, const struct sockaddr *,
    const struct sockaddr_dl *, const struct sockaddr *);
253 | |
/*
 * The following constants are used with the net_thread_marks_push(),
 * net_thread_unmarks_push(), net_thread_is_marked() and
 * net_thread_is_unmarked() functions to control the bits in the
 * uu_network_marks field of the uthread structure.
 */
#define NET_THREAD_HELD_PF 0x1 /* thread is holding PF lock */
#define NET_THREAD_HELD_DOMAIN 0x2 /* thread is holding domain_proto_mtx */
#define NET_THREAD_CKREQ_LLADDR 0x4 /* thread reqs MACF check for LLADDR */

/*
 * net_thread_marks_t is a pointer to a phantom structure type used for
 * manipulating the uthread:uu_network_marks field.  As an example...
 *
 *   static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR;
 *   struct uthread *uth = get_bsdthread_info(current_thread());
 *
 *   net_thread_marks_t marks = net_thread_marks_push(bits);
 *   VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0);
 *   net_thread_marks_pop(marks);
 *
 * The net_thread_marks_push() function returns an encoding of the bits
 * that were changed from zero to one in the uu_network_marks field. When
 * the net_thread_marks_pop() function later processes that value, it
 * resets the bits to their previous value.
 *
 * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions
 * are similar to net_thread_marks_push() and net_thread_marks_pop() except
 * they clear the marks bits in the guarded section rather than set them.
 *
 * The net_thread_is_marked() and net_thread_is_unmarked() functions return
 * the subset of the bits that are currently set or cleared (respectively)
 * in the uthread:uu_network_marks field.
 *
 * Finally, the value of the net_thread_marks_none constant is provided for
 * comparing for equality with the value returned when no bits in the marks
 * field are changed by the push.
 *
 * It is not significant that a value of type net_thread_marks_t may
 * compare as equal to the NULL pointer.
 */
/* Declared but never completed in this header: the "phantom" type above. */
struct net_thread_marks;
typedef const struct net_thread_marks *net_thread_marks_t;

extern const net_thread_marks_t net_thread_marks_none;

/* The u_int32_t arguments below are masks of NET_THREAD_* bits. */
extern net_thread_marks_t net_thread_marks_push(u_int32_t);
extern net_thread_marks_t net_thread_unmarks_push(u_int32_t);
extern void net_thread_marks_pop(net_thread_marks_t);
extern void net_thread_unmarks_pop(net_thread_marks_t);
extern u_int32_t net_thread_is_marked(u_int32_t);
extern u_int32_t net_thread_is_unmarked(u_int32_t);
305 | |
/* Packet output and input entry points. */
extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *,
    const struct sockaddr *, int, struct flowadv *);

extern void dlil_input_packet_list(struct ifnet *, struct mbuf *);
extern void dlil_input_packet_list_extended(struct ifnet *, struct mbuf *,
    u_int32_t, ifnet_model_t);

extern errno_t dlil_resolve_multi(struct ifnet *,
    const struct sockaddr *, struct sockaddr *, size_t);

/*
 * Same leading arguments as dlil_send_arp_internal(), plus a trailing
 * u_int32_t.
 */
extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *,
    const struct sockaddr *, const struct sockaddr_dl *,
    const struct sockaddr *, u_int32_t);

/* The trailing u_int32_t takes the DLIL_IFF_* flags for dlil_attach_filter(). */
extern int dlil_attach_filter(ifnet_t, const struct iff_filter *,
    interface_filter_t *, u_int32_t);
extern void dlil_detach_filter(interface_filter_t);

extern void dlil_proto_unplumb_all(ifnet_t);

/* Event message posting. */
extern void dlil_post_msg(struct ifnet *, u_int32_t, u_int32_t,
    struct net_event_data *, u_int32_t);

extern void dlil_post_sifflags_msg(struct ifnet *);

extern int dlil_post_complete_msg(struct ifnet *, struct kev_msg *);

extern int dlil_alloc_local_stats(struct ifnet *);


/*
 * dlil_if_acquire is obsolete. Use ifnet_allocate.
 */
extern int dlil_if_acquire(u_int32_t, const void *, size_t, const char *, struct ifnet **);
/*
 * dlil_if_release is obsolete. The equivalent is called automatically when
 * an interface is detached.
 */
extern void dlil_if_release(struct ifnet *ifp);

extern errno_t dlil_if_ref(struct ifnet *);
extern errno_t dlil_if_free(struct ifnet *);

/*
 * The array notation in the last parameter documents the expected size
 * (48 bytes); as a parameter it is an ordinary u_int8_t pointer.
 */
extern void dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int,
    int, u_int8_t[48]);
extern void dlil_node_absent(struct ifnet *, struct sockaddr *);

extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *,
    kauth_cred_t *);

/*
 * Both array parameters are passed as u_int8_t pointers; DLIL_MODIDLEN and
 * DLIL_MODARGLEN (defined elsewhere) document the expected sizes.
 */
extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN],
    u_int8_t[DLIL_MODARGLEN]);

#define PROTO_HASH_SLOTS 4

/* NOTE(review): presumably returns a slot index below PROTO_HASH_SLOTS -- confirm. */
extern int proto_hash_value(u_int32_t);

extern const char *dlil_kev_dl_code_str(u_int32_t);

/* Receive polling parameters. */
extern errno_t dlil_rxpoll_set_params(struct ifnet *,
    struct ifnet_poll_params *, boolean_t);
extern errno_t dlil_rxpoll_get_params(struct ifnet *,
    struct ifnet_poll_params *);

extern errno_t dlil_output_handler(struct ifnet *, struct mbuf *);
extern errno_t dlil_input_handler(struct ifnet *, struct mbuf *,
    struct mbuf *, const struct ifnet_stat_increment_param *,
    boolean_t, struct thread *);
375 | |
376 | /* |
377 | * This is mostly called from the context of the DLIL input thread; |
378 | * because of that there is no need for atomic operations. |
379 | */ |
380 | __attribute__((always_inline)) |
381 | static inline void |
382 | ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m) |
383 | { |
384 | if (!(m->m_flags & M_PKTHDR)) |
385 | return; |
386 | |
387 | switch (m_get_traffic_class(m)) { |
388 | case MBUF_TC_BE: |
389 | ifp->if_tc.ifi_ibepackets++; |
390 | ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len; |
391 | break; |
392 | case MBUF_TC_BK: |
393 | ifp->if_tc.ifi_ibkpackets++; |
394 | ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len; |
395 | break; |
396 | case MBUF_TC_VI: |
397 | ifp->if_tc.ifi_ivipackets++; |
398 | ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len; |
399 | break; |
400 | case MBUF_TC_VO: |
401 | ifp->if_tc.ifi_ivopackets++; |
402 | ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len; |
403 | break; |
404 | default: |
405 | break; |
406 | } |
407 | |
408 | if (mbuf_is_traffic_class_privileged(m)) { |
409 | ifp->if_tc.ifi_ipvpackets++; |
410 | ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len; |
411 | } |
412 | } |
413 | |
414 | /* |
415 | * This is called from DLIL output, hence multiple threads could end |
416 | * up modifying the statistics. We trade off acccuracy for performance |
417 | * by not using atomic operations here. |
418 | */ |
419 | __attribute__((always_inline)) |
420 | static inline void |
421 | ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m) |
422 | { |
423 | if (!(m->m_flags & M_PKTHDR)) |
424 | return; |
425 | |
426 | switch (m_get_traffic_class(m)) { |
427 | case MBUF_TC_BE: |
428 | ifp->if_tc.ifi_obepackets++; |
429 | ifp->if_tc.ifi_obebytes += m->m_pkthdr.len; |
430 | break; |
431 | case MBUF_TC_BK: |
432 | ifp->if_tc.ifi_obkpackets++; |
433 | ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len; |
434 | break; |
435 | case MBUF_TC_VI: |
436 | ifp->if_tc.ifi_ovipackets++; |
437 | ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len; |
438 | break; |
439 | case MBUF_TC_VO: |
440 | ifp->if_tc.ifi_ovopackets++; |
441 | ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len; |
442 | break; |
443 | default: |
444 | break; |
445 | } |
446 | |
447 | if (mbuf_is_traffic_class_privileged(m)) { |
448 | ifp->if_tc.ifi_opvpackets++; |
449 | ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len; |
450 | } |
451 | } |
452 | #endif /* BSD_KERNEL_PRIVATE */ |
453 | #endif /* KERNEL_PRIVATE */ |
454 | #endif /* KERNEL */ |
455 | #endif /* DLIL_H */ |
456 | |