/*
 * Copyright (c) 2016-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <netinet/in_arp.h>
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <net/ethernet.h>
#include <net/route.h>
#include <sys/eventhandler.h>
#include <net/sockaddr_utils.h>

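/*
 * The 14-byte Ethernet header is stored with 2 bytes of leading padding
 * (see flh_off in fsw_ethernet_ctor below) so the framing path can
 * prepend it with a single 16-byte sk_copy64_16 while leaving the
 * payload offset unchanged.
 */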
#define FSW_ETHER_LEN_PADDED 16
#define FSW_ETHER_PADDING (FSW_ETHER_LEN_PADDED - ETHER_HDR_LEN)
#define FSW_ETHER_FRAME_HEADROOM FSW_ETHER_LEN_PADDED

static void fsw_ethernet_ctor(struct nx_flowswitch *, struct flow_route *);
static int fsw_ethernet_resolve(struct nx_flowswitch *, struct flow_route *,
    struct __kern_packet *);
static void fsw_ethernet_frame(struct nx_flowswitch *, struct flow_route *,
    struct __kern_packet *);
static sa_family_t fsw_ethernet_demux(struct nx_flowswitch *,
    struct __kern_packet *);

extern struct rtstat rtstat;

int
fsw_ethernet_setup(struct nx_flowswitch *fsw, struct ifnet *ifp)
{
	struct ifaddr *lladdr = ifp->if_lladdr;

	if (SDL(lladdr->ifa_addr)->sdl_alen != ETHER_ADDR_LEN ||
	    SDL(lladdr->ifa_addr)->sdl_type != IFT_ETHER) {
		return ENOTSUP;
	}

	ifnet_lladdr_copy_bytes(ifp, fsw->fsw_ether_shost, ETHER_ADDR_LEN);
	fsw->fsw_ctor = fsw_ethernet_ctor;
	fsw->fsw_resolve = fsw_ethernet_resolve;
	fsw->fsw_frame = fsw_ethernet_frame;
	fsw->fsw_frame_headroom = FSW_ETHER_FRAME_HEADROOM;
	fsw->fsw_demux = fsw_ethernet_demux;

	return 0;
}

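/*
 * Pre-build the portion of the Ethernet header that does not depend on
 * the neighbor: source MAC and EtherType. The destination MAC is filled
 * in later by fsw_ethernet_resolve() once ARP/NDP has an answer.
 */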
static void
fsw_ethernet_ctor(struct nx_flowswitch *fsw, struct flow_route *fr)
{
	ASSERT(fr->fr_af == AF_INET || fr->fr_af == AF_INET6);

	fr->fr_llhdr.flh_gencnt = fsw->fsw_src_lla_gencnt;
	bcopy(fsw->fsw_ether_shost, fr->fr_eth.ether_shost, ETHER_ADDR_LEN);
	fr->fr_eth.ether_type = ((fr->fr_af == AF_INET) ?
	    htons(ETHERTYPE_IP) : htons(ETHERTYPE_IPV6));

	/* const override */
	_CASSERT(sizeof(fr->fr_llhdr.flh_off) == sizeof(uint8_t));
	_CASSERT(sizeof(fr->fr_llhdr.flh_len) == sizeof(uint8_t));
	*(uint8_t *)(uintptr_t)&fr->fr_llhdr.flh_off = 2;
	*(uint8_t *)(uintptr_t)&fr->fr_llhdr.flh_len = ETHER_HDR_LEN;

	SK_DF(SK_VERB_FLOW_ROUTE,
	    "fr 0x%llx eth_type 0x%x eth_src %x:%x:%x:%x:%x:%x",
	    SK_KVA(fr), ntohs(fr->fr_eth.ether_type),
	    fr->fr_eth.ether_shost[0], fr->fr_eth.ether_shost[1],
	    fr->fr_eth.ether_shost[2], fr->fr_eth.ether_shost[3],
	    fr->fr_eth.ether_shost[4], fr->fr_eth.ether_shost[5]);
}

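/*
 * Resolve (or probe) the link-layer address for a flow route. Returns 0
 * when fr holds a usable destination MAC, EJUSTRETURN when the packet
 * has been queued pending ARP/NDP resolution, or another errno on
 * failure.
 */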
static int
fsw_ethernet_resolve(struct nx_flowswitch *fsw, struct flow_route *fr,
    struct __kern_packet *pkt)
{
#if SK_LOG
	char dst_s[MAX_IPv6_STR_LEN];
#endif /* SK_LOG */
	struct ifnet *ifp = fsw->fsw_ifp;
	struct rtentry *tgt_rt = NULL;
	struct sockaddr *tgt_sa = NULL;
	struct mbuf *m = NULL;
	boolean_t reattach_mbuf = FALSE;
	boolean_t probing;
	int err = 0;

	ASSERT(fr != NULL);
	ASSERT(ifp != NULL);

	FR_LOCK(fr);
	/*
	 * If the destination is on-link, we use the final destination
	 * address as target. If it's off-link, we use the gateway
	 * address instead. Point tgt_rt to the destination or
	 * gateway route accordingly.
	 */
	if (fr->fr_flags & FLOWRTF_ONLINK) {
		tgt_sa = SA(&fr->fr_faddr);
		tgt_rt = fr->fr_rt_dst;
	} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
		tgt_sa = SA(&fr->fr_gaddr);
		tgt_rt = fr->fr_rt_gw;
	}

	/*
	 * Perform another routing table lookup if necessary.
	 */
	if (tgt_rt == NULL || !(tgt_rt->rt_flags & RTF_UP) ||
	    fr->fr_want_configure) {
		if (fr->fr_want_configure == 0) {
			os_atomic_inc(&fr->fr_want_configure, relaxed);
		}
		err = flow_route_configure(fr, ifp, NULL);
		if (err != 0) {
			SK_ERR("failed to configure route to %s on %s (err %d)",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname, err);
			goto done;
		}

		/* refresh pointers */
		if (fr->fr_flags & FLOWRTF_ONLINK) {
			tgt_sa = SA(&fr->fr_faddr);
			tgt_rt = fr->fr_rt_dst;
		} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
			tgt_sa = SA(&fr->fr_gaddr);
			tgt_rt = fr->fr_rt_gw;
		}
	}

	if (__improbable(!(fr->fr_flags & (FLOWRTF_ONLINK | FLOWRTF_GATEWAY)))) {
		err = EHOSTUNREACH;
		SK_ERR("invalid route for %s on %s (err %d)",
		    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
		    sizeof(dst_s)), ifp->if_xname, err);
		goto done;
	}

	ASSERT(tgt_sa != NULL);
	ASSERT(tgt_rt != NULL);

	/*
	 * Attempt to convert kpkt to mbuf before acquiring the
	 * rt lock, so that the lock won't be held if we need to
	 * block on mbuf allocation.
	 */
	if (!(fr->fr_flags & FLOWRTF_HAS_LLINFO)) {
		/*
		 * We need to resolve; if the caller passes in a kpkt,
		 * convert the kpkt within to an mbuf. The caller is then
		 * responsible for freeing the kpkt. In the future, we
		 * could optimize this by having the ARP/ND lookup routines
		 * understand kpkt and perform the conversion only
		 * when it is needed.
		 */
		if (__probable(pkt != NULL)) {
			if (pkt->pkt_pflags & PKT_F_MBUF_DATA) {
				reattach_mbuf = TRUE;
				m = pkt->pkt_mbuf;
				KPKT_CLEAR_MBUF_DATA(pkt);
			} else {
				m = fsw_classq_kpkt_to_mbuf(fsw, pkt);
			}
			if (m == NULL) {
				/* not a fatal error; move on */
				SK_ERR("failed to allocate mbuf while "
				    "resolving %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
			}
		} else {
			m = NULL;
		}
	}

	RT_LOCK(tgt_rt);

	if (__improbable(!IS_DIRECT_HOSTROUTE(tgt_rt) ||
	    tgt_rt->rt_gateway->sa_family != AF_LINK ||
	    SDL(tgt_rt->rt_gateway)->sdl_type != IFT_ETHER)) {
		rtstat.rts_badrtgwroute++;
		err = ENETUNREACH;
		RT_UNLOCK(tgt_rt);
		SK_ERR("bad gateway route %s on %s (err %d)",
		    sk_sa_ntop(tgt_sa, dst_s, sizeof(dst_s)),
		    ifp->if_xname, err);
		goto done;
	}

	/*
	 * If already resolved, grab the link-layer address and mark the
	 * flow route accordingly. Given that we will use the cached
	 * link-layer info, there's no need to convert and enqueue the
	 * packet to ARP/ND (i.e. no need to return EJUSTRETURN).
	 */
	if (__probable((fr->fr_flags & FLOWRTF_HAS_LLINFO) &&
	    SDL(tgt_rt->rt_gateway)->sdl_alen == ETHER_ADDR_LEN)) {
		VERIFY(m == NULL);
		FLOWRT_UPD_ETH_DST(fr, LLADDR(SDL(tgt_rt->rt_gateway)));
		os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
		/* if we're not probing, then we're done */
		if (!(probing = (fr->fr_want_probe != 0))) {
			VERIFY(err == 0);
			RT_UNLOCK(tgt_rt);
			goto done;
		}
		os_atomic_store(&fr->fr_want_probe, 0, release);
	} else {
		probing = FALSE;
		os_atomic_andnot(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
	}

	SK_DF(SK_VERB_FLOW_ROUTE, "%s %s on %s", (probing ?
	    "probing" : "resolving"), sk_sa_ntop(tgt_sa, dst_s,
	    sizeof(dst_s)), ifp->if_xname);

	/*
	 * Trigger ARP/NDP resolution or probing.
	 */
	switch (tgt_sa->sa_family) {
	case AF_INET: {
		struct sockaddr_dl sdl;

		RT_UNLOCK(tgt_rt);
		/*
		 * Note we pass NULL as the "hint" parameter, as tgt_sa
		 * already refers to the target address.
		 */
		SOCKADDR_ZERO(&sdl, sizeof(sdl));
		err = arp_lookup_ip(ifp, SIN(tgt_sa), &sdl, sizeof(sdl),
		    NULL, m);

		/*
		 * If we're resolving (not probing), and it's now resolved,
		 * grab the link-layer address and update the flow route.
		 * If we get EJUSTRETURN, the mbuf (if any) would have
		 * been added to the hold queue. Any other return value,
		 * including 0, means that we need to free it.
		 *
		 * If we're probing, we won't have any mbuf to deal with,
		 * and since we already have the cached llinfo we'll just
		 * return success even if we get EJUSTRETURN.
		 */
		if (!probing) {
			if (err == 0 && sdl.sdl_alen == ETHER_ADDR_LEN) {
				SK_DF(SK_VERB_FLOW_ROUTE,
				    "fast-resolve %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
				FLOWRT_UPD_ETH_DST(fr, LLADDR(&sdl));
				os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
			}
			if (err == EJUSTRETURN && m != NULL) {
				SK_DF(SK_VERB_FLOW_ROUTE, "packet queued "
				    "while resolving %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
				m = NULL;
			}
		} else {
			VERIFY(m == NULL);
			if (err == EJUSTRETURN) {
				err = 0;
			}
		}
		break;
	}

	case AF_INET6: {
		struct llinfo_nd6 *ln = tgt_rt->rt_llinfo;

		/*
		 * Check if the route is down. RTF_LLINFO is set during
		 * RTM_{ADD,RESOLVE}, and is never cleared until the route
		 * is deleted from the routing table.
		 */
		if ((tgt_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
		    (RTF_UP | RTF_LLINFO) || ln == NULL) {
			err = EHOSTUNREACH;
			SK_ERR("route unavailable for %s on %s (err %d)",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname, err);
			RT_UNLOCK(tgt_rt);
			break;
		}

		/*
		 * If we're probing and the IPv6 ND cache entry is STALE,
		 * use it anyway, but also mark it for a delayed probe
		 * and update the expiry.
		 */
		if (probing) {
			VERIFY(m == NULL);
			VERIFY(ln->ln_state > ND6_LLINFO_INCOMPLETE);
			if (ln->ln_state == ND6_LLINFO_STALE) {
				ln->ln_asked = 0;
				ND6_CACHE_STATE_TRANSITION(ln,
				    ND6_LLINFO_DELAY);
				ln_setexpire(ln, net_uptime() + nd6_delay);
				RT_UNLOCK(tgt_rt);

				lck_mtx_lock(rnh_lock);
				nd6_sched_timeout(NULL, NULL);
				lck_mtx_unlock(rnh_lock);

				SK_DF(SK_VERB_FLOW_ROUTE,
				    "NUD probe scheduled for %s on %s",
				    sk_sa_ntop(tgt_sa, dst_s,
				    sizeof(dst_s)), ifp->if_xname);
			} else {
				RT_UNLOCK(tgt_rt);
			}
			VERIFY(err == 0);
			break;
		}

		/*
		 * If this is a permanent ND entry, we're done.
		 */
		if (ln->ln_expire == 0 &&
		    ln->ln_state == ND6_LLINFO_REACHABLE) {
			if (SDL(tgt_rt->rt_gateway)->sdl_alen !=
			    ETHER_ADDR_LEN) {
				err = EHOSTUNREACH;
				SK_ERR("invalid permanent route %s on %s "
				    "ln 0x%llx (err %d)",
				    sk_sa_ntop(rt_key(tgt_rt), dst_s,
				    sizeof(dst_s)), ifp->if_xname,
				    SK_KVA(ln), err);
			} else {
				SK_DF(SK_VERB_FLOW_ROUTE, "fast-resolve "
				    "permanent route %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
				/* copy permanent address into the flow route */
				FLOWRT_UPD_ETH_DST(fr,
				    LLADDR(SDL(tgt_rt->rt_gateway)));
				os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
				VERIFY(err == 0);
			}
			RT_UNLOCK(tgt_rt);
			break;
		}

		if (ln->ln_state == ND6_LLINFO_NOSTATE) {
			ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE);
		}

		if (ln->ln_state == ND6_LLINFO_INCOMPLETE && (!ln->ln_asked ||
		    !(fr->fr_flags & FLOWRTF_HAS_LLINFO))) {
			struct nd_ifinfo *ndi = ND_IFINFO(tgt_rt->rt_ifp);
			/*
			 * There is a neighbor cache entry, but no Ethernet
			 * address response yet. Replace the held mbuf
			 * (if any) with the one we have (if any); otherwise
			 * leave it alone.
			 *
			 * This code conforms to the rate-limiting rule
			 * described in Section 7.2.2 of RFC 4861, because
			 * the timer is set correctly after sending an
			 * NS below.
			 */
			if (m != NULL) {
				if (ln->ln_hold != NULL) {
					m_freem_list(ln->ln_hold);
				}
				ln->ln_hold = m;
				m = NULL;

				SK_DF(SK_VERB_FLOW_ROUTE,
				    "packet queued while resolving %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
			}
			VERIFY(ndi != NULL && ndi->initialized);
			ln->ln_asked++;
			ln_setexpire(ln, net_uptime() + ndi->retrans / 1000);
			RT_UNLOCK(tgt_rt);

			SK_DF(SK_VERB_FLOW_ROUTE, "soliciting for %s on %s "
			    "ln 0x%llx state %u", sk_sa_ntop(rt_key(tgt_rt),
			    dst_s, sizeof(dst_s)), ifp->if_xname, SK_KVA(ln),
			    ln->ln_state);

			/* XXX Refactor this to use the same src IP */
			nd6_ns_output(tgt_rt->rt_ifp, NULL,
			    &SIN6(rt_key(tgt_rt))->sin6_addr, NULL, NULL);

			lck_mtx_lock(rnh_lock);
			nd6_sched_timeout(NULL, NULL);
			lck_mtx_unlock(rnh_lock);
			err = EJUSTRETURN;
		} else {
			SK_DF(SK_VERB_FLOW_ROUTE, "fast-resolve %s on %s",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname);
			/*
			 * The neighbor cache entry has been resolved;
			 * copy the address into the flow route.
			 */
			FLOWRT_UPD_ETH_DST(fr, LLADDR(SDL(tgt_rt->rt_gateway)));
			os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
			RT_UNLOCK(tgt_rt);
			VERIFY(err == 0);
		}
		/*
		 * XXX Need to optimize for the NDP garbage
		 * collection. It would be even better to unify
		 * BSD/SK NDP management through the completion
		 * of the L2/L3 split.
		 */
		break;
	}

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	RT_LOCK_ASSERT_NOTHELD(tgt_rt);

done:
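	/* if ARP/ND didn't keep the mbuf (EJUSTRETURN), return it to the caller's kpkt or free it */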
	if (m != NULL) {
		if (reattach_mbuf) {
			pkt->pkt_mbuf = m;
			pkt->pkt_pflags |= PKT_F_MBUF_DATA;
		} else {
			m_freem_list(m);
		}
		m = NULL;
	}

	if (__improbable(err != 0 && err != EJUSTRETURN)) {
		SK_ERR("route to %s on %s can't be resolved (err %d)",
		    sk_sa_ntop(SA(&fr->fr_faddr), dst_s, sizeof(dst_s)),
		    ifp->if_xname, err);
		/* keep FLOWRTF_HAS_LLINFO as llinfo is still useful */
		os_atomic_andnot(&fr->fr_flags, FLOWRTF_RESOLVED, relaxed);
		flow_route_cleanup(fr);
	}

	FR_UNLOCK(fr);

	return err;
}

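/*
 * Frame an outbound packet: prepend the cached (padded) Ethernet header
 * from the flow route into the packet's headroom, and mirror the change
 * into the attached mbuf when the packet carries one.
 */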
static void
fsw_ethernet_frame(struct nx_flowswitch *fsw, struct flow_route *fr,
    struct __kern_packet *pkt)
{
	/* in the event the source MAC address changed, update our copy */
	if (__improbable(fr->fr_llhdr.flh_gencnt != fsw->fsw_src_lla_gencnt)) {
		uint8_t old_shost[ETHER_ADDR_LEN];

		bcopy(&fr->fr_eth.ether_shost, &old_shost, ETHER_ADDR_LEN);
		fsw_ethernet_ctor(fsw, fr);

		SK_ERR("fr 0x%llx source MAC address updated on %s, "
		    "was %x:%x:%x:%x:%x:%x now %x:%x:%x:%x:%x:%x",
		    SK_KVA(fr), fsw->fsw_ifp->if_xname,
		    old_shost[0], old_shost[1],
		    old_shost[2], old_shost[3],
		    old_shost[4], old_shost[5],
		    fr->fr_eth.ether_shost[0], fr->fr_eth.ether_shost[1],
		    fr->fr_eth.ether_shost[2], fr->fr_eth.ether_shost[3],
		    fr->fr_eth.ether_shost[4], fr->fr_eth.ether_shost[5]);
	}

	_CASSERT(sizeof(fr->fr_eth_padded) == FSW_ETHER_LEN_PADDED);

	if ((fr->fr_flags & FLOWRTF_DST_LL_MCAST) != 0) {
		pkt->pkt_link_flags |= PKT_LINKF_MCAST;
	} else if ((fr->fr_flags & FLOWRTF_DST_LL_BCAST) != 0) {
		pkt->pkt_link_flags |= PKT_LINKF_BCAST;
	}

	ASSERT(pkt->pkt_headroom >= FSW_ETHER_LEN_PADDED);

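	/* one aligned 16-byte copy drops the padded header in place; flh_off (2) skips the pad */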
	char *pkt_buf;
	MD_BUFLET_ADDR_ABS(pkt, pkt_buf);
	sk_copy64_16((uint64_t *)(void *)&fr->fr_eth_padded,
	    (uint64_t *)(void *)(pkt_buf + pkt->pkt_headroom - FSW_ETHER_LEN_PADDED));

	pkt->pkt_headroom -= ETHER_HDR_LEN;
	pkt->pkt_l2_len = ETHER_HDR_LEN;

	if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) {
		/* frame and fix up mbuf */
		struct mbuf *m = pkt->pkt_mbuf;
		sk_copy64_16((uint64_t *)(void *)&fr->fr_eth_padded,
		    (uint64_t *)(void *)(m->m_data - FSW_ETHER_LEN_PADDED));
		ASSERT((uintptr_t)m->m_data ==
		    (uintptr_t)mbuf_datastart(m) + FSW_ETHER_FRAME_HEADROOM);
		m->m_data -= ETHER_HDR_LEN;
		m->m_len += ETHER_HDR_LEN;
		m_pktlen(m) += ETHER_HDR_LEN;
		ASSERT(m->m_len == m_pktlen(m));
		pkt->pkt_length = m_pktlen(m);
	} else {
		METADATA_ADJUST_LEN(pkt, ETHER_HDR_LEN, pkt->pkt_headroom);
	}
}

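/*
 * Classify an inbound Ethernet frame: validate that the header fits in
 * the first buflet, strip a trailing FCS if present, flag frames not
 * destined to us as promiscuous, and map the EtherType to an address
 * family.
 */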
static sa_family_t
fsw_ethernet_demux(struct nx_flowswitch *fsw, struct __kern_packet *pkt)
{
	const struct ether_header *eh;
	sa_family_t af = AF_UNSPEC;
	uint32_t bdlen, bdlim, bdoff;
	uint8_t *baddr;

	MD_BUFLET_ADDR_ABS_DLEN(pkt, baddr, bdlen, bdlim, bdoff);
	baddr += pkt->pkt_headroom;
	eh = (struct ether_header *)(void *)baddr;

	if (__improbable(sizeof(*eh) > pkt->pkt_length)) {
		STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_ERR);
		SK_ERR("unrecognized pkt, len %u", pkt->pkt_length);
		return AF_UNSPEC;
	}

	if (__improbable(pkt->pkt_headroom + sizeof(*eh) > bdlim)) {
		SK_ERR("ethernet header overrun 1st buflet");
		STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_ERR);
		return AF_UNSPEC;
	}

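	/* if the driver left the FCS on the frame, trim it from the packet and any attached mbuf */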
	if (__improbable((pkt->pkt_link_flags & PKT_LINKF_ETHFCS) != 0)) {
		pkt->pkt_length -= ETHER_CRC_LEN;
		pkt->pkt_link_flags &= ~PKT_LINKF_ETHFCS;
		if (pkt->pkt_pflags & PKT_F_MBUF_DATA) {
			ASSERT((pkt->pkt_mbuf->m_flags & M_HASFCS) != 0);
			m_adj(pkt->pkt_mbuf, -ETHER_CRC_LEN);
			pkt->pkt_mbuf->m_flags &= ~M_HASFCS;
		}
	}
	pkt->pkt_l2_len = ETHER_HDR_LEN;
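	/* the low-order bit of the first destination octet is the group bit; clear means unicast */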
	if ((eh->ether_dhost[0] & 1) == 0) {
		/*
		 * When the driver is put into promiscuous mode, we may
		 * receive unicast frames that are not intended for our
		 * interface. They are marked here as promiscuous so the
		 * caller may dispose of them after passing the packets
		 * to any interface filters.
		 */
		if (_ether_cmp(eh->ether_dhost, IF_LLADDR(fsw->fsw_ifp))) {
			pkt->pkt_pflags |= PKT_F_PROMISC;
			STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_PROMISC);
			return AF_UNSPEC;
		}
	}
	uint16_t ether_type = ntohs(eh->ether_type);
	switch (ether_type) {
	case ETHERTYPE_IP:
		af = AF_INET;
		break;
	case ETHERTYPE_IPV6:
		af = AF_INET6;
		break;
	default:
		STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_UNSPEC);
		break;
	}

	return af;
}