1 | /* |
2 | * Copyright (c) 2016-2020 Apple Inc. All rights reserved. |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
5 | * |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License |
8 | * Version 2.0 (the 'License'). You may not use this file except in |
9 | * compliance with the License. The rights granted to you under the License |
10 | * may not be used to create, or enable the creation or redistribution of, |
11 | * unlawful or unlicensed copies of an Apple operating system, or to |
12 | * circumvent, violate, or enable the circumvention or violation of, any |
13 | * terms of an Apple operating system software license agreement. |
14 | * |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
17 | * |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and |
24 | * limitations under the License. |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ |
28 | |
29 | #include <skywalk/os_skywalk_private.h> |
30 | #include <skywalk/nexus/flowswitch/nx_flowswitch.h> |
31 | #include <skywalk/nexus/flowswitch/fsw_var.h> |
32 | #include <netinet/in_arp.h> |
33 | #include <netinet/ip6.h> |
34 | #include <netinet6/in6_var.h> |
35 | #include <netinet6/nd6.h> |
36 | #include <net/ethernet.h> |
37 | #include <net/route.h> |
38 | #include <sys/eventhandler.h> |
39 | #include <net/sockaddr_utils.h> |
40 | |
/*
 * Frame headroom layout: the 14-byte Ethernet header is stored with
 * 2 bytes of leading padding so the (padded) header occupies 16 bytes,
 * keeping the payload that follows 16-byte aligned for sk_copy64_16().
 */
#define FSW_ETHER_LEN_PADDED 16
#define FSW_ETHER_PADDING (FSW_ETHER_LEN_PADDED - ETHER_HDR_LEN)
#define FSW_ETHER_FRAME_HEADROOM FSW_ETHER_LEN_PADDED

/* flowswitch per-interface-type ops installed by fsw_ethernet_setup() */
static void fsw_ethernet_ctor(struct nx_flowswitch *, struct flow_route *);
static int fsw_ethernet_resolve(struct nx_flowswitch *, struct flow_route *,
    struct __kern_packet *);
static void fsw_ethernet_frame(struct nx_flowswitch *, struct flow_route *,
    struct __kern_packet *);
static sa_family_t fsw_ethernet_demux(struct nx_flowswitch *,
    struct __kern_packet *);

/* global routing statistics (defined in the routing subsystem) */
extern struct rtstat rtstat;
54 | |
55 | int |
56 | fsw_ethernet_setup(struct nx_flowswitch *fsw, struct ifnet *ifp) |
57 | { |
58 | struct ifaddr *lladdr = ifp->if_lladdr; |
59 | |
60 | if (SDL(lladdr->ifa_addr)->sdl_alen != ETHER_ADDR_LEN || |
61 | SDL(lladdr->ifa_addr)->sdl_type != IFT_ETHER) { |
62 | return ENOTSUP; |
63 | } |
64 | |
65 | ifnet_lladdr_copy_bytes(interface: ifp, lladdr: fsw->fsw_ether_shost, ETHER_ADDR_LEN); |
66 | fsw->fsw_ctor = fsw_ethernet_ctor; |
67 | fsw->fsw_resolve = fsw_ethernet_resolve; |
68 | fsw->fsw_frame = fsw_ethernet_frame; |
69 | fsw->fsw_frame_headroom = FSW_ETHER_FRAME_HEADROOM; |
70 | fsw->fsw_demux = fsw_ethernet_demux; |
71 | |
72 | return 0; |
73 | } |
74 | |
75 | static void |
76 | fsw_ethernet_ctor(struct nx_flowswitch *fsw, struct flow_route *fr) |
77 | { |
78 | ASSERT(fr->fr_af == AF_INET || fr->fr_af == AF_INET6); |
79 | |
80 | fr->fr_llhdr.flh_gencnt = fsw->fsw_src_lla_gencnt; |
81 | bcopy(src: fsw->fsw_ether_shost, dst: fr->fr_eth.ether_shost, ETHER_ADDR_LEN); |
82 | fr->fr_eth.ether_type = ((fr->fr_af == AF_INET) ? |
83 | htons(ETHERTYPE_IP) : htons(ETHERTYPE_IPV6)); |
84 | |
85 | /* const override */ |
86 | _CASSERT(sizeof(fr->fr_llhdr.flh_off) == sizeof(uint8_t)); |
87 | _CASSERT(sizeof(fr->fr_llhdr.flh_len) == sizeof(uint8_t)); |
88 | *(uint8_t *)(uintptr_t)&fr->fr_llhdr.flh_off = 2; |
89 | *(uint8_t *)(uintptr_t)&fr->fr_llhdr.flh_len = ETHER_HDR_LEN; |
90 | |
91 | SK_DF(SK_VERB_FLOW_ROUTE, |
92 | "fr 0x%llx eth_type 0x%x eth_src %x:%x:%x:%x:%x:%x" , |
93 | SK_KVA(fr), ntohs(fr->fr_eth.ether_type), |
94 | fr->fr_eth.ether_shost[0], fr->fr_eth.ether_shost[1], |
95 | fr->fr_eth.ether_shost[2], fr->fr_eth.ether_shost[3], |
96 | fr->fr_eth.ether_shost[4], fr->fr_eth.ether_shost[5]); |
97 | } |
98 | |
99 | static int |
100 | fsw_ethernet_resolve(struct nx_flowswitch *fsw, struct flow_route *fr, |
101 | struct __kern_packet *pkt) |
102 | { |
103 | #if SK_LOG |
104 | char dst_s[MAX_IPv6_STR_LEN]; |
105 | #endif /* SK_LOG */ |
106 | struct ifnet *ifp = fsw->fsw_ifp; |
107 | struct rtentry *tgt_rt = NULL; |
108 | struct sockaddr *tgt_sa = NULL; |
109 | struct mbuf *m = NULL; |
110 | boolean_t reattach_mbuf = FALSE; |
111 | boolean_t probing; |
112 | int err = 0; |
113 | |
114 | ASSERT(fr != NULL); |
115 | ASSERT(ifp != NULL); |
116 | |
117 | FR_LOCK(fr); |
118 | /* |
119 | * If the destination is on-link, we use the final destination |
120 | * address as target. If it's off-link, we use the gateway |
121 | * address instead. Point tgt_rt to the the destination or |
122 | * gateway route accordingly. |
123 | */ |
124 | if (fr->fr_flags & FLOWRTF_ONLINK) { |
125 | tgt_sa = SA(&fr->fr_faddr); |
126 | tgt_rt = fr->fr_rt_dst; |
127 | } else if (fr->fr_flags & FLOWRTF_GATEWAY) { |
128 | tgt_sa = SA(&fr->fr_gaddr); |
129 | tgt_rt = fr->fr_rt_gw; |
130 | } |
131 | |
132 | /* |
133 | * Perform another routing table lookup if necessary. |
134 | */ |
135 | if (tgt_rt == NULL || !(tgt_rt->rt_flags & RTF_UP) || |
136 | fr->fr_want_configure) { |
137 | if (fr->fr_want_configure == 0) { |
138 | os_atomic_inc(&fr->fr_want_configure, relaxed); |
139 | } |
140 | err = flow_route_configure(fr, ifp, NULL); |
141 | if (err != 0) { |
142 | SK_ERR("failed to configure route to %s on %s (err %d)" , |
143 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
144 | sizeof(dst_s)), ifp->if_xname, err); |
145 | goto done; |
146 | } |
147 | |
148 | /* refresh pointers */ |
149 | if (fr->fr_flags & FLOWRTF_ONLINK) { |
150 | tgt_sa = SA(&fr->fr_faddr); |
151 | tgt_rt = fr->fr_rt_dst; |
152 | } else if (fr->fr_flags & FLOWRTF_GATEWAY) { |
153 | tgt_sa = SA(&fr->fr_gaddr); |
154 | tgt_rt = fr->fr_rt_gw; |
155 | } |
156 | } |
157 | |
158 | if (__improbable(!(fr->fr_flags & (FLOWRTF_ONLINK | FLOWRTF_GATEWAY)))) { |
159 | err = EHOSTUNREACH; |
160 | SK_ERR("invalid route for %s on %s (err %d)" , |
161 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
162 | sizeof(dst_s)), ifp->if_xname, err); |
163 | goto done; |
164 | } |
165 | |
166 | ASSERT(tgt_sa != NULL); |
167 | ASSERT(tgt_rt != NULL); |
168 | |
169 | /* |
170 | * Attempt to convert kpkt to mbuf before acquiring the |
171 | * rt lock so that the lock won't be held if we need to do |
172 | * blocked a mbuf allocation. |
173 | */ |
174 | if (!(fr->fr_flags & FLOWRTF_HAS_LLINFO)) { |
175 | /* |
176 | * We need to resolve; if caller passes in a kpkt, |
177 | * convert the kpkt within to mbuf. Caller is then |
178 | * reponsible for freeing kpkt. In future, we could |
179 | * optimize this by having the ARP/ND lookup routines |
180 | * understand kpkt and perform the conversion only |
181 | * when it is needed. |
182 | */ |
183 | if (__probable(pkt != NULL)) { |
184 | if (pkt->pkt_pflags & PKT_F_MBUF_DATA) { |
185 | reattach_mbuf = TRUE; |
186 | m = pkt->pkt_mbuf; |
187 | KPKT_CLEAR_MBUF_DATA(pkt); |
188 | } else { |
189 | m = fsw_classq_kpkt_to_mbuf(fsw, pkt); |
190 | } |
191 | if (m == NULL) { |
192 | /* not a fatal error; move on */ |
193 | SK_ERR("failed to allocate mbuf while " |
194 | "resolving %s on %s" , |
195 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
196 | sizeof(dst_s)), ifp->if_xname); |
197 | } |
198 | } else { |
199 | m = NULL; |
200 | } |
201 | } |
202 | |
203 | RT_LOCK(tgt_rt); |
204 | |
205 | if (__improbable(!IS_DIRECT_HOSTROUTE(tgt_rt) || |
206 | tgt_rt->rt_gateway->sa_family != AF_LINK || |
207 | SDL(tgt_rt->rt_gateway)->sdl_type != IFT_ETHER)) { |
208 | rtstat.rts_badrtgwroute++; |
209 | err = ENETUNREACH; |
210 | RT_UNLOCK(tgt_rt); |
211 | SK_ERR("bad gateway route %s on %s (err %d)" , |
212 | sk_sa_ntop(tgt_sa, dst_s, sizeof(dst_s)), |
213 | ifp->if_xname, err); |
214 | goto done; |
215 | } |
216 | |
217 | /* |
218 | * If already resolved, grab the link-layer address and mark the |
219 | * flow route accordingly. Given that we will use the cached |
220 | * link-layer info, there's no need to convert and enqueue the |
221 | * packet to ARP/ND (i.e. no need to return EJUSTRETURN). |
222 | */ |
223 | if (__probable((fr->fr_flags & FLOWRTF_HAS_LLINFO) && |
224 | SDL(tgt_rt->rt_gateway)->sdl_alen == ETHER_ADDR_LEN)) { |
225 | VERIFY(m == NULL); |
226 | FLOWRT_UPD_ETH_DST(fr, LLADDR(SDL(tgt_rt->rt_gateway))); |
227 | os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed); |
228 | /* if we're not probing, then we're done */ |
229 | if (!(probing = (fr->fr_want_probe != 0))) { |
230 | VERIFY(err == 0); |
231 | RT_UNLOCK(tgt_rt); |
232 | goto done; |
233 | } |
234 | os_atomic_store(&fr->fr_want_probe, 0, release); |
235 | } else { |
236 | probing = FALSE; |
237 | os_atomic_andnot(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed); |
238 | } |
239 | |
240 | SK_DF(SK_VERB_FLOW_ROUTE, "%s %s on %s" , (probing ? |
241 | "probing" : "resolving" ), sk_sa_ntop(tgt_sa, dst_s, |
242 | sizeof(dst_s)), ifp->if_xname); |
243 | |
244 | /* |
245 | * Trigger ARP/NDP resolution or probing. |
246 | */ |
247 | switch (tgt_sa->sa_family) { |
248 | case AF_INET: { |
249 | struct sockaddr_dl sdl; |
250 | |
251 | RT_UNLOCK(tgt_rt); |
252 | /* |
253 | * Note we pass NULL as "hint" parameter, as tgt_sa |
254 | * is already refererring to the target address. |
255 | */ |
256 | SOCKADDR_ZERO(&sdl, sizeof(sdl)); |
257 | err = arp_lookup_ip(interface: ifp, SIN(tgt_sa), ll_dest: &sdl, ll_dest_len: sizeof(sdl), |
258 | NULL, packet: m); |
259 | |
260 | /* |
261 | * If we're resolving (not probing), and it's now resolved, |
262 | * grab the link-layer address and update the flow route. |
263 | * If we get EJUSTRETURN, the mbuf (if any) would have |
264 | * been added to the hold queue. Any other return values |
265 | * including 0 means that we need to free it. |
266 | * |
267 | * If we're probing, we won't have any mbuf to deal with, |
268 | * and since we already have the cached llinfo we'll just |
269 | * return success even if we get EJUSTRETURN. |
270 | */ |
271 | if (!probing) { |
272 | if (err == 0 && sdl.sdl_alen == ETHER_ADDR_LEN) { |
273 | SK_DF(SK_VERB_FLOW_ROUTE, |
274 | "fast-resolve %s on %s" , |
275 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
276 | sizeof(dst_s)), ifp->if_xname); |
277 | FLOWRT_UPD_ETH_DST(fr, LLADDR(&sdl)); |
278 | os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed); |
279 | } |
280 | if (err == EJUSTRETURN && m != NULL) { |
281 | SK_DF(SK_VERB_FLOW_ROUTE, "packet queued " |
282 | "while resolving %s on %s" , |
283 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
284 | sizeof(dst_s)), ifp->if_xname); |
285 | m = NULL; |
286 | } |
287 | } else { |
288 | VERIFY(m == NULL); |
289 | if (err == EJUSTRETURN) { |
290 | err = 0; |
291 | } |
292 | } |
293 | break; |
294 | } |
295 | |
296 | case AF_INET6: { |
297 | struct llinfo_nd6 *ln = tgt_rt->rt_llinfo; |
298 | |
299 | /* |
300 | * Check if the route is down. RTF_LLINFO is set during |
301 | * RTM_{ADD,RESOLVE}, and is never cleared until the route |
302 | * is deleted from the routing table. |
303 | */ |
304 | if ((tgt_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != |
305 | (RTF_UP | RTF_LLINFO) || ln == NULL) { |
306 | err = EHOSTUNREACH; |
307 | SK_ERR("route unavailable for %s on %s (err %d)" , |
308 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
309 | sizeof(dst_s)), ifp->if_xname, err); |
310 | RT_UNLOCK(tgt_rt); |
311 | break; |
312 | } |
313 | |
314 | /* |
315 | * If we're probing and IPv6 ND cache entry is STALE, |
316 | * use it anyway but also mark it for delayed probe |
317 | * and update the expiry. |
318 | */ |
319 | if (probing) { |
320 | VERIFY(m == NULL); |
321 | VERIFY(ln->ln_state > ND6_LLINFO_INCOMPLETE); |
322 | if (ln->ln_state == ND6_LLINFO_STALE) { |
323 | ln->ln_asked = 0; |
324 | ND6_CACHE_STATE_TRANSITION(ln, |
325 | ND6_LLINFO_DELAY); |
326 | ln_setexpire(ln, net_uptime() + nd6_delay); |
327 | RT_UNLOCK(tgt_rt); |
328 | |
329 | lck_mtx_lock(rnh_lock); |
330 | nd6_sched_timeout(NULL, NULL); |
331 | lck_mtx_unlock(rnh_lock); |
332 | |
333 | SK_DF(SK_VERB_FLOW_ROUTE, |
334 | "NUD probe scheduled for %s on %s" , |
335 | sk_sa_ntop(tgt_sa, dst_s, |
336 | sizeof(dst_s)), ifp->if_xname); |
337 | } else { |
338 | RT_UNLOCK(tgt_rt); |
339 | } |
340 | VERIFY(err == 0); |
341 | break; |
342 | } |
343 | |
344 | /* |
345 | * If this is a permanent ND entry, we're done. |
346 | */ |
347 | if (ln->ln_expire == 0 && |
348 | ln->ln_state == ND6_LLINFO_REACHABLE) { |
349 | if (SDL(tgt_rt->rt_gateway)->sdl_alen != |
350 | ETHER_ADDR_LEN) { |
351 | err = EHOSTUNREACH; |
352 | SK_ERR("invalid permanent route %s on %s" |
353 | "ln 0x%llx (err %d)" , |
354 | sk_sa_ntop(rt_key(tgt_rt), dst_s, |
355 | sizeof(dst_s)), ifp->if_xname, |
356 | SK_KVA(ln), err); |
357 | } else { |
358 | SK_DF(SK_VERB_FLOW_ROUTE, "fast-resolve " |
359 | "permanent route %s on %s" , |
360 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
361 | sizeof(dst_s)), ifp->if_xname); |
362 | /* copy permanent address into the flow route */ |
363 | FLOWRT_UPD_ETH_DST(fr, |
364 | LLADDR(SDL(tgt_rt->rt_gateway))); |
365 | os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed); |
366 | VERIFY(err == 0); |
367 | } |
368 | RT_UNLOCK(tgt_rt); |
369 | break; |
370 | } |
371 | |
372 | if (ln->ln_state == ND6_LLINFO_NOSTATE) { |
373 | ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE); |
374 | } |
375 | |
376 | if (ln->ln_state == ND6_LLINFO_INCOMPLETE && (!ln->ln_asked || |
377 | !(fr->fr_flags & FLOWRTF_HAS_LLINFO))) { |
378 | struct nd_ifinfo *ndi = ND_IFINFO(tgt_rt->rt_ifp); |
379 | /* |
380 | * There is a neighbor cache entry, but no Ethernet |
381 | * address response yet. Replace the held mbuf |
382 | * (if any) with this the one we have (if any), |
383 | * else leave it alone. |
384 | * |
385 | * This code conforms to the rate-limiting rule |
386 | * described in Section 7.2.2 of RFC 4861, because |
387 | * the timer is set correctly after sending an |
388 | * NS below. |
389 | */ |
390 | if (m != NULL) { |
391 | if (ln->ln_hold != NULL) { |
392 | m_freem_list(ln->ln_hold); |
393 | } |
394 | ln->ln_hold = m; |
395 | m = NULL; |
396 | |
397 | SK_DF(SK_VERB_FLOW_ROUTE, |
398 | "packet queued while resolving %s on %s" , |
399 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
400 | sizeof(dst_s)), ifp->if_xname); |
401 | } |
402 | VERIFY(ndi != NULL && ndi->initialized); |
403 | ln->ln_asked++; |
404 | ln_setexpire(ln, net_uptime() + ndi->retrans / 1000); |
405 | RT_UNLOCK(tgt_rt); |
406 | |
407 | SK_DF(SK_VERB_FLOW_ROUTE, "soliciting for %s on %s" |
408 | "ln 0x%llx state %u" , sk_sa_ntop(rt_key(tgt_rt), |
409 | dst_s, sizeof(dst_s)), ifp->if_xname, SK_KVA(ln), |
410 | ln->ln_state); |
411 | |
412 | /* XXX Refactor this to use same src ip */ |
413 | nd6_ns_output(tgt_rt->rt_ifp, NULL, |
414 | &SIN6(rt_key(tgt_rt))->sin6_addr, NULL, NULL); |
415 | |
416 | lck_mtx_lock(rnh_lock); |
417 | nd6_sched_timeout(NULL, NULL); |
418 | lck_mtx_unlock(rnh_lock); |
419 | err = EJUSTRETURN; |
420 | } else { |
421 | SK_DF(SK_VERB_FLOW_ROUTE, "fast-resolve %s on %s" , |
422 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, |
423 | sizeof(dst_s)), ifp->if_xname); |
424 | /* |
425 | * The neighbor cache entry has been resolved; |
426 | * copy the address into the flow route. |
427 | */ |
428 | FLOWRT_UPD_ETH_DST(fr, LLADDR(SDL(tgt_rt->rt_gateway))); |
429 | os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed); |
430 | RT_UNLOCK(tgt_rt); |
431 | VERIFY(err == 0); |
432 | } |
433 | /* |
434 | * XXX Need to optimize for the NDP garbage |
435 | * collection. It would be even better to unify |
436 | * BSD/SK NDP management through the completion |
437 | * of L2/L3 split. |
438 | */ |
439 | break; |
440 | } |
441 | |
442 | default: |
443 | VERIFY(0); |
444 | /* NOTREACHED */ |
445 | __builtin_unreachable(); |
446 | } |
447 | RT_LOCK_ASSERT_NOTHELD(tgt_rt); |
448 | |
449 | done: |
450 | if (m != NULL) { |
451 | if (reattach_mbuf) { |
452 | pkt->pkt_mbuf = m; |
453 | pkt->pkt_pflags |= PKT_F_MBUF_DATA; |
454 | } else { |
455 | m_freem_list(m); |
456 | } |
457 | m = NULL; |
458 | } |
459 | |
460 | if (__improbable(err != 0 && err != EJUSTRETURN)) { |
461 | SK_ERR("route to %s on %s can't be resolved (err %d)" , |
462 | sk_sa_ntop(SA(&fr->fr_faddr), dst_s, sizeof(dst_s)), |
463 | ifp->if_xname, err); |
464 | /* keep FLOWRTF_HAS_LLINFO as llinfo is still useful */ |
465 | os_atomic_andnot(&fr->fr_flags, FLOWRTF_RESOLVED, relaxed); |
466 | flow_route_cleanup(fr); |
467 | } |
468 | |
469 | FR_UNLOCK(fr); |
470 | |
471 | return err; |
472 | } |
473 | |
474 | static void |
475 | fsw_ethernet_frame(struct nx_flowswitch *fsw, struct flow_route *fr, |
476 | struct __kern_packet *pkt) |
477 | { |
478 | /* in the event the source MAC address changed, update our copy */ |
479 | if (__improbable(fr->fr_llhdr.flh_gencnt != fsw->fsw_src_lla_gencnt)) { |
480 | uint8_t old_shost[ETHER_ADDR_LEN]; |
481 | |
482 | bcopy(src: &fr->fr_eth.ether_shost, dst: &old_shost, ETHER_ADDR_LEN); |
483 | fsw_ethernet_ctor(fsw, fr); |
484 | |
485 | SK_ERR("fr 0x%llx source MAC address updated on %s, " |
486 | "was %x:%x:%x:%x:%x:%x now %x:%x:%x:%x:%x:%x" , |
487 | SK_KVA(fr), fsw->fsw_ifp, |
488 | old_shost[0], old_shost[1], |
489 | old_shost[2], old_shost[3], |
490 | old_shost[4], old_shost[5], |
491 | fr->fr_eth.ether_shost[0], fr->fr_eth.ether_shost[1], |
492 | fr->fr_eth.ether_shost[2], fr->fr_eth.ether_shost[3], |
493 | fr->fr_eth.ether_shost[4], fr->fr_eth.ether_shost[5]); |
494 | } |
495 | |
496 | _CASSERT(sizeof(fr->fr_eth_padded) == FSW_ETHER_LEN_PADDED); |
497 | |
498 | if ((fr->fr_flags & FLOWRTF_DST_LL_MCAST) != 0) { |
499 | pkt->pkt_link_flags |= PKT_LINKF_MCAST; |
500 | } else if ((fr->fr_flags & FLOWRTF_DST_LL_BCAST) != 0) { |
501 | pkt->pkt_link_flags |= PKT_LINKF_BCAST; |
502 | } |
503 | |
504 | ASSERT(pkt->pkt_headroom >= FSW_ETHER_LEN_PADDED); |
505 | |
506 | char *pkt_buf; |
507 | MD_BUFLET_ADDR_ABS(pkt, pkt_buf); |
508 | sk_copy64_16(src: (uint64_t *)(void *)&fr->fr_eth_padded, |
509 | dst: (uint64_t *)(void *)(pkt_buf + pkt->pkt_headroom - FSW_ETHER_LEN_PADDED)); |
510 | |
511 | pkt->pkt_headroom -= ETHER_HDR_LEN; |
512 | pkt->pkt_l2_len = ETHER_HDR_LEN; |
513 | |
514 | if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) { |
515 | /* frame and fix up mbuf */ |
516 | struct mbuf *m = pkt->pkt_mbuf; |
517 | sk_copy64_16(src: (uint64_t *)(void *)&fr->fr_eth_padded, |
518 | dst: (uint64_t *)(void *)(m->m_data - FSW_ETHER_LEN_PADDED)); |
519 | ASSERT((uintptr_t)m->m_data == |
520 | (uintptr_t)mbuf_datastart(m) + FSW_ETHER_FRAME_HEADROOM); |
521 | m->m_data -= ETHER_HDR_LEN; |
522 | m->m_len += ETHER_HDR_LEN; |
523 | m_pktlen(m) += ETHER_HDR_LEN; |
524 | ASSERT(m->m_len == m_pktlen(m)); |
525 | pkt->pkt_length = m_pktlen(m); |
526 | } else { |
527 | METADATA_ADJUST_LEN(pkt, ETHER_HDR_LEN, pkt->pkt_headroom); |
528 | } |
529 | } |
530 | |
531 | static sa_family_t |
532 | fsw_ethernet_demux(struct nx_flowswitch *fsw, struct __kern_packet *pkt) |
533 | { |
534 | #pragma unused(fsw) |
535 | const struct ether_header *eh; |
536 | sa_family_t af = AF_UNSPEC; |
537 | uint32_t bdlen, bdlim, bdoff; |
538 | uint8_t *baddr; |
539 | |
540 | MD_BUFLET_ADDR_ABS_DLEN(pkt, baddr, bdlen, bdlim, bdoff); |
541 | baddr += pkt->pkt_headroom; |
542 | eh = (struct ether_header *)(void *)baddr; |
543 | |
544 | if (__improbable(sizeof(*eh) > pkt->pkt_length)) { |
545 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_ERR); |
546 | SK_ERR("unrecognized pkt, len %u" , pkt->pkt_length); |
547 | return AF_UNSPEC; |
548 | } |
549 | |
550 | if (__improbable(pkt->pkt_headroom + sizeof(*eh) > bdlim)) { |
551 | SK_ERR("ethernet header overrun 1st buflet" ); |
552 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_ERR); |
553 | return AF_UNSPEC; |
554 | } |
555 | |
556 | if (__improbable((pkt->pkt_link_flags & PKT_LINKF_ETHFCS) != 0)) { |
557 | pkt->pkt_length -= ETHER_CRC_LEN; |
558 | pkt->pkt_link_flags &= ~PKT_LINKF_ETHFCS; |
559 | if (pkt->pkt_pflags & PKT_F_MBUF_DATA) { |
560 | ASSERT((pkt->pkt_mbuf->m_flags & M_HASFCS) != 0); |
561 | m_adj(pkt->pkt_mbuf, -ETHER_CRC_LEN); |
562 | pkt->pkt_mbuf->m_flags &= ~M_HASFCS; |
563 | } |
564 | } |
565 | pkt->pkt_l2_len = ETHER_HDR_LEN; |
566 | if ((eh->ether_dhost[0] & 1) == 0) { |
567 | /* |
568 | * When the driver is put into promiscuous mode we may receive |
569 | * unicast frames that are not intended for our interfaces. |
570 | * They are marked here as being promiscuous so the caller may |
571 | * dispose of them after passing the packets to any interface |
572 | * filters. |
573 | */ |
574 | if (_ether_cmp(a: eh->ether_dhost, IF_LLADDR(fsw->fsw_ifp))) { |
575 | pkt->pkt_pflags |= PKT_F_PROMISC; |
576 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_PROMISC); |
577 | return AF_UNSPEC; |
578 | } |
579 | } |
580 | uint16_t ether_type = ntohs(eh->ether_type); |
581 | switch (ether_type) { |
582 | case ETHERTYPE_IP: |
583 | af = AF_INET; |
584 | break; |
585 | case ETHERTYPE_IPV6: |
586 | af = AF_INET6; |
587 | break; |
588 | default: |
589 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_UNSPEC); |
590 | break; |
591 | } |
592 | |
593 | return af; |
594 | } |
595 | |