| 1 | /* |
| 2 | * Copyright (c) 2016-2020 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | |
| 29 | #include <skywalk/os_skywalk_private.h> |
| 30 | #include <skywalk/nexus/flowswitch/nx_flowswitch.h> |
| 31 | #include <skywalk/nexus/flowswitch/fsw_var.h> |
| 32 | #include <netinet/in_arp.h> |
| 33 | #include <netinet/ip6.h> |
| 34 | #include <netinet6/in6_var.h> |
| 35 | #include <netinet6/nd6.h> |
| 36 | #include <net/ethernet.h> |
| 37 | #include <net/route.h> |
| 38 | #include <sys/eventhandler.h> |
| 39 | #include <net/sockaddr_utils.h> |
| 40 | |
| 41 | #define FSW_ETHER_LEN_PADDED 16 |
| 42 | #define FSW_ETHER_PADDING (FSW_ETHER_LEN_PADDED - ETHER_HDR_LEN) |
| 43 | #define FSW_ETHER_FRAME_HEADROOM FSW_ETHER_LEN_PADDED |
| 44 | |
| 45 | static void fsw_ethernet_ctor(struct nx_flowswitch *, struct flow_route *); |
| 46 | static int fsw_ethernet_resolve(struct nx_flowswitch *, struct flow_route *, |
| 47 | struct __kern_packet *); |
| 48 | static void fsw_ethernet_frame(struct nx_flowswitch *, struct flow_route *, |
| 49 | struct __kern_packet *); |
| 50 | static sa_family_t fsw_ethernet_demux(struct nx_flowswitch *, |
| 51 | struct __kern_packet *); |
| 52 | |
| 53 | extern struct rtstat rtstat; |
| 54 | |
| 55 | int |
| 56 | fsw_ethernet_setup(struct nx_flowswitch *fsw, struct ifnet *ifp) |
| 57 | { |
| 58 | struct ifaddr *lladdr = ifp->if_lladdr; |
| 59 | |
| 60 | if (SDL(lladdr->ifa_addr)->sdl_alen != ETHER_ADDR_LEN || |
| 61 | SDL(lladdr->ifa_addr)->sdl_type != IFT_ETHER) { |
| 62 | return ENOTSUP; |
| 63 | } |
| 64 | |
| 65 | ifnet_lladdr_copy_bytes(interface: ifp, lladdr: fsw->fsw_ether_shost, ETHER_ADDR_LEN); |
| 66 | fsw->fsw_ctor = fsw_ethernet_ctor; |
| 67 | fsw->fsw_resolve = fsw_ethernet_resolve; |
| 68 | fsw->fsw_frame = fsw_ethernet_frame; |
| 69 | fsw->fsw_frame_headroom = FSW_ETHER_FRAME_HEADROOM; |
| 70 | fsw->fsw_demux = fsw_ethernet_demux; |
| 71 | |
| 72 | return 0; |
| 73 | } |
| 74 | |
| 75 | static void |
| 76 | fsw_ethernet_ctor(struct nx_flowswitch *fsw, struct flow_route *fr) |
| 77 | { |
| 78 | ASSERT(fr->fr_af == AF_INET || fr->fr_af == AF_INET6); |
| 79 | |
| 80 | fr->fr_llhdr.flh_gencnt = fsw->fsw_src_lla_gencnt; |
| 81 | bcopy(src: fsw->fsw_ether_shost, dst: fr->fr_eth.ether_shost, ETHER_ADDR_LEN); |
| 82 | fr->fr_eth.ether_type = ((fr->fr_af == AF_INET) ? |
| 83 | htons(ETHERTYPE_IP) : htons(ETHERTYPE_IPV6)); |
| 84 | |
| 85 | /* const override */ |
| 86 | _CASSERT(sizeof(fr->fr_llhdr.flh_off) == sizeof(uint8_t)); |
| 87 | _CASSERT(sizeof(fr->fr_llhdr.flh_len) == sizeof(uint8_t)); |
| 88 | *(uint8_t *)(uintptr_t)&fr->fr_llhdr.flh_off = 2; |
| 89 | *(uint8_t *)(uintptr_t)&fr->fr_llhdr.flh_len = ETHER_HDR_LEN; |
| 90 | |
| 91 | SK_DF(SK_VERB_FLOW_ROUTE, |
| 92 | "fr 0x%llx eth_type 0x%x eth_src %x:%x:%x:%x:%x:%x" , |
| 93 | SK_KVA(fr), ntohs(fr->fr_eth.ether_type), |
| 94 | fr->fr_eth.ether_shost[0], fr->fr_eth.ether_shost[1], |
| 95 | fr->fr_eth.ether_shost[2], fr->fr_eth.ether_shost[3], |
| 96 | fr->fr_eth.ether_shost[4], fr->fr_eth.ether_shost[5]); |
| 97 | } |
| 98 | |
/*
 * Resolve the Ethernet destination address for a flow route,
 * triggering ARP (IPv4) or NDP (IPv6) as needed.
 *
 * Returns 0 when fr holds usable link-layer info, EJUSTRETURN when
 * resolution is still in progress (any mbuf converted from pkt has
 * been handed to ARP/ND's hold queue), or another errno on failure.
 * The caller retains ownership of pkt in all cases; if its attached
 * mbuf was detached here and not queued, it is reattached before
 * returning.
 */
static int
fsw_ethernet_resolve(struct nx_flowswitch *fsw, struct flow_route *fr,
    struct __kern_packet *pkt)
{
#if SK_LOG
	char dst_s[MAX_IPv6_STR_LEN];
#endif /* SK_LOG */
	struct ifnet *ifp = fsw->fsw_ifp;
	struct rtentry *tgt_rt = NULL;
	struct sockaddr *tgt_sa = NULL;
	struct mbuf *m = NULL;
	boolean_t reattach_mbuf = FALSE;	/* m was detached from pkt, not freshly allocated */
	boolean_t probing;
	int err = 0;

	ASSERT(fr != NULL);
	ASSERT(ifp != NULL);

	FR_LOCK(fr);
	/*
	 * If the destination is on-link, we use the final destination
	 * address as target.  If it's off-link, we use the gateway
	 * address instead.  Point tgt_rt to the destination or
	 * gateway route accordingly.
	 */
	if (fr->fr_flags & FLOWRTF_ONLINK) {
		tgt_sa = SA(&fr->fr_faddr);
		tgt_rt = fr->fr_rt_dst;
	} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
		tgt_sa = SA(&fr->fr_gaddr);
		tgt_rt = fr->fr_rt_gw;
	}

	/*
	 * Perform another routing table lookup if necessary.
	 */
	if (tgt_rt == NULL || !(tgt_rt->rt_flags & RTF_UP) ||
	    fr->fr_want_configure) {
		if (fr->fr_want_configure == 0) {
			os_atomic_inc(&fr->fr_want_configure, relaxed);
		}
		err = flow_route_configure(fr, ifp, NULL);
		if (err != 0) {
			SK_ERR("failed to configure route to %s on %s (err %d)",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname, err);
			goto done;
		}

		/* refresh pointers; flags may have changed under reconfigure */
		if (fr->fr_flags & FLOWRTF_ONLINK) {
			tgt_sa = SA(&fr->fr_faddr);
			tgt_rt = fr->fr_rt_dst;
		} else if (fr->fr_flags & FLOWRTF_GATEWAY) {
			tgt_sa = SA(&fr->fr_gaddr);
			tgt_rt = fr->fr_rt_gw;
		}
	}

	if (__improbable(!(fr->fr_flags & (FLOWRTF_ONLINK | FLOWRTF_GATEWAY)))) {
		err = EHOSTUNREACH;
		SK_ERR("invalid route for %s on %s (err %d)",
		    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
		    sizeof(dst_s)), ifp->if_xname, err);
		goto done;
	}

	ASSERT(tgt_sa != NULL);
	ASSERT(tgt_rt != NULL);

	/*
	 * Attempt to convert kpkt to mbuf before acquiring the
	 * rt lock so that the lock won't be held if we need to do
	 * a blocking mbuf allocation.
	 */
	if (!(fr->fr_flags & FLOWRTF_HAS_LLINFO)) {
		/*
		 * We need to resolve; if caller passes in a kpkt,
		 * convert the kpkt within to mbuf.  Caller is then
		 * responsible for freeing kpkt.  In future, we could
		 * optimize this by having the ARP/ND lookup routines
		 * understand kpkt and perform the conversion only
		 * when it is needed.
		 */
		if (__probable(pkt != NULL)) {
			if (pkt->pkt_pflags & PKT_F_MBUF_DATA) {
				/* reuse the mbuf already attached to the kpkt */
				reattach_mbuf = TRUE;
				m = pkt->pkt_mbuf;
				KPKT_CLEAR_MBUF_DATA(pkt);
			} else {
				m = fsw_classq_kpkt_to_mbuf(fsw, pkt);
			}
			if (m == NULL) {
				/* not a fatal error; move on */
				SK_ERR("failed to allocate mbuf while "
				    "resolving %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
			}
		} else {
			m = NULL;
		}
	}

	RT_LOCK(tgt_rt);

	/* target must be a direct host route with an Ethernet gateway */
	if (__improbable(!IS_DIRECT_HOSTROUTE(tgt_rt) ||
	    tgt_rt->rt_gateway->sa_family != AF_LINK ||
	    SDL(tgt_rt->rt_gateway)->sdl_type != IFT_ETHER)) {
		rtstat.rts_badrtgwroute++;
		err = ENETUNREACH;
		RT_UNLOCK(tgt_rt);
		SK_ERR("bad gateway route %s on %s (err %d)",
		    sk_sa_ntop(tgt_sa, dst_s, sizeof(dst_s)),
		    ifp->if_xname, err);
		goto done;
	}

	/*
	 * If already resolved, grab the link-layer address and mark the
	 * flow route accordingly.  Given that we will use the cached
	 * link-layer info, there's no need to convert and enqueue the
	 * packet to ARP/ND (i.e. no need to return EJUSTRETURN).
	 */
	if (__probable((fr->fr_flags & FLOWRTF_HAS_LLINFO) &&
	    SDL(tgt_rt->rt_gateway)->sdl_alen == ETHER_ADDR_LEN)) {
		VERIFY(m == NULL);
		FLOWRT_UPD_ETH_DST(fr, LLADDR(SDL(tgt_rt->rt_gateway)));
		os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
		/* if we're not probing, then we're done */
		if (!(probing = (fr->fr_want_probe != 0))) {
			VERIFY(err == 0);
			RT_UNLOCK(tgt_rt);
			goto done;
		}
		/* consume the probe request before falling into ARP/ND below */
		os_atomic_store(&fr->fr_want_probe, 0, release);
	} else {
		probing = FALSE;
		os_atomic_andnot(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
	}

	SK_DF(SK_VERB_FLOW_ROUTE, "%s %s on %s", (probing ?
	    "probing" : "resolving"), sk_sa_ntop(tgt_sa, dst_s,
	    sizeof(dst_s)), ifp->if_xname);

	/*
	 * Trigger ARP/NDP resolution or probing.
	 */
	switch (tgt_sa->sa_family) {
	case AF_INET: {
		struct sockaddr_dl sdl;

		RT_UNLOCK(tgt_rt);
		/*
		 * Note we pass NULL as "hint" parameter, as tgt_sa
		 * is already referring to the target address.
		 */
		SOCKADDR_ZERO(&sdl, sizeof(sdl));
		err = arp_lookup_ip(ifp, SIN(tgt_sa), &sdl, sizeof(sdl),
		    NULL, m);

		/*
		 * If we're resolving (not probing), and it's now resolved,
		 * grab the link-layer address and update the flow route.
		 * If we get EJUSTRETURN, the mbuf (if any) would have
		 * been added to the hold queue.  Any other return values
		 * including 0 means that we need to free it.
		 *
		 * If we're probing, we won't have any mbuf to deal with,
		 * and since we already have the cached llinfo we'll just
		 * return success even if we get EJUSTRETURN.
		 */
		if (!probing) {
			if (err == 0 && sdl.sdl_alen == ETHER_ADDR_LEN) {
				SK_DF(SK_VERB_FLOW_ROUTE,
				    "fast-resolve %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
				FLOWRT_UPD_ETH_DST(fr, LLADDR(&sdl));
				os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
			}
			if (err == EJUSTRETURN && m != NULL) {
				SK_DF(SK_VERB_FLOW_ROUTE, "packet queued "
				    "while resolving %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
				/* ARP now owns the mbuf; don't free/reattach it */
				m = NULL;
			}
		} else {
			VERIFY(m == NULL);
			if (err == EJUSTRETURN) {
				err = 0;
			}
		}
		break;
	}

	case AF_INET6: {
		struct llinfo_nd6 *ln = tgt_rt->rt_llinfo;

		/*
		 * Check if the route is down.  RTF_LLINFO is set during
		 * RTM_{ADD,RESOLVE}, and is never cleared until the route
		 * is deleted from the routing table.
		 */
		if ((tgt_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
		    (RTF_UP | RTF_LLINFO) || ln == NULL) {
			err = EHOSTUNREACH;
			SK_ERR("route unavailable for %s on %s (err %d)",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname, err);
			RT_UNLOCK(tgt_rt);
			break;
		}

		/*
		 * If we're probing and IPv6 ND cache entry is STALE,
		 * use it anyway but also mark it for delayed probe
		 * and update the expiry.
		 */
		if (probing) {
			VERIFY(m == NULL);
			VERIFY(ln->ln_state > ND6_LLINFO_INCOMPLETE);
			if (ln->ln_state == ND6_LLINFO_STALE) {
				ln->ln_asked = 0;
				ND6_CACHE_STATE_TRANSITION(ln,
				    ND6_LLINFO_DELAY);
				ln_setexpire(ln, net_uptime() + nd6_delay);
				RT_UNLOCK(tgt_rt);

				/* nd6 timer rearm requires rnh_lock, not rt lock */
				lck_mtx_lock(rnh_lock);
				nd6_sched_timeout(NULL, NULL);
				lck_mtx_unlock(rnh_lock);

				SK_DF(SK_VERB_FLOW_ROUTE,
				    "NUD probe scheduled for %s on %s",
				    sk_sa_ntop(tgt_sa, dst_s,
				    sizeof(dst_s)), ifp->if_xname);
			} else {
				RT_UNLOCK(tgt_rt);
			}
			VERIFY(err == 0);
			break;
		}

		/*
		 * If this is a permanent ND entry, we're done.
		 */
		if (ln->ln_expire == 0 &&
		    ln->ln_state == ND6_LLINFO_REACHABLE) {
			if (SDL(tgt_rt->rt_gateway)->sdl_alen !=
			    ETHER_ADDR_LEN) {
				err = EHOSTUNREACH;
				SK_ERR("invalid permanent route %s on %s"
				    "ln 0x%llx (err %d)",
				    sk_sa_ntop(rt_key(tgt_rt), dst_s,
				    sizeof(dst_s)), ifp->if_xname,
				    SK_KVA(ln), err);
			} else {
				SK_DF(SK_VERB_FLOW_ROUTE, "fast-resolve "
				    "permanent route %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
				/* copy permanent address into the flow route */
				FLOWRT_UPD_ETH_DST(fr,
				    LLADDR(SDL(tgt_rt->rt_gateway)));
				os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
				VERIFY(err == 0);
			}
			RT_UNLOCK(tgt_rt);
			break;
		}

		if (ln->ln_state == ND6_LLINFO_NOSTATE) {
			ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE);
		}

		if (ln->ln_state == ND6_LLINFO_INCOMPLETE && (!ln->ln_asked ||
		    !(fr->fr_flags & FLOWRTF_HAS_LLINFO))) {
			struct nd_ifinfo *ndi = ND_IFINFO(tgt_rt->rt_ifp);
			/*
			 * There is a neighbor cache entry, but no Ethernet
			 * address response yet.  Replace the held mbuf
			 * (if any) with this the one we have (if any),
			 * else leave it alone.
			 *
			 * This code conforms to the rate-limiting rule
			 * described in Section 7.2.2 of RFC 4861, because
			 * the timer is set correctly after sending an
			 * NS below.
			 */
			if (m != NULL) {
				if (ln->ln_hold != NULL) {
					m_freem_list(ln->ln_hold);
				}
				/* ND now owns the mbuf; don't free/reattach it */
				ln->ln_hold = m;
				m = NULL;

				SK_DF(SK_VERB_FLOW_ROUTE,
				    "packet queued while resolving %s on %s",
				    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
				    sizeof(dst_s)), ifp->if_xname);
			}
			VERIFY(ndi != NULL && ndi->initialized);
			ln->ln_asked++;
			/* retrans is in milliseconds; expiry in seconds */
			ln_setexpire(ln, net_uptime() + ndi->retrans / 1000);
			RT_UNLOCK(tgt_rt);

			SK_DF(SK_VERB_FLOW_ROUTE, "soliciting for %s on %s"
			    "ln 0x%llx state %u", sk_sa_ntop(rt_key(tgt_rt),
			    dst_s, sizeof(dst_s)), ifp->if_xname, SK_KVA(ln),
			    ln->ln_state);

			/* XXX Refactor this to use same src ip */
			nd6_ns_output(tgt_rt->rt_ifp, NULL,
			    &SIN6(rt_key(tgt_rt))->sin6_addr, NULL, NULL);

			lck_mtx_lock(rnh_lock);
			nd6_sched_timeout(NULL, NULL);
			lck_mtx_unlock(rnh_lock);
			err = EJUSTRETURN;
		} else {
			SK_DF(SK_VERB_FLOW_ROUTE, "fast-resolve %s on %s",
			    sk_sa_ntop(SA(&fr->fr_faddr), dst_s,
			    sizeof(dst_s)), ifp->if_xname);
			/*
			 * The neighbor cache entry has been resolved;
			 * copy the address into the flow route.
			 */
			FLOWRT_UPD_ETH_DST(fr, LLADDR(SDL(tgt_rt->rt_gateway)));
			os_atomic_or(&fr->fr_flags, (FLOWRTF_RESOLVED | FLOWRTF_HAS_LLINFO), relaxed);
			RT_UNLOCK(tgt_rt);
			VERIFY(err == 0);
		}
		/*
		 * XXX Need to optimize for the NDP garbage
		 * collection.  It would be even better to unify
		 * BSD/SK NDP management through the completion
		 * of L2/L3 split.
		 */
		break;
	}

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	RT_LOCK_ASSERT_NOTHELD(tgt_rt);

done:
	/*
	 * If the mbuf wasn't consumed by ARP/ND, give it back to the
	 * kpkt it came from, or free it if it was a fresh conversion.
	 */
	if (m != NULL) {
		if (reattach_mbuf) {
			pkt->pkt_mbuf = m;
			pkt->pkt_pflags |= PKT_F_MBUF_DATA;
		} else {
			m_freem_list(m);
		}
		m = NULL;
	}

	if (__improbable(err != 0 && err != EJUSTRETURN)) {
		SK_ERR("route to %s on %s can't be resolved (err %d)",
		    sk_sa_ntop(SA(&fr->fr_faddr), dst_s, sizeof(dst_s)),
		    ifp->if_xname, err);
		/* keep FLOWRTF_HAS_LLINFO as llinfo is still useful */
		os_atomic_andnot(&fr->fr_flags, FLOWRTF_RESOLVED, relaxed);
		flow_route_cleanup(fr);
	}

	FR_UNLOCK(fr);

	return err;
}
| 473 | |
| 474 | static void |
| 475 | fsw_ethernet_frame(struct nx_flowswitch *fsw, struct flow_route *fr, |
| 476 | struct __kern_packet *pkt) |
| 477 | { |
| 478 | /* in the event the source MAC address changed, update our copy */ |
| 479 | if (__improbable(fr->fr_llhdr.flh_gencnt != fsw->fsw_src_lla_gencnt)) { |
| 480 | uint8_t old_shost[ETHER_ADDR_LEN]; |
| 481 | |
| 482 | bcopy(src: &fr->fr_eth.ether_shost, dst: &old_shost, ETHER_ADDR_LEN); |
| 483 | fsw_ethernet_ctor(fsw, fr); |
| 484 | |
| 485 | SK_ERR("fr 0x%llx source MAC address updated on %s, " |
| 486 | "was %x:%x:%x:%x:%x:%x now %x:%x:%x:%x:%x:%x" , |
| 487 | SK_KVA(fr), fsw->fsw_ifp, |
| 488 | old_shost[0], old_shost[1], |
| 489 | old_shost[2], old_shost[3], |
| 490 | old_shost[4], old_shost[5], |
| 491 | fr->fr_eth.ether_shost[0], fr->fr_eth.ether_shost[1], |
| 492 | fr->fr_eth.ether_shost[2], fr->fr_eth.ether_shost[3], |
| 493 | fr->fr_eth.ether_shost[4], fr->fr_eth.ether_shost[5]); |
| 494 | } |
| 495 | |
| 496 | _CASSERT(sizeof(fr->fr_eth_padded) == FSW_ETHER_LEN_PADDED); |
| 497 | |
| 498 | if ((fr->fr_flags & FLOWRTF_DST_LL_MCAST) != 0) { |
| 499 | pkt->pkt_link_flags |= PKT_LINKF_MCAST; |
| 500 | } else if ((fr->fr_flags & FLOWRTF_DST_LL_BCAST) != 0) { |
| 501 | pkt->pkt_link_flags |= PKT_LINKF_BCAST; |
| 502 | } |
| 503 | |
| 504 | ASSERT(pkt->pkt_headroom >= FSW_ETHER_LEN_PADDED); |
| 505 | |
| 506 | char *pkt_buf; |
| 507 | MD_BUFLET_ADDR_ABS(pkt, pkt_buf); |
| 508 | sk_copy64_16(src: (uint64_t *)(void *)&fr->fr_eth_padded, |
| 509 | dst: (uint64_t *)(void *)(pkt_buf + pkt->pkt_headroom - FSW_ETHER_LEN_PADDED)); |
| 510 | |
| 511 | pkt->pkt_headroom -= ETHER_HDR_LEN; |
| 512 | pkt->pkt_l2_len = ETHER_HDR_LEN; |
| 513 | |
| 514 | if ((pkt->pkt_pflags & PKT_F_MBUF_DATA) != 0) { |
| 515 | /* frame and fix up mbuf */ |
| 516 | struct mbuf *m = pkt->pkt_mbuf; |
| 517 | sk_copy64_16(src: (uint64_t *)(void *)&fr->fr_eth_padded, |
| 518 | dst: (uint64_t *)(void *)(m->m_data - FSW_ETHER_LEN_PADDED)); |
| 519 | ASSERT((uintptr_t)m->m_data == |
| 520 | (uintptr_t)mbuf_datastart(m) + FSW_ETHER_FRAME_HEADROOM); |
| 521 | m->m_data -= ETHER_HDR_LEN; |
| 522 | m->m_len += ETHER_HDR_LEN; |
| 523 | m_pktlen(m) += ETHER_HDR_LEN; |
| 524 | ASSERT(m->m_len == m_pktlen(m)); |
| 525 | pkt->pkt_length = m_pktlen(m); |
| 526 | } else { |
| 527 | METADATA_ADJUST_LEN(pkt, ETHER_HDR_LEN, pkt->pkt_headroom); |
| 528 | } |
| 529 | } |
| 530 | |
| 531 | static sa_family_t |
| 532 | fsw_ethernet_demux(struct nx_flowswitch *fsw, struct __kern_packet *pkt) |
| 533 | { |
| 534 | #pragma unused(fsw) |
| 535 | const struct ether_header *eh; |
| 536 | sa_family_t af = AF_UNSPEC; |
| 537 | uint32_t bdlen, bdlim, bdoff; |
| 538 | uint8_t *baddr; |
| 539 | |
| 540 | MD_BUFLET_ADDR_ABS_DLEN(pkt, baddr, bdlen, bdlim, bdoff); |
| 541 | baddr += pkt->pkt_headroom; |
| 542 | eh = (struct ether_header *)(void *)baddr; |
| 543 | |
| 544 | if (__improbable(sizeof(*eh) > pkt->pkt_length)) { |
| 545 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_ERR); |
| 546 | SK_ERR("unrecognized pkt, len %u" , pkt->pkt_length); |
| 547 | return AF_UNSPEC; |
| 548 | } |
| 549 | |
| 550 | if (__improbable(pkt->pkt_headroom + sizeof(*eh) > bdlim)) { |
| 551 | SK_ERR("ethernet header overrun 1st buflet" ); |
| 552 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_ERR); |
| 553 | return AF_UNSPEC; |
| 554 | } |
| 555 | |
| 556 | if (__improbable((pkt->pkt_link_flags & PKT_LINKF_ETHFCS) != 0)) { |
| 557 | pkt->pkt_length -= ETHER_CRC_LEN; |
| 558 | pkt->pkt_link_flags &= ~PKT_LINKF_ETHFCS; |
| 559 | if (pkt->pkt_pflags & PKT_F_MBUF_DATA) { |
| 560 | ASSERT((pkt->pkt_mbuf->m_flags & M_HASFCS) != 0); |
| 561 | m_adj(pkt->pkt_mbuf, -ETHER_CRC_LEN); |
| 562 | pkt->pkt_mbuf->m_flags &= ~M_HASFCS; |
| 563 | } |
| 564 | } |
| 565 | pkt->pkt_l2_len = ETHER_HDR_LEN; |
| 566 | if ((eh->ether_dhost[0] & 1) == 0) { |
| 567 | /* |
| 568 | * When the driver is put into promiscuous mode we may receive |
| 569 | * unicast frames that are not intended for our interfaces. |
| 570 | * They are marked here as being promiscuous so the caller may |
| 571 | * dispose of them after passing the packets to any interface |
| 572 | * filters. |
| 573 | */ |
| 574 | if (_ether_cmp(a: eh->ether_dhost, IF_LLADDR(fsw->fsw_ifp))) { |
| 575 | pkt->pkt_pflags |= PKT_F_PROMISC; |
| 576 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_PROMISC); |
| 577 | return AF_UNSPEC; |
| 578 | } |
| 579 | } |
| 580 | uint16_t ether_type = ntohs(eh->ether_type); |
| 581 | switch (ether_type) { |
| 582 | case ETHERTYPE_IP: |
| 583 | af = AF_INET; |
| 584 | break; |
| 585 | case ETHERTYPE_IPV6: |
| 586 | af = AF_INET6; |
| 587 | break; |
| 588 | default: |
| 589 | STATS_INC(&fsw->fsw_stats, FSW_STATS_RX_DEMUX_UNSPEC); |
| 590 | break; |
| 591 | } |
| 592 | |
| 593 | return af; |
| 594 | } |
| 595 | |