1/*
2 * Copyright (c) 2004-2024 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30/*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65/*
66 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93/*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
 *   consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106#include <sys/cdefs.h>
107
108#include <sys/param.h>
109#include <sys/mbuf.h>
110#include <sys/malloc.h>
111#include <sys/protosw.h>
112#include <sys/systm.h>
113#include <sys/time.h>
114#include <sys/socket.h> /* for net/if.h */
115#include <sys/sockio.h>
116#include <sys/kernel.h>
117#include <sys/random.h>
118#include <sys/syslog.h>
119#include <sys/sysctl.h>
120#include <sys/proc.h>
121#include <sys/lock.h>
122#include <sys/mcache.h>
123
124#include <sys/kauth.h>
125
126#include <kern/thread_call.h>
127
128#include <libkern/libkern.h>
129
130#include <kern/zalloc.h>
131
132#if NBPFILTER > 0
133#include <net/bpf.h>
134#endif
135#include <net/if.h>
136#include <net/if_dl.h>
137#include <net/if_types.h>
138#include <net/if_var.h>
139#include <net/if_media.h>
140#include <net/net_api_stats.h>
141#include <net/pfvar.h>
142
143#include <netinet/in.h> /* for struct arpcom */
144#include <netinet/tcp.h> /* for struct tcphdr */
145#include <netinet/in_systm.h>
146#include <netinet/in_var.h>
147#define _IP_VHL
148#include <netinet/ip.h>
149#include <netinet/ip_var.h>
150#include <netinet/ip6.h>
151#include <netinet6/ip6_var.h>
152#ifdef DEV_CARP
153#include <netinet/ip_carp.h>
154#endif
155#include <netinet/if_ether.h> /* for struct arpcom */
156#include <net/bridgestp.h>
157#include <net/if_bridgevar.h>
158#include <net/if_llc.h>
159#if NVLAN > 0
160#include <net/if_vlan_var.h>
161#endif /* NVLAN > 0 */
162
163#include <net/if_ether.h>
164#include <net/dlil.h>
165#include <net/kpi_interfacefilter.h>
166
167#include <net/route.h>
168#include <dev/random/randomdev.h>
169
170#include <netinet/bootp.h>
171#include <netinet/dhcp.h>
172
173#if SKYWALK
174#include <skywalk/nexus/netif/nx_netif.h>
175#endif /* SKYWALK */
176
177#include <net/sockaddr_utils.h>
178
179#include <os/log.h>
180
/*
 * if_bridge_debug, BR_DBGF_*
 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
 *   to enable additional logs for the corresponding bridge function
 * - "sysctl net.link.bridge.debug" controls the value of
 *   'if_bridge_debug'
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - non-zero if any bit of '__flag' is set in 'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole instead of
 *   being split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted if '__level' <= if_bridge_log_level, or if any
 *   of the '__dbgf' flags is enabled in if_bridge_debug
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__string' must be a string literal (it is concatenated with the
 *   "%s: " prefix in BRIDGE_LOG)
 * - '__level' is parenthesized so that an expression argument is compared
 *   as a whole
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
236
/*
 * Bridge locking macros.
 * - _BRIDGE_LOCK/_BRIDGE_UNLOCK take and release the per-bridge mutex sc_mtx
 * - BRIDGE_LOCK_ASSERT_HELD/NOTHELD assert ownership of sc_mtx
 * - BRIDGE_LOCK2REF is entered with sc_mtx held and leaves it released; on
 *   success (_err == 0) a reference on the member list has been taken, and
 *   EBUSY is reported while an exclusive request is pending
 * - BRIDGE_UNREF drops that reference and wakes a waiting exclusive
 *   requester once the last reference is gone
 * - BRIDGE_XLOCK (sc_mtx held) registers an exclusive request and sleeps
 *   until all references are dropped; BRIDGE_XDROP withdraws the request
 *
 * When BRIDGE_LOCK_DEBUG is non-zero the operations are routed through the
 * bridge_lock()/bridge_unlock()/... functions instead of being expanded
 * inline.
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of caller addresses kept in the lock/unlock history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      (_err) = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
	        (_err) = EBUSY;                                         \
	} else {                                                        \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0) {                              \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
292
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - when BPF support is compiled in, hand the packet 'm' to
 *   bridge_bpf_input() if an input tap (sc_bpf_input) is installed on 'sc'
 * - expands to a single statement (do/while) so that it is safe to use as
 *   the body of an unbraced if/else; the caller supplies the trailing ';'
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m),             \
	                    __func__, __LINE__);                        \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)    do { } while (0)
#endif /* NBPFILTER */
300
/*
 * Initial size of the route hash table. Must be a power of two.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 * - default for the rtable_hash_size_max sysctl variable
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Bit mask derived from the current hash table size (size - 1); this is
 * only a contiguous mask while sc_rthash_size is a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif


/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 * - default for the rtable_prune_period sysctl variable
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
354
/*
 * Bridge interface list entry.
 * - describes one interface attached to a bridge; entries are linked
 *   through bif_next into the TAILQs kept in struct bridge_softc
 *   (sc_iflist for members, sc_spanlist for span ports)
 */
struct bridge_iflist {
	TAILQ_ENTRY(bridge_iflist) bif_next;    /* list linkage */
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*) */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	interface_filter_t      bif_iff_ref;    /* interface filter reference */
	struct bridge_softc     *bif_sc;        /* back pointer to the bridge */
	uint32_t                bif_flags;      /* presumably BIFF_* flags; confirm */

	/* host filter */
	struct in_addr          bif_hf_ipsrc;   /* allowed source IPv4 address */
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN]; /* allowed source MAC */

	struct ifbrmstats       bif_stats;      /* per-member statistics */
};
378
379static inline bool
380bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
381{
382 return (bif->bif_ifflags & flags) == flags;
383}
384
385static inline bool
386bif_has_checksum_offload(struct bridge_iflist * bif)
387{
388 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
389}
390
/*
 * fake errors to make the code clearer
 * - every alias expands to the same value, EJUSTRETURN; the distinct names
 *   only document which validation failed at the point of use
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN

/*
 * BIFF_* bridge member state flags
 * - NOTE(review): presumably stored in bridge_iflist.bif_flags; confirm
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
#endif /* SKYWALK */
416
/*
 * mac_nat_entry
 * - translates between an IP address and MAC address on a specific
 *   bridge interface member
 * - the address is stored in a union; mne_ip and mne_ip6 below are
 *   shorthands for its IPv4 and IPv6 members
 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address of the entry */
	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
	uint8_t                 mne_reserved;
};
#define mne_ip  mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

/* presumably marks mne_ip6 (rather than mne_ip) as the valid member; confirm */
#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */

/* list head type used for the per-bridge sc_mne_list / sc_mne_list_v6 */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
440
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later without
 *   holding the bridge lock
 * - the union carries per-protocol details (ARP, IPv4, IPv6); which member
 *   applies is presumably selected by mnr_ether_type (confirm against
 *   bridge_mac_nat_translate)
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		uint16_t        mnru_arp_offset;
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t mnruip_header_len;
		} mnru_ip;
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors for the members of mnr_u */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
476
/*
 * Bridge route node.
 * - one learned or configured (MAC address, vlan) -> member mapping
 * - each node is linked twice: into a hash chain (brt_hash) and into a
 *   flat list (brt_list)
 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* MAC address */
	uint16_t                brt_vlan;       /* vlan id */

};
/* shorthand: the ifnet of the destination member; dereferences brt_dst */
#define brt_ifp                 brt_dst->bif_ifp
491
/*
 * Bridge delayed function call context
 * - bundles the function to run, the bridge it is run for, the time at
 *   which to run it and the thread call used to schedule it
 */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc     *bdc_sc;        /* bridge passed to bdc_func */
	bridge_delayed_func_t   bdc_func;       /* Function to call */
	struct timespec         bdc_ts;         /* Time to call */
	u_int32_t               bdc_flags;      /* BDCF_* */
	thread_call_t           bdc_thread_call;
};

#define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
#define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
507
508/*
509 * Software state for each bridge.
510 */
511LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
512
513struct bridge_softc {
514 struct ifnet *sc_ifp; /* make this an interface */
515 u_int32_t sc_flags;
516 LIST_ENTRY(bridge_softc) sc_list;
517 decl_lck_mtx_data(, sc_mtx);
518 struct _bridge_rtnode_list *sc_rthash; /* our forwarding table */
519 struct _bridge_rtnode_list sc_rtlist; /* list version of above */
520 uint32_t sc_rthash_key; /* key for hash */
521 uint32_t sc_rthash_size; /* size of the hash table */
522 struct bridge_delayed_call sc_aging_timer;
523 struct bridge_delayed_call sc_resize_call;
524 TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
525 struct bstp_state sc_stp; /* STP state */
526 bpf_packet_func sc_bpf_input;
527 bpf_packet_func sc_bpf_output;
528 void *sc_cv;
529 uint32_t sc_brtmax; /* max # of addresses */
530 uint32_t sc_brtcnt; /* cur. # of addresses */
531 uint32_t sc_brttimeout; /* rt timeout in seconds */
532 uint32_t sc_iflist_ref; /* refcount for sc_iflist */
533 uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */
534 TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */
535 uint32_t sc_brtexceeded; /* # of cache drops */
536 uint32_t sc_filter_flags; /* ipf and flags */
537 struct ifnet *sc_ifaddr; /* member mac copied from */
538 u_char sc_defaddr[6]; /* Default MAC address */
539 char sc_if_xname[IFNAMSIZ];
540
541 struct bridge_iflist *sc_mac_nat_bif; /* single MAC NAT interface */
542 struct mac_nat_entry_list sc_mne_list; /* MAC NAT IPv4 */
543 struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
544 uint32_t sc_mne_max; /* max # of entries */
545 uint32_t sc_mne_count; /* cur. # of entries */
546 uint32_t sc_mne_allocation_failures;
547#if BRIDGE_LOCK_DEBUG
548 /*
549 * Locking and unlocking calling history
550 */
551 void *lock_lr[BR_LCKDBG_MAX];
552 int next_lock_lr;
553 void *unlock_lr[BR_LCKDBG_MAX];
554 int next_unlock_lr;
555#endif /* BRIDGE_LOCK_DEBUG */
556};
557
558#define SCF_DETACHING 0x01
559#define SCF_RESIZING 0x02
560#define SCF_MEDIA_ACTIVE 0x04
561
/*
 * ChecksumOperation
 * - selects how packet checksums are treated; passed to bridge_enqueue()
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;

/*
 * iphdr
 * - a single header pointer viewed as IPv4, IPv6 or untyped
 */
union iphdr {
	struct ip       *ip;
	struct ip6_hdr  *ip6;
	void *          ptr;
};

/*
 * ip_packet_info
 * - summary of an IP packet's header layout; ip_is_ipv4 tells IPv4 from
 *   IPv6 (and hence, presumably, which member of ip_hdr to use)
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_ipv4;
	bool            ip_is_fragmented;
	union iphdr     ip_hdr;         /* pointer to IP header */
	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
585
/* host filter counters; exported read-only through the hostfilterstats sysctl */
struct bridge_hostfilter_stats bridge_hostfilter_stats;

/* lock group, attributes and the file-wide list mutex */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
/*
 * NOTE(review): LCK_ATTR_DEBUG is requested only when BRIDGE_LOCK_DEBUG is
 * *disabled*, which looks inverted relative to the macro's name - confirm
 * that this is intentional before relying on it.
 */
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);

/* seconds between route list walks; tunable via the rtable_prune_period sysctl */
static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* typed allocation pools for route nodes and MAC NAT entries */
static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
600
601static int bridge_clone_create(struct if_clone *, uint32_t, void *);
602static int bridge_clone_destroy(struct ifnet *);
603
604static errno_t bridge_ioctl(struct ifnet *, u_long, void *);
605#if HAS_IF_CAP
606static void bridge_mutecaps(struct bridge_softc *);
607static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
608 int);
609#endif
610static errno_t bridge_set_tso(struct bridge_softc *);
611static void bridge_proto_attach_changed(struct ifnet *);
612static int bridge_init(struct ifnet *);
613#if HAS_BRIDGE_DUMMYNET
614static void bridge_dummynet(struct mbuf *, struct ifnet *);
615#endif
616static void bridge_ifstop(struct ifnet *, int);
617static int bridge_output(struct ifnet *, struct mbuf *);
618static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
619static void bridge_start(struct ifnet *);
620static errno_t bridge_input(struct ifnet *, mbuf_t *);
621static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
622 mbuf_t *, char **);
623static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
624 mbuf_t *);
625static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
626 mbuf_t *m);
627
628static int bridge_enqueue(ifnet_t, struct ifnet *,
629 struct ifnet *, struct mbuf *, ChecksumOperation);
630static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
631
632static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
633 struct mbuf *);
634
635static void bridge_aging_timer(struct bridge_softc *sc);
636
637static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
638 struct mbuf *, int);
639static void bridge_span(struct bridge_softc *, struct mbuf *);
640
641static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
642 uint16_t, struct bridge_iflist *, int, uint8_t);
643static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
644 uint16_t);
645static void bridge_rttrim(struct bridge_softc *);
646static void bridge_rtage(struct bridge_softc *);
647static void bridge_rtflush(struct bridge_softc *, int);
648static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
649 uint16_t);
650
651static int bridge_rtable_init(struct bridge_softc *);
652static void bridge_rtable_fini(struct bridge_softc *);
653
654static void bridge_rthash_resize(struct bridge_softc *);
655
656static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
657static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
658 const uint8_t *, uint16_t);
659static int bridge_rtnode_hash(struct bridge_softc *,
660 struct bridge_rtnode *);
661static int bridge_rtnode_insert(struct bridge_softc *,
662 struct bridge_rtnode *);
663static void bridge_rtnode_destroy(struct bridge_softc *,
664 struct bridge_rtnode *);
665#if BRIDGESTP
666static void bridge_rtable_expire(struct ifnet *, int);
667static void bridge_state_change(struct ifnet *, int);
668#endif /* BRIDGESTP */
669
670static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
671 const char *name);
672static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
673 struct ifnet *ifp);
674static void bridge_delete_member(struct bridge_softc *,
675 struct bridge_iflist *);
676static void bridge_delete_span(struct bridge_softc *,
677 struct bridge_iflist *);
678
679static int bridge_ioctl_add(struct bridge_softc *, void *);
680static int bridge_ioctl_del(struct bridge_softc *, void *);
681static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
682static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
683static int bridge_ioctl_scache(struct bridge_softc *, void *);
684static int bridge_ioctl_gcache(struct bridge_softc *, void *);
685static int bridge_ioctl_gifs32(struct bridge_softc *, void *);
686static int bridge_ioctl_gifs64(struct bridge_softc *, void *);
687static int bridge_ioctl_rts32(struct bridge_softc *, void *);
688static int bridge_ioctl_rts64(struct bridge_softc *, void *);
689static int bridge_ioctl_saddr32(struct bridge_softc *, void *);
690static int bridge_ioctl_saddr64(struct bridge_softc *, void *);
691static int bridge_ioctl_sto(struct bridge_softc *, void *);
692static int bridge_ioctl_gto(struct bridge_softc *, void *);
693static int bridge_ioctl_daddr32(struct bridge_softc *, void *);
694static int bridge_ioctl_daddr64(struct bridge_softc *, void *);
695static int bridge_ioctl_flush(struct bridge_softc *, void *);
696static int bridge_ioctl_gpri(struct bridge_softc *, void *);
697static int bridge_ioctl_spri(struct bridge_softc *, void *);
698static int bridge_ioctl_ght(struct bridge_softc *, void *);
699static int bridge_ioctl_sht(struct bridge_softc *, void *);
700static int bridge_ioctl_gfd(struct bridge_softc *, void *);
701static int bridge_ioctl_sfd(struct bridge_softc *, void *);
702static int bridge_ioctl_gma(struct bridge_softc *, void *);
703static int bridge_ioctl_sma(struct bridge_softc *, void *);
704static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
705static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
706static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
707static int bridge_ioctl_addspan(struct bridge_softc *, void *);
708static int bridge_ioctl_delspan(struct bridge_softc *, void *);
709static int bridge_ioctl_gbparam32(struct bridge_softc *, void *);
710static int bridge_ioctl_gbparam64(struct bridge_softc *, void *);
711static int bridge_ioctl_grte(struct bridge_softc *, void *);
712static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
713static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
714static int bridge_ioctl_sproto(struct bridge_softc *, void *);
715static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
716static int bridge_ioctl_purge(struct bridge_softc *sc, void *);
717static int bridge_ioctl_gfilt(struct bridge_softc *, void *);
718static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
719static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
720static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
721static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
722static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
723static int bridge_ioctl_gifstats32(struct bridge_softc *, void *);
724static int bridge_ioctl_gifstats64(struct bridge_softc *, void *);
725
726static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
727static int bridge_ip_checkbasic(struct mbuf **);
728static int bridge_ip6_checkbasic(struct mbuf **);
729
730static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
731static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
732static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
733
734static void bridge_detach(ifnet_t);
735static void bridge_link_event(struct ifnet *, u_int32_t);
736static void bridge_iflinkevent(struct ifnet *);
737static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
738static int interface_media_active(struct ifnet *);
739static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
740static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
741static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
742static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
743
744static errno_t bridge_mac_nat_enable(struct bridge_softc *,
745 struct bridge_iflist *);
746static void bridge_mac_nat_disable(struct bridge_softc *sc);
747static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
748static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
749static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
750 struct bridge_iflist *);
751static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
752 boolean_t *);
753static boolean_t bridge_mac_nat_output(struct bridge_softc *,
754 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
755static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
756 const caddr_t);
757static bool is_broadcast_ip_packet(mbuf_t *);
758static bool in_addr_is_ours(const struct in_addr);
759static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
760
761#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
762
763static int
764gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
765 boolean_t is_tx);
766
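/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2).
 * NOTE: VLANTAGOF() below ignores its argument and always evaluates to 0,
 * i.e. no VLAN tag is extracted from the packet here.
 */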
768#define VLANTAGOF(_m) 0
769
/*
 * 01:80:c2:00:00:00 - presumably the destination address used for spanning
 * tree (bstp) frames; not static, so visible outside this file.
 */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };

/* all-zero Ethernet address */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };

#if BRIDGESTP
/* callbacks handed to the spanning tree code: state change and route aging */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
782
783SYSCTL_DECL(_net_link);
784SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
785 "Bridge");
786
787static int bridge_inherit_mac = 0; /* share MAC with first bridge member */
788SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
789 CTLFLAG_RW | CTLFLAG_LOCKED,
790 &bridge_inherit_mac, 0,
791 "Inherit MAC address from the first bridge member");
792
793SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
794 CTLFLAG_RW | CTLFLAG_LOCKED,
795 &bridge_rtable_prune_period, 0,
796 "Interval between pruning of routing table");
797
798static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
799SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
800 CTLFLAG_RW | CTLFLAG_LOCKED,
801 &bridge_rtable_hash_size_max, 0,
802 "Maximum size of the routing hash table");
803
804#if BRIDGE_DELAYED_CALLBACK_DEBUG
805static int bridge_delayed_callback_delay = 0;
806SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
807 CTLFLAG_RW | CTLFLAG_LOCKED,
808 &bridge_delayed_callback_delay, 0,
809 "Delay before calling delayed function");
810#endif
811
812SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
813 hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
814 &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
815
816#if BRIDGESTP
817static int log_stp = 0; /* log STP state changes */
818SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
819 &log_stp, 0, "Log STP state changes");
820#endif /* BRIDGESTP */
821
/*
 * bridge_control
 * - one entry of a bridge ioctl dispatch table: the handler, the size of
 *   its argument structure and BC_F_* flags describing how to process it
 */
struct bridge_control {
	int             (*bc_func)(struct bridge_softc *, void *);
	unsigned int    bc_argsize;
	unsigned int    bc_flags;
};

/* PF tag names used for vmnet traffic; the suffixed names extend VMNET_TAG */
#define VMNET_TAG               "com.apple.vmnet"
#define VMNET_LOCAL_TAG         VMNET_TAG ".local"
#define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
#define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"

/* PF tag ids for the names above; 0 until allocate_vmnet_pf_tags() fills them */
static u_int16_t vmnet_tag;
static u_int16_t vmnet_local_tag;
static u_int16_t vmnet_broadcast_tag;
static u_int16_t vmnet_multicast_tag;
837
838static u_int16_t
839allocate_pf_tag(char * name)
840{
841 u_int16_t tag;
842
843 tag = pf_tagname2tag_ext(name);
844 BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
845 return tag;
846}
847
848static void
849allocate_vmnet_pf_tags(void)
850{
851 /* allocate tags to use with PF */
852 if (vmnet_tag == 0) {
853 vmnet_tag = allocate_pf_tag(VMNET_TAG);
854 }
855 if (vmnet_local_tag == 0) {
856 vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
857 }
858 if (vmnet_broadcast_tag == 0) {
859 vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
860 }
861 if (vmnet_multicast_tag == 0) {
862 vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
863 }
864}
865
/*
 * Flags for bridge_control.bc_flags; they are interpreted by the
 * DRVSPEC macro when a driver-specific request is dispatched.
 */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
869
/*
 * Driver-specific command table using the 32-bit argument layouts.
 *
 * The position of an entry in the array is significant: the DRVSPEC macro
 * indexes its table with ifd_cmd (presumably this table is the one selected
 * for 32-bit requests -- the selection is not visible in this section).
 * The numeric comments mark indices 0, 10, 20 and 30.  The table must stay
 * entry-for-entry parallel to bridge_control_table64; only the handlers and
 * argument structures with a 32/64 suffix differ between the two.
 */
static const struct bridge_control bridge_control_table32[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist32,
	  .bc_argsize = sizeof(struct ifbrmnelist32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats32,
	  .bc_argsize = sizeof(struct ifbrmreq32),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
974
/*
 * Driver-specific command table using the 64-bit argument layouts.
 *
 * The position of an entry in the array is significant: the DRVSPEC macro
 * indexes its table with ifd_cmd (presumably this table is the one selected
 * for 64-bit requests -- the selection is not visible in this section).
 * The numeric comments mark indices 0, 10, 20 and 30.  The table must stay
 * entry-for-entry parallel to bridge_control_table32, because
 * bridge_control_table_size is computed from the 32-bit table only.
 */
static const struct bridge_control bridge_control_table64[] = {
	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },

	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },

	{ .bc_func = bridge_ioctl_gmnelist64,
	  .bc_argsize = sizeof(struct ifbrmnelist64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
	{ .bc_func = bridge_ioctl_gifstats64,
	  .bc_argsize = sizeof(struct ifbrmreq64),
	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
};
1079
1080static const unsigned int bridge_control_table_size =
1081 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1082
/*
 * List of bridge instances; bridge_clone_create() walks and modifies it
 * while holding bridge_list_mtx.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"                /* interface name used by the cloner */
#define BRIDGES_MAX     IF_MAXUNIT              /* upper unit bound passed to the cloner */
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* cloner that creates/destroys "bridge" interfaces, units 0..BRIDGES_MAX */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);
1093
/*
 * net.link.bridge.txstart
 * - when non-zero, bridge_clone_create() registers bridge_start as the
 *   start callback; otherwise it uses the legacy output model with
 *   bridge_output
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* net.link.bridge.debug: read/write view of if_bridge_debug (defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* net.link.bridge.log_level: read/write view of if_bridge_log_level (defined elsewhere) */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* net.link.bridge.segmentation: enabled (1) by default */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1109
1110static int if_bridge_vmnet_pf_tagging = 1;
1111SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1112 CTLFLAG_RW | CTLFLAG_LOCKED,
1113 &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1114
/*
 * Limits and defaults for the two "tso reduce mss" settings.  The *_MAX
 * values are the largest values the corresponding sysctl handlers accept.
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0

/* current values; updated through bridge_tso_reduce_mss() */
static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1124
1125static int
1126bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1127{
1128 int changed;
1129 int error;
1130 u_int new_value;
1131
1132 error = sysctl_io_number(req, bigValue: *val, valueSize: sizeof(*val), pValue: &new_value,
1133 changed: &changed);
1134 if (error == 0 && changed != 0) {
1135 if (new_value > val_max) {
1136 return EINVAL;
1137 }
1138 *val = new_value;
1139 }
1140 return error;
1141}
1142
1143static int
1144bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1145{
1146 return bridge_tso_reduce_mss(req, val: &if_bridge_tso_reduce_mss_forwarding,
1147 BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1148}
1149
1150SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1151 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1152 0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1153 "Bridge tso reduce mss when forwarding");
1154
1155static int
1156bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1157{
1158 return bridge_tso_reduce_mss(req, val: &if_bridge_tso_reduce_mss_tx,
1159 BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1160}
1161
1162SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1163 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1164 0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1165 "Bridge tso reduce mss on transmit");
1166
1167#if DEBUG || DEVELOPMENT
/*
 * net.link.bridge.force_errors (DEBUG/DEVELOPMENT builds only)
 * - bit mask of BRIDGE_FORCE_* flags; tested by bridge_error_is_forced()
 */
#define BRIDGE_FORCE_ONE        0x00000001
#define BRIDGE_FORCE_TWO        0x00000002
static u_int32_t if_bridge_force_errors = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_force_errors, 0, "Bridge interface force errors");
1174static inline bool
1175bridge_error_is_forced(u_int32_t flags)
1176{
1177 return (if_bridge_force_errors & flags) != 0;
1178}
1179
/*
 * BRIDGE_ERROR_GET_FORCED:
 *
 *	Store into `__is_forced' whether any of `__flags' is set in
 *	if_bridge_force_errors, and log the flags when that is the case.
 *	`__flags' is expanded more than once, so it should be free of
 *	side effects.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        __is_forced = bridge_error_is_forced(__flags);          \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
1187
/*
 * net.link.bridge.reduce_tso_mtu (DEBUG/DEVELOPMENT builds only)
 * - when non-zero, the bridge overrides the interface TSO MTU to a lower
 *   value (i.e. 16K) to enable testing the "use GSO instead" path
 * - disabled (0) by default
 */
static int if_bridge_reduce_tso_mtu = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");
1197
1198#endif /* DEBUG || DEVELOPMENT */
1199
/*
 * Debug logging helpers; each one writes at LOG_NOTICE through
 * BRIDGE_LOG_SIMPLE.
 */
static void brlog_ether_header(struct ether_header *);          /* src > dst and ether type */
static void brlog_mbuf_data(mbuf_t, size_t, size_t);            /* hex dump of packet bytes */
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *); /* packet header fields */
static void brlog_mbuf(mbuf_t, const char *, const char *);     /* mbuf fields (+ pkthdr) */
static void brlog_link(struct bridge_softc * sc);               /* link-level address info */
1205
1206#if BRIDGE_LOCK_DEBUG
/*
 * BRIDGE_LOCK_DEBUG variants of the bridge locking primitives.  Besides
 * taking/dropping the lock they record the caller's return address in the
 * softc's lock_lr[]/unlock_lr[] history arrays.
 */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);
1213
1214static void
1215bridge_lock(struct bridge_softc *sc)
1216{
1217 void *lr_saved = __builtin_return_address(0);
1218
1219 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1220
1221 _BRIDGE_LOCK(sc);
1222
1223 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1224 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1225}
1226
1227static void
1228bridge_unlock(struct bridge_softc *sc)
1229{
1230 void *lr_saved = __builtin_return_address(0);
1231
1232 BRIDGE_LOCK_ASSERT_HELD(sc);
1233
1234 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1235 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1236
1237 _BRIDGE_UNLOCK(sc);
1238}
1239
1240static int
1241bridge_lock2ref(struct bridge_softc *sc)
1242{
1243 int error = 0;
1244 void *lr_saved = __builtin_return_address(0);
1245
1246 BRIDGE_LOCK_ASSERT_HELD(sc);
1247
1248 if (sc->sc_iflist_xcnt > 0) {
1249 error = EBUSY;
1250 } else {
1251 sc->sc_iflist_ref++;
1252 }
1253
1254 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1255 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1256
1257 _BRIDGE_UNLOCK(sc);
1258
1259 return error;
1260}
1261
1262static void
1263bridge_unref(struct bridge_softc *sc)
1264{
1265 void *lr_saved = __builtin_return_address(0);
1266
1267 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1268
1269 _BRIDGE_LOCK(sc);
1270 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1271 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1272
1273 sc->sc_iflist_ref--;
1274
1275 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1276 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1277 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1278 _BRIDGE_UNLOCK(sc);
1279 wakeup(chan: &sc->sc_cv);
1280 } else {
1281 _BRIDGE_UNLOCK(sc);
1282 }
1283}
1284
1285static void
1286bridge_xlock(struct bridge_softc *sc)
1287{
1288 void *lr_saved = __builtin_return_address(0);
1289
1290 BRIDGE_LOCK_ASSERT_HELD(sc);
1291
1292 sc->sc_iflist_xcnt++;
1293 while (sc->sc_iflist_ref > 0) {
1294 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1295 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1296
1297 msleep(chan: &sc->sc_cv, mtx: &sc->sc_mtx, PZERO, wmesg: "BRIDGE_XLOCK", NULL);
1298
1299 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1300 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1301 }
1302}
1303
1304static void
1305bridge_xdrop(struct bridge_softc *sc)
1306{
1307 BRIDGE_LOCK_ASSERT_HELD(sc);
1308
1309 sc->sc_iflist_xcnt--;
1310}
1311
1312#endif /* BRIDGE_LOCK_DEBUG */
1313
1314static void
1315brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1316{
1317 if (m) {
1318 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1319 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1320 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1321 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1322 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1323 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1324 suffix ? suffix : "");
1325 } else {
1326 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1327 }
1328}
1329
1330static void
1331brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1332{
1333 if (m) {
1334 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1335 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1336 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1337 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1338 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1339 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1340 (unsigned int)mbuf_maxlen(m),
1341 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1342 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1343 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1344 if ((mbuf_flags(mbuf: m) & MBUF_PKTHDR)) {
1345 brlog_mbuf_pkthdr(m, prefix: "", suffix);
1346 }
1347 } else {
1348 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1349 }
1350}
1351
1352static void
1353brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1354{
1355 mbuf_t n;
1356 size_t i, j;
1357 size_t pktlen, mlen, maxlen;
1358 unsigned char *ptr;
1359
1360 pktlen = mbuf_pkthdr_len(mbuf: m);
1361
1362 if (offset > pktlen) {
1363 return;
1364 }
1365
1366 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1367 n = m;
1368 mlen = mbuf_len(mbuf: n);
1369 ptr = mbuf_data(mbuf: n);
1370 for (i = 0, j = 0; i < maxlen; i++, j++) {
1371 if (j >= mlen) {
1372 n = mbuf_next(mbuf: n);
1373 if (n == 0) {
1374 break;
1375 }
1376 ptr = mbuf_data(mbuf: n);
1377 mlen = mbuf_len(mbuf: n);
1378 j = 0;
1379 }
1380 if (i >= offset) {
1381 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1382 "%02x%s", ptr[j], i % 2 ? " " : "");
1383 }
1384 }
1385}
1386
1387static void
1388brlog_ether_header(struct ether_header *eh)
1389{
1390 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1391 "%02x:%02x:%02x:%02x:%02x:%02x > "
1392 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1393 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1394 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1395 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1396 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1397 ntohs(eh->ether_type));
1398}
1399
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' as a colon-separated, lower-case,
 *	zero-padded hexadecimal string ("xx:xx:xx:xx:xx:xx") into `buf',
 *	writing at most `len' bytes including the terminating NUL.
 *	Returns `buf'.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	(void)snprintf(buf, len,
	    "%02x:%02x:%02x:%02x:%02x:%02x",
	    ap[0], ap[1],
	    ap[2], ap[3],
	    ap[4], ap[5]);
	return buf;
}
1408
1409static void
1410brlog_link(struct bridge_softc * sc)
1411{
1412 int i;
1413 uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1414 IFNAMSIZ + ETHER_ADDR_LEN)];
1415 struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1416 const u_char * lladdr;
1417 char lladdr_str[48];
1418
1419 memset(s: sdl, c: 0, n: sizeof(sdl_buffer));
1420 sdl->sdl_family = AF_LINK;
1421 sdl->sdl_nlen = strlen(s: sc->sc_if_xname);
1422 sdl->sdl_alen = ETHER_ADDR_LEN;
1423 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1424 memcpy(dst: sdl->sdl_data, src: sc->sc_if_xname, n: sdl->sdl_nlen);
1425 memcpy(LLADDR(sdl), src: sc->sc_defaddr, ETHER_ADDR_LEN);
1426 lladdr_str[0] = '\0';
1427 for (i = 0, lladdr = CONST_LLADDR(sdl);
1428 i < sdl->sdl_alen;
1429 i++, lladdr++) {
1430 char byte_str[4];
1431
1432 snprintf(byte_str, count: sizeof(byte_str), "%s%x", i ? ":" : "",
1433 *lladdr);
1434 strlcat(dst: lladdr_str, src: byte_str, n: sizeof(lladdr_str));
1435 }
1436 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1437 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1438 " slen %d addr %s", sc->sc_if_xname,
1439 sdl->sdl_len, sdl->sdl_index,
1440 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1441 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1442}
1443
1444
1445/*
1446 * bridgeattach:
1447 *
1448 * Pseudo-device attach routine.
1449 */
1450__private_extern__ int
1451bridgeattach(int n)
1452{
1453#pragma unused(n)
1454 int error;
1455
1456 LIST_INIT(&bridge_list);
1457
1458#if BRIDGESTP
1459 bstp_sys_init();
1460#endif /* BRIDGESTP */
1461
1462 error = if_clone_attach(&bridge_cloner);
1463 if (error != 0) {
1464 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1465 }
1466 return error;
1467}
1468
1469
1470static errno_t
1471bridge_ifnet_set_attrs(struct ifnet * ifp)
1472{
1473 errno_t error;
1474
1475 error = ifnet_set_mtu(interface: ifp, ETHERMTU);
1476 if (error != 0) {
1477 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1478 goto done;
1479 }
1480 error = ifnet_set_addrlen(interface: ifp, ETHER_ADDR_LEN);
1481 if (error != 0) {
1482 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1483 goto done;
1484 }
1485 error = ifnet_set_hdrlen(interface: ifp, ETHER_HDR_LEN);
1486 if (error != 0) {
1487 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1488 goto done;
1489 }
1490 error = ifnet_set_flags(interface: ifp,
1491 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1492 mask: 0xffff);
1493
1494 if (error != 0) {
1495 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1496 goto done;
1497 }
1498done:
1499 return error;
1500}
1501
/*
 * bridge_clone_create:
 *
 *	Create a new bridge instance.
 *
 *	Steps, in order: allocate and initialize the softc, initialize the
 *	routing table, allocate the ifnet, set its Ethernet attributes, pick
 *	a default MAC address that no other bridge on bridge_list uses,
 *	attach the ifnet, set its link-layer address, configure checksum
 *	offload and TSO, attach STP (BRIDGESTP only), insert the softc on
 *	bridge_list, and attach BPF.
 *
 *	`ifc' supplies the interface name, `unit' the unit number; `params'
 *	is unused.  Returns 0 on success or the error of the failing step.
 *
 *	NOTE(review): every failure path jumps to `done', which only logs;
 *	the softc, its mutex and whatever was set up before the failure are
 *	not released in this function (see the TBD comment there).  Also
 *	note that a bpf_attach() failure is returned after the softc has
 *	already been inserted on bridge_list.
 */
static int
bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
{
#pragma unused(params)
	struct ifnet *ifp = NULL;
	struct bridge_softc *sc = NULL;
	struct bridge_softc *sc2 = NULL;
	struct ifnet_init_eparams init_params;
	errno_t error = 0;
	uint8_t eth_hostid[ETHER_ADDR_LEN];
	int fb, retry, has_hostid;

	/* zero-filled allocation that is requested not to fail */
	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
	lck_mtx_init(lck: &sc->sc_mtx, grp: &bridge_lock_grp, attr: &bridge_lock_attr);
	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
	sc->sc_filter_flags = 0;

	TAILQ_INIT(&sc->sc_iflist);

	/* use the interface name as the unique id for ifp recycle */
	snprintf(sc->sc_if_xname, count: sizeof(sc->sc_if_xname), "%s%d",
	    ifc->ifc_name, unit);
	bzero(s: &init_params, n: sizeof(init_params));
	init_params.ver = IFNET_INIT_CURRENT_VERSION;
	init_params.len = sizeof(init_params);
	/* Initialize our routing table. */
	error = bridge_rtable_init(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
		goto done;
	}
	TAILQ_INIT(&sc->sc_spanlist);
	/* output model is chosen by the net.link.bridge.txstart sysctl */
	if (if_bridge_txstart) {
		init_params.start = bridge_start;
	} else {
		init_params.flags = IFNET_INIT_LEGACY;
		init_params.output = bridge_output;
	}
	init_params.set_bpf_tap = bridge_set_bpf_tap;
	init_params.uniqueid = sc->sc_if_xname;
	init_params.uniqueid_len = strlen(s: sc->sc_if_xname);
	init_params.sndq_maxlen = IFQ_MAXLEN;
	init_params.name = ifc->ifc_name;
	init_params.unit = unit;
	init_params.family = IFNET_FAMILY_ETHERNET;
	init_params.type = IFT_BRIDGE;
	init_params.demux = ether_demux;
	init_params.add_proto = ether_add_proto;
	init_params.del_proto = ether_del_proto;
	init_params.check_multi = ether_check_multi;
	init_params.framer_extended = ether_frameout_extended;
	init_params.softc = sc;
	init_params.ioctl = bridge_ioctl;
	init_params.detach = bridge_detach;
	init_params.broadcast_addr = etherbroadcastaddr;
	init_params.broadcast_len = ETHER_ADDR_LEN;

	error = ifnet_allocate_extended(init: &init_params, interface: &ifp);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
		goto done;
	}
	LIST_INIT(&sc->sc_mne_list);
	LIST_INIT(&sc->sc_mne_list_v6);
	sc->sc_ifp = ifp;
	error = bridge_ifnet_set_attrs(ifp);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
		    error);
		goto done;
	}
	/*
	 * Generate an ethernet address with a locally administered address.
	 *
	 * Since we are using random ethernet addresses for the bridge, it is
	 * possible that we might have address collisions, so make sure that
	 * this hardware address isn't already in use on another bridge.
	 * The first try uses the "hostid" and falls back to read_frandom();
	 * for "hostid", we use the MAC address of the first-encountered
	 * Ethernet-type interface that is currently configured.
	 */
	fb = 0;         /* becomes 1 after the first attempt: use random bytes from then on */
	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
	for (retry = 1; retry != 0;) {
		if (fb || has_hostid == 0) {
			read_frandom(buffer: &sc->sc_defaddr, ETHER_ADDR_LEN);
			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
		} else {
			bcopy(src: &eth_hostid[0], dst: &sc->sc_defaddr,
			    ETHER_ADDR_LEN);
			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
			sc->sc_defaddr[3] =     /* stir it up a bit */
			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
			/*
			 * Mix in the LSB as it's actually pretty significant,
			 * see rdar://14076061
			 */
			sc->sc_defaddr[4] =
			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
			    sc->sc_defaddr[5];
			/* last byte is the low 8 bits of the unit number */
			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
		}

		fb = 1;
		retry = 0;
		/* retry when another bridge already uses this address */
		lck_mtx_lock(lck: &bridge_list_mtx);
		LIST_FOREACH(sc2, &bridge_list, sc_list) {
			if (_ether_cmp(a: sc->sc_defaddr,
			    IF_LLADDR(sc2->sc_ifp)) == 0) {
				retry = 1;
			}
		}
		lck_mtx_unlock(lck: &bridge_list_mtx);
	}

	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;

	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
		brlog_link(sc);
	}
	error = ifnet_attach(interface: ifp, NULL);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
		goto done;
	}

	error = ifnet_set_lladdr_and_type(interface: ifp, lladdr: sc->sc_defaddr, ETHER_ADDR_LEN,
	    IFT_ETHER);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
		    error);
		goto done;
	}

	/* the result of ifnet_set_offload() is not checked */
	ifnet_set_offload(interface: ifp,
	    offload: IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
		goto done;
	}
#if BRIDGESTP
	bstp_attach(&sc->sc_stp, &bridge_ops);
#endif /* BRIDGESTP */

	/* publish the new bridge on the global list */
	lck_mtx_lock(lck: &bridge_list_mtx);
	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
	lck_mtx_unlock(lck: &bridge_list_mtx);

	/* attach as ethernet */
	error = bpf_attach(interface: ifp, DLT_EN10MB, header_length: sizeof(struct ether_header),
	    NULL, NULL);

done:
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
		/* TBD: Clean up: sc, sc_rthash etc */
	}

	return error;
}
1675
/*
 * bridge_clone_destroy:
 *
 *	Destroy a bridge instance.
 *
 *	Marks the softc SCF_DETACHING (returning immediately if it is marked
 *	already), stops the bridge, tears down the delayed calls, clears
 *	IFF_UP, removes every member and span interface, and finally detaches
 *	the ifnet.  Always returns 0; a failing ifnet_detach() panics.
 */
static int
bridge_clone_destroy(struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct bridge_iflist *bif;
	errno_t error;

	BRIDGE_LOCK(sc);
	/* a destroy is already in progress: nothing more to do */
	if ((sc->sc_flags & SCF_DETACHING)) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}
	sc->sc_flags |= SCF_DETACHING;

	bridge_ifstop(ifp, 1);

	/*
	 * NOTE(review): only the resize call is cancelled here; presumably
	 * bridge_ifstop() takes care of the aging timer -- confirm.
	 */
	bridge_cancel_delayed_call(&sc->sc_resize_call);

	bridge_cleanup_delayed_call(&sc->sc_resize_call);
	bridge_cleanup_delayed_call(&sc->sc_aging_timer);

	/* clear IFF_UP; a failure is logged but does not stop the teardown */
	error = ifnet_set_flags(interface: ifp, new_flags: 0, IFF_UP);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
	}

	/* drain the member list, then the span list */
	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
		bridge_delete_member(sc, bif);
	}

	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
		bridge_delete_span(sc, bif);
	}
	BRIDGE_UNLOCK(sc);

	/* the detach is issued after the bridge lock has been dropped */
	error = ifnet_detach(interface: ifp);
	if (error != 0) {
		panic("%s (%d): ifnet_detach(%p) failed %d",
		    __func__, __LINE__, ifp, error);
	}
	return 0;
}
1723
/*
 * DRVSPEC:
 *
 *	Dispatch a driver-specific request through a bridge_control table.
 *	The macro is not self-contained; it expects these names in the
 *	expanding scope: `ifd' (request with ifd_cmd, ifd_len, ifd_data),
 *	`bridge_control_table' (the table to index), `bc', `cmd', `error',
 *	`args' (argument buffer) and `sc'.
 *
 *	Steps, each leaving the do/while with `error' set on failure:
 *	1. reject ifd_cmd values >= bridge_control_table_size (EINVAL)
 *	2. SIOCGDRVSPEC is only valid for BC_F_COPYOUT entries and
 *	   SIOCSDRVSPEC only for entries without BC_F_COPYOUT (EINVAL)
 *	3. BC_F_SUSER entries require kauth_authorize_generic() to pass
 *	4. ifd_len must equal bc_argsize and fit in `args' (EINVAL)
 *	5. zero `args'; copy the user arguments in for BC_F_COPYIN entries
 *	6. run the handler with the bridge lock held
 *	7. copy `args' back out for BC_F_COPYOUT entries
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if (cmd == SIOCGDRVSPEC &&                                      \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if (cmd == SIOCSDRVSPEC &&                               \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error)                                                      \
	        break;                                                  \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT)                                \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
} while (0)
1770
1771static boolean_t
1772interface_needs_input_broadcast(struct ifnet * ifp)
1773{
1774 /*
1775 * Selectively enable input broadcast only when necessary.
1776 * The bridge interface itself attaches a fake protocol
1777 * so checking for at least two protocols means that the
1778 * interface is being used for something besides bridging
1779 * and needs to see broadcast packets from other members.
1780 */
1781 return if_get_protolist(ifp, NULL, count: 0) >= 2;
1782}
1783
1784static boolean_t
1785bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1786{
1787 boolean_t old_input_broadcast;
1788
1789 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1790 if (input_broadcast) {
1791 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1792 } else {
1793 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1794 }
1795 return old_input_broadcast != input_broadcast;
1796}
1797
1798/*
1799 * bridge_ioctl:
1800 *
1801 * Handle a control request from the operator.
1802 */
1803static errno_t
1804bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1805{
1806 struct bridge_softc *sc = ifp->if_softc;
1807 struct ifreq *ifr = (struct ifreq *)data;
1808 struct bridge_iflist *bif;
1809 int error = 0;
1810
1811 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1812
1813 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1814 "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1815 ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1816 (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1817 (char)IOCGROUP(cmd), cmd & 0xff);
1818
1819 switch (cmd) {
1820 case SIOCSIFADDR:
1821 case SIOCAIFADDR:
1822 ifnet_set_flags(interface: ifp, IFF_UP, IFF_UP);
1823 break;
1824
1825 case SIOCGIFMEDIA32:
1826 case SIOCGIFMEDIA64: {
1827 struct ifmediareq *ifmr = (struct ifmediareq *)data;
1828 user_addr_t user_addr;
1829
1830 user_addr = (cmd == SIOCGIFMEDIA64) ?
1831 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1832 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1833
1834 ifmr->ifm_status = IFM_AVALID;
1835 ifmr->ifm_mask = 0;
1836 ifmr->ifm_count = 1;
1837
1838 BRIDGE_LOCK(sc);
1839 if (!(sc->sc_flags & SCF_DETACHING) &&
1840 (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1841 ifmr->ifm_status |= IFM_ACTIVE;
1842 ifmr->ifm_active = ifmr->ifm_current =
1843 IFM_ETHER | IFM_AUTO;
1844 } else {
1845 ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1846 }
1847 BRIDGE_UNLOCK(sc);
1848
1849 if (user_addr != USER_ADDR_NULL) {
1850 error = copyout(&ifmr->ifm_current, user_addr,
1851 sizeof(int));
1852 }
1853 break;
1854 }
1855
1856 case SIOCADDMULTI:
1857 case SIOCDELMULTI:
1858 break;
1859
1860 case SIOCSDRVSPEC32:
1861 case SIOCGDRVSPEC32: {
1862 union {
1863 struct ifbreq ifbreq;
1864 struct ifbifconf32 ifbifconf;
1865 struct ifbareq32 ifbareq;
1866 struct ifbaconf32 ifbaconf;
1867 struct ifbrparam ifbrparam;
1868 struct ifbropreq32 ifbropreq;
1869 } args;
1870 struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1871 const struct bridge_control *bridge_control_table =
1872 bridge_control_table32, *bc;
1873
1874 DRVSPEC;
1875
1876 break;
1877 }
1878 case SIOCSDRVSPEC64:
1879 case SIOCGDRVSPEC64: {
1880 union {
1881 struct ifbreq ifbreq;
1882 struct ifbifconf64 ifbifconf;
1883 struct ifbareq64 ifbareq;
1884 struct ifbaconf64 ifbaconf;
1885 struct ifbrparam ifbrparam;
1886 struct ifbropreq64 ifbropreq;
1887 } args;
1888 struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1889 const struct bridge_control *bridge_control_table =
1890 bridge_control_table64, *bc;
1891
1892 DRVSPEC;
1893
1894 break;
1895 }
1896
1897 case SIOCSIFFLAGS:
1898 if (!(ifp->if_flags & IFF_UP) &&
1899 (ifp->if_flags & IFF_RUNNING)) {
1900 /*
1901 * If interface is marked down and it is running,
1902 * then stop and disable it.
1903 */
1904 BRIDGE_LOCK(sc);
1905 bridge_ifstop(ifp, 1);
1906 BRIDGE_UNLOCK(sc);
1907 } else if ((ifp->if_flags & IFF_UP) &&
1908 !(ifp->if_flags & IFF_RUNNING)) {
1909 /*
1910 * If interface is marked up and it is stopped, then
1911 * start it.
1912 */
1913 BRIDGE_LOCK(sc);
1914 error = bridge_init(ifp);
1915 BRIDGE_UNLOCK(sc);
1916 }
1917 break;
1918
1919 case SIOCSIFLLADDR:
1920 error = ifnet_set_lladdr(interface: ifp, lladdr: ifr->ifr_addr.sa_data,
1921 lladdr_len: ifr->ifr_addr.sa_len);
1922 if (error != 0) {
1923 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1924 "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1925 error);
1926 }
1927 break;
1928
1929 case SIOCSIFMTU:
1930 if (ifr->ifr_mtu < 576) {
1931 error = EINVAL;
1932 break;
1933 }
1934 BRIDGE_LOCK(sc);
1935 if (TAILQ_EMPTY(&sc->sc_iflist)) {
1936 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1937 BRIDGE_UNLOCK(sc);
1938 break;
1939 }
1940 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1941 if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1942 BRIDGE_LOG(LOG_NOTICE, 0,
1943 "%s invalid MTU: %u(%s) != %d",
1944 sc->sc_ifp->if_xname,
1945 bif->bif_ifp->if_mtu,
1946 bif->bif_ifp->if_xname, ifr->ifr_mtu);
1947 error = EINVAL;
1948 break;
1949 }
1950 }
1951 if (!error) {
1952 sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1953 }
1954 BRIDGE_UNLOCK(sc);
1955 break;
1956
1957 default:
1958 error = ether_ioctl(interface: ifp, command: cmd, data);
1959 if (error != 0 && error != EOPNOTSUPP) {
1960 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1961 "ifp %s cmd 0x%08lx "
1962 "(%c%c [%lu] %c %lu) failed error: %d",
1963 ifp->if_xname, cmd,
1964 (cmd & IOC_IN) ? 'I' : ' ',
1965 (cmd & IOC_OUT) ? 'O' : ' ',
1966 IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1967 cmd & 0xff, error);
1968 }
1969 break;
1970 }
1971 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1972
1973 return error;
1974}
1975
1976#if HAS_IF_CAP
1977/*
1978 * bridge_mutecaps:
1979 *
1980 * Clear or restore unwanted capabilities on the member interface
1981 */
1982static void
1983bridge_mutecaps(struct bridge_softc *sc)
1984{
1985 struct bridge_iflist *bif;
1986 int enabled, mask;
1987
1988 /* Initial bitmask of capabilities to test */
1989 mask = BRIDGE_IFCAPS_MASK;
1990
1991 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1992 /* Every member must support it or its disabled */
1993 mask &= bif->bif_savedcaps;
1994 }
1995
1996 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1997 enabled = bif->bif_ifp->if_capenable;
1998 enabled &= ~BRIDGE_IFCAPS_STRIP;
1999 /* strip off mask bits and enable them again if allowed */
2000 enabled &= ~BRIDGE_IFCAPS_MASK;
2001 enabled |= mask;
2002
2003 bridge_set_ifcap(sc, bif, enabled);
2004 }
2005}
2006
2007static void
2008bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2009{
2010 struct ifnet *ifp = bif->bif_ifp;
2011 struct ifreq ifr;
2012 int error;
2013
2014 bzero(&ifr, sizeof(ifr));
2015 ifr.ifr_reqcap = set;
2016
2017 if (ifp->if_capenable != set) {
2018 IFF_LOCKGIANT(ifp);
2019 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2020 IFF_UNLOCKGIANT(ifp);
2021 if (error) {
2022 BRIDGE_LOG(LOG_NOTICE, 0,
2023 "%s error setting interface capabilities on %s",
2024 sc->sc_ifp->if_xname, ifp->if_xname);
2025 }
2026 }
2027}
2028#endif /* HAS_IF_CAP */
2029
2030static errno_t
2031siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2032{
2033 struct ifreq ifr;
2034
2035 bzero(s: &ifr, n: sizeof(ifr));
2036 ifr.ifr_reqcap = cap_enable;
2037 return ifnet_ioctl(interface: ifp, protocol: 0, SIOCSIFCAP, ioctl_arg: &ifr);
2038}
2039
2040static const char *
2041enable_disable_str(boolean_t enable)
2042{
2043 return enable ? "enable" : "disable";
2044}
2045
2046static boolean_t
2047bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2048{
2049 uint32_t cap_enable;
2050 uint32_t cap_supported;
2051 boolean_t changed = FALSE;
2052 boolean_t lro_enabled;
2053
2054 cap_supported = ifnet_capabilities_supported(interface: ifp);
2055 if ((cap_supported & IFCAP_LRO) == 0) {
2056 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2057 "%s doesn't support LRO",
2058 ifp->if_xname);
2059 goto done;
2060 }
2061 cap_enable = ifnet_capabilities_enabled(interface: ifp);
2062 lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2063 if (lro_enabled != enable) {
2064 errno_t error;
2065
2066 if (enable) {
2067 cap_enable |= IFCAP_LRO;
2068 } else {
2069 cap_enable &= ~IFCAP_LRO;
2070 }
2071 error = siocsifcap(ifp, cap_enable);
2072 if (error != 0) {
2073 BRIDGE_LOG(LOG_NOTICE, 0,
2074 "%s %s failed (cap 0x%x) %d",
2075 ifp->if_xname,
2076 enable_disable_str(enable),
2077 cap_enable,
2078 error);
2079 } else {
2080 changed = TRUE;
2081 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2082 "%s %s success (cap 0x%x)",
2083 ifp->if_xname,
2084 enable_disable_str(enable),
2085 cap_enable);
2086 }
2087 }
2088done:
2089 return changed;
2090}
2091
2092static errno_t
2093bridge_set_tso(struct bridge_softc *sc)
2094{
2095 struct bridge_iflist *bif;
2096 u_int32_t tso_v4_mtu;
2097 u_int32_t tso_v6_mtu;
2098 ifnet_offload_t offload;
2099 errno_t error = 0;
2100
2101 /* By default, support TSO */
2102 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2103 tso_v4_mtu = IP_MAXPACKET;
2104 tso_v6_mtu = IP_MAXPACKET;
2105
2106 /* Use the lowest common denominator of the members */
2107 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2108 ifnet_t ifp = bif->bif_ifp;
2109
2110 if (ifp == NULL) {
2111 continue;
2112 }
2113
2114 if (offload & IFNET_TSO_IPV4) {
2115 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2116 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2117 tso_v4_mtu = ifp->if_tso_v4_mtu;
2118 }
2119 } else {
2120 offload &= ~IFNET_TSO_IPV4;
2121 tso_v4_mtu = 0;
2122 }
2123 }
2124 if (offload & IFNET_TSO_IPV6) {
2125 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2126 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2127 tso_v6_mtu = ifp->if_tso_v6_mtu;
2128 }
2129 } else {
2130 offload &= ~IFNET_TSO_IPV6;
2131 tso_v6_mtu = 0;
2132 }
2133 }
2134 }
2135
2136 if (offload != sc->sc_ifp->if_hwassist) {
2137 error = ifnet_set_offload(interface: sc->sc_ifp, offload);
2138 if (error != 0) {
2139 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2140 "ifnet_set_offload(%s, 0x%x) failed %d",
2141 sc->sc_ifp->if_xname, offload, error);
2142 goto done;
2143 }
2144 /*
2145 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2146 * as large as the interface MTU
2147 */
2148 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2149 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2150 tso_v4_mtu = sc->sc_ifp->if_mtu;
2151 }
2152 error = ifnet_set_tso_mtu(interface: sc->sc_ifp, AF_INET,
2153 mtuLen: tso_v4_mtu);
2154 if (error != 0) {
2155 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2156 "ifnet_set_tso_mtu(%s, "
2157 "AF_INET, %u) failed %d",
2158 sc->sc_ifp->if_xname,
2159 tso_v4_mtu, error);
2160 goto done;
2161 }
2162 }
2163 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2164 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2165 tso_v6_mtu = sc->sc_ifp->if_mtu;
2166 }
2167 error = ifnet_set_tso_mtu(interface: sc->sc_ifp, AF_INET6,
2168 mtuLen: tso_v6_mtu);
2169 if (error != 0) {
2170 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2171 "ifnet_set_tso_mtu(%s, "
2172 "AF_INET6, %u) failed %d",
2173 sc->sc_ifp->if_xname,
2174 tso_v6_mtu, error);
2175 goto done;
2176 }
2177 }
2178 }
2179done:
2180 return error;
2181}
2182
2183/*
2184 * bridge_lookup_member:
2185 *
2186 * Lookup a bridge member interface.
2187 */
2188static struct bridge_iflist *
2189bridge_lookup_member(struct bridge_softc *sc, const char *name)
2190{
2191 struct bridge_iflist *bif;
2192 struct ifnet *ifp;
2193
2194 BRIDGE_LOCK_ASSERT_HELD(sc);
2195
2196 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2197 ifp = bif->bif_ifp;
2198 if (strcmp(s1: ifp->if_xname, s2: name) == 0) {
2199 return bif;
2200 }
2201 }
2202
2203 return NULL;
2204}
2205
2206/*
2207 * bridge_lookup_member_if:
2208 *
2209 * Lookup a bridge member interface by ifnet*.
2210 */
2211static struct bridge_iflist *
2212bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2213{
2214 struct bridge_iflist *bif;
2215
2216 BRIDGE_LOCK_ASSERT_HELD(sc);
2217
2218 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2219 if (bif->bif_ifp == member_ifp) {
2220 return bif;
2221 }
2222 }
2223
2224 return NULL;
2225}
2226
2227static errno_t
2228bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2229 mbuf_t *data, char **frame_ptr)
2230{
2231#pragma unused(protocol)
2232 errno_t error = 0;
2233 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2234 struct bridge_softc *sc = bif->bif_sc;
2235 int included = 0;
2236 size_t frmlen = 0;
2237 mbuf_t m = *data;
2238
2239 if ((m->m_flags & M_PROTO1)) {
2240 goto out;
2241 }
2242
2243 if (*frame_ptr >= (char *)mbuf_datastart(mbuf: m) &&
2244 *frame_ptr <= (char *)mbuf_data(mbuf: m)) {
2245 included = 1;
2246 frmlen = (char *)mbuf_data(mbuf: m) - *frame_ptr;
2247 }
2248 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2249 "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2250 "frmlen %lu", sc->sc_ifp->if_xname,
2251 ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2252 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2253 (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2254 included ? "inside" : "outside", frmlen);
2255 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2256 brlog_mbuf(m, prefix: "bridge_iff_input[", suffix: "");
2257 brlog_ether_header(eh: (struct ether_header *)
2258 (void *)*frame_ptr);
2259 brlog_mbuf_data(m, offset: 0, len: 20);
2260 }
2261 if (included == 0) {
2262 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2263 goto out;
2264 }
2265
2266 /* Move data pointer to start of frame to the link layer header */
2267 (void) mbuf_setdata(mbuf: m, data: (char *)mbuf_data(mbuf: m) - frmlen,
2268 len: mbuf_len(mbuf: m) + frmlen);
2269 (void) mbuf_pkthdr_adjustlen(mbuf: m, amount: frmlen);
2270
2271 /* make sure we can access the ethernet header */
2272 if (mbuf_pkthdr_len(mbuf: m) < sizeof(struct ether_header)) {
2273 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2274 "short frame %lu < %lu",
2275 mbuf_pkthdr_len(m), sizeof(struct ether_header));
2276 goto out;
2277 }
2278 if (mbuf_len(mbuf: m) < sizeof(struct ether_header)) {
2279 error = mbuf_pullup(mbuf: data, len: sizeof(struct ether_header));
2280 if (error != 0) {
2281 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2282 "mbuf_pullup(%lu) failed %d",
2283 sizeof(struct ether_header),
2284 error);
2285 error = EJUSTRETURN;
2286 goto out;
2287 }
2288 if (m != *data) {
2289 m = *data;
2290 *frame_ptr = mbuf_data(mbuf: m);
2291 }
2292 }
2293
2294 error = bridge_input(ifp, data);
2295
2296 /* Adjust packet back to original */
2297 if (error == 0) {
2298 /* bridge_input might have modified *data */
2299 if (*data != m) {
2300 m = *data;
2301 *frame_ptr = mbuf_data(mbuf: m);
2302 }
2303 (void) mbuf_setdata(mbuf: m, data: (char *)mbuf_data(mbuf: m) + frmlen,
2304 len: mbuf_len(mbuf: m) - frmlen);
2305 (void) mbuf_pkthdr_adjustlen(mbuf: m, amount: -frmlen);
2306 }
2307
2308 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2309 BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2310 brlog_mbuf(m, prefix: "bridge_iff_input]", suffix: "");
2311 }
2312
2313out:
2314 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2315
2316 return error;
2317}
2318
2319static errno_t
2320bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2321 mbuf_t *data)
2322{
2323#pragma unused(protocol)
2324 errno_t error = 0;
2325 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2326 struct bridge_softc *sc = bif->bif_sc;
2327 mbuf_t m = *data;
2328
2329 if ((m->m_flags & M_PROTO1)) {
2330 goto out;
2331 }
2332 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2333 "%s from %s m 0x%llx data 0x%llx",
2334 sc->sc_ifp->if_xname, ifp->if_xname,
2335 (uint64_t)VM_KERNEL_ADDRPERM(m),
2336 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2337
2338 error = bridge_member_output(sc, ifp, m: data);
2339 if (error != 0 && error != EJUSTRETURN) {
2340 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2341 "bridge_member_output failed error %d",
2342 error);
2343 }
2344out:
2345 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2346
2347 return error;
2348}
2349
2350static void
2351bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2352 const struct kev_msg *event_msg)
2353{
2354#pragma unused(protocol)
2355 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2356 struct bridge_softc *sc = bif->bif_sc;
2357
2358 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2359 event_msg->kev_class == KEV_NETWORK_CLASS &&
2360 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2361 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2362 "%s event_code %u - %s",
2363 ifp->if_xname, event_msg->event_code,
2364 dlil_kev_dl_code_str(event_msg->event_code));
2365
2366 switch (event_msg->event_code) {
2367 case KEV_DL_LINK_OFF:
2368 case KEV_DL_LINK_ON: {
2369 bridge_iflinkevent(ifp);
2370#if BRIDGESTP
2371 bstp_linkstate(ifp, event_msg->event_code);
2372#endif /* BRIDGESTP */
2373 break;
2374 }
2375 case KEV_DL_SIFFLAGS: {
2376 if ((ifp->if_flags & IFF_UP) == 0) {
2377 break;
2378 }
2379 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2380 errno_t error;
2381
2382 error = ifnet_set_promiscuous(interface: ifp, on: 1);
2383 if (error != 0) {
2384 BRIDGE_LOG(LOG_NOTICE, 0,
2385 "ifnet_set_promiscuous (%s)"
2386 " failed %d", ifp->if_xname,
2387 error);
2388 } else {
2389 bif->bif_flags |= BIFF_PROMISC;
2390 }
2391 }
2392 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2393 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2394 errno_t error;
2395
2396 error = if_allmulti(ifp, 1);
2397 if (error != 0) {
2398 BRIDGE_LOG(LOG_NOTICE, 0,
2399 "if_allmulti (%s)"
2400 " failed %d", ifp->if_xname,
2401 error);
2402 } else {
2403 bif->bif_flags |= BIFF_ALL_MULTI;
2404#ifdef XNU_PLATFORM_AppleTVOS
2405 ip6_forwarding = 1;
2406#endif /* XNU_PLATFORM_AppleTVOS */
2407 }
2408 }
2409 break;
2410 }
2411 case KEV_DL_IFCAP_CHANGED: {
2412 BRIDGE_LOCK(sc);
2413 bridge_set_tso(sc);
2414 BRIDGE_UNLOCK(sc);
2415 break;
2416 }
2417 case KEV_DL_PROTO_DETACHED:
2418 case KEV_DL_PROTO_ATTACHED: {
2419 bridge_proto_attach_changed(ifp);
2420 break;
2421 }
2422 default:
2423 break;
2424 }
2425 }
2426}
2427
2428/*
2429 * bridge_iff_detached:
2430 *
2431 * Called when our interface filter has been detached from a
2432 * member interface.
2433 */
2434static void
2435bridge_iff_detached(void *cookie, ifnet_t ifp)
2436{
2437#pragma unused(cookie)
2438 struct bridge_iflist *bif;
2439 struct bridge_softc *sc = ifp->if_bridge;
2440
2441 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2442
2443 /* Check if the interface is a bridge member */
2444 if (sc != NULL) {
2445 BRIDGE_LOCK(sc);
2446 bif = bridge_lookup_member_if(sc, member_ifp: ifp);
2447 if (bif != NULL) {
2448 bridge_delete_member(sc, bif);
2449 }
2450 BRIDGE_UNLOCK(sc);
2451 return;
2452 }
2453 /* Check if the interface is a span port */
2454 lck_mtx_lock(lck: &bridge_list_mtx);
2455 LIST_FOREACH(sc, &bridge_list, sc_list) {
2456 BRIDGE_LOCK(sc);
2457 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2458 if (ifp == bif->bif_ifp) {
2459 bridge_delete_span(sc, bif);
2460 break;
2461 }
2462 BRIDGE_UNLOCK(sc);
2463 }
2464 lck_mtx_unlock(lck: &bridge_list_mtx);
2465}
2466
2467static errno_t
2468bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2469 char *header)
2470{
2471#pragma unused(protocol, packet, header)
2472 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2473 ifp->if_xname);
2474 return 0;
2475}
2476
2477static int
2478bridge_attach_protocol(struct ifnet *ifp)
2479{
2480 int error;
2481 struct ifnet_attach_proto_param reg;
2482
2483 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2484 bzero(s: &reg, n: sizeof(reg));
2485 reg.input = bridge_proto_input;
2486
2487 error = ifnet_attach_protocol(interface: ifp, PF_BRIDGE, proto_details: &reg);
2488 if (error) {
2489 BRIDGE_LOG(LOG_NOTICE, 0,
2490 "ifnet_attach_protocol(%s) failed, %d",
2491 ifp->if_xname, error);
2492 }
2493
2494 return error;
2495}
2496
2497static int
2498bridge_detach_protocol(struct ifnet *ifp)
2499{
2500 int error;
2501
2502 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2503 error = ifnet_detach_protocol(interface: ifp, PF_BRIDGE);
2504 if (error) {
2505 BRIDGE_LOG(LOG_NOTICE, 0,
2506 "ifnet_detach_protocol(%s) failed, %d",
2507 ifp->if_xname, error);
2508 }
2509
2510 return error;
2511}
2512
2513/*
2514 * bridge_delete_member:
2515 *
2516 * Delete the specified member interface.
2517 */
2518static void
2519bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2520{
2521#if SKYWALK
2522 boolean_t add_netagent = FALSE;
2523#endif /* SKYWALK */
2524 uint32_t bif_flags;
2525 struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2526 int lladdr_changed = 0, error;
2527 uint8_t eaddr[ETHER_ADDR_LEN];
2528 u_int32_t event_code = 0;
2529
2530 BRIDGE_LOCK_ASSERT_HELD(sc);
2531 VERIFY(ifs != NULL);
2532
2533 /*
2534 * Remove the member from the list first so it cannot be found anymore
2535 * when we release the bridge lock below
2536 */
2537 if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2538 BRIDGE_XLOCK(sc);
2539 TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2540 BRIDGE_XDROP(sc);
2541 }
2542 if (sc->sc_mac_nat_bif != NULL) {
2543 if (bif == sc->sc_mac_nat_bif) {
2544 bridge_mac_nat_disable(sc);
2545 } else {
2546 bridge_mac_nat_flush_entries(sc, bif);
2547 }
2548 }
2549#if BRIDGESTP
2550 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2551 bstp_disable(&bif->bif_stp);
2552 }
2553#endif /* BRIDGESTP */
2554
2555 /*
2556 * If removing the interface that gave the bridge its mac address, set
2557 * the mac address of the bridge to the address of the next member, or
2558 * to its default address if no members are left.
2559 */
2560 if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2561 ifnet_release(interface: sc->sc_ifaddr);
2562 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2563 bcopy(src: sc->sc_defaddr, dst: eaddr, ETHER_ADDR_LEN);
2564 sc->sc_ifaddr = NULL;
2565 } else {
2566 struct ifnet *fif =
2567 TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2568 bcopy(IF_LLADDR(fif), dst: eaddr, ETHER_ADDR_LEN);
2569 sc->sc_ifaddr = fif;
2570 ifnet_reference(interface: fif); /* for sc_ifaddr */
2571 }
2572 lladdr_changed = 1;
2573 }
2574
2575#if HAS_IF_CAP
2576 bridge_mutecaps(sc); /* recalculate now this interface is removed */
2577#endif /* HAS_IF_CAP */
2578
2579 error = bridge_set_tso(sc);
2580 if (error != 0) {
2581 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2582 }
2583
2584 bridge_rtdelete(sc, ifp: ifs, IFBF_FLUSHALL);
2585
2586 KASSERT(bif->bif_addrcnt == 0,
2587 ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2588
2589 /*
2590 * Update link status of the bridge based on its remaining members
2591 */
2592 event_code = bridge_updatelinkstatus(sc);
2593 bif_flags = bif->bif_flags;
2594 BRIDGE_UNLOCK(sc);
2595
2596 /* only perform these steps if the interface is still attached */
2597 if (ifnet_is_attached(ifs, refio: 1)) {
2598#if SKYWALK
2599 add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2600
2601 if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2602 ifnet_detach_flowswitch_nexus(ifp: ifs);
2603 }
2604#endif /* SKYWALK */
2605 /* disable promiscuous mode */
2606 if ((bif_flags & BIFF_PROMISC) != 0) {
2607 (void) ifnet_set_promiscuous(interface: ifs, on: 0);
2608 }
2609 /* disable all multi */
2610 if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2611 (void)if_allmulti(ifs, 0);
2612 }
2613#if HAS_IF_CAP
2614 /* re-enable any interface capabilities */
2615 bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2616#endif
2617 /* detach bridge "protocol" */
2618 if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2619 (void)bridge_detach_protocol(ifp: ifs);
2620 }
2621 /* detach interface filter */
2622 if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2623 iflt_detach(filter_ref: bif->bif_iff_ref);
2624 }
2625 /* re-enable LRO */
2626 if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2627 (void)bridge_set_lro(ifp: ifs, TRUE);
2628 }
2629 ifnet_decr_iorefcnt(ifs);
2630 }
2631
2632 if (lladdr_changed &&
2633 (error = ifnet_set_lladdr(interface: bifp, lladdr: eaddr, ETHER_ADDR_LEN)) != 0) {
2634 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2635 }
2636
2637 if (event_code != 0) {
2638 bridge_link_event(bifp, event_code);
2639 }
2640
2641#if BRIDGESTP
2642 bstp_destroy(&bif->bif_stp); /* prepare to free */
2643#endif /* BRIDGESTP */
2644
2645 kfree_type(struct bridge_iflist, bif);
2646 ifs->if_bridge = NULL;
2647#if SKYWALK
2648 if (add_netagent && ifnet_is_attached(ifs, refio: 1)) {
2649 (void)ifnet_add_netagent(ifp: ifs);
2650 ifnet_decr_iorefcnt(ifs);
2651 }
2652#endif /* SKYWALK */
2653
2654 ifnet_release(interface: ifs);
2655
2656 BRIDGE_LOCK(sc);
2657}
2658
2659/*
2660 * bridge_delete_span:
2661 *
2662 * Delete the specified span interface.
2663 */
2664static void
2665bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2666{
2667 BRIDGE_LOCK_ASSERT_HELD(sc);
2668
2669 KASSERT(bif->bif_ifp->if_bridge == NULL,
2670 ("%s: not a span interface", __func__));
2671
2672 ifnet_release(interface: bif->bif_ifp);
2673
2674 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2675 kfree_type(struct bridge_iflist, bif);
2676}
2677
2678static int
2679bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2680{
2681 struct ifbreq *req = arg;
2682 struct bridge_iflist *bif = NULL;
2683 struct ifnet *ifs, *bifp = sc->sc_ifp;
2684 int error = 0, lladdr_changed = 0;
2685 uint8_t eaddr[ETHER_ADDR_LEN];
2686 struct iff_filter iff;
2687 u_int32_t event_code = 0;
2688 boolean_t input_broadcast;
2689 int media_active;
2690 boolean_t wifi_infra = FALSE;
2691
2692 ifs = ifunit(req->ifbr_ifsname);
2693 if (ifs == NULL) {
2694 return ENOENT;
2695 }
2696 if (ifs->if_ioctl == NULL) { /* must be supported */
2697 return EINVAL;
2698 }
2699
2700 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2701 return EINVAL;
2702 }
2703
2704 /* If it's in the span list, it can't be a member. */
2705 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2706 if (ifs == bif->bif_ifp) {
2707 return EBUSY;
2708 }
2709 }
2710
2711 if (ifs->if_bridge == sc) {
2712 return EEXIST;
2713 }
2714
2715 if (ifs->if_bridge != NULL) {
2716 return EBUSY;
2717 }
2718
2719 switch (ifs->if_type) {
2720 case IFT_ETHER:
2721 if (strcmp(s1: ifs->if_name, s2: "en") == 0 &&
2722 ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2723 (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2724 /* XXX is there a better way to identify Wi-Fi STA? */
2725 wifi_infra = TRUE;
2726 }
2727 break;
2728 case IFT_L2VLAN:
2729 case IFT_IEEE8023ADLAG:
2730 break;
2731 case IFT_GIF:
2732 /* currently not supported */
2733 /* FALLTHRU */
2734 default:
2735 return EINVAL;
2736 }
2737
2738 /* fail to add the interface if the MTU doesn't match */
2739 if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2740 BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2741 sc->sc_ifp->if_xname,
2742 ifs->if_xname);
2743 return EINVAL;
2744 }
2745
2746 /* there's already an interface that's doing MAC NAT */
2747 if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2748 return EBUSY;
2749 }
2750
2751 /* prevent the interface from detaching while we add the member */
2752 if (!ifnet_is_attached(ifs, refio: 1)) {
2753 return ENXIO;
2754 }
2755
2756 /* allocate a new member */
2757 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2758 bif->bif_ifp = ifs;
2759 ifnet_reference(interface: ifs);
2760 bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2761#if HAS_IF_CAP
2762 bif->bif_savedcaps = ifs->if_capenable;
2763#endif /* HAS_IF_CAP */
2764 bif->bif_sc = sc;
2765 if (wifi_infra) {
2766 (void)bridge_mac_nat_enable(sc, bif);
2767 }
2768
2769 if (IFNET_IS_VMNET(ifs)) {
2770 allocate_vmnet_pf_tags();
2771 }
2772 /* Allow the first Ethernet member to define the MTU */
2773 if (TAILQ_EMPTY(&sc->sc_iflist)) {
2774 sc->sc_ifp->if_mtu = ifs->if_mtu;
2775 }
2776
2777 /*
2778 * Assign the interface's MAC address to the bridge if it's the first
2779 * member and the MAC address of the bridge has not been changed from
2780 * the default (randomly) generated one.
2781 */
2782 if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2783 _ether_cmp(IF_LLADDR(sc->sc_ifp), b: sc->sc_defaddr) == 0) {
2784 bcopy(IF_LLADDR(ifs), dst: eaddr, ETHER_ADDR_LEN);
2785 sc->sc_ifaddr = ifs;
2786 ifnet_reference(interface: ifs); /* for sc_ifaddr */
2787 lladdr_changed = 1;
2788 }
2789
2790 ifs->if_bridge = sc;
2791#if BRIDGESTP
2792 bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2793#endif /* BRIDGESTP */
2794
2795#if HAS_IF_CAP
2796 /* Set interface capabilities to the intersection set of all members */
2797 bridge_mutecaps(sc);
2798#endif /* HAS_IF_CAP */
2799
2800
2801 /*
2802 * Respect lock ordering with DLIL lock for the following operations
2803 */
2804 BRIDGE_UNLOCK(sc);
2805
2806 /* enable promiscuous mode */
2807 error = ifnet_set_promiscuous(interface: ifs, on: 1);
2808 switch (error) {
2809 case 0:
2810 bif->bif_flags |= BIFF_PROMISC;
2811 break;
2812 case ENETDOWN:
2813 case EPWROFF:
2814 BRIDGE_LOG(LOG_NOTICE, 0,
2815 "ifnet_set_promiscuous(%s) failed %d, ignoring",
2816 ifs->if_xname, error);
2817 /* Ignore error when device is not up */
2818 error = 0;
2819 break;
2820 default:
2821 BRIDGE_LOG(LOG_NOTICE, 0,
2822 "ifnet_set_promiscuous(%s) failed %d",
2823 ifs->if_xname, error);
2824 BRIDGE_LOCK(sc);
2825 goto out;
2826 }
2827 if (wifi_infra) {
2828 int this_error;
2829
2830 /* Wi-Fi doesn't really support promiscuous, set allmulti */
2831 bif->bif_flags |= BIFF_WIFI_INFRA;
2832 this_error = if_allmulti(ifs, 1);
2833 if (this_error == 0) {
2834 bif->bif_flags |= BIFF_ALL_MULTI;
2835#ifdef XNU_PLATFORM_AppleTVOS
2836 ip6_forwarding = 1;
2837#endif /* XNU_PLATFORM_AppleTVOS */
2838 } else {
2839 BRIDGE_LOG(LOG_NOTICE, 0,
2840 "if_allmulti(%s) failed %d, ignoring",
2841 ifs->if_xname, this_error);
2842 }
2843 }
2844#if SKYWALK
2845 /* ensure that the flowswitch is present for native interface */
2846 if (SKYWALK_NATIVE(ifs)) {
2847 if (ifnet_attach_flowswitch_nexus(ifp: ifs)) {
2848 bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2849 }
2850 }
2851 /* remove the netagent on the flowswitch (rdar://75050182) */
2852 if (if_is_fsw_netagent_enabled()) {
2853 (void)ifnet_remove_netagent(ifp: ifs);
2854 bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2855 }
2856#endif /* SKYWALK */
2857
2858 /*
2859 * install an interface filter
2860 */
2861 memset(s: &iff, c: 0, n: sizeof(struct iff_filter));
2862 iff.iff_cookie = bif;
2863 iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2864 iff.iff_input = bridge_iff_input;
2865 iff.iff_output = bridge_iff_output;
2866 iff.iff_event = bridge_iff_event;
2867 iff.iff_detached = bridge_iff_detached;
2868 error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2869 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2870 if (error != 0) {
2871 BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2872 BRIDGE_LOCK(sc);
2873 goto out;
2874 }
2875 bif->bif_flags |= BIFF_FILTER_ATTACHED;
2876
2877 /*
2878 * install a dummy "bridge" protocol
2879 */
2880 if ((error = bridge_attach_protocol(ifp: ifs)) != 0) {
2881 if (error != 0) {
2882 BRIDGE_LOG(LOG_NOTICE, 0,
2883 "bridge_attach_protocol failed %d", error);
2884 BRIDGE_LOCK(sc);
2885 goto out;
2886 }
2887 }
2888 bif->bif_flags |= BIFF_PROTO_ATTACHED;
2889
2890 if (lladdr_changed &&
2891 (error = ifnet_set_lladdr(interface: bifp, lladdr: eaddr, ETHER_ADDR_LEN)) != 0) {
2892 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2893 }
2894
2895 media_active = interface_media_active(ifs);
2896
2897 /* disable LRO */
2898 if (bridge_set_lro(ifp: ifs, FALSE)) {
2899 bif->bif_flags |= BIFF_LRO_DISABLED;
2900 }
2901
2902 /*
2903 * No failures past this point. Add the member to the list.
2904 */
2905 BRIDGE_LOCK(sc);
2906 bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2907 BRIDGE_XLOCK(sc);
2908 TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2909 BRIDGE_XDROP(sc);
2910
2911 /* cache the member link status */
2912 if (media_active != 0) {
2913 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2914 } else {
2915 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2916 }
2917
2918 /* the new member may change the link status of the bridge interface */
2919 event_code = bridge_updatelinkstatus(sc);
2920
2921 /* check whether we need input broadcast or not */
2922 input_broadcast = interface_needs_input_broadcast(ifp: ifs);
2923 bif_set_input_broadcast(bif, input_broadcast);
2924 BRIDGE_UNLOCK(sc);
2925
2926 if (event_code != 0) {
2927 bridge_link_event(bifp, event_code);
2928 }
2929 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2930 "%s input broadcast %s", ifs->if_xname,
2931 input_broadcast ? "ENABLED" : "DISABLED");
2932
2933 BRIDGE_LOCK(sc);
2934 bridge_set_tso(sc);
2935
2936out:
2937 /* allow the interface to detach */
2938 ifnet_decr_iorefcnt(ifs);
2939
2940 if (error != 0) {
2941 if (bif != NULL) {
2942 bridge_delete_member(sc, bif);
2943 }
2944 } else if (IFNET_IS_VMNET(ifs)) {
2945 INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2946 }
2947
2948 return error;
2949}
2950
2951static int
2952bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2953{
2954 struct ifbreq *req = arg;
2955 struct bridge_iflist *bif;
2956
2957 bif = bridge_lookup_member(sc, name: req->ifbr_ifsname);
2958 if (bif == NULL) {
2959 return ENOENT;
2960 }
2961
2962 bridge_delete_member(sc, bif);
2963
2964 return 0;
2965}
2966
/*
 * bridge_ioctl_purge:
 *
 *	No-op handler: both arguments are ignored and success is reported.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	return 0;
}
2973
2974static int
2975bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2976{
2977 struct ifbreq *req = arg;
2978 struct bridge_iflist *bif;
2979
2980 bif = bridge_lookup_member(sc, name: req->ifbr_ifsname);
2981 if (bif == NULL) {
2982 return ENOENT;
2983 }
2984
2985 struct bstp_port *bp;
2986
2987 bp = &bif->bif_stp;
2988 req->ifbr_state = bp->bp_state;
2989 req->ifbr_priority = bp->bp_priority;
2990 req->ifbr_path_cost = bp->bp_path_cost;
2991 req->ifbr_proto = bp->bp_protover;
2992 req->ifbr_role = bp->bp_role;
2993 req->ifbr_stpflags = bp->bp_flags;
2994 req->ifbr_ifsflags = bif->bif_ifflags;
2995
2996 /* Copy STP state options as flags */
2997 if (bp->bp_operedge) {
2998 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2999 }
3000 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3001 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3002 }
3003 if (bp->bp_ptp_link) {
3004 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3005 }
3006 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3007 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3008 }
3009 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3010 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3011 }
3012 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3013 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3014 }
3015
3016 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3017 req->ifbr_addrcnt = bif->bif_addrcnt;
3018 req->ifbr_addrmax = bif->bif_addrmax;
3019 req->ifbr_addrexceeded = bif->bif_addrexceeded;
3020
3021 return 0;
3022}
3023
3024static int
3025bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
3026{
3027 struct ifbreq *req = arg;
3028 struct bridge_iflist *bif;
3029#if BRIDGESTP
3030 struct bstp_port *bp;
3031 int error;
3032#endif /* BRIDGESTP */
3033
3034 bif = bridge_lookup_member(sc, name: req->ifbr_ifsname);
3035 if (bif == NULL) {
3036 return ENOENT;
3037 }
3038
3039 if (req->ifbr_ifsflags & IFBIF_SPAN) {
3040 /* SPAN is readonly */
3041 return EINVAL;
3042 }
3043#define _EXCLUSIVE_FLAGS (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
3044 if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
3045 /* can't specify both MAC-NAT and checksum offload */
3046 return EINVAL;
3047 }
3048 if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
3049 errno_t error;
3050
3051 error = bridge_mac_nat_enable(sc, bif);
3052 if (error != 0) {
3053 return error;
3054 }
3055 } else if (sc->sc_mac_nat_bif == bif) {
3056 bridge_mac_nat_disable(sc);
3057 }
3058
3059
3060#if BRIDGESTP
3061 if (req->ifbr_ifsflags & IFBIF_STP) {
3062 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3063 error = bstp_enable(&bif->bif_stp);
3064 if (error) {
3065 return error;
3066 }
3067 }
3068 } else {
3069 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3070 bstp_disable(&bif->bif_stp);
3071 }
3072 }
3073
3074 /* Pass on STP flags */
3075 bp = &bif->bif_stp;
3076 bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3077 bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3078 bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3079 bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3080#else /* !BRIDGESTP */
3081 if (req->ifbr_ifsflags & IFBIF_STP) {
3082 return EOPNOTSUPP;
3083 }
3084#endif /* !BRIDGESTP */
3085
3086 /* Save the bits relating to the bridge */
3087 bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
3088
3089
3090 return 0;
3091}
3092
3093static int
3094bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
3095{
3096 struct ifbrparam *param = arg;
3097
3098 sc->sc_brtmax = param->ifbrp_csize;
3099 bridge_rttrim(sc);
3100 return 0;
3101}
3102
3103static int
3104bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
3105{
3106 struct ifbrparam *param = arg;
3107
3108 param->ifbrp_csize = sc->sc_brtmax;
3109
3110 return 0;
3111}
3112
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	Expects `sc', `bifc' and `error' to be in scope where it expands.
 *
 *	Produces one struct ifbreq per entry of sc_iflist followed by one
 *	per entry of sc_spanlist and copies them out to bifc->ifbic_req,
 *	then stores the number of bytes produced in bifc->ifbic_len.
 *	When ifbic_len is 0 on entry, only the required length is stored
 *	and the expanding function returns 0.
 *
 *	The bridge lock is dropped around the buffer allocation and around
 *	the copyout; the lists are therefore walked a second time after the
 *	allocation, and the amount written is capped by both the caller's
 *	length and the allocated size.
 *
 *	If a non-empty staging buffer cannot be allocated the expanding
 *	function returns ENOMEM rather than writing through a NULL pointer.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
		count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
		count++; \
	\
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
		bifc->ifbic_len = buflen; \
		return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* allocation failed: nothing to stage the entries in */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
		\
		snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		/* Fill in the ifbreq structure */ \
		error = bridge_ioctl_gifflags(sc, &breq); \
		if (error) \
			break; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
		if (len < sizeof (breq)) \
			break; \
		\
		snprintf(breq.ifbr_ifsname, \
		    sizeof (breq.ifbr_ifsname), \
		    "%s", bif->bif_ifp->if_xname); \
		breq.ifbr_ifsflags = bif->bif_ifflags; \
		breq.ifbr_portno \
		    = bif->bif_ifp->if_index & 0xfff; \
		memcpy(buf, &breq, sizeof (breq)); \
		count++; \
		buf += sizeof (breq); \
		len -= sizeof (breq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3175
3176static int
3177bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3178{
3179 struct ifbifconf64 *bifc = arg;
3180 int error = 0;
3181
3182 BRIDGE_IOCTL_GIFS;
3183
3184 return error;
3185}
3186
3187static int
3188bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3189{
3190 struct ifbifconf32 *bifc = arg;
3191 int error = 0;
3192
3193 BRIDGE_IOCTL_GIFS;
3194
3195 return error;
3196}
3197
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	Expects `sc', `bac', `bareq' and `error' to be in scope where it
 *	expands, and always returns from the expanding function.
 *
 *	Produces one `bareq' record per entry of sc_rtlist and copies them
 *	out to bac->ifbac_req, storing the number of bytes produced in
 *	bac->ifbac_len.  Returns 0 immediately when ifbac_len is 0.
 *
 *	The bridge lock is dropped around the buffer allocation and around
 *	the copyout, so the amount written is capped by both the caller's
 *	length and the allocated size.
 *
 *	ifba_expire is the remaining lifetime for a dynamic entry that has
 *	not expired yet and 0 for every other entry.  It is reset for each
 *	entry because `bareq' is reused across iterations; otherwise an
 *	expired dynamic entry would report the previous entry's value.
 *
 *	If a non-empty staging buffer cannot be allocated, ENOMEM is
 *	returned rather than writing through a NULL pointer.
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
	\
	if (bac->ifbac_len == 0) \
		return (0); \
	\
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
		count++; \
	buflen = sizeof (bareq) * count; \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* allocation failed: nothing to stage the entries in */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
		if (len < sizeof (bareq)) \
			goto out; \
		snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
		    "%s", brt->brt_ifp->if_xname); \
		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
		bareq.ifba_vlan = brt->brt_vlan; \
		/* default; overridden only for unexpired dynamic entries */ \
		bareq.ifba_expire = 0; \
		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
			now = (unsigned long) net_uptime(); \
			if (now < brt->brt_expire) \
				bareq.ifba_expire = \
				    brt->brt_expire - now; \
		} \
		bareq.ifba_flags = brt->brt_flags; \
		\
		memcpy(buf, &bareq, sizeof (bareq)); \
		count++; \
		buf += sizeof (bareq); \
		len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
		BRIDGE_UNLOCK(sc); \
		error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
		kfree_data(outbuf, buflen); \
		BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3252
3253static int
3254bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3255{
3256 struct ifbaconf64 *bac = arg;
3257 struct ifbareq64 bareq;
3258 int error = 0;
3259
3260 BRIDGE_IOCTL_RTS;
3261 return error;
3262}
3263
3264static int
3265bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3266{
3267 struct ifbaconf32 *bac = arg;
3268 struct ifbareq32 bareq;
3269 int error = 0;
3270
3271 BRIDGE_IOCTL_RTS;
3272 return error;
3273}
3274
3275static int
3276bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3277{
3278 struct ifbareq32 *req = arg;
3279 struct bridge_iflist *bif;
3280 int error;
3281
3282 bif = bridge_lookup_member(sc, name: req->ifba_ifsname);
3283 if (bif == NULL) {
3284 return ENOENT;
3285 }
3286
3287 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3288 req->ifba_flags);
3289
3290 return error;
3291}
3292
3293static int
3294bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3295{
3296 struct ifbareq64 *req = arg;
3297 struct bridge_iflist *bif;
3298 int error;
3299
3300 bif = bridge_lookup_member(sc, name: req->ifba_ifsname);
3301 if (bif == NULL) {
3302 return ENOENT;
3303 }
3304
3305 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3306 req->ifba_flags);
3307
3308 return error;
3309}
3310
3311static int
3312bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3313{
3314 struct ifbrparam *param = arg;
3315
3316 sc->sc_brttimeout = param->ifbrp_ctime;
3317 return 0;
3318}
3319
3320static int
3321bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3322{
3323 struct ifbrparam *param = arg;
3324
3325 param->ifbrp_ctime = sc->sc_brttimeout;
3326 return 0;
3327}
3328
3329static int
3330bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3331{
3332 struct ifbareq32 *req = arg;
3333
3334 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3335}
3336
3337static int
3338bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3339{
3340 struct ifbareq64 *req = arg;
3341
3342 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3343}
3344
3345static int
3346bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3347{
3348 struct ifbreq *req = arg;
3349
3350 bridge_rtflush(sc, req->ifbr_ifsflags);
3351 return 0;
3352}
3353
3354static int
3355bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3356{
3357 struct ifbrparam *param = arg;
3358 struct bstp_state *bs = &sc->sc_stp;
3359
3360 param->ifbrp_prio = bs->bs_bridge_priority;
3361 return 0;
3362}
3363
3364static int
3365bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3366{
3367#if BRIDGESTP
3368 struct ifbrparam *param = arg;
3369
3370 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3371#else /* !BRIDGESTP */
3372#pragma unused(sc, arg)
3373 return EOPNOTSUPP;
3374#endif /* !BRIDGESTP */
3375}
3376
3377static int
3378bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3379{
3380 struct ifbrparam *param = arg;
3381 struct bstp_state *bs = &sc->sc_stp;
3382
3383 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3384 return 0;
3385}
3386
3387static int
3388bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3389{
3390#if BRIDGESTP
3391 struct ifbrparam *param = arg;
3392
3393 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3394#else /* !BRIDGESTP */
3395#pragma unused(sc, arg)
3396 return EOPNOTSUPP;
3397#endif /* !BRIDGESTP */
3398}
3399
3400static int
3401bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3402{
3403 struct ifbrparam *param;
3404 struct bstp_state *bs;
3405
3406 param = arg;
3407 bs = &sc->sc_stp;
3408 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3409 return 0;
3410}
3411
3412static int
3413bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3414{
3415#if BRIDGESTP
3416 struct ifbrparam *param = arg;
3417
3418 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3419#else /* !BRIDGESTP */
3420#pragma unused(sc, arg)
3421 return EOPNOTSUPP;
3422#endif /* !BRIDGESTP */
3423}
3424
3425static int
3426bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3427{
3428 struct ifbrparam *param;
3429 struct bstp_state *bs;
3430
3431 param = arg;
3432 bs = &sc->sc_stp;
3433 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3434 return 0;
3435}
3436
3437static int
3438bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3439{
3440#if BRIDGESTP
3441 struct ifbrparam *param = arg;
3442
3443 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3444#else /* !BRIDGESTP */
3445#pragma unused(sc, arg)
3446 return EOPNOTSUPP;
3447#endif /* !BRIDGESTP */
3448}
3449
3450static int
3451bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3452{
3453#if BRIDGESTP
3454 struct ifbreq *req = arg;
3455 struct bridge_iflist *bif;
3456
3457 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3458 if (bif == NULL) {
3459 return ENOENT;
3460 }
3461
3462 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3463#else /* !BRIDGESTP */
3464#pragma unused(sc, arg)
3465 return EOPNOTSUPP;
3466#endif /* !BRIDGESTP */
3467}
3468
3469static int
3470bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3471{
3472#if BRIDGESTP
3473 struct ifbreq *req = arg;
3474 struct bridge_iflist *bif;
3475
3476 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3477 if (bif == NULL) {
3478 return ENOENT;
3479 }
3480
3481 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3482#else /* !BRIDGESTP */
3483#pragma unused(sc, arg)
3484 return EOPNOTSUPP;
3485#endif /* !BRIDGESTP */
3486}
3487
3488static int
3489bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3490{
3491 struct ifbrparam *param = arg;
3492
3493 param->ifbrp_filter = sc->sc_filter_flags;
3494
3495 return 0;
3496}
3497
3498static int
3499bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3500{
3501 struct ifbrparam *param = arg;
3502
3503 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3504 return EINVAL;
3505 }
3506
3507 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3508 return EINVAL;
3509 }
3510
3511 sc->sc_filter_flags = param->ifbrp_filter;
3512
3513 return 0;
3514}
3515
3516static int
3517bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3518{
3519 struct ifbreq *req = arg;
3520 struct bridge_iflist *bif;
3521
3522 bif = bridge_lookup_member(sc, name: req->ifbr_ifsname);
3523 if (bif == NULL) {
3524 return ENOENT;
3525 }
3526
3527 bif->bif_addrmax = req->ifbr_addrmax;
3528 return 0;
3529}
3530
3531static int
3532bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3533{
3534 struct ifbreq *req = arg;
3535 struct bridge_iflist *bif = NULL;
3536 struct ifnet *ifs;
3537
3538 ifs = ifunit(req->ifbr_ifsname);
3539 if (ifs == NULL) {
3540 return ENOENT;
3541 }
3542
3543 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3544 return EINVAL;
3545 }
3546
3547 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3548 if (ifs == bif->bif_ifp) {
3549 return EBUSY;
3550 }
3551
3552 if (ifs->if_bridge != NULL) {
3553 return EBUSY;
3554 }
3555
3556 switch (ifs->if_type) {
3557 case IFT_ETHER:
3558 case IFT_L2VLAN:
3559 case IFT_IEEE8023ADLAG:
3560 break;
3561 case IFT_GIF:
3562 /* currently not supported */
3563 /* FALLTHRU */
3564 default:
3565 return EINVAL;
3566 }
3567
3568 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3569
3570 bif->bif_ifp = ifs;
3571 bif->bif_ifflags = IFBIF_SPAN;
3572
3573 ifnet_reference(interface: bif->bif_ifp);
3574
3575 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3576
3577 return 0;
3578}
3579
3580static int
3581bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3582{
3583 struct ifbreq *req = arg;
3584 struct bridge_iflist *bif;
3585 struct ifnet *ifs;
3586
3587 ifs = ifunit(req->ifbr_ifsname);
3588 if (ifs == NULL) {
3589 return ENOENT;
3590 }
3591
3592 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3593 if (ifs == bif->bif_ifp) {
3594 break;
3595 }
3596
3597 if (bif == NULL) {
3598 return ENOENT;
3599 }
3600
3601 bridge_delete_span(sc, bif);
3602
3603 return 0;
3604}
3605
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32() and
 *	bridge_ioctl_gbparam64().  Expects `sc' and `req' to be in scope
 *	where it expands.
 *
 *	Copies the STP parameters of sc->sc_stp into `req'.  The max age,
 *	hello time and forward delay are shifted right by 8 bits; the root
 *	port is reported as that port's interface index, or 0 when there is
 *	no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *bs = &sc->sc_stp; \
	struct bstp_port *root_port = bs->bs_root_port; \
	\
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8; \
	\
	if (root_port != NULL) { \
		req->ifbop_root_port = root_port->bp_ifp->if_index; \
	} else { \
		req->ifbop_root_port = 0; \
	} \
	\
	req->ifbop_holdcount = bs->bs_txholdcount; \
	req->ifbop_priority = bs->bs_bridge_priority; \
	req->ifbop_protocol = bs->bs_protover; \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec; \
} while (0)
3630
3631static int
3632bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3633{
3634 struct ifbropreq32 *req = arg;
3635
3636 BRIDGE_IOCTL_GBPARAM;
3637 return 0;
3638}
3639
3640static int
3641bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3642{
3643 struct ifbropreq64 *req = arg;
3644
3645 BRIDGE_IOCTL_GBPARAM;
3646 return 0;
3647}
3648
3649static int
3650bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3651{
3652 struct ifbrparam *param = arg;
3653
3654 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3655 return 0;
3656}
3657
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of bridge_ioctl_gifsstp32() and
 *	bridge_ioctl_gifsstp64().  Expects `sc', `bifstp' and `error' to be
 *	in scope where it expands, and returns from the expanding function
 *	on every path that gets past the length query.
 *
 *	Produces one struct ifbpstpreq per member of sc_iflist that has
 *	IFBIF_STP set and copies them out to bifstp->ifbpstp_req, storing
 *	the number of bytes produced in bifstp->ifbpstp_len.  When
 *	ifbpstp_len is 0 on entry, only the required length is stored and
 *	0 is returned.
 *
 *	The bridge lock is dropped around the buffer allocation and around
 *	the copyout, so the amount written is capped by both the caller's
 *	length and the allocated size.
 *
 *	If a non-empty staging buffer cannot be allocated, ENOMEM is
 *	returned rather than writing through a NULL pointer.
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if ((bif->bif_ifflags & IFBIF_STP) != 0) \
			count++; \
	} \
	\
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
		bifstp->ifbpstp_len = buflen; \
		return (0); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* allocation failed: nothing to stage the entries in */ \
	if (outbuf == NULL && buflen != 0) \
		return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
		if (len < sizeof (bpreq)) \
			break; \
		\
		if ((bif->bif_ifflags & IFBIF_STP) == 0) \
			continue; \
		\
		bp = &bif->bif_stp; \
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
		\
		memcpy(buf, &bpreq, sizeof (bpreq)); \
		count++; \
		buf += sizeof (bpreq); \
		len -= sizeof (bpreq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3713
3714static int
3715bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3716{
3717 struct ifbpstpconf32 *bifstp = arg;
3718 int error = 0;
3719
3720 BRIDGE_IOCTL_GIFSSTP;
3721 return error;
3722}
3723
3724static int
3725bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3726{
3727 struct ifbpstpconf64 *bifstp = arg;
3728 int error = 0;
3729
3730 BRIDGE_IOCTL_GIFSSTP;
3731 return error;
3732}
3733
3734static int
3735bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3736{
3737#if BRIDGESTP
3738 struct ifbrparam *param = arg;
3739
3740 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3741#else /* !BRIDGESTP */
3742#pragma unused(sc, arg)
3743 return EOPNOTSUPP;
3744#endif /* !BRIDGESTP */
3745}
3746
3747static int
3748bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3749{
3750#if BRIDGESTP
3751 struct ifbrparam *param = arg;
3752
3753 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3754#else /* !BRIDGESTP */
3755#pragma unused(sc, arg)
3756 return EOPNOTSUPP;
3757#endif /* !BRIDGESTP */
3758}
3759
3760
3761static int
3762bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3763{
3764 struct ifbrhostfilter *req = arg;
3765 struct bridge_iflist *bif;
3766
3767 bif = bridge_lookup_member(sc, name: req->ifbrhf_ifsname);
3768 if (bif == NULL) {
3769 return ENOENT;
3770 }
3771
3772 bzero(s: req, n: sizeof(struct ifbrhostfilter));
3773 if (bif->bif_flags & BIFF_HOST_FILTER) {
3774 req->ifbrhf_flags |= IFBRHF_ENABLED;
3775 bcopy(src: bif->bif_hf_hwsrc, dst: req->ifbrhf_hwsrca,
3776 ETHER_ADDR_LEN);
3777 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3778 }
3779 return 0;
3780}
3781
3782static int
3783bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3784{
3785 struct ifbrhostfilter *req = arg;
3786 struct bridge_iflist *bif;
3787
3788 bif = bridge_lookup_member(sc, name: req->ifbrhf_ifsname);
3789 if (bif == NULL) {
3790 return ENOENT;
3791 }
3792
3793 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3794 bif->bif_flags |= BIFF_HOST_FILTER;
3795
3796 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3797 bcopy(src: req->ifbrhf_hwsrca, dst: bif->bif_hf_hwsrc,
3798 ETHER_ADDR_LEN);
3799 if (bcmp(s1: req->ifbrhf_hwsrca, s2: ethernulladdr,
3800 ETHER_ADDR_LEN) != 0) {
3801 bif->bif_flags |= BIFF_HF_HWSRC;
3802 } else {
3803 bif->bif_flags &= ~BIFF_HF_HWSRC;
3804 }
3805 }
3806 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3807 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3808 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3809 bif->bif_flags |= BIFF_HF_IPSRC;
3810 } else {
3811 bif->bif_flags &= ~BIFF_HF_IPSRC;
3812 }
3813 }
3814 } else {
3815 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3816 BIFF_HF_IPSRC);
3817 bzero(s: bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3818 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3819 }
3820
3821 return 0;
3822}
3823
3824static char *
3825bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3826 unsigned int * count_p, char *buf, unsigned int *len_p)
3827{
3828 unsigned int count = *count_p;
3829 struct ifbrmne ifbmne;
3830 unsigned int len = *len_p;
3831 struct mac_nat_entry *mne;
3832 unsigned long now;
3833
3834 bzero(s: &ifbmne, n: sizeof(ifbmne));
3835 LIST_FOREACH(mne, list, mne_list) {
3836 if (len < sizeof(ifbmne)) {
3837 break;
3838 }
3839 snprintf(ifbmne.ifbmne_ifname, count: sizeof(ifbmne.ifbmne_ifname),
3840 "%s", mne->mne_bif->bif_ifp->if_xname);
3841 memcpy(dst: ifbmne.ifbmne_mac, src: mne->mne_mac,
3842 n: sizeof(ifbmne.ifbmne_mac));
3843 now = (unsigned long) net_uptime();
3844 if (now < mne->mne_expire) {
3845 ifbmne.ifbmne_expire = mne->mne_expire - now;
3846 } else {
3847 ifbmne.ifbmne_expire = 0;
3848 }
3849 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3850 ifbmne.ifbmne_af = AF_INET6;
3851 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3852 } else {
3853 ifbmne.ifbmne_af = AF_INET;
3854 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3855 }
3856 memcpy(dst: buf, src: &ifbmne, n: sizeof(ifbmne));
3857 count++;
3858 buf += sizeof(ifbmne);
3859 len -= sizeof(ifbmne);
3860 }
3861 *count_p = count;
3862 *len_p = len;
3863 return buf;
3864}
3865
3866/*
3867 * bridge_ioctl_gmnelist()
3868 * Perform the get mac_nat_entry list ioctl.
3869 *
3870 * Note:
3871 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3872 * field size/layout except for the last field ifbml_buf, the user-supplied
3873 * buffer pointer. That is passed in separately via the 'user_addr'
3874 * parameter from the respective 32-bit or 64-bit ioctl routine.
3875 */
3876static int
3877bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3878 user_addr_t user_addr)
3879{
3880 unsigned int count;
3881 char *buf;
3882 int error = 0;
3883 char *outbuf = NULL;
3884 struct mac_nat_entry *mne;
3885 unsigned int buflen;
3886 unsigned int len;
3887
3888 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3889 count = 0;
3890 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3891 count++;
3892 }
3893 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3894 count++;
3895 }
3896 buflen = sizeof(struct ifbrmne) * count;
3897 if (buflen == 0 || mnl->ifbml_len == 0) {
3898 mnl->ifbml_len = buflen;
3899 return error;
3900 }
3901 BRIDGE_UNLOCK(sc);
3902 outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3903 BRIDGE_LOCK(sc);
3904 count = 0;
3905 buf = outbuf;
3906 len = min(a: mnl->ifbml_len, b: buflen);
3907 buf = bridge_mac_nat_entry_out(list: &sc->sc_mne_list, count_p: &count, buf, len_p: &len);
3908 buf = bridge_mac_nat_entry_out(list: &sc->sc_mne_list_v6, count_p: &count, buf, len_p: &len);
3909 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3910 BRIDGE_UNLOCK(sc);
3911 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3912 kfree_data(outbuf, buflen);
3913 BRIDGE_LOCK(sc);
3914 return error;
3915}
3916
3917static int
3918bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3919{
3920 struct ifbrmnelist64 *mnl = arg;
3921
3922 return bridge_ioctl_gmnelist(sc, mnl: arg, user_addr: mnl->ifbml_buf);
3923}
3924
3925static int
3926bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3927{
3928 struct ifbrmnelist32 *mnl = arg;
3929
3930 return bridge_ioctl_gmnelist(sc, mnl: arg,
3931 CAST_USER_ADDR_T(mnl->ifbml_buf));
3932}
3933
3934/*
3935 * bridge_ioctl_gifstats()
3936 * Return per-member stats.
3937 *
3938 * Note:
3939 * The ifbrmreq32 and ifbrmreq64 structures have the same
3940 * field size/layout except for the last field brmr_buf, the user-supplied
3941 * buffer pointer. That is passed in separately via the 'user_addr'
3942 * parameter from the respective 32-bit or 64-bit ioctl routine.
3943 */
3944static int
3945bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3946 user_addr_t user_addr)
3947{
3948 struct bridge_iflist *bif;
3949 int error = 0;
3950 unsigned int buflen;
3951
3952 bif = bridge_lookup_member(sc, name: mreq->brmr_ifname);
3953 if (bif == NULL) {
3954 error = ENOENT;
3955 goto done;
3956 }
3957
3958 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3959 if (buflen == 0 || mreq->brmr_len == 0) {
3960 mreq->brmr_len = buflen;
3961 goto done;
3962 }
3963 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3964 error = ENOBUFS;
3965 goto done;
3966 }
3967 mreq->brmr_len = buflen;
3968 error = copyout(&bif->bif_stats, user_addr, buflen);
3969done:
3970 return error;
3971}
3972
3973static int
3974bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3975{
3976 struct ifbrmreq32 *mreq = arg;
3977
3978 return bridge_ioctl_gifstats(sc, mreq: arg, user_addr: mreq->brmr_buf);
3979}
3980
3981static int
3982bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3983{
3984 struct ifbrmreq64 *mreq = arg;
3985
3986 return bridge_ioctl_gifstats(sc, mreq: arg, user_addr: mreq->brmr_buf);
3987}
3988
3989/*
3990 * bridge_proto_attach_changed
3991 *
3992 * Called when protocol attachment on the interface changes.
3993 */
3994static void
3995bridge_proto_attach_changed(struct ifnet *ifp)
3996{
3997 boolean_t changed = FALSE;
3998 struct bridge_iflist *bif;
3999 boolean_t input_broadcast;
4000 struct bridge_softc *sc = ifp->if_bridge;
4001
4002 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4003 if (sc == NULL) {
4004 return;
4005 }
4006 input_broadcast = interface_needs_input_broadcast(ifp);
4007 BRIDGE_LOCK(sc);
4008 bif = bridge_lookup_member_if(sc, member_ifp: ifp);
4009 if (bif != NULL) {
4010 changed = bif_set_input_broadcast(bif, input_broadcast);
4011 }
4012 BRIDGE_UNLOCK(sc);
4013 if (changed) {
4014 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4015 "%s input broadcast %s", ifp->if_xname,
4016 input_broadcast ? "ENABLED" : "DISABLED");
4017 }
4018 return;
4019}
4020
4021/*
4022 * interface_media_active:
4023 *
4024 * Tells if an interface media is active.
4025 */
4026static int
4027interface_media_active(struct ifnet *ifp)
4028{
4029 struct ifmediareq ifmr;
4030 int status = 0;
4031
4032 bzero(s: &ifmr, n: sizeof(ifmr));
4033 if (ifnet_ioctl(interface: ifp, protocol: 0, SIOCGIFMEDIA, ioctl_arg: &ifmr) == 0) {
4034 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4035 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4036 }
4037 }
4038
4039 return status;
4040}
4041
4042/*
4043 * bridge_updatelinkstatus:
4044 *
4045 * Update the media active status of the bridge based on the
4046 * media active status of its member.
4047 * If changed, return the corresponding onf/off link event.
4048 */
4049static u_int32_t
4050bridge_updatelinkstatus(struct bridge_softc *sc)
4051{
4052 struct bridge_iflist *bif;
4053 int active_member = 0;
4054 u_int32_t event_code = 0;
4055
4056 BRIDGE_LOCK_ASSERT_HELD(sc);
4057
4058 /*
4059 * Find out if we have an active interface
4060 */
4061 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4062 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4063 active_member = 1;
4064 break;
4065 }
4066 }
4067
4068 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4069 sc->sc_flags |= SCF_MEDIA_ACTIVE;
4070 event_code = KEV_DL_LINK_ON;
4071 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4072 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4073 event_code = KEV_DL_LINK_OFF;
4074 }
4075
4076 return event_code;
4077}
4078
4079/*
4080 * bridge_iflinkevent:
4081 */
4082static void
4083bridge_iflinkevent(struct ifnet *ifp)
4084{
4085 struct bridge_softc *sc = ifp->if_bridge;
4086 struct bridge_iflist *bif;
4087 u_int32_t event_code = 0;
4088 int media_active;
4089
4090 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4091
4092 /* Check if the interface is a bridge member */
4093 if (sc == NULL) {
4094 return;
4095 }
4096
4097 media_active = interface_media_active(ifp);
4098 BRIDGE_LOCK(sc);
4099 bif = bridge_lookup_member_if(sc, member_ifp: ifp);
4100 if (bif != NULL) {
4101 if (media_active) {
4102 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4103 } else {
4104 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4105 }
4106 if (sc->sc_mac_nat_bif != NULL) {
4107 bridge_mac_nat_flush_entries(sc, bif);
4108 }
4109
4110 event_code = bridge_updatelinkstatus(sc);
4111 }
4112 BRIDGE_UNLOCK(sc);
4113
4114 if (event_code != 0) {
4115 bridge_link_event(sc->sc_ifp, event_code);
4116 }
4117}
4118
4119/*
4120 * bridge_delayed_callback:
4121 *
4122 * Makes a delayed call
4123 */
4124static void
4125bridge_delayed_callback(void *param, __unused void *param2)
4126{
4127 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4128 struct bridge_softc *sc = call->bdc_sc;
4129
4130#if BRIDGE_DELAYED_CALLBACK_DEBUG
4131 if (bridge_delayed_callback_delay > 0) {
4132 struct timespec ts;
4133
4134 ts.tv_sec = bridge_delayed_callback_delay;
4135 ts.tv_nsec = 0;
4136
4137 BRIDGE_LOG(LOG_NOTICE, 0,
4138 "sleeping for %d seconds",
4139 bridge_delayed_callback_delay);
4140
4141 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4142 __func__, &ts);
4143
4144 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4145 }
4146#endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4147
4148 BRIDGE_LOCK(sc);
4149
4150#if BRIDGE_DELAYED_CALLBACK_DEBUG
4151 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4152 "%s call 0x%llx flags 0x%x",
4153 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4154 call->bdc_flags);
4155}
4156#endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4157
4158 if (call->bdc_flags & BDCF_CANCELLING) {
4159 wakeup(chan: call);
4160 } else {
4161 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4162 (*call->bdc_func)(sc);
4163 }
4164 }
4165 call->bdc_flags &= ~BDCF_OUTSTANDING;
4166 BRIDGE_UNLOCK(sc);
4167}
4168
4169/*
4170 * bridge_schedule_delayed_call:
4171 *
4172 * Schedule a function to be called on a separate thread
4173 * The actual call may be scheduled to run at a given time or ASAP.
4174 */
4175static void
4176bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4177{
4178 uint64_t deadline = 0;
4179 struct bridge_softc *sc = call->bdc_sc;
4180
4181 BRIDGE_LOCK_ASSERT_HELD(sc);
4182
4183 if ((sc->sc_flags & SCF_DETACHING) ||
4184 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4185 return;
4186 }
4187
4188 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4189 nanoseconds_to_absolutetime(
4190 nanoseconds: (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4191 call->bdc_ts.tv_nsec, result: &deadline);
4192 clock_absolutetime_interval_to_deadline(abstime: deadline, result: &deadline);
4193 }
4194
4195 call->bdc_flags = BDCF_OUTSTANDING;
4196
4197#if BRIDGE_DELAYED_CALLBACK_DEBUG
4198 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4199 "%s call 0x%llx flags 0x%x",
4200 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4201 call->bdc_flags);
4202}
4203#endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4204
4205 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4206 thread_call_func_delayed(
4207 func: (thread_call_func_t)bridge_delayed_callback,
4208 param: call, deadline);
4209 } else {
4210 if (call->bdc_thread_call == NULL) {
4211 call->bdc_thread_call = thread_call_allocate(
4212 func: (thread_call_func_t)bridge_delayed_callback,
4213 param0: call);
4214 }
4215 thread_call_enter(call: call->bdc_thread_call);
4216 }
4217}
4218
4219/*
4220 * bridge_cancel_delayed_call:
4221 *
4222 * Cancel a queued or running delayed call.
4223 * If call is running, does not return until the call is done to
4224 * prevent race condition with the brigde interface getting destroyed
4225 */
4226static void
4227bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4228{
4229 boolean_t result;
4230 struct bridge_softc *sc = call->bdc_sc;
4231
4232 /*
4233 * The call was never scheduled
4234 */
4235 if (sc == NULL) {
4236 return;
4237 }
4238
4239 BRIDGE_LOCK_ASSERT_HELD(sc);
4240
4241 call->bdc_flags |= BDCF_CANCELLING;
4242
4243 while (call->bdc_flags & BDCF_OUTSTANDING) {
4244 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4245 "%s call 0x%llx flags 0x%x",
4246 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4247 call->bdc_flags);
4248 result = thread_call_func_cancel(
4249 func: (thread_call_func_t)bridge_delayed_callback, param: call, FALSE);
4250
4251 if (result) {
4252 /*
4253 * We managed to dequeue the delayed call
4254 */
4255 call->bdc_flags &= ~BDCF_OUTSTANDING;
4256 } else {
4257 /*
4258 * Wait for delayed call do be done running
4259 */
4260 msleep(chan: call, mtx: &sc->sc_mtx, PZERO, wmesg: __func__, NULL);
4261 }
4262 }
4263 call->bdc_flags &= ~BDCF_CANCELLING;
4264}
4265
4266/*
4267 * bridge_cleanup_delayed_call:
4268 *
4269 * Dispose resource allocated for a delayed call
4270 * Assume the delayed call is not queued or running .
4271 */
4272static void
4273bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4274{
4275 boolean_t result;
4276 struct bridge_softc *sc = call->bdc_sc;
4277
4278 /*
4279 * The call was never scheduled
4280 */
4281 if (sc == NULL) {
4282 return;
4283 }
4284
4285 BRIDGE_LOCK_ASSERT_HELD(sc);
4286
4287 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4288 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4289
4290 if (call->bdc_thread_call != NULL) {
4291 result = thread_call_free(call: call->bdc_thread_call);
4292 if (result == FALSE) {
4293 panic("%s thread_call_free() failed for call %p",
4294 __func__, call);
4295 }
4296 call->bdc_thread_call = NULL;
4297 }
4298}
4299
4300/*
4301 * bridge_init:
4302 *
4303 * Initialize a bridge interface.
4304 */
4305static int
4306bridge_init(struct ifnet *ifp)
4307{
4308 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4309 errno_t error;
4310
4311 BRIDGE_LOCK_ASSERT_HELD(sc);
4312
4313 if ((ifnet_flags(interface: ifp) & IFF_RUNNING)) {
4314 return 0;
4315 }
4316
4317 error = ifnet_set_flags(interface: ifp, IFF_RUNNING, IFF_RUNNING);
4318
4319 /*
4320 * Calling bridge_aging_timer() is OK as there are no entries to
4321 * age so we're just going to arm the timer
4322 */
4323 bridge_aging_timer(sc);
4324#if BRIDGESTP
4325 if (error == 0) {
4326 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4327 }
4328#endif /* BRIDGESTP */
4329 return error;
4330}
4331
4332/*
4333 * bridge_ifstop:
4334 *
4335 * Stop the bridge interface.
4336 */
4337static void
4338bridge_ifstop(struct ifnet *ifp, int disable)
4339{
4340#pragma unused(disable)
4341 struct bridge_softc *sc = ifp->if_softc;
4342
4343 BRIDGE_LOCK_ASSERT_HELD(sc);
4344
4345 if ((ifnet_flags(interface: ifp) & IFF_RUNNING) == 0) {
4346 return;
4347 }
4348
4349 bridge_cancel_delayed_call(call: &sc->sc_aging_timer);
4350
4351#if BRIDGESTP
4352 bstp_stop(&sc->sc_stp);
4353#endif /* BRIDGESTP */
4354
4355 bridge_rtflush(sc, IFBF_FLUSHDYN);
4356 (void) ifnet_set_flags(interface: ifp, new_flags: 0, IFF_RUNNING);
4357}
4358
4359/*
4360 * bridge_compute_cksum:
4361 *
4362 * If the packet has checksum flags, compare the hardware checksum
4363 * capabilities of the source and destination interfaces. If they
4364 * are the same, there's nothing to do. If they are different,
4365 * finalize the checksum so that it can be sent on the destination
4366 * interface.
4367 */
4368static void
4369bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4370{
4371 uint32_t csum_flags;
4372 uint16_t dst_hw_csum;
4373 uint32_t did_sw = 0;
4374 struct ether_header *eh;
4375 uint16_t src_hw_csum;
4376
4377 if (src_if == dst_if) {
4378 return;
4379 }
4380 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4381 if (csum_flags == 0) {
4382 /* no checksum offload */
4383 return;
4384 }
4385
4386 /*
4387 * if destination/source differ in checksum offload
4388 * capabilities, finalize/compute the checksum
4389 */
4390 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4391 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4392 if (dst_hw_csum == src_hw_csum) {
4393 return;
4394 }
4395 eh = mtod(m, struct ether_header *);
4396 switch (ntohs(eh->ether_type)) {
4397 case ETHERTYPE_IP:
4398 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4399 break;
4400 case ETHERTYPE_IPV6:
4401 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4402 break;
4403 }
4404 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4405 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4406 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4407 m->m_pkthdr.csum_flags);
4408}
4409
4410static errno_t
4411bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4412{
4413 struct flowadv adv = { .code = FADV_SUCCESS };
4414 errno_t error;
4415
4416 error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4417 if (error == 0) {
4418 if (adv.code == FADV_FLOW_CONTROLLED) {
4419 error = EQFULL;
4420 } else if (adv.code == FADV_SUSPENDED) {
4421 error = EQSUSPENDED;
4422 }
4423 }
4424 return error;
4425}
4426
4427static int
4428get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4429 bool *is_fragmented)
4430{
4431 int newoff;
4432
4433 *is_fragmented = false;
4434 while (1) {
4435 newoff = ip6_nexthdr(m, off, proto, nxtp);
4436 if (newoff < 0) {
4437 return off;
4438 } else if (newoff < off) {
4439 return -1; /* invalid */
4440 } else if (newoff == off) {
4441 return newoff;
4442 }
4443 off = newoff;
4444 proto = *nxtp;
4445 if (proto == IPPROTO_FRAGMENT) {
4446 *is_fragmented = true;
4447 }
4448 }
4449}
4450
4451static int
4452bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4453 ip_packet_info_t info_p, struct bripstats * stats_p)
4454{
4455 int error = 0;
4456 u_int hlen;
4457 u_int ip_hlen;
4458 u_int ip_pay_len;
4459 struct mbuf * m0 = *mp;
4460 int off;
4461 int opt_len = 0;
4462 int proto = 0;
4463
4464 bzero(s: info_p, n: sizeof(*info_p));
4465 if (is_ipv4) {
4466 struct ip * ip;
4467 u_int ip_total_len;
4468
4469 /* IPv4 */
4470 hlen = mac_hlen + sizeof(struct ip);
4471 if (m0->m_pkthdr.len < hlen) {
4472 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4473 "Short IP packet %d < %d",
4474 m0->m_pkthdr.len, hlen);
4475 error = _EBADIP;
4476 stats_p->bips_bad_ip++;
4477 goto done;
4478 }
4479 if (m0->m_len < hlen) {
4480 *mp = m0 = m_pullup(m0, hlen);
4481 if (m0 == NULL) {
4482 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4483 "m_pullup failed hlen %d",
4484 hlen);
4485 error = ENOBUFS;
4486 stats_p->bips_bad_ip++;
4487 goto done;
4488 }
4489 }
4490 ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4491 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4492 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4493 "bad IP version");
4494 error = _EBADIP;
4495 stats_p->bips_bad_ip++;
4496 goto done;
4497 }
4498 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4499 if (ip_hlen < sizeof(struct ip)) {
4500 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4501 "bad IP header length %d < %d",
4502 ip_hlen,
4503 (int)sizeof(struct ip));
4504 error = _EBADIP;
4505 stats_p->bips_bad_ip++;
4506 goto done;
4507 }
4508 hlen = mac_hlen + ip_hlen;
4509 if (m0->m_len < hlen) {
4510 *mp = m0 = m_pullup(m0, hlen);
4511 if (m0 == NULL) {
4512 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4513 "m_pullup failed hlen %d",
4514 hlen);
4515 error = ENOBUFS;
4516 stats_p->bips_bad_ip++;
4517 goto done;
4518 }
4519 }
4520
4521 ip_total_len = ntohs(ip->ip_len);
4522 if (ip_total_len < ip_hlen) {
4523 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4524 "IP total len %d < header len %d",
4525 ip_total_len, ip_hlen);
4526 error = _EBADIP;
4527 stats_p->bips_bad_ip++;
4528 goto done;
4529 }
4530 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4531 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4532 "invalid IP payload length %d > %d",
4533 ip_total_len,
4534 (m0->m_pkthdr.len - mac_hlen));
4535 error = _EBADIP;
4536 stats_p->bips_bad_ip++;
4537 goto done;
4538 }
4539 ip_pay_len = ip_total_len - ip_hlen;
4540 info_p->ip_proto = ip->ip_p;
4541 info_p->ip_hdr.ip = ip;
4542#define FRAG_BITS (IP_OFFMASK | IP_MF)
4543 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4544 info_p->ip_is_fragmented = true;
4545 }
4546 stats_p->bips_ip++;
4547 } else {
4548 struct ip6_hdr *ip6;
4549
4550 /* IPv6 */
4551 hlen = mac_hlen + sizeof(struct ip6_hdr);
4552 if (m0->m_pkthdr.len < hlen) {
4553 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4554 "short IPv6 packet %d < %d",
4555 m0->m_pkthdr.len, hlen);
4556 error = _EBADIPV6;
4557 stats_p->bips_bad_ip6++;
4558 goto done;
4559 }
4560 if (m0->m_len < hlen) {
4561 *mp = m0 = m_pullup(m0, hlen);
4562 if (m0 == NULL) {
4563 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4564 "m_pullup failed hlen %d",
4565 hlen);
4566 error = ENOBUFS;
4567 stats_p->bips_bad_ip6++;
4568 goto done;
4569 }
4570 }
4571 ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4572 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4573 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4574 "bad IPv6 version");
4575 error = _EBADIPV6;
4576 stats_p->bips_bad_ip6++;
4577 goto done;
4578 }
4579 off = get_last_ip6_hdr(m: m0, off: mac_hlen, IPPROTO_IPV6, nxtp: &proto,
4580 is_fragmented: &info_p->ip_is_fragmented);
4581 if (off < 0 || m0->m_pkthdr.len < off) {
4582 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4583 "ip6_lasthdr() returned %d",
4584 off);
4585 error = _EBADIPV6;
4586 stats_p->bips_bad_ip6++;
4587 goto done;
4588 }
4589 ip_hlen = sizeof(*ip6);
4590 opt_len = off - mac_hlen - ip_hlen;
4591 if (opt_len < 0) {
4592 error = _EBADIPV6;
4593 stats_p->bips_bad_ip6++;
4594 goto done;
4595 }
4596 info_p->ip_proto = proto;
4597 info_p->ip_hdr.ip6 = ip6;
4598 ip_pay_len = ntohs(ip6->ip6_plen);
4599 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4600 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4601 "invalid IPv6 payload length %d > %d",
4602 ip_pay_len,
4603 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4604 error = _EBADIPV6;
4605 stats_p->bips_bad_ip6++;
4606 goto done;
4607 }
4608 stats_p->bips_ip6++;
4609 }
4610 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4611 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4612 is_ipv4 ? '4' : '6',
4613 proto, ip_hlen, ip_pay_len, opt_len,
4614 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4615 info_p->ip_hlen = ip_hlen;
4616 info_p->ip_pay_len = ip_pay_len;
4617 info_p->ip_opt_len = opt_len;
4618 info_p->ip_is_ipv4 = is_ipv4;
4619done:
4620 return error;
4621}
4622
4623static int
4624bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4625 ip_packet_info_t info_p, struct bripstats * stats_p)
4626{
4627 int error;
4628 u_int hlen;
4629
4630 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4631 if (error != 0) {
4632 goto done;
4633 }
4634 if (info_p->ip_proto != IPPROTO_TCP) {
4635 /* not a TCP frame, not an error, just a bad guess */
4636 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4637 "non-TCP (%d) IPv%c frame %d bytes",
4638 info_p->ip_proto, is_ipv4 ? '4' : '6',
4639 (*mp)->m_pkthdr.len);
4640 goto done;
4641 }
4642 if (info_p->ip_is_fragmented) {
4643 /* both TSO and IP fragmentation don't make sense */
4644 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4645 "fragmented TSO packet?");
4646 stats_p->bips_bad_tcp++;
4647 error = _EBADTCP;
4648 goto done;
4649 }
4650 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4651 info_p->ip_opt_len;
4652 if ((*mp)->m_len < hlen) {
4653 *mp = m_pullup(*mp, hlen);
4654 if (*mp == NULL) {
4655 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4656 "m_pullup %d failed",
4657 hlen);
4658 stats_p->bips_bad_tcp++;
4659 error = _EBADTCP;
4660 goto done;
4661 }
4662 }
4663 info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4664 info_p->ip_hlen + info_p->ip_opt_len;
4665done:
4666 return error;
4667}
4668
4669static inline void
4670proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4671{
4672 if (proto == IPPROTO_TCP) {
4673 stats_p->brcs_tcp_checksum++;
4674 } else {
4675 stats_p->brcs_udp_checksum++;
4676 }
4677 return;
4678}
4679
4680static bool
4681ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4682{
4683 uint16_t ether_type;
4684 bool is_ip = TRUE;
4685
4686 ether_type = ntohs(eh->ether_type);
4687 switch (ether_type) {
4688 case ETHERTYPE_IP:
4689 *is_ipv4 = TRUE;
4690 break;
4691 case ETHERTYPE_IPV6:
4692 *is_ipv4 = FALSE;
4693 break;
4694 default:
4695 is_ip = FALSE;
4696 break;
4697 }
4698 return is_ip;
4699}
4700
4701static errno_t
4702bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4703{
4704 struct brcsumstats *csum_stats_p;
4705 struct ether_header *eh;
4706 errno_t error = 0;
4707 ip_packet_info info;
4708 bool is_ipv4;
4709 struct mbuf * m;
4710 u_int mac_hlen = sizeof(struct ether_header);
4711 uint16_t sum;
4712 bool valid;
4713
4714 eh = mtod(*mp, struct ether_header *);
4715 if (!ether_header_type_is_ip(eh, is_ipv4: &is_ipv4)) {
4716 goto done;
4717 }
4718 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p: &info,
4719 stats_p: &stats_p->brms_out_ip);
4720 m = *mp;
4721 if (error != 0) {
4722 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4723 "bridge_get_ip_proto failed %d",
4724 error);
4725 goto done;
4726 }
4727 if (is_ipv4) {
4728 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4729 /* hardware offloaded IP header checksum */
4730 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4731 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4732 "IP checksum HW %svalid",
4733 valid ? "" : "in");
4734 if (!valid) {
4735 stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4736 error = _EBADIPCHECKSUM;
4737 goto done;
4738 }
4739 stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4740 } else {
4741 /* verify */
4742 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4743 valid = (sum == 0);
4744 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 "IP checksum SW %svalid",
4746 valid ? "" : "in");
4747 if (!valid) {
4748 stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4749 error = _EBADIPCHECKSUM;
4750 goto done;
4751 }
4752 stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4753 }
4754 }
4755 if (info.ip_is_fragmented) {
4756 /* can't verify checksum on fragmented packets */
4757 goto done;
4758 }
4759 switch (info.ip_proto) {
4760 case IPPROTO_TCP:
4761 stats_p->brms_out_ip.bips_tcp++;
4762 break;
4763 case IPPROTO_UDP:
4764 stats_p->brms_out_ip.bips_udp++;
4765 break;
4766 default:
4767 goto done;
4768 }
4769 /* check for hardware offloaded UDP/TCP checksum */
4770#define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4771 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4772 /* checksum verified by hardware */
4773 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4774 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4775 "IPv%c %s checksum HW 0x%x %svalid",
4776 is_ipv4 ? '4' : '6',
4777 (info.ip_proto == IPPROTO_TCP)
4778 ? "TCP" : "UDP",
4779 m->m_pkthdr.csum_data,
4780 valid ? "" : "in" );
4781 if (!valid) {
4782 /* bad checksum */
4783 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4784 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4785 : _EBADTCPCHECKSUM;
4786 } else {
4787 /* good checksum */
4788 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4789 }
4790 proto_csum_stats_increment(proto: info.ip_proto, stats_p: csum_stats_p);
4791 goto done;
4792 }
4793 m->m_data += mac_hlen;
4794 m->m_len -= mac_hlen;
4795 m->m_pkthdr.len -= mac_hlen;
4796 if (is_ipv4) {
4797 sum = inet_cksum(m, info.ip_proto,
4798 info.ip_hlen,
4799 info.ip_pay_len);
4800 } else {
4801 sum = inet6_cksum(m, info.ip_proto,
4802 info.ip_hlen + info.ip_opt_len,
4803 info.ip_pay_len - info.ip_opt_len);
4804 }
4805 valid = (sum == 0);
4806 if (valid) {
4807 csum_stats_p = &stats_p->brms_out_cksum_good;
4808 } else {
4809 csum_stats_p = &stats_p->brms_out_cksum_bad;
4810 error = (info.ip_proto == IPPROTO_TCP)
4811 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4812 }
4813 proto_csum_stats_increment(proto: info.ip_proto, stats_p: csum_stats_p);
4814 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4815 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4816 is_ipv4 ? '4' : '6',
4817 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4818 valid ? "" : "in",
4819 sum, info.ip_hlen, info.ip_pay_len);
4820 m->m_data -= mac_hlen;
4821 m->m_len += mac_hlen;
4822 m->m_pkthdr.len += mac_hlen;
4823done:
4824 return error;
4825}
4826
4827static errno_t
4828bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4829 struct ifbrmstats * stats_p)
4830{
4831 uint16_t * csum_p;
4832 errno_t error = 0;
4833 u_int hlen;
4834 struct mbuf * m0 = *mp;
4835 u_int mac_hlen = sizeof(struct ether_header);
4836 u_int pkt_hdr_len;
4837 struct tcphdr * tcp;
4838 u_int tcp_hlen;
4839 struct udphdr * udp;
4840
4841 if (info_p->ip_is_ipv4) {
4842 /* compute IP header checksum */
4843 info_p->ip_hdr.ip->ip_sum = 0;
4844 info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4845 info_p->ip_hlen);
4846 stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4847 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4848 "IPv4 checksum 0x%x",
4849 ntohs(info_p->ip_hdr.ip->ip_sum));
4850 }
4851 if (info_p->ip_is_fragmented) {
4852 /* can't compute checksum on fragmented packets */
4853 goto done;
4854 }
4855 pkt_hdr_len = m0->m_pkthdr.len;
4856 switch (info_p->ip_proto) {
4857 case IPPROTO_TCP:
4858 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4859 + sizeof(struct tcphdr);
4860 if (m0->m_len < hlen) {
4861 *mp = m0 = m_pullup(m0, hlen);
4862 if (m0 == NULL) {
4863 stats_p->brms_in_ip.bips_bad_tcp++;
4864 error = _EBADTCP;
4865 goto done;
4866 }
4867 }
4868 tcp = (struct tcphdr *)(void *)
4869 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4870 + info_p->ip_opt_len);
4871 tcp_hlen = tcp->th_off << 2;
4872 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4873 if (hlen > pkt_hdr_len) {
4874 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4875 "bad tcp header length %u",
4876 tcp_hlen);
4877 stats_p->brms_in_ip.bips_bad_tcp++;
4878 error = _EBADTCP;
4879 goto done;
4880 }
4881 csum_p = &tcp->th_sum;
4882 stats_p->brms_in_ip.bips_tcp++;
4883 break;
4884 case IPPROTO_UDP:
4885 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4886 if (m0->m_len < hlen) {
4887 *mp = m0 = m_pullup(m0, hlen);
4888 if (m0 == NULL) {
4889 stats_p->brms_in_ip.bips_bad_udp++;
4890 error = ENOBUFS;
4891 goto done;
4892 }
4893 }
4894 udp = (struct udphdr *)(void *)
4895 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4896 + info_p->ip_opt_len);
4897 csum_p = &udp->uh_sum;
4898 stats_p->brms_in_ip.bips_udp++;
4899 break;
4900 default:
4901 /* not TCP or UDP */
4902 goto done;
4903 }
4904 *csum_p = 0;
4905 m0->m_data += mac_hlen;
4906 m0->m_len -= mac_hlen;
4907 m0->m_pkthdr.len -= mac_hlen;
4908 if (info_p->ip_is_ipv4) {
4909 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4910 info_p->ip_pay_len);
4911 } else {
4912 *csum_p = inet6_cksum(m0, info_p->ip_proto,
4913 info_p->ip_hlen + info_p->ip_opt_len,
4914 info_p->ip_pay_len - info_p->ip_opt_len);
4915 }
4916 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4917 /* RFC 1122 4.1.3.4 */
4918 *csum_p = 0xffff;
4919 }
4920 m0->m_data -= mac_hlen;
4921 m0->m_len += mac_hlen;
4922 m0->m_pkthdr.len += mac_hlen;
4923 proto_csum_stats_increment(proto: info_p->ip_proto,
4924 stats_p: &stats_p->brms_in_computed_cksum);
4925
4926 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4927 "IPv%c %s set checksum 0x%x",
4928 info_p->ip_is_ipv4 ? '4' : '6',
4929 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4930 ntohs(*csum_p));
4931done:
4932 return error;
4933}
4934
4935static errno_t
4936bridge_send(struct ifnet *src_ifp,
4937 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4938{
4939 switch (cksum_op) {
4940 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4941 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4942 break;
4943 case CHECKSUM_OPERATION_FINALIZE:
4944 /* the checksum might not be correct, finalize now */
4945 bridge_finalize_cksum(dst_ifp, m);
4946 break;
4947 case CHECKSUM_OPERATION_COMPUTE:
4948 bridge_compute_cksum(src_if: src_ifp, dst_if: dst_ifp, m);
4949 break;
4950 default:
4951 break;
4952 }
4953#if HAS_IF_CAP
4954 /*
4955 * If underlying interface can not do VLAN tag insertion itself
4956 * then attach a packet tag that holds it.
4957 */
4958 if ((m->m_flags & M_VLANTAG) &&
4959 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4960 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4961 if (m == NULL) {
4962 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4963 "%s: unable to prepend VLAN header",
4964 dst_ifp->if_xname);
4965 (void) ifnet_stat_increment_out(dst_ifp,
4966 0, 0, 1);
4967 return 0;
4968 }
4969 m->m_flags &= ~M_VLANTAG;
4970 }
4971#endif /* HAS_IF_CAP */
4972 return bridge_transmit(ifp: dst_ifp, m);
4973}
4974
4975static errno_t
4976bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4977{
4978 errno_t error;
4979 u_int mac_hlen;
4980
4981 mac_hlen = sizeof(struct ether_header);
4982
4983#if HAS_IF_CAP
4984 /*
4985 * If underlying interface can not do VLAN tag insertion itself
4986 * then attach a packet tag that holds it.
4987 */
4988 if ((m->m_flags & M_VLANTAG) &&
4989 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4990 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4991 if (m == NULL) {
4992 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4993 "%s: unable to prepend VLAN header",
4994 dst_ifp->if_xname);
4995 (void) ifnet_stat_increment_out(dst_ifp,
4996 0, 0, 1);
4997 error = ENOBUFS;
4998 goto done;
4999 }
5000 m->m_flags &= ~M_VLANTAG;
5001 mac_hlen += ETHER_VLAN_ENCAP_LEN;
5002 }
5003#endif /* HAS_IF_CAP */
5004 error = gso_tcp(ifp: dst_ifp, mp: &m, mac_hlen, is_ipv4, TRUE);
5005 return error;
5006}
5007
5008static uint32_t
5009get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5010{
5011 uint32_t tso_mtu;
5012
5013 tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5014 if (tso_mtu == 0) {
5015 tso_mtu = IP_MAXPACKET;
5016 }
5017
5018#if DEBUG || DEVELOPMENT
5019#define REDUCED_TSO_MTU (16 * 1024)
5020 if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5021 tso_mtu = REDUCED_TSO_MTU;
5022 }
5023#endif /* DEBUG || DEVELOPMENT */
5024 return tso_mtu;
5025}
5026
5027/*
5028 * tso_hwassist:
5029 * - determine whether the destination interface supports TSO offload
5030 * - if the packet is already marked for offload and the hardware supports
5031 * it, just allow the packet to continue on
5032 * - if not, parse the packet headers to verify that this is a large TCP
5033 * packet requiring segmentation; if the hardware doesn't support it
5034 * set need_sw_tso; otherwise, mark the packet for TSO offload
5035 */
5036static int
5037tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5038 bool * need_sw_tso, bool * is_large_tcp)
5039{
5040 int error = 0;
5041 u_int32_t if_csum;
5042 u_int32_t if_tso;
5043 u_int32_t mbuf_tso;
5044 bool supports_cksum = false;
5045
5046 *need_sw_tso = false;
5047 *is_large_tcp = false;
5048 if (is_ipv4) {
5049 /*
5050 * Enable both TCP and IP offload if the hardware supports it.
5051 * If the hardware doesn't support TCP offload, supports_cksum
5052 * will be false so we won't set either offload.
5053 */
5054 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5055 supports_cksum = (if_csum & CSUM_TCP) != 0;
5056 if_tso = IFNET_TSO_IPV4;
5057 mbuf_tso = CSUM_TSO_IPV4;
5058 } else {
5059 supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
5060 if_csum = CSUM_TCPIPV6;
5061 if_tso = IFNET_TSO_IPV6;
5062 mbuf_tso = CSUM_TSO_IPV6;
5063 }
5064 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5065 "%s: does%s support checksum 0x%x if_csum 0x%x",
5066 ifp->if_xname, supports_cksum ? "" : " not",
5067 ifp->if_hwassist, if_csum);
5068 if ((ifp->if_hwassist & if_tso) != 0 &&
5069 ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
5070 /* hardware TSO, mbuf already marked */
5071 } else {
5072 /* verify that this is a large TCP frame */
5073 uint32_t csum_flags;
5074 ip_packet_info info;
5075 int mss;
5076 uint32_t pkt_mtu;
5077 struct bripstats stats;
5078 struct tcphdr * tcp;
5079 uint32_t tso_mtu;
5080
5081 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5082 info_p: &info, stats_p: &stats);
5083 if (error != 0) {
5084 /* bad packet */
5085 goto done;
5086 }
5087 if (info.ip_proto_hdr == NULL) {
5088 /* not a TCP packet */
5089 goto done;
5090 }
5091 pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5092 if (pkt_mtu <= ifp->if_mtu) {
5093 /* not actually a large packet */
5094 goto done;
5095 }
5096 if ((ifp->if_hwassist & if_tso) == 0) {
5097 /* hardware does not support TSO, enable sw tso */
5098 *need_sw_tso = if_bridge_segmentation != 0;
5099 goto done;
5100 }
5101 tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5102 if (pkt_mtu > tso_mtu) {
5103 /* hardware can't segment this, enable sw tso */
5104 *need_sw_tso = if_bridge_segmentation != 0;
5105 goto done;
5106 }
5107
5108 /* use hardware TSO */
5109 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5110 tcp = (struct tcphdr *)info.ip_proto_hdr;
5111 mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
5112 - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
5113 assert(mss > 0);
5114 csum_flags = mbuf_tso;
5115 if (supports_cksum) {
5116 csum_flags |= if_csum;
5117 }
5118 (*mp)->m_pkthdr.tso_segsz = mss;
5119 (*mp)->m_pkthdr.csum_flags |= csum_flags;
5120 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5121 *is_large_tcp = true;
5122 }
5123done:
5124 return error;
5125}
5126
5127/*
5128 * bridge_enqueue:
5129 *
5130 * Enqueue a packet on a bridge member interface.
5131 *
5132 */
5133static errno_t
5134bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
5135 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
5136{
5137 errno_t error = 0;
5138 int len;
5139
5140 VERIFY(dst_ifp != NULL);
5141
5142 /*
5143 * We may be sending a fragment so traverse the mbuf
5144 *
5145 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
5146 */
5147 for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
5148 bool need_sw_tso = false;
5149 bool is_ipv4 = false;
5150 bool is_large_pkt;
5151 errno_t _error = 0;
5152
5153 len = m->m_pkthdr.len;
5154 m->m_flags |= M_PROTO1; /* set to avoid loops */
5155 next_m = m->m_nextpkt;
5156 m->m_nextpkt = NULL;
5157 /*
5158 * Need to segment the packet if it is a large frame
5159 * and the destination interface does not support TSO.
5160 *
5161 * Note that with trailers, it's possible for a packet to
5162 * be large but not actually require segmentation.
5163 */
5164 is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5165 if (is_large_pkt) {
5166 struct ether_header *eh;
5167 bool is_large_tcp = false;
5168
5169 eh = mtod(m, struct ether_header *);
5170 if (ether_header_type_is_ip(eh, is_ipv4: &is_ipv4)) {
5171 _error = tso_hwassist(mp: &m, is_ipv4,
5172 ifp: dst_ifp, mac_hlen: sizeof(struct ether_header),
5173 need_sw_tso: &need_sw_tso, is_large_tcp: &is_large_tcp);
5174 if (is_large_tcp) {
5175 cksum_op = CHECKSUM_OPERATION_NONE;
5176 }
5177 } else {
5178 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5179 "large non IP packet");
5180 }
5181 }
5182 if (_error != 0) {
5183 if (m != NULL) {
5184 m_freem(m);
5185 }
5186 } else if (need_sw_tso) {
5187 _error = bridge_send_tso(dst_ifp, m, is_ipv4);
5188 } else {
5189 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5190 "%s bridge_send(%s) len %d op %d",
5191 bridge_ifp->if_xname,
5192 dst_ifp->if_xname,
5193 len, cksum_op);
5194 _error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5195 }
5196
5197 /* Preserve first error value */
5198 if (error == 0 && _error != 0) {
5199 error = _error;
5200 }
5201 if (_error == 0) {
5202 (void) ifnet_stat_increment_out(interface: bridge_ifp, packets_out: 1, bytes_out: len, errors_out: 0);
5203 } else {
5204 (void) ifnet_stat_increment_out(interface: bridge_ifp, packets_out: 0, bytes_out: 0, errors_out: 1);
5205 }
5206 }
5207
5208 return error;
5209}
5210
#if HAS_BRIDGE_DUMMYNET
/*
 * bridge_dummynet:
 *
 * Receive a queued packet from dummynet and pass it on to the output
 * interface.
 *
 * The mbuf has the Ethernet header already attached.
 */
static void
bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_bridge;

	/*
	 * The packet didn't originate from a member interface. This should
	 * only ever happen if a member interface is removed while packets
	 * are queued for it.
	 */
	if (sc == NULL) {
		m_freem(m);
		return;
	}

	/* run the outbound packet filters; stop if they fail or take the packet */
	if ((PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) &&
	    (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0 || m == NULL)) {
		return;
	}
	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
}

#endif /* HAS_BRIDGE_DUMMYNET */
5249
5250/*
5251 * bridge_member_output:
5252 *
5253 * Send output from a bridge member interface. This
5254 * performs the bridging function for locally originated
5255 * packets.
5256 *
5257 * The mbuf has the Ethernet header already attached.
5258 */
5259static errno_t
5260bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5261{
5262 ifnet_t bridge_ifp;
5263 struct ether_header *eh;
5264 struct ifnet *dst_if;
5265 uint16_t vlan;
5266 struct bridge_iflist *mac_nat_bif;
5267 ifnet_t mac_nat_ifp;
5268 mbuf_t m = *data;
5269
5270 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5271 "ifp %s", ifp->if_xname);
5272 if (m->m_len < ETHER_HDR_LEN) {
5273 m = m_pullup(m, ETHER_HDR_LEN);
5274 if (m == NULL) {
5275 *data = NULL;
5276 return EJUSTRETURN;
5277 }
5278 }
5279
5280 eh = mtod(m, struct ether_header *);
5281 vlan = VLANTAGOF(m);
5282
5283 BRIDGE_LOCK(sc);
5284 mac_nat_bif = sc->sc_mac_nat_bif;
5285 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5286 if (mac_nat_ifp == ifp) {
5287 /* record the IP address used by the MAC NAT interface */
5288 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5289 m = *data;
5290 if (m == NULL) {
5291 /* packet was deallocated */
5292 BRIDGE_UNLOCK(sc);
5293 return EJUSTRETURN;
5294 }
5295 }
5296 bridge_ifp = sc->sc_ifp;
5297
5298 /*
5299 * APPLE MODIFICATION
5300 * If the packet is an 802.1X ethertype, then only send on the
5301 * original output interface.
5302 */
5303 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5304 dst_if = ifp;
5305 goto sendunicast;
5306 }
5307
5308 /*
5309 * If bridge is down, but the original output interface is up,
5310 * go ahead and send out that interface. Otherwise, the packet
5311 * is dropped below.
5312 */
5313 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5314 dst_if = ifp;
5315 goto sendunicast;
5316 }
5317
5318 /*
5319 * If the packet is a multicast, or we don't know a better way to
5320 * get there, send to all interfaces.
5321 */
5322 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5323 dst_if = NULL;
5324 } else {
5325 dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5326 }
5327 if (dst_if == NULL) {
5328 struct bridge_iflist *bif;
5329 struct mbuf *mc;
5330 errno_t error;
5331
5332
5333 bridge_span(sc, m);
5334
5335 BRIDGE_LOCK2REF(sc, error);
5336 if (error != 0) {
5337 m_freem(m);
5338 return EJUSTRETURN;
5339 }
5340
5341 /*
5342 * Duplicate and send the packet across all member interfaces
5343 * except the originating interface.
5344 */
5345 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5346 dst_if = bif->bif_ifp;
5347 if (dst_if == ifp) {
5348 /* skip the originating interface */
5349 continue;
5350 }
5351 /* skip interface with inactive link status */
5352 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5353 continue;
5354 }
5355#if 0
5356 if (dst_if->if_type == IFT_GIF) {
5357 continue;
5358 }
5359#endif
5360 /* skip interface that isn't running */
5361 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5362 continue;
5363 }
5364 /*
5365 * If the interface is participating in spanning
5366 * tree, make sure the port is in a state that
5367 * allows forwarding.
5368 */
5369 if ((bif->bif_ifflags & IFBIF_STP) &&
5370 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5371 continue;
5372 }
5373 /*
5374 * If the destination is the MAC NAT interface,
5375 * skip sending the packet. The packet can't be sent
5376 * if the source MAC is incorrect.
5377 */
5378 if (dst_if == mac_nat_ifp) {
5379 continue;
5380 }
5381
5382 /* make a deep copy to send on this member interface */
5383 mc = m_dup(m, M_DONTWAIT);
5384 if (mc == NULL) {
5385 (void)ifnet_stat_increment_out(interface: bridge_ifp,
5386 packets_out: 0, bytes_out: 0, errors_out: 1);
5387 continue;
5388 }
5389 (void)bridge_enqueue(bridge_ifp, src_ifp: ifp, dst_ifp: dst_if,
5390 m: mc, cksum_op: CHECKSUM_OPERATION_COMPUTE);
5391 }
5392 BRIDGE_UNREF(sc);
5393
5394 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5395 m_freem(m);
5396 return EJUSTRETURN;
5397 }
5398 /* allow packet to continue on the originating interface */
5399 return 0;
5400 }
5401
5402sendunicast:
5403 /*
5404 * XXX Spanning tree consideration here?
5405 */
5406
5407 bridge_span(sc, m);
5408 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5409 m_freem(m);
5410 BRIDGE_UNLOCK(sc);
5411 return EJUSTRETURN;
5412 }
5413
5414 BRIDGE_UNLOCK(sc);
5415 if (dst_if == ifp) {
5416 /* allow packet to continue on the originating interface */
5417 return 0;
5418 }
5419 if (dst_if != mac_nat_ifp) {
5420 (void) bridge_enqueue(bridge_ifp, src_ifp: ifp, dst_ifp: dst_if, m,
5421 cksum_op: CHECKSUM_OPERATION_COMPUTE);
5422 } else {
5423 /*
5424 * This is not the original output interface
5425 * and the destination is the MAC NAT interface.
5426 * Drop the packet because the packet can't be sent
5427 * if the source MAC is incorrect.
5428 */
5429 m_freem(m);
5430 }
5431 return EJUSTRETURN;
5432}
5433
5434/*
5435 * Output callback.
5436 *
5437 * This routine is called externally from above only when if_bridge_txstart
5438 * is disabled; otherwise it is called internally by bridge_start().
5439 */
5440static int
5441bridge_output(struct ifnet *ifp, struct mbuf *m)
5442{
5443 struct bridge_softc *sc = ifnet_softc(interface: ifp);
5444 struct ether_header *eh;
5445 struct ifnet *dst_if = NULL;
5446 int error = 0;
5447
5448 eh = mtod(m, struct ether_header *);
5449
5450 BRIDGE_LOCK(sc);
5451
5452 if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5453 dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5454 }
5455
5456 (void) ifnet_stat_increment_out(interface: ifp, packets_out: 1, bytes_out: m->m_pkthdr.len, errors_out: 0);
5457
5458#if NBPFILTER > 0
5459 if (sc->sc_bpf_output) {
5460 bridge_bpf_output(ifp, m);
5461 }
5462#endif
5463
5464 if (dst_if == NULL) {
5465 /* callee will unlock */
5466 bridge_broadcast(sc, NULL, m, 0);
5467 } else {
5468 ifnet_t bridge_ifp;
5469
5470 bridge_ifp = sc->sc_ifp;
5471 BRIDGE_UNLOCK(sc);
5472
5473 error = bridge_enqueue(bridge_ifp, NULL, dst_ifp: dst_if, m,
5474 cksum_op: CHECKSUM_OPERATION_FINALIZE);
5475 }
5476
5477 return error;
5478}
5479
5480static void
5481bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5482{
5483 struct ether_header *eh;
5484 bool is_ipv4;
5485 uint32_t sw_csum, hwcap;
5486 uint32_t did_sw;
5487 uint32_t csum_flags;
5488
5489 eh = mtod(m, struct ether_header *);
5490 if (!ether_header_type_is_ip(eh, is_ipv4: &is_ipv4)) {
5491 return;
5492 }
5493
5494 /* do in software what the hardware cannot */
5495 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5496 csum_flags = m->m_pkthdr.csum_flags;
5497 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5498 sw_csum &= IF_HWASSIST_CSUM_MASK;
5499
5500 if (is_ipv4) {
5501 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5502 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5503 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5504 uint16_t start =
5505 sizeof(*eh) + sizeof(struct ip);
5506 uint16_t ulpoff =
5507 m->m_pkthdr.csum_data & 0xffff;
5508 m->m_pkthdr.csum_flags |=
5509 (CSUM_DATA_VALID | CSUM_PARTIAL);
5510 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5511 m->m_pkthdr.csum_tx_start = start;
5512 } else {
5513 sw_csum |= (CSUM_DELAY_DATA &
5514 m->m_pkthdr.csum_flags);
5515 }
5516 }
5517 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5518 } else {
5519 if ((hwcap & CSUM_PARTIAL) &&
5520 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5521 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5522 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5523 uint16_t start =
5524 sizeof(*eh) + sizeof(struct ip6_hdr);
5525 uint16_t ulpoff =
5526 m->m_pkthdr.csum_data & 0xffff;
5527 m->m_pkthdr.csum_flags |=
5528 (CSUM_DATA_VALID | CSUM_PARTIAL);
5529 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5530 m->m_pkthdr.csum_tx_start = start;
5531 } else {
5532 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5533 m->m_pkthdr.csum_flags);
5534 }
5535 }
5536 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5537 }
5538 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5539 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5540 ifp->if_xname, csum_flags, hwcap, sw_csum,
5541 did_sw, m->m_pkthdr.csum_flags);
5542}
5543
/*
 * bridge_start:
 *
 *	Start output on a bridge.
 *
 *	This routine is invoked by the start worker thread; because we never
 *	call it directly, there is no need to deploy any serialization
 *	mechanism other than what's already used by the worker thread, i.e.
 *	this is already single threaded.
 *
 *	This routine is called only when if_bridge_txstart is enabled.
 *
 *	Drains the interface send queue: each dequeued packet is passed to
 *	bridge_output() until ifnet_dequeue() reports a non-zero status.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5569
5570/*
5571 * bridge_forward:
5572 *
5573 * The forwarding function of the bridge.
5574 *
5575 * NOTE: Releases the lock on return.
5576 */
5577static void
5578bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5579 struct mbuf *m)
5580{
5581 struct bridge_iflist *dbif;
5582 ifnet_t bridge_ifp;
5583 struct ifnet *src_if, *dst_if;
5584 struct ether_header *eh;
5585 uint16_t vlan;
5586 uint8_t *dst;
5587 int error;
5588 struct mac_nat_record mnr;
5589 bool translate_mac = FALSE;
5590 uint32_t sc_filter_flags = 0;
5591
5592 BRIDGE_LOCK_ASSERT_HELD(sc);
5593
5594 bridge_ifp = sc->sc_ifp;
5595 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5596 "%s m 0x%llx", bridge_ifp->if_xname,
5597 (uint64_t)VM_KERNEL_ADDRPERM(m));
5598
5599 src_if = m->m_pkthdr.rcvif;
5600 if (src_if != sbif->bif_ifp) {
5601 const char * src_if_name;
5602
5603 src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5604 BRIDGE_LOG(LOG_NOTICE, 0,
5605 "src_if %s != bif_ifp %s",
5606 src_if_name, sbif->bif_ifp->if_xname);
5607 goto drop;
5608 }
5609
5610 (void) ifnet_stat_increment_in(interface: bridge_ifp, packets_in: 1, bytes_in: m->m_pkthdr.len, errors_in: 0);
5611 vlan = VLANTAGOF(m);
5612
5613
5614 if ((sbif->bif_ifflags & IFBIF_STP) &&
5615 sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5616 goto drop;
5617 }
5618
5619 eh = mtod(m, struct ether_header *);
5620 dst = eh->ether_dhost;
5621
5622 /* If the interface is learning, record the address. */
5623 if (sbif->bif_ifflags & IFBIF_LEARNING) {
5624 error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5625 sbif, 0, IFBAF_DYNAMIC);
5626 /*
5627 * If the interface has addresses limits then deny any source
5628 * that is not in the cache.
5629 */
5630 if (error && sbif->bif_addrmax) {
5631 goto drop;
5632 }
5633 }
5634
5635 if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5636 sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5637 goto drop;
5638 }
5639
5640 /*
5641 * At this point, the port either doesn't participate
5642 * in spanning tree or it is in the forwarding state.
5643 */
5644
5645 /*
5646 * If the packet is unicast, destined for someone on
5647 * "this" side of the bridge, drop it.
5648 */
5649 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5650 /* unicast */
5651 dst_if = bridge_rtlookup(sc, dst, vlan);
5652 if (src_if == dst_if) {
5653 goto drop;
5654 }
5655 } else {
5656 /* broadcast/multicast */
5657
5658 /*
5659 * Check if its a reserved multicast address, any address
5660 * listed in 802.1D section 7.12.6 may not be forwarded by the
5661 * bridge.
5662 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5663 */
5664 if (dst[0] == 0x01 && dst[1] == 0x80 &&
5665 dst[2] == 0xc2 && dst[3] == 0x00 &&
5666 dst[4] == 0x00 && dst[5] <= 0x0f) {
5667 goto drop;
5668 }
5669
5670
5671 /* ...forward it to all interfaces. */
5672 os_atomic_inc(&bridge_ifp->if_imcasts, relaxed);
5673 dst_if = NULL;
5674 }
5675
5676 /*
5677 * If we have a destination interface which is a member of our bridge,
5678 * OR this is a unicast packet, push it through the bpf(4) machinery.
5679 * For broadcast or multicast packets, don't bother because it will
5680 * be reinjected into ether_input. We do this before we pass the packets
5681 * through the pfil(9) framework, as it is possible that pfil(9) will
5682 * drop the packet, or possibly modify it, making it difficult to debug
5683 * firewall issues on the bridge.
5684 */
5685#if NBPFILTER > 0
5686 if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5687 dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5688 m->m_pkthdr.rcvif = bridge_ifp;
5689 BRIDGE_BPF_MTAP_INPUT(sc, m);
5690 }
5691#endif /* NBPFILTER */
5692
5693 if (dst_if == NULL) {
5694 /* bridge_broadcast will unlock */
5695 bridge_broadcast(sc, sbif, m, 1);
5696 return;
5697 }
5698
5699 /*
5700 * Unicast.
5701 */
5702 /*
5703 * At this point, we're dealing with a unicast frame
5704 * going to a different interface.
5705 */
5706 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5707 goto drop;
5708 }
5709
5710 dbif = bridge_lookup_member_if(sc, member_ifp: dst_if);
5711 if (dbif == NULL) {
5712 /* Not a member of the bridge (anymore?) */
5713 goto drop;
5714 }
5715
5716 /* Private segments can not talk to each other */
5717 if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5718 goto drop;
5719 }
5720
5721 if ((dbif->bif_ifflags & IFBIF_STP) &&
5722 dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5723 goto drop;
5724 }
5725
5726#if HAS_DHCPRA_MASK
5727 /* APPLE MODIFICATION <rdar:6985737> */
5728 if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5729 m = ip_xdhcpra_output(dst_if, m);
5730 if (!m) {
5731 ++bridge_ifp.if_xdhcpra;
5732 BRIDGE_UNLOCK(sc);
5733 return;
5734 }
5735 }
5736#endif /* HAS_DHCPRA_MASK */
5737
5738 if (dbif == sc->sc_mac_nat_bif) {
5739 /* determine how to translate the packet */
5740 translate_mac
5741 = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5742 if (m == NULL) {
5743 /* packet was deallocated */
5744 BRIDGE_UNLOCK(sc);
5745 return;
5746 }
5747 } else if (bif_has_checksum_offload(bif: dbif) &&
5748 !bif_has_checksum_offload(bif: sbif)) {
5749 /*
5750 * If the destination interface has checksum offload enabled,
5751 * verify the checksum now, unless the source interface also has
5752 * checksum offload enabled. The checksum in that case has
5753 * already just been computed and verifying it is unnecessary.
5754 */
5755 error = bridge_verify_checksum(mp: &m, stats_p: &dbif->bif_stats);
5756 if (error != 0) {
5757 BRIDGE_UNLOCK(sc);
5758 if (m != NULL) {
5759 m_freem(m);
5760 }
5761 return;
5762 }
5763 }
5764
5765 sc_filter_flags = sc->sc_filter_flags;
5766
5767 BRIDGE_UNLOCK(sc);
5768 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5769 if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5770 return;
5771 }
5772 if (m == NULL) {
5773 return;
5774 }
5775 }
5776
5777 /* if we need to, translate the MAC address */
5778 if (translate_mac) {
5779 bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5780 }
5781 /*
5782 * We're forwarding an inbound packet in which the checksum must
5783 * already have been computed and if required, verified.
5784 */
5785 if (m != NULL) {
5786 (void) bridge_enqueue(bridge_ifp, src_ifp: src_if, dst_ifp: dst_if, m,
5787 cksum_op: CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5788 }
5789 return;
5790
5791drop:
5792 BRIDGE_UNLOCK(sc);
5793 m_freem(m);
5794}
5795
5796static void
5797inject_input_packet(ifnet_t ifp, mbuf_t m)
5798{
5799 mbuf_pkthdr_setrcvif(mbuf: m, ifp);
5800 mbuf_pkthdr_setheader(mbuf: m, header: mbuf_data(mbuf: m));
5801 mbuf_setdata(mbuf: m, data: (char *)mbuf_data(mbuf: m) + ETHER_HDR_LEN,
5802 len: mbuf_len(mbuf: m) - ETHER_HDR_LEN);
5803 mbuf_pkthdr_adjustlen(mbuf: m, amount: -ETHER_HDR_LEN);
5804 m->m_flags |= M_PROTO1; /* set to avoid loops */
5805 dlil_input_packet_list(ifp, m);
5806 return;
5807}
5808
5809static bool
5810in_addr_is_ours(struct in_addr ip)
5811{
5812 struct in_ifaddr *ia;
5813 bool ours = false;
5814
5815 lck_rw_lock_shared(lck: &in_ifaddr_rwlock);
5816 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5817 if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5818 ours = true;
5819 break;
5820 }
5821 }
5822 lck_rw_done(lck: &in_ifaddr_rwlock);
5823 return ours;
5824}
5825
5826static bool
5827in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5828{
5829 struct in6_ifaddr *ia6;
5830 bool ours = false;
5831
5832 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5833 struct in6_addr dst_ip;
5834
5835 /* need to embed scope ID for comparison */
5836 bcopy(src: ip6_p, dst: &dst_ip, n: sizeof(dst_ip));
5837 dst_ip.s6_addr16[1] = htons(ifscope);
5838 ip6_p = &dst_ip;
5839 }
5840 lck_rw_lock_shared(lck: &in6_ifaddr_rwlock);
5841 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5842 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5843 ia6->ia_addr.sin6_scope_id, ifscope)) {
5844 ours = true;
5845 break;
5846 }
5847 }
5848 lck_rw_done(lck: &in6_ifaddr_rwlock);
5849 return ours;
5850}
5851
5852static void
5853bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5854 bpf_packet_func bpf_input_func)
5855{
5856 size_t byte_count;
5857 struct ether_header *eh;
5858 errno_t error;
5859 bool is_ipv4;
5860 int len;
5861 u_int mac_hlen;
5862 int pkt_count;
5863
5864 /* segment large packets before sending them up */
5865 if (if_bridge_segmentation == 0) {
5866 goto done;
5867 }
5868 len = m->m_pkthdr.len;
5869 if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5870 goto done;
5871 }
5872 eh = mtod(m, struct ether_header *);
5873 if (!ether_header_type_is_ip(eh, is_ipv4: &is_ipv4)) {
5874 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5875 "large non IPv4/IPv6 packet");
5876 goto done;
5877 }
5878
5879 /*
5880 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5881 *
5882 * If gso_tcp() returns success (0), the packet(s) are
5883 * ready to be passed up. If the destination is a local IP address,
5884 * the packet will be passed up as a large, single packet.
5885 *
5886 * If gso_tcp() returns an error, the packet has already
5887 * been freed.
5888 */
5889 mac_hlen = sizeof(*eh);
5890 error = gso_tcp(ifp: bridge_ifp, mp: &m, mac_hlen, is_ipv4, FALSE);
5891 if (error != 0) {
5892 return;
5893 }
5894
5895done:
5896 pkt_count = 0;
5897 byte_count = 0;
5898 for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5899 /* Mark the packet as arriving on the bridge interface */
5900 mbuf_pkthdr_setrcvif(mbuf: scan, ifp: bridge_ifp);
5901 mbuf_pkthdr_setheader(mbuf: scan, header: mbuf_data(mbuf: scan));
5902 if (bpf_input_func != NULL) {
5903 (*bpf_input_func)(bridge_ifp, scan);
5904 }
5905 mbuf_setdata(mbuf: scan, data: (char *)mbuf_data(mbuf: scan) + ETHER_HDR_LEN,
5906 len: mbuf_len(mbuf: scan) - ETHER_HDR_LEN);
5907 mbuf_pkthdr_adjustlen(mbuf: scan, amount: -ETHER_HDR_LEN);
5908 byte_count += mbuf_pkthdr_len(mbuf: scan);
5909 pkt_count++;
5910 }
5911 (void)ifnet_stat_increment_in(interface: bridge_ifp, packets_in: pkt_count, bytes_in: byte_count, errors_in: 0);
5912 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5913 "%s %d packet(s) %ld bytes",
5914 bridge_ifp->if_xname, pkt_count, byte_count);
5915 dlil_input_packet_list(bridge_ifp, m);
5916 return;
5917}
5918
5919static bool
5920is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5921{
5922 bool ours;
5923
5924 if (info_p->ip_is_ipv4) {
5925 struct in_addr dst_ip;
5926
5927 bcopy(src: &info_p->ip_hdr.ip->ip_dst, dst: &dst_ip, n: sizeof(dst_ip));
5928 ours = in_addr_is_ours(ip: dst_ip);
5929 } else {
5930 ours = in6_addr_is_ours(ip6_p: &info_p->ip_hdr.ip6->ip6_dst, ifscope);
5931 }
5932 return ours;
5933}
5934
5935static inline errno_t
5936bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5937 const u_char * ether_dhost, mbuf_t *mp,
5938 bool is_broadcast, bool is_ip, bool is_ipv4,
5939 ip_packet_info * info_p, struct bripstats * stats_p,
5940 bool *info_initialized)
5941{
5942 errno_t error = 0;
5943 bool is_local = false;
5944 struct pf_mtag *pf_mtag;
5945 u_int16_t tag = vmnet_tag;
5946
5947 *info_initialized = false;
5948 if (is_broadcast) {
5949 if (_ether_cmp(a: ether_dhost, b: etherbroadcastaddr) == 0) {
5950 tag = vmnet_broadcast_tag;
5951 } else {
5952 tag = vmnet_multicast_tag;
5953 }
5954 } else if (is_ip) {
5955 unsigned int mac_hlen = sizeof(struct ether_header);
5956
5957 bzero(s: stats_p, n: sizeof(*stats_p));
5958 *info_initialized = true;
5959 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5960 stats_p);
5961 if (error != 0) {
5962 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5963 "%s(%s) bridge_get_ip_proto failed %d",
5964 bridge_ifp->if_xname,
5965 ifp->if_xname, error);
5966 if (*mp == NULL) {
5967 return EJUSTRETURN;
5968 }
5969 } else {
5970 is_local = is_our_ip(info_p, ifscope: bridge_ifp->if_index);
5971 if (is_local) {
5972 tag = vmnet_local_tag;
5973 }
5974 }
5975 }
5976 pf_mtag = pf_get_mtag(*mp);
5977 if (pf_mtag != NULL) {
5978 pf_mtag->pftag_tag = tag;
5979 }
5980#if DEBUG || DEVELOPMENT
5981 {
5982 bool forced;
5983
5984 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5985 if (forced) {
5986 m_freem(*mp);
5987 *mp = NULL;
5988 error = EJUSTRETURN;
5989 goto done;
5990 }
5991 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5992 if (forced) {
5993 error = _EBADIP;
5994 goto done;
5995 }
5996 }
5997done:
5998#endif /* DEBUG || DEVELOPMENT */
5999 return error;
6000}
6001
6002static void
6003bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
6004{
6005 dst_p->bips_ip += src_p->bips_ip;
6006 dst_p->bips_ip6 += src_p->bips_ip6;
6007 dst_p->bips_udp += src_p->bips_udp;
6008 dst_p->bips_tcp += src_p->bips_tcp;
6009
6010 dst_p->bips_bad_ip += src_p->bips_bad_ip;
6011 dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
6012 dst_p->bips_bad_udp += src_p->bips_bad_udp;
6013 dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
6014}
6015
6016static void
6017bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
6018{
6019 struct bridge_iflist *bif;
6020 struct bridge_softc *sc = ifp->if_bridge;
6021
6022 BRIDGE_LOCK(sc);
6023 bif = bridge_lookup_member_if(sc, member_ifp: ifp);
6024 if (bif == NULL) {
6025 goto done;
6026 }
6027 if (!bif_has_checksum_offload(bif)) {
6028 goto done;
6029 }
6030 bripstats_apply(dst_p: &bif->bif_stats.brms_in_ip, src_p: stats_p);
6031
6032done:
6033 BRIDGE_UNLOCK(sc);
6034 return;
6035}
6036
6037/*
6038 * bridge_input:
6039 *
6040 * Filter input from a member interface. Queue the packet for
6041 * bridging if it is not for us.
6042 */
6043errno_t
6044bridge_input(struct ifnet *ifp, mbuf_t *data)
6045{
6046 struct bridge_softc *sc = ifp->if_bridge;
6047 struct bridge_iflist *bif, *bif2;
6048 struct ether_header eh_in;
6049 bool is_ip = false;
6050 bool is_ipv4 = false;
6051 ifnet_t bridge_ifp;
6052 struct mbuf *mc, *mc2;
6053 unsigned int mac_hlen = sizeof(struct ether_header);
6054 uint16_t vlan;
6055 errno_t error;
6056 ip_packet_info info;
6057 struct bripstats stats;
6058 bool info_initialized = false;
6059 errno_t ip_packet_error = 0;
6060 bool is_broadcast;
6061 bool is_ip_broadcast = false;
6062 bool is_ifp_mac = false;
6063 mbuf_t m = *data;
6064 uint32_t sc_filter_flags = 0;
6065
6066 bridge_ifp = sc->sc_ifp;
6067 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6068 "%s from %s m 0x%llx data 0x%llx",
6069 bridge_ifp->if_xname, ifp->if_xname,
6070 (uint64_t)VM_KERNEL_ADDRPERM(m),
6071 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
6072 if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
6073 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6074 "%s not running passing along",
6075 bridge_ifp->if_xname);
6076 return 0;
6077 }
6078
6079 vlan = VLANTAGOF(m);
6080
6081#ifdef IFF_MONITOR
6082 /*
6083 * Implement support for bridge monitoring. If this flag has been
6084 * set on this interface, discard the packet once we push it through
6085 * the bpf(4) machinery, but before we do, increment the byte and
6086 * packet counters associated with this interface.
6087 */
6088 if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
6089 m->m_pkthdr.rcvif = bridge_ifp;
6090 BRIDGE_BPF_MTAP_INPUT(sc, m);
6091 (void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
6092 *data = NULL;
6093 m_freem(m);
6094 return EJUSTRETURN;
6095 }
6096#endif /* IFF_MONITOR */
6097
6098 is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
6099
6100 /*
6101 * Need to clear the promiscuous flag otherwise it will be
6102 * dropped by DLIL after processing filters
6103 */
6104 if ((mbuf_flags(mbuf: m) & MBUF_PROMISC)) {
6105 mbuf_setflags_mask(mbuf: m, flags: 0, mask: MBUF_PROMISC);
6106 }
6107
6108 /* copy the ethernet header */
6109 eh_in = *(mtod(m, struct ether_header *));
6110
6111 is_ip = ether_header_type_is_ip(eh: &eh_in, is_ipv4: &is_ipv4);
6112
6113 if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
6114 /* tag packets coming from VMNET interfaces */
6115 ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
6116 ether_dhost: eh_in.ether_dhost, mp: data, is_broadcast, is_ip, is_ipv4,
6117 info_p: &info, stats_p: &stats, info_initialized: &info_initialized);
6118 m = *data;
6119 if (m == NULL) {
6120 bridge_bripstats_apply(ifp, stats_p: &stats);
6121 return EJUSTRETURN;
6122 }
6123 }
6124
6125 sc_filter_flags = sc->sc_filter_flags;
6126 if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6127 error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
6128 m = *data;
6129 if (error != 0 || m == NULL) {
6130 return EJUSTRETURN;
6131 }
6132 }
6133
6134 BRIDGE_LOCK(sc);
6135 bif = bridge_lookup_member_if(sc, member_ifp: ifp);
6136 if (bif == NULL) {
6137 BRIDGE_UNLOCK(sc);
6138 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6139 "%s bridge_lookup_member_if failed",
6140 bridge_ifp->if_xname);
6141 return 0;
6142 }
6143 if (is_ip && bif_has_checksum_offload(bif)) {
6144 if (info_initialized) {
6145 bripstats_apply(dst_p: &bif->bif_stats.brms_in_ip, src_p: &stats);
6146 } else {
6147 error = bridge_get_ip_proto(mp: data, mac_hlen, is_ipv4,
6148 info_p: &info, stats_p: &bif->bif_stats.brms_in_ip);
6149 if (error != 0) {
6150 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6151 "%s(%s) bridge_get_ip_proto failed %d",
6152 bridge_ifp->if_xname,
6153 bif->bif_ifp->if_xname, error);
6154 ip_packet_error = error;
6155 }
6156 }
6157 if (ip_packet_error == 0) {
6158 /* need to compute IP/UDP/TCP/checksums */
6159 error = bridge_offload_checksum(mp: data, info_p: &info,
6160 stats_p: &bif->bif_stats);
6161 if (error != 0) {
6162 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6163 "%s(%s) bridge_offload_checksum failed %d",
6164 bridge_ifp->if_xname,
6165 bif->bif_ifp->if_xname, error);
6166 ip_packet_error = error;
6167 }
6168 }
6169 if (ip_packet_error != 0) {
6170 BRIDGE_UNLOCK(sc);
6171 if (*data != NULL) {
6172 m_freem(*data);
6173 *data = NULL;
6174 }
6175 return EJUSTRETURN;
6176 }
6177 m = *data;
6178 }
6179
6180 if (bif->bif_flags & BIFF_HOST_FILTER) {
6181 error = bridge_host_filter(bif, data);
6182 if (error != 0) {
6183 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6184 "%s bridge_host_filter failed",
6185 bif->bif_ifp->if_xname);
6186 BRIDGE_UNLOCK(sc);
6187 return EJUSTRETURN;
6188 }
6189 m = *data;
6190 }
6191
6192 if (!is_broadcast &&
6193 _ether_cmp(a: eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6194 /* the packet is unicast to the interface's MAC address */
6195 if (is_ip && sc->sc_mac_nat_bif == bif) {
6196 /* doing MAC-NAT, check if destination is IP broadcast */
6197 is_ip_broadcast = is_broadcast_ip_packet(data);
6198 if (*data == NULL) {
6199 BRIDGE_UNLOCK(sc);
6200 return EJUSTRETURN;
6201 }
6202 m = *data;
6203 }
6204 if (!is_ip_broadcast) {
6205 is_ifp_mac = TRUE;
6206 }
6207 }
6208
6209 bridge_span(sc, m);
6210
6211 if (is_broadcast || is_ip_broadcast) {
6212 if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6213 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6214 " multicast: "
6215 "%02x:%02x:%02x:%02x:%02x:%02x",
6216 eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6217 eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6218 eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6219 }
6220 /* Tap off 802.1D packets; they do not get forwarded. */
6221 if (is_broadcast &&
6222 _ether_cmp(a: eh_in.ether_dhost, b: bstp_etheraddr) == 0) {
6223#if BRIDGESTP
6224 m = bstp_input(&bif->bif_stp, ifp, m);
6225#else /* !BRIDGESTP */
6226 m_freem(m);
6227 m = NULL;
6228#endif /* !BRIDGESTP */
6229 if (m == NULL) {
6230 BRIDGE_UNLOCK(sc);
6231 return EJUSTRETURN;
6232 }
6233 }
6234
6235 if ((bif->bif_ifflags & IFBIF_STP) &&
6236 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6237 BRIDGE_UNLOCK(sc);
6238 return 0;
6239 }
6240
6241 /*
6242 * Make a deep copy of the packet and enqueue the copy
6243 * for bridge processing.
6244 */
6245 mc = m_dup(m, M_DONTWAIT);
6246 if (mc == NULL) {
6247 BRIDGE_UNLOCK(sc);
6248 return 0;
6249 }
6250
6251 /*
6252 * Perform the bridge forwarding function with the copy.
6253 *
6254 * Note that bridge_forward calls BRIDGE_UNLOCK
6255 */
6256 if (is_ip_broadcast) {
6257 struct ether_header *eh;
6258
6259 /* make the copy look like it is actually broadcast */
6260 mc->m_flags |= M_BCAST;
6261 eh = mtod(mc, struct ether_header *);
6262 bcopy(src: etherbroadcastaddr, dst: eh->ether_dhost,
6263 ETHER_ADDR_LEN);
6264 }
6265 bridge_forward(sc, sbif: bif, m: mc);
6266
6267 /*
6268 * Reinject the mbuf as arriving on the bridge so we have a
6269 * chance at claiming multicast packets. We can not loop back
6270 * here from ether_input as a bridge is never a member of a
6271 * bridge.
6272 */
6273 VERIFY(bridge_ifp->if_bridge == NULL);
6274 mc2 = m_dup(m, M_DONTWAIT);
6275 if (mc2 != NULL) {
6276 /* Keep the layer3 header aligned */
6277 int i = min(a: mc2->m_pkthdr.len, b: max_protohdr);
6278 mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6279 }
6280 if (mc2 != NULL) {
6281 /* mark packet as arriving on the bridge */
6282 mc2->m_pkthdr.rcvif = bridge_ifp;
6283 mc2->m_pkthdr.pkt_hdr = mbuf_data(mbuf: mc2);
6284 BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6285 (void) mbuf_setdata(mbuf: mc2,
6286 data: (char *)mbuf_data(mbuf: mc2) + ETHER_HDR_LEN,
6287 len: mbuf_len(mbuf: mc2) - ETHER_HDR_LEN);
6288 (void) mbuf_pkthdr_adjustlen(mbuf: mc2, amount: -ETHER_HDR_LEN);
6289 (void) ifnet_stat_increment_in(interface: bridge_ifp, packets_in: 1,
6290 bytes_in: mbuf_pkthdr_len(mbuf: mc2), errors_in: 0);
6291 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6292 "%s mcast for us", bridge_ifp->if_xname);
6293 dlil_input_packet_list(bridge_ifp, mc2);
6294 }
6295
6296 /* Return the original packet for local processing. */
6297 return 0;
6298 }
6299
6300 if ((bif->bif_ifflags & IFBIF_STP) &&
6301 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6302 BRIDGE_UNLOCK(sc);
6303 return 0;
6304 }
6305
6306#ifdef DEV_CARP
6307#define CARP_CHECK_WE_ARE_DST(iface) \
6308 ((iface)->if_carp &&\
6309 carp_forus((iface)->if_carp, eh_in.ether_dhost))
6310#define CARP_CHECK_WE_ARE_SRC(iface) \
6311 ((iface)->if_carp &&\
6312 carp_forus((iface)->if_carp, eh_in.ether_shost))
6313#else
6314#define CARP_CHECK_WE_ARE_DST(iface) 0
6315#define CARP_CHECK_WE_ARE_SRC(iface) 0
6316#endif
6317
6318#define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6319
6320#define PFIL_PHYS(sc, ifp, m)
6321
6322#define GRAB_OUR_PACKETS(iface) \
6323 if ((iface)->if_type == IFT_GIF) \
6324 continue; \
6325 /* It is destined for us. */ \
6326 if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 || \
6327 CARP_CHECK_WE_ARE_DST((iface))) { \
6328 if ((iface)->if_type == IFT_BRIDGE) { \
6329 BRIDGE_BPF_MTAP_INPUT(sc, m); \
6330 /* Filter on the physical interface. */ \
6331 PFIL_PHYS(sc, iface, m); \
6332 } else { \
6333 bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0); \
6334 } \
6335 if (bif->bif_ifflags & IFBIF_LEARNING) { \
6336 error = bridge_rtupdate(sc, eh_in.ether_shost, \
6337 vlan, bif, 0, IFBAF_DYNAMIC); \
6338 if (error && bif->bif_addrmax) { \
6339 BRIDGE_UNLOCK(sc); \
6340 m_freem(m); \
6341 return (EJUSTRETURN); \
6342 } \
6343 } \
6344 BRIDGE_UNLOCK(sc); \
6345 inject_input_packet(iface, m); \
6346 return (EJUSTRETURN); \
6347 } \
6348 \
6349 /* We just received a packet that we sent out. */ \
6350 if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 || \
6351 CARP_CHECK_WE_ARE_SRC((iface))) { \
6352 BRIDGE_UNLOCK(sc); \
6353 m_freem(m); \
6354 return (EJUSTRETURN); \
6355 }
6356
6357 /*
6358 * Unicast.
6359 */
6360
6361 /* handle MAC-NAT if enabled */
6362 if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6363 ifnet_t dst_if;
6364 boolean_t is_input = FALSE;
6365
6366 dst_if = bridge_mac_nat_input(sc, data, &is_input);
6367 m = *data;
6368 if (dst_if == ifp) {
6369 /* our input packet */
6370 } else if (dst_if != NULL || m == NULL) {
6371 BRIDGE_UNLOCK(sc);
6372 if (dst_if != NULL) {
6373 ASSERT(m != NULL);
6374 if (is_input) {
6375 inject_input_packet(ifp: dst_if, m);
6376 } else {
6377 (void)bridge_enqueue(bridge_ifp, NULL,
6378 dst_ifp: dst_if, m,
6379 cksum_op: CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6380 }
6381 }
6382 return EJUSTRETURN;
6383 }
6384 }
6385
6386 /*
6387 * If the packet is for the bridge, pass it up for local processing.
6388 */
6389 if (_ether_cmp(a: eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6390 CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6391 bpf_packet_func bpf_input_func = sc->sc_bpf_input;
6392
6393 /*
6394 * If the interface is learning, and the source
6395 * address is valid and not multicast, record
6396 * the address.
6397 */
6398 if (bif->bif_ifflags & IFBIF_LEARNING) {
6399 (void) bridge_rtupdate(sc, eh_in.ether_shost,
6400 vlan, bif, 0, IFBAF_DYNAMIC);
6401 }
6402 BRIDGE_UNLOCK(sc);
6403
6404 bridge_interface_input(bridge_ifp, m, bpf_input_func);
6405 return EJUSTRETURN;
6406 }
6407
6408 /*
6409 * if the destination of the packet is for the MAC address of
6410 * the member interface itself, then we don't need to forward
6411 * it -- just pass it back. Note that it'll likely just be
6412 * dropped by the stack, but if something else is bound to
6413 * the interface directly (for example, the wireless stats
6414 * protocol -- although that actually uses BPF right now),
6415 * then it will consume the packet
6416 *
6417 * ALSO, note that we do this check AFTER checking for the
6418 * bridge's own MAC address, because the bridge may be
6419 * using the SAME MAC address as one of its interfaces
6420 */
6421 if (is_ifp_mac) {
6422
6423#ifdef VERY_VERY_VERY_DIAGNOSTIC
6424 BRIDGE_LOG(LOG_NOTICE, 0,
6425 "not forwarding packet bound for member interface");
6426#endif
6427
6428 BRIDGE_UNLOCK(sc);
6429 return 0;
6430 }
6431
6432 /* Now check the remaining bridge members. */
6433 TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6434 if (bif2->bif_ifp != ifp) {
6435 GRAB_OUR_PACKETS(bif2->bif_ifp);
6436 }
6437 }
6438
6439#undef CARP_CHECK_WE_ARE_DST
6440#undef CARP_CHECK_WE_ARE_SRC
6441#undef GRAB_OUR_PACKETS
6442
6443 /*
6444 * Perform the bridge forwarding function.
6445 *
6446 * Note that bridge_forward calls BRIDGE_UNLOCK
6447 */
6448 bridge_forward(sc, sbif: bif, m);
6449
6450 return EJUSTRETURN;
6451}
6452
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	sc	bridge softc.
 *	sbif	member the frame arrived on, or NULL when the bridge
 *		interface itself is the sender.
 *	m	frame to send.  The original mbuf is handed to the last
 *		entry of the member list when that entry is eligible;
 *		otherwise it is freed before returning.
 *	runfilt	when non-zero (and PF plus member filtering are enabled)
 *		each outbound copy is passed through bridge_pf().
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    struct mbuf *m, int runfilt)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	struct mbuf *mc;
	struct mbuf *mc_in;
	struct ifnet *dst_if;
	/* used: set to 1 once the original mbuf m (not a copy) is handed off */
	int error = 0, used = 0;
	boolean_t bridge_if_out;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		/* frame was received on a member interface */
		bridge_if_out = FALSE;
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record while holding the lock */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		bridge_if_out = TRUE;
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		sbif = NULL;
		src_if = NULL;
	}

	/*
	 * NOTE(review): BRIDGE_LOCK2REF presumably trades the held lock for
	 * a reference on sc (it is paired with BRIDGE_UNREF at the end);
	 * confirm against the macro definition.  On failure the frame is
	 * dropped.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		m_freem(m);
		return;
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip STP-enabled members that are in the discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/*
		 * frames that are neither broadcast nor multicast only go
		 * to members that have IFBIF_DISCOVER set
		 */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
			continue;
		}

		/*
		 * The last entry of the member list gets the original mbuf;
		 * every other eligible member gets a duplicate.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				/* count the failed copy as a bridge output error */
				(void) ifnet_stat_increment_out(interface: bridge_ifp,
				    packets_out: 0, bytes_out: 0, errors_out: 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (!bridge_if_out &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(m: mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet without holding the lock */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
		}

		sc_filter_flags = sc->sc_filter_flags;
		if (runfilt &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(a: mc->m_pkthdr.len, b: max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						interface: sc->sc_ifp, packets_out: 0, bytes_out: 0, errors_out: 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/*
			 * On filter failure, or when the filter leaves mc NULL,
			 * the input copy is released here and the member is
			 * skipped.
			 */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			if (bif_has_checksum_offload(bif: dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(bif: sbif)) {
				error = bridge_verify_checksum(mp: &mc,
				    stats_p: &dbif->bif_stats);
				if (error != 0) {
					/* drop only the outbound copy; mc_in is still delivered below */
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_ifp: dst_if, m: mc, cksum_op);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		/*
		 * Deliver the extra copy as input on dst_if: tap it, record the
		 * Ethernet header pointer, then advance the data pointer and
		 * shrink the lengths by ETHER_HDR_LEN.
		 */
		bpf_tap_in(interface: dst_if, DLT_EN10MB, packet: mc_in, NULL, header_len: 0);
		mbuf_pkthdr_setrcvif(mbuf: mc_in, ifp: dst_if);
		mbuf_pkthdr_setheader(mbuf: mc_in, header: mbuf_data(mbuf: mc_in));
		mbuf_setdata(mbuf: mc_in, data: (char *)mbuf_data(mbuf: mc_in) + ETHER_HDR_LEN,
		    len: mbuf_len(mbuf: mc_in) - ETHER_HDR_LEN);
		mbuf_pkthdr_adjustlen(mbuf: mc_in, amount: -ETHER_HDR_LEN);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* the original mbuf was never handed to a member: release it */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6645
6646/*
6647 * bridge_span:
6648 *
6649 * Duplicate a packet out one or more interfaces that are in span mode,
6650 * the original mbuf is unmodified.
6651 */
6652static void
6653bridge_span(struct bridge_softc *sc, struct mbuf *m)
6654{
6655 struct bridge_iflist *bif;
6656 struct ifnet *dst_if;
6657 struct mbuf *mc;
6658
6659 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6660 return;
6661 }
6662
6663 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6664 dst_if = bif->bif_ifp;
6665
6666 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6667 continue;
6668 }
6669
6670 mc = m_copypacket(m, M_DONTWAIT);
6671 if (mc == NULL) {
6672 (void) ifnet_stat_increment_out(interface: sc->sc_ifp, packets_out: 0, bytes_out: 0, errors_out: 1);
6673 continue;
6674 }
6675
6676 (void) bridge_enqueue(bridge_ifp: sc->sc_ifp, NULL, dst_ifp: dst_if, m: mc,
6677 cksum_op: CHECKSUM_OPERATION_NONE);
6678 }
6679}
6680
6681
6682/*
6683 * bridge_rtupdate:
6684 *
6685 * Add a bridge routing entry.
6686 */
6687static int
6688bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6689 struct bridge_iflist *bif, int setflags, uint8_t flags)
6690{
6691 struct bridge_rtnode *brt;
6692 int error;
6693
6694 BRIDGE_LOCK_ASSERT_HELD(sc);
6695
6696 /* Check the source address is valid and not multicast. */
6697 if (ETHER_IS_MULTICAST(dst) ||
6698 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6699 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6700 return EINVAL;
6701 }
6702
6703
6704 /* 802.1p frames map to vlan 1 */
6705 if (vlan == 0) {
6706 vlan = 1;
6707 }
6708
6709 /*
6710 * A route for this destination might already exist. If so,
6711 * update it, otherwise create a new one.
6712 */
6713 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6714 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6715 sc->sc_brtexceeded++;
6716 return ENOSPC;
6717 }
6718 /* Check per interface address limits (if enabled) */
6719 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6720 bif->bif_addrexceeded++;
6721 return ENOSPC;
6722 }
6723
6724 /*
6725 * Allocate a new bridge forwarding node, and
6726 * initialize the expiration time and Ethernet
6727 * address.
6728 */
6729 brt = zalloc_noblock(kt_view: bridge_rtnode_pool);
6730 if (brt == NULL) {
6731 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6732 "zalloc_nolock failed");
6733 return ENOMEM;
6734 }
6735 bzero(s: brt, n: sizeof(struct bridge_rtnode));
6736
6737 if (bif->bif_ifflags & IFBIF_STICKY) {
6738 brt->brt_flags = IFBAF_STICKY;
6739 } else {
6740 brt->brt_flags = IFBAF_DYNAMIC;
6741 }
6742
6743 memcpy(dst: brt->brt_addr, src: dst, ETHER_ADDR_LEN);
6744 brt->brt_vlan = vlan;
6745
6746
6747 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6748 zfree(bridge_rtnode_pool, brt);
6749 return error;
6750 }
6751 brt->brt_dst = bif;
6752 bif->bif_addrcnt++;
6753 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6754 "added %02x:%02x:%02x:%02x:%02x:%02x "
6755 "on %s count %u hashsize %u",
6756 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6757 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6758 sc->sc_rthash_size);
6759 }
6760
6761 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6762 brt->brt_dst != bif) {
6763 brt->brt_dst->bif_addrcnt--;
6764 brt->brt_dst = bif;
6765 brt->brt_dst->bif_addrcnt++;
6766 }
6767
6768 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6769 unsigned long now;
6770
6771 now = (unsigned long) net_uptime();
6772 brt->brt_expire = now + sc->sc_brttimeout;
6773 }
6774 if (setflags) {
6775 brt->brt_flags = flags;
6776 }
6777
6778
6779 return 0;
6780}
6781
6782/*
6783 * bridge_rtlookup:
6784 *
6785 * Lookup the destination interface for an address.
6786 */
6787static struct ifnet *
6788bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6789{
6790 struct bridge_rtnode *brt;
6791
6792 BRIDGE_LOCK_ASSERT_HELD(sc);
6793
6794 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6795 return NULL;
6796 }
6797
6798 return brt->brt_ifp;
6799}
6800
6801/*
6802 * bridge_rttrim:
6803 *
6804 * Trim the routine table so that we have a number
6805 * of routing entries less than or equal to the
6806 * maximum number.
6807 */
6808static void
6809bridge_rttrim(struct bridge_softc *sc)
6810{
6811 struct bridge_rtnode *brt, *nbrt;
6812
6813 BRIDGE_LOCK_ASSERT_HELD(sc);
6814
6815 /* Make sure we actually need to do this. */
6816 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6817 return;
6818 }
6819
6820 /* Force an aging cycle; this might trim enough addresses. */
6821 bridge_rtage(sc);
6822 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6823 return;
6824 }
6825
6826 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6827 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6828 bridge_rtnode_destroy(sc, brt);
6829 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6830 return;
6831 }
6832 }
6833 }
6834}
6835
6836/*
6837 * bridge_aging_timer:
6838 *
6839 * Aging periodic timer for the bridge routing table.
6840 */
6841static void
6842bridge_aging_timer(struct bridge_softc *sc)
6843{
6844 BRIDGE_LOCK_ASSERT_HELD(sc);
6845
6846 bridge_rtage(sc);
6847 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6848 (sc->sc_flags & SCF_DETACHING) == 0) {
6849 sc->sc_aging_timer.bdc_sc = sc;
6850 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6851 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6852 bridge_schedule_delayed_call(call: &sc->sc_aging_timer);
6853 }
6854}
6855
6856/*
6857 * bridge_rtage:
6858 *
6859 * Perform an aging cycle.
6860 */
6861static void
6862bridge_rtage(struct bridge_softc *sc)
6863{
6864 struct bridge_rtnode *brt, *nbrt;
6865 unsigned long now;
6866
6867 BRIDGE_LOCK_ASSERT_HELD(sc);
6868
6869 now = (unsigned long) net_uptime();
6870
6871 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6872 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6873 if (now >= brt->brt_expire) {
6874 bridge_rtnode_destroy(sc, brt);
6875 }
6876 }
6877 }
6878 if (sc->sc_mac_nat_bif != NULL) {
6879 bridge_mac_nat_age_entries(sc, now);
6880 }
6881}
6882
6883/*
6884 * bridge_rtflush:
6885 *
6886 * Remove all dynamic addresses from the bridge.
6887 */
6888static void
6889bridge_rtflush(struct bridge_softc *sc, int full)
6890{
6891 struct bridge_rtnode *brt, *nbrt;
6892
6893 BRIDGE_LOCK_ASSERT_HELD(sc);
6894
6895 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6896 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6897 bridge_rtnode_destroy(sc, brt);
6898 }
6899 }
6900}
6901
6902/*
6903 * bridge_rtdaddr:
6904 *
6905 * Remove an address from the table.
6906 */
6907static int
6908bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6909{
6910 struct bridge_rtnode *brt;
6911 int found = 0;
6912
6913 BRIDGE_LOCK_ASSERT_HELD(sc);
6914
6915 /*
6916 * If vlan is zero then we want to delete for all vlans so the lookup
6917 * may return more than one.
6918 */
6919 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6920 bridge_rtnode_destroy(sc, brt);
6921 found = 1;
6922 }
6923
6924 return found ? 0 : ENOENT;
6925}
6926
6927/*
6928 * bridge_rtdelete:
6929 *
6930 * Delete routes to a specific member interface.
6931 */
6932static void
6933bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6934{
6935 struct bridge_rtnode *brt, *nbrt;
6936
6937 BRIDGE_LOCK_ASSERT_HELD(sc);
6938
6939 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6940 if (brt->brt_ifp == ifp && (full ||
6941 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6942 bridge_rtnode_destroy(sc, brt);
6943 }
6944 }
6945}
6946
6947/*
6948 * bridge_rtable_init:
6949 *
6950 * Initialize the route table for this bridge.
6951 */
6952static int
6953bridge_rtable_init(struct bridge_softc *sc)
6954{
6955 u_int32_t i;
6956
6957 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6958 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6959 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6960
6961 for (i = 0; i < sc->sc_rthash_size; i++) {
6962 LIST_INIT(&sc->sc_rthash[i]);
6963 }
6964
6965 sc->sc_rthash_key = RandomULong();
6966
6967 LIST_INIT(&sc->sc_rtlist);
6968
6969 return 0;
6970}
6971
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Doubles the number of hash buckets once the table holds at least
 *	four entries per bucket.  The bridge lock is held on entry and on
 *	return, but it is dropped around the allocation of the new bucket
 *	array; SCF_RESIZING is set for the duration of that window.
 *	Every route on sc_rtlist is then re-hashed into the new array under
 *	a fresh hash key.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* mark the resize as in progress before giving up the lock */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* the bridge may have started detaching while the lock was dropped */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* unlink each route from its old bucket and insert it into the new table */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	/*
	 * NOTE(review): the early "load factor not reached" exit also lands
	 * in the error == 0 branch, so it logs the unchanged size and passes
	 * a NULL old table of size 0 to kfree_type().
	 */
	if (error == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		/* the new array (possibly NULL) was never installed; release it */
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
7054
7055/*
7056 * Resize the number of hash buckets based on the load factor
7057 * Currently only grow
7058 * Failing to resize the hash table is not fatal
7059 */
7060static void
7061bridge_rthash_resize(struct bridge_softc *sc)
7062{
7063 BRIDGE_LOCK_ASSERT_HELD(sc);
7064
7065 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7066 return;
7067 }
7068
7069 /*
7070 * Four entries per hash bucket is our ideal load factor
7071 */
7072 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7073 return;
7074 }
7075 /*
7076 * Hard limit on the size of the routing hash table
7077 */
7078 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7079 return;
7080 }
7081
7082 sc->sc_resize_call.bdc_sc = sc;
7083 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7084 bridge_schedule_delayed_call(call: &sc->sc_resize_call);
7085}
7086
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge.
 *
 *	Asserts that no routes remain (sc_brtcnt == 0), releases the hash
 *	bucket array and resets the bucket pointer and count.
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	/* the size must still be valid here: it is passed to kfree_type() */
	kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
7102
7103/*
7104 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7105 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7106 */
7107#define mix(a, b, c) \
7108do { \
7109 a -= b; a -= c; a ^= (c >> 13); \
7110 b -= c; b -= a; b ^= (a << 8); \
7111 c -= a; c -= b; c ^= (b >> 13); \
7112 a -= b; a -= c; a ^= (c >> 12); \
7113 b -= c; b -= a; b ^= (a << 16); \
7114 c -= a; c -= b; c ^= (b >> 5); \
7115 a -= b; a -= c; a ^= (c >> 3); \
7116 b -= c; b -= a; b ^= (a << 10); \
7117 c -= a; c -= b; c ^= (b >> 15); \
7118} while ( /*CONSTCOND*/ 0)
7119
7120static __inline uint32_t
7121bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
7122{
7123 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7124
7125 b += addr[5] << 8;
7126 b += addr[4];
7127 a += addr[3] << 24;
7128 a += addr[2] << 16;
7129 a += addr[1] << 8;
7130 a += addr[0];
7131
7132 mix(a, b, c);
7133
7134 return c & BRIDGE_RTHASH_MASK(sc);
7135}
7136
7137#undef mix
7138
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	they are equal, otherwise the difference a[i] - b[i] at the first
 *	byte that differs.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;
	int delta;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		delta = (int)a[idx] - (int)b[idx];
		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
7150
7151/*
7152 * bridge_rtnode_lookup:
7153 *
7154 * Look up a bridge route node for the specified destination. Compare the
7155 * vlan id or if zero then just return the first match.
7156 */
7157static struct bridge_rtnode *
7158bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7159 uint16_t vlan)
7160{
7161 struct bridge_rtnode *brt;
7162 uint32_t hash;
7163 int dir;
7164
7165 BRIDGE_LOCK_ASSERT_HELD(sc);
7166
7167 hash = bridge_rthash(sc, addr);
7168 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7169 dir = bridge_rtnode_addr_cmp(a: addr, b: brt->brt_addr);
7170 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7171 return brt;
7172 }
7173 if (dir > 0) {
7174 return NULL;
7175 }
7176 }
7177
7178 return NULL;
7179}
7180
7181/*
7182 * bridge_rtnode_hash:
7183 *
7184 * Insert the specified bridge node into the route hash table.
7185 * This is used when adding a new node or to rehash when resizing
7186 * the hash table
7187 */
7188static int
7189bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7190{
7191 struct bridge_rtnode *lbrt;
7192 uint32_t hash;
7193 int dir;
7194
7195 BRIDGE_LOCK_ASSERT_HELD(sc);
7196
7197 hash = bridge_rthash(sc, addr: brt->brt_addr);
7198
7199 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7200 if (lbrt == NULL) {
7201 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7202 goto out;
7203 }
7204
7205 do {
7206 dir = bridge_rtnode_addr_cmp(a: brt->brt_addr, b: lbrt->brt_addr);
7207 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7208 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7209 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7210 sc->sc_ifp->if_xname,
7211 brt->brt_addr[0], brt->brt_addr[1],
7212 brt->brt_addr[2], brt->brt_addr[3],
7213 brt->brt_addr[4], brt->brt_addr[5]);
7214 return EEXIST;
7215 }
7216 if (dir > 0) {
7217 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7218 goto out;
7219 }
7220 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7221 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7222 goto out;
7223 }
7224 lbrt = LIST_NEXT(lbrt, brt_hash);
7225 } while (lbrt != NULL);
7226
7227 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7228 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7229 sc->sc_ifp->if_xname,
7230 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7231 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7232out:
7233 return 0;
7234}
7235
7236/*
7237 * bridge_rtnode_insert:
7238 *
7239 * Insert the specified bridge node into the route table. We
7240 * assume the entry is not already in the table.
7241 */
7242static int
7243bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7244{
7245 int error;
7246
7247 error = bridge_rtnode_hash(sc, brt);
7248 if (error != 0) {
7249 return error;
7250 }
7251
7252 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7253 sc->sc_brtcnt++;
7254
7255 bridge_rthash_resize(sc);
7256
7257 return 0;
7258}
7259
7260/*
7261 * bridge_rtnode_destroy:
7262 *
7263 * Destroy a bridge rtnode.
7264 */
7265static void
7266bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7267{
7268 BRIDGE_LOCK_ASSERT_HELD(sc);
7269
7270 LIST_REMOVE(brt, brt_hash);
7271
7272 LIST_REMOVE(brt, brt_list);
7273 sc->sc_brtcnt--;
7274 brt->brt_dst->bif_addrcnt--;
7275 zfree(bridge_rtnode_pool, brt);
7276}
7277
7278#if BRIDGESTP
7279/*
7280 * bridge_rtable_expire:
7281 *
7282 * Set the expiry time for all routes on an interface.
7283 */
7284static void
7285bridge_rtable_expire(struct ifnet *ifp, int age)
7286{
7287 struct bridge_softc *sc = ifp->if_bridge;
7288 struct bridge_rtnode *brt;
7289
7290 BRIDGE_LOCK(sc);
7291
7292 /*
7293 * If the age is zero then flush, otherwise set all the expiry times to
7294 * age for the interface
7295 */
7296 if (age == 0) {
7297 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7298 } else {
7299 unsigned long now;
7300
7301 now = (unsigned long) net_uptime();
7302
7303 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7304 /* Cap the expiry time to 'age' */
7305 if (brt->brt_ifp == ifp &&
7306 brt->brt_expire > now + age &&
7307 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7308 brt->brt_expire = now + age;
7309 }
7310 }
7311 }
7312 BRIDGE_UNLOCK(sc);
7313}
7314
7315/*
7316 * bridge_state_change:
7317 *
7318 * Callback from the bridgestp code when a port changes states.
7319 */
7320static void
7321bridge_state_change(struct ifnet *ifp, int state)
7322{
7323 struct bridge_softc *sc = ifp->if_bridge;
7324 static const char *stpstates[] = {
7325 "disabled",
7326 "listening",
7327 "learning",
7328 "forwarding",
7329 "blocking",
7330 "discarding"
7331 };
7332
7333 if (log_stp) {
7334 log(LOG_NOTICE, "%s: state changed to %s on %s",
7335 sc->sc_ifp->if_xname,
7336 stpstates[state], ifp->if_xname);
7337 }
7338}
7339#endif /* BRIDGESTP */
7340
7341/*
7342 * bridge_set_bpf_tap:
7343 *
7344 * Sets ups the BPF callbacks.
7345 */
7346static errno_t
7347bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7348{
7349 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(interface: ifp);
7350
7351 /* TBD locking */
7352 if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7353 return ENODEV;
7354 }
7355 switch (mode) {
7356 case BPF_TAP_DISABLE:
7357 sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7358 break;
7359
7360 case BPF_TAP_INPUT:
7361 sc->sc_bpf_input = bpf_callback;
7362 break;
7363
7364 case BPF_TAP_OUTPUT:
7365 sc->sc_bpf_output = bpf_callback;
7366 break;
7367
7368 case BPF_TAP_INPUT_OUTPUT:
7369 sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7370 break;
7371
7372 default:
7373 break;
7374 }
7375
7376 return 0;
7377}
7378
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.
 *
 *	Final teardown of the bridge softc: detaches STP (when built in),
 *	releases the route hash table, removes the bridge from the global
 *	bridge list, releases the ifnet, and finally destroys the bridge
 *	mutex and frees the softc itself.  sc must not be touched after
 *	this returns.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(interface: ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* the global bridge list is protected by bridge_list_mtx */
	lck_mtx_lock(lck: &bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(lck: &bridge_list_mtx);

	ifnet_release(interface: ifp);

	/* the mutex lives inside sc, so destroy it before freeing sc */
	lck_mtx_destroy(lck: &sc->sc_mtx, grp: &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7405
7406/*
7407 * bridge_bpf_input:
7408 *
7409 * Invoke the input BPF callback if enabled
7410 */
7411static errno_t
7412bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7413{
7414 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(interface: ifp);
7415 bpf_packet_func input_func = sc->sc_bpf_input;
7416
7417 if (input_func != NULL) {
7418 if (mbuf_pkthdr_rcvif(mbuf: m) != ifp) {
7419 BRIDGE_LOG(LOG_NOTICE, 0,
7420 "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7421 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7422 (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7423 }
7424 (*input_func)(ifp, m);
7425 }
7426 return 0;
7427}
7428
7429/*
7430 * bridge_bpf_output:
7431 *
7432 * Invoke the output BPF callback if enabled
7433 */
7434static errno_t
7435bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7436{
7437 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(interface: ifp);
7438 bpf_packet_func output_func = sc->sc_bpf_output;
7439
7440 if (output_func != NULL) {
7441 (*output_func)(ifp, m);
7442 }
7443 return 0;
7444}
7445
7446/*
7447 * bridge_link_event:
7448 *
7449 * Report a data link event on an interface
7450 */
7451static void
7452bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7453{
7454 struct event {
7455 u_int32_t ifnet_family;
7456 u_int32_t unit;
7457 char if_name[IFNAMSIZ];
7458 };
7459 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7460 struct kern_event_msg *header = (struct kern_event_msg*)message;
7461 struct event *data = (struct event *)(header + 1);
7462
7463 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7464 "%s event_code %u - %s", ifp->if_xname,
7465 event_code, dlil_kev_dl_code_str(event_code));
7466 header->total_size = sizeof(message);
7467 header->vendor_code = KEV_VENDOR_APPLE;
7468 header->kev_class = KEV_NETWORK_CLASS;
7469 header->kev_subclass = KEV_DL_SUBCLASS;
7470 header->event_code = event_code;
7471 data->ifnet_family = ifnet_family(interface: ifp);
7472 data->unit = (u_int32_t)ifnet_unit(interface: ifp);
7473 strlcpy(dst: data->if_name, src: ifnet_name(interface: ifp), IFNAMSIZ);
7474 ifnet_event(interface: ifp, event_ptr: header);
7475}
7476
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop.  Increments the bridge_hostfilter_stats
 *	counter named by `reason', logs the calling function, line and
 *	reason name, and sets the variable `error' to EINVAL; the expansion
 *	site must therefore have an assignable `error' in scope.  The macro
 *	does not branch: callers follow it with their own goto/return.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
	}
7483
7484/*
7485 * Make sure this is a DHCP or Bootp request that match the host filter
7486 */
7487static int
7488bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7489{
7490 int error = EINVAL;
7491 struct dhcp dhcp;
7492
7493 /*
7494 * Note: We use the dhcp structure because bootp structure definition
7495 * is larger and some vendors do not pad the request
7496 */
7497 error = mbuf_copydata(mbuf: m, offset, length: sizeof(struct dhcp), out_data: &dhcp);
7498 if (error != 0) {
7499 BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7500 goto done;
7501 }
7502 if (dhcp.dp_op != BOOTREQUEST) {
7503 BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7504 goto done;
7505 }
7506 /*
7507 * The hardware address must be an exact match
7508 */
7509 if (dhcp.dp_htype != ARPHRD_ETHER) {
7510 BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7511 goto done;
7512 }
7513 if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7514 BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7515 goto done;
7516 }
7517 if (bcmp(s1: dhcp.dp_chaddr, s2: bif->bif_hf_hwsrc,
7518 ETHER_ADDR_LEN) != 0) {
7519 BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7520 goto done;
7521 }
7522 /*
7523 * Client address must match the host address or be not specified
7524 */
7525 if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7526 dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7527 BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7528 goto done;
7529 }
7530 error = 0;
7531done:
7532 return error;
7533}
7534
7535static int
7536bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7537{
7538 int error = EINVAL;
7539 struct ether_header *eh;
7540 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7541 mbuf_t m = *data;
7542
7543 eh = mtod(m, struct ether_header *);
7544
7545 /*
7546 * Restrict the source hardware address
7547 */
7548 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
7549 bcmp(s1: eh->ether_shost, s2: bif->bif_hf_hwsrc,
7550 ETHER_ADDR_LEN) != 0) {
7551 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7552 goto done;
7553 }
7554
7555 /*
7556 * Restrict Ethernet protocols to ARP and IP/IPv6
7557 */
7558 if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7559 struct ether_arp *ea;
7560 size_t minlen = sizeof(struct ether_header) +
7561 sizeof(struct ether_arp);
7562
7563 /*
7564 * Make the Ethernet and ARP headers contiguous
7565 */
7566 if (mbuf_pkthdr_len(mbuf: m) < minlen) {
7567 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7568 goto done;
7569 }
7570 if (mbuf_len(mbuf: m) < minlen && mbuf_pullup(mbuf: data, len: minlen) != 0) {
7571 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7572 __func__, __LINE__);
7573 goto done;
7574 }
7575 m = *data;
7576
7577 /*
7578 * Verify this is an ethernet/ip arp
7579 */
7580 eh = mtod(m, struct ether_header *);
7581 ea = (struct ether_arp *)(eh + 1);
7582 if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7583 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7584 __func__, __LINE__);
7585 goto done;
7586 }
7587 if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7588 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7589 __func__, __LINE__);
7590 goto done;
7591 }
7592 /*
7593 * Verify the address lengths are correct
7594 */
7595 if (ea->arp_hln != ETHER_ADDR_LEN) {
7596 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7597 goto done;
7598 }
7599 if (ea->arp_pln != sizeof(struct in_addr)) {
7600 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7601 __func__, __LINE__);
7602 goto done;
7603 }
7604 /*
7605 * Allow only ARP request or ARP reply
7606 */
7607 if (ea->arp_op != htons(ARPOP_REQUEST) &&
7608 ea->arp_op != htons(ARPOP_REPLY)) {
7609 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7610 goto done;
7611 }
7612 if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7613 /*
7614 * Verify source hardware address matches
7615 */
7616 if (bcmp(s1: ea->arp_sha, s2: bif->bif_hf_hwsrc,
7617 ETHER_ADDR_LEN) != 0) {
7618 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7619 goto done;
7620 }
7621 }
7622 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7623 /*
7624 * Verify source protocol address:
7625 * May be null for an ARP probe
7626 */
7627 if (bcmp(s1: ea->arp_spa, s2: &bif->bif_hf_ipsrc.s_addr,
7628 n: sizeof(struct in_addr)) != 0 &&
7629 bcmp(s1: ea->arp_spa, s2: &inaddr_any,
7630 n: sizeof(struct in_addr)) != 0) {
7631 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7632 goto done;
7633 }
7634 }
7635 bridge_hostfilter_stats.brhf_arp_ok += 1;
7636 error = 0;
7637 } else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7638 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7639 struct ip iphdr;
7640 size_t offset;
7641
7642 /*
7643 * Make the Ethernet and IP headers contiguous
7644 */
7645 if (mbuf_pkthdr_len(mbuf: m) < minlen) {
7646 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7647 goto done;
7648 }
7649 offset = sizeof(struct ether_header);
7650 error = mbuf_copydata(mbuf: m, offset, length: sizeof(struct ip), out_data: &iphdr);
7651 if (error != 0) {
7652 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7653 goto done;
7654 }
7655 if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7656 /*
7657 * Verify the source IP address
7658 */
7659 if (iphdr.ip_p == IPPROTO_UDP) {
7660 struct udphdr udp;
7661
7662 minlen += sizeof(struct udphdr);
7663 if (mbuf_pkthdr_len(mbuf: m) < minlen) {
7664 BRIDGE_HF_DROP(brhf_ip_too_small,
7665 __func__, __LINE__);
7666 goto done;
7667 }
7668
7669 /*
7670 * Allow all zero addresses for DHCP requests
7671 */
7672 if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7673 iphdr.ip_src.s_addr != INADDR_ANY) {
7674 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7675 __func__, __LINE__);
7676 goto done;
7677 }
7678 offset = sizeof(struct ether_header) +
7679 (IP_VHL_HL(iphdr.ip_vhl) << 2);
7680 error = mbuf_copydata(mbuf: m, offset,
7681 length: sizeof(struct udphdr), out_data: &udp);
7682 if (error != 0) {
7683 BRIDGE_HF_DROP(brhf_ip_too_small,
7684 __func__, __LINE__);
7685 goto done;
7686 }
7687 /*
7688 * Either it's a Bootp/DHCP packet that we like or
7689 * it's a UDP packet from the host IP as source address
7690 */
7691 if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7692 udp.uh_dport == htons(IPPORT_BOOTPS)) {
7693 minlen += sizeof(struct dhcp);
7694 if (mbuf_pkthdr_len(mbuf: m) < minlen) {
7695 BRIDGE_HF_DROP(brhf_ip_too_small,
7696 __func__, __LINE__);
7697 goto done;
7698 }
7699 offset += sizeof(struct udphdr);
7700 error = bridge_dhcp_filter(bif, m, offset);
7701 if (error != 0) {
7702 goto done;
7703 }
7704 } else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7705 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7706 __func__, __LINE__);
7707 goto done;
7708 }
7709 } else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr) {
7710 assert(bif->bif_hf_ipsrc.s_addr != INADDR_ANY);
7711 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7712 goto done;
7713 }
7714 }
7715 /*
7716 * Allow only boring IP protocols
7717 */
7718 if (iphdr.ip_p != IPPROTO_TCP &&
7719 iphdr.ip_p != IPPROTO_UDP &&
7720 iphdr.ip_p != IPPROTO_ICMP &&
7721 iphdr.ip_p != IPPROTO_IGMP) {
7722 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7723 goto done;
7724 }
7725 bridge_hostfilter_stats.brhf_ip_ok += 1;
7726 error = 0;
7727 } else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
7728 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip6_hdr);
7729 struct ip6_hdr ip6hdr;
7730 size_t offset;
7731
7732 /*
7733 * Make the Ethernet and IP headers contiguous
7734 */
7735 if (mbuf_pkthdr_len(mbuf: m) < minlen) {
7736 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7737 goto done;
7738 }
7739 offset = sizeof(struct ether_header);
7740 error = mbuf_copydata(mbuf: m, offset, length: sizeof(struct ip6_hdr), out_data: &ip6hdr);
7741 if (error != 0) {
7742 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7743 goto done;
7744 }
7745 /*
7746 * Allow only boring IPv6 protocols
7747 */
7748 if (ip6hdr.ip6_nxt != IPPROTO_TCP &&
7749 ip6hdr.ip6_nxt != IPPROTO_UDP &&
7750 ip6hdr.ip6_nxt != IPPROTO_ICMPV6) {
7751 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7752 goto done;
7753 }
7754 bridge_hostfilter_stats.brhf_ip_ok += 1;
7755 error = 0;
7756 } else {
7757 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7758 goto done;
7759 }
7760done:
7761 if (error != 0) {
7762 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7763 if (m) {
7764 brlog_mbuf_data(m, offset: 0,
7765 len: sizeof(struct ether_header) +
7766 sizeof(struct ip));
7767 }
7768 }
7769
7770 if (m != NULL) {
7771 m_freem(m);
7772 }
7773 }
7774 return error;
7775}
7776
7777/*
7778 * MAC NAT
7779 */
7780
7781static errno_t
7782bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7783{
7784 errno_t error = 0;
7785
7786 BRIDGE_LOCK_ASSERT_HELD(sc);
7787
7788 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7789 error = EINVAL;
7790 goto done;
7791 }
7792 if (sc->sc_mac_nat_bif != NULL) {
7793 if (sc->sc_mac_nat_bif != bif) {
7794 error = EBUSY;
7795 }
7796 goto done;
7797 }
7798 sc->sc_mac_nat_bif = bif;
7799 bif->bif_ifflags |= IFBIF_MAC_NAT;
7800 bridge_mac_nat_populate_entries(sc);
7801
7802done:
7803 return error;
7804}
7805
7806static void
7807bridge_mac_nat_disable(struct bridge_softc *sc)
7808{
7809 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7810
7811 assert(mac_nat_bif != NULL);
7812 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7813 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7814 sc->sc_mac_nat_bif = NULL;
7815 return;
7816}
7817
7818static void
7819mac_nat_entry_print2(struct mac_nat_entry *mne,
7820 char *ifname, const char *msg1, const char *msg2)
7821{
7822 int af;
7823 char etopbuf[24];
7824 char ntopbuf[MAX_IPv6_STR_LEN];
7825 const char *space;
7826
7827 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7828 ether_ntop(buf: etopbuf, len: sizeof(etopbuf), ap: mne->mne_mac);
7829 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7830 if (msg2 == NULL) {
7831 msg2 = "";
7832 space = "";
7833 } else {
7834 space = " ";
7835 }
7836 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7837 "%s %s%s%s %p (%s, %s, %s)",
7838 ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7839 ntopbuf, etopbuf);
7840}
7841
7842static void
7843mac_nat_entry_print(struct mac_nat_entry *mne,
7844 char *ifname, const char *msg)
7845{
7846 mac_nat_entry_print2(mne, ifname, msg1: msg, NULL);
7847}
7848
7849static struct mac_nat_entry *
7850bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7851{
7852 struct mac_nat_entry *mne;
7853 struct mac_nat_entry *ret_mne = NULL;
7854
7855 if (af == AF_INET) {
7856 in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7857
7858 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7859 if (mne->mne_ip.s_addr == s_addr) {
7860 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7861 mac_nat_entry_print(mne, ifname: sc->sc_if_xname,
7862 msg: "found");
7863 }
7864 ret_mne = mne;
7865 break;
7866 }
7867 }
7868 } else {
7869 const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7870
7871 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7872 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7873 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7874 mac_nat_entry_print(mne, ifname: sc->sc_if_xname,
7875 msg: "found");
7876 }
7877 ret_mne = mne;
7878 break;
7879 }
7880 }
7881 }
7882 return ret_mne;
7883}
7884
7885static void
7886bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7887 struct mac_nat_entry *mne, const char *reason)
7888{
7889 LIST_REMOVE(mne, mne_list);
7890 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7891 mac_nat_entry_print(mne, ifname: sc->sc_if_xname, msg: reason);
7892 }
7893 zfree(bridge_mne_pool, mne);
7894 sc->sc_mne_count--;
7895}
7896
7897static struct mac_nat_entry *
7898bridge_create_mac_nat_entry(struct bridge_softc *sc,
7899 struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7900{
7901 struct mac_nat_entry_list *list;
7902 struct mac_nat_entry *mne;
7903
7904 if (sc->sc_mne_count >= sc->sc_mne_max) {
7905 sc->sc_mne_allocation_failures++;
7906 return NULL;
7907 }
7908 mne = zalloc_noblock(kt_view: bridge_mne_pool);
7909 if (mne == NULL) {
7910 sc->sc_mne_allocation_failures++;
7911 return NULL;
7912 }
7913 sc->sc_mne_count++;
7914 bzero(s: mne, n: sizeof(*mne));
7915 bcopy(src: eaddr, dst: mne->mne_mac, n: sizeof(mne->mne_mac));
7916 mne->mne_bif = bif;
7917 if (af == AF_INET) {
7918 bcopy(src: ip, dst: &mne->mne_ip, n: sizeof(mne->mne_ip));
7919 list = &sc->sc_mne_list;
7920 } else {
7921 bcopy(src: ip, dst: &mne->mne_ip6, n: sizeof(mne->mne_ip6));
7922 mne->mne_flags |= MNE_FLAGS_IPV6;
7923 list = &sc->sc_mne_list_v6;
7924 }
7925 LIST_INSERT_HEAD(list, mne, mne_list);
7926 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7927 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7928 mac_nat_entry_print(mne, ifname: sc->sc_if_xname, msg: "created");
7929 }
7930 return mne;
7931}
7932
7933static struct mac_nat_entry *
7934bridge_update_mac_nat_entry(struct bridge_softc *sc,
7935 struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7936{
7937 struct mac_nat_entry *mne;
7938
7939 mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7940 if (mne != NULL) {
7941 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7942
7943 if (mne->mne_bif == mac_nat_bif) {
7944 /* the MAC NAT interface takes precedence */
7945 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7946 if (mne->mne_bif != bif) {
7947 mac_nat_entry_print2(mne,
7948 ifname: sc->sc_if_xname, msg1: "reject",
7949 msg2: bif->bif_ifp->if_xname);
7950 }
7951 }
7952 } else if (mne->mne_bif != bif) {
7953 const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7954
7955 mne->mne_bif = bif;
7956 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7957 mac_nat_entry_print2(mne,
7958 ifname: sc->sc_if_xname, msg1: "replaced",
7959 msg2: old_if);
7960 }
7961 bcopy(src: eaddr, dst: mne->mne_mac, n: sizeof(mne->mne_mac));
7962 }
7963 mne->mne_expire = (unsigned long)net_uptime() +
7964 sc->sc_brttimeout;
7965 } else {
7966 mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7967 }
7968 return mne;
7969}
7970
7971static void
7972bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7973 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7974{
7975 struct mac_nat_entry *mne;
7976 struct mac_nat_entry *tmne;
7977
7978 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7979 if (bif != NULL && mne->mne_bif != bif) {
7980 continue;
7981 }
7982 bridge_destroy_mac_nat_entry(sc, mne, reason: "flushed");
7983 }
7984}
7985
7986/*
7987 * bridge_mac_nat_flush_entries:
7988 *
7989 * Flush MAC NAT entries for the specified member. Flush all entries if
7990 * the member is the one that requires MAC NAT, otherwise just flush the
7991 * ones for the specified member.
7992 */
7993static void
7994bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7995{
7996 struct bridge_iflist *flush_bif;
7997
7998 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7999 bridge_mac_nat_flush_entries_common(sc, list: &sc->sc_mne_list, bif: flush_bif);
8000 bridge_mac_nat_flush_entries_common(sc, list: &sc->sc_mne_list_v6, bif: flush_bif);
8001}
8002
8003static void
8004bridge_mac_nat_populate_entries(struct bridge_softc *sc)
8005{
8006 errno_t error;
8007 ifnet_t ifp;
8008 ifaddr_t *list;
8009 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
8010
8011 assert(mac_nat_bif != NULL);
8012 ifp = mac_nat_bif->bif_ifp;
8013 error = ifnet_get_address_list(interface: ifp, addresses: &list);
8014 if (error != 0) {
8015 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8016 "ifnet_get_address_list(%s) failed %d",
8017 ifp->if_xname, error);
8018 return;
8019 }
8020 for (ifaddr_t *scan = list; *scan != NULL; scan++) {
8021 sa_family_t af;
8022 void *ip;
8023
8024 union {
8025 struct sockaddr sa;
8026 struct sockaddr_in sin;
8027 struct sockaddr_in6 sin6;
8028 } u;
8029 af = ifaddr_address_family(ifaddr: *scan);
8030 switch (af) {
8031 case AF_INET:
8032 case AF_INET6:
8033 error = ifaddr_address(ifaddr: *scan, out_addr: &u.sa, addr_size: sizeof(u));
8034 if (error != 0) {
8035 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8036 "ifaddr_address failed %d",
8037 error);
8038 break;
8039 }
8040 if (af == AF_INET) {
8041 ip = (void *)&u.sin.sin_addr;
8042 } else {
8043 if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
8044 /* remove scope ID */
8045 u.sin6.sin6_addr.s6_addr16[1] = 0;
8046 }
8047 ip = (void *)&u.sin6.sin6_addr;
8048 }
8049 bridge_create_mac_nat_entry(sc, bif: mac_nat_bif, af, ip,
8050 eaddr: (uint8_t *)IF_LLADDR(ifp));
8051 break;
8052 default:
8053 break;
8054 }
8055 }
8056 ifnet_free_address_list(addresses: list);
8057 return;
8058}
8059
8060static void
8061bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
8062 struct mac_nat_entry_list *list, unsigned long now)
8063{
8064 struct mac_nat_entry *mne;
8065 struct mac_nat_entry *tmne;
8066
8067 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
8068 if (now >= mne->mne_expire) {
8069 bridge_destroy_mac_nat_entry(sc, mne, reason: "aged out");
8070 }
8071 }
8072}
8073
8074static void
8075bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
8076{
8077 if (sc->sc_mac_nat_bif == NULL) {
8078 return;
8079 }
8080 bridge_mac_nat_age_entries_common(sc, list: &sc->sc_mne_list, now);
8081 bridge_mac_nat_age_entries_common(sc, list: &sc->sc_mne_list_v6, now);
8082}
8083
/*
 * get_in_out_string:
 *	Return the direction label used in log messages: "OUT" when
 *	`is_output' is non-zero, "IN" otherwise.
 */
static const char *
get_in_out_string(boolean_t is_output)
{
	if (is_output) {
		return "OUT";
	}
	return "IN";
}
8089
8090/*
8091 * is_valid_arp_packet:
8092 * Verify that this is a valid ARP packet.
8093 *
8094 * Returns TRUE if the packet is valid, FALSE otherwise.
8095 */
8096static boolean_t
8097is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
8098 struct ether_header **eh_p, struct ether_arp **ea_p)
8099{
8100 struct ether_arp *ea;
8101 struct ether_header *eh;
8102 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
8103 boolean_t is_valid = FALSE;
8104 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8105
8106 if (mbuf_pkthdr_len(mbuf: *data) < minlen) {
8107 BRIDGE_LOG(LOG_DEBUG, flags,
8108 "ARP %s short frame %lu < %lu",
8109 get_in_out_string(is_output),
8110 mbuf_pkthdr_len(*data), minlen);
8111 goto done;
8112 }
8113 if (mbuf_len(mbuf: *data) < minlen && mbuf_pullup(mbuf: data, len: minlen) != 0) {
8114 BRIDGE_LOG(LOG_DEBUG, flags,
8115 "ARP %s size %lu mbuf_pullup fail",
8116 get_in_out_string(is_output),
8117 minlen);
8118 *data = NULL;
8119 goto done;
8120 }
8121
8122 /* validate ARP packet */
8123 eh = mtod(*data, struct ether_header *);
8124 ea = (struct ether_arp *)(eh + 1);
8125 if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
8126 BRIDGE_LOG(LOG_DEBUG, flags,
8127 "ARP %s htype not ethernet",
8128 get_in_out_string(is_output));
8129 goto done;
8130 }
8131 if (ea->arp_hln != ETHER_ADDR_LEN) {
8132 BRIDGE_LOG(LOG_DEBUG, flags,
8133 "ARP %s hlen not ethernet",
8134 get_in_out_string(is_output));
8135 goto done;
8136 }
8137 if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
8138 BRIDGE_LOG(LOG_DEBUG, flags,
8139 "ARP %s ptype not IP",
8140 get_in_out_string(is_output));
8141 goto done;
8142 }
8143 if (ea->arp_pln != sizeof(struct in_addr)) {
8144 BRIDGE_LOG(LOG_DEBUG, flags,
8145 "ARP %s plen not IP",
8146 get_in_out_string(is_output));
8147 goto done;
8148 }
8149 is_valid = TRUE;
8150 *ea_p = ea;
8151 *eh_p = eh;
8152done:
8153 return is_valid;
8154}
8155
8156static struct mac_nat_entry *
8157bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
8158{
8159 struct ether_arp *ea;
8160 struct ether_header *eh;
8161 struct mac_nat_entry *mne = NULL;
8162 u_short op;
8163 struct in_addr tpa;
8164
8165 if (!is_valid_arp_packet(data, FALSE, eh_p: &eh, ea_p: &ea)) {
8166 goto done;
8167 }
8168 op = ntohs(ea->arp_op);
8169 switch (op) {
8170 case ARPOP_REQUEST:
8171 case ARPOP_REPLY:
8172 /* only care about REQUEST and REPLY */
8173 break;
8174 default:
8175 goto done;
8176 }
8177
8178 /* check the target IP address for a NAT entry */
8179 bcopy(src: ea->arp_tpa, dst: &tpa, n: sizeof(tpa));
8180 if (tpa.s_addr != 0) {
8181 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, ip: &tpa);
8182 }
8183 if (mne != NULL) {
8184 if (op == ARPOP_REPLY) {
8185 /* translate the MAC address */
8186 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8187 char mac_src[24];
8188 char mac_dst[24];
8189
8190 ether_ntop(buf: mac_src, len: sizeof(mac_src),
8191 ap: ea->arp_tha);
8192 ether_ntop(buf: mac_dst, len: sizeof(mac_dst),
8193 ap: mne->mne_mac);
8194 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8195 "%s %s ARP %s -> %s",
8196 sc->sc_if_xname,
8197 mne->mne_bif->bif_ifp->if_xname,
8198 mac_src, mac_dst);
8199 }
8200 bcopy(src: mne->mne_mac, dst: ea->arp_tha, n: sizeof(ea->arp_tha));
8201 }
8202 } else {
8203 /* handle conflicting ARP (sender matches mne) */
8204 struct in_addr spa;
8205
8206 bcopy(src: ea->arp_spa, dst: &spa, n: sizeof(spa));
8207 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8208 /* check the source IP for a NAT entry */
8209 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, ip: &spa);
8210 }
8211 }
8212
8213done:
8214 return mne;
8215}
8216
8217static boolean_t
8218bridge_mac_nat_arp_output(struct bridge_softc *sc,
8219 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8220{
8221 struct ether_arp *ea;
8222 struct ether_header *eh;
8223 struct in_addr ip;
8224 struct mac_nat_entry *mne = NULL;
8225 u_short op;
8226 boolean_t translate = FALSE;
8227
8228 if (!is_valid_arp_packet(data, TRUE, eh_p: &eh, ea_p: &ea)) {
8229 goto done;
8230 }
8231 op = ntohs(ea->arp_op);
8232 switch (op) {
8233 case ARPOP_REQUEST:
8234 case ARPOP_REPLY:
8235 /* only care about REQUEST and REPLY */
8236 break;
8237 default:
8238 goto done;
8239 }
8240
8241 bcopy(src: ea->arp_spa, dst: &ip, n: sizeof(ip));
8242 if (ip.s_addr == 0) {
8243 goto done;
8244 }
8245 /* XXX validate IP address: no multicast/broadcast */
8246 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, ip: &ip, eaddr: ea->arp_sha);
8247 if (mnr != NULL && mne != NULL) {
8248 /* record the offset to do the replacement */
8249 translate = TRUE;
8250 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8251 }
8252
8253done:
8254 return translate;
8255}
8256
8257#define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8258 + sizeof(struct ip))
8259static struct ether_header *
8260get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8261{
8262 struct ether_header *eh = NULL;
8263 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8264 size_t minlen = ETHER_IPV4_HEADER_LEN;
8265
8266 if (mbuf_pkthdr_len(mbuf: *data) < minlen) {
8267 BRIDGE_LOG(LOG_DEBUG, flags,
8268 "IP %s short frame %lu < %lu",
8269 get_in_out_string(is_output),
8270 mbuf_pkthdr_len(*data), minlen);
8271 goto done;
8272 }
8273 if (mbuf_len(mbuf: *data) < minlen && mbuf_pullup(mbuf: data, len: minlen) != 0) {
8274 BRIDGE_LOG(LOG_DEBUG, flags,
8275 "IP %s size %lu mbuf_pullup fail",
8276 get_in_out_string(is_output),
8277 minlen);
8278 *data = NULL;
8279 goto done;
8280 }
8281 eh = mtod(*data, struct ether_header *);
8282done:
8283 return eh;
8284}
8285
8286static bool
8287is_broadcast_ip_packet(mbuf_t *data)
8288{
8289 struct ether_header *eh;
8290 uint16_t ether_type;
8291 bool is_broadcast = FALSE;
8292
8293 eh = mtod(*data, struct ether_header *);
8294 ether_type = ntohs(eh->ether_type);
8295 switch (ether_type) {
8296 case ETHERTYPE_IP:
8297 eh = get_ether_ip_header(data, FALSE);
8298 if (eh != NULL) {
8299 struct in_addr dst;
8300 struct ip *iphdr;
8301
8302 iphdr = (struct ip *)(void *)(eh + 1);
8303 bcopy(src: &iphdr->ip_dst, dst: &dst, n: sizeof(dst));
8304 is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8305 }
8306 break;
8307 default:
8308 break;
8309 }
8310 return is_broadcast;
8311}
8312
8313static struct mac_nat_entry *
8314bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8315{
8316 struct in_addr dst;
8317 struct ether_header *eh;
8318 struct ip *iphdr;
8319 struct mac_nat_entry *mne = NULL;
8320
8321 eh = get_ether_ip_header(data, FALSE);
8322 if (eh == NULL) {
8323 goto done;
8324 }
8325 iphdr = (struct ip *)(void *)(eh + 1);
8326 bcopy(src: &iphdr->ip_dst, dst: &dst, n: sizeof(dst));
8327 /* XXX validate IP address */
8328 if (dst.s_addr == 0) {
8329 goto done;
8330 }
8331 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, ip: &dst);
8332done:
8333 return mne;
8334}
8335
8336static void
8337bridge_mac_nat_udp_output(struct bridge_softc *sc,
8338 struct bridge_iflist *bif, mbuf_t m,
8339 uint8_t ip_header_len, struct mac_nat_record *mnr)
8340{
8341 uint16_t dp_flags;
8342 errno_t error;
8343 size_t offset;
8344 struct udphdr udphdr;
8345
8346 /* copy the UDP header */
8347 offset = sizeof(struct ether_header) + ip_header_len;
8348 error = mbuf_copydata(mbuf: m, offset, length: sizeof(struct udphdr), out_data: &udphdr);
8349 if (error != 0) {
8350 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8351 "mbuf_copydata udphdr failed %d",
8352 error);
8353 return;
8354 }
8355 if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8356 ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8357 /* not a BOOTP/DHCP packet */
8358 return;
8359 }
8360 /* check whether the broadcast bit is already set */
8361 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8362 error = mbuf_copydata(mbuf: m, offset, length: sizeof(dp_flags), out_data: &dp_flags);
8363 if (error != 0) {
8364 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8365 "mbuf_copydata dp_flags failed %d",
8366 error);
8367 return;
8368 }
8369 if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8370 /* it's already set, nothing to do */
8371 return;
8372 }
8373 /* broadcast bit needs to be set */
8374 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8375 mnr->mnr_ip_header_len = ip_header_len;
8376 if (udphdr.uh_sum != 0) {
8377 uint16_t delta;
8378
8379 /* adjust checksum to take modified dp_flags into account */
8380 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8381 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8382 }
8383 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8384 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8385 sc->sc_if_xname,
8386 bif->bif_ifp->if_xname,
8387 ntohs(mnr->mnr_ip_dhcp_flags),
8388 ntohs(mnr->mnr_ip_udp_csum));
8389 return;
8390}
8391
8392static boolean_t
8393bridge_mac_nat_ip_output(struct bridge_softc *sc,
8394 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8395{
8396#pragma unused(mnr)
8397 struct ether_header *eh;
8398 struct in_addr ip;
8399 struct ip *iphdr;
8400 uint8_t ip_header_len;
8401 struct mac_nat_entry *mne = NULL;
8402 boolean_t translate = FALSE;
8403
8404 eh = get_ether_ip_header(data, TRUE);
8405 if (eh == NULL) {
8406 goto done;
8407 }
8408 iphdr = (struct ip *)(void *)(eh + 1);
8409 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8410 if (ip_header_len < sizeof(ip)) {
8411 /* bogus IP header */
8412 goto done;
8413 }
8414 bcopy(src: &iphdr->ip_src, dst: &ip, n: sizeof(ip));
8415 /* XXX validate the source address */
8416 if (ip.s_addr != 0) {
8417 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, ip: &ip,
8418 eaddr: eh->ether_shost);
8419 }
8420 if (mnr != NULL) {
8421 if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8422 /* handle DHCP must broadcast */
8423 bridge_mac_nat_udp_output(sc, bif, m: *data,
8424 ip_header_len, mnr);
8425 }
8426 translate = TRUE;
8427 }
8428done:
8429 return translate;
8430}
8431
8432#define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8433 + sizeof(struct ip6_hdr))
8434static struct ether_header *
8435get_ether_ipv6_header(mbuf_t *data, size_t plen, boolean_t is_output)
8436{
8437 struct ether_header *eh = NULL;
8438 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8439 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8440
8441 if (mbuf_pkthdr_len(mbuf: *data) < minlen) {
8442 BRIDGE_LOG(LOG_DEBUG, flags,
8443 "IP %s short frame %lu < %lu",
8444 get_in_out_string(is_output),
8445 mbuf_pkthdr_len(*data), minlen);
8446 goto done;
8447 }
8448 if (mbuf_len(mbuf: *data) < minlen && mbuf_pullup(mbuf: data, len: minlen) != 0) {
8449 BRIDGE_LOG(LOG_DEBUG, flags,
8450 "IP %s size %lu mbuf_pullup fail",
8451 get_in_out_string(is_output),
8452 minlen);
8453 *data = NULL;
8454 goto done;
8455 }
8456 eh = mtod(*data, struct ether_header *);
8457done:
8458 return eh;
8459}
8460
8461#include <netinet/icmp6.h>
8462#include <netinet6/nd6.h>
8463
8464#define ETHER_ND_LLADDR_LEN (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8465
8466static void
8467bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8468 struct bridge_iflist *bif,
8469 mbuf_t *data, struct ip6_hdr *ip6h,
8470 struct in6_addr *saddrp,
8471 struct mac_nat_record *mnr)
8472{
8473 struct ether_header *eh;
8474 struct icmp6_hdr *icmp6;
8475 uint8_t icmp6_type;
8476 uint32_t icmp6len;
8477 int lladdrlen = 0;
8478 char *lladdr = NULL;
8479 unsigned int off = sizeof(*ip6h);
8480
8481 icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8482 if (icmp6len < sizeof(*icmp6)) {
8483 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8484 "short IPv6 payload length %d < %lu",
8485 icmp6len, sizeof(*icmp6));
8486 return;
8487 }
8488
8489 /* pullup IP6 header + ICMPv6 header */
8490 eh = get_ether_ipv6_header(data, plen: sizeof(*icmp6), TRUE);
8491 if (eh == NULL) {
8492 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8493 "failed to pullup icmp6 header");
8494 return;
8495 }
8496 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8497 icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8498 icmp6_type = icmp6->icmp6_type;
8499 switch (icmp6_type) {
8500 case ND_NEIGHBOR_SOLICIT:
8501 case ND_NEIGHBOR_ADVERT:
8502 case ND_ROUTER_ADVERT:
8503 case ND_ROUTER_SOLICIT:
8504 break;
8505 default:
8506 return;
8507 }
8508
8509 /* pullup IP6 header + payload */
8510 eh = get_ether_ipv6_header(data, plen: icmp6len, TRUE);
8511 if (eh == NULL) {
8512 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8513 "failed to pullup icmp6 + payload");
8514 return;
8515 }
8516 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8517 icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8518 switch (icmp6_type) {
8519 case ND_NEIGHBOR_SOLICIT: {
8520 struct nd_neighbor_solicit *nd_ns;
8521 union nd_opts ndopts;
8522 boolean_t is_dad_probe;
8523 struct in6_addr taddr;
8524
8525 if (icmp6len < sizeof(*nd_ns)) {
8526 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8527 "short nd_ns %d < %lu",
8528 icmp6len, sizeof(*nd_ns));
8529 return;
8530 }
8531
8532 nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8533 bcopy(src: &nd_ns->nd_ns_target, dst: &taddr, n: sizeof(taddr));
8534 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8535 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8536 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8537 "invalid target ignored");
8538 return;
8539 }
8540 /* parse options */
8541 nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8542 if (nd6_options(&ndopts) < 0) {
8543 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8544 "invalid ND6 NS option");
8545 return;
8546 }
8547 if (ndopts.nd_opts_src_lladdr != NULL) {
8548 lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8549 lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8550 }
8551 is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8552 if (lladdr != NULL) {
8553 if (is_dad_probe) {
8554 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8555 "bad ND6 DAD packet");
8556 return;
8557 }
8558 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8559 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8560 "source lladdrlen %d != %lu",
8561 lladdrlen, ETHER_ND_LLADDR_LEN);
8562 return;
8563 }
8564 }
8565 if (is_dad_probe) {
8566 /* node is trying use taddr, create an mne for taddr */
8567 *saddrp = taddr;
8568 }
8569 break;
8570 }
8571 case ND_NEIGHBOR_ADVERT: {
8572 struct nd_neighbor_advert *nd_na;
8573 union nd_opts ndopts;
8574 struct in6_addr taddr;
8575
8576
8577 nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8578
8579 if (icmp6len < sizeof(*nd_na)) {
8580 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8581 "short nd_na %d < %lu",
8582 icmp6len, sizeof(*nd_na));
8583 return;
8584 }
8585
8586 bcopy(src: &nd_na->nd_na_target, dst: &taddr, n: sizeof(taddr));
8587 if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8588 IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8589 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8590 "invalid target ignored");
8591 return;
8592 }
8593 /* parse options */
8594 nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8595 if (nd6_options(&ndopts) < 0) {
8596 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8597 "invalid ND6 NA option");
8598 return;
8599 }
8600 if (ndopts.nd_opts_tgt_lladdr == NULL) {
8601 /* target linklayer, nothing to do */
8602 return;
8603 }
8604 lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8605 lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8606 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8607 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8608 "target lladdrlen %d != %lu",
8609 lladdrlen, ETHER_ND_LLADDR_LEN);
8610 return;
8611 }
8612 break;
8613 }
8614 case ND_ROUTER_ADVERT:
8615 case ND_ROUTER_SOLICIT: {
8616 union nd_opts ndopts;
8617 uint32_t type_length;
8618 const char *description;
8619
8620 if (icmp6_type == ND_ROUTER_ADVERT) {
8621 type_length = sizeof(struct nd_router_advert);
8622 description = "RA";
8623 } else {
8624 type_length = sizeof(struct nd_router_solicit);
8625 description = "RS";
8626 }
8627 if (icmp6len < type_length) {
8628 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8629 "short ND6 %s %d < %d",
8630 description, icmp6len, type_length);
8631 return;
8632 }
8633 /* parse options */
8634 nd6_option_init(((uint8_t *)icmp6) + type_length,
8635 icmp6len - type_length, &ndopts);
8636 if (nd6_options(&ndopts) < 0) {
8637 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8638 "invalid ND6 %s option", description);
8639 return;
8640 }
8641 if (ndopts.nd_opts_src_lladdr != NULL) {
8642 lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8643 lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8644 if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8645 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8646 "source lladdrlen %d != %lu",
8647 lladdrlen, ETHER_ND_LLADDR_LEN);
8648 return;
8649 }
8650 }
8651 break;
8652 }
8653 default:
8654 break;
8655 }
8656 if (lladdr != NULL) {
8657 mnr->mnr_ip6_lladdr_offset = (uint16_t)
8658 ((uintptr_t)lladdr - (uintptr_t)eh);
8659 mnr->mnr_ip6_icmp6_len = icmp6len;
8660 mnr->mnr_ip6_icmp6_type = icmp6_type;
8661 mnr->mnr_ip6_header_len = off;
8662 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8663 const char *str;
8664
8665 switch (mnr->mnr_ip6_icmp6_type) {
8666 case ND_ROUTER_ADVERT:
8667 str = "ROUTER ADVERT";
8668 break;
8669 case ND_ROUTER_SOLICIT:
8670 str = "ROUTER SOLICIT";
8671 break;
8672 case ND_NEIGHBOR_ADVERT:
8673 str = "NEIGHBOR ADVERT";
8674 break;
8675 case ND_NEIGHBOR_SOLICIT:
8676 str = "NEIGHBOR SOLICIT";
8677 break;
8678 default:
8679 str = "";
8680 break;
8681 }
8682 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8683 "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8684 sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8685 mnr->mnr_ip6_header_len,
8686 mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8687 }
8688 }
8689}
8690
8691static struct mac_nat_entry *
8692bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8693{
8694 struct in6_addr dst;
8695 struct ether_header *eh;
8696 struct ip6_hdr *ip6h;
8697 struct mac_nat_entry *mne = NULL;
8698
8699 eh = get_ether_ipv6_header(data, plen: 0, FALSE);
8700 if (eh == NULL) {
8701 goto done;
8702 }
8703 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8704 bcopy(src: &ip6h->ip6_dst, dst: &dst, n: sizeof(dst));
8705 /* XXX validate IPv6 address */
8706 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8707 goto done;
8708 }
8709 mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, ip: &dst);
8710
8711done:
8712 return mne;
8713}
8714
8715static boolean_t
8716bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8717 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8718{
8719 struct ether_header *eh;
8720 ether_addr_t ether_shost;
8721 struct ip6_hdr *ip6h;
8722 struct in6_addr saddr;
8723 boolean_t translate;
8724
8725 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8726 eh = get_ether_ipv6_header(data, plen: 0, TRUE);
8727 if (eh == NULL) {
8728 translate = FALSE;
8729 goto done;
8730 }
8731 bcopy(src: eh->ether_shost, dst: &ether_shost, n: sizeof(ether_shost));
8732 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8733 bcopy(src: &ip6h->ip6_src, dst: &saddr, n: sizeof(saddr));
8734 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8735 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, saddrp: &saddr, mnr);
8736 }
8737 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8738 goto done;
8739 }
8740 (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, ip: &saddr,
8741 eaddr: ether_shost.octet);
8742
8743done:
8744 return translate;
8745}
8746
8747/*
8748 * bridge_mac_nat_input:
8749 * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8750 * This interface is the "external" interface with respect to NAT.
8751 * The interface is only capable of receiving a single MAC address
8752 * (e.g. a Wi-Fi STA interface).
8753 *
8754 * When a packet arrives on the external interface, look up the destination
8755 * IP address in the mac_nat_entry table. If there is a match, *is_input
8756 * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8757 * is set to FALSE and translate the MAC address if necessary.
8758 *
8759 * Returns:
8760 * The internal interface to direct the packet to, or NULL if the packet
8761 * should not be redirected.
8762 *
8763 * *data may be updated to point at a different mbuf chain, or set to NULL
8764 * if the chain was deallocated during processing.
8765 */
8766static ifnet_t
8767bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8768 boolean_t *is_input)
8769{
8770 ifnet_t dst_if = NULL;
8771 struct ether_header *eh;
8772 uint16_t ether_type;
8773 boolean_t is_unicast;
8774 mbuf_t m = *data;
8775 struct mac_nat_entry *mne = NULL;
8776
8777 BRIDGE_LOCK_ASSERT_HELD(sc);
8778 *is_input = FALSE;
8779 assert(sc->sc_mac_nat_bif != NULL);
8780 is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8781 eh = mtod(m, struct ether_header *);
8782 ether_type = ntohs(eh->ether_type);
8783 switch (ether_type) {
8784 case ETHERTYPE_ARP:
8785 mne = bridge_mac_nat_arp_input(sc, data);
8786 break;
8787 case ETHERTYPE_IP:
8788 if (is_unicast) {
8789 mne = bridge_mac_nat_ip_input(sc, data);
8790 }
8791 break;
8792 case ETHERTYPE_IPV6:
8793 if (is_unicast) {
8794 mne = bridge_mac_nat_ipv6_input(sc, data);
8795 }
8796 break;
8797 default:
8798 break;
8799 }
8800 if (mne != NULL) {
8801 if (is_unicast) {
8802 if (m != *data) {
8803 /* it may have changed */
8804 eh = mtod(*data, struct ether_header *);
8805 }
8806 bcopy(src: mne->mne_mac, dst: eh->ether_dhost,
8807 n: sizeof(eh->ether_dhost));
8808 }
8809 dst_if = mne->mne_bif->bif_ifp;
8810 *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8811 }
8812 return dst_if;
8813}
8814
8815/*
8816 * bridge_mac_nat_output:
8817 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8818 * from the interface 'bif'.
8819 *
8820 * Create a mac_nat_entry containing the source IP address and MAC address
8821 * from the packet. Populate a mac_nat_record with information detailing
8822 * how to translate the packet. Translation takes place later when
8823 * the bridge lock is no longer held.
8824 *
8825 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8826 * interface is generating an output packet. No translation is required in this
8827 * case, we just record the IP address used to prevent another bif from
8828 * claiming our IP address.
8829 *
8830 * Returns:
8831 * TRUE if the packet should be translated (*mnr updated as well),
8832 * FALSE otherwise.
8833 *
8834 * *data may be updated to point at a different mbuf chain or NULL if
8835 * the chain was deallocated during processing.
8836 */
8837
8838static boolean_t
8839bridge_mac_nat_output(struct bridge_softc *sc,
8840 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8841{
8842 struct ether_header *eh;
8843 uint16_t ether_type;
8844 boolean_t translate = FALSE;
8845
8846 BRIDGE_LOCK_ASSERT_HELD(sc);
8847 assert(sc->sc_mac_nat_bif != NULL);
8848
8849 eh = mtod(*data, struct ether_header *);
8850 ether_type = ntohs(eh->ether_type);
8851 if (mnr != NULL) {
8852 bzero(s: mnr, n: sizeof(*mnr));
8853 mnr->mnr_ether_type = ether_type;
8854 }
8855 switch (ether_type) {
8856 case ETHERTYPE_ARP:
8857 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8858 break;
8859 case ETHERTYPE_IP:
8860 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8861 break;
8862 case ETHERTYPE_IPV6:
8863 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8864 break;
8865 default:
8866 break;
8867 }
8868 return translate;
8869}
8870
8871static void
8872bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8873 const caddr_t eaddr)
8874{
8875 errno_t error;
8876
8877 if (mnr->mnr_arp_offset == 0) {
8878 return;
8879 }
8880 /* replace the source hardware address */
8881 error = mbuf_copyback(mbuf: *data, offset: mnr->mnr_arp_offset,
8882 ETHER_ADDR_LEN, data: eaddr,
8883 how: MBUF_DONTWAIT);
8884 if (error != 0) {
8885 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8886 "mbuf_copyback failed");
8887 m_freem(*data);
8888 *data = NULL;
8889 }
8890 return;
8891}
8892
8893static void
8894bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8895{
8896 errno_t error;
8897 size_t offset;
8898
8899 if (mnr->mnr_ip_header_len == 0) {
8900 return;
8901 }
8902 /* update the UDP checksum */
8903 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8904 error = mbuf_copyback(mbuf: *data, offset: offset + offsetof(struct udphdr, uh_sum),
8905 length: sizeof(mnr->mnr_ip_udp_csum),
8906 data: &mnr->mnr_ip_udp_csum,
8907 how: MBUF_DONTWAIT);
8908 if (error != 0) {
8909 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8910 "mbuf_copyback uh_sum failed");
8911 m_freem(*data);
8912 *data = NULL;
8913 }
8914 /* update the DHCP must broadcast flag */
8915 offset += sizeof(struct udphdr);
8916 error = mbuf_copyback(mbuf: *data, offset: offset + offsetof(struct dhcp, dp_flags),
8917 length: sizeof(mnr->mnr_ip_dhcp_flags),
8918 data: &mnr->mnr_ip_dhcp_flags,
8919 how: MBUF_DONTWAIT);
8920 if (error != 0) {
8921 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8922 "mbuf_copyback dp_flags failed");
8923 m_freem(*data);
8924 *data = NULL;
8925 }
8926}
8927
8928static void
8929bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8930 const caddr_t eaddr)
8931{
8932 uint16_t cksum;
8933 errno_t error;
8934 mbuf_t m = *data;
8935
8936 if (mnr->mnr_ip6_header_len == 0) {
8937 return;
8938 }
8939 switch (mnr->mnr_ip6_icmp6_type) {
8940 case ND_ROUTER_ADVERT:
8941 case ND_ROUTER_SOLICIT:
8942 case ND_NEIGHBOR_SOLICIT:
8943 case ND_NEIGHBOR_ADVERT:
8944 if (mnr->mnr_ip6_lladdr_offset == 0) {
8945 /* nothing to do */
8946 return;
8947 }
8948 break;
8949 default:
8950 return;
8951 }
8952
8953 /*
8954 * replace the lladdr
8955 */
8956 error = mbuf_copyback(mbuf: m, offset: mnr->mnr_ip6_lladdr_offset,
8957 ETHER_ADDR_LEN, data: eaddr,
8958 how: MBUF_DONTWAIT);
8959 if (error != 0) {
8960 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8961 "mbuf_copyback lladdr failed");
8962 m_freem(m);
8963 *data = NULL;
8964 return;
8965 }
8966
8967 /*
8968 * recompute the icmp6 checksum
8969 */
8970
8971 /* skip past the ethernet header */
8972 mbuf_setdata(mbuf: m, data: (char *)mbuf_data(mbuf: m) + ETHER_HDR_LEN,
8973 len: mbuf_len(mbuf: m) - ETHER_HDR_LEN);
8974 mbuf_pkthdr_adjustlen(mbuf: m, amount: -ETHER_HDR_LEN);
8975
8976#define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8977 /* set the checksum to zero */
8978 cksum = 0;
8979 error = mbuf_copyback(mbuf: m, offset: mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8980 length: sizeof(cksum), data: &cksum, how: MBUF_DONTWAIT);
8981 if (error != 0) {
8982 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8983 "mbuf_copyback cksum=0 failed");
8984 m_freem(m);
8985 *data = NULL;
8986 return;
8987 }
8988 /* compute and set the new checksum */
8989 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8990 mnr->mnr_ip6_icmp6_len);
8991 error = mbuf_copyback(mbuf: m, offset: mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8992 length: sizeof(cksum), data: &cksum, how: MBUF_DONTWAIT);
8993 if (error != 0) {
8994 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8995 "mbuf_copyback cksum failed");
8996 m_freem(m);
8997 *data = NULL;
8998 return;
8999 }
9000 /* restore the ethernet header */
9001 mbuf_setdata(mbuf: m, data: (char *)mbuf_data(mbuf: m) - ETHER_HDR_LEN,
9002 len: mbuf_len(mbuf: m) + ETHER_HDR_LEN);
9003 mbuf_pkthdr_adjustlen(mbuf: m, ETHER_HDR_LEN);
9004 return;
9005}
9006
9007static void
9008bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
9009 const caddr_t eaddr)
9010{
9011 struct ether_header *eh;
9012
9013 /* replace the source ethernet address with the single MAC */
9014 eh = mtod(*data, struct ether_header *);
9015 bcopy(src: eaddr, dst: eh->ether_shost, n: sizeof(eh->ether_shost));
9016 switch (mnr->mnr_ether_type) {
9017 case ETHERTYPE_ARP:
9018 bridge_mac_nat_arp_translate(data, mnr, eaddr);
9019 break;
9020
9021 case ETHERTYPE_IP:
9022 bridge_mac_nat_ip_translate(data, mnr);
9023 break;
9024
9025 case ETHERTYPE_IPV6:
9026 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
9027 break;
9028
9029 default:
9030 break;
9031 }
9032 return;
9033}
9034
9035/*
9036 * bridge packet filtering
9037 */
9038
9039/*
9040 * Perform basic checks on header size since
9041 * pfil assumes ip_input has already processed
9042 * it for it. Cut-and-pasted from ip_input.c.
9043 * Given how simple the IPv6 version is,
9044 * does the IPv4 version really need to be
9045 * this complicated?
9046 *
9047 * XXX Should we update ipstat here, or not?
9048 * XXX Right now we update ipstat but not
9049 * XXX csum_counter.
9050 */
9051static int
9052bridge_ip_checkbasic(struct mbuf **mp)
9053{
9054 struct mbuf *m = *mp;
9055 struct ip *ip;
9056 int len, hlen;
9057 u_short sum;
9058
9059 if (*mp == NULL) {
9060 return -1;
9061 }
9062
9063 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9064 /* max_linkhdr is already rounded up to nearest 4-byte */
9065 if ((m = m_copyup(m, sizeof(struct ip),
9066 max_linkhdr)) == NULL) {
9067 /* XXXJRT new stat, please */
9068 ipstat.ips_toosmall++;
9069 goto bad;
9070 }
9071 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
9072 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
9073 ipstat.ips_toosmall++;
9074 goto bad;
9075 }
9076 }
9077 ip = mtod(m, struct ip *);
9078 if (ip == NULL) {
9079 goto bad;
9080 }
9081
9082 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
9083 ipstat.ips_badvers++;
9084 goto bad;
9085 }
9086 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9087 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
9088 ipstat.ips_badhlen++;
9089 goto bad;
9090 }
9091 if (hlen > m->m_len) {
9092 if ((m = m_pullup(m, hlen)) == 0) {
9093 ipstat.ips_badhlen++;
9094 goto bad;
9095 }
9096 ip = mtod(m, struct ip *);
9097 if (ip == NULL) {
9098 goto bad;
9099 }
9100 }
9101
9102 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9103 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9104 } else {
9105 if (hlen == sizeof(struct ip)) {
9106 sum = in_cksum_hdr(ip);
9107 } else {
9108 sum = in_cksum(m, hlen);
9109 }
9110 }
9111 if (sum) {
9112 ipstat.ips_badsum++;
9113 goto bad;
9114 }
9115
9116 /* Retrieve the packet length. */
9117 len = ntohs(ip->ip_len);
9118
9119 /*
9120 * Check for additional length bogosity
9121 */
9122 if (len < hlen) {
9123 ipstat.ips_badlen++;
9124 goto bad;
9125 }
9126
9127 /*
9128 * Check that the amount of data in the buffers
9129 * is as at least much as the IP header would have us expect.
9130 * Drop packet if shorter than we expect.
9131 */
9132 if (m->m_pkthdr.len < len) {
9133 ipstat.ips_tooshort++;
9134 goto bad;
9135 }
9136
9137 /* Checks out, proceed */
9138 *mp = m;
9139 return 0;
9140
9141bad:
9142 *mp = m;
9143 return -1;
9144}
9145
9146/*
9147 * Same as above, but for IPv6.
9148 * Cut-and-pasted from ip6_input.c.
9149 * XXX Should we update ip6stat, or not?
9150 */
9151static int
9152bridge_ip6_checkbasic(struct mbuf **mp)
9153{
9154 struct mbuf *m = *mp;
9155 struct ip6_hdr *ip6;
9156
9157 /*
9158 * If the IPv6 header is not aligned, slurp it up into a new
9159 * mbuf with space for link headers, in the event we forward
9160 * it. Otherwise, if it is aligned, make sure the entire base
9161 * IPv6 header is in the first mbuf of the chain.
9162 */
9163 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9164 struct ifnet *inifp = m->m_pkthdr.rcvif;
9165 /* max_linkhdr is already rounded up to nearest 4-byte */
9166 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9167 max_linkhdr)) == NULL) {
9168 /* XXXJRT new stat, please */
9169 ip6stat.ip6s_toosmall++;
9170 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9171 goto bad;
9172 }
9173 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9174 struct ifnet *inifp = m->m_pkthdr.rcvif;
9175 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9176 ip6stat.ip6s_toosmall++;
9177 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9178 goto bad;
9179 }
9180 }
9181
9182 ip6 = mtod(m, struct ip6_hdr *);
9183
9184 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9185 ip6stat.ip6s_badvers++;
9186 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9187 goto bad;
9188 }
9189
9190 /* Checks out, proceed */
9191 *mp = m;
9192 return 0;
9193
9194bad:
9195 *mp = m;
9196 return -1;
9197}
9198
9199/*
9200 * the PF routines expect to be called from ip_input, so we
9201 * need to do and undo here some of the same processing.
9202 *
9203 * XXX : this is heavily inspired on bridge_pfil()
9204 */
9205static int
9206bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9207 int input)
9208{
9209 /*
9210 * XXX : mpetit : heavily inspired by bridge_pfil()
9211 */
9212
9213 int snap, error, i, hlen;
9214 struct ether_header *eh1, eh2;
9215 struct ip *ip;
9216 struct llc llc1;
9217 u_int16_t ether_type;
9218
9219 snap = 0;
9220 error = -1; /* Default error if not error == 0 */
9221
9222 if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9223 return 0; /* filtering is disabled */
9224 }
9225 i = min(a: (*mp)->m_pkthdr.len, b: max_protohdr);
9226 if ((*mp)->m_len < i) {
9227 *mp = m_pullup(*mp, i);
9228 if (*mp == NULL) {
9229 BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9230 return -1;
9231 }
9232 }
9233
9234 eh1 = mtod(*mp, struct ether_header *);
9235 ether_type = ntohs(eh1->ether_type);
9236
9237 /*
9238 * Check for SNAP/LLC.
9239 */
9240 if (ether_type < ETHERMTU) {
9241 struct llc *llc2 = (struct llc *)(eh1 + 1);
9242
9243 if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9244 llc2->llc_dsap == LLC_SNAP_LSAP &&
9245 llc2->llc_ssap == LLC_SNAP_LSAP &&
9246 llc2->llc_control == LLC_UI) {
9247 ether_type = htons(llc2->llc_un.type_snap.ether_type);
9248 snap = 1;
9249 }
9250 }
9251
9252 /*
9253 * If we're trying to filter bridge traffic, don't look at anything
9254 * other than IP and ARP traffic. If the filter doesn't understand
9255 * IPv6, don't allow IPv6 through the bridge either. This is lame
9256 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9257 * but of course we don't have an AppleTalk filter to begin with.
9258 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9259 * ARP traffic.)
9260 */
9261 switch (ether_type) {
9262 case ETHERTYPE_ARP:
9263 case ETHERTYPE_REVARP:
9264 return 0; /* Automatically pass */
9265
9266 case ETHERTYPE_IP:
9267 case ETHERTYPE_IPV6:
9268 break;
9269 default:
9270 /*
9271 * Check to see if the user wants to pass non-ip
9272 * packets, these will not be checked by pf and
9273 * passed unconditionally so the default is to drop.
9274 */
9275 if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9276 goto bad;
9277 }
9278 break;
9279 }
9280
9281 /* Strip off the Ethernet header and keep a copy. */
9282 m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9283 m_adj(*mp, ETHER_HDR_LEN);
9284
9285 /* Strip off snap header, if present */
9286 if (snap) {
9287 m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9288 m_adj(*mp, sizeof(struct llc));
9289 }
9290
9291 /*
9292 * Check the IP header for alignment and errors
9293 */
9294 switch (ether_type) {
9295 case ETHERTYPE_IP:
9296 error = bridge_ip_checkbasic(mp);
9297 break;
9298 case ETHERTYPE_IPV6:
9299 error = bridge_ip6_checkbasic(mp);
9300 break;
9301 default:
9302 error = 0;
9303 break;
9304 }
9305 if (error) {
9306 goto bad;
9307 }
9308
9309 error = 0;
9310
9311 /*
9312 * Run the packet through pf rules
9313 */
9314 switch (ether_type) {
9315 case ETHERTYPE_IP:
9316 /*
9317 * before calling the firewall, swap fields the same as
9318 * IP does. here we assume the header is contiguous
9319 */
9320 ip = mtod(*mp, struct ip *);
9321
9322 ip->ip_len = ntohs(ip->ip_len);
9323 ip->ip_off = ntohs(ip->ip_off);
9324
9325 if (ifp != NULL) {
9326 error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9327 }
9328
9329 if (*mp == NULL || error != 0) { /* filter may consume */
9330 break;
9331 }
9332
9333 /* Recalculate the ip checksum and restore byte ordering */
9334 ip = mtod(*mp, struct ip *);
9335 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9336 if (hlen < (int)sizeof(struct ip)) {
9337 goto bad;
9338 }
9339 if (hlen > (*mp)->m_len) {
9340 if ((*mp = m_pullup(*mp, hlen)) == 0) {
9341 goto bad;
9342 }
9343 ip = mtod(*mp, struct ip *);
9344 if (ip == NULL) {
9345 goto bad;
9346 }
9347 }
9348 ip->ip_len = htons(ip->ip_len);
9349 ip->ip_off = htons(ip->ip_off);
9350 ip->ip_sum = 0;
9351 if (hlen == sizeof(struct ip)) {
9352 ip->ip_sum = in_cksum_hdr(ip);
9353 } else {
9354 ip->ip_sum = in_cksum(*mp, hlen);
9355 }
9356 break;
9357
9358 case ETHERTYPE_IPV6:
9359 if (ifp != NULL) {
9360 error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9361 }
9362
9363 if (*mp == NULL || error != 0) { /* filter may consume */
9364 break;
9365 }
9366 break;
9367 default:
9368 error = 0;
9369 break;
9370 }
9371
9372 if (*mp == NULL) {
9373 return error;
9374 }
9375 if (error != 0) {
9376 goto bad;
9377 }
9378
9379 error = -1;
9380
9381 /*
9382 * Finally, put everything back the way it was and return
9383 */
9384 if (snap) {
9385 M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9386 if (*mp == NULL) {
9387 return error;
9388 }
9389 bcopy(src: &llc1, mtod(*mp, caddr_t), n: sizeof(struct llc));
9390 }
9391
9392 M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9393 if (*mp == NULL) {
9394 return error;
9395 }
9396 bcopy(src: &eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9397
9398 return 0;
9399
9400bad:
9401 m_freem(*mp);
9402 *mp = NULL;
9403 return error;
9404}
9405
9406/*
9407 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9408 * All rights reserved.
9409 *
9410 * Redistribution and use in source and binary forms, with or without
9411 * modification, are permitted provided that the following conditions
9412 * are met:
9413 * 1. Redistributions of source code must retain the above copyright
9414 * notice, this list of conditions and the following disclaimer.
9415 * 2. Redistributions in binary form must reproduce the above copyright
9416 * notice, this list of conditions and the following disclaimer in the
9417 * documentation and/or other materials provided with the distribution.
9418 *
9419 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9420 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9421 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9422 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9423 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9424 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9425 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9426 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9427 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9428 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9429 * SUCH DAMAGE.
9430 */
9431
9432/*
9433 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9434 *
9435 * Create a queue of packets/segments which fit the given mss + hdr_len.
9436 * m0 points to mbuf chain to be segmented.
9437 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9438 * into segments of length MSS bytes and then copy the first hdr_len bytes
9439 * from m0 at the top of each segment.
9440 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9441 * in each segment after the first hdr_len bytes
9442 *
9443 * Return the new queue with the segments on success, NULL on failure.
9444 * (the mbuf queue is freed in this case).
9445 * nsegs contains the number of segments generated.
9446 */
9447
9448static struct mbuf *
9449m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9450 char * hdr2_buf, int hdr2_len)
9451{
9452 int off = 0, n, firstlen;
9453 struct mbuf **mnext, *mseg;
9454 int total_len = m0->m_pkthdr.len;
9455
9456 /*
9457 * Segmentation useless
9458 */
9459 if (total_len <= hdr_len + mss) {
9460 return m0;
9461 }
9462
9463 if (hdr2_buf == NULL || hdr2_len <= 0) {
9464 hdr2_buf = NULL;
9465 hdr2_len = 0;
9466 }
9467
9468 off = hdr_len + mss;
9469 firstlen = mss; /* first segment stored in the original mbuf */
9470
9471 mnext = &(m0->m_nextpkt); /* pointer to next packet */
9472
9473 for (n = 1; off < total_len; off += mss, n++) {
9474 struct mbuf *m;
9475 /*
9476 * Copy the header from the original packet
9477 * and create a new mbuf chain
9478 */
9479 if (MHLEN < hdr_len) {
9480 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9481 } else {
9482 m = m_gethdr(M_NOWAIT, MT_DATA);
9483 }
9484
9485 if (m == NULL) {
9486#ifdef GSO_DEBUG
9487 D("MGETHDR error\n");
9488#endif
9489 goto err;
9490 }
9491
9492 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9493
9494 m->m_len = hdr_len;
9495 /*
9496 * if the optional header is present, copy it
9497 */
9498 if (hdr2_buf != NULL) {
9499 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9500 }
9501
9502 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9503 if (off + mss >= total_len) { /* last segment */
9504 mss = total_len - off;
9505 }
9506 /*
9507 * Copy the payload from original packet
9508 */
9509 mseg = m_copym(m0, off, mss, M_NOWAIT);
9510 if (mseg == NULL) {
9511 m_freem(m);
9512#ifdef GSO_DEBUG
9513 D("m_copym error\n");
9514#endif
9515 goto err;
9516 }
9517 m_cat(m, mseg);
9518
9519 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9520 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9521 /*
9522 * Copy the checksum flags and data (in_cksum() need this)
9523 */
9524 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9525 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9526 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9527
9528 *mnext = m;
9529 mnext = &(m->m_nextpkt);
9530 }
9531
9532 /*
9533 * Update first segment.
9534 * If the optional header is present, is necessary
9535 * to insert it into the first segment.
9536 */
9537 if (hdr2_buf == NULL) {
9538 m_adj(m0, hdr_len + firstlen - total_len);
9539 m0->m_pkthdr.len = hdr_len + firstlen;
9540 } else {
9541 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9542 if (mseg == NULL) {
9543#ifdef GSO_DEBUG
9544 D("m_copym error\n");
9545#endif
9546 goto err;
9547 }
9548 m_adj(m0, hdr_len - total_len);
9549 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9550 m_cat(m0, mseg);
9551 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9552 }
9553
9554 if (nsegs != NULL) {
9555 *nsegs = n;
9556 }
9557 return m0;
9558err:
9559 while (m0 != NULL) {
9560 mseg = m0->m_nextpkt;
9561 m0->m_nextpkt = NULL;
9562 m_freem(m0);
9563 m0 = mseg;
9564 }
9565 return NULL;
9566}
9567
9568/*
9569 * Wrappers of IPv4 checksum functions
9570 */
9571static inline void
9572gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9573{
9574 m->m_data += mac_hlen;
9575 m->m_len -= mac_hlen;
9576 m->m_pkthdr.len -= mac_hlen;
9577#if __FreeBSD_version < 1000000
9578 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9579#endif
9580
9581 in_delayed_cksum(m);
9582
9583#if __FreeBSD_version < 1000000
9584 ip->ip_len = htons(ip->ip_len);
9585#endif
9586 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9587 m->m_len += mac_hlen;
9588 m->m_pkthdr.len += mac_hlen;
9589 m->m_data -= mac_hlen;
9590}
9591
9592static inline void
9593gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9594{
9595 m->m_data += mac_hlen;
9596
9597 ip->ip_sum = in_cksum(m, ip_hlen);
9598
9599 m->m_pkthdr.csum_flags &= ~CSUM_IP;
9600 m->m_data -= mac_hlen;
9601}
9602
9603/*
9604 * Structure that contains the state during the TCP segmentation
9605 */
9606struct gso_ip_tcp_state {
9607 void (*update)
9608 (struct gso_ip_tcp_state*, struct mbuf*);
9609 void (*internal)
9610 (struct gso_ip_tcp_state*, struct mbuf*);
9611 union iphdr hdr;
9612 struct tcphdr *tcp;
9613 int mac_hlen;
9614 int ip_hlen;
9615 int tcp_hlen;
9616 int hlen;
9617 int pay_len;
9618 int sw_csum;
9619 uint32_t tcp_seq;
9620 uint16_t ip_id;
9621 boolean_t is_tx;
9622};
9623
9624/*
9625 * Update the pointers to TCP and IPv4 headers
9626 */
9627static inline void
9628gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9629{
9630 state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9631 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9632 state->pay_len = m->m_pkthdr.len - state->hlen;
9633}
9634
9635/*
9636 * Set properly the TCP and IPv4 headers
9637 */
9638static inline void
9639gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9640{
9641 /*
9642 * Update IP header
9643 */
9644 state->hdr.ip->ip_id = htons((state->ip_id)++);
9645 state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9646 /*
9647 * TCP Checksum
9648 */
9649 state->tcp->th_sum = 0;
9650 state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9651 state->hdr.ip->ip_dst.s_addr,
9652 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9653 /*
9654 * Checksum HW not supported (TCP)
9655 */
9656 if (state->sw_csum & CSUM_DELAY_DATA) {
9657 gso_ipv4_data_cksum(m, ip: state->hdr.ip, mac_hlen: state->mac_hlen);
9658 }
9659
9660 state->tcp_seq += state->pay_len;
9661 /*
9662 * IP Checksum
9663 */
9664 state->hdr.ip->ip_sum = 0;
9665 /*
9666 * Checksum HW not supported (IP)
9667 */
9668 if (state->sw_csum & CSUM_IP) {
9669 gso_ipv4_hdr_cksum(m, ip: state->hdr.ip, mac_hlen: state->mac_hlen, ip_hlen: state->ip_hlen);
9670 }
9671}
9672
9673
9674/*
9675 * Updates the pointers to TCP and IPv6 headers
9676 */
9677static inline void
9678gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9679{
9680 state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9681 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9682 state->pay_len = m->m_pkthdr.len - state->hlen;
9683}
9684
9685/*
9686 * Sets properly the TCP and IPv6 headers
9687 */
9688static inline void
9689gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9690{
9691 state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9692 state->mac_hlen - state->ip_hlen);
9693 /*
9694 * TCP Checksum
9695 */
9696 state->tcp->th_sum = 0;
9697 state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9698 &state->hdr.ip6->ip6_dst,
9699 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9700 /*
9701 * Checksum HW not supported (TCP)
9702 */
9703 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9704 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9705 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9706 }
9707 state->tcp_seq += state->pay_len;
9708}
9709
9710/*
9711 * Init the state during the TCP segmentation
9712 */
9713static void
9714gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9715 bool is_ipv4, int mac_hlen, int ip_hlen,
9716 void * ip_hdr, struct tcphdr * tcp_hdr)
9717{
9718#pragma unused(ifp)
9719
9720 state->hdr.ptr = ip_hdr;
9721 state->tcp = tcp_hdr;
9722 if (is_ipv4) {
9723 state->ip_id = ntohs(state->hdr.ip->ip_id);
9724 state->update = gso_ipv4_tcp_update;
9725 state->internal = gso_ipv4_tcp_internal;
9726 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9727 } else {
9728 state->update = gso_ipv6_tcp_update;
9729 state->internal = gso_ipv6_tcp_internal;
9730 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9731 }
9732 state->mac_hlen = mac_hlen;
9733 state->ip_hlen = ip_hlen;
9734 state->tcp_hlen = state->tcp->th_off << 2;
9735 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9736 state->tcp_seq = ntohl(state->tcp->th_seq);
9737 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9738 return;
9739}
9740
9741/*
9742 * GSO on TCP/IP (v4 or v6)
9743 *
9744 * If is_tx is TRUE, segmented packets are transmitted after they are
9745 * segmented.
9746 *
9747 * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9748 */
9749static int
9750gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9751 boolean_t is_tx)
9752{
9753 struct mbuf *m, *m_tx;
9754 int error = 0;
9755 int mss = 0;
9756 int nsegs = 0;
9757 struct mbuf *m0 = *mp;
9758#ifdef GSO_STATS
9759 int total_len = m0->m_pkthdr.len;
9760#endif /* GSO_STATS */
9761
9762#if 1
9763 u_int reduce_mss;
9764
9765 reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9766 : if_bridge_tso_reduce_mss_forwarding;
9767 mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9768 assert(mss > 0);
9769#else
9770 if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9771 mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9772 } else {
9773 mss = m0->m_pkthdr.tso_segsz;
9774 }
9775#endif
9776
9777 *mp = m0 = m_seg(m0, hdr_len: state->hlen, mss, nsegs: &nsegs, hdr2_buf: 0, hdr2_len: 0);
9778 if (m0 == NULL) {
9779 return ENOBUFS; /* XXX ok? */
9780 }
9781 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9782 "%s %s mss %d nsegs %d",
9783 ifp->if_xname,
9784 is_tx ? "TX" : "RX",
9785 mss, nsegs);
9786 /*
9787 * XXX-ste: can this happen?
9788 */
9789 if (m0->m_nextpkt == NULL) {
9790#ifdef GSO_DEBUG
9791 D("only 1 segment");
9792#endif
9793 if (is_tx) {
9794 error = bridge_transmit(ifp, m: m0);
9795 }
9796 return error;
9797 }
9798#ifdef GSO_STATS
9799 GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9800 GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9801 GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9802#endif /* GSO_STATS */
9803
9804 /* first pkt */
9805 m = m0;
9806
9807 state->update(state, m);
9808
9809 do {
9810 state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9811
9812 state->internal(state, m);
9813 m_tx = m;
9814 m = m->m_nextpkt;
9815 if (is_tx) {
9816 m_tx->m_nextpkt = NULL;
9817 if ((error = bridge_transmit(ifp, m: m_tx)) != 0) {
9818 /*
9819 * XXX: If a segment can not be sent, discard the following
9820 * segments and propagate the error to the upper levels.
9821 * In this way the TCP retransmits all the initial packet.
9822 */
9823#ifdef GSO_DEBUG
9824 D("if_transmit error\n");
9825#endif
9826 goto err;
9827 }
9828 }
9829 state->update(state, m);
9830
9831 state->tcp->th_flags &= ~TH_CWR;
9832 state->tcp->th_seq = htonl(state->tcp_seq);
9833 } while (m->m_nextpkt);
9834
9835 /* last pkt */
9836 state->internal(state, m);
9837
9838 if (is_tx) {
9839 error = bridge_transmit(ifp, m);
9840#ifdef GSO_DEBUG
9841 if (error) {
9842 D("last if_transmit error\n");
9843 D("error - type = %d \n", error);
9844 }
9845#endif
9846 }
9847#ifdef GSO_STATS
9848 if (!error) {
9849 GSOSTAT_INC(tcp.gsos_segmented);
9850 GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9851 GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9852 GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9853 }
9854#endif /* GSO_STATS */
9855 return error;
9856
9857err:
9858#ifdef GSO_DEBUG
9859 D("error - type = %d \n", error);
9860#endif
9861 while (m != NULL) {
9862 m_tx = m->m_nextpkt;
9863 m->m_nextpkt = NULL;
9864 m_freem(m);
9865 m = m_tx;
9866 }
9867 return error;
9868}
9869
9870/*
9871 * GSO for TCP/IPv[46]
9872 */
9873static int
9874gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9875 boolean_t is_tx)
9876{
9877 int error;
9878 ip_packet_info info;
9879 uint32_t csum_flags;
9880 struct gso_ip_tcp_state state;
9881 struct bripstats stats; /* XXX ignored */
9882 struct tcphdr *tcp;
9883
9884 if (!is_tx && ipforwarding == 0) {
9885 /* no need to segment if the packet will not be forwarded */
9886 return 0;
9887 }
9888 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, info_p: &info, stats_p: &stats);
9889 if (error != 0) {
9890 if (*mp != NULL) {
9891 m_freem(*mp);
9892 *mp = NULL;
9893 }
9894 return error;
9895 }
9896 if (info.ip_proto_hdr == NULL) {
9897 /* not a TCP packet */
9898 return 0;
9899 }
9900 tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9901 gso_ip_tcp_init_state(state: &state, ifp, is_ipv4, mac_hlen,
9902 ip_hlen: info.ip_hlen + info.ip_opt_len, ip_hdr: info.ip_hdr.ptr, tcp_hdr: tcp);
9903 if (is_ipv4) {
9904 csum_flags = CSUM_DELAY_DATA; /* XXX */
9905 if (!is_tx) {
9906 /* if RX to our local IP address, don't segment */
9907 struct in_addr dst_ip;
9908
9909 bcopy(src: &state.hdr.ip->ip_dst, dst: &dst_ip, n: sizeof(dst_ip));
9910 if (in_addr_is_ours(ip: dst_ip)) {
9911 return 0;
9912 }
9913 }
9914 } else {
9915 csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9916 if (!is_tx) {
9917 /* if RX to our local IP address, don't segment */
9918 if (in6_addr_is_ours(ip6_p: &state.hdr.ip6->ip6_dst,
9919 ifscope: ifp->if_index)) {
9920 /* local IP address, no need to segment */
9921 return 0;
9922 }
9923 }
9924 }
9925 (*mp)->m_pkthdr.csum_flags = csum_flags;
9926 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9927 return gso_ip_tcp(ifp, mp, state: &state, is_tx);
9928}
9929