1/*
2 * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 * if_bond.c
31 * - bond/failover interface
32 * - implements IEEE 802.3ad Link Aggregation
33 */
34
35/*
36 * Modification History:
37 *
38 * April 29, 2004 Dieter Siegmund (dieter@apple.com)
39 * - created
40 */
41
42#include <sys/param.h>
43#include <sys/kernel.h>
44#include <sys/malloc.h>
45#include <sys/mbuf.h>
46#include <sys/queue.h>
47#include <sys/socket.h>
48#include <sys/sockio.h>
49#include <sys/sysctl.h>
50#include <sys/systm.h>
51#include <sys/kern_event.h>
52#include <net/bpf.h>
53#include <net/ethernet.h>
54#include <net/if.h>
55#include <net/kpi_interface.h>
56#include <net/kpi_interfacefilter.h>
57#include <net/if_arp.h>
58#include <net/if_dl.h>
59#include <net/if_ether.h>
60#include <net/if_types.h>
61#include <net/if_bond_var.h>
62#include <net/ieee8023ad.h>
63#include <net/lacp.h>
64#include <net/dlil.h>
65#include <sys/time.h>
66#include <net/devtimer.h>
67#include <net/if_vlan_var.h>
68#include <net/kpi_protocol.h>
69#include <sys/protosw.h>
70#include <kern/locks.h>
71#include <kern/zalloc.h>
72#include <os/refcnt.h>
73
74#include <netinet/in.h>
75#include <netinet/if_ether.h>
76#include <netinet/in_systm.h>
77#include <netinet/ip.h>
78#include <netinet/ip6.h>
79
80#include <net/if_media.h>
81#include <net/multicast_list.h>
82
83#include <net/sockaddr_utils.h>
84
85SYSCTL_DECL(_net_link);
86SYSCTL_NODE(_net_link, OID_AUTO, bond, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
87 "Bond interface");
88
89static int if_bond_debug = 0;
90SYSCTL_INT(_net_link_bond, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
91 &if_bond_debug, 0, "Bond interface debug logs");
92
93static struct ether_addr slow_proto_multicast = {
94 .octet = IEEE8023AD_SLOW_PROTO_MULTICAST
95};
96
/* Opaque handles for the bond and bond-port structures defined below. */
typedef struct ifbond_s ifbond;
typedef struct ifbond_s * ifbond_ref;
typedef struct bondport_s bondport;
typedef struct bondport_s * bondport_ref;

/* Cloner limits and the interface name prefix. */
#define BOND_MAXUNIT            128
#define BOND_ZONE_MAX_ELEM      MIN(IFNETS_MAX, BOND_MAXUNIT)
#define BONDNAME                "bond"

/* printf helpers for a 6-byte ethernet address: format + argument list. */
#define EA_FORMAT               "%x:%x:%x:%x:%x:%x"
#define EA_CH(e, i)             ((u_char)((u_char *)(e))[(i)])
#define EA_LIST(ea)             EA_CH(ea,0),EA_CH(ea,1),EA_CH(ea,2),EA_CH(ea,3),EA_CH(ea,4),EA_CH(ea,5)

#define timestamp_printf        printf
109
110/**
111** bond locks
112**/
113
114static LCK_GRP_DECLARE(bond_lck_grp, "if_bond");
115static LCK_MTX_DECLARE(bond_lck_mtx, &bond_lck_grp);
116
117static __inline__ void
118bond_assert_lock_held(void)
119{
120 LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_OWNED);
121}
122
123static __inline__ void
124bond_assert_lock_not_held(void)
125{
126 LCK_MTX_ASSERT(&bond_lck_mtx, LCK_MTX_ASSERT_NOTOWNED);
127}
128
129static __inline__ void
130bond_lock(void)
131{
132 lck_mtx_lock(lck: &bond_lck_mtx);
133}
134
135static __inline__ void
136bond_unlock(void)
137{
138 lck_mtx_unlock(lck: &bond_lck_mtx);
139}
140
141/**
142** bond structures, types
143**/
144
145struct LAG_info_s {
146 lacp_system li_system;
147 lacp_system_priority li_system_priority;
148 lacp_key li_key;
149};
150typedef struct LAG_info_s LAG_info, * LAG_info_ref;
151
/* Forward declarations for the structures linked on the tail queues below. */
struct bondport_s;
struct ifbond_s;
struct LAG_s;

/* Tail-queue head types: ports, bonds and LAGs. */
TAILQ_HEAD(port_list, bondport_s);
TAILQ_HEAD(ifbond_list, ifbond_s);
TAILQ_HEAD(lag_list, LAG_s);

typedef struct ifbond_s ifbond, * ifbond_ref;
typedef struct bondport_s bondport, * bondport_ref;
161
162struct LAG_s {
163 TAILQ_ENTRY(LAG_s) lag_list;
164 struct port_list lag_port_list;
165 short lag_port_count;
166 short lag_selected_port_count;
167 int lag_active_media;
168 LAG_info lag_info;
169};
170typedef struct LAG_s LAG, * LAG_ref;
171
172typedef struct partner_state_s {
173 LAG_info ps_lag_info;
174 lacp_port ps_port;
175 lacp_port_priority ps_port_priority;
176 lacp_actor_partner_state ps_state;
177} partner_state, * partner_state_ref;
178
179struct ifbond_s {
180 TAILQ_ENTRY(ifbond_s) ifb_bond_list;
181 int ifb_flags;
182 struct os_refcnt ifb_retain_count;
183 char ifb_name[IFNAMSIZ];
184 struct ifnet * ifb_ifp;
185 bpf_packet_func ifb_bpf_input;
186 bpf_packet_func ifb_bpf_output;
187 int ifb_altmtu;
188 struct port_list ifb_port_list;
189 short ifb_port_count;
190 struct lag_list ifb_lag_list;
191 lacp_key ifb_key;
192 short ifb_max_active;/* 0 == unlimited */
193 LAG_ref ifb_active_lag;
194 struct ifmultiaddr * ifb_ifma_slow_proto;
195 bondport_ref * ifb_distributing_array;
196 int ifb_distributing_count;
197 int ifb_distributing_max;
198 int ifb_last_link_event;
199 int ifb_mode;/* LACP, STATIC */
200};
201
/* Snapshot of an interface's media words (ifm_active / ifm_status). */
struct media_info {
	int     mi_active;      /* copied from ifmediareq.ifm_active */
	int     mi_status;      /* copied from ifmediareq.ifm_status */
};
206
/* Values stored in bondport po_receive_state. */
enum {
	ReceiveState_none               = 0,
	ReceiveState_INITIALIZE         = 1,
	ReceiveState_PORT_DISABLED      = 2,
	ReceiveState_EXPIRED            = 3,
	ReceiveState_LACP_DISABLED      = 4,
	ReceiveState_DEFAULTED          = 5,
	ReceiveState_CURRENT            = 6,
};
216
217typedef u_char ReceiveState;
218
219enum {
220 SelectedState_UNSELECTED = IF_BOND_STATUS_SELECTED_STATE_UNSELECTED,
221 SelectedState_SELECTED = IF_BOND_STATUS_SELECTED_STATE_SELECTED,
222 SelectedState_STANDBY = IF_BOND_STATUS_SELECTED_STATE_STANDBY
223};
224typedef u_char SelectedState;
225
226static __inline__ const char *
227SelectedStateString(SelectedState s)
228{
229 static const char * names[] = { "UNSELECTED", "SELECTED", "STANDBY" };
230
231 if (s <= SelectedState_STANDBY) {
232 return names[s];
233 }
234 return "<unknown>";
235}
236
/* Values stored in bondport po_mux_state. */
enum {
	MuxState_none                           = 0,
	MuxState_DETACHED                       = 1,
	MuxState_WAITING                        = 2,
	MuxState_ATTACHED                       = 3,
	MuxState_COLLECTING_DISTRIBUTING        = 4,
};
244
245typedef u_char MuxState;
246
/* Bits kept in bondport po_control_flags. */
#define PORT_CONTROL_FLAGS_IN_LIST                      0x01
#define PORT_CONTROL_FLAGS_PROTO_ATTACHED               0x02
#define PORT_CONTROL_FLAGS_FILTER_ATTACHED              0x04
#define PORT_CONTROL_FLAGS_LLADDR_SET                   0x08
#define PORT_CONTROL_FLAGS_MTU_SET                      0x10
#define PORT_CONTROL_FLAGS_PROMISCUOUS_SET              0x20
#define PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET         0x40
254
255
/* Return true when at least one bit of flags_to_test is set in flags. */
static inline bool
uint32_bit_is_set(uint32_t flags, uint32_t flags_to_test)
{
	const uint32_t overlap = flags & flags_to_test;

	return overlap != 0;
}
261
/* Turn on every bit of flags_to_set in *flags_p. */
static inline void
uint32_bit_set(uint32_t * flags_p, uint32_t flags_to_set)
{
	*flags_p = *flags_p | flags_to_set;
}
267
/* Turn off every bit of flags_to_clear in *flags_p. */
static inline void
uint32_bit_clear(uint32_t * flags_p, uint32_t flags_to_clear)
{
	*flags_p = *flags_p & ~flags_to_clear;
}
273
274struct bondport_s {
275 TAILQ_ENTRY(bondport_s) po_port_list;
276 ifbond_ref po_bond;
277 struct multicast_list po_multicast;
278 struct ifnet * po_ifp;
279 struct ether_addr po_saved_addr;
280 int po_enabled;
281 char po_name[IFNAMSIZ];
282 struct ifdevmtu po_devmtu;
283 uint32_t po_control_flags;
284 interface_filter_t po_filter;
285
286 /* LACP */
287 TAILQ_ENTRY(bondport_s) po_lag_port_list;
288 devtimer_ref po_current_while_timer;
289 devtimer_ref po_periodic_timer;
290 devtimer_ref po_wait_while_timer;
291 devtimer_ref po_transmit_timer;
292 partner_state po_partner_state;
293 lacp_port_priority po_priority;
294 lacp_actor_partner_state po_actor_state;
295 u_char po_flags;
296 u_char po_periodic_interval;
297 u_char po_n_transmit;
298 ReceiveState po_receive_state;
299 MuxState po_mux_state;
300 SelectedState po_selected;
301 int32_t po_last_transmit_secs;
302 struct media_info po_media_info;
303 uint64_t po_force_link_event_time;
304 LAG_ref po_lag;
305};
306
/* Bits kept in ifbond ifb_flags. */
#define IFBF_PROMISC                    0x1     /* promiscuous mode */
#define IFBF_IF_DETACHING               0x2     /* interface is detaching */
#define IFBF_LLADDR                     0x4     /* specific link address requested */
#define IFBF_CHANGE_IN_PROGRESS         0x8     /* interface add/remove in progress */
311
312static int bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p,
313 user_addr_t datap);
314
315static __inline__ bool
316ifbond_flags_if_detaching(ifbond_ref ifb)
317{
318 return (ifb->ifb_flags & IFBF_IF_DETACHING) != 0;
319}
320
321static __inline__ void
322ifbond_flags_set_if_detaching(ifbond_ref ifb)
323{
324 ifb->ifb_flags |= IFBF_IF_DETACHING;
325 return;
326}
327
328static __inline__ bool
329ifbond_flags_lladdr(ifbond_ref ifb)
330{
331 return (ifb->ifb_flags & IFBF_LLADDR) != 0;
332}
333
334static __inline__ bool
335ifbond_flags_change_in_progress(ifbond_ref ifb)
336{
337 return (ifb->ifb_flags & IFBF_CHANGE_IN_PROGRESS) != 0;
338}
339
340static __inline__ void
341ifbond_flags_set_change_in_progress(ifbond_ref ifb)
342{
343 ifb->ifb_flags |= IFBF_CHANGE_IN_PROGRESS;
344 return;
345}
346
347static __inline__ void
348ifbond_flags_clear_change_in_progress(ifbond_ref ifb)
349{
350 ifb->ifb_flags &= ~IFBF_CHANGE_IN_PROGRESS;
351 return;
352}
353
354static __inline__ bool
355ifbond_flags_promisc(ifbond_ref ifb)
356{
357 return (ifb->ifb_flags & IFBF_PROMISC) != 0;
358}
359
360static __inline__ void
361ifbond_flags_set_promisc(ifbond_ref ifb)
362{
363 ifb->ifb_flags |= IFBF_PROMISC;
364 return;
365}
366
367static __inline__ void
368ifbond_flags_clear_promisc(ifbond_ref ifb)
369{
370 ifb->ifb_flags &= ~IFBF_PROMISC;
371 return;
372}
373
/*
 * Bits kept in bondport_ref->po_flags (a u_char, so eight bits in total;
 * the UNUSED entries are placeholders for the remaining ones).
 */
#define BONDPORT_FLAGS_NTT                      0x01
#define BONDPORT_FLAGS_READY                    0x02
#define BONDPORT_FLAGS_SELECTED_CHANGED         0x04
#define BONDPORT_FLAGS_MUX_ATTACHED             0x08
#define BONDPORT_FLAGS_DISTRIBUTING             0x10
#define BONDPORT_FLAGS_UNUSED2                  0x20
#define BONDPORT_FLAGS_UNUSED3                  0x40
#define BONDPORT_FLAGS_UNUSED4                  0x80
385
386static __inline__ void
387bondport_flags_set_ntt(bondport_ref p)
388{
389 p->po_flags |= BONDPORT_FLAGS_NTT;
390 return;
391}
392
393static __inline__ void
394bondport_flags_clear_ntt(bondport_ref p)
395{
396 p->po_flags &= ~BONDPORT_FLAGS_NTT;
397 return;
398}
399
400static __inline__ int
401bondport_flags_ntt(bondport_ref p)
402{
403 return (p->po_flags & BONDPORT_FLAGS_NTT) != 0;
404}
405
406static __inline__ void
407bondport_flags_set_ready(bondport_ref p)
408{
409 p->po_flags |= BONDPORT_FLAGS_READY;
410 return;
411}
412
413static __inline__ void
414bondport_flags_clear_ready(bondport_ref p)
415{
416 p->po_flags &= ~BONDPORT_FLAGS_READY;
417 return;
418}
419
420static __inline__ int
421bondport_flags_ready(bondport_ref p)
422{
423 return (p->po_flags & BONDPORT_FLAGS_READY) != 0;
424}
425
426static __inline__ void
427bondport_flags_set_selected_changed(bondport_ref p)
428{
429 p->po_flags |= BONDPORT_FLAGS_SELECTED_CHANGED;
430 return;
431}
432
433static __inline__ void
434bondport_flags_clear_selected_changed(bondport_ref p)
435{
436 p->po_flags &= ~BONDPORT_FLAGS_SELECTED_CHANGED;
437 return;
438}
439
440static __inline__ int
441bondport_flags_selected_changed(bondport_ref p)
442{
443 return (p->po_flags & BONDPORT_FLAGS_SELECTED_CHANGED) != 0;
444}
445
446static __inline__ void
447bondport_flags_set_mux_attached(bondport_ref p)
448{
449 p->po_flags |= BONDPORT_FLAGS_MUX_ATTACHED;
450 return;
451}
452
453static __inline__ void
454bondport_flags_clear_mux_attached(bondport_ref p)
455{
456 p->po_flags &= ~BONDPORT_FLAGS_MUX_ATTACHED;
457 return;
458}
459
460static __inline__ int
461bondport_flags_mux_attached(bondport_ref p)
462{
463 return (p->po_flags & BONDPORT_FLAGS_MUX_ATTACHED) != 0;
464}
465
466static __inline__ void
467bondport_flags_set_distributing(bondport_ref p)
468{
469 p->po_flags |= BONDPORT_FLAGS_DISTRIBUTING;
470 return;
471}
472
473static __inline__ void
474bondport_flags_clear_distributing(bondport_ref p)
475{
476 p->po_flags &= ~BONDPORT_FLAGS_DISTRIBUTING;
477 return;
478}
479
480static __inline__ int
481bondport_flags_distributing(bondport_ref p)
482{
483 return (p->po_flags & BONDPORT_FLAGS_DISTRIBUTING) != 0;
484}
485
486typedef struct bond_globals_s {
487 struct ifbond_list ifbond_list;
488 lacp_system system;
489 lacp_system_priority system_priority;
490} * bond_globals_ref;
491
492static bond_globals_ref g_bond;
493
494/**
495** packet_buffer routines
496** - thin wrapper for mbuf
497**/
498
499typedef struct mbuf * packet_buffer_ref;
500
501static packet_buffer_ref
502packet_buffer_allocate(int length)
503{
504 packet_buffer_ref m;
505 int size;
506
507 /* leave room for ethernet header */
508 size = length + sizeof(struct ether_header);
509 if (size > (int)MHLEN) {
510 if (size > (int)MCLBYTES) {
511 printf("bond: packet_buffer_allocate size %d > max %u\n",
512 size, MCLBYTES);
513 return NULL;
514 }
515 m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
516 } else {
517 m = m_gethdr(M_WAITOK, MT_DATA);
518 }
519 if (m == NULL) {
520 return NULL;
521 }
522 m->m_len = size;
523 m->m_pkthdr.len = size;
524 return m;
525}
526
527static void *
528packet_buffer_byteptr(packet_buffer_ref buf)
529{
530 return m_mtod_current(m: buf) + sizeof(struct ether_header);
531}
532
/* Event codes handed to the bondport_*_machine() functions. */
enum LAEvent_e {
	LAEventStart,
	LAEventTimeout,
	LAEventPacket,
	LAEventMediaChange,
	LAEventSelectedChange,
	LAEventPortMoved,
	LAEventReady
};
typedef enum LAEvent_e LAEvent;
542
543/**
544** Receive machine
545**/
546static void
547bondport_receive_machine(bondport_ref p, LAEvent event,
548 void * event_data);
549/**
550** Periodic Transmission machine
551**/
552static void
553bondport_periodic_transmit_machine(bondport_ref p, LAEvent event,
554 void * event_data);
555
556/**
557** Transmit machine
558**/
559#define TRANSMIT_MACHINE_TX_IMMEDIATE ((void *)1)
560
561static void
562bondport_transmit_machine(bondport_ref p, LAEvent event,
563 void * event_data);
564
565/**
566** Mux machine
567**/
568static void
569bondport_mux_machine(bondport_ref p, LAEvent event,
570 void * event_data);
571
572/**
573** bond, LAG
574**/
575static void
576ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media);
577
578static void
579ifbond_deactivate_LAG(ifbond_ref bond, LAG_ref lag);
580
581static int
582ifbond_all_ports_ready(ifbond_ref bond);
583
584static LAG_ref
585ifbond_find_best_LAG(ifbond_ref bond, int * active_media);
586
587static int
588LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media);
589
590static int
591ifbond_selection(ifbond_ref bond);
592
593static void
594bond_handle_event(struct ifnet * port_ifp, int event_code);
595
596/**
597** bondport
598**/
599
600static void
601bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p);
602
603static void
604bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf);
605
606static bondport_ref
607bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
608 int active, int short_timeout, int * error);
609static void
610bondport_start(bondport_ref p);
611
612static void
613bondport_free(bondport_ref p);
614
615static int
616bondport_aggregatable(bondport_ref p);
617
618static int
619bondport_remove_from_LAG(bondport_ref p);
620
621static void
622bondport_set_selected(bondport_ref p, SelectedState s);
623
624static int
625bondport_matches_LAG(bondport_ref p, LAG_ref lag);
626
627static void
628bondport_link_status_changed(bondport_ref p);
629
630static void
631bondport_enable_distributing(bondport_ref p);
632
633static void
634bondport_disable_distributing(bondport_ref p);
635
636static __inline__ int
637bondport_collecting(bondport_ref p)
638{
639 if (p->po_bond->ifb_mode == IF_BOND_MODE_LACP) {
640 return lacp_actor_partner_state_collecting(state: p->po_actor_state);
641 }
642 return TRUE;
643}
644
645/**
646** bond interface/dlil specific routines
647**/
648static int bond_clone_create(struct if_clone *, u_int32_t, void *);
649static int bond_clone_destroy(struct ifnet *);
650static int bond_output(struct ifnet *ifp, struct mbuf *m);
651static int bond_ioctl(struct ifnet *ifp, u_long cmd, void * addr);
652static int bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode,
653 bpf_packet_func func);
654static int bond_attach_protocol(struct ifnet *ifp);
655static int bond_detach_protocol(struct ifnet *ifp);
656static errno_t bond_iff_input(void *cookie, ifnet_t ifp,
657 protocol_family_t protocol, mbuf_t *data, char **frame_ptr);
658static int bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p);
659static int bond_setmulti(struct ifnet *ifp);
660static int bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp);
661static int bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp);
662static void bond_if_free(struct ifnet * ifp);
663static void interface_link_event(struct ifnet * ifp, u_int32_t event_code);
664
665static struct if_clone bond_cloner = IF_CLONE_INITIALIZER(BONDNAME,
666 bond_clone_create,
667 bond_clone_destroy,
668 0,
669 BOND_MAXUNIT);
670
671static int
672siocsifmtu(struct ifnet * ifp, int mtu)
673{
674 struct ifreq ifr;
675
676 bzero(s: &ifr, n: sizeof(ifr));
677 ifr.ifr_mtu = mtu;
678 return ifnet_ioctl(interface: ifp, protocol: 0, SIOCSIFMTU, ioctl_arg: &ifr);
679}
680
681static int
682siocgifdevmtu(struct ifnet * ifp, struct ifdevmtu * ifdm_p)
683{
684 struct ifreq ifr;
685 int error;
686
687 bzero(s: &ifr, n: sizeof(ifr));
688 error = ifnet_ioctl(interface: ifp, protocol: 0, SIOCGIFDEVMTU, ioctl_arg: &ifr);
689 if (error == 0) {
690 *ifdm_p = ifr.ifr_devmtu;
691 }
692 return error;
693}
694
695static __inline__ void
696ether_addr_copy(void * dest, const void * source)
697{
698 bcopy(src: source, dst: dest, ETHER_ADDR_LEN);
699 return;
700}
701
702static __inline__ void
703ifbond_retain(ifbond_ref ifb)
704{
705 os_ref_retain(rc: &ifb->ifb_retain_count);
706}
707
708static __inline__ void
709ifbond_release(ifbond_ref ifb)
710{
711 if (os_ref_release(rc: &ifb->ifb_retain_count) != 0) {
712 return;
713 }
714
715 if (if_bond_debug) {
716 printf("ifbond_release(%s)\n", ifb->ifb_name);
717 }
718 if (ifb->ifb_ifma_slow_proto != NULL) {
719 if (if_bond_debug) {
720 printf("ifbond_release(%s) removing multicast\n",
721 ifb->ifb_name);
722 }
723 (void) if_delmulti_anon(ifb->ifb_ifma_slow_proto->ifma_ifp,
724 ifb->ifb_ifma_slow_proto->ifma_addr);
725 IFMA_REMREF(ifb->ifb_ifma_slow_proto);
726 }
727 kfree_type(bondport_ref, ifb->ifb_distributing_max,
728 ifb->ifb_distributing_array);
729 kfree_type(struct ifbond_s, ifb);
730}
731
732/*
733 * Function: ifbond_wait
734 * Purpose:
735 * Allows a single thread to gain exclusive access to the ifbond
736 * data structure. Some operations take a long time to complete,
737 * and some have side-effects that we can't predict. Holding the
738 * bond_lock() across such operations is not possible.
739 *
740 * For example:
741 * 1) The SIOCSIFLLADDR ioctl takes a long time (several seconds) to
742 * complete. Simply holding the bond_lock() would freeze all other
743 * data structure accesses during that time.
744 * 2) When we attach our protocol to the interface, a dlil event is
745 * generated and invokes our bond_event() function. bond_event()
746 * needs to take the bond_lock(), but we're already holding it, so
747 * we're deadlocked against ourselves.
748 * Notes:
749 * Before calling, you must be holding the bond_lock and have taken
750 * a reference on the ifbond_ref.
751 */
752static void
753ifbond_wait(ifbond_ref ifb, const char * msg)
754{
755 int waited = 0;
756
757 /* other add/remove in progress */
758 while (ifbond_flags_change_in_progress(ifb)) {
759 if (if_bond_debug) {
760 printf("%s: %s msleep\n", ifb->ifb_name, msg);
761 }
762 waited = 1;
763 (void)msleep(chan: ifb, mtx: &bond_lck_mtx, PZERO, wmesg: msg, ts: 0);
764 }
765 /* prevent other bond list remove/add from taking place */
766 ifbond_flags_set_change_in_progress(ifb);
767 if (if_bond_debug && waited) {
768 printf("%s: %s woke up\n", ifb->ifb_name, msg);
769 }
770 return;
771}
772
773/*
774 * Function: ifbond_signal
775 * Purpose:
776 * Allows the thread that previously invoked ifbond_wait() to
777 * give up exclusive access to the ifbond data structure, and wake up
778 * any other threads waiting to access
779 * Notes:
780 * Before calling, you must be holding the bond_lock and have taken
781 * a reference on the ifbond_ref.
782 */
783static void
784ifbond_signal(ifbond_ref ifb, const char * msg)
785{
786 ifbond_flags_clear_change_in_progress(ifb);
787 wakeup(chan: (caddr_t)ifb);
788 if (if_bond_debug) {
789 printf("%s: %s wakeup\n", ifb->ifb_name, msg);
790 }
791 return;
792}
793
794/**
795** Media information
796**/
797
798static int
799link_speed(int active)
800{
801 switch (IFM_SUBTYPE(active)) {
802 case IFM_AUTO:
803 case IFM_MANUAL:
804 case IFM_NONE:
805 return 0;
806 case IFM_10_T:
807 case IFM_10_2:
808 case IFM_10_5:
809 case IFM_10_STP:
810 case IFM_10_FL:
811 return 10;
812 case IFM_100_TX:
813 case IFM_100_FX:
814 case IFM_100_T4:
815 case IFM_100_VG:
816 case IFM_100_T2:
817 return 100;
818 case IFM_1000_SX:
819 case IFM_1000_LX:
820 case IFM_1000_CX:
821 case IFM_1000_TX:
822 case IFM_1000_CX_SGMII:
823 case IFM_1000_KX:
824 return 1000;
825 case IFM_HPNA_1:
826 return 1;
827 default:
828 /* assume that new defined types are going to be at least 10GigE */
829 case IFM_10G_SR:
830 case IFM_10G_LR:
831 case IFM_10G_KX4:
832 case IFM_10G_KR:
833 case IFM_10G_CR1:
834 case IFM_10G_ER:
835 return 10000;
836 case IFM_2500_T:
837 return 2500;
838 case IFM_5000_T:
839 return 5000;
840 case IFM_20G_KR2:
841 return 20000;
842 case IFM_25G_CR:
843 case IFM_25G_KR:
844 case IFM_25G_SR:
845 case IFM_25G_LR:
846 return 25000;
847 case IFM_40G_CR4:
848 case IFM_40G_SR4:
849 case IFM_40G_LR4:
850 case IFM_40G_KR4:
851 return 40000;
852 case IFM_50G_CR2:
853 case IFM_50G_KR2:
854 case IFM_50G_SR2:
855 case IFM_50G_LR2:
856 return 50000;
857 case IFM_56G_R4:
858 return 56000;
859 case IFM_100G_CR4:
860 case IFM_100G_SR4:
861 case IFM_100G_KR4:
862 case IFM_100G_LR4:
863 return 100000;
864 }
865}
866
867static __inline__ int
868media_active(const struct media_info * mi)
869{
870 if ((mi->mi_status & IFM_AVALID) == 0) {
871 return 1;
872 }
873 return (mi->mi_status & IFM_ACTIVE) != 0;
874}
875
876static __inline__ int
877media_full_duplex(const struct media_info * mi)
878{
879 return (mi->mi_active & IFM_FDX) != 0;
880}
881
882static __inline__ int
883media_type_unknown(const struct media_info * mi)
884{
885 int unknown;
886
887 switch (IFM_SUBTYPE(mi->mi_active)) {
888 case IFM_AUTO:
889 case IFM_MANUAL:
890 case IFM_NONE:
891 unknown = 1;
892 break;
893 default:
894 unknown = 0;
895 break;
896 }
897 return unknown;
898}
899
/* Media is acceptable when it is full duplex or its type is unknown. */
static __inline__ int
media_ok(const struct media_info * mi)
{
	if (media_full_duplex(mi)) {
		return 1;
	}
	return media_type_unknown(mi) != 0;
}
905
906static __inline__ int
907media_speed(const struct media_info * mi)
908{
909 return link_speed(active: mi->mi_active);
910}
911
912static struct media_info
913interface_media_info(struct ifnet * ifp)
914{
915 struct ifmediareq ifmr;
916 struct media_info mi;
917
918 bzero(s: &mi, n: sizeof(mi));
919 bzero(s: &ifmr, n: sizeof(ifmr));
920 if (ifnet_ioctl(interface: ifp, protocol: 0, SIOCGIFMEDIA, ioctl_arg: &ifmr) == 0) {
921 if (ifmr.ifm_count != 0) {
922 mi.mi_status = ifmr.ifm_status;
923 mi.mi_active = ifmr.ifm_active;
924 }
925 }
926 return mi;
927}
928
929static int
930if_siflladdr(struct ifnet * ifp, const struct ether_addr * ea_p)
931{
932 struct ifreq ifr;
933
934 /*
935 * XXX setting the sa_len to ETHER_ADDR_LEN is wrong, but the driver
936 * currently expects it that way
937 */
938 ifr.ifr_addr.sa_family = AF_UNSPEC;
939 ifr.ifr_addr.sa_len = ETHER_ADDR_LEN;
940 ether_addr_copy(dest: ifr.ifr_addr.sa_data, source: ea_p);
941 return ifnet_ioctl(interface: ifp, protocol: 0, SIOCSIFLLADDR, ioctl_arg: &ifr);
942}
943
944/**
945** bond_globals
946**/
947static bond_globals_ref
948bond_globals_create(lacp_system_priority sys_pri,
949 lacp_system_ref sys)
950{
951 bond_globals_ref b;
952
953 b = kalloc_type(struct bond_globals_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
954 TAILQ_INIT(&b->ifbond_list);
955 b->system = *sys;
956 b->system_priority = sys_pri;
957 return b;
958}
959
960static int
961bond_globals_init(void)
962{
963 bond_globals_ref b;
964 int i;
965 struct ifnet * ifp;
966
967 bond_assert_lock_not_held();
968
969 if (g_bond != NULL) {
970 return 0;
971 }
972
973 /*
974 * use en0's ethernet address as the system identifier, and if it's not
975 * there, use en1 .. en3
976 */
977 ifp = NULL;
978 for (i = 0; i < 4; i++) {
979 char ifname[IFNAMSIZ + 1];
980 snprintf(ifname, count: sizeof(ifname), "en%d", i);
981 ifp = ifunit(ifname);
982 if (ifp != NULL) {
983 break;
984 }
985 }
986 b = NULL;
987 if (ifp != NULL) {
988 b = bond_globals_create(sys_pri: 0x8000, sys: (lacp_system_ref)IF_LLADDR(ifp));
989 }
990 bond_lock();
991 if (g_bond != NULL) {
992 bond_unlock();
993 kfree_type(struct bond_globals_s, b);
994 return 0;
995 }
996 g_bond = b;
997 bond_unlock();
998 if (ifp == NULL) {
999 return ENXIO;
1000 }
1001 if (b == NULL) {
1002 return ENOMEM;
1003 }
1004 return 0;
1005}
1006
1007static void
1008bond_bpf_vlan(struct ifnet * ifp, struct mbuf * m,
1009 const struct ether_header * eh_p,
1010 u_int16_t vlan_tag, bpf_packet_func func)
1011{
1012 struct ether_vlan_header * vlh_p;
1013 struct mbuf * vl_m;
1014
1015 vl_m = m_get(M_DONTWAIT, MT_DATA);
1016 if (vl_m == NULL) {
1017 return;
1018 }
1019 /* populate a new mbuf containing the vlan ethernet header */
1020 vl_m->m_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1021 vlh_p = mtod(vl_m, struct ether_vlan_header *);
1022 bcopy(src: eh_p, dst: vlh_p, offsetof(struct ether_header, ether_type));
1023 vlh_p->evl_encap_proto = htons(ETHERTYPE_VLAN);
1024 vlh_p->evl_tag = htons(vlan_tag);
1025 vlh_p->evl_proto = eh_p->ether_type;
1026 vl_m->m_next = m;
1027 (*func)(ifp, vl_m);
1028 vl_m->m_next = NULL;
1029 m_free(vl_m);
1030 return;
1031}
1032
1033static __inline__ void
1034bond_bpf_output(struct ifnet * ifp, struct mbuf * m,
1035 bpf_packet_func func)
1036{
1037 if (func != NULL) {
1038 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1039 const struct ether_header * eh_p;
1040 eh_p = mtod(m, const struct ether_header *);
1041 m->m_data += ETHER_HDR_LEN;
1042 m->m_len -= ETHER_HDR_LEN;
1043 bond_bpf_vlan(ifp, m, eh_p, vlan_tag: m->m_pkthdr.vlan_tag, func);
1044 m->m_data -= ETHER_HDR_LEN;
1045 m->m_len += ETHER_HDR_LEN;
1046 } else {
1047 (*func)(ifp, m);
1048 }
1049 }
1050 return;
1051}
1052
1053static __inline__ void
1054bond_bpf_input(ifnet_t ifp, mbuf_t m, const struct ether_header * eh_p,
1055 bpf_packet_func func)
1056{
1057 if (func != NULL) {
1058 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1059 bond_bpf_vlan(ifp, m, eh_p, vlan_tag: m->m_pkthdr.vlan_tag, func);
1060 } else {
1061 /* restore the header */
1062 m->m_data -= ETHER_HDR_LEN;
1063 m->m_len += ETHER_HDR_LEN;
1064 (*func)(ifp, m);
1065 m->m_data += ETHER_HDR_LEN;
1066 m->m_len -= ETHER_HDR_LEN;
1067 }
1068 }
1069 return;
1070}
1071
1072/*
1073 * Function: bond_setmulti
1074 * Purpose:
1075 * Enable multicast reception on "our" interface by enabling multicasts on
1076 * each of the member ports.
1077 */
1078static int
1079bond_setmulti(struct ifnet * ifp)
1080{
1081 ifbond_ref ifb;
1082 int error;
1083 int result = 0;
1084 bondport_ref p;
1085
1086 bond_lock();
1087 ifb = ifnet_softc(interface: ifp);
1088 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1089 || TAILQ_EMPTY(&ifb->ifb_port_list)) {
1090 bond_unlock();
1091 return 0;
1092 }
1093 ifbond_retain(ifb);
1094 ifbond_wait(ifb, msg: "bond_setmulti");
1095
1096 if (ifbond_flags_if_detaching(ifb)) {
1097 /* someone destroyed the bond while we were waiting */
1098 result = EBUSY;
1099 goto signal_done;
1100 }
1101 bond_unlock();
1102
1103 /* ifbond_wait() let's us safely walk the list without holding the lock */
1104 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1105 struct ifnet * port_ifp = p->po_ifp;
1106
1107 error = multicast_list_program(mc_list: &p->po_multicast,
1108 source_ifp: ifp, target_ifp: port_ifp);
1109 if (error != 0) {
1110 printf("bond_setmulti(%s): "
1111 "multicast_list_program(%s%d) failed, %d\n",
1112 ifb->ifb_name, ifnet_name(interface: port_ifp),
1113 ifnet_unit(interface: port_ifp), error);
1114 result = error;
1115 }
1116 }
1117 bond_lock();
1118signal_done:
1119 ifbond_signal(ifb, msg: __func__);
1120 bond_unlock();
1121 ifbond_release(ifb);
1122 return result;
1123}
1124
1125static int
1126bond_clone_attach(void)
1127{
1128 int error;
1129
1130 if ((error = if_clone_attach(&bond_cloner)) != 0) {
1131 return error;
1132 }
1133 return 0;
1134}
1135
1136static int
1137ifbond_add_slow_proto_multicast(ifbond_ref ifb)
1138{
1139 int error;
1140 struct ifmultiaddr * ifma = NULL;
1141 struct sockaddr_dl sdl;
1142
1143 bond_assert_lock_not_held();
1144
1145 SOCKADDR_ZERO(&sdl, sizeof(sdl));
1146 sdl.sdl_len = sizeof(sdl);
1147 sdl.sdl_family = AF_LINK;
1148 sdl.sdl_type = IFT_ETHER;
1149 sdl.sdl_nlen = 0;
1150 sdl.sdl_alen = sizeof(slow_proto_multicast);
1151 bcopy(src: &slow_proto_multicast, dst: sdl.sdl_data, n: sizeof(slow_proto_multicast));
1152 error = if_addmulti_anon(ifb->ifb_ifp, SA(&sdl), &ifma);
1153 if (error == 0) {
1154 ifb->ifb_ifma_slow_proto = ifma;
1155 }
1156 return error;
1157}
1158
1159static int
1160bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params)
1161{
1162 int error;
1163 ifbond_ref ifb;
1164 ifnet_t ifp;
1165 struct ifnet_init_eparams bond_init;
1166
1167 error = bond_globals_init();
1168 if (error != 0) {
1169 return error;
1170 }
1171
1172 ifb = kalloc_type(struct ifbond_s, Z_WAITOK_ZERO_NOFAIL);
1173 os_ref_init(&ifb->ifb_retain_count, NULL);
1174 TAILQ_INIT(&ifb->ifb_port_list);
1175 TAILQ_INIT(&ifb->ifb_lag_list);
1176 ifb->ifb_key = unit + 1;
1177
1178 /* use the interface name as the unique id for ifp recycle */
1179 if ((u_int32_t)snprintf(ifb->ifb_name, count: sizeof(ifb->ifb_name), "%s%d",
1180 ifc->ifc_name, unit) >= sizeof(ifb->ifb_name)) {
1181 ifbond_release(ifb);
1182 return EINVAL;
1183 }
1184
1185 bzero(s: &bond_init, n: sizeof(bond_init));
1186 bond_init.ver = IFNET_INIT_CURRENT_VERSION;
1187 bond_init.len = sizeof(bond_init);
1188 bond_init.flags = IFNET_INIT_LEGACY;
1189 bond_init.uniqueid = ifb->ifb_name;
1190 bond_init.uniqueid_len = strlen(s: ifb->ifb_name);
1191 bond_init.name = ifc->ifc_name;
1192 bond_init.unit = unit;
1193 bond_init.family = IFNET_FAMILY_BOND;
1194 bond_init.type = IFT_IEEE8023ADLAG;
1195 bond_init.output = bond_output;
1196 bond_init.demux = ether_demux;
1197 bond_init.add_proto = ether_add_proto;
1198 bond_init.del_proto = ether_del_proto;
1199 bond_init.check_multi = ether_check_multi;
1200 bond_init.framer_extended = ether_frameout_extended;
1201 bond_init.ioctl = bond_ioctl;
1202 bond_init.set_bpf_tap = bond_set_bpf_tap;
1203 bond_init.detach = bond_if_free;
1204 bond_init.broadcast_addr = etherbroadcastaddr;
1205 bond_init.broadcast_len = ETHER_ADDR_LEN;
1206 bond_init.softc = ifb;
1207 error = ifnet_allocate_extended(init: &bond_init, interface: &ifp);
1208
1209 if (error) {
1210 ifbond_release(ifb);
1211 return error;
1212 }
1213
1214 ifb->ifb_ifp = ifp;
1215 ifnet_set_offload(interface: ifp, offload: 0);
1216 ifnet_set_addrlen(interface: ifp, ETHER_ADDR_LEN); /* XXX ethernet specific */
1217 ifnet_set_flags(interface: ifp, IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, mask: 0xffff);
1218 ifnet_set_mtu(interface: ifp, ETHERMTU);
1219
1220 error = ifnet_attach(interface: ifp, NULL);
1221 if (error != 0) {
1222 ifnet_release(interface: ifp);
1223 ifbond_release(ifb);
1224 return error;
1225 }
1226 error = ifbond_add_slow_proto_multicast(ifb);
1227 if (error != 0) {
1228 printf("bond_clone_create(%s): "
1229 "failed to add slow_proto multicast, %d\n",
1230 ifb->ifb_name, error);
1231 }
1232
1233 /* attach as ethernet */
1234 bpfattach(interface: ifp, DLT_EN10MB, header_length: sizeof(struct ether_header));
1235
1236 bond_lock();
1237 TAILQ_INSERT_HEAD(&g_bond->ifbond_list, ifb, ifb_bond_list);
1238 bond_unlock();
1239
1240 return 0;
1241}
1242
1243static void
1244bond_remove_all_interfaces(ifbond_ref ifb)
1245{
1246 bondport_ref p;
1247
1248 bond_assert_lock_held();
1249
1250 /*
1251 * do this in reverse order to avoid re-programming the mac address
1252 * as each head interface is removed
1253 */
1254 while ((p = TAILQ_LAST(&ifb->ifb_port_list, port_list)) != NULL) {
1255 bond_remove_interface(ifb, port_ifp: p->po_ifp);
1256 }
1257 return;
1258}
1259
1260static void
1261bond_remove(ifbond_ref ifb)
1262{
1263 bond_assert_lock_held();
1264 ifbond_flags_set_if_detaching(ifb);
1265 TAILQ_REMOVE(&g_bond->ifbond_list, ifb, ifb_bond_list);
1266 bond_remove_all_interfaces(ifb);
1267 return;
1268}
1269
/*
 * Function: bond_if_detach
 * Purpose:
 *   Call ifnet_detach() on the interface; a failure is only logged.
 */
static void
bond_if_detach(struct ifnet * ifp)
{
	int     status = ifnet_detach(ifp);

	if (status == 0) {
		return;
	}
	printf("bond_if_detach %s%d: ifnet_detach failed, %d\n",
	    ifnet_name(ifp), ifnet_unit(ifp), status);
}
1283
1284static int
1285bond_clone_destroy(struct ifnet * ifp)
1286{
1287 ifbond_ref ifb;
1288
1289 bond_lock();
1290 ifb = ifnet_softc(interface: ifp);
1291 if (ifb == NULL || ifnet_type(interface: ifp) != IFT_IEEE8023ADLAG) {
1292 bond_unlock();
1293 return 0;
1294 }
1295 if (ifbond_flags_if_detaching(ifb)) {
1296 bond_unlock();
1297 return 0;
1298 }
1299 bond_remove(ifb);
1300 bond_unlock();
1301 bond_if_detach(ifp);
1302 return 0;
1303}
1304
1305static int
1306bond_set_bpf_tap(struct ifnet * ifp, bpf_tap_mode mode, bpf_packet_func func)
1307{
1308 ifbond_ref ifb;
1309
1310 bond_lock();
1311 ifb = ifnet_softc(interface: ifp);
1312 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
1313 bond_unlock();
1314 return ENODEV;
1315 }
1316 switch (mode) {
1317 case BPF_TAP_DISABLE:
1318 ifb->ifb_bpf_input = ifb->ifb_bpf_output = NULL;
1319 break;
1320
1321 case BPF_TAP_INPUT:
1322 ifb->ifb_bpf_input = func;
1323 break;
1324
1325 case BPF_TAP_OUTPUT:
1326 ifb->ifb_bpf_output = func;
1327 break;
1328
1329 case BPF_TAP_INPUT_OUTPUT:
1330 ifb->ifb_bpf_input = ifb->ifb_bpf_output = func;
1331 break;
1332 default:
1333 break;
1334 }
1335 bond_unlock();
1336 return 0;
1337}
1338
/*
 * Function: ether_header_hash
 * Purpose:
 *   Compute a 32-bit hash from the destination ethernet address and the
 *   ether type: bytes 4-5 of the destination (as a native 16-bit value)
 *   form the upper half, the ether_type field the lower half, and the
 *   result is XOR'd with bytes 0-3 of the destination (as a native
 *   32-bit value).
 */
static uint32_t
ether_header_hash(struct ether_header * eh_p)
{
	uint32_t        dhost_lo;
	uint16_t        dhost_hi;

	/*
	 * Copy the address bytes out instead of dereferencing casted
	 * pointers: the byte array carries no 16/32-bit alignment
	 * guarantee.
	 */
	memcpy(&dhost_lo, &eh_p->ether_dhost[0], sizeof(dhost_lo));
	memcpy(&dhost_hi, &eh_p->ether_dhost[4], sizeof(dhost_hi));

	/* widen before shifting so the shift is done on an unsigned value */
	return (((uint32_t)dhost_hi << 16) | eh_p->ether_type) ^ dhost_lo;
}
1350
1351static struct mbuf *
1352S_mbuf_skip_to_offset(struct mbuf * m, int32_t * offset)
1353{
1354 int len;
1355
1356 len = m->m_len;
1357 while (*offset >= len) {
1358 *offset -= len;
1359 m = m->m_next;
1360 if (m == NULL) {
1361 break;
1362 }
1363 len = m->m_len;
1364 }
1365 return m;
1366}
1367
1368#if BYTE_ORDER == BIG_ENDIAN
1369static __inline__ uint32_t
1370make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1371{
1372 return ((uint32_t)c0 << 24) | ((uint32_t)c1 << 16)
1373 | ((uint32_t)c2 << 8) | (uint32_t)c3;
1374}
1375#else /* BYTE_ORDER == LITTLE_ENDIAN */
1376static __inline__ uint32_t
1377make_uint32(u_char c0, u_char c1, u_char c2, u_char c3)
1378{
1379 return ((uint32_t)c3 << 24) | ((uint32_t)c2 << 16)
1380 | ((uint32_t)c1 << 8) | (uint32_t)c0;
1381}
1382#endif /* BYTE_ORDER == LITTLE_ENDIAN */
1383
1384static int
1385S_mbuf_copy_uint32(struct mbuf * m, int32_t offset, uint32_t * val)
1386{
1387 struct mbuf * current;
1388 u_char * current_data;
1389 struct mbuf * next;
1390 u_char * next_data;
1391 int space_current;
1392
1393 current = S_mbuf_skip_to_offset(m, offset: &offset);
1394 if (current == NULL) {
1395 return 1;
1396 }
1397 current_data = mtod(current, u_char *) + offset;
1398 space_current = current->m_len - offset;
1399 if (space_current >= (int)sizeof(uint32_t)) {
1400 *val = *((uint32_t *)current_data);
1401 return 0;
1402 }
1403 next = current->m_next;
1404 if (next == NULL || (next->m_len + space_current) < (int)sizeof(uint32_t)) {
1405 return 1;
1406 }
1407 next_data = mtod(next, u_char *);
1408 switch (space_current) {
1409 case 1:
1410 *val = make_uint32(c0: current_data[0], c1: next_data[0],
1411 c2: next_data[1], c3: next_data[2]);
1412 break;
1413 case 2:
1414 *val = make_uint32(c0: current_data[0], c1: current_data[1],
1415 c2: next_data[0], c3: next_data[1]);
1416 break;
1417 default:
1418 *val = make_uint32(c0: current_data[0], c1: current_data[1],
1419 c2: current_data[2], c3: next_data[0]);
1420 break;
1421 }
1422 return 0;
1423}
1424
1425#define IP_SRC_OFFSET (offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p))
1426#define IP_DST_OFFSET (offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p))
1427
1428static uint32_t
1429ip_header_hash(struct mbuf * m)
1430{
1431 u_char * data;
1432 struct in_addr ip_dst;
1433 struct in_addr ip_src;
1434 u_char ip_p;
1435 int32_t offset;
1436 struct mbuf * orig_m = m;
1437
1438 /* find the IP protocol field relative to the start of the packet */
1439 offset = offsetof(struct ip, ip_p) + sizeof(struct ether_header);
1440 m = S_mbuf_skip_to_offset(m, offset: &offset);
1441 if (m == NULL || m->m_len < 1) {
1442 goto bad_ip_packet;
1443 }
1444 data = mtod(m, u_char *) + offset;
1445 ip_p = *data;
1446
1447 /* find the IP src relative to the IP protocol */
1448 if ((m->m_len - offset)
1449 >= (int)(IP_SRC_OFFSET + sizeof(struct in_addr) * 2)) {
1450 /* this should be the normal case */
1451 ip_src = *(struct in_addr *)(data + IP_SRC_OFFSET);
1452 ip_dst = *(struct in_addr *)(data + IP_DST_OFFSET);
1453 } else {
1454 if (S_mbuf_copy_uint32(m, offset: offset + IP_SRC_OFFSET,
1455 val: (uint32_t *)&ip_src.s_addr)) {
1456 goto bad_ip_packet;
1457 }
1458 if (S_mbuf_copy_uint32(m, offset: offset + IP_DST_OFFSET,
1459 val: (uint32_t *)&ip_dst.s_addr)) {
1460 goto bad_ip_packet;
1461 }
1462 }
1463 return ntohl(ip_dst.s_addr) ^ ntohl(ip_src.s_addr) ^ ((uint32_t)ip_p);
1464
1465bad_ip_packet:
1466 return ether_header_hash(mtod(orig_m, struct ether_header *));
1467}
1468
1469#define IP6_ADDRS_LEN (sizeof(struct in6_addr) * 2)
1470static uint32_t
1471ipv6_header_hash(struct mbuf * m)
1472{
1473 u_char * data;
1474 int i;
1475 int32_t offset;
1476 struct mbuf * orig_m = m;
1477 uint32_t * scan;
1478 uint32_t val;
1479
1480 /* find the IP protocol field relative to the start of the packet */
1481 offset = offsetof(struct ip6_hdr, ip6_src) + sizeof(struct ether_header);
1482 m = S_mbuf_skip_to_offset(m, offset: &offset);
1483 if (m == NULL) {
1484 goto bad_ipv6_packet;
1485 }
1486 data = mtod(m, u_char *) + offset;
1487 val = 0;
1488 if ((m->m_len - offset) >= (int)IP6_ADDRS_LEN) {
1489 /* this should be the normal case */
1490 for (i = 0, scan = (uint32_t *)data;
1491 i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t));
1492 i++, scan++) {
1493 val ^= *scan;
1494 }
1495 } else {
1496 for (i = 0; i < (int)(IP6_ADDRS_LEN / sizeof(uint32_t)); i++) {
1497 uint32_t tmp;
1498 if (S_mbuf_copy_uint32(m, offset: offset + i * sizeof(uint32_t),
1499 val: (uint32_t *)&tmp)) {
1500 goto bad_ipv6_packet;
1501 }
1502 val ^= tmp;
1503 }
1504 }
1505 return ntohl(val);
1506
1507bad_ipv6_packet:
1508 return ether_header_hash(mtod(orig_m, struct ether_header *));
1509}
1510
1511static int
1512bond_output(struct ifnet * ifp, struct mbuf * m)
1513{
1514 bpf_packet_func bpf_func;
1515 uint32_t h;
1516 ifbond_ref ifb;
1517 struct ifnet * port_ifp = NULL;
1518 int err;
1519 struct flowadv adv = { .code = FADV_SUCCESS };
1520
1521 if (m == 0) {
1522 return 0;
1523 }
1524 if ((m->m_flags & M_PKTHDR) == 0) {
1525 m_freem(m);
1526 return 0;
1527 }
1528 if (m->m_pkthdr.pkt_flowid != 0) {
1529 h = m->m_pkthdr.pkt_flowid;
1530 } else {
1531 struct ether_header * eh_p;
1532
1533 eh_p = mtod(m, struct ether_header *);
1534 switch (ntohs(eh_p->ether_type)) {
1535 case ETHERTYPE_IP:
1536 h = ip_header_hash(m);
1537 break;
1538 case ETHERTYPE_IPV6:
1539 h = ipv6_header_hash(m);
1540 break;
1541 default:
1542 h = ether_header_hash(eh_p);
1543 break;
1544 }
1545 }
1546 bond_lock();
1547 ifb = ifnet_softc(interface: ifp);
1548 if (ifb == NULL || ifbond_flags_if_detaching(ifb)
1549 || ifb->ifb_distributing_count == 0) {
1550 goto done;
1551 }
1552 h %= ifb->ifb_distributing_count;
1553 port_ifp = ifb->ifb_distributing_array[h]->po_ifp;
1554 bpf_func = ifb->ifb_bpf_output;
1555 bond_unlock();
1556
1557 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1558 (void)ifnet_stat_increment_out(interface: ifp, packets_out: 1,
1559 bytes_out: m->m_pkthdr.len + ETHER_VLAN_ENCAP_LEN,
1560 errors_out: 0);
1561 } else {
1562 (void)ifnet_stat_increment_out(interface: ifp, packets_out: 1, bytes_out: m->m_pkthdr.len, errors_out: 0);
1563 }
1564 bond_bpf_output(ifp, m, func: bpf_func);
1565
1566 err = dlil_output(port_ifp, PF_BOND, m, NULL, NULL, 1, &adv);
1567
1568 if (err == 0) {
1569 if (adv.code == FADV_FLOW_CONTROLLED) {
1570 err = EQFULL;
1571 } else if (adv.code == FADV_SUSPENDED) {
1572 err = EQSUSPENDED;
1573 }
1574 }
1575
1576 return err;
1577
1578done:
1579 bond_unlock();
1580 m_freem(m);
1581 return 0;
1582}
1583
1584static bondport_ref
1585ifbond_lookup_port(ifbond_ref ifb, struct ifnet * port_ifp)
1586{
1587 bondport_ref p;
1588 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
1589 if (p->po_ifp == port_ifp) {
1590 return p;
1591 }
1592 }
1593 return NULL;
1594}
1595
1596static bondport_ref
1597bond_lookup_port(struct ifnet * port_ifp)
1598{
1599 ifbond_ref ifb;
1600 bondport_ref port;
1601
1602 TAILQ_FOREACH(ifb, &g_bond->ifbond_list, ifb_bond_list) {
1603 port = ifbond_lookup_port(ifb, port_ifp);
1604 if (port != NULL) {
1605 return port;
1606 }
1607 }
1608 return NULL;
1609}
1610
1611static void
1612bond_receive_lacpdu(struct mbuf * m, struct ifnet * port_ifp)
1613{
1614 struct ifnet * bond_ifp = NULL;
1615 ifbond_ref ifb;
1616 int event_code = 0;
1617 bool need_link_update = false;
1618 bondport_ref p;
1619
1620 bond_lock();
1621 if ((ifnet_eflags(interface: port_ifp) & IFEF_BOND) == 0) {
1622 goto done;
1623 }
1624 p = bond_lookup_port(port_ifp);
1625 if (p == NULL) {
1626 goto done;
1627 }
1628 if (p->po_enabled == 0) {
1629 goto done;
1630 }
1631 ifb = p->po_bond;
1632 if (ifb->ifb_mode != IF_BOND_MODE_LACP) {
1633 goto done;
1634 }
1635 /*
1636 * Work-around for rdar://problem/51372042
1637 * Sometimes, the link comes up but the driver doesn't report the
1638 * negotiated medium at that time. When we receive an LACPDU packet,
1639 * and the medium is unknown, force a link status check. Don't force
1640 * the link status check more often than _FORCE_LINK_EVENT_INTERVAL
1641 * seconds.
1642 */
1643#define _FORCE_LINK_EVENT_INTERVAL 1
1644 if (media_type_unknown(mi: &p->po_media_info)) {
1645 uint64_t now = net_uptime();
1646
1647 if ((now - p->po_force_link_event_time) >=
1648 _FORCE_LINK_EVENT_INTERVAL) {
1649 need_link_update = true;
1650 p->po_force_link_event_time = now;
1651 }
1652 }
1653 bondport_receive_lacpdu(p, in_lacpdu_p: (lacpdu_ref)m_mtod_current(m));
1654 if (ifbond_selection(bond: ifb)) {
1655 event_code = (ifb->ifb_active_lag == NULL)
1656 ? KEV_DL_LINK_OFF
1657 : KEV_DL_LINK_ON;
1658 /* XXX need to take a reference on bond_ifp */
1659 bond_ifp = ifb->ifb_ifp;
1660 ifb->ifb_last_link_event = event_code;
1661 } else {
1662 event_code = (ifb->ifb_active_lag == NULL)
1663 ? KEV_DL_LINK_OFF
1664 : KEV_DL_LINK_ON;
1665 if (event_code != ifb->ifb_last_link_event) {
1666 if (if_bond_debug) {
1667 timestamp_printf("%s: (receive) generating LINK event\n",
1668 ifb->ifb_name);
1669 }
1670 bond_ifp = ifb->ifb_ifp;
1671 ifb->ifb_last_link_event = event_code;
1672 }
1673 }
1674
1675done:
1676 bond_unlock();
1677 if (bond_ifp != NULL) {
1678 interface_link_event(ifp: bond_ifp, event_code);
1679 }
1680 m_freem(m);
1681 if (need_link_update) {
1682 if (if_bond_debug != 0) {
1683 printf("bond: simulating link status changed event");
1684 }
1685 bond_handle_event(port_ifp, KEV_DL_LINK_ON);
1686 }
1687 return;
1688}
1689
1690static void
1691bond_receive_la_marker_pdu(struct mbuf * m, struct ifnet * port_ifp)
1692{
1693 la_marker_pdu_ref marker_p;
1694 bondport_ref p;
1695
1696 marker_p = (la_marker_pdu_ref)(m_mtod_current(m) + ETHER_HDR_LEN);
1697 if (marker_p->lm_marker_tlv_type != LA_MARKER_TLV_TYPE_MARKER) {
1698 goto failed;
1699 }
1700 bond_lock();
1701 if ((ifnet_eflags(interface: port_ifp) & IFEF_BOND) == 0) {
1702 bond_unlock();
1703 goto failed;
1704 }
1705 p = bond_lookup_port(port_ifp);
1706 if (p == NULL || p->po_enabled == 0
1707 || p->po_bond->ifb_mode != IF_BOND_MODE_LACP) {
1708 bond_unlock();
1709 goto failed;
1710 }
1711 /* echo back the same packet as a marker response */
1712 marker_p->lm_marker_tlv_type = LA_MARKER_TLV_TYPE_MARKER_RESPONSE;
1713 bondport_slow_proto_transmit(p, buf: (packet_buffer_ref)m);
1714 bond_unlock();
1715 return;
1716
1717failed:
1718 m_freem(m);
1719 return;
1720}
1721
1722static void
1723bond_input(ifnet_t port_ifp, mbuf_t m, char *frame_header)
1724{
1725 bpf_packet_func bpf_func;
1726 const struct ether_header * eh_p;
1727 ifbond_ref ifb;
1728 struct ifnet * ifp;
1729 bondport_ref p;
1730
1731 eh_p = (const struct ether_header *)frame_header;
1732 if ((m->m_flags & M_MCAST) != 0
1733 && bcmp(s1: eh_p->ether_dhost, s2: &slow_proto_multicast,
1734 n: sizeof(eh_p->ether_dhost)) == 0
1735 && ntohs(eh_p->ether_type) == IEEE8023AD_SLOW_PROTO_ETHERTYPE) {
1736 u_char subtype = *mtod(m, u_char *);
1737
1738 if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LACP) {
1739 if (m->m_pkthdr.len < (int)offsetof(lacpdu, la_reserved)) {
1740 m_freem(m);
1741 return;
1742 }
1743 /* send to lacp */
1744 if (m->m_len < (int)offsetof(lacpdu, la_reserved)) {
1745 m = m_pullup(m, offsetof(lacpdu, la_reserved));
1746 if (m == NULL) {
1747 return;
1748 }
1749 }
1750 bond_receive_lacpdu(m, port_ifp);
1751 return;
1752 } else if (subtype == IEEE8023AD_SLOW_PROTO_SUBTYPE_LA_MARKER_PROTOCOL) {
1753 int min_size;
1754
1755 /* restore the ethernet header pointer in the mbuf */
1756 m->m_pkthdr.len += ETHER_HDR_LEN;
1757 m->m_data -= ETHER_HDR_LEN;
1758 m->m_len += ETHER_HDR_LEN;
1759 min_size = ETHER_HDR_LEN + offsetof(la_marker_pdu, lm_reserved);
1760 if (m->m_pkthdr.len < min_size) {
1761 m_freem(m);
1762 return;
1763 }
1764 /* send to lacp */
1765 if (m->m_len < min_size) {
1766 m = m_pullup(m, min_size);
1767 if (m == NULL) {
1768 return;
1769 }
1770 }
1771 /* send to marker responder */
1772 bond_receive_la_marker_pdu(m, port_ifp);
1773 return;
1774 } else if (subtype == 0
1775 || subtype > IEEE8023AD_SLOW_PROTO_SUBTYPE_RESERVED_END) {
1776 /* invalid subtype, discard the frame */
1777 m_freem(m);
1778 return;
1779 }
1780 }
1781 bond_lock();
1782 if ((ifnet_eflags(interface: port_ifp) & IFEF_BOND) == 0) {
1783 goto done;
1784 }
1785 p = bond_lookup_port(port_ifp);
1786 if (p == NULL || bondport_collecting(p) == 0) {
1787 goto done;
1788 }
1789
1790 ifb = p->po_bond;
1791 ifp = ifb->ifb_ifp;
1792 bpf_func = ifb->ifb_bpf_input;
1793 bond_unlock();
1794
1795 /*
1796 * Need to clear the promiscous flags otherwise it will be
1797 * dropped by DLIL after processing filters
1798 */
1799 if ((mbuf_flags(mbuf: m) & MBUF_PROMISC)) {
1800 mbuf_setflags_mask(mbuf: m, flags: 0, mask: MBUF_PROMISC);
1801 }
1802
1803 if (m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) {
1804 (void)ifnet_stat_increment_in(interface: ifp, packets_in: 1,
1805 bytes_in: (m->m_pkthdr.len + ETHER_HDR_LEN
1806 + ETHER_VLAN_ENCAP_LEN), errors_in: 0);
1807 } else {
1808 (void)ifnet_stat_increment_in(interface: ifp, packets_in: 1,
1809 bytes_in: (m->m_pkthdr.len + ETHER_HDR_LEN), errors_in: 0);
1810 }
1811
1812 /* make the packet appear as if it arrived on the bonded interface */
1813 m->m_pkthdr.rcvif = ifp;
1814 bond_bpf_input(ifp, m, eh_p, func: bpf_func);
1815 m->m_pkthdr.pkt_hdr = frame_header;
1816 dlil_input_packet_list(ifp, m);
1817 return;
1818
1819done:
1820 bond_unlock();
1821 m_freem(m);
1822 return;
1823}
1824
1825static errno_t
1826bond_iff_input(void *cookie, ifnet_t port_ifp, protocol_family_t protocol,
1827 mbuf_t *data, char **frame_header_ptr)
1828{
1829#pragma unused(cookie)
1830#pragma unused(protocol)
1831 mbuf_t m = *data;
1832 char * frame_header = *frame_header_ptr;
1833
1834 bond_input(port_ifp, m, frame_header);
1835 return EJUSTRETURN;
1836}
1837
1838static __inline__ const char *
1839bondport_get_name(bondport_ref p)
1840{
1841 return p->po_name;
1842}
1843
1844static __inline__ int
1845bondport_get_index(bondport_ref p)
1846{
1847 return ifnet_index(interface: p->po_ifp);
1848}
1849
1850static void
1851bondport_slow_proto_transmit(bondport_ref p, packet_buffer_ref buf)
1852{
1853 struct ether_header * eh_p;
1854 int error;
1855
1856 /* packet_buffer_allocate leaves room for ethernet header */
1857 eh_p = mtod(buf, struct ether_header *);
1858 bcopy(src: &slow_proto_multicast, dst: &eh_p->ether_dhost, n: sizeof(eh_p->ether_dhost));
1859 bcopy(src: &p->po_saved_addr, dst: eh_p->ether_shost, n: sizeof(eh_p->ether_shost));
1860 eh_p->ether_type = htons(IEEE8023AD_SLOW_PROTO_ETHERTYPE);
1861 error = ifnet_output_raw(interface: p->po_ifp, PF_BOND, packet: buf);
1862 if (error != 0) {
1863 printf("bondport_slow_proto_transmit(%s) failed %d\n",
1864 bondport_get_name(p), error);
1865 }
1866 return;
1867}
1868
1869static void
1870bondport_timer_process_func(devtimer_ref timer,
1871 devtimer_process_func_event event)
1872{
1873 bondport_ref p;
1874
1875 switch (event) {
1876 case devtimer_process_func_event_lock:
1877 bond_lock();
1878 devtimer_retain(timer);
1879 break;
1880 case devtimer_process_func_event_unlock:
1881 if (devtimer_valid(timer)) {
1882 /* as long as the devtimer is valid, we can look at arg0 */
1883 int event_code = 0;
1884 struct ifnet * bond_ifp = NULL;
1885
1886 p = (bondport_ref)devtimer_arg0(timer);
1887 if (ifbond_selection(bond: p->po_bond)) {
1888 event_code = (p->po_bond->ifb_active_lag == NULL)
1889 ? KEV_DL_LINK_OFF
1890 : KEV_DL_LINK_ON;
1891 /* XXX need to take a reference on bond_ifp */
1892 bond_ifp = p->po_bond->ifb_ifp;
1893 p->po_bond->ifb_last_link_event = event_code;
1894 } else {
1895 event_code = (p->po_bond->ifb_active_lag == NULL)
1896 ? KEV_DL_LINK_OFF
1897 : KEV_DL_LINK_ON;
1898 if (event_code != p->po_bond->ifb_last_link_event) {
1899 if (if_bond_debug) {
1900 timestamp_printf("%s: (timer) generating LINK event\n",
1901 p->po_bond->ifb_name);
1902 }
1903 bond_ifp = p->po_bond->ifb_ifp;
1904 p->po_bond->ifb_last_link_event = event_code;
1905 }
1906 }
1907 devtimer_release(timer);
1908 bond_unlock();
1909 if (bond_ifp != NULL) {
1910 interface_link_event(ifp: bond_ifp, event_code);
1911 }
1912 } else {
1913 /* timer is going away */
1914 devtimer_release(timer);
1915 bond_unlock();
1916 }
1917 break;
1918 default:
1919 break;
1920 }
1921}
1922
1923static bondport_ref
1924bondport_create(struct ifnet * port_ifp, lacp_port_priority priority,
1925 int active, int short_timeout, int * ret_error)
1926{
1927 int error = 0;
1928 bondport_ref p = NULL;
1929 lacp_actor_partner_state s;
1930
1931 *ret_error = 0;
1932 p = kalloc_type(struct bondport_s, Z_WAITOK | Z_ZERO | Z_NOFAIL);
1933 multicast_list_init(mc_list: &p->po_multicast);
1934 if ((u_int32_t)snprintf(p->po_name, count: sizeof(p->po_name), "%s%d",
1935 ifnet_name(interface: port_ifp), ifnet_unit(interface: port_ifp))
1936 >= sizeof(p->po_name)) {
1937 printf("if_bond: name too large\n");
1938 *ret_error = EINVAL;
1939 goto failed;
1940 }
1941 error = siocgifdevmtu(ifp: port_ifp, ifdm_p: &p->po_devmtu);
1942 if (error != 0) {
1943 printf("if_bond: SIOCGIFDEVMTU %s failed, %d\n",
1944 bondport_get_name(p), error);
1945 goto failed;
1946 }
1947 /* remember the current interface MTU so it can be restored */
1948 p->po_devmtu.ifdm_current = ifnet_mtu(interface: port_ifp);
1949 p->po_ifp = port_ifp;
1950 p->po_media_info = interface_media_info(ifp: port_ifp);
1951 p->po_current_while_timer = devtimer_create(process_func: bondport_timer_process_func, arg0: p);
1952 if (p->po_current_while_timer == NULL) {
1953 *ret_error = ENOMEM;
1954 goto failed;
1955 }
1956 p->po_periodic_timer = devtimer_create(process_func: bondport_timer_process_func, arg0: p);
1957 if (p->po_periodic_timer == NULL) {
1958 *ret_error = ENOMEM;
1959 goto failed;
1960 }
1961 p->po_wait_while_timer = devtimer_create(process_func: bondport_timer_process_func, arg0: p);
1962 if (p->po_wait_while_timer == NULL) {
1963 *ret_error = ENOMEM;
1964 goto failed;
1965 }
1966 p->po_transmit_timer = devtimer_create(process_func: bondport_timer_process_func, arg0: p);
1967 if (p->po_transmit_timer == NULL) {
1968 *ret_error = ENOMEM;
1969 goto failed;
1970 }
1971 p->po_receive_state = ReceiveState_none;
1972 p->po_mux_state = MuxState_none;
1973 p->po_priority = priority;
1974 s = 0;
1975 s = lacp_actor_partner_state_set_aggregatable(state: s);
1976 if (short_timeout) {
1977 s = lacp_actor_partner_state_set_short_timeout(state: s);
1978 }
1979 if (active) {
1980 s = lacp_actor_partner_state_set_active_lacp(state: s);
1981 }
1982 p->po_actor_state = s;
1983 return p;
1984
1985failed:
1986 bondport_free(p);
1987 return NULL;
1988}
1989
1990static void
1991bondport_start(bondport_ref p)
1992{
1993 bondport_receive_machine(p, event: LAEventStart, NULL);
1994 bondport_mux_machine(p, event: LAEventStart, NULL);
1995 bondport_periodic_transmit_machine(p, event: LAEventStart, NULL);
1996 bondport_transmit_machine(p, event: LAEventStart, NULL);
1997 return;
1998}
1999
2000/*
2001 * Function: bondport_invalidate_timers
2002 * Purpose:
2003 * Invalidate all of the timers for the bondport.
2004 */
2005static void
2006bondport_invalidate_timers(bondport_ref p)
2007{
2008 devtimer_invalidate(timer: p->po_current_while_timer);
2009 devtimer_invalidate(timer: p->po_periodic_timer);
2010 devtimer_invalidate(timer: p->po_wait_while_timer);
2011 devtimer_invalidate(timer: p->po_transmit_timer);
2012}
2013
2014/*
2015 * Function: bondport_cancel_timers
2016 * Purpose:
2017 * Cancel all of the timers for the bondport.
2018 */
2019static void
2020bondport_cancel_timers(bondport_ref p)
2021{
2022 devtimer_cancel(t: p->po_current_while_timer);
2023 devtimer_cancel(t: p->po_periodic_timer);
2024 devtimer_cancel(t: p->po_wait_while_timer);
2025 devtimer_cancel(t: p->po_transmit_timer);
2026}
2027
2028static void
2029bondport_free(bondport_ref p)
2030{
2031 multicast_list_remove(mc_list: &p->po_multicast);
2032 devtimer_release(timer: p->po_current_while_timer);
2033 devtimer_release(timer: p->po_periodic_timer);
2034 devtimer_release(timer: p->po_wait_while_timer);
2035 devtimer_release(timer: p->po_transmit_timer);
2036 kfree_type(struct bondport_s, p);
2037 return;
2038}
2039
2040static __inline__ int
2041bond_device_mtu(struct ifnet * ifp, ifbond_ref ifb)
2042{
2043 return ((int)ifnet_mtu(interface: ifp) > ifb->ifb_altmtu)
2044 ? (int)ifnet_mtu(interface: ifp) : ifb->ifb_altmtu;
2045}
2046
2047static int
2048bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp)
2049{
2050 u_int32_t eflags;
2051 uint32_t control_flags = 0;
2052 int devmtu;
2053 int error = 0;
2054 int event_code = 0;
2055 interface_filter_t filter = NULL;
2056 int first = FALSE;
2057 ifbond_ref ifb;
2058 bondport_ref * new_array = NULL;
2059 bondport_ref * old_array = NULL;
2060 bondport_ref p;
2061 int old_max = 0;
2062 int new_max = 0;
2063
2064 if (IFNET_IS_INTCOPROC(port_ifp) || IFNET_IS_MANAGEMENT(port_ifp)) {
2065 return EINVAL;
2066 }
2067
2068 /* pre-allocate space for new port */
2069 p = bondport_create(port_ifp, priority: 0x8000, active: 1, short_timeout: 0, ret_error: &error);
2070 if (p == NULL) {
2071 return error;
2072 }
2073 bond_lock();
2074 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
2075 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2076 bond_unlock();
2077 bondport_free(p);
2078 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2079 }
2080
2081 /* make sure this interface can handle our current MTU */
2082 devmtu = bond_device_mtu(ifp, ifb);
2083 if (devmtu != 0
2084 && (devmtu > p->po_devmtu.ifdm_max || devmtu < p->po_devmtu.ifdm_min)) {
2085 bond_unlock();
2086 printf("if_bond: interface %s doesn't support mtu %d",
2087 bondport_get_name(p), devmtu);
2088 bondport_free(p);
2089 return EINVAL;
2090 }
2091
2092 /* make sure ifb doesn't get de-allocated while we wait */
2093 ifbond_retain(ifb);
2094
2095 /* wait for other add or remove to complete */
2096 ifbond_wait(ifb, msg: __func__);
2097
2098 if (ifbond_flags_if_detaching(ifb)) {
2099 /* someone destroyed the bond while we were waiting */
2100 error = EBUSY;
2101 goto signal_done;
2102 }
2103 if (bond_lookup_port(port_ifp) != NULL) {
2104 /* port is already part of a bond */
2105 error = EBUSY;
2106 goto signal_done;
2107 }
2108 if ((ifnet_eflags(interface: port_ifp) & (IFEF_VLAN | IFEF_BOND)) != 0) {
2109 /* interface already has VLAN's, or is part of bond */
2110 error = EBUSY;
2111 goto signal_done;
2112 }
2113
2114 /* mark the interface busy */
2115 eflags = if_set_eflags(port_ifp, IFEF_BOND);
2116 if ((eflags & IFEF_VLAN) != 0) {
2117 /* vlan got in ahead of us */
2118 if_clear_eflags(port_ifp, IFEF_BOND);
2119 error = EBUSY;
2120 goto signal_done;
2121 }
2122
2123 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2124 ifnet_set_offload(interface: ifp, offload: ifnet_offload(interface: port_ifp));
2125 ifnet_set_flags(interface: ifp, IFF_RUNNING, IFF_RUNNING);
2126 if (ifbond_flags_lladdr(ifb) == FALSE) {
2127 first = TRUE;
2128 }
2129 } else {
2130 ifnet_offload_t ifp_offload;
2131 ifnet_offload_t port_ifp_offload;
2132
2133 ifp_offload = ifnet_offload(interface: ifp);
2134 port_ifp_offload = ifnet_offload(interface: port_ifp);
2135 if (ifp_offload != port_ifp_offload) {
2136 ifnet_offload_t offload;
2137
2138 offload = ifp_offload & port_ifp_offload;
2139 printf("%s(%s, %s) "
2140 "hwassist values don't match 0x%x != 0x%x, using 0x%x instead\n",
2141 __func__,
2142 ifb->ifb_name, bondport_get_name(p),
2143 ifp_offload, port_ifp_offload, offload);
2144 /*
2145 * XXX
2146 * if the bond has VLAN's, we can't simply change the hwassist
2147 * field behind its back: this needs work
2148 */
2149 ifnet_set_offload(interface: ifp, offload);
2150 }
2151 }
2152 p->po_bond = ifb;
2153
2154 /* remember the port's ethernet address so it can be restored */
2155 ether_addr_copy(dest: &p->po_saved_addr, IF_LLADDR(port_ifp));
2156
2157 /* add it to the list of ports */
2158 TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list);
2159 ifb->ifb_port_count++;
2160
2161 bond_unlock();
2162
2163
2164 /* first port added to bond determines bond's ethernet address */
2165 if (first) {
2166 ifnet_set_lladdr_and_type(interface: ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN,
2167 IFT_ETHER);
2168 }
2169 uint32_bit_set(flags_p: &control_flags, PORT_CONTROL_FLAGS_IN_LIST);
2170
2171 /* allocate a larger distributing array */
2172 new_max = ifb->ifb_port_count;
2173 new_array = kalloc_type(bondport_ref, new_max, Z_WAITOK);
2174 if (new_array == NULL) {
2175 error = ENOMEM;
2176 goto failed;
2177 }
2178
2179 /* attach our BOND "protocol" to the interface */
2180 error = bond_attach_protocol(ifp: port_ifp);
2181 if (error) {
2182 goto failed;
2183 }
2184 uint32_bit_set(flags_p: &control_flags, PORT_CONTROL_FLAGS_PROTO_ATTACHED);
2185
2186 /* attach our BOND interface filter */
2187 error = bond_attach_filter(ifp: port_ifp, filter_p: &filter);
2188 if (error != 0) {
2189 goto failed;
2190 }
2191 uint32_bit_set(flags_p: &control_flags, PORT_CONTROL_FLAGS_FILTER_ATTACHED);
2192
2193 /* set the interface MTU */
2194 devmtu = bond_device_mtu(ifp, ifb);
2195 error = siocsifmtu(ifp: port_ifp, mtu: devmtu);
2196 if (error != 0) {
2197 printf("%s(%s, %s):"
2198 " SIOCSIFMTU %d failed %d\n",
2199 __func__,
2200 ifb->ifb_name, bondport_get_name(p), devmtu, error);
2201 goto failed;
2202 }
2203 uint32_bit_set(flags_p: &control_flags, PORT_CONTROL_FLAGS_MTU_SET);
2204
2205 /* program the port with our multicast addresses */
2206 error = multicast_list_program(mc_list: &p->po_multicast, source_ifp: ifp, target_ifp: port_ifp);
2207 if (error) {
2208 printf("%s(%s, %s): multicast_list_program failed %d\n",
2209 __func__,
2210 ifb->ifb_name, bondport_get_name(p), error);
2211 goto failed;
2212 }
2213
2214 /* mark the interface up */
2215 ifnet_set_flags(interface: port_ifp, IFF_UP, IFF_UP);
2216
2217 error = ifnet_ioctl(interface: port_ifp, protocol: 0, SIOCSIFFLAGS, NULL);
2218 if (error != 0) {
2219 printf("%s(%s, %s): SIOCSIFFLAGS failed %d\n",
2220 __func__,
2221 ifb->ifb_name, bondport_get_name(p), error);
2222 goto failed;
2223 }
2224
2225 /* re-program the port's ethernet address */
2226 error = if_siflladdr(ifp: port_ifp,
2227 ea_p: (const struct ether_addr *)IF_LLADDR(ifp));
2228 if (error == 0) {
2229 if (memcmp(IF_LLADDR(ifp), IF_LLADDR(port_ifp), ETHER_ADDR_LEN)
2230 != 0) {
2231 /* it lied, it really doesn't support setting lladdr */
2232 error = EOPNOTSUPP;
2233 }
2234 }
2235 if (error != 0) {
2236 /* port doesn't support setting the link address */
2237 printf("%s(%s, %s): if_siflladdr failed %d\n",
2238 __func__,
2239 ifb->ifb_name, bondport_get_name(p), error);
2240 error = ifnet_set_promiscuous(interface: port_ifp, on: 1);
2241 if (error != 0) {
2242 /* port doesn't support setting promiscuous mode */
2243 printf("%s(%s, %s): set promiscuous failed %d\n",
2244 __func__,
2245 ifb->ifb_name, bondport_get_name(p), error);
2246 goto failed;
2247 }
2248 uint32_bit_set(flags_p: &control_flags,
2249 PORT_CONTROL_FLAGS_PROMISCUOUS_SET);
2250 } else {
2251 uint32_bit_set(flags_p: &control_flags,
2252 PORT_CONTROL_FLAGS_LLADDR_SET);
2253 }
2254
2255 /* if we're in promiscuous mode, enable that as well */
2256 if (ifbond_flags_promisc(ifb)) {
2257 error = ifnet_set_promiscuous(interface: port_ifp, on: 1);
2258 if (error != 0) {
2259 /* port doesn't support setting promiscuous mode */
2260 printf("%s(%s, %s): set promiscuous failed %d\n",
2261 __func__,
2262 ifb->ifb_name, bondport_get_name(p), error);
2263 goto failed;
2264 }
2265 uint32_bit_set(flags_p: &control_flags,
2266 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2267 }
2268
2269 bond_lock();
2270
2271 /* no failures past this point */
2272 p->po_enabled = 1;
2273 p->po_control_flags = control_flags;
2274
2275 /* copy the contents of the existing distributing array */
2276 if (ifb->ifb_distributing_count) {
2277 bcopy(src: ifb->ifb_distributing_array, dst: new_array,
2278 n: sizeof(*new_array) * ifb->ifb_distributing_count);
2279 }
2280 old_array = ifb->ifb_distributing_array;
2281 old_max = ifb->ifb_distributing_max;
2282 ifb->ifb_distributing_array = new_array;
2283 ifb->ifb_distributing_max = new_max;
2284
2285 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2286 bondport_start(p);
2287
2288 /* check if we need to generate a link status event */
2289 if (ifbond_selection(bond: ifb)) {
2290 event_code = (ifb->ifb_active_lag == NULL)
2291 ? KEV_DL_LINK_OFF
2292 : KEV_DL_LINK_ON;
2293 ifb->ifb_last_link_event = event_code;
2294 }
2295 } else {
2296 /* are we adding the first distributing interface? */
2297 if (media_active(mi: &p->po_media_info)) {
2298 if (ifb->ifb_distributing_count == 0) {
2299 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_ON;
2300 }
2301 bondport_enable_distributing(p);
2302 } else {
2303 bondport_disable_distributing(p);
2304 }
2305 }
2306 p->po_filter = filter;
2307
2308 /* clear the busy state, and wakeup anyone waiting */
2309 ifbond_signal(ifb, msg: __func__);
2310 bond_unlock();
2311 if (event_code != 0) {
2312 interface_link_event(ifp, event_code);
2313 }
2314 kfree_type(bondport_ref, old_max, old_array);
2315 return 0;
2316
2317failed:
2318 bond_assert_lock_not_held();
2319
2320 /* if this was the first port to be added, clear our address */
2321 if (first) {
2322 ifnet_set_lladdr_and_type(interface: ifp, NULL, length: 0, IFT_IEEE8023ADLAG);
2323 }
2324
2325 kfree_type(bondport_ref, new_max, new_array);
2326 if (uint32_bit_is_set(flags: control_flags,
2327 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2328 int error1;
2329
2330 error1 = if_siflladdr(ifp: port_ifp, ea_p: &p->po_saved_addr);
2331 if (error1 != 0) {
2332 printf("%s(%s, %s): if_siflladdr restore failed %d\n",
2333 __func__,
2334 ifb->ifb_name, bondport_get_name(p), error1);
2335 }
2336 }
2337 if (uint32_bit_is_set(flags: control_flags,
2338 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2339 int error1;
2340
2341 error1 = ifnet_set_promiscuous(interface: port_ifp, on: 0);
2342 if (error1 != 0) {
2343 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2344 __func__,
2345 ifb->ifb_name, bondport_get_name(p), error1);
2346 }
2347 }
2348 if (uint32_bit_is_set(flags: control_flags,
2349 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2350 int error1;
2351
2352 error1 = ifnet_set_promiscuous(interface: port_ifp, on: 0);
2353 if (error1 != 0) {
2354 printf("%s(%s, %s): promiscous mode disable failed %d\n",
2355 __func__,
2356 ifb->ifb_name, bondport_get_name(p), error1);
2357 }
2358 }
2359 if (uint32_bit_is_set(flags: control_flags,
2360 PORT_CONTROL_FLAGS_PROTO_ATTACHED)) {
2361 (void)bond_detach_protocol(ifp: port_ifp);
2362 }
2363 if (uint32_bit_is_set(flags: control_flags,
2364 PORT_CONTROL_FLAGS_FILTER_ATTACHED)) {
2365 iflt_detach(filter_ref: filter);
2366 }
2367 if (uint32_bit_is_set(flags: control_flags,
2368 PORT_CONTROL_FLAGS_MTU_SET)) {
2369 int error1;
2370
2371 error1 = siocsifmtu(ifp: port_ifp, mtu: p->po_devmtu.ifdm_current);
2372 if (error1 != 0) {
2373 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2374 __func__,
2375 ifb->ifb_name, bondport_get_name(p),
2376 p->po_devmtu.ifdm_current, error1);
2377 }
2378 }
2379 bond_lock();
2380 if (uint32_bit_is_set(flags: control_flags,
2381 PORT_CONTROL_FLAGS_IN_LIST)) {
2382 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2383 ifb->ifb_port_count--;
2384 }
2385 if_clear_eflags(ifp, IFEF_BOND);
2386 if (TAILQ_EMPTY(&ifb->ifb_port_list)) {
2387 ifb->ifb_altmtu = 0;
2388 ifnet_set_mtu(interface: ifp, ETHERMTU);
2389 ifnet_set_offload(interface: ifp, offload: 0);
2390 }
2391
2392signal_done:
2393 ifbond_signal(ifb, msg: __func__);
2394 bond_unlock();
2395 ifbond_release(ifb);
2396 bondport_free(p);
2397 return error;
2398}
2399
2400static int
2401bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp)
2402{
2403 int active_lag = 0;
2404 int error = 0;
2405 int event_code = 0;
2406 bondport_ref head_port;
2407 struct ifnet * ifp;
2408 interface_filter_t filter;
2409 int last = FALSE;
2410 int new_link_address = FALSE;
2411 bondport_ref p;
2412 lacp_actor_partner_state s;
2413 int was_distributing;
2414
2415 bond_assert_lock_held();
2416
2417 ifbond_retain(ifb);
2418 ifbond_wait(ifb, msg: "bond_remove_interface");
2419
2420 p = ifbond_lookup_port(ifb, port_ifp);
2421 if (p == NULL) {
2422 error = ENXIO;
2423 /* it got removed by another thread */
2424 goto signal_done;
2425 }
2426
2427 /* de-select it and remove it from the lists */
2428 was_distributing = bondport_flags_distributing(p);
2429 bondport_disable_distributing(p);
2430 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2431 bondport_set_selected(p, s: SelectedState_UNSELECTED);
2432 active_lag = bondport_remove_from_LAG(p);
2433 /* invalidate timers here while holding the bond_lock */
2434 bondport_invalidate_timers(p);
2435
2436 /* announce that we're Individual now */
2437 s = p->po_actor_state;
2438 s = lacp_actor_partner_state_set_individual(state: s);
2439 s = lacp_actor_partner_state_set_not_collecting(state: s);
2440 s = lacp_actor_partner_state_set_not_distributing(state: s);
2441 s = lacp_actor_partner_state_set_out_of_sync(state: s);
2442 p->po_actor_state = s;
2443 bondport_flags_set_ntt(p);
2444 }
2445
2446 TAILQ_REMOVE(&ifb->ifb_port_list, p, po_port_list);
2447 ifb->ifb_port_count--;
2448
2449 ifp = ifb->ifb_ifp;
2450 head_port = TAILQ_FIRST(&ifb->ifb_port_list);
2451 if (head_port == NULL) {
2452 ifnet_set_flags(interface: ifp, new_flags: 0, IFF_RUNNING);
2453 if (ifbond_flags_lladdr(ifb) == FALSE) {
2454 last = TRUE;
2455 }
2456 ifnet_set_offload(interface: ifp, offload: 0);
2457 ifnet_set_mtu(interface: ifp, ETHERMTU);
2458 ifb->ifb_altmtu = 0;
2459 } else if (ifbond_flags_lladdr(ifb) == FALSE
2460 && bcmp(s1: &p->po_saved_addr, IF_LLADDR(ifp),
2461 ETHER_ADDR_LEN) == 0) {
2462 new_link_address = TRUE;
2463 }
2464 /* check if we need to generate a link status event */
2465 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2466 if (ifbond_selection(bond: ifb) || active_lag) {
2467 event_code = (ifb->ifb_active_lag == NULL)
2468 ? KEV_DL_LINK_OFF
2469 : KEV_DL_LINK_ON;
2470 ifb->ifb_last_link_event = event_code;
2471 }
2472 bondport_transmit_machine(p, event: LAEventStart,
2473 TRANSMIT_MACHINE_TX_IMMEDIATE);
2474 } else {
2475 /* are we removing the last distributing interface? */
2476 if (was_distributing && ifb->ifb_distributing_count == 0) {
2477 ifb->ifb_last_link_event = event_code = KEV_DL_LINK_OFF;
2478 }
2479 }
2480 filter = p->po_filter;
2481 bond_unlock();
2482
2483 if (last) {
2484 ifnet_set_lladdr_and_type(interface: ifp, NULL, length: 0, IFT_IEEE8023ADLAG);
2485 } else if (new_link_address) {
2486 struct ifnet * scan_ifp;
2487 bondport_ref scan_port;
2488
2489 /* ifbond_wait() allows port list traversal without holding the lock */
2490
2491 /* this port gave the bond its ethernet address, switch to new one */
2492 ifnet_set_lladdr_and_type(interface: ifp,
2493 lladdr: &head_port->po_saved_addr, ETHER_ADDR_LEN,
2494 IFT_ETHER);
2495
2496 /* re-program each port with the new link address */
2497 TAILQ_FOREACH(scan_port, &ifb->ifb_port_list, po_port_list) {
2498 scan_ifp = scan_port->po_ifp;
2499
2500 if (!uint32_bit_is_set(flags: scan_port->po_control_flags,
2501 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2502 /* port doesn't support setting lladdr */
2503 continue;
2504 }
2505 error = if_siflladdr(ifp: scan_ifp,
2506 ea_p: (const struct ether_addr *) IF_LLADDR(ifp));
2507 if (error != 0) {
2508 printf("%s(%s, %s): "
2509 "if_siflladdr (%s) failed %d\n",
2510 __func__,
2511 ifb->ifb_name, bondport_get_name(p),
2512 bondport_get_name(p: scan_port), error);
2513 }
2514 }
2515 }
2516
2517 /* restore the port's ethernet address */
2518 if (uint32_bit_is_set(flags: p->po_control_flags,
2519 PORT_CONTROL_FLAGS_LLADDR_SET)) {
2520 error = if_siflladdr(ifp: port_ifp, ea_p: &p->po_saved_addr);
2521 if (error != 0) {
2522 printf("%s(%s, %s): if_siflladdr failed %d\n",
2523 __func__,
2524 ifb->ifb_name, bondport_get_name(p), error);
2525 }
2526 }
2527
2528 /* disable promiscous mode (if we enabled it) */
2529 if (uint32_bit_is_set(flags: p->po_control_flags,
2530 PORT_CONTROL_FLAGS_PROMISCUOUS_SET)) {
2531 error = ifnet_set_promiscuous(interface: port_ifp, on: 0);
2532 if (error != 0) {
2533 printf("%s(%s, %s): disable promiscuous failed %d\n",
2534 __func__,
2535 ifb->ifb_name, bondport_get_name(p), error);
2536 }
2537 }
2538
2539 /* disable promiscous mode from bond (if we enabled it) */
2540 if (uint32_bit_is_set(flags: p->po_control_flags,
2541 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET)) {
2542 error = ifnet_set_promiscuous(interface: port_ifp, on: 0);
2543 if (error != 0) {
2544 printf("%s(%s, %s): disable promiscuous failed %d\n",
2545 __func__,
2546 ifb->ifb_name, bondport_get_name(p), error);
2547 }
2548 }
2549
2550 /* restore the port's MTU */
2551 error = siocsifmtu(ifp: port_ifp, mtu: p->po_devmtu.ifdm_current);
2552 if (error != 0) {
2553 printf("%s(%s, %s): SIOCSIFMTU %d failed %d\n",
2554 __func__,
2555 ifb->ifb_name, bondport_get_name(p),
2556 p->po_devmtu.ifdm_current, error);
2557 }
2558
2559 /* remove the bond "protocol" */
2560 bond_detach_protocol(ifp: port_ifp);
2561
2562 /* detach the filter */
2563 if (filter != NULL) {
2564 iflt_detach(filter_ref: filter);
2565 }
2566
2567 /* generate link event */
2568 if (event_code != 0) {
2569 interface_link_event(ifp, event_code);
2570 }
2571
2572 bond_lock();
2573 bondport_free(p);
2574 if_clear_eflags(port_ifp, IFEF_BOND);
2575 /* release this bondport's reference to the ifbond */
2576 ifbond_release(ifb);
2577
2578signal_done:
2579 ifbond_signal(ifb, msg: __func__);
2580 ifbond_release(ifb);
2581 return error;
2582}
2583
2584static void
2585bond_set_lacp_mode(ifbond_ref ifb)
2586{
2587 bondport_ref p;
2588
2589 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2590 bondport_disable_distributing(p);
2591 bondport_start(p);
2592 }
2593 return;
2594}
2595
2596static void
2597bond_set_static_mode(ifbond_ref ifb)
2598{
2599 bondport_ref p;
2600 lacp_actor_partner_state s;
2601
2602 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2603 bondport_disable_distributing(p);
2604 bondport_set_selected(p, s: SelectedState_UNSELECTED);
2605 (void)bondport_remove_from_LAG(p);
2606 bondport_cancel_timers(p);
2607
2608 /* announce that we're Individual now */
2609 s = p->po_actor_state;
2610 s = lacp_actor_partner_state_set_individual(state: s);
2611 s = lacp_actor_partner_state_set_not_collecting(state: s);
2612 s = lacp_actor_partner_state_set_not_distributing(state: s);
2613 s = lacp_actor_partner_state_set_out_of_sync(state: s);
2614 p->po_actor_state = s;
2615 bondport_flags_set_ntt(p);
2616 bondport_transmit_machine(p, event: LAEventStart,
2617 TRANSMIT_MACHINE_TX_IMMEDIATE);
2618 /* clear state */
2619 p->po_actor_state = 0;
2620 bzero(s: &p->po_partner_state, n: sizeof(p->po_partner_state));
2621
2622 if (media_active(mi: &p->po_media_info)) {
2623 bondport_enable_distributing(p);
2624 } else {
2625 bondport_disable_distributing(p);
2626 }
2627 }
2628 return;
2629}
2630
2631static int
2632bond_set_mode(struct ifnet * ifp, int mode)
2633{
2634 int error = 0;
2635 int event_code = 0;
2636 ifbond_ref ifb;
2637
2638 bond_lock();
2639 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
2640 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2641 bond_unlock();
2642 return (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2643 }
2644 if (ifb->ifb_mode == mode) {
2645 bond_unlock();
2646 return 0;
2647 }
2648
2649 ifbond_retain(ifb);
2650 ifbond_wait(ifb, msg: "bond_set_mode");
2651
2652 /* verify (again) that the mode is actually different */
2653 if (ifb->ifb_mode == mode) {
2654 /* nothing to do */
2655 goto signal_done;
2656 }
2657
2658 ifb->ifb_mode = mode;
2659 if (mode == IF_BOND_MODE_LACP) {
2660 bond_set_lacp_mode(ifb);
2661
2662 /* check if we need to generate a link status event */
2663 if (ifbond_selection(bond: ifb)) {
2664 event_code = (ifb->ifb_active_lag == NULL)
2665 ? KEV_DL_LINK_OFF
2666 : KEV_DL_LINK_ON;
2667 }
2668 } else {
2669 bond_set_static_mode(ifb);
2670 event_code = (ifb->ifb_distributing_count == 0)
2671 ? KEV_DL_LINK_OFF
2672 : KEV_DL_LINK_ON;
2673 }
2674 ifb->ifb_last_link_event = event_code;
2675
2676signal_done:
2677 ifbond_signal(ifb, msg: __func__);
2678 bond_unlock();
2679 ifbond_release(ifb);
2680
2681 if (event_code != 0) {
2682 interface_link_event(ifp, event_code);
2683 }
2684 return error;
2685}
2686
2687static int
2688bond_get_status(ifbond_ref ifb, struct if_bond_req * ibr_p, user_addr_t datap)
2689{
2690 int count;
2691 user_addr_t dst;
2692 int error = 0;
2693 struct if_bond_status_req * ibsr;
2694 struct if_bond_status ibs;
2695 bondport_ref port;
2696
2697 ibsr = &(ibr_p->ibr_ibru.ibru_status);
2698 if (ibsr->ibsr_version != IF_BOND_STATUS_REQ_VERSION) {
2699 return EINVAL;
2700 }
2701 ibsr->ibsr_key = ifb->ifb_key;
2702 ibsr->ibsr_mode = ifb->ifb_mode;
2703 ibsr->ibsr_total = ifb->ifb_port_count;
2704 dst = proc_is64bit(current_proc())
2705 ? ibsr->ibsr_ibsru.ibsru_buffer64
2706 : CAST_USER_ADDR_T(ibsr->ibsr_ibsru.ibsru_buffer);
2707 if (dst == USER_ADDR_NULL) {
2708 /* just want to know how many there are */
2709 goto done;
2710 }
2711 if (ibsr->ibsr_count < 0) {
2712 return EINVAL;
2713 }
2714 count = (ifb->ifb_port_count < ibsr->ibsr_count)
2715 ? ifb->ifb_port_count : ibsr->ibsr_count;
2716 TAILQ_FOREACH(port, &ifb->ifb_port_list, po_port_list) {
2717 struct if_bond_partner_state * ibps_p;
2718 partner_state_ref ps;
2719
2720 if (count == 0) {
2721 break;
2722 }
2723 bzero(s: &ibs, n: sizeof(ibs));
2724 strlcpy(dst: ibs.ibs_if_name, src: port->po_name, n: sizeof(ibs.ibs_if_name));
2725 ibs.ibs_port_priority = port->po_priority;
2726 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2727 ibs.ibs_state = port->po_actor_state;
2728 ibs.ibs_selected_state = port->po_selected;
2729 ps = &port->po_partner_state;
2730 ibps_p = &ibs.ibs_partner_state;
2731 ibps_p->ibps_system = ps->ps_lag_info.li_system;
2732 ibps_p->ibps_system_priority = ps->ps_lag_info.li_system_priority;
2733 ibps_p->ibps_key = ps->ps_lag_info.li_key;
2734 ibps_p->ibps_port = ps->ps_port;
2735 ibps_p->ibps_port_priority = ps->ps_port_priority;
2736 ibps_p->ibps_state = ps->ps_state;
2737 } else {
2738 /* fake the selected information */
2739 ibs.ibs_selected_state = bondport_flags_distributing(p: port)
2740 ? SelectedState_SELECTED : SelectedState_UNSELECTED;
2741 }
2742 error = copyout(&ibs, dst, sizeof(ibs));
2743 if (error != 0) {
2744 break;
2745 }
2746 dst += sizeof(ibs);
2747 count--;
2748 }
2749
2750done:
2751 if (error == 0) {
2752 error = copyout(ibr_p, datap, sizeof(*ibr_p));
2753 } else {
2754 (void)copyout(ibr_p, datap, sizeof(*ibr_p));
2755 }
2756 return error;
2757}
2758
2759static int
2760bond_set_promisc(struct ifnet * ifp)
2761{
2762 int error = 0;
2763 ifbond_ref ifb;
2764 bool is_promisc;
2765 bondport_ref p;
2766 int val;
2767
2768 is_promisc = (ifnet_flags(interface: ifp) & IFF_PROMISC) != 0;
2769
2770 /* determine whether promiscuous state needs to be changed */
2771 bond_lock();
2772 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
2773 if (ifb == NULL) {
2774 bond_unlock();
2775 error = EBUSY;
2776 goto done;
2777 }
2778 if (is_promisc == ifbond_flags_promisc(ifb)) {
2779 /* already in the right state */
2780 bond_unlock();
2781 goto done;
2782 }
2783 ifbond_retain(ifb);
2784 ifbond_wait(ifb, msg: __func__);
2785 if (ifbond_flags_if_detaching(ifb)) {
2786 /* someone destroyed the bond while we were waiting */
2787 error = EBUSY;
2788 goto signal_done;
2789 }
2790 bond_unlock();
2791
2792 /* update the promiscuous state of each memeber */
2793 val = is_promisc ? 1 : 0;
2794 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2795 struct ifnet * port_ifp = p->po_ifp;
2796 bool port_is_promisc;
2797
2798 port_is_promisc = uint32_bit_is_set(flags: p->po_control_flags,
2799 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2800 if (port_is_promisc == is_promisc) {
2801 /* already in the right state */
2802 continue;
2803 }
2804 error = ifnet_set_promiscuous(interface: port_ifp, on: val);
2805 if (error != 0) {
2806 printf("%s: ifnet_set_promiscuous(%s, %d): failed %d",
2807 ifb->ifb_name, port_ifp->if_xname, val, error);
2808 continue;
2809 }
2810 printf("%s: ifnet_set_promiscuous(%s, %d): succeeded",
2811 ifb->ifb_name, port_ifp->if_xname, val);
2812 if (is_promisc) {
2813 /* remember that we set it */
2814 uint32_bit_set(flags_p: &p->po_control_flags,
2815 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2816 } else {
2817 uint32_bit_clear(flags_p: &p->po_control_flags,
2818 PORT_CONTROL_FLAGS_BOND_PROMISCUOUS_SET);
2819 }
2820 }
2821
2822 /* assume that updating promiscuous state succeeded */
2823 error = 0;
2824 bond_lock();
2825
2826 /* update our internal state */
2827 if (is_promisc) {
2828 ifbond_flags_set_promisc(ifb);
2829 } else {
2830 ifbond_flags_clear_promisc(ifb);
2831 }
2832
2833signal_done:
2834 ifbond_signal(ifb, msg: __func__);
2835 bond_unlock();
2836 ifbond_release(ifb);
2837
2838done:
2839 return error;
2840}
2841
2842static void
2843bond_get_mtu_values(ifbond_ref ifb, int * ret_min, int * ret_max)
2844{
2845 int mtu_min = 0;
2846 int mtu_max = 0;
2847 bondport_ref p;
2848
2849 if (TAILQ_FIRST(&ifb->ifb_port_list) != NULL) {
2850 mtu_min = IF_MINMTU;
2851 }
2852 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2853 struct ifdevmtu * devmtu_p = &p->po_devmtu;
2854
2855 if (devmtu_p->ifdm_min > mtu_min) {
2856 mtu_min = devmtu_p->ifdm_min;
2857 }
2858 if (mtu_max == 0 || devmtu_p->ifdm_max < mtu_max) {
2859 mtu_max = devmtu_p->ifdm_max;
2860 }
2861 }
2862 *ret_min = mtu_min;
2863 *ret_max = mtu_max;
2864 return;
2865}
2866
2867static int
2868bond_set_mtu_on_ports(ifbond_ref ifb, int mtu)
2869{
2870 int error = 0;
2871 bondport_ref p;
2872
2873 TAILQ_FOREACH(p, &ifb->ifb_port_list, po_port_list) {
2874 error = siocsifmtu(ifp: p->po_ifp, mtu);
2875 if (error != 0) {
2876 printf("if_bond(%s): SIOCSIFMTU %s failed, %d\n",
2877 ifb->ifb_name, bondport_get_name(p), error);
2878 break;
2879 }
2880 }
2881 return error;
2882}
2883
2884static int
2885bond_set_mtu(struct ifnet * ifp, int mtu, int isdevmtu)
2886{
2887 int error = 0;
2888 ifbond_ref ifb;
2889 int mtu_min;
2890 int mtu_max;
2891 int new_max;
2892 int old_max;
2893
2894 bond_lock();
2895 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
2896 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2897 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
2898 goto done;
2899 }
2900 ifbond_retain(ifb);
2901 ifbond_wait(ifb, msg: "bond_set_mtu");
2902
2903 /* check again */
2904 if (ifnet_softc(interface: ifp) == NULL || ifbond_flags_if_detaching(ifb)) {
2905 error = EBUSY;
2906 goto signal_done;
2907 }
2908 bond_get_mtu_values(ifb, ret_min: &mtu_min, ret_max: &mtu_max);
2909 if (mtu > mtu_max) {
2910 error = EINVAL;
2911 goto signal_done;
2912 }
2913 if (mtu < mtu_min && (isdevmtu == 0 || mtu != 0)) {
2914 /* allow SIOCSIFALTMTU to set the mtu to 0 */
2915 error = EINVAL;
2916 goto signal_done;
2917 }
2918 if (isdevmtu) {
2919 new_max = (mtu > (int)ifnet_mtu(interface: ifp)) ? mtu : (int)ifnet_mtu(interface: ifp);
2920 } else {
2921 new_max = (mtu > ifb->ifb_altmtu) ? mtu : ifb->ifb_altmtu;
2922 }
2923 old_max = ((int)ifnet_mtu(interface: ifp) > ifb->ifb_altmtu)
2924 ? (int)ifnet_mtu(interface: ifp) : ifb->ifb_altmtu;
2925 if (new_max != old_max) {
2926 /* we can safely walk the list of port without the lock held */
2927 bond_unlock();
2928 error = bond_set_mtu_on_ports(ifb, mtu: new_max);
2929 if (error != 0) {
2930 /* try our best to back out of it */
2931 (void)bond_set_mtu_on_ports(ifb, mtu: old_max);
2932 }
2933 bond_lock();
2934 }
2935 if (error == 0) {
2936 if (isdevmtu) {
2937 ifb->ifb_altmtu = mtu;
2938 } else {
2939 ifnet_set_mtu(interface: ifp, mtu);
2940 }
2941 }
2942
2943signal_done:
2944 ifbond_signal(ifb, msg: __func__);
2945 ifbond_release(ifb);
2946
2947done:
2948 bond_unlock();
2949 return error;
2950}
2951
2952static int
2953bond_ioctl(struct ifnet *ifp, u_long cmd, void * data)
2954{
2955 int error = 0;
2956 struct if_bond_req ibr;
2957 struct ifaddr * ifa;
2958 ifbond_ref ifb;
2959 struct ifreq * ifr;
2960 struct ifmediareq *ifmr;
2961 struct ifnet * port_ifp = NULL;
2962 user_addr_t user_addr;
2963
2964 if (ifnet_type(interface: ifp) != IFT_IEEE8023ADLAG) {
2965 return EOPNOTSUPP;
2966 }
2967 ifr = (struct ifreq *)data;
2968 ifa = (struct ifaddr *)data;
2969
2970 switch (cmd) {
2971 case SIOCSIFADDR:
2972 ifnet_set_flags(interface: ifp, IFF_UP, IFF_UP);
2973 break;
2974
2975 case SIOCGIFMEDIA32:
2976 case SIOCGIFMEDIA64:
2977 bond_lock();
2978 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
2979 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
2980 bond_unlock();
2981 return ifb == NULL ? EOPNOTSUPP : EBUSY;
2982 }
2983 ifmr = (struct ifmediareq *)data;
2984 ifmr->ifm_current = IFM_ETHER;
2985 ifmr->ifm_mask = 0;
2986 ifmr->ifm_status = IFM_AVALID;
2987 ifmr->ifm_active = IFM_ETHER;
2988 ifmr->ifm_count = 1;
2989 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
2990 if (ifb->ifb_active_lag != NULL) {
2991 ifmr->ifm_active = ifb->ifb_active_lag->lag_active_media;
2992 ifmr->ifm_status |= IFM_ACTIVE;
2993 }
2994 } else if (ifb->ifb_distributing_count > 0) {
2995 ifmr->ifm_active
2996 = ifb->ifb_distributing_array[0]->po_media_info.mi_active;
2997 ifmr->ifm_status |= IFM_ACTIVE;
2998 }
2999 bond_unlock();
3000 user_addr = (cmd == SIOCGIFMEDIA64) ?
3001 ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
3002 CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
3003 if (user_addr != USER_ADDR_NULL) {
3004 error = copyout(&ifmr->ifm_current,
3005 user_addr,
3006 sizeof(int));
3007 }
3008 break;
3009
3010 case SIOCSIFMEDIA:
3011 /* XXX send the SIFMEDIA to all children? Or force autoselect? */
3012 error = EINVAL;
3013 break;
3014
3015 case SIOCGIFDEVMTU:
3016 bond_lock();
3017 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
3018 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3019 bond_unlock();
3020 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3021 break;
3022 }
3023 ifr->ifr_devmtu.ifdm_current = bond_device_mtu(ifp, ifb);
3024 bond_get_mtu_values(ifb, ret_min: &ifr->ifr_devmtu.ifdm_min,
3025 ret_max: &ifr->ifr_devmtu.ifdm_max);
3026 bond_unlock();
3027 break;
3028
3029 case SIOCGIFALTMTU:
3030 bond_lock();
3031 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
3032 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3033 bond_unlock();
3034 error = (ifb == NULL) ? EOPNOTSUPP : EBUSY;
3035 break;
3036 }
3037 ifr->ifr_mtu = ifb->ifb_altmtu;
3038 bond_unlock();
3039 break;
3040
3041 case SIOCSIFALTMTU:
3042 error = bond_set_mtu(ifp, mtu: ifr->ifr_mtu, isdevmtu: 1);
3043 break;
3044
3045 case SIOCSIFMTU:
3046 error = bond_set_mtu(ifp, mtu: ifr->ifr_mtu, isdevmtu: 0);
3047 break;
3048
3049 case SIOCSIFBOND:
3050 user_addr = proc_is64bit(current_proc())
3051 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3052 error = copyin(user_addr, &ibr, sizeof(ibr));
3053 if (error) {
3054 break;
3055 }
3056 switch (ibr.ibr_op) {
3057 case IF_BOND_OP_ADD_INTERFACE:
3058 case IF_BOND_OP_REMOVE_INTERFACE:
3059 port_ifp = ifunit(ibr.ibr_ibru.ibru_if_name);
3060 if (port_ifp == NULL) {
3061 error = ENXIO;
3062 break;
3063 }
3064 if (ifnet_type(interface: port_ifp) != IFT_ETHER) {
3065 error = EPROTONOSUPPORT;
3066 break;
3067 }
3068 break;
3069 case IF_BOND_OP_SET_VERBOSE:
3070 case IF_BOND_OP_SET_MODE:
3071 break;
3072 default:
3073 error = EOPNOTSUPP;
3074 break;
3075 }
3076 if (error != 0) {
3077 break;
3078 }
3079 switch (ibr.ibr_op) {
3080 case IF_BOND_OP_ADD_INTERFACE:
3081 error = bond_add_interface(ifp, port_ifp);
3082 break;
3083 case IF_BOND_OP_REMOVE_INTERFACE:
3084 bond_lock();
3085 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
3086 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3087 bond_unlock();
3088 return ifb == NULL ? EOPNOTSUPP : EBUSY;
3089 }
3090 error = bond_remove_interface(ifb, port_ifp);
3091 bond_unlock();
3092 break;
3093 case IF_BOND_OP_SET_VERBOSE:
3094 bond_lock();
3095 if_bond_debug = ibr.ibr_ibru.ibru_int_val;
3096 bond_unlock();
3097 break;
3098 case IF_BOND_OP_SET_MODE:
3099 switch (ibr.ibr_ibru.ibru_int_val) {
3100 case IF_BOND_MODE_LACP:
3101 case IF_BOND_MODE_STATIC:
3102 break;
3103 default:
3104 error = EINVAL;
3105 break;
3106 }
3107 if (error != 0) {
3108 break;
3109 }
3110 error = bond_set_mode(ifp, mode: ibr.ibr_ibru.ibru_int_val);
3111 break;
3112 }
3113 break; /* SIOCSIFBOND */
3114
3115 case SIOCGIFBOND:
3116 user_addr = proc_is64bit(current_proc())
3117 ? ifr->ifr_data64 : CAST_USER_ADDR_T(ifr->ifr_data);
3118 error = copyin(user_addr, &ibr, sizeof(ibr));
3119 if (error) {
3120 break;
3121 }
3122 switch (ibr.ibr_op) {
3123 case IF_BOND_OP_GET_STATUS:
3124 break;
3125 default:
3126 error = EOPNOTSUPP;
3127 break;
3128 }
3129 if (error != 0) {
3130 break;
3131 }
3132 bond_lock();
3133 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
3134 if (ifb == NULL || ifbond_flags_if_detaching(ifb)) {
3135 bond_unlock();
3136 return ifb == NULL ? EOPNOTSUPP : EBUSY;
3137 }
3138 switch (ibr.ibr_op) {
3139 case IF_BOND_OP_GET_STATUS:
3140 error = bond_get_status(ifb, ibr_p: &ibr, datap: user_addr);
3141 break;
3142 }
3143 bond_unlock();
3144 break; /* SIOCGIFBOND */
3145
3146 case SIOCSIFLLADDR:
3147 error = EOPNOTSUPP;
3148 break;
3149
3150 case SIOCSIFFLAGS:
3151 /* enable promiscuous mode on members */
3152 error = bond_set_promisc(ifp);
3153 break;
3154
3155 case SIOCADDMULTI:
3156 case SIOCDELMULTI:
3157 error = bond_setmulti(ifp);
3158 break;
3159 default:
3160 error = EOPNOTSUPP;
3161 }
3162 return error;
3163}
3164
3165static void
3166bond_if_free(struct ifnet * ifp)
3167{
3168 ifbond_ref ifb;
3169
3170 if (ifp == NULL) {
3171 return;
3172 }
3173 bond_lock();
3174 ifb = (ifbond_ref)ifnet_softc(interface: ifp);
3175 if (ifb == NULL) {
3176 bond_unlock();
3177 return;
3178 }
3179 ifbond_release(ifb);
3180 bond_unlock();
3181 ifnet_release(interface: ifp);
3182 return;
3183}
3184
3185static void
3186bond_handle_event(struct ifnet * port_ifp, int event_code)
3187{
3188 struct ifnet * bond_ifp = NULL;
3189 ifbond_ref ifb;
3190 int old_distributing_count;
3191 bondport_ref p;
3192 struct media_info media_info = { .mi_active = 0, .mi_status = 0 };
3193
3194 switch (event_code) {
3195 case KEV_DL_IF_DETACHED:
3196 case KEV_DL_IF_DETACHING:
3197 break;
3198 case KEV_DL_LINK_OFF:
3199 case KEV_DL_LINK_ON:
3200 media_info = interface_media_info(ifp: port_ifp);
3201 break;
3202 default:
3203 return;
3204 }
3205 bond_lock();
3206 p = bond_lookup_port(port_ifp);
3207 if (p == NULL) {
3208 bond_unlock();
3209 return;
3210 }
3211 ifb = p->po_bond;
3212 old_distributing_count = ifb->ifb_distributing_count;
3213 switch (event_code) {
3214 case KEV_DL_IF_DETACHED:
3215 case KEV_DL_IF_DETACHING:
3216 bond_remove_interface(ifb, port_ifp: p->po_ifp);
3217 break;
3218 case KEV_DL_LINK_OFF:
3219 case KEV_DL_LINK_ON:
3220 p->po_media_info = media_info;
3221 if (p->po_enabled) {
3222 bondport_link_status_changed(p);
3223 }
3224 break;
3225 }
3226 /* generate a link-event */
3227 if (ifb->ifb_mode == IF_BOND_MODE_LACP) {
3228 if (ifbond_selection(bond: ifb)) {
3229 event_code = (ifb->ifb_active_lag == NULL)
3230 ? KEV_DL_LINK_OFF
3231 : KEV_DL_LINK_ON;
3232 /* XXX need to take a reference on bond_ifp */
3233 bond_ifp = ifb->ifb_ifp;
3234 ifb->ifb_last_link_event = event_code;
3235 } else {
3236 event_code = (ifb->ifb_active_lag == NULL)
3237 ? KEV_DL_LINK_OFF
3238 : KEV_DL_LINK_ON;
3239 if (event_code != ifb->ifb_last_link_event) {
3240 if (if_bond_debug) {
3241 timestamp_printf("%s: (event) generating LINK event\n",
3242 ifb->ifb_name);
3243 }
3244 bond_ifp = ifb->ifb_ifp;
3245 ifb->ifb_last_link_event = event_code;
3246 }
3247 }
3248 } else {
3249 /*
3250 * if the distributing array membership changed from 0 <-> !0
3251 * generate a link event
3252 */
3253 if (old_distributing_count == 0
3254 && ifb->ifb_distributing_count != 0) {
3255 event_code = KEV_DL_LINK_ON;
3256 } else if (old_distributing_count != 0
3257 && ifb->ifb_distributing_count == 0) {
3258 event_code = KEV_DL_LINK_OFF;
3259 }
3260 if (event_code != 0 && event_code != ifb->ifb_last_link_event) {
3261 bond_ifp = ifb->ifb_ifp;
3262 ifb->ifb_last_link_event = event_code;
3263 }
3264 }
3265
3266 bond_unlock();
3267 if (bond_ifp != NULL) {
3268 interface_link_event(ifp: bond_ifp, event_code);
3269 }
3270 return;
3271}
3272
3273static void
3274bond_iff_event(__unused void *cookie, ifnet_t port_ifp,
3275 __unused protocol_family_t protocol,
3276 const struct kev_msg *event)
3277{
3278 int event_code;
3279
3280 if (event->vendor_code != KEV_VENDOR_APPLE
3281 || event->kev_class != KEV_NETWORK_CLASS
3282 || event->kev_subclass != KEV_DL_SUBCLASS) {
3283 return;
3284 }
3285 event_code = event->event_code;
3286 switch (event_code) {
3287 case KEV_DL_LINK_OFF:
3288 case KEV_DL_LINK_ON:
3289 case KEV_DL_IF_DETACHING:
3290 case KEV_DL_IF_DETACHED:
3291 bond_handle_event(port_ifp, event_code);
3292 break;
3293 default:
3294 break;
3295 }
3296 return;
3297}
3298
3299static void
3300bond_iff_detached(__unused void *cookie, ifnet_t port_ifp)
3301{
3302 bond_handle_event(port_ifp, KEV_DL_IF_DETACHED);
3303 return;
3304}
3305
3306static void
3307interface_link_event(struct ifnet * ifp, u_int32_t event_code)
3308{
3309 struct event {
3310 u_int32_t ifnet_family;
3311 u_int32_t unit;
3312 char if_name[IFNAMSIZ];
3313 };
3314 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
3315 struct kern_event_msg *header = (struct kern_event_msg*)message;
3316 struct event *data = (struct event *)(header + 1);
3317
3318 header->total_size = sizeof(message);
3319 header->vendor_code = KEV_VENDOR_APPLE;
3320 header->kev_class = KEV_NETWORK_CLASS;
3321 header->kev_subclass = KEV_DL_SUBCLASS;
3322 header->event_code = event_code;
3323 data->ifnet_family = ifnet_family(interface: ifp);
3324 data->unit = (u_int32_t)ifnet_unit(interface: ifp);
3325 strlcpy(dst: data->if_name, src: ifnet_name(interface: ifp), IFNAMSIZ);
3326 ifnet_event(interface: ifp, event_ptr: header);
3327}
3328
3329static errno_t
3330bond_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
3331 char *header)
3332{
3333#pragma unused(protocol, packet, header)
3334 if (if_bond_debug != 0) {
3335 printf("%s: unexpected packet from %s\n", __func__,
3336 ifp->if_xname);
3337 }
3338 return 0;
3339}
3340
3341
3342/*
3343 * Function: bond_attach_protocol
3344 * Purpose:
3345 * Attach a DLIL protocol to the interface.
3346 *
3347 * The ethernet demux special cases to always return PF_BOND if the
3348 * interface is bonded. That means we receive all traffic from that
3349 * interface without passing any of the traffic to any other attached
3350 * protocol.
3351 */
3352static int
3353bond_attach_protocol(struct ifnet *ifp)
3354{
3355 int error;
3356 struct ifnet_attach_proto_param reg;
3357
3358 bzero(s: &reg, n: sizeof(reg));
3359 reg.input = bond_proto_input;
3360
3361 error = ifnet_attach_protocol(interface: ifp, PF_BOND, proto_details: &reg);
3362 if (error) {
3363 printf("bond over %s%d: ifnet_attach_protocol failed, %d\n",
3364 ifnet_name(interface: ifp), ifnet_unit(interface: ifp), error);
3365 }
3366 return error;
3367}
3368
3369/*
3370 * Function: bond_detach_protocol
3371 * Purpose:
3372 * Detach our DLIL protocol from an interface
3373 */
3374static int
3375bond_detach_protocol(struct ifnet *ifp)
3376{
3377 int error;
3378
3379 error = ifnet_detach_protocol(interface: ifp, PF_BOND);
3380 if (error) {
3381 printf("bond over %s%d: ifnet_detach_protocol failed, %d\n",
3382 ifnet_name(interface: ifp), ifnet_unit(interface: ifp), error);
3383 }
3384 return error;
3385}
3386
3387/*
3388 * Function: bond_attach_filter
3389 * Purpose:
3390 * Attach our DLIL interface filter.
3391 */
3392static int
3393bond_attach_filter(struct ifnet *ifp, interface_filter_t * filter_p)
3394{
3395 int error;
3396 struct iff_filter iff;
3397
3398 /*
3399 * install an interface filter
3400 */
3401 memset(s: &iff, c: 0, n: sizeof(struct iff_filter));
3402 iff.iff_name = "com.apple.kernel.bsd.net.if_bond";
3403 iff.iff_input = bond_iff_input;
3404 iff.iff_event = bond_iff_event;
3405 iff.iff_detached = bond_iff_detached;
3406 error = dlil_attach_filter(ifp, &iff, filter_p,
3407 DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
3408 if (error != 0) {
3409 printf("%s: dlil_attach_filter failed %d\n", __func__, error);
3410 }
3411 return error;
3412}
3413
3414
3415/*
3416 * DLIL interface family functions
3417 */
3418extern int ether_attach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3419extern void ether_detach_inet(ifnet_t ifp, protocol_family_t protocol_family);
3420extern int ether_attach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3421extern void ether_detach_inet6(ifnet_t ifp, protocol_family_t protocol_family);
3422extern int ether_attach_at(ifnet_t ifp, protocol_family_t protocol_family);
3423extern void ether_detach_at(ifnet_t ifp, protocol_family_t protocol_family);
3424
3425__private_extern__ int
3426bond_family_init(void)
3427{
3428 int error = 0;
3429
3430 error = proto_register_plumber(PF_INET, APPLE_IF_FAM_BOND,
3431 plumb: ether_attach_inet,
3432 unplumb: ether_detach_inet);
3433 if (error != 0) {
3434 printf("bond: proto_register_plumber failed for AF_INET error=%d\n",
3435 error);
3436 goto done;
3437 }
3438 error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_BOND,
3439 plumb: ether_attach_inet6,
3440 unplumb: ether_detach_inet6);
3441 if (error != 0) {
3442 printf("bond: proto_register_plumber failed for AF_INET6 error=%d\n",
3443 error);
3444 goto done;
3445 }
3446 error = bond_clone_attach();
3447 if (error != 0) {
3448 printf("bond: proto_register_plumber failed bond_clone_attach error=%d\n",
3449 error);
3450 goto done;
3451 }
3452
3453done:
3454 return error;
3455}
3456/**
3457**
3458** LACP routines:
3459**
3460**/
3461
3462/**
3463** LACP ifbond_list routines
3464**/
3465static bondport_ref
3466ifbond_list_find_moved_port(bondport_ref rx_port,
3467 const lacp_actor_partner_tlv_ref atlv)
3468{
3469 ifbond_ref bond;
3470 bondport_ref p;
3471 partner_state_ref ps;
3472 LAG_info_ref ps_li;
3473
3474 TAILQ_FOREACH(bond, &g_bond->ifbond_list, ifb_bond_list) {
3475 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3476 if (rx_port == p) {
3477 /* no point in comparing against ourselves */
3478 continue;
3479 }
3480 if (p->po_receive_state != ReceiveState_PORT_DISABLED) {
3481 /* it's not clear that we should be checking this */
3482 continue;
3483 }
3484 ps = &p->po_partner_state;
3485 if (lacp_actor_partner_state_defaulted(state: ps->ps_state)) {
3486 continue;
3487 }
3488 ps_li = &ps->ps_lag_info;
3489 if (ps->ps_port == lacp_actor_partner_tlv_get_port(tlv: atlv)
3490 && bcmp(s1: &ps_li->li_system, s2: atlv->lap_system,
3491 n: sizeof(ps_li->li_system)) == 0) {
3492 if (if_bond_debug) {
3493 timestamp_printf("System " EA_FORMAT
3494 " Port 0x%x moved from %s to %s\n",
3495 EA_LIST(&ps_li->li_system), ps->ps_port,
3496 bondport_get_name(p),
3497 bondport_get_name(p: rx_port));
3498 }
3499 return p;
3500 }
3501 }
3502 }
3503 return NULL;
3504}
3505
3506/**
3507** LACP ifbond, LAG routines
3508**/
3509
3510static int
3511ifbond_selection(ifbond_ref bond)
3512{
3513 int all_ports_ready = 0;
3514 int active_media = 0;
3515 LAG_ref lag = NULL;
3516 int lag_changed = 0;
3517 bondport_ref p;
3518 int port_speed = 0;
3519
3520 lag = ifbond_find_best_LAG(bond, active_media: &active_media);
3521 if (lag != bond->ifb_active_lag) {
3522 if (bond->ifb_active_lag != NULL) {
3523 ifbond_deactivate_LAG(bond, lag: bond->ifb_active_lag);
3524 bond->ifb_active_lag = NULL;
3525 }
3526 bond->ifb_active_lag = lag;
3527 if (lag != NULL) {
3528 ifbond_activate_LAG(bond, lag, active_media);
3529 }
3530 lag_changed = 1;
3531 } else if (lag != NULL) {
3532 if (lag->lag_active_media != active_media) {
3533 if (if_bond_debug) {
3534 timestamp_printf("LAG PORT SPEED CHANGED from %d to %d\n",
3535 link_speed(active: lag->lag_active_media),
3536 link_speed(active: active_media));
3537 }
3538 ifbond_deactivate_LAG(bond, lag);
3539 ifbond_activate_LAG(bond, lag, active_media);
3540 lag_changed = 1;
3541 }
3542 }
3543 if (lag != NULL) {
3544 port_speed = link_speed(active: active_media);
3545 all_ports_ready = ifbond_all_ports_ready(bond);
3546 }
3547 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3548 if (lag != NULL && p->po_lag == lag
3549 && media_speed(mi: &p->po_media_info) == port_speed
3550 && (p->po_mux_state == MuxState_DETACHED
3551 || p->po_selected == SelectedState_SELECTED
3552 || p->po_selected == SelectedState_STANDBY)
3553 && bondport_aggregatable(p)) {
3554 if (bond->ifb_max_active > 0) {
3555 if (lag->lag_selected_port_count < bond->ifb_max_active) {
3556 if (p->po_selected == SelectedState_STANDBY
3557 || p->po_selected == SelectedState_UNSELECTED) {
3558 bondport_set_selected(p, s: SelectedState_SELECTED);
3559 }
3560 } else if (p->po_selected == SelectedState_UNSELECTED) {
3561 bondport_set_selected(p, s: SelectedState_STANDBY);
3562 }
3563 } else {
3564 bondport_set_selected(p, s: SelectedState_SELECTED);
3565 }
3566 }
3567 if (bondport_flags_selected_changed(p)) {
3568 bondport_flags_clear_selected_changed(p);
3569 bondport_mux_machine(p, event: LAEventSelectedChange, NULL);
3570 }
3571 if (all_ports_ready
3572 && bondport_flags_ready(p)
3573 && p->po_mux_state == MuxState_WAITING) {
3574 bondport_mux_machine(p, event: LAEventReady, NULL);
3575 }
3576 bondport_transmit_machine(p, event: LAEventStart, NULL);
3577 }
3578 return lag_changed;
3579}
3580
3581static LAG_ref
3582ifbond_find_best_LAG(ifbond_ref bond, int * active_media)
3583{
3584 int best_active = 0;
3585 LAG_ref best_lag = NULL;
3586 int best_count = 0;
3587 int best_speed = 0;
3588 LAG_ref lag;
3589
3590 if (bond->ifb_active_lag != NULL) {
3591 best_lag = bond->ifb_active_lag;
3592 best_count = LAG_get_aggregatable_port_count(lag: best_lag, active_media: &best_active);
3593 if (bond->ifb_max_active > 0
3594 && best_count > bond->ifb_max_active) {
3595 best_count = bond->ifb_max_active;
3596 }
3597 best_speed = link_speed(active: best_active);
3598 }
3599 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3600 int active;
3601 int count;
3602 int speed;
3603
3604 if (lag == bond->ifb_active_lag) {
3605 /* we've already computed it */
3606 continue;
3607 }
3608 count = LAG_get_aggregatable_port_count(lag, active_media: &active);
3609 if (count == 0) {
3610 continue;
3611 }
3612 if (bond->ifb_max_active > 0
3613 && count > bond->ifb_max_active) {
3614 /* if there's a limit, don't count extra links */
3615 count = bond->ifb_max_active;
3616 }
3617 speed = link_speed(active);
3618 if ((count * speed) > (best_count * best_speed)) {
3619 best_count = count;
3620 best_speed = speed;
3621 best_active = active;
3622 best_lag = lag;
3623 }
3624 }
3625 if (best_count == 0) {
3626 return NULL;
3627 }
3628 *active_media = best_active;
3629 return best_lag;
3630}
3631
3632static void
3633ifbond_deactivate_LAG(__unused ifbond_ref bond, LAG_ref lag)
3634{
3635 bondport_ref p;
3636
3637 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3638 bondport_set_selected(p, s: SelectedState_UNSELECTED);
3639 }
3640 return;
3641}
3642
3643static void
3644ifbond_activate_LAG(ifbond_ref bond, LAG_ref lag, int active_media)
3645{
3646 int need = 0;
3647 bondport_ref p;
3648
3649 if (bond->ifb_max_active > 0) {
3650 need = bond->ifb_max_active;
3651 }
3652 lag->lag_active_media = active_media;
3653 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3654 if (bondport_aggregatable(p) == 0) {
3655 bondport_set_selected(p, s: SelectedState_UNSELECTED);
3656 } else if (media_speed(mi: &p->po_media_info) != link_speed(active: active_media)) {
3657 bondport_set_selected(p, s: SelectedState_UNSELECTED);
3658 } else if (p->po_mux_state == MuxState_DETACHED) {
3659 if (bond->ifb_max_active > 0) {
3660 if (need > 0) {
3661 bondport_set_selected(p, s: SelectedState_SELECTED);
3662 need--;
3663 } else {
3664 bondport_set_selected(p, s: SelectedState_STANDBY);
3665 }
3666 } else {
3667 bondport_set_selected(p, s: SelectedState_SELECTED);
3668 }
3669 } else {
3670 bondport_set_selected(p, s: SelectedState_UNSELECTED);
3671 }
3672 }
3673 return;
3674}
3675
#if 0
/*
 * Function: ifbond_set_max_active
 * Purpose:
 *   (Compiled out.)  Store a new active-port limit on the bond and, when
 *   the active LAG has more selected ports than the limit allows,
 *   unselect SELECTED ports in list order until the excess is gone.
 */
static void
ifbond_set_max_active(ifbond_ref bond, int max_active)
{
	int             excess;
	LAG_ref         lag = bond->ifb_active_lag;
	bondport_ref    p;

	bond->ifb_max_active = max_active;
	if (bond->ifb_max_active <= 0 || lag == NULL) {
		return;
	}
	if (lag->lag_selected_port_count <= bond->ifb_max_active) {
		return;
	}
	excess = lag->lag_selected_port_count - bond->ifb_max_active;
	TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
		if (p->po_selected != SelectedState_SELECTED) {
			continue;
		}
		bondport_set_selected(p, SelectedState_UNSELECTED);
		if (--excess == 0) {
			break;
		}
	}
}
#endif
3704
3705static int
3706ifbond_all_ports_ready(ifbond_ref bond)
3707{
3708 int ready = 0;
3709 bondport_ref p;
3710
3711 if (bond->ifb_active_lag == NULL) {
3712 return 0;
3713 }
3714 TAILQ_FOREACH(p, &bond->ifb_active_lag->lag_port_list, po_lag_port_list) {
3715 if (p->po_mux_state == MuxState_WAITING
3716 && p->po_selected == SelectedState_SELECTED) {
3717 if (bondport_flags_ready(p) == 0) {
3718 return 0;
3719 }
3720 }
3721 /* note that there was at least one ready port */
3722 ready = 1;
3723 }
3724 return ready;
3725}
3726
3727static int
3728ifbond_all_ports_attached(ifbond_ref bond, bondport_ref this_port)
3729{
3730 bondport_ref p;
3731
3732 TAILQ_FOREACH(p, &bond->ifb_port_list, po_port_list) {
3733 if (this_port == p) {
3734 continue;
3735 }
3736 if (bondport_flags_mux_attached(p) == 0) {
3737 return 0;
3738 }
3739 }
3740 return 1;
3741}
3742
3743static LAG_ref
3744ifbond_get_LAG_matching_port(ifbond_ref bond, bondport_ref p)
3745{
3746 LAG_ref lag;
3747
3748 TAILQ_FOREACH(lag, &bond->ifb_lag_list, lag_list) {
3749 if (bcmp(s1: &lag->lag_info, s2: &p->po_partner_state.ps_lag_info,
3750 n: sizeof(lag->lag_info)) == 0) {
3751 return lag;
3752 }
3753 }
3754 return NULL;
3755}
3756
3757static int
3758LAG_get_aggregatable_port_count(LAG_ref lag, int * active_media)
3759{
3760 int active;
3761 int count;
3762 bondport_ref p;
3763 int speed;
3764
3765 active = 0;
3766 count = 0;
3767 speed = 0;
3768 TAILQ_FOREACH(p, &lag->lag_port_list, po_lag_port_list) {
3769 if (bondport_aggregatable(p)) {
3770 int this_speed;
3771
3772 this_speed = media_speed(mi: &p->po_media_info);
3773 if (this_speed == 0) {
3774 continue;
3775 }
3776 if (this_speed > speed) {
3777 active = p->po_media_info.mi_active;
3778 speed = this_speed;
3779 count = 1;
3780 } else if (this_speed == speed) {
3781 count++;
3782 }
3783 }
3784 }
3785 *active_media = active;
3786 return count;
3787}
3788
3789
3790/**
3791** LACP bondport routines
3792**/
3793static void
3794bondport_link_status_changed(bondport_ref p)
3795{
3796 ifbond_ref bond = p->po_bond;
3797
3798 if (if_bond_debug) {
3799 if (media_active(mi: &p->po_media_info)) {
3800 const char * duplex_string;
3801
3802 if (media_full_duplex(mi: &p->po_media_info)) {
3803 duplex_string = "full";
3804 } else if (media_type_unknown(mi: &p->po_media_info)) {
3805 duplex_string = "unknown";
3806 } else {
3807 duplex_string = "half";
3808 }
3809 timestamp_printf("[%s] Link UP %d Mbit/s %s duplex\n",
3810 bondport_get_name(p),
3811 media_speed(mi: &p->po_media_info),
3812 duplex_string);
3813 } else {
3814 timestamp_printf("[%s] Link DOWN\n",
3815 bondport_get_name(p));
3816 }
3817 }
3818 if (bond->ifb_mode == IF_BOND_MODE_LACP) {
3819 if (media_active(mi: &p->po_media_info)
3820 && bond->ifb_active_lag != NULL
3821 && p->po_lag == bond->ifb_active_lag
3822 && p->po_selected != SelectedState_UNSELECTED) {
3823 if (media_speed(mi: &p->po_media_info) != p->po_lag->lag_active_media) {
3824 if (if_bond_debug) {
3825 timestamp_printf("[%s] Port speed %d differs from LAG %d\n",
3826 bondport_get_name(p),
3827 media_speed(mi: &p->po_media_info),
3828 link_speed(active: p->po_lag->lag_active_media));
3829 }
3830 bondport_set_selected(p, s: SelectedState_UNSELECTED);
3831 }
3832 }
3833 bondport_receive_machine(p, event: LAEventMediaChange, NULL);
3834 bondport_mux_machine(p, event: LAEventMediaChange, NULL);
3835 bondport_periodic_transmit_machine(p, event: LAEventMediaChange, NULL);
3836 } else {
3837 if (media_active(mi: &p->po_media_info)) {
3838 bondport_enable_distributing(p);
3839 } else {
3840 bondport_disable_distributing(p);
3841 }
3842 }
3843 return;
3844}
3845
3846static int
3847bondport_aggregatable(bondport_ref p)
3848{
3849 partner_state_ref ps = &p->po_partner_state;
3850
3851 if (lacp_actor_partner_state_aggregatable(state: p->po_actor_state) == 0
3852 || lacp_actor_partner_state_aggregatable(state: ps->ps_state) == 0) {
3853 /* we and/or our partner are individual */
3854 return 0;
3855 }
3856 if (p->po_lag == NULL) {
3857 return 0;
3858 }
3859 switch (p->po_receive_state) {
3860 default:
3861 if (if_bond_debug) {
3862 timestamp_printf("[%s] Port is not selectable\n",
3863 bondport_get_name(p));
3864 }
3865 return 0;
3866 case ReceiveState_CURRENT:
3867 case ReceiveState_EXPIRED:
3868 break;
3869 }
3870 return 1;
3871}
3872
3873static int
3874bondport_matches_LAG(bondport_ref p, LAG_ref lag)
3875{
3876 LAG_info_ref lag_li;
3877 partner_state_ref ps;
3878 LAG_info_ref ps_li;
3879
3880 ps = &p->po_partner_state;
3881 ps_li = &ps->ps_lag_info;
3882 lag_li = &lag->lag_info;
3883 if (ps_li->li_system_priority == lag_li->li_system_priority
3884 && ps_li->li_key == lag_li->li_key
3885 && (bcmp(s1: &ps_li->li_system, s2: &lag_li->li_system,
3886 n: sizeof(lag_li->li_system))
3887 == 0)) {
3888 return 1;
3889 }
3890 return 0;
3891}
3892
3893static int
3894bondport_remove_from_LAG(bondport_ref p)
3895{
3896 int active_lag = 0;
3897 ifbond_ref bond = p->po_bond;
3898 LAG_ref lag = p->po_lag;
3899
3900 if (lag == NULL) {
3901 return 0;
3902 }
3903 TAILQ_REMOVE(&lag->lag_port_list, p, po_lag_port_list);
3904 if (if_bond_debug) {
3905 timestamp_printf("[%s] Removed from LAG (0x%04x," EA_FORMAT
3906 ",0x%04x)\n",
3907 bondport_get_name(p),
3908 lag->lag_info.li_system_priority,
3909 EA_LIST(&lag->lag_info.li_system),
3910 lag->lag_info.li_key);
3911 }
3912 p->po_lag = NULL;
3913 lag->lag_port_count--;
3914 if (lag->lag_port_count > 0) {
3915 return bond->ifb_active_lag == lag;
3916 }
3917 if (if_bond_debug) {
3918 timestamp_printf("Key 0x%04x: LAG Released (%04x," EA_FORMAT
3919 ",0x%04x)\n",
3920 bond->ifb_key,
3921 lag->lag_info.li_system_priority,
3922 EA_LIST(&lag->lag_info.li_system),
3923 lag->lag_info.li_key);
3924 }
3925 TAILQ_REMOVE(&bond->ifb_lag_list, lag, lag_list);
3926 if (bond->ifb_active_lag == lag) {
3927 bond->ifb_active_lag = NULL;
3928 active_lag = 1;
3929 }
3930 kfree_type(struct LAG_s, lag);
3931 return active_lag;
3932}
3933
3934static void
3935bondport_add_to_LAG(bondport_ref p, LAG_ref lag)
3936{
3937 TAILQ_INSERT_TAIL(&lag->lag_port_list, p, po_lag_port_list);
3938 p->po_lag = lag;
3939 lag->lag_port_count++;
3940 if (if_bond_debug) {
3941 timestamp_printf("[%s] Added to LAG (0x%04x," EA_FORMAT "0x%04x)\n",
3942 bondport_get_name(p),
3943 lag->lag_info.li_system_priority,
3944 EA_LIST(&lag->lag_info.li_system),
3945 lag->lag_info.li_key);
3946 }
3947 return;
3948}
3949
3950static void
3951bondport_assign_to_LAG(bondport_ref p)
3952{
3953 ifbond_ref bond = p->po_bond;
3954 LAG_ref lag;
3955
3956 if (lacp_actor_partner_state_defaulted(state: p->po_actor_state)) {
3957 bondport_remove_from_LAG(p);
3958 return;
3959 }
3960 lag = p->po_lag;
3961 if (lag != NULL) {
3962 if (bondport_matches_LAG(p, lag)) {
3963 /* still OK */
3964 return;
3965 }
3966 bondport_remove_from_LAG(p);
3967 }
3968 lag = ifbond_get_LAG_matching_port(bond, p);
3969 if (lag != NULL) {
3970 bondport_add_to_LAG(p, lag);
3971 return;
3972 }
3973 lag = kalloc_type(struct LAG_s, Z_WAITOK);
3974 TAILQ_INIT(&lag->lag_port_list);
3975 lag->lag_port_count = 0;
3976 lag->lag_selected_port_count = 0;
3977 lag->lag_info = p->po_partner_state.ps_lag_info;
3978 TAILQ_INSERT_TAIL(&bond->ifb_lag_list, lag, lag_list);
3979 if (if_bond_debug) {
3980 timestamp_printf("Key 0x%04x: LAG Created (0x%04x," EA_FORMAT
3981 ",0x%04x)\n",
3982 bond->ifb_key,
3983 lag->lag_info.li_system_priority,
3984 EA_LIST(&lag->lag_info.li_system),
3985 lag->lag_info.li_key);
3986 }
3987 bondport_add_to_LAG(p, lag);
3988 return;
3989}
3990
3991static void
3992bondport_receive_lacpdu(bondport_ref p, lacpdu_ref in_lacpdu_p)
3993{
3994 bondport_ref moved_port;
3995
3996 moved_port
3997 = ifbond_list_find_moved_port(rx_port: p, atlv: (const lacp_actor_partner_tlv_ref)
3998 &in_lacpdu_p->la_actor_tlv);
3999 if (moved_port != NULL) {
4000 bondport_receive_machine(p: moved_port, event: LAEventPortMoved, NULL);
4001 }
4002 bondport_receive_machine(p, event: LAEventPacket, event_data: in_lacpdu_p);
4003 bondport_mux_machine(p, event: LAEventPacket, event_data: in_lacpdu_p);
4004 bondport_periodic_transmit_machine(p, event: LAEventPacket, event_data: in_lacpdu_p);
4005 return;
4006}
4007
4008static void
4009bondport_set_selected(bondport_ref p, SelectedState s)
4010{
4011 if (s != p->po_selected) {
4012 ifbond_ref bond = p->po_bond;
4013 LAG_ref lag = p->po_lag;
4014
4015 bondport_flags_set_selected_changed(p);
4016 if (lag != NULL && bond->ifb_active_lag == lag) {
4017 if (p->po_selected == SelectedState_SELECTED) {
4018 lag->lag_selected_port_count--;
4019 } else if (s == SelectedState_SELECTED) {
4020 lag->lag_selected_port_count++;
4021 }
4022 if (if_bond_debug) {
4023 timestamp_printf("[%s] SetSelected: %s (was %s)\n",
4024 bondport_get_name(p),
4025 SelectedStateString(s),
4026 SelectedStateString(s: p->po_selected));
4027 }
4028 }
4029 }
4030 p->po_selected = s;
4031 return;
4032}
4033
4034/**
4035** Receive machine
4036**/
4037
4038static void
4039bondport_UpdateDefaultSelected(bondport_ref p)
4040{
4041 bondport_set_selected(p, s: SelectedState_UNSELECTED);
4042 return;
4043}
4044
4045static void
4046bondport_RecordDefault(bondport_ref p)
4047{
4048 bzero(s: &p->po_partner_state, n: sizeof(p->po_partner_state));
4049 p->po_actor_state
4050 = lacp_actor_partner_state_set_defaulted(state: p->po_actor_state);
4051 bondport_assign_to_LAG(p);
4052 return;
4053}
4054
4055static void
4056bondport_UpdateSelected(bondport_ref p, lacpdu_ref lacpdu_p)
4057{
4058 lacp_actor_partner_tlv_ref actor;
4059 partner_state_ref ps;
4060 LAG_info_ref ps_li;
4061
4062 /* compare the PDU's Actor information to our Partner state */
4063 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4064 ps = &p->po_partner_state;
4065 ps_li = &ps->ps_lag_info;
4066 if (lacp_actor_partner_tlv_get_port(tlv: actor) != ps->ps_port
4067 || (lacp_actor_partner_tlv_get_port_priority(tlv: actor)
4068 != ps->ps_port_priority)
4069 || bcmp(s1: actor->lap_system, s2: &ps_li->li_system, n: sizeof(ps_li->li_system))
4070 || (lacp_actor_partner_tlv_get_system_priority(tlv: actor)
4071 != ps_li->li_system_priority)
4072 || (lacp_actor_partner_tlv_get_key(tlv: actor) != ps_li->li_key)
4073 || (lacp_actor_partner_state_aggregatable(state: actor->lap_state)
4074 != lacp_actor_partner_state_aggregatable(state: ps->ps_state))) {
4075 bondport_set_selected(p, s: SelectedState_UNSELECTED);
4076 if (if_bond_debug) {
4077 timestamp_printf("[%s] updateSelected UNSELECTED\n",
4078 bondport_get_name(p));
4079 }
4080 }
4081 return;
4082}
4083
4084static void
4085bondport_RecordPDU(bondport_ref p, lacpdu_ref lacpdu_p)
4086{
4087 lacp_actor_partner_tlv_ref actor;
4088 ifbond_ref bond = p->po_bond;
4089 int lacp_maintain = 0;
4090 partner_state_ref ps;
4091 lacp_actor_partner_tlv_ref partner;
4092 LAG_info_ref ps_li;
4093
4094 /* copy the PDU's Actor information into our Partner state */
4095 actor = (lacp_actor_partner_tlv_ref)lacpdu_p->la_actor_tlv;
4096 ps = &p->po_partner_state;
4097 ps_li = &ps->ps_lag_info;
4098 ps->ps_port = lacp_actor_partner_tlv_get_port(tlv: actor);
4099 ps->ps_port_priority = lacp_actor_partner_tlv_get_port_priority(tlv: actor);
4100 ps_li->li_system = *((lacp_system_ref)actor->lap_system);
4101 ps_li->li_system_priority
4102 = lacp_actor_partner_tlv_get_system_priority(tlv: actor);
4103 ps_li->li_key = lacp_actor_partner_tlv_get_key(tlv: actor);
4104 ps->ps_state = lacp_actor_partner_state_set_out_of_sync(state: actor->lap_state);
4105 p->po_actor_state
4106 = lacp_actor_partner_state_set_not_defaulted(state: p->po_actor_state);
4107
4108 /* compare the PDU's Partner information to our own information */
4109 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4110
4111 if (lacp_actor_partner_state_active_lacp(state: ps->ps_state)
4112 || (lacp_actor_partner_state_active_lacp(state: p->po_actor_state)
4113 && lacp_actor_partner_state_active_lacp(state: partner->lap_state))) {
4114 if (if_bond_debug) {
4115 timestamp_printf("[%s] recordPDU: LACP will maintain\n",
4116 bondport_get_name(p));
4117 }
4118 lacp_maintain = 1;
4119 }
4120 if ((lacp_actor_partner_tlv_get_port(tlv: partner)
4121 == bondport_get_index(p))
4122 && lacp_actor_partner_tlv_get_port_priority(tlv: partner) == p->po_priority
4123 && bcmp(s1: partner->lap_system, s2: &g_bond->system,
4124 n: sizeof(g_bond->system)) == 0
4125 && (lacp_actor_partner_tlv_get_system_priority(tlv: partner)
4126 == g_bond->system_priority)
4127 && lacp_actor_partner_tlv_get_key(tlv: partner) == bond->ifb_key
4128 && (lacp_actor_partner_state_aggregatable(state: partner->lap_state)
4129 == lacp_actor_partner_state_aggregatable(state: p->po_actor_state))
4130 && lacp_actor_partner_state_in_sync(state: actor->lap_state)
4131 && lacp_maintain) {
4132 ps->ps_state = lacp_actor_partner_state_set_in_sync(state: ps->ps_state);
4133 if (if_bond_debug) {
4134 timestamp_printf("[%s] recordPDU: LACP partner in sync\n",
4135 bondport_get_name(p));
4136 }
4137 } else if (lacp_actor_partner_state_aggregatable(state: actor->lap_state) == 0
4138 && lacp_actor_partner_state_in_sync(state: actor->lap_state)
4139 && lacp_maintain) {
4140 ps->ps_state = lacp_actor_partner_state_set_in_sync(state: ps->ps_state);
4141 if (if_bond_debug) {
4142 timestamp_printf("[%s] recordPDU: LACP partner in sync (ind)\n",
4143 bondport_get_name(p));
4144 }
4145 }
4146 bondport_assign_to_LAG(p);
4147 return;
4148}
4149
4150static __inline__ lacp_actor_partner_state
4151updateNTTBits(lacp_actor_partner_state s)
4152{
4153 return s & (LACP_ACTOR_PARTNER_STATE_LACP_ACTIVITY
4154 | LACP_ACTOR_PARTNER_STATE_LACP_TIMEOUT
4155 | LACP_ACTOR_PARTNER_STATE_AGGREGATION
4156 | LACP_ACTOR_PARTNER_STATE_SYNCHRONIZATION);
4157}
4158
4159static void
4160bondport_UpdateNTT(bondport_ref p, lacpdu_ref lacpdu_p)
4161{
4162 ifbond_ref bond = p->po_bond;
4163 lacp_actor_partner_tlv_ref partner;
4164
4165 /* compare the PDU's Actor information to our Partner state */
4166 partner = (lacp_actor_partner_tlv_ref)lacpdu_p->la_partner_tlv;
4167 if ((lacp_actor_partner_tlv_get_port(tlv: partner) != bondport_get_index(p))
4168 || lacp_actor_partner_tlv_get_port_priority(tlv: partner) != p->po_priority
4169 || bcmp(s1: partner->lap_system, s2: &g_bond->system, n: sizeof(g_bond->system))
4170 || (lacp_actor_partner_tlv_get_system_priority(tlv: partner)
4171 != g_bond->system_priority)
4172 || lacp_actor_partner_tlv_get_key(tlv: partner) != bond->ifb_key
4173 || (updateNTTBits(s: partner->lap_state)
4174 != updateNTTBits(s: p->po_actor_state))) {
4175 bondport_flags_set_ntt(p);
4176 if (if_bond_debug) {
4177 timestamp_printf("[%s] updateNTT: Need To Transmit\n",
4178 bondport_get_name(p));
4179 }
4180 }
4181 return;
4182}
4183
4184static void
4185bondport_AttachMuxToAggregator(bondport_ref p)
4186{
4187 if (bondport_flags_mux_attached(p) == 0) {
4188 if (if_bond_debug) {
4189 timestamp_printf("[%s] Attached Mux To Aggregator\n",
4190 bondport_get_name(p));
4191 }
4192 bondport_flags_set_mux_attached(p);
4193 }
4194 return;
4195}
4196
4197static void
4198bondport_DetachMuxFromAggregator(bondport_ref p)
4199{
4200 if (bondport_flags_mux_attached(p)) {
4201 if (if_bond_debug) {
4202 timestamp_printf("[%s] Detached Mux From Aggregator\n",
4203 bondport_get_name(p));
4204 }
4205 bondport_flags_clear_mux_attached(p);
4206 }
4207 return;
4208}
4209
4210static void
4211bondport_enable_distributing(bondport_ref p)
4212{
4213 if (bondport_flags_distributing(p) == 0) {
4214 ifbond_ref bond = p->po_bond;
4215
4216 bond->ifb_distributing_array[bond->ifb_distributing_count++] = p;
4217 if (if_bond_debug) {
4218 timestamp_printf("[%s] Distribution Enabled\n",
4219 bondport_get_name(p));
4220 }
4221 bondport_flags_set_distributing(p);
4222 }
4223 return;
4224}
4225
4226static void
4227bondport_disable_distributing(bondport_ref p)
4228{
4229 if (bondport_flags_distributing(p)) {
4230 bondport_ref * array;
4231 ifbond_ref bond;
4232 int count;
4233 int i;
4234
4235 bond = p->po_bond;
4236 array = bond->ifb_distributing_array;
4237 count = bond->ifb_distributing_count;
4238 for (i = 0; i < count; i++) {
4239 if (array[i] == p) {
4240 int j;
4241
4242 for (j = i; j < (count - 1); j++) {
4243 array[j] = array[j + 1];
4244 }
4245 break;
4246 }
4247 }
4248 bond->ifb_distributing_count--;
4249 if (if_bond_debug) {
4250 timestamp_printf("[%s] Distribution Disabled\n",
4251 bondport_get_name(p));
4252 }
4253 bondport_flags_clear_distributing(p);
4254 }
4255 return;
4256}
4257
4258/**
4259** Receive machine functions
4260**/
4261static void
4262bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4263 void * event_data);
4264static void
4265bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4266 void * event_data);
4267static void
4268bondport_receive_machine_expired(bondport_ref p, LAEvent event,
4269 void * event_data);
4270static void
4271bondport_receive_machine_lacp_disabled(bondport_ref p, LAEvent event,
4272 void * event_data);
4273static void
4274bondport_receive_machine_defaulted(bondport_ref p, LAEvent event,
4275 void * event_data);
4276static void
4277bondport_receive_machine_current(bondport_ref p, LAEvent event,
4278 void * event_data);
4279
4280static void
4281bondport_receive_machine_event(bondport_ref p, LAEvent event,
4282 void * event_data)
4283{
4284 switch (p->po_receive_state) {
4285 case ReceiveState_none:
4286 bondport_receive_machine_initialize(p, event: LAEventStart, NULL);
4287 break;
4288 case ReceiveState_INITIALIZE:
4289 bondport_receive_machine_initialize(p, event, event_data);
4290 break;
4291 case ReceiveState_PORT_DISABLED:
4292 bondport_receive_machine_port_disabled(p, event, event_data);
4293 break;
4294 case ReceiveState_EXPIRED:
4295 bondport_receive_machine_expired(p, event, event_data);
4296 break;
4297 case ReceiveState_LACP_DISABLED:
4298 bondport_receive_machine_lacp_disabled(p, event, event_data);
4299 break;
4300 case ReceiveState_DEFAULTED:
4301 bondport_receive_machine_defaulted(p, event, event_data);
4302 break;
4303 case ReceiveState_CURRENT:
4304 bondport_receive_machine_current(p, event, event_data);
4305 break;
4306 default:
4307 break;
4308 }
4309 return;
4310}
4311
4312static void
4313bondport_receive_machine(bondport_ref p, LAEvent event,
4314 void * event_data)
4315{
4316 switch (event) {
4317 case LAEventPacket:
4318 if (p->po_receive_state != ReceiveState_LACP_DISABLED) {
4319 bondport_receive_machine_current(p, event, event_data);
4320 }
4321 break;
4322 case LAEventMediaChange:
4323 if (media_active(mi: &p->po_media_info)) {
4324 switch (p->po_receive_state) {
4325 case ReceiveState_PORT_DISABLED:
4326 case ReceiveState_LACP_DISABLED:
4327 bondport_receive_machine_port_disabled(p, event: LAEventMediaChange, NULL);
4328 break;
4329 default:
4330 break;
4331 }
4332 } else {
4333 bondport_receive_machine_port_disabled(p, event: LAEventStart, NULL);
4334 }
4335 break;
4336 default:
4337 bondport_receive_machine_event(p, event, event_data);
4338 break;
4339 }
4340 return;
4341}
4342
4343static void
4344bondport_receive_machine_initialize(bondport_ref p, LAEvent event,
4345 __unused void * event_data)
4346{
4347 switch (event) {
4348 case LAEventStart:
4349 devtimer_cancel(t: p->po_current_while_timer);
4350 if (if_bond_debug) {
4351 timestamp_printf("[%s] Receive INITIALIZE\n",
4352 bondport_get_name(p));
4353 }
4354 p->po_receive_state = ReceiveState_INITIALIZE;
4355 bondport_set_selected(p, s: SelectedState_UNSELECTED);
4356 bondport_RecordDefault(p);
4357 p->po_actor_state
4358 = lacp_actor_partner_state_set_not_expired(state: p->po_actor_state);
4359 bondport_receive_machine_port_disabled(p, event: LAEventStart, NULL);
4360 break;
4361 default:
4362 break;
4363 }
4364 return;
4365}
4366
4367static void
4368bondport_receive_machine_port_disabled(bondport_ref p, LAEvent event,
4369 __unused void * event_data)
4370{
4371 partner_state_ref ps;
4372
4373 switch (event) {
4374 case LAEventStart:
4375 devtimer_cancel(t: p->po_current_while_timer);
4376 if (if_bond_debug) {
4377 timestamp_printf("[%s] Receive PORT_DISABLED\n",
4378 bondport_get_name(p));