xref: /openbsd/sys/net/if_aggr.c (revision 9ea232b5)
1 /*	$OpenBSD: if_aggr.c,v 1.42 2023/12/23 10:52:54 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2019 The University of Queensland
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * This driver implements 802.1AX Link Aggregation (formerly 802.3ad)
21  *
22  * The specification describes systems with multiple ports that
23  * can dynamically form aggregations. The relationship between ports
24  * and aggregations is such that arbitrary ports connected to ports
25  * on other systems may move between aggregations, and there can be
26  * as many aggregations as ports. An aggregation in this model is
27  * effectively an interface, and becomes the point that Ethernet traffic
28  * enters and leaves the system. The spec also contains a description
29  * of the Link Aggregation Control Protocol (LACP) for use on the wire,
30  * and how to process it and select ports and aggregations based on
31  * it.
32  *
33  * This driver implements a simplified or constrained model where each
34  * aggr(4) interface is effectively an independent system, and will
35  * only support one aggregation. This supports the use of the kernel
36  * interface as a static entity that is created and configured once,
37  * and has the link "come up" when that one aggregation is selected
38  * by the LACP protocol.
39  */
40 
41 /*
42  * This code was written by David Gwynne <dlg@uq.edu.au> as part
43  * of the Information Technology Infrastructure Group (ITIG) in the
44  * Faculty of Engineering, Architecture and Information Technology
45  * (EAIT).
46  */
47 
48 /*
49  * TODO:
50  *
51  * - add locking
52  * - figure out the Ready_N and Ready logic
53  */
54 
55 #include "bpfilter.h"
56 
57 #include <sys/param.h>
58 #include <sys/kernel.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/queue.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/systm.h>
65 #include <sys/syslog.h>
66 #include <sys/rwlock.h>
67 #include <sys/percpu.h>
68 #include <sys/smr.h>
69 #include <sys/task.h>
70 
71 #include <net/if.h>
72 #include <net/if_dl.h>
73 #include <net/if_types.h>
74 
75 #include <net/if_media.h>
76 
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 
80 #include <crypto/siphash.h> /* if_trunk.h uses siphash bits */
81 #include <net/if_trunk.h>
82 
83 #if NBPFILTER > 0
84 #include <net/bpf.h>
85 #endif
86 
87 /*
88  * Link Aggregation Control Protocol (LACP)
89  */
90 
/*
 * Two byte header that follows the Ethernet header in slow protocol
 * frames. It is the first member of both struct lacp_du and struct
 * marker_pdu, and aggr_input() inspects sph_subtype to decide whether
 * a frame is queued for the receive task.
 */
91 struct ether_slowproto_hdr {
92 	uint8_t		sph_subtype;	/* SLOWPROTOCOLS_SUBTYPE_* */
93 	uint8_t		sph_version;
94 } __packed;
95 
/*
 * Slow protocol subtypes. aggr_input() hands frames carrying either of
 * these subtypes to the per-port receive queue.
 */
96 #define SLOWPROTOCOLS_SUBTYPE_LACP	1
97 #define SLOWPROTOCOLS_SUBTYPE_LACP_MARKER \
98 					2
99 
100 #define LACP_VERSION			1
101 
/*
 * LACP timer parameters. NOTE(review): presumably expressed in seconds
 * as in the 802.1AX specification; aggr_add_port() scales
 * LACP_TIMEOUT_FACTOR by hz - confirm at the other users.
 */
102 #define LACP_FAST_PERIODIC_TIME		1
103 #define LACP_SLOW_PERIODIC_TIME		30
104 #define LACP_TIMEOUT_FACTOR		3
105 #define LACP_AGGREGATION_WAIT_TIME	2
106 
/* also the number of slots in the per-port p_txm_log[] array */
107 #define LACP_TX_MACHINE_RATE		3 /* per LACP_FAST_PERIODIC_TIME */
108 
/*
 * Group MAC addresses as byte array initialisers. LACP_ADDR_SLOW_E64 is
 * the same address as LACP_ADDR_SLOW in the 64 bit form that
 * aggr_eh_is_slow() compares against ether_addr_to_e64().
 */
109 #define LACP_ADDR_C_BRIDGE		{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }
110 #define LACP_ADDR_SLOW			{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 }
111 #define LACP_ADDR_SLOW_E64		0x0180c2000002ULL
112 #define LACP_ADDR_NON_TPMR_BRIDGE	{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 }
113 
/*
 * Type/length header that precedes each information block in
 * struct lacp_du and struct marker_pdu.
 */
114 struct lacp_tlv_hdr {
115 	uint8_t			lacp_tlv_type;
116 	uint8_t			lacp_tlv_length;
117 } __packed __aligned(2);
118 
/* NOTE(review): presumably the values carried in lacp_tlv_type - confirm. */
119 /* LACP TLV types */
120 
121 #define LACP_T_TERMINATOR		0x00
122 #define LACP_T_ACTOR			0x01
123 #define LACP_T_PARTNER			0x02
124 #define LACP_T_COLLECTOR		0x03
125 
126 /* LACPv2 TLV types */
127 
128 #define LACP_T_PORT_ALGORITHM		0x04
129 #define LACP_T_PORT_CONVERSATION_ID_DIGEST \
130 					0x05
131 #define LACP_T_PORT_CONVERSATION_MASK	0x06
132 #define LACP_T_PORT_CONVERSATION_SERVICE_MAPPING \
133 					0x0a
134 
/*
 * System identifier: a priority plus a MAC address. aggr_get_trunk()
 * passes lacp_sysid_priority through ntohs() before reporting it, so
 * the stored value is in network byte order there.
 */
135 struct lacp_sysid {
136 	uint16_t		lacp_sysid_priority;
137 	uint8_t			lacp_sysid_mac[ETHER_ADDR_LEN];
138 } __packed __aligned(2);
139 
/*
 * Port identifier: a priority plus a port number. aggr_get_trunk()
 * converts both fields with ntohs() when reporting partner information.
 */
140 struct lacp_portid {
141 	uint16_t		lacp_portid_priority;
142 	uint16_t		lacp_portid_number;
143 } __packed __aligned(2);
144 
/*
 * Per-port information block. It appears twice in struct lacp_du (actor
 * and partner) and is also how struct aggr_port remembers its partner
 * (p_partner).
 */
145 struct lacp_port_info {
146 	struct lacp_sysid	lacp_sysid;
147 	uint16_t		lacp_key;	/* ntohs()ed by aggr_get_trunk() */
148 	struct lacp_portid	lacp_portid;
149 	uint8_t			lacp_state;	/* aliased as p_partner_state */
150 	uint8_t			lacp_reserved[3];
151 } __packed __aligned(2);
152 
/*
 * Flag bits of a LACP state byte. The driver keeps such bytes in
 * p_actor_state (initialised to LACP_STATE_AGGREGATION when a port is
 * added) and aggr_get_trunk() builds one from the configured mode
 * (ACTIVITY) and timeout (TIMEOUT).
 */
153 #define LACP_STATE_ACTIVITY		(1 << 0)
154 #define LACP_STATE_TIMEOUT		(1 << 1)
155 #define LACP_STATE_AGGREGATION		(1 << 2)
156 #define LACP_STATE_SYNC			(1 << 3)
157 #define LACP_STATE_COLLECTING		(1 << 4)
158 #define LACP_STATE_DISTRIBUTING		(1 << 5)
159 #define LACP_STATE_DEFAULTED		(1 << 6)
160 #define LACP_STATE_EXPIRED		(1 << 7)
161 
/* Collector information block carried in struct lacp_du. */
162 struct lacp_collector_info {
163 	uint16_t		lacp_maxdelay;
164 	uint8_t			lacp_reserved[12];
165 } __packed __aligned(2);
166 
/*
 * Layout of a LACPDU as it follows the Ethernet header: the slow
 * protocol header, then actor, partner and collector information each
 * preceded by a TLV header, a terminator TLV header and padding.
 * The structure is packed, so member order is the on-wire order.
 */
167 struct lacp_du {
168 	struct ether_slowproto_hdr
169 				lacp_du_sph;
170 	struct lacp_tlv_hdr	lacp_actor_info_tlv;
171 	struct lacp_port_info	lacp_actor_info;
172 	struct lacp_tlv_hdr	lacp_partner_info_tlv;
173 	struct lacp_port_info	lacp_partner_info;
174 	struct lacp_tlv_hdr	lacp_collector_info_tlv;
175 	struct lacp_collector_info
176 				lacp_collector_info;
177 	/* other TLVs go here */
178 	struct lacp_tlv_hdr	lacp_terminator;
179 	uint8_t			lacp_pad[50];
180 } __packed __aligned(2);
181 
/* NOTE(review): presumably the lacp_tlv_type values used in marker_pdu - confirm. */
182 /* Marker TLV types */
183 
184 #define MARKER_T_INFORMATION		0x01
185 #define MARKER_T_RESPONSE		0x02
186 
/* Marker information block carried in struct marker_pdu. */
187 struct marker_info {
188 	uint16_t		marker_requester_port;
189 	uint8_t			marker_requester_system[ETHER_ADDR_LEN];
190 	uint8_t			marker_requester_txid[4];
191 	uint8_t			marker_pad[2];
192 } __packed __aligned(2);
193 
/*
 * Layout of a Marker PDU as it follows the Ethernet header: the slow
 * protocol header, one marker information TLV, a terminator TLV header
 * and padding. The structure is packed, so member order is the on-wire
 * order.
 */
194 struct marker_pdu {
195 	struct ether_slowproto_hdr
196 				marker_sph;
197 
198 	struct lacp_tlv_hdr	marker_info_tlv;
199 	struct marker_info	marker_info;
200 	struct lacp_tlv_hdr	marker_terminator;
201 	uint8_t			marker_pad[90];
202 } __packed __aligned(2);
203 
/*
 * States of the per-port Receive machine (p_rxm_state).
 * lacp_rxm_state_names[] lists one name per value in the same order,
 * so the two must be kept in step.
 */
204 enum lacp_rxm_state {
205 	LACP_RXM_S_BEGIN = 0,
206 	LACP_RXM_S_INITIALIZE,
207 	LACP_RXM_S_PORT_DISABLED,
208 	LACP_RXM_S_EXPIRED,
209 	LACP_RXM_S_LACP_DISABLED,
210 	LACP_RXM_S_DEFAULTED,
211 	LACP_RXM_S_CURRENT,
212 };
213 
/*
 * Events fed to the Receive machine through aggr_rxm_ev().
 * lacp_rxm_event_names[] lists one name per value in the same order,
 * so the two must be kept in step.
 */
214 enum lacp_rxm_event {
215 	LACP_RXM_E_BEGIN,
216 	LACP_RXM_E_UCT,
217 	LACP_RXM_E_PORT_MOVED,
218 	LACP_RXM_E_NOT_PORT_MOVED,
219 	LACP_RXM_E_PORT_ENABLED,
220 	LACP_RXM_E_NOT_PORT_ENABLED,
221 	LACP_RXM_E_LACP_ENABLED,
222 	LACP_RXM_E_NOT_LACP_ENABLED,
223 	LACP_RXM_E_LACPDU, /* CtrlMuxN:M_UNITDATA.indication(LACPDU) */
224 	LACP_RXM_E_TIMER_EXPIRED, /* current_while_timer expired */
225 };
226 
/*
 * States of the per-port Mux machine (p_mux_state).
 * lacp_mux_state_names[] lists one name per value in the same order,
 * so the two must be kept in step.
 */
227 enum lacp_mux_state {
228 	LACP_MUX_S_BEGIN = 0,
229 	LACP_MUX_S_DETACHED,
230 	LACP_MUX_S_WAITING,
231 	LACP_MUX_S_ATTACHED,
232 	LACP_MUX_S_DISTRIBUTING,
233 	LACP_MUX_S_COLLECTING,
234 };
235 
/*
 * Events fed to the Mux machine through aggr_mux() / aggr_mux_ev().
 * lacp_mux_event_names[] lists one name per value in the same order,
 * so the two must be kept in step.
 */
236 enum lacp_mux_event {
237 	LACP_MUX_E_BEGIN,
238 	LACP_MUX_E_SELECTED,
239 	LACP_MUX_E_STANDBY,
240 	LACP_MUX_E_UNSELECTED,
241 	LACP_MUX_E_READY,
242 	LACP_MUX_E_SYNC,
243 	LACP_MUX_E_NOT_SYNC,
244 	LACP_MUX_E_COLLECTING,
245 	LACP_MUX_E_NOT_COLLECTING,
246 };
247 
248 /*
249  * LACP variables
250  */
251 
/* LACP_ADDR_SLOW (01:80:c2:00:00:02) as an ETHER_ADDR_LEN byte array */
252 static const uint8_t lacp_address_slow[ETHER_ADDR_LEN] = LACP_ADDR_SLOW;
253 
/* Printable names, in the declaration order of enum lacp_rxm_state. */
254 static const char *lacp_rxm_state_names[] = {
255 	"BEGIN",
256 	"INITIALIZE",
257 	"PORT_DISABLED",
258 	"EXPIRED",
259 	"LACP_DISABLED",
260 	"DEFAULTED",
261 	"CURRENT",
262 };
263 
/* Printable names, in the declaration order of enum lacp_rxm_event. */
264 static const char *lacp_rxm_event_names[] = {
265 	"BEGIN",
266 	"UCT",
267 	"port_moved",
268 	"!port_moved",
269 	"port_enabled",
270 	"!port_enabled",
271 	"LACP_Enabled",
272 	"!LACP_Enabled",
273 	"LACPDU",
274 	"current_while_timer expired",
275 };
276 
/* Printable names, in the declaration order of enum lacp_mux_state. */
277 static const char *lacp_mux_state_names[] = {
278 	"BEGIN",
279 	"DETACHED",
280 	"WAITING",
281 	"ATTACHED",
282 	"DISTRIBUTING",
283 	"COLLECTING",
284 };
285 
/* Printable names, in the declaration order of enum lacp_mux_event. */
286 static const char *lacp_mux_event_names[] = {
287 	"BEGIN",
288 	"Selected == SELECTED",
289 	"Selected == STANDBY",
290 	"Selected == UNSELECTED",
291 	"Ready",
292 	"Partner.Sync",
293 	"! Partner.Sync",
294 	"Partner.Collecting",
295 	"! Partner.Collecting",
296 };
297 
298 /*
299  * aggr interface
300  */
301 
/*
 * AGGR_MAX_PORTS is the number of slots in struct aggr_map, the modulus
 * aggr_transmit() applies to the flow id, and the limit aggr_add_port()
 * checks sc_nports against.
 */
302 #define AGGR_MAX_PORTS		32
303 #define AGGR_MAX_SLOW_PKTS	(AGGR_MAX_PORTS * 3)
304 
/*
 * One entry on the softc's sc_multiaddrs list. aggr_add_port() walks
 * the list and passes each entry to aggr_multi() with SIOCADDMULTI for
 * the new port.
 * NOTE(review): m_addrlo/m_addrhi look like the bounds of a multicast
 * address range and m_refs like a reference count - confirm in
 * aggr_multi_add()/aggr_multi_del().
 */
305 struct aggr_multiaddr {
306 	TAILQ_ENTRY(aggr_multiaddr)
307 				m_entry;
308 	unsigned int		m_refs;
309 	uint8_t			m_addrlo[ETHER_ADDR_LEN];
310 	uint8_t			m_addrhi[ETHER_ADDR_LEN];
311 	struct sockaddr_storage m_addr;
312 };
313 TAILQ_HEAD(aggr_multiaddrs, aggr_multiaddr);
314 
315 struct aggr_softc;
316 
/*
 * Value of a port's "Selected" variable (p_selected, also used for
 * p_muxed). UNSELECTED is 0, which aggr_get_trunk() relies on when it
 * tests p_muxed as a boolean. aggr_port_selected_names[] lists one
 * name per value in the same order.
 */
317 enum aggr_port_selected {
318 	AGGR_PORT_UNSELECTED,
319 	AGGR_PORT_SELECTED,
320 	AGGR_PORT_STANDBY,
321 };
322 
/* Printable names, in the declaration order of enum aggr_port_selected. */
323 static const char *aggr_port_selected_names[] = {
324 	"UNSELECTED",
325 	"SELECTED",
326 	"STANDBY",
327 };
328 
/*
 * Per member-interface state. One of these is allocated by
 * aggr_add_port() for every Ethernet interface added to an aggr(4)
 * interface and is reachable from the member through ac_trunkport.
 */
329 struct aggr_port {
	/* the member interface itself */
330 	struct ifnet		*p_ifp0;
331 
	/* the member's address and MTU as they were when it was added */
332 	uint8_t			 p_lladdr[ETHER_ADDR_LEN];
333 	uint32_t		 p_mtu;
334 
	/*
	 * the member's own handlers, saved by aggr_add_port() before it
	 * installs aggr_p_ioctl, aggr_input and aggr_p_output in their place
	 */
335 	int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
336 	void (*p_input)(struct ifnet *, struct mbuf *);
337 	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
338 	    struct rtentry *);
339 
	/* link state hook (aggr_p_linkch) and detach hook (aggr_p_detach) */
340 	struct task		 p_lhook;
341 	struct task		 p_dhook;
342 
	/* owning aggr interface and linkage on its sc_ports list */
343 	struct aggr_softc	*p_aggr;
344 	TAILQ_ENTRY(aggr_port)	 p_entry;
345 
	/* aggr_input() drops ordinary traffic while p_collecting is 0 */
346 	unsigned int		 p_collecting;
347 	unsigned int		 p_distributing;
348 	TAILQ_ENTRY(aggr_port)	 p_entry_distributing;
349 	TAILQ_ENTRY(aggr_port)	 p_entry_muxen;
350 
351 	/* Partner information */
352 	enum aggr_port_selected	 p_muxed;
353 	enum aggr_port_selected	 p_selected;		/* Selected */
354 	struct lacp_port_info	 p_partner;
355 #define p_partner_state		 p_partner.lacp_state
356 
	/* LACP_STATE_* bits; starts out as LACP_STATE_AGGREGATION */
357 	uint8_t			 p_actor_state;
358 	uint8_t			 p_lacp_timeout;
359 
	/* run aggr_current_while_timer() and aggr_wait_while_timer() */
360 	struct timeout		 p_current_while_timer;
361 	struct timeout		 p_wait_while_timer;
362 
363 	/* Receive machine */
364 	enum lacp_rxm_state	 p_rxm_state;
	/* slow protocol frames queued by aggr_input() for aggr_rx() */
365 	struct mbuf_queue	 p_rxm_mq;
366 	struct task		 p_rxm_task;
367 
368 	/* Periodic Transmission machine */
369 	struct timeout		 p_ptm_tx;
370 
371 	/* Mux machine */
372 	enum lacp_mux_state	 p_mux_state;
373 
374 	/* Transmit machine */
	/* every slot is preset to a tick value in the past by aggr_add_port() */
375 	int			 p_txm_log[LACP_TX_MACHINE_RATE];
376 	unsigned int		 p_txm_slot;
377 	struct timeout		 p_txm_ntt;
378 };
379 
380 TAILQ_HEAD(aggr_port_list, aggr_port);
381 
/*
 * Output port lookup table. aggr_transmit() indexes m_ifp0s[] with the
 * packet's flow id modulo AGGR_MAX_PORTS and enqueues the packet on the
 * interface found there.
 */
382 struct aggr_map {
383 	struct ifnet		*m_ifp0s[AGGR_MAX_PORTS];
384 };
385 
/*
 * Per aggr(4) interface state, allocated zeroed by aggr_clone_create()
 * and released by aggr_clone_destroy().
 */
386 struct aggr_softc {
387 	struct arpcom		 sc_ac;
388 #define sc_if			 sc_ac.ac_if
	/* set by aggr_clone_destroy(); aggr_ioctl() then fails with ENXIO */
389 	unsigned int		 sc_dead;
	/* when non-zero aggr_add_port() calls ifpromisc() on the new port */
390 	unsigned int		 sc_promisc;
391 	struct ifmedia		 sc_media;
392 
393 	struct aggr_multiaddrs	 sc_multiaddrs;
394 
	/*
	 * arc4random() value picked at create time; aggr_input() XORs it
	 * with the port's if_index for packets without M_FLOWID
	 */
395 	unsigned int		 sc_mix;
396 
397 	struct aggr_map		 sc_maps[2];
398 	unsigned int		 sc_map_gen;
	/*
	 * map used by the transmit path, read with SMR_PTR_GET() inside an
	 * SMR read section; NULL means there is nothing to transmit on
	 */
399 	struct aggr_map		*sc_map;
400 
401 	struct rwlock		 sc_lock;
402 	struct aggr_port_list	 sc_ports;
403 	struct aggr_port_list	 sc_distributing;
404 	struct aggr_port_list	 sc_muxen;
	/* number of entries on sc_ports; reported as ra_ports */
405 	unsigned int		 sc_nports;
406 	unsigned int		 sc_ndistributing;
407 
408 	struct timeout		 sc_tick;
409 
	/* administrative LACP settings, see aggr_set_options() */
410 	uint8_t			 sc_lacp_mode;
411 #define AGGR_LACP_MODE_PASSIVE		0
412 #define AGGR_LACP_MODE_ACTIVE		1
	/* also the index into aggr_periodic_times[] */
413 	uint8_t			 sc_lacp_timeout;
414 #define AGGR_LACP_TIMEOUT_SLOW		0
415 #define AGGR_LACP_TIMEOUT_FAST		1
416 	uint16_t		 sc_lacp_prio;
417 	uint16_t		 sc_lacp_port_prio;
418 
	/* partner of the aggregation; aggr_get_trunk() ntohs()es the values */
419 	struct lacp_sysid	 sc_partner_system;
420 	uint16_t		 sc_partner_key;
421 };
422 
/*
 * Debug output: printf() the message only while IFF_DEBUG is set on the
 * aggr interface belonging to _sc.
 */
423 #define DPRINTF(_sc, fmt...)	do { \
424 	if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
425 		printf(fmt); \
426 } while (0)
427 
/*
 * Periodic time for each AGGR_LACP_TIMEOUT_* setting. The table size is
 * what aggr_set_options() validates a requested lacp_timeout against.
 */
428 static const unsigned int aggr_periodic_times[] = {
429 	[AGGR_LACP_TIMEOUT_SLOW] = LACP_SLOW_PERIODIC_TIME,
430 	[AGGR_LACP_TIMEOUT_FAST] = LACP_FAST_PERIODIC_TIME,
431 };
432 
433 static int	aggr_clone_create(struct if_clone *, int);
434 static int	aggr_clone_destroy(struct ifnet *);
435 
436 static int	aggr_ioctl(struct ifnet *, u_long, caddr_t);
437 static void	aggr_start(struct ifqueue *);
438 static int	aggr_enqueue(struct ifnet *, struct mbuf *);
439 
440 static int	aggr_media_change(struct ifnet *);
441 static void	aggr_media_status(struct ifnet *, struct ifmediareq *);
442 
443 static int	aggr_up(struct aggr_softc *);
444 static int	aggr_down(struct aggr_softc *);
445 static int	aggr_iff(struct aggr_softc *);
446 
447 static void	aggr_p_linkch(void *);
448 static void	aggr_p_detach(void *);
449 static int	aggr_p_ioctl(struct ifnet *, u_long, caddr_t);
450 static int	aggr_p_output(struct ifnet *, struct mbuf *,
451 		    struct sockaddr *, struct rtentry *);
452 
453 static int	aggr_get_trunk(struct aggr_softc *, struct trunk_reqall *);
454 static int	aggr_set_options(struct aggr_softc *,
455 		    const struct trunk_opts *);
456 static int	aggr_get_options(struct aggr_softc *, struct trunk_opts *);
457 static int	aggr_set_lladdr(struct aggr_softc *, const struct ifreq *);
458 static int	aggr_set_mtu(struct aggr_softc *, uint32_t);
459 static void	aggr_p_dtor(struct aggr_softc *, struct aggr_port *,
460 		    const char *);
461 static int	aggr_p_setlladdr(struct aggr_port *, const uint8_t *);
462 static int	aggr_p_set_mtu(struct aggr_port *, uint32_t);
463 static int	aggr_add_port(struct aggr_softc *,
464 		    const struct trunk_reqport *);
465 static int	aggr_get_port(struct aggr_softc *, struct trunk_reqport *);
466 static int	aggr_del_port(struct aggr_softc *,
467 		    const struct trunk_reqport *);
468 static int	aggr_group(struct aggr_softc *, struct aggr_port *, u_long);
469 static int	aggr_multi(struct aggr_softc *, struct aggr_port *,
470 		    const struct aggr_multiaddr *, u_long);
471 static void	aggr_update_capabilities(struct aggr_softc *);
472 static void	aggr_set_lacp_mode(struct aggr_softc *, int);
473 static void	aggr_set_lacp_timeout(struct aggr_softc *, int);
474 static int	aggr_multi_add(struct aggr_softc *, struct ifreq *);
475 static int	aggr_multi_del(struct aggr_softc *, struct ifreq *);
476 
477 static void	aggr_map(struct aggr_softc *);
478 
479 static void	aggr_record_default(struct aggr_softc *, struct aggr_port *);
480 static void	aggr_current_while_timer(void *);
481 static void	aggr_wait_while_timer(void *);
482 static void	aggr_rx(void *);
483 static void	aggr_rxm_ev(struct aggr_softc *, struct aggr_port *,
484 		    enum lacp_rxm_event, const struct lacp_du *);
485 #define aggr_rxm(_sc, _p, _ev) \
486 		aggr_rxm_ev((_sc), (_p), (_ev), NULL)
487 #define aggr_rxm_lacpdu(_sc, _p, _lacpdu) \
488 		aggr_rxm_ev((_sc), (_p), LACP_RXM_E_LACPDU, (_lacpdu))
489 
490 static void	aggr_mux(struct aggr_softc *, struct aggr_port *,
491 		    enum lacp_mux_event);
492 static int	aggr_mux_ev(struct aggr_softc *, struct aggr_port *,
493 		    enum lacp_mux_event, int *);
494 
495 static void	aggr_set_partner_timeout(struct aggr_port *, int);
496 
497 static void	aggr_ptm_tx(void *);
498 
499 static void	aggr_transmit_machine(void *);
500 static void	aggr_ntt(struct aggr_port *);
501 static void	aggr_ntt_transmit(struct aggr_port *);
502 
503 static void	aggr_set_selected(struct aggr_port *, enum aggr_port_selected,
504 		    enum lacp_mux_event);
505 static void	aggr_unselected(struct aggr_port *);
506 
507 static void	aggr_selection_logic(struct aggr_softc *, struct aggr_port *);
508 
/*
 * Cloner for "aggr" interfaces, registered by aggrattach(); ties the
 * name to aggr_clone_create() and aggr_clone_destroy().
 */
509 static struct if_clone aggr_cloner =
510     IF_CLONE_INITIALIZER("aggr", aggr_clone_create, aggr_clone_destroy);
511 
512 void
513 aggrattach(int count)
514 {
515 	if_clone_attach(&aggr_cloner);
516 }
517 
518 static int
519 aggr_clone_create(struct if_clone *ifc, int unit)
520 {
521 	struct aggr_softc *sc;
522 	struct ifnet *ifp;
523 
524 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
525 	if (sc == NULL)
526 		return (ENOMEM);
527 
528 	sc->sc_mix = arc4random();
529 
530 	ifp = &sc->sc_if;
531 
532 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
533 	    ifc->ifc_name, unit);
534 
535 	TAILQ_INIT(&sc->sc_multiaddrs);
536 	rw_init(&sc->sc_lock, "aggrlk");
537 	TAILQ_INIT(&sc->sc_ports);
538 	sc->sc_nports = 0;
539 	TAILQ_INIT(&sc->sc_distributing);
540 	sc->sc_ndistributing = 0;
541 	TAILQ_INIT(&sc->sc_muxen);
542 
543 	sc->sc_map_gen = 0;
544 	sc->sc_map = NULL; /* no links yet */
545 
546 	sc->sc_lacp_mode = AGGR_LACP_MODE_ACTIVE;
547 	sc->sc_lacp_timeout = AGGR_LACP_TIMEOUT_SLOW;
548 	sc->sc_lacp_prio = 0x8000; /* medium */
549 	sc->sc_lacp_port_prio = 0x8000; /* medium */
550 
551 	ifmedia_init(&sc->sc_media, 0, aggr_media_change, aggr_media_status);
552 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
553 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
554 
555 	ifp->if_softc = sc;
556 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
557 	ifp->if_ioctl = aggr_ioctl;
558 	ifp->if_qstart = aggr_start;
559 	ifp->if_enqueue = aggr_enqueue;
560 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
561 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
562 	ifp->if_link_state = LINK_STATE_DOWN;
563 	ether_fakeaddr(ifp);
564 
565 	if_counters_alloc(ifp);
566 	if_attach(ifp);
567 	ether_ifattach(ifp);
568 
569 	ifp->if_llprio = IFQ_MAXPRIO;
570 
571 	return (0);
572 }
573 
574 static int
575 aggr_clone_destroy(struct ifnet *ifp)
576 {
577 	struct aggr_softc *sc = ifp->if_softc;
578 	struct aggr_port *p;
579 
580 	NET_LOCK();
581 	sc->sc_dead = 1;
582 
583 	if (ISSET(ifp->if_flags, IFF_RUNNING))
584 		aggr_down(sc);
585 	NET_UNLOCK();
586 
587 	ether_ifdetach(ifp);
588 	if_detach(ifp);
589 
590 	/* last ref, no need to lock. aggr_p_dtor locks anyway */
591 	NET_LOCK();
592 	while ((p = TAILQ_FIRST(&sc->sc_ports)) != NULL)
593 		aggr_p_dtor(sc, p, "destroy");
594 	NET_UNLOCK();
595 
596 	free(sc, M_DEVBUF, sizeof(*sc));
597 
598 	return (0);
599 }
600 
601 /*
602  * LACP_Enabled
603  */
604 static inline int
605 aggr_lacp_enabled(struct aggr_softc *sc)
606 {
607 	struct ifnet *ifp = &sc->sc_if;
608 	return (ISSET(ifp->if_flags, IFF_RUNNING));
609 }
610 
611 /*
612  * port_enabled
613  */
614 static int
615 aggr_port_enabled(struct aggr_port *p)
616 {
617 	struct ifnet *ifp0 = p->p_ifp0;
618 
619 	if (!ISSET(ifp0->if_flags, IFF_RUNNING))
620 		return (0);
621 
622 	if (!LINK_STATE_IS_UP(ifp0->if_link_state))
623 		return (0);
624 
625 	return (1);
626 }
627 
/*
 * port_moved
 *
 * This variable is set to TRUE if the Receive machine for an Aggregation
 * Port is in the PORT_DISABLED state, and the combination of
 * Partner_Oper_System and Partner_Oper_Port_Number in use by that
 * Aggregation Port has been received in an incoming LACPDU on a
 * different Aggregation Port. This variable is set to FALSE once the
 * INITIALIZE state of the Receive machine has set the Partner information
 * for the Aggregation Port to administrative default values.
 *
 * Value: Boolean
 *
 * This implementation does not track moved ports: it ignores both
 * arguments and always reports FALSE (0).
 */
static int
aggr_port_moved(struct aggr_softc *sc, struct aggr_port *p)
{
	const int moved = 0;

	return (moved);
}
646 
647 static void
648 aggr_transmit(struct aggr_softc *sc, const struct aggr_map *map, struct mbuf *m)
649 {
650 	struct ifnet *ifp = &sc->sc_if;
651 	struct ifnet *ifp0;
652 	uint16_t flow = 0;
653 
654 #if NBPFILTER > 0
655 	{
656 		caddr_t if_bpf = ifp->if_bpf;
657 		if (if_bpf)
658 			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
659 	}
660 #endif
661 
662 	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
663 		flow = m->m_pkthdr.ph_flowid;
664 
665 	ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS];
666 
667 	if (if_enqueue(ifp0, m) != 0)
668 		counters_inc(ifp->if_counters, ifc_oerrors);
669 }
670 
671 static int
672 aggr_enqueue(struct ifnet *ifp, struct mbuf *m)
673 {
674 	struct aggr_softc *sc;
675 	const struct aggr_map *map;
676 	int error = 0;
677 
678 	if (!ifq_is_priq(&ifp->if_snd))
679 		return (if_enqueue_ifq(ifp, m));
680 
681 	sc = ifp->if_softc;
682 
683 	smr_read_enter();
684 	map = SMR_PTR_GET(&sc->sc_map);
685 	if (__predict_false(map == NULL)) {
686 		m_freem(m);
687 		error = ENETDOWN;
688 	} else {
689 		counters_pkt(ifp->if_counters,
690 		    ifc_opackets, ifc_obytes, m->m_pkthdr.len);
691 		aggr_transmit(sc, map, m);
692 	}
693 	smr_read_leave();
694 
695 	return (error);
696 }
697 
698 static void
699 aggr_start(struct ifqueue *ifq)
700 {
701 	struct ifnet *ifp = ifq->ifq_if;
702 	struct aggr_softc *sc = ifp->if_softc;
703 	const struct aggr_map *map;
704 
705 	smr_read_enter();
706 	map = SMR_PTR_GET(&sc->sc_map);
707 	if (__predict_false(map == NULL))
708 		ifq_purge(ifq);
709 	else {
710 		struct mbuf *m;
711 
712 		while ((m = ifq_dequeue(ifq)) != NULL)
713 			aggr_transmit(sc, map, m);
714 	}
715 	smr_read_leave();
716 }
717 
718 static inline int
719 aggr_eh_is_slow(const struct ether_header *eh)
720 {
721 	uint64_t dst;
722 
723 	if (eh->ether_type != htons(ETHERTYPE_SLOW))
724 		return (0);
725 
726 	dst = ether_addr_to_e64((struct ether_addr *)eh->ether_dhost);
727 	return (dst == LACP_ADDR_SLOW_E64);
728 }
729 
730 static void
731 aggr_input(struct ifnet *ifp0, struct mbuf *m)
732 {
733 	struct arpcom *ac0 = (struct arpcom *)ifp0;
734 	struct aggr_port *p = ac0->ac_trunkport;
735 	struct aggr_softc *sc = p->p_aggr;
736 	struct ifnet *ifp = &sc->sc_if;
737 	struct ether_header *eh;
738 	int hlen = sizeof(*eh);
739 
740 	if (!ISSET(ifp->if_flags, IFF_RUNNING))
741 		goto drop;
742 
743 	eh = mtod(m, struct ether_header *);
744 	if (!ISSET(m->m_flags, M_VLANTAG) &&
745 	    __predict_false(aggr_eh_is_slow(eh))) {
746 		struct ether_slowproto_hdr *sph;
747 
748 		hlen += sizeof(*sph);
749 		if (m->m_len < hlen) {
750 			m = m_pullup(m, hlen);
751 			if (m == NULL) {
752 				/* short++ */
753 				return;
754 			}
755 			eh = mtod(m, struct ether_header *);
756 		}
757 
758 		sph = (struct ether_slowproto_hdr *)(eh + 1);
759 		switch (sph->sph_subtype) {
760 		case SLOWPROTOCOLS_SUBTYPE_LACP:
761 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
762 			if (mq_enqueue(&p->p_rxm_mq, m) == 0)
763 				task_add(systq, &p->p_rxm_task);
764 			return;
765 		default:
766 			break;
767 		}
768 	}
769 
770 	if (__predict_false(!p->p_collecting))
771 		goto drop;
772 
773 	if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
774 		m->m_pkthdr.ph_flowid = ifp0->if_index ^ sc->sc_mix;
775 
776 	if_vinput(ifp, m);
777 
778 	return;
779 
780 drop:
781 	m_freem(m);
782 }
783 
784 static int
785 aggr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
786 {
787 	struct aggr_softc *sc = ifp->if_softc;
788 	struct ifreq *ifr = (struct ifreq *)data;
789 	int error = 0;
790 
791 	if (sc->sc_dead)
792 		return (ENXIO);
793 
794 	switch (cmd) {
795 	case SIOCSIFADDR:
796 		break;
797 
798 	case SIOCSIFFLAGS:
799 		if (ISSET(ifp->if_flags, IFF_UP)) {
800 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
801 				error = aggr_up(sc);
802 			else
803 				error = ENETRESET;
804 		} else {
805 			if (ISSET(ifp->if_flags, IFF_RUNNING))
806 				error = aggr_down(sc);
807 		}
808 		break;
809 
810 	case SIOCSIFLLADDR:
811 		error = aggr_set_lladdr(sc, ifr);
812 		break;
813 
814 	case SIOCSTRUNK:
815 		error = suser(curproc);
816 		if (error != 0)
817 			break;
818 
819 		if (((struct trunk_reqall *)data)->ra_proto !=
820 		    TRUNK_PROTO_LACP) {
821 			error = EPROTONOSUPPORT;
822 			break;
823 		}
824 
825 		/* nop */
826 		break;
827 	case SIOCGTRUNK:
828 		error = aggr_get_trunk(sc, (struct trunk_reqall *)data);
829 		break;
830 
831 	case SIOCSTRUNKOPTS:
832 		error = suser(curproc);
833 		if (error != 0)
834 			break;
835 
836 		error = aggr_set_options(sc, (struct trunk_opts *)data);
837 		break;
838 
839 	case SIOCGTRUNKOPTS:
840 		error = aggr_get_options(sc, (struct trunk_opts *)data);
841 		break;
842 
843 	case SIOCGTRUNKPORT:
844 		error = aggr_get_port(sc, (struct trunk_reqport *)data);
845 		break;
846 	case SIOCSTRUNKPORT:
847 		error = suser(curproc);
848 		if (error != 0)
849 			break;
850 
851 		error = aggr_add_port(sc, (struct trunk_reqport *)data);
852 		break;
853 	case SIOCSTRUNKDELPORT:
854 		error = suser(curproc);
855 		if (error != 0)
856 			break;
857 
858 		error = aggr_del_port(sc, (struct trunk_reqport *)data);
859 		break;
860 
861 	case SIOCSIFMTU:
862 		error = aggr_set_mtu(sc, ifr->ifr_mtu);
863 		break;
864 
865 	case SIOCADDMULTI:
866 		error = aggr_multi_add(sc, ifr);
867 		break;
868 	case SIOCDELMULTI:
869 		error = aggr_multi_del(sc, ifr);
870 		break;
871 
872 	case SIOCSIFMEDIA:
873 		error = EOPNOTSUPP;
874 		break;
875 	case SIOCGIFMEDIA:
876 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
877 		break;
878 
879 	default:
880 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
881 		break;
882 	}
883 
884 	if (error == ENETRESET)
885 		error = aggr_iff(sc);
886 
887 	return (error);
888 }
889 
890 static int
891 aggr_get_trunk(struct aggr_softc *sc, struct trunk_reqall *ra)
892 {
893 	struct ifnet *ifp = &sc->sc_if;
894 	struct trunk_reqport rp;
895 	struct aggr_port *p;
896 	size_t size = ra->ra_size;
897 	caddr_t ubuf = (caddr_t)ra->ra_port;
898 	struct lacp_opreq *req;
899 	uint8_t state = 0;
900 	int error = 0;
901 
902 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
903 		SET(state, LACP_STATE_ACTIVITY);
904 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
905 		SET(state, LACP_STATE_TIMEOUT);
906 
907 	ra->ra_proto = TRUNK_PROTO_LACP;
908 	memset(&ra->ra_psc, 0, sizeof(ra->ra_psc));
909 
910 	/*
911 	 * aggr(4) does not support Individual links so don't bother
912 	 * with portprio, portno, and state, as per the spec.
913 	 */
914 
915 	req = &ra->ra_lacpreq;
916 	req->actor_prio = sc->sc_lacp_prio;
917 	CTASSERT(sizeof(req->actor_mac) == sizeof(sc->sc_ac.ac_enaddr));
918 	memcpy(req->actor_mac, &sc->sc_ac.ac_enaddr, sizeof(req->actor_mac));
919 	req->actor_key = ifp->if_index;
920 	req->actor_state = state;
921 
922 	req->partner_prio = ntohs(sc->sc_partner_system.lacp_sysid_priority);
923 	CTASSERT(sizeof(req->partner_mac) ==
924 	    sizeof(sc->sc_partner_system.lacp_sysid_mac));
925 	memcpy(req->partner_mac, sc->sc_partner_system.lacp_sysid_mac,
926 	    sizeof(req->partner_mac));
927 	req->partner_key = ntohs(sc->sc_partner_key);
928 
929 	ra->ra_ports = sc->sc_nports;
930 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
931 		struct ifnet *ifp0;
932 		struct lacp_opreq *opreq;
933 
934 		if (size < sizeof(rp))
935 			break;
936 
937 		ifp0 = p->p_ifp0;
938 
939 		CTASSERT(sizeof(rp.rp_ifname) == sizeof(ifp->if_xname));
940 		CTASSERT(sizeof(rp.rp_portname) == sizeof(ifp0->if_xname));
941 
942 		memset(&rp, 0, sizeof(rp));
943 		memcpy(rp.rp_ifname, ifp->if_xname, sizeof(rp.rp_ifname));
944 		memcpy(rp.rp_portname, ifp0->if_xname, sizeof(rp.rp_portname));
945 
946 		if (p->p_muxed)
947 			SET(rp.rp_flags, TRUNK_PORT_ACTIVE);
948 		if (p->p_collecting)
949 			SET(rp.rp_flags, TRUNK_PORT_COLLECTING);
950 		if (p->p_distributing)
951 			SET(rp.rp_flags, TRUNK_PORT_DISTRIBUTING);
952 		if (!aggr_port_enabled(p))
953 			SET(rp.rp_flags, TRUNK_PORT_DISABLED);
954 
955 		opreq = &rp.rp_lacpreq;
956 
957 		opreq->actor_prio = sc->sc_lacp_prio;
958 		memcpy(opreq->actor_mac, &sc->sc_ac.ac_enaddr,
959 		    sizeof(req->actor_mac));
960 		opreq->actor_key = ifp->if_index;
961 		opreq->actor_portprio = sc->sc_lacp_port_prio;
962 		opreq->actor_portno = ifp0->if_index;
963 		opreq->actor_state = state | p->p_actor_state;
964 
965 		opreq->partner_prio =
966 		    ntohs(p->p_partner.lacp_sysid.lacp_sysid_priority);
967 		CTASSERT(sizeof(opreq->partner_mac) ==
968 		    sizeof(p->p_partner.lacp_sysid.lacp_sysid_mac));
969 		memcpy(opreq->partner_mac,
970 		    p->p_partner.lacp_sysid.lacp_sysid_mac,
971 		    sizeof(opreq->partner_mac));
972 		opreq->partner_key = ntohs(p->p_partner.lacp_key);
973 		opreq->partner_portprio =
974 		    ntohs(p->p_partner.lacp_portid.lacp_portid_priority);
975 		opreq->partner_portno =
976 		    ntohs(p->p_partner.lacp_portid.lacp_portid_number);
977 		opreq->partner_state = p->p_partner_state;
978 
979 		error = copyout(&rp, ubuf, sizeof(rp));
980 		if (error != 0)
981 			break;
982 
983 		ubuf += sizeof(rp);
984 		size -= sizeof(rp);
985 	}
986 
987 	return (error);
988 }
989 
990 static int
991 aggr_get_options(struct aggr_softc *sc, struct trunk_opts *tro)
992 {
993 	struct lacp_adminopts *opt = &tro->to_lacpopts;
994 
995 	if (tro->to_proto != TRUNK_PROTO_LACP)
996 		return (EPROTONOSUPPORT);
997 
998 	opt->lacp_mode = sc->sc_lacp_mode;
999 	opt->lacp_timeout = sc->sc_lacp_timeout;
1000 	opt->lacp_prio = sc->sc_lacp_prio;
1001 	opt->lacp_portprio = sc->sc_lacp_port_prio;
1002 	opt->lacp_ifqprio = sc->sc_if.if_llprio;
1003 
1004 	return (0);
1005 }
1006 
1007 static int
1008 aggr_set_options(struct aggr_softc *sc, const struct trunk_opts *tro)
1009 {
1010 	const struct lacp_adminopts *opt = &tro->to_lacpopts;
1011 
1012 	if (tro->to_proto != TRUNK_PROTO_LACP)
1013 		return (EPROTONOSUPPORT);
1014 
1015 	switch (tro->to_opts) {
1016 	case TRUNK_OPT_LACP_MODE:
1017 		switch (opt->lacp_mode) {
1018 		case AGGR_LACP_MODE_PASSIVE:
1019 		case AGGR_LACP_MODE_ACTIVE:
1020 			break;
1021 		default:
1022 			return (EINVAL);
1023 		}
1024 
1025 		aggr_set_lacp_mode(sc, opt->lacp_mode);
1026 		break;
1027 
1028 	case TRUNK_OPT_LACP_TIMEOUT:
1029 		if (opt->lacp_timeout >= nitems(aggr_periodic_times))
1030 			return (EINVAL);
1031 
1032 		aggr_set_lacp_timeout(sc, opt->lacp_timeout);
1033 		break;
1034 
1035 	case TRUNK_OPT_LACP_SYS_PRIO:
1036 		if (opt->lacp_prio == 0)
1037 			return (EINVAL);
1038 
1039 		sc->sc_lacp_prio = opt->lacp_prio;
1040 		break;
1041 
1042 	case TRUNK_OPT_LACP_PORT_PRIO:
1043 		if (opt->lacp_portprio == 0)
1044 			return (EINVAL);
1045 
1046 		sc->sc_lacp_port_prio = opt->lacp_portprio;
1047 		break;
1048 
1049 	default:
1050 		return (ENODEV);
1051 	}
1052 
1053 	return (0);
1054 }
1055 
1056 static int
1057 aggr_add_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1058 {
1059 	struct ifnet *ifp = &sc->sc_if;
1060 	struct ifnet *ifp0;
1061 	struct arpcom *ac0;
1062 	struct aggr_port *p;
1063 	struct aggr_multiaddr *ma;
1064 	int past = ticks - (hz * LACP_TIMEOUT_FACTOR);
1065 	int i;
1066 	int error;
1067 
1068 	NET_ASSERT_LOCKED();
1069 	if (sc->sc_nports > AGGR_MAX_PORTS)
1070 		return (ENOSPC);
1071 
1072 	ifp0 = if_unit(rp->rp_portname);
1073 	if (ifp0 == NULL)
1074 		return (EINVAL);
1075 
1076 	if (ifp0->if_index == ifp->if_index) {
1077 		error = EINVAL;
1078 		goto put;
1079 	}
1080 
1081 	if (ifp0->if_type != IFT_ETHER) {
1082 		error = EPROTONOSUPPORT;
1083 		goto put;
1084 	}
1085 
1086 	error = ether_brport_isset(ifp0);
1087 	if (error != 0)
1088 		goto put;
1089 
1090 	if (ifp0->if_hardmtu < ifp->if_mtu) {
1091 		error = ENOBUFS;
1092 		goto put;
1093 	}
1094 
1095 	ac0 = (struct arpcom *)ifp0;
1096 	if (ac0->ac_trunkport != NULL) {
1097 		error = EBUSY;
1098 		goto put;
1099 	}
1100 
1101 	/* let's try */
1102 
1103 	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
1104 	if (p == NULL) {
1105 		error = ENOMEM;
1106 		goto put;
1107 	}
1108 
1109 	for (i = 0; i < nitems(p->p_txm_log); i++)
1110 		p->p_txm_log[i] = past;
1111 
1112 	p->p_ifp0 = ifp0;
1113 	p->p_aggr = sc;
1114 	p->p_mtu = ifp0->if_mtu;
1115 
1116 	CTASSERT(sizeof(p->p_lladdr) == sizeof(ac0->ac_enaddr));
1117 	memcpy(p->p_lladdr, ac0->ac_enaddr, sizeof(p->p_lladdr));
1118 	p->p_ioctl = ifp0->if_ioctl;
1119 	p->p_input = ifp0->if_input;
1120 	p->p_output = ifp0->if_output;
1121 
1122 	error = aggr_group(sc, p, SIOCADDMULTI);
1123 	if (error != 0)
1124 		goto free;
1125 
1126 	error = aggr_p_setlladdr(p, sc->sc_ac.ac_enaddr);
1127 	if (error != 0)
1128 		goto ungroup;
1129 
1130 	error = aggr_p_set_mtu(p, ifp->if_mtu);
1131 	if (error != 0)
1132 		goto resetlladdr;
1133 
1134 	if (sc->sc_promisc) {
1135 		error = ifpromisc(ifp0, 1);
1136 		if (error != 0)
1137 			goto unmtu;
1138 	}
1139 
1140 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1141 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
1142 			log(LOG_WARNING, "%s %s: "
1143 			    "unable to add multicast address\n",
1144 			    ifp->if_xname, ifp0->if_xname);
1145 		}
1146 	}
1147 
1148 	task_set(&p->p_lhook, aggr_p_linkch, p);
1149 	if_linkstatehook_add(ifp0, &p->p_lhook);
1150 
1151 	task_set(&p->p_dhook, aggr_p_detach, p);
1152 	if_detachhook_add(ifp0, &p->p_dhook);
1153 
1154 	task_set(&p->p_rxm_task, aggr_rx, p);
1155 	mq_init(&p->p_rxm_mq, 3, IPL_NET);
1156 
1157 	timeout_set_proc(&p->p_ptm_tx, aggr_ptm_tx, p);
1158 	timeout_set_proc(&p->p_txm_ntt, aggr_transmit_machine, p);
1159 	timeout_set_proc(&p->p_current_while_timer,
1160 	    aggr_current_while_timer, p);
1161 	timeout_set_proc(&p->p_wait_while_timer, aggr_wait_while_timer, p);
1162 
1163 	p->p_muxed = 0;
1164 	p->p_collecting = 0;
1165 	p->p_distributing = 0;
1166 	p->p_selected = AGGR_PORT_UNSELECTED;
1167 	p->p_actor_state = LACP_STATE_AGGREGATION;
1168 
1169 	/* commit */
1170 	DPRINTF(sc, "%s %s trunkport: creating port\n",
1171 	    ifp->if_xname, ifp0->if_xname);
1172 
1173 	TAILQ_INSERT_TAIL(&sc->sc_ports, p, p_entry);
1174 	sc->sc_nports++;
1175 
1176 	aggr_update_capabilities(sc);
1177 
1178 	/*
1179          * use (and modification) of ifp->if_input and ac->ac_trunkport
1180          * is protected by NET_LOCK.
1181 	 */
1182 
1183 	ac0->ac_trunkport = p;
1184 
1185 	/* make sure p is visible before handlers can run */
1186 	membar_producer();
1187 	ifp0->if_ioctl = aggr_p_ioctl;
1188 	ifp0->if_input = aggr_input;
1189 	ifp0->if_output = aggr_p_output;
1190 
1191 	aggr_mux(sc, p, LACP_MUX_E_BEGIN);
1192 	aggr_rxm(sc, p, LACP_RXM_E_BEGIN);
1193 	aggr_p_linkch(p);
1194 
1195 	return (0);
1196 
1197 unmtu:
1198 	if (aggr_p_set_mtu(p, p->p_mtu) != 0) {
1199 		log(LOG_WARNING, "%s add %s: unable to reset mtu %u\n",
1200 		    ifp->if_xname, ifp0->if_xname, p->p_mtu);
1201 	}
1202 resetlladdr:
1203 	if (aggr_p_setlladdr(p, p->p_lladdr) != 0) {
1204 		log(LOG_WARNING, "%s add %s: unable to reset lladdr\n",
1205 		    ifp->if_xname, ifp0->if_xname);
1206 	}
1207 ungroup:
1208 	if (aggr_group(sc, p, SIOCDELMULTI) != 0) {
1209 		log(LOG_WARNING, "%s add %s: "
1210 		    "unable to remove LACP group address\n",
1211 		    ifp->if_xname, ifp0->if_xname);
1212 	}
1213 free:
1214 	free(p, M_DEVBUF, sizeof(*p));
1215 put:
1216 	if_put(ifp0);
1217 	return (error);
1218 }
1219 
1220 static struct aggr_port *
1221 aggr_trunkport(struct aggr_softc *sc, const char *name)
1222 {
1223 	struct aggr_port *p;
1224 
1225 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
1226 		if (strcmp(p->p_ifp0->if_xname, name) == 0)
1227 			return (p);
1228 	}
1229 
1230 	return (NULL);
1231 }
1232 
1233 static int
1234 aggr_get_port(struct aggr_softc *sc, struct trunk_reqport *rp)
1235 {
1236 	struct aggr_port *p;
1237 
1238 	NET_ASSERT_LOCKED();
1239 	p = aggr_trunkport(sc, rp->rp_portname);
1240 	if (p == NULL)
1241 		return (EINVAL);
1242 
1243 	/* XXX */
1244 
1245 	return (0);
1246 }
1247 
1248 static int
1249 aggr_del_port(struct aggr_softc *sc, const struct trunk_reqport *rp)
1250 {
1251 	struct aggr_port *p;
1252 
1253 	NET_ASSERT_LOCKED();
1254 	p = aggr_trunkport(sc, rp->rp_portname);
1255 	if (p == NULL)
1256 		return (EINVAL);
1257 
1258 	aggr_p_dtor(sc, p, "del");
1259 
1260 	return (0);
1261 }
1262 
1263 static int
1264 aggr_p_setlladdr(struct aggr_port *p, const uint8_t *addr)
1265 {
1266 	struct ifnet *ifp0 = p->p_ifp0;
1267 	struct ifreq ifr;
1268 	struct sockaddr *sa;
1269 	int error;
1270 
1271 	memset(&ifr, 0, sizeof(ifr));
1272 
1273 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1274 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1275 
1276 	sa = &ifr.ifr_addr;
1277 
1278 	/* wtf is this? */
1279 	sa->sa_len = ETHER_ADDR_LEN;
1280 	sa->sa_family = AF_LINK;
1281 	CTASSERT(sizeof(sa->sa_data) >= ETHER_ADDR_LEN);
1282 	memcpy(sa->sa_data, addr, ETHER_ADDR_LEN);
1283 
1284 	error = (*p->p_ioctl)(ifp0, SIOCSIFLLADDR, (caddr_t)&ifr);
1285 	switch (error) {
1286 	case ENOTTY:
1287 	case 0:
1288 		break;
1289 	default:
1290 		return (error);
1291 	}
1292 
1293 	error = if_setlladdr(ifp0, addr);
1294 	if (error != 0)
1295 		return (error);
1296 
1297 	ifnewlladdr(ifp0);
1298 
1299 	return (0);
1300 }
1301 
1302 static int
1303 aggr_p_set_mtu(struct aggr_port *p, uint32_t mtu)
1304 {
1305 	struct ifnet *ifp0 = p->p_ifp0;
1306 	struct ifreq ifr;
1307 
1308 	memset(&ifr, 0, sizeof(ifr));
1309 
1310 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
1311 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
1312 
1313 	ifr.ifr_mtu = mtu;
1314 
1315 	return ((*p->p_ioctl)(ifp0, SIOCSIFMTU, (caddr_t)&ifr));
1316 }
1317 
1318 static int
1319 aggr_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
1320 {
1321 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1322 	struct aggr_port *p = ac0->ac_trunkport;
1323 	struct ifreq *ifr = (struct ifreq *)data;
1324 	int error = 0;
1325 
1326 	switch (cmd) {
1327 	case SIOCGTRUNKPORT: {
1328 		struct trunk_reqport *rp = (struct trunk_reqport *)data;
1329 		struct aggr_softc *sc = p->p_aggr;
1330 		struct ifnet *ifp = &sc->sc_if;
1331 
1332 		if (strncmp(rp->rp_ifname, rp->rp_portname,
1333 		    sizeof(rp->rp_ifname)) != 0)
1334 			return (EINVAL);
1335 
1336 		CTASSERT(sizeof(rp->rp_ifname) == sizeof(ifp->if_xname));
1337 		memcpy(rp->rp_ifname, ifp->if_xname, sizeof(rp->rp_ifname));
1338 		break;
1339 	}
1340 
1341 	case SIOCSIFMTU:
1342 		if (ifr->ifr_mtu == ifp0->if_mtu)
1343 			break; /* nop */
1344 
1345 		/* FALLTHROUGH */
1346 	case SIOCSIFLLADDR:
1347 		error = EBUSY;
1348 		break;
1349 
1350 	case SIOCSIFFLAGS:
1351 		if (!ISSET(ifp0->if_flags, IFF_UP) &&
1352 		    ISSET(ifp0->if_flags, IFF_RUNNING)) {
1353 			/* port is going down */
1354 			if (p->p_selected == AGGR_PORT_SELECTED) {
1355 				aggr_unselected(p);
1356 				aggr_ntt_transmit(p); /* XXX */
1357 			}
1358 		}
1359 		/* FALLTHROUGH */
1360 	default:
1361 		error = (*p->p_ioctl)(ifp0, cmd, data);
1362 		break;
1363 	}
1364 
1365 	return (error);
1366 }
1367 
1368 static int
1369 aggr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
1370     struct rtentry *rt)
1371 {
1372 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1373 	struct aggr_port *p = ac0->ac_trunkport;
1374 
1375 	/* restrict transmission to bpf only */
1376 	if (m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL) {
1377 		m_freem(m);
1378 		return (EBUSY);
1379 	}
1380 
1381 	return ((*p->p_output)(ifp0, m, dst, rt));
1382 }
1383 
1384 static void
1385 aggr_p_dtor(struct aggr_softc *sc, struct aggr_port *p, const char *op)
1386 {
1387 	struct ifnet *ifp = &sc->sc_if;
1388 	struct ifnet *ifp0 = p->p_ifp0;
1389 	struct arpcom *ac0 = (struct arpcom *)ifp0;
1390 	struct aggr_multiaddr *ma;
1391 	enum aggr_port_selected selected;
1392 	int error;
1393 
1394 	DPRINTF(sc, "%s %s %s: destroying port\n",
1395 	    ifp->if_xname, ifp0->if_xname, op);
1396 
1397 	selected = p->p_selected;
1398 	aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1399 	aggr_unselected(p);
1400 	if (aggr_port_enabled(p) && selected == AGGR_PORT_SELECTED)
1401 		aggr_ntt_transmit(p);
1402 
1403 	timeout_del(&p->p_ptm_tx);
1404 	timeout_del_barrier(&p->p_txm_ntt); /* XXX */
1405 	timeout_del(&p->p_current_while_timer);
1406 	timeout_del(&p->p_wait_while_timer);
1407 
1408 	/*
1409          * use (and modification) of ifp->if_input and ac->ac_trunkport
1410          * is protected by NET_LOCK.
1411 	 */
1412 
1413 	ac0->ac_trunkport = NULL;
1414 	ifp0->if_input = p->p_input;
1415 	ifp0->if_ioctl = p->p_ioctl;
1416 	ifp0->if_output = p->p_output;
1417 
1418 	TAILQ_REMOVE(&sc->sc_ports, p, p_entry);
1419 	sc->sc_nports--;
1420 
1421 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
1422 		error = aggr_multi(sc, p, ma, SIOCDELMULTI);
1423 		if (error != 0) {
1424 			log(LOG_WARNING, "%s %s %s: "
1425 			    "unable to remove multicast address (%d)\n",
1426 			    ifp->if_xname, op, ifp0->if_xname, error);
1427 		}
1428 	}
1429 
1430 	if (sc->sc_promisc) {
1431 		error = ifpromisc(ifp0, 0);
1432 		if (error != 0) {
1433 			log(LOG_WARNING, "%s %s %s: "
1434 			    "unable to disable promisc (%d)\n",
1435 			    ifp->if_xname, op, ifp0->if_xname, error);
1436 		}
1437 	}
1438 
1439 	error = aggr_p_set_mtu(p, p->p_mtu);
1440 	if (error != 0) {
1441 		log(LOG_WARNING, "%s %s %s: unable to restore mtu %u (%d)\n",
1442 		    ifp->if_xname, op, ifp0->if_xname, p->p_mtu, error);
1443 	}
1444 
1445 	error = aggr_p_setlladdr(p, p->p_lladdr);
1446 	if (error != 0) {
1447 		log(LOG_WARNING, "%s %s %s: unable to restore lladdr (%d)\n",
1448 		    ifp->if_xname, op, ifp0->if_xname, error);
1449 	}
1450 
1451 	error = aggr_group(sc, p, SIOCDELMULTI);
1452 	if (error != 0) {
1453 		log(LOG_WARNING, "%s %s %s: "
1454 		    "unable to remove LACP group address (%d)\n",
1455 		    ifp->if_xname, op, ifp0->if_xname, error);
1456 	}
1457 
1458 	if_detachhook_del(ifp0, &p->p_dhook);
1459 	if_linkstatehook_del(ifp0, &p->p_lhook);
1460 
1461 	if_put(ifp0);
1462 	free(p, M_DEVBUF, sizeof(*p));
1463 
1464 	/* XXX this is a pretty ugly place to update this */
1465 	aggr_update_capabilities(sc);
1466 }
1467 
1468 static void
1469 aggr_p_detach(void *arg)
1470 {
1471 	struct aggr_port *p = arg;
1472 	struct aggr_softc *sc = p->p_aggr;
1473 
1474 	aggr_p_dtor(sc, p, "detach");
1475 
1476 	NET_ASSERT_LOCKED();
1477 }
1478 
1479 static void
1480 aggr_p_linkch(void *arg)
1481 {
1482 	struct aggr_port *p = arg;
1483 	struct aggr_softc *sc = p->p_aggr;
1484 
1485 	NET_ASSERT_LOCKED();
1486 
1487 	if (aggr_port_enabled(p)) {
1488 		aggr_rxm(sc, p, LACP_RXM_E_PORT_ENABLED);
1489 
1490 		if (aggr_lacp_enabled(sc)) {
1491 			timeout_add_sec(&p->p_ptm_tx,
1492 			    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
1493 		}
1494 	} else {
1495 		aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED);
1496 		aggr_unselected(p);
1497 		aggr_record_default(sc, p);
1498 		timeout_del(&p->p_ptm_tx);
1499 	}
1500 }
1501 
1502 static void
1503 aggr_map(struct aggr_softc *sc)
1504 {
1505 	struct ifnet *ifp = &sc->sc_if;
1506 	struct aggr_map *map = NULL;
1507 	struct aggr_port *p;
1508 	unsigned int gen;
1509 	unsigned int i;
1510 	int link_state = LINK_STATE_DOWN;
1511 
1512 	p = TAILQ_FIRST(&sc->sc_distributing);
1513 	if (p != NULL) {
1514 		gen = sc->sc_map_gen++;
1515 		map = &sc->sc_maps[gen % nitems(sc->sc_maps)];
1516 
1517 		for (i = 0; i < nitems(map->m_ifp0s); i++) {
1518 			map->m_ifp0s[i] = p->p_ifp0;
1519 
1520 			p = TAILQ_NEXT(p, p_entry_distributing);
1521 			if (p == NULL)
1522 				p = TAILQ_FIRST(&sc->sc_distributing);
1523 		}
1524 
1525 		link_state = LINK_STATE_FULL_DUPLEX;
1526 	}
1527 
1528 	SMR_PTR_SET_LOCKED(&sc->sc_map, map);
1529 	smr_barrier();
1530 
1531 	if (ifp->if_link_state != link_state) {
1532 		ifp->if_link_state = link_state;
1533 		if_link_state_change(ifp);
1534 	}
1535 }
1536 
1537 static void
1538 aggr_current_while_timer(void *arg)
1539 {
1540 	struct aggr_port *p = arg;
1541 	struct aggr_softc *sc = p->p_aggr;
1542 
1543 	aggr_rxm(sc, p, LACP_RXM_E_TIMER_EXPIRED);
1544 }
1545 
1546 static void
1547 aggr_wait_while_timer(void *arg)
1548 {
1549 	struct aggr_port *p = arg;
1550 	struct aggr_softc *sc = p->p_aggr;
1551 
1552 	aggr_selection_logic(sc, p);
1553 }
1554 
1555 static void
1556 aggr_start_current_while_timer(struct aggr_port *p, unsigned int t)
1557 {
1558 	timeout_add_sec(&p->p_current_while_timer,
1559 		aggr_periodic_times[t] * LACP_TIMEOUT_FACTOR);
1560 }
1561 
1562 static void
1563 aggr_input_lacpdu(struct aggr_port *p, struct mbuf *m)
1564 {
1565 	struct aggr_softc *sc = p->p_aggr;
1566 	struct lacp_du *lacpdu;
1567 
1568 	if (m->m_len < sizeof(*lacpdu)) {
1569 		m = m_pullup(m, sizeof(*lacpdu));
1570 		if (m == NULL)
1571 			return;
1572 	}
1573 
1574 	/*
1575 	 * In the process of executing the recordPDU function, a Receive
1576 	 * machine compliant to this standard shall not validate the
1577 	 * Version Number, TLV_type, or Reserved fields in received
1578 	 * LACPDUs. The same actions are taken regardless of the values
1579 	 * received in these fields. A Receive machine may validate
1580 	 * the Actor_Information_Length, Partner_Information_Length,
1581 	 * Collector_Information_Length, or Terminator_Length fields.
1582 	 */
1583 
1584 	lacpdu = mtod(m, struct lacp_du *);
1585 	aggr_rxm_lacpdu(sc, p, lacpdu);
1586 
1587 	m_freem(m);
1588 }
1589 
1590 static void
1591 aggr_update_selected(struct aggr_softc *sc, struct aggr_port *p,
1592     const struct lacp_du *lacpdu)
1593 {
1594 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1595 	const struct lacp_port_info *lpi = &p->p_partner;
1596 
1597 	if ((rpi->lacp_portid.lacp_portid_number ==
1598 	     lpi->lacp_portid.lacp_portid_number) &&
1599 	    (rpi->lacp_portid.lacp_portid_priority ==
1600 	     lpi->lacp_portid.lacp_portid_priority) &&
1601 	    ETHER_IS_EQ(rpi->lacp_sysid.lacp_sysid_mac,
1602 	     lpi->lacp_sysid.lacp_sysid_mac) &&
1603 	    (rpi->lacp_sysid.lacp_sysid_priority ==
1604 	     lpi->lacp_sysid.lacp_sysid_priority) &&
1605 	    (rpi->lacp_key == lpi->lacp_key) &&
1606 	    (ISSET(rpi->lacp_state, LACP_STATE_AGGREGATION) ==
1607 	     ISSET(lpi->lacp_state, LACP_STATE_AGGREGATION)))
1608 		return;
1609 
1610 	aggr_unselected(p);
1611 }
1612 
1613 static void
1614 aggr_record_default(struct aggr_softc *sc, struct aggr_port *p)
1615 {
1616 	struct lacp_port_info *pi = &p->p_partner;
1617 
1618 	pi->lacp_sysid.lacp_sysid_priority = htons(0);
1619 	memset(pi->lacp_sysid.lacp_sysid_mac, 0,
1620 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
1621 
1622 	pi->lacp_key = htons(0);
1623 
1624 	pi->lacp_portid.lacp_portid_priority = htons(0);
1625 	pi->lacp_portid.lacp_portid_number = htons(0);
1626 
1627 	SET(p->p_actor_state, LACP_STATE_DEFAULTED);
1628 
1629 	pi->lacp_state = LACP_STATE_AGGREGATION | LACP_STATE_SYNC;
1630 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1631 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
1632 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1633 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
1634 
1635 	/* notify Mux */
1636 	aggr_mux(sc, p, LACP_MUX_E_NOT_COLLECTING);
1637 	aggr_mux(sc, p, LACP_MUX_E_SYNC);
1638 }
1639 
1640 static void
1641 aggr_update_default_selected(struct aggr_softc *sc, struct aggr_port *p)
1642 {
1643 	const struct lacp_port_info *pi = &p->p_partner;
1644 
1645 	if ((pi->lacp_portid.lacp_portid_number == htons(0)) &&
1646 	    (pi->lacp_portid.lacp_portid_priority == htons(0)) &&
1647 	    ETHER_IS_ANYADDR(pi->lacp_sysid.lacp_sysid_mac) &&
1648 	    (pi->lacp_sysid.lacp_sysid_priority == htons(0)) &&
1649 	    (pi->lacp_key == htons(0)) &&
1650 	    ISSET(pi->lacp_state, LACP_STATE_AGGREGATION))
1651 		return;
1652 
1653 	aggr_unselected(p);
1654 	aggr_selection_logic(sc, p); /* restart */
1655 }
1656 
1657 static int
1658 aggr_update_ntt(struct aggr_port *p, const struct lacp_du *lacpdu)
1659 {
1660 	struct aggr_softc *sc = p->p_aggr;
1661 	struct arpcom *ac = &sc->sc_ac;
1662 	struct ifnet *ifp = &ac->ac_if;
1663 	struct ifnet *ifp0 = p->p_ifp0;
1664 	const struct lacp_port_info *pi = &lacpdu->lacp_partner_info;
1665 	uint8_t bits = LACP_STATE_ACTIVITY | LACP_STATE_TIMEOUT |
1666 	    LACP_STATE_SYNC | LACP_STATE_AGGREGATION;
1667 	uint8_t state = p->p_actor_state;
1668 	int sync = 0;
1669 
1670 	if (pi->lacp_portid.lacp_portid_number != htons(ifp0->if_index))
1671 		goto ntt;
1672 	if (pi->lacp_portid.lacp_portid_priority !=
1673 	    htons(sc->sc_lacp_port_prio))
1674 		goto ntt;
1675 	if (!ETHER_IS_EQ(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr))
1676 		goto ntt;
1677 	if (pi->lacp_sysid.lacp_sysid_priority !=
1678 	    htons(sc->sc_lacp_prio))
1679 		goto ntt;
1680 	if (pi->lacp_key != htons(ifp->if_index))
1681 		goto ntt;
1682 	if (ISSET(pi->lacp_state, LACP_STATE_SYNC) !=
1683 	    ISSET(state, LACP_STATE_SYNC))
1684 		goto ntt;
1685 	sync = 1;
1686 
1687 	if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST)
1688 		SET(state, LACP_STATE_TIMEOUT);
1689 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE)
1690 		SET(state, LACP_STATE_ACTIVITY);
1691 
1692 	if (ISSET(pi->lacp_state, bits) != ISSET(state, bits))
1693 		goto ntt;
1694 
1695 	return (1);
1696 
1697 ntt:
1698 	aggr_ntt(p);
1699 
1700 	return (sync);
1701 }
1702 
1703 static void
1704 aggr_recordpdu(struct aggr_port *p, const struct lacp_du *lacpdu, int sync)
1705 {
1706 	struct aggr_softc *sc = p->p_aggr;
1707 	const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info;
1708 	struct lacp_port_info *lpi = &p->p_partner;
1709 	int active = ISSET(rpi->lacp_state, LACP_STATE_ACTIVITY) ||
1710 	    (ISSET(p->p_actor_state, LACP_STATE_ACTIVITY) &&
1711 	     ISSET(lacpdu->lacp_partner_info.lacp_state, LACP_STATE_ACTIVITY));
1712 
1713 	lpi->lacp_portid.lacp_portid_number =
1714 	    rpi->lacp_portid.lacp_portid_number;
1715 	lpi->lacp_portid.lacp_portid_priority =
1716 	    rpi->lacp_portid.lacp_portid_priority;
1717 	memcpy(lpi->lacp_sysid.lacp_sysid_mac,
1718 	    rpi->lacp_sysid.lacp_sysid_mac,
1719 	    sizeof(lpi->lacp_sysid.lacp_sysid_mac));
1720 	lpi->lacp_sysid.lacp_sysid_priority =
1721 	    rpi->lacp_sysid.lacp_sysid_priority;
1722 	lpi->lacp_key = rpi->lacp_key;
1723 	lpi->lacp_state = rpi->lacp_state & ~LACP_STATE_SYNC;
1724 
1725 	CLR(p->p_actor_state, LACP_STATE_DEFAULTED);
1726 
1727 	if (active && ISSET(rpi->lacp_state, LACP_STATE_SYNC) && sync) {
1728 		SET(p->p_partner_state, LACP_STATE_SYNC);
1729 		aggr_mux(sc, p, LACP_MUX_E_SYNC);
1730 	} else {
1731 		CLR(p->p_partner_state, LACP_STATE_SYNC);
1732 		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
1733 	}
1734 }
1735 
1736 static void
1737 aggr_marker_response(struct aggr_port *p, struct mbuf *m)
1738 {
1739 	struct aggr_softc *sc = p->p_aggr;
1740 	struct arpcom *ac = &sc->sc_ac;
1741 	struct ifnet *ifp0 = p->p_ifp0;
1742 	struct marker_pdu *mpdu;
1743 	struct ether_header *eh;
1744 
1745 	mpdu = mtod(m, struct marker_pdu *);
1746 	mpdu->marker_info_tlv.lacp_tlv_type = MARKER_T_RESPONSE;
1747 
1748 	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
1749 	if (m == NULL)
1750 		return;
1751 
1752 	eh = mtod(m, struct ether_header *);
1753 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
1754 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
1755 	eh->ether_type = htons(ETHERTYPE_SLOW);
1756 
1757 	(void)if_enqueue(ifp0, m);
1758 }
1759 
1760 static void
1761 aggr_input_marker(struct aggr_port *p, struct mbuf *m)
1762 {
1763 	struct marker_pdu *mpdu;
1764 
1765 	if (m->m_len < sizeof(*mpdu)) {
1766 		m = m_pullup(m, sizeof(*mpdu));
1767 		if (m == NULL)
1768 			return;
1769 	}
1770 
1771 	mpdu = mtod(m, struct marker_pdu *);
1772 	switch (mpdu->marker_info_tlv.lacp_tlv_type) {
1773 	case MARKER_T_INFORMATION:
1774 		aggr_marker_response(p, m);
1775 		break;
1776 	default:
1777 		m_freem(m);
1778 		break;
1779 	}
1780 }
1781 
1782 static void
1783 aggr_rx(void *arg)
1784 {
1785 	struct aggr_port *p = arg;
1786 	struct mbuf_list ml;
1787 	struct mbuf *m;
1788 
1789 	mq_delist(&p->p_rxm_mq, &ml);
1790 
1791 	while ((m = ml_dequeue(&ml)) != NULL) {
1792 		struct ether_slowproto_hdr *sph;
1793 
1794 		/* aggr_input has checked eh already */
1795 		m_adj(m, sizeof(struct ether_header));
1796 
1797 		sph = mtod(m, struct ether_slowproto_hdr *);
1798 		switch (sph->sph_subtype) {
1799 		case SLOWPROTOCOLS_SUBTYPE_LACP:
1800 			aggr_input_lacpdu(p, m);
1801 			break;
1802 		case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER:
1803 			aggr_input_marker(p, m);
1804 			break;
1805 		default:
1806 			panic("unexpected slow protocol subtype");
1807 			/* NOTREACHED */
1808 		}
1809 	}
1810 }
1811 
1812 static void
1813 aggr_set_selected(struct aggr_port *p, enum aggr_port_selected s,
1814     enum lacp_mux_event ev)
1815 {
1816 	struct aggr_softc *sc = p->p_aggr;
1817 
1818 	if (p->p_selected != s) {
1819 		DPRINTF(sc, "%s %s: Selected %s -> %s\n",
1820 		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
1821 		    aggr_port_selected_names[p->p_selected],
1822 		    aggr_port_selected_names[s]);
1823 		p->p_selected = s;
1824 	}
1825 	aggr_mux(sc, p, ev);
1826 }
1827 
1828 static void
1829 aggr_unselected(struct aggr_port *p)
1830 {
1831 	aggr_set_selected(p, AGGR_PORT_UNSELECTED, LACP_MUX_E_UNSELECTED);
1832 }
1833 
1834 static inline void
1835 aggr_selected(struct aggr_port *p)
1836 {
1837 	aggr_set_selected(p, AGGR_PORT_SELECTED, LACP_MUX_E_SELECTED);
1838 }
1839 
#ifdef notyet
/*
 * Mark port p STANDBY and send the matching event to the Mux machine.
 * Compiled out until something uses it.
 */
static inline void
aggr_standby(struct aggr_port *p)
{
	aggr_set_selected(p, AGGR_PORT_STANDBY, LACP_MUX_E_STANDBY);
}
#endif
1847 
1848 static void
1849 aggr_selection_logic(struct aggr_softc *sc, struct aggr_port *p)
1850 {
1851 	const struct lacp_port_info *pi;
1852 	struct arpcom *ac = &sc->sc_ac;
1853 	struct ifnet *ifp = &ac->ac_if;
1854 	const uint8_t *mac;
1855 
1856 	if (p->p_rxm_state != LACP_RXM_S_CURRENT) {
1857 		DPRINTF(sc, "%s %s: selection logic: unselected (rxm !%s)\n",
1858 		    ifp->if_xname, p->p_ifp0->if_xname,
1859 		    lacp_rxm_state_names[LACP_RXM_S_CURRENT]);
1860 		goto unselected;
1861 	}
1862 
1863 	pi = &p->p_partner;
1864 	if (pi->lacp_key == htons(0)) {
1865 		DPRINTF(sc, "%s %s: selection logic: unselected "
1866 		    "(partner key == 0)\n",
1867 		    ifp->if_xname, p->p_ifp0->if_xname);
1868 		goto unselected;
1869 	}
1870 
1871 	/*
1872 	 * aggr(4) does not support individual interfaces
1873 	 */
1874 	if (!ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) {
1875 		DPRINTF(sc, "%s %s: selection logic: unselected "
1876 		    "(partner state is Individual)\n",
1877 		    ifp->if_xname, p->p_ifp0->if_xname);
1878 		goto unselected;
1879 	}
1880 
1881 	/*
1882 	 * Any pair of Aggregation Ports that are members of the same
1883 	 * LAG, but are connected together by the same link, shall not
1884 	 * select the same Aggregator
1885 	 */
1886 
1887 	mac = pi->lacp_sysid.lacp_sysid_mac;
1888 	if (ETHER_IS_EQ(mac, ac->ac_enaddr) &&
1889 	    pi->lacp_key == htons(ifp->if_index)) {
1890 		DPRINTF(sc, "%s %s: selection logic: unselected "
1891 		    "(partner sysid !eq)\n",
1892 		    ifp->if_xname, p->p_ifp0->if_xname);
1893 		goto unselected;
1894 	}
1895 
1896 	if (!TAILQ_EMPTY(&sc->sc_muxen)) {
1897 		/* an aggregation has already been selected */
1898 		if (!ETHER_IS_EQ(mac, sc->sc_partner_system.lacp_sysid_mac) ||
1899 		    sc->sc_partner_key != pi->lacp_key) {
1900 			DPRINTF(sc, "%s %s: selection logic: unselected "
1901 			    "(partner sysid != selection)\n",
1902 			    ifp->if_xname, p->p_ifp0->if_xname);
1903 			goto unselected;
1904 		}
1905 	}
1906 
1907 	aggr_selected(p);
1908 	return;
1909 
1910 unselected:
1911 	aggr_unselected(p);
1912 }
1913 
1914 static void
1915 aggr_mux(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev)
1916 {
1917 	int ntt = 0;
1918 
1919 	/*
1920 	 * the mux can move through multiple states based on a
1921 	 * single event, so loop until the event is completely consumed.
1922 	 * debounce NTT = TRUE through the multiple state transitions.
1923 	 */
1924 
1925 	while (aggr_mux_ev(sc, p, ev, &ntt) != 0)
1926 		;
1927 
1928 	if (ntt)
1929 		aggr_ntt(p);
1930 }
1931 
#ifdef notyet
/*
 * Ready_N for port p: true when its Mux machine is WAITING and its
 * wait_while timer is no longer pending.  Compiled out until something
 * uses it.
 */
static int
aggr_ready_n(struct aggr_port *p)
{
	if (p->p_mux_state != LACP_MUX_S_WAITING)
		return (0);

	return (!timeout_pending(&p->p_wait_while_timer));
}
#endif
1940 
/*
 * Ready test used by the Mux machine when leaving WAITING.  It is
 * unconditionally true; sc is not consulted.
 */
static inline int
aggr_ready(struct aggr_softc *sc)
{
	return (1);
}
1946 
1947 static void
1948 aggr_disable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1949 {
1950 	if (!p->p_distributing)
1951 		return;
1952 
1953 	sc->sc_ndistributing--;
1954 	TAILQ_REMOVE(&sc->sc_distributing, p, p_entry_distributing);
1955 	p->p_distributing = 0;
1956 
1957 	aggr_map(sc);
1958 
1959 	DPRINTF(sc, "%s %s: distributing disabled\n",
1960 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1961 }
1962 
1963 static void
1964 aggr_enable_distributing(struct aggr_softc *sc, struct aggr_port *p)
1965 {
1966 	if (p->p_distributing)
1967 		return;
1968 
1969 	/* check the LAG ID? */
1970 
1971 	p->p_distributing = 1;
1972 	TAILQ_INSERT_TAIL(&sc->sc_distributing, p, p_entry_distributing);
1973 	sc->sc_ndistributing++;
1974 
1975 	aggr_map(sc);
1976 
1977 	DPRINTF(sc, "%s %s: distributing enabled\n",
1978 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1979 }
1980 
1981 static void
1982 aggr_disable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1983 {
1984 	if (!p->p_collecting)
1985 		return;
1986 
1987 	p->p_collecting = 0;
1988 
1989 	DPRINTF(sc, "%s %s: collecting disabled\n",
1990 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
1991 }
1992 
1993 static void
1994 aggr_enable_collecting(struct aggr_softc *sc, struct aggr_port *p)
1995 {
1996 	if (p->p_collecting)
1997 		return;
1998 
1999 	p->p_collecting = 1;
2000 
2001 	DPRINTF(sc, "%s %s: collecting enabled\n",
2002 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2003 }
2004 
2005 static void
2006 aggr_attach_mux(struct aggr_softc *sc, struct aggr_port *p)
2007 {
2008 	const struct lacp_port_info *pi = &p->p_partner;
2009 
2010 	if (p->p_muxed)
2011 		return;
2012 
2013 	p->p_muxed = 1;
2014 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
2015 		KASSERT(sc->sc_partner_key == htons(0));
2016 		sc->sc_partner_system = pi->lacp_sysid;
2017 		sc->sc_partner_key = pi->lacp_key;
2018 	}
2019 
2020 	TAILQ_INSERT_TAIL(&sc->sc_muxen, p, p_entry_muxen);
2021 
2022 	DPRINTF(sc, "%s %s: mux attached\n",
2023 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2024 }
2025 
2026 static void
2027 aggr_detach_mux(struct aggr_softc *sc, struct aggr_port *p)
2028 {
2029 	if (!p->p_muxed)
2030 		return;
2031 
2032 	p->p_muxed = 0;
2033 
2034 	TAILQ_REMOVE(&sc->sc_muxen, p, p_entry_muxen);
2035 	if (TAILQ_EMPTY(&sc->sc_muxen)) {
2036 		memset(&sc->sc_partner_system.lacp_sysid_mac, 0,
2037 		    sizeof(sc->sc_partner_system.lacp_sysid_mac));
2038 		sc->sc_partner_system.lacp_sysid_priority = htons(0);
2039 		sc->sc_partner_key = htons(0);
2040 	}
2041 
2042 	DPRINTF(sc, "%s %s: mux detached\n",
2043 	    sc->sc_if.if_xname, p->p_ifp0->if_xname);
2044 }
2045 
2046 static int
2047 aggr_mux_ev(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev,
2048     int *ntt)
2049 {
2050 	enum lacp_mux_state nstate = LACP_MUX_S_DETACHED;
2051 
2052 	switch (p->p_mux_state) {
2053 	case LACP_MUX_S_BEGIN:
2054 		KASSERT(ev == LACP_MUX_E_BEGIN);
2055 		nstate = LACP_MUX_S_DETACHED;
2056 		break;
2057 	case LACP_MUX_S_DETACHED:
2058 		switch (ev) {
2059 		case LACP_MUX_E_SELECTED:
2060 		case LACP_MUX_E_STANDBY:
2061 			nstate = LACP_MUX_S_WAITING;
2062 			break;
2063 		default:
2064 			return (0);
2065 		}
2066 		break;
2067 	case LACP_MUX_S_WAITING:
2068 		switch (ev) {
2069 		case LACP_MUX_E_UNSELECTED:
2070 			nstate = LACP_MUX_S_DETACHED;
2071 			break;
2072 		case LACP_MUX_E_SELECTED:
2073 		case LACP_MUX_E_READY:
2074 			if (aggr_ready(sc) &&
2075 			    p->p_selected == AGGR_PORT_SELECTED) {
2076 				nstate = LACP_MUX_S_ATTACHED;
2077 				break;
2078 			}
2079 			/* FALLTHROUGH */
2080 		default:
2081 			return (0);
2082 		}
2083 		break;
2084 	case LACP_MUX_S_ATTACHED:
2085 		switch (ev) {
2086 		case LACP_MUX_E_UNSELECTED:
2087 		case LACP_MUX_E_STANDBY:
2088 			nstate = LACP_MUX_S_DETACHED;
2089 			break;
2090 		case LACP_MUX_E_SELECTED:
2091 		case LACP_MUX_E_SYNC:
2092 			if (p->p_selected == AGGR_PORT_SELECTED &&
2093 			    ISSET(p->p_partner_state, LACP_STATE_SYNC)) {
2094 				nstate = LACP_MUX_S_COLLECTING;
2095 				break;
2096 			}
2097 			/* FALLTHROUGH */
2098 		default:
2099 			return (0);
2100 		}
2101 		break;
2102 	case LACP_MUX_S_COLLECTING:
2103 		switch (ev) {
2104 		case LACP_MUX_E_UNSELECTED:
2105 		case LACP_MUX_E_STANDBY:
2106 		case LACP_MUX_E_NOT_SYNC:
2107 			nstate = LACP_MUX_S_ATTACHED;
2108 			break;
2109 		case LACP_MUX_E_SELECTED:
2110 		case LACP_MUX_E_SYNC:
2111 		case LACP_MUX_E_COLLECTING:
2112 			if (p->p_selected == AGGR_PORT_SELECTED &&
2113 			    ISSET(p->p_partner_state, LACP_STATE_SYNC) &&
2114 			    ISSET(p->p_partner_state, LACP_STATE_COLLECTING)) {
2115 				nstate = LACP_MUX_S_DISTRIBUTING;
2116 				break;
2117 			}
2118 			/* FALLTHROUGH */
2119 		default:
2120 			return (0);
2121 		}
2122 		break;
2123 	case LACP_MUX_S_DISTRIBUTING:
2124 		switch (ev) {
2125 		case LACP_MUX_E_UNSELECTED:
2126 		case LACP_MUX_E_STANDBY:
2127 		case LACP_MUX_E_NOT_SYNC:
2128 		case LACP_MUX_E_NOT_COLLECTING:
2129 			nstate = LACP_MUX_S_COLLECTING;
2130 			break;
2131 		default:
2132 			return (0);
2133 		}
2134 		break;
2135 	}
2136 
2137 	DPRINTF(sc, "%s %s mux: %s (%s) -> %s\n",
2138 	    sc->sc_if.if_xname, p->p_ifp0->if_xname,
2139 	    lacp_mux_state_names[p->p_mux_state], lacp_mux_event_names[ev],
2140 	    lacp_mux_state_names[nstate]);
2141 
2142 	/* act on the new state */
2143 	switch (nstate) {
2144 	case LACP_MUX_S_BEGIN:
2145 		panic("unexpected mux nstate BEGIN");
2146 		/* NOTREACHED */
2147 	case LACP_MUX_S_DETACHED:
2148 		/*
2149 		 * Detach_Mux_From_Aggregator();
2150 		 * Actor.Sync = FALSE;
2151 		 * Disable_Distributing();
2152 		 * Actor.Distributing = FALSE;
2153 		 * Actor.Collecting = FALSE;
2154 		 * Disable_Collecting();
2155 		 * NTT = TRUE;
2156 		 */
2157 		aggr_detach_mux(sc, p);
2158 		CLR(p->p_actor_state, LACP_STATE_SYNC);
2159 		aggr_disable_distributing(sc, p);
2160 		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2161 		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
2162 		aggr_disable_collecting(sc, p);
2163 		*ntt = 1;
2164 		break;
2165 	case LACP_MUX_S_WAITING:
2166 		/*
2167 		 * Start wait_while_timer
2168 		 */
2169 		timeout_add_sec(&p->p_wait_while_timer,
2170 		    LACP_AGGREGATION_WAIT_TIME);
2171 		break;
2172 	case LACP_MUX_S_ATTACHED:
2173 		/*
2174 		 * Attach_Mux_To_Aggregator();
2175 		 * Actor.Sync = TRUE;
2176 		 * Actor.Collecting = FALSE;
2177 		 * Disable_Collecting();
2178 		 * NTT = TRUE;
2179 		 */
2180 		aggr_attach_mux(sc, p);
2181 		SET(p->p_actor_state, LACP_STATE_SYNC);
2182 		CLR(p->p_actor_state, LACP_STATE_COLLECTING);
2183 		aggr_disable_collecting(sc, p);
2184 		*ntt = 1;
2185 		break;
2186 
2187 	case LACP_MUX_S_COLLECTING:
2188 		/*
2189 		 * Enable_Collecting();
2190 		 * Actor.Collecting = TRUE;
2191 		 * Disable_Distributing();
2192 		 * Actor.Distributing = FALSE;
2193 		 * NTT = TRUE;
2194 		 */
2195 		aggr_enable_collecting(sc, p);
2196 		SET(p->p_actor_state, LACP_STATE_COLLECTING);
2197 		aggr_disable_distributing(sc, p);
2198 		CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2199 		*ntt = 1;
2200 		break;
2201 	case LACP_MUX_S_DISTRIBUTING:
2202 		/*
2203 		 * Actor.Distributing = TRUE;
2204 		 * Enable_Distributing();
2205 		 */
2206 		SET(p->p_actor_state, LACP_STATE_DISTRIBUTING);
2207 		aggr_enable_distributing(sc, p);
2208 		break;
2209 	}
2210 
2211 	p->p_mux_state = nstate;
2212 
2213 	return (1);
2214 }
2215 
/*
 * Receive machine: run event ev for port p through the receive state
 * machine.
 *
 * lacpdu must be non-NULL if and only if ev is LACP_RXM_E_LACPDU; this
 * is asserted on entry.
 *
 * The next state is chosen from ev and the port's current p_rxm_state.
 * Events that are not handled in the current state return early and
 * leave p_rxm_state untouched. Otherwise the new state is stored in
 * p_rxm_state and the actions listed for that state are performed.
 */
static void
aggr_rxm_ev(struct aggr_softc *sc, struct aggr_port *p,
    enum lacp_rxm_event ev, const struct lacp_du *lacpdu)
{
	unsigned int port_disabled = 0;
	/*
	 * BEGIN is never a legitimate target state: if nothing below
	 * selects a state, the nstate switch panics on it.
	 */
	enum lacp_rxm_state nstate = LACP_RXM_S_BEGIN;

	KASSERT((ev == LACP_RXM_E_LACPDU) == (lacpdu != NULL));

	/* global transitions */

	/*
	 * These two events apply in every state. Each one carries half
	 * of the "not enabled and not moved" condition and checks the
	 * other half through the matching helper.
	 */
	switch (ev) {
	case LACP_RXM_E_NOT_PORT_ENABLED:
		port_disabled = !aggr_port_moved(sc, p);
		break;
	case LACP_RXM_E_NOT_PORT_MOVED:
		port_disabled = !aggr_port_enabled(p);
		break;
	default:
		break;
	}

	if (port_disabled)
		nstate = LACP_RXM_S_PORT_DISABLED;
	else switch (p->p_rxm_state) { /* local state transitions */
	case LACP_RXM_S_BEGIN:
		KASSERT(ev == LACP_RXM_E_BEGIN);
		nstate = LACP_RXM_S_INITIALIZE;
		break;
	case LACP_RXM_S_INITIALIZE:
		/* this should only be handled via UCT in nstate handling */
		panic("unexpected rxm state INITIALIZE");

	case LACP_RXM_S_PORT_DISABLED:
		switch (ev) {
		case LACP_RXM_E_PORT_MOVED:
			nstate = LACP_RXM_S_INITIALIZE;
			break;
		case LACP_RXM_E_PORT_ENABLED:
			/* the port is enabled; LACP decides where to go */
			nstate = aggr_lacp_enabled(sc) ?
			    LACP_RXM_S_EXPIRED : LACP_RXM_S_LACP_DISABLED;
			break;
		case LACP_RXM_E_LACP_ENABLED:
			/* LACP changes only matter once the port is enabled */
			if (!aggr_port_enabled(p))
				return;
			nstate = LACP_RXM_S_EXPIRED;
			break;
		case LACP_RXM_E_NOT_LACP_ENABLED:
			if (!aggr_port_enabled(p))
				return;
			nstate = LACP_RXM_S_LACP_DISABLED;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_EXPIRED:
		switch (ev) {
		case LACP_RXM_E_LACPDU:
			nstate = LACP_RXM_S_CURRENT;
			break;
		case LACP_RXM_E_TIMER_EXPIRED:
			nstate = LACP_RXM_S_DEFAULTED;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_LACP_DISABLED:
		switch (ev) {
		case LACP_RXM_E_LACP_ENABLED:
			nstate = LACP_RXM_S_PORT_DISABLED;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_DEFAULTED:
		switch (ev) {
		case LACP_RXM_E_LACPDU:
			nstate = LACP_RXM_S_CURRENT;
			break;
		default:
			return;
		}
		break;
	case LACP_RXM_S_CURRENT:
		switch (ev) {
		case LACP_RXM_E_TIMER_EXPIRED:
			nstate = LACP_RXM_S_EXPIRED;
			break;
		case LACP_RXM_E_LACPDU:
			/* re-enter CURRENT so its actions run for each PDU */
			nstate = LACP_RXM_S_CURRENT;
			break;
		default:
			return;
		}
		break;
	}

	/*
	 * Also the target of the goto in the INITIALIZE actions below,
	 * which arrives here with ev rewritten to LACP_RXM_E_UCT.
	 */
uct:
	/* only log actual state changes, not re-entry of the same state */
	if (p->p_rxm_state != nstate) {
		DPRINTF(sc, "%s %s rxm: %s (%s) -> %s\n",
		    sc->sc_if.if_xname, p->p_ifp0->if_xname,
		    lacp_rxm_state_names[p->p_rxm_state],
		    lacp_rxm_event_names[ev],
		    lacp_rxm_state_names[nstate]);
	}

	/* record the new state */
	p->p_rxm_state = nstate;

	/* act on the new state */
	switch (nstate) {
	case LACP_RXM_S_BEGIN:
		panic("unexpected rxm nstate BEGIN");
		/* NOTREACHED */
	case LACP_RXM_S_INITIALIZE:
		/*
		 * Selected = UNSELECTED;
		 * recordDefault();
		 * Actor_Oper_Port_State.Expired = FALSE;
		 * port_moved = FALSE;
		 */
		aggr_unselected(p);
		aggr_record_default(sc, p);
		CLR(p->p_actor_state, LACP_STATE_EXPIRED);

		/* INITIALIZE always continues straight to PORT_DISABLED */
		ev = LACP_RXM_E_UCT;
		nstate = LACP_RXM_S_PORT_DISABLED;
		goto uct;
		/* NOTREACHED */
	case LACP_RXM_S_PORT_DISABLED:
		/*
		 * Partner_Oper_Port_State.Synchronization = FALSE;
		 */
		CLR(p->p_partner_state, LACP_STATE_SYNC);
		/* tell the mux machine the partner is out of sync */
		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
		break;
	case LACP_RXM_S_EXPIRED:
		/*
		 * Partner_Oper_Port_State.Synchronization = FALSE;
		 * Partner_Oper_Port_State.LACP_Timeout = Short Timeout;
		 * start current_while_timer(Short Timeout);
		 * Actor_Oper_Port_State.Expired = TRUE;
		 */

		CLR(p->p_partner_state, LACP_STATE_SYNC);
		aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC);
		aggr_set_partner_timeout(p, AGGR_LACP_TIMEOUT_FAST);
		aggr_start_current_while_timer(p, AGGR_LACP_TIMEOUT_FAST);
		SET(p->p_actor_state, LACP_STATE_EXPIRED);

		break;
	case LACP_RXM_S_LACP_DISABLED:
		/*
		 * Selected = UNSELECTED;
		 * recordDefault();
		 * Partner_Oper_Port_State.Aggregation = FALSE;
		 * Actor_Oper_Port_State.Expired = FALSE;
		 */
		aggr_unselected(p);
		aggr_record_default(sc, p);
		CLR(p->p_partner_state, LACP_STATE_AGGREGATION);
		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
		break;
	case LACP_RXM_S_DEFAULTED:
		/*
		 * update_Default_Selected();
		 * recordDefault();
		 * Actor_Oper_Port_State.Expired = FALSE;
		 */
		aggr_update_default_selected(sc, p);
		aggr_record_default(sc, p);
		CLR(p->p_actor_state, LACP_STATE_EXPIRED);
		break;
	case LACP_RXM_S_CURRENT: {
		/*
		 * update_Selected();
		 * update_NTT();
		 * if (Actor_System_LACP_Version >=2 ) recordVersionNumber();
		 * recordPDU();
		 * start current_while_timer(
		 *     Actor_Oper_Port_State.LACP_Timeout);
		 * Actor_Oper_Port_State.Expired = FALSE;
		 */
		/* result of aggr_update_ntt(), handed on to aggr_recordpdu() */
		int sync;

		aggr_update_selected(sc, p, lacpdu);
		sync = aggr_update_ntt(p, lacpdu);
		/* don't support v2 yet */
		aggr_recordpdu(p, lacpdu, sync);
		aggr_start_current_while_timer(p, sc->sc_lacp_timeout);
		CLR(p->p_actor_state, LACP_STATE_EXPIRED);

		/* a port left UNSELECTED after this PDU is put up for selection */
		if (p->p_selected == AGGR_PORT_UNSELECTED)
			aggr_selection_logic(sc, p); /* restart */

		}
		break;
	}
}
2418 
2419 static int
2420 aggr_up(struct aggr_softc *sc)
2421 {
2422 	struct ifnet *ifp = &sc->sc_if;
2423 	struct aggr_port *p;
2424 
2425 	NET_ASSERT_LOCKED();
2426 	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
2427 
2428 	SET(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = TRUE */
2429 
2430 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2431 		aggr_rxm(sc, p, LACP_RXM_E_LACP_ENABLED);
2432 		aggr_p_linkch(p);
2433 	}
2434 
2435 	/* start the Periodic Transmission machine */
2436 	if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) {
2437 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2438 			if (!aggr_port_enabled(p))
2439 				continue;
2440 
2441 			timeout_add_sec(&p->p_ptm_tx,
2442 			    aggr_periodic_times[sc->sc_lacp_timeout]);
2443 		}
2444 	}
2445 
2446 	return (ENETRESET);
2447 }
2448 
2449 static int
2450 aggr_iff(struct aggr_softc *sc)
2451 {
2452 	struct ifnet *ifp = &sc->sc_if;
2453 	unsigned int promisc = ISSET(ifp->if_flags, IFF_PROMISC);
2454 
2455 	NET_ASSERT_LOCKED();
2456 
2457 	if (promisc != sc->sc_promisc) {
2458 		struct aggr_port *p;
2459 
2460 		rw_enter_read(&sc->sc_lock);
2461 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2462 			struct ifnet *ifp0 = p->p_ifp0;
2463 			if (ifpromisc(ifp0, promisc) != 0) {
2464 				log(LOG_WARNING, "%s iff %s: "
2465 				    "unable to turn promisc %s\n",
2466 				    ifp->if_xname, ifp0->if_xname,
2467 				    promisc ? "on" : "off");
2468 			}
2469 		}
2470 		rw_exit_read(&sc->sc_lock);
2471 
2472 		sc->sc_promisc = promisc;
2473 	}
2474 
2475 	return (0);
2476 }
2477 
2478 static int
2479 aggr_down(struct aggr_softc *sc)
2480 {
2481 	struct ifnet *ifp = &sc->sc_if;
2482 	struct aggr_port *p;
2483 
2484 	NET_ASSERT_LOCKED();
2485 	CLR(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = FALSE */
2486 
2487 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2488 		aggr_rxm(sc, p, LACP_RXM_E_NOT_LACP_ENABLED);
2489 
2490 		/* stop the Periodic Transmission machine */
2491 		timeout_del(&p->p_ptm_tx);
2492 
2493 		/* stop the Mux machine */
2494 		aggr_mux(sc, p, LACP_MUX_E_UNSELECTED);
2495 
2496 		/* stop the Transmit machine */
2497 		timeout_del(&p->p_txm_ntt);
2498 	}
2499 
2500 	KASSERT(TAILQ_EMPTY(&sc->sc_distributing));
2501 	KASSERT(sc->sc_ndistributing == 0);
2502 	KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_map) == NULL);
2503 
2504 	return (ENETRESET);
2505 }
2506 
2507 static int
2508 aggr_set_lladdr(struct aggr_softc *sc, const struct ifreq *ifr)
2509 {
2510 	struct ifnet *ifp = &sc->sc_if;
2511 	struct aggr_port *p;
2512 	const uint8_t *lladdr = ifr->ifr_addr.sa_data;
2513 
2514 	rw_enter_read(&sc->sc_lock);
2515 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2516 		if (aggr_p_setlladdr(p, lladdr) != 0) {
2517 			struct ifnet *ifp0 = p->p_ifp0;
2518 			log(LOG_WARNING, "%s setlladdr %s: "
2519 			    "unable to set lladdr\n",
2520 			    ifp->if_xname, ifp0->if_xname);
2521 		}
2522 	}
2523 	rw_exit_read(&sc->sc_lock);
2524 
2525 	return (0);
2526 }
2527 
2528 static int
2529 aggr_set_mtu(struct aggr_softc *sc, uint32_t mtu)
2530 {
2531 	struct ifnet *ifp = &sc->sc_if;
2532 	struct aggr_port *p;
2533 
2534 	if (mtu < ETHERMIN || mtu > ifp->if_hardmtu)
2535 		return (EINVAL);
2536 
2537 	ifp->if_mtu = mtu;
2538 
2539 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2540 		if (aggr_p_set_mtu(p, mtu) != 0) {
2541 			struct ifnet *ifp0 = p->p_ifp0;
2542 			log(LOG_WARNING, "%s %s: unable to set mtu %u\n",
2543 			    ifp->if_xname, ifp0->if_xname, mtu);
2544 		}
2545 	}
2546 
2547 	return (0);
2548 }
2549 
2550 static int
2551 aggr_group(struct aggr_softc *sc, struct aggr_port *p, u_long cmd)
2552 {
2553 	struct ifnet *ifp0 = p->p_ifp0;
2554 	struct ifreq ifr;
2555 	struct sockaddr *sa;
2556 
2557 	memset(&ifr, 0, sizeof(ifr));
2558 
2559 	/* make it convincing */
2560 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
2561 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
2562 
2563 	sa = &ifr.ifr_addr;
2564 	CTASSERT(sizeof(sa->sa_data) >= sizeof(lacp_address_slow));
2565 
2566 	sa->sa_family = AF_UNSPEC;
2567 	memcpy(sa->sa_data, lacp_address_slow, sizeof(lacp_address_slow));
2568 
2569 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2570 }
2571 
2572 static int
2573 aggr_multi(struct aggr_softc *sc, struct aggr_port *p,
2574     const struct aggr_multiaddr *ma, u_long cmd)
2575 {
2576 	struct ifnet *ifp0 = p->p_ifp0;
2577 	struct {
2578 		char			if_name[IFNAMSIZ];
2579 		struct sockaddr_storage if_addr;
2580 	} ifr;
2581 
2582 	memset(&ifr, 0, sizeof(ifr));
2583 
2584 	/* make it convincing */
2585 	CTASSERT(sizeof(ifr.if_name) == sizeof(ifp0->if_xname));
2586 	memcpy(ifr.if_name, ifp0->if_xname, sizeof(ifr.if_name));
2587 
2588 	ifr.if_addr = ma->m_addr;
2589 
2590 	return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr));
2591 }
2592 
2593 static void
2594 aggr_media_status(struct ifnet *ifp, struct ifmediareq *imr)
2595 {
2596 	struct aggr_softc *sc = ifp->if_softc;
2597 
2598 	imr->ifm_status = IFM_AVALID;
2599 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
2600 
2601 	smr_read_enter(); /* there's no reason to block... */
2602 	if (SMR_PTR_GET(&sc->sc_map) != NULL)
2603 		imr->ifm_status |= IFM_ACTIVE;
2604 	smr_read_leave();
2605 }
2606 
/*
 * Media change handler: the media of an aggr interface cannot be
 * changed, so every request is refused with EOPNOTSUPP.
 */
static int
aggr_media_change(struct ifnet *ifp)
{
	const int error = EOPNOTSUPP;

	return (error);
}
2612 
2613 static void
2614 aggr_update_capabilities(struct aggr_softc *sc)
2615 {
2616 	struct aggr_port *p;
2617 	uint32_t hardmtu = ETHER_MAX_HARDMTU_LEN;
2618 	uint32_t capabilities = ~0;
2619 	int set = 0;
2620 
2621 	/* Do not inherit LRO capabilities. */
2622 	CLR(capabilities, IFCAP_LRO);
2623 
2624 	rw_enter_read(&sc->sc_lock);
2625 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2626 		struct ifnet *ifp0 = p->p_ifp0;
2627 
2628 		set = 1;
2629 		capabilities &= ifp0->if_capabilities;
2630 		if (ifp0->if_hardmtu < hardmtu)
2631 			hardmtu = ifp0->if_hardmtu;
2632 	}
2633 	rw_exit_read(&sc->sc_lock);
2634 
2635 	sc->sc_if.if_hardmtu = hardmtu;
2636 	sc->sc_if.if_capabilities = (set ? capabilities : 0);
2637 }
2638 
/*
 * Periodic transmission timeout handler for the port passed as arg.
 *
 * Requests a transmission through aggr_ntt() and re-arms p_ptm_tx. The
 * interval is the fast period when the partner state has
 * LACP_STATE_TIMEOUT set, and the slow period otherwise.
 */
static void
aggr_ptm_tx(void *arg)
{
	struct aggr_port *p = arg;
	unsigned int timeout;

	aggr_ntt(p);

	if (ISSET(p->p_partner_state, LACP_STATE_TIMEOUT))
		timeout = AGGR_LACP_TIMEOUT_FAST;
	else
		timeout = AGGR_LACP_TIMEOUT_SLOW;

	timeout_add_sec(&p->p_ptm_tx, aggr_periodic_times[timeout]);
}
2651 
2652 static inline void
2653 aggr_lacp_tlv_set(struct lacp_tlv_hdr *tlv, uint8_t type, uint8_t len)
2654 {
2655 	tlv->lacp_tlv_type = type;
2656 	tlv->lacp_tlv_length = sizeof(*tlv) + len;
2657 }
2658 
2659 static void
2660 aggr_ntt_transmit(struct aggr_port *p)
2661 {
2662 	struct aggr_softc *sc = p->p_aggr;
2663 	struct arpcom *ac = &sc->sc_ac;
2664 	struct ifnet *ifp = &sc->sc_if;
2665 	struct ifnet *ifp0 = p->p_ifp0;
2666 	struct mbuf *m;
2667 	struct lacp_du *lacpdu;
2668 	struct lacp_port_info *pi;
2669 	struct lacp_collector_info *ci;
2670 	struct ether_header *eh;
2671 	int linkhdr = max_linkhdr + ETHER_ALIGN;
2672 	int len = linkhdr + sizeof(*eh) + sizeof(*lacpdu);
2673 
2674 	m = m_gethdr(M_DONTWAIT, MT_DATA);
2675 	if (m == NULL)
2676 		return;
2677 
2678 	if (len > MHLEN) {
2679 		MCLGETL(m, M_DONTWAIT, len);
2680 		if (!ISSET(m->m_flags, M_EXT)) {
2681 			m_freem(m);
2682 			return;
2683 		}
2684 	}
2685 
2686 	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
2687 	m->m_pkthdr.len = m->m_len = len;
2688 	memset(m->m_data, 0, m->m_len);
2689 	m_adj(m, linkhdr);
2690 
2691 	eh = mtod(m, struct ether_header *);
2692 
2693 	CTASSERT(sizeof(eh->ether_dhost) == sizeof(lacp_address_slow));
2694 	CTASSERT(sizeof(eh->ether_shost) == sizeof(ac->ac_enaddr));
2695 
2696 	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
2697 	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
2698 	eh->ether_type = htons(ETHERTYPE_SLOW);
2699 
2700 	lacpdu = (struct lacp_du *)(eh + 1);
2701 	lacpdu->lacp_du_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
2702 	lacpdu->lacp_du_sph.sph_version = LACP_VERSION;
2703 
2704 	pi = &lacpdu->lacp_actor_info;
2705 	aggr_lacp_tlv_set(&lacpdu->lacp_actor_info_tlv,
2706 	    LACP_T_ACTOR, sizeof(*pi));
2707 
2708 	pi->lacp_sysid.lacp_sysid_priority = htons(sc->sc_lacp_prio);
2709 	CTASSERT(sizeof(pi->lacp_sysid.lacp_sysid_mac) ==
2710 	    sizeof(ac->ac_enaddr));
2711 	memcpy(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr,
2712 	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
2713 
2714 	pi->lacp_key = htons(ifp->if_index);
2715 
2716 	pi->lacp_portid.lacp_portid_priority = htons(sc->sc_lacp_port_prio);
2717 	pi->lacp_portid.lacp_portid_number = htons(ifp0->if_index);
2718 
2719 	pi->lacp_state = p->p_actor_state;
2720 	if (sc->sc_lacp_mode)
2721 		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
2722 	if (sc->sc_lacp_timeout)
2723 		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
2724 
2725 	pi = &lacpdu->lacp_partner_info;
2726 	aggr_lacp_tlv_set(&lacpdu->lacp_partner_info_tlv,
2727 	    LACP_T_PARTNER, sizeof(*pi));
2728 
2729 	*pi = p->p_partner;
2730 
2731 	ci = &lacpdu->lacp_collector_info;
2732 	aggr_lacp_tlv_set(&lacpdu->lacp_collector_info_tlv,
2733 	    LACP_T_COLLECTOR, sizeof(*ci));
2734 	ci->lacp_maxdelay = htons(0);
2735 
2736 	lacpdu->lacp_terminator.lacp_tlv_type = LACP_T_TERMINATOR;
2737 	lacpdu->lacp_terminator.lacp_tlv_length = 0;
2738 
2739 	(void)if_enqueue(ifp0, m);
2740 }
2741 
2742 static void
2743 aggr_ntt(struct aggr_port *p)
2744 {
2745 	if (!timeout_pending(&p->p_txm_ntt))
2746 		timeout_add(&p->p_txm_ntt, 0);
2747 }
2748 
2749 static void
2750 aggr_transmit_machine(void *arg)
2751 {
2752 	struct aggr_port *p = arg;
2753 	struct aggr_softc *sc = p->p_aggr;
2754 	unsigned int slot;
2755 	int *log;
2756 	int period = hz * LACP_FAST_PERIODIC_TIME;
2757 	int diff;
2758 
2759 	if (!aggr_lacp_enabled(sc) || !aggr_port_enabled(p))
2760 		return;
2761 
2762 	slot = p->p_txm_slot;
2763 	log = &p->p_txm_log[slot % nitems(p->p_txm_log)];
2764 
2765 	diff = ticks - *log;
2766 	if (diff < period) {
2767 		timeout_add(&p->p_txm_ntt, period - diff);
2768 		return;
2769 	}
2770 
2771 	*log = ticks;
2772 	p->p_txm_slot = ++slot;
2773 
2774 #if 0
2775 	DPRINTF(sc, "%s %s ntt\n", sc->sc_if.if_xname, p->p_ifp0->if_xname);
2776 #endif
2777 
2778 	aggr_ntt_transmit(p);
2779 }
2780 
2781 static void
2782 aggr_set_lacp_mode(struct aggr_softc *sc, int mode)
2783 {
2784 	sc->sc_lacp_mode = mode;
2785 
2786 	if (mode == AGGR_LACP_MODE_PASSIVE) {
2787 		struct aggr_port *p;
2788 
2789 		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2790 			if (!ISSET(p->p_partner_state, LACP_STATE_ACTIVITY))
2791 				timeout_del(&p->p_ptm_tx);
2792 		}
2793 	}
2794 }
2795 
2796 static void
2797 aggr_set_partner_timeout(struct aggr_port *p, int timeout)
2798 {
2799 	uint8_t ostate = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT);
2800 	uint8_t nstate = (timeout == AGGR_LACP_TIMEOUT_FAST) ?
2801 	    LACP_STATE_TIMEOUT : 0;
2802 
2803 	if (ostate == nstate)
2804 		return;
2805 
2806 	if (timeout == AGGR_LACP_TIMEOUT_FAST) {
2807 		SET(p->p_partner_state, LACP_STATE_TIMEOUT);
2808 		timeout_add_sec(&p->p_ptm_tx,
2809 		    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
2810 	} else
2811 		CLR(p->p_partner_state, LACP_STATE_TIMEOUT);
2812 }
2813 
2814 static void
2815 aggr_set_lacp_timeout(struct aggr_softc *sc, int timeout)
2816 {
2817 	struct aggr_port *p;
2818 
2819 	sc->sc_lacp_timeout = timeout;
2820 
2821 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2822 		if (!ISSET(p->p_actor_state, LACP_STATE_DEFAULTED))
2823 			continue;
2824 
2825 		aggr_set_partner_timeout(p, timeout);
2826 	}
2827 }
2828 
2829 static int
2830 aggr_multi_eq(const struct aggr_multiaddr *ma,
2831     const uint8_t *addrlo, const uint8_t *addrhi)
2832 {
2833 	return (ETHER_IS_EQ(ma->m_addrlo, addrlo) &&
2834 	    ETHER_IS_EQ(ma->m_addrhi, addrhi));
2835 }
2836 
2837 static int
2838 aggr_multi_add(struct aggr_softc *sc, struct ifreq *ifr)
2839 {
2840 	struct ifnet *ifp = &sc->sc_if;
2841 	struct aggr_port *p;
2842 	struct aggr_multiaddr *ma;
2843 	uint8_t addrlo[ETHER_ADDR_LEN];
2844 	uint8_t addrhi[ETHER_ADDR_LEN];
2845 	int error;
2846 
2847 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2848 	if (error != 0)
2849 		return (error);
2850 
2851 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2852 		if (aggr_multi_eq(ma, addrlo, addrhi)) {
2853 			ma->m_refs++;
2854 			return (0);
2855 		}
2856 	}
2857 
2858 	ma = malloc(sizeof(*ma), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
2859 	if (ma == NULL)
2860 		return (ENOMEM);
2861 
2862 	ma->m_refs = 1;
2863 	memcpy(&ma->m_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2864 	memcpy(ma->m_addrlo, addrlo, sizeof(ma->m_addrlo));
2865 	memcpy(ma->m_addrhi, addrhi, sizeof(ma->m_addrhi));
2866 	TAILQ_INSERT_TAIL(&sc->sc_multiaddrs, ma, m_entry);
2867 
2868 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2869 		struct ifnet *ifp0 = p->p_ifp0;
2870 
2871 		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
2872 			log(LOG_WARNING, "%s %s: "
2873 			    "unable to add multicast address\n",
2874 			    ifp->if_xname, ifp0->if_xname);
2875 		}
2876 	}
2877 
2878 	return (0);
2879 }
2880 
2881 int
2882 aggr_multi_del(struct aggr_softc *sc, struct ifreq *ifr)
2883 {
2884 	struct ifnet *ifp = &sc->sc_if;
2885 	struct aggr_port *p;
2886 	struct aggr_multiaddr *ma;
2887 	uint8_t addrlo[ETHER_ADDR_LEN];
2888 	uint8_t addrhi[ETHER_ADDR_LEN];
2889 	int error;
2890 
2891 	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2892 	if (error != 0)
2893 		return (error);
2894 
2895 	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
2896 		if (aggr_multi_eq(ma, addrlo, addrhi))
2897 			break;
2898 	}
2899 
2900 	if (ma == NULL)
2901 		return (EINVAL);
2902 
2903 	if (--ma->m_refs > 0)
2904 		return (0);
2905 
2906 	TAILQ_REMOVE(&sc->sc_multiaddrs, ma, m_entry);
2907 
2908 	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
2909 		struct ifnet *ifp0 = p->p_ifp0;
2910 
2911 		if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) {
2912 			log(LOG_WARNING, "%s %s: "
2913 			    "unable to delete multicast address\n",
2914 			    ifp->if_xname, ifp0->if_xname);
2915 		}
2916 	}
2917 
2918 	free(ma, M_DEVBUF, sizeof(*ma));
2919 
2920 	return (0);
2921 }
2922