xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 57c40785)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/conf.h>
33 #include <sys/cmn_err.h>
34 #include <sys/list.h>
35 #include <sys/ksynch.h>
36 #include <sys/kmem.h>
37 #include <sys/stream.h>
38 #include <sys/modctl.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/atomic.h>
42 #include <sys/stat.h>
43 #include <sys/byteorder.h>
44 #include <sys/strsun.h>
45 #include <sys/isa_defs.h>
46 
47 #include <sys/aggr.h>
48 #include <sys/aggr_impl.h>
49 
50 static struct ether_addr	etherzeroaddr = {
51 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
52 };
53 
54 /*
55  * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
56  */
57 static struct ether_addr   slow_multicast_addr = {
58 	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
59 };
60 
#ifdef DEBUG
/*
 * LACP state machine debugging support. Messages are printed only when
 * aggr_lacp_debug is non-zero.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("port %d\n", id)). It expands to a single void
 * expression rather than a bare if/compound statement, so it can be used
 * safely as the body of an unbraced if/else.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	\
	((void)(aggr_lacp_debug ? (void) printf x : (void)0))
#else
/* Non-DEBUG builds: the arguments are discarded and nothing is evaluated. */
#define	AGGR_LACP_DBG(x)	((void)0)
#endif /* DEBUG */

/* Number of nanoseconds in one second (long long constant). */
#define	NSECS_PER_SEC   1000000000ll
70 
71 /* used by lacp_misconfig_walker() */
72 typedef struct lacp_misconfig_check_state_s {
73 	aggr_port_t *cs_portp;
74 	boolean_t cs_found;
75 } lacp_misconfig_check_state_t;
76 
77 static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
78 static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
79 static const char *lacp_mux_str[] = LACP_MUX_STRINGS;
80 
81 static uint16_t lacp_port_priority = 0x1000;
82 static uint16_t lacp_system_priority = 0x1000;
83 
84 /*
85  * Maintains a list of all ports in ATTACHED state. This information
86  * is used to detect misconfiguration.
87  */
88 typedef struct lacp_sel_ports {
89 	datalink_id_t sp_grp_linkid;
90 	datalink_id_t sp_linkid;
91 	/* Note: sp_partner_system must be 2-byte aligned */
92 	struct ether_addr sp_partner_system;
93 	uint32_t sp_partner_key;
94 	struct lacp_sel_ports *sp_next;
95 } lacp_sel_ports_t;
96 
97 static lacp_sel_ports_t *sel_ports = NULL;
98 static kmutex_t lacp_sel_lock;
99 
100 static void periodic_timer_pop_locked(aggr_port_t *);
101 static void periodic_timer_pop(void *);
102 static void lacp_xmit_sm(aggr_port_t *);
103 static void lacp_periodic_sm(aggr_port_t *);
104 static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
105 static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
106 static void lacp_on(aggr_port_t *);
107 static void lacp_off(aggr_port_t *);
108 static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
109 static void lacp_receive_sm(aggr_port_t *, lacp_t *);
110 static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
111 static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
112 static void start_wait_while_timer(aggr_port_t *);
113 static void stop_wait_while_timer(aggr_port_t *);
114 static void lacp_reset_port(aggr_port_t *);
115 static void stop_current_while_timer(aggr_port_t *);
116 static void current_while_timer_pop(void *);
117 static void update_default_selected(aggr_port_t *);
118 static boolean_t update_selected(aggr_port_t *, lacp_t *);
119 static boolean_t lacp_sel_ports_add(aggr_port_t *);
120 static void lacp_sel_ports_del(aggr_port_t *);
121 
122 void
123 aggr_lacp_init(void)
124 {
125 	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
126 }
127 
128 void
129 aggr_lacp_fini(void)
130 {
131 	mutex_destroy(&lacp_sel_lock);
132 }
133 
134 /*
135  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
136  * could not be performed due to a memory allocation error, B_TRUE otherwise.
137  */
138 static boolean_t
139 lacp_port_select(aggr_port_t *portp)
140 {
141 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
142 
143 	if (!lacp_sel_ports_add(portp))
144 		return (B_FALSE);
145 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
146 	return (B_TRUE);
147 }
148 
149 /*
150  * Set the port LACP state to UNSELECTED.
151  */
152 static void
153 lacp_port_unselect(aggr_port_t *portp)
154 {
155 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
156 
157 	lacp_sel_ports_del(portp);
158 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
159 }
160 
161 /*
162  * Initialize group specific LACP state and parameters.
163  */
164 void
165 aggr_lacp_init_grp(aggr_grp_t *aggrp)
166 {
167 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
168 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
169 	aggrp->aggr.CollectorMaxDelay = 10;
170 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
171 	aggrp->aggr.ready = B_FALSE;
172 }
173 
174 /*
175  * Complete LACP info initialization at port creation time.
176  */
177 void
178 aggr_lacp_init_port(aggr_port_t *portp)
179 {
180 	aggr_grp_t *aggrp = portp->lp_grp;
181 	aggr_lacp_port_t *pl = &portp->lp_lacp;
182 
183 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(aggrp));
184 	ASSERT(RW_LOCK_HELD(&aggrp->lg_lock));
185 	ASSERT(RW_LOCK_HELD(&portp->lp_lock));
186 
187 	/* actor port # */
188 	pl->ActorPortNumber = portp->lp_portid;
189 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d): "
190 	    "ActorPortNumber = 0x%x\n", portp->lp_linkid,
191 	    pl->ActorPortNumber));
192 
193 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
194 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
195 	pl->NTT = B_FALSE;			/* need to transmit */
196 
197 	pl->ActorAdminPortKey = aggrp->lg_key;
198 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
199 	AGGR_LACP_DBG(("aggr_lacp_init_port(%d) "
200 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
201 	    portp->lp_linkid, pl->ActorAdminPortKey, pl->ActorOperPortKey));
202 
203 	/* Actor admin. port state */
204 	pl->ActorAdminPortState.bit.activity = B_FALSE;
205 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
206 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
207 	pl->ActorAdminPortState.bit.sync = B_FALSE;
208 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
209 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
210 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
211 	pl->ActorAdminPortState.bit.expired = B_FALSE;
212 	pl->ActorOperPortState = pl->ActorAdminPortState;
213 
214 	/*
215 	 * Partner Administrative Information
216 	 * (All initialized to zero except for the following)
217 	 * Fast Timeouts.
218 	 */
219 	pl->PartnerAdminPortState.bit.timeout =
220 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
221 
222 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
223 
224 	/*
225 	 * State machine information.
226 	 */
227 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
228 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
229 	pl->sm.lacp_enabled = B_FALSE;
230 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
231 	pl->sm.actor_churn = B_FALSE;
232 	pl->sm.partner_churn = B_FALSE;
233 	pl->sm.ready_n = B_FALSE;
234 	pl->sm.port_moved = B_FALSE;
235 
236 	lacp_port_unselect(portp);
237 
238 	pl->sm.periodic_state = LACP_NO_PERIODIC;
239 	pl->sm.receive_state = LACP_INITIALIZE;
240 	pl->sm.mux_state = LACP_DETACHED;
241 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
242 
243 	/*
244 	 * Timer information.
245 	 */
246 	pl->current_while_timer.id = 0;
247 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
248 
249 	pl->periodic_timer.id = 0;
250 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
251 
252 	pl->wait_while_timer.id = 0;
253 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
254 }
255 
256 /*
257  * Port initialization when we need to
258  * turn LACP on/off, etc. Not everything is
259  * reset like in the above routine.
260  *		Do NOT modify things like link status.
261  */
262 static void
263 lacp_reset_port(aggr_port_t *portp)
264 {
265 	aggr_lacp_port_t *pl = &portp->lp_lacp;
266 
267 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
268 
269 	pl->NTT = B_FALSE;			/* need to transmit */
270 
271 	/* reset operational port state */
272 	pl->ActorOperPortState.bit.timeout =
273 	    pl->ActorAdminPortState.bit.timeout;
274 
275 	pl->ActorOperPortState.bit.sync = B_FALSE;
276 	pl->ActorOperPortState.bit.collecting = B_FALSE;
277 	pl->ActorOperPortState.bit.distributing = B_FALSE;
278 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
279 	pl->ActorOperPortState.bit.expired = B_FALSE;
280 
281 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
282 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
283 
284 	/*
285 	 * State machine information.
286 	 */
287 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
288 	pl->sm.actor_churn = B_FALSE;
289 	pl->sm.partner_churn = B_FALSE;
290 	pl->sm.ready_n = B_FALSE;
291 
292 	lacp_port_unselect(portp);
293 
294 	pl->sm.periodic_state = LACP_NO_PERIODIC;
295 	pl->sm.receive_state = LACP_INITIALIZE;
296 	pl->sm.mux_state = LACP_DETACHED;
297 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
298 
299 	/*
300 	 * Timer information.
301 	 */
302 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
303 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
304 }
305 
306 static void
307 aggr_lacp_mcast_on(aggr_port_t *port)
308 {
309 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(port->lp_grp));
310 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
311 
312 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
313 		return;
314 
315 	(void) aggr_port_multicst(port, B_TRUE,
316 	    (uchar_t *)&slow_multicast_addr);
317 }
318 
319 static void
320 aggr_lacp_mcast_off(aggr_port_t *port)
321 {
322 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(port->lp_grp));
323 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
324 
325 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
326 		return;
327 
328 	(void) aggr_port_multicst(port, B_FALSE,
329 	    (uchar_t *)&slow_multicast_addr);
330 }
331 
332 static void
333 start_periodic_timer(aggr_port_t *portp)
334 {
335 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
336 
337 	if (portp->lp_lacp.periodic_timer.id == 0) {
338 		portp->lp_lacp.periodic_timer.id =
339 		    timeout(periodic_timer_pop, portp,
340 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
341 	}
342 }
343 
344 static void
345 stop_periodic_timer(aggr_port_t *portp)
346 {
347 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
348 
349 	if (portp->lp_lacp.periodic_timer.id != 0) {
350 		AGGR_LACP_UNLOCK(portp->lp_grp);
351 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
352 		AGGR_LACP_LOCK_WRITER(portp->lp_grp);
353 		portp->lp_lacp.periodic_timer.id = 0;
354 	}
355 }
356 
357 /*
358  * When the timer pops, we arrive here to
359  * clear out LACPDU count as well as transmit an
360  * LACPDU. We then set the periodic state and let
361  * the periodic state machine restart the timer.
362  */
363 
364 static void
365 periodic_timer_pop_locked(aggr_port_t *portp)
366 {
367 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
368 
369 	portp->lp_lacp.periodic_timer.id = NULL;
370 	portp->lp_lacp_stats.LACPDUsTx = 0;
371 
372 	/* current timestamp */
373 	portp->lp_lacp.time = gethrtime();
374 	portp->lp_lacp.NTT = B_TRUE;
375 	lacp_xmit_sm(portp);
376 
377 	/*
378 	 * Set Periodic State machine state based on the
379 	 * value of the Partner Operation Port State timeout
380 	 * bit.
381 	 */
382 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
383 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
384 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
385 	} else {
386 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
387 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
388 	}
389 
390 	lacp_periodic_sm(portp);
391 }
392 
393 static void
394 periodic_timer_pop(void *data)
395 {
396 	aggr_port_t *portp = data;
397 
398 	if (portp->lp_closing)
399 		return;
400 
401 	AGGR_LACP_LOCK_WRITER(portp->lp_grp);
402 	periodic_timer_pop_locked(portp);
403 	AGGR_LACP_UNLOCK(portp->lp_grp);
404 }
405 
406 /*
407  * Invoked from:
408  *	- startup upon aggregation
409  *	- when the periodic timer pops
410  *	- when the periodic timer value is changed
411  *	- when the port is attached or detached
412  *	- when LACP mode is changed.
413  */
414 static void
415 lacp_periodic_sm(aggr_port_t *portp)
416 {
417 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
418 	aggr_lacp_port_t *pl = &portp->lp_lacp;
419 
420 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
421 
422 	/* LACP_OFF state not in specification so check here.  */
423 	if (!pl->sm.lacp_on) {
424 		/* Stop timer whether it is running or not */
425 		stop_periodic_timer(portp);
426 		pl->sm.periodic_state = LACP_NO_PERIODIC;
427 		pl->NTT = B_FALSE;
428 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):NO LACP "
429 		    "%s--->%s\n", portp->lp_linkid,
430 		    lacp_periodic_str[oldstate],
431 		    lacp_periodic_str[pl->sm.periodic_state]));
432 		return;
433 	}
434 
435 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
436 	    !pl->sm.port_enabled ||
437 	    !pl->ActorOperPortState.bit.activity &&
438 	    !pl->PartnerOperPortState.bit.activity) {
439 
440 		/* Stop timer whether it is running or not */
441 		stop_periodic_timer(portp);
442 		pl->sm.periodic_state = LACP_NO_PERIODIC;
443 		pl->NTT = B_FALSE;
444 		AGGR_LACP_DBG(("lacp_periodic_sm(%d):STOP %s--->%s\n",
445 		    portp->lp_linkid, lacp_periodic_str[oldstate],
446 		    lacp_periodic_str[pl->sm.periodic_state]));
447 		return;
448 	}
449 
450 	/*
451 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
452 	 * has been received. Then after we timeout, then it is
453 	 * possible to go to SLOW_PERIODIC_TIME.
454 	 */
455 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
456 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
457 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
458 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
459 	    pl->PartnerOperPortState.bit.timeout) {
460 		/*
461 		 * If we receive a bit indicating we are going to
462 		 * fast periodic from slow periodic, stop the timer
463 		 * and let the periodic_timer_pop routine deal
464 		 * with reseting the periodic state and transmitting
465 		 * a LACPDU.
466 		 */
467 		stop_periodic_timer(portp);
468 		periodic_timer_pop_locked(portp);
469 	}
470 
471 	/* Rearm timer with value provided by partner */
472 	start_periodic_timer(portp);
473 }
474 
475 /*
476  * This routine transmits an LACPDU if lacp_enabled
477  * is TRUE and if NTT is set.
478  */
479 static void
480 lacp_xmit_sm(aggr_port_t *portp)
481 {
482 	aggr_lacp_port_t *pl = &portp->lp_lacp;
483 	size_t	len;
484 	mblk_t  *mp;
485 	hrtime_t now, elapsed;
486 	const mac_txinfo_t *mtp;
487 
488 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
489 
490 	/* LACP_OFF state not in specification so check here.  */
491 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
492 		return;
493 
494 	/*
495 	 * Do nothing if LACP has been turned off or if the
496 	 * periodic state machine is not enabled.
497 	 */
498 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
499 	    !pl->sm.lacp_enabled || pl->sm.begin) {
500 		pl->NTT = B_FALSE;
501 		return;
502 	}
503 
504 	/*
505 	 * If we have sent 5 Slow packets in the last second, avoid
506 	 * sending any more here. No more than three LACPDUs may be transmitted
507 	 * in any Fast_Periodic_Time interval.
508 	 */
509 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
510 		/*
511 		 * Grab the current time value and see if
512 		 * more than 1 second has passed. If so,
513 		 * reset the timestamp and clear the count.
514 		 */
515 		now = gethrtime();
516 		elapsed = now - pl->time;
517 		if (elapsed > NSECS_PER_SEC) {
518 			portp->lp_lacp_stats.LACPDUsTx = 0;
519 			pl->time = now;
520 		} else {
521 			return;
522 		}
523 	}
524 
525 	len = sizeof (lacp_t) + sizeof (struct ether_header);
526 	mp = allocb(len, BPRI_MED);
527 	if (mp == NULL)
528 		return;
529 
530 	mp->b_wptr = mp->b_rptr + len;
531 	bzero(mp->b_rptr, len);
532 
533 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
534 	fill_lacp_pdu(portp,
535 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
536 
537 	/*
538 	 * Store the transmit info pointer locally in case it changes between
539 	 * loading mt_fn and mt_arg.
540 	 */
541 	mtp = portp->lp_txinfo;
542 	mtp->mt_fn(mtp->mt_arg, mp);
543 
544 	pl->NTT = B_FALSE;
545 	portp->lp_lacp_stats.LACPDUsTx++;
546 }
547 
548 /*
549  * Initialize the ethernet header of a LACP packet sent from the specified
550  * port.
551  */
552 static void
553 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
554 {
555 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
556 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
557 	    ETHERADDRL);
558 	ether->ether_type = htons(ETHERTYPE_SLOW);
559 }
560 
561 static void
562 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
563 {
564 	aggr_lacp_port_t *pl = &portp->lp_lacp;
565 	aggr_grp_t *aggrp = portp->lp_grp;
566 
567 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
568 
569 	lacp->subtype = LACP_SUBTYPE;
570 	lacp->version = LACP_VERSION;
571 
572 	rw_enter(&aggrp->lg_lock, RW_READER);
573 	rw_enter(&portp->lp_lock, RW_READER);
574 
575 	/*
576 	 * Actor Information
577 	 */
578 	lacp->actor_info.tlv_type = ACTOR_TLV;
579 	lacp->actor_info.information_len = sizeof (link_info_t);
580 	lacp->actor_info.system_priority =
581 	    htons(aggrp->aggr.ActorSystemPriority);
582 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
583 	    ETHERADDRL);
584 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
585 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
586 	lacp->actor_info.port = htons(pl->ActorPortNumber);
587 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
588 
589 	/*
590 	 * Partner Information
591 	 */
592 	lacp->partner_info.tlv_type = PARTNER_TLV;
593 	lacp->partner_info.information_len = sizeof (link_info_t);
594 	lacp->partner_info.system_priority =
595 	    htons(pl->PartnerOperSysPriority);
596 	lacp->partner_info.system_id = pl->PartnerOperSystem;
597 	lacp->partner_info.key = htons(pl->PartnerOperKey);
598 	lacp->partner_info.port_priority =
599 	    htons(pl->PartnerOperPortPriority);
600 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
601 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
602 
603 	/* Collector Information */
604 	lacp->tlv_collector = COLLECTOR_TLV;
605 	lacp->collector_len = 0x10;
606 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
607 
608 	/* Termination Information */
609 	lacp->tlv_terminator = TERMINATOR_TLV;
610 	lacp->terminator_len = 0x0;
611 
612 	rw_exit(&portp->lp_lock);
613 	rw_exit(&aggrp->lg_lock);
614 }
615 
616 /*
617  * lacp_mux_sm - LACP mux state machine
618  *		This state machine is invoked from:
619  *			- startup upon aggregation
620  *			- from the Selection logic
621  *			- when the wait_while_timer pops
622  *			- when the aggregation MAC address is changed
623  *			- when receiving DL_NOTE_LINK_UP/DOWN
624  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
625  *			- when LACP mode is changed.
626  *			- when a DL_NOTE_SPEED is received
627  */
628 static void
629 lacp_mux_sm(aggr_port_t *portp)
630 {
631 	aggr_grp_t *aggrp = portp->lp_grp;
632 	boolean_t NTT_updated = B_FALSE;
633 	aggr_lacp_port_t *pl = &portp->lp_lacp;
634 	lacp_mux_state_t oldstate = pl->sm.mux_state;
635 
636 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(aggrp));
637 
638 	/* LACP_OFF state not in specification so check here.  */
639 	if (!pl->sm.lacp_on) {
640 		pl->sm.mux_state = LACP_DETACHED;
641 		pl->ActorOperPortState.bit.sync = B_FALSE;
642 
643 		if (pl->ActorOperPortState.bit.collecting ||
644 		    pl->ActorOperPortState.bit.distributing) {
645 			AGGR_LACP_DBG(("trunk link: (%d): "
646 			    "Collector_Distributor Disabled.\n",
647 			    portp->lp_linkid));
648 		}
649 
650 		pl->ActorOperPortState.bit.collecting =
651 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
652 		return;
653 	}
654 
655 	if (pl->sm.begin || !pl->sm.lacp_enabled)
656 		pl->sm.mux_state = LACP_DETACHED;
657 
658 again:
659 	/* determine next state, or return if state unchanged */
660 	switch (pl->sm.mux_state) {
661 	case LACP_DETACHED:
662 		if (pl->sm.begin) {
663 			break;
664 		}
665 
666 		if ((pl->sm.selected == AGGR_SELECTED) ||
667 		    (pl->sm.selected == AGGR_STANDBY)) {
668 			pl->sm.mux_state = LACP_WAITING;
669 			break;
670 		}
671 		return;
672 
673 	case LACP_WAITING:
674 		if (pl->sm.selected == AGGR_UNSELECTED) {
675 			pl->sm.mux_state = LACP_DETACHED;
676 			break;
677 		}
678 
679 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
680 			pl->sm.mux_state = LACP_ATTACHED;
681 			break;
682 		}
683 		return;
684 
685 	case LACP_ATTACHED:
686 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
687 		    (pl->sm.selected == AGGR_STANDBY)) {
688 			pl->sm.mux_state = LACP_DETACHED;
689 			break;
690 		}
691 
692 		if ((pl->sm.selected == AGGR_SELECTED) &&
693 		    pl->PartnerOperPortState.bit.sync) {
694 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
695 			break;
696 		}
697 		return;
698 
699 	case LACP_COLLECTING_DISTRIBUTING:
700 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
701 		    (pl->sm.selected == AGGR_STANDBY) ||
702 		    !pl->PartnerOperPortState.bit.sync) {
703 			pl->sm.mux_state = LACP_ATTACHED;
704 			break;
705 		}
706 		return;
707 	}
708 
709 	AGGR_LACP_DBG(("lacp_mux_sm(%d):%s--->%s\n",
710 	    portp->lp_linkid, lacp_mux_str[oldstate],
711 	    lacp_mux_str[pl->sm.mux_state]));
712 
713 	/* perform actions on entering a new state */
714 	switch (pl->sm.mux_state) {
715 	case LACP_DETACHED:
716 		if (pl->ActorOperPortState.bit.collecting ||
717 		    pl->ActorOperPortState.bit.distributing) {
718 			AGGR_LACP_DBG(("trunk link: (%d): "
719 			    "Collector_Distributor Disabled.\n",
720 			    portp->lp_linkid));
721 		}
722 
723 		pl->ActorOperPortState.bit.sync =
724 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
725 
726 		/* Turn OFF Collector_Distributor */
727 		aggr_set_coll_dist(portp, B_FALSE);
728 
729 		pl->ActorOperPortState.bit.distributing = B_FALSE;
730 		NTT_updated = B_TRUE;
731 		break;
732 
733 	case LACP_WAITING:
734 		start_wait_while_timer(portp);
735 		break;
736 
737 	case LACP_ATTACHED:
738 		if (pl->ActorOperPortState.bit.collecting ||
739 		    pl->ActorOperPortState.bit.distributing) {
740 			AGGR_LACP_DBG(("trunk link: (%d): "
741 			    "Collector_Distributor Disabled.\n",
742 			    portp->lp_linkid));
743 		}
744 
745 		pl->ActorOperPortState.bit.sync = B_TRUE;
746 		pl->ActorOperPortState.bit.collecting = B_FALSE;
747 
748 		/* Turn OFF Collector_Distributor */
749 		aggr_set_coll_dist(portp, B_FALSE);
750 
751 		pl->ActorOperPortState.bit.distributing = B_FALSE;
752 		NTT_updated = B_TRUE;
753 		if (pl->PartnerOperPortState.bit.sync) {
754 			/*
755 			 * We had already received an updated sync from
756 			 * the partner. Attempt to transition to
757 			 * collecting/distributing now.
758 			 */
759 			goto again;
760 		}
761 		break;
762 
763 	case LACP_COLLECTING_DISTRIBUTING:
764 		if (!pl->ActorOperPortState.bit.collecting &&
765 		    !pl->ActorOperPortState.bit.distributing) {
766 			AGGR_LACP_DBG(("trunk link: (%d): "
767 			    "Collector_Distributor Enabled.\n",
768 			    portp->lp_linkid));
769 		}
770 		pl->ActorOperPortState.bit.distributing = B_TRUE;
771 
772 		/* Turn Collector_Distributor back ON */
773 		aggr_set_coll_dist(portp, B_TRUE);
774 
775 		pl->ActorOperPortState.bit.collecting = B_TRUE;
776 		NTT_updated = B_TRUE;
777 		break;
778 	}
779 
780 	/*
781 	 * If we updated the state of the NTT variable, then
782 	 * initiate a LACPDU transmission.
783 	 */
784 	if (NTT_updated) {
785 		pl->NTT = B_TRUE;
786 		lacp_xmit_sm(portp);
787 	}
788 } /* lacp_mux_sm */
789 
790 
791 static void
792 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
793 {
794 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
795 	const mac_txinfo_t	*mtp;
796 
797 	AGGR_LACP_LOCK_WRITER(portp->lp_grp);
798 
799 	AGGR_LACP_DBG(("trunk link: (%d): MARKER PDU received:\n",
800 	    portp->lp_linkid));
801 
802 	/* LACP_OFF state not in specification so check here.  */
803 	if (!portp->lp_lacp.sm.lacp_on)
804 		goto bail;
805 
806 	if (MBLKL(mp) < sizeof (marker_pdu_t))
807 		goto bail;
808 
809 	if (markerp->version != MARKER_VERSION) {
810 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
811 		    "version = %d does not match s/w version %d\n",
812 		    portp->lp_linkid, markerp->version, MARKER_VERSION));
813 		goto bail;
814 	}
815 
816 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
817 		/* We do not yet send out MARKER info PDUs */
818 		AGGR_LACP_DBG(("trunk link (%d): MARKER RESPONSE PDU: "
819 		    " MARKER TLV = %d - We don't send out info type!\n",
820 		    portp->lp_linkid, markerp->tlv_marker));
821 		goto bail;
822 	}
823 
824 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
825 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
826 		    " MARKER TLV = %d \n", portp->lp_linkid,
827 		    markerp->tlv_marker));
828 		goto bail;
829 	}
830 
831 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
832 		AGGR_LACP_DBG(("trunk link (%d): Malformed MARKER PDU: "
833 		    " MARKER length = %d \n", portp->lp_linkid,
834 		    markerp->marker_len));
835 		goto bail;
836 	}
837 
838 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
839 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
840 		    " MARKER Port %d not equal to Partner port %d\n",
841 		    portp->lp_linkid, markerp->requestor_port,
842 		    portp->lp_lacp.PartnerOperPortNum));
843 		goto bail;
844 	}
845 
846 	if (ether_cmp(&markerp->system_id,
847 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
848 		AGGR_LACP_DBG(("trunk link (%d): MARKER PDU: "
849 		    " MARKER MAC not equal to Partner MAC\n",
850 		    portp->lp_linkid));
851 		goto bail;
852 	}
853 
854 	/*
855 	 * Turn into Marker Response PDU
856 	 * and return mblk to sending system
857 	 */
858 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
859 
860 	/* reuse the space that was used by received ethernet header */
861 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
862 	mp->b_rptr -= sizeof (struct ether_header);
863 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
864 
865 	/*
866 	 * Store the transmit info pointer locally in case it changes between
867 	 * loading mt_fn and mt_arg.
868 	 */
869 	mtp = portp->lp_txinfo;
870 	AGGR_LACP_UNLOCK(portp->lp_grp);
871 
872 	mtp->mt_fn(mtp->mt_arg, mp);
873 	return;
874 
875 bail:
876 	AGGR_LACP_UNLOCK(portp->lp_grp);
877 	freemsg(mp);
878 }
879 
880 
881 /*
882  * Update the LACP mode (off, active, or passive) of the specified group.
883  */
884 void
885 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
886 {
887 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
888 	aggr_port_t *port;
889 
890 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
891 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
892 
893 	if (mode == old_mode)
894 		return;
895 
896 	grp->lg_lacp_mode = mode;
897 
898 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
899 		port->lp_lacp.ActorAdminPortState.bit.activity =
900 		    port->lp_lacp.ActorOperPortState.bit.activity =
901 		    (mode == AGGR_LACP_ACTIVE);
902 
903 		if (old_mode == AGGR_LACP_OFF) {
904 			/* OFF -> {PASSIVE,ACTIVE} */
905 			/* turn OFF Collector_Distributor */
906 			aggr_set_coll_dist(port, B_FALSE);
907 			rw_enter(&port->lp_lock, RW_WRITER);
908 			lacp_on(port);
909 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
910 				aggr_lacp_port_attached(port);
911 			rw_exit(&port->lp_lock);
912 		} else if (mode == AGGR_LACP_OFF) {
913 			/* {PASSIVE,ACTIVE} -> OFF */
914 			rw_enter(&port->lp_lock, RW_WRITER);
915 			lacp_off(port);
916 			rw_exit(&port->lp_lock);
917 			if (!grp->lg_closing) {
918 				/* Turn ON Collector_Distributor */
919 				aggr_set_coll_dist(port, B_TRUE);
920 			}
921 		} else {
922 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
923 			port->lp_lacp.sm.begin = B_TRUE;
924 			lacp_mux_sm(port);
925 			lacp_periodic_sm(port);
926 
927 			/* kick off state machines */
928 			lacp_receive_sm(port, NULL);
929 			lacp_mux_sm(port);
930 		}
931 
932 		if (grp->lg_closing)
933 			break;
934 	}
935 }
936 
937 
938 /*
939  * Update the LACP timer (short or long) of the specified group.
940  */
941 void
942 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
943 {
944 	aggr_port_t *port;
945 
946 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
947 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
948 
949 	if (timer == grp->aggr.PeriodicTimer)
950 		return;
951 
952 	grp->aggr.PeriodicTimer = timer;
953 
954 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
955 		port->lp_lacp.ActorAdminPortState.bit.timeout =
956 		    port->lp_lacp.ActorOperPortState.bit.timeout =
957 		    (timer == AGGR_LACP_TIMER_SHORT);
958 	}
959 }
960 
961 
962 /*
963  * Sets the initial LACP mode (off, active, passive) and LACP timer
964  * (short, long) of the specified group.
965  */
966 void
967 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
968     aggr_lacp_timer_t timer)
969 {
970 	aggr_port_t *port;
971 
972 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
973 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
974 
975 	grp->lg_lacp_mode = mode;
976 	grp->aggr.PeriodicTimer = timer;
977 
978 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
979 		port->lp_lacp.ActorAdminPortState.bit.activity =
980 		    port->lp_lacp.ActorOperPortState.bit.activity =
981 		    (mode == AGGR_LACP_ACTIVE);
982 
983 		port->lp_lacp.ActorAdminPortState.bit.timeout =
984 		    port->lp_lacp.ActorOperPortState.bit.timeout =
985 		    (timer == AGGR_LACP_TIMER_SHORT);
986 
987 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
988 			/* Turn ON Collector_Distributor */
989 			aggr_set_coll_dist(port, B_TRUE);
990 		} else { /* LACP_ACTIVE/PASSIVE */
991 			rw_enter(&port->lp_lock, RW_WRITER);
992 			lacp_on(port);
993 			rw_exit(&port->lp_lock);
994 		}
995 	}
996 }
997 
998 /*
999  * Verify that the Partner MAC and Key recorded by the specified
1000  * port are not found in other ports that are not part of our
1001  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1002  * otherwise.
1003  */
1004 static boolean_t
1005 lacp_misconfig_check(aggr_port_t *portp)
1006 {
1007 	aggr_grp_t *grp = portp->lp_grp;
1008 	lacp_sel_ports_t *cport;
1009 
1010 	mutex_enter(&lacp_sel_lock);
1011 
1012 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1013 
1014 		/* skip entries of the group of the port being checked */
1015 		if (cport->sp_grp_linkid == grp->lg_linkid)
1016 			continue;
1017 
1018 		if ((ether_cmp(&cport->sp_partner_system,
1019 		    &grp->aggr.PartnerSystem) == 0) &&
1020 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1021 			char mac_str[ETHERADDRL*3];
1022 			struct ether_addr *mac = &cport->sp_partner_system;
1023 
1024 			/*
1025 			 * The Partner port information is already in use
1026 			 * by ports in another aggregation so disable this
1027 			 * port.
1028 			 */
1029 
1030 			(void) snprintf(mac_str, sizeof (mac_str),
1031 			    "%x:%x:%x:%x:%x:%x",
1032 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1033 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1034 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1035 
1036 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1037 
1038 			cmn_err(CE_NOTE, "aggr %d port %d: Port Partner "
1039 			    "MAC %s and key %d in use on aggregation %d "
1040 			    "port %d\n", grp->lg_linkid, portp->lp_linkid,
1041 			    mac_str, portp->lp_lacp.PartnerOperKey,
1042 			    cport->sp_grp_linkid, cport->sp_linkid);
1043 			break;
1044 		}
1045 	}
1046 
1047 	mutex_exit(&lacp_sel_lock);
1048 	return (cport != NULL);
1049 }
1050 
1051 /*
1052  * Remove the specified port from the list of selected ports.
1053  */
1054 static void
1055 lacp_sel_ports_del(aggr_port_t *portp)
1056 {
1057 	lacp_sel_ports_t *cport, **prev = NULL;
1058 
1059 	mutex_enter(&lacp_sel_lock);
1060 
1061 	prev = &sel_ports;
1062 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1063 	    cport = cport->sp_next) {
1064 		if (portp->lp_linkid == cport->sp_linkid)
1065 			break;
1066 	}
1067 
1068 	if (cport == NULL) {
1069 		mutex_exit(&lacp_sel_lock);
1070 		return;
1071 	}
1072 
1073 	*prev = cport->sp_next;
1074 	kmem_free(cport, sizeof (*cport));
1075 
1076 	mutex_exit(&lacp_sel_lock);
1077 }
1078 
1079 /*
1080  * Add the specified port to the list of selected ports. Returns B_FALSE
1081  * if the operation could not be performed due to an memory allocation
1082  * error.
1083  */
1084 static boolean_t
1085 lacp_sel_ports_add(aggr_port_t *portp)
1086 {
1087 	lacp_sel_ports_t *new_port;
1088 	lacp_sel_ports_t *cport, **last;
1089 
1090 	mutex_enter(&lacp_sel_lock);
1091 
1092 	/* check if port is already in the list */
1093 	last = &sel_ports;
1094 	for (cport = sel_ports; cport != NULL;
1095 	    last = &cport->sp_next, cport = cport->sp_next) {
1096 		if (portp->lp_linkid == cport->sp_linkid) {
1097 			ASSERT(cport->sp_partner_key ==
1098 			    portp->lp_lacp.PartnerOperKey);
1099 			ASSERT(ether_cmp(&cport->sp_partner_system,
1100 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1101 
1102 			mutex_exit(&lacp_sel_lock);
1103 			return (B_TRUE);
1104 		}
1105 	}
1106 
1107 	/* create and initialize new entry */
1108 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1109 	if (new_port == NULL) {
1110 		mutex_exit(&lacp_sel_lock);
1111 		return (B_FALSE);
1112 	}
1113 
1114 	new_port->sp_grp_linkid = portp->lp_grp->lg_linkid;
1115 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1116 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1117 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1118 	new_port->sp_linkid = portp->lp_linkid;
1119 
1120 	*last = new_port;
1121 
1122 	mutex_exit(&lacp_sel_lock);
1123 	return (B_TRUE);
1124 }
1125 
1126 /*
1127  * lacp_selection_logic - LACP selection logic
1128  *		Sets the selected variable on a per port basis
1129  *		and sets Ready when all waiting ports are ready
1130  *		to go online.
1131  *
1132  * parameters:
1133  *      - portp - instance this applies to.
1134  *
1135  * invoked:
1136  *    - when initialization is needed
1137  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1138  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1139  *    - every time the wait_while_timer pops
1140  *    - everytime we turn LACP on/off
1141  */
1142 static void
1143 lacp_selection_logic(aggr_port_t *portp)
1144 {
1145 	aggr_port_t *tpp;
1146 	aggr_grp_t *aggrp = portp->lp_grp;
1147 	int ports_waiting;
1148 	boolean_t reset_mac = B_FALSE;
1149 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1150 
1151 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(aggrp));
1152 
1153 	/* LACP_OFF state not in specification so check here.  */
1154 	if (!pl->sm.lacp_on) {
1155 		lacp_port_unselect(portp);
1156 		aggrp->aggr.ready = B_FALSE;
1157 		lacp_mux_sm(portp);
1158 		return;
1159 	}
1160 
1161 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1162 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1163 
1164 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1165 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1166 		    "lp_state=%d)\n", portp->lp_linkid, pl->sm.selected,
1167 		    AGGR_UNSELECTED, pl->sm.begin, pl->sm.lacp_enabled,
1168 		    portp->lp_state));
1169 
1170 		lacp_port_unselect(portp);
1171 		aggrp->aggr.ready = B_FALSE;
1172 		lacp_mux_sm(portp);
1173 		return;
1174 	}
1175 
1176 	/*
1177 	 * If LACP is not enabled then selected is never set.
1178 	 */
1179 	if (!pl->sm.lacp_enabled) {
1180 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): selected %d-->%d\n",
1181 		    portp->lp_linkid, pl->sm.selected, AGGR_UNSELECTED));
1182 
1183 		lacp_port_unselect(portp);
1184 		lacp_mux_sm(portp);
1185 		return;
1186 	}
1187 
1188 	/*
1189 	 * Check if the Partner MAC or Key are zero. If so, we have
1190 	 * not received any LACP info or it has expired and the
1191 	 * receive machine is in the LACP_DEFAULTED state.
1192 	 */
1193 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1194 	    (pl->PartnerOperKey == 0)) {
1195 
1196 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1197 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1198 			    &etherzeroaddr) != 0 &&
1199 			    (tpp->lp_lacp.PartnerOperKey != 0))
1200 				break;
1201 		}
1202 
1203 		/*
1204 		 * If all ports have no key or aggregation address,
1205 		 * then clear the negotiated Partner MAC and key.
1206 		 */
1207 		if (tpp == NULL) {
1208 			/* Clear the aggregation Partner MAC and key */
1209 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1210 			aggrp->aggr.PartnerOperAggrKey = 0;
1211 		}
1212 
1213 		return;
1214 	}
1215 
1216 	/*
1217 	 * Insure that at least one port in the aggregation
1218 	 * matches the Partner aggregation MAC and key. If not,
1219 	 * then clear the aggregation MAC and key. Later we will
1220 	 * set the Partner aggregation MAC and key to that of the
1221 	 * current port's Partner MAC and key.
1222 	 */
1223 	if (ether_cmp(&pl->PartnerOperSystem,
1224 	    &aggrp->aggr.PartnerSystem) != 0 ||
1225 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1226 
1227 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1228 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1229 			    &aggrp->aggr.PartnerSystem) == 0 &&
1230 			    (tpp->lp_lacp.PartnerOperKey ==
1231 			    aggrp->aggr.PartnerOperAggrKey))
1232 				break;
1233 		}
1234 
1235 		if (tpp == NULL) {
1236 			/* Clear the aggregation Partner MAC and key */
1237 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1238 			aggrp->aggr.PartnerOperAggrKey = 0;
1239 			reset_mac = B_TRUE;
1240 		}
1241 	}
1242 
1243 	/*
1244 	 * If our Actor MAC is found in the Partner MAC
1245 	 * on this port then we have a loopback misconfiguration.
1246 	 */
1247 	if (ether_cmp(&pl->PartnerOperSystem,
1248 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1249 		cmn_err(CE_NOTE, "trunk link: (%d): Loopback condition.\n",
1250 		    portp->lp_linkid);
1251 
1252 		lacp_port_unselect(portp);
1253 		lacp_mux_sm(portp);
1254 		return;
1255 	}
1256 
1257 	/*
1258 	 * If our Partner MAC and Key are found on any other
1259 	 * ports that are not in our aggregation, we have
1260 	 * a misconfiguration.
1261 	 */
1262 	if (lacp_misconfig_check(portp)) {
1263 		lacp_mux_sm(portp);
1264 		return;
1265 	}
1266 
1267 	/*
1268 	 * If the Aggregation Partner MAC and Key have not been
1269 	 * set, then this is either the first port or the aggregation
1270 	 * MAC and key have been reset. In either case we must set
1271 	 * the values of the Partner MAC and key.
1272 	 */
1273 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1274 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1275 		/* Set aggregation Partner MAC and key */
1276 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1277 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1278 
1279 		/*
1280 		 * If we reset Partner aggregation MAC, then restart
1281 		 * selection_logic on ports that match new MAC address.
1282 		 */
1283 		if (reset_mac) {
1284 			for (tpp = aggrp->lg_ports; tpp; tpp =
1285 			    tpp->lp_next) {
1286 				if (tpp == portp)
1287 					continue;
1288 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1289 				    &aggrp->aggr.PartnerSystem) == 0 &&
1290 				    (tpp->lp_lacp.PartnerOperKey ==
1291 				    aggrp->aggr.PartnerOperAggrKey))
1292 					lacp_selection_logic(tpp);
1293 			}
1294 		}
1295 	} else if (ether_cmp(&pl->PartnerOperSystem,
1296 	    &aggrp->aggr.PartnerSystem) != 0 ||
1297 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1298 		/*
1299 		 * The Partner port information does not match
1300 		 * that of the other ports in the aggregation
1301 		 * so disable this port.
1302 		 */
1303 		lacp_port_unselect(portp);
1304 
1305 		cmn_err(CE_NOTE, "trunk link: (%d): Port Partner MAC "
1306 		    "or key (%d) incompatible with Aggregation Partner "
1307 		    "MAC or key (%d)\n", portp->lp_linkid, pl->PartnerOperKey,
1308 		    aggrp->aggr.PartnerOperAggrKey);
1309 
1310 		lacp_mux_sm(portp);
1311 		return;
1312 	}
1313 
1314 	/* If we get to here, automatically set selected */
1315 	if (pl->sm.selected != AGGR_SELECTED) {
1316 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1317 		    "selected %d-->%d\n", portp->lp_linkid,
1318 		    pl->sm.selected, AGGR_SELECTED));
1319 		if (!lacp_port_select(portp))
1320 			return;
1321 		lacp_mux_sm(portp);
1322 	}
1323 
1324 	/*
1325 	 * From this point onward we have selected the port
1326 	 * and are simply checking if the Ready flag should
1327 	 * be set.
1328 	 */
1329 
1330 	/*
1331 	 * If at least two ports are waiting to aggregate
1332 	 * and ready_n is set on all ports waiting to aggregate
1333 	 * then set READY for the aggregation.
1334 	 */
1335 
1336 	ports_waiting = 0;
1337 
1338 	if (!aggrp->aggr.ready) {
1339 		/*
1340 		 * If all ports in the aggregation have received compatible
1341 		 * partner information and they match up correctly with the
1342 		 * switch, there is no need to wait for all the
1343 		 * wait_while_timers to pop.
1344 		 */
1345 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1346 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1347 			    tpp->lp_lacp.sm.begin) &&
1348 			    !pl->PartnerOperPortState.bit.sync) {
1349 				/* Add up ports uninitialized or waiting */
1350 				ports_waiting++;
1351 				if (!tpp->lp_lacp.sm.ready_n)
1352 					return;
1353 			}
1354 		}
1355 	}
1356 
1357 	if (aggrp->aggr.ready) {
1358 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): "
1359 		    "aggr.ready already set\n", portp->lp_linkid));
1360 		lacp_mux_sm(portp);
1361 	} else {
1362 		AGGR_LACP_DBG(("lacp_selection_logic:(%d): Ready %d-->%d\n",
1363 		    portp->lp_linkid, aggrp->aggr.ready, B_TRUE));
1364 		aggrp->aggr.ready = B_TRUE;
1365 
1366 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1367 			lacp_mux_sm(tpp);
1368 	}
1369 
1370 }
1371 
1372 /*
1373  * wait_while_timer_pop - When the timer pops, we arrive here to
1374  *			set ready_n and trigger the selection logic.
1375  */
1376 static void
1377 wait_while_timer_pop(void *data)
1378 {
1379 	aggr_port_t *portp = data;
1380 
1381 	if (portp->lp_closing)
1382 		return;
1383 
1384 	AGGR_LACP_LOCK_WRITER(portp->lp_grp);
1385 
1386 	AGGR_LACP_DBG(("trunk link:(%d): wait_while_timer pop \n",
1387 	    portp->lp_linkid));
1388 	portp->lp_lacp.wait_while_timer.id = 0;
1389 	portp->lp_lacp.sm.ready_n = B_TRUE;
1390 
1391 	lacp_selection_logic(portp);
1392 	AGGR_LACP_UNLOCK(portp->lp_grp);
1393 }
1394 
1395 static void
1396 start_wait_while_timer(aggr_port_t *portp)
1397 {
1398 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1399 
1400 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1401 		portp->lp_lacp.wait_while_timer.id =
1402 		    timeout(wait_while_timer_pop, portp,
1403 		    drv_usectohz(1000000 *
1404 		    portp->lp_lacp.wait_while_timer.val));
1405 	}
1406 }
1407 
1408 
1409 static void
1410 stop_wait_while_timer(portp)
1411 aggr_port_t *portp;
1412 {
1413 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1414 
1415 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1416 		AGGR_LACP_UNLOCK(portp->lp_grp);
1417 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1418 		AGGR_LACP_LOCK_WRITER(portp->lp_grp);
1419 		portp->lp_lacp.wait_while_timer.id = 0;
1420 	}
1421 }
1422 
1423 /*
1424  * Invoked when a port has been attached to a group.
1425  * Complete the processing that couldn't be finished from lacp_on()
1426  * because the port was not started. We know that the link is full
1427  * duplex and ON, otherwise it wouldn't be attached.
1428  */
1429 void
1430 aggr_lacp_port_attached(aggr_port_t *portp)
1431 {
1432 	aggr_grp_t *grp = portp->lp_grp;
1433 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1434 
1435 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
1436 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1437 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1438 
1439 	AGGR_LACP_DBG(("aggr_lacp_port_attached: port %d\n",
1440 	    portp->lp_linkid));
1441 
1442 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1443 
1444 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1445 		pl->ActorAdminPortState.bit.activity =
1446 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1447 
1448 		/* Turn ON Collector_Distributor */
1449 		aggr_set_coll_dist_locked(portp, B_TRUE);
1450 
1451 		return;
1452 	}
1453 
1454 	pl->ActorAdminPortState.bit.activity =
1455 	    pl->ActorOperPortState.bit.activity =
1456 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1457 
1458 	pl->ActorAdminPortState.bit.timeout =
1459 	    pl->ActorOperPortState.bit.timeout =
1460 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1461 
1462 	pl->sm.lacp_enabled = B_TRUE;
1463 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1464 	pl->sm.begin = B_TRUE;
1465 
1466 	if (!pl->sm.lacp_on) {
1467 		/* Turn OFF Collector_Distributor */
1468 		aggr_set_coll_dist_locked(portp, B_FALSE);
1469 
1470 		lacp_on(portp);
1471 	} else {
1472 		lacp_receive_sm(portp, NULL);
1473 		lacp_mux_sm(portp);
1474 
1475 		/* Enable Multicast Slow Protocol address */
1476 		aggr_lacp_mcast_on(portp);
1477 
1478 		/* periodic_sm is started up from the receive machine */
1479 		lacp_selection_logic(portp);
1480 	}
1481 }
1482 
1483 /*
1484  * Invoked when a port has been detached from a group. Turn off
1485  * LACP processing if it was enabled.
1486  */
1487 void
1488 aggr_lacp_port_detached(aggr_port_t *portp)
1489 {
1490 	aggr_grp_t *grp = portp->lp_grp;
1491 
1492 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
1493 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1494 
1495 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %d\n",
1496 	    portp->lp_linkid));
1497 
1498 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1499 
1500 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1501 		return;
1502 
1503 	/* Disable Slow Protocol PDUs */
1504 	lacp_off(portp);
1505 }
1506 
1507 
1508 /*
1509  * Invoked after the outbound port selection policy has been changed.
1510  */
1511 void
1512 aggr_lacp_policy_changed(aggr_grp_t *grp)
1513 {
1514 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
1515 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1516 
1517 	/* suspend transmission for CollectorMaxDelay time */
1518 	delay(grp->aggr.CollectorMaxDelay * 10);
1519 }
1520 
1521 
1522 /*
1523  * Enable Slow Protocol LACP and Marker PDUs.
1524  */
1525 static void
1526 lacp_on(aggr_port_t *portp)
1527 {
1528 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1529 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1530 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1531 
1532 	/*
1533 	 * Reset the state machines and Partner operational
1534 	 * information. Careful to not reset things like
1535 	 * our link state.
1536 	 */
1537 	lacp_reset_port(portp);
1538 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1539 
1540 	AGGR_LACP_DBG(("lacp_on:(%d): \n", portp->lp_linkid));
1541 
1542 	lacp_receive_sm(portp, NULL);
1543 	lacp_mux_sm(portp);
1544 
1545 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1546 		return;
1547 
1548 	/* Enable Multicast Slow Protocol address */
1549 	aggr_lacp_mcast_on(portp);
1550 
1551 	/* periodic_sm is started up from the receive machine */
1552 	lacp_selection_logic(portp);
1553 } /* lacp_on */
1554 
1555 
1556 /* Disable Slow Protocol LACP and Marker PDUs */
1557 static void
1558 lacp_off(aggr_port_t *portp)
1559 {
1560 	aggr_grp_t *grp = portp->lp_grp;
1561 
1562 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1563 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1564 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1565 
1566 	portp->lp_lacp.sm.lacp_on = B_FALSE;
1567 
1568 	AGGR_LACP_DBG(("lacp_off:(%d): \n", portp->lp_linkid));
1569 
1570 	/*
1571 	 * Disable Slow Protocol Timers.  We must temporarily release
1572 	 * the group and port locks to avoid deadlocks. Make sure that
1573 	 * neither the port nor group are closing after re-acquiring
1574 	 * their locks.
1575 	 */
1576 	rw_exit(&portp->lp_lock);
1577 	rw_exit(&grp->lg_lock);
1578 
1579 	stop_periodic_timer(portp);
1580 	stop_current_while_timer(portp);
1581 	stop_wait_while_timer(portp);
1582 
1583 	rw_enter(&grp->lg_lock, RW_WRITER);
1584 	rw_enter(&portp->lp_lock, RW_WRITER);
1585 
1586 	if (!portp->lp_closing && !grp->lg_closing) {
1587 		lacp_mux_sm(portp);
1588 		lacp_periodic_sm(portp);
1589 		lacp_selection_logic(portp);
1590 	}
1591 
1592 	/* Turn OFF Collector_Distributor */
1593 	aggr_set_coll_dist_locked(portp, B_FALSE);
1594 
1595 	/* Disable Multicast Slow Protocol address */
1596 	aggr_lacp_mcast_off(portp);
1597 
1598 	lacp_reset_port(portp);
1599 }
1600 
1601 
1602 static boolean_t
1603 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1604 {
1605 	/*
1606 	 * 43.4.12 - "a Receive machine shall not validate
1607 	 * the Version Number, TLV_type, or Reserved fields in received
1608 	 * LACPDUs."
1609 	 * ... "a Receive machine may validate the Actor_Information_Length,
1610 	 * Partner_Information_Length, Collector_Information_Length,
1611 	 * or Terminator_Length fields."
1612 	 */
1613 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1614 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1615 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1616 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1617 		AGGR_LACP_DBG(("trunk link (%d): Malformed LACPDU: "
1618 		    " Terminator Length = %d \n", portp->lp_linkid,
1619 		    lacp->terminator_len));
1620 		return (B_FALSE);
1621 	}
1622 
1623 	return (B_TRUE);
1624 }
1625 
1626 
1627 static void
1628 start_current_while_timer(aggr_port_t *portp, uint_t time)
1629 {
1630 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1631 
1632 	if (portp->lp_lacp.current_while_timer.id == 0) {
1633 		if (time > 0) {
1634 			portp->lp_lacp.current_while_timer.val = time;
1635 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1636 			portp->lp_lacp.current_while_timer.val =
1637 			    SHORT_TIMEOUT_TIME;
1638 		} else {
1639 			portp->lp_lacp.current_while_timer.val =
1640 			    LONG_TIMEOUT_TIME;
1641 		}
1642 
1643 		portp->lp_lacp.current_while_timer.id =
1644 		    timeout(current_while_timer_pop, portp,
1645 		    drv_usectohz((clock_t)1000000 *
1646 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1647 	}
1648 }
1649 
1650 
1651 static void
1652 stop_current_while_timer(aggr_port_t *portp)
1653 {
1654 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1655 
1656 	if (portp->lp_lacp.current_while_timer.id != 0) {
1657 		AGGR_LACP_UNLOCK(portp->lp_grp);
1658 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1659 		AGGR_LACP_LOCK_WRITER(portp->lp_grp);
1660 		portp->lp_lacp.current_while_timer.id = 0;
1661 	}
1662 }
1663 
1664 
1665 static void
1666 current_while_timer_pop(void *data)
1667 {
1668 	aggr_port_t *portp = (aggr_port_t *)data;
1669 
1670 	if (portp->lp_closing)
1671 		return;
1672 
1673 	AGGR_LACP_LOCK_WRITER(portp->lp_grp);
1674 
1675 	AGGR_LACP_DBG(("trunk link:(%d): current_while_timer "
1676 	    "pop id=%p\n", portp->lp_linkid,
1677 	    portp->lp_lacp.current_while_timer.id));
1678 
1679 	portp->lp_lacp.current_while_timer.id = 0;
1680 	lacp_receive_sm(portp, NULL);
1681 	AGGR_LACP_UNLOCK(portp->lp_grp);
1682 }
1683 
1684 
1685 /*
1686  * record_Default - Simply copies over administrative values
1687  * to the partner operational values, and sets our state to indicate we
1688  * are using defaulted values.
1689  */
1690 static void
1691 record_Default(aggr_port_t *portp)
1692 {
1693 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1694 
1695 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1696 
1697 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1698 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1699 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1700 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1701 	pl->PartnerOperKey = pl->PartnerAdminKey;
1702 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1703 
1704 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1705 }
1706 
1707 
1708 /* Returns B_TRUE on sync value changing */
1709 static boolean_t
1710 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1711 {
1712 	aggr_grp_t *aggrp = portp->lp_grp;
1713 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1714 	uint8_t save_sync;
1715 
1716 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1717 
1718 	/*
1719 	 * Partner Information
1720 	 */
1721 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1722 	pl->PartnerOperPortPriority =
1723 	    ntohs(lacp->actor_info.port_priority);
1724 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1725 	pl->PartnerOperSysPriority =
1726 	    htons(lacp->actor_info.system_priority);
1727 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1728 
1729 	/* All state info except for Synchronization */
1730 	save_sync = pl->PartnerOperPortState.bit.sync;
1731 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1732 
1733 	/* Defaulted set to FALSE */
1734 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1735 
1736 	/*
1737 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1738 	 *		Partner_System_Priority, Partner_Key, and
1739 	 *		Partner_State.Aggregation) are compared to the
1740 	 *		corresponding operations paramters values for
1741 	 *		the Actor. If these are equal, or if this is
1742 	 *		an individual link, we are synchronized.
1743 	 */
1744 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1745 	    (ntohs(lacp->partner_info.port_priority) ==
1746 	    pl->ActorPortPriority) &&
1747 	    (ether_cmp(&lacp->partner_info.system_id,
1748 	    (struct ether_addr *)&aggrp->lg_addr) == 0) &&
1749 	    (ntohs(lacp->partner_info.system_priority) ==
1750 	    aggrp->aggr.ActorSystemPriority) &&
1751 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1752 	    (lacp->partner_info.state.bit.aggregation ==
1753 	    pl->ActorOperPortState.bit.aggregation)) ||
1754 	    (!lacp->actor_info.state.bit.aggregation)) {
1755 
1756 		pl->PartnerOperPortState.bit.sync =
1757 		    lacp->actor_info.state.bit.sync;
1758 	} else {
1759 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1760 	}
1761 
1762 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1763 		AGGR_LACP_DBG(("record_PDU:(%d): partner sync "
1764 		    "%d -->%d\n", portp->lp_linkid, save_sync,
1765 		    pl->PartnerOperPortState.bit.sync));
1766 		return (B_TRUE);
1767 	} else {
1768 		return (B_FALSE);
1769 	}
1770 }
1771 
1772 
1773 /*
1774  * update_selected - If any of the Partner parameters has
1775  *			changed from a previous value, then
1776  *			unselect the link from the aggregator.
1777  */
1778 static boolean_t
1779 update_selected(aggr_port_t *portp, lacp_t *lacp)
1780 {
1781 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1782 
1783 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1784 
1785 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1786 	    (pl->PartnerOperPortPriority !=
1787 	    ntohs(lacp->actor_info.port_priority)) ||
1788 	    (ether_cmp(&pl->PartnerOperSystem,
1789 	    &lacp->actor_info.system_id) != 0) ||
1790 	    (pl->PartnerOperSysPriority !=
1791 	    ntohs(lacp->actor_info.system_priority)) ||
1792 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1793 	    (pl->PartnerOperPortState.bit.aggregation !=
1794 	    lacp->actor_info.state.bit.aggregation)) {
1795 		AGGR_LACP_DBG(("update_selected:(%d): "
1796 		    "selected  %d-->%d\n", portp->lp_linkid, pl->sm.selected,
1797 		    AGGR_UNSELECTED));
1798 
1799 		lacp_port_unselect(portp);
1800 		return (B_TRUE);
1801 	} else {
1802 		return (B_FALSE);
1803 	}
1804 }
1805 
1806 
1807 /*
1808  * update_default_selected - If any of the operational Partner parameters
1809  *			is different than that of the administrative values
1810  *			then unselect the link from the aggregator.
1811  */
1812 static void
1813 update_default_selected(aggr_port_t *portp)
1814 {
1815 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1816 
1817 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1818 
1819 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1820 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1821 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1822 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1823 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1824 	    (pl->PartnerOperPortState.bit.aggregation !=
1825 	    pl->PartnerAdminPortState.bit.aggregation)) {
1826 
1827 		AGGR_LACP_DBG(("update_default_selected:(%d): "
1828 		    "selected  %d-->%d\n", portp->lp_linkid,
1829 		    pl->sm.selected, AGGR_UNSELECTED));
1830 
1831 		lacp_port_unselect(portp);
1832 	}
1833 }
1834 
1835 
1836 /*
1837  * update_NTT - If any of the Partner values in the received LACPDU
1838  *			are different than that of the Actor operational
1839  *			values then set NTT to true.
1840  */
1841 static void
1842 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1843 {
1844 	aggr_grp_t *aggrp = portp->lp_grp;
1845 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1846 
1847 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1848 
1849 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1850 	    (pl->ActorPortPriority !=
1851 	    ntohs(lacp->partner_info.port_priority)) ||
1852 	    (ether_cmp(&aggrp->lg_addr,
1853 	    &lacp->partner_info.system_id) != 0) ||
1854 	    (aggrp->aggr.ActorSystemPriority !=
1855 	    ntohs(lacp->partner_info.system_priority)) ||
1856 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1857 	    (pl->ActorOperPortState.bit.activity !=
1858 	    lacp->partner_info.state.bit.activity) ||
1859 	    (pl->ActorOperPortState.bit.timeout !=
1860 	    lacp->partner_info.state.bit.timeout) ||
1861 	    (pl->ActorOperPortState.bit.sync !=
1862 	    lacp->partner_info.state.bit.sync) ||
1863 	    (pl->ActorOperPortState.bit.aggregation !=
1864 	    lacp->partner_info.state.bit.aggregation)) {
1865 
1866 		AGGR_LACP_DBG(("update_NTT:(%d): NTT  %d-->%d\n",
1867 		    portp->lp_linkid, pl->NTT, B_TRUE));
1868 
1869 		pl->NTT = B_TRUE;
1870 	}
1871 }
1872 
1873 /*
1874  * lacp_receive_sm - LACP receive state machine
1875  *
1876  * parameters:
1877  *      - portp - instance this applies to.
1878  *      - lacp - pointer in the case of a received LACPDU.
1879  *                This value is NULL if there is no LACPDU.
1880  *
1881  * invoked:
1882  *    - when initialization is needed
1883  *    - upon reception of an LACPDU. This is the common case.
1884  *    - every time the current_while_timer pops
1885  */
1886 static void
1887 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
1888 {
1889 	boolean_t sync_updated, selected_updated, save_activity;
1890 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1891 	lacp_receive_state_t oldstate = pl->sm.receive_state;
1892 
1893 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
1894 
1895 	/* LACP_OFF state not in specification so check here.  */
1896 	if (!pl->sm.lacp_on)
1897 		return;
1898 
1899 	/* figure next state */
1900 	if (pl->sm.begin || pl->sm.port_moved) {
1901 		pl->sm.receive_state = LACP_INITIALIZE;
1902 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
1903 		pl->sm.receive_state = LACP_PORT_DISABLED;
1904 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
1905 		pl->sm.receive_state =
1906 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
1907 		    LACP_DISABLED : LACP_PORT_DISABLED;
1908 	} else if (lacp != NULL) {
1909 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
1910 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
1911 			pl->sm.receive_state = LACP_CURRENT;
1912 		}
1913 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
1914 	    (pl->current_while_timer.id == 0)) {
1915 		pl->sm.receive_state = LACP_EXPIRED;
1916 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
1917 	    (pl->current_while_timer.id == 0)) {
1918 		pl->sm.receive_state = LACP_DEFAULTED;
1919 	}
1920 
1921 
1922 	if (!((lacp && (oldstate == LACP_CURRENT) &&
1923 	    (pl->sm.receive_state == LACP_CURRENT)))) {
1924 		AGGR_LACP_DBG(("lacp_receive_sm(%d):%s--->%s\n",
1925 		    portp->lp_linkid, lacp_receive_str[oldstate],
1926 		    lacp_receive_str[pl->sm.receive_state]));
1927 	}
1928 
1929 	switch (pl->sm.receive_state) {
1930 	case LACP_INITIALIZE:
1931 		lacp_port_unselect(portp);
1932 		record_Default(portp);
1933 		pl->ActorOperPortState.bit.expired = B_FALSE;
1934 		pl->sm.port_moved = B_FALSE;
1935 		pl->sm.receive_state = LACP_PORT_DISABLED;
1936 		pl->sm.begin = B_FALSE;
1937 		lacp_receive_sm(portp, NULL);
1938 		break;
1939 
1940 	case LACP_PORT_DISABLED:
1941 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1942 		/*
1943 		 * Stop current_while_timer in case
1944 		 * we got here from link down
1945 		 */
1946 		stop_current_while_timer(portp);
1947 
1948 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
1949 			pl->sm.receive_state = LACP_DISABLED;
1950 			lacp_receive_sm(portp, lacp);
1951 			/* We goto LACP_DISABLED state */
1952 			break;
1953 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
1954 			pl->sm.receive_state = LACP_EXPIRED;
1955 			/*
1956 			 * FALL THROUGH TO LACP_EXPIRED CASE:
1957 			 * We have no way of knowing if we get into
1958 			 * lacp_receive_sm() from a  current_while_timer
1959 			 * expiring as it has never been kicked off yet!
1960 			 */
1961 		} else {
1962 			/* We stay in LACP_PORT_DISABLED state */
1963 			break;
1964 		}
1965 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
1966 		/* FALLTHROUGH */
1967 
1968 	case LACP_EXPIRED:
1969 		/*
1970 		 * Arrives here from LACP_PORT_DISABLED state as well as
1971 		 * as well as current_while_timer expiring.
1972 		 */
1973 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1974 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
1975 
1976 		pl->ActorOperPortState.bit.expired = B_TRUE;
1977 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
1978 		lacp_periodic_sm(portp);
1979 		break;
1980 
1981 	case LACP_DISABLED:
1982 		/*
1983 		 * This is the normal state for recv_sm when LACP_OFF
1984 		 * is set or the NIC is in half duplex mode.
1985 		 */
1986 		lacp_port_unselect(portp);
1987 		record_Default(portp);
1988 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
1989 		pl->ActorOperPortState.bit.expired = B_FALSE;
1990 		break;
1991 
1992 	case LACP_DEFAULTED:
1993 		/*
1994 		 * Current_while_timer expired a second time.
1995 		 */
1996 		update_default_selected(portp);
1997 		record_Default(portp);	/* overwrite Partner Oper val */
1998 		pl->ActorOperPortState.bit.expired = B_FALSE;
1999 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2000 
2001 		lacp_selection_logic(portp);
2002 		lacp_mux_sm(portp);
2003 		break;
2004 
2005 	case LACP_CURRENT:
2006 		/*
2007 		 * Reception of LACPDU
2008 		 */
2009 
2010 		if (!lacp) /* no LACPDU so current_while_timer popped */
2011 			break;
2012 
2013 		AGGR_LACP_DBG(("lacp_receive_sm: (%d): LACPDU received:\n",
2014 		    portp->lp_linkid));
2015 
2016 		/*
2017 		 * Validate Actor_Information_Length,
2018 		 * Partner_Information_Length, Collector_Information_Length,
2019 		 * and Terminator_Length fields.
2020 		 */
2021 		if (!valid_lacp_pdu(portp, lacp)) {
2022 			AGGR_LACP_DBG(("lacp_receive_sm (%d): "
2023 			    "Invalid LACPDU received\n",
2024 			    portp->lp_linkid));
2025 			break;
2026 		}
2027 
2028 		save_activity = pl->PartnerOperPortState.bit.activity;
2029 		selected_updated = update_selected(portp, lacp);
2030 		update_NTT(portp, lacp);
2031 		sync_updated = record_PDU(portp, lacp);
2032 
2033 		pl->ActorOperPortState.bit.expired = B_FALSE;
2034 
2035 		if (selected_updated) {
2036 			lacp_selection_logic(portp);
2037 			lacp_mux_sm(portp);
2038 		} else if (sync_updated) {
2039 			lacp_mux_sm(portp);
2040 		}
2041 
2042 		/*
2043 		 * If the periodic timer value bit has been modified
2044 		 * or the partner activity bit has been changed then
2045 		 * we need to respectively:
2046 		 *  - restart the timer with the proper timeout value.
2047 		 *  - possibly enable/disable transmission of LACPDUs.
2048 		 */
2049 		if ((pl->PartnerOperPortState.bit.timeout &&
2050 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2051 		    (!pl->PartnerOperPortState.bit.timeout &&
2052 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2053 		    (pl->PartnerOperPortState.bit.activity !=
2054 		    save_activity)) {
2055 			lacp_periodic_sm(portp);
2056 		}
2057 
2058 		stop_current_while_timer(portp);
2059 		/* Check if we need to transmit an LACPDU */
2060 		if (pl->NTT)
2061 			lacp_xmit_sm(portp);
2062 		start_current_while_timer(portp, 0);
2063 
2064 		break;
2065 	}
2066 }
2067 
2068 static void
2069 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2070 {
2071 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
2072 	rw_enter(&portp->lp_lock, RW_WRITER);
2073 	aggr_set_coll_dist_locked(portp, enable);
2074 	rw_exit(&portp->lp_lock);
2075 }
2076 
2077 static void
2078 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
2079 {
2080 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(portp->lp_grp));
2081 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
2082 
2083 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%d) %s\n",
2084 	    portp->lp_linkid, enable ? "ENABLED" : "DISABLED"));
2085 
2086 	if (!enable) {
2087 		/*
2088 		 * Turn OFF Collector_Distributor.
2089 		 */
2090 		portp->lp_collector_enabled = B_FALSE;
2091 		aggr_send_port_disable(portp);
2092 		return;
2093 	}
2094 
2095 	/*
2096 	 * Turn ON Collector_Distributor.
2097 	 */
2098 
2099 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2100 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2101 		/* Port is compatible and can be aggregated */
2102 		portp->lp_collector_enabled = B_TRUE;
2103 		aggr_send_port_enable(portp);
2104 	}
2105 }
2106 
2107 /*
2108  * Process a received Marker or LACPDU.
2109  */
2110 void
2111 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2112 {
2113 	lacp_t	*lacp;
2114 
2115 	dmp->b_rptr += sizeof (struct ether_header);
2116 
2117 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2118 		freemsg(dmp);
2119 		return;
2120 	}
2121 
2122 	lacp = (lacp_t *)dmp->b_rptr;
2123 
2124 	switch (lacp->subtype) {
2125 	case LACP_SUBTYPE:
2126 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): LACPDU received.\n",
2127 		    portp->lp_linkid));
2128 
2129 		AGGR_LACP_LOCK_WRITER(portp->lp_grp);
2130 		if (!portp->lp_lacp.sm.lacp_on) {
2131 			AGGR_LACP_UNLOCK(portp->lp_grp);
2132 			break;
2133 		}
2134 		lacp_receive_sm(portp, lacp);
2135 		AGGR_LACP_UNLOCK(portp->lp_grp);
2136 		break;
2137 
2138 	case MARKER_SUBTYPE:
2139 		AGGR_LACP_DBG(("aggr_lacp_rx:(%d): Marker Packet received.\n",
2140 		    portp->lp_linkid));
2141 
2142 		(void) receive_marker_pdu(portp, dmp);
2143 		break;
2144 
2145 	default:
2146 		AGGR_LACP_DBG(("aggr_lacp_rx: (%d): "
2147 		    "Unknown Slow Protocol type %d\n",
2148 		    portp->lp_linkid, lacp->subtype));
2149 		break;
2150 	}
2151 
2152 	freemsg(dmp);
2153 }
2154