xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_lacp.c (revision 09295472)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation - LACP & Marker Protocol processing.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/list.h>
37 #include <sys/ksynch.h>
38 #include <sys/kmem.h>
39 #include <sys/stream.h>
40 #include <sys/modctl.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/atomic.h>
44 #include <sys/stat.h>
45 #include <sys/byteorder.h>
46 #include <sys/strsun.h>
47 #include <sys/isa_defs.h>
48 
49 #include <sys/aggr.h>
50 #include <sys/aggr_impl.h>
51 
/*
 * All-zero Ethernet address.
 */
static struct ether_addr	etherzeroaddr = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
55 
/*
 * Slow_Protocol_Multicast address, as per IEEE 802.3ad spec.
 * Used as the destination address of the frames built by
 * fill_lacp_ether(), and as the multicast address enabled/disabled on a
 * port by aggr_lacp_mcast_on()/aggr_lacp_mcast_off().
 */
static struct ether_addr   slow_multicast_addr = {
	0x01, 0x80, 0xc2, 0x00, 0x00, 0x02
};
62 
#ifdef DEBUG
/*
 * LACP state machine debugging support. Messages are printed only when
 * aggr_lacp_debug is non-zero.
 *
 * AGGR_LACP_DBG() takes a parenthesized printf() argument list, e.g.
 * AGGR_LACP_DBG(("value = %d\n", val)). Note that the DEBUG variant
 * expands to a bare if-statement, so take care when using it as the body
 * of an unbraced if/else.
 */
static uint32_t aggr_lacp_debug = 0;
#define	AGGR_LACP_DBG(x)	if (aggr_lacp_debug) { (void) printf x; }
#else
/* Non-DEBUG builds: the macro expands to an empty block. */
#define	AGGR_LACP_DBG(x)	{}
#endif /* DEBUG */

/* Number of nanoseconds in one second; compared against gethrtime() deltas. */
#define	NSECS_PER_SEC   1000000000ll
72 
/*
 * used by lacp_misconfig_walker()
 * NOTE(review): no lacp_misconfig_walker() appears in the part of the file
 * reviewed here; confirm this type is still referenced.
 */
typedef struct lacp_misconfig_check_state_s {
	aggr_port_t *cs_portp;	/* presumably the port being checked */
	boolean_t cs_found;	/* presumably set when a conflict is found */
} lacp_misconfig_check_state_t;
78 
/*
 * Printable names of the state machine states. The periodic and mux
 * tables are indexed by the corresponding state value in the debug
 * messages of lacp_periodic_sm() and lacp_mux_sm().
 */
static const char *lacp_receive_str[] = LACP_RECEIVE_STATE_STRINGS;
static const char *lacp_periodic_str[] = LACP_PERIODIC_STRINGS;
static const char *lacp_mux_str[] = LACP_MUX_STRINGS;

/*
 * Default priorities: lacp_port_priority is assigned to each port's
 * ActorPortPriority by aggr_lacp_init_port(), and lacp_system_priority to
 * each group's ActorSystemPriority by aggr_lacp_init_grp().
 */
static uint16_t lacp_port_priority = 0x1000;
static uint16_t lacp_system_priority = 0x1000;
85 
/*
 * Maintains a list of all ports in ATTACHED state. This information
 * is used to detect misconfiguration.
 *
 * Each entry records the key, device name and port number of a port
 * together with the partner system and partner key associated with it;
 * lacp_misconfig_check() compares these against the partner information
 * of a group to find a partner that is already in use by a different
 * aggregation.
 */
typedef struct lacp_sel_ports {
	uint16_t sp_key;			/* compared with lg_key */
	char sp_devname[MAXNAMELEN + 1];	/* device name (for messages) */
	uint_t sp_port;				/* port number (for messages) */
	struct ether_addr sp_partner_system;	/* partner system MAC */
	uint32_t sp_partner_key;		/* partner key */
	struct lacp_sel_ports *sp_next;		/* next entry, or NULL */
} lacp_sel_ports_t;

/* Head of the singly-linked list; traversed with lacp_sel_lock held. */
static lacp_sel_ports_t *sel_ports = NULL;
/* Initialized by aggr_lacp_init() and destroyed by aggr_lacp_fini(). */
static kmutex_t lacp_sel_lock;
101 
/*
 * Forward declarations of file-local routines.
 */
static void periodic_timer_pop_locked(aggr_port_t *);
static void periodic_timer_pop(void *);
static void lacp_xmit_sm(aggr_port_t *);
static void lacp_periodic_sm(aggr_port_t *);
static void fill_lacp_pdu(aggr_port_t *, lacp_t *);
static void fill_lacp_ether(aggr_port_t *, struct ether_header *);
static void lacp_on(aggr_port_t *);
static void lacp_off(aggr_port_t *);
static boolean_t valid_lacp_pdu(aggr_port_t *, lacp_t *);
static void lacp_receive_sm(aggr_port_t *, lacp_t *);
static void aggr_set_coll_dist(aggr_port_t *, boolean_t);
static void aggr_set_coll_dist_locked(aggr_port_t *, boolean_t);
static void start_wait_while_timer(aggr_port_t *);
static void stop_wait_while_timer(aggr_port_t *);
static void lacp_reset_port(aggr_port_t *);
static void stop_current_while_timer(aggr_port_t *);
static void current_while_timer_pop(void *);
static void update_default_selected(aggr_port_t *);
static boolean_t update_selected(aggr_port_t *, lacp_t *);
static boolean_t lacp_sel_ports_add(aggr_port_t *);
static void lacp_sel_ports_del(aggr_port_t *);
123 
/*
 * Module-wide LACP initialization: sets up the mutex protecting the
 * global list of selected ports.
 */
void
aggr_lacp_init(void)
{
	mutex_init(&lacp_sel_lock, NULL, MUTEX_DEFAULT, NULL);
}
129 
/*
 * Module-wide LACP teardown: destroys the mutex created by
 * aggr_lacp_init().
 */
void
aggr_lacp_fini(void)
{
	mutex_destroy(&lacp_sel_lock);
}
135 
/*
 * Return the instance number encoded as the trailing run of decimal
 * digits of a device name (e.g. 12 for "e1000g12"). Returns 0 when the
 * name is empty or does not end in a digit.
 */
static int
inst_num(char *devname)
{
	size_t len = strlen(devname);
	int inst = 0;
	int fact = 1;

	/*
	 * Walk backwards from the last character. The bounds check comes
	 * first so that an empty or all-digit name never causes a read
	 * before the start of the string.
	 */
	while (len > 0 &&
	    devname[len - 1] >= '0' && devname[len - 1] <= '9') {
		inst += (devname[len - 1] - '0') * fact;
		fact *= 10;
		len--;
	}

	return (inst);
}
151 
152 /*
153  * Set the port LACP state to SELECTED. Returns B_FALSE if the operation
154  * could not be performed due to a memory allocation error, B_TRUE otherwise.
155  */
156 static boolean_t
157 lacp_port_select(aggr_port_t *portp)
158 {
159 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
160 
161 	if (!lacp_sel_ports_add(portp))
162 		return (B_FALSE);
163 	portp->lp_lacp.sm.selected = AGGR_SELECTED;
164 	return (B_TRUE);
165 }
166 
167 /*
168  * Set the port LACP state to UNSELECTED.
169  */
170 static void
171 lacp_port_unselect(aggr_port_t *portp)
172 {
173 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
174 
175 	lacp_sel_ports_del(portp);
176 	portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
177 }
178 
179 /*
180  * Initialize group specific LACP state and parameters.
181  */
182 void
183 aggr_lacp_init_grp(aggr_grp_t *aggrp)
184 {
185 	aggrp->aggr.PeriodicTimer = AGGR_LACP_TIMER_SHORT;
186 	aggrp->aggr.ActorSystemPriority = (uint16_t)lacp_system_priority;
187 	aggrp->aggr.CollectorMaxDelay = 10;
188 	aggrp->lg_lacp_mode = AGGR_LACP_OFF;
189 	aggrp->aggr.ready = B_FALSE;
190 }
191 
192 /*
193  * Complete LACP info initialization at port creation time.
194  */
195 void
196 aggr_lacp_init_port(aggr_port_t *portp)
197 {
198 	aggr_grp_t *aggrp = portp->lp_grp;
199 	aggr_lacp_port_t *pl = &portp->lp_lacp;
200 	uint16_t offset;
201 	uint32_t instance;
202 
203 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
204 	ASSERT(RW_LOCK_HELD(&aggrp->lg_lock));
205 	ASSERT(RW_LOCK_HELD(&portp->lp_lock));
206 
207 	/*
208 	 * Port numbers must be unique. For now, we encode the first two
209 	 * characters into the top byte of the port number. This will work
210 	 * with multiple types of NICs provided that the first two
211 	 * characters are unique.
212 	 */
213 	offset = ((portp->lp_devname[0] + portp->lp_devname[1]) << 8);
214 	instance = inst_num(portp->lp_devname);
215 	/* actor port # */
216 	pl->ActorPortNumber = offset + instance + portp->lp_port;
217 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s/%d): "
218 	    "ActorPortNumber = 0x%x\n", portp->lp_devname,
219 	    portp->lp_port, pl->ActorPortNumber));
220 
221 	pl->ActorPortPriority = (uint16_t)lacp_port_priority;
222 	pl->ActorPortAggrId = 0;	/* aggregator id - not used */
223 	pl->NTT = B_FALSE;			/* need to transmit */
224 
225 	pl->ActorAdminPortKey = aggrp->lg_key;
226 	pl->ActorOperPortKey = pl->ActorAdminPortKey;
227 	AGGR_LACP_DBG(("aggr_lacp_init_port(%s/%d) "
228 	    "ActorAdminPortKey = 0x%x, ActorAdminPortKey = 0x%x\n",
229 	    portp->lp_devname, portp->lp_port, pl->ActorAdminPortKey,
230 	    pl->ActorOperPortKey));
231 
232 	/* Actor admin. port state */
233 	pl->ActorAdminPortState.bit.activity = B_FALSE;
234 	pl->ActorAdminPortState.bit.timeout = B_TRUE;
235 	pl->ActorAdminPortState.bit.aggregation = B_TRUE;
236 	pl->ActorAdminPortState.bit.sync = B_FALSE;
237 	pl->ActorAdminPortState.bit.collecting = B_FALSE;
238 	pl->ActorAdminPortState.bit.distributing = B_FALSE;
239 	pl->ActorAdminPortState.bit.defaulted = B_FALSE;
240 	pl->ActorAdminPortState.bit.expired = B_FALSE;
241 	pl->ActorOperPortState = pl->ActorAdminPortState;
242 
243 	/*
244 	 * Partner Administrative Information
245 	 * (All initialized to zero except for the following)
246 	 * Fast Timeouts.
247 	 */
248 	pl->PartnerAdminPortState.bit.timeout =
249 	    pl->PartnerOperPortState.bit.timeout = B_TRUE;
250 
251 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
252 
253 	/*
254 	 * State machine information.
255 	 */
256 	pl->sm.lacp_on = B_FALSE;		/* LACP Off default */
257 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
258 	pl->sm.lacp_enabled = B_FALSE;
259 	pl->sm.port_enabled = B_FALSE;		/* Link Down */
260 	pl->sm.actor_churn = B_FALSE;
261 	pl->sm.partner_churn = B_FALSE;
262 	pl->sm.ready_n = B_FALSE;
263 	pl->sm.port_moved = B_FALSE;
264 
265 	lacp_port_unselect(portp);
266 
267 	pl->sm.periodic_state = LACP_NO_PERIODIC;
268 	pl->sm.receive_state = LACP_INITIALIZE;
269 	pl->sm.mux_state = LACP_DETACHED;
270 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
271 
272 	/*
273 	 * Timer information.
274 	 */
275 	pl->current_while_timer.id = 0;
276 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
277 
278 	pl->periodic_timer.id = 0;
279 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
280 
281 	pl->wait_while_timer.id = 0;
282 	pl->wait_while_timer.val = AGGREGATE_WAIT_TIME;
283 }
284 
285 /*
286  * Port initialization when we need to
287  * turn LACP on/off, etc. Not everything is
288  * reset like in the above routine.
289  *		Do NOT modify things like link status.
290  */
291 static void
292 lacp_reset_port(aggr_port_t *portp)
293 {
294 	aggr_lacp_port_t *pl = &portp->lp_lacp;
295 
296 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
297 
298 	pl->NTT = B_FALSE;			/* need to transmit */
299 
300 	/* reset operational port state */
301 	pl->ActorOperPortState.bit.timeout =
302 		pl->ActorAdminPortState.bit.timeout;
303 
304 	pl->ActorOperPortState.bit.sync = B_FALSE;
305 	pl->ActorOperPortState.bit.collecting = B_FALSE;
306 	pl->ActorOperPortState.bit.distributing = B_FALSE;
307 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
308 	pl->ActorOperPortState.bit.expired = B_FALSE;
309 
310 	pl->PartnerOperPortState.bit.timeout = B_TRUE;	/* fast t/o */
311 	pl->PartnerCollectorMaxDelay = 0; /* tens of microseconds */
312 
313 	/*
314 	 * State machine information.
315 	 */
316 	pl->sm.begin = B_TRUE;		/* Prevents transmissions */
317 	pl->sm.actor_churn = B_FALSE;
318 	pl->sm.partner_churn = B_FALSE;
319 	pl->sm.ready_n = B_FALSE;
320 
321 	lacp_port_unselect(portp);
322 
323 	pl->sm.periodic_state = LACP_NO_PERIODIC;
324 	pl->sm.receive_state = LACP_INITIALIZE;
325 	pl->sm.mux_state = LACP_DETACHED;
326 	pl->sm.churn_state = LACP_NO_ACTOR_CHURN;
327 
328 	/*
329 	 * Timer information.
330 	 */
331 	pl->current_while_timer.val = SHORT_TIMEOUT_TIME;
332 	pl->periodic_timer.val = FAST_PERIODIC_TIME;
333 }
334 
335 static void
336 aggr_lacp_mcast_on(aggr_port_t *port)
337 {
338 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
339 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
340 
341 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
342 		return;
343 
344 	(void) aggr_port_multicst(port, B_TRUE,
345 	    (uchar_t *)&slow_multicast_addr);
346 }
347 
348 static void
349 aggr_lacp_mcast_off(aggr_port_t *port)
350 {
351 	ASSERT(AGGR_LACP_LOCK_HELD(port->lp_grp));
352 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
353 
354 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
355 		return;
356 
357 	(void) aggr_port_multicst(port, B_FALSE,
358 	    (uchar_t *)&slow_multicast_addr);
359 }
360 
361 static void
362 start_periodic_timer(aggr_port_t *portp)
363 {
364 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
365 
366 	if (portp->lp_lacp.periodic_timer.id == 0) {
367 		portp->lp_lacp.periodic_timer.id =
368 		    timeout(periodic_timer_pop, portp,
369 		    drv_usectohz(1000000 * portp->lp_lacp.periodic_timer.val));
370 	}
371 }
372 
373 static void
374 stop_periodic_timer(aggr_port_t *portp)
375 {
376 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
377 
378 	if (portp->lp_lacp.periodic_timer.id != 0) {
379 		AGGR_LACP_UNLOCK(portp->lp_grp);
380 		(void) untimeout(portp->lp_lacp.periodic_timer.id);
381 		AGGR_LACP_LOCK(portp->lp_grp);
382 		portp->lp_lacp.periodic_timer.id = 0;
383 	}
384 }
385 
386 /*
387  * When the timer pops, we arrive here to
388  * clear out LACPDU count as well as transmit an
389  * LACPDU. We then set the periodic state and let
390  * the periodic state machine restart the timer.
391  */
392 
393 static void
394 periodic_timer_pop_locked(aggr_port_t *portp)
395 {
396 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
397 
398 	portp->lp_lacp.periodic_timer.id = NULL;
399 	portp->lp_lacp_stats.LACPDUsTx = 0;
400 
401 	/* current timestamp */
402 	portp->lp_lacp.time = gethrtime();
403 	portp->lp_lacp.NTT = B_TRUE;
404 	lacp_xmit_sm(portp);
405 
406 	/*
407 	 * Set Periodic State machine state based on the
408 	 * value of the Partner Operation Port State timeout
409 	 * bit.
410 	 */
411 	if (portp->lp_lacp.PartnerOperPortState.bit.timeout) {
412 		portp->lp_lacp.periodic_timer.val = FAST_PERIODIC_TIME;
413 		portp->lp_lacp.sm.periodic_state = LACP_FAST_PERIODIC;
414 	} else {
415 		portp->lp_lacp.periodic_timer.val = SLOW_PERIODIC_TIME;
416 		portp->lp_lacp.sm.periodic_state = LACP_SLOW_PERIODIC;
417 	}
418 
419 	lacp_periodic_sm(portp);
420 }
421 
422 static void
423 periodic_timer_pop(void *data)
424 {
425 	aggr_port_t *portp = data;
426 
427 	if (portp->lp_closing)
428 		return;
429 
430 	AGGR_LACP_LOCK(portp->lp_grp);
431 	periodic_timer_pop_locked(portp);
432 	AGGR_LACP_UNLOCK(portp->lp_grp);
433 }
434 
435 /*
436  * Invoked from:
437  *	- startup upon aggregation
438  *	- when the periodic timer pops
439  *	- when the periodic timer value is changed
440  *	- when the port is attached or detached
441  *	- when LACP mode is changed.
442  */
443 static void
444 lacp_periodic_sm(aggr_port_t *portp)
445 {
446 	lacp_periodic_state_t oldstate = portp->lp_lacp.sm.periodic_state;
447 	aggr_lacp_port_t *pl = &portp->lp_lacp;
448 
449 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
450 
451 	/* LACP_OFF state not in specification so check here.  */
452 	if (!pl->sm.lacp_on) {
453 		/* Stop timer whether it is running or not */
454 		stop_periodic_timer(portp);
455 		pl->sm.periodic_state = LACP_NO_PERIODIC;
456 		pl->NTT = B_FALSE;
457 		AGGR_LACP_DBG(("lacp_periodic_sm(%s/%d):NO LACP "
458 		    "%s--->%s\n", portp->lp_devname, portp->lp_port,
459 		    lacp_periodic_str[oldstate],
460 		    lacp_periodic_str[pl->sm.periodic_state]));
461 		return;
462 	}
463 
464 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
465 	    !pl->sm.port_enabled ||
466 	    !pl->ActorOperPortState.bit.activity &&
467 	    !pl->PartnerOperPortState.bit.activity) {
468 
469 		/* Stop timer whether it is running or not */
470 		stop_periodic_timer(portp);
471 		pl->sm.periodic_state = LACP_NO_PERIODIC;
472 		pl->NTT = B_FALSE;
473 		AGGR_LACP_DBG(("lacp_periodic_sm(%s/%d):STOP %s--->%s\n",
474 		    portp->lp_devname, portp->lp_port,
475 		    lacp_periodic_str[oldstate],
476 		    lacp_periodic_str[pl->sm.periodic_state]));
477 		return;
478 	}
479 
480 	/*
481 	 * Startup with FAST_PERIODIC_TIME if no previous LACPDU
482 	 * has been received. Then after we timeout, then it is
483 	 * possible to go to SLOW_PERIODIC_TIME.
484 	 */
485 	if (pl->sm.periodic_state == LACP_NO_PERIODIC) {
486 		pl->periodic_timer.val = FAST_PERIODIC_TIME;
487 		pl->sm.periodic_state = LACP_FAST_PERIODIC;
488 	} else if ((pl->sm.periodic_state == LACP_SLOW_PERIODIC) &&
489 	    pl->PartnerOperPortState.bit.timeout) {
490 		/*
491 		 * If we receive a bit indicating we are going to
492 		 * fast periodic from slow periodic, stop the timer
493 		 * and let the periodic_timer_pop routine deal
494 		 * with reseting the periodic state and transmitting
495 		 * a LACPDU.
496 		 */
497 		stop_periodic_timer(portp);
498 		periodic_timer_pop_locked(portp);
499 	}
500 
501 	/* Rearm timer with value provided by partner */
502 	start_periodic_timer(portp);
503 }
504 
505 /*
506  * This routine transmits an LACPDU if lacp_enabled
507  * is TRUE and if NTT is set.
508  */
509 static void
510 lacp_xmit_sm(aggr_port_t *portp)
511 {
512 	aggr_lacp_port_t *pl = &portp->lp_lacp;
513 	size_t	len;
514 	mblk_t  *mp;
515 	hrtime_t now, elapsed;
516 	const mac_txinfo_t *mtp;
517 
518 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
519 
520 	/* LACP_OFF state not in specification so check here.  */
521 	if (!pl->sm.lacp_on || !pl->NTT || !portp->lp_started)
522 		return;
523 
524 	/*
525 	 * Do nothing if LACP has been turned off or if the
526 	 * periodic state machine is not enabled.
527 	 */
528 	if ((pl->sm.periodic_state == LACP_NO_PERIODIC) ||
529 	    !pl->sm.lacp_enabled || pl->sm.begin) {
530 		pl->NTT = B_FALSE;
531 		return;
532 	}
533 
534 	/*
535 	 * If we have sent 5 Slow packets in the last second, avoid
536 	 * sending any more here. No more than three LACPDUs may be transmitted
537 	 * in any Fast_Periodic_Time interval.
538 	 */
539 	if (portp->lp_lacp_stats.LACPDUsTx >= 3) {
540 		/*
541 		 * Grab the current time value and see if
542 		 * more than 1 second has passed. If so,
543 		 * reset the timestamp and clear the count.
544 		 */
545 		now = gethrtime();
546 		elapsed = now - pl->time;
547 		if (elapsed > NSECS_PER_SEC) {
548 			portp->lp_lacp_stats.LACPDUsTx = 0;
549 			pl->time = now;
550 		} else {
551 			return;
552 		}
553 	}
554 
555 	len = sizeof (lacp_t) + sizeof (struct ether_header);
556 	mp = allocb(len, BPRI_MED);
557 	if (mp == NULL)
558 		return;
559 
560 	mp->b_wptr = mp->b_rptr + len;
561 	bzero(mp->b_rptr, len);
562 
563 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
564 	fill_lacp_pdu(portp,
565 	    (lacp_t *)(mp->b_rptr + sizeof (struct ether_header)));
566 
567 	/*
568 	 * Store the transmit info pointer locally in case it changes between
569 	 * loading mt_fn and mt_arg.
570 	 */
571 	mtp = portp->lp_txinfo;
572 	mtp->mt_fn(mtp->mt_arg, mp);
573 
574 	pl->NTT = B_FALSE;
575 	portp->lp_lacp_stats.LACPDUsTx++;
576 }
577 
578 /*
579  * Initialize the ethernet header of a LACP packet sent from the specified
580  * port.
581  */
582 static void
583 fill_lacp_ether(aggr_port_t *port, struct ether_header *ether)
584 {
585 	bcopy(port->lp_addr, (uint8_t *)&(ether->ether_shost), ETHERADDRL);
586 	bcopy(&slow_multicast_addr, (uint8_t *)&(ether->ether_dhost),
587 	    ETHERADDRL);
588 	ether->ether_type = htons(ETHERTYPE_SLOW);
589 }
590 
591 static void
592 fill_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
593 {
594 	aggr_lacp_port_t *pl = &portp->lp_lacp;
595 	aggr_grp_t *aggrp = portp->lp_grp;
596 
597 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
598 
599 	lacp->subtype = LACP_SUBTYPE;
600 	lacp->version = LACP_VERSION;
601 
602 	rw_enter(&aggrp->lg_lock, RW_READER);
603 	rw_enter(&portp->lp_lock, RW_READER);
604 
605 	/*
606 	 * Actor Information
607 	 */
608 	lacp->actor_info.tlv_type = ACTOR_TLV;
609 	lacp->actor_info.information_len = sizeof (link_info_t);
610 	lacp->actor_info.system_priority =
611 	    htons(aggrp->aggr.ActorSystemPriority);
612 	bcopy(aggrp->lg_addr, (uchar_t *)&lacp->actor_info.system_id,
613 	    ETHERADDRL);
614 	lacp->actor_info.key = htons(pl->ActorOperPortKey);
615 	lacp->actor_info.port_priority = htons(pl->ActorPortPriority);
616 	lacp->actor_info.port = htons(pl->ActorPortNumber);
617 	lacp->actor_info.state.state = pl->ActorOperPortState.state;
618 
619 	/*
620 	 * Partner Information
621 	 */
622 	lacp->partner_info.tlv_type = PARTNER_TLV;
623 	lacp->partner_info.information_len = sizeof (link_info_t);
624 	lacp->partner_info.system_priority =
625 	    htons(pl->PartnerOperSysPriority);
626 	lacp->partner_info.system_id = pl->PartnerOperSystem;
627 	lacp->partner_info.key = htons(pl->PartnerOperKey);
628 	lacp->partner_info.port_priority =
629 	    htons(pl->PartnerOperPortPriority);
630 	lacp->partner_info.port = htons(pl->PartnerOperPortNum);
631 	lacp->partner_info.state.state = pl->PartnerOperPortState.state;
632 
633 	/* Collector Information */
634 	lacp->tlv_collector = COLLECTOR_TLV;
635 	lacp->collector_len = 0x10;
636 	lacp->collector_max_delay = htons(aggrp->aggr.CollectorMaxDelay);
637 
638 	/* Termination Information */
639 	lacp->tlv_terminator = TERMINATOR_TLV;
640 	lacp->terminator_len = 0x0;
641 
642 	rw_exit(&portp->lp_lock);
643 	rw_exit(&aggrp->lg_lock);
644 }
645 
646 /*
647  * lacp_mux_sm - LACP mux state machine
648  *		This state machine is invoked from:
649  *			- startup upon aggregation
650  *			- from the Selection logic
651  *			- when the wait_while_timer pops
652  *			- when the aggregation MAC address is changed
653  *			- when receiving DL_NOTE_LINK_UP/DOWN
654  *			- when receiving DL_NOTE_AGGR_AVAIL/UNAVAIL
655  *			- when LACP mode is changed.
656  *			- when a DL_NOTE_SPEED is received
657  */
658 static void
659 lacp_mux_sm(aggr_port_t *portp)
660 {
661 	aggr_grp_t *aggrp = portp->lp_grp;
662 	boolean_t NTT_updated = B_FALSE;
663 	aggr_lacp_port_t *pl = &portp->lp_lacp;
664 	lacp_mux_state_t oldstate = pl->sm.mux_state;
665 
666 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
667 
668 	/* LACP_OFF state not in specification so check here.  */
669 	if (!pl->sm.lacp_on) {
670 		pl->sm.mux_state = LACP_DETACHED;
671 		pl->ActorOperPortState.bit.sync = B_FALSE;
672 
673 		if (pl->ActorOperPortState.bit.collecting ||
674 		    pl->ActorOperPortState.bit.distributing) {
675 			AGGR_LACP_DBG(("trunk link: (%s/%d): "
676 			    "Collector_Distributor Disabled.\n",
677 			    portp->lp_devname, portp->lp_port));
678 		}
679 
680 		pl->ActorOperPortState.bit.collecting =
681 		    pl->ActorOperPortState.bit.distributing = B_FALSE;
682 		return;
683 	}
684 
685 	if (pl->sm.begin || !pl->sm.lacp_enabled)
686 		pl->sm.mux_state = LACP_DETACHED;
687 
688 again:
689 	/* determine next state, or return if state unchanged */
690 	switch (pl->sm.mux_state) {
691 	case LACP_DETACHED:
692 		if (pl->sm.begin) {
693 			break;
694 		}
695 
696 		if ((pl->sm.selected == AGGR_SELECTED) ||
697 		    (pl->sm.selected == AGGR_STANDBY)) {
698 			pl->sm.mux_state = LACP_WAITING;
699 			break;
700 		}
701 		return;
702 
703 	case LACP_WAITING:
704 		if (pl->sm.selected == AGGR_UNSELECTED) {
705 			pl->sm.mux_state = LACP_DETACHED;
706 			break;
707 		}
708 
709 		if ((pl->sm.selected == AGGR_SELECTED) && aggrp->aggr.ready) {
710 			pl->sm.mux_state = LACP_ATTACHED;
711 			break;
712 		}
713 		return;
714 
715 	case LACP_ATTACHED:
716 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
717 		    (pl->sm.selected == AGGR_STANDBY)) {
718 			pl->sm.mux_state = LACP_DETACHED;
719 			break;
720 		}
721 
722 		if ((pl->sm.selected == AGGR_SELECTED) &&
723 		    pl->PartnerOperPortState.bit.sync) {
724 			pl->sm.mux_state = LACP_COLLECTING_DISTRIBUTING;
725 			break;
726 		}
727 		return;
728 
729 	case LACP_COLLECTING_DISTRIBUTING:
730 		if ((pl->sm.selected == AGGR_UNSELECTED) ||
731 		    (pl->sm.selected == AGGR_STANDBY) ||
732 		    !pl->PartnerOperPortState.bit.sync) {
733 			pl->sm.mux_state = LACP_ATTACHED;
734 			break;
735 		}
736 		return;
737 	}
738 
739 	AGGR_LACP_DBG(("lacp_mux_sm(%s/%d):%s--->%s\n",
740 	    portp->lp_devname, portp->lp_port, lacp_mux_str[oldstate],
741 	    lacp_mux_str[pl->sm.mux_state]));
742 
743 	/* perform actions on entering a new state */
744 	switch (pl->sm.mux_state) {
745 	case LACP_DETACHED:
746 		if (pl->ActorOperPortState.bit.collecting ||
747 		    pl->ActorOperPortState.bit.distributing) {
748 			AGGR_LACP_DBG(("trunk link: (%s/%d): "
749 			    "Collector_Distributor Disabled.\n",
750 			    portp->lp_devname, portp->lp_port));
751 		}
752 
753 		pl->ActorOperPortState.bit.sync =
754 		    pl->ActorOperPortState.bit.collecting = B_FALSE;
755 
756 		/* Turn OFF Collector_Distributor */
757 		aggr_set_coll_dist(portp, B_FALSE);
758 
759 		pl->ActorOperPortState.bit.distributing = B_FALSE;
760 		NTT_updated = B_TRUE;
761 		break;
762 
763 	case LACP_WAITING:
764 		start_wait_while_timer(portp);
765 		break;
766 
767 	case LACP_ATTACHED:
768 		if (pl->ActorOperPortState.bit.collecting ||
769 		    pl->ActorOperPortState.bit.distributing) {
770 			AGGR_LACP_DBG(("trunk link: (%s%d): "
771 			    "Collector_Distributor Disabled.\n",
772 			    portp->lp_devname, portp->lp_port));
773 		}
774 
775 		pl->ActorOperPortState.bit.sync = B_TRUE;
776 		pl->ActorOperPortState.bit.collecting = B_FALSE;
777 
778 		/* Turn OFF Collector_Distributor */
779 		aggr_set_coll_dist(portp, B_FALSE);
780 
781 		pl->ActorOperPortState.bit.distributing = B_FALSE;
782 		NTT_updated = B_TRUE;
783 		if (pl->PartnerOperPortState.bit.sync) {
784 			/*
785 			 * We had already received an updated sync from
786 			 * the partner. Attempt to transition to
787 			 * collecting/distributing now.
788 			 */
789 			goto again;
790 		}
791 		break;
792 
793 	case LACP_COLLECTING_DISTRIBUTING:
794 		if (!pl->ActorOperPortState.bit.collecting &&
795 		    !pl->ActorOperPortState.bit.distributing) {
796 			AGGR_LACP_DBG(("trunk link: (%s/%d): "
797 			    "Collector_Distributor Enabled.\n",
798 			    portp->lp_devname, portp->lp_port));
799 		}
800 		pl->ActorOperPortState.bit.distributing = B_TRUE;
801 
802 		/* Turn Collector_Distributor back ON */
803 		aggr_set_coll_dist(portp, B_TRUE);
804 
805 		pl->ActorOperPortState.bit.collecting = B_TRUE;
806 		NTT_updated = B_TRUE;
807 		break;
808 	}
809 
810 	/*
811 	 * If we updated the state of the NTT variable, then
812 	 * initiate a LACPDU transmission.
813 	 */
814 	if (NTT_updated) {
815 		pl->NTT = B_TRUE;
816 		lacp_xmit_sm(portp);
817 	}
818 } /* lacp_mux_sm */
819 
820 
821 static void
822 receive_marker_pdu(aggr_port_t *portp, mblk_t *mp)
823 {
824 	marker_pdu_t		*markerp = (marker_pdu_t *)mp->b_rptr;
825 	const mac_txinfo_t	*mtp;
826 
827 	AGGR_LACP_LOCK(portp->lp_grp);
828 
829 	AGGR_LACP_DBG(("trunk link: (%s/%d): MARKER PDU received:\n",
830 	    portp->lp_devname, portp->lp_port));
831 
832 	/* LACP_OFF state not in specification so check here.  */
833 	if (!portp->lp_lacp.sm.lacp_on)
834 		goto bail;
835 
836 	if (MBLKL(mp) < sizeof (marker_pdu_t))
837 		goto bail;
838 
839 	if (markerp->version != MARKER_VERSION) {
840 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed MARKER PDU: "
841 		    "version = %d does not match s/w version %d\n",
842 		    portp->lp_devname, portp->lp_port,
843 		    markerp->version, MARKER_VERSION));
844 		goto bail;
845 	}
846 
847 	if (markerp->tlv_marker == MARKER_RESPONSE_TLV) {
848 		/* We do not yet send out MARKER info PDUs */
849 		AGGR_LACP_DBG(("trunk link (%s/%d): MARKER RESPONSE PDU: "
850 		    " MARKER TLV = %d - We don't send out info type!\n",
851 		    portp->lp_devname, portp->lp_port,
852 		    markerp->tlv_marker));
853 		goto bail;
854 	}
855 
856 	if (markerp->tlv_marker != MARKER_INFO_TLV) {
857 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed MARKER PDU: "
858 		    " MARKER TLV = %d \n", portp->lp_devname, portp->lp_port,
859 		    markerp->tlv_marker));
860 		goto bail;
861 	}
862 
863 	if (markerp->marker_len != MARKER_INFO_RESPONSE_LENGTH) {
864 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed MARKER PDU: "
865 		    " MARKER length = %d \n", portp->lp_devname, portp->lp_port,
866 		    markerp->marker_len));
867 		goto bail;
868 	}
869 
870 	if (markerp->requestor_port != portp->lp_lacp.PartnerOperPortNum) {
871 		AGGR_LACP_DBG(("trunk link (%s/%d): MARKER PDU: "
872 		    " MARKER Port %d not equal to Partner port %d\n",
873 		    portp->lp_devname, portp->lp_port,
874 		    markerp->requestor_port,
875 		    portp->lp_lacp.PartnerOperPortNum));
876 		goto bail;
877 	}
878 
879 	if (ether_cmp(&markerp->system_id,
880 	    &portp->lp_lacp.PartnerOperSystem) != 0) {
881 		AGGR_LACP_DBG(("trunk link (%s/%d): MARKER PDU: "
882 		    " MARKER MAC not equal to Partner MAC\n",
883 		    portp->lp_devname, portp->lp_port));
884 		goto bail;
885 	}
886 
887 	/*
888 	 * Turn into Marker Response PDU
889 	 * and return mblk to sending system
890 	 */
891 	markerp->tlv_marker = MARKER_RESPONSE_TLV;
892 
893 	/* reuse the space that was used by received ethernet header */
894 	ASSERT(MBLKHEAD(mp) >= sizeof (struct ether_header));
895 	mp->b_rptr -= sizeof (struct ether_header);
896 	fill_lacp_ether(portp, (struct ether_header *)mp->b_rptr);
897 	AGGR_LACP_UNLOCK(portp->lp_grp);
898 
899 	/*
900 	 * Store the transmit info pointer locally in case it changes between
901 	 * loading mt_fn and mt_arg.
902 	 */
903 	mtp = portp->lp_txinfo;
904 	mtp->mt_fn(mtp->mt_arg, mp);
905 	return;
906 
907 bail:
908 	AGGR_LACP_UNLOCK(portp->lp_grp);
909 	freemsg(mp);
910 }
911 
912 
913 /*
914  * Update the LACP mode (off, active, or passive) of the specified group.
915  */
916 void
917 aggr_lacp_update_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode)
918 {
919 	aggr_lacp_mode_t old_mode = grp->lg_lacp_mode;
920 	aggr_port_t *port;
921 
922 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
923 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
924 
925 	if (mode == old_mode)
926 		return;
927 
928 	grp->lg_lacp_mode = mode;
929 
930 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
931 		port->lp_lacp.ActorAdminPortState.bit.activity =
932 		    port->lp_lacp.ActorOperPortState.bit.activity =
933 		    (mode == AGGR_LACP_ACTIVE);
934 
935 		if (old_mode == AGGR_LACP_OFF) {
936 			/* OFF -> {PASSIVE,ACTIVE} */
937 			/* turn OFF Collector_Distributor */
938 			aggr_set_coll_dist(port, B_FALSE);
939 			rw_enter(&port->lp_lock, RW_WRITER);
940 			lacp_on(port);
941 			if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
942 				aggr_lacp_port_attached(port);
943 			rw_exit(&port->lp_lock);
944 		} else if (mode == AGGR_LACP_OFF) {
945 			/* {PASSIVE,ACTIVE} -> OFF */
946 			rw_enter(&port->lp_lock, RW_WRITER);
947 			lacp_off(port);
948 			rw_exit(&port->lp_lock);
949 			if (!grp->lg_closing) {
950 				/* Turn ON Collector_Distributor */
951 				aggr_set_coll_dist(port, B_TRUE);
952 			}
953 		} else {
954 			/* PASSIVE->ACTIVE or ACTIVE->PASSIVE */
955 			port->lp_lacp.sm.begin = B_TRUE;
956 			lacp_mux_sm(port);
957 			lacp_periodic_sm(port);
958 
959 			/* kick off state machines */
960 			lacp_receive_sm(port, NULL);
961 			lacp_mux_sm(port);
962 		}
963 
964 		if (grp->lg_closing)
965 			break;
966 	}
967 }
968 
969 
970 /*
971  * Update the LACP timer (short or long) of the specified group.
972  */
973 void
974 aggr_lacp_update_timer(aggr_grp_t *grp, aggr_lacp_timer_t timer)
975 {
976 	aggr_port_t *port;
977 
978 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
979 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
980 
981 	if (timer == grp->aggr.PeriodicTimer)
982 		return;
983 
984 	grp->aggr.PeriodicTimer = timer;
985 
986 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
987 		port->lp_lacp.ActorAdminPortState.bit.timeout =
988 		    port->lp_lacp.ActorOperPortState.bit.timeout =
989 		    (timer == AGGR_LACP_TIMER_SHORT);
990 	}
991 }
992 
993 
994 /*
995  * Sets the initial LACP mode (off, active, passive) and LACP timer
996  * (short, long) of the specified group.
997  */
998 void
999 aggr_lacp_set_mode(aggr_grp_t *grp, aggr_lacp_mode_t mode,
1000     aggr_lacp_timer_t timer)
1001 {
1002 	aggr_port_t *port;
1003 
1004 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1005 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1006 
1007 	grp->lg_lacp_mode = mode;
1008 	grp->aggr.PeriodicTimer = timer;
1009 
1010 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1011 		port->lp_lacp.ActorAdminPortState.bit.activity =
1012 		    port->lp_lacp.ActorOperPortState.bit.activity =
1013 		    (mode == AGGR_LACP_ACTIVE);
1014 
1015 		port->lp_lacp.ActorAdminPortState.bit.timeout =
1016 			port->lp_lacp.ActorOperPortState.bit.timeout =
1017 			(timer == AGGR_LACP_TIMER_SHORT);
1018 
1019 		if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1020 			/* Turn ON Collector_Distributor */
1021 			aggr_set_coll_dist(port, B_TRUE);
1022 		} else { /* LACP_ACTIVE/PASSIVE */
1023 			rw_enter(&port->lp_lock, RW_WRITER);
1024 			lacp_on(port);
1025 			rw_exit(&port->lp_lock);
1026 		}
1027 	}
1028 }
1029 
1030 /*
1031  * Verify that the Partner MAC and Key recorded by the specified
1032  * port are not found in other ports that are not part of our
1033  * aggregation. Returns B_TRUE if such a port is found, B_FALSE
1034  * otherwise.
1035  */
1036 static boolean_t
1037 lacp_misconfig_check(aggr_port_t *portp)
1038 {
1039 	aggr_grp_t *grp = portp->lp_grp;
1040 	lacp_sel_ports_t *cport;
1041 
1042 	mutex_enter(&lacp_sel_lock);
1043 
1044 	for (cport = sel_ports; cport != NULL; cport = cport->sp_next) {
1045 
1046 		/* skip entries of the group of the port being checked */
1047 		if (cport->sp_key == grp->lg_key)
1048 			continue;
1049 
1050 		if ((ether_cmp(&cport->sp_partner_system,
1051 		    &grp->aggr.PartnerSystem) == 0) &&
1052 		    (cport->sp_partner_key == grp->aggr.PartnerOperAggrKey)) {
1053 			char mac_str[ETHERADDRL*3];
1054 			struct ether_addr *mac = &cport->sp_partner_system;
1055 
1056 			/*
1057 			 * The Partner port information is already in use
1058 			 * by ports in another aggregation so disable this
1059 			 * port.
1060 			 */
1061 
1062 			(void) snprintf(mac_str, sizeof (mac_str),
1063 			    "%x:%x:%x:%x:%x:%x",
1064 			    mac->ether_addr_octet[0], mac->ether_addr_octet[1],
1065 			    mac->ether_addr_octet[2], mac->ether_addr_octet[3],
1066 			    mac->ether_addr_octet[4], mac->ether_addr_octet[5]);
1067 
1068 			portp->lp_lacp.sm.selected = AGGR_UNSELECTED;
1069 			cmn_err(CE_NOTE, "aggr key %d port %s/%d: Port Partner "
1070 			    "MAC %s and key %d in use on aggregation "
1071 			    "key %d port %s/%d\n", grp->lg_key,
1072 			    portp->lp_devname, portp->lp_port,
1073 			    mac_str, portp->lp_lacp.PartnerOperKey,
1074 			    cport->sp_key, cport->sp_devname, cport->sp_port);
1075 			break;
1076 		}
1077 	}
1078 
1079 	mutex_exit(&lacp_sel_lock);
1080 	return (cport != NULL);
1081 }
1082 
1083 /*
1084  * Remove the specified port from the list of selected ports.
1085  */
1086 static void
1087 lacp_sel_ports_del(aggr_port_t *portp)
1088 {
1089 	lacp_sel_ports_t *cport, **prev = NULL;
1090 
1091 	mutex_enter(&lacp_sel_lock);
1092 
1093 	prev = &sel_ports;
1094 	for (cport = sel_ports; cport != NULL; prev = &cport->sp_next,
1095 	    cport = cport->sp_next) {
1096 		if (bcmp(portp->lp_devname, cport->sp_devname,
1097 		    MAXNAMELEN + 1) == 0 &&
1098 		    (portp->lp_port == cport->sp_port)) {
1099 			break;
1100 		}
1101 	}
1102 
1103 	if (cport == NULL) {
1104 		mutex_exit(&lacp_sel_lock);
1105 		return;
1106 	}
1107 
1108 	*prev = cport->sp_next;
1109 	kmem_free(cport, sizeof (*cport));
1110 
1111 	mutex_exit(&lacp_sel_lock);
1112 }
1113 
1114 /*
1115  * Add the specified port to the list of selected ports. Returns B_FALSE
1116  * if the operation could not be performed due to an memory allocation
1117  * error.
1118  */
1119 static boolean_t
1120 lacp_sel_ports_add(aggr_port_t *portp)
1121 {
1122 	lacp_sel_ports_t *new_port;
1123 	lacp_sel_ports_t *cport, **last;
1124 
1125 	mutex_enter(&lacp_sel_lock);
1126 
1127 	/* check if port is already in the list */
1128 	last = &sel_ports;
1129 	for (cport = sel_ports; cport != NULL;
1130 	    last = &cport->sp_next, cport = cport->sp_next) {
1131 		if (bcmp(portp->lp_devname, cport->sp_devname,
1132 		    MAXNAMELEN + 1) == 0 && (portp->lp_port ==
1133 		    cport->sp_port)) {
1134 			ASSERT(cport->sp_partner_key ==
1135 			    portp->lp_lacp.PartnerOperKey);
1136 			ASSERT(ether_cmp(&cport->sp_partner_system,
1137 			    &portp->lp_lacp.PartnerOperSystem) == 0);
1138 
1139 			mutex_exit(&lacp_sel_lock);
1140 			return (B_TRUE);
1141 		}
1142 	}
1143 
1144 	/* create and initialize new entry */
1145 	new_port = kmem_zalloc(sizeof (lacp_sel_ports_t), KM_NOSLEEP);
1146 	if (new_port == NULL) {
1147 		mutex_exit(&lacp_sel_lock);
1148 		return (B_FALSE);
1149 	}
1150 
1151 	new_port->sp_key = portp->lp_grp->lg_key;
1152 	bcopy(&portp->lp_lacp.PartnerOperSystem,
1153 	    &new_port->sp_partner_system, sizeof (new_port->sp_partner_system));
1154 	new_port->sp_partner_key = portp->lp_lacp.PartnerOperKey;
1155 	bcopy(portp->lp_devname, new_port->sp_devname, MAXNAMELEN + 1);
1156 	new_port->sp_port = portp->lp_port;
1157 
1158 	*last = new_port;
1159 
1160 	mutex_exit(&lacp_sel_lock);
1161 	return (B_TRUE);
1162 }
1163 
1164 /*
1165  * lacp_selection_logic - LACP selection logic
1166  *		Sets the selected variable on a per port basis
1167  *		and sets Ready when all waiting ports are ready
1168  *		to go online.
1169  *
1170  * parameters:
1171  *      - portp - instance this applies to.
1172  *
1173  * invoked:
1174  *    - when initialization is needed
1175  *    - when UNSELECTED is set from the lacp_receive_sm() in LACP_CURRENT state
1176  *    - When the lacp_receive_sm goes to the LACP_DEFAULTED state
1177  *    - every time the wait_while_timer pops
1178  *    - everytime we turn LACP on/off
1179  */
1180 static void
1181 lacp_selection_logic(aggr_port_t *portp)
1182 {
1183 	aggr_port_t *tpp;
1184 	aggr_grp_t *aggrp = portp->lp_grp;
1185 	int ports_waiting;
1186 	boolean_t reset_mac = B_FALSE;
1187 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1188 
1189 	ASSERT(AGGR_LACP_LOCK_HELD(aggrp));
1190 
1191 	/* LACP_OFF state not in specification so check here.  */
1192 	if (!pl->sm.lacp_on) {
1193 		lacp_port_unselect(portp);
1194 		aggrp->aggr.ready = B_FALSE;
1195 		lacp_mux_sm(portp);
1196 		return;
1197 	}
1198 
1199 	if (pl->sm.begin || !pl->sm.lacp_enabled ||
1200 	    (portp->lp_state != AGGR_PORT_STATE_ATTACHED)) {
1201 
1202 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1203 		    "selected %d-->%d (begin=%d, lacp_enabled = %d, "
1204 		    "lp_state=%d)\n", portp->lp_devname, portp->lp_port,
1205 		    pl->sm.selected, AGGR_UNSELECTED,
1206 		    pl->sm.begin, pl->sm.lacp_enabled,
1207 		    portp->lp_state));
1208 
1209 		lacp_port_unselect(portp);
1210 		aggrp->aggr.ready = B_FALSE;
1211 		lacp_mux_sm(portp);
1212 		return;
1213 	}
1214 
1215 	/*
1216 	 * If LACP is not enabled then selected is never set.
1217 	 */
1218 	if (!pl->sm.lacp_enabled) {
1219 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1220 		    "selected %d-->%d\n", portp->lp_devname, portp->lp_port,
1221 		    pl->sm.selected, AGGR_UNSELECTED));
1222 
1223 		lacp_port_unselect(portp);
1224 		lacp_mux_sm(portp);
1225 		return;
1226 	}
1227 
1228 	/*
1229 	 * Check if the Partner MAC or Key are zero. If so, we have
1230 	 * not received any LACP info or it has expired and the
1231 	 * receive machine is in the LACP_DEFAULTED state.
1232 	 */
1233 	if (ether_cmp(&pl->PartnerOperSystem, &etherzeroaddr) == 0 ||
1234 	    (pl->PartnerOperKey == 0)) {
1235 
1236 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1237 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1238 			    &etherzeroaddr) != 0 &&
1239 			    (tpp->lp_lacp.PartnerOperKey != 0))
1240 				break;
1241 		}
1242 
1243 		/*
1244 		 * If all ports have no key or aggregation address,
1245 		 * then clear the negotiated Partner MAC and key.
1246 		 */
1247 		if (tpp == NULL) {
1248 			/* Clear the aggregation Partner MAC and key */
1249 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1250 			aggrp->aggr.PartnerOperAggrKey = 0;
1251 		}
1252 
1253 		return;
1254 	}
1255 
1256 	/*
1257 	 * Insure that at least one port in the aggregation
1258 	 * matches the Partner aggregation MAC and key. If not,
1259 	 * then clear the aggregation MAC and key. Later we will
1260 	 * set the Partner aggregation MAC and key to that of the
1261 	 * current port's Partner MAC and key.
1262 	 */
1263 	if (ether_cmp(&pl->PartnerOperSystem,
1264 	    &aggrp->aggr.PartnerSystem) != 0 ||
1265 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1266 
1267 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1268 			if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1269 			    &aggrp->aggr.PartnerSystem) == 0 &&
1270 			    (tpp->lp_lacp.PartnerOperKey ==
1271 			    aggrp->aggr.PartnerOperAggrKey))
1272 				break;
1273 		}
1274 
1275 		if (tpp == NULL) {
1276 			/* Clear the aggregation Partner MAC and key */
1277 			aggrp->aggr.PartnerSystem = etherzeroaddr;
1278 			aggrp->aggr.PartnerOperAggrKey = 0;
1279 			reset_mac = B_TRUE;
1280 		}
1281 	}
1282 
1283 	/*
1284 	 * If our Actor MAC is found in the Partner MAC
1285 	 * on this port then we have a loopback misconfiguration.
1286 	 */
1287 	if (ether_cmp(&pl->PartnerOperSystem,
1288 	    (struct ether_addr *)&aggrp->lg_addr) == 0) {
1289 		cmn_err(CE_NOTE, "trunk link: (%s/%d): Loopback condition.\n",
1290 		    portp->lp_devname, portp->lp_port);
1291 
1292 		lacp_port_unselect(portp);
1293 		lacp_mux_sm(portp);
1294 		return;
1295 	}
1296 
1297 	/*
1298 	 * If our Partner MAC and Key are found on any other
1299 	 * ports that are not in our aggregation, we have
1300 	 * a misconfiguration.
1301 	 */
1302 	if (lacp_misconfig_check(portp)) {
1303 		lacp_mux_sm(portp);
1304 		return;
1305 	}
1306 
1307 	/*
1308 	 * If the Aggregation Partner MAC and Key have not been
1309 	 * set, then this is either the first port or the aggregation
1310 	 * MAC and key have been reset. In either case we must set
1311 	 * the values of the Partner MAC and key.
1312 	 */
1313 	if (ether_cmp(&aggrp->aggr.PartnerSystem, &etherzeroaddr) == 0 &&
1314 	    (aggrp->aggr.PartnerOperAggrKey == 0)) {
1315 		/* Set aggregation Partner MAC and key */
1316 		aggrp->aggr.PartnerSystem = pl->PartnerOperSystem;
1317 		aggrp->aggr.PartnerOperAggrKey = pl->PartnerOperKey;
1318 
1319 		/*
1320 		 * If we reset Partner aggregation MAC, then restart
1321 		 * selection_logic on ports that match new MAC address.
1322 		 */
1323 		if (reset_mac) {
1324 			for (tpp = aggrp->lg_ports; tpp; tpp =
1325 			    tpp->lp_next) {
1326 				if (tpp == portp)
1327 					continue;
1328 				if (ether_cmp(&tpp->lp_lacp.PartnerOperSystem,
1329 				    &aggrp->aggr.PartnerSystem) == 0 &&
1330 				    (tpp->lp_lacp.PartnerOperKey ==
1331 				    aggrp->aggr.PartnerOperAggrKey))
1332 					lacp_selection_logic(tpp);
1333 			}
1334 		}
1335 	} else if (ether_cmp(&pl->PartnerOperSystem,
1336 	    &aggrp->aggr.PartnerSystem) != 0 ||
1337 	    (pl->PartnerOperKey != aggrp->aggr.PartnerOperAggrKey)) {
1338 		/*
1339 		 * The Partner port information does not match
1340 		 * that of the other ports in the aggregation
1341 		 * so disable this port.
1342 		 */
1343 		lacp_port_unselect(portp);
1344 
1345 		cmn_err(CE_NOTE, "trunk link: (%s/%d): Port Partner MAC or"
1346 		    " key (%d) incompatible with Aggregation Partner "
1347 		    "MAC or key (%d)\n",
1348 		    portp->lp_devname, portp->lp_port,
1349 		    pl->PartnerOperKey, aggrp->aggr.PartnerOperAggrKey);
1350 
1351 		lacp_mux_sm(portp);
1352 		return;
1353 	}
1354 
1355 	/* If we get to here, automatically set selected */
1356 	if (pl->sm.selected != AGGR_SELECTED) {
1357 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1358 		    "selected %d-->%d\n", portp->lp_devname, portp->lp_port,
1359 		    pl->sm.selected, AGGR_SELECTED));
1360 		if (!lacp_port_select(portp))
1361 			return;
1362 		lacp_mux_sm(portp);
1363 	}
1364 
1365 	/*
1366 	 * From this point onward we have selected the port
1367 	 * and are simply checking if the Ready flag should
1368 	 * be set.
1369 	 */
1370 
1371 	/*
1372 	 * If at least two ports are waiting to aggregate
1373 	 * and ready_n is set on all ports waiting to aggregate
1374 	 * then set READY for the aggregation.
1375 	 */
1376 
1377 	ports_waiting = 0;
1378 
1379 	if (!aggrp->aggr.ready) {
1380 		/*
1381 		 * If all ports in the aggregation have received compatible
1382 		 * partner information and they match up correctly with the
1383 		 * switch, there is no need to wait for all the
1384 		 * wait_while_timers to pop.
1385 		 */
1386 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next) {
1387 			if (((tpp->lp_lacp.sm.mux_state == LACP_WAITING) ||
1388 			    tpp->lp_lacp.sm.begin) &&
1389 			    !pl->PartnerOperPortState.bit.sync) {
1390 				/* Add up ports uninitialized or waiting */
1391 				ports_waiting++;
1392 				if (!tpp->lp_lacp.sm.ready_n)
1393 					return;
1394 			}
1395 		}
1396 	}
1397 
1398 	if (aggrp->aggr.ready) {
1399 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): "
1400 		    "aggr.ready already set\n", portp->lp_devname,
1401 		    portp->lp_port));
1402 		lacp_mux_sm(portp);
1403 	} else {
1404 		AGGR_LACP_DBG(("lacp_selection_logic:(%s/%d): Ready %d-->%d\n",
1405 		    portp->lp_devname, portp->lp_port, aggrp->aggr.ready,
1406 		    B_TRUE));
1407 		aggrp->aggr.ready = B_TRUE;
1408 
1409 		for (tpp = aggrp->lg_ports; tpp; tpp = tpp->lp_next)
1410 			lacp_mux_sm(tpp);
1411 	}
1412 
1413 }
1414 
1415 /*
1416  * wait_while_timer_pop - When the timer pops, we arrive here to
1417  *			set ready_n and trigger the selection logic.
1418  */
1419 static void
1420 wait_while_timer_pop(void *data)
1421 {
1422 	aggr_port_t *portp = data;
1423 
1424 	if (portp->lp_closing)
1425 		return;
1426 
1427 	AGGR_LACP_LOCK(portp->lp_grp);
1428 
1429 	AGGR_LACP_DBG(("trunk link:(%s/%d): wait_while_timer pop \n",
1430 	    portp->lp_devname, portp->lp_port));
1431 	portp->lp_lacp.wait_while_timer.id = 0;
1432 	portp->lp_lacp.sm.ready_n = B_TRUE;
1433 
1434 	lacp_selection_logic(portp);
1435 	AGGR_LACP_UNLOCK(portp->lp_grp);
1436 }
1437 
1438 static void
1439 start_wait_while_timer(aggr_port_t *portp)
1440 {
1441 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1442 
1443 	if (portp->lp_lacp.wait_while_timer.id == 0) {
1444 		portp->lp_lacp.wait_while_timer.id =
1445 		    timeout(wait_while_timer_pop, portp,
1446 		    drv_usectohz(1000000 *
1447 		    portp->lp_lacp.wait_while_timer.val));
1448 	}
1449 }
1450 
1451 
1452 static void
1453 stop_wait_while_timer(portp)
1454 aggr_port_t *portp;
1455 {
1456 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1457 
1458 	if (portp->lp_lacp.wait_while_timer.id != 0) {
1459 		AGGR_LACP_UNLOCK(portp->lp_grp);
1460 		(void) untimeout(portp->lp_lacp.wait_while_timer.id);
1461 		AGGR_LACP_LOCK(portp->lp_grp);
1462 		portp->lp_lacp.wait_while_timer.id = 0;
1463 	}
1464 }
1465 
1466 /*
1467  * Invoked when a port has been attached to a group.
1468  * Complete the processing that couldn't be finished from lacp_on()
1469  * because the port was not started. We know that the link is full
1470  * duplex and ON, otherwise it wouldn't be attached.
1471  */
1472 void
1473 aggr_lacp_port_attached(aggr_port_t *portp)
1474 {
1475 	aggr_grp_t *grp = portp->lp_grp;
1476 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1477 
1478 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1479 	ASSERT(portp->lp_state == AGGR_PORT_STATE_ATTACHED);
1480 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1481 
1482 	AGGR_LACP_DBG(("aggr_lacp_port_attached: "
1483 	    "port %s/%d\n", portp->lp_devname, portp->lp_port));
1484 
1485 	portp->lp_lacp.sm.port_enabled = B_TRUE;	/* link on */
1486 
1487 	if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
1488 		pl->ActorAdminPortState.bit.activity =
1489 		    pl->ActorOperPortState.bit.activity = B_FALSE;
1490 
1491 		/* Turn ON Collector_Distributor */
1492 		aggr_set_coll_dist_locked(portp, B_TRUE);
1493 
1494 		return;
1495 	}
1496 
1497 	pl->ActorAdminPortState.bit.activity =
1498 	    pl->ActorOperPortState.bit.activity =
1499 	    (grp->lg_lacp_mode == AGGR_LACP_ACTIVE);
1500 
1501 	pl->ActorAdminPortState.bit.timeout =
1502 	    pl->ActorOperPortState.bit.timeout =
1503 	    (grp->aggr.PeriodicTimer == AGGR_LACP_TIMER_SHORT);
1504 
1505 	pl->sm.lacp_enabled = B_TRUE;
1506 	pl->ActorOperPortState.bit.aggregation = B_TRUE;
1507 	pl->sm.begin = B_TRUE;
1508 
1509 	if (!pl->sm.lacp_on) {
1510 		/* Turn OFF Collector_Distributor */
1511 		aggr_set_coll_dist_locked(portp, B_FALSE);
1512 
1513 		lacp_on(portp);
1514 	} else {
1515 		lacp_receive_sm(portp, NULL);
1516 		lacp_mux_sm(portp);
1517 
1518 		/* Enable Multicast Slow Protocol address */
1519 		aggr_lacp_mcast_on(portp);
1520 
1521 		/* periodic_sm is started up from the receive machine */
1522 		lacp_selection_logic(portp);
1523 	}
1524 }
1525 
1526 /*
1527  * Invoked when a port has been detached from a group. Turn off
1528  * LACP processing if it was enabled.
1529  */
1530 void
1531 aggr_lacp_port_detached(aggr_port_t *portp)
1532 {
1533 	aggr_grp_t *grp = portp->lp_grp;
1534 
1535 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1536 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1537 
1538 	AGGR_LACP_DBG(("aggr_lacp_port_detached: port %s/%d\n",
1539 	    portp->lp_devname, portp->lp_port));
1540 
1541 	portp->lp_lacp.sm.port_enabled = B_FALSE;
1542 
1543 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1544 		return;
1545 
1546 	/* Disable Slow Protocol PDUs */
1547 	lacp_off(portp);
1548 }
1549 
1550 
1551 /*
1552  * Invoked after the outbound port selection policy has been changed.
1553  */
1554 void
1555 aggr_lacp_policy_changed(aggr_grp_t *grp)
1556 {
1557 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
1558 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1559 
1560 	/* suspend transmission for CollectorMaxDelay time */
1561 	delay(grp->aggr.CollectorMaxDelay * 10);
1562 }
1563 
1564 
1565 /*
1566  * Enable Slow Protocol LACP and Marker PDUs.
1567  */
1568 static void
1569 lacp_on(aggr_port_t *portp)
1570 {
1571 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1572 	ASSERT(RW_WRITE_HELD(&portp->lp_grp->lg_lock));
1573 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1574 
1575 	/*
1576 	 * Reset the state machines and Partner operational
1577 	 * information. Careful to not reset things like
1578 	 * our link state.
1579 	 */
1580 	lacp_reset_port(portp);
1581 	portp->lp_lacp.sm.lacp_on = B_TRUE;
1582 
1583 	AGGR_LACP_DBG(("lacp_on:(%s/%d): \n", portp->lp_devname,
1584 	    portp->lp_port));
1585 
1586 	lacp_receive_sm(portp, NULL);
1587 	lacp_mux_sm(portp);
1588 
1589 	if (portp->lp_state != AGGR_PORT_STATE_ATTACHED)
1590 		return;
1591 
1592 	/* Enable Multicast Slow Protocol address */
1593 	aggr_lacp_mcast_on(portp);
1594 
1595 	/* periodic_sm is started up from the receive machine */
1596 	lacp_selection_logic(portp);
1597 } /* lacp_on */
1598 
1599 
1600 /* Disable Slow Protocol LACP and Marker PDUs */
1601 static void
1602 lacp_off(aggr_port_t *portp)
1603 {
1604 	aggr_grp_t *grp = portp->lp_grp;
1605 
1606 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1607 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1608 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
1609 
1610 	portp->lp_lacp.sm.lacp_on = B_FALSE;
1611 
1612 	AGGR_LACP_DBG(("lacp_off:(%s/%d): \n", portp->lp_devname,
1613 	    portp->lp_port));
1614 
1615 	/*
1616 	 * Disable Slow Protocol Timers. We must temporarely release
1617 	 * the group and port locks in order to avod deadlocks. Make
1618 	 * sure that the port nor the group are closing after re-acquiring
1619 	 * their locks.
1620 	 */
1621 	rw_exit(&portp->lp_lock);
1622 	rw_exit(&grp->lg_lock);
1623 
1624 	stop_periodic_timer(portp);
1625 	stop_current_while_timer(portp);
1626 	stop_wait_while_timer(portp);
1627 
1628 	rw_enter(&grp->lg_lock, RW_WRITER);
1629 	rw_enter(&portp->lp_lock, RW_WRITER);
1630 
1631 	if (!portp->lp_closing && !grp->lg_closing) {
1632 		lacp_mux_sm(portp);
1633 		lacp_periodic_sm(portp);
1634 		lacp_selection_logic(portp);
1635 	}
1636 
1637 	/* Turn OFF Collector_Distributor */
1638 	aggr_set_coll_dist_locked(portp, B_FALSE);
1639 
1640 	/* Disable Multicast Slow Protocol address */
1641 	aggr_lacp_mcast_off(portp);
1642 
1643 	lacp_reset_port(portp);
1644 }
1645 
1646 
1647 static boolean_t
1648 valid_lacp_pdu(aggr_port_t *portp, lacp_t *lacp)
1649 {
1650 	/*
1651 	 * 43.4.12 - "a Receive machine shall not validate
1652 	 * the Version Number, TLV_type, or Reserved fields in received
1653 	 * LACPDUs."
1654 	 * ... "a Receive machine may validate the Actor_Information_Length,
1655 	 * Partner_Information_Length, Collector_Information_Length,
1656 	 * or Terminator_Length fields."
1657 	 */
1658 	if ((lacp->actor_info.information_len != sizeof (link_info_t)) ||
1659 	    (lacp->partner_info.information_len != sizeof (link_info_t)) ||
1660 	    (lacp->collector_len != LACP_COLLECTOR_INFO_LEN) ||
1661 	    (lacp->terminator_len != LACP_TERMINATOR_INFO_LEN)) {
1662 		AGGR_LACP_DBG(("trunk link (%s/%d): Malformed LACPDU: "
1663 		    " Terminator Length = %d \n", portp->lp_devname,
1664 		    portp->lp_port, lacp->terminator_len));
1665 		return (B_FALSE);
1666 	}
1667 
1668 	return (B_TRUE);
1669 }
1670 
1671 
1672 static void
1673 start_current_while_timer(aggr_port_t *portp, uint_t time)
1674 {
1675 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1676 
1677 	if (portp->lp_lacp.current_while_timer.id == 0) {
1678 		if (time > 0) {
1679 			portp->lp_lacp.current_while_timer.val = time;
1680 		} else if (portp->lp_lacp.ActorOperPortState.bit.timeout) {
1681 			portp->lp_lacp.current_while_timer.val =
1682 			    SHORT_TIMEOUT_TIME;
1683 		} else {
1684 			portp->lp_lacp.current_while_timer.val =
1685 			    LONG_TIMEOUT_TIME;
1686 		}
1687 
1688 		portp->lp_lacp.current_while_timer.id =
1689 		    timeout(current_while_timer_pop, portp,
1690 		    drv_usectohz((clock_t)1000000 *
1691 		    (clock_t)portp->lp_lacp.current_while_timer.val));
1692 	}
1693 }
1694 
1695 
1696 static void
1697 stop_current_while_timer(aggr_port_t *portp)
1698 {
1699 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1700 
1701 	if (portp->lp_lacp.current_while_timer.id != 0) {
1702 		AGGR_LACP_UNLOCK(portp->lp_grp);
1703 		(void) untimeout(portp->lp_lacp.current_while_timer.id);
1704 		AGGR_LACP_LOCK(portp->lp_grp);
1705 		portp->lp_lacp.current_while_timer.id = 0;
1706 	}
1707 }
1708 
1709 
1710 static void
1711 current_while_timer_pop(void *data)
1712 {
1713 	aggr_port_t *portp = (aggr_port_t *)data;
1714 
1715 	if (portp->lp_closing)
1716 		return;
1717 
1718 	AGGR_LACP_LOCK(portp->lp_grp);
1719 
1720 	AGGR_LACP_DBG(("trunk link:(%s/%d): current_while_timer "
1721 	    "pop id=%p\n", portp->lp_devname, portp->lp_port,
1722 	    portp->lp_lacp.current_while_timer.id));
1723 
1724 	portp->lp_lacp.current_while_timer.id = 0;
1725 	lacp_receive_sm(portp, NULL);
1726 	AGGR_LACP_UNLOCK(portp->lp_grp);
1727 }
1728 
1729 
1730 /*
1731  * record_Default - Simply copies over administrative values
1732  * to the partner operational values, and sets our state to indicate we
1733  * are using defaulted values.
1734  */
1735 static void
1736 record_Default(aggr_port_t *portp)
1737 {
1738 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1739 
1740 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1741 
1742 	pl->PartnerOperPortNum = pl->PartnerAdminPortNum;
1743 	pl->PartnerOperPortPriority = pl->PartnerAdminPortPriority;
1744 	pl->PartnerOperSystem = pl->PartnerAdminSystem;
1745 	pl->PartnerOperSysPriority = pl->PartnerAdminSysPriority;
1746 	pl->PartnerOperKey = pl->PartnerAdminKey;
1747 	pl->PartnerOperPortState.state = pl->PartnerAdminPortState.state;
1748 
1749 	pl->ActorOperPortState.bit.defaulted = B_TRUE;
1750 }
1751 
1752 
1753 /* Returns B_TRUE on sync value changing */
1754 static boolean_t
1755 record_PDU(aggr_port_t *portp, lacp_t *lacp)
1756 {
1757 	aggr_grp_t *aggrp = portp->lp_grp;
1758 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1759 	uint8_t save_sync;
1760 
1761 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1762 
1763 	/*
1764 	 * Partner Information
1765 	 */
1766 	pl->PartnerOperPortNum = ntohs(lacp->actor_info.port);
1767 	pl->PartnerOperPortPriority =
1768 	    ntohs(lacp->actor_info.port_priority);
1769 	pl->PartnerOperSystem = lacp->actor_info.system_id;
1770 	pl->PartnerOperSysPriority =
1771 	    htons(lacp->actor_info.system_priority);
1772 	pl->PartnerOperKey = ntohs(lacp->actor_info.key);
1773 
1774 	/* All state info except for Synchronization */
1775 	save_sync = pl->PartnerOperPortState.bit.sync;
1776 	pl->PartnerOperPortState.state = lacp->actor_info.state.state;
1777 
1778 	/* Defaulted set to FALSE */
1779 	pl->ActorOperPortState.bit.defaulted = B_FALSE;
1780 
1781 	/*
1782 	 * 43.4.9 - (Partner_Port, Partner_Port_Priority, Partner_system,
1783 	 *		Partner_System_Priority, Partner_Key, and
1784 	 *		Partner_State.Aggregation) are compared to the
1785 	 *		corresponding operations paramters values for
1786 	 *		the Actor. If these are equal, or if this is
1787 	 *		an individual link, we are synchronized.
1788 	 */
1789 	if (((ntohs(lacp->partner_info.port) == pl->ActorPortNumber) &&
1790 	    (ntohs(lacp->partner_info.port_priority) ==
1791 	    pl->ActorPortPriority) &&
1792 	    (ether_cmp(&lacp->partner_info.system_id,
1793 		(struct ether_addr *)&aggrp->lg_addr) == 0) &&
1794 	    (ntohs(lacp->partner_info.system_priority) ==
1795 	    aggrp->aggr.ActorSystemPriority) &&
1796 	    (ntohs(lacp->partner_info.key) == pl->ActorOperPortKey) &&
1797 	    (lacp->partner_info.state.bit.aggregation ==
1798 	    pl->ActorOperPortState.bit.aggregation)) ||
1799 	    (!lacp->actor_info.state.bit.aggregation)) {
1800 
1801 		pl->PartnerOperPortState.bit.sync =
1802 		    lacp->actor_info.state.bit.sync;
1803 	} else {
1804 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1805 	}
1806 
1807 	if (save_sync != pl->PartnerOperPortState.bit.sync) {
1808 		AGGR_LACP_DBG(("record_PDU:(%s/%d): partner sync "
1809 		    "%d -->%d\n", portp->lp_devname, portp->lp_port,
1810 		    save_sync, pl->PartnerOperPortState.bit.sync));
1811 		return (B_TRUE);
1812 	} else {
1813 		return (B_FALSE);
1814 	}
1815 }
1816 
1817 
1818 /*
1819  * update_selected - If any of the Partner parameters has
1820  *			changed from a previous value, then
1821  *			unselect the link from the aggregator.
1822  */
1823 static boolean_t
1824 update_selected(aggr_port_t *portp, lacp_t *lacp)
1825 {
1826 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1827 
1828 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1829 
1830 	if ((pl->PartnerOperPortNum != ntohs(lacp->actor_info.port)) ||
1831 	    (pl->PartnerOperPortPriority !=
1832 	    ntohs(lacp->actor_info.port_priority)) ||
1833 	    (ether_cmp(&pl->PartnerOperSystem,
1834 	    &lacp->actor_info.system_id) != 0) ||
1835 	    (pl->PartnerOperSysPriority !=
1836 	    ntohs(lacp->actor_info.system_priority)) ||
1837 	    (pl->PartnerOperKey != ntohs(lacp->actor_info.key)) ||
1838 	    (pl->PartnerOperPortState.bit.aggregation !=
1839 	    lacp->actor_info.state.bit.aggregation)) {
1840 		AGGR_LACP_DBG(("update_selected:(%s/%d): "
1841 		    "selected  %d-->%d\n", portp->lp_devname, portp->lp_port,
1842 		    pl->sm.selected, AGGR_UNSELECTED));
1843 
1844 		lacp_port_unselect(portp);
1845 		return (B_TRUE);
1846 	} else {
1847 		return (B_FALSE);
1848 	}
1849 }
1850 
1851 
1852 /*
1853  * update_default_selected - If any of the operational Partner parameters
1854  *			is different than that of the administrative values
1855  *			then unselect the link from the aggregator.
1856  */
1857 static void
1858 update_default_selected(aggr_port_t *portp)
1859 {
1860 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1861 
1862 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1863 
1864 	if ((pl->PartnerAdminPortNum != pl->PartnerOperPortNum) ||
1865 	    (pl->PartnerOperPortPriority != pl->PartnerAdminPortPriority) ||
1866 	    (ether_cmp(&pl->PartnerOperSystem, &pl->PartnerAdminSystem) != 0) ||
1867 	    (pl->PartnerOperSysPriority != pl->PartnerAdminSysPriority) ||
1868 	    (pl->PartnerOperKey != pl->PartnerAdminKey) ||
1869 	    (pl->PartnerOperPortState.bit.aggregation !=
1870 	    pl->PartnerAdminPortState.bit.aggregation)) {
1871 
1872 		AGGR_LACP_DBG(("update_default_selected:(%s/%d): "
1873 		    "selected  %d-->%d\n", portp->lp_devname, portp->lp_port,
1874 		    pl->sm.selected, AGGR_UNSELECTED));
1875 
1876 		lacp_port_unselect(portp);
1877 	}
1878 }
1879 
1880 
1881 /*
1882  * update_NTT - If any of the Partner values in the received LACPDU
1883  *			are different than that of the Actor operational
1884  *			values then set NTT to true.
1885  */
1886 static void
1887 update_NTT(aggr_port_t *portp, lacp_t *lacp)
1888 {
1889 	aggr_grp_t *aggrp = portp->lp_grp;
1890 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1891 
1892 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1893 
1894 	if ((pl->ActorPortNumber != ntohs(lacp->partner_info.port)) ||
1895 	    (pl->ActorPortPriority !=
1896 	    ntohs(lacp->partner_info.port_priority)) ||
1897 	    (ether_cmp(&aggrp->lg_addr,
1898 	    &lacp->partner_info.system_id) != 0) ||
1899 	    (aggrp->aggr.ActorSystemPriority !=
1900 	    ntohs(lacp->partner_info.system_priority)) ||
1901 	    (pl->ActorOperPortKey != ntohs(lacp->partner_info.key)) ||
1902 	    (pl->ActorOperPortState.bit.activity !=
1903 	    lacp->partner_info.state.bit.activity) ||
1904 	    (pl->ActorOperPortState.bit.timeout !=
1905 	    lacp->partner_info.state.bit.timeout) ||
1906 	    (pl->ActorOperPortState.bit.sync !=
1907 	    lacp->partner_info.state.bit.sync) ||
1908 	    (pl->ActorOperPortState.bit.aggregation !=
1909 	    lacp->partner_info.state.bit.aggregation)) {
1910 
1911 		AGGR_LACP_DBG(("update_NTT:(%s/%d): NTT  %d-->%d\n",
1912 		    portp->lp_devname, portp->lp_port, pl->NTT,
1913 		    B_TRUE));
1914 
1915 		pl->NTT = B_TRUE;
1916 	}
1917 }
1918 
1919 /*
1920  * lacp_receive_sm - LACP receive state machine
1921  *
1922  * parameters:
1923  *      - portp - instance this applies to.
1924  *      - lacp - pointer in the case of a received LACPDU.
1925  *                This value is NULL if there is no LACPDU.
1926  *
1927  * invoked:
1928  *    - when initialization is needed
1929  *    - upon reception of an LACPDU. This is the common case.
1930  *    - every time the current_while_timer pops
1931  */
1932 static void
1933 lacp_receive_sm(aggr_port_t *portp, lacp_t *lacp)
1934 {
1935 	boolean_t sync_updated, selected_updated, save_activity;
1936 	aggr_lacp_port_t *pl = &portp->lp_lacp;
1937 	lacp_receive_state_t oldstate = pl->sm.receive_state;
1938 
1939 	ASSERT(AGGR_LACP_LOCK_HELD(portp->lp_grp));
1940 
1941 	/* LACP_OFF state not in specification so check here.  */
1942 	if (!pl->sm.lacp_on)
1943 		return;
1944 
1945 	/* figure next state */
1946 	if (pl->sm.begin || pl->sm.port_moved) {
1947 		pl->sm.receive_state = LACP_INITIALIZE;
1948 	} else if (!pl->sm.port_enabled) {	/* DL_NOTE_LINK_DOWN */
1949 		pl->sm.receive_state = LACP_PORT_DISABLED;
1950 	} else if (!pl->sm.lacp_enabled) { /* DL_NOTE_AGGR_UNAVAIL */
1951 		pl->sm.receive_state =
1952 		    (pl->sm.receive_state == LACP_PORT_DISABLED) ?
1953 		    LACP_DISABLED : LACP_PORT_DISABLED;
1954 	} else if (lacp != NULL) {
1955 		if ((pl->sm.receive_state == LACP_EXPIRED) ||
1956 		    (pl->sm.receive_state == LACP_DEFAULTED)) {
1957 			pl->sm.receive_state = LACP_CURRENT;
1958 		}
1959 	} else if ((pl->sm.receive_state == LACP_CURRENT) &&
1960 	    (pl->current_while_timer.id == 0)) {
1961 		pl->sm.receive_state = LACP_EXPIRED;
1962 	} else if ((pl->sm.receive_state == LACP_EXPIRED) &&
1963 	    (pl->current_while_timer.id == 0)) {
1964 		pl->sm.receive_state = LACP_DEFAULTED;
1965 	}
1966 
1967 
1968 	if (!((lacp && (oldstate == LACP_CURRENT) &&
1969 	    (pl->sm.receive_state == LACP_CURRENT)))) {
1970 		AGGR_LACP_DBG(("lacp_receive_sm(%s/%d):%s--->%s\n",
1971 		    portp->lp_devname, portp->lp_port,
1972 		    lacp_receive_str[oldstate],
1973 		    lacp_receive_str[pl->sm.receive_state]));
1974 	}
1975 
1976 	switch (pl->sm.receive_state) {
1977 	case LACP_INITIALIZE:
1978 		lacp_port_unselect(portp);
1979 		record_Default(portp);
1980 		pl->ActorOperPortState.bit.expired = B_FALSE;
1981 		pl->sm.port_moved = B_FALSE;
1982 		pl->sm.receive_state = LACP_PORT_DISABLED;
1983 		pl->sm.begin = B_FALSE;
1984 		lacp_receive_sm(portp, NULL);
1985 		break;
1986 
1987 	case LACP_PORT_DISABLED:
1988 		pl->PartnerOperPortState.bit.sync = B_FALSE;
1989 		/*
1990 		 * Stop current_while_timer in case
1991 		 * we got here from link down
1992 		 */
1993 		stop_current_while_timer(portp);
1994 
1995 		if (pl->sm.port_enabled && !pl->sm.lacp_enabled) {
1996 			pl->sm.receive_state = LACP_DISABLED;
1997 			lacp_receive_sm(portp, lacp);
1998 			/* We goto LACP_DISABLED state */
1999 			break;
2000 		} else if (pl->sm.port_enabled && pl->sm.lacp_enabled) {
2001 			pl->sm.receive_state = LACP_EXPIRED;
2002 			/*
2003 			 * FALL THROUGH TO LACP_EXPIRED CASE:
2004 			 * We have no way of knowing if we get into
2005 			 * lacp_receive_sm() from a  current_while_timer
2006 			 * expiring as it has never been kicked off yet!
2007 			 */
2008 		} else {
2009 			/* We stay in LACP_PORT_DISABLED state */
2010 			break;
2011 		}
2012 		/* LACP_PORT_DISABLED -> LACP_EXPIRED */
2013 		/* FALLTHROUGH */
2014 
2015 	case LACP_EXPIRED:
2016 		/*
2017 		 * Arrives here from LACP_PORT_DISABLED state as well as
2018 		 * from current_while_timer expiring.
2019 		 */
2020 		pl->PartnerOperPortState.bit.sync = B_FALSE;
2021 		pl->PartnerOperPortState.bit.timeout = B_TRUE;
2022 
2023 		pl->ActorOperPortState.bit.expired = B_TRUE;
2024 		start_current_while_timer(portp, SHORT_TIMEOUT_TIME);
2025 		lacp_periodic_sm(portp);
2026 		break;
2027 
2028 	case LACP_DISABLED:
2029 		/*
2030 		 * This is the normal state for recv_sm when LACP_OFF
2031 		 * is set or the NIC is in half duplex mode.
2032 		 */
2033 		lacp_port_unselect(portp);
2034 		record_Default(portp);
2035 		pl->PartnerOperPortState.bit.aggregation = B_FALSE;
2036 		pl->ActorOperPortState.bit.expired = B_FALSE;
2037 		break;
2038 
2039 	case LACP_DEFAULTED:
2040 		/*
2041 		 * Current_while_timer expired a second time.
2042 		 */
2043 		update_default_selected(portp);
2044 		record_Default(portp);	/* overwrite Partner Oper val */
2045 		pl->ActorOperPortState.bit.expired = B_FALSE;
2046 		pl->PartnerOperPortState.bit.sync = B_TRUE;
2047 
2048 		lacp_selection_logic(portp);
2049 		lacp_mux_sm(portp);
2050 		break;
2051 
2052 	case LACP_CURRENT:
2053 		/*
2054 		 * Reception of LACPDU
2055 		 */
2056 
2057 		if (!lacp) /* no LACPDU so current_while_timer popped */
2058 			break;
2059 
2060 		AGGR_LACP_DBG(("lacp_receive_sm: (%s/%d): "
2061 		    "LACPDU received:\n", portp->lp_devname, portp->lp_port));
2062 
2063 		/*
2064 		 * Validate Actor_Information_Length,
2065 		 * Partner_Information_Length, Collector_Information_Length,
2066 		 * and Terminator_Length fields.
2067 		 */
2068 		if (!valid_lacp_pdu(portp, lacp)) {
2069 			AGGR_LACP_DBG(("lacp_receive_sm (%s/%d): "
2070 			    "Invalid LACPDU received\n",
2071 			    portp->lp_devname, portp->lp_port));
2072 			break;
2073 		}
2074 
2075 		save_activity = pl->PartnerOperPortState.bit.activity;
2076 		selected_updated = update_selected(portp, lacp);
2077 		update_NTT(portp, lacp);
2078 		sync_updated = record_PDU(portp, lacp);
2079 
2080 		pl->ActorOperPortState.bit.expired = B_FALSE;
2081 
2082 		if (selected_updated) {
2083 			lacp_selection_logic(portp);
2084 			lacp_mux_sm(portp);
2085 		} else if (sync_updated) {
2086 			lacp_mux_sm(portp);
2087 		}
2088 
2089 		/*
2090 		 * If the periodic timer value bit has been modified
2091 		 * or the partner activity bit has been changed then
2092 		 * we need to respectively:
2093 		 *  - restart the timer with the proper timeout value.
2094 		 *  - possibly enable/disable transmission of LACPDUs.
2095 		 */
2096 		if ((pl->PartnerOperPortState.bit.timeout &&
2097 		    (pl->periodic_timer.val != FAST_PERIODIC_TIME)) ||
2098 		    (!pl->PartnerOperPortState.bit.timeout &&
2099 		    (pl->periodic_timer.val != SLOW_PERIODIC_TIME)) ||
2100 		    (pl->PartnerOperPortState.bit.activity !=
2101 		    save_activity)) {
2102 			lacp_periodic_sm(portp);
2103 		}
2104 
2105 		stop_current_while_timer(portp);
2106 		/* Check if we need to transmit an LACPDU */
2107 		if (pl->NTT)
2108 			lacp_xmit_sm(portp);
2109 		start_current_while_timer(portp, 0);
2110 
2111 		break;
2112 	}
2113 }
2114 
2115 static void
2116 aggr_set_coll_dist(aggr_port_t *portp, boolean_t enable)
2117 {
2118 	rw_enter(&portp->lp_lock, RW_WRITER);
2119 	aggr_set_coll_dist_locked(portp, enable);
2120 	rw_exit(&portp->lp_lock);
2121 }
2122 
2123 static void
2124 aggr_set_coll_dist_locked(aggr_port_t *portp, boolean_t enable)
2125 {
2126 	ASSERT(RW_WRITE_HELD(&portp->lp_lock));
2127 
2128 	AGGR_LACP_DBG(("AGGR_SET_COLL_DIST_TYPE: (%s/%d) %s\n",
2129 	    portp->lp_devname, portp->lp_port,
2130 	    enable ? "ENABLED" : "DISABLED"));
2131 
2132 	if (!enable) {
2133 		/*
2134 		 * Turn OFF Collector_Distributor.
2135 		 */
2136 		portp->lp_collector_enabled = B_FALSE;
2137 		aggr_send_port_disable(portp);
2138 		return;
2139 	}
2140 
2141 	/*
2142 	 * Turn ON Collector_Distributor.
2143 	 */
2144 
2145 	if (!portp->lp_lacp.sm.lacp_on || (portp->lp_lacp.sm.lacp_on &&
2146 	    (portp->lp_lacp.sm.mux_state == LACP_COLLECTING_DISTRIBUTING))) {
2147 		/* Port is compatible and can be aggregated */
2148 		portp->lp_collector_enabled = B_TRUE;
2149 		aggr_send_port_enable(portp);
2150 	}
2151 }
2152 
2153 /*
2154  * Process a received Marker or LACPDU.
2155  */
2156 void
2157 aggr_lacp_rx(aggr_port_t *portp, mblk_t *dmp)
2158 {
2159 	lacp_t	*lacp;
2160 
2161 	dmp->b_rptr += sizeof (struct ether_header);
2162 
2163 	if (MBLKL(dmp) < sizeof (lacp_t)) {
2164 		freemsg(dmp);
2165 		return;
2166 	}
2167 
2168 	lacp = (lacp_t *)dmp->b_rptr;
2169 
2170 	switch (lacp->subtype) {
2171 	case LACP_SUBTYPE:
2172 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s/%d): "
2173 		    "LACPDU received.\n", portp->lp_devname, portp->lp_port));
2174 
2175 		AGGR_LACP_LOCK(portp->lp_grp);
2176 		if (!portp->lp_lacp.sm.lacp_on) {
2177 			AGGR_LACP_UNLOCK(portp->lp_grp);
2178 			break;
2179 		}
2180 		lacp_receive_sm(portp, lacp);
2181 		AGGR_LACP_UNLOCK(portp->lp_grp);
2182 		break;
2183 
2184 	case MARKER_SUBTYPE:
2185 		AGGR_LACP_DBG(("aggr_lacp_rx:(%s%d): "
2186 		    "Marker Packet received.\n",
2187 		    portp->lp_devname, portp->lp_port));
2188 
2189 		(void) receive_marker_pdu(portp, dmp);
2190 		break;
2191 
2192 	default:
2193 		AGGR_LACP_DBG(("aggr_lacp_rx: (%s%d): "
2194 		    "Unknown Slow Protocol type %d\n",
2195 		    portp->lp_devname, portp->lp_port, lacp->subtype));
2196 		break;
2197 	}
2198 
2199 	freemsg(dmp);
2200 }
2201