xref: /illumos-gate/usr/src/uts/sun4v/io/vsw.c (revision 179c3dac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/debug.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/user.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strlog.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/cpu.h>
40 #include <sys/kmem.h>
41 #include <sys/conf.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ksynch.h>
45 #include <sys/stat.h>
46 #include <sys/kstat.h>
47 #include <sys/vtrace.h>
48 #include <sys/strsun.h>
49 #include <sys/dlpi.h>
50 #include <sys/ethernet.h>
51 #include <net/if.h>
52 #include <sys/varargs.h>
53 #include <sys/machsystm.h>
54 #include <sys/modctl.h>
55 #include <sys/modhash.h>
56 #include <sys/mac_provider.h>
57 #include <sys/mac_ether.h>
58 #include <sys/taskq.h>
59 #include <sys/note.h>
60 #include <sys/mach_descrip.h>
61 #include <sys/mac_provider.h>
62 #include <sys/mdeg.h>
63 #include <sys/ldc.h>
64 #include <sys/vsw_fdb.h>
65 #include <sys/vsw.h>
66 #include <sys/vio_mailbox.h>
67 #include <sys/vnet_mailbox.h>
68 #include <sys/vnet_common.h>
69 #include <sys/vio_util.h>
70 #include <sys/sdt.h>
71 #include <sys/atomic.h>
72 #include <sys/callb.h>
73 #include <sys/vlan.h>
74 
75 /*
76  * Function prototypes.
77  */
78 static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79 static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80 static	int vsw_unattach(vsw_t *vswp);
81 static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82 static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
83 static	int vsw_mod_cleanup(void);
84 
85 /* MDEG routines */
86 static	int vsw_mdeg_register(vsw_t *vswp);
87 static	void vsw_mdeg_unregister(vsw_t *vswp);
88 static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90 static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91 static	int vsw_read_mdprops(vsw_t *vswp);
92 static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
93 	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
94 	uint16_t *nvidsp, uint16_t *default_idp);
95 static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
96 	md_t *mdp, mde_cookie_t *node);
97 static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
98 	mde_cookie_t node);
99 static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
100 	uint32_t *mtu);
101 static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
102 static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
103 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
104 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
105 	vsw_vlanid_t *vids2, int nvids);
106 
107 /* Mac driver related routines */
108 static int vsw_mac_register(vsw_t *);
109 static int vsw_mac_unregister(vsw_t *);
110 static int vsw_m_stat(void *, uint_t, uint64_t *);
111 static void vsw_m_stop(void *arg);
112 static int vsw_m_start(void *arg);
113 static int vsw_m_unicst(void *arg, const uint8_t *);
114 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
115 static int vsw_m_promisc(void *arg, boolean_t);
116 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
117 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
118     mblk_t *mp, vsw_macrx_flags_t flags);
119 
120 /*
121  * Functions imported from other files.
122  */
123 extern void vsw_setup_switching_thread(void *arg);
124 extern int vsw_setup_switching_start(vsw_t *vswp);
125 extern void vsw_setup_switching_stop(vsw_t *vswp);
126 extern int vsw_setup_switching(vsw_t *);
127 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
128     vsw_port_t *port, mac_resource_handle_t mrh);
129 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
130 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
131 extern void vsw_del_mcst_vsw(vsw_t *);
132 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
133 extern void vsw_detach_ports(vsw_t *vswp);
134 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
135 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
136 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
137 	md_t *prev_mdp, mde_cookie_t prev_mdex);
138 extern	int vsw_port_attach(vsw_port_t *port);
139 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
140 extern int vsw_mac_open(vsw_t *vswp);
141 extern void vsw_mac_close(vsw_t *vswp);
142 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
143 extern void vsw_unset_addrs(vsw_t *vswp);
144 extern void vsw_setup_layer2_post_process(vsw_t *vswp);
145 extern void vsw_create_vlans(void *arg, int type);
146 extern void vsw_destroy_vlans(void *arg, int type);
147 extern void vsw_vlan_add_ids(void *arg, int type);
148 extern void vsw_vlan_remove_ids(void *arg, int type);
149 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
150 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
151 	mblk_t **npt);
152 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
153 extern void vsw_hio_cleanup(vsw_t *vswp);
154 extern void vsw_hio_start_ports(vsw_t *vswp);
155 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
156 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
157 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
158 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
159     vsw_vlanid_t *new_vids, int new_nvids);
160 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
161 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
162 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
163     uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
164 extern void vsw_reset_ports(vsw_t *vswp);
165 extern void vsw_port_reset(vsw_port_t *portp);
166 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
167 
168 /*
169  * Internal tunables.
170  */
171 int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
172 int	vsw_wretries = 100;		/* # of write attempts */
173 int	vsw_desc_delay = 0;		/* delay in us */
174 int	vsw_read_attempts = 5;		/* # of reads of descriptor */
175 int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
176 int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
177 					/* 300*3 = 900sec(15min) of max tmout */
178 int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
179 int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
180 int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
181 int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
182 boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
183 boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
184 
185 uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
186 uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
187 uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
188 
189 /* delay in usec to wait for all references on a fdb entry to be dropped */
190 uint32_t vsw_fdbe_refcnt_delay = 10;
191 
192 /*
193  * Default vlan id. This is only used internally when the "default-vlan-id"
194  * property is not present in the MD device node. Therefore, this should not be
195  * used as a tunable; if this value is changed, the corresponding variable
196  * should be updated to the same value in all vnets connected to this vsw.
197  */
198 uint16_t	vsw_default_vlan_id = 1;
199 
200 /*
201  * Workaround for a version handshake bug in obp's vnet.
202  * If vsw initiates version negotiation starting from the highest version,
203  * obp sends a nack and terminates version handshake. To workaround
204  * this, we do not initiate version handshake when the channel comes up.
205  * Instead, we wait for the peer to send its version info msg and go through
206  * the version protocol exchange. If we successfully negotiate a version,
207  * before sending the ack, we send our version info msg to the peer
208  * using the <major,minor> version that we are about to ack.
209  */
210 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
211 
212 /*
213  * In the absence of "priority-ether-types" property in MD, the following
214  * internal tunable can be set to specify a single priority ethertype.
215  */
216 uint64_t vsw_pri_eth_type = 0;
217 
218 /*
219  * Number of transmit priority buffers that are preallocated per device.
220  * This number is chosen to be a small value to throttle transmission
221  * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
222  */
223 uint32_t vsw_pri_tx_nmblks = 64;
224 
225 /*
226  * Number of RARP packets sent to announce macaddr to the physical switch,
227  * after vsw's physical device is changed dynamically or after a guest (client
228  * vnet) is live migrated in.
229  */
230 uint32_t vsw_publish_macaddr_count = 3;
231 
232 boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
233 int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanp */
234 int vsw_hio_cleanup_delay = 10000;	/* 10ms */
235 
236 /* Number of transmit descriptors -  must be power of 2 */
237 uint32_t vsw_ntxds = VSW_RING_NUM_EL;
238 
239 /*
240  * Max number of mblks received in one receive operation.
241  */
242 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
243 
244 /*
245  * Internal tunables for receive buffer pools, that is,  the size and number of
246  * mblks for each pool. At least 3 sizes must be specified if these are used.
247  * The sizes must be specified in increasing order. Non-zero value of the first
248  * size will be used as a hint to use these values instead of the algorithm
249  * that determines the sizes based on MTU.
250  */
251 uint32_t vsw_mblk_size1 = 0;
252 uint32_t vsw_mblk_size2 = 0;
253 uint32_t vsw_mblk_size3 = 0;
254 uint32_t vsw_mblk_size4 = 0;
255 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
256 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
257 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
258 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
259 
260 /*
261  * Set this to non-zero to enable additional internal receive buffer pools
262  * based on the MTU of the device for better performance at the cost of more
263  * memory consumption. This is turned off by default, to use allocb(9F) for
264  * receive buffer allocations of sizes > 2K.
265  */
266 boolean_t vsw_jumbo_rxpools = B_FALSE;
267 
268 /*
269  * vsw_max_tx_qcount is the maximum # of packets that can be queued
270  * before the tx worker thread begins processing the queue. Its value
271  * is chosen to be 4x the default length of tx descriptor ring.
272  */
273 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
274 
275 /*
276  * MAC callbacks
277  */
278 static	mac_callbacks_t	vsw_m_callbacks = {
279 	0,
280 	vsw_m_stat,
281 	vsw_m_start,
282 	vsw_m_stop,
283 	vsw_m_promisc,
284 	vsw_m_multicst,
285 	vsw_m_unicst,
286 	vsw_m_tx,
287 	NULL,
288 	NULL,
289 	NULL
290 };
291 
292 static	struct	cb_ops	vsw_cb_ops = {
293 	nulldev,			/* cb_open */
294 	nulldev,			/* cb_close */
295 	nodev,				/* cb_strategy */
296 	nodev,				/* cb_print */
297 	nodev,				/* cb_dump */
298 	nodev,				/* cb_read */
299 	nodev,				/* cb_write */
300 	nodev,				/* cb_ioctl */
301 	nodev,				/* cb_devmap */
302 	nodev,				/* cb_mmap */
303 	nodev,				/* cb_segmap */
304 	nochpoll,			/* cb_chpoll */
305 	ddi_prop_op,			/* cb_prop_op */
306 	NULL,				/* cb_stream */
307 	D_MP,				/* cb_flag */
308 	CB_REV,				/* rev */
309 	nodev,				/* int (*cb_aread)() */
310 	nodev				/* int (*cb_awrite)() */
311 };
312 
313 static	struct	dev_ops	vsw_ops = {
314 	DEVO_REV,		/* devo_rev */
315 	0,			/* devo_refcnt */
316 	NULL,			/* devo_getinfo */
317 	nulldev,		/* devo_identify */
318 	nulldev,		/* devo_probe */
319 	vsw_attach,		/* devo_attach */
320 	vsw_detach,		/* devo_detach */
321 	nodev,			/* devo_reset */
322 	&vsw_cb_ops,		/* devo_cb_ops */
323 	(struct bus_ops *)NULL,	/* devo_bus_ops */
324 	ddi_power		/* devo_power */
325 };
326 
327 extern	struct	mod_ops	mod_driverops;
328 static struct modldrv vswmodldrv = {
329 	&mod_driverops,
330 	"sun4v Virtual Switch",
331 	&vsw_ops,
332 };
333 
/*
 * Take / drop all three per-channel locks. The acquisition order is
 * ldc_cblock -> ldc_rxlock -> ldc_txlock and the release order is the exact
 * reverse.
 *
 * Each macro expands to a single statement (do { } while (0)), so it is safe
 * under an unbraced if/else; invoke it with a terminating semicolon like a
 * function call.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
		_NOTE(CONSTCOND)			\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
		_NOTE(CONSTCOND)			\
	} while (0)
342 
343 /* Driver soft state ptr  */
344 static void	*vsw_state;
345 
346 /*
347  * Linked list of "vsw_t" structures - one per instance.
348  */
349 vsw_t		*vsw_head = NULL;
350 vio_mblk_pool_t	*vsw_rx_poolp = NULL;
351 krwlock_t	vsw_rw;
352 
353 /*
354  * Property names
355  */
356 static char vdev_propname[] = "virtual-device";
357 static char vsw_propname[] = "virtual-network-switch";
358 static char physdev_propname[] = "vsw-phys-dev";
359 static char smode_propname[] = "vsw-switch-mode";
360 static char macaddr_propname[] = "local-mac-address";
361 static char remaddr_propname[] = "remote-mac-address";
362 static char ldcids_propname[] = "ldc-ids";
363 static char chan_propname[] = "channel-endpoint";
364 static char id_propname[] = "id";
365 static char reg_propname[] = "reg";
366 static char pri_types_propname[] = "priority-ether-types";
367 static char vsw_pvid_propname[] = "port-vlan-id";
368 static char vsw_vid_propname[] = "vlan-id";
369 static char vsw_dvid_propname[] = "default-vlan-id";
370 static char port_pvid_propname[] = "remote-port-vlan-id";
371 static char port_vid_propname[] = "remote-vlan-id";
372 static char hybrid_propname[] = "hybrid";
373 static char vsw_mtu_propname[] = "mtu";
374 
375 /*
376  * Matching criteria passed to the MDEG to register interest
377  * in changes to 'virtual-device-port' nodes identified by their
378  * 'id' property.
379  */
380 static md_prop_match_t vport_prop_match[] = {
381 	{ MDET_PROP_VAL,    "id"   },
382 	{ MDET_LIST_END,    NULL    }
383 };
384 
385 static mdeg_node_match_t vport_match = { "virtual-device-port",
386 						vport_prop_match };
387 
388 /*
389  * Matching criteria passed to the MDEG to register interest
390  * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
391  * by their 'name' and 'cfg-handle' properties.
392  */
393 static md_prop_match_t vdev_prop_match[] = {
394 	{ MDET_PROP_STR,    "name"   },
395 	{ MDET_PROP_VAL,    "cfg-handle" },
396 	{ MDET_LIST_END,    NULL    }
397 };
398 
399 static mdeg_node_match_t vdev_match = { "virtual-device",
400 						vdev_prop_match };
401 
402 
403 /*
404  * Specification of an MD node passed to the MDEG to filter any
405  * 'vport' nodes that do not belong to the specified node. This
406  * template is copied for each vsw instance and filled in with
407  * the appropriate 'cfg-handle' value before being passed to the MDEG.
408  */
409 static mdeg_prop_spec_t vsw_prop_template[] = {
410 	{ MDET_PROP_STR,    "name",		vsw_propname },
411 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
412 	{ MDET_LIST_END,    NULL,		NULL	}
413 };
414 
/*
 * Store 'val' in the ps_val member of entry 1 of the property spec array
 * 'specp'. Expands to a single parenthesized expression with no trailing
 * semicolon, so the caller supplies the semicolon and the macro can be used
 * safely under an unbraced if/else.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
416 
#ifdef	DEBUG
/*
 * Print debug messages - set to 0x1f to enable all msgs
 * or 0x0 to turn all off.
 */
int vswdbg = 0x0;

/*
 * debug levels:
 * 0x01:	Function entry/exit tracing
 * 0x02:	Internal function messages
 * 0x04:	Verbose internal messages
 * 0x08:	Warning messages
 * 0x10:	Error messages
 */

/*
 * Format a debug message and emit it through cmn_err(CE_CONT).
 *
 * vswp - instance the message relates to, or NULL; when non-NULL the
 *        message is prefixed with "vsw<instance>: ".
 * fmt  - printf-style format string, followed by its arguments.
 *
 * The message is built in a fixed 512-byte stack buffer; formatting is
 * bounded to that size, so an over-long message is truncated rather than
 * overrunning the stack.
 */
void
vswdebug(vsw_t *vswp, const char *fmt, ...)
{
	char buf[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	if (vswp == NULL)
		cmn_err(CE_CONT, "%s\n", buf);
	else
		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
}

#endif	/* DEBUG */
450 
451 static struct modlinkage modlinkage = {
452 	MODREV_1,
453 	&vswmodldrv,
454 	NULL
455 };
456 
457 int
458 _init(void)
459 {
460 	int status;
461 
462 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
463 
464 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
465 	if (status != 0) {
466 		return (status);
467 	}
468 
469 	mac_init_ops(&vsw_ops, DRV_NAME);
470 	status = mod_install(&modlinkage);
471 	if (status != 0) {
472 		ddi_soft_state_fini(&vsw_state);
473 	}
474 	return (status);
475 }
476 
477 int
478 _fini(void)
479 {
480 	int status;
481 
482 	status = vsw_mod_cleanup();
483 	if (status != 0)
484 		return (status);
485 
486 	status = mod_remove(&modlinkage);
487 	if (status != 0)
488 		return (status);
489 	mac_fini_ops(&vsw_ops);
490 	ddi_soft_state_fini(&vsw_state);
491 
492 	rw_destroy(&vsw_rw);
493 
494 	return (status);
495 }
496 
497 int
498 _info(struct modinfo *modinfop)
499 {
500 	return (mod_info(&modlinkage, modinfop));
501 }
502 
503 static int
504 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
505 {
506 	vsw_t			*vswp;
507 	int			instance;
508 	char			hashname[MAXNAMELEN];
509 	char			qname[TASKQ_NAMELEN];
510 	vsw_attach_progress_t	progress = PROG_init;
511 	int			rv;
512 
513 	switch (cmd) {
514 	case DDI_ATTACH:
515 		break;
516 	case DDI_RESUME:
517 		/* nothing to do for this non-device */
518 		return (DDI_SUCCESS);
519 	case DDI_PM_RESUME:
520 	default:
521 		return (DDI_FAILURE);
522 	}
523 
524 	instance = ddi_get_instance(dip);
525 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
526 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
527 		return (DDI_FAILURE);
528 	}
529 	vswp = ddi_get_soft_state(vsw_state, instance);
530 
531 	if (vswp == NULL) {
532 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
533 		goto vsw_attach_fail;
534 	}
535 
536 	vswp->dip = dip;
537 	vswp->instance = instance;
538 	ddi_set_driver_private(dip, (caddr_t)vswp);
539 
540 	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
541 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
542 	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
543 	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
544 	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
545 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
546 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
547 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
548 
549 	progress |= PROG_locks;
550 
551 	rv = vsw_read_mdprops(vswp);
552 	if (rv != 0)
553 		goto vsw_attach_fail;
554 
555 	progress |= PROG_readmd;
556 
557 	/* setup the unicast forwarding database  */
558 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
559 	    vswp->instance);
560 	D2(vswp, "creating unicast hash table (%s)...", hashname);
561 	vswp->fdb_nchains = vsw_fdb_nchains;
562 	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
563 	    mod_hash_null_valdtor, sizeof (void *));
564 	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
565 	progress |= PROG_fdb;
566 
567 	/* setup the multicast fowarding database */
568 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
569 	    vswp->instance);
570 	D2(vswp, "creating multicast hash table %s)...", hashname);
571 	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
572 	    mod_hash_null_valdtor, sizeof (void *));
573 
574 	progress |= PROG_mfdb;
575 
576 	/*
577 	 * Create the taskq which will process all the VIO
578 	 * control messages.
579 	 */
580 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
581 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
582 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
583 		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
584 		    vswp->instance);
585 		goto vsw_attach_fail;
586 	}
587 
588 	progress |= PROG_taskq;
589 
590 	/* prevent auto-detaching */
591 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
592 	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
593 		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
594 		    "instance %u", DDI_NO_AUTODETACH, instance);
595 	}
596 
597 	/*
598 	 * The null switching function is set to avoid panic until
599 	 * switch mode is setup.
600 	 */
601 	vswp->vsw_switch_frame = vsw_switch_frame_nop;
602 
603 	/*
604 	 * Setup the required switching mode, based on the mdprops that we read
605 	 * earlier. We start a thread to do this, to avoid calling mac_open()
606 	 * directly from attach().
607 	 */
608 	rv = vsw_setup_switching_start(vswp);
609 	if (rv != 0) {
610 		goto vsw_attach_fail;
611 	}
612 
613 	progress |= PROG_swmode;
614 
615 	/* Register with mac layer as a provider */
616 	rv = vsw_mac_register(vswp);
617 	if (rv != 0)
618 		goto vsw_attach_fail;
619 
620 	progress |= PROG_macreg;
621 
622 	/*
623 	 * Now we have everything setup, register an interest in
624 	 * specific MD nodes.
625 	 *
626 	 * The callback is invoked in 2 cases, firstly if upon mdeg
627 	 * registration there are existing nodes which match our specified
628 	 * criteria, and secondly if the MD is changed (and again, there
629 	 * are nodes which we are interested in present within it. Note
630 	 * that our callback will be invoked even if our specified nodes
631 	 * have not actually changed).
632 	 *
633 	 */
634 	rv = vsw_mdeg_register(vswp);
635 	if (rv != 0)
636 		goto vsw_attach_fail;
637 
638 	progress |= PROG_mdreg;
639 
640 	vswp->attach_progress = progress;
641 
642 	WRITE_ENTER(&vsw_rw);
643 	vswp->next = vsw_head;
644 	vsw_head = vswp;
645 	RW_EXIT(&vsw_rw);
646 
647 	ddi_report_dev(vswp->dip);
648 	return (DDI_SUCCESS);
649 
650 vsw_attach_fail:
651 	DERR(NULL, "vsw_attach: failed");
652 
653 	vswp->attach_progress = progress;
654 	(void) vsw_unattach(vswp);
655 	ddi_soft_state_free(vsw_state, instance);
656 	return (DDI_FAILURE);
657 }
658 
659 static int
660 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
661 {
662 	vsw_t			**vswpp, *vswp;
663 	int 			instance;
664 
665 	instance = ddi_get_instance(dip);
666 	vswp = ddi_get_soft_state(vsw_state, instance);
667 
668 	if (vswp == NULL) {
669 		return (DDI_FAILURE);
670 	}
671 
672 	switch (cmd) {
673 	case DDI_DETACH:
674 		break;
675 	case DDI_SUSPEND:
676 	case DDI_PM_SUSPEND:
677 	default:
678 		return (DDI_FAILURE);
679 	}
680 
681 	D2(vswp, "detaching instance %d", instance);
682 
683 	if (vsw_unattach(vswp) != 0) {
684 		return (DDI_FAILURE);
685 	}
686 
687 	ddi_remove_minor_node(dip, NULL);
688 
689 	WRITE_ENTER(&vsw_rw);
690 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
691 		if (*vswpp == vswp) {
692 			*vswpp = vswp->next;
693 			break;
694 		}
695 	}
696 	RW_EXIT(&vsw_rw);
697 
698 	ddi_soft_state_free(vsw_state, instance);
699 
700 	return (DDI_SUCCESS);
701 }
702 
703 /*
704  * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
705  * the only reason this function could fail is if mac_unregister() fails.
706  * Otherwise, this function must ensure that all resources are freed and return
707  * success.
708  */
709 static int
710 vsw_unattach(vsw_t *vswp)
711 {
712 	vio_mblk_pool_t		*poolp, *npoolp;
713 	vsw_attach_progress_t	progress;
714 
715 	progress = vswp->attach_progress;
716 
717 	/*
718 	 * Unregister from the gldv3 subsystem. This can fail, in particular
719 	 * if there are still any open references to this mac device; in which
720 	 * case we just return failure without continuing to detach further.
721 	 */
722 	if (progress & PROG_macreg) {
723 		if (vsw_mac_unregister(vswp) != 0) {
724 			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
725 			    "MAC layer", vswp->instance);
726 			return (1);
727 		}
728 		progress &= ~PROG_macreg;
729 	}
730 
731 	/*
732 	 * Now that we have unregistered from gldv3, we must finish all other
733 	 * steps and successfully return from this function; otherwise we will
734 	 * end up leaving the device in a broken/unusable state.
735 	 *
736 	 * If we have registered with mdeg, unregister now to stop further
737 	 * callbacks to this vsw device and/or its ports. Then, detach any
738 	 * existing ports.
739 	 */
740 	if (progress & PROG_mdreg) {
741 		vsw_mdeg_unregister(vswp);
742 		vsw_detach_ports(vswp);
743 
744 		/*
745 		 * At this point, we attempt to free receive mblk pools that
746 		 * couldn't be destroyed when the ports were detached; if this
747 		 * attempt also fails, we hook up the pool(s) to the module so
748 		 * they can be cleaned up in _fini().
749 		 */
750 		poolp = vswp->rxh;
751 		while (poolp != NULL) {
752 			npoolp = vswp->rxh = poolp->nextp;
753 			if (vio_destroy_mblks(poolp) != 0) {
754 				WRITE_ENTER(&vsw_rw);
755 				poolp->nextp = vsw_rx_poolp;
756 				vsw_rx_poolp = poolp;
757 				RW_EXIT(&vsw_rw);
758 			}
759 			poolp = npoolp;
760 		}
761 		progress &= ~PROG_mdreg;
762 	}
763 
764 	/*
765 	 * If we have started a thread to setup the switching mode, stop it, if
766 	 * it is still running. If it has finished setting up the switching
767 	 * mode, then we need to clean up some additional things if we are
768 	 * running in L2 mode: first free up any hybrid resources; then stop
769 	 * and close the underlying physical device. Note that we would have
770 	 * already released all per mac_client resources (ucast, mcast addrs,
771 	 * hio-shares etc) as all the ports are detached and if the vsw device
772 	 * itself was in use as an interface, it has been unplumbed (otherwise
773 	 * mac_unregister() above would fail).
774 	 */
775 	if (progress & PROG_swmode) {
776 
777 		vsw_setup_switching_stop(vswp);
778 
779 		if (vswp->hio_capable == B_TRUE) {
780 			vsw_hio_cleanup(vswp);
781 			vswp->hio_capable = B_FALSE;
782 		}
783 
784 		mutex_enter(&vswp->mac_lock);
785 		vsw_mac_close(vswp);
786 		mutex_exit(&vswp->mac_lock);
787 
788 		progress &= ~PROG_swmode;
789 	}
790 
791 	/*
792 	 * By now any pending tasks have finished and the underlying
793 	 * ldc's have been destroyed, so its safe to delete the control
794 	 * message taskq.
795 	 */
796 	if (progress & PROG_taskq) {
797 		ddi_taskq_destroy(vswp->taskq_p);
798 		progress &= ~PROG_taskq;
799 	}
800 
801 	/* Destroy the multicast hash table */
802 	if (progress & PROG_mfdb) {
803 		mod_hash_destroy_hash(vswp->mfdb);
804 		progress &= ~PROG_mfdb;
805 	}
806 
807 	/* Destroy the vlan hash table and fdb */
808 	if (progress & PROG_fdb) {
809 		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
810 		mod_hash_destroy_hash(vswp->fdb_hashp);
811 		progress &= ~PROG_fdb;
812 	}
813 
814 	if (progress & PROG_readmd) {
815 		if (VSW_PRI_ETH_DEFINED(vswp)) {
816 			kmem_free(vswp->pri_types,
817 			    sizeof (uint16_t) * vswp->pri_num_types);
818 			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
819 		}
820 		progress &= ~PROG_readmd;
821 	}
822 
823 	if (progress & PROG_locks) {
824 		rw_destroy(&vswp->plist.lockrw);
825 		rw_destroy(&vswp->mfdbrw);
826 		rw_destroy(&vswp->if_lockrw);
827 		rw_destroy(&vswp->maccl_rwlock);
828 		cv_destroy(&vswp->sw_thr_cv);
829 		mutex_destroy(&vswp->sw_thr_lock);
830 		mutex_destroy(&vswp->mca_lock);
831 		mutex_destroy(&vswp->mac_lock);
832 		progress &= ~PROG_locks;
833 	}
834 
835 	vswp->attach_progress = progress;
836 
837 	return (0);
838 }
839 
840 /*
841  * one time cleanup.
842  */
843 static int
844 vsw_mod_cleanup(void)
845 {
846 	vio_mblk_pool_t		*poolp, *npoolp;
847 
848 	/*
849 	 * If any rx mblk pools are still in use, return
850 	 * error and stop the module from unloading.
851 	 */
852 	WRITE_ENTER(&vsw_rw);
853 	poolp = vsw_rx_poolp;
854 	while (poolp != NULL) {
855 		npoolp = vsw_rx_poolp = poolp->nextp;
856 		if (vio_destroy_mblks(poolp) != 0) {
857 			vsw_rx_poolp = poolp;
858 			RW_EXIT(&vsw_rw);
859 			return (EBUSY);
860 		}
861 		poolp = npoolp;
862 	}
863 	RW_EXIT(&vsw_rw);
864 
865 	return (0);
866 }
867 
868 /*
869  * Get the value of the "vsw-phys-dev" property in the specified
870  * node. This property is the name of the physical device that
871  * the virtual switch will use to talk to the outside world.
872  *
873  * Note it is valid for this property to be NULL (but the property
874  * itself must exist). Callers of this routine should verify that
875  * the value returned is what they expected (i.e. either NULL or non NULL).
876  *
877  * On success returns value of the property in region pointed to by
878  * the 'name' argument, and with return value of 0. Otherwise returns 1.
879  */
880 static int
881 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
882 {
883 	int		len = 0;
884 	int		instance;
885 	char		*physname = NULL;
886 	char		*dev;
887 	const char	*dev_name;
888 	char		myname[MAXNAMELEN];
889 
890 	dev_name = ddi_driver_name(vswp->dip);
891 	instance = ddi_get_instance(vswp->dip);
892 	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
893 
894 	if (md_get_prop_data(mdp, node, physdev_propname,
895 	    (uint8_t **)(&physname), &len) != 0) {
896 		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
897 		    "device(s) from MD", vswp->instance);
898 		return (1);
899 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
900 		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
901 		    vswp->instance, physname);
902 		return (1);
903 	} else if (strcmp(myname, physname) == 0) {
904 		/*
905 		 * Prevent the vswitch from opening itself as the
906 		 * network device.
907 		 */
908 		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
909 		    vswp->instance, physname);
910 		return (1);
911 	} else {
912 		(void) strncpy(name, physname, strlen(physname) + 1);
913 		D2(vswp, "%s: using first device specified (%s)",
914 		    __func__, physname);
915 	}
916 
917 #ifdef DEBUG
918 	/*
919 	 * As a temporary measure to aid testing we check to see if there
920 	 * is a vsw.conf file present. If there is we use the value of the
921 	 * vsw_physname property in the file as the name of the physical
922 	 * device, overriding the value from the MD.
923 	 *
924 	 * There may be multiple devices listed, but for the moment
925 	 * we just use the first one.
926 	 */
927 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
928 	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
929 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
930 			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
931 			    vswp->instance, dev);
932 			ddi_prop_free(dev);
933 			return (1);
934 		} else {
935 			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
936 			    "config file", vswp->instance, dev);
937 
938 			(void) strncpy(name, dev, strlen(dev) + 1);
939 		}
940 
941 		ddi_prop_free(dev);
942 	}
943 #endif
944 
945 	return (0);
946 }
947 
948 /*
949  * Read the 'vsw-switch-mode' property from the specified MD node.
950  *
951  * Returns 0 on success, otherwise returns 1.
952  */
953 static int
954 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
955 {
956 	int		len = 0;
957 	char		*smode = NULL;
958 	char		*curr_mode = NULL;
959 
960 	D1(vswp, "%s: enter", __func__);
961 
962 	/*
963 	 * Get the switch-mode property. The modes are listed in
964 	 * decreasing order of preference, i.e. prefered mode is
965 	 * first item in list.
966 	 */
967 	len = 0;
968 	if (md_get_prop_data(mdp, node, smode_propname,
969 	    (uint8_t **)(&smode), &len) != 0) {
970 		/*
971 		 * Unable to get switch-mode property from MD, nothing
972 		 * more we can do.
973 		 */
974 		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
975 		    " from the MD", vswp->instance);
976 		return (1);
977 	}
978 
979 	curr_mode = smode;
980 	/*
981 	 * Modes of operation:
982 	 * 'switched'	 - layer 2 switching, underlying HW in
983 	 *			programmed mode.
984 	 * 'promiscuous' - layer 2 switching, underlying HW in
985 	 *			promiscuous mode.
986 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
987 	 *			in non-promiscuous mode.
988 	 */
989 	while (curr_mode < (smode + len)) {
990 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
991 		if (strcmp(curr_mode, "switched") == 0) {
992 			*mode = VSW_LAYER2;
993 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
994 			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
995 		} else if (strcmp(curr_mode, "routed") == 0) {
996 			*mode = VSW_LAYER3;
997 		} else {
998 			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
999 			    "setting to default switched mode",
1000 			    vswp->instance, curr_mode);
1001 			*mode = VSW_LAYER2;
1002 		}
1003 		curr_mode += strlen(curr_mode) + 1;
1004 	}
1005 
1006 	D2(vswp, "%s: %d mode", __func__, *mode);
1007 
1008 	D1(vswp, "%s: exit", __func__);
1009 
1010 	return (0);
1011 }
1012 
1013 /*
1014  * Register with the MAC layer as a network device, so we
1015  * can be plumbed if necessary.
1016  */
1017 static int
1018 vsw_mac_register(vsw_t *vswp)
1019 {
1020 	mac_register_t	*macp;
1021 	int		rv;
1022 
1023 	D1(vswp, "%s: enter", __func__);
1024 
1025 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1026 		return (EINVAL);
1027 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1028 	macp->m_driver = vswp;
1029 	macp->m_dip = vswp->dip;
1030 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1031 	macp->m_callbacks = &vsw_m_callbacks;
1032 	macp->m_min_sdu = 0;
1033 	macp->m_max_sdu = vswp->mtu;
1034 	macp->m_margin = VLAN_TAGSZ;
1035 	rv = mac_register(macp, &vswp->if_mh);
1036 	mac_free(macp);
1037 	if (rv != 0) {
1038 		/*
1039 		 * Treat this as a non-fatal error as we may be
1040 		 * able to operate in some other mode.
1041 		 */
1042 		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1043 		    "a provider with MAC layer", vswp->instance);
1044 		return (rv);
1045 	}
1046 
1047 	vswp->if_state |= VSW_IF_REG;
1048 
1049 	D1(vswp, "%s: exit", __func__);
1050 
1051 	return (rv);
1052 }
1053 
1054 static int
1055 vsw_mac_unregister(vsw_t *vswp)
1056 {
1057 	int		rv = 0;
1058 
1059 	D1(vswp, "%s: enter", __func__);
1060 
1061 	WRITE_ENTER(&vswp->if_lockrw);
1062 
1063 	if (vswp->if_state & VSW_IF_REG) {
1064 		rv = mac_unregister(vswp->if_mh);
1065 		if (rv != 0) {
1066 			DWARN(vswp, "%s: unable to unregister from MAC "
1067 			    "framework", __func__);
1068 
1069 			RW_EXIT(&vswp->if_lockrw);
1070 			D1(vswp, "%s: fail exit", __func__);
1071 			return (rv);
1072 		}
1073 
1074 		/* mark i/f as down and unregistered */
1075 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1076 	}
1077 	RW_EXIT(&vswp->if_lockrw);
1078 
1079 	D1(vswp, "%s: exit", __func__);
1080 
1081 	return (rv);
1082 }
1083 
1084 static int
1085 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1086 {
1087 	vsw_t			*vswp = (vsw_t *)arg;
1088 
1089 	D1(vswp, "%s: enter", __func__);
1090 
1091 	mutex_enter(&vswp->mac_lock);
1092 	if (vswp->mh == NULL) {
1093 		mutex_exit(&vswp->mac_lock);
1094 		return (EINVAL);
1095 	}
1096 
1097 	/* return stats from underlying device */
1098 	*val = mac_stat_get(vswp->mh, stat);
1099 
1100 	mutex_exit(&vswp->mac_lock);
1101 
1102 	return (0);
1103 }
1104 
1105 static void
1106 vsw_m_stop(void *arg)
1107 {
1108 	vsw_t	*vswp = (vsw_t *)arg;
1109 
1110 	D1(vswp, "%s: enter", __func__);
1111 
1112 	WRITE_ENTER(&vswp->if_lockrw);
1113 	vswp->if_state &= ~VSW_IF_UP;
1114 	RW_EXIT(&vswp->if_lockrw);
1115 
1116 	/* Cleanup and close the mac client */
1117 	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1118 
1119 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1120 }
1121 
1122 static int
1123 vsw_m_start(void *arg)
1124 {
1125 	int		rv;
1126 	vsw_t		*vswp = (vsw_t *)arg;
1127 
1128 	D1(vswp, "%s: enter", __func__);
1129 
1130 	WRITE_ENTER(&vswp->if_lockrw);
1131 
1132 	vswp->if_state |= VSW_IF_UP;
1133 
1134 	if (vswp->switching_setup_done == B_FALSE) {
1135 		/*
1136 		 * If the switching mode has not been setup yet, just
1137 		 * return. The unicast address will be programmed
1138 		 * after the physical device is successfully setup by the
1139 		 * timeout handler.
1140 		 */
1141 		RW_EXIT(&vswp->if_lockrw);
1142 		return (0);
1143 	}
1144 
1145 	/* if in layer2 mode, program unicast address. */
1146 	if (vswp->mh != NULL) {
1147 		/* Init a mac client and program addresses */
1148 		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1149 		if (rv != 0) {
1150 			cmn_err(CE_NOTE,
1151 			    "!vsw%d: failed to program interface "
1152 			    "unicast address\n", vswp->instance);
1153 		}
1154 	}
1155 
1156 	RW_EXIT(&vswp->if_lockrw);
1157 
1158 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1159 	return (0);
1160 }
1161 
1162 /*
1163  * Change the local interface address.
1164  *
1165  * Note: we don't support this entry point. The local
1166  * mac address of the switch can only be changed via its
1167  * MD node properties.
1168  */
1169 static int
1170 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1171 {
1172 	_NOTE(ARGUNUSED(arg, macaddr))
1173 
1174 	return (DDI_FAILURE);
1175 }
1176 
1177 static int
1178 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1179 {
1180 	vsw_t		*vswp = (vsw_t *)arg;
1181 	mcst_addr_t	*mcst_p = NULL;
1182 	uint64_t	addr = 0x0;
1183 	int		i, ret = 0;
1184 
1185 	D1(vswp, "%s: enter", __func__);
1186 
1187 	/*
1188 	 * Convert address into form that can be used
1189 	 * as hash table key.
1190 	 */
1191 	for (i = 0; i < ETHERADDRL; i++) {
1192 		addr = (addr << 8) | mca[i];
1193 	}
1194 
1195 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1196 
1197 	if (add) {
1198 		D2(vswp, "%s: adding multicast", __func__);
1199 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1200 			/*
1201 			 * Update the list of multicast addresses
1202 			 * contained within the vsw_t structure to
1203 			 * include this new one.
1204 			 */
1205 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1206 			if (mcst_p == NULL) {
1207 				DERR(vswp, "%s unable to alloc mem", __func__);
1208 				(void) vsw_del_mcst(vswp,
1209 				    VSW_LOCALDEV, addr, NULL);
1210 				return (1);
1211 			}
1212 			mcst_p->addr = addr;
1213 			ether_copy(mca, &mcst_p->mca);
1214 
1215 			/*
1216 			 * Call into the underlying driver to program the
1217 			 * address into HW.
1218 			 */
1219 			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1220 			    VSW_LOCALDEV);
1221 			if (ret != 0) {
1222 				(void) vsw_del_mcst(vswp,
1223 				    VSW_LOCALDEV, addr, NULL);
1224 				kmem_free(mcst_p, sizeof (*mcst_p));
1225 				return (ret);
1226 			}
1227 
1228 			mutex_enter(&vswp->mca_lock);
1229 			mcst_p->nextp = vswp->mcap;
1230 			vswp->mcap = mcst_p;
1231 			mutex_exit(&vswp->mca_lock);
1232 		} else {
1233 			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1234 			    "address", vswp->instance);
1235 		}
1236 		return (ret);
1237 	}
1238 
1239 	D2(vswp, "%s: removing multicast", __func__);
1240 	/*
1241 	 * Remove the address from the hash table..
1242 	 */
1243 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1244 
1245 		/*
1246 		 * ..and then from the list maintained in the
1247 		 * vsw_t structure.
1248 		 */
1249 		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1250 		ASSERT(mcst_p != NULL);
1251 
1252 		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1253 		kmem_free(mcst_p, sizeof (*mcst_p));
1254 	}
1255 
1256 	D1(vswp, "%s: exit", __func__);
1257 
1258 	return (0);
1259 }
1260 
1261 static int
1262 vsw_m_promisc(void *arg, boolean_t on)
1263 {
1264 	vsw_t		*vswp = (vsw_t *)arg;
1265 
1266 	D1(vswp, "%s: enter", __func__);
1267 
1268 	WRITE_ENTER(&vswp->if_lockrw);
1269 	if (on)
1270 		vswp->if_state |= VSW_IF_PROMISC;
1271 	else
1272 		vswp->if_state &= ~VSW_IF_PROMISC;
1273 	RW_EXIT(&vswp->if_lockrw);
1274 
1275 	D1(vswp, "%s: exit", __func__);
1276 
1277 	return (0);
1278 }
1279 
1280 static mblk_t *
1281 vsw_m_tx(void *arg, mblk_t *mp)
1282 {
1283 	vsw_t		*vswp = (vsw_t *)arg;
1284 
1285 	D1(vswp, "%s: enter", __func__);
1286 
1287 	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1288 
1289 	if (mp == NULL) {
1290 		return (NULL);
1291 	}
1292 
1293 	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1294 
1295 	D1(vswp, "%s: exit", __func__);
1296 
1297 	return (NULL);
1298 }
1299 
1300 /*
1301  * Register for machine description (MD) updates.
1302  *
1303  * Returns 0 on success, 1 on failure.
1304  */
1305 static int
1306 vsw_mdeg_register(vsw_t *vswp)
1307 {
1308 	mdeg_prop_spec_t	*pspecp;
1309 	mdeg_node_spec_t	*inst_specp;
1310 	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1311 	size_t			templatesz;
1312 	int			rv;
1313 
1314 	D1(vswp, "%s: enter", __func__);
1315 
1316 	/*
1317 	 * Allocate and initialize a per-instance copy
1318 	 * of the global property spec array that will
1319 	 * uniquely identify this vsw instance.
1320 	 */
1321 	templatesz = sizeof (vsw_prop_template);
1322 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1323 
1324 	bcopy(vsw_prop_template, pspecp, templatesz);
1325 
1326 	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1327 
1328 	/* initialize the complete prop spec structure */
1329 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1330 	inst_specp->namep = "virtual-device";
1331 	inst_specp->specp = pspecp;
1332 
1333 	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1334 	    vswp->regprop);
1335 	/*
1336 	 * Register an interest in 'virtual-device' nodes with a
1337 	 * 'name' property of 'virtual-network-switch'
1338 	 */
1339 	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1340 	    (void *)vswp, &mdeg_hdl);
1341 	if (rv != MDEG_SUCCESS) {
1342 		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1343 		    __func__, rv);
1344 		goto mdeg_reg_fail;
1345 	}
1346 
1347 	/*
1348 	 * Register an interest in 'vsw-port' nodes.
1349 	 */
1350 	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1351 	    (void *)vswp, &mdeg_port_hdl);
1352 	if (rv != MDEG_SUCCESS) {
1353 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1354 		(void) mdeg_unregister(mdeg_hdl);
1355 		goto mdeg_reg_fail;
1356 	}
1357 
1358 	/* save off data that will be needed later */
1359 	vswp->inst_spec = inst_specp;
1360 	vswp->mdeg_hdl = mdeg_hdl;
1361 	vswp->mdeg_port_hdl = mdeg_port_hdl;
1362 
1363 	D1(vswp, "%s: exit", __func__);
1364 	return (0);
1365 
1366 mdeg_reg_fail:
1367 	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1368 	    vswp->instance);
1369 	kmem_free(pspecp, templatesz);
1370 	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1371 
1372 	vswp->mdeg_hdl = NULL;
1373 	vswp->mdeg_port_hdl = NULL;
1374 
1375 	return (1);
1376 }
1377 
1378 static void
1379 vsw_mdeg_unregister(vsw_t *vswp)
1380 {
1381 	D1(vswp, "vsw_mdeg_unregister: enter");
1382 
1383 	if (vswp->mdeg_hdl != NULL)
1384 		(void) mdeg_unregister(vswp->mdeg_hdl);
1385 
1386 	if (vswp->mdeg_port_hdl != NULL)
1387 		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1388 
1389 	if (vswp->inst_spec != NULL) {
1390 		if (vswp->inst_spec->specp != NULL) {
1391 			(void) kmem_free(vswp->inst_spec->specp,
1392 			    sizeof (vsw_prop_template));
1393 			vswp->inst_spec->specp = NULL;
1394 		}
1395 
1396 		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1397 		vswp->inst_spec = NULL;
1398 	}
1399 
1400 	D1(vswp, "vsw_mdeg_unregister: exit");
1401 }
1402 
1403 /*
1404  * Mdeg callback invoked for the vsw node itself.
1405  */
1406 static int
1407 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1408 {
1409 	vsw_t		*vswp;
1410 	md_t		*mdp;
1411 	mde_cookie_t	node;
1412 	uint64_t	inst;
1413 	char		*node_name = NULL;
1414 
1415 	if (resp == NULL)
1416 		return (MDEG_FAILURE);
1417 
1418 	vswp = (vsw_t *)cb_argp;
1419 
1420 	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1421 	    " : prev matched %d", __func__, resp->added.nelem,
1422 	    resp->removed.nelem, resp->match_curr.nelem,
1423 	    resp->match_prev.nelem);
1424 
1425 	/*
1426 	 * We get an initial callback for this node as 'added'
1427 	 * after registering with mdeg. Note that we would have
1428 	 * already gathered information about this vsw node by
1429 	 * walking MD earlier during attach (in vsw_read_mdprops()).
1430 	 * So, there is a window where the properties of this
1431 	 * node might have changed when we get this initial 'added'
1432 	 * callback. We handle this as if an update occured
1433 	 * and invoke the same function which handles updates to
1434 	 * the properties of this vsw-node if any.
1435 	 *
1436 	 * A non-zero 'match' value indicates that the MD has been
1437 	 * updated and that a virtual-network-switch node is
1438 	 * present which may or may not have been updated. It is
1439 	 * up to the clients to examine their own nodes and
1440 	 * determine if they have changed.
1441 	 */
1442 	if (resp->added.nelem != 0) {
1443 
1444 		if (resp->added.nelem != 1) {
1445 			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1446 			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1447 			return (MDEG_FAILURE);
1448 		}
1449 
1450 		mdp = resp->added.mdp;
1451 		node = resp->added.mdep[0];
1452 
1453 	} else if (resp->match_curr.nelem != 0) {
1454 
1455 		if (resp->match_curr.nelem != 1) {
1456 			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1457 			    "invalid: %d\n", vswp->instance,
1458 			    resp->match_curr.nelem);
1459 			return (MDEG_FAILURE);
1460 		}
1461 
1462 		mdp = resp->match_curr.mdp;
1463 		node = resp->match_curr.mdep[0];
1464 
1465 	} else {
1466 		return (MDEG_FAILURE);
1467 	}
1468 
1469 	/* Validate name and instance */
1470 	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1471 		DERR(vswp, "%s: unable to get node name\n",  __func__);
1472 		return (MDEG_FAILURE);
1473 	}
1474 
1475 	/* is this a virtual-network-switch? */
1476 	if (strcmp(node_name, vsw_propname) != 0) {
1477 		DERR(vswp, "%s: Invalid node name: %s\n",
1478 		    __func__, node_name);
1479 		return (MDEG_FAILURE);
1480 	}
1481 
1482 	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1483 		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1484 		    __func__);
1485 		return (MDEG_FAILURE);
1486 	}
1487 
1488 	/* is this the right instance of vsw? */
1489 	if (inst != vswp->regprop) {
1490 		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1491 		    __func__, inst);
1492 		return (MDEG_FAILURE);
1493 	}
1494 
1495 	vsw_update_md_prop(vswp, mdp, node);
1496 
1497 	return (MDEG_SUCCESS);
1498 }
1499 
1500 /*
1501  * Mdeg callback invoked for changes to the vsw-port nodes
1502  * under the vsw node.
1503  */
1504 static int
1505 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1506 {
1507 	vsw_t		*vswp;
1508 	int		idx;
1509 	md_t		*mdp;
1510 	mde_cookie_t	node;
1511 	uint64_t	inst;
1512 	int		rv;
1513 
1514 	if ((resp == NULL) || (cb_argp == NULL))
1515 		return (MDEG_FAILURE);
1516 
1517 	vswp = (vsw_t *)cb_argp;
1518 
1519 	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1520 	    " : prev matched %d", __func__, resp->added.nelem,
1521 	    resp->removed.nelem, resp->match_curr.nelem,
1522 	    resp->match_prev.nelem);
1523 
1524 	/* process added ports */
1525 	for (idx = 0; idx < resp->added.nelem; idx++) {
1526 		mdp = resp->added.mdp;
1527 		node = resp->added.mdep[idx];
1528 
1529 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1530 
1531 		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1532 			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1533 			    "(0x%lx), err=%d", vswp->instance, node, rv);
1534 		}
1535 	}
1536 
1537 	/* process removed ports */
1538 	for (idx = 0; idx < resp->removed.nelem; idx++) {
1539 		mdp = resp->removed.mdp;
1540 		node = resp->removed.mdep[idx];
1541 
1542 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1543 			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1544 			    __func__, id_propname, idx);
1545 			continue;
1546 		}
1547 
1548 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1549 
1550 		if (vsw_port_detach(vswp, inst) != 0) {
1551 			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1552 			    vswp->instance, inst);
1553 		}
1554 	}
1555 
1556 	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1557 		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1558 		    resp->match_curr.mdep[idx],
1559 		    resp->match_prev.mdp,
1560 		    resp->match_prev.mdep[idx]);
1561 	}
1562 
1563 	D1(vswp, "%s: exit", __func__);
1564 
1565 	return (MDEG_SUCCESS);
1566 }
1567 
1568 /*
1569  * Scan the machine description for this instance of vsw
1570  * and read its properties. Called only from vsw_attach().
1571  * Returns: 0 on success, 1 on failure.
1572  */
1573 static int
1574 vsw_read_mdprops(vsw_t *vswp)
1575 {
1576 	md_t		*mdp = NULL;
1577 	mde_cookie_t	rootnode;
1578 	mde_cookie_t	*listp = NULL;
1579 	uint64_t	inst;
1580 	uint64_t	cfgh;
1581 	char		*name;
1582 	int		rv = 1;
1583 	int		num_nodes = 0;
1584 	int		num_devs = 0;
1585 	int		listsz = 0;
1586 	int		i;
1587 
1588 	/*
1589 	 * In each 'virtual-device' node in the MD there is a
1590 	 * 'cfg-handle' property which is the MD's concept of
1591 	 * an instance number (this may be completely different from
1592 	 * the device drivers instance #). OBP reads that value and
1593 	 * stores it in the 'reg' property of the appropriate node in
1594 	 * the device tree. We first read this reg property and use this
1595 	 * to compare against the 'cfg-handle' property of vsw nodes
1596 	 * in MD to get to this specific vsw instance and then read
1597 	 * other properties that we are interested in.
1598 	 * We also cache the value of 'reg' property and use it later
1599 	 * to register callbacks with mdeg (see vsw_mdeg_register())
1600 	 */
1601 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1602 	    DDI_PROP_DONTPASS, reg_propname, -1);
1603 	if (inst == -1) {
1604 		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1605 		    "OBP device tree", vswp->instance, reg_propname);
1606 		return (rv);
1607 	}
1608 
1609 	vswp->regprop = inst;
1610 
1611 	if ((mdp = md_get_handle()) == NULL) {
1612 		DWARN(vswp, "%s: cannot init MD\n", __func__);
1613 		return (rv);
1614 	}
1615 
1616 	num_nodes = md_node_count(mdp);
1617 	ASSERT(num_nodes > 0);
1618 
1619 	listsz = num_nodes * sizeof (mde_cookie_t);
1620 	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1621 
1622 	rootnode = md_root_node(mdp);
1623 
1624 	/* search for all "virtual_device" nodes */
1625 	num_devs = md_scan_dag(mdp, rootnode,
1626 	    md_find_name(mdp, vdev_propname),
1627 	    md_find_name(mdp, "fwd"), listp);
1628 	if (num_devs <= 0) {
1629 		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1630 		goto vsw_readmd_exit;
1631 	}
1632 
1633 	/*
1634 	 * Now loop through the list of virtual-devices looking for
1635 	 * devices with name "virtual-network-switch" and for each
1636 	 * such device compare its instance with what we have from
1637 	 * the 'reg' property to find the right node in MD and then
1638 	 * read all its properties.
1639 	 */
1640 	for (i = 0; i < num_devs; i++) {
1641 
1642 		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1643 			DWARN(vswp, "%s: name property not found\n",
1644 			    __func__);
1645 			goto vsw_readmd_exit;
1646 		}
1647 
1648 		/* is this a virtual-network-switch? */
1649 		if (strcmp(name, vsw_propname) != 0)
1650 			continue;
1651 
1652 		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1653 			DWARN(vswp, "%s: cfg-handle property not found\n",
1654 			    __func__);
1655 			goto vsw_readmd_exit;
1656 		}
1657 
1658 		/* is this the required instance of vsw? */
1659 		if (inst != cfgh)
1660 			continue;
1661 
1662 		/* now read all properties of this vsw instance */
1663 		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1664 		break;
1665 	}
1666 
1667 vsw_readmd_exit:
1668 
1669 	kmem_free(listp, listsz);
1670 	(void) md_fini_handle(mdp);
1671 	return (rv);
1672 }
1673 
1674 /*
1675  * Read the initial start-of-day values from the specified MD node.
1676  */
1677 static int
1678 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1679 {
1680 	uint64_t	macaddr = 0;
1681 
1682 	D1(vswp, "%s: enter", __func__);
1683 
1684 	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1685 		return (1);
1686 	}
1687 
1688 	/* mac address for vswitch device itself */
1689 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1690 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1691 		    vswp->instance);
1692 		return (1);
1693 	}
1694 
1695 	vsw_save_lmacaddr(vswp, macaddr);
1696 
1697 	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1698 		DWARN(vswp, "%s: Unable to read %s property from MD, "
1699 		    "defaulting to 'switched' mode",
1700 		    __func__, smode_propname);
1701 
1702 		vswp->smode = VSW_LAYER2;
1703 	}
1704 
1705 	/* read mtu */
1706 	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1707 	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1708 		vswp->mtu = ETHERMTU;
1709 	}
1710 	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1711 	    VLAN_TAGSZ;
1712 
1713 	/* read vlan id properties of this vsw instance */
1714 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1715 	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1716 
1717 	/* read priority-ether-types */
1718 	vsw_read_pri_eth_types(vswp, mdp, node);
1719 
1720 	D1(vswp, "%s: exit", __func__);
1721 	return (0);
1722 }
1723 
1724 /*
1725  * Read vlan id properties of the given MD node.
1726  * Arguments:
1727  *   arg:          device argument(vsw device or a port)
1728  *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1729  *   mdp:          machine description
1730  *   node:         md node cookie
1731  *
1732  * Returns:
1733  *   pvidp:        port-vlan-id of the node
1734  *   vidspp:       list of vlan-ids of the node
1735  *   nvidsp:       # of vlan-ids in the list
1736  *   default_idp:  default-vlan-id of the node(if node is vsw device)
1737  */
1738 static void
1739 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1740 	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1741 	uint16_t *default_idp)
1742 {
1743 	vsw_t		*vswp;
1744 	vsw_port_t	*portp;
1745 	char		*pvid_propname;
1746 	char		*vid_propname;
1747 	uint_t		nvids = 0;
1748 	uint32_t	vids_size;
1749 	int		rv;
1750 	int		i;
1751 	uint64_t	*data;
1752 	uint64_t	val;
1753 	int		size;
1754 	int		inst;
1755 
1756 	if (type == VSW_LOCALDEV) {
1757 
1758 		vswp = (vsw_t *)arg;
1759 		pvid_propname = vsw_pvid_propname;
1760 		vid_propname = vsw_vid_propname;
1761 		inst = vswp->instance;
1762 
1763 	} else if (type == VSW_VNETPORT) {
1764 
1765 		portp = (vsw_port_t *)arg;
1766 		vswp = portp->p_vswp;
1767 		pvid_propname = port_pvid_propname;
1768 		vid_propname = port_vid_propname;
1769 		inst = portp->p_instance;
1770 
1771 	} else {
1772 		return;
1773 	}
1774 
1775 	if (type == VSW_LOCALDEV && default_idp != NULL) {
1776 		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1777 		if (rv != 0) {
1778 			DWARN(vswp, "%s: prop(%s) not found", __func__,
1779 			    vsw_dvid_propname);
1780 
1781 			*default_idp = vsw_default_vlan_id;
1782 		} else {
1783 			*default_idp = val & 0xFFF;
1784 			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1785 			    vsw_dvid_propname, inst, *default_idp);
1786 		}
1787 	}
1788 
1789 	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1790 	if (rv != 0) {
1791 		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1792 		*pvidp = vsw_default_vlan_id;
1793 	} else {
1794 
1795 		*pvidp = val & 0xFFF;
1796 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1797 		    pvid_propname, inst, *pvidp);
1798 	}
1799 
1800 	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1801 	    &size);
1802 	if (rv != 0) {
1803 		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1804 		size = 0;
1805 	} else {
1806 		size /= sizeof (uint64_t);
1807 	}
1808 	nvids = size;
1809 
1810 	if (nvids != 0) {
1811 		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1812 		vids_size = sizeof (vsw_vlanid_t) * nvids;
1813 		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1814 		for (i = 0; i < nvids; i++) {
1815 			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1816 			(*vidspp)[i].vl_set = B_FALSE;
1817 			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1818 		}
1819 		D2(vswp, "\n");
1820 	}
1821 
1822 	*nvidsp = nvids;
1823 }
1824 
1825 /*
1826  * This function reads "priority-ether-types" property from md. This property
1827  * is used to enable support for priority frames. Applications which need
1828  * guaranteed and timely delivery of certain high priority frames to/from
1829  * a vnet or vsw within ldoms, should configure this property by providing
1830  * the ether type(s) for which the priority facility is needed.
1831  * Normal data frames are delivered over a ldc channel using the descriptor
1832  * ring mechanism which is constrained by factors such as descriptor ring size,
1833  * the rate at which the ring is processed at the peer ldc end point, etc.
1834  * The priority mechanism provides an Out-Of-Band path to send/receive frames
1835  * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1836  * descriptor ring path and enables a more reliable and timely delivery of
1837  * frames to the peer.
1838  */
1839 static void
1840 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1841 {
1842 	int		rv;
1843 	uint16_t	*types;
1844 	uint64_t	*data;
1845 	int		size;
1846 	int		i;
1847 	size_t		mblk_sz;
1848 
1849 	rv = md_get_prop_data(mdp, node, pri_types_propname,
1850 	    (uint8_t **)&data, &size);
1851 	if (rv != 0) {
1852 		/*
1853 		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1854 		 * Check if 'vsw_pri_eth_type' has been set in that case.
1855 		 */
1856 		if (vsw_pri_eth_type != 0) {
1857 			size = sizeof (vsw_pri_eth_type);
1858 			data = &vsw_pri_eth_type;
1859 		} else {
1860 			D3(vswp, "%s: prop(%s) not found", __func__,
1861 			    pri_types_propname);
1862 			size = 0;
1863 		}
1864 	}
1865 
1866 	if (size == 0) {
1867 		vswp->pri_num_types = 0;
1868 		return;
1869 	}
1870 
1871 	/*
1872 	 * we have some priority-ether-types defined;
1873 	 * allocate a table of these types and also
1874 	 * allocate a pool of mblks to transmit these
1875 	 * priority packets.
1876 	 */
1877 	size /= sizeof (uint64_t);
1878 	vswp->pri_num_types = size;
1879 	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1880 	for (i = 0, types = vswp->pri_types; i < size; i++) {
1881 		types[i] = data[i] & 0xFFFF;
1882 	}
1883 	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1884 	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1885 }
1886 
1887 static void
1888 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1889 {
1890 	int		rv;
1891 	int		inst;
1892 	uint64_t	val;
1893 	char		*mtu_propname;
1894 
1895 	mtu_propname = vsw_mtu_propname;
1896 	inst = vswp->instance;
1897 
1898 	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1899 	if (rv != 0) {
1900 		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1901 		*mtu = vsw_ethermtu;
1902 	} else {
1903 
1904 		*mtu = val & 0xFFFF;
1905 		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1906 		    mtu_propname, inst, *mtu);
1907 	}
1908 }
1909 
1910 /*
1911  * Update the mtu of the vsw device. We first check if the device has been
1912  * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1913  * new mtu and reset all ports to initiate handshake re-negotiation with peers
1914  * using the new mtu.
1915  */
1916 static int
1917 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1918 {
1919 	int	rv;
1920 
1921 	WRITE_ENTER(&vswp->if_lockrw);
1922 
1923 	if (vswp->if_state & VSW_IF_UP) {
1924 
1925 		RW_EXIT(&vswp->if_lockrw);
1926 
1927 		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1928 		    " as the device is plumbed\n", vswp->instance);
1929 		return (EBUSY);
1930 
1931 	} else {
1932 
1933 		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1934 		    __func__, vswp->mtu, mtu);
1935 
1936 		vswp->mtu = mtu;
1937 		vswp->max_frame_size = vswp->mtu +
1938 		    sizeof (struct ether_header) + VLAN_TAGSZ;
1939 
1940 		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1941 		if (rv != 0) {
1942 			cmn_err(CE_NOTE,
1943 			    "!vsw%d: Unable to update mtu with mac"
1944 			    " layer\n", vswp->instance);
1945 		}
1946 
1947 		RW_EXIT(&vswp->if_lockrw);
1948 
1949 		/* Reset ports to renegotiate with the new mtu */
1950 		vsw_reset_ports(vswp);
1951 
1952 	}
1953 
1954 	return (0);
1955 }
1956 
1957 /*
1958  * Check to see if the relevant properties in the specified node have
1959  * changed, and if so take the appropriate action.
1960  *
1961  * If any of the properties are missing or invalid we don't take
1962  * any action, as this function should only be invoked when modifications
1963  * have been made to what we assume is a working configuration, which
1964  * we leave active.
1965  *
1966  * Note it is legal for this routine to be invoked even if none of the
1967  * properties in the port node within the MD have actually changed.
1968  */
1969 static void
1970 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1971 {
1972 	char		physname[LIFNAMSIZ];
1973 	char		drv[LIFNAMSIZ];
1974 	uint_t		ddi_instance;
1975 	uint8_t		new_smode;
1976 	int		i;
1977 	uint64_t 	macaddr = 0;
1978 	enum		{MD_init = 0x1,
1979 				MD_physname = 0x2,
1980 				MD_macaddr = 0x4,
1981 				MD_smode = 0x8,
1982 				MD_vlans = 0x10,
1983 				MD_mtu = 0x20} updated;
1984 	int		rv;
1985 	uint16_t	pvid;
1986 	vsw_vlanid_t	*vids;
1987 	uint16_t	nvids;
1988 	uint32_t	mtu;
1989 
1990 	updated = MD_init;
1991 
1992 	D1(vswp, "%s: enter", __func__);
1993 
1994 	/*
1995 	 * Check if name of physical device in MD has changed.
1996 	 */
1997 	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
1998 		/*
1999 		 * Do basic sanity check on new device name/instance,
2000 		 * if its non NULL. It is valid for the device name to
2001 		 * have changed from a non NULL to a NULL value, i.e.
2002 		 * the vsw is being changed to 'routed' mode.
2003 		 */
2004 		if ((strlen(physname) != 0) &&
2005 		    (ddi_parse(physname, drv,
2006 		    &ddi_instance) != DDI_SUCCESS)) {
2007 			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2008 			    " a valid device name/instance",
2009 			    vswp->instance, physname);
2010 			goto fail_reconf;
2011 		}
2012 
2013 		if (strcmp(physname, vswp->physname)) {
2014 			D2(vswp, "%s: device name changed from %s to %s",
2015 			    __func__, vswp->physname, physname);
2016 
2017 			updated |= MD_physname;
2018 		} else {
2019 			D2(vswp, "%s: device name unchanged at %s",
2020 			    __func__, vswp->physname);
2021 		}
2022 	} else {
2023 		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2024 		    "device from updated MD.", vswp->instance);
2025 		goto fail_reconf;
2026 	}
2027 
2028 	/*
2029 	 * Check if MAC address has changed.
2030 	 */
2031 	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2032 		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2033 		    vswp->instance);
2034 		goto fail_reconf;
2035 	} else {
2036 		uint64_t maddr = macaddr;
2037 		READ_ENTER(&vswp->if_lockrw);
2038 		for (i = ETHERADDRL - 1; i >= 0; i--) {
2039 			if (vswp->if_addr.ether_addr_octet[i]
2040 			    != (macaddr & 0xFF)) {
2041 				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2042 				    __func__, i,
2043 				    vswp->if_addr.ether_addr_octet[i],
2044 				    (macaddr & 0xFF));
2045 				updated |= MD_macaddr;
2046 				macaddr = maddr;
2047 				break;
2048 			}
2049 			macaddr >>= 8;
2050 		}
2051 		RW_EXIT(&vswp->if_lockrw);
2052 		if (updated & MD_macaddr) {
2053 			vsw_save_lmacaddr(vswp, macaddr);
2054 		}
2055 	}
2056 
2057 	/*
2058 	 * Check if switching modes have changed.
2059 	 */
2060 	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2061 		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2062 		    vswp->instance, smode_propname);
2063 		goto fail_reconf;
2064 	} else {
2065 		if (new_smode != vswp->smode) {
2066 			D2(vswp, "%s: switching mode changed from %d to %d",
2067 			    __func__, vswp->smode, new_smode);
2068 
2069 			updated |= MD_smode;
2070 		}
2071 	}
2072 
2073 	/* Read the vlan ids */
2074 	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2075 	    &nvids, NULL);
2076 
2077 	/* Determine if there are any vlan id updates */
2078 	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2079 	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2080 	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2081 	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2082 		updated |= MD_vlans;
2083 	}
2084 
2085 	/* Read mtu */
2086 	vsw_mtu_read(vswp, mdp, node, &mtu);
2087 	if (mtu != vswp->mtu) {
2088 		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2089 			updated |= MD_mtu;
2090 		} else {
2091 			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2092 			    " as the specified value:%d is invalid\n",
2093 			    vswp->instance, mtu);
2094 		}
2095 	}
2096 
2097 	/*
2098 	 * Now make any changes which are needed...
2099 	 */
2100 
2101 	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2102 
2103 		/*
2104 		 * Stop any pending thread to setup switching mode.
2105 		 */
2106 		vsw_setup_switching_stop(vswp);
2107 
2108 		/* Cleanup HybridIO */
2109 		vsw_hio_cleanup(vswp);
2110 
2111 		/*
2112 		 * Remove unicst, mcst addrs of vsw interface
2113 		 * and ports from the physdev. This also closes
2114 		 * the corresponding mac clients.
2115 		 */
2116 		vsw_unset_addrs(vswp);
2117 
2118 		/*
2119 		 * Stop, detach and close the old device..
2120 		 */
2121 		mutex_enter(&vswp->mac_lock);
2122 		vsw_mac_close(vswp);
2123 		mutex_exit(&vswp->mac_lock);
2124 
2125 		/*
2126 		 * Update phys name.
2127 		 */
2128 		if (updated & MD_physname) {
2129 			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2130 			    vswp->instance, vswp->physname, physname);
2131 			(void) strncpy(vswp->physname,
2132 			    physname, strlen(physname) + 1);
2133 		}
2134 
2135 		/*
2136 		 * Update array with the new switch mode values.
2137 		 */
2138 		if (updated & MD_smode) {
2139 			vswp->smode = new_smode;
2140 		}
2141 
2142 		/* Update mtu */
2143 		if (updated & MD_mtu) {
2144 			rv = vsw_mtu_update(vswp, mtu);
2145 			if (rv != 0) {
2146 				goto fail_update;
2147 			}
2148 		}
2149 
2150 		/*
2151 		 * ..and attach, start the new device.
2152 		 */
2153 		rv = vsw_setup_switching(vswp);
2154 		if (rv == EAGAIN) {
2155 			/*
2156 			 * Unable to setup switching mode.
2157 			 * As the error is EAGAIN, schedule a thread to retry
2158 			 * and return. Programming addresses of ports and
2159 			 * vsw interface will be done by the thread when the
2160 			 * switching setup completes successfully.
2161 			 */
2162 			if (vsw_setup_switching_start(vswp) != 0) {
2163 				goto fail_update;
2164 			}
2165 			return;
2166 
2167 		} else if (rv) {
2168 			goto fail_update;
2169 		}
2170 
2171 		vsw_setup_layer2_post_process(vswp);
2172 	} else if (updated & MD_macaddr) {
2173 		/*
2174 		 * We enter here if only MD_macaddr is exclusively updated.
2175 		 * If MD_physname and/or MD_smode are also updated, then
2176 		 * as part of that, we would have implicitly processed
2177 		 * MD_macaddr update (above).
2178 		 */
2179 		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2180 		    vswp->instance, macaddr);
2181 
2182 		READ_ENTER(&vswp->if_lockrw);
2183 		if (vswp->if_state & VSW_IF_UP) {
2184 			/* reconfigure with new address */
2185 			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2186 
2187 			/*
2188 			 * Notify the MAC layer of the changed address.
2189 			 */
2190 			mac_unicst_update(vswp->if_mh,
2191 			    (uint8_t *)&vswp->if_addr);
2192 
2193 		}
2194 		RW_EXIT(&vswp->if_lockrw);
2195 
2196 	}
2197 
2198 	if (updated & MD_vlans) {
2199 		/* Remove existing vlan ids from the hash table. */
2200 		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2201 
2202 		if (vswp->if_state & VSW_IF_UP) {
2203 			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2204 		} else {
2205 			if (vswp->nvids != 0) {
2206 				kmem_free(vswp->vids,
2207 				    sizeof (vsw_vlanid_t) * vswp->nvids);
2208 			}
2209 			vswp->vids = vids;
2210 			vswp->nvids = nvids;
2211 			vswp->pvid = pvid;
2212 		}
2213 
2214 		/* add these new vlan ids into hash table */
2215 		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2216 	} else {
2217 		if (nvids != 0) {
2218 			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2219 		}
2220 	}
2221 
2222 	return;
2223 
2224 fail_reconf:
2225 	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2226 	return;
2227 
2228 fail_update:
2229 	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2230 	    vswp->instance);
2231 }
2232 
2233 /*
2234  * Read the port's md properties.
2235  */
2236 static int
2237 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2238 	md_t *mdp, mde_cookie_t *node)
2239 {
2240 	uint64_t		ldc_id;
2241 	uint8_t			*addrp;
2242 	int			i, addrsz;
2243 	int			num_nodes = 0, nchan = 0;
2244 	int			listsz = 0;
2245 	mde_cookie_t		*listp = NULL;
2246 	struct ether_addr	ea;
2247 	uint64_t		macaddr;
2248 	uint64_t		inst = 0;
2249 	uint64_t		val;
2250 
2251 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2252 		DWARN(vswp, "%s: prop(%s) not found", __func__,
2253 		    id_propname);
2254 		return (1);
2255 	}
2256 
2257 	/*
2258 	 * Find the channel endpoint node(s) (which should be under this
2259 	 * port node) which contain the channel id(s).
2260 	 */
2261 	if ((num_nodes = md_node_count(mdp)) <= 0) {
2262 		DERR(vswp, "%s: invalid number of nodes found (%d)",
2263 		    __func__, num_nodes);
2264 		return (1);
2265 	}
2266 
2267 	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2268 
2269 	/* allocate enough space for node list */
2270 	listsz = num_nodes * sizeof (mde_cookie_t);
2271 	listp = kmem_zalloc(listsz, KM_SLEEP);
2272 
2273 	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2274 	    md_find_name(mdp, "fwd"), listp);
2275 
2276 	if (nchan <= 0) {
2277 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2278 		kmem_free(listp, listsz);
2279 		return (1);
2280 	}
2281 
2282 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2283 
2284 	/* use property from first node found */
2285 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2286 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2287 		    id_propname);
2288 		kmem_free(listp, listsz);
2289 		return (1);
2290 	}
2291 
2292 	/* don't need list any more */
2293 	kmem_free(listp, listsz);
2294 
2295 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2296 
2297 	/* read mac-address property */
2298 	if (md_get_prop_data(mdp, *node, remaddr_propname,
2299 	    &addrp, &addrsz)) {
2300 		DWARN(vswp, "%s: prop(%s) not found",
2301 		    __func__, remaddr_propname);
2302 		return (1);
2303 	}
2304 
2305 	if (addrsz < ETHERADDRL) {
2306 		DWARN(vswp, "%s: invalid address size", __func__);
2307 		return (1);
2308 	}
2309 
2310 	macaddr = *((uint64_t *)addrp);
2311 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2312 
2313 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2314 		ea.ether_addr_octet[i] = macaddr & 0xFF;
2315 		macaddr >>= 8;
2316 	}
2317 
2318 	/* now update all properties into the port */
2319 	portp->p_vswp = vswp;
2320 	portp->p_instance = inst;
2321 	portp->addr_set = B_FALSE;
2322 	ether_copy(&ea, &portp->p_macaddr);
2323 	if (nchan > VSW_PORT_MAX_LDCS) {
2324 		D2(vswp, "%s: using first of %d ldc ids",
2325 		    __func__, nchan);
2326 		nchan = VSW_PORT_MAX_LDCS;
2327 	}
2328 	portp->num_ldcs = nchan;
2329 	portp->ldc_ids =
2330 	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2331 	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2332 
2333 	/* read vlan id properties of this port node */
2334 	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2335 	    &portp->vids, &portp->nvids, NULL);
2336 
2337 	/* Check if hybrid property is present */
2338 	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2339 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2340 		portp->p_hio_enabled = B_TRUE;
2341 	} else {
2342 		portp->p_hio_enabled = B_FALSE;
2343 	}
2344 	/*
2345 	 * Port hio capability determined after version
2346 	 * negotiation, i.e., when we know the peer is HybridIO capable.
2347 	 */
2348 	portp->p_hio_capable = B_FALSE;
2349 	return (0);
2350 }
2351 
2352 /*
2353  * Add a new port to the system.
2354  *
2355  * Returns 0 on success, 1 on failure.
2356  */
2357 int
2358 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2359 {
2360 	vsw_port_t	*portp;
2361 	int		rv;
2362 
2363 	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2364 
2365 	rv = vsw_port_read_props(portp, vswp, mdp, node);
2366 	if (rv != 0) {
2367 		kmem_free(portp, sizeof (*portp));
2368 		return (1);
2369 	}
2370 
2371 	rv = vsw_port_attach(portp);
2372 	if (rv != 0) {
2373 		DERR(vswp, "%s: failed to attach port", __func__);
2374 		return (1);
2375 	}
2376 
2377 	return (0);
2378 }
2379 
2380 static int
2381 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2382 	md_t *prev_mdp, mde_cookie_t prev_mdex)
2383 {
2384 	uint64_t	cport_num;
2385 	uint64_t	pport_num;
2386 	vsw_port_list_t	*plistp;
2387 	vsw_port_t	*portp;
2388 	boolean_t	updated_vlans = B_FALSE;
2389 	uint16_t	pvid;
2390 	vsw_vlanid_t	*vids;
2391 	uint16_t	nvids;
2392 	uint64_t	val;
2393 	boolean_t	hio_enabled = B_FALSE;
2394 
2395 	/*
2396 	 * For now, we get port updates only if vlan ids changed.
2397 	 * We read the port num and do some sanity check.
2398 	 */
2399 	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2400 		return (1);
2401 	}
2402 
2403 	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2404 		return (1);
2405 	}
2406 	if (cport_num != pport_num)
2407 		return (1);
2408 
2409 	plistp = &(vswp->plist);
2410 
2411 	READ_ENTER(&plistp->lockrw);
2412 
2413 	portp = vsw_lookup_port(vswp, cport_num);
2414 	if (portp == NULL) {
2415 		RW_EXIT(&plistp->lockrw);
2416 		return (1);
2417 	}
2418 
2419 	/* Read the vlan ids */
2420 	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2421 	    &vids, &nvids, NULL);
2422 
2423 	/* Determine if there are any vlan id updates */
2424 	if ((pvid != portp->pvid) ||		/* pvid changed? */
2425 	    (nvids != portp->nvids) ||		/* # of vids changed? */
2426 	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2427 	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2428 		updated_vlans = B_TRUE;
2429 	}
2430 
2431 	if (updated_vlans == B_TRUE) {
2432 
2433 		/* Remove existing vlan ids from the hash table. */
2434 		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2435 
2436 		/* Reconfigure vlans with network device */
2437 		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2438 
2439 		/* add these new vlan ids into hash table */
2440 		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2441 
2442 		/* reset the port if it is vlan unaware (ver < 1.3) */
2443 		vsw_vlan_unaware_port_reset(portp);
2444 	}
2445 
2446 	/* Check if hybrid property is present */
2447 	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2448 		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2449 		hio_enabled = B_TRUE;
2450 	}
2451 
2452 	if (portp->p_hio_enabled != hio_enabled) {
2453 		vsw_hio_port_update(portp, hio_enabled);
2454 	}
2455 
2456 	RW_EXIT(&plistp->lockrw);
2457 
2458 	return (0);
2459 }
2460 
2461 /*
2462  * vsw_mac_rx -- A common function to send packets to the interface.
2463  * By default this function check if the interface is UP or not, the
2464  * rest of the behaviour depends on the flags as below:
2465  *
2466  *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2467  *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2468  *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2469  */
2470 void
2471 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2472     mblk_t *mp, vsw_macrx_flags_t flags)
2473 {
2474 	mblk_t		*mpt;
2475 
2476 	D1(vswp, "%s:enter\n", __func__);
2477 	READ_ENTER(&vswp->if_lockrw);
2478 	/* Check if the interface is up */
2479 	if (!(vswp->if_state & VSW_IF_UP)) {
2480 		RW_EXIT(&vswp->if_lockrw);
2481 		/* Free messages only if FREEMSG flag specified */
2482 		if (flags & VSW_MACRX_FREEMSG) {
2483 			freemsgchain(mp);
2484 		}
2485 		D1(vswp, "%s:exit\n", __func__);
2486 		return;
2487 	}
2488 	/*
2489 	 * If PROMISC flag is passed, then check if
2490 	 * the interface is in the PROMISC mode.
2491 	 * If not, drop the messages.
2492 	 */
2493 	if (flags & VSW_MACRX_PROMISC) {
2494 		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2495 			RW_EXIT(&vswp->if_lockrw);
2496 			/* Free messages only if FREEMSG flag specified */
2497 			if (flags & VSW_MACRX_FREEMSG) {
2498 				freemsgchain(mp);
2499 			}
2500 			D1(vswp, "%s:exit\n", __func__);
2501 			return;
2502 		}
2503 	}
2504 	RW_EXIT(&vswp->if_lockrw);
2505 	/*
2506 	 * If COPYMSG flag is passed, then make a copy
2507 	 * of the message chain and send up the copy.
2508 	 */
2509 	if (flags & VSW_MACRX_COPYMSG) {
2510 		mp = copymsgchain(mp);
2511 		if (mp == NULL) {
2512 			D1(vswp, "%s:exit\n", __func__);
2513 			return;
2514 		}
2515 	}
2516 
2517 	D2(vswp, "%s: sending up stack", __func__);
2518 
2519 	mpt = NULL;
2520 	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2521 	if (mp != NULL) {
2522 		mac_rx(vswp->if_mh, mrh, mp);
2523 	}
2524 	D1(vswp, "%s:exit\n", __func__);
2525 }
2526 
2527 /* copy mac address of vsw into soft state structure */
2528 static void
2529 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2530 {
2531 	int	i;
2532 
2533 	WRITE_ENTER(&vswp->if_lockrw);
2534 	for (i = ETHERADDRL - 1; i >= 0; i--) {
2535 		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2536 		macaddr >>= 8;
2537 	}
2538 	RW_EXIT(&vswp->if_lockrw);
2539 }
2540 
2541 /* Compare VLAN ids, array size expected to be same. */
2542 static boolean_t
2543 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2544 {
2545 	int i, j;
2546 	uint16_t vid;
2547 
2548 	for (i = 0; i < nvids; i++) {
2549 		vid = vids1[i].vl_vid;
2550 		for (j = 0; j < nvids; j++) {
2551 			if (vid == vids2[i].vl_vid)
2552 				break;
2553 		}
2554 		if (j == nvids) {
2555 			return (B_FALSE);
2556 		}
2557 	}
2558 	return (B_TRUE);
2559 }
2560