xref: /illumos-gate/usr/src/uts/sun4v/io/vnet.c (revision 6ba597c5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/errno.h>
29 #include <sys/param.h>
30 #include <sys/stream.h>
31 #include <sys/kmem.h>
32 #include <sys/conf.h>
33 #include <sys/devops.h>
34 #include <sys/ksynch.h>
35 #include <sys/stat.h>
36 #include <sys/modctl.h>
37 #include <sys/modhash.h>
38 #include <sys/debug.h>
39 #include <sys/ethernet.h>
40 #include <sys/dlpi.h>
41 #include <net/if.h>
42 #include <sys/mac_provider.h>
43 #include <sys/mac_client.h>
44 #include <sys/mac_client_priv.h>
45 #include <sys/mac_ether.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/strsun.h>
49 #include <sys/note.h>
50 #include <sys/atomic.h>
51 #include <sys/vnet.h>
52 #include <sys/vlan.h>
53 #include <sys/vnet_mailbox.h>
54 #include <sys/vnet_common.h>
55 #include <sys/dds.h>
56 #include <sys/strsubr.h>
57 #include <sys/taskq.h>
58 
59 /*
60  * Function prototypes.
61  */
62 
63 /* DDI entrypoints */
64 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
65 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
66 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
67 
68 /* MAC entrypoints  */
69 static int vnet_m_stat(void *, uint_t, uint64_t *);
70 static int vnet_m_start(void *);
71 static void vnet_m_stop(void *);
72 static int vnet_m_promisc(void *, boolean_t);
73 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
74 static int vnet_m_unicst(void *, const uint8_t *);
75 mblk_t *vnet_m_tx(void *, mblk_t *);
76 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
77 #ifdef	VNET_IOC_DEBUG
78 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
79 #endif
80 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
81 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
82 	const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
83 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
84 	mac_group_info_t *infop, mac_group_handle_t handle);
85 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
86 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
87 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
88 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
89 static int vnet_ring_enable_intr(void *arg);
90 static int vnet_ring_disable_intr(void *arg);
91 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
92 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
93 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
94 
95 /* vnet internal functions */
96 static int vnet_unattach(vnet_t *vnetp);
97 static void vnet_ring_grp_init(vnet_t *vnetp);
98 static void vnet_ring_grp_uninit(vnet_t *vnetp);
99 static int vnet_mac_register(vnet_t *);
100 static int vnet_read_mac_address(vnet_t *vnetp);
101 static int vnet_bind_vgenring(vnet_res_t *vresp);
102 static void vnet_unbind_vgenring(vnet_res_t *vresp);
103 static int vnet_bind_hwrings(vnet_t *vnetp);
104 static void vnet_unbind_hwrings(vnet_t *vnetp);
105 static int vnet_bind_rings(vnet_res_t *vresp);
106 static void vnet_unbind_rings(vnet_res_t *vresp);
107 static int vnet_hio_stat(void *, uint_t, uint64_t *);
108 static int vnet_hio_start(void *);
109 static void vnet_hio_stop(void *);
110 static void vnet_hio_notify_cb(void *arg, mac_notify_type_t type);
111 mblk_t *vnet_hio_tx(void *, mblk_t *);
112 
113 /* Forwarding database (FDB) routines */
114 static void vnet_fdb_create(vnet_t *vnetp);
115 static void vnet_fdb_destroy(vnet_t *vnetp);
116 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
117 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
118 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
119 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
120 
121 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
122 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
123 static void vnet_tx_update(vio_net_handle_t vrh);
124 static void vnet_res_start_task(void *arg);
125 static void vnet_start_resources(vnet_t *vnetp);
126 static void vnet_stop_resources(vnet_t *vnetp);
127 static void vnet_dispatch_res_task(vnet_t *vnetp);
128 static void vnet_res_start_task(void *arg);
129 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
130 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
131 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
132 
133 /* Exported to vnet_gen */
134 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
135 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
136 void vnet_dds_cleanup_hio(vnet_t *vnetp);
137 
138 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
139     vnet_res_t *vresp);
140 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
141 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
142 static void vnet_hio_destroy_kstats(kstat_t *ksp);
143 
/* Exported to vnet_dds */
145 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
146 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
147 void vnet_hio_mac_cleanup(vnet_t *vnetp);
148 
149 /* Externs that are imported from vnet_gen */
150 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
151     const uint8_t *macaddr, void **vgenhdl);
152 extern int vgen_init_mdeg(void *arg);
153 extern void vgen_uninit(void *arg);
154 extern int vgen_dds_tx(void *arg, void *dmsg);
155 extern void vgen_mod_init(void);
156 extern int vgen_mod_cleanup(void);
157 extern void vgen_mod_fini(void);
158 extern int vgen_enable_intr(void *arg);
159 extern int vgen_disable_intr(void *arg);
160 extern mblk_t *vgen_poll(void *arg, int bytes_to_pickup);
161 
162 /* Externs that are imported from vnet_dds */
163 extern void vdds_mod_init(void);
164 extern void vdds_mod_fini(void);
165 extern int vdds_init(vnet_t *vnetp);
166 extern void vdds_cleanup(vnet_t *vnetp);
167 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
168 extern void vdds_cleanup_hybrid_res(void *arg);
169 extern void vdds_cleanup_hio(vnet_t *vnetp);
170 
171 /* Externs imported from mac_impl */
172 extern mblk_t *mac_hwring_tx(mac_ring_handle_t, mblk_t *);
173 
174 #define	DRV_NAME	"vnet"
175 #define	VNET_FDBE_REFHOLD(p)						\
176 {									\
177 	atomic_inc_32(&(p)->refcnt);					\
178 	ASSERT((p)->refcnt != 0);					\
179 }
180 
181 #define	VNET_FDBE_REFRELE(p)						\
182 {									\
183 	ASSERT((p)->refcnt != 0);					\
184 	atomic_dec_32(&(p)->refcnt);					\
185 }
186 
187 #ifdef	VNET_IOC_DEBUG
188 #define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
189 #else
190 #define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
191 #endif
192 
/*
 * MAC layer callbacks for the vnet device. RX and TX are exposed through
 * pseudo rings (see vnet_m_capab()/vnet_get_ring()), so the unicast and
 * tx slots below must remain NULL.
 */
static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,	/* optional callbacks present (ioctl/capab) */
	vnet_m_stat,		/* get statistics */
	vnet_m_start,		/* start */
	vnet_m_stop,		/* stop */
	vnet_m_promisc,		/* set/clear promiscuous mode (no-op) */
	vnet_m_multicst,	/* add/remove multicast address */
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	vnet_m_ioctl,		/* ioctl (debug-only; see VNET_IOC_DEBUG) */
	vnet_m_capab,		/* capability exchange */
	NULL			/* remaining callbacks unused */
};
206 
/*
 * MAC callbacks used for a Hybrid I/O resource; only the statistics,
 * start/stop and transmit entry points are needed for such a resource.
 */
static mac_callbacks_t vnet_hio_res_callbacks = {
	0,			/* no optional callbacks */
	vnet_hio_stat,		/* get statistics */
	vnet_hio_start,		/* start */
	vnet_hio_stop,		/* stop */
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,		/* transmit */
	NULL,
	NULL,
	NULL
};
220 
221 /*
222  * Linked list of "vnet_t" structures - one per instance.
223  */
224 static vnet_t	*vnet_headp = NULL;
225 static krwlock_t vnet_rw;
226 
227 /* Tunables */
228 uint32_t vnet_ntxds = VNET_NTXDS;	/* power of 2 transmit descriptors */
229 uint32_t vnet_ldcwd_interval = VNET_LDCWD_INTERVAL; /* watchdog freq in msec */
230 uint32_t vnet_ldcwd_txtimeout = VNET_LDCWD_TXTIMEOUT;  /* tx timeout in msec */
231 uint32_t vnet_ldc_mtu = VNET_LDC_MTU;		/* ldc mtu */
232 
233 /* Configure tx serialization in mac layer for the vnet device */
234 boolean_t vnet_mac_tx_serialize = B_TRUE;
235 
236 /*
237  * Set this to non-zero to enable additional internal receive buffer pools
238  * based on the MTU of the device for better performance at the cost of more
239  * memory consumption. This is turned off by default, to use allocb(9F) for
240  * receive buffer allocations of sizes > 2K.
241  */
242 boolean_t vnet_jumbo_rxpools = B_FALSE;
243 
244 /* # of chains in fdb hash table */
245 uint32_t	vnet_fdb_nchains = VNET_NFDB_HASH;
246 
247 /* Internal tunables */
248 uint32_t	vnet_ethermtu = 1500;	/* mtu of the device */
249 
250 /*
251  * Default vlan id. This is only used internally when the "default-vlan-id"
252  * property is not present in the MD device node. Therefore, this should not be
253  * used as a tunable; if this value is changed, the corresponding variable
254  * should be updated to the same value in vsw and also other vnets connected to
255  * the same vsw.
256  */
257 uint16_t	vnet_default_vlan_id = 1;
258 
259 /* delay in usec to wait for all references on a fdb entry to be dropped */
260 uint32_t vnet_fdbe_refcnt_delay = 10;
261 
262 static struct ether_addr etherbroadcastaddr = {
263 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
264 };
265 
266 /* mac_open() retry delay in usec */
267 uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */
268 
269 /* max # of mac_open() retries */
270 uint32_t vnet_mac_open_retries = 100;
271 
272 /*
273  * Property names
274  */
275 static char macaddr_propname[] = "local-mac-address";
276 
277 /*
278  * This is the string displayed by modinfo(1m).
279  */
280 static char vnet_ident[] = "vnet driver";
281 extern struct mod_ops mod_driverops;
282 static struct cb_ops cb_vnetops = {
283 	nulldev,		/* cb_open */
284 	nulldev,		/* cb_close */
285 	nodev,			/* cb_strategy */
286 	nodev,			/* cb_print */
287 	nodev,			/* cb_dump */
288 	nodev,			/* cb_read */
289 	nodev,			/* cb_write */
290 	nodev,			/* cb_ioctl */
291 	nodev,			/* cb_devmap */
292 	nodev,			/* cb_mmap */
293 	nodev,			/* cb_segmap */
294 	nochpoll,		/* cb_chpoll */
295 	ddi_prop_op,		/* cb_prop_op */
296 	NULL,			/* cb_stream */
297 	(int)(D_MP)		/* cb_flag */
298 };
299 
300 static struct dev_ops vnetops = {
301 	DEVO_REV,		/* devo_rev */
302 	0,			/* devo_refcnt */
303 	NULL,			/* devo_getinfo */
304 	nulldev,		/* devo_identify */
305 	nulldev,		/* devo_probe */
306 	vnetattach,		/* devo_attach */
307 	vnetdetach,		/* devo_detach */
308 	nodev,			/* devo_reset */
309 	&cb_vnetops,		/* devo_cb_ops */
310 	(struct bus_ops *)NULL,	/* devo_bus_ops */
311 	NULL,			/* devo_power */
312 	ddi_quiesce_not_supported,	/* devo_quiesce */
313 };
314 
315 static struct modldrv modldrv = {
316 	&mod_driverops,		/* Type of module.  This one is a driver */
317 	vnet_ident,		/* ID string */
318 	&vnetops		/* driver specific ops */
319 };
320 
321 static struct modlinkage modlinkage = {
322 	MODREV_1, (void *)&modldrv, NULL
323 };
324 
325 #ifdef DEBUG
326 
327 /*
328  * Print debug messages - set to 0xf to enable all msgs
329  */
330 int vnet_dbglevel = 0x8;
331 
332 static void
333 debug_printf(const char *fname, void *arg, const char *fmt, ...)
334 {
335 	char    buf[512];
336 	va_list ap;
337 	vnet_t *vnetp = (vnet_t *)arg;
338 	char    *bufp = buf;
339 
340 	if (vnetp == NULL) {
341 		(void) sprintf(bufp, "%s: ", fname);
342 		bufp += strlen(bufp);
343 	} else {
344 		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
345 		bufp += strlen(bufp);
346 	}
347 	va_start(ap, fmt);
348 	(void) vsprintf(bufp, fmt, ap);
349 	va_end(ap);
350 	cmn_err(CE_CONT, "%s\n", buf);
351 }
352 
353 #endif
354 
355 /* _init(9E): initialize the loadable module */
356 int
357 _init(void)
358 {
359 	int status;
360 
361 	DBG1(NULL, "enter\n");
362 
363 	mac_init_ops(&vnetops, "vnet");
364 	status = mod_install(&modlinkage);
365 	if (status != 0) {
366 		mac_fini_ops(&vnetops);
367 	}
368 	vdds_mod_init();
369 	vgen_mod_init();
370 	DBG1(NULL, "exit(%d)\n", status);
371 	return (status);
372 }
373 
374 /* _fini(9E): prepare the module for unloading. */
375 int
376 _fini(void)
377 {
378 	int		status;
379 
380 	DBG1(NULL, "enter\n");
381 
382 	status = vgen_mod_cleanup();
383 	if (status != 0)
384 		return (status);
385 
386 	status = mod_remove(&modlinkage);
387 	if (status != 0)
388 		return (status);
389 	mac_fini_ops(&vnetops);
390 	vgen_mod_fini();
391 	vdds_mod_fini();
392 
393 	DBG1(NULL, "exit(%d)\n", status);
394 	return (status);
395 }
396 
397 /* _info(9E): return information about the loadable module */
398 int
399 _info(struct modinfo *modinfop)
400 {
401 	return (mod_info(&modlinkage, modinfop));
402 }
403 
404 /*
405  * attach(9E): attach a device to the system.
406  * called once for each instance of the device on the system.
407  */
408 static int
409 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
410 {
411 	vnet_t			*vnetp;
412 	int			status;
413 	int			instance;
414 	uint64_t		reg;
415 	char			qname[TASKQ_NAMELEN];
416 	vnet_attach_progress_t	attach_progress;
417 
418 	attach_progress = AST_init;
419 
420 	switch (cmd) {
421 	case DDI_ATTACH:
422 		break;
423 	case DDI_RESUME:
424 	case DDI_PM_RESUME:
425 	default:
426 		goto vnet_attach_fail;
427 	}
428 
429 	instance = ddi_get_instance(dip);
430 	DBG1(NULL, "instance(%d) enter\n", instance);
431 
432 	/* allocate vnet_t and mac_t structures */
433 	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
434 	vnetp->dip = dip;
435 	vnetp->instance = instance;
436 	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
437 	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
438 	attach_progress |= AST_vnet_alloc;
439 
440 	vnet_ring_grp_init(vnetp);
441 	attach_progress |= AST_ring_init;
442 
443 	status = vdds_init(vnetp);
444 	if (status != 0) {
445 		goto vnet_attach_fail;
446 	}
447 	attach_progress |= AST_vdds_init;
448 
449 	/* setup links to vnet_t from both devinfo and mac_t */
450 	ddi_set_driver_private(dip, (caddr_t)vnetp);
451 
452 	/* read the mac address */
453 	status = vnet_read_mac_address(vnetp);
454 	if (status != DDI_SUCCESS) {
455 		goto vnet_attach_fail;
456 	}
457 	attach_progress |= AST_read_macaddr;
458 
459 	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
460 	    DDI_PROP_DONTPASS, "reg", -1);
461 	if (reg == -1) {
462 		goto vnet_attach_fail;
463 	}
464 	vnetp->reg = reg;
465 
466 	vnet_fdb_create(vnetp);
467 	attach_progress |= AST_fdbh_alloc;
468 
469 	(void) snprintf(qname, TASKQ_NAMELEN, "vnet_taskq%d", instance);
470 	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
471 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
472 		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
473 		    instance);
474 		goto vnet_attach_fail;
475 	}
476 	attach_progress |= AST_taskq_create;
477 
478 	/* add to the list of vnet devices */
479 	WRITE_ENTER(&vnet_rw);
480 	vnetp->nextp = vnet_headp;
481 	vnet_headp = vnetp;
482 	RW_EXIT(&vnet_rw);
483 
484 	attach_progress |= AST_vnet_list;
485 
486 	/*
487 	 * Initialize the generic vnet plugin which provides communication via
488 	 * sun4v LDC (logical domain channel) based resources. This involves 2
489 	 * steps; first, vgen_init() is invoked to read the various properties
490 	 * of the vnet device from its MD node (including its mtu which is
491 	 * needed to mac_register()) and obtain a handle to the vgen layer.
492 	 * After mac_register() is done and we have a mac handle, we then
493 	 * invoke vgen_init_mdeg() which registers with the the MD event
494 	 * generator (mdeg) framework to allow LDC resource notifications.
495 	 * Note: this sequence also allows us to report the correct default #
496 	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
497 	 * in the context of mac_register(); and avoids conflicting with
498 	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
499 	 * events in vgen.
500 	 */
501 	status = vgen_init(vnetp, reg, vnetp->dip,
502 	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
503 	if (status != DDI_SUCCESS) {
504 		DERR(vnetp, "vgen_init() failed\n");
505 		goto vnet_attach_fail;
506 	}
507 	attach_progress |= AST_vgen_init;
508 
509 	status = vnet_mac_register(vnetp);
510 	if (status != DDI_SUCCESS) {
511 		goto vnet_attach_fail;
512 	}
513 	vnetp->link_state = LINK_STATE_UNKNOWN;
514 	attach_progress |= AST_macreg;
515 
516 	status = vgen_init_mdeg(vnetp->vgenhdl);
517 	if (status != DDI_SUCCESS) {
518 		goto vnet_attach_fail;
519 	}
520 	attach_progress |= AST_init_mdeg;
521 
522 	vnetp->attach_progress = attach_progress;
523 
524 	DBG1(NULL, "instance(%d) exit\n", instance);
525 	return (DDI_SUCCESS);
526 
527 vnet_attach_fail:
528 	vnetp->attach_progress = attach_progress;
529 	status = vnet_unattach(vnetp);
530 	ASSERT(status == 0);
531 	return (DDI_FAILURE);
532 }
533 
534 /*
535  * detach(9E): detach a device from the system.
536  */
537 static int
538 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
539 {
540 	vnet_t		*vnetp;
541 	int		instance;
542 
543 	instance = ddi_get_instance(dip);
544 	DBG1(NULL, "instance(%d) enter\n", instance);
545 
546 	vnetp = ddi_get_driver_private(dip);
547 	if (vnetp == NULL) {
548 		goto vnet_detach_fail;
549 	}
550 
551 	switch (cmd) {
552 	case DDI_DETACH:
553 		break;
554 	case DDI_SUSPEND:
555 	case DDI_PM_SUSPEND:
556 	default:
557 		goto vnet_detach_fail;
558 	}
559 
560 	if (vnet_unattach(vnetp) != 0) {
561 		goto vnet_detach_fail;
562 	}
563 
564 	return (DDI_SUCCESS);
565 
566 vnet_detach_fail:
567 	return (DDI_FAILURE);
568 }
569 
570 /*
571  * Common routine to handle vnetattach() failure and vnetdetach(). Note that
572  * the only reason this function could fail is if mac_unregister() fails.
573  * Otherwise, this function must ensure that all resources are freed and return
574  * success.
575  */
576 static int
577 vnet_unattach(vnet_t *vnetp)
578 {
579 	vnet_attach_progress_t	attach_progress;
580 
581 	attach_progress = vnetp->attach_progress;
582 
583 	/*
584 	 * Disable the mac device in the gldv3 subsystem. This can fail, in
585 	 * particular if there are still any open references to this mac
586 	 * device; in which case we just return failure without continuing to
587 	 * detach further.
588 	 * If it succeeds, we then invoke vgen_uninit() which should unregister
589 	 * any pseudo rings registered with the mac layer. Note we keep the
590 	 * AST_macreg flag on, so we can unregister with the mac layer at
591 	 * the end of this routine.
592 	 */
593 	if (attach_progress & AST_macreg) {
594 		if (mac_disable(vnetp->mh) != 0) {
595 			return (1);
596 		}
597 	}
598 
599 	/*
600 	 * Now that we have disabled the device, we must finish all other steps
601 	 * and successfully return from this function; otherwise we will end up
602 	 * leaving the device in a broken/unusable state.
603 	 *
604 	 * First, release any hybrid resources assigned to this vnet device.
605 	 */
606 	if (attach_progress & AST_vdds_init) {
607 		vdds_cleanup(vnetp);
608 		attach_progress &= ~AST_vdds_init;
609 	}
610 
611 	/*
612 	 * Uninit vgen. This stops further mdeg callbacks to this vnet
613 	 * device and/or its ports; and detaches any existing ports.
614 	 */
615 	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
616 		vgen_uninit(vnetp->vgenhdl);
617 		attach_progress &= ~AST_vgen_init;
618 		attach_progress &= ~AST_init_mdeg;
619 	}
620 
621 	/* Destroy the taskq. */
622 	if (attach_progress & AST_taskq_create) {
623 		ddi_taskq_destroy(vnetp->taskqp);
624 		attach_progress &= ~AST_taskq_create;
625 	}
626 
627 	/* Destroy fdb. */
628 	if (attach_progress & AST_fdbh_alloc) {
629 		vnet_fdb_destroy(vnetp);
630 		attach_progress &= ~AST_fdbh_alloc;
631 	}
632 
633 	/* Remove from the device list */
634 	if (attach_progress & AST_vnet_list) {
635 		vnet_t		**vnetpp;
636 		/* unlink from instance(vnet_t) list */
637 		WRITE_ENTER(&vnet_rw);
638 		for (vnetpp = &vnet_headp; *vnetpp;
639 		    vnetpp = &(*vnetpp)->nextp) {
640 			if (*vnetpp == vnetp) {
641 				*vnetpp = vnetp->nextp;
642 				break;
643 			}
644 		}
645 		RW_EXIT(&vnet_rw);
646 		attach_progress &= ~AST_vnet_list;
647 	}
648 
649 	if (attach_progress & AST_ring_init) {
650 		vnet_ring_grp_uninit(vnetp);
651 		attach_progress &= ~AST_ring_init;
652 	}
653 
654 	if (attach_progress & AST_macreg) {
655 		VERIFY(mac_unregister(vnetp->mh) == 0);
656 		vnetp->mh = NULL;
657 		attach_progress &= ~AST_macreg;
658 	}
659 
660 	if (attach_progress & AST_vnet_alloc) {
661 		rw_destroy(&vnetp->vrwlock);
662 		rw_destroy(&vnetp->vsw_fp_rw);
663 		attach_progress &= ~AST_vnet_list;
664 		KMEM_FREE(vnetp);
665 	}
666 
667 	return (0);
668 }
669 
670 /* enable the device for transmit/receive */
671 static int
672 vnet_m_start(void *arg)
673 {
674 	vnet_t		*vnetp = arg;
675 
676 	DBG1(vnetp, "enter\n");
677 
678 	WRITE_ENTER(&vnetp->vrwlock);
679 	vnetp->flags |= VNET_STARTED;
680 	vnet_start_resources(vnetp);
681 	RW_EXIT(&vnetp->vrwlock);
682 
683 	DBG1(vnetp, "exit\n");
684 	return (VNET_SUCCESS);
685 
686 }
687 
688 /* stop transmit/receive for the device */
689 static void
690 vnet_m_stop(void *arg)
691 {
692 	vnet_t		*vnetp = arg;
693 
694 	DBG1(vnetp, "enter\n");
695 
696 	WRITE_ENTER(&vnetp->vrwlock);
697 	if (vnetp->flags & VNET_STARTED) {
698 		/*
699 		 * Set the flags appropriately; this should prevent starting of
700 		 * any new resources that are added(see vnet_res_start_task()),
701 		 * while we release the vrwlock in vnet_stop_resources() before
702 		 * stopping each resource.
703 		 */
704 		vnetp->flags &= ~VNET_STARTED;
705 		vnetp->flags |= VNET_STOPPING;
706 		vnet_stop_resources(vnetp);
707 		vnetp->flags &= ~VNET_STOPPING;
708 	}
709 	RW_EXIT(&vnetp->vrwlock);
710 
711 	DBG1(vnetp, "exit\n");
712 }
713 
714 /* set the unicast mac address of the device */
715 static int
716 vnet_m_unicst(void *arg, const uint8_t *macaddr)
717 {
718 	_NOTE(ARGUNUSED(macaddr))
719 
720 	vnet_t *vnetp = arg;
721 
722 	DBG1(vnetp, "enter\n");
723 	/*
724 	 * NOTE: setting mac address dynamically is not supported.
725 	 */
726 	DBG1(vnetp, "exit\n");
727 
728 	return (VNET_FAILURE);
729 }
730 
731 /* enable/disable a multicast address */
732 static int
733 vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
734 {
735 	_NOTE(ARGUNUSED(add, mca))
736 
737 	vnet_t		*vnetp = arg;
738 	vnet_res_t	*vresp;
739 	mac_register_t	*macp;
740 	mac_callbacks_t	*cbp;
741 	int		rv = VNET_SUCCESS;
742 
743 	DBG1(vnetp, "enter\n");
744 
745 	READ_ENTER(&vnetp->vsw_fp_rw);
746 	if (vnetp->vsw_fp == NULL) {
747 		RW_EXIT(&vnetp->vsw_fp_rw);
748 		return (EAGAIN);
749 	}
750 	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
751 	RW_EXIT(&vnetp->vsw_fp_rw);
752 
753 	vresp = vnetp->vsw_fp;
754 	macp = &vresp->macreg;
755 	cbp = macp->m_callbacks;
756 	rv = cbp->mc_multicst(macp->m_driver, add, mca);
757 
758 	VNET_FDBE_REFRELE(vnetp->vsw_fp);
759 
760 	DBG1(vnetp, "exit(%d)\n", rv);
761 	return (rv);
762 }
763 
764 /* set or clear promiscuous mode on the device */
765 static int
766 vnet_m_promisc(void *arg, boolean_t on)
767 {
768 	_NOTE(ARGUNUSED(on))
769 
770 	vnet_t *vnetp = arg;
771 	DBG1(vnetp, "enter\n");
772 	/*
773 	 * NOTE: setting promiscuous mode is not supported, just return success.
774 	 */
775 	DBG1(vnetp, "exit\n");
776 	return (VNET_SUCCESS);
777 }
778 
/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 *
 * arg is the pseudo tx ring (vnet_pseudo_tx_ring_t) the mac layer selected;
 * mp is a b_next-linked chain of frames.  Returns NULL when the entire chain
 * was consumed, or the remaining (untransmitted) chain on backpressure so
 * the mac layer can retry after a tx update.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	mblk_t			*next;
	mblk_t			*resid_mp;
	mac_register_t		*macp;
	struct ether_header	*ehp;
	boolean_t		is_unicast;
	boolean_t		is_pvid;	/* non-default pvid ? */
	boolean_t		hres;		/* Hybrid resource ? */
	void			*tx_arg;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	/* Does this vnet carry a non-default port vlan id? */
	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	/* Switch each frame of the chain independently by destination. */
	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {

			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed; relink and return remainder */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						/* tag alloc failed; drop */
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				/* hybrid mc_tx expects the tx ring as arg */
				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed; relink and return remainder */
				mp->b_next = next;
				break;
			}
		}

		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}
920 
921 /* get statistics from the device */
922 int
923 vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
924 {
925 	vnet_t *vnetp = arg;
926 	vnet_res_t	*vresp;
927 	mac_register_t	*macp;
928 	mac_callbacks_t	*cbp;
929 	uint64_t val_total = 0;
930 
931 	DBG1(vnetp, "enter\n");
932 
933 	/*
934 	 * get the specified statistic from each transport and return the
935 	 * aggregate val.  This obviously only works for counters.
936 	 */
937 	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
938 	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
939 		return (ENOTSUP);
940 	}
941 
942 	READ_ENTER(&vnetp->vrwlock);
943 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
944 		macp = &vresp->macreg;
945 		cbp = macp->m_callbacks;
946 		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
947 			val_total += *val;
948 	}
949 	RW_EXIT(&vnetp->vrwlock);
950 
951 	*val = val_total;
952 
953 	DBG1(vnetp, "exit\n");
954 	return (0);
955 }
956 
957 static void
958 vnet_ring_grp_init(vnet_t *vnetp)
959 {
960 	vnet_pseudo_rx_group_t	*rx_grp;
961 	vnet_pseudo_rx_ring_t	*rx_ringp;
962 	vnet_pseudo_tx_group_t	*tx_grp;
963 	vnet_pseudo_tx_ring_t	*tx_ringp;
964 	int			i;
965 
966 	tx_grp = &vnetp->tx_grp[0];
967 	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
968 	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
969 	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
970 		tx_ringp[i].state |= VNET_TXRING_SHARED;
971 	}
972 	tx_grp->rings = tx_ringp;
973 	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
974 
975 	rx_grp = &vnetp->rx_grp[0];
976 	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
977 	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
978 	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
979 	    rx_grp->max_ring_cnt, KM_SLEEP);
980 
981 	/*
982 	 * Setup the first 3 Pseudo RX Rings that are reserved;
983 	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
984 	 */
985 	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
986 	rx_ringp[0].index = 0;
987 	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
988 	rx_ringp[1].index = 1;
989 	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
990 	rx_ringp[2].index = 2;
991 
992 	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
993 	rx_grp->rings = rx_ringp;
994 
995 	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
996 	    i < rx_grp->max_ring_cnt; i++) {
997 		rx_ringp = &rx_grp->rings[i];
998 		rx_ringp->state = VNET_RXRING_FREE;
999 		rx_ringp->index = i;
1000 	}
1001 }
1002 
1003 static void
1004 vnet_ring_grp_uninit(vnet_t *vnetp)
1005 {
1006 	vnet_pseudo_rx_group_t	*rx_grp;
1007 	vnet_pseudo_tx_group_t	*tx_grp;
1008 
1009 	tx_grp = &vnetp->tx_grp[0];
1010 	if (tx_grp->rings != NULL) {
1011 		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
1012 		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
1013 		    tx_grp->ring_cnt);
1014 		tx_grp->rings = NULL;
1015 	}
1016 
1017 	rx_grp = &vnetp->rx_grp[0];
1018 	if (rx_grp->rings != NULL) {
1019 		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
1020 		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1021 		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
1022 		    rx_grp->max_ring_cnt);
1023 		rx_grp->rings = NULL;
1024 	}
1025 }
1026 
1027 static vnet_pseudo_rx_ring_t *
1028 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
1029 {
1030 	vnet_pseudo_rx_group_t  *rx_grp;
1031 	vnet_pseudo_rx_ring_t	*rx_ringp;
1032 	int			index;
1033 
1034 	rx_grp = &vnetp->rx_grp[0];
1035 	WRITE_ENTER(&rx_grp->lock);
1036 
1037 	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
1038 		/* no rings available */
1039 		RW_EXIT(&rx_grp->lock);
1040 		return (NULL);
1041 	}
1042 
1043 	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1044 	    index < rx_grp->max_ring_cnt; index++) {
1045 		rx_ringp = &rx_grp->rings[index];
1046 		if (rx_ringp->state == VNET_RXRING_FREE) {
1047 			rx_ringp->state |= VNET_RXRING_INUSE;
1048 			rx_grp->ring_cnt++;
1049 			break;
1050 		}
1051 	}
1052 
1053 	RW_EXIT(&rx_grp->lock);
1054 	return (rx_ringp);
1055 }
1056 
1057 static void
1058 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
1059 {
1060 	vnet_pseudo_rx_group_t  *rx_grp;
1061 
1062 	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1063 	rx_grp = &vnetp->rx_grp[0];
1064 	WRITE_ENTER(&rx_grp->lock);
1065 
1066 	if (ringp->state != VNET_RXRING_FREE) {
1067 		ringp->state = VNET_RXRING_FREE;
1068 		ringp->handle = NULL;
1069 		rx_grp->ring_cnt--;
1070 	}
1071 
1072 	RW_EXIT(&rx_grp->lock);
1073 }
1074 
1075 /* wrapper function for mac_register() */
1076 static int
1077 vnet_mac_register(vnet_t *vnetp)
1078 {
1079 	mac_register_t	*macp;
1080 	int		err;
1081 
1082 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1083 		return (DDI_FAILURE);
1084 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1085 	macp->m_driver = vnetp;
1086 	macp->m_dip = vnetp->dip;
1087 	macp->m_src_addr = vnetp->curr_macaddr;
1088 	macp->m_callbacks = &vnet_m_callbacks;
1089 	macp->m_min_sdu = 0;
1090 	macp->m_max_sdu = vnetp->mtu;
1091 	macp->m_margin = VLAN_TAGSZ;
1092 
1093 	/*
1094 	 * MAC_VIRT_SERIALIZE flag is needed while hybridIO is enabled to
1095 	 * workaround tx lock contention issues in nxge.
1096 	 */
1097 	macp->m_v12n = MAC_VIRT_LEVEL1;
1098 	if (vnet_mac_tx_serialize == B_TRUE) {
1099 		macp->m_v12n |= MAC_VIRT_SERIALIZE;
1100 	}
1101 
1102 	/*
1103 	 * Finally, we're ready to register ourselves with the MAC layer
1104 	 * interface; if this succeeds, we're all ready to start()
1105 	 */
1106 	err = mac_register(macp, &vnetp->mh);
1107 	mac_free(macp);
1108 	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
1109 }
1110 
1111 /* read the mac address of the device */
1112 static int
1113 vnet_read_mac_address(vnet_t *vnetp)
1114 {
1115 	uchar_t 	*macaddr;
1116 	uint32_t 	size;
1117 	int 		rv;
1118 
1119 	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
1120 	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
1121 	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
1122 		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
1123 		    macaddr_propname, rv);
1124 		return (DDI_FAILURE);
1125 	}
1126 	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
1127 	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
1128 	ddi_prop_free(macaddr);
1129 
1130 	return (DDI_SUCCESS);
1131 }
1132 
1133 static void
1134 vnet_fdb_create(vnet_t *vnetp)
1135 {
1136 	char		hashname[MAXNAMELEN];
1137 
1138 	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
1139 	    vnetp->instance);
1140 	vnetp->fdb_nchains = vnet_fdb_nchains;
1141 	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
1142 	    mod_hash_null_valdtor, sizeof (void *));
1143 }
1144 
1145 static void
1146 vnet_fdb_destroy(vnet_t *vnetp)
1147 {
1148 	/* destroy fdb-hash-table */
1149 	if (vnetp->fdb_hashp != NULL) {
1150 		mod_hash_destroy_hash(vnetp->fdb_hashp);
1151 		vnetp->fdb_hashp = NULL;
1152 		vnetp->fdb_nchains = 0;
1153 	}
1154 }
1155 
1156 /*
1157  * Add an entry into the fdb.
1158  */
1159 void
1160 vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
1161 {
1162 	uint64_t	addr = 0;
1163 	int		rv;
1164 
1165 	KEY_HASH(addr, vresp->rem_macaddr);
1166 
1167 	/*
1168 	 * If the entry being added corresponds to LDC_SERVICE resource,
1169 	 * that is, vswitch connection, it is added to the hash and also
1170 	 * the entry is cached, an additional reference count reflects
1171 	 * this. The HYBRID resource is not added to the hash, but only
1172 	 * cached, as it is only used for sending out packets for unknown
1173 	 * unicast destinations.
1174 	 */
1175 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1176 	    (vresp->refcnt = 1) : (vresp->refcnt = 0);
1177 
1178 	/*
1179 	 * Note: duplicate keys will be rejected by mod_hash.
1180 	 */
1181 	if (vresp->type != VIO_NET_RES_HYBRID) {
1182 		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1183 		    (mod_hash_val_t)vresp);
1184 		if (rv != 0) {
1185 			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
1186 			return;
1187 		}
1188 	}
1189 
1190 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1191 		/* Cache the fdb entry to vsw-port */
1192 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1193 		if (vnetp->vsw_fp == NULL)
1194 			vnetp->vsw_fp = vresp;
1195 		RW_EXIT(&vnetp->vsw_fp_rw);
1196 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1197 		/* Cache the fdb entry to hybrid resource */
1198 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1199 		if (vnetp->hio_fp == NULL)
1200 			vnetp->hio_fp = vresp;
1201 		RW_EXIT(&vnetp->vsw_fp_rw);
1202 	}
1203 }
1204 
1205 /*
1206  * Remove an entry from fdb.
1207  */
1208 static void
1209 vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
1210 {
1211 	uint64_t	addr = 0;
1212 	int		rv;
1213 	uint32_t	refcnt;
1214 	vnet_res_t	*tmp;
1215 
1216 	KEY_HASH(addr, vresp->rem_macaddr);
1217 
1218 	/*
1219 	 * Remove the entry from fdb hash table.
1220 	 * This prevents further references to this fdb entry.
1221 	 */
1222 	if (vresp->type != VIO_NET_RES_HYBRID) {
1223 		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
1224 		    (mod_hash_val_t *)&tmp);
1225 		if (rv != 0) {
1226 			/*
1227 			 * As the resources are added to the hash only
1228 			 * after they are started, this can occur if
1229 			 * a resource unregisters before it is ever started.
1230 			 */
1231 			return;
1232 		}
1233 	}
1234 
1235 	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
1236 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1237 
1238 		ASSERT(tmp == vnetp->vsw_fp);
1239 		vnetp->vsw_fp = NULL;
1240 
1241 		RW_EXIT(&vnetp->vsw_fp_rw);
1242 	} else if (vresp->type == VIO_NET_RES_HYBRID) {
1243 		WRITE_ENTER(&vnetp->vsw_fp_rw);
1244 
1245 		vnetp->hio_fp = NULL;
1246 
1247 		RW_EXIT(&vnetp->vsw_fp_rw);
1248 	}
1249 
1250 	/*
1251 	 * If there are threads already ref holding before the entry was
1252 	 * removed from hash table, then wait for ref count to drop to zero.
1253 	 */
1254 	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
1255 	    (refcnt = 1) : (refcnt = 0);
1256 	while (vresp->refcnt > refcnt) {
1257 		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
1258 	}
1259 }
1260 
1261 /*
1262  * Search fdb for a given mac address. If an entry is found, hold
1263  * a reference to it and return the entry; else returns NULL.
1264  */
1265 static vnet_res_t *
1266 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1267 {
1268 	uint64_t	key = 0;
1269 	vnet_res_t	*vresp;
1270 	int		rv;
1271 
1272 	KEY_HASH(key, addrp->ether_addr_octet);
1273 
1274 	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1275 	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1276 
1277 	if (rv != 0)
1278 		return (NULL);
1279 
1280 	return (vresp);
1281 }
1282 
1283 /*
1284  * Callback function provided to mod_hash_find_cb(). After finding the fdb
1285  * entry corresponding to the key (macaddr), this callback will be invoked by
1286  * mod_hash_find_cb() to atomically increment the reference count on the fdb
1287  * entry before returning the found entry.
1288  */
1289 static void
1290 vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1291 {
1292 	_NOTE(ARGUNUSED(key))
1293 	VNET_FDBE_REFHOLD((vnet_res_t *)val);
1294 }
1295 
1296 /*
1297  * Frames received that are tagged with the pvid of the vnet device must be
1298  * untagged before sending up the stack. This function walks the chain of rx
1299  * frames, untags any such frames and returns the updated chain.
1300  *
1301  * Arguments:
1302  *    pvid:  pvid of the vnet device for which packets are being received
1303  *    mp:    head of pkt chain to be validated and untagged
1304  *
1305  * Returns:
1306  *    mp:    head of updated chain of packets
1307  */
1308 static void
1309 vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
1310 {
1311 	struct ether_vlan_header	*evhp;
1312 	mblk_t				*bp;
1313 	mblk_t				*bpt;
1314 	mblk_t				*bph;
1315 	mblk_t				*bpn;
1316 
1317 	bpn = bph = bpt = NULL;
1318 
1319 	for (bp = *mp; bp != NULL; bp = bpn) {
1320 
1321 		bpn = bp->b_next;
1322 		bp->b_next = bp->b_prev = NULL;
1323 
1324 		evhp = (struct ether_vlan_header *)bp->b_rptr;
1325 
1326 		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
1327 		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {
1328 
1329 			bp = vnet_vlan_remove_tag(bp);
1330 			if (bp == NULL) {
1331 				continue;
1332 			}
1333 
1334 		}
1335 
1336 		/* build a chain of processed packets */
1337 		if (bph == NULL) {
1338 			bph = bpt = bp;
1339 		} else {
1340 			bpt->b_next = bp;
1341 			bpt = bp;
1342 		}
1343 
1344 	}
1345 
1346 	*mp = bph;
1347 }
1348 
1349 static void
1350 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1351 {
1352 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
1353 	vnet_t			*vnetp = vresp->vnetp;
1354 	vnet_pseudo_rx_ring_t	*ringp;
1355 
1356 	if ((vnetp == NULL) || (vnetp->mh == 0)) {
1357 		freemsgchain(mp);
1358 		return;
1359 	}
1360 
1361 	ringp = vresp->rx_ringp;
1362 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
1363 }
1364 
1365 void
1366 vnet_tx_update(vio_net_handle_t vrh)
1367 {
1368 	vnet_res_t		*vresp = (vnet_res_t *)vrh;
1369 	vnet_t			*vnetp = vresp->vnetp;
1370 	vnet_pseudo_tx_ring_t	*tx_ringp;
1371 	vnet_pseudo_tx_group_t	*tx_grp;
1372 	int			i;
1373 
1374 	if (vnetp == NULL || vnetp->mh == NULL) {
1375 		return;
1376 	}
1377 
1378 	/*
1379 	 * Currently, the tx hwring API (used to access rings that belong to
1380 	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
1381 	 * update; also the pseudo rings are shared by the ports/ldcs in the
1382 	 * vgen layer. Thus we can't figure out which pseudo ring is being
1383 	 * re-enabled for transmits. To work around this, when we get a tx
1384 	 * restart notification from below, we simply propagate that to all
1385 	 * the tx pseudo rings registered with the mac layer above.
1386 	 *
1387 	 * There are a couple of side effects with this approach, but they are
1388 	 * not harmful, as outlined below:
1389 	 *
1390 	 * A) We might send an invalid ring_update() for a ring that is not
1391 	 * really flow controlled. This will not have any effect in the mac
1392 	 * layer and packets will continue to be transmitted on that ring.
1393 	 *
1394 	 * B) We might end up clearing the flow control in the mac layer for
1395 	 * a ring that is still flow controlled in the underlying resource.
1396 	 * This will result in the mac layer restarting	transmit, only to be
1397 	 * flow controlled again on that ring.
1398 	 */
1399 	tx_grp = &vnetp->tx_grp[0];
1400 	for (i = 0; i < tx_grp->ring_cnt; i++) {
1401 		tx_ringp = &tx_grp->rings[i];
1402 		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
1403 	}
1404 }
1405 
1406 /*
1407  * Update the new mtu of vnet into the mac layer. First check if the device has
1408  * been plumbed and if so fail the mtu update. Returns 0 on success.
1409  */
1410 int
1411 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1412 {
1413 	int	rv;
1414 
1415 	if (vnetp == NULL || vnetp->mh == NULL) {
1416 		return (EINVAL);
1417 	}
1418 
1419 	WRITE_ENTER(&vnetp->vrwlock);
1420 
1421 	if (vnetp->flags & VNET_STARTED) {
1422 		RW_EXIT(&vnetp->vrwlock);
1423 		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1424 		    "update as the device is plumbed\n",
1425 		    vnetp->instance);
1426 		return (EBUSY);
1427 	}
1428 
1429 	/* update mtu in the mac layer */
1430 	rv = mac_maxsdu_update(vnetp->mh, mtu);
1431 	if (rv != 0) {
1432 		RW_EXIT(&vnetp->vrwlock);
1433 		cmn_err(CE_NOTE,
1434 		    "!vnet%d: Unable to update mtu with mac layer\n",
1435 		    vnetp->instance);
1436 		return (EIO);
1437 	}
1438 
1439 	vnetp->mtu = mtu;
1440 
1441 	RW_EXIT(&vnetp->vrwlock);
1442 
1443 	return (0);
1444 }
1445 
1446 /*
1447  * Update the link state of vnet to the mac layer.
1448  */
1449 void
1450 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1451 {
1452 	if (vnetp == NULL || vnetp->mh == NULL) {
1453 		return;
1454 	}
1455 
1456 	WRITE_ENTER(&vnetp->vrwlock);
1457 	if (vnetp->link_state == link_state) {
1458 		RW_EXIT(&vnetp->vrwlock);
1459 		return;
1460 	}
1461 	vnetp->link_state = link_state;
1462 	RW_EXIT(&vnetp->vrwlock);
1463 
1464 	mac_link_update(vnetp->mh, link_state);
1465 }
1466 
1467 /*
1468  * vio_net_resource_reg -- An interface called to register a resource
1469  *	with vnet.
1470  *	macp -- a GLDv3 mac_register that has all the details of
1471  *		a resource and its callbacks etc.
1472  *	type -- resource type.
1473  *	local_macaddr -- resource's MAC address. This is used to
1474  *			 associate a resource with a corresponding vnet.
1475  *	remote_macaddr -- remote side MAC address. This is ignored for
1476  *			  the Hybrid resources.
1477  *	vhp -- A handle returned to the caller.
1478  *	vcb -- A set of callbacks provided to the callers.
1479  */
1480 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1481     ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1482     vio_net_callbacks_t *vcb)
1483 {
1484 	vnet_t		*vnetp;
1485 	vnet_res_t	*vresp;
1486 
1487 	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1488 	ether_copy(local_macaddr, vresp->local_macaddr);
1489 	ether_copy(rem_macaddr, vresp->rem_macaddr);
1490 	vresp->type = type;
1491 	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1492 
1493 	DBG1(NULL, "Resource Registerig type=0%X\n", type);
1494 
1495 	READ_ENTER(&vnet_rw);
1496 	vnetp = vnet_headp;
1497 	while (vnetp != NULL) {
1498 		if (VNET_MATCH_RES(vresp, vnetp)) {
1499 			vresp->vnetp = vnetp;
1500 
1501 			/* Setup kstats for hio resource */
1502 			if (vresp->type == VIO_NET_RES_HYBRID) {
1503 				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1504 				    "hio", vresp);
1505 				if (vresp->ksp == NULL) {
1506 					cmn_err(CE_NOTE, "!vnet%d: Cannot "
1507 					    "create kstats for hio resource",
1508 					    vnetp->instance);
1509 				}
1510 			}
1511 			vnet_add_resource(vnetp, vresp);
1512 			break;
1513 		}
1514 		vnetp = vnetp->nextp;
1515 	}
1516 	RW_EXIT(&vnet_rw);
1517 	if (vresp->vnetp == NULL) {
1518 		DWARN(NULL, "No vnet instance");
1519 		kmem_free(vresp, sizeof (vnet_res_t));
1520 		return (ENXIO);
1521 	}
1522 
1523 	*vhp = vresp;
1524 	vcb->vio_net_rx_cb = vnet_rx;
1525 	vcb->vio_net_tx_update = vnet_tx_update;
1526 	vcb->vio_net_report_err = vnet_handle_res_err;
1527 
1528 	/* Bind the resource to pseudo ring(s) */
1529 	if (vnet_bind_rings(vresp) != 0) {
1530 		(void) vnet_rem_resource(vnetp, vresp);
1531 		vnet_hio_destroy_kstats(vresp->ksp);
1532 		KMEM_FREE(vresp);
1533 		return (1);
1534 	}
1535 
1536 	/* Dispatch a task to start resources */
1537 	vnet_dispatch_res_task(vnetp);
1538 	return (0);
1539 }
1540 
1541 /*
1542  * vio_net_resource_unreg -- An interface to unregister a resource.
1543  */
1544 void
1545 vio_net_resource_unreg(vio_net_handle_t vhp)
1546 {
1547 	vnet_res_t	*vresp = (vnet_res_t *)vhp;
1548 	vnet_t		*vnetp = vresp->vnetp;
1549 
1550 	DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1551 
1552 	ASSERT(vnetp != NULL);
1553 	/*
1554 	 * Remove the resource from fdb; this ensures
1555 	 * there are no references to the resource.
1556 	 */
1557 	vnet_fdbe_del(vnetp, vresp);
1558 
1559 	vnet_unbind_rings(vresp);
1560 
1561 	/* Now remove the resource from the list */
1562 	(void) vnet_rem_resource(vnetp, vresp);
1563 
1564 	vnet_hio_destroy_kstats(vresp->ksp);
1565 	KMEM_FREE(vresp);
1566 }
1567 
1568 static void
1569 vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
1570 {
1571 	WRITE_ENTER(&vnetp->vrwlock);
1572 	vresp->nextp = vnetp->vres_list;
1573 	vnetp->vres_list = vresp;
1574 	RW_EXIT(&vnetp->vrwlock);
1575 }
1576 
1577 static vnet_res_t *
1578 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
1579 {
1580 	vnet_res_t	*vrp;
1581 
1582 	WRITE_ENTER(&vnetp->vrwlock);
1583 	if (vresp == vnetp->vres_list) {
1584 		vnetp->vres_list = vresp->nextp;
1585 	} else {
1586 		vrp = vnetp->vres_list;
1587 		while (vrp->nextp != NULL) {
1588 			if (vrp->nextp == vresp) {
1589 				vrp->nextp = vresp->nextp;
1590 				break;
1591 			}
1592 			vrp = vrp->nextp;
1593 		}
1594 	}
1595 	vresp->vnetp = NULL;
1596 	vresp->nextp = NULL;
1597 
1598 	RW_EXIT(&vnetp->vrwlock);
1599 
1600 	return (vresp);
1601 }
1602 
1603 /*
1604  * vnet_dds_rx -- an interface called by vgen to DDS messages.
1605  */
1606 void
1607 vnet_dds_rx(void *arg, void *dmsg)
1608 {
1609 	vnet_t *vnetp = arg;
1610 	vdds_process_dds_msg(vnetp, dmsg);
1611 }
1612 
1613 /*
1614  * vnet_send_dds_msg -- An interface provided to DDS to send
1615  *	DDS messages. This simply sends meessages via vgen.
1616  */
1617 int
1618 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1619 {
1620 	int rv;
1621 
1622 	if (vnetp->vgenhdl != NULL) {
1623 		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1624 	}
1625 	return (rv);
1626 }
1627 
1628 /*
1629  * vnet_cleanup_hio -- an interface called by vgen to cleanup hio resources.
1630  */
1631 void
1632 vnet_dds_cleanup_hio(vnet_t *vnetp)
1633 {
1634 	vdds_cleanup_hio(vnetp);
1635 }
1636 
1637 /*
1638  * vnet_handle_res_err -- A callback function called by a resource
1639  *	to report an error. For example, vgen can call to report
1640  *	an LDC down/reset event. This will trigger cleanup of associated
1641  *	Hybrid resource.
1642  */
1643 /* ARGSUSED */
1644 static void
1645 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1646 {
1647 	vnet_res_t *vresp = (vnet_res_t *)vrh;
1648 	vnet_t *vnetp = vresp->vnetp;
1649 
1650 	if (vnetp == NULL) {
1651 		return;
1652 	}
1653 	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1654 	    (vresp->type != VIO_NET_RES_HYBRID)) {
1655 		return;
1656 	}
1657 
1658 	vdds_cleanup_hio(vnetp);
1659 }
1660 
1661 /*
1662  * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1663  */
1664 static void
1665 vnet_dispatch_res_task(vnet_t *vnetp)
1666 {
1667 	int rv;
1668 
1669 	/*
1670 	 * Dispatch the task. It could be the case that vnetp->flags does
1671 	 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1672 	 * can abort the task when the task is started. See related comments
1673 	 * in vnet_m_stop() and vnet_stop_resources().
1674 	 */
1675 	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1676 	    vnetp, DDI_NOSLEEP);
1677 	if (rv != DDI_SUCCESS) {
1678 		cmn_err(CE_WARN,
1679 		    "vnet%d:Can't dispatch start resource task",
1680 		    vnetp->instance);
1681 	}
1682 }
1683 
1684 /*
1685  * vnet_res_start_task -- A taskq callback function that starts a resource.
1686  */
1687 static void
1688 vnet_res_start_task(void *arg)
1689 {
1690 	vnet_t *vnetp = arg;
1691 
1692 	WRITE_ENTER(&vnetp->vrwlock);
1693 	if (vnetp->flags & VNET_STARTED) {
1694 		vnet_start_resources(vnetp);
1695 	}
1696 	RW_EXIT(&vnetp->vrwlock);
1697 }
1698 
1699 /*
1700  * vnet_start_resources -- starts all resources associated with
1701  *	a vnet.
1702  */
1703 static void
1704 vnet_start_resources(vnet_t *vnetp)
1705 {
1706 	mac_register_t	*macp;
1707 	mac_callbacks_t	*cbp;
1708 	vnet_res_t	*vresp;
1709 	int rv;
1710 
1711 	DBG1(vnetp, "enter\n");
1712 
1713 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1714 
1715 	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1716 		/* skip if it is already started */
1717 		if (vresp->flags & VNET_STARTED) {
1718 			continue;
1719 		}
1720 		macp = &vresp->macreg;
1721 		cbp = macp->m_callbacks;
1722 		rv = cbp->mc_start(macp->m_driver);
1723 		if (rv == 0) {
1724 			/*
1725 			 * Successfully started the resource, so now
1726 			 * add it to the fdb.
1727 			 */
1728 			vresp->flags |= VNET_STARTED;
1729 			vnet_fdbe_add(vnetp, vresp);
1730 		}
1731 	}
1732 
1733 	DBG1(vnetp, "exit\n");
1734 
1735 }
1736 
1737 /*
1738  * vnet_stop_resources -- stop all resources associated with a vnet.
1739  */
1740 static void
1741 vnet_stop_resources(vnet_t *vnetp)
1742 {
1743 	vnet_res_t	*vresp;
1744 	mac_register_t	*macp;
1745 	mac_callbacks_t	*cbp;
1746 
1747 	DBG1(vnetp, "enter\n");
1748 
1749 	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1750 
1751 	for (vresp = vnetp->vres_list; vresp != NULL; ) {
1752 		if (vresp->flags & VNET_STARTED) {
1753 			/*
1754 			 * Release the lock while invoking mc_stop() of the
1755 			 * underlying resource. We hold a reference to this
1756 			 * resource to prevent being removed from the list in
1757 			 * vio_net_resource_unreg(). Note that new resources
1758 			 * can be added to the head of the list while the lock
1759 			 * is released, but they won't be started, as
1760 			 * VNET_STARTED flag has been cleared for the vnet
1761 			 * device in vnet_m_stop(). Also, while the lock is
1762 			 * released a resource could be removed from the list
1763 			 * in vio_net_resource_unreg(); but that is ok, as we
1764 			 * re-acquire the lock and only then access the forward
1765 			 * link (vresp->nextp) to continue with the next
1766 			 * resource.
1767 			 */
1768 			vresp->flags &= ~VNET_STARTED;
1769 			vresp->flags |= VNET_STOPPING;
1770 			macp = &vresp->macreg;
1771 			cbp = macp->m_callbacks;
1772 			VNET_FDBE_REFHOLD(vresp);
1773 			RW_EXIT(&vnetp->vrwlock);
1774 
1775 			cbp->mc_stop(macp->m_driver);
1776 
1777 			WRITE_ENTER(&vnetp->vrwlock);
1778 			vresp->flags &= ~VNET_STOPPING;
1779 			VNET_FDBE_REFRELE(vresp);
1780 		}
1781 		vresp = vresp->nextp;
1782 	}
1783 	DBG1(vnetp, "exit\n");
1784 }
1785 
1786 /*
1787  * Setup kstats for the HIO statistics.
1788  * NOTE: the synchronization for the statistics is the
1789  * responsibility of the caller.
1790  */
1791 kstat_t *
1792 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1793 {
1794 	kstat_t *ksp;
1795 	vnet_t *vnetp = vresp->vnetp;
1796 	vnet_hio_kstats_t *hiokp;
1797 	size_t size;
1798 
1799 	ASSERT(vnetp != NULL);
1800 	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1801 	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1802 	    KSTAT_TYPE_NAMED, size, 0);
1803 	if (ksp == NULL) {
1804 		return (NULL);
1805 	}
1806 
1807 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1808 	kstat_named_init(&hiokp->ipackets,		"ipackets",
1809 	    KSTAT_DATA_ULONG);
1810 	kstat_named_init(&hiokp->ierrors,		"ierrors",
1811 	    KSTAT_DATA_ULONG);
1812 	kstat_named_init(&hiokp->opackets,		"opackets",
1813 	    KSTAT_DATA_ULONG);
1814 	kstat_named_init(&hiokp->oerrors,		"oerrors",
1815 	    KSTAT_DATA_ULONG);
1816 
1817 
1818 	/* MIB II kstat variables */
1819 	kstat_named_init(&hiokp->rbytes,		"rbytes",
1820 	    KSTAT_DATA_ULONG);
1821 	kstat_named_init(&hiokp->obytes,		"obytes",
1822 	    KSTAT_DATA_ULONG);
1823 	kstat_named_init(&hiokp->multircv,		"multircv",
1824 	    KSTAT_DATA_ULONG);
1825 	kstat_named_init(&hiokp->multixmt,		"multixmt",
1826 	    KSTAT_DATA_ULONG);
1827 	kstat_named_init(&hiokp->brdcstrcv,		"brdcstrcv",
1828 	    KSTAT_DATA_ULONG);
1829 	kstat_named_init(&hiokp->brdcstxmt,		"brdcstxmt",
1830 	    KSTAT_DATA_ULONG);
1831 	kstat_named_init(&hiokp->norcvbuf,		"norcvbuf",
1832 	    KSTAT_DATA_ULONG);
1833 	kstat_named_init(&hiokp->noxmtbuf,		"noxmtbuf",
1834 	    KSTAT_DATA_ULONG);
1835 
1836 	ksp->ks_update = vnet_hio_update_kstats;
1837 	ksp->ks_private = (void *)vresp;
1838 	kstat_install(ksp);
1839 	return (ksp);
1840 }
1841 
1842 /*
1843  * Destroy kstats.
1844  */
1845 static void
1846 vnet_hio_destroy_kstats(kstat_t *ksp)
1847 {
1848 	if (ksp != NULL)
1849 		kstat_delete(ksp);
1850 }
1851 
1852 /*
1853  * Update the kstats.
1854  */
1855 static int
1856 vnet_hio_update_kstats(kstat_t *ksp, int rw)
1857 {
1858 	vnet_t *vnetp;
1859 	vnet_res_t *vresp;
1860 	vnet_hio_stats_t statsp;
1861 	vnet_hio_kstats_t *hiokp;
1862 
1863 	vresp = (vnet_res_t *)ksp->ks_private;
1864 	vnetp = vresp->vnetp;
1865 
1866 	bzero(&statsp, sizeof (vnet_hio_stats_t));
1867 
1868 	READ_ENTER(&vnetp->vsw_fp_rw);
1869 	if (vnetp->hio_fp == NULL) {
1870 		/* not using hio resources, just return */
1871 		RW_EXIT(&vnetp->vsw_fp_rw);
1872 		return (0);
1873 	}
1874 	VNET_FDBE_REFHOLD(vnetp->hio_fp);
1875 	RW_EXIT(&vnetp->vsw_fp_rw);
1876 	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
1877 	VNET_FDBE_REFRELE(vnetp->hio_fp);
1878 
1879 	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1880 
1881 	if (rw == KSTAT_READ) {
1882 		/* Link Input/Output stats */
1883 		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
1884 		hiokp->ipackets64.value.ull	= statsp.ipackets;
1885 		hiokp->ierrors.value.ul		= statsp.ierrors;
1886 		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
1887 		hiokp->opackets64.value.ull	= statsp.opackets;
1888 		hiokp->oerrors.value.ul		= statsp.oerrors;
1889 
1890 		/* MIB II kstat variables */
1891 		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
1892 		hiokp->rbytes64.value.ull	= statsp.rbytes;
1893 		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
1894 		hiokp->obytes64.value.ull	= statsp.obytes;
1895 		hiokp->multircv.value.ul	= statsp.multircv;
1896 		hiokp->multixmt.value.ul	= statsp.multixmt;
1897 		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
1898 		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
1899 		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
1900 		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
1901 	} else {
1902 		return (EACCES);
1903 	}
1904 
1905 	return (0);
1906 }
1907 
1908 static void
1909 vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
1910 {
1911 	mac_register_t		*macp;
1912 	mac_callbacks_t		*cbp;
1913 	uint64_t		val;
1914 	int			stat;
1915 
1916 	/*
1917 	 * get the specified statistics from the underlying nxge.
1918 	 */
1919 	macp = &vresp->macreg;
1920 	cbp = macp->m_callbacks;
1921 	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
1922 		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
1923 			switch (stat) {
1924 			case MAC_STAT_IPACKETS:
1925 				statsp->ipackets = val;
1926 				break;
1927 
1928 			case MAC_STAT_IERRORS:
1929 				statsp->ierrors = val;
1930 				break;
1931 
1932 			case MAC_STAT_OPACKETS:
1933 				statsp->opackets = val;
1934 				break;
1935 
1936 			case MAC_STAT_OERRORS:
1937 				statsp->oerrors = val;
1938 				break;
1939 
1940 			case MAC_STAT_RBYTES:
1941 				statsp->rbytes = val;
1942 				break;
1943 
1944 			case MAC_STAT_OBYTES:
1945 				statsp->obytes = val;
1946 				break;
1947 
1948 			case MAC_STAT_MULTIRCV:
1949 				statsp->multircv = val;
1950 				break;
1951 
1952 			case MAC_STAT_MULTIXMT:
1953 				statsp->multixmt = val;
1954 				break;
1955 
1956 			case MAC_STAT_BRDCSTRCV:
1957 				statsp->brdcstrcv = val;
1958 				break;
1959 
1960 			case MAC_STAT_BRDCSTXMT:
1961 				statsp->brdcstxmt = val;
1962 				break;
1963 
1964 			case MAC_STAT_NOXMTBUF:
1965 				statsp->noxmtbuf = val;
1966 				break;
1967 
1968 			case MAC_STAT_NORCVBUF:
1969 				statsp->norcvbuf = val;
1970 				break;
1971 
1972 			default:
1973 				/*
1974 				 * parameters not interested.
1975 				 */
1976 				break;
1977 			}
1978 		}
1979 	}
1980 }
1981 
1982 static boolean_t
1983 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
1984 {
1985 	vnet_t	*vnetp = (vnet_t *)arg;
1986 
1987 	if (vnetp == NULL) {
1988 		return (0);
1989 	}
1990 
1991 	switch (cap) {
1992 
1993 	case MAC_CAPAB_RINGS: {
1994 
1995 		mac_capab_rings_t *cap_rings = cap_data;
1996 		/*
1997 		 * Rings Capability Notes:
1998 		 * We advertise rings to make use of the rings framework in
1999 		 * gldv3 mac layer, to improve the performance. This is
2000 		 * specifically needed when a Hybrid resource (with multiple
2001 		 * tx/rx hardware rings) is assigned to a vnet device. We also
2002 		 * leverage this for the normal case when no Hybrid resource is
2003 		 * assigned.
2004 		 *
2005 		 * Ring Allocation:
2006 		 * - TX path:
2007 		 * We expose a pseudo ring group with 2 pseudo tx rings (as
2008 		 * currently HybridIO exports only 2 rings) In the normal case,
2009 		 * transmit traffic that comes down to the driver through the
2010 		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
2011 		 * distributed switching algorithm in vnet and gets transmitted
2012 		 * over a port/LDC in the vgen layer to either the vswitch or a
2013 		 * peer vnet. If and when a Hybrid resource is assigned to the
2014 		 * vnet, we obtain the tx ring information of the Hybrid device
2015 		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
2016 		 * Traffic being sent over the Hybrid resource by the mac layer
2017 		 * gets spread across both hw rings, as they are mapped to the
2018 		 * 2 pseudo tx rings in vnet.
2019 		 *
2020 		 * - RX path:
2021 		 * We expose a pseudo ring group with 3 pseudo rx rings (static
2022 		 * rings) initially. The first (default) pseudo rx ring is
2023 		 * reserved for the resource that connects to the vswitch
2024 		 * service. The next 2 rings are reserved for a Hybrid resource
2025 		 * that may be assigned to the vnet device. If and when a
2026 		 * Hybrid resource is assigned to the vnet, we obtain the rx
2027 		 * ring information of the Hybrid device (nxge) and map these
2028 		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
2029 		 * resource that connects to a peer vnet, we dynamically
2030 		 * allocate a pseudo rx ring and map it to that resource, when
2031 		 * the resource gets added; and the pseudo rx ring is
2032 		 * dynamically registered with the upper mac layer. We do the
2033 		 * reverse and unregister the ring with the mac layer when
2034 		 * the resource gets removed.
2035 		 *
2036 		 * Synchronization notes:
2037 		 * We don't need any lock to protect members of ring structure,
2038 		 * specifically ringp->hw_rh, in either the TX or the RX ring,
2039 		 * as explained below.
2040 		 * - TX ring:
2041 		 * ring->hw_rh is initialized only when a Hybrid resource is
2042 		 * associated; and gets referenced only in vnet_hio_tx(). The
2043 		 * Hybrid resource itself is available in fdb only after tx
2044 		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
2045 		 * we call vnet_bind_rings() first and then call
2046 		 * vnet_start_resources() which adds an entry to fdb. For
2047 		 * traffic going over LDC resources, we don't reference
2048 		 * ring->hw_rh at all.
2049 		 * - RX ring:
2050 		 * For rings mapped to Hybrid resource ring->hw_rh is
2051 		 * initialized and only then do we add the rx callback for
2052 		 * the underlying Hybrid resource; we disable callbacks before
2053 		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
2054 		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
2055 		 * (vio_net_resource_unreg()).
2056 		 */
2057 
2058 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2059 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2060 
2061 			/*
2062 			 * The ring_cnt for rx grp is initialized in
2063 			 * vnet_ring_grp_init(). Later, the ring_cnt gets
2064 			 * updated dynamically whenever LDC resources are added
2065 			 * or removed.
2066 			 */
2067 			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
2068 			cap_rings->mr_rget = vnet_get_ring;
2069 
2070 			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
2071 			cap_rings->mr_gget = vnet_get_group;
2072 			cap_rings->mr_gaddring = NULL;
2073 			cap_rings->mr_gremring = NULL;
2074 		} else {
2075 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2076 
2077 			/*
2078 			 * The ring_cnt for tx grp is initialized in
2079 			 * vnet_ring_grp_init() and remains constant, as we
2080 			 * do not support dymanic tx rings for now.
2081 			 */
2082 			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
2083 			cap_rings->mr_rget = vnet_get_ring;
2084 
2085 			/*
2086 			 * Transmit rings are not grouped; i.e, the number of
2087 			 * transmit ring groups advertised should be set to 0.
2088 			 */
2089 			cap_rings->mr_gnum = 0;
2090 
2091 			cap_rings->mr_gget = vnet_get_group;
2092 			cap_rings->mr_gaddring = NULL;
2093 			cap_rings->mr_gremring = NULL;
2094 		}
2095 		return (B_TRUE);
2096 
2097 	}
2098 
2099 	default:
2100 		break;
2101 
2102 	}
2103 
2104 	return (B_FALSE);
2105 }
2106 
2107 /*
2108  * Callback funtion for MAC layer to get ring information.
2109  */
2110 static void
2111 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
2112     const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
2113 {
2114 	vnet_t	*vnetp = arg;
2115 
2116 	switch (rtype) {
2117 
2118 	case MAC_RING_TYPE_RX: {
2119 
2120 		vnet_pseudo_rx_group_t	*rx_grp;
2121 		vnet_pseudo_rx_ring_t	*rx_ringp;
2122 		mac_intr_t		*mintr;
2123 
2124 		/* We advertised only one RX group */
2125 		ASSERT(g_index == 0);
2126 		rx_grp = &vnetp->rx_grp[g_index];
2127 
2128 		/* Check the current # of rings in the rx group */
2129 		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
2130 
2131 		/* Get the ring based on the index */
2132 		rx_ringp = &rx_grp->rings[r_index];
2133 
2134 		rx_ringp->handle = r_handle;
2135 		/*
2136 		 * Note: we don't need to save the incoming r_index in rx_ring,
2137 		 * as vnet_ring_grp_init() would have initialized the index for
2138 		 * each ring in the array.
2139 		 */
2140 		rx_ringp->grp = rx_grp;
2141 		rx_ringp->vnetp = vnetp;
2142 
2143 		mintr = &infop->mri_intr;
2144 		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
2145 		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
2146 		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
2147 
2148 		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
2149 		infop->mri_start = vnet_rx_ring_start;
2150 		infop->mri_stop = vnet_rx_ring_stop;
2151 
2152 		/* Set the poll function, as this is an rx ring */
2153 		infop->mri_poll = vnet_rx_poll;
2154 
2155 		break;
2156 	}
2157 
2158 	case MAC_RING_TYPE_TX: {
2159 		vnet_pseudo_tx_group_t	*tx_grp;
2160 		vnet_pseudo_tx_ring_t	*tx_ringp;
2161 
2162 		/*
2163 		 * No need to check grp index; mac layer passes -1 for it.
2164 		 */
2165 		tx_grp = &vnetp->tx_grp[0];
2166 
2167 		/* Check the # of rings in the tx group */
2168 		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
2169 
2170 		/* Get the ring based on the index */
2171 		tx_ringp = &tx_grp->rings[r_index];
2172 
2173 		tx_ringp->handle = r_handle;
2174 		tx_ringp->index = r_index;
2175 		tx_ringp->grp = tx_grp;
2176 		tx_ringp->vnetp = vnetp;
2177 
2178 		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
2179 		infop->mri_start = vnet_tx_ring_start;
2180 		infop->mri_stop = vnet_tx_ring_stop;
2181 
2182 		/* Set the transmit function, as this is a tx ring */
2183 		infop->mri_tx = vnet_tx_ring_send;
2184 
2185 		break;
2186 	}
2187 
2188 	default:
2189 		break;
2190 	}
2191 }
2192 
2193 /*
2194  * Callback funtion for MAC layer to get group information.
2195  */
2196 static void
2197 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2198 	mac_group_info_t *infop, mac_group_handle_t handle)
2199 {
2200 	vnet_t	*vnetp = (vnet_t *)arg;
2201 
2202 	switch (type) {
2203 
2204 	case MAC_RING_TYPE_RX:
2205 	{
2206 		vnet_pseudo_rx_group_t	*rx_grp;
2207 
2208 		/* We advertised only one RX group */
2209 		ASSERT(index == 0);
2210 
2211 		rx_grp = &vnetp->rx_grp[index];
2212 		rx_grp->handle = handle;
2213 		rx_grp->index = index;
2214 		rx_grp->vnetp = vnetp;
2215 
2216 		infop->mgi_driver = (mac_group_driver_t)rx_grp;
2217 		infop->mgi_start = NULL;
2218 		infop->mgi_stop = NULL;
2219 		infop->mgi_addmac = vnet_addmac;
2220 		infop->mgi_remmac = vnet_remmac;
2221 		infop->mgi_count = rx_grp->ring_cnt;
2222 
2223 		break;
2224 	}
2225 
2226 	case MAC_RING_TYPE_TX:
2227 	{
2228 		vnet_pseudo_tx_group_t	*tx_grp;
2229 
2230 		/* We advertised only one TX group */
2231 		ASSERT(index == 0);
2232 
2233 		tx_grp = &vnetp->tx_grp[index];
2234 		tx_grp->handle = handle;
2235 		tx_grp->index = index;
2236 		tx_grp->vnetp = vnetp;
2237 
2238 		infop->mgi_driver = (mac_group_driver_t)tx_grp;
2239 		infop->mgi_start = NULL;
2240 		infop->mgi_stop = NULL;
2241 		infop->mgi_addmac = NULL;
2242 		infop->mgi_remmac = NULL;
2243 		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2244 
2245 		break;
2246 	}
2247 
2248 	default:
2249 		break;
2250 
2251 	}
2252 }
2253 
2254 static int
2255 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2256 {
2257 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2258 	int			err;
2259 
2260 	/*
2261 	 * If this ring is mapped to a LDC resource, simply mark the state to
2262 	 * indicate the ring is started and return.
2263 	 */
2264 	if ((rx_ringp->state &
2265 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2266 		rx_ringp->gen_num = mr_gen_num;
2267 		rx_ringp->state |= VNET_RXRING_STARTED;
2268 		return (0);
2269 	}
2270 
2271 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2272 
2273 	/*
2274 	 * This must be a ring reserved for a hwring. If the hwring is not
2275 	 * bound yet, simply mark the state to indicate the ring is started and
2276 	 * return. If and when a hybrid resource is activated for this vnet
2277 	 * device, we will bind the hwring and start it then. If a hwring is
2278 	 * already bound, start it now.
2279 	 */
2280 	if (rx_ringp->hw_rh == NULL) {
2281 		rx_ringp->gen_num = mr_gen_num;
2282 		rx_ringp->state |= VNET_RXRING_STARTED;
2283 		return (0);
2284 	}
2285 
2286 	err = mac_hwring_start(rx_ringp->hw_rh);
2287 	if (err == 0) {
2288 		rx_ringp->gen_num = mr_gen_num;
2289 		rx_ringp->state |= VNET_RXRING_STARTED;
2290 	} else {
2291 		err = ENXIO;
2292 	}
2293 
2294 	return (err);
2295 }
2296 
2297 static void
2298 vnet_rx_ring_stop(mac_ring_driver_t arg)
2299 {
2300 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2301 
2302 	/*
2303 	 * If this ring is mapped to a LDC resource, simply mark the state to
2304 	 * indicate the ring is now stopped and return.
2305 	 */
2306 	if ((rx_ringp->state &
2307 	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2308 		rx_ringp->state &= ~VNET_RXRING_STARTED;
2309 		return;
2310 	}
2311 
2312 	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2313 
2314 	/*
2315 	 * This must be a ring reserved for a hwring. If the hwring is not
2316 	 * bound yet, simply mark the state to indicate the ring is stopped and
2317 	 * return. If a hwring is already bound, stop it now.
2318 	 */
2319 	if (rx_ringp->hw_rh == NULL) {
2320 		rx_ringp->state &= ~VNET_RXRING_STARTED;
2321 		return;
2322 	}
2323 
2324 	mac_hwring_stop(rx_ringp->hw_rh);
2325 	rx_ringp->state &= ~VNET_RXRING_STARTED;
2326 }
2327 
2328 /* ARGSUSED */
2329 static int
2330 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2331 {
2332 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2333 
2334 	tx_ringp->state |= VNET_TXRING_STARTED;
2335 	return (0);
2336 }
2337 
2338 static void
2339 vnet_tx_ring_stop(mac_ring_driver_t arg)
2340 {
2341 	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2342 
2343 	tx_ringp->state &= ~VNET_TXRING_STARTED;
2344 }
2345 
2346 /*
2347  * Disable polling for a ring and enable its interrupt.
2348  */
2349 static int
2350 vnet_ring_enable_intr(void *arg)
2351 {
2352 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2353 	vnet_res_t		*vresp;
2354 
2355 	if (rx_ringp->hw_rh == NULL) {
2356 		/*
2357 		 * Ring enable intr func is being invoked, but the ring is
2358 		 * not bound to any underlying resource ? This must be a ring
2359 		 * reserved for Hybrid resource and no such resource has been
2360 		 * assigned to this vnet device yet. We simply return success.
2361 		 */
2362 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2363 		return (0);
2364 	}
2365 
2366 	/*
2367 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
2368 	 * Call the appropriate function to enable interrupts for the ring.
2369 	 */
2370 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2371 		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2372 	} else {
2373 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2374 		return (vgen_enable_intr(vresp->macreg.m_driver));
2375 	}
2376 }
2377 
2378 /*
2379  * Enable polling for a ring and disable its interrupt.
2380  */
2381 static int
2382 vnet_ring_disable_intr(void *arg)
2383 {
2384 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2385 	vnet_res_t		*vresp;
2386 
2387 	if (rx_ringp->hw_rh == NULL) {
2388 		/*
2389 		 * Ring disable intr func is being invoked, but the ring is
2390 		 * not bound to any underlying resource ? This must be a ring
2391 		 * reserved for Hybrid resource and no such resource has been
2392 		 * assigned to this vnet device yet. We simply return success.
2393 		 */
2394 		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2395 		return (0);
2396 	}
2397 
2398 	/*
2399 	 * The rx ring has been bound to either a LDC or a Hybrid resource.
2400 	 * Call the appropriate function to disable interrupts for the ring.
2401 	 */
2402 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2403 		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2404 	} else {
2405 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2406 		return (vgen_disable_intr(vresp->macreg.m_driver));
2407 	}
2408 }
2409 
2410 /*
2411  * Poll 'bytes_to_pickup' bytes of message from the rx ring.
2412  */
2413 static mblk_t *
2414 vnet_rx_poll(void *arg, int bytes_to_pickup)
2415 {
2416 	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2417 	mblk_t			*mp = NULL;
2418 	vnet_res_t		*vresp;
2419 	vnet_t			*vnetp = rx_ringp->vnetp;
2420 
2421 	if (rx_ringp->hw_rh == NULL) {
2422 		return (NULL);
2423 	}
2424 
2425 	if (rx_ringp->state & VNET_RXRING_HYBRID) {
2426 		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2427 		/*
2428 		 * Packets received over a hybrid resource need additional
2429 		 * processing to remove the tag, for the pvid case. The
2430 		 * underlying resource is not aware of the vnet's pvid and thus
2431 		 * packets are received with the vlan tag in the header; unlike
2432 		 * packets that are received over a ldc channel in which case
2433 		 * the peer vnet/vsw would have already removed the tag.
2434 		 */
2435 		if (vnetp->pvid != vnetp->default_vlan_id) {
2436 			vnet_rx_frames_untag(vnetp->pvid, &mp);
2437 		}
2438 	} else {
2439 		vresp = (vnet_res_t *)rx_ringp->hw_rh;
2440 		mp = vgen_poll(vresp->macreg.m_driver, bytes_to_pickup);
2441 	}
2442 	return (mp);
2443 }
2444 
2445 /* ARGSUSED */
2446 void
2447 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2448 	boolean_t loopback)
2449 {
2450 	vnet_t			*vnetp = (vnet_t *)arg;
2451 	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;
2452 
2453 	/*
2454 	 * Packets received over a hybrid resource need additional processing
2455 	 * to remove the tag, for the pvid case. The underlying resource is
2456 	 * not aware of the vnet's pvid and thus packets are received with the
2457 	 * vlan tag in the header; unlike packets that are received over a ldc
2458 	 * channel in which case the peer vnet/vsw would have already removed
2459 	 * the tag.
2460 	 */
2461 	if (vnetp->pvid != vnetp->default_vlan_id) {
2462 		vnet_rx_frames_untag(vnetp->pvid, &mp);
2463 		if (mp == NULL) {
2464 			return;
2465 		}
2466 	}
2467 	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2468 }
2469 
2470 static int
2471 vnet_addmac(void *arg, const uint8_t *mac_addr)
2472 {
2473 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2474 	vnet_t			*vnetp;
2475 
2476 	vnetp = rx_grp->vnetp;
2477 
2478 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2479 		return (0);
2480 	}
2481 
2482 	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2483 	    vnetp->instance, __func__);
2484 	return (EINVAL);
2485 }
2486 
2487 static int
2488 vnet_remmac(void *arg, const uint8_t *mac_addr)
2489 {
2490 	vnet_pseudo_rx_group_t  *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2491 	vnet_t			*vnetp;
2492 
2493 	vnetp = rx_grp->vnetp;
2494 
2495 	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2496 		return (0);
2497 	}
2498 
2499 	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2500 	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2501 	return (EINVAL);
2502 }
2503 
/*
 * Set up the Hybrid (HybridIO) resource for this vnet device by opening
 * the underlying physical NIC named by 'ifname' and plumbing it into the
 * vio resource framework:
 *  - open the NIC by link name (retrying while it is not yet available),
 *  - open an exclusive mac client on it and install the vnet's current
 *    unicast address (with mac-layer tagging/stripping disabled),
 *  - register it as a VIO_NET_RES_HYBRID resource,
 *  - install the rx callback and the (tx-update) notify callback.
 * The acquired handles are stored in vnetp (hio_mh, hio_mch, hio_muh,
 * hio_vhp, hio_mnh).
 *
 * Returns 0 on success; EAGAIN if mac_alloc() fails; the mac open error
 * if the NIC cannot be opened; 1 for any later failure (in which case
 * vnet_hio_mac_cleanup() has already undone the partial setup).
 */
int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t		mh;
	mac_client_handle_t	mch = NULL;
	mac_unicast_handle_t	muh = NULL;
	mac_diag_t		diag;
	mac_register_t		*macp;
	char			client_name[MAXNAMELEN];
	int			rv;
	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t	vcb;
	ether_addr_t		rem_addr =
		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t		retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	/*
	 * The NIC may not have attached yet; retry the open on ENOENT up
	 * to vnet_mac_open_retries times, waiting vnet_mac_open_delay
	 * between attempts. Any other error is returned to the caller.
	 */
	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	/* Open an exclusive mac client named "vnet<inst>-<ifname>" */
	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
	    ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	/* Install the vnet's current macaddr as the primary unicast addr */
	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	/* Register the NIC as a Hybrid resource with the vio framework */
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	/* add the notify callback - only tx updates for now */
	vnetp->hio_mnh = mac_notify_add(vnetp->hio_mh, vnet_hio_notify_cb,
	    vnetp);

	return (0);

fail:
	/* Undo whatever part of the setup completed before the failure */
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}
2583 
/*
 * Tear down the Hybrid resource state set up by vnet_hio_mac_init(), in
 * the reverse order of acquisition: notify callback, vio resource,
 * unicast address, mac client, and finally the mac handle itself. Each
 * handle is checked for NULL and cleared after release, so this routine
 * is safe to call on a partially initialized vnet (it is also the
 * failure path of vnet_hio_mac_init()).
 */
void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	if (vnetp->hio_mnh != NULL) {
		(void) mac_notify_remove(vnetp->hio_mnh, B_TRUE);
		vnetp->hio_mnh = NULL;
	}

	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}
2612 
/*
 * Bind the pseudo rings to the hw rings of the Hybrid resource: quiesce
 * the NIC's own rx path, then map each hw rx ring to one of the pseudo
 * rx rings reserved for the Hybrid resource (starting it if the pseudo
 * ring is already marked started), and record the hw tx rings in the
 * pseudo tx rings. Runs under the mac perimeter of the underlying NIC.
 * Returns 0 on success; 1 on failure (after unwinding via
 * vnet_unbind_hwrings()).
 */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			hw_ring_cnt;
	int			i;
	int			rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we have
	 * statically exported these pseudo rxrings which are reserved for
	 * rxrings of Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp);

		/* Start the hwring if needed */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_start(rx_ringp->hw_rh);
			if (rv != 0) {
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically exported
	 * these rings. Note that these rings will continue to be used for LDC
	 * resources to peer vnets and vswitch (shared ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}

	mac_perim_exit(mph1);
	return (0);

fail:
	/* Drop the perimeter before unbinding, which re-enters it */
	mac_perim_exit(mph1);
	vnet_unbind_hwrings(vnetp);
	return (1);
}
2715 
/*
 * Unbind the pseudo rings from the hw rings of the Hybrid resource:
 * clear the Hybrid state of the pseudo tx rings, stop and tear down any
 * hw rx rings still bound, and restart the NIC's own rx path that was
 * quiesced in vnet_bind_hwrings(). Runs under the mac perimeter of the
 * underlying NIC and is safe to call on a partially bound state (it is
 * also the failure path of vnet_bind_hwrings()).
 */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Detach the pseudo tx rings from the hw tx rings */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}

	/* Stop and tear down the hw rx rings bound to the pseudo rings */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_stop(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs then
		 * restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}
2763 
/*
 * Bind a pseudo rx ring to a LDC resource. For the LDC_SERVICE resource
 * the statically allocated default ring (index 0) is used; for a
 * LDC_GUEST resource a new pseudo rx ring is allocated and registered
 * with the upper mac layer. Returns 0 on success, 1 on failure.
 */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			rv;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is save a
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	/* Note: perimeter of the vnet device itself, not the Hybrid NIC */
	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need this
	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with upper-mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		/* Undo the ring setup and free the ring back to the group */
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);
fail:
	mac_perim_exit(mph1);
	return (1);
}
2830 
/*
 * Unbind a pseudo rx ring from a LDC resource — the inverse of
 * vnet_bind_vgenring(). For LDC_SERVICE only the reference between the
 * default ring (index 0) and the resource is cleared; for LDC_GUEST the
 * dynamically added pseudo ring is unregistered from the upper mac layer
 * and freed. A no-op if the resource has no ring bound.
 */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is remove its
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with upper-mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}
2882 
2883 static void
2884 vnet_unbind_rings(vnet_res_t *vresp)
2885 {
2886 	switch (vresp->type) {
2887 
2888 	case VIO_NET_RES_LDC_SERVICE:
2889 	case VIO_NET_RES_LDC_GUEST:
2890 		vnet_unbind_vgenring(vresp);
2891 		break;
2892 
2893 	case VIO_NET_RES_HYBRID:
2894 		vnet_unbind_hwrings(vresp->vnetp);
2895 		break;
2896 
2897 	default:
2898 		break;
2899 
2900 	}
2901 }
2902 
2903 static int
2904 vnet_bind_rings(vnet_res_t *vresp)
2905 {
2906 	int	rv;
2907 
2908 	switch (vresp->type) {
2909 
2910 	case VIO_NET_RES_LDC_SERVICE:
2911 	case VIO_NET_RES_LDC_GUEST:
2912 		rv = vnet_bind_vgenring(vresp);
2913 		break;
2914 
2915 	case VIO_NET_RES_HYBRID:
2916 		rv = vnet_bind_hwrings(vresp->vnetp);
2917 		break;
2918 
2919 	default:
2920 		rv = 1;
2921 		break;
2922 
2923 	}
2924 
2925 	return (rv);
2926 }
2927 
2928 /* ARGSUSED */
2929 int
2930 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
2931 {
2932 	vnet_t	*vnetp = (vnet_t *)arg;
2933 
2934 	*val = mac_stat_get(vnetp->hio_mh, stat);
2935 	return (0);
2936 }
2937 
2938 /*
2939  * The start() and stop() routines for the Hybrid resource below, are just
2940  * dummy functions. This is provided to avoid resource type specific code in
2941  * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
2942  * of the Hybrid resource happens in the context of the mac_client interfaces
2943  * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
2944  */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	/* No-op: the Hybrid resource is started via mac_client interfaces */
	return (0);
}
2951 
/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
	/* No-op: the Hybrid resource is stopped via mac_client interfaces */
}
2957 
2958 mblk_t *
2959 vnet_hio_tx(void *arg, mblk_t *mp)
2960 {
2961 	vnet_pseudo_tx_ring_t	*tx_ringp;
2962 	mblk_t			*nextp;
2963 	mblk_t			*ret_mp;
2964 
2965 	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2966 	for (;;) {
2967 		nextp = mp->b_next;
2968 		mp->b_next = NULL;
2969 
2970 		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
2971 		if (ret_mp != NULL) {
2972 			ret_mp->b_next = nextp;
2973 			mp = ret_mp;
2974 			break;
2975 		}
2976 
2977 		if ((mp = nextp) == NULL)
2978 			break;
2979 	}
2980 	return (mp);
2981 }
2982 
2983 static void
2984 vnet_hio_notify_cb(void *arg, mac_notify_type_t type)
2985 {
2986 	vnet_t			*vnetp = (vnet_t *)arg;
2987 	mac_perim_handle_t	mph;
2988 
2989 	mac_perim_enter_by_mh(vnetp->hio_mh, &mph);
2990 	switch (type) {
2991 	case MAC_NOTE_TX:
2992 		vnet_tx_update(vnetp->hio_vhp);
2993 		break;
2994 
2995 	default:
2996 		break;
2997 	}
2998 	mac_perim_exit(mph);
2999 }
3000 
3001 #ifdef	VNET_IOC_DEBUG
3002 
3003 /*
3004  * The ioctl entry point is used only for debugging for now. The ioctl commands
3005  * can be used to force the link state of the channel connected to vsw.
3006  */
/*
 * Debug-only ioctl entry point: accepts only the force-link-state
 * commands and nak's everything else with EINVAL.
 */
static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	struct iocblk	*iocp;
	vnet_t		*vnetp;

	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
	iocp->ioc_error = 0;
	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {

	/* Both link-state commands are handled by the same routine */
	case VNET_FORCE_LINK_DOWN:
	case VNET_FORCE_LINK_UP:
		vnet_force_link_state(vnetp, q, mp);
		break;

	default:
		iocp->ioc_error = EINVAL;
		miocnak(q, mp, 0, iocp->ioc_error);
		break;

	}
}
3036 
/*
 * Forward a force-link-state ioctl to the resource connected to vsw
 * (vnetp->vsw_fp) by invoking its mc_ioctl callback. The vsw_fp_rw read
 * lock is held across the call so the resource cannot be torn down while
 * the ioctl is in progress; if no vsw resource exists, the request is
 * silently dropped.
 */
static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}
3058 
3059 #else
3060 
3061 static void
3062 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3063 {
3064 	vnet_t		*vnetp;
3065 
3066 	vnetp = (vnet_t *)arg;
3067 
3068 	if (vnetp == NULL) {
3069 		miocnak(q, mp, 0, EINVAL);
3070 		return;
3071 	}
3072 
3073 	/* ioctl support only for debugging */
3074 	miocnak(q, mp, 0, ENOTSUP);
3075 }
3076 
3077 #endif
3078