xref: /illumos-gate/usr/src/uts/common/io/igb/igb_main.c (revision c3ea2840)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at:
10  *	http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When using or redistributing this file, you may do so under the
15  * License only. No other modification of this header is permitted.
16  *
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 
24 /*
25  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms of the CDDL.
27  */
28 
29 #include "igb_sw.h"
30 
/* Driver description string; referenced from igb_modldrv below. */
static char ident[] = "Intel 1Gb Ethernet";
/* Driver version string; logged by igb_attach() on a successful attach. */
static char igb_version[] = "igb 1.1.4";
33 
34 /*
 * Local function prototypes
36  */
37 static int igb_register_mac(igb_t *);
38 static int igb_identify_hardware(igb_t *);
39 static int igb_regs_map(igb_t *);
40 static void igb_init_properties(igb_t *);
41 static int igb_init_driver_settings(igb_t *);
42 static void igb_init_locks(igb_t *);
43 static void igb_destroy_locks(igb_t *);
44 static int igb_init(igb_t *);
45 static int igb_chip_start(igb_t *);
46 static void igb_chip_stop(igb_t *);
47 static int igb_reset(igb_t *);
48 static void igb_tx_clean(igb_t *);
49 static boolean_t igb_tx_drain(igb_t *);
50 static boolean_t igb_rx_drain(igb_t *);
51 static int igb_alloc_rings(igb_t *);
52 static int igb_init_rings(igb_t *);
53 static void igb_free_rings(igb_t *);
54 static void igb_fini_rings(igb_t *);
55 static void igb_setup_rings(igb_t *);
56 static void igb_setup_rx(igb_t *);
57 static void igb_setup_tx(igb_t *);
58 static void igb_setup_rx_ring(igb_rx_ring_t *);
59 static void igb_setup_tx_ring(igb_tx_ring_t *);
60 static void igb_setup_rss(igb_t *);
61 static void igb_setup_mac_rss_classify(igb_t *);
62 static void igb_setup_mac_classify(igb_t *);
63 static void igb_init_unicst(igb_t *);
64 static void igb_setup_multicst(igb_t *);
65 static void igb_get_phy_state(igb_t *);
66 static void igb_get_conf(igb_t *);
67 static int igb_get_prop(igb_t *, char *, int, int, int);
68 static boolean_t igb_is_link_up(igb_t *);
69 static boolean_t igb_link_check(igb_t *);
70 static void igb_local_timer(void *);
71 static void igb_arm_watchdog_timer(igb_t *);
72 static void igb_start_watchdog_timer(igb_t *);
73 static void igb_restart_watchdog_timer(igb_t *);
74 static void igb_stop_watchdog_timer(igb_t *);
75 static void igb_disable_adapter_interrupts(igb_t *);
76 static void igb_enable_adapter_interrupts_82575(igb_t *);
77 static void igb_enable_adapter_interrupts_82576(igb_t *);
78 static boolean_t is_valid_mac_addr(uint8_t *);
79 static boolean_t igb_stall_check(igb_t *);
80 static boolean_t igb_set_loopback_mode(igb_t *, uint32_t);
81 static void igb_set_external_loopback(igb_t *);
82 static void igb_set_internal_mac_loopback(igb_t *);
83 static void igb_set_internal_phy_loopback(igb_t *);
84 static void igb_set_internal_serdes_loopback(igb_t *);
85 static boolean_t igb_find_mac_address(igb_t *);
86 static int igb_alloc_intrs(igb_t *);
87 static int igb_alloc_intr_handles(igb_t *, int);
88 static int igb_add_intr_handlers(igb_t *);
89 static void igb_rem_intr_handlers(igb_t *);
90 static void igb_rem_intrs(igb_t *);
91 static int igb_enable_intrs(igb_t *);
92 static int igb_disable_intrs(igb_t *);
93 static void igb_setup_msix_82575(igb_t *);
94 static void igb_setup_msix_82576(igb_t *);
95 static uint_t igb_intr_legacy(void *, void *);
96 static uint_t igb_intr_msi(void *, void *);
97 static uint_t igb_intr_rx(void *, void *);
98 static uint_t igb_intr_tx(void *, void *);
99 static uint_t igb_intr_tx_other(void *, void *);
100 static void igb_intr_rx_work(igb_rx_ring_t *);
101 static void igb_intr_tx_work(igb_tx_ring_t *);
102 static void igb_intr_link_work(igb_t *);
103 static void igb_get_driver_control(struct e1000_hw *);
104 static void igb_release_driver_control(struct e1000_hw *);
105 
106 static int igb_attach(dev_info_t *, ddi_attach_cmd_t);
107 static int igb_detach(dev_info_t *, ddi_detach_cmd_t);
108 static int igb_resume(dev_info_t *);
109 static int igb_suspend(dev_info_t *);
110 static int igb_quiesce(dev_info_t *);
111 static void igb_unconfigure(dev_info_t *, igb_t *);
112 static int igb_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
113     const void *);
114 static void igb_fm_init(igb_t *);
115 static void igb_fm_fini(igb_t *);
116 
117 
/*
 * Character/block entry points.  Only open and close are accepted
 * (nulldev); every other data-path entry is rejected with nodev or
 * nochpoll.  Referenced from igb_dev_ops below.
 */
static struct cb_ops igb_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_str */
	D_MP | D_HOTPLUG,	/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* cb_aread */
	nodev			/* cb_awrite */
};
138 
/*
 * Device operations vector.  It is handed to mac_init_ops() and
 * mac_fini_ops() in _init()/_fini() and is referenced from igb_modldrv.
 */
static struct dev_ops igb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	igb_attach,		/* devo_attach */
	igb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&igb_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	ddi_power,		/* devo_power */
	igb_quiesce,		/* devo_quiesce */
};
153 
/* Module linkage description of this driver; referenced by igb_modlinkage. */
static struct modldrv igb_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	ident,			/* Description string */
	&igb_dev_ops,		/* driver ops */
};
159 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage igb_modlinkage = {
	MODREV_1, &igb_modldrv, NULL
};
163 
/*
 * Access attributes for register mapping.
 * Passed to ddi_regs_map_setup() by igb_regs_map().
 */
ddi_device_acc_attr_t igb_regs_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC,
	DDI_FLAGERR_ACC
};
171 
/*
 * Flags stored in the first member of igb_m_callbacks.
 * NOTE(review): presumably these mark the optional ioctl and getcapab
 * entries as implemented -- confirm against mac_callbacks_t.
 */
#define	IGB_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)

/*
 * Callback vector handed to the MAC layer through
 * mac_register_t.m_callbacks in igb_register_mac().  The two NULL
 * slots are entries this driver leaves unset.
 */
static mac_callbacks_t igb_m_callbacks = {
	IGB_M_CALLBACK_FLAGS,
	igb_m_stat,
	igb_m_start,
	igb_m_stop,
	igb_m_promisc,
	igb_m_multicst,
	NULL,
	NULL,
	igb_m_ioctl,
	igb_m_getcapab
};
186 
/*
 * Initialize capabilities of each supported adapter type
 */

/*
 * 82575 capabilities: 1 to 4 rx and tx queues (default 4), plus the
 * 82575-specific interrupt enable and MSI-X setup routines.
 * Selected by igb_identify_hardware() when hw->mac.type is e1000_82575.
 */
static adapter_info_t igb_82575_cap = {
	/* limits */
	4,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	4,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82575,
	igb_setup_msix_82575,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL)
};
210 
/*
 * 82576 capabilities: 1 to 12 rx and tx queues (default 4), plus the
 * 82576-specific interrupt enable and MSI-X setup routines.  Compared
 * with the 82575 table it additionally sets IGB_FLAG_NEED_CTX_IDX.
 * Selected by igb_identify_hardware() when hw->mac.type is e1000_82576.
 */
static adapter_info_t igb_82576_cap = {
	/* limits */
	12,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	12,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82576,
	igb_setup_msix_82576,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX)
};
232 
233 /*
234  * Module Initialization Functions
235  */
236 
237 int
238 _init(void)
239 {
240 	int status;
241 
242 	mac_init_ops(&igb_dev_ops, MODULE_NAME);
243 
244 	status = mod_install(&igb_modlinkage);
245 
246 	if (status != DDI_SUCCESS) {
247 		mac_fini_ops(&igb_dev_ops);
248 	}
249 
250 	return (status);
251 }
252 
253 int
254 _fini(void)
255 {
256 	int status;
257 
258 	status = mod_remove(&igb_modlinkage);
259 
260 	if (status == DDI_SUCCESS) {
261 		mac_fini_ops(&igb_dev_ops);
262 	}
263 
264 	return (status);
265 
266 }
267 
268 int
269 _info(struct modinfo *modinfop)
270 {
271 	int status;
272 
273 	status = mod_info(&igb_modlinkage, modinfop);
274 
275 	return (status);
276 }
277 
278 /*
279  * igb_attach - driver attach
280  *
281  * This function is the device specific initialization entry
282  * point. This entry point is required and must be written.
283  * The DDI_ATTACH command must be provided in the attach entry
284  * point. When attach() is called with cmd set to DDI_ATTACH,
285  * all normal kernel services (such as kmem_alloc(9F)) are
286  * available for use by the driver.
287  *
288  * The attach() function will be called once for each instance
289  * of  the  device  on  the  system with cmd set to DDI_ATTACH.
290  * Until attach() succeeds, the only driver entry points which
291  * may be called are open(9E) and getinfo(9E).
292  */
293 static int
294 igb_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
295 {
296 	igb_t *igb;
297 	struct igb_osdep *osdep;
298 	struct e1000_hw *hw;
299 	int instance;
300 
301 	/*
302 	 * Check the command and perform corresponding operations
303 	 */
304 	switch (cmd) {
305 	default:
306 		return (DDI_FAILURE);
307 
308 	case DDI_RESUME:
309 		return (igb_resume(devinfo));
310 
311 	case DDI_ATTACH:
312 		break;
313 	}
314 
315 	/* Get the device instance */
316 	instance = ddi_get_instance(devinfo);
317 
318 	/* Allocate memory for the instance data structure */
319 	igb = kmem_zalloc(sizeof (igb_t), KM_SLEEP);
320 
321 	igb->dip = devinfo;
322 	igb->instance = instance;
323 
324 	hw = &igb->hw;
325 	osdep = &igb->osdep;
326 	hw->back = osdep;
327 	osdep->igb = igb;
328 
329 	/* Attach the instance pointer to the dev_info data structure */
330 	ddi_set_driver_private(devinfo, igb);
331 
332 
333 	/* Initialize for fma support */
334 	igb->fm_capabilities = igb_get_prop(igb, "fm-capable",
335 	    0, 0x0f,
336 	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
337 	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
338 	igb_fm_init(igb);
339 	igb->attach_progress |= ATTACH_PROGRESS_FMINIT;
340 
341 	/*
342 	 * Map PCI config space registers
343 	 */
344 	if (pci_config_setup(devinfo, &osdep->cfg_handle) != DDI_SUCCESS) {
345 		igb_error(igb, "Failed to map PCI configurations");
346 		goto attach_fail;
347 	}
348 	igb->attach_progress |= ATTACH_PROGRESS_PCI_CONFIG;
349 
350 	/*
351 	 * Identify the chipset family
352 	 */
353 	if (igb_identify_hardware(igb) != IGB_SUCCESS) {
354 		igb_error(igb, "Failed to identify hardware");
355 		goto attach_fail;
356 	}
357 
358 	/*
359 	 * Map device registers
360 	 */
361 	if (igb_regs_map(igb) != IGB_SUCCESS) {
362 		igb_error(igb, "Failed to map device registers");
363 		goto attach_fail;
364 	}
365 	igb->attach_progress |= ATTACH_PROGRESS_REGS_MAP;
366 
367 	/*
368 	 * Initialize driver parameters
369 	 */
370 	igb_init_properties(igb);
371 	igb->attach_progress |= ATTACH_PROGRESS_PROPS;
372 
373 	/*
374 	 * Allocate interrupts
375 	 */
376 	if (igb_alloc_intrs(igb) != IGB_SUCCESS) {
377 		igb_error(igb, "Failed to allocate interrupts");
378 		goto attach_fail;
379 	}
380 	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;
381 
382 	/*
383 	 * Allocate rx/tx rings based on the ring numbers.
384 	 * The actual numbers of rx/tx rings are decided by the number of
385 	 * allocated interrupt vectors, so we should allocate the rings after
386 	 * interrupts are allocated.
387 	 */
388 	if (igb_alloc_rings(igb) != IGB_SUCCESS) {
389 		igb_error(igb, "Failed to allocate rx/tx rings or groups");
390 		goto attach_fail;
391 	}
392 	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_RINGS;
393 
394 	/*
395 	 * Add interrupt handlers
396 	 */
397 	if (igb_add_intr_handlers(igb) != IGB_SUCCESS) {
398 		igb_error(igb, "Failed to add interrupt handlers");
399 		goto attach_fail;
400 	}
401 	igb->attach_progress |= ATTACH_PROGRESS_ADD_INTR;
402 
403 	/*
404 	 * Initialize driver parameters
405 	 */
406 	if (igb_init_driver_settings(igb) != IGB_SUCCESS) {
407 		igb_error(igb, "Failed to initialize driver settings");
408 		goto attach_fail;
409 	}
410 
411 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK) {
412 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
413 		goto attach_fail;
414 	}
415 
416 	/*
417 	 * Initialize mutexes for this device.
418 	 * Do this before enabling the interrupt handler and
419 	 * register the softint to avoid the condition where
420 	 * interrupt handler can try using uninitialized mutex
421 	 */
422 	igb_init_locks(igb);
423 	igb->attach_progress |= ATTACH_PROGRESS_LOCKS;
424 
425 	/*
426 	 * Initialize chipset hardware
427 	 */
428 	mutex_enter(&igb->gen_lock);
429 	if (igb_init(igb) != IGB_SUCCESS) {
430 		mutex_exit(&igb->gen_lock);
431 		igb_error(igb, "Failed to initialize adapter");
432 		goto attach_fail;
433 	}
434 	mutex_exit(&igb->gen_lock);
435 	igb->attach_progress |= ATTACH_PROGRESS_INIT;
436 
437 	/*
438 	 * Initialize DMA and hardware settings for rx/tx rings
439 	 */
440 	if (igb_init_rings(igb) != IGB_SUCCESS) {
441 		igb_error(igb, "Failed to initialize rings");
442 		goto attach_fail;
443 	}
444 	igb->attach_progress |= ATTACH_PROGRESS_INIT_RINGS;
445 
446 	/*
447 	 * Initialize statistics
448 	 */
449 	if (igb_init_stats(igb) != IGB_SUCCESS) {
450 		igb_error(igb, "Failed to initialize statistics");
451 		goto attach_fail;
452 	}
453 	igb->attach_progress |= ATTACH_PROGRESS_STATS;
454 
455 	/*
456 	 * Initialize NDD parameters
457 	 */
458 	if (igb_nd_init(igb) != IGB_SUCCESS) {
459 		igb_error(igb, "Failed to initialize ndd");
460 		goto attach_fail;
461 	}
462 	igb->attach_progress |= ATTACH_PROGRESS_NDD;
463 
464 	/*
465 	 * Register the driver to the MAC
466 	 */
467 	if (igb_register_mac(igb) != IGB_SUCCESS) {
468 		igb_error(igb, "Failed to register MAC");
469 		goto attach_fail;
470 	}
471 	igb->attach_progress |= ATTACH_PROGRESS_MAC;
472 
473 	/*
474 	 * Now that mutex locks are initialized, and the chip is also
475 	 * initialized, enable interrupts.
476 	 */
477 	if (igb_enable_intrs(igb) != IGB_SUCCESS) {
478 		igb_error(igb, "Failed to enable DDI interrupts");
479 		goto attach_fail;
480 	}
481 	igb->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;
482 
483 	igb_log(igb, "%s", igb_version);
484 	igb->igb_state |= IGB_INITIALIZED;
485 
486 	return (DDI_SUCCESS);
487 
488 attach_fail:
489 	igb_unconfigure(devinfo, igb);
490 	return (DDI_FAILURE);
491 }
492 
493 /*
494  * igb_detach - driver detach
495  *
496  * The detach() function is the complement of the attach routine.
497  * If cmd is set to DDI_DETACH, detach() is used to remove  the
498  * state  associated  with  a  given  instance of a device node
499  * prior to the removal of that instance from the system.
500  *
501  * The detach() function will be called once for each  instance
502  * of the device for which there has been a successful attach()
503  * once there are no longer  any  opens  on  the  device.
504  *
505  * Interrupts routine are disabled, All memory allocated by this
506  * driver are freed.
507  */
508 static int
509 igb_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
510 {
511 	igb_t *igb;
512 
513 	/*
514 	 * Check detach command
515 	 */
516 	switch (cmd) {
517 	default:
518 		return (DDI_FAILURE);
519 
520 	case DDI_SUSPEND:
521 		return (igb_suspend(devinfo));
522 
523 	case DDI_DETACH:
524 		break;
525 	}
526 
527 
528 	/*
529 	 * Get the pointer to the driver private data structure
530 	 */
531 	igb = (igb_t *)ddi_get_driver_private(devinfo);
532 	if (igb == NULL)
533 		return (DDI_FAILURE);
534 
535 	/*
536 	 * Unregister MAC. If failed, we have to fail the detach
537 	 */
538 	if (mac_unregister(igb->mac_hdl) != 0) {
539 		igb_error(igb, "Failed to unregister MAC");
540 		return (DDI_FAILURE);
541 	}
542 	igb->attach_progress &= ~ATTACH_PROGRESS_MAC;
543 
544 	/*
545 	 * If the device is still running, it needs to be stopped first.
546 	 * This check is necessary because under some specific circumstances,
547 	 * the detach routine can be called without stopping the interface
548 	 * first.
549 	 */
550 	mutex_enter(&igb->gen_lock);
551 	if (igb->igb_state & IGB_STARTED) {
552 		igb->igb_state &= ~IGB_STARTED;
553 		igb_stop(igb);
554 		mutex_exit(&igb->gen_lock);
555 		/* Disable and stop the watchdog timer */
556 		igb_disable_watchdog_timer(igb);
557 	} else
558 		mutex_exit(&igb->gen_lock);
559 
560 	/*
561 	 * Check if there are still rx buffers held by the upper layer.
562 	 * If so, fail the detach.
563 	 */
564 	if (!igb_rx_drain(igb))
565 		return (DDI_FAILURE);
566 
567 	/*
568 	 * Do the remaining unconfigure routines
569 	 */
570 	igb_unconfigure(devinfo, igb);
571 
572 	return (DDI_SUCCESS);
573 }
574 
575 /*
576  * quiesce(9E) entry point.
577  *
578  * This function is called when the system is single-threaded at high
579  * PIL with preemption disabled. Therefore, this function must not be
580  * blocked.
581  *
582  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
583  * DDI_FAILURE indicates an error condition and should almost never happen.
584  */
585 static int
586 igb_quiesce(dev_info_t *devinfo)
587 {
588 	igb_t *igb;
589 	struct e1000_hw *hw;
590 
591 	igb = (igb_t *)ddi_get_driver_private(devinfo);
592 
593 	if (igb == NULL)
594 		return (DDI_FAILURE);
595 
596 	hw = &igb->hw;
597 
598 	/*
599 	 * Disable the adapter interrupts
600 	 */
601 	igb_disable_adapter_interrupts(igb);
602 
603 	/* Tell firmware driver is no longer in control */
604 	igb_release_driver_control(hw);
605 
606 	/*
607 	 * Reset the chipset
608 	 */
609 	(void) e1000_reset_hw(hw);
610 
611 	/*
612 	 * Reset PHY if possible
613 	 */
614 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
615 		(void) e1000_phy_hw_reset(hw);
616 
617 	return (DDI_SUCCESS);
618 }
619 
620 
621 static void
622 igb_unconfigure(dev_info_t *devinfo, igb_t *igb)
623 {
624 	/*
625 	 * Disable interrupt
626 	 */
627 	if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
628 		(void) igb_disable_intrs(igb);
629 	}
630 
631 	/*
632 	 * Unregister MAC
633 	 */
634 	if (igb->attach_progress & ATTACH_PROGRESS_MAC) {
635 		(void) mac_unregister(igb->mac_hdl);
636 	}
637 
638 	/*
639 	 * Free ndd parameters
640 	 */
641 	if (igb->attach_progress & ATTACH_PROGRESS_NDD) {
642 		igb_nd_cleanup(igb);
643 	}
644 
645 	/*
646 	 * Free statistics
647 	 */
648 	if (igb->attach_progress & ATTACH_PROGRESS_STATS) {
649 		kstat_delete((kstat_t *)igb->igb_ks);
650 	}
651 
652 	/*
653 	 * Remove interrupt handlers
654 	 */
655 	if (igb->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
656 		igb_rem_intr_handlers(igb);
657 	}
658 
659 	/*
660 	 * Remove interrupts
661 	 */
662 	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_INTR) {
663 		igb_rem_intrs(igb);
664 	}
665 
666 	/*
667 	 * Remove driver properties
668 	 */
669 	if (igb->attach_progress & ATTACH_PROGRESS_PROPS) {
670 		(void) ddi_prop_remove_all(devinfo);
671 	}
672 
673 	/*
674 	 * Release the DMA resources of rx/tx rings
675 	 */
676 	if (igb->attach_progress & ATTACH_PROGRESS_INIT_RINGS) {
677 		igb_fini_rings(igb);
678 	}
679 
680 	/*
681 	 * Stop the chipset
682 	 */
683 	if (igb->attach_progress & ATTACH_PROGRESS_INIT) {
684 		mutex_enter(&igb->gen_lock);
685 		igb_chip_stop(igb);
686 		mutex_exit(&igb->gen_lock);
687 		if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
688 			ddi_fm_service_impact(igb->dip, DDI_SERVICE_UNAFFECTED);
689 	}
690 
691 	/*
692 	 * Free register handle
693 	 */
694 	if (igb->attach_progress & ATTACH_PROGRESS_REGS_MAP) {
695 		if (igb->osdep.reg_handle != NULL)
696 			ddi_regs_map_free(&igb->osdep.reg_handle);
697 	}
698 
699 	/*
700 	 * Free PCI config handle
701 	 */
702 	if (igb->attach_progress & ATTACH_PROGRESS_PCI_CONFIG) {
703 		if (igb->osdep.cfg_handle != NULL)
704 			pci_config_teardown(&igb->osdep.cfg_handle);
705 	}
706 
707 	/*
708 	 * Free locks
709 	 */
710 	if (igb->attach_progress & ATTACH_PROGRESS_LOCKS) {
711 		igb_destroy_locks(igb);
712 	}
713 
714 	/*
715 	 * Free the rx/tx rings
716 	 */
717 	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_RINGS) {
718 		igb_free_rings(igb);
719 	}
720 
721 	/*
722 	 * Remove FMA
723 	 */
724 	if (igb->attach_progress & ATTACH_PROGRESS_FMINIT) {
725 		igb_fm_fini(igb);
726 	}
727 
728 	/*
729 	 * Free the driver data structure
730 	 */
731 	kmem_free(igb, sizeof (igb_t));
732 
733 	ddi_set_driver_private(devinfo, NULL);
734 }
735 
736 /*
737  * igb_register_mac - Register the driver and its function pointers with
738  * the GLD interface
739  */
740 static int
741 igb_register_mac(igb_t *igb)
742 {
743 	struct e1000_hw *hw = &igb->hw;
744 	mac_register_t *mac;
745 	int status;
746 
747 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
748 		return (IGB_FAILURE);
749 
750 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
751 	mac->m_driver = igb;
752 	mac->m_dip = igb->dip;
753 	mac->m_src_addr = hw->mac.addr;
754 	mac->m_callbacks = &igb_m_callbacks;
755 	mac->m_min_sdu = 0;
756 	mac->m_max_sdu = igb->max_frame_size -
757 	    sizeof (struct ether_vlan_header) - ETHERFCSL;
758 	mac->m_margin = VLAN_TAGSZ;
759 	mac->m_v12n = MAC_VIRT_LEVEL1;
760 
761 	status = mac_register(mac, &igb->mac_hdl);
762 
763 	mac_free(mac);
764 
765 	return ((status == 0) ? IGB_SUCCESS : IGB_FAILURE);
766 }
767 
768 /*
769  * igb_identify_hardware - Identify the type of the chipset
770  */
771 static int
772 igb_identify_hardware(igb_t *igb)
773 {
774 	struct e1000_hw *hw = &igb->hw;
775 	struct igb_osdep *osdep = &igb->osdep;
776 
777 	/*
778 	 * Get the device id
779 	 */
780 	hw->vendor_id =
781 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_VENID);
782 	hw->device_id =
783 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_DEVID);
784 	hw->revision_id =
785 	    pci_config_get8(osdep->cfg_handle, PCI_CONF_REVID);
786 	hw->subsystem_device_id =
787 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBSYSID);
788 	hw->subsystem_vendor_id =
789 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBVENID);
790 
791 	/*
792 	 * Set the mac type of the adapter based on the device id
793 	 */
794 	if (e1000_set_mac_type(hw) != E1000_SUCCESS) {
795 		return (IGB_FAILURE);
796 	}
797 
798 	/*
799 	 * Install adapter capabilities based on mac type
800 	 */
801 	switch (hw->mac.type) {
802 	case e1000_82575:
803 		igb->capab = &igb_82575_cap;
804 		break;
805 	case e1000_82576:
806 		igb->capab = &igb_82576_cap;
807 		break;
808 	default:
809 		return (IGB_FAILURE);
810 	}
811 
812 	return (IGB_SUCCESS);
813 }
814 
815 /*
816  * igb_regs_map - Map the device registers
817  */
818 static int
819 igb_regs_map(igb_t *igb)
820 {
821 	dev_info_t *devinfo = igb->dip;
822 	struct e1000_hw *hw = &igb->hw;
823 	struct igb_osdep *osdep = &igb->osdep;
824 	off_t mem_size;
825 
826 	/*
827 	 * First get the size of device registers to be mapped.
828 	 */
829 	if (ddi_dev_regsize(devinfo, IGB_ADAPTER_REGSET, &mem_size) !=
830 	    DDI_SUCCESS) {
831 		return (IGB_FAILURE);
832 	}
833 
834 	/*
835 	 * Call ddi_regs_map_setup() to map registers
836 	 */
837 	if ((ddi_regs_map_setup(devinfo, IGB_ADAPTER_REGSET,
838 	    (caddr_t *)&hw->hw_addr, 0,
839 	    mem_size, &igb_regs_acc_attr,
840 	    &osdep->reg_handle)) != DDI_SUCCESS) {
841 		return (IGB_FAILURE);
842 	}
843 
844 	return (IGB_SUCCESS);
845 }
846 
/*
 * igb_init_properties - Initialize driver properties
 *
 * Thin wrapper around igb_get_conf().  igb_attach() sets
 * ATTACH_PROGRESS_PROPS after calling it, which later makes
 * igb_unconfigure() remove the properties of the dev_info node.
 */
static void
igb_init_properties(igb_t *igb)
{
	/*
	 * Get conf file properties, including link settings,
	 * jumbo frames, ring number, descriptor number, etc.
	 */
	igb_get_conf(igb);
}
859 
860 /*
861  * igb_init_driver_settings - Initialize driver settings
862  *
863  * The settings include hardware function pointers, bus information,
864  * rx/tx rings settings, link state, and any other parameters that
865  * need to be setup during driver initialization.
866  */
867 static int
868 igb_init_driver_settings(igb_t *igb)
869 {
870 	struct e1000_hw *hw = &igb->hw;
871 	igb_rx_ring_t *rx_ring;
872 	igb_tx_ring_t *tx_ring;
873 	uint32_t rx_size;
874 	uint32_t tx_size;
875 	int i;
876 
877 	/*
878 	 * Initialize chipset specific hardware function pointers
879 	 */
880 	if (e1000_setup_init_funcs(hw, B_TRUE) != E1000_SUCCESS) {
881 		return (IGB_FAILURE);
882 	}
883 
884 	/*
885 	 * Get bus information
886 	 */
887 	if (e1000_get_bus_info(hw) != E1000_SUCCESS) {
888 		return (IGB_FAILURE);
889 	}
890 
891 	/*
892 	 * Set rx buffer size
893 	 * The IP header alignment room is counted in the calculation.
894 	 * The rx buffer size is in unit of 1K that is required by the
895 	 * chipset hardware.
896 	 */
897 	rx_size = igb->max_frame_size + IPHDR_ALIGN_ROOM;
898 	igb->rx_buf_size = ((rx_size >> 10) +
899 	    ((rx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
900 
901 	/*
902 	 * Set tx buffer size
903 	 */
904 	tx_size = igb->max_frame_size;
905 	igb->tx_buf_size = ((tx_size >> 10) +
906 	    ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
907 
908 	/*
909 	 * Initialize rx/tx rings parameters
910 	 */
911 	for (i = 0; i < igb->num_rx_rings; i++) {
912 		rx_ring = &igb->rx_rings[i];
913 		rx_ring->index = i;
914 		rx_ring->igb = igb;
915 
916 		rx_ring->ring_size = igb->rx_ring_size;
917 		rx_ring->free_list_size = igb->rx_ring_size;
918 		rx_ring->copy_thresh = igb->rx_copy_thresh;
919 		rx_ring->limit_per_intr = igb->rx_limit_per_intr;
920 	}
921 
922 	for (i = 0; i < igb->num_tx_rings; i++) {
923 		tx_ring = &igb->tx_rings[i];
924 		tx_ring->index = i;
925 		tx_ring->igb = igb;
926 		if (igb->tx_head_wb_enable)
927 			tx_ring->tx_recycle = igb_tx_recycle_head_wb;
928 		else
929 			tx_ring->tx_recycle = igb_tx_recycle_legacy;
930 
931 		tx_ring->ring_size = igb->tx_ring_size;
932 		tx_ring->free_list_size = igb->tx_ring_size +
933 		    (igb->tx_ring_size >> 1);
934 		tx_ring->copy_thresh = igb->tx_copy_thresh;
935 		tx_ring->recycle_thresh = igb->tx_recycle_thresh;
936 		tx_ring->overload_thresh = igb->tx_overload_thresh;
937 		tx_ring->resched_thresh = igb->tx_resched_thresh;
938 	}
939 
940 	/*
941 	 * Initialize values of interrupt throttling rates
942 	 */
943 	for (i = 1; i < MAX_NUM_EITR; i++)
944 		igb->intr_throttling[i] = igb->intr_throttling[0];
945 
946 	/*
947 	 * The initial link state should be "unknown"
948 	 */
949 	igb->link_state = LINK_STATE_UNKNOWN;
950 
951 	return (IGB_SUCCESS);
952 }
953 
954 /*
955  * igb_init_locks - Initialize locks
956  */
957 static void
958 igb_init_locks(igb_t *igb)
959 {
960 	igb_rx_ring_t *rx_ring;
961 	igb_tx_ring_t *tx_ring;
962 	int i;
963 
964 	for (i = 0; i < igb->num_rx_rings; i++) {
965 		rx_ring = &igb->rx_rings[i];
966 		mutex_init(&rx_ring->rx_lock, NULL,
967 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
968 		mutex_init(&rx_ring->recycle_lock, NULL,
969 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
970 	}
971 
972 	for (i = 0; i < igb->num_tx_rings; i++) {
973 		tx_ring = &igb->tx_rings[i];
974 		mutex_init(&tx_ring->tx_lock, NULL,
975 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
976 		mutex_init(&tx_ring->recycle_lock, NULL,
977 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
978 		mutex_init(&tx_ring->tcb_head_lock, NULL,
979 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
980 		mutex_init(&tx_ring->tcb_tail_lock, NULL,
981 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
982 	}
983 
984 	mutex_init(&igb->gen_lock, NULL,
985 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
986 
987 	mutex_init(&igb->watchdog_lock, NULL,
988 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
989 }
990 
991 /*
992  * igb_destroy_locks - Destroy locks
993  */
994 static void
995 igb_destroy_locks(igb_t *igb)
996 {
997 	igb_rx_ring_t *rx_ring;
998 	igb_tx_ring_t *tx_ring;
999 	int i;
1000 
1001 	for (i = 0; i < igb->num_rx_rings; i++) {
1002 		rx_ring = &igb->rx_rings[i];
1003 		mutex_destroy(&rx_ring->rx_lock);
1004 		mutex_destroy(&rx_ring->recycle_lock);
1005 	}
1006 
1007 	for (i = 0; i < igb->num_tx_rings; i++) {
1008 		tx_ring = &igb->tx_rings[i];
1009 		mutex_destroy(&tx_ring->tx_lock);
1010 		mutex_destroy(&tx_ring->recycle_lock);
1011 		mutex_destroy(&tx_ring->tcb_head_lock);
1012 		mutex_destroy(&tx_ring->tcb_tail_lock);
1013 	}
1014 
1015 	mutex_destroy(&igb->gen_lock);
1016 	mutex_destroy(&igb->watchdog_lock);
1017 }
1018 
1019 static int
1020 igb_resume(dev_info_t *devinfo)
1021 {
1022 	igb_t *igb;
1023 
1024 	igb = (igb_t *)ddi_get_driver_private(devinfo);
1025 	if (igb == NULL)
1026 		return (DDI_FAILURE);
1027 
1028 	mutex_enter(&igb->gen_lock);
1029 
1030 	if (igb->igb_state & IGB_STARTED) {
1031 		if (igb_start(igb) != IGB_SUCCESS) {
1032 			mutex_exit(&igb->gen_lock);
1033 			return (DDI_FAILURE);
1034 		}
1035 
1036 		/*
1037 		 * Enable and start the watchdog timer
1038 		 */
1039 		igb_enable_watchdog_timer(igb);
1040 	}
1041 
1042 	igb->igb_state &= ~IGB_SUSPENDED;
1043 
1044 	mutex_exit(&igb->gen_lock);
1045 
1046 	return (DDI_SUCCESS);
1047 }
1048 
1049 static int
1050 igb_suspend(dev_info_t *devinfo)
1051 {
1052 	igb_t *igb;
1053 
1054 	igb = (igb_t *)ddi_get_driver_private(devinfo);
1055 	if (igb == NULL)
1056 		return (DDI_FAILURE);
1057 
1058 	mutex_enter(&igb->gen_lock);
1059 
1060 	igb->igb_state |= IGB_SUSPENDED;
1061 
1062 	igb_stop(igb);
1063 
1064 	mutex_exit(&igb->gen_lock);
1065 
1066 	/*
1067 	 * Disable and stop the watchdog timer
1068 	 */
1069 	igb_disable_watchdog_timer(igb);
1070 
1071 	return (DDI_SUCCESS);
1072 }
1073 
1074 /*
1075  * igb_init - Initialize the device
1076  */
1077 static int
1078 igb_init(igb_t *igb)
1079 {
1080 	struct e1000_hw *hw = &igb->hw;
1081 	uint32_t pba;
1082 	uint32_t high_water;
1083 
1084 	ASSERT(mutex_owned(&igb->gen_lock));
1085 
1086 	/*
1087 	 * Reset chipset to put the hardware in a known state
1088 	 * before we try to do anything with the eeprom
1089 	 */
1090 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1091 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1092 		goto init_fail;
1093 	}
1094 
1095 	/*
1096 	 * NVM validation
1097 	 */
1098 	if (e1000_validate_nvm_checksum(hw) < 0) {
1099 		/*
1100 		 * Some PCI-E parts fail the first check due to
1101 		 * the link being in sleep state.  Call it again,
1102 		 * if it fails a second time its a real issue.
1103 		 */
1104 		if (e1000_validate_nvm_checksum(hw) < 0) {
1105 			igb_error(igb,
1106 			    "Invalid NVM checksum. Please contact "
1107 			    "the vendor to update the NVM.");
1108 			igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1109 			goto init_fail;
1110 		}
1111 	}
1112 
1113 	/*
1114 	 * Setup flow control
1115 	 *
1116 	 * These parameters set thresholds for the adapter's generation(Tx)
1117 	 * and response(Rx) to Ethernet PAUSE frames.  These are just threshold
1118 	 * settings.  Flow control is enabled or disabled in the configuration
1119 	 * file.
1120 	 * High-water mark is set down from the top of the rx fifo (not
1121 	 * sensitive to max_frame_size) and low-water is set just below
1122 	 * high-water mark.
1123 	 * The high water mark must be low enough to fit one full frame above
1124 	 * it in the rx FIFO.  Should be the lower of:
1125 	 * 90% of the Rx FIFO size, or the full Rx FIFO size minus one full
1126 	 * frame.
1127 	 */
1128 	if (hw->mac.type == e1000_82575) {
1129 		pba = E1000_PBA_34K;
1130 	} else {
1131 		pba = E1000_PBA_64K;
1132 	}
1133 
1134 	high_water = min(((pba << 10) * 9 / 10),
1135 	    ((pba << 10) - igb->max_frame_size));
1136 
1137 	if (hw->mac.type == e1000_82575) {
1138 		/* 8-byte granularity */
1139 		hw->fc.high_water = high_water & 0xFFF8;
1140 		hw->fc.low_water = hw->fc.high_water - 8;
1141 	} else {
1142 		/* 16-byte granularity */
1143 		hw->fc.high_water = high_water & 0xFFF0;
1144 		hw->fc.low_water = hw->fc.high_water - 16;
1145 	}
1146 
1147 	hw->fc.pause_time = E1000_FC_PAUSE_TIME;
1148 	hw->fc.send_xon = B_TRUE;
1149 
1150 	/*
1151 	 * Reset the chipset hardware the second time to validate
1152 	 * the PBA setting.
1153 	 */
1154 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1155 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1156 		goto init_fail;
1157 	}
1158 
1159 	/*
1160 	 * Don't wait for auto-negotiation to complete
1161 	 */
1162 	hw->phy.autoneg_wait_to_complete = B_FALSE;
1163 
1164 	/*
1165 	 * Copper options
1166 	 */
1167 	if (hw->phy.media_type == e1000_media_type_copper) {
1168 		hw->phy.mdix = 0;	/* AUTO_ALL_MODES */
1169 		hw->phy.disable_polarity_correction = B_FALSE;
1170 		hw->phy.ms_type = e1000_ms_hw_default; /* E1000_MASTER_SLAVE */
1171 	}
1172 
1173 	/*
1174 	 * Initialize link settings
1175 	 */
1176 	(void) igb_setup_link(igb, B_FALSE);
1177 
1178 	/*
1179 	 * Initialize the chipset hardware
1180 	 */
1181 	if (igb_chip_start(igb) != IGB_SUCCESS) {
1182 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1183 		goto init_fail;
1184 	}
1185 
1186 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK) {
1187 		goto init_fail;
1188 	}
1189 
1190 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
1191 		goto init_fail;
1192 	}
1193 
1194 	return (IGB_SUCCESS);
1195 
1196 init_fail:
1197 	/*
1198 	 * Reset PHY if possible
1199 	 */
1200 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
1201 		(void) e1000_phy_hw_reset(hw);
1202 
1203 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1204 
1205 	return (IGB_FAILURE);
1206 }
1207 
1208 /*
1209  * igb_init_rings - Allocate DMA resources for all rx/tx rings and
1210  * initialize relevant hardware settings.
1211  */
1212 static int
1213 igb_init_rings(igb_t *igb)
1214 {
1215 	int i;
1216 
1217 	/*
1218 	 * Allocate buffers for all the rx/tx rings
1219 	 */
1220 	if (igb_alloc_dma(igb) != IGB_SUCCESS)
1221 		return (IGB_FAILURE);
1222 
1223 	/*
1224 	 * Setup the rx/tx rings
1225 	 */
1226 	mutex_enter(&igb->gen_lock);
1227 
1228 	for (i = 0; i < igb->num_rx_rings; i++)
1229 		mutex_enter(&igb->rx_rings[i].rx_lock);
1230 	for (i = 0; i < igb->num_tx_rings; i++)
1231 		mutex_enter(&igb->tx_rings[i].tx_lock);
1232 
1233 	igb_setup_rings(igb);
1234 
1235 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1236 		mutex_exit(&igb->tx_rings[i].tx_lock);
1237 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1238 		mutex_exit(&igb->rx_rings[i].rx_lock);
1239 
1240 	mutex_exit(&igb->gen_lock);
1241 
1242 	return (IGB_SUCCESS);
1243 }
1244 
1245 /*
1246  * igb_fini_rings - Release DMA resources of all rx/tx rings
1247  */
1248 static void
1249 igb_fini_rings(igb_t *igb)
1250 {
1251 	/*
1252 	 * Release the DMA/memory resources of rx/tx rings
1253 	 */
1254 	igb_free_dma(igb);
1255 }
1256 
1257 /*
1258  * igb_chip_start - Initialize and start the chipset hardware
1259  */
1260 static int
1261 igb_chip_start(igb_t *igb)
1262 {
1263 	struct e1000_hw *hw = &igb->hw;
1264 	int i;
1265 
1266 	ASSERT(mutex_owned(&igb->gen_lock));
1267 
1268 	/*
1269 	 * Get the mac address
1270 	 * This function should handle SPARC case correctly.
1271 	 */
1272 	if (!igb_find_mac_address(igb)) {
1273 		igb_error(igb, "Failed to get the mac address");
1274 		return (IGB_FAILURE);
1275 	}
1276 
1277 	/* Validate mac address */
1278 	if (!is_valid_mac_addr(hw->mac.addr)) {
1279 		igb_error(igb, "Invalid mac address");
1280 		return (IGB_FAILURE);
1281 	}
1282 
1283 	/* Disable wakeup control by default */
1284 	E1000_WRITE_REG(hw, E1000_WUC, 0);
1285 
1286 	/*
1287 	 * Configure/Initialize hardware
1288 	 */
1289 	if (e1000_init_hw(hw) != E1000_SUCCESS) {
1290 		igb_error(igb, "Failed to initialize hardware");
1291 		return (IGB_FAILURE);
1292 	}
1293 
1294 	/*
1295 	 * Make sure driver has control
1296 	 */
1297 	igb_get_driver_control(hw);
1298 
1299 	/*
1300 	 * Setup MSI-X interrupts
1301 	 */
1302 	if (igb->intr_type == DDI_INTR_TYPE_MSIX)
1303 		igb->capab->setup_msix(igb);
1304 
1305 	/*
1306 	 * Initialize unicast addresses.
1307 	 */
1308 	igb_init_unicst(igb);
1309 
1310 	/*
1311 	 * Setup and initialize the mctable structures.
1312 	 */
1313 	igb_setup_multicst(igb);
1314 
1315 	/*
1316 	 * Set interrupt throttling rate
1317 	 */
1318 	for (i = 0; i < igb->intr_cnt; i++)
1319 		E1000_WRITE_REG(hw, E1000_EITR(i), igb->intr_throttling[i]);
1320 
1321 	/* Enable PCI-E master */
1322 	if (hw->bus.type == e1000_bus_type_pci_express) {
1323 		e1000_enable_pciex_master(hw);
1324 	}
1325 
1326 	/*
1327 	 * Save the state of the phy
1328 	 */
1329 	igb_get_phy_state(igb);
1330 
1331 	return (IGB_SUCCESS);
1332 }
1333 
1334 /*
1335  * igb_chip_stop - Stop the chipset hardware
1336  */
1337 static void
1338 igb_chip_stop(igb_t *igb)
1339 {
1340 	struct e1000_hw *hw = &igb->hw;
1341 
1342 	ASSERT(mutex_owned(&igb->gen_lock));
1343 
1344 	/* Tell firmware driver is no longer in control */
1345 	igb_release_driver_control(hw);
1346 
1347 	/*
1348 	 * Reset the chipset
1349 	 */
1350 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
1351 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1352 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1353 	}
1354 
1355 	/*
1356 	 * Reset PHY if possible
1357 	 */
1358 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
1359 		(void) e1000_phy_hw_reset(hw);
1360 }
1361 
1362 /*
1363  * igb_reset - Reset the chipset and restart the driver.
1364  *
1365  * It involves stopping and re-starting the chipset,
1366  * and re-configuring the rx/tx rings.
1367  */
1368 static int
1369 igb_reset(igb_t *igb)
1370 {
1371 	int i;
1372 
1373 	mutex_enter(&igb->gen_lock);
1374 
1375 	ASSERT(igb->igb_state & IGB_STARTED);
1376 
1377 	/*
1378 	 * Disable the adapter interrupts to stop any rx/tx activities
1379 	 * before draining pending data and resetting hardware.
1380 	 */
1381 	igb_disable_adapter_interrupts(igb);
1382 
1383 	/*
1384 	 * Drain the pending transmit packets
1385 	 */
1386 	(void) igb_tx_drain(igb);
1387 
1388 	for (i = 0; i < igb->num_rx_rings; i++)
1389 		mutex_enter(&igb->rx_rings[i].rx_lock);
1390 	for (i = 0; i < igb->num_tx_rings; i++)
1391 		mutex_enter(&igb->tx_rings[i].tx_lock);
1392 
1393 	/*
1394 	 * Stop the chipset hardware
1395 	 */
1396 	igb_chip_stop(igb);
1397 
1398 	/*
1399 	 * Clean the pending tx data/resources
1400 	 */
1401 	igb_tx_clean(igb);
1402 
1403 	/*
1404 	 * Start the chipset hardware
1405 	 */
1406 	if (igb_chip_start(igb) != IGB_SUCCESS) {
1407 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1408 		goto reset_failure;
1409 	}
1410 
1411 	/*
1412 	 * Setup the rx/tx rings
1413 	 */
1414 	igb_setup_rings(igb);
1415 
1416 	/*
1417 	 * Enable adapter interrupts
1418 	 * The interrupts must be enabled after the driver state is START
1419 	 */
1420 	igb->capab->enable_intr(igb);
1421 
1422 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
1423 		goto reset_failure;
1424 
1425 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1426 		goto reset_failure;
1427 
1428 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1429 		mutex_exit(&igb->tx_rings[i].tx_lock);
1430 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1431 		mutex_exit(&igb->rx_rings[i].rx_lock);
1432 
1433 	mutex_exit(&igb->gen_lock);
1434 
1435 	return (IGB_SUCCESS);
1436 
1437 reset_failure:
1438 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1439 		mutex_exit(&igb->tx_rings[i].tx_lock);
1440 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1441 		mutex_exit(&igb->rx_rings[i].rx_lock);
1442 
1443 	mutex_exit(&igb->gen_lock);
1444 
1445 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1446 
1447 	return (IGB_FAILURE);
1448 }
1449 
1450 /*
1451  * igb_tx_clean - Clean the pending transmit packets and DMA resources
1452  */
1453 static void
1454 igb_tx_clean(igb_t *igb)
1455 {
1456 	igb_tx_ring_t *tx_ring;
1457 	tx_control_block_t *tcb;
1458 	link_list_t pending_list;
1459 	uint32_t desc_num;
1460 	int i, j;
1461 
1462 	LINK_LIST_INIT(&pending_list);
1463 
1464 	for (i = 0; i < igb->num_tx_rings; i++) {
1465 		tx_ring = &igb->tx_rings[i];
1466 
1467 		mutex_enter(&tx_ring->recycle_lock);
1468 
1469 		/*
1470 		 * Clean the pending tx data - the pending packets in the
1471 		 * work_list that have no chances to be transmitted again.
1472 		 *
1473 		 * We must ensure the chipset is stopped or the link is down
1474 		 * before cleaning the transmit packets.
1475 		 */
1476 		desc_num = 0;
1477 		for (j = 0; j < tx_ring->ring_size; j++) {
1478 			tcb = tx_ring->work_list[j];
1479 			if (tcb != NULL) {
1480 				desc_num += tcb->desc_num;
1481 
1482 				tx_ring->work_list[j] = NULL;
1483 
1484 				igb_free_tcb(tcb);
1485 
1486 				LIST_PUSH_TAIL(&pending_list, &tcb->link);
1487 			}
1488 		}
1489 
1490 		if (desc_num > 0) {
1491 			atomic_add_32(&tx_ring->tbd_free, desc_num);
1492 			ASSERT(tx_ring->tbd_free == tx_ring->ring_size);
1493 
1494 			/*
1495 			 * Reset the head and tail pointers of the tbd ring;
1496 			 * Reset the head write-back if it is enabled.
1497 			 */
1498 			tx_ring->tbd_head = 0;
1499 			tx_ring->tbd_tail = 0;
1500 			if (igb->tx_head_wb_enable)
1501 				*tx_ring->tbd_head_wb = 0;
1502 
1503 			E1000_WRITE_REG(&igb->hw, E1000_TDH(tx_ring->index), 0);
1504 			E1000_WRITE_REG(&igb->hw, E1000_TDT(tx_ring->index), 0);
1505 		}
1506 
1507 		mutex_exit(&tx_ring->recycle_lock);
1508 
1509 		/*
1510 		 * Add the tx control blocks in the pending list to
1511 		 * the free list.
1512 		 */
1513 		igb_put_free_list(tx_ring, &pending_list);
1514 	}
1515 }
1516 
1517 /*
1518  * igb_tx_drain - Drain the tx rings to allow pending packets to be transmitted
1519  */
1520 static boolean_t
1521 igb_tx_drain(igb_t *igb)
1522 {
1523 	igb_tx_ring_t *tx_ring;
1524 	boolean_t done;
1525 	int i, j;
1526 
1527 	/*
1528 	 * Wait for a specific time to allow pending tx packets
1529 	 * to be transmitted.
1530 	 *
1531 	 * Check the counter tbd_free to see if transmission is done.
1532 	 * No lock protection is needed here.
1533 	 *
1534 	 * Return B_TRUE if all pending packets have been transmitted;
1535 	 * Otherwise return B_FALSE;
1536 	 */
1537 	for (i = 0; i < TX_DRAIN_TIME; i++) {
1538 
1539 		done = B_TRUE;
1540 		for (j = 0; j < igb->num_tx_rings; j++) {
1541 			tx_ring = &igb->tx_rings[j];
1542 			done = done &&
1543 			    (tx_ring->tbd_free == tx_ring->ring_size);
1544 		}
1545 
1546 		if (done)
1547 			break;
1548 
1549 		msec_delay(1);
1550 	}
1551 
1552 	return (done);
1553 }
1554 
1555 /*
1556  * igb_rx_drain - Wait for all rx buffers to be released by upper layer
1557  */
1558 static boolean_t
1559 igb_rx_drain(igb_t *igb)
1560 {
1561 	igb_rx_ring_t *rx_ring;
1562 	boolean_t done;
1563 	int i, j;
1564 
1565 	/*
1566 	 * Polling the rx free list to check if those rx buffers held by
1567 	 * the upper layer are released.
1568 	 *
1569 	 * Check the counter rcb_free to see if all pending buffers are
1570 	 * released. No lock protection is needed here.
1571 	 *
1572 	 * Return B_TRUE if all pending buffers have been released;
1573 	 * Otherwise return B_FALSE;
1574 	 */
1575 	for (i = 0; i < RX_DRAIN_TIME; i++) {
1576 
1577 		done = B_TRUE;
1578 		for (j = 0; j < igb->num_rx_rings; j++) {
1579 			rx_ring = &igb->rx_rings[j];
1580 			done = done &&
1581 			    (rx_ring->rcb_free == rx_ring->free_list_size);
1582 		}
1583 
1584 		if (done)
1585 			break;
1586 
1587 		msec_delay(1);
1588 	}
1589 
1590 	return (done);
1591 }
1592 
1593 /*
1594  * igb_start - Start the driver/chipset
1595  */
1596 int
1597 igb_start(igb_t *igb)
1598 {
1599 	int i;
1600 
1601 	ASSERT(mutex_owned(&igb->gen_lock));
1602 
1603 	for (i = 0; i < igb->num_rx_rings; i++)
1604 		mutex_enter(&igb->rx_rings[i].rx_lock);
1605 	for (i = 0; i < igb->num_tx_rings; i++)
1606 		mutex_enter(&igb->tx_rings[i].tx_lock);
1607 
1608 	/*
1609 	 * Start the chipset hardware
1610 	 */
1611 	if (!(igb->attach_progress & ATTACH_PROGRESS_INIT)) {
1612 		if (igb_init(igb) != IGB_SUCCESS) {
1613 			igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
1614 			goto start_failure;
1615 		}
1616 		igb->attach_progress |= ATTACH_PROGRESS_INIT;
1617 	}
1618 
1619 	/*
1620 	 * Setup the rx/tx rings
1621 	 */
1622 	igb_setup_rings(igb);
1623 
1624 	/*
1625 	 * Enable adapter interrupts
1626 	 * The interrupts must be enabled after the driver state is START
1627 	 */
1628 	igb->capab->enable_intr(igb);
1629 
1630 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
1631 		goto start_failure;
1632 
1633 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1634 		goto start_failure;
1635 
1636 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1637 		mutex_exit(&igb->tx_rings[i].tx_lock);
1638 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1639 		mutex_exit(&igb->rx_rings[i].rx_lock);
1640 
1641 	return (IGB_SUCCESS);
1642 
1643 start_failure:
1644 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1645 		mutex_exit(&igb->tx_rings[i].tx_lock);
1646 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1647 		mutex_exit(&igb->rx_rings[i].rx_lock);
1648 
1649 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1650 
1651 	return (IGB_FAILURE);
1652 }
1653 
1654 /*
1655  * igb_stop - Stop the driver/chipset
1656  */
1657 void
1658 igb_stop(igb_t *igb)
1659 {
1660 	int i;
1661 
1662 	ASSERT(mutex_owned(&igb->gen_lock));
1663 
1664 	igb->attach_progress &= ~ ATTACH_PROGRESS_INIT;
1665 
1666 	/*
1667 	 * Disable the adapter interrupts
1668 	 */
1669 	igb_disable_adapter_interrupts(igb);
1670 
1671 	/*
1672 	 * Drain the pending tx packets
1673 	 */
1674 	(void) igb_tx_drain(igb);
1675 
1676 	for (i = 0; i < igb->num_rx_rings; i++)
1677 		mutex_enter(&igb->rx_rings[i].rx_lock);
1678 	for (i = 0; i < igb->num_tx_rings; i++)
1679 		mutex_enter(&igb->tx_rings[i].tx_lock);
1680 
1681 	/*
1682 	 * Stop the chipset hardware
1683 	 */
1684 	igb_chip_stop(igb);
1685 
1686 	/*
1687 	 * Clean the pending tx data/resources
1688 	 */
1689 	igb_tx_clean(igb);
1690 
1691 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
1692 		mutex_exit(&igb->tx_rings[i].tx_lock);
1693 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
1694 		mutex_exit(&igb->rx_rings[i].rx_lock);
1695 
1696 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1697 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1698 }
1699 
1700 /*
1701  * igb_alloc_rings - Allocate memory space for rx/tx rings
1702  */
1703 static int
1704 igb_alloc_rings(igb_t *igb)
1705 {
1706 	/*
1707 	 * Allocate memory space for rx rings
1708 	 */
1709 	igb->rx_rings = kmem_zalloc(
1710 	    sizeof (igb_rx_ring_t) * igb->num_rx_rings,
1711 	    KM_NOSLEEP);
1712 
1713 	if (igb->rx_rings == NULL) {
1714 		return (IGB_FAILURE);
1715 	}
1716 
1717 	/*
1718 	 * Allocate memory space for tx rings
1719 	 */
1720 	igb->tx_rings = kmem_zalloc(
1721 	    sizeof (igb_tx_ring_t) * igb->num_tx_rings,
1722 	    KM_NOSLEEP);
1723 
1724 	if (igb->tx_rings == NULL) {
1725 		kmem_free(igb->rx_rings,
1726 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1727 		igb->rx_rings = NULL;
1728 		return (IGB_FAILURE);
1729 	}
1730 
1731 	/*
1732 	 * Allocate memory space for rx ring groups
1733 	 */
1734 	igb->rx_groups = kmem_zalloc(
1735 	    sizeof (igb_rx_group_t) * igb->num_rx_groups,
1736 	    KM_NOSLEEP);
1737 
1738 	if (igb->rx_groups == NULL) {
1739 		kmem_free(igb->rx_rings,
1740 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1741 		kmem_free(igb->tx_rings,
1742 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
1743 		igb->rx_rings = NULL;
1744 		igb->tx_rings = NULL;
1745 		return (IGB_FAILURE);
1746 	}
1747 
1748 	return (IGB_SUCCESS);
1749 }
1750 
1751 /*
1752  * igb_free_rings - Free the memory space of rx/tx rings.
1753  */
1754 static void
1755 igb_free_rings(igb_t *igb)
1756 {
1757 	if (igb->rx_rings != NULL) {
1758 		kmem_free(igb->rx_rings,
1759 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
1760 		igb->rx_rings = NULL;
1761 	}
1762 
1763 	if (igb->tx_rings != NULL) {
1764 		kmem_free(igb->tx_rings,
1765 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
1766 		igb->tx_rings = NULL;
1767 	}
1768 
1769 	if (igb->rx_groups != NULL) {
1770 		kmem_free(igb->rx_groups,
1771 		    sizeof (igb_rx_group_t) * igb->num_rx_groups);
1772 		igb->rx_groups = NULL;
1773 	}
1774 }
1775 
1776 /*
1777  * igb_setup_rings - Setup rx/tx rings
1778  */
1779 static void
1780 igb_setup_rings(igb_t *igb)
1781 {
1782 	/*
1783 	 * Setup the rx/tx rings, including the following:
1784 	 *
1785 	 * 1. Setup the descriptor ring and the control block buffers;
1786 	 * 2. Initialize necessary registers for receive/transmit;
1787 	 * 3. Initialize software pointers/parameters for receive/transmit;
1788 	 */
1789 	igb_setup_rx(igb);
1790 
1791 	igb_setup_tx(igb);
1792 
1793 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
1794 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
1795 }
1796 
1797 static void
1798 igb_setup_rx_ring(igb_rx_ring_t *rx_ring)
1799 {
1800 	igb_t *igb = rx_ring->igb;
1801 	struct e1000_hw *hw = &igb->hw;
1802 	rx_control_block_t *rcb;
1803 	union e1000_adv_rx_desc	*rbd;
1804 	uint32_t size;
1805 	uint32_t buf_low;
1806 	uint32_t buf_high;
1807 	uint32_t reg_val;
1808 	int i;
1809 
1810 	ASSERT(mutex_owned(&rx_ring->rx_lock));
1811 	ASSERT(mutex_owned(&igb->gen_lock));
1812 
1813 	for (i = 0; i < igb->rx_ring_size; i++) {
1814 		rcb = rx_ring->work_list[i];
1815 		rbd = &rx_ring->rbd_ring[i];
1816 
1817 		rbd->read.pkt_addr = rcb->rx_buf.dma_address;
1818 		rbd->read.hdr_addr = NULL;
1819 	}
1820 
1821 	/*
1822 	 * Initialize the length register
1823 	 */
1824 	size = rx_ring->ring_size * sizeof (union e1000_adv_rx_desc);
1825 	E1000_WRITE_REG(hw, E1000_RDLEN(rx_ring->index), size);
1826 
1827 	/*
1828 	 * Initialize the base address registers
1829 	 */
1830 	buf_low = (uint32_t)rx_ring->rbd_area.dma_address;
1831 	buf_high = (uint32_t)(rx_ring->rbd_area.dma_address >> 32);
1832 	E1000_WRITE_REG(hw, E1000_RDBAH(rx_ring->index), buf_high);
1833 	E1000_WRITE_REG(hw, E1000_RDBAL(rx_ring->index), buf_low);
1834 
1835 	/*
1836 	 * Setup head & tail pointers
1837 	 */
1838 	E1000_WRITE_REG(hw, E1000_RDT(rx_ring->index), rx_ring->ring_size - 1);
1839 	E1000_WRITE_REG(hw, E1000_RDH(rx_ring->index), 0);
1840 
1841 	rx_ring->rbd_next = 0;
1842 
1843 	/*
1844 	 * Note: Considering the case that the chipset is being reset
1845 	 * and there are still some buffers held by the upper layer,
1846 	 * we should not reset the values of rcb_head, rcb_tail and
1847 	 * rcb_free;
1848 	 */
1849 	if (igb->igb_state == IGB_UNKNOWN) {
1850 		rx_ring->rcb_head = 0;
1851 		rx_ring->rcb_tail = 0;
1852 		rx_ring->rcb_free = rx_ring->free_list_size;
1853 	}
1854 
1855 	/*
1856 	 * Setup the Receive Descriptor Control Register (RXDCTL)
1857 	 */
1858 	reg_val = E1000_READ_REG(hw, E1000_RXDCTL(rx_ring->index));
1859 	reg_val |= E1000_RXDCTL_QUEUE_ENABLE;
1860 	reg_val &= 0xFFF00000;
1861 	reg_val |= 16;		/* pthresh */
1862 	reg_val |= 8 << 8;	/* hthresh */
1863 	reg_val |= 1 << 16;	/* wthresh */
1864 	E1000_WRITE_REG(hw, E1000_RXDCTL(rx_ring->index), reg_val);
1865 
1866 	/*
1867 	 * Setup the Split and Replication Receive Control Register.
1868 	 * Set the rx buffer size and the advanced descriptor type.
1869 	 */
1870 	reg_val = (igb->rx_buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) |
1871 	    E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
1872 
1873 	E1000_WRITE_REG(hw, E1000_SRRCTL(rx_ring->index), reg_val);
1874 }
1875 
1876 static void
1877 igb_setup_rx(igb_t *igb)
1878 {
1879 	igb_rx_ring_t *rx_ring;
1880 	igb_rx_group_t *rx_group;
1881 	struct e1000_hw *hw = &igb->hw;
1882 	uint32_t reg_val, rctl;
1883 	uint32_t ring_per_group;
1884 	int i;
1885 
1886 	/*
1887 	 * Setup the Receive Control Register (RCTL), and ENABLE the
1888 	 * receiver. The initial configuration is to: Enable the receiver,
1889 	 * accept broadcasts, discard bad packets (and long packets),
1890 	 * disable VLAN filter checking, set the receive descriptor
1891 	 * minimum threshold size to 1/2, and the receive buffer size to
1892 	 * 2k.
1893 	 */
1894 	rctl = E1000_READ_REG(hw, E1000_RCTL);
1895 
1896 	/*
1897 	 * only used for wakeup control.  This driver doesn't do wakeup
1898 	 * but leave this here for completeness.
1899 	 */
1900 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
1901 
1902 	rctl |= E1000_RCTL_EN |		/* Enable Receive Unit */
1903 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
1904 	    E1000_RCTL_LPE |		/* Large Packet Enable bit */
1905 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
1906 	    E1000_RCTL_RDMTS_HALF |
1907 	    E1000_RCTL_SECRC |		/* Strip Ethernet CRC */
1908 	    E1000_RCTL_LBM_NO;		/* Loopback Mode = none */
1909 
1910 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
1911 
1912 	for (i = 0; i < igb->num_rx_groups; i++) {
1913 		rx_group = &igb->rx_groups[i];
1914 		rx_group->index = i;
1915 		rx_group->igb = igb;
1916 	}
1917 
1918 	/*
1919 	 * igb_setup_rx_ring must be called after configuring RCTL
1920 	 */
1921 	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
1922 	for (i = 0; i < igb->num_rx_rings; i++) {
1923 		rx_ring = &igb->rx_rings[i];
1924 		igb_setup_rx_ring(rx_ring);
1925 
1926 		/*
1927 		 * Map a ring to a group by assigning a group index
1928 		 */
1929 		rx_ring->group_index = i / ring_per_group;
1930 	}
1931 
1932 	/*
1933 	 * Setup the Rx Long Packet Max Length register
1934 	 */
1935 	E1000_WRITE_REG(hw, E1000_RLPML, igb->max_frame_size);
1936 
1937 	/*
1938 	 * Hardware checksum settings
1939 	 */
1940 	if (igb->rx_hcksum_enable) {
1941 		reg_val =
1942 		    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum */
1943 		    E1000_RXCSUM_IPOFL;		/* IP checksum */
1944 
1945 		E1000_WRITE_REG(hw, E1000_RXCSUM, reg_val);
1946 	}
1947 
1948 	/*
1949 	 * Setup classify and RSS for multiple receive queues
1950 	 */
1951 	switch (igb->vmdq_mode) {
1952 	case E1000_VMDQ_OFF:
1953 		/*
1954 		 * One ring group, only RSS is needed when more than
1955 		 * one ring enabled.
1956 		 */
1957 		if (igb->num_rx_rings > 1)
1958 			igb_setup_rss(igb);
1959 		break;
1960 	case E1000_VMDQ_MAC:
1961 		/*
1962 		 * Multiple groups, each group has one ring,
1963 		 * only the MAC classification is needed.
1964 		 */
1965 		igb_setup_mac_classify(igb);
1966 		break;
1967 	case E1000_VMDQ_MAC_RSS:
1968 		/*
1969 		 * Multiple groups and multiple rings, both
1970 		 * MAC classification and RSS are needed.
1971 		 */
1972 		igb_setup_mac_rss_classify(igb);
1973 		break;
1974 	}
1975 }
1976 
1977 static void
1978 igb_setup_tx_ring(igb_tx_ring_t *tx_ring)
1979 {
1980 	igb_t *igb = tx_ring->igb;
1981 	struct e1000_hw *hw = &igb->hw;
1982 	uint32_t size;
1983 	uint32_t buf_low;
1984 	uint32_t buf_high;
1985 	uint32_t reg_val;
1986 
1987 	ASSERT(mutex_owned(&tx_ring->tx_lock));
1988 	ASSERT(mutex_owned(&igb->gen_lock));
1989 
1990 
1991 	/*
1992 	 * Initialize the length register
1993 	 */
1994 	size = tx_ring->ring_size * sizeof (union e1000_adv_tx_desc);
1995 	E1000_WRITE_REG(hw, E1000_TDLEN(tx_ring->index), size);
1996 
1997 	/*
1998 	 * Initialize the base address registers
1999 	 */
2000 	buf_low = (uint32_t)tx_ring->tbd_area.dma_address;
2001 	buf_high = (uint32_t)(tx_ring->tbd_area.dma_address >> 32);
2002 	E1000_WRITE_REG(hw, E1000_TDBAL(tx_ring->index), buf_low);
2003 	E1000_WRITE_REG(hw, E1000_TDBAH(tx_ring->index), buf_high);
2004 
2005 	/*
2006 	 * Setup head & tail pointers
2007 	 */
2008 	E1000_WRITE_REG(hw, E1000_TDH(tx_ring->index), 0);
2009 	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), 0);
2010 
2011 	/*
2012 	 * Setup head write-back
2013 	 */
2014 	if (igb->tx_head_wb_enable) {
2015 		/*
2016 		 * The memory of the head write-back is allocated using
2017 		 * the extra tbd beyond the tail of the tbd ring.
2018 		 */
2019 		tx_ring->tbd_head_wb = (uint32_t *)
2020 		    ((uintptr_t)tx_ring->tbd_area.address + size);
2021 		*tx_ring->tbd_head_wb = 0;
2022 
2023 		buf_low = (uint32_t)
2024 		    (tx_ring->tbd_area.dma_address + size);
2025 		buf_high = (uint32_t)
2026 		    ((tx_ring->tbd_area.dma_address + size) >> 32);
2027 
2028 		/* Set the head write-back enable bit */
2029 		buf_low |= E1000_TX_HEAD_WB_ENABLE;
2030 
2031 		E1000_WRITE_REG(hw, E1000_TDWBAL(tx_ring->index), buf_low);
2032 		E1000_WRITE_REG(hw, E1000_TDWBAH(tx_ring->index), buf_high);
2033 
2034 		/*
2035 		 * Turn off relaxed ordering for head write back or it will
2036 		 * cause problems with the tx recycling
2037 		 */
2038 		reg_val = E1000_READ_REG(hw,
2039 		    E1000_DCA_TXCTRL(tx_ring->index));
2040 		reg_val &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
2041 		E1000_WRITE_REG(hw,
2042 		    E1000_DCA_TXCTRL(tx_ring->index), reg_val);
2043 	} else {
2044 		tx_ring->tbd_head_wb = NULL;
2045 	}
2046 
2047 	tx_ring->tbd_head = 0;
2048 	tx_ring->tbd_tail = 0;
2049 	tx_ring->tbd_free = tx_ring->ring_size;
2050 
2051 	/*
2052 	 * Note: for the case that the chipset is being reset, we should not
2053 	 * reset the values of tcb_head, tcb_tail. And considering there might
2054 	 * still be some packets kept in the pending_list, we should not assert
2055 	 * (tcb_free == free_list_size) here.
2056 	 */
2057 	if (igb->igb_state == IGB_UNKNOWN) {
2058 		tx_ring->tcb_head = 0;
2059 		tx_ring->tcb_tail = 0;
2060 		tx_ring->tcb_free = tx_ring->free_list_size;
2061 	}
2062 
2063 	/*
2064 	 * Enable specific tx ring, it is required by multiple tx
2065 	 * ring support.
2066 	 */
2067 	reg_val = E1000_READ_REG(hw, E1000_TXDCTL(tx_ring->index));
2068 	reg_val |= E1000_TXDCTL_QUEUE_ENABLE;
2069 	E1000_WRITE_REG(hw, E1000_TXDCTL(tx_ring->index), reg_val);
2070 
2071 	/*
2072 	 * Initialize hardware checksum offload settings
2073 	 */
2074 	tx_ring->hcksum_context.hcksum_flags = 0;
2075 	tx_ring->hcksum_context.ip_hdr_len = 0;
2076 	tx_ring->hcksum_context.mac_hdr_len = 0;
2077 	tx_ring->hcksum_context.l4_proto = 0;
2078 
2079 	/*
2080 	 * Enable TXDCTL per queue
2081 	 */
2082 	reg_val = E1000_READ_REG(hw, E1000_TXDCTL(tx_ring->index));
2083 	reg_val |= E1000_TXDCTL_QUEUE_ENABLE;
2084 	E1000_WRITE_REG(hw, E1000_TXDCTL(tx_ring->index), reg_val);
2085 }
2086 
2087 static void
2088 igb_setup_tx(igb_t *igb)
2089 {
2090 	igb_tx_ring_t *tx_ring;
2091 	struct e1000_hw *hw = &igb->hw;
2092 	uint32_t reg_val;
2093 	int i;
2094 
2095 	for (i = 0; i < igb->num_tx_rings; i++) {
2096 		tx_ring = &igb->tx_rings[i];
2097 		igb_setup_tx_ring(tx_ring);
2098 	}
2099 
2100 	/*
2101 	 * Setup the Transmit Control Register (TCTL)
2102 	 */
2103 	reg_val = E1000_READ_REG(hw, E1000_TCTL);
2104 	reg_val &= ~E1000_TCTL_CT;
2105 	reg_val |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2106 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2107 
2108 	e1000_config_collision_dist(hw);
2109 
2110 	/* Enable transmits */
2111 	reg_val |= E1000_TCTL_EN;
2112 
2113 	E1000_WRITE_REG(hw, E1000_TCTL, reg_val);
2114 }
2115 
2116 /*
2117  * igb_setup_rss - Setup receive-side scaling feature
2118  */
2119 static void
2120 igb_setup_rss(igb_t *igb)
2121 {
2122 	struct e1000_hw *hw = &igb->hw;
2123 	uint32_t i, mrqc, rxcsum;
2124 	int shift = 0;
2125 	uint32_t random;
2126 	union e1000_reta {
2127 		uint32_t	dword;
2128 		uint8_t		bytes[4];
2129 	} reta;
2130 
2131 	/* Setup the Redirection Table */
2132 	if (hw->mac.type == e1000_82576) {
2133 		shift = 0;
2134 	} else if (hw->mac.type == e1000_82575) {
2135 		shift = 6;
2136 	}
2137 	for (i = 0; i < (32 * 4); i++) {
2138 		reta.bytes[i & 3] = (i % igb->num_rx_rings) << shift;
2139 		if ((i & 3) == 3) {
2140 			E1000_WRITE_REG(hw,
2141 			    (E1000_RETA(0) + (i & ~3)), reta.dword);
2142 		}
2143 	}
2144 
2145 	/* Fill out hash function seeds */
2146 	for (i = 0; i < 10; i++) {
2147 		(void) random_get_pseudo_bytes((uint8_t *)&random,
2148 		    sizeof (uint32_t));
2149 		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
2150 	}
2151 
2152 	/* Setup the Multiple Receive Queue Control register */
2153 	mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2154 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2155 	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
2156 	    E1000_MRQC_RSS_FIELD_IPV6 |
2157 	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
2158 	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
2159 	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
2160 	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2161 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2162 
2163 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2164 
2165 	/*
2166 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
2167 	 *
2168 	 * The Packet Checksum is not ethernet CRC. It is another kind of
2169 	 * checksum offloading provided by the 82575 chipset besides the IP
2170 	 * header checksum offloading and the TCP/UDP checksum offloading.
2171 	 * The Packet Checksum is by default computed over the entire packet
2172 	 * from the first byte of the DA through the last byte of the CRC,
2173 	 * including the Ethernet and IP headers.
2174 	 *
2175 	 * It is a hardware limitation that Packet Checksum is mutually
2176 	 * exclusive with RSS.
2177 	 */
2178 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2179 	rxcsum |= E1000_RXCSUM_PCSD;
2180 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2181 }
2182 
2183 /*
2184  * igb_setup_mac_rss_classify - Setup MAC classification and rss
2185  */
2186 static void
2187 igb_setup_mac_rss_classify(igb_t *igb)
2188 {
2189 	struct e1000_hw *hw = &igb->hw;
2190 	uint32_t i, mrqc, vmdctl, rxcsum;
2191 	uint32_t ring_per_group;
2192 	int shift_group0, shift_group1;
2193 	uint32_t random;
2194 	union e1000_reta {
2195 		uint32_t	dword;
2196 		uint8_t		bytes[4];
2197 	} reta;
2198 
2199 	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
2200 
2201 	/* Setup the Redirection Table, it is shared between two groups */
2202 	shift_group0 = 2;
2203 	shift_group1 = 6;
2204 	for (i = 0; i < (32 * 4); i++) {
2205 		reta.bytes[i & 3] = ((i % ring_per_group) << shift_group0) |
2206 		    ((ring_per_group + (i % ring_per_group)) << shift_group1);
2207 		if ((i & 3) == 3) {
2208 			E1000_WRITE_REG(hw,
2209 			    (E1000_RETA(0) + (i & ~3)), reta.dword);
2210 		}
2211 	}
2212 
2213 	/* Fill out hash function seeds */
2214 	for (i = 0; i < 10; i++) {
2215 		(void) random_get_pseudo_bytes((uint8_t *)&random,
2216 		    sizeof (uint32_t));
2217 		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
2218 	}
2219 
2220 	/*
2221 	 * Setup the Multiple Receive Queue Control register,
2222 	 * enable VMDq based on packet destination MAC address and RSS.
2223 	 */
2224 	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_RSS_GROUP;
2225 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2226 	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
2227 	    E1000_MRQC_RSS_FIELD_IPV6 |
2228 	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
2229 	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
2230 	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
2231 	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2232 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2233 
2234 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2235 
2236 
2237 	/* Define the default group and default queues */
2238 	vmdctl = E1000_VMDQ_MAC_GROUP_DEFAULT_QUEUE;
2239 	E1000_WRITE_REG(hw, E1000_VT_CTL, vmdctl);
2240 
2241 	/*
2242 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
2243 	 *
2244 	 * The Packet Checksum is not ethernet CRC. It is another kind of
2245 	 * checksum offloading provided by the 82575 chipset besides the IP
2246 	 * header checksum offloading and the TCP/UDP checksum offloading.
2247 	 * The Packet Checksum is by default computed over the entire packet
2248 	 * from the first byte of the DA through the last byte of the CRC,
2249 	 * including the Ethernet and IP headers.
2250 	 *
2251 	 * It is a hardware limitation that Packet Checksum is mutually
2252 	 * exclusive with RSS.
2253 	 */
2254 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2255 	rxcsum |= E1000_RXCSUM_PCSD;
2256 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2257 }
2258 
2259 /*
2260  * igb_setup_mac_classify - Setup MAC classification feature
2261  */
2262 static void
2263 igb_setup_mac_classify(igb_t *igb)
2264 {
2265 	struct e1000_hw *hw = &igb->hw;
2266 	uint32_t mrqc, rxcsum;
2267 
2268 	/*
2269 	 * Setup the Multiple Receive Queue Control register,
2270 	 * enable VMDq based on packet destination MAC address.
2271 	 */
2272 	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_GROUP;
2273 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2274 
2275 	/*
2276 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
2277 	 *
2278 	 * The Packet Checksum is not ethernet CRC. It is another kind of
2279 	 * checksum offloading provided by the 82575 chipset besides the IP
2280 	 * header checksum offloading and the TCP/UDP checksum offloading.
2281 	 * The Packet Checksum is by default computed over the entire packet
2282 	 * from the first byte of the DA through the last byte of the CRC,
2283 	 * including the Ethernet and IP headers.
2284 	 *
2285 	 * It is a hardware limitation that Packet Checksum is mutually
2286 	 * exclusive with RSS.
2287 	 */
2288 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2289 	rxcsum |= E1000_RXCSUM_PCSD;
2290 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2291 
2292 }
2293 
2294 /*
2295  * igb_init_unicst - Initialize the unicast addresses
2296  */
2297 static void
2298 igb_init_unicst(igb_t *igb)
2299 {
2300 	struct e1000_hw *hw = &igb->hw;
2301 	int slot;
2302 
2303 	/*
2304 	 * Here we should consider two situations:
2305 	 *
2306 	 * 1. Chipset is initialized the first time
2307 	 *    Initialize the multiple unicast addresses, and
2308 	 *    save the default MAC address.
2309 	 *
2310 	 * 2. Chipset is reset
2311 	 *    Recover the multiple unicast addresses from the
2312 	 *    software data structure to the RAR registers.
2313 	 */
2314 
2315 	/*
2316 	 * Clear the default MAC address in the RAR0 rgister,
2317 	 * which is loaded from EEPROM when system boot or chipreset,
2318 	 * this will cause the conficts with add_mac/rem_mac entry
2319 	 * points when VMDq is enabled. For this reason, the RAR0
2320 	 * must be cleared for both cases mentioned above.
2321 	 */
2322 	e1000_rar_clear(hw, 0);
2323 
2324 	if (!igb->unicst_init) {
2325 
2326 		/* Initialize the multiple unicast addresses */
2327 		igb->unicst_total = MAX_NUM_UNICAST_ADDRESSES;
2328 		igb->unicst_avail = igb->unicst_total;
2329 
2330 		for (slot = 0; slot < igb->unicst_total; slot++)
2331 			igb->unicst_addr[slot].mac.set = 0;
2332 
2333 		igb->unicst_init = B_TRUE;
2334 	} else {
2335 		/* Re-configure the RAR registers */
2336 		for (slot = 0; slot < igb->unicst_total; slot++) {
2337 			e1000_rar_set_vmdq(hw, igb->unicst_addr[slot].mac.addr,
2338 			    slot, igb->vmdq_mode,
2339 			    igb->unicst_addr[slot].mac.group_index);
2340 		}
2341 	}
2342 
2343 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
2344 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2345 }
2346 
2347 /*
2348  * igb_unicst_find - Find the slot for the specified unicast address
2349  */
2350 int
2351 igb_unicst_find(igb_t *igb, const uint8_t *mac_addr)
2352 {
2353 	int slot;
2354 
2355 	ASSERT(mutex_owned(&igb->gen_lock));
2356 
2357 	for (slot = 0; slot < igb->unicst_total; slot++) {
2358 		if (bcmp(igb->unicst_addr[slot].mac.addr,
2359 		    mac_addr, ETHERADDRL) == 0)
2360 			return (slot);
2361 	}
2362 
2363 	return (-1);
2364 }
2365 
2366 /*
2367  * igb_unicst_set - Set the unicast address to the specified slot
2368  */
2369 int
2370 igb_unicst_set(igb_t *igb, const uint8_t *mac_addr,
2371     int slot)
2372 {
2373 	struct e1000_hw *hw = &igb->hw;
2374 
2375 	ASSERT(mutex_owned(&igb->gen_lock));
2376 
2377 	/*
2378 	 * Save the unicast address in the software data structure
2379 	 */
2380 	bcopy(mac_addr, igb->unicst_addr[slot].mac.addr, ETHERADDRL);
2381 
2382 	/*
2383 	 * Set the unicast address to the RAR register
2384 	 */
2385 	e1000_rar_set(hw, (uint8_t *)mac_addr, slot);
2386 
2387 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2388 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2389 		return (EIO);
2390 	}
2391 
2392 	return (0);
2393 }
2394 
2395 /*
2396  * igb_multicst_add - Add a multicst address
2397  */
2398 int
2399 igb_multicst_add(igb_t *igb, const uint8_t *multiaddr)
2400 {
2401 	ASSERT(mutex_owned(&igb->gen_lock));
2402 
2403 	if ((multiaddr[0] & 01) == 0) {
2404 		return (EINVAL);
2405 	}
2406 
2407 	if (igb->mcast_count >= MAX_NUM_MULTICAST_ADDRESSES) {
2408 		return (ENOENT);
2409 	}
2410 
2411 	bcopy(multiaddr,
2412 	    &igb->mcast_table[igb->mcast_count], ETHERADDRL);
2413 	igb->mcast_count++;
2414 
2415 	/*
2416 	 * Update the multicast table in the hardware
2417 	 */
2418 	igb_setup_multicst(igb);
2419 
2420 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2421 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2422 		return (EIO);
2423 	}
2424 
2425 	return (0);
2426 }
2427 
2428 /*
2429  * igb_multicst_remove - Remove a multicst address
2430  */
2431 int
2432 igb_multicst_remove(igb_t *igb, const uint8_t *multiaddr)
2433 {
2434 	int i;
2435 
2436 	ASSERT(mutex_owned(&igb->gen_lock));
2437 
2438 	for (i = 0; i < igb->mcast_count; i++) {
2439 		if (bcmp(multiaddr, &igb->mcast_table[i],
2440 		    ETHERADDRL) == 0) {
2441 			for (i++; i < igb->mcast_count; i++) {
2442 				igb->mcast_table[i - 1] =
2443 				    igb->mcast_table[i];
2444 			}
2445 			igb->mcast_count--;
2446 			break;
2447 		}
2448 	}
2449 
2450 	/*
2451 	 * Update the multicast table in the hardware
2452 	 */
2453 	igb_setup_multicst(igb);
2454 
2455 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
2456 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2457 		return (EIO);
2458 	}
2459 
2460 	return (0);
2461 }
2462 
2463 /*
2464  * igb_setup_multicast - setup multicast data structures
2465  *
2466  * This routine initializes all of the multicast related structures
2467  * and save them in the hardware registers.
2468  */
2469 static void
2470 igb_setup_multicst(igb_t *igb)
2471 {
2472 	uint8_t *mc_addr_list;
2473 	uint32_t mc_addr_count;
2474 	struct e1000_hw *hw = &igb->hw;
2475 
2476 	ASSERT(mutex_owned(&igb->gen_lock));
2477 
2478 	ASSERT(igb->mcast_count <= MAX_NUM_MULTICAST_ADDRESSES);
2479 
2480 	mc_addr_list = (uint8_t *)igb->mcast_table;
2481 	mc_addr_count = igb->mcast_count;
2482 
2483 	/*
2484 	 * Update the multicase addresses to the MTA registers
2485 	 */
2486 	e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count,
2487 	    igb->unicst_total, hw->mac.rar_entry_count);
2488 }
2489 
2490 /*
2491  * igb_get_conf - Get driver configurations set in driver.conf
2492  *
2493  * This routine gets user-configured values out of the configuration
2494  * file igb.conf.
2495  *
2496  * For each configurable value, there is a minimum, a maximum, and a
2497  * default.
2498  * If user does not configure a value, use the default.
2499  * If user configures below the minimum, use the minumum.
2500  * If user configures above the maximum, use the maxumum.
2501  */
2502 static void
2503 igb_get_conf(igb_t *igb)
2504 {
2505 	struct e1000_hw *hw = &igb->hw;
2506 	uint32_t default_mtu;
2507 	uint32_t flow_control;
2508 	uint32_t ring_per_group;
2509 	int i;
2510 
2511 	/*
2512 	 * igb driver supports the following user configurations:
2513 	 *
2514 	 * Link configurations:
2515 	 *    adv_autoneg_cap
2516 	 *    adv_1000fdx_cap
2517 	 *    adv_100fdx_cap
2518 	 *    adv_100hdx_cap
2519 	 *    adv_10fdx_cap
2520 	 *    adv_10hdx_cap
2521 	 * Note: 1000hdx is not supported.
2522 	 *
2523 	 * Jumbo frame configuration:
2524 	 *    default_mtu
2525 	 *
2526 	 * Ethernet flow control configuration:
2527 	 *    flow_control
2528 	 *
2529 	 * Multiple rings configurations:
2530 	 *    tx_queue_number
2531 	 *    tx_ring_size
2532 	 *    rx_queue_number
2533 	 *    rx_ring_size
2534 	 *
2535 	 * Call igb_get_prop() to get the value for a specific
2536 	 * configuration parameter.
2537 	 */
2538 
2539 	/*
2540 	 * Link configurations
2541 	 */
2542 	igb->param_adv_autoneg_cap = igb_get_prop(igb,
2543 	    PROP_ADV_AUTONEG_CAP, 0, 1, 1);
2544 	igb->param_adv_1000fdx_cap = igb_get_prop(igb,
2545 	    PROP_ADV_1000FDX_CAP, 0, 1, 1);
2546 	igb->param_adv_100fdx_cap = igb_get_prop(igb,
2547 	    PROP_ADV_100FDX_CAP, 0, 1, 1);
2548 	igb->param_adv_100hdx_cap = igb_get_prop(igb,
2549 	    PROP_ADV_100HDX_CAP, 0, 1, 1);
2550 	igb->param_adv_10fdx_cap = igb_get_prop(igb,
2551 	    PROP_ADV_10FDX_CAP, 0, 1, 1);
2552 	igb->param_adv_10hdx_cap = igb_get_prop(igb,
2553 	    PROP_ADV_10HDX_CAP, 0, 1, 1);
2554 
2555 	/*
2556 	 * Jumbo frame configurations
2557 	 */
2558 	default_mtu = igb_get_prop(igb, PROP_DEFAULT_MTU,
2559 	    MIN_MTU, MAX_MTU, DEFAULT_MTU);
2560 
2561 	igb->max_frame_size = default_mtu +
2562 	    sizeof (struct ether_vlan_header) + ETHERFCSL;
2563 
2564 	/*
2565 	 * Ethernet flow control configuration
2566 	 */
2567 	flow_control = igb_get_prop(igb, PROP_FLOW_CONTROL,
2568 	    e1000_fc_none, 4, e1000_fc_full);
2569 	if (flow_control == 4)
2570 		flow_control = e1000_fc_default;
2571 
2572 	hw->fc.requested_mode = flow_control;
2573 
2574 	/*
2575 	 * Multiple rings configurations
2576 	 */
2577 	igb->tx_ring_size = igb_get_prop(igb, PROP_TX_RING_SIZE,
2578 	    MIN_TX_RING_SIZE, MAX_TX_RING_SIZE, DEFAULT_TX_RING_SIZE);
2579 	igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE,
2580 	    MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);
2581 
2582 	igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 1);
2583 	igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM,
2584 	    MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM);
2585 	/*
2586 	 * Currently we do not support VMDq for 82576.
2587 	 * If it is e1000_82576, set num_rx_groups to 1.
2588 	 */
2589 	if (hw->mac.type == e1000_82576)
2590 		igb->num_rx_groups = 1;
2591 
2592 	if (igb->mr_enable) {
2593 		igb->num_tx_rings = igb->capab->def_tx_que_num;
2594 		igb->num_rx_rings = igb->capab->def_rx_que_num;
2595 	} else {
2596 		igb->num_tx_rings = 1;
2597 		igb->num_rx_rings = 1;
2598 
2599 		if (igb->num_rx_groups > 1) {
2600 			igb_error(igb,
2601 			    "Invalid rx groups number. Please enable multiple "
2602 			    "rings first");
2603 			igb->num_rx_groups = 1;
2604 		}
2605 	}
2606 
2607 	/*
2608 	 * Check the divisibility between rx rings and rx groups.
2609 	 */
2610 	for (i = igb->num_rx_groups; i > 0; i--) {
2611 		if ((igb->num_rx_rings % i) == 0)
2612 			break;
2613 	}
2614 	if (i != igb->num_rx_groups) {
2615 		igb_error(igb,
2616 		    "Invalid rx groups number. Downgrade the rx group "
2617 		    "number to %d.", i);
2618 		igb->num_rx_groups = i;
2619 	}
2620 
2621 	/*
2622 	 * Get the ring number per group.
2623 	 */
2624 	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
2625 
2626 	if (igb->num_rx_groups == 1) {
2627 		/*
2628 		 * One rx ring group, the rx ring number is num_rx_rings.
2629 		 */
2630 		igb->vmdq_mode = E1000_VMDQ_OFF;
2631 	} else if (ring_per_group == 1) {
2632 		/*
2633 		 * Multiple rx groups, each group has one rx ring.
2634 		 */
2635 		igb->vmdq_mode = E1000_VMDQ_MAC;
2636 	} else {
2637 		/*
2638 		 * Multiple groups and multiple rings.
2639 		 */
2640 		igb->vmdq_mode = E1000_VMDQ_MAC_RSS;
2641 	}
2642 
2643 	/*
2644 	 * Tunable used to force an interrupt type. The only use is
2645 	 * for testing of the lesser interrupt types.
2646 	 * 0 = don't force interrupt type
2647 	 * 1 = force interrupt type MSIX
2648 	 * 2 = force interrupt type MSI
2649 	 * 3 = force interrupt type Legacy
2650 	 */
2651 	igb->intr_force = igb_get_prop(igb, PROP_INTR_FORCE,
2652 	    IGB_INTR_NONE, IGB_INTR_LEGACY, IGB_INTR_NONE);
2653 
2654 	igb->tx_hcksum_enable = igb_get_prop(igb, PROP_TX_HCKSUM_ENABLE,
2655 	    0, 1, 1);
2656 	igb->rx_hcksum_enable = igb_get_prop(igb, PROP_RX_HCKSUM_ENABLE,
2657 	    0, 1, 1);
2658 	igb->lso_enable = igb_get_prop(igb, PROP_LSO_ENABLE,
2659 	    0, 1, 0);
2660 	igb->tx_head_wb_enable = igb_get_prop(igb, PROP_TX_HEAD_WB_ENABLE,
2661 	    0, 1, 1);
2662 
2663 	igb->tx_copy_thresh = igb_get_prop(igb, PROP_TX_COPY_THRESHOLD,
2664 	    MIN_TX_COPY_THRESHOLD, MAX_TX_COPY_THRESHOLD,
2665 	    DEFAULT_TX_COPY_THRESHOLD);
2666 	igb->tx_recycle_thresh = igb_get_prop(igb, PROP_TX_RECYCLE_THRESHOLD,
2667 	    MIN_TX_RECYCLE_THRESHOLD, MAX_TX_RECYCLE_THRESHOLD,
2668 	    DEFAULT_TX_RECYCLE_THRESHOLD);
2669 	igb->tx_overload_thresh = igb_get_prop(igb, PROP_TX_OVERLOAD_THRESHOLD,
2670 	    MIN_TX_OVERLOAD_THRESHOLD, MAX_TX_OVERLOAD_THRESHOLD,
2671 	    DEFAULT_TX_OVERLOAD_THRESHOLD);
2672 	igb->tx_resched_thresh = igb_get_prop(igb, PROP_TX_RESCHED_THRESHOLD,
2673 	    MIN_TX_RESCHED_THRESHOLD, MAX_TX_RESCHED_THRESHOLD,
2674 	    DEFAULT_TX_RESCHED_THRESHOLD);
2675 
2676 	igb->rx_copy_thresh = igb_get_prop(igb, PROP_RX_COPY_THRESHOLD,
2677 	    MIN_RX_COPY_THRESHOLD, MAX_RX_COPY_THRESHOLD,
2678 	    DEFAULT_RX_COPY_THRESHOLD);
2679 	igb->rx_limit_per_intr = igb_get_prop(igb, PROP_RX_LIMIT_PER_INTR,
2680 	    MIN_RX_LIMIT_PER_INTR, MAX_RX_LIMIT_PER_INTR,
2681 	    DEFAULT_RX_LIMIT_PER_INTR);
2682 
2683 	igb->intr_throttling[0] = igb_get_prop(igb, PROP_INTR_THROTTLING,
2684 	    igb->capab->min_intr_throttle,
2685 	    igb->capab->max_intr_throttle,
2686 	    igb->capab->def_intr_throttle);
2687 }
2688 
2689 /*
2690  * igb_get_prop - Get a property value out of the configuration file igb.conf
2691  *
2692  * Caller provides the name of the property, a default value, a minimum
2693  * value, and a maximum value.
2694  *
2695  * Return configured value of the property, with default, minimum and
2696  * maximum properly applied.
2697  */
2698 static int
2699 igb_get_prop(igb_t *igb,
2700     char *propname,	/* name of the property */
2701     int minval,		/* minimum acceptable value */
2702     int maxval,		/* maximim acceptable value */
2703     int defval)		/* default value */
2704 {
2705 	int value;
2706 
2707 	/*
2708 	 * Call ddi_prop_get_int() to read the conf settings
2709 	 */
2710 	value = ddi_prop_get_int(DDI_DEV_T_ANY, igb->dip,
2711 	    DDI_PROP_DONTPASS, propname, defval);
2712 
2713 	if (value > maxval)
2714 		value = maxval;
2715 
2716 	if (value < minval)
2717 		value = minval;
2718 
2719 	return (value);
2720 }
2721 
2722 /*
2723  * igb_setup_link - Using the link properties to setup the link
2724  */
2725 int
2726 igb_setup_link(igb_t *igb, boolean_t setup_hw)
2727 {
2728 	struct e1000_mac_info *mac;
2729 	struct e1000_phy_info *phy;
2730 	boolean_t invalid;
2731 
2732 	mac = &igb->hw.mac;
2733 	phy = &igb->hw.phy;
2734 	invalid = B_FALSE;
2735 
2736 	if (igb->param_adv_autoneg_cap == 1) {
2737 		mac->autoneg = B_TRUE;
2738 		phy->autoneg_advertised = 0;
2739 
2740 		/*
2741 		 * 1000hdx is not supported for autonegotiation
2742 		 */
2743 		if (igb->param_adv_1000fdx_cap == 1)
2744 			phy->autoneg_advertised |= ADVERTISE_1000_FULL;
2745 
2746 		if (igb->param_adv_100fdx_cap == 1)
2747 			phy->autoneg_advertised |= ADVERTISE_100_FULL;
2748 
2749 		if (igb->param_adv_100hdx_cap == 1)
2750 			phy->autoneg_advertised |= ADVERTISE_100_HALF;
2751 
2752 		if (igb->param_adv_10fdx_cap == 1)
2753 			phy->autoneg_advertised |= ADVERTISE_10_FULL;
2754 
2755 		if (igb->param_adv_10hdx_cap == 1)
2756 			phy->autoneg_advertised |= ADVERTISE_10_HALF;
2757 
2758 		if (phy->autoneg_advertised == 0)
2759 			invalid = B_TRUE;
2760 	} else {
2761 		mac->autoneg = B_FALSE;
2762 
2763 		/*
2764 		 * 1000fdx and 1000hdx are not supported for forced link
2765 		 */
2766 		if (igb->param_adv_100fdx_cap == 1)
2767 			mac->forced_speed_duplex = ADVERTISE_100_FULL;
2768 		else if (igb->param_adv_100hdx_cap == 1)
2769 			mac->forced_speed_duplex = ADVERTISE_100_HALF;
2770 		else if (igb->param_adv_10fdx_cap == 1)
2771 			mac->forced_speed_duplex = ADVERTISE_10_FULL;
2772 		else if (igb->param_adv_10hdx_cap == 1)
2773 			mac->forced_speed_duplex = ADVERTISE_10_HALF;
2774 		else
2775 			invalid = B_TRUE;
2776 	}
2777 
2778 	if (invalid) {
2779 		igb_notice(igb, "Invalid link settings. Setup link to "
2780 		    "autonegotiation with full link capabilities.");
2781 		mac->autoneg = B_TRUE;
2782 		phy->autoneg_advertised = ADVERTISE_1000_FULL |
2783 		    ADVERTISE_100_FULL | ADVERTISE_100_HALF |
2784 		    ADVERTISE_10_FULL | ADVERTISE_10_HALF;
2785 	}
2786 
2787 	if (setup_hw) {
2788 		if (e1000_setup_link(&igb->hw) != E1000_SUCCESS)
2789 			return (IGB_FAILURE);
2790 	}
2791 
2792 	return (IGB_SUCCESS);
2793 }
2794 
2795 
2796 /*
2797  * igb_is_link_up - Check if the link is up
2798  */
2799 static boolean_t
2800 igb_is_link_up(igb_t *igb)
2801 {
2802 	struct e1000_hw *hw = &igb->hw;
2803 	boolean_t link_up;
2804 
2805 	ASSERT(mutex_owned(&igb->gen_lock));
2806 
2807 	(void) e1000_check_for_link(hw);
2808 
2809 	if ((E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU) ||
2810 	    ((hw->phy.media_type == e1000_media_type_internal_serdes) &&
2811 	    (hw->mac.serdes_has_link))) {
2812 		link_up = B_TRUE;
2813 	} else {
2814 		link_up = B_FALSE;
2815 	}
2816 
2817 	return (link_up);
2818 }
2819 
2820 /*
2821  * igb_link_check - Link status processing
2822  */
2823 static boolean_t
2824 igb_link_check(igb_t *igb)
2825 {
2826 	struct e1000_hw *hw = &igb->hw;
2827 	uint16_t speed = 0, duplex = 0;
2828 	boolean_t link_changed = B_FALSE;
2829 
2830 	ASSERT(mutex_owned(&igb->gen_lock));
2831 
2832 	if (igb_is_link_up(igb)) {
2833 		/*
2834 		 * The Link is up, check whether it was marked as down earlier
2835 		 */
2836 		if (igb->link_state != LINK_STATE_UP) {
2837 			(void) e1000_get_speed_and_duplex(hw, &speed, &duplex);
2838 			igb->link_speed = speed;
2839 			igb->link_duplex = duplex;
2840 			igb->link_state = LINK_STATE_UP;
2841 			igb->link_down_timeout = 0;
2842 			link_changed = B_TRUE;
2843 		}
2844 	} else {
2845 		if (igb->link_state != LINK_STATE_DOWN) {
2846 			igb->link_speed = 0;
2847 			igb->link_duplex = 0;
2848 			igb->link_state = LINK_STATE_DOWN;
2849 			link_changed = B_TRUE;
2850 		}
2851 
2852 		if (igb->igb_state & IGB_STARTED) {
2853 			if (igb->link_down_timeout < MAX_LINK_DOWN_TIMEOUT) {
2854 				igb->link_down_timeout++;
2855 			} else if (igb->link_down_timeout ==
2856 			    MAX_LINK_DOWN_TIMEOUT) {
2857 				igb_tx_clean(igb);
2858 				igb->link_down_timeout++;
2859 			}
2860 		}
2861 	}
2862 
2863 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
2864 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2865 
2866 	return (link_changed);
2867 }
2868 
2869 /*
2870  * igb_local_timer - driver watchdog function
2871  *
2872  * This function will handle the transmit stall check, link status check and
2873  * other routines.
2874  */
2875 static void
2876 igb_local_timer(void *arg)
2877 {
2878 	igb_t *igb = (igb_t *)arg;
2879 	struct e1000_hw *hw = &igb->hw;
2880 	boolean_t link_changed;
2881 
2882 	if (igb_stall_check(igb)) {
2883 		igb_fm_ereport(igb, DDI_FM_DEVICE_STALL);
2884 		igb->reset_count++;
2885 		if (igb_reset(igb) == IGB_SUCCESS)
2886 			ddi_fm_service_impact(igb->dip,
2887 			    DDI_SERVICE_RESTORED);
2888 	}
2889 
2890 	mutex_enter(&igb->gen_lock);
2891 	link_changed = igb_link_check(igb);
2892 	mutex_exit(&igb->gen_lock);
2893 
2894 	if (link_changed)
2895 		mac_link_update(igb->mac_hdl, igb->link_state);
2896 
2897 	/*
2898 	 * Set Timer Interrupts
2899 	 */
2900 	if (igb->intr_type != DDI_INTR_TYPE_MSIX)
2901 		E1000_WRITE_REG(hw, E1000_ICS, E1000_IMS_RXT0);
2902 
2903 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
2904 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
2905 
2906 	igb_restart_watchdog_timer(igb);
2907 }
2908 
2909 /*
2910  * igb_stall_check - check for transmit stall
2911  *
2912  * This function checks if the adapter is stalled (in transmit).
2913  *
2914  * It is called each time the watchdog timeout is invoked.
2915  * If the transmit descriptor reclaim continuously fails,
2916  * the watchdog value will increment by 1. If the watchdog
2917  * value exceeds the threshold, the igb is assumed to
2918  * have stalled and need to be reset.
2919  */
2920 static boolean_t
2921 igb_stall_check(igb_t *igb)
2922 {
2923 	igb_tx_ring_t *tx_ring;
2924 	boolean_t result;
2925 	int i;
2926 
2927 	if (igb->link_state != LINK_STATE_UP)
2928 		return (B_FALSE);
2929 
2930 	/*
2931 	 * If any tx ring is stalled, we'll reset the chipset
2932 	 */
2933 	result = B_FALSE;
2934 	for (i = 0; i < igb->num_tx_rings; i++) {
2935 		tx_ring = &igb->tx_rings[i];
2936 
2937 		if (tx_ring->recycle_fail > 0)
2938 			tx_ring->stall_watchdog++;
2939 		else
2940 			tx_ring->stall_watchdog = 0;
2941 
2942 		if (tx_ring->stall_watchdog >= STALL_WATCHDOG_TIMEOUT) {
2943 			result = B_TRUE;
2944 			break;
2945 		}
2946 	}
2947 
2948 	if (result) {
2949 		tx_ring->stall_watchdog = 0;
2950 		tx_ring->recycle_fail = 0;
2951 	}
2952 
2953 	return (result);
2954 }
2955 
2956 
2957 /*
2958  * is_valid_mac_addr - Check if the mac address is valid
2959  */
2960 static boolean_t
2961 is_valid_mac_addr(uint8_t *mac_addr)
2962 {
2963 	const uint8_t addr_test1[6] = { 0, 0, 0, 0, 0, 0 };
2964 	const uint8_t addr_test2[6] =
2965 	    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
2966 
2967 	if (!(bcmp(addr_test1, mac_addr, ETHERADDRL)) ||
2968 	    !(bcmp(addr_test2, mac_addr, ETHERADDRL)))
2969 		return (B_FALSE);
2970 
2971 	return (B_TRUE);
2972 }
2973 
2974 static boolean_t
2975 igb_find_mac_address(igb_t *igb)
2976 {
2977 	struct e1000_hw *hw = &igb->hw;
2978 #ifdef __sparc
2979 	uchar_t *bytes;
2980 	struct ether_addr sysaddr;
2981 	uint_t nelts;
2982 	int err;
2983 	boolean_t found = B_FALSE;
2984 
2985 	/*
2986 	 * The "vendor's factory-set address" may already have
2987 	 * been extracted from the chip, but if the property
2988 	 * "local-mac-address" is set we use that instead.
2989 	 *
2990 	 * We check whether it looks like an array of 6
2991 	 * bytes (which it should, if OBP set it).  If we can't
2992 	 * make sense of it this way, we'll ignore it.
2993 	 */
2994 	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
2995 	    DDI_PROP_DONTPASS, "local-mac-address", &bytes, &nelts);
2996 	if (err == DDI_PROP_SUCCESS) {
2997 		if (nelts == ETHERADDRL) {
2998 			while (nelts--)
2999 				hw->mac.addr[nelts] = bytes[nelts];
3000 			found = B_TRUE;
3001 		}
3002 		ddi_prop_free(bytes);
3003 	}
3004 
3005 	/*
3006 	 * Look up the OBP property "local-mac-address?". If the user has set
3007 	 * 'local-mac-address? = false', use "the system address" instead.
3008 	 */
3009 	if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip, 0,
3010 	    "local-mac-address?", &bytes, &nelts) == DDI_PROP_SUCCESS) {
3011 		if (strncmp("false", (caddr_t)bytes, (size_t)nelts) == 0) {
3012 			if (localetheraddr(NULL, &sysaddr) != 0) {
3013 				bcopy(&sysaddr, hw->mac.addr, ETHERADDRL);
3014 				found = B_TRUE;
3015 			}
3016 		}
3017 		ddi_prop_free(bytes);
3018 	}
3019 
3020 	/*
3021 	 * Finally(!), if there's a valid "mac-address" property (created
3022 	 * if we netbooted from this interface), we must use this instead
3023 	 * of any of the above to ensure that the NFS/install server doesn't
3024 	 * get confused by the address changing as Solaris takes over!
3025 	 */
3026 	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
3027 	    DDI_PROP_DONTPASS, "mac-address", &bytes, &nelts);
3028 	if (err == DDI_PROP_SUCCESS) {
3029 		if (nelts == ETHERADDRL) {
3030 			while (nelts--)
3031 				hw->mac.addr[nelts] = bytes[nelts];
3032 			found = B_TRUE;
3033 		}
3034 		ddi_prop_free(bytes);
3035 	}
3036 
3037 	if (found) {
3038 		bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
3039 		return (B_TRUE);
3040 	}
3041 #endif
3042 
3043 	/*
3044 	 * Read the device MAC address from the EEPROM
3045 	 */
3046 	if (e1000_read_mac_addr(hw) != E1000_SUCCESS)
3047 		return (B_FALSE);
3048 
3049 	return (B_TRUE);
3050 }
3051 
3052 #pragma inline(igb_arm_watchdog_timer)
3053 
3054 static void
3055 igb_arm_watchdog_timer(igb_t *igb)
3056 {
3057 	/*
3058 	 * Fire a watchdog timer
3059 	 */
3060 	igb->watchdog_tid =
3061 	    timeout(igb_local_timer,
3062 	    (void *)igb, 1 * drv_usectohz(1000000));
3063 
3064 }
3065 
3066 /*
3067  * igb_enable_watchdog_timer - Enable and start the driver watchdog timer
3068  */
3069 void
3070 igb_enable_watchdog_timer(igb_t *igb)
3071 {
3072 	mutex_enter(&igb->watchdog_lock);
3073 
3074 	if (!igb->watchdog_enable) {
3075 		igb->watchdog_enable = B_TRUE;
3076 		igb->watchdog_start = B_TRUE;
3077 		igb_arm_watchdog_timer(igb);
3078 	}
3079 
3080 	mutex_exit(&igb->watchdog_lock);
3081 
3082 }
3083 
3084 /*
3085  * igb_disable_watchdog_timer - Disable and stop the driver watchdog timer
3086  */
3087 void
3088 igb_disable_watchdog_timer(igb_t *igb)
3089 {
3090 	timeout_id_t tid;
3091 
3092 	mutex_enter(&igb->watchdog_lock);
3093 
3094 	igb->watchdog_enable = B_FALSE;
3095 	igb->watchdog_start = B_FALSE;
3096 	tid = igb->watchdog_tid;
3097 	igb->watchdog_tid = 0;
3098 
3099 	mutex_exit(&igb->watchdog_lock);
3100 
3101 	if (tid != 0)
3102 		(void) untimeout(tid);
3103 
3104 }
3105 
3106 /*
3107  * igb_start_watchdog_timer - Start the driver watchdog timer
3108  */
3109 static void
3110 igb_start_watchdog_timer(igb_t *igb)
3111 {
3112 	mutex_enter(&igb->watchdog_lock);
3113 
3114 	if (igb->watchdog_enable) {
3115 		if (!igb->watchdog_start) {
3116 			igb->watchdog_start = B_TRUE;
3117 			igb_arm_watchdog_timer(igb);
3118 		}
3119 	}
3120 
3121 	mutex_exit(&igb->watchdog_lock);
3122 }
3123 
3124 /*
3125  * igb_restart_watchdog_timer - Restart the driver watchdog timer
3126  */
3127 static void
3128 igb_restart_watchdog_timer(igb_t *igb)
3129 {
3130 	mutex_enter(&igb->watchdog_lock);
3131 
3132 	if (igb->watchdog_start)
3133 		igb_arm_watchdog_timer(igb);
3134 
3135 	mutex_exit(&igb->watchdog_lock);
3136 }
3137 
3138 /*
3139  * igb_stop_watchdog_timer - Stop the driver watchdog timer
3140  */
3141 static void
3142 igb_stop_watchdog_timer(igb_t *igb)
3143 {
3144 	timeout_id_t tid;
3145 
3146 	mutex_enter(&igb->watchdog_lock);
3147 
3148 	igb->watchdog_start = B_FALSE;
3149 	tid = igb->watchdog_tid;
3150 	igb->watchdog_tid = 0;
3151 
3152 	mutex_exit(&igb->watchdog_lock);
3153 
3154 	if (tid != 0)
3155 		(void) untimeout(tid);
3156 }
3157 
3158 /*
3159  * igb_disable_adapter_interrupts - Clear/disable all hardware interrupts
3160  */
3161 static void
3162 igb_disable_adapter_interrupts(igb_t *igb)
3163 {
3164 	struct e1000_hw *hw = &igb->hw;
3165 
3166 	/*
3167 	 * Set the IMC register to mask all the interrupts,
3168 	 * including the tx interrupts.
3169 	 */
3170 	E1000_WRITE_REG(hw, E1000_IMC, ~0);
3171 	E1000_WRITE_REG(hw, E1000_IAM, 0);
3172 
3173 	/*
3174 	 * Additional disabling for MSI-X
3175 	 */
3176 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3177 		E1000_WRITE_REG(hw, E1000_EIMC, ~0);
3178 		E1000_WRITE_REG(hw, E1000_EIAC, 0);
3179 		E1000_WRITE_REG(hw, E1000_EIAM, 0);
3180 	}
3181 
3182 	E1000_WRITE_FLUSH(hw);
3183 }
3184 
3185 /*
3186  * igb_enable_adapter_interrupts_82576 - Enable NIC interrupts for 82576
3187  */
3188 static void
3189 igb_enable_adapter_interrupts_82576(igb_t *igb)
3190 {
3191 	struct e1000_hw *hw = &igb->hw;
3192 
3193 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3194 
3195 		/* Interrupt enabling for MSI-X */
3196 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3197 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3198 		igb->ims_mask = E1000_IMS_LSC;
3199 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
3200 	} else {
3201 		/* Interrupt enabling for MSI and legacy */
3202 		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
3203 		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
3204 		E1000_WRITE_REG(hw, E1000_IMS,
3205 		    (IMS_ENABLE_MASK | E1000_IMS_TXQE));
3206 	}
3207 
3208 	/* Disable auto-mask for ICR interrupt bits */
3209 	E1000_WRITE_REG(hw, E1000_IAM, 0);
3210 
3211 	E1000_WRITE_FLUSH(hw);
3212 }
3213 
3214 /*
3215  * igb_enable_adapter_interrupts_82575 - Enable NIC interrupts for 82575
3216  */
3217 static void
3218 igb_enable_adapter_interrupts_82575(igb_t *igb)
3219 {
3220 	struct e1000_hw *hw = &igb->hw;
3221 	uint32_t reg;
3222 
3223 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
3224 		/* Interrupt enabling for MSI-X */
3225 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
3226 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
3227 		igb->ims_mask = E1000_IMS_LSC;
3228 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
3229 
3230 		/* Enable MSI-X PBA support */
3231 		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
3232 		reg |= E1000_CTRL_EXT_PBA_CLR;
3233 
3234 		/* Non-selective interrupt clear-on-read */
3235 		reg |= E1000_CTRL_EXT_IRCA;	/* Called NSICR in the EAS */
3236 
3237 		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
3238 	} else {
3239 		/* Interrupt enabling for MSI and legacy */
3240 		igb->ims_mask = IMS_ENABLE_MASK;
3241 		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
3242 	}
3243 
3244 	E1000_WRITE_FLUSH(hw);
3245 }
3246 
3247 /*
3248  * Loopback Support
3249  */
3250 static lb_property_t lb_normal =
3251 	{ normal,	"normal",	IGB_LB_NONE		};
3252 static lb_property_t lb_external =
3253 	{ external,	"External",	IGB_LB_EXTERNAL		};
3254 static lb_property_t lb_mac =
3255 	{ internal,	"MAC",		IGB_LB_INTERNAL_MAC	};
3256 static lb_property_t lb_phy =
3257 	{ internal,	"PHY",		IGB_LB_INTERNAL_PHY	};
3258 static lb_property_t lb_serdes =
3259 	{ internal,	"SerDes",	IGB_LB_INTERNAL_SERDES	};
3260 
3261 enum ioc_reply
3262 igb_loopback_ioctl(igb_t *igb, struct iocblk *iocp, mblk_t *mp)
3263 {
3264 	lb_info_sz_t *lbsp;
3265 	lb_property_t *lbpp;
3266 	struct e1000_hw *hw;
3267 	uint32_t *lbmp;
3268 	uint32_t size;
3269 	uint32_t value;
3270 
3271 	hw = &igb->hw;
3272 
3273 	if (mp->b_cont == NULL)
3274 		return (IOC_INVAL);
3275 
3276 	switch (iocp->ioc_cmd) {
3277 	default:
3278 		return (IOC_INVAL);
3279 
3280 	case LB_GET_INFO_SIZE:
3281 		size = sizeof (lb_info_sz_t);
3282 		if (iocp->ioc_count != size)
3283 			return (IOC_INVAL);
3284 
3285 		value = sizeof (lb_normal);
3286 		value += sizeof (lb_mac);
3287 		if (hw->phy.media_type == e1000_media_type_copper)
3288 			value += sizeof (lb_phy);
3289 		else
3290 			value += sizeof (lb_serdes);
3291 		value += sizeof (lb_external);
3292 
3293 		lbsp = (lb_info_sz_t *)(uintptr_t)mp->b_cont->b_rptr;
3294 		*lbsp = value;
3295 		break;
3296 
3297 	case LB_GET_INFO:
3298 		value = sizeof (lb_normal);
3299 		value += sizeof (lb_mac);
3300 		if (hw->phy.media_type == e1000_media_type_copper)
3301 			value += sizeof (lb_phy);
3302 		else
3303 			value += sizeof (lb_serdes);
3304 		value += sizeof (lb_external);
3305 
3306 		size = value;
3307 		if (iocp->ioc_count != size)
3308 			return (IOC_INVAL);
3309 
3310 		value = 0;
3311 		lbpp = (lb_property_t *)(uintptr_t)mp->b_cont->b_rptr;
3312 
3313 		lbpp[value++] = lb_normal;
3314 		lbpp[value++] = lb_mac;
3315 		if (hw->phy.media_type == e1000_media_type_copper)
3316 			lbpp[value++] = lb_phy;
3317 		else
3318 			lbpp[value++] = lb_serdes;
3319 		lbpp[value++] = lb_external;
3320 		break;
3321 
3322 	case LB_GET_MODE:
3323 		size = sizeof (uint32_t);
3324 		if (iocp->ioc_count != size)
3325 			return (IOC_INVAL);
3326 
3327 		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
3328 		*lbmp = igb->loopback_mode;
3329 		break;
3330 
3331 	case LB_SET_MODE:
3332 		size = 0;
3333 		if (iocp->ioc_count != sizeof (uint32_t))
3334 			return (IOC_INVAL);
3335 
3336 		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
3337 		if (!igb_set_loopback_mode(igb, *lbmp))
3338 			return (IOC_INVAL);
3339 		break;
3340 	}
3341 
3342 	iocp->ioc_count = size;
3343 	iocp->ioc_error = 0;
3344 
3345 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
3346 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
3347 		return (IOC_INVAL);
3348 	}
3349 
3350 	return (IOC_REPLY);
3351 }
3352 
3353 /*
3354  * igb_set_loopback_mode - Setup loopback based on the loopback mode
3355  */
3356 static boolean_t
3357 igb_set_loopback_mode(igb_t *igb, uint32_t mode)
3358 {
3359 	struct e1000_hw *hw;
3360 
3361 	if (mode == igb->loopback_mode)
3362 		return (B_TRUE);
3363 
3364 	hw = &igb->hw;
3365 
3366 	igb->loopback_mode = mode;
3367 
3368 	if (mode == IGB_LB_NONE) {
3369 		/* Reset the chip */
3370 		hw->phy.autoneg_wait_to_complete = B_TRUE;
3371 		(void) igb_reset(igb);
3372 		hw->phy.autoneg_wait_to_complete = B_FALSE;
3373 		return (B_TRUE);
3374 	}
3375 
3376 	mutex_enter(&igb->gen_lock);
3377 
3378 	switch (mode) {
3379 	default:
3380 		mutex_exit(&igb->gen_lock);
3381 		return (B_FALSE);
3382 
3383 	case IGB_LB_EXTERNAL:
3384 		igb_set_external_loopback(igb);
3385 		break;
3386 
3387 	case IGB_LB_INTERNAL_MAC:
3388 		igb_set_internal_mac_loopback(igb);
3389 		break;
3390 
3391 	case IGB_LB_INTERNAL_PHY:
3392 		igb_set_internal_phy_loopback(igb);
3393 		break;
3394 
3395 	case IGB_LB_INTERNAL_SERDES:
3396 		igb_set_internal_serdes_loopback(igb);
3397 		break;
3398 	}
3399 
3400 	mutex_exit(&igb->gen_lock);
3401 
3402 	return (B_TRUE);
3403 }
3404 
3405 /*
3406  * igb_set_external_loopback - Set the external loopback mode
3407  */
3408 static void
3409 igb_set_external_loopback(igb_t *igb)
3410 {
3411 	struct e1000_hw *hw;
3412 
3413 	hw = &igb->hw;
3414 
3415 	/* Set phy to known state */
3416 	(void) e1000_phy_hw_reset(hw);
3417 
3418 	(void) e1000_write_phy_reg(hw, 0x0, 0x0140);
3419 	(void) e1000_write_phy_reg(hw, 0x9, 0x1b00);
3420 	(void) e1000_write_phy_reg(hw, 0x12, 0x1610);
3421 	(void) e1000_write_phy_reg(hw, 0x1f37, 0x3f1c);
3422 }
3423 
3424 /*
3425  * igb_set_internal_mac_loopback - Set the internal MAC loopback mode
3426  */
3427 static void
3428 igb_set_internal_mac_loopback(igb_t *igb)
3429 {
3430 	struct e1000_hw *hw;
3431 	uint32_t ctrl;
3432 	uint32_t rctl;
3433 
3434 	hw = &igb->hw;
3435 
3436 	/* Set the Receive Control register */
3437 	rctl = E1000_READ_REG(hw, E1000_RCTL);
3438 	rctl &= ~E1000_RCTL_LBM_TCVR;
3439 	rctl |= E1000_RCTL_LBM_MAC;
3440 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
3441 
3442 	/* Set the Device Control register */
3443 	ctrl = E1000_READ_REG(hw, E1000_CTRL);
3444 	ctrl &= ~E1000_CTRL_SPD_SEL;	/* Clear the speed sel bits */
3445 	ctrl |= (E1000_CTRL_SLU |	/* Force link up */
3446 	    E1000_CTRL_FRCSPD |		/* Force speed */
3447 	    E1000_CTRL_FRCDPX |		/* Force duplex */
3448 	    E1000_CTRL_SPD_1000 |	/* Force speed to 1000 */
3449 	    E1000_CTRL_FD);		/* Force full duplex */
3450 	ctrl &= ~E1000_CTRL_ILOS;	/* Clear ILOS when there's a link */
3451 	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
3452 }
3453 
3454 /*
3455  * igb_set_internal_phy_loopback - Set the internal PHY loopback mode
3456  */
3457 static void
3458 igb_set_internal_phy_loopback(igb_t *igb)
3459 {
3460 	struct e1000_hw *hw;
3461 	uint32_t ctrl_ext;
3462 	uint16_t phy_ctrl;
3463 	uint16_t phy_pconf;
3464 
3465 	hw = &igb->hw;
3466 
3467 	/* Set link mode to PHY (00b) in the Extended Control register */
3468 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3469 	ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
3470 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3471 
3472 	/*
3473 	 * Set PHY control register (0x4140):
3474 	 *    Set full duplex mode
3475 	 *    Set loopback bit
3476 	 *    Clear auto-neg enable bit
3477 	 *    Set PHY speed
3478 	 */
3479 	phy_ctrl = MII_CR_FULL_DUPLEX | MII_CR_SPEED_1000 | MII_CR_LOOPBACK;
3480 	(void) e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl);
3481 
3482 	/* Set the link disable bit in the Port Configuration register */
3483 	(void) e1000_read_phy_reg(hw, 0x10, &phy_pconf);
3484 	phy_pconf |= (uint16_t)1 << 14;
3485 	(void) e1000_write_phy_reg(hw, 0x10, phy_pconf);
3486 }
3487 
3488 /*
3489  * igb_set_internal_serdes_loopback - Set the internal SerDes loopback mode
3490  */
3491 static void
3492 igb_set_internal_serdes_loopback(igb_t *igb)
3493 {
3494 	struct e1000_hw *hw;
3495 	uint32_t ctrl_ext;
3496 	uint32_t ctrl;
3497 	uint32_t pcs_lctl;
3498 	uint32_t connsw;
3499 
3500 	hw = &igb->hw;
3501 
3502 	/* Set link mode to SerDes (11b) in the Extended Control register */
3503 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
3504 	ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
3505 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
3506 
3507 	/* Configure the SerDes to loopback */
3508 	E1000_WRITE_REG(hw, E1000_SCTL, 0x410);
3509 
3510 	/* Set Device Control register */
3511 	ctrl = E1000_READ_REG(hw, E1000_CTRL);
3512 	ctrl |= (E1000_CTRL_FD |	/* Force full duplex */
3513 	    E1000_CTRL_SLU);		/* Force link up */
3514 	ctrl &= ~(E1000_CTRL_RFCE |	/* Disable receive flow control */
3515 	    E1000_CTRL_TFCE |		/* Disable transmit flow control */
3516 	    E1000_CTRL_LRST);		/* Clear link reset */
3517 	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
3518 
3519 	/* Set PCS Link Control register */
3520 	pcs_lctl = E1000_READ_REG(hw, E1000_PCS_LCTL);
3521 	pcs_lctl |= (E1000_PCS_LCTL_FORCE_LINK |
3522 	    E1000_PCS_LCTL_FSD |
3523 	    E1000_PCS_LCTL_FDV_FULL |
3524 	    E1000_PCS_LCTL_FLV_LINK_UP);
3525 	pcs_lctl &= ~E1000_PCS_LCTL_AN_ENABLE;
3526 	E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_lctl);
3527 
3528 	/* Set the Copper/Fiber Switch Control - CONNSW register */
3529 	connsw = E1000_READ_REG(hw, E1000_CONNSW);
3530 	connsw &= ~E1000_CONNSW_ENRGSRC;
3531 	E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
3532 }
3533 
3534 #pragma inline(igb_intr_rx_work)
3535 /*
3536  * igb_intr_rx_work - rx processing of ISR
3537  */
3538 static void
3539 igb_intr_rx_work(igb_rx_ring_t *rx_ring)
3540 {
3541 	mblk_t *mp;
3542 
3543 	mutex_enter(&rx_ring->rx_lock);
3544 	mp = igb_rx(rx_ring, IGB_NO_POLL);
3545 	mutex_exit(&rx_ring->rx_lock);
3546 
3547 	if (mp != NULL)
3548 		mac_rx_ring(rx_ring->igb->mac_hdl, rx_ring->ring_handle, mp,
3549 		    rx_ring->ring_gen_num);
3550 }
3551 
3552 #pragma inline(igb_intr_tx_work)
3553 /*
3554  * igb_intr_tx_work - tx processing of ISR
3555  */
3556 static void
3557 igb_intr_tx_work(igb_tx_ring_t *tx_ring)
3558 {
3559 	/* Recycle the tx descriptors */
3560 	tx_ring->tx_recycle(tx_ring);
3561 
3562 	/* Schedule the re-transmit */
3563 	if (tx_ring->reschedule &&
3564 	    (tx_ring->tbd_free >= tx_ring->resched_thresh)) {
3565 		tx_ring->reschedule = B_FALSE;
3566 		mac_tx_ring_update(tx_ring->igb->mac_hdl, tx_ring->ring_handle);
3567 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
3568 	}
3569 }
3570 
3571 #pragma inline(igb_intr_link_work)
3572 /*
3573  * igb_intr_link_work - link-status-change processing of ISR
3574  */
3575 static void
3576 igb_intr_link_work(igb_t *igb)
3577 {
3578 	boolean_t link_changed;
3579 
3580 	igb_stop_watchdog_timer(igb);
3581 
3582 	mutex_enter(&igb->gen_lock);
3583 
3584 	/*
3585 	 * Because we got a link-status-change interrupt, force
3586 	 * e1000_check_for_link() to look at phy
3587 	 */
3588 	igb->hw.mac.get_link_status = B_TRUE;
3589 
3590 	/* igb_link_check takes care of link status change */
3591 	link_changed = igb_link_check(igb);
3592 
3593 	/* Get new phy state */
3594 	igb_get_phy_state(igb);
3595 
3596 	mutex_exit(&igb->gen_lock);
3597 
3598 	if (link_changed)
3599 		mac_link_update(igb->mac_hdl, igb->link_state);
3600 
3601 	igb_start_watchdog_timer(igb);
3602 }
3603 
3604 /*
3605  * igb_intr_legacy - Interrupt handler for legacy interrupts
3606  */
3607 static uint_t
3608 igb_intr_legacy(void *arg1, void *arg2)
3609 {
3610 	igb_t *igb = (igb_t *)arg1;
3611 	igb_tx_ring_t *tx_ring;
3612 	uint32_t icr;
3613 	mblk_t *mp;
3614 	boolean_t tx_reschedule;
3615 	boolean_t link_changed;
3616 	uint_t result;
3617 
3618 	_NOTE(ARGUNUSED(arg2));
3619 
3620 	mutex_enter(&igb->gen_lock);
3621 
3622 	if (igb->igb_state & IGB_SUSPENDED) {
3623 		mutex_exit(&igb->gen_lock);
3624 		return (DDI_INTR_UNCLAIMED);
3625 	}
3626 
3627 	mp = NULL;
3628 	tx_reschedule = B_FALSE;
3629 	link_changed = B_FALSE;
3630 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
3631 
3632 	if (icr & E1000_ICR_INT_ASSERTED) {
3633 		/*
3634 		 * E1000_ICR_INT_ASSERTED bit was set:
3635 		 * Read(Clear) the ICR, claim this interrupt,
3636 		 * look for work to do.
3637 		 */
3638 		ASSERT(igb->num_rx_rings == 1);
3639 		ASSERT(igb->num_tx_rings == 1);
3640 
3641 		/* Make sure all interrupt causes cleared */
3642 		(void) E1000_READ_REG(&igb->hw, E1000_EICR);
3643 
3644 		if (icr & E1000_ICR_RXT0) {
3645 			mp = igb_rx(&igb->rx_rings[0], IGB_NO_POLL);
3646 		}
3647 
3648 		if (icr & E1000_ICR_TXDW) {
3649 			tx_ring = &igb->tx_rings[0];
3650 
3651 			/* Recycle the tx descriptors */
3652 			tx_ring->tx_recycle(tx_ring);
3653 
3654 			/* Schedule the re-transmit */
3655 			tx_reschedule = (tx_ring->reschedule &&
3656 			    (tx_ring->tbd_free >= tx_ring->resched_thresh));
3657 		}
3658 
3659 		if (icr & E1000_ICR_LSC) {
3660 			/*
3661 			 * Because we got a link-status-change interrupt, force
3662 			 * e1000_check_for_link() to look at phy
3663 			 */
3664 			igb->hw.mac.get_link_status = B_TRUE;
3665 
3666 			/* igb_link_check takes care of link status change */
3667 			link_changed = igb_link_check(igb);
3668 
3669 			/* Get new phy state */
3670 			igb_get_phy_state(igb);
3671 		}
3672 
3673 		result = DDI_INTR_CLAIMED;
3674 	} else {
3675 		/*
3676 		 * E1000_ICR_INT_ASSERTED bit was not set:
3677 		 * Don't claim this interrupt.
3678 		 */
3679 		result = DDI_INTR_UNCLAIMED;
3680 	}
3681 
3682 	mutex_exit(&igb->gen_lock);
3683 
3684 	/*
3685 	 * Do the following work outside of the gen_lock
3686 	 */
3687 	if (mp != NULL)
3688 		mac_rx(igb->mac_hdl, NULL, mp);
3689 
3690 	if (tx_reschedule)  {
3691 		tx_ring->reschedule = B_FALSE;
3692 		mac_tx_ring_update(igb->mac_hdl, tx_ring->ring_handle);
3693 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
3694 	}
3695 
3696 	if (link_changed)
3697 		mac_link_update(igb->mac_hdl, igb->link_state);
3698 
3699 	return (result);
3700 }
3701 
3702 /*
3703  * igb_intr_msi - Interrupt handler for MSI
3704  */
3705 static uint_t
3706 igb_intr_msi(void *arg1, void *arg2)
3707 {
3708 	igb_t *igb = (igb_t *)arg1;
3709 	uint32_t icr;
3710 
3711 	_NOTE(ARGUNUSED(arg2));
3712 
3713 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
3714 
3715 	/* Make sure all interrupt causes cleared */
3716 	(void) E1000_READ_REG(&igb->hw, E1000_EICR);
3717 
3718 	/*
3719 	 * For MSI interrupt, we have only one vector,
3720 	 * so we have only one rx ring and one tx ring enabled.
3721 	 */
3722 	ASSERT(igb->num_rx_rings == 1);
3723 	ASSERT(igb->num_tx_rings == 1);
3724 
3725 	if (icr & E1000_ICR_RXT0) {
3726 		igb_intr_rx_work(&igb->rx_rings[0]);
3727 	}
3728 
3729 	if (icr & E1000_ICR_TXDW) {
3730 		igb_intr_tx_work(&igb->tx_rings[0]);
3731 	}
3732 
3733 	if (icr & E1000_ICR_LSC) {
3734 		igb_intr_link_work(igb);
3735 	}
3736 
3737 	return (DDI_INTR_CLAIMED);
3738 }
3739 
3740 /*
3741  * igb_intr_rx - Interrupt handler for rx
3742  */
3743 static uint_t
3744 igb_intr_rx(void *arg1, void *arg2)
3745 {
3746 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg1;
3747 
3748 	_NOTE(ARGUNUSED(arg2));
3749 
3750 	/*
3751 	 * Only used via MSI-X vector so don't check cause bits
3752 	 * and only clean the given ring.
3753 	 */
3754 	igb_intr_rx_work(rx_ring);
3755 
3756 	return (DDI_INTR_CLAIMED);
3757 }
3758 
3759 /*
3760  * igb_intr_tx - Interrupt handler for tx
3761  */
3762 static uint_t
3763 igb_intr_tx(void *arg1, void *arg2)
3764 {
3765 	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg1;
3766 
3767 	_NOTE(ARGUNUSED(arg2));
3768 
3769 	/*
3770 	 * Only used via MSI-X vector so don't check cause bits
3771 	 * and only clean the given ring.
3772 	 */
3773 	igb_intr_tx_work(tx_ring);
3774 
3775 	return (DDI_INTR_CLAIMED);
3776 }
3777 
3778 /*
3779  * igb_intr_tx_other - Interrupt handler for both tx and other
3780  *
3781  */
3782 static uint_t
3783 igb_intr_tx_other(void *arg1, void *arg2)
3784 {
3785 	igb_t *igb = (igb_t *)arg1;
3786 	uint32_t icr;
3787 
3788 	_NOTE(ARGUNUSED(arg2));
3789 
3790 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
3791 
3792 	/*
3793 	 * Look for tx reclaiming work first. Remember, in the
3794 	 * case of only interrupt sharing, only one tx ring is
3795 	 * used
3796 	 */
3797 	igb_intr_tx_work(&igb->tx_rings[0]);
3798 
3799 	/*
3800 	 * Need check cause bits and only link change will
3801 	 * be processed
3802 	 */
3803 	if (icr & E1000_ICR_LSC) {
3804 		igb_intr_link_work(igb);
3805 	}
3806 
3807 	return (DDI_INTR_CLAIMED);
3808 }
3809 
3810 /*
3811  * igb_alloc_intrs - Allocate interrupts for the driver
3812  *
3813  * Normal sequence is to try MSI-X; if not sucessful, try MSI;
3814  * if not successful, try Legacy.
3815  * igb->intr_force can be used to force sequence to start with
3816  * any of the 3 types.
3817  * If MSI-X is not used, number of tx/rx rings is forced to 1.
3818  */
3819 static int
3820 igb_alloc_intrs(igb_t *igb)
3821 {
3822 	dev_info_t *devinfo;
3823 	int intr_types;
3824 	int rc;
3825 
3826 	devinfo = igb->dip;
3827 
3828 	/* Get supported interrupt types */
3829 	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
3830 
3831 	if (rc != DDI_SUCCESS) {
3832 		igb_log(igb,
3833 		    "Get supported interrupt types failed: %d", rc);
3834 		return (IGB_FAILURE);
3835 	}
3836 	IGB_DEBUGLOG_1(igb, "Supported interrupt types: %x", intr_types);
3837 
3838 	igb->intr_type = 0;
3839 
3840 	/* Install MSI-X interrupts */
3841 	if ((intr_types & DDI_INTR_TYPE_MSIX) &&
3842 	    (igb->intr_force <= IGB_INTR_MSIX)) {
3843 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSIX);
3844 
3845 		if (rc == IGB_SUCCESS)
3846 			return (IGB_SUCCESS);
3847 
3848 		igb_log(igb,
3849 		    "Allocate MSI-X failed, trying MSI interrupts...");
3850 	}
3851 
3852 	/* MSI-X not used, force rings to 1 */
3853 	igb->num_rx_rings = 1;
3854 	igb->num_tx_rings = 1;
3855 	igb_log(igb,
3856 	    "MSI-X not used, force rx and tx queue number to 1");
3857 
3858 	/* Install MSI interrupts */
3859 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
3860 	    (igb->intr_force <= IGB_INTR_MSI)) {
3861 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSI);
3862 
3863 		if (rc == IGB_SUCCESS)
3864 			return (IGB_SUCCESS);
3865 
3866 		igb_log(igb,
3867 		    "Allocate MSI failed, trying Legacy interrupts...");
3868 	}
3869 
3870 	/* Install legacy interrupts */
3871 	if (intr_types & DDI_INTR_TYPE_FIXED) {
3872 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_FIXED);
3873 
3874 		if (rc == IGB_SUCCESS)
3875 			return (IGB_SUCCESS);
3876 
3877 		igb_log(igb,
3878 		    "Allocate Legacy interrupts failed");
3879 	}
3880 
3881 	/* If none of the 3 types succeeded, return failure */
3882 	return (IGB_FAILURE);
3883 }
3884 
3885 /*
3886  * igb_alloc_intr_handles - Allocate interrupt handles.
3887  *
3888  * For legacy and MSI, only 1 handle is needed.  For MSI-X,
3889  * if fewer than 2 handles are available, return failure.
3890  * Upon success, this sets the number of Rx rings to a number that
3891  * matches the handles available for Rx interrupts.
3892  */
3893 static int
3894 igb_alloc_intr_handles(igb_t *igb, int intr_type)
3895 {
3896 	dev_info_t *devinfo;
3897 	int orig, request, count, avail, actual;
3898 	int diff, minimum;
3899 	int rc;
3900 
3901 	devinfo = igb->dip;
3902 
3903 	switch (intr_type) {
3904 	case DDI_INTR_TYPE_FIXED:
3905 		request = 1;	/* Request 1 legacy interrupt handle */
3906 		minimum = 1;
3907 		IGB_DEBUGLOG_0(igb, "interrupt type: legacy");
3908 		break;
3909 
3910 	case DDI_INTR_TYPE_MSI:
3911 		request = 1;	/* Request 1 MSI interrupt handle */
3912 		minimum = 1;
3913 		IGB_DEBUGLOG_0(igb, "interrupt type: MSI");
3914 		break;
3915 
3916 	case DDI_INTR_TYPE_MSIX:
3917 		/*
3918 		 * Number of vectors for the adapter is
3919 		 * # rx rings + # tx rings
3920 		 * One of tx vectors is for tx & other
3921 		 */
3922 		request = igb->num_rx_rings + igb->num_tx_rings;
3923 		orig = request;
3924 		minimum = 2;
3925 		IGB_DEBUGLOG_0(igb, "interrupt type: MSI-X");
3926 		break;
3927 
3928 	default:
3929 		igb_log(igb,
3930 		    "invalid call to igb_alloc_intr_handles(): %d\n",
3931 		    intr_type);
3932 		return (IGB_FAILURE);
3933 	}
3934 	IGB_DEBUGLOG_2(igb, "interrupt handles requested: %d  minimum: %d",
3935 	    request, minimum);
3936 
3937 	/*
3938 	 * Get number of supported interrupts
3939 	 */
3940 	rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
3941 	if ((rc != DDI_SUCCESS) || (count < minimum)) {
3942 		igb_log(igb,
3943 		    "Get supported interrupt number failed. "
3944 		    "Return: %d, count: %d", rc, count);
3945 		return (IGB_FAILURE);
3946 	}
3947 	IGB_DEBUGLOG_1(igb, "interrupts supported: %d", count);
3948 
3949 	/*
3950 	 * Get number of available interrupts
3951 	 */
3952 	rc = ddi_intr_get_navail(devinfo, intr_type, &avail);
3953 	if ((rc != DDI_SUCCESS) || (avail < minimum)) {
3954 		igb_log(igb,
3955 		    "Get available interrupt number failed. "
3956 		    "Return: %d, available: %d", rc, avail);
3957 		return (IGB_FAILURE);
3958 	}
3959 	IGB_DEBUGLOG_1(igb, "interrupts available: %d", avail);
3960 
3961 	if (avail < request) {
3962 		igb_log(igb, "Request %d handles, %d available",
3963 		    request, avail);
3964 		request = avail;
3965 	}
3966 
3967 	actual = 0;
3968 	igb->intr_cnt = 0;
3969 
3970 	/*
3971 	 * Allocate an array of interrupt handles
3972 	 */
3973 	igb->intr_size = request * sizeof (ddi_intr_handle_t);
3974 	igb->htable = kmem_alloc(igb->intr_size, KM_SLEEP);
3975 
3976 	rc = ddi_intr_alloc(devinfo, igb->htable, intr_type, 0,
3977 	    request, &actual, DDI_INTR_ALLOC_NORMAL);
3978 	if (rc != DDI_SUCCESS) {
3979 		igb_log(igb, "Allocate interrupts failed. "
3980 		    "return: %d, request: %d, actual: %d",
3981 		    rc, request, actual);
3982 		goto alloc_handle_fail;
3983 	}
3984 	IGB_DEBUGLOG_1(igb, "interrupts actually allocated: %d", actual);
3985 
3986 	igb->intr_cnt = actual;
3987 
3988 	if (actual < minimum) {
3989 		igb_log(igb, "Insufficient interrupt handles allocated: %d",
3990 		    actual);
3991 		goto alloc_handle_fail;
3992 	}
3993 
3994 	/*
3995 	 * For MSI-X, actual might force us to reduce number of tx & rx rings
3996 	 */
3997 	if ((intr_type == DDI_INTR_TYPE_MSIX) && (orig > actual)) {
3998 		diff = orig - actual;
3999 		if (diff < igb->num_tx_rings) {
4000 			igb_log(igb,
4001 			    "MSI-X vectors force Tx queue number to %d",
4002 			    igb->num_tx_rings - diff);
4003 			igb->num_tx_rings -= diff;
4004 		} else {
4005 			igb_log(igb,
4006 			    "MSI-X vectors force Tx queue number to 1");
4007 			igb->num_tx_rings = 1;
4008 
4009 			igb_log(igb,
4010 			    "MSI-X vectors force Rx queue number to %d",
4011 			    actual - 1);
4012 			igb->num_rx_rings = actual - 1;
4013 		}
4014 	}
4015 
4016 	/*
4017 	 * Get priority for first vector, assume remaining are all the same
4018 	 */
4019 	rc = ddi_intr_get_pri(igb->htable[0], &igb->intr_pri);
4020 	if (rc != DDI_SUCCESS) {
4021 		igb_log(igb,
4022 		    "Get interrupt priority failed: %d", rc);
4023 		goto alloc_handle_fail;
4024 	}
4025 
4026 	rc = ddi_intr_get_cap(igb->htable[0], &igb->intr_cap);
4027 	if (rc != DDI_SUCCESS) {
4028 		igb_log(igb,
4029 		    "Get interrupt cap failed: %d", rc);
4030 		goto alloc_handle_fail;
4031 	}
4032 
4033 	igb->intr_type = intr_type;
4034 
4035 	return (IGB_SUCCESS);
4036 
4037 alloc_handle_fail:
4038 	igb_rem_intrs(igb);
4039 
4040 	return (IGB_FAILURE);
4041 }
4042 
4043 /*
4044  * igb_add_intr_handlers - Add interrupt handlers based on the interrupt type
4045  *
4046  * Before adding the interrupt handlers, the interrupt vectors have
4047  * been allocated, and the rx/tx rings have also been allocated.
4048  */
4049 static int
4050 igb_add_intr_handlers(igb_t *igb)
4051 {
4052 	igb_rx_ring_t *rx_ring;
4053 	igb_tx_ring_t *tx_ring;
4054 	int vector;
4055 	int rc;
4056 	int i;
4057 
4058 	vector = 0;
4059 
4060 	switch (igb->intr_type) {
4061 	case DDI_INTR_TYPE_MSIX:
4062 		/* Add interrupt handler for tx + other */
4063 		tx_ring = &igb->tx_rings[0];
4064 		rc = ddi_intr_add_handler(igb->htable[vector],
4065 		    (ddi_intr_handler_t *)igb_intr_tx_other,
4066 		    (void *)igb, NULL);
4067 
4068 		if (rc != DDI_SUCCESS) {
4069 			igb_log(igb,
4070 			    "Add tx/other interrupt handler failed: %d", rc);
4071 			return (IGB_FAILURE);
4072 		}
4073 		tx_ring->intr_vector = vector;
4074 		vector++;
4075 
4076 		/* Add interrupt handler for each rx ring */
4077 		for (i = 0; i < igb->num_rx_rings; i++) {
4078 			rx_ring = &igb->rx_rings[i];
4079 
4080 			rc = ddi_intr_add_handler(igb->htable[vector],
4081 			    (ddi_intr_handler_t *)igb_intr_rx,
4082 			    (void *)rx_ring, NULL);
4083 
4084 			if (rc != DDI_SUCCESS) {
4085 				igb_log(igb,
4086 				    "Add rx interrupt handler failed. "
4087 				    "return: %d, rx ring: %d", rc, i);
4088 				for (vector--; vector >= 0; vector--) {
4089 					(void) ddi_intr_remove_handler(
4090 					    igb->htable[vector]);
4091 				}
4092 				return (IGB_FAILURE);
4093 			}
4094 
4095 			rx_ring->intr_vector = vector;
4096 
4097 			vector++;
4098 		}
4099 
4100 		/* Add interrupt handler for each tx ring from 2nd ring */
4101 		for (i = 1; i < igb->num_tx_rings; i++) {
4102 			tx_ring = &igb->tx_rings[i];
4103 
4104 			rc = ddi_intr_add_handler(igb->htable[vector],
4105 			    (ddi_intr_handler_t *)igb_intr_tx,
4106 			    (void *)tx_ring, NULL);
4107 
4108 			if (rc != DDI_SUCCESS) {
4109 				igb_log(igb,
4110 				    "Add tx interrupt handler failed. "
4111 				    "return: %d, tx ring: %d", rc, i);
4112 				for (vector--; vector >= 0; vector--) {
4113 					(void) ddi_intr_remove_handler(
4114 					    igb->htable[vector]);
4115 				}
4116 				return (IGB_FAILURE);
4117 			}
4118 
4119 			tx_ring->intr_vector = vector;
4120 
4121 			vector++;
4122 		}
4123 
4124 		break;
4125 
4126 	case DDI_INTR_TYPE_MSI:
4127 		/* Add interrupt handlers for the only vector */
4128 		rc = ddi_intr_add_handler(igb->htable[vector],
4129 		    (ddi_intr_handler_t *)igb_intr_msi,
4130 		    (void *)igb, NULL);
4131 
4132 		if (rc != DDI_SUCCESS) {
4133 			igb_log(igb,
4134 			    "Add MSI interrupt handler failed: %d", rc);
4135 			return (IGB_FAILURE);
4136 		}
4137 
4138 		rx_ring = &igb->rx_rings[0];
4139 		rx_ring->intr_vector = vector;
4140 
4141 		vector++;
4142 		break;
4143 
4144 	case DDI_INTR_TYPE_FIXED:
4145 		/* Add interrupt handlers for the only vector */
4146 		rc = ddi_intr_add_handler(igb->htable[vector],
4147 		    (ddi_intr_handler_t *)igb_intr_legacy,
4148 		    (void *)igb, NULL);
4149 
4150 		if (rc != DDI_SUCCESS) {
4151 			igb_log(igb,
4152 			    "Add legacy interrupt handler failed: %d", rc);
4153 			return (IGB_FAILURE);
4154 		}
4155 
4156 		rx_ring = &igb->rx_rings[0];
4157 		rx_ring->intr_vector = vector;
4158 
4159 		vector++;
4160 		break;
4161 
4162 	default:
4163 		return (IGB_FAILURE);
4164 	}
4165 
4166 	ASSERT(vector == igb->intr_cnt);
4167 
4168 	return (IGB_SUCCESS);
4169 }
4170 
4171 /*
4172  * igb_setup_msix_82575 - setup 82575 adapter to use MSI-X interrupts
4173  *
4174  * For each vector enabled on the adapter, Set the MSIXBM register accordingly
4175  */
4176 static void
4177 igb_setup_msix_82575(igb_t *igb)
4178 {
4179 	uint32_t eims = 0;
4180 	int i, vector;
4181 	struct e1000_hw *hw = &igb->hw;
4182 
4183 	/*
4184 	 * Set vector for tx ring 0 and other causes.
4185 	 * NOTE assumption that it is vector 0.
4186 	 */
4187 	vector = 0;
4188 
4189 	igb->eims_mask = E1000_EICR_TX_QUEUE0 | E1000_EICR_OTHER;
4190 	E1000_WRITE_REG(hw, E1000_MSIXBM(vector), igb->eims_mask);
4191 	vector++;
4192 
4193 	for (i = 0; i < igb->num_rx_rings; i++) {
4194 		/*
4195 		 * Set vector for each rx ring
4196 		 */
4197 		eims = (E1000_EICR_RX_QUEUE0 << i);
4198 		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
4199 
4200 		/*
4201 		 * Accumulate bits to enable in
4202 		 * igb_enable_adapter_interrupts_82575()
4203 		 */
4204 		igb->eims_mask |= eims;
4205 
4206 		vector++;
4207 	}
4208 
4209 	for (i = 1; i < igb->num_tx_rings; i++) {
4210 		/*
4211 		 * Set vector for each tx ring from 2nd tx ring
4212 		 */
4213 		eims = (E1000_EICR_TX_QUEUE0 << i);
4214 		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
4215 
4216 		/*
4217 		 * Accumulate bits to enable in
4218 		 * igb_enable_adapter_interrupts_82575()
4219 		 */
4220 		igb->eims_mask |= eims;
4221 
4222 		vector++;
4223 	}
4224 
4225 	ASSERT(vector == igb->intr_cnt);
4226 
4227 	/*
4228 	 * Disable IAM for ICR interrupt bits
4229 	 */
4230 	E1000_WRITE_REG(hw, E1000_IAM, 0);
4231 	E1000_WRITE_FLUSH(hw);
4232 }
4233 
4234 /*
4235  * igb_setup_msix_82576 - setup 82576 adapter to use MSI-X interrupts
4236  *
4237  * 82576 uses a table based method for assigning vectors.  Each queue has a
4238  * single entry in the table to which we write a vector number along with a
4239  * "valid" bit.  The entry is a single byte in a 4-byte register.  Vectors
4240  * take a different position in the 4-byte register depending on whether
4241  * they are numbered above or below 8.
4242  */
4243 static void
4244 igb_setup_msix_82576(igb_t *igb)
4245 {
4246 	struct e1000_hw *hw = &igb->hw;
4247 	uint32_t ivar, index, vector;
4248 	int i;
4249 
4250 	/* must enable msi-x capability before IVAR settings */
4251 	E1000_WRITE_REG(hw, E1000_GPIE,
4252 	    (E1000_GPIE_MSIX_MODE | E1000_GPIE_PBA | E1000_GPIE_NSICR));
4253 
4254 	/*
4255 	 * Set vector for tx ring 0 and other causes.
4256 	 * NOTE assumption that it is vector 0.
4257 	 * This is also interdependent with installation of interrupt service
4258 	 * routines in igb_add_intr_handlers().
4259 	 */
4260 
4261 	/* assign "other" causes to vector 0 */
4262 	vector = 0;
4263 	ivar = ((vector | E1000_IVAR_VALID) << 8);
4264 	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
4265 
4266 	/* assign tx ring 0 to vector 0 */
4267 	ivar = ((vector | E1000_IVAR_VALID) << 8);
4268 	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
4269 
4270 	/* prepare to enable tx & other interrupt causes */
4271 	igb->eims_mask = (1 << vector);
4272 
4273 	vector ++;
4274 	for (i = 0; i < igb->num_rx_rings; i++) {
4275 		/*
4276 		 * Set vector for each rx ring
4277 		 */
4278 		index = (i & 0x7);
4279 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4280 
4281 		if (i < 8) {
4282 			/* vector goes into low byte of register */
4283 			ivar = ivar & 0xFFFFFF00;
4284 			ivar |= (vector | E1000_IVAR_VALID);
4285 		} else {
4286 			/* vector goes into third byte of register */
4287 			ivar = ivar & 0xFF00FFFF;
4288 			ivar |= ((vector | E1000_IVAR_VALID) << 16);
4289 		}
4290 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4291 
4292 		/* Accumulate interrupt-cause bits to enable */
4293 		igb->eims_mask |= (1 << vector);
4294 
4295 		vector ++;
4296 	}
4297 
4298 	for (i = 1; i < igb->num_tx_rings; i++) {
4299 		/*
4300 		 * Set vector for each tx ring from 2nd tx ring.
4301 		 * Note assumption that tx vectors numericall follow rx vectors.
4302 		 */
4303 		index = (i & 0x7);
4304 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
4305 
4306 		if (i < 8) {
4307 			/* vector goes into second byte of register */
4308 			ivar = ivar & 0xFFFF00FF;
4309 			ivar |= ((vector | E1000_IVAR_VALID) << 8);
4310 		} else {
4311 			/* vector goes into fourth byte of register */
4312 			ivar = ivar & 0x00FFFFFF;
4313 			ivar |= (vector | E1000_IVAR_VALID) << 24;
4314 		}
4315 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
4316 
4317 		/* Accumulate interrupt-cause bits to enable */
4318 		igb->eims_mask |= (1 << vector);
4319 
4320 		vector ++;
4321 	}
4322 
4323 	ASSERT(vector == igb->intr_cnt);
4324 }
4325 
4326 /*
4327  * igb_rem_intr_handlers - remove the interrupt handlers
4328  */
4329 static void
4330 igb_rem_intr_handlers(igb_t *igb)
4331 {
4332 	int i;
4333 	int rc;
4334 
4335 	for (i = 0; i < igb->intr_cnt; i++) {
4336 		rc = ddi_intr_remove_handler(igb->htable[i]);
4337 		if (rc != DDI_SUCCESS) {
4338 			IGB_DEBUGLOG_1(igb,
4339 			    "Remove intr handler failed: %d", rc);
4340 		}
4341 	}
4342 }
4343 
4344 /*
4345  * igb_rem_intrs - remove the allocated interrupts
4346  */
4347 static void
4348 igb_rem_intrs(igb_t *igb)
4349 {
4350 	int i;
4351 	int rc;
4352 
4353 	for (i = 0; i < igb->intr_cnt; i++) {
4354 		rc = ddi_intr_free(igb->htable[i]);
4355 		if (rc != DDI_SUCCESS) {
4356 			IGB_DEBUGLOG_1(igb,
4357 			    "Free intr failed: %d", rc);
4358 		}
4359 	}
4360 
4361 	kmem_free(igb->htable, igb->intr_size);
4362 	igb->htable = NULL;
4363 }
4364 
4365 /*
4366  * igb_enable_intrs - enable all the ddi interrupts
4367  */
4368 static int
4369 igb_enable_intrs(igb_t *igb)
4370 {
4371 	int i;
4372 	int rc;
4373 
4374 	/* Enable interrupts */
4375 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
4376 		/* Call ddi_intr_block_enable() for MSI */
4377 		rc = ddi_intr_block_enable(igb->htable, igb->intr_cnt);
4378 		if (rc != DDI_SUCCESS) {
4379 			igb_log(igb,
4380 			    "Enable block intr failed: %d", rc);
4381 			return (IGB_FAILURE);
4382 		}
4383 	} else {
4384 		/* Call ddi_intr_enable() for Legacy/MSI non block enable */
4385 		for (i = 0; i < igb->intr_cnt; i++) {
4386 			rc = ddi_intr_enable(igb->htable[i]);
4387 			if (rc != DDI_SUCCESS) {
4388 				igb_log(igb,
4389 				    "Enable intr failed: %d", rc);
4390 				return (IGB_FAILURE);
4391 			}
4392 		}
4393 	}
4394 
4395 	return (IGB_SUCCESS);
4396 }
4397 
4398 /*
4399  * igb_disable_intrs - disable all the ddi interrupts
4400  */
4401 static int
4402 igb_disable_intrs(igb_t *igb)
4403 {
4404 	int i;
4405 	int rc;
4406 
4407 	/* Disable all interrupts */
4408 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
4409 		rc = ddi_intr_block_disable(igb->htable, igb->intr_cnt);
4410 		if (rc != DDI_SUCCESS) {
4411 			igb_log(igb,
4412 			    "Disable block intr failed: %d", rc);
4413 			return (IGB_FAILURE);
4414 		}
4415 	} else {
4416 		for (i = 0; i < igb->intr_cnt; i++) {
4417 			rc = ddi_intr_disable(igb->htable[i]);
4418 			if (rc != DDI_SUCCESS) {
4419 				igb_log(igb,
4420 				    "Disable intr failed: %d", rc);
4421 				return (IGB_FAILURE);
4422 			}
4423 		}
4424 	}
4425 
4426 	return (IGB_SUCCESS);
4427 }
4428 
4429 /*
4430  * igb_get_phy_state - Get and save the parameters read from PHY registers
4431  */
4432 static void
4433 igb_get_phy_state(igb_t *igb)
4434 {
4435 	struct e1000_hw *hw = &igb->hw;
4436 	uint16_t phy_ctrl;
4437 	uint16_t phy_status;
4438 	uint16_t phy_an_adv;
4439 	uint16_t phy_an_exp;
4440 	uint16_t phy_ext_status;
4441 	uint16_t phy_1000t_ctrl;
4442 	uint16_t phy_1000t_status;
4443 	uint16_t phy_lp_able;
4444 
4445 	ASSERT(mutex_owned(&igb->gen_lock));
4446 
4447 	(void) e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl);
4448 	(void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status);
4449 	(void) e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &phy_an_adv);
4450 	(void) e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_an_exp);
4451 	(void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, &phy_ext_status);
4452 	(void) e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_1000t_ctrl);
4453 	(void) e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_1000t_status);
4454 	(void) e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_lp_able);
4455 
4456 	igb->param_autoneg_cap =
4457 	    (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0;
4458 	igb->param_pause_cap =
4459 	    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
4460 	igb->param_asym_pause_cap =
4461 	    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
4462 	igb->param_1000fdx_cap = ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) ||
4463 	    (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0;
4464 	igb->param_1000hdx_cap = ((phy_ext_status & IEEE_ESR_1000T_HD_CAPS) ||
4465 	    (phy_ext_status & IEEE_ESR_1000X_HD_CAPS)) ? 1 : 0;
4466 	igb->param_100t4_cap =
4467 	    (phy_status & MII_SR_100T4_CAPS) ? 1 : 0;
4468 	igb->param_100fdx_cap = ((phy_status & MII_SR_100X_FD_CAPS) ||
4469 	    (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0;
4470 	igb->param_100hdx_cap = ((phy_status & MII_SR_100X_HD_CAPS) ||
4471 	    (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0;
4472 	igb->param_10fdx_cap =
4473 	    (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0;
4474 	igb->param_10hdx_cap =
4475 	    (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0;
4476 	igb->param_rem_fault =
4477 	    (phy_status & MII_SR_REMOTE_FAULT) ? 1 : 0;
4478 
4479 	igb->param_adv_autoneg_cap = hw->mac.autoneg;
4480 	igb->param_adv_pause_cap =
4481 	    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
4482 	igb->param_adv_asym_pause_cap =
4483 	    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
4484 	igb->param_adv_1000hdx_cap =
4485 	    (phy_1000t_ctrl & CR_1000T_HD_CAPS) ? 1 : 0;
4486 	igb->param_adv_100t4_cap =
4487 	    (phy_an_adv & NWAY_AR_100T4_CAPS) ? 1 : 0;
4488 	igb->param_adv_rem_fault =
4489 	    (phy_an_adv & NWAY_AR_REMOTE_FAULT) ? 1 : 0;
4490 	if (igb->param_adv_autoneg_cap == 1) {
4491 		igb->param_adv_1000fdx_cap =
4492 		    (phy_1000t_ctrl & CR_1000T_FD_CAPS) ? 1 : 0;
4493 		igb->param_adv_100fdx_cap =
4494 		    (phy_an_adv & NWAY_AR_100TX_FD_CAPS) ? 1 : 0;
4495 		igb->param_adv_100hdx_cap =
4496 		    (phy_an_adv & NWAY_AR_100TX_HD_CAPS) ? 1 : 0;
4497 		igb->param_adv_10fdx_cap =
4498 		    (phy_an_adv & NWAY_AR_10T_FD_CAPS) ? 1 : 0;
4499 		igb->param_adv_10hdx_cap =
4500 		    (phy_an_adv & NWAY_AR_10T_HD_CAPS) ? 1 : 0;
4501 	}
4502 
4503 	igb->param_lp_autoneg_cap =
4504 	    (phy_an_exp & NWAY_ER_LP_NWAY_CAPS) ? 1 : 0;
4505 	igb->param_lp_pause_cap =
4506 	    (phy_lp_able & NWAY_LPAR_PAUSE) ? 1 : 0;
4507 	igb->param_lp_asym_pause_cap =
4508 	    (phy_lp_able & NWAY_LPAR_ASM_DIR) ? 1 : 0;
4509 	igb->param_lp_1000fdx_cap =
4510 	    (phy_1000t_status & SR_1000T_LP_FD_CAPS) ? 1 : 0;
4511 	igb->param_lp_1000hdx_cap =
4512 	    (phy_1000t_status & SR_1000T_LP_HD_CAPS) ? 1 : 0;
4513 	igb->param_lp_100t4_cap =
4514 	    (phy_lp_able & NWAY_LPAR_100T4_CAPS) ? 1 : 0;
4515 	igb->param_lp_100fdx_cap =
4516 	    (phy_lp_able & NWAY_LPAR_100TX_FD_CAPS) ? 1 : 0;
4517 	igb->param_lp_100hdx_cap =
4518 	    (phy_lp_able & NWAY_LPAR_100TX_HD_CAPS) ? 1 : 0;
4519 	igb->param_lp_10fdx_cap =
4520 	    (phy_lp_able & NWAY_LPAR_10T_FD_CAPS) ? 1 : 0;
4521 	igb->param_lp_10hdx_cap =
4522 	    (phy_lp_able & NWAY_LPAR_10T_HD_CAPS) ? 1 : 0;
4523 	igb->param_lp_rem_fault =
4524 	    (phy_lp_able & NWAY_LPAR_REMOTE_FAULT) ? 1 : 0;
4525 }
4526 
4527 /*
4528  * igb_get_driver_control
4529  */
4530 static void
4531 igb_get_driver_control(struct e1000_hw *hw)
4532 {
4533 	uint32_t ctrl_ext;
4534 
4535 	/* Notify firmware that driver is in control of device */
4536 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
4537 	ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
4538 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
4539 }
4540 
4541 /*
4542  * igb_release_driver_control
4543  */
4544 static void
4545 igb_release_driver_control(struct e1000_hw *hw)
4546 {
4547 	uint32_t ctrl_ext;
4548 
4549 	/* Notify firmware that driver is no longer in control of device */
4550 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
4551 	ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
4552 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
4553 }
4554 
/*
 * igb_atomic_reserve - Atomic decrease operation
 *
 * Atomically subtracts n from the counter at count_p, provided the counter
 * currently holds at least n.
 *
 * Returns the counter value left after the subtraction, or -1 (leaving the
 * counter unchanged) when fewer than n units are available.
 */
int
igb_atomic_reserve(uint32_t *count_p, uint32_t n)
{
	uint32_t current;
	uint32_t remaining;

	/*
	 * Compare-and-swap retry loop: take a snapshot of the counter,
	 * compute the new value, and publish it only if nobody changed the
	 * counter in between.  Otherwise start over with a fresh snapshot.
	 */
	for (;;) {
		current = *count_p;
		if (current < n)
			return (-1);

		remaining = current - n;
		if (atomic_cas_32(count_p, current, remaining) == current)
			break;
	}

	return (remaining);
}
4574 
4575 /*
4576  * FMA support
4577  */
4578 
4579 int
4580 igb_check_acc_handle(ddi_acc_handle_t handle)
4581 {
4582 	ddi_fm_error_t de;
4583 
4584 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
4585 	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
4586 	return (de.fme_status);
4587 }
4588 
4589 int
4590 igb_check_dma_handle(ddi_dma_handle_t handle)
4591 {
4592 	ddi_fm_error_t de;
4593 
4594 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
4595 	return (de.fme_status);
4596 }
4597 
4598 /*
4599  * The IO fault service error handling callback function
4600  */
4601 /*ARGSUSED*/
4602 static int
4603 igb_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
4604 {
4605 	/*
4606 	 * as the driver can always deal with an error in any dma or
4607 	 * access handle, we can just return the fme_status value.
4608 	 */
4609 	pci_ereport_post(dip, err, NULL);
4610 	return (err->fme_status);
4611 }
4612 
4613 static void
4614 igb_fm_init(igb_t *igb)
4615 {
4616 	ddi_iblock_cookie_t iblk;
4617 	int fma_acc_flag, fma_dma_flag;
4618 
4619 	/* Only register with IO Fault Services if we have some capability */
4620 	if (igb->fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
4621 		igb_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
4622 		fma_acc_flag = 1;
4623 	} else {
4624 		igb_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
4625 		fma_acc_flag = 0;
4626 	}
4627 
4628 	if (igb->fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
4629 		fma_dma_flag = 1;
4630 	} else {
4631 		fma_dma_flag = 0;
4632 	}
4633 
4634 	(void) igb_set_fma_flags(fma_acc_flag, fma_dma_flag);
4635 
4636 	if (igb->fm_capabilities) {
4637 
4638 		/* Register capabilities with IO Fault Services */
4639 		ddi_fm_init(igb->dip, &igb->fm_capabilities, &iblk);
4640 
4641 		/*
4642 		 * Initialize pci ereport capabilities if ereport capable
4643 		 */
4644 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
4645 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
4646 			pci_ereport_setup(igb->dip);
4647 
4648 		/*
4649 		 * Register error callback if error callback capable
4650 		 */
4651 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
4652 			ddi_fm_handler_register(igb->dip,
4653 			    igb_fm_error_cb, (void*) igb);
4654 	}
4655 }
4656 
4657 static void
4658 igb_fm_fini(igb_t *igb)
4659 {
4660 	/* Only unregister FMA capabilities if we registered some */
4661 	if (igb->fm_capabilities) {
4662 
4663 		/*
4664 		 * Release any resources allocated by pci_ereport_setup()
4665 		 */
4666 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
4667 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
4668 			pci_ereport_teardown(igb->dip);
4669 
4670 		/*
4671 		 * Un-register error callback if error callback capable
4672 		 */
4673 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
4674 			ddi_fm_handler_unregister(igb->dip);
4675 
4676 		/* Unregister from IO Fault Services */
4677 		ddi_fm_fini(igb->dip);
4678 	}
4679 }
4680 
4681 void
4682 igb_fm_ereport(igb_t *igb, char *detail)
4683 {
4684 	uint64_t ena;
4685 	char buf[FM_MAX_CLASS];
4686 
4687 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
4688 	ena = fm_ena_generate(0, FM_ENA_FMT1);
4689 	if (DDI_FM_EREPORT_CAP(igb->fm_capabilities)) {
4690 		ddi_fm_ereport_post(igb->dip, buf, ena, DDI_NOSLEEP,
4691 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
4692 	}
4693 }
4694