xref: /dragonfly/sys/dev/netif/ix/if_ix.c (revision 7bcb6caf)
1 /*
2  * Copyright (c) 2001-2017, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *
15  *  3. Neither the name of the Intel Corporation nor the names of its
16  *     contributors may be used to endorse or promote products derived from
17  *     this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "opt_ifpoll.h"
33 #include "opt_ix.h"
34 
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/endian.h>
38 #include <sys/interrupt.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/proc.h>
43 #include <sys/rman.h>
44 #include <sys/serialize.h>
45 #include <sys/serialize2.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 #include <sys/taskqueue.h>
51 
52 #include <net/bpf.h>
53 #include <net/ethernet.h>
54 #include <net/if.h>
55 #include <net/if_arp.h>
56 #include <net/if_dl.h>
57 #include <net/if_media.h>
58 #include <net/ifq_var.h>
59 #include <net/if_ringmap.h>
60 #include <net/toeplitz.h>
61 #include <net/toeplitz2.h>
62 #include <net/vlan/if_vlan_var.h>
63 #include <net/vlan/if_vlan_ether.h>
64 #include <net/if_poll.h>
65 
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/ip.h>
69 
70 #include <bus/pci/pcivar.h>
71 #include <bus/pci/pcireg.h>
72 
73 #include <dev/netif/ix/ixgbe_common.h>
74 #include <dev/netif/ix/ixgbe_api.h>
75 #include <dev/netif/ix/if_ix.h>
76 
77 #define IX_IFM_DEFAULT		(IFM_ETHER | IFM_AUTO)
78 
79 #ifdef IX_RSS_DEBUG
80 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...) \
81 do { \
82 	if (sc->rss_debug >= lvl) \
83 		if_printf(&sc->arpcom.ac_if, fmt, __VA_ARGS__); \
84 } while (0)
85 #else	/* !IX_RSS_DEBUG */
86 #define IX_RSS_DPRINTF(sc, lvl, fmt, ...)	((void)0)
87 #endif	/* IX_RSS_DEBUG */
88 
89 #define IX_NAME			"Intel(R) PRO/10GbE "
90 #define IX_DEVICE(id) \
91 	{ IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_##id, IX_NAME #id }
92 #define IX_DEVICE_NULL		{ 0, 0, NULL }
93 
94 static struct ix_device {
95 	uint16_t	vid;
96 	uint16_t	did;
97 	const char	*desc;
98 } ix_devices[] = {
99 	IX_DEVICE(82598AF_DUAL_PORT),
100 	IX_DEVICE(82598AF_SINGLE_PORT),
101 	IX_DEVICE(82598EB_CX4),
102 	IX_DEVICE(82598AT),
103 	IX_DEVICE(82598AT2),
104 	IX_DEVICE(82598),
105 	IX_DEVICE(82598_DA_DUAL_PORT),
106 	IX_DEVICE(82598_CX4_DUAL_PORT),
107 	IX_DEVICE(82598EB_XF_LR),
108 	IX_DEVICE(82598_SR_DUAL_PORT_EM),
109 	IX_DEVICE(82598EB_SFP_LOM),
110 	IX_DEVICE(82599_KX4),
111 	IX_DEVICE(82599_KX4_MEZZ),
112 	IX_DEVICE(82599_SFP),
113 	IX_DEVICE(82599_XAUI_LOM),
114 	IX_DEVICE(82599_CX4),
115 	IX_DEVICE(82599_T3_LOM),
116 	IX_DEVICE(82599_COMBO_BACKPLANE),
117 	IX_DEVICE(82599_BACKPLANE_FCOE),
118 	IX_DEVICE(82599_SFP_SF2),
119 	IX_DEVICE(82599_SFP_FCOE),
120 	IX_DEVICE(82599EN_SFP),
121 	IX_DEVICE(82599_SFP_SF_QP),
122 	IX_DEVICE(82599_QSFP_SF_QP),
123 	IX_DEVICE(X540T),
124 	IX_DEVICE(X540T1),
125 	IX_DEVICE(X550T),
126 	IX_DEVICE(X550T1),
127 	IX_DEVICE(X550EM_X_KR),
128 	IX_DEVICE(X550EM_X_KX4),
129 	IX_DEVICE(X550EM_X_10G_T),
130 	IX_DEVICE(X550EM_X_1G_T),
131 	IX_DEVICE(X550EM_X_SFP),
132 	IX_DEVICE(X550EM_A_KR),
133 	IX_DEVICE(X550EM_A_KR_L),
134 	IX_DEVICE(X550EM_A_SFP),
135 	IX_DEVICE(X550EM_A_SFP_N),
136 	IX_DEVICE(X550EM_A_SGMII),
137 	IX_DEVICE(X550EM_A_SGMII_L),
138 	IX_DEVICE(X550EM_A_10G_T),
139 	IX_DEVICE(X550EM_A_1G_T),
140 	IX_DEVICE(X550EM_A_1G_T_L),
141 #if 0
142 	IX_DEVICE(X540_BYPASS),
143 	IX_DEVICE(82599_BYPASS),
144 #endif
145 
146 	/* required last entry */
147 	IX_DEVICE_NULL
148 };
149 
150 static int	ix_probe(device_t);
151 static int	ix_attach(device_t);
152 static int	ix_detach(device_t);
153 static int	ix_shutdown(device_t);
154 
155 static void	ix_serialize(struct ifnet *, enum ifnet_serialize);
156 static void	ix_deserialize(struct ifnet *, enum ifnet_serialize);
157 static int	ix_tryserialize(struct ifnet *, enum ifnet_serialize);
158 #ifdef INVARIANTS
159 static void	ix_serialize_assert(struct ifnet *, enum ifnet_serialize,
160 		    boolean_t);
161 #endif
162 static void	ix_start(struct ifnet *, struct ifaltq_subque *);
163 static void	ix_watchdog(struct ifaltq_subque *);
164 static int	ix_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
165 static void	ix_init(void *);
166 static void	ix_stop(struct ix_softc *);
167 static void	ix_media_status(struct ifnet *, struct ifmediareq *);
168 static int	ix_media_change(struct ifnet *);
169 static void	ix_timer(void *);
170 #ifdef IFPOLL_ENABLE
171 static void	ix_npoll(struct ifnet *, struct ifpoll_info *);
172 static void	ix_npoll_rx(struct ifnet *, void *, int);
173 static void	ix_npoll_rx_direct(struct ifnet *, void *, int);
174 static void	ix_npoll_tx(struct ifnet *, void *, int);
175 static void	ix_npoll_status(struct ifnet *);
176 #endif
177 
178 static void	ix_add_sysctl(struct ix_softc *);
179 static void	ix_add_intr_rate_sysctl(struct ix_softc *, int,
180 		    const char *, int (*)(SYSCTL_HANDLER_ARGS), const char *);
181 static int	ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
182 static int	ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS);
183 static int	ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS);
184 static int	ix_sysctl_txd(SYSCTL_HANDLER_ARGS);
185 static int	ix_sysctl_rxd(SYSCTL_HANDLER_ARGS);
186 static int	ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS);
187 static int	ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int);
188 static int	ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS);
189 static int	ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS);
190 static int	ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS);
191 static int	ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS);
192 #if 0
193 static void     ix_add_hw_stats(struct ix_softc *);
194 #endif
195 
196 static void	ix_watchdog_reset(struct ix_softc *);
197 static void	ix_watchdog_task(void *, int);
198 static void	ix_sync_netisr(struct ix_softc *, int);
199 static void	ix_slot_info(struct ix_softc *);
200 static int	ix_alloc_rings(struct ix_softc *);
201 static void	ix_free_rings(struct ix_softc *);
202 static void	ix_setup_ifp(struct ix_softc *);
203 static void	ix_setup_serialize(struct ix_softc *);
204 static void	ix_setup_caps(struct ix_softc *);
205 static void	ix_set_ring_inuse(struct ix_softc *, boolean_t);
206 static void	ix_set_timer_cpuid(struct ix_softc *, boolean_t);
207 static void	ix_update_stats(struct ix_softc *);
208 static void	ix_detect_fanfail(struct ix_softc *, uint32_t, boolean_t);
209 
210 static void	ix_set_promisc(struct ix_softc *);
211 static void	ix_set_multi(struct ix_softc *);
212 static void	ix_set_vlan(struct ix_softc *);
213 static uint8_t	*ix_mc_array_itr(struct ixgbe_hw *, uint8_t **, uint32_t *);
214 static enum ixgbe_fc_mode ix_ifmedia2fc(int);
215 static const char *ix_ifmedia2str(int);
216 static const char *ix_fc2str(enum ixgbe_fc_mode);
217 
218 static void	ix_get_txring_cnt(const struct ix_softc *, int *, int *);
219 static int	ix_get_txring_inuse(const struct ix_softc *, boolean_t);
220 static void	ix_init_tx_ring(struct ix_tx_ring *);
221 static void	ix_free_tx_ring(struct ix_tx_ring *);
222 static int	ix_create_tx_ring(struct ix_tx_ring *);
223 static void	ix_destroy_tx_ring(struct ix_tx_ring *, int);
224 static void	ix_init_tx_unit(struct ix_softc *);
225 static int	ix_encap(struct ix_tx_ring *, struct mbuf **,
226 		    uint16_t *, int *);
227 static int	ix_tx_ctx_setup(struct ix_tx_ring *,
228 		    const struct mbuf *, uint32_t *, uint32_t *);
229 static int	ix_tso_ctx_setup(struct ix_tx_ring *,
230 		    const struct mbuf *, uint32_t *, uint32_t *);
231 static void	ix_txeof(struct ix_tx_ring *, int);
232 static void	ix_txgc(struct ix_tx_ring *);
233 static void	ix_txgc_timer(void *);
234 
235 static void	ix_get_rxring_cnt(const struct ix_softc *, int *, int *);
236 static int	ix_get_rxring_inuse(const struct ix_softc *, boolean_t);
237 static int	ix_init_rx_ring(struct ix_rx_ring *);
238 static void	ix_free_rx_ring(struct ix_rx_ring *);
239 static int	ix_create_rx_ring(struct ix_rx_ring *);
240 static void	ix_destroy_rx_ring(struct ix_rx_ring *, int);
241 static void	ix_init_rx_unit(struct ix_softc *, boolean_t);
242 #if 0
243 static void	ix_setup_hw_rsc(struct ix_rx_ring *);
244 #endif
245 static int	ix_newbuf(struct ix_rx_ring *, int, boolean_t);
246 static void	ix_rxeof(struct ix_rx_ring *, int);
247 static void	ix_rx_discard(struct ix_rx_ring *, int, boolean_t);
248 static void	ix_enable_rx_drop(struct ix_softc *);
249 static void	ix_disable_rx_drop(struct ix_softc *);
250 
251 static void	ix_config_gpie(struct ix_softc *);
252 static void	ix_alloc_msix(struct ix_softc *);
253 static void	ix_free_msix(struct ix_softc *, boolean_t);
254 static void	ix_setup_msix_eims(const struct ix_softc *, int,
255 		    uint32_t *, uint32_t *);
256 static int	ix_alloc_intr(struct ix_softc *);
257 static void	ix_free_intr(struct ix_softc *);
258 static int	ix_setup_intr(struct ix_softc *);
259 static void	ix_teardown_intr(struct ix_softc *, int);
260 static void	ix_enable_intr(struct ix_softc *);
261 static void	ix_disable_intr(struct ix_softc *);
262 static void	ix_set_ivar(struct ix_softc *, uint8_t, uint8_t, int8_t);
263 static void	ix_set_eitr(struct ix_softc *, int, int);
264 static void	ix_intr_status(struct ix_softc *, uint32_t);
265 static void	ix_intr_82598(void *);
266 static void	ix_intr(void *);
267 static void	ix_msix_rxtx(void *);
268 static void	ix_msix_rx(void *);
269 static void	ix_msix_tx(void *);
270 static void	ix_msix_status(void *);
271 
272 static void	ix_config_link(struct ix_softc *);
273 static boolean_t ix_sfp_probe(struct ix_softc *);
274 static boolean_t ix_is_sfp(struct ixgbe_hw *);
275 static void	ix_update_link_status(struct ix_softc *);
276 static void	ix_handle_link(struct ix_softc *);
277 static void	ix_handle_mod(struct ix_softc *);
278 static void	ix_handle_msf(struct ix_softc *);
279 static void	ix_handle_phy(struct ix_softc *);
280 static int	ix_powerdown(struct ix_softc *);
281 static void	ix_config_flowctrl(struct ix_softc *);
282 static void	ix_config_dmac(struct ix_softc *);
283 static void	ix_init_media(struct ix_softc *);
284 
285 static device_method_t ix_methods[] = {
286 	/* Device interface */
287 	DEVMETHOD(device_probe,		ix_probe),
288 	DEVMETHOD(device_attach,	ix_attach),
289 	DEVMETHOD(device_detach,	ix_detach),
290 	DEVMETHOD(device_shutdown,	ix_shutdown),
291 	DEVMETHOD_END
292 };
293 
294 static driver_t ix_driver = {
295 	"ix",
296 	ix_methods,
297 	sizeof(struct ix_softc)
298 };
299 
300 static devclass_t ix_devclass;
301 
302 DECLARE_DUMMY_MODULE(if_ix);
303 DRIVER_MODULE(if_ix, pci, ix_driver, ix_devclass, NULL, NULL);
304 
305 static int	ix_msi_enable = 1;
306 static int	ix_msix_enable = 1;
307 static int	ix_rxr = 0;
308 static int	ix_txr = 0;
309 static int	ix_txd = IX_PERF_TXD;
310 static int	ix_rxd = IX_PERF_RXD;
311 static int	ix_unsupported_sfp = 0;
312 static int	ix_direct_input = 1;
313 
314 static char	ix_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_NONE;
315 
316 TUNABLE_INT("hw.ix.msi.enable", &ix_msi_enable);
317 TUNABLE_INT("hw.ix.msix.enable", &ix_msix_enable);
318 TUNABLE_INT("hw.ix.rxr", &ix_rxr);
319 TUNABLE_INT("hw.ix.txr", &ix_txr);
320 TUNABLE_INT("hw.ix.txd", &ix_txd);
321 TUNABLE_INT("hw.ix.rxd", &ix_rxd);
322 TUNABLE_INT("hw.ix.unsupported_sfp", &ix_unsupported_sfp);
323 TUNABLE_STR("hw.ix.flow_ctrl", ix_flowctrl, sizeof(ix_flowctrl));
324 TUNABLE_INT("hw.ix.direct_input", &ix_direct_input);
325 
326 /*
327  * Smart speed setting, enabled by default.  Because it is applied
328  * during attach, it can currently only be changed at compile time;
329  * set this to 'ixgbe_smart_speed_off' to disable.
330  */
331 static const enum ixgbe_smart_speed ix_smart_speed =
332     ixgbe_smart_speed_on;
333 
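/*
 * Decay the TX "running" count; once it drops to zero while completed
 * mbufs are still pending reclamation, collect them through ix_txgc().
 */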
334 static __inline void
335 ix_try_txgc(struct ix_tx_ring *txr, int8_t dec)
336 {
337 
338 	if (txr->tx_running > 0) {
339 		txr->tx_running -= dec;
340 		if (txr->tx_running <= 0 && txr->tx_nmbuf &&
341 		    txr->tx_avail < txr->tx_ndesc &&
342 		    txr->tx_avail + txr->tx_intr_nsegs > txr->tx_ndesc)
343 			ix_txgc(txr);
344 	}
345 }
346 
347 static void
348 ix_txgc_timer(void *xtxr)
349 {
350 	struct ix_tx_ring *txr = xtxr;
351 	struct ifnet *ifp = &txr->tx_sc->arpcom.ac_if;
352 
353 	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
354 	    (IFF_RUNNING | IFF_UP))
355 		return;
356 
357 	if (!lwkt_serialize_try(&txr->tx_serialize))
358 		goto done;
359 
360 	if ((ifp->if_flags & (IFF_RUNNING | IFF_UP | IFF_NPOLLING)) !=
361 	    (IFF_RUNNING | IFF_UP)) {
362 		lwkt_serialize_exit(&txr->tx_serialize);
363 		return;
364 	}
365 	ix_try_txgc(txr, IX_TX_RUNNING_DEC);
366 
367 	lwkt_serialize_exit(&txr->tx_serialize);
368 done:
369 	callout_reset(&txr->tx_gc_timer, 1, ix_txgc_timer, txr);
370 }
371 
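/*
 * TX interrupt path: reclaim completed descriptors and restart
 * transmission if the subqueue still has packets queued.
 */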
372 static __inline void
373 ix_tx_intr(struct ix_tx_ring *txr, int hdr)
374 {
375 
376 	ix_txeof(txr, hdr);
377 	if (!ifsq_is_empty(txr->tx_ifsq))
378 		ifsq_devstart(txr->tx_ifsq);
379 }
380 
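/*
 * Release a TX buffer: unload its DMA map and free the attached mbuf.
 */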
381 static __inline void
382 ix_free_txbuf(struct ix_tx_ring *txr, struct ix_tx_buf *txbuf)
383 {
384 
385 	KKASSERT(txbuf->m_head != NULL);
386 	KKASSERT(txr->tx_nmbuf > 0);
387 	txr->tx_nmbuf--;
388 
389 	bus_dmamap_unload(txr->tx_tag, txbuf->map);
390 	m_freem(txbuf->m_head);
391 	txbuf->m_head = NULL;
392 }
393 
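/*
 * Match the PCI vendor/device ID against the supported device table.
 */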
394 static int
395 ix_probe(device_t dev)
396 {
397 	const struct ix_device *d;
398 	uint16_t vid, did;
399 
400 	vid = pci_get_vendor(dev);
401 	did = pci_get_device(dev);
402 
403 	for (d = ix_devices; d->desc != NULL; ++d) {
404 		if (vid == d->vid && did == d->did) {
405 			device_set_desc(dev, d->desc);
406 			return 0;
407 		}
408 	}
409 	return ENXIO;
410 }
411 
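/*
 * Report the maximum RX ring count for this MAC type and the ring
 * count requested through the "rxr" device hint / hw.ix.rxr tunable.
 */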
412 static void
413 ix_get_rxring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
414 {
415 
416 	switch (sc->hw.mac.type) {
417 	case ixgbe_mac_X550:
418 	case ixgbe_mac_X550EM_x:
419 	case ixgbe_mac_X550EM_a:
420 		*ring_cntmax = IX_MAX_RXRING_X550;
421 		break;
422 
423 	default:
424 		*ring_cntmax = IX_MAX_RXRING;
425 		break;
426 	}
427 	*ring_cnt = device_getenv_int(sc->dev, "rxr", ix_rxr);
428 }
429 
430 static void
431 ix_get_txring_cnt(const struct ix_softc *sc, int *ring_cnt, int *ring_cntmax)
432 {
433 
434 	switch (sc->hw.mac.type) {
435 	case ixgbe_mac_82598EB:
436 		*ring_cntmax = IX_MAX_TXRING_82598;
437 		break;
438 
439 	case ixgbe_mac_82599EB:
440 		*ring_cntmax = IX_MAX_TXRING_82599;
441 		break;
442 
443 	case ixgbe_mac_X540:
444 		*ring_cntmax = IX_MAX_TXRING_X540;
445 		break;
446 
447 	case ixgbe_mac_X550:
448 	case ixgbe_mac_X550EM_x:
449 	case ixgbe_mac_X550EM_a:
450 		*ring_cntmax = IX_MAX_TXRING_X550;
451 		break;
452 
453 	default:
454 		*ring_cntmax = IX_MAX_TXRING;
455 		break;
456 	}
457 	*ring_cnt = device_getenv_int(sc->dev, "txr", ix_txr);
458 }
459 
460 static int
461 ix_attach(device_t dev)
462 {
463 	struct ix_softc *sc = device_get_softc(dev);
464 	struct ixgbe_hw *hw;
465 	int error, ring_cnt, ring_cntmax;
466 	uint32_t ctrl_ext;
467 	char flowctrl[IFM_ETH_FC_STRLEN];
468 
469 	sc->dev = dev;
470 	hw = &sc->hw;
471 	hw->back = sc;
472 
473 	if_initname(&sc->arpcom.ac_if, device_get_name(dev),
474 	    device_get_unit(dev));
475 	ifmedia_init(&sc->media, IFM_IMASK | IFM_ETH_FCMASK,
476 	    ix_media_change, ix_media_status);
477 
478 	/* Save frame size */
479 	sc->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHER_CRC_LEN;
480 
481 	sc->direct_input = ix_direct_input;
482 	TASK_INIT(&sc->wdog_task, 0, ix_watchdog_task, sc);
483 
484 	callout_init_mp(&sc->timer);
485 	lwkt_serialize_init(&sc->main_serialize);
486 
487 	/*
488 	 * Save off the information about this board
489 	 */
490 	hw->vendor_id = pci_get_vendor(dev);
491 	hw->device_id = pci_get_device(dev);
492 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
493 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
494 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
495 
496 	/* Enable bus mastering */
497 	pci_enable_busmaster(dev);
498 
499 	/*
500 	 * Allocate IO memory
501 	 */
502 	sc->mem_rid = PCIR_BAR(0);
503 	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
504 	    &sc->mem_rid, RF_ACTIVE);
505 	if (sc->mem_res == NULL) {
506 		device_printf(dev, "Unable to allocate bus resource: memory\n");
507 		error = ENXIO;
508 		goto failed;
509 	}
510 
511 	sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->mem_res);
512 	sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->mem_res);
513 
514 	sc->hw.hw_addr = (uint8_t *)&sc->osdep.mem_bus_space_handle;
515 
516 	/* Let hardware know driver is loaded */
517 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
518 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
519 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
520 
521 	/*
522 	 * Initialize the shared code
523 	 */
524 	if (ixgbe_init_shared_code(hw)) {
525 		device_printf(dev, "Unable to initialize the shared code\n");
526 		error = ENXIO;
527 		goto failed;
528 	}
529 
530 	if (hw->mbx.ops.init_params)
531 		hw->mbx.ops.init_params(hw);
532 
533 	hw->allow_unsupported_sfp = ix_unsupported_sfp;
534 
535 	/* Pick up the 82599 settings */
536 	if (hw->mac.type != ixgbe_mac_82598EB)
537 		hw->phy.smart_speed = ix_smart_speed;
538 
539 	/* Setup hardware capabilities */
540 	ix_setup_caps(sc);
541 
542 	/* Allocate multicast array memory. */
543 	sc->mta = kmalloc(sizeof(*sc->mta) * IX_MAX_MCASTADDR,
544 	    M_DEVBUF, M_WAITOK);
545 
546 	/* Save initial wake up filter configuration; WOL is disabled. */
547 	sc->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
548 
549 	/* Verify adapter fan is still functional (if applicable) */
550 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
551 		ix_detect_fanfail(sc, IXGBE_READ_REG(hw, IXGBE_ESDP), FALSE);
552 
553 	/* Ensure SW/FW semaphore is free */
554 	ixgbe_init_swfw_semaphore(hw);
555 
556 #ifdef notyet
557 	/* Enable EEE power saving */
558 	if (sc->caps & IX_CAP_EEE)
559 		hw->mac.ops.setup_eee(hw, true);
560 #endif
561 
562 	/*
563 	 * Configure total supported RX/TX ring count
564 	 */
565 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
566 	sc->rx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
567 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
568 	sc->tx_rmap = if_ringmap_alloc(dev, ring_cnt, ring_cntmax);
569 	if_ringmap_match(dev, sc->rx_rmap, sc->tx_rmap);
570 
571 	sc->rx_ring_cnt = if_ringmap_count(sc->rx_rmap);
572 	sc->rx_ring_inuse = sc->rx_ring_cnt;
573 	sc->tx_ring_cnt = if_ringmap_count(sc->tx_rmap);
574 	sc->tx_ring_inuse = sc->tx_ring_cnt;
575 
576 	/* Allocate TX/RX rings */
577 	error = ix_alloc_rings(sc);
578 	if (error)
579 		goto failed;
580 
581 	/* Allocate interrupt */
582 	error = ix_alloc_intr(sc);
583 	if (error)
584 		goto failed;
585 
586 	/* Setup serializes */
587 	ix_setup_serialize(sc);
588 
589 	hw->phy.reset_if_overtemp = TRUE;
590 	error = ixgbe_reset_hw(hw);
591 	hw->phy.reset_if_overtemp = FALSE;
592 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
593 		/*
594 		 * No optics in this port; ask timer routine
595 		 * to probe for later insertion.
596 		 */
597 		sc->sfp_probe = TRUE;
598 		error = 0;
599 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
600 		device_printf(dev, "Unsupported SFP+ module detected!\n");
601 		error = EIO;
602 		goto failed;
603 	} else if (error) {
604 		device_printf(dev, "Hardware initialization failed\n");
605 		error = EIO;
606 		goto failed;
607 	}
608 
609 	/* Make sure we have a good EEPROM before we read from it */
610 	if (ixgbe_validate_eeprom_checksum(&sc->hw, NULL) < 0) {
611 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
612 		error = EIO;
613 		goto failed;
614 	}
615 
616 	error = ixgbe_start_hw(hw);
617 	if (error == IXGBE_ERR_EEPROM_VERSION) {
618 		device_printf(dev, "Pre-production device detected\n");
619 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
620 		device_printf(dev, "Unsupported SFP+ Module\n");
621 		error = EIO;
622 		goto failed;
623 	} else if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
624 		device_printf(dev, "No SFP+ Module found\n");
625 	}
626 
627 	/* Enable the optics for 82599 SFP+ fiber */
628 	ixgbe_enable_tx_laser(hw);
629 
630 	/* Enable power to the phy. */
631 	ixgbe_set_phy_power(hw, TRUE);
632 
633 	sc->ifm_media = IX_IFM_DEFAULT;
634 	/* Get default flow control settings */
635 	device_getenv_string(dev, "flow_ctrl", flowctrl, sizeof(flowctrl),
636 	    ix_flowctrl);
637 	sc->ifm_media |= ifmedia_str2ethfc(flowctrl);
638 	sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
639 
640 	/* Setup OS specific network interface */
641 	ix_setup_ifp(sc);
642 
643 	/* Add sysctl tree */
644 	ix_add_sysctl(sc);
645 
646 	error = ix_setup_intr(sc);
647 	if (error) {
648 		ether_ifdetach(&sc->arpcom.ac_if);
649 		goto failed;
650 	}
651 
652 	/* Initialize statistics */
653 	ix_update_stats(sc);
654 
655 	/* Check PCIE slot type/speed/width */
656 	ix_slot_info(sc);
657 
658 	return 0;
659 failed:
660 	ix_detach(dev);
661 	return error;
662 }
663 
664 static int
665 ix_detach(device_t dev)
666 {
667 	struct ix_softc *sc = device_get_softc(dev);
668 
669 	if (device_is_attached(dev)) {
670 		struct ifnet *ifp = &sc->arpcom.ac_if;
671 
672 		ix_sync_netisr(sc, IFF_UP);
673 		taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
674 
675 		ifnet_serialize_all(ifp);
676 
677 		ix_powerdown(sc);
678 		ix_teardown_intr(sc, sc->intr_cnt);
679 
680 		ifnet_deserialize_all(ifp);
681 
682 		callout_terminate(&sc->timer);
683 		ether_ifdetach(ifp);
684 	}
685 
686 	if (sc->mem_res != NULL) {
687 		uint32_t ctrl_ext;
688 
689 		/* Let hardware know driver is unloading */
690 		ctrl_ext = IXGBE_READ_REG(&sc->hw, IXGBE_CTRL_EXT);
691 		ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
692 		IXGBE_WRITE_REG(&sc->hw, IXGBE_CTRL_EXT, ctrl_ext);
693 	}
694 
695 	ifmedia_removeall(&sc->media);
696 	bus_generic_detach(dev);
697 
698 	ix_free_intr(sc);
699 
700 	if (sc->msix_mem_res != NULL) {
701 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_mem_rid,
702 		    sc->msix_mem_res);
703 	}
704 	if (sc->mem_res != NULL) {
705 		bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid,
706 		    sc->mem_res);
707 	}
708 
709 	ix_free_rings(sc);
710 
711 	if (sc->mta != NULL)
712 		kfree(sc->mta, M_DEVBUF);
713 	if (sc->serializes != NULL)
714 		kfree(sc->serializes, M_DEVBUF);
715 
716 	if (sc->rx_rmap != NULL)
717 		if_ringmap_free(sc->rx_rmap);
718 	if (sc->rx_rmap_intr != NULL)
719 		if_ringmap_free(sc->rx_rmap_intr);
720 	if (sc->tx_rmap != NULL)
721 		if_ringmap_free(sc->tx_rmap);
722 	if (sc->tx_rmap_intr != NULL)
723 		if_ringmap_free(sc->tx_rmap_intr);
724 
725 	return 0;
726 }
727 
728 static int
729 ix_shutdown(device_t dev)
730 {
731 	struct ix_softc *sc = device_get_softc(dev);
732 	struct ifnet *ifp = &sc->arpcom.ac_if;
733 
734 	ix_sync_netisr(sc, IFF_UP);
735 	taskqueue_drain(taskqueue_thread[0], &sc->wdog_task);
736 
737 	ifnet_serialize_all(ifp);
738 	ix_powerdown(sc);
739 	ifnet_deserialize_all(ifp);
740 
741 	return 0;
742 }
743 
744 static void
745 ix_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
746 {
747 	struct ix_softc *sc = ifp->if_softc;
748 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
749 	int idx = -1;
750 	uint16_t nsegs;
751 
752 	KKASSERT(txr->tx_ifsq == ifsq);
753 	ASSERT_SERIALIZED(&txr->tx_serialize);
754 
755 	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
756 		return;
757 
758 	if (!sc->link_active || (txr->tx_flags & IX_TXFLAG_ENABLED) == 0) {
759 		ifsq_purge(ifsq);
760 		return;
761 	}
762 
763 	while (!ifsq_is_empty(ifsq)) {
764 		struct mbuf *m_head;
765 
766 		if (txr->tx_avail <= IX_MAX_SCATTER + IX_TX_RESERVED) {
767 			ifsq_set_oactive(ifsq);
768 			txr->tx_watchdog.wd_timer = 5;
769 			break;
770 		}
771 
772 		m_head = ifsq_dequeue(ifsq);
773 		if (m_head == NULL)
774 			break;
775 
776 		if (ix_encap(txr, &m_head, &nsegs, &idx)) {
777 			IFNET_STAT_INC(ifp, oerrors, 1);
778 			continue;
779 		}
780 
781 		/*
782 		 * TX interrupts are aggressively aggregated, so increasing
783 		 * opackets at TX interrupt time will make the opackets
784 		 * statistics vastly inaccurate; we do the opackets increment
785 		 * now.
786 		 */
787 		IFNET_STAT_INC(ifp, opackets, 1);
788 
789 		if (nsegs >= txr->tx_wreg_nsegs) {
790 			IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
791 			nsegs = 0;
792 			idx = -1;
793 		}
794 
795 		ETHER_BPF_MTAP(ifp, m_head);
796 	}
797 	if (idx >= 0)
798 		IXGBE_WRITE_REG(&sc->hw, IXGBE_TDT(txr->tx_idx), idx);
799 	txr->tx_running = IX_TX_RUNNING;
800 }
801 
802 static int
803 ix_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
804 {
805 	struct ix_softc *sc = ifp->if_softc;
806 	struct ifreq *ifr = (struct ifreq *) data;
807 	int error = 0, mask, reinit;
808 
809 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
810 
811 	switch (command) {
812 	case SIOCSIFMTU:
813 		if (ifr->ifr_mtu > IX_MAX_MTU) {
814 			error = EINVAL;
815 		} else {
816 			ifp->if_mtu = ifr->ifr_mtu;
817 			sc->max_frame_size = ifp->if_mtu + IX_MTU_HDR;
818 			ix_init(sc);
819 		}
820 		break;
821 
822 	case SIOCSIFFLAGS:
823 		if (ifp->if_flags & IFF_UP) {
824 			if (ifp->if_flags & IFF_RUNNING) {
825 				if ((ifp->if_flags ^ sc->if_flags) &
826 				    (IFF_PROMISC | IFF_ALLMULTI))
827 					ix_set_promisc(sc);
828 			} else {
829 				ix_init(sc);
830 			}
831 		} else if (ifp->if_flags & IFF_RUNNING) {
832 			ix_stop(sc);
833 		}
834 		sc->if_flags = ifp->if_flags;
835 		break;
836 
837 	case SIOCADDMULTI:
838 	case SIOCDELMULTI:
839 		if (ifp->if_flags & IFF_RUNNING) {
840 			ix_disable_intr(sc);
841 			ix_set_multi(sc);
842 #ifdef IFPOLL_ENABLE
843 			if ((ifp->if_flags & IFF_NPOLLING) == 0)
844 #endif
845 				ix_enable_intr(sc);
846 		}
847 		break;
848 
849 	case SIOCSIFMEDIA:
850 	case SIOCGIFMEDIA:
851 		error = ifmedia_ioctl(ifp, ifr, &sc->media, command);
852 		break;
853 
854 	case SIOCSIFCAP:
855 		reinit = 0;
856 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
857 		if (mask & IFCAP_RXCSUM) {
858 			ifp->if_capenable ^= IFCAP_RXCSUM;
859 			reinit = 1;
860 		}
861 		if (mask & IFCAP_VLAN_HWTAGGING) {
862 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
863 			reinit = 1;
864 		}
865 		if (mask & IFCAP_TXCSUM) {
866 			ifp->if_capenable ^= IFCAP_TXCSUM;
867 			if (ifp->if_capenable & IFCAP_TXCSUM)
868 				ifp->if_hwassist |= CSUM_OFFLOAD;
869 			else
870 				ifp->if_hwassist &= ~CSUM_OFFLOAD;
871 		}
872 		if (mask & IFCAP_TSO) {
873 			ifp->if_capenable ^= IFCAP_TSO;
874 			if (ifp->if_capenable & IFCAP_TSO)
875 				ifp->if_hwassist |= CSUM_TSO;
876 			else
877 				ifp->if_hwassist &= ~CSUM_TSO;
878 		}
879 		if (mask & IFCAP_RSS)
880 			ifp->if_capenable ^= IFCAP_RSS;
881 		if (reinit && (ifp->if_flags & IFF_RUNNING))
882 			ix_init(sc);
883 		break;
884 
885 #if 0
886 	case SIOCGI2C:
887 	{
888 		struct ixgbe_i2c_req	i2c;
889 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
890 		if (error)
891 			break;
892 		if ((i2c.dev_addr != 0xA0) && (i2c.dev_addr != 0xA2)) {
893 			error = EINVAL;
894 			break;
895 		}
896 		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
897 		    i2c.dev_addr, i2c.data);
898 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
899 		break;
900 	}
901 #endif
902 
903 	default:
904 		error = ether_ioctl(ifp, command, data);
905 		break;
906 	}
907 	return error;
908 }
909 
910 #define IXGBE_MHADD_MFS_SHIFT 16
911 
912 static void
913 ix_init(void *xsc)
914 {
915 	struct ix_softc *sc = xsc;
916 	struct ifnet *ifp = &sc->arpcom.ac_if;
917 	struct ixgbe_hw *hw = &sc->hw;
918 	uint32_t rxctrl;
919 	int i, error;
920 	boolean_t polling;
921 
922 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
923 
924 	ix_stop(sc);
925 
926 	polling = FALSE;
927 #ifdef IFPOLL_ENABLE
928 	if (ifp->if_flags & IFF_NPOLLING)
929 		polling = TRUE;
930 #endif
931 
932 	/* Configure # of used RX/TX rings */
933 	ix_set_ring_inuse(sc, polling);
934 	ifq_set_subq_divisor(&ifp->if_snd, sc->tx_ring_inuse);
935 
936 	/* Get the latest MAC address; the user can assign a LAA */
937 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
938 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
939 	hw->addr_ctrl.rar_used_count = 1;
940 
941 	/* Prepare transmit descriptors and buffers */
942 	for (i = 0; i < sc->tx_ring_inuse; ++i)
943 		ix_init_tx_ring(&sc->tx_rings[i]);
944 
945 	ixgbe_init_hw(hw);
946 	ix_init_tx_unit(sc);
947 
948 	/* Setup Multicast table */
949 	ix_set_multi(sc);
950 
951 	/* Prepare receive descriptors and buffers */
952 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
953 		error = ix_init_rx_ring(&sc->rx_rings[i]);
954 		if (error) {
955 			if_printf(ifp, "Could not initialize RX ring%d\n", i);
956 			ix_stop(sc);
957 			return;
958 		}
959 	}
960 
961 	/* Configure RX settings */
962 	ix_init_rx_unit(sc, polling);
963 
964 	/* Enable SDP & MSI-X interrupts based on adapter */
965 	ix_config_gpie(sc);
966 
967 	/* Set MTU size */
968 	if (ifp->if_mtu > ETHERMTU) {
969 		uint32_t mhadd;
970 
971 		/* aka IXGBE_MAXFRS on 82599 and newer */
972 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
973 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
974 		mhadd |= sc->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
975 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
976 	}
977 
978 	/*
979 	 * Enable TX rings
980 	 */
981 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
982 		uint32_t txdctl;
983 
984 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
985 		txdctl |= IXGBE_TXDCTL_ENABLE;
986 
987 		/*
988 		 * Set WTHRESH to 0, since TX head write-back is used
989 		 */
990 		txdctl &= ~(0x7f << 16);
991 
992 		/*
993 		 * When the internal queue falls below PTHRESH (32),
994 		 * start prefetching as long as there are at least
995 		 * HTHRESH (1) buffers ready. The values are taken
996 		 * from the Intel linux driver 3.8.21.
997 		 * Prefetching enables tx line rate even with 1 queue.
998 		 */
999 		txdctl |= (32 << 0) | (1 << 8);
1000 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1001 	}
1002 
1003 	/*
1004 	 * Enable RX rings
1005 	 */
1006 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
1007 		uint32_t rxdctl;
1008 		int k;
1009 
1010 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1011 		if (hw->mac.type == ixgbe_mac_82598EB) {
1012 			/*
1013 			 * PTHRESH = 21
1014 			 * HTHRESH = 4
1015 			 * WTHRESH = 8
1016 			 */
1017 			rxdctl &= ~0x3FFFFF;
1018 			rxdctl |= 0x080420;
1019 		}
1020 		rxdctl |= IXGBE_RXDCTL_ENABLE;
1021 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1022 		for (k = 0; k < 10; ++k) {
1023 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1024 			    IXGBE_RXDCTL_ENABLE)
1025 				break;
1026 			else
1027 				msec_delay(1);
1028 		}
1029 		wmb();
1030 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i),
1031 		    sc->rx_rings[0].rx_ndesc - 1);
1032 	}
1033 
1034 	/* Enable Receive engine */
1035 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1036 	if (hw->mac.type == ixgbe_mac_82598EB)
1037 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
1038 	rxctrl |= IXGBE_RXCTRL_RXEN;
1039 	ixgbe_enable_rx_dma(hw, rxctrl);
1040 
1041 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1042 		const struct ix_tx_ring *txr = &sc->tx_rings[i];
1043 
1044 		if (txr->tx_intr_vec >= 0) {
1045 			ix_set_ivar(sc, i, txr->tx_intr_vec, 1);
1046 		} else if (!polling) {
1047 			/*
1048 			 * An unconfigured TX interrupt vector can only
1049 			 * happen with MSI-X.
1050 			 */
1051 			KASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX,
1052 			    ("TX intr vector is not set"));
1053 			if (bootverbose)
1054 				if_printf(ifp, "IVAR skips TX ring %d\n", i);
1055 		}
1056 	}
1057 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
1058 		const struct ix_rx_ring *rxr = &sc->rx_rings[i];
1059 
1060 		if (polling && rxr->rx_intr_vec < 0)
1061 			continue;
1062 
1063 		KKASSERT(rxr->rx_intr_vec >= 0);
1064 		ix_set_ivar(sc, i, rxr->rx_intr_vec, 0);
1065 		if (rxr->rx_txr != NULL) {
1066 			/*
1067 			 * Piggyback the TX ring interrupt onto the RX
1068 			 * ring interrupt vector.
1069 			 */
1070 			KASSERT(rxr->rx_txr->tx_intr_vec < 0,
1071 			    ("piggybacked TX ring configured intr vector"));
1072 			ix_set_ivar(sc, rxr->rx_txr->tx_idx,
1073 			    rxr->rx_intr_vec, 1);
1074 			if (bootverbose) {
1075 				if_printf(ifp, "IVAR RX ring %d piggybacks "
1076 				    "TX ring %u\n", i, rxr->rx_txr->tx_idx);
1077 			}
1078 		}
1079 	}
1080 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
1081 		/* Set up the status MSI-X vector; it uses fixed entry 1 */
1082 		ix_set_ivar(sc, 1, sc->sts_msix_vec, -1);
1083 
1084 		/* Set up auto-mask for TX and RX rings */
1085 		if (hw->mac.type == ixgbe_mac_82598EB) {
1086 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1087 		} else {
1088 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1089 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1090 		}
1091 	} else {
1092 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EIMS_RTX_QUEUE);
1093 	}
1094 	for (i = 0; i < sc->intr_cnt; ++i)
1095 		ix_set_eitr(sc, i, sc->intr_data[i].intr_rate);
1096 
1097 	/*
1098 	 * Check on any SFP devices that need to be kick-started
1099 	 */
1100 	if (hw->phy.type == ixgbe_phy_none) {
1101 		error = hw->phy.ops.identify(hw);
1102 		if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1103 			if_printf(ifp,
1104 			    "Unsupported SFP+ module type was detected.\n");
1105 			/* XXX stop */
1106 			return;
1107 		}
1108 	}
1109 
1110 	/* Config/Enable Link */
1111 	ix_config_link(sc);
1112 
1113 	/* Hardware Packet Buffer & Flow Control setup */
1114 	ix_config_flowctrl(sc);
1115 
1116 	/* Initialize the FC settings */
1117 	ixgbe_start_hw(hw);
1118 
1119 	/* Set up VLAN support and filter */
1120 	ix_set_vlan(sc);
1121 
1122 	/* Setup DMA Coalescing */
1123 	ix_config_dmac(sc);
1124 
1125 	/*
1126 	 * Only enable interrupts if we are not polling; make sure
1127 	 * they are off otherwise.
1128 	 */
1129 	if (polling)
1130 		ix_disable_intr(sc);
1131 	else
1132 		ix_enable_intr(sc);
1133 
1134 	ifp->if_flags |= IFF_RUNNING;
1135 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
1136 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1137 
1138 		ifsq_clr_oactive(txr->tx_ifsq);
1139 		ifsq_watchdog_start(&txr->tx_watchdog);
1140 
1141 		if (!polling) {
1142 			callout_reset_bycpu(&txr->tx_gc_timer, 1,
1143 			    ix_txgc_timer, txr, txr->tx_intr_cpuid);
1144 		}
1145 	}
1146 
1147 	ix_set_timer_cpuid(sc, polling);
1148 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1149 }
1150 
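/*
 * Legacy/MSI interrupt handler: service RX ring 0 (and ring 1 when two
 * RSS rings are in use), TX ring 0 and any status events, then
 * re-enable the interrupt mask.
 */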
1151 static void
1152 ix_intr(void *xsc)
1153 {
1154 	struct ix_softc *sc = xsc;
1155 	struct ixgbe_hw	*hw = &sc->hw;
1156 	uint32_t eicr;
1157 
1158 	ASSERT_SERIALIZED(&sc->main_serialize);
1159 
1160 	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1161 	if (eicr == 0) {
1162 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1163 		return;
1164 	}
1165 
1166 	if (eicr & IX_RX0_INTR_MASK) {
1167 		struct ix_rx_ring *rxr = &sc->rx_rings[0];
1168 
1169 		lwkt_serialize_enter(&rxr->rx_serialize);
1170 		ix_rxeof(rxr, -1);
1171 		lwkt_serialize_exit(&rxr->rx_serialize);
1172 	}
1173 	if (eicr & IX_RX1_INTR_MASK) {
1174 		struct ix_rx_ring *rxr;
1175 
1176 		KKASSERT(sc->rx_ring_inuse == IX_MIN_RXRING_RSS);
1177 		rxr = &sc->rx_rings[1];
1178 
1179 		lwkt_serialize_enter(&rxr->rx_serialize);
1180 		ix_rxeof(rxr, -1);
1181 		lwkt_serialize_exit(&rxr->rx_serialize);
1182 	}
1183 
1184 	if (eicr & IX_TX_INTR_MASK) {
1185 		struct ix_tx_ring *txr = &sc->tx_rings[0];
1186 
1187 		lwkt_serialize_enter(&txr->tx_serialize);
1188 		ix_tx_intr(txr, *(txr->tx_hdr));
1189 		lwkt_serialize_exit(&txr->tx_serialize);
1190 	}
1191 
1192 	if (__predict_false(eicr & IX_EICR_STATUS))
1193 		ix_intr_status(sc, eicr);
1194 
1195 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
1196 }
1197 
1198 static void
1199 ix_intr_82598(void *xsc)
1200 {
1201 	struct ix_softc *sc = xsc;
1202 
1203 	ASSERT_SERIALIZED(&sc->main_serialize);
1204 
1205 	/* Software workaround for 82598 errata #26 */
1206 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
1207 
1208 	ix_intr(sc);
1209 }
1210 
1211 static void
1212 ix_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1213 {
1214 	struct ix_softc *sc = ifp->if_softc;
1215 	struct ifmedia *ifm = &sc->media;
1216 	int layer;
1217 
1218 	ix_update_link_status(sc);
1219 
1220 	ifmr->ifm_status = IFM_AVALID;
1221 	ifmr->ifm_active = IFM_ETHER;
1222 
1223 	if (!sc->link_active) {
1224 		if (IFM_SUBTYPE(ifm->ifm_media) != IFM_AUTO)
1225 			ifmr->ifm_active |= ifm->ifm_media;
1226 		else
1227 			ifmr->ifm_active |= IFM_NONE;
1228 		return;
1229 	}
1230 	ifmr->ifm_status |= IFM_ACTIVE;
1231 
1232 	layer = sc->phy_layer;
1233 
1234 	if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ||
1235 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ||
1236 	    (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) ||
1237 	    (layer & IXGBE_PHYSICAL_LAYER_10BASE_T)) {
1238 		switch (sc->link_speed) {
1239 		case IXGBE_LINK_SPEED_10GB_FULL:
1240 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
1241 			break;
1242 		case IXGBE_LINK_SPEED_1GB_FULL:
1243 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1244 			break;
1245 		case IXGBE_LINK_SPEED_100_FULL:
1246 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1247 			break;
1248 		case IXGBE_LINK_SPEED_10_FULL:
1249 			ifmr->ifm_active |= IFM_10_T | IFM_FDX;
1250 			break;
1251 		}
1252 	} else if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
1253 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
1254 		switch (sc->link_speed) {
1255 		case IXGBE_LINK_SPEED_10GB_FULL:
1256 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
1257 			break;
1258 		}
1259 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
1260 		switch (sc->link_speed) {
1261 		case IXGBE_LINK_SPEED_10GB_FULL:
1262 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
1263 			break;
1264 		case IXGBE_LINK_SPEED_1GB_FULL:
1265 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1266 			break;
1267 		}
1268 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
1269 		switch (sc->link_speed) {
1270 		case IXGBE_LINK_SPEED_10GB_FULL:
1271 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
1272 			break;
1273 		case IXGBE_LINK_SPEED_1GB_FULL:
1274 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
1275 			break;
1276 		}
1277 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) ||
1278 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)) {
1279 		switch (sc->link_speed) {
1280 		case IXGBE_LINK_SPEED_10GB_FULL:
1281 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1282 			break;
1283 		case IXGBE_LINK_SPEED_1GB_FULL:
1284 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1285 			break;
1286 		}
1287 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
1288 		switch (sc->link_speed) {
1289 		case IXGBE_LINK_SPEED_10GB_FULL:
1290 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1291 			break;
1292 		}
1293 	} else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
1294 		/*
1295 		 * XXX: These need to use the proper media types once
1296 		 * they're added.
1297 		 */
1298 		switch (sc->link_speed) {
1299 		case IXGBE_LINK_SPEED_10GB_FULL:
1300 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
1301 			break;
1302 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1303 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1304 			break;
1305 		case IXGBE_LINK_SPEED_1GB_FULL:
1306 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1307 			break;
1308 		}
1309 	} else if ((layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ||
1310 	    (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) ||
1311 	    (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)) {
1312 		/*
1313 		 * XXX: These need to use the proper media types once
1314 		 * they're added.
1315 		 */
1316 		switch (sc->link_speed) {
1317 		case IXGBE_LINK_SPEED_10GB_FULL:
1318 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
1319 			break;
1320 		case IXGBE_LINK_SPEED_2_5GB_FULL:
1321 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
1322 			break;
1323 		case IXGBE_LINK_SPEED_1GB_FULL:
1324 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
1325 			break;
1326 		}
1327 	}
1328 
1329 	/* If nothing is recognized... */
1330 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
1331 		ifmr->ifm_active |= IFM_NONE;
1332 
1333 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
1334 		ifmr->ifm_active |= (sc->ifm_media & IFM_ETH_FCMASK);
1335 
1336 	switch (sc->hw.fc.current_mode) {
1337 	case ixgbe_fc_full:
1338 		ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
1339 		break;
1340 	case ixgbe_fc_rx_pause:
1341 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
1342 		break;
1343 	case ixgbe_fc_tx_pause:
1344 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
1345 		break;
1346 	default:
1347 		break;
1348 	}
1349 }
1350 
1351 static int
1352 ix_media_change(struct ifnet *ifp)
1353 {
1354 	struct ix_softc *sc = ifp->if_softc;
1355 	struct ifmedia *ifm = &sc->media;
1356 	struct ixgbe_hw *hw = &sc->hw;
1357 
1358 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1359 		return (EINVAL);
1360 
1361 	if (hw->phy.media_type == ixgbe_media_type_backplane ||
1362 	    hw->mac.ops.setup_link == NULL) {
1363 		if ((ifm->ifm_media ^ sc->ifm_media) & IFM_ETH_FCMASK) {
1364 			/* Only flow control setting changes are allowed */
1365 			return (EOPNOTSUPP);
1366 		}
1367 	}
1368 
1369 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1370 	case IFM_AUTO:
1371 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
1372 		break;
1373 
1374 	case IFM_10G_T:
1375 	case IFM_10G_LRM:
1376 	case IFM_10G_SR:	/* XXX also KR */
1377 	case IFM_10G_LR:
1378 	case IFM_10G_CX4:	/* XXX also KX4 */
1379 	case IFM_10G_TWINAX:
1380 		sc->advspeed = IXGBE_LINK_SPEED_10GB_FULL;
1381 		break;
1382 
1383 	case IFM_1000_T:
1384 	case IFM_1000_LX:
1385 	case IFM_1000_SX:
1386 	case IFM_1000_CX:	/* XXX is KX */
1387 		sc->advspeed = IXGBE_LINK_SPEED_1GB_FULL;
1388 		break;
1389 
1390 	case IFM_100_TX:
1391 		sc->advspeed = IXGBE_LINK_SPEED_100_FULL;
1392 		break;
1393 
1394 	default:
1395 		if (bootverbose) {
1396 			if_printf(ifp, "Invalid media type %d!\n",
1397 			    ifm->ifm_media);
1398 		}
1399 		return EINVAL;
1400 	}
1401 	sc->ifm_media = ifm->ifm_media;
1402 
1403 #if 0
1404 	if (hw->mac.ops.setup_link != NULL) {
1405 		hw->mac.autotry_restart = TRUE;
1406 		hw->mac.ops.setup_link(hw, sc->advspeed, TRUE);
1407 	}
1408 #else
1409 	if (ifp->if_flags & IFF_RUNNING)
1410 		ix_init(sc);
1411 #endif
1412 	return 0;
1413 }
1414 
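/*
 * Ensure that the Ethernet, IP and TCP headers of a TSO frame reside
 * in the first mbuf, pulling them up if necessary.
 */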
1415 static __inline int
1416 ix_tso_pullup(struct mbuf **mp)
1417 {
1418 	int hoff, iphlen, thoff;
1419 	struct mbuf *m;
1420 
1421 	m = *mp;
1422 	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));
1423 
1424 	iphlen = m->m_pkthdr.csum_iphlen;
1425 	thoff = m->m_pkthdr.csum_thlen;
1426 	hoff = m->m_pkthdr.csum_lhlen;
1427 
1428 	KASSERT(iphlen > 0, ("invalid ip hlen"));
1429 	KASSERT(thoff > 0, ("invalid tcp hlen"));
1430 	KASSERT(hoff > 0, ("invalid ether hlen"));
1431 
1432 	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
1433 		m = m_pullup(m, hoff + iphlen + thoff);
1434 		if (m == NULL) {
1435 			*mp = NULL;
1436 			return ENOBUFS;
1437 		}
1438 		*mp = m;
1439 	}
1440 	return 0;
1441 }
1442 
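/*
 * Map an mbuf chain onto TX descriptors, setting up a checksum/TSO
 * context descriptor when needed; the TDT write itself is deferred to
 * the caller through *idx.
 */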
1443 static int
1444 ix_encap(struct ix_tx_ring *txr, struct mbuf **m_headp,
1445     uint16_t *segs_used, int *idx)
1446 {
1447 	uint32_t olinfo_status = 0, cmd_type_len, cmd_rs = 0;
1448 	int i, j, error, nsegs, first, maxsegs;
1449 	struct mbuf *m_head = *m_headp;
1450 	bus_dma_segment_t segs[IX_MAX_SCATTER];
1451 	bus_dmamap_t map;
1452 	struct ix_tx_buf *txbuf;
1453 	union ixgbe_adv_tx_desc *txd = NULL;
1454 
1455 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1456 		error = ix_tso_pullup(m_headp);
1457 		if (__predict_false(error))
1458 			return error;
1459 		m_head = *m_headp;
1460 	}
1461 
1462 	/* Basic descriptor defines */
1463 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1464 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1465 
1466 	if (m_head->m_flags & M_VLANTAG)
1467 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1468 
1469 	/*
1470 	 * Remember the first descriptor used; its DMA map (loaded with
1471 	 * this packet below) is swapped onto the last descriptor's
1472 	 * buffer, where the mbuf is recorded.
1473 	 */
1474 	first = txr->tx_next_avail;
1475 	txbuf = &txr->tx_buf[first];
1476 	map = txbuf->map;
1477 
1478 	/*
1479 	 * Map the packet for DMA.
1480 	 */
1481 	maxsegs = txr->tx_avail - IX_TX_RESERVED;
1482 	if (maxsegs > IX_MAX_SCATTER)
1483 		maxsegs = IX_MAX_SCATTER;
1484 
1485 	error = bus_dmamap_load_mbuf_defrag(txr->tx_tag, map, m_headp,
1486 	    segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1487 	if (__predict_false(error)) {
1488 		m_freem(*m_headp);
1489 		*m_headp = NULL;
1490 		return error;
1491 	}
1492 	bus_dmamap_sync(txr->tx_tag, map, BUS_DMASYNC_PREWRITE);
1493 
1494 	m_head = *m_headp;
1495 
1496 	/*
1497 	 * Set up the appropriate offload context if requested;
1498 	 * this may consume one TX descriptor.
1499 	 */
1500 	if (ix_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status)) {
1501 		(*segs_used)++;
1502 		txr->tx_nsegs++;
1503 	}
1504 
1505 	*segs_used += nsegs;
1506 	txr->tx_nsegs += nsegs;
1507 	if (txr->tx_nsegs >= txr->tx_intr_nsegs) {
1508 		/*
1509 		 * Report Status (RS) is turned on every intr_nsegs
1510 		 * descriptors (roughly).
1511 		 */
1512 		txr->tx_nsegs = 0;
1513 		cmd_rs = IXGBE_TXD_CMD_RS;
1514 	}
1515 
1516 	i = txr->tx_next_avail;
1517 	for (j = 0; j < nsegs; j++) {
1518 		bus_size_t seglen;
1519 		bus_addr_t segaddr;
1520 
1521 		txbuf = &txr->tx_buf[i];
1522 		txd = &txr->tx_base[i];
1523 		seglen = segs[j].ds_len;
1524 		segaddr = htole64(segs[j].ds_addr);
1525 
1526 		txd->read.buffer_addr = segaddr;
1527 		txd->read.cmd_type_len = htole32(IXGBE_TXD_CMD_IFCS |
1528 		    cmd_type_len | seglen);
1529 		txd->read.olinfo_status = htole32(olinfo_status);
1530 
1531 		if (++i == txr->tx_ndesc)
1532 			i = 0;
1533 	}
1534 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | cmd_rs);
1535 
1536 	txr->tx_avail -= nsegs;
1537 	txr->tx_next_avail = i;
1538 	txr->tx_nmbuf++;
1539 
1540 	txbuf->m_head = m_head;
1541 	txr->tx_buf[first].map = txbuf->map;
1542 	txbuf->map = map;
1543 
1544 	/*
1545 	 * Defer the TDT update until enough descriptors are set up
1546 	 */
1547 	*idx = i;
1548 
1549 	return 0;
1550 }
1551 
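/*
 * Program the unicast/multicast promiscuous bits in FCTRL according to
 * IFF_PROMISC, IFF_ALLMULTI and the multicast address count.
 */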
1552 static void
1553 ix_set_promisc(struct ix_softc *sc)
1554 {
1555 	struct ifnet *ifp = &sc->arpcom.ac_if;
1556 	uint32_t reg_rctl;
1557 	int mcnt = 0;
1558 
1559 	reg_rctl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1560 	reg_rctl &= ~IXGBE_FCTRL_UPE;
1561 	if (ifp->if_flags & IFF_ALLMULTI) {
1562 		mcnt = IX_MAX_MCASTADDR;
1563 	} else {
1564 		struct ifmultiaddr *ifma;
1565 
1566 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1567 			if (ifma->ifma_addr->sa_family != AF_LINK)
1568 				continue;
1569 			if (mcnt == IX_MAX_MCASTADDR)
1570 				break;
1571 			mcnt++;
1572 		}
1573 	}
1574 	if (mcnt < IX_MAX_MCASTADDR)
1575 		reg_rctl &= ~IXGBE_FCTRL_MPE;
1576 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1577 
1578 	if (ifp->if_flags & IFF_PROMISC) {
1579 		reg_rctl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1580 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1581 	} else if (ifp->if_flags & IFF_ALLMULTI) {
1582 		reg_rctl |= IXGBE_FCTRL_MPE;
1583 		reg_rctl &= ~IXGBE_FCTRL_UPE;
1584 		IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, reg_rctl);
1585 	}
1586 }
1587 
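/*
 * Rebuild the hardware multicast filter from the interface's
 * multicast address list.
 */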
1588 static void
1589 ix_set_multi(struct ix_softc *sc)
1590 {
1591 	struct ifnet *ifp = &sc->arpcom.ac_if;
1592 	struct ifmultiaddr *ifma;
1593 	uint32_t fctrl;
1594 	struct ix_mc_addr *mta;
1595 	int mcnt = 0;
1596 
1597 	mta = sc->mta;
1598 	bzero(mta, sizeof(*mta) * IX_MAX_MCASTADDR);
1599 
1600 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1601 		if (ifma->ifma_addr->sa_family != AF_LINK)
1602 			continue;
1603 		if (mcnt == IX_MAX_MCASTADDR)
1604 			break;
1605 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1606 		    mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
1607 		mcnt++;
1608 	}
1609 
1610 	fctrl = IXGBE_READ_REG(&sc->hw, IXGBE_FCTRL);
1611 	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1612 	if (ifp->if_flags & IFF_PROMISC) {
1613 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
1614 	} else if (mcnt >= IX_MAX_MCASTADDR || (ifp->if_flags & IFF_ALLMULTI)) {
1615 		fctrl |= IXGBE_FCTRL_MPE;
1616 		fctrl &= ~IXGBE_FCTRL_UPE;
1617 	} else {
1618 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1619 	}
1620 	IXGBE_WRITE_REG(&sc->hw, IXGBE_FCTRL, fctrl);
1621 
1622 	if (mcnt < IX_MAX_MCASTADDR) {
1623 		ixgbe_update_mc_addr_list(&sc->hw,
1624 		    (uint8_t *)mta, mcnt, ix_mc_array_itr, TRUE);
1625 	}
1626 }
1627 
1628 /*
1629  * This is an iterator function needed by the multicast shared
1630  * code.  It simply feeds the shared code routine the addresses
1631  * built up in ix_set_multi()'s array one by one.
1632  */
1633 static uint8_t *
1634 ix_mc_array_itr(struct ixgbe_hw *hw, uint8_t **update_ptr, uint32_t *vmdq)
1635 {
1636 	struct ix_mc_addr *mta = (struct ix_mc_addr *)*update_ptr;
1637 
1638 	*vmdq = mta->vmdq;
1639 	*update_ptr = (uint8_t *)(mta + 1);
1640 
1641 	return (mta->addr);
1642 }
1643 
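/*
 * Once-a-second callout: probe for hot-plugged SFP modules and refresh
 * link state and statistics.
 */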
1644 static void
1645 ix_timer(void *arg)
1646 {
1647 	struct ix_softc *sc = arg;
1648 
1649 	lwkt_serialize_enter(&sc->main_serialize);
1650 
1651 	if ((sc->arpcom.ac_if.if_flags & IFF_RUNNING) == 0) {
1652 		lwkt_serialize_exit(&sc->main_serialize);
1653 		return;
1654 	}
1655 
1656 	/* Check for pluggable optics */
1657 	if (sc->sfp_probe) {
1658 		if (!ix_sfp_probe(sc))
1659 			goto done; /* Nothing to do */
1660 	}
1661 
1662 	ix_update_link_status(sc);
1663 	ix_update_stats(sc);
1664 
1665 done:
1666 	callout_reset_bycpu(&sc->timer, hz, ix_timer, sc, sc->timer_cpuid);
1667 	lwkt_serialize_exit(&sc->main_serialize);
1668 }
1669 
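/*
 * Propagate the current hardware link state to the ifnet layer and,
 * on link-up, reapply flow control and DMA coalescing settings.
 */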
1670 static void
1671 ix_update_link_status(struct ix_softc *sc)
1672 {
1673 	struct ifnet *ifp = &sc->arpcom.ac_if;
1674 
1675 	if (sc->link_up) {
1676 		if (sc->link_active == FALSE) {
1677 			if (bootverbose) {
1678 				if_printf(ifp, "Link is up %d Gbps %s\n",
1679 				    sc->link_speed == 128 ? 10 : 1,
1680 				    "Full Duplex");
1681 			}
1682 
1683 			/*
1684 			 * Update any Flow Control changes
1685 			 */
1686 			ixgbe_fc_enable(&sc->hw);
1687 			/* MUST after ixgbe_fc_enable() */
1688 			if (sc->rx_ring_inuse > 1) {
1689 				switch (sc->hw.fc.current_mode) {
1690 				case ixgbe_fc_rx_pause:
1691 				case ixgbe_fc_tx_pause:
1692 				case ixgbe_fc_full:
1693 					ix_disable_rx_drop(sc);
1694 					break;
1695 
1696 				case ixgbe_fc_none:
1697 					ix_enable_rx_drop(sc);
1698 					break;
1699 
1700 				default:
1701 					break;
1702 				}
1703 			}
1704 
1705 			/* Update DMA coalescing config */
1706 			ix_config_dmac(sc);
1707 
1708 			sc->link_active = TRUE;
1709 
1710 			ifp->if_link_state = LINK_STATE_UP;
1711 			if_link_state_change(ifp);
1712 		}
1713 	} else { /* Link down */
1714 		if (sc->link_active == TRUE) {
1715 			if (bootverbose)
1716 				if_printf(ifp, "Link is Down\n");
1717 			ifp->if_link_state = LINK_STATE_DOWN;
1718 			if_link_state_change(ifp);
1719 
1720 			sc->link_active = FALSE;
1721 		}
1722 	}
1723 }
1724 
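/*
 * Stop the interface: disable interrupts and callouts, reset and stop
 * the MAC, and free the TX/RX ring buffers.
 */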
1725 static void
1726 ix_stop(struct ix_softc *sc)
1727 {
1728 	struct ixgbe_hw *hw = &sc->hw;
1729 	struct ifnet *ifp = &sc->arpcom.ac_if;
1730 	int i;
1731 
1732 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1733 
1734 	ix_disable_intr(sc);
1735 	callout_stop(&sc->timer);
1736 
1737 	ifp->if_flags &= ~IFF_RUNNING;
1738 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1739 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1740 
1741 		ifsq_clr_oactive(txr->tx_ifsq);
1742 		ifsq_watchdog_stop(&txr->tx_watchdog);
1743 		txr->tx_flags &= ~IX_TXFLAG_ENABLED;
1744 
1745 		txr->tx_running = 0;
1746 		callout_stop(&txr->tx_gc_timer);
1747 	}
1748 
1749 	ixgbe_reset_hw(hw);
1750 	hw->adapter_stopped = FALSE;
1751 	ixgbe_stop_adapter(hw);
1752 	if (hw->mac.type == ixgbe_mac_82599EB)
1753 		ixgbe_stop_mac_link_on_d3_82599(hw);
1754 	/* Turn off the laser - noop with no optics */
1755 	ixgbe_disable_tx_laser(hw);
1756 
1757 	/* Update the stack */
1758 	sc->link_up = FALSE;
1759 	ix_update_link_status(sc);
1760 
1761 	/* Reprogram the RAR[0] in case user changed it. */
1762 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
1763 
1764 	for (i = 0; i < sc->tx_ring_cnt; ++i)
1765 		ix_free_tx_ring(&sc->tx_rings[i]);
1766 
1767 	for (i = 0; i < sc->rx_ring_cnt; ++i)
1768 		ix_free_rx_ring(&sc->rx_rings[i]);
1769 }
1770 
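/*
 * Initialize the ifnet: methods, capabilities, TX subqueues and the
 * supported media types.
 */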
1771 static void
1772 ix_setup_ifp(struct ix_softc *sc)
1773 {
1774 	struct ixgbe_hw *hw = &sc->hw;
1775 	struct ifnet *ifp = &sc->arpcom.ac_if;
1776 	int i;
1777 
1778 	ifp->if_baudrate = IF_Gbps(10UL);
1779 
1780 	ifp->if_softc = sc;
1781 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1782 	ifp->if_init = ix_init;
1783 	ifp->if_ioctl = ix_ioctl;
1784 	ifp->if_start = ix_start;
1785 	ifp->if_serialize = ix_serialize;
1786 	ifp->if_deserialize = ix_deserialize;
1787 	ifp->if_tryserialize = ix_tryserialize;
1788 #ifdef INVARIANTS
1789 	ifp->if_serialize_assert = ix_serialize_assert;
1790 #endif
1791 #ifdef IFPOLL_ENABLE
1792 	ifp->if_npoll = ix_npoll;
1793 #endif
1794 
1795 	/* Increase TSO burst length */
1796 	ifp->if_tsolen = (8 * ETHERMTU);
1797 
1798 	ifp->if_nmbclusters = sc->rx_ring_cnt * sc->rx_rings[0].rx_ndesc;
1799 	ifp->if_nmbjclusters = ifp->if_nmbclusters;
1800 
1801 	ifq_set_maxlen(&ifp->if_snd, sc->tx_rings[0].tx_ndesc - 2);
1802 	ifq_set_ready(&ifp->if_snd);
1803 	ifq_set_subq_cnt(&ifp->if_snd, sc->tx_ring_cnt);
1804 
1805 	ifp->if_mapsubq = ifq_mapsubq_modulo;
1806 	ifq_set_subq_divisor(&ifp->if_snd, 1);
1807 
1808 	ether_ifattach(ifp, hw->mac.addr, NULL);
1809 
1810 	ifp->if_capabilities =
1811 	    IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1812 	if (IX_ENABLE_HWRSS(sc))
1813 		ifp->if_capabilities |= IFCAP_RSS;
1814 	ifp->if_capenable = ifp->if_capabilities;
1815 	ifp->if_hwassist = CSUM_OFFLOAD | CSUM_TSO;
1816 
1817 	/*
1818 	 * Tell the upper layer(s) we support long frames.
1819 	 */
1820 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
1821 
1822 	/* Setup TX rings and subqueues */
1823 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1824 		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
1825 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1826 
1827 		ifsq_set_cpuid(ifsq, txr->tx_intr_cpuid);
1828 		ifsq_set_priv(ifsq, txr);
1829 		ifsq_set_hw_serialize(ifsq, &txr->tx_serialize);
1830 		txr->tx_ifsq = ifsq;
1831 
1832 		ifsq_watchdog_init(&txr->tx_watchdog, ifsq, ix_watchdog);
1833 	}
1834 
1835 	/* Specify the media types supported by this adapter */
1836 	sc->phy_layer = ixgbe_get_supported_physical_layer(hw);
1837 	ix_init_media(sc);
1838 }
1839 
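/*
 * Return TRUE if this MAC uses SFP-based (fiber) media.
 */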
1840 static boolean_t
1841 ix_is_sfp(struct ixgbe_hw *hw)
1842 {
1843 	switch (hw->mac.type) {
1844 	case ixgbe_mac_82598EB:
1845 		if (hw->phy.type == ixgbe_phy_nl)
1846 			return TRUE;
1847 		return FALSE;
1848 
1849 	case ixgbe_mac_82599EB:
1850 		switch (hw->mac.ops.get_media_type(hw)) {
1851 		case ixgbe_media_type_fiber:
1852 		case ixgbe_media_type_fiber_qsfp:
1853 			return TRUE;
1854 		default:
1855 			return FALSE;
1856 		}
1857 
1858 	case ixgbe_mac_X550EM_x:
1859 	case ixgbe_mac_X550EM_a:
1860 		if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber)
1861 			return TRUE;
1862 		return FALSE;
1863 
1864 	default:
1865 		return FALSE;
1866 	}
1867 }
1868 
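/*
 * Bring the link up: kick off SFP module handling on fiber ports, or
 * set up (auto)negotiated link speeds otherwise.
 */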
1869 static void
1870 ix_config_link(struct ix_softc *sc)
1871 {
1872 	struct ixgbe_hw *hw = &sc->hw;
1873 	boolean_t sfp;
1874 
1875 	sfp = ix_is_sfp(hw);
1876 	if (sfp) {
1877 		if (hw->phy.multispeed_fiber)
1878 			ixgbe_enable_tx_laser(hw);
1879 		ix_handle_mod(sc);
1880 	} else {
1881 		uint32_t autoneg, err = 0;
1882 
1883 		if (hw->mac.ops.check_link != NULL) {
1884 			err = ixgbe_check_link(hw, &sc->link_speed,
1885 			    &sc->link_up, FALSE);
1886 			if (err)
1887 				return;
1888 		}
1889 
1890 		if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
1891 			autoneg = sc->advspeed;
1892 		else
1893 			autoneg = hw->phy.autoneg_advertised;
1894 		if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
1895 			bool negotiate;
1896 
1897 			err = hw->mac.ops.get_link_capabilities(hw,
1898 			    &autoneg, &negotiate);
1899 			if (err)
1900 				return;
1901 		}
1902 
1903 		if (hw->mac.ops.setup_link != NULL) {
1904 			err = hw->mac.ops.setup_link(hw,
1905 			    autoneg, sc->link_up);
1906 			if (err)
1907 				return;
1908 		}
1909 	}
1910 }
1911 
1912 static int
1913 ix_alloc_rings(struct ix_softc *sc)
1914 {
1915 	int error, i;
1916 
1917 	/*
1918 	 * Create top level busdma tag
1919 	 */
1920 	error = bus_dma_tag_create(NULL, 1, 0,
1921 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1922 	    BUS_SPACE_MAXSIZE_32BIT, 0, BUS_SPACE_MAXSIZE_32BIT, 0,
1923 	    &sc->parent_tag);
1924 	if (error) {
1925 		device_printf(sc->dev, "could not create top level DMA tag\n");
1926 		return error;
1927 	}
1928 
1929 	/*
1930 	 * Allocate TX descriptor rings and buffers
1931 	 */
1932 	sc->tx_rings = kmalloc_cachealign(
1933 	    sizeof(struct ix_tx_ring) * sc->tx_ring_cnt,
1934 	    M_DEVBUF, M_WAITOK | M_ZERO);
1935 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
1936 		struct ix_tx_ring *txr = &sc->tx_rings[i];
1937 
1938 		txr->tx_sc = sc;
1939 		txr->tx_idx = i;
1940 		txr->tx_intr_vec = -1;
1941 		txr->tx_intr_cpuid = -1;
1942 		lwkt_serialize_init(&txr->tx_serialize);
1943 		callout_init_mp(&txr->tx_gc_timer);
1944 
1945 		error = ix_create_tx_ring(txr);
1946 		if (error)
1947 			return error;
1948 	}
1949 
1950 	/*
1951 	 * Allocate RX descriptor rings and buffers
1952 	 */
1953 	sc->rx_rings = kmalloc_cachealign(
1954 	    sizeof(struct ix_rx_ring) * sc->rx_ring_cnt,
1955 	    M_DEVBUF, M_WAITOK | M_ZERO);
1956 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
1957 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
1958 
1959 		rxr->rx_sc = sc;
1960 		rxr->rx_idx = i;
1961 		rxr->rx_intr_vec = -1;
1962 		lwkt_serialize_init(&rxr->rx_serialize);
1963 
1964 		error = ix_create_rx_ring(rxr);
1965 		if (error)
1966 			return error;
1967 	}
1968 
1969 	return 0;
1970 }
1971 
1972 static int
1973 ix_create_tx_ring(struct ix_tx_ring *txr)
1974 {
1975 	int error, i, tsize, ntxd;
1976 
1977 	/*
1978 	 * Validate number of transmit descriptors.  It must not exceed
1979 	 * Validate the number of transmit descriptors: it must lie within
1980 	 * the hardware limits and the ring size must be a multiple of
	 * IX_DBA_ALIGN.
1981 	ntxd = device_getenv_int(txr->tx_sc->dev, "txd", ix_txd);
1982 	if (((ntxd * sizeof(union ixgbe_adv_tx_desc)) % IX_DBA_ALIGN) != 0 ||
1983 	    ntxd < IX_MIN_TXD || ntxd > IX_MAX_TXD) {
1984 		device_printf(txr->tx_sc->dev,
1985 		    "Using %d TX descriptors instead of %d!\n",
1986 		    IX_DEF_TXD, ntxd);
1987 		txr->tx_ndesc = IX_DEF_TXD;
1988 	} else {
1989 		txr->tx_ndesc = ntxd;
1990 	}
1991 
1992 	/*
1993 	 * Allocate TX head write-back buffer
1994 	 */
1995 	txr->tx_hdr = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
1996 	    __VM_CACHELINE_SIZE, __VM_CACHELINE_SIZE, BUS_DMA_WAITOK,
1997 	    &txr->tx_hdr_dtag, &txr->tx_hdr_map, &txr->tx_hdr_paddr);
1998 	if (txr->tx_hdr == NULL) {
1999 		device_printf(txr->tx_sc->dev,
2000 		    "Unable to allocate TX head write-back buffer\n");
2001 		return ENOMEM;
2002 	}
2003 
2004 	/*
2005 	 * Allocate TX descriptor ring
2006 	 */
2007 	tsize = roundup2(txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc),
2008 	    IX_DBA_ALIGN);
2009 	txr->tx_base = bus_dmamem_coherent_any(txr->tx_sc->parent_tag,
2010 	    IX_DBA_ALIGN, tsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2011 	    &txr->tx_base_dtag, &txr->tx_base_map, &txr->tx_base_paddr);
2012 	if (txr->tx_base == NULL) {
2013 		device_printf(txr->tx_sc->dev,
2014 		    "Unable to allocate TX Descriptor memory\n");
2015 		return ENOMEM;
2016 	}
2017 
2018 	tsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_tx_buf) * txr->tx_ndesc);
2019 	txr->tx_buf = kmalloc_cachealign(tsize, M_DEVBUF, M_WAITOK | M_ZERO);
2020 
2021 	/*
2022 	 * Create DMA tag for TX buffers
2023 	 */
2024 	error = bus_dma_tag_create(txr->tx_sc->parent_tag,
2025 	    1, 0,		/* alignment, bounds */
2026 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2027 	    BUS_SPACE_MAXADDR,	/* highaddr */
2028 	    NULL, NULL,		/* filter, filterarg */
2029 	    IX_TSO_SIZE,	/* maxsize */
2030 	    IX_MAX_SCATTER,	/* nsegments */
2031 	    PAGE_SIZE,		/* maxsegsize */
2032 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
2033 	    BUS_DMA_ONEBPAGE,	/* flags */
2034 	    &txr->tx_tag);
2035 	if (error) {
2036 		device_printf(txr->tx_sc->dev,
2037 		    "Unable to allocate TX DMA tag\n");
2038 		kfree(txr->tx_buf, M_DEVBUF);
2039 		txr->tx_buf = NULL;
2040 		return error;
2041 	}
2042 
2043 	/*
2044 	 * Create DMA maps for TX buffers
2045 	 */
2046 	for (i = 0; i < txr->tx_ndesc; ++i) {
2047 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
2048 
2049 		error = bus_dmamap_create(txr->tx_tag,
2050 		    BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &txbuf->map);
2051 		if (error) {
2052 			device_printf(txr->tx_sc->dev,
2053 			    "Unable to create TX DMA map\n");
2054 			ix_destroy_tx_ring(txr, i);
2055 			return error;
2056 		}
2057 	}
2058 
2059 	/*
2060 	 * Initialize various watermarks
2061 	 */
2062 	txr->tx_wreg_nsegs = IX_DEF_TXWREG_NSEGS;
2063 	txr->tx_intr_nsegs = txr->tx_ndesc / 16;
2064 
2065 	return 0;
2066 }
2067 
2068 static void
2069 ix_destroy_tx_ring(struct ix_tx_ring *txr, int ndesc)
2070 {
2071 	int i;
2072 
2073 	if (txr->tx_hdr != NULL) {
2074 		bus_dmamap_unload(txr->tx_hdr_dtag, txr->tx_hdr_map);
2075 		bus_dmamem_free(txr->tx_hdr_dtag,
2076 		    __DEVOLATILE(void *, txr->tx_hdr), txr->tx_hdr_map);
2077 		bus_dma_tag_destroy(txr->tx_hdr_dtag);
2078 		txr->tx_hdr = NULL;
2079 	}
2080 
2081 	if (txr->tx_base != NULL) {
2082 		bus_dmamap_unload(txr->tx_base_dtag, txr->tx_base_map);
2083 		bus_dmamem_free(txr->tx_base_dtag, txr->tx_base,
2084 		    txr->tx_base_map);
2085 		bus_dma_tag_destroy(txr->tx_base_dtag);
2086 		txr->tx_base = NULL;
2087 	}
2088 
2089 	if (txr->tx_buf == NULL)
2090 		return;
2091 
2092 	for (i = 0; i < ndesc; ++i) {
2093 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
2094 
2095 		KKASSERT(txbuf->m_head == NULL);
2096 		bus_dmamap_destroy(txr->tx_tag, txbuf->map);
2097 	}
2098 	bus_dma_tag_destroy(txr->tx_tag);
2099 
2100 	kfree(txr->tx_buf, M_DEVBUF);
2101 	txr->tx_buf = NULL;
2102 }
2103 
2104 static void
2105 ix_init_tx_ring(struct ix_tx_ring *txr)
2106 {
2107 	/* Clear the old ring contents */
2108 	bzero(txr->tx_base, sizeof(union ixgbe_adv_tx_desc) * txr->tx_ndesc);
2109 
2110 	/* Clear TX head write-back buffer */
2111 	*(txr->tx_hdr) = 0;
2112 
2113 	/* Reset indices */
2114 	txr->tx_next_avail = 0;
2115 	txr->tx_next_clean = 0;
2116 	txr->tx_nsegs = 0;
2117 	txr->tx_nmbuf = 0;
2118 	txr->tx_running = 0;
2119 
2120 	/* Set number of descriptors available */
2121 	txr->tx_avail = txr->tx_ndesc;
2122 
2123 	/* Enable this TX ring */
2124 	txr->tx_flags |= IX_TXFLAG_ENABLED;
2125 }
2126 
2127 static void
2128 ix_init_tx_unit(struct ix_softc *sc)
2129 {
2130 	struct ixgbe_hw	*hw = &sc->hw;
2131 	int i;
2132 
2133 	/*
2134 	 * Setup the Base and Length of the Tx Descriptor Ring
2135 	 */
2136 	for (i = 0; i < sc->tx_ring_inuse; ++i) {
2137 		struct ix_tx_ring *txr = &sc->tx_rings[i];
2138 		uint64_t tdba = txr->tx_base_paddr;
2139 		uint64_t hdr_paddr = txr->tx_hdr_paddr;
2140 		uint32_t txctrl;
2141 
2142 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (uint32_t)tdba);
2143 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (uint32_t)(tdba >> 32));
2144 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
2145 		    txr->tx_ndesc * sizeof(union ixgbe_adv_tx_desc));
2146 
2147 		/* Setup the HW Tx Head and Tail descriptor pointers */
2148 		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
2149 		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
2150 
2151 		/* Disable TX head write-back relaxed ordering */
2152 		switch (hw->mac.type) {
2153 		case ixgbe_mac_82598EB:
2154 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
2155 			break;
2156 		default:
2157 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
2158 			break;
2159 		}
2160 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
2161 		switch (hw->mac.type) {
2162 		case ixgbe_mac_82598EB:
2163 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
2164 			break;
2165 		default:
2166 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
2167 			break;
2168 		}
2169 
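		/*
		 * With head write-back the hardware DMAs the consumed
		 * descriptor index into tx_hdr instead of the driver
		 * having to read TDH; the IXGBE_TDWBAL_HEAD_WB_ENABLE
		 * flag in TDWBAL turns the feature on.
		 */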
2170 		/* Enable TX head write-back */
2171 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAH(i),
2172 		    (uint32_t)(hdr_paddr >> 32));
2173 		IXGBE_WRITE_REG(hw, IXGBE_TDWBAL(i),
2174 		    ((uint32_t)hdr_paddr) | IXGBE_TDWBAL_HEAD_WB_ENABLE);
2175 	}
2176 
2177 	if (hw->mac.type != ixgbe_mac_82598EB) {
2178 		uint32_t dmatxctl, rttdcs;
2179 
2180 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
2181 		dmatxctl |= IXGBE_DMATXCTL_TE;
2182 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
2183 
2184 		/* Disable arbiter to set MTQC */
2185 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
2186 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
2187 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2188 
2189 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
2190 
2191 		/* Re-enable arbiter */
2192 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
2193 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
2194 	}
2195 }
2196 
2197 static int
2198 ix_tx_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2199     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2200 {
2201 	struct ixgbe_adv_tx_context_desc *TXD;
2202 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2203 	int ehdrlen, ip_hlen = 0, ctxd;
2204 	boolean_t offload = TRUE;
2205 
2206 	/* First check if TSO is to be used */
2207 	if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
2208 		return ix_tso_ctx_setup(txr, mp,
2209 		    cmd_type_len, olinfo_status);
2210 	}
2211 
2212 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2213 		offload = FALSE;
2214 
2215 	/* Indicate the whole packet as payload when not doing TSO */
2216 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
2217 
2218 	/*
2219 	 * In advanced descriptors the vlan tag must be placed into the
2220 	 * context descriptor.  Hence we need to make one even if not
2221 	 * doing checksum offloads.
2222 	 */
2223 	if (mp->m_flags & M_VLANTAG) {
2224 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2225 		    IXGBE_ADVTXD_VLAN_SHIFT;
2226 	} else if (!offload) {
2227 		/* No TX descriptor is consumed */
2228 		return 0;
2229 	}
2230 
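	/*
	 * vlan_macip_lens packing, per the advanced context descriptor
	 * layout used above: VLAN tag in the upper 16 bits, MAC (L2)
	 * header length above IXGBE_ADVTXD_MACLEN_SHIFT, and the IP
	 * header length in the low bits.
	 */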
2231 	/* Set the ether header length */
2232 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2233 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2234 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2235 
2236 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
2237 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2238 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2239 		ip_hlen = mp->m_pkthdr.csum_iphlen;
2240 		KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2241 	}
2242 	vlan_macip_lens |= ip_hlen;
2243 
2244 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2245 	if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2246 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2247 	else if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2248 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
2249 
2250 	if (mp->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
2251 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2252 
2253 	/* Now ready a context descriptor */
2254 	ctxd = txr->tx_next_avail;
2255 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
2256 
2257 	/* Now copy bits into descriptor */
2258 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2259 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2260 	TXD->seqnum_seed = htole32(0);
2261 	TXD->mss_l4len_idx = htole32(0);
2262 
2263 	/* We've consumed the first desc, adjust counters */
2264 	if (++ctxd == txr->tx_ndesc)
2265 		ctxd = 0;
2266 	txr->tx_next_avail = ctxd;
2267 	--txr->tx_avail;
2268 
2269 	/* One TX descriptor is consumed */
2270 	return 1;
2271 }
2272 
2273 static int
2274 ix_tso_ctx_setup(struct ix_tx_ring *txr, const struct mbuf *mp,
2275     uint32_t *cmd_type_len, uint32_t *olinfo_status)
2276 {
2277 	struct ixgbe_adv_tx_context_desc *TXD;
2278 	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2279 	uint32_t mss_l4len_idx = 0, paylen;
2280 	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2281 
2282 	ehdrlen = mp->m_pkthdr.csum_lhlen;
2283 	KASSERT(ehdrlen > 0, ("invalid ether hlen"));
2284 
2285 	ip_hlen = mp->m_pkthdr.csum_iphlen;
2286 	KASSERT(ip_hlen > 0, ("invalid ip hlen"));
2287 
2288 	tcp_hlen = mp->m_pkthdr.csum_thlen;
2289 	KASSERT(tcp_hlen > 0, ("invalid tcp hlen"));
2290 
2291 	ctxd = txr->tx_next_avail;
2292 	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
2293 
2294 	if (mp->m_flags & M_VLANTAG) {
2295 		vlan_macip_lens |= htole16(mp->m_pkthdr.ether_vlantag) <<
2296 		    IXGBE_ADVTXD_VLAN_SHIFT;
2297 	}
2298 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
2299 	vlan_macip_lens |= ip_hlen;
2300 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
2301 
2302 	/* ADV DTYPE TUCMD */
2303 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
2304 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
2305 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
2306 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
2307 
2308 	/* MSS L4LEN IDX */
2309 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
2310 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
2311 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2312 
2313 	TXD->seqnum_seed = htole32(0);
2314 
2315 	if (++ctxd == txr->tx_ndesc)
2316 		ctxd = 0;
2317 
2318 	txr->tx_avail--;
2319 	txr->tx_next_avail = ctxd;
2320 
2321 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
2322 
2323 	/* TSO payload length (L2/L3/L4 headers excluded); used in the transmit desc in encap */
2324 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
2325 
2326 	*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
2327 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
2328 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
2329 
2330 	/* One TX descriptor is consumed */
2331 	return 1;
2332 }
2333 
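/*
 * Reclaim completed TX descriptors.  'hdr' is the hardware head index
 * (normally taken from the TX head write-back buffer by the caller);
 * everything between tx_next_clean and hdr has been transmitted and its
 * mbufs can be freed.
 */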
2334 static void
2335 ix_txeof(struct ix_tx_ring *txr, int hdr)
2336 {
2337 	int first, avail;
2338 
2339 	if (txr->tx_avail == txr->tx_ndesc)
2340 		return;
2341 
2342 	first = txr->tx_next_clean;
2343 	if (first == hdr)
2344 		return;
2345 
2346 	avail = txr->tx_avail;
2347 	while (first != hdr) {
2348 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2349 
2350 		KKASSERT(avail < txr->tx_ndesc);
2351 		++avail;
2352 
2353 		if (txbuf->m_head != NULL)
2354 			ix_free_txbuf(txr, txbuf);
2355 		if (++first == txr->tx_ndesc)
2356 			first = 0;
2357 	}
2358 	txr->tx_next_clean = first;
2359 	txr->tx_avail = avail;
2360 
2361 	if (txr->tx_avail > IX_MAX_SCATTER + IX_TX_RESERVED) {
2362 		ifsq_clr_oactive(txr->tx_ifsq);
2363 		txr->tx_watchdog.wd_timer = 0;
2364 	}
2365 	txr->tx_running = IX_TX_RUNNING;
2366 }
2367 
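/*
 * Garbage-collect completed TX descriptors outside the normal TX
 * completion path (presumably driven by the tx_gc_timer callout),
 * reading the head index directly from TDH instead of the head
 * write-back buffer.
 */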
2368 static void
2369 ix_txgc(struct ix_tx_ring *txr)
2370 {
2371 	int first, hdr;
2372 #ifdef INVARIANTS
2373 	int avail;
2374 #endif
2375 
2376 	if (txr->tx_avail == txr->tx_ndesc)
2377 		return;
2378 
2379 	hdr = IXGBE_READ_REG(&txr->tx_sc->hw, IXGBE_TDH(txr->tx_idx));
2380 	first = txr->tx_next_clean;
2381 	if (first == hdr)
2382 		goto done;
2383 	txr->tx_gc++;
2384 
2385 #ifdef INVARIANTS
2386 	avail = txr->tx_avail;
2387 #endif
2388 	while (first != hdr) {
2389 		struct ix_tx_buf *txbuf = &txr->tx_buf[first];
2390 
2391 #ifdef INVARIANTS
2392 		KKASSERT(avail < txr->tx_ndesc);
2393 		++avail;
2394 #endif
2395 		if (txbuf->m_head != NULL)
2396 			ix_free_txbuf(txr, txbuf);
2397 		if (++first == txr->tx_ndesc)
2398 			first = 0;
2399 	}
2400 done:
2401 	if (txr->tx_nmbuf)
2402 		txr->tx_running = IX_TX_RUNNING;
2403 }
2404 
2405 static int
2406 ix_create_rx_ring(struct ix_rx_ring *rxr)
2407 {
2408 	int i, rsize, error, nrxd;
2409 
2410 	/*
2411 	 * Validate the number of receive descriptors: it must lie within
2412 	 * the hardware limits and the ring size must be a multiple of
	 * IX_DBA_ALIGN.
2413 	 */
2414 	nrxd = device_getenv_int(rxr->rx_sc->dev, "rxd", ix_rxd);
2415 	if (((nrxd * sizeof(union ixgbe_adv_rx_desc)) % IX_DBA_ALIGN) != 0 ||
2416 	    nrxd < IX_MIN_RXD || nrxd > IX_MAX_RXD) {
2417 		device_printf(rxr->rx_sc->dev,
2418 		    "Using %d RX descriptors instead of %d!\n",
2419 		    IX_DEF_RXD, nrxd);
2420 		rxr->rx_ndesc = IX_DEF_RXD;
2421 	} else {
2422 		rxr->rx_ndesc = nrxd;
2423 	}
2424 
2425 	/*
2426 	 * Allocate RX descriptor ring
2427 	 */
2428 	rsize = roundup2(rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc),
2429 	    IX_DBA_ALIGN);
2430 	rxr->rx_base = bus_dmamem_coherent_any(rxr->rx_sc->parent_tag,
2431 	    IX_DBA_ALIGN, rsize, BUS_DMA_WAITOK | BUS_DMA_ZERO,
2432 	    &rxr->rx_base_dtag, &rxr->rx_base_map, &rxr->rx_base_paddr);
2433 	if (rxr->rx_base == NULL) {
2434 		device_printf(rxr->rx_sc->dev,
2435 		    "Unable to allocate RX Descriptor memory\n");
2436 		return ENOMEM;
2437 	}
2438 
2439 	rsize = __VM_CACHELINE_ALIGN(sizeof(struct ix_rx_buf) * rxr->rx_ndesc);
2440 	rxr->rx_buf = kmalloc_cachealign(rsize, M_DEVBUF, M_WAITOK | M_ZERO);
2441 
2442 	/*
2443 	 * Create DMA tag for RX buffers
2444 	 */
2445 	error = bus_dma_tag_create(rxr->rx_sc->parent_tag,
2446 	    1, 0,		/* alignment, bounds */
2447 	    BUS_SPACE_MAXADDR,	/* lowaddr */
2448 	    BUS_SPACE_MAXADDR,	/* highaddr */
2449 	    NULL, NULL,		/* filter, filterarg */
2450 	    PAGE_SIZE,		/* maxsize */
2451 	    1,			/* nsegments */
2452 	    PAGE_SIZE,		/* maxsegsize */
2453 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, /* flags */
2454 	    &rxr->rx_tag);
2455 	if (error) {
2456 		device_printf(rxr->rx_sc->dev,
2457 		    "Unable to create RX DMA tag\n");
2458 		kfree(rxr->rx_buf, M_DEVBUF);
2459 		rxr->rx_buf = NULL;
2460 		return error;
2461 	}
2462 
2463 	/*
2464 	 * Create spare DMA map for RX buffers
2465 	 */
2466 	error = bus_dmamap_create(rxr->rx_tag, BUS_DMA_WAITOK,
2467 	    &rxr->rx_sparemap);
2468 	if (error) {
2469 		device_printf(rxr->rx_sc->dev,
2470 		    "Unable to create spare RX DMA map\n");
2471 		bus_dma_tag_destroy(rxr->rx_tag);
2472 		kfree(rxr->rx_buf, M_DEVBUF);
2473 		rxr->rx_buf = NULL;
2474 		return error;
2475 	}
2476 
2477 	/*
2478 	 * Create DMA maps for RX buffers
2479 	 */
2480 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2481 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2482 
2483 		error = bus_dmamap_create(rxr->rx_tag,
2484 		    BUS_DMA_WAITOK, &rxbuf->map);
2485 		if (error) {
2486 			device_printf(rxr->rx_sc->dev,
2487 			    "Unable to create RX DMA map\n");
2488 			ix_destroy_rx_ring(rxr, i);
2489 			return error;
2490 		}
2491 	}
2492 
2493 	/*
2494 	 * Initialize various watermarks
2495 	 */
2496 	rxr->rx_wreg_nsegs = IX_DEF_RXWREG_NSEGS;
2497 
2498 	return 0;
2499 }
2500 
2501 static void
2502 ix_destroy_rx_ring(struct ix_rx_ring *rxr, int ndesc)
2503 {
2504 	int i;
2505 
2506 	if (rxr->rx_base != NULL) {
2507 		bus_dmamap_unload(rxr->rx_base_dtag, rxr->rx_base_map);
2508 		bus_dmamem_free(rxr->rx_base_dtag, rxr->rx_base,
2509 		    rxr->rx_base_map);
2510 		bus_dma_tag_destroy(rxr->rx_base_dtag);
2511 		rxr->rx_base = NULL;
2512 	}
2513 
2514 	if (rxr->rx_buf == NULL)
2515 		return;
2516 
2517 	for (i = 0; i < ndesc; ++i) {
2518 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2519 
2520 		KKASSERT(rxbuf->m_head == NULL);
2521 		bus_dmamap_destroy(rxr->rx_tag, rxbuf->map);
2522 	}
2523 	bus_dmamap_destroy(rxr->rx_tag, rxr->rx_sparemap);
2524 	bus_dma_tag_destroy(rxr->rx_tag);
2525 
2526 	kfree(rxr->rx_buf, M_DEVBUF);
2527 	rxr->rx_buf = NULL;
2528 }
2529 
2530 /*
2531 ** Used to detect a descriptor that has
2532 ** been merged by Hardware RSC.
2533 */
2534 static __inline uint32_t
2535 ix_rsc_count(union ixgbe_adv_rx_desc *rx)
2536 {
2537 	return (le32toh(rx->wb.lower.lo_dword.data) &
2538 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
2539 }
2540 
2541 #if 0
2542 /*********************************************************************
2543  *
2544  *  Initialize the Hardware RSC (LRO) feature on 82599 for an
2545  *  RX ring; it is toggled by the LRO capability even though it
2546  *  is transparent to the stack.
2547  *
2548  *  NOTE: since this HW feature only works with IPv4, and our
2549  *        testing has shown soft LRO to be as effective, I have
2550  *        decided to disable this by default.
2551  *
2552  **********************************************************************/
2553 static void
2554 ix_setup_hw_rsc(struct ix_rx_ring *rxr)
2555 {
2556 	struct	ix_softc 	*sc = rxr->rx_sc;
2557 	struct	ixgbe_hw	*hw = &sc->hw;
2558 	uint32_t			rscctrl, rdrxctl;
2559 
2560 #if 0
2561 	/* If turning LRO/RSC off we need to disable it */
2562 	if ((sc->arpcom.ac_if.if_capenable & IFCAP_LRO) == 0) {
2563 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2564 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
2565 		return;
2566 	}
2567 #endif
2568 
2569 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
2570 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
2571 	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
2572 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
2573 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
2574 
2575 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
2576 	rscctrl |= IXGBE_RSCCTL_RSCEN;
2577 	/*
2578 	** Limit the total number of descriptors that
2579 	** can be combined, so it does not exceed 64K
2580 	*/
2581 	if (rxr->mbuf_sz == MCLBYTES)
2582 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
2583 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
2584 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
2585 	else if (rxr->mbuf_sz == MJUM9BYTES)
2586 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
2587 	else  /* Using 16K cluster */
2588 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
2589 
2590 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
2591 
2592 	/* Enable TCP header recognition */
2593 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
2594 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
2595 	    IXGBE_PSRTYPE_TCPHDR));
2596 
2597 	/* Disable RSC for ACK packets */
2598 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
2599 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
2600 
2601 	rxr->hw_rsc = TRUE;
2602 }
2603 #endif
2604 
2605 static int
2606 ix_init_rx_ring(struct ix_rx_ring *rxr)
2607 {
2608 	int i;
2609 
2610 	/* Clear the ring contents */
2611 	bzero(rxr->rx_base, rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2612 
2613 	/* XXX we need JUMPAGESIZE for RSC too */
2614 	if (rxr->rx_sc->max_frame_size <= MCLBYTES)
2615 		rxr->rx_mbuf_sz = MCLBYTES;
2616 	else
2617 		rxr->rx_mbuf_sz = MJUMPAGESIZE;
2618 
2619 	/* Now replenish the mbufs */
2620 	for (i = 0; i < rxr->rx_ndesc; ++i) {
2621 		int error;
2622 
2623 		error = ix_newbuf(rxr, i, TRUE);
2624 		if (error)
2625 			return error;
2626 	}
2627 
2628 	/* Setup our descriptor indices */
2629 	rxr->rx_next_check = 0;
2630 	rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2631 
2632 #if 0
2633 	/*
2634 	** Now set up the LRO interface:
2635 	*/
2636 	if (ixgbe_rsc_enable)
2637 		ix_setup_hw_rsc(rxr);
2638 #endif
2639 
2640 	return 0;
2641 }
2642 
2643 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
2644 
2645 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
2646 
2647 static void
2648 ix_init_rx_unit(struct ix_softc *sc, boolean_t polling)
2649 {
2650 	struct ixgbe_hw	*hw = &sc->hw;
2651 	struct ifnet *ifp = &sc->arpcom.ac_if;
2652 	uint32_t bufsz, fctrl, rxcsum, hlreg;
2653 	int i;
2654 
2655 	/*
2656 	 * Make sure receives are disabled while setting up the descriptor ring
2657 	 */
2658 	ixgbe_disable_rx(hw);
2659 
2660 	/* Enable broadcasts */
2661 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
2662 	fctrl |= IXGBE_FCTRL_BAM;
2663 	if (hw->mac.type == ixgbe_mac_82598EB) {
2664 		fctrl |= IXGBE_FCTRL_DPF;
2665 		fctrl |= IXGBE_FCTRL_PMCF;
2666 	}
2667 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
2668 
2669 	/* Set for Jumbo Frames? */
2670 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
2671 	if (ifp->if_mtu > ETHERMTU)
2672 		hlreg |= IXGBE_HLREG0_JUMBOEN;
2673 	else
2674 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
2675 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
2676 
2677 	KKASSERT(sc->rx_rings[0].rx_mbuf_sz >= MCLBYTES);
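	/*
	 * Convert the RX buffer size into the granularity used by
	 * SRRCTL.BSIZEPKT, rounding up so the programmed size never
	 * falls short of the actual mbuf size.
	 */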
2678 	bufsz = (sc->rx_rings[0].rx_mbuf_sz + BSIZEPKT_ROUNDUP) >>
2679 	    IXGBE_SRRCTL_BSIZEPKT_SHIFT;
2680 
2681 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
2682 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
2683 		uint64_t rdba = rxr->rx_base_paddr;
2684 		uint32_t srrctl;
2685 
2686 		/* Setup the Base and Length of the Rx Descriptor Ring */
2687 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i), (uint32_t)rdba);
2688 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (uint32_t)(rdba >> 32));
2689 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
2690 		    rxr->rx_ndesc * sizeof(union ixgbe_adv_rx_desc));
2691 
2692 		/*
2693 		 * Set up the SRRCTL register
2694 		 */
2695 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
2696 
2697 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
2698 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
2699 		srrctl |= bufsz;
2700 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
2701 		if (sc->rx_ring_inuse > 1) {
2702 			/* See the comment near ix_enable_rx_drop() */
2703 			if (sc->ifm_media &
2704 			    (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
2705 				srrctl &= ~IXGBE_SRRCTL_DROP_EN;
2706 				if (i == 0 && bootverbose) {
2707 					if_printf(ifp, "flow control %s, "
2708 					    "disable RX drop\n",
2709 					    ix_ifmedia2str(sc->ifm_media));
2710 				}
2711 			} else {
2712 				srrctl |= IXGBE_SRRCTL_DROP_EN;
2713 				if (i == 0 && bootverbose) {
2714 					if_printf(ifp, "flow control %s, "
2715 					    "enable RX drop\n",
2716 					    ix_ifmedia2str(sc->ifm_media));
2717 				}
2718 			}
2719 		}
2720 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
2721 
2722 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
2723 		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
2724 		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
2725 	}
2726 
2727 	if (sc->hw.mac.type != ixgbe_mac_82598EB)
2728 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), 0);
2729 
2730 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
2731 
2732 	/*
2733 	 * Setup RSS
2734 	 */
2735 	if (sc->rx_ring_inuse > 1) {
2736 		uint8_t key[IX_NRSSRK * IX_RSSRK_SIZE];
2737 		const struct if_ringmap *rm;
2738 		int j, r, nreta, table_nent;
2739 
2740 		/*
2741 		 * NOTE:
2742 		 * When we reach here, RSS has already been disabled
2743 		 * in ix_stop(), so we could safely configure RSS key
2744 		 * and redirect table.
2745 		 */
2746 
2747 		/*
2748 		 * Configure RSS key
2749 		 */
2750 		toeplitz_get_key(key, sizeof(key));
2751 		for (i = 0; i < IX_NRSSRK; ++i) {
2752 			uint32_t rssrk;
2753 
2754 			rssrk = IX_RSSRK_VAL(key, i);
2755 			IX_RSS_DPRINTF(sc, 1, "rssrk%d 0x%08x\n",
2756 			    i, rssrk);
2757 
2758 			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rssrk);
2759 		}
2760 
2761 		/*
2762 		 * Configure RSS redirect table.
2763 		 */
2764 
2765 		/* Table size will differ based on MAC */
2766 		switch (hw->mac.type) {
2767 		case ixgbe_mac_X550:
2768 		case ixgbe_mac_X550EM_x:
2769 		case ixgbe_mac_X550EM_a:
2770 			nreta = IX_NRETA_X550;
2771 			break;
2772 		default:
2773 			nreta = IX_NRETA;
2774 			break;
2775 		}
2776 
2777 		table_nent = nreta * IX_RETA_SIZE;
2778 		KASSERT(table_nent <= IX_RDRTABLE_SIZE,
2779 		    ("invalid RETA count %d", nreta));
2780 		if (polling)
2781 			rm = sc->rx_rmap;
2782 		else
2783 			rm = sc->rx_rmap_intr;
2784 		if_ringmap_rdrtable(rm, sc->rdr_table, table_nent);
2785 
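		/*
		 * Each 32-bit RETA/ERETA register holds IX_RETA_SIZE ring
		 * indices, one per byte.  Walk the ring map table and pack
		 * the indices in, spilling into the extended ERETA
		 * registers once the base RETA registers are used up.
		 */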
2786 		r = 0;
2787 		for (j = 0; j < nreta; ++j) {
2788 			uint32_t reta = 0;
2789 
2790 			for (i = 0; i < IX_RETA_SIZE; ++i) {
2791 				uint32_t q;
2792 
2793 				q = sc->rdr_table[r];
2794 				KASSERT(q < sc->rx_ring_inuse,
2795 				    ("invalid RX ring index %d", q));
2796 				reta |= q << (8 * i);
2797 				++r;
2798 			}
2799 			IX_RSS_DPRINTF(sc, 1, "reta 0x%08x\n", reta);
2800 			if (j < IX_NRETA) {
2801 				IXGBE_WRITE_REG(hw, IXGBE_RETA(j), reta);
2802 			} else {
2803 				IXGBE_WRITE_REG(hw, IXGBE_ERETA(j - IX_NRETA),
2804 				    reta);
2805 			}
2806 		}
2807 
2808 		/*
2809 		 * Enable multiple receive queues.
2810 		 * Enable IPv4 RSS standard hash functions.
2811 		 */
2812 		IXGBE_WRITE_REG(hw, IXGBE_MRQC,
2813 		    IXGBE_MRQC_RSSEN |
2814 		    IXGBE_MRQC_RSS_FIELD_IPV4 |
2815 		    IXGBE_MRQC_RSS_FIELD_IPV4_TCP);
2816 
2817 		/*
2818 		 * NOTE:
2819 		 * PCSD must be enabled to enable multiple
2820 		 * receive queues.
2821 		 */
2822 		rxcsum |= IXGBE_RXCSUM_PCSD;
2823 	}
2824 
2825 	if (ifp->if_capenable & IFCAP_RXCSUM)
2826 		rxcsum |= IXGBE_RXCSUM_PCSD;
2827 
2828 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
2829 }
2830 
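/*
 * Publish refilled RX descriptors to the hardware by moving the RX tail
 * register to just before the next descriptor the driver will inspect.
 */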
2831 static __inline void
2832 ix_rx_refresh(struct ix_rx_ring *rxr, int i)
2833 {
2834 	if (--i < 0)
2835 		i = rxr->rx_ndesc - 1;
2836 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, IXGBE_RDT(rxr->rx_idx), i);
2837 }
2838 
2839 static __inline void
2840 ix_rxcsum(uint32_t staterr, struct mbuf *mp, uint32_t ptype)
2841 {
2842 	if ((ptype &
2843 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_IPV4_EX)) == 0) {
2844 		/* Not IPv4 */
2845 		return;
2846 	}
2847 
2848 	if ((staterr & (IXGBE_RXD_STAT_IPCS | IXGBE_RXDADV_ERR_IPE)) ==
2849 	    IXGBE_RXD_STAT_IPCS)
2850 		mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID;
2851 
2852 	if ((ptype &
2853 	     (IXGBE_RXDADV_PKTTYPE_TCP | IXGBE_RXDADV_PKTTYPE_UDP)) == 0) {
2854 		/*
2855 		 * - Neither TCP nor UDP
2856 		 * - IPv4 fragment
2857 		 */
2858 		return;
2859 	}
2860 
2861 	if ((staterr & (IXGBE_RXD_STAT_L4CS | IXGBE_RXDADV_ERR_TCPE)) ==
2862 	    IXGBE_RXD_STAT_L4CS) {
2863 		mp->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
2864 		    CSUM_FRAG_NOT_CHECKED;
2865 		mp->m_pkthdr.csum_data = htons(0xffff);
2866 	}
2867 }
2868 
2869 static __inline struct pktinfo *
2870 ix_rssinfo(struct mbuf *m, struct pktinfo *pi,
2871     uint32_t hash, uint32_t hashtype, uint32_t ptype)
2872 {
2873 	switch (hashtype) {
2874 	case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2875 		pi->pi_netisr = NETISR_IP;
2876 		pi->pi_flags = 0;
2877 		pi->pi_l3proto = IPPROTO_TCP;
2878 		break;
2879 
2880 	case IXGBE_RXDADV_RSSTYPE_IPV4:
2881 		if ((ptype & IXGBE_RXDADV_PKTTYPE_UDP) == 0) {
2882 			/* Not UDP or is fragment */
2883 			return NULL;
2884 		}
2885 		pi->pi_netisr = NETISR_IP;
2886 		pi->pi_flags = 0;
2887 		pi->pi_l3proto = IPPROTO_UDP;
2888 		break;
2889 
2890 	default:
2891 		return NULL;
2892 	}
2893 
2894 	m_sethash(m, toeplitz_hash(hash));
2895 	return pi;
2896 }
2897 
2898 static __inline void
2899 ix_setup_rxdesc(union ixgbe_adv_rx_desc *rxd, const struct ix_rx_buf *rxbuf)
2900 {
2901 	rxd->read.pkt_addr = htole64(rxbuf->paddr);
2902 	rxd->wb.upper.status_error = 0;
2903 }
2904 
2905 static void
2906 ix_rx_discard(struct ix_rx_ring *rxr, int i, boolean_t eop)
2907 {
2908 	struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
2909 
2910 	/*
2911 	 * XXX discard may not be correct
2912 	 */
2913 	if (eop) {
2914 		IFNET_STAT_INC(&rxr->rx_sc->arpcom.ac_if, ierrors, 1);
2915 		rxr->rx_flags &= ~IX_RXRING_FLAG_DISC;
2916 	} else {
2917 		rxr->rx_flags |= IX_RXRING_FLAG_DISC;
2918 	}
2919 	if (rxbuf->fmp != NULL) {
2920 		m_freem(rxbuf->fmp);
2921 		rxbuf->fmp = NULL;
2922 		rxbuf->lmp = NULL;
2923 	}
2924 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
2925 }
2926 
2927 static void
2928 ix_rxeof(struct ix_rx_ring *rxr, int count)
2929 {
2930 	struct ifnet *ifp = &rxr->rx_sc->arpcom.ac_if;
2931 	int i, nsegs = 0, cpuid = mycpuid;
2932 
2933 	i = rxr->rx_next_check;
2934 	while (count != 0) {
2935 		struct ix_rx_buf *rxbuf, *nbuf = NULL;
2936 		union ixgbe_adv_rx_desc	*cur;
2937 		struct mbuf *sendmp = NULL, *mp;
2938 		struct pktinfo *pi = NULL, pi0;
2939 		uint32_t rsc = 0, ptype, staterr, hash, hashtype;
2940 		uint16_t len;
2941 		boolean_t eop;
2942 
2943 		cur = &rxr->rx_base[i];
2944 		staterr = le32toh(cur->wb.upper.status_error);
2945 
2946 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
2947 			break;
2948 		++nsegs;
2949 
2950 		rxbuf = &rxr->rx_buf[i];
2951 		mp = rxbuf->m_head;
2952 
2953 		len = le16toh(cur->wb.upper.length);
2954 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
2955 		    IXGBE_RXDADV_PKTTYPE_MASK;
2956 		hash = le32toh(cur->wb.lower.hi_dword.rss);
2957 		hashtype = le32toh(cur->wb.lower.lo_dword.data) &
2958 		    IXGBE_RXDADV_RSSTYPE_MASK;
2959 
2960 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
2961 		if (eop)
2962 			--count;
2963 
2964 		/*
2965 		 * Make sure bad packets are discarded
2966 		 */
2967 		if ((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
2968 		    (rxr->rx_flags & IX_RXRING_FLAG_DISC)) {
2969 			ix_rx_discard(rxr, i, eop);
2970 			goto next_desc;
2971 		}
2972 
2973 		bus_dmamap_sync(rxr->rx_tag, rxbuf->map, BUS_DMASYNC_POSTREAD);
2974 		if (ix_newbuf(rxr, i, FALSE) != 0) {
2975 			ix_rx_discard(rxr, i, eop);
2976 			goto next_desc;
2977 		}
2978 
2979 		/*
2980 		 * On the 82599, which supports hardware RSC (LRO), the
2981 		 * fragments of a frame need not occupy sequential
2982 		 * descriptors; instead the next descriptor index is
2983 		 * encoded in the descriptor itself.  This also means we
2984 		 * may be assembling more than one frame at a time,
2985 		 * which required eliminating the global chain pointers
2986 		 * in favor of the per-buffer fmp/lmp chaining done
2987 		 * below.
2988 		 */
2989 		if (!eop) {
2990 			int nextp;
2991 
2992 			/*
2993 			 * Figure out the next descriptor
2994 			 * of this frame.
2995 			 */
2996 			if (rxr->rx_flags & IX_RXRING_FLAG_LRO)
2997 				rsc = ix_rsc_count(cur);
2998 			if (rsc) { /* Get hardware index */
2999 				nextp = ((staterr &
3000 				    IXGBE_RXDADV_NEXTP_MASK) >>
3001 				    IXGBE_RXDADV_NEXTP_SHIFT);
3002 			} else { /* Just sequential */
3003 				nextp = i + 1;
3004 				if (nextp == rxr->rx_ndesc)
3005 					nextp = 0;
3006 			}
3007 			nbuf = &rxr->rx_buf[nextp];
3008 			prefetch(nbuf);
3009 		}
3010 		mp->m_len = len;
3011 
3012 		/*
3013 		 * Rather than using the fmp/lmp global pointers
3014 		 * we now keep the head of a packet chain in the
3015 		 * buffer struct and pass this along from one
3016 		 * descriptor to the next, until we get EOP.
3017 		 */
3018 		if (rxbuf->fmp == NULL) {
3019 			mp->m_pkthdr.len = len;
3020 			rxbuf->fmp = mp;
3021 			rxbuf->lmp = mp;
3022 		} else {
3023 			rxbuf->fmp->m_pkthdr.len += len;
3024 			rxbuf->lmp->m_next = mp;
3025 			rxbuf->lmp = mp;
3026 		}
3027 
3028 		if (nbuf != NULL) {
3029 			/*
3030 			 * Not the last fragment of this frame,
3031 			 * pass this fragment list on
3032 			 */
3033 			nbuf->fmp = rxbuf->fmp;
3034 			nbuf->lmp = rxbuf->lmp;
3035 		} else {
3036 			/*
3037 			 * Send this frame
3038 			 */
3039 			sendmp = rxbuf->fmp;
3040 
3041 			sendmp->m_pkthdr.rcvif = ifp;
3042 			IFNET_STAT_INC(ifp, ipackets, 1);
3043 #ifdef IX_RSS_DEBUG
3044 			rxr->rx_pkts++;
3045 #endif
3046 
3047 			/* Process vlan info */
3048 			if (staterr & IXGBE_RXD_STAT_VP) {
3049 				sendmp->m_pkthdr.ether_vlantag =
3050 				    le16toh(cur->wb.upper.vlan);
3051 				sendmp->m_flags |= M_VLANTAG;
3052 			}
3053 			if (ifp->if_capenable & IFCAP_RXCSUM)
3054 				ix_rxcsum(staterr, sendmp, ptype);
3055 			if (ifp->if_capenable & IFCAP_RSS) {
3056 				pi = ix_rssinfo(sendmp, &pi0,
3057 				    hash, hashtype, ptype);
3058 			}
3059 		}
3060 		rxbuf->fmp = NULL;
3061 		rxbuf->lmp = NULL;
3062 next_desc:
3063 		/* Advance our pointers to the next descriptor. */
3064 		if (++i == rxr->rx_ndesc)
3065 			i = 0;
3066 
3067 		if (sendmp != NULL)
3068 			ifp->if_input(ifp, sendmp, pi, cpuid);
3069 
3070 		if (nsegs >= rxr->rx_wreg_nsegs) {
3071 			ix_rx_refresh(rxr, i);
3072 			nsegs = 0;
3073 		}
3074 	}
3075 	rxr->rx_next_check = i;
3076 
3077 	if (nsegs > 0)
3078 		ix_rx_refresh(rxr, i);
3079 }
3080 
3081 static void
3082 ix_set_vlan(struct ix_softc *sc)
3083 {
3084 	struct ixgbe_hw *hw = &sc->hw;
3085 	uint32_t ctrl;
3086 
3087 	if (hw->mac.type == ixgbe_mac_82598EB) {
3088 		ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3089 		ctrl |= IXGBE_VLNCTRL_VME;
3090 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
3091 	} else {
3092 		int i;
3093 
3094 		/*
3095 		 * On 82599 and later chips the VLAN enable is
3096 		 * per queue in RXDCTL
3097 		 */
3098 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3099 			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
3100 			ctrl |= IXGBE_RXDCTL_VME;
3101 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
3102 		}
3103 	}
3104 }
3105 
3106 static void
3107 ix_enable_intr(struct ix_softc *sc)
3108 {
3109 	struct ixgbe_hw	*hw = &sc->hw;
3110 	uint32_t fwsm;
3111 	int i;
3112 
3113 	for (i = 0; i < sc->intr_cnt; ++i)
3114 		lwkt_serialize_handler_enable(sc->intr_data[i].intr_serialize);
3115 
3116 	sc->intr_mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
3117 
3118 	switch (hw->mac.type) {
3119 	case ixgbe_mac_82599EB:
3120 		sc->intr_mask |= IXGBE_EIMS_ECC;
3121 		/* Temperature sensor on some adapters */
3122 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP0;
3123 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
3124 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3125 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP2;
3126 		break;
3127 
3128 	case ixgbe_mac_X540:
3129 		sc->intr_mask |= IXGBE_EIMS_ECC;
3130 		/* Detect if Thermal Sensor is enabled */
3131 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
3132 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
3133 			sc->intr_mask |= IXGBE_EIMS_TS;
3134 		break;
3135 
3136 	case ixgbe_mac_X550:
3137 		sc->intr_mask |= IXGBE_EIMS_ECC;
3138 		/* MAC thermal sensor is automatically enabled */
3139 		sc->intr_mask |= IXGBE_EIMS_TS;
3140 		break;
3141 
3142 	case ixgbe_mac_X550EM_a:
3143 	case ixgbe_mac_X550EM_x:
3144 		sc->intr_mask |= IXGBE_EIMS_ECC;
3145 		/* Some devices use SDP0 for important information */
3146 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
3147 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
3148 		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N ||
3149 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
3150 			sc->intr_mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
3151 		if (hw->phy.type == ixgbe_phy_x550em_ext_t)
3152 			sc->intr_mask |= IXGBE_EICR_GPI_SDP0_X540;
3153 		break;
3154 
3155 	default:
3156 		break;
3157 	}
3158 
3159 	/* Enable Fan Failure detection */
3160 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
3161 		sc->intr_mask |= IXGBE_EIMS_GPI_SDP1;
3162 
3163 	/* With MSI-X we use auto clear for RX and TX rings */
3164 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3165 		/*
3166 		 * There are no EIAC1/EIAC2 for newer chips; the related
3167 		 * bits for TX and RX rings > 16 are always auto clear.
3168 		 *
3169 		 * XXX which bits?  There are _no_ documented EICR1 and
3170 		 * EICR2 at all; only EICR.
3171 		 */
3172 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, IXGBE_EIMS_RTX_QUEUE);
3173 	} else {
3174 		sc->intr_mask |= IX_TX_INTR_MASK | IX_RX0_INTR_MASK;
3175 
3176 		KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3177 		if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS)
3178 			sc->intr_mask |= IX_RX1_INTR_MASK;
3179 	}
3180 
3181 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, sc->intr_mask);
3182 
3183 	/*
3184 	 * Enable RX and TX rings for MSI-X
3185 	 */
3186 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3187 		for (i = 0; i < sc->tx_ring_inuse; ++i) {
3188 			const struct ix_tx_ring *txr = &sc->tx_rings[i];
3189 
3190 			if (txr->tx_intr_vec >= 0) {
3191 				IXGBE_WRITE_REG(hw, txr->tx_eims,
3192 				    txr->tx_eims_val);
3193 			}
3194 		}
3195 		for (i = 0; i < sc->rx_ring_inuse; ++i) {
3196 			const struct ix_rx_ring *rxr = &sc->rx_rings[i];
3197 
3198 			KKASSERT(rxr->rx_intr_vec >= 0);
3199 			IXGBE_WRITE_REG(hw, rxr->rx_eims, rxr->rx_eims_val);
3200 		}
3201 	}
3202 
3203 	IXGBE_WRITE_FLUSH(hw);
3204 }
3205 
3206 static void
3207 ix_disable_intr(struct ix_softc *sc)
3208 {
3209 	int i;
3210 
3211 	if (sc->intr_type == PCI_INTR_TYPE_MSIX)
3212 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIAC, 0);
3213 
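	/*
	 * 82598 has a single EIMC register covering all vectors; newer
	 * MACs also provide extended EIMC_EX registers which must be
	 * cleared to mask every vector.
	 */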
3214 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
3215 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, ~0);
3216 	} else {
3217 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC, 0xFFFF0000);
3218 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(0), ~0);
3219 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMC_EX(1), ~0);
3220 	}
3221 	IXGBE_WRITE_FLUSH(&sc->hw);
3222 
3223 	for (i = 0; i < sc->intr_cnt; ++i)
3224 		lwkt_serialize_handler_disable(sc->intr_data[i].intr_serialize);
3225 }
3226 
3227 static void
3228 ix_slot_info(struct ix_softc *sc)
3229 {
3230 	device_t dev = sc->dev;
3231 	struct ixgbe_hw *hw = &sc->hw;
3232 	uint32_t offset;
3233 	uint16_t link;
3234 	boolean_t bus_info_valid = TRUE;
3235 
3236 	/* Some devices are behind an internal bridge */
3237 	switch (hw->device_id) {
3238 	case IXGBE_DEV_ID_82599_SFP_SF_QP:
3239 	case IXGBE_DEV_ID_82599_QSFP_SF_QP:
3240 		goto get_parent_info;
3241 	default:
3242 		break;
3243 	}
3244 
3245 	ixgbe_get_bus_info(hw);
3246 
3247 	/*
3248 	 * Some devices don't sit on PCI-E; for those there is no point
3249 	 * in displaying "Unknown" for bus speed and width, so skip the report.
3250 	 */
3251 	switch (hw->mac.type) {
3252 	case ixgbe_mac_X550EM_x:
3253 	case ixgbe_mac_X550EM_a:
3254 		return;
3255 	default:
3256 		goto display;
3257 	}
3258 
3259 get_parent_info:
3260 	/*
3261 	 * For the Quad port adapter we need to parse back up
3262 	 * the PCI tree to find the speed of the expansion slot
3263 	 * into which this adapter is plugged.  A bit more work.
3264 	 */
3265 	dev = device_get_parent(device_get_parent(dev));
3266 #ifdef IXGBE_DEBUG
3267 	device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev),
3268 	    pci_get_slot(dev), pci_get_function(dev));
3269 #endif
3270 	dev = device_get_parent(device_get_parent(dev));
3271 #ifdef IXGBE_DEBUG
3272 	device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev),
3273 	    pci_get_slot(dev), pci_get_function(dev));
3274 #endif
3275 	/* Now get the PCI Express Capabilities offset */
3276 	offset = pci_get_pciecap_ptr(dev);
3277 	if (offset == 0) {
3278 		/*
3279 		 * Hmm...can't get PCI-Express capabilities.
3280 		 * Falling back to default method.
3281 		 */
3282 		bus_info_valid = FALSE;
3283 		ixgbe_get_bus_info(hw);
3284 		goto display;
3285 	}
3286 	/* ...and read the Link Status Register */
3287 	link = pci_read_config(dev, offset + PCIER_LINKSTAT, 2);
3288 	ixgbe_set_pci_config_data_generic(hw, link);
3289 
3290 display:
3291 	device_printf(dev, "PCI Express Bus: Speed %s %s\n",
3292 	    hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
3293 	    hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
3294 	    hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : "Unknown",
3295 	    hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
3296 	    hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
3297 	    hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : "Unknown");
3298 
3299 	if (bus_info_valid) {
3300 		if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP &&
3301 		    hw->bus.width <= ixgbe_bus_width_pcie_x4 &&
3302 		    hw->bus.speed == ixgbe_bus_speed_2500) {
3303 			device_printf(dev, "PCI-Express bandwidth available "
3304 			    "for this card is not sufficient for optimal "
3305 			    "performance.\n");
3306 			device_printf(dev, "For optimal performance a "
3307 			    "x8 PCIE, or x4 PCIE Gen2 slot is required.\n");
3308 		}
3309 		if (hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP &&
3310 		    hw->bus.width <= ixgbe_bus_width_pcie_x8 &&
3311 		    hw->bus.speed < ixgbe_bus_speed_8000) {
3312 			device_printf(dev, "PCI-Express bandwidth available "
3313 			    "for this card is not sufficient for optimal "
3314 			    "performance.\n");
3315 			device_printf(dev, "For optimal performance a "
3316 			    "x8 PCIE Gen3 slot is required.\n");
3317 		}
3318 	} else {
3319 		device_printf(dev, "Unable to determine slot speed/width.  "
3320 		    "The speed/width reported are those of the internal "
3321 		    "switch.\n");
3322 	}
3323 }
3324 
3325 /*
3326  * TODO comment is incorrect
3327  *
3328  * Setup the correct IVAR register for a particular MSIX interrupt
3329  * - entry is the register array entry
3330  * - vector is the MSIX vector for this queue
3331  * - type is RX/TX/MISC
3332  */
3333 static void
3334 ix_set_ivar(struct ix_softc *sc, uint8_t entry, uint8_t vector,
3335     int8_t type)
3336 {
3337 	struct ixgbe_hw *hw = &sc->hw;
3338 	uint32_t ivar, index;
3339 
3340 	vector |= IXGBE_IVAR_ALLOC_VAL;
3341 
3342 	switch (hw->mac.type) {
3343 	case ixgbe_mac_82598EB:
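		/*
		 * 82598: each IVAR register holds four 8-bit entries.
		 * RX and TX queue entries are separated by a 64-entry
		 * stride (type * 64), and the "other causes" entry lives
		 * at a dedicated index.
		 */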
3344 		if (type == -1)
3345 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
3346 		else
3347 			entry += (type * 64);
3348 		index = (entry >> 2) & 0x1F;
3349 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
3350 		ivar &= ~(0xFF << (8 * (entry & 0x3)));
3351 		ivar |= (vector << (8 * (entry & 0x3)));
3352 		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
3353 		break;
3354 
3355 	case ixgbe_mac_82599EB:
3356 	case ixgbe_mac_X540:
3357 	case ixgbe_mac_X550:
3358 	case ixgbe_mac_X550EM_a:
3359 	case ixgbe_mac_X550EM_x:
3360 		if (type == -1) { /* MISC IVAR */
3361 			index = (entry & 1) * 8;
3362 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
3363 			ivar &= ~(0xFF << index);
3364 			ivar |= (vector << index);
3365 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
3366 		} else {	/* RX/TX IVARS */
3367 			index = (16 * (entry & 1)) + (8 * type);
3368 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
3369 			ivar &= ~(0xFF << index);
3370 			ivar |= (vector << index);
3371 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
3372 		}
3373 		/* FALL THROUGH */
3374 	default:
3375 		break;
3376 	}
3377 }
3378 
3379 static boolean_t
3380 ix_sfp_probe(struct ix_softc *sc)
3381 {
3382 	struct ixgbe_hw	*hw = &sc->hw;
3383 
3384 	if (hw->phy.type == ixgbe_phy_nl &&
3385 	    hw->phy.sfp_type == ixgbe_sfp_type_not_present) {
3386 		int32_t ret;
3387 
3388 		ret = hw->phy.ops.identify_sfp(hw);
3389 		if (ret)
3390 			return FALSE;
3391 
3392 		ret = hw->phy.ops.reset(hw);
3393 		sc->sfp_probe = FALSE;
3394 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3395 			if_printf(&sc->arpcom.ac_if,
3396 			     "Unsupported SFP+ module detected!  "
3397 			     "Reload driver with supported module.\n");
3398 			return FALSE;
3399 		}
3400 		if_printf(&sc->arpcom.ac_if, "SFP+ module detected!\n");
3401 
3402 		/* We now have supported optics */
3403 		return TRUE;
3404 	}
3405 	return FALSE;
3406 }
3407 
3408 static void
3409 ix_handle_link(struct ix_softc *sc)
3410 {
3411 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
3412 	ix_update_link_status(sc);
3413 }
3414 
3415 /*
3416  * Handling SFP module
3417  */
3418 static void
3419 ix_handle_mod(struct ix_softc *sc)
3420 {
3421 	struct ixgbe_hw *hw = &sc->hw;
3422 	uint32_t err;
3423 
3424 	if (sc->hw.need_crosstalk_fix) {
3425 		uint32_t cage_full = 0;
3426 
3427 		switch (hw->mac.type) {
3428 		case ixgbe_mac_82599EB:
3429 			cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
3430 			    IXGBE_ESDP_SDP2;
3431 			break;
3432 
3433 		case ixgbe_mac_X550EM_x:
3434 		case ixgbe_mac_X550EM_a:
3435 			cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
3436 			    IXGBE_ESDP_SDP0;
3437 			break;
3438 
3439 		default:
3440 			break;
3441 		}
3442 
3443 		if (!cage_full)
3444 			return;
3445 	}
3446 
3447 	err = hw->phy.ops.identify_sfp(hw);
3448 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3449 		if_printf(&sc->arpcom.ac_if,
3450 		    "Unsupported SFP+ module type was detected.\n");
3451 		return;
3452 	}
3453 
3454 	if (hw->mac.type == ixgbe_mac_82598EB)
3455 		err = hw->phy.ops.reset(hw);
3456 	else
3457 		err = hw->mac.ops.setup_sfp(hw);
3458 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
3459 		if_printf(&sc->arpcom.ac_if,
3460 		    "Setup failure - unsupported SFP+ module type.\n");
3461 		return;
3462 	}
3463 	ix_handle_msf(sc);
3464 }
3465 
3466 /*
3467  * Handling MSF (multispeed fiber)
3468  */
3469 static void
3470 ix_handle_msf(struct ix_softc *sc)
3471 {
3472 	struct ixgbe_hw *hw = &sc->hw;
3473 	uint32_t autoneg;
3474 
3475 	sc->phy_layer = ixgbe_get_supported_physical_layer(hw);
3476 	ix_init_media(sc);
3477 
3478 	if (sc->advspeed != IXGBE_LINK_SPEED_UNKNOWN)
3479 		autoneg = sc->advspeed;
3480 	else
3481 		autoneg = hw->phy.autoneg_advertised;
3482 	if (!autoneg && hw->mac.ops.get_link_capabilities != NULL) {
3483 		bool negotiate;
3484 
3485 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
3486 	}
3487 	if (hw->mac.ops.setup_link != NULL)
3488 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
3489 }
3490 
3491 static void
3492 ix_handle_phy(struct ix_softc *sc)
3493 {
3494 	struct ixgbe_hw *hw = &sc->hw;
3495 	int error;
3496 
3497 	error = hw->phy.ops.handle_lasi(hw);
3498 	if (error == IXGBE_ERR_OVERTEMP) {
3499 		if_printf(&sc->arpcom.ac_if,
3500 		    "CRITICAL: EXTERNAL PHY OVER TEMP!!  "
3501 		    "PHY will downshift to lower power state!\n");
3502 	} else if (error) {
3503 		if_printf(&sc->arpcom.ac_if,
3504 		    "Error handling LASI interrupt: %d\n", error);
3505 	}
3506 }
3507 
3508 static void
3509 ix_update_stats(struct ix_softc *sc)
3510 {
3511 	struct ifnet *ifp = &sc->arpcom.ac_if;
3512 	struct ixgbe_hw *hw = &sc->hw;
3513 	struct ixgbe_hw_stats *stats = &sc->stats;
3514 	uint32_t missed_rx = 0, bprc, lxon, lxoff, total;
3515 	uint64_t total_missed_rx = 0;
3516 	int i;
3517 
3518 	stats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
3519 	stats->illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
3520 	stats->errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
3521 	stats->mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
3522 	stats->mpc[0] += IXGBE_READ_REG(hw, IXGBE_MPC(0));
3523 
3524 	for (i = 0; i < 16; i++) {
3525 		stats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
3526 		stats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
3527 		stats->qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
3528 	}
3529 	stats->mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
3530 	stats->mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
3531 	stats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
3532 
3533 	/* Hardware workaround, gprc counts missed packets */
3534 	stats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
3535 	stats->gprc -= missed_rx;
3536 
3537 	if (hw->mac.type != ixgbe_mac_82598EB) {
3538 		stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
3539 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
3540 		stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
3541 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
3542 		stats->tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
3543 		    ((uint64_t)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
3544 		stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
3545 		stats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
3546 	} else {
3547 		stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
3548 		stats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
3549 		/* 82598 only has a counter in the high register */
3550 		stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
3551 		stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
3552 		stats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
3553 	}
3554 
3555 	/*
3556 	 * Workaround: mprc hardware is incorrectly counting
3557 	 * broadcasts, so for now we subtract those.
3558 	 */
3559 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
3560 	stats->bprc += bprc;
3561 	stats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
3562 	if (hw->mac.type == ixgbe_mac_82598EB)
3563 		stats->mprc -= bprc;
3564 
3565 	stats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
3566 	stats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
3567 	stats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
3568 	stats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
3569 	stats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
3570 	stats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
3571 
3572 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
3573 	stats->lxontxc += lxon;
3574 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
3575 	stats->lxofftxc += lxoff;
3576 	total = lxon + lxoff;
3577 
3578 	stats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
3579 	stats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
3580 	stats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
3581 	stats->gptc -= total;
3582 	stats->mptc -= total;
3583 	stats->ptc64 -= total;
3584 	stats->gotc -= total * ETHER_MIN_LEN;
3585 
3586 	stats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
3587 	stats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
3588 	stats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
3589 	stats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
3590 	stats->mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
3591 	stats->mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
3592 	stats->mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
3593 	stats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
3594 	stats->tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
3595 	stats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
3596 	stats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
3597 	stats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
3598 	stats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
3599 	stats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
3600 	stats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
3601 	stats->xec += IXGBE_READ_REG(hw, IXGBE_XEC);
3602 	stats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
3603 	stats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
3604 	/* Only read FCOE on 82599 */
3605 	if (hw->mac.type != ixgbe_mac_82598EB) {
3606 		stats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
3607 		stats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
3608 		stats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
3609 		stats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
3610 		stats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
3611 	}
3612 
3613 	/* Rx Errors */
3614 	IFNET_STAT_SET(ifp, iqdrops, total_missed_rx);
3615 	IFNET_STAT_SET(ifp, ierrors, sc->stats.crcerrs + sc->stats.rlec);
3616 }
3617 
3618 #if 0
3619 /*
3620  * Add sysctl variables, one per statistic, to the system.
3621  */
3622 static void
3623 ix_add_hw_stats(struct ix_softc *sc)
3624 {
3625 
3626 	device_t dev = sc->dev;
3627 
3628 	struct ix_tx_ring *txr = sc->tx_rings;
3629 	struct ix_rx_ring *rxr = sc->rx_rings;
3630 
3631 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
3632 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
3633 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
3634 	struct ixgbe_hw_stats *stats = &sc->stats;
3635 
3636 	struct sysctl_oid *stat_node, *queue_node;
3637 	struct sysctl_oid_list *stat_list, *queue_list;
3638 
3639 #define QUEUE_NAME_LEN 32
3640 	char namebuf[QUEUE_NAME_LEN];
3641 
3642 	/* MAC stats get their own sub node */
3643 
3644 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
3645 				    CTLFLAG_RD, NULL, "MAC Statistics");
3646 	stat_list = SYSCTL_CHILDREN(stat_node);
3647 
3648 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
3649 			CTLFLAG_RD, &stats->crcerrs,
3650 			"CRC Errors");
3651 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
3652 			CTLFLAG_RD, &stats->illerrc,
3653 			"Illegal Byte Errors");
3654 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
3655 			CTLFLAG_RD, &stats->errbc,
3656 			"Byte Errors");
3657 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
3658 			CTLFLAG_RD, &stats->mspdc,
3659 			"MAC Short Packets Discarded");
3660 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
3661 			CTLFLAG_RD, &stats->mlfc,
3662 			"MAC Local Faults");
3663 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
3664 			CTLFLAG_RD, &stats->mrfc,
3665 			"MAC Remote Faults");
3666 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
3667 			CTLFLAG_RD, &stats->rlec,
3668 			"Receive Length Errors");
3669 
3670 	/* Flow Control stats */
3671 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
3672 			CTLFLAG_RD, &stats->lxontxc,
3673 			"Link XON Transmitted");
3674 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
3675 			CTLFLAG_RD, &stats->lxonrxc,
3676 			"Link XON Received");
3677 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
3678 			CTLFLAG_RD, &stats->lxofftxc,
3679 			"Link XOFF Transmitted");
3680 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
3681 			CTLFLAG_RD, &stats->lxoffrxc,
3682 			"Link XOFF Received");
3683 
3684 	/* Packet Reception Stats */
3685 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
3686 			CTLFLAG_RD, &stats->tor,
3687 			"Total Octets Received");
3688 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
3689 			CTLFLAG_RD, &stats->gorc,
3690 			"Good Octets Received");
3691 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
3692 			CTLFLAG_RD, &stats->tpr,
3693 			"Total Packets Received");
3694 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
3695 			CTLFLAG_RD, &stats->gprc,
3696 			"Good Packets Received");
3697 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
3698 			CTLFLAG_RD, &stats->mprc,
3699 			"Multicast Packets Received");
3700 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
3701 			CTLFLAG_RD, &stats->bprc,
3702 			"Broadcast Packets Received");
3703 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
3704 			CTLFLAG_RD, &stats->prc64,
3705 			"64 byte frames received ");
3706 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
3707 			CTLFLAG_RD, &stats->prc127,
3708 			"65-127 byte frames received");
3709 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
3710 			CTLFLAG_RD, &stats->prc255,
3711 			"128-255 byte frames received");
3712 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
3713 			CTLFLAG_RD, &stats->prc511,
3714 			"256-511 byte frames received");
3715 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
3716 			CTLFLAG_RD, &stats->prc1023,
3717 			"512-1023 byte frames received");
3718 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
3719 			CTLFLAG_RD, &stats->prc1522,
3720 			"1023-1522 byte frames received");
3721 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
3722 			CTLFLAG_RD, &stats->ruc,
3723 			"Receive Undersized");
3724 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
3725 			CTLFLAG_RD, &stats->rfc,
3726 			"Fragmented Packets Received ");
3727 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
3728 			CTLFLAG_RD, &stats->roc,
3729 			"Oversized Packets Received");
3730 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
3731 			CTLFLAG_RD, &stats->rjc,
3732 			"Received Jabber");
3733 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
3734 			CTLFLAG_RD, &stats->mngprc,
3735 			"Management Packets Received");
3736 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
3737 			CTLFLAG_RD, &stats->mngptc,
3738 			"Management Packets Dropped");
3739 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
3740 			CTLFLAG_RD, &stats->xec,
3741 			"Checksum Errors");
3742 
3743 	/* Packet Transmission Stats */
3744 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
3745 			CTLFLAG_RD, &stats->gotc,
3746 			"Good Octets Transmitted");
3747 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
3748 			CTLFLAG_RD, &stats->tpt,
3749 			"Total Packets Transmitted");
3750 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
3751 			CTLFLAG_RD, &stats->gptc,
3752 			"Good Packets Transmitted");
3753 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
3754 			CTLFLAG_RD, &stats->bptc,
3755 			"Broadcast Packets Transmitted");
3756 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
3757 			CTLFLAG_RD, &stats->mptc,
3758 			"Multicast Packets Transmitted");
3759 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
3760 			CTLFLAG_RD, &stats->mngptc,
3761 			"Management Packets Transmitted");
3762 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
3763 			CTLFLAG_RD, &stats->ptc64,
3764 			"64 byte frames transmitted ");
3765 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
3766 			CTLFLAG_RD, &stats->ptc127,
3767 			"65-127 byte frames transmitted");
3768 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
3769 			CTLFLAG_RD, &stats->ptc255,
3770 			"128-255 byte frames transmitted");
3771 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
3772 			CTLFLAG_RD, &stats->ptc511,
3773 			"256-511 byte frames transmitted");
3774 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
3775 			CTLFLAG_RD, &stats->ptc1023,
3776 			"512-1023 byte frames transmitted");
3777 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
3778 			CTLFLAG_RD, &stats->ptc1522,
3779 			"1024-1522 byte frames transmitted");
3780 }
3781 #endif
3782 
3783 /*
3784  * Enable the hardware to drop packets when the buffer is full.
3785  * This is useful when multiple RX rings are used, so that no
3786  * single RX ring being full stalls the entire RX engine.  We
3787  * only enable this when multiple RX rings are used and when
3788  * flow control is disabled.
3789  */
3790 static void
3791 ix_enable_rx_drop(struct ix_softc *sc)
3792 {
3793 	struct ixgbe_hw *hw = &sc->hw;
3794 	int i;
3795 
3796 	if (bootverbose) {
3797 		if_printf(&sc->arpcom.ac_if,
3798 		    "flow control %s, enable RX drop\n",
3799 		    ix_fc2str(sc->hw.fc.current_mode));
3800 	}
3801 
3802 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3803 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3804 
3805 		srrctl |= IXGBE_SRRCTL_DROP_EN;
3806 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3807 	}
3808 }
3809 
3810 static void
3811 ix_disable_rx_drop(struct ix_softc *sc)
3812 {
3813 	struct ixgbe_hw *hw = &sc->hw;
3814 	int i;
3815 
3816 	if (bootverbose) {
3817 		if_printf(&sc->arpcom.ac_if,
3818 		    "flow control %s, disable RX drop\n",
3819 		    ix_fc2str(sc->hw.fc.current_mode));
3820 	}
3821 
3822 	for (i = 0; i < sc->rx_ring_inuse; ++i) {
3823 		uint32_t srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
3824 
3825 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
3826 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
3827 	}
3828 }
3829 
3830 static void
3831 ix_setup_serialize(struct ix_softc *sc)
3832 {
3833 	int i = 0, j;
3834 
3835 	/* Main + RX + TX */
3836 	sc->nserialize = 1 + sc->rx_ring_cnt + sc->tx_ring_cnt;
3837 	sc->serializes =
3838 	    kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
3839 	        M_DEVBUF, M_WAITOK | M_ZERO);
3840 
3841 	/*
3842 	 * Setup serializes
3843 	 *
3844 	 * NOTE: Order is critical
3845 	 */
3846 
3847 	KKASSERT(i < sc->nserialize);
3848 	sc->serializes[i++] = &sc->main_serialize;
3849 
3850 	for (j = 0; j < sc->rx_ring_cnt; ++j) {
3851 		KKASSERT(i < sc->nserialize);
3852 		sc->serializes[i++] = &sc->rx_rings[j].rx_serialize;
3853 	}
3854 
3855 	for (j = 0; j < sc->tx_ring_cnt; ++j) {
3856 		KKASSERT(i < sc->nserialize);
3857 		sc->serializes[i++] = &sc->tx_rings[j].tx_serialize;
3858 	}
3859 
3860 	KKASSERT(i == sc->nserialize);
3861 }
3862 
3863 static int
3864 ix_alloc_intr(struct ix_softc *sc)
3865 {
3866 	struct ix_intr_data *intr;
3867 	struct ix_tx_ring *txr;
3868 	u_int intr_flags;
3869 	int i;
3870 
3871 	ix_alloc_msix(sc);
3872 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
3873 		ix_set_ring_inuse(sc, FALSE);
3874 		goto done;
3875 	}
3876 
3877 	/*
3878 	 * Reset some settings changed by ix_alloc_msix().
3879 	 */
3880 	if (sc->rx_rmap_intr != NULL) {
3881 		if_ringmap_free(sc->rx_rmap_intr);
3882 		sc->rx_rmap_intr = NULL;
3883 	}
3884 	if (sc->tx_rmap_intr != NULL) {
3885 		if_ringmap_free(sc->tx_rmap_intr);
3886 		sc->tx_rmap_intr = NULL;
3887 	}
3888 	if (sc->intr_data != NULL) {
3889 		kfree(sc->intr_data, M_DEVBUF);
3890 		sc->intr_data = NULL;
3891 	}
3892 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3893 		txr = &sc->tx_rings[i];
3894 		txr->tx_intr_vec = -1;
3895 		txr->tx_intr_cpuid = -1;
3896 	}
3897 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
3898 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
3899 
3900 		rxr->rx_intr_vec = -1;
3901 		rxr->rx_txr = NULL;
3902 	}
3903 
3904 	sc->intr_cnt = 1;
3905 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data), M_DEVBUF,
3906 	    M_WAITOK | M_ZERO);
3907 	intr = &sc->intr_data[0];
3908 
3909 	/*
3910 	 * Allocate MSI/legacy interrupt resource
3911 	 */
3912 	if (sc->caps & IX_CAP_LEGACY_INTR) {
3913 		sc->intr_type = pci_alloc_1intr(sc->dev, ix_msi_enable,
3914 		    &intr->intr_rid, &intr_flags);
3915 	} else {
3916 		int cpu;
3917 
3918 		/*
3919 		 * Only MSI is supported.
3920 		 */
3921 		cpu = device_getenv_int(sc->dev, "msi.cpu", -1);
3922 		if (cpu >= ncpus)
3923 			cpu = ncpus - 1;
3924 
3925 		if (pci_alloc_msi(sc->dev, &intr->intr_rid, 1, cpu) == 0) {
3926 			sc->intr_type = PCI_INTR_TYPE_MSI;
3927 			intr_flags = RF_ACTIVE;
3928 		} else {
3929 			sc->intr_type = PCI_INTR_TYPE_LEGACY;
3930 			device_printf(sc->dev, "Unable to allocate MSI\n");
3931 			return ENXIO;
3932 		}
3933 	}
3934 
3935 	intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
3936 	    &intr->intr_rid, intr_flags);
3937 	if (intr->intr_res == NULL) {
3938 		device_printf(sc->dev, "Unable to allocate bus resource: "
3939 		    "interrupt\n");
3940 		return ENXIO;
3941 	}
3942 
3943 	intr->intr_serialize = &sc->main_serialize;
3944 	intr->intr_cpuid = rman_get_cpuid(intr->intr_res);
3945 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
3946 		intr->intr_func = ix_intr_82598;
3947 	else
3948 		intr->intr_func = ix_intr;
3949 	intr->intr_funcarg = sc;
3950 	intr->intr_rate = IX_INTR_RATE;
3951 	intr->intr_use = IX_INTR_USE_RXTX;
3952 
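	/*
	 * With a single MSI or legacy interrupt all rings share this
	 * vector; the fixed IX_TX_INTR_VEC and IX_RX*_INTR_VEC indices
	 * below only select which EICR bits the rings are mapped to,
	 * so the shared handler can tell the queues apart.
	 */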
3953 	sc->tx_rings[0].tx_intr_vec = IX_TX_INTR_VEC;
3954 	sc->tx_rings[0].tx_intr_cpuid = intr->intr_cpuid;
3955 
3956 	sc->rx_rings[0].rx_intr_vec = IX_RX0_INTR_VEC;
3957 
3958 	ix_set_ring_inuse(sc, FALSE);
3959 
3960 	KKASSERT(sc->rx_ring_inuse <= IX_MIN_RXRING_RSS);
3961 	if (sc->rx_ring_inuse == IX_MIN_RXRING_RSS) {
3962 		sc->rx_rings[1].rx_intr_vec = IX_RX1_INTR_VEC;
3963 
3964 		/*
3965 		 * Allocate RX ring map for RSS setup.
3966 		 */
3967 		sc->rx_rmap_intr = if_ringmap_alloc(sc->dev,
3968 		    IX_MIN_RXRING_RSS, IX_MIN_RXRING_RSS);
3969 		KASSERT(if_ringmap_count(sc->rx_rmap_intr) ==
3970 		    sc->rx_ring_inuse, ("RX ring inuse mismatch"));
3971 	}
3972 done:
3973 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
3974 		txr = &sc->tx_rings[i];
3975 		if (txr->tx_intr_cpuid < 0)
3976 			txr->tx_intr_cpuid = 0;
3977 	}
3978 	return 0;
3979 }
3980 
3981 static void
3982 ix_free_intr(struct ix_softc *sc)
3983 {
3984 	if (sc->intr_data == NULL)
3985 		return;
3986 
3987 	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
3988 		struct ix_intr_data *intr = &sc->intr_data[0];
3989 
3990 		KKASSERT(sc->intr_cnt == 1);
3991 		if (intr->intr_res != NULL) {
3992 			bus_release_resource(sc->dev, SYS_RES_IRQ,
3993 			    intr->intr_rid, intr->intr_res);
3994 		}
3995 		if (sc->intr_type == PCI_INTR_TYPE_MSI)
3996 			pci_release_msi(sc->dev);
3997 
3998 		kfree(sc->intr_data, M_DEVBUF);
3999 	} else {
4000 		ix_free_msix(sc, TRUE);
4001 	}
4002 }
4003 
4004 static void
4005 ix_set_ring_inuse(struct ix_softc *sc, boolean_t polling)
4006 {
4007 	sc->rx_ring_inuse = ix_get_rxring_inuse(sc, polling);
4008 	sc->tx_ring_inuse = ix_get_txring_inuse(sc, polling);
4009 	if (bootverbose) {
4010 		if_printf(&sc->arpcom.ac_if,
4011 		    "RX rings %d/%d, TX rings %d/%d\n",
4012 		    sc->rx_ring_inuse, sc->rx_ring_cnt,
4013 		    sc->tx_ring_inuse, sc->tx_ring_cnt);
4014 	}
4015 }
4016 
4017 static int
4018 ix_get_rxring_inuse(const struct ix_softc *sc, boolean_t polling)
4019 {
4020 	if (!IX_ENABLE_HWRSS(sc))
4021 		return 1;
4022 
4023 	if (polling)
4024 		return sc->rx_ring_cnt;
4025 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
4026 		return IX_MIN_RXRING_RSS;
4027 	else
4028 		return sc->rx_ring_msix;
4029 }
4030 
4031 static int
4032 ix_get_txring_inuse(const struct ix_softc *sc, boolean_t polling)
4033 {
4034 	if (!IX_ENABLE_HWTSS(sc))
4035 		return 1;
4036 
4037 	if (polling)
4038 		return sc->tx_ring_cnt;
4039 	else if (sc->intr_type != PCI_INTR_TYPE_MSIX)
4040 		return 1;
4041 	else
4042 		return sc->tx_ring_msix;
4043 }
4044 
4045 static int
4046 ix_setup_intr(struct ix_softc *sc)
4047 {
4048 	int i;
4049 
4050 	for (i = 0; i < sc->intr_cnt; ++i) {
4051 		struct ix_intr_data *intr = &sc->intr_data[i];
4052 		int error;
4053 
4054 		error = bus_setup_intr_descr(sc->dev, intr->intr_res,
4055 		    INTR_MPSAFE, intr->intr_func, intr->intr_funcarg,
4056 		    &intr->intr_hand, intr->intr_serialize, intr->intr_desc);
4057 		if (error) {
4058 			device_printf(sc->dev, "can't setup %dth intr\n", i);
4059 			ix_teardown_intr(sc, i);
4060 			return error;
4061 		}
4062 	}
4063 	return 0;
4064 }
4065 
4066 static void
4067 ix_teardown_intr(struct ix_softc *sc, int intr_cnt)
4068 {
4069 	int i;
4070 
4071 	if (sc->intr_data == NULL)
4072 		return;
4073 
4074 	for (i = 0; i < intr_cnt; ++i) {
4075 		struct ix_intr_data *intr = &sc->intr_data[i];
4076 
4077 		bus_teardown_intr(sc->dev, intr->intr_res, intr->intr_hand);
4078 	}
4079 }
4080 
4081 static void
4082 ix_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4083 {
4084 	struct ix_softc *sc = ifp->if_softc;
4085 
4086 	ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
4087 }
4088 
4089 static void
4090 ix_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4091 {
4092 	struct ix_softc *sc = ifp->if_softc;
4093 
4094 	ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
4095 }
4096 
4097 static int
4098 ix_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4099 {
4100 	struct ix_softc *sc = ifp->if_softc;
4101 
4102 	return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
4103 }
4104 
4105 #ifdef INVARIANTS
4106 
4107 static void
4108 ix_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4109     boolean_t serialized)
4110 {
4111 	struct ix_softc *sc = ifp->if_softc;
4112 
4113 	ifnet_serialize_array_assert(sc->serializes, sc->nserialize, slz,
4114 	    serialized);
4115 }
4116 
4117 #endif	/* INVARIANTS */
4118 
4119 static void
4120 ix_free_rings(struct ix_softc *sc)
4121 {
4122 	int i;
4123 
4124 	if (sc->tx_rings != NULL) {
4125 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
4126 			struct ix_tx_ring *txr = &sc->tx_rings[i];
4127 
4128 			ix_destroy_tx_ring(txr, txr->tx_ndesc);
4129 		}
4130 		kfree(sc->tx_rings, M_DEVBUF);
4131 	}
4132 
4133 	if (sc->rx_rings != NULL) {
		for (i = 0; i < sc->rx_ring_cnt; ++i) {
4135 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
4136 
4137 			ix_destroy_rx_ring(rxr, rxr->rx_ndesc);
4138 		}
4139 		kfree(sc->rx_rings, M_DEVBUF);
4140 	}
4141 
4142 	if (sc->parent_tag != NULL)
4143 		bus_dma_tag_destroy(sc->parent_tag);
4144 }
4145 
4146 static void
4147 ix_watchdog_reset(struct ix_softc *sc)
4148 {
4149 	int i;
4150 
4151 	ASSERT_IFNET_SERIALIZED_ALL(&sc->arpcom.ac_if);
4152 	ix_init(sc);
4153 	for (i = 0; i < sc->tx_ring_inuse; ++i)
4154 		ifsq_devstart_sched(sc->tx_rings[i].tx_ifsq);
4155 }
4156 
4157 static void
4158 ix_sync_netisr(struct ix_softc *sc, int flags)
4159 {
4160 	struct ifnet *ifp = &sc->arpcom.ac_if;
4161 
4162 	ifnet_serialize_all(ifp);
4163 	if (ifp->if_flags & IFF_RUNNING) {
4164 		ifp->if_flags &= ~(IFF_RUNNING | flags);
4165 	} else {
4166 		ifnet_deserialize_all(ifp);
4167 		return;
4168 	}
4169 	ifnet_deserialize_all(ifp);
4170 
	/* Make sure that polling has stopped. */
4172 	netmsg_service_sync();
4173 }
4174 
4175 static void
4176 ix_watchdog_task(void *xsc, int pending __unused)
4177 {
4178 	struct ix_softc *sc = xsc;
4179 	struct ifnet *ifp = &sc->arpcom.ac_if;
4180 
4181 	ix_sync_netisr(sc, 0);
4182 
4183 	ifnet_serialize_all(ifp);
4184 	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP)
4185 		ix_watchdog_reset(sc);
4186 	ifnet_deserialize_all(ifp);
4187 }
4188 
4189 static void
4190 ix_watchdog(struct ifaltq_subque *ifsq)
4191 {
4192 	struct ix_tx_ring *txr = ifsq_get_priv(ifsq);
4193 	struct ifnet *ifp = ifsq_get_ifp(ifsq);
4194 	struct ix_softc *sc = ifp->if_softc;
4195 
4196 	KKASSERT(txr->tx_ifsq == ifsq);
4197 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
4198 
4199 	/*
	 * If the interface has been paused (TX flow-controlled off),
	 * don't do the watchdog check.
4201 	 */
4202 	if (IXGBE_READ_REG(&sc->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) {
4203 		txr->tx_watchdog.wd_timer = 5;
4204 		return;
4205 	}
4206 
4207 	if_printf(ifp, "Watchdog timeout -- resetting\n");
4208 	if_printf(ifp, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->tx_idx,
4209 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDH(txr->tx_idx)),
4210 	    IXGBE_READ_REG(&sc->hw, IXGBE_TDT(txr->tx_idx)));
4211 	if_printf(ifp, "TX(%d) desc avail = %d, next TX to Clean = %d\n",
4212 	    txr->tx_idx, txr->tx_avail, txr->tx_next_clean);
4213 
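	/*
	 * With direct-input polling active the reset is deferred to
	 * the watchdog task: ix_watchdog_task() first synchronizes the
	 * netisrs via ix_sync_netisr(), which presumably cannot be
	 * done safely from this context.
	 */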
4214 	if ((ifp->if_flags & (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING)) ==
4215 	    (IFF_IDIRECT | IFF_NPOLLING | IFF_RUNNING))
4216 		taskqueue_enqueue(taskqueue_thread[0], &sc->wdog_task);
4217 	else
4218 		ix_watchdog_reset(sc);
4219 }
4220 
4221 static void
4222 ix_free_tx_ring(struct ix_tx_ring *txr)
4223 {
4224 	int i;
4225 
4226 	for (i = 0; i < txr->tx_ndesc; ++i) {
4227 		struct ix_tx_buf *txbuf = &txr->tx_buf[i];
4228 
4229 		if (txbuf->m_head != NULL)
4230 			ix_free_txbuf(txr, txbuf);
4231 	}
4232 }
4233 
4234 static void
4235 ix_free_rx_ring(struct ix_rx_ring *rxr)
4236 {
4237 	int i;
4238 
4239 	for (i = 0; i < rxr->rx_ndesc; ++i) {
4240 		struct ix_rx_buf *rxbuf = &rxr->rx_buf[i];
4241 
4242 		if (rxbuf->fmp != NULL) {
4243 			m_freem(rxbuf->fmp);
4244 			rxbuf->fmp = NULL;
4245 			rxbuf->lmp = NULL;
4246 		} else {
4247 			KKASSERT(rxbuf->lmp == NULL);
4248 		}
4249 		if (rxbuf->m_head != NULL) {
4250 			bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4251 			m_freem(rxbuf->m_head);
4252 			rxbuf->m_head = NULL;
4253 		}
4254 	}
4255 }
4256 
4257 static int
4258 ix_newbuf(struct ix_rx_ring *rxr, int i, boolean_t wait)
4259 {
4260 	struct mbuf *m;
4261 	bus_dma_segment_t seg;
4262 	bus_dmamap_t map;
4263 	struct ix_rx_buf *rxbuf;
4264 	int flags, error, nseg;
4265 
4266 	flags = M_NOWAIT;
4267 	if (__predict_false(wait))
4268 		flags = M_WAITOK;
4269 
4270 	m = m_getjcl(flags, MT_DATA, M_PKTHDR, rxr->rx_mbuf_sz);
4271 	if (m == NULL) {
4272 		if (wait) {
4273 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4274 			    "Unable to allocate RX mbuf\n");
4275 		}
4276 		return ENOBUFS;
4277 	}
4278 	m->m_len = m->m_pkthdr.len = rxr->rx_mbuf_sz;
4279 
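	/*
	 * Load the new mbuf into the spare DMA map first; if the load
	 * fails, the ring slot's current mbuf and map are left intact.
	 * On success the spare map is swapped with the slot's map
	 * below.
	 */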
4280 	error = bus_dmamap_load_mbuf_segment(rxr->rx_tag,
4281 	    rxr->rx_sparemap, m, &seg, 1, &nseg, BUS_DMA_NOWAIT);
4282 	if (error) {
4283 		m_freem(m);
4284 		if (wait) {
4285 			if_printf(&rxr->rx_sc->arpcom.ac_if,
4286 			    "Unable to load RX mbuf\n");
4287 		}
4288 		return error;
4289 	}
4290 
4291 	rxbuf = &rxr->rx_buf[i];
4292 	if (rxbuf->m_head != NULL)
4293 		bus_dmamap_unload(rxr->rx_tag, rxbuf->map);
4294 
4295 	map = rxbuf->map;
4296 	rxbuf->map = rxr->rx_sparemap;
4297 	rxr->rx_sparemap = map;
4298 
4299 	rxbuf->m_head = m;
4300 	rxbuf->paddr = seg.ds_addr;
4301 
4302 	ix_setup_rxdesc(&rxr->rx_base[i], rxbuf);
4303 	return 0;
4304 }
4305 
4306 static void
4307 ix_add_sysctl(struct ix_softc *sc)
4308 {
4309 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev);
4310 	struct sysctl_oid *tree = device_get_sysctl_tree(sc->dev);
4311 	char node[32];
4312 	int i;
4313 
4314 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4315 	    OID_AUTO, "rxr", CTLFLAG_RD, &sc->rx_ring_cnt, 0, "# of RX rings");
4316 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4317 	    OID_AUTO, "rxr_inuse", CTLFLAG_RD, &sc->rx_ring_inuse, 0,
4318 	    "# of RX rings used");
4319 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4320 	    OID_AUTO, "txr", CTLFLAG_RD, &sc->tx_ring_cnt, 0, "# of TX rings");
4321 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4322 	    OID_AUTO, "txr_inuse", CTLFLAG_RD, &sc->tx_ring_inuse, 0,
4323 	    "# of TX rings used");
4324 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4325 	    OID_AUTO, "rxd", CTLTYPE_INT | CTLFLAG_RD,
4326 	    sc, 0, ix_sysctl_rxd, "I",
4327 	    "# of RX descs");
4328 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4329 	    OID_AUTO, "txd", CTLTYPE_INT | CTLFLAG_RD,
4330 	    sc, 0, ix_sysctl_txd, "I",
4331 	    "# of TX descs");
4332 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4333 	    OID_AUTO, "tx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4334 	    sc, 0, ix_sysctl_tx_wreg_nsegs, "I",
4335 	    "# of segments sent before write to hardware register");
4336 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4337 	    OID_AUTO, "rx_wreg_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4338 	    sc, 0, ix_sysctl_rx_wreg_nsegs, "I",
4339 	    "# of received segments sent before write to hardware register");
4340 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4341 	    OID_AUTO, "tx_intr_nsegs", CTLTYPE_INT | CTLFLAG_RW,
4342 	    sc, 0, ix_sysctl_tx_intr_nsegs, "I",
4343 	    "# of segments per TX interrupt");
4344 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4345 	    OID_AUTO, "direct_input", CTLFLAG_RW, &sc->direct_input, 0,
4346 	    "Enable direct input");
4347 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
4348 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4349 		    OID_AUTO, "tx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4350 		    sc->tx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4351 		    "TX MSI-X CPU map");
4352 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4353 		    OID_AUTO, "rx_msix_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4354 		    sc->rx_rmap_intr, 0, if_ringmap_cpumap_sysctl, "I",
4355 		    "RX MSI-X CPU map");
4356 	}
4357 #ifdef IFPOLL_ENABLE
4358 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4359 	    OID_AUTO, "tx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4360 	    sc->tx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4361 	    "TX polling CPU map");
4362 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree),
4363 	    OID_AUTO, "rx_poll_cpumap", CTLTYPE_OPAQUE | CTLFLAG_RD,
4364 	    sc->rx_rmap, 0, if_ringmap_cpumap_sysctl, "I",
4365 	    "RX polling CPU map");
4366 #endif
4367 
4368 #define IX_ADD_INTR_RATE_SYSCTL(sc, use, name) \
4369 do { \
4370 	ix_add_intr_rate_sysctl(sc, IX_INTR_USE_##use, #name, \
4371 	    ix_sysctl_##name, #use " interrupt rate"); \
4372 } while (0)
4373 
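	/*
	 * For example, IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate)
	 * creates the "rxtx_intr_rate" sysctl node backed by
	 * ix_sysctl_rxtx_intr_rate() with the description
	 * "RXTX interrupt rate".
	 */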
4374 	IX_ADD_INTR_RATE_SYSCTL(sc, RXTX, rxtx_intr_rate);
4375 	IX_ADD_INTR_RATE_SYSCTL(sc, RX, rx_intr_rate);
4376 	IX_ADD_INTR_RATE_SYSCTL(sc, TX, tx_intr_rate);
4377 	IX_ADD_INTR_RATE_SYSCTL(sc, STATUS, sts_intr_rate);
4378 
4379 #undef IX_ADD_INTR_RATE_SYSCTL
4380 
4381 #ifdef IX_RSS_DEBUG
4382 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree),
4383 	    OID_AUTO, "rss_debug", CTLFLAG_RW, &sc->rss_debug, 0,
4384 	    "RSS debug level");
4385 	for (i = 0; i < sc->rx_ring_cnt; ++i) {
4386 		ksnprintf(node, sizeof(node), "rx%d_pkt", i);
4387 		SYSCTL_ADD_ULONG(ctx,
4388 		    SYSCTL_CHILDREN(tree), OID_AUTO, node,
4389 		    CTLFLAG_RW, &sc->rx_rings[i].rx_pkts, "RXed packets");
4390 	}
4391 #endif
4392 	for (i = 0; i < sc->tx_ring_cnt; ++i) {
4393 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4394 
4395 		ksnprintf(node, sizeof(node), "tx%d_nmbuf", i);
4396 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4397 		    CTLTYPE_INT | CTLFLAG_RD, txr, 0, ix_sysctl_tx_nmbuf, "I",
4398 		    "# of pending TX mbufs");
4399 
4400 		ksnprintf(node, sizeof(node), "tx%d_gc", i);
4401 		SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, node,
4402 		    CTLFLAG_RW, &txr->tx_gc, "# of TX desc GC");
4403 	}
4404 
4405 #if 0
4406 	ix_add_hw_stats(sc);
4407 #endif
4408 
4409 }
4410 
4411 static int
4412 ix_sysctl_tx_nmbuf(SYSCTL_HANDLER_ARGS)
4413 {
4414 	struct ix_tx_ring *txr = (void *)arg1;
4415 	int nmbuf;
4416 
4417 	nmbuf = txr->tx_nmbuf;
4418 	return (sysctl_handle_int(oidp, &nmbuf, 0, req));
4419 }
4420 
4421 static int
4422 ix_sysctl_tx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4423 {
4424 	struct ix_softc *sc = (void *)arg1;
4425 	struct ifnet *ifp = &sc->arpcom.ac_if;
4426 	int error, nsegs, i;
4427 
4428 	nsegs = sc->tx_rings[0].tx_wreg_nsegs;
4429 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4430 	if (error || req->newptr == NULL)
4431 		return error;
4432 	if (nsegs < 0)
4433 		return EINVAL;
4434 
4435 	ifnet_serialize_all(ifp);
4436 	for (i = 0; i < sc->tx_ring_cnt; ++i)
4437 		sc->tx_rings[i].tx_wreg_nsegs = nsegs;
4438 	ifnet_deserialize_all(ifp);
4439 
4440 	return 0;
4441 }
4442 
4443 static int
4444 ix_sysctl_rx_wreg_nsegs(SYSCTL_HANDLER_ARGS)
4445 {
4446 	struct ix_softc *sc = (void *)arg1;
4447 	struct ifnet *ifp = &sc->arpcom.ac_if;
4448 	int error, nsegs, i;
4449 
4450 	nsegs = sc->rx_rings[0].rx_wreg_nsegs;
4451 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4452 	if (error || req->newptr == NULL)
4453 		return error;
4454 	if (nsegs < 0)
4455 		return EINVAL;
4456 
4457 	ifnet_serialize_all(ifp);
4458 	for (i = 0; i < sc->rx_ring_cnt; ++i)
		sc->rx_rings[i].rx_wreg_nsegs = nsegs;
4460 	ifnet_deserialize_all(ifp);
4461 
4462 	return 0;
4463 }
4464 
4465 static int
4466 ix_sysctl_txd(SYSCTL_HANDLER_ARGS)
4467 {
4468 	struct ix_softc *sc = (void *)arg1;
4469 	int txd;
4470 
4471 	txd = sc->tx_rings[0].tx_ndesc;
4472 	return sysctl_handle_int(oidp, &txd, 0, req);
4473 }
4474 
4475 static int
4476 ix_sysctl_rxd(SYSCTL_HANDLER_ARGS)
4477 {
4478 	struct ix_softc *sc = (void *)arg1;
4479 	int rxd;
4480 
4481 	rxd = sc->rx_rings[0].rx_ndesc;
4482 	return sysctl_handle_int(oidp, &rxd, 0, req);
4483 }
4484 
4485 static int
4486 ix_sysctl_tx_intr_nsegs(SYSCTL_HANDLER_ARGS)
4487 {
4488 	struct ix_softc *sc = (void *)arg1;
4489 	struct ifnet *ifp = &sc->arpcom.ac_if;
4490 	struct ix_tx_ring *txr = &sc->tx_rings[0];
4491 	int error, nsegs;
4492 
4493 	nsegs = txr->tx_intr_nsegs;
4494 	error = sysctl_handle_int(oidp, &nsegs, 0, req);
4495 	if (error || req->newptr == NULL)
4496 		return error;
4497 	if (nsegs < 0)
4498 		return EINVAL;
4499 
4500 	ifnet_serialize_all(ifp);
4501 
4502 	if (nsegs >= txr->tx_ndesc - IX_MAX_SCATTER - IX_TX_RESERVED) {
4503 		error = EINVAL;
4504 	} else {
4505 		int i;
4506 
4507 		error = 0;
4508 		for (i = 0; i < sc->tx_ring_cnt; ++i)
4509 			sc->tx_rings[i].tx_intr_nsegs = nsegs;
4510 	}
4511 
4512 	ifnet_deserialize_all(ifp);
4513 
4514 	return error;
4515 }
4516 
4517 static void
4518 ix_set_eitr(struct ix_softc *sc, int idx, int rate)
4519 {
4520 	uint32_t eitr, eitr_intvl;
4521 
4522 	eitr = IXGBE_READ_REG(&sc->hw, IXGBE_EITR(idx));
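	/*
	 * Derive the EITR interval from the requested rate
	 * (interrupts/second): 10^9 / 256 / rate, i.e. roughly the
	 * desired inter-interrupt gap expressed in 256ns units, then
	 * clamp it to the MAC-specific limits below.
	 */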
4523 	eitr_intvl = 1000000000 / 256 / rate;
4524 
4525 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4526 		eitr &= ~IX_EITR_INTVL_MASK_82598;
4527 		if (eitr_intvl == 0)
4528 			eitr_intvl = 1;
4529 		else if (eitr_intvl > IX_EITR_INTVL_MASK_82598)
4530 			eitr_intvl = IX_EITR_INTVL_MASK_82598;
4531 	} else {
4532 		eitr &= ~IX_EITR_INTVL_MASK;
4533 
4534 		eitr_intvl &= ~IX_EITR_INTVL_RSVD_MASK;
4535 		if (eitr_intvl == 0)
4536 			eitr_intvl = IX_EITR_INTVL_MIN;
4537 		else if (eitr_intvl > IX_EITR_INTVL_MAX)
4538 			eitr_intvl = IX_EITR_INTVL_MAX;
4539 	}
4540 	eitr |= eitr_intvl;
4541 
4542 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EITR(idx), eitr);
4543 }
4544 
4545 static int
4546 ix_sysctl_rxtx_intr_rate(SYSCTL_HANDLER_ARGS)
4547 {
4548 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RXTX);
4549 }
4550 
4551 static int
4552 ix_sysctl_rx_intr_rate(SYSCTL_HANDLER_ARGS)
4553 {
4554 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_RX);
4555 }
4556 
4557 static int
4558 ix_sysctl_tx_intr_rate(SYSCTL_HANDLER_ARGS)
4559 {
4560 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_TX);
4561 }
4562 
4563 static int
4564 ix_sysctl_sts_intr_rate(SYSCTL_HANDLER_ARGS)
4565 {
4566 	return ix_sysctl_intr_rate(oidp, arg1, arg2, req, IX_INTR_USE_STATUS);
4567 }
4568 
4569 static int
4570 ix_sysctl_intr_rate(SYSCTL_HANDLER_ARGS, int use)
4571 {
4572 	struct ix_softc *sc = (void *)arg1;
4573 	struct ifnet *ifp = &sc->arpcom.ac_if;
4574 	int error, rate, i;
4575 
4576 	rate = 0;
4577 	for (i = 0; i < sc->intr_cnt; ++i) {
4578 		if (sc->intr_data[i].intr_use == use) {
4579 			rate = sc->intr_data[i].intr_rate;
4580 			break;
4581 		}
4582 	}
4583 
4584 	error = sysctl_handle_int(oidp, &rate, 0, req);
4585 	if (error || req->newptr == NULL)
4586 		return error;
4587 	if (rate <= 0)
4588 		return EINVAL;
4589 
4590 	ifnet_serialize_all(ifp);
4591 
4592 	for (i = 0; i < sc->intr_cnt; ++i) {
4593 		if (sc->intr_data[i].intr_use == use) {
4594 			sc->intr_data[i].intr_rate = rate;
4595 			if (ifp->if_flags & IFF_RUNNING)
4596 				ix_set_eitr(sc, i, rate);
4597 		}
4598 	}
4599 
4600 	ifnet_deserialize_all(ifp);
4601 
4602 	return error;
4603 }
4604 
4605 static void
4606 ix_add_intr_rate_sysctl(struct ix_softc *sc, int use,
4607     const char *name, int (*handler)(SYSCTL_HANDLER_ARGS), const char *desc)
4608 {
4609 	int i;
4610 
4611 	for (i = 0; i < sc->intr_cnt; ++i) {
4612 		if (sc->intr_data[i].intr_use == use) {
4613 			SYSCTL_ADD_PROC(device_get_sysctl_ctx(sc->dev),
4614 			    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)),
4615 			    OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW,
4616 			    sc, 0, handler, "I", desc);
4617 			break;
4618 		}
4619 	}
4620 }
4621 
4622 static void
4623 ix_set_timer_cpuid(struct ix_softc *sc, boolean_t polling)
4624 {
4625 	if (polling || sc->intr_type == PCI_INTR_TYPE_MSIX)
4626 		sc->timer_cpuid = 0; /* XXX fixed */
4627 	else
4628 		sc->timer_cpuid = rman_get_cpuid(sc->intr_data[0].intr_res);
4629 }
4630 
4631 static void
4632 ix_alloc_msix(struct ix_softc *sc)
4633 {
4634 	int msix_enable, msix_cnt, msix_ring, alloc_cnt;
4635 	struct ix_intr_data *intr;
4636 	int i, x, error;
4637 	int ring_cnt, ring_cntmax;
4638 	boolean_t setup = FALSE;
4639 
4640 	msix_enable = ix_msix_enable;
4641 	/*
4642 	 * Don't enable MSI-X on 82598 by default, see:
4643 	 * 82598 specification update errata #38
4644 	 */
4645 	if (sc->hw.mac.type == ixgbe_mac_82598EB)
4646 		msix_enable = 0;
4647 	msix_enable = device_getenv_int(sc->dev, "msix.enable", msix_enable);
4648 	if (!msix_enable)
4649 		return;
4650 
4651 	msix_cnt = pci_msix_count(sc->dev);
4652 #ifdef IX_MSIX_DEBUG
4653 	msix_cnt = device_getenv_int(sc->dev, "msix.count", msix_cnt);
4654 #endif
4655 	if (msix_cnt <= 1) {
		/* A single MSI-X vector does not make sense. */
4657 		return;
4658 	}
4659 
4660 	/*
4661 	 * Make sure that we don't break interrupt related registers
4662 	 * (EIMS, etc) limitation.
4663 	 */
4664 	if (sc->hw.mac.type == ixgbe_mac_82598EB) {
4665 		if (msix_cnt > IX_MAX_MSIX_82598)
4666 			msix_cnt = IX_MAX_MSIX_82598;
4667 	} else {
4668 		if (msix_cnt > IX_MAX_MSIX)
4669 			msix_cnt = IX_MAX_MSIX;
4670 	}
4671 	if (bootverbose)
4672 		device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4673 	msix_ring = msix_cnt - 1; /* -1 for status */
4674 
4675 	/*
4676 	 * Configure # of RX/TX rings usable by MSI-X.
4677 	 */
4678 	ix_get_rxring_cnt(sc, &ring_cnt, &ring_cntmax);
4679 	if (ring_cntmax > msix_ring)
4680 		ring_cntmax = msix_ring;
4681 	sc->rx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4682 
4683 	ix_get_txring_cnt(sc, &ring_cnt, &ring_cntmax);
4684 	if (ring_cntmax > msix_ring)
4685 		ring_cntmax = msix_ring;
4686 	sc->tx_rmap_intr = if_ringmap_alloc(sc->dev, ring_cnt, ring_cntmax);
4687 
4688 	if_ringmap_match(sc->dev, sc->rx_rmap_intr, sc->tx_rmap_intr);
4689 	sc->rx_ring_msix = if_ringmap_count(sc->rx_rmap_intr);
4690 	KASSERT(sc->rx_ring_msix <= sc->rx_ring_cnt,
4691 	    ("total RX ring count %d, MSI-X RX ring count %d",
4692 	     sc->rx_ring_cnt, sc->rx_ring_msix));
4693 	sc->tx_ring_msix = if_ringmap_count(sc->tx_rmap_intr);
4694 	KASSERT(sc->tx_ring_msix <= sc->tx_ring_cnt,
4695 	    ("total TX ring count %d, MSI-X TX ring count %d",
4696 	     sc->tx_ring_cnt, sc->tx_ring_msix));
4697 
4698 	/*
4699 	 * Aggregate TX/RX MSI-X
4700 	 */
4701 	ring_cntmax = sc->rx_ring_msix;
4702 	if (ring_cntmax < sc->tx_ring_msix)
4703 		ring_cntmax = sc->tx_ring_msix;
4704 	KASSERT(ring_cntmax <= msix_ring,
4705 	    ("invalid ring count max %d, MSI-X count for rings %d",
4706 	     ring_cntmax, msix_ring));
4707 
4708 	alloc_cnt = ring_cntmax + 1; /* +1 for status */
4709 	if (bootverbose) {
4710 		device_printf(sc->dev, "MSI-X alloc %d, "
4711 		    "RX ring %d, TX ring %d\n", alloc_cnt,
4712 		    sc->rx_ring_msix, sc->tx_ring_msix);
4713 	}
4714 
4715 	sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82598);
4716 	sc->msix_mem_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4717 	    &sc->msix_mem_rid, RF_ACTIVE);
4718 	if (sc->msix_mem_res == NULL) {
4719 		sc->msix_mem_rid = PCIR_BAR(IX_MSIX_BAR_82599);
4720 		sc->msix_mem_res = bus_alloc_resource_any(sc->dev,
4721 		    SYS_RES_MEMORY, &sc->msix_mem_rid, RF_ACTIVE);
4722 		if (sc->msix_mem_res == NULL) {
4723 			device_printf(sc->dev, "Unable to map MSI-X table\n");
4724 			return;
4725 		}
4726 	}
4727 
4728 	sc->intr_cnt = alloc_cnt;
4729 	sc->intr_data = kmalloc(sizeof(struct ix_intr_data) * sc->intr_cnt,
4730 	    M_DEVBUF, M_WAITOK | M_ZERO);
4731 	for (x = 0; x < sc->intr_cnt; ++x) {
4732 		intr = &sc->intr_data[x];
4733 		intr->intr_rid = -1;
4734 		intr->intr_rate = IX_INTR_RATE;
4735 	}
4736 
4737 	x = 0;
4738 	for (i = 0; i < sc->rx_ring_msix; ++i) {
4739 		struct ix_rx_ring *rxr = &sc->rx_rings[i];
4740 		struct ix_tx_ring *txr = NULL;
4741 		int cpuid, j;
4742 
4743 		KKASSERT(x < sc->intr_cnt);
4744 		rxr->rx_intr_vec = x;
4745 		ix_setup_msix_eims(sc, x,
4746 		    &rxr->rx_eims, &rxr->rx_eims_val);
4747 
4748 		cpuid = if_ringmap_cpumap(sc->rx_rmap_intr, i);
4749 
4750 		/*
4751 		 * Try finding TX ring to piggyback.
4752 		 */
4753 		for (j = 0; j < sc->tx_ring_msix; ++j) {
4754 			if (cpuid ==
4755 			    if_ringmap_cpumap(sc->tx_rmap_intr, j)) {
4756 				txr = &sc->tx_rings[j];
4757 				KKASSERT(txr->tx_intr_cpuid < 0);
4758 				break;
4759 			}
4760 		}
4761 		rxr->rx_txr = txr;
4762 
4763 		intr = &sc->intr_data[x++];
4764 		intr->intr_serialize = &rxr->rx_serialize;
4765 		if (txr != NULL) {
4766 			ksnprintf(intr->intr_desc0,
4767 			    sizeof(intr->intr_desc0), "%s rx%dtx%d",
4768 			    device_get_nameunit(sc->dev), i, txr->tx_idx);
4769 			intr->intr_use = IX_INTR_USE_RXTX;
4770 			intr->intr_func = ix_msix_rxtx;
4771 		} else {
4772 			ksnprintf(intr->intr_desc0,
4773 			    sizeof(intr->intr_desc0), "%s rx%d",
4774 			    device_get_nameunit(sc->dev), i);
4775 			intr->intr_rate = IX_MSIX_RX_RATE;
4776 			intr->intr_use = IX_INTR_USE_RX;
4777 			intr->intr_func = ix_msix_rx;
4778 		}
4779 		intr->intr_funcarg = rxr;
4780 		intr->intr_cpuid = cpuid;
4781 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4782 		intr->intr_desc = intr->intr_desc0;
4783 
4784 		if (txr != NULL) {
4785 			txr->tx_intr_cpuid = intr->intr_cpuid;
4786 			/* NOTE: Leave TX ring's intr_vec negative. */
4787 		}
4788 	}
4789 
4790 	for (i = 0; i < sc->tx_ring_msix; ++i) {
4791 		struct ix_tx_ring *txr = &sc->tx_rings[i];
4792 
4793 		if (txr->tx_intr_cpuid >= 0) {
4794 			/* Piggybacked by RX ring. */
4795 			continue;
4796 		}
4797 
4798 		KKASSERT(x < sc->intr_cnt);
4799 		txr->tx_intr_vec = x;
4800 		ix_setup_msix_eims(sc, x, &txr->tx_eims, &txr->tx_eims_val);
4801 
4802 		intr = &sc->intr_data[x++];
4803 		intr->intr_serialize = &txr->tx_serialize;
4804 		intr->intr_rate = IX_MSIX_TX_RATE;
4805 		intr->intr_use = IX_INTR_USE_TX;
4806 		intr->intr_func = ix_msix_tx;
4807 		intr->intr_funcarg = txr;
4808 		intr->intr_cpuid = if_ringmap_cpumap(sc->tx_rmap_intr, i);
4809 		KKASSERT(intr->intr_cpuid < netisr_ncpus);
4810 		ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s tx%d",
4811 		    device_get_nameunit(sc->dev), i);
4812 		intr->intr_desc = intr->intr_desc0;
4813 
4814 		txr->tx_intr_cpuid = intr->intr_cpuid;
4815 	}
4816 
4817 	/*
4818 	 * Status MSI-X
4819 	 */
4820 	KKASSERT(x < sc->intr_cnt);
4821 	sc->sts_msix_vec = x;
4822 
4823 	intr = &sc->intr_data[x++];
4824 
4825 	intr->intr_serialize = &sc->main_serialize;
4826 	intr->intr_func = ix_msix_status;
4827 	intr->intr_funcarg = sc;
4828 	intr->intr_cpuid = 0;
4829 	intr->intr_use = IX_INTR_USE_STATUS;
4830 
4831 	ksnprintf(intr->intr_desc0, sizeof(intr->intr_desc0), "%s sts",
4832 	    device_get_nameunit(sc->dev));
4833 	intr->intr_desc = intr->intr_desc0;
4834 
4835 	KKASSERT(x == sc->intr_cnt);
4836 
4837 	error = pci_setup_msix(sc->dev);
4838 	if (error) {
4839 		device_printf(sc->dev, "Setup MSI-X failed\n");
4840 		goto back;
4841 	}
4842 	setup = TRUE;
4843 
4844 	for (i = 0; i < sc->intr_cnt; ++i) {
4845 		intr = &sc->intr_data[i];
4846 
4847 		error = pci_alloc_msix_vector(sc->dev, i, &intr->intr_rid,
4848 		    intr->intr_cpuid);
4849 		if (error) {
4850 			device_printf(sc->dev,
4851 			    "Unable to allocate MSI-X %d on cpu%d\n", i,
4852 			    intr->intr_cpuid);
4853 			goto back;
4854 		}
4855 
4856 		intr->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
4857 		    &intr->intr_rid, RF_ACTIVE);
4858 		if (intr->intr_res == NULL) {
4859 			device_printf(sc->dev,
4860 			    "Unable to allocate MSI-X %d resource\n", i);
4861 			error = ENOMEM;
4862 			goto back;
4863 		}
4864 	}
4865 
4866 	pci_enable_msix(sc->dev);
4867 	sc->intr_type = PCI_INTR_TYPE_MSIX;
4868 back:
4869 	if (error)
4870 		ix_free_msix(sc, setup);
4871 }
4872 
4873 static void
4874 ix_free_msix(struct ix_softc *sc, boolean_t setup)
4875 {
4876 	int i;
4877 
4878 	KKASSERT(sc->intr_cnt > 1);
4879 
4880 	for (i = 0; i < sc->intr_cnt; ++i) {
4881 		struct ix_intr_data *intr = &sc->intr_data[i];
4882 
4883 		if (intr->intr_res != NULL) {
4884 			bus_release_resource(sc->dev, SYS_RES_IRQ,
4885 			    intr->intr_rid, intr->intr_res);
4886 		}
4887 		if (intr->intr_rid >= 0)
4888 			pci_release_msix_vector(sc->dev, intr->intr_rid);
4889 	}
4890 	if (setup)
4891 		pci_teardown_msix(sc->dev);
4892 
4893 	sc->intr_cnt = 0;
4894 	kfree(sc->intr_data, M_DEVBUF);
4895 	sc->intr_data = NULL;
4896 }
4897 
4898 static void
4899 ix_msix_rx(void *xrxr)
4900 {
4901 	struct ix_rx_ring *rxr = xrxr;
4902 
4903 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4904 
4905 	ix_rxeof(rxr, -1);
4906 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4907 }
4908 
4909 static void
4910 ix_msix_tx(void *xtxr)
4911 {
4912 	struct ix_tx_ring *txr = xtxr;
4913 
4914 	ASSERT_SERIALIZED(&txr->tx_serialize);
4915 
4916 	ix_tx_intr(txr, *(txr->tx_hdr));
4917 	IXGBE_WRITE_REG(&txr->tx_sc->hw, txr->tx_eims, txr->tx_eims_val);
4918 }
4919 
4920 static void
4921 ix_msix_rxtx(void *xrxr)
4922 {
4923 	struct ix_rx_ring *rxr = xrxr;
4924 	struct ix_tx_ring *txr;
4925 	int hdr;
4926 
4927 	ASSERT_SERIALIZED(&rxr->rx_serialize);
4928 
4929 	ix_rxeof(rxr, -1);
4930 
4931 	/*
4932 	 * NOTE:
4933 	 * Since tx_next_clean is only changed by ix_txeof(),
	 * which is called only from the interrupt handler, the
	 * check w/o holding the TX serializer is MPSAFE.
4936 	 */
4937 	txr = rxr->rx_txr;
4938 	hdr = *(txr->tx_hdr);
4939 	if (hdr != txr->tx_next_clean) {
4940 		lwkt_serialize_enter(&txr->tx_serialize);
4941 		ix_tx_intr(txr, hdr);
4942 		lwkt_serialize_exit(&txr->tx_serialize);
4943 	}
4944 
4945 	IXGBE_WRITE_REG(&rxr->rx_sc->hw, rxr->rx_eims, rxr->rx_eims_val);
4946 }
4947 
4948 static void
4949 ix_intr_status(struct ix_softc *sc, uint32_t eicr)
4950 {
4951 	struct ixgbe_hw *hw = &sc->hw;
4952 
4953 	/* Link status change */
4954 	if (eicr & IXGBE_EICR_LSC)
4955 		ix_handle_link(sc);
4956 
4957 	if (hw->mac.type != ixgbe_mac_82598EB) {
4958 		if (eicr & IXGBE_EICR_ECC)
4959 			if_printf(&sc->arpcom.ac_if, "ECC ERROR!!  REBOOT!!\n");
4960 
4961 		/* Check for over temp condition */
4962 		if (sc->caps & IX_CAP_TEMP_SENSOR) {
4963 			int32_t retval;
4964 
4965 			switch (sc->hw.mac.type) {
4966 			case ixgbe_mac_X550EM_a:
4967 				if ((eicr & IXGBE_EICR_GPI_SDP0_X550EM_a) == 0)
4968 					break;
4969 				retval = hw->phy.ops.check_overtemp(hw);
4970 				if (retval != IXGBE_ERR_OVERTEMP)
4971 					break;
4972 
4973 				/* Disable more temp sensor interrupts. */
4974 				IXGBE_WRITE_REG(hw, IXGBE_EIMC,
4975 				    IXGBE_EICR_GPI_SDP0_X550EM_a);
4976 				if_printf(&sc->arpcom.ac_if, "CRITICAL: "
4977 				    "OVER TEMP!!  PHY IS SHUT DOWN!!  "
4978 				    "SHUTDOWN!!\n");
4979 				break;
4980 
4981 			default:
4982 				if ((eicr & IXGBE_EICR_TS) == 0)
4983 					break;
4984 				retval = hw->phy.ops.check_overtemp(hw);
4985 				if (retval != IXGBE_ERR_OVERTEMP)
4986 					break;
4987 
4988 				/* Disable more temp sensor interrupts. */
4989 				IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_TS);
4990 				if_printf(&sc->arpcom.ac_if, "CRITICAL: "
4991 				    "OVER TEMP!!  PHY IS SHUT DOWN!!  "
4992 				    "SHUTDOWN!!\n");
4993 				break;
4994 			}
4995 		}
4996 	}
4997 
4998 	if (ix_is_sfp(hw)) {
4999 		uint32_t eicr_mask;
5000 
5001 		/* Pluggable optics-related interrupt */
5002 		if (hw->mac.type >= ixgbe_mac_X540)
5003 			eicr_mask = IXGBE_EICR_GPI_SDP0_X540;
5004 		else
5005 			eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
5006 
5007 		if (eicr & eicr_mask)
5008 			ix_handle_mod(sc);
5009 
5010 		if (hw->mac.type == ixgbe_mac_82599EB &&
5011 		    (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)))
5012 			ix_handle_msf(sc);
5013 	}
5014 
5015 	/* Check for fan failure */
5016 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
5017 		ix_detect_fanfail(sc, eicr, TRUE);
5018 
5019 	/* External PHY interrupt */
5020 	if (hw->phy.type == ixgbe_phy_x550em_ext_t &&
5021 	    (eicr & IXGBE_EICR_GPI_SDP0_X540))
5022 		ix_handle_phy(sc);
5023 }
5024 
5025 static void
5026 ix_msix_status(void *xsc)
5027 {
5028 	struct ix_softc *sc = xsc;
5029 	uint32_t eicr;
5030 
5031 	ASSERT_SERIALIZED(&sc->main_serialize);
5032 
5033 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
5034 	ix_intr_status(sc, eicr);
5035 
5036 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, sc->intr_mask);
5037 }
5038 
5039 static void
5040 ix_setup_msix_eims(const struct ix_softc *sc, int x,
5041     uint32_t *eims, uint32_t *eims_val)
5042 {
5043 	if (x < 32) {
5044 		if (sc->hw.mac.type == ixgbe_mac_82598EB) {
5045 			KASSERT(x < IX_MAX_MSIX_82598,
5046 			    ("%s: invalid vector %d for 82598",
5047 			     device_get_nameunit(sc->dev), x));
5048 			*eims = IXGBE_EIMS;
5049 		} else {
5050 			*eims = IXGBE_EIMS_EX(0);
5051 		}
5052 		*eims_val = 1 << x;
5053 	} else {
5054 		KASSERT(x < IX_MAX_MSIX, ("%s: invalid vector %d",
5055 		    device_get_nameunit(sc->dev), x));
5056 		KASSERT(sc->hw.mac.type != ixgbe_mac_82598EB,
5057 		    ("%s: invalid vector %d for 82598",
5058 		     device_get_nameunit(sc->dev), x));
5059 		*eims = IXGBE_EIMS_EX(1);
5060 		*eims_val = 1 << (x - 32);
5061 	}
5062 }
5063 
5064 #ifdef IFPOLL_ENABLE
5065 
5066 static void
5067 ix_npoll_status(struct ifnet *ifp)
5068 {
5069 	struct ix_softc *sc = ifp->if_softc;
5070 	uint32_t eicr;
5071 
5072 	ASSERT_SERIALIZED(&sc->main_serialize);
5073 
5074 	eicr = IXGBE_READ_REG(&sc->hw, IXGBE_EICR);
5075 	ix_intr_status(sc, eicr);
5076 }
5077 
5078 static void
5079 ix_npoll_tx(struct ifnet *ifp, void *arg, int cycle __unused)
5080 {
5081 	struct ix_tx_ring *txr = arg;
5082 
5083 	ASSERT_SERIALIZED(&txr->tx_serialize);
5084 
5085 	ix_tx_intr(txr, *(txr->tx_hdr));
5086 	ix_try_txgc(txr, 1);
5087 }
5088 
5089 static void
5090 ix_npoll_rx(struct ifnet *ifp __unused, void *arg, int cycle)
5091 {
5092 	struct ix_rx_ring *rxr = arg;
5093 
5094 	ASSERT_SERIALIZED(&rxr->rx_serialize);
5095 	ix_rxeof(rxr, cycle);
5096 }
5097 
5098 static void
5099 ix_npoll_rx_direct(struct ifnet *ifp __unused, void *arg, int cycle)
5100 {
5101 	struct ix_rx_ring *rxr = arg;
5102 
5103 	ASSERT_NOT_SERIALIZED(&rxr->rx_serialize);
5104 	ix_rxeof(rxr, cycle);
5105 }
5106 
5107 static void
5108 ix_npoll(struct ifnet *ifp, struct ifpoll_info *info)
5109 {
5110 	struct ix_softc *sc = ifp->if_softc;
5111 	int i, txr_cnt, rxr_cnt, idirect;
5112 
5113 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
5114 
5115 	idirect = sc->direct_input;
5116 	cpu_ccfence();
5117 
5118 	if (info) {
5119 		int cpu;
5120 
5121 		info->ifpi_status.status_func = ix_npoll_status;
5122 		info->ifpi_status.serializer = &sc->main_serialize;
5123 
5124 		txr_cnt = ix_get_txring_inuse(sc, TRUE);
5125 		for (i = 0; i < txr_cnt; ++i) {
5126 			struct ix_tx_ring *txr = &sc->tx_rings[i];
5127 
5128 			cpu = if_ringmap_cpumap(sc->tx_rmap, i);
5129 			KKASSERT(cpu < netisr_ncpus);
5130 			info->ifpi_tx[cpu].poll_func = ix_npoll_tx;
5131 			info->ifpi_tx[cpu].arg = txr;
5132 			info->ifpi_tx[cpu].serializer = &txr->tx_serialize;
5133 			ifsq_set_cpuid(txr->tx_ifsq, cpu);
5134 		}
5135 
5136 		rxr_cnt = ix_get_rxring_inuse(sc, TRUE);
5137 		for (i = 0; i < rxr_cnt; ++i) {
5138 			struct ix_rx_ring *rxr = &sc->rx_rings[i];
5139 
5140 			cpu = if_ringmap_cpumap(sc->rx_rmap, i);
5141 			KKASSERT(cpu < netisr_ncpus);
5142 			info->ifpi_rx[cpu].arg = rxr;
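			/*
			 * With direct input the RX handler is invoked
			 * without the RX serializer held (see
			 * ix_npoll_rx_direct()), so no serializer is
			 * registered for the poll callback.
			 */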
5143 			if (idirect) {
5144 				info->ifpi_rx[cpu].poll_func =
5145 				    ix_npoll_rx_direct;
5146 				info->ifpi_rx[cpu].serializer = NULL;
5147 			} else {
5148 				info->ifpi_rx[cpu].poll_func = ix_npoll_rx;
5149 				info->ifpi_rx[cpu].serializer =
5150 				    &rxr->rx_serialize;
5151 			}
5152 		}
5153 		if (idirect)
5154 			ifp->if_flags |= IFF_IDIRECT;
5155 	} else {
5156 		ifp->if_flags &= ~IFF_IDIRECT;
5157 		for (i = 0; i < sc->tx_ring_cnt; ++i) {
5158 			struct ix_tx_ring *txr = &sc->tx_rings[i];
5159 
5160 			ifsq_set_cpuid(txr->tx_ifsq, txr->tx_intr_cpuid);
5161 		}
5162 	}
5163 	if (ifp->if_flags & IFF_RUNNING)
5164 		ix_init(sc);
5165 }
5166 
5167 #endif /* IFPOLL_ENABLE */
5168 
5169 static enum ixgbe_fc_mode
5170 ix_ifmedia2fc(int ifm)
5171 {
5172 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5173 
5174 	switch (fc_opt) {
5175 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5176 		return ixgbe_fc_full;
5177 
5178 	case IFM_ETH_RXPAUSE:
5179 		return ixgbe_fc_rx_pause;
5180 
5181 	case IFM_ETH_TXPAUSE:
5182 		return ixgbe_fc_tx_pause;
5183 
5184 	default:
5185 		return ixgbe_fc_none;
5186 	}
5187 }
5188 
5189 static const char *
5190 ix_ifmedia2str(int ifm)
5191 {
5192 	int fc_opt = ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
5193 
5194 	switch (fc_opt) {
5195 	case (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE):
5196 		return IFM_ETH_FC_FULL;
5197 
5198 	case IFM_ETH_RXPAUSE:
5199 		return IFM_ETH_FC_RXPAUSE;
5200 
5201 	case IFM_ETH_TXPAUSE:
5202 		return IFM_ETH_FC_TXPAUSE;
5203 
5204 	default:
5205 		return IFM_ETH_FC_NONE;
5206 	}
5207 }
5208 
5209 static const char *
5210 ix_fc2str(enum ixgbe_fc_mode fc)
5211 {
5212 	switch (fc) {
5213 	case ixgbe_fc_full:
5214 		return IFM_ETH_FC_FULL;
5215 
5216 	case ixgbe_fc_rx_pause:
5217 		return IFM_ETH_FC_RXPAUSE;
5218 
5219 	case ixgbe_fc_tx_pause:
5220 		return IFM_ETH_FC_TXPAUSE;
5221 
5222 	default:
5223 		return IFM_ETH_FC_NONE;
5224 	}
5225 }
5226 
5227 static int
5228 ix_powerdown(struct ix_softc *sc)
5229 {
5230 	struct ixgbe_hw *hw = &sc->hw;
5231 	int error = 0;
5232 
5233 	/* Limit power management flow to X550EM baseT */
5234 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
5235 	    hw->phy.ops.enter_lplu) {
5236 		/* Turn off support for APM wakeup. (Using ACPI instead) */
5237 		IXGBE_WRITE_REG(hw, IXGBE_GRC,
5238 		    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(uint32_t)2);
5239 
5240 		/*
5241 		 * Clear Wake Up Status register to prevent any previous wakeup
5242 		 * events from waking us up immediately after we suspend.
5243 		 */
5244 		IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
5245 
5246 		/*
5247 		 * Program the Wakeup Filter Control register with user filter
5248 		 * settings
5249 		 */
5250 		IXGBE_WRITE_REG(hw, IXGBE_WUFC, sc->wufc);
5251 
5252 		/* Enable wakeups and power management in Wakeup Control */
5253 		IXGBE_WRITE_REG(hw, IXGBE_WUC,
5254 		    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
5255 
5256 		/* X550EM baseT adapters need a special LPLU flow */
5257 		hw->phy.reset_disable = true;
5258 		ix_stop(sc);
5259 		error = hw->phy.ops.enter_lplu(hw);
5260 		if (error) {
5261 			if_printf(&sc->arpcom.ac_if,
5262 			    "Error entering LPLU: %d\n", error);
5263 		}
5264 		hw->phy.reset_disable = false;
5265 	} else {
5266 		/* Just stop for other adapters */
5267 		ix_stop(sc);
5268 	}
5269 	return error;
5270 }
5271 
5272 static void
5273 ix_config_flowctrl(struct ix_softc *sc)
5274 {
5275 	struct ixgbe_hw *hw = &sc->hw;
5276 	uint32_t rxpb, frame, size, tmp;
5277 
5278 	frame = sc->max_frame_size;
5279 
5280 	/* Calculate High Water */
5281 	switch (hw->mac.type) {
5282 	case ixgbe_mac_X540:
5283 	case ixgbe_mac_X550:
5284 	case ixgbe_mac_X550EM_a:
5285 	case ixgbe_mac_X550EM_x:
5286 		tmp = IXGBE_DV_X540(frame, frame);
5287 		break;
5288 	default:
5289 		tmp = IXGBE_DV(frame, frame);
5290 		break;
5291 	}
5292 	size = IXGBE_BT2KB(tmp);
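	/*
	 * RXPBSIZE keeps the RX packet buffer size in its upper bits;
	 * shifting right by 10 yields the size in KB, the same unit
	 * IXGBE_BT2KB() returns, so the high water mark is the packet
	 * buffer size minus the computed delay value.
	 */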
5293 	rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
5294 	hw->fc.high_water[0] = rxpb - size;
5295 
5296 	/* Now calculate Low Water */
5297 	switch (hw->mac.type) {
5298 	case ixgbe_mac_X540:
5299 	case ixgbe_mac_X550:
5300 	case ixgbe_mac_X550EM_a:
5301 	case ixgbe_mac_X550EM_x:
5302 		tmp = IXGBE_LOW_DV_X540(frame);
5303 		break;
5304 	default:
5305 		tmp = IXGBE_LOW_DV(frame);
5306 		break;
5307 	}
5308 	hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
5309 
5310 	hw->fc.requested_mode = ix_ifmedia2fc(sc->ifm_media);
5311 	if (sc->ifm_media & IFM_ETH_FORCEPAUSE)
5312 		hw->fc.disable_fc_autoneg = TRUE;
5313 	else
5314 		hw->fc.disable_fc_autoneg = FALSE;
5315 	hw->fc.pause_time = IX_FC_PAUSE;
5316 	hw->fc.send_xon = TRUE;
5317 }
5318 
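/*
 * Configure DMA coalescing (DMAC).  When enabled, the hardware briefly
 * coalesces DMA writes during light traffic, which presumably allows
 * the platform to reach deeper power states; it is only available on
 * X550 and later MACs.
 */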
5319 static void
5320 ix_config_dmac(struct ix_softc *sc)
5321 {
5322 	struct ixgbe_hw *hw = &sc->hw;
5323 	struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config;
5324 
5325 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
5326 		return;
5327 
5328 	if ((dcfg->watchdog_timer ^ sc->dmac) ||
5329 	    (dcfg->link_speed ^ sc->link_speed)) {
5330 		dcfg->watchdog_timer = sc->dmac;
5331 		dcfg->fcoe_en = false;
5332 		dcfg->link_speed = sc->link_speed;
5333 		dcfg->num_tcs = 1;
5334 
5335 		if (bootverbose) {
5336 			if_printf(&sc->arpcom.ac_if, "dmac settings: "
5337 			    "watchdog %d, link speed %d\n",
5338 			    dcfg->watchdog_timer, dcfg->link_speed);
5339 		}
5340 
5341 		hw->mac.ops.dmac_config(hw);
5342 	}
5343 }
5344 
5345 static void
5346 ix_init_media(struct ix_softc *sc)
5347 {
5348 	struct ixgbe_hw *hw = &sc->hw;
5349 	uint32_t layer;
5350 
5351 	ifmedia_removeall(&sc->media);
5352 
5353 	layer = sc->phy_layer;
5354 
5355 	/*
5356 	 * Media types with matching DragonFlyBSD media defines
5357 	 */
5358 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
5359 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_T | IFM_FDX,
5360 		    0, NULL);
5361 	}
5362 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
5363 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
5364 		    0, NULL);
5365 	}
5366 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
5367 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
5368 		    0, NULL);
5369 		/* No half-duplex support */
5370 	}
5371 	if (layer & IXGBE_PHYSICAL_LAYER_10BASE_T) {
5372 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10_T | IFM_FDX,
5373 		    0, NULL);
5374 		/* No half-duplex support */
5375 	}
5376 
5377 	if ((layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) ||
5378 	    (layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
5379 		ifmedia_add_nodup(&sc->media,
5380 		    IFM_ETHER | IFM_10G_TWINAX | IFM_FDX, 0, NULL);
5381 	}
5382 
5383 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
5384 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LR | IFM_FDX,
5385 		    0, NULL);
5386 		if (hw->phy.multispeed_fiber) {
5387 			ifmedia_add_nodup(&sc->media,
5388 			    IFM_ETHER | IFM_1000_LX | IFM_FDX, 0, NULL);
5389 		}
5390 	}
5391 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) {
5392 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_LRM | IFM_FDX,
5393 		    0, NULL);
5394 		if (hw->phy.multispeed_fiber) {
5395 			ifmedia_add_nodup(&sc->media,
5396 			    IFM_ETHER | IFM_1000_LX | IFM_FDX, 0, NULL);
5397 		}
5398 	}
5399 
5400 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
5401 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5402 		    0, NULL);
5403 		if (hw->phy.multispeed_fiber) {
5404 			ifmedia_add_nodup(&sc->media,
5405 			    IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL);
5406 		}
5407 	} else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
5408 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
5409 		    0, NULL);
5410 	}
5411 
5412 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
5413 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5414 		    0, NULL);
5415 	}
5416 
5417 	/*
	 * XXX Other media types with no matching DragonFlyBSD define:
	 * as a workaround, map them to the closest (though strictly
	 * inappropriate) existing media types.
5421 	 */
5422 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
5423 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKR\n");
5424 		if_printf(&sc->arpcom.ac_if, "10GbaseKR mapped to 10GbaseSR\n");
5425 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_SR | IFM_FDX,
5426 		    0, NULL);
5427 	}
5428 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
5429 		if_printf(&sc->arpcom.ac_if, "Media supported: 10GbaseKX4\n");
5430 		if_printf(&sc->arpcom.ac_if,
5431 		    "10GbaseKX4 mapped to 10GbaseCX4\n");
5432 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_10G_CX4 | IFM_FDX,
5433 		    0, NULL);
5434 	}
5435 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
5436 		if_printf(&sc->arpcom.ac_if, "Media supported: 1000baseKX\n");
5437 		if_printf(&sc->arpcom.ac_if,
5438 		    "1000baseKX mapped to 1000baseCX\n");
5439 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_1000_CX | IFM_FDX,
5440 		    0, NULL);
5441 	}
5442 	if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) {
5443 		if_printf(&sc->arpcom.ac_if, "Media supported: 2500baseKX\n");
5444 		if_printf(&sc->arpcom.ac_if,
5445 		    "2500baseKX mapped to 2500baseSX\n");
5446 		ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_2500_SX | IFM_FDX,
5447 		    0, NULL);
5448 	}
5449 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
5450 		if_printf(&sc->arpcom.ac_if,
5451 		    "Media supported: 1000baseBX, ignored\n");
5452 	}
5453 
5454 	/* XXX we probably don't need this */
5455 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
5456 		ifmedia_add_nodup(&sc->media,
5457 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
5458 	}
5459 
5460 	ifmedia_add_nodup(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
5461 
5462 	if (ifmedia_tryset(&sc->media, sc->ifm_media)) {
5463 		int flowctrl = (sc->ifm_media & IFM_ETH_FCMASK);
5464 
5465 		sc->advspeed = IXGBE_LINK_SPEED_UNKNOWN;
5466 		sc->ifm_media = IX_IFM_DEFAULT | flowctrl;
5467 		ifmedia_set(&sc->media, sc->ifm_media);
5468 	}
5469 }
5470 
5471 static void
5472 ix_setup_caps(struct ix_softc *sc)
5473 {
5474 
5475 	sc->caps |= IX_CAP_LEGACY_INTR;
5476 
5477 	switch (sc->hw.mac.type) {
5478 	case ixgbe_mac_82598EB:
5479 		if (sc->hw.device_id == IXGBE_DEV_ID_82598AT)
5480 			sc->caps |= IX_CAP_DETECT_FANFAIL;
5481 		break;
5482 
5483 	case ixgbe_mac_X550:
5484 		sc->caps |= IX_CAP_TEMP_SENSOR;
5485 		break;
5486 
5487 	case ixgbe_mac_X550EM_x:
5488 		if (sc->hw.device_id == IXGBE_DEV_ID_X550EM_X_KR)
5489 			sc->caps |= IX_CAP_EEE;
5490 		break;
5491 
5492 	case ixgbe_mac_X550EM_a:
5493 		sc->caps &= ~IX_CAP_LEGACY_INTR;
5494 		if (sc->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T ||
5495 		    sc->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L)
5496 			sc->caps |= IX_CAP_TEMP_SENSOR | IX_CAP_EEE;
5497 		break;
5498 
5499 	case ixgbe_mac_82599EB:
5500 		if (sc->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP)
5501 			sc->caps &= ~IX_CAP_LEGACY_INTR;
5502 		break;
5503 
5504 	default:
5505 		break;
5506 	}
5507 }
5508 
5509 static void
5510 ix_detect_fanfail(struct ix_softc *sc, uint32_t reg, boolean_t intr)
5511 {
5512 	uint32_t mask;
5513 
5514 	mask = intr ? IXGBE_EICR_GPI_SDP1_BY_MAC(&sc->hw) : IXGBE_ESDP_SDP1;
5515 	if (reg & mask) {
5516 		if_printf(&sc->arpcom.ac_if,
5517 		    "CRITICAL: FAN FAILURE!!  REPLACE IMMEDIATELY!!\n");
5518 	}
5519 }
5520 
5521 static void
5522 ix_config_gpie(struct ix_softc *sc)
5523 {
5524 	struct ixgbe_hw *hw = &sc->hw;
5525 	uint32_t gpie;
5526 
5527 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
5528 
5529 	if (sc->intr_type == PCI_INTR_TYPE_MSIX) {
5530 		/* Enable Enhanced MSI-X mode */
5531 		gpie |= IXGBE_GPIE_MSIX_MODE |
5532 		    IXGBE_GPIE_EIAME |
5533 		    IXGBE_GPIE_PBA_SUPPORT |
5534 		    IXGBE_GPIE_OCD;
5535 	}
5536 
5537 	/* Fan Failure Interrupt */
5538 	if (sc->caps & IX_CAP_DETECT_FANFAIL)
5539 		gpie |= IXGBE_SDP1_GPIEN;
5540 
5541 	/* Thermal Sensor Interrupt */
5542 	if (sc->caps & IX_CAP_TEMP_SENSOR)
5543 		gpie |= IXGBE_SDP0_GPIEN_X540;
5544 
5545 	/* Link detection */
5546 	switch (hw->mac.type) {
5547 	case ixgbe_mac_82599EB:
5548 		gpie |= IXGBE_SDP1_GPIEN | IXGBE_SDP2_GPIEN;
5549 		break;
5550 
5551 	case ixgbe_mac_X550EM_x:
5552 	case ixgbe_mac_X550EM_a:
5553 		gpie |= IXGBE_SDP0_GPIEN_X540;
5554 		break;
5555 
5556 	default:
5557 		break;
5558 	}
5559 
5560 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
5561 }
5562